上一篇:cacti+nagios 整合企业级监控平台(三)
六、nagios 监控报警配置
- nagios QQ群消息报警
请点击如上地址查看
- nagios 邮件报警配置
- 测试邮件发送成功后,修改nagios配置文件
vim /usr/local/nagios/etc/objects/commands.cfg
#增加以下内容
# 'notify-host-by-email-mutt' command definition
# Formats a host notification with printf and pipes it to mutt.
# Nagios reads command_line as one logical line, so the whole pipeline
# must stay on a single physical line (do not wrap it).
define command{
    command_name    notify-host-by-email-mutt
    command_line    /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\nHost: $HOSTNAME$\nState: $HOSTSTATE$\nAddress: $HOSTADDRESS$\nInfo: $HOSTOUTPUT$\n\nDate/Time: $LONGDATETIME$\n" | /usr/bin/mutt -s "** $NOTIFICATIONTYPE$ Host Alert: $HOSTNAME$ is $HOSTSTATE$ **" $CONTACTEMAIL$
}
# 'notify-service-by-email-mutt' command definition
# Same as above for service notifications. The state macro in the body is
# $SERVICESTATE$, matching the one used in the subject line.
define command{
    command_name    notify-service-by-email-mutt
    command_line    /usr/bin/printf "%b" "***** Nagios *****\n\nNotification Type: $NOTIFICATIONTYPE$\n\nService: $SERVICEDESC$\nHost: $HOSTALIAS$\nAddress: $HOSTADDRESS$\nState: $SERVICESTATE$\n\nDate/Time: $LONGDATETIME$\n\nAdditional Info:\n\n$SERVICEOUTPUT$\n" | /usr/bin/mutt -s "** $NOTIFICATIONTYPE$ Service Alert: $HOSTALIAS$/$SERVICEDESC$ is $SERVICESTATE$ **" $CONTACTEMAIL$
}
- 这里我把联系人和报警命令的配置整合到了一起
vim /usr/local/nagios/etc/objects/contact.cfg
# Contact template (register 0): notifies around the clock through both the
# mutt e-mail commands and the QQ commands.
define contact{
    name                            yunwei-contact
    register                        0
    host_notification_period        24x7
    service_notification_period     24x7
    host_notification_options       d,u,r,f,s
    service_notification_options    w,u,c,r,f,s
    host_notification_commands      notify-host-by-email-mutt,notify-host-by-qq
    service_notification_commands   notify-service-by-email-mutt,notify-service-by-qq
}
# Concrete contact inheriting every notification setting from the template.
define contact{
    use             yunwei-contact
    contact_name    leoiceo
    alias           lmb
    email           leoiceo@gmail.com,158***34@139.com
}
# Contact group
# 'members' must list contact_name values of defined contacts; the contact
# defined in this file is 'leoiceo'.
define contactgroup{
    contactgroup_name   yunwei-contactgroup
    alias               Nagios Inception Contact
    members             leoiceo
}
# Base host template (register 0): switches on notifications, event handlers,
# flap detection, performance data and state retention for inheriting hosts.
define host{
    name                            host
    register                        0
    notification_period             24x7
    notifications_enabled           1
    event_handler_enabled           1
    flap_detection_enabled          1
    failure_prediction_enabled      1
    process_perf_data               1
    retain_status_information       1
    retain_nonstatus_information    1
}
# Host alerting template (register 0), built on the base 'host' template.
define host{
    name                    cn-server
    use                     host
    register                0
    # Time period during which the host is checked.
    check_period            24x7
    # Interval between regular host checks: 5 minutes.
    check_interval          5
    # Interval between re-checks after a failure, in minutes.
    retry_interval          1
    # Maximum number of check attempts. Nagios does not flag the host as
    # failed on the first bad result, since brief network congestion or some
    # other transient cause may be responsible; it retries up to 10 times.
    max_check_attempts      10
    # NOTE(review): the stock sample config names this command
    # 'check-host-alive'; confirm 'check-hosts-alive' is defined.
    check_command           check-hosts-alive
    notification_period     workhours
    notification_interval   120     ; re-notify every 120 minutes
    notification_options    d,u,r
    contact_groups          yunwei-contactgroup
}
# Base service template (register 0) holding the defaults shared by services.
define service{
    name                            service
    register                        0
    # Check behaviour
    active_checks_enabled           1
    passive_checks_enabled          1
    parallelize_check               1
    obsess_over_service             1
    check_freshness                 0
    is_volatile                     0
    check_period                    24x7
    max_check_attempts              3       ; maximum number of check attempts for a service
    retry_check_interval            1       ; interval between re-checks, in minutes
    # Notification behaviour
    notifications_enabled           1
    # Of w,u,c,r the 'u' (unknown) state is left out on purpose; it need not alert.
    notification_options            w,c,r
    notification_interval           60
    notification_period             24x7
    # Event handling, flapping and data retention
    event_handler_enabled           1
    flap_detection_enabled          1
    failure_prediction_enabled      1
    process_perf_data               1
    retain_status_information       1
    retain_nonstatus_information    1
    notes_url                       /nagios/cgi-bin/show.cgi?host=$HOSTNAME$&service=$SERVICEDESC$
}
# Service alerting template (register 0), built on the base 'service' template.
define service{
    name                    cn-service
    use                     service
    register                0
    contact_groups          yunwei-contactgroup
    normal_check_interval   3       ; regular check interval: 3 minutes
    retry_check_interval    1       ; interval between re-checks
    max_check_attempts      4       ; number of checks after a problem is seen
}
- 定义一个服务组列表,别名用中文方便同事查看
vim servicegroup.cfg
; System load
define servicegroup{
    servicegroup_name   Current Load
    alias               系统负载 ;Check_Load
}

; Free space on the /data partition
define servicegroup{
    servicegroup_name   Data Partition
    alias               Data分区空闲率 ;Check_Data
}

; Free space on the root (/) partition
define servicegroup{
    servicegroup_name   Root Partition
    alias               根分区空闲率 ;Check_Root
}

; Free swap space
define servicegroup{
    servicegroup_name   Swap Usage
    alias               交换分区空闲率 ;Check_Swap
}

; Total number of running processes
define servicegroup{
    servicegroup_name   Total Processes
    alias               总进程数 ;Check_Processes
}

; Game port liveness
define servicegroup{
    servicegroup_name   Game Port
    alias               游戏端口存活状态 ;Check_Game
}

; Zombie process monitoring
; NOTE(review): 'Processses' is spelled with three s's; left unchanged because
; service definitions elsewhere may reference this exact name. Confirm before renaming.
define servicegroup{
    servicegroup_name   Zombie Processses
    alias               僵尸进程监控 ;check_zombie_procs
}
# Host group for the test servers.
define hostgroup{
    hostgroup_name  test-servers    ;The name of the hostgroup
    alias           测试服务器 ;Long name of the group
    members         log,login
}

# Host group for the development servers.
define hostgroup{
    hostgroup_name  dev-servers
    alias           开发服务器
    members         devtest
}
七、nagios-plugin 客户端配置
- 安装nagios-plugins-2.0.3.tar.gz和nrpe-2.15.tar.gz 即可
# Build and install the Nagios plugins; the nrpe.cfg commands written later
# expect them under /usr/local/nagios/libexec.
tar zxf nagios-plugins-2.0.3.tar.gz
cd nagios-plugins-2.0.3
./configure
make && make install
cd ../
# Build and install NRPE. The prefix must be /usr/local/nagios because the
# later steps write /usr/local/nagios/etc/nrpe.cfg and start
# /usr/local/nagios/bin/nrpe. --enable-command-args allows $ARGn$ macros.
tar zxf nrpe-2.15.tar.gz
cd nrpe-2.15
./configure --prefix=/usr/local/nagios --enable-ssl --with-nrpe-user=nagios --with-nrpe-group=nagios --with-nagios-user=nagios --with-nagios-group=nagios --enable-command-args
make all
make install-plugin
make install-daemon
make install-daemon-config
# Replace nrpe.cfg with the configuration below. The heredoc delimiter is
# quoted ('EOF') so the shell does not expand the $ARGn$ macros; with an
# unquoted delimiter each "$ARG1$" would be written to the file as "$".
cat > /usr/local/nagios/etc/nrpe.cfg << 'EOF'
log_facility=daemon
pid_file=/var/run/nrpe.pid
server_port=5666
nrpe_user=nagios
nrpe_group=nagios
# Replace the placeholder after the comma with the IP of the Nagios monitoring server.
allowed_hosts=127.0.0.1,nagios监控端的IP
# Accept command arguments ($ARGn$) sent by the monitoring server.
dont_blame_nrpe=1
debug=0
command_timeout=60
connection_timeout=300
command[check_uptime]=/usr/local/nagios/libexec/check_uptime
command[check_http_args]=/usr/local/nagios/libexec/check_http -H $ARG1$ -u /$ARG2$ -t 30
command[check_users_args]=/usr/local/nagios/libexec/check_users -w $ARG1$ -c $ARG2$
command[check_load_args]=/usr/local/nagios/libexec/check_load -w $ARG1$ -c $ARG2$
command[check_root_args]=/usr/local/nagios/libexec/check_disk -w $ARG1$ -c $ARG2$ -p /
command[check_data_args]=/usr/local/nagios/libexec/check_disk -w $ARG1$ -c $ARG2$ -p /data
command[check_disk_args]=/usr/local/nagios/libexec/check_disk -w $ARG1$ -c $ARG2$ -p /$ARG3$
command[check_zombie_procs_args]=/usr/local/nagios/libexec/check_procs -w $ARG1$ -c $ARG2$ -s Z
command[check_total_procs_args]=/usr/local/nagios/libexec/check_procs -w $ARG1$ -c $ARG2$
command[check_tcp_args]=/usr/local/nagios/libexec/check_tcp -p $ARG1$ -w $ARG2$ -c $ARG3$
command[check_swap_args]=/usr/local/nagios/libexec/check_swap -w $ARG1$ -c $ARG2$
command[check_mem_args]=/usr/local/nagios/libexec/check_mem -w $ARG1$ -c $ARG2$
command[check_net_args]=/usr/local/nagios/libexec/check_net_traffic.sh -d $ARG1$ -w $ARG2$ -c $ARG3$
EOF
# Start the NRPE daemon (-d) with the configuration just written.
/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d