Nagios监控linux服务器 一、客户端 安装所需的软件(nagios-plugins、nrpe) #wget #wget 1、安装nagios-plugins #增加一个用户 useradd nagios -s /sbin/nologin -M 解压并安装 #tar zvxf nagios-plugins-1.4.16.tar.gz && cd nagios-plugins-1.4.16 #./configure --with-nagios-user=nagios --with-nagios-group=nagios && make && make install #chown -R nagios:nagios /usr/local/nagios 2、安装nrpe #tar -zxvf nrpe-2.13.tar.gz && cd nrpe-2.13 #./configure --enable-ssl --with-ssl-lib (前提是已经安装了openssl与openssl-devel) #make all && make install-plugin && make install-daemon && make install-daemon-config 3、配置nrpe 配置nrpe信息 #vim /usr/local/nagios/etc/nrpe.cfg,查找并修改如下一行 allowed_hosts=, #注意修改为nagios服务器的IP: #配置监控对象 说明:由监控原理可知被监控端做监控,然后将数据传给监控服务器汇总,设置监控详细参数主要是设置被监控端的nrpe.cfg文件,可以看到里面监控对象 vim /usr/local/nagios/etc/nrpe.cfg,查找并修改如下几行 command[check_users]=/usr/local/nagios/libexec/check_users -w 5 -c 10 command[check_load]=/usr/local/nagios/libexec/check_load -w 15,10,5 -c 30,25,20 command[check_disk]=/usr/local/nagios/libexec/check_disk -w 10% -c 5% command[check_zombie_procs]=/usr/local/nagios/libexec/check_procs -w 5 -c 10 -s Z command[check_total_procs]=/usr/local/nagios/libexec/check_procs -w 500 -c 550 command[check_swap]=/usr/local/nagios/libexec/check_swap -w 20% -c 10% command[check_ping]=/usr/local/nagios/libexec/check_ping -H -w 100.0,20% -c 500.0,60% 最后两行是自行添加的 4、启动nrpe,并测试 #/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d #echo "/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d &> /dev/null" >> /etc/rc.local #netstat -atulnp | grep nrpe tcp 0 0* LISTEN 5201/nrpe #/usr/local/nagios/libexec/check_nrpe -H localhost NRPE v2.13 二、服务器端 1、安装所需的软件(nagios、nagios-plugins、nrpe)下载后源码包安装,不赘述 wget wget wget 安装完成后配置一下报警的邮箱 # grep email /usr/local/nagios/etc/objects/contacts.cfg email ; <<***** CHANGE THIS TO YOUR EMAIL ADDRESS ****** 2、配置 #在commands.cfg中定义nrpe这个外部构件 #vim /usr/local/nagios/etc/nagios.cfg,打开下面这一行 cfg_file=/usr/local/nagios/etc/objects/commands.cfg #vim /usr/local/nagios/etc/objects/commands.cfg,增加如下一行 
check nrpe define command{ command_name check_nrpe command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$ } 3、#配置要监控的linux主机 #cd /usr/local/nagios/etc/objects #mv localhost.cfg rhel5.cfg 更改如下 注意下面定义的check名字要与linux客户端/usr/local/nagios/etc/nrpe.cfg文件中的名字一致 #cat rhel5.cfg # Define a host for the local machine define host{ use linux-server host_name rhel5 alias rhel5 address } # SERVICE DEFINITIONS # Define a service to "ping" the local machine define service{ use generic-service ; Name of service template to use host_name rhel5 service_description PING check_command check_nrpe!check_ping } # Define a service to check the disk space of the root partition # on the local machine. Warning if < 20% free, critical if # < 10% free space on partition. define service{ use generic-service ; Name of service template to use host_name rhel5 service_description Root Partition check_command check_nrpe!check_disk } # Define a service to check the number of currently logged in # users on the local machine. Warning if > 20 users, critical # if > 50 users. define service{ use generic-service ; Name of service template to use host_name rhel5 service_description Current Users check_command check_nrpe!check_users } # Define a service to check the number of currently running procs # on the local machine. Warning if > 250 processes, critical if # > 400 processes. define service{ use generic-service ; Name of service template to use host_name rhel5 service_description Total Processes check_command check_nrpe!check_total_procs } # Define a service to check the load on the local machine. define service{ use generic-service ; Name of service template to use host_name rhel5 service_description Current Load check_command check_nrpe!check_load } # Define a service to check the swap usage on the local machine. 
# Critical if less than 10% of swap is free, warning if less than 20% is free define service{ use generic-service ; Name of service template to use host_name rhel5 service_description Swap Usage check_command check_nrpe!check_swap } 4、启动nrpe,并测试 #/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d #echo "/usr/local/nagios/bin/nrpe -c /usr/local/nagios/etc/nrpe.cfg -d &> /dev/null" >> /etc/rc.local #netstat -tulpn | grep nrpe tcp 0 0* LISTEN 14371/nrpe #/usr/local/nagios/libexec/check_nrpe -H #linux客户端的IP地址: NRPE v2.13 #/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg 检查结果中没有任何错误才说明配置语法正确;如有错误,输出会指出出错的位置,按提示改正即可 Total Warnings: 0 Total Errors: 0 Things look okay - No serious problems were detected during the pre-flight check 5、重启服务 # service nagios restart Running configuration check...done. Stopping nagios: done. Starting nagios: done. 网页访问 http://ip/nagios