CentOS + Nagios 监控服务器基本功能

October 21, 2010 – 8:39 pm

测试环境

CentOS 5.5 x86_64,安装开发工具和开发库。
关闭selinux
服务端IP地址:192.168.90.187

安装nagios

groupadd nagios
useradd -g nagios -md /usr/local/nagios nagios
groupadd nagioscmd
usermod -a -G nagioscmd apache
usermod -a -G nagioscmd nagios
wget ftp://192.168.1.242/Linux/nagios/nagios-2.9.tar.gz
wget ftp://192.168.1.242/Linux/nagios/nagios-plugins-1.4.8.tar.gz
tar zxf nagios-2.9.tar.gz
tar zxf nagios-plugins-1.4.8.tar.gz
cd nagios-2.9
./configure --with-cgiurl=/nagios/cgi-bin --with-htmlurl=/nagios --with-nagios-user=nagios --with-nagios-group=nagios --with-command-group=nagioscmd
make all
make install
make install-init
make install-commandmode
make install-config
cd ..
cd nagios-plugins-1.4.8
./configure
make && make install
cd ..

创建身份验证密钥

cd /usr/local/nagios/etc
htpasswd -c htpasswd.users nagios
chown apache:nagioscmd htpasswd.users
chmod 600 htpasswd.users

配置apache
安装apache步骤省略,确保httpd.conf里有Include conf.d/*.conf,然后在conf.d目录里创建nagios.conf,输入以下内容:

# conf.d/nagios.conf
Alias /nagios/cgi-bin /usr/local/nagios/sbin
<Directory "/usr/local/nagios/sbin">
 
AddHandler cgi-script cgi pl
 
Options ExecCGI
 
AllowOverride None
 
Order allow,deny
 
HostnameLookups On
 
Allow from localhost
 
Allow from 127.0.0.1
 
Allow from 192.168.90.
 
AuthName "nagios access"
 
AuthType Basic
 
AuthUserFile /usr/local/nagios/etc/htpasswd.users
 
Require valid-user
</Directory>
Alias /nagios /usr/local/nagios/share
<Directory "/usr/local/nagios/share">
 
Options None
 
AllowOverride None
 
Order allow,deny
 
HostnameLookups On
 
Allow from localhost
 
Allow from 127.0.0.1
 
Allow from 192.168.90.
 
AuthName "nagios access"
 
AuthType Basic
 
AuthUserFile /usr/local/nagios/etc/htpasswd.users
 
Require valid-user
</Directory>

让apache用户能够进入nagios的主目录(useradd创建的主目录默认权限为700;不要对整个/usr递归执行chmod,否则会清除setuid程序的权限位):

chmod 755 /usr/local/nagios

启动apache,访问:http://192.168.90.187/nagios,即可以看到nagios界面。
配置nagios监控本机5个基本功能:ping、磁盘使用率、本地用户、总进程数和CPU负载。

mkdir -p /usr/local/nagios/etc/lan_objects
mkdir example
mv cgi.cfg-sample commands.cfg-sample localhost.cfg-sample nagios.cfg-sample resource.cfg-sample example/
cd example/
cp cgi.cfg-sample ../cgi.cfg
cp resource.cfg-sample ../resource.cfg
cp commands.cfg-sample ../lan_objects/commands.cfg
vi /usr/local/nagios/etc/nagios.cfg
################
# nagios.cfg
# main Nagios configuration file
################
log_file=/usr/local/nagios/var/nagios.log
cfg_dir=/usr/local/nagios/etc/lan_objects
object_cache_file=/usr/local/nagios/var/objects.cache
resource_file=/usr/local/nagios/etc/resource.cfg
status_file=/usr/local/nagios/var/status.dat
 
nagios_user=nagios
nagios_group=nagios
 
check_external_commands=1
command_check_interval=-1
command_file=/usr/local/nagios/var/rw/nagios.cmd
 
comment_file=/usr/local/nagios/var/comments.dat
downtime_file=/usr/local/nagios/var/downtime.dat
lock_file=/usr/local/nagios/var/nagios.lock
temp_file=/usr/local/nagios/var/nagios.tmp
event_broker_options=-1
 
log_rotation_method=d
log_archive_path=/usr/local/nagios/var/archives
use_syslog=1
log_notifications=1
log_service_retries=1
 
log_host_retries=1
log_event_handlers=1
log_initial_states=0
log_external_commands=1
log_passive_checks=1
 
service_inter_check_delay_method=s
max_service_check_spread=30
service_interleave_factor=s
host_inter_check_delay_method=s
max_host_check_spread=30
 
max_concurrent_checks=0
service_reaper_frequency=10
auto_reschedule_checks=0
auto_rescheduling_interval=30
auto_rescheduling_window=180
 
sleep_time=0.25
service_check_timeout=60
host_check_timeout=30
event_handler_timeout=30
notification_timeout=30
 
ocsp_timeout=5
perfdata_timeout=5
retain_state_information=1
state_retention_file=/usr/local/nagios/var/retention.dat
retention_update_interval=60
 
use_retained_program_state=1
use_retained_scheduling_info=0
interval_length=60
use_aggressive_host_checking=0
execute_service_checks=1
 
accept_passive_service_checks=1
execute_host_checks=1
accept_passive_host_checks=1
enable_notifications=1
enable_event_handlers=1
 
process_performance_data=0
obsess_over_services=0
check_for_orphaned_services=0
check_service_freshness=1
service_freshness_check_interval=60
 
check_host_freshness=0
host_freshness_check_interval=60
aggregate_status_updates=1
status_update_interval=15
enable_flap_detection=0
 
low_service_flap_threshold=5.0
high_service_flap_threshold=20.0
low_host_flap_threshold=5.0
high_host_flap_threshold=20.0
date_format=us
 
p1_file=/usr/local/nagios/bin/p1.pl
illegal_object_name_chars=`~!$%^&*|'"<>?,()=
illegal_macro_output_chars=`~$&|'"<>
use_regexp_matching=0
use_true_regexp_matching=0
 
admin_email=nagios
admin_pager=pagenagios
daemon_dumps_core=0
vi /usr/local/nagios/etc/lan_objects/timeperiods.cfg
# Time periods
# All times are valid for all
# checks and notifications
 
define timeperiod{
        
timeperiod_name 24x7
        
alias           24 Hours A Day, 7 Days A Week
        
sunday          00:00-24:00
        
monday          00:00-24:00
        
tuesday         00:00-24:00
        
wednesday       00:00-24:00
        
thursday        00:00-24:00
        
friday          00:00-24:00
        
saturday        00:00-24:00
        
}
vi /usr/local/nagios/etc/lan_objects/contacts.cfg
################
# Contacts- individuals and groups
################
define contact{
        
contact_name                    nagios
        
alias                           Nagios Admin
        
service_notification_period     24x7
        
host_notification_period        24x7
        
service_notification_options    w,u,c,r
        
host_notification_options       d,r
        
service_notification_commands   notify-by-email
        
host_notification_commands      host-notify-by-email
        
email                           luanhelh@163.com
        
}
 
# contact groups
# Nagios only talks to contact groups, not individuals
# members must be Nagios users, alias and contact_group
# are whatever you want
 
define contactgroup{
        
contactgroup_name       admins
        
alias                   Nagios Administrators
        
members                 nagios
        
}
vi /usr/local/nagios/etc/lan_objects/hosts.cfg
################
# Hosts file- individual hosts and host groups
################
# Generic host definition template - This is NOT a real host, just a template!
 
define host{
 
name                generic-host   
 
notifications_enabled       1   
 
event_handler_enabled       1
 
flap_detection_enabled      1
 
failure_prediction_enabled  1 
 
process_perf_data           1 
 
retain_status_information   1 
 
retain_nonstatus_information  1 
; DONT REGISTER THIS DEFINITION - ITS NOT A REAL HOST, JUST A TEMPLATE!
 
register  0       
        
}
# local host definition
 
define host{
        
use                     generic-host
        
host_name               localhost
        
alias                   Nagios Server
        
address                 127.0.0.1
        
check_command           check-host-alive
        
max_check_attempts      10
        
check_period            24x7
        
notification_interval   120
        
notification_period     24x7
        
notification_options    d,r
        
contact_groups      admins
        
}
 
##############
# Host groups
##############
 
# Every host must belong to a host group
 
define hostgroup{
        
hostgroup_name  test
        
alias           Test Servers
        
members         localhost
        
}
vi /usr/local/nagios/etc/lan_objects/services.cfg
################
# Services
################
 
# Generic service definition template - This is NOT a real service, just a template!
 
define service{
 
name           generic-service
 
active_checks_enabled    1 
 
passive_checks_enabled   1 
 
parallelize_check        1 
 
obsess_over_service      1 
 
check_freshness          0 
 
notifications_enabled        1 
 
event_handler_enabled        1
 
flap_detection_enabled       1
 
failure_prediction_enabled   1
 
process_perf_data            1
 
retain_status_information    1
 
retain_nonstatus_information 1
; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE!
 
register    0 
        
}
 
# Define a service to "ping" the local machine
 
define service{
        
use                             generic-service
        
host_name                       localhost
        
service_description             PING
        
is_volatile                     0
        
check_period                    24x7
        
max_check_attempts              4
        
normal_check_interval           5
        
retry_check_interval            1
        
contact_groups                  admins
        
notification_options              w,u,c,r
        
notification_interval           960
        
notification_period             24x7
        
check_command                  check_ping!100.0,20%!500.0,60%
        
}
 
# Define a service to check the disk space of the root partition
# on the local machine.  Warning if < 20% free, critical if
# < 10% free space on partition.
 
define service{
        
use                             generic-service   
        
host_name                       localhost
        
service_description             Root Partition
        
is_volatile                     0
        
check_period                    24x7
        
max_check_attempts              4
        
normal_check_interval           5
        
retry_check_interval            1
        
contact_groups                  admins
        
notification_options              w,u,c,r
        
notification_interval           960
        
notification_period             24x7
        
check_command                  check_local_disk!20%!10%!/
        
}
 
# Define a service to check the number of currently logged in
# users on the local machine.  Warning if > 20 users, critical
# if > 50 users.
 
define service{
        
use                             generic-service 
        
host_name                       localhost
        
service_description             Current Users
        
is_volatile                     0
        
check_period                    24x7
        
max_check_attempts              4
        
normal_check_interval           5
        
retry_check_interval            1
        
contact_groups                  admins
        
notification_options              w,u,c,r
        
notification_interval           960
        
notification_period             24x7
        
check_command                  check_local_users!20!50
        
}
 
# Define a service to check the number of currently running procs
# on the local machine.  Warning if > 250 processes, critical if
# > 400 processes.
 
define service{
        
use                             generic-service 
        
host_name                       localhost
        
service_description             Total Processes
        
is_volatile                     0
        
check_period                    24x7
        
max_check_attempts              4
        
normal_check_interval           5
        
retry_check_interval            1
        
contact_groups                  admins
    
notification_options        w,u,c,r
        
notification_interval           960
        
notification_period             24x7
    
check_command            check_local_procs!250!400
        
}
 
# Define a service to check the load on the local machine.
 
define service{
        
use                             generic-service 
        
host_name                       localhost
        
service_description             Current Load
        
is_volatile                     0
        
check_period                    24x7
        
max_check_attempts              4
        
normal_check_interval           5
        
retry_check_interval            1
        
contact_groups                  admins
        
notification_options              w,u,c,r
        
notification_interval           960
        
notification_period             24x7
        
check_command                  check_local_load!5.0,4.0,3.0!10.0,6.0,4.0
        
}

设置配置文件权限:

chown -R nagios:nagios /usr/local/nagios/etc/lan_objects/
chmod 755 /usr/local/nagios/etc/lan_objects/
chmod 644 /usr/local/nagios/etc/lan_objects/*.cfg
chown nagios:nagios /usr/local/nagios/etc/nagios.cfg
chmod 644 /usr/local/nagios/etc/nagios.cfg
chown nagios:nagios /usr/local/nagios/etc/resource.cfg
chmod 600 /usr/local/nagios/etc/resource.cfg
chown nagios:nagios /usr/local/nagios/etc/cgi.cfg
chmod 644 /usr/local/nagios/etc/cgi.cfg

运行nagios的语法检查器看看:

/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg

检查通过结果为:

[root@vps1 ~]# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg

Nagios 2.9
Copyright (c) 1999-2007 Ethan Galstad (http://www.nagios.org)
Last Modified: 04-10-2007
License: GPL

Reading configuration data...

Running pre-flight check on configuration data...

Checking services...
        Checked 5 services.
Checking hosts...
        Checked 1 hosts.
Checking host groups...
        Checked 1 host groups.
Checking service groups...
        Checked 0 service groups.
Checking contacts...
        Checked 1 contacts.
Checking contact groups...
        Checked 1 contact groups.
Checking service escalations...
        Checked 0 service escalations.
Checking service dependencies...
        Checked 0 service dependencies.
Checking host escalations...
        Checked 0 host escalations.
Checking host dependencies...
        Checked 0 host dependencies.
Checking commands...
        Checked 22 commands.
Checking time periods...
        Checked 1 time periods.
Checking extended host info definitions...
        Checked 0 extended host info definitions.
Checking extended service info definitions...
        Checked 0 extended service info definitions.
Checking for circular paths between hosts...
Checking for circular host and service dependencies...
Checking global event handlers...
Checking obsessive compulsive processor commands...
Checking misc settings...

Total Warnings: 0
Total Errors:   0

Things look okay - No serious problems were detected during the pre-flight check

启动nagios服务

/etc/init.d/nagios start

现在访问http://192.168.90.187/nagios,点击左侧的“Service Detail”,看看是不是显示监控信息了,如图:

1.png

配置开机启动nagios服务:

chkconfig --add nagios
chkconfig --level 345 nagios on
  1. 1 Trackback(s)

  2. Oct 22, 2010: Symfony Askeet24翻译校对完成-lightphp轻型开发框架发布 » Blog Archive » CentOS + Nagios 监控服务器SSHD

Post a Comment