CentOS + Nagios 监控服务器基本功能
October 21, 2010 – 8:39 pm
测试环境
CentOS 5.5 x86_64,安装开发工具和开发库。
关闭selinux
服务端IP地址:192.168.90.187
安装nagios
groupadd nagios
useradd -g nagios -md /usr/local/nagios nagios
groupadd nagioscmd
usermod -G nagioscmd apache
usermod -G nagioscmd nagios
wget ftp://192.168.1.242/Linux/nagios/nagios-2.9.tar.gz
wget ftp://192.168.1.242/Linux/nagios/nagios-plugins-1.4.8.tar.gz
tar zxf nagios-2.9.tar.gz
tar zxf nagios-plugins-1.4.8.tar.gz
cd nagios-2.9
./configure --with-cgiurl=/nagios/cgi-bin --with-htmlurl=/nagios --with-nagios-user=nagios --with-nagios-group=nagios --with-command-group=nagioscmd
make all
make install
make install-init
make install-commandmode
make install-config
cd ..
cd nagios-plugins-1.4.8
./configure
make && make install
cd ..
useradd -g nagios -md /usr/local/nagios nagios
groupadd nagioscmd
usermod -G nagioscmd apache
usermod -G nagioscmd nagios
wget ftp://192.168.1.242/Linux/nagios/nagios-2.9.tar.gz
wget ftp://192.168.1.242/Linux/nagios/nagios-plugins-1.4.8.tar.gz
tar zxf nagios-2.9.tar.gz
tar zxf nagios-plugins-1.4.8.tar.gz
cd nagios-2.9
./configure --with-cgiurl=/nagios/cgi-bin --with-htmlurl=/nagios --with-nagios-user=nagios --with-nagios-group=nagios --with-command-group=nagioscmd
make all
make install
make install-init
make install-commandmode
make install-config
cd ..
cd nagios-plugins-1.4.8
./configure
make && make install
cd ..
创建身份验证密码文件
cd /usr/local/nagios/etc
htpasswd -c htpasswd.users nagios
chown apache:nagioscmd htpasswd.users
chmod 600 htpasswd.users
htpasswd -c htpasswd.users nagios
chown apache:nagioscmd htpasswd.users
chmod 600 htpasswd.users
配置apache
安装apache步骤省略,确保httpd.conf里有Include conf.d/*.conf,然后在conf.d目录里创建nagios.conf,输入以下内容:
# conf.d/nagios.conf
Alias /nagios/cgi-bin /usr/local/nagios/sbin
<Directory "/usr/local/nagios/sbin">
AddHandler cgi-script cgi pl
Options ExecCGI
AllowOverride None
Order allow,deny
HostnameLookups On
Allow from localhost
Allow from 127.0.0.1
Allow from 192.168.90.
AuthName "nagios access"
AuthType Basic
AuthUserFile /usr/local/nagios/etc/htpasswd.users
Require valid-user
</Directory>
Alias /nagios /usr/local/nagios/share
<Directory "/usr/local/nagios/share">
Options None
AllowOverride None
Order allow,deny
HostnameLookups On
Allow from localhost
Allow from 127.0.0.1
Allow from 192.168.90.
AuthName "nagios access"
AuthType Basic
AuthUserFile /usr/local/nagios/etc/htpasswd.users
Require valid-user
</Directory>
Alias /nagios/cgi-bin /usr/local/nagios/sbin
<Directory "/usr/local/nagios/sbin">
AddHandler cgi-script cgi pl
Options ExecCGI
AllowOverride None
Order allow,deny
HostnameLookups On
Allow from localhost
Allow from 127.0.0.1
Allow from 192.168.90.
AuthName "nagios access"
AuthType Basic
AuthUserFile /usr/local/nagios/etc/htpasswd.users
Require valid-user
</Directory>
Alias /nagios /usr/local/nagios/share
<Directory "/usr/local/nagios/share">
Options None
AllowOverride None
Order allow,deny
HostnameLookups On
Allow from localhost
Allow from 127.0.0.1
Allow from 192.168.90.
AuthName "nagios access"
AuthType Basic
AuthUserFile /usr/local/nagios/etc/htpasswd.users
Require valid-user
</Directory>
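chmod 755 -R /usr
(注意:对整个 /usr 递归执行 chmod 755 会清除 sudo、passwd 等程序的 setuid/setgid 位,并把所有普通文件都变成可执行,可能破坏系统。这一步的目的只是让 apache 能够进入 nagios 用户的主目录 /usr/local/nagios,通常只需执行 chmod 755 /usr/local/nagios 即可。)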
启动apache,访问:http://192.168.90.187/nagios,即可以看到nagios界面。
配置nagios监控本机5个基本功能:ping、磁盘使用率、本地用户、总进程数和CPU负载。
mkdir -p /usr/local/nagios/etc/lan_objects
mkdir example
mv cgi.cfg-sample commands.cfg-sample localhost.cfg-sample nagios.cfg-sample resource.cfg-sample example/
cd example/
cp cgi.cfg-sample ../cgi.cfg
cp resource.cfg-sample ../resource.cfg
cp commands.cfg-sample ../lan_objects/commands.cfg
mkdir example
mv cgi.cfg-sample commands.cfg-sample localhost.cfg-sample nagios.cfg-sample resource.cfg-sample example/
cd example/
cp cgi.cfg-sample ../cgi.cfg
cp resource.cfg-sample ../resource.cfg
cp commands.cfg-sample ../lan_objects/commands.cfg
vi /usr/local/nagios/etc/nagios.cfg
################
# nagios.cfg
# main Nagios configuration file
################
log_file=/usr/local/nagios/var/nagios.log
cfg_dir=/usr/local/nagios/etc/lan_objects
object_cache_file=/usr/local/nagios/var/objects.cache
resource_file=/usr/local/nagios/etc/resource.cfg
status_file=/usr/local/nagios/var/status.dat
nagios_user=nagios
nagios_group=nagios
check_external_commands=1
command_check_interval=-1
command_file=/usr/local/nagios/var/rw/nagios.cmd
comment_file=/usr/local/nagios/var/comments.dat
downtime_file=/usr/local/nagios/var/downtime.dat
lock_file=/usr/local/nagios/var/nagios.lock
temp_file=/usr/local/nagios/var/nagios.tmp
event_broker_options=-1
log_rotation_method=d
log_archive_path=/usr/local/nagios/var/archives
use_syslog=1
log_notifications=1
log_service_retries=1
log_host_retries=1
log_event_handlers=1
log_initial_states=0
log_external_commands=1
log_passive_checks=1
service_inter_check_delay_method=s
max_service_check_spread=30
service_interleave_factor=s
host_inter_check_delay_method=s
max_host_check_spread=30
max_concurrent_checks=0
service_reaper_frequency=10
auto_reschedule_checks=0
auto_rescheduling_interval=30
auto_rescheduling_window=180
sleep_time=0.25
service_check_timeout=60
host_check_timeout=30
event_handler_timeout=30
notification_timeout=30
ocsp_timeout=5
perfdata_timeout=5
retain_state_information=1
state_retention_file=/usr/local/nagios/var/retention.dat
retention_update_interval=60
use_retained_program_state=1
use_retained_scheduling_info=0
interval_length=60
use_aggressive_host_checking=0
execute_service_checks=1
accept_passive_service_checks=1
execute_host_checks=1
accept_passive_host_checks=1
enable_notifications=1
enable_event_handlers=1
process_performance_data=0
obsess_over_services=0
check_for_orphaned_services=0
check_service_freshness=1
service_freshness_check_interval=60
check_host_freshness=0
host_freshness_check_interval=60
aggregate_status_updates=1
status_update_interval=15
enable_flap_detection=0
low_service_flap_threshold=5.0
high_service_flap_threshold=20.0
low_host_flap_threshold=5.0
high_host_flap_threshold=20.0
date_format=us
p1_file=/usr/local/nagios/bin/p1.pl
illegal_object_name_chars=`~!$%^&*|'"<>?,()=
illegal_macro_output_chars=`~$&|'"<>
use_regexp_matching=0
use_true_regexp_matching=0
admin_email=nagios
admin_pager=pagenagios
daemon_dumps_core=0
# nagios.cfg
# main Nagios configuration file
################
log_file=/usr/local/nagios/var/nagios.log
cfg_dir=/usr/local/nagios/etc/lan_objects
object_cache_file=/usr/local/nagios/var/objects.cache
resource_file=/usr/local/nagios/etc/resource.cfg
status_file=/usr/local/nagios/var/status.dat
nagios_user=nagios
nagios_group=nagios
check_external_commands=1
command_check_interval=-1
command_file=/usr/local/nagios/var/rw/nagios.cmd
comment_file=/usr/local/nagios/var/comments.dat
downtime_file=/usr/local/nagios/var/downtime.dat
lock_file=/usr/local/nagios/var/nagios.lock
temp_file=/usr/local/nagios/var/nagios.tmp
event_broker_options=-1
log_rotation_method=d
log_archive_path=/usr/local/nagios/var/archives
use_syslog=1
log_notifications=1
log_service_retries=1
log_host_retries=1
log_event_handlers=1
log_initial_states=0
log_external_commands=1
log_passive_checks=1
service_inter_check_delay_method=s
max_service_check_spread=30
service_interleave_factor=s
host_inter_check_delay_method=s
max_host_check_spread=30
max_concurrent_checks=0
service_reaper_frequency=10
auto_reschedule_checks=0
auto_rescheduling_interval=30
auto_rescheduling_window=180
sleep_time=0.25
service_check_timeout=60
host_check_timeout=30
event_handler_timeout=30
notification_timeout=30
ocsp_timeout=5
perfdata_timeout=5
retain_state_information=1
state_retention_file=/usr/local/nagios/var/retention.dat
retention_update_interval=60
use_retained_program_state=1
use_retained_scheduling_info=0
interval_length=60
use_aggressive_host_checking=0
execute_service_checks=1
accept_passive_service_checks=1
execute_host_checks=1
accept_passive_host_checks=1
enable_notifications=1
enable_event_handlers=1
process_performance_data=0
obsess_over_services=0
check_for_orphaned_services=0
check_service_freshness=1
service_freshness_check_interval=60
check_host_freshness=0
host_freshness_check_interval=60
aggregate_status_updates=1
status_update_interval=15
enable_flap_detection=0
low_service_flap_threshold=5.0
high_service_flap_threshold=20.0
low_host_flap_threshold=5.0
high_host_flap_threshold=20.0
date_format=us
p1_file=/usr/local/nagios/bin/p1.pl
illegal_object_name_chars=`~!$%^&*|'"<>?,()=
illegal_macro_output_chars=`~$&|'"<>
use_regexp_matching=0
use_true_regexp_matching=0
admin_email=nagios
admin_pager=pagenagios
daemon_dumps_core=0
vi /usr/local/nagios/etc/lan_objects/timeperiods.cfg
# Time periods
# All times are valid for all
# checks and notifications
define timeperiod{
timeperiod_name 24x7
alias 24 Hours A Day, 7 Days A Week
sunday 00:00-24:00
monday 00:00-24:00
tuesday 00:00-24:00
wednesday 00:00-24:00
thursday 00:00-24:00
friday 00:00-24:00
saturday 00:00-24:00
}
# All times are valid for all
# checks and notifications
define timeperiod{
timeperiod_name 24x7
alias 24 Hours A Day, 7 Days A Week
sunday 00:00-24:00
monday 00:00-24:00
tuesday 00:00-24:00
wednesday 00:00-24:00
thursday 00:00-24:00
friday 00:00-24:00
saturday 00:00-24:00
}
vi /usr/local/nagios/etc/lan_objects/contacts.cfg
################
# Contacts- individuals and groups
################
define contact{
contact_name nagios
alias Nagios Admin
service_notification_period 24x7
host_notification_period 24x7
service_notification_options w,u,c,r
host_notification_options d,r
service_notification_commands notify-by-email
host_notification_commands host-notify-by-email
email luanhelh@163.com
}
# contact groups
# Nagios only talks to contact groups, not individuals
# members must be Nagios users, alias and contact_group
# are whatever you want
define contactgroup{
contactgroup_name admins
alias Nagios Administrators
members nagios
}
# Contacts- individuals and groups
################
define contact{
contact_name nagios
alias Nagios Admin
service_notification_period 24x7
host_notification_period 24x7
service_notification_options w,u,c,r
host_notification_options d,r
service_notification_commands notify-by-email
host_notification_commands host-notify-by-email
email luanhelh@163.com
}
# contact groups
# Nagios only talks to contact groups, not individuals
# members must be Nagios users, alias and contact_group
# are whatever you want
define contactgroup{
contactgroup_name admins
alias Nagios Administrators
members nagios
}
vi /usr/local/nagios/etc/lan_objects/hosts.cfg
################
# Hosts file- individual hosts and host groups
################
# Generic host definition template - This is NOT a real host, just a template!
define host{
name generic-host
notifications_enabled 1
event_handler_enabled 1
flap_detection_enabled 1
failure_prediction_enabled 1
process_perf_data 1
retain_status_information 1
retain_nonstatus_information 1
; DONT REGISTER THIS DEFINITION - ITS NOT A REAL HOST, JUST A TEMPLATE!
register 0
}
# local host definition
define host{
use generic-host
host_name localhost
alias Nagios Server
address 127.0.0.1
check_command check-host-alive
max_check_attempts 10
check_period 24x7
notification_interval 120
notification_period 24x7
notification_options d,r
contact_groups admins
}
##############
# Host groups
##############
# Every host must belong to a host group
define hostgroup{
hostgroup_name test
alias Test Servers
members localhost
}
# Hosts file- individual hosts and host groups
################
# Generic host definition template - This is NOT a real host, just a template!
define host{
name generic-host
notifications_enabled 1
event_handler_enabled 1
flap_detection_enabled 1
failure_prediction_enabled 1
process_perf_data 1
retain_status_information 1
retain_nonstatus_information 1
; DONT REGISTER THIS DEFINITION - ITS NOT A REAL HOST, JUST A TEMPLATE!
register 0
}
# local host definition
define host{
use generic-host
host_name localhost
alias Nagios Server
address 127.0.0.1
check_command check-host-alive
max_check_attempts 10
check_period 24x7
notification_interval 120
notification_period 24x7
notification_options d,r
contact_groups admins
}
##############
# Host groups
##############
# Every host must belong to a host group
define hostgroup{
hostgroup_name test
alias Test Servers
members localhost
}
vi /usr/local/nagios/etc/lan_objects/services.cfg
################
# Services
################
# Generic service definition template - This is NOT a real service, just a template!
define service{
name generic-service
active_checks_enabled 1
passive_checks_enabled 1
parallelize_check 1
obsess_over_service 1
check_freshness 0
notifications_enabled 1
event_handler_enabled 1
flap_detection_enabled 1
failure_prediction_enabled 1
process_perf_data 1
retain_status_information 1
retain_nonstatus_information 1
; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE!
register 0
}
# Define a service to "ping" the local machine
define service{
use generic-service
host_name localhost
service_description PING
is_volatile 0
check_period 24x7
max_check_attempts 4
normal_check_interval 5
retry_check_interval 1
contact_groups admins
notification_options w,u,c,r
notification_interval 960
notification_period 24x7
check_command check_ping!100.0,20%!500.0,60%
}
# Define a service to check the disk space of the root partition
# on the local machine. Warning if < 20% free, critical if
# < 10% free space on partition.
define service{
use generic-service
host_name localhost
service_description Root Partition
is_volatile 0
check_period 24x7
max_check_attempts 4
normal_check_interval 5
retry_check_interval 1
contact_groups admins
notification_options w,u,c,r
notification_interval 960
notification_period 24x7
check_command check_local_disk!20%!10%!/
}
# Define a service to check the number of currently logged in
# users on the local machine. Warning if > 20 users, critical
# if > 50 users.
define service{
use generic-service
host_name localhost
service_description Current Users
is_volatile 0
check_period 24x7
max_check_attempts 4
normal_check_interval 5
retry_check_interval 1
contact_groups admins
notification_options w,u,c,r
notification_interval 960
notification_period 24x7
check_command check_local_users!20!50
}
# Define a service to check the number of currently running procs
# on the local machine. Warning if > 250 processes, critical if
# > 400 processes.
define service{
use generic-service
host_name localhost
service_description Total Processes
is_volatile 0
check_period 24x7
max_check_attempts 4
normal_check_interval 5
retry_check_interval 1
contact_groups admins
notification_options w,u,c,r
notification_interval 960
notification_period 24x7
check_command check_local_procs!250!400
}
# Define a service to check the load on the local machine.
define service{
use generic-service
host_name localhost
service_description Current Load
is_volatile 0
check_period 24x7
max_check_attempts 4
normal_check_interval 5
retry_check_interval 1
contact_groups admins
notification_options w,u,c,r
notification_interval 960
notification_period 24x7
check_command check_local_load!5.0,4.0,3.0!10.0,6.0,4.0
}
# Services
################
# Generic service definition template - This is NOT a real service, just a template!
define service{
name generic-service
active_checks_enabled 1
passive_checks_enabled 1
parallelize_check 1
obsess_over_service 1
check_freshness 0
notifications_enabled 1
event_handler_enabled 1
flap_detection_enabled 1
failure_prediction_enabled 1
process_perf_data 1
retain_status_information 1
retain_nonstatus_information 1
; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE!
register 0
}
# Define a service to "ping" the local machine
define service{
use generic-service
host_name localhost
service_description PING
is_volatile 0
check_period 24x7
max_check_attempts 4
normal_check_interval 5
retry_check_interval 1
contact_groups admins
notification_options w,u,c,r
notification_interval 960
notification_period 24x7
check_command check_ping!100.0,20%!500.0,60%
}
# Define a service to check the disk space of the root partition
# on the local machine. Warning if < 20% free, critical if
# < 10% free space on partition.
define service{
use generic-service
host_name localhost
service_description Root Partition
is_volatile 0
check_period 24x7
max_check_attempts 4
normal_check_interval 5
retry_check_interval 1
contact_groups admins
notification_options w,u,c,r
notification_interval 960
notification_period 24x7
check_command check_local_disk!20%!10%!/
}
# Define a service to check the number of currently logged in
# users on the local machine. Warning if > 20 users, critical
# if > 50 users.
define service{
use generic-service
host_name localhost
service_description Current Users
is_volatile 0
check_period 24x7
max_check_attempts 4
normal_check_interval 5
retry_check_interval 1
contact_groups admins
notification_options w,u,c,r
notification_interval 960
notification_period 24x7
check_command check_local_users!20!50
}
# Define a service to check the number of currently running procs
# on the local machine. Warning if > 250 processes, critical if
# > 400 processes.
define service{
use generic-service
host_name localhost
service_description Total Processes
is_volatile 0
check_period 24x7
max_check_attempts 4
normal_check_interval 5
retry_check_interval 1
contact_groups admins
notification_options w,u,c,r
notification_interval 960
notification_period 24x7
check_command check_local_procs!250!400
}
# Define a service to check the load on the local machine.
define service{
use generic-service
host_name localhost
service_description Current Load
is_volatile 0
check_period 24x7
max_check_attempts 4
normal_check_interval 5
retry_check_interval 1
contact_groups admins
notification_options w,u,c,r
notification_interval 960
notification_period 24x7
check_command check_local_load!5.0,4.0,3.0!10.0,6.0,4.0
}
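设置配置文件权限(注意:下面的 chmod -R 644 同样会作用于 lan_objects 目录本身,目录失去执行位后,非 root 用户将无法进入该目录;如果 nagios 重新加载配置时出现权限错误,请再执行 chmod 755 /usr/local/nagios/etc/lan_objects):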
chown -R nagios:nagios /usr/local/nagios/etc/lan_objects/
chmod -R 644 /usr/local/nagios/etc/lan_objects/
chown nagios:nagios /usr/local/nagios/etc/nagios.cfg
chmod 644 /usr/local/nagios/etc/nagios.cfg
chown nagios:nagios /usr/local/nagios/etc/resource.cfg
chmod 600 /usr/local/nagios/etc/resource.cfg
chown nagios:nagios /usr/local/nagios/etc/cgi.cfg
chmod 644 /usr/local/nagios/etc/cgi.cfg
chmod -R 644 /usr/local/nagios/etc/lan_objects/
chown nagios:nagios /usr/local/nagios/etc/nagios.cfg
chmod 644 /usr/local/nagios/etc/nagios.cfg
chown nagios:nagios /usr/local/nagios/etc/resource.cfg
chmod 600 /usr/local/nagios/etc/resource.cfg
chown nagios:nagios /usr/local/nagios/etc/cgi.cfg
chmod 644 /usr/local/nagios/etc/cgi.cfg
运行nagios的语法检查器看看:
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
检查通过结果为:
[root@vps1 ~]# /usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg
Nagios 2.9
Copyright (c) 1999-2007 Ethan Galstad (http://www.nagios.org)
Last Modified: 04-10-2007
License: GPL
Reading configuration data...
Running pre-flight check on configuration data...
Checking services...
Checked 5 services.
Checking hosts...
Checked 1 hosts.
Checking host groups...
Checked 1 host groups.
Checking service groups...
Checked 0 service groups.
Checking contacts...
Checked 1 contacts.
Checking contact groups...
Checked 1 contact groups.
Checking service escalations...
Checked 0 service escalations.
Checking service dependencies...
Checked 0 service dependencies.
Checking host escalations...
Checked 0 host escalations.
Checking host dependencies...
Checked 0 host dependencies.
Checking commands...
Checked 22 commands.
Checking time periods...
Checked 1 time periods.
Checking extended host info definitions...
Checked 0 extended host info definitions.
Checking extended service info definitions...
Checked 0 extended service info definitions.
Checking for circular paths between hosts...
Checking for circular host and service dependencies...
Checking global event handlers...
Checking obsessive compulsive processor commands...
Checking misc settings...
Total Warnings: 0
Total Errors: 0
Things look okay - No serious problems were detected during the pre-flight check
Nagios 2.9
Copyright (c) 1999-2007 Ethan Galstad (http://www.nagios.org)
Last Modified: 04-10-2007
License: GPL
Reading configuration data...
Running pre-flight check on configuration data...
Checking services...
Checked 5 services.
Checking hosts...
Checked 1 hosts.
Checking host groups...
Checked 1 host groups.
Checking service groups...
Checked 0 service groups.
Checking contacts...
Checked 1 contacts.
Checking contact groups...
Checked 1 contact groups.
Checking service escalations...
Checked 0 service escalations.
Checking service dependencies...
Checked 0 service dependencies.
Checking host escalations...
Checked 0 host escalations.
Checking host dependencies...
Checked 0 host dependencies.
Checking commands...
Checked 22 commands.
Checking time periods...
Checked 1 time periods.
Checking extended host info definitions...
Checked 0 extended host info definitions.
Checking extended service info definitions...
Checked 0 extended service info definitions.
Checking for circular paths between hosts...
Checking for circular host and service dependencies...
Checking global event handlers...
Checking obsessive compulsive processor commands...
Checking misc settings...
Total Warnings: 0
Total Errors: 0
Things look okay - No serious problems were detected during the pre-flight check
启动nagios服务
/etc/init.d/nagios start
现在访问http://192.168.90.187/nagios,点击左侧的“Service Detail”,看看是不是显示监控信息了,如图:
配置开机启动nagios服务:
chkconfig --add nagios
chkconfig --level 345 nagios on
chkconfig --level 345 nagios on
1 Trackback(s)