blob: 90c8a8f7f5d7797f5767672251bcee1990f075c1 [file] [log] [blame] [edit]
###############################################################################
## Monit control file
###############################################################################
##
## Comments begin with a '#' and extend through the end of the line. Keywords
## are case insensitive. All path's MUST BE FULLY QUALIFIED, starting with '/'.
##
## Bellow is the example of some frequently used statements. For information
## about the control file, a complete list of statements and options please
## have a look in the monit manual.
##
##
###############################################################################
## Global section
###############################################################################
##
## Start monit in background (run as daemon) and check the services at 2-minute
## intervals.
#
set daemon 120
#
#
## Set syslog logging with the 'daemon' facility. If the FACILITY option is
## omited, monit will use 'user' facility by default. You can specify the
## path to the file for monit native logging.
#
# set logfile syslog facility log_daemon
#
#
## Set list of mailservers for alert delivery. Multiple servers may be
## specified using comma separator. By default monit uses port 25 - it is
## possible to override it with the PORT option.
#
# set mailserver mail.bar.baz, # primary mailserver
# backup.bar.baz port 10025, # backup mailserver on port 10025
# localhost # fallback relay
#
#
## By default monit will drop the event alert, in the case that there is no
## mailserver available. In the case that you want to keep the events for
## later delivery retry, you can use the EVENTQUEUE statement. The base
## directory where undelivered events will be stored is specified by the
## BASEDIR option. You can limit the maximal queue size using the SLOTS
## option (if omited then the queue is limited just by the backend filesystem).
#
# set eventqueue
# basedir /var/monit # set the base directory where events will be stored
# slots 100 # optionally limit the queue size
#
#
## Monit by default uses the following alert mail format:
##
## --8<--
## From: monit@$HOST # sender
## Subject: monit alert -- $EVENT $SERVICE # subject
##
## $EVENT Service $SERVICE #
## #
## Date: $DATE #
## Action: $ACTION #
## Host: $HOST # body
## Description: $DESCRIPTION #
## #
## Your faithful employee, #
## monit #
## --8<--
##
## You can override the alert message format or its parts such as subject
## or sender using the MAIL-FORMAT statement. Macros such as $DATE, etc.
## are expanded on runtime. For example to override the sender:
#
# set mail-format { from: monit@foo.bar }
#
#
## You can set the alert recipients here, which will receive the alert for
## each service. The event alerts may be restricted using the list.
#
# set alert sysadm@foo.bar # receive all alerts
# set alert manager@foo.bar only on { timeout } # receive just service-
# # timeout alert
#
#
## Monit has an embedded webserver, which can be used to view the
## configuration, actual services parameters or manage the services using the
## web interface.
#
set httpd port 2812 and
use address localhost # only accept connection from localhost
allow localhost # allow localhost to connect to the server and
# allow admin:monit # require user 'admin' with password 'monit'
#
#
###############################################################################
## Services
###############################################################################
##
## Check the general system resources such as load average, cpu and memory
## usage. Each rule specifies the tested resource, the limit and the action
## which will be performed in the case that the test failed.
#
check system localhost
# if loadavg (1min) > 4 then alert
# if loadavg (5min) > 2 then alert
# if memory usage > 75% then alert
# if cpu usage (user) > 70% then alert
# if cpu usage (system) > 30% then alert
# if cpu usage (wait) > 20% then alert
#
#
# vixie cron
check process cron with pidfile /var/run/cron.pid
group system
start program = "/etc/init.d/vixie-cron start"
stop program = "/etc/init.d/vixie-cron stop"
if 5 restarts within 5 cycles then timeout
depends on cron_rc
check file cron_rc with path /etc/init.d/vixie-cron
group system
if failed checksum then unmonitor
if failed permission 755 then unmonitor
if failed uid root then unmonitor
if failed gid root then unmonitor
check process syslogd with pidfile /var/run/syslog-ng.pid
start program = "/etc/init.d/syslog-ng start"
stop program = "/etc/init.d/syslog-ng stop"
if 5 restarts within 5 cycles then timeout
check file syslogd_file with path /var/log/messages
if timestamp > 65 minutes then alert # Have you seen "-- MARK --"?
# vsftp
check process vsftpd with pidfile /var/run/vsftpd.pid
start program = "/etc/init.d/vsftpd start"
stop program = "/etc/init.d/vsftpd stop"
if failed port 21 protocol ftp then restart
if 5 restarts within 5 cycles then timeout
check process sshd with pidfile /var/run/sshd.pid
start program "/etc/init.d/sshd start"
stop program "/etc/init.d/sshd stop"
if failed port 22 protocol ssh then restart
if 5 restarts within 5 cycles then timeout
# apache2
check process apache with pidfile /var/run/apache2.pid
group www
start program = "/etc/init.d/apache2 start"
stop program = "/etc/init.d/apache2 stop"
if failed port 80 protocol http then restart
if 5 restarts within 5 cycles then timeout
# postfix
check process postfix with pidfile /var/spool/postfix/pid/master.pid
group mail
start program = "/etc/init.d/postfix start"
stop program = "/etc/init.d/postfix stop"
if failed port 25 protocol smtp then restart
if 5 restarts within 5 cycles then timeout
depends on postfix_rc
check file postfix_rc with path /etc/init.d/postfix
group mail
if failed checksum then unmonitor
if failed permission 755 then unmonitor
if failed uid root then unmonitor
if failed gid root then unmonitor
# postgresql
check process postgresql with pidfile /var/postgresql/data/postmaster.pid
group database
start program = "/etc/init.d/postgresql start"
stop program = "/etc/init.d/postgresql stop"
# slapd
check process slapd with pidfile /var/run/openldap/slapd.pid
group database
start program = "/etc/init.d/slapd start"
stop program = "/etc/init.d/slapd stop"
# if failed port 389 protocol ldap3 then restart
# if 5 restarts within 5 cycles then timeout
check process smbd with pidfile /var/run/samba/smbd.pid
group samba
start program = "/etc/init.d/samba start"
stop program = "/etc/init.d/samba stop"
if failed port 139 type TCP then restart
if 5 restarts within 5 cycles then timeout
depends on smbd_bin
check file smbd_bin with path /usr/sbin/smbd
group samba
if failed checksum then unmonitor
if failed permission 755 then unmonitor
if failed uid root then unmonitor
if failed gid root then unmonitor
check process nmbd with pidfile /var/run/samba/nmbd.pid
group samba
start program = "/etc/init.d/samba start"
stop program = "/etc/init.d/samba stop"
if failed port 138 type UDP then restart
if failed port 137 type UDP then restart
if 5 restarts within 5 cycles then timeout
depends on nmbd_bin
check file nmbd_bin with path /usr/sbin/nmbd
group samba
if failed checksum then unmonitor
if failed permission 755 then unmonitor
if failed uid root then unmonitor
if failed gid root then unmonitor
check process ddclient with pidfile /var/run/ddclient.pid
group other
start program = "/etc/init.d/ddclient start"
stop program = "/etc/init.d/ddclient stop"
## Check a file for existence, checksum, permissions, uid and gid. In addition
## to the recipients in the global section, customized alert will be send to
## the additional recipient. The service may be grouped using the GROUP option.
#
# check file apache_bin with path /usr/local/apache/bin/httpd
# if failed checksum and
# expect the sum 8f7f419955cefa0b33a2ba316cba3659 then unmonitor
# if failed permission 755 then unmonitor
# if failed uid root then unmonitor
# if failed gid root then unmonitor
# alert security@foo.bar on {
# checksum, permission, uid, gid, unmonitor
# } with the mail-format { subject: Alarm! }
# group server
#
#
## Check that a process is running, responding on the HTTP and HTTPS request,
## check its resource usage such as cpu and memory, number of children.
## In the case that the process is not running, monit will restart it by
## default. In the case that the service was restarted very often and the
## problem remains, it is possible to disable the monitoring using the
## TIMEOUT statement. The service depends on another service (apache_bin) which
## is defined in the monit control file as well.
#
# check process apache with pidfile /usr/local/apache/logs/httpd.pid
# start program = "/etc/init.d/httpd start"
# stop program = "/etc/init.d/httpd stop"
# if cpu > 60% for 2 cycles then alert
# if cpu > 80% for 5 cycles then restart
# if totalmem > 200.0 MB for 5 cycles then restart
# if children > 250 then restart
# if loadavg(5min) greater than 10 for 8 cycles then stop
# if failed host www.tildeslash.com port 80 protocol http
# and request "/monit/doc/next.php"
# then restart
# if failed port 443 type tcpssl protocol http
# with timeout 15 seconds
# then restart
# if 3 restarts within 5 cycles then timeout
# depends on apache_bin
# group server
#
#
## Check the filesystem permissions, uid, gid, space and inode usage. Other
## services such as databases may depend on this resource and automatical
## graceful stop may be cascaded to them before the filesystem will become
## full and the data will be lost.
#
# check filesystem datafs with path /dev/sdb1
# start program = "/bin/mount /data"
# stop program = "/bin/umount /data"
# if failed permission 660 then unmonitor
# if failed uid root then unmonitor
# if failed gid disk then unmonitor
# if space usage > 80% for 5 times within 15 cycles then alert
# if space usage > 99% then stop
# if inode usage > 30000 then alert
# if inode usage > 99% then stop
# group server
#
#
## Check a file's timestamp: when it becomes older then 15 minutes, the
## file is not updated and something is wrong. In the case that the size
## of the file exceeded given limit, perform the script.
#
# check file database with path /data/mydatabase.db
# if failed permission 700 then alert
# if failed uid data then alert
# if failed gid data then alert
# if timestamp > 15 minutes then alert
# if size > 100 MB then exec "/my/cleanup/script"
#
#
## Check the directory permission, uid and gid. An event is triggered
## if the directory does not belong to the user with the uid 0 and
## the gid 0. In the addition the permissions have to match the octal
## description of 755 (see chmod(1)).
#
# check directory bin with path /bin
# if failed permission 755 then unmonitor
# if failed uid 0 then unmonitor
# if failed gid 0 then unmonitor
#
#
## Check the remote host network services availability and the response
## content. One of three pings, a successfull connection to a port and
## application level network check is performed.
#
# check host myserver with address 192.168.1.1
# if failed icmp type echo count 3 with timeout 3 seconds then alert
# if failed port 3306 protocol mysql with timeout 15 seconds then alert
# if failed url
# http://user:password@www.foo.bar:8080/?querystring
# and content == 'action="j_security_check"'
# then alert
#
#
###############################################################################
## Includes
###############################################################################
##
## It is possible to include the configuration or its parts from other files or
## directories.
#
# include /etc/monit.d/*
#
#