| .\" Automatically generated by Pod::Man 2.22 (Pod::Simple 3.07) |
| .\" |
| .\" Standard preamble: |
| .\" ======================================================================== |
| .de Sp \" Vertical space (when we can't use .PP) |
| .if t .sp .5v |
| .if n .sp |
| .. |
| .de Vb \" Begin verbatim text |
| .ft CW |
| .nf |
| .ne \\$1 |
| .. |
| .de Ve \" End verbatim text |
| .ft R |
| .fi |
| .. |
| .\" Set up some character translations and predefined strings. \*(-- will |
| .\" give an unbreakable dash, \*(PI will give pi, \*(L" will give a left |
| .\" double quote, and \*(R" will give a right double quote. \*(C+ will |
| .\" give a nicer C++. Capital omega is used to do unbreakable dashes and |
| .\" therefore won't be available. \*(C` and \*(C' expand to `' in nroff, |
| .\" nothing in troff, for use with C<>. |
| .tr \(*W- |
| .ds C+ C\v'-.1v'\h'-1p'\s-2+\h'-1p'+\s0\v'.1v'\h'-1p' |
| .ie n \{\ |
| . ds -- \(*W- |
| . ds PI pi |
| . if (\n(.H=4u)&(1m=24u) .ds -- \(*W\h'-12u'\(*W\h'-12u'-\" diablo 10 pitch |
| . if (\n(.H=4u)&(1m=20u) .ds -- \(*W\h'-12u'\(*W\h'-8u'-\" diablo 12 pitch |
| . ds L" "" |
| . ds R" "" |
| . ds C` "" |
| . ds C' "" |
| 'br\} |
| .el\{\ |
| . ds -- \|\(em\| |
| . ds PI \(*p |
| . ds L" `` |
| . ds R" '' |
| 'br\} |
| .\" |
| .\" Escape single quotes in literal strings from groff's Unicode transform. |
| .ie \n(.g .ds Aq \(aq |
| .el .ds Aq ' |
| .\" |
| .\" If the F register is turned on, we'll generate index entries on stderr for |
| .\" titles (.TH), headers (.SH), subsections (.SS), items (.Ip), and index |
| .\" entries marked with X<> in POD. Of course, you'll have to process the |
| .\" output yourself in some meaningful fashion. |
| .ie \nF \{\ |
| . de IX |
| . tm Index:\\$1\t\\n%\t"\\$2" |
| .. |
| . nr % 0 |
| . rr F |
| .\} |
| .el \{\ |
| . de IX |
| .. |
| .\} |
| .\" |
| .\" Accent mark definitions (@(#)ms.acc 1.5 88/02/08 SMI; from UCB 4.2). |
| .\" Fear. Run. Save yourself. No user-serviceable parts. |
| . \" fudge factors for nroff and troff |
| .if n \{\ |
| . ds #H 0 |
| . ds #V .8m |
| . ds #F .3m |
| . ds #[ \f1 |
| . ds #] \fP |
| .\} |
| .if t \{\ |
| . ds #H ((1u-(\\\\n(.fu%2u))*.13m) |
| . ds #V .6m |
| . ds #F 0 |
| . ds #[ \& |
| . ds #] \& |
| .\} |
| . \" simple accents for nroff and troff |
| .if n \{\ |
| . ds ' \& |
| . ds ` \& |
| . ds ^ \& |
| . ds , \& |
| . ds ~ ~ |
| . ds / |
| .\} |
| .if t \{\ |
| . ds ' \\k:\h'-(\\n(.wu*8/10-\*(#H)'\'\h"|\\n:u" |
| . ds ` \\k:\h'-(\\n(.wu*8/10-\*(#H)'\`\h'|\\n:u' |
| . ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'^\h'|\\n:u' |
| . ds , \\k:\h'-(\\n(.wu*8/10)',\h'|\\n:u' |
| . ds ~ \\k:\h'-(\\n(.wu-\*(#H-.1m)'~\h'|\\n:u' |
| . ds / \\k:\h'-(\\n(.wu*8/10-\*(#H)'\z\(sl\h'|\\n:u' |
| .\} |
| . \" troff and (daisy-wheel) nroff accents |
| .ds : \\k:\h'-(\\n(.wu*8/10-\*(#H+.1m+\*(#F)'\v'-\*(#V'\z.\h'.2m+\*(#F'.\h'|\\n:u'\v'\*(#V' |
| .ds 8 \h'\*(#H'\(*b\h'-\*(#H' |
| .ds o \\k:\h'-(\\n(.wu+\w'\(de'u-\*(#H)/2u'\v'-.3n'\*(#[\z\(de\v'.3n'\h'|\\n:u'\*(#] |
| .ds d- \h'\*(#H'\(pd\h'-\w'~'u'\v'-.25m'\f2\(hy\fP\v'.25m'\h'-\*(#H' |
| .ds D- D\\k:\h'-\w'D'u'\v'-.11m'\z\(hy\v'.11m'\h'|\\n:u' |
| .ds th \*(#[\v'.3m'\s+1I\s-1\v'-.3m'\h'-(\w'I'u*2/3)'\s-1o\s+1\*(#] |
| .ds Th \*(#[\s+2I\s-2\h'-\w'I'u*3/5'\v'-.3m'o\v'.3m'\*(#] |
| .ds ae a\h'-(\w'a'u*4/10)'e |
| .ds Ae A\h'-(\w'A'u*4/10)'E |
| . \" corrections for vroff |
| .if v .ds ~ \\k:\h'-(\\n(.wu*9/10-\*(#H)'\s-2\u~\d\s+2\h'|\\n:u' |
| .if v .ds ^ \\k:\h'-(\\n(.wu*10/11-\*(#H)'\v'-.4m'^\v'.4m'\h'|\\n:u' |
| . \" for low resolution devices (crt and lpr) |
| .if \n(.H>23 .if \n(.V>19 \ |
| \{\ |
| . ds : e |
| . ds 8 ss |
| . ds o a |
| . ds d- d\h'-1'\(ga |
| . ds D- D\h'-1'\(hy |
| . ds th \o'bp' |
| . ds Th \o'LP' |
| . ds ae ae |
| . ds Ae AE |
| .\} |
| .rm #[ #] #H #V #F C |
| .\" ======================================================================== |
| .\" |
| .IX Title "MONIT 1" |
| .TH MONIT 1 "www.mmonit.com" "May 06. 2012" "User Commands" |
| .\" For nroff, turn off justification. Always turn off hyphenation; it makes |
| .\" way too many mistakes in technical documents. |
| .if n .ad l |
| .nh |
| .SH "NAME" |
| Monit \- utility for monitoring services on a Unix system |
| .SH "SYNOPSIS" |
| .IX Header "SYNOPSIS" |
| \&\fBmonit\fR [options] {arguments} |
| .SH "DESCRIPTION" |
| .IX Header "DESCRIPTION" |
| \&\fBmonit\fR is a utility for managing and monitoring processes, |
| programs, files, directories and filesystems on a Unix system. |
| Monit conducts automatic maintenance and repair and can execute |
| meaningful causal actions in error situations. E.g. Monit can |
| start a process if it does not run, restart a process if it does |
| not respond and stop a process if it uses too many resources. You |
| can use Monit to monitor files, directories and filesystems for |
| changes, such as timestamp changes, checksum changes or size |
| changes. |
| .PP |
| Monit is controlled via an easy to configure control file based |
| on a free-format, token-oriented syntax. Monit logs to syslog or |
| to its own log file and notifies you about error conditions via |
| customizable alert messages. Monit can perform various \s-1TCP/IP\s0 |
| network checks, protocol checks and can utilize \s-1SSL\s0 for such |
| checks. Monit provides a http(s) interface and you may use a |
| browser to access the Monit program. |
| .SH "GENERAL OPERATION" |
| .IX Header "GENERAL OPERATION" |
| The behavior of Monit is controlled by command-line options |
| \&\fIand\fR a run control file, monitrc, |
| the syntax of which we describe in a later section. Command-line |
| options override \fI.monitrc\fR declarations. |
| .PP |
| The default location for \fImonitrc\fR is \fI~/.monitrc\fR. If this |
| file does not exist, Monit will try \fI/etc/monitrc\fR and a few |
| other places. See \s-1FILES\s0 for details. You can also |
| specify the control file directly by using the \fI\-c\fR command-line |
| switch to monit. For instance, |
| .PP |
| .Vb 1 |
| \& $ monit \-c /var/monit/monitrc |
| .Ve |
| .PP |
| Before Monit is started the first time, you can test the control |
| file for syntax errors: |
| .PP |
| .Vb 2 |
| \& $ monit \-t |
| \& $ Control file syntax OK |
| .Ve |
| .PP |
| If there was an error, Monit will print an error message to the |
| console, including the line number in the control file where |
| the error was found. |
| .PP |
| Once you have a working Monit control file you can start Monit |
| from the console, like so: |
| .PP |
| .Vb 1 |
| \& $ monit |
| .Ve |
| .PP |
| You can change some configuration directives via command-line |
| switches, but for simplicity it is recommended that you put these |
| in the control file. |
| .PP |
| If all goes well, Monit will now detach from the terminal and run |
| as a background process, i.e. as a daemon process. As a daemon, |
| Monit runs in cycles; it monitors services, then goes to sleep for |
| a configured period, then wakes up and starts monitoring again in |
| an endless loop. |
| .SS "Options" |
| .IX Subsection "Options" |
| The following options are recognized by Monit. However, it is |
| recommended that you set options (when applicable) directly in |
| the \fI.monitrc\fR control file. |
| .PP |
| \&\fB\-c\fR \fIfile\fR |
| Use this control file |
| .PP |
| \&\fB\-d\fR \fIn\fR |
| Run Monit as a daemon once per \fIn\fR seconds. Or use \fI\*(L"set |
| daemon\*(R"\fR in monitrc. |
| .PP |
| \&\fB\-g\fR \fIname\fR |
| Set group name for start, stop, restart, monitor and |
| unmonitor action. |
| .PP |
| \&\fB\-l\fR \fIlogfile\fR |
| Print log information to this file. Or use \fI\*(L"set logfile\*(R"\fR |
| in monitrc. |
| .PP |
| \&\fB\-p\fR \fIpidfile\fR |
| Use this lock file in daemon mode. Or use \fI\*(L"set pidfile\*(R"\fR |
| in monitrc. |
| .PP |
| \&\fB\-s\fR \fIstatefile\fR |
| Write state information to this file. Or use \fI\*(L"set |
| statefile\*(R"\fR in monitrc. |
| .PP |
| \&\fB\-I\fR |
| Do not run in background (needed for run from init) |
| .PP |
| \&\fB\-t\fR |
| Run syntax check for the control file |
| .PP |
| \&\fB\-v\fR |
| Verbose mode, work noisy (diagnostic output) |
| .PP |
| \&\fB\-vv\fR |
| Very verbose mode, same as \-v plus log stack-trace on error |
| .PP |
| \&\fB\-H\fR \fI[filename]\fR |
| Print \s-1MD5\s0 and \s-1SHA1\s0 hashes of the file or of stdin if the |
| filename is omitted; Monit will exit afterwards |
| .PP |
| \&\fB\-V\fR |
| Print version number and patch level |
| .PP |
| \&\fB\-h\fR |
| Print a help text |
| .SS "Arguments" |
| .IX Subsection "Arguments" |
| Once you have Monit running as a daemon process, you can call |
| Monit with one of the following arguments. Monit will then |
| connect to the Monit daemon (on \s-1TCP\s0 port 127.0.0.1:2812 by |
| default) and ask the Monit daemon to perform the requested |
| action. In other words; calling monit without arguments starts |
| the Monit daemon, and calling monit \fIwith\fR arguments enables you |
| to communicate with the Monit daemon process. |
| .IP "start all" 4 |
| .IX Item "start all" |
| Start all services listed in the control file and enable |
| monitoring for them. If the group option is set (\fI\-g\fR), only |
| start and enable monitoring of services in the named group (\*(L"all\*(R" |
| is not required in this case). |
| .IP "start name" 4 |
| .IX Item "start name" |
| Start the named service and enable monitoring for it. The name is |
| a service entry name from the monitrc file. |
| .IP "stop all" 4 |
| .IX Item "stop all" |
| Stop all services listed in the control file and disable their |
| monitoring. If the group option is set, only stop and disable |
| monitoring of the services in the named group (\*(L"all\*(R" is not |
| required in this case). |
| .IP "stop name" 4 |
| .IX Item "stop name" |
| Stop the named service and disable its monitoring. The name is a |
| service entry name from the monitrc file. |
| .IP "restart all" 4 |
| .IX Item "restart all" |
| Stop and start \fIall\fR services. If the group option is set, only |
| restart the services in the named group (\*(L"all\*(R" is not required in |
| this case). |
| .IP "restart name" 4 |
| .IX Item "restart name" |
| Restart the named service. The name is a service entry name from |
| the monitrc file. |
| .IP "monitor all" 4 |
| .IX Item "monitor all" |
| Enable monitoring of all services listed in the control file. If |
| the group option is set, only start monitoring of services in the |
| named group (\*(L"all\*(R" is not required in this case). |
| .IP "monitor name" 4 |
| .IX Item "monitor name" |
| Enable monitoring of the named service. The name is a service |
| entry name from the monitrc file. Monit will also enable |
| monitoring of all services this service depends on. |
| .IP "unmonitor all" 4 |
| .IX Item "unmonitor all" |
| Disable monitoring of all services listed in the control file. If |
| the group option is set, only disable monitoring of services in |
| the named group (\*(L"all\*(R" is not required in this case). |
| .IP "unmonitor name" 4 |
| .IX Item "unmonitor name" |
| Disable monitoring of the named service. The name is a service |
| entry name from the monitrc file. Monit will also disable |
| monitoring of all services that depend on this service. |
| .IP "status" 4 |
| .IX Item "status" |
| Print status information of each service. |
| .IP "summary" 4 |
| .IX Item "summary" |
| Print a short status summary. |
| .IP "reload" 4 |
| .IX Item "reload" |
| Reinitialize a running Monit daemon, the daemon will reread its |
| configuration, close and reopen log files. |
| .IP "quit" 4 |
| .IX Item "quit" |
| Kill the Monit daemon process |
| .IP "validate" 4 |
| .IX Item "validate" |
| Check all services listed in the control file. This action is |
| also the default behavior when Monit runs in daemon mode. |
| .IP "procmatch regex" 4 |
| .IX Item "procmatch regex" |
| Allows for easy testing of a pattern for the process match check. The |
| command takes a regular expression as an argument and displays all |
| running processes matching the pattern. |
| .SH "WHAT TO MONITOR?" |
| .IX Header "WHAT TO MONITOR?" |
| You can use Monit to monitor daemon \fBprocesses\fR or similar |
| programs running on localhost. Monit is particularly useful for |
| monitoring daemon processes, such as those started at system boot |
| time from /etc/init.d/. For instance sendmail, sshd, apache and |
| mysql. In contrast to many other monitoring systems, Monit can act if |
| an error situation should occur, e.g.; if sendmail is not |
| running, monit can start sendmail again automatically or if |
| apache is using too many resources (e.g. if a DoS attack is in |
| progress) Monit can stop or restart apache and send you an alert |
| message. Monit can also monitor process characteristics, such as |
| how much memory or cpu cycles a process is using. |
| .PP |
| You can also use Monit to monitor \fBfiles\fR, \fBdirectories\fR and |
| \&\fBfilesystems\fR on localhost. Monit can monitor these items for |
| changes, such as timestamp changes, checksum changes or size |
| changes. This is also useful for security reasons \- you can |
| monitor the md5 or sha1 checksum of files that should not change |
| and get an alert or perform an action if they should change. |
| .PP |
| Monit can monitor \fBnetwork connections\fR to various servers, |
| either on localhost or on remote hosts. \s-1TCP\s0, \s-1UDP\s0 and Unix Domain |
| Sockets are supported. Network tests can be performed on a |
| protocol level; Monit has built-in tests for the main Internet |
| protocols, such as \s-1HTTP\s0, \s-1SMTP\s0 etc. Even if a protocol is not |
| supported you can still test the server because you can configure |
| Monit to send any data and test the response from the server. |
| .PP |
| Monit can be used to test \fBprograms\fR or scripts at certain |
| times, much like cron, but in addition, you can test the exit |
| value of a program and perform an action or send an alert if the |
| exit value indicates an error. This means that you can use Monit |
| to perform any type of check you can write a script for. |
| .PP |
| Finally, Monit can be used to monitor general \fBsystem\fR resources |
| on localhost such as overall \s-1CPU\s0 usage, Memory and Load Average. |
| .SH "THE MONIT CONTROL FILE" |
| .IX Header "THE MONIT CONTROL FILE" |
| Monit is configured and controlled via a control file called |
| \&\fImonitrc\fR. The default location for this file is ~/.monitrc. If |
| this file does not exist, Monit will try /etc/monitrc, then |
| \&\f(CW@sysconfdir\fR@/monitrc and finally ./monitrc. The value of |
| \&\f(CW@sysconfdir\fR@ is given at configure time as ./configure |
| \&\-\-sysconfdir. For instance, using \fI./configure \-\-sysconfdir |
| /var/monit/etc\fR will make Monit search for \fImonitrc\fR in |
| \&\fI/var/monit/etc\fR |
| .PP |
| Monit uses its own Domain Specific Language (\s-1DSL\s0); The control |
| file consists of a series of service entries and global option |
| statements in a free-format, token-oriented syntax. |
| .PP |
| Comments begin with a \fB#\fR and extend through the end of the |
| line. There are three kinds of tokens in the control file: |
| \&\fIkeywords\fR, \fInumbers\fR and \fIstrings\fR. On a semantic level, the |
| control file consists of only three types of entries: |
| .IP "1. Global set-statements" 4 |
| .IX Item "1. Global set-statements" |
| A global set-statement starts with the keyword \fIset\fR and the |
| item to configure. |
| .IP "2. Global include-statement" 4 |
| .IX Item "2. Global include-statement" |
| The include statement consists of the keyword \fIinclude\fR and |
| a glob string. |
| .IP "3. One or more service entry statements." 4 |
| .IX Item "3. One or more service entry statements." |
| A service entry starts with the keyword \fIcheck\fR followed by the |
| service type. |
| .PP |
| The meaning of the various statements will be explained in the |
| following sections. |
| .SH "LOGGING" |
| .IX Header "LOGGING" |
| Monit will log status and error messages to a log file. Use the |
| \&\fIset logfile\fR statement in the monitrc control file. To setup |
| Monit to log to its own logfile, use e.g. \fIset logfile |
| /var/log/monit.log\fR. If \fBsyslog\fR is given as a value for the |
| \&\fI\-l\fR command-line switch (or the keyword \fIset logfile syslog\fR |
| is found in the control file) Monit will use the \fBsyslog\fR system |
| daemon to log messages with a priority assigned to each message |
| based on the context. To turn off logging, simply do not set the |
| logfile in the control file (and of course, do not use the \-l |
| switch). |
| .SH "DAEMON MODE" |
| .IX Header "DAEMON MODE" |
| Use |
| .PP |
| .Vb 1 |
| \& set daemon n (where n is a number in seconds) |
| .Ve |
| .PP |
| to specify Monit's poll cycle length and run Monit in daemon |
| mode. You must specify a numeric argument which is a polling |
| interval in seconds. In daemon mode, Monit detaches from the |
| console, puts itself in the background and runs continuously, |
| monitoring each specified service and then goes to sleep for the |
| given poll interval, wakes up and starts monitoring again in an |
| endless cycle. |
| .PP |
| Alternatively, you can use the \fI\-d\fR command line switch to set |
| the poll interval, but it is strongly recommended to set the poll |
| interval in your \fI~/.monitrc\fR file, by using \fIset daemon\fR. |
| .PP |
| Monit will then always start in daemon mode. If you do not use |
| this statement and do not start monit with the \-d option, Monit |
| will just run through the service checks once and then exit. This |
| may be useful in some situations, but Monit is primarily designed |
| to run as a daemon process. |
| .PP |
| Calling monit with a Monit daemon running in the background sends |
| a wake-up signal to the daemon, forcing it to check services |
| immediately. Calling monit with the quit argument will kill a |
| running Monit daemon process instead of waking it up. |
| .SH "INIT SUPPORT" |
| .IX Header "INIT SUPPORT" |
| The \fIset init\fR statement prevents Monit from transforming itself |
| into a daemon process. Instead Monit will run as a foreground |
| process. (You should still use set daemon to specify the poll |
| cycle). |
| .PP |
| This is required to run Monit from init. Using init to start |
| Monit is probably the best way to run Monit if you want to be |
| certain that you always have a running Monit daemon on your |
| system. Another option is to run Monit from crontab. In any case, |
| you should make sure that the control file does not have any |
| syntax errors before you start Monit from init or crontab. |
| .PP |
| To setup Monit to run from init, you can either use the set init |
| statement in Monit's control file or use the \-I option from the |
| command line. Here is what you must add to /etc/inittab: |
| .PP |
| .Vb 2 |
| \& # Run Monit in standard run\-levels |
| \& mo:2345:respawn:/usr/local/bin/monit \-Ic /etc/monitrc |
| .Ve |
| .PP |
| After you have modified init's configuration file, you can run |
| the following command to re-examine /etc/inittab and start Monit: |
| .PP |
| .Vb 1 |
| \& telinit q |
| .Ve |
| .PP |
| For systems without telinit: |
| .PP |
| .Vb 1 |
| \& kill \-1 1 |
| .Ve |
| .PP |
| If Monit is used to monitor services that are also started at |
| boot time (e.g. services started via \s-1SYSV\s0 init rc scripts or via |
| inittab) then, in some cases, a race condition could occur. That |
| is; if a service is slow to start, Monit can assume that the |
| service is not running and possibly try to start it and raise an |
| alert, while, in fact the service is already about to start or |
| already in its startup sequence. Please see the \s-1FAQ\s0 for a |
| solution to this problem. |
| .SH "INCLUDE FILES" |
| .IX Header "INCLUDE FILES" |
| The Monit control file, \fImonitrc\fR, can include additional |
| configuration files. This feature helps one to maintain a certain |
| structure or to place repeating settings into one file. Include |
| statements can be placed at virtually any spot. The syntax is the |
| following: |
| .PP |
| .Vb 1 |
| \& include globstring |
| .Ve |
| .PP |
| The globstring is any kind of string as defined in \fIglob\fR\|(7). Thus, |
| you can refer to a single file or you can load several files at |
| once. If you want to use whitespace in your string the globstring |
| needs to be embedded into quotes (') or double quotes ("). If the |
| globstring matches a directory instead of a file, it is silently |
| ignored. |
| .PP |
| Any \fIinclude\fR statements in included files are parsed as in the |
| main control file. |
| .PP |
| If the globstring matches several results, the files are included |
| in a non sorted manner. If you need to rely on a certain order, |
| you might need to use single \fIinclude\fR statements. |
| .PP |
| An example, |
| .PP |
| .Vb 1 |
| \& include /etc/monit.d/*.cfg |
| .Ve |
| .PP |
| This will load any file matching the globstring. That is, all |
| files in \fI/etc/monit.d\fR that end with the suffix \fI.cfg\fR. |
| .SH "GROUP SUPPORT" |
| .IX Header "GROUP SUPPORT" |
| Service entries in the control file, \fImonitrc\fR, can be grouped |
| together by the \fIgroup\fR statement. The syntax is simply (keyword |
| in capital): |
| .PP |
| .Vb 1 |
| \& GROUP groupname |
| .Ve |
| .PP |
| With this statement it is possible to group similar service |
| entries together and manage them as a whole. Monit provides |
| functions to start, stop, restart, monitor and unmonitor a |
| group of services, like so: |
| .PP |
| To start a group of services from the console: |
| .PP |
| .Vb 1 |
| \& monit \-g <groupname> start |
| .Ve |
| .PP |
| To stop a group of services: |
| .PP |
| .Vb 1 |
| \& monit \-g <groupname> stop |
| .Ve |
| .PP |
| To restart a group of services: |
| .PP |
| .Vb 1 |
| \& monit \-g <groupname> restart |
| .Ve |
| .PP |
| Note: |
| the \fIstatus\fR and \fIsummary\fR commands don't support the \-g |
| option and will print the state of all services. |
| .PP |
| A service can be added to multiple groups by adding the group |
| statement multiple times: |
| .PP |
| .Vb 2 |
| \& group www |
| \& group filesystem |
| .Ve |
| .SH "MONITORING MODE" |
| .IX Header "MONITORING MODE" |
| Monit supports three monitoring modes per service: \fIactive\fR, |
| \&\fIpassive\fR and \fImanual\fR. See also the example section below for |
| usage of the mode statement. |
| .PP |
| In \fIactive\fR mode, Monit will monitor a service and in case of |
| problems Monit will act and raise alerts, start, stop or restart |
| the service. Active mode is the default mode. |
| .PP |
| In \fIpassive\fR mode, Monit will passively monitor a service and |
| specifically \fBnot\fR try to fix a problem, but it will still raise |
| alerts in case of a problem. |
| .PP |
| For use in clustered environments there is also a \fImanual\fR |
| mode. In this mode, Monit will enter \fIactive\fR mode \fBonly\fR if a |
| service was brought under monit's control, for example by |
| executing the following command in the console: |
| .PP |
| .Vb 2 |
| \& monit start sybase |
| \& (Monit will call sybase\*(Aqs start method and enable monitoring) |
| .Ve |
| .PP |
| If a service was not started by Monit or was stopped or disabled |
| for example by: |
| .PP |
| .Vb 2 |
| \& monit stop sybase |
| \& (Monit will call sybase\*(Aqs stop method and disable monitoring) |
| .Ve |
| .PP |
| Monit will then not monitor the service. This allows you to have |
| services configured in monitrc and start them with Monit only if they |
| should run. This feature can be used to build a simple failsafe |
| cluster. |
| .PP |
| A service's monitoring state is persistent across Monit restart. |
| This means that you probably would like to make certain that |
| services in manual mode are stopped or in unmonitored mode at |
| server shutdown. Do for instance the following in a server |
| shutdown script: |
| .PP |
| .Vb 1 |
| \& monit stop sybase |
| .Ve |
| .PP |
| or |
| .PP |
| .Vb 1 |
| \& monit unmonitor sybase |
| .Ve |
| .PP |
| If you use Monit in a HA-cluster you should place the state file |
| in a temporary filesystem so if the machine should crash and the |
| stand-by machine takes over services, any manual monitoring mode |
| services that were started on the crashed machine won't be |
| started on reboot. Use for example: |
| .PP |
| .Vb 1 |
| \& set statefile /tmp/monit.state |
| .Ve |
| .SH "ALERT MESSAGES" |
| .IX Header "ALERT MESSAGES" |
| Monit will raise an email alert in the following situations: |
| .PP |
| .Vb 10 |
| \& o A service timed out |
| \& o A service does not exist |
| \& o A service related data access problem |
| \& o A service related program execution problem |
| \& o A service is of invalid object type |
| \& o A program status failed |
| \& o An icmp problem |
| \& o A port connection problem |
| \& o A resource statement match |
| \& o A file checksum problem |
| \& o A file size problem |
| \& o A file/directory timestamp problem |
| \& o A file/directory/filesystem permission problem |
| \& o A file/directory/filesystem uid problem |
| \& o A file/directory/filesystem gid problem |
| \& o An action is done per administrator\*(Aqs request |
| .Ve |
| .PP |
| Monit will send an alert each time a monitored object changes. |
| This involves: |
| .PP |
| .Vb 8 |
| \& o Monit started, stopped or reloaded |
| \& o A file checksum changed |
| \& o A file size changed |
| \& o A file content match |
| \& o A file/directory timestamp changed |
| \& o A filesystem mount flags changed |
| \& o A process PID changed |
| \& o A process PPID changed |
| .Ve |
| .PP |
| You use the alert statement to notify Monit that you want alert |
| messages sent to an email address. If you do not specify an alert |
| statement, Monit will not send alert messages. |
| .PP |
| There are two forms of alert statement: |
| .PP |
| .Vb 2 |
| \& o Global \- common for all services |
| \& o Local \- per service |
| .Ve |
| .PP |
| In both cases you can use more than one alert statement. In other |
| words, you can send many different emails to many different |
| addresses. |
| .PP |
| Recipients in the global and in the local lists are alerted when |
| a service failed, recovered or changed. If the same email address |
| is in the global and in the local list, Monit will only send one |
| alert. Local (per service) defined alert email addresses override |
| global addresses in case of a conflict. Finally, you may choose |
| to only use a global alert list (recommended), a local per |
| service list or both. |
| .PP |
| It is also possible to disable the global alerts locally for |
| particular service(s) and recipients. |
| .SS "Setting a global alert statement" |
| .IX Subsection "Setting a global alert statement" |
| If a change occurred on a monitored service, Monit will send an |
| alert to all recipients in the global list who have registered |
| interest for the event type. Here is the syntax for the global |
| alert statement: |
| .IP "\s-1SET\s0 \s-1ALERT\s0 mail-address [ [\s-1NOT\s0] {events}] [\s-1MAIL\-FORMAT\s0 {mail\-format}] [\s-1REMINDER\s0 number]" 4 |
| .IX Item "SET ALERT mail-address [ [NOT] {events}] [MAIL-FORMAT {mail-format}] [REMINDER number]" |
| .PP |
| Simply using the following in the global section of monitrc: |
| .PP |
| .Vb 1 |
| \& set alert foo@bar |
| .Ve |
| .PP |
| will send a default email to the address foo@bar whenever an |
| event occurs on any service. Such an event may be that a |
| service timed out, a service doesn't exist and so on. If you want |
| to send alert messages to more email addresses, add a \fIset alert |
| \&'email'\fR statement for each address. |
| .PP |
| For explanations of the \fIevents, MAIL-FORMAT and \s-1REMINDER\s0\fR |
| keywords above, please see below. |
| .PP |
| You can also use the \s-1NOT\s0 option ahead of the events list which |
| will reverse the meaning of the list. That is, only send alerts |
| for events \fInot\fR in the list. This can save you some |
| configuration bytes if you are interested in most events except a |
| few. |
| .SS "Setting a local alert statement" |
| .IX Subsection "Setting a local alert statement" |
| Each service can also have its own recipient list. |
| .IP "\s-1ALERT\s0 mail-address [ [\s-1NOT\s0] {events}] [\s-1MAIL\-FORMAT\s0 {mail\-format}] [\s-1REMINDER\s0 number]" 4 |
| .IX Item "ALERT mail-address [ [NOT] {events}] [MAIL-FORMAT {mail-format}] [REMINDER number]" |
| .PP |
| or |
| .IP "\s-1NOALERT\s0 mail-address" 4 |
| .IX Item "NOALERT mail-address" |
| .PP |
| If you only want an alert message sent for certain events and for |
| certain service(s), for example only for timeout events or only |
| if a service died, then postfix the alert-statement with a filter |
| block: |
| .PP |
| .Vb 3 |
| \& check process myproc with pidfile /var/run/my.pid |
| \& alert foo@bar only on { timeout, nonexist } |
| \& ... |
| .Ve |
| .PP |
| (\fIonly\fR and \fIon\fR are noise keywords, ignored by Monit. As a |
| side note; Noise keywords are used in the control file grammar to |
| make an entry resemble English and thus make it easier to read |
| (or, so goes the philosophy). The full set of available noise |
| keywords is listed below in the Control File section). |
| .PP |
| You can also setup to send alerts for all events except some by |
| putting the word \fInot\fR ahead of the list. For example, if you |
| want to receive alerts for all events except Monit instance |
| events, you can write (note that the noise words 'but' and 'on' |
| are optional): |
| .PP |
| .Vb 3 |
| \& check system myserver |
| \& alert foo@bar but not on { instance } |
| \& ... |
| .Ve |
| .PP |
| instead of: |
| .PP |
| .Vb 10 |
| \& alert foo@bar on { action |
| \& checksum |
| \& connection |
| \& content |
| \& data |
| \& exec |
| \& fsflags |
| \& gid |
| \& icmp |
| \& invalid |
| \& nonexist |
| \& permission |
| \& pid |
| \& ppid |
| \& resource |
| \& size |
| \& status |
| \& timeout |
| \& timestamp |
| \& uid |
| \& uptime } |
| .Ve |
| .PP |
| This will send alerts for all events to foo@bar, except Monit |
| instance events. An instance event \s-1BTW\s0, is an event fired |
| whenever the Monit program starts or stops. |
| .PP |
| Event filtering can be used to send an email to different email |
| addresses depending on the events that occurred. For instance: |
| .PP |
| .Vb 3 |
| \& alert foo@bar { nonexist, timeout, resource, icmp, connection } |
| \& alert security@bar on { checksum, permission, uid, gid } |
| \& alert manager@bar |
| .Ve |
| .PP |
| This will send an alert message to foo@bar whenever a nonexist, |
| timeout, resource, icmp or connection problem occurs and a message to |
| security@bar if a checksum, permission, uid or gid problem |
| occurs. And finally, a message to manager@bar whenever any error |
| event occurs. |
| .PP |
| Here is the list of events you can use in a mail-filter: \fIaction, |
| checksum, connection, content, data, exec, fsflags, gid, icmp, |
| instance, invalid, nonexist, permission, pid, ppid, resource, size, |
| status, timeout, timestamp, uid, uptime\fR |
| .PP |
| You can also disable the alerts locally using the \s-1NOALERT\s0 |
| statement. This is useful if you have lots of services monitored |
| and are using the global alert statement, but don't want to |
| receive alerts for some minor subset of services: |
| .PP |
| .Vb 1 |
| \& noalert appadmin@bar |
| .Ve |
| .PP |
| For example, if you stick the noalert statement in a 'check |
| system' entry, you won't receive system related alerts (such as |
| Monit instance started/stopped/reloaded alert, system overloaded |
| alert, etc.) but will receive alerts for all other monitored |
| services. |
| .PP |
| The following example will alert foo@bar on all events on all |
| services by default, except the service mybar which will send an |
| alert only on timeout. The trick is based on the fact that local |
| definition of the same recipient overrides the global setting |
| (including registered events and mail format): |
| .PP |
| .Vb 1 |
| \& set alert foo@bar |
| \& |
| \& check process myfoo with pidfile /var/run/myfoo.pid |
| \& ... |
| \& check process mybar with pidfile /var/run/mybar.pid |
| \& alert foo@bar only on { timeout } |
| .Ve |
| .SS "Alert message layout" |
| .IX Subsection "Alert message layout" |
| Monit provides a default mail message layout that is short and to |
| the point. Here's an example of a standard alert mail sent by |
| monit: |
| .PP |
| .Vb 4 |
| \& From: monit@tildeslash.com |
| \& Subject: Monit alert \-\- Does not exist apache |
| \& To: hauk@tildeslash.com |
| \& Date: Thu, 04 Sep 2003 02:33:03 +0200 |
| \& |
| \& Does not exist Service apache |
| \& |
| \& Date: Thu, 04 Sep 2003 02:33:03 +0200 |
| \& Action: restart |
| \& Host: www.tildeslash.com |
| \& |
| \& Your faithful employee, |
| \& monit |
| .Ve |
| .PP |
| If you want to, you can change the format of this message with |
| the optional \fImail-format\fR statement. The syntax for this |
| statement is as follows: |
| .PP |
| .Vb 8 |
| \& mail\-format { |
| \& from: monit@localhost |
| \& reply\-to: support@domain.com |
| \& subject: $SERVICE $EVENT at $DATE |
| \& message: Monit $ACTION $SERVICE at $DATE on $HOST: $DESCRIPTION. |
| \& Yours sincerely, |
| \& monit |
| \& } |
| .Ve |
| .PP |
| Where the keyword \fIfrom:\fR is the email address Monit should |
| pretend it is sending from. It does not have to be a real mail |
| address, but it must be a properly formatted mail address, of the |
| form: name@domain. The \fIreply-to:\fR keyword can be used to set |
| the reply-to mail header. The keyword \fIsubject:\fR is for the |
| email subject line. The subject must be on only \fIone\fR line. The |
| \&\fImessage:\fR keyword denotes the mail body. If used, this keyword |
| should always be the last in a mail-format statement. The mail |
| body can be as long as you want, but must \fBnot\fR contain the '}' |
| character. |
| .PP |
| All of these format keywords are optional, but if used, you must |
| provide at least one. Thus if you only want to change the from |
| address Monit is using you can do: |
| .PP |
| .Vb 1 |
| \& set alert foo@bar with mail\-format { from: bofh@bar.baz } |
| .Ve |
| .PP |
| From the previous example you will notice that some special \f(CW$XXX\fR |
| variables were used. If used, they will be substituted and |
| expanded into the text with these values: |
| .IP "\(bu" 4 |
| \&\fI\f(CI$EVENT\fI\fR |
| .Sp |
| .Vb 2 |
| \& A string describing the event that occurred. The values are |
| \& fixed and are: |
| \& |
| \& Event: | Failure state: | Success state: |
| \& \-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- |
| \& ACTION | "Action done" | "Action done" |
| \& CHECKSUM | "Checksum failed" | "Checksum succeeded" |
| \& CONNECTION| "Connection failed" | "Connection succeeded" |
| \& CONTENT | "Content failed", | "Content succeeded" |
| \& DATA | "Data access error" | "Data access succeeded" |
| \& EXEC | "Execution failed" | "Execution succeeded" |
| \& FSFLAG | "Filesystem flags failed"| "Filesystem flags succeeded" |
| \& GID | "GID failed" | "GID succeeded" |
| \& ICMP | "ICMP failed" | "ICMP succeeded" |
| \& INSTANCE | "Monit instance changed" | "Monit instance changed not" |
| \& INVALID | "Invalid type" | "Type succeeded" |
| \& NONEXIST | "Does not exist" | "Exists" |
| \& PERMISSION| "Permission failed" | "Permission succeeded" |
| \& PID | "PID failed" | "PID succeeded" |
| \& PPID | "PPID failed" | "PPID succeeded" |
| \& RESOURCE | "Resource limit matched" | "Resource limit succeeded" |
| \& SIZE | "Size failed" | "Size succeeded" |
| \& STATUS | "Status failed" | "Status succeeded" |
| \& TIMEOUT | "Timeout" | "Timeout recovery" |
| \& TIMESTAMP | "Timestamp failed" | "Timestamp succeeded" |
| \& UID | "UID failed" | "UID succeeded" |
| \& UPTIME | "Uptime failed" | "Uptime succeeded" |
| .Ve |
| .IP "\(bu" 4 |
| \&\fI\f(CI$SERVICE\fI\fR |
| .Sp |
| .Vb 1 |
| \& The service entry name in monitrc |
| .Ve |
| .IP "\(bu" 4 |
| \&\fI\f(CI$DATE\fI\fR |
| .Sp |
| .Vb 1 |
| \& The current time and date (RFC 822 date style). |
| .Ve |
| .IP "\(bu" 4 |
| \&\fI\f(CI$HOST\fI\fR |
| .Sp |
| .Vb 1 |
| \& The name of the host Monit is running on |
| .Ve |
| .IP "\(bu" 4 |
| \&\fI\f(CI$ACTION\fI\fR |
| .Sp |
| .Vb 2 |
| \& The name of the action which was done. Action names are fixed |
| \& and are: |
| \& |
| \& Action: | Name: |
| \& \-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- |
| \& ALERT | "alert" |
| \& EXEC | "exec" |
| \& RESTART | "restart" |
| \& START | "start" |
| \& STOP | "stop" |
| \& UNMONITOR| "unmonitor" |
| .Ve |
| .IP "\(bu" 4 |
| \&\fI\f(CI$DESCRIPTION\fI\fR |
| .Sp |
| .Vb 1 |
| \& The description of the error condition |
| .Ve |
| .SS "Setting a global mail format" |
| .IX Subsection "Setting a global mail format" |
| It is possible to set a standard mail format with the following |
| global set-statement (keywords are in capital): |
| .IP "\s-1SET\s0 MAIL-FORMAT {mail\-format}" 4 |
| .IX Item "SET MAIL-FORMAT {mail-format}" |
| .PP |
| Format set with this statement will apply to every alert |
| statement that does \fInot\fR have its own specified mail-format. |
| This statement is most useful for setting a default from address |
| for messages sent by monit, like so: |
| .PP |
| .Vb 1 |
| \& set mail\-format { from: monit@foo.bar.no } |
| .Ve |
| .SS "Setting an error reminder" |
| .IX Subsection "Setting an error reminder" |
| Monit by default sends just one error notification if a service |
| failed and another when it recovered. If you want to be notified |
| more than once if a service remains in a failed state, you can |
| use the reminder option to the alert statement (keywords are in |
| capital): |
| .IP "\s-1ALERT\s0 ... [\s-1WITH\s0] \s-1REMINDER\s0 [\s-1ON\s0] number [\s-1CYCLES\s0]" 4 |
| .IX Item "ALERT ... [WITH] REMINDER [ON] number [CYCLES]" |
| .PP |
| For example if you want to be notified each tenth cycle if a |
| service remains in a failed state, you can use: |
| .PP |
| .Vb 1 |
| \& alert foo@bar with reminder on 10 cycles |
| .Ve |
| .PP |
| Likewise if you want to be notified on each failed cycle, you can |
| use: |
| .PP |
| .Vb 1 |
| \& alert foo@bar with reminder on 1 cycle |
| .Ve |
| .SS "Setting a mail server for alert messages" |
| .IX Subsection "Setting a mail server for alert messages" |
| The mail server Monit should use to send alert messages is |
| defined with a global set statement (keywords are in capital and |
| optional statements in [brackets]): |
| .PP |
| .Vb 5 |
| \& SET MAILSERVER {hostname|ip\-address [PORT port] |
| \& [USERNAME username] [PASSWORD password] |
| \& [using SSLV2|SSLV3|TLSV1] [CERTMD5 checksum]}+ |
| \& [with TIMEOUT X SECONDS] |
| \& [using HOSTNAME hostname] |
| .Ve |
| .PP |
| The port statement allows one to use \s-1SMTP\s0 servers other than those |
| listening on port 25. If omitted, port 25 is used unless ssl or |
| tls is used, in which case port 465 is used by default. |
| .PP |
| Monit supports plain \s-1SMTP\s0 authentication \- you can set a username |
| and a password using the \s-1USERNAME\s0 and \s-1PASSWORD\s0 options. |
| .PP |
| To use secure communication, use the \s-1SSLV2\s0, \s-1SSLV3\s0 or \s-1TLSV1\s0 |
| options. You can also specify the server certificate checksum |
| using the \s-1CERTMD5\s0 option. |
| .PP |
| As you can see, it is possible to set several \s-1SMTP\s0 servers. If |
| Monit cannot connect to the first server in the list it will try |
| the second server and so on. Monit has a default connection |
| timeout of 5 seconds and if the \s-1SMTP\s0 server is slow, Monit could |
| time out when connecting or reading from the server. If this is |
| the case, you can use the optional timeout statement to explicitly |
| set the timeout to a higher value if needed. Here is an example |
| for setting several mail servers: |
| .PP |
| .Vb 3 |
| \& set mailserver mail.tildeslash.com, mail.foo.bar port 10025 |
| \& username "Rabbi" password "Loew" using tlsv1, localhost |
| \& with timeout 15 seconds |
| .Ve |
| .PP |
| Here Monit will first try to connect to the server |
| \&\*(L"mail.tildeslash.com\*(R", if this server is down Monit will try |
| \&\*(L"mail.foo.bar\*(R" on port 10025 using the given credentials via tls |
| and finally \*(L"localhost\*(R". We also set an explicit connect and read |
| timeout: if Monit cannot connect to the first \s-1SMTP\s0 server in the |
| list within 15 seconds it will try the next server and so on. The |
| \&\fIset mailserver ..\fR statement is optional and if not defined |
| Monit will not send email alerts. Not setting a mail server is |
| recommended only if alert notification is delegated to M/Monit. |
| .PP |
| Monit, by default, uses the local host name in \s-1SMTP\s0 \s-1HELO/EHLO\s0 and |
| in the Message-ID header. Some mail servers check this |
| information against \s-1DNS\s0 for spam protection and can reject the |
| email if the \s-1DNS\s0 and the hostname used in the transaction do |
| not match. If this is the case, you can override the default |
| local host name by using the \s-1HOSTNAME\s0 option: |
| .PP |
| .Vb 2 |
| \& set mailserver mail.tildeslash.com using hostname |
| \& "myhost.example.org" |
| .Ve |
| .SS "Event queue" |
| .IX Subsection "Event queue" |
| If the \s-1MTA\s0 (mail server) for sending alerts is not available, |
| Monit \fIcan\fR queue events on the local file-system until the \s-1MTA\s0 |
| recovers. Monit will then post queued events in order with their |
| original timestamp so the events are not lost. This feature is |
| most useful if Monit is used together with M/Monit and when event |
| history is important. |
| .PP |
| The event queue is persistent across Monit restarts and, provided |
| that the back-end filesystem is persistent too, across system |
| restarts as well. |
| .PP |
| By default, the queue is disabled and if the alert handler fails, |
| Monit will simply drop the alert message. To enable the event |
| queue, add the following statement to the Monit control file: |
| .PP |
| .Vb 1 |
| \& SET EVENTQUEUE BASEDIR <path> [SLOTS <number>] |
| .Ve |
| .PP |
| The <path> is the path to the directory where events will be |
| stored. Optionally if you want to limit the queue size, use the |
| slots option to only store up to \fInumber\fR event messages. If the |
| slots option is not used, Monit will store as many events as the |
| backend filesystem allows. |
| .PP |
| Example: |
| .PP |
| .Vb 3 |
| \& set eventqueue |
| \& basedir /var/monit |
| \& slots 5000 |
| .Ve |
| .PP |
| Events are stored in a binary format, with one file per event. |
| The file size is ca. 130 bytes or a bit more (depending on the |
| message length). The file name is composed of the unix timestamp, |
| underscore and the service name, for example: |
| .PP |
| .Vb 1 |
| \& /var/monit/1131269471_apache |
| .Ve |
| .PP |
| If you are running more than one Monit instance on the same |
| machine, you \fBmust\fR use separate event queue directories to |
| avoid sending wrong alerts to the wrong addresses. |
| .PP |
| If you want to purge the queue by hand, that is, remove queued |
| event-files, Monit should be stopped before the removal. |
| .SH "SERVICE TIMEOUT" |
| .IX Header "SERVICE TIMEOUT" |
| \&\fBMonit\fR provides a service timeout mechanism for situations |
| where a service simply refuses to start or respond over a longer |
| period. |
| .PP |
| The timeout mechanism is based on number of service restarts and |
| number of poll-cycles. For example, if a service had \fIx\fR |
| restarts within \fIy\fR poll-cycles (where \fIx\fR <= \fIy\fR) then Monit |
| will perform an action (for example unmonitor the service). If a |
| timeout occurs, Monit will send an alert message if you have |
| registered interest in this event. |
| .PP |
| The syntax for the timeout statement is as follows (keywords are |
| in capital): |
| .IP "\s-1IF\s0 <number> \s-1RESTART\s0 <number> \s-1CYCLE\s0(S) \s-1THEN\s0 <action>" 4 |
| .IX Item "IF <number> RESTART <number> CYCLE(S) THEN <action>" |
| .PP |
| Here is an example where Monit will unmonitor the service if it |
| was restarted 2 times within 3 cycles: |
| .PP |
| .Vb 1 |
| \& if 2 restarts within 3 cycles then unmonitor |
| .Ve |
| .PP |
| To have Monit check the service again after monitoring was |
| disabled, run 'monit monitor <servicename>' from the command |
| line. |
| .PP |
| Example for setting custom exec on timeout: |
| .PP |
| .Vb 1 |
| \& if 5 restarts within 5 cycles then exec "/foo/bar" |
| .Ve |
| .PP |
| Example for stopping the service: |
| .PP |
| .Vb 1 |
| \& if 7 restarts within 10 cycles then stop |
| .Ve |
| .SH "SERVICE TESTS" |
| .IX Header "SERVICE TESTS" |
| Monit provides several tests you can use in a 'check service' |
| entry to test a service. There are two classes of tests: |
| variable and constant tests. That is, the condition we test |
| is either constant e.g. a number or it can vary. |
| .PP |
| A constant test has this general format: |
| .IP "\s-1IF\s0 <\s-1TEST\s0> [[<X>] [\s-1TIMES\s0 \s-1WITHIN\s0] <Y> \s-1CYCLES\s0] \s-1THEN\s0 \s-1ACTION\s0 [\s-1ELSE\s0 \s-1IF\s0 \s-1SUCCEEDED\s0 [[<X>] [\s-1TIMES\s0 \s-1WITHIN\s0] <Y> \s-1CYCLES\s0] \s-1THEN\s0 \s-1ACTION\s0]" 4 |
| .IX Item "IF <TEST> [[<X>] [TIMES WITHIN] <Y> CYCLES] THEN ACTION [ELSE IF SUCCEEDED [[<X>] [TIMES WITHIN] <Y> CYCLES] THEN ACTION]" |
| .PP |
| If the <\s-1TEST\s0> condition should evaluate to true, then the |
| selected action is executed each cycle the test condition remains |
| true. The comparison value is constant. Recovery action is |
| evaluated only once (on a failed to succeeded state change only). |
| The '\s-1ELSE\s0 \s-1IF\s0 \s-1SUCCEEDED\s0' part is optional; if omitted, Monit will |
| still send an alert on recovery. The alert is sent only once for |
| each state change unless overridden by the 'reminder' alert |
| option. |
| .PP |
| A variable test has this general format: |
| .IP "\s-1IF\s0 \s-1CHANGED\s0 <\s-1TEST\s0> [[<X>] [\s-1TIMES\s0 \s-1WITHIN\s0] <Y> \s-1CYCLES\s0] \s-1THEN\s0 \s-1ACTION\s0" 4 |
| .IX Item "IF CHANGED <TEST> [[<X>] [TIMES WITHIN] <Y> CYCLES] THEN ACTION" |
| .PP |
| If the <\s-1TEST\s0> should evaluate to true, then the selected action |
| is executed once. The comparison value is a variable where the |
| last result becomes the new value and is used for comparisons in |
| future cycles. An alert is delivered each time the condition |
| becomes true. |
| .PP |
| You can use this test for alerts or for some automatic action, |
| for example to reload a monitored process after its configuration |
| file was changed. Variable tests are supported for 'checksum', |
| \&'size', 'pid', 'ppid' and 'timestamp' tests only. |
| .IP "... [[<X>] [\s-1TIMES\s0 \s-1WITHIN\s0] <Y> \s-1CYCLES\s0] ..." 4 |
| .IX Item "... [[<X>] [TIMES WITHIN] <Y> CYCLES] ..." |
| .PP |
| If a test matches, its action is executed at once. This behavior |
| can optionally be changed and you can for instance require that a |
| test must match over several poll cycles before the action is |
| executed by using the statement above. You can use this in |
| several ways. For example: |
| .PP |
| .Vb 1 |
| \& if failed port 80 for 3 times within 5 cycles then alert |
| .Ve |
| .PP |
| or |
| .PP |
| .Vb 1 |
| \& if failed port 80 for 10 cycles then unmonitor |
| .Ve |
| .PP |
| If you don't specify <X> times, it equals <Y> by default, thus |
| the test matches if it evaluates to true for <Y> consecutive cycles. |
| .PP |
| It is possible to use this option to tune and prevent a rush of |
| notifications. You can use this option for failed, succeeded, |
| recovered or changed rules. Here is a more complex example: |
| .PP |
| .Vb 5 |
| \& check filesystem rootfs with path /dev/hda1 |
| \& if space usage > 80% for 5 times within 15 cycles |
| \& then alert else if succeeded for 10 cycles then alert |
| \& if space usage > 90% for 5 cycles then |
| \& exec \*(Aq/try/to/free/the/space\*(Aq |
| .Ve |
| .PP |
| In each test you must select the action to be executed from this |
| list: |
| .IP "\(bu" 4 |
| \&\fB\s-1ALERT\s0\fR sends the user an alert event on each state change (for |
| constant tests) or on each change (for variable tests). |
| .IP "\(bu" 4 |
| \&\fB\s-1RESTART\s0\fR restarts the service \fIand\fR sends an alert. Restart is |
| conducted by first calling the service's registered stop method |
| and then the service's start method. |
| .IP "\(bu" 4 |
| \&\fB\s-1START\s0\fR starts the service by calling the service's registered |
| start method \fIand\fR sends an alert. |
| .IP "\(bu" 4 |
| \&\fB\s-1STOP\s0\fR stops the service by calling the service's registered |
| stop method \fIand\fR sends an alert. If Monit stops a service it |
| will not be checked by Monit anymore nor restarted again later. |
| To reactivate monitoring of the service again you must explicitly |
| enable monitoring from the web interface or from the console, |
| e.g. 'monit monitor apache'. |
| .IP "\(bu" 4 |
| \&\fB\s-1EXEC\s0\fR can be used to execute an arbitrary program \fIand\fR send |
| an alert. If you choose this action you must state the program to |
| be executed and if the program requires arguments you must enclose |
| the program and its arguments in a quoted string. You may |
| optionally specify the uid and gid the executed program should |
| switch to upon start. For instance: |
| .Sp |
| .Vb 2 |
| \& exec "/usr/local/tomcat/bin/startup.sh" |
| \& as uid nobody and gid nobody |
| .Ve |
| .Sp |
| The uid and gid switch can be useful if the program to be started |
| cannot change to a lesser privileged user and group. This is |
| typically needed for Java Servers. Remember, if Monit is run by |
| the superuser, then all programs executed by Monit will be |
| started with superuser privileges unless the uid and gid |
| extension was used. |
| .IP "\(bu" 4 |
| \&\fB\s-1UNMONITOR\s0\fR will disable monitoring of the service \fIand\fR send |
| an alert. The service will not be checked by Monit anymore nor |
| restarted again later. To reactivate monitoring of the service |
| you must explicitly enable monitoring from monit's web interface |
| or from the console using the monitor argument. |
| .SS "\s-1EXISTENCE\s0 \s-1TESTING\s0" |
| .IX Subsection "EXISTENCE TESTING" |
| Monit's default action when a service does not exist (for example |
| a process is not running, a file doesn't exist, etc.) is to |
| perform service restart action. |
| .PP |
| The default action can be overridden with the following statement: |
| .IP "\s-1IF\s0 [\s-1DOES\s0] \s-1NOT\s0 \s-1EXIST\s0 [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action [\s-1ELSE\s0 \s-1IF\s0 \s-1SUCCEEDED\s0 [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action]" 4 |
| .IX Item "IF [DOES] NOT EXIST [[<X>] <Y> CYCLES] THEN action [ELSE IF SUCCEEDED [[<X>] <Y> CYCLES] THEN action]" |
| .PP |
| \&\fIaction\fR is a choice of \*(L"\s-1ALERT\s0\*(R", \*(L"\s-1RESTART\s0\*(R", \*(L"\s-1START\s0\*(R", \*(L"\s-1STOP\s0\*(R", |
| \&\*(L"\s-1EXEC\s0\*(R" or \*(L"\s-1UNMONITOR\s0\*(R". |
| .PP |
| Example: |
| .PP |
| .Vb 2 |
| \& check file with path /cifs/mydata |
| \& if does not exist for 5 cycles then exec "/usr/bin/mount_cifs.sh" |
| .Ve |
| .SS "\s-1RESOURCE\s0 \s-1TESTING\s0" |
| .IX Subsection "RESOURCE TESTING" |
| Monit can examine how much system resources a service is using. |
| This test can only be used within a system or process service |
| entry in the Monit control file. |
| .PP |
| Depending on system or process characteristics, services can be |
| stopped or restarted and alerts can be generated. Thus it is |
| possible to utilize systems which are idle and to spare systems |
| under high load. |
| .PP |
| The full syntax for a resource-statement used for resource |
| testing is as follows (keywords are in capital and optional |
| statements in [brackets]), |
| .IP "\s-1IF\s0 resource operator value [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action [\s-1ELSE\s0 \s-1IF\s0 \s-1SUCCEEDED\s0 [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action]" 4 |
| .IX Item "IF resource operator value [[<X>] <Y> CYCLES] THEN action [ELSE IF SUCCEEDED [[<X>] <Y> CYCLES] THEN action]" |
| .PP |
| \&\fIresource\fR is a choice of \*(L"\s-1CPU\s0\*(R", \*(L"\s-1TOTALCPU\s0\*(R", |
| \&\*(L"\s-1CPU\s0([user|system|wait])\*(R", \*(L"\s-1MEMORY\s0\*(R", \*(L"\s-1SWAP\s0\*(R", \*(L"\s-1CHILDREN\s0\*(R", \*(L"\s-1TOTALMEMORY\s0\*(R", |
| \&\*(L"\s-1LOADAVG\s0([1min|5min|15min])\*(R". Some resource tests can be used |
| inside a check system entry, some in a check process entry and |
| some in both: |
| .PP |
| System only resource tests: |
| .PP |
| \&\s-1CPU\s0([user|system|wait]) is the percent of time the system spends |
| in user or system/kernel space. Some systems such as Linux 2.6 |
| support a 'wait' indicator as well. |
| .PP |
| \&\s-1SWAP\s0 is the swap usage of the system in either percent (of the |
| system's total) or as an amount (Byte, kB, \s-1MB\s0, \s-1GB\s0). |
| .PP |
| Process only resource tests: |
| .PP |
| \&\s-1CPU\s0 is the \s-1CPU\s0 usage of the process itself (percent). |
| .PP |
| \&\s-1TOTALCPU\s0 is the total \s-1CPU\s0 usage of the process and its children |
| (in percent). You will want to use \s-1TOTALCPU\s0 typically for |
| services like Apache web server where one master process forks the |
| child processes as workers. |
| .PP |
| \&\s-1CHILDREN\s0 is the number of child processes of the process. |
| .PP |
| \&\s-1TOTALMEMORY\s0 is the memory usage of the process and its child |
| processes in either percent or as an amount (Byte, kB, \s-1MB\s0, \s-1GB\s0). |
| .PP |
| System and process resource tests: |
| .PP |
| \&\s-1MEMORY\s0 is the memory usage of the system or of a process (without |
| children) in either percent (of the system's total) or as an |
| amount (Byte, kB, \s-1MB\s0, \s-1GB\s0). |
| .PP |
| \&\s-1LOADAVG\s0([1min|5min|15min]) refers to the system's load average. |
| The load average is the number of processes in the system run |
| queue, averaged over the specified time period. |
| .PP |
| \&\fIoperator\fR is a choice of \*(L"<\*(R", \*(L">\*(R", \*(L"!=\*(R", \*(L"==\*(R" in C notation, |
| \&\*(L"gt\*(R", \*(L"lt\*(R", \*(L"eq\*(R", \*(L"ne\*(R" in shell sh notation and \*(L"greater\*(R", |
| \&\*(L"less\*(R", \*(L"equal\*(R", \*(L"notequal\*(R" in human readable form (if not |
| specified, default is \s-1EQUAL\s0). |
| .PP |
| \&\fIvalue\fR is either an integer or a real number (except for |
| \&\s-1CHILDREN\s0). For \s-1CPU\s0, \s-1TOTALCPU\s0, \s-1MEMORY\s0 and \s-1TOTALMEMORY\s0 you need to |
| specify a \fIunit\fR. This could be \*(L"%\*(R" or if applicable \*(L"B\*(R" (Byte), |
| \&\*(L"kB\*(R" (1024 Byte), \*(L"\s-1MB\s0\*(R" (1024 KiloByte) or \*(L"\s-1GB\s0\*(R" (1024 MegaByte). |
| .PP |
| \&\fIaction\fR is a choice of \*(L"\s-1ALERT\s0\*(R", \*(L"\s-1RESTART\s0\*(R", \*(L"\s-1START\s0\*(R", \*(L"\s-1STOP\s0\*(R", |
| \&\*(L"\s-1EXEC\s0\*(R" or \*(L"\s-1UNMONITOR\s0\*(R". |
| .PP |
| To calculate the cycles, a counter is raised whenever the |
| expression above is true and it is lowered whenever it is false |
| (but not below 0). All counters are reset in case of a restart. |
| .PP |
| The following is an example to check that the \s-1CPU\s0 usage of a |
| service is not going beyond 50% during five poll cycles. If it |
| does, Monit will restart the service: |
| .PP |
| .Vb 1 |
| \& if cpu is greater than 50% for 5 cycles then restart |
| .Ve |
| .PP |
| See also the example section below. |
| .SS "\s-1FILE\s0 \s-1CHECKSUM\s0 \s-1TESTING\s0" |
| .IX Subsection "FILE CHECKSUM TESTING" |
| The checksum statement may only be used in a file service |
| entry. If specified in the control file, Monit will compute |
| a md5 or sha1 checksum for a file. |
| .PP |
| The checksum test in constant form is used to verify that a |
| file does not change. Syntax (keywords are in capital): |
| .IP "\s-1IF\s0 \s-1FAILED\s0 [MD5|SHA1] \s-1CHECKSUM\s0 [\s-1EXPECT\s0 checksum] [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action [\s-1ELSE\s0 \s-1IF\s0 \s-1SUCCEEDED\s0 [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action]" 4 |
| .IX Item "IF FAILED [MD5|SHA1] CHECKSUM [EXPECT checksum] [[<X>] <Y> CYCLES] THEN action [ELSE IF SUCCEEDED [[<X>] <Y> CYCLES] THEN action]" |
| .PP |
| The checksum test in variable form is used to watch for |
| file changes. Syntax (keywords are in capital): |
| .IP "\s-1IF\s0 \s-1CHANGED\s0 [MD5|SHA1] \s-1CHECKSUM\s0 [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action" 4 |
| .IX Item "IF CHANGED [MD5|SHA1] CHECKSUM [[<X>] <Y> CYCLES] THEN action" |
| .PP |
| The choice of \s-1MD5\s0 or \s-1SHA1\s0 is optional. \s-1MD5\s0 features a 128 bit |
| and \s-1SHA1\s0 a 160 bit checksum. If this option is omitted Monit |
| tries to guess the method from the \s-1EXPECT\s0 string or uses \s-1MD5\s0 as |
| default. |
| .PP |
| \&\fIexpect\fR is optional and if used it specifies a md5 or sha1 |
| string Monit should expect when testing a file's checksum. If |
| \&\fIexpect\fR is used, Monit will not compute an initial checksum for |
| the file, but instead use the string you submit. For example: |
| .PP |
| .Vb 3 |
| \& if failed checksum and |
| \& expect the sum 8f7f419955cefa0b33a2ba316cba3659 |
| \& then alert |
| .Ve |
| .PP |
| You can, for example, use the \s-1GNU\s0 utility \fI\fImd5sum\fI\|(1)\fR or |
| \&\fI\fIsha1sum\fI\|(1)\fR to create a checksum string for a file and |
| use this string in the expect-statement. |
| .PP |
| \&\fIaction\fR is a choice of \*(L"\s-1ALERT\s0\*(R", \*(L"\s-1RESTART\s0\*(R", \*(L"\s-1START\s0\*(R", \*(L"\s-1STOP\s0\*(R", |
| \&\*(L"\s-1EXEC\s0\*(R" or \*(L"\s-1UNMONITOR\s0\*(R". |
| .PP |
| The checksum statement in variable form may be used to check a |
| file for changes and if changed, do a specified action. For |
| instance to reload a server if its configuration file was |
| changed. The following illustrates this for the apache web |
| server: |
| .PP |
| .Vb 3 |
| \& check file httpd.conf path /usr/local/apache/conf/httpd.conf |
| \& if changed sha1 checksum |
| \& then exec "/usr/local/apache/bin/apachectl graceful" |
| .Ve |
| .PP |
| If you plan to use the checksum statement for security reasons, |
| (a very good idea, by the way) and to monitor a file or files |
| which should not change, then please use the constant form and |
| also read the \s-1DEPENDENCY\s0 \s-1TREE\s0 section below to see a detailed |
| example on how to do this properly. |
| .PP |
| Monit can also test the checksum for files on a remote host via |
| the \s-1HTTP\s0 protocol. See the \s-1CONNECTION\s0 \s-1TESTING\s0 section below. |
| .SS "\s-1TIMESTAMP\s0 \s-1TESTING\s0" |
| .IX Subsection "TIMESTAMP TESTING" |
| The timestamp statement may only be used in a file, fifo or |
| directory service entry. |
| .PP |
| The timestamp test in constant form is used to verify various |
| timestamp conditions. Syntax (keywords are in capital): |
| .IP "\s-1IF\s0 \s-1TIMESTAMP\s0 [[operator] value [unit]] [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action [\s-1ELSE\s0 \s-1IF\s0 \s-1SUCCEEDED\s0 [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action]" 4 |
| .IX Item "IF TIMESTAMP [[operator] value [unit]] [[<X>] <Y> CYCLES] THEN action [ELSE IF SUCCEEDED [[<X>] <Y> CYCLES] THEN action]" |
| .PP |
| The timestamp statement in variable form is simply to test an |
| existing file or directory for timestamp changes and if changed, |
| execute an action. Syntax (keywords are in capital): |
| .IP "\s-1IF\s0 \s-1CHANGED\s0 \s-1TIMESTAMP\s0 [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action" 4 |
| .IX Item "IF CHANGED TIMESTAMP [[<X>] <Y> CYCLES] THEN action" |
| .PP |
| \&\fIoperator\fR is a choice of \*(L"<\*(R", \*(L">\*(R", \*(L"!=\*(R", \*(L"==\*(R" in C notation, |
| \&\*(L"\s-1GT\s0\*(R", \*(L"\s-1LT\s0\*(R", \*(L"\s-1EQ\s0\*(R", \*(L"\s-1NE\s0\*(R" in shell sh notation and \*(L"\s-1GREATER\s0\*(R", |
| \&\*(L"\s-1LESS\s0\*(R", \*(L"\s-1EQUAL\s0\*(R", \*(L"\s-1NOTEQUAL\s0\*(R" in human readable form (if not |
| specified, default is \s-1EQUAL\s0). |
| .PP |
| \&\fIvalue\fR is a time watermark. |
| .PP |
| \&\fIunit\fR is either \*(L"\s-1SECOND\s0\*(R", \*(L"\s-1MINUTE\s0\*(R", \*(L"\s-1HOUR\s0\*(R" or \*(L"\s-1DAY\s0\*(R" (it is also |
| possible to use \*(L"\s-1SECONDS\s0\*(R", \*(L"\s-1MINUTES\s0\*(R", \*(L"\s-1HOURS\s0\*(R", or \*(L"\s-1DAYS\s0\*(R"). |
| .PP |
| \&\fIaction\fR is a choice of \*(L"\s-1ALERT\s0\*(R", \*(L"\s-1RESTART\s0\*(R", \*(L"\s-1START\s0\*(R", \*(L"\s-1STOP\s0\*(R", |
| \&\*(L"\s-1EXEC\s0\*(R" or \*(L"\s-1UNMONITOR\s0\*(R". |
| .PP |
| The variable timestamp statement is useful for checking a file |
| for changes and then executing an action. This version was written |
| particularly with configuration files in mind. For instance, if |
| you monitor the apache web server you can use this statement to |
| reload apache if the \fIhttpd.conf\fR (apache's configuration file) |
| was changed. Like so: |
| .PP |
| .Vb 3 |
| \& check file httpd.conf with path /usr/local/apache/conf/httpd.conf |
| \& if changed timestamp |
| \& then exec "/usr/local/apache/bin/apachectl graceful" |
| .Ve |
| .PP |
| The constant timestamp version is useful for monitoring systems |
| able to report their state by changing the timestamp of certain |
| state files. For instance the \fIiPlanet Messaging server stored |
| process\fR system updates the timestamp of the following files: |
| .PP |
| .Vb 3 |
| \& o stored.ckp |
| \& o stored.lcu |
| \& o stored.per |
| .Ve |
| .PP |
| If a task should fail, the system keeps the timestamp. To report |
| stored problems you can use the following statements: |
| .PP |
| .Vb 2 |
| \& check file stored.ckp with path /msg\-foo/config/stored.ckp |
| \& if timestamp > 1 minute then alert |
| \& |
| \& check file stored.lcu with path /msg\-foo/config/stored.lcu |
| \& if timestamp > 5 minutes then alert |
| \& |
| \& check file stored.per with path /msg\-foo/config/stored.per |
| \& if timestamp > 1 hour then alert |
| .Ve |
| .PP |
| As mentioned above, you can also use the timestamp statement for |
| monitoring directories for changes. If files are added or removed |
| from a directory, its timestamp is changed: |
| .PP |
| .Vb 2 |
| \& check directory mydir path /foo/directory |
| \& if timestamp > 1 hour then alert |
| .Ve |
| .PP |
| or |
| .PP |
| .Vb 2 |
| \& check directory myotherdir path /foo/secure/directory |
| \& if timestamp < 1 hour then alert |
| .Ve |
| .PP |
| The following example is a hack for restarting a process after a |
| certain time. Sometimes this is a necessary workaround for some |
| third-party applications, until the vendor fixes a problem: |
| .PP |
| .Vb 3 |
| \& check file server.pid path /var/run/server.pid |
| \& if timestamp > 7 days |
| \& then exec "/usr/local/server/restart\-server" |
| .Ve |
| .SS "\s-1FILE\s0 \s-1SIZE\s0 \s-1TESTING\s0" |
| .IX Subsection "FILE SIZE TESTING" |
| The size statement may only be used in a check file service |
| entry. If specified in the control file, Monit will compute |
| a size for a file. |
| .PP |
| The size test in constant form is used to verify various size |
| conditions. Syntax (keywords are in capital): |
| .IP "\s-1IF\s0 \s-1SIZE\s0 [[operator] value [unit]] [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action [\s-1ELSE\s0 \s-1IF\s0 \s-1SUCCEEDED\s0 [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action]" 4 |
| .IX Item "IF SIZE [[operator] value [unit]] [[<X>] <Y> CYCLES] THEN action [ELSE IF SUCCEEDED [[<X>] <Y> CYCLES] THEN action]" |
| .PP |
| The size statement in variable form is simply to test an existing |
| file for size changes and if changed, execute an action. Syntax |
| (keywords are in capital): |
| .IP "\s-1IF\s0 \s-1CHANGED\s0 \s-1SIZE\s0 [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action" 4 |
| .IX Item "IF CHANGED SIZE [[<X>] <Y> CYCLES] THEN action" |
| .PP |
| \&\fIoperator\fR is a choice of \*(L"<\*(R", \*(L">\*(R", \*(L"!=\*(R", \*(L"==\*(R" in C notation, |
| \&\*(L"\s-1GT\s0\*(R", \*(L"\s-1LT\s0\*(R", \*(L"\s-1EQ\s0\*(R", \*(L"\s-1NE\s0\*(R" in shell sh notation and \*(L"\s-1GREATER\s0\*(R", |
| \&\*(L"\s-1LESS\s0\*(R", \*(L"\s-1EQUAL\s0\*(R", \*(L"\s-1NOTEQUAL\s0\*(R" in human readable form (if not |
| specified, default is \s-1EQUAL\s0). |
| .PP |
| \&\fIvalue\fR is a size watermark. |
| .PP |
| \&\fIunit\fR is a choice of \*(L"B\*(R",\*(L"\s-1KB\s0\*(R",\*(L"\s-1MB\s0\*(R",\*(L"\s-1GB\s0\*(R" or long alternatives |
| \&\*(L"byte\*(R", \*(L"kilobyte\*(R", \*(L"megabyte\*(R", \*(L"gigabyte\*(R". If it is not |
| specified, \*(L"byte\*(R" unit is assumed by default. |
| .PP |
| \&\fIaction\fR is a choice of \*(L"\s-1ALERT\s0\*(R", \*(L"\s-1RESTART\s0\*(R", \*(L"\s-1START\s0\*(R", \*(L"\s-1STOP\s0\*(R", |
| \&\*(L"\s-1EXEC\s0\*(R" or \*(L"\s-1UNMONITOR\s0\*(R". |
| .PP |
| The variable size test form is useful for checking a file for |
| changes and sending an alert or executing an action. Monit will |
| register the size of the file at startup and monitor the file for |
| changes. As soon as the value changes, Monit will perform the |
| specified action, reset the registered value to the new value and |
| continue monitoring and test if the size changes again. |
| .PP |
| One example of use for this statement is to conduct security |
| checks, for instance: |
| .PP |
| .Vb 2 |
| \& check file su with path /bin/su |
| \& if changed size then exec "/sbin/ifconfig eth0 down" |
| .Ve |
| .PP |
| which will \*(L"cut the cable\*(R" and stop a possible intruder from |
| compromising the system further. This test is just one of many |
| you may use to increase the security awareness on a system. If |
| you plan to use Monit for security reasons we recommend that you |
| use this test in combination with other supported tests like |
| checksum, timestamp, and so on. |
| .PP |
| The constant form of this test can be useful in similar or |
| different contexts. It can, for instance, be used to test if a |
| certain file size was exceeded and then alert you or Monit may |
| execute a certain action specified by you. An example is to use |
| this statement to rotate log files after they have reached a |
| certain size or to check that a database file does not grow |
| beyond a specified threshold. |
| .PP |
| To rotate a log file: |
| .PP |
| .Vb 3 |
| \& check file myapp.log with path /var/log/myapp.log |
| \& if size > 50 MB then |
| \& exec "/usr/local/bin/rotate /var/log/myapp.log myapp" |
| .Ve |
| .PP |
| where /usr/local/bin/rotate may be a simple script, such as: |
| .PP |
| .Vb 3 |
| \& #!/bin/bash |
| \& /bin/mv $1 $1.\`date +%y\-%m\-%d\` |
| \& /usr/bin/pkill \-HUP $2 |
| .Ve |
| .PP |
| Or you may use this statement to trigger the \fIlogrotate\fR\|(8) |
| program, to do an \*(L"emergency\*(R" rotate. Or to send an alert if a |
| file becomes a known bottleneck if it grows beyond a certain size |
| because of limits in a database engine: |
| .PP |
| .Vb 2 |
| \& check file mydb with path /data/mydatabase.db |
| \& if size > 1 GB then alert |
| .Ve |
| .PP |
| This is a more restrictive form of the first example where the |
| size is explicitly defined (note that the real su size is system |
| dependent): |
| .PP |
| .Vb 2 |
| \& check file su with path /bin/su |
| \& if size != 95564 then exec "/sbin/ifconfig eth0 down" |
| .Ve |
| .SS "\s-1FILE\s0 \s-1CONTENT\s0 \s-1TESTING\s0" |
| .IX Subsection "FILE CONTENT TESTING" |
| The match statement allows you to test the content of a text file |
| by using regular expressions. This is a great feature if you need |
| to periodically test files, such as log files, for certain |
| patterns. If a pattern matches, Monit defaults to raising an alert; |
| other actions are also possible. |
| .PP |
| The syntax (keywords in capital) for using this test is: |
| .IP "\s-1IF\s0 [\s-1NOT\s0] \s-1MATCH\s0 {regex|path} [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action" 4 |
| .IX Item "IF [NOT] MATCH {regex|path} [[<X>] <Y> CYCLES] THEN action" |
| .PP |
| \&\fIregex\fR is a string containing the extended regular expression. |
| See also \fIregex\fR\|(7). |
| .PP |
| \&\fIpath\fR is an absolute path to a file containing extended |
| regular expression on every line. See also \fIregex\fR\|(7). |
| .PP |
| \&\fIaction\fR is a choice of \*(L"\s-1ALERT\s0\*(R", \*(L"\s-1RESTART\s0\*(R", \*(L"\s-1START\s0\*(R", \*(L"\s-1STOP\s0\*(R", |
| \&\*(L"\s-1EXEC\s0\*(R" or \*(L"\s-1UNMONITOR\s0\*(R". |
| .PP |
| You can use the \fI\s-1NOT\s0\fR statement to invert a match. |
| .PP |
| The content is only checked once every cycle. If content is |
| added and then removed between two checks, it goes unnoticed. |
| .PP |
| On startup the read position is set to the end of the file |
| and Monit continues to scan to the end of file on each cycle. |
| But if the file size should decrease or inode change the read |
| position is set to the start of the file. |
| .PP |
| Only lines ending with a newline character are inspected. Thus, |
| lines are being ignored until they have been completed with this |
| character. Also note that only the first 511 characters of a line |
| are inspected. |
| .IP "\s-1IGNORE\s0 [\s-1NOT\s0] \s-1MATCH\s0 {regex|path}" 4 |
| .IX Item "IGNORE [NOT] MATCH {regex|path}" |
| .PP |
| Lines matching an \fI\s-1IGNORE\s0\fR are not inspected during later |
| evaluations. \fI\s-1IGNORE\s0 \s-1MATCH\s0\fR always has precedence over |
| \&\fI\s-1IF\s0 \s-1MATCH\s0\fR. |
| .PP |
| All \fI\s-1IGNORE\s0 \s-1MATCH\s0\fR statements are evaluated first, in the |
| order of their appearance. Thereafter, all the \fI\s-1IF\s0 \s-1MATCH\s0\fR |
| statements are evaluated. |
| .PP |
| A real life example might look like this: |
| .PP |
| .Vb 7 |
| \& check file syslog with path /var/log/syslog |
| \& ignore match |
| \& "^\ew{3} [ :0\-9]{11} [._[:alnum:]\-]+ monit\e[[0\-9]+\e]:" |
| \& ignore match /etc/monit/ignore.regex |
| \& if match |
| \& "^\ew{3} [ :0\-9]{11} [._[:alnum:]\-]+ mrcoffee\e[[0\-9]+\e]:" |
| \& if match /etc/monit/active.regex then alert |
| .Ve |
| .SS "\s-1FILESYSTEM\s0 \s-1FLAGS\s0 \s-1TESTING\s0" |
| .IX Subsection "FILESYSTEM FLAGS TESTING" |
| Monit can test the flags of a filesystem for changes. This test |
| is implicit and Monit will send an alert in case of failure by |
| default. |
| .PP |
| This test is useful for detecting changes of the filesystem flags |
| such as when the filesystem became read-only based on disk errors |
| or the mount flags were changed (such as nosuid). Each platform |
| provides a different set of flags. \s-1POSIX\s0 defines the \s-1RDONLY\s0 and |
| \&\s-1NOSUID\s0 flags which should work on all platforms. Some platforms |
| (such as FreeBSD) have additional flags. |
| .PP |
| The syntax for the fsflags statement is: |
| .IP "\s-1IF\s0 \s-1CHANGED\s0 \s-1FSFLAGS\s0 [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action" 4 |
| .IX Item "IF CHANGED FSFLAGS [[<X>] <Y> CYCLES] THEN action" |
| .PP |
| \&\fIaction\fR is a choice of \*(L"\s-1ALERT\s0\*(R", \*(L"\s-1RESTART\s0\*(R", \*(L"\s-1START\s0\*(R", \*(L"\s-1STOP\s0\*(R", |
| \&\*(L"\s-1EXEC\s0\*(R" or \*(L"\s-1UNMONITOR\s0\*(R". |
| .PP |
| Example: |
| .PP |
| .Vb 3 |
| \& check filesystem rootfs with path / |
| \& if changed fsflags then exec "/my/script" |
| \& alert root@localhost |
| .Ve |
| .SS "\s-1SPACE\s0 \s-1TESTING\s0" |
| .IX Subsection "SPACE TESTING" |
| Monit can test file systems for space usage. This test may |
| only be used within a check filesystem service entry in the |
| Monit control file. |
| .PP |
| Monit will check a filesystem's total space usage. If you only |
| want to check available space for non-superuser, you must set the |
| watermark appropriately (i.e. total space minus reserved blocks |
| for the superuser). |
| .PP |
| You can obtain (and set) the superuser's reserved blocks size, |
| for example by using the tune2fs utility on Linux. On Linux 5% of |
| available blocks are reserved for the superuser by default. On |
| solaris 10% of the blocks are reserved. You can also use tunefs |
| on solaris to change values on a live filesystem. |
| .PP |
| The full syntax for the space statement is: |
| .IP "\s-1IF\s0 \s-1SPACE\s0 operator value unit [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action [\s-1ELSE\s0 \s-1IF\s0 \s-1SUCCEEDED\s0 [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action]" 4 |
| .IX Item "IF SPACE operator value unit [[<X>] <Y> CYCLES] THEN action [ELSE IF SUCCEEDED [[<X>] <Y> CYCLES] THEN action]" |
| .PP |
| \&\fIoperator\fR is a choice of \*(L"<\*(R",\*(L">\*(R",\*(L"!=\*(R",\*(L"==\*(R" in c notation, \*(L"gt\*(R", |
| \&\*(L"lt\*(R", \*(L"eq\*(R", \*(L"ne\*(R" in shell sh notation and \*(L"greater\*(R", \*(L"less\*(R", |
| \&\*(L"equal\*(R", \*(L"notequal\*(R" in human readable form (if not specified, |
| default is \s-1EQUAL\s0). |
| .PP |
| \&\fIunit\fR is a choice of \*(L"B\*(R",\*(L"\s-1KB\s0\*(R",\*(L"\s-1MB\s0\*(R",\*(L"\s-1GB\s0\*(R", \*(L"%\*(R" or long |
| alternatives \*(L"byte\*(R", \*(L"kilobyte\*(R", \*(L"megabyte\*(R", \*(L"gigabyte\*(R", |
| \&\*(L"percent\*(R". |
| .PP |
| \&\fIaction\fR is a choice of \*(L"\s-1ALERT\s0\*(R", \*(L"\s-1RESTART\s0\*(R", \*(L"\s-1START\s0\*(R", \*(L"\s-1STOP\s0\*(R", |
| \&\*(L"\s-1EXEC\s0\*(R" or \*(L"\s-1UNMONITOR\s0\*(R". |
| .SS "\s-1INODE\s0 \s-1TESTING\s0" |
| .IX Subsection "INODE TESTING" |
| If supported by the file-system, you can use Monit to test |
| for inode usage. This test may only be used within a check |
| filesystem service entry in the Monit control file. |
| .PP |
| If the filesystem becomes unavailable, Monit will call the |
| service's registered start method, if it is defined and if Monit |
| is running in active mode. If Monit runs in passive mode or the |
| start method is not defined, Monit will just send an error |
| alert. |
| .PP |
| The syntax for the inode statement is: |
| .IP "\s-1IF\s0 \s-1INODE\s0(S) operator value [unit] [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action [\s-1ELSE\s0 \s-1IF\s0 \s-1SUCCEEDED\s0 [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action]" 4 |
| .IX Item "IF INODE(S) operator value [unit] [[<X>] <Y> CYCLES] THEN action [ELSE IF SUCCEEDED [[<X>] <Y> CYCLES] THEN action]" |
| .PP |
| \&\fIoperator\fR is a choice of \*(L"<\*(R",\*(L">\*(R",\*(L"!=\*(R",\*(L"==\*(R" in c notation, \*(L"gt\*(R", |
| \&\*(L"lt\*(R", \*(L"eq\*(R", \*(L"ne\*(R" in shell sh notation and \*(L"greater\*(R", \*(L"less\*(R", |
| \&\*(L"equal\*(R", \*(L"notequal\*(R" in human readable form (if not specified, |
| default is \s-1EQUAL\s0). |
| .PP |
| \&\fIunit\fR is optional. If not specified, the value is an absolute |
| count of inodes. You can use the \*(L"%\*(R" character or the longer |
| alternative \*(L"percent\*(R" as a unit. |
| .PP |
| \&\fIaction\fR is a choice of \*(L"\s-1ALERT\s0\*(R", \*(L"\s-1RESTART\s0\*(R", \*(L"\s-1START\s0\*(R", \*(L"\s-1STOP\s0\*(R", |
| \&\*(L"\s-1EXEC\s0\*(R" or \*(L"\s-1UNMONITOR\s0\*(R". |
| .SS "\s-1PERMISSION\s0 \s-1TESTING\s0" |
| .IX Subsection "PERMISSION TESTING" |
| Monit can monitor the permission of file objects. This test may |
| only be used within a file, fifo, directory or filesystem service |
| entry in the Monit control file. |
| .PP |
| The syntax for the permission statement is: |
| .IP "\s-1IF\s0 \s-1FAILED\s0 \s-1PERM\s0(\s-1ISSION\s0) octalnumber [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action [\s-1ELSE\s0 \s-1IF\s0 \s-1SUCCEEDED\s0 [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action]" 4 |
| .IX Item "IF FAILED PERM(ISSION) octalnumber [[<X>] <Y> CYCLES] THEN action [ELSE IF SUCCEEDED [[<X>] <Y> CYCLES] THEN action]" |
| .PP |
| \&\fIoctalnumber\fR defines permissions for a file, a directory or a |
| filesystem as four octal digits (0\-7). Valid range: 0000 \- 7777 (you |
| can omit the leading zeros, Monit will add the zeros to the left |
| thus for example \*(L"640\*(R" is valid value and matches \*(L"0640\*(R"). |
| .PP |
| \&\fIaction\fR is a choice of \*(L"\s-1ALERT\s0\*(R", \*(L"\s-1RESTART\s0\*(R", \*(L"\s-1START\s0\*(R", \*(L"\s-1STOP\s0\*(R", |
| \&\*(L"\s-1EXEC\s0\*(R" or \*(L"\s-1UNMONITOR\s0\*(R". |
| .PP |
| The web interface will show a permission warning if the test |
| failed. |
| .PP |
| We recommend that you use the \s-1UNMONITOR\s0 action in a permission |
| statement. The rationale for this feature is security and that |
| Monit does not start a possibly cracked program or script. |
| Example: |
| .PP |
| .Vb 2 |
| \& check file monit.bin with path "/usr/local/bin/monit" |
| \& if failed permission 0555 then unmonitor |
| .Ve |
| .PP |
| If the test fails, Monit will simply send an alert and stop |
| monitoring the file and propagate an unmonitor action upward in |
| a depend tree. |
| .SS "\s-1UID\s0 \s-1TESTING\s0" |
| .IX Subsection "UID TESTING" |
| Monit can monitor the owner user id (uid) of a file object. |
| This test may only be used within a check \- file, fifo, |
| directory or filesystem service entry in the Monit control |
| file. |
| .PP |
| The syntax for the uid statement is: |
| .IP "\s-1IF\s0 \s-1FAILED\s0 \s-1UID\s0 user [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action [\s-1ELSE\s0 \s-1IF\s0 \s-1SUCCEEDED\s0 [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action]" 4 |
| .IX Item "IF FAILED UID user [[<X>] <Y> CYCLES] THEN action [ELSE IF SUCCEEDED [[<X>] <Y> CYCLES] THEN action]" |
| .PP |
| \&\fIuser\fR defines a user id either in numeric or in string form. |
| .PP |
| \&\fIaction\fR is a choice of \*(L"\s-1ALERT\s0\*(R", \*(L"\s-1RESTART\s0\*(R", \*(L"\s-1START\s0\*(R", \*(L"\s-1STOP\s0\*(R", |
| \&\*(L"\s-1EXEC\s0\*(R" or \*(L"\s-1UNMONITOR\s0\*(R". |
| .PP |
| The web interface will show a uid warning if the test should |
| fail. |
| .PP |
| We recommend that you use the \s-1UNMONITOR\s0 action in a uid |
| statement. The rationale for this feature is security and that |
| Monit does not start a possibly cracked program or script. |
| Example: |
| .PP |
| .Vb 2 |
| \& check file passwd with path /etc/passwd |
| \& if failed uid root then unmonitor |
| .Ve |
| .PP |
| If the test fails, Monit will simply send an alert and stop |
| monitoring the file and propagate an unmonitor action upward in |
| a depend tree. |
| .SS "\s-1GID\s0 \s-1TESTING\s0" |
| .IX Subsection "GID TESTING" |
| Monit can monitor the owner group id (gid) of file objects. This |
| test may only be used within a file, fifo, directory or |
| filesystem service entry in the Monit control file. |
| .PP |
| The syntax for the gid statement is: |
| .IP "\s-1IF\s0 \s-1FAILED\s0 \s-1GID\s0 user [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action [\s-1ELSE\s0 \s-1IF\s0 \s-1SUCCEEDED\s0 [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action]" 4 |
| .IX Item "IF FAILED GID user [[<X>] <Y> CYCLES] THEN action [ELSE IF SUCCEEDED [[<X>] <Y> CYCLES] THEN action]" |
| .PP |
| \&\fIuser\fR defines a group id either in numeric or in string form. |
| .PP |
| \&\fIaction\fR is a choice of \*(L"\s-1ALERT\s0\*(R", \*(L"\s-1RESTART\s0\*(R", \*(L"\s-1START\s0\*(R", \*(L"\s-1STOP\s0\*(R", |
| \&\*(L"\s-1EXEC\s0\*(R" or \*(L"\s-1UNMONITOR\s0\*(R". |
| .PP |
| The web interface will show a gid warning if the test should |
| fail. |
| .PP |
| We recommend that you use the \s-1UNMONITOR\s0 action in a gid |
| statement. The rationale for this feature is security and that |
| Monit does not start a possibly cracked program or script. |
| Example: |
| .PP |
| .Vb 2 |
| \& check file shadow with path /etc/shadow |
| \& if failed gid root then unmonitor |
| .Ve |
| .PP |
| If the test fails, Monit will simply send an alert and stop |
| monitoring the file and propagate an unmonitor action upward in |
| a depend tree. |
| .SS "\s-1PID\s0 \s-1TESTING\s0" |
| .IX Subsection "PID TESTING" |
| Monit can test the process identification number (pid) of a |
| process for changes. This test is implicit and Monit will send an |
| alert in the case of failure by default. |
| .PP |
| The syntax for the pid statement is: |
| .IP "\s-1IF\s0 \s-1CHANGED\s0 \s-1PID\s0 [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action" 4 |
| .IX Item "IF CHANGED PID [[<X>] <Y> CYCLES] THEN action" |
| .PP |
| \&\fIaction\fR is a choice of \*(L"\s-1ALERT\s0\*(R", \*(L"\s-1RESTART\s0\*(R", \*(L"\s-1START\s0\*(R", \*(L"\s-1STOP\s0\*(R", |
| \&\*(L"\s-1EXEC\s0\*(R" or \*(L"\s-1UNMONITOR\s0\*(R". |
| .PP |
| This test is useful to detect possible process restarts which have |
| occurred in the timeframe between two Monit testing cycles. In |
| the case that the restart was fast and the process provides |
| expected service (i.e. all tests succeeded) you will be notified |
| that the process was replaced. |
| .PP |
| For example sshd daemon can restart very quickly, thus if someone |
| changes its configuration and does an sshd restart outside of Monit's |
| control you will be notified that the process was replaced by a |
| new instance (or you can optionally do some other action such as |
| preventively stop sshd). |
| .PP |
| Another example is a MySQL Cluster which has its own watchdog |
| with process restart ability. You can use Monit for redundant |
| monitoring. |
| .PP |
| Example: |
| .PP |
| .Vb 2 |
| \& check process sshd with pidfile /var/run/sshd.pid |
| \& if changed pid then exec "/my/script" |
| .Ve |
| .SS "\s-1PPID\s0 \s-1TESTING\s0" |
| .IX Subsection "PPID TESTING" |
| Monit can test the process parent process identification number |
| (ppid) of a process for changes. This test is implicit and Monit |
| will send an alert in the case of failure by default. |
| .PP |
| The syntax for the ppid statement is: |
| .IP "\s-1IF\s0 \s-1CHANGED\s0 \s-1PPID\s0 [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action" 4 |
| .IX Item "IF CHANGED PPID [[<X>] <Y> CYCLES] THEN action" |
| .PP |
| \&\fIaction\fR is a choice of \*(L"\s-1ALERT\s0\*(R", \*(L"\s-1RESTART\s0\*(R", \*(L"\s-1START\s0\*(R", \*(L"\s-1STOP\s0\*(R", |
| \&\*(L"\s-1EXEC\s0\*(R" or \*(L"\s-1UNMONITOR\s0\*(R". |
| .PP |
| This test is useful for detecting changes of a process parent. |
| .PP |
| Example: |
| .PP |
| .Vb 2 |
| \& check process myproc with pidfile /var/run/myproc.pid |
| \& if changed ppid then exec "/my/script" |
| .Ve |
| .SS "\s-1UPTIME\s0 \s-1TESTING\s0" |
| .IX Subsection "UPTIME TESTING" |
| The uptime statement may only be used in a check process service |
| entry. If specified in the control file, Monit will test the |
| process uptime. |
| .PP |
| Syntax (keywords are in capital): |
| .IP "\s-1IF\s0 \s-1UPTIME\s0 [[operator] value [unit]] [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action [\s-1ELSE\s0 \s-1IF\s0 \s-1SUCCEEDED\s0 [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action]" 4 |
| .IX Item "IF UPTIME [[operator] value [unit]] [[<X>] <Y> CYCLES] THEN action [ELSE IF SUCCEEDED [[<X>] <Y> CYCLES] THEN action]" |
| .PP |
| \&\fIoperator\fR is a choice of \*(L"<\*(R", \*(L">\*(R", \*(L"!=\*(R", \*(L"==\*(R" in C notation, |
| \&\*(L"\s-1GT\s0\*(R", \*(L"\s-1LT\s0\*(R", \*(L"\s-1EQ\s0\*(R", \*(L"\s-1NE\s0\*(R" in shell sh notation and \*(L"\s-1GREATER\s0\*(R", |
| \&\*(L"\s-1LESS\s0\*(R", \*(L"\s-1EQUAL\s0\*(R", \*(L"\s-1NOTEQUAL\s0\*(R" in human readable form (if not |
| specified, default is \s-1EQUAL\s0). |
| .PP |
| \&\fIvalue\fR is an uptime watermark. |
| .PP |
| \&\fIunit\fR is either \*(L"\s-1SECOND\s0\*(R", \*(L"\s-1MINUTE\s0\*(R", \*(L"\s-1HOUR\s0\*(R" or \*(L"\s-1DAY\s0\*(R" (it is also |
| possible to use \*(L"\s-1SECONDS\s0\*(R", \*(L"\s-1MINUTES\s0\*(R", \*(L"\s-1HOURS\s0\*(R", or \*(L"\s-1DAYS\s0\*(R"). |
| .PP |
| \&\fIaction\fR is a choice of \*(L"\s-1ALERT\s0\*(R", \*(L"\s-1RESTART\s0\*(R", \*(L"\s-1START\s0\*(R", \*(L"\s-1STOP\s0\*(R", |
| \&\*(L"\s-1EXEC\s0\*(R" or \*(L"\s-1UNMONITOR\s0\*(R". |
| .PP |
| Example of restarting the process if the uptime exceeded 3 days: |
| .PP |
| .Vb 4 |
| \& check process myapp with pidfile /var/run/myapp.pid |
| \& start program = "/etc/init.d/myapp start" |
| \& stop program = "/etc/init.d/myapp stop" |
| \& if uptime > 3 days then restart |
| .Ve |
| .SS "\s-1CONNECTION\s0 \s-1TESTING\s0" |
| .IX Subsection "CONNECTION TESTING" |
| Monit is able to perform connection testing via networked |
| ports or via Unix sockets. A connection test may only be |
| used within a check process or within a check host service |
| entry in the Monit control file. |
| .PP |
| If a service listens on one or more sockets, Monit can connect to |
| the port (using either tcp or udp) and verify that the service |
| will accept a connection and that it is possible to write and |
| read from the socket. If a connection is not accepted or if there |
| is a problem with socket i/o, Monit will assume that something is |
| wrong and execute a specified action. If Monit is compiled with |
| openssl, then ssl based network services can also be tested. |
| .PP |
| The full syntax for the statement used for connection testing is |
| as follows (keywords are in capital and optional statements in |
| [brackets]), |
| .IP "\s-1IF\s0 \s-1FAILED\s0 [host] port [type] [protocol|{send/expect}+] [timeout] [retry] [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action [\s-1ELSE\s0 \s-1IF\s0 \s-1SUCCEEDED\s0 [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action]" 4 |
| .IX Item "IF FAILED [host] port [type] [protocol|{send/expect}+] [timeout] [retry] [[<X>] <Y> CYCLES] THEN action [ELSE IF SUCCEEDED [[<X>] <Y> CYCLES] THEN action]" |
| .PP |
| or for Unix sockets, |
| .IP "\s-1IF\s0 \s-1FAILED\s0 [unixsocket] [type] [protocol|{send/expect}+] [timeout] [retry] [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action [\s-1ELSE\s0 \s-1IF\s0 \s-1SUCCEEDED\s0 [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action]" 4 |
| .IX Item "IF FAILED [unixsocket] [type] [protocol|{send/expect}+] [timeout] [retry] [[<X>] <Y> CYCLES] THEN action [ELSE IF SUCCEEDED [[<X>] <Y> CYCLES] THEN action]" |
| .PP |
| \&\fBhost:HOST hostname\fR. Optionally specify the host to connect to. |
| If the host is not given then localhost is assumed if this test |
| is used inside a process entry. If this test was used inside a |
| remote host entry then the entry's remote host is assumed. |
| Although \fIhost\fR is intended for testing name based virtual hosts |
| in an \s-1HTTP\s0 server running on local or remote host, it does allow |
| the connection statement to be used to test a server running on |
| another machine. This may be useful; for instance, if you use |
| Apache httpd as a front-end and an application-server as the |
| back-end running on another machine, this statement may be used |
| to test that the back-end server is running and if not raise an |
| alert. |
| .PP |
| \&\fBport:PORT number\fR. The port number to connect to. |
| .PP |
| \&\fBunixsocket:UNIXSOCKET \s-1PATH\s0\fR. Specifies the path to a Unix |
| socket. Servers based on Unix sockets always run on the local |
| machine and do not use a port. |
| .PP |
| \&\fBtype:TYPE {TCP|UDP|TCPSSL}\fR. Optionally specify the socket type |
| Monit should use when trying to connect to the port. The |
| different socket types are: \s-1TCP\s0, \s-1UDP\s0 or \s-1TCPSSL\s0, where \s-1TCP\s0 is a |
| regular stream based socket, \s-1UDP\s0 is a datagram socket and \s-1TCPSSL\s0 |
| specifies that Monit should use a \s-1TCP\s0 socket with \s-1SSL\s0 when |
| connecting to a port. The default socket type is \s-1TCP\s0. If \s-1TCPSSL\s0 |
| is used you may optionally specify the \s-1SSL/TLS\s0 protocol to be |
| used and the md5 sum of the server's certificate. The \s-1TCPSSL\s0 |
| options are: |
| .PP |
| .Vb 1 |
| \& TCPSSL [SSLAUTO|SSLV2|SSLV3|TLSV1] [CERTMD5 md5sum] |
| .Ve |
| .PP |
| \&\fBproto(col):PROTO {protocols}\fR. Optionally specify the protocol |
| Monit should speak when a connection is established. At the |
| moment Monit knows how to speak: |
| \fIAPACHE-STATUS\fR |
| \fI\s-1DNS\s0\fR |
| \fI\s-1DWP\s0\fR |
| \fI\s-1FTP\s0\fR |
| \fI\s-1GPS\s0\fR |
| \fI\s-1HTTP\s0\fR |
| \fI\s-1IMAP\s0\fR |
| \fI\s-1CLAMAV\s0\fR |
| \fI\s-1LDAP2\s0\fR |
| \fI\s-1LDAP3\s0\fR |
| \fI\s-1LMTP\s0\fR |
| \fI\s-1MEMCACHE\s0\fR |
| \fI\s-1MYSQL\s0\fR |
| \fI\s-1NNTP\s0\fR |
| \fI\s-1NTP3\s0\fR |
| \fI\s-1POP\s0\fR |
| \fIPOSTFIX-POLICY\fR |
| \fI\s-1RADIUS\s0\fR |
| \fI\s-1RDATE\s0\fR |
| \fI\s-1RSYNC\s0\fR |
| \fI\s-1SIP\s0\fR |
| \fI\s-1SMTP\s0\fR |
| \fI\s-1SSH\s0\fR |
| \fI\s-1TNS\s0\fR |
| \fI\s-1PGSQL\s0\fR |
| If you have compiled Monit with ssl support, Monit can also speak |
| the \s-1SSL\s0 variants such as: |
| \fI\s-1HTTPS\s0\fR |
| \fI\s-1FTPS\s0\fR |
| \fI\s-1POPS\s0\fR |
| \fI\s-1IMAPS\s0\fR |
| To use the \s-1SSL\s0 protocol support you need to define the socket as |
| \&\s-1SSL\s0 and use the general protocol name (for example in the case of |
| \&\s-1HTTPS\s0) : |
| \s-1TYPE\s0 \s-1TCPSSL\s0 \s-1PROTOCOL\s0 \s-1HTTP\s0 |
| If the server's protocol is not found in this list, simply do not |
| specify the protocol and Monit will utilize a default test, |
| including test if it is possible to read and write to the |
| port. This default test is in most cases more than good enough to |
| deduce if the server behind the port is up or not. |
| .PP |
| The protocol statement is: |
| .PP |
| .Vb 1 |
| \& PROTO(COL) {name} |
| .Ve |
| .PP |
| The \s-1HTTP\s0 protocol supports in addition: |
| .IP "\(bu" 4 |
| \&\s-1REQUEST\s0 |
| .IP "\(bu" 4 |
| \&\s-1HOSTHEADER\s0 |
| .IP "\(bu" 4 |
| \&\s-1CHECKSUM\s0 |
| .PP |
| .Vb 1 |
| \& PROTO(COL) HTTP [REQUEST {"/path"} [with CHECKSUM checksum] [with HOSTHEADER "string"]] |
| .Ve |
| .PP |
| The Host header option can be used to explicitly specify the \s-1HTTP\s0 |
| host header in the request. If not used, Monit will use the |
| hostname or IP-address of the host as specified in the statement. |
| Specifying a host header is useful if you want to connect to the |
| host using an IP-address, and the web-server handles name based |
| virtual hosts. Examples: |
| .PP |
| .Vb 4 |
| \& if failed host 192.168.1.100 port 8080 protocol http |
| \& and request \*(Aq/testing\*(Aq hostheader \*(Aqexample.com\*(Aq |
| \& with timeout 20 seconds for 2 cycles |
| \& then alert |
| .Ve |
| .PP |
| In addition to the standard protocols, the \fIAPACHE-STATUS\fR |
| protocol is a test of a specific server type, rather than a |
| generic protocol. Server performance is examined using the status |
| page generated by Apache's mod_status, which is expected to be at |
| its default address of http://www.example.com/server\-status. |
| Currently the \fIAPACHE-STATUS\fR protocol examines the percentage |
| of Apache child processes which are |
| .PP |
| .Vb 10 |
| \& o logging (loglimit) |
| \& o closing connections (closelimit) |
| \& o performing DNS lookups (dnslimit) |
| \& o in keepalive with a client (keepalivelimit) |
| \& o replying to a client (replylimit) |
| \& o receiving a request (requestlimit) |
| \& o initialising (startlimit) |
| \& o waiting for incoming connections (waitlimit) |
| \& o gracefully closing down (gracefullimit) |
| \& o performing cleanup procedures (cleanuplimit) |
| .Ve |
| .PP |
| Each of these quantities can be compared against a value relative |
| to the total number of active Apache child processes. If the |
| comparison expression is true the chosen action is performed. |
| .PP |
| The apache-status protocol statement is formally defined as |
| (keywords in uppercase): |
| .PP |
| .Vb 1 |
| \& PROTO(COL) {limit} OP PERCENT [OR {limit} OP PERCENT]* |
| .Ve |
| .PP |
| where {limit} is one or more of: loglimit, closelimit, dnslimit, |
| keepalivelimit, replylimit, requestlimit, startlimit, waitlimit, |
| gracefullimit or cleanuplimit. The operator \s-1OP\s0 is one of: |
| [<|=|>]. |
| .PP |
| You can combine all of these tests into one expression or you can |
| choose to test a certain limit. If you combine the limits you |
| must or' them together using the \s-1OR\s0 keyword. |
| .PP |
| Here's an example where we test for a loglimit more than 10 |
| percent, a dnslimit over 50 percent and a wait limit less than 20 |
| percent of processes. See also more examples below in the example |
| section. |
| .PP |
| .Vb 5 |
| \& protocol apache\-status |
| \& loglimit > 10% or |
| \& dnslimit > 50% or |
| \& waitlimit < 20% |
| \& then alert |
| .Ve |
| .PP |
| Obviously, do not use this test unless the httpd server you are |
| testing is Apache Httpd and mod_status is activated on the |
| server. |
| .PP |
| \&\fBsend/expect: {SEND|EXPECT} \*(L"string\*(R" ...\fR. If Monit does not |
| support the protocol spoken by the server, you can write your own |
| protocol-test using \fIsend\fR and \fIexpect\fR strings. The \fI\s-1SEND\s0\fR |
| statement sends a string to the server port and the \fI\s-1EXPECT\s0\fR |
| statement compares a string read from the server with the string |
| given in the expect statement. If your system supports \s-1POSIX\s0 |
| regular expressions, you can use regular expressions in the |
| expect string, see \fIregex\fR\|(7) to learn more about the types of |
| regular expressions you can use in an expect string. Otherwise |
| the string is used as it is. The send/expect statement is: |
| .PP |
| .Vb 1 |
| \& [{SEND|EXPECT} "string"]+ |
| .Ve |
| .PP |
| Note that Monit will send a string as it is, and you \fBmust\fR |
| remember to include \s-1CR\s0 and \s-1LF\s0 in the string sent to the server if |
| the protocol expects such characters to terminate a string (most |
| text based protocols used over the Internet do). Likewise Monit |
| will read up to 256 bytes from the server and use this string |
| when comparing the expect string. If the server sends strings |
| terminated by \s-1CRLF\s0, (i.e. \*(L"\er\en\*(R") you \fImay\fR remember to add the |
| same terminating characters to the string you expect from the |
| server. |
| .PP |
| As mentioned above, Monit limits the expect input to 256 bytes. |
| You can override the default value by using this set statement at |
| the top of the Monit configuration file: |
| .PP |
| .Vb 1 |
| \& SET EXPECTBUFFER <number> ["b"|"kb"|"mb"] |
| .Ve |
| .PP |
| For example, to set the expect buffer to read 10 kilobytes: |
| .PP |
| .Vb 1 |
| \& set expectbuffer 10 kb |
| .Ve |
| .PP |
| Note, if you want to test the number of bytes returned from the |
| server you need to work around a bound check limit in \s-1POSIX\s0 |
| regex. You cannot use something like expect \*(L".{5000}\*(R" as the max |
| number in a boundary check usually is {255}. However this should |
| work, expect \*(L"(.{250}){20}\*(R". |
| .PP |
| You can use non-printable characters in a send string if needed. |
| Use the hex notation, \e0xHEXHEX to send any char in the range |
| \&\e0x00\-\e0xFF, that is, 0\-255 in decimal. This may be useful when |
| testing some network protocols, particularly those over \s-1UDP\s0. For |
| example, to test a quake 3 server you can use the following, |
| .PP |
| .Vb 2 |
| \& send "\e0xFF\e0xFF\e0xFF\e0xFFgetstatus" |
| \& expect "sv_floodProtect|sv_maxPing" |
| .Ve |
| .PP |
| Finally, send/expect can be used with any socket type, such as |
| \&\s-1TCP\s0 sockets, \s-1UNIX\s0 sockets and \s-1UDP\s0 sockets. |
| .PP |
| \&\fBtimeout:with \s-1TIMEOUT\s0 x \s-1SECONDS\s0\fR. Optionally specifies the |
| connect and read timeout for the connection. If Monit cannot |
| connect to the server within this time it will assume that the |
| connection failed and execute the specified action. The default |
| connect timeout is 5 seconds. |
| .PP |
| \&\fBretry:RETRY x\fR. Optionally specifies the number of consecutive |
| retries within the same testing cycle in the case that the |
| connection failed. The default is fail on first error. |
| .PP |
| \&\fIaction\fR is a choice of \*(L"\s-1ALERT\s0\*(R", \*(L"\s-1RESTART\s0\*(R", \*(L"\s-1START\s0\*(R", \*(L"\s-1STOP\s0\*(R", |
| \&\*(L"\s-1EXEC\s0\*(R" or \*(L"\s-1UNMONITOR\s0\*(R". |
| .PP |
| Connection testing using the \s-1URL\s0 notation |
| .IX Subsection "Connection testing using the URL notation" |
| .PP |
| You can test an \s-1HTTP\s0 server using the compact \s-1URL\s0 syntax. This |
| test also allows you to use \s-1POSIX\s0 regular expressions to test the |
| content returned by the \s-1HTTP\s0 server. |
| .PP |
| The full syntax for the \s-1URL\s0 statement is as follows (keywords are |
| in capital and optional statements in [brackets]): |
| .PP |
| .Vb 5 |
| \& IF FAILED URL URL\-spec |
| \& [CONTENT {==|!=} "regular\-expression"] |
| \& [TIMEOUT number SECONDS] [[<X>] <Y> CYCLES] |
| \& THEN action |
| \& [ELSE IF SUCCEEDED [[<X>] <Y> CYCLES] THEN action] |
| .Ve |
| .PP |
| Where URL-spec is an \s-1URL\s0 on the standard form as specified in \s-1RFC\s0 |
| 2396: |
| .PP |
| .Vb 1 |
| \& <protocol>://<authority><path>?<query> |
| .Ve |
| .PP |
| Here is an example of an \s-1URL\s0 where all components are used: |
| .PP |
| .Vb 1 |
| \& http://user:password@www.foo.bar:8080/document/?querystring#ref |
| .Ve |
| .PP |
| If a username and password are included in the \s-1URL\s0 Monit will |
| attempt to login at the server using \fBBasic Authentication\fR. |
| .PP |
| Testing the content returned by the server is optional. If used, |
| you can test if the content \fBmatch\fR or does \fBnot match\fR a |
| regular expression. Here's an example on how the \s-1URL\s0 statement |
| can be used in a \fIcheck service\fR: |
| .PP |
| .Vb 4 |
| \& check host FOO with address www.foo.bar |
| \& if failed (url http://user:password@www.foo.bar:8080/login?querystring |
| \& and content == \*(Aqup\*(Aq) |
| \& then ... |
| .Ve |
| .PP |
| Note that the content option extends the \s-1URL\s0 by the expected data |
| and does not act as standalone failure specification. The syntax is |
| \&\*(L"if failed (<\s-1URL\s0> and <content>)\*(R". |
| .PP |
| Monit will look at the content-length header returned by the |
| server and download this amount before testing the content. That |
| is, if the content-length is more than 1Mb or this header is not |
| set by the server Monit will default to download up to 1 Mb and |
| not more. |
| .PP |
| Only the http(s) protocol is supported in an \s-1URL\s0 statement. If |
| the protocol is \fBhttps\fR Monit will use \s-1SSL\s0 when connecting to |
| the server. |
| .PP |
| Remote host ping test |
| .IX Subsection "Remote host ping test" |
| .PP |
| In addition Monit can perform \s-1ICMP\s0 Echo tests in remote host |
| checks. The icmp test may only be used in a check host entry and |
| Monit must run with super user privileges, that is, the root user |
| must run monit. The reason is that the icmp test utilizes a raw |
| socket to send the icmp packet and only the super user is allowed |
| to create a raw socket. |
| .PP |
| The full syntax for the \s-1ICMP\s0 Echo statement used for ping testing |
| is as follows (keywords are in capital and optional statements in |
| [brackets]): |
| .PP |
| .Vb 5 |
| \& IF FAILED ICMP TYPE ECHO |
| \& [COUNT number] [WITH] [TIMEOUT number SECONDS] |
| \& [[<X>] <Y> CYCLES] |
| \& THEN action |
| \& [ELSE IF SUCCEEDED [[<X>] <Y> CYCLES] THEN action] |
| .Ve |
| .PP |
| The rules for action and timeout are the same as those mentioned |
| above in the \s-1CONNECTION\s0 \s-1TESTING\s0 section. The count parameter |
| specifies how many consecutive echo requests will be sent to the |
| host in one cycle. In the case that no reply came within timeout |
| frame, Monit reports error. When at least one reply was received, |
| the test will pass. Monit sends by default three echo requests in |
| one cycle to prevent the random packet loss from generating false |
| alarm (i.e. up to 66% packet loss is tolerated). You can set the |
| count option to a value between 1 and 20, which can serve as an |
| error ratio. For example if you require 100% ping success, set |
| the count to 1 (i.e. just one request will be sent, and if the |
| packet was lost an error will be reported). |
| .PP |
| An icmp ping test is useful for testing if a host is up, before |
| testing ports at the host. If an icmp ping test is used in a |
| check host entry, this test is run first and if the ping test |
| should fail we assume that the connection to the host is down and |
| Monit does \fInot\fR continue to test any ports. Here's an example: |
| .PP |
| .Vb 6 |
| \& check host xyzzy with address xyzzy.org |
| \& if failed icmp type echo count 5 with timeout 15 seconds |
| \& then alert |
| \& if failed port 80 proto http then alert |
| \& if failed port 443 type TCPSSL proto http then alert |
| \& alert foo@bar |
| .Ve |
| .PP |
| In this case, if the icmp test should fail you will get \fIone\fR |
| alert and only one alert as long as the host is down, and equally |
| important, Monit will \fInot\fR test port 80 and port 443. Likewise |
| if the icmp ping test should succeed (again) Monit will continue |
| to test both port 80 and 443. |
| .PP |
| Keep in mind though that some firewalls can block icmp packets |
| and thus render the test useless. |
| .PP |
| Examples |
| .IX Subsection "Examples" |
| .PP |
| To check a port connection and receive an alert if Monit cannot |
| connect to the port, use the following statement: |
| .PP |
| .Vb 1 |
| \& if failed port 80 then alert |
| .Ve |
| .PP |
| In this case the machine in question is assumed to be the default |
| host. For a process entry it's \fIlocalhost\fR and for a remote host |
| entry it's the \fIaddress\fR of the remote host. Monit will conduct |
| a tcp connection to the host at port 80 and use tcp by default. |
| If you want to connect with udp, you can specify this after the |
| port-statement; |
| .PP |
| .Vb 1 |
| \& if failed port 53 type udp protocol dns then alert |
| .Ve |
| .PP |
| Monit will stop trying to connect to the port after 5 seconds and |
| assume that the server behind the port is down. You may increase |
| or decrease the connect timeout by explicitly adding a connection |
| timeout. In the following example the timeout is increased to 15 |
| seconds and if Monit cannot connect to the server within 15 |
| seconds the test will fail and an alert message is sent. |
| .PP |
| .Vb 1 |
| \& if failed port 80 with timeout 15 seconds then alert |
| .Ve |
| .PP |
| If a server is listening to a Unix socket the following statement |
| can be used: |
| .PP |
| .Vb 1 |
| \& if failed unixsocket /var/run/sophie then alert |
| .Ve |
| .PP |
| A Unix socket is used by some servers for fast (interprocess) |
| communication on localhost only. A Unix socket is specified by a |
| path and in the example above the path, /var/run/sophie, |
| specifies a Unix socket. |
| .PP |
| If your machine answers for several virtual hosts you can prefix |
| the port statement with a host-statement like so: |
| .PP |
| .Vb 3 |
| \& if failed host www.sol.no port 80 then alert |
| \& if failed host 80.69.226.133 port 443 then alert |
| \& if failed host kvasir.sol.no port 80 then alert |
| .Ve |
| .PP |
| And as mentioned above, if you do not specify a host-statement, |
| \&\fIlocalhost\fR or \fIaddress\fR is assumed. |
| .PP |
| Monit also knows how to speak some of the more popular Internet |
| protocols. So, besides testing for connections, Monit can also |
| speak with the server in question to verify that the server |
| works. For example, the following is used to test a http server: |
| .PP |
| .Vb 2 |
| \& if failed host www.tildeslash.com port 80 proto http |
| \& then restart |
| .Ve |
| .PP |
| Some protocols also support a request statement. This statement |
| can be used to ask the server for a special document entity. |
| .PP |
| Currently \fBonly\fR the \fI\s-1HTTP\s0\fR protocol module supports the |
| request statement, such as: |
| .PP |
| .Vb 3 |
| \& if failed host www.myhost.com port 80 protocol http |
| \& and request "/data/show.php?a=b&c=d" |
| \& then restart |
| .Ve |
| .PP |
| The request must contain an \s-1URI\s0 string specifying a document from |
| the http server. The string will be \s-1URL\s0 encoded by Monit before |
| it sends the request to the http server, so it's okay to use \s-1URL\s0 |
| unsafe characters in the request. If the request statement isn't |
| specified, the default web server page will be requested. |
| .PP |
| You can override default Host header in \s-1HTTP\s0 request: |
| .PP |
| .Vb 3 |
| \& if failed host 192.168.1.100 port 80 protocol http |
| \& hostheader "example.com" |
| \& then alert |
| .Ve |
| .PP |
| You can also test the checksum for documents returned by a http |
| server. You can use either \s-1MD5\s0 sums: |
| .PP |
| .Vb 4 |
| \& if failed port 80 protocol http |
| \& and request "/page.html" |
| \& with checksum 8f7f419955cefa0b33a2ba316cba3659 |
| \& then alert |
| .Ve |
| .PP |
| Or you can use \s-1SHA1\s0 sums: |
| .PP |
| .Vb 4 |
| \& if failed port 80 protocol http |
| \& and request "/page.html" |
| \& with checksum e428302e260e0832007d82de853aa8edf19cd872 |
| \& then alert |
| .Ve |
| .PP |
| Monit will compute a checksum (either \s-1MD5\s0 or \s-1SHA1\s0 is used, |
| depending on length of the hash) for the document (in the above |
| case, /page.html) and compare the computed checksum with the |
| expected checksum. If the sums do not match then the if-test's |
| action is performed, in this case alert. Note that Monit will |
| \&\fBnot\fR test the checksum for a document if the server does not |
| set the \s-1HTTP\s0 \fIContent-Length\fR header. A \s-1HTTP\s0 server should set |
| this header when it serves a static document (i.e. a file). A |
| server will often use chunked transfer encoding instead when |
| serving dynamic content (e.g. a document created by a CGI-script |
| or a Servlet), but to test the checksum for dynamic content is |
| not very useful. There is no limitation on the document size, |
| but keep in mind that Monit will use time to download the |
| document over the network so it's probably smart not to ask monit |
| to compute a checksum for documents larger than 1Mb or so, |
| depending on your network connection of course. Tip: If you get a |
| checksum error even if the document has the correct sum, the |
| reason may be that the download timed out. In this case, explicitly |
| set a longer timeout than the default 5 seconds. |
| .PP |
| As mentioned above, if the server protocol is not supported by |
| Monit you can write your own protocol test using send/expect |
| strings. Here we show a protocol test using send/expect for an |
| imaginary \*(L"Ali Baba and the Forty Thieves\*(R" protocol: |
| .PP |
| .Vb 6 |
| \& if failed host cave.persia.ir port 4040 |
| \& send "Open, Sesame!\er\en" |
| \& expect "Please enter the cave\er\en" |
| \& send "Shut, Sesame!\er\en" |
| \& expect "See you later [A\-Za\-z ]+\er\en" |
| \& then restart |
| .Ve |
| .PP |
| The \fI\s-1TCPSSL\s0\fR statement can optionally test the md5 sum of the |
| server's certificate. You must state the md5 certificate string |
| you expect the server to deliver and upon a connect to the |
| server, the server's actual md5 sum certificate string is tested. |
| Any other symbol but [A\-Fa\-f0\-9] is ignored in that string. |
| Thus it is possible to copy and paste the output of e.g. openssl. |
| If they do not match, the connection test fails. If the ssl |
| version handshake does not work properly you can also force a |
| specific ssl version, as we demonstrate in this example: |
| .PP |
| .Vb 10 |
| \& if failed host shop.sol.no port 443 |
| \& type TCPSSL SSLV3 # Force Monit to use ssl version 3 |
| \& # We expect the server to return this md5 certificate sum |
| \& # as either 12\-34\-56\-78\-90\-AB\-CD\-EF\-12\-34\-56\-78\-90\-AB\-CD\-EF |
| \& # or e.g. 1234567890ABCDEF1234567890ABCDEF |
| \& # or e.g. 1234567890abcdef1234567890abcdef |
| \& # what ever come in more handy (see text above) |
| \& CERTMD5 12\-34\-56\-78\-90\-AB\-CD\-EF\-12\-34\-56\-78\-90\-AB\-CD\-EF |
| \& protocol http |
| \& then restart |
| .Ve |
| .PP |
| Here's an example where a connection test is used inside a |
| process entry: |
| .PP |
| .Vb 4 |
| \& check process apache with pidfile /var/run/apache.pid |
| \& start program = "/etc/init.d/httpd start" |
| \& stop program = "/etc/init.d/httpd stop" |
| \& if failed host www.tildeslash.com port 80 then restart |
| .Ve |
| .PP |
| Here, a connection test is used in a remote host entry: |
| .PP |
| .Vb 2 |
| \& check host up2date with address ftp.redhat.com |
| \& if failed port 21 and protocol ftp then alert |
| .Ve |
| .PP |
| Since we did not explicitly specify a host in the above test, monit |
| will connect to port 21 at ftp.redhat.com. Apropos, the host |
| address can be specified as a dotted \s-1IP\s0 address string or as |
| hostname in the \s-1DNS\s0. The following is exactly[*] the same test, |
| but here an ip address is used instead: |
| .PP |
| .Vb 2 |
| \& check host up2date with address 66.187.232.30 |
| \& if failed port 21 and protocol ftp then alert |
| .Ve |
| .PP |
| [*] Well, not quite, since we specify an ip-address directly we |
| will bypass any \s-1DNS\s0 round-robin setup, but that's another story. |
| .PP |
| Testing the \s-1SIP\s0 protocol |
| .IX Subsection "Testing the SIP protocol" |
| .PP |
| The \s-1SIP\s0 protocol is used by communication platform servers such |
| as Asterisk and FreeSWITCH. |
| .PP |
| The \s-1SIP\s0 test is similar to the other protocol tests, but in |
| addition allows extra optional parameters. |
| .IP "\s-1IF\s0 \s-1FAILED\s0 [host] [port] [type] \s-1PROTOCOL\s0 sip [\s-1AND\s0] [\s-1TARGET\s0 valid@uri] [\s-1AND\s0] [\s-1MAXFORWARD\s0 n] \s-1THEN\s0 action [\s-1ELSE\s0 \s-1IF\s0 \s-1SUCCEEDED\s0 [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action]" 4 |
| .IX Item "IF FAILED [host] [port] [type] PROTOCOL sip [AND] [TARGET valid@uri] [AND] [MAXFORWARD n] THEN action [ELSE IF SUCCEEDED [[<X>] <Y> CYCLES] THEN action]" |
| .PP |
| \&\s-1TARGET\s0 : |
| you may specify an alternative recipient for the message, |
| by adding a valid sip uri after this keyword. |
| .PP |
| \&\s-1MAXFORWARD\s0 : |
| Limit the number of proxies or gateways that can forward the |
| request to the next server. Its value is an integer in the range |
| 0\-255, set by default to 70. If max-forward = 0, the next server |
| may respond 200 \s-1OK\s0 (test succeeded) or send a 483 Too Many Hops |
| (test failed) |
| .PP |
| \&\s-1SIP\s0 examples: |
| .PP |
| .Vb 4 |
| \& check host openser_all with address 127.0.0.1 |
| \& if failed port 5060 type udp protocol sip |
| \& with target "localhost:5060" and maxforward 6 |
| \& then alert |
| .Ve |
| .PP |
| If sips is supported, that is, sip over ssl, specify tcpssl as |
| the connection type. |
| .PP |
| .Vb 4 |
| \& check host fwd.pulver.com with address fwd.pulver.com |
| \& if failed port 5060 type tcpssl protocol SIP |
| \& and target 613@fwd.pulver.com maxforward 10 |
| \& then alert |
| .Ve |
| .PP |
| For more examples, see the example section below. |
| .PP |
| Testing the \s-1RADIUS\s0 protocol |
| .IX Subsection "Testing the RADIUS protocol" |
| .PP |
| The \s-1RADIUS\s0 test is similar to the other protocol tests, but in |
| addition allows extra optional parameters. |
| .IP "\s-1IF\s0 \s-1FAILED\s0 [host] [port] [type] \s-1PROTOCOL\s0 radius [\s-1SECRET\s0 string] \s-1THEN\s0 action [\s-1ELSE\s0 \s-1IF\s0 \s-1SUCCEEDED\s0 [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action]" 4 |
| .IX Item "IF FAILED [host] [port] [type] PROTOCOL radius [SECRET string] THEN action [ELSE IF SUCCEEDED [[<X>] <Y> CYCLES] THEN action]" |
| .PP |
| \&\s-1SECRET:\s0 |
| you may specify an alternative secret, default is \*(L"testing123\*(R". |
| .PP |
| \&\s-1RADIUS\s0 example: |
| .PP |
| .Vb 7 |
| \& check process radiusd with pidfile /var/run/radiusd.pid |
| \& start program = "/etc/init.d/freeradius start" |
| \& stop program = "/etc/init.d/freeradius stop" |
| \& if failed host 127.0.0.1 port 1812 type udp protocol radius |
| \& secret testing123 |
| \& then alert |
| \& if 5 restarts within 5 cycles then timeout |
| .Ve |
| .SS "\s-1PROGRAM\s0 \s-1STATUS\s0 \s-1TESTING\s0" |
| .IX Subsection "PROGRAM STATUS TESTING" |
| You can check the exit status of a program or a script. This test |
| may only be used within a check program service entry in the Monit |
| control file. |
| .PP |
| An example: |
| .PP |
| .Vb 2 |
| \& check program myscript with path "/usr/local/bin/myscript.sh" |
| \& if status != 0 then alert |
| .Ve |
| .PP |
| Monit will execute the program periodically and if the exit |
| status of the program does not match the expected result, Monit |
| can perform an action. In the example above, Monit will raise an |
| alert if the exit value of \fImyscript\fR is different from 0. By |
| convention, 0 means the program exited normally. |
| .PP |
| Program checks are asynchronous. Meaning that Monit will not wait |
| for the program to exit, but instead, Monit will start the |
| program in the background and immediately continue checking the |
| next service entry in \fImonitrc\fR. At the next cycle, Monit will |
| check if the program has finished and if so, collect the program's |
| exit status \- if the status indicates a failure, Monit will raise |
| an alert message containing the program's error (stderr) output, |
| if any. If the program has not exited after the first cycle, |
| Monit will wait another cycle and so on. If the program is still |
| running after 5 minutes, Monit will kill it and generate a |
| program timeout event. It is possible to override the default |
| timeout (see the syntax below). |
| .PP |
| The asynchronous nature of the program check allows for |
| non-blocking behavior in the current Monit design, but it comes |
| with a side-effect: when the program has finished executing and |
| is waiting for Monit to collect the result, it becomes a |
| so-called \*(L"zombie\*(R" process. A zombie process does not consume |
| any system resources (only the \s-1PID\s0 remains in use) and it is |
| under Monit's control; The zombie process is removed from the |
| system as soon as Monit collects the exit status. This means that |
| every \*(L"check program\*(R" will be associated with either a running |
| process or a temporary zombie. This unwanted zombie side-effect |
| will be removed in a later release of Monit. |
| .PP |
| The syntax of the program status statement is: |
| .IP "\s-1IF\s0 \s-1STATUS\s0 operator value [\s-1TIMEOUT\s0 <N> \s-1SECONDS\s0] [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action [\s-1ELSE\s0 \s-1IF\s0 \s-1SUCCEEDED\s0 [[<X>] <Y> \s-1CYCLES\s0] \s-1THEN\s0 action]" 4 |
| .IX Item "IF STATUS operator value [TIMEOUT <N> SECONDS] [[<X>] <Y> CYCLES] THEN action [ELSE IF SUCCEEDED [[<X>] <Y> CYCLES] THEN action]" |
| .PP |
| \&\fIoperator\fR is a choice of \*(L"<\*(R",\*(L">\*(R",\*(L"!=\*(R",\*(L"==\*(R" in c notation, \*(L"gt\*(R", |
| \&\*(L"lt\*(R", \*(L"eq\*(R", \*(L"ne\*(R" in shell sh notation and \*(L"greater\*(R", \*(L"less\*(R", |
| \&\*(L"equal\*(R", \*(L"notequal\*(R" in human readable form (if not specified, |
| default is \s-1EQUAL\s0). |
| .PP |
| \&\fIaction\fR is a choice of \*(L"\s-1ALERT\s0\*(R", \*(L"\s-1RESTART\s0\*(R", \*(L"\s-1START\s0\*(R", \*(L"\s-1STOP\s0\*(R", |
| \&\*(L"\s-1EXEC\s0\*(R" or \*(L"\s-1UNMONITOR\s0\*(R". |
| .SH "SERVICE POLL TIME" |
| .IX Header "SERVICE POLL TIME" |
| Services are checked in regular intervals given by the \fIset |
| daemon n\fR statement. Checks are performed in the same order as |
| they are written in the \fI.monitrc\fR file, except if dependencies |
| are set up between services, in which case the services hierarchy |
| may alter the order of the checks. |
| .PP |
| It is possible to modify the check schedule using the \fIevery\fR |
| statement. |
| .PP |
| There are three variants: |
| .IP "1. custom interval based on poll cycle length multiple" 4 |
| .IX Item "1. custom interval based on poll cycle length multiple" |
| .Vb 1 |
| \& EVERY [number] CYCLES |
| .Ve |
| .IP "2. test schedule based on cron-style string" 4 |
| .IX Item "2. test schedule based on cron-style string" |
| .Vb 1 |
| \& EVERY [cron] |
| .Ve |
| .IP "3. do-not-test schedule based on cron-style string" 4 |
| .IX Item "3. do-not-test schedule based on cron-style string" |
| .Vb 1 |
| \& NOT EVERY [cron] |
| .Ve |
| .PP |
| A cron-style string consists of 5 fields separated with |
| white-space. All fields are required: |
| .PP |
| .Vb 7 |
| \& Name: | Allowed values: | Special characters: |
| \& \-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- |
| \& Minutes | 0\-59 | * \- , |
| \& Hours | 0\-23 | * \- , |
| \& Day of month | 1\-31 | * \- , |
| \& Month | 1\-12 (1=jan, 12=dec) | * \- , |
| \& Day of week | 0\-6 (0=sunday, 6=saturday) | * \- , |
| .Ve |
| .PP |
| The special characters: |
| .PP |
| .Vb 10 |
| \& Character: | Description: |
| \& \-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- |
| \& * (asterisk) | The asterisk indicates that the expression will |
| \& | match for all values of the field; e.g., using |
| \& | an asterisk in the 4th field (month) would |
| \& | indicate every month. |
| \& \- (hyphen) | Hyphens are used to define ranges. For example, |
| \& | 8\-9 in the hour field indicate between 8AM and |
| \& | 9AM. Note that range is from time1 until and |
| \& | including time2. That is, from 8AM and until |
| \& | 10AM unless minutes are set. Another example, |
| \& | 1\-5 in the weekday field, specify from monday to |
| \& | friday (including friday). |
| \& , (comma) | Comma are used to specify a sequence. For example |
| \& | 17,18 in the day field indicate the 17th and 18th |
| \& | day of the month. A sequence can also include |
| \& | ranges. For example, using 1\-5,0 in the weekday |
| \& | field indicate monday to friday and sunday. |
| .Ve |
| .PP |
| Example 1: Check once per two cycles |
| .PP |
| .Vb 2 |
| \& check process nginx with pidfile /var/run/nginx.pid |
| \& every 2 cycles |
| .Ve |
| .PP |
| Example 2: Check every workday 8AM\-7PM |
| .PP |
| .Vb 3 |
| \& check program checkOracleDatabase with |
| \& path /var/monit/programs/checkoracle.pl |
| \& every "* 8\-19 * * 1\-5" |
| .Ve |
| .PP |
| Example 3: Do not run the check in the backup window on |
| Sunday 0AM\-3AM |
| .PP |
| .Vb 2 |
| \& check process mysqld with pidfile /var/run/mysqld.pid |
| \& not every "* 0\-3 * * 0" |
| .Ve |
| .PP |
| Limitations: |
| .PP |
| The current test scheduler is poll cycle based. When Monit starts |
| testing and the service test is constrained with the \fIevery cron\fR |
| statement, it checks whether the current time matches the |
| cron-string pattern. If it does, the test is done, otherwise it |
| is skipped. The cron specification thus does not guarantee when |
| exactly the test will run \- that depends on the default poll time |
| and the length of the testing cycle. In other words, we cannot |
| guarantee that Monit will run on a specific time. Therefore we |
| \&\fBstrongly\fR recommend to use an asterisk in the minute field or at |
| minimum a range, e.g. 0\-15. \fBNever\fR use a specific minute as |
| Monit may not run on that minute. |
| .PP |
| We will address this limitation in a future release and convert |
| the test scheduler from serial polling into a parallel |
| non-blocking scheduler where checks are guaranteed to run on time |
| and with seconds resolution. |
| .SH "MONIT HTTPD" |
| .IX Header "MONIT HTTPD" |
| If specified in the control file, Monit will start a Monit daemon |
| with http support. From a Browser you can then start and stop |
| services, disable or enable service monitoring as well as view |
| the status of each service. Also, if Monit logs to its own file, |
| you can view the content of this logfile in a Browser. |
| .PP |
| The control file statement for starting a Monit daemon with http |
| support is a global set-statement: |
| .IP "set httpd port 2812" 4 |
| .IX Item "set httpd port 2812" |
| .PP |
| And you can use this \s-1URL\s0, \fIhttp://localhost:2812/\fR, to access |
| the daemon from a browser. The port number, in this case 2812, |
| can be any number that you are allowed to bind to. |
| .PP |
| If you have compiled Monit with openssl, you can also start the |
| httpd server with ssl support, using the following expression: |
| .PP |
| .Vb 3 |
| \& set httpd port 2812 |
| \& ssl enable |
| \& pemfile /etc/certs/monit.pem |
| .Ve |
| .PP |
| And you can use this \s-1URL\s0, \fIhttps://localhost:2812/\fR, to access |
| the Monit web server over an ssl encrypted connection. |
| .PP |
| The pemfile, in the example above, holds both the server's |
| private key and certificate. This file should be stored in a safe |
| place on the filesystem and should have strict permissions, that |
| is, no more than 0700. |
| .PP |
| In addition, if you want to check for client certificates you can |
| use the \s-1CLIENTPEMFILE\s0 statement. In this case, a connecting |
| client has to provide a certificate known by Monit in order to |
| connect. This file also needs to have all necessary \s-1CA\s0 |
| certificates. A configuration could look like: |
| .PP |
| .Vb 4 |
| \& set httpd port 2812 |
| \& ssl enable |
| \& pemfile /etc/certs/monit.pem |
| \& clientpemfile /etc/certs/monit\-client.pem |
| .Ve |
| .PP |
| By default self signed client certificates are not allowed. If |
| you want to use a self signed certificate from a client it has to |
| be allowed explicitly with the \s-1ALLOWSELFCERTIFICATION\s0 statement. |
| .PP |
| For more information on how to use Monit with \s-1SSL\s0 and for more |
| information about certificates and generating pem files, please |
| consult the \s-1README\s0.SSL file accompanying the software. |
| .PP |
| If you only want the http server to accept connect requests to |
| one host address you can specify the bind address either as an |
| \&\s-1IP\s0 number string or as a hostname. In the following example we |
| bind the http server to the loopback device. In other words the |
| http server will only be reachable from localhost: |
| .PP |
| .Vb 1 |
| \& set httpd port 2812 and use the address 127.0.0.1 |
| .Ve |
| .PP |
| or |
| .PP |
| .Vb 1 |
| \& set httpd port 2812 and use the address localhost |
| .Ve |
| .PP |
| If you do not use the \s-1ADDRESS\s0 statement the http server will |
| accept connections on any/all local addresses. |
| .PP |
| It is possible to hide monit's httpd server version, which |
| usually is available in httpd header responses and in error |
| pages. |
| .PP |
| .Vb 3 |
| \& set httpd port 2812 |
| \& ... |
| \& signature {enable|disable} |
| .Ve |
| .PP |
| Use \fIdisable\fR to hide the server signature \- Monit will only |
| report its name (e.g. 'monit' instead of for example 'monit |
| 4.2'). By default the version signature is enabled. It is worth |
| to stress that this option provides no security advantage and |
| falls into the \*(L"security through obscurity\*(R" category. |
| .PP |
| If you remove the httpd statement from the config file, monit |
| will stop the httpd server on configuration reload. Likewise if |
| you change the port number, Monit will restart the http server |
| using the new specified port number. |
| .PP |
| The status page displayed by the Monit web server is |
| automatically refreshed with the same poll time set for the monit |
| daemon. |
| .PP |
| \&\fBNote:\fR |
| .PP |
| We strongly recommend that you start Monit with http support (and |
| bind the server to localhost, only, unless you are behind a |
| firewall). The built-in web-server is small and does not use much |
| resources, and more \fIimportantly\fR, Monit can use the http server |
| for interprocess communication between a Monit client and a monit |
| daemon. |
| .PP |
| For instance, you \fImust\fR start a Monit daemon with http support |
| if you want to be able to use most of the available console |
| commands. I.e. 'Monit stop all', 'Monit start all' etc. |
| .PP |
| If a Monit daemon is running in the background we will ask the |
| daemon (via the \s-1HTTP\s0 protocol) to execute the above commands. |
| That is, the daemon is requested to start and stop the services. |
| This ensures that a daemon will not restart a service that you |
| requested to stop and that (any) timeout lock will be removed |
| from a service when you start it. |
| .SS "\s-1FIPS\s0 support" |
| .IX Subsection "FIPS support" |
| Monit's built-in web-server supports the OpenSSL \s-1FIPS\s0 module. |
| To enable this mode, your OpenSSL library must first be built |
| with \s-1FIPS\s0 support. Then in the Monit control file, simply |
| add this \fIset\fR statement at the top; |
| .PP |
| .Vb 1 |
| \& set fips |
| .Ve |
| .PP |
| Note that the \s-1FIPS\s0 module may not be supported in the latest |
| version of OpenSSL. So make sure that your version of OpenSSL |
| supports the \s-1FIPS\s0 object module before attempting to enable this |
| in Monit. |
| .SS "Monit \s-1HTTPD\s0 Authentication" |
| .IX Subsection "Monit HTTPD Authentication" |
| Monit supports two types of authentication schemas for |
| connecting to the httpd server, (three, if you count \s-1SSL\s0 client |
| certificate validation). Both schemas can be used together or by |
| themselves. You \fBmust\fR choose at least one. |
| .PP |
| Host and network allow list |
| .IX Subsection "Host and network allow list" |
| .PP |
| The http server maintains an access-control list of hosts and |
| networks allowed to connect to the server. You can add as many |
| hosts as you want to, but only hosts with a valid domain name or |
| its \s-1IP\s0 address are allowed. Networks require a network \s-1IP\s0 and a |
| netmask to be accepted. |
| .PP |
| The http server will query a name server to check any hosts |
| connecting to the server. If a host (client) is trying to connect |
| to the server, but cannot be found in the access list or cannot |
| be resolved, the server will shutdown the connection to the |
| client promptly. |
| .PP |
| Control file example: |
| .PP |
| .Vb 6 |
| \& set httpd port 2812 |
| \& allow localhost |
| \& allow my.other.work.machine.com |
| \& allow 10.1.1.1 |
| \& allow 192.168.1.0/255.255.255.0 |
| \& allow 10.0.0.0/8 |
| .Ve |
| .PP |
| Clients, not mentioned in the allow list, trying to connect to |
| the server are logged with their ip-address. |
| .PP |
| Basic Authentication |
| .IX Subsection "Basic Authentication" |
| .PP |
| This authentication schema is \s-1HTTP\s0 specific and described in more |
| detail in \s-1RFC\s0 2617. |
| .PP |
| In short; a server challenges a client (e.g. a Browser) to send |
| authentication information (username and password) and if |
| accepted, the server will allow the client access to the |
| requested document. |
| .PP |
| The biggest weakness with Basic Authentication is that the |
| username and password are sent in clear-text (i.e. base64 encoded) |
| over the network. It is therefore recommended that you do not use |
| this authentication method unless you run the Monit http server |
| with \fIssl\fR support. With ssl support it is completely safe to |
| use Basic Authentication since \fBall\fR http data, including Basic |
| Authentication headers will be encrypted. |
| .PP |
| Monit will use Basic Authentication if an allow statement |
| contains a username and a password separated with a single ':' |
| character, like so: \fIallow username:password\fR. The username and |
| password must be written in clear-text. Special characters |
| can be used but the password has to be quoted. |
| .PP |
| \&\s-1PAM\s0 is supported as well on platforms which provide \s-1PAM\s0 (such |
| as Linux, Mac \s-1OS\s0 X, FreeBSD, NetBSD). The syntax is: |
| \&\fIallow \f(CI@mygroup\fI\fR which provides access to the users of the group |
| called \fImygroup\fR. Monit uses \s-1PAM\s0 service called \fImonit\fR for |
| \&\s-1PAM\s0 authentication, see \s-1PAM\s0 manual page for detailed instructions |
| how to set the \s-1PAM\s0 service and \s-1PAM\s0 authentication plugins. |
| Example Monit \s-1PAM\s0 for Mac \s-1OS\s0 X \- /etc/pam.d/monit: |
| .PP |
| .Vb 5 |
| \& # monit: auth account password session |
| \& auth sufficient pam_securityserver.so |
| \& auth sufficient pam_unix.so |
| \& auth required pam_deny.so |
| \& account required pam_permit.so |
| .Ve |
| .PP |
| And configuration part for monitrc which allows only group admins |
| authenticated via \s-1PAM\s0 to access the http interface: |
| .PP |
| .Vb 1 |
| \& set httpd port 2812 allow @admin |
| .Ve |
| .PP |
| Alternatively you can use files in \*(L"htpasswd\*(R" format (one |
| user:passwd entry per line), like so: \fIallow |
| [cleartext|crypt|md5] /path [users]\fR. By default cleartext |
| passwords are read. In case the passwords are digested it is |
| necessary to specify the cryptographic method. If you do not want |
| all users in the password file to have access to Monit you can |
| specify only those users that should have access, in the allow |
| statement. Otherwise all users are added. |
| .PP |
| Example1: |
| .PP |
| .Vb 3 |
| \& set httpd port 2812 |
| \& allow hauk:password |
| \& allow md5 /etc/httpd/htpasswd john paul ringo george |
| .Ve |
| .PP |
| If you use this method together with a host list, then only |
| clients from the listed hosts will be allowed to connect to the |
| Monit http server and each client will be asked to provide a |
| username and a password. |
| .PP |
| Example2: |
| .PP |
| .Vb 4 |
| \& set httpd port 2812 |
| \& allow localhost |
| \& allow 10.1.1.1 |
| \& allow hauk:"password" |
| .Ve |
| .PP |
| If you only want to use Basic Authentication, then just provide |
| allow entries with username and password or password files as in |
| example 1 above. |
| .PP |
| Finally it is possible to define some users as read-only. A |
| read-only user can read the Monit web pages but will \fInot\fR get |
| access to push-buttons and cannot change a service from the web |
| interface. |
| .PP |
| .Vb 5 |
| \& set httpd port 2812 |
| \& allow admin:password |
| \& allow hauk:password read\-only |
| \& allow @admins |
| \& allow @users read\-only |
| .Ve |
| .PP |
| A user is set to read-only by using the \fIread-only\fR keyword |
| \&\fBafter\fR username:password. In the above example the user \fIhauk\fR |
| is defined as a read-only user, while the \fIadmin\fR user has all |
| access rights. |
| .PP |
| If you use Basic Authentication it is a good idea to set the |
| access permission for the control file (~/.monitrc) to only |
| readable and writable for the user running monit, because the |
| password is written in clear-text. (Use this command, /bin/chmod |
| 600 ~/.monitrc). In fact, since Monit \fBversion 3.0\fR, Monit will |
| complain and exit if the control file is readable by others. |
| .PP |
| Clients that try to connect to the server but supply the wrong |
| username and/or password are logged with their ip-address. |
| .PP |
| If the Monit command line interface is being used, at least one |
| cleartext password is necessary. Otherwise, the Monit command |
| line interface will not be able to connect to the Monit daemon |
| server. |
| .SH "DEPENDENCIES" |
| .IX Header "DEPENDENCIES" |
| If specified in the control file, Monit can do dependency |
| checking before start, stop, monitoring or unmonitoring of |
| services. The dependency statement may be used within any service |
| entries in the Monit control file. |
| .PP |
| The syntax for the depend statement is simply: |
| .IP "\s-1DEPENDS\s0 on service[, service [,...]]" 4 |
| .IX Item "DEPENDS on service[, service [,...]]" |
| .PP |
| Where \fBservice\fR is a service entry name, for instance \fBapache\fR |
| or \fBdatafs\fR. |
| .PP |
| You may add more than one service name of any type or use more |
| than one depend statement in an entry. |
| .PP |
| Services specified in a \fIdepend\fR statement will be checked |
| during stop/start/monitor/unmonitor operations. If a service is |
| stopped or unmonitored it will stop/unmonitor any services that |
| depend on it. Likewise, if a service is started, it will |
| first stop any services that depend on it and after it is |
| started, start all depending services again. If the service is to |
| be monitored (enable monitoring), all services which this service |
| depends on will be monitored before enabling monitoring of this |
| service. |
| .PP |
| Here is an example where we set up an apache service entry to |
| depend on the underlying apache binary. If the binary should |
| change an alert is sent and apache is not monitored anymore. The |
| rationale is security and that Monit should not execute a |
| possibly cracked apache binary. |
| .PP |
| .Vb 7 |
| \& (1) check process apache |
| \& (2) with pidfile "/usr/local/apache/logs/httpd.pid" |
| \& (3) ... |
| \& (4) depends on httpd |
| \& (5) |
| \& (6) check file httpd with path /usr/local/apache/bin/httpd |
| \& (7) if failed checksum then unmonitor |
| .Ve |
| .PP |
| The first entry is the process entry for apache shown before |
| (abbreviated for clarity). The fourth line sets up a dependency |
| between this entry and the service entry named httpd in line 6. A |
| depend tree works as follows, if an action is conducted in a |
| lower branch it will propagate upward in the tree and for every |
| dependent entry execute the same action. In this case, if the |
| checksum should fail in line 7 then an unmonitor action is |
| executed and the apache binary is not checked anymore. But since |
| the apache process entry depends on the httpd entry this entry |
| will also execute the unmonitor action. In short, if the checksum |
| test for the httpd binary file should fail, both the check file |
| httpd entry and the check process apache entry are set in |
| un-monitoring mode. |
| .PP |
| A dependency tree is a general construct and can be used between |
| all types of service entries and span many levels and propagate |
| any supported action (except the exec action which will not |
| propagate upward in a dependency tree for obvious reasons). |
| .PP |
| Here is another different example. Consider the following common |
| server setup: |
| .PP |
| .Vb 2 |
| \& WEB\-SERVER \-> APPLICATION\-SERVER \-> DATABASE \-> FILESYSTEM |
| \& (a) (b) (c) (d) |
| .Ve |
| .PP |
| You can set dependencies so that the web-server depends on the |
| application server to run before the web-server starts and the |
| application server depends on the database server and the |
| database depends on the file-system to be mounted before it |
| starts. See also the example section below for examples using the |
| depend statement. |
| .PP |
| Here we describe how Monit will function with the above |
| dependencies: |
| .IP "If no servers are running" 4 |
| .IX Item "If no servers are running" |
| Monit will start the servers in the following order: \fId\fR, \fIc\fR, |
| \&\fIb\fR, \fIa\fR |
| .IP "If all servers are running" 4 |
| .IX Item "If all servers are running" |
| When you run 'Monit stop all' this is the stop order: \fIa\fR, \fIb\fR, |
| \&\fIc\fR, \fId\fR. If you run 'Monit stop d' then \fIa\fR, \fIb\fR and \fIc\fR |
| are also stopped because they depend on \fId\fR and finally \fId\fR is |
| stopped. |
| .IP "If \fIa\fR does not run" 4 |
| .IX Item "If a does not run" |
| When Monit runs it will start \fIa\fR |
| .IP "If \fIb\fR does not run" 4 |
| .IX Item "If b does not run" |
| When Monit runs it will first stop \fIa\fR then start \fIb\fR and |
| finally start \fIa\fR again. |
| .IP "If \fIc\fR does not run" 4 |
| .IX Item "If c does not run" |
| When Monit runs it will first stop \fIa\fR and \fIb\fR then start \fIc\fR |
| and finally start \fIb\fR then \fIa\fR. |
| .IP "If \fId\fR does not run" 4 |
| .IX Item "If d does not run" |
| When Monit runs it will first stop \fIa\fR, \fIb\fR and \fIc\fR then start |
| \&\fId\fR and finally start \fIc\fR, \fIb\fR then \fIa\fR. |
| .IP "If the control file contains a depend loop." 4 |
| .IX Item "If the control file contains a depend loop." |
| A depend loop is, for example, a\->b and b\->a or a\->b\->c\->a. |
| .Sp |
| When Monit starts it will check for such loops and complain and |
| exit if a loop was found. It will also exit with a complaint if a |
| depend statement was used that does not point to a service in the |
| control file. |
| .SH "THE RUN CONTROL FILE" |
| .IX Header "THE RUN CONTROL FILE" |
| The preferred way to set up Monit is to write a \fI.monitrc\fR file |
| in your home directory. When there is a conflict between the |
| command-line arguments and the arguments in this file, the |
| command-line arguments take precedence. To protect the security |
| of your control file and passwords the control file must have |
| permissions \fIno more than 0700\fR (u=xrw,g=,o=); Monit will |
| complain and exit otherwise. |
| .SS "Run Control Syntax" |
| .IX Subsection "Run Control Syntax" |
| Comments begin with a '#' and extend through the end of the line. |
| Otherwise the file consists of a series of service entries or |
| global option statements in a free-format, token-oriented syntax. |
| .PP |
| There are three kinds of tokens: grammar keywords, numbers (i.e. |
| decimal digit sequences) and strings. Strings can be either |
| quoted or unquoted. A quoted string is bounded by double quotes |
| and may contain whitespace (and quoted digits are treated as a |
| string). An unquoted string is any whitespace-delimited token, |
| containing characters and/or numbers. |
| .PP |
| On a semantic level, the control file consists of two types of |
| entries: |
| .IP "1. Global set-statements" 4 |
| .IX Item "1. Global set-statements" |
| A global set-statement starts with the keyword \fIset\fR and the |
| item to configure. |
| .IP "2. One or more service entry statements." 4 |
| .IX Item "2. One or more service entry statements." |
| Each service entry consists of the keyword `check', followed by |
| the service type. Each entry requires a <unique> descriptive |
| name, which may be freely chosen. This name is used by monit |
| to refer to the service internally and in all interactions |
| with the user. |
| .PP |
| Currently, eight types of check statements are supported: |
| .IP "1. \s-1CHECK\s0 \s-1PROCESS\s0 <unique name> <\s-1PIDFILE\s0 <path> | \s-1MATCHING\s0 <regex>>" 4 |
| .IX Item "1. CHECK PROCESS <unique name> <PIDFILE <path> | MATCHING <regex>>" |
| <path> is the absolute path to the program's pidfile. If the |
| pidfile does not exist or does not contain the pid number of a |
| running process, Monit will call the entry's start method if |
| defined. |
| <regex> is an alternative process specification using pattern matching |
| against the process name (command line) from the process table instead of a pidfile. |
| The first match is used so this form of check is useful for unique |
| pattern matching \- the pidfile should be used where possible as it |
| defines expected pid exactly (pattern matching won't be useful for |
| Apache in most cases for example). |
| The pattern can be obtained using \fImonit procmatch \*(L".*\*(R"\fR \s-1CLI\s0 command |
| which lists all processes visible to Monit or using the \fIps\fR utility. |
| The \*(L"procmatch\*(R" \s-1CLI\s0 command can be used to test your pattern as well. |
| If Monit runs in passive mode or the start method is not defined, |
| Monit will just send alerts on errors. |
| .IP "2. \s-1CHECK\s0 \s-1FILE\s0 <unique name> \s-1PATH\s0 <path>" 4 |
| .IX Item "2. CHECK FILE <unique name> PATH <path>" |
| <path> is the absolute path to the file. If the file does not |
| exist or disappeared, Monit will call the entry's start method if |
| defined, if <path> does not point to a regular file type (for |
| instance a directory), Monit will disable monitoring of this |
| entry. If Monit runs in passive mode or the start method is not |
| defined, Monit will just send alerts on errors. |
| .IP "3. \s-1CHECK\s0 \s-1FIFO\s0 <unique name> \s-1PATH\s0 <path>" 4 |
| .IX Item "3. CHECK FIFO <unique name> PATH <path>" |
| <path> is the absolute path to the fifo. If the fifo does not |
| exist or disappeared, Monit will call the entry's start method if |
| defined, if <path> does not point to a fifo type (for |
| instance a directory), Monit will disable monitoring of this |
| entry. If Monit runs in passive mode or the start method is not |
| defined, Monit will just send alerts on errors. |
| .IP "4. \s-1CHECK\s0 \s-1FILESYSTEM\s0 <unique name> \s-1PATH\s0 <path>" 4 |
| .IX Item "4. CHECK FILESYSTEM <unique name> PATH <path>" |
| <path> is the path to the filesystem block special device, mount point, |
| file or a directory which is part of a filesystem. It is |
| recommended to use a block special file directly (for example |
| /dev/hda1 on Linux or /dev/dsk/c0t0d0s1 on Solaris, etc.) If you |
| use a mount point (for example /data), be careful, because if the |
| filesystem is unmounted the test will still be true because the mount |
| point exists. |
| .Sp |
| If the filesystem becomes unavailable, Monit will call the entry's |
| start method if defined. If <path> does not point to a filesystem, |
| Monit will disable monitoring of this entry. If Monit runs in |
| passive mode or the start method is not defined, Monit will just |
| send alerts on errors. |
| .IP "5. \s-1CHECK\s0 \s-1DIRECTORY\s0 <unique name> \s-1PATH\s0 <path>" 4 |
| .IX Item "5. CHECK DIRECTORY <unique name> PATH <path>" |
| <path> is the absolute path to the directory. If the directory |
| does not exist or disappeared, Monit will call the entry's start |
| method if defined, if <path> does not point to a directory, monit |
| will disable monitoring of this entry. If Monit runs in passive |
| mode or the start method is not defined, Monit will just send |
| alerts on errors. |
| .IP "6. \s-1CHECK\s0 \s-1HOST\s0 <unique name> \s-1ADDRESS\s0 <host address>" 4 |
| .IX Item "6. CHECK HOST <unique name> ADDRESS <host address>" |
| The host address can be specified as a hostname string or as an |
| ip-address string in dotted decimal format, such as |
| tildeslash.com or \*(L"64.87.72.95\*(R". |
| .IP "7. \s-1CHECK\s0 \s-1SYSTEM\s0 <unique name>" 4 |
| .IX Item "7. CHECK SYSTEM <unique name>" |
| The system name is usually hostname, but any descriptive name can be |
| used. This test allows one to check general system resources such as |
| \&\s-1CPU\s0 usage (percent of time spent in user, system and wait), total |
| memory usage or load average. |
| .IP "8. \s-1CHECK\s0 \s-1PROGRAM\s0 <unique name> \s-1PATH\s0 <executable file>" 4 |
| .IX Item "8. CHECK PROGRAM <unique name> PATH <executable file>" |
| <path> is the absolute path to the executable program or script. |
| The \fIstatus\fR test allows checking the program's exit status. |
| .PP |
| You can use noise keywords like 'if', `and', `with(in)', `has', |
| `using', 'use', 'on(ly)', `usage' and `program(s)' anywhere in an |
| entry to make it resemble English. They're ignored, but can make |
| entries much easier to read at a glance. The punctuation |
| characters ';' ',' and '=' are also ignored. Keywords are case |
| insensitive. |
| .PP |
| .Vb 1 |
| \& Here are the legal global keywords: |
| \& |
| \& Keyword Function |
| \& \-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- |
| \& set daemon Set a background poll interval in seconds. |
| \& set init Set Monit to run from init. Monit will not |
| \& transform itself into a daemon process. |
| \& set logfile Name of a file to dump error\- and status\- |
| \& messages to. If syslog is specified as the |
| \& file, Monit will utilize the syslog daemon |
| \& to log messages. This can optionally be |
| \& followed by \*(Aqfacility <facility>\*(Aq where |
| \& facility is \*(Aqlog_local0\*(Aq \- \*(Aqlog_local7\*(Aq or |
| \& \*(Aqlog_daemon\*(Aq. If no facility is specified, |
| \& LOG_USER is used. |
| \& set mailserver The mailserver used for sending alert |
| \& notifications. If the mailserver is not |
| \& defined, Monit will try to use \*(Aqlocalhost\*(Aq |
| \& as the smtp\-server for sending mail. You |
| \& can add more mail servers, if Monit cannot |
| \& connect to the first server it will try the |
| \& next server and so on. |
| \& set mail\-format Set a global mail format for all alert |
| \& messages emitted by monit. |
| \& set idfile Explicitly set the location of the Monit id |
| \& file. E.g. set idfile /var/monit/id. |
| \& set pidfile Explicitly set the location of the Monit lock |
| \& file. E.g. set pidfile /var/run/xyzmonit.pid. |
| \& set statefile Explicitly set the location of the file Monit |
| \& will write state data to. If not set, the |
| \& default is $HOME/.monit.state. |
| \& set httpd port Activates Monit http server at the given |
| \& port number. |
| \& ssl enable Enables ssl support for the httpd server. |
| \& Requires the use of the pemfile statement. |
| \& ssl disable Disables ssl support for the httpd server. |
| \& It is equal to omitting any ssl statement. |
| \& pemfile Set the pemfile to be used with ssl. |
| \& clientpemfile Set the pemfile to be used when client |
| \& certificates should be checked by monit. |
| \& address If specified, the http server will only |
| \& accept connect requests to this address. |
| \& This statement is an optional part of the |
| \& set httpd statement. |
| \& allow Specifies a host or IP address allowed to |
| \& connect to the http server. Can also specify |
| \& a username and password allowed to connect |
| \& to the server. More than one allow statement |
| \& is allowed. This statement is also an |
| \& optional part of the set httpd statement. |
| \& read\-only Set the user defined in username:password |
| \& to read only. A read\-only user cannot change |
| \& a service from the Monit web interface. |
| \& include include a file or files matching the globstring |
| \& |
| \& Here are the legal service entry keywords: |
| \& |
| \& Keyword Function |
| \& \-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\-\- |
| \& check Starts an entry and must be followed by the type |
| \& of monitored service {filesystem|directory|file|host| |
| \& process|system|program} and a descriptive name for |
| \& the service. |
| \& pidfile Specify the process pidfile. Every |
| \& process must create a pidfile with its |
| \& current process id. This statement should only |
| \& be used in a process service entry. |
| \& path Must be followed by a path to the block |
| \& special file for filesystem, regular |
| \& file, directory or a process\*(Aqs pidfile. |
| \& group Specify a groupname for a service entry. |
| \& start The program used to start the specified |
| \& service. Full path is required. This |
| \& statement is optional, but recommended. |
| \& stop The program used to stop the specified |
| \& service. Full path is required. This |
| \& statement is optional, but recommended. |
| \& pid and ppid These keywords may be used as standalone |
| \& statements in a process service entry to |
| \& override the alert action for change of |
| \& process pid and ppid. |
| \& uid and gid These keywords are either 1) an optional part of |
| \& a start, stop or exec statement. They may be |
| \& used to specify a user id and a group id the |
| \& program (process) should switch to upon start. |
| \& This feature can only be used if the superuser |
| \& is running monit. 2) uid and gid may also be |
| \& used as standalone statements in a file service |
| \& entry to test a file\*(Aqs uid and gid attributes. |
| \& host The hostname or IP address to test the port |
| \& at. This keyword can only be used together |
| \& with a port statement or in the check host |
| \& statement. |
| \& port Specify a TCP/IP service port number which |
| \& a process is listening on. This statement |
| \& is also optional. If this statement is not |
| \& prefixed with a host\-statement, localhost is |
| \& used as the hostname to test the port at. |
| \& type Specifies the socket type Monit should use when |
| \& testing a connection to a port. If the type |
| \& keyword is omitted, tcp is used. This keyword |
| \& must be followed by either tcp, udp or tcpssl. |
| \& tcp Specifies that Monit should use a TCP |
| \& socket type (stream) when testing a port. |
| \& tcpssl Specifies that Monit should use a TCP socket |
| \& type (stream) and the secure socket layer (ssl) |
| \& when testing a port connection. |
| \& udp Specifies that Monit should use a UDP socket |
| \& type (datagram) when testing a port. |
| \& certmd5 The md5 sum of a certificate a ssl forged |
| \& server has to deliver. |
| \& proto(col) This keyword specifies the type of service |
| \& found at the port. See CONNECTION TESTING |
| \& for list of supported protocols. |
| \& You\*(Aqre welcome to write new protocol test |
| \& modules. If no protocol is specified Monit will |
| \& use a default test which in most cases is good |
| \& enough. |
| \& request Specifies a server request and must come |
| \& after the protocol keyword mentioned above. |
| \& \- for http it can contain an URL and an |
| \& optional query string. |
| \& \- other protocols do not support this |
| \& statement yet |
| \& send/expect These keywords specify a generic protocol. |
| \& Both require a string whether to be sent or |
| \& to be matched against (as extended regex if |
| \& supported). Send/expect can not be used |
| \& together with the proto(col) statement. |
| \& unix(socket) Specifies a Unix socket file and used like |
| \& the port statement above to test a Unix |
| \& domain network socket connection. |
| \& URL Specify an URL string which Monit will use for |
| \& connection testing. |
| \& content Optional sub\-statement for the URL statement. |
| \& Specifies that Monit should test the content |
| \& returned by the server against a regular |
| \& expression. |
| \& timeout x sec. Define a network port connection timeout. Must |
| \& be followed by a number in seconds and the |
| \& keyword, seconds. |
| \& timeout Define a service timeout. Must be followed by |
| \& two digits. The first digit is max number of |
| \& restarts for the service. The second digit |
| \& is the cycle interval to test restarts. |
| \& This statement is optional. |
| \& alert Specifies an email address for notification |
| \& if a service event occurs. Alert can also |
| \& be postfixed, to only send a message for |
| \& certain events. See the examples above. More |
| \& than one alert statement is allowed in an |
| \& entry. This statement is also optional. |
| \& noalert Specifies an email address which doesn\*(Aqt want |
| \& to receive alerts. This statement is also |
| \& optional. |
| \& restart, stop These keywords may be used as actions for |
| \& unmonitor, various test statements. The exec statement is |
| \& start and special in that it requires a following string |
| \& exec specifying the program to be executed. You may |
| \& also specify an UID and GID for the exec |
| \& statement. The program executed will then run |
| \& using the specified user id and group id. |
| \& mail\-format Specifies a mail format for an alert message |
| \& This statement is an optional part of the |
| \& alert statement. |
| \& checksum Specify that Monit should compute and monitor a |
| \& file\*(Aqs md5/sha1 checksum. May only be used in a |
| \& check file entry. |
| \& expect Specifies a md5/sha1 checksum string Monit |
| \& should expect when testing the checksum. This |
| \& statement is an optional part of the checksum |
| \& statement. |
| \& timestamp Specifies an expected timestamp for a file |
| \& or directory. More than one timestamp statement |
| \& is allowed. May only be used in a check file or |
| \& check directory entry. |
| \& changed Part of a timestamp statement and used as an |
| \& operator to simply test for a timestamp change. |
| \& every Validate this entry only at every n poll cycle |
| \& or per cron specification. Useful in daemon mode |
| \& when the cycle is short and a service takes some |
| \& time to start or to suppress monitoring during |
| \& backup windows. |
| \& mode Must be followed either by the keyword active, |
| \& passive or manual. If active, Monit will restart |
| \& the service if it is not running (this is the |
| \& default behavior). If passive, Monit will not |
| \& (re)start the service if it is not running \- it |
| \& will only monitor and send alerts (resource |
| \& related restart and stop options are ignored |
| \& in this mode also). If manual, Monit will enter |
| \& active mode only if a service was started under |
| \& monit\*(Aqs control otherwise the service isn\*(Aqt |
| \& monitored. |
| \& cpu Must be followed by a compare operator, a number |
| \& with "%" and an action. This statement is used |
| \& to check the cpu usage in percent of a process |
| \& with its children over a number of cycles. If |
| \& the compare expression matches then the |
| \& specified action is executed. |
| \& mem The equivalent to the cpu token for memory of a |
| \& process (w/o children!). This token must be |
| \& followed by a compare operator a number with |
| \& unit {B|KB|MB|GB|%|byte|kilobyte|megabyte| |
| \& gigabyte|percent} and an action. |
| \& swap Token for system swap usage monitoring. This token |
| \& must be followed by a compare operator a number with |
| \& unit {B|KB|MB|GB|%|byte|kilobyte|megabyte|gigabyte|percent} |
| \& and an action. |
| \& loadavg Must be followed by [1min,5min,15min] in (), a |
| \& compare operator, a number and an action. This |
| \& statement is used to check the system load |
| \& average over a number of cycles. If the compare |
| \& expression matches then the specified action is |
| \& executed. |
| \& children This is the number of child processes spawned by a |
| \& process. The syntax is the same as above. |
| \& totalmem The equivalent of mem, except totalmem is an |
| \& aggregation of memory, not only used by a |
| \& process but also by all its child |
| \& processes. The syntax is the same as above. |
| \& space Must be followed by a compare operator, a |
| \& number, unit {B|KB|MB|GB|%|byte|kilobyte| |
| \& megabyte|gigabyte|percent} and an action. |
| \& inode(s) Must be followed by a compare operator, integer |
| \& number, optionally by percent sign (if not, the |
| \& limit is absolute) and an action. |
| \& perm(ission) Must be followed by an octal number describing |
| \& the permissions. |
| \& size Must be followed by a compare operator, a |
| \& number, unit {B|KB|MB|GB|byte|kilobyte| |
| \& megabyte|gigabyte} and an action. |
| \& uptime Must be followed by a compare operator, a |
| \& number, unit {second(s)|minute(s)|hour(s)|day(s)} |
| \& and an action. |
| \& depends (on) Must be followed by the name of a service this |
| \& service depends on. |
| .Ve |
| .PP |
| Here's the complete list of reserved \fBkeywords\fR used by monit: |
| .PP |
| \&\fIif\fR, \fIthen\fR, \fIelse\fR, \fIset\fR, \fIdaemon\fR, \fIlogfile\fR, |
| \&\fIsyslog\fR, \fIaddress\fR, \fIhttpd\fR, \fIssl\fR, \fIenable\fR, \fIdisable\fR, |
| \&\fIpemfile\fR, \fIallow\fR, \fIread-only\fR, \fIcheck\fR, \fIinit\fR, \fIcount\fR, |
| \&\fIpidfile\fR, \fIstatefile\fR, \fIgroup\fR, \fIstart\fR, \fIstop\fR, \fIuid\fR, |
| \&\fIgid\fR, \fIconnection\fR, \fIport(number)\fR, \fIunix(socket)\fR, \fItype\fR, |
| \&\fIproto(col)\fR, \fItcp\fR, \fItcpssl\fR, \fIudp\fR, \fIalert\fR, \fInoalert\fR, |
| \&\fImail-format\fR, \fIrestart\fR, \fItimeout\fR, \fIchecksum\fR, \fIresource\fR, |
| \&\fIexpect\fR, \fIsend\fR, \fImailserver\fR, \fIevery\fR, \fImode\fR, \fIactive\fR, |
| \&\fIpassive\fR, \fImanual\fR, \fIdepends\fR, \fIhost\fR, \fIdefault\fR, \fIhttp\fR, |
| \&\fIftp\fR, \fIsmtp\fR, \fIpop\fR, \fIntp3\fR, \fInntp\fR, \fIimap\fR, \fIclamav\fR, |
| \&\fIssh\fR, \fIdwp\fR, \fIldap2\fR, \fIldap3\fR, \fItns\fR, \fIrequest\fR, \fIcpu\fR, |
| \&\fImem\fR, \fItotalmem\fR, \fIswap\fR, \fIchildren\fR, \fIloadavg\fR, \fItimestamp\fR, |
| \&\fIchanged\fR, \fIsecond(s)\fR, \fIminute(s)\fR, \fIhour(s)\fR, \fIday(s)\fR, |
| \&\fIspace\fR, \fIinode\fR, \fIpid\fR, \fIppid\fR, \fIperm(ission)\fR, \fIicmp\fR, |
| \&\fIprocess\fR, \fIfile\fR, \fIdirectory\fR, \fIfilesystem\fR, \fIsize\fR, \fIaction\fR, |
| \&\fIunmonitor\fR, \fIrdate\fR, \fIrsync\fR, \fIdata\fR, \fIinvalid\fR, \fIexec\fR, |
| \&\fInonexist\fR, \fIpolicy\fR, \fIreminder\fR, \fIinstance\fR, \fIeventqueue\fR, |
| \&\fIbasedir\fR, \fIslot(s)\fR, \fIsystem\fR, \fIidfile\fR, \fIgps\fR, \fIradius\fR, |
| \&\fIsecret\fR, \fItarget\fR, \fImaxforward\fR, \fIhostheader\fR, \fIregister\fR, |
| \&\fIcredentials\fR, \fIfips\fR, \fIstatus\fR, \fIuptime\fR and \fIfailed\fR |
| .PP |
| And here is a complete list of \fBnoise keywords\fR ignored by |
| monit: |
| .PP |
| \&\fIis\fR, \fIas\fR, \fIare\fR, \fIon(ly)\fR, \fIwith(in|out)\fR, \fIand\fR, \fIhas\fR, |
| \&\fIusing\fR, \fIuse\fR, \fIthe\fR, \fIsum\fR, \fIprogram(s)\fR, \fIthan\fR, \fIfor\fR, |
| \&\fIusage\fR, \fIwas\fR, \fIbut\fR, \fIof\fR. |
| .PP |
| \&\fBNote:\fR If the \fIstart\fR or \fIstop\fR programs are shell scripts, |
| then the script must begin with \f(CW\*(C`#!\*(C'\fR and the remainder of the |
| first line must specify an interpreter for the program. E.g. |
| \&\f(CW\*(C`#!/bin/sh\*(C'\fR |
| .PP |
| It's possible to write scripts directly into the \fIstart\fR and |
| \&\fIstop\fR entries by using a string of shell-commands. Like so: |
| .PP |
| .Vb 2 |
| \& start="/bin/bash \-c \*(Aqecho $$ > pidfile; exec program\*(Aq" |
| \& stop="/bin/bash \-c \*(Aqkill \-s SIGTERM \`cat pidfile\`\*(Aq" |
| .Ve |
| .SS "\s-1CONFIGURATION\s0 \s-1EXAMPLES\s0" |
| .IX Subsection "CONFIGURATION EXAMPLES" |
| The simplest form is just the check statement. In this example we |
| check to see if the server is running and log a message if not: |
| .PP |
| .Vb 1 |
| \& check process resin with pidfile /usr/local/resin/srun.pid |
| .Ve |
| .PP |
| Checking process without pidfile: |
| .PP |
| .Vb 1 |
| \& check process pager matching "/sbin/dynamic_pager \-F /private/var/vm/swapfile" |
| .Ve |
| .PP |
| To have Monit start the server if it's not running, add a start |
| statement: |
| .PP |
| .Vb 3 |
| \& check process resin with pidfile /usr/local/resin/srun.pid |
| \& start program = "/usr/local/resin/bin/srun.sh start" |
| \& stop program = "/usr/local/resin/bin/srun.sh stop" |
| .Ve |
| .PP |
| Here's a more advanced example for monitoring an apache |
| web-server listening on the default port number for \s-1HTTP\s0 and |
| \&\s-1HTTPS\s0. In this example Monit will restart apache if it's not |
| accepting connections at the port numbers. The method Monit uses |
| for a process restart is to first execute the stop-program, wait |
| up to 30s for the process to stop and then execute the start-program |
| and wait up to 30s for it to start. The length of start or stop |
| timeout can be overridden using the 'timeout' option. If Monit was |
| unable to stop or start the service a failed alert message will |
| be sent if you have requested alert messages to be sent. |
| .PP |
| .Vb 5 |
| \& check process apache with pidfile /var/run/httpd.pid |
| \& start program = "/etc/init.d/httpd start" with timeout 60 seconds |
| \& stop program = "/etc/init.d/httpd stop" |
| \& if failed port 80 then restart |
| \& if failed port 443 with timeout 15 seconds then restart |
| .Ve |
| .PP |
| This example demonstrates how you can run a program as a specified |
| user (uid) and with a specified group (gid). Many daemon programs |
| will do the uid and gid switch by themselves, but for those |
| programs that do not (e.g. Java programs), monit's ability to |
| start a program as a certain user can be very useful. In this |
| example we start the Tomcat Java Servlet Engine as the standard |
| \&\fInobody\fR user and group. Please note that Monit will only switch |
| uid and gid for a program if the super-user is running monit, |
| otherwise Monit will simply ignore the request to change uid and |
| gid. |
| .PP |
| .Vb 7 |
| \& check process tomcat with pidfile /var/run/tomcat.pid |
| \& start program = "/etc/init.d/tomcat start" |
| \& as uid nobody and gid nobody |
| \& stop program = "/etc/init.d/tomcat stop" |
| \& # You can also use id numbers instead and write: |
| \& as uid 99 and with gid 99 |
| \& if failed port 8080 then alert |
| .Ve |
| .PP |
| In this example we use udp for connection testing to check if the |
| name-server is running and also use timeout and alert: |
| .PP |
| .Vb 5 |
| \& check process named with pidfile /var/run/named.pid |
| \& start program = "/etc/init.d/named start" |
| \& stop program = "/etc/init.d/named stop" |
| \& if failed port 53 use type udp protocol dns then restart |
| \& if 3 restarts within 5 cycles then timeout |
| .Ve |
| .PP |
| The following example illustrates how to check if the service |
| \&'sophie' is answering connections on its Unix domain socket: |
| .PP |
| .Vb 4 |
| \& check process sophie with pidfile /var/run/sophie.pid |
| \& start program = "/etc/init.d/sophie start" |
| \& stop program = "/etc/init.d/sophie stop" |
| \& if failed unix /var/run/sophie then restart |
| .Ve |
| .PP |
| In this example we check an apache web-server running on |
| localhost that answers for several IP-based virtual hosts or |
| vhosts, hence the host statement before port: |
| .PP |
| .Vb 7 |
| \& check process apache with pidfile /var/run/httpd.pid |
| \& start "/etc/init.d/httpd start" |
| \& stop "/etc/init.d/httpd stop" |
| \& if failed host www.sol.no port 80 then alert |
| \& if failed host shop.sol.no port 443 then alert |
| \& if failed host chat.sol.no port 80 then alert |
| \& if failed host www.tildeslash.com port 80 then alert |
| .Ve |
| .PP |
| To make sure that Monit is communicating with a http server a |
| protocol test can be added: |
| .PP |
| .Vb 6 |
| \& check process apache with pidfile /var/run/httpd.pid |
| \& start "/etc/init.d/httpd start" |
| \& stop "/etc/init.d/httpd stop" |
| \& if failed host www.sol.no port 80 |
| \& protocol HTTP |
| \& then alert |
| .Ve |
| .PP |
| This example shows a different way to check a webserver using |
| the send/expect mechanism: |
| .PP |
| .Vb 7 |
| \& check process apache with pidfile /var/run/httpd.pid |
| \& start "/etc/init.d/httpd start" |
| \& stop "/etc/init.d/httpd stop" |
| \& if failed host www.sol.no port 80 |
| \& send "GET / HTTP/1.0\er\enHost: www.sol.no\er\en\er\en" |
| \& expect "HTTP/[0\-9\e.]{3} 200 .*\er\en" |
| \& then alert |
| .Ve |
| .PP |
| To make sure that Apache is logging successfully (i.e. no more |
| than 60 percent of child servers are logging), use its mod_status |
| page at www.sol.no/server\-status with this special protocol test: |
| .PP |
| .Vb 5 |
| \& check process apache with pidfile /var/run/httpd.pid |
| \& start "/etc/init.d/httpd start" |
| \& stop "/etc/init.d/httpd stop" |
| \& if failed host www.sol.no port 80 |
| \& protocol apache\-status loglimit > 60% then restart |
| .Ve |
| .PP |
| This configuration can be used to alert you if 25 percent or more |
| of Apache child processes are stuck performing \s-1DNS\s0 lookups: |
| .PP |
| .Vb 5 |
| \& check process apache with pidfile /var/run/httpd.pid |
| \& start "/etc/init.d/httpd start" |
| \& stop "/etc/init.d/httpd stop" |
| \& if failed host www.sol.no port 80 |
| \& protocol apache\-status dnslimit > 25% then alert |
| .Ve |
| .PP |
| Here we use an icmp ping test to check if a remote host is up and |
| if not send an alert: |
| .PP |
| .Vb 3 |
| \& check host www.tildeslash.com with address www.tildeslash.com |
| \& if failed icmp type echo count 5 with timeout 15 seconds |
| \& then alert |
| .Ve |
| .PP |
| In the following example we ask Monit to compute and verify the |
| checksum for the underlying apache binary used by the start and |
| stop programs. If the checksum test should fail, monitoring |
| will be disabled to prevent possibly starting a compromised |
| binary: |
| .PP |
| .Vb 5 |
| \& check process apache with pidfile /var/run/httpd.pid |
| \& start program = "/etc/init.d/httpd start" |
| \& stop program = "/etc/init.d/httpd stop" |
| \& if failed host www.tildeslash.com port 80 then restart |
| \& depends on apache_bin |
| \& |
| \& check file apache_bin with path /usr/local/apache/bin/httpd |
| \& if failed checksum then unmonitor |
| .Ve |
| .PP |
| In this example we ask Monit to test the checksum for a document |
| on a remote server. If the checksum was changed we send an alert: |
| .PP |
| .Vb 5 |
| \& check host tildeslash with address www.tildeslash.com |
| \& if failed port 80 protocol http |
| \& and request "/monit/dist/monit\-4.0.tar.gz" |
| \& with checksum f9d26b8393736b5dfad837bb13780786 |
| \& then alert |
| .Ve |
| .PP |
| Here are a couple of tests for some popular communication |
| servers, using the \s-1SIP\s0 protocol. First we test a FreeSWITCH |
| server and then an Asterisk server: |
| .PP |
| .Vb 12 |
| \& check process freeswitch |
| \& with pidfile /usr/local/freeswitch/log/freeswitch.pid |
| \& start program = "/usr/local/freeswitch/bin/freeswitch \-nc \-hp" |
| \& stop program = "/usr/local/freeswitch/bin/freeswitch \-stop" |
| \& if totalmem > 1000.0 MB for 5 cycles then alert |
| \& if totalmem > 1500.0 MB for 5 cycles then alert |
| \& if totalmem > 2000.0 MB for 5 cycles then restart |
| \& if cpu > 60% for 5 cycles then alert |
| \& if failed port 5060 type udp protocol SIP |
| \& target me@foo.bar and maxforward 10 |
| \& then restart |
| \& if 5 restarts within 5 cycles then timeout |
| \& |
| \& check process asterisk |
| \& with pidfile /var/run/asterisk/asterisk.pid |
| \& start program = "/usr/sbin/asterisk" |
| \& stop program = "/usr/sbin/asterisk \-r \-x \*(Aqshutdown now\*(Aq" |
| \& if totalmem > 1000.0 MB for 5 cycles then alert |
| \& if totalmem > 1500.0 MB for 5 cycles then alert |
| \& if totalmem > 2000.0 MB for 5 cycles then restart |
| \& if cpu > 60% for 5 cycles then alert |
| \& if failed port 5060 type udp protocol SIP |
| \& and target me@foo.bar maxforward 10 |
| \& then restart |
| \& if 5 restarts within 5 cycles then timeout |
| .Ve |
| .PP |
| Some servers are slow starters, like for example Java based |
| Application Servers. So if we want to keep the poll-cycle low |
| (i.e. < 60 seconds) but allow some services to take their time to |
| start, the \fBevery\fR statement is handy: |
| .PP |
| .Vb 4 |
| \& check process dynamo with pidfile /etc/dynamo.pid every 2 cycles |
| \& start program = "/etc/init.d/dynamo start" |
| \& stop program = "/etc/init.d/dynamo stop" |
| \& if failed port 8840 then alert |
| .Ve |
| .PP |
| Here is an example where we group together two database entries |
| so you can manage them together, e.g. 'monit \-g database start |
| all'. The mode statement is also illustrated in the first entry |
| and has the effect that Monit will not try to (re)start this |
| service if it is not running: |
| .PP |
| .Vb 5 |
| \& check process sybase with pidfile /var/run/sybase.pid |
| \& start = "/etc/init.d/sybase start" |
| \& stop = "/etc/init.d/sybase stop" |
| \& mode passive |
| \& group database |
| \& |
| \& check process oracle with pidfile /var/run/oracle.pid |
| \& start program = "/etc/init.d/oracle start" |
| \& stop program = "/etc/init.d/oracle stop" |
| \& mode active # Not necessary really, since it\*(Aqs the default |
| \& if failed port 9001 then restart |
| \& group database |
| .Ve |
| .PP |
| Here is an example to show the usage of the resource checks. It |
| will send an alert when the \s-1CPU\s0 usage of the http daemon and its |
| child processes rises above 60% for over two cycles. Apache is |
| restarted if the \s-1CPU\s0 usage is over 80% for five cycles or the |
| memory usage over 100Mb for five cycles or if the machine's load |
| average is more than 10 for 8 cycles: |
| .PP |
| .Vb 8 |
| \& check process apache with pidfile /var/run/httpd.pid |
| \& start program = "/etc/init.d/httpd start" |
| \& stop program = "/etc/init.d/httpd stop" |
| \& if cpu > 40% for 2 cycles then alert |
| \& if totalcpu > 60% for 2 cycles then alert |
| \& if totalcpu > 80% for 5 cycles then restart |
| \& if mem > 100 MB for 5 cycles then stop |
| \& if loadavg(5min) greater than 10.0 for 8 cycles then stop |
| .Ve |
| .PP |
| This example demonstrates the timestamp statement with exec and |
| how you may restart apache if its configuration file was |
| changed. |
| .PP |
| .Vb 3 |
| \& check file httpd.conf with path /etc/httpd/httpd.conf |
| \& if changed timestamp |
| \& then exec "/etc/init.d/httpd graceful" |
| .Ve |
| .PP |
| In this example we demonstrate usage of the extended alert |
| statement and a file check dependency: |
| .PP |
| .Vb 10 |
| \& check process apache with pidfile /var/run/httpd.pid |
| \& start = "/etc/init.d/httpd start" |
| \& stop = "/etc/init.d/httpd stop" |
| \& alert admin@bar on {nonexist, timeout} |
| \& with mail\-format { |
| \& from: bofh@$HOST |
| \& subject: apache $EVENT \- $ACTION |
| \& message: This event occurred on $HOST at $DATE. |
| \& Your faithful employee, |
| \& monit |
| \& } |
| \& if failed host www.tildeslash.com port 80 then restart |
| \& if 3 restarts within 5 cycles then timeout |
| \& depend httpd_bin |
| \& group apache |
| \& |
| \& check file httpd_bin with path /usr/local/apache/bin/httpd |
| \& alert security@bar on {checksum, timestamp, |
| \& permission, uid, gid} |
| \& with mail\-format {subject: Alaaarrm! on $HOST} |
| \& if failed checksum |
| \& and expect 8f7f419955cefa0b33a2ba316cba3659 |
| \& then unmonitor |
| \& if failed permission 755 then unmonitor |
| \& if failed uid root then unmonitor |
| \& if failed gid root then unmonitor |
| \& if changed timestamp then alert |
| \& group apache |
| .Ve |
| .PP |
| In this example, we demonstrate usage of the depend statement. In |
| this case, we want to start oracle and apache. However, we've set |
| up apache to use oracle as a back end, and if oracle is |
| restarted, apache must be restarted as well. |
| .PP |
| .Vb 4 |
| \& check process apache with pidfile /var/run/httpd.pid |
| \& start = "/etc/init.d/httpd start" |
| \& stop = "/etc/init.d/httpd stop" |
| \& depends on oracle |
| \& |
| \& check process oracle with pidfile /var/run/oracle.pid |
| \& start = "/etc/init.d/oracle start" |
| \& stop = "/etc/init.d/oracle stop" |
| \& if failed port 9001 then restart |
| .Ve |
| .PP |
| Next, we have 2 services, oracle-import and oracle-export that |
| need to be restarted if oracle is restarted, but are independent |
| of each other. |
| .PP |
| .Vb 4 |
| \& check process oracle with pidfile /var/run/oracle.pid |
| \& start = "/etc/init.d/oracle start" |
| \& stop = "/etc/init.d/oracle stop" |
| \& if failed port 9001 then restart |
| \& |
| \& check process oracle\-import |
| \& with pidfile /var/run/oracle\-import.pid |
| \& start = "/etc/init.d/oracle\-import start" |
| \& stop = "/etc/init.d/oracle\-import stop" |
| \& depends on oracle |
| \& |
| \& check process oracle\-export |
| \& with pidfile /var/run/oracle\-export.pid |
| \& start = "/etc/init.d/oracle\-export start" |
| \& stop = "/etc/init.d/oracle\-export stop" |
| \& depends on oracle |
| .Ve |
| .PP |
| Finally an example with all statements: |
| .PP |
| .Vb 10 |
| \& check process apache with pidfile /var/run/httpd.pid |
| \& start program = "/etc/init.d/httpd start" |
| \& stop program = "/etc/init.d/httpd stop" |
| \& if 3 restarts within 5 cycles then timeout |
| \& if failed host www.sol.no port 80 protocol http |
| \& and use the request "/login.cgi" |
| \& then alert |
| \& if failed host shop.sol.no port 443 type tcpssl |
| \& protocol http and with timeout 15 seconds |
| \& then restart |
| \& if cpu is greater than 60% for 2 cycles then alert |
| \& if cpu > 80% for 5 cycles then restart |
| \& if totalmem > 100 MB then stop |
| \& if children > 200 then alert |
| \& alert bofh@bar with mail\-format {from: monit@foo.bar.no} |
| \& every 2 cycles |
| \& mode active |
| \& depends on weblogic |
| \& depends on httpd.pid |
| \& depends on httpd.conf |
| \& depends on httpd_bin |
| \& depends on datafs |
| \& group server |
| \& |
| \& check file httpd.pid with path /usr/local/apache/logs/httpd.pid |
| \& group server |
| \& if timestamp > 7 days then restart |
| \& every 2 cycles |
| \& alert bofh@bar with mail\-format {from: monit@foo.bar.no} |
| \& depends on datafs |
| \& |
| \& check file httpd.conf with path /etc/httpd/httpd.conf |
| \& group server |
| \& if timestamp was changed |
| \& then exec "/usr/local/apache/bin/apachectl graceful" |
| \& every 2 cycles |
| \& alert bofh@bar with mail\-format {from: monit@foo.bar.no} |
| \& depends on datafs |
| \& |
| \& check file httpd_bin with path /usr/local/apache/bin/httpd |
| \& group server |
| \& if failed checksum and expect the sum |
| \& 8f7f419955cefa0b33a2ba316cba3659 then unmonitor |
| \& if failed permission 755 then unmonitor |
| \& if failed uid root then unmonitor |
| \& if failed gid root then unmonitor |
| \& if changed size then alert |
| \& if changed timestamp then alert |
| \& every 2 cycles |
| \& alert bofh@bar with mail\-format {from: monit@foo.bar.no} |
| \& alert foo@bar on { checksum, size, timestamp, uid, gid } |
| \& depends on datafs |
| \& |
| \& check filesystem datafs with path /dev/sdb1 |
| \& group server |
| \& start program = "/bin/mount /data" |
| \& stop program = "/bin/umount /data" |
| \& if failed permission 660 then unmonitor |
| \& if failed uid root then unmonitor |
| \& if failed gid disk then unmonitor |
| \& if space usage > 80 % then alert |
| \& if space usage > 94 % then stop |
| \& if inode usage > 80 % then alert |
| \& if inode usage > 94 % then stop |
| \& alert root@localhost |
| \& |
| \& check host ftp.redhat.com with address ftp.redhat.com |
| \& if failed icmp type echo with timeout 15 seconds |
| \& then alert |
| \& if failed port 21 protocol ftp |
| \& then exec "/usr/X11R6/bin/xmessage \-display |
| \& :0 ftp connection failed" |
| \& alert foo@bar.com |
| \& |
| \& check host www.gnu.org with address www.gnu.org |
| \& if failed port 80 protocol http |
| \& and request "/pub/gnu/bash/bash\-2.05b.tar.gz" |
| \& with checksum 8f7f419955cefa0b33a2ba316cba3659 |
| \& then alert |
| \& alert rms@gnu.org with mail\-format { |
| \& subject: The gnu server may be hacked again! } |
| .Ve |
| .PP |
| Note: only the \fBcheck statement\fR is mandatory, the other |
| statements are optional and the order of the optional statements |
| is not important. |
| .SH "FILES" |
| .IX Header "FILES" |
| \&\fI~/.monitrc\fR |
| Default run control file |
| .PP |
| \&\fI/etc/monitrc\fR |
| If the control file is not found in the default |
| location and /etc contains a \fImonitrc\fR file, this |
| file will be used instead. |
| .PP |
| \&\fI./monitrc\fR |
| If the control file is not found in either of the |
| previous two locations, and the current working |
| directory contains a \fImonitrc\fR file, this file is |
| used instead. |
| .PP |
| \&\fI~/.monit.pid\fR |
| Lock file to help prevent concurrent runs (non-root |
| mode). |
| .PP |
| \&\fI/var/run/monit.pid\fR |
| Lock file to help prevent concurrent runs (root mode, |
| Linux systems). |
| .PP |
| \&\fI/etc/monit.pid\fR |
| Lock file to help prevent concurrent runs (root mode, |
| systems without /var/run). |
| .PP |
| \&\fI~/.monit.state\fR |
| Monit saves its state to this file and utilizes |
| information found in this file to recover from |
| a crash. This is a binary file and its content is |
| only of interest to monit. You may set the location |
| of this file in the Monit control file or by using |
| the \-s switch when Monit is started. |
| .PP |
| \&\fI~/.monit.id\fR |
| Monit saves its unique id to this file. |
| .SH "ENVIRONMENT" |
| .IX Header "ENVIRONMENT" |
| No environment variables are used by Monit. However, when Monit |
| executes a script or a program Monit will set several environment |
| variables which can be utilized by the executable. The following |
| and \fIonly\fR the following environment variables are available: |
| .IP "\s-1MONIT_EVENT\s0" 4 |
| .IX Item "MONIT_EVENT" |
| The event that occurred on the service |
| .IP "\s-1MONIT_DESCRIPTION\s0" 4 |
| .IX Item "MONIT_DESCRIPTION" |
| A description of the error condition |
| .IP "\s-1MONIT_SERVICE\s0" 4 |
| .IX Item "MONIT_SERVICE" |
| The name of the service (from monitrc) on which the event |
| occurred. |
| .IP "\s-1MONIT_DATE\s0" 4 |
| .IX Item "MONIT_DATE" |
| The time and date (RFC 822 style) the event occurred |
| .IP "\s-1MONIT_HOST\s0" 4 |
| .IX Item "MONIT_HOST" |
| The host the event occurred on |
| .PP |
| The following environment variables are only available for |
| process service entries: |
| .IP "\s-1MONIT_PROCESS_PID\s0" 4 |
| .IX Item "MONIT_PROCESS_PID" |
| The process pid. This may be 0 if the process was (re)started. |
| .IP "\s-1MONIT_PROCESS_MEMORY\s0" 4 |
| .IX Item "MONIT_PROCESS_MEMORY" |
| Process memory. This may be 0 if the process was (re)started. |
| .IP "\s-1MONIT_PROCESS_CHILDREN\s0" 4 |
| .IX Item "MONIT_PROCESS_CHILDREN" |
| Process children. This may be 0 if the process was (re)started. |
| .IP "\s-1MONIT_PROCESS_CPU_PERCENT\s0" 4 |
| .IX Item "MONIT_PROCESS_CPU_PERCENT" |
| Process cpu%. This may be 0 if the process was (re)started. |
| .PP |
| In addition the following spartan \s-1PATH\s0 environment variable is |
| available: |
| .IP "PATH=/bin:/usr/bin:/sbin:/usr/sbin" 4 |
|