blob: 8acb1c1d3f2a24c287c316713dbeb30b236c08d2 [file] [log] [blame]
/* $Header: /cvsroot/watchdog/watchdog/src/watchdog.c,v 1.5 2009/02/25 09:38:18 meskes Exp $ */
/*************************************************************/
/* Original version was an example in the kernel source tree */
/* */
/* Most of the rest was written by me, Michael Meskes */
/* meskes@debian.org */
/* */
/*************************************************************/
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "extern.h"
#include <errno.h>
#include <fcntl.h>
#include <getopt.h>
#include <limits.h>
#include <netdb.h>
#include <sched.h>
#include <signal.h>
#include <stdlib.h>
#include <arpa/inet.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <sys/ioctl.h>
#include <linux/oom.h>
#include <linux/watchdog.h>
#include <string.h>
#include <libgen.h>
#include <dirent.h>
#include <unistd.h>
#include "watch_err.h"
#if USE_SYSLOG
#include <syslog.h>
#endif /* USE_SYSLOG */
#include "watchdog_log.h"
#include "watchdog_service.h"
#if defined(BUILD_EUREKA)
#include "watchdog_upload_util.h"
#endif
/* Set by -q/--no-action: wd_action() then ignores every failure, and main()
 * opens neither the watchdog device nor the temperature device. */
static int no_act = FALSE;
#if USE_SYSLOG
/* Set by -v/--verbose: enables the periodic "still alive" syslog message. */
int verbose = FALSE;
#endif /* USE_SYSLOG */
/* The main loop in main() keeps running while this is non-zero. */
volatile sig_atomic_t _running = 1;
/* Option names recognised by read_config(). They are matched as prefixes
 * with strncmp(), so the order of the tests in read_config() matters for
 * names that share a prefix (e.g. "ping-count" vs "ping"). */
#define ADMIN "admin"
#define CHANGE "change"
#define DEVICE "watchdog-device"
#define DEVICE_TIMEOUT "watchdog-timeout"
#define FILENAME "file"
#define INTERFACE "interface"
#define INTERVAL "interval"
#define LOGTICK "logtick"
#define MAXLOAD1 "max-load-1"
#define MAXLOAD5 "max-load-5"
#define MAXLOAD15 "max-load-15"
#define MAXTEMP "max-temperature"
#define MINMEM "min-memory"
#define SERVERPIDFILE "pidfile"
#define PING "ping"
#define PINGCOUNT "ping-count"
#define PRIORITY "priority"
#define REALTIME "realtime"
#define REPAIRBIN "repair-binary"
#define REPAIRTIMEOUT "repair-timeout"
#define TEMP "temperature-device"
#define TESTBIN "test-binary"
#define TESTTIMEOUT "test-timeout"
#define HEARTBEAT "heartbeat-file"
#define HBSTAMPS "heartbeat-stamps"
#define LOGDIR "log-dir"
#define TESTDIR "test-directory"
/* Initial value of test_dir; NULL makes add_test_binaries() a no-op unless
 * the "test-directory" option supplies a directory. */
#ifndef TESTBIN_PATH
#define TESTBIN_PATH NULL
#endif
/* Process id of the daemon, stored by main() and written to PIDFILE. */
pid_t pid;
/* tint: seconds between two rounds of checks ("interval").
 * softboot: set by -b/--softboot.
 * watchdog, load, mem, temp: file descriptors opened by main() for the
 * watchdog device, /proc/loadavg, /proc/meminfo and the temperature device;
 * -1 while not open. */
int tint = 1, softboot = FALSE, watchdog = -1, load = -1, mem = -1, temp = -1;
/* logtick/ticker: the verbose "still alive" message is logged every logtick
 * rounds, ticker is the countdown. schedprio: priority used with SCHED_RR. */
int logtick = 1, ticker = 1, schedprio = 1;
/* Thresholds read from the config file; main() opens /proc/loadavg only when
 * maxload1 > 0 and /proc/meminfo only when minpages > 0. */
int maxload1 = 0, maxload5 = 0, maxload15 = 0, minpages = 0;
/* maxtemp: "max-temperature" option. hbstamps: number of TS_SIZE-byte slots
 * in the timestamps buffer. lastts: next slot to write. nrts: slots in use. */
int maxtemp = 120, hbstamps = 300, lastts, nrts;
/* "ping-count" option, passed to check_net(). */
int pingcount = 3;
/* Timeout handed to the watchdog driver through WDIOC_SETTIMEOUT. */
int devtimeout = TIMER_MARGIN;
/* Paths of the temperature and watchdog devices, the "admin" option value,
 * and the basename of argv[0] used as prefix in messages. */
char *tempname = NULL, *devname = NULL, *admin = "root", *progname;
/* In-memory ring of heartbeat timestamps and the heartbeat file name. */
char *timestamps, *heartbeat;
/* timeout: "test-timeout", passed to check_bin().
 * rtimeout: "repair-timeout" in seconds; 0 makes repair() wait forever. */
time_t timeout = 0, rtimeout = 0;
/* Stream of the heartbeat file, opened by main(). */
FILE *hb;
/* Directory receiving the repair binary's stdout/stderr files. */
char* logdir = "/var/log/watchdog";
/* Scratch buffer for "<logdir>/repair-bin.stdout|stderr", allocated once in
 * main() so repair() needs no allocation later on. */
char *filename_buf;
/* NOTE(review): realtime is referenced unconditionally by read_config() and
 * main(), although it is only declared when _POSIX_MEMLOCK is defined. */
#if defined(_POSIX_MEMLOCK)
int mlocked = FALSE, realtime = FALSE;
#endif
/*
 * Print the program version and the command line synopsis to stderr,
 * then terminate the process with exit status 1.
 */
static void usage(void)
{
    FILE *const out = stderr;

    fprintf(out, "%s version %d.%d, usage:\n", progname, MAJOR_VERSION, MINOR_VERSION);

    /* the -v switch is only offered when syslog support is compiled in */
#if USE_SYSLOG
    fprintf(out, "%s [-F] [-f] [-c <config_file>] [-v] [-s] [-b] [-q]\n", progname);
#else /* USE_SYSLOG */
    fprintf(out, "%s [-F] [-f] [-c <config_file>] [-s] [-b] [-q]\n", progname);
#endif /* USE_SYSLOG */

    exit(1);
}
/*
 * Flush the filesystem buffers if the caller asked for it.
 *
 * sync_it  non-zero to call sync(), zero to do nothing
 *
 * Always returns 0.
 */
static int sync_system(int sync_it)
{
    int pass;

    /* nothing to do unless syncing was requested */
    if (!sync_it)
        return (0);

    /* sync() is issued twice in a row */
    for (pass = 0; pass < 2; pass++)
        sync();

    return (0);
}
/*
 * Execute the repair binary for a failed check and report the outcome.
 *
 * rbinary  path of the repair binary; NULL means nothing can be repaired
 * result   error code of the failed check, passed to the binary as text
 * name     optional name of the failed object, appended as last argument
 * version  0: "<rbinary> <code> [name]"
 *          otherwise: "<rbinary> repair <code> [name]"
 *
 * Returns ENOERR when the repair binary exited with status 0, otherwise an
 * error code for the caller to act on: the unchanged result when there is no
 * binary, EREBOOT when the process table is full or the child timed out, an
 * errno value, or the exit status of the repair binary.
 */
static int repair(char *rbinary, int result, char *name, int version)
{
    pid_t child_pid;
    pid_t r_pid;
    /* large enough for the decimal form of any int (sign, digits, NUL);
     * the former 5-byte buffer overflowed for codes beyond four characters */
    char parm[sizeof(int) * 3 + 2];
    int ret;

    /* no binary given, we have to reboot */
    if (rbinary == NULL)
        return (result);

    snprintf(parm, sizeof(parm), "%d", result);

    child_pid = fork();
    if (!child_pid) {
        /* Don't want the stdin and stdout of our repair program
         * to cause trouble.
         * So make stdout and stderr go to their respective files */
        strcpy(filename_buf, logdir);
        strcat(filename_buf, "/repair-bin.stdout");
        if (!freopen(filename_buf, "a+", stdout))
            exit (errno);
        strcpy(filename_buf, logdir);
        strcat(filename_buf, "/repair-bin.stderr");
        if (!freopen(filename_buf, "a+", stderr))
            exit (errno);

        /* now start binary */
        if (version == 0) {
            if (name == NULL)
                execl(rbinary, rbinary, parm, NULL);
            else
                execl(rbinary, rbinary, parm, name, NULL);
        } else /* if (version == 1) */ {
            if (name == NULL)
                execl(rbinary, rbinary, "repair", parm, NULL);
            else
                execl(rbinary, rbinary, "repair", parm, name, NULL);
        }

        /* execl should only return in case of an error */
        /* so we return the reboot code */
        return (errno);
    } else if (child_pid < 0) { /* fork failed */
        int err = errno;

        if (err == EAGAIN) { /* process table full */
#if USE_SYSLOG
            syslog(LOG_ERR, "process table is full!");
#endif /* USE_SYSLOG */
            return (EREBOOT);
        } else if (softboot)
            return (err);
        else
            return (ENOERR);
    }

    if (rtimeout > 0) {
        /* poll once per second until the child is gone or time is up */
        time_t left = rtimeout;

        do {
            sleep (1);
            r_pid = waitpid(child_pid, &result, WNOHANG);
            if (r_pid)
                break;
            left--;
        } while (left > 0);
    } else
        r_pid = waitpid(child_pid, &result, 0);

    if (r_pid == 0) {
        /* WNOHANG kept returning 0: the child is still running */
#if USE_SYSLOG
        syslog(LOG_ERR, "repair child %d timed out", child_pid);
#else /* USE_SYSLOG */
        perror(progname);
#endif /* USE_SYSLOG */
        return (EREBOOT);
    } else if (r_pid != child_pid) {
        int err = errno;

#if USE_SYSLOG
        syslog(LOG_ERR, "child %d does not exist (errno = %d = '%m')", child_pid, err);
#else /* USE_SYSLOG */
        perror(progname);
#endif /* USE_SYSLOG */
        if (softboot)
            return (err);
        /* NOTE(review): without softboot we fall through and decode a
         * status word that waitpid() never filled in - confirm intent */
    }

    /* check result */
    /* NOTE(review): WIFEXITED() is not consulted, so a repair binary that
     * was killed by a signal is decoded here as well - confirm intent */
    ret = WEXITSTATUS(result);
    if (ret != 0) {
#if USE_SYSLOG
        syslog(LOG_ERR, "repair binary %s returned %d", rbinary, ret);
#endif /* USE_SYSLOG */
        if (ret == ERESET) /* repair script says force hard reset, we give it a try */
            sleep(devtimeout * 4);
        /* for all other errors or if we still live, we let shutdown handle it */
        return (ret);
    }

    return (ENOERR);
}
/*
 * React to the outcome of one check.
 *
 * result   error code delivered by the check (ENOERR means success)
 * rbinary  repair binary handed to repair(), may be NULL
 * name     name of the failed object; also used as the watchdog reason
 * version  argument convention for the repair binary, see repair()
 *
 * Nothing happens on success or when the no-action flag is set. Any error
 * other than EREBOOT is first passed to repair(); if an error remains,
 * do_shutdown() is called with it.
 */
static void wd_action(int result, char *rbinary, char *name, int version)
{
    /* no-action mode or no error: keep on working */
    if (no_act == TRUE || result == ENOERR)
        return;

    /* everything except an explicit reboot request might be repairable */
    if (result != EREBOOT)
        result = repair(rbinary, result, name, version);

    /* the repair took care of it */
    if (result == ENOERR)
        return;

#if defined(BUILD_EUREKA) && defined(DUMPSTATE_FILENAME)
    /* record why we are about to go down and flag the upload */
    set_watchdog_reason((name && name[0] != '\0') ? name : "unknown");
    set_watchdog_upload_flag(1);
#endif

    /* still an error, reboot */
    do_shutdown(result, false);
}
/*
 * Handle the result of one check, then run keep_alive() and handle its
 * result the same way. Both use the version 0 repair convention.
 *
 * res      result code of the check that was just run
 * rbinary  repair binary used for both actions
 * name     name reported for the check
 */
static void do_check(int res, char *rbinary, char *name)
{
    int alive;

    /* first the check itself */
    wd_action(res, rbinary, name, 0);

    /* keep_alive() must only run after the check was dealt with */
    alive = keep_alive();
    wd_action(alive, rbinary, "keep-alive-do-check", 0);
}
/*
 * Variant of do_check() for checks that bring their own repair binary.
 *
 * res         result code of the check that was just run
 * r_specific  repair binary for this check, called with the version 1
 *             convention
 * r_global    repair binary used for the following keep_alive() result,
 *             called with the version 0 convention
 * name        name reported for the check
 */
static void do_check2(int res, char *r_specific, char *r_global, char *name)
{
    int alive;

    /* first the check itself, repaired by its own binary */
    wd_action(res, r_specific, name, 1);

    /* keep_alive() must only run after the check was dealt with */
    alive = keep_alive();
    wd_action(alive, r_global, "keep-alive-do-check2", 0);
}
/* Self-repairing binaries list: filled by add_test_binaries() with the
 * executable regular files found in test_dir */
struct list *tr_bin = NULL;
/* directory scanned by add_test_binaries(); "test-directory" overrides it */
char *test_dir = TESTBIN_PATH;
/* lists built by read_config() from the "file", "ping", "pidfile" and
 * "interface" options; main() walks each of them once per round */
struct list *file = NULL, *target = NULL, *pidfile = NULL, *iface = NULL;
/* "test-binary" and "repair-binary" option values, NULL when left empty */
char *tbinary, *rbinary;
/*
 * Append a new entry carrying name to the end of *list.
 *
 * list  address of the list head; *list may be NULL for an empty list
 * name  stored as-is in the new entry (the pointer is not copied)
 *
 * The entry is zero-initialised. On allocation failure a message is printed
 * and the process exits with status 1.
 */
static void add_list(struct list **list, char *name)
{
    struct list **link;
    struct list *entry = calloc(1, sizeof(struct list));

    if (entry == NULL) {
        fprintf(stderr, "%s: out of memory\n", progname);
        exit(1);
    }

    entry->name = name;
    memset((char *) (&(entry->parameter)), '\0', sizeof(union wdog_options));

    /* walk to the terminating NULL link and hook the entry in there */
    for (link = list; *link != NULL; link = &(*link)->next)
        ;
    *link = entry;
}
/*
 * Advance *i from the start of an option name to the start of its value.
 *
 * line    the config line being parsed
 * i       in: index of the option name; out: index of the value
 * offset  length of the option name
 *
 * Skips the name, blanks/tabs, one optional '=', and blanks/tabs again.
 * Returns 1 if the line ends there (no value), 0 otherwise.
 */
static int spool(char *line, int *i, int offset)
{
    int pos = *i + offset;

    /* blanks between the option name and the optional '=' */
    while (line[pos] == ' ' || line[pos] == '\t')
        pos++;

    if (line[pos] == '=')
        pos++;

    /* blanks between the '=' and the value */
    while (line[pos] == ' ' || line[pos] == '\t')
        pos++;

    *i = pos;
    return (line[pos] == '\0') ? 1 : 0;
}
/* default location of the config file; it is tried first, and the file name passed to read_config() is the fallback if it is not present */
#define CONFIG_FILENAME_DEFAULT "/tmp/watchdog/watchdog.conf"
/*
 * Read the configuration file and store the options in the global settings.
 *
 * configfile  file to read when CONFIG_FILENAME_DEFAULT cannot be opened
 * progname    prefix for error messages
 *
 * CONFIG_FILENAME_DEFAULT is tried first, configfile is only the fallback.
 * Each line has the form "<option> [=] <value>"; leading blanks, trailing
 * blanks, empty lines and lines starting with '#' are ignored. Option names
 * are matched as prefixes, so longer names sharing a prefix are tested
 * before the shorter ones ("ping-count" before "ping", "max-load-15" before
 * "max-load-1").
 *
 * Exits with status 1 if no config file can be opened, or on a read or
 * close error.
 */
static void read_config(char *configfile, char *progname)
{
    FILE *wc;
    int gotload5 = FALSE, gotload15 = FALSE;
    /* one getline() buffer reused for all lines and released at the end;
     * values that are kept are always copied with strdup() */
    char *line = NULL;
    size_t n = 0;

    if ((wc = fopen(CONFIG_FILENAME_DEFAULT, "r"))) {
        fprintf(stderr, "use default config file \"%s\"", CONFIG_FILENAME_DEFAULT);
    } else if ((wc = fopen(configfile, "r")) == NULL) {
        fprintf(stderr, "%s: Can't open config file \"%s\": %s ", progname, configfile, strerror(errno));
        exit(1);
    }

    while (!feof(wc)) {
        int i, j;

        if (getline(&line, &n, wc) == -1) {
            if (!ferror(wc))
                break;
            perror(progname);
            exit(1);
        }

        /* scan the actual line for an option */
        /* first remove the leading blanks */
        for (i = 0; line[i] == ' ' || line[i] == '\t'; i++);

        /* if the next sign is a '#' we have a comment */
        if (line[i] == '#')
            continue;

        /* also remove the trailing blanks and the \n; stop at the first
         * non-blank character so that a blank line can never make the
         * index run in front of the buffer */
        for (j = (int) strlen(line);
             j > i && (line[j - 1] == ' ' || line[j - 1] == '\t' || line[j - 1] == '\n');
             j--);
        line[j] = '\0';

        /* if the line is empty now, we don't have to parse it */
        if (strlen(line + i) == 0)
            continue;

        /* now check for an option */
        if (strncmp(line + i, FILENAME, strlen(FILENAME)) == 0) {
            if (spool(line, &i, strlen(FILENAME)))
                fprintf(stderr, "Ignoring invalid line in config file:\n%s\n", line);
            else
                add_list(&file, strdup(line + i));
        } else if (strncmp(line + i, CHANGE, strlen(CHANGE)) == 0) {
            struct list *ptr;

            if (spool(line, &i, strlen(CHANGE)))
                continue;
            if (!file) { /* no file entered yet */
                fprintf(stderr, "Ignoring invalid line in config file:\n%s\n", line);
                continue;
            }
            /* the change interval belongs to the most recent file entry */
            for (ptr = file; ptr->next != NULL; ptr = ptr->next);
            if (ptr->parameter.file.mtime != 0)
                fprintf(stderr, "Duplicate change interval option in config file. Ignoring first entry.\n");
            ptr->parameter.file.mtime = atoi(line + i);
        } else if (strncmp(line + i, SERVERPIDFILE, strlen(SERVERPIDFILE)) == 0) {
            if (spool(line, &i, strlen(SERVERPIDFILE)))
                fprintf(stderr, "Ignoring invalid line in config file:\n%s\n", line);
            else
                add_list(&pidfile, strdup(line + i));
        } else if (strncmp(line + i, PINGCOUNT, strlen(PINGCOUNT)) == 0) {
            if (spool(line, &i, strlen(PINGCOUNT)))
                fprintf(stderr, "Ignoring invalid line in config file:\n%s\n", line);
            else
                pingcount = atol(line + i);
        } else if (strncmp(line + i, PING, strlen(PING)) == 0) {
            if (spool(line, &i, strlen(PING)))
                fprintf(stderr, "Ignoring invalid line in config file:\n%s\n", line);
            else
                add_list(&target, strdup(line + i));
        } else if (strncmp(line + i, INTERFACE, strlen(INTERFACE)) == 0) {
            if (spool(line, &i, strlen(INTERFACE)))
                fprintf(stderr, "Ignoring invalid line in config file:\n%s\n", line);
            else
                add_list(&iface, strdup(line + i));
        } else if (strncmp(line + i, REALTIME, strlen(REALTIME)) == 0) {
            (void)spool(line, &i, strlen(REALTIME));
            realtime = (strncmp(line + i, "yes", 3) == 0) ? TRUE : FALSE;
        } else if (strncmp(line + i, PRIORITY, strlen(PRIORITY)) == 0) {
            if (spool(line, &i, strlen(PRIORITY)))
                fprintf(stderr, "Ignoring invalid line in config file:\n%s\n", line);
            else
                schedprio = atol(line + i);
        } else if (strncmp(line + i, REPAIRBIN, strlen(REPAIRBIN)) == 0) {
            if (spool(line, &i, strlen(REPAIRBIN)))
                rbinary = NULL;
            else
                rbinary = strdup(line + i);
        } else if (strncmp(line + i, REPAIRTIMEOUT, strlen(REPAIRTIMEOUT)) == 0) {
            if (spool(line, &i, strlen(REPAIRTIMEOUT)))
                rtimeout = 0;
            else
                rtimeout = atol(line + i);
        } else if (strncmp(line + i, TESTBIN, strlen(TESTBIN)) == 0) {
            if (spool(line, &i, strlen(TESTBIN)))
                tbinary = NULL;
            else
                tbinary = strdup(line + i);
        } else if (strncmp(line + i, TESTTIMEOUT, strlen(TESTTIMEOUT)) == 0) {
            if (spool(line, &i, strlen(TESTTIMEOUT)))
                timeout = 0;
            else
                timeout = atol(line + i);
        } else if (strncmp(line + i, HEARTBEAT, strlen(HEARTBEAT)) == 0) {
            if (spool(line, &i, strlen(HEARTBEAT)))
                heartbeat = NULL;
            else
                heartbeat = strdup(line + i);
        } else if (strncmp(line + i, HBSTAMPS, strlen(HBSTAMPS)) == 0) {
            if (spool(line, &i, strlen(HBSTAMPS)))
                fprintf(stderr, "Ignoring invalid line in config file:\n%s\n", line);
            else
                hbstamps = atol(line + i);
        } else if (strncmp(line + i, ADMIN, strlen(ADMIN)) == 0) {
            if (spool(line, &i, strlen(ADMIN)))
                admin = NULL;
            else
                admin = strdup(line + i);
        } else if (strncmp(line + i, INTERVAL, strlen(INTERVAL)) == 0) {
            if (spool(line, &i, strlen(INTERVAL)))
                fprintf(stderr, "Ignoring invalid line in config file:\n%s\n", line);
            else
                tint = atol(line + i);
        } else if (strncmp(line + i, LOGTICK, strlen(LOGTICK)) == 0) {
            if (spool(line, &i, strlen(LOGTICK)))
                logtick = ticker = 1;
            else
                logtick = ticker = atol(line + i);
        } else if (strncmp(line + i, DEVICE, strlen(DEVICE)) == 0) {
            if (spool(line, &i, strlen(DEVICE)))
                devname = NULL;
            else
                devname = strdup(line + i);
        } else if (strncmp(line + i, DEVICE_TIMEOUT, strlen(DEVICE_TIMEOUT)) == 0) {
            if (spool(line, &i, strlen(DEVICE_TIMEOUT)))
                fprintf(stderr, "Ignoring invalid line in config file: %s ", line);
            else
                devtimeout = atol(line + i);
        } else if (strncmp(line + i, TEMP, strlen(TEMP)) == 0) {
            if (spool(line, &i, strlen(TEMP)))
                tempname = NULL;
            else
                tempname = strdup(line + i);
        } else if (strncmp(line + i, MAXTEMP, strlen(MAXTEMP)) == 0) {
            if (spool(line, &i, strlen(MAXTEMP)))
                fprintf(stderr, "Ignoring invalid line in config file:\n%s\n", line);
            else
                maxtemp = atol(line + i);
        } else if (strncmp(line + i, MAXLOAD15, strlen(MAXLOAD15)) == 0) {
            if (spool(line, &i, strlen(MAXLOAD15)))
                fprintf(stderr, "Ignoring invalid line in config file:\n%s\n", line);
            else {
                maxload15 = atol(line + i);
                gotload15 = TRUE;
            }
        } else if (strncmp(line + i, MAXLOAD1, strlen(MAXLOAD1)) == 0) {
            if (spool(line, &i, strlen(MAXLOAD1)))
                fprintf(stderr, "Ignoring invalid line in config file:\n%s\n", line);
            else {
                maxload1 = atol(line + i);
                /* derive the 5 and 15 minute limits unless given explicitly */
                if (!gotload5)
                    maxload5 = maxload1 * 3 / 4;
                if (!gotload15)
                    maxload15 = maxload1 / 2;
            }
        } else if (strncmp(line + i, MAXLOAD5, strlen(MAXLOAD5)) == 0) {
            if (spool(line, &i, strlen(MAXLOAD5)))
                fprintf(stderr, "Ignoring invalid line in config file:\n%s\n", line);
            else {
                maxload5 = atol(line + i);
                gotload5 = TRUE;
            }
        } else if (strncmp(line + i, MINMEM, strlen(MINMEM)) == 0) {
            if (spool(line, &i, strlen(MINMEM)))
                fprintf(stderr, "Ignoring invalid line in config file:\n%s\n", line);
            else
                minpages = atol(line + i);
        } else if (strncmp(line + i, LOGDIR, strlen(LOGDIR)) == 0) {
            if (spool(line, &i, strlen(LOGDIR)))
                fprintf(stderr, "Ignoring invalid line in config file:\n%s\n", line);
            else
                logdir = strdup(line + i);
        } else if (strncmp(line + i, TESTDIR, strlen(TESTDIR)) == 0) {
            if (spool(line, &i, strlen(TESTDIR)))
                fprintf(stderr, "Ignoring invalid line in config file: %s ", line);
            else
                test_dir = strdup(line + i);
        } else {
            fprintf(stderr, "Ignoring invalid line in config file:\n%s\n", line);
        }
    }

    free(line);

    if (fclose(wc) != 0) {
        perror(progname);
        exit(1);
    }
}
/*
 * Collect the test/repair binaries found in a directory.
 *
 * path  directory to scan; NULL, a missing path or a non-directory is
 *       silently ignored
 *
 * Every regular file in path that is readable and executable by its owner
 * is appended to the tr_bin list under the name "<path>/<entry>". Entries
 * whose full name does not fit into PATH_MAX are skipped.
 */
static void add_test_binaries(const char *path)
{
    DIR *d;
    struct dirent dentry;
    struct dirent *rdret;
    struct stat sb;
    int ret;
    char fname[PATH_MAX];
    char *fdup;

    if (!path)
        return;
    ret = stat(path, &sb);
    if (ret < 0)
        return;
    if (!S_ISDIR(sb.st_mode))
        return;
    d = opendir(path);
    if (!d)
        return;

    do {
        ret = readdir_r(d, &dentry, &rdret);
        if (ret)
            break;
        /* end of directory */
        if (rdret == NULL)
            break;

        ret = snprintf(fname, sizeof(fname), "%s/%s",
                       path, dentry.d_name);
        /* skip on output error or truncation */
        if (ret < 0 || (size_t) ret >= sizeof(fname))
            continue;

        ret = stat(fname, &sb);
        if (ret < 0)
            continue;
        if (!S_ISREG(sb.st_mode))
            continue;
        if (!(sb.st_mode & S_IXUSR))
            continue;
        if (!(sb.st_mode & S_IRUSR))
            continue;

        /* the list keeps the pointer, so it needs its own copy */
        fdup = strdup(fname);
        if (!fdup)
            continue;
#ifdef HAVE_SYSLOG_H
        syslog(LOG_DEBUG, "adding %s to list of auto-repair binaries",
               fdup);
#endif
        add_list(&tr_bin, fdup);
    } while (1);

    /* release the directory stream, it used to be leaked */
    closedir(d);
}
/*
 * Reject a command line option that has moved into the config file.
 *
 * c           the option character that was given
 * configfile  name of the config file mentioned in the message
 *
 * Prints the hint to stderr and then calls usage().
 */
static void old_option(int c, char *configfile)
{
    FILE *const out = stderr;

    fprintf(out,
            "Option -%c is no longer valid, please specify it in %s.\n",
            c, configfile);

    /* usage() prints the synopsis and exits with status 1 */
    usage();
}
/*
 * Entry point of the watchdog daemon.
 *
 * Steps, in order:
 *   1. parse the command line and read the config file
 *   2. sanity-check interval and load limits (overridable with -f)
 *   3. create the log directory and set up one raw ICMP socket per ping target
 *   4. detach from the terminal unless -F was given (or DEBUG is defined)
 *   5. open the watchdog device, heartbeat file, /proc files and temperature
 *      device as configured
 *   6. write the pid file, install the SIGTERM handler, optionally switch to
 *      realtime scheduling and opt out of the OOM killer
 *   7. run all checks in a loop, sleeping tint seconds between rounds, until
 *      _running is cleared
 *
 * Exits with status 1 on any fatal setup error.
 */
int main(int argc, char *const argv[])
{
    FILE *fp;
    int c, foreground = FALSE, force = FALSE, sync_it = FALSE;
    int hold;
    char *configfile = CONFIG_FILENAME;
    struct list *act;
    pid_t child_pid;
    int oom_adjusted = 0;
    struct stat s;
#if USE_SYSLOG
    char *opts = "d:i:n:Ffsvbql:p:t:c:r:m:a:";
    struct option long_options[] =
    {
        {"config-file", required_argument, NULL, 'c'},
        {"foreground", no_argument, NULL, 'F'},
        {"force", no_argument, NULL, 'f'},
        {"sync", no_argument, NULL, 's'},
        {"no-action", no_argument, NULL, 'q'},
        {"verbose", no_argument, NULL, 'v'},
        {"softboot", no_argument, NULL, 'b'},
        {NULL, 0, NULL, 0}
    };
    long count = 0L;
    struct watchdog_info ident;
#else /* USE_SYSLOG */
    char *opts = "d:i:n:Ffsbql:p:t:c:r:m:a:";
    struct option long_options[] =
    {
        {"config-file", required_argument, NULL, 'c'},
        {"foreground", no_argument, NULL, 'F'},
        {"force", no_argument, NULL, 'f'},
        {"sync", no_argument, NULL, 's'},
        {"no-action", no_argument, NULL, 'q'},
        {"softboot", no_argument, NULL, 'b'},
        {NULL, 0, NULL, 0}
    };
#endif /* USE_SYSLOG */

    progname = basename(argv[0]);

    /* check the options */
    /* there aren't that many any more */
    /* getopt_long() returns -1 once all options have been consumed */
    while ((c = getopt_long(argc, argv, opts, long_options, NULL)) != -1) {
        switch (c) {
        /* options that have moved into the config file */
        case 'n':
        case 'p':
        case 'a':
        case 'r':
        case 'd':
        case 't':
        case 'l':
        case 'm':
        case 'i':
            old_option(c, configfile);
            break;
        case 'c':
            configfile = optarg;
            break;
        case 'F':
            foreground = TRUE;
            break;
        case 'f':
            force = TRUE;
            break;
        case 's':
            sync_it = TRUE;
            break;
        case 'b':
            softboot = TRUE;
            break;
        case 'q':
            no_act = TRUE;
            break;
#if USE_SYSLOG
        case 'v':
            verbose = TRUE;
            break;
#endif /* USE_SYSLOG */
        default:
            usage();
        }
    }

#if defined(BUILD_EUREKA)
    // Before watchdog reboots the system, it writes dumpstate
    // and sets the system property "persist.watchdog.upload".
    // Check the system property to see if there's a need to
    // setup an upload.
    setup_watchdog_upload();
#endif

    read_config(configfile, progname);
    add_test_binaries(test_dir);

    if (tint < 0)
        usage();

    if (tint >= devtimeout && !force) {
        fprintf(stderr, "%s error:\n", progname);
        fprintf(stderr, "This interval length might reboot the system while the process sleeps!\n");
        fprintf(stderr, "To force this interval length use the -f option.\n");
        exit(1);
    }

    if (maxload1 > 0 && maxload1 < MINLOAD && !force) {
        fprintf(stderr, "%s error:\n", progname);
        fprintf(stderr, "Using this maximal load average might reboot the system too often!\n");
        fprintf(stderr, "To force this load average use the -f option.\n");
        exit(1);
    }

    /* make sure we get our own log directory */
    if (mkdir (logdir, 0750) && errno != EEXIST) {
        fprintf(stderr, "%s error:\n", progname);
        fprintf(stderr, "Cannot create directory %s\n", logdir);
        exit (1);
    }

    /* set up pinging if in ping mode */
    if (target != NULL) {
        for (act = target; act != NULL; act = act->next) {
            struct protoent *proto;
            struct pingmode *net = (struct pingmode *) calloc(1, sizeof(struct pingmode));

            if (net == NULL) {
                fprintf(stderr, "%s: out of memory\n", progname);
                exit(1);
            }

            /* setup the socket */
            memset(&(net->to), 0, sizeof(struct sockaddr));
            ((struct sockaddr_in *) &(net->to))->sin_family = AF_INET;
            if ((((struct sockaddr_in *) &(net->to))->sin_addr.s_addr = inet_addr(act->name)) == (unsigned int) -1) {
                (void) fprintf(stderr, "%s: unknown host %s\n", progname, act->name);
                exit(1);
            }
            if (!(net->packet = (unsigned char *) malloc((unsigned int) (DATALEN + MAXIPLEN + MAXICMPLEN)))) {
                fprintf(stderr, "%s: out of memory\n", progname);
                exit(1);
            }
            if (!(proto = getprotobyname("icmp"))) {
                (void) fprintf(stderr, "%s: unknown protocol icmp.\n", progname);
                exit(1);
            }
            if ((net->sock_fp = socket(AF_INET, SOCK_RAW, proto->p_proto)) < 0
                || fcntl(net->sock_fp, F_SETFD, 1)) {
                perror(progname);
                exit(1);
            }

            /* this is necessary for broadcast pings to work */
            /* SO_BROADCAST takes a boolean int; it used to be passed
             * uninitialised, so the option was set to an arbitrary value */
            hold = 1;
            (void) setsockopt(net->sock_fp, SOL_SOCKET, SO_BROADCAST, (char *)&hold, sizeof(hold));
            hold = 48 * 1024;
            (void) setsockopt(net->sock_fp, SOL_SOCKET, SO_RCVBUF, (char *) &hold,
                              sizeof(hold));

            /* the list entry holds a copy, so the temporary can go;
             * the packet buffer stays owned by the copy */
            act->parameter.net = *net;
            free(net);
        }
    }

    /* make sure we're on the root partition */
    if (chdir("/") < 0) {
        perror(progname);
        exit(1);
    }

    /* allocate some memory to store a filename, this is needed later on even
     * if the system runs out of memory */
    filename_buf = (char*)malloc(strlen(logdir) + sizeof("/repair-bin.stdout") + 1);
    if (!filename_buf) {
        perror(progname);
        exit(1);
    }

#if !defined(DEBUG)
    if ( ! foreground ) {
        /* fork to go into the background */
        if ((child_pid = fork()) < 0) {
            perror(progname);
            exit(1);
        } else if (child_pid > 0) {
            /* fork was okay */
            /* wait for child to exit */
            if (waitpid(child_pid, NULL, 0) != child_pid) {
                perror(progname);
                exit(1);
            }
            /* and exit myself */
            exit(0);
        }
        /* and fork again to make sure we inherit all rights from init */
        if ((child_pid = fork()) < 0) {
            perror(progname);
            exit(1);
        } else if (child_pid > 0)
            exit(0);
        /* now we're free */
#if USE_SYSLOG
        /* Okay, we're a daemon */
        /* but we're still attached to the tty */
        /* create our own session */
        setsid();
        /* with USE_SYSLOG we don't do any console IO */
        close(0);
        close(1);
        close(2);
#endif /* USE_SYSLOG */
    }
#endif /* !DEBUG */

#if USE_SYSLOG
    /* Log the starting message */
    openlog(progname, LOG_PID, LOG_DAEMON);
    syslog(LOG_INFO, "starting daemon (%d.%d):", MAJOR_VERSION, MINOR_VERSION);
    syslog(LOG_INFO, "int=%ds realtime=%s sync=%s soft=%s mla=%d mem=%d",
           tint,
           realtime ? "yes" : "no",
           sync_it ? "yes" : "no",
           softboot ? "yes" : "no",
           maxload1, minpages);

    if (target == NULL)
        syslog(LOG_INFO, "ping: no machine to check");
    else
        for (act = target; act != NULL; act = act->next)
            syslog(LOG_INFO, "ping: %s", act->name);

    if (file == NULL)
        syslog(LOG_INFO, "file: no file to check");
    else
        for (act = file; act != NULL; act = act->next)
            syslog(LOG_INFO, "file: %s:%d", act->name, act->parameter.file.mtime);

    if (pidfile == NULL)
        syslog(LOG_INFO, "pidfile: no server process to check");
    else
        for (act = pidfile; act != NULL; act = act->next)
            syslog(LOG_INFO, "pidfile: %s", act->name);

    if (iface == NULL)
        syslog(LOG_INFO, "interface: no interface to check");
    else
        for (act = iface; act != NULL; act = act->next)
            syslog(LOG_INFO, "interface: %s", act->name);

    syslog(LOG_INFO, "test=%s(%ld) repair=%s(%ld) alive=%s heartbeat=%s temp=%s to=%s no_act=%s",
           (tbinary == NULL) ? "none" : tbinary, timeout,
           (rbinary == NULL) ? "none" : rbinary, rtimeout,
           (devname == NULL) ? "none" : devname,
           (heartbeat == NULL) ? "none" : heartbeat,
           (tempname == NULL) ? "none" : tempname,
           (admin == NULL) ? "noone" : admin,
           (no_act == TRUE) ? "yes" : "no");
#endif /* USE_SYSLOG */

#ifdef EVENT_FILENAME
    watchdog_event_log_init();
#endif

    /* open the device */
    if (devname != NULL && no_act == FALSE) {
        watchdog = open(devname, O_WRONLY);
        if (watchdog == -1) {
#if USE_SYSLOG
            syslog(LOG_ERR, "cannot open %s (errno = %d = '%m')", devname, errno);
#else /* USE_SYSLOG */
            perror(progname);
#endif /* USE_SYSLOG */
            /* do not exit here per default */
            /* we can use watchdog even if there is no watchdog device */
        }
        if (watchdog >= 0) {
            if (devtimeout > 0) {
                /* Set the watchdog hard-stop timeout; default = unset (use
                   driver default) */
                if (ioctl(watchdog, WDIOC_SETTIMEOUT, &devtimeout) < 0) {
#if USE_SYSLOG
                    syslog(LOG_ERR, "cannot set timeout %d (errno = %d = '%m')", devtimeout, errno);
#else
                    perror(progname);
#endif
                }
            }
#if USE_SYSLOG
            /* Also log watchdog identity */
            if (ioctl(watchdog, WDIOC_GETSUPPORT, &ident) < 0) {
                syslog(LOG_ERR, "cannot get watchdog identity (errno = %d = '%m')", errno);
            }
            else {
                ident.identity[sizeof(ident.identity) - 1] = '\0'; /* Be sure */
                syslog(LOG_INFO, "hardware wartchdog identity: %s", ident.identity);
            }
#endif
        }
    }

    /* MJ 16/2/2000, need to keep track of the watchdog writes so that
       I can have a potted history of recent reboots */
    if ( heartbeat != NULL ) {
        /* open for update, creating the file if it does not exist yet */
        hb = ((hb = fopen(heartbeat, "r+")) == NULL) ? fopen(heartbeat, "w+") : hb;
        if ( hb == NULL ) {
#if USE_SYSLOG
            syslog(LOG_ERR, "cannot open %s (errno = %d = '%m')", heartbeat, errno);
#else
            perror(progname);
#endif
        }
        else {
            char rbuf[TS_SIZE + 1];

            /* Allocate memory for keeping the timestamps in */
            nrts = 0;
            lastts = 0;
            timestamps = (char *) calloc(hbstamps, TS_SIZE);
            if ( timestamps == NULL ) {
#if USE_SYSLOG
                syslog(LOG_ERR, "cannot allocate memory for timestamps (errno = %d = '%m')", errno);
#else /* USE_SYSLOG */
                perror(progname);
#endif /* USE_SYSLOG */
            }
            else {
                /* read any previous timestamps */
                /* the buffer is a ring of hbstamps slots of TS_SIZE bytes */
                rewind(hb);
                while ( fgets(rbuf, TS_SIZE + 1, hb) != NULL ) {
                    memcpy(timestamps + (TS_SIZE * lastts), rbuf, TS_SIZE);
                    if (nrts < hbstamps)
                        nrts++;
                    ++lastts;
                    lastts = lastts % hbstamps;
                }
                /* Write an indication that the watchdog has started to the heartbeat file */
                /* copy it to the buffer */
                sprintf(rbuf, "%*s\n", TS_SIZE - 1, "--restart--");
                memcpy(timestamps + (lastts * TS_SIZE), rbuf, TS_SIZE);
                // success
                if (nrts < hbstamps)
                    nrts++;
                ++lastts;
                lastts = lastts % hbstamps;
            }
        }
    }

    if (maxload1 > 0) {
        /* open the load average file */
        load = open("/proc/loadavg", O_RDONLY);
        if (load == -1) {
#if USE_SYSLOG
            syslog(LOG_ERR, "cannot open /proc/loadavg (errno = %d = '%m')", errno);
#else /* USE_SYSLOG */
            perror(progname);
#endif /* USE_SYSLOG */
        }
    }

    if (minpages > 0) {
        /* open the memory info file */
        mem = open("/proc/meminfo", O_RDONLY);
        if (mem == -1) {
#if USE_SYSLOG
            syslog(LOG_ERR, "cannot open /proc/meminfo (errno = %d = '%m')", errno);
#else /* USE_SYSLOG */
            perror(progname);
#endif /* USE_SYSLOG */
        }
    }

    if (tempname != NULL && no_act == FALSE) {
        /* open the temperature file */
        temp = open(tempname, O_RDONLY);
        if (temp == -1) {
#if USE_SYSLOG
            syslog(LOG_ERR, "cannot open %s (errno = %d = '%m')", tempname, errno);
#else /* USE_SYSLOG */
            perror(progname);
#endif /* USE_SYSLOG */
        }
    }

    /* init reboot service */
    if (reboot_service_init()) {
#if USE_SYSLOG
        syslog(LOG_ERR, "cannot init reboot service %s", strerror(errno));
#else /* USE_SYSLOG */
        perror(progname);
#endif /* USE_SYSLOG */
    }

    /* tuck my process id away */
    pid = getpid();
    fp = fopen(PIDFILE, "w");
    if (fp != NULL) {
        fprintf(fp, "%d\n", pid);
        (void) fclose(fp);
    }

    /* set signal term to set our run flag to 0 so that */
    /* we make sure watchdog device is closed when receiving SIGTERM */
    signal(SIGTERM, sigterm_handler);

#if defined(_POSIX_MEMLOCK)
    if (realtime == TRUE) {
        /* for Eureka, do not call mlockall(), but keep realtime scheduler */
#if !defined(BUILD_EUREKA)
        /* lock all actual and future pages into memory */
        if (mlockall(MCL_CURRENT | MCL_FUTURE) != 0) {
#if USE_SYSLOG
            syslog(LOG_ERR, "cannot lock realtime memory (errno = %d = '%m')", errno);
#else /* USE_SYSLOG */
            perror(progname);
#endif /* USE_SYSLOG */
        } else {
#endif // !BUILD_EUREKA
            struct sched_param sp;

            /* now set the scheduler */
            sp.sched_priority = schedprio;
            if (sched_setscheduler(0, SCHED_RR, &sp) != 0) {
#if USE_SYSLOG
                syslog(LOG_ERR, "cannot set scheduler (errno = %d = '%m')", errno);
#else /* USE_SYSLOG */
                perror(progname);
#endif /* USE_SYSLOG */
            } else
                mlocked = TRUE;
#if !defined(BUILD_EUREKA)
        }
#endif // !BUILD_EUREKA
    }
#endif

    /* tell oom killer to not kill this process */
    /* the newer oom_score_adj interface is tried first, oom_adj second */
#ifdef OOM_SCORE_ADJ_MIN
    if ( ! stat("/proc/self/oom_score_adj", &s) ) {
        fp = fopen("/proc/self/oom_score_adj", "w");
        if (fp) {
            fprintf(fp, "%d\n", OOM_SCORE_ADJ_MIN);
            (void) fclose(fp);
            oom_adjusted = 1;
        }
    }
#endif
#ifdef OOM_DISABLE
    if ( ! oom_adjusted ) {
        if ( ! stat("/proc/self/oom_adj", &s) ) {
            fp = fopen("/proc/self/oom_adj", "w");
            if (fp) {
                fprintf(fp, "%d\n", OOM_DISABLE);
                (void) fclose(fp);
                oom_adjusted = 1;
            }
        }
    }
#endif
#if USE_SYSLOG
    if ( ! oom_adjusted ) {
        syslog(LOG_WARNING, "unable to disable oom handling!");
    }
#endif /* USE_SYSLOG */

    /* main loop: update after <tint> seconds */
    while (_running) {
        wd_action(keep_alive(), rbinary, "keep-alive-main", 0);

        /* sync system if we have to */
        do_check(sync_system(sync_it), rbinary, "sync-system");

        /* check file table */
        do_check(check_file_table(), rbinary, "check-file-table");

        /* check load average */
        do_check(check_load(), rbinary, "check-load-average");

        /* check free memory */
        do_check(check_memory(), rbinary, "check-free-memory");

        /* check temperature */
        do_check(check_temp(), rbinary, "check-temperature");

        /* in filemode stat file */
        for (act = file; act != NULL; act = act->next)
            do_check(check_file_stat(act), rbinary, act->name);

        /* in pidmode kill -0 processes */
        for (act = pidfile; act != NULL; act = act->next)
            do_check(check_pidfile(act), rbinary, act->name);

        /* in network mode check the given devices for input */
        for (act = iface; act != NULL; act = act->next)
            do_check(check_iface(act), rbinary, act->name);

        /* in ping mode ping the ip address */
        for (act = target; act != NULL; act = act->next)
            do_check(check_net(act->name, act->parameter.net.sock_fp, act->parameter.net.to, act->parameter.net.packet, tint , pingcount), rbinary, act->name);

        /* in user mode execute the given binary or just test fork() call */
        do_check(check_bin(tbinary, timeout, 0), rbinary, "test-binary");

#ifdef TESTBIN_PATH
        /* test/repair binaries in the watchdog.d directory */
        for (act = tr_bin; act != NULL; act = act->next)
            /* Use version 1 for testbin-path */
            do_check2(check_bin(act->name, timeout, 1), act->name, rbinary, "test-binary2");
#endif

        /* do custom check */
        do_check(custom_check(), rbinary, "custom");

        /* finally sleep some seconds */
        /* this should make watchdog sleep tint seconds alltogther */
        /* sleep in half-second slices to notice reboot requests quickly */
        for (int i = 0; i < tint*2; ++i) {
            if (is_reboot_requested())
                do_shutdown(EREBOOT, true);
            usleep(500000);
        }

#if USE_SYSLOG
        /* do verbose logging */
        if (verbose && logtick && (--ticker == 0)) {
            ticker = logtick;
            count += logtick;
            syslog(LOG_INFO, "still alive after %ld interval(s)", count);
        }
#endif /* USE_SYSLOG */

        if (is_reboot_requested())
            do_shutdown(EREBOOT, true);
    }

    terminate();
    /* not reached */
    exit (EXIT_SUCCESS);
}