Rename the WATCHDOG option to SW_WATCHDOG and make it use the

generic watchdog(9) interface.

Make watchdogd(8) perform as watchdog(8) as well, and make it
possible to specify a check command to run, timeout and sleep
periods.

Update watchdog(4) to talk about the generic interface and add
new watchdog(8) page.
This commit is contained in:
Poul-Henning Kamp 2004-02-28 20:56:35 +00:00
parent 2676fbe88c
commit 4103b7652d
8 changed files with 283 additions and 122 deletions

View File

@ -1,4 +1,4 @@
.\" Copyright (c) 2004 Poul-Henning Kamp <phk@FreeBSD.org>
.\" Copyright (c) 2003 Sean M. Kelly <smkelly@FreeBSD.org>
.\" All rights reserved.
.\"
@ -26,42 +26,49 @@
.\" $FreeBSD$
.\"
.Dd June 25, 2003
.Dt WATCHDOG 4
.Dt watchdog 4
.Os
.Sh NAME
.Nm watchdog
.Nd Software watchdog
.Nd Hardware and Software watchdog
.Sh SYNOPSIS
.Cd options WATCHDOG
.Cd options CPU_ELAN
.Cd options CPU_GEODE
.Cd options SW_WATCHDOG
.Pp
.In sys/watchdog.h
.Bd -literal
u_int u = WD_ACTIVE | WD_8SEC;
int fd = open("/dev/" _PATH_WATCHDOG, O_RDWR);
ioctl(fd, WDIOCPATPAT, &u);
.Ed
.Sh DESCRIPTION
.Nm
is a set of checks and routines which allow the implementation of a software
watchdog solution.
is a facility for controlling hardware and software watchdog facilities.
.Pp
The user interface for
.Nm
is implemented via a trio of sysctl OIDs.
When
.Li debug.watchdog.enabled
is set to a positive value,
.Nm
timeout checks are performed.
In order to keep the watchdog from triggering,
.Li debug.watchdog.reset
must be accessed,
by reading or writing,
within every
.Li debug.watchdog.timeout
seconds.
Failure to keep the
.Nm
updated will result in the kernel outputting interrupt counts,
backtraces,
and then attempting to enter
.Xr ddb 9 .
The interface is through a device named "/dev/" _PATH_WATCHDOG which
responds to a single ioctl call, WDIOCPATPAT.
.Pp
The ioctl call takes an argument which consists of a timeout value
specified as an integer power of two nanoseconds.
.Pp
The WD_ACTIVE flag signals that the watchdog will be kept from
timing out from userland, for instance by the
.Xr watchdogd 8
daemon.
.Pp
To disable the watchdogs, use an argument of zero.
.Pp
The ioctl call will return success if just one of the available
watchdog implementations supports the request.
If the ioctl fails, for instance if no watchdog supports the timeout
length, all watchdogs are disabled and must be explicitly reenabled.
.Sh SEE ALSO
.Xr sysctl 8 ,
.Xr watchdogd 8 ,
.Xr watchdog 9
.Sh HISTORY
The
.Nm
@ -70,7 +77,12 @@ code first appeared in
.Sh AUTHORS
.An -nosplit
The
.Nm
.Nm watchdog
facility was written by
.An Poul-Henning Kamp Aq phk@FreeBSD.org .
.Pp
The
.Nm SW_WATCHDOG
code and manual page were written by
.An Sean Kelly Aq smkelly@FreeBSD.org .
Some contributions were made by

View File

@ -2089,11 +2089,9 @@ options BOOTP_WIRED_TO=fxp0 # Use interface fxp0 for BOOTP
options HW_WDOG
#
# Add software watchdog routines. This will add some sysctl OIDs that
# can be used in combination with an external daemon to create a
# software-based watchdog solution.
# Add software watchdog routines.
#
options WATCHDOG
options SW_WATCHDOG
#
# Disable swapping of upages and stack pages. This option removes all

View File

@ -153,9 +153,9 @@ SHMSEG opt_sysvipc.h
SYSVMSG opt_sysvipc.h
SYSVSEM opt_sysvipc.h
SYSVSHM opt_sysvipc.h
SW_WATCHDOG opt_watchdog.h
TTYHOG opt_tty.h
VFS_AIO
WATCHDOG opt_watchdog.h
WLCACHE opt_wavelan.h
WLDEBUG opt_wavelan.h

View File

@ -90,21 +90,14 @@ long cp_time[CPUSTATES];
SYSCTL_OPAQUE(_kern, OID_AUTO, cp_time, CTLFLAG_RD, &cp_time, sizeof(cp_time),
"LU", "CPU time statistics");
#ifdef WATCHDOG
static int sysctl_watchdog_reset(SYSCTL_HANDLER_ARGS);
static void watchdog_fire(void);
#ifdef SW_WATCHDOG
#include <sys/watchdog.h>
static int watchdog_ticks;
static int watchdog_enabled;
static unsigned int watchdog_ticks;
static int watchdog_timeout = 20;
SYSCTL_NODE(_debug, OID_AUTO, watchdog, CTLFLAG_RW, 0, "System watchdog");
SYSCTL_INT(_debug_watchdog, OID_AUTO, enabled, CTLFLAG_RW, &watchdog_enabled,
0, "Enable the watchdog");
SYSCTL_INT(_debug_watchdog, OID_AUTO, timeout, CTLFLAG_RW, &watchdog_timeout,
0, "Timeout for watchdog checkins");
#endif /* WATCHDOG */
static void watchdog_fire(void);
static void watchdog_config(void *, u_int, int *);
#endif /* SW_WATCHDOG */
/*
* Clock handling routines.
@ -167,6 +160,9 @@ initclocks(dummy)
if (profhz == 0)
profhz = i;
psratio = profhz / i;
#ifdef SW_WATCHDOG
EVENTHANDLER_REGISTER(watchdog_list, watchdog_config, NULL, 0);
#endif
}
/*
@ -251,11 +247,10 @@ hardclock(frame)
if (need_softclock)
swi_sched(softclock_ih, 0);
#ifdef WATCHDOG
if (watchdog_enabled > 0 &&
(int)(ticks - watchdog_ticks) >= (hz * watchdog_timeout))
#ifdef SW_WATCHDOG
if (watchdog_enabled > 0 && --watchdog_ticks <= 0)
watchdog_fire();
#endif /* WATCHDOG */
#endif /* SW_WATCHDOG */
}
/*
@ -508,24 +503,25 @@ SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
0, 0, sysctl_kern_clockrate, "S,clockinfo",
"Rate and period of various kernel clocks");
#ifdef WATCHDOG
/*
* Reset the watchdog timer to ticks, thus preventing the watchdog
* from firing for another watchdog timeout period.
*/
static int
sysctl_watchdog_reset(SYSCTL_HANDLER_ARGS)
#ifdef SW_WATCHDOG
static void
watchdog_config(void *unused __unused, u_int cmd, int *err)
{
int ret;
u_int u;
ret = 0;
watchdog_ticks = ticks;
return sysctl_handle_int(oidp, &ret, 0, req);
if (cmd) {
u = cmd & WD_INTERVAL;
if (u < WD_TO_1SEC)
return;
watchdog_ticks = (1 << (u - WD_TO_1SEC)) * hz;
watchdog_enabled = 1;
*err = 0;
} else {
watchdog_enabled = 0;
}
}
SYSCTL_PROC(_debug_watchdog, OID_AUTO, reset, CTLFLAG_RW, 0, 0,
sysctl_watchdog_reset, "I", "Reset the watchdog");
/*
* Handle a watchdog timeout by dumping interrupt information and
* then either dropping to DDB or panicking.
@ -560,4 +556,4 @@ watchdog_fire(void)
#endif /* DDB */
}
#endif /* WATCHDOG */
#endif /* SW_WATCHDOG */

View File

@ -1,7 +1,14 @@
# $FreeBSD$
PROG= watchdogd
MAN= watchdogd.8
LINKS= ${BINDIR}/watchdogd ${BINDIR}/watchdog
MAN= watchdogd.8 watchdog.8
WARNS?= 6
LDADD= -lm
DPADD= ${LIBM}
.include <bsd.prog.mk>
test: ${PROG}
./${PROG} -t 1.0

View File

@ -0,0 +1,66 @@
.\" Copyright (c) 2004 Poul-Henning Kamp <phk@FreeBSD.org>
.\" Copyright (c) 2003 Sean M. Kelly <smkelly@FreeBSD.org>
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd February 28, 2004
.Dt WATCHDOG 8
.Os
.Sh NAME
.Nm watchdog
.Nd watchdog control program
.Sh SYNOPSIS
.Nm
.Op Fl d
.Op Fl t Ar timeout
.Sh DESCRIPTION
The
.Nm
utility can be used to control the kernel's watchdog facility.
.Pp
The
.Fl t Ar timeout
specifies the desired timeout period in seconds; a value of
zero will disable the watchdog.
.Sh SEE ALSO
.Xr watchdogd 8 ,
.Xr watchdog 4 ,
.Xr watchdog 9
.Sh AUTHORS
.An -nosplit
The
.Nm
utility and manual page were written by
.An Sean Kelly Aq smkelly@FreeBSD.org
and
.An Poul-Henning Kamp Aq phk@FreeBSD.org .
.Pp
Some contributions made by
.An Jeff Roberson Aq jeff@FreeBSD.org .
.Sh HISTORY
The
.Nm
utility appeared in
.Fx 5.1 .

View File

@ -1,3 +1,4 @@
.\" Copyright (c) 2004 Poul-Henning Kamp <phk@FreeBSD.org>
.\" Copyright (c) 2003 Sean M. Kelly <smkelly@FreeBSD.org>
.\" All rights reserved.
.\"
@ -29,21 +30,46 @@
.Os
.Sh NAME
.Nm watchdogd
.Nd Software watchdog daemon
.Nd Watchdog daemon
.Sh SYNOPSIS
.Nm
.Op Fl d
.Op Fl e Ar cmd
.Op Fl I Ar file
.Op Fl s Ar sleep
.Op Fl t Ar timeout
.Sh DESCRIPTION
The
.Nm
utility interfaces with the kernel's software watchdog facility to ensure
utility interfaces with the kernel's watchdog facility to ensure
that the system is in a working state.
If
.Nm
is unable to interface with the kernel over a specific timeout,
the kernel will take actions to assist in debugging or restarting the computer.
.Pp
If
.Fl e Ar cmd
is specified,
.Nm
will attempt to execute this command with
.Xr system 3
and only if the command returns with a zero exit code will the
watchdog be reset.
If
.Fl e Ar cmd
is not specified the daemon will perform a trivial filesystem
check instead.
.Pp
The
.Fl s Ar sleep
argument can be used to control the sleep period between each execution
of the check and defaults to one second.
.Pp
The
.Fl t Ar timeout
specifies the desired timeout period in seconds.
.Pp
One possible circumstance which will cause a watchdog timeout is an interrupt
storm.
If this occurs,
@ -80,13 +106,16 @@ will not fork into the background at startup.
.El
.Sh SEE ALSO
.Xr watchdog 4 ,
.Xr sysctl 8
.Xr watchdog 8 ,
.Xr watchdog 9
.Sh AUTHORS
.An -nosplit
The
.Nm
utility and manual page were written by
.An Sean Kelly Aq smkelly@FreeBSD.org .
.An Sean Kelly Aq smkelly@FreeBSD.org
and
.An Poul-Henning Kamp Aq phk@FreeBSD.org .
.Pp
Some contributions made by
.An Jeff Roberson Aq jeff@FreeBSD.org .

View File

@ -35,12 +35,17 @@ __FBSDID("$FreeBSD$");
#include <sys/stat.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/watchdog.h>
#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <math.h>
#include <paths.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
@ -49,7 +54,7 @@ static void sighandler(int);
static void watchdog_loop(void);
static int watchdog_init(void);
static int watchdog_onoff(int onoff);
static int watchdog_tickle(void);
static int watchdog_patpat(void);
static void usage(void);
int debugging = 0;
@ -57,6 +62,12 @@ int end_program = 0;
const char *pidfile = _PATH_VARRUN "watchdogd.pid";
int reset_mib[3];
size_t reset_miblen = 3;
u_int timeout = WD_TO_16SEC;
u_int passive = 0;
int is_daemon = 0;
int fd = -1;
int nap = 1;
char *test_cmd = NULL;
/*
* Periodically write to the debug.watchdog.reset sysctl OID
@ -81,30 +92,40 @@ main(int argc, char *argv[])
if (watchdog_init() == -1)
errx(EX_SOFTWARE, "unable to initialize watchdog");
if (watchdog_onoff(1) == -1)
exit(EX_SOFTWARE);
if (is_daemon) {
if (watchdog_onoff(1) == -1)
exit(EX_SOFTWARE);
if (debugging == 0 && daemon(0, 0) == -1) {
if (debugging == 0 && daemon(0, 0) == -1) {
watchdog_onoff(0);
err(EX_OSERR, "daemon");
}
signal(SIGHUP, SIG_IGN);
signal(SIGINT, sighandler);
signal(SIGTERM, sighandler);
fp = fopen(pidfile, "w");
if (fp != NULL) {
fprintf(fp, "%d\n", getpid());
fclose(fp);
}
watchdog_loop();
/* exiting */
watchdog_onoff(0);
err(EX_OSERR, "daemon");
unlink(pidfile);
return (EX_OK);
} else {
if (passive)
timeout |= WD_PASSIVE;
else
timeout |= WD_ACTIVE;
if (watchdog_patpat() < 0)
err(EX_OSERR, "patting the dog");
return (EX_OK);
}
signal(SIGHUP, SIG_IGN);
signal(SIGINT, sighandler);
signal(SIGTERM, sighandler);
fp = fopen(pidfile, "w");
if (fp != NULL) {
fprintf(fp, "%d\n", getpid());
fclose(fp);
}
watchdog_loop();
/* exiting */
watchdog_onoff(0);
unlink(pidfile);
return (EX_OK);
}
/*
@ -125,15 +146,12 @@ sighandler(int signum)
static int
watchdog_init()
{
int error;
error = sysctlnametomib("debug.watchdog.reset", reset_mib,
&reset_miblen);
if (error == -1) {
warn("could not find reset OID");
return (error);
}
return watchdog_tickle();
fd = open("/dev/" _PATH_WATCHDOG, O_RDWR);
if (fd >= 0)
return (0);
warn("Could not open watchdog device");
return (-1);
}
/*
@ -148,11 +166,14 @@ watchdog_loop(void)
while (end_program == 0) {
failed = 0;
failed = stat("/etc", &sb);
if (test_cmd != NULL)
failed = system(test_cmd);
else
failed = stat("/etc", &sb);
if (failed == 0)
watchdog_tickle();
sleep(1);
watchdog_patpat();
sleep(nap);
}
}
@ -161,10 +182,10 @@ watchdog_loop(void)
* to keep the watchdog from firing.
*/
int
watchdog_tickle(void)
watchdog_patpat(void)
{
return sysctl(reset_mib, reset_miblen, NULL, NULL, NULL, 0);
return ioctl(fd, WDIOCPATPAT, &timeout);
}
/*
@ -174,22 +195,12 @@ watchdog_tickle(void)
static int
watchdog_onoff(int onoff)
{
int mib[3];
int error;
size_t len;
len = 3;
error = sysctlnametomib("debug.watchdog.enabled", mib, &len);
if (error == 0)
error = sysctl(mib, len, NULL, NULL, &onoff, sizeof(onoff));
if (error == -1) {
warn("could not %s watchdog",
(onoff > 0) ? "enable" : "disable");
return (error);
}
return (0);
if (onoff)
timeout |= WD_ACTIVE;
else
timeout &= ~WD_ACTIVE;
return watchdog_patpat();
}
/*
@ -198,7 +209,10 @@ watchdog_onoff(int onoff)
static void
usage()
{
fprintf(stderr, "usage: watchdogd [-d] [-I file]\n");
if (is_daemon)
fprintf(stderr, "usage: watchdogd [-d] [-e cmd] [-I file]\n");
else
fprintf(stderr, "usage: watchdog [-d] [-t]\n");
exit(EX_USAGE);
}
@ -209,8 +223,14 @@ static void
parseargs(int argc, char *argv[])
{
int c;
char *p;
double a;
while ((c = getopt(argc, argv, "I:d?")) != -1) {
c = strlen(argv[0]);
if (argv[0][c - 1] == 'd')
is_daemon = 1;
while ((c = getopt(argc, argv,
is_daemon ? "I:de:s:t:?" : "dt:?")) != -1) {
switch (c) {
case 'I':
pidfile = optarg;
@ -218,10 +238,43 @@ parseargs(int argc, char *argv[])
case 'd':
debugging = 1;
break;
case 'e':
test_cmd = strdup(optarg);
break;
#ifdef notyet
case 'p':
passive = 1;
break;
#endif
case 's':
p = NULL;
errno = 0;
nap = strtol(optarg, &p, 0);
if ((p != NULL && *p != '\0') || errno != 0)
errx(EX_USAGE, "-s argument is not a number");
break;
case 't':
p = NULL;
errno = 0;
a = strtod(optarg, &p);
if ((p != NULL && *p != '\0') || errno != 0)
errx(EX_USAGE, "-t argument is not a number");
if (a < 0)
errx(EX_USAGE, "-t argument must be positive");
if (a == 0)
timeout = WD_TO_NEVER;
else
timeout = 1.0 + log(a * 1e9) / log(2.0);
if (debugging)
printf("Timeout is 2^%d nanoseconds\n",
timeout);
break;
case '?':
default:
usage();
/* NOTREACHED */
}
}
if (is_daemon && timeout < WD_TO_1SEC)
errx(EX_USAGE, "-t argument is less than one second.");
}