- Add a software watchdog facility.

This commit has two pieces. One half is the watchdog kernel code which lives
primarily in hardclock() in sys/kern/kern_clock.c. The other half is a userland
daemon which, when run, will keep the watchdog from firing while the userland
is intact and functioning.

Approved by:	jeff (mentor)
This commit is contained in:
Sean Kelly 2003-06-26 09:50:52 +00:00
parent b1d6647755
commit 370c3cb57c
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=116874
12 changed files with 560 additions and 2 deletions

View File

@ -430,6 +430,7 @@ jail_list="" # Space separated list of names of jails
jail_set_hostname_allow="YES" # Allow root user in a jail to change its hostname
jail_socket_unixiproute_only="YES" # Route only TCP/IP within a jail
jail_sysvipc_allow="NO" # Allow SystemV IPC use from within a jail
watchdogd_enable="NO" # Start the software watchdog daemon
##############################################################
### Define source_rc_confs, the mechanism used by /etc/rc.* ##

View File

@ -17,8 +17,8 @@ FILES= DAEMON LOGIN NETWORKING SERVERS abi accounting addswap adjkerntz amd \
ntpdate othermta pccard pcvt ppp-user pppoed pwcheck quota random \
rarpd rcconf.sh root route6d routed rpcbind rtadvd rwho savecore \
securelevel sendmail serial sppp sshd swap1 syscons sysctl \
syslogd timed ttys usbd vinum virecover ypbind yppasswdd ypserv \
ypset ypupdated ypxfrd
syslogd timed ttys usbd vinum virecover watchdogd ypbind \
yppasswdd ypserv ypset ypupdated ypxfrd
FILESDIR= /etc/rc.d
FILESMODE= ${BINMODE}

51
etc/rc.d/watchdogd Normal file
View File

@ -0,0 +1,51 @@
#!/bin/sh
# Copyright (c) 2003 Sean M. Kelly <smkelly@FreeBSD.org>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.
#
# $FreeBSD$
#
# PROVIDE: watchdogd
# REQUIRE: DAEMON
# KEYWORD: FreeBSD
# Source the shared rc.subr helper functions used below.
. /etc/rc.subr
# Service name; also used to build the command and pid file paths.
name="watchdogd"
# rc.conf variable controlling this service, as produced by set_rcvar.
rcvar="`set_rcvar`"
# Daemon binary to run.
command="/usr/sbin/${name}"
# Function to run before starting the daemon (defined below).
start_precmd="watchdogd_precmd"
# Where the daemon is expected to record its process id.
pidfile="/var/run/${name}.pid"
# Pre-start check: refuse to start unless the kernel exposes the
# debug.watchdog sysctl tree.  When the sysctl lookup fails, err is
# invoked with status 1 and an explanatory message; otherwise the
# function reports success.
watchdogd_precmd()
{
	sysctl debug.watchdog >/dev/null 2>&1 ||
		err 1 "Your kernel doesn't have watchdog support."
	return 0
}
# Load the rc configuration for this service name, then hand the action
# given as the script's first argument to run_rc_command.
load_rc_config $name
run_rc_command "$1"

View File

@ -270,6 +270,7 @@ MAN= aac.4 \
vlan.4 \
vpo.4 \
vr.4 \
watchdog.4 \
wb.4 \
wi.4 \
witness.4 \

77
share/man/man4/watchdog.4 Normal file
View File

@ -0,0 +1,77 @@
.\" Copyright (c) 2003 Sean M. Kelly <smkelly@FreeBSD.org>
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd June 25, 2003
.Dt WATCHDOG 4
.Os
.Sh NAME
.Nm watchdog
.Nd Software watchdog
.Sh SYNOPSIS
.Cd options WATCHDOG
.Sh DESCRIPTION
.Nm
is a set of checks and routines which allow the implementation of a software
watchdog solution.
.Pp
The user interface for
.Nm
is implemented via a trio of sysctl OIDs.
When
.Li debug.watchdog.enabled
is set to a positive value,
.Nm
timeout checks are performed.
In order to keep the watchdog from triggering,
.Li debug.watchdog.reset
must be accessed,
by reading or writing,
at least once every
.Li debug.watchdog.timeout
seconds.
Failure to keep the
.Nm
updated will result in the kernel outputting interrupt counts,
backtraces,
and then attempting to enter
.Xr ddb 4 .
.Sh SEE ALSO
.Xr sysctl 8 ,
.Xr watchdogd 8
.Sh HISTORY
The
.Nm
code first appeared in
.Fx 5.1 .
.Sh AUTHORS
.An -nosplit
The
.Nm
code and manual page were written by
.An Sean Kelly Aq smkelly@FreeBSD.org .
Some contributions were made by
.An Jeff Roberson Aq jeff@FreeBSD.org .

View File

@ -2064,6 +2064,13 @@ options BOOTP_WIRED_TO=fxp0 # Use interface fxp0 for BOOTP
#
options HW_WDOG
#
# Add software watchdog routines. This will add some sysctl OIDs that
# can be used in combination with an external daemon to create a
# software-based watchdog solution.
#
options WATCHDOG
#
# Disable swapping of upages and stack pages. This option removes all
# code which actually performs swapping, so it's not possible to turn

View File

@ -442,6 +442,7 @@ MUTEX_PROFILING opt_global.h
NPX_DEBUG opt_debug_npx.h
NETATALKDEBUG opt_atalk.h
SI_DEBUG opt_debug_si.h
WATCHDOG opt_watchdog.h
# Fb options
FB_DEBUG opt_fb.h

View File

@ -42,6 +42,8 @@
__FBSDID("$FreeBSD$");
#include "opt_ntp.h"
#include "opt_ddb.h"
#include "opt_watchdog.h"
#include <sys/param.h>
#include <sys/systm.h>
@ -71,6 +73,10 @@ __FBSDID("$FreeBSD$");
#include <sys/gmon.h>
#endif
#ifdef DDB
#include <ddb/ddb.h>
#endif
#ifdef DEVICE_POLLING
extern void hardclock_device_poll(void);
#endif /* DEVICE_POLLING */
@ -84,6 +90,22 @@ long cp_time[CPUSTATES];
SYSCTL_OPAQUE(_kern, OID_AUTO, cp_time, CTLFLAG_RD, &cp_time, sizeof(cp_time),
"LU", "CPU time statistics");
#ifdef WATCHDOG
/* Handler for the debug.watchdog.reset sysctl OID. */
static int sysctl_watchdog_reset(SYSCTL_HANDLER_ARGS);
/* Called from hardclock() once the watchdog timeout has expired. */
static void watchdog_fire(void);
/* Timeout checks are performed only while this is greater than zero. */
static int watchdog_enabled;
/* Value of ticks recorded by the most recent watchdog reset. */
static unsigned int watchdog_ticks;
/* Timeout in seconds (multiplied by hz when compared against ticks). */
static int watchdog_timeout = 20;
/* debug.watchdog: parent node for the watchdog sysctl OIDs. */
SYSCTL_NODE(_debug, OID_AUTO, watchdog, CTLFLAG_RW, 0, "System watchdog");
/* debug.watchdog.enabled: read/write access to watchdog_enabled. */
SYSCTL_INT(_debug_watchdog, OID_AUTO, enabled, CTLFLAG_RW, &watchdog_enabled,
0, "Enable the watchdog");
/* debug.watchdog.timeout: read/write access to watchdog_timeout. */
SYSCTL_INT(_debug_watchdog, OID_AUTO, timeout, CTLFLAG_RW, &watchdog_timeout,
0, "Timeout for watchdog checkins");
#endif /* WATCHDOG */
/*
* Clock handling routines.
*
@ -228,6 +250,12 @@ hardclock(frame)
*/
if (need_softclock)
swi_sched(softclock_ih, 0);
#ifdef WATCHDOG
if (watchdog_enabled > 0 &&
(int)(ticks - watchdog_ticks) >= (hz * watchdog_timeout))
watchdog_fire();
#endif /* WATCHDOG */
}
/*
@ -481,3 +509,57 @@ sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
0, 0, sysctl_kern_clockrate, "S,clockinfo",
"Rate and period of various kernel clocks");
#ifdef WATCHDOG
/*
* Reset the watchdog timer to ticks, thus preventing the watchdog
* from firing for another watchdog timeout period.
*/
static int
sysctl_watchdog_reset(SYSCTL_HANDLER_ARGS)
{
int ret;
ret = 0;
watchdog_ticks = ticks;
return sysctl_handle_int(oidp, &ret, 0, req);
}
SYSCTL_PROC(_debug_watchdog, OID_AUTO, reset, CTLFLAG_RW, 0, 0,
sysctl_watchdog_reset, "I", "Reset the watchdog");
/*
 * Handle a watchdog timeout by dumping interrupt information and
 * then either dropping to DDB or panicking.
 *
 * Every interrupt counter between intrcnt and eintrcnt is walked; the
 * non-zero ones are printed next to their name and all of them are
 * summed into a grand total.  When the kernel is built with DDB a
 * backtrace is printed and the debugger is entered, otherwise the
 * kernel panics.
 */
static void
watchdog_fire(void)
{
	int nintr;
	u_int64_t inttotal;
	u_long *curintr;
	char *curname;

	curintr = intrcnt;
	curname = intrnames;
	inttotal = 0;
	nintr = eintrcnt - intrcnt;

	printf("interrupt total\n");
	while (--nintr >= 0) {
		if (*curintr)
			printf("%-12s %20lu\n", curname, *curintr);
		/* Names are stored back to back, each NUL-terminated. */
		curname += strlen(curname) + 1;
		inttotal += *curintr++;
	}
	/*
	 * u_int64_t is not unsigned long long on every platform, so cast
	 * explicitly to the type the %llu conversion expects.
	 */
	printf("Total %20llu\n", (unsigned long long)inttotal);

#ifdef DDB
	db_print_backtrace();
	Debugger("watchdog timeout");
#else /* !DDB */
	panic("watchdog timeout");
#endif /* DDB */
}
#endif /* WATCHDOG */

View File

@ -124,6 +124,7 @@ SUBDIR= IPXrouted \
vipw \
vnconfig \
watch \
watchdogd \
wicontrol \
yp_mkdb \
ypbind \

View File

@ -0,0 +1,8 @@
# $FreeBSD$
PROG= watchdogd
SRCS= watchdogd.c
MAN= watchdogd.8
WARNS= 6
.include <bsd.prog.mk>

View File

@ -0,0 +1,97 @@
.\" Copyright (c) 2003 Sean M. Kelly <smkelly@FreeBSD.org>
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd June 25, 2003
.Dt WATCHDOGD 8
.Os
.Sh NAME
.Nm watchdogd
.Nd Software watchdog daemon
.Sh SYNOPSIS
.Nm
.Op Fl d
.Op Fl I Ar file
.Sh DESCRIPTION
The
.Nm
utility interfaces with the kernel's software watchdog facility to ensure
that the system is in a working state.
If
.Nm
is unable to interface with the kernel within the configured timeout,
the kernel will take actions to assist in debugging or restarting the computer.
.Pp
One possible circumstance which will cause a watchdog timeout is an interrupt
storm.
If this occurs,
.Nm
will no longer execute and thus the kernel's watchdog routines will take
action after a configurable timeout.
.Pp
Upon receiving the
.Dv SIGTERM
or
.Dv SIGINT
signals,
.Nm
will first instruct the kernel to no longer perform watchdog checks and then
will terminate.
.Pp
The
.Nm
utility recognizes the following runtime options:
.Bl -tag -width ".Fl I Ar file"
.It Fl I Ar file
Write the process id of the
.Nm
utility in the specified file.
.It Fl d
Don't fork.
When this option is specified,
.Nm
will not fork into the background at startup.
.El
.Sh FILES
.Bl -tag -width "/var/run/watchdogd.pid" -compact
.It Pa /var/run/watchdogd.pid
.El
.Sh SEE ALSO
.Xr watchdog 4 ,
.Xr sysctl 8
.Sh AUTHORS
.An -nosplit
The
.Nm
utility and manual page were written by
.An Sean Kelly Aq smkelly@FreeBSD.org .
.Pp
Some contributions made by
.An Jeff Roberson Aq jeff@FreeBSD.org .
.Sh HISTORY
The
.Nm
utility appeared in
.Fx 5.1 .

View File

@ -0,0 +1,232 @@
/*
* Copyright (c) 2003 Sean M. Kelly <smkelly@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/*
* Software watchdog daemon.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/sysctl.h>
#include <sys/time.h>
#include <sys/rtprio.h>
#include <sys/stat.h>
#include <err.h>
#include <paths.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
#include <signal.h>
static void	parseargs(int, char *[]);
static void	sighandler(int);
static void	watchdog_loop(void);
static int	watchdog_init(void);
static int	watchdog_onoff(int onoff);
static int	watchdog_tickle(void);
static void	usage(void);

/* Non-zero when -d was given: stay in the foreground. */
int debugging = 0;
/*
 * Set from the signal handler and polled by the main loop, so it must
 * be a volatile sig_atomic_t for the access to be well defined.
 */
volatile sig_atomic_t end_program = 0;
/* Path of the pid file; may be overridden with -I. */
const char *pidfile = _PATH_VARRUN "watchdogd.pid";
/* Resolved MIB of the debug.watchdog.reset OID and its length. */
int reset_mib[3];
int reset_miblen = 3;
/*
 * Periodically write to the debug.watchdog.reset sysctl OID
 * to keep the software watchdog from firing.
 *
 * Sequence: require uid 0, parse the command line, switch to realtime
 * priority, locate and tickle the reset OID, enable the kernel watchdog,
 * detach from the terminal unless debugging, install signal handlers,
 * record the pid, and run the tickle loop.  Once the loop ends the
 * watchdog is disabled again and the pid file removed.
 *
 * Exits with EX_SOFTWARE when not run as root or when the watchdog
 * cannot be initialised or enabled, with EX_OSERR when rtprio() or
 * daemon() fail, and returns EX_OK after a clean shutdown.
 */
int
main(int argc, char *argv[])
{
	struct rtprio rtp;
	FILE *fp;

	if (getuid() != 0)
		errx(EX_SOFTWARE, "not super user");

	parseargs(argc, argv);

	rtp.type = RTP_PRIO_REALTIME;
	rtp.prio = 0;
	if (rtprio(RTP_SET, 0, &rtp) == -1)
		err(EX_OSERR, "rtprio");

	if (watchdog_init() == -1)
		exit(EX_SOFTWARE);

	if (watchdog_onoff(1) == -1)
		exit(EX_SOFTWARE);

	if (debugging == 0 && daemon(0, 0) == -1) {
		/* Do not leave the watchdog armed with nobody tickling it. */
		watchdog_onoff(0);
		err(EX_OSERR, "daemon");
	}

	signal(SIGHUP, SIG_IGN);
	signal(SIGINT, sighandler);
	signal(SIGTERM, sighandler);

	/*
	 * The pid file is best effort: a failure is reported with warn()
	 * but does not stop the daemon from doing its real job.
	 */
	fp = fopen(pidfile, "w");
	if (fp == NULL) {
		warn("could not open pid file %s", pidfile);
	} else {
		if (fprintf(fp, "%d\n", (int)getpid()) < 0)
			warn("could not write pid file %s", pidfile);
		if (fclose(fp) != 0)
			warn("could not close pid file %s", pidfile);
	}

	watchdog_loop();

	/* exiting */
	watchdog_onoff(0);
	unlink(pidfile);
	return (EX_OK);
}
/*
 * Signal handler: SIGINT and SIGTERM request termination by raising
 * the end_program flag; every other signal number is ignored.
 */
static void
sighandler(int signum)
{
	switch (signum) {
	case SIGINT:
	case SIGTERM:
		end_program = 1;
		break;
	default:
		break;
	}
}
/*
 * Locate the OID for the 'debug.watchdog.reset' sysctl setting.
 * Upon finding it, do an initial reset on the watchdog.
 *
 * The resolved MIB is stored in reset_mib and its length in
 * reset_miblen.  Returns -1, after printing a message to stderr, when
 * the name cannot be resolved; otherwise returns the result of the
 * initial watchdog_tickle().
 */
static int
watchdog_init(void)
{
	size_t miblen;
	int error;

	/*
	 * sysctlnametomib() takes a size_t *, so go through a local of the
	 * right type instead of pointing it at the int-sized global.
	 */
	miblen = sizeof(reset_mib) / sizeof(reset_mib[0]);
	error = sysctlnametomib("debug.watchdog.reset", reset_mib, &miblen);
	if (error == -1) {
		fprintf(stderr, "Could not find reset OID: %s\n",
		    strerror(errno));
		return (error);
	}
	reset_miblen = (int)miblen;

	return (watchdog_tickle());
}
/*
 * Main program loop, run until end_program becomes non-zero.
 *
 * Each pass stats "/etc" and tickles the watchdog only when that
 * succeeds (presumably as a check that the filesystem still responds),
 * then sleeps for one second.
 */
static void
watchdog_loop(void)
{
	struct stat st;

	while (!end_program) {
		if (stat("/etc", &st) == 0)
			watchdog_tickle();
		sleep(1);
	}
}
/*
 * Reset the watchdog timer.  This function must be called periodically
 * to keep the watchdog from firing.
 *
 * Accesses the OID stored in reset_mib without reading or writing any
 * value; the access itself is what resets the timer.  Returns the
 * result of sysctl().
 */
static int
watchdog_tickle(void)
{
	return (sysctl(reset_mib, reset_miblen, NULL, NULL, NULL, 0));
}
/*
 * Toggle the kernel's watchdog.  This routine is used to enable and
 * disable the watchdog.
 *
 * onoff is written to the debug.watchdog.enabled OID; a positive value
 * enables the watchdog and anything else disables it.  Returns 0 on
 * success, or -1 after printing a message to stderr when the OID cannot
 * be resolved or written.
 */
static int
watchdog_onoff(int onoff)
{
	int mib[3];
	size_t len;
	int error;

	/* sysctlnametomib() expects the MIB capacity as a size_t. */
	len = sizeof(mib) / sizeof(mib[0]);
	error = sysctlnametomib("debug.watchdog.enabled", mib, &len);
	if (error == 0)
		error = sysctl(mib, (u_int)len, NULL, NULL, &onoff,
		    sizeof(onoff));
	if (error == -1) {
		fprintf(stderr, "Could not %s watchdog: %s\n",
		    (onoff > 0) ? "enable" : "disable",
		    strerror(errno));
		return (error);
	}
	return (0);
}
/*
 * Print the command line synopsis on stderr and terminate the program
 * with EX_USAGE.
 */
static void
usage(void)
{
	fputs("usage: watchdogd [-d] [-I file]\n", stderr);
	exit(EX_USAGE);
}
/*
 * Process the command line options:
 *   -I file   use file as the pid file
 *   -d        set the debugging flag
 * Any other option, including -?, ends up in usage().
 */
static void
parseargs(int argc, char *argv[])
{
	int opt;

	for (;;) {
		opt = getopt(argc, argv, "I:d?");
		if (opt == -1)
			break;

		if (opt == 'I')
			pidfile = optarg;
		else if (opt == 'd')
			debugging = 1;
		else
			usage();	/* exits the program */
	}
}