diff --git a/etc/defaults/rc.conf b/etc/defaults/rc.conf index 1b6f06641080..d7623ac912be 100644 --- a/etc/defaults/rc.conf +++ b/etc/defaults/rc.conf @@ -430,6 +430,7 @@ jail_list="" # Space separated list of names of jails jail_set_hostname_allow="YES" # Allow root user in a jail to change its hostname jail_socket_unixiproute_only="YES" # Route only TCP/IP within a jail jail_sysvipc_allow="NO" # Allow SystemV IPC use from within a jail +watchdogd_enable="NO" # Start the software watchdog daemon ############################################################## ### Define source_rc_confs, the mechanism used by /etc/rc.* ## diff --git a/etc/rc.d/Makefile b/etc/rc.d/Makefile index 4104707f1702..734a3622fb27 100755 --- a/etc/rc.d/Makefile +++ b/etc/rc.d/Makefile @@ -17,8 +17,8 @@ FILES= DAEMON LOGIN NETWORKING SERVERS abi accounting addswap adjkerntz amd \ ntpdate othermta pccard pcvt ppp-user pppoed pwcheck quota random \ rarpd rcconf.sh root route6d routed rpcbind rtadvd rwho savecore \ securelevel sendmail serial sppp sshd swap1 syscons sysctl \ - syslogd timed ttys usbd vinum virecover ypbind yppasswdd ypserv \ - ypset ypupdated ypxfrd + syslogd timed ttys usbd vinum virecover watchdogd ypbind \ + yppasswdd ypserv ypset ypupdated ypxfrd FILESDIR= /etc/rc.d FILESMODE= ${BINMODE} diff --git a/etc/rc.d/watchdogd b/etc/rc.d/watchdogd new file mode 100644 index 000000000000..e2ee5f79cc7a --- /dev/null +++ b/etc/rc.d/watchdogd @@ -0,0 +1,51 @@ +#!/bin/sh + +# Copyright (c) 2003 Sean M. Kelly +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. 
Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#
+# THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+# ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+# SUCH DAMAGE.
+#
+# $FreeBSD$
+#
+
+# PROVIDE: watchdogd
+# REQUIRE: DAEMON
+# KEYWORD: FreeBSD
+
+. /etc/rc.subr
+
+name="watchdogd"
+rcvar="`set_rcvar`"
+command="/usr/sbin/${name}"
+start_precmd="watchdogd_precmd"
+pidfile="/var/run/${name}.pid"
+
+watchdogd_precmd()
+{
+	# Report an error unless the debug.watchdog sysctl node can be read.
+	sysctl debug.watchdog >/dev/null 2>&1 ||
+		err 1 "Your kernel doesn't have watchdog support."
+	return 0
+}
+
+load_rc_config $name
+run_rc_command "$1"
diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile
index b598875c10b3..31985677a429 100644
--- a/share/man/man4/Makefile
+++ b/share/man/man4/Makefile
@@ -270,6 +270,7 @@ MAN= aac.4 \
 	vlan.4 \
 	vpo.4 \
 	vr.4 \
+	watchdog.4 \
 	wb.4 \
 	wi.4 \
 	witness.4 \
diff --git a/share/man/man4/watchdog.4 b/share/man/man4/watchdog.4
new file mode 100644
index 000000000000..b00ccda2ebd0
--- /dev/null
+++ b/share/man/man4/watchdog.4
@@ -0,0 +1,77 @@
+
+.\" Copyright (c) 2003 Sean M. Kelly
+.\" All rights reserved.
+.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd June 25, 2003 +.Dt WATCHDOG 4 +.Os +.Sh NAME +.Nm watchdog +.Nd Software watchdog +.Sh SYNOPSIS +.Cd options WATCHDOG +.Sh DESCRIPTION +.Nm +is a set of checks and routines which allow the implementation of a software +watchdog solution. +.Pp +The user interface for +.Nm +is implemented via a trio of sysctl OIDs. +When +.Li debug.watchdog.enabled +is set to a positive value, +.Nm +timeout checks are performed. +In order to keep the watchdog from triggering, +.Li debug.watchdog.reset +must be accessed, +by reading or writing, +within every +.Li debug.watchdog.timeout +seconds. 
+Failure to keep the +.Nm +updated will result in the kernel outputting interrupt counts, +backtraces, +and then attempting to enter +.Xr ddb 4 . +.Sh SEE ALSO +.Xr sysctl 8 , +.Xr watchdogd 8 +.Sh HISTORY +The +.Nm +code first appeared in +.Fx 5.1 . +.Sh AUTHORS +.An -nosplit +The +.Nm +code and manual page were written by +.An Sean Kelly Aq smkelly@FreeBSD.org . +Some contributions were made by +.An Jeff Roberson Aq jeff@FreeBSD.org . diff --git a/sys/conf/NOTES b/sys/conf/NOTES index f616f88d1e43..c0c8df0adcfb 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -2064,6 +2064,13 @@ options BOOTP_WIRED_TO=fxp0 # Use interface fxp0 for BOOTP # options HW_WDOG +# +# Add software watchdog routines. This will add some sysctl OIDs that +# can be used in combination with an external daemon to create a +# software-based watchdog solution. +# +options WATCHDOG + # # Disable swapping of upages and stack pages. This option removes all # code which actually performs swapping, so it's not possible to turn diff --git a/sys/conf/options b/sys/conf/options index cf0f70df4365..9d3c724d82f0 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -442,6 +442,7 @@ MUTEX_PROFILING opt_global.h NPX_DEBUG opt_debug_npx.h NETATALKDEBUG opt_atalk.h SI_DEBUG opt_debug_si.h +WATCHDOG opt_watchdog.h # Fb options FB_DEBUG opt_fb.h diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c index 50a48b10084b..53729d2040c3 100644 --- a/sys/kern/kern_clock.c +++ b/sys/kern/kern_clock.c @@ -42,6 +42,8 @@ __FBSDID("$FreeBSD$"); #include "opt_ntp.h" +#include "opt_ddb.h" +#include "opt_watchdog.h" #include #include @@ -71,6 +73,10 @@ __FBSDID("$FreeBSD$"); #include #endif +#ifdef DDB +#include +#endif + #ifdef DEVICE_POLLING extern void hardclock_device_poll(void); #endif /* DEVICE_POLLING */ @@ -84,6 +90,22 @@ long cp_time[CPUSTATES]; SYSCTL_OPAQUE(_kern, OID_AUTO, cp_time, CTLFLAG_RD, &cp_time, sizeof(cp_time), "LU", "CPU time statistics"); +#ifdef WATCHDOG +static int 
sysctl_watchdog_reset(SYSCTL_HANDLER_ARGS);
+static void	watchdog_fire(void);
+
+static int	watchdog_enabled;
+static unsigned int watchdog_ticks;
+static int	watchdog_timeout = 20;
+
+SYSCTL_NODE(_debug, OID_AUTO, watchdog, CTLFLAG_RW, 0, "System watchdog");
+SYSCTL_INT(_debug_watchdog, OID_AUTO, enabled, CTLFLAG_RW, &watchdog_enabled,
+	0, "Enable the watchdog");
+SYSCTL_INT(_debug_watchdog, OID_AUTO, timeout, CTLFLAG_RW, &watchdog_timeout,
+	0, "Timeout for watchdog checkins");
+
+#endif /* WATCHDOG */
+
 /*
  * Clock handling routines.
  *
@@ -228,6 +250,12 @@ hardclock(frame)
 	 */
 	if (need_softclock)
 		swi_sched(softclock_ih, 0);
+
+#ifdef WATCHDOG
+	if (watchdog_enabled > 0 &&
+	    (int)(ticks - watchdog_ticks) >= (hz * watchdog_timeout))
+		watchdog_fire();
+#endif /* WATCHDOG */
 }
 
 /*
@@ -481,3 +509,57 @@ sysctl_kern_clockrate(SYSCTL_HANDLER_ARGS)
 SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
 	0, 0, sysctl_kern_clockrate, "S,clockinfo",
 	"Rate and period of various kernel clocks");
+
+#ifdef WATCHDOG
+/*
+ * Sysctl handler for debug.watchdog.reset: every access records the
+ * current tick count, postponing watchdog_fire() by one timeout period.
+ */
+static int
+sysctl_watchdog_reset(SYSCTL_HANDLER_ARGS)
+{
+	int ret;	/* dummy value exchanged with the caller; always 0 */
+
+	ret = 0;
+	watchdog_ticks = ticks;
+	return (sysctl_handle_int(oidp, &ret, 0, req));
+}
+
+SYSCTL_PROC(_debug_watchdog, OID_AUTO, reset, CTLTYPE_INT|CTLFLAG_RW, 0, 0,
+	sysctl_watchdog_reset, "I", "Reset the watchdog");
+
+/*
+ * Handle a watchdog timeout by dumping interrupt information and
+ * then either dropping to DDB or panicking.
+ */
+static void
+watchdog_fire(void)
+{
+	int nintr;		/* interrupt counters left to visit */
+	u_int64_t inttotal;	/* running sum of all counters */
+	u_long *curintr;
+	char *curname;
+
+	curintr = intrcnt;
+	curname = intrnames;
+	inttotal = 0;
+	nintr = eintrcnt - intrcnt;
+
+	printf("interrupt total\n");
+	while (--nintr >= 0) {
+		if (*curintr)
+			printf("%-12s %20lu\n", curname, *curintr);
+		curname += strlen(curname) + 1;	/* step past the NUL */
+		inttotal += *curintr++;
+	}
+	printf("Total %20llu\n", (unsigned long long)inttotal);
+
+#ifdef DDB
+	db_print_backtrace();
+	Debugger("watchdog timeout");
+#else /* !DDB */
+	panic("watchdog timeout");
+#endif /* DDB */
+}
+
+#endif /* WATCHDOG */
diff --git a/usr.sbin/Makefile b/usr.sbin/Makefile
index b394af90b550..73d331108e38 100644
--- a/usr.sbin/Makefile
+++ b/usr.sbin/Makefile
@@ -124,6 +124,7 @@ SUBDIR= IPXrouted \
 	vipw \
 	vnconfig \
 	watch \
+	watchdogd \
 	wicontrol \
 	yp_mkdb \
 	ypbind \
diff --git a/usr.sbin/watchdogd/Makefile b/usr.sbin/watchdogd/Makefile
new file mode 100644
index 000000000000..bc8bfd24d4a6
--- /dev/null
+++ b/usr.sbin/watchdogd/Makefile
@@ -0,0 +1,8 @@
+# $FreeBSD$
+
+PROG=	watchdogd
+SRCS=	watchdogd.c
+MAN=	watchdogd.8
+WARNS=	6
+
+.include
diff --git a/usr.sbin/watchdogd/watchdogd.8 b/usr.sbin/watchdogd/watchdogd.8
new file mode 100644
index 000000000000..b8f7ce8ce0a3
--- /dev/null
+++ b/usr.sbin/watchdogd/watchdogd.8
@@ -0,0 +1,97 @@
+.\" Copyright (c) 2003 Sean M. Kelly
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\"    notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\"    notice, this list of conditions and the following disclaimer in the
+.\"    documentation and/or other materials provided with the distribution.
+.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd June 25, 2003 +.Dt WATCHDOGD 8 +.Os +.Sh NAME +.Nm watchdogd +.Nd Software watchdog daemon +.Sh SYNOPSIS +.Nm +.Op Fl d +.Op Fl I Ar file +.Sh DESCRIPTION +The +.Nm +utility interfaces with the kernel's software watchdog facility to ensure +that the system is in a working state. +If +.Nm +is unable to interface with the kernel over a specific timeout, +the kernel will take actions to assist in debugging or restarting the computer. +.Pp +One possible circumstance which will cause a watchdog timeout is an interrupt +storm. +If this occurs, +.Nm +will no longer execute and thus the kernel's watchdog routines will take +action after a configurable timeout. +.Pp +Upon receiving the +.Dv SIGTERM +or +.Dv SIGINT +signals, +.Nm +will first instruct the kernel to no longer perform watchdog checks and then +will terminate. +.Pp +The +.Nm +utility recognizes the following runtime options: +.Bl -tag -width ".Fl I Ar file" +.It Fl I Ar file +Write the process id of the +.Nm +utility in the specified file. +.It Fl d +Don't fork. +When this option is specified, +.Nm +will not fork into the background at startup. 
+.El +.Sh FILES +.Bl -tag -width "/var/run/watchdogd.pid" -compact +.It Pa /var/run/watchdogd.pid +.El +.Sh SEE ALSO +.Xr watchdog 4 , +.Xr sysctl 8 +.Sh AUTHORS +.An -nosplit +The +.Nm +utility and manual page were written by +.An Sean Kelly Aq smkelly@FreeBSD.org . +.Pp +Some contributions made by +.An Jeff Roberson Aq jeff@FreeBSD.org . +.Sh HISTORY +The +.Nm +utility appeared in +.Fx 5.1 . diff --git a/usr.sbin/watchdogd/watchdogd.c b/usr.sbin/watchdogd/watchdogd.c new file mode 100644 index 000000000000..3dd2f528e14e --- /dev/null +++ b/usr.sbin/watchdogd/watchdogd.c @@ -0,0 +1,232 @@ +/* + * Copyright (c) 2003 Sean M. Kelly + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */
+
+/*
+ * Software watchdog daemon.
+ */
+
+#include
+__FBSDID("$FreeBSD$");
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+static void	parseargs(int, char *[]);
+static void	sighandler(int);
+static void	watchdog_loop(void);
+static int	watchdog_init(void);
+static int	watchdog_onoff(int onoff);
+static int	watchdog_tickle(void);
+static void	usage(void);
+
+int debugging = 0;
+volatile sig_atomic_t end_program = 0;	/* set asynchronously by sighandler() */
+const char *pidfile = _PATH_VARRUN "watchdogd.pid";
+int reset_mib[3];
+size_t reset_miblen = 3;	/* its address is passed to sysctlnametomib(3) */
+
+/*
+ * Enable the kernel watchdog, then keep touching debug.watchdog.reset
+ * until SIGINT/SIGTERM asks us to disable the watchdog and exit.
+ */
+int
+main(int argc, char *argv[])
+{
+	struct rtprio rtp;
+	FILE *fp;
+
+	if (getuid() != 0)
+		errx(EX_SOFTWARE, "not super user");
+
+	parseargs(argc, argv);
+
+	rtp.type = RTP_PRIO_REALTIME;
+	rtp.prio = 0;
+	if (rtprio(RTP_SET, 0, &rtp) == -1)
+		err(EX_OSERR, "rtprio");
+
+	if (watchdog_init() == -1)
+		exit(EX_SOFTWARE);
+
+	if (watchdog_onoff(1) == -1)
+		exit(EX_SOFTWARE);
+
+	if (debugging == 0 && daemon(0, 0) == -1) {
+		watchdog_onoff(0);
+		err(EX_OSERR, "daemon");
+	}
+
+	signal(SIGHUP, SIG_IGN);
+	signal(SIGINT, sighandler);
+	signal(SIGTERM, sighandler);
+
+	fp = fopen(pidfile, "w");
+	if (fp != NULL) {
+		fprintf(fp, "%d\n", getpid());
+		fclose(fp);
+	}
+
+	watchdog_loop();
+
+	/* exiting */
+	watchdog_onoff(0);
+	unlink(pidfile);
+	return (EX_OK);
+}
+
+/*
+ * Catch signals and begin shutdown process.
+ */
+static void
+sighandler(int signum)
+{
+
+	if (signum == SIGINT || signum == SIGTERM)
+		end_program = 1;
+}
+
+/*
+ * Locate the OID for the 'debug.watchdog.reset' sysctl setting.
+ * Upon finding it, do an initial reset on the watchdog.
+ */
+static int
+watchdog_init(void)
+{
+	int error;
+
+	error = sysctlnametomib("debug.watchdog.reset", reset_mib,
+	    &reset_miblen);
+	if (error == -1) {
+		fprintf(stderr, "Could not find reset OID: %s\n",
+		    strerror(errno));
+		return (error);
+	}
+	return (watchdog_tickle());
+}
+
+/*
+ * Main program loop which is iterated every second.
+ */
+static void
+watchdog_loop(void)
+{
+	struct stat sb;
+	int failed;
+
+	while (end_program == 0) {
+		/* Only tickle if a basic filesystem lookup still succeeds. */
+
+		failed = stat("/etc", &sb);
+
+		if (failed == 0)
+			watchdog_tickle();
+		sleep(1);
+	}
+}
+
+/*
+ * Reset the watchdog timer.  This function must be called periodically
+ * to keep the watchdog from firing.
+ */
+static int
+watchdog_tickle(void)
+{
+
+	return (sysctl(reset_mib, reset_miblen, NULL, NULL, NULL, 0));
+}
+
+/*
+ * Enable (onoff > 0) or disable the kernel's watchdog by writing onoff to
+ * debug.watchdog.enabled.  Returns 0 on success and -1 on failure.
+ */
+static int
+watchdog_onoff(int onoff)
+{
+	int mib[3];
+	int error;
+	size_t len;	/* sysctlnametomib(3) takes a size_t *, not an int * */
+
+	len = 3;
+
+	error = sysctlnametomib("debug.watchdog.enabled", mib, &len);
+	if (error == 0)
+		error = sysctl(mib, len, NULL, NULL, &onoff, sizeof(onoff));
+
+	if (error == -1) {
+		fprintf(stderr, "Could not %s watchdog: %s\n",
+		    (onoff > 0) ? "enable" : "disable",
+		    strerror(errno));
+		return (error);
+	}
+	return (0);
+}
+
+/*
+ * Tell user how to use the program.
+ */
+static void
+usage(void)
+{
+	fprintf(stderr, "usage: watchdogd [-d] [-I file]\n");
+	exit(EX_USAGE);
+}
+
+/*
+ * Handle the few command line arguments supported.
+ */
+static void
+parseargs(int argc, char *argv[])
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "I:d?")) != -1) {
+		switch (c) {
+		case 'I':
+			pidfile = optarg;
+			break;
+		case 'd':
+			debugging = 1;
+			break;
+		case '?':
+		default:
+			usage();
+			/* NOTREACHED */
+		}
+	}
+}