Add a new exit-timeout option to watchdogd.
Watchdogd currently disables the watchdog when it exits, such as during rc.shutdown processing. That leaves the system vulnerable to getting hung or deadlocked during the shutdown part of a reboot. For embedded systems it's especially important that the hardware watchdog always be active. It can also be useful for servers that are administered remotely.

The new -x <seconds> option tells watchdogd to program the watchdog with the given timeout just before exiting. The -x value can be longer or shorter than the -t normal time value, to allow for various exceptional conditions at shutdown such as allowing extra time for buffer flushing.

The exit value is also used internally in the "failsafe" handling (which used to just disable the watchdog), on the theory that if you're using this option, "safe" means having the watchdog always running, not disabled.

The default is still to disable the watchdog on exit if -x is not specified.

Differential Revision: https://reviews.freebsd.org/D2556 (timed out)
This commit is contained in:
parent
57eba68124
commit
7b4a83b1d0
@ -27,7 +27,7 @@
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd November 16, 2014
|
||||
.Dd May 11, 2015
|
||||
.Dt WATCHDOGD 8
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -46,6 +46,7 @@
|
||||
.Op Fl s Ar sleep
|
||||
.Op Fl t Ar timeout
|
||||
.Op Fl T Ar script_timeout
|
||||
.Op Fl x Ar exit_timeout
|
||||
.Sh DESCRIPTION
|
||||
The
|
||||
.Nm
|
||||
@ -103,14 +104,25 @@ defaults to the value specified by the
|
||||
.Fl s Ar sleep
|
||||
option.
|
||||
.Pp
|
||||
The
|
||||
.Fl x Ar exit_timeout
|
||||
argument is the timeout period (in seconds) to leave in effect when the
|
||||
program exits.
|
||||
Using
|
||||
.Fl x
|
||||
with a non-zero value protects against lockup during a reboot by
|
||||
triggering a hardware reset if the software reboot doesn't complete
|
||||
before the given timeout expires.
|
||||
.Pp
|
||||
Upon receiving the
|
||||
.Dv SIGTERM
|
||||
or
|
||||
.Dv SIGINT
|
||||
signals,
|
||||
.Nm
|
||||
will first instruct the kernel to no longer perform watchdog checks and then
|
||||
will terminate.
|
||||
will terminate, after first instructing the kernel to either disable the
|
||||
timeout or reset it to the value given by
|
||||
.Fl x Ar exit_timeout .
|
||||
.Pp
|
||||
The
|
||||
.Nm
|
||||
|
@ -77,6 +77,7 @@ static int debugging = 0;
|
||||
static int end_program = 0;
|
||||
static const char *pidfile = _PATH_VARRUN "watchdogd.pid";
|
||||
static u_int timeout = WD_TO_128SEC;
|
||||
static u_int exit_timeout = WD_TO_NEVER;
|
||||
static u_int pretimeout = 0;
|
||||
static u_int timeout_sec;
|
||||
static u_int passive = 0;
|
||||
@ -461,10 +462,10 @@ watchdog_onoff(int onoff)
|
||||
/* pat one more time for good measure */
|
||||
return watchdog_patpat((timeout|WD_ACTIVE));
|
||||
} else {
|
||||
return watchdog_patpat(0);
|
||||
return watchdog_patpat(exit_timeout);
|
||||
}
|
||||
failsafe:
|
||||
watchdog_patpat(0);
|
||||
watchdog_patpat(exit_timeout);
|
||||
return (error);
|
||||
}
|
||||
|
||||
@ -476,8 +477,8 @@ usage(void)
|
||||
{
|
||||
if (is_daemon)
|
||||
fprintf(stderr, "usage:\n"
|
||||
" watchdogd [-dnSw] [-e cmd] [-I file] [-s sleep] [-t timeout]\n"
|
||||
" [-T script_timeout]\n"
|
||||
" watchdogd [-dnSw] [-e cmd] [-I pidfile] [-s sleep] [-t timeout]\n"
|
||||
" [-T script_timeout] [-x exit_timeout]\n"
|
||||
" [--debug]\n"
|
||||
" [--pretimeout seconds] [-pretimeout-action action]\n"
|
||||
" [--softtimeout] [-softtimeout-action action]\n"
|
||||
@ -697,7 +698,7 @@ parseargs(int argc, char *argv[])
|
||||
is_daemon = 1;
|
||||
|
||||
if (is_daemon)
|
||||
getopt_shortopts = "I:de:ns:t:ST:w?";
|
||||
getopt_shortopts = "I:de:ns:t:ST:wx:?";
|
||||
else
|
||||
getopt_shortopts = "dt:?";
|
||||
|
||||
@ -741,6 +742,11 @@ parseargs(int argc, char *argv[])
|
||||
case 'w':
|
||||
do_timedog = 1;
|
||||
break;
|
||||
case 'x':
|
||||
exit_timeout = parse_timeout_to_pow2ns(c, NULL, optarg);
|
||||
if (exit_timeout != 0)
|
||||
exit_timeout |= WD_ACTIVE;
|
||||
break;
|
||||
case 0:
|
||||
lopt = longopts[longindex].name;
|
||||
if (!strcmp(lopt, "pretimeout")) {
|
||||
|
Loading…
Reference in New Issue
Block a user