Add an "options MP_WATCHDOG" to i386. This option allows one of the

logical CPUs on a system to be used as a dedicated watchdog to cause a
drop to the debugger and/or generate an NMI to the boot processor if
the kernel ceases to respond.  A sysctl enables the watchdog running
out of the processor's idle thread; a callout is launched to reset a
timer in the watchdog.  If the callout fails to reset the timer for ten
seconds, the watchdog will fire.  The sysctl allows you to select which
CPU will run the watchdog.

A sample "debug.leak_schedlock" is included, which causes a sysctl to
spin holding sched_lock in order to trigger the watchdog.  On my Xeons,
the watchdog is able to detect this failure mode and break into the
debugger, which cannot otherwise be done without an NMI button.

This option does not currently work with sched_ule due to ule's push
notion of scheduling, similar to machdep.hlt_logical_cpus failing to
work with that scheduler.

On face value, this might seem somewhat inefficient, but there are a
lot of dual-processor Xeons with HTT around, so using one as a watchdog
for testing is not as inefficient as one might fear.
This commit is contained in:
rwatson 2004-08-15 18:02:09 +00:00
parent 437ee5545c
commit abf6ea2973
7 changed files with 529 additions and 0 deletions

View File

@ -0,0 +1,225 @@
/*-
* Copyright (c) 2004 Robert N. M. Watson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include "opt_mp_watchdog.h"
#include <sys/param.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <machine/smp.h>
#include <machine/apicreg.h>
#include <machine/apicvar.h>
#include <machine/mp_watchdog.h>
/*
* mp_watchdog hijacks the idle thread on a specified CPU, prevents new work
* from being scheduled there, and uses it as a "watchdog" to detect kernel
* failure on other CPUs. This is made reasonable by inclusion of logical
* processors in Xeon hardware. The watchdog is configured by setting the
* debug.watchdog sysctl to the CPU of interest. A callout will then
* begin executing, resetting a timer that is gradually lowered by the
* watching thread. If the timer reaches 0, the watchdog fires by either
* dropping directly to the debugger, or by sending an NMI IPI to the boot
* processor. This is a somewhat less efficient substitute for dedicated
* watchdog hardware, but can be quite an effective tool for debugging hangs.
*
* XXXRW: This should really use the watchdog(9)/watchdog(4) framework, but
* doesn't yet.
*/
/*
* CPU id currently acting as the watchdog; -1 means the watchdog is
* disabled. Written by the debug.watchdog sysctl handler and polled by
* ap_watchdog().
*/
static int watchdog_cpu = -1;
/*
* Non-zero suppresses firing. Set when the watchdog is disabled and again
* once it has fired; cleared when the sysctl handler arms the watchdog.
*/
static int watchdog_dontfire = 1;
/*
* Countdown in seconds. ap_watchdog() decrements it once per second and
* the callout resets it to WATCHDOG_THRESHOLD.
*/
static int watchdog_timer = -1;
/*
* Non-zero: fire by sending an NMI IPI to the boot processor. Zero: call
* kdb_enter() directly from the watchdog CPU. Tunable at run time as
* debug.watchdog_nmi.
*/
static int watchdog_nmi = 1;
SYSCTL_INT(_debug, OID_AUTO, watchdog_nmi, CTLFLAG_RW, &watchdog_nmi, 0,
"IPI the boot processor with an NMI to enter the debugger");
/*
* Callout that periodically runs watchdog_function() to reset the timer.
*/
static struct callout watchdog_callout;
/*
* Number of seconds before the watchdog will fire if the callout fails to
* reset the timer.
*/
#define WATCHDOG_THRESHOLD 10
/*
* One-time setup, run via SYSINIT: initialize the watchdog callout with
* the CALLOUT_MPSAFE flag. The argument is unused.
*/
static void
watchdog_init(void *arg)
{
callout_init(&watchdog_callout, CALLOUT_MPSAFE);
}
/*
* This callout resets a timer until the watchdog kicks in. It acquires some
* critical locks to make sure things haven't gotten wedged with those locks
* held.
*
* The argument is unused; the callout is always scheduled with NULL.
*/
static void
watchdog_function(void *arg)
{
/*
* Since the timer ran, we must not be wedged. Acquire some critical
* locks to make sure. Then reset the timer.
*/
mtx_lock(&Giant);
mtx_lock_spin(&sched_lock);
watchdog_timer = WATCHDOG_THRESHOLD;
mtx_unlock_spin(&sched_lock);
mtx_unlock(&Giant);
/* Re-arm ourselves to run again after hz ticks. */
callout_reset(&watchdog_callout, 1 * hz, watchdog_function, NULL);
}
/*
* Register watchdog_init() to run at the SI_SUB_DRIVERS startup stage,
* with SI_ORDER_ANY ordering and a NULL argument.
*/
SYSINIT(watchdog_init, SI_SUB_DRIVERS, SI_ORDER_ANY, watchdog_init, NULL);
/*
 * Handler for the sysctl that selects the watchdog CPU.  A read reports the
 * current watchdog CPU.  A write of -1 (or 0xffffffff) disables the
 * watchdog; any other value arms it on that CPU and starts the callout that
 * keeps the timer topped up.
 *
 * Returns 0 on success, or the error reported by sysctl_handle_int().
 *
 * NOTE(review): the requested CPU id is not range-checked here; a value
 * that never matches a CPU calling ap_watchdog() leaves the callout running
 * with no watcher.
 */
static int
sysctl_watchdog(SYSCTL_HANDLER_ARGS)
{
	int newcpu, rv;

	newcpu = watchdog_cpu;
	rv = sysctl_handle_int(oidp, &newcpu, 0, req);
	if (rv)
		return (rv);

	/* Nothing was written, so there is nothing to reconfigure. */
	if (req->newptr == NULL)
		return (0);

	if (newcpu != -1 && newcpu != 0xffffffff) {
		/*
		 * Arm the watchdog: refill the timer and allow firing before
		 * publishing the CPU id that ap_watchdog() polls for.
		 */
		watchdog_timer = WATCHDOG_THRESHOLD;
		watchdog_dontfire = 0;
		watchdog_cpu = newcpu;
		callout_reset(&watchdog_callout, 1 * hz,
		    watchdog_function, NULL);
		return (0);
	}

	/*
	 * Disable the watchdog.
	 */
	watchdog_cpu = -1;
	watchdog_dontfire = 1;
	callout_stop(&watchdog_callout);
	printf("watchdog stopped\n");
	return (0);
}
/*
* Expose sysctl_watchdog() as the read/write integer sysctl
* debug.watchdog. No description string is supplied.
*/
SYSCTL_PROC(_debug, OID_AUTO, watchdog, CTLTYPE_INT|CTLFLAG_RW, 0, 0,
sysctl_watchdog, "IU", "");
/*
 * A badly behaved sysctl that leaks the sched lock when written to.  Then
 * spin holding it just to make matters worse.  This can be used to test the
 * effectiveness of the watchdog by generating a fairly hard and nasty hang.
 * Note that Giant is also held in the current world order when we get here.
 *
 * Reads always report 0.  Writing a non-zero value never returns; writing
 * zero is a no-op.  Returns 0, or the error from sysctl_handle_int().
 */
static int
sysctl_leak_schedlock(SYSCTL_HANDLER_ARGS)
{
	int rv, trigger;

	trigger = 0;
	rv = sysctl_handle_int(oidp, &trigger, 0, req);
	if (rv)
		return (rv);

	if (req->newptr != NULL && trigger) {
		printf("Leaking the sched lock...\n");
		/* Take the spin lock and deliberately never release it. */
		mtx_lock_spin(&sched_lock);
		for (;;)
			continue;
		/* NOTREACHED */
	}
	return (0);
}
/*
* Expose sysctl_leak_schedlock() as the read/write integer sysctl
* debug.leak_schedlock. No description string is supplied.
*/
SYSCTL_PROC(_debug, OID_AUTO, leak_schedlock, CTLTYPE_INT|CTLFLAG_RW, 0, 0,
sysctl_leak_schedlock, "IU", "");
/*
* Drop into the debugger by sending an IPI NMI to the boot processor.
*/
static void
watchdog_ipi_nmi(void)
{
/*
* Deliver NMI to the boot processor. Why not?
*
* Going by the flag names, this requests NMI delivery mode, edge
* triggered, level asserted, addressed by physical destination to
* the APIC id held in boot_cpu_id.
*/
lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_NMI,
boot_cpu_id);
/* NOTE(review): presumably -1 means wait with no timeout -- confirm. */
lapic_ipi_wait(-1);
}
/*
 * ap_watchdog() is called by the SMP idle loop code.  It works on the same
 * premise that the disabling of logical processors does: that if the cpu is
 * idle, then it can ignore the world from then on, as nothing will be
 * scheduled on it.  Leaving aside multi-runqueue schedulers (SCHED_ULE) and
 * explicit process migration (sched_bind()), this is not an unreasonable
 * assumption.
 *
 * cpuid is the id of the calling CPU.  If it is not the configured watchdog
 * CPU the function returns at once.  Otherwise it stays here, counting the
 * timer down once per second, until the watchdog CPU setting changes.  The
 * current process name is replaced while watching and restored on exit.
 */
void
ap_watchdog(u_int cpuid)
{
	char saved_comm[MAXCOMLEN + 1];
	struct proc *self;

	if (watchdog_cpu != cpuid)
		return;

	printf("watchdog started on cpu %d\n", cpuid);

	/* Relabel the current process so the watchdog is easy to spot. */
	self = curproc;
	bcopy(self->p_comm, saved_comm, sizeof(saved_comm));
	snprintf(self->p_comm, MAXCOMLEN + 1, "mp_watchdog cpu %d", cpuid);

	for (;;) {
		DELAY(1000000);		/* One second. */

		/* Leave once this CPU is no longer the watchdog. */
		if (watchdog_cpu != cpuid)
			break;

		atomic_subtract_int(&watchdog_timer, 1);
		if (watchdog_timer < 4)
			printf("Watchdog timer: %d\n", watchdog_timer);

		/* Keep counting unless the timer just expired while armed. */
		if (watchdog_timer != 0 || watchdog_dontfire != 0)
			continue;

		printf("Watchdog firing!\n");
		/* One shot: stay quiet until the sysctl re-arms us. */
		watchdog_dontfire = 1;
		if (watchdog_nmi)
			watchdog_ipi_nmi();
		else
			kdb_enter("mp_watchdog");
	}

	bcopy(saved_comm, self->p_comm, sizeof(saved_comm));
	printf("watchdog stopped on cpu %d\n", cpuid);
}

View File

@ -0,0 +1,34 @@
/*-
* Copyright (c) 2004 Robert N. M. Watson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _MACHINE_MP_WATCHDOG_H_
#define _MACHINE_MP_WATCHDOG_H_
/*
* MP watchdog entry point, implemented in mp_watchdog.c. cpuid is the id
* of the calling CPU; the call returns immediately unless that CPU is the
* one currently configured as the watchdog.
*/
void ap_watchdog(u_int cpuid);
#endif /* !_MACHINE_MP_WATCHDOG_H_ */

View File

@ -234,6 +234,7 @@ i386/i386/machdep.c standard
i386/i386/mem.c optional mem
i386/i386/mp_clock.c optional smp
i386/i386/mp_machdep.c optional smp
i386/i386/mp_watchdog.c optional mp_watchdog smp
i386/i386/mpboot.s optional smp
i386/i386/mptable.c optional apic
i386/i386/mptable_pci.c optional apic pci

View File

@ -15,6 +15,7 @@ PMAP_SHPGPERPROC opt_pmap.h
POWERFAIL_NMI opt_trap.h
PPC_DEBUG opt_ppc.h
PPC_PROBE_CHIPSET opt_ppc.h
MP_WATCHDOG opt_mp_watchdog.h
# Options for emulators. These should only be used at config time, so
# they are handled like options for static filesystems

View File

@ -29,6 +29,7 @@ __FBSDID("$FreeBSD$");
#include "opt_apic.h"
#include "opt_cpu.h"
#include "opt_kstack_pages.h"
#include "opt_mp_watchdog.h"
#if !defined(lint)
#if !defined(SMP)
@ -73,6 +74,7 @@ __FBSDID("$FreeBSD$");
#include <machine/apicreg.h>
#include <machine/clock.h>
#include <machine/md_var.h>
#include <machine/mp_watchdog.h>
#include <machine/pcb.h>
#include <machine/smp.h>
#include <machine/smptests.h> /** COUNT_XINVLTLB_HITS */
@ -1289,8 +1291,15 @@ int
mp_grab_cpu_hlt(void)
{
u_int mask = PCPU_GET(cpumask);
#ifdef MP_WATCHDOG
/* Only needed for the ap_watchdog() call, which has the same guard. */
u_int cpuid = PCPU_GET(cpuid);
#endif
int retval;
#ifdef MP_WATCHDOG
ap_watchdog(cpuid);
#endif
retval = mask & hlt_cpus_mask;
while (mask & hlt_cpus_mask)
__asm __volatile("sti; hlt" : : : "memory");

225
sys/i386/i386/mp_watchdog.c Normal file
View File

@ -0,0 +1,225 @@
/*-
* Copyright (c) 2004 Robert N. M. Watson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include "opt_mp_watchdog.h"
#include <sys/param.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/pcpu.h>
#include <sys/proc.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <machine/smp.h>
#include <machine/apicreg.h>
#include <machine/apicvar.h>
#include <machine/mp_watchdog.h>
/*
* mp_watchdog hijacks the idle thread on a specified CPU, prevents new work
* from being scheduled there, and uses it as a "watchdog" to detect kernel
* failure on other CPUs. This is made reasonable by inclusion of logical
* processors in Xeon hardware. The watchdog is configured by setting the
* debug.watchdog sysctl to the CPU of interest. A callout will then
* begin executing, resetting a timer that is gradually lowered by the
* watching thread. If the timer reaches 0, the watchdog fires by either
* dropping directly to the debugger, or by sending an NMI IPI to the boot
* processor. This is a somewhat less efficient substitute for dedicated
* watchdog hardware, but can be quite an effective tool for debugging hangs.
*
* XXXRW: This should really use the watchdog(9)/watchdog(4) framework, but
* doesn't yet.
*/
/*
* CPU id currently acting as the watchdog; -1 means the watchdog is
* disabled. Written by the debug.watchdog sysctl handler and polled by
* ap_watchdog().
*/
static int watchdog_cpu = -1;
/*
* Non-zero suppresses firing. Set when the watchdog is disabled and again
* once it has fired; cleared when the sysctl handler arms the watchdog.
*/
static int watchdog_dontfire = 1;
/*
* Countdown in seconds. ap_watchdog() decrements it once per second and
* the callout resets it to WATCHDOG_THRESHOLD.
*/
static int watchdog_timer = -1;
/*
* Non-zero: fire by sending an NMI IPI to the boot processor. Zero: call
* kdb_enter() directly from the watchdog CPU. Tunable at run time as
* debug.watchdog_nmi.
*/
static int watchdog_nmi = 1;
SYSCTL_INT(_debug, OID_AUTO, watchdog_nmi, CTLFLAG_RW, &watchdog_nmi, 0,
"IPI the boot processor with an NMI to enter the debugger");
/*
* Callout that periodically runs watchdog_function() to reset the timer.
*/
static struct callout watchdog_callout;
/*
* Number of seconds before the watchdog will fire if the callout fails to
* reset the timer.
*/
#define WATCHDOG_THRESHOLD 10
/*
* One-time setup, run via SYSINIT: initialize the watchdog callout with
* the CALLOUT_MPSAFE flag. The argument is unused.
*/
static void
watchdog_init(void *arg)
{
callout_init(&watchdog_callout, CALLOUT_MPSAFE);
}
/*
* This callout resets a timer until the watchdog kicks in. It acquires some
* critical locks to make sure things haven't gotten wedged with those locks
* held.
*
* The argument is unused; the callout is always scheduled with NULL.
*/
static void
watchdog_function(void *arg)
{
/*
* Since the timer ran, we must not be wedged. Acquire some critical
* locks to make sure. Then reset the timer.
*/
mtx_lock(&Giant);
mtx_lock_spin(&sched_lock);
watchdog_timer = WATCHDOG_THRESHOLD;
mtx_unlock_spin(&sched_lock);
mtx_unlock(&Giant);
/* Re-arm ourselves to run again after hz ticks. */
callout_reset(&watchdog_callout, 1 * hz, watchdog_function, NULL);
}
/*
* Register watchdog_init() to run at the SI_SUB_DRIVERS startup stage,
* with SI_ORDER_ANY ordering and a NULL argument.
*/
SYSINIT(watchdog_init, SI_SUB_DRIVERS, SI_ORDER_ANY, watchdog_init, NULL);
/*
 * Handler for the sysctl that selects the watchdog CPU.  A read reports the
 * current watchdog CPU.  A write of -1 (or 0xffffffff) disables the
 * watchdog; any other value arms it on that CPU and starts the callout that
 * keeps the timer topped up.
 *
 * Returns 0 on success, or the error reported by sysctl_handle_int().
 *
 * NOTE(review): the requested CPU id is not range-checked here; a value
 * that never matches a CPU calling ap_watchdog() leaves the callout running
 * with no watcher.
 */
static int
sysctl_watchdog(SYSCTL_HANDLER_ARGS)
{
	int newcpu, rv;

	newcpu = watchdog_cpu;
	rv = sysctl_handle_int(oidp, &newcpu, 0, req);
	if (rv)
		return (rv);

	/* Nothing was written, so there is nothing to reconfigure. */
	if (req->newptr == NULL)
		return (0);

	if (newcpu != -1 && newcpu != 0xffffffff) {
		/*
		 * Arm the watchdog: refill the timer and allow firing before
		 * publishing the CPU id that ap_watchdog() polls for.
		 */
		watchdog_timer = WATCHDOG_THRESHOLD;
		watchdog_dontfire = 0;
		watchdog_cpu = newcpu;
		callout_reset(&watchdog_callout, 1 * hz,
		    watchdog_function, NULL);
		return (0);
	}

	/*
	 * Disable the watchdog.
	 */
	watchdog_cpu = -1;
	watchdog_dontfire = 1;
	callout_stop(&watchdog_callout);
	printf("watchdog stopped\n");
	return (0);
}
/*
* Expose sysctl_watchdog() as the read/write integer sysctl
* debug.watchdog. No description string is supplied.
*/
SYSCTL_PROC(_debug, OID_AUTO, watchdog, CTLTYPE_INT|CTLFLAG_RW, 0, 0,
sysctl_watchdog, "IU", "");
/*
 * A badly behaved sysctl that leaks the sched lock when written to.  Then
 * spin holding it just to make matters worse.  This can be used to test the
 * effectiveness of the watchdog by generating a fairly hard and nasty hang.
 * Note that Giant is also held in the current world order when we get here.
 *
 * Reads always report 0.  Writing a non-zero value never returns; writing
 * zero is a no-op.  Returns 0, or the error from sysctl_handle_int().
 */
static int
sysctl_leak_schedlock(SYSCTL_HANDLER_ARGS)
{
	int rv, trigger;

	trigger = 0;
	rv = sysctl_handle_int(oidp, &trigger, 0, req);
	if (rv)
		return (rv);

	if (req->newptr != NULL && trigger) {
		printf("Leaking the sched lock...\n");
		/* Take the spin lock and deliberately never release it. */
		mtx_lock_spin(&sched_lock);
		for (;;)
			continue;
		/* NOTREACHED */
	}
	return (0);
}
/*
* Expose sysctl_leak_schedlock() as the read/write integer sysctl
* debug.leak_schedlock. No description string is supplied.
*/
SYSCTL_PROC(_debug, OID_AUTO, leak_schedlock, CTLTYPE_INT|CTLFLAG_RW, 0, 0,
sysctl_leak_schedlock, "IU", "");
/*
* Drop into the debugger by sending an IPI NMI to the boot processor.
*/
static void
watchdog_ipi_nmi(void)
{
/*
* Deliver NMI to the boot processor. Why not?
*
* Going by the flag names, this requests NMI delivery mode, edge
* triggered, level asserted, addressed by physical destination to
* the APIC id held in boot_cpu_id.
*/
lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_NMI,
boot_cpu_id);
/* NOTE(review): presumably -1 means wait with no timeout -- confirm. */
lapic_ipi_wait(-1);
}
/*
 * ap_watchdog() is called by the SMP idle loop code.  It works on the same
 * premise that the disabling of logical processors does: that if the cpu is
 * idle, then it can ignore the world from then on, as nothing will be
 * scheduled on it.  Leaving aside multi-runqueue schedulers (SCHED_ULE) and
 * explicit process migration (sched_bind()), this is not an unreasonable
 * assumption.
 *
 * cpuid is the id of the calling CPU.  If it is not the configured watchdog
 * CPU the function returns at once.  Otherwise it stays here, counting the
 * timer down once per second, until the watchdog CPU setting changes.  The
 * current process name is replaced while watching and restored on exit.
 */
void
ap_watchdog(u_int cpuid)
{
	char saved_comm[MAXCOMLEN + 1];
	struct proc *self;

	if (watchdog_cpu != cpuid)
		return;

	printf("watchdog started on cpu %d\n", cpuid);

	/* Relabel the current process so the watchdog is easy to spot. */
	self = curproc;
	bcopy(self->p_comm, saved_comm, sizeof(saved_comm));
	snprintf(self->p_comm, MAXCOMLEN + 1, "mp_watchdog cpu %d", cpuid);

	for (;;) {
		DELAY(1000000);		/* One second. */

		/* Leave once this CPU is no longer the watchdog. */
		if (watchdog_cpu != cpuid)
			break;

		atomic_subtract_int(&watchdog_timer, 1);
		if (watchdog_timer < 4)
			printf("Watchdog timer: %d\n", watchdog_timer);

		/* Keep counting unless the timer just expired while armed. */
		if (watchdog_timer != 0 || watchdog_dontfire != 0)
			continue;

		printf("Watchdog firing!\n");
		/* One shot: stay quiet until the sysctl re-arms us. */
		watchdog_dontfire = 1;
		if (watchdog_nmi)
			watchdog_ipi_nmi();
		else
			kdb_enter("mp_watchdog");
	}

	bcopy(saved_comm, self->p_comm, sizeof(saved_comm));
	printf("watchdog stopped on cpu %d\n", cpuid);
}

View File

@ -0,0 +1,34 @@
/*-
* Copyright (c) 2004 Robert N. M. Watson
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _MACHINE_MP_WATCHDOG_H_
#define _MACHINE_MP_WATCHDOG_H_
/*
* MP watchdog entry point, implemented in mp_watchdog.c. cpuid is the id
* of the calling CPU; the call returns immediately unless that CPU is the
* one currently configured as the watchdog.
*/
void ap_watchdog(u_int cpuid);
#endif /* !_MACHINE_MP_WATCHDOG_H_ */