Add an "options MP_WATCHDOG" to i386. This option allows one of the
logical CPUs on a system to be used as a dedicated watchdog to cause a drop to the debugger and/or generate an NMI to the boot processor if the kernel ceases to respond. A sysctl enables the watchdog running out of the processor's idle thread; a callout is launched to reset a timer in the watchdog. If the callout fails to reset the timer for ten seconds, the watchdog will fire. The sysctl allows you to select which CPU will run the watchdog. A sample "debug.leak_schedlock" is included, which causes a sysctl to spin holding sched_lock in order to trigger the watchdog. On my Xeons, the watchdog is able to detect this failure mode and break into the debugger, which cannot otherwise be done without an NMI button. This option does not currently work with sched_ule due to ule's push notion of scheduling, similar to machdep.hlt_logical_cpus failing to work with that scheduler. On face value, this might seem somewhat inefficient, but there are a lot of dual-processor Xeons with HTT around, so using one as a watchdog for testing is not as inefficient as one might fear.
This commit is contained in:
parent
437ee5545c
commit
abf6ea2973
225
sys/amd64/amd64/mp_watchdog.c
Normal file
225
sys/amd64/amd64/mp_watchdog.c
Normal file
@ -0,0 +1,225 @@
|
||||
/*-
|
||||
* Copyright (c) 2004 Robert N. M. Watson
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include "opt_mp_watchdog.h"
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/kdb.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/pcpu.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/systm.h>
|
||||
|
||||
#include <machine/smp.h>
|
||||
#include <machine/apicreg.h>
|
||||
#include <machine/apicvar.h>
|
||||
#include <machine/mp_watchdog.h>
|
||||
|
||||
/*
|
||||
* mp_watchdog hijacks the idle thread on a specified CPU, prevents new work
|
||||
* from being scheduled there, and uses it as a "watchdog" to detect kernel
|
||||
* failure on other CPUs. This is made reasonable by inclusion of logical
|
||||
* processors in Xeon hardware. The watchdog is configured by setting the
|
||||
* debug.watchdog sysctl to the CPU of interest. A callout will then
|
||||
* begin executing, resetting a timer that is gradually lowered by the watching
|
||||
* thread. If the timer reaches 0, the watchdog fires by either dropping
|
||||
* directly to the debugger, or by sending an NMI IPI to the boot processor.
|
||||
* This is a somewhat less efficient substitute for dedicated watchdog
|
||||
* hardware, but can be quite an effective tool for debugging hangs.
|
||||
*
|
||||
* XXXRW: This should really use the watchdog(9)/watchdog(4) framework, but
|
||||
* doesn't yet.
|
||||
*/
|
||||
static int watchdog_cpu = -1;
|
||||
static int watchdog_dontfire = 1;
|
||||
static int watchdog_timer = -1;
|
||||
static int watchdog_nmi = 1;
|
||||
|
||||
SYSCTL_INT(_debug, OID_AUTO, watchdog_nmi, CTLFLAG_RW, &watchdog_nmi, 0,
|
||||
"IPI the boot processor with an NMI to enter the debugger");
|
||||
|
||||
static struct callout watchdog_callout;
|
||||
|
||||
/*
|
||||
* Number of seconds before the watchdog will fire if the callout fails to
|
||||
* reset the timer.
|
||||
*/
|
||||
#define WATCHDOG_THRESHOLD 10
|
||||
|
||||
static void
|
||||
watchdog_init(void *arg)
|
||||
{
|
||||
|
||||
callout_init(&watchdog_callout, CALLOUT_MPSAFE);
|
||||
}
|
||||
|
||||
/*
|
||||
* This callout resets a timer until the watchdog kicks in. It acquires some
|
||||
* critical locks to make sure things haven't gotten wedged with hose locks
|
||||
* held.
|
||||
*/
|
||||
static void
|
||||
watchdog_function(void *arg)
|
||||
{
|
||||
|
||||
/*
|
||||
* Since the timer ran, we must not be wedged. Acquire some critical
|
||||
* locks to make sure. Then reset the timer.
|
||||
*/
|
||||
mtx_lock(&Giant);
|
||||
mtx_lock_spin(&sched_lock);
|
||||
watchdog_timer = WATCHDOG_THRESHOLD;
|
||||
mtx_unlock_spin(&sched_lock);
|
||||
mtx_unlock(&Giant);
|
||||
callout_reset(&watchdog_callout, 1 * hz, watchdog_function, NULL);
|
||||
}
|
||||
SYSINIT(watchdog_init, SI_SUB_DRIVERS, SI_ORDER_ANY, watchdog_init, NULL);
|
||||
|
||||
/*
|
||||
* This sysctl sets which CPU is the watchdog CPU. Set to -1 or 0xffffffff
|
||||
* to disable the watchdog.
|
||||
*/
|
||||
static int
|
||||
sysctl_watchdog(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
int error, temp;
|
||||
|
||||
temp = watchdog_cpu;
|
||||
error = sysctl_handle_int(oidp, &temp, 0, req);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
if (req->newptr != NULL) {
|
||||
if (temp == -1 || temp == 0xffffffff) {
|
||||
/*
|
||||
* Disable the watcdog.
|
||||
*/
|
||||
watchdog_cpu = -1;
|
||||
watchdog_dontfire = 1;
|
||||
callout_stop(&watchdog_callout);
|
||||
printf("watchdog stopped\n");
|
||||
} else {
|
||||
watchdog_timer = WATCHDOG_THRESHOLD;
|
||||
watchdog_dontfire = 0;
|
||||
watchdog_cpu = temp;
|
||||
callout_reset(&watchdog_callout, 1 * hz,
|
||||
watchdog_function, NULL);
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
SYSCTL_PROC(_debug, OID_AUTO, watchdog, CTLTYPE_INT|CTLFLAG_RW, 0, 0,
|
||||
sysctl_watchdog, "IU", "");
|
||||
|
||||
/*
|
||||
* A badly behaved sysctl that leaks the sched lock when written to. Then
|
||||
* spin holding it just to make matters worse. This can be used to test the
|
||||
* effectiveness of the watchdog by generating a fairly hard and nast hang.
|
||||
* Note that Giant is also held in the current world order when we get here.
|
||||
*/
|
||||
static int
|
||||
sysctl_leak_schedlock(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
int error, temp;
|
||||
|
||||
temp = 0;
|
||||
error = sysctl_handle_int(oidp, &temp, 0, req);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
if (req->newptr != NULL) {
|
||||
if (temp) {
|
||||
printf("Leaking the sched lock...\n");
|
||||
mtx_lock_spin(&sched_lock);
|
||||
while (1);
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
SYSCTL_PROC(_debug, OID_AUTO, leak_schedlock, CTLTYPE_INT|CTLFLAG_RW, 0, 0,
|
||||
sysctl_leak_schedlock, "IU", "");
|
||||
|
||||
/*
|
||||
* Drop into the debugger by sending an IPI NMI to the boot processor.
|
||||
*/
|
||||
static void
|
||||
watchdog_ipi_nmi(void)
|
||||
{
|
||||
|
||||
/*
|
||||
* Deliver NMI to the boot processor. Why not?
|
||||
*/
|
||||
lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
|
||||
APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_NMI,
|
||||
boot_cpu_id);
|
||||
lapic_ipi_wait(-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* ap_watchdog() is called by the SMP idle loop code. It works on the same
|
||||
* premise that the disabling of logical processors does: that if the cpu is
|
||||
* idle, then it can ignore the world from then on, as nothing will be
|
||||
* scheduled on it. Leaving aside multi-runqueue schedulers (SCHED_ULE) and
|
||||
* explicit process migration (sched_bind()), this is not an unreasonable
|
||||
* assumption.
|
||||
*/
|
||||
void
|
||||
ap_watchdog(u_int cpuid)
|
||||
{
|
||||
char old_pcomm[MAXCOMLEN + 1];
|
||||
struct proc *p;
|
||||
|
||||
if (watchdog_cpu != cpuid)
|
||||
return;
|
||||
|
||||
printf("watchdog started on cpu %d\n", cpuid);
|
||||
p = curproc;
|
||||
bcopy(p->p_comm, old_pcomm, MAXCOMLEN + 1);
|
||||
snprintf(p->p_comm, MAXCOMLEN + 1, "mp_watchdog cpu %d", cpuid);
|
||||
while (1) {
|
||||
DELAY(1000000); /* One second. */
|
||||
if (watchdog_cpu != cpuid)
|
||||
break;
|
||||
atomic_subtract_int(&watchdog_timer, 1);
|
||||
if (watchdog_timer < 4)
|
||||
printf("Watchdog timer: %d\n", watchdog_timer);
|
||||
if (watchdog_timer == 0 && watchdog_dontfire == 0) {
|
||||
printf("Watchdog firing!\n");
|
||||
watchdog_dontfire = 1;
|
||||
if (watchdog_nmi)
|
||||
watchdog_ipi_nmi();
|
||||
else
|
||||
kdb_enter("mp_watchdog");
|
||||
}
|
||||
}
|
||||
bcopy(old_pcomm, p->p_comm, MAXCOMLEN + 1);
|
||||
printf("watchdog stopped on cpu %d\n", cpuid);
|
||||
}
|
34
sys/amd64/include/mp_watchdog.h
Normal file
34
sys/amd64/include/mp_watchdog.h
Normal file
@ -0,0 +1,34 @@
|
||||
/*-
|
||||
* Copyright (c) 2004 Robert N. M. Watson
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _MACHINE_MP_WATCHDOG_H_
|
||||
#define _MACHINE_MP_WATCHDOG_H_
|
||||
|
||||
/*
 * Run the MP watchdog loop on the calling CPU if cpuid is the configured
 * watchdog CPU; otherwise return immediately.
 */
void ap_watchdog(u_int cpuid);
|
||||
|
||||
#endif /* !_MACHINE_MP_WATCHDOG_H_ */
|
@ -234,6 +234,7 @@ i386/i386/machdep.c standard
|
||||
i386/i386/mem.c optional mem
|
||||
i386/i386/mp_clock.c optional smp
|
||||
i386/i386/mp_machdep.c optional smp
|
||||
i386/i386/mp_watchdog.c optional mp_watchdog smp
|
||||
i386/i386/mpboot.s optional smp
|
||||
i386/i386/mptable.c optional apic
|
||||
i386/i386/mptable_pci.c optional apic pci
|
||||
|
@ -15,6 +15,7 @@ PMAP_SHPGPERPROC opt_pmap.h
|
||||
POWERFAIL_NMI opt_trap.h
|
||||
PPC_DEBUG opt_ppc.h
|
||||
PPC_PROBE_CHIPSET opt_ppc.h
|
||||
MP_WATCHDOG opt_mp_watchdog.h
|
||||
|
||||
# Options for emulators. These should only be used at config time, so
|
||||
# they are handled like options for static filesystems
|
||||
|
@ -29,6 +29,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include "opt_apic.h"
|
||||
#include "opt_cpu.h"
|
||||
#include "opt_kstack_pages.h"
|
||||
#include "opt_mp_watchdog.h"
|
||||
|
||||
#if !defined(lint)
|
||||
#if !defined(SMP)
|
||||
@ -73,6 +74,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <machine/apicreg.h>
|
||||
#include <machine/clock.h>
|
||||
#include <machine/md_var.h>
|
||||
#include <machine/mp_watchdog.h>
|
||||
#include <machine/pcb.h>
|
||||
#include <machine/smp.h>
|
||||
#include <machine/smptests.h> /** COUNT_XINVLTLB_HITS */
|
||||
@ -1289,8 +1291,15 @@ int
|
||||
mp_grab_cpu_hlt(void)
|
||||
{
|
||||
u_int mask = PCPU_GET(cpumask);
|
||||
#ifdef MP_WATCHDOG
	/* Only needed for the ap_watchdog() call, so guarded by the same option. */
	u_int cpuid = PCPU_GET(cpuid);
#endif
|
||||
int retval;
|
||||
|
||||
#ifdef MP_WATCHDOG
|
||||
ap_watchdog(cpuid);
|
||||
#endif
|
||||
|
||||
retval = mask & hlt_cpus_mask;
|
||||
while (mask & hlt_cpus_mask)
|
||||
__asm __volatile("sti; hlt" : : : "memory");
|
||||
|
225
sys/i386/i386/mp_watchdog.c
Normal file
225
sys/i386/i386/mp_watchdog.c
Normal file
@ -0,0 +1,225 @@
|
||||
/*-
|
||||
* Copyright (c) 2004 Robert N. M. Watson
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#include "opt_mp_watchdog.h"
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/kdb.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/pcpu.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/systm.h>
|
||||
|
||||
#include <machine/smp.h>
|
||||
#include <machine/apicreg.h>
|
||||
#include <machine/apicvar.h>
|
||||
#include <machine/mp_watchdog.h>
|
||||
|
||||
/*
|
||||
* mp_watchdog hijacks the idle thread on a specified CPU, prevents new work
|
||||
* from being scheduled there, and uses it as a "watchdog" to detect kernel
|
||||
* failure on other CPUs. This is made reasonable by inclusion of logical
|
||||
* processors in Xeon hardware. The watchdog is configured by setting the
|
||||
* debug.watchdog sysctl to the CPU of interest. A callout will then
|
||||
* begin executing, resetting a timer that is gradually lowered by the watching
|
||||
* thread. If the timer reaches 0, the watchdog fires by either dropping
|
||||
* directly to the debugger, or by sending an NMI IPI to the boot processor.
|
||||
* This is a somewhat less efficient substitute for dedicated watchdog
|
||||
* hardware, but can be quite an effective tool for debugging hangs.
|
||||
*
|
||||
* XXXRW: This should really use the watchdog(9)/watchdog(4) framework, but
|
||||
* doesn't yet.
|
||||
*/
|
||||
static int watchdog_cpu = -1;
|
||||
static int watchdog_dontfire = 1;
|
||||
static int watchdog_timer = -1;
|
||||
static int watchdog_nmi = 1;
|
||||
|
||||
SYSCTL_INT(_debug, OID_AUTO, watchdog_nmi, CTLFLAG_RW, &watchdog_nmi, 0,
|
||||
"IPI the boot processor with an NMI to enter the debugger");
|
||||
|
||||
static struct callout watchdog_callout;
|
||||
|
||||
/*
|
||||
* Number of seconds before the watchdog will fire if the callout fails to
|
||||
* reset the timer.
|
||||
*/
|
||||
#define WATCHDOG_THRESHOLD 10
|
||||
|
||||
static void
|
||||
watchdog_init(void *arg)
|
||||
{
|
||||
|
||||
callout_init(&watchdog_callout, CALLOUT_MPSAFE);
|
||||
}
|
||||
|
||||
/*
|
||||
* This callout resets a timer until the watchdog kicks in. It acquires some
|
||||
* critical locks to make sure things haven't gotten wedged with hose locks
|
||||
* held.
|
||||
*/
|
||||
static void
|
||||
watchdog_function(void *arg)
|
||||
{
|
||||
|
||||
/*
|
||||
* Since the timer ran, we must not be wedged. Acquire some critical
|
||||
* locks to make sure. Then reset the timer.
|
||||
*/
|
||||
mtx_lock(&Giant);
|
||||
mtx_lock_spin(&sched_lock);
|
||||
watchdog_timer = WATCHDOG_THRESHOLD;
|
||||
mtx_unlock_spin(&sched_lock);
|
||||
mtx_unlock(&Giant);
|
||||
callout_reset(&watchdog_callout, 1 * hz, watchdog_function, NULL);
|
||||
}
|
||||
SYSINIT(watchdog_init, SI_SUB_DRIVERS, SI_ORDER_ANY, watchdog_init, NULL);
|
||||
|
||||
/*
|
||||
* This sysctl sets which CPU is the watchdog CPU. Set to -1 or 0xffffffff
|
||||
* to disable the watchdog.
|
||||
*/
|
||||
static int
|
||||
sysctl_watchdog(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
int error, temp;
|
||||
|
||||
temp = watchdog_cpu;
|
||||
error = sysctl_handle_int(oidp, &temp, 0, req);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
if (req->newptr != NULL) {
|
||||
if (temp == -1 || temp == 0xffffffff) {
|
||||
/*
|
||||
* Disable the watcdog.
|
||||
*/
|
||||
watchdog_cpu = -1;
|
||||
watchdog_dontfire = 1;
|
||||
callout_stop(&watchdog_callout);
|
||||
printf("watchdog stopped\n");
|
||||
} else {
|
||||
watchdog_timer = WATCHDOG_THRESHOLD;
|
||||
watchdog_dontfire = 0;
|
||||
watchdog_cpu = temp;
|
||||
callout_reset(&watchdog_callout, 1 * hz,
|
||||
watchdog_function, NULL);
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
SYSCTL_PROC(_debug, OID_AUTO, watchdog, CTLTYPE_INT|CTLFLAG_RW, 0, 0,
|
||||
sysctl_watchdog, "IU", "");
|
||||
|
||||
/*
|
||||
* A badly behaved sysctl that leaks the sched lock when written to. Then
|
||||
* spin holding it just to make matters worse. This can be used to test the
|
||||
* effectiveness of the watchdog by generating a fairly hard and nast hang.
|
||||
* Note that Giant is also held in the current world order when we get here.
|
||||
*/
|
||||
static int
|
||||
sysctl_leak_schedlock(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
int error, temp;
|
||||
|
||||
temp = 0;
|
||||
error = sysctl_handle_int(oidp, &temp, 0, req);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
if (req->newptr != NULL) {
|
||||
if (temp) {
|
||||
printf("Leaking the sched lock...\n");
|
||||
mtx_lock_spin(&sched_lock);
|
||||
while (1);
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
SYSCTL_PROC(_debug, OID_AUTO, leak_schedlock, CTLTYPE_INT|CTLFLAG_RW, 0, 0,
|
||||
sysctl_leak_schedlock, "IU", "");
|
||||
|
||||
/*
|
||||
* Drop into the debugger by sending an IPI NMI to the boot processor.
|
||||
*/
|
||||
static void
|
||||
watchdog_ipi_nmi(void)
|
||||
{
|
||||
|
||||
/*
|
||||
* Deliver NMI to the boot processor. Why not?
|
||||
*/
|
||||
lapic_ipi_raw(APIC_DEST_DESTFLD | APIC_TRIGMOD_EDGE |
|
||||
APIC_LEVEL_ASSERT | APIC_DESTMODE_PHY | APIC_DELMODE_NMI,
|
||||
boot_cpu_id);
|
||||
lapic_ipi_wait(-1);
|
||||
}
|
||||
|
||||
/*
|
||||
* ap_watchdog() is called by the SMP idle loop code. It works on the same
|
||||
* premise that the disabling of logical processors does: that if the cpu is
|
||||
* idle, then it can ignore the world from then on, as nothing will be
|
||||
* scheduled on it. Leaving aside multi-runqueue schedulers (SCHED_ULE) and
|
||||
* explicit process migration (sched_bind()), this is not an unreasonable
|
||||
* assumption.
|
||||
*/
|
||||
void
|
||||
ap_watchdog(u_int cpuid)
|
||||
{
|
||||
char old_pcomm[MAXCOMLEN + 1];
|
||||
struct proc *p;
|
||||
|
||||
if (watchdog_cpu != cpuid)
|
||||
return;
|
||||
|
||||
printf("watchdog started on cpu %d\n", cpuid);
|
||||
p = curproc;
|
||||
bcopy(p->p_comm, old_pcomm, MAXCOMLEN + 1);
|
||||
snprintf(p->p_comm, MAXCOMLEN + 1, "mp_watchdog cpu %d", cpuid);
|
||||
while (1) {
|
||||
DELAY(1000000); /* One second. */
|
||||
if (watchdog_cpu != cpuid)
|
||||
break;
|
||||
atomic_subtract_int(&watchdog_timer, 1);
|
||||
if (watchdog_timer < 4)
|
||||
printf("Watchdog timer: %d\n", watchdog_timer);
|
||||
if (watchdog_timer == 0 && watchdog_dontfire == 0) {
|
||||
printf("Watchdog firing!\n");
|
||||
watchdog_dontfire = 1;
|
||||
if (watchdog_nmi)
|
||||
watchdog_ipi_nmi();
|
||||
else
|
||||
kdb_enter("mp_watchdog");
|
||||
}
|
||||
}
|
||||
bcopy(old_pcomm, p->p_comm, MAXCOMLEN + 1);
|
||||
printf("watchdog stopped on cpu %d\n", cpuid);
|
||||
}
|
34
sys/i386/include/mp_watchdog.h
Normal file
34
sys/i386/include/mp_watchdog.h
Normal file
@ -0,0 +1,34 @@
|
||||
/*-
|
||||
* Copyright (c) 2004 Robert N. M. Watson
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*
|
||||
* $FreeBSD$
|
||||
*/
|
||||
|
||||
#ifndef _MACHINE_MP_WATCHDOG_H_
|
||||
#define _MACHINE_MP_WATCHDOG_H_
|
||||
|
||||
/*
 * Run the MP watchdog loop on the calling CPU if cpuid is the configured
 * watchdog CPU; otherwise return immediately.
 */
void ap_watchdog(u_int cpuid);
|
||||
|
||||
#endif /* !_MACHINE_MP_WATCHDOG_H_ */
|
Loading…
x
Reference in New Issue
Block a user