Introduce the new kernel thread called "deadlock resolver".
While the name is pretentious, a good explanation of its targets is given in this 17-month-old presentation e-mail: http://lists.freebsd.org/pipermail/freebsd-arch/2008-August/008452.html In order to implement it, the sq_type field in sleepqueues is now mandatory rather than being compiled only with the INVARIANTS option. Additionally, a new sleepqueue function, sleepq_type(), is added, returning the type of the sleepqueue linked to a wchan. Three new sysctls are added in order to configure the thread: debug.deadlkres.slptime_threshold debug.deadlkres.blktime_threshold debug.deadlkres.sleepfreq The first two represent the thresholds for sleep and block time that, when exceeded, are treated as a deadlock match, while sleepfreq represents the number of seconds between two consecutive runs of the thread. In order to enable the deadlock resolver thread, recompile your kernel with the option DEADLKRES. Reviewed by: jeff Tested by: pho, Giovanni Trematerra Sponsored by: Nokia Incorporated, Sandvine Incorporated MFC after: 2 weeks
This commit is contained in:
parent
ff451e0c50
commit
f7829d0d5c
5
UPDATING
5
UPDATING
@ -22,6 +22,11 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 9.x IS SLOW:
|
||||
machines to maximize performance. (To disable malloc debugging, run
|
||||
ln -s aj /etc/malloc.conf.)
|
||||
|
||||
20100108:
|
||||
Introduce the kernel thread "deadlock resolver" (which can be enabled
|
||||
via the DEADLKRES option, see NOTES for more details) and the
|
||||
sleepq_type() function for sleepqueues.
|
||||
|
||||
20091202:
|
||||
The rc.firewall and rc.firewall6 were unified, and
|
||||
rc.firewall6 and rc.d/ip6fw were removed.
|
||||
|
@ -23,7 +23,7 @@
|
||||
.\"
|
||||
.\" $FreeBSD$
|
||||
.\"
|
||||
.Dd December 12, 2009
|
||||
.Dd January 8, 2010
|
||||
.Dt SLEEPQUEUE 9
|
||||
.Os
|
||||
.Sh NAME
|
||||
@ -44,6 +44,7 @@
|
||||
.Nm sleepq_sleepcnt ,
|
||||
.Nm sleepq_timedwait ,
|
||||
.Nm sleepq_timedwait_sig ,
|
||||
.Nm sleepq_type ,
|
||||
.Nm sleepq_wait ,
|
||||
.Nm sleepq_wait_sig
|
||||
.Nd manage the queues of sleeping threads
|
||||
@ -84,6 +85,8 @@
|
||||
.Fn sleepq_timedwait "void *wchan"
|
||||
.Ft int
|
||||
.Fn sleepq_timedwait_sig "void *wchan" "int signal_caught"
|
||||
.Ft int
|
||||
.Fn sleepq_type "void *wchan"
|
||||
.Ft void
|
||||
.Fn sleepq_wait "void *wchan"
|
||||
.Ft int
|
||||
@ -366,6 +369,12 @@ given a
|
||||
.Fa wchan .
|
||||
.Pp
|
||||
The
|
||||
.Fn sleepq_type
|
||||
function returns the type of
|
||||
.Fa wchan
|
||||
associated with a sleepqueue.
|
||||
.Pp
|
||||
The
|
||||
.Fn sleepq_abort ,
|
||||
.Fn sleepq_broadcast ,
|
||||
and
|
||||
|
@ -2530,6 +2530,11 @@ options BOOTP_BLOCKSIZE=8192 # Override NFS block size
|
||||
#
|
||||
options SW_WATCHDOG
|
||||
|
||||
#
|
||||
# Add the software deadlock resolver thread.
|
||||
#
|
||||
options DEADLKRES
|
||||
|
||||
#
|
||||
# Disable swapping of stack pages. This option removes all
|
||||
# code which actually performs swapping, so it's not possible to turn
|
||||
|
@ -72,6 +72,7 @@ COMPAT_FREEBSD6 opt_compat.h
|
||||
COMPAT_FREEBSD7 opt_compat.h
|
||||
COMPILING_LINT opt_global.h
|
||||
CY_PCI_FASTINTR
|
||||
DEADLKRES opt_watchdog.h
|
||||
DIRECTIO
|
||||
FULL_PREEMPTION opt_sched.h
|
||||
IPI_PREEMPTION opt_sched.h
|
||||
|
@ -48,14 +48,16 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/callout.h>
|
||||
#include <sys/kdb.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/kthread.h>
|
||||
#include <sys/ktr.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/resource.h>
|
||||
#include <sys/resourcevar.h>
|
||||
#include <sys/sched.h>
|
||||
#include <sys/signalvar.h>
|
||||
#include <sys/sleepqueue.h>
|
||||
#include <sys/smp.h>
|
||||
#include <vm/vm.h>
|
||||
#include <vm/pmap.h>
|
||||
@ -159,6 +161,124 @@ sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS)
|
||||
SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE,
|
||||
0,0, sysctl_kern_cp_times, "LU", "per-CPU time statistics");
|
||||
|
||||
#ifdef DEADLKRES
|
||||
static int slptime_threshold = 1800;
|
||||
static int blktime_threshold = 900;
|
||||
static int sleepfreq = 3;
|
||||
|
||||
static void
|
||||
deadlkres(void)
|
||||
{
|
||||
struct proc *p;
|
||||
struct thread *td;
|
||||
void *wchan;
|
||||
int blkticks, slpticks, slptype, tryl, tticks;
|
||||
|
||||
tryl = 0;
|
||||
for (;;) {
|
||||
blkticks = blktime_threshold * hz;
|
||||
slpticks = slptime_threshold * hz;
|
||||
|
||||
/*
|
||||
* Avoid to sleep on the sx_lock in order to avoid a possible
|
||||
* priority inversion problem leading to starvation.
|
||||
* If the lock can't be held after 100 tries, panic.
|
||||
*/
|
||||
if (!sx_try_slock(&allproc_lock)) {
|
||||
if (tryl > 100)
|
||||
panic("%s: possible deadlock detected on allproc_lock\n",
|
||||
__func__);
|
||||
tryl++;
|
||||
pause("allproc_lock deadlkres", sleepfreq * hz);
|
||||
continue;
|
||||
}
|
||||
tryl = 0;
|
||||
FOREACH_PROC_IN_SYSTEM(p) {
|
||||
PROC_LOCK(p);
|
||||
FOREACH_THREAD_IN_PROC(p, td) {
|
||||
thread_lock(td);
|
||||
if (TD_ON_LOCK(td)) {
|
||||
|
||||
/*
|
||||
* The thread should be blocked on a
|
||||
* turnstile, simply check if the
|
||||
* turnstile channel is in good state.
|
||||
*/
|
||||
MPASS(td->td_blocked != NULL);
|
||||
tticks = ticks - td->td_blktick;
|
||||
thread_unlock(td);
|
||||
if (tticks > blkticks) {
|
||||
|
||||
/*
|
||||
* Accordingly with provided
|
||||
* thresholds, this thread is
|
||||
* stuck for too long on a
|
||||
* turnstile.
|
||||
*/
|
||||
PROC_UNLOCK(p);
|
||||
sx_sunlock(&allproc_lock);
|
||||
panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
|
||||
__func__, td, tticks);
|
||||
}
|
||||
} else if (TD_IS_SLEEPING(td)) {
|
||||
|
||||
/*
|
||||
* Check if the thread is sleeping on a
|
||||
* lock, otherwise skip the check.
|
||||
* Drop the thread lock in order to
|
||||
* avoid a LOR with the sleepqueue
|
||||
* spinlock.
|
||||
*/
|
||||
wchan = td->td_wchan;
|
||||
tticks = ticks - td->td_slptick;
|
||||
thread_unlock(td);
|
||||
slptype = sleepq_type(wchan);
|
||||
if ((slptype == SLEEPQ_SX ||
|
||||
slptype == SLEEPQ_LK) &&
|
||||
tticks > slpticks) {
|
||||
|
||||
/*
|
||||
* Accordingly with provided
|
||||
* thresholds, this thread is
|
||||
* stuck for too long on a
|
||||
* sleepqueue.
|
||||
*/
|
||||
PROC_UNLOCK(p);
|
||||
sx_sunlock(&allproc_lock);
|
||||
panic("%s: possible deadlock detected for %p, blocked for %d ticks\n",
|
||||
__func__, td, tticks);
|
||||
}
|
||||
} else
|
||||
thread_unlock(td);
|
||||
}
|
||||
PROC_UNLOCK(p);
|
||||
}
|
||||
sx_sunlock(&allproc_lock);
|
||||
|
||||
/* Sleep for sleepfreq seconds. */
|
||||
pause("deadlkres", sleepfreq * hz);
|
||||
}
|
||||
}
|
||||
|
||||
static struct kthread_desc deadlkres_kd = {
|
||||
"deadlkres",
|
||||
deadlkres,
|
||||
(struct thread **)NULL
|
||||
};
|
||||
|
||||
SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd);
|
||||
|
||||
SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW, 0, "Deadlock resolver");
|
||||
SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RW,
|
||||
&slptime_threshold, 0,
|
||||
"Number of seconds within is valid to sleep on a sleepqueue");
|
||||
SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RW,
|
||||
&blktime_threshold, 0,
|
||||
"Number of seconds within is valid to block on a turnstile");
|
||||
SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RW, &sleepfreq, 0,
|
||||
"Number of seconds between any deadlock resolver thread run");
|
||||
#endif /* DEADLKRES */
|
||||
|
||||
void
|
||||
read_cpu_time(long *cp_time)
|
||||
{
|
||||
|
@ -122,8 +122,8 @@ struct sleepqueue {
|
||||
LIST_ENTRY(sleepqueue) sq_hash; /* (c) Chain and free list. */
|
||||
LIST_HEAD(, sleepqueue) sq_free; /* (c) Free queues. */
|
||||
void *sq_wchan; /* (c) Wait channel. */
|
||||
#ifdef INVARIANTS
|
||||
int sq_type; /* (c) Queue type. */
|
||||
#ifdef INVARIANTS
|
||||
struct lock_object *sq_lock; /* (c) Associated lock. */
|
||||
#endif
|
||||
};
|
||||
@ -317,7 +317,6 @@ sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags,
|
||||
("thread's sleep queue has a non-empty free list"));
|
||||
KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer"));
|
||||
sq->sq_lock = lock;
|
||||
sq->sq_type = flags & SLEEPQ_TYPE;
|
||||
#endif
|
||||
#ifdef SLEEPQUEUE_PROFILING
|
||||
sc->sc_depth++;
|
||||
@ -330,6 +329,7 @@ sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags,
|
||||
sq = td->td_sleepqueue;
|
||||
LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash);
|
||||
sq->sq_wchan = wchan;
|
||||
sq->sq_type = flags & SLEEPQ_TYPE;
|
||||
} else {
|
||||
MPASS(wchan == sq->sq_wchan);
|
||||
MPASS(lock == sq->sq_lock);
|
||||
@ -668,6 +668,28 @@ sleepq_timedwait_sig(void *wchan, int pri)
|
||||
return (rvalt);
|
||||
}
|
||||
|
||||
/*
|
||||
* Returns the type of sleepqueue given a waitchannel.
|
||||
*/
|
||||
int
|
||||
sleepq_type(void *wchan)
|
||||
{
|
||||
struct sleepqueue *sq;
|
||||
int type;
|
||||
|
||||
MPASS(wchan != NULL);
|
||||
|
||||
sleepq_lock(wchan);
|
||||
sq = sleepq_lookup(wchan);
|
||||
if (sq == NULL) {
|
||||
sleepq_release(wchan);
|
||||
return (-1);
|
||||
}
|
||||
type = sq->sq_type;
|
||||
sleepq_release(wchan);
|
||||
return (type);
|
||||
}
|
||||
|
||||
/*
|
||||
* Removes a thread from a sleep queue and makes it
|
||||
* runnable.
|
||||
@ -1176,8 +1198,8 @@ DB_SHOW_COMMAND(sleepq, db_show_sleepqueue)
|
||||
return;
|
||||
found:
|
||||
db_printf("Wait channel: %p\n", sq->sq_wchan);
|
||||
#ifdef INVARIANTS
|
||||
db_printf("Queue type: %d\n", sq->sq_type);
|
||||
#ifdef INVARIANTS
|
||||
if (sq->sq_lock) {
|
||||
lock = sq->sq_lock;
|
||||
db_printf("Associated Interlock: %p - (%s) %s\n", lock,
|
||||
|
@ -733,6 +733,7 @@ turnstile_wait(struct turnstile *ts, struct thread *owner, int queue)
|
||||
td->td_tsqueue = queue;
|
||||
td->td_blocked = ts;
|
||||
td->td_lockname = lock->lo_name;
|
||||
td->td_blktick = ticks;
|
||||
TD_SET_LOCK(td);
|
||||
mtx_unlock_spin(&tc->tc_lock);
|
||||
propagate_priority(td);
|
||||
@ -925,6 +926,7 @@ turnstile_unpend(struct turnstile *ts, int owner_type)
|
||||
MPASS(TD_CAN_RUN(td));
|
||||
td->td_blocked = NULL;
|
||||
td->td_lockname = NULL;
|
||||
td->td_blktick = 0;
|
||||
#ifdef INVARIANTS
|
||||
td->td_tsqueue = 0xff;
|
||||
#endif
|
||||
|
@ -218,6 +218,7 @@ struct thread {
|
||||
struct ucred *td_ucred; /* (k) Reference to credentials. */
|
||||
u_int td_estcpu; /* (t) estimated cpu utilization */
|
||||
int td_slptick; /* (t) Time at sleep. */
|
||||
int td_blktick; /* (t) Time spent blocked. */
|
||||
struct rusage td_ru; /* (t) rusage information */
|
||||
uint64_t td_incruntime; /* (t) Cpu ticks to transfer to proc. */
|
||||
uint64_t td_runtime; /* (t) How many cpu ticks we've run. */
|
||||
|
@ -112,6 +112,7 @@ void sleepq_set_timeout(void *wchan, int timo);
|
||||
u_int sleepq_sleepcnt(void *wchan, int queue);
|
||||
int sleepq_timedwait(void *wchan, int pri);
|
||||
int sleepq_timedwait_sig(void *wchan, int pri);
|
||||
int sleepq_type(void *wchan);
|
||||
void sleepq_wait(void *wchan, int pri);
|
||||
int sleepq_wait_sig(void *wchan, int pri);
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user