diff --git a/UPDATING b/UPDATING index cba0fb95f37d..288925611833 100644 --- a/UPDATING +++ b/UPDATING @@ -15,6 +15,11 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 8.x IS SLOW ON IA64 OR SUN4V: debugging tools present in HEAD were left in place because sun4v support still needs work to become production ready. +20100125: + Introduce the kernel thread "deadlock resolver" (which can be enabled + via the DEADLKRES option, see NOTES for more details) and the + sleepq_type() function for sleepqueues. + 20090929: 802.11s D3.03 support was committed. This is incompatible with the previous code, which was based on D3.0. diff --git a/share/man/man9/sleepqueue.9 b/share/man/man9/sleepqueue.9 index d1d17cd63cff..63d0ebbfdedf 100644 --- a/share/man/man9/sleepqueue.9 +++ b/share/man/man9/sleepqueue.9 @@ -23,7 +23,7 @@ .\" .\" $FreeBSD$ .\" -.Dd January 18, 2010 +.Dd January 25, 2010 .Dt SLEEPQUEUE 9 .Os .Sh NAME @@ -44,6 +44,7 @@ .Nm sleepq_sleepcnt , .Nm sleepq_timedwait , .Nm sleepq_timedwait_sig , +.Nm sleepq_type , .Nm sleepq_wait , .Nm sleepq_wait_sig .Nd manage the queues of sleeping threads @@ -84,6 +85,8 @@ .Fn sleepq_timedwait "void *wchan" .Ft int .Fn sleepq_timedwait_sig "void *wchan" "int signal_caught" +.Ft int +.Fn sleepq_type "void *wchan" .Ft void .Fn sleepq_wait "void *wchan" .Ft int @@ -366,6 +369,12 @@ given a .Fa wchan . .Pp The +.Fn sleepq_type +function returns the type of +.Fa wchan +associated to a sleepqueue. +.Pp +The .Fn sleepq_abort , .Fn sleepq_broadcast , and diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 3c2a99b663c6..3d737246a9a0 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -2472,6 +2472,11 @@ options BOOTP_BLOCKSIZE=8192 # Override NFS block size # options SW_WATCHDOG +# +# Add the software deadlock resolver thread. +# +options DEADLKRES + # # Disable swapping of stack pages. This option removes all # code which actually performs swapping, so it's not possible to turn diff --git a/sys/conf/options b/sys/conf/options index 5405559567e7..e336fdc84a05 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -72,6 +72,7 @@ COMPAT_FREEBSD6 opt_compat.h COMPAT_FREEBSD7 opt_compat.h COMPILING_LINT opt_global.h CY_PCI_FASTINTR +DEADLKRES opt_watchdog.h DIRECTIO FULL_PREEMPTION opt_sched.h IPI_PREEMPTION opt_sched.h diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c index e95bc19152cb..2844103fa406 100644 --- a/sys/kern/kern_clock.c +++ b/sys/kern/kern_clock.c @@ -48,14 +48,16 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include +#include #include +#include #include #include #include #include #include #include +#include #include #include #include @@ -159,6 +161,124 @@ sysctl_kern_cp_times(SYSCTL_HANDLER_ARGS) SYSCTL_PROC(_kern, OID_AUTO, cp_times, CTLTYPE_LONG|CTLFLAG_RD|CTLFLAG_MPSAFE, 0,0, sysctl_kern_cp_times, "LU", "per-CPU time statistics"); +#ifdef DEADLKRES +static int slptime_threshold = 1800; +static int blktime_threshold = 900; +static int sleepfreq = 3; + +static void +deadlkres(void) +{ + struct proc *p; + struct thread *td; + void *wchan; + int blkticks, slpticks, slptype, tryl, tticks; + + tryl = 0; + for (;;) { + blkticks = blktime_threshold * hz; + slpticks = slptime_threshold * hz; + + /* + * Avoid to sleep on the sx_lock in order to avoid a possible + * priority inversion problem leading to starvation. + * If the lock can't be held after 100 tries, panic. + */ + if (!sx_try_slock(&allproc_lock)) { + if (tryl > 100) + panic("%s: possible deadlock detected on allproc_lock\n", + __func__); + tryl++; + pause("allproc_lock deadlkres", sleepfreq * hz); + continue; + } + tryl = 0; + FOREACH_PROC_IN_SYSTEM(p) { + PROC_LOCK(p); + FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); + if (TD_ON_LOCK(td)) { + + /* + * The thread should be blocked on a + * turnstile, simply check if the + * turnstile channel is in good state. + */ + MPASS(td->td_blocked != NULL); + tticks = ticks - td->td_blktick; + thread_unlock(td); + if (tticks > blkticks) { + + /* + * Accordingly with provided + * thresholds, this thread is + * stuck for too long on a + * turnstile. + */ + PROC_UNLOCK(p); + sx_sunlock(&allproc_lock); + panic("%s: possible deadlock detected for %p, blocked for %d ticks\n", + __func__, td, tticks); + } + } else if (TD_IS_SLEEPING(td)) { + + /* + * Check if the thread is sleeping on a + * lock, otherwise skip the check. + * Drop the thread lock in order to + * avoid a LOR with the sleepqueue + * spinlock. + */ + wchan = td->td_wchan; + tticks = ticks - td->td_slptick; + thread_unlock(td); + slptype = sleepq_type(wchan); + if ((slptype == SLEEPQ_SX || + slptype == SLEEPQ_LK) && + tticks > slpticks) { + + /* + * Accordingly with provided + * thresholds, this thread is + * stuck for too long on a + * sleepqueue. + */ + PROC_UNLOCK(p); + sx_sunlock(&allproc_lock); + panic("%s: possible deadlock detected for %p, blocked for %d ticks\n", + __func__, td, tticks); + } + } else + thread_unlock(td); + } + PROC_UNLOCK(p); + } + sx_sunlock(&allproc_lock); + + /* Sleep for sleepfreq seconds. */ + pause("deadlkres", sleepfreq * hz); + } +} + +static struct kthread_desc deadlkres_kd = { + "deadlkres", + deadlkres, + (struct thread **)NULL +}; + +SYSINIT(deadlkres, SI_SUB_CLOCKS, SI_ORDER_ANY, kthread_start, &deadlkres_kd); + +SYSCTL_NODE(_debug, OID_AUTO, deadlkres, CTLFLAG_RW, 0, "Deadlock resolver"); +SYSCTL_INT(_debug_deadlkres, OID_AUTO, slptime_threshold, CTLFLAG_RW, + &slptime_threshold, 0, + "Number of seconds within is valid to sleep on a sleepqueue"); +SYSCTL_INT(_debug_deadlkres, OID_AUTO, blktime_threshold, CTLFLAG_RW, + &blktime_threshold, 0, + "Number of seconds within is valid to block on a turnstile"); +SYSCTL_INT(_debug_deadlkres, OID_AUTO, sleepfreq, CTLFLAG_RW, &sleepfreq, 0, + "Number of seconds between any deadlock resolver thread run"); +#endif /* DEADLKRES */ + void read_cpu_time(long *cp_time) { diff --git a/sys/kern/subr_sleepqueue.c b/sys/kern/subr_sleepqueue.c index a0496bdaad35..5df74d06e9e6 100644 --- a/sys/kern/subr_sleepqueue.c +++ b/sys/kern/subr_sleepqueue.c @@ -122,8 +122,8 @@ struct sleepqueue { LIST_ENTRY(sleepqueue) sq_hash; /* (c) Chain and free list. */ LIST_HEAD(, sleepqueue) sq_free; /* (c) Free queues. */ void *sq_wchan; /* (c) Wait channel. */ -#ifdef INVARIANTS int sq_type; /* (c) Queue type. */ +#ifdef INVARIANTS struct lock_object *sq_lock; /* (c) Associated lock. */ #endif }; @@ -317,7 +317,6 @@ sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags, ("thread's sleep queue has a non-empty free list")); KASSERT(sq->sq_wchan == NULL, ("stale sq_wchan pointer")); sq->sq_lock = lock; - sq->sq_type = flags & SLEEPQ_TYPE; #endif #ifdef SLEEPQUEUE_PROFILING sc->sc_depth++; @@ -330,6 +329,7 @@ sleepq_add(void *wchan, struct lock_object *lock, const char *wmesg, int flags, sq = td->td_sleepqueue; LIST_INSERT_HEAD(&sc->sc_queues, sq, sq_hash); sq->sq_wchan = wchan; + sq->sq_type = flags & SLEEPQ_TYPE; } else { MPASS(wchan == sq->sq_wchan); MPASS(lock == sq->sq_lock); @@ -668,6 +668,28 @@ sleepq_timedwait_sig(void *wchan, int pri) return (rvalt); } +/* + * Returns the type of sleepqueue given a waitchannel. + */ +int +sleepq_type(void *wchan) +{ + struct sleepqueue *sq; + int type; + + MPASS(wchan != NULL); + + sleepq_lock(wchan); + sq = sleepq_lookup(wchan); + if (sq == NULL) { + sleepq_release(wchan); + return (-1); + } + type = sq->sq_type; + sleepq_release(wchan); + return (type); +} + /* * Removes a thread from a sleep queue and makes it * runnable. @@ -1176,8 +1198,8 @@ DB_SHOW_COMMAND(sleepq, db_show_sleepqueue) return; found: db_printf("Wait channel: %p\n", sq->sq_wchan); -#ifdef INVARIANTS db_printf("Queue type: %d\n", sq->sq_type); +#ifdef INVARIANTS if (sq->sq_lock) { lock = sq->sq_lock; db_printf("Associated Interlock: %p - (%s) %s\n", lock, diff --git a/sys/kern/subr_turnstile.c b/sys/kern/subr_turnstile.c index 31c8cfc4837d..d8f774899d1d 100644 --- a/sys/kern/subr_turnstile.c +++ b/sys/kern/subr_turnstile.c @@ -733,6 +733,7 @@ turnstile_wait(struct turnstile *ts, struct thread *owner, int queue) td->td_tsqueue = queue; td->td_blocked = ts; td->td_lockname = lock->lo_name; + td->td_blktick = ticks; TD_SET_LOCK(td); mtx_unlock_spin(&tc->tc_lock); propagate_priority(td); @@ -925,6 +926,7 @@ turnstile_unpend(struct turnstile *ts, int owner_type) MPASS(TD_CAN_RUN(td)); td->td_blocked = NULL; td->td_lockname = NULL; + td->td_blktick = 0; #ifdef INVARIANTS td->td_tsqueue = 0xff; #endif diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 642a6307fdc0..c0af11f6b3d3 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -217,6 +217,7 @@ struct thread { struct ucred *td_ucred; /* (k) Reference to credentials. */ u_int td_estcpu; /* (t) estimated cpu utilization */ int td_slptick; /* (t) Time at sleep. */ + int td_blktick; /* (t) Time spent blocked. */ struct rusage td_ru; /* (t) rusage information */ uint64_t td_incruntime; /* (t) Cpu ticks to transfer to proc. */ uint64_t td_runtime; /* (t) How many cpu ticks we've run. */ diff --git a/sys/sys/sleepqueue.h b/sys/sys/sleepqueue.h index 224d602c01f4..3e33e6b34c70 100644 --- a/sys/sys/sleepqueue.h +++ b/sys/sys/sleepqueue.h @@ -112,6 +112,7 @@ void sleepq_set_timeout(void *wchan, int timo); u_int sleepq_sleepcnt(void *wchan, int queue); int sleepq_timedwait(void *wchan, int pri); int sleepq_timedwait_sig(void *wchan, int pri); +int sleepq_type(void *wchan); void sleepq_wait(void *wchan, int pri); int sleepq_wait_sig(void *wchan, int pri);