Rewrite subr_sleepqueue.c's use of callouts so that it does not depend on the

specifics of the callout KPI.  In particular, do not depend on the exact
semantics of the callout_stop(9) return values.

The main change is that, instead of requiring precise callouts, the code
maintains the absolute time at which to wake up.  Callouts now only need
to ensure that a wakeup occurs at the requested moment; we can tolerate
both a run-away callout and callout_stop(9) lying about a running
callout, either way.

As a consequence, it removes a constant source of bugs in which
sleepq_check_timeout() caused an uninterruptible thread state where the
thread is detached from the CPU; see e.g. r234952 and r296320.

The patch also removes the dual meaning of the TDF_TIMEOUT flag, making
the code (IMO much) simpler to reason about.

Tested by:	pho
Reviewed by:	jhb
Sponsored by:	The FreeBSD Foundation
MFC after:	1 month
Differential revision:	https://reviews.freebsd.org/D7137
This commit is contained in:
Konstantin Belousov 2016-07-28 09:09:55 +00:00
parent a9e182e895
commit 2d19b736ed
4 changed files with 61 additions and 66 deletions

View File

@ -375,8 +375,13 @@ DB_SHOW_COMMAND(thread, db_show_thread)
db_printf(" lock: %s turnstile: %p\n", td->td_lockname, db_printf(" lock: %s turnstile: %p\n", td->td_lockname,
td->td_blocked); td->td_blocked);
if (TD_ON_SLEEPQ(td)) if (TD_ON_SLEEPQ(td))
db_printf(" wmesg: %s wchan: %p\n", td->td_wmesg, db_printf(
td->td_wchan); " wmesg: %s wchan: %p sleeptimo %lx. %jx (curr %lx. %jx)\n",
td->td_wmesg, td->td_wchan,
(long)sbttobt(td->td_sleeptimo).sec,
(uintmax_t)sbttobt(td->td_sleeptimo).frac,
(long)sbttobt(sbinuptime()).sec,
(uintmax_t)sbttobt(sbinuptime()).frac);
db_printf(" priority: %d\n", td->td_priority); db_printf(" priority: %d\n", td->td_priority);
db_printf(" container lock: %s (%p)\n", lock->lo_name, lock); db_printf(" container lock: %s (%p)\n", lock->lo_name, lock);
if (td->td_swvoltick != 0) { if (td->td_swvoltick != 0) {

View File

@ -318,7 +318,7 @@ thread_reap(void)
/* /*
* Don't even bother to lock if none at this instant, * Don't even bother to lock if none at this instant,
* we really don't care about the next instant.. * we really don't care about the next instant.
*/ */
if (!TAILQ_EMPTY(&zombie_threads)) { if (!TAILQ_EMPTY(&zombie_threads)) {
mtx_lock_spin(&zombie_lock); mtx_lock_spin(&zombie_lock);
@ -383,6 +383,7 @@ thread_free(struct thread *td)
if (td->td_kstack != 0) if (td->td_kstack != 0)
vm_thread_dispose(td); vm_thread_dispose(td);
vm_domain_policy_cleanup(&td->td_vm_dom_policy); vm_domain_policy_cleanup(&td->td_vm_dom_policy);
callout_drain(&td->td_slpcallout);
uma_zfree(thread_zone, td); uma_zfree(thread_zone, td);
} }
@ -580,6 +581,7 @@ thread_wait(struct proc *p)
td->td_cpuset = NULL; td->td_cpuset = NULL;
cpu_thread_clean(td); cpu_thread_clean(td);
thread_cow_free(td); thread_cow_free(td);
callout_drain(&td->td_slpcallout);
thread_reap(); /* check for zombie threads etc. */ thread_reap(); /* check for zombie threads etc. */
} }

View File

@ -378,6 +378,7 @@ sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr,
{ {
struct sleepqueue_chain *sc; struct sleepqueue_chain *sc;
struct thread *td; struct thread *td;
sbintime_t pr1;
td = curthread; td = curthread;
sc = SC_LOOKUP(wchan); sc = SC_LOOKUP(wchan);
@ -387,8 +388,14 @@ sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr,
MPASS(wchan != NULL); MPASS(wchan != NULL);
if (cold) if (cold)
panic("timed sleep before timers are working"); panic("timed sleep before timers are working");
callout_reset_sbt_on(&td->td_slpcallout, sbt, pr, KASSERT(td->td_sleeptimo == 0, ("td %d %p td_sleeptimo %jx",
sleepq_timeout, td, PCPU_GET(cpuid), flags | C_DIRECT_EXEC); td->td_tid, td, (uintmax_t)td->td_sleeptimo));
thread_lock(td);
callout_when(sbt, pr, flags, &td->td_sleeptimo, &pr1);
thread_unlock(td);
callout_reset_sbt_on(&td->td_slpcallout, td->td_sleeptimo, pr1,
sleepq_timeout, td, PCPU_GET(cpuid), flags | C_PRECALC |
C_DIRECT_EXEC);
} }
/* /*
@ -576,37 +583,36 @@ static int
sleepq_check_timeout(void) sleepq_check_timeout(void)
{ {
struct thread *td; struct thread *td;
int res;
td = curthread; td = curthread;
THREAD_LOCK_ASSERT(td, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_OWNED);
/* /*
* If TDF_TIMEOUT is set, we timed out. * If TDF_TIMEOUT is set, we timed out. But recheck
* td_sleeptimo anyway.
*/ */
if (td->td_flags & TDF_TIMEOUT) { res = 0;
if (td->td_sleeptimo != 0) {
if (td->td_sleeptimo <= sbinuptime())
res = EWOULDBLOCK;
td->td_sleeptimo = 0;
}
if (td->td_flags & TDF_TIMEOUT)
td->td_flags &= ~TDF_TIMEOUT; td->td_flags &= ~TDF_TIMEOUT;
return (EWOULDBLOCK); else
} /*
* We ignore the situation where timeout subsystem was
/* * unable to stop our callout. The struct thread is
* If TDF_TIMOFAIL is set, the timeout ran after we had * type-stable, the callout will use the correct
* already been woken up. * memory when running. The checks of the
*/ * td_sleeptimo value in this function and in
if (td->td_flags & TDF_TIMOFAIL) * sleepq_timeout() ensure that the thread does not
td->td_flags &= ~TDF_TIMOFAIL; * get spurious wakeups, even if the callout was reset
* or thread reused.
/* */
* If callout_stop() fails, then the timeout is running on callout_stop(&td->td_slpcallout);
* another CPU, so synchronize with it to avoid having it return (res);
* accidentally wake up a subsequent sleep.
*/
else if (_callout_stop_safe(&td->td_slpcallout, CS_EXECUTING, NULL)
== 0) {
td->td_flags |= TDF_TIMEOUT;
TD_SET_SLEEPING(td);
mi_switch(SW_INVOL | SWT_SLEEPQTIMO, NULL);
}
return (0);
} }
/* /*
@ -914,12 +920,17 @@ sleepq_timeout(void *arg)
CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)", CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
(void *)td, (long)td->td_proc->p_pid, (void *)td->td_name); (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
/*
* First, see if the thread is asleep and get the wait channel if
* it is.
*/
thread_lock(td); thread_lock(td);
if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
if (td->td_sleeptimo > sbinuptime() || td->td_sleeptimo == 0) {
/*
* The thread does not want a timeout (yet).
*/
} else if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
/*
* See if the thread is asleep and get the wait
* channel if it is.
*/
wchan = td->td_wchan; wchan = td->td_wchan;
sc = SC_LOOKUP(wchan); sc = SC_LOOKUP(wchan);
THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock); THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
@ -927,40 +938,16 @@ sleepq_timeout(void *arg)
MPASS(sq != NULL); MPASS(sq != NULL);
td->td_flags |= TDF_TIMEOUT; td->td_flags |= TDF_TIMEOUT;
wakeup_swapper = sleepq_resume_thread(sq, td, 0); wakeup_swapper = sleepq_resume_thread(sq, td, 0);
thread_unlock(td); } else if (TD_ON_SLEEPQ(td)) {
if (wakeup_swapper) /*
kick_proc0(); * If the thread is on the SLEEPQ but isn't sleeping
return; * yet, it can either be on another CPU in between
} * sleepq_add() and one of the sleepq_*wait*()
* routines or it can be in sleepq_catch_signals().
/* */
* If the thread is on the SLEEPQ but isn't sleeping yet, it
* can either be on another CPU in between sleepq_add() and
* one of the sleepq_*wait*() routines or it can be in
* sleepq_catch_signals().
*/
if (TD_ON_SLEEPQ(td)) {
td->td_flags |= TDF_TIMEOUT; td->td_flags |= TDF_TIMEOUT;
thread_unlock(td);
return;
} }
/*
* Now check for the edge cases. First, if TDF_TIMEOUT is set,
* then the other thread has already yielded to us, so clear
* the flag and resume it. If TDF_TIMEOUT is not set, then the
* we know that the other thread is not on a sleep queue, but it
* hasn't resumed execution yet. In that case, set TDF_TIMOFAIL
* to let it know that the timeout has already run and doesn't
* need to be canceled.
*/
if (td->td_flags & TDF_TIMEOUT) {
MPASS(TD_IS_SLEEPING(td));
td->td_flags &= ~TDF_TIMEOUT;
TD_CLR_SLEEPING(td);
wakeup_swapper = setrunnable(td);
} else
td->td_flags |= TDF_TIMOFAIL;
thread_unlock(td); thread_unlock(td);
if (wakeup_swapper) if (wakeup_swapper)
kick_proc0(); kick_proc0();

View File

@ -282,6 +282,7 @@ struct thread {
int td_no_sleeping; /* (k) Sleeping disabled count. */ int td_no_sleeping; /* (k) Sleeping disabled count. */
int td_dom_rr_idx; /* (k) RR Numa domain selection. */ int td_dom_rr_idx; /* (k) RR Numa domain selection. */
void *td_su; /* (k) FFS SU private */ void *td_su; /* (k) FFS SU private */
sbintime_t td_sleeptimo; /* (t) Sleep timeout. */
#define td_endzero td_sigmask #define td_endzero td_sigmask
/* Copied during fork1() or create_thread(). */ /* Copied during fork1() or create_thread(). */
@ -388,7 +389,7 @@ do { \
#define TDF_ALLPROCSUSP 0x00000200 /* suspended by SINGLE_ALLPROC */ #define TDF_ALLPROCSUSP 0x00000200 /* suspended by SINGLE_ALLPROC */
#define TDF_BOUNDARY 0x00000400 /* Thread suspended at user boundary */ #define TDF_BOUNDARY 0x00000400 /* Thread suspended at user boundary */
#define TDF_ASTPENDING 0x00000800 /* Thread has some asynchronous events. */ #define TDF_ASTPENDING 0x00000800 /* Thread has some asynchronous events. */
#define TDF_TIMOFAIL 0x00001000 /* Timeout from sleep after we were awake. */ #define TDF_UNUSED12 0x00001000 /* --available-- */
#define TDF_SBDRY 0x00002000 /* Stop only on usermode boundary. */ #define TDF_SBDRY 0x00002000 /* Stop only on usermode boundary. */
#define TDF_UPIBLOCKED 0x00004000 /* Thread blocked on user PI mutex. */ #define TDF_UPIBLOCKED 0x00004000 /* Thread blocked on user PI mutex. */
#define TDF_NEEDSUSPCHK 0x00008000 /* Thread may need to suspend. */ #define TDF_NEEDSUSPCHK 0x00008000 /* Thread may need to suspend. */