Rewrite subr_sleepqueue.c's use of callouts so that it does not depend
on the specifics of the callout KPI.  In particular, do not depend on
the exact interpretation of callout_stop(9) return values.

The main change is that, instead of requiring precise callouts, the
code now maintains the absolute time at which the thread should wake
up.  Callouts should still ensure that a wakeup occurs at the requested
moment, but we can tolerate both a run-away callout and callout_stop(9)
lying either way about whether the callout is running.
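
To see the pattern outside the kernel, here is a minimal userland C
sketch of the idea; uptime_ns(), sleep_deadline, check_timeout() and
timer_fired() are hypothetical stand-ins for sbinuptime(),
td_sleeptimo, sleepq_check_timeout() and sleepq_timeout(), not the
kernel interfaces themselves:

#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Monotonic "uptime" in nanoseconds; stands in for sbinuptime(). */
static uint64_t
uptime_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ((uint64_t)ts.tv_sec * 1000000000 + ts.tv_nsec);
}

/* Absolute wake-up deadline; stands in for td_sleeptimo, 0 = unarmed. */
static uint64_t sleep_deadline;

/* Sleeper side: derive "did we time out?" from the deadline alone. */
static int
check_timeout(void)
{
	int timed_out;

	timed_out = 0;
	if (sleep_deadline != 0) {
		if (sleep_deadline <= uptime_ns())
			timed_out = 1;	/* EWOULDBLOCK in the kernel */
		sleep_deadline = 0;	/* disarm; a stale timer is harmless */
	}
	return (timed_out);
}

/* Timer side: a run-away or badly cancelled timer does nothing wrong. */
static void
timer_fired(void)
{
	if (sleep_deadline == 0 || sleep_deadline > uptime_ns())
		return;			/* no timeout wanted (yet) */
	/* ... resume the sleeping thread here ... */
}

int
main(void)
{
	sleep_deadline = uptime_ns();	/* an already-expired deadline */
	timer_fired();
	printf("timed out: %d\n", check_timeout());
	return (0);
}

Because both sides consult the deadline, neither a timer that fires
after a normal wakeup nor a cancellation that fails can produce a
spurious or missed timeout.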

As a consequence, this removes a constant source of bugs where
sleepq_check_timeout() put a thread into an uninterruptible state while
it was detached from a CPU; see e.g. r234952 and r296320.

The patch also removes the dual meaning of the TDF_TIMEOUT flag, making
the code (IMO much) simpler to reason about.

Tested by:	pho
Reviewed by:	jhb
Sponsored by:	The FreeBSD Foundation
MFC after:	1 month
Differential revision:	https://reviews.freebsd.org/D7137
Author:	Konstantin Belousov
Date:	2016-07-28 09:09:55 +00:00
Commit:	2d19b736ed (parent a9e182e895)
Notes:	svn2git 2020-12-20 02:59:44 +00:00; svn path=/head/; revision=303426

4 changed files with 61 additions and 66 deletions


@@ -375,8 +375,13 @@ DB_SHOW_COMMAND(thread, db_show_thread)
 		db_printf(" lock: %s turnstile: %p\n", td->td_lockname,
 		    td->td_blocked);
 	if (TD_ON_SLEEPQ(td))
-		db_printf(" wmesg: %s wchan: %p\n", td->td_wmesg,
-		    td->td_wchan);
+		db_printf(
+	    " wmesg: %s wchan: %p sleeptimo %lx. %jx (curr %lx. %jx)\n",
+		    td->td_wmesg, td->td_wchan,
+		    (long)sbttobt(td->td_sleeptimo).sec,
+		    (uintmax_t)sbttobt(td->td_sleeptimo).frac,
+		    (long)sbttobt(sbinuptime()).sec,
+		    (uintmax_t)sbttobt(sbinuptime()).frac);
 	db_printf(" priority: %d\n", td->td_priority);
 	db_printf(" container lock: %s (%p)\n", lock->lo_name, lock);
 	if (td->td_swvoltick != 0) {


@@ -318,7 +318,7 @@ thread_reap(void)
 
 	/*
 	 * Don't even bother to lock if none at this instant,
-	 * we really don't care about the next instant..
+	 * we really don't care about the next instant.
	 */
 	if (!TAILQ_EMPTY(&zombie_threads)) {
 		mtx_lock_spin(&zombie_lock);
@@ -383,6 +383,7 @@ thread_free(struct thread *td)
 	if (td->td_kstack != 0)
 		vm_thread_dispose(td);
 	vm_domain_policy_cleanup(&td->td_vm_dom_policy);
+	callout_drain(&td->td_slpcallout);
 	uma_zfree(thread_zone, td);
 }
 
@@ -580,6 +581,7 @@ thread_wait(struct proc *p)
 	td->td_cpuset = NULL;
 	cpu_thread_clean(td);
 	thread_cow_free(td);
+	callout_drain(&td->td_slpcallout);
 	thread_reap();	/* check for zombie threads etc. */
 }


@@ -378,6 +378,7 @@ sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr,
 {
 	struct sleepqueue_chain *sc;
 	struct thread *td;
+	sbintime_t pr1;
 
 	td = curthread;
 	sc = SC_LOOKUP(wchan);
@@ -387,8 +388,14 @@ sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr,
 	MPASS(wchan != NULL);
 	if (cold)
 		panic("timed sleep before timers are working");
-	callout_reset_sbt_on(&td->td_slpcallout, sbt, pr,
-	    sleepq_timeout, td, PCPU_GET(cpuid), flags | C_DIRECT_EXEC);
+	KASSERT(td->td_sleeptimo == 0, ("td %d %p td_sleeptimo %jx",
+	    td->td_tid, td, (uintmax_t)td->td_sleeptimo));
+	thread_lock(td);
+	callout_when(sbt, pr, flags, &td->td_sleeptimo, &pr1);
+	thread_unlock(td);
+	callout_reset_sbt_on(&td->td_slpcallout, td->td_sleeptimo, pr1,
+	    sleepq_timeout, td, PCPU_GET(cpuid), flags | C_PRECALC |
+	    C_DIRECT_EXEC);
 }
 
 /*
@@ -576,37 +583,36 @@ static int
 sleepq_check_timeout(void)
 {
 	struct thread *td;
+	int res;
 
 	td = curthread;
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 
 	/*
-	 * If TDF_TIMEOUT is set, we timed out.
+	 * If TDF_TIMEOUT is set, we timed out.  But recheck
+	 * td_sleeptimo anyway.
 	 */
-	if (td->td_flags & TDF_TIMEOUT) {
+	res = 0;
+	if (td->td_sleeptimo != 0) {
+		if (td->td_sleeptimo <= sbinuptime())
+			res = EWOULDBLOCK;
+		td->td_sleeptimo = 0;
+	}
+	if (td->td_flags & TDF_TIMEOUT)
 		td->td_flags &= ~TDF_TIMEOUT;
-		return (EWOULDBLOCK);
-	}
-
-	/*
-	 * If TDF_TIMOFAIL is set, the timeout ran after we had
-	 * already been woken up.
-	 */
-	if (td->td_flags & TDF_TIMOFAIL)
-		td->td_flags &= ~TDF_TIMOFAIL;
-
-	/*
-	 * If callout_stop() fails, then the timeout is running on
-	 * another CPU, so synchronize with it to avoid having it
-	 * accidentally wake up a subsequent sleep.
-	 */
-	else if (_callout_stop_safe(&td->td_slpcallout, CS_EXECUTING, NULL)
-	    == 0) {
-		td->td_flags |= TDF_TIMEOUT;
-		TD_SET_SLEEPING(td);
-		mi_switch(SW_INVOL | SWT_SLEEPQTIMO, NULL);
-	}
-	return (0);
+	else
+		/*
+		 * We ignore the situation where timeout subsystem was
+		 * unable to stop our callout.  The struct thread is
+		 * type-stable, the callout will use the correct
+		 * memory when running.  The checks of the
+		 * td_sleeptimo value in this function and in
+		 * sleepq_timeout() ensure that the thread does not
+		 * get spurious wakeups, even if the callout was reset
+		 * or thread reused.
+		 */
+		callout_stop(&td->td_slpcallout);
+	return (res);
 }
 
 /*
@@ -914,12 +920,17 @@ sleepq_timeout(void *arg)
 	CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
 	    (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
 
-	/*
-	 * First, see if the thread is asleep and get the wait channel if
-	 * it is.
-	 */
 	thread_lock(td);
-	if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
+
+	if (td->td_sleeptimo > sbinuptime() || td->td_sleeptimo == 0) {
+		/*
+		 * The thread does not want a timeout (yet).
+		 */
+	} else if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
+		/*
+		 * See if the thread is asleep and get the wait
+		 * channel if it is.
+		 */
 		wchan = td->td_wchan;
 		sc = SC_LOOKUP(wchan);
 		THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
@@ -927,40 +938,16 @@ sleepq_timeout(void *arg)
 		sq = sleepq_lookup(wchan);
 		MPASS(sq != NULL);
 		td->td_flags |= TDF_TIMEOUT;
 		wakeup_swapper = sleepq_resume_thread(sq, td, 0);
-		thread_unlock(td);
-		if (wakeup_swapper)
-			kick_proc0();
-		return;
-	}
-
-	/*
-	 * If the thread is on the SLEEPQ but isn't sleeping yet, it
-	 * can either be on another CPU in between sleepq_add() and
-	 * one of the sleepq_*wait*() routines or it can be in
-	 * sleepq_catch_signals().
-	 */
-	if (TD_ON_SLEEPQ(td)) {
+	} else if (TD_ON_SLEEPQ(td)) {
+		/*
+		 * If the thread is on the SLEEPQ but isn't sleeping
+		 * yet, it can either be on another CPU in between
+		 * sleepq_add() and one of the sleepq_*wait*()
+		 * routines or it can be in sleepq_catch_signals().
+		 */
 		td->td_flags |= TDF_TIMEOUT;
-		thread_unlock(td);
-		return;
 	}
-
-	/*
-	 * Now check for the edge cases.  First, if TDF_TIMEOUT is set,
-	 * then the other thread has already yielded to us, so clear
-	 * the flag and resume it.  If TDF_TIMEOUT is not set, then the
-	 * we know that the other thread is not on a sleep queue, but it
-	 * hasn't resumed execution yet.  In that case, set TDF_TIMOFAIL
-	 * to let it know that the timeout has already run and doesn't
-	 * need to be canceled.
-	 */
-	if (td->td_flags & TDF_TIMEOUT) {
-		MPASS(TD_IS_SLEEPING(td));
-		td->td_flags &= ~TDF_TIMEOUT;
-		TD_CLR_SLEEPING(td);
-		wakeup_swapper = setrunnable(td);
-	} else
-		td->td_flags |= TDF_TIMOFAIL;
+
 	thread_unlock(td);
 	if (wakeup_swapper)
 		kick_proc0();


@@ -282,6 +282,7 @@ struct thread {
 	int		td_no_sleeping;	/* (k) Sleeping disabled count. */
 	int		td_dom_rr_idx;	/* (k) RR Numa domain selection. */
 	void		*td_su;		/* (k) FFS SU private */
+	sbintime_t	td_sleeptimo;	/* (t) Sleep timeout. */
 #define	td_endzero td_sigmask
 
 /* Copied during fork1() or create_thread(). */
@@ -388,7 +389,7 @@ do { \
 #define	TDF_ALLPROCSUSP	0x00000200 /* suspended by SINGLE_ALLPROC */
 #define	TDF_BOUNDARY	0x00000400 /* Thread suspended at user boundary */
 #define	TDF_ASTPENDING	0x00000800 /* Thread has some asynchronous events. */
-#define	TDF_TIMOFAIL	0x00001000 /* Timeout from sleep after we were awake. */
+#define	TDF_UNUSED12	0x00001000 /* --available-- */
 #define	TDF_SBDRY	0x00002000 /* Stop only on usermode boundary. */
 #define	TDF_UPIBLOCKED	0x00004000 /* Thread blocked on user PI mutex. */
 #define	TDF_NEEDSUSPCHK	0x00008000 /* Thread may need to suspend. */