Rewrite subr_sleepqueue.c use of callouts to not depend on the
specifics of callout KPI. Esp., do not depend on the exact interface of callout_stop(9) return values. The main change is that instead of requiring precise callouts, code maintains absolute time to wake up. Callouts now should ensure that a wake occurs at the requested moment, but we can tolerate both run-away callout, and callout_stop(9) lying about running callout either way. As consequence, it removes the constant source of the bugs where sleepq_check_timeout() causes uninterruptible thread state where the thread is detached from CPU, see e.g. r234952 and r296320. Patch also removes dual meaning of the TDF_TIMEOUT flag, making code (IMO much) simpler to reason about. Tested by: pho Reviewed by: jhb Sponsored by: The FreeBSD Foundation MFC after: 1 month Differential revision: https://reviews.freebsd.org/D7137
This commit is contained in:
parent
a9e182e895
commit
2d19b736ed
@ -375,8 +375,13 @@ DB_SHOW_COMMAND(thread, db_show_thread)
|
||||
db_printf(" lock: %s turnstile: %p\n", td->td_lockname,
|
||||
td->td_blocked);
|
||||
if (TD_ON_SLEEPQ(td))
|
||||
db_printf(" wmesg: %s wchan: %p\n", td->td_wmesg,
|
||||
td->td_wchan);
|
||||
db_printf(
|
||||
" wmesg: %s wchan: %p sleeptimo %lx. %jx (curr %lx. %jx)\n",
|
||||
td->td_wmesg, td->td_wchan,
|
||||
(long)sbttobt(td->td_sleeptimo).sec,
|
||||
(uintmax_t)sbttobt(td->td_sleeptimo).frac,
|
||||
(long)sbttobt(sbinuptime()).sec,
|
||||
(uintmax_t)sbttobt(sbinuptime()).frac);
|
||||
db_printf(" priority: %d\n", td->td_priority);
|
||||
db_printf(" container lock: %s (%p)\n", lock->lo_name, lock);
|
||||
if (td->td_swvoltick != 0) {
|
||||
|
@ -318,7 +318,7 @@ thread_reap(void)
|
||||
|
||||
/*
|
||||
* Don't even bother to lock if none at this instant,
|
||||
* we really don't care about the next instant..
|
||||
* we really don't care about the next instant.
|
||||
*/
|
||||
if (!TAILQ_EMPTY(&zombie_threads)) {
|
||||
mtx_lock_spin(&zombie_lock);
|
||||
@ -383,6 +383,7 @@ thread_free(struct thread *td)
|
||||
if (td->td_kstack != 0)
|
||||
vm_thread_dispose(td);
|
||||
vm_domain_policy_cleanup(&td->td_vm_dom_policy);
|
||||
callout_drain(&td->td_slpcallout);
|
||||
uma_zfree(thread_zone, td);
|
||||
}
|
||||
|
||||
@ -580,6 +581,7 @@ thread_wait(struct proc *p)
|
||||
td->td_cpuset = NULL;
|
||||
cpu_thread_clean(td);
|
||||
thread_cow_free(td);
|
||||
callout_drain(&td->td_slpcallout);
|
||||
thread_reap(); /* check for zombie threads etc. */
|
||||
}
|
||||
|
||||
|
@ -378,6 +378,7 @@ sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr,
|
||||
{
|
||||
struct sleepqueue_chain *sc;
|
||||
struct thread *td;
|
||||
sbintime_t pr1;
|
||||
|
||||
td = curthread;
|
||||
sc = SC_LOOKUP(wchan);
|
||||
@ -387,8 +388,14 @@ sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr,
|
||||
MPASS(wchan != NULL);
|
||||
if (cold)
|
||||
panic("timed sleep before timers are working");
|
||||
callout_reset_sbt_on(&td->td_slpcallout, sbt, pr,
|
||||
sleepq_timeout, td, PCPU_GET(cpuid), flags | C_DIRECT_EXEC);
|
||||
KASSERT(td->td_sleeptimo == 0, ("td %d %p td_sleeptimo %jx",
|
||||
td->td_tid, td, (uintmax_t)td->td_sleeptimo));
|
||||
thread_lock(td);
|
||||
callout_when(sbt, pr, flags, &td->td_sleeptimo, &pr1);
|
||||
thread_unlock(td);
|
||||
callout_reset_sbt_on(&td->td_slpcallout, td->td_sleeptimo, pr1,
|
||||
sleepq_timeout, td, PCPU_GET(cpuid), flags | C_PRECALC |
|
||||
C_DIRECT_EXEC);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -576,37 +583,36 @@ static int
|
||||
sleepq_check_timeout(void)
|
||||
{
|
||||
struct thread *td;
|
||||
int res;
|
||||
|
||||
td = curthread;
|
||||
THREAD_LOCK_ASSERT(td, MA_OWNED);
|
||||
|
||||
/*
|
||||
* If TDF_TIMEOUT is set, we timed out.
|
||||
* If TDF_TIMEOUT is set, we timed out. But recheck
|
||||
* td_sleeptimo anyway.
|
||||
*/
|
||||
if (td->td_flags & TDF_TIMEOUT) {
|
||||
res = 0;
|
||||
if (td->td_sleeptimo != 0) {
|
||||
if (td->td_sleeptimo <= sbinuptime())
|
||||
res = EWOULDBLOCK;
|
||||
td->td_sleeptimo = 0;
|
||||
}
|
||||
if (td->td_flags & TDF_TIMEOUT)
|
||||
td->td_flags &= ~TDF_TIMEOUT;
|
||||
return (EWOULDBLOCK);
|
||||
}
|
||||
|
||||
/*
|
||||
* If TDF_TIMOFAIL is set, the timeout ran after we had
|
||||
* already been woken up.
|
||||
*/
|
||||
if (td->td_flags & TDF_TIMOFAIL)
|
||||
td->td_flags &= ~TDF_TIMOFAIL;
|
||||
|
||||
/*
|
||||
* If callout_stop() fails, then the timeout is running on
|
||||
* another CPU, so synchronize with it to avoid having it
|
||||
* accidentally wake up a subsequent sleep.
|
||||
*/
|
||||
else if (_callout_stop_safe(&td->td_slpcallout, CS_EXECUTING, NULL)
|
||||
== 0) {
|
||||
td->td_flags |= TDF_TIMEOUT;
|
||||
TD_SET_SLEEPING(td);
|
||||
mi_switch(SW_INVOL | SWT_SLEEPQTIMO, NULL);
|
||||
}
|
||||
return (0);
|
||||
else
|
||||
/*
|
||||
* We ignore the situation where timeout subsystem was
|
||||
* unable to stop our callout. The struct thread is
|
||||
* type-stable, the callout will use the correct
|
||||
* memory when running. The checks of the
|
||||
* td_sleeptimo value in this function and in
|
||||
* sleepq_timeout() ensure that the thread does not
|
||||
* get spurious wakeups, even if the callout was reset
|
||||
* or thread reused.
|
||||
*/
|
||||
callout_stop(&td->td_slpcallout);
|
||||
return (res);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -914,12 +920,17 @@ sleepq_timeout(void *arg)
|
||||
CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
|
||||
(void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
|
||||
|
||||
/*
|
||||
* First, see if the thread is asleep and get the wait channel if
|
||||
* it is.
|
||||
*/
|
||||
thread_lock(td);
|
||||
if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
|
||||
|
||||
if (td->td_sleeptimo > sbinuptime() || td->td_sleeptimo == 0) {
|
||||
/*
|
||||
* The thread does not want a timeout (yet).
|
||||
*/
|
||||
} else if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
|
||||
/*
|
||||
* See if the thread is asleep and get the wait
|
||||
* channel if it is.
|
||||
*/
|
||||
wchan = td->td_wchan;
|
||||
sc = SC_LOOKUP(wchan);
|
||||
THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
|
||||
@ -927,40 +938,16 @@ sleepq_timeout(void *arg)
|
||||
MPASS(sq != NULL);
|
||||
td->td_flags |= TDF_TIMEOUT;
|
||||
wakeup_swapper = sleepq_resume_thread(sq, td, 0);
|
||||
thread_unlock(td);
|
||||
if (wakeup_swapper)
|
||||
kick_proc0();
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* If the thread is on the SLEEPQ but isn't sleeping yet, it
|
||||
* can either be on another CPU in between sleepq_add() and
|
||||
* one of the sleepq_*wait*() routines or it can be in
|
||||
* sleepq_catch_signals().
|
||||
*/
|
||||
if (TD_ON_SLEEPQ(td)) {
|
||||
} else if (TD_ON_SLEEPQ(td)) {
|
||||
/*
|
||||
* If the thread is on the SLEEPQ but isn't sleeping
|
||||
* yet, it can either be on another CPU in between
|
||||
* sleepq_add() and one of the sleepq_*wait*()
|
||||
* routines or it can be in sleepq_catch_signals().
|
||||
*/
|
||||
td->td_flags |= TDF_TIMEOUT;
|
||||
thread_unlock(td);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Now check for the edge cases. First, if TDF_TIMEOUT is set,
|
||||
* then the other thread has already yielded to us, so clear
|
||||
* the flag and resume it. If TDF_TIMEOUT is not set, then the
|
||||
* we know that the other thread is not on a sleep queue, but it
|
||||
* hasn't resumed execution yet. In that case, set TDF_TIMOFAIL
|
||||
* to let it know that the timeout has already run and doesn't
|
||||
* need to be canceled.
|
||||
*/
|
||||
if (td->td_flags & TDF_TIMEOUT) {
|
||||
MPASS(TD_IS_SLEEPING(td));
|
||||
td->td_flags &= ~TDF_TIMEOUT;
|
||||
TD_CLR_SLEEPING(td);
|
||||
wakeup_swapper = setrunnable(td);
|
||||
} else
|
||||
td->td_flags |= TDF_TIMOFAIL;
|
||||
thread_unlock(td);
|
||||
if (wakeup_swapper)
|
||||
kick_proc0();
|
||||
|
@ -282,6 +282,7 @@ struct thread {
|
||||
int td_no_sleeping; /* (k) Sleeping disabled count. */
|
||||
int td_dom_rr_idx; /* (k) RR Numa domain selection. */
|
||||
void *td_su; /* (k) FFS SU private */
|
||||
sbintime_t td_sleeptimo; /* (t) Sleep timeout. */
|
||||
#define td_endzero td_sigmask
|
||||
|
||||
/* Copied during fork1() or create_thread(). */
|
||||
@ -388,7 +389,7 @@ do { \
|
||||
#define TDF_ALLPROCSUSP 0x00000200 /* suspended by SINGLE_ALLPROC */
|
||||
#define TDF_BOUNDARY 0x00000400 /* Thread suspended at user boundary */
|
||||
#define TDF_ASTPENDING 0x00000800 /* Thread has some asynchronous events. */
|
||||
#define TDF_TIMOFAIL 0x00001000 /* Timeout from sleep after we were awake. */
|
||||
#define TDF_UNUSED12 0x00001000 /* --available-- */
|
||||
#define TDF_SBDRY 0x00002000 /* Stop only on usermode boundary. */
|
||||
#define TDF_UPIBLOCKED 0x00004000 /* Thread blocked on user PI mutex. */
|
||||
#define TDF_NEEDSUSPCHK 0x00008000 /* Thread may need to suspend. */
|
||||
|
Loading…
Reference in New Issue
Block a user