Rewrite subr_sleepqueue.c use of callouts to not depend on the
specifics of the callout KPI.

In particular, do not depend on the exact semantics of the
callout_stop(9) return values.  The main change is that instead of
requiring precise callouts, the code now maintains the absolute time
at which to wake up.  Callouts should still ensure that a wakeup
occurs at the requested moment, but we can tolerate both a run-away
callout and callout_stop(9) lying about a running callout, either way.

As a consequence, this removes a constant source of bugs where
sleepq_check_timeout() left the thread in an uninterruptible state
while detached from the CPU; see e.g. r234952 and r296320.

The patch also removes the dual meaning of the TDF_TIMEOUT flag,
making the code (IMO much) simpler to reason about.

Tested by:	pho
Reviewed by:	jhb
Sponsored by:	The FreeBSD Foundation
MFC after:	1 month
Differential revision:	https://reviews.freebsd.org/D7137
parent a9e182e895
commit 2d19b736ed
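The core idea of the change -- record an absolute wakeup deadline and decide whether the sleep timed out by comparing that deadline against the monotonic clock, instead of trusting the callout machinery's cancellation report -- can be sketched in userspace C. This is an illustrative model only: the names sleep_deadline, set_timeout(), and check_timeout() are hypothetical stand-ins for td_sleeptimo, sleepq_set_timeout_sbt(), and sleepq_check_timeout(), and nanosecond counts stand in for sbintime_t.

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <time.h>

/* Hypothetical userspace model of the td_sleeptimo scheme. */
static int64_t sleep_deadline;	/* 0 means "no timeout armed" */

static int64_t
now_ns(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ((int64_t)ts.tv_sec * 1000000000 + ts.tv_nsec);
}

/* Analogue of sleepq_set_timeout_sbt(): record an absolute deadline. */
static void
set_timeout(int64_t rel_ns)
{
	sleep_deadline = now_ns() + rel_ns;
}

/*
 * Analogue of the new sleepq_check_timeout(): the verdict depends only
 * on the recorded deadline and the clock, so a late, spurious, or
 * uncancelable timer callback cannot change the outcome.
 */
static int
check_timeout(void)
{
	int res;

	res = 0;
	if (sleep_deadline != 0) {
		if (sleep_deadline <= now_ns())
			res = EWOULDBLOCK;
		sleep_deadline = 0;
	}
	return (res);
}

int
main(void)
{
	set_timeout(1000000000);	/* 1 s out: not yet due */
	printf("immediately: %d\n", check_timeout());	/* 0 */
	set_timeout(1);			/* 1 ns: already expired */
	printf("expired: %d\n", check_timeout());	/* likely EWOULDBLOCK */
	return (0);
}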
@@ -375,8 +375,13 @@ DB_SHOW_COMMAND(thread, db_show_thread)
 	db_printf(" lock: %s turnstile: %p\n", td->td_lockname,
 	    td->td_blocked);
 	if (TD_ON_SLEEPQ(td))
-		db_printf(" wmesg: %s wchan: %p\n", td->td_wmesg,
-		    td->td_wchan);
+		db_printf(
+		    " wmesg: %s wchan: %p sleeptimo %lx. %jx (curr %lx. %jx)\n",
+		    td->td_wmesg, td->td_wchan,
+		    (long)sbttobt(td->td_sleeptimo).sec,
+		    (uintmax_t)sbttobt(td->td_sleeptimo).frac,
+		    (long)sbttobt(sbinuptime()).sec,
+		    (uintmax_t)sbttobt(sbinuptime()).frac);
 	db_printf(" priority: %d\n", td->td_priority);
 	db_printf(" container lock: %s (%p)\n", lock->lo_name, lock);
 	if (td->td_swvoltick != 0) {
@@ -318,7 +318,7 @@ thread_reap(void)

 	/*
 	 * Don't even bother to lock if none at this instant,
-	 * we really don't care about the next instant..
+	 * we really don't care about the next instant.
 	 */
 	if (!TAILQ_EMPTY(&zombie_threads)) {
 		mtx_lock_spin(&zombie_lock);
@@ -383,6 +383,7 @@ thread_free(struct thread *td)
 	if (td->td_kstack != 0)
 		vm_thread_dispose(td);
 	vm_domain_policy_cleanup(&td->td_vm_dom_policy);
+	callout_drain(&td->td_slpcallout);
 	uma_zfree(thread_zone, td);
 }

@@ -580,6 +581,7 @@ thread_wait(struct proc *p)
 	td->td_cpuset = NULL;
 	cpu_thread_clean(td);
 	thread_cow_free(td);
+	callout_drain(&td->td_slpcallout);
 	thread_reap();	/* check for zombie threads etc. */
 }

@@ -378,6 +378,7 @@ sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr,
 {
 	struct sleepqueue_chain *sc;
 	struct thread *td;
+	sbintime_t pr1;

 	td = curthread;
 	sc = SC_LOOKUP(wchan);
@@ -387,8 +388,14 @@ sleepq_set_timeout_sbt(void *wchan, sbintime_t sbt, sbintime_t pr,
 	MPASS(wchan != NULL);
 	if (cold)
 		panic("timed sleep before timers are working");
-	callout_reset_sbt_on(&td->td_slpcallout, sbt, pr,
-	    sleepq_timeout, td, PCPU_GET(cpuid), flags | C_DIRECT_EXEC);
+	KASSERT(td->td_sleeptimo == 0, ("td %d %p td_sleeptimo %jx",
+	    td->td_tid, td, (uintmax_t)td->td_sleeptimo));
+	thread_lock(td);
+	callout_when(sbt, pr, flags, &td->td_sleeptimo, &pr1);
+	thread_unlock(td);
+	callout_reset_sbt_on(&td->td_slpcallout, td->td_sleeptimo, pr1,
+	    sleepq_timeout, td, PCPU_GET(cpuid), flags | C_PRECALC |
+	    C_DIRECT_EXEC);
 }

 /*
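In the hunk above, callout_when(9) converts the relative request (sbt plus precision pr) into the absolute expiry time and effective precision; sleepq_set_timeout_sbt() stores the absolute time in td_sleeptimo and hands both back to callout_reset_sbt_on() with C_PRECALC so the conversion is not redone. A rough userspace analogue of that split is shown below; the names when_abs() and the rel/256 slop factor are arbitrary illustrative stand-ins, not the kernel's actual precision policy.

#include <stdint.h>

/*
 * Hypothetical analogue of callout_when(): turn a relative timeout
 * into an absolute deadline plus an effective precision window that
 * the timer subsystem may use to coalesce wakeups.
 */
static void
when_abs(int64_t now, int64_t rel, int64_t prec_req,
    int64_t *deadline, int64_t *prec_eff)
{
	*deadline = now + rel;
	/* Allow at least a fixed share of the interval as slop. */
	*prec_eff = prec_req > rel / 256 ? prec_req : rel / 256;
}

The caller would then arm the timer with *deadline and *prec_eff directly (the C_PRECALC role), so the same absolute deadline is available both to the timer and to the wakeup-side check.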
@@ -576,37 +583,36 @@ static int
 sleepq_check_timeout(void)
 {
 	struct thread *td;
+	int res;

 	td = curthread;
 	THREAD_LOCK_ASSERT(td, MA_OWNED);

 	/*
-	 * If TDF_TIMEOUT is set, we timed out.
+	 * If TDF_TIMEOUT is set, we timed out.  But recheck
+	 * td_sleeptimo anyway.
 	 */
-	if (td->td_flags & TDF_TIMEOUT) {
-		td->td_flags &= ~TDF_TIMEOUT;
-		return (EWOULDBLOCK);
-	}
-
-	/*
-	 * If TDF_TIMOFAIL is set, the timeout ran after we had
-	 * already been woken up.
-	 */
-	if (td->td_flags & TDF_TIMOFAIL)
-		td->td_flags &= ~TDF_TIMOFAIL;
-
-	/*
-	 * If callout_stop() fails, then the timeout is running on
-	 * another CPU, so synchronize with it to avoid having it
-	 * accidentally wake up a subsequent sleep.
-	 */
-	else if (_callout_stop_safe(&td->td_slpcallout, CS_EXECUTING, NULL)
-	    == 0) {
-		td->td_flags |= TDF_TIMEOUT;
-		TD_SET_SLEEPING(td);
-		mi_switch(SW_INVOL | SWT_SLEEPQTIMO, NULL);
-	}
-	return (0);
+	res = 0;
+	if (td->td_sleeptimo != 0) {
+		if (td->td_sleeptimo <= sbinuptime())
+			res = EWOULDBLOCK;
+		td->td_sleeptimo = 0;
+	}
+	if (td->td_flags & TDF_TIMEOUT)
+		td->td_flags &= ~TDF_TIMEOUT;
+	else
+		/*
+		 * We ignore the situation where timeout subsystem was
+		 * unable to stop our callout.  The struct thread is
+		 * type-stable, the callout will use the correct
+		 * memory when running.  The checks of the
+		 * td_sleeptimo value in this function and in
+		 * sleepq_timeout() ensure that the thread does not
+		 * get spurious wakeups, even if the callout was reset
+		 * or thread reused.
+		 */
+		callout_stop(&td->td_slpcallout);
+	return (res);
 }

 /*
@@ -914,12 +920,17 @@ sleepq_timeout(void *arg)
 	CTR3(KTR_PROC, "sleepq_timeout: thread %p (pid %ld, %s)",
 	    (void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);

-	/*
-	 * First, see if the thread is asleep and get the wait channel if
-	 * it is.
-	 */
 	thread_lock(td);
-	if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
+	if (td->td_sleeptimo > sbinuptime() || td->td_sleeptimo == 0) {
+		/*
+		 * The thread does not want a timeout (yet).
+		 */
+	} else if (TD_IS_SLEEPING(td) && TD_ON_SLEEPQ(td)) {
+		/*
+		 * See if the thread is asleep and get the wait
+		 * channel if it is.
+		 */
 		wchan = td->td_wchan;
 		sc = SC_LOOKUP(wchan);
 		THREAD_LOCKPTR_ASSERT(td, &sc->sc_lock);
@@ -927,40 +938,16 @@ sleepq_timeout(void *arg)
 		MPASS(sq != NULL);
 		td->td_flags |= TDF_TIMEOUT;
 		wakeup_swapper = sleepq_resume_thread(sq, td, 0);
-		thread_unlock(td);
-		if (wakeup_swapper)
-			kick_proc0();
-		return;
-	}
-
-	/*
-	 * If the thread is on the SLEEPQ but isn't sleeping yet, it
-	 * can either be on another CPU in between sleepq_add() and
-	 * one of the sleepq_*wait*() routines or it can be in
-	 * sleepq_catch_signals().
-	 */
-	if (TD_ON_SLEEPQ(td)) {
-		td->td_flags |= TDF_TIMEOUT;
-		thread_unlock(td);
-		return;
-	}
-
-	/*
-	 * Now check for the edge cases.  First, if TDF_TIMEOUT is set,
-	 * then the other thread has already yielded to us, so clear
-	 * the flag and resume it.  If TDF_TIMEOUT is not set, then the
-	 * we know that the other thread is not on a sleep queue, but it
-	 * hasn't resumed execution yet.  In that case, set TDF_TIMOFAIL
-	 * to let it know that the timeout has already run and doesn't
-	 * need to be canceled.
-	 */
-	if (td->td_flags & TDF_TIMEOUT) {
-		MPASS(TD_IS_SLEEPING(td));
-		td->td_flags &= ~TDF_TIMEOUT;
-		TD_CLR_SLEEPING(td);
-		wakeup_swapper = setrunnable(td);
-	} else
-		td->td_flags |= TDF_TIMOFAIL;
+	} else if (TD_ON_SLEEPQ(td)) {
+		/*
+		 * If the thread is on the SLEEPQ but isn't sleeping
+		 * yet, it can either be on another CPU in between
+		 * sleepq_add() and one of the sleepq_*wait*()
+		 * routines or it can be in sleepq_catch_signals().
+		 */
+		td->td_flags |= TDF_TIMEOUT;
+	}
 	thread_unlock(td);
+
 	if (wakeup_swapper)
 		kick_proc0();
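The guard at the top of the new sleepq_timeout() is what makes a run-away callout harmless: if no deadline is armed (td_sleeptimo == 0) or the armed deadline is not yet due, the callback falls through without touching the thread. A minimal, self-contained model of that decision is sketched below; timeout_is_genuine() and its int64_t nanosecond arguments are hypothetical stand-ins for the sbintime_t comparison in the kernel.

#include <stdint.h>

/*
 * Hypothetical model of the new sleepq_timeout() guard.  A callback
 * invocation is honored only if a deadline is armed and already due;
 * a stale callout (one that fires after the deadline was cleared, or
 * after it was re-armed for a later time) is ignored.
 */
static int
timeout_is_genuine(int64_t deadline, int64_t now)
{
	if (deadline == 0 || deadline > now)
		return (0);	/* no timeout wanted (yet): ignore */
	return (1);		/* due: resume the sleeping thread */
}

Because both this check and the wakeup-side check consult the same absolute deadline, neither side needs to know whether callout_stop(9) actually managed to cancel the callout.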
@@ -282,6 +282,7 @@ struct thread {
 	int		td_no_sleeping;	/* (k) Sleeping disabled count. */
 	int		td_dom_rr_idx;	/* (k) RR Numa domain selection. */
 	void		*td_su;		/* (k) FFS SU private */
+	sbintime_t	td_sleeptimo;	/* (t) Sleep timeout. */
 #define	td_endzero td_sigmask

 /* Copied during fork1() or create_thread(). */
@@ -388,7 +389,7 @@ do { \
 #define	TDF_ALLPROCSUSP	0x00000200 /* suspended by SINGLE_ALLPROC */
 #define	TDF_BOUNDARY	0x00000400 /* Thread suspended at user boundary */
 #define	TDF_ASTPENDING	0x00000800 /* Thread has some asynchronous events. */
-#define	TDF_TIMOFAIL	0x00001000 /* Timeout from sleep after we were awake. */
+#define	TDF_UNUSED12	0x00001000 /* --available-- */
 #define	TDF_SBDRY	0x00002000 /* Stop only on usermode boundary. */
 #define	TDF_UPIBLOCKED	0x00004000 /* Thread blocked on user PI mutex. */
 #define	TDF_NEEDSUSPCHK	0x00008000 /* Thread may need to suspend. */