 - Pass the priority argument from *sleep() into sleepq and down into
   sched_sleep().  This removes an extra thread_lock() acquisition and
   allows the scheduler to decide what to do with the static boost
   (see the sketch after this list).
 - Change the priority arguments to cv_* to match sleepq/msleep/etc.
   where 0 means no priority change.  Catch -1 in cv_broadcastpri() and
   convert it to 0 for now.
 - Set a flag when sleeping in a way that is compatible with swapping
   since direct priority comparisons are meaningless now.
 - Add a sysctl to ULE, kern.sched.static_boost, which defaults to on and
   controls the boost behavior.  Turning it off gives better performance
   in some workloads but needs more investigation.
 - While we're modifying sleepq, change signal and broadcast to both
   return with the lock held, as the lock was held on entry.
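
For illustration, a minimal sketch of a sleeping caller under the new
scheme (hypothetical xc_softc/xc_ready names): the msleep() interface is
unchanged, but the PRIMASK bits of its priority argument now travel
through sleepq_wait_sig() into sched_sleep() instead of being applied up
front with a separate thread_lock()/sched_prio()/thread_unlock() sequence.

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/lock.h>
	#include <sys/mutex.h>

	struct xc_softc {			/* hypothetical driver state */
		struct mtx	xc_mtx;
		int		xc_ready;
	};

	static int
	xc_wait_for_data(struct xc_softc *sc)
	{
		int error = 0;

		mtx_lock(&sc->xc_mtx);
		while (!sc->xc_ready) {
			/*
			 * PSOCK now reaches sched_sleep() via
			 * sleepq_wait_sig(); pri <= PSOCK also marks the
			 * sleep swap-compatible (TDF_CANSWAP).
			 */
			error = msleep(&sc->xc_ready, &sc->xc_mtx,
			    PSOCK | PCATCH, "xcdata", 0);
			if (error != 0)
				break;
		}
		mtx_unlock(&sc->xc_mtx);
		return (error);
	}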

Reviewed by:	jhb, peter
Author:	Jeff Roberson
Date:	2008-03-12 06:31:06 +00:00
Parent:	df4691b984
Commit:	c5aa6b581d
Notes:	svn2git 2020-12-20 02:59:44 +00:00
	svn path=/head/; revision=177085
14 changed files with 90 additions and 95 deletions

sys/kern/kern_condvar.c

@@ -131,7 +131,7 @@ _cv_wait(struct cv *cvp, struct lock_object *lock)
 		lock_state = class->lc_unlock(lock);
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_lock(cvp);
-	sleepq_wait(cvp);
+	sleepq_wait(cvp, 0);
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
@@ -184,7 +184,7 @@ _cv_wait_unlock(struct cv *cvp, struct lock_object *lock)
 		class->lc_unlock(lock);
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_lock(cvp);
-	sleepq_wait(cvp);
+	sleepq_wait(cvp, 0);
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
@@ -242,7 +242,7 @@ _cv_wait_sig(struct cv *cvp, struct lock_object *lock)
 		lock_state = class->lc_unlock(lock);
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_lock(cvp);
-	rval = sleepq_wait_sig(cvp);
+	rval = sleepq_wait_sig(cvp, 0);
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
@@ -302,7 +302,7 @@ _cv_timedwait(struct cv *cvp, struct lock_object *lock, int timo)
 		lock_state = class->lc_unlock(lock);
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_lock(cvp);
-	rval = sleepq_timedwait(cvp);
+	rval = sleepq_timedwait(cvp, 0);
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
@@ -366,7 +366,7 @@ _cv_timedwait_sig(struct cv *cvp, struct lock_object *lock, int timo)
 		lock_state = class->lc_unlock(lock);
 		if (class->lc_flags & LC_SLEEPABLE)
 			sleepq_lock(cvp);
-	rval = sleepq_timedwait_sig(cvp);
+	rval = sleepq_timedwait_sig(cvp, 0);
 #ifdef KTRACE
 	if (KTRPOINT(td, KTR_CSW))
@@ -393,7 +393,7 @@ cv_signal(struct cv *cvp)
 	sleepq_lock(cvp);
 	if (cvp->cv_waiters > 0) {
 		cvp->cv_waiters--;
-		sleepq_signal(cvp, SLEEPQ_CONDVAR, -1, 0);
+		sleepq_signal(cvp, SLEEPQ_CONDVAR, 0, 0);
 	}
 	sleepq_release(cvp);
 }
@@ -405,11 +405,16 @@ cv_signal(struct cv *cvp)
 void
 cv_broadcastpri(struct cv *cvp, int pri)
 {
+	/*
+	 * XXX sleepq_broadcast pri argument changed from -1 meaning
+	 * no pri to 0 meaning no pri.
+	 */
+	if (pri == -1)
+		pri = 0;
 	sleepq_lock(cvp);
 	if (cvp->cv_waiters > 0) {
 		cvp->cv_waiters = 0;
 		sleepq_broadcast(cvp, SLEEPQ_CONDVAR, pri, 0);
-	} else
-		sleepq_release(cvp);
+	}
+	sleepq_release(cvp);
 }
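
A usage note on the shim above: until callers are converted, -1 and 0 are
both accepted and mean "wake all waiters with no priority change", since
cv_broadcastpri() converts the legacy -1 to 0 before it reaches
sleepq_broadcast().  A hedged sketch (hypothetical xc_cv variable):

	#include <sys/param.h>
	#include <sys/condvar.h>

	static struct cv xc_cv;		/* hypothetical condition variable */

	static void
	xc_wakeup_all(void)
	{
		/* Each of these calls now has the same effect: */
		cv_broadcast(&xc_cv);		/* macro: cv_broadcastpri(cvp, 0) */
		cv_broadcastpri(&xc_cv, 0);	/* new "no priority" convention */
		cv_broadcastpri(&xc_cv, -1);	/* legacy value, caught, becomes 0 */
	}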

sys/kern/kern_sx.c

@@ -404,10 +404,9 @@ _sx_downgrade(struct sx *sx, const char *file, int line)
 	atomic_store_rel_ptr(&sx->sx_lock, SX_SHARERS_LOCK(1) |
 	    (x & SX_LOCK_EXCLUSIVE_WAITERS));
 	if (x & SX_LOCK_SHARED_WAITERS)
-		sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, -1,
+		sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0,
 		    SQ_SHARED_QUEUE);
-	else
-		sleepq_release(&sx->lock_object);
+	sleepq_release(&sx->lock_object);
 	LOCK_LOG_LOCK("XDOWNGRADE", &sx->lock_object, 0, 0, file, line);
 }
@@ -556,9 +555,9 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
 		    SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
 		    SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE);
 		if (!(opts & SX_INTERRUPTIBLE))
-			sleepq_wait(&sx->lock_object);
+			sleepq_wait(&sx->lock_object, 0);
 		else
-			error = sleepq_wait_sig(&sx->lock_object);
+			error = sleepq_wait_sig(&sx->lock_object, 0);
 		if (error) {
 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
@@ -627,7 +626,8 @@ _sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int line)
 		    __func__, sx, queue == SQ_SHARED_QUEUE ? "shared" :
 		    "exclusive");
 	atomic_store_rel_ptr(&sx->sx_lock, x);
-	sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, -1, queue);
+	sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0, queue);
+	sleepq_release(&sx->lock_object);
 }
 /*
@@ -761,9 +761,9 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
 		    SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
 		    SLEEPQ_INTERRUPTIBLE : 0), SQ_SHARED_QUEUE);
 		if (!(opts & SX_INTERRUPTIBLE))
-			sleepq_wait(&sx->lock_object);
+			sleepq_wait(&sx->lock_object, 0);
 		else
-			error = sleepq_wait_sig(&sx->lock_object);
+			error = sleepq_wait_sig(&sx->lock_object, 0);
 		if (error) {
 			if (LOCK_LOG_TEST(&sx->lock_object, 0))
@@ -861,8 +861,9 @@ _sx_sunlock_hard(struct sx *sx, const char *file, int line)
 		if (LOCK_LOG_TEST(&sx->lock_object, 0))
 			CTR2(KTR_LOCK, "%s: %p waking up all thread on"
 			    "exclusive queue", __func__, sx);
-		sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, -1,
+		sleepq_broadcast(&sx->lock_object, SLEEPQ_SX, 0,
 		    SQ_EXCLUSIVE_QUEUE);
+		sleepq_release(&sx->lock_object);
 		break;
 	}
 }

sys/kern/kern_synch.c

@@ -160,6 +160,7 @@ _sleep(ident, lock, priority, wmesg, timo)
 		return (0);
 	}
 	catch = priority & PCATCH;
+	pri = priority & PRIMASK;
 	rval = 0;
 	/*
@@ -207,25 +208,14 @@ _sleep(ident, lock, priority, wmesg, timo)
 		lock_state = class->lc_unlock(lock);
 		sleepq_lock(ident);
 	}
-	/*
-	 * Adjust this thread's priority, if necessary.
-	 */
-	pri = priority & PRIMASK;
-	if (pri != 0 && pri != td->td_priority) {
-		thread_lock(td);
-		sched_prio(td, pri);
-		thread_unlock(td);
-	}
 	if (timo && catch)
-		rval = sleepq_timedwait_sig(ident);
+		rval = sleepq_timedwait_sig(ident, pri);
 	else if (timo)
-		rval = sleepq_timedwait(ident);
+		rval = sleepq_timedwait(ident, pri);
 	else if (catch)
-		rval = sleepq_wait_sig(ident);
+		rval = sleepq_wait_sig(ident, pri);
 	else {
-		sleepq_wait(ident);
+		sleepq_wait(ident, pri);
 		rval = 0;
 	}
 #ifdef KTRACE
@@ -307,9 +297,9 @@ msleep_spin(ident, mtx, wmesg, timo)
 	sleepq_lock(ident);
 #endif
 	if (timo)
-		rval = sleepq_timedwait(ident);
+		rval = sleepq_timedwait(ident, 0);
 	else {
-		sleepq_wait(ident);
+		sleepq_wait(ident, 0);
 		rval = 0;
 	}
 #ifdef KTRACE
@@ -347,7 +337,8 @@ wakeup(ident)
 {
 	sleepq_lock(ident);
-	sleepq_broadcast(ident, SLEEPQ_SLEEP, -1, 0);
+	sleepq_broadcast(ident, SLEEPQ_SLEEP, 0, 0);
+	sleepq_release(ident);
 }
 /*
@@ -361,7 +352,7 @@ wakeup_one(ident)
 {
 	sleepq_lock(ident);
-	sleepq_signal(ident, SLEEPQ_SLEEP, -1, 0);
+	sleepq_signal(ident, SLEEPQ_SLEEP, 0, 0);
 	sleepq_release(ident);
 }

sys/kern/kern_thread.c

@@ -881,8 +881,8 @@ thread_suspend_switch(struct thread *td)
 	p->p_suspcount++;
 	PROC_UNLOCK(p);
 	thread_lock(td);
-	sched_sleep(td);
 	TD_SET_SUSPENDED(td);
+	sched_sleep(td, 0);
 	PROC_SUNLOCK(p);
 	DROP_GIANT();
 	mi_switch(SW_VOL, NULL);
@@ -901,8 +901,8 @@ thread_suspend_one(struct thread *td)
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	KASSERT(!TD_IS_SUSPENDED(td), ("already suspended"));
 	p->p_suspcount++;
-	sched_sleep(td);
 	TD_SET_SUSPENDED(td);
+	sched_sleep(td, 0);
 }
 
 void

sys/kern/kern_timeout.c

@@ -563,7 +563,7 @@ _callout_stop_safe(c, safe)
 			sleepq_add(&callout_wait,
 			    &callout_lock.lock_object, "codrain",
 			    SLEEPQ_SLEEP, 0);
-			sleepq_wait(&callout_wait);
+			sleepq_wait(&callout_wait, 0);
 			sq_locked = 0;
 
 			/* Reacquire locks previously released. */
sys/kern/sched_4bsd.c

@@ -799,12 +799,16 @@ sched_unlend_user_prio(struct thread *td, u_char prio)
 }
 
 void
-sched_sleep(struct thread *td)
+sched_sleep(struct thread *td, int pri)
 {
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	td->td_slptick = ticks;
 	td->td_sched->ts_slptime = 0;
+	if (pri)
+		sched_prio(td, pri);
+	if (TD_IS_SUSPENDED(td) || pri <= PSOCK)
+		td->td_flags |= TDF_CANSWAP;
 }
 
 void
@@ -922,6 +926,7 @@ sched_wakeup(struct thread *td)
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	ts = td->td_sched;
+	td->td_flags &= ~TDF_CANSWAP;
 	if (ts->ts_slptime > 1) {
 		updatepri(td);
 		resetpriority(td);
sys/kern/sched_ule.c

@@ -185,6 +185,7 @@ static int preempt_thresh = PRI_MIN_KERN;
 #else
 static int preempt_thresh = 0;
 #endif
+static int static_boost = 1;
 
 /*
  * tdq - per processor runqs and statistics. All fields are protected by the
@@ -1856,12 +1857,16 @@ sched_nice(struct proc *p, int nice)
  * Record the sleep time for the interactivity scorer.
  */
 void
-sched_sleep(struct thread *td)
+sched_sleep(struct thread *td, int prio)
 {
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	td->td_slptick = ticks;
+	if (TD_IS_SUSPENDED(td) || prio <= PSOCK)
+		td->td_flags |= TDF_CANSWAP;
+	if (static_boost && prio)
+		sched_prio(td, prio);
 }
 
 /*
@@ -1876,6 +1881,7 @@ sched_wakeup(struct thread *td)
 	THREAD_LOCK_ASSERT(td, MA_OWNED);
 	ts = td->td_sched;
+	td->td_flags &= ~TDF_CANSWAP;
 	/*
 	 * If we slept for more than a tick update our interactivity and
 	 * priority.
@@ -2555,6 +2561,8 @@ SYSCTL_INT(_kern_sched, OID_AUTO, interact, CTLFLAG_RW, &sched_interact, 0,
     "Interactivity score threshold");
 SYSCTL_INT(_kern_sched, OID_AUTO, preempt_thresh, CTLFLAG_RW, &preempt_thresh,
     0,"Min priority for preemption, lower priorities have greater precedence");
+SYSCTL_INT(_kern_sched, OID_AUTO, static_boost, CTLFLAG_RW, &static_boost,
+    0,"Controls whether static kernel priorities are assigned to sleeping threads.");
 #ifdef SMP
 SYSCTL_INT(_kern_sched, OID_AUTO, affinity, CTLFLAG_RW, &affinity, 0,
     "Number of hz ticks to keep thread affinity for");

sys/kern/subr_sleepqueue.c

@@ -149,7 +149,7 @@ static uma_zone_t sleepq_zone;
 /*
  * Prototypes for non-exported routines.
  */
-static int	sleepq_catch_signals(void *wchan);
+static int	sleepq_catch_signals(void *wchan, int pri);
 static int	sleepq_check_signals(void);
 static int	sleepq_check_timeout(void);
 #ifdef INVARIANTS
@@ -158,7 +158,7 @@ static void	sleepq_dtor(void *mem, int size, void *arg);
 static int	sleepq_init(void *mem, int size, int flags);
 static void	sleepq_resume_thread(struct sleepqueue *sq, struct thread *td,
 		    int pri);
-static void	sleepq_switch(void *wchan);
+static void	sleepq_switch(void *wchan, int pri);
 static void	sleepq_timeout(void *arg);
 
 /*
@@ -367,7 +367,7 @@ sleepq_set_timeout(void *wchan, int timo)
  * may have transitioned from the sleepq lock to a run lock.
  */
 static int
-sleepq_catch_signals(void *wchan)
+sleepq_catch_signals(void *wchan, int pri)
 {
 	struct sleepqueue_chain *sc;
 	struct sleepqueue *sq;
@@ -411,7 +411,7 @@ sleepq_catch_signals(void *wchan)
 	thread_lock(td);
 	if (ret == 0) {
 		if (!(td->td_flags & TDF_INTERRUPT)) {
-			sleepq_switch(wchan);
+			sleepq_switch(wchan, pri);
 			return (0);
 		}
 		/* KSE threads tried unblocking us. */
@@ -424,7 +424,7 @@ sleepq_catch_signals(void *wchan)
 	 */
 	if (TD_ON_SLEEPQ(td)) {
 		sq = sleepq_lookup(wchan);
-		sleepq_resume_thread(sq, td, -1);
+		sleepq_resume_thread(sq, td, 0);
 	}
 	mtx_unlock_spin(&sc->sc_lock);
 	MPASS(td->td_lock != &sc->sc_lock);
@@ -436,7 +436,7 @@ sleepq_catch_signals(void *wchan)
  * Returns with thread lock.
  */
 static void
-sleepq_switch(void *wchan)
+sleepq_switch(void *wchan, int pri)
 {
 	struct sleepqueue_chain *sc;
 	struct sleepqueue *sq;
@@ -464,15 +464,14 @@ sleepq_switch(void *wchan)
 	if (td->td_flags & TDF_TIMEOUT) {
 		MPASS(TD_ON_SLEEPQ(td));
 		sq = sleepq_lookup(wchan);
-		sleepq_resume_thread(sq, td, -1);
+		sleepq_resume_thread(sq, td, 0);
 		mtx_unlock_spin(&sc->sc_lock);
 		return;
 	}
-	thread_lock_set(td, &sc->sc_lock);
 	MPASS(td->td_sleepqueue == NULL);
-	sched_sleep(td);
+	sched_sleep(td, pri);
+	thread_lock_set(td, &sc->sc_lock);
 	TD_SET_SLEEPING(td);
 	SCHED_STAT_INC(switch_sleepq);
 	mi_switch(SW_VOL, NULL);
@@ -551,14 +550,14 @@ sleepq_check_signals(void)
  * Block the current thread until it is awakened from its sleep queue.
  */
 void
-sleepq_wait(void *wchan)
+sleepq_wait(void *wchan, int pri)
 {
 	struct thread *td;
 
 	td = curthread;
 	MPASS(!(td->td_flags & TDF_SINTR));
 	thread_lock(td);
-	sleepq_switch(wchan);
+	sleepq_switch(wchan, pri);
 	thread_unlock(td);
 }
@@ -567,12 +566,12 @@ sleepq_wait(void *wchan)
  * or it is interrupted by a signal.
 */
 int
-sleepq_wait_sig(void *wchan)
+sleepq_wait_sig(void *wchan, int pri)
 {
 	int rcatch;
 	int rval;
 
-	rcatch = sleepq_catch_signals(wchan);
+	rcatch = sleepq_catch_signals(wchan, pri);
 	rval = sleepq_check_signals();
 	thread_unlock(curthread);
 	if (rcatch)
@@ -585,7 +584,7 @@ sleepq_wait_sig(void *wchan)
  * or it times out while waiting.
 */
 int
-sleepq_timedwait(void *wchan)
+sleepq_timedwait(void *wchan, int pri)
 {
 	struct thread *td;
 	int rval;
@@ -593,7 +592,7 @@ sleepq_timedwait(void *wchan)
 	td = curthread;
 	MPASS(!(td->td_flags & TDF_SINTR));
 	thread_lock(td);
-	sleepq_switch(wchan);
+	sleepq_switch(wchan, pri);
 	rval = sleepq_check_timeout();
 	thread_unlock(td);
@@ -605,11 +604,11 @@ sleepq_timedwait(void *wchan)
  * it is interrupted by a signal, or it times out waiting to be awakened.
 */
 int
-sleepq_timedwait_sig(void *wchan)
+sleepq_timedwait_sig(void *wchan, int pri)
 {
 	int rcatch, rvalt, rvals;
 
-	rcatch = sleepq_catch_signals(wchan);
+	rcatch = sleepq_catch_signals(wchan, pri);
 	rvalt = sleepq_check_timeout();
 	rvals = sleepq_check_signals();
 	thread_unlock(curthread);
@@ -673,8 +672,8 @@ sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri)
 	TD_CLR_SLEEPING(td);
 
 	/* Adjust priority if requested. */
-	MPASS(pri == -1 || (pri >= PRI_MIN && pri <= PRI_MAX));
-	if (pri != -1 && td->td_priority > pri)
+	MPASS(pri == 0 || (pri >= PRI_MIN && pri <= PRI_MAX));
+	if (pri != 0 && td->td_priority > pri)
 		sched_prio(td, pri);
 	setrunnable(td);
 }
@@ -760,10 +759,8 @@ sleepq_broadcast(void *wchan, int flags, int pri, int queue)
 	KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__));
 	MPASS((queue >= 0) && (queue < NR_SLEEPQS));
 	sq = sleepq_lookup(wchan);
-	if (sq == NULL) {
-		sleepq_release(wchan);
+	if (sq == NULL)
 		return;
-	}
 
 	KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
 	    ("%s: mismatch between sleep/wakeup and cv_*", __func__));
@@ -774,7 +771,6 @@ sleepq_broadcast(void *wchan, int flags, int pri, int queue)
 		sleepq_resume_thread(sq, td, pri);
 		thread_unlock(td);
 	}
-	sleepq_release(wchan);
 }
 
 /*
@@ -805,7 +801,7 @@ sleepq_timeout(void *arg)
 		sq = sleepq_lookup(wchan);
 		MPASS(sq != NULL);
 		td->td_flags |= TDF_TIMEOUT;
-		sleepq_resume_thread(sq, td, -1);
+		sleepq_resume_thread(sq, td, 0);
 		thread_unlock(td);
 		return;
 	}
@@ -872,7 +868,7 @@ sleepq_remove(struct thread *td, void *wchan)
 	thread_lock(td);
 	MPASS(sq != NULL);
 	MPASS(td->td_wchan == wchan);
-	sleepq_resume_thread(sq, td, -1);
+	sleepq_resume_thread(sq, td, 0);
 	thread_unlock(td);
 	sleepq_release(wchan);
 }
@@ -916,7 +912,7 @@ sleepq_abort(struct thread *td, int intrval)
 	MPASS(sq != NULL);
 
 	/* Thread is asleep on sleep queue sq, so wake it up. */
-	sleepq_resume_thread(sq, td, -1);
+	sleepq_resume_thread(sq, td, 0);
 }
 
 #ifdef DDB
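
One consequence of the sleepq_broadcast() change above: both wakeup
primitives now return with the sleep queue chain lock still held, so the
caller always pairs sleepq_lock() with sleepq_release(), exactly as
wakeup() and wakeup_one() in kern_synch.c now do.  A hedged sketch of the
pattern (hypothetical wait channel):

	#include <sys/param.h>
	#include <sys/sleepqueue.h>

	static void
	xc_wakeup_channel(void *wchan)
	{
		sleepq_lock(wchan);
		/* 0, 0: no priority adjustment, default sleep queue. */
		sleepq_broadcast(wchan, SLEEPQ_SLEEP, 0, 0);
		sleepq_release(wchan);
	}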

sys/sys/condvar.h

@@ -72,7 +72,7 @@ void	cv_broadcastpri(struct cv *cvp, int pri);
 #define	cv_timedwait_sig(cvp, lock, timo)				\
 	_cv_timedwait_sig((cvp), &(lock)->lock_object, (timo))
 
-#define	cv_broadcast(cvp)	cv_broadcastpri(cvp, -1)
+#define	cv_broadcast(cvp)	cv_broadcastpri(cvp, 0)
 
 #define	cv_wmesg(cvp)		((cvp)->cv_description)

sys/sys/proc.h

@@ -335,7 +335,7 @@ do {									\
 #define	TDF_SINTR	0x00000008 /* Sleep is interruptible. */
 #define	TDF_TIMEOUT	0x00000010 /* Timing out during sleep. */
 #define	TDF_IDLETD	0x00000020 /* This is a per-CPU idle thread. */
-#define	TDF_UNUSEDx40	0x00000040 /* --available-- */
+#define	TDF_CANSWAP	0x00000040 /* Thread can be swapped. */
 #define	TDF_SLEEPABORT	0x00000080 /* sleepq_abort was called. */
 #define	TDF_KTH_SUSP	0x00000100 /* kthread is suspended */
 #define	TDF_UBORROWING	0x00000200 /* Thread is borrowing user pri. */
@@ -782,7 +782,7 @@ MALLOC_DECLARE(M_ZOMBIE);
 } while (0)
 
 /* Check whether a thread is safe to be swapped out. */
-#define	thread_safetoswapout(td)	(TD_IS_SLEEPING(td) || TD_IS_SUSPENDED(td))
+#define	thread_safetoswapout(td)	((td)->td_flags & TDF_CANSWAP)
 
 /* Control whether or not it is safe for curthread to sleep. */
 #define	THREAD_NO_SLEEPING() do {					\
sys/sys/sched.h

@@ -100,7 +100,7 @@ void	sched_lend_prio(struct thread *td, u_char prio);
 void	sched_lend_user_prio(struct thread *td, u_char pri);
 fixpt_t	sched_pctcpu(struct thread *td);
 void	sched_prio(struct thread *td, u_char prio);
-void	sched_sleep(struct thread *td);
+void	sched_sleep(struct thread *td, int prio);
 void	sched_switch(struct thread *td, struct thread *newtd, int flags);
 void	sched_throw(struct thread *td);
 void	sched_unlend_prio(struct thread *td, u_char prio);

sys/sys/sleepqueue.h

@@ -102,10 +102,10 @@ void	sleepq_release(void *wchan);
 void	sleepq_remove(struct thread *td, void *wchan);
 void	sleepq_signal(void *wchan, int flags, int pri, int queue);
 void	sleepq_set_timeout(void *wchan, int timo);
-int	sleepq_timedwait(void *wchan);
-int	sleepq_timedwait_sig(void *wchan);
-void	sleepq_wait(void *wchan);
-int	sleepq_wait_sig(void *wchan);
+int	sleepq_timedwait(void *wchan, int pri);
+int	sleepq_timedwait_sig(void *wchan, int pri);
+void	sleepq_wait(void *wchan, int pri);
+int	sleepq_wait_sig(void *wchan, int pri);
 
 #endif	/* _KERNEL */
 #endif	/* !_SYS_SLEEPQUEUE_H_ */
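
For code using the raw sleepqueue(9) interface directly (as kern_timeout.c
does above), the wait entry points now carry the priority.  A hedged
sketch modeled on the codrain pattern, with hypothetical
xc_drain_done/xc_lock names:

	#include <sys/param.h>
	#include <sys/lock.h>
	#include <sys/mutex.h>
	#include <sys/sleepqueue.h>

	static int	xc_drain_done;		/* hypothetical wait channel */
	static struct mtx xc_lock;		/* hypothetical interlock */

	static void
	xc_drain_wait(void)
	{
		mtx_lock(&xc_lock);
		sleepq_lock(&xc_drain_done);
		sleepq_add(&xc_drain_done, &xc_lock.lock_object, "xcdrain",
		    SLEEPQ_SLEEP, 0);
		mtx_unlock(&xc_lock);
		/* 0 = leave priority alone; a PRI_* value requests a boost. */
		sleepq_wait(&xc_drain_done, 0);
	}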

sys/vm/vm_glue.c

@@ -917,8 +917,7 @@ int action;
 			 * This could be refined to support
 			 * swapping out a thread.
 			 */
-			if ((td->td_priority) < PSOCK ||
-			    !thread_safetoswapout(td)) {
+			if (!thread_safetoswapout(td)) {
 				thread_unlock(td);
 				goto nextproc;
 			}

sys/vm/vm_meter.c

@@ -95,7 +95,6 @@ SYSCTL_PROC(_vm, VM_LOADAVG, loadavg, CTLTYPE_STRUCT|CTLFLAG_RD,
 static int
 vmtotal(SYSCTL_HANDLER_ARGS)
 {
-/* XXXKSE almost completely broken */
 	struct proc *p;
 	struct vmtotal total;
 	vm_map_entry_t entry;
@@ -139,25 +138,16 @@ vmtotal(SYSCTL_HANDLER_ARGS)
 			break;
 		default:
 			FOREACH_THREAD_IN_PROC(p, td) {
-				/* Need new statistics XXX */
 				thread_lock(td);
 				switch (td->td_state) {
 				case TDS_INHIBITED:
-					/*
-					 * XXX stats no longer synchronized.
-					 */
-					if (TD_ON_LOCK(td) ||
-					    (td->td_inhibitors ==
-					    TDI_SWAPPED)) {
+					if (TD_IS_SWAPPED(td))
 						total.t_sw++;
-					} else if (TD_IS_SLEEPING(td) ||
-					    TD_AWAITING_INTR(td) ||
-					    TD_IS_SUSPENDED(td)) {
-						if (td->td_priority <= PZERO)
-							total.t_dw++;
-						else
-							total.t_sl++;
-					}
+					else if (TD_IS_SLEEPING(td) &&
+					    td->td_priority <= PZERO)
+						total.t_dw++;
+					else
+						total.t_sl++;
 					break;
 				case TDS_CAN_RUN: