Add wakeup_any(), cheaper wakeup_one() for taskqueue(9).

wakeup_one() and underlying sleepq_signal() spend additional time trying
to be fair, waking thread with highest priority, sleeping longest time.
But in case of taskqueue there are many absolutely identical threads, and
any fairness between them is quite pointless.  It makes things even worse, since
round-robin wakeups not only make previous CPU affinity in scheduler quite
useless, but also hide from the user the chance to see CPU bottlenecks, when
sequential workload with one request at a time looks evenly distributed
between multiple threads.

This change adds new SLEEPQ_UNFAIR flag to sleepq_signal(), making it wakeup
thread that went to sleep last, but no longer in context switch (to avoid
immediate spinning on the thread lock).  On top of that new wakeup_any()
function is added, equivalent to wakeup_one(), but setting the flag.
On top of that taskqueue(9) is switched to wakeup_any() to wakeup its
threads.

As result, on 72-core Xeon v4 machine sequential ZFS write to 12 ZVOLs
with 16KB block size spends 34% less time in wakeup_any() and descendants
than it was spending in wakeup_one(), and total write throughput increased
by ~10% with the same as before CPU usage.

Reviewed by:	markj, mmacy
MFC after:	2 weeks
Sponsored by:	iXsystems, Inc.
Differential Revision:	https://reviews.freebsd.org/D20669
This commit is contained in:
mav 2019-06-20 01:15:33 +00:00
parent d82071d5c7
commit da627eba88
9 changed files with 78 additions and 22 deletions

View File

@ -1880,7 +1880,8 @@ MLINKS+=sleep.9 msleep.9 \
sleep.9 tsleep.9 \ sleep.9 tsleep.9 \
sleep.9 tsleep_sbt.9 \ sleep.9 tsleep_sbt.9 \
sleep.9 wakeup.9 \ sleep.9 wakeup.9 \
sleep.9 wakeup_one.9 sleep.9 wakeup_one.9 \
sleep.9 wakeup_any.9
MLINKS+=sleepqueue.9 init_sleepqueues.9 \ MLINKS+=sleepqueue.9 init_sleepqueues.9 \
sleepqueue.9 sleepq_abort.9 \ sleepqueue.9 sleepq_abort.9 \
sleepqueue.9 sleepq_add.9 \ sleepqueue.9 sleepq_add.9 \

View File

@ -25,7 +25,7 @@
.\" .\"
.\" $FreeBSD$ .\" $FreeBSD$
.\" .\"
.Dd March 4, 2018 .Dd June 19, 2019
.Dt SLEEP 9 .Dt SLEEP 9
.Os .Os
.Sh NAME .Sh NAME
@ -38,7 +38,9 @@
.Nm pause_sbt , .Nm pause_sbt ,
.Nm tsleep , .Nm tsleep ,
.Nm tsleep_sbt , .Nm tsleep_sbt ,
.Nm wakeup .Nm wakeup ,
.Nm wakeup_one ,
.Nm wakeup_any
.Nd wait for events .Nd wait for events
.Sh SYNOPSIS .Sh SYNOPSIS
.In sys/param.h .In sys/param.h
@ -70,6 +72,8 @@
.Fn wakeup "void *chan" .Fn wakeup "void *chan"
.Ft void .Ft void
.Fn wakeup_one "void *chan" .Fn wakeup_one "void *chan"
.Ft void
.Fn wakeup_any "void *chan"
.Sh DESCRIPTION .Sh DESCRIPTION
The functions The functions
.Fn tsleep , .Fn tsleep ,
@ -79,8 +83,9 @@ The functions
.Fn pause_sig , .Fn pause_sig ,
.Fn pause_sbt , .Fn pause_sbt ,
.Fn wakeup , .Fn wakeup ,
.Fn wakeup_one ,
and and
.Fn wakeup_one .Fn wakeup_any
handle event-based thread blocking. handle event-based thread blocking.
If a thread must wait for an If a thread must wait for an
external event, it is put to sleep by external event, it is put to sleep by
@ -252,9 +257,10 @@ function is a wrapper around
.Fn tsleep .Fn tsleep
that suspends execution of the current thread for the indicated timeout. that suspends execution of the current thread for the indicated timeout.
The thread can not be awakened early by signals or calls to The thread can not be awakened early by signals or calls to
.Fn wakeup .Fn wakeup ,
.Fn wakeup_one
or or
.Fn wakeup_one . .Fn wakeup_any .
The The
.Fn pause_sig .Fn pause_sig
function is a variant of function is a variant of
@ -263,8 +269,8 @@ which can be awakened early by signals.
.Pp .Pp
The The
.Fn wakeup_one .Fn wakeup_one
function makes the first thread in the queue that is sleeping on the function makes the first highest priority thread in the queue that is
parameter sleeping on the parameter
.Fa chan .Fa chan
runnable. runnable.
This reduces the load when a large number of threads are sleeping on This reduces the load when a large number of threads are sleeping on
@ -293,6 +299,16 @@ pay particular attention to ensure that no other threads wait on the
same same
.Fa chan . .Fa chan .
.Pp .Pp
The
.Fn wakeup_any
function is similar to
.Fn wakeup_one ,
except that it makes runnable last thread on the queue (sleeping less),
ignoring fairness.
It can be used when threads sleeping on the
.Fa chan
are known to be identical and there is no reason to be fair.
.Pp
If the timeout given by If the timeout given by
.Fa timo .Fa timo
or or

View File

@ -22,7 +22,7 @@
.\" .\"
.\" $FreeBSD$ .\" $FreeBSD$
.\" .\"
.Dd September 22, 2014 .Dd June 19, 2019
.Dt SLEEPQUEUE 9 .Dt SLEEPQUEUE 9
.Os .Os
.Sh NAME .Sh NAME
@ -290,7 +290,8 @@ and
functions. functions.
The The
.Fn sleepq_signal .Fn sleepq_signal
function awakens the highest priority thread sleeping on a wait channel while function awakens the highest priority thread sleeping on a wait channel
(if SLEEPQ_UNFAIR flag is set, thread that went to sleep recently) while
.Fn sleepq_broadcast .Fn sleepq_broadcast
awakens all of the threads sleeping on a wait channel. awakens all of the threads sleeping on a wait channel.
The The

View File

@ -368,6 +368,19 @@ wakeup_one(void *ident)
kick_proc0(); kick_proc0();
} }
void
wakeup_any(void *ident)
{
int wakeup_swapper;
sleepq_lock(ident);
wakeup_swapper = sleepq_signal(ident, SLEEPQ_SLEEP | SLEEPQ_UNFAIR,
0, 0);
sleepq_release(ident);
if (wakeup_swapper)
kick_proc0();
}
static void static void
kdb_switch(void) kdb_switch(void)
{ {

View File

@ -123,7 +123,7 @@ CTASSERT(powerof2(SC_TABLESIZE));
* c - sleep queue chain lock * c - sleep queue chain lock
*/ */
struct sleepqueue { struct sleepqueue {
TAILQ_HEAD(, thread) sq_blocked[NR_SLEEPQS]; /* (c) Blocked threads. */ struct threadqueue sq_blocked[NR_SLEEPQS]; /* (c) Blocked threads. */
u_int sq_blockedcnt[NR_SLEEPQS]; /* (c) N. of blocked threads. */ u_int sq_blockedcnt[NR_SLEEPQS]; /* (c) N. of blocked threads. */
LIST_ENTRY(sleepqueue) sq_hash; /* (c) Chain and free list. */ LIST_ENTRY(sleepqueue) sq_hash; /* (c) Chain and free list. */
LIST_HEAD(, sleepqueue) sq_free; /* (c) Free queues. */ LIST_HEAD(, sleepqueue) sq_free; /* (c) Free queues. */
@ -889,12 +889,14 @@ sleepq_init(void *mem, int size, int flags)
} }
/* /*
* Find the highest priority thread sleeping on a wait channel and resume it. * Find thread sleeping on a wait channel and resume it.
*/ */
int int
sleepq_signal(void *wchan, int flags, int pri, int queue) sleepq_signal(void *wchan, int flags, int pri, int queue)
{ {
struct sleepqueue_chain *sc;
struct sleepqueue *sq; struct sleepqueue *sq;
struct threadqueue *head;
struct thread *td, *besttd; struct thread *td, *besttd;
int wakeup_swapper; int wakeup_swapper;
@ -907,17 +909,34 @@ sleepq_signal(void *wchan, int flags, int pri, int queue)
KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE), KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE),
("%s: mismatch between sleep/wakeup and cv_*", __func__)); ("%s: mismatch between sleep/wakeup and cv_*", __func__));
head = &sq->sq_blocked[queue];
if (flags & SLEEPQ_UNFAIR) {
/* /*
* Find the highest priority thread on the queue. If there is a * Find the most recently sleeping thread, but try to
* tie, use the thread that first appears in the queue as it has * skip threads still in process of context switch to
* been sleeping the longest since threads are always added to * avoid spinning on the thread lock.
* the tail of sleep queues.
*/ */
besttd = TAILQ_FIRST(&sq->sq_blocked[queue]); sc = SC_LOOKUP(wchan);
TAILQ_FOREACH(td, &sq->sq_blocked[queue], td_slpq) { besttd = TAILQ_LAST_FAST(head, thread, td_slpq);
while (besttd->td_lock != &sc->sc_lock) {
td = TAILQ_PREV_FAST(besttd, head, thread, td_slpq);
if (td == NULL)
break;
besttd = td;
}
} else {
/*
* Find the highest priority thread on the queue. If there
* is a tie, use the thread that first appears in the queue
* as it has been sleeping the longest since threads are
* always added to the tail of sleep queues.
*/
besttd = td = TAILQ_FIRST(head);
while ((td = TAILQ_NEXT(td, td_slpq)) != NULL) {
if (td->td_priority < besttd->td_priority) if (td->td_priority < besttd->td_priority)
besttd = td; besttd = td;
} }
}
MPASS(besttd != NULL); MPASS(besttd != NULL);
thread_lock(besttd); thread_lock(besttd);
wakeup_swapper = sleepq_resume_thread(sq, besttd, pri); wakeup_swapper = sleepq_resume_thread(sq, besttd, pri);

View File

@ -804,7 +804,7 @@ taskqueue_thread_enqueue(void *context)
tqp = context; tqp = context;
tq = *tqp; tq = *tqp;
wakeup_one(tq); wakeup_any(tq);
} }
TASKQUEUE_DEFINE(swi, taskqueue_swi_enqueue, NULL, TASKQUEUE_DEFINE(swi, taskqueue_swi_enqueue, NULL,

View File

@ -829,6 +829,10 @@ struct { \
#define TAILQ_PREV(elm, headname, field) \ #define TAILQ_PREV(elm, headname, field) \
(*(((struct headname *)((elm)->field.tqe_prev))->tqh_last)) (*(((struct headname *)((elm)->field.tqe_prev))->tqh_last))
#define TAILQ_PREV_FAST(elm, head, type, field) \
((elm)->field.tqe_prev == &(head)->tqh_first ? NULL : \
__containerof((elm)->field.tqe_prev, QUEUE_TYPEOF(type), field.tqe_next))
#define TAILQ_REMOVE(head, elm, field) do { \ #define TAILQ_REMOVE(head, elm, field) do { \
QMD_SAVELINK(oldnext, (elm)->field.tqe_next); \ QMD_SAVELINK(oldnext, (elm)->field.tqe_next); \
QMD_SAVELINK(oldprev, (elm)->field.tqe_prev); \ QMD_SAVELINK(oldprev, (elm)->field.tqe_prev); \

View File

@ -84,6 +84,7 @@ struct thread;
#define SLEEPQ_SX 0x03 /* Used by an sx lock. */ #define SLEEPQ_SX 0x03 /* Used by an sx lock. */
#define SLEEPQ_LK 0x04 /* Used by a lockmgr. */ #define SLEEPQ_LK 0x04 /* Used by a lockmgr. */
#define SLEEPQ_INTERRUPTIBLE 0x100 /* Sleep is interruptible. */ #define SLEEPQ_INTERRUPTIBLE 0x100 /* Sleep is interruptible. */
#define SLEEPQ_UNFAIR 0x200 /* Unfair wakeup order. */
void init_sleepqueues(void); void init_sleepqueues(void);
int sleepq_abort(struct thread *td, int intrval); int sleepq_abort(struct thread *td, int intrval);

View File

@ -489,6 +489,7 @@ int pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr,
_sleep((chan), NULL, (pri), (wmesg), (bt), (pr), (flags)) _sleep((chan), NULL, (pri), (wmesg), (bt), (pr), (flags))
void wakeup(void * chan); void wakeup(void * chan);
void wakeup_one(void * chan); void wakeup_one(void * chan);
void wakeup_any(void * chan);
/* /*
* Common `struct cdev *' stuff are declared here to avoid #include poisoning * Common `struct cdev *' stuff are declared here to avoid #include poisoning