Make kern_nanosleep() and pause_sbt() use per-CPU sleep queues.

This removes significant sleep queue lock congestion on multithreaded
microbenchmarks, making them scale to multiple CPUs almost linearly.
This commit is contained in:
Alexander Motin 2013-03-12 06:58:49 +00:00
parent 29c70b5ed9
commit 0dbf17e6eb
3 changed files with 12 additions and 13 deletions

View File

@ -85,7 +85,7 @@ SYSINIT(synch_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, synch_setup,
NULL);
int hogticks;
static int pause_wchan;
static uint8_t pause_wchan[MAXCPU];
static struct callout loadav_callout;
@ -198,7 +198,8 @@ _sleep(void *ident, struct lock_object *lock, int priority,
if (TD_ON_SLEEPQ(td))
sleepq_remove(td, td->td_wchan);
if (ident == &pause_wchan)
if ((uint8_t *)ident >= &pause_wchan[0] &&
(uint8_t *)ident <= &pause_wchan[MAXCPU - 1])
sleepq_flags = SLEEPQ_PAUSE;
else
sleepq_flags = SLEEPQ_SLEEP;
@ -372,7 +373,7 @@ pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags)
DELAY((sbt & 0xffffffff) / SBT_1US);
return (0);
}
return (_sleep(&pause_wchan, NULL, 0, wmesg, sbt, pr, flags));
return (_sleep(&pause_wchan[curcpu], NULL, 0, wmesg, sbt, pr, flags));
}
/*

View File

@ -477,7 +477,7 @@ kern_clock_getres(struct thread *td, clockid_t clock_id, struct timespec *ts)
return (0);
}
static int nanowait;
static uint8_t nanowait[MAXCPU];
int
kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt)
@ -503,8 +503,8 @@ kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt)
if (TIMESEL(&sbt, tmp))
sbt += tc_tick_sbt;
sbt += tmp;
error = tsleep_sbt(&nanowait, PWAIT | PCATCH, "nanslp", sbt, prec,
C_ABSOLUTE);
error = tsleep_sbt(&nanowait[curcpu], PWAIT | PCATCH, "nanslp",
sbt, prec, C_ABSOLUTE);
if (error != EWOULDBLOCK) {
if (error == ERESTART)
error = EINTR;

View File

@ -88,16 +88,14 @@ __FBSDID("$FreeBSD$");
#endif
/*
* Constants for the hash table of sleep queue chains. These constants are
* the same ones that 4BSD (and possibly earlier versions of BSD) used.
* Basically, we ignore the lower 8 bits of the address since most wait
* channel pointers are aligned and only look at the next 7 bits for the
* hash. SC_TABLESIZE must be a power of two for SC_MASK to work properly.
* Constants for the hash table of sleep queue chains.
* SC_TABLESIZE must be a power of two for SC_MASK to work properly.
*/
#define SC_TABLESIZE 128 /* Must be power of 2. */
#define SC_TABLESIZE 256 /* Must be power of 2. */
#define SC_MASK (SC_TABLESIZE - 1)
#define SC_SHIFT 8
#define SC_HASH(wc) (((uintptr_t)(wc) >> SC_SHIFT) & SC_MASK)
#define SC_HASH(wc) ((((uintptr_t)(wc) >> SC_SHIFT) ^ (uintptr_t)(wc)) & \
SC_MASK)
#define SC_LOOKUP(wc) &sleepq_chains[SC_HASH(wc)]
#define NR_SLEEPQS 2
/*