Add an implementation of turnstiles and change the sleep mutex code to use
turnstiles to implement blocking instead of implementing a thread queue
directly.  These turnstiles are somewhat similar to those used in Solaris 7
as described in Solaris Internals, but they differ in several respects.

Turnstiles do not come out of a fixed-size pool.  Rather, each thread is
assigned a turnstile when it is created and frees that turnstile when it
is destroyed.  When a thread blocks on a lock, it donates its turnstile to
that lock to serve as a queue of blocked threads.  The queue associated
with a given lock is found by a lookup in a simple hash table.  The
turnstile itself is protected by a lock associated with its entry in the
hash table.  This means that sched_lock is no longer needed when contesting
a mutex; instead, sched_lock is only used when manipulating run queues or
thread priorities.  Turnstiles also implement priority propagation
inherently.
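
As an illustrative sketch only (the table size, hash function, and
structure layouts below are assumptions for illustration, not the actual
definitions in sys/turnstile.h), the chain lookup works roughly like this:

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>

#define TC_TABLESIZE    128     /* assumed size; a power of two */
#define TC_MASK         (TC_TABLESIZE - 1)
#define TC_HASH(lock)   ((((uintptr_t)(lock)) >> 8) & TC_MASK)

struct turnstile {
        struct lock_object *ts_lockobj;         /* lock this queue serves */
        TAILQ_HEAD(, thread) ts_blocked;        /* threads blocked on it */
        LIST_ENTRY(turnstile) ts_hash;          /* hash chain linkage */
};

struct turnstile_chain {
        LIST_HEAD(, turnstile) tc_turnstiles;   /* turnstiles in this bucket */
        struct mtx tc_lock;                     /* spin lock for this chain */
};

static struct turnstile_chain turnstile_chains[TC_TABLESIZE];

/*
 * Find the turnstile (queue of blocked threads) for a lock.  The chain
 * lock is acquired and left held for the caller; sched_lock is never
 * needed here.
 */
static struct turnstile *
turnstile_lookup_sketch(struct lock_object *lock)
{
        struct turnstile_chain *tc;
        struct turnstile *ts;

        tc = &turnstile_chains[TC_HASH(lock)];
        mtx_lock_spin(&tc->tc_lock);
        LIST_FOREACH(ts, &tc->tc_turnstiles, ts_hash)
                if (ts->ts_lockobj == lock)
                        return (ts);    /* chain stays locked for caller */
        return (NULL);                  /* no thread is blocked on lock */
}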

Currently turnstiles only support mutexes.  Eventually, however, turnstiles
may grow two queues to support a non-sleepable reader/writer lock
implementation.  For more details, see the comments in sys/turnstile.h and
kern/subr_turnstile.c.
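
Condensed from the kern_mutex.c diff below, the contested lock and unlock
paths drive the turnstile API roughly as follows.  This is a sketch, not
the verbatim code: adaptive spinning, assertions, and KTR tracing are
elided, and the two wrapper function names are for illustration only.

#include <sys/param.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/turnstile.h>

static void
contested_lock_sketch(struct mtx *m, struct thread *td)
{
        struct turnstile *ts;
        uintptr_t v;

        while (!_obtain_lock(m, td)) {
                ts = turnstile_lookup(&m->mtx_object);  /* locks the chain */
                v = m->mtx_lock;
                if (v == MTX_UNOWNED) {
                        /* Freed while we spun on the chain lock; retry. */
                        turnstile_release(&m->mtx_object);
                        continue;
                }
                if (v == MTX_CONTESTED) {
                        /*
                         * Handed off to us; take ownership, inheriting
                         * the waiters' priority via turnstile_claim().
                         */
                        m->mtx_lock = (uintptr_t)td | MTX_CONTESTED;
                        turnstile_claim(ts);
                        return;
                }
                /*
                 * Donate our turnstile and block; priority propagation
                 * to the lock owner happens inside turnstile_wait().
                 */
                turnstile_wait(ts, &m->mtx_object, mtx_owner(m));
        }
}

static void
contested_unlock_sketch(struct mtx *m)
{
        struct turnstile *ts;

        ts = turnstile_lookup(&m->mtx_object);
        if (turnstile_signal(ts))               /* true: no waiters remain */
                _release_lock_quick(m);
        else
                m->mtx_lock = MTX_CONTESTED;    /* hand off to next waiter */
        turnstile_unpend(ts);                   /* wake the dequeued thread */
}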

The turnstile code has two primary advantages: 1) struct mtx shrinks by
four pointers since it no longer stores the thread queue linkages directly,
and 2) there is less contention on sched_lock on SMP systems, including
the ability for multiple CPUs to contend on different locks simultaneously
(though this last detail is not necessarily much of a win).  Note that 1)
makes this commit a kernel ABI breaker, so don't mix old modules with a
new kernel or vice versa.
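
For 1), the struct mtx diff below shows exactly which fields account for
the four pointers: a TAILQ_HEAD of blocked threads (two pointers) and a
LIST_ENTRY linking contested mutexes (two pointers).

#include <sys/_lock.h>
#include <sys/queue.h>
#include <sys/types.h>

struct mtx {
        struct lock_object mtx_object;  /* Common lock properties. */
        volatile uintptr_t mtx_lock;    /* Owner and flags. */
        volatile u_int mtx_recurse;     /* Number of recursive holds. */
        /*
         * Removed by this commit (four pointers total):
         *   TAILQ_HEAD(, thread) mtx_blocked;   two pointers
         *   LIST_ENTRY(mtx) mtx_contested;      two pointers
         */
};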

Tested on:	i386 SMP, sparc64 SMP, alpha SMP
John Baldwin	2003-11-11 22:07:29 +00:00
commit 961a7b244d (parent a6cb9d8e99)
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=122514
9 changed files with 570 additions and 1032 deletions

View File: conf/files

@ -1154,6 +1154,7 @@ kern/subr_scanf.c standard
kern/subr_smp.c optional smp
kern/subr_taskqueue.c standard
kern/subr_trap.c standard
kern/subr_turnstile.c standard
kern/subr_witness.c optional witness
kern/sys_generic.c standard
kern/sys_pipe.c standard

View File: kern/kern_thread.c

@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <sys/signalvar.h>
#include <sys/sx.h>
#include <sys/tty.h>
#include <sys/turnstile.h>
#include <sys/user.h>
#include <sys/jail.h>
#include <sys/kse.h>
@ -190,6 +191,7 @@ thread_init(void *mem, int size)
vm_thread_new(td, 0);
mtx_unlock(&Giant);
cpu_thread_setup(td);
td->td_turnstile = turnstile_alloc();
td->td_sched = (struct td_sched *)&td[1];
}
@ -202,6 +204,7 @@ thread_fini(void *mem, int size)
struct thread *td;
td = (struct thread *)mem;
turnstile_free(td->td_turnstile);
vm_thread_dispose(td);
}

View File: kern/kern_mutex.c

@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sched.h>
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/turnstile.h>
#include <sys/vmmeter.h>
#include <machine/atomic.h>
@ -90,122 +91,6 @@ struct lock_class lock_class_mtx_spin = {
struct mtx sched_lock;
struct mtx Giant;
/*
* Prototypes for non-exported routines.
*/
static void propagate_priority(struct thread *);
static void
propagate_priority(struct thread *td)
{
int pri = td->td_priority;
struct mtx *m = td->td_blocked;
mtx_assert(&sched_lock, MA_OWNED);
for (;;) {
struct thread *td1;
td = mtx_owner(m);
if (td == NULL) {
/*
* This really isn't quite right. Really
* ought to bump priority of thread that
* next acquires the mutex.
*/
MPASS(m->mtx_lock == MTX_CONTESTED);
return;
}
MPASS(td->td_proc != NULL);
MPASS(td->td_proc->p_magic == P_MAGIC);
KASSERT(!TD_IS_SLEEPING(td), (
"sleeping thread (pid %d) owns a mutex",
td->td_proc->p_pid));
if (td->td_priority <= pri) /* lower is higher priority */
return;
/*
* If lock holder is actually running, just bump priority.
*/
if (TD_IS_RUNNING(td)) {
td->td_priority = pri;
return;
}
#ifndef SMP
/*
* For UP, we check to see if td is curthread (this shouldn't
* ever happen however as it would mean we are in a deadlock.)
*/
KASSERT(td != curthread, ("Deadlock detected"));
#endif
/*
* If on run queue move to new run queue, and quit.
* XXXKSE this gets a lot more complicated under threads
* but try anyhow.
*/
if (TD_ON_RUNQ(td)) {
MPASS(td->td_blocked == NULL);
sched_prio(td, pri);
return;
}
/*
* Adjust for any other cases.
*/
td->td_priority = pri;
/*
* If we aren't blocked on a mutex, we should be.
*/
KASSERT(TD_ON_LOCK(td), (
"process %d(%s):%d holds %s but isn't blocked on a mutex\n",
td->td_proc->p_pid, td->td_proc->p_comm, td->td_state,
m->mtx_object.lo_name));
/*
* Pick up the mutex that td is blocked on.
*/
m = td->td_blocked;
MPASS(m != NULL);
/*
* Check if the thread needs to be moved up on
* the blocked chain
*/
if (td == TAILQ_FIRST(&m->mtx_blocked)) {
continue;
}
td1 = TAILQ_PREV(td, threadqueue, td_lockq);
if (td1->td_priority <= pri) {
continue;
}
/*
* Remove thread from blocked chain and determine where
* it should be moved up to. Since we know that td1 has
* a lower priority than td, we know that at least one
* thread in the chain has a lower priority and that
* td1 will thus not be NULL after the loop.
*/
TAILQ_REMOVE(&m->mtx_blocked, td, td_lockq);
TAILQ_FOREACH(td1, &m->mtx_blocked, td_lockq) {
MPASS(td1->td_proc->p_magic == P_MAGIC);
if (td1->td_priority > pri)
break;
}
MPASS(td1 != NULL);
TAILQ_INSERT_BEFORE(td1, td, td_lockq);
CTR4(KTR_LOCK,
"propagate_priority: p %p moved before %p on [%p] %s",
td, td1, m, m->mtx_object.lo_name);
}
}
#ifdef MUTEX_PROFILING
SYSCTL_NODE(_debug, OID_AUTO, mutex, CTLFLAG_RD, NULL, "mutex debugging");
SYSCTL_NODE(_debug_mutex, OID_AUTO, prof, CTLFLAG_RD, NULL, "mutex profiling");
@ -484,8 +369,8 @@ _mtx_trylock(struct mtx *m, int opts, const char *file, int line)
void
_mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
{
struct turnstile *ts;
struct thread *td = curthread;
struct thread *td1;
#if defined(SMP) && defined(ADAPTIVE_MUTEXES)
struct thread *owner;
#endif
@ -509,15 +394,15 @@ _mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
while (!_obtain_lock(m, td)) {
mtx_lock_spin(&sched_lock);
ts = turnstile_lookup(&m->mtx_object);
v = m->mtx_lock;
/*
* Check if the lock has been released while spinning for
* the sched_lock.
* the turnstile chain lock.
*/
if (v == MTX_UNOWNED) {
mtx_unlock_spin(&sched_lock);
turnstile_release(&m->mtx_object);
#ifdef __i386__
ia32_pause();
#endif
@ -531,14 +416,9 @@ _mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
* necessary.
*/
if (v == MTX_CONTESTED) {
td1 = TAILQ_FIRST(&m->mtx_blocked);
MPASS(td1 != NULL);
MPASS(ts != NULL);
m->mtx_lock = (uintptr_t)td | MTX_CONTESTED;
LIST_INSERT_HEAD(&td->td_contested, m, mtx_contested);
if (td1->td_priority < td->td_priority)
td->td_priority = td1->td_priority;
mtx_unlock_spin(&sched_lock);
turnstile_claim(ts);
return;
}
@ -550,7 +430,7 @@ _mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
if ((v & MTX_CONTESTED) == 0 &&
!atomic_cmpset_ptr(&m->mtx_lock, (void *)v,
(void *)(v | MTX_CONTESTED))) {
mtx_unlock_spin(&sched_lock);
turnstile_release(&m->mtx_object);
#ifdef __i386__
ia32_pause();
#endif
@ -564,7 +444,7 @@ _mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
*/
owner = (struct thread *)(v & MTX_FLAGMASK);
if (m != &Giant && TD_IS_RUNNING(owner)) {
mtx_unlock_spin(&sched_lock);
turnstile_release(&m->mtx_object);
while (mtx_owner(m) == owner && TD_IS_RUNNING(owner)) {
#ifdef __i386__
ia32_pause();
@ -579,42 +459,6 @@ _mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
*/
mtx_assert(m, MA_NOTOWNED);
#ifdef notyet
/*
* If we're borrowing an interrupted thread's VM context, we
* must clean up before going to sleep.
*/
if (td->td_ithd != NULL) {
struct ithd *it = td->td_ithd;
if (it->it_interrupted) {
if (LOCK_LOG_TEST(&m->mtx_object, opts))
CTR2(KTR_LOCK,
"_mtx_lock_sleep: %p interrupted %p",
it, it->it_interrupted);
intr_thd_fixup(it);
}
}
#endif
/*
* Put us on the list of threads blocked on this mutex
* and add this mutex to the owning thread's list of
* contested mutexes if needed.
*/
if (TAILQ_EMPTY(&m->mtx_blocked)) {
td1 = mtx_owner(m);
LIST_INSERT_HEAD(&td1->td_contested, m, mtx_contested);
TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_lockq);
} else {
TAILQ_FOREACH(td1, &m->mtx_blocked, td_lockq)
if (td1->td_priority > td->td_priority)
break;
if (td1)
TAILQ_INSERT_BEFORE(td1, td, td_lockq);
else
TAILQ_INSERT_TAIL(&m->mtx_blocked, td, td_lockq);
}
#ifdef KTR
if (!cont_logged) {
CTR6(KTR_CONTENTION,
@ -627,27 +471,9 @@ _mtx_lock_sleep(struct mtx *m, int opts, const char *file, int line)
#endif
/*
* Save who we're blocked on.
* Block on the turnstile.
*/
td->td_blocked = m;
td->td_lockname = m->mtx_object.lo_name;
TD_SET_LOCK(td);
propagate_priority(td);
if (LOCK_LOG_TEST(&m->mtx_object, opts))
CTR3(KTR_LOCK,
"_mtx_lock_sleep: p %p blocked on [%p] %s", td, m,
m->mtx_object.lo_name);
td->td_proc->p_stats->p_ru.ru_nvcsw++;
mi_switch();
if (LOCK_LOG_TEST(&m->mtx_object, opts))
CTR3(KTR_LOCK,
"_mtx_lock_sleep: p %p free from blocked on [%p] %s",
td, m, m->mtx_object.lo_name);
mtx_unlock_spin(&sched_lock);
turnstile_wait(ts, &m->mtx_object, mtx_owner(m));
}
#ifdef KTR
@ -724,11 +550,8 @@ _mtx_lock_spin(struct mtx *m, int opts, const char *file, int line)
void
_mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
{
struct turnstile *ts;
struct thread *td, *td1;
struct mtx *m1;
int pri;
td = curthread;
if (mtx_recursed(m)) {
if (--(m->mtx_recurse) == 0)
@ -738,57 +561,47 @@ _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
return;
}
mtx_lock_spin(&sched_lock);
ts = turnstile_lookup(&m->mtx_object);
if (LOCK_LOG_TEST(&m->mtx_object, opts))
CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p contested", m);
td1 = TAILQ_FIRST(&m->mtx_blocked);
#if defined(SMP) && defined(ADAPTIVE_MUTEXES)
if (td1 == NULL) {
if (ts == NULL) {
_release_lock_quick(m);
if (LOCK_LOG_TEST(&m->mtx_object, opts))
CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p no sleepers", m);
mtx_unlock_spin(&sched_lock);
turnstile_release(&m->mtx_object);
return;
}
#else
MPASS(ts != NULL);
#endif
MPASS(td->td_proc->p_magic == P_MAGIC);
MPASS(td1->td_proc->p_magic == P_MAGIC);
TAILQ_REMOVE(&m->mtx_blocked, td1, td_lockq);
LIST_REMOVE(m, mtx_contested);
if (TAILQ_EMPTY(&m->mtx_blocked)) {
/* XXX */
td1 = turnstile_head(ts);
if (turnstile_signal(ts)) {
_release_lock_quick(m);
if (LOCK_LOG_TEST(&m->mtx_object, opts))
CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p not held", m);
} else
} else {
m->mtx_lock = MTX_CONTESTED;
pri = PRI_MAX;
LIST_FOREACH(m1, &td->td_contested, mtx_contested) {
int cp = TAILQ_FIRST(&m1->mtx_blocked)->td_priority;
if (cp < pri)
pri = cp;
if (LOCK_LOG_TEST(&m->mtx_object, opts))
CTR1(KTR_LOCK, "_mtx_unlock_sleep: %p still contested",
m);
}
turnstile_unpend(ts);
if (pri > td->td_base_pri)
pri = td->td_base_pri;
td->td_priority = pri;
if (LOCK_LOG_TEST(&m->mtx_object, opts))
CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p contested setrunqueue %p",
m, td1);
td1->td_blocked = NULL;
TD_CLR_LOCK(td1);
if (!TD_CAN_RUN(td1)) {
mtx_unlock_spin(&sched_lock);
/*
* XXX: This is just a hack until preemption is done. However,
* once preemption is done we need to either wrap the
* turnstile_signal() and release of the actual lock in an
* extra critical section or change the preemption code to
* always just set a flag and never do instant-preempts.
*/
td = curthread;
if (td->td_critnest > 0 || td1->td_priority >= td->td_priority)
return;
}
setrunqueue(td1);
if (td->td_critnest == 1 && td1->td_priority < pri) {
mtx_lock_spin(&sched_lock);
if (!TD_IS_RUNNING(td1)) {
#ifdef notyet
if (td->td_ithd != NULL) {
struct ithd *it = td->td_ithd;
@ -813,7 +626,6 @@ _mtx_unlock_sleep(struct mtx *m, int opts, const char *file, int line)
CTR2(KTR_LOCK, "_mtx_unlock_sleep: %p resuming lock=%p",
m, (void *)m->mtx_lock);
}
mtx_unlock_spin(&sched_lock);
return;
@ -948,7 +760,6 @@ mtx_init(struct mtx *m, const char *name, const char *type, int opts)
lock->lo_flags |= LO_DUPOK;
m->mtx_lock = MTX_UNOWNED;
TAILQ_INIT(&m->mtx_blocked);
LOCK_LOG_INIT(lock, opts);
@ -992,6 +803,9 @@ mutex_init(void)
/* Setup thread0 so that mutexes work. */
LIST_INIT(&thread0.td_contested);
/* Setup turnstiles so that sleep mutexes work. */
init_turnstiles();
/*
* Initialize mutexes.
*/


File diff suppressed because it is too large.

View File: kern/subr_witness.c

@ -288,6 +288,8 @@ static struct witness_order_list_entry order_lists[] = {
{ "intr table", &lock_class_mtx_spin },
{ "ithread table lock", &lock_class_mtx_spin },
{ "sched lock", &lock_class_mtx_spin },
{ "turnstile chain", &lock_class_mtx_spin },
{ "td_contested", &lock_class_mtx_spin },
{ "callout", &lock_class_mtx_spin },
/*
* leaf locks
@ -342,9 +344,7 @@ static struct mtx all_mtx = {
LO_INITIALIZED, /* mtx_object.lo_flags */
{ NULL, NULL }, /* mtx_object.lo_list */
NULL }, /* mtx_object.lo_witness */
MTX_UNOWNED, 0, /* mtx_lock, mtx_recurse */
TAILQ_HEAD_INITIALIZER(all_mtx.mtx_blocked),
{ NULL, NULL } /* mtx_contested */
MTX_UNOWNED, 0 /* mtx_lock, mtx_recurse */
};
/*

View File: sys/_mutex.h

@ -38,8 +38,6 @@ struct mtx {
struct lock_object mtx_object; /* Common lock properties. */
volatile uintptr_t mtx_lock; /* Owner and flags. */
volatile u_int mtx_recurse; /* Number of recursive holds. */
TAILQ_HEAD(, thread) mtx_blocked; /* Threads blocked on us. */
LIST_ENTRY(mtx) mtx_contested; /* Next contested mtx. */
#ifdef MUTEX_PROFILING
/*

View File: sys/filedesc.h

@ -137,6 +137,8 @@ struct filedesc_to_leader {
#define FILEDESC_LOCKED(fd) mtx_owned(&(fd)->fd_mtx)
#define FILEDESC_LOCK_ASSERT(fd, type) mtx_assert(&(fd)->fd_mtx, (type))
struct thread;
int closef(struct file *fp, struct thread *p);
int dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd,
int mode, int error);

View File: sys/proc.h

@ -144,6 +144,7 @@ struct pargs {
* n - not locked, lazy
* o - ktrace lock
* p - select lock (sellock)
* q - td_contested lock
* r - p_peers lock
* x - created at fork, only changes during single threading in exec
* z - zombie threads/kse/ksegroup lock
@ -159,6 +160,7 @@ struct nlminfo;
struct p_sched;
struct td_sched;
struct trapframe;
struct turnstile;
/*
* Here we define the four structures used for process information.
@ -259,11 +261,12 @@ struct thread {
TAILQ_ENTRY(thread) td_kglist; /* (*) All threads in this ksegrp. */
/* The two queues below should someday be merged. */
TAILQ_ENTRY(thread) td_slpq; /* (j) Sleep queue. XXXKSE */
TAILQ_ENTRY(thread) td_lockq; /* (j) Lock queue. XXXKSE */
TAILQ_ENTRY(thread) td_slpq; /* (j) Sleep queue. */
TAILQ_ENTRY(thread) td_lockq; /* (j) Lock queue. */
TAILQ_ENTRY(thread) td_runq; /* (j/z) Run queue(s). XXXKSE */
TAILQ_HEAD(, selinfo) td_selq; /* (p) List of selinfos. */
struct turnstile *td_turnstile; /* (k) Associated turnstile. */
/* Cleared during fork1() or thread_sched_upcall(). */
#define td_startzero td_flags
@ -278,10 +281,10 @@ struct thread {
u_char td_lastcpu; /* (j) Last cpu we were on. */
u_char td_oncpu; /* (j) Which cpu we are on. */
short td_locks; /* (k) DEBUG: lockmgr count of locks. */
struct mtx *td_blocked; /* (j) Mutex process is blocked on. */
struct turnstile *td_blocked; /* (j) Lock process is blocked on. */
struct ithd *td_ithd; /* (b) For interrupt threads only. */
const char *td_lockname; /* (j) Name of lock blocked on. */
LIST_HEAD(, mtx) td_contested; /* (j) Contested locks. */
LIST_HEAD(, turnstile) td_contested; /* (q) Contested locks. */
struct lock_list_entry *td_sleeplocks; /* (k) Held sleep locks. */
int td_intr_nesting_level; /* (k) Interrupt recursion. */
int td_pinned; /* (k) Temporary cpu pin count. */
@ -342,6 +345,7 @@ struct thread {
#define TDF_IDLETD 0x000020 /* This is one of the per-CPU idle threads. */
#define TDF_SELECT 0x000040 /* Selecting; wakeup/waiting danger. */
#define TDF_CVWAITQ 0x000080 /* Thread is on a cv_waitq (not slpq). */
#define TDF_TSNOBLOCK 0x000100 /* Don't block on a turnstile due to race. */
#define TDF_ONSLEEPQ 0x000200 /* On the sleep queue. */
#define TDF_ASTPENDING 0x000800 /* Thread has some asynchronous events. */
#define TDF_TIMOFAIL 0x001000 /* Timeout from sleep after we were awake. */