freebsd-nq/sys/kern/kern_umtx.c

672 lines
16 KiB
C
Raw Normal View History

/*-
* Copyright (c) 2004, David Xu <davidxu@freebsd.org>
* Copyright (c) 2002, Jeffrey Roberson <jeff@freebsd.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
2003-06-11 00:56:59 +00:00
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sysent.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/eventhandler.h>
#include <sys/thr.h>
#include <sys/umtx.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#define UMTX_PRIVATE 0
#define UMTX_SHARED 1
struct umtx_key {
int type;
union {
struct {
vm_object_t object;
long offset;
} shared;
struct {
struct umtx *umtx;
long pid;
} private;
struct {
void *ptr;
long word;
} both;
} info;
};
struct umtx_q {
LIST_ENTRY(umtx_q) uq_next; /* Linked list for the hash. */
struct umtx_key uq_key; /* Umtx key. */
int uq_flags; /* Umtx flags. */
#define UQF_UMTXQ 0x0001
struct thread *uq_thread; /* The thread waits on. */
vm_offset_t uq_addr; /* Umtx's virtual address. */
};
LIST_HEAD(umtx_head, umtx_q);
struct umtxq_chain {
struct mtx uc_lock; /* Lock for this chain. */
struct umtx_head uc_queue; /* List of sleep queues. */
#define UCF_BUSY 0x01
int uc_flags;
int uc_waiters;
};
#define GOLDEN_RATIO_PRIME 2654404609U
#define UMTX_CHAINS 128
#define UMTX_SHIFTS (__WORD_BIT - 7)
static struct umtxq_chain umtxq_chains[UMTX_CHAINS];
static MALLOC_DEFINE(M_UMTX, "umtx", "UMTX queue memory");
static void umtxq_init_chains(void *);
static int umtxq_hash(struct umtx_key *key);
static struct mtx *umtxq_mtx(int chain);
static void umtxq_lock(struct umtx_key *key);
static void umtxq_unlock(struct umtx_key *key);
static void umtxq_busy(struct umtx_key *key);
static void umtxq_unbusy(struct umtx_key *key);
static void umtxq_insert(struct umtx_q *uq);
static void umtxq_remove(struct umtx_q *uq);
static int umtxq_sleep(struct thread *td, struct umtx_key *key,
int prio, const char *wmesg, int timo);
static int umtxq_count(struct umtx_key *key);
static int umtxq_signal(struct umtx_key *key, int nr_wakeup);
static int umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2);
static int umtx_key_get(struct thread *td, struct umtx *umtx,
struct umtx_key *key);
static void umtx_key_release(struct umtx_key *key);
SYSINIT(umtx, SI_SUB_EVENTHANDLER+1, SI_ORDER_MIDDLE, umtxq_init_chains, NULL);
struct umtx_q *
umtxq_alloc(void)
{
return (malloc(sizeof(struct umtx_q), M_UMTX, M_WAITOK|M_ZERO));
}
void
umtxq_free(struct umtx_q *uq)
{
free(uq, M_UMTX);
}
static void
umtxq_init_chains(void *arg __unused)
{
int i;
for (i = 0; i < UMTX_CHAINS; ++i) {
mtx_init(&umtxq_chains[i].uc_lock, "umtxq_lock", NULL,
MTX_DEF | MTX_DUPOK);
LIST_INIT(&umtxq_chains[i].uc_queue);
umtxq_chains[i].uc_flags = 0;
umtxq_chains[i].uc_waiters = 0;
}
}
static inline int
umtxq_hash(struct umtx_key *key)
{
unsigned n = (uintptr_t)key->info.both.ptr + key->info.both.word;
return (((n * GOLDEN_RATIO_PRIME) >> UMTX_SHIFTS) % UMTX_CHAINS);
}
static inline int
umtx_key_match(const struct umtx_key *k1, const struct umtx_key *k2)
{
return (k1->type == k2->type &&
k1->info.both.ptr == k2->info.both.ptr &&
k1->info.both.word == k2->info.both.word);
}
static inline struct mtx *
umtxq_mtx(int chain)
{
return (&umtxq_chains[chain].uc_lock);
}
static inline void
umtxq_busy(struct umtx_key *key)
{
int chain = umtxq_hash(key);
mtx_assert(umtxq_mtx(chain), MA_OWNED);
while (umtxq_chains[chain].uc_flags & UCF_BUSY) {
umtxq_chains[chain].uc_waiters++;
msleep(&umtxq_chains[chain], umtxq_mtx(chain),
0, "umtxq_busy", 0);
umtxq_chains[chain].uc_waiters--;
}
umtxq_chains[chain].uc_flags |= UCF_BUSY;
}
static inline void
umtxq_unbusy(struct umtx_key *key)
{
int chain = umtxq_hash(key);
mtx_assert(umtxq_mtx(chain), MA_OWNED);
KASSERT(umtxq_chains[chain].uc_flags & UCF_BUSY, ("not busy"));
umtxq_chains[chain].uc_flags &= ~UCF_BUSY;
if (umtxq_chains[chain].uc_waiters)
wakeup_one(&umtxq_chains[chain]);
}
static inline void
umtxq_lock(struct umtx_key *key)
{
int chain = umtxq_hash(key);
mtx_lock(umtxq_mtx(chain));
}
2004-11-30 12:18:53 +00:00
static inline void
umtxq_unlock(struct umtx_key *key)
{
int chain = umtxq_hash(key);
mtx_unlock(umtxq_mtx(chain));
}
/*
* Insert a thread onto the umtx queue.
*/
static inline void
umtxq_insert(struct umtx_q *uq)
{
struct umtx_head *head;
int chain = umtxq_hash(&uq->uq_key);
mtx_assert(umtxq_mtx(chain), MA_OWNED);
head = &umtxq_chains[chain].uc_queue;
LIST_INSERT_HEAD(head, uq, uq_next);
uq->uq_flags |= UQF_UMTXQ;
}
/*
* Remove thread from the umtx queue.
*/
static inline void
umtxq_remove(struct umtx_q *uq)
{
mtx_assert(umtxq_mtx(umtxq_hash(&uq->uq_key)), MA_OWNED);
if (uq->uq_flags & UQF_UMTXQ) {
LIST_REMOVE(uq, uq_next);
/* turning off UQF_UMTXQ should be the last thing. */
uq->uq_flags &= ~UQF_UMTXQ;
}
}
static int
umtxq_count(struct umtx_key *key)
{
struct umtx_q *uq;
struct umtx_head *head;
int chain, count = 0;
chain = umtxq_hash(key);
mtx_assert(umtxq_mtx(chain), MA_OWNED);
head = &umtxq_chains[chain].uc_queue;
LIST_FOREACH(uq, head, uq_next) {
if (umtx_key_match(&uq->uq_key, key)) {
if (++count > 1)
break;
}
}
return (count);
}
static int
umtxq_signal(struct umtx_key *key, int n_wake)
{
struct umtx_q *uq, *next;
struct umtx_head *head;
struct thread *blocked = NULL;
int chain, ret;
ret = 0;
chain = umtxq_hash(key);
mtx_assert(umtxq_mtx(chain), MA_OWNED);
head = &umtxq_chains[chain].uc_queue;
for (uq = LIST_FIRST(head); uq; uq = next) {
next = LIST_NEXT(uq, uq_next);
if (umtx_key_match(&uq->uq_key, key)) {
blocked = uq->uq_thread;
umtxq_remove(uq);
wakeup(blocked);
if (++ret >= n_wake)
break;
}
}
return (ret);
}
static inline int
umtxq_sleep(struct thread *td, struct umtx_key *key, int priority,
const char *wmesg, int timo)
{
int chain = umtxq_hash(key);
int error = msleep(td, umtxq_mtx(chain), priority, wmesg, timo);
if (error == EWOULDBLOCK)
error = ETIMEDOUT;
return (error);
}
static int
umtx_key_get(struct thread *td, struct umtx *umtx, struct umtx_key *key)
{
vm_map_t map;
vm_map_entry_t entry;
vm_pindex_t pindex;
vm_prot_t prot;
boolean_t wired;
map = &td->td_proc->p_vmspace->vm_map;
if (vm_map_lookup(&map, (vm_offset_t)umtx, VM_PROT_WRITE,
&entry, &key->info.shared.object, &pindex, &prot,
&wired) != KERN_SUCCESS) {
return EFAULT;
}
2006-02-04 06:36:39 +00:00
if (VM_INHERIT_SHARE == entry->inheritance) {
key->type = UMTX_SHARED;
key->info.shared.offset = entry->offset + entry->start -
(vm_offset_t)umtx;
vm_object_reference(key->info.shared.object);
} else {
key->type = UMTX_PRIVATE;
key->info.private.umtx = umtx;
key->info.private.pid = td->td_proc->p_pid;
}
vm_map_lookup_done(map, entry);
return (0);
}
static inline void
umtx_key_release(struct umtx_key *key)
{
if (key->type == UMTX_SHARED)
vm_object_deallocate(key->info.shared.object);
}
static inline int
umtxq_queue_me(struct thread *td, struct umtx *umtx, struct umtx_q *uq)
{
int error;
if ((error = umtx_key_get(td, umtx, &uq->uq_key)) != 0)
return (error);
uq->uq_addr = (vm_offset_t)umtx;
uq->uq_thread = td;
umtxq_lock(&uq->uq_key);
/* hmm, for condition variable, we don't need busy flag. */
umtxq_busy(&uq->uq_key);
umtxq_insert(uq);
umtxq_unbusy(&uq->uq_key);
umtxq_unlock(&uq->uq_key);
return (0);
}
static int
_do_lock(struct thread *td, struct umtx *umtx, long id, int timo)
{
struct umtx_q *uq;
intptr_t owner;
intptr_t old;
int error = 0;
uq = td->td_umtxq;
/*
* Care must be exercised when dealing with umtx structure. It
* can fault on any access.
*/
for (;;) {
/*
* Try the uncontested case. This should be done in userland.
*/
owner = casuptr((intptr_t *)&umtx->u_owner,
UMTX_UNOWNED, id);
/* The acquire succeeded. */
if (owner == UMTX_UNOWNED)
return (0);
/* The address was invalid. */
if (owner == -1)
return (EFAULT);
/* If no one owns it but it is contested try to acquire it. */
if (owner == UMTX_CONTESTED) {
owner = casuptr((intptr_t *)&umtx->u_owner,
UMTX_CONTESTED, id | UMTX_CONTESTED);
if (owner == UMTX_CONTESTED)
return (0);
/* The address was invalid. */
if (owner == -1)
return (EFAULT);
/* If this failed the lock has changed, restart. */
continue;
}
/*
* If we caught a signal, we have retried and now
* exit immediately.
*/
if (error || (error = umtxq_queue_me(td, umtx, uq)) != 0)
return (error);
/*
* Set the contested bit so that a release in user space
* knows to use the system call for unlock. If this fails
* either some one else has acquired the lock or it has been
* released.
*/
old = casuptr((intptr_t *)&umtx->u_owner, owner,
owner | UMTX_CONTESTED);
/* The address was invalid. */
if (old == -1) {
umtxq_lock(&uq->uq_key);
umtxq_busy(&uq->uq_key);
umtxq_remove(uq);
umtxq_unbusy(&uq->uq_key);
umtxq_unlock(&uq->uq_key);
umtx_key_release(&uq->uq_key);
return (EFAULT);
}
/*
* We set the contested bit, sleep. Otherwise the lock changed
Fix umtx locking, for libthr, in the kernel. 1. There was a race condition between a thread unlocking a umtx and the thread contesting it. If the unlocking thread won the race it may try to wakeup a thread that was not yet in msleep(). The contesting thread would then go to sleep to await a wakeup that would never come. It's not possible to close the race by using a lock because calls to casuptr() may have to fault a page in from swap. Instead, the race was closed by introducing a flag that the unlocking thread will set when waking up a thread. The contesting thread will check for this flag before going to sleep. For now the flag is kept in td_flags, but it may be better to use some other member or create a new one because of the possible performance/contention issues of having to own sched_lock. Thanks to jhb for pointing me in the right direction on this one. 2. Once a umtx was contested all future locks and unlocks were happening in the kernel, regardless of whether it was contested or not. To prevent this from happening, when a thread locks a umtx it checks the queue for that umtx and unsets the contested bit if there are no other threads waiting on it. Again, this is slightly more complicated than it needs to be because we can't hold a lock across casuptr(). So, the thread has to check the queue again after unseting the bit, and reset the contested bit if it finds that another thread has put itself on the queue in the mean time. 3. Remove the if... block for unlocking an uncontested umtx, and replace it with a KASSERT. The _only_ time a thread should be unlocking a umtx in the kernel is if it is contested.
2003-07-17 11:06:40 +00:00
* and we need to retry or we lost a race to the thread
* unlocking the umtx.
*/
umtxq_lock(&uq->uq_key);
if (old == owner && (uq->uq_flags & UQF_UMTXQ)) {
error = umtxq_sleep(td, &uq->uq_key, PCATCH,
"umtx", timo);
}
umtxq_busy(&uq->uq_key);
umtxq_remove(uq);
umtxq_unbusy(&uq->uq_key);
umtxq_unlock(&uq->uq_key);
umtx_key_release(&uq->uq_key);
}
return (0);
}
static int
do_lock(struct thread *td, struct umtx *umtx, long id,
struct timespec *timeout)
{
struct timespec ts, ts2, ts3;
struct timeval tv;
int error;
if (timeout == NULL) {
error = _do_lock(td, umtx, id, 0);
} else {
getnanouptime(&ts);
timespecadd(&ts, timeout);
TIMESPEC_TO_TIMEVAL(&tv, timeout);
for (;;) {
error = _do_lock(td, umtx, id, tvtohz(&tv));
if (error != ETIMEDOUT)
break;
getnanouptime(&ts2);
if (timespeccmp(&ts2, &ts, >=)) {
error = ETIMEDOUT;
break;
}
ts3 = ts;
timespecsub(&ts3, &ts2);
TIMESPEC_TO_TIMEVAL(&tv, &ts3);
}
}
/*
* This lets userland back off critical region if needed.
*/
if (error == ERESTART)
error = EINTR;
return (error);
}
static int
do_unlock(struct thread *td, struct umtx *umtx, long id)
{
struct umtx_key key;
intptr_t owner;
intptr_t old;
int error;
int count;
/*
* Make sure we own this mtx.
*
* XXX Need a {fu,su}ptr this is not correct on arch where
* sizeof(intptr_t) != sizeof(long).
*/
if ((owner = fuword(&umtx->u_owner)) == -1)
return (EFAULT);
if ((owner & ~UMTX_CONTESTED) != id)
return (EPERM);
Fix umtx locking, for libthr, in the kernel. 1. There was a race condition between a thread unlocking a umtx and the thread contesting it. If the unlocking thread won the race it may try to wakeup a thread that was not yet in msleep(). The contesting thread would then go to sleep to await a wakeup that would never come. It's not possible to close the race by using a lock because calls to casuptr() may have to fault a page in from swap. Instead, the race was closed by introducing a flag that the unlocking thread will set when waking up a thread. The contesting thread will check for this flag before going to sleep. For now the flag is kept in td_flags, but it may be better to use some other member or create a new one because of the possible performance/contention issues of having to own sched_lock. Thanks to jhb for pointing me in the right direction on this one. 2. Once a umtx was contested all future locks and unlocks were happening in the kernel, regardless of whether it was contested or not. To prevent this from happening, when a thread locks a umtx it checks the queue for that umtx and unsets the contested bit if there are no other threads waiting on it. Again, this is slightly more complicated than it needs to be because we can't hold a lock across casuptr(). So, the thread has to check the queue again after unseting the bit, and reset the contested bit if it finds that another thread has put itself on the queue in the mean time. 3. Remove the if... block for unlocking an uncontested umtx, and replace it with a KASSERT. The _only_ time a thread should be unlocking a umtx in the kernel is if it is contested.
2003-07-17 11:06:40 +00:00
/* We should only ever be in here for contested locks */
if ((owner & UMTX_CONTESTED) == 0)
return (EINVAL);
if ((error = umtx_key_get(td, umtx, &key)) != 0)
return (error);
umtxq_lock(&key);
umtxq_busy(&key);
count = umtxq_count(&key);
umtxq_unlock(&key);
/*
* When unlocking the umtx, it must be marked as unowned if
* there is zero or one thread only waiting for it.
* Otherwise, it must be marked as contested.
*/
old = casuptr((intptr_t *)&umtx->u_owner, owner,
count <= 1 ? UMTX_UNOWNED : UMTX_CONTESTED);
umtxq_lock(&key);
umtxq_signal(&key, 0);
umtxq_unbusy(&key);
umtxq_unlock(&key);
umtx_key_release(&key);
if (old == -1)
return (EFAULT);
if (old != owner)
return (EINVAL);
return (0);
}
static int
do_wait(struct thread *td, struct umtx *umtx, long id, struct timespec *timeout)
{
struct umtx_q *uq;
struct timespec ts, ts2, ts3;
struct timeval tv;
long tmp;
int error = 0;
uq = td->td_umtxq;
if ((error = umtxq_queue_me(td, umtx, uq)) != 0)
return (error);
tmp = fuword(&umtx->u_owner);
if (tmp != id) {
umtxq_lock(&uq->uq_key);
umtxq_remove(uq);
umtxq_unlock(&uq->uq_key);
} else if (timeout == NULL) {
umtxq_lock(&uq->uq_key);
if (uq->uq_flags & UQF_UMTXQ)
error = umtxq_sleep(td, &uq->uq_key,
PCATCH, "ucond", 0);
if (!(uq->uq_flags & UQF_UMTXQ))
error = 0;
else
umtxq_remove(uq);
umtxq_unlock(&uq->uq_key);
} else {
getnanouptime(&ts);
timespecadd(&ts, timeout);
TIMESPEC_TO_TIMEVAL(&tv, timeout);
for (;;) {
umtxq_lock(&uq->uq_key);
if (uq->uq_flags & UQF_UMTXQ) {
error = umtxq_sleep(td, &uq->uq_key, PCATCH,
"ucond", tvtohz(&tv));
}
if (!(uq->uq_flags & UQF_UMTXQ)) {
umtxq_unlock(&uq->uq_key);
goto out;
}
umtxq_unlock(&uq->uq_key);
if (error != ETIMEDOUT)
break;
getnanouptime(&ts2);
if (timespeccmp(&ts2, &ts, >=)) {
error = ETIMEDOUT;
break;
}
ts3 = ts;
timespecsub(&ts3, &ts2);
TIMESPEC_TO_TIMEVAL(&tv, &ts3);
}
umtxq_lock(&uq->uq_key);
umtxq_remove(uq);
umtxq_unlock(&uq->uq_key);
}
out:
umtx_key_release(&uq->uq_key);
if (error == ERESTART)
error = EINTR;
return (error);
}
int
kern_umtx_wake(struct thread *td, void *uaddr, int n_wake)
{
struct umtx_key key;
int ret;
if ((ret = umtx_key_get(td, uaddr, &key)) != 0)
return (ret);
umtxq_lock(&key);
ret = umtxq_signal(&key, n_wake);
umtxq_unlock(&key);
umtx_key_release(&key);
return (0);
}
int
_umtx_lock(struct thread *td, struct _umtx_lock_args *uap)
/* struct umtx *umtx */
{
return _do_lock(td, uap->umtx, td->td_tid, 0);
}
int
_umtx_unlock(struct thread *td, struct _umtx_unlock_args *uap)
/* struct umtx *umtx */
{
return do_unlock(td, uap->umtx, td->td_tid);
}
int
_umtx_op(struct thread *td, struct _umtx_op_args *uap)
{
struct timespec timeout;
struct timespec *ts;
int error;
switch(uap->op) {
case UMTX_OP_LOCK:
/* Allow a null timespec (wait forever). */
if (uap->uaddr2 == NULL)
ts = NULL;
else {
error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
if (error != 0)
break;
if (timeout.tv_nsec >= 1000000000 ||
timeout.tv_nsec < 0) {
error = EINVAL;
break;
}
ts = &timeout;
}
error = do_lock(td, uap->umtx, uap->id, ts);
break;
case UMTX_OP_UNLOCK:
error = do_unlock(td, uap->umtx, uap->id);
break;
case UMTX_OP_WAIT:
/* Allow a null timespec (wait forever). */
if (uap->uaddr2 == NULL)
ts = NULL;
else {
error = copyin(uap->uaddr2, &timeout, sizeof(timeout));
if (error != 0)
break;
if (timeout.tv_nsec >= 1000000000 ||
timeout.tv_nsec < 0) {
error = EINVAL;
break;
}
ts = &timeout;
}
error = do_wait(td, uap->umtx, uap->id, ts);
break;
case UMTX_OP_WAKE:
error = kern_umtx_wake(td, uap->umtx, uap->id);
break;
default:
error = EINVAL;
break;
}
return (error);
}