Add two commands to the _umtx_op system call to allow a simple mutex to be

locked and unlocked entirely in userland. Locking and unlocking a mutex
in userland reduces the total time a mutex is held by a thread. In some
application code, a mutex protects only a small piece of code whose
execution time is shorter than a single system call; when lock contention
happens under the current implementation, however, the lock holder has to
extend its hold time and enter the kernel to unlock it. This change avoids
that disadvantage: the holder first sets the mutex to the free state and
only then enters the kernel to wake one waiter up. This improves
performance dramatically in some sysbench mutex tests.

Tested by: kris
Sounds great: jeff
This commit is contained in:
David Xu 2008-06-24 07:32:12 +00:00
parent ef0b687ced
commit 7de1ecef2d
5 changed files with 251 additions and 90 deletions

View File

@ -93,6 +93,8 @@ static int mutex_self_trylock(pthread_mutex_t);
static int mutex_self_lock(pthread_mutex_t,
const struct timespec *abstime);
static int mutex_unlock_common(pthread_mutex_t *);
static int mutex_lock_sleep(struct pthread *, pthread_mutex_t,
const struct timespec *);
__weak_reference(__pthread_mutex_init, pthread_mutex_init);
__strong_reference(__pthread_mutex_init, _pthread_mutex_init);
@ -346,25 +348,24 @@ __pthread_mutex_trylock(pthread_mutex_t *mutex)
}
static int
mutex_lock_sleep(struct pthread *curthread, pthread_mutex_t m,
const struct timespec * abstime)
mutex_lock_sleep(struct pthread *curthread, struct pthread_mutex *m,
const struct timespec *abstime)
{
struct timespec ts, ts2;
uint32_t id;
int ret;
uint32_t id, owner;
int count;
int ret;
if (m->m_owner == curthread)
return mutex_self_lock(m, abstime);
id = TID(curthread);
if (__predict_false(m->m_owner == curthread))
return mutex_self_lock(m, abstime);
/*
* For adaptive mutexes, spin for a bit in the expectation
* that if the application requests this mutex type then
* the lock is likely to be released quickly and it is
* faster than entering the kernel
*/
if (m->m_lock.m_flags & UMUTEX_PRIO_PROTECT)
if (m->m_lock.m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT))
goto sleep_in_kernel;
if (!_thr_is_smp)
@ -372,10 +373,12 @@ mutex_lock_sleep(struct pthread *curthread, pthread_mutex_t m,
count = m->m_spinloops;
while (count--) {
if (m->m_lock.m_owner == UMUTEX_UNOWNED) {
ret = _thr_umutex_trylock2(&m->m_lock, id);
if (ret == 0)
owner = m->m_lock.m_owner;
if ((owner & ~UMUTEX_CONTESTED) == 0) {
if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner, id|owner)) {
ret = 0;
goto done;
}
}
CPU_SPINWAIT;
}
@ -384,49 +387,43 @@ mutex_lock_sleep(struct pthread *curthread, pthread_mutex_t m,
count = m->m_yieldloops;
while (count--) {
_sched_yield();
ret = _thr_umutex_trylock2(&m->m_lock, id);
if (ret == 0)
goto done;
owner = m->m_lock.m_owner;
if ((owner & ~UMUTEX_CONTESTED) == 0) {
if (atomic_cmpset_acq_32(&m->m_lock.m_owner, owner, id|owner)) {
ret = 0;
goto done;
}
}
}
sleep_in_kernel:
if (abstime == NULL) {
ret = __thr_umutex_lock(&m->m_lock);
ret = __thr_umutex_lock(&m->m_lock, id);
} else if (__predict_false(
abstime->tv_sec < 0 || abstime->tv_nsec < 0 ||
abstime->tv_nsec < 0 ||
abstime->tv_nsec >= 1000000000)) {
ret = EINVAL;
} else {
clock_gettime(CLOCK_REALTIME, &ts);
TIMESPEC_SUB(&ts2, abstime, &ts);
ret = __thr_umutex_timedlock(&m->m_lock, &ts2);
/*
* Timed out wait is not restarted if
* it was interrupted, not worth to do it.
*/
if (ret == EINTR)
ret = ETIMEDOUT;
ret = __thr_umutex_timedlock(&m->m_lock, id, abstime);
}
done:
if (ret == 0)
ENQUEUE_MUTEX(curthread, m);
return (ret);
}
static inline int
mutex_lock_common(struct pthread *curthread, struct pthread_mutex *m,
const struct timespec * abstime)
const struct timespec *abstime)
{
uint32_t id;
int ret;
id = TID(curthread);
ret = _thr_umutex_trylock2(&m->m_lock, id);
if (ret == 0)
if (_thr_umutex_trylock2(&m->m_lock, TID(curthread)) == 0) {
ENQUEUE_MUTEX(curthread, m);
else
ret = mutex_lock_sleep(curthread, m, abstime);
return (ret);
return (0);
}
return (mutex_lock_sleep(curthread, m, abstime));
}
int
@ -450,6 +447,7 @@ __pthread_mutex_lock(pthread_mutex_t *mutex)
return (ret);
m = *mutex;
}
return (mutex_lock_common(curthread, m, NULL));
}

View File

@ -48,25 +48,74 @@ _thr_umutex_init(struct umutex *mtx)
}
int
__thr_umutex_lock(struct umutex *mtx)
__thr_umutex_lock(struct umutex *mtx, uint32_t id)
{
return _umtx_op_err(mtx, UMTX_OP_MUTEX_LOCK, 0, 0, 0);
uint32_t owner;
if ((mtx->m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) == 0) {
for (;;) {
/* wait in kernel */
_umtx_op_err(mtx, UMTX_OP_MUTEX_WAIT, 0, 0, 0);
owner = mtx->m_owner;
if ((owner & ~UMUTEX_CONTESTED) == 0 &&
atomic_cmpset_acq_32(&mtx->m_owner, owner, id|owner))
return (0);
}
}
return _umtx_op_err(mtx, UMTX_OP_MUTEX_LOCK, 0, 0, 0);
}
int
__thr_umutex_timedlock(struct umutex *mtx,
const struct timespec *timeout)
__thr_umutex_timedlock(struct umutex *mtx, uint32_t id,
const struct timespec *ets)
{
if (timeout && (timeout->tv_sec < 0 || (timeout->tv_sec == 0 &&
timeout->tv_nsec <= 0)))
struct timespec timo, cts;
uint32_t owner;
int ret;
clock_gettime(CLOCK_REALTIME, &cts);
TIMESPEC_SUB(&timo, ets, &cts);
if (timo.tv_sec < 0)
return (ETIMEDOUT);
return _umtx_op_err(mtx, UMTX_OP_MUTEX_LOCK, 0, 0,
__DECONST(void *, timeout));
for (;;) {
if ((mtx->m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) == 0) {
/* wait in kernel */
ret = _umtx_op_err(mtx, UMTX_OP_MUTEX_WAIT, 0, 0, &timo);
/* now try to lock it */
owner = mtx->m_owner;
if ((owner & ~UMUTEX_CONTESTED) == 0 &&
atomic_cmpset_acq_32(&mtx->m_owner, owner, id|owner))
return (0);
} else {
ret = _umtx_op_err(mtx, UMTX_OP_MUTEX_LOCK, 0, 0, &timo);
if (ret == 0)
break;
}
if (ret == ETIMEDOUT)
break;
clock_gettime(CLOCK_REALTIME, &cts);
TIMESPEC_SUB(&timo, ets, &cts);
if (timo.tv_sec < 0 || (timo.tv_sec == 0 && timo.tv_nsec == 0)) {
ret = ETIMEDOUT;
break;
}
}
return (ret);
}
int
__thr_umutex_unlock(struct umutex *mtx)
__thr_umutex_unlock(struct umutex *mtx, uint32_t id)
{
if ((mtx->m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) == 0) {
atomic_cmpset_rel_32(&mtx->m_owner, id | UMUTEX_CONTESTED, UMUTEX_CONTESTED);
return _umtx_op_err(mtx, UMTX_OP_MUTEX_WAKE, 0, 0, 0);
}
return _umtx_op_err(mtx, UMTX_OP_MUTEX_UNLOCK, 0, 0, 0);
}
@ -123,7 +172,8 @@ _thr_ucond_wait(struct ucond *cv, struct umutex *m,
{
if (timeout && (timeout->tv_sec < 0 || (timeout->tv_sec == 0 &&
timeout->tv_nsec <= 0))) {
__thr_umutex_unlock(m);
struct pthread *curthread = _get_curthread();
_thr_umutex_unlock(m, TID(curthread));
return (ETIMEDOUT);
}
return _umtx_op_err(cv, UMTX_OP_CV_WAIT,

View File

@ -34,10 +34,10 @@
#define DEFAULT_UMUTEX {0,0, {0,0},{0,0,0,0}}
int __thr_umutex_lock(struct umutex *mtx) __hidden;
int __thr_umutex_timedlock(struct umutex *mtx,
int __thr_umutex_lock(struct umutex *mtx, uint32_t id) __hidden;
int __thr_umutex_timedlock(struct umutex *mtx, uint32_t id,
const struct timespec *timeout) __hidden;
int __thr_umutex_unlock(struct umutex *mtx) __hidden;
int __thr_umutex_unlock(struct umutex *mtx, uint32_t id) __hidden;
int __thr_umutex_trylock(struct umutex *mtx) __hidden;
int __thr_umutex_set_ceiling(struct umutex *mtx, uint32_t ceiling,
uint32_t *oldceiling) __hidden;
@ -71,26 +71,30 @@ _thr_umutex_trylock(struct umutex *mtx, uint32_t id)
static inline int
_thr_umutex_trylock2(struct umutex *mtx, uint32_t id)
{
if (atomic_cmpset_acq_32(&mtx->m_owner, UMUTEX_UNOWNED, id))
if (atomic_cmpset_acq_32(&mtx->m_owner, UMUTEX_UNOWNED, id) != 0)
return (0);
if ((uint32_t)mtx->m_owner == UMUTEX_CONTESTED &&
__predict_true((mtx->m_flags & (UMUTEX_PRIO_PROTECT | UMUTEX_PRIO_INHERIT)) == 0))
if (atomic_cmpset_acq_32(&mtx->m_owner, UMUTEX_CONTESTED, id | UMUTEX_CONTESTED))
return (0);
return (EBUSY);
}
static inline int
_thr_umutex_lock(struct umutex *mtx, uint32_t id)
{
if (atomic_cmpset_acq_32(&mtx->m_owner, UMUTEX_UNOWNED, id))
if (_thr_umutex_trylock2(mtx, id) == 0)
return (0);
return (__thr_umutex_lock(mtx));
return (__thr_umutex_lock(mtx, id));
}
static inline int
_thr_umutex_timedlock(struct umutex *mtx, uint32_t id,
const struct timespec *timeout)
{
if (atomic_cmpset_acq_32(&mtx->m_owner, UMUTEX_UNOWNED, id))
if (_thr_umutex_trylock2(mtx, id) == 0)
return (0);
return (__thr_umutex_timedlock(mtx, timeout));
return (__thr_umutex_timedlock(mtx, id, timeout));
}
static inline int
@ -98,7 +102,7 @@ _thr_umutex_unlock(struct umutex *mtx, uint32_t id)
{
if (atomic_cmpset_rel_32(&mtx->m_owner, id, UMUTEX_UNOWNED))
return (0);
return (__thr_umutex_unlock(mtx));
return (__thr_umutex_unlock(mtx, id));
}
static inline int

View File

@ -66,6 +66,9 @@ __FBSDID("$FreeBSD$");
#define TYPE_PP_UMUTEX 5
#define TYPE_RWLOCK 6
#define _UMUTEX_TRY 1
#define _UMUTEX_WAIT 2
/* Key to represent a unique userland synchronous object */
struct umtx_key {
int hash;
@ -1037,7 +1040,7 @@ kern_umtx_wake(struct thread *td, void *uaddr, int n_wake, int is_private)
*/
static int
_do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
int try)
int mode)
{
struct umtx_q *uq;
uint32_t owner, old, id;
@ -1051,40 +1054,46 @@ _do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
* can fault on any access.
*/
for (;;) {
/*
* Try the uncontested case. This should be done in userland.
*/
owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
owner = fuword32(__DEVOLATILE(void *, &m->m_owner));
if (mode == _UMUTEX_WAIT) {
if (owner == UMUTEX_UNOWNED || owner == UMUTEX_CONTESTED)
return (0);
} else {
/*
* Try the uncontested case. This should be done in userland.
*/
owner = casuword32(&m->m_owner, UMUTEX_UNOWNED, id);
/* The acquire succeeded. */
if (owner == UMUTEX_UNOWNED)
return (0);
/* The address was invalid. */
if (owner == -1)
return (EFAULT);
/* If no one owns it but it is contested try to acquire it. */
if (owner == UMUTEX_CONTESTED) {
owner = casuword32(&m->m_owner,
UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
if (owner == UMUTEX_CONTESTED)
/* The acquire succeeded. */
if (owner == UMUTEX_UNOWNED)
return (0);
/* The address was invalid. */
if (owner == -1)
return (EFAULT);
/* If this failed the lock has changed, restart. */
continue;
/* If no one owns it but it is contested try to acquire it. */
if (owner == UMUTEX_CONTESTED) {
owner = casuword32(&m->m_owner,
UMUTEX_CONTESTED, id | UMUTEX_CONTESTED);
if (owner == UMUTEX_CONTESTED)
return (0);
/* The address was invalid. */
if (owner == -1)
return (EFAULT);
/* If this failed the lock has changed, restart. */
continue;
}
}
if ((flags & UMUTEX_ERROR_CHECK) != 0 &&
(owner & ~UMUTEX_CONTESTED) == id)
return (EDEADLK);
if (try != 0)
if (mode == _UMUTEX_TRY)
return (EBUSY);
/*
@ -1101,7 +1110,6 @@ _do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
umtxq_lock(&uq->uq_key);
umtxq_busy(&uq->uq_key);
umtxq_insert(uq);
umtxq_unbusy(&uq->uq_key);
umtxq_unlock(&uq->uq_key);
/*
@ -1116,6 +1124,7 @@ _do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
if (old == -1) {
umtxq_lock(&uq->uq_key);
umtxq_remove(uq);
umtxq_unbusy(&uq->uq_key);
umtxq_unlock(&uq->uq_key);
umtx_key_release(&uq->uq_key);
return (EFAULT);
@ -1127,6 +1136,7 @@ _do_lock_normal(struct thread *td, struct umutex *m, uint32_t flags, int timo,
* unlocking the umtx.
*/
umtxq_lock(&uq->uq_key);
umtxq_unbusy(&uq->uq_key);
if (old == owner)
error = umtxq_sleep(uq, "umtxn", timo);
umtxq_remove(uq);
@ -1162,7 +1172,6 @@ do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
if ((owner & ~UMUTEX_CONTESTED) != id)
return (EPERM);
/* This should be done in userland */
if ((owner & UMUTEX_CONTESTED) == 0) {
old = casuword32(&m->m_owner, owner, UMUTEX_UNOWNED);
if (old == -1)
@ -1201,6 +1210,50 @@ do_unlock_normal(struct thread *td, struct umutex *m, uint32_t flags)
return (0);
}
/*
 * Check if the mutex is available and wake up a waiter,
 * only for simple mutex.
 *
 * Backs UMTX_OP_MUTEX_WAKE: userland has already released its ownership
 * bits with a release CAS, so the kernel only needs to hand the lock off
 * (or clear the contested bit) and signal one sleeper.
 */
static int
do_wake_umutex(struct thread *td, struct umutex *m)
{
struct umtx_key key;
uint32_t owner;
uint32_t flags;
int error;
int count;
/* Snapshot the owner word; fuword32 returns -1 on a bad address. */
owner = fuword32(__DEVOLATILE(uint32_t *, &m->m_owner));
if (owner == -1)
return (EFAULT);
/* Someone (re)acquired it already; nothing to do, not an error. */
if ((owner & ~UMUTEX_CONTESTED) != 0)
return (0);
flags = fuword32(&m->m_flags);
/* We should only ever be in here for contested locks */
if ((error = umtx_key_get(m, TYPE_NORMAL_UMUTEX, GET_SHARE(flags),
&key)) != 0)
return (error);
/* Busy the queue so no new waiter can race the count below. */
umtxq_lock(&key);
umtxq_busy(&key);
count = umtxq_count(&key);
umtxq_unlock(&key);
/*
 * At most one sleeper remains: try to drop the contested bit so
 * future lock/unlock cycles can stay entirely in userland.  The
 * CAS may legitimately fail if another thread grabbed the lock.
 */
if (count <= 1)
owner = casuword32(&m->m_owner, UMUTEX_CONTESTED, UMUTEX_UNOWNED);
umtxq_lock(&key);
/* Wake one waiter only if the lock is still free (or CAS untouched it). */
if (count != 0 && (owner & ~UMUTEX_CONTESTED) == 0)
umtxq_signal(&key, 1);
umtxq_unbusy(&key);
umtxq_unlock(&key);
umtx_key_release(&key);
return (0);
}
static inline struct umtx_pi *
umtx_pi_alloc(int flags)
{
@ -2144,15 +2197,15 @@ do_set_ceiling(struct thread *td, struct umutex *m, uint32_t ceiling,
static int
_do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
int try)
int mode)
{
switch(flags & (UMUTEX_PRIO_INHERIT | UMUTEX_PRIO_PROTECT)) {
case 0:
return (_do_lock_normal(td, m, flags, timo, try));
return (_do_lock_normal(td, m, flags, timo, mode));
case UMUTEX_PRIO_INHERIT:
return (_do_lock_pi(td, m, flags, timo, try));
return (_do_lock_pi(td, m, flags, timo, mode));
case UMUTEX_PRIO_PROTECT:
return (_do_lock_pp(td, m, flags, timo, try));
return (_do_lock_pp(td, m, flags, timo, mode));
}
return (EINVAL);
}
@ -2162,7 +2215,7 @@ _do_lock_umutex(struct thread *td, struct umutex *m, int flags, int timo,
*/
static int
do_lock_umutex(struct thread *td, struct umutex *m,
struct timespec *timeout, int try)
struct timespec *timeout, int mode)
{
struct timespec ts, ts2, ts3;
struct timeval tv;
@ -2174,16 +2227,16 @@ do_lock_umutex(struct thread *td, struct umutex *m,
return (EFAULT);
if (timeout == NULL) {
error = _do_lock_umutex(td, m, flags, 0, try);
error = _do_lock_umutex(td, m, flags, 0, mode);
/* Mutex locking is restarted if it is interrupted. */
if (error == EINTR)
if (error == EINTR && mode != _UMUTEX_WAIT)
error = ERESTART;
} else {
getnanouptime(&ts);
timespecadd(&ts, timeout);
TIMESPEC_TO_TIMEVAL(&tv, timeout);
for (;;) {
error = _do_lock_umutex(td, m, flags, tvtohz(&tv), try);
error = _do_lock_umutex(td, m, flags, tvtohz(&tv), mode);
if (error != ETIMEDOUT)
break;
getnanouptime(&ts2);
@ -2830,7 +2883,36 @@ __umtx_op_lock_umutex(struct thread *td, struct _umtx_op_args *uap)
static int
__umtx_op_trylock_umutex(struct thread *td, struct _umtx_op_args *uap)
{
return do_lock_umutex(td, uap->obj, NULL, 1);
return do_lock_umutex(td, uap->obj, NULL, _UMUTEX_TRY);
}
/*
 * UMTX_OP_MUTEX_WAIT entry point: sleep until the mutex looks lockable,
 * without ever acquiring it in the kernel (mode _UMUTEX_WAIT makes
 * _do_lock_normal return 0 as soon as the owner bits are clear, leaving
 * the actual acquisition to the userland CAS loop).
 */
static int
__umtx_op_wait_umutex(struct thread *td, struct _umtx_op_args *uap)
{
struct timespec *ts, timeout;
int error;
/* Allow a null timespec (wait forever). */
if (uap->uaddr2 == NULL)
ts = NULL;
else {
error = copyin(uap->uaddr2, &timeout,
sizeof(timeout));
if (error != 0)
return (error);
/* Reject malformed timeouts before sleeping on them. */
if (timeout.tv_nsec >= 1000000000 ||
timeout.tv_nsec < 0) {
return (EINVAL);
}
ts = &timeout;
}
return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
}
/*
 * UMTX_OP_MUTEX_WAKE entry point: wake one waiter of a simple mutex
 * that userland has already marked free.  Thin wrapper around
 * do_wake_umutex(); the timeout/value arguments of _umtx_op are unused.
 */
static int
__umtx_op_wake_umutex(struct thread *td, struct _umtx_op_args *uap)
{
return do_wake_umutex(td, uap->obj);
}
static int
@ -2952,7 +3034,9 @@ static _umtx_op_func op_table[] = {
__umtx_op_rw_wrlock, /* UMTX_OP_RW_WRLOCK */
__umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */
__umtx_op_wait_uint_private, /* UMTX_OP_WAIT_UINT_PRIVATE */
__umtx_op_wake_private /* UMTX_OP_WAKE_PRIVATE */
__umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */
__umtx_op_wait_umutex, /* UMTX_OP_UMUTEX_WAIT */
__umtx_op_wake_umutex /* UMTX_OP_UMUTEX_WAKE */
};
int
@ -3066,6 +3150,27 @@ __umtx_op_lock_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
return do_lock_umutex(td, uap->obj, ts, 0);
}
/*
 * 32-bit compat variant of UMTX_OP_MUTEX_WAIT: identical to
 * __umtx_op_wait_umutex except the timespec is copied in via
 * copyin_timeout32(), which presumably converts from the 32-bit
 * userland timespec layout — behavior otherwise matches the native path.
 */
static int
__umtx_op_wait_umutex_compat32(struct thread *td, struct _umtx_op_args *uap)
{
struct timespec *ts, timeout;
int error;
/* Allow a null timespec (wait forever). */
if (uap->uaddr2 == NULL)
ts = NULL;
else {
error = copyin_timeout32(uap->uaddr2, &timeout);
if (error != 0)
return (error);
/* Reject malformed timeouts before sleeping on them. */
if (timeout.tv_nsec >= 1000000000 ||
timeout.tv_nsec < 0)
return (EINVAL);
ts = &timeout;
}
return do_lock_umutex(td, uap->obj, ts, _UMUTEX_WAIT);
}
static int
__umtx_op_cv_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
@ -3170,7 +3275,9 @@ static _umtx_op_func op_table_compat32[] = {
__umtx_op_rw_wrlock_compat32, /* UMTX_OP_RW_WRLOCK */
__umtx_op_rw_unlock, /* UMTX_OP_RW_UNLOCK */
__umtx_op_wait_uint_private_compat32, /* UMTX_OP_WAIT_UINT_PRIVATE */
__umtx_op_wake_private /* UMTX_OP_WAKE_PRIVATE */
__umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */
__umtx_op_wait_umutex_compat32, /* UMTX_OP_UMUTEX_WAIT */
__umtx_op_wake_umutex /* UMTX_OP_UMUTEX_WAKE */
};
int

View File

@ -101,7 +101,9 @@ struct urwlock {
#define UMTX_OP_RW_UNLOCK 14
#define UMTX_OP_WAIT_UINT_PRIVATE 15
#define UMTX_OP_WAKE_PRIVATE 16
#define UMTX_OP_MAX 17
#define UMTX_OP_MUTEX_WAIT 17
#define UMTX_OP_MUTEX_WAKE 18
#define UMTX_OP_MAX 19
/* flags for UMTX_OP_CV_WAIT */
#define UMTX_CHECK_UNPARKING 0x01