The current POSIX semaphore implementation stores the _has_waiters flag

in a separate word from the _count.  This does not permit both items to
be updated atomically in a portable manner.  As a result, sem_post()
must always perform a system call to safely clear _has_waiters.

This change removes the _has_waiters field and instead uses the high bit
of _count as the _has_waiters flag.  A new umtx object type (_usem2) and
two new umtx operations are added (SEM_WAIT2 and SEM_WAKE2) to implement
these semantics.  The older operations are still supported under the
COMPAT_FREEBSD9/10 options.  The POSIX semaphore API in libc has
been updated to use the new implementation.  Note that the new
implementation is not compatible with the previous implementation.
However, this only affects static binaries (which cannot be helped by
symbol versioning).  Binaries using a dynamic libc will continue to work
fine.  SEM_MAGIC has been bumped so that mismatched binaries will error
rather than corrupting a shared semaphore.  In addition, a padding field
has been added to sem_t so that it remains the same size.

Differential Revision:	https://reviews.freebsd.org/D961
Reported by:	adrian
Reviewed by:	kib, jilles (earlier version)
Sponsored by:	Norse
This commit is contained in:
John Baldwin 2014-10-24 20:02:44 +00:00
parent 7d313e7bdb
commit 53e1ffbbce
6 changed files with 212 additions and 21 deletions

View File

@ -38,7 +38,8 @@
struct _sem {
__uint32_t _magic;
struct _usem _kern;
struct _usem2 _kern;
__uint32_t _padding; /* Preserve structure size */
};
typedef struct _sem sem_t;

View File

@ -61,7 +61,9 @@ __weak_reference(_sem_unlink, sem_unlink);
__weak_reference(_sem_wait, sem_wait);
#define SEM_PREFIX "/tmp/SEMD"
#define SEM_MAGIC ((u_int32_t)0x73656d31)
#define SEM_MAGIC ((u_int32_t)0x73656d32)
_Static_assert(SEM_VALUE_MAX <= USEM_MAX_COUNT, "SEM_VALUE_MAX too large");
struct sem_nameinfo {
int open_count;
@ -131,7 +133,6 @@ _sem_init(sem_t *sem, int pshared, unsigned int value)
bzero(sem, sizeof(sem_t));
sem->_magic = SEM_MAGIC;
sem->_kern._count = (u_int32_t)value;
sem->_kern._has_waiters = 0;
sem->_kern._flags = pshared ? USYNC_PROCESS_SHARED : 0;
return (0);
}
@ -212,7 +213,6 @@ _sem_open(const char *name, int flags, ...)
sem_t tmp;
tmp._magic = SEM_MAGIC;
tmp._kern._has_waiters = 0;
tmp._kern._count = value;
tmp._kern._flags = USYNC_PROCESS_SHARED | SEM_NAMED;
if (_write(fd, &tmp, sizeof(tmp)) != sizeof(tmp))
@ -332,18 +332,18 @@ _sem_getvalue(sem_t * __restrict sem, int * __restrict sval)
if (sem_check_validity(sem) != 0)
return (-1);
*sval = (int)sem->_kern._count;
*sval = (int)USEM_COUNT(sem->_kern._count);
return (0);
}
static __inline int
usem_wake(struct _usem *sem)
usem_wake(struct _usem2 *sem)
{
return _umtx_op(sem, UMTX_OP_SEM_WAKE, 0, NULL, NULL);
return _umtx_op(sem, UMTX_OP_SEM2_WAKE, 0, NULL, NULL);
}
static __inline int
usem_wait(struct _usem *sem, const struct timespec *abstime)
usem_wait(struct _usem2 *sem, const struct timespec *abstime)
{
struct _umtx_time *tm_p, timeout;
size_t tm_size;
@ -358,7 +358,7 @@ usem_wait(struct _usem *sem, const struct timespec *abstime)
tm_p = &timeout;
tm_size = sizeof(timeout);
}
return _umtx_op(sem, UMTX_OP_SEM_WAIT, 0,
return _umtx_op(sem, UMTX_OP_SEM2_WAIT, 0,
(void *)tm_size, __DECONST(void*, tm_p));
}
@ -370,7 +370,7 @@ _sem_trywait(sem_t *sem)
if (sem_check_validity(sem) != 0)
return (-1);
while ((val = sem->_kern._count) > 0) {
while (USEM_COUNT(val = sem->_kern._count) > 0) {
if (atomic_cmpset_acq_int(&sem->_kern._count, val, val - 1))
return (0);
}
@ -390,7 +390,7 @@ _sem_timedwait(sem_t * __restrict sem,
retval = 0;
_pthread_testcancel();
for (;;) {
while ((val = sem->_kern._count) > 0) {
while (USEM_COUNT(val = sem->_kern._count) > 0) {
if (atomic_cmpset_acq_int(&sem->_kern._count, val, val - 1))
return (0);
}
@ -439,9 +439,10 @@ _sem_post(sem_t *sem)
do {
count = sem->_kern._count;
if (count + 1 > SEM_VALUE_MAX)
if (USEM_COUNT(count) + 1 > SEM_VALUE_MAX)
return (EOVERFLOW);
} while(!atomic_cmpset_rel_int(&sem->_kern._count, count, count+1));
(void)usem_wake(&sem->_kern);
} while (!atomic_cmpset_rel_int(&sem->_kern._count, count, count + 1));
if (count & USEM_HAS_WAITERS)
usem_wake(&sem->_kern);
return (0);
}

View File

@ -2710,6 +2710,7 @@ do_rw_unlock(struct thread *td, struct urwlock *rwlock)
return (error);
}
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
static int
do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
{
@ -2731,7 +2732,7 @@ do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
umtxq_busy(&uq->uq_key);
umtxq_insert(uq);
umtxq_unlock(&uq->uq_key);
casuword32(__DEVOLATILE(uint32_t *, &sem->_has_waiters), 0, 1);
casuword32(&sem->_has_waiters, 0, 1);
count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count));
if (count != 0) {
umtxq_lock(&uq->uq_key);
@ -2761,7 +2762,7 @@ do_sem_wait(struct thread *td, struct _usem *sem, struct _umtx_time *timeout)
}
/*
* Signal a userland condition variable.
* Signal a userland semaphore.
*/
static int
do_sem_wake(struct thread *td, struct _usem *sem)
@ -2795,6 +2796,119 @@ do_sem_wake(struct thread *td, struct _usem *sem)
umtx_key_release(&key);
return (error);
}
#endif
static int
do_sem2_wait(struct thread *td, struct _usem2 *sem, struct _umtx_time *timeout)
{
struct abs_timeout timo;
struct umtx_q *uq;
uint32_t count, flags;
int error;
uq = td->td_umtxq;
flags = fuword32(&sem->_flags);
error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &uq->uq_key);
if (error != 0)
return (error);
if (timeout != NULL)
abs_timeout_init2(&timo, timeout);
umtxq_lock(&uq->uq_key);
umtxq_busy(&uq->uq_key);
umtxq_insert(uq);
umtxq_unlock(&uq->uq_key);
count = fuword32(__DEVOLATILE(uint32_t *, &sem->_count));
if (count == -1) {
umtxq_lock(&uq->uq_key);
umtxq_unbusy(&uq->uq_key);
umtxq_remove(uq);
umtxq_unlock(&uq->uq_key);
umtx_key_release(&uq->uq_key);
return (EFAULT);
}
for (;;) {
if (USEM_COUNT(count) != 0) {
umtxq_lock(&uq->uq_key);
umtxq_unbusy(&uq->uq_key);
umtxq_remove(uq);
umtxq_unlock(&uq->uq_key);
umtx_key_release(&uq->uq_key);
return (0);
}
if (count == USEM_HAS_WAITERS)
break;
count = casuword32(&sem->_count, 0, USEM_HAS_WAITERS);
if (count == -1) {
umtxq_lock(&uq->uq_key);
umtxq_unbusy(&uq->uq_key);
umtxq_remove(uq);
umtxq_unlock(&uq->uq_key);
umtx_key_release(&uq->uq_key);
return (EFAULT);
}
if (count == 0)
break;
}
umtxq_lock(&uq->uq_key);
umtxq_unbusy(&uq->uq_key);
error = umtxq_sleep(uq, "usem", timeout == NULL ? NULL : &timo);
if ((uq->uq_flags & UQF_UMTXQ) == 0)
error = 0;
else {
umtxq_remove(uq);
/* A relative timeout cannot be restarted. */
if (error == ERESTART && timeout != NULL &&
(timeout->_flags & UMTX_ABSTIME) == 0)
error = EINTR;
}
umtxq_unlock(&uq->uq_key);
umtx_key_release(&uq->uq_key);
return (error);
}
/*
* Signal a userland semaphore.
*/
static int
do_sem2_wake(struct thread *td, struct _usem2 *sem)
{
struct umtx_key key;
int error, cnt;
uint32_t count, flags;
flags = fuword32(&sem->_flags);
if ((error = umtx_key_get(sem, TYPE_SEM, GET_SHARE(flags), &key)) != 0)
return (error);
umtxq_lock(&key);
umtxq_busy(&key);
cnt = umtxq_count(&key);
if (cnt > 0) {
umtxq_signal(&key, 1);
/*
* If this was the last sleeping thread, clear the waiters
* flag in _count.
*/
if (cnt == 1) {
umtxq_unlock(&key);
count = fuword32(&sem->_count);
while (count != -1 && count & USEM_HAS_WAITERS)
count = casuword32(&sem->_count, count,
count & ~USEM_HAS_WAITERS);
if (count == -1)
error = EFAULT;
umtxq_lock(&key);
}
}
umtxq_unbusy(&key);
umtxq_unlock(&key);
umtx_key_release(&key);
return (error);
}
inline int
umtx_copyin_timeout(const void *addr, struct timespec *tsp)
@ -3066,6 +3180,7 @@ __umtx_op_rw_unlock(struct thread *td, struct _umtx_op_args *uap)
return do_rw_unlock(td, uap->obj);
}
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
static int
__umtx_op_sem_wait(struct thread *td, struct _umtx_op_args *uap)
{
@ -3090,6 +3205,7 @@ __umtx_op_sem_wake(struct thread *td, struct _umtx_op_args *uap)
{
return do_sem_wake(td, uap->obj);
}
#endif
static int
__umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap)
@ -3097,6 +3213,31 @@ __umtx_op_wake2_umutex(struct thread *td, struct _umtx_op_args *uap)
return do_wake2_umutex(td, uap->obj, uap->val);
}
static int
__umtx_op_sem2_wait(struct thread *td, struct _umtx_op_args *uap)
{
struct _umtx_time *tm_p, timeout;
int error;
/* Allow a null timespec (wait forever). */
if (uap->uaddr2 == NULL)
tm_p = NULL;
else {
error = umtx_copyin_umtx_time(
uap->uaddr2, (size_t)uap->uaddr1, &timeout);
if (error != 0)
return (error);
tm_p = &timeout;
}
return (do_sem2_wait(td, uap->obj, tm_p));
}
static int
__umtx_op_sem2_wake(struct thread *td, struct _umtx_op_args *uap)
{
return do_sem2_wake(td, uap->obj);
}
typedef int (*_umtx_op_func)(struct thread *td, struct _umtx_op_args *uap);
static _umtx_op_func op_table[] = {
@ -3119,10 +3260,17 @@ static _umtx_op_func op_table[] = {
__umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */
__umtx_op_wait_umutex, /* UMTX_OP_MUTEX_WAIT */
__umtx_op_wake_umutex, /* UMTX_OP_MUTEX_WAKE */
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
__umtx_op_sem_wait, /* UMTX_OP_SEM_WAIT */
__umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */
#else
__umtx_op_unimpl, /* UMTX_OP_SEM_WAIT */
__umtx_op_unimpl, /* UMTX_OP_SEM_WAKE */
#endif
__umtx_op_nwake_private, /* UMTX_OP_NWAKE_PRIVATE */
__umtx_op_wake2_umutex /* UMTX_OP_MUTEX_WAKE2 */
__umtx_op_wake2_umutex, /* UMTX_OP_MUTEX_WAKE2 */
__umtx_op_sem2_wait, /* UMTX_OP_SEM2_WAIT */
__umtx_op_sem2_wake, /* UMTX_OP_SEM2_WAKE */
};
int
@ -3320,6 +3468,7 @@ __umtx_op_wait_uint_private_compat32(struct thread *td, struct _umtx_op_args *ua
return do_wait(td, uap->obj, uap->val, tm_p, 1, 1);
}
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
static int
__umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
@ -3338,6 +3487,26 @@ __umtx_op_sem_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
}
return (do_sem_wait(td, uap->obj, tm_p));
}
#endif
static int
__umtx_op_sem2_wait_compat32(struct thread *td, struct _umtx_op_args *uap)
{
struct _umtx_time *tm_p, timeout;
int error;
/* Allow a null timespec (wait forever). */
if (uap->uaddr2 == NULL)
tm_p = NULL;
else {
error = umtx_copyin_umtx_time32(uap->uaddr2,
(size_t)uap->uaddr1, &timeout);
if (error != 0)
return (error);
tm_p = &timeout;
}
return (do_sem2_wait(td, uap->obj, tm_p));
}
static int
__umtx_op_nwake_private32(struct thread *td, struct _umtx_op_args *uap)
@ -3385,10 +3554,17 @@ static _umtx_op_func op_table_compat32[] = {
__umtx_op_wake_private, /* UMTX_OP_WAKE_PRIVATE */
__umtx_op_wait_umutex_compat32, /* UMTX_OP_MUTEX_WAIT */
__umtx_op_wake_umutex, /* UMTX_OP_MUTEX_WAKE */
#if defined(COMPAT_FREEBSD9) || defined(COMPAT_FREEBSD10)
__umtx_op_sem_wait_compat32, /* UMTX_OP_SEM_WAIT */
__umtx_op_sem_wake, /* UMTX_OP_SEM_WAKE */
#else
__umtx_op_unimpl, /* UMTX_OP_SEM_WAIT */
__umtx_op_unimpl, /* UMTX_OP_SEM_WAKE */
#endif
__umtx_op_nwake_private32, /* UMTX_OP_NWAKE_PRIVATE */
__umtx_op_wake2_umutex /* UMTX_OP_MUTEX_WAKE2 */
__umtx_op_wake2_umutex, /* UMTX_OP_MUTEX_WAKE2 */
__umtx_op_sem2_wait_compat32, /* UMTX_OP_SEM2_WAIT */
__umtx_op_sem2_wake, /* UMTX_OP_SEM2_WAKE */
};
int

View File

@ -61,6 +61,11 @@ struct _usem {
__uint32_t _flags;
};
struct _usem2 {
volatile __uint32_t _count; /* Waiters flag in high bit. */
__uint32_t _flags;
};
struct _umtx_time {
struct timespec _timeout;
__uint32_t _flags;

View File

@ -52,6 +52,11 @@
/* _usem flags */
#define SEM_NAMED 0x0002
/* _usem2 count field */
#define USEM_HAS_WAITERS 0x80000000U
#define USEM_MAX_COUNT 0x7fffffffU
#define USEM_COUNT(c) ((c) & USEM_MAX_COUNT)
/* op code for _umtx_op */
#define UMTX_OP_RESERVED0 0
#define UMTX_OP_RESERVED1 1
@ -72,11 +77,13 @@
#define UMTX_OP_WAKE_PRIVATE 16
#define UMTX_OP_MUTEX_WAIT 17
#define UMTX_OP_MUTEX_WAKE 18 /* deprecated */
#define UMTX_OP_SEM_WAIT 19
#define UMTX_OP_SEM_WAKE 20
#define UMTX_OP_SEM_WAIT 19 /* deprecated */
#define UMTX_OP_SEM_WAKE 20 /* deprecated */
#define UMTX_OP_NWAKE_PRIVATE 21
#define UMTX_OP_MUTEX_WAKE2 22
#define UMTX_OP_MAX 23
#define UMTX_OP_SEM2_WAIT 23
#define UMTX_OP_SEM2_WAKE 24
#define UMTX_OP_MAX 25
/* Flags for UMTX_OP_CV_WAIT */
#define CVWAIT_CHECK_UNPARKING 0x01

View File

@ -424,6 +424,7 @@ static struct xlat umtx_ops[] = {
X(UMTX_OP_WAIT_UINT_PRIVATE) X(UMTX_OP_WAKE_PRIVATE)
X(UMTX_OP_MUTEX_WAIT) X(UMTX_OP_MUTEX_WAKE) X(UMTX_OP_SEM_WAIT)
X(UMTX_OP_SEM_WAKE) X(UMTX_OP_NWAKE_PRIVATE) X(UMTX_OP_MUTEX_WAKE2)
X(UMTX_OP_SEM2_WAIT) X(UMTX_OP_SEM2_WAKE)
XEND
};