sx: uninline slock/sunlock

Shared locking routines explicitly read the value and test it. If the
change attempt fails, they fall back to a regular function which would
retry in a loop.

The problem is that with many concurrent readers the risk of failure is pretty
high and even the value returned by fcmpset is very likely going to be stale
by the time the loop in the fallback routine is reached.

Uninline said primitives. With 80 hardware threads performing concurrent
slocks/sunlocks this increases throughput from ~50 million to ~56 million ops/s.

Interestingly, rwlock primitives are already not inlined.
This commit is contained in:
Mateusz Guzik 2017-02-05 05:20:29 +00:00
parent fa47404353
commit 9d2e4290ff
2 changed files with 9 additions and 101 deletions

View File

@@ -275,29 +275,6 @@ sx_destroy(struct sx *sx)
lock_destroy(&sx->lock_object);
}
/*
 * Debug wrapper for acquiring a shared sx lock.  Performs sanity
 * checks and witness lock-order verification before the acquisition,
 * then records the lock on success.  Returns 0 on success or a
 * non-zero error from __sx_slock() (e.g. when the acquisition was
 * interruptible and did not complete — presumably EINTR; confirm
 * against __sx_slock()/_sx_slock_hard()).
 */
int
_sx_slock(struct sx *sx, int opts, const char *file, int line)
{
int error = 0;
/* Locking becomes a no-op once the scheduler is stopped (panic path). */
if (SCHEDULER_STOPPED())
return (0);
/* Idle threads must not acquire sleepable locks (unless in the debugger). */
KASSERT(kdb_active != 0 || !TD_IS_IDLETHREAD(curthread),
("sx_slock() by idle thread %p on sx %s @ %s:%d",
curthread, sx->lock_object.lo_name, file, line));
KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
("sx_slock() of destroyed sx @ %s:%d", file, line));
/* Verify lock ordering before we attempt the acquisition. */
WITNESS_CHECKORDER(&sx->lock_object, LOP_NEWORDER, file, line, NULL);
error = __sx_slock(sx, opts, file, line);
if (!error) {
/* Acquisition succeeded: log it and update per-thread lock count. */
LOCK_LOG_LOCK("SLOCK", &sx->lock_object, 0, 0, file, line);
WITNESS_LOCK(&sx->lock_object, 0, file, line);
TD_LOCKS_INC(curthread);
}
return (error);
}
int
sx_try_slock_(struct sx *sx, const char *file, int line)
{
@@ -390,21 +367,6 @@ sx_try_xlock_(struct sx *sx, const char *file, int line)
return (rval);
}
/*
 * Debug wrapper for releasing a shared sx lock.  Asserts the lock is
 * actually held shared, updates witness/lock-logging state, then hands
 * off to __sx_sunlock() for the actual release.
 */
void
_sx_sunlock(struct sx *sx, const char *file, int line)
{
/* Unlocking is a no-op once the scheduler is stopped (panic path). */
if (SCHEDULER_STOPPED())
return;
KASSERT(sx->sx_lock != SX_LOCK_DESTROYED,
("sx_sunlock() of destroyed sx @ %s:%d", file, line));
/* Caller must hold the lock in shared mode. */
_sx_assert(sx, SA_SLOCKED, file, line);
WITNESS_UNLOCK(&sx->lock_object, 0, file, line);
LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line);
__sx_sunlock(sx, file, line);
TD_LOCKS_DEC(curthread);
}
void
_sx_xunlock(struct sx *sx, const char *file, int line)
{
@@ -840,14 +802,8 @@ _sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int line)
kick_proc0();
}
/*
* This function represents the so-called 'hard case' for sx_slock
* operation. All 'easy case' failures are redirected to this. Note
* that ideally this would be a static function, but it needs to be
* accessible from at least sx.h.
*/
int
_sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
_sx_slock(struct sx *sx, int opts, const char *file, int line)
{
GIANT_DECLARE;
#ifdef ADAPTIVE_SX
@@ -1051,14 +1007,8 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
return (error);
}
/*
* This function represents the so-called 'hard case' for sx_sunlock
* operation. All 'easy case' failures are redirected to this. Note
* that ideally this would be a static function, but it needs to be
* accessible from at least sx.h.
*/
void
_sx_sunlock_hard(struct sx *sx, const char *file, int line)
_sx_sunlock(struct sx *sx, const char *file, int line)
{
uintptr_t x;
int wakeup_swapper;
@@ -1066,6 +1016,7 @@ _sx_sunlock_hard(struct sx *sx, const char *file, int line)
if (SCHEDULER_STOPPED())
return;
LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx, LOCKSTAT_READER);
x = SX_READ_VALUE(sx);
for (;;) {
/*

View File

@@ -111,10 +111,8 @@ void _sx_sunlock(struct sx *sx, const char *file, int line);
void _sx_xunlock(struct sx *sx, const char *file, int line);
int _sx_xlock_hard(struct sx *sx, uintptr_t v, uintptr_t tid, int opts,
const char *file, int line);
int _sx_slock_hard(struct sx *sx, int opts, const char *file, int line);
void _sx_xunlock_hard(struct sx *sx, uintptr_t tid, const char *file, int
line);
void _sx_sunlock_hard(struct sx *sx, const char *file, int line);
#if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
void _sx_assert(const struct sx *sx, int what, const char *file, int line);
#endif
@@ -179,41 +177,6 @@ __sx_xunlock(struct sx *sx, struct thread *td, const char *file, int line)
_sx_xunlock_hard(sx, tid, file, line);
}
/*
 * Acquire a shared lock — inline fast path.  Make exactly one attempt
 * to atomically bump the shared-owner count; any failure (the lock is
 * not share-locked, or the cmpset lost a race with another CPU) is
 * redirected to the hard-case slow path, which retries in a loop.
 */
static __inline int
__sx_slock(struct sx *sx, int opts, const char *file, int line)
{
uintptr_t x = sx->sx_lock;
int error = 0;
/* Fast path applies only while the SX_LOCK_SHARED bit is set. */
if (!(x & SX_LOCK_SHARED) ||
!atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER))
error = _sx_slock_hard(sx, opts, file, line);
else
LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
0, 0, file, line, LOCKSTAT_READER);
return (error);
}
/*
 * Release a shared lock — inline fast path.  We can just drop a single
 * shared lock so long as we aren't trying to drop the last shared lock
 * when other threads are waiting for an exclusive lock.  This takes
 * advantage of the fact that an unlocked lock is encoded as a shared
 * lock with a count of 0.
 */
static __inline void
__sx_sunlock(struct sx *sx, const char *file, int line)
{
uintptr_t x = sx->sx_lock;
LOCKSTAT_PROFILE_RELEASE_RWLOCK(sx__release, sx, LOCKSTAT_READER);
/*
 * Hard path is required when we are the last sharer and exclusive
 * waiters are queued (they must be woken), or when the single cmpset
 * attempt loses a race with a concurrent update.
 */
if (x == (SX_SHARERS_LOCK(1) | SX_LOCK_EXCLUSIVE_WAITERS) ||
!atomic_cmpset_rel_ptr(&sx->sx_lock, x, x - SX_ONE_SHARER))
_sx_sunlock_hard(sx, file, line);
}
/*
* Public interface for lock operations.
*/
@@ -227,12 +190,6 @@ __sx_sunlock(struct sx *sx, const char *file, int line)
_sx_xlock((sx), SX_INTERRUPTIBLE, (file), (line))
#define sx_xunlock_(sx, file, line) \
_sx_xunlock((sx), (file), (line))
#define sx_slock_(sx, file, line) \
(void)_sx_slock((sx), 0, (file), (line))
#define sx_slock_sig_(sx, file, line) \
_sx_slock((sx), SX_INTERRUPTIBLE, (file) , (line))
#define sx_sunlock_(sx, file, line) \
_sx_sunlock((sx), (file), (line))
#else
#define sx_xlock_(sx, file, line) \
(void)__sx_xlock((sx), curthread, 0, (file), (line))
@@ -240,13 +197,13 @@ __sx_sunlock(struct sx *sx, const char *file, int line)
__sx_xlock((sx), curthread, SX_INTERRUPTIBLE, (file), (line))
#define sx_xunlock_(sx, file, line) \
__sx_xunlock((sx), curthread, (file), (line))
#define sx_slock_(sx, file, line) \
(void)__sx_slock((sx), 0, (file), (line))
#define sx_slock_sig_(sx, file, line) \
__sx_slock((sx), SX_INTERRUPTIBLE, (file), (line))
#define sx_sunlock_(sx, file, line) \
__sx_sunlock((sx), (file), (line))
#endif /* LOCK_DEBUG > 0 || SX_NOINLINE */
#define sx_slock_(sx, file, line) \
(void)_sx_slock((sx), 0, (file), (line))
#define sx_slock_sig_(sx, file, line) \
_sx_slock((sx), SX_INTERRUPTIBLE, (file) , (line))
#define sx_sunlock_(sx, file, line) \
_sx_sunlock((sx), (file), (line))
#define sx_try_slock(sx) sx_try_slock_((sx), LOCK_FILE, LOCK_LINE)
#define sx_try_xlock(sx) sx_try_xlock_((sx), LOCK_FILE, LOCK_LINE)
#define sx_try_upgrade(sx) sx_try_upgrade_((sx), LOCK_FILE, LOCK_LINE)