From b584eb2e906be33d9126e463220d376cb6892cf0 Mon Sep 17 00:00:00 2001
From: Mateusz Guzik
Date: Wed, 22 Nov 2017 22:04:04 +0000
Subject: [PATCH] locks: pass the found lock value to unlock slow path

This avoids an explicit read later.

While here whack the cheaply obtainable 'tid' argument.
---
 sys/kern/kern_mutex.c  | 15 +++++++++------
 sys/kern/kern_rwlock.c |  9 ++++++---
 sys/kern/kern_sx.c     | 17 ++++++++++-------
 sys/sys/mutex.h        | 23 +++++++++++++----------
 sys/sys/rwlock.h       |  9 ++++++---
 sys/sys/sx.h           |  8 ++++----
 6 files changed, 48 insertions(+), 33 deletions(-)

diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c
index 7b6b3e0549b3..8e51e6a45f9b 100644
--- a/sys/kern/kern_mutex.c
+++ b/sys/kern/kern_mutex.c
@@ -277,7 +277,7 @@ __mtx_unlock_flags(volatile uintptr_t *c, int opts, const char *file, int line)
 	mtx_assert(m, MA_OWNED);
 #ifdef LOCK_PROFILING
-	__mtx_unlock_sleep(c, opts, file, line);
+	__mtx_unlock_sleep(c, (uintptr_t)curthread, opts, file, line);
 #else
 	__mtx_unlock(m, curthread, opts, file, line);
 #endif
@@ -1002,24 +1002,27 @@ thread_lock_set(struct thread *td, struct mtx *new)
  */
 #if LOCK_DEBUG > 0
 void
-__mtx_unlock_sleep(volatile uintptr_t *c, int opts, const char *file, int line)
+__mtx_unlock_sleep(volatile uintptr_t *c, uintptr_t v, int opts,
+    const char *file, int line)
 #else
 void
-__mtx_unlock_sleep(volatile uintptr_t *c)
+__mtx_unlock_sleep(volatile uintptr_t *c, uintptr_t v)
 #endif
 {
 	struct mtx *m;
 	struct turnstile *ts;
-	uintptr_t tid, v;
+	uintptr_t tid;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
 	tid = (uintptr_t)curthread;
 	m = mtxlock2mtx(c);
-	v = MTX_READ_VALUE(m);
 
-	if (v & MTX_RECURSED) {
+	if (__predict_false(v == tid))
+		v = MTX_READ_VALUE(m);
+
+	if (__predict_false(v & MTX_RECURSED)) {
 		if (--(m->mtx_recurse) == 0)
 			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
 		if (LOCK_LOG_TEST(&m->lock_object, opts))
diff --git a/sys/kern/kern_rwlock.c b/sys/kern/kern_rwlock.c
index 35c6b44e4821..d519c5369b03 100644
--- a/sys/kern/kern_rwlock.c
+++ b/sys/kern/kern_rwlock.c
@@ -1078,18 +1078,21 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
  * on this lock.
  */
 void
-__rw_wunlock_hard(volatile uintptr_t *c, uintptr_t tid LOCK_FILE_LINE_ARG_DEF)
+__rw_wunlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
 {
 	struct rwlock *rw;
 	struct turnstile *ts;
-	uintptr_t v, setv;
+	uintptr_t tid, setv;
 	int queue;
 
+	tid = (uintptr_t)curthread;
 	if (SCHEDULER_STOPPED())
 		return;
 
 	rw = rwlock2rw(c);
-	v = RW_READ_VALUE(rw);
+	if (__predict_false(v == tid))
+		v = RW_READ_VALUE(rw);
+
 	if (v & RW_LOCK_WRITER_RECURSED) {
 		if (--(rw->rw_recurse) == 0)
 			atomic_clear_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);
diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c
index 36481c66d0ec..d54141a3a83d 100644
--- a/sys/kern/kern_sx.c
+++ b/sys/kern/kern_sx.c
@@ -799,18 +799,22 @@ _sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LOCK_FILE_LINE_ARG_DEF)
  * accessible from at least sx.h.
  */
 void
-_sx_xunlock_hard(struct sx *sx, uintptr_t tid LOCK_FILE_LINE_ARG_DEF)
+_sx_xunlock_hard(struct sx *sx, uintptr_t x LOCK_FILE_LINE_ARG_DEF)
 {
-	uintptr_t x, setx;
+	uintptr_t tid, setx;
 	int queue, wakeup_swapper;
 
 	if (SCHEDULER_STOPPED())
 		return;
 
-	MPASS(!(sx->sx_lock & SX_LOCK_SHARED));
+	tid = (uintptr_t)curthread;
 
-	x = SX_READ_VALUE(sx);
-	if (x & SX_LOCK_RECURSED) {
+	if (__predict_false(x == tid))
+		x = SX_READ_VALUE(sx);
+
+	MPASS(!(x & SX_LOCK_SHARED));
+
+	if (__predict_false(x & SX_LOCK_RECURSED)) {
 		/* The lock is recursed, unrecurse one level. */
 		if ((--sx->sx_recurse) == 0)
 			atomic_clear_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
@@ -824,8 +828,7 @@ _sx_xunlock_hard(struct sx *sx, uintptr_t tid LOCK_FILE_LINE_ARG_DEF)
 	    atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED))
 		return;
 
-	MPASS(sx->sx_lock & (SX_LOCK_SHARED_WAITERS |
-	    SX_LOCK_EXCLUSIVE_WAITERS));
+	MPASS(x & (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS));
 
 	if (LOCK_LOG_TEST(&sx->lock_object, 0))
 		CTR2(KTR_LOCK, "%s: %p contested", __func__, sx);
diff --git a/sys/sys/mutex.h b/sys/sys/mutex.h
index 9d20bbdbef9d..a5a5b32fde6d 100644
--- a/sys/sys/mutex.h
+++ b/sys/sys/mutex.h
@@ -98,11 +98,11 @@ void	mutex_init(void);
 #if LOCK_DEBUG > 0
 void	__mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v, int opts,
	    const char *file, int line);
-void	__mtx_unlock_sleep(volatile uintptr_t *c, int opts, const char *file,
-	    int line);
+void	__mtx_unlock_sleep(volatile uintptr_t *c, uintptr_t v, int opts,
+	    const char *file, int line);
 #else
 void	__mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v);
-void	__mtx_unlock_sleep(volatile uintptr_t *c);
+void	__mtx_unlock_sleep(volatile uintptr_t *c, uintptr_t v);
 #endif
 
 #ifdef SMP
@@ -164,13 +164,13 @@ void	_thread_lock(struct thread *);
 #if LOCK_DEBUG > 0
 #define	_mtx_lock_sleep(m, v, o, f, l)					\
	__mtx_lock_sleep(&(m)->mtx_lock, v, o, f, l)
-#define	_mtx_unlock_sleep(m, o, f, l)					\
-	__mtx_unlock_sleep(&(m)->mtx_lock, o, f, l)
+#define	_mtx_unlock_sleep(m, v, o, f, l)				\
+	__mtx_unlock_sleep(&(m)->mtx_lock, v, o, f, l)
 #else
 #define	_mtx_lock_sleep(m, v, o, f, l)					\
	__mtx_lock_sleep(&(m)->mtx_lock, v)
-#define	_mtx_unlock_sleep(m, o, f, l)					\
-	__mtx_unlock_sleep(&(m)->mtx_lock)
+#define	_mtx_unlock_sleep(m, v, o, f, l)				\
+	__mtx_unlock_sleep(&(m)->mtx_lock, v)
 #endif
 #ifdef SMP
 #if LOCK_DEBUG > 0
@@ -215,6 +215,9 @@ void	_thread_lock(struct thread *);
 #define	_mtx_release_lock_quick(mp)					\
	atomic_store_rel_ptr(&(mp)->mtx_lock, MTX_UNOWNED)
 
+#define	_mtx_release_lock_fetch(mp, vp)					\
+	atomic_fcmpset_rel_ptr(&(mp)->mtx_lock, (vp), MTX_UNOWNED)
+
 /*
  * Full lock operations that are suitable to be inlined in non-debug
  * kernels. If the lock cannot be acquired or released trivially then
@@ -292,11 +295,11 @@
 
 /* Unlock a normal mutex. */
 #define	__mtx_unlock(mp, tid, opts, file, line) do {			\
-	uintptr_t _tid = (uintptr_t)(tid);				\
+	uintptr_t _v = (uintptr_t)(tid);				\
									\
	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(adaptive__release) ||\
-	    !_mtx_release_lock((mp), _tid)))				\
-		_mtx_unlock_sleep((mp), (opts), (file), (line));	\
+	    !_mtx_release_lock_fetch((mp), &_v)))			\
+		_mtx_unlock_sleep((mp), _v, (opts), (file), (line));	\
 } while (0)
 
 /*
diff --git a/sys/sys/rwlock.h b/sys/sys/rwlock.h
index 180773cdadeb..2bf31ec6d021 100644
--- a/sys/sys/rwlock.h
+++ b/sys/sys/rwlock.h
@@ -92,6 +92,9 @@
 #define	_rw_write_unlock(rw, tid)					\
	atomic_cmpset_rel_ptr(&(rw)->rw_lock, (tid), RW_UNLOCKED)
 
+#define	_rw_write_unlock_fetch(rw, tid)					\
+	atomic_fcmpset_rel_ptr(&(rw)->rw_lock, (tid), RW_UNLOCKED)
+
 /*
  * Full lock operations that are suitable to be inlined in non-debug
  * kernels. If the lock cannot be acquired or released trivially then
@@ -110,11 +113,11 @@
 
 /* Release a write lock. */
 #define	__rw_wunlock(rw, tid, file, line) do {				\
-	uintptr_t _tid = (uintptr_t)(tid);				\
+	uintptr_t _v = (uintptr_t)(tid);				\
									\
	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__release) ||	\
-	    !_rw_write_unlock((rw), _tid)))				\
-		_rw_wunlock_hard((rw), _tid, (file), (line));		\
+	    !_rw_write_unlock_fetch((rw), &_v)))			\
+		_rw_wunlock_hard((rw), _v, (file), (line));		\
 } while (0)
 
 /*
diff --git a/sys/sys/sx.h b/sys/sys/sx.h
index 8f56bba24a2a..3550ce664af0 100644
--- a/sys/sys/sx.h
+++ b/sys/sys/sx.h
@@ -116,7 +116,7 @@ void	_sx_sunlock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF);
 void	_sx_sunlock(struct sx *sx, const char *file, int line);
 void	_sx_xunlock(struct sx *sx, const char *file, int line);
 int	_sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LOCK_FILE_LINE_ARG_DEF);
-void	_sx_xunlock_hard(struct sx *sx, uintptr_t tid LOCK_FILE_LINE_ARG_DEF);
+void	_sx_xunlock_hard(struct sx *sx, uintptr_t x LOCK_FILE_LINE_ARG_DEF);
 #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
 void	_sx_assert(const struct sx *sx, int what, const char *file, int line);
 #endif
@@ -170,11 +170,11 @@ __sx_xlock(struct sx *sx, struct thread *td, int opts, const char *file,
 static __inline void
 __sx_xunlock(struct sx *sx, struct thread *td, const char *file, int line)
 {
-	uintptr_t tid = (uintptr_t)td;
+	uintptr_t x = (uintptr_t)td;
 
	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(sx__release) ||
-	    !atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED)))
-		_sx_xunlock_hard(sx, tid);
+	    !atomic_fcmpset_rel_ptr(&sx->sx_lock, &x, SX_LOCK_UNLOCKED)))
+		_sx_xunlock_hard(sx, x);
 }
 
 #endif
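
A note on the pattern, separate from the patch itself: below is a minimal, stand-alone sketch of the fast-path/slow-path handoff this change introduces, written with C11 atomics in place of the kernel's atomic_fcmpset_rel_ptr(). The names toylock, TL_UNOWNED, toylock_unlock() and toylock_unlock_slow() are invented for illustration and are not FreeBSD interfaces. The point is that on a failed release the compare-exchange writes back the lock word it actually observed, so that value can be handed to the slow path instead of having the slow path issue another read.

/*
 * Illustrative sketch only (not FreeBSD code): unlock fast path that
 * passes the observed lock word to the slow path.
 */
#include <stdatomic.h>
#include <stdint.h>

#define TL_UNOWNED	((uintptr_t)0)

struct toylock {
	_Atomic uintptr_t tl_word;	/* owner tid, possibly with flag bits */
};

/* Slow path: 'v' is the lock word the failed fast path saw. */
void toylock_unlock_slow(struct toylock *tl, uintptr_t v);

static inline void
toylock_unlock(struct toylock *tl, uintptr_t tid)
{
	uintptr_t v = tid;

	/*
	 * fcmpset-like semantics: on failure the current lock word is
	 * written back into 'v', so the slow path gets it for free.
	 */
	if (!atomic_compare_exchange_strong_explicit(&tl->tl_word, &v,
	    TL_UNOWNED, memory_order_release, memory_order_relaxed))
		toylock_unlock_slow(tl, v);
}

In the patch itself the slow paths additionally re-read the lock word only when the passed-in value still equals the owner tid, i.e. when LOCKSTAT profiling forced the slow path even though the uncontested release would have succeeded.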