locks: pass the found lock value to unlock slow path

This avoids an explicit read of the lock word in the slow path: a failed
fcmpset in the fast path already hands back the value it found.

While here, drop the 'tid' argument, which the slow path can obtain
cheaply from curthread.
Mateusz Guzik 2017-11-22 22:04:04 +00:00
parent 013c0b493f
commit b584eb2e90
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=326107
6 changed files with 48 additions and 33 deletions
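
The change applies one pattern to mutexes, rwlocks, and sx locks: the
inlined release fast path switches from atomic_cmpset_rel_ptr() to
atomic_fcmpset_rel_ptr(), which on failure writes the lock value it
actually found back through its pointer argument. That found value is
then handed straight to the unlock slow path, which previously had to
re-read the lock word itself. A minimal standalone sketch of the idea in
C11 atomics (the names here are illustrative, not the kernel's):

	#include <stdatomic.h>
	#include <stdint.h>

	#define LOCK_UNOWNED	((uintptr_t)0)	/* illustrative unowned value */

	/* Slow path: receives the lock word the fast path already observed. */
	void	unlock_slow(_Atomic uintptr_t *lock, uintptr_t v);

	static inline void
	unlock_fast(_Atomic uintptr_t *lock, uintptr_t tid)
	{
		uintptr_t v = tid;	/* expected: owned by us, no flag bits */

		/*
		 * On failure, compare_exchange stores the value it found
		 * into v, so the slow path gets the current lock word for
		 * free -- no second read needed.
		 */
		if (!atomic_compare_exchange_strong_explicit(lock, &v,
		    LOCK_UNOWNED, memory_order_release, memory_order_relaxed))
			unlock_slow(lock, v);
	}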

sys/kern/kern_mutex.c

@@ -277,7 +277,7 @@ __mtx_unlock_flags(volatile uintptr_t *c, int opts, const char *file, int line)
 	mtx_assert(m, MA_OWNED);
 #ifdef LOCK_PROFILING
-	__mtx_unlock_sleep(c, opts, file, line);
+	__mtx_unlock_sleep(c, (uintptr_t)curthread, opts, file, line);
 #else
 	__mtx_unlock(m, curthread, opts, file, line);
 #endif
@@ -1002,24 +1002,27 @@ thread_lock_set(struct thread *td, struct mtx *new)
  */
 #if LOCK_DEBUG > 0
 void
-__mtx_unlock_sleep(volatile uintptr_t *c, int opts, const char *file, int line)
+__mtx_unlock_sleep(volatile uintptr_t *c, uintptr_t v, int opts,
+    const char *file, int line)
 #else
 void
-__mtx_unlock_sleep(volatile uintptr_t *c)
+__mtx_unlock_sleep(volatile uintptr_t *c, uintptr_t v)
 #endif
 {
 	struct mtx *m;
 	struct turnstile *ts;
-	uintptr_t tid, v;
+	uintptr_t tid;

 	if (SCHEDULER_STOPPED())
 		return;

 	tid = (uintptr_t)curthread;
 	m = mtxlock2mtx(c);
-	v = MTX_READ_VALUE(m);
-	if (v & MTX_RECURSED) {
+	if (__predict_false(v == tid))
+		v = MTX_READ_VALUE(m);
+	if (__predict_false(v & MTX_RECURSED)) {
 		if (--(m->mtx_recurse) == 0)
 			atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED);
 		if (LOCK_LOG_TEST(&m->lock_object, opts))
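
One subtlety in the hunk above: the slow path can still be entered
without a failed fcmpset. In a LOCK_PROFILING build (first hunk in this
file) __mtx_unlock_sleep() is called unconditionally with the bare
(uintptr_t)curthread, so the incoming value carries no information; the
__predict_false(v == tid) test re-reads the lock word only in that case.
A hedged sketch of the two entry paths, using hypothetical helper names:

	#include <stdint.h>

	struct mtx;				/* opaque here; fields unused */

	/* Hypothetical stand-ins for the kernel's macros. */
	int	mtx_release_fetch(struct mtx *m, uintptr_t *vp);  /* fcmpset */
	void	mtx_unlock_slow(struct mtx *m, uintptr_t v);

	void
	mtx_unlock_sketch(struct mtx *m, uintptr_t tid, int profiling)
	{
		uintptr_t v = tid;

		if (profiling)
			mtx_unlock_slow(m, v);	/* v == tid: slow path re-reads */
		else if (!mtx_release_fetch(m, &v))
			mtx_unlock_slow(m, v);	/* v already holds the found word */
	}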

sys/kern/kern_rwlock.c

@@ -1078,18 +1078,21 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
  * on this lock.
  */
 void
-__rw_wunlock_hard(volatile uintptr_t *c, uintptr_t tid LOCK_FILE_LINE_ARG_DEF)
+__rw_wunlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
 {
 	struct rwlock *rw;
 	struct turnstile *ts;
-	uintptr_t v, setv;
+	uintptr_t tid, setv;
 	int queue;

+	tid = (uintptr_t)curthread;
 	if (SCHEDULER_STOPPED())
 		return;

 	rw = rwlock2rw(c);
-	v = RW_READ_VALUE(rw);
+	if (__predict_false(v == tid))
+		v = RW_READ_VALUE(rw);
 	if (v & RW_LOCK_WRITER_RECURSED) {
 		if (--(rw->rw_recurse) == 0)
 			atomic_clear_ptr(&rw->rw_lock, RW_LOCK_WRITER_RECURSED);

sys/kern/kern_sx.c

@@ -799,18 +799,22 @@ _sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LOCK_FILE_LINE_ARG_DEF)
  * accessible from at least sx.h.
  */
 void
-_sx_xunlock_hard(struct sx *sx, uintptr_t tid LOCK_FILE_LINE_ARG_DEF)
+_sx_xunlock_hard(struct sx *sx, uintptr_t x LOCK_FILE_LINE_ARG_DEF)
 {
-	uintptr_t x, setx;
+	uintptr_t tid, setx;
 	int queue, wakeup_swapper;

 	if (SCHEDULER_STOPPED())
 		return;

-	MPASS(!(sx->sx_lock & SX_LOCK_SHARED));
-	x = SX_READ_VALUE(sx);
-	if (x & SX_LOCK_RECURSED) {
+	tid = (uintptr_t)curthread;
+	if (__predict_false(x == tid))
+		x = SX_READ_VALUE(sx);
+	MPASS(!(x & SX_LOCK_SHARED));
+	if (__predict_false(x & SX_LOCK_RECURSED)) {
 		/* The lock is recursed, unrecurse one level. */
 		if ((--sx->sx_recurse) == 0)
 			atomic_clear_ptr(&sx->sx_lock, SX_LOCK_RECURSED);
@@ -824,8 +828,7 @@ _sx_xunlock_hard(struct sx *sx, uintptr_t tid LOCK_FILE_LINE_ARG_DEF)
 	    atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED))
 		return;

-	MPASS(sx->sx_lock & (SX_LOCK_SHARED_WAITERS |
-	    SX_LOCK_EXCLUSIVE_WAITERS));
+	MPASS(x & (SX_LOCK_SHARED_WAITERS | SX_LOCK_EXCLUSIVE_WAITERS));
 	if (LOCK_LOG_TEST(&sx->lock_object, 0))
 		CTR2(KTR_LOCK, "%s: %p contested", __func__, sx);

sys/sys/mutex.h

@@ -98,11 +98,11 @@ void	mutex_init(void);
 #if LOCK_DEBUG > 0
 void	__mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v, int opts,
	    const char *file, int line);
-void	__mtx_unlock_sleep(volatile uintptr_t *c, int opts, const char *file,
-	    int line);
+void	__mtx_unlock_sleep(volatile uintptr_t *c, uintptr_t v, int opts,
+	    const char *file, int line);
 #else
 void	__mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v);
-void	__mtx_unlock_sleep(volatile uintptr_t *c);
+void	__mtx_unlock_sleep(volatile uintptr_t *c, uintptr_t v);
 #endif

 #ifdef SMP
@@ -164,13 +164,13 @@ void	_thread_lock(struct thread *);
 #if LOCK_DEBUG > 0
 #define	_mtx_lock_sleep(m, v, o, f, l)					\
	__mtx_lock_sleep(&(m)->mtx_lock, v, o, f, l)
-#define	_mtx_unlock_sleep(m, o, f, l)					\
-	__mtx_unlock_sleep(&(m)->mtx_lock, o, f, l)
+#define	_mtx_unlock_sleep(m, v, o, f, l)				\
+	__mtx_unlock_sleep(&(m)->mtx_lock, v, o, f, l)
 #else
 #define	_mtx_lock_sleep(m, v, o, f, l)					\
	__mtx_lock_sleep(&(m)->mtx_lock, v)
-#define	_mtx_unlock_sleep(m, o, f, l)					\
-	__mtx_unlock_sleep(&(m)->mtx_lock)
+#define	_mtx_unlock_sleep(m, v, o, f, l)				\
+	__mtx_unlock_sleep(&(m)->mtx_lock, v)
 #endif

 #ifdef SMP
 #if LOCK_DEBUG > 0
@@ -215,6 +215,9 @@ void	_thread_lock(struct thread *);
 #define	_mtx_release_lock_quick(mp)					\
	atomic_store_rel_ptr(&(mp)->mtx_lock, MTX_UNOWNED)

+#define	_mtx_release_lock_fetch(mp, vp)					\
+	atomic_fcmpset_rel_ptr(&(mp)->mtx_lock, (vp), MTX_UNOWNED)
+
 /*
  * Full lock operations that are suitable to be inlined in non-debug
  * kernels. If the lock cannot be acquired or released trivially then
@@ -292,11 +295,11 @@ void	_thread_lock(struct thread *);
 /* Unlock a normal mutex. */
 #define __mtx_unlock(mp, tid, opts, file, line) do {			\
-	uintptr_t _tid = (uintptr_t)(tid);				\
+	uintptr_t _v = (uintptr_t)(tid);				\
									\
	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(adaptive__release) ||\
-	    !_mtx_release_lock((mp), _tid)))				\
-		_mtx_unlock_sleep((mp), (opts), (file), (line));	\
+	    !_mtx_release_lock_fetch((mp), &_v)))			\
+		_mtx_unlock_sleep((mp), _v, (opts), (file), (line));	\
 } while (0)

 /*
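
The reworked __mtx_unlock() above seeds _v with the owner tid; if the
fcmpset release fails, _v has already been overwritten with the value
found, which is exactly what _mtx_unlock_sleep() now takes. What
_mtx_release_lock_fetch() boils down to can be modeled with C11 atomics
(illustrative only, not the kernel's definition):

	#include <stdatomic.h>
	#include <stdint.h>

	/* Non-zero on success; on failure *vp is updated to the value found. */
	static inline int
	release_lock_fetch_model(_Atomic uintptr_t *lock, uintptr_t *vp)
	{
		return (atomic_compare_exchange_strong_explicit(lock, vp,
		    (uintptr_t)0 /* illustrative stand-in for MTX_UNOWNED */,
		    memory_order_release, memory_order_relaxed));
	}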

sys/sys/rwlock.h

@@ -92,6 +92,9 @@
 #define	_rw_write_unlock(rw, tid)					\
	atomic_cmpset_rel_ptr(&(rw)->rw_lock, (tid), RW_UNLOCKED)

+#define	_rw_write_unlock_fetch(rw, tid)					\
+	atomic_fcmpset_rel_ptr(&(rw)->rw_lock, (tid), RW_UNLOCKED)
+
 /*
  * Full lock operations that are suitable to be inlined in non-debug
  * kernels. If the lock cannot be acquired or released trivially then
@@ -110,11 +113,11 @@
 /* Release a write lock. */
 #define	__rw_wunlock(rw, tid, file, line) do {				\
-	uintptr_t _tid = (uintptr_t)(tid);				\
+	uintptr_t _v = (uintptr_t)(tid);				\
									\
	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(rw__release) ||	\
-	    !_rw_write_unlock((rw), _tid)))				\
-		_rw_wunlock_hard((rw), _tid, (file), (line));		\
+	    !_rw_write_unlock_fetch((rw), &_v)))			\
+		_rw_wunlock_hard((rw), _v, (file), (line));		\
 } while (0)

 /*

sys/sys/sx.h

@@ -116,7 +116,7 @@ void	_sx_sunlock_int(struct sx *sx LOCK_FILE_LINE_ARG_DEF);
 void	_sx_sunlock(struct sx *sx, const char *file, int line);
 void	_sx_xunlock(struct sx *sx, const char *file, int line);
 int	_sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LOCK_FILE_LINE_ARG_DEF);
-void	_sx_xunlock_hard(struct sx *sx, uintptr_t tid LOCK_FILE_LINE_ARG_DEF);
+void	_sx_xunlock_hard(struct sx *sx, uintptr_t x LOCK_FILE_LINE_ARG_DEF);
 #if defined(INVARIANTS) || defined(INVARIANT_SUPPORT)
 void	_sx_assert(const struct sx *sx, int what, const char *file, int line);
 #endif
@@ -170,11 +170,11 @@ __sx_xlock(struct sx *sx, struct thread *td, int opts, const char *file,
 static __inline void
 __sx_xunlock(struct sx *sx, struct thread *td, const char *file, int line)
 {
-	uintptr_t tid = (uintptr_t)td;
+	uintptr_t x = (uintptr_t)td;

	if (__predict_false(LOCKSTAT_PROFILE_ENABLED(sx__release) ||
-	    !atomic_cmpset_rel_ptr(&sx->sx_lock, tid, SX_LOCK_UNLOCKED)))
-		_sx_xunlock_hard(sx, tid);
+	    !atomic_fcmpset_rel_ptr(&sx->sx_lock, &x, SX_LOCK_UNLOCKED)))
+		_sx_xunlock_hard(sx, x);
 }
 #endif