From 91ae39e258c0003ec2003a085cd9ef03efe9461e Mon Sep 17 00:00:00 2001 From: mjg Date: Sun, 5 Feb 2017 08:04:11 +0000 Subject: [PATCH] mtx: move lockstat handling out of inline primitives Lockstat requires checking if it is enabled and if so, calling a 6 argument function. Further, determining whether to call it on unlock requires pre-reading the lock value. This is problematic in at least 3 ways: - more branches in the hot path than necessary - additional cacheline ping pong under contention - bigger code Instead, check first if lockstat handling is necessary and if so, just fall back to regular locking routines. For this purpose a new macro is introduced (LOCKSTAT_PROFILE_ENABLED). LOCK_PROFILING uninlines all primitives. Fold in the current inline lock variant into the _mtx_lock_flags to retain the support. With this change the inline variants are not used when LOCK_PROFILING is defined and thus can ignore its existence. This results in: text data bss dec hex filename 22259667 1303208 4994976 28557851 1b3c21b kernel.orig 21797315 1303208 4994976 28095499 1acb40b kernel.patched i.e. about 3% reduction in text size. A remaining action is to remove spurious arguments for internal kernel consumers. --- sys/kern/kern_mutex.c | 20 ++++++++++++-------- sys/sys/lockstat.h | 8 ++++++++ sys/sys/mutex.h | 21 ++++++++------------- sys/sys/sdt.h | 3 +++ 4 files changed, 31 insertions(+), 21 deletions(-) diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c index 8e6133d6bfde..f49d87c81999 100644 --- a/sys/kern/kern_mutex.c +++ b/sys/kern/kern_mutex.c @@ -265,6 +265,7 @@ void __mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; + uintptr_t tid, v; if (SCHEDULER_STOPPED()) return; @@ -282,7 +283,13 @@ __mtx_lock_flags(volatile uintptr_t *c, int opts, const char *file, int line) WITNESS_CHECKORDER(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); - __mtx_lock(m, curthread, opts, file, line); + tid = (uintptr_t)curthread; + v = MTX_UNOWNED; + if (!_mtx_obtain_lock_fetch(m, &v, tid)) + _mtx_lock_sleep(m, v, tid, opts, file, line); + else + LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, + m, 0, 0, file, line); LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file, line); WITNESS_LOCK(&m->lock_object, (opts & ~MTX_RECURSE) | LOP_EXCLUSIVE, @@ -310,7 +317,7 @@ __mtx_unlock_flags(volatile uintptr_t *c, int opts, const char *file, int line) line); mtx_assert(m, MA_OWNED); - __mtx_unlock(m, curthread, opts, file, line); + __mtx_unlock_sleep(c, opts, file, line); TD_LOCKS_DEC(curthread); } @@ -887,20 +894,17 @@ __mtx_unlock_sleep(volatile uintptr_t *c, int opts, const char *file, int line) { struct mtx *m; struct turnstile *ts; - uintptr_t v; if (SCHEDULER_STOPPED()) return; m = mtxlock2mtx(c); - v = MTX_READ_VALUE(m); - if (v == (uintptr_t)curthread) { + if (!mtx_recursed(m)) { + LOCKSTAT_PROFILE_RELEASE_LOCK(adaptive__release, m); if (_mtx_release_lock(m, (uintptr_t)curthread)) return; - } - - if (mtx_recursed(m)) { + } else { if (--(m->mtx_recurse) == 0) atomic_clear_ptr(&m->mtx_lock, MTX_RECURSED); if (LOCK_LOG_TEST(&m->lock_object, opts)) diff --git a/sys/sys/lockstat.h b/sys/sys/lockstat.h index e5503539298f..958e9503185e 100644 --- a/sys/sys/lockstat.h +++ b/sys/sys/lockstat.h @@ -107,6 +107,10 @@ extern int lockstat_enabled; LOCKSTAT_RECORD1(probe, lp, a); \ } while (0) +#ifndef LOCK_PROFILING +#define LOCKSTAT_PROFILE_ENABLED(probe) SDT_PROBE_ENABLED(lockstat, , , probe) +#endif + struct lock_object; uint64_t lockstat_nsecs(struct lock_object *); @@ -130,6 +134,10 @@ uint64_t lockstat_nsecs(struct lock_object *); #define LOCKSTAT_PROFILE_RELEASE_RWLOCK(probe, lp, a) \ LOCKSTAT_PROFILE_RELEASE_LOCK(probe, lp) +#ifndef LOCK_PROFILING +#define LOCKSTAT_PROFILE_ENABLED(probe) 0 +#endif + #endif /* !KDTRACE_HOOKS */ #endif /* _KERNEL */ #endif /* _SYS_LOCKSTAT_H */ diff --git a/sys/sys/mutex.h b/sys/sys/mutex.h index e72d23d1cac4..e4c69b76c939 100644 --- a/sys/sys/mutex.h +++ b/sys/sys/mutex.h @@ -171,10 +171,8 @@ void thread_lock_flags_(struct thread *, int, const char *, int); #define _mtx_obtain_lock(mp, tid) \ atomic_cmpset_acq_ptr(&(mp)->mtx_lock, MTX_UNOWNED, (tid)) -#define _mtx_obtain_lock_fetch(mp, vp, tid) ({ \ - *vp = MTX_UNOWNED; \ - atomic_fcmpset_rel_ptr(&(mp)->mtx_lock, vp, (tid)); \ -}) +#define _mtx_obtain_lock_fetch(mp, vp, tid) \ + atomic_fcmpset_rel_ptr(&(mp)->mtx_lock, vp, (tid)) /* Try to release mtx_lock if it is unrecursed and uncontested. */ #define _mtx_release_lock(mp, tid) \ @@ -193,13 +191,11 @@ void thread_lock_flags_(struct thread *, int, const char *, int); /* Lock a normal mutex. */ #define __mtx_lock(mp, tid, opts, file, line) do { \ uintptr_t _tid = (uintptr_t)(tid); \ - uintptr_t _v; \ + uintptr_t _v = MTX_UNOWNED; \ \ - if (!_mtx_obtain_lock_fetch((mp), &_v, _tid)) \ + if (__predict_false(LOCKSTAT_PROFILE_ENABLED(adaptive__acquire) ||\ + !_mtx_obtain_lock_fetch((mp), &_v, _tid))) \ _mtx_lock_sleep((mp), _v, _tid, (opts), (file), (line));\ - else \ - LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, \ - mp, 0, 0, file, line); \ } while (0) /* @@ -211,7 +207,7 @@ void thread_lock_flags_(struct thread *, int, const char *, int); #ifdef SMP #define __mtx_lock_spin(mp, tid, opts, file, line) do { \ uintptr_t _tid = (uintptr_t)(tid); \ - uintptr_t _v; \ + uintptr_t _v = MTX_UNOWNED; \ \ spinlock_enter(); \ if (!_mtx_obtain_lock_fetch((mp), &_v, _tid)) { \ @@ -270,9 +266,8 @@ void thread_lock_flags_(struct thread *, int, const char *, int); #define __mtx_unlock(mp, tid, opts, file, line) do { \ uintptr_t _tid = (uintptr_t)(tid); \ \ - if ((mp)->mtx_recurse == 0) \ - LOCKSTAT_PROFILE_RELEASE_LOCK(adaptive__release, mp); \ - if (!_mtx_release_lock((mp), _tid)) \ + if (__predict_false(LOCKSTAT_PROFILE_ENABLED(adaptive__release) ||\ + !_mtx_release_lock((mp), _tid))) \ _mtx_unlock_sleep((mp), (opts), (file), (line)); \ } while (0) diff --git a/sys/sys/sdt.h b/sys/sys/sdt.h index 25423d764e3c..42598837fd72 100644 --- a/sys/sys/sdt.h +++ b/sys/sys/sdt.h @@ -160,6 +160,9 @@ SET_DECLARE(sdt_argtypes_set, struct sdt_argtype); #define SDT_PROBE_DECLARE(prov, mod, func, name) \ extern struct sdt_probe sdt_##prov##_##mod##_##func##_##name[1] +#define SDT_PROBE_ENABLED(prov, mod, func, name) \ + __predict_false((sdt_##prov##_##mod##_##func##_##name->id)) + #define SDT_PROBE(prov, mod, func, name, arg0, arg1, arg2, arg3, arg4) do { \ if (__predict_false(sdt_##prov##_##mod##_##func##_##name->id)) \ (*sdt_probe_func)(sdt_##prov##_##mod##_##func##_##name->id, \