From 2d008d4579eea1dabbd9175371c3b0c3d9437bf8 Mon Sep 17 00:00:00 2001 From: avg Date: Fri, 12 Jun 2015 10:01:24 +0000 Subject: [PATCH] several lockstat improvements 0. For spin events report time spent spinning, not a loop count. While loop count is much easier and cheaper to obtain it is hard to reason about the reported numbers, espcially for adaptive locks where both spinning and sleeping can happen. So, it's better to compare apples and apples. 1. Teach lockstat about FreeBSD rw locks. This is done in part by changing the corresponding probes and in part by changing what probes lockstat should expect. 2. Teach lockstat that rw locks are adaptive and can spin on FreeBSD. 3. Report lock acquisition events for successful rw try-lock operations. 4. Teach lockstat about FreeBSD sx locks. Reporting of events for those locks completely mirrors rw locks. 5. Report spin and block events before acquisition event. This is behavior documented for the upstream, so it makes sense to stick to it. Note that because of FreeBSD adaptive lock implementations both the spin and block events may be reported for the same acquisition while the upstream reports only one of them. Differential Revision: https://reviews.freebsd.org/D2727 Reviewed by: markj MFC after: 17 days Relnotes: yes Sponsored by: ClusterHQ --- .../opensolaris/cmd/lockstat/lockstat.c | 40 +++++++----- sys/kern/kern_mutex.c | 36 ++++++++--- sys/kern/kern_rwlock.c | 62 +++++++++++++------ sys/kern/kern_sx.c | 55 +++++++++++----- sys/sys/lockstat.h | 3 + 5 files changed, 138 insertions(+), 58 deletions(-) diff --git a/cddl/contrib/opensolaris/cmd/lockstat/lockstat.c b/cddl/contrib/opensolaris/cmd/lockstat/lockstat.c index 4c0c0d48c9de..5c711600b06a 100644 --- a/cddl/contrib/opensolaris/cmd/lockstat/lockstat.c +++ b/cddl/contrib/opensolaris/cmd/lockstat/lockstat.c @@ -157,14 +157,22 @@ static ls_event_info_t g_event_info[LS_MAX_EVENTS] = { "lockstat:::rw-block", "arg2 != 0 && arg3 == 1" }, { 'C', "Lock", "R/W reader blocked by write wanted", "nsec", "lockstat:::rw-block", "arg2 != 0 && arg3 == 0 && arg4" }, - { 'C', "Lock", "Unknown event (type 8)", "units" }, - { 'C', "Lock", "Unknown event (type 9)", "units" }, - { 'C', "Lock", "Unknown event (type 10)", "units" }, - { 'C', "Lock", "Unknown event (type 11)", "units" }, - { 'C', "Lock", "Unknown event (type 12)", "units" }, - { 'C', "Lock", "Unknown event (type 13)", "units" }, - { 'C', "Lock", "Unknown event (type 14)", "units" }, - { 'C', "Lock", "Unknown event (type 15)", "units" }, + { 'C', "Lock", "R/W writer spin on writer", "nsec", + "lockstat:::rw-spin", "arg2 == 0 && arg3 == 1" }, + { 'C', "Lock", "R/W writer spin on readers", "nsec", + "lockstat:::rw-spin", "arg2 == 0 && arg3 == 0 && arg4" }, + { 'C', "Lock", "R/W reader spin on writer", "nsec", + "lockstat:::rw-spin", "arg2 != 0 && arg3 == 1" }, + { 'C', "Lock", "R/W reader spin on write wanted", "nsec", + "lockstat:::rw-spin", "arg2 != 0 && arg3 == 0 && arg4" }, + { 'C', "Lock", "SX exclusive block", "nsec", + "lockstat:::sx-block", "arg2 == 0" }, + { 'C', "Lock", "SX shared block", "nsec", + "lockstat:::sx-block", "arg2 != 0" }, + { 'C', "Lock", "SX exclusive spin", "nsec", + "lockstat:::sx-spin", "arg2 == 0" }, + { 'C', "Lock", "SX shared spin", "nsec", + "lockstat:::sx-spin", "arg2 != 0" }, { 'C', "Lock", "Unknown event (type 16)", "units" }, { 'C', "Lock", "Unknown event (type 17)", "units" }, { 'C', "Lock", "Unknown event (type 18)", "units" }, @@ -188,13 +196,17 @@ static ls_event_info_t g_event_info[LS_MAX_EVENTS] = { "lockstat:::spin-release", NULL, "lockstat:::spin-acquire" }, { 'H', "Lock", "R/W writer hold", "nsec", - "lockstat:::rw-release", "arg1 == 0", - "lockstat:::rw-acquire" }, + "lockstat::rw_wunlock:rw-release", NULL, + "lockstat::rw_wlock:rw-acquire" }, { 'H', "Lock", "R/W reader hold", "nsec", - "lockstat:::rw-release", "arg1 != 0", - "lockstat:::rw-acquire" }, - { 'H', "Lock", "Unknown event (type 36)", "units" }, - { 'H', "Lock", "Unknown event (type 37)", "units" }, + "lockstat::rw_runlock:rw-release", NULL, + "lockstat::rw_rlock:rw-acquire" }, + { 'H', "Lock", "SX shared hold", "nsec", + "lockstat::sx_sunlock:sx-release", NULL, + "lockstat::sx_slock:sx-acquire" }, + { 'H', "Lock", "SX exclusive hold", "nsec", + "lockstat::sx_xunlock:sx-release", NULL, + "lockstat::sx_xlock:sx-acquire" }, { 'H', "Lock", "Unknown event (type 38)", "units" }, { 'H', "Lock", "Unknown event (type 39)", "units" }, { 'H', "Lock", "Unknown event (type 40)", "units" }, diff --git a/sys/kern/kern_mutex.c b/sys/kern/kern_mutex.c index ea6f7a62a69c..72fc4348ff1d 100644 --- a/sys/kern/kern_mutex.c +++ b/sys/kern/kern_mutex.c @@ -384,6 +384,7 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts, uint64_t spin_cnt = 0; uint64_t sleep_cnt = 0; int64_t sleep_time = 0; + int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) @@ -414,6 +415,9 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts, CTR4(KTR_LOCK, "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d", m->lock_object.lo_name, (void *)m->mtx_lock, file, line); +#ifdef KDTRACE_HOOKS + all_time -= lockstat_nsecs(); +#endif while (!_mtx_obtain_lock(m, tid)) { #ifdef KDTRACE_HOOKS @@ -517,6 +521,9 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts, sleep_cnt++; #endif } +#ifdef KDTRACE_HOOKS + all_time += lockstat_nsecs(); +#endif #ifdef KTR if (cont_logged) { CTR4(KTR_CONTENTION, @@ -534,7 +541,7 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t tid, int opts, * Only record the loops spinning and not sleeping. */ if (spin_cnt > sleep_cnt) - LOCKSTAT_RECORD1(LS_MTX_LOCK_SPIN, m, (spin_cnt - sleep_cnt)); + LOCKSTAT_RECORD1(LS_MTX_LOCK_SPIN, m, (all_time - sleep_time)); #endif } @@ -574,6 +581,9 @@ _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t tid, int opts, int contested = 0; uint64_t waittime = 0; #endif +#ifdef KDTRACE_HOOKS + int64_t spin_time = 0; +#endif if (SCHEDULER_STOPPED()) return; @@ -589,6 +599,9 @@ _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t tid, int opts, PMC_SOFT_CALL( , , lock, failed); #endif lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime); +#ifdef KDTRACE_HOOKS + spin_time -= lockstat_nsecs(); +#endif while (!_mtx_obtain_lock(m, tid)) { /* Give interrupts a chance while we spin. */ @@ -606,6 +619,9 @@ _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t tid, int opts, } spinlock_enter(); } +#ifdef KDTRACE_HOOKS + spin_time += lockstat_nsecs(); +#endif if (LOCK_LOG_TEST(&m->lock_object, opts)) CTR1(KTR_LOCK, "_mtx_lock_spin: %p spin done", m); @@ -614,7 +630,7 @@ _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t tid, int opts, LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_SPIN_LOCK_ACQUIRE, m, contested, waittime, (file), (line)); - LOCKSTAT_RECORD1(LS_MTX_SPIN_LOCK_SPIN, m, i); + LOCKSTAT_RECORD1(LS_MTX_SPIN_LOCK_SPIN, m, spin_time); } #endif /* SMP */ @@ -629,7 +645,7 @@ thread_lock_flags_(struct thread *td, int opts, const char *file, int line) uint64_t waittime = 0; #endif #ifdef KDTRACE_HOOKS - uint64_t spin_cnt = 0; + int64_t spin_time = 0; #endif i = 0; @@ -638,6 +654,9 @@ thread_lock_flags_(struct thread *td, int opts, const char *file, int line) if (SCHEDULER_STOPPED()) return; +#ifdef KDTRACE_HOOKS + spin_time -= lockstat_nsecs(); +#endif for (;;) { retry: spinlock_enter(); @@ -654,9 +673,6 @@ thread_lock_flags_(struct thread *td, int opts, const char *file, int line) WITNESS_CHECKORDER(&m->lock_object, opts | LOP_NEWORDER | LOP_EXCLUSIVE, file, line, NULL); while (!_mtx_obtain_lock(m, tid)) { -#ifdef KDTRACE_HOOKS - spin_cnt++; -#endif if (m->mtx_lock == tid) { m->mtx_recurse++; break; @@ -685,17 +701,17 @@ thread_lock_flags_(struct thread *td, int opts, const char *file, int line) if (m == td->td_lock) break; __mtx_unlock_spin(m); /* does spinlock_exit() */ -#ifdef KDTRACE_HOOKS - spin_cnt++; -#endif } +#ifdef KDTRACE_HOOKS + spin_time += lockstat_nsecs(); +#endif if (m->mtx_recurse == 0) LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_MTX_SPIN_LOCK_ACQUIRE, m, contested, waittime, (file), (line)); LOCK_LOG_LOCK("LOCK", &m->lock_object, opts, m->mtx_recurse, file, line); WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line); - LOCKSTAT_RECORD1(LS_THREAD_LOCK_SPIN, m, spin_cnt); + LOCKSTAT_RECORD1(LS_THREAD_LOCK_SPIN, m, spin_time); } struct mtx * diff --git a/sys/kern/kern_rwlock.c b/sys/kern/kern_rwlock.c index d3cfbdda6c86..f75912b2e9b6 100644 --- a/sys/kern/kern_rwlock.c +++ b/sys/kern/kern_rwlock.c @@ -300,6 +300,9 @@ __rw_try_wlock(volatile uintptr_t *c, const char *file, int line) if (rval) { WITNESS_LOCK(&rw->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); + if (!rw_recursed(rw)) + LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE, + rw, 0, 0, file, line); curthread->td_locks++; } return (rval); @@ -352,9 +355,11 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line) #endif uintptr_t v; #ifdef KDTRACE_HOOKS + uintptr_t state; uint64_t spin_cnt = 0; uint64_t sleep_cnt = 0; int64_t sleep_time = 0; + int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) @@ -372,6 +377,10 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line) rw->lock_object.lo_name, file, line)); WITNESS_CHECKORDER(&rw->lock_object, LOP_NEWORDER, file, line, NULL); +#ifdef KDTRACE_HOOKS + all_time -= lockstat_nsecs(); + state = rw->rw_lock; +#endif for (;;) { #ifdef KDTRACE_HOOKS spin_cnt++; @@ -534,7 +543,19 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line) CTR2(KTR_LOCK, "%s: %p resuming from turnstile", __func__, rw); } +#ifdef KDTRACE_HOOKS + all_time += lockstat_nsecs(); + if (sleep_time) + LOCKSTAT_RECORD4(LS_RW_RLOCK_BLOCK, rw, sleep_time, + LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, + (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); + /* Record only the loops spinning and not sleeping. */ + if (spin_cnt > sleep_cnt) + LOCKSTAT_RECORD4(LS_RW_RLOCK_SPIN, rw, all_time - sleep_time, + LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, + (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); +#endif /* * TODO: acquire "owner of record" here. Here be turnstile dragons * however. turnstiles don't like owners changing between calls to @@ -546,16 +567,6 @@ __rw_rlock(volatile uintptr_t *c, const char *file, int line) WITNESS_LOCK(&rw->lock_object, 0, file, line); curthread->td_locks++; curthread->td_rw_rlocks++; -#ifdef KDTRACE_HOOKS - if (sleep_time) - LOCKSTAT_RECORD1(LS_RW_RLOCK_BLOCK, rw, sleep_time); - - /* - * Record only the loops spinning and not sleeping. - */ - if (spin_cnt > sleep_cnt) - LOCKSTAT_RECORD1(LS_RW_RLOCK_SPIN, rw, (spin_cnt - sleep_cnt)); -#endif } int @@ -583,6 +594,8 @@ __rw_try_rlock(volatile uintptr_t *c, const char *file, int line) LOCK_LOG_TRY("RLOCK", &rw->lock_object, 0, 1, file, line); WITNESS_LOCK(&rw->lock_object, LOP_TRYLOCK, file, line); + LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_RLOCK_ACQUIRE, + rw, 0, 0, file, line); curthread->td_locks++; curthread->td_rw_rlocks++; return (1); @@ -727,9 +740,11 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, int contested = 0; #endif #ifdef KDTRACE_HOOKS + uintptr_t state; uint64_t spin_cnt = 0; uint64_t sleep_cnt = 0; int64_t sleep_time = 0; + int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) @@ -751,6 +766,10 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__, rw->lock_object.lo_name, (void *)rw->rw_lock, file, line); +#ifdef KDTRACE_HOOKS + all_time -= lockstat_nsecs(); + state = rw->rw_lock; +#endif while (!_rw_write_lock(rw, tid)) { #ifdef KDTRACE_HOOKS spin_cnt++; @@ -888,18 +907,21 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t tid, const char *file, spintries = 0; #endif } +#ifdef KDTRACE_HOOKS + all_time += lockstat_nsecs(); + if (sleep_time) + LOCKSTAT_RECORD4(LS_RW_WLOCK_BLOCK, rw, sleep_time, + LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0, + (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); + + /* Record only the loops spinning and not sleeping. */ + if (spin_cnt > sleep_cnt) + LOCKSTAT_RECORD4(LS_RW_WLOCK_SPIN, rw, all_time - sleep_time, + LOCKSTAT_READER, (state & RW_LOCK_READ) == 0, + (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state)); +#endif LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_RW_WLOCK_ACQUIRE, rw, contested, waittime, file, line); -#ifdef KDTRACE_HOOKS - if (sleep_time) - LOCKSTAT_RECORD1(LS_RW_WLOCK_BLOCK, rw, sleep_time); - - /* - * Record only the loops spinning and not sleeping. - */ - if (spin_cnt > sleep_cnt) - LOCKSTAT_RECORD1(LS_RW_WLOCK_SPIN, rw, (spin_cnt - sleep_cnt)); -#endif } /* diff --git a/sys/kern/kern_sx.c b/sys/kern/kern_sx.c index a986590ec9b8..91e8c60f899d 100644 --- a/sys/kern/kern_sx.c +++ b/sys/kern/kern_sx.c @@ -288,6 +288,8 @@ sx_try_slock_(struct sx *sx, const char *file, int line) if (atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER)) { LOCK_LOG_TRY("SLOCK", &sx->lock_object, 0, 1, file, line); WITNESS_LOCK(&sx->lock_object, LOP_TRYLOCK, file, line); + LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_SX_SLOCK_ACQUIRE, + sx, 0, 0, file, line); curthread->td_locks++; return (1); } @@ -348,6 +350,9 @@ sx_try_xlock_(struct sx *sx, const char *file, int line) if (rval) { WITNESS_LOCK(&sx->lock_object, LOP_EXCLUSIVE | LOP_TRYLOCK, file, line); + if (!sx_recursed(sx)) + LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_SX_XLOCK_ACQUIRE, + sx, 0, 0, file, line); curthread->td_locks++; } @@ -509,9 +514,11 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, #endif int error = 0; #ifdef KDTRACE_HOOKS + uintptr_t state; uint64_t spin_cnt = 0; uint64_t sleep_cnt = 0; int64_t sleep_time = 0; + int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) @@ -533,6 +540,10 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, CTR5(KTR_LOCK, "%s: %s contested (lock=%p) at %s:%d", __func__, sx->lock_object.lo_name, (void *)sx->sx_lock, file, line); +#ifdef KDTRACE_HOOKS + all_time -= lockstat_nsecs(); + state = sx->sx_lock; +#endif while (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) { #ifdef KDTRACE_HOOKS spin_cnt++; @@ -705,17 +716,21 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file, CTR2(KTR_LOCK, "%s: %p resuming from sleep queue", __func__, sx); } - - GIANT_RESTORE(); +#ifdef KDTRACE_HOOKS + all_time += lockstat_nsecs(); + if (sleep_time) + LOCKSTAT_RECORD4(LS_SX_XLOCK_BLOCK, sx, sleep_time, + LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0, + (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); + if (spin_cnt > sleep_cnt) + LOCKSTAT_RECORD4(LS_SX_XLOCK_SPIN, sx, all_time - sleep_time, + LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0, + (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); +#endif if (!error) LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_SX_XLOCK_ACQUIRE, sx, contested, waittime, file, line); -#ifdef KDTRACE_HOOKS - if (sleep_time) - LOCKSTAT_RECORD1(LS_SX_XLOCK_BLOCK, sx, sleep_time); - if (spin_cnt > sleep_cnt) - LOCKSTAT_RECORD1(LS_SX_XLOCK_SPIN, sx, (spin_cnt - sleep_cnt)); -#endif + GIANT_RESTORE(); return (error); } @@ -801,14 +816,21 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) uintptr_t x; int error = 0; #ifdef KDTRACE_HOOKS + uintptr_t state; uint64_t spin_cnt = 0; uint64_t sleep_cnt = 0; int64_t sleep_time = 0; + int64_t all_time = 0; #endif if (SCHEDULER_STOPPED()) return (0); +#ifdef KDTRACE_HOOKS + state = sx->sx_lock; + all_time -= lockstat_nsecs(); +#endif + /* * As with rwlocks, we don't make any attempt to try to block * shared locks once there is an exclusive waiter. @@ -958,15 +980,20 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line) CTR2(KTR_LOCK, "%s: %p resuming from sleep queue", __func__, sx); } +#ifdef KDTRACE_HOOKS + all_time += lockstat_nsecs(); + if (sleep_time) + LOCKSTAT_RECORD4(LS_SX_SLOCK_BLOCK, sx, sleep_time, + LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0, + (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); + if (spin_cnt > sleep_cnt) + LOCKSTAT_RECORD4(LS_SX_SLOCK_SPIN, sx, all_time - sleep_time, + LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0, + (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state)); +#endif if (error == 0) LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(LS_SX_SLOCK_ACQUIRE, sx, contested, waittime, file, line); -#ifdef KDTRACE_HOOKS - if (sleep_time) - LOCKSTAT_RECORD1(LS_SX_XLOCK_BLOCK, sx, sleep_time); - if (spin_cnt > sleep_cnt) - LOCKSTAT_RECORD1(LS_SX_XLOCK_SPIN, sx, (spin_cnt - sleep_cnt)); -#endif GIANT_RESTORE(); return (error); } diff --git a/sys/sys/lockstat.h b/sys/sys/lockstat.h index ed9cffa92b62..2a7853fb95bf 100644 --- a/sys/sys/lockstat.h +++ b/sys/sys/lockstat.h @@ -198,6 +198,9 @@ extern uint64_t lockstat_nsecs(void); (*lockstat_probe_func)(id, (uintptr_t)(lp), 0, 0, 0, 0); \ } while (0) +#define LOCKSTAT_WRITER 0 +#define LOCKSTAT_READER 1 + #else /* !KDTRACE_HOOKS */ #define LOCKSTAT_RECORD(probe, lp, arg1)