- Re-implement lock profiling in such a way that it no longer breaks
the ABI when enabled. There is no longer an embedded lock_profile_object in each lock. Instead a list of lock_profile_objects is kept per-thread for each lock it may own. The cnt_hold statistic is now always 0 to facilitate this.
- Support shared locking by tracking individual lock instances and statistics in the per-thread per-instance lock_profile_object.
- Make the lock profiling hash table a per-cpu singly linked list with a per-cpu static lock_prof allocator. This removes the need for an array of spinlocks and reduces cache contention between cores.
- Use a separate hash for spinlocks and other locks so that only a critical_enter() is required and not a spinlock_enter() to modify the per-cpu tables.
- Count time spent spinning in the lock statistics.
- Remove the LOCK_PROFILING_SHARED option as it is always supported now.
- Specifically drop and release the scheduler locks in both schedulers since we track owners now.

In collaboration with: Kip Macy
Sponsored by: Nokia
This commit is contained in:
parent
4c908c35e0
commit
eea4f254fe
@ -540,7 +540,6 @@ MUTEX_DEBUG opt_global.h
|
||||
MUTEX_NOINLINE opt_global.h
|
||||
LOCK_PROFILING opt_global.h
|
||||
LOCK_PROFILING_FAST opt_global.h
|
||||
LOCK_PROFILING_SHARED opt_global.h
|
||||
MSIZE opt_global.h
|
||||
REGRESSION opt_global.h
|
||||
RESTARTABLE_PANICS opt_global.h
|
||||
|
@ -123,20 +123,6 @@ struct lock_class lock_class_mtx_spin = {
|
||||
struct mtx blocked_lock;
|
||||
struct mtx Giant;
|
||||
|
||||
#ifdef LOCK_PROFILING
|
||||
static inline void lock_profile_init(void)
|
||||
{
|
||||
int i;
|
||||
/* Initialize the mutex profiling locks */
|
||||
for (i = 0; i < LPROF_LOCK_SIZE; i++) {
|
||||
mtx_init(&lprof_locks[i], "mprof lock",
|
||||
NULL, MTX_SPIN|MTX_QUIET|MTX_NOPROFILE);
|
||||
}
|
||||
}
|
||||
#else
|
||||
static inline void lock_profile_init(void) {;}
|
||||
#endif
|
||||
|
||||
void
|
||||
assert_mtx(struct lock_object *lock, int what)
|
||||
{
|
||||
@ -425,7 +411,7 @@ _mtx_lock_sleep(struct mtx *m, uintptr_t tid, int opts, const char *file,
|
||||
}
|
||||
#endif
|
||||
lock_profile_obtain_lock_success(&m->lock_object, contested,
|
||||
waittime, (file), (line));
|
||||
waittime, file, line);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -514,7 +500,8 @@ retry:
|
||||
m->mtx_recurse++;
|
||||
break;
|
||||
}
|
||||
lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime);
|
||||
lock_profile_obtain_lock_failed(&m->lock_object,
|
||||
&contested, &waittime);
|
||||
/* Give interrupts a chance while we spin. */
|
||||
spinlock_exit();
|
||||
while (m->mtx_lock != MTX_UNOWNED) {
|
||||
@ -535,8 +522,9 @@ retry:
|
||||
break;
|
||||
_rel_spin_lock(m); /* does spinlock_exit() */
|
||||
}
|
||||
lock_profile_obtain_lock_success(&m->lock_object, contested,
|
||||
waittime, (file), (line));
|
||||
if (m->mtx_recurse == 0)
|
||||
lock_profile_obtain_lock_success(&m->lock_object, contested,
|
||||
waittime, (file), (line));
|
||||
WITNESS_LOCK(&m->lock_object, opts | LOP_EXCLUSIVE, file, line);
|
||||
}
|
||||
|
||||
@ -794,8 +782,6 @@ mutex_init(void)
|
||||
mtx_init(&proc0.p_slock, "process slock", NULL, MTX_SPIN | MTX_RECURSE);
|
||||
mtx_init(&devmtx, "cdev", NULL, MTX_DEF);
|
||||
mtx_lock(&Giant);
|
||||
|
||||
lock_profile_init();
|
||||
}
|
||||
|
||||
#ifdef DDB
|
||||
|
@ -227,10 +227,8 @@ _rw_rlock(struct rwlock *rw, const char *file, int line)
|
||||
#ifdef ADAPTIVE_RWLOCKS
|
||||
volatile struct thread *owner;
|
||||
#endif
|
||||
#ifdef LOCK_PROFILING_SHARED
|
||||
uint64_t waittime = 0;
|
||||
int contested = 0;
|
||||
#endif
|
||||
uintptr_t x;
|
||||
|
||||
KASSERT(rw->rw_lock != RW_DESTROYED,
|
||||
@ -273,12 +271,6 @@ _rw_rlock(struct rwlock *rw, const char *file, int line)
|
||||
MPASS((x & RW_LOCK_READ_WAITERS) == 0);
|
||||
if (atomic_cmpset_acq_ptr(&rw->rw_lock, x,
|
||||
x + RW_ONE_READER)) {
|
||||
#ifdef LOCK_PROFILING_SHARED
|
||||
if (RW_READERS(x) == 0)
|
||||
lock_profile_obtain_lock_success(
|
||||
&rw->lock_object, contested,
|
||||
waittime, file, line);
|
||||
#endif
|
||||
if (LOCK_LOG_TEST(&rw->lock_object, 0))
|
||||
CTR4(KTR_LOCK,
|
||||
"%s: %p succeed %p -> %p", __func__,
|
||||
@ -289,6 +281,8 @@ _rw_rlock(struct rwlock *rw, const char *file, int line)
|
||||
cpu_spinwait();
|
||||
continue;
|
||||
}
|
||||
lock_profile_obtain_lock_failed(&rw->lock_object,
|
||||
&contested, &waittime);
|
||||
|
||||
#ifdef ADAPTIVE_RWLOCKS
|
||||
/*
|
||||
@ -301,10 +295,6 @@ _rw_rlock(struct rwlock *rw, const char *file, int line)
|
||||
if (LOCK_LOG_TEST(&rw->lock_object, 0))
|
||||
CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
|
||||
__func__, rw, owner);
|
||||
#ifdef LOCK_PROFILING_SHARED
|
||||
lock_profile_obtain_lock_failed(&rw->lock_object,
|
||||
&contested, &waittime);
|
||||
#endif
|
||||
while ((struct thread*)RW_OWNER(rw->rw_lock) == owner &&
|
||||
TD_IS_RUNNING(owner))
|
||||
cpu_spinwait();
|
||||
@ -369,10 +359,6 @@ _rw_rlock(struct rwlock *rw, const char *file, int line)
|
||||
if (LOCK_LOG_TEST(&rw->lock_object, 0))
|
||||
CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
|
||||
rw);
|
||||
#ifdef LOCK_PROFILING_SHARED
|
||||
lock_profile_obtain_lock_failed(&rw->lock_object, &contested,
|
||||
&waittime);
|
||||
#endif
|
||||
turnstile_wait(ts, rw_owner(rw), TS_SHARED_QUEUE);
|
||||
if (LOCK_LOG_TEST(&rw->lock_object, 0))
|
||||
CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
|
||||
@ -384,7 +370,8 @@ _rw_rlock(struct rwlock *rw, const char *file, int line)
|
||||
* however. turnstiles don't like owners changing between calls to
|
||||
* turnstile_wait() currently.
|
||||
*/
|
||||
|
||||
lock_profile_obtain_lock_success( &rw->lock_object, contested,
|
||||
waittime, file, line);
|
||||
LOCK_LOG_LOCK("RLOCK", &rw->lock_object, 0, 0, file, line);
|
||||
WITNESS_LOCK(&rw->lock_object, 0, file, line);
|
||||
curthread->td_locks++;
|
||||
@ -431,9 +418,6 @@ _rw_runlock(struct rwlock *rw, const char *file, int line)
|
||||
*/
|
||||
KASSERT(!(x & RW_LOCK_READ_WAITERS),
|
||||
("%s: waiting readers", __func__));
|
||||
#ifdef LOCK_PROFILING_SHARED
|
||||
lock_profile_release_lock(&rw->lock_object);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* If there aren't any waiters for a write lock, then try
|
||||
@ -510,6 +494,7 @@ _rw_runlock(struct rwlock *rw, const char *file, int line)
|
||||
turnstile_chain_unlock(&rw->lock_object);
|
||||
break;
|
||||
}
|
||||
lock_profile_release_lock(&rw->lock_object);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -544,6 +529,8 @@ _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
|
||||
rw->lock_object.lo_name, (void *)rw->rw_lock, file, line);
|
||||
|
||||
while (!_rw_write_lock(rw, tid)) {
|
||||
lock_profile_obtain_lock_failed(&rw->lock_object,
|
||||
&contested, &waittime);
|
||||
#ifdef ADAPTIVE_RWLOCKS
|
||||
/*
|
||||
* If the lock is write locked and the owner is
|
||||
@ -556,8 +543,6 @@ _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
|
||||
if (LOCK_LOG_TEST(&rw->lock_object, 0))
|
||||
CTR3(KTR_LOCK, "%s: spinning on %p held by %p",
|
||||
__func__, rw, owner);
|
||||
lock_profile_obtain_lock_failed(&rw->lock_object,
|
||||
&contested, &waittime);
|
||||
while ((struct thread*)RW_OWNER(rw->rw_lock) == owner &&
|
||||
TD_IS_RUNNING(owner))
|
||||
cpu_spinwait();
|
||||
@ -641,8 +626,6 @@ _rw_wlock_hard(struct rwlock *rw, uintptr_t tid, const char *file, int line)
|
||||
if (LOCK_LOG_TEST(&rw->lock_object, 0))
|
||||
CTR2(KTR_LOCK, "%s: %p blocking on turnstile", __func__,
|
||||
rw);
|
||||
lock_profile_obtain_lock_failed(&rw->lock_object, &contested,
|
||||
&waittime);
|
||||
turnstile_wait(ts, rw_owner(rw), TS_EXCLUSIVE_QUEUE);
|
||||
if (LOCK_LOG_TEST(&rw->lock_object, 0))
|
||||
CTR2(KTR_LOCK, "%s: %p resuming from turnstile",
|
||||
|
@ -302,11 +302,8 @@ _sx_sunlock(struct sx *sx, const char *file, int line)
|
||||
curthread->td_locks--;
|
||||
WITNESS_UNLOCK(&sx->lock_object, 0, file, line);
|
||||
LOCK_LOG_LOCK("SUNLOCK", &sx->lock_object, 0, 0, file, line);
|
||||
#ifdef LOCK_PROFILING_SHARED
|
||||
if (SX_SHARERS(sx->sx_lock) == 1)
|
||||
lock_profile_release_lock(&sx->lock_object);
|
||||
#endif
|
||||
__sx_sunlock(sx, file, line);
|
||||
lock_profile_release_lock(&sx->lock_object);
|
||||
}
|
||||
|
||||
void
|
||||
@ -450,6 +447,8 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
|
||||
sx->lock_object.lo_name, (void *)sx->sx_lock, file, line);
|
||||
|
||||
while (!atomic_cmpset_acq_ptr(&sx->sx_lock, SX_LOCK_UNLOCKED, tid)) {
|
||||
lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
|
||||
&waittime);
|
||||
#ifdef ADAPTIVE_SX
|
||||
/*
|
||||
* If the lock is write locked and the owner is
|
||||
@ -467,8 +466,6 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
|
||||
"%s: spinning on %p held by %p",
|
||||
__func__, sx, owner);
|
||||
GIANT_SAVE();
|
||||
lock_profile_obtain_lock_failed(
|
||||
&sx->lock_object, &contested, &waittime);
|
||||
while (SX_OWNER(sx->sx_lock) == x &&
|
||||
TD_IS_RUNNING(owner))
|
||||
cpu_spinwait();
|
||||
@ -555,8 +552,6 @@ _sx_xlock_hard(struct sx *sx, uintptr_t tid, int opts, const char *file,
|
||||
__func__, sx);
|
||||
|
||||
GIANT_SAVE();
|
||||
lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
|
||||
&waittime);
|
||||
sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
|
||||
SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
|
||||
SLEEPQ_INTERRUPTIBLE : 0), SQ_EXCLUSIVE_QUEUE);
|
||||
@ -648,10 +643,8 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
|
||||
#ifdef ADAPTIVE_SX
|
||||
volatile struct thread *owner;
|
||||
#endif
|
||||
#ifdef LOCK_PROFILING_SHARED
|
||||
uint64_t waittime = 0;
|
||||
int contested = 0;
|
||||
#endif
|
||||
uintptr_t x;
|
||||
int error = 0;
|
||||
|
||||
@ -672,12 +665,6 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
|
||||
MPASS(!(x & SX_LOCK_SHARED_WAITERS));
|
||||
if (atomic_cmpset_acq_ptr(&sx->sx_lock, x,
|
||||
x + SX_ONE_SHARER)) {
|
||||
#ifdef LOCK_PROFILING_SHARED
|
||||
if (SX_SHARERS(x) == 0)
|
||||
lock_profile_obtain_lock_success(
|
||||
&sx->lock_object, contested,
|
||||
waittime, file, line);
|
||||
#endif
|
||||
if (LOCK_LOG_TEST(&sx->lock_object, 0))
|
||||
CTR4(KTR_LOCK,
|
||||
"%s: %p succeed %p -> %p", __func__,
|
||||
@ -687,6 +674,8 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
|
||||
}
|
||||
continue;
|
||||
}
|
||||
lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
|
||||
&waittime);
|
||||
|
||||
#ifdef ADAPTIVE_SX
|
||||
/*
|
||||
@ -694,7 +683,7 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
|
||||
* the owner stops running or the state of the lock
|
||||
* changes.
|
||||
*/
|
||||
else if (sx->lock_object.lo_flags & SX_ADAPTIVESPIN) {
|
||||
if (sx->lock_object.lo_flags & SX_ADAPTIVESPIN) {
|
||||
x = SX_OWNER(x);
|
||||
owner = (struct thread *)x;
|
||||
if (TD_IS_RUNNING(owner)) {
|
||||
@ -703,10 +692,6 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
|
||||
"%s: spinning on %p held by %p",
|
||||
__func__, sx, owner);
|
||||
GIANT_SAVE();
|
||||
#ifdef LOCK_PROFILING_SHARED
|
||||
lock_profile_obtain_lock_failed(
|
||||
&sx->lock_object, &contested, &waittime);
|
||||
#endif
|
||||
while (SX_OWNER(sx->sx_lock) == x &&
|
||||
TD_IS_RUNNING(owner))
|
||||
cpu_spinwait();
|
||||
@ -772,10 +757,6 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
|
||||
__func__, sx);
|
||||
|
||||
GIANT_SAVE();
|
||||
#ifdef LOCK_PROFILING_SHARED
|
||||
lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
|
||||
&waittime);
|
||||
#endif
|
||||
sleepq_add(&sx->lock_object, NULL, sx->lock_object.lo_name,
|
||||
SLEEPQ_SX | ((opts & SX_INTERRUPTIBLE) ?
|
||||
SLEEPQ_INTERRUPTIBLE : 0), SQ_SHARED_QUEUE);
|
||||
@ -795,6 +776,9 @@ _sx_slock_hard(struct sx *sx, int opts, const char *file, int line)
|
||||
CTR2(KTR_LOCK, "%s: %p resuming from sleep queue",
|
||||
__func__, sx);
|
||||
}
|
||||
if (error == 0)
|
||||
lock_profile_obtain_lock_success(&sx->lock_object, contested,
|
||||
waittime, file, line);
|
||||
|
||||
GIANT_RESTORE();
|
||||
return (error);
|
||||
|
@ -555,6 +555,8 @@ thread_link(struct thread *td, struct proc *p)
|
||||
td->td_flags = TDF_INMEM;
|
||||
|
||||
LIST_INIT(&td->td_contested);
|
||||
LIST_INIT(&td->td_lprof[0]);
|
||||
LIST_INIT(&td->td_lprof[1]);
|
||||
sigqueue_init(&td->td_sigqueue, p);
|
||||
callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
|
||||
TAILQ_INSERT_HEAD(&p->p_threads, td, td_plist);
|
||||
|
@ -878,9 +878,11 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
|
||||
if (PMC_PROC_IS_USING_PMCS(td->td_proc))
|
||||
PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
|
||||
#endif
|
||||
|
||||
/* I feel sleepy */
|
||||
lock_profile_release_lock(&sched_lock.lock_object);
|
||||
cpu_switch(td, newtd, td->td_lock);
|
||||
lock_profile_obtain_lock_success(&sched_lock.lock_object,
|
||||
0, 0, __FILE__, __LINE__);
|
||||
/*
|
||||
* Where am I? What year is it?
|
||||
* We are in the same thread that went to sleep above,
|
||||
@ -1375,6 +1377,7 @@ sched_throw(struct thread *td)
|
||||
mtx_lock_spin(&sched_lock);
|
||||
spinlock_exit();
|
||||
} else {
|
||||
lock_profile_release_lock(&sched_lock.lock_object);
|
||||
MPASS(td->td_lock == &sched_lock);
|
||||
}
|
||||
mtx_assert(&sched_lock, MA_OWNED);
|
||||
@ -1394,6 +1397,8 @@ sched_fork_exit(struct thread *td)
|
||||
*/
|
||||
td->td_oncpu = PCPU_GET(cpuid);
|
||||
sched_lock.mtx_lock = (uintptr_t)td;
|
||||
lock_profile_obtain_lock_success(&sched_lock.lock_object,
|
||||
0, 0, __FILE__, __LINE__);
|
||||
THREAD_LOCK_ASSERT(td, MA_OWNED | MA_NOTRECURSED);
|
||||
}
|
||||
|
||||
|
@ -1894,6 +1894,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
|
||||
if (PMC_PROC_IS_USING_PMCS(td->td_proc))
|
||||
PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT);
|
||||
#endif
|
||||
lock_profile_release_lock(&TDQ_LOCKPTR(tdq)->lock_object);
|
||||
TDQ_LOCKPTR(tdq)->mtx_lock = (uintptr_t)newtd;
|
||||
cpu_switch(td, newtd, mtx);
|
||||
/*
|
||||
@ -1903,6 +1904,8 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
|
||||
*/
|
||||
cpuid = PCPU_GET(cpuid);
|
||||
tdq = TDQ_CPU(cpuid);
|
||||
lock_profile_obtain_lock_success(
|
||||
&TDQ_LOCKPTR(tdq)->lock_object, 0, 0, __FILE__, __LINE__);
|
||||
#ifdef HWPMC_HOOKS
|
||||
if (PMC_PROC_IS_USING_PMCS(td->td_proc))
|
||||
PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN);
|
||||
@ -2618,6 +2621,7 @@ sched_throw(struct thread *td)
|
||||
} else {
|
||||
MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
|
||||
tdq_load_rem(tdq, td->td_sched);
|
||||
lock_profile_release_lock(&TDQ_LOCKPTR(tdq)->lock_object);
|
||||
}
|
||||
KASSERT(curthread->td_md.md_spinlock_count == 1, ("invalid count"));
|
||||
newtd = choosethread();
|
||||
@ -2650,6 +2654,8 @@ sched_fork_exit(struct thread *td)
|
||||
MPASS(td->td_lock == TDQ_LOCKPTR(tdq));
|
||||
td->td_oncpu = cpuid;
|
||||
TDQ_LOCK_ASSERT(tdq, MA_OWNED | MA_NOTRECURSED);
|
||||
lock_profile_obtain_lock_success(
|
||||
&TDQ_LOCKPTR(tdq)->lock_object, 0, 0, __FILE__, __LINE__);
|
||||
}
|
||||
|
||||
static SYSCTL_NODE(_kern, OID_AUTO, sched, CTLFLAG_RW, 0,
|
||||
|
@ -40,17 +40,24 @@ __FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/systm.h>
|
||||
#include <sys/kernel.h>
|
||||
#include <sys/ktr.h>
|
||||
#include <sys/linker_set.h>
|
||||
#include <sys/lock.h>
|
||||
#include <sys/sbuf.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/lock_profile.h>
|
||||
#include <sys/malloc.h>
|
||||
#include <sys/pcpu.h>
|
||||
#include <sys/proc.h>
|
||||
#include <sys/sbuf.h>
|
||||
#include <sys/smp.h>
|
||||
#include <sys/sysctl.h>
|
||||
|
||||
#ifdef DDB
|
||||
#include <ddb/ddb.h>
|
||||
#endif
|
||||
|
||||
#include <machine/cpufunc.h>
|
||||
|
||||
CTASSERT(LOCK_CLASS_MAX == 15);
|
||||
|
||||
struct lock_class *lock_classes[LOCK_CLASS_MAX + 1] = {
|
||||
@ -62,136 +69,6 @@ struct lock_class *lock_classes[LOCK_CLASS_MAX + 1] = {
|
||||
&lock_class_lockmgr,
|
||||
};
|
||||
|
||||
#ifdef LOCK_PROFILING
|
||||
#include <machine/cpufunc.h>
|
||||
|
||||
SYSCTL_NODE(_debug, OID_AUTO, lock, CTLFLAG_RD, NULL, "lock debugging");
|
||||
SYSCTL_NODE(_debug_lock, OID_AUTO, prof, CTLFLAG_RD, NULL, "lock profiling");
|
||||
int lock_prof_enable = 0;
|
||||
SYSCTL_INT(_debug_lock_prof, OID_AUTO, enable, CTLFLAG_RW,
|
||||
&lock_prof_enable, 0, "Enable lock profiling");
|
||||
|
||||
/*
|
||||
* lprof_buf is a static pool of profiling records to avoid possible
|
||||
* reentrance of the memory allocation functions.
|
||||
*
|
||||
* Note: NUM_LPROF_BUFFERS must be smaller than LPROF_HASH_SIZE.
|
||||
*/
|
||||
struct lock_prof lprof_buf[LPROF_HASH_SIZE];
|
||||
static int allocated_lprof_buf;
|
||||
struct mtx lprof_locks[LPROF_LOCK_SIZE];
|
||||
|
||||
|
||||
/* SWAG: sbuf size = avg stat. line size * number of locks */
|
||||
#define LPROF_SBUF_SIZE 256 * 400
|
||||
|
||||
static int lock_prof_acquisitions;
|
||||
SYSCTL_INT(_debug_lock_prof, OID_AUTO, acquisitions, CTLFLAG_RD,
|
||||
&lock_prof_acquisitions, 0, "Number of lock acquistions recorded");
|
||||
static int lock_prof_records;
|
||||
SYSCTL_INT(_debug_lock_prof, OID_AUTO, records, CTLFLAG_RD,
|
||||
&lock_prof_records, 0, "Number of profiling records");
|
||||
static int lock_prof_maxrecords = LPROF_HASH_SIZE;
|
||||
SYSCTL_INT(_debug_lock_prof, OID_AUTO, maxrecords, CTLFLAG_RD,
|
||||
&lock_prof_maxrecords, 0, "Maximum number of profiling records");
|
||||
static int lock_prof_rejected;
|
||||
SYSCTL_INT(_debug_lock_prof, OID_AUTO, rejected, CTLFLAG_RD,
|
||||
&lock_prof_rejected, 0, "Number of rejected profiling records");
|
||||
static int lock_prof_hashsize = LPROF_HASH_SIZE;
|
||||
SYSCTL_INT(_debug_lock_prof, OID_AUTO, hashsize, CTLFLAG_RD,
|
||||
&lock_prof_hashsize, 0, "Hash size");
|
||||
static int lock_prof_collisions = 0;
|
||||
SYSCTL_INT(_debug_lock_prof, OID_AUTO, collisions, CTLFLAG_RD,
|
||||
&lock_prof_collisions, 0, "Number of hash collisions");
|
||||
|
||||
#ifndef USE_CPU_NANOSECONDS
|
||||
u_int64_t
|
||||
nanoseconds(void)
|
||||
{
|
||||
struct timespec tv;
|
||||
|
||||
nanotime(&tv);
|
||||
return (tv.tv_sec * (u_int64_t)1000000000 + tv.tv_nsec);
|
||||
}
|
||||
#endif
|
||||
|
||||
static int
|
||||
dump_lock_prof_stats(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
struct sbuf *sb;
|
||||
int error, i;
|
||||
static int multiplier = 1;
|
||||
const char *p;
|
||||
|
||||
if (allocated_lprof_buf == 0)
|
||||
return (SYSCTL_OUT(req, "No locking recorded",
|
||||
sizeof("No locking recorded")));
|
||||
|
||||
retry_sbufops:
|
||||
sb = sbuf_new(NULL, NULL, LPROF_SBUF_SIZE * multiplier, SBUF_FIXEDLEN);
|
||||
sbuf_printf(sb, "\n%6s %12s %12s %11s %5s %5s %12s %12s %s\n",
|
||||
"max", "total", "wait_total", "count", "avg", "wait_avg", "cnt_hold", "cnt_lock", "name");
|
||||
for (i = 0; i < LPROF_HASH_SIZE; ++i) {
|
||||
if (lprof_buf[i].name == NULL)
|
||||
continue;
|
||||
for (p = lprof_buf[i].file;
|
||||
p != NULL && strncmp(p, "../", 3) == 0; p += 3)
|
||||
/* nothing */ ;
|
||||
sbuf_printf(sb, "%6ju %12ju %12ju %11ju %5ju %5ju %12ju %12ju %s:%d (%s:%s)\n",
|
||||
lprof_buf[i].cnt_max / 1000,
|
||||
lprof_buf[i].cnt_tot / 1000,
|
||||
lprof_buf[i].cnt_wait / 1000,
|
||||
lprof_buf[i].cnt_cur,
|
||||
lprof_buf[i].cnt_cur == 0 ? (uintmax_t)0 :
|
||||
lprof_buf[i].cnt_tot / (lprof_buf[i].cnt_cur * 1000),
|
||||
lprof_buf[i].cnt_cur == 0 ? (uintmax_t)0 :
|
||||
lprof_buf[i].cnt_wait / (lprof_buf[i].cnt_cur * 1000),
|
||||
lprof_buf[i].cnt_contest_holding,
|
||||
lprof_buf[i].cnt_contest_locking,
|
||||
p, lprof_buf[i].line,
|
||||
lprof_buf[i].type,
|
||||
lprof_buf[i].name);
|
||||
if (sbuf_overflowed(sb)) {
|
||||
sbuf_delete(sb);
|
||||
multiplier++;
|
||||
goto retry_sbufops;
|
||||
}
|
||||
}
|
||||
|
||||
sbuf_finish(sb);
|
||||
error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
|
||||
sbuf_delete(sb);
|
||||
return (error);
|
||||
}
|
||||
static int
|
||||
reset_lock_prof_stats(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
int error, v;
|
||||
|
||||
if (allocated_lprof_buf == 0)
|
||||
return (0);
|
||||
|
||||
v = 0;
|
||||
error = sysctl_handle_int(oidp, &v, 0, req);
|
||||
if (error)
|
||||
return (error);
|
||||
if (req->newptr == NULL)
|
||||
return (error);
|
||||
if (v == 0)
|
||||
return (0);
|
||||
|
||||
bzero(lprof_buf, LPROF_HASH_SIZE*sizeof(*lprof_buf));
|
||||
allocated_lprof_buf = 0;
|
||||
return (0);
|
||||
}
|
||||
|
||||
SYSCTL_PROC(_debug_lock_prof, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD,
|
||||
NULL, 0, dump_lock_prof_stats, "A", "Lock profiling statistics");
|
||||
|
||||
SYSCTL_PROC(_debug_lock_prof, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW,
|
||||
NULL, 0, reset_lock_prof_stats, "I", "Reset lock profiling statistics");
|
||||
#endif
|
||||
|
||||
void
|
||||
lock_init(struct lock_object *lock, struct lock_class *class, const char *name,
|
||||
const char *type, int flags)
|
||||
@ -216,7 +93,6 @@ lock_init(struct lock_object *lock, struct lock_class *class, const char *name,
|
||||
lock->lo_flags |= flags | LO_INITIALIZED;
|
||||
LOCK_LOG_INIT(lock, 0);
|
||||
WITNESS_INIT(lock);
|
||||
lock_profile_object_init(lock, class, name);
|
||||
}
|
||||
|
||||
void
|
||||
@ -224,7 +100,6 @@ lock_destroy(struct lock_object *lock)
|
||||
{
|
||||
|
||||
KASSERT(lock_initalized(lock), ("lock %p is not initialized", lock));
|
||||
lock_profile_object_destroy(lock);
|
||||
WITNESS_DESTROY(lock);
|
||||
LOCK_LOG_DESTROY(lock, 0);
|
||||
lock->lo_flags &= ~LO_INITIALIZED;
|
||||
@ -253,17 +128,376 @@ DB_SHOW_COMMAND(lock, db_show_lock)
|
||||
#endif
|
||||
|
||||
#ifdef LOCK_PROFILING
|
||||
void _lock_profile_obtain_lock_success(struct lock_object *lo, int contested, uint64_t waittime, const char *file, int line)
|
||||
{
|
||||
struct lock_profile_object *l = &lo->lo_profile_obj;
|
||||
|
||||
lo->lo_profile_obj.lpo_contest_holding = 0;
|
||||
/*
|
||||
* One object per-thread for each lock the thread owns. Tracks individual
|
||||
* lock instances.
|
||||
*/
|
||||
struct lock_profile_object {
|
||||
LIST_ENTRY(lock_profile_object) lpo_link;
|
||||
struct lock_object *lpo_obj;
|
||||
const char *lpo_file;
|
||||
int lpo_line;
|
||||
uint16_t lpo_ref;
|
||||
uint16_t lpo_cnt;
|
||||
u_int64_t lpo_acqtime;
|
||||
u_int64_t lpo_waittime;
|
||||
u_int lpo_contest_locking;
|
||||
};
|
||||
|
||||
/*
|
||||
* One lock_prof for each (file, line, lock object) triple.
|
||||
*/
|
||||
struct lock_prof {
|
||||
SLIST_ENTRY(lock_prof) link;
|
||||
const char *file;
|
||||
const char *name;
|
||||
int line;
|
||||
int ticks;
|
||||
const char *type;
|
||||
uintmax_t cnt_max;
|
||||
uintmax_t cnt_tot;
|
||||
uintmax_t cnt_wait;
|
||||
uintmax_t cnt_cur;
|
||||
uintmax_t cnt_contest_locking;
|
||||
};
|
||||
|
||||
SLIST_HEAD(lphead, lock_prof);
|
||||
|
||||
#define LPROF_HASH_SIZE 4096
|
||||
#define LPROF_HASH_MASK (LPROF_HASH_SIZE - 1)
|
||||
#define LPROF_CACHE_SIZE 4096
|
||||
|
||||
/*
|
||||
* Array of objects and profs for each type of object for each cpu. Spinlocks
|
||||
* are handled separately because a thread may be preempted and acquire a
|
||||
* spinlock while in the lock profiling code of a non-spinlock. In this way
|
||||
* we only need a critical section to protect the per-cpu lists.
|
||||
*/
|
||||
struct lock_prof_type {
|
||||
struct lphead lpt_lpalloc;
|
||||
struct lpohead lpt_lpoalloc;
|
||||
struct lphead lpt_hash[LPROF_HASH_SIZE];
|
||||
struct lock_prof lpt_prof[LPROF_CACHE_SIZE];
|
||||
struct lock_profile_object lpt_objs[LPROF_CACHE_SIZE];
|
||||
};
|
||||
|
||||
struct lock_prof_cpu {
|
||||
struct lock_prof_type lpc_types[2]; /* One for spin one for other. */
|
||||
};
|
||||
|
||||
struct lock_prof_cpu *lp_cpu[MAXCPU];
|
||||
|
||||
int lock_prof_enable = 0;
|
||||
|
||||
/* SWAG: sbuf size = avg stat. line size * number of locks */
|
||||
#define LPROF_SBUF_SIZE 256 * 400
|
||||
|
||||
static int lock_prof_rejected;
|
||||
static int lock_prof_skipspin;
|
||||
static int lock_prof_skipcount;
|
||||
|
||||
#ifndef USE_CPU_NANOSECONDS
|
||||
u_int64_t
|
||||
nanoseconds(void)
|
||||
{
|
||||
struct bintime bt;
|
||||
u_int64_t ns;
|
||||
|
||||
binuptime(&bt);
|
||||
/* From bintime2timespec */
|
||||
ns = bt.sec * (u_int64_t)1000000000;
|
||||
ns += ((uint64_t)1000000000 * (uint32_t)(bt.frac >> 32)) >> 32;
|
||||
return (ns);
|
||||
}
|
||||
#endif
|
||||
|
||||
static void
|
||||
lock_prof_init_type(struct lock_prof_type *type)
|
||||
{
|
||||
int i;
|
||||
|
||||
SLIST_INIT(&type->lpt_lpalloc);
|
||||
LIST_INIT(&type->lpt_lpoalloc);
|
||||
for (i = 0; i < LPROF_CACHE_SIZE; i++) {
|
||||
SLIST_INSERT_HEAD(&type->lpt_lpalloc, &type->lpt_prof[i],
|
||||
link);
|
||||
LIST_INSERT_HEAD(&type->lpt_lpoalloc, &type->lpt_objs[i],
|
||||
lpo_link);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
lock_prof_init(void *arg)
|
||||
{
|
||||
int cpu;
|
||||
|
||||
for (cpu = 0; cpu <= mp_maxid; cpu++) {
|
||||
lp_cpu[cpu] = malloc(sizeof(*lp_cpu[cpu]), M_DEVBUF,
|
||||
M_WAITOK | M_ZERO);
|
||||
lock_prof_init_type(&lp_cpu[cpu]->lpc_types[0]);
|
||||
lock_prof_init_type(&lp_cpu[cpu]->lpc_types[1]);
|
||||
}
|
||||
}
|
||||
SYSINIT(lockprof, SI_SUB_SMP, SI_ORDER_ANY, lock_prof_init, NULL);
|
||||
|
||||
static void
|
||||
lock_prof_reset(void)
|
||||
{
|
||||
struct lock_prof_cpu *lpc;
|
||||
int enabled, i, cpu;
|
||||
|
||||
enabled = lock_prof_enable;
|
||||
lock_prof_enable = 0;
|
||||
for (cpu = 0; cpu <= mp_maxid; cpu++) {
|
||||
lpc = lp_cpu[cpu];
|
||||
for (i = 0; i < LPROF_CACHE_SIZE; i++) {
|
||||
LIST_REMOVE(&lpc->lpc_types[0].lpt_objs[i], lpo_link);
|
||||
LIST_REMOVE(&lpc->lpc_types[1].lpt_objs[i], lpo_link);
|
||||
}
|
||||
bzero(lpc, sizeof(*lpc));
|
||||
lock_prof_init_type(&lpc->lpc_types[0]);
|
||||
lock_prof_init_type(&lpc->lpc_types[1]);
|
||||
}
|
||||
lock_prof_enable = enabled;
|
||||
}
|
||||
|
||||
static void
|
||||
lock_prof_output(struct lock_prof *lp, struct sbuf *sb)
|
||||
{
|
||||
const char *p;
|
||||
|
||||
for (p = lp->file; p != NULL && strncmp(p, "../", 3) == 0; p += 3);
|
||||
sbuf_printf(sb,
|
||||
"%6ju %12ju %12ju %11ju %5ju %5ju %12ju %12ju %s:%d (%s:%s)\n",
|
||||
lp->cnt_max / 1000, lp->cnt_tot / 1000,
|
||||
lp->cnt_wait / 1000, lp->cnt_cur,
|
||||
lp->cnt_cur == 0 ? (uintmax_t)0 :
|
||||
lp->cnt_tot / (lp->cnt_cur * 1000),
|
||||
lp->cnt_cur == 0 ? (uintmax_t)0 :
|
||||
lp->cnt_wait / (lp->cnt_cur * 1000),
|
||||
(uintmax_t)0, lp->cnt_contest_locking,
|
||||
p, lp->line, lp->type, lp->name);
|
||||
}
|
||||
|
||||
static void
|
||||
lock_prof_sum(struct lock_prof *match, struct lock_prof *dst, int hash,
|
||||
int spin, int t)
|
||||
{
|
||||
struct lock_prof_type *type;
|
||||
struct lock_prof *l;
|
||||
int cpu;
|
||||
|
||||
dst->file = match->file;
|
||||
dst->line = match->line;
|
||||
dst->type = match->type;
|
||||
dst->name = match->name;
|
||||
|
||||
for (cpu = 0; cpu <= mp_maxid; cpu++) {
|
||||
if (lp_cpu[cpu] == NULL)
|
||||
continue;
|
||||
type = &lp_cpu[cpu]->lpc_types[spin];
|
||||
SLIST_FOREACH(l, &type->lpt_hash[hash], link) {
|
||||
if (l->ticks == t)
|
||||
continue;
|
||||
if (l->file != match->file || l->line != match->line ||
|
||||
l->name != match->name || l->type != match->type)
|
||||
continue;
|
||||
l->ticks = t;
|
||||
if (l->cnt_max > dst->cnt_max)
|
||||
dst->cnt_max = l->cnt_max;
|
||||
dst->cnt_tot += l->cnt_tot;
|
||||
dst->cnt_wait += l->cnt_wait;
|
||||
dst->cnt_cur += l->cnt_cur;
|
||||
dst->cnt_contest_locking += l->cnt_contest_locking;
|
||||
}
|
||||
}
|
||||
|
||||
if (contested)
|
||||
lo->lo_profile_obj.lpo_contest_locking++;
|
||||
|
||||
l->lpo_filename = file;
|
||||
l->lpo_lineno = line;
|
||||
}
|
||||
|
||||
static void
|
||||
lock_prof_type_stats(struct lock_prof_type *type, struct sbuf *sb, int spin,
|
||||
int t)
|
||||
{
|
||||
struct lock_prof *l;
|
||||
int i;
|
||||
|
||||
for (i = 0; i < LPROF_HASH_SIZE; ++i) {
|
||||
SLIST_FOREACH(l, &type->lpt_hash[i], link) {
|
||||
struct lock_prof lp = {};
|
||||
|
||||
if (l->ticks == t)
|
||||
continue;
|
||||
lock_prof_sum(l, &lp, i, spin, t);
|
||||
lock_prof_output(&lp, sb);
|
||||
if (sbuf_overflowed(sb))
|
||||
return;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
dump_lock_prof_stats(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
static int multiplier = 1;
|
||||
struct sbuf *sb;
|
||||
int error, cpu, t;
|
||||
|
||||
retry_sbufops:
|
||||
sb = sbuf_new(NULL, NULL, LPROF_SBUF_SIZE * multiplier, SBUF_FIXEDLEN);
|
||||
sbuf_printf(sb, "\n%6s %12s %12s %11s %5s %5s %12s %12s %s\n",
|
||||
"max", "total", "wait_total", "count", "avg", "wait_avg", "cnt_hold", "cnt_lock", "name");
|
||||
t = ticks;
|
||||
for (cpu = 0; cpu <= mp_maxid; cpu++) {
|
||||
if (lp_cpu[cpu] == NULL)
|
||||
continue;
|
||||
lock_prof_type_stats(&lp_cpu[cpu]->lpc_types[0], sb, 0, t);
|
||||
lock_prof_type_stats(&lp_cpu[cpu]->lpc_types[1], sb, 1, t);
|
||||
if (sbuf_overflowed(sb)) {
|
||||
sbuf_delete(sb);
|
||||
multiplier++;
|
||||
goto retry_sbufops;
|
||||
}
|
||||
}
|
||||
|
||||
sbuf_finish(sb);
|
||||
error = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
|
||||
sbuf_delete(sb);
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
enable_lock_prof(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
int error, v;
|
||||
|
||||
v = lock_prof_enable;
|
||||
error = sysctl_handle_int(oidp, &v, v, req);
|
||||
if (error)
|
||||
return (error);
|
||||
if (req->newptr == NULL)
|
||||
return (error);
|
||||
if (v == lock_prof_enable)
|
||||
return (0);
|
||||
if (v == 1)
|
||||
lock_prof_reset();
|
||||
lock_prof_enable = !!v;
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
reset_lock_prof_stats(SYSCTL_HANDLER_ARGS)
|
||||
{
|
||||
int error, v;
|
||||
|
||||
v = 0;
|
||||
error = sysctl_handle_int(oidp, &v, 0, req);
|
||||
if (error)
|
||||
return (error);
|
||||
if (req->newptr == NULL)
|
||||
return (error);
|
||||
if (v == 0)
|
||||
return (0);
|
||||
lock_prof_reset();
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static struct lock_prof *
|
||||
lock_profile_lookup(struct lock_object *lo, int spin, const char *file,
|
||||
int line)
|
||||
{
|
||||
const char *unknown = "(unknown)";
|
||||
struct lock_prof_type *type;
|
||||
struct lock_prof *lp;
|
||||
struct lphead *head;
|
||||
const char *p;
|
||||
u_int hash;
|
||||
|
||||
p = file;
|
||||
if (p == NULL || *p == '\0')
|
||||
p = unknown;
|
||||
hash = (uintptr_t)lo->lo_name * 31 + (uintptr_t)p * 31 + line;
|
||||
hash &= LPROF_HASH_MASK;
|
||||
type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin];
|
||||
head = &type->lpt_hash[hash];
|
||||
SLIST_FOREACH(lp, head, link) {
|
||||
if (lp->line == line && lp->file == p &&
|
||||
lp->name == lo->lo_name)
|
||||
return (lp);
|
||||
|
||||
}
|
||||
lp = SLIST_FIRST(&type->lpt_lpalloc);
|
||||
if (lp == NULL) {
|
||||
lock_prof_rejected++;
|
||||
return (lp);
|
||||
}
|
||||
SLIST_REMOVE_HEAD(&type->lpt_lpalloc, link);
|
||||
lp->file = p;
|
||||
lp->line = line;
|
||||
lp->type = lo->lo_type;
|
||||
lp->name = lo->lo_name;
|
||||
SLIST_INSERT_HEAD(&type->lpt_hash[hash], lp, link);
|
||||
return (lp);
|
||||
}
|
||||
|
||||
static struct lock_profile_object *
|
||||
lock_profile_object_lookup(struct lock_object *lo, int spin, const char *file,
|
||||
int line)
|
||||
{
|
||||
struct lock_profile_object *l;
|
||||
struct lock_prof_type *type;
|
||||
struct lpohead *head;
|
||||
|
||||
head = &curthread->td_lprof[spin];
|
||||
LIST_FOREACH(l, head, lpo_link)
|
||||
if (l->lpo_obj == lo && l->lpo_file == file &&
|
||||
l->lpo_line == line)
|
||||
return (l);
|
||||
critical_enter();
|
||||
type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin];
|
||||
l = LIST_FIRST(&type->lpt_lpoalloc);
|
||||
if (l == NULL) {
|
||||
lock_prof_rejected++;
|
||||
critical_exit();
|
||||
return (NULL);
|
||||
}
|
||||
LIST_REMOVE(l, lpo_link);
|
||||
critical_exit();
|
||||
l->lpo_obj = lo;
|
||||
l->lpo_file = file;
|
||||
l->lpo_line = line;
|
||||
l->lpo_cnt = 0;
|
||||
LIST_INSERT_HEAD(head, l, lpo_link);
|
||||
|
||||
return (l);
|
||||
}
|
||||
|
||||
void
|
||||
lock_profile_obtain_lock_success(struct lock_object *lo, int contested,
|
||||
uint64_t waittime, const char *file, int line)
|
||||
{
|
||||
static int lock_prof_count;
|
||||
struct lock_profile_object *l;
|
||||
int spin;
|
||||
|
||||
/* don't reset the timer when/if recursing */
|
||||
if (!lock_prof_enable || (lo->lo_flags & LO_NOPROFILE))
|
||||
return;
|
||||
if (lock_prof_skipcount &&
|
||||
(++lock_prof_count % lock_prof_skipcount) == 0)
|
||||
return;
|
||||
spin = LOCK_CLASS(lo) == &lock_class_mtx_spin;
|
||||
if (spin && lock_prof_skipspin == 1)
|
||||
return;
|
||||
l = lock_profile_object_lookup(lo, spin, file, line);
|
||||
if (l == NULL)
|
||||
return;
|
||||
l->lpo_cnt++;
|
||||
if (++l->lpo_ref > 1)
|
||||
return;
|
||||
l->lpo_contest_locking = contested;
|
||||
l->lpo_acqtime = nanoseconds();
|
||||
if (waittime && (l->lpo_acqtime > waittime))
|
||||
l->lpo_waittime = l->lpo_acqtime - waittime;
|
||||
@ -271,87 +505,65 @@ void _lock_profile_obtain_lock_success(struct lock_object *lo, int contested, ui
|
||||
l->lpo_waittime = 0;
|
||||
}
|
||||
|
||||
void _lock_profile_release_lock(struct lock_object *lo)
|
||||
void
|
||||
lock_profile_release_lock(struct lock_object *lo)
|
||||
{
|
||||
struct lock_profile_object *l = &lo->lo_profile_obj;
|
||||
struct lock_profile_object *l;
|
||||
struct lock_prof_type *type;
|
||||
struct lock_prof *lp;
|
||||
u_int64_t holdtime;
|
||||
struct lpohead *head;
|
||||
int spin;
|
||||
|
||||
if (l->lpo_acqtime) {
|
||||
const char *unknown = "(unknown)";
|
||||
u_int64_t acqtime, now, waittime;
|
||||
struct lock_prof *mpp;
|
||||
u_int hash;
|
||||
const char *p = l->lpo_filename;
|
||||
int collision = 0;
|
||||
|
||||
now = nanoseconds();
|
||||
acqtime = l->lpo_acqtime;
|
||||
waittime = l->lpo_waittime;
|
||||
if (now <= acqtime)
|
||||
return;
|
||||
if (p == NULL || *p == '\0')
|
||||
p = unknown;
|
||||
hash = (l->lpo_namehash * 31 * 31 + (uintptr_t)p * 31 + l->lpo_lineno) & LPROF_HASH_MASK;
|
||||
mpp = &lprof_buf[hash];
|
||||
while (mpp->name != NULL) {
|
||||
if (mpp->line == l->lpo_lineno &&
|
||||
mpp->file == p &&
|
||||
mpp->namehash == l->lpo_namehash)
|
||||
break;
|
||||
/* If the lprof_hash entry is allocated to someone
|
||||
* else, try the next one
|
||||
*/
|
||||
collision = 1;
|
||||
hash = (hash + 1) & LPROF_HASH_MASK;
|
||||
mpp = &lprof_buf[hash];
|
||||
}
|
||||
if (mpp->name == NULL) {
|
||||
int buf;
|
||||
|
||||
buf = atomic_fetchadd_int(&allocated_lprof_buf, 1);
|
||||
/* Just exit if we cannot get a trace buffer */
|
||||
if (buf >= LPROF_HASH_SIZE) {
|
||||
++lock_prof_rejected;
|
||||
return;
|
||||
}
|
||||
mpp->file = p;
|
||||
mpp->line = l->lpo_lineno;
|
||||
mpp->namehash = l->lpo_namehash;
|
||||
mpp->type = l->lpo_type;
|
||||
mpp->name = lo->lo_name;
|
||||
|
||||
if (collision)
|
||||
++lock_prof_collisions;
|
||||
|
||||
/*
|
||||
* We might have raced someone else but who cares,
|
||||
* they'll try again next time
|
||||
*/
|
||||
++lock_prof_records;
|
||||
}
|
||||
LPROF_LOCK(hash);
|
||||
/*
|
||||
* Record if the lock has been held longer now than ever
|
||||
* before.
|
||||
*/
|
||||
if (now - acqtime > mpp->cnt_max)
|
||||
mpp->cnt_max = now - acqtime;
|
||||
mpp->cnt_tot += now - acqtime;
|
||||
mpp->cnt_wait += waittime;
|
||||
mpp->cnt_cur++;
|
||||
/*
|
||||
* There's a small race, really we should cmpxchg
|
||||
* 0 with the current value, but that would bill
|
||||
* the contention to the wrong lock instance if
|
||||
* it followed this also.
|
||||
*/
|
||||
mpp->cnt_contest_holding += l->lpo_contest_holding;
|
||||
mpp->cnt_contest_locking += l->lpo_contest_locking;
|
||||
LPROF_UNLOCK(hash);
|
||||
|
||||
}
|
||||
l->lpo_acqtime = 0;
|
||||
l->lpo_waittime = 0;
|
||||
l->lpo_contest_locking = 0;
|
||||
l->lpo_contest_holding = 0;
|
||||
if (!lock_prof_enable || (lo->lo_flags & LO_NOPROFILE))
|
||||
return;
|
||||
spin = LOCK_CLASS(lo) == &lock_class_mtx_spin;
|
||||
head = &curthread->td_lprof[spin];
|
||||
critical_enter();
|
||||
LIST_FOREACH(l, head, lpo_link)
|
||||
if (l->lpo_obj == lo)
|
||||
break;
|
||||
if (l == NULL)
|
||||
goto out;
|
||||
if (--l->lpo_ref > 0)
|
||||
goto out;
|
||||
lp = lock_profile_lookup(lo, spin, l->lpo_file, l->lpo_line);
|
||||
if (lp == NULL)
|
||||
goto release;
|
||||
holdtime = nanoseconds() - l->lpo_acqtime;
|
||||
if (holdtime < 0)
|
||||
goto release;
|
||||
/*
|
||||
* Record if the lock has been held longer now than ever
|
||||
* before.
|
||||
*/
|
||||
if (holdtime > lp->cnt_max)
|
||||
lp->cnt_max = holdtime;
|
||||
lp->cnt_tot += holdtime;
|
||||
lp->cnt_wait += l->lpo_waittime;
|
||||
lp->cnt_contest_locking += l->lpo_contest_locking;
|
||||
lp->cnt_cur += l->lpo_cnt;
|
||||
release:
|
||||
LIST_REMOVE(l, lpo_link);
|
||||
type = &lp_cpu[PCPU_GET(cpuid)]->lpc_types[spin];
|
||||
LIST_INSERT_HEAD(&type->lpt_lpoalloc, l, lpo_link);
|
||||
out:
|
||||
critical_exit();
|
||||
}
|
||||
|
||||
/*
 * sysctl nodes for lock profiling, declared under debug.lock.prof:
 *   skipspin  - int, read/write, backed by lock_prof_skipspin
 *   skipcount - int, read/write, backed by lock_prof_skipcount
 *   rejected  - int, read-only, backed by lock_prof_rejected
 *   stats     - string, read-only, produced by dump_lock_prof_stats()
 *   reset     - int, read/write, handled by reset_lock_prof_stats()
 *   enable    - int, read/write, handled by enable_lock_prof()
 */
SYSCTL_NODE(_debug, OID_AUTO, lock, CTLFLAG_RD, NULL, "lock debugging");
|
||||
SYSCTL_NODE(_debug_lock, OID_AUTO, prof, CTLFLAG_RD, NULL, "lock profiling");
|
||||
SYSCTL_INT(_debug_lock_prof, OID_AUTO, skipspin, CTLFLAG_RW,
|
||||
&lock_prof_skipspin, 0, "Skip profiling on spinlocks.");
|
||||
SYSCTL_INT(_debug_lock_prof, OID_AUTO, skipcount, CTLFLAG_RW,
|
||||
&lock_prof_skipcount, 0, "Sample approximately every N lock acquisitions.");
|
||||
SYSCTL_INT(_debug_lock_prof, OID_AUTO, rejected, CTLFLAG_RD,
|
||||
&lock_prof_rejected, 0, "Number of rejected profiling records");
|
||||
SYSCTL_PROC(_debug_lock_prof, OID_AUTO, stats, CTLTYPE_STRING | CTLFLAG_RD,
|
||||
NULL, 0, dump_lock_prof_stats, "A", "Lock profiling statistics");
|
||||
SYSCTL_PROC(_debug_lock_prof, OID_AUTO, reset, CTLTYPE_INT | CTLFLAG_RW,
|
||||
NULL, 0, reset_lock_prof_stats, "I", "Reset lock profiling statistics");
|
||||
SYSCTL_PROC(_debug_lock_prof, OID_AUTO, enable, CTLTYPE_INT | CTLFLAG_RW,
|
||||
NULL, 0, enable_lock_prof, "I", "Enable lock profiling");
|
||||
|
||||
#endif
|
||||
|
@ -31,35 +31,10 @@
|
||||
#ifndef _SYS__LOCK_H_
|
||||
#define _SYS__LOCK_H_
|
||||
|
||||
struct lock_profile_object {
|
||||
/*
|
||||
* This does not result in variant structure sizes because
|
||||
* MUTEX_PROFILING is in opt_global.h
|
||||
*/
|
||||
u_int64_t lpo_acqtime;
|
||||
u_int64_t lpo_waittime;
|
||||
const char *lpo_filename;
|
||||
u_int lpo_namehash;
|
||||
int lpo_lineno;
|
||||
const char *lpo_type;
|
||||
/*
|
||||
* Fields relating to measuring contention on mutexes.
|
||||
* holding must be accessed atomically since it's
|
||||
* modified by threads that don't yet hold the mutex.
|
||||
* locking is only modified and referenced while
|
||||
* the mutex is held.
|
||||
*/
|
||||
u_int lpo_contest_holding;
|
||||
u_int lpo_contest_locking;
|
||||
};
|
||||
|
||||
struct lock_object {
|
||||
const char *lo_name; /* Individual lock name. */
|
||||
const char *lo_type; /* General lock type. */
|
||||
u_int lo_flags;
|
||||
#ifdef LOCK_PROFILING
|
||||
struct lock_profile_object lo_profile_obj;
|
||||
#endif
|
||||
union { /* Data for witness. */
|
||||
STAILQ_ENTRY(lock_object) lod_list;
|
||||
struct witness *lod_witness;
|
||||
|
@ -31,139 +31,54 @@
|
||||
#ifndef _SYS_LOCK_PROFILE_H_
|
||||
#define _SYS_LOCK_PROFILE_H_
|
||||
|
||||
#ifdef LOCK_PROFILING
|
||||
#include <sys/stdint.h>
|
||||
#include <sys/ktr.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <machine/atomic.h>
|
||||
#include <machine/cpufunc.h>
|
||||
#ifdef _KERNEL
|
||||
|
||||
#ifndef LPROF_HASH_SIZE
|
||||
#define LPROF_HASH_SIZE 4096
|
||||
#define LPROF_HASH_MASK (LPROF_HASH_SIZE - 1)
|
||||
#endif
|
||||
struct lock_profile_object;
|
||||
LIST_HEAD(lpohead, lock_profile_object);
|
||||
|
||||
#ifdef LOCK_PROFILING
|
||||
#include <sys/lock.h>
|
||||
|
||||
#ifndef USE_CPU_NANOSECONDS
|
||||
u_int64_t nanoseconds(void);
|
||||
#endif
|
||||
|
||||
struct lock_prof {
|
||||
const char *name;
|
||||
const char *type;
|
||||
const char *file;
|
||||
u_int namehash;
|
||||
int line;
|
||||
uintmax_t cnt_max;
|
||||
uintmax_t cnt_tot;
|
||||
uintmax_t cnt_wait;
|
||||
uintmax_t cnt_cur;
|
||||
uintmax_t cnt_contest_holding;
|
||||
uintmax_t cnt_contest_locking;
|
||||
};
|
||||
|
||||
extern struct lock_prof lprof_buf[LPROF_HASH_SIZE];
|
||||
#define LPROF_SBUF_SIZE 256 * 400
|
||||
|
||||
/* We keep a smaller pool of spin mutexes for protecting the lprof hash entries */
|
||||
#define LPROF_LOCK_SIZE 16
|
||||
#define LPROF_LOCK_MASK (LPROF_LOCK_SIZE - 1)
|
||||
#define LPROF_LHASH(hash) ((hash) & LPROF_LOCK_MASK)
|
||||
|
||||
#define LPROF_LOCK(hash) mtx_lock_spin(&lprof_locks[LPROF_LHASH(hash)])
|
||||
#define LPROF_UNLOCK(hash) mtx_unlock_spin(&lprof_locks[LPROF_LHASH(hash)])
|
||||
|
||||
#ifdef _KERNEL
|
||||
extern struct mtx lprof_locks[LPROF_LOCK_SIZE];
|
||||
extern int lock_prof_enable;
|
||||
|
||||
void _lock_profile_obtain_lock_success(struct lock_object *lo, int contested, uint64_t waittime, const char *file, int line);
|
||||
void _lock_profile_update_wait(struct lock_object *lo, uint64_t waitstart);
|
||||
void _lock_profile_release_lock(struct lock_object *lo);
|
||||
void lock_profile_obtain_lock_success(struct lock_object *lo, int contested,
|
||||
uint64_t waittime, const char *file, int line);
|
||||
void lock_profile_release_lock(struct lock_object *lo);
|
||||
|
||||
static inline void lock_profile_object_init(struct lock_object *lo, struct lock_class *class, const char *name) {
|
||||
const char *p;
|
||||
u_int hash = 0;
|
||||
struct lock_profile_object *l = &lo->lo_profile_obj;
|
||||
|
||||
l->lpo_acqtime = 0;
|
||||
l->lpo_waittime = 0;
|
||||
l->lpo_filename = NULL;
|
||||
l->lpo_lineno = 0;
|
||||
l->lpo_contest_holding = 0;
|
||||
l->lpo_contest_locking = 0;
|
||||
l->lpo_type = class->lc_name;
|
||||
|
||||
/* Hash the mutex name to an int so we don't have to strcmp() it repeatedly */
|
||||
for (p = name; *p != '\0'; p++)
|
||||
hash = 31 * hash + *p;
|
||||
l->lpo_namehash = hash;
|
||||
#if 0
|
||||
if (opts & MTX_PROFILE)
|
||||
l->lpo_stack = stack_create();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
static inline void
|
||||
lock_profile_object_destroy(struct lock_object *lo)
|
||||
static inline void
|
||||
lock_profile_obtain_lock_failed(struct lock_object *lo, int *contested,
|
||||
uint64_t *waittime)
|
||||
{
|
||||
#if 0
|
||||
struct lock_profile_object *l = &lo->lo_profile_obj;
|
||||
if (lo->lo_flags & LO_PROFILE)
|
||||
stack_destroy(l->lpo_stack);
|
||||
#endif
|
||||
if (!lock_prof_enable || (lo->lo_flags & LO_NOPROFILE) || *contested)
|
||||
return;
|
||||
*waittime = nanoseconds();
|
||||
*contested = 1;
|
||||
}
|
||||
|
||||
static inline void lock_profile_obtain_lock_failed(struct lock_object *lo, int *contested,
|
||||
uint64_t *waittime)
|
||||
{
|
||||
struct lock_profile_object *l = &lo->lo_profile_obj;
|
||||
|
||||
if (!(lo->lo_flags & LO_NOPROFILE) && lock_prof_enable &&
|
||||
*contested == 0) {
|
||||
*waittime = nanoseconds();
|
||||
atomic_add_int(&l->lpo_contest_holding, 1);
|
||||
*contested = 1;
|
||||
}
|
||||
}
|
||||
|
||||
static inline void lock_profile_obtain_lock_success(struct lock_object *lo, int contested, uint64_t waittime, const char *file, int line)
|
||||
{
|
||||
|
||||
/* don't reset the timer when/if recursing */
|
||||
if (!(lo->lo_flags & LO_NOPROFILE) && lock_prof_enable &&
|
||||
lo->lo_profile_obj.lpo_acqtime == 0) {
|
||||
#ifdef LOCK_PROFILING_FAST
|
||||
if (contested == 0)
|
||||
return;
|
||||
#endif
|
||||
_lock_profile_obtain_lock_success(lo, contested, waittime, file, line);
|
||||
}
|
||||
}
|
||||
static inline void lock_profile_release_lock(struct lock_object *lo)
|
||||
{
|
||||
struct lock_profile_object *l = &lo->lo_profile_obj;
|
||||
|
||||
if (!(lo->lo_flags & LO_NOPROFILE) && l->lpo_acqtime)
|
||||
_lock_profile_release_lock(lo);
|
||||
}
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#else /* !LOCK_PROFILING */
|
||||
|
||||
#ifdef _KERNEL
|
||||
static inline void lock_profile_update_wait(struct lock_object *lo, uint64_t waitstart) {;}
|
||||
static inline void lock_profile_update_contest_locking(struct lock_object *lo, int contested) {;}
|
||||
static inline void lock_profile_release_lock(struct lock_object *lo) {;}
|
||||
static inline void lock_profile_obtain_lock_failed(struct lock_object *lo, int *contested, uint64_t *waittime) {;}
|
||||
static inline void lock_profile_obtain_lock_success(struct lock_object *lo, int contested, uint64_t waittime,
|
||||
const char *file, int line) {;}
|
||||
static inline void lock_profile_object_destroy(struct lock_object *lo) {;}
|
||||
static inline void lock_profile_object_init(struct lock_object *lo, struct lock_class *class, const char *name) {;}
|
||||
static inline void
|
||||
lock_profile_release_lock(struct lock_object *lo)
|
||||
{
|
||||
}
|
||||
|
||||
#endif /* _KERNEL */
|
||||
static inline void
|
||||
lock_profile_obtain_lock_failed(struct lock_object *lo, int *contested, uint64_t *waittime)
|
||||
{
|
||||
}
|
||||
|
||||
static inline void
|
||||
lock_profile_obtain_lock_success(struct lock_object *lo, int contested, uint64_t waittime,
|
||||
const char *file, int line)
|
||||
{
|
||||
}
|
||||
|
||||
#endif /* !LOCK_PROFILING */
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#endif /* _SYS_LOCK_PROFILE_H_ */
|
||||
|
@ -45,6 +45,7 @@
|
||||
#endif
|
||||
#include <sys/queue.h>
|
||||
#include <sys/_lock.h>
|
||||
#include <sys/lock_profile.h>
|
||||
#include <sys/_mutex.h>
|
||||
#include <sys/priority.h>
|
||||
#include <sys/rtprio.h> /* XXX. */
|
||||
@ -298,6 +299,7 @@ struct thread {
|
||||
struct td_sched *td_sched; /* (*) Scheduler-specific data. */
|
||||
struct kaudit_record *td_ar; /* (k) Active audit record, if any. */
|
||||
int td_syscalls; /* per-thread syscall count (used by NFS :)) */
|
||||
struct lpohead td_lprof[2]; /* (a) lock profiling objects. */
|
||||
};
|
||||
|
||||
struct mtx *thread_lock_block(struct thread *);
|
||||
|
@ -178,11 +178,9 @@ __sx_slock(struct sx *sx, int opts, const char *file, int line)
|
||||
if (!(x & SX_LOCK_SHARED) ||
|
||||
!atomic_cmpset_acq_ptr(&sx->sx_lock, x, x + SX_ONE_SHARER))
|
||||
error = _sx_slock_hard(sx, opts, file, line);
|
||||
#ifdef LOCK_PROFILING_SHARED
|
||||
else if (SX_SHARERS(x) == 0)
|
||||
else
|
||||
lock_profile_obtain_lock_success(&sx->lock_object, 0, 0, file,
|
||||
line);
|
||||
#endif
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user