locks: slightly depessimize lockstat

The slow path is always taken when lockstat is enabled. This induces
rdtsc (or equivalent) calls to read the cycle counter even when there
is no contention.

Still go to the slow path so as not to disturb the fast path, but
avoid the heavy lifting unless it is actually needed.

This reduces sys and real time during -j 80 buildkernel:
before: 3651.84s user 1105.59s system 5394% cpu 1:28.18 total
after: 3685.99s user 975.74s system 5450% cpu 1:25.53 total
disabled: 3697.96s user 411.13s system 5261% cpu 1:18.10 total

Note that, as the disabled numbers show, enabling lockstat still
incurs a significant hit.

LOCK_PROFILING results are not affected.
Author: Mateusz Guzik
Date:   2018-03-17 19:26:33 +00:00
commit 09bdec20a0 (parent 3cec5c77d6)
Notes:  svn2git 2020-12-20 02:59:44 +00:00
        svn path=/head/; revision=331109

3 changed files with 108 additions and 68 deletions
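Distilled, each of the three files below gets the same treatment. What
follows is a minimal standalone sketch of the pattern in plain C; the
struct lock, lock_try_fetch(), nsecs() and the bare lockstat_enabled
flag are simplified stand-ins for the kernel primitives, not the
actual KPI:

#include <stdatomic.h>
#include <stdbool.h>
#include <stdint.h>
#include <time.h>

#define	LOCK_UNOWNED	0

struct lock {
	_Atomic uintptr_t owner;
};

static bool lockstat_enabled;	/* stand-in for the lockstat probe gate */

static uint64_t
nsecs(void)
{
	struct timespec ts;

	clock_gettime(CLOCK_MONOTONIC, &ts);
	return ((uint64_t)ts.tv_sec * 1000000000 + (uint64_t)ts.tv_nsec);
}

/* One cmpset attempt; on failure *v is refreshed with the observed owner. */
static bool
lock_try_fetch(struct lock *l, uintptr_t *v, uintptr_t tid)
{

	return (atomic_compare_exchange_strong(&l->owner, v, tid));
}

void
lock_slowpath(struct lock *l, uintptr_t v, uintptr_t tid)
{
	int doing_lockprof = 0;
	uint64_t all_time = 0;

	if (lockstat_enabled) {
		/*
		 * Even with profiling on, the lock is usually uncontended.
		 * Retry the cheap cmpset first; only start timestamping
		 * once it is clear we will really wait.
		 */
		while (v == LOCK_UNOWNED) {
			if (lock_try_fetch(l, &v, tid))
				goto out_lockstat;
		}
		doing_lockprof = 1;
		all_time -= nsecs();
	}

	/* ... the pre-existing contended-acquire loop runs here ... */

	if (doing_lockprof)
		all_time += nsecs();	/* wall time spent contended */
out_lockstat:
	/* Fire the acquire probes here, using all_time when measured. */
	(void)all_time;
}

The retry loop costs no more than the cmpset the fast path has already
attempted, so the uncontended-but-profiled case now pays a branch on
the enabled check instead of two lockstat_nsecs() (rdtsc) calls.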

sys/kern/kern_mutex.c

@@ -491,8 +491,25 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v)
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	int doing_lockprof;
 #endif
 	td = curthread;
 	tid = (uintptr_t)td;
+	m = mtxlock2mtx(c);
+
+#ifdef KDTRACE_HOOKS
+	if (LOCKSTAT_PROFILE_ENABLED(adaptive__acquire)) {
+		while (v == MTX_UNOWNED) {
+			if (_mtx_obtain_lock_fetch(m, &v, tid))
+				goto out_lockstat;
+		}
+		doing_lockprof = 1;
+		all_time -= lockstat_nsecs(&m->lock_object);
+	}
+#endif
+#ifdef LOCK_PROFILING
+	doing_lockprof = 1;
+#endif
 
 	if (SCHEDULER_STOPPED_TD(td))
 		return;
@@ -501,7 +518,7 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v)
 #elif defined(KDTRACE_HOOKS)
 	lock_delay_arg_init(&lda, NULL);
 #endif
-	m = mtxlock2mtx(c);
+
 	if (__predict_false(v == MTX_UNOWNED))
 		v = MTX_READ_VALUE(m);
@@ -532,13 +549,6 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v)
 		CTR4(KTR_LOCK,
 		    "_mtx_lock_sleep: %s contested (lock=%p) at %s:%d",
 		    m->lock_object.lo_name, (void *)m->mtx_lock, file, line);
-#ifdef LOCK_PROFILING
-	doing_lockprof = 1;
-#elif defined(KDTRACE_HOOKS)
-	doing_lockprof = lockstat_enabled;
-	if (__predict_false(doing_lockprof))
-		all_time -= lockstat_nsecs(&m->lock_object);
-#endif
 
 	for (;;) {
 		if (v == MTX_UNOWNED) {
@@ -660,10 +670,6 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v)
 #endif
 #ifdef KDTRACE_HOOKS
 	all_time += lockstat_nsecs(&m->lock_object);
-#endif
-	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, m, contested,
-	    waittime, file, line);
-#ifdef KDTRACE_HOOKS
 	if (sleep_time)
 		LOCKSTAT_RECORD1(adaptive__block, m, sleep_time);
@@ -672,7 +678,10 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v)
 	 */
 	if (lda.spin_cnt > sleep_cnt)
 		LOCKSTAT_RECORD1(adaptive__spin, m, all_time - sleep_time);
+out_lockstat:
 #endif
+	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(adaptive__acquire, m, contested,
+	    waittime, file, line);
 }
#ifdef SMP
@@ -708,6 +717,20 @@ _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t v)
 	tid = (uintptr_t)curthread;
 	m = mtxlock2mtx(c);
+
+#ifdef KDTRACE_HOOKS
+	if (LOCKSTAT_PROFILE_ENABLED(adaptive__acquire)) {
+		while (v == MTX_UNOWNED) {
+			if (_mtx_obtain_lock_fetch(m, &v, tid))
+				goto out_lockstat;
+		}
+		doing_lockprof = 1;
+		spin_time -= lockstat_nsecs(&m->lock_object);
+	}
+#endif
+#ifdef LOCK_PROFILING
+	doing_lockprof = 1;
+#endif
 
 	if (__predict_false(v == MTX_UNOWNED))
 		v = MTX_READ_VALUE(m);
@@ -730,13 +753,7 @@ _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t v)
 	PMC_SOFT_CALL( , , lock, failed);
 #endif
 	lock_profile_obtain_lock_failed(&m->lock_object, &contested, &waittime);
-#ifdef LOCK_PROFILING
-	doing_lockprof = 1;
-#elif defined(KDTRACE_HOOKS)
-	doing_lockprof = lockstat_enabled;
-	if (__predict_false(doing_lockprof))
-		spin_time -= lockstat_nsecs(&m->lock_object);
-#endif
+
 	for (;;) {
 		if (v == MTX_UNOWNED) {
 			if (_mtx_obtain_lock_fetch(m, &v, tid))
@@ -767,13 +784,12 @@ _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t v)
 #endif
 #ifdef KDTRACE_HOOKS
 	spin_time += lockstat_nsecs(&m->lock_object);
+	if (lda.spin_cnt != 0)
+		LOCKSTAT_RECORD1(spin__spin, m, spin_time);
+out_lockstat:
 #endif
 	LOCKSTAT_PROFILE_OBTAIN_LOCK_SUCCESS(spin__acquire, m,
 	    contested, waittime, file, line);
-#ifdef KDTRACE_HOOKS
-	if (lda.spin_cnt != 0)
-		LOCKSTAT_RECORD1(spin__spin, m, spin_time);
-#endif
 }
#endif /* SMP */

sys/kern/kern_rwlock.c

@@ -438,7 +438,21 @@ __rw_rlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
 #endif
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	uintptr_t state;
-	int doing_lockprof;
+	int doing_lockprof = 0;
 #endif
 
+#ifdef KDTRACE_HOOKS
+	if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) {
+		if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG))
+			goto out_lockstat;
+		doing_lockprof = 1;
+		all_time -= lockstat_nsecs(&rw->lock_object);
+		state = v;
+	}
+#endif
+#ifdef LOCK_PROFILING
+	doing_lockprof = 1;
+	state = v;
+#endif
+
 	if (SCHEDULER_STOPPED())
@@ -456,17 +470,6 @@ __rw_rlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
 		lock_profile_obtain_lock_failed(&rw->lock_object,
 		    &contested, &waittime);
-#ifdef LOCK_PROFILING
-	doing_lockprof = 1;
-	state = v;
-#elif defined(KDTRACE_HOOKS)
-	doing_lockprof = lockstat_enabled;
-	if (__predict_false(doing_lockprof)) {
-		all_time -= lockstat_nsecs(&rw->lock_object);
-		state = v;
-	}
-#endif
-
 	for (;;) {
 		if (__rw_rlock_try(rw, td, &v, false LOCK_FILE_LINE_ARG))
 			break;
@@ -615,6 +618,7 @@ __rw_rlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
 	LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
 	    LOCKSTAT_READER, (state & RW_LOCK_READ) == 0,
 	    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
+out_lockstat:
 #endif
 	/*
 	 * TODO: acquire "owner of record" here. Here be turnstile dragons
@@ -892,10 +896,28 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
 #endif
 #if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
 	uintptr_t state;
-	int doing_lockprof;
+	int doing_lockprof = 0;
 #endif
 
 	tid = (uintptr_t)curthread;
+	rw = rwlock2rw(c);
+
+#ifdef KDTRACE_HOOKS
+	if (LOCKSTAT_PROFILE_ENABLED(rw__acquire)) {
+		while (v == RW_UNLOCKED) {
+			if (_rw_write_lock_fetch(rw, &v, tid))
+				goto out_lockstat;
+		}
+		doing_lockprof = 1;
+		all_time -= lockstat_nsecs(&rw->lock_object);
+		state = v;
+	}
+#endif
+#ifdef LOCK_PROFILING
+	doing_lockprof = 1;
+	state = v;
+#endif
+
 	if (SCHEDULER_STOPPED())
 		return;
@@ -904,7 +926,6 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
 #elif defined(KDTRACE_HOOKS)
 	lock_delay_arg_init(&lda, NULL);
 #endif
-	rw = rwlock2rw(c);
 	if (__predict_false(v == RW_UNLOCKED))
 		v = RW_READ_VALUE(rw);
@@ -929,17 +950,6 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
 	lock_profile_obtain_lock_failed(&rw->lock_object,
 	    &contested, &waittime);
-#ifdef LOCK_PROFILING
-	doing_lockprof = 1;
-	state = v;
-#elif defined(KDTRACE_HOOKS)
-	doing_lockprof = lockstat_enabled;
-	if (__predict_false(doing_lockprof)) {
-		all_time -= lockstat_nsecs(&rw->lock_object);
-		state = v;
-	}
-#endif
-
 	for (;;) {
 		if (v == RW_UNLOCKED) {
 			if (_rw_write_lock_fetch(rw, &v, tid))
@@ -1101,6 +1111,7 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
 	LOCKSTAT_RECORD4(rw__spin, rw, all_time - sleep_time,
 	    LOCKSTAT_WRITER, (state & RW_LOCK_READ) == 0,
 	    (state & RW_LOCK_READ) == 0 ? 0 : RW_READERS(state));
+out_lockstat:
 #endif
 	LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(rw__acquire, rw, contested,
 	    waittime, file, line, LOCKSTAT_WRITER);

sys/kern/kern_sx.c

@@ -565,6 +565,23 @@ _sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LOCK_FILE_LINE_ARG_DEF)
 	int extra_work = 0;
 
 	tid = (uintptr_t)curthread;
+
+#ifdef KDTRACE_HOOKS
+	if (LOCKSTAT_PROFILE_ENABLED(sx__acquire)) {
+		while (x == SX_LOCK_UNLOCKED) {
+			if (atomic_fcmpset_acq_ptr(&sx->sx_lock, &x, tid))
+				goto out_lockstat;
+		}
+		extra_work = 1;
+		all_time -= lockstat_nsecs(&sx->lock_object);
+		state = x;
+	}
+#endif
+#ifdef LOCK_PROFILING
+	extra_work = 1;
+	state = x;
+#endif
+
 	if (SCHEDULER_STOPPED())
 		return (0);
@@ -603,16 +620,6 @@ _sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LOCK_FILE_LINE_ARG_DEF)
 	lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
 	    &waittime);
-#ifdef LOCK_PROFILING
-	extra_work = 1;
-	state = x;
-#elif defined(KDTRACE_HOOKS)
-	extra_work = lockstat_enabled;
-	if (__predict_false(extra_work)) {
-		all_time -= lockstat_nsecs(&sx->lock_object);
-		state = x;
-	}
-#endif
 #ifndef INVARIANTS
 	GIANT_SAVE(extra_work);
 #endif
@@ -800,6 +807,7 @@ _sx_xlock_hard(struct sx *sx, uintptr_t x, int opts LOCK_FILE_LINE_ARG_DEF)
 	LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time,
 	    LOCKSTAT_WRITER, (state & SX_LOCK_SHARED) == 0,
 	    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
+out_lockstat:
 #endif
 	if (!error)
 		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,
@@ -932,6 +940,20 @@ _sx_slock_hard(struct sx *sx, int opts, uintptr_t x LOCK_FILE_LINE_ARG_DEF)
 #endif
 	int extra_work = 0;
 
+#ifdef KDTRACE_HOOKS
+	if (LOCKSTAT_PROFILE_ENABLED(sx__acquire)) {
+		if (__sx_slock_try(sx, &x LOCK_FILE_LINE_ARG))
+			goto out_lockstat;
+		extra_work = 1;
+		all_time -= lockstat_nsecs(&sx->lock_object);
+		state = x;
+	}
+#endif
+#ifdef LOCK_PROFILING
+	extra_work = 1;
+	state = x;
+#endif
+
 	if (SCHEDULER_STOPPED())
 		return (0);
@@ -951,16 +973,6 @@ _sx_slock_hard(struct sx *sx, int opts, uintptr_t x LOCK_FILE_LINE_ARG_DEF)
 	lock_profile_obtain_lock_failed(&sx->lock_object, &contested,
 	    &waittime);
-#ifdef LOCK_PROFILING
-	extra_work = 1;
-	state = x;
-#elif defined(KDTRACE_HOOKS)
-	extra_work = lockstat_enabled;
-	if (__predict_false(extra_work)) {
-		all_time -= lockstat_nsecs(&sx->lock_object);
-		state = x;
-	}
-#endif
 #ifndef INVARIANTS
 	GIANT_SAVE(extra_work);
 #endif
@@ -1102,6 +1114,7 @@ _sx_slock_hard(struct sx *sx, int opts, uintptr_t x LOCK_FILE_LINE_ARG_DEF)
 	LOCKSTAT_RECORD4(sx__spin, sx, all_time - sleep_time,
 	    LOCKSTAT_READER, (state & SX_LOCK_SHARED) == 0,
 	    (state & SX_LOCK_SHARED) == 0 ? 0 : SX_SHARERS(state));
+out_lockstat:
 #endif
 	if (error == 0) {
 		LOCKSTAT_PROFILE_OBTAIN_RWLOCK_SUCCESS(sx__acquire, sx,