Further improvements to LOCK_PROFILING:

 - Fix a missing initialization in kern_rwlock.c that caused bogus times to be
   collected.
 - Move updates to the lock hash to after the lock is released, for spin
   mutexes, sleep mutexes, and sx locks (see the sketch after this list).
 - Add a new kernel build option, LOCK_PROFILING_FAST, which only updates lock
   profiling statistics when an acquisition is contended. This reduces the
   overhead of LOCK_PROFILING to a 20%-25% increase in system time, which for
   "make -j8 kernel-toolchain" on a dual-Woodcrest machine is unmeasurable in
   wall-clock time. By contrast, enabling lock profiling without
   LOCK_PROFILING_FAST gives a 5x-6x slowdown in wall-clock time.
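
For clarity, here is a consolidated sketch of the unlock-path pattern the mutex
and sx changes below implement (simplified from the kern_mutex.c hunks; the
function name example_mtx_unlock is illustrative and the usual assertions and
logging are omitted):

void
example_mtx_unlock(struct mtx *m, int opts, const char *file, int line)
{
        struct lock_object lo;

#ifdef LOCK_PROFILING
        /* Snapshot the profiling state while the lock is still held. */
        memcpy(&lo, &m->mtx_object, sizeof(lo));
        m->mtx_object.lo_flags &= ~LO_CONTESTED;
#endif
        /* Drop the lock first, so the hash update happens off the hold path. */
        _rel_sleep_lock(m, curthread, opts, file, line);
        /*
         * Charge hold/wait time against the private snapshot; with
         * LOCK_PROFILING_FAST this becomes a no-op for uncontested
         * acquisitions.
         */
        lock_profile_release_lock(&lo);
}
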
commit f183910b97
parent fc5fe41fe9
Author: Kip Macy
Date:   2007-02-27 06:42:05 +00:00
Notes:  svn2git 2020-12-20 02:59:44 +00:00
        svn path=/head/; revision=167054
6 changed files with 46 additions and 11 deletions

View File

@@ -548,6 +548,7 @@ MCLSHIFT opt_global.h
MUTEX_DEBUG opt_global.h
MUTEX_NOINLINE opt_global.h
LOCK_PROFILING opt_global.h
LOCK_PROFILING_FAST opt_global.h
MSIZE opt_global.h
REGRESSION opt_global.h
RESTARTABLE_PANICS opt_global.h
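
To use the new option in a build, a custom kernel configuration would include
something along these lines (a sketch; LOCK_PROFILING_FAST only changes
behavior when LOCK_PROFILING is also enabled, per the lock_profile.h change at
the end of this commit):

options         LOCK_PROFILING          # collect lock contention/hold statistics
options         LOCK_PROFILING_FAST     # only record contested acquisitions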

View File

@@ -158,6 +158,8 @@ void
_mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
{
struct lock_object lo;
MPASS(curthread != NULL);
KASSERT(m->mtx_lock != MTX_DESTROYED,
("mtx_unlock() of destroyed mutex @ %s:%d", file, line));
@@ -169,9 +171,12 @@ _mtx_unlock_flags(struct mtx *m, int opts, const char *file, int line)
LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
line);
mtx_assert(m, MA_OWNED);
lock_profile_release_lock(&m->mtx_object);
#ifdef LOCK_PROFILING
memcpy(&lo, &m->mtx_object, sizeof(lo));
m->mtx_object.lo_flags &= ~LO_CONTESTED;
#endif
_rel_sleep_lock(m, curthread, opts, file, line);
lock_profile_release_lock(&lo);
}
void
@@ -196,6 +201,8 @@ void
_mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
{
struct lock_object lo;
MPASS(curthread != NULL);
KASSERT(m->mtx_lock != MTX_DESTROYED,
("mtx_unlock_spin() of destroyed mutex @ %s:%d", file, line));
@@ -206,8 +213,12 @@ _mtx_unlock_spin_flags(struct mtx *m, int opts, const char *file, int line)
LOCK_LOG_LOCK("UNLOCK", &m->mtx_object, opts, m->mtx_recurse, file,
line);
mtx_assert(m, MA_OWNED);
lock_profile_release_lock(&m->mtx_object);
#ifdef LOCK_PROFILING
memcpy(&lo, &m->mtx_object, sizeof(lo));
m->mtx_object.lo_flags &= ~LO_CONTESTED;
#endif
_rel_spin_lock(m);
lock_profile_release_lock(&lo);
}
/*

View File

@@ -150,8 +150,8 @@ _rw_rlock(struct rwlock *rw, const char *file, int line)
#ifdef SMP
volatile struct thread *owner;
#endif
uint64_t waitstart;
int contested;
uint64_t waitstart = 0;
int contested = 0;
uintptr_t x;
KASSERT(rw_wowner(rw) != curthread,

View File

@@ -228,7 +228,9 @@ _sx_try_xlock(struct sx *sx, const char *file, int line)
void
_sx_sunlock(struct sx *sx, const char *file, int line)
{
struct lock_object lo;
int count = -1;
_sx_assert(sx, SX_SLOCKED, file, line);
mtx_lock(sx->sx_lock);
@@ -238,8 +240,13 @@ _sx_sunlock(struct sx *sx, const char *file, int line)
/* Release. */
sx->sx_cnt--;
if (sx->sx_cnt == 0)
lock_profile_release_lock(&sx->sx_object);
#ifdef LOCK_PROFILING
if (sx->sx_cnt == 0) {
memcpy(&lo, &sx->sx_object, sizeof(lo));
sx->sx_object.lo_flags &= ~LO_CONTESTED;
count = 0;
}
#endif
/*
* If we just released the last shared lock, wake any waiters up, giving
* exclusive lockers precedence. In order to make sure that exclusive
@@ -255,12 +262,16 @@ _sx_sunlock(struct sx *sx, const char *file, int line)
LOCK_LOG_LOCK("SUNLOCK", &sx->sx_object, 0, 0, file, line);
mtx_unlock(sx->sx_lock);
if (count == 0)
lock_profile_release_lock(&lo);
}
void
_sx_xunlock(struct sx *sx, const char *file, int line)
{
struct lock_object lo;
_sx_assert(sx, SX_XLOCKED, file, line);
mtx_lock(sx->sx_lock);
MPASS(sx->sx_cnt == -1);
@@ -272,7 +283,10 @@ _sx_xunlock(struct sx *sx, const char *file, int line)
sx->sx_cnt++;
sx->sx_xholder = NULL;
lock_profile_release_lock(&sx->sx_object);
#ifdef LOCK_PROFILING
memcpy(&lo, &sx->sx_object, sizeof(lo));
sx->sx_object.lo_flags &= ~LO_CONTESTED;
#endif
/*
* Wake up waiters if there are any. Give precedence to slock waiters.
*/
@@ -284,6 +298,7 @@ _sx_xunlock(struct sx *sx, const char *file, int line)
LOCK_LOG_LOCK("XUNLOCK", &sx->sx_object, 0, 0, file, line);
mtx_unlock(sx->sx_lock);
lock_profile_release_lock(&lo);
}
int

View File

@@ -70,6 +70,7 @@ struct lock_class {
#define LO_ENROLLPEND 0x00800000 /* On the pending enroll list. */
#define LO_CLASSMASK 0x0f000000 /* Class index bitmask. */
#define LO_NOPROFILE 0x10000000 /* Don't profile this lock */
#define LO_CONTESTED 0x20000000 /* Lock was contested */
/*
Lock classes are statically assigned an index into the global lock_classes

View File

@@ -122,6 +122,7 @@ static inline void lock_profile_obtain_lock_failed(struct lock_object *lo, int *
if (lock_prof_enable && *contested == 0) {
*waittime = nanoseconds();
lo->lo_flags |= LO_CONTESTED;
atomic_add_int(&l->lpo_contest_holding, 1);
*contested = 1;
}
@@ -137,8 +138,14 @@ static inline void lock_profile_obtain_lock_success(struct lock_object *lo, int
static inline void lock_profile_release_lock(struct lock_object *lo)
{
struct lock_profile_object *l = &lo->lo_profile_obj;
if (lock_prof_enable || l->lpo_acqtime)
#ifdef LOCK_PROFILING_FAST
if((lo->lo_flags & LO_CONTESTED) == 0)
return;
#endif
if (lock_prof_enable || l->lpo_acqtime) {
lo->lo_flags &= ~LO_CONTESTED;
_lock_profile_release_lock(lo);
}
}
#else /* !LOCK_PROFILING */