From 982d11f836278f1e95ae1ae398aa4d1d07a19006 Mon Sep 17 00:00:00 2001
From: Jeff Roberson
Date: Tue, 5 Jun 2007 00:00:57 +0000
Subject: [PATCH] Commit 14/14 of sched_lock decomposition.

- Use thread_lock() rather than sched_lock for per-thread scheduling
  synchronization.
- Use the per-process spinlock rather than the sched_lock for per-process
  scheduling synchronization.

Tested by:	kris, current@
Tested on:	i386, amd64, ULE, 4BSD, libthr, libkse, PREEMPTION, etc.
Discussed with:	kris, attilio, kmacy, jhb, julian, bde (small parts each)
---
 sys/amd64/amd64/machdep.c | 8 ++--
 sys/amd64/linux32/linux32_machdep.c | 12 +++---
 sys/compat/linprocfs/linprocfs.c | 4 +-
 sys/compat/ndis/subr_ntoskrnl.c | 4 +-
 sys/compat/svr4/svr4_misc.c | 6 +--
 sys/dev/hwpmc/hwpmc_mod.c | 12 +++---
 sys/dev/md/md.c | 4 +-
 sys/fs/procfs/procfs_ctl.c | 8 ++--
 sys/fs/procfs/procfs_ioctl.c | 4 +-
 sys/fs/procfs/procfs_status.c | 4 +-
 sys/geom/eli/g_eli.c | 4 +-
 sys/geom/geom_kern.c | 12 +++---
 sys/geom/journal/g_journal.c | 4 +-
 sys/geom/mirror/g_mirror.c | 4 +-
 sys/geom/raid3/g_raid3.c | 4 +-
 sys/i386/i386/machdep.c | 8 ++--
 sys/i386/isa/npx.c | 4 +-
 sys/i386/linux/linux_machdep.c | 12 +++---
 sys/ia64/ia64/pmap.c | 10 +++--
 sys/kern/init_main.c | 8 ++--
 sys/kern/kern_acct.c | 4 +-
 sys/kern/kern_cpu.c | 16 +++----
 sys/kern/kern_exit.c | 22 +++++-----
 sys/kern/kern_idle.c | 4 +-
 sys/kern/kern_intr.c | 48 ++++++++++-----------
 sys/kern/kern_kthread.c | 4 +-
 sys/kern/kern_lockf.c | 24 +++++++----
 sys/kern/kern_poll.c | 8 ++--
 sys/kern/kern_proc.c | 28 +++++++------
 sys/kern/kern_resource.c | 57 ++++++++++++++-----------
 sys/kern/kern_shutdown.c | 12 +++---
 sys/kern/kern_subr.c | 4 +-
 sys/kern/kern_thr.c | 23 +++++-----
 sys/kern/kern_time.c | 8 ++--
 sys/kern/ksched.c | 8 ----
 sys/kern/subr_prof.c | 17 ++++----
 sys/kern/subr_smp.c | 4 +-
 sys/kern/subr_taskqueue.c | 4 +-
 sys/kern/subr_trap.c | 19 +++++----
 sys/kern/sys_generic.c | 32 +++++++-------
 sys/kern/sys_process.c | 33 ++++++++-------
 sys/netncp/ncp_sock.c | 22 +++++-----
 sys/netsmb/smb_trantcp.c | 14 +++----
 sys/pc98/pc98/machdep.c | 8 ++--
 sys/security/mac_lomac/mac_lomac.c | 4 +-
 sys/ufs/ffs/ffs_snapshot.c | 24 +++++++----
 sys/vm/vm_glue.c | 65 +++++++++++++++++------------
 sys/vm/vm_meter.c | 12 ++++--
 sys/vm/vm_pageout.c | 19 +++++----
 sys/vm/vm_zeroidle.c | 8 ++--
 50 files changed, 373 insertions(+), 318 deletions(-)

diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 0b757492c256..a52ea5169b91 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -466,9 +466,9 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate)
 
 #ifdef SMP
 	/* Schedule ourselves on the indicated cpu. */
-	mtx_lock_spin(&sched_lock);
+	thread_lock(curthread);
 	sched_bind(curthread, cpu_id);
-	mtx_unlock_spin(&sched_lock);
+	thread_unlock(curthread);
 #endif
 
 	/* Calibrate by measuring a short delay. */
@@ -479,9 +479,9 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate)
 	intr_restore(reg);
 
 #ifdef SMP
-	mtx_lock_spin(&sched_lock);
+	thread_lock(curthread);
 	sched_unbind(curthread);
-	mtx_unlock_spin(&sched_lock);
+	thread_unlock(curthread);
 #endif
 
 	/*
diff --git a/sys/amd64/linux32/linux32_machdep.c b/sys/amd64/linux32/linux32_machdep.c
index 86a53ae57c42..9983e1fcd6fa 100644
--- a/sys/amd64/linux32/linux32_machdep.c
+++ b/sys/amd64/linux32/linux32_machdep.c
@@ -486,10 +486,10 @@ linux_fork(struct thread *td, struct linux_fork_args *args)
 
 	/*
 	 * Make this runnable after we are finished with it.
*/ - mtx_lock_spin(&sched_lock); + thread_lock(td2); TD_SET_CAN_RUN(td2); sched_add(td2, SRQ_BORING); - mtx_unlock_spin(&sched_lock); + thread_unlock(td2); return (0); } @@ -529,10 +529,10 @@ linux_vfork(struct thread *td, struct linux_vfork_args *args) /* * Make this runnable after we are finished with it. */ - mtx_lock_spin(&sched_lock); + thread_lock(td2); TD_SET_CAN_RUN(td2); sched_add(td2, SRQ_BORING); - mtx_unlock_spin(&sched_lock); + thread_unlock(td2); /* wait for the children to exit, ie. emulate vfork */ PROC_LOCK(p2); @@ -715,10 +715,10 @@ linux_clone(struct thread *td, struct linux_clone_args *args) /* * Make this runnable after we are finished with it. */ - mtx_lock_spin(&sched_lock); + thread_lock(td2); TD_SET_CAN_RUN(td2); sched_add(td2, SRQ_BORING); - mtx_unlock_spin(&sched_lock); + thread_unlock(td2); td->td_retval[0] = p2->p_pid; td->td_retval[1] = 0; diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c index 787ea1e2b611..e5cd300d6cba 100644 --- a/sys/compat/linprocfs/linprocfs.c +++ b/sys/compat/linprocfs/linprocfs.c @@ -636,7 +636,7 @@ linprocfs_doprocstatus(PFS_FILL_ARGS) if (P_SHOULDSTOP(p)) { state = "T (stopped)"; } else { - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); switch(p->p_state) { case PRS_NEW: state = "I (idle)"; @@ -666,7 +666,7 @@ linprocfs_doprocstatus(PFS_FILL_ARGS) state = "? (unknown)"; break; } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } fill_kinfo_proc(p, &kp); diff --git a/sys/compat/ndis/subr_ntoskrnl.c b/sys/compat/ndis/subr_ntoskrnl.c index a7ba1ad5e1e4..23adca9367de 100644 --- a/sys/compat/ndis/subr_ntoskrnl.c +++ b/sys/compat/ndis/subr_ntoskrnl.c @@ -3824,7 +3824,7 @@ ntoskrnl_dpc_thread(arg) * once scheduled by an ISR. */ - mtx_lock_spin(&sched_lock); + thread_lock(curthread); #ifdef NTOSKRNL_MULTIPLE_DPCS #if __FreeBSD_version >= 502102 sched_bind(curthread, kq->kq_cpu); @@ -3834,7 +3834,7 @@ ntoskrnl_dpc_thread(arg) #if __FreeBSD_version < 600000 curthread->td_base_pri = PRI_MIN_KERN; #endif - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); while (1) { KeWaitForSingleObject(&kq->kq_proc, 0, 0, TRUE, NULL); diff --git a/sys/compat/svr4/svr4_misc.c b/sys/compat/svr4/svr4_misc.c index f1f44eac4353..71d994d111cd 100644 --- a/sys/compat/svr4/svr4_misc.c +++ b/sys/compat/svr4/svr4_misc.c @@ -1253,12 +1253,12 @@ svr4_sys_waitsys(td, uap) * See if we have a stopped or continued process. * XXX: This duplicates the same code in kern_wait(). 
*/ - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if ((p->p_flag & P_STOPPED_SIG) && (p->p_suspcount == p->p_numthreads) && (p->p_flag & P_WAITED) == 0 && (p->p_flag & P_TRACED || uap->options & SVR4_WSTOPPED)) { - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); if (((uap->options & SVR4_WNOWAIT)) == 0) p->p_flag |= P_WAITED; sx_sunlock(&proctree_lock); @@ -1278,7 +1278,7 @@ svr4_sys_waitsys(td, uap) DPRINTF(("jobcontrol %d\n", pid)); return (svr4_setinfo(pid, &ru, status, uap->info)); } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); if (uap->options & SVR4_WCONTINUED && (p->p_flag & P_CONTINUED)) { sx_sunlock(&proctree_lock); diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c index 71b8e6e0df8c..6c0e0ea27254 100644 --- a/sys/dev/hwpmc/hwpmc_mod.c +++ b/sys/dev/hwpmc/hwpmc_mod.c @@ -591,10 +591,10 @@ static void pmc_save_cpu_binding(struct pmc_binding *pb) { PMCDBG(CPU,BND,2, "%s", "save-cpu"); - mtx_lock_spin(&sched_lock); + thread_lock(curthread); pb->pb_bound = sched_is_bound(curthread); pb->pb_cpu = curthread->td_oncpu; - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); PMCDBG(CPU,BND,2, "save-cpu cpu=%d", pb->pb_cpu); } @@ -607,12 +607,12 @@ pmc_restore_cpu_binding(struct pmc_binding *pb) { PMCDBG(CPU,BND,2, "restore-cpu curcpu=%d restore=%d", curthread->td_oncpu, pb->pb_cpu); - mtx_lock_spin(&sched_lock); + thread_lock(curthread); if (pb->pb_bound) sched_bind(curthread, pb->pb_cpu); else sched_unbind(curthread); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); PMCDBG(CPU,BND,2, "%s", "restore-cpu done"); } @@ -631,9 +631,9 @@ pmc_select_cpu(int cpu) "disabled CPU %d", __LINE__, cpu)); PMCDBG(CPU,SEL,2, "select-cpu cpu=%d", cpu); - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_bind(curthread, cpu); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); KASSERT(curthread->td_oncpu == cpu, ("[pmc,%d] CPU not bound [cpu=%d, curr=%d]", __LINE__, diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c index aaf7e9d5192f..48cd0595bd16 100644 --- a/sys/dev/md/md.c +++ b/sys/dev/md/md.c @@ -690,9 +690,9 @@ md_kthread(void *arg) int error; sc = arg; - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_prio(curthread, PRIBIO); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); if (sc->type == MD_VNODE) curthread->td_pflags |= TDP_NORUNNINGBUF; diff --git a/sys/fs/procfs/procfs_ctl.c b/sys/fs/procfs/procfs_ctl.c index 65f39f6caa1d..89aa304d99e0 100644 --- a/sys/fs/procfs/procfs_ctl.c +++ b/sys/fs/procfs/procfs_ctl.c @@ -286,9 +286,9 @@ procfs_control(struct thread *td, struct proc *p, int op) panic("procfs_control"); } - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); thread_unsuspend(p); /* If it can run, let it do so. 
*/ - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); return (0); } @@ -344,9 +344,9 @@ procfs_doprocctl(PFS_FILL_ARGS) #endif /* XXXKSE: */ p->p_flag &= ~P_STOPPED_SIG; - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); thread_unsuspend(p); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } else psignal(p, nm->nm_val); PROC_UNLOCK(p); diff --git a/sys/fs/procfs/procfs_ioctl.c b/sys/fs/procfs/procfs_ioctl.c index 8f87eef127ef..bd003e05c743 100644 --- a/sys/fs/procfs/procfs_ioctl.c +++ b/sys/fs/procfs/procfs_ioctl.c @@ -185,9 +185,9 @@ procfs_ioctl(PFS_IOCTL_ARGS) if (P_SHOULDSTOP(p)) { p->p_xstat = sig; p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SIG); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); thread_unsuspend(p); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } else if (sig) psignal(p, sig); #else diff --git a/sys/fs/procfs/procfs_status.c b/sys/fs/procfs/procfs_status.c index 63827758b113..b92d1577ae08 100644 --- a/sys/fs/procfs/procfs_status.c +++ b/sys/fs/procfs/procfs_status.c @@ -112,7 +112,7 @@ procfs_doprocstatus(PFS_FILL_ARGS) sbuf_printf(sb, "noflags"); } - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); #ifdef KSE if (p->p_flag & P_SA) wmesg = "-kse- "; @@ -127,7 +127,7 @@ procfs_doprocstatus(PFS_FILL_ARGS) } else wmesg = "nochan"; } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); if (p->p_sflag & PS_INMEM) { struct timeval start, ut, st; diff --git a/sys/geom/eli/g_eli.c b/sys/geom/eli/g_eli.c index b0e4cc8f81c7..d9f74d200ccb 100644 --- a/sys/geom/eli/g_eli.c +++ b/sys/geom/eli/g_eli.c @@ -332,11 +332,11 @@ g_eli_worker(void *arg) tsleep(wr, 0, "geli:smp", hz / 4); } #endif - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_prio(curthread, PRIBIO); if (sc->sc_crypto == G_ELI_CRYPTO_SW && g_eli_threads == 0) sched_bind(curthread, wr->w_number); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); G_ELI_DEBUG(1, "Thread %s started.", curthread->td_proc->p_comm); diff --git a/sys/geom/geom_kern.c b/sys/geom/geom_kern.c index 421e7e62e974..d32757928284 100644 --- a/sys/geom/geom_kern.c +++ b/sys/geom/geom_kern.c @@ -88,9 +88,9 @@ g_up_procbody(void) struct thread *tp = FIRST_THREAD_IN_PROC(p); mtx_assert(&Giant, MA_NOTOWNED); - mtx_lock_spin(&sched_lock); + thread_lock(tp); sched_prio(tp, PRIBIO); - mtx_unlock_spin(&sched_lock); + thread_unlock(tp); for(;;) { g_io_schedule_up(tp); } @@ -111,9 +111,9 @@ g_down_procbody(void) struct thread *tp = FIRST_THREAD_IN_PROC(p); mtx_assert(&Giant, MA_NOTOWNED); - mtx_lock_spin(&sched_lock); + thread_lock(tp); sched_prio(tp, PRIBIO); - mtx_unlock_spin(&sched_lock); + thread_unlock(tp); for(;;) { g_io_schedule_down(tp); } @@ -134,9 +134,9 @@ g_event_procbody(void) struct thread *tp = FIRST_THREAD_IN_PROC(p); mtx_assert(&Giant, MA_NOTOWNED); - mtx_lock_spin(&sched_lock); + thread_lock(tp); sched_prio(tp, PRIBIO); - mtx_unlock_spin(&sched_lock); + thread_unlock(tp); for(;;) { g_run_events(); tsleep(&g_wait_event, PRIBIO, "-", hz/10); diff --git a/sys/geom/journal/g_journal.c b/sys/geom/journal/g_journal.c index 6be6580a1c01..eff82935f2fa 100644 --- a/sys/geom/journal/g_journal.c +++ b/sys/geom/journal/g_journal.c @@ -2057,9 +2057,9 @@ g_journal_worker(void *arg) time_t last_write; int type; - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_prio(curthread, PRIBIO); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); sc = arg; type = 0; /* gcc */ diff --git a/sys/geom/mirror/g_mirror.c b/sys/geom/mirror/g_mirror.c index 419611eb520c..24a2fad8f5c0 100644 --- a/sys/geom/mirror/g_mirror.c +++ 
b/sys/geom/mirror/g_mirror.c @@ -1768,9 +1768,9 @@ g_mirror_worker(void *arg) int timeout; sc = arg; - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_prio(curthread, PRIBIO); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); sx_xlock(&sc->sc_lock); for (;;) { diff --git a/sys/geom/raid3/g_raid3.c b/sys/geom/raid3/g_raid3.c index 776b6bd3a0a7..98747eadd7b5 100644 --- a/sys/geom/raid3/g_raid3.c +++ b/sys/geom/raid3/g_raid3.c @@ -2017,9 +2017,9 @@ g_raid3_worker(void *arg) int timeout; sc = arg; - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_prio(curthread, PRIBIO); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); sx_xlock(&sc->sc_lock); for (;;) { diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index f9398dfd2d99..15dc66459a67 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -1058,9 +1058,9 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate) #ifdef SMP /* Schedule ourselves on the indicated cpu. */ - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_bind(curthread, cpu_id); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); #endif /* Calibrate by measuring a short delay. */ @@ -1071,9 +1071,9 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate) intr_restore(reg); #ifdef SMP - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_unbind(curthread); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); #endif /* diff --git a/sys/i386/isa/npx.c b/sys/i386/isa/npx.c index e9ba8b067b6f..c5d381e34df1 100644 --- a/sys/i386/isa/npx.c +++ b/sys/i386/isa/npx.c @@ -230,9 +230,9 @@ npx_intr(dummy) td = PCPU_GET(fpcurthread); if (td != NULL) { td->td_pcb->pcb_flags |= PCB_NPXTRAP; - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags |= TDF_ASTPENDING; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } return (FILTER_HANDLED); } diff --git a/sys/i386/linux/linux_machdep.c b/sys/i386/linux/linux_machdep.c index 5f916ed2d11b..75ccd3ed6cbc 100644 --- a/sys/i386/linux/linux_machdep.c +++ b/sys/i386/linux/linux_machdep.c @@ -325,10 +325,10 @@ linux_fork(struct thread *td, struct linux_fork_args *args) /* * Make this runnable after we are finished with it. */ - mtx_lock_spin(&sched_lock); + thread_lock(td2); TD_SET_CAN_RUN(td2); sched_add(td2, SRQ_BORING); - mtx_unlock_spin(&sched_lock); + thread_unlock(td2); return (0); } @@ -368,10 +368,10 @@ linux_vfork(struct thread *td, struct linux_vfork_args *args) /* * Make this runnable after we are finished with it. */ - mtx_lock_spin(&sched_lock); + thread_lock(td2); TD_SET_CAN_RUN(td2); sched_add(td2, SRQ_BORING); - mtx_unlock_spin(&sched_lock); + thread_unlock(td2); /* wait for the children to exit, ie. emulate vfork */ PROC_LOCK(p2); @@ -569,10 +569,10 @@ linux_clone(struct thread *td, struct linux_clone_args *args) /* * Make this runnable after we are finished with it. 
*/ - mtx_lock_spin(&sched_lock); + thread_lock(td2); TD_SET_CAN_RUN(td2); sched_add(td2, SRQ_BORING); - mtx_unlock_spin(&sched_lock); + thread_unlock(td2); td->td_retval[0] = p2->p_pid; td->td_retval[1] = 0; diff --git a/sys/ia64/ia64/pmap.c b/sys/ia64/ia64/pmap.c index 8879fd783ebe..0a0694e97624 100644 --- a/sys/ia64/ia64/pmap.c +++ b/sys/ia64/ia64/pmap.c @@ -2235,8 +2235,7 @@ pmap_switch(pmap_t pm) pmap_t prevpm; int i; - mtx_assert(&sched_lock, MA_OWNED); - + THREAD_LOCK_ASSERT(curthread, MA_OWNED); prevpm = PCPU_GET(current_pmap); if (prevpm == pm) return (prevpm); @@ -2263,10 +2262,13 @@ static pmap_t pmap_install(pmap_t pm) { pmap_t prevpm; + struct thread *td; - mtx_lock_spin(&sched_lock); + td = curthread; + thread_lock(td); prevpm = pmap_switch(pm); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); + return (prevpm); } diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 8f99b19bab97..0be3af326d98 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -713,9 +713,9 @@ create_init(const void *udata __unused) PROC_UNLOCK(initproc); crfree(oldcred); cred_update_thread(FIRST_THREAD_IN_PROC(initproc)); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(initproc); initproc->p_sflag |= PS_INMEM; - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(initproc); cpu_set_fork_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL); } SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL) @@ -729,9 +729,9 @@ kick_init(const void *udata __unused) struct thread *td; td = FIRST_THREAD_IN_PROC(initproc); - mtx_lock_spin(&sched_lock); + thread_lock(td); TD_SET_CAN_RUN(td); sched_add(td, SRQ_BORING); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST, kick_init, NULL) diff --git a/sys/kern/kern_acct.c b/sys/kern/kern_acct.c index 69a171afa84f..e7409b33bdc0 100644 --- a/sys/kern/kern_acct.c +++ b/sys/kern/kern_acct.c @@ -612,9 +612,9 @@ acct_thread(void *dummy) /* This is a low-priority kernel thread. */ pri = PRI_MAX_KERN; - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_prio(curthread, pri); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); /* If another accounting kthread is already running, just die. 
*/ sx_xlock(&acct_sx); diff --git a/sys/kern/kern_cpu.c b/sys/kern/kern_cpu.c index 3d2adfd2f813..6a72b9c6de58 100644 --- a/sys/kern/kern_cpu.c +++ b/sys/kern/kern_cpu.c @@ -298,17 +298,17 @@ cf_set_method(device_t dev, const struct cf_level *level, int priority) cpu_id = PCPU_GET(cpuid); pc = cpu_get_pcpu(set->dev); if (cpu_id != pc->pc_cpuid) { - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_bind(curthread, pc->pc_cpuid); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); } CF_DEBUG("setting abs freq %d on %s (cpu %d)\n", set->freq, device_get_nameunit(set->dev), PCPU_GET(cpuid)); error = CPUFREQ_DRV_SET(set->dev, set); if (cpu_id != pc->pc_cpuid) { - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_unbind(curthread); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); } if (error) { goto out; @@ -327,17 +327,17 @@ cf_set_method(device_t dev, const struct cf_level *level, int priority) cpu_id = PCPU_GET(cpuid); pc = cpu_get_pcpu(set->dev); if (cpu_id != pc->pc_cpuid) { - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_bind(curthread, pc->pc_cpuid); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); } CF_DEBUG("setting rel freq %d on %s (cpu %d)\n", set->freq, device_get_nameunit(set->dev), PCPU_GET(cpuid)); error = CPUFREQ_DRV_SET(set->dev, set); if (cpu_id != pc->pc_cpuid) { - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_unbind(curthread); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); } if (error) { /* XXX Back out any successful setting? */ diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 54ac39247ef6..9cab3216bc7e 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -523,12 +523,13 @@ exit1(struct thread *td, int rv) * proc lock. */ wakeup(p->p_pptr); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p->p_pptr); + sched_exit(p->p_pptr, td); + PROC_SUNLOCK(p->p_pptr); + PROC_SLOCK(p); p->p_state = PRS_ZOMBIE; PROC_UNLOCK(p->p_pptr); - sched_exit(p->p_pptr, td); - /* * Hopefully no one will try to deliver a signal to the process this * late in the game. @@ -718,12 +719,13 @@ kern_wait(struct thread *td, pid_t pid, int *status, int options, * in thread_exit() after having dropped the process * lock via PROC_UNLOCK() but before it has completed * cpu_throw(). In that case, the other thread must - * still hold sched_lock, so simply by acquiring - * sched_lock once we will wait long enough for the + * still hold the proc slock, so simply by acquiring + * proc slock once we will wait long enough for the * thread to exit in that case. + * XXX This is questionable. 
*/ - mtx_lock_spin(&sched_lock); - mtx_unlock_spin(&sched_lock); + PROC_SLOCK(p); + PROC_SUNLOCK(p); td->td_retval[0] = p->p_pid; if (status) @@ -820,12 +822,12 @@ kern_wait(struct thread *td, pid_t pid, int *status, int options, sx_xunlock(&allproc_lock); return (0); } - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if ((p->p_flag & P_STOPPED_SIG) && (p->p_suspcount == p->p_numthreads) && (p->p_flag & P_WAITED) == 0 && (p->p_flag & P_TRACED || options & WUNTRACED)) { - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); p->p_flag |= P_WAITED; sx_xunlock(&proctree_lock); td->td_retval[0] = p->p_pid; @@ -839,7 +841,7 @@ kern_wait(struct thread *td, pid_t pid, int *status, int options, return (0); } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); if (options & WCONTINUED && (p->p_flag & P_CONTINUED)) { sx_xunlock(&proctree_lock); td->td_retval[0] = p->p_pid; diff --git a/sys/kern/kern_idle.c b/sys/kern/kern_idle.c index f8ef0dd0acc7..7d0925ee9579 100644 --- a/sys/kern/kern_idle.c +++ b/sys/kern/kern_idle.c @@ -73,13 +73,13 @@ idle_setup(void *dummy) PROC_LOCK(p); p->p_flag |= P_NOLOAD; - mtx_lock_spin(&sched_lock); td = FIRST_THREAD_IN_PROC(p); + thread_lock(td); TD_SET_CAN_RUN(td); td->td_flags |= TDF_IDLETD; sched_class(td, PRI_IDLE); sched_prio(td, PRI_MAX_IDLE); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); PROC_UNLOCK(p); #ifdef SMP } diff --git a/sys/kern/kern_intr.c b/sys/kern/kern_intr.c index a33f96806e6c..d754440c3c15 100644 --- a/sys/kern/kern_intr.c +++ b/sys/kern/kern_intr.c @@ -173,9 +173,9 @@ ithread_update(struct intr_thread *ithd) /* Update name and priority. */ strlcpy(td->td_proc->p_comm, ie->ie_fullname, sizeof(td->td_proc->p_comm)); - mtx_lock_spin(&sched_lock); + thread_lock(td); sched_prio(td, pri); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } /* @@ -342,10 +342,10 @@ ithread_create(const char *name) if (error) panic("kthread_create() failed with %d", error); td = FIRST_THREAD_IN_PROC(p); /* XXXKSE */ - mtx_lock_spin(&sched_lock); + thread_lock(td); sched_class(td, PRI_ITHD); TD_SET_IWAIT(td); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); td->td_pflags |= TDP_ITHREAD; ithd->it_thread = td; CTR2(KTR_INTR, "%s: created %s", __func__, name); @@ -367,10 +367,10 @@ ithread_create(const char *name, struct intr_handler *ih) if (error) panic("kthread_create() failed with %d", error); td = FIRST_THREAD_IN_PROC(p); /* XXXKSE */ - mtx_lock_spin(&sched_lock); + thread_lock(td); sched_class(td, PRI_ITHD); TD_SET_IWAIT(td); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); td->td_pflags |= TDP_ITHREAD; ithd->it_thread = td; CTR2(KTR_INTR, "%s: created %s", __func__, name); @@ -385,13 +385,13 @@ ithread_destroy(struct intr_thread *ithread) CTR2(KTR_INTR, "%s: killing %s", __func__, ithread->it_event->ie_name); td = ithread->it_thread; - mtx_lock_spin(&sched_lock); + thread_lock(td); ithread->it_flags |= IT_DEAD; if (TD_AWAITING_INTR(td)) { TD_CLR_IWAIT(td); sched_add(td, SRQ_INTR); } - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } #ifndef INTR_FILTER @@ -622,7 +622,7 @@ intr_event_remove_handler(void *cookie) * so we have to remove the handler here rather than letting the * thread do it. 
*/ - mtx_lock_spin(&sched_lock); + thread_lock(ie->ie_thread->it_thread); if (!TD_AWAITING_INTR(ie->ie_thread->it_thread) && !cold) { handler->ih_flags |= IH_DEAD; @@ -634,7 +634,7 @@ intr_event_remove_handler(void *cookie) ie->ie_thread->it_need = 1; } else TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next); - mtx_unlock_spin(&sched_lock); + thread_unlock(ie->ie_thread->it_thread); while (handler->ih_flags & IH_DEAD) msleep(handler, &ie->ie_lock, 0, "iev_rmh", 0); intr_event_update(ie); @@ -699,11 +699,11 @@ intr_event_schedule_thread(struct intr_event *ie) /* * Set it_need to tell the thread to keep running if it is already - * running. Then, grab sched_lock and see if we actually need to - * put this thread on the runqueue. + * running. Then, lock the thread and see if we actually need to + * put it on the runqueue. */ it->it_need = 1; - mtx_lock_spin(&sched_lock); + thread_lock(td); if (TD_AWAITING_INTR(td)) { CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid, p->p_comm); @@ -713,7 +713,7 @@ intr_event_schedule_thread(struct intr_event *ie) CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d", __func__, p->p_pid, p->p_comm, it->it_need, td->td_state); } - mtx_unlock_spin(&sched_lock); + thread_unlock(td); return (0); } @@ -771,7 +771,7 @@ intr_event_remove_handler(void *cookie) * so we have to remove the handler here rather than letting the * thread do it. */ - mtx_lock_spin(&sched_lock); + thread_lock(it->it_thread); if (!TD_AWAITING_INTR(it->it_thread) && !cold) { handler->ih_flags |= IH_DEAD; @@ -783,7 +783,7 @@ intr_event_remove_handler(void *cookie) it->it_need = 1; } else TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next); - mtx_unlock_spin(&sched_lock); + thread_unlock(it->it_thread); while (handler->ih_flags & IH_DEAD) msleep(handler, &ie->ie_lock, 0, "iev_rmh", 0); /* @@ -853,11 +853,11 @@ intr_event_schedule_thread(struct intr_event *ie, struct intr_thread *it) /* * Set it_need to tell the thread to keep running if it is already - * running. Then, grab sched_lock and see if we actually need to - * put this thread on the runqueue. + * running. Then, lock the thread and see if we actually need to + * put it on the runqueue. */ it->it_need = 1; - mtx_lock_spin(&sched_lock); + thread_lock(td); if (TD_AWAITING_INTR(td)) { CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid, p->p_comm); @@ -867,7 +867,7 @@ intr_event_schedule_thread(struct intr_event *ie, struct intr_thread *it) CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d", __func__, p->p_pid, p->p_comm, it->it_need, td->td_state); } - mtx_unlock_spin(&sched_lock); + thread_unlock(td); return (0); } @@ -1128,13 +1128,13 @@ ithread_loop(void *arg) * lock. This may take a while and it_need may get * set again, so we have to check it again. */ - mtx_lock_spin(&sched_lock); + thread_lock(td); if (!ithd->it_need && !(ithd->it_flags & IT_DEAD)) { TD_SET_IWAIT(td); ie->ie_count = 0; mi_switch(SW_VOL, NULL); } - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } } #else @@ -1202,13 +1202,13 @@ ithread_loop(void *arg) * lock. This may take a while and it_need may get * set again, so we have to check it again. 
*/ - mtx_lock_spin(&sched_lock); + thread_lock(td); if (!ithd->it_need && !(ithd->it_flags & IT_DEAD)) { TD_SET_IWAIT(td); ie->ie_count = 0; mi_switch(SW_VOL, NULL); } - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } } diff --git a/sys/kern/kern_kthread.c b/sys/kern/kern_kthread.c index c473bb0948cd..03f7f47020b3 100644 --- a/sys/kern/kern_kthread.c +++ b/sys/kern/kern_kthread.c @@ -113,9 +113,9 @@ kthread_create(void (*func)(void *), void *arg, /* Delay putting it on the run queue until now. */ if (!(flags & RFSTOPPED)) { - mtx_lock_spin(&sched_lock); + thread_lock(td); sched_add(td, SRQ_BORING); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } return 0; diff --git a/sys/kern/kern_lockf.c b/sys/kern/kern_lockf.c index 483f953f8cb8..aaedc114ab68 100644 --- a/sys/kern/kern_lockf.c +++ b/sys/kern/kern_lockf.c @@ -266,16 +266,19 @@ lf_setlock(lock) */ if ((lock->lf_flags & F_POSIX) && (block->lf_flags & F_POSIX)) { - register struct proc *wproc; + struct proc *wproc; + struct proc *nproc; struct thread *td; - register struct lockf *waitblock; + struct lockf *waitblock; int i = 0; /* The block is waiting on something */ - /* XXXKSE this is not complete under threads */ wproc = (struct proc *)block->lf_id; - mtx_lock_spin(&sched_lock); +restart: + nproc = NULL; + PROC_SLOCK(wproc); FOREACH_THREAD_IN_PROC(wproc, td) { + thread_lock(td); while (td->td_wchan && (td->td_wmesg == lockstr) && (i++ < maxlockdepth)) { @@ -284,15 +287,20 @@ lf_setlock(lock) waitblock = waitblock->lf_next; if ((waitblock->lf_flags & F_POSIX) == 0) break; - wproc = (struct proc *)waitblock->lf_id; - if (wproc == (struct proc *)lock->lf_id) { - mtx_unlock_spin(&sched_lock); + nproc = (struct proc *)waitblock->lf_id; + if (nproc == (struct proc *)lock->lf_id) { + PROC_SUNLOCK(wproc); + thread_unlock(td); free(lock, M_LOCKF); return (EDEADLK); } } + thread_unlock(td); } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(wproc); + wproc = nproc; + if (wproc) + goto restart; } /* * For flock type locks, we must first remove diff --git a/sys/kern/kern_poll.c b/sys/kern/kern_poll.c index 3e3efbf8b48c..fa5d58a3134b 100644 --- a/sys/kern/kern_poll.c +++ b/sys/kern/kern_poll.c @@ -580,17 +580,17 @@ poll_idle(void) rtp.prio = RTP_PRIO_MAX; /* lowest priority */ rtp.type = RTP_PRIO_IDLE; - mtx_lock_spin(&sched_lock); + PROC_SLOCK(td->td_proc); rtp_to_pri(&rtp, td); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(td->td_proc); for (;;) { if (poll_in_idle_loop && poll_handlers > 0) { idlepoll_sleeping = 0; ether_poll(poll_each_burst); - mtx_lock_spin(&sched_lock); + thread_lock(td); mi_switch(SW_VOL, NULL); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } else { idlepoll_sleeping = 1; tsleep(&idlepoll_sleeping, 0, "pollid", hz * 3); diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index a8ac12e92ccd..7abdfcf90f84 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -177,6 +177,7 @@ proc_init(void *mem, int size, int flags) td = thread_alloc(); bzero(&p->p_mtx, sizeof(struct mtx)); mtx_init(&p->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK); + mtx_init(&p->p_slock, "process slock", NULL, MTX_SPIN | MTX_RECURSE); p->p_stats = pstats_alloc(); proc_linkup(p, td); sched_newproc(p, td); @@ -669,7 +670,7 @@ fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp) kp->ki_sigcatch = ps->ps_sigcatch; mtx_unlock(&ps->ps_mtx); } - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (p->p_state != PRS_NEW && p->p_state != PRS_ZOMBIE && p->p_vmspace != NULL) { @@ -695,7 +696,7 @@ fill_kinfo_proc_only(struct proc *p, 
struct kinfo_proc *kp) kp->ki_nice = p->p_nice; rufetch(p, &kp->ki_rusage); kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); if ((p->p_sflag & PS_INMEM) && p->p_stats != NULL) { kp->ki_start = p->p_stats->p_start; timevaladd(&kp->ki_start, &boottime); @@ -747,7 +748,7 @@ fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp) /* * Fill in information that is thread specific. - * Must be called with sched_lock locked. + * Must be called with p_slock locked. */ static void fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp) @@ -755,7 +756,9 @@ fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp) struct proc *p; p = td->td_proc; + PROC_SLOCK_ASSERT(p, MA_OWNED); + thread_lock(td); if (td->td_wmesg != NULL) strlcpy(kp->ki_wmesg, td->td_wmesg, sizeof(kp->ki_wmesg)); else @@ -813,6 +816,7 @@ fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp) SIGSETOR(kp->ki_siglist, td->td_siglist); kp->ki_sigmask = td->td_sigmask; + thread_unlock(td); } /* @@ -824,10 +828,10 @@ fill_kinfo_proc(struct proc *p, struct kinfo_proc *kp) { fill_kinfo_proc_only(p, kp); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (FIRST_THREAD_IN_PROC(p) != NULL) fill_kinfo_thread(FIRST_THREAD_IN_PROC(p), kp); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } struct pstats * @@ -894,14 +898,14 @@ sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags) fill_kinfo_proc_only(p, &kinfo_proc); if (flags & KERN_PROC_NOTHREADS) { - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (FIRST_THREAD_IN_PROC(p) != NULL) fill_kinfo_thread(FIRST_THREAD_IN_PROC(p), &kinfo_proc); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); error = SYSCTL_OUT(req, (caddr_t)&kinfo_proc, sizeof(kinfo_proc)); } else { - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (FIRST_THREAD_IN_PROC(p) != NULL) FOREACH_THREAD_IN_PROC(p, td) { fill_kinfo_thread(td, &kinfo_proc); @@ -913,7 +917,7 @@ sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags) else error = SYSCTL_OUT(req, (caddr_t)&kinfo_proc, sizeof(kinfo_proc)); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } PROC_UNLOCK(p); if (error) @@ -1003,12 +1007,12 @@ sysctl_kern_proc(SYSCTL_HANDLER_ARGS) /* * Skip embryonic processes. 
*/ - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (p->p_state == PRS_NEW) { - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); continue; } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); PROC_LOCK(p); KASSERT(p->p_ucred != NULL, ("process credential is NULL for non-NEW proc")); diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c index 8ddff9a9bddd..a0e39b947849 100644 --- a/sys/kern/kern_resource.c +++ b/sys/kern/kern_resource.c @@ -263,9 +263,9 @@ donice(struct thread *td, struct proc *p, int n) n = PRIO_MIN; if (n < p->p_nice && priv_check(td, PRIV_SCHED_SETPRIORITY) != 0) return (EACCES); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); sched_nice(p, n); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); return (0); } @@ -306,7 +306,7 @@ rtprio_thread(struct thread *td, struct rtprio_thread_args *uap) case RTP_LOOKUP: if ((error = p_cansee(td, p))) break; - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (uap->lwpid == 0 || uap->lwpid == td->td_tid) td1 = td; else @@ -315,7 +315,7 @@ rtprio_thread(struct thread *td, struct rtprio_thread_args *uap) pri_to_rtp(td1, &rtp); else error = ESRCH; - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); PROC_UNLOCK(p); return (copyout(&rtp, uap->rtp, sizeof(struct rtprio))); case RTP_SET: @@ -342,7 +342,7 @@ rtprio_thread(struct thread *td, struct rtprio_thread_args *uap) } } - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (uap->lwpid == 0 || uap->lwpid == td->td_tid) td1 = td; else @@ -351,7 +351,7 @@ rtprio_thread(struct thread *td, struct rtprio_thread_args *uap) error = rtp_to_pri(&rtp, td1); else error = ESRCH; - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); break; default: error = EINVAL; @@ -402,7 +402,7 @@ rtprio(td, uap) case RTP_LOOKUP: if ((error = p_cansee(td, p))) break; - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); /* * Return OUR priority if no pid specified, * or if one is, report the highest priority @@ -430,7 +430,7 @@ rtprio(td, uap) } } } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); PROC_UNLOCK(p); return (copyout(&rtp, uap->rtp, sizeof(struct rtprio))); case RTP_SET: @@ -468,7 +468,7 @@ rtprio(td, uap) * do all the threads on that process. If we * specify our own pid we do the latter. 
*/ - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (uap->pid == 0) { error = rtp_to_pri(&rtp, td); } else { @@ -477,7 +477,7 @@ rtprio(td, uap) break; } } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); break; default: error = EINVAL; @@ -492,9 +492,9 @@ rtp_to_pri(struct rtprio *rtp, struct thread *td) { u_char newpri; - mtx_assert(&sched_lock, MA_OWNED); if (rtp->prio > RTP_PRIO_MAX) return (EINVAL); + thread_lock(td); switch (RTP_PRIO_BASE(rtp->type)) { case RTP_PRIO_REALTIME: newpri = PRI_MIN_REALTIME + rtp->prio; @@ -506,12 +506,14 @@ rtp_to_pri(struct rtprio *rtp, struct thread *td) newpri = PRI_MIN_IDLE + rtp->prio; break; default: + thread_unlock(td); return (EINVAL); } sched_class(td, rtp->type); /* XXX fix */ sched_user_prio(td, newpri); if (curthread == td) sched_prio(curthread, td->td_user_pri); /* XXX dubious */ + thread_unlock(td); return (0); } @@ -519,7 +521,7 @@ void pri_to_rtp(struct thread *td, struct rtprio *rtp) { - mtx_assert(&sched_lock, MA_OWNED); + thread_lock(td); switch (PRI_BASE(td->td_pri_class)) { case PRI_REALTIME: rtp->prio = td->td_base_user_pri - PRI_MIN_REALTIME; @@ -534,6 +536,7 @@ pri_to_rtp(struct thread *td, struct rtprio *rtp) break; } rtp->type = td->td_pri_class; + thread_unlock(td); } #if defined(COMPAT_43) @@ -634,10 +637,13 @@ lim_cb(void *arg) */ if (p->p_cpulimit == RLIM_INFINITY) return; - mtx_lock_spin(&sched_lock); - FOREACH_THREAD_IN_PROC(p, td) + PROC_SLOCK(p); + FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); ruxagg(&p->p_rux, td); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); + } + PROC_SUNLOCK(p); if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) { lim_rlimit(p, RLIMIT_CPU, &rlim); if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) { @@ -699,9 +705,9 @@ kern_setrlimit(td, which, limp) if (limp->rlim_cur != RLIM_INFINITY && p->p_cpulimit == RLIM_INFINITY) callout_reset(&p->p_limco, hz, lim_cb, p); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); p->p_cpulimit = limp->rlim_cur; - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); break; case RLIMIT_DATA: if (limp->rlim_cur > maxdsiz) @@ -828,9 +834,7 @@ calcru(struct proc *p, struct timeval *up, struct timeval *sp) uint64_t u; PROC_LOCK_ASSERT(p, MA_OWNED); - mtx_assert(&sched_lock, MA_NOTOWNED); - mtx_lock_spin(&sched_lock); - + PROC_SLOCK(p); /* * If we are getting stats for the current process, then add in the * stats that this thread has accumulated in its current time slice. 
@@ -843,9 +847,9 @@ calcru(struct proc *p, struct timeval *up, struct timeval *sp) p->p_rux.rux_runtime += u - PCPU_GET(switchtime); PCPU_SET(switchtime, u); } - /* Work on a copy of p_rux so we can let go of sched_lock */ + /* Work on a copy of p_rux so we can let go of p_slock */ rux = p->p_rux; - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); calcru1(p, &rux, up, sp); /* Update the result from the p_rux copy */ p->p_rux.rux_uu = rux.rux_uu; @@ -1013,6 +1017,9 @@ ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2, void ruxagg(struct rusage_ext *rux, struct thread *td) { + + THREAD_LOCK_ASSERT(td, MA_OWNED); + PROC_SLOCK_ASSERT(td->td_proc, MA_OWNED); rux->rux_runtime += td->td_runtime; rux->rux_uticks += td->td_uticks; rux->rux_sticks += td->td_sticks; @@ -1033,17 +1040,19 @@ rufetch(struct proc *p, struct rusage *ru) struct thread *td; memset(ru, 0, sizeof(*ru)); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (p->p_ru == NULL) { KASSERT(p->p_numthreads > 0, ("rufetch: No threads or ru in proc %p", p)); FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); ruxagg(&p->p_rux, td); + thread_unlock(td); rucollect(ru, &td->td_ru); } } else *ru = *p->p_ru; - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } /* diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c index 01035e18fb62..bb54faa732d3 100644 --- a/sys/kern/kern_shutdown.c +++ b/sys/kern/kern_shutdown.c @@ -267,9 +267,9 @@ boot(int howto) * systems don't shutdown properly (i.e., ACPI power off) if we * run on another processor. */ - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_bind(curthread, 0); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); KASSERT(PCPU_GET(cpuid) == 0, ("boot: not running on cpu 0")); #endif /* We're in the process of rebooting. */ @@ -340,9 +340,9 @@ boot(int howto) */ DROP_GIANT(); for (subiter = 0; subiter < 50 * iter; subiter++) { - mtx_lock_spin(&sched_lock); + thread_lock(curthread); mi_switch(SW_VOL, NULL); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); DELAY(1000); } PICKUP_GIANT(); @@ -555,9 +555,9 @@ panic(const char *fmt, ...) } #endif #endif - mtx_lock_spin(&sched_lock); + /*thread_lock(td); */ td->td_flags |= TDF_INPANIC; - mtx_unlock_spin(&sched_lock); + /* thread_unlock(td); */ if (!sync_on_panic) bootopt |= RB_NOSYNC; boot(bootopt); diff --git a/sys/kern/kern_subr.c b/sys/kern/kern_subr.c index 8dc207ea22ad..2101026ce5e4 100644 --- a/sys/kern/kern_subr.c +++ b/sys/kern/kern_subr.c @@ -453,11 +453,11 @@ uio_yield(void) struct thread *td; td = curthread; - mtx_lock_spin(&sched_lock); DROP_GIANT(); + thread_lock(td); sched_prio(td, td->td_user_pri); mi_switch(SW_INVOL, NULL); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); PICKUP_GIANT(); } diff --git a/sys/kern/kern_thr.c b/sys/kern/kern_thr.c index 3201955babbc..33ec6c49e616 100644 --- a/sys/kern/kern_thr.c +++ b/sys/kern/kern_thr.c @@ -226,12 +226,15 @@ create_thread(struct thread *td, mcontext_t *ctx, PROC_LOCK(td->td_proc); td->td_proc->p_flag |= P_HADTHREADS; newtd->td_sigmask = td->td_sigmask; - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); thread_link(newtd, p); - PROC_UNLOCK(p); - + thread_lock(td); /* let the scheduler know about these things. 
*/ sched_fork_thread(td, newtd); + thread_unlock(td); + PROC_SUNLOCK(p); + PROC_UNLOCK(p); + thread_lock(newtd); if (rtp != NULL) { if (!(td->td_pri_class == PRI_TIMESHARE && rtp->type == RTP_PRIO_NORMAL)) { @@ -242,7 +245,7 @@ create_thread(struct thread *td, mcontext_t *ctx, TD_SET_CAN_RUN(newtd); /* if ((flags & THR_SUSPENDED) == 0) */ sched_add(newtd, SRQ_BORING); - mtx_unlock_spin(&sched_lock); + thread_unlock(newtd); return (error); } @@ -275,7 +278,7 @@ thr_exit(struct thread *td, struct thr_exit_args *uap) PROC_LOCK(p); sigqueue_flush(&td->td_sigqueue); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); /* * Shutting down last thread in the proc. This will actually @@ -286,7 +289,7 @@ thr_exit(struct thread *td, struct thr_exit_args *uap) thread_exit(); /* NOTREACHED */ } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); PROC_UNLOCK(p); return (0); } @@ -379,9 +382,9 @@ kern_thr_suspend(struct thread *td, struct timespec *tsp) error = msleep((void *)td, &td->td_proc->p_mtx, PCATCH, "lthr", hz); if (td->td_flags & TDF_THRWAKEUP) { - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags &= ~TDF_THRWAKEUP; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); PROC_UNLOCK(td->td_proc); return (0); } @@ -414,9 +417,9 @@ thr_wake(struct thread *td, struct thr_wake_args *uap) PROC_UNLOCK(p); return (ESRCH); } - mtx_lock_spin(&sched_lock); + thread_lock(ttd); ttd->td_flags |= TDF_THRWAKEUP; - mtx_unlock_spin(&sched_lock); + thread_unlock(ttd); wakeup((void *)ttd); PROC_UNLOCK(p); return (0); diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c index c434b9374010..8634c8a68d85 100644 --- a/sys/kern/kern_time.c +++ b/sys/kern/kern_time.c @@ -552,9 +552,9 @@ kern_getitimer(struct thread *td, u_int which, struct itimerval *aitv) timevalsub(&aitv->it_value, &ctv); } } else { - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); *aitv = p->p_stats->p_timer[which]; - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } return (0); } @@ -623,10 +623,10 @@ kern_setitimer(struct thread *td, u_int which, struct itimerval *aitv, timevalsub(&oitv->it_value, &ctv); } } else { - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); *oitv = p->p_stats->p_timer[which]; p->p_stats->p_timer[which] = *aitv; - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } return (0); } diff --git a/sys/kern/ksched.c b/sys/kern/ksched.c index 785c04c38c2e..192034fec9cd 100644 --- a/sys/kern/ksched.c +++ b/sys/kern/ksched.c @@ -104,9 +104,7 @@ getscheduler(struct ksched *ksched, struct thread *td, int *policy) struct rtprio rtp; int e = 0; - mtx_lock_spin(&sched_lock); pri_to_rtp(td, &rtp); - mtx_unlock_spin(&sched_lock); switch (rtp.type) { case RTP_PRIO_FIFO: @@ -151,9 +149,7 @@ ksched_getparam(struct ksched *ksched, { struct rtprio rtp; - mtx_lock_spin(&sched_lock); pri_to_rtp(td, &rtp); - mtx_unlock_spin(&sched_lock); if (RTP_PRIO_IS_REALTIME(rtp.type)) param->sched_priority = rtpprio_to_p4prio(rtp.prio); @@ -186,9 +182,7 @@ ksched_setscheduler(struct ksched *ksched, rtp.type = (policy == SCHED_FIFO) ? 
RTP_PRIO_FIFO : RTP_PRIO_REALTIME; - mtx_lock_spin(&sched_lock); rtp_to_pri(&rtp, td); - mtx_unlock_spin(&sched_lock); } else e = EPERM; @@ -200,9 +194,7 @@ ksched_setscheduler(struct ksched *ksched, { rtp.type = RTP_PRIO_NORMAL; rtp.prio = p4prio_to_rtpprio(param->sched_priority); - mtx_lock_spin(&sched_lock); rtp_to_pri(&rtp, td); - mtx_unlock_spin(&sched_lock); } break; diff --git a/sys/kern/subr_prof.c b/sys/kern/subr_prof.c index 498d9d2fc958..8da12ac9e559 100644 --- a/sys/kern/subr_prof.c +++ b/sys/kern/subr_prof.c @@ -423,12 +423,12 @@ profil(td, uap) } PROC_LOCK(p); upp = &td->td_proc->p_stats->p_prof; - mtx_lock_spin(&time_lock); + PROC_SLOCK(p); upp->pr_off = uap->offset; upp->pr_scale = uap->scale; upp->pr_base = uap->samples; upp->pr_size = uap->size; - mtx_unlock_spin(&time_lock); + PROC_SUNLOCK(p); startprofclock(p); PROC_UNLOCK(p); @@ -468,22 +468,22 @@ addupc_intr(struct thread *td, uintfptr_t pc, u_int ticks) if (ticks == 0) return; prof = &td->td_proc->p_stats->p_prof; - mtx_lock_spin(&time_lock); + PROC_SLOCK(td->td_proc); if (pc < prof->pr_off || (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size) { - mtx_unlock_spin(&time_lock); + PROC_SUNLOCK(td->td_proc); return; /* out of range; ignore */ } addr = prof->pr_base + i; - mtx_unlock_spin(&time_lock); + PROC_SUNLOCK(td->td_proc); if ((v = fuswintr(addr)) == -1 || suswintr(addr, v + ticks) == -1) { td->td_profil_addr = pc; td->td_profil_ticks = ticks; td->td_pflags |= TDP_OWEUPC; - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags |= TDF_ASTPENDING; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } } @@ -511,12 +511,15 @@ addupc_task(struct thread *td, uintfptr_t pc, u_int ticks) } p->p_profthreads++; prof = &p->p_stats->p_prof; + PROC_SLOCK(p); if (pc < prof->pr_off || (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size) { + PROC_SUNLOCK(p); goto out; } addr = prof->pr_base + i; + PROC_SUNLOCK(p); PROC_UNLOCK(p); if (copyin(addr, &v, sizeof(v)) == 0) { v += ticks; diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c index d16e2fdb467e..6205799d19db 100644 --- a/sys/kern/subr_smp.c +++ b/sys/kern/subr_smp.c @@ -159,7 +159,7 @@ forward_signal(struct thread *td) * this thread, so all we need to do is poke it if it is currently * executing so that it executes ast(). */ - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT(TD_IS_RUNNING(td), ("forward_signal: thread is not TDS_RUNNING")); @@ -187,8 +187,6 @@ forward_roundrobin(void) struct thread *td; cpumask_t id, map, me; - mtx_assert(&sched_lock, MA_OWNED); - CTR0(KTR_SMP, "forward_roundrobin()"); if (!smp_started || cold || panicstr) diff --git a/sys/kern/subr_taskqueue.c b/sys/kern/subr_taskqueue.c index 3ae846d707a5..3e210ccbf23f 100644 --- a/sys/kern/subr_taskqueue.c +++ b/sys/kern/subr_taskqueue.c @@ -349,15 +349,15 @@ taskqueue_start_threads(struct taskqueue **tqp, int count, int pri, } else tq->tq_pcount++; } - mtx_lock_spin(&sched_lock); for (i = 0; i < count; i++) { if (tq->tq_pproc[i] == NULL) continue; td = FIRST_THREAD_IN_PROC(tq->tq_pproc[i]); + thread_lock(td); sched_prio(td, pri); sched_add(td, SRQ_BORING); + thread_unlock(td); } - mtx_unlock_spin(&sched_lock); return (0); } diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index f839ace03637..6fc92cc35bc6 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -82,11 +82,11 @@ userret(struct thread *td, struct trapframe *frame) #ifdef DIAGNOSTIC /* Check that we called signotify() enough. 
*/ PROC_LOCK(p); - mtx_lock_spin(&sched_lock); + thread_lock(td); if (SIGPENDING(td) && ((td->td_flags & TDF_NEEDSIGCHK) == 0 || (td->td_flags & TDF_ASTPENDING) == 0)) printf("failed to set signal flags properly for ast()\n"); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); PROC_UNLOCK(p); #endif @@ -163,7 +163,7 @@ ast(struct trapframe *framep) KASSERT(TRAPF_USERMODE(framep), ("ast in kernel mode")); WITNESS_WARN(WARN_PANIC, NULL, "Returning to user mode"); mtx_assert(&Giant, MA_NOTOWNED); - mtx_assert(&sched_lock, MA_NOTOWNED); + THREAD_LOCK_ASSERT(td, MA_NOTOWNED); td->td_frame = framep; td->td_pticks = 0; @@ -179,8 +179,7 @@ ast(struct trapframe *framep) * AST's saved in sflag, the astpending flag will be set and * ast() will be called again. */ - mtx_lock_spin(&sched_lock); - flags = td->td_flags; + PROC_SLOCK(p); sflag = p->p_sflag; if (p->p_sflag & (PS_ALRMPEND | PS_PROFPEND)) p->p_sflag &= ~(PS_ALRMPEND | PS_PROFPEND); @@ -188,9 +187,12 @@ ast(struct trapframe *framep) if (p->p_sflag & PS_MACPEND) p->p_sflag &= ~PS_MACPEND; #endif + thread_lock(td); + PROC_SUNLOCK(p); + flags = td->td_flags; td->td_flags &= ~(TDF_ASTPENDING | TDF_NEEDSIGCHK | TDF_NEEDRESCHED | TDF_INTERRUPT); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); PCPU_INC(cnt.v_trap); /* @@ -239,10 +241,11 @@ ast(struct trapframe *framep) if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 1); #endif - mtx_lock_spin(&sched_lock); + thread_lock(td); sched_prio(td, td->td_user_pri); + SCHED_STAT_INC(switch_needresched); mi_switch(SW_INVOL, NULL); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 1); diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index 50044607f0fa..992d181f0088 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -722,9 +722,9 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, mtx_lock(&sellock); retry: ncoll = nselcoll; - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags |= TDF_SELECT; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); mtx_unlock(&sellock); error = selscan(td, ibits, obits, nd); @@ -747,12 +747,12 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, * collisions and rescan the file descriptors if * necessary. */ - mtx_lock_spin(&sched_lock); + thread_lock(td); if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) { - mtx_unlock_spin(&sched_lock); + thread_unlock(td); goto retry; } - mtx_unlock_spin(&sched_lock); + thread_unlock(td); if (timo > 0) error = cv_timedwait_sig(&selwait, &sellock, timo); @@ -764,9 +764,9 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, done: clear_selinfo_list(td); - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags &= ~TDF_SELECT; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); mtx_unlock(&sellock); done_nosellock: @@ -896,9 +896,9 @@ poll(td, uap) mtx_lock(&sellock); retry: ncoll = nselcoll; - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags |= TDF_SELECT; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); mtx_unlock(&sellock); error = pollscan(td, bits, nfds); @@ -919,12 +919,12 @@ poll(td, uap) * sellock, so check TDF_SELECT and the number of collisions * and rescan the file descriptors if necessary. 
*/ - mtx_lock_spin(&sched_lock); + thread_lock(td); if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) { - mtx_unlock_spin(&sched_lock); + thread_unlock(td); goto retry; } - mtx_unlock_spin(&sched_lock); + thread_unlock(td); if (timo > 0) error = cv_timedwait_sig(&selwait, &sellock, timo); @@ -936,9 +936,9 @@ poll(td, uap) done: clear_selinfo_list(td); - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags &= ~TDF_SELECT; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); mtx_unlock(&sellock); done_nosellock: @@ -1109,9 +1109,9 @@ doselwakeup(sip, pri) } TAILQ_REMOVE(&td->td_selq, sip, si_thrlist); sip->si_thread = NULL; - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags &= ~TDF_SELECT; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); sleepq_remove(td, &selwait); mtx_unlock(&sellock); } diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c index 785d45d3d241..0ac970613feb 100644 --- a/sys/kern/sys_process.c +++ b/sys/kern/sys_process.c @@ -527,12 +527,12 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data) sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); FOREACH_THREAD_IN_PROC(p, td2) { if (td2->td_tid == pid) break; } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); if (td2 != NULL) break; /* proc lock held */ PROC_UNLOCK(p); @@ -701,15 +701,15 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data) break; case PT_SUSPEND: - mtx_lock_spin(&sched_lock); + thread_lock(td2); td2->td_flags |= TDF_DBSUSPEND; - mtx_unlock_spin(&sched_lock); + thread_unlock(td2); break; case PT_RESUME: - mtx_lock_spin(&sched_lock); + thread_lock(td2); td2->td_flags &= ~TDF_DBSUSPEND; - mtx_unlock_spin(&sched_lock); + thread_unlock(td2); break; case PT_STEP: @@ -780,32 +780,35 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data) proctree_locked = 0; } /* deliver or queue signal */ - mtx_lock_spin(&sched_lock); + thread_lock(td2); td2->td_flags &= ~TDF_XSIG; - mtx_unlock_spin(&sched_lock); + thread_unlock(td2); td2->td_xsig = data; p->p_xstat = data; p->p_xthread = NULL; if ((p->p_flag & (P_STOPPED_SIG | P_STOPPED_TRACE)) != 0) { - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (req == PT_DETACH) { struct thread *td3; - FOREACH_THREAD_IN_PROC(p, td3) + FOREACH_THREAD_IN_PROC(p, td3) { + thread_lock(td3); td3->td_flags &= ~TDF_DBSUSPEND; + thread_unlock(td3); + } } /* * unsuspend all threads, to not let a thread run, * you should use PT_SUSPEND to suspend it before * continuing process. 
*/ - mtx_unlock_spin(&sched_lock); #ifdef KSE + PROC_SUNLOCK(p); thread_continued(p); + PROC_SLOCK(p); #endif p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SIG|P_WAITED); - mtx_lock_spin(&sched_lock); thread_unsuspend(p); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } if (data) @@ -968,13 +971,13 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data) buf = malloc(num * sizeof(lwpid_t), M_TEMP, M_WAITOK); tmp = 0; PROC_LOCK(p); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); FOREACH_THREAD_IN_PROC(p, td2) { if (tmp >= num) break; buf[tmp++] = td2->td_tid; } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); PROC_UNLOCK(p); error = copyout(buf, addr, tmp * sizeof(lwpid_t)); free(buf, M_TEMP); diff --git a/sys/netncp/ncp_sock.c b/sys/netncp/ncp_sock.c index c4701817b1b1..0fe320fc6c34 100644 --- a/sys/netncp/ncp_sock.c +++ b/sys/netncp/ncp_sock.c @@ -189,9 +189,9 @@ ncp_poll(struct socket *so, int events) /* Fake up enough state to look like we are in poll(2). */ mtx_lock(&sellock); - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags |= TDF_SELECT; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); mtx_unlock(&sellock); TAILQ_INIT(&td->td_selq); @@ -200,9 +200,9 @@ ncp_poll(struct socket *so, int events) /* Tear down the fake poll(2) state. */ mtx_lock(&sellock); clear_selinfo_list(td); - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags &= ~TDF_SELECT; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); mtx_unlock(&sellock); return (revents); @@ -229,9 +229,9 @@ ncp_sock_rselect(struct socket *so, struct thread *td, struct timeval *tv, retry: ncoll = nselcoll; - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags |= TDF_SELECT; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); mtx_unlock(&sellock); TAILQ_INIT(&td->td_selq); @@ -257,12 +257,12 @@ ncp_sock_rselect(struct socket *so, struct thread *td, struct timeval *tv, * the process, test TDF_SELECT and rescan file descriptors if * necessary. */ - mtx_lock_spin(&sched_lock); + thread_lock(td); if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) { - mtx_unlock_spin(&sched_lock); + thread_unlock(td); goto retry; } - mtx_unlock_spin(&sched_lock); + thread_unlock(td); if (timo > 0) error = cv_timedwait(&selwait, &sellock, timo); @@ -274,9 +274,9 @@ ncp_sock_rselect(struct socket *so, struct thread *td, struct timeval *tv, done: clear_selinfo_list(td); - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags &= ~TDF_SELECT; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); mtx_unlock(&sellock); done_noproclock: diff --git a/sys/netsmb/smb_trantcp.c b/sys/netsmb/smb_trantcp.c index 5db6b7e47573..3f4ccaffa208 100644 --- a/sys/netsmb/smb_trantcp.c +++ b/sys/netsmb/smb_trantcp.c @@ -115,9 +115,9 @@ nbssn_rselect(struct nbpcb *nbp, struct timeval *tv, int events, retry: ncoll = nselcoll; - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags |= TDF_SELECT; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); mtx_unlock(&sellock); /* XXX: Should be done when the thread is initialized. */ @@ -144,12 +144,12 @@ nbssn_rselect(struct nbpcb *nbp, struct timeval *tv, int events, * the process, test P_SELECT and rescan file descriptors if * necessary. 
*/ - mtx_lock_spin(&sched_lock); + thread_lock(td); if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) { - mtx_unlock_spin(&sched_lock); + thread_unlock(td); goto retry; } - mtx_unlock_spin(&sched_lock); + thread_unlock(td); if (timo > 0) error = cv_timedwait(&selwait, &sellock, timo); @@ -161,9 +161,9 @@ nbssn_rselect(struct nbpcb *nbp, struct timeval *tv, int events, done: clear_selinfo_list(td); - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags &= ~TDF_SELECT; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); mtx_unlock(&sellock); done_noproclock: diff --git a/sys/pc98/pc98/machdep.c b/sys/pc98/pc98/machdep.c index b5b61b244a14..1e5f6cfa00ae 100644 --- a/sys/pc98/pc98/machdep.c +++ b/sys/pc98/pc98/machdep.c @@ -1055,9 +1055,9 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate) #ifdef SMP /* Schedule ourselves on the indicated cpu. */ - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_bind(curthread, cpu_id); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); #endif /* Calibrate by measuring a short delay. */ @@ -1068,9 +1068,9 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate) intr_restore(reg); #ifdef SMP - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_unbind(curthread); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); #endif /* diff --git a/sys/security/mac_lomac/mac_lomac.c b/sys/security/mac_lomac/mac_lomac.c index ae575317501f..8d9b13e0a979 100644 --- a/sys/security/mac_lomac/mac_lomac.c +++ b/sys/security/mac_lomac/mac_lomac.c @@ -536,10 +536,10 @@ maybe_demote(struct mac_lomac *subjlabel, struct mac_lomac *objlabel, subj->mac_lomac.ml_rangelow = objlabel->ml_single; subj->mac_lomac.ml_rangehigh = objlabel->ml_single; subj->mac_lomac.ml_flags |= MAC_LOMAC_FLAG_UPDATE; - mtx_lock_spin(&sched_lock); + thread_lock(curthread); curthread->td_flags |= TDF_ASTPENDING; curthread->td_proc->p_sflag |= PS_MACPEND; - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); /* * Avoid memory allocation while holding a mutex; cache the diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c index 4a614a75b7ca..792b375158f1 100644 --- a/sys/ufs/ffs/ffs_snapshot.c +++ b/sys/ufs/ffs/ffs_snapshot.c @@ -389,12 +389,15 @@ ffs_snapshot(mp, snapfile) * Recind nice scheduling while running with the filesystem suspended. */ if (td->td_proc->p_nice > 0) { - PROC_LOCK(td->td_proc); - mtx_lock_spin(&sched_lock); - saved_nice = td->td_proc->p_nice; - sched_nice(td->td_proc, 0); - mtx_unlock_spin(&sched_lock); - PROC_UNLOCK(td->td_proc); + struct proc *p; + + p = td->td_proc; + PROC_LOCK(p); + PROC_SLOCK(p); + saved_nice = p->p_nice; + sched_nice(p, 0); + PROC_SUNLOCK(p); + PROC_UNLOCK(p); } /* * Suspend operation on filesystem. @@ -809,10 +812,13 @@ ffs_snapshot(mp, snapfile) out: NDFREE(&nd, NDF_ONLY_PNBUF); if (saved_nice > 0) { - PROC_LOCK(td->td_proc); - mtx_lock_spin(&sched_lock); + struct proc *p; + + p = td->td_proc; + PROC_LOCK(p); + PROC_SLOCK(p); sched_nice(td->td_proc, saved_nice); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); PROC_UNLOCK(td->td_proc); } UFS_LOCK(ump); diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index cb2a657b1c3e..3a08855111be 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -619,24 +619,26 @@ faultin(p) * busy swapping it in. 
 	 */
 	++p->p_lock;
-	mtx_lock_spin(&sched_lock);
+	PROC_SLOCK(p);
 	p->p_sflag |= PS_SWAPPINGIN;
-	mtx_unlock_spin(&sched_lock);
+	PROC_SUNLOCK(p);
 	PROC_UNLOCK(p);
 	FOREACH_THREAD_IN_PROC(p, td)
 		vm_thread_swapin(td);
 	PROC_LOCK(p);
-	mtx_lock_spin(&sched_lock);
+	PROC_SLOCK(p);
 	p->p_sflag &= ~PS_SWAPPINGIN;
 	p->p_sflag |= PS_INMEM;
 	FOREACH_THREAD_IN_PROC(p, td) {
+		thread_lock(td);
 		TD_CLR_SWAPPED(td);
 		if (TD_CAN_RUN(td))
 			setrunnable(td);
+		thread_unlock(td);
 	}
-	mtx_unlock_spin(&sched_lock);
+	PROC_SUNLOCK(p);
 	wakeup(&p->p_sflag);
@@ -672,9 +674,9 @@ scheduler(dummy)
 loop:
 	if (vm_page_count_min()) {
 		VM_WAIT;
-		mtx_lock_spin(&sched_lock);
+		thread_lock(&thread0);
 		proc0_rescan = 0;
-		mtx_unlock_spin(&sched_lock);
+		thread_unlock(&thread0);
 		goto loop;
 	}
@@ -685,13 +687,14 @@ scheduler(dummy)
 		if (p->p_sflag & (PS_INMEM | PS_SWAPPINGOUT | PS_SWAPPINGIN)) {
 			continue;
 		}
-		mtx_lock_spin(&sched_lock);
+		PROC_SLOCK(p);
 		FOREACH_THREAD_IN_PROC(p, td) {
 			/*
 			 * An otherwise runnable thread of a process
 			 * swapped out has only the TDI_SWAPPED bit set.
 			 *
 			 */
+			thread_lock(td);
 			if (td->td_inhibitors == TDI_SWAPPED) {
 				pri = p->p_swtime + td->td_slptime;
 				if ((p->p_sflag & PS_SWAPINREQ) == 0) {
@@ -709,8 +712,9 @@ scheduler(dummy)
 					ppri = pri;
 				}
 			}
+			thread_unlock(td);
 		}
-		mtx_unlock_spin(&sched_lock);
+		PROC_SUNLOCK(p);
 	}
 	sx_sunlock(&allproc_lock);
@@ -718,13 +722,13 @@ scheduler(dummy)
 	 * Nothing to do, back to sleep.
 	 */
 	if ((p = pp) == NULL) {
-		mtx_lock_spin(&sched_lock);
+		thread_lock(&thread0);
 		if (!proc0_rescan) {
 			TD_SET_IWAIT(&thread0);
 			mi_switch(SW_VOL, NULL);
 		}
 		proc0_rescan = 0;
-		mtx_unlock_spin(&sched_lock);
+		thread_unlock(&thread0);
 		goto loop;
 	}
 	PROC_LOCK(p);
@@ -736,15 +740,15 @@ scheduler(dummy)
 	 */
 	if (p->p_sflag & (PS_INMEM | PS_SWAPPINGOUT | PS_SWAPPINGIN)) {
 		PROC_UNLOCK(p);
-		mtx_lock_spin(&sched_lock);
+		thread_lock(&thread0);
 		proc0_rescan = 0;
-		mtx_unlock_spin(&sched_lock);
+		thread_unlock(&thread0);
 		goto loop;
 	}
-	mtx_lock_spin(&sched_lock);
+	PROC_SLOCK(p);
 	p->p_sflag &= ~PS_SWAPINREQ;
-	mtx_unlock_spin(&sched_lock);
+	PROC_SUNLOCK(p);
 	/*
 	 * We would like to bring someone in. (only if there is space).
 	 */
 	faultin(p);
 	PROC_UNLOCK(p);
-	mtx_lock_spin(&sched_lock);
+	PROC_SLOCK(p);
 	p->p_swtime = 0;
+	PROC_SUNLOCK(p);
+	thread_lock(&thread0);
 	proc0_rescan = 0;
-	mtx_unlock_spin(&sched_lock);
+	thread_unlock(&thread0);
 	goto loop;
 }
@@ -763,7 +769,8 @@ void kick_proc0(void)
 {
 	struct thread *td = &thread0;
-
+	/* XXX This will probably cause a LOR in some cases */
+	thread_lock(td);
 	if (TD_AWAITING_INTR(td)) {
 		CTR2(KTR_INTR, "%s: sched_add %d", __func__, 0);
 		TD_CLR_IWAIT(td);
@@ -773,6 +780,7 @@ void kick_proc0(void)
 		CTR2(KTR_INTR, "%s: state %d", __func__, td->td_state);
 	}
+	thread_unlock(td);
 }
@@ -821,12 +829,12 @@ int action;
 			 * creation. It may have no
 			 * address space or lock yet.
 			 */
-			mtx_lock_spin(&sched_lock);
+			PROC_SLOCK(p);
 			if (p->p_state == PRS_NEW) {
-				mtx_unlock_spin(&sched_lock);
+				PROC_SUNLOCK(p);
 				continue;
 			}
-			mtx_unlock_spin(&sched_lock);
+			PROC_SUNLOCK(p);
 			/*
 			 * An aio daemon switches its
@@ -876,7 +884,7 @@ int action;
 			break;
 		case PRS_NORMAL:
-			mtx_lock_spin(&sched_lock);
+			PROC_SLOCK(p);
 			/*
 			 * do not swapout a realtime process
 			 * Check all the thread groups..
@@ -929,7 +937,7 @@ int action;
 				    (minslptime > swap_idle_threshold2))) {
 					swapout(p);
 					didswap++;
-					mtx_unlock_spin(&sched_lock);
+					PROC_SUNLOCK(p);
 					PROC_UNLOCK(p);
 					vm_map_unlock(&vm->vm_map);
 					vmspace_free(vm);
@@ -937,7 +945,7 @@ int action;
 					goto retry;
 				}
 nextproc:
-				mtx_unlock_spin(&sched_lock);
+				PROC_SUNLOCK(p);
 			}
 nextproc2:
 			PROC_UNLOCK(p);
@@ -962,7 +970,7 @@ swapout(p)
 	struct thread *td;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
-	mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
+	mtx_assert(&p->p_slock, MA_OWNED | MA_NOTRECURSED);
 #if defined(SWAP_DEBUG)
 	printf("swapping out %d\n", p->p_pid);
 #endif
@@ -996,15 +1004,18 @@ swapout(p)
 	p->p_sflag &= ~PS_INMEM;
 	p->p_sflag |= PS_SWAPPINGOUT;
 	PROC_UNLOCK(p);
-	FOREACH_THREAD_IN_PROC(p, td)
+	FOREACH_THREAD_IN_PROC(p, td) {
+		thread_lock(td);
 		TD_SET_SWAPPED(td);
-	mtx_unlock_spin(&sched_lock);
+		thread_unlock(td);
+	}
+	PROC_SUNLOCK(p);
 	FOREACH_THREAD_IN_PROC(p, td)
 		vm_thread_swapout(td);
 	PROC_LOCK(p);
-	mtx_lock_spin(&sched_lock);
+	PROC_SLOCK(p);
 	p->p_sflag &= ~PS_SWAPPINGOUT;
 	p->p_swtime = 0;
 }
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
index d4b51e727793..4d70155f28a9 100644
--- a/sys/vm/vm_meter.c
+++ b/sys/vm/vm_meter.c
@@ -131,17 +131,21 @@ vmtotal(SYSCTL_HANDLER_ARGS)
 	FOREACH_PROC_IN_SYSTEM(p) {
 		if (p->p_flag & P_SYSTEM)
 			continue;
-		mtx_lock_spin(&sched_lock);
+		PROC_SLOCK(p);
 		switch (p->p_state) {
 		case PRS_NEW:
-			mtx_unlock_spin(&sched_lock);
+			PROC_SUNLOCK(p);
 			continue;
 			break;
 		default:
 			FOREACH_THREAD_IN_PROC(p, td) {
 				/* Need new statistics XXX */
+				thread_lock(td);
 				switch (td->td_state) {
 				case TDS_INHIBITED:
+					/*
+					 * XXX stats no longer synchronized.
+					 */
 					if (TD_ON_LOCK(td) ||
 					    (td->td_inhibitors == TDI_SWAPPED)) {
@@ -162,13 +166,15 @@ vmtotal(SYSCTL_HANDLER_ARGS)
 				case TDS_RUNQ:
 				case TDS_RUNNING:
 					total.t_rq++;
+					thread_unlock(td);
 					continue;
 				default:
 					break;
 				}
+				thread_unlock(td);
 			}
 		}
-		mtx_unlock_spin(&sched_lock);
+		PROC_SUNLOCK(p);
 		/*
 		 * Note active objects.
 		 */
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index dcf69ef621f4..99630ce25658 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -1246,22 +1246,24 @@ vm_pageout_scan(int pass)
 			 * If the process is in a non-running type state,
 			 * don't touch it. Check all the threads individually.
 			 */
-			mtx_lock_spin(&sched_lock);
+			PROC_SLOCK(p);
 			breakout = 0;
 			FOREACH_THREAD_IN_PROC(p, td) {
+				thread_lock(td);
 				if (!TD_ON_RUNQ(td) &&
 				    !TD_IS_RUNNING(td) && !TD_IS_SLEEPING(td)) {
+					thread_unlock(td);
 					breakout = 1;
 					break;
 				}
+				thread_unlock(td);
 			}
+			PROC_SUNLOCK(p);
 			if (breakout) {
-				mtx_unlock_spin(&sched_lock);
 				PROC_UNLOCK(p);
 				continue;
 			}
-			mtx_unlock_spin(&sched_lock);
 			/*
 			 * get the process size
 			 */
@@ -1287,9 +1289,9 @@ vm_pageout_scan(int pass)
 	sx_sunlock(&allproc_lock);
 	if (bigproc != NULL) {
 		killproc(bigproc, "out of swap space");
-		mtx_lock_spin(&sched_lock);
+		PROC_SLOCK(bigproc);
 		sched_nice(bigproc, PRIO_MIN);
-		mtx_unlock_spin(&sched_lock);
+		PROC_SUNLOCK(bigproc);
 		PROC_UNLOCK(bigproc);
 		wakeup(&cnt.v_free_count);
 	}
@@ -1599,17 +1601,20 @@ vm_daemon()
 			 * if the process is in a non-running type state,
 			 * don't touch it.
 			 */
-			mtx_lock_spin(&sched_lock);
+			PROC_SLOCK(p);
 			breakout = 0;
 			FOREACH_THREAD_IN_PROC(p, td) {
+				thread_lock(td);
 				if (!TD_ON_RUNQ(td) &&
 				    !TD_IS_RUNNING(td) && !TD_IS_SLEEPING(td)) {
+					thread_unlock(td);
 					breakout = 1;
 					break;
 				}
+				thread_unlock(td);
 			}
-			mtx_unlock_spin(&sched_lock);
+			PROC_SUNLOCK(p);
 			if (breakout) {
 				PROC_UNLOCK(p);
 				continue;
diff --git a/sys/vm/vm_zeroidle.c b/sys/vm/vm_zeroidle.c
index 5af84e01c188..b21d01f32ab0 100644
--- a/sys/vm/vm_zeroidle.c
+++ b/sys/vm/vm_zeroidle.c
@@ -145,9 +145,9 @@ vm_pagezero(void __unused *arg)
 			vm_page_zero_idle();
 #ifndef PREEMPTION
 			if (sched_runnable()) {
-				mtx_lock_spin(&sched_lock);
+				thread_lock(curthread);
 				mi_switch(SW_VOL, NULL);
-				mtx_unlock_spin(&sched_lock);
+				thread_unlock(curthread);
 			}
 #endif
 		} else {
@@ -176,11 +176,11 @@ pagezero_start(void __unused *arg)
 	PROC_LOCK(pagezero_proc);
 	pagezero_proc->p_flag |= P_NOLOAD;
 	PROC_UNLOCK(pagezero_proc);
-	mtx_lock_spin(&sched_lock);
 	td = FIRST_THREAD_IN_PROC(pagezero_proc);
+	thread_lock(td);
 	sched_class(td, PRI_IDLE);
 	sched_prio(td, PRI_MAX_IDLE);
 	sched_add(td, SRQ_BORING);
-	mtx_unlock_spin(&sched_lock);
+	thread_unlock(td);
 }
 SYSINIT(pagezero, SI_SUB_KTHREAD_VM, SI_ORDER_ANY, pagezero_start, NULL)
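
Note (not part of the diff): below is a minimal sketch of the locking idiom this change applies throughout the tree, assuming a kernel compilation environment. The two helper functions are hypothetical and exist only to illustrate the pattern; they are not added by this commit.

/*
 * Illustrative sketch only.  Per-thread scheduling state is serialized
 * with thread_lock()/thread_unlock() on the owning thread, while
 * per-process scheduling state uses the process spinlock via
 * PROC_SLOCK()/PROC_SUNLOCK(), nested inside PROC_LOCK().
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>

/* Hypothetical helper: pin the current thread to a CPU and release it. */
static void
example_bind_self(int cpu_id)
{

	thread_lock(curthread);
	sched_bind(curthread, cpu_id);
	thread_unlock(curthread);
	/* ... work that must run on cpu_id ... */
	thread_lock(curthread);
	sched_unbind(curthread);
	thread_unlock(curthread);
}

/* Hypothetical helper: adjust a process's nice value. */
static void
example_set_nice(struct proc *p, int nice)
{

	PROC_LOCK(p);
	PROC_SLOCK(p);
	sched_nice(p, nice);
	PROC_SUNLOCK(p);
	PROC_UNLOCK(p);
}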