From 982d11f836278f1e95ae1ae398aa4d1d07a19006 Mon Sep 17 00:00:00 2001
From: Jeff Roberson
Date: Tue, 5 Jun 2007 00:00:57 +0000
Subject: [PATCH] Commit 14/14 of sched_lock decomposition.

- Use thread_lock() rather than sched_lock for per-thread scheduling
  synchronization.
- Use the per-process spinlock rather than the sched_lock for per-process
  scheduling synchronization.

Tested by:	kris, current@
Tested on:	i386, amd64, ULE, 4BSD, libthr, libkse, PREEMPTION, etc.
Discussed with:	kris, attilio, kmacy, jhb, julian, bde (small parts each)
---
 sys/amd64/amd64/machdep.c | 8 ++--
 sys/amd64/linux32/linux32_machdep.c | 12 +++---
 sys/compat/linprocfs/linprocfs.c | 4 +-
 sys/compat/ndis/subr_ntoskrnl.c | 4 +-
 sys/compat/svr4/svr4_misc.c | 6 +--
 sys/dev/hwpmc/hwpmc_mod.c | 12 +++---
 sys/dev/md/md.c | 4 +-
 sys/fs/procfs/procfs_ctl.c | 8 ++--
 sys/fs/procfs/procfs_ioctl.c | 4 +-
 sys/fs/procfs/procfs_status.c | 4 +-
 sys/geom/eli/g_eli.c | 4 +-
 sys/geom/geom_kern.c | 12 +++---
 sys/geom/journal/g_journal.c | 4 +-
 sys/geom/mirror/g_mirror.c | 4 +-
 sys/geom/raid3/g_raid3.c | 4 +-
 sys/i386/i386/machdep.c | 8 ++--
 sys/i386/isa/npx.c | 4 +-
 sys/i386/linux/linux_machdep.c | 12 +++---
 sys/ia64/ia64/pmap.c | 10 +++--
 sys/kern/init_main.c | 8 ++--
 sys/kern/kern_acct.c | 4 +-
 sys/kern/kern_cpu.c | 16 +++----
 sys/kern/kern_exit.c | 22 +++++-----
 sys/kern/kern_idle.c | 4 +-
 sys/kern/kern_intr.c | 48 ++++++++++-----------
 sys/kern/kern_kthread.c | 4 +-
 sys/kern/kern_lockf.c | 24 +++++++----
 sys/kern/kern_poll.c | 8 ++--
 sys/kern/kern_proc.c | 28 +++++++------
 sys/kern/kern_resource.c | 57 ++++++++++++++-----------
 sys/kern/kern_shutdown.c | 12 +++---
 sys/kern/kern_subr.c | 4 +-
 sys/kern/kern_thr.c | 23 +++++-----
 sys/kern/kern_time.c | 8 ++--
 sys/kern/ksched.c | 8 ----
 sys/kern/subr_prof.c | 17 ++++----
 sys/kern/subr_smp.c | 4 +-
 sys/kern/subr_taskqueue.c | 4 +-
 sys/kern/subr_trap.c | 19 +++++----
 sys/kern/sys_generic.c | 32 +++++++-------
 sys/kern/sys_process.c | 33 ++++++++-------
 sys/netncp/ncp_sock.c | 22 +++++-----
 sys/netsmb/smb_trantcp.c | 14 +++----
 sys/pc98/pc98/machdep.c | 8 ++--
 sys/security/mac_lomac/mac_lomac.c | 4 +-
 sys/ufs/ffs/ffs_snapshot.c | 24 +++++++----
 sys/vm/vm_glue.c | 65 +++++++++++++++++------------
 sys/vm/vm_meter.c | 12 ++++--
 sys/vm/vm_pageout.c | 19 +++++----
 sys/vm/vm_zeroidle.c | 8 ++--
 50 files changed, 373 insertions(+), 318 deletions(-)

diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 0b757492c256..a52ea5169b91 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -466,9 +466,9 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate)
 
 #ifdef SMP
 	/* Schedule ourselves on the indicated cpu. */
-	mtx_lock_spin(&sched_lock);
+	thread_lock(curthread);
 	sched_bind(curthread, cpu_id);
-	mtx_unlock_spin(&sched_lock);
+	thread_unlock(curthread);
 #endif
 
 	/* Calibrate by measuring a short delay. */
@@ -479,9 +479,9 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate)
 	intr_restore(reg);
 
 #ifdef SMP
-	mtx_lock_spin(&sched_lock);
+	thread_lock(curthread);
 	sched_unbind(curthread);
-	mtx_unlock_spin(&sched_lock);
+	thread_unlock(curthread);
 #endif
 
 	/*
diff --git a/sys/amd64/linux32/linux32_machdep.c b/sys/amd64/linux32/linux32_machdep.c
index 86a53ae57c42..9983e1fcd6fa 100644
--- a/sys/amd64/linux32/linux32_machdep.c
+++ b/sys/amd64/linux32/linux32_machdep.c
@@ -486,10 +486,10 @@ linux_fork(struct thread *td, struct linux_fork_args *args)
 
 	/*
 	 * Make this runnable after we are finished with it.
*/ - mtx_lock_spin(&sched_lock); + thread_lock(td2); TD_SET_CAN_RUN(td2); sched_add(td2, SRQ_BORING); - mtx_unlock_spin(&sched_lock); + thread_unlock(td2); return (0); } @@ -529,10 +529,10 @@ linux_vfork(struct thread *td, struct linux_vfork_args *args) /* * Make this runnable after we are finished with it. */ - mtx_lock_spin(&sched_lock); + thread_lock(td2); TD_SET_CAN_RUN(td2); sched_add(td2, SRQ_BORING); - mtx_unlock_spin(&sched_lock); + thread_unlock(td2); /* wait for the children to exit, ie. emulate vfork */ PROC_LOCK(p2); @@ -715,10 +715,10 @@ linux_clone(struct thread *td, struct linux_clone_args *args) /* * Make this runnable after we are finished with it. */ - mtx_lock_spin(&sched_lock); + thread_lock(td2); TD_SET_CAN_RUN(td2); sched_add(td2, SRQ_BORING); - mtx_unlock_spin(&sched_lock); + thread_unlock(td2); td->td_retval[0] = p2->p_pid; td->td_retval[1] = 0; diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c index 787ea1e2b611..e5cd300d6cba 100644 --- a/sys/compat/linprocfs/linprocfs.c +++ b/sys/compat/linprocfs/linprocfs.c @@ -636,7 +636,7 @@ linprocfs_doprocstatus(PFS_FILL_ARGS) if (P_SHOULDSTOP(p)) { state = "T (stopped)"; } else { - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); switch(p->p_state) { case PRS_NEW: state = "I (idle)"; @@ -666,7 +666,7 @@ linprocfs_doprocstatus(PFS_FILL_ARGS) state = "? (unknown)"; break; } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } fill_kinfo_proc(p, &kp); diff --git a/sys/compat/ndis/subr_ntoskrnl.c b/sys/compat/ndis/subr_ntoskrnl.c index a7ba1ad5e1e4..23adca9367de 100644 --- a/sys/compat/ndis/subr_ntoskrnl.c +++ b/sys/compat/ndis/subr_ntoskrnl.c @@ -3824,7 +3824,7 @@ ntoskrnl_dpc_thread(arg) * once scheduled by an ISR. */ - mtx_lock_spin(&sched_lock); + thread_lock(curthread); #ifdef NTOSKRNL_MULTIPLE_DPCS #if __FreeBSD_version >= 502102 sched_bind(curthread, kq->kq_cpu); @@ -3834,7 +3834,7 @@ ntoskrnl_dpc_thread(arg) #if __FreeBSD_version < 600000 curthread->td_base_pri = PRI_MIN_KERN; #endif - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); while (1) { KeWaitForSingleObject(&kq->kq_proc, 0, 0, TRUE, NULL); diff --git a/sys/compat/svr4/svr4_misc.c b/sys/compat/svr4/svr4_misc.c index f1f44eac4353..71d994d111cd 100644 --- a/sys/compat/svr4/svr4_misc.c +++ b/sys/compat/svr4/svr4_misc.c @@ -1253,12 +1253,12 @@ svr4_sys_waitsys(td, uap) * See if we have a stopped or continued process. * XXX: This duplicates the same code in kern_wait(). 
*/ - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if ((p->p_flag & P_STOPPED_SIG) && (p->p_suspcount == p->p_numthreads) && (p->p_flag & P_WAITED) == 0 && (p->p_flag & P_TRACED || uap->options & SVR4_WSTOPPED)) { - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); if (((uap->options & SVR4_WNOWAIT)) == 0) p->p_flag |= P_WAITED; sx_sunlock(&proctree_lock); @@ -1278,7 +1278,7 @@ svr4_sys_waitsys(td, uap) DPRINTF(("jobcontrol %d\n", pid)); return (svr4_setinfo(pid, &ru, status, uap->info)); } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); if (uap->options & SVR4_WCONTINUED && (p->p_flag & P_CONTINUED)) { sx_sunlock(&proctree_lock); diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c index 71b8e6e0df8c..6c0e0ea27254 100644 --- a/sys/dev/hwpmc/hwpmc_mod.c +++ b/sys/dev/hwpmc/hwpmc_mod.c @@ -591,10 +591,10 @@ static void pmc_save_cpu_binding(struct pmc_binding *pb) { PMCDBG(CPU,BND,2, "%s", "save-cpu"); - mtx_lock_spin(&sched_lock); + thread_lock(curthread); pb->pb_bound = sched_is_bound(curthread); pb->pb_cpu = curthread->td_oncpu; - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); PMCDBG(CPU,BND,2, "save-cpu cpu=%d", pb->pb_cpu); } @@ -607,12 +607,12 @@ pmc_restore_cpu_binding(struct pmc_binding *pb) { PMCDBG(CPU,BND,2, "restore-cpu curcpu=%d restore=%d", curthread->td_oncpu, pb->pb_cpu); - mtx_lock_spin(&sched_lock); + thread_lock(curthread); if (pb->pb_bound) sched_bind(curthread, pb->pb_cpu); else sched_unbind(curthread); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); PMCDBG(CPU,BND,2, "%s", "restore-cpu done"); } @@ -631,9 +631,9 @@ pmc_select_cpu(int cpu) "disabled CPU %d", __LINE__, cpu)); PMCDBG(CPU,SEL,2, "select-cpu cpu=%d", cpu); - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_bind(curthread, cpu); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); KASSERT(curthread->td_oncpu == cpu, ("[pmc,%d] CPU not bound [cpu=%d, curr=%d]", __LINE__, diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c index aaf7e9d5192f..48cd0595bd16 100644 --- a/sys/dev/md/md.c +++ b/sys/dev/md/md.c @@ -690,9 +690,9 @@ md_kthread(void *arg) int error; sc = arg; - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_prio(curthread, PRIBIO); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); if (sc->type == MD_VNODE) curthread->td_pflags |= TDP_NORUNNINGBUF; diff --git a/sys/fs/procfs/procfs_ctl.c b/sys/fs/procfs/procfs_ctl.c index 65f39f6caa1d..89aa304d99e0 100644 --- a/sys/fs/procfs/procfs_ctl.c +++ b/sys/fs/procfs/procfs_ctl.c @@ -286,9 +286,9 @@ procfs_control(struct thread *td, struct proc *p, int op) panic("procfs_control"); } - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); thread_unsuspend(p); /* If it can run, let it do so. 
*/ - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); return (0); } @@ -344,9 +344,9 @@ procfs_doprocctl(PFS_FILL_ARGS) #endif /* XXXKSE: */ p->p_flag &= ~P_STOPPED_SIG; - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); thread_unsuspend(p); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } else psignal(p, nm->nm_val); PROC_UNLOCK(p); diff --git a/sys/fs/procfs/procfs_ioctl.c b/sys/fs/procfs/procfs_ioctl.c index 8f87eef127ef..bd003e05c743 100644 --- a/sys/fs/procfs/procfs_ioctl.c +++ b/sys/fs/procfs/procfs_ioctl.c @@ -185,9 +185,9 @@ procfs_ioctl(PFS_IOCTL_ARGS) if (P_SHOULDSTOP(p)) { p->p_xstat = sig; p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SIG); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); thread_unsuspend(p); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } else if (sig) psignal(p, sig); #else diff --git a/sys/fs/procfs/procfs_status.c b/sys/fs/procfs/procfs_status.c index 63827758b113..b92d1577ae08 100644 --- a/sys/fs/procfs/procfs_status.c +++ b/sys/fs/procfs/procfs_status.c @@ -112,7 +112,7 @@ procfs_doprocstatus(PFS_FILL_ARGS) sbuf_printf(sb, "noflags"); } - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); #ifdef KSE if (p->p_flag & P_SA) wmesg = "-kse- "; @@ -127,7 +127,7 @@ procfs_doprocstatus(PFS_FILL_ARGS) } else wmesg = "nochan"; } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); if (p->p_sflag & PS_INMEM) { struct timeval start, ut, st; diff --git a/sys/geom/eli/g_eli.c b/sys/geom/eli/g_eli.c index b0e4cc8f81c7..d9f74d200ccb 100644 --- a/sys/geom/eli/g_eli.c +++ b/sys/geom/eli/g_eli.c @@ -332,11 +332,11 @@ g_eli_worker(void *arg) tsleep(wr, 0, "geli:smp", hz / 4); } #endif - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_prio(curthread, PRIBIO); if (sc->sc_crypto == G_ELI_CRYPTO_SW && g_eli_threads == 0) sched_bind(curthread, wr->w_number); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); G_ELI_DEBUG(1, "Thread %s started.", curthread->td_proc->p_comm); diff --git a/sys/geom/geom_kern.c b/sys/geom/geom_kern.c index 421e7e62e974..d32757928284 100644 --- a/sys/geom/geom_kern.c +++ b/sys/geom/geom_kern.c @@ -88,9 +88,9 @@ g_up_procbody(void) struct thread *tp = FIRST_THREAD_IN_PROC(p); mtx_assert(&Giant, MA_NOTOWNED); - mtx_lock_spin(&sched_lock); + thread_lock(tp); sched_prio(tp, PRIBIO); - mtx_unlock_spin(&sched_lock); + thread_unlock(tp); for(;;) { g_io_schedule_up(tp); } @@ -111,9 +111,9 @@ g_down_procbody(void) struct thread *tp = FIRST_THREAD_IN_PROC(p); mtx_assert(&Giant, MA_NOTOWNED); - mtx_lock_spin(&sched_lock); + thread_lock(tp); sched_prio(tp, PRIBIO); - mtx_unlock_spin(&sched_lock); + thread_unlock(tp); for(;;) { g_io_schedule_down(tp); } @@ -134,9 +134,9 @@ g_event_procbody(void) struct thread *tp = FIRST_THREAD_IN_PROC(p); mtx_assert(&Giant, MA_NOTOWNED); - mtx_lock_spin(&sched_lock); + thread_lock(tp); sched_prio(tp, PRIBIO); - mtx_unlock_spin(&sched_lock); + thread_unlock(tp); for(;;) { g_run_events(); tsleep(&g_wait_event, PRIBIO, "-", hz/10); diff --git a/sys/geom/journal/g_journal.c b/sys/geom/journal/g_journal.c index 6be6580a1c01..eff82935f2fa 100644 --- a/sys/geom/journal/g_journal.c +++ b/sys/geom/journal/g_journal.c @@ -2057,9 +2057,9 @@ g_journal_worker(void *arg) time_t last_write; int type; - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_prio(curthread, PRIBIO); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); sc = arg; type = 0; /* gcc */ diff --git a/sys/geom/mirror/g_mirror.c b/sys/geom/mirror/g_mirror.c index 419611eb520c..24a2fad8f5c0 100644 --- a/sys/geom/mirror/g_mirror.c +++ 
b/sys/geom/mirror/g_mirror.c @@ -1768,9 +1768,9 @@ g_mirror_worker(void *arg) int timeout; sc = arg; - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_prio(curthread, PRIBIO); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); sx_xlock(&sc->sc_lock); for (;;) { diff --git a/sys/geom/raid3/g_raid3.c b/sys/geom/raid3/g_raid3.c index 776b6bd3a0a7..98747eadd7b5 100644 --- a/sys/geom/raid3/g_raid3.c +++ b/sys/geom/raid3/g_raid3.c @@ -2017,9 +2017,9 @@ g_raid3_worker(void *arg) int timeout; sc = arg; - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_prio(curthread, PRIBIO); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); sx_xlock(&sc->sc_lock); for (;;) { diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index f9398dfd2d99..15dc66459a67 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -1058,9 +1058,9 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate) #ifdef SMP /* Schedule ourselves on the indicated cpu. */ - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_bind(curthread, cpu_id); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); #endif /* Calibrate by measuring a short delay. */ @@ -1071,9 +1071,9 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate) intr_restore(reg); #ifdef SMP - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_unbind(curthread); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); #endif /* diff --git a/sys/i386/isa/npx.c b/sys/i386/isa/npx.c index e9ba8b067b6f..c5d381e34df1 100644 --- a/sys/i386/isa/npx.c +++ b/sys/i386/isa/npx.c @@ -230,9 +230,9 @@ npx_intr(dummy) td = PCPU_GET(fpcurthread); if (td != NULL) { td->td_pcb->pcb_flags |= PCB_NPXTRAP; - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags |= TDF_ASTPENDING; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } return (FILTER_HANDLED); } diff --git a/sys/i386/linux/linux_machdep.c b/sys/i386/linux/linux_machdep.c index 5f916ed2d11b..75ccd3ed6cbc 100644 --- a/sys/i386/linux/linux_machdep.c +++ b/sys/i386/linux/linux_machdep.c @@ -325,10 +325,10 @@ linux_fork(struct thread *td, struct linux_fork_args *args) /* * Make this runnable after we are finished with it. */ - mtx_lock_spin(&sched_lock); + thread_lock(td2); TD_SET_CAN_RUN(td2); sched_add(td2, SRQ_BORING); - mtx_unlock_spin(&sched_lock); + thread_unlock(td2); return (0); } @@ -368,10 +368,10 @@ linux_vfork(struct thread *td, struct linux_vfork_args *args) /* * Make this runnable after we are finished with it. */ - mtx_lock_spin(&sched_lock); + thread_lock(td2); TD_SET_CAN_RUN(td2); sched_add(td2, SRQ_BORING); - mtx_unlock_spin(&sched_lock); + thread_unlock(td2); /* wait for the children to exit, ie. emulate vfork */ PROC_LOCK(p2); @@ -569,10 +569,10 @@ linux_clone(struct thread *td, struct linux_clone_args *args) /* * Make this runnable after we are finished with it. 
*/ - mtx_lock_spin(&sched_lock); + thread_lock(td2); TD_SET_CAN_RUN(td2); sched_add(td2, SRQ_BORING); - mtx_unlock_spin(&sched_lock); + thread_unlock(td2); td->td_retval[0] = p2->p_pid; td->td_retval[1] = 0; diff --git a/sys/ia64/ia64/pmap.c b/sys/ia64/ia64/pmap.c index 8879fd783ebe..0a0694e97624 100644 --- a/sys/ia64/ia64/pmap.c +++ b/sys/ia64/ia64/pmap.c @@ -2235,8 +2235,7 @@ pmap_switch(pmap_t pm) pmap_t prevpm; int i; - mtx_assert(&sched_lock, MA_OWNED); - + THREAD_LOCK_ASSERT(curthread, MA_OWNED); prevpm = PCPU_GET(current_pmap); if (prevpm == pm) return (prevpm); @@ -2263,10 +2262,13 @@ static pmap_t pmap_install(pmap_t pm) { pmap_t prevpm; + struct thread *td; - mtx_lock_spin(&sched_lock); + td = curthread; + thread_lock(td); prevpm = pmap_switch(pm); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); + return (prevpm); } diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 8f99b19bab97..0be3af326d98 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -713,9 +713,9 @@ create_init(const void *udata __unused) PROC_UNLOCK(initproc); crfree(oldcred); cred_update_thread(FIRST_THREAD_IN_PROC(initproc)); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(initproc); initproc->p_sflag |= PS_INMEM; - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(initproc); cpu_set_fork_handler(FIRST_THREAD_IN_PROC(initproc), start_init, NULL); } SYSINIT(init, SI_SUB_CREATE_INIT, SI_ORDER_FIRST, create_init, NULL) @@ -729,9 +729,9 @@ kick_init(const void *udata __unused) struct thread *td; td = FIRST_THREAD_IN_PROC(initproc); - mtx_lock_spin(&sched_lock); + thread_lock(td); TD_SET_CAN_RUN(td); sched_add(td, SRQ_BORING); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } SYSINIT(kickinit, SI_SUB_KTHREAD_INIT, SI_ORDER_FIRST, kick_init, NULL) diff --git a/sys/kern/kern_acct.c b/sys/kern/kern_acct.c index 69a171afa84f..e7409b33bdc0 100644 --- a/sys/kern/kern_acct.c +++ b/sys/kern/kern_acct.c @@ -612,9 +612,9 @@ acct_thread(void *dummy) /* This is a low-priority kernel thread. */ pri = PRI_MAX_KERN; - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_prio(curthread, pri); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); /* If another accounting kthread is already running, just die. 
*/ sx_xlock(&acct_sx); diff --git a/sys/kern/kern_cpu.c b/sys/kern/kern_cpu.c index 3d2adfd2f813..6a72b9c6de58 100644 --- a/sys/kern/kern_cpu.c +++ b/sys/kern/kern_cpu.c @@ -298,17 +298,17 @@ cf_set_method(device_t dev, const struct cf_level *level, int priority) cpu_id = PCPU_GET(cpuid); pc = cpu_get_pcpu(set->dev); if (cpu_id != pc->pc_cpuid) { - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_bind(curthread, pc->pc_cpuid); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); } CF_DEBUG("setting abs freq %d on %s (cpu %d)\n", set->freq, device_get_nameunit(set->dev), PCPU_GET(cpuid)); error = CPUFREQ_DRV_SET(set->dev, set); if (cpu_id != pc->pc_cpuid) { - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_unbind(curthread); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); } if (error) { goto out; @@ -327,17 +327,17 @@ cf_set_method(device_t dev, const struct cf_level *level, int priority) cpu_id = PCPU_GET(cpuid); pc = cpu_get_pcpu(set->dev); if (cpu_id != pc->pc_cpuid) { - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_bind(curthread, pc->pc_cpuid); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); } CF_DEBUG("setting rel freq %d on %s (cpu %d)\n", set->freq, device_get_nameunit(set->dev), PCPU_GET(cpuid)); error = CPUFREQ_DRV_SET(set->dev, set); if (cpu_id != pc->pc_cpuid) { - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_unbind(curthread); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); } if (error) { /* XXX Back out any successful setting? */ diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 54ac39247ef6..9cab3216bc7e 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -523,12 +523,13 @@ exit1(struct thread *td, int rv) * proc lock. */ wakeup(p->p_pptr); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p->p_pptr); + sched_exit(p->p_pptr, td); + PROC_SUNLOCK(p->p_pptr); + PROC_SLOCK(p); p->p_state = PRS_ZOMBIE; PROC_UNLOCK(p->p_pptr); - sched_exit(p->p_pptr, td); - /* * Hopefully no one will try to deliver a signal to the process this * late in the game. @@ -718,12 +719,13 @@ kern_wait(struct thread *td, pid_t pid, int *status, int options, * in thread_exit() after having dropped the process * lock via PROC_UNLOCK() but before it has completed * cpu_throw(). In that case, the other thread must - * still hold sched_lock, so simply by acquiring - * sched_lock once we will wait long enough for the + * still hold the proc slock, so simply by acquiring + * proc slock once we will wait long enough for the * thread to exit in that case. + * XXX This is questionable. 
*/ - mtx_lock_spin(&sched_lock); - mtx_unlock_spin(&sched_lock); + PROC_SLOCK(p); + PROC_SUNLOCK(p); td->td_retval[0] = p->p_pid; if (status) @@ -820,12 +822,12 @@ kern_wait(struct thread *td, pid_t pid, int *status, int options, sx_xunlock(&allproc_lock); return (0); } - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if ((p->p_flag & P_STOPPED_SIG) && (p->p_suspcount == p->p_numthreads) && (p->p_flag & P_WAITED) == 0 && (p->p_flag & P_TRACED || options & WUNTRACED)) { - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); p->p_flag |= P_WAITED; sx_xunlock(&proctree_lock); td->td_retval[0] = p->p_pid; @@ -839,7 +841,7 @@ kern_wait(struct thread *td, pid_t pid, int *status, int options, return (0); } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); if (options & WCONTINUED && (p->p_flag & P_CONTINUED)) { sx_xunlock(&proctree_lock); td->td_retval[0] = p->p_pid; diff --git a/sys/kern/kern_idle.c b/sys/kern/kern_idle.c index f8ef0dd0acc7..7d0925ee9579 100644 --- a/sys/kern/kern_idle.c +++ b/sys/kern/kern_idle.c @@ -73,13 +73,13 @@ idle_setup(void *dummy) PROC_LOCK(p); p->p_flag |= P_NOLOAD; - mtx_lock_spin(&sched_lock); td = FIRST_THREAD_IN_PROC(p); + thread_lock(td); TD_SET_CAN_RUN(td); td->td_flags |= TDF_IDLETD; sched_class(td, PRI_IDLE); sched_prio(td, PRI_MAX_IDLE); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); PROC_UNLOCK(p); #ifdef SMP } diff --git a/sys/kern/kern_intr.c b/sys/kern/kern_intr.c index a33f96806e6c..d754440c3c15 100644 --- a/sys/kern/kern_intr.c +++ b/sys/kern/kern_intr.c @@ -173,9 +173,9 @@ ithread_update(struct intr_thread *ithd) /* Update name and priority. */ strlcpy(td->td_proc->p_comm, ie->ie_fullname, sizeof(td->td_proc->p_comm)); - mtx_lock_spin(&sched_lock); + thread_lock(td); sched_prio(td, pri); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } /* @@ -342,10 +342,10 @@ ithread_create(const char *name) if (error) panic("kthread_create() failed with %d", error); td = FIRST_THREAD_IN_PROC(p); /* XXXKSE */ - mtx_lock_spin(&sched_lock); + thread_lock(td); sched_class(td, PRI_ITHD); TD_SET_IWAIT(td); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); td->td_pflags |= TDP_ITHREAD; ithd->it_thread = td; CTR2(KTR_INTR, "%s: created %s", __func__, name); @@ -367,10 +367,10 @@ ithread_create(const char *name, struct intr_handler *ih) if (error) panic("kthread_create() failed with %d", error); td = FIRST_THREAD_IN_PROC(p); /* XXXKSE */ - mtx_lock_spin(&sched_lock); + thread_lock(td); sched_class(td, PRI_ITHD); TD_SET_IWAIT(td); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); td->td_pflags |= TDP_ITHREAD; ithd->it_thread = td; CTR2(KTR_INTR, "%s: created %s", __func__, name); @@ -385,13 +385,13 @@ ithread_destroy(struct intr_thread *ithread) CTR2(KTR_INTR, "%s: killing %s", __func__, ithread->it_event->ie_name); td = ithread->it_thread; - mtx_lock_spin(&sched_lock); + thread_lock(td); ithread->it_flags |= IT_DEAD; if (TD_AWAITING_INTR(td)) { TD_CLR_IWAIT(td); sched_add(td, SRQ_INTR); } - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } #ifndef INTR_FILTER @@ -622,7 +622,7 @@ intr_event_remove_handler(void *cookie) * so we have to remove the handler here rather than letting the * thread do it. 
*/ - mtx_lock_spin(&sched_lock); + thread_lock(ie->ie_thread->it_thread); if (!TD_AWAITING_INTR(ie->ie_thread->it_thread) && !cold) { handler->ih_flags |= IH_DEAD; @@ -634,7 +634,7 @@ intr_event_remove_handler(void *cookie) ie->ie_thread->it_need = 1; } else TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next); - mtx_unlock_spin(&sched_lock); + thread_unlock(ie->ie_thread->it_thread); while (handler->ih_flags & IH_DEAD) msleep(handler, &ie->ie_lock, 0, "iev_rmh", 0); intr_event_update(ie); @@ -699,11 +699,11 @@ intr_event_schedule_thread(struct intr_event *ie) /* * Set it_need to tell the thread to keep running if it is already - * running. Then, grab sched_lock and see if we actually need to - * put this thread on the runqueue. + * running. Then, lock the thread and see if we actually need to + * put it on the runqueue. */ it->it_need = 1; - mtx_lock_spin(&sched_lock); + thread_lock(td); if (TD_AWAITING_INTR(td)) { CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid, p->p_comm); @@ -713,7 +713,7 @@ intr_event_schedule_thread(struct intr_event *ie) CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d", __func__, p->p_pid, p->p_comm, it->it_need, td->td_state); } - mtx_unlock_spin(&sched_lock); + thread_unlock(td); return (0); } @@ -771,7 +771,7 @@ intr_event_remove_handler(void *cookie) * so we have to remove the handler here rather than letting the * thread do it. */ - mtx_lock_spin(&sched_lock); + thread_lock(it->it_thread); if (!TD_AWAITING_INTR(it->it_thread) && !cold) { handler->ih_flags |= IH_DEAD; @@ -783,7 +783,7 @@ intr_event_remove_handler(void *cookie) it->it_need = 1; } else TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next); - mtx_unlock_spin(&sched_lock); + thread_unlock(it->it_thread); while (handler->ih_flags & IH_DEAD) msleep(handler, &ie->ie_lock, 0, "iev_rmh", 0); /* @@ -853,11 +853,11 @@ intr_event_schedule_thread(struct intr_event *ie, struct intr_thread *it) /* * Set it_need to tell the thread to keep running if it is already - * running. Then, grab sched_lock and see if we actually need to - * put this thread on the runqueue. + * running. Then, lock the thread and see if we actually need to + * put it on the runqueue. */ it->it_need = 1; - mtx_lock_spin(&sched_lock); + thread_lock(td); if (TD_AWAITING_INTR(td)) { CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid, p->p_comm); @@ -867,7 +867,7 @@ intr_event_schedule_thread(struct intr_event *ie, struct intr_thread *it) CTR5(KTR_INTR, "%s: pid %d (%s): it_need %d, state %d", __func__, p->p_pid, p->p_comm, it->it_need, td->td_state); } - mtx_unlock_spin(&sched_lock); + thread_unlock(td); return (0); } @@ -1128,13 +1128,13 @@ ithread_loop(void *arg) * lock. This may take a while and it_need may get * set again, so we have to check it again. */ - mtx_lock_spin(&sched_lock); + thread_lock(td); if (!ithd->it_need && !(ithd->it_flags & IT_DEAD)) { TD_SET_IWAIT(td); ie->ie_count = 0; mi_switch(SW_VOL, NULL); } - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } } #else @@ -1202,13 +1202,13 @@ ithread_loop(void *arg) * lock. This may take a while and it_need may get * set again, so we have to check it again. 
*/ - mtx_lock_spin(&sched_lock); + thread_lock(td); if (!ithd->it_need && !(ithd->it_flags & IT_DEAD)) { TD_SET_IWAIT(td); ie->ie_count = 0; mi_switch(SW_VOL, NULL); } - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } } diff --git a/sys/kern/kern_kthread.c b/sys/kern/kern_kthread.c index c473bb0948cd..03f7f47020b3 100644 --- a/sys/kern/kern_kthread.c +++ b/sys/kern/kern_kthread.c @@ -113,9 +113,9 @@ kthread_create(void (*func)(void *), void *arg, /* Delay putting it on the run queue until now. */ if (!(flags & RFSTOPPED)) { - mtx_lock_spin(&sched_lock); + thread_lock(td); sched_add(td, SRQ_BORING); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } return 0; diff --git a/sys/kern/kern_lockf.c b/sys/kern/kern_lockf.c index 483f953f8cb8..aaedc114ab68 100644 --- a/sys/kern/kern_lockf.c +++ b/sys/kern/kern_lockf.c @@ -266,16 +266,19 @@ lf_setlock(lock) */ if ((lock->lf_flags & F_POSIX) && (block->lf_flags & F_POSIX)) { - register struct proc *wproc; + struct proc *wproc; + struct proc *nproc; struct thread *td; - register struct lockf *waitblock; + struct lockf *waitblock; int i = 0; /* The block is waiting on something */ - /* XXXKSE this is not complete under threads */ wproc = (struct proc *)block->lf_id; - mtx_lock_spin(&sched_lock); +restart: + nproc = NULL; + PROC_SLOCK(wproc); FOREACH_THREAD_IN_PROC(wproc, td) { + thread_lock(td); while (td->td_wchan && (td->td_wmesg == lockstr) && (i++ < maxlockdepth)) { @@ -284,15 +287,20 @@ lf_setlock(lock) waitblock = waitblock->lf_next; if ((waitblock->lf_flags & F_POSIX) == 0) break; - wproc = (struct proc *)waitblock->lf_id; - if (wproc == (struct proc *)lock->lf_id) { - mtx_unlock_spin(&sched_lock); + nproc = (struct proc *)waitblock->lf_id; + if (nproc == (struct proc *)lock->lf_id) { + PROC_SUNLOCK(wproc); + thread_unlock(td); free(lock, M_LOCKF); return (EDEADLK); } } + thread_unlock(td); } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(wproc); + wproc = nproc; + if (wproc) + goto restart; } /* * For flock type locks, we must first remove diff --git a/sys/kern/kern_poll.c b/sys/kern/kern_poll.c index 3e3efbf8b48c..fa5d58a3134b 100644 --- a/sys/kern/kern_poll.c +++ b/sys/kern/kern_poll.c @@ -580,17 +580,17 @@ poll_idle(void) rtp.prio = RTP_PRIO_MAX; /* lowest priority */ rtp.type = RTP_PRIO_IDLE; - mtx_lock_spin(&sched_lock); + PROC_SLOCK(td->td_proc); rtp_to_pri(&rtp, td); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(td->td_proc); for (;;) { if (poll_in_idle_loop && poll_handlers > 0) { idlepoll_sleeping = 0; ether_poll(poll_each_burst); - mtx_lock_spin(&sched_lock); + thread_lock(td); mi_switch(SW_VOL, NULL); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } else { idlepoll_sleeping = 1; tsleep(&idlepoll_sleeping, 0, "pollid", hz * 3); diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index a8ac12e92ccd..7abdfcf90f84 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -177,6 +177,7 @@ proc_init(void *mem, int size, int flags) td = thread_alloc(); bzero(&p->p_mtx, sizeof(struct mtx)); mtx_init(&p->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK); + mtx_init(&p->p_slock, "process slock", NULL, MTX_SPIN | MTX_RECURSE); p->p_stats = pstats_alloc(); proc_linkup(p, td); sched_newproc(p, td); @@ -669,7 +670,7 @@ fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp) kp->ki_sigcatch = ps->ps_sigcatch; mtx_unlock(&ps->ps_mtx); } - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (p->p_state != PRS_NEW && p->p_state != PRS_ZOMBIE && p->p_vmspace != NULL) { @@ -695,7 +696,7 @@ fill_kinfo_proc_only(struct proc *p, 
struct kinfo_proc *kp) kp->ki_nice = p->p_nice; rufetch(p, &kp->ki_rusage); kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); if ((p->p_sflag & PS_INMEM) && p->p_stats != NULL) { kp->ki_start = p->p_stats->p_start; timevaladd(&kp->ki_start, &boottime); @@ -747,7 +748,7 @@ fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp) /* * Fill in information that is thread specific. - * Must be called with sched_lock locked. + * Must be called with p_slock locked. */ static void fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp) @@ -755,7 +756,9 @@ fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp) struct proc *p; p = td->td_proc; + PROC_SLOCK_ASSERT(p, MA_OWNED); + thread_lock(td); if (td->td_wmesg != NULL) strlcpy(kp->ki_wmesg, td->td_wmesg, sizeof(kp->ki_wmesg)); else @@ -813,6 +816,7 @@ fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp) SIGSETOR(kp->ki_siglist, td->td_siglist); kp->ki_sigmask = td->td_sigmask; + thread_unlock(td); } /* @@ -824,10 +828,10 @@ fill_kinfo_proc(struct proc *p, struct kinfo_proc *kp) { fill_kinfo_proc_only(p, kp); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (FIRST_THREAD_IN_PROC(p) != NULL) fill_kinfo_thread(FIRST_THREAD_IN_PROC(p), kp); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } struct pstats * @@ -894,14 +898,14 @@ sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags) fill_kinfo_proc_only(p, &kinfo_proc); if (flags & KERN_PROC_NOTHREADS) { - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (FIRST_THREAD_IN_PROC(p) != NULL) fill_kinfo_thread(FIRST_THREAD_IN_PROC(p), &kinfo_proc); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); error = SYSCTL_OUT(req, (caddr_t)&kinfo_proc, sizeof(kinfo_proc)); } else { - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (FIRST_THREAD_IN_PROC(p) != NULL) FOREACH_THREAD_IN_PROC(p, td) { fill_kinfo_thread(td, &kinfo_proc); @@ -913,7 +917,7 @@ sysctl_out_proc(struct proc *p, struct sysctl_req *req, int flags) else error = SYSCTL_OUT(req, (caddr_t)&kinfo_proc, sizeof(kinfo_proc)); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } PROC_UNLOCK(p); if (error) @@ -1003,12 +1007,12 @@ sysctl_kern_proc(SYSCTL_HANDLER_ARGS) /* * Skip embryonic processes. 
*/ - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (p->p_state == PRS_NEW) { - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); continue; } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); PROC_LOCK(p); KASSERT(p->p_ucred != NULL, ("process credential is NULL for non-NEW proc")); diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c index 8ddff9a9bddd..a0e39b947849 100644 --- a/sys/kern/kern_resource.c +++ b/sys/kern/kern_resource.c @@ -263,9 +263,9 @@ donice(struct thread *td, struct proc *p, int n) n = PRIO_MIN; if (n < p->p_nice && priv_check(td, PRIV_SCHED_SETPRIORITY) != 0) return (EACCES); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); sched_nice(p, n); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); return (0); } @@ -306,7 +306,7 @@ rtprio_thread(struct thread *td, struct rtprio_thread_args *uap) case RTP_LOOKUP: if ((error = p_cansee(td, p))) break; - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (uap->lwpid == 0 || uap->lwpid == td->td_tid) td1 = td; else @@ -315,7 +315,7 @@ rtprio_thread(struct thread *td, struct rtprio_thread_args *uap) pri_to_rtp(td1, &rtp); else error = ESRCH; - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); PROC_UNLOCK(p); return (copyout(&rtp, uap->rtp, sizeof(struct rtprio))); case RTP_SET: @@ -342,7 +342,7 @@ rtprio_thread(struct thread *td, struct rtprio_thread_args *uap) } } - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (uap->lwpid == 0 || uap->lwpid == td->td_tid) td1 = td; else @@ -351,7 +351,7 @@ rtprio_thread(struct thread *td, struct rtprio_thread_args *uap) error = rtp_to_pri(&rtp, td1); else error = ESRCH; - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); break; default: error = EINVAL; @@ -402,7 +402,7 @@ rtprio(td, uap) case RTP_LOOKUP: if ((error = p_cansee(td, p))) break; - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); /* * Return OUR priority if no pid specified, * or if one is, report the highest priority @@ -430,7 +430,7 @@ rtprio(td, uap) } } } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); PROC_UNLOCK(p); return (copyout(&rtp, uap->rtp, sizeof(struct rtprio))); case RTP_SET: @@ -468,7 +468,7 @@ rtprio(td, uap) * do all the threads on that process. If we * specify our own pid we do the latter. 
*/ - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (uap->pid == 0) { error = rtp_to_pri(&rtp, td); } else { @@ -477,7 +477,7 @@ rtprio(td, uap) break; } } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); break; default: error = EINVAL; @@ -492,9 +492,9 @@ rtp_to_pri(struct rtprio *rtp, struct thread *td) { u_char newpri; - mtx_assert(&sched_lock, MA_OWNED); if (rtp->prio > RTP_PRIO_MAX) return (EINVAL); + thread_lock(td); switch (RTP_PRIO_BASE(rtp->type)) { case RTP_PRIO_REALTIME: newpri = PRI_MIN_REALTIME + rtp->prio; @@ -506,12 +506,14 @@ rtp_to_pri(struct rtprio *rtp, struct thread *td) newpri = PRI_MIN_IDLE + rtp->prio; break; default: + thread_unlock(td); return (EINVAL); } sched_class(td, rtp->type); /* XXX fix */ sched_user_prio(td, newpri); if (curthread == td) sched_prio(curthread, td->td_user_pri); /* XXX dubious */ + thread_unlock(td); return (0); } @@ -519,7 +521,7 @@ void pri_to_rtp(struct thread *td, struct rtprio *rtp) { - mtx_assert(&sched_lock, MA_OWNED); + thread_lock(td); switch (PRI_BASE(td->td_pri_class)) { case PRI_REALTIME: rtp->prio = td->td_base_user_pri - PRI_MIN_REALTIME; @@ -534,6 +536,7 @@ pri_to_rtp(struct thread *td, struct rtprio *rtp) break; } rtp->type = td->td_pri_class; + thread_unlock(td); } #if defined(COMPAT_43) @@ -634,10 +637,13 @@ lim_cb(void *arg) */ if (p->p_cpulimit == RLIM_INFINITY) return; - mtx_lock_spin(&sched_lock); - FOREACH_THREAD_IN_PROC(p, td) + PROC_SLOCK(p); + FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); ruxagg(&p->p_rux, td); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); + } + PROC_SUNLOCK(p); if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) { lim_rlimit(p, RLIMIT_CPU, &rlim); if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) { @@ -699,9 +705,9 @@ kern_setrlimit(td, which, limp) if (limp->rlim_cur != RLIM_INFINITY && p->p_cpulimit == RLIM_INFINITY) callout_reset(&p->p_limco, hz, lim_cb, p); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); p->p_cpulimit = limp->rlim_cur; - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); break; case RLIMIT_DATA: if (limp->rlim_cur > maxdsiz) @@ -828,9 +834,7 @@ calcru(struct proc *p, struct timeval *up, struct timeval *sp) uint64_t u; PROC_LOCK_ASSERT(p, MA_OWNED); - mtx_assert(&sched_lock, MA_NOTOWNED); - mtx_lock_spin(&sched_lock); - + PROC_SLOCK(p); /* * If we are getting stats for the current process, then add in the * stats that this thread has accumulated in its current time slice. 
@@ -843,9 +847,9 @@ calcru(struct proc *p, struct timeval *up, struct timeval *sp) p->p_rux.rux_runtime += u - PCPU_GET(switchtime); PCPU_SET(switchtime, u); } - /* Work on a copy of p_rux so we can let go of sched_lock */ + /* Work on a copy of p_rux so we can let go of p_slock */ rux = p->p_rux; - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); calcru1(p, &rux, up, sp); /* Update the result from the p_rux copy */ p->p_rux.rux_uu = rux.rux_uu; @@ -1013,6 +1017,9 @@ ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2, void ruxagg(struct rusage_ext *rux, struct thread *td) { + + THREAD_LOCK_ASSERT(td, MA_OWNED); + PROC_SLOCK_ASSERT(td->td_proc, MA_OWNED); rux->rux_runtime += td->td_runtime; rux->rux_uticks += td->td_uticks; rux->rux_sticks += td->td_sticks; @@ -1033,17 +1040,19 @@ rufetch(struct proc *p, struct rusage *ru) struct thread *td; memset(ru, 0, sizeof(*ru)); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (p->p_ru == NULL) { KASSERT(p->p_numthreads > 0, ("rufetch: No threads or ru in proc %p", p)); FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); ruxagg(&p->p_rux, td); + thread_unlock(td); rucollect(ru, &td->td_ru); } } else *ru = *p->p_ru; - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } /* diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c index 01035e18fb62..bb54faa732d3 100644 --- a/sys/kern/kern_shutdown.c +++ b/sys/kern/kern_shutdown.c @@ -267,9 +267,9 @@ boot(int howto) * systems don't shutdown properly (i.e., ACPI power off) if we * run on another processor. */ - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_bind(curthread, 0); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); KASSERT(PCPU_GET(cpuid) == 0, ("boot: not running on cpu 0")); #endif /* We're in the process of rebooting. */ @@ -340,9 +340,9 @@ boot(int howto) */ DROP_GIANT(); for (subiter = 0; subiter < 50 * iter; subiter++) { - mtx_lock_spin(&sched_lock); + thread_lock(curthread); mi_switch(SW_VOL, NULL); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); DELAY(1000); } PICKUP_GIANT(); @@ -555,9 +555,9 @@ panic(const char *fmt, ...) } #endif #endif - mtx_lock_spin(&sched_lock); + /*thread_lock(td); */ td->td_flags |= TDF_INPANIC; - mtx_unlock_spin(&sched_lock); + /* thread_unlock(td); */ if (!sync_on_panic) bootopt |= RB_NOSYNC; boot(bootopt); diff --git a/sys/kern/kern_subr.c b/sys/kern/kern_subr.c index 8dc207ea22ad..2101026ce5e4 100644 --- a/sys/kern/kern_subr.c +++ b/sys/kern/kern_subr.c @@ -453,11 +453,11 @@ uio_yield(void) struct thread *td; td = curthread; - mtx_lock_spin(&sched_lock); DROP_GIANT(); + thread_lock(td); sched_prio(td, td->td_user_pri); mi_switch(SW_INVOL, NULL); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); PICKUP_GIANT(); } diff --git a/sys/kern/kern_thr.c b/sys/kern/kern_thr.c index 3201955babbc..33ec6c49e616 100644 --- a/sys/kern/kern_thr.c +++ b/sys/kern/kern_thr.c @@ -226,12 +226,15 @@ create_thread(struct thread *td, mcontext_t *ctx, PROC_LOCK(td->td_proc); td->td_proc->p_flag |= P_HADTHREADS; newtd->td_sigmask = td->td_sigmask; - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); thread_link(newtd, p); - PROC_UNLOCK(p); - + thread_lock(td); /* let the scheduler know about these things. 
*/ sched_fork_thread(td, newtd); + thread_unlock(td); + PROC_SUNLOCK(p); + PROC_UNLOCK(p); + thread_lock(newtd); if (rtp != NULL) { if (!(td->td_pri_class == PRI_TIMESHARE && rtp->type == RTP_PRIO_NORMAL)) { @@ -242,7 +245,7 @@ create_thread(struct thread *td, mcontext_t *ctx, TD_SET_CAN_RUN(newtd); /* if ((flags & THR_SUSPENDED) == 0) */ sched_add(newtd, SRQ_BORING); - mtx_unlock_spin(&sched_lock); + thread_unlock(newtd); return (error); } @@ -275,7 +278,7 @@ thr_exit(struct thread *td, struct thr_exit_args *uap) PROC_LOCK(p); sigqueue_flush(&td->td_sigqueue); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); /* * Shutting down last thread in the proc. This will actually @@ -286,7 +289,7 @@ thr_exit(struct thread *td, struct thr_exit_args *uap) thread_exit(); /* NOTREACHED */ } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); PROC_UNLOCK(p); return (0); } @@ -379,9 +382,9 @@ kern_thr_suspend(struct thread *td, struct timespec *tsp) error = msleep((void *)td, &td->td_proc->p_mtx, PCATCH, "lthr", hz); if (td->td_flags & TDF_THRWAKEUP) { - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags &= ~TDF_THRWAKEUP; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); PROC_UNLOCK(td->td_proc); return (0); } @@ -414,9 +417,9 @@ thr_wake(struct thread *td, struct thr_wake_args *uap) PROC_UNLOCK(p); return (ESRCH); } - mtx_lock_spin(&sched_lock); + thread_lock(ttd); ttd->td_flags |= TDF_THRWAKEUP; - mtx_unlock_spin(&sched_lock); + thread_unlock(ttd); wakeup((void *)ttd); PROC_UNLOCK(p); return (0); diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c index c434b9374010..8634c8a68d85 100644 --- a/sys/kern/kern_time.c +++ b/sys/kern/kern_time.c @@ -552,9 +552,9 @@ kern_getitimer(struct thread *td, u_int which, struct itimerval *aitv) timevalsub(&aitv->it_value, &ctv); } } else { - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); *aitv = p->p_stats->p_timer[which]; - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } return (0); } @@ -623,10 +623,10 @@ kern_setitimer(struct thread *td, u_int which, struct itimerval *aitv, timevalsub(&oitv->it_value, &ctv); } } else { - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); *oitv = p->p_stats->p_timer[which]; p->p_stats->p_timer[which] = *aitv; - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } return (0); } diff --git a/sys/kern/ksched.c b/sys/kern/ksched.c index 785c04c38c2e..192034fec9cd 100644 --- a/sys/kern/ksched.c +++ b/sys/kern/ksched.c @@ -104,9 +104,7 @@ getscheduler(struct ksched *ksched, struct thread *td, int *policy) struct rtprio rtp; int e = 0; - mtx_lock_spin(&sched_lock); pri_to_rtp(td, &rtp); - mtx_unlock_spin(&sched_lock); switch (rtp.type) { case RTP_PRIO_FIFO: @@ -151,9 +149,7 @@ ksched_getparam(struct ksched *ksched, { struct rtprio rtp; - mtx_lock_spin(&sched_lock); pri_to_rtp(td, &rtp); - mtx_unlock_spin(&sched_lock); if (RTP_PRIO_IS_REALTIME(rtp.type)) param->sched_priority = rtpprio_to_p4prio(rtp.prio); @@ -186,9 +182,7 @@ ksched_setscheduler(struct ksched *ksched, rtp.type = (policy == SCHED_FIFO) ? 
RTP_PRIO_FIFO : RTP_PRIO_REALTIME; - mtx_lock_spin(&sched_lock); rtp_to_pri(&rtp, td); - mtx_unlock_spin(&sched_lock); } else e = EPERM; @@ -200,9 +194,7 @@ ksched_setscheduler(struct ksched *ksched, { rtp.type = RTP_PRIO_NORMAL; rtp.prio = p4prio_to_rtpprio(param->sched_priority); - mtx_lock_spin(&sched_lock); rtp_to_pri(&rtp, td); - mtx_unlock_spin(&sched_lock); } break; diff --git a/sys/kern/subr_prof.c b/sys/kern/subr_prof.c index 498d9d2fc958..8da12ac9e559 100644 --- a/sys/kern/subr_prof.c +++ b/sys/kern/subr_prof.c @@ -423,12 +423,12 @@ profil(td, uap) } PROC_LOCK(p); upp = &td->td_proc->p_stats->p_prof; - mtx_lock_spin(&time_lock); + PROC_SLOCK(p); upp->pr_off = uap->offset; upp->pr_scale = uap->scale; upp->pr_base = uap->samples; upp->pr_size = uap->size; - mtx_unlock_spin(&time_lock); + PROC_SUNLOCK(p); startprofclock(p); PROC_UNLOCK(p); @@ -468,22 +468,22 @@ addupc_intr(struct thread *td, uintfptr_t pc, u_int ticks) if (ticks == 0) return; prof = &td->td_proc->p_stats->p_prof; - mtx_lock_spin(&time_lock); + PROC_SLOCK(td->td_proc); if (pc < prof->pr_off || (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size) { - mtx_unlock_spin(&time_lock); + PROC_SUNLOCK(td->td_proc); return; /* out of range; ignore */ } addr = prof->pr_base + i; - mtx_unlock_spin(&time_lock); + PROC_SUNLOCK(td->td_proc); if ((v = fuswintr(addr)) == -1 || suswintr(addr, v + ticks) == -1) { td->td_profil_addr = pc; td->td_profil_ticks = ticks; td->td_pflags |= TDP_OWEUPC; - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags |= TDF_ASTPENDING; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); } } @@ -511,12 +511,15 @@ addupc_task(struct thread *td, uintfptr_t pc, u_int ticks) } p->p_profthreads++; prof = &p->p_stats->p_prof; + PROC_SLOCK(p); if (pc < prof->pr_off || (i = PC_TO_INDEX(pc, prof)) >= prof->pr_size) { + PROC_SUNLOCK(p); goto out; } addr = prof->pr_base + i; + PROC_SUNLOCK(p); PROC_UNLOCK(p); if (copyin(addr, &v, sizeof(v)) == 0) { v += ticks; diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c index d16e2fdb467e..6205799d19db 100644 --- a/sys/kern/subr_smp.c +++ b/sys/kern/subr_smp.c @@ -159,7 +159,7 @@ forward_signal(struct thread *td) * this thread, so all we need to do is poke it if it is currently * executing so that it executes ast(). */ - mtx_assert(&sched_lock, MA_OWNED); + THREAD_LOCK_ASSERT(td, MA_OWNED); KASSERT(TD_IS_RUNNING(td), ("forward_signal: thread is not TDS_RUNNING")); @@ -187,8 +187,6 @@ forward_roundrobin(void) struct thread *td; cpumask_t id, map, me; - mtx_assert(&sched_lock, MA_OWNED); - CTR0(KTR_SMP, "forward_roundrobin()"); if (!smp_started || cold || panicstr) diff --git a/sys/kern/subr_taskqueue.c b/sys/kern/subr_taskqueue.c index 3ae846d707a5..3e210ccbf23f 100644 --- a/sys/kern/subr_taskqueue.c +++ b/sys/kern/subr_taskqueue.c @@ -349,15 +349,15 @@ taskqueue_start_threads(struct taskqueue **tqp, int count, int pri, } else tq->tq_pcount++; } - mtx_lock_spin(&sched_lock); for (i = 0; i < count; i++) { if (tq->tq_pproc[i] == NULL) continue; td = FIRST_THREAD_IN_PROC(tq->tq_pproc[i]); + thread_lock(td); sched_prio(td, pri); sched_add(td, SRQ_BORING); + thread_unlock(td); } - mtx_unlock_spin(&sched_lock); return (0); } diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index f839ace03637..6fc92cc35bc6 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -82,11 +82,11 @@ userret(struct thread *td, struct trapframe *frame) #ifdef DIAGNOSTIC /* Check that we called signotify() enough. 
*/ PROC_LOCK(p); - mtx_lock_spin(&sched_lock); + thread_lock(td); if (SIGPENDING(td) && ((td->td_flags & TDF_NEEDSIGCHK) == 0 || (td->td_flags & TDF_ASTPENDING) == 0)) printf("failed to set signal flags properly for ast()\n"); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); PROC_UNLOCK(p); #endif @@ -163,7 +163,7 @@ ast(struct trapframe *framep) KASSERT(TRAPF_USERMODE(framep), ("ast in kernel mode")); WITNESS_WARN(WARN_PANIC, NULL, "Returning to user mode"); mtx_assert(&Giant, MA_NOTOWNED); - mtx_assert(&sched_lock, MA_NOTOWNED); + THREAD_LOCK_ASSERT(td, MA_NOTOWNED); td->td_frame = framep; td->td_pticks = 0; @@ -179,8 +179,7 @@ ast(struct trapframe *framep) * AST's saved in sflag, the astpending flag will be set and * ast() will be called again. */ - mtx_lock_spin(&sched_lock); - flags = td->td_flags; + PROC_SLOCK(p); sflag = p->p_sflag; if (p->p_sflag & (PS_ALRMPEND | PS_PROFPEND)) p->p_sflag &= ~(PS_ALRMPEND | PS_PROFPEND); @@ -188,9 +187,12 @@ ast(struct trapframe *framep) if (p->p_sflag & PS_MACPEND) p->p_sflag &= ~PS_MACPEND; #endif + thread_lock(td); + PROC_SUNLOCK(p); + flags = td->td_flags; td->td_flags &= ~(TDF_ASTPENDING | TDF_NEEDSIGCHK | TDF_NEEDRESCHED | TDF_INTERRUPT); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); PCPU_INC(cnt.v_trap); /* @@ -239,10 +241,11 @@ ast(struct trapframe *framep) if (KTRPOINT(td, KTR_CSW)) ktrcsw(1, 1); #endif - mtx_lock_spin(&sched_lock); + thread_lock(td); sched_prio(td, td->td_user_pri); + SCHED_STAT_INC(switch_needresched); mi_switch(SW_INVOL, NULL); - mtx_unlock_spin(&sched_lock); + thread_unlock(td); #ifdef KTRACE if (KTRPOINT(td, KTR_CSW)) ktrcsw(0, 1); diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index 50044607f0fa..992d181f0088 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -722,9 +722,9 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, mtx_lock(&sellock); retry: ncoll = nselcoll; - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags |= TDF_SELECT; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); mtx_unlock(&sellock); error = selscan(td, ibits, obits, nd); @@ -747,12 +747,12 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, * collisions and rescan the file descriptors if * necessary. */ - mtx_lock_spin(&sched_lock); + thread_lock(td); if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) { - mtx_unlock_spin(&sched_lock); + thread_unlock(td); goto retry; } - mtx_unlock_spin(&sched_lock); + thread_unlock(td); if (timo > 0) error = cv_timedwait_sig(&selwait, &sellock, timo); @@ -764,9 +764,9 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou, done: clear_selinfo_list(td); - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags &= ~TDF_SELECT; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); mtx_unlock(&sellock); done_nosellock: @@ -896,9 +896,9 @@ poll(td, uap) mtx_lock(&sellock); retry: ncoll = nselcoll; - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags |= TDF_SELECT; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); mtx_unlock(&sellock); error = pollscan(td, bits, nfds); @@ -919,12 +919,12 @@ poll(td, uap) * sellock, so check TDF_SELECT and the number of collisions * and rescan the file descriptors if necessary. 
*/ - mtx_lock_spin(&sched_lock); + thread_lock(td); if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) { - mtx_unlock_spin(&sched_lock); + thread_unlock(td); goto retry; } - mtx_unlock_spin(&sched_lock); + thread_unlock(td); if (timo > 0) error = cv_timedwait_sig(&selwait, &sellock, timo); @@ -936,9 +936,9 @@ poll(td, uap) done: clear_selinfo_list(td); - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags &= ~TDF_SELECT; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); mtx_unlock(&sellock); done_nosellock: @@ -1109,9 +1109,9 @@ doselwakeup(sip, pri) } TAILQ_REMOVE(&td->td_selq, sip, si_thrlist); sip->si_thread = NULL; - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags &= ~TDF_SELECT; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); sleepq_remove(td, &selwait); mtx_unlock(&sellock); } diff --git a/sys/kern/sys_process.c b/sys/kern/sys_process.c index 785d45d3d241..0ac970613feb 100644 --- a/sys/kern/sys_process.c +++ b/sys/kern/sys_process.c @@ -527,12 +527,12 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data) sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { PROC_LOCK(p); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); FOREACH_THREAD_IN_PROC(p, td2) { if (td2->td_tid == pid) break; } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); if (td2 != NULL) break; /* proc lock held */ PROC_UNLOCK(p); @@ -701,15 +701,15 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data) break; case PT_SUSPEND: - mtx_lock_spin(&sched_lock); + thread_lock(td2); td2->td_flags |= TDF_DBSUSPEND; - mtx_unlock_spin(&sched_lock); + thread_unlock(td2); break; case PT_RESUME: - mtx_lock_spin(&sched_lock); + thread_lock(td2); td2->td_flags &= ~TDF_DBSUSPEND; - mtx_unlock_spin(&sched_lock); + thread_unlock(td2); break; case PT_STEP: @@ -780,32 +780,35 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data) proctree_locked = 0; } /* deliver or queue signal */ - mtx_lock_spin(&sched_lock); + thread_lock(td2); td2->td_flags &= ~TDF_XSIG; - mtx_unlock_spin(&sched_lock); + thread_unlock(td2); td2->td_xsig = data; p->p_xstat = data; p->p_xthread = NULL; if ((p->p_flag & (P_STOPPED_SIG | P_STOPPED_TRACE)) != 0) { - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); if (req == PT_DETACH) { struct thread *td3; - FOREACH_THREAD_IN_PROC(p, td3) + FOREACH_THREAD_IN_PROC(p, td3) { + thread_lock(td3); td3->td_flags &= ~TDF_DBSUSPEND; + thread_unlock(td3); + } } /* * unsuspend all threads, to not let a thread run, * you should use PT_SUSPEND to suspend it before * continuing process. 
*/ - mtx_unlock_spin(&sched_lock); #ifdef KSE + PROC_SUNLOCK(p); thread_continued(p); + PROC_SLOCK(p); #endif p->p_flag &= ~(P_STOPPED_TRACE|P_STOPPED_SIG|P_WAITED); - mtx_lock_spin(&sched_lock); thread_unsuspend(p); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); } if (data) @@ -968,13 +971,13 @@ kern_ptrace(struct thread *td, int req, pid_t pid, void *addr, int data) buf = malloc(num * sizeof(lwpid_t), M_TEMP, M_WAITOK); tmp = 0; PROC_LOCK(p); - mtx_lock_spin(&sched_lock); + PROC_SLOCK(p); FOREACH_THREAD_IN_PROC(p, td2) { if (tmp >= num) break; buf[tmp++] = td2->td_tid; } - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); PROC_UNLOCK(p); error = copyout(buf, addr, tmp * sizeof(lwpid_t)); free(buf, M_TEMP); diff --git a/sys/netncp/ncp_sock.c b/sys/netncp/ncp_sock.c index c4701817b1b1..0fe320fc6c34 100644 --- a/sys/netncp/ncp_sock.c +++ b/sys/netncp/ncp_sock.c @@ -189,9 +189,9 @@ ncp_poll(struct socket *so, int events) /* Fake up enough state to look like we are in poll(2). */ mtx_lock(&sellock); - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags |= TDF_SELECT; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); mtx_unlock(&sellock); TAILQ_INIT(&td->td_selq); @@ -200,9 +200,9 @@ ncp_poll(struct socket *so, int events) /* Tear down the fake poll(2) state. */ mtx_lock(&sellock); clear_selinfo_list(td); - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags &= ~TDF_SELECT; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); mtx_unlock(&sellock); return (revents); @@ -229,9 +229,9 @@ ncp_sock_rselect(struct socket *so, struct thread *td, struct timeval *tv, retry: ncoll = nselcoll; - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags |= TDF_SELECT; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); mtx_unlock(&sellock); TAILQ_INIT(&td->td_selq); @@ -257,12 +257,12 @@ ncp_sock_rselect(struct socket *so, struct thread *td, struct timeval *tv, * the process, test TDF_SELECT and rescan file descriptors if * necessary. */ - mtx_lock_spin(&sched_lock); + thread_lock(td); if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) { - mtx_unlock_spin(&sched_lock); + thread_unlock(td); goto retry; } - mtx_unlock_spin(&sched_lock); + thread_unlock(td); if (timo > 0) error = cv_timedwait(&selwait, &sellock, timo); @@ -274,9 +274,9 @@ ncp_sock_rselect(struct socket *so, struct thread *td, struct timeval *tv, done: clear_selinfo_list(td); - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags &= ~TDF_SELECT; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); mtx_unlock(&sellock); done_noproclock: diff --git a/sys/netsmb/smb_trantcp.c b/sys/netsmb/smb_trantcp.c index 5db6b7e47573..3f4ccaffa208 100644 --- a/sys/netsmb/smb_trantcp.c +++ b/sys/netsmb/smb_trantcp.c @@ -115,9 +115,9 @@ nbssn_rselect(struct nbpcb *nbp, struct timeval *tv, int events, retry: ncoll = nselcoll; - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags |= TDF_SELECT; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); mtx_unlock(&sellock); /* XXX: Should be done when the thread is initialized. */ @@ -144,12 +144,12 @@ nbssn_rselect(struct nbpcb *nbp, struct timeval *tv, int events, * the process, test P_SELECT and rescan file descriptors if * necessary. 
*/ - mtx_lock_spin(&sched_lock); + thread_lock(td); if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) { - mtx_unlock_spin(&sched_lock); + thread_unlock(td); goto retry; } - mtx_unlock_spin(&sched_lock); + thread_unlock(td); if (timo > 0) error = cv_timedwait(&selwait, &sellock, timo); @@ -161,9 +161,9 @@ nbssn_rselect(struct nbpcb *nbp, struct timeval *tv, int events, done: clear_selinfo_list(td); - mtx_lock_spin(&sched_lock); + thread_lock(td); td->td_flags &= ~TDF_SELECT; - mtx_unlock_spin(&sched_lock); + thread_unlock(td); mtx_unlock(&sellock); done_noproclock: diff --git a/sys/pc98/pc98/machdep.c b/sys/pc98/pc98/machdep.c index b5b61b244a14..1e5f6cfa00ae 100644 --- a/sys/pc98/pc98/machdep.c +++ b/sys/pc98/pc98/machdep.c @@ -1055,9 +1055,9 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate) #ifdef SMP /* Schedule ourselves on the indicated cpu. */ - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_bind(curthread, cpu_id); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); #endif /* Calibrate by measuring a short delay. */ @@ -1068,9 +1068,9 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate) intr_restore(reg); #ifdef SMP - mtx_lock_spin(&sched_lock); + thread_lock(curthread); sched_unbind(curthread); - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); #endif /* diff --git a/sys/security/mac_lomac/mac_lomac.c b/sys/security/mac_lomac/mac_lomac.c index ae575317501f..8d9b13e0a979 100644 --- a/sys/security/mac_lomac/mac_lomac.c +++ b/sys/security/mac_lomac/mac_lomac.c @@ -536,10 +536,10 @@ maybe_demote(struct mac_lomac *subjlabel, struct mac_lomac *objlabel, subj->mac_lomac.ml_rangelow = objlabel->ml_single; subj->mac_lomac.ml_rangehigh = objlabel->ml_single; subj->mac_lomac.ml_flags |= MAC_LOMAC_FLAG_UPDATE; - mtx_lock_spin(&sched_lock); + thread_lock(curthread); curthread->td_flags |= TDF_ASTPENDING; curthread->td_proc->p_sflag |= PS_MACPEND; - mtx_unlock_spin(&sched_lock); + thread_unlock(curthread); /* * Avoid memory allocation while holding a mutex; cache the diff --git a/sys/ufs/ffs/ffs_snapshot.c b/sys/ufs/ffs/ffs_snapshot.c index 4a614a75b7ca..792b375158f1 100644 --- a/sys/ufs/ffs/ffs_snapshot.c +++ b/sys/ufs/ffs/ffs_snapshot.c @@ -389,12 +389,15 @@ ffs_snapshot(mp, snapfile) * Recind nice scheduling while running with the filesystem suspended. */ if (td->td_proc->p_nice > 0) { - PROC_LOCK(td->td_proc); - mtx_lock_spin(&sched_lock); - saved_nice = td->td_proc->p_nice; - sched_nice(td->td_proc, 0); - mtx_unlock_spin(&sched_lock); - PROC_UNLOCK(td->td_proc); + struct proc *p; + + p = td->td_proc; + PROC_LOCK(p); + PROC_SLOCK(p); + saved_nice = p->p_nice; + sched_nice(p, 0); + PROC_SUNLOCK(p); + PROC_UNLOCK(p); } /* * Suspend operation on filesystem. @@ -809,10 +812,13 @@ ffs_snapshot(mp, snapfile) out: NDFREE(&nd, NDF_ONLY_PNBUF); if (saved_nice > 0) { - PROC_LOCK(td->td_proc); - mtx_lock_spin(&sched_lock); + struct proc *p; + + p = td->td_proc; + PROC_LOCK(p); + PROC_SLOCK(p); sched_nice(td->td_proc, saved_nice); - mtx_unlock_spin(&sched_lock); + PROC_SUNLOCK(p); PROC_UNLOCK(td->td_proc); } UFS_LOCK(ump); diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index cb2a657b1c3e..3a08855111be 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -619,24 +619,26 @@ faultin(p) * busy swapping it in. 
 	 */
 	++p->p_lock;
-	mtx_lock_spin(&sched_lock);
+	PROC_SLOCK(p);
 	p->p_sflag |= PS_SWAPPINGIN;
-	mtx_unlock_spin(&sched_lock);
+	PROC_SUNLOCK(p);
 	PROC_UNLOCK(p);
 	FOREACH_THREAD_IN_PROC(p, td)
 		vm_thread_swapin(td);
 	PROC_LOCK(p);
-	mtx_lock_spin(&sched_lock);
+	PROC_SLOCK(p);
 	p->p_sflag &= ~PS_SWAPPINGIN;
 	p->p_sflag |= PS_INMEM;
 	FOREACH_THREAD_IN_PROC(p, td) {
+		thread_lock(td);
 		TD_CLR_SWAPPED(td);
 		if (TD_CAN_RUN(td))
 			setrunnable(td);
+		thread_unlock(td);
 	}
-	mtx_unlock_spin(&sched_lock);
+	PROC_SUNLOCK(p);
 	wakeup(&p->p_sflag);
@@ -672,9 +674,9 @@ scheduler(dummy)
 loop:
 	if (vm_page_count_min()) {
 		VM_WAIT;
-		mtx_lock_spin(&sched_lock);
+		thread_lock(&thread0);
 		proc0_rescan = 0;
-		mtx_unlock_spin(&sched_lock);
+		thread_unlock(&thread0);
 		goto loop;
 	}
@@ -685,13 +687,14 @@ scheduler(dummy)
 		if (p->p_sflag & (PS_INMEM | PS_SWAPPINGOUT | PS_SWAPPINGIN)) {
 			continue;
 		}
-		mtx_lock_spin(&sched_lock);
+		PROC_SLOCK(p);
 		FOREACH_THREAD_IN_PROC(p, td) {
 			/*
 			 * An otherwise runnable thread of a process
 			 * swapped out has only the TDI_SWAPPED bit set.
 			 *
 			 */
+			thread_lock(td);
 			if (td->td_inhibitors == TDI_SWAPPED) {
 				pri = p->p_swtime + td->td_slptime;
 				if ((p->p_sflag & PS_SWAPINREQ) == 0) {
@@ -709,8 +712,9 @@ scheduler(dummy)
 					ppri = pri;
 				}
 			}
+			thread_unlock(td);
 		}
-		mtx_unlock_spin(&sched_lock);
+		PROC_SUNLOCK(p);
 	}
 	sx_sunlock(&allproc_lock);
@@ -718,13 +722,13 @@ scheduler(dummy)
 	 * Nothing to do, back to sleep.
 	 */
 	if ((p = pp) == NULL) {
-		mtx_lock_spin(&sched_lock);
+		thread_lock(&thread0);
 		if (!proc0_rescan) {
 			TD_SET_IWAIT(&thread0);
 			mi_switch(SW_VOL, NULL);
 		}
 		proc0_rescan = 0;
-		mtx_unlock_spin(&sched_lock);
+		thread_unlock(&thread0);
 		goto loop;
 	}
 	PROC_LOCK(p);
@@ -736,15 +740,15 @@ scheduler(dummy)
 	 */
 	if (p->p_sflag & (PS_INMEM | PS_SWAPPINGOUT | PS_SWAPPINGIN)) {
 		PROC_UNLOCK(p);
-		mtx_lock_spin(&sched_lock);
+		thread_lock(&thread0);
 		proc0_rescan = 0;
-		mtx_unlock_spin(&sched_lock);
+		thread_unlock(&thread0);
 		goto loop;
 	}
-	mtx_lock_spin(&sched_lock);
+	PROC_SLOCK(p);
 	p->p_sflag &= ~PS_SWAPINREQ;
-	mtx_unlock_spin(&sched_lock);
+	PROC_SUNLOCK(p);
 	/*
 	 * We would like to bring someone in. (only if there is space).
 	 */
 	faultin(p);
 	PROC_UNLOCK(p);
-	mtx_lock_spin(&sched_lock);
+	PROC_SLOCK(p);
 	p->p_swtime = 0;
+	PROC_SUNLOCK(p);
+	thread_lock(&thread0);
 	proc0_rescan = 0;
-	mtx_unlock_spin(&sched_lock);
+	thread_unlock(&thread0);
 	goto loop;
 }
@@ -763,7 +769,8 @@ void kick_proc0(void)
 {
 	struct thread *td = &thread0;
-
+	/* XXX This will probably cause a LOR in some cases */
+	thread_lock(td);
 	if (TD_AWAITING_INTR(td)) {
 		CTR2(KTR_INTR, "%s: sched_add %d", __func__, 0);
 		TD_CLR_IWAIT(td);
@@ -773,6 +780,7 @@ void kick_proc0(void)
 		CTR2(KTR_INTR, "%s: state %d", __func__, td->td_state);
 	}
+	thread_unlock(td);
 }
@@ -821,12 +829,12 @@ int action;
 			 * creation. It may have no
 			 * address space or lock yet.
 			 */
-			mtx_lock_spin(&sched_lock);
+			PROC_SLOCK(p);
 			if (p->p_state == PRS_NEW) {
-				mtx_unlock_spin(&sched_lock);
+				PROC_SUNLOCK(p);
 				continue;
 			}
-			mtx_unlock_spin(&sched_lock);
+			PROC_SUNLOCK(p);
 			/*
 			 * An aio daemon switches its
@@ -876,7 +884,7 @@ int action;
 			break;
 		case PRS_NORMAL:
-			mtx_lock_spin(&sched_lock);
+			PROC_SLOCK(p);
 			/*
 			 * do not swapout a realtime process
 			 * Check all the thread groups..
@@ -929,7 +937,7 @@ int action;
 				    (minslptime > swap_idle_threshold2))) {
 					swapout(p);
 					didswap++;
-					mtx_unlock_spin(&sched_lock);
+					PROC_SUNLOCK(p);
 					PROC_UNLOCK(p);
 					vm_map_unlock(&vm->vm_map);
 					vmspace_free(vm);
@@ -937,7 +945,7 @@ int action;
 					goto retry;
 				}
 nextproc:
-				mtx_unlock_spin(&sched_lock);
+				PROC_SUNLOCK(p);
 			}
 nextproc2:
 			PROC_UNLOCK(p);
@@ -962,7 +970,7 @@ swapout(p)
 	struct thread *td;
 	PROC_LOCK_ASSERT(p, MA_OWNED);
-	mtx_assert(&sched_lock, MA_OWNED | MA_NOTRECURSED);
+	mtx_assert(&p->p_slock, MA_OWNED | MA_NOTRECURSED);
 #if defined(SWAP_DEBUG)
 	printf("swapping out %d\n", p->p_pid);
 #endif
@@ -996,15 +1004,18 @@ swapout(p)
 	p->p_sflag &= ~PS_INMEM;
 	p->p_sflag |= PS_SWAPPINGOUT;
 	PROC_UNLOCK(p);
-	FOREACH_THREAD_IN_PROC(p, td)
+	FOREACH_THREAD_IN_PROC(p, td) {
+		thread_lock(td);
 		TD_SET_SWAPPED(td);
-	mtx_unlock_spin(&sched_lock);
+		thread_unlock(td);
+	}
+	PROC_SUNLOCK(p);
 	FOREACH_THREAD_IN_PROC(p, td)
 		vm_thread_swapout(td);
 	PROC_LOCK(p);
-	mtx_lock_spin(&sched_lock);
+	PROC_SLOCK(p);
 	p->p_sflag &= ~PS_SWAPPINGOUT;
 	p->p_swtime = 0;
 }
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
index d4b51e727793..4d70155f28a9 100644
--- a/sys/vm/vm_meter.c
+++ b/sys/vm/vm_meter.c
@@ -131,17 +131,21 @@ vmtotal(SYSCTL_HANDLER_ARGS)
 	FOREACH_PROC_IN_SYSTEM(p) {
 		if (p->p_flag & P_SYSTEM)
 			continue;
-		mtx_lock_spin(&sched_lock);
+		PROC_SLOCK(p);
 		switch (p->p_state) {
 		case PRS_NEW:
-			mtx_unlock_spin(&sched_lock);
+			PROC_SUNLOCK(p);
 			continue;
 			break;
 		default:
 			FOREACH_THREAD_IN_PROC(p, td) {
 				/* Need new statistics XXX */
+				thread_lock(td);
 				switch (td->td_state) {
 				case TDS_INHIBITED:
+					/*
+					 * XXX stats no longer synchronized.
+					 */
 					if (TD_ON_LOCK(td) ||
 					    (td->td_inhibitors == TDI_SWAPPED)) {
@@ -162,13 +166,15 @@ vmtotal(SYSCTL_HANDLER_ARGS)
 				case TDS_RUNQ:
 				case TDS_RUNNING:
 					total.t_rq++;
+					thread_unlock(td);
 					continue;
 				default:
 					break;
 				}
+				thread_unlock(td);
 			}
 		}
-		mtx_unlock_spin(&sched_lock);
+		PROC_SUNLOCK(p);
 		/*
 		 * Note active objects.
 		 */
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index dcf69ef621f4..99630ce25658 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -1246,22 +1246,24 @@ vm_pageout_scan(int pass)
 			 * If the process is in a non-running type state,
 			 * don't touch it. Check all the threads individually.
 			 */
-			mtx_lock_spin(&sched_lock);
+			PROC_SLOCK(p);
 			breakout = 0;
 			FOREACH_THREAD_IN_PROC(p, td) {
+				thread_lock(td);
 				if (!TD_ON_RUNQ(td) &&
 				    !TD_IS_RUNNING(td) && !TD_IS_SLEEPING(td)) {
+					thread_unlock(td);
 					breakout = 1;
 					break;
 				}
+				thread_unlock(td);
 			}
+			PROC_SUNLOCK(p);
 			if (breakout) {
-				mtx_unlock_spin(&sched_lock);
 				PROC_UNLOCK(p);
 				continue;
 			}
-			mtx_unlock_spin(&sched_lock);
 			/*
 			 * get the process size
 			 */
@@ -1287,9 +1289,9 @@ vm_pageout_scan(int pass)
 	sx_sunlock(&allproc_lock);
 	if (bigproc != NULL) {
 		killproc(bigproc, "out of swap space");
-		mtx_lock_spin(&sched_lock);
+		PROC_SLOCK(bigproc);
 		sched_nice(bigproc, PRIO_MIN);
-		mtx_unlock_spin(&sched_lock);
+		PROC_SUNLOCK(bigproc);
 		PROC_UNLOCK(bigproc);
 		wakeup(&cnt.v_free_count);
 	}
@@ -1599,17 +1601,20 @@ vm_daemon()
 			 * if the process is in a non-running type state,
 			 * don't touch it.
 			 */
-			mtx_lock_spin(&sched_lock);
+			PROC_SLOCK(p);
 			breakout = 0;
 			FOREACH_THREAD_IN_PROC(p, td) {
+				thread_lock(td);
 				if (!TD_ON_RUNQ(td) &&
 				    !TD_IS_RUNNING(td) && !TD_IS_SLEEPING(td)) {
+					thread_unlock(td);
 					breakout = 1;
 					break;
 				}
+				thread_unlock(td);
 			}
-			mtx_unlock_spin(&sched_lock);
+			PROC_SUNLOCK(p);
 			if (breakout) {
 				PROC_UNLOCK(p);
 				continue;
diff --git a/sys/vm/vm_zeroidle.c b/sys/vm/vm_zeroidle.c
index 5af84e01c188..b21d01f32ab0 100644
--- a/sys/vm/vm_zeroidle.c
+++ b/sys/vm/vm_zeroidle.c
@@ -145,9 +145,9 @@ vm_pagezero(void __unused *arg)
 			vm_page_zero_idle();
 #ifndef PREEMPTION
 			if (sched_runnable()) {
-				mtx_lock_spin(&sched_lock);
+				thread_lock(curthread);
 				mi_switch(SW_VOL, NULL);
-				mtx_unlock_spin(&sched_lock);
+				thread_unlock(curthread);
 			}
 #endif
 		} else {
@@ -176,11 +176,11 @@ pagezero_start(void __unused *arg)
 	PROC_LOCK(pagezero_proc);
 	pagezero_proc->p_flag |= P_NOLOAD;
 	PROC_UNLOCK(pagezero_proc);
-	mtx_lock_spin(&sched_lock);
 	td = FIRST_THREAD_IN_PROC(pagezero_proc);
+	thread_lock(td);
 	sched_class(td, PRI_IDLE);
 	sched_prio(td, PRI_MAX_IDLE);
 	sched_add(td, SRQ_BORING);
-	mtx_unlock_spin(&sched_lock);
+	thread_unlock(td);
 }
 SYSINIT(pagezero, SI_SUB_KTHREAD_VM, SI_ORDER_ANY, pagezero_start, NULL)
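
Note (not part of the diff): below is a minimal sketch of the locking idiom this change applies throughout the tree, assuming a kernel compilation environment. The two helper functions are hypothetical and exist only to illustrate the pattern; they are not added by this commit.

/*
 * Illustrative sketch only.  Per-thread scheduling state is serialized
 * with thread_lock()/thread_unlock() on the owning thread, while
 * per-process scheduling state uses the process spinlock via
 * PROC_SLOCK()/PROC_SUNLOCK(), nested inside PROC_LOCK().
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/sched.h>

/* Hypothetical helper: pin the current thread to a CPU and release it. */
static void
example_bind_self(int cpu_id)
{

	thread_lock(curthread);
	sched_bind(curthread, cpu_id);
	thread_unlock(curthread);
	/* ... work that must run on cpu_id ... */
	thread_lock(curthread);
	sched_unbind(curthread);
	thread_unlock(curthread);
}

/* Hypothetical helper: adjust a process's nice value. */
static void
example_set_nice(struct proc *p, int nice)
{

	PROC_LOCK(p);
	PROC_SLOCK(p);
	sched_nice(p, nice);
	PROC_SUNLOCK(p);
	PROC_UNLOCK(p);
}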