From b3e9e682cf2d37fd8310fa054e56c959fe8b664e Mon Sep 17 00:00:00 2001 From: Ryan Stone Date: Tue, 15 May 2012 01:30:25 +0000 Subject: [PATCH] Implement the DTrace sched provider. This implementation aims to be compatible with the sched provider implemented by Solaris and its open-source derivatives. Full documentation of the sched provider can be found on Oracle's DTrace wiki pages. Note that for compatibility with scripts originally written for Solaris, several probes are defined that will never fire. These probes are defined to fire when Solaris-specific features perform certain actions. As these features are not present in FreeBSD, the probes can never fire. Also, I have added two probes that are not defined in Solaris, lend-pri and load-change. These probes have been added to make it possible to collect schedgraph data with DTrace. Finally, a few probes are defined in Solaris to take a cpuinfo_t * argument. As it was not immediately clear to me how to translate that to FreeBSD, currently those probes are passed NULL in place of a cpuinfo_t *. 
Sponsored by: Sandvine Incorporated MFC after: 2 weeks --- sys/kern/kern_clock.c | 6 ++++++ sys/kern/kern_synch.c | 17 +++++++++++++++++ sys/kern/kern_thread.c | 6 ++++++ sys/kern/sched_4bsd.c | 39 +++++++++++++++++++++++++++++++++++++- sys/kern/sched_ule.c | 36 ++++++++++++++++++++++++++++++++++- sys/kern/subr_sleepqueue.c | 8 ++++++++ sys/kern/subr_turnstile.c | 10 ++++++++++ sys/sys/sdt.h | 3 +++ 8 files changed, 123 insertions(+), 2 deletions(-) diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c index 57dd632fe5da..5e10200a49ad 100644 --- a/sys/kern/kern_clock.c +++ b/sys/kern/kern_clock.c @@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$"); #include "opt_kdb.h" #include "opt_device_polling.h" #include "opt_hwpmc_hooks.h" +#include "opt_kdtrace.h" #include "opt_ntp.h" #include "opt_watchdog.h" @@ -56,6 +57,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -88,6 +90,9 @@ SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL); /* Spin-lock protecting profiling statistics. 
*/ static struct mtx time_lock; +SDT_PROVIDER_DECLARE(sched); +SDT_PROBE_DEFINE2(sched, , , tick, tick, "struct thread *", "struct proc *"); + static int sysctl_kern_cp_time(SYSCTL_HANDLER_ARGS) { @@ -760,6 +765,7 @@ statclock_cnt(int cnt, int usermode) ru->ru_maxrss = rss; KTR_POINT2(KTR_SCHED, "thread", sched_tdname(td), "statclock", "prio:%d", td->td_priority, "stathz:%d", (stathz)?stathz:hz); + SDT_PROBE2(sched, , , tick, td, td->td_proc); thread_lock_flags(td, MTX_QUIET); for ( ; cnt > 0; cnt--) sched_clock(td); diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index 85b11f9f94e0..e2e4081bf895 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -37,6 +37,7 @@ #include __FBSDID("$FreeBSD$"); +#include "opt_kdtrace.h" #include "opt_ktrace.h" #include "opt_sched.h" @@ -51,6 +52,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -105,6 +107,20 @@ SYSCTL_INT(_kern, OID_AUTO, fscale, CTLFLAG_RD, 0, FSCALE, ""); static void loadav(void *arg); +SDT_PROVIDER_DECLARE(sched); +SDT_PROBE_DEFINE(sched, , , preempt, preempt); + +/* + * These probes reference Solaris features that are not implemented in FreeBSD. + * Create the probes anyway for compatibility with existing D scripts; they'll + * just never fire. 
+ */ +SDT_PROBE_DEFINE(sched, , , cpucaps_sleep, cpucaps-sleep); +SDT_PROBE_DEFINE(sched, , , cpucaps_wakeup, cpucaps-wakeup); +SDT_PROBE_DEFINE(sched, , , schedctl_nopreempt, schedctl-nopreempt); +SDT_PROBE_DEFINE(sched, , , schedctl_preempt, schedctl-preempt); +SDT_PROBE_DEFINE(sched, , , schedctl_yield, schedctl-yield); + void sleepinit(void) { @@ -462,6 +478,7 @@ mi_switch(int flags, struct thread *newtd) "prio:%d", td->td_priority, "wmesg:\"%s\"", td->td_wmesg, "lockname:\"%s\"", td->td_lockname); #endif + SDT_PROBE0(sched, , , preempt); #ifdef XEN PT_UPDATES_FLUSH(); #endif diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c index d4c5c4c8d2e0..3fbe96f470e9 100644 --- a/sys/kern/kern_thread.c +++ b/sys/kern/kern_thread.c @@ -27,6 +27,7 @@ */ #include "opt_witness.h" +#include "opt_kdtrace.h" #include "opt_hwpmc_hooks.h" #include @@ -39,6 +40,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -59,6 +61,10 @@ __FBSDID("$FreeBSD$"); #include #include +SDT_PROVIDER_DECLARE(proc); +SDT_PROBE_DEFINE(proc, , , lwp_exit, lwp-exit); + + /* * thread related storage. 
*/ diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c index 1bd1cdd5ef5a..bfae580e39d2 100644 --- a/sys/kern/sched_4bsd.c +++ b/sys/kern/sched_4bsd.c @@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -244,12 +245,31 @@ SYSCTL_INT(_kern_sched, OID_AUTO, followon, CTLFLAG_RW, "allow threads to share a quantum"); #endif +SDT_PROVIDER_DEFINE(sched); + +SDT_PROBE_DEFINE3(sched, , , change_pri, change-pri, "struct thread *", + "struct proc *", "uint8_t"); +SDT_PROBE_DEFINE3(sched, , , dequeue, dequeue, "struct thread *", + "struct proc *", "void *"); +SDT_PROBE_DEFINE4(sched, , , enqueue, enqueue, "struct thread *", + "struct proc *", "void *", "int"); +SDT_PROBE_DEFINE4(sched, , , lend_pri, lend-pri, "struct thread *", + "struct proc *", "uint8_t", "struct thread *"); +SDT_PROBE_DEFINE2(sched, , , load_change, load-change, "int", "int"); +SDT_PROBE_DEFINE2(sched, , , off_cpu, off-cpu, "struct thread *", + "struct proc *"); +SDT_PROBE_DEFINE(sched, , , on_cpu, on-cpu); +SDT_PROBE_DEFINE(sched, , , remain_cpu, remain-cpu); +SDT_PROBE_DEFINE2(sched, , , surrender, surrender, "struct thread *", + "struct proc *"); + static __inline void sched_load_add(void) { sched_tdcnt++; KTR_COUNTER0(KTR_SCHED, "load", "global load", sched_tdcnt); + SDT_PROBE2(sched, , , load_change, NOCPU, sched_tdcnt); } static __inline void @@ -258,6 +278,7 @@ sched_load_rem(void) sched_tdcnt--; KTR_COUNTER0(KTR_SCHED, "load", "global load", sched_tdcnt); + SDT_PROBE2(sched, , , load_change, NOCPU, sched_tdcnt); } /* * Arrange to reschedule if necessary, taking the priorities and @@ -795,10 +816,13 @@ sched_priority(struct thread *td, u_char prio) KTR_POINT3(KTR_SCHED, "thread", sched_tdname(td), "priority change", "prio:%d", td->td_priority, "new prio:%d", prio, KTR_ATTR_LINKED, sched_tdname(curthread)); + SDT_PROBE3(sched, , , change_pri, td, td->td_proc, prio); if (td != curthread && prio > td->td_priority) { KTR_POINT3(KTR_SCHED, 
"thread", sched_tdname(curthread), "lend prio", "prio:%d", td->td_priority, "new prio:%d", prio, KTR_ATTR_LINKED, sched_tdname(td)); + SDT_PROBE4(sched, , , lend_pri, td, td->td_proc, prio, + curthread); } THREAD_LOCK_ASSERT(td, MA_OWNED); if (td->td_priority == prio) @@ -987,6 +1011,9 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) if (PMC_PROC_IS_USING_PMCS(td->td_proc)) PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); #endif + + SDT_PROBE2(sched, , , off_cpu, td, td->td_proc); + /* I feel sleepy */ lock_profile_release_lock(&sched_lock.lock_object); #ifdef KDTRACE_HOOKS @@ -1018,11 +1045,14 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) * needed to, or the thread_wait() or wait() will * need to reap it. */ + + SDT_PROBE0(sched, , , on_cpu); #ifdef HWPMC_HOOKS if (PMC_PROC_IS_USING_PMCS(td->td_proc)) PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN); #endif - } + } else + SDT_PROBE0(sched, , , remain_cpu); #ifdef SMP if (td->td_flags & TDF_IDLETD) @@ -1223,6 +1253,8 @@ sched_add(struct thread *td, int flags) sched_tdname(curthread)); KTR_POINT1(KTR_SCHED, "thread", sched_tdname(curthread), "wokeup", KTR_ATTR_LINKED, sched_tdname(td)); + SDT_PROBE4(sched, , , enqueue, td, td->td_proc, NULL, + flags & SRQ_PREEMPTED); /* @@ -1315,6 +1347,8 @@ sched_add(struct thread *td, int flags) sched_tdname(curthread)); KTR_POINT1(KTR_SCHED, "thread", sched_tdname(curthread), "wokeup", KTR_ATTR_LINKED, sched_tdname(td)); + SDT_PROBE4(sched, , , enqueue, td, td->td_proc, NULL, + flags & SRQ_PREEMPTED); /* * Now that the thread is moving to the run-queue, set the lock @@ -1362,6 +1396,7 @@ sched_rem(struct thread *td) KTR_STATE2(KTR_SCHED, "thread", sched_tdname(td), "runq rem", "prio:%d", td->td_priority, KTR_ATTR_LINKED, sched_tdname(curthread)); + SDT_PROBE3(sched, , , dequeue, td, td->td_proc, NULL); if ((td->td_flags & TDF_NOLOAD) == 0) sched_load_rem(); @@ -1425,6 +1460,8 @@ sched_choose(void) void sched_preempt(struct thread *td) { + + SDT_PROBE2(sched, 
, , surrender, td, td->td_proc); thread_lock(td); if (td->td_critnest > 1) td->td_owepreempt = 1; diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c index 7e5af09eaa57..538a44ef3dce 100644 --- a/sys/kern/sched_ule.c +++ b/sys/kern/sched_ule.c @@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -327,6 +328,24 @@ static void sched_initticks(void *dummy); SYSINIT(sched_initticks, SI_SUB_CLOCKS, SI_ORDER_THIRD, sched_initticks, NULL); +SDT_PROVIDER_DEFINE(sched); + +SDT_PROBE_DEFINE3(sched, , , change_pri, change-pri, "struct thread *", + "struct proc *", "uint8_t"); +SDT_PROBE_DEFINE3(sched, , , dequeue, dequeue, "struct thread *", + "struct proc *", "void *"); +SDT_PROBE_DEFINE4(sched, , , enqueue, enqueue, "struct thread *", + "struct proc *", "void *", "int"); +SDT_PROBE_DEFINE4(sched, , , lend_pri, lend-pri, "struct thread *", + "struct proc *", "uint8_t", "struct thread *"); +SDT_PROBE_DEFINE2(sched, , , load_change, load-change, "int", "int"); +SDT_PROBE_DEFINE2(sched, , , off_cpu, off-cpu, "struct thread *", + "struct proc *"); +SDT_PROBE_DEFINE(sched, , , on_cpu, on-cpu); +SDT_PROBE_DEFINE(sched, , , remain_cpu, remain-cpu); +SDT_PROBE_DEFINE2(sched, , , surrender, surrender, "struct thread *", + "struct proc *"); + /* * Print the threads waiting on a run-queue. 
*/ @@ -509,6 +528,7 @@ tdq_load_add(struct tdq *tdq, struct thread *td) if ((td->td_flags & TDF_NOLOAD) == 0) tdq->tdq_sysload++; KTR_COUNTER0(KTR_SCHED, "load", tdq->tdq_loadname, tdq->tdq_load); + SDT_PROBE2(sched, , , load_change, (int)TDQ_ID(tdq), tdq->tdq_load); } /* @@ -528,6 +548,7 @@ tdq_load_rem(struct tdq *tdq, struct thread *td) if ((td->td_flags & TDF_NOLOAD) == 0) tdq->tdq_sysload--; KTR_COUNTER0(KTR_SCHED, "load", tdq->tdq_loadname, tdq->tdq_load); + SDT_PROBE2(sched, , , load_change, (int)TDQ_ID(tdq), tdq->tdq_load); } /* @@ -1625,10 +1646,13 @@ sched_thread_priority(struct thread *td, u_char prio) KTR_POINT3(KTR_SCHED, "thread", sched_tdname(td), "prio", "prio:%d", td->td_priority, "new prio:%d", prio, KTR_ATTR_LINKED, sched_tdname(curthread)); + SDT_PROBE3(sched, , , change_pri, td, td->td_proc, prio); if (td != curthread && prio > td->td_priority) { KTR_POINT3(KTR_SCHED, "thread", sched_tdname(curthread), "lend prio", "prio:%d", td->td_priority, "new prio:%d", prio, KTR_ATTR_LINKED, sched_tdname(td)); + SDT_PROBE4(sched, , , lend_pri, td, td->td_proc, prio, + curthread); } ts = td->td_sched; THREAD_LOCK_ASSERT(td, MA_OWNED); @@ -1879,6 +1903,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) if (PMC_PROC_IS_USING_PMCS(td->td_proc)) PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_OUT); #endif + SDT_PROBE2(sched, , , off_cpu, td, td->td_proc); lock_profile_release_lock(&TDQ_LOCKPTR(tdq)->lock_object); TDQ_LOCKPTR(tdq)->mtx_lock = (uintptr_t)newtd; sched_pctcpu_update(newtd->td_sched, 0); @@ -1903,12 +1928,16 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) tdq = TDQ_CPU(cpuid); lock_profile_obtain_lock_success( &TDQ_LOCKPTR(tdq)->lock_object, 0, 0, __FILE__, __LINE__); + + SDT_PROBE0(sched, , , on_cpu); #ifdef HWPMC_HOOKS if (PMC_PROC_IS_USING_PMCS(td->td_proc)) PMC_SWITCH_CONTEXT(td, PMC_FN_CSW_IN); #endif - } else + } else { thread_unblock_switch(td, mtx); + SDT_PROBE0(sched, , , remain_cpu); + } /* * Assert that all went 
well and return. */ @@ -2102,6 +2131,8 @@ sched_preempt(struct thread *td) { struct tdq *tdq; + SDT_PROBE2(sched, , , surrender, td, td->td_proc); + thread_lock(td); tdq = TDQ_SELF(); TDQ_LOCK_ASSERT(tdq, MA_OWNED); @@ -2330,6 +2361,8 @@ sched_add(struct thread *td, int flags) sched_tdname(curthread)); KTR_POINT1(KTR_SCHED, "thread", sched_tdname(curthread), "wokeup", KTR_ATTR_LINKED, sched_tdname(td)); + SDT_PROBE4(sched, , , enqueue, td, td->td_proc, NULL, + flags & SRQ_PREEMPTED); THREAD_LOCK_ASSERT(td, MA_OWNED); /* * Recalculate the priority before we select the target cpu or @@ -2375,6 +2408,7 @@ sched_rem(struct thread *td) KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "runq rem", "prio:%d", td->td_priority); + SDT_PROBE3(sched, , , dequeue, td, td->td_proc, NULL); tdq = TDQ_CPU(td->td_sched->ts_cpu); TDQ_LOCK_ASSERT(tdq, MA_OWNED); MPASS(td->td_lock == TDQ_LOCKPTR(tdq)); diff --git a/sys/kern/subr_sleepqueue.c b/sys/kern/subr_sleepqueue.c index bbf79db88337..b868289e720d 100644 --- a/sys/kern/subr_sleepqueue.c +++ b/sys/kern/subr_sleepqueue.c @@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$"); #include "opt_sleepqueue_profiling.h" #include "opt_ddb.h" +#include "opt_kdtrace.h" #include "opt_sched.h" #include @@ -75,6 +76,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -166,6 +168,9 @@ static int sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, static void sleepq_switch(void *wchan, int pri); static void sleepq_timeout(void *arg); +SDT_PROBE_DECLARE(sched, , , sleep); +SDT_PROBE_DECLARE(sched, , , wakeup); + /* * Early initialization of sleep queues that is called from the sleepinit() * SYSINIT. 
@@ -534,6 +539,7 @@ sleepq_switch(void *wchan, int pri) MPASS(td->td_sleepqueue == NULL); sched_sleep(td, pri); thread_lock_set(td, &sc->sc_lock); + SDT_PROBE0(sched, , , sleep); TD_SET_SLEEPING(td); mi_switch(SW_VOL | SWT_SLEEPQ, NULL); KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING")); @@ -715,6 +721,8 @@ sleepq_resume_thread(struct sleepqueue *sq, struct thread *td, int pri) sc = SC_LOOKUP(sq->sq_wchan); mtx_assert(&sc->sc_lock, MA_OWNED); + SDT_PROBE2(sched, , , wakeup, td, td->td_proc); + /* Remove the thread from the queue. */ sq->sq_blockedcnt[td->td_sqqueue]--; TAILQ_REMOVE(&sq->sq_blocked[td->td_sqqueue], td, td_slpq); diff --git a/sys/kern/subr_turnstile.c b/sys/kern/subr_turnstile.c index c3370db4f08e..76fb96400362 100644 --- a/sys/kern/subr_turnstile.c +++ b/sys/kern/subr_turnstile.c @@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$"); #include "opt_ddb.h" +#include "opt_kdtrace.h" #include "opt_turnstile_profiling.h" #include "opt_sched.h" @@ -73,6 +74,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -167,6 +169,11 @@ static void turnstile_dtor(void *mem, int size, void *arg); static int turnstile_init(void *mem, int size, int flags); static void turnstile_fini(void *mem, int size); +SDT_PROVIDER_DECLARE(sched); +SDT_PROBE_DEFINE(sched, , , sleep, sleep); +SDT_PROBE_DEFINE2(sched, , , wakeup, wakeup, "struct thread *", + "struct proc *"); + /* * Walks the chain of turnstiles and their owners to propagate the priority * of the thread being blocked to all the threads holding locks that have to @@ -740,6 +747,8 @@ turnstile_wait(struct turnstile *ts, struct thread *owner, int queue) CTR4(KTR_LOCK, "%s: td %d blocked on [%p] %s", __func__, td->td_tid, lock, lock->lo_name); + SDT_PROBE0(sched, , , sleep); + THREAD_LOCKPTR_ASSERT(td, &ts->ts_lock); mi_switch(SW_VOL | SWT_TURNSTILE, NULL); @@ -916,6 +925,7 @@ turnstile_unpend(struct turnstile *ts, int owner_type) while (!TAILQ_EMPTY(&pending_threads)) { td = 
TAILQ_FIRST(&pending_threads); TAILQ_REMOVE(&pending_threads, td, td_lockq); + SDT_PROBE2(sched, , , wakeup, td, td->td_proc); thread_lock(td); THREAD_LOCKPTR_ASSERT(td, &ts->ts_lock); MPASS(td->td_proc->p_magic == P_MAGIC); diff --git a/sys/sys/sdt.h b/sys/sys/sdt.h index 90d169906d49..e7b5478cdad0 100644 --- a/sys/sys/sdt.h +++ b/sys/sys/sdt.h @@ -92,6 +92,7 @@ #define SDT_PROBE_DEFINE4(prov, mod, func, name, sname, arg0, arg1, arg2, arg3) #define SDT_PROBE_DEFINE5(prov, mod, func, name, sname, arg0, arg1, arg2, arg3, arg4) +#define SDT_PROBE0(prov, mod, func, name) #define SDT_PROBE1(prov, mod, func, name, arg0) #define SDT_PROBE2(prov, mod, func, name, arg0, arg1) #define SDT_PROBE3(prov, mod, func, name, arg0, arg1, arg2) @@ -231,6 +232,8 @@ struct sdt_provider { SDT_PROBE_ARGTYPE(prov, mod, func, name, 3, arg3); \ SDT_PROBE_ARGTYPE(prov, mod, func, name, 4, arg4) +#define SDT_PROBE0(prov, mod, func, name) \ + SDT_PROBE(prov, mod, func, name, 0, 0, 0, 0, 0) #define SDT_PROBE1(prov, mod, func, name, arg0) \ SDT_PROBE(prov, mod, func, name, arg0, 0, 0, 0, 0) #define SDT_PROBE2(prov, mod, func, name, arg0, arg1) \