 - Make SCHED_STATS more generic by adding a wrapper to create the
   variables and sysctl nodes (a short sketch of the pattern follows
   this list).
 - In the reset handler, walk the children of kern_sched_stats and reset
   the counters via the oid_arg1 pointer.  This allows us to add
   arbitrary counters to the tree and still reset them properly.
 - Define a set of switch types to be passed with flags to mi_switch().
   These types are named SWT_*.  Each type corresponds to a SCHED_STATS
   counter, which mi_switch() increments automatically.
 - Make the new SWT_ types more specific than the older switch stats.
   There are now stats for idle switches, remote idle wakeups, remote
   preemption, ithreads idling, etc.
 - Add switch statistics for ULE's pickcpu algorithm.  These stats include
   how much migration there is, how often affinity was successful, how
   often threads were migrated to the local cpu on wakeup, etc.
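
A minimal sketch of the resulting pattern (illustrative only; it is
pieced together from the hunks below, and "example_counter" is a
made-up name):

	/*
	 * One SCHED_STAT_DEFINE() line creates the counter variable and
	 * its node under kern.sched.stats; the generic reset handler
	 * later finds it through oid_arg1, so no per-counter reset code
	 * is needed.
	 */
	SCHED_STAT_DEFINE(example_counter, "Hypothetical example counter");
	...
	SCHED_STAT_INC(example_counter);

	/* Callers encode the switch reason in the low byte of the flags. */
	mi_switch(SW_VOL | SWT_SLEEPQ, NULL);

	/* mi_switch() masks the type out and bumps the matching counter. */
	SCHED_STAT_INC(sched_switch_stats[flags & SW_TYPE_MASK]);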

Sponsored by:	Nokia
Jeff Roberson 2008-04-17 04:20:10 +00:00
parent f86476aa7d
commit 8df78c41d6
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=178272
15 changed files with 122 additions and 67 deletions

sys/kern/kern_intr.c

@@ -1231,7 +1231,7 @@ ithread_loop(void *arg)
if (!ithd->it_need && !(ithd->it_flags & IT_DEAD)) {
TD_SET_IWAIT(td);
ie->ie_count = 0;
mi_switch(SW_VOL, NULL);
mi_switch(SW_VOL | SWT_IWAIT, NULL);
}
thread_unlock(td);
}
@@ -1389,7 +1389,7 @@ ithread_loop(void *arg)
if (!ithd->it_need && !(ithd->it_flags & IT_DEAD)) {
TD_SET_IWAIT(td);
ie->ie_count = 0;
mi_switch(SW_VOL, NULL);
mi_switch(SW_VOL | SWT_IWAIT, NULL);
}
thread_unlock(td);
}

sys/kern/kern_subr.c

@@ -456,7 +456,7 @@ uio_yield(void)
DROP_GIANT();
thread_lock(td);
sched_prio(td, td->td_user_pri);
mi_switch(SW_INVOL, NULL);
mi_switch(SW_INVOL | SWT_RELINQUISH, NULL);
thread_unlock(td);
PICKUP_GIANT();
}

sys/kern/kern_switch.c

@@ -73,25 +73,35 @@ static int kern_sched_preemption = 0;
SYSCTL_INT(_kern_sched, OID_AUTO, preemption, CTLFLAG_RD,
&kern_sched_preemption, 0, "Kernel preemption enabled");
/*
* Support for scheduler stats exported via kern.sched.stats. All stats may
* be reset with kern.sched.stats.reset = 1. Stats may be defined elsewhere
* with SCHED_STAT_DEFINE().
*/
#ifdef SCHED_STATS
long switch_preempt;
long switch_owepreempt;
long switch_turnstile;
long switch_sleepq;
long switch_sleepqtimo;
long switch_relinquish;
long switch_needresched;
static SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0, "switch stats");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, preempt, CTLFLAG_RD, &switch_preempt, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, owepreempt, CTLFLAG_RD, &switch_owepreempt, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, turnstile, CTLFLAG_RD, &switch_turnstile, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, sleepq, CTLFLAG_RD, &switch_sleepq, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, sleepqtimo, CTLFLAG_RD, &switch_sleepqtimo, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, relinquish, CTLFLAG_RD, &switch_relinquish, 0, "");
SYSCTL_INT(_kern_sched_stats, OID_AUTO, needresched, CTLFLAG_RD, &switch_needresched, 0, "");
long sched_switch_stats[SWT_COUNT]; /* Switch reasons from mi_switch(). */
SYSCTL_NODE(_kern_sched, OID_AUTO, stats, CTLFLAG_RW, 0, "switch stats");
SCHED_STAT_DEFINE_VAR(uncategorized, &sched_switch_stats[SWT_NONE], "");
SCHED_STAT_DEFINE_VAR(preempt, &sched_switch_stats[SWT_PREEMPT], "");
SCHED_STAT_DEFINE_VAR(owepreempt, &sched_switch_stats[SWT_OWEPREEMPT], "");
SCHED_STAT_DEFINE_VAR(turnstile, &sched_switch_stats[SWT_TURNSTILE], "");
SCHED_STAT_DEFINE_VAR(sleepq, &sched_switch_stats[SWT_SLEEPQ], "");
SCHED_STAT_DEFINE_VAR(sleepqtimo, &sched_switch_stats[SWT_SLEEPQTIMO], "");
SCHED_STAT_DEFINE_VAR(relinquish, &sched_switch_stats[SWT_RELINQUISH], "");
SCHED_STAT_DEFINE_VAR(needresched, &sched_switch_stats[SWT_NEEDRESCHED], "");
SCHED_STAT_DEFINE_VAR(idle, &sched_switch_stats[SWT_IDLE], "");
SCHED_STAT_DEFINE_VAR(iwait, &sched_switch_stats[SWT_IWAIT], "");
SCHED_STAT_DEFINE_VAR(suspend, &sched_switch_stats[SWT_SUSPEND], "");
SCHED_STAT_DEFINE_VAR(remotepreempt, &sched_switch_stats[SWT_REMOTEPREEMPT],
"");
SCHED_STAT_DEFINE_VAR(remotewakeidle, &sched_switch_stats[SWT_REMOTEWAKEIDLE],
"");
static int
sysctl_stats_reset(SYSCTL_HANDLER_ARGS)
{
struct sysctl_oid *p;
int error;
int val;
@@ -101,14 +111,15 @@ sysctl_stats_reset(SYSCTL_HANDLER_ARGS)
return (error);
if (val == 0)
return (0);
switch_preempt = 0;
switch_owepreempt = 0;
switch_turnstile = 0;
switch_sleepq = 0;
switch_sleepqtimo = 0;
switch_relinquish = 0;
switch_needresched = 0;
/*
* Traverse the list of children of _kern_sched_stats and reset each
* to 0. Skip the reset entry.
*/
SLIST_FOREACH(p, oidp->oid_parent, oid_link) {
if (p == oidp || p->oid_arg1 == NULL)
continue;
*(long *)p->oid_arg1 = 0;
}
return (0);
}
@@ -164,6 +175,7 @@ void
critical_exit(void)
{
struct thread *td;
int flags;
td = curthread;
KASSERT(td->td_critnest != 0,
@@ -175,8 +187,12 @@ critical_exit(void)
td->td_critnest = 1;
thread_lock(td);
td->td_critnest--;
SCHED_STAT_INC(switch_owepreempt);
mi_switch(SW_INVOL|SW_PREEMPT, NULL);
flags = SW_INVOL | SW_PREEMPT;
if (TD_IS_IDLETHREAD(td))
flags |= SWT_IDLE;
else
flags |= SWT_OWEPREEMPT;
mi_switch(flags, NULL);
thread_unlock(td);
}
} else

sys/kern/kern_synch.c

@@ -38,6 +38,7 @@
__FBSDID("$FreeBSD$");
#include "opt_ktrace.h"
#include "opt_sched.h"
#include <sys/param.h>
#include <sys/systm.h>
@@ -390,6 +391,9 @@ mi_switch(int flags, struct thread *newtd)
td->td_ru.ru_nvcsw++;
else
td->td_ru.ru_nivcsw++;
#ifdef SCHED_STATS
SCHED_STAT_INC(sched_switch_stats[flags & SW_TYPE_MASK]);
#endif
/*
* Compute the amount of time during which the current
* thread was running, and add that to its total so far.
@@ -533,7 +537,7 @@ yield(struct thread *td, struct yield_args *uap)
thread_lock(td);
sched_prio(td, PRI_MAX_TIMESHARE);
mi_switch(SW_VOL, NULL);
mi_switch(SW_VOL | SWT_RELINQUISH, NULL);
thread_unlock(td);
td->td_retval[0] = 0;
return (0);

sys/kern/kern_thread.c

@@ -723,7 +723,7 @@ thread_suspend_check(int return_instead)
td->td_flags |= TDF_BOUNDARY;
}
PROC_SUNLOCK(p);
mi_switch(SW_INVOL, NULL);
mi_switch(SW_INVOL | SWT_SUSPEND, NULL);
if (return_instead == 0)
td->td_flags &= ~TDF_BOUNDARY;
thread_unlock(td);
@@ -756,7 +756,7 @@ thread_suspend_switch(struct thread *td)
sched_sleep(td, 0);
PROC_SUNLOCK(p);
DROP_GIANT();
mi_switch(SW_VOL, NULL);
mi_switch(SW_VOL | SWT_SUSPEND, NULL);
thread_unlock(td);
PICKUP_GIANT();
PROC_LOCK(p);

sys/kern/sched_4bsd.c

@@ -316,8 +316,7 @@ maybe_preempt(struct thread *td)
TD_SET_RUNNING(td);
CTR3(KTR_PROC, "preempting to thread %p (pid %d, %s)\n", td,
td->td_proc->p_pid, td->td_name);
SCHED_STAT_INC(switch_preempt);
mi_switch(SW_INVOL|SW_PREEMPT, td);
mi_switch(SW_INVOL | SW_PREEMPT | SWT_PREEMPT, td);
/*
* td's lock pointer may have changed. We have to return with it
* locked.
@@ -1332,7 +1331,7 @@ sched_preempt(struct thread *td)
if (td->td_critnest > 1)
td->td_owepreempt = 1;
else
mi_switch(SW_INVOL | SW_PREEMPT, NULL);
mi_switch(SW_INVOL | SW_PREEMPT | SWT_PREEMPT, NULL);
thread_unlock(td);
}
@@ -1397,8 +1396,7 @@ void
sched_relinquish(struct thread *td)
{
thread_lock(td);
SCHED_STAT_INC(switch_relinquish);
mi_switch(SW_VOL, NULL);
mi_switch(SW_VOL | SWT_RELINQUISH, NULL);
thread_unlock(td);
}
@@ -1448,7 +1446,7 @@ sched_idletd(void *dummy)
cpu_idle();
mtx_lock_spin(&sched_lock);
mi_switch(SW_VOL, NULL);
mi_switch(SW_VOL | SWT_IDLE, NULL);
mtx_unlock_spin(&sched_lock);
}
}

sys/kern/sched_ule.c

@@ -909,7 +909,7 @@ tdq_idled(struct tdq *tdq)
}
spinlock_exit();
TDQ_UNLOCK(steal);
mi_switch(SW_VOL, NULL);
mi_switch(SW_VOL | SWT_IDLE, NULL);
thread_unlock(curthread);
return (0);
@@ -1073,6 +1073,13 @@ sched_setcpu(struct thread *td, int cpu, int flags)
return (tdq);
}
SCHED_STAT_DEFINE(pickcpu_intrbind, "Soft interrupt binding");
SCHED_STAT_DEFINE(pickcpu_idle_affinity, "Picked idle cpu based on affinity");
SCHED_STAT_DEFINE(pickcpu_affinity, "Picked cpu based on affinity");
SCHED_STAT_DEFINE(pickcpu_lowest, "Selected lowest load");
SCHED_STAT_DEFINE(pickcpu_local, "Migrated to current cpu");
SCHED_STAT_DEFINE(pickcpu_migration, "Selection may have caused migration");
static int
sched_pickcpu(struct thread *td, int flags)
{
@@ -1098,8 +1105,10 @@ sched_pickcpu(struct thread *td, int flags)
* the interrupt.
*/
if (td->td_priority <= PRI_MAX_ITHD && THREAD_CAN_SCHED(td, self) &&
curthread->td_intr_nesting_level)
curthread->td_intr_nesting_level && ts->ts_cpu != self) {
SCHED_STAT_INC(pickcpu_intrbind);
ts->ts_cpu = self;
}
/*
* If the thread can run on the last cpu and the affinity has not
* expired or it is idle run it there.
@@ -1107,10 +1116,14 @@ sched_pickcpu(struct thread *td, int flags)
pri = td->td_priority;
tdq = TDQ_CPU(ts->ts_cpu);
if (THREAD_CAN_SCHED(td, ts->ts_cpu)) {
if (tdq->tdq_lowpri > PRI_MIN_IDLE)
if (tdq->tdq_lowpri > PRI_MIN_IDLE) {
SCHED_STAT_INC(pickcpu_idle_affinity);
return (ts->ts_cpu);
if (SCHED_AFFINITY(ts, CG_SHARE_L2) && tdq->tdq_lowpri > pri)
}
if (SCHED_AFFINITY(ts, CG_SHARE_L2) && tdq->tdq_lowpri > pri) {
SCHED_STAT_INC(pickcpu_affinity);
return (ts->ts_cpu);
}
}
/*
* Search for the highest level in the tree that still has affinity.
@@ -1129,8 +1142,13 @@ sched_pickcpu(struct thread *td, int flags)
* Compare the lowest loaded cpu to current cpu.
*/
if (THREAD_CAN_SCHED(td, self) && TDQ_CPU(self)->tdq_lowpri > pri &&
TDQ_CPU(cpu)->tdq_lowpri < PRI_MIN_IDLE)
TDQ_CPU(cpu)->tdq_lowpri < PRI_MIN_IDLE) {
SCHED_STAT_INC(pickcpu_local);
cpu = self;
} else
SCHED_STAT_INC(pickcpu_lowest);
if (cpu != ts->ts_cpu)
SCHED_STAT_INC(pickcpu_migration);
KASSERT(cpu != -1, ("sched_pickcpu: Failed to find a cpu."));
return (cpu);
}
@@ -1989,10 +2007,15 @@ sched_preempt(struct thread *td)
TDQ_LOCK_ASSERT(tdq, MA_OWNED);
tdq->tdq_ipipending = 0;
if (td->td_priority > tdq->tdq_lowpri) {
int flags;
flags = SW_INVOL | SW_PREEMPT;
if (td->td_critnest > 1)
td->td_owepreempt = 1;
else if (TD_IS_IDLETHREAD(td))
mi_switch(flags | SWT_REMOTEWAKEIDLE, NULL);
else
mi_switch(SW_INVOL | SW_PREEMPT, NULL);
mi_switch(flags | SWT_REMOTEPREEMPT, NULL);
}
thread_unlock(td);
}
@@ -2378,8 +2401,7 @@ void
sched_relinquish(struct thread *td)
{
thread_lock(td);
SCHED_STAT_INC(switch_relinquish);
mi_switch(SW_VOL, NULL);
mi_switch(SW_VOL | SWT_RELINQUISH, NULL);
thread_unlock(td);
}

sys/kern/subr_sleepqueue.c

@@ -486,8 +486,7 @@ sleepq_switch(void *wchan, int pri)
sched_sleep(td, pri);
thread_lock_set(td, &sc->sc_lock);
TD_SET_SLEEPING(td);
SCHED_STAT_INC(switch_sleepq);
mi_switch(SW_VOL, NULL);
mi_switch(SW_VOL | SWT_SLEEPQ, NULL);
KASSERT(TD_IS_RUNNING(td), ("running but not TDS_RUNNING"));
CTR3(KTR_PROC, "sleepq resume: thread %p (pid %ld, %s)",
(void *)td, (long)td->td_proc->p_pid, (void *)td->td_name);
@@ -527,8 +526,7 @@ sleepq_check_timeout(void)
else if (callout_stop(&td->td_slpcallout) == 0) {
td->td_flags |= TDF_TIMEOUT;
TD_SET_SLEEPING(td);
SCHED_STAT_INC(switch_sleepqtimo);
mi_switch(SW_INVOL, NULL);
mi_switch(SW_INVOL | SWT_SLEEPQTIMO, NULL);
}
return (0);
}

sys/kern/subr_trap.c

@@ -211,8 +211,7 @@ ast(struct trapframe *framep)
#endif
thread_lock(td);
sched_prio(td, td->td_user_pri);
SCHED_STAT_INC(switch_needresched);
mi_switch(SW_INVOL, NULL);
mi_switch(SW_INVOL | SWT_NEEDRESCHED, NULL);
thread_unlock(td);
#ifdef KTRACE
if (KTRPOINT(td, KTR_CSW))

sys/kern/subr_turnstile.c

@@ -741,8 +741,7 @@ turnstile_wait(struct turnstile *ts, struct thread *owner, int queue)
td->td_tid, lock, lock->lo_name);
THREAD_LOCKPTR_ASSERT(td, &ts->ts_lock);
SCHED_STAT_INC(switch_turnstile);
mi_switch(SW_VOL, NULL);
mi_switch(SW_VOL | SWT_TURNSTILE, NULL);
if (LOCK_LOG_TEST(lock, 0))
CTR4(KTR_LOCK, "%s: td %d free from blocked on [%p] %s",

sys/sys/proc.h

@@ -588,10 +588,26 @@ struct proc {
#ifdef _KERNEL
/* Flags for mi_switch(). */
#define SW_VOL 0x0001 /* Voluntary switch. */
#define SW_INVOL 0x0002 /* Involuntary switch. */
#define SW_PREEMPT 0x0004 /* The invol switch is a preemption */
/* Types and flags for mi_switch(). */
#define SW_TYPE_MASK 0xff /* First 8 bits are switch type */
#define SWT_NONE 0 /* Unspecified switch. */
#define SWT_PREEMPT 1 /* Switching due to preemption. */
#define SWT_OWEPREEMPT 2 /* Switching due to owepreempt. */
#define SWT_TURNSTILE 3 /* Turnstile contention. */
#define SWT_SLEEPQ 4 /* Sleepq wait. */
#define SWT_SLEEPQTIMO 5 /* Sleepq timeout wait. */
#define SWT_RELINQUISH 6 /* yield call. */
#define SWT_NEEDRESCHED 7 /* NEEDRESCHED was set. */
#define SWT_IDLE 8 /* Switching from the idle thread. */
#define SWT_IWAIT 9 /* Waiting for interrupts. */
#define SWT_SUSPEND 10 /* Thread suspended. */
#define SWT_REMOTEPREEMPT 11 /* Remote processor preempted. */
#define SWT_REMOTEWAKEIDLE 12 /* Remote processor preempted idle. */
#define SWT_COUNT 13 /* Number of switch types. */
/* Flags */
#define SW_VOL 0x0100 /* Voluntary switch. */
#define SW_INVOL 0x0200 /* Involuntary switch. */
#define SW_PREEMPT 0x0400 /* The invol switch is a preemption */
/* How values for thread_single(). */
#define SINGLE_NO_EXIT 0

sys/sys/sched.h

@@ -154,17 +154,19 @@ sched_unpin(void)
#define SRQ_PREEMPTED 0x0008 /* has been preempted.. be kind */
#define SRQ_BORROWING 0x0010 /* Priority updated due to prio_lend */
/* Switch stats. */
/* Scheduler stats. */
#ifdef SCHED_STATS
extern long switch_preempt;
extern long switch_owepreempt;
extern long switch_turnstile;
extern long switch_sleepq;
extern long switch_sleepqtimo;
extern long switch_relinquish;
extern long switch_needresched;
extern long sched_switch_stats[SWT_COUNT];
#define SCHED_STAT_DEFINE_VAR(name, ptr, descr) \
SYSCTL_LONG(_kern_sched_stats, OID_AUTO, name, CTLFLAG_RD, ptr, 0, descr)
#define SCHED_STAT_DEFINE(name, descr) \
unsigned long name; \
SCHED_STAT_DEFINE_VAR(name, &name, descr)
#define SCHED_STAT_INC(var) atomic_add_long(&(var), 1)
#else
#define SCHED_STAT_DEFINE_VAR(name, ptr, descr)
#define SCHED_STAT_DEFINE(name, descr)
#define SCHED_STAT_INC(var)
#endif

sys/sys/sysctl.h

@@ -632,6 +632,7 @@ SYSCTL_DECL(_kern_features);
SYSCTL_DECL(_kern_ipc);
SYSCTL_DECL(_kern_proc);
SYSCTL_DECL(_kern_sched);
SYSCTL_DECL(_kern_sched_stats);
SYSCTL_DECL(_sysctl);
SYSCTL_DECL(_vm);
SYSCTL_DECL(_vm_stats);

sys/vm/vm_glue.c

@@ -735,7 +735,7 @@ scheduler(dummy)
thread_lock(&thread0);
if (!proc0_rescan) {
TD_SET_IWAIT(&thread0);
mi_switch(SW_VOL, NULL);
mi_switch(SW_VOL | SWT_IWAIT, NULL);
}
proc0_rescan = 0;
thread_unlock(&thread0);

sys/vm/vm_zeroidle.c

@@ -127,7 +127,7 @@ vm_pagezero(void __unused *arg)
#ifndef PREEMPTION
if (sched_runnable()) {
thread_lock(curthread);
mi_switch(SW_VOL, NULL);
mi_switch(SW_VOL | SWT_IDLE, NULL);
thread_unlock(curthread);
}
#endif
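
With options SCHED_STATS configured, the counters above appear under
kern.sched.stats, and the whole tree can be zeroed through
kern.sched.stats.reset (per the comment in kern_switch.c).  A minimal
userland reader, as a hypothetical usage sketch only (not part of this
commit), using the standard sysctlbyname(3) interface:

	#include <sys/types.h>
	#include <sys/sysctl.h>

	#include <stdio.h>

	int
	main(void)
	{
		long idle;
		size_t len;
		int one;

		/* Read one of the counters defined by this commit. */
		len = sizeof(idle);
		if (sysctlbyname("kern.sched.stats.idle", &idle, &len,
		    NULL, 0) == 0)
			printf("idle switches: %ld\n", idle);

		/* Ask the generic reset handler to zero every counter. */
		one = 1;
		if (sysctlbyname("kern.sched.stats.reset", NULL, NULL,
		    &one, sizeof(one)) != 0)
			perror("reset");
		return (0);
	}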