CPU time accounting speedup (step 2)

Keep accounting time (in per-cpu) cputicks and the statistics counts
in the thread and summarize them into struct proc at context switch.

Don't reach across CPUs in calcru().

Add code to calibrate the top speed of cpu_tickrate() for variable
cpu_tick hardware (like TSC on power managed machines).

Don't enforce monotonicity (at least for now) in calcru.  While the
calibrated cpu_tickrate ramps up it may not be true.

Use 27MHz counter on i386/Geode.

Use TSC on amd64 & i386 if present.

Use tick counter on sparc64
This commit is contained in:
Poul-Henning Kamp 2006-02-11 09:33:07 +00:00
parent e1fa2d9556
commit e8444a7e6f
12 changed files with 217 additions and 88 deletions

View File

@ -77,6 +77,7 @@ init_TSC(void)
tsc_freq = tscval[1] - tscval[0];
if (bootverbose)
printf("TSC clock: %lu Hz\n", tsc_freq);
set_cputicker(rdtsc, tsc_freq, 1);
}

View File

@ -110,6 +110,20 @@ static struct timecounter geode_timecounter = {
1000
};
/*
 * Return a 64-bit cputick value built from the 32-bit counter read
 * with inl(geode_counter).
 *
 * The upper half is kept in a static accumulator: whenever a reading
 * is smaller than the previous one, the counter is taken to have
 * wrapped and 2^32 is added.  Only one wrap can be detected between
 * two consecutive calls.  The static state is updated without any
 * locking in this function.
 */
static uint64_t
geode_cputicks(void)
{
	static uint64_t high;	/* accumulated wraps, always a multiple of 2^32 */
	static unsigned prev;	/* counter value seen by the previous call */
	unsigned now;

	now = inl(geode_counter);
	if (now < prev)
		high += (1LL << 32);
	prev = now;
	return (high | now);
}
/*
* The GEODE watchdog runs from a 32kHz frequency. One period of that is
* 31250 nanoseconds which we round down to 2^14 nanoseconds. The watchdog
@ -176,6 +190,7 @@ geode_probe(device_t self)
tc_init(&geode_timecounter);
EVENTHANDLER_REGISTER(watchdog_list, geode_watchdog,
NULL, 0);
set_cputicker(geode_cputicks, 27000000, 0);
}
} else if (pci_get_devid(self) == 0x0510100b) {
gpio = pci_read_config(self, PCIR_BAR(0), 4);

View File

@ -86,6 +86,7 @@ init_TSC(void)
tsc_freq = tscval[1] - tscval[0];
if (bootverbose)
printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq);
set_cputicker(rdtsc, tsc_freq, 1);
}

View File

@ -415,7 +415,7 @@ statclock(int usermode)
*/
if (p->p_flag & P_SA)
thread_statclock(1);
p->p_rux.rux_uticks++;
td->td_uticks++;
if (p->p_nice > NZERO)
cp_time[CP_NICE]++;
else
@ -435,13 +435,13 @@ statclock(int usermode)
*/
if ((td->td_pflags & TDP_ITHREAD) ||
td->td_intr_nesting_level >= 2) {
p->p_rux.rux_iticks++;
td->td_iticks++;
cp_time[CP_INTR]++;
} else {
if (p->p_flag & P_SA)
thread_statclock(0);
td->td_pticks++;
p->p_rux.rux_sticks++;
td->td_sticks++;
if (td != PCPU_GET(idlethread))
cp_time[CP_SYS]++;
else

View File

@ -545,6 +545,9 @@ retry:
/* Do the same timestamp bookkeeping that mi_switch() would do. */
new_switchtime = cpu_ticks();
p->p_rux.rux_runtime += (new_switchtime - PCPU_GET(switchtime));
p->p_rux.rux_uticks += td->td_uticks;
p->p_rux.rux_sticks += td->td_sticks;
p->p_rux.rux_iticks += td->td_iticks;
PCPU_SET(switchtime, new_switchtime);
PCPU_SET(switchticks, ticks);
cnt.v_swtch++;

View File

@ -694,7 +694,7 @@ fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp)
kp->ki_swtime = p->p_swtime;
kp->ki_pid = p->p_pid;
kp->ki_nice = p->p_nice;
kp->ki_runtime = p->p_rux.rux_runtime * 1000000 / cpu_tickrate();
kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime);
mtx_unlock_spin(&sched_lock);
if ((p->p_sflag & PS_INMEM) && p->p_stats != NULL) {
kp->ki_start = p->p_stats->p_start;

View File

@ -69,8 +69,6 @@ static struct mtx uihashtbl_mtx;
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash; /* size of hash table - 1 */
static void calcru1(struct proc *p, struct rusage_ext *ruxp,
struct timeval *up, struct timeval *sp);
static int donice(struct thread *td, struct proc *chgp, int n);
static struct uidinfo *uilookup(uid_t uid);
@ -694,57 +692,6 @@ getrlimit(td, uap)
return (error);
}
/*
* Transform the running time and tick information in proc p into user,
* system, and interrupt time usage.
*/
void
calcru(p, up, sp)
struct proc *p;
struct timeval *up;
struct timeval *sp;
{
uint64_t bt;
struct rusage_ext rux;
struct thread *td;
int bt_valid;
/* The caller must hold the proc lock and must not hold sched_lock. */
PROC_LOCK_ASSERT(p, MA_OWNED);
mtx_assert(&sched_lock, MA_NOTOWNED);
bt_valid = 0;
bt = 0;
mtx_lock_spin(&sched_lock);
/*
 * Work on a local copy so that the in-progress time slices added
 * below are not written back into p->p_rux.rux_runtime.
 */
rux = p->p_rux;
FOREACH_THREAD_IN_PROC(p, td) {
if (TD_IS_RUNNING(td)) {
/*
* Adjust for the current time slice. This is
* actually fairly important since the error here is
* on the order of a time quantum which is much
* greater than the precision of binuptime().
*/
KASSERT(td->td_oncpu != NOCPU,
("%s: running thread has no CPU", __func__));
/* Read the tick counter once and reuse it for every running thread. */
if (!bt_valid) {
bt = cpu_ticks();
bt_valid = 1;
}
/*
* XXX: Doesn't this mean that this quantum will
* XXX: get counted twice if calcru() is called
* XXX: from SIGINFO ?
*/
/* Uses the switchtime of the CPU the thread runs on, which may be another CPU. */
rux.rux_runtime +=
(bt - pcpu_find(td->td_oncpu)->pc_switchtime);
}
}
mtx_unlock_spin(&sched_lock);
/*
 * Hand the adjusted copy to calcru1(), then keep only the
 * rux_uu/rux_su/rux_iu values it left in the copy.
 */
calcru1(p, &rux, up, sp);
p->p_rux.rux_uu = rux.rux_uu;
p->p_rux.rux_su = rux.rux_su;
p->p_rux.rux_iu = rux.rux_iu;
}
void
calccru(p, up, sp)
struct proc *p;
@ -753,35 +700,52 @@ calccru(p, up, sp)
{
PROC_LOCK_ASSERT(p, MA_OWNED);
calcru1(p, &p->p_crux, up, sp);
calcru(p, up, sp);
}
static void
calcru1(p, ruxp, up, sp)
struct proc *p;
struct rusage_ext *ruxp;
struct timeval *up;
struct timeval *sp;
/*
* Transform the running time and tick information in proc p into user,
* system, and interrupt time usage. If appropriate, include the current
* time slice on this CPU.
*/
void
calcru(struct proc *p, struct timeval *up, struct timeval *sp)
{
struct thread *td;
struct rusage_ext *ruxp = &p->p_rux;
uint64_t u;
/* {user, system, interrupt, total} {ticks, usec}; previous tu: */
u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu;
PROC_LOCK_ASSERT(p, MA_OWNED);
mtx_assert(&sched_lock, MA_NOTOWNED);
mtx_lock_spin(&sched_lock);
if (curthread->td_proc == p) {
td = curthread;
u = cpu_ticks();
ruxp->rux_runtime += (u - PCPU_GET(switchtime));
PCPU_SET(switchtime, u);
ruxp->rux_uticks += td->td_uticks;
td->td_uticks = 0;
ruxp->rux_iticks += td->td_iticks;
td->td_iticks = 0;
ruxp->rux_sticks += td->td_sticks;
td->td_sticks = 0;
}
ut = ruxp->rux_uticks;
st = ruxp->rux_sticks;
it = ruxp->rux_iticks;
tu = ruxp->rux_runtime;
mtx_unlock_spin(&sched_lock);
tu = cputick2usec(tu);
tt = ut + st + it;
if (tt == 0) {
st = 1;
tt = 1;
}
tu = (ruxp->rux_runtime * 1000000LL) / cpu_tickrate();
ptu = ruxp->rux_uu + ruxp->rux_su + ruxp->rux_iu;
if (tu < ptu) {
printf(
"calcru: runtime went backwards from %ju usec to %ju usec for pid %d (%s)\n",
(uintmax_t)ptu, (uintmax_t)tu, p->p_pid, p->p_comm);
tu = ptu;
}
if ((int64_t)tu < 0) {
printf("calcru: negative runtime of %jd usec for pid %d (%s)\n",
(intmax_t)tu, p->p_pid, p->p_comm);
@ -792,7 +756,17 @@ calcru1(p, ruxp, up, sp)
uu = (tu * ut) / tt;
su = (tu * st) / tt;
iu = tu - uu - su;
if (tu < ptu) {
printf(
"calcru: runtime went backwards from %ju usec to %ju usec for pid %d (%s)\n",
(uintmax_t)ptu, (uintmax_t)tu, p->p_pid, p->p_comm);
printf("u %ju:%ju/%ju s %ju:%ju/%ju i %ju:%ju/%ju\n",
(uintmax_t)ut, (uintmax_t)ruxp->rux_uu, uu,
(uintmax_t)st, (uintmax_t)ruxp->rux_su, su,
(uintmax_t)it, (uintmax_t)ruxp->rux_iu, iu);
tu = ptu;
}
#if 0
/* Enforce monotonicity. */
if (uu < ruxp->rux_uu || su < ruxp->rux_su || iu < ruxp->rux_iu) {
if (uu < ruxp->rux_uu)
@ -814,6 +788,9 @@ calcru1(p, ruxp, up, sp)
KASSERT(iu >= ruxp->rux_iu,
("calcru: monotonisation botch 2"));
}
KASSERT(uu + su + iu <= tu,
("calcru: monotisation botch 3"));
#endif
ruxp->rux_uu = uu;
ruxp->rux_su = su;
ruxp->rux_iu = iu;

View File

@ -386,6 +386,12 @@ mi_switch(int flags, struct thread *newtd)
*/
new_switchtime = cpu_ticks();
p->p_rux.rux_runtime += (new_switchtime - PCPU_GET(switchtime));
p->p_rux.rux_uticks += td->td_uticks;
td->td_uticks = 0;
p->p_rux.rux_iticks += td->td_iticks;
td->td_iticks = 0;
p->p_rux.rux_sticks += td->td_sticks;
td->td_sticks = 0;
td->td_generation++; /* bump preempt-detect counter */

View File

@ -116,6 +116,7 @@ TC_STATS(nsetclock);
#undef TC_STATS
static void tc_windup(void);
static void cpu_tick_calibrate(int);
static int
sysctl_kern_boottime(SYSCTL_HANDLER_ARGS)
@ -364,6 +365,7 @@ tc_setclock(struct timespec *ts)
struct timespec ts2;
struct bintime bt, bt2;
cpu_tick_calibrate(1);
nsetclock++;
binuptime(&bt2);
timespec2bintime(ts, &bt);
@ -380,6 +382,7 @@ tc_setclock(struct timespec *ts)
(intmax_t)ts2.tv_sec, ts2.tv_nsec,
(intmax_t)ts->tv_sec, ts->tv_nsec);
}
cpu_tick_calibrate(1);
}
/*
@ -476,8 +479,8 @@ tc_windup(void)
* x = a * 2^32 / 10^9 = a * 4.294967296
*
* The range of th_adjustment is +/- 5000PPM so inside a 64bit int
* we can only multiply by about 850 without overflowing, but that
* leaves suitably precise fractions for multiply before divide.
* we can only multiply by about 850 without overflowing, that
* leaves no suitably precise fractions for multiply before divide.
*
* Divide before multiply with a fraction of 2199/512 results in a
* systematic undercompensation of 10PPM of th_adjustment. On a
@ -750,11 +753,16 @@ void
tc_ticktock(void)
{
static int count;
static time_t last_calib;
if (++count < tc_tick)
return;
count = 0;
tc_windup();
if (time_uptime != last_calib && !(time_uptime & 0xf)) {
cpu_tick_calibrate(0);
last_calib = time_uptime;
}
}
static void
@ -784,13 +792,18 @@ inittimecounter(void *dummy)
SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_SECOND, inittimecounter, NULL)
/* Cpu tick handling -------------------------------------------------*/
static int cpu_tick_variable;
static uint64_t cpu_tick_frequency;
static
uint64_t
tc_cpu_ticks(void)
{
static uint64_t base;
static unsigned last;
uint64_t u;
unsigned u;
struct timecounter *tc;
tc = timehands->th_counter;
@ -801,5 +814,120 @@ tc_cpu_ticks(void)
return (u + base);
}
uint64_t (*cpu_ticks)(void) = tc_cpu_ticks;
uint64_t (*cpu_tickrate)(void) = tc_getfrequency;
/*
 * This function gets called every 16 seconds on only one designated
 * CPU in the system from hardclock() via tc_ticktock().
 *
 * Whenever the real time clock is stepped we get called with reset=1
 * to make sure we handle suspend/resume and similar events correctly.
 *
 * For variable-rate cputick sources, the number of cputicks seen over
 * the interval is divided by 16 and, if it exceeds the currently
 * recorded cpu_tick_frequency, replaces it.  The recorded frequency is
 * never lowered here.
 */
static void
cpu_tick_calibrate(int reset)
{
	static uint64_t c_last;
	uint64_t c_this, c_delta;
	static struct bintime t_last;
	struct bintime t_this, t_delta;

	if (reset) {
		/* The clock was stepped, abort & reset */
		t_last.sec = 0;
		return;
	}

	/* we don't calibrate fixed rate cputicks */
	if (!cpu_tick_variable)
		return;

	getbinuptime(&t_this);
	c_this = cpu_ticks();

	/* t_last.sec == 0 means there is no previous sample to compare with. */
	if (t_last.sec != 0) {
		c_delta = c_this - c_last;
		t_delta = t_this;
		bintime_sub(&t_delta, &t_last);
		if (0 && bootverbose) {
			struct timespec ts;

			bintime2timespec(&t_delta, &ts);
			printf("%ju %ju.%016jx %ju.%09ju",
			    (uintmax_t)c_delta >> 4,
			    (uintmax_t)t_delta.sec, (uintmax_t)t_delta.frac,
			    (uintmax_t)ts.tv_sec, (uintmax_t)ts.tv_nsec);
		}
		/*
		 * Validate that 16 +/- 1/256 seconds passed.
		 * After division by 16 this gives us a precision of
		 * roughly 250PPM which is sufficient
		 *
		 * The fraction bounds are unsigned 64-bit constants:
		 * shifting a signed 0xff left by 56 bits would overflow
		 * a signed long long, which is undefined behaviour.
		 */
		if (t_delta.sec > 16 || (
		    t_delta.sec == 16 && t_delta.frac >= (0x01ULL << 56))) {
			/* too long */
			if (0 && bootverbose)
				printf("\ttoo long\n");
		} else if (t_delta.sec < 15 ||
		    (t_delta.sec == 15 && t_delta.frac <= (0xffULL << 56))) {
			/* too short */
			if (0 && bootverbose)
				printf("\ttoo short\n");
		} else {
			/* just right */
			c_delta >>= 4;	/* ticks per second over a ~16 s window */
			if (c_delta > cpu_tick_frequency) {
				if (0 && bootverbose)
					printf("\thigher\n");
				cpu_tick_frequency = c_delta;
			} else {
				if (0 && bootverbose)
					printf("\tlower\n");
			}
		}
	}

	/* Remember this sample as the baseline for the next call. */
	c_last = c_this;
	t_last = t_this;
}
/*
 * Select the function used for cpu_ticks().
 *
 * func: tick source to install; NULL switches back to tc_cpu_ticks and
 *       leaves the recorded frequency and variable flag untouched.
 * freq: value stored in cpu_tick_frequency when func is non-NULL.
 * var:  value stored in cpu_tick_variable when func is non-NULL;
 *       cpu_tick_calibrate() skips calibration when it is zero.
 */
void
set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var)
{
	if (func != NULL) {
		cpu_tick_frequency = freq;
		cpu_tick_variable = var;
		cpu_ticks = func;
		return;
	}
	cpu_ticks = tc_cpu_ticks;
}
/*
 * Return the rate of the current cpu_ticks() source: the value from
 * tc_getfrequency() while tc_cpu_ticks is installed, otherwise the
 * value recorded in cpu_tick_frequency.
 */
uint64_t
cpu_tickrate(void)
{
	return (cpu_ticks == tc_cpu_ticks ?
	    tc_getfrequency() : cpu_tick_frequency);
}
/*
* We need to be slightly careful converting cputicks to microseconds.
* There is plenty of margin in 64 bits of microseconds (half a million
* years) and in 64 bits at 4 GHz (146 years), but if we do a multiply
* before divide conversion (to retain precision) we find that the
* margin shrinks to 1.5 hours (one millionth of 146y).
* With a three prong approach we never lose significant bits, no
* matter what the cputick rate and length of the time interval are.
*/
/*
 * Convert a cputick count to microseconds using cpu_tickrate().
 *
 * The scale factor of 1000000 is split between the multiplication and
 * the division depending on the size of tick, so that the
 * multiplication stays within 64 bits:
 *   small counts:  multiply by 1000000, divide by the rate;
 *   medium counts: multiply by 1000, divide by rate / 1000;
 *   large counts:  divide by rate / 1000000.
 */
uint64_t
cputick2usec(uint64_t tick)
{
	if (tick <= 18446744073709LL)		/* floor(2^64 / 1000000) */
		return ((tick * 1000000LL) / cpu_tickrate());
	if (tick <= 18446744073709551LL)	/* floor(2^64 / 1000) */
		return ((tick * 1000LL) / (cpu_tickrate() / 1000LL));
	return (tick / (cpu_tickrate() / 1000000LL));
}
cpu_tick_f *cpu_ticks = tc_cpu_ticks;

View File

@ -72,13 +72,6 @@ tick_cputicks(void)
return (rd(tick));
}
static uint64_t
tick_cputickrate(void)
{
return (tick_freq);
}
void
cpu_initclocks(void)
{
@ -171,8 +164,7 @@ tick_init(u_long clock)
*/
tick_stop();
cpu_ticks = tick_cputicks;
cpu_tickrate = tick_cputickrate;
set_cputicker(tick_cputicks, tick_freq, 0);
}
void

View File

@ -283,6 +283,9 @@ struct thread {
struct thread *td_standin; /* (k + a) Use this for an upcall. */
struct kse_upcall *td_upcall; /* (k + j) Upcall structure. */
u_int td_pticks; /* (k) Statclock hits for profiling */
u_int td_sticks; /* (k) Statclock hits in system mode. */
u_int td_iticks; /* (k) Statclock hits in intr mode. */
u_int td_uticks; /* (k) Statclock hits in user mode. */
u_int td_uuticks; /* (k) Statclock hits (usr), for UTS. */
u_int td_usticks; /* (k) Statclock hits (sys), for UTS. */
int td_intrval; /* (j) Return value of TDF_INTERRUPT. */

View File

@ -238,8 +238,11 @@ int setenv(const char *name, const char *value);
int unsetenv(const char *name);
int testenv(const char *name);
extern uint64_t (*cpu_ticks)(void);
extern uint64_t (*cpu_tickrate)(void);
typedef uint64_t (cpu_tick_f)(void);
void set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var);
extern cpu_tick_f *cpu_ticks;
uint64_t cpu_tickrate(void);
uint64_t cputick2usec(uint64_t tick);
#ifdef APM_FIXUP_CALLTODO
struct timeval;