CPU time accounting speedup (step 2)
Keep the accounting time (in per-CPU cputicks) and the statistics counts in the thread, and summarize them into struct proc at context switch.

Don't reach across CPUs in calcru().

Add code to calibrate the top speed of cpu_tickrate() for variable cpu_tick hardware (such as the TSC on power-managed machines).

Don't enforce monotonicity in calcru() (at least for now): while the calibrated cpu_tickrate ramps up, it may not hold.

Use the 27 MHz counter on i386/Geode. Use the TSC on amd64 and i386 if present. Use the tick counter on sparc64.
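In outline, the new scheme reads a cheap per-CPU tick source and defers all unit conversion until somebody actually asks for the numbers. A minimal consumer-side sketch (hand-written for illustration, using the interface this commit adds to sys/systm.h; not code from the diff):

    /*
     * Sketch: timing an interval with the cputick interface.
     * cpu_ticks() reads the registered tick source (TSC, Geode
     * counter, sparc64 tick register, or the timecounter fallback);
     * cputick2usec() converts a tick delta to microseconds using
     * the calibrated cpu_tickrate().
     */
    uint64_t t0, t1, usec;

    t0 = cpu_ticks();
    /* ... code being measured ... */
    t1 = cpu_ticks();
    usec = cputick2usec(t1 - t0);

The hot paths (context switch, statclock) only ever do additions; the division happens once, in cputick2usec(), when the numbers are read.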
parent e1fa2d9556
commit e8444a7e6f
@@ -77,6 +77,7 @@ init_TSC(void)
 	tsc_freq = tscval[1] - tscval[0];
 	if (bootverbose)
 		printf("TSC clock: %lu Hz\n", tsc_freq);
+	set_cputicker(rdtsc, tsc_freq, 1);
 }
 
@@ -110,6 +110,20 @@ static struct timecounter geode_timecounter = {
 	1000
 };
 
+static uint64_t
+geode_cputicks(void)
+{
+	unsigned c;
+	static unsigned last;
+	static uint64_t offset;
+
+	c = inl(geode_counter);
+	if (c < last)
+		offset += (1LL << 32);
+	last = c;
+	return (offset | c);
+}
+
 /*
  * The GEODE watchdog runs from a 32kHz frequency.  One period of that is
  * 31250 nanoseconds which we round down to 2^14 nanoseconds.  The watchdog
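geode_cputicks() widens the free-running 32-bit Geode counter into the 64-bit cputick space: a wrap is detected whenever the new reading is smaller than the previous one, and 2^32 is credited to a static offset. At 27 MHz the counter wraps roughly every 159 seconds, so the function must be called more often than that, which the context-switch and statclock paths provide in practice. The same trick works for any 32-bit source; a generic sketch (widen32 and read32 are hypothetical names, not part of the commit):

    /* Sketch: widen a wrapping 32-bit counter to 64 bits. */
    static uint64_t
    widen32(uint32_t (*read32)(void))   /* hypothetical tick source */
    {
        static uint32_t last;
        static uint64_t offset;         /* wrap count, shifted left 32 */
        uint32_t c;

        c = read32();
        if (c < last)                   /* wrapped since the previous call */
            offset += 1ULL << 32;
        last = c;
        return (offset | c);            /* offset's low 32 bits are zero */
    }

Like the original, this keeps state in statics, so calls must be serialized by the caller.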
@@ -176,6 +190,7 @@ geode_probe(device_t self)
 			tc_init(&geode_timecounter);
 			EVENTHANDLER_REGISTER(watchdog_list, geode_watchdog,
 			    NULL, 0);
+			set_cputicker(geode_cputicks, 27000000, 0);
 		}
 	} else if (pci_get_devid(self) == 0x0510100b) {
 		gpio = pci_read_config(self, PCIR_BAR(0), 4);
@@ -86,6 +86,7 @@ init_TSC(void)
 	tsc_freq = tscval[1] - tscval[0];
 	if (bootverbose)
 		printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq);
+	set_cputicker(rdtsc, tsc_freq, 1);
 }
 
@@ -415,7 +415,7 @@ statclock(int usermode)
 		 */
 		if (p->p_flag & P_SA)
 			thread_statclock(1);
-		p->p_rux.rux_uticks++;
+		td->td_uticks++;
 		if (p->p_nice > NZERO)
 			cp_time[CP_NICE]++;
 		else
@@ -435,13 +435,13 @@ statclock(int usermode)
 		 */
 		if ((td->td_pflags & TDP_ITHREAD) ||
 		    td->td_intr_nesting_level >= 2) {
-			p->p_rux.rux_iticks++;
+			td->td_iticks++;
 			cp_time[CP_INTR]++;
 		} else {
 			if (p->p_flag & P_SA)
 				thread_statclock(0);
 			td->td_pticks++;
-			p->p_rux.rux_sticks++;
+			td->td_sticks++;
 			if (td != PCPU_GET(idlethread))
 				cp_time[CP_SYS]++;
 			else
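The statclock() changes only redirect the increments; each tick is classified exactly as before, but now lands in the thread, to be folded into the process later. Condensed from the hunks above (a paraphrase for orientation, not a verbatim excerpt):

    /* Condensed: where one statclock tick is now charged. */
    if (usermode)
        td->td_uticks++;                /* user mode */
    else if ((td->td_pflags & TDP_ITHREAD) ||
        td->td_intr_nesting_level >= 2)
        td->td_iticks++;                /* interrupt context */
    else
        td->td_sticks++;                /* kernel / system time */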
@@ -545,6 +545,9 @@ retry:
 	/* Do the same timestamp bookkeeping that mi_switch() would do. */
 	new_switchtime = cpu_ticks();
 	p->p_rux.rux_runtime += (new_switchtime - PCPU_GET(switchtime));
+	p->p_rux.rux_uticks += td->td_uticks;
+	p->p_rux.rux_sticks += td->td_sticks;
+	p->p_rux.rux_iticks += td->td_iticks;
 	PCPU_SET(switchtime, new_switchtime);
 	PCPU_SET(switchticks, ticks);
 	cnt.v_swtch++;
@@ -694,7 +694,7 @@ fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp)
 	kp->ki_swtime = p->p_swtime;
 	kp->ki_pid = p->p_pid;
 	kp->ki_nice = p->p_nice;
-	kp->ki_runtime = p->p_rux.rux_runtime * 1000000 / cpu_tickrate();
+	kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime);
 	mtx_unlock_spin(&sched_lock);
 	if ((p->p_sflag & PS_INMEM) && p->p_stats != NULL) {
 		kp->ki_start = p->p_stats->p_start;
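The one-line change above is not cosmetic: the replaced expression multiplies first and overflows 64 bits long before the tick counter itself does. cputick2usec(), added in kern_tc.c further down, picks the multiply/divide order by magnitude. Worked numbers, assuming a 4 GHz tick source:

    /*
     * rux_runtime * 1000000 wraps 2^64 once rux_runtime exceeds
     * 2^64 / 10^6 ~= 1.8e13 ticks, i.e. after about
     * 1.8e13 / 4e9 ~= 4600 seconds (roughly 1.5 hours) of
     * accumulated CPU time at 4 GHz.  cputick2usec() avoids this.
     */
    kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime);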
@@ -69,8 +69,6 @@ static struct mtx uihashtbl_mtx;
 static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
 static u_long uihash;		/* size of hash table - 1 */
 
-static void	calcru1(struct proc *p, struct rusage_ext *ruxp,
-		    struct timeval *up, struct timeval *sp);
 static int	donice(struct thread *td, struct proc *chgp, int n);
 static struct	uidinfo *uilookup(uid_t uid);
 
@@ -694,57 +692,6 @@ getrlimit(td, uap)
 	return (error);
 }
 
-/*
- * Transform the running time and tick information in proc p into user,
- * system, and interrupt time usage.
- */
-void
-calcru(p, up, sp)
-	struct proc *p;
-	struct timeval *up;
-	struct timeval *sp;
-{
-	uint64_t bt;
-	struct rusage_ext rux;
-	struct thread *td;
-	int bt_valid;
-
-	PROC_LOCK_ASSERT(p, MA_OWNED);
-	mtx_assert(&sched_lock, MA_NOTOWNED);
-	bt_valid = 0;
-	bt = 0;
-	mtx_lock_spin(&sched_lock);
-	rux = p->p_rux;
-	FOREACH_THREAD_IN_PROC(p, td) {
-		if (TD_IS_RUNNING(td)) {
-			/*
-			 * Adjust for the current time slice.  This is
-			 * actually fairly important since the error here is
-			 * on the order of a time quantum which is much
-			 * greater than the precision of binuptime().
-			 */
-			KASSERT(td->td_oncpu != NOCPU,
-			    ("%s: running thread has no CPU", __func__));
-			if (!bt_valid) {
-				bt = cpu_ticks();
-				bt_valid = 1;
-			}
-			/*
-			 * XXX: Doesn't this mean that this quantum will
-			 * XXX: get counted twice if calcru() is called
-			 * XXX: from SIGINFO ?
-			 */
-			rux.rux_runtime +=
-			    (bt - pcpu_find(td->td_oncpu)->pc_switchtime);
-		}
-	}
-	mtx_unlock_spin(&sched_lock);
-	calcru1(p, &rux, up, sp);
-	p->p_rux.rux_uu = rux.rux_uu;
-	p->p_rux.rux_su = rux.rux_su;
-	p->p_rux.rux_iu = rux.rux_iu;
-}
-
 void
 calccru(p, up, sp)
 	struct proc *p;
@@ -753,35 +700,52 @@ calccru(p, up, sp)
 {
 
 	PROC_LOCK_ASSERT(p, MA_OWNED);
-	calcru1(p, &p->p_crux, up, sp);
+	calcru(p, up, sp);
 }
 
-static void
-calcru1(p, ruxp, up, sp)
-	struct proc *p;
-	struct rusage_ext *ruxp;
-	struct timeval *up;
-	struct timeval *sp;
+/*
+ * Transform the running time and tick information in proc p into user,
+ * system, and interrupt time usage.  If appropriate, include the current
+ * time slice on this CPU.
+ */
+
+void
+calcru(struct proc *p, struct timeval *up, struct timeval *sp)
 {
+	struct thread *td;
+	struct rusage_ext *ruxp = &p->p_rux;
+	uint64_t u;
 	/* {user, system, interrupt, total} {ticks, usec}; previous tu: */
 	u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu;
 
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	mtx_assert(&sched_lock, MA_NOTOWNED);
+	mtx_lock_spin(&sched_lock);
+	if (curthread->td_proc == p) {
+		td = curthread;
+		u = cpu_ticks();
+		ruxp->rux_runtime += (u - PCPU_GET(switchtime));
+		PCPU_SET(switchtime, u);
+		ruxp->rux_uticks += td->td_uticks;
+		td->td_uticks = 0;
+		ruxp->rux_iticks += td->td_iticks;
+		td->td_iticks = 0;
+		ruxp->rux_sticks += td->td_sticks;
+		td->td_sticks = 0;
+	}
+
 	ut = ruxp->rux_uticks;
 	st = ruxp->rux_sticks;
 	it = ruxp->rux_iticks;
+	tu = ruxp->rux_runtime;
+	mtx_unlock_spin(&sched_lock);
+	tu = cputick2usec(tu);
 	tt = ut + st + it;
 	if (tt == 0) {
 		st = 1;
 		tt = 1;
 	}
-	tu = (ruxp->rux_runtime * 1000000LL) / cpu_tickrate();
 	ptu = ruxp->rux_uu + ruxp->rux_su + ruxp->rux_iu;
-	if (tu < ptu) {
-		printf(
-"calcru: runtime went backwards from %ju usec to %ju usec for pid %d (%s)\n",
-		    (uintmax_t)ptu, (uintmax_t)tu, p->p_pid, p->p_comm);
-		tu = ptu;
-	}
 	if ((int64_t)tu < 0) {
 		printf("calcru: negative runtime of %jd usec for pid %d (%s)\n",
 		    (intmax_t)tu, p->p_pid, p->p_comm);
@@ -792,7 +756,17 @@ calcru1(p, ruxp, up, sp)
 	uu = (tu * ut) / tt;
 	su = (tu * st) / tt;
 	iu = tu - uu - su;
-
+	if (tu < ptu) {
+		printf(
+"calcru: runtime went backwards from %ju usec to %ju usec for pid %d (%s)\n",
+		    (uintmax_t)ptu, (uintmax_t)tu, p->p_pid, p->p_comm);
+		printf("u %ju:%ju/%ju s %ju:%ju/%ju i %ju:%ju/%ju\n",
+		    (uintmax_t)ut, (uintmax_t)ruxp->rux_uu, uu,
+		    (uintmax_t)st, (uintmax_t)ruxp->rux_su, su,
+		    (uintmax_t)it, (uintmax_t)ruxp->rux_iu, iu);
+		tu = ptu;
+	}
+#if 0
 	/* Enforce monotonicity. */
 	if (uu < ruxp->rux_uu || su < ruxp->rux_su || iu < ruxp->rux_iu) {
 		if (uu < ruxp->rux_uu)
@@ -814,6 +788,9 @@ calcru1(p, ruxp, up, sp)
 		KASSERT(iu >= ruxp->rux_iu,
 		    ("calcru: monotonisation botch 2"));
 	}
+	KASSERT(uu + su + iu <= tu,
+	    ("calcru: monotisation botch 3"));
+#endif
 	ruxp->rux_uu = uu;
 	ruxp->rux_su = su;
 	ruxp->rux_iu = iu;
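The proportional split inside the new calcru() divides the total runtime tu among user, system and interrupt time in the ratio of the statclock tick counts, computing the interrupt share by subtraction so the three parts always sum to exactly tu. A worked example with made-up numbers:

    /*
     * tu = 10,000,000 usec, ut = 600, st = 350, it = 50, tt = 1000:
     *
     *   uu = (tu * ut) / tt = 6,000,000 usec user time
     *   su = (tu * st) / tt = 3,500,000 usec system time
     *   iu = tu - uu - su  =   500,000 usec interrupt time
     *
     * The tt == 0 guard charges a process that has runtime but no
     * ticks yet to system time (st = 1, tt = 1) rather than
     * dividing by zero.
     */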
@@ -386,6 +386,12 @@ mi_switch(int flags, struct thread *newtd)
 	 */
 	new_switchtime = cpu_ticks();
 	p->p_rux.rux_runtime += (new_switchtime - PCPU_GET(switchtime));
+	p->p_rux.rux_uticks += td->td_uticks;
+	td->td_uticks = 0;
+	p->p_rux.rux_iticks += td->td_iticks;
+	td->td_iticks = 0;
+	p->p_rux.rux_sticks += td->td_sticks;
+	td->td_sticks = 0;
 
 	td->td_generation++;	/* bump preempt-detect counter */
 
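mi_switch() above, exit1() (the retry: hunk earlier) and the new calcru() all fold a thread's pending accounting into its process at well-defined points, instead of letting calcru() reach across CPUs. The shared pattern as a sketch (rux_fold is a hypothetical name; the kernel open-codes this in each place):

    /* Sketch: fold the current thread's pending accounting into the proc. */
    static void
    rux_fold(struct rusage_ext *rux, struct thread *td, uint64_t now)
    {
        /* Charge the time slice that ended at 'now'. */
        rux->rux_runtime += (now - PCPU_GET(switchtime));
        PCPU_SET(switchtime, now);

        /* Move statclock ticks from thread to proc, then reset them. */
        rux->rux_uticks += td->td_uticks;
        td->td_uticks = 0;
        rux->rux_sticks += td->td_sticks;
        td->td_sticks = 0;
        rux->rux_iticks += td->td_iticks;
        td->td_iticks = 0;
    }

Everything touched here is per-CPU or under sched_lock, which is exactly what lets calcru() stop inspecting other CPUs' pc_switchtime.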
@@ -116,6 +116,7 @@ TC_STATS(nsetclock);
 #undef TC_STATS
 
 static void tc_windup(void);
+static void cpu_tick_calibrate(int);
 
 static int
 sysctl_kern_boottime(SYSCTL_HANDLER_ARGS)
@@ -364,6 +365,7 @@ tc_setclock(struct timespec *ts)
 	struct timespec ts2;
 	struct bintime bt, bt2;
 
+	cpu_tick_calibrate(1);
 	nsetclock++;
 	binuptime(&bt2);
 	timespec2bintime(ts, &bt);
@@ -380,6 +382,7 @@ tc_setclock(struct timespec *ts)
 		    (intmax_t)ts2.tv_sec, ts2.tv_nsec,
 		    (intmax_t)ts->tv_sec, ts->tv_nsec);
 	}
+	cpu_tick_calibrate(1);
 }
 
 /*
@@ -476,8 +479,8 @@ tc_windup(void)
 	 * x = a * 2^32 / 10^9 = a * 4.294967296
 	 *
 	 * The range of th_adjustment is +/- 5000PPM so inside a 64bit int
-	 * we can only multiply by about 850 without overflowing, but that
-	 * leaves suitably precise fractions for multiply before divide.
+	 * we can only multiply by about 850 without overflowing, that
+	 * leaves no suitably precise fractions for multiply before divide.
 	 *
 	 * Divide before multiply with a fraction of 2199/512 results in a
 	 * systematic undercompensation of 10PPM of th_adjustment.  On a
@@ -750,11 +753,16 @@ void
 tc_ticktock(void)
 {
 	static int count;
+	static time_t last_calib;
 
 	if (++count < tc_tick)
 		return;
 	count = 0;
 	tc_windup();
+	if (time_uptime != last_calib && !(time_uptime & 0xf)) {
+		cpu_tick_calibrate(0);
+		last_calib = time_uptime;
+	}
 }
 
 static void
@@ -784,13 +792,18 @@ inittimecounter(void *dummy)
 
 SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_SECOND, inittimecounter, NULL)
 
+/* Cpu tick handling -------------------------------------------------*/
+
+static int cpu_tick_variable;
+static uint64_t cpu_tick_frequency;
+
 static
 uint64_t
 tc_cpu_ticks(void)
 {
 	static uint64_t base;
 	static unsigned last;
-	uint64_t u;
+	unsigned u;
 	struct timecounter *tc;
 
 	tc = timehands->th_counter;
@@ -801,5 +814,120 @@ tc_cpu_ticks(void)
 	return (u + base);
 }
 
-uint64_t (*cpu_ticks)(void) = tc_cpu_ticks;
-uint64_t (*cpu_tickrate)(void) = tc_getfrequency;
+/*
+ * This function gets called every 16 seconds on only one designated
+ * CPU in the system from hardclock() via tc_ticktock().
+ *
+ * Whenever the real time clock is stepped we get called with reset=1
+ * to make sure we handle suspend/resume and similar events correctly.
+ */
+
+static void
+cpu_tick_calibrate(int reset)
+{
+	static uint64_t c_last;
+	uint64_t c_this, c_delta;
+	static struct bintime t_last;
+	struct bintime t_this, t_delta;
+
+	if (reset) {
+		/* The clock was stepped, abort & reset */
+		t_last.sec = 0;
+		return;
+	}
+
+	/* we don't calibrate fixed rate cputicks */
+	if (!cpu_tick_variable)
+		return;
+
+	getbinuptime(&t_this);
+	c_this = cpu_ticks();
+	if (t_last.sec != 0) {
+		c_delta = c_this - c_last;
+		t_delta = t_this;
+		bintime_sub(&t_delta, &t_last);
+		if (0 && bootverbose) {
+			struct timespec ts;
+			bintime2timespec(&t_delta, &ts);
+			printf("%ju %ju.%016jx %ju.%09ju",
+			    (uintmax_t)c_delta >> 4,
+			    (uintmax_t)t_delta.sec, (uintmax_t)t_delta.frac,
+			    (uintmax_t)ts.tv_sec, (uintmax_t)ts.tv_nsec);
+		}
+		/*
+		 * Validate that 16 +/- 1/256 seconds passed.
+		 * After division by 16 this gives us a precision of
+		 * roughly 250PPM which is sufficient
+		 */
+		if (t_delta.sec > 16 || (
+		    t_delta.sec == 16 && t_delta.frac >= (0x01LL << 56))) {
+			/* too long */
+			if (0 && bootverbose)
+				printf("\ttoo long\n");
+		} else if (t_delta.sec < 15 ||
+		    (t_delta.sec == 15 && t_delta.frac <= (0xffLL << 56))) {
+			/* too short */
+			if (0 && bootverbose)
+				printf("\ttoo short\n");
+		} else {
+			/* just right */
+			c_delta >>= 4;
+			if (c_delta > cpu_tick_frequency) {
+				if (0 && bootverbose)
+					printf("\thigher\n");
+				cpu_tick_frequency = c_delta;
+			} else {
+				if (0 && bootverbose)
+					printf("\tlower\n");
+			}
+		}
+	}
+	c_last = c_this;
+	t_last = t_this;
+}
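The calibration arithmetic above is worth spelling out. Over a nominal 16 second window the frequency is c_delta / 16 = c_delta >> 4, and a window is accepted only if it spans 16 +/- 1/256 seconds: the bintime constant 0x01LL << 56 is 2^56 / 2^64 = 1/256 s, so the worst-case error is 1/4096, about 244 PPM, matching the "roughly 250PPM" in the comment. Worked numbers for a hypothetical 1.6 GHz TSC:

    /*
     * c_delta over one accepted 16 s window = 25,600,000,000 ticks
     * c_delta >> 4 = 1,600,000,000 -> 1.6 GHz
     *
     * cpu_tick_frequency only ratchets upward: if the CPU was
     * throttled during the window, c_delta comes out low, and
     * scaling with an underestimated rate would overstate CPU
     * time.  Keeping the maximum converges on the top speed,
     * which is why the commit message warns that monotonicity
     * cannot be enforced while the estimate is still ramping up.
     */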
+
+void
+set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var)
+{
+
+	if (func == NULL) {
+		cpu_ticks = tc_cpu_ticks;
+	} else {
+		cpu_tick_frequency = freq;
+		cpu_tick_variable = var;
+		cpu_ticks = func;
+	}
+}
+
+uint64_t
+cpu_tickrate(void)
+{
+
+	if (cpu_ticks == tc_cpu_ticks)
+		return (tc_getfrequency());
+	return (cpu_tick_frequency);
+}
+
+/*
+ * We need to be slightly careful converting cputicks to microseconds.
+ * There is plenty of margin in 64 bits of microseconds (half a million
+ * years) and in 64 bits at 4 GHz (146 years), but if we do a multiply
+ * before divide conversion (to retain precision) we find that the
+ * margin shrinks to 1.5 hours (one millionth of 146y).
+ * With a three prong approach we never lose significant bits, no
+ * matter what the cputick rate and length of timeinterval is.
+ */
+
+uint64_t
+cputick2usec(uint64_t tick)
+{
+
+	if (tick > 18446744073709551LL)		/* floor(2^64 / 1000) */
+		return (tick / (cpu_tickrate() / 1000000LL));
+	else if (tick > 18446744073709LL)	/* floor(2^64 / 1000000) */
+		return ((tick * 1000LL) / (cpu_tickrate() / 1000LL));
+	else
+		return ((tick * 1000000LL) / cpu_tickrate());
+}
+
+cpu_tick_f *cpu_ticks = tc_cpu_ticks;
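The three prongs of cputick2usec() can be checked by hand: a tick count above floor(2^64 / 10^3) cannot survive even a *1000 multiply in 64 bits, and one above floor(2^64 / 10^6) cannot survive *10^6. Each branch therefore multiplies by the largest safe power of 1000 and divides the rate by the complementary one. Worked numbers at a hypothetical 2 GHz tick rate:

    /*
     * 1 hour = 7.2e12 ticks <= 2^64/10^6: last branch,
     *   (7.2e12 * 10^6) / 2e9 = 3.6e9 usec = 3600 s.
     * 1 day = 1.728e14 ticks > 2^64/10^6: middle branch,
     *   (1.728e14 * 10^3) / (2e9 / 10^3) = 8.64e10 usec = 86400 s.
     * Precision degrades gracefully: the larger the count, the
     * coarser the rate divisor, but no significant bits are ever
     * lost to 64-bit wraparound in any branch.
     */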
@@ -72,13 +72,6 @@ tick_cputicks(void)
 	return (rd(tick));
 }
 
-static uint64_t
-tick_cputickrate(void)
-{
-
-	return (tick_freq);
-}
-
 void
 cpu_initclocks(void)
 {
@@ -171,8 +164,7 @@ tick_init(u_long clock)
 	 */
 	tick_stop();
 
-	cpu_ticks = tick_cputicks;
-	cpu_tickrate = tick_cputickrate;
+	set_cputicker(tick_cputicks, tick_freq, 0);
 }
 
 void
@@ -283,6 +283,9 @@ struct thread {
 	struct thread	*td_standin;	/* (k + a) Use this for an upcall. */
 	struct kse_upcall *td_upcall;	/* (k + j) Upcall structure. */
 	u_int		td_pticks;	/* (k) Statclock hits for profiling */
+	u_int		td_sticks;	/* (k) Statclock hits in system mode. */
+	u_int		td_iticks;	/* (k) Statclock hits in intr mode. */
+	u_int		td_uticks;	/* (k) Statclock hits in user mode. */
 	u_int		td_uuticks;	/* (k) Statclock hits (usr), for UTS. */
 	u_int		td_usticks;	/* (k) Statclock hits (sys), for UTS. */
 	int		td_intrval;	/* (j) Return value of TDF_INTERRUPT. */
@@ -238,8 +238,11 @@ int	setenv(const char *name, const char *value);
 int	unsetenv(const char *name);
 int	testenv(const char *name);
 
-extern uint64_t (*cpu_ticks)(void);
-extern uint64_t (*cpu_tickrate)(void);
+typedef uint64_t (cpu_tick_f)(void);
+void set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var);
+extern cpu_tick_f *cpu_ticks;
+uint64_t cpu_tickrate(void);
+uint64_t cputick2usec(uint64_t tick);
 
 #ifdef APM_FIXUP_CALLTODO
 struct timeval;