Rework how we store process times in the kernel such that we always store

the raw values including for child process statistics and only compute the
system and user timevals on demand.

- Fix the various kern_wait() syscall wrappers to only pass in a rusage
  pointer if they are going to use the result.
- Add a kern_getrusage() function for the ABI syscalls to use so that they
  don't have to play stackgap games to call getrusage().
- Fix the svr4_sys_times() syscall to just call calcru() to calculate the
  times it needs rather than calling getrusage() twice with associated
  stackgap, etc.
- Add a new rusage_ext structure to store raw time stats such as tick counts
  for user, system, and interrupt time as well as a bintime of the total
  runtime.  A new p_rux field in struct proc replaces the corresponding inline
  fields previously in struct proc (i.e. p_[isu]ticks, p_[isu]u, and
  p_runtime).  A new p_crux field in struct proc contains the "raw" child time
  usage statistics.
  ruadd() has been changed to handle adding the associated rusage_ext
  structures as well as the values in rusage.  Effectively, the values in
  rusage_ext replace the ru_utime and ru_stime values in struct rusage.  These
  two fields in struct rusage are no longer used in the kernel.
- calcru() has been split into a static worker function calcru1() that
  calculates appropriate timevals for user and system time as well as updating
  the rux_[isu]u fields of a passed in rusage_ext structure.  calcru() uses a
  copy of the process' p_rux structure to compute the timevals after updating
  the runtime appropriately if any of the threads in that process are
  currently executing.  It also now only locks sched_lock internally while
  doing the rux_runtime fixup.  calcru() now only requires the caller to
  hold the proc lock and calcru1() only requires the proc lock internally.
  calcru() also no longer allows callers to ask for an interrupt timeval
  since none of them actually did.
- calcru() now correctly handles threads executing on other CPUs.
- A new calccru() function computes the child system and user timevals by
  calling calcru1() on p_crux.  Note that this means that any code that wants
  child times must now call this function rather than reading from p_cru
  directly.  This function also requires the proc lock.
- This finishes the locking for rusage and friends so some of the Giant locks
  in exit1() and kern_wait() are now gone.
- The locking in ttyinfo() has been tweaked so that a shared lock of the
  proctree lock is used to protect the process group rather than the process
  group lock.  By holding this lock until the end of the function we now
  ensure that the process/thread that we pick to dump info about will no
  longer vanish while we are trying to output its info to the console.

Submitted by:	bde (mostly)
MFC after:	1 month
This commit is contained in:
John Baldwin 2004-10-05 18:51:11 +00:00
parent a55db2b6e6
commit 78c85e8dfc
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=136152
19 changed files with 275 additions and 259 deletions

View File

@ -1335,29 +1335,16 @@ osf1_getrusage(td, uap)
struct thread *td;
struct osf1_getrusage_args *uap;
{
struct proc *p;
struct rusage *rup;
struct osf1_rusage oru;
struct rusage ru;
int error;
p = td->td_proc;
switch (uap->who) {
case RUSAGE_SELF:
rup = &p->p_stats->p_ru;
mtx_lock_spin(&sched_lock);
calcru(p, &rup->ru_utime, &rup->ru_stime, NULL);
mtx_unlock_spin(&sched_lock);
break;
case RUSAGE_CHILDREN:
rup = &p->p_stats->p_cru;
break;
default:
return (EINVAL);
}
TV_CP(rup->ru_utime, oru.ru_utime);
TV_CP(rup->ru_stime, oru.ru_stime);
bcopy(&(rup->ru_first), &(oru.ru_first),
error = kern_getrusage(td, uap->who, &ru);
if (error)
return (error);
TV_CP(ru.ru_utime, oru.ru_utime);
TV_CP(ru.ru_stime, oru.ru_stime);
bcopy(&(ru.ru_first), &(oru.ru_first),
(&(oru.ru_last) - &(oru.ru_first)));
return (copyout((caddr_t)&oru, (caddr_t)uap->rusage,
@ -1372,9 +1359,13 @@ osf1_wait4(td, uap)
{
int error, status;
struct osf1_rusage oru;
struct rusage ru;
struct rusage ru, *rup;
error = kern_wait(td, uap->pid, &status, uap->options, &ru);
if (uap->rusage != NULL)
rup = &ru;
else
rup = NULL;
error = kern_wait(td, uap->pid, &status, uap->options, rup);
if (error)
return (error);
if (uap->status != NULL)

View File

@ -940,24 +940,14 @@ linux_nanosleep(struct thread *td, struct linux_nanosleep_args *uap)
int
linux_getrusage(struct thread *td, struct linux_getrusage_args *uap)
{
struct l_rusage s32;
struct rusage s;
int error;
caddr_t sg;
struct l_rusage *p32, s32;
struct rusage *p = NULL, s;
p32 = uap->rusage;
if (p32 != NULL) {
sg = stackgap_init();
p = stackgap_alloc(&sg, sizeof(struct rusage));
uap->rusage = (struct l_rusage *)p;
}
error = getrusage(td, (struct getrusage_args *) uap);
error = kern_getrusage(td, uap->who, &s);
if (error != 0)
return (error);
if (p32 != NULL) {
error = copyin(p, &s, sizeof(s));
if (error != 0)
return (error);
if (uap->rusage != NULL) {
s32.ru_utime.tv_sec = s.ru_utime.tv_sec;
s32.ru_utime.tv_usec = s.ru_utime.tv_usec;
s32.ru_stime.tv_sec = s.ru_stime.tv_sec;
@ -976,7 +966,7 @@ linux_getrusage(struct thread *td, struct linux_getrusage_args *uap)
s32.ru_nsignals = s.ru_nsignals;
s32.ru_nvcsw = s.ru_nvcsw;
s32.ru_nivcsw = s.ru_nivcsw;
error = copyout(&s32, p32, sizeof(s32));
error = copyout(&s32, uap->rusage, sizeof(s32));
}
return (error);
}

View File

@ -91,9 +91,13 @@ freebsd32_wait4(struct thread *td, struct freebsd32_wait4_args *uap)
{
int error, status;
struct rusage32 ru32;
struct rusage ru;
struct rusage ru, *rup;
error = kern_wait(td, uap->pid, &status, uap->options, &ru);
if (uap->rusage != NULL)
rup = &ru;
else
rup = NULL;
error = kern_wait(td, uap->pid, &status, uap->options, rup);
if (error)
return (error);
if (uap->status != NULL)
@ -639,24 +643,14 @@ freebsd32_gettimeofday(struct thread *td,
int
freebsd32_getrusage(struct thread *td, struct freebsd32_getrusage_args *uap)
{
struct rusage32 s32;
struct rusage s;
int error;
caddr_t sg;
struct rusage32 *p32, s32;
struct rusage *p = NULL, s;
p32 = uap->rusage;
if (p32) {
sg = stackgap_init();
p = stackgap_alloc(&sg, sizeof(struct rusage));
uap->rusage = (struct rusage32 *)p;
}
error = getrusage(td, (struct getrusage_args *) uap);
error = kern_getrusage(td, uap->who, &s);
if (error)
return (error);
if (p32) {
error = copyin(p, &s, sizeof(s));
if (error)
return (error);
if (uap->rusage != NULL) {
TV_CP(s, s32, ru_utime);
TV_CP(s, s32, ru_stime);
CP(s, s32, ru_maxrss);
@ -673,7 +667,7 @@ freebsd32_getrusage(struct thread *td, struct freebsd32_getrusage_args *uap)
CP(s, s32, ru_nsignals);
CP(s, s32, ru_nvcsw);
CP(s, s32, ru_nivcsw);
error = copyout(&s32, p32, sizeof(s32));
error = copyout(&s32, uap->rusage, sizeof(s32));
}
return (error);
}

View File

@ -689,9 +689,9 @@ struct l_times_argv {
int
linux_times(struct thread *td, struct linux_times_args *args)
{
struct timeval tv;
struct timeval tv, utime, stime, cutime, cstime;
struct l_times_argv tms;
struct rusage ru;
struct proc *p;
int error;
#ifdef DEBUG
@ -699,15 +699,17 @@ linux_times(struct thread *td, struct linux_times_args *args)
printf(ARGS(times, "*"));
#endif
mtx_lock_spin(&sched_lock);
calcru(td->td_proc, &ru.ru_utime, &ru.ru_stime, NULL);
mtx_unlock_spin(&sched_lock);
p = td->td_proc;
PROC_LOCK(p);
calcru(p, &utime, &stime);
calccru(p, &cutime, &cstime);
PROC_UNLOCK(p);
tms.tms_utime = CONVTCK(ru.ru_utime);
tms.tms_stime = CONVTCK(ru.ru_stime);
tms.tms_utime = CONVTCK(utime);
tms.tms_stime = CONVTCK(stime);
tms.tms_cutime = CONVTCK(td->td_proc->p_stats->p_cru.ru_utime);
tms.tms_cstime = CONVTCK(td->td_proc->p_stats->p_cru.ru_stime);
tms.tms_cutime = CONVTCK(cutime);
tms.tms_cstime = CONVTCK(cstime);
if ((error = copyout(&tms, args->buf, sizeof(tms))))
return error;
@ -851,7 +853,7 @@ int
linux_wait4(struct thread *td, struct linux_wait4_args *args)
{
int error, options, tmpstat;
struct rusage ru;
struct rusage ru, *rup;
struct proc *p;
#ifdef DEBUG
@ -866,7 +868,11 @@ linux_wait4(struct thread *td, struct linux_wait4_args *args)
if (args->options & __WCLONE)
options |= WLINUXCLONE;
error = kern_wait(td, args->pid, &tmpstat, options, &ru);
if (args->rusage != NULL)
rup = &ru;
else
rup = NULL;
error = kern_wait(td, args->pid, &tmpstat, options, rup);
if (error)
return error;

View File

@ -861,44 +861,30 @@ svr4_sys_times(td, uap)
struct thread *td;
struct svr4_sys_times_args *uap;
{
int error, *retval = td->td_retval;
struct tms tms;
struct timeval t;
struct rusage *ru;
struct rusage r;
struct getrusage_args ga;
struct timeval tv, utime, stime, cutime, cstime;
struct tms tms;
struct proc *p;
int error;
caddr_t sg = stackgap_init();
ru = stackgap_alloc(&sg, sizeof(struct rusage));
p = td->td_proc;
PROC_LOCK(p);
calcru(p, &utime, &stime);
calccru(p, &cutime, &cstime);
PROC_UNLOCK(p);
ga.who = RUSAGE_SELF;
ga.rusage = ru;
tms.tms_utime = timeval_to_clock_t(&utime);
tms.tms_stime = timeval_to_clock_t(&stime);
error = getrusage(td, &ga);
tms.tms_cutime = timeval_to_clock_t(&cutime);
tms.tms_cstime = timeval_to_clock_t(&cstime);
error = copyout(&tms, uap->tp, sizeof(tms));
if (error)
return error;
return (error);
if ((error = copyin(ru, &r, sizeof r)) != 0)
return error;
tms.tms_utime = timeval_to_clock_t(&r.ru_utime);
tms.tms_stime = timeval_to_clock_t(&r.ru_stime);
ga.who = RUSAGE_CHILDREN;
error = getrusage(td, &ga);
if (error)
return error;
if ((error = copyin(ru, &r, sizeof r)) != 0)
return error;
tms.tms_cutime = timeval_to_clock_t(&r.ru_utime);
tms.tms_cstime = timeval_to_clock_t(&r.ru_stime);
microtime(&t);
*retval = timeval_to_clock_t(&t);
return copyout(&tms, uap->tp, sizeof(tms));
microtime(&tv);
td->td_retval[0] = (int)timeval_to_clock_t(&tv);
return (0);
}
@ -1149,6 +1135,7 @@ svr4_setinfo(p, st, s)
int st;
svr4_siginfo_t *s;
{
struct timeval utime, stime;
svr4_siginfo_t i;
int sig;
@ -1159,16 +1146,11 @@ svr4_setinfo(p, st, s)
if (p) {
i.si_pid = p->p_pid;
mtx_lock_spin(&sched_lock);
if (p->p_state == PRS_ZOMBIE) {
i.si_stime = p->p_ru->ru_stime.tv_sec;
i.si_utime = p->p_ru->ru_utime.tv_sec;
}
else {
i.si_stime = p->p_stats->p_ru.ru_stime.tv_sec;
i.si_utime = p->p_stats->p_ru.ru_utime.tv_sec;
}
mtx_unlock_spin(&sched_lock);
PROC_LOCK(p);
calcru(p, &utime, &stime);
PROC_UNLOCK(p);
i.si_stime = stime.tv_sec;
i.si_utime = utime.tv_sec;
}
if (WIFEXITED(st)) {
@ -1296,7 +1278,8 @@ svr4_sys_waitsys(td, uap)
PROC_UNLOCK(q);
sx_xunlock(&proctree_lock);
q->p_xstat = 0;
ruadd(&p->p_stats->p_cru, q->p_ru);
ruadd(&p->p_stats->p_cru, &p->p_crux, q->p_ru,
&q->p_rux);
FREE(q->p_ru, M_ZOMBIE);
q->p_ru = NULL;

View File

@ -124,12 +124,12 @@ procfs_doprocstatus(PFS_FILL_ARGS)
} else
wmesg = "nochan";
}
mtx_unlock_spin(&sched_lock);
if (p->p_sflag & PS_INMEM) {
struct timeval start, ut, st;
calcru(p, &ut, &st, (struct timeval *) NULL);
mtx_unlock_spin(&sched_lock);
calcru(p, &ut, &st);
start = p->p_stats->p_start;
timevaladd(&start, &boottime);
sbuf_printf(sb, " %ld,%ld %ld,%ld %ld,%ld",
@ -137,7 +137,6 @@ procfs_doprocstatus(PFS_FILL_ARGS)
ut.tv_sec, ut.tv_usec,
st.tv_sec, st.tv_usec);
} else {
mtx_unlock_spin(&sched_lock);
sbuf_printf(sb, " -1,-1 -1,-1 -1,-1");
}

View File

@ -468,8 +468,8 @@ proc0_post(void *dummy __unused)
sx_slock(&allproc_lock);
LIST_FOREACH(p, &allproc, p_list) {
microuptime(&p->p_stats->p_start);
p->p_runtime.sec = 0;
p->p_runtime.frac = 0;
p->p_rux.rux_runtime.sec = 0;
p->p_rux.rux_runtime.frac = 0;
}
sx_sunlock(&allproc_lock);
binuptime(PCPU_PTR(switchtime));

View File

@ -249,9 +249,7 @@ acct_process(td)
bcopy(p->p_comm, acct.ac_comm, sizeof acct.ac_comm);
/* (2) The amount of user and system time that was used */
mtx_lock_spin(&sched_lock);
calcru(p, &ut, &st, NULL);
mtx_unlock_spin(&sched_lock);
calcru(p, &ut, &st);
acct.ac_utime = encode_comp_t(ut.tv_sec, ut.tv_usec);
acct.ac_stime = encode_comp_t(st.tv_sec, st.tv_usec);

View File

@ -363,7 +363,7 @@ stopprofclock(p)
* Statistics clock. Grab profile sample, and if divider reaches 0,
* do process and kernel statistics. Most of the statistics are only
* used by user-level statistics programs. The main exceptions are
* ke->ke_uticks, p->p_sticks, p->p_iticks, and p->p_estcpu.
* ke->ke_uticks, p->p_rux.rux_sticks, p->p_rux.rux_iticks, and p->p_estcpu.
* This should be called by all active processors.
*/
void
@ -386,7 +386,7 @@ statclock(frame)
*/
if (p->p_flag & P_SA)
thread_statclock(1);
p->p_uticks++;
p->p_rux.rux_uticks++;
if (p->p_nice > NZERO)
cp_time[CP_NICE]++;
else
@ -405,13 +405,13 @@ statclock(frame)
* in ``non-process'' (i.e., interrupt) work.
*/
if ((td->td_ithd != NULL) || td->td_intr_nesting_level >= 2) {
p->p_iticks++;
p->p_rux.rux_iticks++;
cp_time[CP_INTR]++;
} else {
if (p->p_flag & P_SA)
thread_statclock(0);
td->td_sticks++;
p->p_sticks++;
p->p_rux.rux_sticks++;
if (p != PCPU_GET(idlethread)->td_proc)
cp_time[CP_SYS]++;
else

View File

@ -410,20 +410,16 @@ exit1(struct thread *td, int rv)
}
/*
* Save exit status and final rusage info, adding in child rusage
* info and self times.
* Save exit status and finalize rusage info except for times,
* adding in child rusage info.
*/
mtx_lock(&Giant);
PROC_LOCK(p);
p->p_xstat = rv;
p->p_xthread = td;
p->p_stats->p_ru.ru_nvcsw++;
*p->p_ru = p->p_stats->p_ru;
mtx_lock_spin(&sched_lock);
calcru(p, &p->p_ru->ru_utime, &p->p_ru->ru_stime, NULL);
mtx_unlock_spin(&sched_lock);
ruadd(p->p_ru, &p->p_stats->p_cru);
ruadd(p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux);
mtx_unlock(&Giant);
/*
* Notify interested parties of our demise.
*/
@ -487,9 +483,6 @@ exit1(struct thread *td, int rv)
PROC_LOCK(p->p_pptr);
sx_xunlock(&proctree_lock);
while (mtx_owned(&Giant))
mtx_unlock(&Giant);
/*
* We have to wait until after acquiring all locks before
* changing p_state. We need to avoid all possible context
@ -508,8 +501,8 @@ exit1(struct thread *td, int rv)
/* Do the same timestamp bookkeeping that mi_switch() would do. */
binuptime(&new_switchtime);
bintime_add(&p->p_runtime, &new_switchtime);
bintime_sub(&p->p_runtime, PCPU_PTR(switchtime));
bintime_add(&p->p_rux.rux_runtime, &new_switchtime);
bintime_sub(&p->p_rux.rux_runtime, PCPU_PTR(switchtime));
PCPU_SET(switchtime, new_switchtime);
PCPU_SET(switchticks, ticks);
cnt.v_swtch++;
@ -556,10 +549,14 @@ owait(struct thread *td, struct owait_args *uap __unused)
int
wait4(struct thread *td, struct wait_args *uap)
{
struct rusage ru;
struct rusage ru, *rup;
int error, status;
error = kern_wait(td, uap->pid, &status, uap->options, &ru);
if (uap->rusage != NULL)
rup = &ru;
else
rup = NULL;
error = kern_wait(td, uap->pid, &status, uap->options, rup);
if (uap->status != NULL && error == 0)
error = copyout(&status, uap->status, sizeof(status));
if (uap->rusage != NULL && error == 0)
@ -612,8 +609,10 @@ kern_wait(struct thread *td, pid_t pid, int *status, int options,
td->td_retval[0] = p->p_pid;
if (status)
*status = p->p_xstat; /* convert to int */
if (rusage)
if (rusage) {
bcopy(p->p_ru, rusage, sizeof(struct rusage));
calcru(p, &rusage->ru_utime, &rusage->ru_stime);
}
/*
* If we got the child via a ptrace 'attach',
@ -648,16 +647,15 @@ kern_wait(struct thread *td, pid_t pid, int *status, int options,
* all other writes to this proc are visible now, so
* no more locking is needed for p.
*/
mtx_lock(&Giant);
PROC_LOCK(p);
p->p_xstat = 0; /* XXX: why? */
PROC_UNLOCK(p);
PROC_LOCK(q);
ruadd(&q->p_stats->p_cru, p->p_ru);
ruadd(&q->p_stats->p_cru, &q->p_crux, p->p_ru,
&p->p_rux);
PROC_UNLOCK(q);
FREE(p->p_ru, M_ZOMBIE);
p->p_ru = NULL;
mtx_unlock(&Giant);
/*
* Decrement the count of procs running with this uid.

View File

@ -674,23 +674,11 @@ fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp)
kp->ki_dsize = vm->vm_dsize;
kp->ki_ssize = vm->vm_ssize;
}
if ((p->p_sflag & PS_INMEM) && p->p_stats) {
kp->ki_start = p->p_stats->p_start;
timevaladd(&kp->ki_start, &boottime);
kp->ki_rusage = p->p_stats->p_ru;
calcru(p, &kp->ki_rusage.ru_utime, &kp->ki_rusage.ru_stime,
NULL);
kp->ki_childstime = p->p_stats->p_cru.ru_stime;
kp->ki_childutime = p->p_stats->p_cru.ru_utime;
/* Some callers want child-times in a single value */
kp->ki_childtime = kp->ki_childstime;
timevaladd(&kp->ki_childtime, &kp->ki_childutime);
}
kp->ki_sflag = p->p_sflag;
kp->ki_swtime = p->p_swtime;
kp->ki_pid = p->p_pid;
kp->ki_nice = p->p_nice;
bintime2timeval(&p->p_runtime, &tv);
bintime2timeval(&p->p_rux.rux_runtime, &tv);
kp->ki_runtime = tv.tv_sec * (u_int64_t)1000000 + tv.tv_usec;
if (p->p_state != PRS_ZOMBIE) {
#if 0
@ -758,6 +746,17 @@ fill_kinfo_thread(struct thread *td, struct kinfo_proc *kp)
kp->ki_stat = SZOMB;
}
mtx_unlock_spin(&sched_lock);
if ((p->p_sflag & PS_INMEM) && p->p_stats != NULL) {
kp->ki_start = p->p_stats->p_start;
timevaladd(&kp->ki_start, &boottime);
kp->ki_rusage = p->p_stats->p_ru;
calcru(p, &kp->ki_rusage.ru_utime, &kp->ki_rusage.ru_stime);
calccru(p, &kp->ki_childutime, &kp->ki_childstime);
/* Some callers want child-times in a single value */
kp->ki_childtime = kp->ki_childstime;
timevaladd(&kp->ki_childtime, &kp->ki_childutime);
}
sp = NULL;
tp = NULL;
if (p->p_pgrp) {

View File

@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$");
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/sx.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
#include <sys/time.h>
@ -67,6 +68,8 @@ static struct mtx uihashtbl_mtx;
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash; /* size of hash table - 1 */
static void calcru1(struct proc *p, struct rusage_ext *ruxp,
struct timeval *up, struct timeval *sp);
static int donice(struct thread *td, struct proc *chgp, int n);
static struct uidinfo *uilookup(uid_t uid);
@ -691,65 +694,84 @@ getrlimit(td, uap)
* system, and interrupt time usage.
*/
void
calcru(p, up, sp, ip)
calcru(p, up, sp)
struct proc *p;
struct timeval *up;
struct timeval *sp;
struct timeval *ip;
{
struct bintime bt, rt;
struct timeval tv;
struct bintime bt;
struct rusage_ext rux;
struct thread *td;
int bt_valid;
PROC_LOCK_ASSERT(p, MA_OWNED);
mtx_assert(&sched_lock, MA_NOTOWNED);
bt_valid = 0;
mtx_lock_spin(&sched_lock);
rux = p->p_rux;
FOREACH_THREAD_IN_PROC(p, td) {
if (TD_IS_RUNNING(td)) {
/*
* Adjust for the current time slice. This is
* actually fairly important since the error here is
* on the order of a time quantum which is much
* greater than the precision of binuptime().
*/
KASSERT(td->td_oncpu != NOCPU,
("%s: running thread has no CPU", __func__));
if (!bt_valid) {
binuptime(&bt);
bt_valid = 1;
}
bintime_add(&rux.rux_runtime, &bt);
bintime_sub(&rux.rux_runtime,
&pcpu_find(td->td_oncpu)->pc_switchtime);
}
}
mtx_unlock_spin(&sched_lock);
calcru1(p, &rux, up, sp);
p->p_rux.rux_uu = rux.rux_uu;
p->p_rux.rux_su = rux.rux_su;
p->p_rux.rux_iu = rux.rux_iu;
}
void
calccru(p, up, sp)
struct proc *p;
struct timeval *up;
struct timeval *sp;
{
PROC_LOCK_ASSERT(p, MA_OWNED);
calcru1(p, &p->p_crux, up, sp);
}
static void
calcru1(p, ruxp, up, sp)
struct proc *p;
struct rusage_ext *ruxp;
struct timeval *up;
struct timeval *sp;
{
struct timeval tv;
/* {user, system, interrupt, total} {ticks, usec}; previous tu: */
u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu;
int problemcase;
mtx_assert(&sched_lock, MA_OWNED);
/* XXX: why spl-protect ? worst case is an off-by-one report */
ut = p->p_uticks;
st = p->p_sticks;
it = p->p_iticks;
ut = ruxp->rux_uticks;
st = ruxp->rux_sticks;
it = ruxp->rux_iticks;
tt = ut + st + it;
if (tt == 0) {
st = 1;
tt = 1;
}
rt = p->p_runtime;
problemcase = 0;
FOREACH_THREAD_IN_PROC(p, td) {
/*
* Adjust for the current time slice. This is actually fairly
* important since the error here is on the order of a time
* quantum, which is much greater than the sampling error.
*/
if (td == curthread) {
binuptime(&bt);
bintime_sub(&bt, PCPU_PTR(switchtime));
bintime_add(&rt, &bt);
} else if (TD_IS_RUNNING(td)) {
/*
* XXX: this case should add the difference between
* the current time and the switch time as above,
* but the switch time is inaccessible, so we can't
* do the adjustment and will end up with a wrong
* runtime. A previous call with a different
* curthread may have obtained a (right or wrong)
* runtime that is in advance of ours. Just set a
* flag to avoid warning about this known problem.
*/
problemcase = 1;
}
}
bintime2timeval(&rt, &tv);
bintime2timeval(&ruxp->rux_runtime, &tv);
tu = (u_int64_t)tv.tv_sec * 1000000 + tv.tv_usec;
ptu = p->p_uu + p->p_su + p->p_iu;
ptu = ruxp->rux_uu + ruxp->rux_su + ruxp->rux_iu;
if (tu < ptu) {
if (!problemcase)
printf(
printf(
"calcru: runtime went backwards from %ju usec to %ju usec for pid %d (%s)\n",
(uintmax_t)ptu, (uintmax_t)tu, p->p_pid, p->p_comm);
(uintmax_t)ptu, (uintmax_t)tu, p->p_pid, p->p_comm);
tu = ptu;
}
if ((int64_t)tu < 0) {
@ -764,38 +786,34 @@ calcru(p, up, sp, ip)
iu = tu - uu - su;
/* Enforce monotonicity. */
if (uu < p->p_uu || su < p->p_su || iu < p->p_iu) {
if (uu < p->p_uu)
uu = p->p_uu;
else if (uu + p->p_su + p->p_iu > tu)
uu = tu - p->p_su - p->p_iu;
if (uu < ruxp->rux_uu || su < ruxp->rux_su || iu < ruxp->rux_iu) {
if (uu < ruxp->rux_uu)
uu = ruxp->rux_uu;
else if (uu + ruxp->rux_su + ruxp->rux_iu > tu)
uu = tu - ruxp->rux_su - ruxp->rux_iu;
if (st == 0)
su = p->p_su;
su = ruxp->rux_su;
else {
su = ((tu - uu) * st) / (st + it);
if (su < p->p_su)
su = p->p_su;
else if (uu + su + p->p_iu > tu)
su = tu - uu - p->p_iu;
if (su < ruxp->rux_su)
su = ruxp->rux_su;
else if (uu + su + ruxp->rux_iu > tu)
su = tu - uu - ruxp->rux_iu;
}
KASSERT(uu + su + p->p_iu <= tu,
KASSERT(uu + su + ruxp->rux_iu <= tu,
("calcru: monotonisation botch 1"));
iu = tu - uu - su;
KASSERT(iu >= p->p_iu,
KASSERT(iu >= ruxp->rux_iu,
("calcru: monotonisation botch 2"));
}
p->p_uu = uu;
p->p_su = su;
p->p_iu = iu;
ruxp->rux_uu = uu;
ruxp->rux_su = su;
ruxp->rux_iu = iu;
up->tv_sec = uu / 1000000;
up->tv_usec = uu % 1000000;
sp->tv_sec = su / 1000000;
sp->tv_usec = su % 1000000;
if (ip != NULL) {
ip->tv_sec = iu / 1000000;
ip->tv_usec = iu % 1000000;
}
}
#ifndef _SYS_SYSPROTO_H_
@ -807,51 +825,67 @@ struct getrusage_args {
/*
* MPSAFE
*/
/* ARGSUSED */
int
getrusage(td, uap)
register struct thread *td;
register struct getrusage_args *uap;
{
struct rusage ru;
int error;
error = kern_getrusage(td, uap->who, &ru);
if (error == 0)
error = copyout(&ru, uap->rusage, sizeof(struct rusage));
return (error);
}
int
kern_getrusage(td, who, rup)
struct thread *td;
int who;
struct rusage *rup;
{
struct proc *p;
p = td->td_proc;
switch (uap->who) {
PROC_LOCK(p);
switch (who) {
case RUSAGE_SELF:
mtx_lock(&Giant);
mtx_lock_spin(&sched_lock);
calcru(p, &p->p_stats->p_ru.ru_utime, &p->p_stats->p_ru.ru_stime,
NULL);
mtx_unlock_spin(&sched_lock);
ru = p->p_stats->p_ru;
mtx_unlock(&Giant);
*rup = p->p_stats->p_ru;
calcru(p, &rup->ru_utime, &rup->ru_stime);
break;
case RUSAGE_CHILDREN:
mtx_lock(&Giant);
ru = p->p_stats->p_cru;
mtx_unlock(&Giant);
*rup = p->p_stats->p_cru;
calccru(p, &rup->ru_utime, &rup->ru_stime);
break;
default:
PROC_UNLOCK(p);
return (EINVAL);
break;
}
return (copyout(&ru, uap->rusage, sizeof(struct rusage)));
PROC_UNLOCK(p);
return (0);
}
void
ruadd(ru, ru2)
ruadd(ru, rux, ru2, rux2)
struct rusage *ru;
struct rusage_ext *rux;
struct rusage *ru2;
struct rusage_ext *rux2;
{
register long *ip, *ip2;
register int i;
timevaladd(&ru->ru_utime, &ru2->ru_utime);
timevaladd(&ru->ru_stime, &ru2->ru_stime);
bintime_add(&rux->rux_runtime, &rux2->rux_runtime);
rux->rux_uticks += rux2->rux_uticks;
rux->rux_sticks += rux2->rux_sticks;
rux->rux_iticks += rux2->rux_iticks;
rux->rux_uu += rux2->rux_uu;
rux->rux_su += rux2->rux_su;
rux->rux_iu += rux2->rux_iu;
if (ru->ru_maxrss < ru2->ru_maxrss)
ru->ru_maxrss = ru2->ru_maxrss;
ip = &ru->ru_first;

View File

@ -302,8 +302,8 @@ mi_switch(int flags, struct thread *newtd)
* process was running, and add that to its total so far.
*/
binuptime(&new_switchtime);
bintime_add(&p->p_runtime, &new_switchtime);
bintime_sub(&p->p_runtime, PCPU_PTR(switchtime));
bintime_add(&p->p_rux.rux_runtime, &new_switchtime);
bintime_sub(&p->p_rux.rux_runtime, PCPU_PTR(switchtime));
td->td_generation++; /* bump preempt-detect counter */
@ -322,7 +322,7 @@ mi_switch(int flags, struct thread *newtd)
* over max, arrange to kill the process in ast().
*/
if (p->p_cpulimit != RLIM_INFINITY &&
p->p_runtime.sec > p->p_cpulimit) {
p->p_rux.rux_runtime.sec > p->p_cpulimit) {
p->p_sflag |= PS_XCPU;
td->td_flags |= TDF_ASTPENDING;
}

View File

@ -157,21 +157,23 @@ clock_gettime(struct thread *td, struct clock_gettime_args *uap)
{
struct timespec ats;
struct timeval sys, user;
struct proc *p;
p = td->td_proc;
switch (uap->clock_id) {
case CLOCK_REALTIME:
nanotime(&ats);
break;
case CLOCK_VIRTUAL:
mtx_lock_spin(&sched_lock);
calcru(td->td_proc, &user, &sys, NULL);
mtx_unlock_spin(&sched_lock);
PROC_LOCK(p);
calcru(p, &user, &sys);
PROC_UNLOCK(p);
TIMEVAL_TO_TIMESPEC(&user, &ats);
break;
case CLOCK_PROF:
mtx_lock_spin(&sched_lock);
calcru(td->td_proc, &user, &sys, NULL);
mtx_unlock_spin(&sched_lock);
PROC_LOCK(p);
calcru(p, &user, &sys);
PROC_UNLOCK(p);
timevaladd(&user, &sys);
TIMEVAL_TO_TIMESPEC(&user, &ats);
break;

View File

@ -221,7 +221,7 @@ ast(struct trapframe *framep)
PROC_LOCK(p);
lim_rlimit(p, RLIMIT_CPU, &rlim);
mtx_lock_spin(&sched_lock);
if (p->p_runtime.sec >= rlim.rlim_max) {
if (p->p_rux.rux_runtime.sec >= rlim.rlim_max) {
mtx_unlock_spin(&sched_lock);
killproc(p, "exceeded maximum CPU limit");
} else {

View File

@ -2550,9 +2550,9 @@ ttyinfo(struct tty *tp)
tp->t_rocount = 0;
return;
}
PGRP_LOCK(tp->t_pgrp);
if ((p = LIST_FIRST(&tp->t_pgrp->pg_members)) == 0) {
PGRP_UNLOCK(tp->t_pgrp);
sx_slock(&proctree_lock);
if ((p = LIST_FIRST(&tp->t_pgrp->pg_members)) == NULL) {
sx_sunlock(&proctree_lock);
ttyprintf(tp, "empty foreground process group\n");
tp->t_rocount = 0;
return;
@ -2567,10 +2567,9 @@ ttyinfo(struct tty *tp)
* too much.
*/
mtx_lock_spin(&sched_lock);
for (pick = NULL; p != 0; p = LIST_NEXT(p, p_pglist))
for (pick = NULL; p != NULL; p = LIST_NEXT(p, p_pglist))
if (proc_compare(pick, p))
pick = p;
PGRP_UNLOCK(tp->t_pgrp);
td = FIRST_THREAD_IN_PROC(pick); /* XXXKSE */
#if 0
@ -2578,6 +2577,7 @@ ttyinfo(struct tty *tp)
#else
if (td == NULL) {
mtx_unlock_spin(&sched_lock);
sx_sunlock(&proctree_lock);
ttyprintf(tp, "foreground process without thread\n");
tp->t_rocount = 0;
return;
@ -2603,13 +2603,15 @@ ttyinfo(struct tty *tp)
state = "intrwait";
else
state = "unknown";
calcru(pick, &utime, &stime, NULL);
pctcpu = (sched_pctcpu(td) * 10000 + FSCALE / 2) >> FSHIFT;
if (pick->p_state == PRS_NEW || pick->p_state == PRS_ZOMBIE)
rss = 0;
else
rss = pgtok(vmspace_resident_count(pick->p_vmspace));
mtx_unlock_spin(&sched_lock);
PROC_LOCK(pick);
calcru(pick, &utime, &stime);
PROC_UNLOCK(pick);
/* Print command, pid, state, utime, stime, %cpu, and rss. */
ttyprintf(tp,
@ -2619,6 +2621,7 @@ ttyinfo(struct tty *tp)
(long)stime.tv_sec, stime.tv_usec / 10000,
pctcpu / 100, rss);
tp->t_rocount = 0;
sx_sunlock(&proctree_lock);
}
/*

View File

@ -486,6 +486,26 @@ struct ksegrp {
struct kg_sched *kg_sched; /* (*) Scheduler-specific data. */
};
/*
* XXX: Does this belong in resource.h or resourcevar.h instead?
* Resource usage extension. The times in rusage structs in the kernel are
* never up to date. The actual times are kept as runtimes and tick counts
* (with control info in the "previous" times), and are converted when
* userland asks for rusage info. Backwards compatibility prevents putting
* this directly in the user-visible rusage struct.
*
* Locking: (cj) means (j) for p_rux and (c) for p_crux.
*/
struct rusage_ext {
struct bintime rux_runtime; /* (cj) Real time. */
u_int64_t rux_uticks; /* (cj) Statclock hits in user mode. */
u_int64_t rux_sticks; /* (cj) Statclock hits in sys mode. */
u_int64_t rux_iticks; /* (cj) Statclock hits in intr mode. */
u_int64_t rux_uu; /* (c) Previous user time in usec. */
u_int64_t rux_su; /* (c) Previous sys time in usec. */
u_int64_t rux_iu; /* (c) Previous intr time in usec. */
};
/*
* The old fashionned process. May have multiple threads, KSEGRPs
* and KSEs. Starts off with a single embedded KSEGRP and THREAD.
@ -530,13 +550,8 @@ struct proc {
struct vmspace *p_vmspace; /* (b) Address space. */
u_int p_swtime; /* (j) Time swapped in or out. */
struct itimerval p_realtimer; /* (c) Alarm timer. */
struct bintime p_runtime; /* (j) Real time. */
u_int64_t p_uu; /* (j) Previous user time in usec. */
u_int64_t p_su; /* (j) Previous system time in usec. */
u_int64_t p_iu; /* (j) Previous intr time in usec. */
u_int64_t p_uticks; /* (j) Statclock hits in user mode. */
u_int64_t p_sticks; /* (j) Statclock hits in system mode. */
u_int64_t p_iticks; /* (j) Statclock hits in intr. */
struct rusage_ext p_rux; /* (cj) Internal resource usage. */
struct rusage_ext p_crux; /* (c) Internal child resource usage. */
int p_profthreads; /* (c) Num threads in addupc_task. */
int p_maxthrwaits; /* (c) Max threads num waiters */
int p_traceflag; /* (o) Kernel trace points. */

View File

@ -106,12 +106,13 @@ struct uidinfo {
#define UIDINFO_UNLOCK(ui) mtx_unlock((ui)->ui_mtxp)
struct proc;
struct rusage_ext;
struct thread;
void addupc_intr(struct thread *td, uintptr_t pc, u_int ticks);
void addupc_task(struct thread *td, uintptr_t pc, u_int ticks);
void calcru(struct proc *p, struct timeval *up, struct timeval *sp,
struct timeval *ip);
void calccru(struct proc *p, struct timeval *up, struct timeval *sp);
void calcru(struct proc *p, struct timeval *up, struct timeval *sp);
int chgproccnt(struct uidinfo *uip, int diff, int maxval);
int chgsbsize(struct uidinfo *uip, u_int *hiwat, u_int to,
rlim_t maxval);
@ -125,7 +126,8 @@ struct plimit
*lim_hold(struct plimit *limp);
rlim_t lim_max(struct proc *p, int which);
void lim_rlimit(struct proc *p, int which, struct rlimit *rlp);
void ruadd(struct rusage *ru, struct rusage *ru2);
void ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
struct rusage_ext *rux2);
int suswintr(void *base, int word);
struct uidinfo
*uifind(uid_t uid);

View File

@ -34,6 +34,7 @@
struct mbuf;
struct msghdr;
struct rusage;
struct sockaddr;
int kern___getcwd(struct thread *td, u_char *buf, enum uio_seg bufseg,
@ -50,6 +51,7 @@ int kern_connect(struct thread *td, int fd, struct sockaddr *sa);
int kern_fcntl(struct thread *td, int fd, int cmd, intptr_t arg);
int kern_futimes(struct thread *td, int fd, struct timeval *tptr,
enum uio_seg tptrseg);
int kern_getrusage(struct thread *td, int who, struct rusage *rup);
int kern_getsockopt(struct thread *td, int s, int level, int name,
void *optval, enum uio_seg valseg, socklen_t *valsize);
int kern_lchown(struct thread *td, char *path, enum uio_seg pathseg,