diff --git a/sys/gnu/fs/ext2fs/ext2_bmap.c b/sys/gnu/fs/ext2fs/ext2_bmap.c
index 9d5e37bf3948..85cfbf948bf6 100644
--- a/sys/gnu/fs/ext2fs/ext2_bmap.c
+++ b/sys/gnu/fs/ext2fs/ext2_bmap.c
@@ -198,7 +198,7 @@ ext2_bmaparray(vp, bn, bnp, runp, runb)
 		vfs_busy_pages(bp, 0);
 		bp->b_iooffset = dbtob(bp->b_blkno);
 		bstrategy(bp);
-		curproc->p_stats->p_ru.ru_inblock++;	/* XXX */
+		curthread->td_ru.ru_inblock++;
 		error = bufwait(bp);
 		if (error) {
 			brelse(bp);
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index 9306e8319458..8f99b19bab97 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -431,6 +431,7 @@ proc0_init(void *dummy __unused)
 	bcopy("swapper", p->p_comm, sizeof ("swapper"));
 
 	callout_init(&p->p_itcallout, CALLOUT_MPSAFE);
+	callout_init_mtx(&p->p_limco, &p->p_mtx, 0);
 	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
 
 	/* Create credentials. */
diff --git a/sys/kern/kern_acct.c b/sys/kern/kern_acct.c
index f1b0b8f2a8df..69a171afa84f 100644
--- a/sys/kern/kern_acct.c
+++ b/sys/kern/kern_acct.c
@@ -337,7 +337,7 @@ acct_process(struct thread *td)
 	struct timeval ut, st, tmp;
 	struct plimit *newlim, *oldlim;
 	struct proc *p;
-	struct rusage *r;
+	struct rusage ru;
 	int t, ret, vfslocked;
 
 	/*
@@ -370,6 +370,7 @@ acct_process(struct thread *td)
 	bcopy(p->p_comm, acct.ac_comm, sizeof acct.ac_comm);
 
 	/* (2) The amount of user and system time that was used */
+	rufetch(p, &ru);
 	calcru(p, &ut, &st);
 	acct.ac_utime = encode_timeval(ut);
 	acct.ac_stime = encode_timeval(st);
@@ -383,19 +384,18 @@ acct_process(struct thread *td)
 	acct.ac_etime = encode_timeval(tmp);
 
 	/* (4) The average amount of memory used */
-	r = &p->p_stats->p_ru;
 	tmp = ut;
 	timevaladd(&tmp, &st);
 	/* Convert tmp (i.e. u + s) into hz units to match ru_i*. */
 	t = tmp.tv_sec * hz + tmp.tv_usec / tick;
 	if (t)
-		acct.ac_mem = encode_long((r->ru_ixrss + r->ru_idrss +
-		    r->ru_isrss) / t);
+		acct.ac_mem = encode_long((ru.ru_ixrss + ru.ru_idrss +
+		    ru.ru_isrss) / t);
 	else
 		acct.ac_mem = 0;
 
 	/* (5) The number of disk I/O operations done */
-	acct.ac_io = encode_long(r->ru_inblock + r->ru_oublock);
+	acct.ac_io = encode_long(ru.ru_inblock + ru.ru_oublock);
 
 	/* (6) The UID and GID of the process */
 	acct.ac_uid = p->p_ucred->cr_ruid;
diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
index ac404dc1d5fc..0f7366a75c06 100644
--- a/sys/kern/kern_clock.c
+++ b/sys/kern/kern_clock.c
@@ -394,10 +394,9 @@ stopprofclock(p)
 }
 
 /*
- * Statistics clock.  Grab profile sample, and if divider reaches 0,
- * do process and kernel statistics.  Most of the statistics are only
- * used by user-level statistics programs.  The main exceptions are
- * ke->ke_uticks, p->p_rux.rux_sticks, p->p_rux.rux_iticks, and p->p_estcpu.
+ * Statistics clock.  Updates rusage information and calls the scheduler
+ * to adjust priorities of the active thread.
+ *
  * This should be called by all active processors.
  */
 void
@@ -466,10 +465,9 @@ statclock(int usermode)
 	sched_clock(td);
 
 	/* Update resource usage integrals and maximums. */
-	MPASS(p->p_stats != NULL);
 	MPASS(p->p_vmspace != NULL);
 	vm = p->p_vmspace;
-	ru = &p->p_stats->p_ru;
+	ru = &td->td_ru;
 	ru->ru_ixrss += pgtok(vm->vm_tsize);
 	ru->ru_idrss += pgtok(vm->vm_dsize);
 	ru->ru_isrss += pgtok(vm->vm_ssize);
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 029fe3a12f4b..54ac39247ef6 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -116,6 +116,7 @@ exit1(struct thread *td, int rv)
 	struct ucred *tracecred;
 #endif
 	struct plimit *plim;
+	struct rusage *ru;
 	int locked;
 
 	/*
@@ -169,7 +170,8 @@ retry:
 		 * Threading support has been turned off.
 		 */
 	}
-
+	KASSERT(p->p_numthreads == 1,
+	    ("exit1: proc %p exiting with %d threads", p, p->p_numthreads));
 	/*
 	 * Wakeup anyone in procfs' PIOCWAIT.  They should have a hold
 	 * on our vmspace, so we should block below until they have
@@ -195,6 +197,8 @@ retry:
 		msleep(&p->p_lock, &p->p_mtx, PWAIT, "exithold", 0);
 	PROC_UNLOCK(p);
 
+	/* Drain the limit callout while we don't have the proc locked */
+	callout_drain(&p->p_limco);
 
 #ifdef AUDIT
 	/*
@@ -229,7 +233,7 @@ retry:
 	 */
 	EVENTHANDLER_INVOKE(process_exit, p);
 
-	MALLOC(p->p_ru, struct rusage *, sizeof(struct rusage),
+	MALLOC(ru, struct rusage *, sizeof(struct rusage),
 		M_ZOMBIE, M_WAITOK);
 	/*
 	 * If parent is waiting for us to exit or exec,
@@ -438,16 +442,20 @@ retry:
 		PROC_UNLOCK(q);
 	}
 
-	/*
-	 * Save exit status and finalize rusage info except for times,
-	 * adding in child rusage info later when our time is locked.
-	 */
+	/* Save exit status. */
 	PROC_LOCK(p);
 	p->p_xstat = rv;
 	p->p_xthread = td;
-	p->p_stats->p_ru.ru_nvcsw++;
-	*p->p_ru = p->p_stats->p_ru;
-
+	/*
+	 * All statistics have been aggregated into the final td_ru by
+	 * thread_exit().  Copy these into the proc here where wait*()
+	 * can find them.
+	 * XXX We will miss any statistics gathered between here and
+	 * thread_exit() except for those related to clock ticks.
+	 */
+	*ru = td->td_ru;
+	ru->ru_nvcsw++;
+	p->p_ru = ru;
 	/*
 	 * Notify interested parties of our demise.
 	 */
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index cbcb25ab775b..8fa8ce2070e5 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -534,7 +534,7 @@ again:
 	/*
 	 * p_limit is copy-on-write.  Bump its refcount.
 	 */
-	p2->p_limit = lim_hold(p1->p_limit);
+	lim_fork(p1, p2);
 
 	pstats_fork(p1->p_stats, p2->p_stats);
 
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index b5fe77de6203..a8ac12e92ccd 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -693,12 +693,12 @@ fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp)
 	kp->ki_swtime = p->p_swtime;
 	kp->ki_pid = p->p_pid;
 	kp->ki_nice = p->p_nice;
+	rufetch(p, &kp->ki_rusage);
 	kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime);
 	mtx_unlock_spin(&sched_lock);
 	if ((p->p_sflag & PS_INMEM) && p->p_stats != NULL) {
 		kp->ki_start = p->p_stats->p_start;
 		timevaladd(&kp->ki_start, &boottime);
-		kp->ki_rusage = p->p_stats->p_ru;
 		calcru(p, &kp->ki_rusage.ru_utime, &kp->ki_rusage.ru_stime);
 		calccru(p, &kp->ki_childutime, &kp->ki_childstime);
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
index 85ebe64ce1d7..8ddff9a9bddd 100644
--- a/sys/kern/kern_resource.c
+++ b/sys/kern/kern_resource.c
@@ -619,6 +619,38 @@ setrlimit(td, uap)
 	return (error);
 }
 
+static void
+lim_cb(void *arg)
+{
+	struct rlimit rlim;
+	struct thread *td;
+	struct proc *p;
+
+	p = arg;
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	/*
+	 * Check whether the process has exceeded its cpu allocation:
+	 * kill it over the hard limit, post SIGXCPU over the soft one.
+	 */
+	if (p->p_cpulimit == RLIM_INFINITY)
+		return;
+	mtx_lock_spin(&sched_lock);
+	FOREACH_THREAD_IN_PROC(p, td)
+		ruxagg(&p->p_rux, td);
+	mtx_unlock_spin(&sched_lock);
+	if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) {
+		lim_rlimit(p, RLIMIT_CPU, &rlim);
+		if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) {
+			killproc(p, "exceeded maximum CPU limit");
+		} else {
+			if (p->p_cpulimit < rlim.rlim_max)
+				p->p_cpulimit += 5;
+			psignal(p, SIGXCPU);
+		}
+	}
+	callout_reset(&p->p_limco, hz, lim_cb, p);
+}
+
 int
 kern_setrlimit(td, which, limp)
 	struct thread *td;
@@ -664,6 +696,9 @@ kern_setrlimit(td, which, limp)
 	switch (which) {
 
 	case RLIMIT_CPU:
+		if (limp->rlim_cur != RLIM_INFINITY &&
+		    p->p_cpulimit == RLIM_INFINITY)
+			callout_reset(&p->p_limco, hz, lim_cb, p);
 		mtx_lock_spin(&sched_lock);
 		p->p_cpulimit = limp->rlim_cur;
 		mtx_unlock_spin(&sched_lock);
 		break;
@@ -802,17 +837,11 @@ calcru(struct proc *p, struct timeval *up, struct timeval *sp)
 	 * We reset the thread and CPU state as if we had performed a context
 	 * switch right here.
 	 */
-	if (curthread->td_proc == p) {
-		td = curthread;
+	td = curthread;
+	if (td->td_proc == p) {
 		u = cpu_ticks();
 		p->p_rux.rux_runtime += u - PCPU_GET(switchtime);
 		PCPU_SET(switchtime, u);
-		p->p_rux.rux_uticks += td->td_uticks;
-		td->td_uticks = 0;
-		p->p_rux.rux_iticks += td->td_iticks;
-		td->td_iticks = 0;
-		p->p_rux.rux_sticks += td->td_sticks;
-		td->td_sticks = 0;
 	}
 	/* Work on a copy of p_rux so we can let go of sched_lock */
 	rux = p->p_rux;
@@ -932,7 +961,7 @@ kern_getrusage(td, who, rup)
 	switch (who) {
 
 	case RUSAGE_SELF:
-		*rup = p->p_stats->p_ru;
+		rufetch(p, rup);
 		calcru(p, &rup->ru_utime, &rup->ru_stime);
 		break;
@@ -950,14 +979,23 @@
 }
 
 void
-ruadd(ru, rux, ru2, rux2)
-	struct rusage *ru;
-	struct rusage_ext *rux;
-	struct rusage *ru2;
-	struct rusage_ext *rux2;
+rucollect(struct rusage *ru, struct rusage *ru2)
+{
+	long *ip, *ip2;
+	int i;
+
+	if (ru->ru_maxrss < ru2->ru_maxrss)
+		ru->ru_maxrss = ru2->ru_maxrss;
+	ip = &ru->ru_first;
+	ip2 = &ru2->ru_first;
+	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
+		*ip++ += *ip2++;
+}
+
+void
+ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
+    struct rusage_ext *rux2)
 {
-	register long *ip, *ip2;
-	register int i;
 
 	rux->rux_runtime += rux2->rux_runtime;
 	rux->rux_uticks += rux2->rux_uticks;
@@ -966,12 +1004,46 @@ ruadd(ru, rux, ru2, rux2)
 	rux->rux_uu += rux2->rux_uu;
 	rux->rux_su += rux2->rux_su;
 	rux->rux_tu += rux2->rux_tu;
-	if (ru->ru_maxrss < ru2->ru_maxrss)
-		ru->ru_maxrss = ru2->ru_maxrss;
-	ip = &ru->ru_first;
-	ip2 = &ru2->ru_first;
-	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
-		*ip++ += *ip2++;
+	rucollect(ru, ru2);
+}
+
+/*
+ * Aggregate tick counts into the proc's rusage_ext.
+ */
+void
+ruxagg(struct rusage_ext *rux, struct thread *td)
+{
+	rux->rux_runtime += td->td_runtime;
+	rux->rux_uticks += td->td_uticks;
+	rux->rux_sticks += td->td_sticks;
+	rux->rux_iticks += td->td_iticks;
+	td->td_runtime = 0;
+	td->td_uticks = 0;
+	td->td_iticks = 0;
+	td->td_sticks = 0;
+}
+
+/*
+ * Update the rusage_ext structure and fetch a valid aggregate rusage
+ * for proc p into the buffer the caller supplies.
+ */
+void
+rufetch(struct proc *p, struct rusage *ru)
+{
+	struct thread *td;
+
+	memset(ru, 0, sizeof(*ru));
+	mtx_lock_spin(&sched_lock);
+	if (p->p_ru == NULL) {
+		KASSERT(p->p_numthreads > 0,
+		    ("rufetch: No threads or ru in proc %p", p));
+		FOREACH_THREAD_IN_PROC(p, td) {
+			ruxagg(&p->p_rux, td);
+			rucollect(ru, &td->td_ru);
+		}
+	} else
+		*ru = *p->p_ru;
+	mtx_unlock_spin(&sched_lock);
 }
 
 /*
@@ -997,6 +1069,15 @@ lim_hold(limp)
 	return (limp);
 }
 
+void
+lim_fork(struct proc *p1, struct proc *p2)
+{
+	p2->p_limit = lim_hold(p1->p_limit);
+	callout_init_mtx(&p2->p_limco, &p2->p_mtx, 0);
+	if (p1->p_cpulimit != RLIM_INFINITY)
+		callout_reset(&p2->p_limco, hz, lim_cb, p2);
+}
+
 void
 lim_free(limp)
 	struct plimit *limp;
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index fe2bbbeee988..dc0234658ddd 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -1868,7 +1868,7 @@ trapsignal(struct thread *td, ksiginfo_t *ksi)
 	mtx_lock(&ps->ps_mtx);
 	if ((p->p_flag & P_TRACED) == 0 && SIGISMEMBER(ps->ps_sigcatch, sig) &&
 	    !SIGISMEMBER(td->td_sigmask, sig)) {
-		p->p_stats->p_ru.ru_nsignals++;
+		td->td_ru.ru_nsignals++;
 #ifdef KTRACE
 		if (KTRPOINT(curthread, KTR_PSIG))
 			ktrpsig(sig, ps->ps_sigact[_SIG_IDX(sig)],
@@ -2781,7 +2781,7 @@ postsig(sig)
 			SIGADDSET(ps->ps_sigignore, sig);
 			ps->ps_sigact[_SIG_IDX(sig)] = SIG_DFL;
 		}
-		p->p_stats->p_ru.ru_nsignals++;
+		td->td_ru.ru_nsignals++;
 		if (p->p_sig != sig) {
 			code = 0;
 		} else {
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index d61dddf36b4c..b75dcf29362d 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -401,40 +401,18 @@ mi_switch(int flags, struct thread *newtd)
 	}
 	if (flags & SW_VOL)
-		p->p_stats->p_ru.ru_nvcsw++;
+		td->td_ru.ru_nvcsw++;
 	else
-		p->p_stats->p_ru.ru_nivcsw++;
-
+		td->td_ru.ru_nivcsw++;
 	/*
 	 * Compute the amount of time during which the current
-	 * process was running, and add that to its total so far.
+	 * thread was running, and add that to its total so far.
 	 */
 	new_switchtime = cpu_ticks();
-	p->p_rux.rux_runtime += (new_switchtime - PCPU_GET(switchtime));
-	p->p_rux.rux_uticks += td->td_uticks;
-	td->td_uticks = 0;
-	p->p_rux.rux_iticks += td->td_iticks;
-	td->td_iticks = 0;
-	p->p_rux.rux_sticks += td->td_sticks;
-	td->td_sticks = 0;
-
-	td->td_generation++;	/* bump preempt-detect counter */
-
-	/*
-	 * Check if the process exceeds its cpu resource allocation.  If
-	 * it reaches the max, arrange to kill the process in ast().
-	 */
-	if (p->p_cpulimit != RLIM_INFINITY &&
-	    p->p_rux.rux_runtime >= p->p_cpulimit * cpu_tickrate()) {
-		p->p_sflag |= PS_XCPU;
-		td->td_flags |= TDF_ASTPENDING;
-	}
-
-	/*
-	 * Finish up stats for outgoing thread.
-	 */
-	cnt.v_swtch++;
+	td->td_runtime += new_switchtime - PCPU_GET(switchtime);
 	PCPU_SET(switchtime, new_switchtime);
+	td->td_generation++;	/* bump preempt-detect counter */
+	cnt.v_swtch++;
 	PCPU_SET(switchticks, ticks);
 	CTR4(KTR_PROC, "mi_switch: old thread %ld (kse %p, pid %ld, %s)",
 	    td->td_tid, td->td_sched, p->p_pid, p->p_comm);
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index e83bf7e8c363..dcb00b770954 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -352,6 +352,7 @@ thread_exit(void)
 {
 	uint64_t new_switchtime;
 	struct thread *td;
+	struct thread *td2;
 	struct proc *p;
 
 	td = curthread;
@@ -402,17 +403,17 @@ thread_exit(void)
 	/* Do the same timestamp bookkeeping that mi_switch() would do. */
 	new_switchtime = cpu_ticks();
 	p->p_rux.rux_runtime += (new_switchtime - PCPU_GET(switchtime));
-	p->p_rux.rux_uticks += td->td_uticks;
-	p->p_rux.rux_sticks += td->td_sticks;
-	p->p_rux.rux_iticks += td->td_iticks;
 	PCPU_SET(switchtime, new_switchtime);
 	PCPU_SET(switchticks, ticks);
 	cnt.v_swtch++;
-
-	/* Add our usage into the usage of all our children. */
+	/*
+	 * Aggregate this thread's tick stats in the parent so they are not
+	 * lost.  Also add the child usage to our own when the final thread
+	 * exits.
+	 */
+	ruxagg(&p->p_rux, td);
 	if (p->p_numthreads == 1)
 		ruadd(p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux);
-
 	/*
 	 * The last thread is left attached to the process
 	 * So that the whole bundle gets recycled.  Skip
@@ -424,8 +425,10 @@ thread_exit(void)
 		if (p->p_flag & P_HADTHREADS) {
 			if (p->p_numthreads > 1) {
 				thread_unlink(td);
-
-				sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);
+				/* Impart our resource usage on another thread */
+				td2 = FIRST_THREAD_IN_PROC(p);
+				rucollect(&td2->td_ru, &td->td_ru);
+				sched_exit_thread(td2, td);
 
 				/*
 				 * The test below is NOT true if we are the
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index 15c8fdd8fd1b..e9d9c3552b08 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -147,7 +147,6 @@ ast(struct trapframe *framep)
 {
 	struct thread *td;
 	struct proc *p;
-	struct rlimit rlim;
 	int sflag;
 	int flags;
 	int sig;
@@ -183,8 +182,8 @@ ast(struct trapframe *framep)
 	mtx_lock_spin(&sched_lock);
 	flags = td->td_flags;
 	sflag = p->p_sflag;
-	if (p->p_sflag & (PS_ALRMPEND | PS_PROFPEND | PS_XCPU))
-		p->p_sflag &= ~(PS_ALRMPEND | PS_PROFPEND | PS_XCPU);
+	if (p->p_sflag & (PS_ALRMPEND | PS_PROFPEND))
+		p->p_sflag &= ~(PS_ALRMPEND | PS_PROFPEND);
 #ifdef MAC
 	if (p->p_sflag & PS_MACPEND)
 		p->p_sflag &= ~PS_MACPEND;
@@ -231,21 +230,6 @@ ast(struct trapframe *framep)
 		psignal(p, SIGPROF);
 		PROC_UNLOCK(p);
 	}
-	if (sflag & PS_XCPU) {
-		PROC_LOCK(p);
-		lim_rlimit(p, RLIMIT_CPU, &rlim);
-		mtx_lock_spin(&sched_lock);
-		if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) {
-			mtx_unlock_spin(&sched_lock);
-			killproc(p, "exceeded maximum CPU limit");
-		} else {
-			if (p->p_cpulimit < rlim.rlim_max)
-				p->p_cpulimit += 5;
-			mtx_unlock_spin(&sched_lock);
-			psignal(p, SIGXCPU);
-		}
-		PROC_UNLOCK(p);
-	}
 #ifdef MAC
 	if (sflag & PS_MACPEND)
 		mac_thread_userret(td);
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 67b82b4f439d..1b6d03bca915 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -954,7 +954,7 @@ sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
 	dontroute =
 	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0;
 	if (td != NULL)
-		td->td_proc->p_stats->p_ru.ru_msgsnd++;
+		td->td_ru.ru_msgsnd++;
 	if (control != NULL)
 		clen = control->m_len;
 
@@ -1123,7 +1123,7 @@ sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio,
 	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
 	    (so->so_proto->pr_flags & PR_ATOMIC);
 	if (td != NULL)
-		td->td_proc->p_stats->p_ru.ru_msgsnd++;
+		td->td_ru.ru_msgsnd++;
 	if (control != NULL)
 		clen = control->m_len;
 
@@ -1506,7 +1506,7 @@ dontblock:
 	 */
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	if (uio->uio_td)
-		uio->uio_td->td_proc->p_stats->p_ru.ru_msgrcv++;
+		uio->uio_td->td_ru.ru_msgrcv++;
 	KASSERT(m == so->so_rcv.sb_mb, ("soreceive: m != so->so_rcv.sb_mb"));
 	SBLASTRECORDCHK(&so->so_rcv);
 	SBLASTMBUFCHK(&so->so_rcv);
diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c
index f5fd25f1814c..47a08b69eb70 100644
--- a/sys/kern/vfs_aio.c
+++ b/sys/kern/vfs_aio.c
@@ -805,7 +805,6 @@ aio_process(struct aiocblist *aiocbe)
 	td = curthread;
 	td_savedcred = td->td_ucred;
 	td->td_ucred = aiocbe->cred;
-	mycp = td->td_proc;
 	cb = &aiocbe->uaiocb;
 	fp = aiocbe->fd_file;
 
@@ -831,8 +830,8 @@ aio_process(struct aiocblist *aiocbe)
 	auio.uio_segflg = UIO_USERSPACE;
 	auio.uio_td = td;
 
-	inblock_st = mycp->p_stats->p_ru.ru_inblock;
-	oublock_st = mycp->p_stats->p_ru.ru_oublock;
+	inblock_st = td->td_ru.ru_inblock;
+	oublock_st = td->td_ru.ru_oublock;
 	/*
 	 * aio_aqueue() acquires a reference to the file that is
 	 * released in aio_free_entry().
@@ -846,8 +845,8 @@ aio_process(struct aiocblist *aiocbe)
 		auio.uio_rw = UIO_WRITE;
 		error = fo_write(fp, &auio, fp->f_cred, FOF_OFFSET, td);
 	}
-	inblock_end = mycp->p_stats->p_ru.ru_inblock;
-	oublock_end = mycp->p_stats->p_ru.ru_oublock;
+	inblock_end = td->td_ru.ru_inblock;
+	oublock_end = td->td_ru.ru_oublock;
 
 	aiocbe->inputcharge = inblock_end - inblock_st;
 	aiocbe->outputcharge = oublock_end - oublock_st;
@@ -1663,11 +1662,10 @@ aio_return(struct thread *td, struct aio_return_args *uap)
 		error = cb->uaiocb._aiocb_private.error;
 		td->td_retval[0] = status;
 		if (cb->uaiocb.aio_lio_opcode == LIO_WRITE) {
-			p->p_stats->p_ru.ru_oublock +=
-			    cb->outputcharge;
+			td->td_ru.ru_oublock += cb->outputcharge;
 			cb->outputcharge = 0;
 		} else if (cb->uaiocb.aio_lio_opcode == LIO_READ) {
-			p->p_stats->p_ru.ru_inblock += cb->inputcharge;
+			td->td_ru.ru_inblock += cb->inputcharge;
 			cb->inputcharge = 0;
 		}
 		aio_free_entry(cb);
@@ -2206,10 +2204,10 @@ aio_waitcomplete(struct thread *td, struct aio_waitcomplete_args *uap)
 	error = cb->uaiocb._aiocb_private.error;
 	td->td_retval[0] = status;
 	if (cb->uaiocb.aio_lio_opcode == LIO_WRITE) {
-		p->p_stats->p_ru.ru_oublock += cb->outputcharge;
+		td->td_ru.ru_oublock += cb->outputcharge;
 		cb->outputcharge = 0;
 	} else if (cb->uaiocb.aio_lio_opcode == LIO_READ) {
-		p->p_stats->p_ru.ru_inblock += cb->inputcharge;
+		td->td_ru.ru_inblock += cb->inputcharge;
 		cb->inputcharge = 0;
 	}
 	aio_free_entry(cb);
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 44879ffc4822..18c6d59cb550 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -746,7 +746,7 @@ breada(struct vnode * vp, daddr_t * rablkno, int * rabsize,
 
 		if ((rabp->b_flags & B_CACHE) == 0) {
 			if (!TD_IS_IDLETHREAD(curthread))
-				curthread->td_proc->p_stats->p_ru.ru_inblock++;
+				curthread->td_ru.ru_inblock++;
 			rabp->b_flags |= B_ASYNC;
 			rabp->b_flags &= ~B_INVAL;
 			rabp->b_ioflags &= ~BIO_ERROR;
@@ -781,7 +781,7 @@ breadn(struct vnode * vp, daddr_t blkno, int size,
 	/* if not found in cache, do some I/O */
 	if ((bp->b_flags & B_CACHE) == 0) {
 		if (!TD_IS_IDLETHREAD(curthread))
-			curthread->td_proc->p_stats->p_ru.ru_inblock++;
+			curthread->td_ru.ru_inblock++;
 		bp->b_iocmd = BIO_READ;
 		bp->b_flags &= ~B_INVAL;
 		bp->b_ioflags &= ~BIO_ERROR;
@@ -860,7 +860,7 @@ bufwrite(struct buf *bp)
 	atomic_add_int(&runningbufspace, bp->b_runningbufspace);
 
 	if (!TD_IS_IDLETHREAD(curthread))
-		curthread->td_proc->p_stats->p_ru.ru_oublock++;
+		curthread->td_ru.ru_oublock++;
 	if (oldflags & B_ASYNC)
 		BUF_KERNPROC(bp);
 	bp->b_iooffset = dbtob(bp->b_blkno);
diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c
index 226d80f98f18..7770bc43a01b 100644
--- a/sys/kern/vfs_cluster.c
+++ b/sys/kern/vfs_cluster.c
@@ -228,7 +228,7 @@ cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp)
 			BUF_KERNPROC(bp);
 		bp->b_iooffset = dbtob(bp->b_blkno);
 		bstrategy(bp);
-		curproc->p_stats->p_ru.ru_inblock++;
+		curthread->td_ru.ru_inblock++;
 	}
 
 	/*
@@ -281,7 +281,7 @@ cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp)
 			BUF_KERNPROC(rbp);
 		rbp->b_iooffset = dbtob(rbp->b_blkno);
 		bstrategy(rbp);
-		curproc->p_stats->p_ru.ru_inblock++;
+		curthread->td_ru.ru_inblock++;
 	}
 
 	if (reqbp)
diff --git a/sys/netinet/sctp_output.c b/sys/netinet/sctp_output.c
index 3c0056c3ea5d..f35ea8c80026 100644
--- a/sys/netinet/sctp_output.c
+++ b/sys/netinet/sctp_output.c
@@ -11012,7 +11012,7 @@ sctp_lower_sosend(struct socket *so,
 	}
 	/* Ok, we will attempt a msgsnd :> */
 	if (p) {
-		p->td_proc->p_stats->p_ru.ru_msgsnd++;
+		p->td_ru.ru_msgsnd++;
 	}
 	if (stcb) {
 		if (((srcv->sinfo_flags | temp_flags) & SCTP_ADDR_OVER) == 0) {
diff --git a/sys/nfs4client/nfs4_vnops.c b/sys/nfs4client/nfs4_vnops.c
index cfa52e278d1a..8094a3e33359 100644
--- a/sys/nfs4client/nfs4_vnops.c
+++ b/sys/nfs4client/nfs4_vnops.c
@@ -2828,7 +2828,7 @@ nfs4_writebp(struct buf *bp, int force __unused, struct thread *td)
 	bp->b_iocmd = BIO_WRITE;
 
 	bufobj_wref(bp->b_bufobj);
-	curthread->td_proc->p_stats->p_ru.ru_oublock++;
+	curthread->td_ru.ru_oublock++;
 	splx(s);
 
 	/*
diff --git a/sys/nfsclient/nfs_vnops.c b/sys/nfsclient/nfs_vnops.c
index 28de49d5c97e..a6de7e4c2860 100644
--- a/sys/nfsclient/nfs_vnops.c
+++ b/sys/nfsclient/nfs_vnops.c
@@ -3129,7 +3129,7 @@ nfs_writebp(struct buf *bp, int force __unused, struct thread *td)
 	bp->b_iocmd = BIO_WRITE;
 
 	bufobj_wref(bp->b_bufobj);
-	curthread->td_proc->p_stats->p_ru.ru_oublock++;
+	curthread->td_ru.ru_oublock++;
 	splx(s);
 
 	/*
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 529512c0faeb..a73d2d571c4c 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -49,6 +49,7 @@
 #include <sys/priority.h>
 #include <sys/rtprio.h>			/* XXX. */
 #include <sys/runq.h>
+#include <sys/resource.h>
 #include <sys/sigio.h>
 #include <sys/signal.h>
 #include <sys/signalvar.h>
@@ -255,10 +256,12 @@ struct thread {
 	struct kse_upcall *td_upcall;	/* (k + j) Upcall structure. */
 	u_int		td_estcpu;	/* (j) Sum of the same field in KSEs. */
 	u_int		td_slptime;	/* (j) How long completely blocked. */
-	u_int		td_pticks;	/* (k) Statclock hits for profiling */
-	u_int		td_sticks;	/* (k) Statclock hits in system mode. */
-	u_int		td_iticks;	/* (k) Statclock hits in intr mode. */
-	u_int		td_uticks;	/* (k) Statclock hits in user mode. */
+	struct rusage	td_ru;		/* (j) rusage information */
+	uint64_t	td_runtime;	/* (j) How many cpu ticks we've run. */
+	u_int		td_pticks;	/* (j) Statclock hits for profiling */
+	u_int		td_sticks;	/* (j) Statclock hits in system mode. */
+	u_int		td_iticks;	/* (j) Statclock hits in intr mode. */
+	u_int		td_uticks;	/* (j) Statclock hits in user mode. */
 	u_int		td_uuticks;	/* (k) Statclock hits (usr), for UTS. */
 	u_int		td_usticks;	/* (k) Statclock hits (sys), for UTS. */
 	int		td_intrval;	/* (j) Return value of TDF_INTERRUPT. */
@@ -486,6 +489,7 @@ struct proc {
 	/* Accumulated stats for all threads? */
 	struct pstats	*p_stats;	/* (b) Accounting/statistics (CPU). */
 	struct plimit	*p_limit;	/* (c) Process limits. */
+	struct callout	p_limco;	/* (c) Limit callout handle. */
 	struct sigacts	*p_sigacts;	/* (x) Signal actions, state (CPU). */
 
 	TAILQ_HEAD(, kse_upcall) p_upcalls; /* All upcalls in the proc. */
@@ -561,7 +565,7 @@ struct proc {
 	struct pgrp	*p_pgrp;	/* (c + e) Pointer to process group. */
 	struct sysentvec *p_sysent;	/* (b) Syscall dispatch info. */
 	struct pargs	*p_args;	/* (c) Process arguments. */
-	rlim_t		p_cpulimit;	/* (j) Current CPU limit in seconds. */
+	rlim_t		p_cpulimit;	/* (c) Current CPU limit in seconds. */
 	signed char	p_nice;		/* (c + j) Process "nice" value. */
/* End area that is copied on creation. */
#define	p_endcopy	p_xstat
@@ -572,7 +576,7 @@ struct proc {
 	struct mdproc	p_md;		/* Any machine-dependent fields. */
 	struct callout	p_itcallout;	/* (h + c) Interval timer callout. */
 	u_short		p_acflag;	/* (c) Accounting flags. */
-	struct rusage	*p_ru;		/* (a) Exit information. XXX */
+	struct rusage	*p_ru;		/* (a) Exit information. */
 	struct proc	*p_peers;	/* (r) */
 	struct proc	*p_leader;	/* (b) */
 	void		*p_emuldata;	/* (c) Emulator state data. */
@@ -624,7 +628,6 @@ struct proc {
 
 /* These flags are kept in p_sflag and are protected with sched_lock. */
 #define	PS_INMEM	0x00001	/* Loaded into memory. */
-#define	PS_XCPU		0x00002	/* Exceeded CPU limit. */
 #define	PS_ALRMPEND	0x00020	/* Pending SIGVTALRM needs to be posted. */
 #define	PS_PROFPEND	0x00040	/* Pending SIGPROF needs to be posted. */
 #define	PS_SWAPINREQ	0x00100	/* Swapin request due to wakeup. */
diff --git a/sys/sys/resource.h b/sys/sys/resource.h
index ae3e3460ebb2..c1b16f431f58 100644
--- a/sys/sys/resource.h
+++ b/sys/sys/resource.h
@@ -50,33 +50,31 @@
 /*
  * Resource utilization information.
  *
- * Locking key:
- *	c - locked by proc mtx
- *	j - locked by sched_lock mtx
- *	n - not locked, lazy
+ * All fields are only modified by curthread and
+ * can be read without locks.
  */
 
 #define	RUSAGE_SELF	0
 #define	RUSAGE_CHILDREN	-1
 
 struct rusage {
-	struct timeval	ru_utime;	/* (n) user time used */
-	struct timeval	ru_stime;	/* (n) system time used */
-	long	ru_maxrss;		/* (j) max resident set size */
+	struct timeval	ru_utime;	/* user time used */
+	struct timeval	ru_stime;	/* system time used */
+	long	ru_maxrss;		/* max resident set size */
 #define	ru_first	ru_ixrss
-	long	ru_ixrss;		/* (j) integral shared memory size */
-	long	ru_idrss;		/* (j) integral unshared data " */
-	long	ru_isrss;		/* (j) integral unshared stack " */
-	long	ru_minflt;		/* (c) page reclaims */
-	long	ru_majflt;		/* (c) page faults */
-	long	ru_nswap;		/* (c + j) swaps */
-	long	ru_inblock;		/* (n) block input operations */
-	long	ru_oublock;		/* (n) block output operations */
-	long	ru_msgsnd;		/* (n) messages sent */
-	long	ru_msgrcv;		/* (n) messages received */
-	long	ru_nsignals;		/* (c) signals received */
-	long	ru_nvcsw;		/* (j) voluntary context switches */
-	long	ru_nivcsw;		/* (j) involuntary " */
+	long	ru_ixrss;		/* integral shared memory size */
+	long	ru_idrss;		/* integral unshared data " */
+	long	ru_isrss;		/* integral unshared stack " */
+	long	ru_minflt;		/* page reclaims */
+	long	ru_majflt;		/* page faults */
+	long	ru_nswap;		/* swaps */
+	long	ru_inblock;		/* block input operations */
+	long	ru_oublock;		/* block output operations */
+	long	ru_msgsnd;		/* messages sent */
+	long	ru_msgrcv;		/* messages received */
+	long	ru_nsignals;		/* signals received */
+	long	ru_nvcsw;		/* voluntary context switches */
+	long	ru_nivcsw;		/* involuntary " */
 #define	ru_last		ru_nivcsw
 };
 
diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h
index ad8337041ce3..173d7e365e08 100644
--- a/sys/sys/resourcevar.h
+++ b/sys/sys/resourcevar.h
@@ -51,8 +51,7 @@
  *	k - only accessed by curthread
  */
 struct pstats {
-#define	pstat_startzero	p_ru
-	struct	rusage p_ru;		/* Stats for this process. */
+#define	pstat_startzero	p_cru
 	struct	rusage p_cru;		/* Stats for reaped children. */
 	struct	itimerval p_timer[3];	/* (j) Virtual-time timers. */
#define	pstat_endzero	pstat_startcopy
@@ -116,6 +115,7 @@
 struct plimit *lim_alloc(void);
 void	 lim_copy(struct plimit *dst, struct plimit *src);
 rlim_t	 lim_cur(struct proc *p, int which);
+void	 lim_fork(struct proc *p1, struct proc *p2);
 void	 lim_free(struct plimit *limp);
 struct plimit *lim_hold(struct plimit *limp);
 
@@ -123,6 +123,9 @@
 rlim_t	 lim_max(struct proc *p, int which);
 void	 lim_rlimit(struct proc *p, int which, struct rlimit *rlp);
 void	 ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
	    struct rusage_ext *rux2);
+void	 rufetch(struct proc *p, struct rusage *ru);
+void	 rucollect(struct rusage *ru, struct rusage *ru2);
+void	 ruxagg(struct rusage_ext *rux, struct thread *td);
 int	 suswintr(void *base, int word);
 struct uidinfo *uifind(uid_t uid);
diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c
index ad6bff918982..dc4f339654a0 100644
--- a/sys/ufs/ffs/ffs_inode.c
+++ b/sys/ufs/ffs/ffs_inode.c
@@ -559,7 +559,7 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp)
 	vp = ITOV(ip);
 	bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, 0, 0);
 	if ((bp->b_flags & B_CACHE) == 0) {
-		curproc->p_stats->p_ru.ru_inblock++;	/* pay for read */
+		curthread->td_ru.ru_inblock++;	/* pay for read */
 		bp->b_iocmd = BIO_READ;
 		bp->b_flags &= ~B_INVAL;
 		bp->b_ioflags &= ~BIO_ERROR;
diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c
index a80d84386acb..805d33ed64d5 100644
--- a/sys/ufs/ufs/ufs_bmap.c
+++ b/sys/ufs/ufs/ufs_bmap.c
@@ -226,7 +226,7 @@ ufs_bmaparray(vp, bn, bnp, nbp, runp, runb)
 			vfs_busy_pages(bp, 0);
 			bp->b_iooffset = dbtob(bp->b_blkno);
 			bstrategy(bp);
-			curproc->p_stats->p_ru.ru_inblock++;	/* XXX */
+			curthread->td_ru.ru_inblock++;
 			error = bufwait(bp);
 			if (error) {
 				brelse(bp);
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 58554307986d..614956aa2f7b 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -918,15 +918,10 @@ readrest:
 	 * Unlock everything, and return
 	 */
 	unlock_and_deallocate(&fs);
-	PROC_LOCK(curproc);
-	if ((curproc->p_sflag & PS_INMEM) && curproc->p_stats) {
-		if (hardfault) {
-			curproc->p_stats->p_ru.ru_majflt++;
-		} else {
-			curproc->p_stats->p_ru.ru_minflt++;
-		}
-	}
-	PROC_UNLOCK(curproc);
+	if (hardfault)
+		curthread->td_ru.ru_majflt++;
+	else
+		curthread->td_ru.ru_minflt++;
 	return (KERN_SUCCESS);
 }
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index a3749bf4ee21..cb2a657b1c3e 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -986,8 +986,8 @@ swapout(p)
 		    ("swapout: there is a thread not safe for swapout"));
 	}
 #endif	/* INVARIANTS */
-
-	++p->p_stats->p_ru.ru_nswap;
+	td = FIRST_THREAD_IN_PROC(p);
+	++td->td_ru.ru_nswap;
 	/*
 	 * remember the process resident count
 	 */
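
Reviewer note (not part of the patch): the heart of this change is that each
thread accumulates its own counters privately in td_ru/td_runtime, and readers
fold them together on demand -- ruxagg() for the raw tick counters, rucollect()
for the rusage fields, rufetch() as the entry point.  Below is a minimal
user-space sketch of that aggregation pattern.  The struct, its field subset,
the collect() helper, and the two "thread" variables are simplified stand-ins
for illustration only; they are not the kernel's types or functions.

/*
 * Sketch of the per-thread accumulate / on-demand fold pattern, using the
 * same ru_first/ru_last pointer-walk trick as rucollect().
 */
#include <stdio.h>
#include <string.h>

struct rusage_like {
	long ru_maxrss;		/* folded by maximum, not by sum */
#define	ru_first	ru_inblock
	long ru_inblock;	/* additive counters start here... */
	long ru_oublock;
	long ru_nvcsw;
	long ru_nivcsw;		/* ...and end here */
#define	ru_last		ru_nivcsw
};

/* Fold ru2 into ru: max for ru_maxrss, sum for every additive field. */
static void
collect(struct rusage_like *ru, const struct rusage_like *ru2)
{
	long *ip;
	const long *ip2;
	int i;

	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first;
	ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

int
main(void)
{
	/* Two "threads", each with privately accumulated counters. */
	struct rusage_like td0 = { .ru_maxrss = 512, .ru_inblock = 3,
	    .ru_nvcsw = 10 };
	struct rusage_like td1 = { .ru_maxrss = 768, .ru_oublock = 7,
	    .ru_nivcsw = 2 };
	struct rusage_like total;

	/* rufetch()-style read: zero the result, then fold each thread in. */
	memset(&total, 0, sizeof(total));
	collect(&total, &td0);
	collect(&total, &td1);
	printf("maxrss %ld inblock %ld oublock %ld vcsw %ld ivcsw %ld\n",
	    total.ru_maxrss, total.ru_inblock, total.ru_oublock,
	    total.ru_nvcsw, total.ru_nivcsw);
	return (0);
}

Running the sketch prints the folded totals (maximum for ru_maxrss, sums for
the rest), which is exactly why exit1() and rufetch() can recover a whole
process's rusage even though no thread ever takes a lock to bump a counter.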