diff --git a/sys/gnu/fs/ext2fs/ext2_bmap.c b/sys/gnu/fs/ext2fs/ext2_bmap.c
index 9d5e37bf3948..85cfbf948bf6 100644
--- a/sys/gnu/fs/ext2fs/ext2_bmap.c
+++ b/sys/gnu/fs/ext2fs/ext2_bmap.c
@@ -198,7 +198,7 @@ ext2_bmaparray(vp, bn, bnp, runp, runb)
 		vfs_busy_pages(bp, 0);
 		bp->b_iooffset = dbtob(bp->b_blkno);
 		bstrategy(bp);
-		curproc->p_stats->p_ru.ru_inblock++;	/* XXX */
+		curthread->td_ru.ru_inblock++;
 		error = bufwait(bp);
 		if (error) {
 			brelse(bp);
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index 9306e8319458..8f99b19bab97 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -431,6 +431,7 @@ proc0_init(void *dummy __unused)
 	bcopy("swapper", p->p_comm, sizeof ("swapper"));
 
 	callout_init(&p->p_itcallout, CALLOUT_MPSAFE);
+	callout_init_mtx(&p->p_limco, &p->p_mtx, 0);
 	callout_init(&td->td_slpcallout, CALLOUT_MPSAFE);
 
 	/* Create credentials. */
diff --git a/sys/kern/kern_acct.c b/sys/kern/kern_acct.c
index f1b0b8f2a8df..69a171afa84f 100644
--- a/sys/kern/kern_acct.c
+++ b/sys/kern/kern_acct.c
@@ -337,7 +337,7 @@ acct_process(struct thread *td)
 	struct timeval ut, st, tmp;
 	struct plimit *newlim, *oldlim;
 	struct proc *p;
-	struct rusage *r;
+	struct rusage ru;
 	int t, ret, vfslocked;
 
 	/*
@@ -370,6 +370,7 @@ acct_process(struct thread *td)
 	bcopy(p->p_comm, acct.ac_comm, sizeof acct.ac_comm);
 
 	/* (2) The amount of user and system time that was used */
+	rufetch(p, &ru);
 	calcru(p, &ut, &st);
 	acct.ac_utime = encode_timeval(ut);
 	acct.ac_stime = encode_timeval(st);
@@ -383,19 +384,18 @@ acct_process(struct thread *td)
 	acct.ac_etime = encode_timeval(tmp);
 
 	/* (4) The average amount of memory used */
-	r = &p->p_stats->p_ru;
 	tmp = ut;
 	timevaladd(&tmp, &st);
 	/* Convert tmp (i.e. u + s) into hz units to match ru_i*. */
 	t = tmp.tv_sec * hz + tmp.tv_usec / tick;
 	if (t)
-		acct.ac_mem = encode_long((r->ru_ixrss + r->ru_idrss +
-		    r->ru_isrss) / t);
+		acct.ac_mem = encode_long((ru.ru_ixrss + ru.ru_idrss +
+		    ru.ru_isrss) / t);
 	else
 		acct.ac_mem = 0;
 
 	/* (5) The number of disk I/O operations done */
-	acct.ac_io = encode_long(r->ru_inblock + r->ru_oublock);
+	acct.ac_io = encode_long(ru.ru_inblock + ru.ru_oublock);
 
 	/* (6) The UID and GID of the process */
 	acct.ac_uid = p->p_ucred->cr_ruid;
diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
index ac404dc1d5fc..0f7366a75c06 100644
--- a/sys/kern/kern_clock.c
+++ b/sys/kern/kern_clock.c
@@ -394,10 +394,9 @@ stopprofclock(p)
 }
 
 /*
- * Statistics clock.  Grab profile sample, and if divider reaches 0,
- * do process and kernel statistics.  Most of the statistics are only
- * used by user-level statistics programs.  The main exceptions are
- * ke->ke_uticks, p->p_rux.rux_sticks, p->p_rux.rux_iticks, and p->p_estcpu.
+ * Statistics clock.  Updates rusage information and calls the scheduler
+ * to adjust priorities of the active thread.
+ *
  * This should be called by all active processors.
  */
 void
@@ -466,10 +465,9 @@ statclock(int usermode)
 	sched_clock(td);
 
 	/* Update resource usage integrals and maximums. */
-	MPASS(p->p_stats != NULL);
 	MPASS(p->p_vmspace != NULL);
 	vm = p->p_vmspace;
-	ru = &p->p_stats->p_ru;
+	ru = &td->td_ru;
 	ru->ru_ixrss += pgtok(vm->vm_tsize);
 	ru->ru_idrss += pgtok(vm->vm_dsize);
 	ru->ru_isrss += pgtok(vm->vm_ssize);
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 029fe3a12f4b..54ac39247ef6 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -116,6 +116,7 @@ exit1(struct thread *td, int rv)
 	struct ucred *tracecred;
 #endif
 	struct plimit *plim;
+	struct rusage *ru;
 	int locked;
 
 	/*
@@ -169,7 +170,8 @@ retry:
 		 * Threading support has been turned off.
 		 */
 	}
-
+	KASSERT(p->p_numthreads == 1,
+	    ("exit1: proc %p exiting with %d threads", p, p->p_numthreads));
 	/*
 	 * Wakeup anyone in procfs' PIOCWAIT.  They should have a hold
 	 * on our vmspace, so we should block below until they have
@@ -195,6 +197,8 @@ retry:
 		msleep(&p->p_lock, &p->p_mtx, PWAIT, "exithold", 0);
 	PROC_UNLOCK(p);
 
+	/* Drain the limit callout while we don't have the proc locked */
+	callout_drain(&p->p_limco);
 
 #ifdef AUDIT
 	/*
@@ -229,7 +233,7 @@ retry:
 	 */
 	EVENTHANDLER_INVOKE(process_exit, p);
 
-	MALLOC(p->p_ru, struct rusage *, sizeof(struct rusage),
+	MALLOC(ru, struct rusage *, sizeof(struct rusage),
 		M_ZOMBIE, M_WAITOK);
 	/*
 	 * If parent is waiting for us to exit or exec,
@@ -438,16 +442,20 @@ retry:
 		PROC_UNLOCK(q);
 	}
 
-	/*
-	 * Save exit status and finalize rusage info except for times,
-	 * adding in child rusage info later when our time is locked.
-	 */
+	/* Save exit status. */
 	PROC_LOCK(p);
 	p->p_xstat = rv;
 	p->p_xthread = td;
-	p->p_stats->p_ru.ru_nvcsw++;
-	*p->p_ru = p->p_stats->p_ru;
-
+	/*
+	 * All statistics have been aggregated into the final td_ru by
+	 * thread_exit().  Copy these into the proc here where wait*()
+	 * can find them.
+	 * XXX We will miss any statistics gathered between here and
+	 * thread_exit() except for those related to clock ticks.
+	 */
+	*ru = td->td_ru;
+	ru->ru_nvcsw++;
+	p->p_ru = ru;
 	/*
 	 * Notify interested parties of our demise.
 	 */
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index cbcb25ab775b..8fa8ce2070e5 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -534,7 +534,7 @@ again:
 	/*
 	 * p_limit is copy-on-write.  Bump its refcount.
 	 */
-	p2->p_limit = lim_hold(p1->p_limit);
+	lim_fork(p1, p2);
 
 	pstats_fork(p1->p_stats, p2->p_stats);
 
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index b5fe77de6203..a8ac12e92ccd 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -693,12 +693,12 @@ fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp)
 	kp->ki_swtime = p->p_swtime;
 	kp->ki_pid = p->p_pid;
 	kp->ki_nice = p->p_nice;
+	rufetch(p, &kp->ki_rusage);
 	kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime);
 	mtx_unlock_spin(&sched_lock);
 	if ((p->p_sflag & PS_INMEM) && p->p_stats != NULL) {
 		kp->ki_start = p->p_stats->p_start;
 		timevaladd(&kp->ki_start, &boottime);
-		kp->ki_rusage = p->p_stats->p_ru;
 		calcru(p, &kp->ki_rusage.ru_utime, &kp->ki_rusage.ru_stime);
 		calccru(p, &kp->ki_childutime, &kp->ki_childstime);
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
index 85ebe64ce1d7..8ddff9a9bddd 100644
--- a/sys/kern/kern_resource.c
+++ b/sys/kern/kern_resource.c
@@ -619,6 +619,38 @@ setrlimit(td, uap)
 	return (error);
 }
 
+static void
+lim_cb(void *arg)
+{
+	struct rlimit rlim;
+	struct thread *td;
+	struct proc *p;
+
+	p = arg;
+	PROC_LOCK_ASSERT(p, MA_OWNED);
+	/*
+	 * Check whether the process has exceeded its cpu allocation:
+	 * kill it over the hard limit, post SIGXCPU over the soft one.
+	 */
+	if (p->p_cpulimit == RLIM_INFINITY)
+		return;
+	mtx_lock_spin(&sched_lock);
+	FOREACH_THREAD_IN_PROC(p, td)
+		ruxagg(&p->p_rux, td);
+	mtx_unlock_spin(&sched_lock);
+	if (p->p_rux.rux_runtime > p->p_cpulimit * cpu_tickrate()) {
+		lim_rlimit(p, RLIMIT_CPU, &rlim);
+		if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) {
+			killproc(p, "exceeded maximum CPU limit");
+		} else {
+			if (p->p_cpulimit < rlim.rlim_max)
+				p->p_cpulimit += 5;
+			psignal(p, SIGXCPU);
+		}
+	}
+	callout_reset(&p->p_limco, hz, lim_cb, p);
+}
+
 int
 kern_setrlimit(td, which, limp)
 	struct thread *td;
@@ -664,6 +696,9 @@ kern_setrlimit(td, which, limp)
 	switch (which) {
 
 	case RLIMIT_CPU:
+		if (limp->rlim_cur != RLIM_INFINITY &&
+		    p->p_cpulimit == RLIM_INFINITY)
+			callout_reset(&p->p_limco, hz, lim_cb, p);
 		mtx_lock_spin(&sched_lock);
 		p->p_cpulimit = limp->rlim_cur;
 		mtx_unlock_spin(&sched_lock);
 		break;
@@ -802,17 +837,11 @@ calcru(struct proc *p, struct timeval *up, struct timeval *sp)
 	 * We reset the thread and CPU state as if we had performed a context
 	 * switch right here.
 	 */
-	if (curthread->td_proc == p) {
-		td = curthread;
+	td = curthread;
+	if (td->td_proc == p) {
 		u = cpu_ticks();
 		p->p_rux.rux_runtime += u - PCPU_GET(switchtime);
 		PCPU_SET(switchtime, u);
-		p->p_rux.rux_uticks += td->td_uticks;
-		td->td_uticks = 0;
-		p->p_rux.rux_iticks += td->td_iticks;
-		td->td_iticks = 0;
-		p->p_rux.rux_sticks += td->td_sticks;
-		td->td_sticks = 0;
 	}
 	/* Work on a copy of p_rux so we can let go of sched_lock */
 	rux = p->p_rux;
@@ -932,7 +961,7 @@ kern_getrusage(td, who, rup)
 	switch (who) {
 
 	case RUSAGE_SELF:
-		*rup = p->p_stats->p_ru;
+		rufetch(p, rup);
 		calcru(p, &rup->ru_utime, &rup->ru_stime);
 		break;
@@ -950,14 +979,23 @@
 }
 
 void
-ruadd(ru, rux, ru2, rux2)
-	struct rusage *ru;
-	struct rusage_ext *rux;
-	struct rusage *ru2;
-	struct rusage_ext *rux2;
+rucollect(struct rusage *ru, struct rusage *ru2)
+{
+	long *ip, *ip2;
+	int i;
+
+	if (ru->ru_maxrss < ru2->ru_maxrss)
+		ru->ru_maxrss = ru2->ru_maxrss;
+	ip = &ru->ru_first;
+	ip2 = &ru2->ru_first;
+	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
+		*ip++ += *ip2++;
+}
+
+void
+ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
+    struct rusage_ext *rux2)
 {
-	register long *ip, *ip2;
-	register int i;
 
 	rux->rux_runtime += rux2->rux_runtime;
 	rux->rux_uticks += rux2->rux_uticks;
@@ -966,12 +1004,46 @@ ruadd(ru, rux, ru2, rux2)
 	rux->rux_uu += rux2->rux_uu;
 	rux->rux_su += rux2->rux_su;
 	rux->rux_tu += rux2->rux_tu;
-	if (ru->ru_maxrss < ru2->ru_maxrss)
-		ru->ru_maxrss = ru2->ru_maxrss;
-	ip = &ru->ru_first;
-	ip2 = &ru2->ru_first;
-	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
-		*ip++ += *ip2++;
+	rucollect(ru, ru2);
+}
+
+/*
+ * Aggregate tick counts into the proc's rusage_ext.
+ */
+void
+ruxagg(struct rusage_ext *rux, struct thread *td)
+{
+	rux->rux_runtime += td->td_runtime;
+	rux->rux_uticks += td->td_uticks;
+	rux->rux_sticks += td->td_sticks;
+	rux->rux_iticks += td->td_iticks;
+	td->td_runtime = 0;
+	td->td_uticks = 0;
+	td->td_iticks = 0;
+	td->td_sticks = 0;
+}
+
+/*
+ * Update the rusage_ext structure and fetch a valid aggregate rusage
+ * for proc p into the buffer the caller supplies.
+ */
+void
+rufetch(struct proc *p, struct rusage *ru)
+{
+	struct thread *td;
+
+	memset(ru, 0, sizeof(*ru));
+	mtx_lock_spin(&sched_lock);
+	if (p->p_ru == NULL) {
+		KASSERT(p->p_numthreads > 0,
+		    ("rufetch: No threads or ru in proc %p", p));
+		FOREACH_THREAD_IN_PROC(p, td) {
+			ruxagg(&p->p_rux, td);
+			rucollect(ru, &td->td_ru);
+		}
+	} else
+		*ru = *p->p_ru;
+	mtx_unlock_spin(&sched_lock);
 }
 
 /*
@@ -997,6 +1069,15 @@ lim_hold(limp)
 	return (limp);
 }
 
+void
+lim_fork(struct proc *p1, struct proc *p2)
+{
+	p2->p_limit = lim_hold(p1->p_limit);
+	callout_init_mtx(&p2->p_limco, &p2->p_mtx, 0);
+	if (p1->p_cpulimit != RLIM_INFINITY)
+		callout_reset(&p2->p_limco, hz, lim_cb, p2);
+}
+
 void
 lim_free(limp)
 	struct plimit *limp;
diff --git a/sys/kern/kern_sig.c b/sys/kern/kern_sig.c
index fe2bbbeee988..dc0234658ddd 100644
--- a/sys/kern/kern_sig.c
+++ b/sys/kern/kern_sig.c
@@ -1868,7 +1868,7 @@ trapsignal(struct thread *td, ksiginfo_t *ksi)
 	mtx_lock(&ps->ps_mtx);
 	if ((p->p_flag & P_TRACED) == 0 && SIGISMEMBER(ps->ps_sigcatch, sig) &&
 	    !SIGISMEMBER(td->td_sigmask, sig)) {
-		p->p_stats->p_ru.ru_nsignals++;
+		td->td_ru.ru_nsignals++;
 #ifdef KTRACE
 		if (KTRPOINT(curthread, KTR_PSIG))
 			ktrpsig(sig, ps->ps_sigact[_SIG_IDX(sig)],
@@ -2781,7 +2781,7 @@ postsig(sig)
 			SIGADDSET(ps->ps_sigignore, sig);
 			ps->ps_sigact[_SIG_IDX(sig)] = SIG_DFL;
 		}
-		p->p_stats->p_ru.ru_nsignals++;
+		td->td_ru.ru_nsignals++;
 		if (p->p_sig != sig) {
 			code = 0;
 		} else {
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index d61dddf36b4c..b75dcf29362d 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -401,40 +401,18 @@ mi_switch(int flags, struct thread *newtd)
 	}
 	if (flags & SW_VOL)
-		p->p_stats->p_ru.ru_nvcsw++;
+		td->td_ru.ru_nvcsw++;
 	else
-		p->p_stats->p_ru.ru_nivcsw++;
-
+		td->td_ru.ru_nivcsw++;
 	/*
 	 * Compute the amount of time during which the current
-	 * process was running, and add that to its total so far.
+	 * thread was running, and add that to its total so far.
 	 */
 	new_switchtime = cpu_ticks();
-	p->p_rux.rux_runtime += (new_switchtime - PCPU_GET(switchtime));
-	p->p_rux.rux_uticks += td->td_uticks;
-	td->td_uticks = 0;
-	p->p_rux.rux_iticks += td->td_iticks;
-	td->td_iticks = 0;
-	p->p_rux.rux_sticks += td->td_sticks;
-	td->td_sticks = 0;
-
-	td->td_generation++;	/* bump preempt-detect counter */
-
-	/*
-	 * Check if the process exceeds its cpu resource allocation.  If
-	 * it reaches the max, arrange to kill the process in ast().
-	 */
-	if (p->p_cpulimit != RLIM_INFINITY &&
-	    p->p_rux.rux_runtime >= p->p_cpulimit * cpu_tickrate()) {
-		p->p_sflag |= PS_XCPU;
-		td->td_flags |= TDF_ASTPENDING;
-	}
-
-	/*
-	 * Finish up stats for outgoing thread.
-	 */
-	cnt.v_swtch++;
+	td->td_runtime += new_switchtime - PCPU_GET(switchtime);
 	PCPU_SET(switchtime, new_switchtime);
+	td->td_generation++;	/* bump preempt-detect counter */
+	cnt.v_swtch++;
 	PCPU_SET(switchticks, ticks);
 	CTR4(KTR_PROC, "mi_switch: old thread %ld (kse %p, pid %ld, %s)",
 	    td->td_tid, td->td_sched, p->p_pid, p->p_comm);
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index e83bf7e8c363..dcb00b770954 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -352,6 +352,7 @@ thread_exit(void)
 {
 	uint64_t new_switchtime;
 	struct thread *td;
+	struct thread *td2;
 	struct proc *p;
 
 	td = curthread;
@@ -402,17 +403,17 @@ thread_exit(void)
 	/* Do the same timestamp bookkeeping that mi_switch() would do. */
 	new_switchtime = cpu_ticks();
 	p->p_rux.rux_runtime += (new_switchtime - PCPU_GET(switchtime));
-	p->p_rux.rux_uticks += td->td_uticks;
-	p->p_rux.rux_sticks += td->td_sticks;
-	p->p_rux.rux_iticks += td->td_iticks;
 	PCPU_SET(switchtime, new_switchtime);
 	PCPU_SET(switchticks, ticks);
 	cnt.v_swtch++;
-
-	/* Add our usage into the usage of all our children. */
+	/*
+	 * Aggregate this thread's tick stats in the parent so they are not
+	 * lost.  Also add the child usage to our own when the final thread
+	 * exits.
+	 */
+	ruxagg(&p->p_rux, td);
 	if (p->p_numthreads == 1)
 		ruadd(p->p_ru, &p->p_rux, &p->p_stats->p_cru, &p->p_crux);
-
 	/*
 	 * The last thread is left attached to the process
 	 * So that the whole bundle gets recycled.  Skip
@@ -424,8 +425,10 @@ thread_exit(void)
 		if (p->p_flag & P_HADTHREADS) {
 			if (p->p_numthreads > 1) {
 				thread_unlink(td);
-
-				sched_exit_thread(FIRST_THREAD_IN_PROC(p), td);
+				/* Impart our resource usage on another thread */
+				td2 = FIRST_THREAD_IN_PROC(p);
+				rucollect(&td2->td_ru, &td->td_ru);
+				sched_exit_thread(td2, td);
 
 				/*
 				 * The test below is NOT true if we are the
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index 15c8fdd8fd1b..e9d9c3552b08 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -147,7 +147,6 @@ ast(struct trapframe *framep)
 {
 	struct thread *td;
 	struct proc *p;
-	struct rlimit rlim;
 	int sflag;
 	int flags;
 	int sig;
@@ -183,8 +182,8 @@ ast(struct trapframe *framep)
 	mtx_lock_spin(&sched_lock);
 	flags = td->td_flags;
 	sflag = p->p_sflag;
-	if (p->p_sflag & (PS_ALRMPEND | PS_PROFPEND | PS_XCPU))
-		p->p_sflag &= ~(PS_ALRMPEND | PS_PROFPEND | PS_XCPU);
+	if (p->p_sflag & (PS_ALRMPEND | PS_PROFPEND))
+		p->p_sflag &= ~(PS_ALRMPEND | PS_PROFPEND);
 #ifdef MAC
 	if (p->p_sflag & PS_MACPEND)
 		p->p_sflag &= ~PS_MACPEND;
@@ -231,21 +230,6 @@ ast(struct trapframe *framep)
 		psignal(p, SIGPROF);
 		PROC_UNLOCK(p);
 	}
-	if (sflag & PS_XCPU) {
-		PROC_LOCK(p);
-		lim_rlimit(p, RLIMIT_CPU, &rlim);
-		mtx_lock_spin(&sched_lock);
-		if (p->p_rux.rux_runtime >= rlim.rlim_max * cpu_tickrate()) {
-			mtx_unlock_spin(&sched_lock);
-			killproc(p, "exceeded maximum CPU limit");
-		} else {
-			if (p->p_cpulimit < rlim.rlim_max)
-				p->p_cpulimit += 5;
-			mtx_unlock_spin(&sched_lock);
-			psignal(p, SIGXCPU);
-		}
-		PROC_UNLOCK(p);
-	}
 #ifdef MAC
 	if (sflag & PS_MACPEND)
 		mac_thread_userret(td);
diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c
index 67b82b4f439d..1b6d03bca915 100644
--- a/sys/kern/uipc_socket.c
+++ b/sys/kern/uipc_socket.c
@@ -954,7 +954,7 @@ sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
 	dontroute =
 	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0;
 	if (td != NULL)
-		td->td_proc->p_stats->p_ru.ru_msgsnd++;
+		td->td_ru.ru_msgsnd++;
 	if (control != NULL)
 		clen = control->m_len;
 
@@ -1123,7 +1123,7 @@ sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio,
 	    (flags & MSG_DONTROUTE) && (so->so_options & SO_DONTROUTE) == 0 &&
 	    (so->so_proto->pr_flags & PR_ATOMIC);
 	if (td != NULL)
-		td->td_proc->p_stats->p_ru.ru_msgsnd++;
+		td->td_ru.ru_msgsnd++;
 	if (control != NULL)
 		clen = control->m_len;
 
@@ -1506,7 +1506,7 @@ dontblock:
 	 */
 	SOCKBUF_LOCK_ASSERT(&so->so_rcv);
 	if (uio->uio_td)
-		uio->uio_td->td_proc->p_stats->p_ru.ru_msgrcv++;
+		uio->uio_td->td_ru.ru_msgrcv++;
 	KASSERT(m == so->so_rcv.sb_mb, ("soreceive: m != so->so_rcv.sb_mb"));
 	SBLASTRECORDCHK(&so->so_rcv);
 	SBLASTMBUFCHK(&so->so_rcv);
diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c
index f5fd25f1814c..47a08b69eb70 100644
--- a/sys/kern/vfs_aio.c
+++ b/sys/kern/vfs_aio.c
@@ -805,7 +805,6 @@ aio_process(struct aiocblist *aiocbe)
 	td = curthread;
 	td_savedcred = td->td_ucred;
 	td->td_ucred = aiocbe->cred;
-	mycp = td->td_proc;
 	cb = &aiocbe->uaiocb;
 	fp = aiocbe->fd_file;
 
@@ -831,8 +830,8 @@ aio_process(struct aiocblist *aiocbe)
 	auio.uio_segflg = UIO_USERSPACE;
 	auio.uio_td = td;
 
-	inblock_st = mycp->p_stats->p_ru.ru_inblock;
-	oublock_st = mycp->p_stats->p_ru.ru_oublock;
+	inblock_st = td->td_ru.ru_inblock;
+	oublock_st = td->td_ru.ru_oublock;
 	/*
 	 * aio_aqueue() acquires a reference to the file that is
 	 * released in aio_free_entry().
@@ -846,8 +845,8 @@ aio_process(struct aiocblist *aiocbe)
 		auio.uio_rw = UIO_WRITE;
 		error = fo_write(fp, &auio, fp->f_cred, FOF_OFFSET, td);
 	}
-	inblock_end = mycp->p_stats->p_ru.ru_inblock;
-	oublock_end = mycp->p_stats->p_ru.ru_oublock;
+	inblock_end = td->td_ru.ru_inblock;
+	oublock_end = td->td_ru.ru_oublock;
 
 	aiocbe->inputcharge = inblock_end - inblock_st;
 	aiocbe->outputcharge = oublock_end - oublock_st;
@@ -1663,11 +1662,10 @@ aio_return(struct thread *td, struct aio_return_args *uap)
 		error = cb->uaiocb._aiocb_private.error;
 		td->td_retval[0] = status;
 		if (cb->uaiocb.aio_lio_opcode == LIO_WRITE) {
-			p->p_stats->p_ru.ru_oublock +=
-			    cb->outputcharge;
+			td->td_ru.ru_oublock += cb->outputcharge;
 			cb->outputcharge = 0;
 		} else if (cb->uaiocb.aio_lio_opcode == LIO_READ) {
-			p->p_stats->p_ru.ru_inblock += cb->inputcharge;
+			td->td_ru.ru_inblock += cb->inputcharge;
 			cb->inputcharge = 0;
 		}
 		aio_free_entry(cb);
@@ -2206,10 +2204,10 @@ aio_waitcomplete(struct thread *td, struct aio_waitcomplete_args *uap)
 	error = cb->uaiocb._aiocb_private.error;
 	td->td_retval[0] = status;
 	if (cb->uaiocb.aio_lio_opcode == LIO_WRITE) {
-		p->p_stats->p_ru.ru_oublock += cb->outputcharge;
+		td->td_ru.ru_oublock += cb->outputcharge;
 		cb->outputcharge = 0;
 	} else if (cb->uaiocb.aio_lio_opcode == LIO_READ) {
-		p->p_stats->p_ru.ru_inblock += cb->inputcharge;
+		td->td_ru.ru_inblock += cb->inputcharge;
 		cb->inputcharge = 0;
 	}
 	aio_free_entry(cb);
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 44879ffc4822..18c6d59cb550 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -746,7 +746,7 @@ breada(struct vnode * vp, daddr_t * rablkno, int * rabsize,
 
 		if ((rabp->b_flags & B_CACHE) == 0) {
 			if (!TD_IS_IDLETHREAD(curthread))
-				curthread->td_proc->p_stats->p_ru.ru_inblock++;
+				curthread->td_ru.ru_inblock++;
 			rabp->b_flags |= B_ASYNC;
 			rabp->b_flags &= ~B_INVAL;
 			rabp->b_ioflags &= ~BIO_ERROR;
@@ -781,7 +781,7 @@ breadn(struct vnode * vp, daddr_t blkno, int size,
 	/* if not found in cache, do some I/O */
 	if ((bp->b_flags & B_CACHE) == 0) {
 		if (!TD_IS_IDLETHREAD(curthread))
-			curthread->td_proc->p_stats->p_ru.ru_inblock++;
+			curthread->td_ru.ru_inblock++;
 		bp->b_iocmd = BIO_READ;
 		bp->b_flags &= ~B_INVAL;
 		bp->b_ioflags &= ~BIO_ERROR;
@@ -860,7 +860,7 @@ bufwrite(struct buf *bp)
 	atomic_add_int(&runningbufspace, bp->b_runningbufspace);
 
 	if (!TD_IS_IDLETHREAD(curthread))
-		curthread->td_proc->p_stats->p_ru.ru_oublock++;
+		curthread->td_ru.ru_oublock++;
 	if (oldflags & B_ASYNC)
 		BUF_KERNPROC(bp);
 	bp->b_iooffset = dbtob(bp->b_blkno);
diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c
index 226d80f98f18..7770bc43a01b 100644
--- a/sys/kern/vfs_cluster.c
+++ b/sys/kern/vfs_cluster.c
@@ -228,7 +228,7 @@ cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp)
 			BUF_KERNPROC(bp);
 		bp->b_iooffset = dbtob(bp->b_blkno);
 		bstrategy(bp);
-		curproc->p_stats->p_ru.ru_inblock++;
+		curthread->td_ru.ru_inblock++;
 	}
 
 	/*
@@ -281,7 +281,7 @@ cluster_read(vp, filesize, lblkno, size, cred, totread, seqcount, bpp)
 			BUF_KERNPROC(rbp);
 		rbp->b_iooffset = dbtob(rbp->b_blkno);
 		bstrategy(rbp);
-		curproc->p_stats->p_ru.ru_inblock++;
+		curthread->td_ru.ru_inblock++;
 	}
 
 	if (reqbp)
diff --git a/sys/netinet/sctp_output.c b/sys/netinet/sctp_output.c
index 3c0056c3ea5d..f35ea8c80026 100644
--- a/sys/netinet/sctp_output.c
+++ b/sys/netinet/sctp_output.c
@@ -11012,7 +11012,7 @@ sctp_lower_sosend(struct socket *so,
 	}
 	/* Ok, we will attempt a msgsnd :> */
 	if (p) {
-		p->td_proc->p_stats->p_ru.ru_msgsnd++;
+		p->td_ru.ru_msgsnd++;
 	}
 	if (stcb) {
 		if (((srcv->sinfo_flags | temp_flags) & SCTP_ADDR_OVER) == 0) {
diff --git a/sys/nfs4client/nfs4_vnops.c b/sys/nfs4client/nfs4_vnops.c
index cfa52e278d1a..8094a3e33359 100644
--- a/sys/nfs4client/nfs4_vnops.c
+++ b/sys/nfs4client/nfs4_vnops.c
@@ -2828,7 +2828,7 @@ nfs4_writebp(struct buf *bp, int force __unused, struct thread *td)
 	bp->b_iocmd = BIO_WRITE;
 
 	bufobj_wref(bp->b_bufobj);
-	curthread->td_proc->p_stats->p_ru.ru_oublock++;
+	curthread->td_ru.ru_oublock++;
 	splx(s);
 
 	/*
diff --git a/sys/nfsclient/nfs_vnops.c b/sys/nfsclient/nfs_vnops.c
index 28de49d5c97e..a6de7e4c2860 100644
--- a/sys/nfsclient/nfs_vnops.c
+++ b/sys/nfsclient/nfs_vnops.c
@@ -3129,7 +3129,7 @@ nfs_writebp(struct buf *bp, int force __unused, struct thread *td)
 	bp->b_iocmd = BIO_WRITE;
 
 	bufobj_wref(bp->b_bufobj);
-	curthread->td_proc->p_stats->p_ru.ru_oublock++;
+	curthread->td_ru.ru_oublock++;
 	splx(s);
 
 	/*
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 529512c0faeb..a73d2d571c4c 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -49,6 +49,7 @@
 #include <sys/priority.h>
 #include <sys/rtprio.h>			/* XXX. */
 #include <sys/runq.h>
+#include <sys/resource.h>
 #include <sys/sigio.h>
 #include <sys/signal.h>
 #include <sys/signalvar.h>
@@ -255,10 +256,12 @@ struct thread {
 	struct kse_upcall *td_upcall;	/* (k + j) Upcall structure. */
 	u_int		td_estcpu;	/* (j) Sum of the same field in KSEs. */
 	u_int		td_slptime;	/* (j) How long completely blocked. */
-	u_int		td_pticks;	/* (k) Statclock hits for profiling */
-	u_int		td_sticks;	/* (k) Statclock hits in system mode. */
-	u_int		td_iticks;	/* (k) Statclock hits in intr mode. */
-	u_int		td_uticks;	/* (k) Statclock hits in user mode. */
+	struct rusage	td_ru;		/* (j) rusage information */
+	uint64_t	td_runtime;	/* (j) How many cpu ticks we've run. */
+	u_int		td_pticks;	/* (j) Statclock hits for profiling */
+	u_int		td_sticks;	/* (j) Statclock hits in system mode. */
+	u_int		td_iticks;	/* (j) Statclock hits in intr mode. */
+	u_int		td_uticks;	/* (j) Statclock hits in user mode. */
 	u_int		td_uuticks;	/* (k) Statclock hits (usr), for UTS. */
 	u_int		td_usticks;	/* (k) Statclock hits (sys), for UTS. */
 	int		td_intrval;	/* (j) Return value of TDF_INTERRUPT. */
@@ -486,6 +489,7 @@ struct proc {
 	/* Accumulated stats for all threads? */
 	struct pstats	*p_stats;	/* (b) Accounting/statistics (CPU). */
 	struct plimit	*p_limit;	/* (c) Process limits. */
+	struct callout	p_limco;	/* (c) Limit callout handle. */
 	struct sigacts	*p_sigacts;	/* (x) Signal actions, state (CPU). */
 
 	TAILQ_HEAD(, kse_upcall) p_upcalls; /* All upcalls in the proc. */
@@ -561,7 +565,7 @@ struct proc {
 	struct pgrp	*p_pgrp;	/* (c + e) Pointer to process group. */
 	struct sysentvec *p_sysent;	/* (b) Syscall dispatch info. */
 	struct pargs	*p_args;	/* (c) Process arguments. */
-	rlim_t		p_cpulimit;	/* (j) Current CPU limit in seconds. */
+	rlim_t		p_cpulimit;	/* (c) Current CPU limit in seconds. */
 	signed char	p_nice;		/* (c + j) Process "nice" value. */
/* End area that is copied on creation. */
#define	p_endcopy	p_xstat
@@ -572,7 +576,7 @@ struct proc {
 	struct mdproc	p_md;		/* Any machine-dependent fields. */
 	struct callout	p_itcallout;	/* (h + c) Interval timer callout. */
 	u_short		p_acflag;	/* (c) Accounting flags. */
-	struct rusage	*p_ru;		/* (a) Exit information. XXX */
+	struct rusage	*p_ru;		/* (a) Exit information. */
 	struct proc	*p_peers;	/* (r) */
 	struct proc	*p_leader;	/* (b) */
 	void		*p_emuldata;	/* (c) Emulator state data. */
@@ -624,7 +628,6 @@ struct proc {
 
 /* These flags are kept in p_sflag and are protected with sched_lock. */
 #define	PS_INMEM	0x00001	/* Loaded into memory. */
-#define	PS_XCPU		0x00002	/* Exceeded CPU limit. */
 #define	PS_ALRMPEND	0x00020	/* Pending SIGVTALRM needs to be posted. */
 #define	PS_PROFPEND	0x00040	/* Pending SIGPROF needs to be posted. */
 #define	PS_SWAPINREQ	0x00100	/* Swapin request due to wakeup. */
diff --git a/sys/sys/resource.h b/sys/sys/resource.h
index ae3e3460ebb2..c1b16f431f58 100644
--- a/sys/sys/resource.h
+++ b/sys/sys/resource.h
@@ -50,33 +50,31 @@
 /*
  * Resource utilization information.
  *
- * Locking key:
- *	c - locked by proc mtx
- *	j - locked by sched_lock mtx
- *	n - not locked, lazy
+ * All fields are only modified by curthread and
+ * can be read without locks.
  */
 
 #define	RUSAGE_SELF	0
 #define	RUSAGE_CHILDREN	-1
 
 struct rusage {
-	struct timeval	ru_utime;	/* (n) user time used */
-	struct timeval	ru_stime;	/* (n) system time used */
-	long	ru_maxrss;		/* (j) max resident set size */
+	struct timeval	ru_utime;	/* user time used */
+	struct timeval	ru_stime;	/* system time used */
+	long	ru_maxrss;		/* max resident set size */
 #define	ru_first	ru_ixrss
-	long	ru_ixrss;		/* (j) integral shared memory size */
-	long	ru_idrss;		/* (j) integral unshared data " */
-	long	ru_isrss;		/* (j) integral unshared stack " */
-	long	ru_minflt;		/* (c) page reclaims */
-	long	ru_majflt;		/* (c) page faults */
-	long	ru_nswap;		/* (c + j) swaps */
-	long	ru_inblock;		/* (n) block input operations */
-	long	ru_oublock;		/* (n) block output operations */
-	long	ru_msgsnd;		/* (n) messages sent */
-	long	ru_msgrcv;		/* (n) messages received */
-	long	ru_nsignals;		/* (c) signals received */
-	long	ru_nvcsw;		/* (j) voluntary context switches */
-	long	ru_nivcsw;		/* (j) involuntary " */
+	long	ru_ixrss;		/* integral shared memory size */
+	long	ru_idrss;		/* integral unshared data " */
+	long	ru_isrss;		/* integral unshared stack " */
+	long	ru_minflt;		/* page reclaims */
+	long	ru_majflt;		/* page faults */
+	long	ru_nswap;		/* swaps */
+	long	ru_inblock;		/* block input operations */
+	long	ru_oublock;		/* block output operations */
+	long	ru_msgsnd;		/* messages sent */
+	long	ru_msgrcv;		/* messages received */
+	long	ru_nsignals;		/* signals received */
+	long	ru_nvcsw;		/* voluntary context switches */
+	long	ru_nivcsw;		/* involuntary " */
 #define	ru_last		ru_nivcsw
 };
 
diff --git a/sys/sys/resourcevar.h b/sys/sys/resourcevar.h
index ad8337041ce3..173d7e365e08 100644
--- a/sys/sys/resourcevar.h
+++ b/sys/sys/resourcevar.h
@@ -51,8 +51,7 @@
  *	k - only accessed by curthread
  */
 struct pstats {
-#define	pstat_startzero	p_ru
-	struct	rusage p_ru;		/* Stats for this process. */
+#define	pstat_startzero	p_cru
 	struct	rusage p_cru;		/* Stats for reaped children. */
 	struct	itimerval p_timer[3];	/* (j) Virtual-time timers. */
#define	pstat_endzero	pstat_startcopy
@@ -116,6 +115,7 @@
 struct plimit *lim_alloc(void);
 void	 lim_copy(struct plimit *dst, struct plimit *src);
 rlim_t	 lim_cur(struct proc *p, int which);
+void	 lim_fork(struct proc *p1, struct proc *p2);
 void	 lim_free(struct plimit *limp);
 struct plimit *lim_hold(struct plimit *limp);
 
@@ -123,6 +123,9 @@
 rlim_t	 lim_max(struct proc *p, int which);
 void	 lim_rlimit(struct proc *p, int which, struct rlimit *rlp);
 void	 ruadd(struct rusage *ru, struct rusage_ext *rux, struct rusage *ru2,
	    struct rusage_ext *rux2);
+void	 rufetch(struct proc *p, struct rusage *ru);
+void	 rucollect(struct rusage *ru, struct rusage *ru2);
+void	 ruxagg(struct rusage_ext *rux, struct thread *td);
 int	 suswintr(void *base, int word);
 struct uidinfo *uifind(uid_t uid);
diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c
index ad6bff918982..dc4f339654a0 100644
--- a/sys/ufs/ffs/ffs_inode.c
+++ b/sys/ufs/ffs/ffs_inode.c
@@ -559,7 +559,7 @@ ffs_indirtrunc(ip, lbn, dbn, lastbn, level, countp)
 	vp = ITOV(ip);
 	bp = getblk(vp, lbn, (int)fs->fs_bsize, 0, 0, 0);
 	if ((bp->b_flags & B_CACHE) == 0) {
-		curproc->p_stats->p_ru.ru_inblock++;	/* pay for read */
+		curthread->td_ru.ru_inblock++;	/* pay for read */
 		bp->b_iocmd = BIO_READ;
 		bp->b_flags &= ~B_INVAL;
 		bp->b_ioflags &= ~BIO_ERROR;
diff --git a/sys/ufs/ufs/ufs_bmap.c b/sys/ufs/ufs/ufs_bmap.c
index a80d84386acb..805d33ed64d5 100644
--- a/sys/ufs/ufs/ufs_bmap.c
+++ b/sys/ufs/ufs/ufs_bmap.c
@@ -226,7 +226,7 @@ ufs_bmaparray(vp, bn, bnp, nbp, runp, runb)
 			vfs_busy_pages(bp, 0);
 			bp->b_iooffset = dbtob(bp->b_blkno);
 			bstrategy(bp);
-			curproc->p_stats->p_ru.ru_inblock++;	/* XXX */
+			curthread->td_ru.ru_inblock++;
 			error = bufwait(bp);
 			if (error) {
 				brelse(bp);
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 58554307986d..614956aa2f7b 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -918,15 +918,10 @@ readrest:
 	 * Unlock everything, and return
 	 */
 	unlock_and_deallocate(&fs);
-	PROC_LOCK(curproc);
-	if ((curproc->p_sflag & PS_INMEM) && curproc->p_stats) {
-		if (hardfault) {
-			curproc->p_stats->p_ru.ru_majflt++;
-		} else {
-			curproc->p_stats->p_ru.ru_minflt++;
-		}
-	}
-	PROC_UNLOCK(curproc);
+	if (hardfault)
+		curthread->td_ru.ru_majflt++;
+	else
+		curthread->td_ru.ru_minflt++;
 	return (KERN_SUCCESS);
 }
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index a3749bf4ee21..cb2a657b1c3e 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -986,8 +986,8 @@ swapout(p)
 		    ("swapout: there is a thread not safe for swapout"));
 	}
 #endif	/* INVARIANTS */
-
-	++p->p_stats->p_ru.ru_nswap;
+	td = FIRST_THREAD_IN_PROC(p);
+	++td->td_ru.ru_nswap;
 	/*
 	 * remember the process resident count
 	 */
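
Reviewer note (not part of the patch): the heart of this change is that each
thread accumulates its own counters privately in td_ru/td_runtime, and readers
fold them together on demand -- ruxagg() for the raw tick counters, rucollect()
for the rusage fields, rufetch() as the entry point.  Below is a minimal
user-space sketch of that aggregation pattern.  The struct, its field subset,
the collect() helper, and the two "thread" variables are simplified stand-ins
for illustration only; they are not the kernel's types or functions.

/*
 * Sketch of the per-thread accumulate / on-demand fold pattern, using the
 * same ru_first/ru_last pointer-walk trick as rucollect().
 */
#include <stdio.h>
#include <string.h>

struct rusage_like {
	long ru_maxrss;		/* folded by maximum, not by sum */
#define	ru_first	ru_inblock
	long ru_inblock;	/* additive counters start here... */
	long ru_oublock;
	long ru_nvcsw;
	long ru_nivcsw;		/* ...and end here */
#define	ru_last		ru_nivcsw
};

/* Fold ru2 into ru: max for ru_maxrss, sum for every additive field. */
static void
collect(struct rusage_like *ru, const struct rusage_like *ru2)
{
	long *ip;
	const long *ip2;
	int i;

	if (ru->ru_maxrss < ru2->ru_maxrss)
		ru->ru_maxrss = ru2->ru_maxrss;
	ip = &ru->ru_first;
	ip2 = &ru2->ru_first;
	for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
		*ip++ += *ip2++;
}

int
main(void)
{
	/* Two "threads", each with privately accumulated counters. */
	struct rusage_like td0 = { .ru_maxrss = 512, .ru_inblock = 3,
	    .ru_nvcsw = 10 };
	struct rusage_like td1 = { .ru_maxrss = 768, .ru_oublock = 7,
	    .ru_nivcsw = 2 };
	struct rusage_like total;

	/* rufetch()-style read: zero the result, then fold each thread in. */
	memset(&total, 0, sizeof(total));
	collect(&total, &td0);
	collect(&total, &td1);
	printf("maxrss %ld inblock %ld oublock %ld vcsw %ld ivcsw %ld\n",
	    total.ru_maxrss, total.ru_inblock, total.ru_oublock,
	    total.ru_nvcsw, total.ru_nivcsw);
	return (0);
}

Running the sketch prints the folded totals (maximum for ru_maxrss, sums for
the rest), which is exactly why exit1() and rufetch() can recover a whole
process's rusage even though no thread ever takes a lock to bump a counter.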