Add racct. It's an API to keep per-process, per-user, per-jail

and per-loginclass resource accounting information, to be used by the new
resource limits code.  It's connected to the build, but the code that
actually calls the new functions will come later.

Sponsored by:	The FreeBSD Foundation
Reviewed by:	kib (earlier version)
This commit is contained in:
Edward Tomasz Napierala 2011-03-29 17:47:25 +00:00
parent f77057db08
commit 097055e26d
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=220137
18 changed files with 1939 additions and 3 deletions

842
kern_racct.c Normal file
View File

@ -0,0 +1,842 @@
/*-
* Copyright (c) 2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Edward Tomasz Napierala under sponsorship
* from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_kdtrace.h"
#include <sys/param.h>
#include <sys/eventhandler.h>
#include <sys/param.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/loginclass.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/sx.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/umtx.h>
#ifdef RCTL
#include <sys/rctl.h>
#endif
#ifdef RACCT
/* Advertise the "racct" kernel feature. */
FEATURE(racct, "Resource Accounting");
/*
 * Single mutex taken around every update of r_resources[] in this file;
 * it is initialized through MTX_SYSINIT as a default (MTX_DEF) mutex.
 */
static struct mtx racct_lock;
MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);
/* UMA zone that struct racct is allocated from; created in racct_init(). */
static uma_zone_t racct_zone;
/* Forward declarations for helpers used before their definitions. */
static void racct_sub_racct(struct racct *dest, const struct racct *src);
static void racct_sub_cred_locked(struct ucred *cred, int resource,
uint64_t amount);
static void racct_add_cred_locked(struct ucred *cred, int resource,
uint64_t amount);
/*
 * DTrace SDT provider and probes.  The "rusage" probes take the subject
 * (process or credential), the resource number and the amount; the "racct"
 * probes take racct pointers.
 */
SDT_PROVIDER_DEFINE(racct);
SDT_PROBE_DEFINE3(racct, kernel, rusage, add, add, "struct proc *", "int",
"uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, add_failure, add-failure,
"struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, add_cred, add-cred, "struct ucred *",
"int", "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, add_force, add-force, "struct proc *",
"int", "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, set, set, "struct proc *", "int",
"uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, set_failure, set-failure,
"struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, sub, sub, "struct proc *", "int",
"uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, sub_cred, sub-cred, "struct ucred *",
"int", "uint64_t");
SDT_PROBE_DEFINE1(racct, kernel, racct, create, create, "struct racct *");
SDT_PROBE_DEFINE1(racct, kernel, racct, destroy, destroy, "struct racct *");
/* NOTE(review): join, join-failure and leave are not fired in this file. */
SDT_PROBE_DEFINE2(racct, kernel, racct, join, join, "struct racct *",
"struct racct *");
SDT_PROBE_DEFINE2(racct, kernel, racct, join_failure, join-failure,
"struct racct *", "struct racct *");
SDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *",
"struct racct *");
/*
 * Property flags of every accounted resource, indexed by RACCT_* number.
 * NOTE(review): presumably this is the table the racct_is_*() predicates
 * consult -- confirm against sys/racct.h.
 */
int racct_types[] = {
	[RACCT_CPU] = RACCT_IN_THOUSANDS,
	[RACCT_FSIZE] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_DATA] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_STACK] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_CORE] = RACCT_DENIABLE,
	[RACCT_RSS] = RACCT_RECLAIMABLE,
	[RACCT_MEMLOCK] = RACCT_RECLAIMABLE | RACCT_DENIABLE,
	[RACCT_NPROC] = RACCT_RECLAIMABLE | RACCT_DENIABLE,
	[RACCT_NOFILE] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_SBSIZE] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_VMEM] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_NPTS] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_SWAP] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NTHR] = RACCT_RECLAIMABLE | RACCT_DENIABLE,
	[RACCT_MSGQQUEUED] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_MSGQSIZE] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NMSGQ] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NSEM] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NSEMOP] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_NSHM] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_SHMSIZE] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_WALLCLOCK] = RACCT_IN_THOUSANDS
};
/*
 * Fold every resource counter of 'src' into the matching counter of 'dest'.
 * The caller must hold racct_lock.
 */
static void
racct_add_racct(struct racct *dest, const struct racct *src)
{
	int res;

	mtx_assert(&racct_lock, MA_OWNED);

	res = 0;
	while (res <= RACCT_MAX) {
		/* Neither side may already carry a negative count. */
		KASSERT(dest->r_resources[res] >= 0,
		    ("racct propagation meltdown: dest < 0"));
		KASSERT(src->r_resources[res] >= 0,
		    ("racct propagation meltdown: src < 0"));
		dest->r_resources[res] += src->r_resources[res];
		res++;
	}
}
/*
 * Remove the usage recorded in 'src' from 'dest'.  Only reclaimable
 * resources are subtracted; a counter that would go negative is clamped
 * to zero, which is tolerated only for sloppy or dampened resources.
 * The caller must hold racct_lock.
 */
static void
racct_sub_racct(struct racct *dest, const struct racct *src)
{
	int res;

	mtx_assert(&racct_lock, MA_OWNED);

	for (res = 0; res <= RACCT_MAX; res++) {
		/* Strictly tracked resources must stay consistent. */
		if (!(racct_is_sloppy(res) || racct_is_dampened(res))) {
			KASSERT(dest->r_resources[res] >= 0,
			    ("racct propagation meltdown: dest < 0"));
			KASSERT(src->r_resources[res] >= 0,
			    ("racct propagation meltdown: src < 0"));
			KASSERT(src->r_resources[res] <= dest->r_resources[res],
			    ("racct propagation meltdown: src > dest"));
		}

		if (!racct_is_reclaimable(res))
			continue;

		dest->r_resources[res] -= src->r_resources[res];
		if (dest->r_resources[res] >= 0)
			continue;

		/* Underflow: only acceptable for loosely tracked resources. */
		KASSERT(racct_is_sloppy(res) ||
		    racct_is_dampened(res),
		    ("racct_sub_racct: usage < 0"));
		dest->r_resources[res] = 0;
	}
}
/*
 * Allocate a zero-filled racct from racct_zone and store it in '*racctp'.
 * '*racctp' must be NULL on entry.  The allocation is made with M_WAITOK.
 */
void
racct_create(struct racct **racctp)
{
	struct racct *fresh;

	SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0);

	KASSERT(*racctp == NULL, ("racct already allocated"));

	fresh = uma_zalloc(racct_zone, M_WAITOK | M_ZERO);
	*racctp = fresh;
}
static void
racct_destroy_locked(struct racct **racctp)
{
int i;
struct racct *racct;
SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0);
mtx_assert(&racct_lock, MA_OWNED);
KASSERT(racctp != NULL, ("NULL racctp"));
KASSERT(*racctp != NULL, ("NULL racct"));
racct = *racctp;
for (i = 0; i <= RACCT_MAX; i++) {
if (racct_is_sloppy(i))
continue;
if (!racct_is_reclaimable(i))
continue;
if (racct_is_dampened(i))
continue;
KASSERT(racct->r_resources[i] == 0,
("destroying non-empty racct: "
"%ju allocated for resource %d\n",
racct->r_resources[i], i));
}
uma_zfree(racct_zone, racct);
*racctp = NULL;
}
/*
 * Locking wrapper around racct_destroy_locked(): takes racct_lock, frees
 * '*racct' and leaves it NULL.
 */
void
racct_destroy(struct racct **racct)
{

	mtx_lock(&racct_lock);
	racct_destroy_locked(racct);
	mtx_unlock(&racct_lock);
}
/*
 * Adjust the usage of 'resource' recorded in 'racct' by 'amount'.
 * Only this one racct is modified; callers that need to charge a whole
 * hierarchy (e.g. nested prisons) iterate over it themselves.
 *
 * Differently from other cases, 'amount' here may be less than zero,
 * so it is taken as a signed value.  Callers holding an unsigned amount
 * pass it (or its negation) and rely on the usual conversion to int64_t.
 *
 * If the counter would drop below zero it is clamped to zero; that is
 * asserted to happen only for sloppy or dampened resources.
 * The caller must hold racct_lock.
 */
static void
racct_alloc_resource(struct racct *racct, int resource,
    int64_t amount)
{

	mtx_assert(&racct_lock, MA_OWNED);
	KASSERT(racct != NULL, ("NULL racct"));

	racct->r_resources[resource] += amount;
	if (racct->r_resources[resource] < 0) {
		KASSERT(racct_is_sloppy(resource) ||
		    racct_is_dampened(resource),
		    ("racct_alloc_resource: usage < 0"));
		racct->r_resources[resource] = 0;
	}
}
/*
* Increase allocation of 'resource' by 'amount' for process 'p'.
* Return 0 if it's below limits, or errno, if it's not.
*/
int
racct_add(struct proc *p, int resource, uint64_t amount)
{
#ifdef RCTL
int error;
#endif
if (p->p_flag & P_SYSTEM)
return (0);
SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0);
/*
* We need proc lock to dereference p->p_ucred.
*/
PROC_LOCK_ASSERT(p, MA_OWNED);
KASSERT(amount >= 0, ("racct_add: invalid amount for resource %d: %ju",
resource, amount));
mtx_lock(&racct_lock);
#ifdef RCTL
error = rctl_enforce(p, resource, amount);
if (error && racct_is_deniable(resource)) {
SDT_PROBE(racct, kernel, rusage, add_failure, p, resource,
amount, 0, 0);
mtx_unlock(&racct_lock);
return (error);
}
#endif
racct_alloc_resource(p->p_racct, resource, amount);
racct_add_cred_locked(p->p_ucred, resource, amount);
mtx_unlock(&racct_lock);
return (0);
}
/*
 * Charge 'amount' of 'resource' to every container reachable from 'cred':
 * the real-uid info, the prison and each of its ancestors, and the login
 * class.  Expected to run with racct_lock held (racct_alloc_resource()
 * asserts it).
 */
static void
racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount)
{
	struct prison *jail;

	SDT_PROBE(racct, kernel, rusage, add_cred, cred, resource, amount,
	    0, 0);

	/* 'amount' is unsigned, so this check cannot fail. */
	KASSERT(amount >= 0,
	    ("racct_add_cred: invalid amount for resource %d: %ju",
	    resource, amount));

	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount);

	/* Walk from the credential's prison up through its parents. */
	jail = cred->cr_prison;
	while (jail != NULL) {
		racct_alloc_resource(jail->pr_racct, resource, amount);
		jail = jail->pr_parent;
	}

	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount);
}
/*
 * Charge 'amount' of 'resource' to the containers of credential 'cred'.
 * No limits are consulted and the call cannot fail.
 *
 * XXX: Shouldn't this ever return an error?
 */
void
racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
{

	/* Take racct_lock just for the duration of the update. */
	mtx_lock(&racct_lock);
	racct_add_cred_locked(cred, resource, amount);
	mtx_unlock(&racct_lock);
}
/*
 * Increase allocation of 'resource' by 'amount' for process 'p'.
 * Doesn't check for limits and never fails.
 *
 * System processes (P_SYSTEM) are not accounted.  The caller must hold
 * the process lock.
 */
void
racct_add_force(struct proc *p, int resource, uint64_t amount)
{

	if (p->p_flag & P_SYSTEM)
		return;

	SDT_PROBE(racct, kernel, rusage, add_force, p, resource, amount, 0, 0);

	/*
	 * We need proc lock to dereference p->p_ucred.
	 */
	PROC_LOCK_ASSERT(p, MA_OWNED);
	/* 'amount' is unsigned, so this check cannot fail. */
	KASSERT(amount >= 0,
	    ("racct_add_force: invalid amount for resource %d: %ju",
	    resource, amount));

	/*
	 * Charge the process and its credential under one acquisition of
	 * racct_lock, as racct_add() does, so that the per-process and
	 * per-credential counters are never observed out of step.
	 */
	mtx_lock(&racct_lock);
	racct_alloc_resource(p->p_racct, resource, amount);
	racct_add_cred_locked(p->p_ucred, resource, amount);
	mtx_unlock(&racct_lock);
}
static int
racct_set_locked(struct proc *p, int resource, uint64_t amount)
{
int64_t diff;
#ifdef RCTL
int error;
#endif
if (p->p_flag & P_SYSTEM)
return (0);
SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);
/*
* We need proc lock to dereference p->p_ucred.
*/
PROC_LOCK_ASSERT(p, MA_OWNED);
KASSERT(amount >= 0, ("racct_set: invalid amount for resource %d: %ju",
resource, amount));
diff = amount - p->p_racct->r_resources[resource];
#ifdef notyet
KASSERT(diff >= 0 || racct_is_reclaimable(resource),
("racct_set: usage of non-reclaimable resource %d dropping",
resource));
#endif
#ifdef RCTL
if (diff > 0) {
error = rctl_enforce(p, resource, diff);
if (error && racct_is_deniable(resource)) {
SDT_PROBE(racct, kernel, rusage, set_failure, p,
resource, amount, 0, 0);
return (error);
}
}
#endif
racct_alloc_resource(p->p_racct, resource, diff);
if (diff > 0)
racct_add_cred_locked(p->p_ucred, resource, diff);
else if (diff < 0)
racct_sub_cred_locked(p->p_ucred, resource, -diff);
return (0);
}
/*
 * Set the usage of 'resource' to 'amount' for process 'p'.
 * Returns 0 when the new value is accepted, or an errno value when a
 * limit denies it.
 *
 * Lowering the usage always succeeds, even if the result is still
 * above the limit.
 */
int
racct_set(struct proc *p, int resource, uint64_t amount)
{
	int rv;

	mtx_lock(&racct_lock);
	rv = racct_set_locked(p, resource, amount);
	mtx_unlock(&racct_lock);

	return (rv);
}
/*
 * Set the usage of 'resource' to 'amount' for process 'p' without
 * consulting any limits; the difference is propagated to the containers
 * of the process credential.  System processes are skipped.
 * The caller must hold the process lock.
 */
void
racct_set_force(struct proc *p, int resource, uint64_t amount)
{
	int64_t delta;

	if (p->p_flag & P_SYSTEM)
		return;

	SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);

	/*
	 * We need proc lock to dereference p->p_ucred.
	 */
	PROC_LOCK_ASSERT(p, MA_OWNED);
	/* 'amount' is unsigned, so this check cannot fail. */
	KASSERT(amount >= 0,
	    ("racct_set_force: invalid amount for resource %d: %ju",
	    resource, amount));

	mtx_lock(&racct_lock);
	delta = amount - p->p_racct->r_resources[resource];
	racct_alloc_resource(p->p_racct, resource, delta);
	if (delta < 0)
		racct_sub_cred_locked(p->p_ucred, resource, -delta);
	else if (delta > 0)
		racct_add_cred_locked(p->p_ucred, resource, delta);
	mtx_unlock(&racct_lock);
}
/*
 * Return how much of 'resource' process 'p' may keep allocated in total.
 * Requests beyond that are denied unless the resource is undeniable.
 * The amount already allocated is not taken into account.
 *
 * Without RCTL there are no limits and UINT64_MAX is returned.
 */
uint64_t
racct_get_limit(struct proc *p, int resource)
{
	uint64_t limit;

#ifdef RCTL
	limit = rctl_get_limit(p, resource);
#else
	limit = UINT64_MAX;
#endif
	return (limit);
}
/*
 * Return how much more of 'resource' process 'p' may still allocate.
 * Requests beyond that are denied unless the resource is undeniable.
 * Unlike racct_get_limit(), the amount already allocated does matter.
 *
 * Without RCTL there are no limits and UINT64_MAX is returned.
 */
uint64_t
racct_get_available(struct proc *p, int resource)
{
	uint64_t avail;

#ifdef RCTL
	avail = rctl_get_available(p, resource);
#else
	avail = UINT64_MAX;
#endif
	return (avail);
}
/*
 * Release 'amount' of 'resource' previously charged to process 'p',
 * both from the process itself and from the containers of its
 * credential.  Only valid for reclaimable resources.  System processes
 * are skipped.  The caller must hold the process lock.
 */
void
racct_sub(struct proc *p, int resource, uint64_t amount)
{

	if (p->p_flag & P_SYSTEM)
		return;

	SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0);

	/*
	 * We need proc lock to dereference p->p_ucred.
	 */
	PROC_LOCK_ASSERT(p, MA_OWNED);
	/* 'amount' is unsigned, so this check cannot fail. */
	KASSERT(amount >= 0, ("racct_sub: invalid amount for resource %d: %ju",
	    resource, amount));
	KASSERT(racct_is_reclaimable(resource),
	    ("racct_sub: called for non-reclaimable resource %d", resource));

	mtx_lock(&racct_lock);

	/* Never give back more than the process has been charged. */
	KASSERT(amount <= p->p_racct->r_resources[resource],
	    ("racct_sub: freeing %ju of resource %d, which is more "
	    "than allocated %jd for %s (pid %d)", amount, resource,
	    (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));

	racct_alloc_resource(p->p_racct, resource, -amount);
	racct_sub_cred_locked(p->p_ucred, resource, amount);

	mtx_unlock(&racct_lock);
}
/*
 * Release 'amount' of 'resource' from every container reachable from
 * 'cred': the real-uid info, the prison and each of its ancestors, and
 * the login class.  Expected to run with racct_lock held
 * (racct_alloc_resource() asserts it).
 */
static void
racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount)
{
	struct prison *jail;

	SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount,
	    0, 0);

	/* 'amount' is unsigned, so this check cannot fail. */
	KASSERT(amount >= 0,
	    ("racct_sub_cred: invalid amount for resource %d: %ju",
	    resource, amount));
#ifdef notyet
	KASSERT(racct_is_reclaimable(resource),
	    ("racct_sub_cred: called for non-reclaimable resource %d",
	    resource));
#endif

	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);

	/* Walk from the credential's prison up through its parents. */
	jail = cred->cr_prison;
	while (jail != NULL) {
		racct_alloc_resource(jail->pr_racct, resource, -amount);
		jail = jail->pr_parent;
	}

	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount);
}
/*
 * Release 'amount' of 'resource' from the containers of credential
 * 'cred'.  Locking wrapper around racct_sub_cred_locked().
 */
void
racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
{

	/* Take racct_lock just for the duration of the update. */
	mtx_lock(&racct_lock);
	racct_sub_cred_locked(cred, resource, amount);
	mtx_unlock(&racct_lock);
}
/*
* Inherit resource usage information from the parent process.
*/
int
racct_proc_fork(struct proc *parent, struct proc *child)
{
int i, error = 0;
/*
* Create racct for the child process.
*/
racct_create(&child->p_racct);
/*
* No resource accounting for kernel processes.
*/
if (child->p_flag & P_SYSTEM)
return (0);
PROC_LOCK(parent);
PROC_LOCK(child);
mtx_lock(&racct_lock);
/*
* Inherit resource usage.
*/
for (i = 0; i <= RACCT_MAX; i++) {
if (parent->p_racct->r_resources[i] == 0 ||
!racct_is_inheritable(i))
continue;
error = racct_set_locked(child, i,
parent->p_racct->r_resources[i]);
if (error != 0) {
/*
* XXX: The only purpose of these two lines is
* to prevent from tripping checks in racct_destroy().
*/
for (i = 0; i <= RACCT_MAX; i++)
racct_set_locked(child, i, 0);
goto out;
}
}
#ifdef RCTL
error = rctl_proc_fork(parent, child);
if (error != 0) {
/*
* XXX: The only purpose of these two lines is to prevent from
* tripping checks in racct_destroy().
*/
for (i = 0; i <= RACCT_MAX; i++)
racct_set_locked(child, i, 0);
}
#endif
out:
if (error != 0)
racct_destroy_locked(&child->p_racct);
mtx_unlock(&racct_lock);
PROC_UNLOCK(child);
PROC_UNLOCK(parent);
return (error);
}
/*
 * Tear down the accounting state of process 'p': record its final CPU
 * time, drop the usage of a few resources that are not released
 * elsewhere, then release and destroy its racct.
 */
void
racct_proc_exit(struct proc *p)
{
	/* Resources whose usage is forced back to zero here. */
	static const int released[] = {
		RACCT_FSIZE, RACCT_NPTS, RACCT_NTHR, RACCT_RSS
	};
	uint64_t cputime;
	unsigned int idx;

	PROC_LOCK(p);
	/*
	 * We don't need to calculate rux, proc_reap() has already done this.
	 */
	cputime = cputick2usec(p->p_rux.rux_runtime);
#ifdef notyet
	KASSERT(cputime >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
#else
	/* Never report less CPU time than was recorded before. */
	if (cputime < p->p_prev_runtime)
		cputime = p->p_prev_runtime;
#endif
	racct_set(p, RACCT_CPU, cputime);

	/*
	 * XXX: Free this some other way.
	 */
	for (idx = 0; idx < sizeof(released) / sizeof(released[0]); idx++)
		racct_set(p, released[idx], 0);

	PROC_UNLOCK(p);

#ifdef RCTL
	rctl_racct_release(p->p_racct);
#endif
	racct_destroy(&p->p_racct);
}
/*
 * Called after the credential of process 'p' changed from 'oldcred' to
 * 'newcred'.  For each kind of container that differs between the two
 * (real-uid info, login class, prison chain) the usage of 'p' is taken
 * out of the old container(s) and added to the new one(s).
 * Must be called without the process lock held.
 */
void
racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
    struct ucred *newcred)
{
	struct uidinfo *uip_from, *uip_to;
	struct loginclass *lc_from, *lc_to;
	struct prison *pr_from, *pr_to, *jail;

	PROC_LOCK_ASSERT(p, MA_NOTOWNED);

	uip_to = newcred->cr_ruidinfo;
	uip_from = oldcred->cr_ruidinfo;
	lc_to = newcred->cr_loginclass;
	lc_from = oldcred->cr_loginclass;
	pr_to = newcred->cr_prison;
	pr_from = oldcred->cr_prison;

	mtx_lock(&racct_lock);

	/* Per-user accounting. */
	if (uip_from != uip_to) {
		racct_sub_racct(uip_from->ui_racct, p->p_racct);
		racct_add_racct(uip_to->ui_racct, p->p_racct);
	}

	/* Per-loginclass accounting. */
	if (lc_from != lc_to) {
		racct_sub_racct(lc_from->lc_racct, p->p_racct);
		racct_add_racct(lc_to->lc_racct, p->p_racct);
	}

	/* Per-jail accounting: leave the whole old chain, join the new one. */
	if (pr_from != pr_to) {
		jail = pr_from;
		while (jail != NULL) {
			racct_sub_racct(jail->pr_racct, p->p_racct);
			jail = jail->pr_parent;
		}
		jail = pr_to;
		while (jail != NULL) {
			racct_add_racct(jail->pr_racct, p->p_racct);
			jail = jail->pr_parent;
		}
	}

	mtx_unlock(&racct_lock);

#ifdef RCTL
	rctl_proc_ucred_changed(p, newcred);
#endif
}
/*
 * Body of the "racctd" kernel process.  Loops forever: on each pass it
 * walks all processes under a shared allproc_lock and refreshes the
 * RACCT_CPU and RACCT_WALLCLOCK usage of every normal, non-system process,
 * then sleeps for 'hz' ticks.
 */
static void
racctd(void)
{
struct thread *td;
struct proc *p;
struct timeval wallclock;
uint64_t runtime;
for (;;) {
sx_slock(&allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
/* Only fully set up, non-kernel processes are accounted. */
if (p->p_state != PRS_NORMAL)
continue;
if (p->p_flag & P_SYSTEM)
continue;
/* Wall-clock time: current uptime minus the process start time. */
microuptime(&wallclock);
timevalsub(&wallclock, &p->p_stats->p_start);
PROC_LOCK(p);
PROC_SLOCK(p);
/* Aggregate per-thread usage into p->p_rux before reading it. */
FOREACH_THREAD_IN_PROC(p, td) {
ruxagg(p, td);
/*
 * NOTE(review): the thread lock is taken and dropped again
 * right after ruxagg(); the purpose is not evident from this
 * file -- confirm whether ruxagg() was meant to run under it.
 */
thread_lock(td);
thread_unlock(td);
}
runtime = cputick2usec(p->p_rux.rux_runtime);
PROC_SUNLOCK(p);
#ifdef notyet
KASSERT(runtime >= p->p_prev_runtime,
("runtime < p_prev_runtime"));
#else
/* Keep the reported CPU time from going backwards. */
if (runtime < p->p_prev_runtime)
runtime = p->p_prev_runtime;
#endif
p->p_prev_runtime = runtime;
mtx_lock(&racct_lock);
/* Return values are ignored here. */
racct_set_locked(p, RACCT_CPU, runtime);
/* Wall-clock time is passed in microseconds. */
racct_set_locked(p, RACCT_WALLCLOCK,
wallclock.tv_sec * 1000000 + wallclock.tv_usec);
mtx_unlock(&racct_lock);
PROC_UNLOCK(p);
}
sx_sunlock(&allproc_lock);
/* Sleep for hz ticks before the next pass. */
pause("-", hz);
}
}
/*
 * Descriptor handed to kproc_start() by the SYSINIT below so that racctd()
 * is started as a kernel process named "racctd".
 * NOTE(review): the trailing NULL is presumably the optional location for
 * the created process pointer -- confirm against struct kproc_desc.
 */
static struct kproc_desc racctd_kp = {
"racctd",
racctd,
NULL
};
SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp);
/*
 * Boot-time initialization, run via SYSINIT at SI_SUB_RACCT: create the
 * UMA zone that racct structures are allocated from and give prison0 its
 * racct.
 */
static void
racct_init(void)
{

	racct_zone = uma_zcreate("racct", sizeof(struct racct), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);

	/*
	 * XXX: Move this somewhere.
	 */
	racct_create(&prison0.pr_racct);
}
SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL);
#else /* !RACCT */
/*
 * Stub used when RACCT is not compiled in: nothing is accounted and the
 * request always succeeds.
 */
int
racct_add(struct proc *p, int resource, uint64_t amount)
{

	(void)p;
	(void)resource;
	(void)amount;
	return (0);
}
/*
 * Stub used when RACCT is not compiled in: does nothing.
 */
void
racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
{

	(void)cred;
	(void)resource;
	(void)amount;
}
/*
 * Stub used when RACCT is not compiled in: does nothing.
 * The function returns void, so it must not return a value.
 */
void
racct_add_force(struct proc *p, int resource, uint64_t amount)
{

	(void)p;
	(void)resource;
	(void)amount;
}
/*
 * Stub used when RACCT is not compiled in: nothing is accounted and the
 * request always succeeds.
 */
int
racct_set(struct proc *p, int resource, uint64_t amount)
{

	(void)p;
	(void)resource;
	(void)amount;
	return (0);
}
/*
 * Stub used when RACCT is not compiled in: does nothing.
 */
void
racct_sub(struct proc *p, int resource, uint64_t amount)
{

	(void)p;
	(void)resource;
	(void)amount;
}
/*
 * Stub used when RACCT is not compiled in: does nothing.
 */
void
racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
{

	(void)cred;
	(void)resource;
	(void)amount;
}
/*
 * Stub used when RACCT is not compiled in: there are no limits, so the
 * largest representable value is reported.
 */
uint64_t
racct_get_limit(struct proc *p, int resource)
{

	(void)p;
	(void)resource;
	return (UINT64_MAX);
}
/*
 * Stub used when RACCT is not compiled in: no racct is allocated and
 * '*racctp' is left untouched.
 */
void
racct_create(struct racct **racctp)
{

	(void)racctp;
}
/*
 * Stub used when RACCT is not compiled in: there is nothing to free and
 * '*racctp' is left untouched.
 */
void
racct_destroy(struct racct **racctp)
{

	(void)racctp;
}
/*
 * Stub used when RACCT is not compiled in: nothing to inherit, always
 * succeeds.
 */
int
racct_proc_fork(struct proc *parent, struct proc *child)
{

	(void)parent;
	(void)child;
	return (0);
}
/*
 * Stub used when RACCT is not compiled in: does nothing.
 */
void
racct_proc_exit(struct proc *p)
{

	(void)p;
}
#endif /* !RACCT */

View File

@ -65,6 +65,8 @@ options MAC # TrustedBSD MAC Framework
#options KDTRACE_HOOKS # Kernel DTrace hooks
options INCLUDE_CONFIG_FILE # Include this file in kernel
options RACCT
# Debugging for use in -current
options KDB # Enable kernel debugger support.
options DDB # Support DDB.

View File

@ -2930,6 +2930,9 @@ options AAC_DEBUG # Debugging levels:
# 2 - extremely noisy, emit trace
# items in loops, etc.
# Resource Accounting
options RACCT
# Yet more undocumented options for linting.
# BKTR_ALLOC_PAGES has no effect except to cause warnings, and
# BROOKTREE_ALLOC_PAGES hasn't actually been superseded by it, since the

View File

@ -2225,6 +2225,7 @@ kern/kern_poll.c optional device_polling
kern/kern_priv.c standard
kern/kern_proc.c standard
kern/kern_prot.c standard
kern/kern_racct.c standard
kern/kern_resource.c standard
kern/kern_rmlock.c standard
kern/kern_rwlock.c standard

View File

@ -873,6 +873,9 @@ SDP_DEBUG opt_ofed.h
IPOIB_DEBUG opt_ofed.h
IPOIB_CM opt_ofed.h
# Resource Accounting
RACCT opt_global.h
# At least one of the AR71XX ubiquiti boards has a Redboot configuration
# that "lies" about the amount of RAM it has. Until a cleaner method is
# defined, this option will suffice in overriding what Redboot says.

View File

@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$");
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/proc.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/systm.h>
#include <sys/signalvar.h>
@ -526,6 +527,9 @@ proc0_init(void *dummy __unused)
p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = pageablemem;
p->p_cpulimit = RLIM_INFINITY;
/* Initialize resource accounting structures. */
racct_create(&p->p_racct);
p->p_stats = pstats_alloc();
/* Allocate a prototype map so we have something to fork. */

View File

@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include <sys/wait.h>
#include <sys/vmmeter.h>
#include <sys/vnode.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/sbuf.h>
#include <sys/signalvar.h>
@ -740,6 +741,11 @@ proc_reap(struct thread *td, struct proc *p, int *status, int options,
*/
(void)chgproccnt(p->p_ucred->cr_ruidinfo, -1, 0);
/*
* Destroy resource accounting information associated with the process.
*/
racct_proc_exit(p);
/*
* Free credentials, arguments, and sigacts.
*/

View File

@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/pioctl.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/syscall.h>
@ -783,6 +784,21 @@ fork1(struct thread *td, int flags, int pages, struct proc **procp)
knlist_init_mtx(&newproc->p_klist, &newproc->p_mtx);
STAILQ_INIT(&newproc->p_ktr);
/*
* XXX: This is ugly; when we copy resource usage, we need to bump
* per-cred resource counters.
*/
newproc->p_ucred = p1->p_ucred;
/*
* Initialize resource accounting for the child process.
*/
error = racct_proc_fork(p1, newproc);
if (error != 0) {
error = EAGAIN;
goto fail1;
}
/* We have to lock the process tree while we look for a pid. */
sx_slock(&proctree_lock);
@ -827,6 +843,7 @@ fork1(struct thread *td, int flags, int pages, struct proc **procp)
error = EAGAIN;
fail:
racct_proc_exit(newproc);
sx_sunlock(&proctree_lock);
if (ppsratecheck(&lastfail, &curfail, 1))
printf("maxproc limit exceeded by uid %i, please see tuning(7) and login.conf(5).\n",

View File

@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <sys/jail.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/racct.h>
#include <sys/sx.h>
#include <sys/sysent.h>
#include <sys/namei.h>
@ -1195,6 +1196,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
root = mypr->pr_root;
vref(root);
}
racct_create(&pr->pr_racct);
strlcpy(pr->pr_hostuuid, DEFAULT_HOSTUUID, HOSTUUIDLEN);
pr->pr_flags |= PR_HOST;
#if defined(INET) || defined(INET6)
@ -2295,6 +2297,9 @@ do_jail_attach(struct thread *td, struct prison *pr)
newcred->cr_prison = pr;
p->p_ucred = newcred;
PROC_UNLOCK(p);
#ifdef RACCT
racct_proc_ucred_changed(p, oldcred, newcred);
#endif
crfree(oldcred);
prison_deref(ppr, PD_DEREF | PD_DEUREF);
return (0);
@ -2527,6 +2532,7 @@ prison_deref(struct prison *pr, int flags)
if (pr->pr_cpuset != NULL)
cpuset_rel(pr->pr_cpuset);
osd_jail_exit(pr);
racct_destroy(&pr->pr_racct);
free(pr, M_PRISON);
/* Removing a prison frees a reference on its parent. */
@ -4263,6 +4269,17 @@ SYSCTL_JAIL_PARAM(_allow, quotas, CTLTYPE_INT | CTLFLAG_RW,
SYSCTL_JAIL_PARAM(_allow, socket_af, CTLTYPE_INT | CTLFLAG_RW,
"B", "Jail may create sockets other than just UNIX/IPv4/IPv6/route");
void
prison_racct_foreach(void (*callback)(struct racct *racct,
void *arg2, void *arg3), void *arg2, void *arg3)
{
struct prison *pr;
sx_slock(&allprison_lock);
TAILQ_FOREACH(pr, &allprison, pr_list)
(callback)(pr->pr_racct, arg2, arg3);
sx_sunlock(&allprison_lock);
}
#ifdef DDB

View File

@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/queue.h>
#include <sys/racct.h>
#include <sys/refcount.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
@ -90,6 +91,7 @@ loginclass_free(struct loginclass *lc)
mtx_lock(&loginclasses_lock);
if (refcount_release(&lc->lc_refcount)) {
racct_destroy(&lc->lc_racct);
LIST_REMOVE(lc, lc_next);
mtx_unlock(&loginclasses_lock);
free(lc, M_LOGINCLASS);
@ -115,6 +117,7 @@ loginclass_find(const char *name)
return (NULL);
newlc = malloc(sizeof(*newlc), M_LOGINCLASS, M_ZERO | M_WAITOK);
racct_create(&newlc->lc_racct);
mtx_lock(&loginclasses_lock);
LIST_FOREACH(lc, &loginclasses, lc_next) {
@ -124,6 +127,7 @@ loginclass_find(const char *name)
/* Found loginclass with a matching name? */
loginclass_hold(lc);
mtx_unlock(&loginclasses_lock);
racct_destroy(&newlc->lc_racct);
free(newlc, M_LOGINCLASS);
return (lc);
}
@ -205,13 +209,27 @@ setloginclass(struct thread *td, struct setloginclass_args *uap)
newcred->cr_loginclass = newlc;
p->p_ucred = newcred;
PROC_UNLOCK(p);
#ifdef RACCT
racct_proc_ucred_changed(p, oldcred, newcred);
#endif
loginclass_free(oldcred->cr_loginclass);
crfree(oldcred);
return (0);
}
void
loginclass_racct_foreach(void (*callback)(struct racct *racct,
void *arg2, void *arg3), void *arg2, void *arg3)
{
struct loginclass *lc;
mtx_lock(&loginclasses_lock);
LIST_FOREACH(lc, &loginclasses, lc_next)
(callback)(lc->lc_racct, arg2, arg3);
mtx_unlock(&loginclasses_lock);
}
static void
lc_init(void)
{

837
sys/kern/kern_racct.c Normal file
View File

@ -0,0 +1,837 @@
/*-
* Copyright (c) 2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Edward Tomasz Napierala under sponsorship
* from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_kdtrace.h"
#include <sys/param.h>
#include <sys/eventhandler.h>
#include <sys/param.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/loginclass.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/sbuf.h>
#include <sys/sched.h>
#include <sys/sdt.h>
#include <sys/sx.h>
#include <sys/sysent.h>
#include <sys/sysproto.h>
#include <sys/systm.h>
#include <sys/umtx.h>
#ifdef RCTL
#include <sys/rctl.h>
#endif
#ifdef RACCT
/* Advertise the "racct" kernel feature. */
FEATURE(racct, "Resource Accounting");
/*
 * Single mutex taken around every update of r_resources[] in this file;
 * it is initialized through MTX_SYSINIT as a default (MTX_DEF) mutex.
 */
static struct mtx racct_lock;
MTX_SYSINIT(racct_lock, &racct_lock, "racct lock", MTX_DEF);
/* UMA zone that struct racct is allocated from. */
static uma_zone_t racct_zone;
/* Forward declarations for helpers used before their definitions. */
static void racct_sub_racct(struct racct *dest, const struct racct *src);
static void racct_sub_cred_locked(struct ucred *cred, int resource,
uint64_t amount);
static void racct_add_cred_locked(struct ucred *cred, int resource,
uint64_t amount);
/*
 * DTrace SDT provider and probes.  The "rusage" probes take the subject
 * (process or credential), the resource number and the amount; the "racct"
 * probes take racct pointers.
 */
SDT_PROVIDER_DEFINE(racct);
SDT_PROBE_DEFINE3(racct, kernel, rusage, add, add, "struct proc *", "int",
"uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, add_failure, add-failure,
"struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, add_cred, add-cred, "struct ucred *",
"int", "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, add_force, add-force, "struct proc *",
"int", "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, set, set, "struct proc *", "int",
"uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, set_failure, set-failure,
"struct proc *", "int", "uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, sub, sub, "struct proc *", "int",
"uint64_t");
SDT_PROBE_DEFINE3(racct, kernel, rusage, sub_cred, sub-cred, "struct ucred *",
"int", "uint64_t");
SDT_PROBE_DEFINE1(racct, kernel, racct, create, create, "struct racct *");
SDT_PROBE_DEFINE1(racct, kernel, racct, destroy, destroy, "struct racct *");
SDT_PROBE_DEFINE2(racct, kernel, racct, join, join, "struct racct *",
"struct racct *");
SDT_PROBE_DEFINE2(racct, kernel, racct, join_failure, join-failure,
"struct racct *", "struct racct *");
SDT_PROBE_DEFINE2(racct, kernel, racct, leave, leave, "struct racct *",
"struct racct *");
/*
 * Property flags of every accounted resource, indexed by RACCT_* number.
 * NOTE(review): presumably this is the table the racct_is_*() predicates
 * consult -- confirm against sys/racct.h.
 */
int racct_types[] = {
	[RACCT_CPU] = RACCT_IN_THOUSANDS,
	[RACCT_FSIZE] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_DATA] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_STACK] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_CORE] = RACCT_DENIABLE,
	[RACCT_RSS] = RACCT_RECLAIMABLE,
	[RACCT_MEMLOCK] = RACCT_RECLAIMABLE | RACCT_DENIABLE,
	[RACCT_NPROC] = RACCT_RECLAIMABLE | RACCT_DENIABLE,
	[RACCT_NOFILE] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_SBSIZE] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_VMEM] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_NPTS] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_SWAP] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NTHR] = RACCT_RECLAIMABLE | RACCT_DENIABLE,
	[RACCT_MSGQQUEUED] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_MSGQSIZE] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NMSGQ] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NSEM] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_NSEMOP] = RACCT_RECLAIMABLE | RACCT_INHERITABLE | RACCT_DENIABLE,
	[RACCT_NSHM] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_SHMSIZE] = RACCT_RECLAIMABLE | RACCT_DENIABLE | RACCT_SLOPPY,
	[RACCT_WALLCLOCK] = RACCT_IN_THOUSANDS
};
/*
 * Fold every resource counter of 'src' into the matching counter of 'dest'.
 * The caller must hold racct_lock.
 */
static void
racct_add_racct(struct racct *dest, const struct racct *src)
{
	int res;

	mtx_assert(&racct_lock, MA_OWNED);

	res = 0;
	while (res <= RACCT_MAX) {
		/* Neither side may already carry a negative count. */
		KASSERT(dest->r_resources[res] >= 0,
		    ("racct propagation meltdown: dest < 0"));
		KASSERT(src->r_resources[res] >= 0,
		    ("racct propagation meltdown: src < 0"));
		dest->r_resources[res] += src->r_resources[res];
		res++;
	}
}
/*
 * Subtract the counters of 'src' from 'dest' for every reclaimable
 * resource.  A result below zero is clamped to zero, which is only
 * expected for sloppy resources.  The caller must hold racct_lock.
 */
static void
racct_sub_racct(struct racct *dest, const struct racct *src)
{
	int res;

	mtx_assert(&racct_lock, MA_OWNED);

	for (res = 0; res <= RACCT_MAX; res++) {
		/* Strictly tracked resources must stay consistent. */
		if (!racct_is_sloppy(res)) {
			KASSERT(dest->r_resources[res] >= 0,
			    ("racct propagation meltdown: dest < 0"));
			KASSERT(src->r_resources[res] >= 0,
			    ("racct propagation meltdown: src < 0"));
			KASSERT(src->r_resources[res] <= dest->r_resources[res],
			    ("racct propagation meltdown: src > dest"));
		}

		/* Non-reclaimable usage never drops. */
		if (!racct_is_reclaimable(res))
			continue;

		dest->r_resources[res] -= src->r_resources[res];
		if (dest->r_resources[res] >= 0)
			continue;

		KASSERT(racct_is_sloppy(res),
		    ("racct_sub_racct: usage < 0"));
		dest->r_resources[res] = 0;
	}
}
/*
 * Allocate a zero-filled racct from racct_zone and store it in '*racctp'.
 * '*racctp' must be NULL on entry.  The allocation is made with M_WAITOK.
 */
void
racct_create(struct racct **racctp)
{
	struct racct *fresh;

	SDT_PROBE(racct, kernel, racct, create, racctp, 0, 0, 0, 0);

	KASSERT(*racctp == NULL, ("racct already allocated"));

	fresh = uma_zalloc(racct_zone, M_WAITOK | M_ZERO);
	*racctp = fresh;
}
static void
racct_destroy_locked(struct racct **racctp)
{
int i;
struct racct *racct;
SDT_PROBE(racct, kernel, racct, destroy, racctp, 0, 0, 0, 0);
mtx_assert(&racct_lock, MA_OWNED);
KASSERT(racctp != NULL, ("NULL racctp"));
KASSERT(*racctp != NULL, ("NULL racct"));
racct = *racctp;
for (i = 0; i <= RACCT_MAX; i++) {
if (racct_is_sloppy(i))
continue;
if (!racct_is_reclaimable(i))
continue;
KASSERT(racct->r_resources[i] == 0,
("destroying non-empty racct: "
"%ju allocated for resource %d\n",
racct->r_resources[i], i));
}
uma_zfree(racct_zone, racct);
*racctp = NULL;
}
/*
 * Locked wrapper around racct_destroy_locked(): takes racct_lock, frees
 * the racct referenced by '*racct' and releases the lock again.
 */
void
racct_destroy(struct racct **racct)
{

	mtx_lock(&racct_lock);
	racct_destroy_locked(racct);
	mtx_unlock(&racct_lock);
}
/*
 * Adjust the usage of 'resource' recorded in 'racct' by 'amount'.
 * Callers that release usage pass a negated value through the unsigned
 * parameter, so the adjustment may lower the counter.  A counter that
 * ends up below zero is reset to zero; this is only expected for sloppy
 * resources.  Only 'racct' itself is modified.  The caller must hold
 * racct_lock.
 */
static void
racct_alloc_resource(struct racct *racct, int resource,
    uint64_t amount)
{
	int64_t *usage;

	mtx_assert(&racct_lock, MA_OWNED);
	KASSERT(racct != NULL, ("NULL racct"));

	usage = &racct->r_resources[resource];
	*usage += amount;
	if (*usage >= 0)
		return;

	KASSERT(racct_is_sloppy(resource),
	    ("racct_alloc_resource: usage < 0"));
	*usage = 0;
}
/*
 * Charge 'amount' more of 'resource' to process 'p' and to the raccts
 * reached through its credential.  Returns 0 on success.  With RCTL, an
 * errno from rctl_enforce() is returned, and nothing is charged, when the
 * resource is deniable.  P_SYSTEM processes are not accounted.  The
 * process lock must be held.
 */
int
racct_add(struct proc *p, int resource, uint64_t amount)
{
#ifdef RCTL
	int error;
#endif

	if ((p->p_flag & P_SYSTEM) != 0)
		return (0);

	SDT_PROBE(racct, kernel, rusage, add, p, resource, amount, 0, 0);

	/* p->p_ucred may only be dereferenced under the proc lock. */
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(amount >= 0, ("racct_add: invalid amount for resource %d: %ju",
	    resource, amount));

	mtx_lock(&racct_lock);

#ifdef RCTL
	error = rctl_enforce(p, resource, amount);
	if (error != 0 && racct_is_deniable(resource)) {
		SDT_PROBE(racct, kernel, rusage, add_failure, p, resource,
		    amount, 0, 0);
		mtx_unlock(&racct_lock);
		return (error);
	}
#endif

	racct_alloc_resource(p->p_racct, resource, amount);
	racct_add_cred_locked(p->p_ucred, resource, amount);

	mtx_unlock(&racct_lock);

	return (0);
}
/*
 * Charge 'amount' of 'resource' to every racct reachable from 'cred':
 * the real-uid uidinfo, the prison together with all of its ancestors,
 * and the login class.  racct_alloc_resource() asserts that racct_lock
 * is held.
 */
static void
racct_add_cred_locked(struct ucred *cred, int resource, uint64_t amount)
{
	struct prison *jail;

	SDT_PROBE(racct, kernel, rusage, add_cred, cred, resource, amount,
	    0, 0);

	KASSERT(amount >= 0,
	    ("racct_add_cred: invalid amount for resource %d: %ju",
	    resource, amount));

	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, amount);

	jail = cred->cr_prison;
	while (jail != NULL) {
		racct_alloc_resource(jail->pr_racct, resource, amount);
		jail = jail->pr_parent;
	}

	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, amount);
}
/*
 * Charge 'amount' of 'resource' to the raccts reachable from credential
 * 'cred'.  No limits are checked and the call cannot fail.
 *
 * XXX: Shouldn't this ever return an error?
 */
void
racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
{

	mtx_lock(&racct_lock);
	racct_add_cred_locked(cred, resource, amount);
	mtx_unlock(&racct_lock);
}
/*
 * Increase allocation of 'resource' by 'amount' for process 'p'.
 * Doesn't check for limits and never fails.
 *
 * P_SYSTEM processes are not accounted.  The process lock must be held.
 */
void
racct_add_force(struct proc *p, int resource, uint64_t amount)
{

	if (p->p_flag & P_SYSTEM)
		return;

	SDT_PROBE(racct, kernel, rusage, add_force, p, resource, amount, 0, 0);

	/*
	 * We need proc lock to dereference p->p_ucred.
	 */
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(amount >= 0,
	    ("racct_add_force: invalid amount for resource %d: %ju",
	    resource, amount));

	/*
	 * Update the per-process and the per-credential counters under a
	 * single hold of racct_lock, the same way racct_add() and
	 * racct_set_force() do.  Dropping the lock between the two steps
	 * would let another thread observe the process charged while the
	 * credential raccts are not yet.
	 */
	mtx_lock(&racct_lock);
	racct_alloc_resource(p->p_racct, resource, amount);
	racct_add_cred_locked(p->p_ucred, resource, amount);
	mtx_unlock(&racct_lock);
}
/*
 * Set the usage of 'resource' for process 'p' to 'amount' and apply the
 * difference to the raccts reached through its credential.  With RCTL,
 * an increase is first passed to rctl_enforce(); its errno is returned,
 * and nothing is changed, when the resource is deniable.  Returns 0
 * otherwise.  P_SYSTEM processes are not accounted.  The process lock
 * must be held.  Callers in this file hold racct_lock.
 */
static int
racct_set_locked(struct proc *p, int resource, uint64_t amount)
{
	int64_t delta;
#ifdef RCTL
	int error;
#endif

	if ((p->p_flag & P_SYSTEM) != 0)
		return (0);

	SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);

	/* p->p_ucred may only be dereferenced under the proc lock. */
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(amount >= 0, ("racct_set: invalid amount for resource %d: %ju",
	    resource, amount));

	/* Positive: usage grows; negative: usage shrinks. */
	delta = amount - p->p_racct->r_resources[resource];

#ifdef notyet
	KASSERT(delta >= 0 || racct_is_reclaimable(resource),
	    ("racct_set: usage of non-reclaimable resource %d dropping",
	    resource));
#endif

#ifdef RCTL
	if (delta > 0) {
		error = rctl_enforce(p, resource, delta);
		if (error != 0 && racct_is_deniable(resource)) {
			SDT_PROBE(racct, kernel, rusage, set_failure, p,
			    resource, amount, 0, 0);
			return (error);
		}
	}
#endif

	racct_alloc_resource(p->p_racct, resource, delta);

	if (delta > 0)
		racct_add_cred_locked(p->p_ucred, resource, delta);
	else if (delta < 0)
		racct_sub_cred_locked(p->p_ucred, resource, -delta);

	return (0);
}
/*
 * Set the usage of 'resource' for process 'p' to 'amount'.  Returns 0
 * when the new usage is accepted, or an errno when it is denied.
 *
 * Lowering the usage always returns 0, even if the result is still
 * above the limit.
 */
int
racct_set(struct proc *p, int resource, uint64_t amount)
{
	int rv;

	mtx_lock(&racct_lock);
	rv = racct_set_locked(p, resource, amount);
	mtx_unlock(&racct_lock);

	return (rv);
}
/*
 * Set the usage of 'resource' for process 'p' to 'amount' without
 * consulting any limits; cannot fail.  The difference is applied to the
 * raccts reached through the credential as well.  P_SYSTEM processes are
 * not accounted.  The process lock must be held.
 */
void
racct_set_force(struct proc *p, int resource, uint64_t amount)
{
	int64_t delta;

	if ((p->p_flag & P_SYSTEM) != 0)
		return;

	SDT_PROBE(racct, kernel, rusage, set, p, resource, amount, 0, 0);

	/* p->p_ucred may only be dereferenced under the proc lock. */
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(amount >= 0,
	    ("racct_set_force: invalid amount for resource %d: %ju",
	    resource, amount));

	mtx_lock(&racct_lock);

	delta = amount - p->p_racct->r_resources[resource];
	racct_alloc_resource(p->p_racct, resource, delta);

	if (delta > 0)
		racct_add_cred_locked(p->p_ucred, resource, delta);
	else if (delta < 0)
		racct_sub_cred_locked(p->p_ucred, resource, -delta);

	mtx_unlock(&racct_lock);
}
/*
 * Report how much of 'resource' process 'p' is allowed to keep
 * allocated; allocating beyond that would be denied unless the resource
 * is undeniable.  Usage already charged is not taken into account.
 * Without RCTL there is no limit and UINT64_MAX is returned.
 */
uint64_t
racct_get_limit(struct proc *p, int resource)
{
	uint64_t limit;

#ifdef RCTL
	limit = rctl_get_limit(p, resource);
#else
	limit = UINT64_MAX;
#endif

	return (limit);
}
/*
 * Report how much more of 'resource' process 'p' may allocate before a
 * request would be denied (unless the resource is undeniable).  Unlike
 * racct_get_limit(), usage already charged does matter here.  Without
 * RCTL there is no limit and UINT64_MAX is returned.
 */
uint64_t
racct_get_available(struct proc *p, int resource)
{
	uint64_t avail;

#ifdef RCTL
	avail = rctl_get_available(p, resource);
#else
	avail = UINT64_MAX;
#endif

	return (avail);
}
/*
 * Release 'amount' of 'resource' from process 'p' and from the raccts
 * reached through its credential.  Only valid for reclaimable resources.
 * P_SYSTEM processes are not accounted.  The process lock must be held.
 */
void
racct_sub(struct proc *p, int resource, uint64_t amount)
{

	if ((p->p_flag & P_SYSTEM) != 0)
		return;

	SDT_PROBE(racct, kernel, rusage, sub, p, resource, amount, 0, 0);

	/* p->p_ucred may only be dereferenced under the proc lock. */
	PROC_LOCK_ASSERT(p, MA_OWNED);
	KASSERT(amount >= 0, ("racct_sub: invalid amount for resource %d: %ju",
	    resource, amount));
	KASSERT(racct_is_reclaimable(resource),
	    ("racct_sub: called for non-reclaimable resource %d", resource));

	mtx_lock(&racct_lock);

	/* The process cannot give back more than it was charged. */
	KASSERT(amount <= p->p_racct->r_resources[resource],
	    ("racct_sub: freeing %ju of resource %d, which is more "
	    "than allocated %jd for %s (pid %d)", amount, resource,
	    (intmax_t)p->p_racct->r_resources[resource], p->p_comm, p->p_pid));

	racct_alloc_resource(p->p_racct, resource, -amount);
	racct_sub_cred_locked(p->p_ucred, resource, amount);

	mtx_unlock(&racct_lock);
}
/*
 * Release 'amount' of 'resource' from every racct reachable from 'cred':
 * the real-uid uidinfo, the prison together with all of its ancestors,
 * and the login class.  racct_alloc_resource() asserts that racct_lock
 * is held.
 */
static void
racct_sub_cred_locked(struct ucred *cred, int resource, uint64_t amount)
{
	struct prison *jail;

	SDT_PROBE(racct, kernel, rusage, sub_cred, cred, resource, amount,
	    0, 0);

	KASSERT(amount >= 0,
	    ("racct_sub_cred: invalid amount for resource %d: %ju",
	    resource, amount));
#ifdef notyet
	KASSERT(racct_is_reclaimable(resource),
	    ("racct_sub_cred: called for non-reclaimable resource %d",
	    resource));
#endif

	racct_alloc_resource(cred->cr_ruidinfo->ui_racct, resource, -amount);

	jail = cred->cr_prison;
	while (jail != NULL) {
		racct_alloc_resource(jail->pr_racct, resource, -amount);
		jail = jail->pr_parent;
	}

	racct_alloc_resource(cred->cr_loginclass->lc_racct, resource, -amount);
}
/*
 * Release 'amount' of 'resource' from the raccts reachable from
 * credential 'cred'.  Takes and drops racct_lock around the update.
 */
void
racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
{

	mtx_lock(&racct_lock);
	racct_sub_cred_locked(cred, resource, amount);
	mtx_unlock(&racct_lock);
}
/*
* Inherit resource usage information from the parent process.
*/
int
racct_proc_fork(struct proc *parent, struct proc *child)
{
int i, error = 0;
/*
* Create racct for the child process.
*/
racct_create(&child->p_racct);
/*
* No resource accounting for kernel processes.
*/
if (child->p_flag & P_SYSTEM)
return (0);
PROC_LOCK(parent);
PROC_LOCK(child);
mtx_lock(&racct_lock);
/*
* Inherit resource usage.
*/
for (i = 0; i <= RACCT_MAX; i++) {
if (parent->p_racct->r_resources[i] == 0 ||
!racct_is_inheritable(i))
continue;
error = racct_set_locked(child, i,
parent->p_racct->r_resources[i]);
if (error != 0) {
/*
* XXX: The only purpose of these two lines is
* to prevent from tripping checks in racct_destroy().
*/
for (i = 0; i <= RACCT_MAX; i++)
racct_set_locked(child, i, 0);
goto out;
}
}
#ifdef RCTL
error = rctl_proc_fork(parent, child);
if (error != 0) {
/*
* XXX: The only purpose of these two lines is to prevent from
* tripping checks in racct_destroy().
*/
for (i = 0; i <= RACCT_MAX; i++)
racct_set_locked(child, i, 0);
}
#endif
out:
if (error != 0)
racct_destroy_locked(&child->p_racct);
mtx_unlock(&racct_lock);
PROC_UNLOCK(child);
PROC_UNLOCK(parent);
return (error);
}
/*
 * Final accounting for exiting process 'p': record its CPU time, drop
 * the usage that is not released elsewhere, then release and destroy
 * its racct.
 */
void
racct_proc_exit(struct proc *p)
{
	static const int released[] = {
		RACCT_FSIZE, RACCT_NPTS, RACCT_NTHR, RACCT_RSS
	};
	uint64_t cputime;
	unsigned int idx;

	PROC_LOCK(p);

	/* No need to calculate rux; proc_reap() has already done this. */
	cputime = cputick2usec(p->p_rux.rux_runtime);
#ifdef notyet
	KASSERT(cputime >= p->p_prev_runtime, ("runtime < p_prev_runtime"));
#else
	/* Never report less CPU time than was recorded before. */
	if (cputime < p->p_prev_runtime)
		cputime = p->p_prev_runtime;
#endif
	racct_set(p, RACCT_CPU, cputime);

	/* XXX: Free this some other way. */
	for (idx = 0; idx < sizeof(released) / sizeof(released[0]); idx++)
		racct_set(p, released[idx], 0);

	PROC_UNLOCK(p);

#ifdef RCTL
	rctl_racct_release(p->p_racct);
#endif
	racct_destroy(&p->p_racct);
}
/*
 * Called after the credentials of 'p' change.  For each container that
 * differs between 'oldcred' and 'newcred' (real-uid uidinfo, login
 * class, prison chain) the usage of 'p' is removed from the old racct
 * and added to the new one.  The process lock must not be held.
 */
void
racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
    struct ucred *newcred)
{
	struct uidinfo *uip_from, *uip_to;
	struct loginclass *lc_from, *lc_to;
	struct prison *pr_from, *pr_to, *jail;

	PROC_LOCK_ASSERT(p, MA_NOTOWNED);

	uip_from = oldcred->cr_ruidinfo;
	uip_to = newcred->cr_ruidinfo;
	lc_from = oldcred->cr_loginclass;
	lc_to = newcred->cr_loginclass;
	pr_from = oldcred->cr_prison;
	pr_to = newcred->cr_prison;

	mtx_lock(&racct_lock);

	if (uip_to != uip_from) {
		racct_sub_racct(uip_from->ui_racct, p->p_racct);
		racct_add_racct(uip_to->ui_racct, p->p_racct);
	}

	if (lc_to != lc_from) {
		racct_sub_racct(lc_from->lc_racct, p->p_racct);
		racct_add_racct(lc_to->lc_racct, p->p_racct);
	}

	if (pr_to != pr_from) {
		/* Leave the old prison and all of its ancestors ... */
		for (jail = pr_from; jail != NULL; jail = jail->pr_parent)
			racct_sub_racct(jail->pr_racct, p->p_racct);
		/* ... and join the new prison and all of its ancestors. */
		for (jail = pr_to; jail != NULL; jail = jail->pr_parent)
			racct_add_racct(jail->pr_racct, p->p_racct);
	}

	mtx_unlock(&racct_lock);

#ifdef RCTL
	rctl_proc_ucred_changed(p, newcred);
#endif
}
/*
 * Body of the racctd kernel process.  Loops forever: each pass walks all
 * processes under the shared allproc lock and refreshes their RACCT_CPU
 * and RACCT_WALLCLOCK usage, then sleeps via pause("-", hz).
 */
static void
racctd(void)
{
struct thread *td;
struct proc *p;
struct timeval wallclock;
uint64_t runtime;
for (;;) {
sx_slock(&allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
/* Only processes in PRS_NORMAL state that are not P_SYSTEM are updated. */
if (p->p_state != PRS_NORMAL)
continue;
if (p->p_flag & P_SYSTEM)
continue;
/* Wall-clock usage: current uptime minus the process start time. */
microuptime(&wallclock);
timevalsub(&wallclock, &p->p_stats->p_start);
PROC_LOCK(p);
PROC_SLOCK(p);
/* Fold per-thread usage into p->p_rux before reading rux_runtime. */
FOREACH_THREAD_IN_PROC(p, td) {
ruxagg(p, td);
/* NOTE(review): empty lock/unlock pair; its purpose is not evident here. */
thread_lock(td);
thread_unlock(td);
}
runtime = cputick2usec(p->p_rux.rux_runtime);
PROC_SUNLOCK(p);
#ifdef notyet
KASSERT(runtime >= p->p_prev_runtime,
("runtime < p_prev_runtime"));
#else
/* Do not let the recorded CPU time go backwards. */
if (runtime < p->p_prev_runtime)
runtime = p->p_prev_runtime;
#endif
p->p_prev_runtime = runtime;
/* Both counters are set under one hold of racct_lock; errors are ignored. */
mtx_lock(&racct_lock);
racct_set_locked(p, RACCT_CPU, runtime);
racct_set_locked(p, RACCT_WALLCLOCK,
wallclock.tv_sec * 1000000 + wallclock.tv_usec);
mtx_unlock(&racct_lock);
PROC_UNLOCK(p);
}
sx_sunlock(&allproc_lock);
pause("-", hz);
}
}
/*
 * Descriptor for the racctd kernel process.  The SYSINIT below passes it
 * to kproc_start() at SI_SUB_RACCTD so that racctd() gets started.
 */
static struct kproc_desc racctd_kp = {
"racctd",
racctd,
NULL
};
SYSINIT(racctd, SI_SUB_RACCTD, SI_ORDER_FIRST, kproc_start, &racctd_kp);
/*
 * Subsystem initialisation, run at SI_SUB_RACCT: create the UMA zone
 * that backs every struct racct and give prison0 its racct.
 */
static void
racct_init(void)
{

	racct_zone = uma_zcreate("racct", sizeof(struct racct), NULL, NULL,
	    NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);

	/* XXX: Move this somewhere. */
	racct_create(&prison0.pr_racct);
}
SYSINIT(racct, SI_SUB_RACCT, SI_ORDER_FIRST, racct_init, NULL);
#else /* !RACCT */
/* !RACCT stub: nothing is accounted, so every request succeeds. */
int
racct_add(struct proc *p, int resource, uint64_t amount)
{

	return (0);
}
/* !RACCT stub: no per-credential accounting is kept. */
void
racct_add_cred(struct ucred *cred, int resource, uint64_t amount)
{

}
/* !RACCT stub: no per-process accounting is kept. */
void
racct_add_force(struct proc *p, int resource, uint64_t amount)
{

}
/* !RACCT stub: nothing is accounted, so every request succeeds. */
int
racct_set(struct proc *p, int resource, uint64_t amount)
{

	return (0);
}

/*
 * !RACCT stub.  racct_set_force() is declared unconditionally in the
 * header, so it needs a definition in this branch as well; otherwise a
 * caller in a kernel built without RACCT would fail to link.
 */
void
racct_set_force(struct proc *p, int resource, uint64_t amount)
{

}
/* !RACCT stub: there is no usage to release. */
void
racct_sub(struct proc *p, int resource, uint64_t amount)
{

}
/* !RACCT stub: there is no per-credential usage to release. */
void
racct_sub_cred(struct ucred *cred, int resource, uint64_t amount)
{

}
/* !RACCT stub: no limits exist, so report the largest possible value. */
uint64_t
racct_get_limit(struct proc *p, int resource)
{

	return (UINT64_MAX);
}

/*
 * !RACCT stub.  racct_get_available() is declared unconditionally in the
 * header, so it needs a definition in this branch as well; otherwise a
 * caller in a kernel built without RACCT would fail to link.
 */
uint64_t
racct_get_available(struct proc *p, int resource)
{

	return (UINT64_MAX);
}
/* !RACCT stub: no racct is allocated; '*racctp' is left untouched. */
void
racct_create(struct racct **racctp)
{

}
/* !RACCT stub: nothing was allocated, so there is nothing to free. */
void
racct_destroy(struct racct **racctp)
{

}
/* !RACCT stub: a fork never fails on accounting grounds. */
int
racct_proc_fork(struct proc *parent, struct proc *child)
{

	return (0);
}
/* !RACCT stub: there is no accounting state to tear down. */
void
racct_proc_exit(struct proc *p)
{

}

/*
 * !RACCT stub.  racct_proc_ucred_changed() is declared unconditionally
 * in the header, so it needs a definition in this branch as well;
 * otherwise a caller in a kernel built without RACCT would fail to link.
 */
void
racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
    struct ucred *newcred)
{

}
#endif /* !RACCT */

View File

@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/refcount.h>
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sched.h>
@ -1201,6 +1202,7 @@ uifind(uid)
if (uip == NULL) {
rw_runlock(&uihashtbl_lock);
uip = malloc(sizeof(*uip), M_UIDINFO, M_WAITOK | M_ZERO);
racct_create(&uip->ui_racct);
rw_wlock(&uihashtbl_lock);
/*
* There's a chance someone created our uidinfo while we
@ -1209,6 +1211,7 @@ uifind(uid)
*/
if ((old_uip = uilookup(uid)) != NULL) {
/* Someone else beat us to it. */
racct_destroy(&uip->ui_racct);
free(uip, M_UIDINFO);
uip = old_uip;
} else {
@ -1264,6 +1267,7 @@ uifree(uip)
/* Prepare for suboptimal case. */
rw_wlock(&uihashtbl_lock);
if (refcount_release(&uip->ui_ref)) {
racct_destroy(&uip->ui_racct);
LIST_REMOVE(uip, ui_hash);
rw_wunlock(&uihashtbl_lock);
if (uip->ui_sbsize != 0)
@ -1286,6 +1290,22 @@ uifree(uip)
rw_wunlock(&uihashtbl_lock);
}
void
ui_racct_foreach(void (*callback)(struct racct *racct,
void *arg2, void *arg3), void *arg2, void *arg3)
{
struct uidinfo *uip;
struct uihashhead *uih;
rw_rlock(&uihashtbl_lock);
for (uih = &uihashtbl[uihash]; uih >= uihashtbl; uih--) {
LIST_FOREACH(uip, uih, ui_hash) {
(callback)(uip->ui_racct, arg2, arg3);
}
}
rw_runlock(&uihashtbl_lock);
}
/*
* Change the count associated with number of processes
* a given user is using. When 'max' is 0, don't enforce a limit

View File

@ -135,6 +135,8 @@ MALLOC_DECLARE(M_PRISON);
#define HOSTUUIDLEN 64
struct racct;
/*
* This structure describes a prison. It is pointed to by all struct
* ucreds's of the inmates. pr_ref keeps track of them and is used to
@ -166,7 +168,8 @@ struct prison {
int pr_ip6s; /* (p) number of v6 IPs */
struct in_addr *pr_ip4; /* (p) v4 IPs of jail */
struct in6_addr *pr_ip6; /* (p) v6 IPs of jail */
void *pr_sparep[4];
struct racct *pr_racct; /* (c) resource accounting */
void *pr_sparep[3];
int pr_childcount; /* (a) number of child jails */
int pr_childmax; /* (p) maximum child jails */
unsigned pr_allow; /* (p) PR_ALLOW_* flags */
@ -380,6 +383,8 @@ int prison_if(struct ucred *cred, struct sockaddr *sa);
char *prison_name(struct prison *, struct prison *);
int prison_priv_check(struct ucred *cred, int priv);
int sysctl_jail_param(SYSCTL_HANDLER_ARGS);
void prison_racct_foreach(void (*callback)(struct racct *racct,
void *arg2, void *arg3), void *arg2, void *arg3);
#endif /* _KERNEL */
#endif /* !_SYS_JAIL_H_ */

View File

@ -109,6 +109,7 @@ enum sysinit_sub_id {
SI_SUB_VNET_PRELINK = 0x1E00000, /* vnet init before modules */
SI_SUB_KLD = 0x2000000, /* KLD and module setup */
SI_SUB_CPU = 0x2100000, /* CPU resource(s)*/
SI_SUB_RACCT = 0x2110000, /* resource accounting */
SI_SUB_RANDOM = 0x2120000, /* random number generator */
SI_SUB_KDTRACE = 0x2140000, /* Kernel dtrace hooks */
SI_SUB_MAC = 0x2180000, /* TrustedBSD MAC subsystem */
@ -169,6 +170,7 @@ enum sysinit_sub_id {
SI_SUB_KTHREAD_UPDATE = 0xec00000, /* update daemon*/
SI_SUB_KTHREAD_IDLE = 0xee00000, /* idle procs*/
SI_SUB_SMP = 0xf000000, /* start the APs*/
SI_SUB_RACCTD = 0xf100000, /* start racctd */
SI_SUB_RUN_SCHEDULER = 0xfffffff /* scheduler*/
};

View File

@ -32,6 +32,8 @@
#ifndef _SYS_LOGINCLASS_H_
#define _SYS_LOGINCLASS_H_
struct racct;
/*
* Exactly one of these structures exists per login class.
*/
@ -39,11 +41,13 @@ struct loginclass {
LIST_ENTRY(loginclass) lc_next;
char lc_name[MAXLOGNAME];
u_int lc_refcount;
struct racct *lc_racct;
};
void loginclass_hold(struct loginclass *lc);
void loginclass_free(struct loginclass *lc);
struct loginclass *loginclass_find(const char *name);
void loginclass_racct_foreach(void (*callback)(struct racct *racct,
void *arg2, void *arg3), void *arg2, void *arg3);
#endif /* !_SYS_LOGINCLASS_H_ */

View File

@ -157,6 +157,7 @@ struct pargs {
* either lock is sufficient for read access, but both locks must be held
* for write access.
*/
struct racct;
struct kaudit_record;
struct td_sched;
struct nlminfo;
@ -566,6 +567,8 @@ struct proc {
struct cv p_pwait; /* (*) wait cv for exit/exec. */
struct cv p_dbgwait; /* (*) wait cv for debugger attach
after fork. */
uint64_t p_prev_runtime; /* (c) Resource usage accounting. */
struct racct *p_racct; /* (b) Resource accounting. */
};
#define p_session p_pgrp->pg_session

147
sys/sys/racct.h Normal file
View File

@ -0,0 +1,147 @@
/*-
* Copyright (c) 2010 The FreeBSD Foundation
* All rights reserved.
*
* This software was developed by Edward Tomasz Napierala under sponsorship
* from the FreeBSD Foundation.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
/*
* Resource accounting.
*/
#ifndef _RACCT_H_
#define _RACCT_H_
#include <sys/cdefs.h>
#include <sys/queue.h>
#include <sys/types.h>
struct proc;
struct rctl_rule_link;
struct ucred;
/*
 * Resources.
 *
 * Every value below other than RACCT_UNDEFINED is used as an index into
 * racct_types[] and into the r_resources[] array of struct racct.
 * RACCT_MAX must name the highest-numbered resource: r_resources[] is
 * declared with RACCT_MAX + 1 elements.
 */
#define RACCT_UNDEFINED -1
#define RACCT_CPU 0
#define RACCT_FSIZE 1
#define RACCT_DATA 2
#define RACCT_STACK 3
#define RACCT_CORE 4
#define RACCT_RSS 5
#define RACCT_MEMLOCK 6
#define RACCT_NPROC 7
#define RACCT_NOFILE 8
#define RACCT_SBSIZE 9
#define RACCT_VMEM 10
#define RACCT_NPTS 11
#define RACCT_SWAP 12
#define RACCT_NTHR 13
#define RACCT_MSGQQUEUED 14
#define RACCT_MSGQSIZE 15
#define RACCT_NMSGQ 16
#define RACCT_NSEM 17
#define RACCT_NSEMOP 18
#define RACCT_NSHM 19
#define RACCT_SHMSIZE 20
#define RACCT_WALLCLOCK 21
#define RACCT_MAX RACCT_WALLCLOCK
/*
 * Resource properties.  These are bit flags; the flags of each resource
 * are kept in racct_types[], indexed by resource number, and are tested
 * with the racct_is_*() macros below.  Each macro yields a non-zero value
 * when the flag is set.
 */
#define RACCT_IN_THOUSANDS 0x01
#define RACCT_RECLAIMABLE 0x02
#define RACCT_INHERITABLE 0x04
#define RACCT_DENIABLE 0x08
#define RACCT_SLOPPY 0x10
extern int racct_types[];
/*
 * The amount stored in r_resources[] is a thousand times bigger than
 * what is visible to userland.  It gets fixed up when retrieving resource
 * usage or adding rules.
 */
#define racct_is_in_thousands(X) (racct_types[X] & RACCT_IN_THOUSANDS)
/*
 * Resource usage can drop, as opposed to only grow.
 */
#define racct_is_reclaimable(X) (racct_types[X] & RACCT_RECLAIMABLE)
/*
 * Children inherit resource usage.
 */
#define racct_is_inheritable(X) (racct_types[X] & RACCT_INHERITABLE)
/*
 * racct_{add,set}(9) can actually return an error and not update resource
 * usage counters. Note that even when a resource is not deniable,
 * allocating it might cause signals to be sent by RCTL code.
 */
#define racct_is_deniable(X) (racct_types[X] & RACCT_DENIABLE)
/*
 * Per-process resource usage information makes no sense, but
 * per-credential information does. Resources of this kind are usually
 * allocated for a process, but freed using credentials.
 */
#define racct_is_sloppy(X) (racct_types[X] & RACCT_SLOPPY)
/*
 * The 'racct' structure defines resource consumption for a particular
 * subject, such as process or jail.
 *
 * This structure must be filled with zeroes initially.
 */
struct racct {
/* Current usage of each resource, indexed by RACCT_* resource number. */
int64_t r_resources[RACCT_MAX + 1];
/* Head of a list of struct rctl_rule_link entries. */
LIST_HEAD(, rctl_rule_link) r_rule_links;
};
/*
 * Charging usage.  racct_add() returns 0 or an errno when the request is
 * denied; the _cred and _force variants do not check limits.
 */
int racct_add(struct proc *p, int resource, uint64_t amount);
void racct_add_cred(struct ucred *cred, int resource, uint64_t amount);
void racct_add_force(struct proc *p, int resource, uint64_t amount);
/* Setting usage to an absolute amount; racct_set() may return an errno. */
int racct_set(struct proc *p, int resource, uint64_t amount);
void racct_set_force(struct proc *p, int resource, uint64_t amount);
/* Releasing usage. */
void racct_sub(struct proc *p, int resource, uint64_t amount);
void racct_sub_cred(struct ucred *cred, int resource, uint64_t amount);
/* Limit queries. */
uint64_t racct_get_limit(struct proc *p, int resource);
uint64_t racct_get_available(struct proc *p, int resource);
/* Allocation and release of a struct racct through a pointer-to-pointer. */
void racct_create(struct racct **racctp);
void racct_destroy(struct racct **racctp);
/* Process lifecycle hooks. */
int racct_proc_fork(struct proc *parent, struct proc *child);
void racct_proc_exit(struct proc *p);
void racct_proc_ucred_changed(struct proc *p, struct ucred *oldcred,
struct ucred *newcred);
#endif /* !_RACCT_H_ */

View File

@ -79,6 +79,8 @@ struct plimit {
int pl_refcnt; /* number of references */
};
struct racct;
/*-
* Per uid resource consumption. This structure is used to track
* the total resource consumption (process count, socket buffer size,
@ -99,6 +101,7 @@ struct uidinfo {
long ui_ptscnt; /* (b) number of pseudo-terminals */
uid_t ui_uid; /* (a) uid */
u_int ui_ref; /* (b) reference count */
struct racct *ui_racct; /* (a) resource accounting */
};
#define UIDINFO_VMSIZE_LOCK(ui) mtx_lock(&((ui)->ui_vmsize_mtx))
@ -140,6 +143,8 @@ struct uidinfo
void uifree(struct uidinfo *uip);
void uihashinit(void);
void uihold(struct uidinfo *uip);
void ui_racct_foreach(void (*callback)(struct racct *racct,
void *arg2, void *arg3), void *arg2, void *arg3);
#endif /* _KERNEL */
#endif /* !_SYS_RESOURCEVAR_H_ */