e8a5a1ad71
For rate-based resources that support throttling (e.g. readiops/writeiops), this fixes a divide-by-zero panic when rctl(8) passes 0 as the throttle value. For these resources, treat zero-throttle requests as requests to suspend forward progress as long as possible using the duration specified in kern.racct.rctl.throttle_max. PR: 251803 Reported by: chris@cretaforce.gr Reviewed by: kib MFC after: 1 week Differential Revision: https://reviews.freebsd.org/D27858
2248 lines
54 KiB
C
2248 lines
54 KiB
C
/*-
|
|
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
|
|
*
|
|
* Copyright (c) 2010 The FreeBSD Foundation
|
|
*
|
|
* This software was developed by Edward Tomasz Napierala under sponsorship
|
|
* from the FreeBSD Foundation.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* $FreeBSD$
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/devctl.h>
|
|
#include <sys/malloc.h>
|
|
#include <sys/queue.h>
|
|
#include <sys/refcount.h>
|
|
#include <sys/jail.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/limits.h>
|
|
#include <sys/loginclass.h>
|
|
#include <sys/priv.h>
|
|
#include <sys/proc.h>
|
|
#include <sys/racct.h>
|
|
#include <sys/rctl.h>
|
|
#include <sys/resourcevar.h>
|
|
#include <sys/sx.h>
|
|
#include <sys/sysent.h>
|
|
#include <sys/sysproto.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/types.h>
|
|
#include <sys/eventhandler.h>
|
|
#include <sys/lock.h>
|
|
#include <sys/mutex.h>
|
|
#include <sys/rwlock.h>
|
|
#include <sys/sbuf.h>
|
|
#include <sys/taskqueue.h>
|
|
#include <sys/tree.h>
|
|
#include <vm/uma.h>
|
|
|
|
#ifdef RCTL
|
|
#ifndef RACCT
|
|
#error "The RCTL option requires the RACCT option"
|
|
#endif
|
|
|
|
FEATURE(rctl, "Resource Limits");
|
|
|
|
#define	HRF_DEFAULT		0
#define	HRF_DONT_INHERIT	1
#define	HRF_DONT_ACCUMULATE	2

/*
 * Buffer size limits, in bytes.  The arithmetic is parenthesized so the
 * macros keep their value when expanded inside a larger expression
 * (e.g. "x / RCTL_MAX_INBUFSIZE").
 */
#define	RCTL_MAX_INBUFSIZE	(4 * 1024)
#define	RCTL_MAX_OUTBUFSIZE	(16 * 1024 * 1024)
#define	RCTL_LOG_BUFSIZE	128

/*
 * Margin subtracted from the available %CPU by rctl_pcpu_available().
 */
#define	RCTL_PCPU_SHIFT		(10 * 1000000)
/*
 * Run-time knobs, exported below under kern.racct.rctl.
 */
static unsigned int rctl_maxbufsize = RCTL_MAX_OUTBUFSIZE;
static int rctl_log_rate_limit = 10;
static int rctl_devctl_rate_limit = 10;

/*
 * Values below are initialized in rctl_init().
 */
static int rctl_throttle_min = -1;
static int rctl_throttle_max = -1;
static int rctl_throttle_pct = -1;
static int rctl_throttle_pct2 = -1;
static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS);
|
|
static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS);
|
|
static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS);
|
|
static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS);
|
|
|
|
SYSCTL_NODE(_kern_racct, OID_AUTO, rctl, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
|
|
"Resource Limits");
|
|
SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, maxbufsize, CTLFLAG_RWTUN,
|
|
&rctl_maxbufsize, 0, "Maximum output buffer size");
|
|
SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, log_rate_limit, CTLFLAG_RW,
|
|
&rctl_log_rate_limit, 0, "Maximum number of log messages per second");
|
|
SYSCTL_UINT(_kern_racct_rctl, OID_AUTO, devctl_rate_limit, CTLFLAG_RWTUN,
|
|
&rctl_devctl_rate_limit, 0, "Maximum number of devctl messages per second");
|
|
SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_min,
|
|
CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
|
|
&rctl_throttle_min_sysctl, "IU",
|
|
"Shortest throttling duration, in hz");
|
|
TUNABLE_INT("kern.racct.rctl.throttle_min", &rctl_throttle_min);
|
|
SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_max,
|
|
CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
|
|
&rctl_throttle_max_sysctl, "IU",
|
|
"Longest throttling duration, in hz");
|
|
TUNABLE_INT("kern.racct.rctl.throttle_max", &rctl_throttle_max);
|
|
SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct,
|
|
CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
|
|
&rctl_throttle_pct_sysctl, "IU",
|
|
"Throttling penalty for process consumption, in percent");
|
|
TUNABLE_INT("kern.racct.rctl.throttle_pct", &rctl_throttle_pct);
|
|
SYSCTL_PROC(_kern_racct_rctl, OID_AUTO, throttle_pct2,
|
|
CTLTYPE_UINT | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, 0, 0,
|
|
&rctl_throttle_pct2_sysctl, "IU",
|
|
"Throttling penalty for container consumption, in percent");
|
|
TUNABLE_INT("kern.racct.rctl.throttle_pct2", &rctl_throttle_pct2);
|
|
|
|
/*
|
|
* 'rctl_rule_link' connects a rule with every racct it's related to.
|
|
* For example, rule 'user:X:openfiles:deny=N/process' is linked
|
|
* with uidinfo for user X, and to each process of that user.
|
|
*/
|
|
struct rctl_rule_link {
|
|
LIST_ENTRY(rctl_rule_link) rrl_next;
|
|
struct rctl_rule *rrl_rule;
|
|
int rrl_exceeded;
|
|
};
|
|
|
|
/*
 * One entry of a name <-> numeric value lookup table.  Tables are arrays
 * terminated by an entry whose d_name is NULL.
 */
struct dict {
	const char	*d_name;
	int		d_value;
};
static struct dict subjectnames[] = {
|
|
{ "process", RCTL_SUBJECT_TYPE_PROCESS },
|
|
{ "user", RCTL_SUBJECT_TYPE_USER },
|
|
{ "loginclass", RCTL_SUBJECT_TYPE_LOGINCLASS },
|
|
{ "jail", RCTL_SUBJECT_TYPE_JAIL },
|
|
{ NULL, -1 }};
|
|
|
|
static struct dict resourcenames[] = {
|
|
{ "cputime", RACCT_CPU },
|
|
{ "datasize", RACCT_DATA },
|
|
{ "stacksize", RACCT_STACK },
|
|
{ "coredumpsize", RACCT_CORE },
|
|
{ "memoryuse", RACCT_RSS },
|
|
{ "memorylocked", RACCT_MEMLOCK },
|
|
{ "maxproc", RACCT_NPROC },
|
|
{ "openfiles", RACCT_NOFILE },
|
|
{ "vmemoryuse", RACCT_VMEM },
|
|
{ "pseudoterminals", RACCT_NPTS },
|
|
{ "swapuse", RACCT_SWAP },
|
|
{ "nthr", RACCT_NTHR },
|
|
{ "msgqqueued", RACCT_MSGQQUEUED },
|
|
{ "msgqsize", RACCT_MSGQSIZE },
|
|
{ "nmsgq", RACCT_NMSGQ },
|
|
{ "nsem", RACCT_NSEM },
|
|
{ "nsemop", RACCT_NSEMOP },
|
|
{ "nshm", RACCT_NSHM },
|
|
{ "shmsize", RACCT_SHMSIZE },
|
|
{ "wallclock", RACCT_WALLCLOCK },
|
|
{ "pcpu", RACCT_PCTCPU },
|
|
{ "readbps", RACCT_READBPS },
|
|
{ "writebps", RACCT_WRITEBPS },
|
|
{ "readiops", RACCT_READIOPS },
|
|
{ "writeiops", RACCT_WRITEIOPS },
|
|
{ NULL, -1 }};
|
|
|
|
static struct dict actionnames[] = {
|
|
{ "sighup", RCTL_ACTION_SIGHUP },
|
|
{ "sigint", RCTL_ACTION_SIGINT },
|
|
{ "sigquit", RCTL_ACTION_SIGQUIT },
|
|
{ "sigill", RCTL_ACTION_SIGILL },
|
|
{ "sigtrap", RCTL_ACTION_SIGTRAP },
|
|
{ "sigabrt", RCTL_ACTION_SIGABRT },
|
|
{ "sigemt", RCTL_ACTION_SIGEMT },
|
|
{ "sigfpe", RCTL_ACTION_SIGFPE },
|
|
{ "sigkill", RCTL_ACTION_SIGKILL },
|
|
{ "sigbus", RCTL_ACTION_SIGBUS },
|
|
{ "sigsegv", RCTL_ACTION_SIGSEGV },
|
|
{ "sigsys", RCTL_ACTION_SIGSYS },
|
|
{ "sigpipe", RCTL_ACTION_SIGPIPE },
|
|
{ "sigalrm", RCTL_ACTION_SIGALRM },
|
|
{ "sigterm", RCTL_ACTION_SIGTERM },
|
|
{ "sigurg", RCTL_ACTION_SIGURG },
|
|
{ "sigstop", RCTL_ACTION_SIGSTOP },
|
|
{ "sigtstp", RCTL_ACTION_SIGTSTP },
|
|
{ "sigchld", RCTL_ACTION_SIGCHLD },
|
|
{ "sigttin", RCTL_ACTION_SIGTTIN },
|
|
{ "sigttou", RCTL_ACTION_SIGTTOU },
|
|
{ "sigio", RCTL_ACTION_SIGIO },
|
|
{ "sigxcpu", RCTL_ACTION_SIGXCPU },
|
|
{ "sigxfsz", RCTL_ACTION_SIGXFSZ },
|
|
{ "sigvtalrm", RCTL_ACTION_SIGVTALRM },
|
|
{ "sigprof", RCTL_ACTION_SIGPROF },
|
|
{ "sigwinch", RCTL_ACTION_SIGWINCH },
|
|
{ "siginfo", RCTL_ACTION_SIGINFO },
|
|
{ "sigusr1", RCTL_ACTION_SIGUSR1 },
|
|
{ "sigusr2", RCTL_ACTION_SIGUSR2 },
|
|
{ "sigthr", RCTL_ACTION_SIGTHR },
|
|
{ "deny", RCTL_ACTION_DENY },
|
|
{ "log", RCTL_ACTION_LOG },
|
|
{ "devctl", RCTL_ACTION_DEVCTL },
|
|
{ "throttle", RCTL_ACTION_THROTTLE },
|
|
{ NULL, -1 }};
|
|
|
|
static void rctl_init(void);
|
|
SYSINIT(rctl, SI_SUB_RACCT, SI_ORDER_FIRST, rctl_init, NULL);
|
|
|
|
static uma_zone_t rctl_rule_zone;
|
|
static uma_zone_t rctl_rule_link_zone;
|
|
|
|
static int rctl_rule_fully_specified(const struct rctl_rule *rule);
|
|
static void rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule);
|
|
|
|
static MALLOC_DEFINE(M_RCTL, "rctl", "Resource Limits");
|
|
|
|
static int rctl_throttle_min_sysctl(SYSCTL_HANDLER_ARGS)
|
|
{
|
|
int error, val = rctl_throttle_min;
|
|
|
|
error = sysctl_handle_int(oidp, &val, 0, req);
|
|
if (error || !req->newptr)
|
|
return (error);
|
|
if (val < 1 || val > rctl_throttle_max)
|
|
return (EINVAL);
|
|
|
|
RACCT_LOCK();
|
|
rctl_throttle_min = val;
|
|
RACCT_UNLOCK();
|
|
|
|
return (0);
|
|
}
|
|
|
|
static int rctl_throttle_max_sysctl(SYSCTL_HANDLER_ARGS)
|
|
{
|
|
int error, val = rctl_throttle_max;
|
|
|
|
error = sysctl_handle_int(oidp, &val, 0, req);
|
|
if (error || !req->newptr)
|
|
return (error);
|
|
if (val < rctl_throttle_min)
|
|
return (EINVAL);
|
|
|
|
RACCT_LOCK();
|
|
rctl_throttle_max = val;
|
|
RACCT_UNLOCK();
|
|
|
|
return (0);
|
|
}
|
|
|
|
static int rctl_throttle_pct_sysctl(SYSCTL_HANDLER_ARGS)
|
|
{
|
|
int error, val = rctl_throttle_pct;
|
|
|
|
error = sysctl_handle_int(oidp, &val, 0, req);
|
|
if (error || !req->newptr)
|
|
return (error);
|
|
if (val < 0)
|
|
return (EINVAL);
|
|
|
|
RACCT_LOCK();
|
|
rctl_throttle_pct = val;
|
|
RACCT_UNLOCK();
|
|
|
|
return (0);
|
|
}
|
|
|
|
static int rctl_throttle_pct2_sysctl(SYSCTL_HANDLER_ARGS)
|
|
{
|
|
int error, val = rctl_throttle_pct2;
|
|
|
|
error = sysctl_handle_int(oidp, &val, 0, req);
|
|
if (error || !req->newptr)
|
|
return (error);
|
|
if (val < 0)
|
|
return (EINVAL);
|
|
|
|
RACCT_LOCK();
|
|
rctl_throttle_pct2 = val;
|
|
RACCT_UNLOCK();
|
|
|
|
return (0);
|
|
}
|
|
|
|
static const char *
|
|
rctl_subject_type_name(int subject)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; subjectnames[i].d_name != NULL; i++) {
|
|
if (subjectnames[i].d_value == subject)
|
|
return (subjectnames[i].d_name);
|
|
}
|
|
|
|
panic("rctl_subject_type_name: unknown subject type %d", subject);
|
|
}
|
|
|
|
static const char *
|
|
rctl_action_name(int action)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; actionnames[i].d_name != NULL; i++) {
|
|
if (actionnames[i].d_value == action)
|
|
return (actionnames[i].d_name);
|
|
}
|
|
|
|
panic("rctl_action_name: unknown action %d", action);
|
|
}
|
|
|
|
const char *
|
|
rctl_resource_name(int resource)
|
|
{
|
|
int i;
|
|
|
|
for (i = 0; resourcenames[i].d_name != NULL; i++) {
|
|
if (resourcenames[i].d_value == resource)
|
|
return (resourcenames[i].d_name);
|
|
}
|
|
|
|
panic("rctl_resource_name: unknown resource %d", resource);
|
|
}
|
|
|
|
static struct racct *
|
|
rctl_proc_rule_to_racct(const struct proc *p, const struct rctl_rule *rule)
|
|
{
|
|
struct ucred *cred = p->p_ucred;
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
RACCT_LOCK_ASSERT();
|
|
|
|
switch (rule->rr_per) {
|
|
case RCTL_SUBJECT_TYPE_PROCESS:
|
|
return (p->p_racct);
|
|
case RCTL_SUBJECT_TYPE_USER:
|
|
return (cred->cr_ruidinfo->ui_racct);
|
|
case RCTL_SUBJECT_TYPE_LOGINCLASS:
|
|
return (cred->cr_loginclass->lc_racct);
|
|
case RCTL_SUBJECT_TYPE_JAIL:
|
|
return (cred->cr_prison->pr_prison_racct->prr_racct);
|
|
default:
|
|
panic("%s: unknown per %d", __func__, rule->rr_per);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Return the amount of resource that can be allocated by 'p' before
|
|
* hitting 'rule'.
|
|
*/
|
|
static int64_t
|
|
rctl_available_resource(const struct proc *p, const struct rctl_rule *rule)
|
|
{
|
|
const struct racct *racct;
|
|
int64_t available;
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
RACCT_LOCK_ASSERT();
|
|
|
|
racct = rctl_proc_rule_to_racct(p, rule);
|
|
available = rule->rr_amount - racct->r_resources[rule->rr_resource];
|
|
|
|
return (available);
|
|
}
|
|
|
|
/*
|
|
* Called every second for proc, uidinfo, loginclass, and jail containers.
|
|
* If the limit isn't exceeded, it decreases the usage amount to zero.
|
|
* Otherwise, it decreases it by the value of the limit. This way
|
|
* resource consumption exceeding the limit "carries over" to the next
|
|
* period.
|
|
*/
|
|
void
|
|
rctl_throttle_decay(struct racct *racct, int resource)
|
|
{
|
|
struct rctl_rule *rule;
|
|
struct rctl_rule_link *link;
|
|
int64_t minavailable;
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
RACCT_LOCK_ASSERT();
|
|
|
|
minavailable = INT64_MAX;
|
|
|
|
LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
|
|
rule = link->rrl_rule;
|
|
|
|
if (rule->rr_resource != resource)
|
|
continue;
|
|
if (rule->rr_action != RCTL_ACTION_THROTTLE)
|
|
continue;
|
|
|
|
if (rule->rr_amount < minavailable)
|
|
minavailable = rule->rr_amount;
|
|
}
|
|
|
|
if (racct->r_resources[resource] < minavailable) {
|
|
racct->r_resources[resource] = 0;
|
|
} else {
|
|
/*
|
|
* Cap utilization counter at ten times the limit. Otherwise,
|
|
* if we changed the rule lowering the allowed amount, it could
|
|
* take unreasonably long time for the accumulated resource
|
|
* usage to drop.
|
|
*/
|
|
if (racct->r_resources[resource] > minavailable * 10)
|
|
racct->r_resources[resource] = minavailable * 10;
|
|
|
|
racct->r_resources[resource] -= minavailable;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Special version of rctl_get_available() for the %CPU resource.
|
|
* We slightly cheat here and return less than we normally would.
|
|
*/
|
|
int64_t
|
|
rctl_pcpu_available(const struct proc *p) {
|
|
struct rctl_rule *rule;
|
|
struct rctl_rule_link *link;
|
|
int64_t available, minavailable, limit;
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
RACCT_LOCK_ASSERT();
|
|
|
|
minavailable = INT64_MAX;
|
|
limit = 0;
|
|
|
|
LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
|
|
rule = link->rrl_rule;
|
|
if (rule->rr_resource != RACCT_PCTCPU)
|
|
continue;
|
|
if (rule->rr_action != RCTL_ACTION_DENY)
|
|
continue;
|
|
available = rctl_available_resource(p, rule);
|
|
if (available < minavailable) {
|
|
minavailable = available;
|
|
limit = rule->rr_amount;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Return slightly less than actual value of the available
|
|
* %cpu resource. This makes %cpu throttling more aggressive
|
|
* and lets us act sooner than the limits are already exceeded.
|
|
*/
|
|
if (limit != 0) {
|
|
if (limit > 2 * RCTL_PCPU_SHIFT)
|
|
minavailable -= RCTL_PCPU_SHIFT;
|
|
else
|
|
minavailable -= (limit / 2);
|
|
}
|
|
|
|
return (minavailable);
|
|
}
|
|
|
|
/*
 * Saturating unsigned addition: returns a + b, or UINT64_MAX if the sum
 * does not fit in 64 bits.
 */
static uint64_t
xadd(uint64_t a, uint64_t b)
{
	uint64_t c;

	c = a + b;

	/*
	 * Unsigned addition wrapped around iff the result is smaller than
	 * an operand; checking one operand is sufficient.
	 */
	if (c < a)
		return (UINT64_MAX);

	return (c);
}
/*
 * Saturating unsigned multiplication: returns a * b, or UINT64_MAX if
 * the product does not fit in 64 bits.
 */
static uint64_t
xmul(uint64_t a, uint64_t b)
{

	/* Test for overflow by division, so nothing wraps around. */
	if (b != 0 && a > UINT64_MAX / b)
		return (UINT64_MAX);

	return (a * b);
}
/*
|
|
* Check whether the proc 'p' can allocate 'amount' of 'resource' in addition
|
|
* to what it keeps allocated now. Returns non-zero if the allocation should
|
|
* be denied, 0 otherwise.
|
|
*/
|
|
int
|
|
rctl_enforce(struct proc *p, int resource, uint64_t amount)
|
|
{
|
|
static struct timeval log_lasttime, devctl_lasttime;
|
|
static int log_curtime = 0, devctl_curtime = 0;
|
|
struct rctl_rule *rule;
|
|
struct rctl_rule_link *link;
|
|
struct sbuf sb;
|
|
char *buf;
|
|
int64_t available;
|
|
uint64_t sleep_ms, sleep_ratio;
|
|
int should_deny = 0;
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
RACCT_LOCK_ASSERT();
|
|
|
|
/*
|
|
* There may be more than one matching rule; go through all of them.
|
|
* Denial should be done last, after logging and sending signals.
|
|
*/
|
|
LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
|
|
rule = link->rrl_rule;
|
|
if (rule->rr_resource != resource)
|
|
continue;
|
|
|
|
available = rctl_available_resource(p, rule);
|
|
if (available >= (int64_t)amount) {
|
|
link->rrl_exceeded = 0;
|
|
continue;
|
|
}
|
|
|
|
switch (rule->rr_action) {
|
|
case RCTL_ACTION_DENY:
|
|
should_deny = 1;
|
|
continue;
|
|
case RCTL_ACTION_LOG:
|
|
/*
|
|
* If rrl_exceeded != 0, it means we've already
|
|
* logged a warning for this process.
|
|
*/
|
|
if (link->rrl_exceeded != 0)
|
|
continue;
|
|
|
|
/*
|
|
* If the process state is not fully initialized yet,
|
|
* we can't access most of the required fields, e.g.
|
|
* p->p_comm. This happens when called from fork1().
|
|
* Ignore this rule for now; it will be processed just
|
|
* after fork, when called from racct_proc_fork_done().
|
|
*/
|
|
if (p->p_state != PRS_NORMAL)
|
|
continue;
|
|
|
|
if (!ppsratecheck(&log_lasttime, &log_curtime,
|
|
rctl_log_rate_limit))
|
|
continue;
|
|
|
|
buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
|
|
if (buf == NULL) {
|
|
printf("rctl_enforce: out of memory\n");
|
|
continue;
|
|
}
|
|
sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
|
|
rctl_rule_to_sbuf(&sb, rule);
|
|
sbuf_finish(&sb);
|
|
printf("rctl: rule \"%s\" matched by pid %d "
|
|
"(%s), uid %d, jail %s\n", sbuf_data(&sb),
|
|
p->p_pid, p->p_comm, p->p_ucred->cr_uid,
|
|
p->p_ucred->cr_prison->pr_prison_racct->prr_name);
|
|
sbuf_delete(&sb);
|
|
free(buf, M_RCTL);
|
|
link->rrl_exceeded = 1;
|
|
continue;
|
|
case RCTL_ACTION_DEVCTL:
|
|
if (link->rrl_exceeded != 0)
|
|
continue;
|
|
|
|
if (p->p_state != PRS_NORMAL)
|
|
continue;
|
|
|
|
if (!ppsratecheck(&devctl_lasttime, &devctl_curtime,
|
|
rctl_devctl_rate_limit))
|
|
continue;
|
|
|
|
buf = malloc(RCTL_LOG_BUFSIZE, M_RCTL, M_NOWAIT);
|
|
if (buf == NULL) {
|
|
printf("rctl_enforce: out of memory\n");
|
|
continue;
|
|
}
|
|
sbuf_new(&sb, buf, RCTL_LOG_BUFSIZE, SBUF_FIXEDLEN);
|
|
sbuf_printf(&sb, "rule=");
|
|
rctl_rule_to_sbuf(&sb, rule);
|
|
sbuf_printf(&sb, " pid=%d ruid=%d jail=%s",
|
|
p->p_pid, p->p_ucred->cr_ruid,
|
|
p->p_ucred->cr_prison->pr_prison_racct->prr_name);
|
|
sbuf_finish(&sb);
|
|
devctl_notify("RCTL", "rule", "matched",
|
|
sbuf_data(&sb));
|
|
sbuf_delete(&sb);
|
|
free(buf, M_RCTL);
|
|
link->rrl_exceeded = 1;
|
|
continue;
|
|
case RCTL_ACTION_THROTTLE:
|
|
if (p->p_state != PRS_NORMAL)
|
|
continue;
|
|
|
|
if (rule->rr_amount == 0) {
|
|
racct_proc_throttle(p, rctl_throttle_max);
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Make the process sleep for a fraction of second
|
|
* proportional to the ratio of process' resource
|
|
* utilization compared to the limit. The point is
|
|
* to penalize resource hogs: processes that consume
|
|
* more of the available resources sleep for longer.
|
|
*
|
|
* We're trying to defer division until the very end,
|
|
* to minimize the rounding effects. The following
|
|
* calculation could have been written in a clearer
|
|
* way like this:
|
|
*
|
|
* sleep_ms = hz * p->p_racct->r_resources[resource] /
|
|
* rule->rr_amount;
|
|
* sleep_ms *= rctl_throttle_pct / 100;
|
|
* if (sleep_ms < rctl_throttle_min)
|
|
* sleep_ms = rctl_throttle_min;
|
|
*
|
|
*/
|
|
sleep_ms = xmul(hz, p->p_racct->r_resources[resource]);
|
|
sleep_ms = xmul(sleep_ms, rctl_throttle_pct) / 100;
|
|
if (sleep_ms < rctl_throttle_min * rule->rr_amount)
|
|
sleep_ms = rctl_throttle_min * rule->rr_amount;
|
|
|
|
/*
|
|
* Multiply that by the ratio of the resource
|
|
* consumption for the container compared to the limit,
|
|
* squared. In other words, a process in a container
|
|
* that is two times over the limit will be throttled
|
|
* four times as much for hitting the same rule. The
|
|
* point is to penalize processes more if the container
|
|
* itself (eg certain UID or jail) is above the limit.
|
|
*/
|
|
if (available < 0)
|
|
sleep_ratio = -available / rule->rr_amount;
|
|
else
|
|
sleep_ratio = 0;
|
|
sleep_ratio = xmul(sleep_ratio, sleep_ratio);
|
|
sleep_ratio = xmul(sleep_ratio, rctl_throttle_pct2) / 100;
|
|
sleep_ms = xadd(sleep_ms, xmul(sleep_ms, sleep_ratio));
|
|
|
|
/*
|
|
* Finally the division.
|
|
*/
|
|
sleep_ms /= rule->rr_amount;
|
|
|
|
if (sleep_ms > rctl_throttle_max)
|
|
sleep_ms = rctl_throttle_max;
|
|
#if 0
|
|
printf("%s: pid %d (%s), %jd of %jd, will sleep for %ju ms (ratio %ju, available %jd)\n",
|
|
__func__, p->p_pid, p->p_comm,
|
|
p->p_racct->r_resources[resource],
|
|
rule->rr_amount, (uintmax_t)sleep_ms,
|
|
(uintmax_t)sleep_ratio, (intmax_t)available);
|
|
#endif
|
|
|
|
KASSERT(sleep_ms >= rctl_throttle_min, ("%s: %ju < %d\n",
|
|
__func__, (uintmax_t)sleep_ms, rctl_throttle_min));
|
|
racct_proc_throttle(p, sleep_ms);
|
|
continue;
|
|
default:
|
|
if (link->rrl_exceeded != 0)
|
|
continue;
|
|
|
|
if (p->p_state != PRS_NORMAL)
|
|
continue;
|
|
|
|
KASSERT(rule->rr_action > 0 &&
|
|
rule->rr_action <= RCTL_ACTION_SIGNAL_MAX,
|
|
("rctl_enforce: unknown action %d",
|
|
rule->rr_action));
|
|
|
|
/*
|
|
* We're using the fact that RCTL_ACTION_SIG* values
|
|
* are equal to their counterparts from sys/signal.h.
|
|
*/
|
|
kern_psignal(p, rule->rr_action);
|
|
link->rrl_exceeded = 1;
|
|
continue;
|
|
}
|
|
}
|
|
|
|
if (should_deny) {
|
|
/*
|
|
* Return fake error code; the caller should change it
|
|
* into one proper for the situation - EFSIZ, ENOMEM etc.
|
|
*/
|
|
return (EDOOFUS);
|
|
}
|
|
|
|
return (0);
|
|
}
|
|
|
|
uint64_t
|
|
rctl_get_limit(struct proc *p, int resource)
|
|
{
|
|
struct rctl_rule *rule;
|
|
struct rctl_rule_link *link;
|
|
uint64_t amount = UINT64_MAX;
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
RACCT_LOCK_ASSERT();
|
|
|
|
/*
|
|
* There may be more than one matching rule; go through all of them.
|
|
* Denial should be done last, after logging and sending signals.
|
|
*/
|
|
LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
|
|
rule = link->rrl_rule;
|
|
if (rule->rr_resource != resource)
|
|
continue;
|
|
if (rule->rr_action != RCTL_ACTION_DENY)
|
|
continue;
|
|
if (rule->rr_amount < amount)
|
|
amount = rule->rr_amount;
|
|
}
|
|
|
|
return (amount);
|
|
}
|
|
|
|
uint64_t
|
|
rctl_get_available(struct proc *p, int resource)
|
|
{
|
|
struct rctl_rule *rule;
|
|
struct rctl_rule_link *link;
|
|
int64_t available, minavailable, allocated;
|
|
|
|
minavailable = INT64_MAX;
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
RACCT_LOCK_ASSERT();
|
|
|
|
/*
|
|
* There may be more than one matching rule; go through all of them.
|
|
* Denial should be done last, after logging and sending signals.
|
|
*/
|
|
LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
|
|
rule = link->rrl_rule;
|
|
if (rule->rr_resource != resource)
|
|
continue;
|
|
if (rule->rr_action != RCTL_ACTION_DENY)
|
|
continue;
|
|
available = rctl_available_resource(p, rule);
|
|
if (available < minavailable)
|
|
minavailable = available;
|
|
}
|
|
|
|
/*
|
|
* XXX: Think about this _hard_.
|
|
*/
|
|
allocated = p->p_racct->r_resources[resource];
|
|
if (minavailable < INT64_MAX - allocated)
|
|
minavailable += allocated;
|
|
if (minavailable < 0)
|
|
minavailable = 0;
|
|
|
|
return (minavailable);
|
|
}
|
|
|
|
static int
|
|
rctl_rule_matches(const struct rctl_rule *rule, const struct rctl_rule *filter)
|
|
{
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
|
|
if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED) {
|
|
if (rule->rr_subject_type != filter->rr_subject_type)
|
|
return (0);
|
|
|
|
switch (filter->rr_subject_type) {
|
|
case RCTL_SUBJECT_TYPE_PROCESS:
|
|
if (filter->rr_subject.rs_proc != NULL &&
|
|
rule->rr_subject.rs_proc !=
|
|
filter->rr_subject.rs_proc)
|
|
return (0);
|
|
break;
|
|
case RCTL_SUBJECT_TYPE_USER:
|
|
if (filter->rr_subject.rs_uip != NULL &&
|
|
rule->rr_subject.rs_uip !=
|
|
filter->rr_subject.rs_uip)
|
|
return (0);
|
|
break;
|
|
case RCTL_SUBJECT_TYPE_LOGINCLASS:
|
|
if (filter->rr_subject.rs_loginclass != NULL &&
|
|
rule->rr_subject.rs_loginclass !=
|
|
filter->rr_subject.rs_loginclass)
|
|
return (0);
|
|
break;
|
|
case RCTL_SUBJECT_TYPE_JAIL:
|
|
if (filter->rr_subject.rs_prison_racct != NULL &&
|
|
rule->rr_subject.rs_prison_racct !=
|
|
filter->rr_subject.rs_prison_racct)
|
|
return (0);
|
|
break;
|
|
default:
|
|
panic("rctl_rule_matches: unknown subject type %d",
|
|
filter->rr_subject_type);
|
|
}
|
|
}
|
|
|
|
if (filter->rr_resource != RACCT_UNDEFINED) {
|
|
if (rule->rr_resource != filter->rr_resource)
|
|
return (0);
|
|
}
|
|
|
|
if (filter->rr_action != RCTL_ACTION_UNDEFINED) {
|
|
if (rule->rr_action != filter->rr_action)
|
|
return (0);
|
|
}
|
|
|
|
if (filter->rr_amount != RCTL_AMOUNT_UNDEFINED) {
|
|
if (rule->rr_amount != filter->rr_amount)
|
|
return (0);
|
|
}
|
|
|
|
if (filter->rr_per != RCTL_SUBJECT_TYPE_UNDEFINED) {
|
|
if (rule->rr_per != filter->rr_per)
|
|
return (0);
|
|
}
|
|
|
|
return (1);
|
|
}
|
|
|
|
static int
|
|
str2value(const char *str, int *value, struct dict *table)
|
|
{
|
|
int i;
|
|
|
|
if (value == NULL)
|
|
return (EINVAL);
|
|
|
|
for (i = 0; table[i].d_name != NULL; i++) {
|
|
if (strcasecmp(table[i].d_name, str) == 0) {
|
|
*value = table[i].d_value;
|
|
return (0);
|
|
}
|
|
}
|
|
|
|
return (EINVAL);
|
|
}
|
|
|
|
static int
|
|
str2id(const char *str, id_t *value)
|
|
{
|
|
char *end;
|
|
|
|
if (str == NULL)
|
|
return (EINVAL);
|
|
|
|
*value = strtoul(str, &end, 10);
|
|
if ((size_t)(end - str) != strlen(str))
|
|
return (EINVAL);
|
|
|
|
return (0);
|
|
}
|
|
|
|
/*
 * Parse 'str' as a decimal number into '*value'.  Returns 0 on success,
 * EINVAL if 'str' is NULL or is not consumed entirely by strtoul(), and
 * ERANGE if the parsed number ends up negative once stored as int64_t.
 *
 * NOTE(review): strtoul() yields an unsigned long; where that type is
 * narrower than 64 bits, large inputs cannot be represented - confirm
 * whether that matters for the supported platforms.
 */
static int
str2int64(const char *str, int64_t *value)
{
	char *end;

	if (str == NULL)
		return (EINVAL);

	*value = strtoul(str, &end, 10);
	/* Trailing garbage means the parse stopped before the end. */
	if ((size_t)(end - str) != strlen(str))
		return (EINVAL);

	if (*value < 0)
		return (ERANGE);

	return (0);
}
/*
|
|
* Connect the rule to the racct, increasing refcount for the rule.
|
|
*/
|
|
static void
|
|
rctl_racct_add_rule(struct racct *racct, struct rctl_rule *rule)
|
|
{
|
|
struct rctl_rule_link *link;
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
|
|
|
|
rctl_rule_acquire(rule);
|
|
link = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
|
|
link->rrl_rule = rule;
|
|
link->rrl_exceeded = 0;
|
|
|
|
RACCT_LOCK();
|
|
LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
|
|
RACCT_UNLOCK();
|
|
}
|
|
|
|
static int
|
|
rctl_racct_add_rule_locked(struct racct *racct, struct rctl_rule *rule)
|
|
{
|
|
struct rctl_rule_link *link;
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
|
|
RACCT_LOCK_ASSERT();
|
|
|
|
link = uma_zalloc(rctl_rule_link_zone, M_NOWAIT);
|
|
if (link == NULL)
|
|
return (ENOMEM);
|
|
rctl_rule_acquire(rule);
|
|
link->rrl_rule = rule;
|
|
link->rrl_exceeded = 0;
|
|
|
|
LIST_INSERT_HEAD(&racct->r_rule_links, link, rrl_next);
|
|
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* Remove limits for a rules matching the filter and release
|
|
* the refcounts for the rules, possibly freeing them. Returns
|
|
* the number of limit structures removed.
|
|
*/
|
|
static int
|
|
rctl_racct_remove_rules(struct racct *racct,
|
|
const struct rctl_rule *filter)
|
|
{
|
|
struct rctl_rule_link *link, *linktmp;
|
|
int removed = 0;
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
RACCT_LOCK_ASSERT();
|
|
|
|
LIST_FOREACH_SAFE(link, &racct->r_rule_links, rrl_next, linktmp) {
|
|
if (!rctl_rule_matches(link->rrl_rule, filter))
|
|
continue;
|
|
|
|
LIST_REMOVE(link, rrl_next);
|
|
rctl_rule_release(link->rrl_rule);
|
|
uma_zfree(rctl_rule_link_zone, link);
|
|
removed++;
|
|
}
|
|
return (removed);
|
|
}
|
|
|
|
static void
|
|
rctl_rule_acquire_subject(struct rctl_rule *rule)
|
|
{
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
|
|
switch (rule->rr_subject_type) {
|
|
case RCTL_SUBJECT_TYPE_UNDEFINED:
|
|
case RCTL_SUBJECT_TYPE_PROCESS:
|
|
break;
|
|
case RCTL_SUBJECT_TYPE_JAIL:
|
|
if (rule->rr_subject.rs_prison_racct != NULL)
|
|
prison_racct_hold(rule->rr_subject.rs_prison_racct);
|
|
break;
|
|
case RCTL_SUBJECT_TYPE_USER:
|
|
if (rule->rr_subject.rs_uip != NULL)
|
|
uihold(rule->rr_subject.rs_uip);
|
|
break;
|
|
case RCTL_SUBJECT_TYPE_LOGINCLASS:
|
|
if (rule->rr_subject.rs_loginclass != NULL)
|
|
loginclass_hold(rule->rr_subject.rs_loginclass);
|
|
break;
|
|
default:
|
|
panic("rctl_rule_acquire_subject: unknown subject type %d",
|
|
rule->rr_subject_type);
|
|
}
|
|
}
|
|
|
|
static void
|
|
rctl_rule_release_subject(struct rctl_rule *rule)
|
|
{
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
|
|
switch (rule->rr_subject_type) {
|
|
case RCTL_SUBJECT_TYPE_UNDEFINED:
|
|
case RCTL_SUBJECT_TYPE_PROCESS:
|
|
break;
|
|
case RCTL_SUBJECT_TYPE_JAIL:
|
|
if (rule->rr_subject.rs_prison_racct != NULL)
|
|
prison_racct_free(rule->rr_subject.rs_prison_racct);
|
|
break;
|
|
case RCTL_SUBJECT_TYPE_USER:
|
|
if (rule->rr_subject.rs_uip != NULL)
|
|
uifree(rule->rr_subject.rs_uip);
|
|
break;
|
|
case RCTL_SUBJECT_TYPE_LOGINCLASS:
|
|
if (rule->rr_subject.rs_loginclass != NULL)
|
|
loginclass_free(rule->rr_subject.rs_loginclass);
|
|
break;
|
|
default:
|
|
panic("rctl_rule_release_subject: unknown subject type %d",
|
|
rule->rr_subject_type);
|
|
}
|
|
}
|
|
|
|
struct rctl_rule *
|
|
rctl_rule_alloc(int flags)
|
|
{
|
|
struct rctl_rule *rule;
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
|
|
rule = uma_zalloc(rctl_rule_zone, flags);
|
|
if (rule == NULL)
|
|
return (NULL);
|
|
rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
|
|
rule->rr_subject.rs_proc = NULL;
|
|
rule->rr_subject.rs_uip = NULL;
|
|
rule->rr_subject.rs_loginclass = NULL;
|
|
rule->rr_subject.rs_prison_racct = NULL;
|
|
rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
|
|
rule->rr_resource = RACCT_UNDEFINED;
|
|
rule->rr_action = RCTL_ACTION_UNDEFINED;
|
|
rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
|
|
refcount_init(&rule->rr_refcount, 1);
|
|
|
|
return (rule);
|
|
}
|
|
|
|
struct rctl_rule *
|
|
rctl_rule_duplicate(const struct rctl_rule *rule, int flags)
|
|
{
|
|
struct rctl_rule *copy;
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
|
|
copy = uma_zalloc(rctl_rule_zone, flags);
|
|
if (copy == NULL)
|
|
return (NULL);
|
|
copy->rr_subject_type = rule->rr_subject_type;
|
|
copy->rr_subject.rs_proc = rule->rr_subject.rs_proc;
|
|
copy->rr_subject.rs_uip = rule->rr_subject.rs_uip;
|
|
copy->rr_subject.rs_loginclass = rule->rr_subject.rs_loginclass;
|
|
copy->rr_subject.rs_prison_racct = rule->rr_subject.rs_prison_racct;
|
|
copy->rr_per = rule->rr_per;
|
|
copy->rr_resource = rule->rr_resource;
|
|
copy->rr_action = rule->rr_action;
|
|
copy->rr_amount = rule->rr_amount;
|
|
refcount_init(©->rr_refcount, 1);
|
|
rctl_rule_acquire_subject(copy);
|
|
|
|
return (copy);
|
|
}
|
|
|
|
void
|
|
rctl_rule_acquire(struct rctl_rule *rule)
|
|
{
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
|
|
|
|
refcount_acquire(&rule->rr_refcount);
|
|
}
|
|
|
|
static void
|
|
rctl_rule_free(void *context, int pending)
|
|
{
|
|
struct rctl_rule *rule;
|
|
|
|
rule = (struct rctl_rule *)context;
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
KASSERT(rule->rr_refcount == 0, ("rule->rr_refcount != 0"));
|
|
|
|
/*
|
|
* We don't need locking here; rule is guaranteed to be inaccessible.
|
|
*/
|
|
|
|
rctl_rule_release_subject(rule);
|
|
uma_zfree(rctl_rule_zone, rule);
|
|
}
|
|
|
|
void
|
|
rctl_rule_release(struct rctl_rule *rule)
|
|
{
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
KASSERT(rule->rr_refcount > 0, ("rule->rr_refcount <= 0"));
|
|
|
|
if (refcount_release(&rule->rr_refcount)) {
|
|
/*
|
|
* rctl_rule_release() is often called when iterating
|
|
* over all the uidinfo structures in the system,
|
|
* holding uihashtbl_lock. Since rctl_rule_free()
|
|
* might end up calling uifree(), this would lead
|
|
* to lock recursion. Use taskqueue to avoid this.
|
|
*/
|
|
TASK_INIT(&rule->rr_task, 0, rctl_rule_free, rule);
|
|
taskqueue_enqueue(taskqueue_thread, &rule->rr_task);
|
|
}
|
|
}
|
|
|
|
static int
|
|
rctl_rule_fully_specified(const struct rctl_rule *rule)
|
|
{
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
|
|
switch (rule->rr_subject_type) {
|
|
case RCTL_SUBJECT_TYPE_UNDEFINED:
|
|
return (0);
|
|
case RCTL_SUBJECT_TYPE_PROCESS:
|
|
if (rule->rr_subject.rs_proc == NULL)
|
|
return (0);
|
|
break;
|
|
case RCTL_SUBJECT_TYPE_USER:
|
|
if (rule->rr_subject.rs_uip == NULL)
|
|
return (0);
|
|
break;
|
|
case RCTL_SUBJECT_TYPE_LOGINCLASS:
|
|
if (rule->rr_subject.rs_loginclass == NULL)
|
|
return (0);
|
|
break;
|
|
case RCTL_SUBJECT_TYPE_JAIL:
|
|
if (rule->rr_subject.rs_prison_racct == NULL)
|
|
return (0);
|
|
break;
|
|
default:
|
|
panic("rctl_rule_fully_specified: unknown subject type %d",
|
|
rule->rr_subject_type);
|
|
}
|
|
if (rule->rr_resource == RACCT_UNDEFINED)
|
|
return (0);
|
|
if (rule->rr_action == RCTL_ACTION_UNDEFINED)
|
|
return (0);
|
|
if (rule->rr_amount == RCTL_AMOUNT_UNDEFINED)
|
|
return (0);
|
|
if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED)
|
|
return (0);
|
|
|
|
return (1);
|
|
}
|
|
|
|
static int
|
|
rctl_string_to_rule(char *rulestr, struct rctl_rule **rulep)
|
|
{
|
|
struct rctl_rule *rule;
|
|
char *subjectstr, *subject_idstr, *resourcestr, *actionstr,
|
|
*amountstr, *perstr;
|
|
id_t id;
|
|
int error = 0;
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
|
|
rule = rctl_rule_alloc(M_WAITOK);
|
|
|
|
subjectstr = strsep(&rulestr, ":");
|
|
subject_idstr = strsep(&rulestr, ":");
|
|
resourcestr = strsep(&rulestr, ":");
|
|
actionstr = strsep(&rulestr, "=/");
|
|
amountstr = strsep(&rulestr, "/");
|
|
perstr = rulestr;
|
|
|
|
if (subjectstr == NULL || subjectstr[0] == '\0')
|
|
rule->rr_subject_type = RCTL_SUBJECT_TYPE_UNDEFINED;
|
|
else {
|
|
error = str2value(subjectstr, &rule->rr_subject_type, subjectnames);
|
|
if (error != 0)
|
|
goto out;
|
|
}
|
|
|
|
if (subject_idstr == NULL || subject_idstr[0] == '\0') {
|
|
rule->rr_subject.rs_proc = NULL;
|
|
rule->rr_subject.rs_uip = NULL;
|
|
rule->rr_subject.rs_loginclass = NULL;
|
|
rule->rr_subject.rs_prison_racct = NULL;
|
|
} else {
|
|
switch (rule->rr_subject_type) {
|
|
case RCTL_SUBJECT_TYPE_UNDEFINED:
|
|
error = EINVAL;
|
|
goto out;
|
|
case RCTL_SUBJECT_TYPE_PROCESS:
|
|
error = str2id(subject_idstr, &id);
|
|
if (error != 0)
|
|
goto out;
|
|
sx_assert(&allproc_lock, SA_LOCKED);
|
|
rule->rr_subject.rs_proc = pfind(id);
|
|
if (rule->rr_subject.rs_proc == NULL) {
|
|
error = ESRCH;
|
|
goto out;
|
|
}
|
|
PROC_UNLOCK(rule->rr_subject.rs_proc);
|
|
break;
|
|
case RCTL_SUBJECT_TYPE_USER:
|
|
error = str2id(subject_idstr, &id);
|
|
if (error != 0)
|
|
goto out;
|
|
rule->rr_subject.rs_uip = uifind(id);
|
|
break;
|
|
case RCTL_SUBJECT_TYPE_LOGINCLASS:
|
|
rule->rr_subject.rs_loginclass =
|
|
loginclass_find(subject_idstr);
|
|
if (rule->rr_subject.rs_loginclass == NULL) {
|
|
error = ENAMETOOLONG;
|
|
goto out;
|
|
}
|
|
break;
|
|
case RCTL_SUBJECT_TYPE_JAIL:
|
|
rule->rr_subject.rs_prison_racct =
|
|
prison_racct_find(subject_idstr);
|
|
if (rule->rr_subject.rs_prison_racct == NULL) {
|
|
error = ENAMETOOLONG;
|
|
goto out;
|
|
}
|
|
break;
|
|
default:
|
|
panic("rctl_string_to_rule: unknown subject type %d",
|
|
rule->rr_subject_type);
|
|
}
|
|
}
|
|
|
|
if (resourcestr == NULL || resourcestr[0] == '\0')
|
|
rule->rr_resource = RACCT_UNDEFINED;
|
|
else {
|
|
error = str2value(resourcestr, &rule->rr_resource,
|
|
resourcenames);
|
|
if (error != 0)
|
|
goto out;
|
|
}
|
|
|
|
if (actionstr == NULL || actionstr[0] == '\0')
|
|
rule->rr_action = RCTL_ACTION_UNDEFINED;
|
|
else {
|
|
error = str2value(actionstr, &rule->rr_action, actionnames);
|
|
if (error != 0)
|
|
goto out;
|
|
}
|
|
|
|
if (amountstr == NULL || amountstr[0] == '\0')
|
|
rule->rr_amount = RCTL_AMOUNT_UNDEFINED;
|
|
else {
|
|
error = str2int64(amountstr, &rule->rr_amount);
|
|
if (error != 0)
|
|
goto out;
|
|
if (RACCT_IS_IN_MILLIONS(rule->rr_resource)) {
|
|
if (rule->rr_amount > INT64_MAX / 1000000) {
|
|
error = ERANGE;
|
|
goto out;
|
|
}
|
|
rule->rr_amount *= 1000000;
|
|
}
|
|
}
|
|
|
|
if (perstr == NULL || perstr[0] == '\0')
|
|
rule->rr_per = RCTL_SUBJECT_TYPE_UNDEFINED;
|
|
else {
|
|
error = str2value(perstr, &rule->rr_per, subjectnames);
|
|
if (error != 0)
|
|
goto out;
|
|
}
|
|
|
|
out:
|
|
if (error == 0)
|
|
*rulep = rule;
|
|
else
|
|
rctl_rule_release(rule);
|
|
|
|
return (error);
|
|
}
|
|
|
|
/*
|
|
* Link a rule with all the subjects it applies to.
|
|
*/
|
|
int
|
|
rctl_rule_add(struct rctl_rule *rule)
|
|
{
|
|
struct proc *p;
|
|
struct ucred *cred;
|
|
struct uidinfo *uip;
|
|
struct prison *pr;
|
|
struct prison_racct *prr;
|
|
struct loginclass *lc;
|
|
struct rctl_rule *rule2;
|
|
int match;
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
KASSERT(rctl_rule_fully_specified(rule), ("rule not fully specified"));
|
|
|
|
/*
|
|
* Some rules just don't make sense, like "deny" rule for an undeniable
|
|
* resource. The exception are the RSS and %CPU resources - they are
|
|
* not deniable in the racct sense, but the limit is enforced in
|
|
* a different way.
|
|
*/
|
|
if (rule->rr_action == RCTL_ACTION_DENY &&
|
|
!RACCT_IS_DENIABLE(rule->rr_resource) &&
|
|
rule->rr_resource != RACCT_RSS &&
|
|
rule->rr_resource != RACCT_PCTCPU) {
|
|
return (EOPNOTSUPP);
|
|
}
|
|
|
|
if (rule->rr_action == RCTL_ACTION_THROTTLE &&
|
|
!RACCT_IS_DECAYING(rule->rr_resource)) {
|
|
return (EOPNOTSUPP);
|
|
}
|
|
|
|
if (rule->rr_action == RCTL_ACTION_THROTTLE &&
|
|
rule->rr_resource == RACCT_PCTCPU) {
|
|
return (EOPNOTSUPP);
|
|
}
|
|
|
|
if (rule->rr_per == RCTL_SUBJECT_TYPE_PROCESS &&
|
|
RACCT_IS_SLOPPY(rule->rr_resource)) {
|
|
return (EOPNOTSUPP);
|
|
}
|
|
|
|
/*
|
|
* Make sure there are no duplicated rules. Also, for the "deny"
|
|
* rules, remove ones differing only by "amount".
|
|
*/
|
|
if (rule->rr_action == RCTL_ACTION_DENY) {
|
|
rule2 = rctl_rule_duplicate(rule, M_WAITOK);
|
|
rule2->rr_amount = RCTL_AMOUNT_UNDEFINED;
|
|
rctl_rule_remove(rule2);
|
|
rctl_rule_release(rule2);
|
|
} else
|
|
rctl_rule_remove(rule);
|
|
|
|
switch (rule->rr_subject_type) {
|
|
case RCTL_SUBJECT_TYPE_PROCESS:
|
|
p = rule->rr_subject.rs_proc;
|
|
KASSERT(p != NULL, ("rctl_rule_add: NULL proc"));
|
|
|
|
rctl_racct_add_rule(p->p_racct, rule);
|
|
/*
|
|
* In case of per-process rule, we don't have anything more
|
|
* to do.
|
|
*/
|
|
return (0);
|
|
|
|
case RCTL_SUBJECT_TYPE_USER:
|
|
uip = rule->rr_subject.rs_uip;
|
|
KASSERT(uip != NULL, ("rctl_rule_add: NULL uip"));
|
|
rctl_racct_add_rule(uip->ui_racct, rule);
|
|
break;
|
|
|
|
case RCTL_SUBJECT_TYPE_LOGINCLASS:
|
|
lc = rule->rr_subject.rs_loginclass;
|
|
KASSERT(lc != NULL, ("rctl_rule_add: NULL loginclass"));
|
|
rctl_racct_add_rule(lc->lc_racct, rule);
|
|
break;
|
|
|
|
case RCTL_SUBJECT_TYPE_JAIL:
|
|
prr = rule->rr_subject.rs_prison_racct;
|
|
KASSERT(prr != NULL, ("rctl_rule_add: NULL pr"));
|
|
rctl_racct_add_rule(prr->prr_racct, rule);
|
|
break;
|
|
|
|
default:
|
|
panic("rctl_rule_add: unknown subject type %d",
|
|
rule->rr_subject_type);
|
|
}
|
|
|
|
/*
|
|
* Now go through all the processes and add the new rule to the ones
|
|
* it applies to.
|
|
*/
|
|
sx_assert(&allproc_lock, SA_LOCKED);
|
|
FOREACH_PROC_IN_SYSTEM(p) {
|
|
cred = p->p_ucred;
|
|
switch (rule->rr_subject_type) {
|
|
case RCTL_SUBJECT_TYPE_USER:
|
|
if (cred->cr_uidinfo == rule->rr_subject.rs_uip ||
|
|
cred->cr_ruidinfo == rule->rr_subject.rs_uip)
|
|
break;
|
|
continue;
|
|
case RCTL_SUBJECT_TYPE_LOGINCLASS:
|
|
if (cred->cr_loginclass == rule->rr_subject.rs_loginclass)
|
|
break;
|
|
continue;
|
|
case RCTL_SUBJECT_TYPE_JAIL:
|
|
match = 0;
|
|
for (pr = cred->cr_prison; pr != NULL; pr = pr->pr_parent) {
|
|
if (pr->pr_prison_racct == rule->rr_subject.rs_prison_racct) {
|
|
match = 1;
|
|
break;
|
|
}
|
|
}
|
|
if (match)
|
|
break;
|
|
continue;
|
|
default:
|
|
panic("rctl_rule_add: unknown subject type %d",
|
|
rule->rr_subject_type);
|
|
}
|
|
|
|
rctl_racct_add_rule(p->p_racct, rule);
|
|
}
|
|
|
|
return (0);
|
|
}
|
|
|
|
/*
 * Passed to the *_racct_foreach() iterators as their "pre" hook:
 * takes the racct lock.
 */
static void
rctl_rule_pre_callback(void)
{

	RACCT_LOCK();
}
|
|
|
|
/*
 * Passed to the *_racct_foreach() iterators as their "post" hook:
 * drops the racct lock.
 */
static void
rctl_rule_post_callback(void)
{

	RACCT_UNLOCK();
}
|
|
|
|
/*
 * Per-racct callback used by rctl_rule_remove().  Removes the rules
 * matching the filter (arg2) from 'racct' and adds the number removed
 * to the int counter that arg3 points to.  Expects the racct lock to
 * be held.
 */
static void
rctl_rule_remove_callback(struct racct *racct, void *arg2, void *arg3)
{
	struct rctl_rule *filter = arg2;
	int *foundp = arg3;

	ASSERT_RACCT_ENABLED();
	RACCT_LOCK_ASSERT();

	*foundp += rctl_racct_remove_rules(racct, filter);
}
|
|
|
|
/*
|
|
* Remove all rules that match the filter.
|
|
*/
|
|
int
|
|
rctl_rule_remove(struct rctl_rule *filter)
|
|
{
|
|
struct proc *p;
|
|
int found = 0;
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
|
|
if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_PROCESS &&
|
|
filter->rr_subject.rs_proc != NULL) {
|
|
p = filter->rr_subject.rs_proc;
|
|
RACCT_LOCK();
|
|
found = rctl_racct_remove_rules(p->p_racct, filter);
|
|
RACCT_UNLOCK();
|
|
if (found)
|
|
return (0);
|
|
return (ESRCH);
|
|
}
|
|
|
|
loginclass_racct_foreach(rctl_rule_remove_callback,
|
|
rctl_rule_pre_callback, rctl_rule_post_callback,
|
|
filter, (void *)&found);
|
|
ui_racct_foreach(rctl_rule_remove_callback,
|
|
rctl_rule_pre_callback, rctl_rule_post_callback,
|
|
filter, (void *)&found);
|
|
prison_racct_foreach(rctl_rule_remove_callback,
|
|
rctl_rule_pre_callback, rctl_rule_post_callback,
|
|
filter, (void *)&found);
|
|
|
|
sx_assert(&allproc_lock, SA_LOCKED);
|
|
RACCT_LOCK();
|
|
FOREACH_PROC_IN_SYSTEM(p) {
|
|
found += rctl_racct_remove_rules(p->p_racct, filter);
|
|
}
|
|
RACCT_UNLOCK();
|
|
|
|
if (found)
|
|
return (0);
|
|
return (ESRCH);
|
|
}
|
|
|
|
/*
|
|
* Appends a rule to the sbuf.
|
|
*/
|
|
static void
|
|
rctl_rule_to_sbuf(struct sbuf *sb, const struct rctl_rule *rule)
|
|
{
|
|
int64_t amount;
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
|
|
sbuf_printf(sb, "%s:", rctl_subject_type_name(rule->rr_subject_type));
|
|
|
|
switch (rule->rr_subject_type) {
|
|
case RCTL_SUBJECT_TYPE_PROCESS:
|
|
if (rule->rr_subject.rs_proc == NULL)
|
|
sbuf_printf(sb, ":");
|
|
else
|
|
sbuf_printf(sb, "%d:",
|
|
rule->rr_subject.rs_proc->p_pid);
|
|
break;
|
|
case RCTL_SUBJECT_TYPE_USER:
|
|
if (rule->rr_subject.rs_uip == NULL)
|
|
sbuf_printf(sb, ":");
|
|
else
|
|
sbuf_printf(sb, "%d:",
|
|
rule->rr_subject.rs_uip->ui_uid);
|
|
break;
|
|
case RCTL_SUBJECT_TYPE_LOGINCLASS:
|
|
if (rule->rr_subject.rs_loginclass == NULL)
|
|
sbuf_printf(sb, ":");
|
|
else
|
|
sbuf_printf(sb, "%s:",
|
|
rule->rr_subject.rs_loginclass->lc_name);
|
|
break;
|
|
case RCTL_SUBJECT_TYPE_JAIL:
|
|
if (rule->rr_subject.rs_prison_racct == NULL)
|
|
sbuf_printf(sb, ":");
|
|
else
|
|
sbuf_printf(sb, "%s:",
|
|
rule->rr_subject.rs_prison_racct->prr_name);
|
|
break;
|
|
default:
|
|
panic("rctl_rule_to_sbuf: unknown subject type %d",
|
|
rule->rr_subject_type);
|
|
}
|
|
|
|
amount = rule->rr_amount;
|
|
if (amount != RCTL_AMOUNT_UNDEFINED &&
|
|
RACCT_IS_IN_MILLIONS(rule->rr_resource))
|
|
amount /= 1000000;
|
|
|
|
sbuf_printf(sb, "%s:%s=%jd",
|
|
rctl_resource_name(rule->rr_resource),
|
|
rctl_action_name(rule->rr_action),
|
|
amount);
|
|
|
|
if (rule->rr_per != rule->rr_subject_type)
|
|
sbuf_printf(sb, "/%s", rctl_subject_type_name(rule->rr_per));
|
|
}
|
|
|
|
/*
|
|
* Routine used by RCTL syscalls to read in input string.
|
|
*/
|
|
static int
|
|
rctl_read_inbuf(char **inputstr, const char *inbufp, size_t inbuflen)
|
|
{
|
|
char *str;
|
|
int error;
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
|
|
if (inbuflen <= 0)
|
|
return (EINVAL);
|
|
if (inbuflen > RCTL_MAX_INBUFSIZE)
|
|
return (E2BIG);
|
|
|
|
str = malloc(inbuflen + 1, M_RCTL, M_WAITOK);
|
|
error = copyinstr(inbufp, str, inbuflen, NULL);
|
|
if (error != 0) {
|
|
free(str, M_RCTL);
|
|
return (error);
|
|
}
|
|
|
|
*inputstr = str;
|
|
|
|
return (0);
|
|
}
|
|
|
|
/*
|
|
* Routine used by RCTL syscalls to write out output string.
|
|
*/
|
|
static int
|
|
rctl_write_outbuf(struct sbuf *outputsbuf, char *outbufp, size_t outbuflen)
|
|
{
|
|
int error;
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
|
|
if (outputsbuf == NULL)
|
|
return (0);
|
|
|
|
sbuf_finish(outputsbuf);
|
|
if (outbuflen < sbuf_len(outputsbuf) + 1) {
|
|
sbuf_delete(outputsbuf);
|
|
return (ERANGE);
|
|
}
|
|
error = copyout(sbuf_data(outputsbuf), outbufp,
|
|
sbuf_len(outputsbuf) + 1);
|
|
sbuf_delete(outputsbuf);
|
|
return (error);
|
|
}
|
|
|
|
static struct sbuf *
|
|
rctl_racct_to_sbuf(struct racct *racct, int sloppy)
|
|
{
|
|
struct sbuf *sb;
|
|
int64_t amount;
|
|
int i;
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
|
|
sb = sbuf_new_auto();
|
|
for (i = 0; i <= RACCT_MAX; i++) {
|
|
if (sloppy == 0 && RACCT_IS_SLOPPY(i))
|
|
continue;
|
|
RACCT_LOCK();
|
|
amount = racct->r_resources[i];
|
|
RACCT_UNLOCK();
|
|
if (RACCT_IS_IN_MILLIONS(i))
|
|
amount /= 1000000;
|
|
sbuf_printf(sb, "%s=%jd,", rctl_resource_name(i), amount);
|
|
}
|
|
sbuf_setpos(sb, sbuf_len(sb) - 1);
|
|
return (sb);
|
|
}
|
|
|
|
int
|
|
sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
|
|
{
|
|
struct rctl_rule *filter;
|
|
struct sbuf *outputsbuf = NULL;
|
|
struct proc *p;
|
|
struct uidinfo *uip;
|
|
struct loginclass *lc;
|
|
struct prison_racct *prr;
|
|
char *inputstr;
|
|
int error;
|
|
|
|
if (!racct_enable)
|
|
return (ENOSYS);
|
|
|
|
error = priv_check(td, PRIV_RCTL_GET_RACCT);
|
|
if (error != 0)
|
|
return (error);
|
|
|
|
error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
|
|
if (error != 0)
|
|
return (error);
|
|
|
|
sx_slock(&allproc_lock);
|
|
error = rctl_string_to_rule(inputstr, &filter);
|
|
free(inputstr, M_RCTL);
|
|
if (error != 0) {
|
|
sx_sunlock(&allproc_lock);
|
|
return (error);
|
|
}
|
|
|
|
switch (filter->rr_subject_type) {
|
|
case RCTL_SUBJECT_TYPE_PROCESS:
|
|
p = filter->rr_subject.rs_proc;
|
|
if (p == NULL) {
|
|
error = EINVAL;
|
|
goto out;
|
|
}
|
|
outputsbuf = rctl_racct_to_sbuf(p->p_racct, 0);
|
|
break;
|
|
case RCTL_SUBJECT_TYPE_USER:
|
|
uip = filter->rr_subject.rs_uip;
|
|
if (uip == NULL) {
|
|
error = EINVAL;
|
|
goto out;
|
|
}
|
|
outputsbuf = rctl_racct_to_sbuf(uip->ui_racct, 1);
|
|
break;
|
|
case RCTL_SUBJECT_TYPE_LOGINCLASS:
|
|
lc = filter->rr_subject.rs_loginclass;
|
|
if (lc == NULL) {
|
|
error = EINVAL;
|
|
goto out;
|
|
}
|
|
outputsbuf = rctl_racct_to_sbuf(lc->lc_racct, 1);
|
|
break;
|
|
case RCTL_SUBJECT_TYPE_JAIL:
|
|
prr = filter->rr_subject.rs_prison_racct;
|
|
if (prr == NULL) {
|
|
error = EINVAL;
|
|
goto out;
|
|
}
|
|
outputsbuf = rctl_racct_to_sbuf(prr->prr_racct, 1);
|
|
break;
|
|
default:
|
|
error = EINVAL;
|
|
}
|
|
out:
|
|
rctl_rule_release(filter);
|
|
sx_sunlock(&allproc_lock);
|
|
if (error != 0)
|
|
return (error);
|
|
|
|
error = rctl_write_outbuf(outputsbuf, uap->outbufp, uap->outbuflen);
|
|
|
|
return (error);
|
|
}
|
|
|
|
static void
|
|
rctl_get_rules_callback(struct racct *racct, void *arg2, void *arg3)
|
|
{
|
|
struct rctl_rule *filter = (struct rctl_rule *)arg2;
|
|
struct rctl_rule_link *link;
|
|
struct sbuf *sb = (struct sbuf *)arg3;
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
RACCT_LOCK_ASSERT();
|
|
|
|
LIST_FOREACH(link, &racct->r_rule_links, rrl_next) {
|
|
if (!rctl_rule_matches(link->rrl_rule, filter))
|
|
continue;
|
|
rctl_rule_to_sbuf(sb, link->rrl_rule);
|
|
sbuf_printf(sb, ",");
|
|
}
|
|
}
|
|
|
|
int
|
|
sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
|
|
{
|
|
struct sbuf *sb;
|
|
struct rctl_rule *filter;
|
|
struct rctl_rule_link *link;
|
|
struct proc *p;
|
|
char *inputstr, *buf;
|
|
size_t bufsize;
|
|
int error;
|
|
|
|
if (!racct_enable)
|
|
return (ENOSYS);
|
|
|
|
error = priv_check(td, PRIV_RCTL_GET_RULES);
|
|
if (error != 0)
|
|
return (error);
|
|
|
|
error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
|
|
if (error != 0)
|
|
return (error);
|
|
|
|
sx_slock(&allproc_lock);
|
|
error = rctl_string_to_rule(inputstr, &filter);
|
|
free(inputstr, M_RCTL);
|
|
if (error != 0) {
|
|
sx_sunlock(&allproc_lock);
|
|
return (error);
|
|
}
|
|
|
|
bufsize = uap->outbuflen;
|
|
if (bufsize > rctl_maxbufsize) {
|
|
sx_sunlock(&allproc_lock);
|
|
return (E2BIG);
|
|
}
|
|
|
|
buf = malloc(bufsize, M_RCTL, M_WAITOK);
|
|
sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
|
|
KASSERT(sb != NULL, ("sbuf_new failed"));
|
|
|
|
FOREACH_PROC_IN_SYSTEM(p) {
|
|
RACCT_LOCK();
|
|
LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
|
|
/*
|
|
* Non-process rules will be added to the buffer later.
|
|
* Adding them here would result in duplicated output.
|
|
*/
|
|
if (link->rrl_rule->rr_subject_type !=
|
|
RCTL_SUBJECT_TYPE_PROCESS)
|
|
continue;
|
|
if (!rctl_rule_matches(link->rrl_rule, filter))
|
|
continue;
|
|
rctl_rule_to_sbuf(sb, link->rrl_rule);
|
|
sbuf_printf(sb, ",");
|
|
}
|
|
RACCT_UNLOCK();
|
|
}
|
|
|
|
loginclass_racct_foreach(rctl_get_rules_callback,
|
|
rctl_rule_pre_callback, rctl_rule_post_callback,
|
|
filter, sb);
|
|
ui_racct_foreach(rctl_get_rules_callback,
|
|
rctl_rule_pre_callback, rctl_rule_post_callback,
|
|
filter, sb);
|
|
prison_racct_foreach(rctl_get_rules_callback,
|
|
rctl_rule_pre_callback, rctl_rule_post_callback,
|
|
filter, sb);
|
|
if (sbuf_error(sb) == ENOMEM) {
|
|
error = ERANGE;
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* Remove trailing ",".
|
|
*/
|
|
if (sbuf_len(sb) > 0)
|
|
sbuf_setpos(sb, sbuf_len(sb) - 1);
|
|
|
|
error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
|
|
out:
|
|
rctl_rule_release(filter);
|
|
sx_sunlock(&allproc_lock);
|
|
free(buf, M_RCTL);
|
|
return (error);
|
|
}
|
|
|
|
int
|
|
sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
|
|
{
|
|
struct sbuf *sb;
|
|
struct rctl_rule *filter;
|
|
struct rctl_rule_link *link;
|
|
char *inputstr, *buf;
|
|
size_t bufsize;
|
|
int error;
|
|
|
|
if (!racct_enable)
|
|
return (ENOSYS);
|
|
|
|
error = priv_check(td, PRIV_RCTL_GET_LIMITS);
|
|
if (error != 0)
|
|
return (error);
|
|
|
|
error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
|
|
if (error != 0)
|
|
return (error);
|
|
|
|
sx_slock(&allproc_lock);
|
|
error = rctl_string_to_rule(inputstr, &filter);
|
|
free(inputstr, M_RCTL);
|
|
if (error != 0) {
|
|
sx_sunlock(&allproc_lock);
|
|
return (error);
|
|
}
|
|
|
|
if (filter->rr_subject_type == RCTL_SUBJECT_TYPE_UNDEFINED) {
|
|
rctl_rule_release(filter);
|
|
sx_sunlock(&allproc_lock);
|
|
return (EINVAL);
|
|
}
|
|
if (filter->rr_subject_type != RCTL_SUBJECT_TYPE_PROCESS) {
|
|
rctl_rule_release(filter);
|
|
sx_sunlock(&allproc_lock);
|
|
return (EOPNOTSUPP);
|
|
}
|
|
if (filter->rr_subject.rs_proc == NULL) {
|
|
rctl_rule_release(filter);
|
|
sx_sunlock(&allproc_lock);
|
|
return (EINVAL);
|
|
}
|
|
|
|
bufsize = uap->outbuflen;
|
|
if (bufsize > rctl_maxbufsize) {
|
|
rctl_rule_release(filter);
|
|
sx_sunlock(&allproc_lock);
|
|
return (E2BIG);
|
|
}
|
|
|
|
buf = malloc(bufsize, M_RCTL, M_WAITOK);
|
|
sb = sbuf_new(NULL, buf, bufsize, SBUF_FIXEDLEN);
|
|
KASSERT(sb != NULL, ("sbuf_new failed"));
|
|
|
|
RACCT_LOCK();
|
|
LIST_FOREACH(link, &filter->rr_subject.rs_proc->p_racct->r_rule_links,
|
|
rrl_next) {
|
|
rctl_rule_to_sbuf(sb, link->rrl_rule);
|
|
sbuf_printf(sb, ",");
|
|
}
|
|
RACCT_UNLOCK();
|
|
if (sbuf_error(sb) == ENOMEM) {
|
|
error = ERANGE;
|
|
sbuf_delete(sb);
|
|
goto out;
|
|
}
|
|
|
|
/*
|
|
* Remove trailing ",".
|
|
*/
|
|
if (sbuf_len(sb) > 0)
|
|
sbuf_setpos(sb, sbuf_len(sb) - 1);
|
|
|
|
error = rctl_write_outbuf(sb, uap->outbufp, uap->outbuflen);
|
|
out:
|
|
rctl_rule_release(filter);
|
|
sx_sunlock(&allproc_lock);
|
|
free(buf, M_RCTL);
|
|
return (error);
|
|
}
|
|
|
|
int
|
|
sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
|
|
{
|
|
struct rctl_rule *rule;
|
|
char *inputstr;
|
|
int error;
|
|
|
|
if (!racct_enable)
|
|
return (ENOSYS);
|
|
|
|
error = priv_check(td, PRIV_RCTL_ADD_RULE);
|
|
if (error != 0)
|
|
return (error);
|
|
|
|
error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
|
|
if (error != 0)
|
|
return (error);
|
|
|
|
sx_slock(&allproc_lock);
|
|
error = rctl_string_to_rule(inputstr, &rule);
|
|
free(inputstr, M_RCTL);
|
|
if (error != 0) {
|
|
sx_sunlock(&allproc_lock);
|
|
return (error);
|
|
}
|
|
/*
|
|
* The 'per' part of a rule is optional.
|
|
*/
|
|
if (rule->rr_per == RCTL_SUBJECT_TYPE_UNDEFINED &&
|
|
rule->rr_subject_type != RCTL_SUBJECT_TYPE_UNDEFINED)
|
|
rule->rr_per = rule->rr_subject_type;
|
|
|
|
if (!rctl_rule_fully_specified(rule)) {
|
|
error = EINVAL;
|
|
goto out;
|
|
}
|
|
|
|
error = rctl_rule_add(rule);
|
|
|
|
out:
|
|
rctl_rule_release(rule);
|
|
sx_sunlock(&allproc_lock);
|
|
return (error);
|
|
}
|
|
|
|
int
|
|
sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
|
|
{
|
|
struct rctl_rule *filter;
|
|
char *inputstr;
|
|
int error;
|
|
|
|
if (!racct_enable)
|
|
return (ENOSYS);
|
|
|
|
error = priv_check(td, PRIV_RCTL_REMOVE_RULE);
|
|
if (error != 0)
|
|
return (error);
|
|
|
|
error = rctl_read_inbuf(&inputstr, uap->inbufp, uap->inbuflen);
|
|
if (error != 0)
|
|
return (error);
|
|
|
|
sx_slock(&allproc_lock);
|
|
error = rctl_string_to_rule(inputstr, &filter);
|
|
free(inputstr, M_RCTL);
|
|
if (error != 0) {
|
|
sx_sunlock(&allproc_lock);
|
|
return (error);
|
|
}
|
|
|
|
error = rctl_rule_remove(filter);
|
|
rctl_rule_release(filter);
|
|
sx_sunlock(&allproc_lock);
|
|
|
|
return (error);
|
|
}
|
|
|
|
/*
|
|
* Update RCTL rule list after credential change.
|
|
*/
|
|
void
|
|
rctl_proc_ucred_changed(struct proc *p, struct ucred *newcred)
|
|
{
|
|
LIST_HEAD(, rctl_rule_link) newrules;
|
|
struct rctl_rule_link *link, *newlink;
|
|
struct uidinfo *newuip;
|
|
struct loginclass *newlc;
|
|
struct prison_racct *newprr;
|
|
int rulecnt, i;
|
|
|
|
if (!racct_enable)
|
|
return;
|
|
|
|
PROC_LOCK_ASSERT(p, MA_NOTOWNED);
|
|
|
|
newuip = newcred->cr_ruidinfo;
|
|
newlc = newcred->cr_loginclass;
|
|
newprr = newcred->cr_prison->pr_prison_racct;
|
|
|
|
LIST_INIT(&newrules);
|
|
|
|
again:
|
|
/*
|
|
* First, count the rules that apply to the process with new
|
|
* credentials.
|
|
*/
|
|
rulecnt = 0;
|
|
RACCT_LOCK();
|
|
LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
|
|
if (link->rrl_rule->rr_subject_type ==
|
|
RCTL_SUBJECT_TYPE_PROCESS)
|
|
rulecnt++;
|
|
}
|
|
LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next)
|
|
rulecnt++;
|
|
LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next)
|
|
rulecnt++;
|
|
LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next)
|
|
rulecnt++;
|
|
RACCT_UNLOCK();
|
|
|
|
/*
|
|
* Create temporary list. We've dropped the rctl_lock in order
|
|
* to use M_WAITOK.
|
|
*/
|
|
for (i = 0; i < rulecnt; i++) {
|
|
newlink = uma_zalloc(rctl_rule_link_zone, M_WAITOK);
|
|
newlink->rrl_rule = NULL;
|
|
newlink->rrl_exceeded = 0;
|
|
LIST_INSERT_HEAD(&newrules, newlink, rrl_next);
|
|
}
|
|
|
|
newlink = LIST_FIRST(&newrules);
|
|
|
|
/*
|
|
* Assign rules to the newly allocated list entries.
|
|
*/
|
|
RACCT_LOCK();
|
|
LIST_FOREACH(link, &p->p_racct->r_rule_links, rrl_next) {
|
|
if (link->rrl_rule->rr_subject_type ==
|
|
RCTL_SUBJECT_TYPE_PROCESS) {
|
|
if (newlink == NULL)
|
|
goto goaround;
|
|
rctl_rule_acquire(link->rrl_rule);
|
|
newlink->rrl_rule = link->rrl_rule;
|
|
newlink->rrl_exceeded = link->rrl_exceeded;
|
|
newlink = LIST_NEXT(newlink, rrl_next);
|
|
rulecnt--;
|
|
}
|
|
}
|
|
|
|
LIST_FOREACH(link, &newuip->ui_racct->r_rule_links, rrl_next) {
|
|
if (newlink == NULL)
|
|
goto goaround;
|
|
rctl_rule_acquire(link->rrl_rule);
|
|
newlink->rrl_rule = link->rrl_rule;
|
|
newlink->rrl_exceeded = link->rrl_exceeded;
|
|
newlink = LIST_NEXT(newlink, rrl_next);
|
|
rulecnt--;
|
|
}
|
|
|
|
LIST_FOREACH(link, &newlc->lc_racct->r_rule_links, rrl_next) {
|
|
if (newlink == NULL)
|
|
goto goaround;
|
|
rctl_rule_acquire(link->rrl_rule);
|
|
newlink->rrl_rule = link->rrl_rule;
|
|
newlink->rrl_exceeded = link->rrl_exceeded;
|
|
newlink = LIST_NEXT(newlink, rrl_next);
|
|
rulecnt--;
|
|
}
|
|
|
|
LIST_FOREACH(link, &newprr->prr_racct->r_rule_links, rrl_next) {
|
|
if (newlink == NULL)
|
|
goto goaround;
|
|
rctl_rule_acquire(link->rrl_rule);
|
|
newlink->rrl_rule = link->rrl_rule;
|
|
newlink->rrl_exceeded = link->rrl_exceeded;
|
|
newlink = LIST_NEXT(newlink, rrl_next);
|
|
rulecnt--;
|
|
}
|
|
|
|
if (rulecnt == 0) {
|
|
/*
|
|
* Free the old rule list.
|
|
*/
|
|
while (!LIST_EMPTY(&p->p_racct->r_rule_links)) {
|
|
link = LIST_FIRST(&p->p_racct->r_rule_links);
|
|
LIST_REMOVE(link, rrl_next);
|
|
rctl_rule_release(link->rrl_rule);
|
|
uma_zfree(rctl_rule_link_zone, link);
|
|
}
|
|
|
|
/*
|
|
* Replace lists and we're done.
|
|
*
|
|
* XXX: Is there any way to switch list heads instead
|
|
* of iterating here?
|
|
*/
|
|
while (!LIST_EMPTY(&newrules)) {
|
|
newlink = LIST_FIRST(&newrules);
|
|
LIST_REMOVE(newlink, rrl_next);
|
|
LIST_INSERT_HEAD(&p->p_racct->r_rule_links,
|
|
newlink, rrl_next);
|
|
}
|
|
|
|
RACCT_UNLOCK();
|
|
|
|
return;
|
|
}
|
|
|
|
goaround:
|
|
RACCT_UNLOCK();
|
|
|
|
/*
|
|
* Rule list changed while we were not holding the rctl_lock.
|
|
* Free the new list and try again.
|
|
*/
|
|
while (!LIST_EMPTY(&newrules)) {
|
|
newlink = LIST_FIRST(&newrules);
|
|
LIST_REMOVE(newlink, rrl_next);
|
|
if (newlink->rrl_rule != NULL)
|
|
rctl_rule_release(newlink->rrl_rule);
|
|
uma_zfree(rctl_rule_link_zone, newlink);
|
|
}
|
|
|
|
goto again;
|
|
}
|
|
|
|
/*
|
|
* Assign RCTL rules to the newly created process.
|
|
*/
|
|
int
|
|
rctl_proc_fork(struct proc *parent, struct proc *child)
|
|
{
|
|
struct rctl_rule *rule;
|
|
struct rctl_rule_link *link;
|
|
int error;
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
RACCT_LOCK_ASSERT();
|
|
KASSERT(parent->p_racct != NULL, ("process without racct; p = %p", parent));
|
|
|
|
LIST_INIT(&child->p_racct->r_rule_links);
|
|
|
|
/*
|
|
* Go through limits applicable to the parent and assign them
|
|
* to the child. Rules with 'process' subject have to be duplicated
|
|
* in order to make their rr_subject point to the new process.
|
|
*/
|
|
LIST_FOREACH(link, &parent->p_racct->r_rule_links, rrl_next) {
|
|
if (link->rrl_rule->rr_subject_type ==
|
|
RCTL_SUBJECT_TYPE_PROCESS) {
|
|
rule = rctl_rule_duplicate(link->rrl_rule, M_NOWAIT);
|
|
if (rule == NULL)
|
|
goto fail;
|
|
KASSERT(rule->rr_subject.rs_proc == parent,
|
|
("rule->rr_subject.rs_proc != parent"));
|
|
rule->rr_subject.rs_proc = child;
|
|
error = rctl_racct_add_rule_locked(child->p_racct,
|
|
rule);
|
|
rctl_rule_release(rule);
|
|
if (error != 0)
|
|
goto fail;
|
|
} else {
|
|
error = rctl_racct_add_rule_locked(child->p_racct,
|
|
link->rrl_rule);
|
|
if (error != 0)
|
|
goto fail;
|
|
}
|
|
}
|
|
|
|
return (0);
|
|
|
|
fail:
|
|
while (!LIST_EMPTY(&child->p_racct->r_rule_links)) {
|
|
link = LIST_FIRST(&child->p_racct->r_rule_links);
|
|
LIST_REMOVE(link, rrl_next);
|
|
rctl_rule_release(link->rrl_rule);
|
|
uma_zfree(rctl_rule_link_zone, link);
|
|
}
|
|
|
|
return (EAGAIN);
|
|
}
|
|
|
|
/*
|
|
* Release rules attached to the racct.
|
|
*/
|
|
void
|
|
rctl_racct_release(struct racct *racct)
|
|
{
|
|
struct rctl_rule_link *link;
|
|
|
|
ASSERT_RACCT_ENABLED();
|
|
RACCT_LOCK_ASSERT();
|
|
|
|
while (!LIST_EMPTY(&racct->r_rule_links)) {
|
|
link = LIST_FIRST(&racct->r_rule_links);
|
|
LIST_REMOVE(link, rrl_next);
|
|
rctl_rule_release(link->rrl_rule);
|
|
uma_zfree(rctl_rule_link_zone, link);
|
|
}
|
|
}
|
|
|
|
static void
|
|
rctl_init(void)
|
|
{
|
|
|
|
if (!racct_enable)
|
|
return;
|
|
|
|
rctl_rule_zone = uma_zcreate("rctl_rule", sizeof(struct rctl_rule),
|
|
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
|
|
rctl_rule_link_zone = uma_zcreate("rctl_rule_link",
|
|
sizeof(struct rctl_rule_link), NULL, NULL, NULL, NULL,
|
|
UMA_ALIGN_PTR, 0);
|
|
|
|
/*
|
|
* Set default values, making sure not to overwrite the ones
|
|
* fetched from tunables. Most of those could be set at the
|
|
* declaration, except for the rctl_throttle_max - we cannot
|
|
* set it there due to hz not being compile time constant.
|
|
*/
|
|
if (rctl_throttle_min < 1)
|
|
rctl_throttle_min = 1;
|
|
if (rctl_throttle_max < rctl_throttle_min)
|
|
rctl_throttle_max = 2 * hz;
|
|
if (rctl_throttle_pct < 0)
|
|
rctl_throttle_pct = 100;
|
|
if (rctl_throttle_pct2 < 0)
|
|
rctl_throttle_pct2 = 100;
|
|
}
|
|
|
|
#else /* !RCTL */
|
|
|
|
/*
 * Stub used when the kernel is built without RCTL: always fails with
 * ENOSYS.
 */
int
sys_rctl_get_racct(struct thread *td, struct rctl_get_racct_args *uap)
{

	return (ENOSYS);
}
|
|
|
|
/*
 * Stub used when the kernel is built without RCTL: always fails with
 * ENOSYS.
 */
int
sys_rctl_get_rules(struct thread *td, struct rctl_get_rules_args *uap)
{

	return (ENOSYS);
}
|
|
|
|
/*
 * Stub used when the kernel is built without RCTL: always fails with
 * ENOSYS.
 */
int
sys_rctl_get_limits(struct thread *td, struct rctl_get_limits_args *uap)
{

	return (ENOSYS);
}
|
|
|
|
/*
 * Stub used when the kernel is built without RCTL: always fails with
 * ENOSYS.
 */
int
sys_rctl_add_rule(struct thread *td, struct rctl_add_rule_args *uap)
{

	return (ENOSYS);
}
|
|
|
|
/*
 * Stub used when the kernel is built without RCTL: always fails with
 * ENOSYS.
 */
int
sys_rctl_remove_rule(struct thread *td, struct rctl_remove_rule_args *uap)
{

	return (ENOSYS);
}
|
|
|
|
#endif /* !RCTL */
|