freebsd-dev/sys/kern/kern_resource.c

862 lines
20 KiB
C
Raw Normal View History

1994-05-24 10:09:53 +00:00
/*-
* Copyright (c) 1982, 1986, 1991, 1993
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
* All or some portions of this file are derived from material licensed
* to the University of California by American Telephone and Telegraph
* Co. or Unix System Laboratories, Inc. and are reproduced herein with
* the permission of UNIX System Laboratories, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by the University of
* California, Berkeley and its contributors.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)kern_resource.c 8.5 (Berkeley) 1/21/94
1999-08-28 01:08:13 +00:00
* $FreeBSD$
1994-05-24 10:09:53 +00:00
*/
#include "opt_compat.h"
#include "opt_rlimit.h"
1994-05-24 10:09:53 +00:00
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
1994-05-24 10:09:53 +00:00
#include <sys/file.h>
#include <sys/kernel.h>
1994-05-24 10:09:53 +00:00
#include <sys/resourcevar.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
1994-05-24 10:09:53 +00:00
#include <sys/proc.h>
#include <sys/time.h>
1994-05-24 10:09:53 +00:00
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <sys/lock.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
1994-05-24 10:09:53 +00:00
1998-02-09 06:11:36 +00:00
/* Change the nice value of chgp to n on behalf of curp; defined later in this file. */
static int donice __P((struct proc *curp, struct proc *chgp, int n));
/* dosetrlimit non-static: Needed by SysVR4 emulator */
int dosetrlimit __P((struct proc *p, u_int which, struct rlimit *limp));
/* Malloc type used for "uidinfo structures". */
static MALLOC_DEFINE(M_UIDINFO, "uidinfo", "uidinfo structures");
/* Select the hash chain head for a uid by masking it with uihash. */
#define UIHASH(uid) (&uihashtbl[(uid) & uihash])
/* NOTE(review): presumably serializes access to uihashtbl; its users are not visible in this part of the file -- confirm. */
static struct mtx uihashtbl_mtx;
/* Array of list heads of struct uidinfo, indexed through UIHASH(). */
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash; /* size of hash table - 1 */
/*
 * Per-uid uidinfo helpers.  NOTE(review): by their names these create and
 * look up the uidinfo for a uid; the definitions are outside this part of
 * the file -- confirm before relying on that.
 */
static struct uidinfo *uicreate __P((uid_t uid));
static struct uidinfo *uilookup __P((uid_t uid));
1994-05-24 10:09:53 +00:00
/*
* Resource controls and accounting.
*/
#ifndef _SYS_SYSPROTO_H_
1994-05-24 10:09:53 +00:00
struct getpriority_args {
int which;
int who;
};
#endif
int
getpriority(curp, uap)
1994-05-24 10:09:53 +00:00
struct proc *curp;
register struct getpriority_args *uap;
{
register struct proc *p;
1994-05-24 10:09:53 +00:00
register int low = PRIO_MAX + 1;
switch (uap->which) {
case PRIO_PROCESS:
if (uap->who == 0)
p = curp;
else
p = pfind(uap->who);
if (p == 0)
break;
o Centralize inter-process access control, introducing: int p_can(p1, p2, operation, privused) which allows specification of subject process, object process, inter-process operation, and an optional call-by-reference privused flag, allowing the caller to determine if privilege was required for the call to succeed. This allows jail, kern.ps_showallprocs and regular credential-based interaction checks to occur in one block of code. Possible operations are P_CAN_SEE, P_CAN_SCHED, P_CAN_KILL, and P_CAN_DEBUG. p_can currently breaks out as a wrapper to a series of static function checks in kern_prot, which should not be invoked directly. o Commented out capabilities entries are included for some checks. o Update most inter-process authorization to make use of p_can() instead of manual checks, PRISON_CHECK(), P_TRESPASS(), and kern.ps_showallprocs. o Modify suser{,_xxx} to use const arguments, as it no longer modifies process flags due to the disabling of ASU. o Modify some checks/errors in procfs so that ENOENT is returned instead of ESRCH, further improving concealment of processes that should not be visible to other processes. Also introduce new access checks to improve hiding of processes for procfs_lookup(), procfs_getattr(), procfs_readdir(). Correct a bug reported by bp concerning not handling the CREATE case in procfs_lookup(). Remove volatile flag in procfs that caused apparently spurious qualifier warnigns (approved by bde). o Add comment noting that ktrace() has not been updated, as its access control checks are different from ptrace(), whereas they should probably be the same. Further discussion should happen on this topic. Reviewed by: bde, green, phk, freebsd-security, others Approved by: bde Obtained from: TrustedBSD Project
2000-08-30 04:49:09 +00:00
if (p_can(curp, p, P_CAN_SEE, NULL))
break;
1994-05-24 10:09:53 +00:00
low = p->p_nice;
break;
case PRIO_PGRP: {
register struct pgrp *pg;
if (uap->who == 0)
pg = curp->p_pgrp;
else if ((pg = pgfind(uap->who)) == NULL)
break;
LIST_FOREACH(p, &pg->pg_members, p_pglist) {
o Centralize inter-process access control, introducing: int p_can(p1, p2, operation, privused) which allows specification of subject process, object process, inter-process operation, and an optional call-by-reference privused flag, allowing the caller to determine if privilege was required for the call to succeed. This allows jail, kern.ps_showallprocs and regular credential-based interaction checks to occur in one block of code. Possible operations are P_CAN_SEE, P_CAN_SCHED, P_CAN_KILL, and P_CAN_DEBUG. p_can currently breaks out as a wrapper to a series of static function checks in kern_prot, which should not be invoked directly. o Commented out capabilities entries are included for some checks. o Update most inter-process authorization to make use of p_can() instead of manual checks, PRISON_CHECK(), P_TRESPASS(), and kern.ps_showallprocs. o Modify suser{,_xxx} to use const arguments, as it no longer modifies process flags due to the disabling of ASU. o Modify some checks/errors in procfs so that ENOENT is returned instead of ESRCH, further improving concealment of processes that should not be visible to other processes. Also introduce new access checks to improve hiding of processes for procfs_lookup(), procfs_getattr(), procfs_readdir(). Correct a bug reported by bp concerning not handling the CREATE case in procfs_lookup(). Remove volatile flag in procfs that caused apparently spurious qualifier warnigns (approved by bde). o Add comment noting that ktrace() has not been updated, as its access control checks are different from ptrace(), whereas they should probably be the same. Further discussion should happen on this topic. Reviewed by: bde, green, phk, freebsd-security, others Approved by: bde Obtained from: TrustedBSD Project
2000-08-30 04:49:09 +00:00
if (!p_can(curp, p, P_CAN_SEE, NULL) && p->p_nice < low)
1994-05-24 10:09:53 +00:00
low = p->p_nice;
}
break;
}
case PRIO_USER:
if (uap->who == 0)
uap->who = curp->p_ucred->cr_uid;
ALLPROC_LOCK(AP_SHARED);
LIST_FOREACH(p, &allproc, p_list)
o Centralize inter-process access control, introducing: int p_can(p1, p2, operation, privused) which allows specification of subject process, object process, inter-process operation, and an optional call-by-reference privused flag, allowing the caller to determine if privilege was required for the call to succeed. This allows jail, kern.ps_showallprocs and regular credential-based interaction checks to occur in one block of code. Possible operations are P_CAN_SEE, P_CAN_SCHED, P_CAN_KILL, and P_CAN_DEBUG. p_can currently breaks out as a wrapper to a series of static function checks in kern_prot, which should not be invoked directly. o Commented out capabilities entries are included for some checks. o Update most inter-process authorization to make use of p_can() instead of manual checks, PRISON_CHECK(), P_TRESPASS(), and kern.ps_showallprocs. o Modify suser{,_xxx} to use const arguments, as it no longer modifies process flags due to the disabling of ASU. o Modify some checks/errors in procfs so that ENOENT is returned instead of ESRCH, further improving concealment of processes that should not be visible to other processes. Also introduce new access checks to improve hiding of processes for procfs_lookup(), procfs_getattr(), procfs_readdir(). Correct a bug reported by bp concerning not handling the CREATE case in procfs_lookup(). Remove volatile flag in procfs that caused apparently spurious qualifier warnigns (approved by bde). o Add comment noting that ktrace() has not been updated, as its access control checks are different from ptrace(), whereas they should probably be the same. Further discussion should happen on this topic. Reviewed by: bde, green, phk, freebsd-security, others Approved by: bde Obtained from: TrustedBSD Project
2000-08-30 04:49:09 +00:00
if (!p_can(curp, p, P_CAN_SEE, NULL) &&
p->p_ucred->cr_uid == uap->who &&
1994-05-24 10:09:53 +00:00
p->p_nice < low)
low = p->p_nice;
ALLPROC_LOCK(AP_RELEASE);
1994-05-24 10:09:53 +00:00
break;
default:
return (EINVAL);
}
if (low == PRIO_MAX + 1)
return (ESRCH);
curp->p_retval[0] = low;
1994-05-24 10:09:53 +00:00
return (0);
}
#ifndef _SYS_SYSPROTO_H_
1994-05-24 10:09:53 +00:00
struct setpriority_args {
int which;
int who;
int prio;
};
#endif
1994-05-24 10:09:53 +00:00
/* ARGSUSED */
int
setpriority(curp, uap)
1994-05-24 10:09:53 +00:00
struct proc *curp;
register struct setpriority_args *uap;
{
register struct proc *p;
int found = 0, error = 0;
switch (uap->which) {
case PRIO_PROCESS:
if (uap->who == 0)
p = curp;
else
p = pfind(uap->who);
if (p == 0)
break;
o Centralize inter-process access control, introducing: int p_can(p1, p2, operation, privused) which allows specification of subject process, object process, inter-process operation, and an optional call-by-reference privused flag, allowing the caller to determine if privilege was required for the call to succeed. This allows jail, kern.ps_showallprocs and regular credential-based interaction checks to occur in one block of code. Possible operations are P_CAN_SEE, P_CAN_SCHED, P_CAN_KILL, and P_CAN_DEBUG. p_can currently breaks out as a wrapper to a series of static function checks in kern_prot, which should not be invoked directly. o Commented out capabilities entries are included for some checks. o Update most inter-process authorization to make use of p_can() instead of manual checks, PRISON_CHECK(), P_TRESPASS(), and kern.ps_showallprocs. o Modify suser{,_xxx} to use const arguments, as it no longer modifies process flags due to the disabling of ASU. o Modify some checks/errors in procfs so that ENOENT is returned instead of ESRCH, further improving concealment of processes that should not be visible to other processes. Also introduce new access checks to improve hiding of processes for procfs_lookup(), procfs_getattr(), procfs_readdir(). Correct a bug reported by bp concerning not handling the CREATE case in procfs_lookup(). Remove volatile flag in procfs that caused apparently spurious qualifier warnigns (approved by bde). o Add comment noting that ktrace() has not been updated, as its access control checks are different from ptrace(), whereas they should probably be the same. Further discussion should happen on this topic. Reviewed by: bde, green, phk, freebsd-security, others Approved by: bde Obtained from: TrustedBSD Project
2000-08-30 04:49:09 +00:00
if (p_can(curp, p, P_CAN_SEE, NULL))
break;
1994-05-24 10:09:53 +00:00
error = donice(curp, p, uap->prio);
found++;
break;
case PRIO_PGRP: {
register struct pgrp *pg;
1995-05-30 08:16:23 +00:00
1994-05-24 10:09:53 +00:00
if (uap->who == 0)
pg = curp->p_pgrp;
else if ((pg = pgfind(uap->who)) == NULL)
break;
LIST_FOREACH(p, &pg->pg_members, p_pglist) {
o Centralize inter-process access control, introducing: int p_can(p1, p2, operation, privused) which allows specification of subject process, object process, inter-process operation, and an optional call-by-reference privused flag, allowing the caller to determine if privilege was required for the call to succeed. This allows jail, kern.ps_showallprocs and regular credential-based interaction checks to occur in one block of code. Possible operations are P_CAN_SEE, P_CAN_SCHED, P_CAN_KILL, and P_CAN_DEBUG. p_can currently breaks out as a wrapper to a series of static function checks in kern_prot, which should not be invoked directly. o Commented out capabilities entries are included for some checks. o Update most inter-process authorization to make use of p_can() instead of manual checks, PRISON_CHECK(), P_TRESPASS(), and kern.ps_showallprocs. o Modify suser{,_xxx} to use const arguments, as it no longer modifies process flags due to the disabling of ASU. o Modify some checks/errors in procfs so that ENOENT is returned instead of ESRCH, further improving concealment of processes that should not be visible to other processes. Also introduce new access checks to improve hiding of processes for procfs_lookup(), procfs_getattr(), procfs_readdir(). Correct a bug reported by bp concerning not handling the CREATE case in procfs_lookup(). Remove volatile flag in procfs that caused apparently spurious qualifier warnigns (approved by bde). o Add comment noting that ktrace() has not been updated, as its access control checks are different from ptrace(), whereas they should probably be the same. Further discussion should happen on this topic. Reviewed by: bde, green, phk, freebsd-security, others Approved by: bde Obtained from: TrustedBSD Project
2000-08-30 04:49:09 +00:00
if (!p_can(curp, p, P_CAN_SEE, NULL)) {
error = donice(curp, p, uap->prio);
found++;
}
1994-05-24 10:09:53 +00:00
}
break;
}
case PRIO_USER:
if (uap->who == 0)
uap->who = curp->p_ucred->cr_uid;
ALLPROC_LOCK(AP_SHARED);
LIST_FOREACH(p, &allproc, p_list)
if (p->p_ucred->cr_uid == uap->who &&
o Centralize inter-process access control, introducing: int p_can(p1, p2, operation, privused) which allows specification of subject process, object process, inter-process operation, and an optional call-by-reference privused flag, allowing the caller to determine if privilege was required for the call to succeed. This allows jail, kern.ps_showallprocs and regular credential-based interaction checks to occur in one block of code. Possible operations are P_CAN_SEE, P_CAN_SCHED, P_CAN_KILL, and P_CAN_DEBUG. p_can currently breaks out as a wrapper to a series of static function checks in kern_prot, which should not be invoked directly. o Commented out capabilities entries are included for some checks. o Update most inter-process authorization to make use of p_can() instead of manual checks, PRISON_CHECK(), P_TRESPASS(), and kern.ps_showallprocs. o Modify suser{,_xxx} to use const arguments, as it no longer modifies process flags due to the disabling of ASU. o Modify some checks/errors in procfs so that ENOENT is returned instead of ESRCH, further improving concealment of processes that should not be visible to other processes. Also introduce new access checks to improve hiding of processes for procfs_lookup(), procfs_getattr(), procfs_readdir(). Correct a bug reported by bp concerning not handling the CREATE case in procfs_lookup(). Remove volatile flag in procfs that caused apparently spurious qualifier warnigns (approved by bde). o Add comment noting that ktrace() has not been updated, as its access control checks are different from ptrace(), whereas they should probably be the same. Further discussion should happen on this topic. Reviewed by: bde, green, phk, freebsd-security, others Approved by: bde Obtained from: TrustedBSD Project
2000-08-30 04:49:09 +00:00
!p_can(curp, p, P_CAN_SEE, NULL)) {
1994-05-24 10:09:53 +00:00
error = donice(curp, p, uap->prio);
found++;
}
ALLPROC_LOCK(AP_RELEASE);
1994-05-24 10:09:53 +00:00
break;
default:
return (EINVAL);
}
if (found == 0)
return (ESRCH);
return (error);
}
1998-02-09 06:11:36 +00:00
static int
1994-05-24 10:09:53 +00:00
donice(curp, chgp, n)
register struct proc *curp, *chgp;
register int n;
{
o Centralize inter-process access control, introducing: int p_can(p1, p2, operation, privused) which allows specification of subject process, object process, inter-process operation, and an optional call-by-reference privused flag, allowing the caller to determine if privilege was required for the call to succeed. This allows jail, kern.ps_showallprocs and regular credential-based interaction checks to occur in one block of code. Possible operations are P_CAN_SEE, P_CAN_SCHED, P_CAN_KILL, and P_CAN_DEBUG. p_can currently breaks out as a wrapper to a series of static function checks in kern_prot, which should not be invoked directly. o Commented out capabilities entries are included for some checks. o Update most inter-process authorization to make use of p_can() instead of manual checks, PRISON_CHECK(), P_TRESPASS(), and kern.ps_showallprocs. o Modify suser{,_xxx} to use const arguments, as it no longer modifies process flags due to the disabling of ASU. o Modify some checks/errors in procfs so that ENOENT is returned instead of ESRCH, further improving concealment of processes that should not be visible to other processes. Also introduce new access checks to improve hiding of processes for procfs_lookup(), procfs_getattr(), procfs_readdir(). Correct a bug reported by bp concerning not handling the CREATE case in procfs_lookup(). Remove volatile flag in procfs that caused apparently spurious qualifier warnigns (approved by bde). o Add comment noting that ktrace() has not been updated, as its access control checks are different from ptrace(), whereas they should probably be the same. Further discussion should happen on this topic. Reviewed by: bde, green, phk, freebsd-security, others Approved by: bde Obtained from: TrustedBSD Project
2000-08-30 04:49:09 +00:00
int error;
1994-05-24 10:09:53 +00:00
o Centralize inter-process access control, introducing: int p_can(p1, p2, operation, privused) which allows specification of subject process, object process, inter-process operation, and an optional call-by-reference privused flag, allowing the caller to determine if privilege was required for the call to succeed. This allows jail, kern.ps_showallprocs and regular credential-based interaction checks to occur in one block of code. Possible operations are P_CAN_SEE, P_CAN_SCHED, P_CAN_KILL, and P_CAN_DEBUG. p_can currently breaks out as a wrapper to a series of static function checks in kern_prot, which should not be invoked directly. o Commented out capabilities entries are included for some checks. o Update most inter-process authorization to make use of p_can() instead of manual checks, PRISON_CHECK(), P_TRESPASS(), and kern.ps_showallprocs. o Modify suser{,_xxx} to use const arguments, as it no longer modifies process flags due to the disabling of ASU. o Modify some checks/errors in procfs so that ENOENT is returned instead of ESRCH, further improving concealment of processes that should not be visible to other processes. Also introduce new access checks to improve hiding of processes for procfs_lookup(), procfs_getattr(), procfs_readdir(). Correct a bug reported by bp concerning not handling the CREATE case in procfs_lookup(). Remove volatile flag in procfs that caused apparently spurious qualifier warnigns (approved by bde). o Add comment noting that ktrace() has not been updated, as its access control checks are different from ptrace(), whereas they should probably be the same. Further discussion should happen on this topic. Reviewed by: bde, green, phk, freebsd-security, others Approved by: bde Obtained from: TrustedBSD Project
2000-08-30 04:49:09 +00:00
if ((error = p_can(curp, chgp, P_CAN_SCHED, NULL)))
return (error);
1994-05-24 10:09:53 +00:00
if (n > PRIO_MAX)
n = PRIO_MAX;
if (n < PRIO_MIN)
n = PRIO_MIN;
if (n < chgp->p_nice && suser(curp))
1994-05-24 10:09:53 +00:00
return (EACCES);
chgp->p_nice = n;
(void)resetpriority(chgp);
return (0);
}
/* rtprio system call */
#ifndef _SYS_SYSPROTO_H_
struct rtprio_args {
int function;
pid_t pid;
struct rtprio *rtp;
};
#endif
/*
* Set realtime priority
*/
/* ARGSUSED */
int
rtprio(curp, uap)
struct proc *curp;
register struct rtprio_args *uap;
{
register struct proc *p;
struct rtprio rtp;
int error;
error = copyin(uap->rtp, &rtp, sizeof(struct rtprio));
if (error)
return (error);
if (uap->pid == 0)
p = curp;
else
p = pfind(uap->pid);
if (p == 0)
return (ESRCH);
switch (uap->function) {
case RTP_LOOKUP:
return (copyout(&p->p_rtprio, uap->rtp, sizeof(struct rtprio)));
case RTP_SET:
o Centralize inter-process access control, introducing: int p_can(p1, p2, operation, privused) which allows specification of subject process, object process, inter-process operation, and an optional call-by-reference privused flag, allowing the caller to determine if privilege was required for the call to succeed. This allows jail, kern.ps_showallprocs and regular credential-based interaction checks to occur in one block of code. Possible operations are P_CAN_SEE, P_CAN_SCHED, P_CAN_KILL, and P_CAN_DEBUG. p_can currently breaks out as a wrapper to a series of static function checks in kern_prot, which should not be invoked directly. o Commented out capabilities entries are included for some checks. o Update most inter-process authorization to make use of p_can() instead of manual checks, PRISON_CHECK(), P_TRESPASS(), and kern.ps_showallprocs. o Modify suser{,_xxx} to use const arguments, as it no longer modifies process flags due to the disabling of ASU. o Modify some checks/errors in procfs so that ENOENT is returned instead of ESRCH, further improving concealment of processes that should not be visible to other processes. Also introduce new access checks to improve hiding of processes for procfs_lookup(), procfs_getattr(), procfs_readdir(). Correct a bug reported by bp concerning not handling the CREATE case in procfs_lookup(). Remove volatile flag in procfs that caused apparently spurious qualifier warnigns (approved by bde). o Add comment noting that ktrace() has not been updated, as its access control checks are different from ptrace(), whereas they should probably be the same. Further discussion should happen on this topic. Reviewed by: bde, green, phk, freebsd-security, others Approved by: bde Obtained from: TrustedBSD Project
2000-08-30 04:49:09 +00:00
if ((error = p_can(curp, p, P_CAN_SCHED, NULL)))
return (error);
/* disallow setting rtprio in most cases if not superuser */
if (suser(curp) != 0) {
/* can't set someone else's */
if (uap->pid)
1995-05-30 08:16:23 +00:00
return (EPERM);
/* can't set realtime priority */
/*
* Realtime priority has to be restricted for reasons which should be
* obvious. However, for idle priority, there is a potential for
* system deadlock if an idleprio process gains a lock on a resource
* that other processes need (and the idleprio process can't run
* due to a CPU-bound normal process). Fix me! XXX
*/
#if 0
if (RTP_PRIO_IS_REALTIME(rtp.type))
#endif
if (rtp.type != RTP_PRIO_NORMAL)
return (EPERM);
}
switch (rtp.type) {
#ifdef RTP_PRIO_FIFO
case RTP_PRIO_FIFO:
#endif
case RTP_PRIO_REALTIME:
case RTP_PRIO_NORMAL:
case RTP_PRIO_IDLE:
if (rtp.prio > RTP_PRIO_MAX)
return (EINVAL);
p->p_rtprio = rtp;
return (0);
default:
return (EINVAL);
}
1995-05-30 08:16:23 +00:00
default:
return (EINVAL);
}
}
1994-05-24 10:09:53 +00:00
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
#ifndef _SYS_SYSPROTO_H_
struct osetrlimit_args {
1994-05-24 10:09:53 +00:00
u_int which;
struct orlimit *rlp;
1994-05-24 10:09:53 +00:00
};
#endif
1994-05-24 10:09:53 +00:00
/* ARGSUSED */
int
osetrlimit(p, uap)
1994-05-24 10:09:53 +00:00
struct proc *p;
register struct osetrlimit_args *uap;
1994-05-24 10:09:53 +00:00
{
struct orlimit olim;
struct rlimit lim;
int error;
if ((error =
copyin((caddr_t)uap->rlp, (caddr_t)&olim, sizeof(struct orlimit))))
1994-05-24 10:09:53 +00:00
return (error);
lim.rlim_cur = olim.rlim_cur;
lim.rlim_max = olim.rlim_max;
return (dosetrlimit(p, uap->which, &lim));
}
#ifndef _SYS_SYSPROTO_H_
struct ogetrlimit_args {
1994-05-24 10:09:53 +00:00
u_int which;
struct orlimit *rlp;
};
#endif
1994-05-24 10:09:53 +00:00
/* ARGSUSED */
int
ogetrlimit(p, uap)
1994-05-24 10:09:53 +00:00
struct proc *p;
register struct ogetrlimit_args *uap;
1994-05-24 10:09:53 +00:00
{
struct orlimit olim;
if (uap->which >= RLIM_NLIMITS)
return (EINVAL);
olim.rlim_cur = p->p_rlimit[uap->which].rlim_cur;
if (olim.rlim_cur == -1)
olim.rlim_cur = 0x7fffffff;
olim.rlim_max = p->p_rlimit[uap->which].rlim_max;
if (olim.rlim_max == -1)
olim.rlim_max = 0x7fffffff;
return (copyout((caddr_t)&olim, (caddr_t)uap->rlp, sizeof(olim)));
}
#endif /* COMPAT_43 || COMPAT_SUNOS */
#ifndef _SYS_SYSPROTO_H_
1994-05-24 10:09:53 +00:00
struct __setrlimit_args {
u_int which;
struct rlimit *rlp;
1994-05-24 10:09:53 +00:00
};
#endif
1994-05-24 10:09:53 +00:00
/* ARGSUSED */
int
setrlimit(p, uap)
1994-05-24 10:09:53 +00:00
struct proc *p;
register struct __setrlimit_args *uap;
{
struct rlimit alim;
int error;
if ((error =
copyin((caddr_t)uap->rlp, (caddr_t)&alim, sizeof (struct rlimit))))
1994-05-24 10:09:53 +00:00
return (error);
return (dosetrlimit(p, uap->which, &alim));
}
int
1994-05-24 10:09:53 +00:00
dosetrlimit(p, which, limp)
struct proc *p;
u_int which;
struct rlimit *limp;
{
register struct rlimit *alimp;
int error;
if (which >= RLIM_NLIMITS)
return (EINVAL);
alimp = &p->p_rlimit[which];
/*
* Preserve historical bugs by treating negative limits as unsigned.
*/
if (limp->rlim_cur < 0)
limp->rlim_cur = RLIM_INFINITY;
if (limp->rlim_max < 0)
limp->rlim_max = RLIM_INFINITY;
1995-05-30 08:16:23 +00:00
if (limp->rlim_cur > alimp->rlim_max ||
1994-05-24 10:09:53 +00:00
limp->rlim_max > alimp->rlim_max)
This Implements the mumbled about "Jail" feature. This is a seriously beefed up chroot kind of thing. The process is jailed along the same lines as a chroot does it, but with additional tough restrictions imposed on what the superuser can do. For all I know, it is safe to hand over the root bit inside a prison to the customer living in that prison, this is what it was developed for in fact: "real virtual servers". Each prison has an ip number associated with it, which all IP communications will be coerced to use and each prison has its own hostname. Needless to say, you need more RAM this way, but the advantage is that each customer can run their own particular version of apache and not stomp on the toes of their neighbors. It generally does what one would expect, but setting up a jail still takes a little knowledge. A few notes: I have no scripts for setting up a jail, don't ask me for them. The IP number should be an alias on one of the interfaces. mount a /proc in each jail, it will make ps more useable. /proc/<pid>/status tells the hostname of the prison for jailed processes. Quotas are only sensible if you have a mountpoint per prison. There are no privisions for stopping resource-hogging. Some "#ifdef INET" and similar may be missing (send patches!) If somebody wants to take it from here and develop it into more of a "virtual machine" they should be most welcome! Tools, comments, patches & documentation most welcome. Have fun... Sponsored by: http://www.rndassociates.com/ Run for almost a year by: http://www.servetheweb.com/
1999-04-28 11:38:52 +00:00
if ((error = suser_xxx(0, p, PRISON_ROOT)))
1994-05-24 10:09:53 +00:00
return (error);
if (limp->rlim_cur > limp->rlim_max)
limp->rlim_cur = limp->rlim_max;
if (p->p_limit->p_refcnt > 1 &&
(p->p_limit->p_lflags & PL_SHAREMOD) == 0) {
p->p_limit->p_refcnt--;
p->p_limit = limcopy(p->p_limit);
alimp = &p->p_rlimit[which];
}
switch (which) {
case RLIMIT_CPU:
if (limp->rlim_cur > RLIM_INFINITY / (rlim_t)1000000)
p->p_limit->p_cpulimit = RLIM_INFINITY;
else
p->p_limit->p_cpulimit =
(rlim_t)1000000 * limp->rlim_cur;
break;
1994-05-24 10:09:53 +00:00
case RLIMIT_DATA:
if (limp->rlim_cur > MAXDSIZ)
limp->rlim_cur = MAXDSIZ;
if (limp->rlim_max > MAXDSIZ)
limp->rlim_max = MAXDSIZ;
1994-05-24 10:09:53 +00:00
break;
case RLIMIT_STACK:
if (limp->rlim_cur > MAXSSIZ)
limp->rlim_cur = MAXSSIZ;
if (limp->rlim_max > MAXSSIZ)
limp->rlim_max = MAXSSIZ;
1994-05-24 10:09:53 +00:00
/*
* Stack is allocated to the max at exec time with only
* "rlim_cur" bytes accessible. If stack limit is going
* up make more accessible, if going down make inaccessible.
*/
if (limp->rlim_cur != alimp->rlim_cur) {
vm_offset_t addr;
vm_size_t size;
vm_prot_t prot;
if (limp->rlim_cur > alimp->rlim_cur) {
1994-05-24 10:09:53 +00:00
prot = VM_PROT_ALL;
size = limp->rlim_cur - alimp->rlim_cur;
addr = USRSTACK - limp->rlim_cur;
} else {
prot = VM_PROT_NONE;
size = alimp->rlim_cur - limp->rlim_cur;
addr = USRSTACK - alimp->rlim_cur;
}
addr = trunc_page(addr);
size = round_page(size);
(void) vm_map_protect(&p->p_vmspace->vm_map,
addr, addr+size, prot, FALSE);
}
break;
case RLIMIT_NOFILE:
if (limp->rlim_cur > maxfilesperproc)
limp->rlim_cur = maxfilesperproc;
if (limp->rlim_max > maxfilesperproc)
limp->rlim_max = maxfilesperproc;
1994-05-24 10:09:53 +00:00
break;
case RLIMIT_NPROC:
if (limp->rlim_cur > maxprocperuid)
limp->rlim_cur = maxprocperuid;
if (limp->rlim_max > maxprocperuid)
limp->rlim_max = maxprocperuid;
1994-05-24 10:09:53 +00:00
break;
}
*alimp = *limp;
return (0);
}
#ifndef _SYS_SYSPROTO_H_
1994-05-24 10:09:53 +00:00
struct __getrlimit_args {
u_int which;
struct rlimit *rlp;
};
#endif
1994-05-24 10:09:53 +00:00
/* ARGSUSED */
int
getrlimit(p, uap)
1994-05-24 10:09:53 +00:00
struct proc *p;
register struct __getrlimit_args *uap;
{
if (uap->which >= RLIM_NLIMITS)
return (EINVAL);
return (copyout((caddr_t)&p->p_rlimit[uap->which], (caddr_t)uap->rlp,
sizeof (struct rlimit)));
}
/*
* Transform the running time and tick information in proc p into user,
* system, and interrupt time usage.
*/
void
1994-05-24 10:09:53 +00:00
calcru(p, up, sp, ip)
struct proc *p;
struct timeval *up;
struct timeval *sp;
struct timeval *ip;
1994-05-24 10:09:53 +00:00
{
/* {user, system, interrupt, total} {ticks, usec}; previous tu: */
u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu;
int s;
1994-05-24 10:09:53 +00:00
struct timeval tv;
mtx_assert(&sched_lock, MA_OWNED);
/* XXX: why spl-protect ? worst case is an off-by-one report */
1994-05-24 10:09:53 +00:00
s = splstatclock();
ut = p->p_uticks;
st = p->p_sticks;
1994-05-24 10:09:53 +00:00
it = p->p_iticks;
splx(s);
tt = ut + st + it;
if (tt == 0) {
st = 1;
tt = 1;
1994-05-24 10:09:53 +00:00
}
tu = p->p_runtime;
if (p == curproc) {
1994-05-24 10:09:53 +00:00
/*
* Adjust for the current time slice. This is actually fairly
* important since the error here is on the order of a time
* quantum, which is much greater than the sampling error.
*/
microuptime(&tv);
if (timevalcmp(&tv, PCPU_PTR(switchtime), <))
2000-06-10 19:21:20 +00:00
printf("microuptime() went backwards (%ld.%06ld -> %ld.%06ld)\n",
PCPU_GET(switchtime.tv_sec), PCPU_GET(switchtime.tv_usec),
tv.tv_sec, tv.tv_usec);
else
tu += (tv.tv_usec - PCPU_GET(switchtime.tv_usec)) +
(tv.tv_sec - PCPU_GET(switchtime.tv_sec)) *
(int64_t)1000000;
1994-05-24 10:09:53 +00:00
}
ptu = p->p_uu + p->p_su + p->p_iu;
if (tu < ptu || (int64_t)tu < 0) {
/* XXX no %qd in kernel. Truncate. */
printf("calcru: negative time of %ld usec for pid %d (%s)\n",
(long)tu, p->p_pid, p->p_comm);
tu = ptu;
}
/* Subdivide tu. */
uu = (tu * ut) / tt;
su = (tu * st) / tt;
iu = tu - uu - su;
/* Enforce monotonicity. */
if (uu < p->p_uu || su < p->p_su || iu < p->p_iu) {
if (uu < p->p_uu)
uu = p->p_uu;
else if (uu + p->p_su + p->p_iu > tu)
uu = tu - p->p_su - p->p_iu;
if (st == 0)
su = p->p_su;
else {
su = ((tu - uu) * st) / (st + it);
if (su < p->p_su)
su = p->p_su;
else if (uu + su + p->p_iu > tu)
su = tu - uu - p->p_iu;
}
KASSERT(uu + su + p->p_iu <= tu,
("calcru: monotonisation botch 1"));
iu = tu - uu - su;
KASSERT(iu >= p->p_iu,
("calcru: monotonisation botch 2"));
}
p->p_uu = uu;
p->p_su = su;
p->p_iu = iu;
up->tv_sec = uu / 1000000;
up->tv_usec = uu % 1000000;
sp->tv_sec = su / 1000000;
sp->tv_usec = su % 1000000;
1994-05-24 10:09:53 +00:00
if (ip != NULL) {
ip->tv_sec = iu / 1000000;
ip->tv_usec = iu % 1000000;
1994-05-24 10:09:53 +00:00
}
}
#ifndef _SYS_SYSPROTO_H_
/*
 * Argument block of the getrusage system call.
 */
struct getrusage_args {
	int	who;			/* RUSAGE_SELF or RUSAGE_CHILDREN */
	struct	rusage *rusage;		/* user address the usage is copied to */
};
#endif
1994-05-24 10:09:53 +00:00
/* ARGSUSED */
int
getrusage(p, uap)
1994-05-24 10:09:53 +00:00
register struct proc *p;
register struct getrusage_args *uap;
{
register struct rusage *rup;
switch (uap->who) {
case RUSAGE_SELF:
rup = &p->p_stats->p_ru;
mtx_enter(&sched_lock, MTX_SPIN);
1994-05-24 10:09:53 +00:00
calcru(p, &rup->ru_utime, &rup->ru_stime, NULL);
mtx_exit(&sched_lock, MTX_SPIN);
1994-05-24 10:09:53 +00:00
break;
case RUSAGE_CHILDREN:
rup = &p->p_stats->p_cru;
break;
default:
return (EINVAL);
}
return (copyout((caddr_t)rup, (caddr_t)uap->rusage,
sizeof (struct rusage)));
}
void
1994-05-24 10:09:53 +00:00
ruadd(ru, ru2)
register struct rusage *ru, *ru2;
{
register long *ip, *ip2;
register int i;
timevaladd(&ru->ru_utime, &ru2->ru_utime);
timevaladd(&ru->ru_stime, &ru2->ru_stime);
if (ru->ru_maxrss < ru2->ru_maxrss)
ru->ru_maxrss = ru2->ru_maxrss;
ip = &ru->ru_first; ip2 = &ru2->ru_first;
for (i = &ru->ru_last - &ru->ru_first; i >= 0; i--)
*ip++ += *ip2++;
}
/*
 * Make a copy of the plimit structure.
 * We share these structures copy-on-write after fork,
 * and copy when a limit is changed.
 *
 * The whole structure is duplicated, after which the copy's flags are
 * cleared and its reference count is set to one.  The allocation uses
 * M_WAITOK.  Returns the new structure.
 */
struct plimit *
limcopy(lim)
	struct plimit *lim;
{
	register struct plimit *copy;

	MALLOC(copy, struct plimit *, sizeof(struct plimit),
	    M_SUBPROC, M_WAITOK);
	/*
	 * Copy from the base of the structure.  Copying
	 * sizeof(struct plimit) bytes starting at the pl_rlimit member is
	 * only correct while that member sits at offset zero, and would
	 * otherwise run past the end of both objects.
	 */
	bcopy(lim, copy, sizeof(*copy));
	copy->p_lflags = 0;
	copy->p_refcnt = 1;
	return (copy);
}
/*
 * The uidinfo structure is used to track the total resource
 * consumption (process count, socket buffer size, etc.) for a uid
 * and impose limits.
 *
 * uihashinit() sets up the hash table through which uidinfo structures
 * are found (sized from maxproc / 16) and the mutex that guards it.
 */
void
uihashinit()
{

	uihashtbl = hashinit(maxproc / 16, M_UIDINFO, &uihash);
	mtx_init(&uihashtbl_mtx, "uidinfo hash", MTX_DEF);
}
/*
* lookup a uidinfo struct for the parameter uid.
* uihashtbl_mtx must be locked.
*/
static struct uidinfo *
uilookup(uid)
uid_t uid;
{
struct uihashhead *uipp;
struct uidinfo *uip;
mtx_assert(&uihashtbl_mtx, MA_OWNED);
uipp = UIHASH(uid);
LIST_FOREACH(uip, uipp, ui_hash)
if (uip->ui_uid == uid)
break;
return (uip);
}
/*
* Create a uidinfo struct for the parameter uid.
* uihashtbl_mtx must be locked.
*/
static struct uidinfo *
uicreate(uid)
uid_t uid;
{
struct uidinfo *uip;
mtx_assert(&uihashtbl_mtx, MA_OWNED);
MALLOC(uip, struct uidinfo *, sizeof(*uip), M_UIDINFO,
M_WAITOK | M_ZERO);
LIST_INSERT_HEAD(UIHASH(uid), uip, ui_hash);
uip->ui_uid = uid;
mtx_init(&uip->ui_mtx, "uidinfo struct", MTX_DEF);
return (uip);
}
/*
 * Return the struct uidinfo for a particular uid, allocating one when
 * the hash has none yet.  The returned struct carries an additional
 * reference; the caller releases it with uifree().
 */
struct uidinfo *
uifind(uid)
	uid_t uid;
{
	struct uidinfo *found;

	mtx_enter(&uihashtbl_mtx, MTX_DEF);
	if ((found = uilookup(uid)) == NULL)
		found = uicreate(uid);
	/* Take the reference while the hash is still locked. */
	uihold(found);
	mtx_exit(&uihashtbl_mtx, MTX_DEF);

	return (found);
}
/*
 * Take one more reference on a uidinfo struct.
 * The count is changed under the struct's own mutex.
 */
void
uihold(uip)
	struct uidinfo *uip;
{

	mtx_enter(&uip->ui_mtx, MTX_DEF);
	uip->ui_ref += 1;
	mtx_exit(&uip->ui_mtx, MTX_DEF);
}
/*-
 * Release one reference on a uidinfo struct; when the last reference
 * goes away the struct is unhooked from the hash and freed.
 *
 * Since uidinfo structs have a long lifetime, we use an
 * opportunistic refcounting scheme to avoid locking the lookup hash
 * for each release.
 *
 * If the refcount hits 0, we need to free the structure,
 * which means we need to lock the hash.
 * Optimal case:
 *   After locking the struct and lowering the refcount, if we find
 *   that we don't need to free, simply unlock and return.
 * Suboptimal case:
 *   If refcount lowering results in need to free, bump the count
 *   back up, lose the lock and acquire the locks in the proper
 *   order (hash mutex first, then the struct mutex) to try again.
 */
void
uifree(uip)
struct uidinfo *uip;
{
/* Prepare for optimal case. */
mtx_enter(&uip->ui_mtx, MTX_DEF);
if (--uip->ui_ref != 0) {
mtx_exit(&uip->ui_mtx, MTX_DEF);
return;
}
/*
 * Prepare for suboptimal case.  Restoring the count keeps our own
 * reference in place while no lock is held.
 */
uip->ui_ref++;
mtx_exit(&uip->ui_mtx, MTX_DEF);
mtx_enter(&uihashtbl_mtx, MTX_DEF);
mtx_enter(&uip->ui_mtx, MTX_DEF);
/*
 * We must subtract one from the count again because we backed out
 * our initial subtraction before dropping the lock.
 * Since another thread may have added a reference after we dropped the
 * initial lock we have to test for zero again.
 */
if (--uip->ui_ref == 0) {
/* Unhook from the hash before letting go of the hash mutex. */
LIST_REMOVE(uip, ui_hash);
mtx_exit(&uihashtbl_mtx, MTX_DEF);
/* Report accounting that is still nonzero at free time. */
if (uip->ui_sbsize != 0)
/* XXX no %qd in kernel. Truncate. */
printf("freeing uidinfo: uid = %d, sbsize = %ld\n",
uip->ui_uid, (long)uip->ui_sbsize);
if (uip->ui_proccnt != 0)
printf("freeing uidinfo: uid = %d, proccnt = %ld\n",
uip->ui_uid, uip->ui_proccnt);
/* ui_mtx is still held here; it is destroyed rather than released. */
mtx_destroy(&uip->ui_mtx);
FREE(uip, M_UIDINFO);
return;
}
/* Somebody else took a reference in the meantime: nothing to free. */
mtx_exit(&uihashtbl_mtx, MTX_DEF);
mtx_exit(&uip->ui_mtx, MTX_DEF);
}
/*
 * Adjust the number of processes accounted to a uid by diff.
 *
 * An increase that would push the count above max is refused and 0 is
 * returned with the count untouched; a max of 0 disables the check and
 * decreases are always accepted.  Returns 1 when the count was changed.
 */
int
chgproccnt(uip, diff, max)
	struct uidinfo *uip;
	int diff;
	int max;
{
	int changed;

	mtx_enter(&uip->ui_mtx, MTX_DEF);
	/* don't allow them to exceed max, but allow subtraction */
	if (diff > 0 && uip->ui_proccnt + diff > max && max != 0) {
		changed = 0;
	} else {
		uip->ui_proccnt += diff;
		if (uip->ui_proccnt < 0)
			printf("negative proccnt for uid = %d\n", uip->ui_uid);
		changed = 1;
	}
	mtx_exit(&uip->ui_mtx, MTX_DEF);

	return (changed);
}
/*
 * Move a socket buffer high-water mark from *hiwat to 'to' and charge
 * the difference to the uid's total socket buffer size.
 *
 * Growing the mark is refused (0 is returned, nothing is changed) when
 * the new total would exceed max; shrinking is always accepted.  On
 * success *hiwat is updated and 1 is returned.
 */
int
chgsbsize(uip, hiwat, to, max)
	struct uidinfo *uip;
	u_long *hiwat;
	u_long to;
	rlim_t max;
{
	rlim_t total;
	int ipl, changed;

	ipl = splnet();
	mtx_enter(&uip->ui_mtx, MTX_DEF);

	total = uip->ui_sbsize + to - *hiwat;
	/* don't allow them to exceed max, but allow subtraction */
	if (to > *hiwat && total > max) {
		changed = 0;
	} else {
		uip->ui_sbsize = total;
		*hiwat = to;
		if (uip->ui_sbsize < 0)
			printf("negative sbsize for uid = %d\n", uip->ui_uid);
		changed = 1;
	}

	splx(ipl);
	mtx_exit(&uip->ui_mtx, MTX_DEF);
	return (changed);
}