Add hierarchical jails. A jail may further virtualize its environment
by creating a child jail, which is visible to that jail and to any
parent jails.  Child jails may be restricted more than their parents,
but never less.  Jail names reflect this hierarchy, being MIB-style
dot-separated strings.

Every thread now points to a jail, the default being prison0, which
contains information about the physical system.  Prison0's root
directory is the same as rootvnode; its hostname is the same as the
global hostname, and its securelevel replaces the global securelevel.
Note that the variable "securelevel" has actually gone away, which
should not cause any problems for code that properly uses
securelevel_gt() and securelevel_ge().

Some jail-related permissions that were kept in global variables and
set via sysctls are now per-jail settings.  The sysctls still exist for
backward compatibility, used only by the now-deprecated jail(2) system
call.

Approved by:	bz (mentor)
This commit is contained in:
jamie 2009-05-27 14:11:23 +00:00
parent 6e53147404
commit a013e0afcb
38 changed files with 2108 additions and 1075 deletions

View File

@ -22,6 +22,10 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 8.x IS SLOW:
to maximize performance. (To disable malloc debugging, run
ln -s aj /etc/malloc.conf.)
20090527:
Add support for hierarchical jails. Remove global securelevel.
Bump __FreeBSD_version to 800091.
20090523:
The layout of struct vnet_net has changed, therefore modules
need to be rebuilt.

View File

@ -25,7 +25,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd April 29, 2009
.Dd May 27, 2009
.Dt JAIL 2
.Os
.Sh NAME
@ -283,7 +283,7 @@ of the jail for the given address family.
It is possible to identify a process as jailed by examining
.Dq Li /proc/<pid>/status :
it will show a field near the end of the line, either as
a single hyphen for a process at large, or the hostname currently
a single hyphen for a process at large, or the name currently
set for the prison for jailed processes.
.Sh ERRORS
The
@ -292,7 +292,10 @@ system call
will fail if:
.Bl -tag -width Er
.It Bq Er EPERM
This process is not allowed to create a jail.
This process is not allowed to create a jail, either because it is not
the super-user, or because it is in a jail where the
.Va allow.jails
parameter is not set.
.It Bq Er EFAULT
.Fa jail
points to an address outside the allocated address space of the process.
@ -308,7 +311,10 @@ system call
will fail if:
.Bl -tag -width Er
.It Bq Er EPERM
This process is not allowed to create a jail.
This process is not allowed to create a jail, either because it is not
the super-user, or because it is in a jail where the
.Va allow.jails
parameter is not set.
.It Bq Er EPERM
A jail parameter was set to a less restrictive value than the current
environment.
@ -324,6 +330,11 @@ or
parameter does not exist, and the
.Dv JAIL_CREATE
flag is not set.
.It Bq Er ENOENT
The jail referred to by a
.Va jid
is not accessible by the process, because the process is in a different
jail.
.It Bq Er EEXIST
The jail referred to by a
.Va jid
@ -368,6 +379,11 @@ or
.Va name
parameter does not exist.
.It Bq Er ENOENT
The jail referred to by a
.Va jid
is not accessible by the process, because the process is in a different
jail.
.It Bq Er ENOENT
The
.Va lastjid
parameter is greater than the highest current jail ID.
@ -429,4 +445,4 @@ for R&D Associates
who contributed it to
.Fx .
.An James Gritton
added the extensible jail parameters.
added the extensible jail parameters and hierarchical jails.

View File

@ -112,8 +112,6 @@ CTASSERT(sizeof(struct msghdr32) == 28);
CTASSERT(sizeof(struct stat32) == 96);
CTASSERT(sizeof(struct sigaction32) == 24);
extern int jail_max_af_ips;
static int freebsd32_kevent_copyout(void *arg, struct kevent *kevp, int count);
static int freebsd32_kevent_copyin(void *arg, struct kevent *kevp, int count);
@ -2044,17 +2042,9 @@ freebsd32_sysctl(struct thread *td, struct freebsd32_sysctl_args *uap)
int
freebsd32_jail(struct thread *td, struct freebsd32_jail_args *uap)
{
struct iovec optiov[10];
struct uio opt;
char *u_path, *u_hostname, *u_name;
#ifdef INET
struct in_addr *u_ip4;
#endif
#ifdef INET6
struct in6_addr *u_ip6;
#endif
uint32_t version;
int error;
struct jail j;
error = copyin(uap->jail, &version, sizeof(uint32_t));
if (error)
@ -2066,45 +2056,14 @@ freebsd32_jail(struct thread *td, struct freebsd32_jail_args *uap)
/* FreeBSD single IPv4 jails. */
struct jail32_v0 j32_v0;
bzero(&j, sizeof(struct jail));
error = copyin(uap->jail, &j32_v0, sizeof(struct jail32_v0));
if (error)
return (error);
u_path = malloc(MAXPATHLEN + MAXHOSTNAMELEN, M_TEMP, M_WAITOK);
u_hostname = u_path + MAXPATHLEN;
opt.uio_iov = optiov;
opt.uio_iovcnt = 4;
opt.uio_offset = -1;
opt.uio_resid = -1;
opt.uio_segflg = UIO_SYSSPACE;
opt.uio_rw = UIO_READ;
opt.uio_td = td;
optiov[0].iov_base = "path";
optiov[0].iov_len = sizeof("path");
optiov[1].iov_base = u_path;
error = copyinstr(PTRIN(j32_v0.path), u_path, MAXPATHLEN,
&optiov[1].iov_len);
if (error) {
free(u_path, M_TEMP);
return (error);
}
optiov[2].iov_base = "host.hostname";
optiov[2].iov_len = sizeof("host.hostname");
optiov[3].iov_base = u_hostname;
error = copyinstr(PTRIN(j32_v0.hostname), u_hostname,
MAXHOSTNAMELEN, &optiov[3].iov_len);
if (error) {
free(u_path, M_TEMP);
return (error);
}
#ifdef INET
optiov[opt.uio_iovcnt].iov_base = "ip4.addr";
optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr");
opt.uio_iovcnt++;
optiov[opt.uio_iovcnt].iov_base = &j32_v0.ip_number;
j32_v0.ip_number = htonl(j32_v0.ip_number);
optiov[opt.uio_iovcnt].iov_len = sizeof(j32_v0.ip_number);
opt.uio_iovcnt++;
#endif
CP(j32_v0, j, version);
PTRIN_CP(j32_v0, j, path);
PTRIN_CP(j32_v0, j, hostname);
j.ip4s = j32_v0.ip_number;
break;
}
@ -2119,109 +2078,18 @@ freebsd32_jail(struct thread *td, struct freebsd32_jail_args *uap)
{
/* FreeBSD multi-IPv4/IPv6,noIP jails. */
struct jail32 j32;
size_t tmplen;
error = copyin(uap->jail, &j32, sizeof(struct jail32));
if (error)
return (error);
tmplen = MAXPATHLEN + MAXHOSTNAMELEN + MAXHOSTNAMELEN;
#ifdef INET
if (j32.ip4s > jail_max_af_ips)
return (EINVAL);
tmplen += j32.ip4s * sizeof(struct in_addr);
#else
if (j32.ip4s > 0)
return (EINVAL);
#endif
#ifdef INET6
if (j32.ip6s > jail_max_af_ips)
return (EINVAL);
tmplen += j32.ip6s * sizeof(struct in6_addr);
#else
if (j32.ip6s > 0)
return (EINVAL);
#endif
u_path = malloc(tmplen, M_TEMP, M_WAITOK);
u_hostname = u_path + MAXPATHLEN;
u_name = u_hostname + MAXHOSTNAMELEN;
#ifdef INET
u_ip4 = (struct in_addr *)(u_name + MAXHOSTNAMELEN);
#endif
#ifdef INET6
#ifdef INET
u_ip6 = (struct in6_addr *)(u_ip4 + j32.ip4s);
#else
u_ip6 = (struct in6_addr *)(u_name + MAXHOSTNAMELEN);
#endif
#endif
opt.uio_iov = optiov;
opt.uio_iovcnt = 4;
opt.uio_offset = -1;
opt.uio_resid = -1;
opt.uio_segflg = UIO_SYSSPACE;
opt.uio_rw = UIO_READ;
opt.uio_td = td;
optiov[0].iov_base = "path";
optiov[0].iov_len = sizeof("path");
optiov[1].iov_base = u_path;
error = copyinstr(PTRIN(j32.path), u_path, MAXPATHLEN,
&optiov[1].iov_len);
if (error) {
free(u_path, M_TEMP);
return (error);
}
optiov[2].iov_base = "host.hostname";
optiov[2].iov_len = sizeof("host.hostname");
optiov[3].iov_base = u_hostname;
error = copyinstr(PTRIN(j32.hostname), u_hostname,
MAXHOSTNAMELEN, &optiov[3].iov_len);
if (error) {
free(u_path, M_TEMP);
return (error);
}
if (PTRIN(j32.jailname) != NULL) {
optiov[opt.uio_iovcnt].iov_base = "name";
optiov[opt.uio_iovcnt].iov_len = sizeof("name");
opt.uio_iovcnt++;
optiov[opt.uio_iovcnt].iov_base = u_name;
error = copyinstr(PTRIN(j32.jailname), u_name,
MAXHOSTNAMELEN, &optiov[opt.uio_iovcnt].iov_len);
if (error) {
free(u_path, M_TEMP);
return (error);
}
opt.uio_iovcnt++;
}
#ifdef INET
optiov[opt.uio_iovcnt].iov_base = "ip4.addr";
optiov[opt.uio_iovcnt].iov_len = sizeof("ip4.addr");
opt.uio_iovcnt++;
optiov[opt.uio_iovcnt].iov_base = u_ip4;
optiov[opt.uio_iovcnt].iov_len =
j32.ip4s * sizeof(struct in_addr);
error = copyin(PTRIN(j32.ip4), u_ip4,
optiov[opt.uio_iovcnt].iov_len);
if (error) {
free(u_path, M_TEMP);
return (error);
}
opt.uio_iovcnt++;
#endif
#ifdef INET6
optiov[opt.uio_iovcnt].iov_base = "ip6.addr";
optiov[opt.uio_iovcnt].iov_len = sizeof("ip6.addr");
opt.uio_iovcnt++;
optiov[opt.uio_iovcnt].iov_base = u_ip6;
optiov[opt.uio_iovcnt].iov_len =
j32.ip6s * sizeof(struct in6_addr);
error = copyin(PTRIN(j32.ip6), u_ip6,
optiov[opt.uio_iovcnt].iov_len);
if (error) {
free(u_path, M_TEMP);
return (error);
}
opt.uio_iovcnt++;
#endif
CP(j32, j, version);
PTRIN_CP(j32, j, path);
PTRIN_CP(j32, j, hostname);
PTRIN_CP(j32, j, jailname);
CP(j32, j, ip4s);
CP(j32, j, ip6s);
PTRIN_CP(j32, j, ip4);
PTRIN_CP(j32, j, ip6);
break;
}
@ -2229,9 +2097,7 @@ freebsd32_jail(struct thread *td, struct freebsd32_jail_args *uap)
/* Sci-Fi jails are not supported, sorry. */
return (EINVAL);
}
error = kern_jail_set(td, &opt, JAIL_CREATE | JAIL_ATTACH);
free(u_path, M_TEMP);
return (error);
return (kern_jail(td, &j));
}
int

View File

@ -57,16 +57,18 @@ struct linux_prison {
int pr_osrel;
};
static struct linux_prison lprison0 = {
.pr_osname = "Linux",
.pr_osrelease = "2.6.16",
.pr_oss_version = 0x030600,
.pr_osrel = 2006016
};
static unsigned linux_osd_jail_slot;
SYSCTL_NODE(_compat, OID_AUTO, linux, CTLFLAG_RW, 0,
"Linux mode");
static struct mtx osname_lock;
MTX_SYSINIT(linux_osname, &osname_lock, "linux osname", MTX_DEF);
static char linux_osname[LINUX_MAX_UTSNAME] = "Linux";
static int
linux_sysctl_osname(SYSCTL_HANDLER_ARGS)
{
@ -86,9 +88,6 @@ SYSCTL_PROC(_compat_linux, OID_AUTO, osname,
0, 0, linux_sysctl_osname, "A",
"Linux kernel OS name");
static char linux_osrelease[LINUX_MAX_UTSNAME] = "2.6.16";
static int linux_osrel = 2006016;
static int
linux_sysctl_osrelease(SYSCTL_HANDLER_ARGS)
{
@ -108,8 +107,6 @@ SYSCTL_PROC(_compat_linux, OID_AUTO, osrelease,
0, 0, linux_sysctl_osrelease, "A",
"Linux kernel OS release");
static int linux_oss_version = 0x030600;
static int
linux_sysctl_oss_version(SYSCTL_HANDLER_ARGS)
{
@ -161,69 +158,74 @@ linux_map_osrel(char *osrelease, int *osrel)
}
/*
* Returns holding the prison mutex if return non-NULL.
* Find a prison with Linux info.
* Return the Linux info and the (locked) prison.
*/
static struct linux_prison *
linux_get_prison(struct thread *td, struct prison **prp)
linux_find_prison(struct prison *spr, struct prison **prp)
{
struct prison *pr;
struct linux_prison *lpr;
KASSERT(td == curthread, ("linux_get_prison() called on !curthread"));
*prp = pr = td->td_ucred->cr_prison;
if (pr == NULL || !linux_osd_jail_slot)
return (NULL);
mtx_lock(&pr->pr_mtx);
lpr = osd_jail_get(pr, linux_osd_jail_slot);
if (lpr == NULL)
if (!linux_osd_jail_slot)
/* In case osd_register failed. */
spr = &prison0;
for (pr = spr;; pr = pr->pr_parent) {
mtx_lock(&pr->pr_mtx);
lpr = (pr == &prison0)
? &lprison0
: osd_jail_get(pr, linux_osd_jail_slot);
if (lpr != NULL)
break;
mtx_unlock(&pr->pr_mtx);
}
*prp = pr;
return (lpr);
}
/*
* Ensure a prison has its own Linux info. The prison should be locked on
* entrance and will be locked on exit (though it may get unlocked in the
* interim).
* Ensure a prison has its own Linux info. If lprp is non-null, point it to
* the Linux info and lock the prison.
*/
static int
linux_alloc_prison(struct prison *pr, struct linux_prison **lprp)
{
struct prison *ppr;
struct linux_prison *lpr, *nlpr;
int error;
/* If this prison already has Linux info, return that. */
error = 0;
mtx_assert(&pr->pr_mtx, MA_OWNED);
lpr = osd_jail_get(pr, linux_osd_jail_slot);
if (lpr != NULL)
lpr = linux_find_prison(pr, &ppr);
if (ppr == pr)
goto done;
/*
* Allocate a new info record. Then check again, in case something
* changed during the allocation.
*/
mtx_unlock(&pr->pr_mtx);
mtx_unlock(&ppr->pr_mtx);
nlpr = malloc(sizeof(struct linux_prison), M_PRISON, M_WAITOK);
mtx_lock(&pr->pr_mtx);
lpr = osd_jail_get(pr, linux_osd_jail_slot);
if (lpr != NULL) {
lpr = linux_find_prison(pr, &ppr);
if (ppr == pr) {
free(nlpr, M_PRISON);
goto done;
}
/* Inherit the initial values from the ancestor. */
mtx_lock(&pr->pr_mtx);
error = osd_jail_set(pr, linux_osd_jail_slot, nlpr);
if (error)
free(nlpr, M_PRISON);
else {
if (error == 0) {
bcopy(lpr, nlpr, sizeof(*lpr));
lpr = nlpr;
mtx_lock(&osname_lock);
strncpy(lpr->pr_osname, linux_osname, LINUX_MAX_UTSNAME);
strncpy(lpr->pr_osrelease, linux_osrelease, LINUX_MAX_UTSNAME);
lpr->pr_oss_version = linux_oss_version;
lpr->pr_osrel = linux_osrel;
mtx_unlock(&osname_lock);
} else {
free(nlpr, M_PRISON);
lpr = NULL;
}
done:
mtx_unlock(&ppr->pr_mtx);
done:
if (lprp != NULL)
*lprp = lpr;
else
mtx_unlock(&pr->pr_mtx);
return (error);
}
@ -233,7 +235,6 @@ linux_alloc_prison(struct prison *pr, struct linux_prison **lprp)
static int
linux_prison_create(void *obj, void *data)
{
int error;
struct prison *pr = obj;
struct vfsoptlist *opts = data;
@ -243,10 +244,7 @@ linux_prison_create(void *obj, void *data)
* Inherit a prison's initial values from its parent
* (different from NULL which also inherits changes).
*/
mtx_lock(&pr->pr_mtx);
error = linux_alloc_prison(pr, NULL);
mtx_unlock(&pr->pr_mtx);
return (error);
return linux_alloc_prison(pr, NULL);
}
static int
@ -254,7 +252,7 @@ linux_prison_check(void *obj __unused, void *data)
{
struct vfsoptlist *opts = data;
char *osname, *osrelease;
int error, len, oss_version;
int error, len, osrel, oss_version;
/* Check that the parameters are correct. */
(void)vfs_flagopt(opts, "linux", NULL, 0);
@ -280,6 +278,11 @@ linux_prison_check(void *obj __unused, void *data)
vfs_opterror(opts, "linux.osrelease too long");
return (ENAMETOOLONG);
}
error = linux_map_osrel(osrelease, &osrel);
if (error != 0) {
vfs_opterror(opts, "linux.osrelease format error");
return (error);
}
}
error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
sizeof(oss_version));
@ -310,7 +313,7 @@ linux_prison_set(void *obj, void *data)
yeslinux = 1;
error = vfs_copyopt(opts, "linux.oss_version", &oss_version,
sizeof(oss_version));
gotversion = error == 0;
gotversion = (error == 0);
yeslinux |= gotversion;
if (nolinux) {
/* "nolinux": inherit the parent's Linux info. */
@ -322,7 +325,6 @@ linux_prison_set(void *obj, void *data)
* "linux" or "linux.*":
* the prison gets its own Linux info.
*/
mtx_lock(&pr->pr_mtx);
error = linux_alloc_prison(pr, &lpr);
if (error) {
mtx_unlock(&pr->pr_mtx);
@ -360,14 +362,16 @@ static int
linux_prison_get(void *obj, void *data)
{
struct linux_prison *lpr;
struct prison *ppr;
struct prison *pr = obj;
struct vfsoptlist *opts = data;
int error, i;
mtx_lock(&pr->pr_mtx);
/* Tell whether this prison has its own Linux info. */
lpr = osd_jail_get(pr, linux_osd_jail_slot);
i = lpr != NULL;
static int version0;
/* See if this prison is the one with the Linux info. */
lpr = linux_find_prison(pr, &ppr);
i = (ppr == pr);
error = vfs_setopt(opts, "linux", &i, sizeof(i));
if (error != 0 && error != ENOENT)
goto done;
@ -375,11 +379,22 @@ linux_prison_get(void *obj, void *data)
error = vfs_setopt(opts, "nolinux", &i, sizeof(i));
if (error != 0 && error != ENOENT)
goto done;
/*
* It's kind of bogus to give the root info, but leave it to the caller
* to check the above flag.
*/
if (lpr != NULL) {
if (i) {
/*
* If this prison is inheriting its Linux info, report
* empty/zero parameters.
*/
error = vfs_setopts(opts, "linux.osname", "");
if (error != 0 && error != ENOENT)
goto done;
error = vfs_setopts(opts, "linux.osrelease", "");
if (error != 0 && error != ENOENT)
goto done;
error = vfs_setopt(opts, "linux.oss_version", &version0,
sizeof(lpr->pr_oss_version));
if (error != 0 && error != ENOENT)
goto done;
} else {
error = vfs_setopts(opts, "linux.osname", lpr->pr_osname);
if (error != 0 && error != ENOENT)
goto done;
@ -390,24 +405,11 @@ linux_prison_get(void *obj, void *data)
&lpr->pr_oss_version, sizeof(lpr->pr_oss_version));
if (error != 0 && error != ENOENT)
goto done;
} else {
mtx_lock(&osname_lock);
error = vfs_setopts(opts, "linux.osname", linux_osname);
if (error != 0 && error != ENOENT)
goto done;
error = vfs_setopts(opts, "linux.osrelease", linux_osrelease);
if (error != 0 && error != ENOENT)
goto done;
error = vfs_setopt(opts, "linux.oss_version",
&linux_oss_version, sizeof(linux_oss_version));
if (error != 0 && error != ENOENT)
goto done;
mtx_unlock(&osname_lock);
}
error = 0;
done:
mtx_unlock(&pr->pr_mtx);
mtx_unlock(&ppr->pr_mtx);
return (error);
}
@ -434,11 +436,8 @@ linux_osd_jail_register(void)
if (linux_osd_jail_slot > 0) {
/* Copy the system linux info to any current prisons. */
sx_xlock(&allprison_lock);
TAILQ_FOREACH(pr, &allprison, pr_list) {
mtx_lock(&pr->pr_mtx);
TAILQ_FOREACH(pr, &allprison, pr_list)
(void)linux_alloc_prison(pr, NULL);
mtx_unlock(&pr->pr_mtx);
}
sx_xunlock(&allprison_lock);
}
}
@ -457,15 +456,9 @@ linux_get_osname(struct thread *td, char *dst)
struct prison *pr;
struct linux_prison *lpr;
lpr = linux_get_prison(td, &pr);
if (lpr != NULL) {
bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME);
mtx_unlock(&pr->pr_mtx);
} else {
mtx_lock(&osname_lock);
bcopy(linux_osname, dst, LINUX_MAX_UTSNAME);
mtx_unlock(&osname_lock);
}
lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
bcopy(lpr->pr_osname, dst, LINUX_MAX_UTSNAME);
mtx_unlock(&pr->pr_mtx);
}
int
@ -474,16 +467,9 @@ linux_set_osname(struct thread *td, char *osname)
struct prison *pr;
struct linux_prison *lpr;
lpr = linux_get_prison(td, &pr);
if (lpr != NULL) {
strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
mtx_unlock(&pr->pr_mtx);
} else {
mtx_lock(&osname_lock);
strcpy(linux_osname, osname);
mtx_unlock(&osname_lock);
}
lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
strlcpy(lpr->pr_osname, osname, LINUX_MAX_UTSNAME);
mtx_unlock(&pr->pr_mtx);
return (0);
}
@ -493,15 +479,9 @@ linux_get_osrelease(struct thread *td, char *dst)
struct prison *pr;
struct linux_prison *lpr;
lpr = linux_get_prison(td, &pr);
if (lpr != NULL) {
bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME);
mtx_unlock(&pr->pr_mtx);
} else {
mtx_lock(&osname_lock);
bcopy(linux_osrelease, dst, LINUX_MAX_UTSNAME);
mtx_unlock(&osname_lock);
}
lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
bcopy(lpr->pr_osrelease, dst, LINUX_MAX_UTSNAME);
mtx_unlock(&pr->pr_mtx);
}
int
@ -511,12 +491,9 @@ linux_kernver(struct thread *td)
struct linux_prison *lpr;
int osrel;
lpr = linux_get_prison(td, &pr);
if (lpr != NULL) {
osrel = lpr->pr_osrel;
mtx_unlock(&pr->pr_mtx);
} else
osrel = linux_osrel;
lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
osrel = lpr->pr_osrel;
mtx_unlock(&pr->pr_mtx);
return (osrel);
}
@ -527,27 +504,12 @@ linux_set_osrelease(struct thread *td, char *osrelease)
struct linux_prison *lpr;
int error;
lpr = linux_get_prison(td, &pr);
if (lpr != NULL) {
error = linux_map_osrel(osrelease, &lpr->pr_osrel);
if (error) {
mtx_unlock(&pr->pr_mtx);
return (error);
}
lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
error = linux_map_osrel(osrelease, &lpr->pr_osrel);
if (error == 0)
strlcpy(lpr->pr_osrelease, osrelease, LINUX_MAX_UTSNAME);
mtx_unlock(&pr->pr_mtx);
} else {
mtx_lock(&osname_lock);
error = linux_map_osrel(osrelease, &linux_osrel);
if (error) {
mtx_unlock(&osname_lock);
return (error);
}
strcpy(linux_osrelease, osrelease);
mtx_unlock(&osname_lock);
}
return (0);
mtx_unlock(&pr->pr_mtx);
return (error);
}
int
@ -557,12 +519,9 @@ linux_get_oss_version(struct thread *td)
struct linux_prison *lpr;
int version;
lpr = linux_get_prison(td, &pr);
if (lpr != NULL) {
version = lpr->pr_oss_version;
mtx_unlock(&pr->pr_mtx);
} else
version = linux_oss_version;
lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
version = lpr->pr_oss_version;
mtx_unlock(&pr->pr_mtx);
return (version);
}
@ -572,16 +531,9 @@ linux_set_oss_version(struct thread *td, int oss_version)
struct prison *pr;
struct linux_prison *lpr;
lpr = linux_get_prison(td, &pr);
if (lpr != NULL) {
lpr->pr_oss_version = oss_version;
mtx_unlock(&pr->pr_mtx);
} else {
mtx_lock(&osname_lock);
linux_oss_version = oss_version;
mtx_unlock(&osname_lock);
}
lpr = linux_find_prison(td->td_ucred->cr_prison, &pr);
lpr->pr_oss_version = oss_version;
mtx_unlock(&pr->pr_mtx);
return (0);
}

View File

@ -318,8 +318,10 @@ int iplioctl(dev, cmd, data, mode
# if (__FreeBSD_version >= 500024)
struct thread *p;
# if (__FreeBSD_version >= 500043)
# define p_cred td_ucred
# define p_uid td_ucred->cr_ruid
# else
# define p_cred t_proc->p_cred
# define p_uid t_proc->p_cred->p_ruid
# endif
# else
@ -342,7 +344,11 @@ int mode;
SPL_INT(s);
#if (BSD >= 199306) && defined(_KERNEL)
# if (__FreeBSD_version >= 500034)
if (securelevel_ge(p->p_cred, 3) && (mode & FWRITE))
# else
if ((securelevel >= 3) && (mode & FWRITE))
# endif
return EPERM;
#endif

View File

@ -662,7 +662,11 @@ void *ctx;
return EPERM;
}
# else
# if defined(__FreeBSD_version) && (__FreeBSD_version >= 500034)
if (securelevel_ge(curthread->td_ucred, 3) && (mode & FWRITE)) {
# else
if ((securelevel >= 3) && (mode & FWRITE)) {
# endif
return EPERM;
}
# endif

View File

@ -151,10 +151,11 @@ procfs_doprocstatus(PFS_FILL_ARGS)
sbuf_printf(sb, ",%lu", (u_long)cr->cr_groups[i]);
}
if (jailed(p->p_ucred)) {
mtx_lock(&p->p_ucred->cr_prison->pr_mtx);
sbuf_printf(sb, " %s", p->p_ucred->cr_prison->pr_host);
mtx_unlock(&p->p_ucred->cr_prison->pr_mtx);
if (jailed(cr)) {
mtx_lock(&cr->cr_prison->pr_mtx);
sbuf_printf(sb, " %s",
prison_name(td->td_ucred->cr_prison, cr->cr_prison));
mtx_unlock(&cr->cr_prison->pr_mtx);
} else {
sbuf_printf(sb, " -");
}

View File

@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
#include <sys/exec.h>
#include <sys/file.h>
#include <sys/filedesc.h>
#include <sys/jail.h>
#include <sys/ktr.h>
#include <sys/lock.h>
#include <sys/mount.h>
@ -436,6 +437,7 @@ proc0_init(void *dummy __unused)
td->td_oncpu = 0;
td->td_flags = TDF_INMEM|TDP_KTHREAD;
td->td_cpuset = cpuset_thread0();
prison0.pr_cpuset = cpuset_ref(td->td_cpuset);
p->p_peers = 0;
p->p_leader = p;
@ -452,7 +454,7 @@ proc0_init(void *dummy __unused)
p->p_ucred->cr_ngroups = 1; /* group 0 */
p->p_ucred->cr_uidinfo = uifind(0);
p->p_ucred->cr_ruidinfo = uifind(0);
p->p_ucred->cr_prison = NULL; /* Don't jail it. */
p->p_ucred->cr_prison = &prison0;
#ifdef VIMAGE
KASSERT(LIST_FIRST(&vimage_head) != NULL, ("vimage_head empty"));
P_TO_VIMAGE(p) = LIST_FIRST(&vimage_head); /* set ucred->cr_vimage */

View File

@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
@ -53,7 +54,6 @@ __FBSDID("$FreeBSD$");
#include <sys/limits.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
#include <sys/jail.h> /* Must come after sys/proc.h */
#include <vm/uma.h>
@ -225,23 +225,16 @@ cpuset_lookup(cpusetid_t setid, struct thread *td)
KASSERT(td != NULL, ("[%s:%d] td is NULL", __func__, __LINE__));
if (set != NULL && jailed(td->td_ucred)) {
struct cpuset *rset, *jset;
struct prison *pr;
struct cpuset *jset, *tset;
rset = cpuset_refroot(set);
pr = td->td_ucred->cr_prison;
mtx_lock(&pr->pr_mtx);
cpuset_ref(pr->pr_cpuset);
jset = pr->pr_cpuset;
mtx_unlock(&pr->pr_mtx);
if (jset->cs_id != rset->cs_id) {
jset = td->td_ucred->cr_prison->pr_cpuset;
for (tset = set; tset != NULL; tset = tset->cs_parent)
if (tset == jset)
break;
if (tset == NULL) {
cpuset_rel(set);
set = NULL;
}
cpuset_rel(jset);
cpuset_rel(rset);
}
return (set);
@ -456,25 +449,14 @@ cpuset_which(cpuwhich_t which, id_t id, struct proc **pp, struct thread **tdp,
struct prison *pr;
sx_slock(&allprison_lock);
pr = prison_find(id);
pr = prison_find_child(curthread->td_ucred->cr_prison, id);
sx_sunlock(&allprison_lock);
if (pr == NULL)
return (ESRCH);
if (jailed(curthread->td_ucred)) {
if (curthread->td_ucred->cr_prison == pr) {
cpuset_ref(pr->pr_cpuset);
set = pr->pr_cpuset;
}
} else {
cpuset_ref(pr->pr_cpuset);
set = pr->pr_cpuset;
}
cpuset_ref(pr->pr_cpuset);
*setp = pr->pr_cpuset;
mtx_unlock(&pr->pr_mtx);
if (set) {
*setp = set;
return (0);
}
return (ESRCH);
return (0);
}
case CPU_WHICH_IRQ:
return (0);
@ -731,21 +713,15 @@ cpuset_thread0(void)
* In case of no error, returns the set in *setp locked with a reference.
*/
int
cpuset_create_root(struct thread *td, struct cpuset **setp)
cpuset_create_root(struct prison *pr, struct cpuset **setp)
{
struct cpuset *root;
struct cpuset *set;
int error;
KASSERT(td != NULL, ("[%s:%d] invalid td", __func__, __LINE__));
KASSERT(pr != NULL, ("[%s:%d] invalid pr", __func__, __LINE__));
KASSERT(setp != NULL, ("[%s:%d] invalid setp", __func__, __LINE__));
thread_lock(td);
root = cpuset_refroot(td->td_cpuset);
thread_unlock(td);
error = cpuset_create(setp, td->td_cpuset, &root->cs_mask);
cpuset_rel(root);
error = cpuset_create(setp, pr->pr_cpuset, &pr->pr_cpuset->cs_mask);
if (error)
return (error);

View File

@ -2416,24 +2416,25 @@ dupfdopen(struct thread *td, struct filedesc *fdp, int indx, int dfd, int mode,
}
/*
* Scan all active processes to see if any of them have a current or root
* directory of `olddp'. If so, replace them with the new mount point.
* Scan all active processes and prisons to see if any of them have a current
* or root directory of `olddp'. If so, replace them with the new mount point.
*/
void
mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
{
struct filedesc *fdp;
struct prison *pr;
struct proc *p;
int nrele;
if (vrefcnt(olddp) == 1)
return;
nrele = 0;
sx_slock(&allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
fdp = fdhold(p);
if (fdp == NULL)
continue;
nrele = 0;
FILEDESC_XLOCK(fdp);
if (fdp->fd_cdir == olddp) {
vref(newdp);
@ -2445,17 +2446,40 @@ mountcheckdirs(struct vnode *olddp, struct vnode *newdp)
fdp->fd_rdir = newdp;
nrele++;
}
if (fdp->fd_jdir == olddp) {
vref(newdp);
fdp->fd_jdir = newdp;
nrele++;
}
FILEDESC_XUNLOCK(fdp);
fddrop(fdp);
while (nrele--)
vrele(olddp);
}
sx_sunlock(&allproc_lock);
if (rootvnode == olddp) {
vrele(rootvnode);
vref(newdp);
rootvnode = newdp;
nrele++;
}
mtx_lock(&prison0.pr_mtx);
if (prison0.pr_root == olddp) {
vref(newdp);
prison0.pr_root = newdp;
nrele++;
}
mtx_unlock(&prison0.pr_mtx);
sx_slock(&allprison_lock);
TAILQ_FOREACH(pr, &allprison, pr_list) {
mtx_lock(&pr->pr_mtx);
if (pr->pr_root == olddp) {
vref(newdp);
pr->pr_root = newdp;
nrele++;
}
mtx_unlock(&pr->pr_mtx);
}
sx_sunlock(&allprison_lock);
while (nrele--)
vrele(olddp);
}
struct filedesc_to_leader *

View File

@ -455,9 +455,8 @@ exit1(struct thread *td, int rv)
p->p_xstat = rv;
p->p_xthread = td;
/* In case we are jailed tell the prison that we are gone. */
if (jailed(p->p_ucred))
prison_proc_free(p->p_ucred->cr_prison);
/* Tell the prison that we are gone. */
prison_proc_free(p->p_ucred->cr_prison);
#ifdef KDTRACE_HOOKS
/*

View File

@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysproto.h>
#include <sys/eventhandler.h>
#include <sys/filedesc.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/sysctl.h>
@ -54,7 +55,6 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/priv.h>
#include <sys/proc.h>
#include <sys/jail.h>
#include <sys/pioctl.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
@ -458,9 +458,8 @@ fork1(td, flags, pages, procp)
p2->p_ucred = crhold(td->td_ucred);
/* In case we are jailed tell the prison that we exist. */
if (jailed(p2->p_ucred))
prison_proc_hold(p2->p_ucred->cr_prison);
/* Tell the prison that we exist. */
prison_proc_hold(p2->p_ucred->cr_prison);
PROC_UNLOCK(p2);

File diff suppressed because it is too large Load Diff

View File

@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mount.h>
#include <sys/linker.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/libkern.h>
#include <sys/namei.h>
#include <sys/vnode.h>
@ -375,7 +376,7 @@ linker_load_file(const char *filename, linker_file_t *result)
int foundfile, error;
/* Refuse to load modules if securelevel raised */
if (securelevel > 0)
if (prison0.pr_securelevel > 0)
return (EPERM);
KLD_LOCK_ASSERT();
@ -580,7 +581,7 @@ linker_file_unload(linker_file_t file, int flags)
int error, i;
/* Refuse to unload modules if securelevel raised. */
if (securelevel > 0)
if (prison0.pr_securelevel > 0)
return (EPERM);
KLD_LOCK_ASSERT();

View File

@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/jail.h>
#include <sys/smp.h>
#include <sys/sx.h>
#include <sys/unistd.h>
#include <sys/vimage.h>
@ -228,8 +229,8 @@ sysctl_hostname(SYSCTL_HANDLER_ARGS)
int error;
pr = req->td->td_ucred->cr_prison;
if (pr != NULL) {
if (!jail_set_hostname_allowed && req->newptr)
if (pr != &prison0) {
if (!(pr->pr_allow & PR_ALLOW_SET_HOSTNAME) && req->newptr)
return (EPERM);
/*
* Process is in jail, so make a local copy of jail
@ -259,9 +260,12 @@ sysctl_hostname(SYSCTL_HANDLER_ARGS)
error = sysctl_handle_string(oidp, tmphostname,
sizeof tmphostname, req);
if (req->newptr != NULL && error == 0) {
mtx_lock(&prison0.pr_mtx);
mtx_lock(&hostname_mtx);
bcopy(tmphostname, prison0.pr_host, MAXHOSTNAMELEN);
bcopy(tmphostname, V_hostname, MAXHOSTNAMELEN);
mtx_unlock(&hostname_mtx);
mtx_unlock(&prison0.pr_mtx);
}
}
return (error);
@ -278,55 +282,43 @@ SYSCTL_INT(_regression, OID_AUTO, securelevel_nonmonotonic, CTLFLAG_RW,
&regression_securelevel_nonmonotonic, 0, "securelevel may be lowered");
#endif
int securelevel = -1;
static struct mtx securelevel_mtx;
MTX_SYSINIT(securelevel_lock, &securelevel_mtx, "securelevel mutex lock",
MTX_DEF);
static int
sysctl_kern_securelvl(SYSCTL_HANDLER_ARGS)
{
struct prison *pr;
int error, level;
struct prison *pr, *cpr;
int descend, error, level;
pr = req->td->td_ucred->cr_prison;
/*
* If the process is in jail, return the maximum of the global and
* local levels; otherwise, return the global level. Perform a
* lockless read since the securelevel is an integer.
* Reading the securelevel is easy, since the current jail's level
* is known to be at least as secure as any higher levels. Perform
* a lockless read since the securelevel is an integer.
*/
if (pr != NULL)
level = imax(securelevel, pr->pr_securelevel);
else
level = securelevel;
level = pr->pr_securelevel;
error = sysctl_handle_int(oidp, &level, 0, req);
if (error || !req->newptr)
return (error);
/*
* Permit update only if the new securelevel exceeds the
* global level, and local level if any.
*/
if (pr != NULL) {
mtx_lock(&pr->pr_mtx);
if (!regression_securelevel_nonmonotonic &&
(level < imax(securelevel, pr->pr_securelevel))) {
mtx_unlock(&pr->pr_mtx);
return (EPERM);
}
pr->pr_securelevel = level;
/* Permit update only if the new securelevel exceeds the old. */
sx_slock(&allprison_lock);
mtx_lock(&pr->pr_mtx);
if (!regression_securelevel_nonmonotonic &&
level < pr->pr_securelevel) {
mtx_unlock(&pr->pr_mtx);
} else {
mtx_lock(&securelevel_mtx);
if (!regression_securelevel_nonmonotonic &&
(level < securelevel)) {
mtx_unlock(&securelevel_mtx);
return (EPERM);
}
securelevel = level;
mtx_unlock(&securelevel_mtx);
sx_sunlock(&allprison_lock);
return (EPERM);
}
pr->pr_securelevel = level;
/*
* Set all child jails to be at least this level, but do not lower
* them (even if regression_securelevel_nonmonotonic).
*/
FOREACH_PRISON_DESCENDANT_LOCKED(pr, cpr, descend) {
if (cpr->pr_securelevel < level)
cpr->pr_securelevel = level;
}
mtx_unlock(&pr->pr_mtx);
sx_sunlock(&allprison_lock);
return (error);
}

View File

@ -739,8 +739,8 @@ fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp)
/* If jailed(cred), emulate the old P_JAILED flag. */
if (jailed(cred)) {
kp->ki_flag |= P_JAILED;
/* If inside a jail, use 0 as a jail ID. */
if (!jailed(curthread->td_ucred))
/* If inside the jail, use 0 as a jail ID. */
if (cred->cr_prison != curthread->td_ucred->cr_prison)
kp->ki_jid = cred->cr_prison->pr_id;
}
}

View File

@ -1263,33 +1263,25 @@ groupmember(gid_t gid, struct ucred *cred)
* (securelevel >= level). Note that the logic is inverted -- these
* functions return EPERM on "success" and 0 on "failure".
*
* Due to care taken when setting the securelevel, we know that no jail will
* be less secure than its parent (or the physical system), so it is sufficient
* to test the current jail only.
*
* XXXRW: Possibly since this has to do with privilege, it should move to
* kern_priv.c.
*/
int
securelevel_gt(struct ucred *cr, int level)
{
int active_securelevel;
active_securelevel = securelevel;
KASSERT(cr != NULL, ("securelevel_gt: null cr"));
if (cr->cr_prison != NULL)
active_securelevel = imax(cr->cr_prison->pr_securelevel,
active_securelevel);
return (active_securelevel > level ? EPERM : 0);
return (cr->cr_prison->pr_securelevel > level ? EPERM : 0);
}
int
securelevel_ge(struct ucred *cr, int level)
{
int active_securelevel;
active_securelevel = securelevel;
KASSERT(cr != NULL, ("securelevel_ge: null cr"));
if (cr->cr_prison != NULL)
active_securelevel = imax(cr->cr_prison->pr_securelevel,
active_securelevel);
return (active_securelevel >= level ? EPERM : 0);
return (cr->cr_prison->pr_securelevel >= level ? EPERM : 0);
}
/*
@ -1823,7 +1815,7 @@ crfree(struct ucred *cr)
/*
* Free a prison, if any.
*/
if (jailed(cr))
if (cr->cr_prison != NULL)
prison_free(cr->cr_prison);
#ifdef VIMAGE
/* XXX TODO: find out why and when cr_vimage can be NULL here! */
@ -1863,8 +1855,7 @@ crcopy(struct ucred *dest, struct ucred *src)
(caddr_t)&src->cr_startcopy));
uihold(dest->cr_uidinfo);
uihold(dest->cr_ruidinfo);
if (jailed(dest))
prison_hold(dest->cr_prison);
prison_hold(dest->cr_prison);
#ifdef VIMAGE
KASSERT(src->cr_vimage != NULL, ("cr_vimage == NULL"));
refcount_acquire(&dest->cr_vimage->vi_ucredrefc);

View File

@ -337,7 +337,7 @@ msgsys(td, uap)
{
int error;
if (!jail_sysvipc_allowed && jailed(td->td_ucred))
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
return (ENOSYS);
if (uap->which < 0 ||
uap->which >= sizeof(msgcalls)/sizeof(msgcalls[0]))
@ -410,7 +410,7 @@ kern_msgctl(td, msqid, cmd, msqbuf)
int rval, error, msqix;
register struct msqid_kernel *msqkptr;
if (!jail_sysvipc_allowed && jailed(td->td_ucred))
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
return (ENOSYS);
msqix = IPCID_TO_IX(msqid);
@ -564,7 +564,7 @@ msgget(td, uap)
DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
if (!jail_sysvipc_allowed && jailed(td->td_ucred))
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
return (ENOSYS);
mtx_lock(&msq_mtx);
@ -674,7 +674,7 @@ kern_msgsnd(td, msqid, msgp, msgsz, msgflg, mtype)
register struct msg *msghdr;
short next;
if (!jail_sysvipc_allowed && jailed(td->td_ucred))
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
return (ENOSYS);
mtx_lock(&msq_mtx);
@ -1012,7 +1012,7 @@ kern_msgrcv(td, msqid, msgp, msgsz, msgtyp, msgflg, mtype)
int msqix, error = 0;
short next;
if (!jail_sysvipc_allowed && jailed(td->td_ucred))
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
return (ENOSYS);
msqix = IPCID_TO_IX(msqid);

View File

@ -344,7 +344,7 @@ semsys(td, uap)
{
int error;
if (!jail_sysvipc_allowed && jailed(td->td_ucred))
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
return (ENOSYS);
if (uap->which < 0 ||
uap->which >= sizeof(semcalls)/sizeof(semcalls[0]))
@ -583,7 +583,7 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
DPRINTF(("call to semctl(%d, %d, %d, 0x%p)\n",
semid, semnum, cmd, arg));
if (!jail_sysvipc_allowed && jailed(td->td_ucred))
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
return (ENOSYS);
array = NULL;
@ -855,7 +855,7 @@ semget(struct thread *td, struct semget_args *uap)
struct ucred *cred = td->td_ucred;
DPRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg));
if (!jail_sysvipc_allowed && jailed(td->td_ucred))
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
return (ENOSYS);
mtx_lock(&sem_mtx);
@ -982,7 +982,7 @@ semop(struct thread *td, struct semop_args *uap)
#endif
DPRINTF(("call to semop(%d, %p, %u)\n", semid, sops, nsops));
if (!jail_sysvipc_allowed && jailed(td->td_ucred))
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
return (ENOSYS);
semid = IPCID_TO_IX(semid); /* Convert back to zero origin */

View File

@ -303,7 +303,7 @@ shmdt(td, uap)
int i;
int error = 0;
if (!jail_sysvipc_allowed && jailed(td->td_ucred))
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
return (ENOSYS);
mtx_lock(&Giant);
shmmap_s = p->p_vmspace->vm_shm;
@ -357,7 +357,7 @@ kern_shmat(td, shmid, shmaddr, shmflg)
int rv;
int error = 0;
if (!jail_sysvipc_allowed && jailed(td->td_ucred))
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
return (ENOSYS);
mtx_lock(&Giant);
shmmap_s = p->p_vmspace->vm_shm;
@ -480,7 +480,7 @@ oshmctl(td, uap)
struct shmid_kernel *shmseg;
struct oshmid_ds outbuf;
if (!jail_sysvipc_allowed && jailed(td->td_ucred))
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
return (ENOSYS);
mtx_lock(&Giant);
shmseg = shm_find_segment_by_shmid(uap->shmid);
@ -542,7 +542,7 @@ kern_shmctl(td, shmid, cmd, buf, bufsz)
int error = 0;
struct shmid_kernel *shmseg;
if (!jail_sysvipc_allowed && jailed(td->td_ucred))
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
return (ENOSYS);
mtx_lock(&Giant);
@ -823,7 +823,7 @@ shmget(td, uap)
int segnum, mode;
int error;
if (!jail_sysvipc_allowed && jailed(td->td_ucred))
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
return (ENOSYS);
mtx_lock(&Giant);
mode = uap->shmflg & ACCESSPERMS;
@ -861,7 +861,7 @@ shmsys(td, uap)
#if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43))
int error;
if (!jail_sysvipc_allowed && jailed(td->td_ucred))
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
return (ENOSYS);
if (uap->which < 0 ||
uap->which >= sizeof(shmcalls)/sizeof(shmcalls[0]))

View File

@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/jail.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/namei.h>
@ -446,6 +447,7 @@ lookup(struct nameidata *ndp)
struct vnode *dp = 0; /* the directory we are searching */
struct vnode *tdp; /* saved dp */
struct mount *mp; /* mount table entry */
struct prison *pr;
int docache; /* == 0 do not cache last component */
int wantparent; /* 1 => wantparent or lockparent flag */
int rdonly; /* lookup read-only flag bit */
@ -603,9 +605,14 @@ lookup(struct nameidata *ndp)
goto bad;
}
for (;;) {
for (pr = cnp->cn_cred->cr_prison; pr != NULL;
pr = pr->pr_parent)
if (dp == pr->pr_root)
break;
if (dp == ndp->ni_rootdir ||
dp == ndp->ni_topdir ||
dp == rootvnode ||
pr != NULL ||
((dp->v_vflag & VV_ROOT) != 0 &&
(cnp->cn_flags & NOCROSSMOUNT) != 0)) {
ndp->ni_dvp = dp;

View File

@ -1420,6 +1420,11 @@ static void
root_mount_done(void)
{
/* Keep prison0's root in sync with the global rootvnode. */
mtx_lock(&prison0.pr_mtx);
prison0.pr_root = rootvnode;
vref(prison0.pr_root);
mtx_unlock(&prison0.pr_mtx);
/*
* Use a mutex to prevent the wakeup being missed and waiting for
* an extra 1 second sleep.

View File

@ -467,21 +467,13 @@ vfs_suser(struct mount *mp, struct thread *td)
return (EPERM);
/*
* If the file system was mounted outside a jail and a jailed thread
* tries to access it, deny immediately.
* If the file system was mounted outside the jail of the calling
* thread, deny immediately.
*/
if (!jailed(mp->mnt_cred) && jailed(td->td_ucred))
if (mp->mnt_cred->cr_prison != td->td_ucred->cr_prison &&
!prison_ischild(td->td_ucred->cr_prison, mp->mnt_cred->cr_prison))
return (EPERM);
/*
* If the file system was mounted inside different jail that the jail of
* the calling thread, deny immediately.
*/
if (jailed(mp->mnt_cred) && jailed(td->td_ucred) &&
mp->mnt_cred->cr_prison != td->td_ucred->cr_prison) {
return (EPERM);
}
/*
* If file system supports delegated administration, we don't check
* for the PRIV_VFS_MOUNT_OWNER privilege - it will be better verified
@ -2900,7 +2892,7 @@ DB_SHOW_COMMAND(mount, db_show_mount)
db_printf(" mnt_cred = { uid=%u ruid=%u",
(u_int)mp->mnt_cred->cr_uid, (u_int)mp->mnt_cred->cr_ruid);
if (mp->mnt_cred->cr_prison != NULL)
if (jailed(mp->mnt_cred))
db_printf(", jail=%d", mp->mnt_cred->cr_prison->pr_id);
db_printf(" }\n");
db_printf(" mnt_ref = %d\n", mp->mnt_ref);

View File

@ -164,12 +164,6 @@ sync(td, uap)
return (0);
}
/* XXX PRISON: could be per prison flag */
static int prison_quotas;
#if 0
SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, "");
#endif
/*
* Change filesystem quotas.
*/
@ -198,7 +192,7 @@ quotactl(td, uap)
AUDIT_ARG(cmd, uap->cmd);
AUDIT_ARG(uid, uap->uid);
if (jailed(td->td_ucred) && !prison_quotas)
if (!prison_allow(td->td_ucred, PR_ALLOW_QUOTAS))
return (EPERM);
NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | MPSAFE | AUDITVNODE1,
UIO_USERSPACE, uap->path, td);

View File

@ -373,6 +373,8 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
/*
* As a last resort return the 'default' jail address.
*/
ia = ((struct sockaddr_in *)rt->rt_ifa->ifa_addr)->
sin_addr;
if (prison_get_ip4(cred, &ia) != 0)
return (ESRCH);
}
@ -414,6 +416,8 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
/*
* As a last resort return the 'default' jail address.
*/
ia6 = ((struct sockaddr_in6 *)rt->rt_ifa->ifa_addr)->
sin6_addr;
if (prison_get_ip6(cred, &ia6) != 0)
return (ESRCH);
}

View File

@ -602,7 +602,7 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
goto done;
}
if (cred == NULL || !jailed(cred)) {
if (cred == NULL || !prison_flag(cred, PR_IP4)) {
laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
goto done;
}
@ -646,7 +646,7 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
struct ifnet *ifp;
/* If not jailed, use the default returned. */
if (cred == NULL || !jailed(cred)) {
if (cred == NULL || !prison_flag(cred, PR_IP4)) {
ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa;
laddr->s_addr = ia->ia_addr.sin_addr.s_addr;
goto done;
@ -711,7 +711,7 @@ in_pcbladdr(struct inpcb *inp, struct in_addr *faddr, struct in_addr *laddr,
if (ia == NULL)
ia = ifatoia(ifa_ifwithnet(sintosa(&sain)));
if (cred == NULL || !jailed(cred)) {
if (cred == NULL || !prison_flag(cred, PR_IP4)) {
#if __FreeBSD_version < 800000
if (ia == NULL)
ia = (struct in_ifaddr *)sro.ro_rt->rt_ifa;
@ -1222,7 +1222,8 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
* Found?
*/
if (cred == NULL ||
inp->inp_cred->cr_prison == cred->cr_prison)
prison_equal_ip4(cred->cr_prison,
inp->inp_cred->cr_prison))
return (inp);
}
}
@ -1254,7 +1255,8 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
wildcard = 0;
if (cred != NULL &&
inp->inp_cred->cr_prison != cred->cr_prison)
!prison_equal_ip4(inp->inp_cred->cr_prison,
cred->cr_prison))
continue;
#ifdef INET6
/* XXX inp locking */
@ -1335,7 +1337,7 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
* the inp here, without any checks.
* Well unless both bound with SO_REUSEPORT?
*/
if (jailed(inp->inp_cred))
if (prison_flag(inp->inp_cred, PR_IP4))
return (inp);
if (tmpinp == NULL)
tmpinp = inp;
@ -1380,7 +1382,7 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
(inp->inp_flags & INP_FAITH) == 0)
continue;
injail = jailed(inp->inp_cred);
injail = prison_flag(inp->inp_cred, PR_IP4);
if (injail) {
if (prison_check_ip4(inp->inp_cred,
&laddr) != 0)

View File

@ -1008,7 +1008,7 @@ udp_output(struct inpcb *inp, struct mbuf *m, struct sockaddr *addr,
* Remember addr if jailed, to prevent
* rebinding.
*/
if (jailed(td->td_ucred))
if (prison_flag(td->td_ucred, PR_IP4))
inp->inp_laddr = laddr;
inp->inp_lport = lport;
if (in_pcbinshash(inp) != 0) {

View File

@ -660,7 +660,6 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
struct in6_ifaddr *ia, int flags)
{
INIT_VNET_INET6(ifp->if_vnet);
INIT_VPROCG(TD_TO_VPROCG(curthread)); /* XXX V_hostname needs this */
int error = 0, hostIsNew = 0, plen = -1;
struct in6_ifaddr *oia;
struct sockaddr_in6 dst6;
@ -1017,7 +1016,6 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
/*
* join node information group address
*/
#define hostnamelen strlen(V_hostname)
delay = 0;
if ((flags & IN6_IFAUPDATE_DADDELAY)) {
/*
@ -1027,10 +1025,7 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
delay = arc4random() %
(MAX_RTR_SOLICITATION_DELAY * hz);
}
mtx_lock(&hostname_mtx);
if (in6_nigroup(ifp, V_hostname, hostnamelen,
&mltaddr.sin6_addr) == 0) {
mtx_unlock(&hostname_mtx);
if (in6_nigroup(ifp, NULL, -1, &mltaddr.sin6_addr) == 0) {
imm = in6_joingroup(ifp, &mltaddr.sin6_addr, &error,
delay); /* XXX jinmei */
if (!imm) {
@ -1044,9 +1039,7 @@ in6_update_ifa(struct ifnet *ifp, struct in6_aliasreq *ifra,
LIST_INSERT_HEAD(&ia->ia6_memberships,
imm, i6mm_chain);
}
} else
mtx_unlock(&hostname_mtx);
#undef hostnamelen
}
/*
* join interface-local all-nodes address.

View File

@ -620,23 +620,44 @@ int
in6_nigroup(struct ifnet *ifp, const char *name, int namelen,
struct in6_addr *in6)
{
INIT_VPROCG(TD_TO_VPROCG(curthread)); /* XXX V_hostname needs this */
const char *p;
u_char *q;
MD5_CTX ctxt;
int use_hostname;
u_int8_t digest[16];
char l;
char n[64]; /* a single label must not exceed 63 chars */
if (!namelen || !name)
/*
* If no name is given and namelen is -1,
* we try to do the hostname lookup ourselves.
*/
if (!name && namelen == -1) {
use_hostname = 1;
mtx_lock(&hostname_mtx);
name = V_hostname;
namelen = strlen(name);
} else
use_hostname = 0;
if (!name || !namelen) {
if (use_hostname)
mtx_unlock(&hostname_mtx);
return -1;
}
p = name;
while (p && *p && *p != '.' && p - name < namelen)
p++;
if (p - name > sizeof(n) - 1)
if (p == name || p - name > sizeof(n) - 1) {
if (use_hostname)
mtx_unlock(&hostname_mtx);
return -1; /* label too long */
}
l = p - name;
strncpy(n, name, l);
if (use_hostname)
mtx_unlock(&hostname_mtx);
n[(int)l] = '\0';
for (q = n; *q; q++) {
if ('A' <= *q && *q <= 'Z')

View File

@ -666,7 +666,8 @@ in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
inp->inp_lport == lport) {
/* Found. */
if (cred == NULL ||
inp->inp_cred->cr_prison == cred->cr_prison)
prison_equal_ip6(cred->cr_prison,
inp->inp_cred->cr_prison))
return (inp);
}
}
@ -698,7 +699,8 @@ in6_pcblookup_local(struct inpcbinfo *pcbinfo, struct in6_addr *laddr,
LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
wildcard = 0;
if (cred != NULL &&
inp->inp_cred->cr_prison != cred->cr_prison)
!prison_equal_ip6(cred->cr_prison,
inp->inp_cred->cr_prison))
continue;
/* XXX inp locking */
if ((inp->inp_vflag & INP_IPV6) == 0)
@ -838,7 +840,7 @@ in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
* the inp here, without any checks.
* Well unless both bound with SO_REUSEPORT?
*/
if (jailed(inp->inp_cred))
if (prison_flag(inp->inp_cred, PR_IP6))
return (inp);
if (tmpinp == NULL)
tmpinp = inp;
@ -878,7 +880,7 @@ in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
if (faith && (inp->inp_flags & INP_FAITH) == 0)
continue;
injail = jailed(inp->inp_cred);
injail = prison_flag(inp->inp_cred, PR_IP6);
if (injail) {
if (prison_check_ip6(inp->inp_cred,
laddr) != 0)

View File

@ -43,6 +43,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/jail.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
@ -699,6 +700,8 @@ nfsrv_dorec(struct nfssvc_sock *slp, struct nfsd *nfsd,
nd = malloc(sizeof (struct nfsrv_descript),
M_NFSRVDESC, M_WAITOK);
nd->nd_cr = crget();
prison_hold(&prison0);
nd->nd_cr->cr_prison = &prison0;
NFSD_LOCK();
nd->nd_md = nd->nd_mrep = m;
nd->nd_nam2 = nam;

View File

@ -271,8 +271,8 @@ ugidfw_rulecheck(struct mac_bsdextended_rule *rule,
}
if (rule->mbr_subject.mbs_flags & MBS_PRISON_DEFINED) {
match = (cred->cr_prison != NULL &&
cred->cr_prison->pr_id == rule->mbr_subject.mbs_prison);
match =
(cred->cr_prison->pr_id == rule->mbr_subject.mbs_prison);
if (rule->mbr_subject.mbs_neg & MBS_PRISON_DEFINED)
match = !match;
if (!match)

View File

@ -169,14 +169,14 @@ struct cpuset {
#define CPU_SET_RDONLY 0x0002 /* No modification allowed. */
extern cpuset_t *cpuset_root;
struct prison;
struct proc;
struct thread;
struct cpuset *cpuset_thread0(void);
struct cpuset *cpuset_ref(struct cpuset *);
void cpuset_rel(struct cpuset *);
int cpuset_setthread(lwpid_t id, cpuset_t *);
int cpuset_create_root(struct thread *, struct cpuset **);
int cpuset_create_root(struct prison *, struct cpuset **);
int cpuset_setproc_update_set(struct proc *, struct cpuset *);
#else

View File

@ -122,8 +122,8 @@ int jail_remove(int);
#include <sys/queue.h>
#include <sys/sysctl.h>
#include <sys/_lock.h>
#include <sys/_mutex.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/_task.h>
#define JAIL_MAX 999999
@ -137,8 +137,6 @@ MALLOC_DECLARE(M_PRISON);
#include <sys/osd.h>
struct cpuset;
/*
* This structure describes a prison. It is pointed to by all struct
* ucreds's of the inmates. pr_ref keeps track of them and is used to
@ -162,7 +160,7 @@ struct prison {
struct vnode *pr_root; /* (c) vnode to rdir */
char pr_host[MAXHOSTNAMELEN]; /* (p) jail hostname */
char pr_name[MAXHOSTNAMELEN]; /* (p) admin jail name */
void *pr_spare; /* was pr_linux */
struct prison *pr_parent; /* (c) containing jail */
int pr_securelevel; /* (p) securelevel */
struct task pr_task; /* (d) destroy task */
struct mtx pr_mtx;
@ -171,15 +169,37 @@ struct prison {
struct in_addr *pr_ip4; /* (p) v4 IPs of jail */
int pr_ip6s; /* (p) number of v6 IPs */
struct in6_addr *pr_ip6; /* (p) v6 IPs of jail */
LIST_HEAD(, prison) pr_children; /* (a) list of child jails */
LIST_ENTRY(prison) pr_sibling; /* (a) next in parent's list */
int pr_prisoncount; /* (a) number of child jails */
unsigned pr_allow; /* (p) PR_ALLOW_* flags */
int pr_enforce_statfs; /* (p) statfs permission */
};
#endif /* _KERNEL || _WANT_PRISON */
#ifdef _KERNEL
/*
* Flag bits set via options or internally
*/
/* Flag bits set via options */
#define PR_PERSIST 0x00000001 /* Can exist without processes */
#define PR_IP4_USER 0x00000004 /* Virtualize IPv4 addresses */
#define PR_IP6_USER 0x00000008 /* Virtualize IPv6 addresses */
/* Internal flag bits */
#define PR_REMOVE 0x01000000 /* In process of being removed */
#define PR_IP4 0x02000000 /* IPv4 virtualized by this jail or */
/* an ancestor */
#define PR_IP6 0x04000000 /* IPv6 virtualized by this jail or */
/* an ancestor */
/* Flags for pr_allow */
#define PR_ALLOW_SET_HOSTNAME 0x0001
#define PR_ALLOW_SYSVIPC 0x0002
#define PR_ALLOW_RAW_SOCKETS 0x0004
#define PR_ALLOW_CHFLAGS 0x0008
#define PR_ALLOW_MOUNT 0x0010
#define PR_ALLOW_QUOTAS 0x0020
#define PR_ALLOW_JAILS 0x0040
#define PR_ALLOW_SOCKET_AF 0x0080
#define PR_ALLOW_ALL 0x00ff
/*
* OSD methods
@ -192,16 +212,68 @@ struct prison {
#define PR_MAXMETHOD 5
/*
* Sysctl-set variables that determine global jail policy
*
* XXX MIB entries will need to be protected by a mutex.
* Lock/unlock a prison.
* XXX These exist not so much for general convenience, but to be usable in
* the FOREACH_PRISON_DESCENDANT_LOCKED macro, which calls them from inside
* comma expressions and so, as currently defined, can't handle them in
* non-function form.
*/
extern int jail_set_hostname_allowed;
extern int jail_socket_unixiproute_only;
extern int jail_sysvipc_allowed;
extern int jail_getfsstat_jailrootonly;
extern int jail_allow_raw_sockets;
extern int jail_chflags_allowed;
/* Acquire the mutex embedded in the given prison (pr->pr_mtx). */
static __inline void
prison_lock(struct prison *pr)
{
mtx_lock(&pr->pr_mtx);
}
/* Release the mutex embedded in the given prison (pr->pr_mtx). */
static __inline void
prison_unlock(struct prison *pr)
{
mtx_unlock(&pr->pr_mtx);
}
/*
* Traverse a prison's immediate children: cpr is set to each entry on
* ppr's pr_children list in turn, following the pr_sibling links.
*/
#define FOREACH_PRISON_CHILD(ppr, cpr) \
LIST_FOREACH(cpr, &(ppr)->pr_children, pr_sibling)
/*
* Preorder traversal of all of a prison's descendants.
* This ugly loop allows the macro to be followed by a single block
* as expected in a looping primitive.
*
* ppr is the prison whose subtree is walked; the body is never run with
* cpr equal to ppr.  cpr is the cursor and descend is a scratch flag, both
* supplied by the caller.  descend is nonzero when cpr was reached by
* moving down to a first child or across to a sibling, and zero when the
* walk has stepped back up to a parent; the trailing if/else skips the
* body in that second case.  The walk ends once it is back at ppr.
*
* The step expression tests descend before moving to a child, so a body
* that clears descend causes cpr's children to be skipped.
* NOTE(review): presumably an intended pruning hook -- confirm with callers.
*/
#define FOREACH_PRISON_DESCENDANT(ppr, cpr, descend) \
for ((cpr) = (ppr), (descend) = 1; \
((cpr) = (((descend) && !LIST_EMPTY(&(cpr)->pr_children)) \
? LIST_FIRST(&(cpr)->pr_children) \
: ((cpr) == (ppr) \
? NULL \
: (((descend) = LIST_NEXT(cpr, pr_sibling) != NULL) \
? LIST_NEXT(cpr, pr_sibling) \
: (cpr)->pr_parent))));) \
if (!(descend)) \
; \
else
/*
* As above, but lock descendants on the way down and unlock on the way up.
*
* Each descendant is locked with prison_lock() immediately before the body
* runs on it and stays locked while its own descendants are walked;
* prison_unlock() is called on it when the walk leaves it for its next
* sibling or backs up to its parent.  ppr itself is neither locked nor
* unlocked by this macro.
*/
#define FOREACH_PRISON_DESCENDANT_LOCKED(ppr, cpr, descend) \
for ((cpr) = (ppr), (descend) = 1; \
((cpr) = (((descend) && !LIST_EMPTY(&(cpr)->pr_children)) \
? LIST_FIRST(&(cpr)->pr_children) \
: ((cpr) == (ppr) \
? NULL \
: ((prison_unlock(cpr), \
(descend) = LIST_NEXT(cpr, pr_sibling) != NULL) \
? LIST_NEXT(cpr, pr_sibling) \
: (cpr)->pr_parent))));) \
if ((descend) ? (prison_lock(cpr), 0) : 1) \
; \
else
/*
* Attributes of the physical system, and the root of the jail tree.
*/
extern struct prison prison0;
TAILQ_HEAD(prisonlist, prison);
extern struct prisonlist allprison;
@ -235,23 +307,29 @@ struct sockaddr;
struct statfs;
int jailed(struct ucred *cred);
void getcredhostname(struct ucred *cred, char *, size_t);
int prison_allow(struct ucred *, unsigned);
int prison_check(struct ucred *cred1, struct ucred *cred2);
int prison_canseemount(struct ucred *cred, struct mount *mp);
void prison_enforce_statfs(struct ucred *cred, struct mount *mp,
struct statfs *sp);
struct prison *prison_find(int prid);
struct prison *prison_find_name(const char *name);
struct prison *prison_find_child(struct prison *, int);
struct prison *prison_find_name(struct prison *, const char *);
int prison_flag(struct ucred *, unsigned);
void prison_free(struct prison *pr);
void prison_free_locked(struct prison *pr);
void prison_hold(struct prison *pr);
void prison_hold_locked(struct prison *pr);
void prison_proc_hold(struct prison *);
void prison_proc_free(struct prison *);
int prison_ischild(struct prison *, struct prison *);
int prison_equal_ip4(struct prison *, struct prison *);
int prison_get_ip4(struct ucred *cred, struct in_addr *ia);
int prison_local_ip4(struct ucred *cred, struct in_addr *ia);
int prison_remote_ip4(struct ucred *cred, struct in_addr *ia);
int prison_check_ip4(struct ucred *cred, struct in_addr *ia);
#ifdef INET6
int prison_equal_ip6(struct prison *, struct prison *);
int prison_get_ip6(struct ucred *, struct in6_addr *);
int prison_local_ip6(struct ucred *, struct in6_addr *, int);
int prison_remote_ip6(struct ucred *, struct in6_addr *);
@ -259,6 +337,7 @@ int prison_check_ip6(struct ucred *, struct in6_addr *);
#endif
int prison_check_af(struct ucred *cred, int af);
int prison_if(struct ucred *cred, struct sockaddr *sa);
char *prison_name(struct prison *, struct prison *);
int prison_priv_check(struct ucred *cred, int priv);
int sysctl_jail_param(struct sysctl_oid *, void *, int , struct sysctl_req *);

View File

@ -57,7 +57,7 @@
* is created, otherwise 1.
*/
#undef __FreeBSD_version
#define __FreeBSD_version 800090 /* Master, propagated to newvers */
#define __FreeBSD_version 800091 /* Master, propagated to newvers */
#ifndef LOCORE
#include <sys/types.h>

View File

@ -37,6 +37,7 @@
struct file;
struct itimerval;
struct image_args;
struct jail;
struct mbuf;
struct msghdr;
struct msqid_ds;
@ -105,6 +106,7 @@ int kern_getsockname(struct thread *td, int fd, struct sockaddr **sa,
int kern_getsockopt(struct thread *td, int s, int level, int name,
void *optval, enum uio_seg valseg, socklen_t *valsize);
int kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data);
int kern_jail(struct thread *td, struct jail *j);
int kern_jail_get(struct thread *td, struct uio *options, int flags);
int kern_jail_set(struct thread *td, struct uio *options, int flags);
int kern_kevent(struct thread *td, int fd, int nchanges, int nevents,

View File

@ -45,8 +45,6 @@
#include <sys/queue.h>
#include <sys/stdint.h> /* for people using printf mainly */
extern int securelevel; /* system security level (see init(8)) */
extern int cold; /* nonzero if we are doing a cold boot */
extern int rebooting; /* boot() has been called. */
extern const char *panicstr; /* panic message */

View File

@ -61,7 +61,6 @@ __FBSDID("$FreeBSD$");
#include <sys/lockf.h>
#include <sys/conf.h>
#include <sys/acl.h>
#include <sys/jail.h>
#include <machine/mutex.h>