Encapsulate SYSV IPC objects in jails. Define per-module parameters

sysvmsg, sysvsem, and sysvshm, with the following behavior:

inherit: allow full access to the IPC primitives.  This is the same as
the current setup with allow.sysvipc on.  Jails and the base system
can see (and modify) each other's objects, which is generally considered
a bad thing (though may be useful in some circumstances).

disable: allow no access, same as the current setup with allow.sysvipc off.

new: A jail may see and use the IPC objects that it has created.  It also
gets its own IPC key namespace, so different jails may have their own
objects using the same key value.  The parent jail (or base system) can
see the jail's IPC objects, but not its keys.

PR:		48471
Submitted by:	based on work by kikuchan98@gmail.com
MFC after:	5 days
This commit is contained in:
Jamie Gritton 2016-04-25 17:06:50 +00:00
parent 5d6cb09dfc
commit 52a510ace9
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=298585
4 changed files with 1041 additions and 117 deletions

View File

@ -62,8 +62,11 @@ __FBSDID("$FreeBSD$");
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/msg.h>
#include <sys/racct.h>
#include <sys/sbuf.h>
#include <sys/sx.h>
#include <sys/syscall.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
@ -80,6 +83,14 @@ static MALLOC_DEFINE(M_MSG, "msg", "SVID compatible message queues");
static int msginit(void);
static int msgunload(void);
static int sysvmsg_modload(struct module *, int, void *);
static void msq_remove(struct msqid_kernel *);
static struct prison *msg_find_prison(struct ucred *);
static int msq_prison_cansee(struct prison *, struct msqid_kernel *);
static int msg_prison_check(void *, void *);
static int msg_prison_set(void *, void *);
static int msg_prison_get(void *, void *);
static int msg_prison_remove(void *, void *);
static void msg_prison_cleanup(struct prison *);
#ifdef MSG_DEBUG
@ -155,6 +166,7 @@ static struct msgmap *msgmaps; /* MSGSEG msgmap structures */
static struct msg *msghdrs; /* MSGTQL msg headers */
static struct msqid_kernel *msqids; /* MSGMNI msqid_kernel struct's */
static struct mtx msq_mtx; /* global mutex for message queues. */
static unsigned msg_prison_slot;/* prison OSD slot */
static struct syscall_helper_data msg_syscalls[] = {
SYSCALL_INIT_HELPER(msgctl),
@ -194,7 +206,15 @@ static struct syscall_helper_data msg32_syscalls[] = {
static int
msginit()
{
struct prison *pr;
void *rsv;
int i, error;
osd_method_t methods[PR_MAXMETHOD] = {
[PR_METHOD_CHECK] = msg_prison_check,
[PR_METHOD_SET] = msg_prison_set,
[PR_METHOD_GET] = msg_prison_get,
[PR_METHOD_REMOVE] = msg_prison_remove,
};
msginfo.msgmax = msginfo.msgseg * msginfo.msgssz;
msgpool = malloc(msginfo.msgmax, M_MSG, M_WAITOK);
@ -252,6 +272,29 @@ msginit()
}
mtx_init(&msq_mtx, "msq", NULL, MTX_DEF);
/* Set current prisons according to their allow.sysvipc. */
msg_prison_slot = osd_jail_register(NULL, methods);
rsv = osd_reserve(msg_prison_slot);
prison_lock(&prison0);
(void)osd_jail_set_reserved(&prison0, msg_prison_slot, rsv, &prison0);
prison_unlock(&prison0);
rsv = NULL;
sx_slock(&allprison_lock);
TAILQ_FOREACH(pr, &allprison, pr_list) {
if (rsv == NULL)
rsv = osd_reserve(msg_prison_slot);
prison_lock(pr);
if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) {
(void)osd_jail_set_reserved(pr, msg_prison_slot, rsv,
&prison0);
rsv = NULL;
}
prison_unlock(pr);
}
if (rsv != NULL)
osd_free_reserved(rsv);
sx_sunlock(&allprison_lock);
error = syscall_helper_register(msg_syscalls, SY_THR_STATIC_KLD);
if (error != 0)
return (error);
@ -292,6 +335,8 @@ msgunload()
if (msqid != msginfo.msgmni)
return (EBUSY);
if (msg_prison_slot != 0)
osd_jail_deregister(msg_prison_slot);
#ifdef MAC
for (i = 0; i < msginfo.msgtql; i++)
mac_sysvmsg_destroy(&msghdrs[i]);
@ -366,6 +411,67 @@ msg_freehdr(msghdr)
#endif
}
/*
 * Destroy a message queue: give back its resource accounting, drop the
 * owning credential, free every queued message header, mark the slot as
 * free and wake up anything sleeping on the queue.
 *
 * msg_prison_cleanup() calls this with msq_mtx held; other callers are
 * presumably expected to do the same -- TODO confirm.
 */
static void
msq_remove(struct msqid_kernel *msqkptr)
{
	struct msg *cur, *following;

	/* The accounting must be returned while the credential is still held. */
	racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1);
	racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum);
	racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes);
	crfree(msqkptr->cred);
	msqkptr->cred = NULL;

	/*
	 * Free the message headers (and with them the segments of each
	 * message), keeping the queue's byte and message counters in step.
	 */
	for (cur = msqkptr->u.msg_first; cur != NULL; cur = following) {
		msqkptr->u.msg_cbytes -= cur->msg_ts;
		msqkptr->u.msg_qnum--;
		/* Pick up the successor before the header is released. */
		following = cur->msg_next;
		msg_freehdr(cur);
	}

	/* Both counters must have drained to zero along with the list. */
	if (msqkptr->u.msg_cbytes != 0)
		panic("msg_cbytes is screwed up");
	if (msqkptr->u.msg_qnum != 0)
		panic("msg_qnum is screwed up");

	msqkptr->u.msg_qbytes = 0;	/* Mark it as free */

#ifdef MAC
	mac_sysvmsq_cleanup(msqkptr);
#endif

	wakeup(msqkptr);
}
/*
 * Return the value stored in the sysvmsg OSD slot of the credential's
 * prison, i.e. the prison acting as the root of its message queues.
 * A NULL result is treated by the callers in this file as "SYSV
 * messages unavailable" (they answer ENOSYS).
 */
static struct prison *
msg_find_prison(struct ucred *cred)
{
	struct prison *owner = cred->cr_prison;
	struct prison *root;

	/* The slot is read under the prison lock. */
	prison_lock(owner);
	root = osd_jail_get(owner, msg_prison_slot);
	prison_unlock(owner);

	return (root);
}
/*
 * Decide whether a requester whose message-queue root prison is rpr may
 * see the given queue.  Returns 0 when the queue's owning prison is rpr
 * itself or one that prison_ischild() reports as a child of rpr, and
 * EINVAL otherwise (including when the queue has no credential).
 */
static int
msq_prison_cansee(struct prison *rpr, struct msqid_kernel *msqkptr)
{
	struct prison *owner;

	if (msqkptr->cred == NULL)
		return (EINVAL);

	owner = msqkptr->cred->cr_prison;
	if (rpr == owner || prison_ischild(rpr, owner))
		return (0);

	return (EINVAL);
}
#ifndef _SYS_SYSPROTO_H_
struct msgctl_args {
int msqid;
@ -402,8 +508,10 @@ kern_msgctl(td, msqid, cmd, msqbuf)
{
int rval, error, msqix;
register struct msqid_kernel *msqkptr;
struct prison *rpr;
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
rpr = msg_find_prison(td->td_ucred);
if (rpr == NULL)
return (ENOSYS);
msqix = IPCID_TO_IX(msqid);
@ -427,6 +535,13 @@ kern_msgctl(td, msqid, cmd, msqbuf)
error = EINVAL;
goto done2;
}
error = msq_prison_cansee(rpr, msqkptr);
if (error != 0) {
DPRINTF(("requester can't see prison\n"));
goto done2;
}
#ifdef MAC
error = mac_sysvmsq_check_msqctl(td->td_ucred, msqkptr, cmd);
if (error != 0)
@ -440,7 +555,9 @@ kern_msgctl(td, msqid, cmd, msqbuf)
case IPC_RMID:
{
#ifdef MAC
struct msg *msghdr;
#endif
if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_M)))
goto done2;
@ -462,37 +579,7 @@ kern_msgctl(td, msqid, cmd, msqbuf)
}
#endif
racct_sub_cred(msqkptr->cred, RACCT_NMSGQ, 1);
racct_sub_cred(msqkptr->cred, RACCT_MSGQQUEUED, msqkptr->u.msg_qnum);
racct_sub_cred(msqkptr->cred, RACCT_MSGQSIZE, msqkptr->u.msg_cbytes);
crfree(msqkptr->cred);
msqkptr->cred = NULL;
/* Free the message headers */
msghdr = msqkptr->u.msg_first;
while (msghdr != NULL) {
struct msg *msghdr_tmp;
/* Free the segments of each message */
msqkptr->u.msg_cbytes -= msghdr->msg_ts;
msqkptr->u.msg_qnum--;
msghdr_tmp = msghdr;
msghdr = msghdr->msg_next;
msg_freehdr(msghdr_tmp);
}
if (msqkptr->u.msg_cbytes != 0)
panic("msg_cbytes is screwed up");
if (msqkptr->u.msg_qnum != 0)
panic("msg_qnum is screwed up");
msqkptr->u.msg_qbytes = 0; /* Mark it as free */
#ifdef MAC
mac_sysvmsq_cleanup(msqkptr);
#endif
wakeup(msqkptr);
msq_remove(msqkptr);
}
break;
@ -529,6 +616,8 @@ kern_msgctl(td, msqid, cmd, msqbuf)
goto done2;
}
*msqbuf = msqkptr->u;
if (td->td_ucred->cr_prison != msqkptr->cred->cr_prison)
msqbuf->msg_perm.key = IPC_PRIVATE;
break;
default:
@ -564,7 +653,7 @@ sys_msgget(td, uap)
DPRINTF(("msgget(0x%x, 0%o)\n", key, msgflg));
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
if (msg_find_prison(cred) == NULL)
return (ENOSYS);
mtx_lock(&msq_mtx);
@ -572,6 +661,8 @@ sys_msgget(td, uap)
for (msqid = 0; msqid < msginfo.msgmni; msqid++) {
msqkptr = &msqids[msqid];
if (msqkptr->u.msg_qbytes != 0 &&
msqkptr->cred != NULL &&
msqkptr->cred->cr_prison == cred->cr_prison &&
msqkptr->u.msg_perm.key == key)
break;
}
@ -684,12 +775,14 @@ kern_msgsnd(td, msqid, msgp, msgsz, msgflg, mtype)
int msqix, segs_needed, error = 0;
register struct msqid_kernel *msqkptr;
register struct msg *msghdr;
struct prison *rpr;
short next;
#ifdef RACCT
size_t saved_msgsz;
#endif
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
rpr = msg_find_prison(td->td_ucred);
if (rpr == NULL)
return (ENOSYS);
mtx_lock(&msq_mtx);
@ -714,6 +807,11 @@ kern_msgsnd(td, msqid, msgp, msgsz, msgflg, mtype)
goto done2;
}
if ((error = msq_prison_cansee(rpr, msqkptr))) {
DPRINTF(("requester can't see prison\n"));
goto done2;
}
if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_W))) {
DPRINTF(("requester doesn't have write access\n"));
goto done2;
@ -1052,10 +1150,12 @@ kern_msgrcv(td, msqid, msgp, msgsz, msgtyp, msgflg, mtype)
size_t len;
register struct msqid_kernel *msqkptr;
register struct msg *msghdr;
struct prison *rpr;
int msqix, error = 0;
short next;
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
rpr = msg_find_prison(td->td_ucred);
if (rpr == NULL)
return (ENOSYS);
msqix = IPCID_TO_IX(msqid);
@ -1079,6 +1179,11 @@ kern_msgrcv(td, msqid, msgp, msgsz, msgtyp, msgflg, mtype)
goto done2;
}
if ((error = msq_prison_cansee(rpr, msqkptr))) {
DPRINTF(("requester can't see prison\n"));
goto done2;
}
if ((error = ipcperm(td, &msqkptr->u.msg_perm, IPC_R))) {
DPRINTF(("requester doesn't have read access\n"));
goto done2;
@ -1318,9 +1423,39 @@ sys_msgrcv(td, uap)
static int
sysctl_msqids(SYSCTL_HANDLER_ARGS)
{
struct sbuf sb;
struct msqid_kernel tmp, empty;
struct msqid_kernel *msqkptr;
struct prison *rpr;
int error, i;
return (SYSCTL_OUT(req, msqids,
sizeof(struct msqid_kernel) * msginfo.msgmni));
error = sysctl_wire_old_buffer(req, 0);
if (error != 0)
goto done;
rpr = msg_find_prison(req->td->td_ucred);
sbuf_new_for_sysctl(&sb, NULL, sizeof(struct msqid_kernel) *
msginfo.msgmni, req);
bzero(&empty, sizeof(empty));
for (i = 0; i < msginfo.msgmni; i++) {
msqkptr = &msqids[i];
if (msqkptr->u.msg_qbytes == 0 || rpr == NULL ||
msq_prison_cansee(rpr, msqkptr) != 0) {
msqkptr = &empty;
} else if (req->td->td_ucred->cr_prison !=
msqkptr->cred->cr_prison) {
bcopy(msqkptr, &tmp, sizeof(tmp));
msqkptr = &tmp;
msqkptr->u.msg_perm.key = IPC_PRIVATE;
}
sbuf_bcat(&sb, msqkptr, sizeof(*msqkptr));
}
error = sbuf_finish(&sb);
sbuf_delete(&sb);
done:
return (error);
}
SYSCTL_INT(_kern_ipc, OID_AUTO, msgmax, CTLFLAG_RD, &msginfo.msgmax, 0,
@ -1338,6 +1473,181 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, msgseg, CTLFLAG_RDTUN, &msginfo.msgseg, 0,
SYSCTL_PROC(_kern_ipc, OID_AUTO, msqids, CTLTYPE_OPAQUE | CTLFLAG_RD,
NULL, 0, sysctl_msqids, "", "Message queue IDs");
static int
msg_prison_check(void *obj, void *data)
{
struct prison *pr = obj;
struct prison *prpr;
struct vfsoptlist *opts = data;
int error, jsys;
/*
* sysvmsg is a jailsys integer.
* It must be "disable" if the parent jail is disabled.
*/
error = vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys));
if (error != ENOENT) {
if (error != 0)
return (error);
switch (jsys) {
case JAIL_SYS_DISABLE:
break;
case JAIL_SYS_NEW:
case JAIL_SYS_INHERIT:
prison_lock(pr->pr_parent);
prpr = osd_jail_get(pr->pr_parent, msg_prison_slot);
prison_unlock(pr->pr_parent);
if (prpr == NULL)
return (EPERM);
break;
default:
return (EINVAL);
}
}
return (0);
}
/*
* PR_METHOD_SET handler: apply the "sysvmsg" jail parameter to a prison.
*
* obj is the prison being modified and data is its option list.  The
* sysvmsg OSD slot of a prison holds the prison that acts as the root of
* its message queues; an empty slot means the feature is disabled.
* Always returns 0.
*/
static int
msg_prison_set(void *obj, void *data)
{
struct prison *pr = obj;
struct prison *tpr, *orpr, *nrpr, *trpr;
struct vfsoptlist *opts = data;
void *rsv;
int jsys, descend;
/*
* sysvmsg controls which jail is the root of the associated msgs (this
* jail or same as the parent), or if the feature is available at all.
*
* When sysvmsg itself is not given, fall back to the allow.sysvipc /
* allow.nosysvipc flags; -1 means neither was given, in which case
* nothing below is changed.
*/
if (vfs_copyopt(opts, "sysvmsg", &jsys, sizeof(jsys)) == ENOENT)
jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0)
? JAIL_SYS_INHERIT
: vfs_flagopt(opts, "allow.nosysvipc", NULL, 0)
? JAIL_SYS_DISABLE
: -1;
if (jsys == JAIL_SYS_DISABLE) {
/* Clear this prison's slot, remembering the old root (orpr). */
prison_lock(pr);
orpr = osd_jail_get(pr, msg_prison_slot);
if (orpr != NULL)
osd_jail_del(pr, msg_prison_slot);
prison_unlock(pr);
if (orpr != NULL) {
/* The prison was its own root, so its queues are removed. */
if (orpr == pr)
msg_prison_cleanup(pr);
/* Disable all child jails as well. */
FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
prison_lock(tpr);
trpr = osd_jail_get(tpr, msg_prison_slot);
if (trpr != NULL) {
osd_jail_del(tpr, msg_prison_slot);
/* Cleanup runs only after the prison lock is dropped. */
prison_unlock(tpr);
if (trpr == tpr)
msg_prison_cleanup(tpr);
} else {
/* Already disabled: do not walk below this jail. */
prison_unlock(tpr);
descend = 0;
}
}
}
} else if (jsys != -1) {
/* New root: this prison for "new", otherwise the parent's root. */
if (jsys == JAIL_SYS_NEW)
nrpr = pr;
else {
prison_lock(pr->pr_parent);
nrpr = osd_jail_get(pr->pr_parent, msg_prison_slot);
prison_unlock(pr->pr_parent);
}
/* Reserve the OSD storage before the prison lock is taken. */
rsv = osd_reserve(msg_prison_slot);
prison_lock(pr);
orpr = osd_jail_get(pr, msg_prison_slot);
if (orpr != nrpr)
(void)osd_jail_set_reserved(pr, msg_prison_slot, rsv,
nrpr);
else
osd_free_reserved(rsv);
prison_unlock(pr);
if (orpr != nrpr) {
/* The prison stopped being its own root: drop its queues. */
if (orpr == pr)
msg_prison_cleanup(pr);
if (orpr != NULL) {
/* Change child jails matching the old root. */
FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
prison_lock(tpr);
trpr = osd_jail_get(tpr,
msg_prison_slot);
if (trpr == orpr) {
(void)osd_jail_set(tpr,
msg_prison_slot, nrpr);
prison_unlock(tpr);
if (trpr == tpr)
msg_prison_cleanup(tpr);
} else {
/* Different root here: leave this subtree alone. */
prison_unlock(tpr);
descend = 0;
}
}
}
}
}
return (0);
}
/*
 * PR_METHOD_GET handler: report the "sysvmsg" jail parameter.
 *
 * The value is derived from the prison's root prison in the OSD slot:
 * no root means "disable", being its own root means "new", and any other
 * root means "inherit".  A caller that did not ask for the option
 * (ENOENT from vfs_setopt) is not an error.
 */
static int
msg_prison_get(void *obj, void *data)
{
	struct prison *pr = obj;
	struct vfsoptlist *opts = data;
	struct prison *root;
	int jsys, rc;

	prison_lock(pr);
	root = osd_jail_get(pr, msg_prison_slot);
	prison_unlock(pr);

	if (root == NULL)
		jsys = JAIL_SYS_DISABLE;
	else if (root == pr)
		jsys = JAIL_SYS_NEW;
	else
		jsys = JAIL_SYS_INHERIT;

	rc = vfs_setopt(opts, "sysvmsg", &jsys, sizeof(jsys));
	return (rc == ENOENT ? 0 : rc);
}
/*
 * PR_METHOD_REMOVE handler: when the prison is the root of its own
 * message queues, remove the queues that belong to it.  Always
 * returns 0.
 */
static int
msg_prison_remove(void *obj, void *data __unused)
{
	struct prison *pr = obj;
	struct prison *root;

	prison_lock(pr);
	root = osd_jail_get(pr, msg_prison_slot);
	prison_unlock(pr);

	/* A prison with no root, or another prison as root, has nothing to drop. */
	if (root != pr)
		return (0);

	msg_prison_cleanup(pr);
	return (0);
}
/*
 * Remove any msqs that belong to this jail, i.e. every in-use queue
 * whose credential's prison is pr.  The scan and the removals run with
 * msq_mtx held.
 */
static void
msg_prison_cleanup(struct prison *pr)
{
	struct msqid_kernel *queue;
	int slot;

	mtx_lock(&msq_mtx);
	for (slot = 0; slot < msginfo.msgmni; slot++) {
		queue = &msqids[slot];

		/* Skip free slots and queues without an owner. */
		if (queue->u.msg_qbytes == 0 || queue->cred == NULL)
			continue;
		if (queue->cred->cr_prison != pr)
			continue;

		msq_remove(queue);
	}
	mtx_unlock(&msq_mtx);
}
SYSCTL_JAIL_PARAM_SYS_NODE(sysvmsg, CTLFLAG_RW, "SYSV message queues");
#ifdef COMPAT_FREEBSD32
int
freebsd32_msgsys(struct thread *td, struct freebsd32_msgsys_args *uap)
@ -1516,8 +1826,6 @@ sys_msgsys(td, uap)
{
int error;
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
return (ENOSYS);
if (uap->which < 0 || uap->which >= nitems(msgcalls))
return (EINVAL);
error = (*msgcalls[uap->which])(td, &uap->a2);

View File

@ -52,7 +52,9 @@ __FBSDID("$FreeBSD$");
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/racct.h>
#include <sys/sbuf.h>
#include <sys/sem.h>
#include <sys/sx.h>
#include <sys/syscall.h>
#include <sys/syscallsubr.h>
#include <sys/sysent.h>
@ -78,7 +80,16 @@ static int sysvsem_modload(struct module *, int, void *);
static int semunload(void);
static void semexit_myhook(void *arg, struct proc *p);
static int sysctl_sema(SYSCTL_HANDLER_ARGS);
static int semvalid(int semid, struct semid_kernel *semakptr);
static int semvalid(int semid, struct prison *rpr,
struct semid_kernel *semakptr);
static void sem_remove(int semidx, struct ucred *cred);
static struct prison *sem_find_prison(struct ucred *);
static int sem_prison_cansee(struct prison *, struct semid_kernel *);
static int sem_prison_check(void *, void *);
static int sem_prison_set(void *, void *);
static int sem_prison_get(void *, void *);
static int sem_prison_remove(void *, void *);
static void sem_prison_cleanup(struct prison *);
#ifndef _SYS_SYSPROTO_H_
struct __semctl_args;
@ -104,6 +115,7 @@ LIST_HEAD(, sem_undo) semu_list; /* list of active undo structures */
LIST_HEAD(, sem_undo) semu_free_list; /* list of free undo structures */
static int *semu; /* undo structure pool */
static eventhandler_tag semexit_tag;
static unsigned sem_prison_slot; /* prison OSD slot */
#define SEMUNDO_MTX sem_undo_mtx
#define SEMUNDO_LOCK() mtx_lock(&SEMUNDO_MTX);
@ -247,7 +259,15 @@ static struct syscall_helper_data sem32_syscalls[] = {
static int
seminit(void)
{
struct prison *pr;
void *rsv;
int i, error;
osd_method_t methods[PR_MAXMETHOD] = {
[PR_METHOD_CHECK] = sem_prison_check,
[PR_METHOD_SET] = sem_prison_set,
[PR_METHOD_GET] = sem_prison_get,
[PR_METHOD_REMOVE] = sem_prison_remove,
};
sem = malloc(sizeof(struct sem) * seminfo.semmns, M_SEM, M_WAITOK);
sema = malloc(sizeof(struct semid_kernel) * seminfo.semmni, M_SEM,
@ -278,6 +298,29 @@ seminit(void)
semexit_tag = EVENTHANDLER_REGISTER(process_exit, semexit_myhook, NULL,
EVENTHANDLER_PRI_ANY);
/* Set current prisons according to their allow.sysvipc. */
sem_prison_slot = osd_jail_register(NULL, methods);
rsv = osd_reserve(sem_prison_slot);
prison_lock(&prison0);
(void)osd_jail_set_reserved(&prison0, sem_prison_slot, rsv, &prison0);
prison_unlock(&prison0);
rsv = NULL;
sx_slock(&allprison_lock);
TAILQ_FOREACH(pr, &allprison, pr_list) {
if (rsv == NULL)
rsv = osd_reserve(sem_prison_slot);
prison_lock(pr);
if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) {
(void)osd_jail_set_reserved(pr, sem_prison_slot, rsv,
&prison0);
rsv = NULL;
}
prison_unlock(pr);
}
if (rsv != NULL)
osd_free_reserved(rsv);
sx_sunlock(&allprison_lock);
error = syscall_helper_register(sem_syscalls, SY_THR_STATIC_KLD);
if (error != 0)
return (error);
@ -303,6 +346,8 @@ semunload(void)
#endif
syscall_helper_unregister(sem_syscalls);
EVENTHANDLER_DEREGISTER(process_exit, semexit_tag);
if (sem_prison_slot != 0)
osd_jail_deregister(sem_prison_slot);
#ifdef MAC
for (i = 0; i < seminfo.semmni; i++)
mac_sysvsem_destroy(&sema[i]);
@ -489,11 +534,74 @@ semundo_clear(int semid, int semnum)
}
static int
semvalid(int semid, struct semid_kernel *semakptr)
semvalid(int semid, struct prison *rpr, struct semid_kernel *semakptr)
{
return ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 ||
semakptr->u.sem_perm.seq != IPCID_TO_SEQ(semid) ? EINVAL : 0);
semakptr->u.sem_perm.seq != IPCID_TO_SEQ(semid) ||
sem_prison_cansee(rpr, semakptr) ? EINVAL : 0);
}
/*
* Free the semaphore set at index semidx and compact the sem[] pool.
*
* cred, when non-NULL, supplies the uid recorded as the set's final
* cuid/uid; with a NULL cred both are set to 0.
*
* sem_prison_cleanup() calls this with sem_mtx and sema_mtx[semidx]
* held; other callers presumably do the same -- TODO confirm.
*/
static void
sem_remove(int semidx, struct ucred *cred)
{
struct semid_kernel *semakptr;
int i;
KASSERT(semidx >= 0 && semidx < seminfo.semmni,
("semidx out of bounds"));
semakptr = &sema[semidx];
semakptr->u.sem_perm.cuid = cred ? cred->cr_uid : 0;
semakptr->u.sem_perm.uid = cred ? cred->cr_uid : 0;
/* Clearing the mode also clears SEM_ALLOC, marking the slot unused. */
semakptr->u.sem_perm.mode = 0;
/* Return the accounting while the set's credential is still held. */
racct_sub_cred(semakptr->cred, RACCT_NSEM, semakptr->u.sem_nsems);
crfree(semakptr->cred);
semakptr->cred = NULL;
/* Drop the undo entries recorded against this set (semnum -1). */
SEMUNDO_LOCK();
semundo_clear(semidx, -1);
SEMUNDO_UNLOCK();
#ifdef MAC
mac_sysvsem_cleanup(semakptr);
#endif
wakeup(semakptr);
/*
* Lock every allocated set whose semaphores sit above this one in the
* pool; their sem_base pointers are adjusted below.
*/
for (i = 0; i < seminfo.semmni; i++) {
if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
sema[i].u.sem_base > semakptr->u.sem_base)
mtx_lock_flags(&sema_mtx[i], LOP_DUPOK);
}
/* Slide the pool down over the removed set's semaphores. */
for (i = semakptr->u.sem_base - sem; i < semtot; i++)
sem[i] = sem[i + semakptr->u.sem_nsems];
/* Rebase the sets that were moved and release their locks. */
for (i = 0; i < seminfo.semmni; i++) {
if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
sema[i].u.sem_base > semakptr->u.sem_base) {
sema[i].u.sem_base -= semakptr->u.sem_nsems;
mtx_unlock(&sema_mtx[i]);
}
}
semtot -= semakptr->u.sem_nsems;
}
/*
 * Return the value stored in the sysvsem OSD slot of the credential's
 * prison, i.e. the prison acting as the root of its semaphores, or NULL
 * when the slot is empty.
 */
static struct prison *
sem_find_prison(struct ucred *cred)
{
	struct prison *owner = cred->cr_prison;
	struct prison *root;

	/* The slot is read under the prison lock. */
	prison_lock(owner);
	root = osd_jail_get(owner, sem_prison_slot);
	prison_unlock(owner);

	return (root);
}
/*
 * Decide whether a requester whose semaphore root prison is rpr may see
 * the given semaphore set.  Returns 0 when the set's owning prison is
 * rpr itself or one that prison_ischild() reports as a child of rpr,
 * and EINVAL otherwise (including when the set has no credential).
 */
static int
sem_prison_cansee(struct prison *rpr, struct semid_kernel *semakptr)
{
	struct prison *owner;

	if (semakptr->cred == NULL)
		return (EINVAL);

	owner = semakptr->cred->cr_prison;
	if (rpr == owner || prison_ischild(rpr, owner))
		return (0);

	return (EINVAL);
}
/*
@ -572,6 +680,7 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
u_short *array;
struct ucred *cred = td->td_ucred;
int i, error;
struct prison *rpr;
struct semid_ds *sbuf;
struct semid_kernel *semakptr;
struct mtx *sema_mtxp;
@ -580,7 +689,9 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
DPRINTF(("call to semctl(%d, %d, %d, 0x%p)\n",
semid, semnum, cmd, arg));
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
rpr = sem_find_prison(td->td_ucred);
if (sem == NULL)
return (ENOSYS);
array = NULL;
@ -600,6 +711,8 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
error = EINVAL;
goto done2;
}
if ((error = sem_prison_cansee(rpr, semakptr)))
goto done2;
if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
goto done2;
#ifdef MAC
@ -608,6 +721,8 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
goto done2;
#endif
bcopy(&semakptr->u, arg->buf, sizeof(struct semid_ds));
if (cred->cr_prison != semakptr->cred->cr_prison)
arg->buf->sem_perm.key = IPC_PRIVATE;
*rval = IXSEQ_TO_IPCID(semid, semakptr->u.sem_perm);
mtx_unlock(sema_mtxp);
return (0);
@ -622,6 +737,7 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
if (cmd == IPC_RMID)
mtx_lock(&sem_mtx);
mtx_lock(sema_mtxp);
#ifdef MAC
error = mac_sysvsem_check_semctl(cred, semakptr, cmd);
if (error != 0)
@ -633,42 +749,15 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
switch (cmd) {
case IPC_RMID:
if ((error = semvalid(semid, semakptr)) != 0)
if ((error = semvalid(semid, rpr, semakptr)) != 0)
goto done2;
if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M)))
goto done2;
semakptr->u.sem_perm.cuid = cred->cr_uid;
semakptr->u.sem_perm.uid = cred->cr_uid;
semakptr->u.sem_perm.mode = 0;
racct_sub_cred(semakptr->cred, RACCT_NSEM, semakptr->u.sem_nsems);
crfree(semakptr->cred);
semakptr->cred = NULL;
SEMUNDO_LOCK();
semundo_clear(semidx, -1);
SEMUNDO_UNLOCK();
#ifdef MAC
mac_sysvsem_cleanup(semakptr);
#endif
wakeup(semakptr);
for (i = 0; i < seminfo.semmni; i++) {
if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
sema[i].u.sem_base > semakptr->u.sem_base)
mtx_lock_flags(&sema_mtx[i], LOP_DUPOK);
}
for (i = semakptr->u.sem_base - sem; i < semtot; i++)
sem[i] = sem[i + semakptr->u.sem_nsems];
for (i = 0; i < seminfo.semmni; i++) {
if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
sema[i].u.sem_base > semakptr->u.sem_base) {
sema[i].u.sem_base -= semakptr->u.sem_nsems;
mtx_unlock(&sema_mtx[i]);
}
}
semtot -= semakptr->u.sem_nsems;
sem_remove(semidx, cred);
break;
case IPC_SET:
if ((error = semvalid(semid, semakptr)) != 0)
if ((error = semvalid(semid, rpr, semakptr)) != 0)
goto done2;
if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_M)))
goto done2;
@ -681,15 +770,17 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
break;
case IPC_STAT:
if ((error = semvalid(semid, semakptr)) != 0)
if ((error = semvalid(semid, rpr, semakptr)) != 0)
goto done2;
if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
goto done2;
bcopy(&semakptr->u, arg->buf, sizeof(struct semid_ds));
if (cred->cr_prison != semakptr->cred->cr_prison)
arg->buf->sem_perm.key = IPC_PRIVATE;
break;
case GETNCNT:
if ((error = semvalid(semid, semakptr)) != 0)
if ((error = semvalid(semid, rpr, semakptr)) != 0)
goto done2;
if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
goto done2;
@ -701,7 +792,7 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
break;
case GETPID:
if ((error = semvalid(semid, semakptr)) != 0)
if ((error = semvalid(semid, rpr, semakptr)) != 0)
goto done2;
if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
goto done2;
@ -713,7 +804,7 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
break;
case GETVAL:
if ((error = semvalid(semid, semakptr)) != 0)
if ((error = semvalid(semid, rpr, semakptr)) != 0)
goto done2;
if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
goto done2;
@ -749,7 +840,7 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
mtx_unlock(sema_mtxp);
array = malloc(sizeof(*array) * count, M_TEMP, M_WAITOK);
mtx_lock(sema_mtxp);
if ((error = semvalid(semid, semakptr)) != 0)
if ((error = semvalid(semid, rpr, semakptr)) != 0)
goto done2;
KASSERT(count == semakptr->u.sem_nsems, ("nsems changed"));
if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
@ -762,7 +853,7 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
break;
case GETZCNT:
if ((error = semvalid(semid, semakptr)) != 0)
if ((error = semvalid(semid, rpr, semakptr)) != 0)
goto done2;
if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_R)))
goto done2;
@ -774,7 +865,7 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
break;
case SETVAL:
if ((error = semvalid(semid, semakptr)) != 0)
if ((error = semvalid(semid, rpr, semakptr)) != 0)
goto done2;
if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W)))
goto done2;
@ -805,7 +896,7 @@ kern_semctl(struct thread *td, int semid, int semnum, int cmd,
mtx_lock(sema_mtxp);
if (error)
break;
if ((error = semvalid(semid, semakptr)) != 0)
if ((error = semvalid(semid, rpr, semakptr)) != 0)
goto done2;
KASSERT(count == semakptr->u.sem_nsems, ("nsems changed"));
if ((error = ipcperm(td, &semakptr->u.sem_perm, IPC_W)))
@ -855,13 +946,16 @@ sys_semget(struct thread *td, struct semget_args *uap)
struct ucred *cred = td->td_ucred;
DPRINTF(("semget(0x%x, %d, 0%o)\n", key, nsems, semflg));
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
if (sem_find_prison(cred) == NULL)
return (ENOSYS);
mtx_lock(&sem_mtx);
if (key != IPC_PRIVATE) {
for (semid = 0; semid < seminfo.semmni; semid++) {
if ((sema[semid].u.sem_perm.mode & SEM_ALLOC) &&
sema[semid].cred != NULL &&
sema[semid].cred->cr_prison == cred->cr_prison &&
sema[semid].u.sem_perm.key == key)
break;
}
@ -978,6 +1072,7 @@ sys_semop(struct thread *td, struct semop_args *uap)
struct sembuf small_sops[SMALL_SOPS];
int semid = uap->semid;
size_t nsops = uap->nsops;
struct prison *rpr;
struct sembuf *sops;
struct semid_kernel *semakptr;
struct sembuf *sopptr = NULL;
@ -994,7 +1089,8 @@ sys_semop(struct thread *td, struct semop_args *uap)
#endif
DPRINTF(("call to semop(%d, %p, %u)\n", semid, sops, nsops));
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
rpr = sem_find_prison(td->td_ucred);
if (sem == NULL)
return (ENOSYS);
semid = IPCID_TO_IX(semid); /* Convert back to zero origin */
@ -1044,6 +1140,8 @@ sys_semop(struct thread *td, struct semop_args *uap)
error = EINVAL;
goto done2;
}
if ((error = sem_prison_cansee(rpr, semakptr)) != 0)
goto done2;
/*
* Initial pass thru sops to see what permissions are needed.
* Also perform any checks that don't need repeating on each
@ -1367,11 +1465,217 @@ semexit_myhook(void *arg, struct proc *p)
static int
sysctl_sema(SYSCTL_HANDLER_ARGS)
{
struct prison *rpr;
struct sbuf sb;
struct semid_kernel tmp, empty;
struct semid_kernel *semakptr;
int error, i;
return (SYSCTL_OUT(req, sema,
sizeof(struct semid_kernel) * seminfo.semmni));
error = sysctl_wire_old_buffer(req, 0);
if (error != 0)
goto done;
rpr = sem_find_prison(req->td->td_ucred);
sbuf_new_for_sysctl(&sb, NULL, sizeof(struct semid_kernel) *
seminfo.semmni, req);
bzero(&empty, sizeof(empty));
for (i = 0; i < seminfo.semmni; i++) {
semakptr = &sema[i];
if ((semakptr->u.sem_perm.mode & SEM_ALLOC) == 0 ||
rpr == NULL || sem_prison_cansee(rpr, semakptr) != 0) {
semakptr = &empty;
} else if (req->td->td_ucred->cr_prison !=
semakptr->cred->cr_prison) {
bcopy(semakptr, &tmp, sizeof(tmp));
semakptr = &tmp;
semakptr->u.sem_perm.key = IPC_PRIVATE;
}
sbuf_bcat(&sb, semakptr, sizeof(*semakptr));
}
error = sbuf_finish(&sb);
sbuf_delete(&sb);
done:
return (error);
}
static int
sem_prison_check(void *obj, void *data)
{
struct prison *pr = obj;
struct prison *prpr;
struct vfsoptlist *opts = data;
int error, jsys;
/*
* sysvsem is a jailsys integer.
* It must be "disable" if the parent jail is disabled.
*/
error = vfs_copyopt(opts, "sysvsem", &jsys, sizeof(jsys));
if (error != ENOENT) {
if (error != 0)
return (error);
switch (jsys) {
case JAIL_SYS_DISABLE:
break;
case JAIL_SYS_NEW:
case JAIL_SYS_INHERIT:
prison_lock(pr->pr_parent);
prpr = osd_jail_get(pr->pr_parent, sem_prison_slot);
prison_unlock(pr->pr_parent);
if (prpr == NULL)
return (EPERM);
break;
default:
return (EINVAL);
}
}
return (0);
}
/*
* PR_METHOD_SET handler: apply the "sysvsem" jail parameter to a prison.
*
* obj is the prison being modified and data is its option list.  The
* sysvsem OSD slot of a prison holds the prison that acts as the root of
* its semaphores; an empty slot means the feature is disabled.
* Always returns 0.
*/
static int
sem_prison_set(void *obj, void *data)
{
struct prison *pr = obj;
struct prison *tpr, *orpr, *nrpr, *trpr;
struct vfsoptlist *opts = data;
void *rsv;
int jsys, descend;
/*
* sysvsem controls which jail is the root of the associated sems (this
* jail or same as the parent), or if the feature is available at all.
*
* When sysvsem itself is not given, fall back to the allow.sysvipc /
* allow.nosysvipc flags; -1 means neither was given, in which case
* nothing below is changed.
*/
if (vfs_copyopt(opts, "sysvsem", &jsys, sizeof(jsys)) == ENOENT)
jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0)
? JAIL_SYS_INHERIT
: vfs_flagopt(opts, "allow.nosysvipc", NULL, 0)
? JAIL_SYS_DISABLE
: -1;
if (jsys == JAIL_SYS_DISABLE) {
/* Clear this prison's slot, remembering the old root (orpr). */
prison_lock(pr);
orpr = osd_jail_get(pr, sem_prison_slot);
if (orpr != NULL)
osd_jail_del(pr, sem_prison_slot);
prison_unlock(pr);
if (orpr != NULL) {
/* The prison was its own root, so its semaphores are removed. */
if (orpr == pr)
sem_prison_cleanup(pr);
/* Disable all child jails as well. */
FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
prison_lock(tpr);
trpr = osd_jail_get(tpr, sem_prison_slot);
if (trpr != NULL) {
osd_jail_del(tpr, sem_prison_slot);
/* Cleanup runs only after the prison lock is dropped. */
prison_unlock(tpr);
if (trpr == tpr)
sem_prison_cleanup(tpr);
} else {
/* Already disabled: do not walk below this jail. */
prison_unlock(tpr);
descend = 0;
}
}
}
} else if (jsys != -1) {
/* New root: this prison for "new", otherwise the parent's root. */
if (jsys == JAIL_SYS_NEW)
nrpr = pr;
else {
prison_lock(pr->pr_parent);
nrpr = osd_jail_get(pr->pr_parent, sem_prison_slot);
prison_unlock(pr->pr_parent);
}
/* Reserve the OSD storage before the prison lock is taken. */
rsv = osd_reserve(sem_prison_slot);
prison_lock(pr);
orpr = osd_jail_get(pr, sem_prison_slot);
if (orpr != nrpr)
(void)osd_jail_set_reserved(pr, sem_prison_slot, rsv,
nrpr);
else
osd_free_reserved(rsv);
prison_unlock(pr);
if (orpr != nrpr) {
/* The prison stopped being its own root: drop its semaphores. */
if (orpr == pr)
sem_prison_cleanup(pr);
if (orpr != NULL) {
/* Change child jails matching the old root. */
FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
prison_lock(tpr);
trpr = osd_jail_get(tpr,
sem_prison_slot);
if (trpr == orpr) {
(void)osd_jail_set(tpr,
sem_prison_slot, nrpr);
prison_unlock(tpr);
if (trpr == tpr)
sem_prison_cleanup(tpr);
} else {
/* Different root here: leave this subtree alone. */
prison_unlock(tpr);
descend = 0;
}
}
}
}
}
return (0);
}
/*
 * PR_METHOD_GET handler: report the "sysvsem" jail parameter.
 *
 * The value is derived from the prison's root prison in the OSD slot:
 * no root means "disable", being its own root means "new", and any other
 * root means "inherit".  A caller that did not ask for the option
 * (ENOENT from vfs_setopt) is not an error.
 */
static int
sem_prison_get(void *obj, void *data)
{
	struct prison *pr = obj;
	struct vfsoptlist *opts = data;
	struct prison *root;
	int jsys, rc;

	prison_lock(pr);
	root = osd_jail_get(pr, sem_prison_slot);
	prison_unlock(pr);

	if (root == NULL)
		jsys = JAIL_SYS_DISABLE;
	else if (root == pr)
		jsys = JAIL_SYS_NEW;
	else
		jsys = JAIL_SYS_INHERIT;

	rc = vfs_setopt(opts, "sysvsem", &jsys, sizeof(jsys));
	return (rc == ENOENT ? 0 : rc);
}
/*
 * PR_METHOD_REMOVE handler: when the prison is the root of its own
 * semaphores, remove the semaphore sets that belong to it.  Always
 * returns 0.
 */
static int
sem_prison_remove(void *obj, void *data __unused)
{
	struct prison *pr = obj;
	struct prison *root;

	prison_lock(pr);
	root = osd_jail_get(pr, sem_prison_slot);
	prison_unlock(pr);

	/* A prison with no root, or another prison as root, has nothing to drop. */
	if (root != pr)
		return (0);

	sem_prison_cleanup(pr);
	return (0);
}
static void
sem_prison_cleanup(struct prison *pr)
{
int i;
/* Remove any sems that belong to this jail. */
mtx_lock(&sem_mtx);
for (i = 0; i < seminfo.semmni; i++) {
if ((sema[i].u.sem_perm.mode & SEM_ALLOC) &&
sema[i].cred != NULL && sema[i].cred->cr_prison == pr) {
mtx_lock(&sema_mtx[i]);
sem_remove(i, NULL);
mtx_unlock(&sema_mtx[i]);
}
}
mtx_unlock(&sem_mtx);
}
SYSCTL_JAIL_PARAM_SYS_NODE(sysvsem, CTLFLAG_RW, "SYSV semaphores");
#if defined(COMPAT_FREEBSD4) || defined(COMPAT_FREEBSD5) || \
defined(COMPAT_FREEBSD6) || defined(COMPAT_FREEBSD7)
@ -1398,8 +1702,6 @@ sys_semsys(td, uap)
{
int error;
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
return (ENOSYS);
if (uap->which < 0 || uap->which >= nitems(semcalls))
return (EINVAL);
error = (*semcalls[uap->which])(td, &uap->a2);

View File

@ -80,6 +80,7 @@ __FBSDID("$FreeBSD$");
#include <sys/racct.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
#include <sys/sbuf.h>
#include <sys/stat.h>
#include <sys/syscall.h>
#include <sys/syscallsubr.h>
@ -112,7 +113,8 @@ static int shmget_existing(struct thread *td, struct shmget_args *uap,
static int shm_last_free, shm_nused, shmalloced;
vm_size_t shm_committed;
static struct shmid_kernel *shmsegs;
static struct shmid_kernel *shmsegs;
static unsigned shm_prison_slot;
struct shmmap_state {
vm_offset_t va;
@ -120,8 +122,8 @@ struct shmmap_state {
};
static void shm_deallocate_segment(struct shmid_kernel *);
static int shm_find_segment_by_key(key_t);
static struct shmid_kernel *shm_find_segment(int, bool);
static int shm_find_segment_by_key(struct prison *, key_t);
static struct shmid_kernel *shm_find_segment(struct prison *, int, bool);
static int shm_delete_mapping(struct vmspace *vm, struct shmmap_state *);
static void shmrealloc(void);
static int shminit(void);
@ -130,6 +132,14 @@ static int shmunload(void);
static void shmexit_myhook(struct vmspace *vm);
static void shmfork_myhook(struct proc *p1, struct proc *p2);
static int sysctl_shmsegs(SYSCTL_HANDLER_ARGS);
static void shm_remove(struct shmid_kernel *, int);
static struct prison *shm_find_prison(struct ucred *);
static int shm_prison_cansee(struct prison *, struct shmid_kernel *);
static int shm_prison_check(void *, void *);
static int shm_prison_set(void *, void *);
static int shm_prison_get(void *, void *);
static int shm_prison_remove(void *, void *);
static void shm_prison_cleanup(struct prison *);
/*
* Tuneable values.
@ -189,12 +199,14 @@ static struct sx sysvshmsx;
#define SYSVSHM_ASSERT_LOCKED() sx_assert(&sysvshmsx, SA_XLOCKED)
static int
shm_find_segment_by_key(key_t key)
shm_find_segment_by_key(struct prison *pr, key_t key)
{
int i;
for (i = 0; i < shmalloced; i++)
if ((shmsegs[i].u.shm_perm.mode & SHMSEG_ALLOCATED) &&
shmsegs[i].cred != NULL &&
shmsegs[i].cred->cr_prison == pr &&
shmsegs[i].u.shm_perm.key == key)
return (i);
return (-1);
@ -205,7 +217,7 @@ shm_find_segment_by_key(key_t key)
* is_shmid is false.
*/
static struct shmid_kernel *
shm_find_segment(int arg, bool is_shmid)
shm_find_segment(struct prison *rpr, int arg, bool is_shmid)
{
struct shmid_kernel *shmseg;
int segnum;
@ -217,7 +229,8 @@ shm_find_segment(int arg, bool is_shmid)
if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
(!shm_allow_removed &&
(shmseg->u.shm_perm.mode & SHMSEG_REMOVED) != 0) ||
(is_shmid && shmseg->u.shm_perm.seq != IPCID_TO_SEQ(arg)))
(is_shmid && shmseg->u.shm_perm.seq != IPCID_TO_SEQ(arg)) ||
!shm_prison_cansee(rpr, shmseg))
return (NULL);
return (shmseg);
}
@ -271,6 +284,41 @@ shm_delete_mapping(struct vmspace *vm, struct shmmap_state *shmmap_s)
return (0);
}
/*
 * Mark a shared memory segment as removed.
 *
 * The segment loses its key (reset to IPC_PRIVATE) and gains the
 * SHMSEG_REMOVED flag.  If nothing is attached to it any more, it is
 * deallocated right away and "segnum" is remembered in shm_last_free.
 */
static void
shm_remove(struct shmid_kernel *shmseg, int segnum)
{

	shmseg->u.shm_perm.mode |= SHMSEG_REMOVED;
	shmseg->u.shm_perm.key = IPC_PRIVATE;

	/* Still attached somewhere: leave the segment in place for now. */
	if (shmseg->u.shm_nattch > 0)
		return;

	shm_deallocate_segment(shmseg);
	shm_last_free = segnum;
}
/*
 * Look up the value stored in shm_prison_slot for the credential's prison,
 * i.e. the prison acting as the root of its shared memory segments.
 *
 * Returns NULL when the slot holds nothing; callers in this file treat
 * that as "SYSV shared memory unavailable" and fail with ENOSYS.
 */
static struct prison *
shm_find_prison(struct ucred *cred)
{
	struct prison *root;

	prison_lock(cred->cr_prison);
	root = osd_jail_get(cred->cr_prison, shm_prison_slot);
	prison_unlock(cred->cr_prison);

	return (root);
}
/*
 * Decide whether a segment is visible from root prison "rpr".
 *
 * Returns 0 when the segment's owning prison is "rpr" itself or is
 * reported by prison_ischild() as a child of "rpr"; returns EINVAL
 * otherwise, including when the segment has no credential attached.
 *
 * The result is errno-style: zero means "visible".  Callers must therefore
 * compare against 0 rather than treat the result as a boolean.
 * NOTE(review): shm_find_segment() tests "!shm_prison_cansee(...)", which
 * looks inverted with respect to this convention -- confirm.
 */
static int
shm_prison_cansee(struct prison *rpr, struct shmid_kernel *shmseg)
{
	struct prison *owner;

	if (shmseg->cred == NULL)
		return (EINVAL);

	owner = shmseg->cred->cr_prison;
	if (rpr == owner || prison_ischild(rpr, owner))
		return (0);

	return (EINVAL);
}
static int
kern_shmdt_locked(struct thread *td, const void *shmaddr)
{
@ -283,7 +331,7 @@ kern_shmdt_locked(struct thread *td, const void *shmaddr)
int i;
SYSVSHM_ASSERT_LOCKED();
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
if (shm_find_prison(td->td_ucred) == NULL)
return (ENOSYS);
shmmap_s = p->p_vmspace->vm_shm;
if (shmmap_s == NULL)
@ -325,6 +373,7 @@ static int
kern_shmat_locked(struct thread *td, int shmid, const void *shmaddr,
int shmflg)
{
struct prison *rpr;
struct proc *p = td->td_proc;
struct shmid_kernel *shmseg;
struct shmmap_state *shmmap_s;
@ -334,7 +383,8 @@ kern_shmat_locked(struct thread *td, int shmid, const void *shmaddr,
int error, i, rv;
SYSVSHM_ASSERT_LOCKED();
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
rpr = shm_find_prison(td->td_ucred);
if (rpr == NULL)
return (ENOSYS);
shmmap_s = p->p_vmspace->vm_shm;
if (shmmap_s == NULL) {
@ -345,7 +395,7 @@ kern_shmat_locked(struct thread *td, int shmid, const void *shmaddr,
KASSERT(p->p_vmspace->vm_shm == NULL, ("raced"));
p->p_vmspace->vm_shm = shmmap_s;
}
shmseg = shm_find_segment(shmid, true);
shmseg = shm_find_segment(rpr, shmid, true);
if (shmseg == NULL)
return (EINVAL);
error = ipcperm(td, &shmseg->u.shm_perm,
@ -431,6 +481,7 @@ static int
kern_shmctl_locked(struct thread *td, int shmid, int cmd, void *buf,
size_t *bufsz)
{
struct prison *rpr;
struct shmid_kernel *shmseg;
struct shmid_ds *shmidp;
struct shm_info shm_info;
@ -438,7 +489,8 @@ kern_shmctl_locked(struct thread *td, int shmid, int cmd, void *buf,
SYSVSHM_ASSERT_LOCKED();
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
rpr = shm_find_prison(td->td_ucred);
if (rpr == NULL)
return (ENOSYS);
switch (cmd) {
@ -471,7 +523,7 @@ kern_shmctl_locked(struct thread *td, int shmid, int cmd, void *buf,
return (0);
}
}
shmseg = shm_find_segment(shmid, cmd != SHM_STAT);
shmseg = shm_find_segment(rpr, shmid, cmd != SHM_STAT);
if (shmseg == NULL)
return (EINVAL);
#ifdef MAC
@ -482,10 +534,13 @@ kern_shmctl_locked(struct thread *td, int shmid, int cmd, void *buf,
switch (cmd) {
case SHM_STAT:
case IPC_STAT:
shmidp = (struct shmid_ds *)buf;
error = ipcperm(td, &shmseg->u.shm_perm, IPC_R);
if (error != 0)
return (error);
memcpy(buf, &shmseg->u, sizeof(struct shmid_ds));
memcpy(shmidp, &shmseg->u, sizeof(struct shmid_ds));
if (td->td_ucred->cr_prison != shmseg->cred->cr_prison)
shmidp->shm_perm.key = IPC_PRIVATE;
if (bufsz != NULL)
*bufsz = sizeof(struct shmid_ds);
if (cmd == SHM_STAT) {
@ -509,12 +564,7 @@ kern_shmctl_locked(struct thread *td, int shmid, int cmd, void *buf,
error = ipcperm(td, &shmseg->u.shm_perm, IPC_M);
if (error != 0)
return (error);
shmseg->u.shm_perm.key = IPC_PRIVATE;
shmseg->u.shm_perm.mode |= SHMSEG_REMOVED;
if (shmseg->u.shm_nattch <= 0) {
shm_deallocate_segment(shmseg);
shm_last_free = IPCID_TO_IX(shmid);
}
shm_remove(shmseg, IPCID_TO_IX(shmid));
break;
#if 0
case SHM_LOCK:
@ -721,14 +771,15 @@ sys_shmget(struct thread *td, struct shmget_args *uap)
int segnum, mode;
int error;
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
if (shm_find_prison(td->td_ucred) == NULL)
return (ENOSYS);
mode = uap->shmflg & ACCESSPERMS;
SYSVSHM_LOCK();
if (uap->key == IPC_PRIVATE) {
error = shmget_allocate_segment(td, uap, mode);
} else {
segnum = shm_find_segment_by_key(uap->key);
segnum = shm_find_segment_by_key(td->td_ucred->cr_prison,
uap->key);
if (segnum >= 0)
error = shmget_existing(td, uap, mode, segnum);
else if ((uap->shmflg & IPC_CREAT) == 0)
@ -849,7 +900,15 @@ static struct syscall_helper_data shm32_syscalls[] = {
static int
shminit(void)
{
struct prison *pr;
void *rsv;
int i, error;
osd_method_t methods[PR_MAXMETHOD] = {
[PR_METHOD_CHECK] = shm_prison_check,
[PR_METHOD_SET] = shm_prison_set,
[PR_METHOD_GET] = shm_prison_get,
[PR_METHOD_REMOVE] = shm_prison_remove,
};
#ifndef BURN_BRIDGES
if (TUNABLE_ULONG_FETCH("kern.ipc.shmmaxpgs", &shminfo.shmall) != 0)
@ -879,6 +938,29 @@ shminit(void)
shmexit_hook = &shmexit_myhook;
shmfork_hook = &shmfork_myhook;
/* Set current prisons according to their allow.sysvipc. */
shm_prison_slot = osd_jail_register(NULL, methods);
rsv = osd_reserve(shm_prison_slot);
prison_lock(&prison0);
(void)osd_jail_set_reserved(&prison0, shm_prison_slot, rsv, &prison0);
prison_unlock(&prison0);
rsv = NULL;
sx_slock(&allprison_lock);
TAILQ_FOREACH(pr, &allprison, pr_list) {
if (rsv == NULL)
rsv = osd_reserve(shm_prison_slot);
prison_lock(pr);
if ((pr->pr_allow & PR_ALLOW_SYSVIPC) && pr->pr_ref > 0) {
(void)osd_jail_set_reserved(pr, shm_prison_slot, rsv,
&prison0);
rsv = NULL;
}
prison_unlock(pr);
}
if (rsv != NULL)
osd_free_reserved(rsv);
sx_sunlock(&allprison_lock);
error = syscall_helper_register(shm_syscalls, SY_THR_STATIC_KLD);
if (error != 0)
return (error);
@ -902,6 +984,8 @@ shmunload(void)
syscall32_helper_unregister(shm32_syscalls);
#endif
syscall_helper_unregister(shm_syscalls);
if (shm_prison_slot != 0)
osd_jail_deregister(shm_prison_slot);
for (i = 0; i < shmalloced; i++) {
#ifdef MAC
@ -925,14 +1009,221 @@ shmunload(void)
/*
 * Sysctl handler that exports the shmsegs array, filtered according to the
 * calling thread's jail.  Runs entirely under SYSVSHM_LOCK and returns the
 * error from wiring the user buffer or from finishing the sbuf.
 *
 * NOTE(review): this listing shows both the pre-change lines ("int error;"
 * and the unfiltered SYSCTL_OUT of shmsegs) and the lines that replace them
 * ("int error, i;" and the sbuf-based loop); only one set can be live.
 */
static int
sysctl_shmsegs(SYSCTL_HANDLER_ARGS)
{
int error;
struct prison *rpr;
struct sbuf sb;
struct shmid_kernel tmp, empty;
struct shmid_kernel *shmseg;
int error, i;
SYSVSHM_LOCK();
error = SYSCTL_OUT(req, shmsegs, shmalloced * sizeof(shmsegs[0]));
error = sysctl_wire_old_buffer(req, 0);
if (error != 0)
goto done;
/* Root prison for the caller; NULL hides every segment below. */
rpr = shm_find_prison(req->td->td_ucred);
sbuf_new_for_sysctl(&sb, NULL, shmalloced * sizeof(shmsegs[0]), req);
/* Placeholder entry emitted in place of any slot the caller may not see. */
bzero(&empty, sizeof(empty));
empty.u.shm_perm.mode = SHMSEG_FREE;
/* One entry is appended per slot, so the output keeps shmalloced entries. */
for (i = 0; i < shmalloced; i++) {
shmseg = &shmsegs[i];
if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) == 0 ||
rpr == NULL || shm_prison_cansee(rpr, &shmsegs[i]) != 0) {
/* Unallocated or not visible from the caller's root prison. */
shmseg = &empty;
} else if (req->td->td_ucred->cr_prison !=
shmseg->cred->cr_prison) {
/*
 * Visible, but owned by a different prison than the caller's:
 * emit a copy whose key is replaced by IPC_PRIVATE.
 */
bcopy(shmseg, &tmp, sizeof(tmp));
shmseg = &tmp;
shmseg->u.shm_perm.key = IPC_PRIVATE;
}
sbuf_bcat(&sb, shmseg, sizeof(*shmseg));
}
error = sbuf_finish(&sb);
sbuf_delete(&sb);
done:
SYSVSHM_UNLOCK();
return (error);
}
static int
shm_prison_check(void *obj, void *data)
{
struct prison *pr = obj;
struct prison *prpr;
struct vfsoptlist *opts = data;
int error, jsys;
/*
* sysvshm is a jailsys integer.
* It must be "disable" if the parent jail is disabled.
*/
error = vfs_copyopt(opts, "sysvshm", &jsys, sizeof(jsys));
if (error != ENOENT) {
if (error != 0)
return (error);
switch (jsys) {
case JAIL_SYS_DISABLE:
break;
case JAIL_SYS_NEW:
case JAIL_SYS_INHERIT:
prison_lock(pr->pr_parent);
prpr = osd_jail_get(pr->pr_parent, shm_prison_slot);
prison_unlock(pr->pr_parent);
if (prpr == NULL)
return (EPERM);
break;
default:
return (EINVAL);
}
}
return (0);
}
/*
 * Jail "set" method (registered as PR_METHOD_SET for shm_prison_slot):
 * apply a jail's "sysvshm" setting by recording in the jail's OSD slot
 * which prison is the root of its shared memory segments.
 *
 * obj is the prison being configured and data is its option list.
 * Always returns 0.
 */
static int
shm_prison_set(void *obj, void *data)
{
struct prison *pr = obj;
struct prison *tpr, *orpr, *nrpr, *trpr;
struct vfsoptlist *opts = data;
void *rsv;
int jsys, descend;
/*
* sysvshm controls which jail is the root of the associated segments
* (this jail or same as the parent), or if the feature is available
* at all.
*
* When "sysvshm" itself is absent, fall back to the allow.sysvipc /
* allow.nosysvipc flags; -1 means neither was given, in which case
* neither branch below runs and nothing changes.
*/
if (vfs_copyopt(opts, "sysvshm", &jsys, sizeof(jsys)) == ENOENT)
jsys = vfs_flagopt(opts, "allow.sysvipc", NULL, 0)
? JAIL_SYS_INHERIT
: vfs_flagopt(opts, "allow.nosysvipc", NULL, 0)
? JAIL_SYS_DISABLE
: -1;
if (jsys == JAIL_SYS_DISABLE) {
/* Drop this jail's slot value, remembering the old root in orpr. */
prison_lock(pr);
orpr = osd_jail_get(pr, shm_prison_slot);
if (orpr != NULL)
osd_jail_del(pr, shm_prison_slot);
prison_unlock(pr);
if (orpr != NULL) {
/*
 * The jail was its own root, so its segments go away with it.
 * NOTE(review): shm_prison_cleanup() is called here without
 * SYSVSHM_LOCK, whereas shm_prison_remove() holds it -- confirm.
 */
if (orpr == pr)
shm_prison_cleanup(pr);
/* Disable all child jails as well. */
FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
prison_lock(tpr);
trpr = osd_jail_get(tpr, shm_prison_slot);
if (trpr != NULL) {
osd_jail_del(tpr, shm_prison_slot);
prison_unlock(tpr);
if (trpr == tpr)
shm_prison_cleanup(tpr);
} else {
prison_unlock(tpr);
/* Already disabled; presumably prunes this child's subtree. */
descend = 0;
}
}
}
} else if (jsys != -1) {
/*
 * nrpr is the new root: this jail itself for "new", otherwise
 * whatever root the parent jail currently has.
 */
if (jsys == JAIL_SYS_NEW)
nrpr = pr;
else {
prison_lock(pr->pr_parent);
nrpr = osd_jail_get(pr->pr_parent, shm_prison_slot);
prison_unlock(pr->pr_parent);
}
/*
 * The slot is reserved before the prison lock is taken; the
 * reservation is either consumed by osd_jail_set_reserved() or
 * released with osd_free_reserved() when the root is unchanged.
 */
rsv = osd_reserve(shm_prison_slot);
prison_lock(pr);
orpr = osd_jail_get(pr, shm_prison_slot);
if (orpr != nrpr)
(void)osd_jail_set_reserved(pr, shm_prison_slot, rsv,
nrpr);
else
osd_free_reserved(rsv);
prison_unlock(pr);
if (orpr != nrpr) {
/* The jail stops being its own root: remove its segments. */
if (orpr == pr)
shm_prison_cleanup(pr);
if (orpr != NULL) {
/* Change child jails matching the old root. */
FOREACH_PRISON_DESCENDANT(pr, tpr, descend) {
prison_lock(tpr);
trpr = osd_jail_get(tpr,
shm_prison_slot);
if (trpr == orpr) {
(void)osd_jail_set(tpr,
shm_prison_slot, nrpr);
prison_unlock(tpr);
if (trpr == tpr)
shm_prison_cleanup(tpr);
} else {
prison_unlock(tpr);
/* Different root; presumably prunes this child's subtree. */
descend = 0;
}
}
}
}
}
return (0);
}
static int
shm_prison_get(void *obj, void *data)
{
struct prison *pr = obj;
struct prison *rpr;
struct vfsoptlist *opts = data;
int error, jsys;
/* Set sysvshm based on the jail's root prison. */
prison_lock(pr);
rpr = osd_jail_get(pr, shm_prison_slot);
prison_unlock(pr);
jsys = rpr == NULL ? JAIL_SYS_DISABLE
: rpr == pr ? JAIL_SYS_NEW : JAIL_SYS_INHERIT;
error = vfs_setopt(opts, "sysvshm", &jsys, sizeof(jsys));
if (error == ENOENT)
error = 0;
return (error);
}
/*
 * Jail "remove" method: when the jail being removed is the root of its
 * own shared memory segments, remove those segments.
 *
 * The lookup and the cleanup both happen with SYSVSHM_LOCK held.
 * Always returns 0.
 */
static int
shm_prison_remove(void *obj, void *data __unused)
{
	struct prison *pr = obj;
	bool is_own_root;

	SYSVSHM_LOCK();

	prison_lock(pr);
	is_own_root = (osd_jail_get(pr, shm_prison_slot) == pr);
	prison_unlock(pr);

	if (is_own_root)
		shm_prison_cleanup(pr);

	SYSVSHM_UNLOCK();
	return (0);
}
static void
shm_prison_cleanup(struct prison *pr)
{
struct shmid_kernel *shmseg;
int i;
/* Remove any segments that belong to this jail. */
for (i = 0; i < shmalloced; i++) {
shmseg = &shmsegs[i];
if ((shmseg->u.shm_perm.mode & SHMSEG_ALLOCATED) &&
shmseg->cred != NULL && shmseg->cred->cr_prison == pr) {
shm_remove(shmseg, i);
}
}
}
/* Declare the read-write "sysvshm" jail parameter node. */
SYSCTL_JAIL_PARAM_SYS_NODE(sysvshm, CTLFLAG_RW, "SYSV shared memory");
#if defined(__i386__) && (defined(COMPAT_FREEBSD4) || defined(COMPAT_43))
struct oshmid_ds {
struct ipc_perm_old shm_perm; /* operation perms */
@ -957,17 +1248,19 @@ oshmctl(struct thread *td, struct oshmctl_args *uap)
{
#ifdef COMPAT_43
int error = 0;
struct prison *rpr;
struct shmid_kernel *shmseg;
struct oshmid_ds outbuf;
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
rpr = shm_find_prison(td->td_ucred);
if (rpr == NULL)
return (ENOSYS);
if (uap->cmd != IPC_STAT) {
return (freebsd7_shmctl(td,
(struct freebsd7_shmctl_args *)uap));
}
SYSVSHM_LOCK();
shmseg = shm_find_segment(uap->shmid, true);
shmseg = shm_find_segment(rpr, uap->shmid, true);
if (shmseg == NULL) {
SYSVSHM_UNLOCK();
return (EINVAL);
@ -1020,8 +1313,6 @@ int
sys_shmsys(struct thread *td, struct shmsys_args *uap)
{
if (!prison_allow(td->td_ucred, PR_ALLOW_SYSVIPC))
return (ENOSYS);
if (uap->which < 0 || uap->which >= nitems(shmcalls))
return (EINVAL);
return ((*shmcalls[uap->which])(td, &uap->a2));

View File

@ -493,10 +493,14 @@ or
.Xr sethostname 3 .
.It Va allow.sysvipc
A process within the jail has access to System V IPC primitives.
In the current jail implementation, System V primitives share a single
namespace across the host and jail environments, meaning that processes
within a jail would be able to communicate with (and potentially interfere
with) processes outside of the jail, and in other jails.
This is deprecated in favor of the per-module parameters (see below).
When this parameter is set, it is equivalent to setting
.Va sysvmsg ,
.Va sysvsem ,
and
.Va sysvshm
all to
.Dq inherit .
.It Va allow.raw_sockets
The jail root is allowed to create raw sockets.
Setting this parameter allows utilities like
@ -634,6 +638,25 @@ will give the jail it's own environment (still originally inherited when
the jail is created).
.It Va linux.osname , linux.osrelease , linux.oss_version
The Linux OS name, OS release, and OSS version associated with this jail.
.It Va sysvmsg
Allow access to SYSV IPC message primitives.
If set to
.Dq inherit ,
all IPC objects on the system are visible to this jail, whether they
were created by the jail itself, the base system, or other jails.
If set to
.Dq new ,
the jail will have its own key namespace, and can only see the objects
that it has created;
the system (or parent jail) has access to the jail's objects, but not to
its keys.
If set to
.Dq disable ,
the jail cannot perform any sysvmsg-related system calls.
.It Va sysvsem , sysvshm
Allow access to SYSV IPC semaphore and shared memory primitives, in the
same manner as
.Va sysvmsg .
.El
.Pp
There are pseudo-parameters that are not passed to the kernel, but are