jail: Change the locking around pr_ref and pr_uref

Require both the prison mutex and allprison_lock when pr_ref or
pr_uref go to/from zero.  Adding a non-first or removing a non-last
reference remains lock-free.  This means that a shared hold on
allprison_lock is sufficient for prison_isalive() to be useful, which
removes a number of cases of lock/check/unlock on the prison mutex.
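
A minimal sketch of the new release rule (illustrative only, not the
committed code): obj, list_lock, all_objs, and obj_destroy are
hypothetical stand-ins for struct prison, allprison_lock, allprison,
and the teardown done by prison_deref().  The committed prison_free()
additionally defers last-reference teardown to a taskqueue task
(prison_complete), since callers may hold locks.

	#include <sys/param.h>
	#include <sys/lock.h>
	#include <sys/mutex.h>
	#include <sys/queue.h>
	#include <sys/refcount.h>
	#include <sys/sx.h>

	struct obj {
		struct mtx	 mtx;	/* plays the role of pr_mtx */
		volatile u_int	 ref;	/* plays the role of pr_ref */
		TAILQ_ENTRY(obj) link;	/* plays the role of pr_list */
	};

	static struct sx list_lock;		/* allprison_lock stand-in */
	static TAILQ_HEAD(, obj) all_objs;	/* allprison stand-in */
	static void obj_destroy(struct obj *);	/* hypothetical teardown */

	static void
	obj_release(struct obj *o)
	{
		bool lastref;

		/* Dropping a non-last reference takes no locks at all. */
		if (refcount_release_if_not_last(&o->ref))
			return;

		/*
		 * The 1->0 transition requires both locks, so a thread
		 * holding either one can trust a nonzero count to stay
		 * nonzero.
		 */
		sx_xlock(&list_lock);
		mtx_lock(&o->mtx);
		lastref = refcount_release(&o->ref);
		mtx_unlock(&o->mtx);
		if (lastref)
			TAILQ_REMOVE(&all_objs, o, link);
		sx_xunlock(&list_lock);
		if (lastref)
			obj_destroy(o);
	}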

Expand the locking in kern_jail_set() to keep allprison_lock held
exclusive until the new prison is valid, thus making invalid prisons
invisible to any thread holding allprison_lock (except of course the
one creating or destroying the prison).  This renders prison_isvalid()
nearly redundant, now used only in asserts.
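
As a sketch of what that buys readers (illustrative, not committed
code): a list walk that previously had to lock each prison and call
prison_isvalid() can now test prison_isalive() directly under a shared
allprison_lock, as in the reworked kern_jail_get() loop below.

	/* Find the first living child of "mypr" with an ID above "jid". */
	sx_slock(&allprison_lock);
	TAILQ_FOREACH(pr, &allprison, pr_list) {
		/*
		 * No mtx_lock/prison_isvalid/mtx_unlock per entry:
		 * nothing invalid is visible while the shared lock is
		 * held, so prison_isalive() alone answers the question.
		 */
		if (pr->pr_id > jid && prison_isalive(pr) &&
		    prison_ischild(mypr, pr))
			break;
	}
	if (pr == NULL)
		error = ENOENT;
	sx_sunlock(&allprison_lock);

The committed loop additionally takes pr->pr_mtx before leaving, so the
found prison cannot be torn down once the shared list lock is dropped.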

Differential Revision:	https://reviews.freebsd.org/D28419
Differential Revision:	https://reviews.freebsd.org/D28458
Jamie Gritton  2021-02-21 10:55:44 -08:00
commit f7496dcab0 (parent b963ce4588)
6 changed files with 237 additions and 240 deletions

sys/kern/kern_jail.c

@@ -137,9 +137,11 @@ LIST_HEAD(, prison_racct) allprison_racct;
int lastprid = 0;
static int get_next_prid(struct prison **insprp);
static int do_jail_attach(struct thread *td, struct prison *pr);
static int do_jail_attach(struct thread *td, struct prison *pr, int drflags);
static void prison_complete(void *context, int pending);
static void prison_deref(struct prison *pr, int flags);
static int prison_lock_xlock(struct prison *pr, int flags);
static void prison_free_not_last(struct prison *pr);
static void prison_set_allow_locked(struct prison *pr, unsigned flag,
int enable);
static char *prison_path(struct prison *pr1, struct prison *pr2);
@@ -1006,18 +1008,15 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* where it can be inserted later.
*/
TAILQ_FOREACH(inspr, &allprison, pr_list) {
if (inspr->pr_id == jid) {
mtx_lock(&inspr->pr_mtx);
if (prison_isvalid(inspr)) {
pr = inspr;
drflags |= PD_LOCKED;
inspr = NULL;
} else
mtx_unlock(&inspr->pr_mtx);
break;
}
if (inspr->pr_id < jid)
continue;
if (inspr->pr_id > jid)
break;
pr = inspr;
mtx_lock(&pr->pr_mtx);
drflags |= PD_LOCKED;
inspr = NULL;
break;
}
if (pr != NULL) {
ppr = pr->pr_parent;
@@ -1041,13 +1040,15 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
error = ENOENT;
vfs_opterror(opts, "jail %d not found", jid);
goto done_deref;
} else if (!prison_isalive(pr)) {
}
if (!prison_isalive(pr)) {
if (!(flags & JAIL_DYING)) {
error = ENOENT;
vfs_opterror(opts, "jail %d is dying",
jid);
goto done_deref;
} else if ((flags & JAIL_ATTACH) ||
}
if ((flags & JAIL_ATTACH) ||
(pr_flags & PR_PERSIST)) {
/*
* A dying jail might be resurrected
@@ -1121,12 +1122,10 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
if (namelc[0] != '\0') {
pnamelen =
(ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
name_again:
deadpr = NULL;
FOREACH_PRISON_CHILD(ppr, tpr) {
if (tpr != pr &&
!strcmp(tpr->pr_name + pnamelen, namelc)) {
mtx_lock(&tpr->pr_mtx);
if (prison_isalive(tpr)) {
if (pr == NULL &&
cuflags != JAIL_CREATE) {
@@ -1135,6 +1134,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* for updates.
*/
pr = tpr;
mtx_lock(&pr->pr_mtx);
drflags |= PD_LOCKED;
break;
}
@@ -1144,28 +1144,22 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* active sibling jail.
*/
error = EEXIST;
mtx_unlock(&tpr->pr_mtx);
vfs_opterror(opts,
"jail \"%s\" already exists",
name);
goto done_deref;
}
if (pr == NULL &&
cuflags != JAIL_CREATE &&
prison_isvalid(tpr))
cuflags != JAIL_CREATE) {
deadpr = tpr;
mtx_unlock(&tpr->pr_mtx);
}
}
}
/* If no active jail is found, use a dying one. */
if (deadpr != NULL && pr == NULL) {
if (flags & JAIL_DYING) {
mtx_lock(&deadpr->pr_mtx);
if (!prison_isvalid(deadpr)) {
mtx_unlock(&deadpr->pr_mtx);
goto name_again;
}
pr = deadpr;
mtx_lock(&pr->pr_mtx);
drflags |= PD_LOCKED;
} else if (cuflags == JAIL_UPDATE) {
error = ENOENT;
@@ -1199,19 +1193,11 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
vfs_opterror(opts, "prison limit exceeded");
goto done_deref;
}
mtx_lock(&ppr->pr_mtx);
if (!prison_isvalid(ppr)) {
mtx_unlock(&ppr->pr_mtx);
error = ENOENT;
vfs_opterror(opts, "jail \"%s\" not found",
prison_name(mypr, ppr));
goto done_deref;
}
prison_hold(ppr);
if (refcount_acquire(&ppr->pr_uref))
mtx_unlock(&ppr->pr_mtx);
else {
if (!refcount_acquire_if_not_zero(&ppr->pr_uref)) {
/* This brings the parent back to life. */
mtx_lock(&ppr->pr_mtx);
refcount_acquire(&ppr->pr_uref);
mtx_unlock(&ppr->pr_mtx);
error = osd_jail_call(ppr, PR_METHOD_CREATE, opts);
if (error) {
@@ -1219,7 +1205,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
drflags |= PD_DEREF | PD_DEUREF;
goto done_deref;
}
}
}
if (jid == 0 && (jid = get_next_prid(&inspr)) == 0) {
error = EAGAIN;
@@ -1230,6 +1216,8 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
}
pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
refcount_init(&pr->pr_ref, 0);
refcount_init(&pr->pr_uref, 0);
LIST_INIT(&pr->pr_children);
mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK);
TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
@@ -1452,7 +1440,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
#ifdef VIMAGE
(tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
#endif
refcount_load(&tpr->pr_uref) == 0) {
!prison_isalive(tpr)) {
descend = 0;
continue;
}
@@ -1520,7 +1508,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
#ifdef VIMAGE
(tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
#endif
refcount_load(&tpr->pr_uref) == 0) {
!prison_isalive(tpr)) {
descend = 0;
continue;
}
@@ -1759,8 +1747,8 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
prison_hold(pr);
refcount_acquire(&pr->pr_uref);
} else {
refcount_release(&pr->pr_ref);
drflags |= PD_DEUREF;
prison_free_not_last(pr);
}
}
pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags;
@@ -1824,8 +1812,6 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
#endif
/* Let the modules do their work. */
sx_downgrade(&allprison_lock);
drflags = (drflags & ~PD_LIST_XLOCKED) | PD_LIST_SLOCKED;
if (born) {
error = osd_jail_call(pr, PR_METHOD_CREATE, opts);
if (error) {
@@ -1842,9 +1828,8 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
/* Attach this process to the prison if requested. */
if (flags & JAIL_ATTACH) {
mtx_lock(&pr->pr_mtx);
error = do_jail_attach(td, pr);
drflags &= ~PD_LIST_SLOCKED;
error = do_jail_attach(td, pr, prison_lock_xlock(pr, drflags));
drflags &= ~(PD_LOCKED | PD_LIST_XLOCKED);
if (error) {
if (created) {
/* do_jail_attach has removed the prison. */
@@ -1857,9 +1842,9 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
#ifdef RACCT
if (racct_enable && !created) {
if (drflags & PD_LIST_SLOCKED) {
sx_sunlock(&allprison_lock);
drflags &= ~PD_LIST_SLOCKED;
if (drflags & PD_LIST_XLOCKED) {
sx_xunlock(&allprison_lock);
drflags &= ~PD_LIST_XLOCKED;
}
prison_racct_modify(pr);
}
@@ -1874,8 +1859,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* not be publicly visible).
*/
if (pr_flags & PR_PERSIST) {
mtx_lock(&pr->pr_mtx);
drflags |= PD_LOCKED;
drflags = prison_lock_xlock(pr, drflags);
refcount_acquire(&pr->pr_ref);
refcount_acquire(&pr->pr_uref);
} else {
@@ -1952,13 +1936,8 @@ get_next_prid(struct prison **insprp)
TAILQ_FOREACH(inspr, &allprison, pr_list) {
if (inspr->pr_id < jid)
continue;
if (inspr->pr_id > jid ||
refcount_load(&inspr->pr_ref) == 0) {
/*
* Found an opening. This may be a gap
* in the list, or a dead jail with the
* same ID.
*/
if (inspr->pr_id > jid) {
/* Found an opening. */
maxid = 0;
break;
}
@@ -2047,18 +2026,14 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid));
if (error == 0) {
TAILQ_FOREACH(pr, &allprison, pr_list) {
if (pr->pr_id > jid && prison_ischild(mypr, pr)) {
if (pr->pr_id > jid &&
((flags & JAIL_DYING) || prison_isalive(pr)) &&
prison_ischild(mypr, pr)) {
mtx_lock(&pr->pr_mtx);
if ((flags & JAIL_DYING)
? prison_isvalid(pr) : prison_isalive(pr))
break;
mtx_unlock(&pr->pr_mtx);
drflags |= PD_LOCKED;
goto found_prison;
}
}
if (pr != NULL) {
drflags |= PD_LOCKED;
goto found_prison;
}
error = ENOENT;
vfs_opterror(opts, "no jail after %d", jid);
goto done;
@@ -2314,7 +2289,7 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
int
sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
{
struct prison *pr, *cpr, *lpr, *tpr;
struct prison *pr, *cpr, *lpr;
int descend, error;
error = priv_check(td, PRIV_JAIL_REMOVE);
@@ -2334,21 +2309,13 @@ sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
mtx_unlock(&pr->pr_mtx);
lpr = NULL;
FOREACH_PRISON_DESCENDANT(pr, cpr, descend) {
mtx_lock(&cpr->pr_mtx);
if (prison_isvalid(cpr)) {
tpr = cpr;
prison_hold(cpr);
} else {
/* Already removed - do not do it again. */
tpr = NULL;
}
mtx_unlock(&cpr->pr_mtx);
prison_hold(cpr);
if (lpr != NULL) {
mtx_lock(&lpr->pr_mtx);
prison_remove_one(lpr);
sx_xlock(&allprison_lock);
}
lpr = tpr;
lpr = cpr;
}
if (lpr != NULL) {
mtx_lock(&lpr->pr_mtx);
@@ -2377,8 +2344,8 @@ prison_remove_one(struct prison *pr)
/* If the prison was persistent, it is not anymore. */
if (pr->pr_flags & PR_PERSIST) {
refcount_release(&pr->pr_ref);
drflags |= PD_DEUREF;
prison_free_not_last(pr);
pr->pr_flags &= ~PR_PERSIST;
}
@@ -2428,14 +2395,7 @@ sys_jail_attach(struct thread *td, struct jail_attach_args *uap)
if (error)
return (error);
/*
* Start with exclusive hold on allprison_lock to ensure that a possible
* PR_METHOD_REMOVE call isn't concurrent with jail_set or jail_remove.
* But then immediately downgrade it since we don't need to stop
* readers.
*/
sx_xlock(&allprison_lock);
sx_downgrade(&allprison_lock);
sx_slock(&allprison_lock);
pr = prison_find_child(td->td_ucred->cr_prison, uap->jid);
if (pr == NULL) {
sx_sunlock(&allprison_lock);
@@ -2449,16 +2409,18 @@ sys_jail_attach(struct thread *td, struct jail_attach_args *uap)
return (EINVAL);
}
return (do_jail_attach(td, pr));
return (do_jail_attach(td, pr, PD_LOCKED | PD_LIST_SLOCKED));
}
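
A hedged reading of the new drflags convention, shown with illustrative
names (mypr and jid stand in for td->td_ucred->cr_prison and uap->jid):
the PD_* flags describe exactly which locks and references the caller
hands off, and do_jail_attach() releases that state, via prison_deref(),
on every exit path.

	sx_slock(&allprison_lock);		/* -> PD_LIST_SLOCKED */
	pr = prison_find_child(mypr, jid);	/* returns pr_mtx held */
	if (pr != NULL)				/* -> PD_LOCKED */
		error = do_jail_attach(td, pr,
		    PD_LOCKED | PD_LIST_SLOCKED);
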
static int
do_jail_attach(struct thread *td, struct prison *pr)
do_jail_attach(struct thread *td, struct prison *pr, int drflags)
{
struct proc *p;
struct ucred *newcred, *oldcred;
int error;
mtx_assert(&pr->pr_mtx, MA_OWNED);
sx_assert(&allprison_lock, SX_LOCKED);
/*
* XXX: Note that there is a slight race here if two threads
* in the same privileged process attempt to attach to two
@@ -2469,15 +2431,18 @@ do_jail_attach(struct thread *td, struct prison *pr)
*/
refcount_acquire(&pr->pr_ref);
refcount_acquire(&pr->pr_uref);
drflags |= PD_DEREF | PD_DEUREF;
mtx_unlock(&pr->pr_mtx);
drflags &= ~PD_LOCKED;
/* Let modules do whatever they need to prepare for attaching. */
error = osd_jail_call(pr, PR_METHOD_ATTACH, td);
if (error) {
prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED);
prison_deref(pr, drflags);
return (error);
}
sx_sunlock(&allprison_lock);
sx_unlock(&allprison_lock);
drflags &= ~(PD_LIST_SLOCKED | PD_LIST_XLOCKED);
/*
* Reparent the newly attached process to this jail.
@@ -2513,7 +2478,7 @@ do_jail_attach(struct thread *td, struct prison *pr)
rctl_proc_ucred_changed(p, newcred);
crfree(newcred);
#endif
prison_deref(oldcred->cr_prison, PD_DEREF | PD_DEUREF);
prison_deref(oldcred->cr_prison, drflags);
crfree(oldcred);
/*
@@ -2533,8 +2498,9 @@ do_jail_attach(struct thread *td, struct prison *pr)
e_revert_osd:
/* Tell modules this thread is still in its old jail after all. */
sx_slock(&allprison_lock);
drflags |= PD_LIST_SLOCKED;
(void)osd_jail_call(td->td_ucred->cr_prison, PR_METHOD_ATTACH, td);
prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED);
prison_deref(pr, drflags);
return (error);
}
@@ -2548,19 +2514,13 @@ prison_find(int prid)
sx_assert(&allprison_lock, SX_LOCKED);
TAILQ_FOREACH(pr, &allprison, pr_list) {
if (pr->pr_id == prid) {
mtx_lock(&pr->pr_mtx);
if (prison_isvalid(pr))
return (pr);
/*
* Any active prison with the same ID would have
* been inserted before a dead one.
*/
mtx_unlock(&pr->pr_mtx);
break;
}
if (pr->pr_id < prid)
continue;
if (pr->pr_id > prid)
break;
KASSERT(prison_isvalid(pr), ("Found invalid prison %p", pr));
mtx_lock(&pr->pr_mtx);
return (pr);
}
return (NULL);
}
@@ -2577,10 +2537,10 @@ prison_find_child(struct prison *mypr, int prid)
sx_assert(&allprison_lock, SX_LOCKED);
FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
if (pr->pr_id == prid) {
KASSERT(prison_isvalid(pr),
("Found invalid prison %p", pr));
mtx_lock(&pr->pr_mtx);
if (prison_isvalid(pr))
return (pr);
mtx_unlock(&pr->pr_mtx);
return (pr);
}
}
return (NULL);
@@ -2598,26 +2558,21 @@ prison_find_name(struct prison *mypr, const char *name)
sx_assert(&allprison_lock, SX_LOCKED);
mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1;
again:
deadpr = NULL;
FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
if (!strcmp(pr->pr_name + mylen, name)) {
mtx_lock(&pr->pr_mtx);
if (prison_isalive(pr))
KASSERT(prison_isvalid(pr),
("Found invalid prison %p", pr));
if (prison_isalive(pr)) {
mtx_lock(&pr->pr_mtx);
return (pr);
if (prison_isvalid(pr))
deadpr = pr;
mtx_unlock(&pr->pr_mtx);
}
deadpr = pr;
}
}
/* There was no valid prison - perhaps there was a dying one. */
if (deadpr != NULL) {
if (deadpr != NULL)
mtx_lock(&deadpr->pr_mtx);
if (!prison_isvalid(deadpr)) {
mtx_unlock(&deadpr->pr_mtx);
goto again;
}
}
return (deadpr);
}
@@ -2671,45 +2626,53 @@ prison_hold(struct prison *pr)
/*
* Remove a prison reference. If that was the last reference, the
* prison will be removed (at a later time). Return with the prison
* unlocked.
* prison will be removed (at a later time).
*/
void
prison_free_locked(struct prison *pr)
{
int lastref;
mtx_assert(&pr->pr_mtx, MA_OWNED);
KASSERT(refcount_load(&pr->pr_ref) > 0,
("Trying to free dead prison %p (jid=%d).",
pr, pr->pr_id));
lastref = refcount_release(&pr->pr_ref);
/*
* Locking is no longer required, but unlock because the caller
* expects it.
*/
mtx_unlock(&pr->pr_mtx);
if (lastref) {
/*
* Don't remove the prison itself in this context,
* in case there are locks held.
*/
taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
}
prison_free(pr);
}
void
prison_free(struct prison *pr)
{
/*
* Locking is only required when releasing the last reference.
* This ensures that a locked prison will remain valid
* until it is unlocked.
*/
KASSERT(refcount_load(&pr->pr_ref) > 0,
("Trying to free dead prison %p (jid=%d).",
pr, pr->pr_id));
if (refcount_release_if_not_last(&pr->pr_ref))
return;
mtx_lock(&pr->pr_mtx);
prison_free_locked(pr);
if (!refcount_release_if_not_last(&pr->pr_ref)) {
/*
* Don't remove the last reference in this context,
* in case there are locks held.
*/
taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
}
}
static void
prison_free_not_last(struct prison *pr)
{
#ifdef INVARIANTS
int lastref;
KASSERT(refcount_load(&pr->pr_ref) > 0,
("Trying to free dead prison %p (jid=%d).",
pr, pr->pr_id));
lastref = refcount_release(&pr->pr_ref);
KASSERT(!lastref,
("prison_free_not_last freed last ref on prison %p (jid=%d).",
pr, pr->pr_id));
#else
refcount_release(&pr->pr_ref);
#endif
}
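
Why a dedicated helper rather than a bare refcount_release(): under the
new rules a 1->0 drop made without both locks would break the protocol,
so the INVARIANTS build turns one into a panic.  A hedged usage sketch,
modeled on the PR_PERSIST sites this commit converts (the caller still
holds its own reference from prison_find(), so this drop can never be
the last):

	if (pr->pr_flags & PR_PERSIST) {
		/* Drop the persistence reference; provably not the last. */
		prison_free_not_last(pr);
		pr->pr_flags &= ~PR_PERSIST;
	}
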
/*
@@ -2718,7 +2681,8 @@ prison_free(struct prison *pr)
user-visible, except through the jail system calls. It is also
* an error to hold an invalid prison. A prison record will remain
* alive as long as it has at least one user reference, and will not
be set to the dying state as long as the prison mutex is held.
* be set to the dying state until the prison mutex and allprison_lock
* are both freed.
*/
void
prison_proc_hold(struct prison *pr)
@@ -2756,7 +2720,7 @@ prison_proc_free(struct prison *pr)
* but also half dead. Add a reference so any calls to
* prison_free() won't re-submit the task.
*/
refcount_acquire(&pr->pr_ref);
prison_hold(pr);
taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
}
}
@@ -2768,18 +2732,18 @@ static void
prison_complete(void *context, int pending)
{
struct prison *pr = context;
int drflags;
sx_xlock(&allprison_lock);
mtx_lock(&pr->pr_mtx);
/*
* If this is completing a call to prison_proc_free, there will still
* be a user reference held; clear that as well as the reference that
* was added. No references are expected if this is completing a call
* to prison_free, but prison_deref is still called for the cleanup.
* This could be called to release the last reference, or the
* last user reference; the existence of a user reference implies
* the latter. There will always be a reference to remove, as
* prison_proc_free adds one.
*/
prison_deref(pr, refcount_load(&pr->pr_uref) > 0
? PD_DEREF | PD_DEUREF | PD_LOCKED | PD_LIST_XLOCKED
: PD_LOCKED | PD_LIST_XLOCKED);
drflags = prison_lock_xlock(pr, PD_DEREF);
if (refcount_load(&pr->pr_uref) > 0)
drflags |= PD_DEUREF;
prison_deref(pr, drflags);
}
/*
@@ -2794,84 +2758,86 @@ static void
prison_deref(struct prison *pr, int flags)
{
struct prisonlist freeprison;
struct prison *rpr, *tpr;
int lastref, lasturef;
struct prison *rpr, *ppr, *tpr;
TAILQ_INIT(&freeprison);
if (!(flags & PD_LOCKED))
mtx_lock(&pr->pr_mtx);
/*
* Release this prison as requested, which may cause its parent
* to be released, and then maybe its grandparent, etc.
*/
for (;;) {
if (flags & PD_DEUREF) {
/* Drop a user reference. */
KASSERT(refcount_load(&pr->pr_uref) > 0,
("prison_deref PD_DEUREF on a dead prison (jid=%d)",
pr->pr_id));
lasturef = refcount_release(&pr->pr_uref);
if (lasturef)
refcount_acquire(&pr->pr_ref);
KASSERT(refcount_load(&prison0.pr_uref) > 0,
("prison0 pr_uref=0"));
} else
lasturef = 0;
if (!refcount_release_if_not_last(&pr->pr_uref)) {
if (!(flags & PD_DEREF)) {
prison_hold(pr);
flags |= PD_DEREF;
}
flags = prison_lock_xlock(pr, flags);
if (refcount_release(&pr->pr_uref)) {
/*
* When the last user reference goes,
* this becomes a dying prison.
*/
KASSERT(
refcount_load(&prison0.pr_uref) > 0,
("prison0 pr_uref=0"));
mtx_unlock(&pr->pr_mtx);
flags &= ~PD_LOCKED;
(void)osd_jail_call(pr,
PR_METHOD_REMOVE, NULL);
}
}
}
if (flags & PD_DEREF) {
/* Drop a reference. */
KASSERT(refcount_load(&pr->pr_ref) > 0,
("prison_deref PD_DEREF on a dead prison (jid=%d)",
pr->pr_id));
lastref = refcount_release(&pr->pr_ref);
}
else
lastref = refcount_load(&pr->pr_ref) == 0;
mtx_unlock(&pr->pr_mtx);
/*
* Tell the modules if the last user reference was removed
* (even if it sticks around in dying state).
*/
if (lasturef) {
if (!(flags & (PD_LIST_SLOCKED | PD_LIST_XLOCKED))) {
if (atomic_load_acq_int(&pr->pr_ref) > 1) {
sx_slock(&allprison_lock);
flags |= PD_LIST_SLOCKED;
} else {
sx_xlock(&allprison_lock);
flags |= PD_LIST_XLOCKED;
if (!refcount_release_if_not_last(&pr->pr_ref)) {
flags = prison_lock_xlock(pr, flags);
if (refcount_release(&pr->pr_ref)) {
/*
* When the last reference goes,
* unlink the prison and set it aside.
*/
KASSERT(
refcount_load(&pr->pr_uref) == 0,
("prison_deref: last ref, "
"but still has %d urefs (jid=%d)",
pr->pr_uref, pr->pr_id));
KASSERT(
refcount_load(&prison0.pr_ref) != 0,
("prison0 pr_ref=0"));
TAILQ_REMOVE(&allprison, pr, pr_list);
LIST_REMOVE(pr, pr_sibling);
TAILQ_INSERT_TAIL(&freeprison, pr,
pr_list);
for (ppr = pr->pr_parent;
ppr != NULL;
ppr = ppr->pr_parent)
ppr->pr_childcount--;
/*
* Removing a prison frees references
* from its parent.
*/
mtx_unlock(&pr->pr_mtx);
flags &= ~PD_LOCKED;
pr = pr->pr_parent;
flags |= PD_DEREF | PD_DEUREF;
continue;
}
}
(void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
mtx_lock(&pr->pr_mtx);
lastref = refcount_release(&pr->pr_ref);
mtx_unlock(&pr->pr_mtx);
}
if (!lastref)
break;
if (flags & PD_LIST_SLOCKED) {
if (!sx_try_upgrade(&allprison_lock)) {
sx_sunlock(&allprison_lock);
sx_xlock(&allprison_lock);
}
flags &= ~PD_LIST_SLOCKED;
} else if (!(flags & PD_LIST_XLOCKED))
sx_xlock(&allprison_lock);
flags |= PD_LIST_XLOCKED;
TAILQ_REMOVE(&allprison, pr, pr_list);
LIST_REMOVE(pr, pr_sibling);
TAILQ_INSERT_TAIL(&freeprison, pr, pr_list);
for (tpr = pr->pr_parent; tpr != NULL; tpr = tpr->pr_parent)
tpr->pr_childcount--;
/* Removing a prison frees a reference on its parent. */
pr = pr->pr_parent;
mtx_lock(&pr->pr_mtx);
flags |= PD_DEREF | PD_DEUREF;
break;
}
/* Release all the prison locks. */
if (flags & PD_LOCKED)
mtx_unlock(&pr->pr_mtx);
if (flags & PD_LIST_SLOCKED)
sx_sunlock(&allprison_lock);
else if (flags & PD_LIST_XLOCKED)
@@ -2902,10 +2868,47 @@ prison_deref(struct prison *pr, int flags)
if (racct_enable)
prison_racct_detach(rpr);
#endif
TAILQ_REMOVE(&freeprison, rpr, pr_list);
free(rpr, M_PRISON);
}
}
/*
* Given the current locking state in the flags, make sure allprison_lock
* is held exclusive, and the prison is locked. Return flags indicating
* the new state.
*/
static int
prison_lock_xlock(struct prison *pr, int flags)
{
if (!(flags & PD_LIST_XLOCKED)) {
/*
* Get allprison_lock, which may be an upgrade,
* and may require unlocking the prison.
*/
if (flags & PD_LOCKED) {
mtx_unlock(&pr->pr_mtx);
flags &= ~PD_LOCKED;
}
if (flags & PD_LIST_SLOCKED) {
if (!sx_try_upgrade(&allprison_lock)) {
sx_sunlock(&allprison_lock);
sx_xlock(&allprison_lock);
}
flags &= ~PD_LIST_SLOCKED;
} else
sx_xlock(&allprison_lock);
flags |= PD_LIST_XLOCKED;
}
if (!(flags & PD_LOCKED)) {
/* Lock the prison mutex. */
mtx_lock(&pr->pr_mtx);
flags |= PD_LOCKED;
}
return (flags);
}
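
The caller contract, sketched for illustration: prison_lock_xlock() may
drop the prison mutex while acquiring or upgrading allprison_lock, so
any refcount decision made before the call must be repeated once both
locks are held.  prison_deref() above follows exactly this
try/lock/retry shape.

	if (!refcount_release_if_not_last(&pr->pr_ref)) {
		/*
		 * Possibly the last reference.  The mutex may be
		 * dropped and retaken inside prison_lock_xlock(), so
		 * the count can change; only a release made while
		 * holding both locks is decisive.
		 */
		flags = prison_lock_xlock(pr, flags);
		if (refcount_release(&pr->pr_ref)) {
			/* Truly the last reference: unlink the prison. */
		}
	}
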
/*
* Set or clear a permission bit in the pr_allow field, passing restrictions
* (cleared permission) down to child jails.
@@ -3068,15 +3071,13 @@ prison_ischild(struct prison *pr1, struct prison *pr2)
}
/*
* Return true if the prison is currently alive. A prison is alive if it is
* valid and holds user references, and it isn't being removed.
* Return true if the prison is currently alive. A prison is alive if it
* holds user references and it isn't being removed.
*/
bool
prison_isalive(struct prison *pr)
{
if (__predict_false(refcount_load(&pr->pr_ref) == 0))
return (false);
if (__predict_false(refcount_load(&pr->pr_uref) == 0))
return (false);
if (__predict_false(pr->pr_flags & PR_REMOVE))
@@ -3087,7 +3088,9 @@ prison_isalive(struct prison *pr)
/*
* Return true if the prison is currently valid. A prison is valid if it has
* been fully created, and is not being destroyed. Note that dying prisons
* are still considered valid.
* are still considered valid. Invalid prisons won't be found under normal
* circumstances, as they're only put in that state by functions that have
* an exclusive hold on allprison_lock.
*/
bool
prison_isvalid(struct prison *pr)
@@ -3754,10 +3757,6 @@ sysctl_jail_list(SYSCTL_HANDLER_ARGS)
cpr->pr_ip6s * sizeof(struct in6_addr));
}
#endif
if (!prison_isvalid(cpr)) {
mtx_unlock(&cpr->pr_mtx);
continue;
}
bzero(xp, sizeof(*xp));
xp->pr_version = XPRISON_VERSION;
xp->pr_id = cpr->pr_id;

sys/kern/sysv_msg.c

@@ -290,7 +290,7 @@ msginit()
if (rsv == NULL)
rsv = osd_reserve(msg_prison_slot);
prison_lock(pr);
if (prison_isvalid(pr) && (pr->pr_allow & PR_ALLOW_SYSVIPC)) {
if (pr->pr_allow & PR_ALLOW_SYSVIPC) {
(void)osd_jail_set_reserved(pr, msg_prison_slot, rsv,
&prison0);
rsv = NULL;

sys/kern/sysv_sem.c

@@ -321,7 +321,7 @@ seminit(void)
if (rsv == NULL)
rsv = osd_reserve(sem_prison_slot);
prison_lock(pr);
if (prison_isvalid(pr) && (pr->pr_allow & PR_ALLOW_SYSVIPC)) {
if (pr->pr_allow & PR_ALLOW_SYSVIPC) {
(void)osd_jail_set_reserved(pr, sem_prison_slot, rsv,
&prison0);
rsv = NULL;

sys/kern/sysv_shm.c

@@ -979,7 +979,7 @@ shminit(void)
if (rsv == NULL)
rsv = osd_reserve(shm_prison_slot);
prison_lock(pr);
if (prison_isvalid(pr) && (pr->pr_allow & PR_ALLOW_SYSVIPC)) {
if (pr->pr_allow & PR_ALLOW_SYSVIPC) {
(void)osd_jail_set_reserved(pr, shm_prison_slot, rsv,
&prison0);
rsv = NULL;

sys/kern/uipc_mqueue.c

@@ -1564,29 +1564,26 @@ mqfs_prison_remove(void *obj, void *data __unused)
const struct prison *pr = obj;
struct prison *tpr;
struct mqfs_node *pn, *tpn;
int found;
struct vnode *pr_root;
found = 0;
pr_root = pr->pr_root;
if (pr->pr_parent->pr_root == pr_root)
return (0);
TAILQ_FOREACH(tpr, &allprison, pr_list) {
prison_lock(tpr);
if (tpr != pr && prison_isvalid(tpr) &&
tpr->pr_root == pr->pr_root)
found = 1;
prison_unlock(tpr);
if (tpr != pr && tpr->pr_root == pr_root)
return (0);
}
if (!found) {
/*
* No jails are rooted in this directory anymore,
* so no queues should be either.
*/
sx_xlock(&mqfs_data.mi_lock);
LIST_FOREACH_SAFE(pn, &mqfs_data.mi_root->mn_children,
mn_sibling, tpn) {
if (pn->mn_pr_root == pr->pr_root)
(void)do_unlink(pn, curthread->td_ucred);
}
sx_xunlock(&mqfs_data.mi_lock);
/*
* No jails are rooted in this directory anymore,
* so no queues should be either.
*/
sx_xlock(&mqfs_data.mi_lock);
LIST_FOREACH_SAFE(pn, &mqfs_data.mi_root->mn_children,
mn_sibling, tpn) {
if (pn->mn_pr_root == pr_root)
(void)do_unlink(pn, curthread->td_ucred);
}
sx_xunlock(&mqfs_data.mi_lock);
return (0);
}

sys/sys/jail.h

@@ -155,7 +155,8 @@ struct prison_racct;
* (m) locked by pr_mtx
* (p) locked by pr_mtx, and also at least shared allprison_lock required
* to update
* (r) atomic via refcount(9), pr_mtx required to decrement to zero
* (r) atomic via refcount(9), pr_mtx and allprison_lock required to
* decrement to zero
*/
struct prison {
TAILQ_ENTRY(prison) pr_list; /* (a) all prisons */