jail: Change the locking around pr_ref and pr_uref
Require both the prison mutex and allprison_lock when pr_ref or pr_uref goes to/from zero. Adding a non-first or removing a non-last reference remains lock-free. This means that a shared hold on allprison_lock is sufficient for prison_isalive() to be useful, which removes a number of cases of lock/check/unlock on the prison mutex. Expand the locking in kern_jail_set() to keep allprison_lock held exclusive until the new prison is valid, thus making invalid prisons invisible to any thread holding allprison_lock (except of course the one creating or destroying the prison). This renders prison_isvalid() nearly redundant, now used only in asserts. Differential Revision: https://reviews.freebsd.org/D28419 Differential Revision: https://reviews.freebsd.org/D28458
This commit is contained in:
parent
b963ce4588
commit
f7496dcab0
@ -137,9 +137,11 @@ LIST_HEAD(, prison_racct) allprison_racct;
|
||||
int lastprid = 0;
|
||||
|
||||
static int get_next_prid(struct prison **insprp);
|
||||
static int do_jail_attach(struct thread *td, struct prison *pr);
|
||||
static int do_jail_attach(struct thread *td, struct prison *pr, int drflags);
|
||||
static void prison_complete(void *context, int pending);
|
||||
static void prison_deref(struct prison *pr, int flags);
|
||||
static int prison_lock_xlock(struct prison *pr, int flags);
|
||||
static void prison_free_not_last(struct prison *pr);
|
||||
static void prison_set_allow_locked(struct prison *pr, unsigned flag,
|
||||
int enable);
|
||||
static char *prison_path(struct prison *pr1, struct prison *pr2);
|
||||
@ -1006,18 +1008,15 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
* where it can be inserted later.
|
||||
*/
|
||||
TAILQ_FOREACH(inspr, &allprison, pr_list) {
|
||||
if (inspr->pr_id == jid) {
|
||||
mtx_lock(&inspr->pr_mtx);
|
||||
if (prison_isvalid(inspr)) {
|
||||
pr = inspr;
|
||||
drflags |= PD_LOCKED;
|
||||
inspr = NULL;
|
||||
} else
|
||||
mtx_unlock(&inspr->pr_mtx);
|
||||
break;
|
||||
}
|
||||
if (inspr->pr_id < jid)
|
||||
continue;
|
||||
if (inspr->pr_id > jid)
|
||||
break;
|
||||
pr = inspr;
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
drflags |= PD_LOCKED;
|
||||
inspr = NULL;
|
||||
break;
|
||||
}
|
||||
if (pr != NULL) {
|
||||
ppr = pr->pr_parent;
|
||||
@ -1041,13 +1040,15 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
error = ENOENT;
|
||||
vfs_opterror(opts, "jail %d not found", jid);
|
||||
goto done_deref;
|
||||
} else if (!prison_isalive(pr)) {
|
||||
}
|
||||
if (!prison_isalive(pr)) {
|
||||
if (!(flags & JAIL_DYING)) {
|
||||
error = ENOENT;
|
||||
vfs_opterror(opts, "jail %d is dying",
|
||||
jid);
|
||||
goto done_deref;
|
||||
} else if ((flags & JAIL_ATTACH) ||
|
||||
}
|
||||
if ((flags & JAIL_ATTACH) ||
|
||||
(pr_flags & PR_PERSIST)) {
|
||||
/*
|
||||
* A dying jail might be resurrected
|
||||
@ -1121,12 +1122,10 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
if (namelc[0] != '\0') {
|
||||
pnamelen =
|
||||
(ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
|
||||
name_again:
|
||||
deadpr = NULL;
|
||||
FOREACH_PRISON_CHILD(ppr, tpr) {
|
||||
if (tpr != pr &&
|
||||
!strcmp(tpr->pr_name + pnamelen, namelc)) {
|
||||
mtx_lock(&tpr->pr_mtx);
|
||||
if (prison_isalive(tpr)) {
|
||||
if (pr == NULL &&
|
||||
cuflags != JAIL_CREATE) {
|
||||
@ -1135,6 +1134,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
* for updates.
|
||||
*/
|
||||
pr = tpr;
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
drflags |= PD_LOCKED;
|
||||
break;
|
||||
}
|
||||
@ -1144,28 +1144,22 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
* active sibling jail.
|
||||
*/
|
||||
error = EEXIST;
|
||||
mtx_unlock(&tpr->pr_mtx);
|
||||
vfs_opterror(opts,
|
||||
"jail \"%s\" already exists",
|
||||
name);
|
||||
goto done_deref;
|
||||
}
|
||||
if (pr == NULL &&
|
||||
cuflags != JAIL_CREATE &&
|
||||
prison_isvalid(tpr))
|
||||
cuflags != JAIL_CREATE) {
|
||||
deadpr = tpr;
|
||||
mtx_unlock(&tpr->pr_mtx);
|
||||
}
|
||||
}
|
||||
}
|
||||
/* If no active jail is found, use a dying one. */
|
||||
if (deadpr != NULL && pr == NULL) {
|
||||
if (flags & JAIL_DYING) {
|
||||
mtx_lock(&deadpr->pr_mtx);
|
||||
if (!prison_isvalid(deadpr)) {
|
||||
mtx_unlock(&deadpr->pr_mtx);
|
||||
goto name_again;
|
||||
}
|
||||
pr = deadpr;
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
drflags |= PD_LOCKED;
|
||||
} else if (cuflags == JAIL_UPDATE) {
|
||||
error = ENOENT;
|
||||
@ -1199,19 +1193,11 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
vfs_opterror(opts, "prison limit exceeded");
|
||||
goto done_deref;
|
||||
}
|
||||
mtx_lock(&ppr->pr_mtx);
|
||||
if (!prison_isvalid(ppr)) {
|
||||
mtx_unlock(&ppr->pr_mtx);
|
||||
error = ENOENT;
|
||||
vfs_opterror(opts, "jail \"%s\" not found",
|
||||
prison_name(mypr, ppr));
|
||||
goto done_deref;
|
||||
}
|
||||
prison_hold(ppr);
|
||||
if (refcount_acquire(&ppr->pr_uref))
|
||||
mtx_unlock(&ppr->pr_mtx);
|
||||
else {
|
||||
if (!refcount_acquire_if_not_zero(&ppr->pr_uref)) {
|
||||
/* This brings the parent back to life. */
|
||||
mtx_lock(&ppr->pr_mtx);
|
||||
refcount_acquire(&ppr->pr_uref);
|
||||
mtx_unlock(&ppr->pr_mtx);
|
||||
error = osd_jail_call(ppr, PR_METHOD_CREATE, opts);
|
||||
if (error) {
|
||||
@ -1219,7 +1205,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
drflags |= PD_DEREF | PD_DEUREF;
|
||||
goto done_deref;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (jid == 0 && (jid = get_next_prid(&inspr)) == 0) {
|
||||
error = EAGAIN;
|
||||
@ -1230,6 +1216,8 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
}
|
||||
|
||||
pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
|
||||
refcount_init(&pr->pr_ref, 0);
|
||||
refcount_init(&pr->pr_uref, 0);
|
||||
LIST_INIT(&pr->pr_children);
|
||||
mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK);
|
||||
TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
|
||||
@ -1452,7 +1440,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
#ifdef VIMAGE
|
||||
(tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
|
||||
#endif
|
||||
refcount_load(&tpr->pr_uref) == 0) {
|
||||
!prison_isalive(tpr)) {
|
||||
descend = 0;
|
||||
continue;
|
||||
}
|
||||
@ -1520,7 +1508,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
#ifdef VIMAGE
|
||||
(tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
|
||||
#endif
|
||||
refcount_load(&tpr->pr_uref) == 0) {
|
||||
!prison_isalive(tpr)) {
|
||||
descend = 0;
|
||||
continue;
|
||||
}
|
||||
@ -1759,8 +1747,8 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
prison_hold(pr);
|
||||
refcount_acquire(&pr->pr_uref);
|
||||
} else {
|
||||
refcount_release(&pr->pr_ref);
|
||||
drflags |= PD_DEUREF;
|
||||
prison_free_not_last(pr);
|
||||
}
|
||||
}
|
||||
pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags;
|
||||
@ -1824,8 +1812,6 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
#endif
|
||||
|
||||
/* Let the modules do their work. */
|
||||
sx_downgrade(&allprison_lock);
|
||||
drflags = (drflags & ~PD_LIST_XLOCKED) | PD_LIST_SLOCKED;
|
||||
if (born) {
|
||||
error = osd_jail_call(pr, PR_METHOD_CREATE, opts);
|
||||
if (error) {
|
||||
@ -1842,9 +1828,8 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
|
||||
/* Attach this process to the prison if requested. */
|
||||
if (flags & JAIL_ATTACH) {
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
error = do_jail_attach(td, pr);
|
||||
drflags &= ~PD_LIST_SLOCKED;
|
||||
error = do_jail_attach(td, pr, prison_lock_xlock(pr, drflags));
|
||||
drflags &= ~(PD_LOCKED | PD_LIST_XLOCKED);
|
||||
if (error) {
|
||||
if (created) {
|
||||
/* do_jail_attach has removed the prison. */
|
||||
@ -1857,9 +1842,9 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
|
||||
#ifdef RACCT
|
||||
if (racct_enable && !created) {
|
||||
if (drflags & PD_LIST_SLOCKED) {
|
||||
sx_sunlock(&allprison_lock);
|
||||
drflags &= ~PD_LIST_SLOCKED;
|
||||
if (drflags & PD_LIST_XLOCKED) {
|
||||
sx_xunlock(&allprison_lock);
|
||||
drflags &= ~PD_LIST_XLOCKED;
|
||||
}
|
||||
prison_racct_modify(pr);
|
||||
}
|
||||
@ -1874,8 +1859,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
* not be publicly visible).
|
||||
*/
|
||||
if (pr_flags & PR_PERSIST) {
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
drflags |= PD_LOCKED;
|
||||
drflags = prison_lock_xlock(pr, drflags);
|
||||
refcount_acquire(&pr->pr_ref);
|
||||
refcount_acquire(&pr->pr_uref);
|
||||
} else {
|
||||
@ -1952,13 +1936,8 @@ get_next_prid(struct prison **insprp)
|
||||
TAILQ_FOREACH(inspr, &allprison, pr_list) {
|
||||
if (inspr->pr_id < jid)
|
||||
continue;
|
||||
if (inspr->pr_id > jid ||
|
||||
refcount_load(&inspr->pr_ref) == 0) {
|
||||
/*
|
||||
* Found an opening. This may be a gap
|
||||
* in the list, or a dead jail with the
|
||||
* same ID.
|
||||
*/
|
||||
if (inspr->pr_id > jid) {
|
||||
/* Found an opening. */
|
||||
maxid = 0;
|
||||
break;
|
||||
}
|
||||
@ -2047,18 +2026,14 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
|
||||
error = vfs_copyopt(opts, "lastjid", &jid, sizeof(jid));
|
||||
if (error == 0) {
|
||||
TAILQ_FOREACH(pr, &allprison, pr_list) {
|
||||
if (pr->pr_id > jid && prison_ischild(mypr, pr)) {
|
||||
if (pr->pr_id > jid &&
|
||||
((flags & JAIL_DYING) || prison_isalive(pr)) &&
|
||||
prison_ischild(mypr, pr)) {
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
if ((flags & JAIL_DYING)
|
||||
? prison_isvalid(pr) : prison_isalive(pr))
|
||||
break;
|
||||
mtx_unlock(&pr->pr_mtx);
|
||||
drflags |= PD_LOCKED;
|
||||
goto found_prison;
|
||||
}
|
||||
}
|
||||
if (pr != NULL) {
|
||||
drflags |= PD_LOCKED;
|
||||
goto found_prison;
|
||||
}
|
||||
error = ENOENT;
|
||||
vfs_opterror(opts, "no jail after %d", jid);
|
||||
goto done;
|
||||
@ -2314,7 +2289,7 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
|
||||
int
|
||||
sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
|
||||
{
|
||||
struct prison *pr, *cpr, *lpr, *tpr;
|
||||
struct prison *pr, *cpr, *lpr;
|
||||
int descend, error;
|
||||
|
||||
error = priv_check(td, PRIV_JAIL_REMOVE);
|
||||
@ -2334,21 +2309,13 @@ sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
|
||||
mtx_unlock(&pr->pr_mtx);
|
||||
lpr = NULL;
|
||||
FOREACH_PRISON_DESCENDANT(pr, cpr, descend) {
|
||||
mtx_lock(&cpr->pr_mtx);
|
||||
if (prison_isvalid(cpr)) {
|
||||
tpr = cpr;
|
||||
prison_hold(cpr);
|
||||
} else {
|
||||
/* Already removed - do not do it again. */
|
||||
tpr = NULL;
|
||||
}
|
||||
mtx_unlock(&cpr->pr_mtx);
|
||||
prison_hold(cpr);
|
||||
if (lpr != NULL) {
|
||||
mtx_lock(&lpr->pr_mtx);
|
||||
prison_remove_one(lpr);
|
||||
sx_xlock(&allprison_lock);
|
||||
}
|
||||
lpr = tpr;
|
||||
lpr = cpr;
|
||||
}
|
||||
if (lpr != NULL) {
|
||||
mtx_lock(&lpr->pr_mtx);
|
||||
@ -2377,8 +2344,8 @@ prison_remove_one(struct prison *pr)
|
||||
|
||||
/* If the prison was persistent, it is not anymore. */
|
||||
if (pr->pr_flags & PR_PERSIST) {
|
||||
refcount_release(&pr->pr_ref);
|
||||
drflags |= PD_DEUREF;
|
||||
prison_free_not_last(pr);
|
||||
pr->pr_flags &= ~PR_PERSIST;
|
||||
}
|
||||
|
||||
@ -2428,14 +2395,7 @@ sys_jail_attach(struct thread *td, struct jail_attach_args *uap)
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
/*
|
||||
* Start with exclusive hold on allprison_lock to ensure that a possible
|
||||
* PR_METHOD_REMOVE call isn't concurrent with jail_set or jail_remove.
|
||||
* But then immediately downgrade it since we don't need to stop
|
||||
* readers.
|
||||
*/
|
||||
sx_xlock(&allprison_lock);
|
||||
sx_downgrade(&allprison_lock);
|
||||
sx_slock(&allprison_lock);
|
||||
pr = prison_find_child(td->td_ucred->cr_prison, uap->jid);
|
||||
if (pr == NULL) {
|
||||
sx_sunlock(&allprison_lock);
|
||||
@ -2449,16 +2409,18 @@ sys_jail_attach(struct thread *td, struct jail_attach_args *uap)
|
||||
return (EINVAL);
|
||||
}
|
||||
|
||||
return (do_jail_attach(td, pr));
|
||||
return (do_jail_attach(td, pr, PD_LOCKED | PD_LIST_SLOCKED));
|
||||
}
|
||||
|
||||
static int
|
||||
do_jail_attach(struct thread *td, struct prison *pr)
|
||||
do_jail_attach(struct thread *td, struct prison *pr, int drflags)
|
||||
{
|
||||
struct proc *p;
|
||||
struct ucred *newcred, *oldcred;
|
||||
int error;
|
||||
|
||||
mtx_assert(&pr->pr_mtx, MA_OWNED);
|
||||
sx_assert(&allprison_lock, SX_LOCKED);
|
||||
/*
|
||||
* XXX: Note that there is a slight race here if two threads
|
||||
* in the same privileged process attempt to attach to two
|
||||
@ -2469,15 +2431,18 @@ do_jail_attach(struct thread *td, struct prison *pr)
|
||||
*/
|
||||
refcount_acquire(&pr->pr_ref);
|
||||
refcount_acquire(&pr->pr_uref);
|
||||
drflags |= PD_DEREF | PD_DEUREF;
|
||||
mtx_unlock(&pr->pr_mtx);
|
||||
drflags &= ~PD_LOCKED;
|
||||
|
||||
/* Let modules do whatever they need to prepare for attaching. */
|
||||
error = osd_jail_call(pr, PR_METHOD_ATTACH, td);
|
||||
if (error) {
|
||||
prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED);
|
||||
prison_deref(pr, drflags);
|
||||
return (error);
|
||||
}
|
||||
sx_sunlock(&allprison_lock);
|
||||
sx_unlock(&allprison_lock);
|
||||
drflags &= ~(PD_LIST_SLOCKED | PD_LIST_XLOCKED);
|
||||
|
||||
/*
|
||||
* Reparent the newly attached process to this jail.
|
||||
@ -2513,7 +2478,7 @@ do_jail_attach(struct thread *td, struct prison *pr)
|
||||
rctl_proc_ucred_changed(p, newcred);
|
||||
crfree(newcred);
|
||||
#endif
|
||||
prison_deref(oldcred->cr_prison, PD_DEREF | PD_DEUREF);
|
||||
prison_deref(oldcred->cr_prison, drflags);
|
||||
crfree(oldcred);
|
||||
|
||||
/*
|
||||
@ -2533,8 +2498,9 @@ do_jail_attach(struct thread *td, struct prison *pr)
|
||||
e_revert_osd:
|
||||
/* Tell modules this thread is still in its old jail after all. */
|
||||
sx_slock(&allprison_lock);
|
||||
drflags |= PD_LIST_SLOCKED;
|
||||
(void)osd_jail_call(td->td_ucred->cr_prison, PR_METHOD_ATTACH, td);
|
||||
prison_deref(pr, PD_DEREF | PD_DEUREF | PD_LIST_SLOCKED);
|
||||
prison_deref(pr, drflags);
|
||||
return (error);
|
||||
}
|
||||
|
||||
@ -2548,19 +2514,13 @@ prison_find(int prid)
|
||||
|
||||
sx_assert(&allprison_lock, SX_LOCKED);
|
||||
TAILQ_FOREACH(pr, &allprison, pr_list) {
|
||||
if (pr->pr_id == prid) {
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
if (prison_isvalid(pr))
|
||||
return (pr);
|
||||
/*
|
||||
* Any active prison with the same ID would have
|
||||
* been inserted before a dead one.
|
||||
*/
|
||||
mtx_unlock(&pr->pr_mtx);
|
||||
break;
|
||||
}
|
||||
if (pr->pr_id < prid)
|
||||
continue;
|
||||
if (pr->pr_id > prid)
|
||||
break;
|
||||
KASSERT(prison_isvalid(pr), ("Found invalid prison %p", pr));
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
return (pr);
|
||||
}
|
||||
return (NULL);
|
||||
}
|
||||
@ -2577,10 +2537,10 @@ prison_find_child(struct prison *mypr, int prid)
|
||||
sx_assert(&allprison_lock, SX_LOCKED);
|
||||
FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
|
||||
if (pr->pr_id == prid) {
|
||||
KASSERT(prison_isvalid(pr),
|
||||
("Found invalid prison %p", pr));
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
if (prison_isvalid(pr))
|
||||
return (pr);
|
||||
mtx_unlock(&pr->pr_mtx);
|
||||
return (pr);
|
||||
}
|
||||
}
|
||||
return (NULL);
|
||||
@ -2598,26 +2558,21 @@ prison_find_name(struct prison *mypr, const char *name)
|
||||
|
||||
sx_assert(&allprison_lock, SX_LOCKED);
|
||||
mylen = (mypr == &prison0) ? 0 : strlen(mypr->pr_name) + 1;
|
||||
again:
|
||||
deadpr = NULL;
|
||||
FOREACH_PRISON_DESCENDANT(mypr, pr, descend) {
|
||||
if (!strcmp(pr->pr_name + mylen, name)) {
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
if (prison_isalive(pr))
|
||||
KASSERT(prison_isvalid(pr),
|
||||
("Found invalid prison %p", pr));
|
||||
if (prison_isalive(pr)) {
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
return (pr);
|
||||
if (prison_isvalid(pr))
|
||||
deadpr = pr;
|
||||
mtx_unlock(&pr->pr_mtx);
|
||||
}
|
||||
deadpr = pr;
|
||||
}
|
||||
}
|
||||
/* There was no valid prison - perhaps there was a dying one. */
|
||||
if (deadpr != NULL) {
|
||||
if (deadpr != NULL)
|
||||
mtx_lock(&deadpr->pr_mtx);
|
||||
if (!prison_isvalid(deadpr)) {
|
||||
mtx_unlock(&deadpr->pr_mtx);
|
||||
goto again;
|
||||
}
|
||||
}
|
||||
return (deadpr);
|
||||
}
|
||||
|
||||
@ -2671,45 +2626,53 @@ prison_hold(struct prison *pr)
|
||||
|
||||
/*
|
||||
* Remove a prison reference. If that was the last reference, the
|
||||
* prison will be removed (at a later time). Return with the prison
|
||||
* unlocked.
|
||||
* prison will be removed (at a later time).
|
||||
*/
|
||||
void
|
||||
prison_free_locked(struct prison *pr)
|
||||
{
|
||||
int lastref;
|
||||
|
||||
mtx_assert(&pr->pr_mtx, MA_OWNED);
|
||||
KASSERT(refcount_load(&pr->pr_ref) > 0,
|
||||
("Trying to free dead prison %p (jid=%d).",
|
||||
pr, pr->pr_id));
|
||||
lastref = refcount_release(&pr->pr_ref);
|
||||
/*
|
||||
* Locking is no longer required, but unlock because the caller
|
||||
* expects it.
|
||||
*/
|
||||
mtx_unlock(&pr->pr_mtx);
|
||||
if (lastref) {
|
||||
/*
|
||||
* Don't remove the prison itself in this context,
|
||||
* in case there are locks held.
|
||||
*/
|
||||
taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
|
||||
}
|
||||
prison_free(pr);
|
||||
}
|
||||
|
||||
void
|
||||
prison_free(struct prison *pr)
|
||||
{
|
||||
|
||||
/*
|
||||
* Locking is only required when releasing the last reference.
|
||||
* This allows assurance that a locked prison will remain valid
|
||||
* until it is unlocked.
|
||||
*/
|
||||
KASSERT(refcount_load(&pr->pr_ref) > 0,
|
||||
("Trying to free dead prison %p (jid=%d).",
|
||||
pr, pr->pr_id));
|
||||
if (refcount_release_if_not_last(&pr->pr_ref))
|
||||
return;
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
prison_free_locked(pr);
|
||||
if (!refcount_release_if_not_last(&pr->pr_ref)) {
|
||||
/*
|
||||
* Don't remove the last reference in this context,
|
||||
* in case there are locks held.
|
||||
*/
|
||||
taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
prison_free_not_last(struct prison *pr)
|
||||
{
|
||||
#ifdef INVARIANTS
|
||||
int lastref;
|
||||
|
||||
KASSERT(refcount_load(&pr->pr_ref) > 0,
|
||||
("Trying to free dead prison %p (jid=%d).",
|
||||
pr, pr->pr_id));
|
||||
lastref = refcount_release(&pr->pr_ref);
|
||||
KASSERT(!lastref,
|
||||
("prison_free_not_last freed last ref on prison %p (jid=%d).",
|
||||
pr, pr->pr_id));
|
||||
#else
|
||||
refcount_release(&pr>pr_ref);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2718,7 +2681,8 @@ prison_free(struct prison *pr)
|
||||
* user-visible, except through the jail system calls.  It is also
|
||||
* an error to hold an invalid prison. A prison record will remain
|
||||
* alive as long as it has at least one user reference, and will not
|
||||
* be set to the dying state was long as the prison mutex is held.
|
||||
* be set to the dying state until the prison mutex and allprison_lock
|
||||
* are both freed.
|
||||
*/
|
||||
void
|
||||
prison_proc_hold(struct prison *pr)
|
||||
@ -2756,7 +2720,7 @@ prison_proc_free(struct prison *pr)
|
||||
* but also half dead. Add a reference so any calls to
|
||||
* prison_free() won't re-submit the task.
|
||||
*/
|
||||
refcount_acquire(&pr->pr_ref);
|
||||
prison_hold(pr);
|
||||
taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
|
||||
}
|
||||
}
|
||||
@ -2768,18 +2732,18 @@ static void
|
||||
prison_complete(void *context, int pending)
|
||||
{
|
||||
struct prison *pr = context;
|
||||
int drflags;
|
||||
|
||||
sx_xlock(&allprison_lock);
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
/*
|
||||
* If this is completing a call to prison_proc_free, there will still
|
||||
* be a user reference held; clear that as well as the reference that
|
||||
* was added. No references are expected if this is completing a call
|
||||
* to prison_free, but prison_deref is still called for the cleanup.
|
||||
* This could be called to release the last reference, or the
|
||||
* last user reference; the existence of a user reference implies
|
||||
* the latter. There will always be a reference to remove, as
|
||||
* prison_proc_free adds one.
|
||||
*/
|
||||
prison_deref(pr, refcount_load(&pr->pr_uref) > 0
|
||||
? PD_DEREF | PD_DEUREF | PD_LOCKED | PD_LIST_XLOCKED
|
||||
: PD_LOCKED | PD_LIST_XLOCKED);
|
||||
drflags = prison_lock_xlock(pr, PD_DEREF);
|
||||
if (refcount_load(&pr->pr_uref) > 0)
|
||||
drflags |= PD_DEUREF;
|
||||
prison_deref(pr, drflags);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2794,84 +2758,86 @@ static void
|
||||
prison_deref(struct prison *pr, int flags)
|
||||
{
|
||||
struct prisonlist freeprison;
|
||||
struct prison *rpr, *tpr;
|
||||
int lastref, lasturef;
|
||||
struct prison *rpr, *ppr, *tpr;
|
||||
|
||||
TAILQ_INIT(&freeprison);
|
||||
if (!(flags & PD_LOCKED))
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
/*
|
||||
* Release this prison as requested, which may cause its parent
|
||||
* to be released, and then maybe its grandparent, etc.
|
||||
*/
|
||||
for (;;) {
|
||||
if (flags & PD_DEUREF) {
|
||||
/* Drop a user reference. */
|
||||
KASSERT(refcount_load(&pr->pr_uref) > 0,
|
||||
("prison_deref PD_DEUREF on a dead prison (jid=%d)",
|
||||
pr->pr_id));
|
||||
lasturef = refcount_release(&pr->pr_uref);
|
||||
if (lasturef)
|
||||
refcount_acquire(&pr->pr_ref);
|
||||
KASSERT(refcount_load(&prison0.pr_uref) > 0,
|
||||
("prison0 pr_uref=0"));
|
||||
} else
|
||||
lasturef = 0;
|
||||
if (!refcount_release_if_not_last(&pr->pr_uref)) {
|
||||
if (!(flags & PD_DEREF)) {
|
||||
prison_hold(pr);
|
||||
flags |= PD_DEREF;
|
||||
}
|
||||
flags = prison_lock_xlock(pr, flags);
|
||||
if (refcount_release(&pr->pr_uref)) {
|
||||
/*
|
||||
* When the last user reference goes,
|
||||
* this becomes a dying prison.
|
||||
*/
|
||||
KASSERT(
|
||||
refcount_load(&prison0.pr_uref) > 0,
|
||||
("prison0 pr_uref=0"));
|
||||
mtx_unlock(&pr->pr_mtx);
|
||||
flags &= ~PD_LOCKED;
|
||||
(void)osd_jail_call(pr,
|
||||
PR_METHOD_REMOVE, NULL);
|
||||
}
|
||||
}
|
||||
}
|
||||
if (flags & PD_DEREF) {
|
||||
/* Drop a reference. */
|
||||
KASSERT(refcount_load(&pr->pr_ref) > 0,
|
||||
("prison_deref PD_DEREF on a dead prison (jid=%d)",
|
||||
pr->pr_id));
|
||||
lastref = refcount_release(&pr->pr_ref);
|
||||
}
|
||||
else
|
||||
lastref = refcount_load(&pr->pr_ref) == 0;
|
||||
mtx_unlock(&pr->pr_mtx);
|
||||
|
||||
/*
|
||||
* Tell the modules if the last user reference was removed
|
||||
* (even it sticks around in dying state).
|
||||
*/
|
||||
if (lasturef) {
|
||||
if (!(flags & (PD_LIST_SLOCKED | PD_LIST_XLOCKED))) {
|
||||
if (atomic_load_acq_int(&pr->pr_ref) > 1) {
|
||||
sx_slock(&allprison_lock);
|
||||
flags |= PD_LIST_SLOCKED;
|
||||
} else {
|
||||
sx_xlock(&allprison_lock);
|
||||
flags |= PD_LIST_XLOCKED;
|
||||
if (!refcount_release_if_not_last(&pr->pr_ref)) {
|
||||
flags = prison_lock_xlock(pr, flags);
|
||||
if (refcount_release(&pr->pr_ref)) {
|
||||
/*
|
||||
* When the last reference goes,
|
||||
* unlink the prison and set it aside.
|
||||
*/
|
||||
KASSERT(
|
||||
refcount_load(&pr->pr_uref) == 0,
|
||||
("prison_deref: last ref, "
|
||||
"but still has %d urefs (jid=%d)",
|
||||
pr->pr_uref, pr->pr_id));
|
||||
KASSERT(
|
||||
refcount_load(&prison0.pr_ref) != 0,
|
||||
("prison0 pr_ref=0"));
|
||||
TAILQ_REMOVE(&allprison, pr, pr_list);
|
||||
LIST_REMOVE(pr, pr_sibling);
|
||||
TAILQ_INSERT_TAIL(&freeprison, pr,
|
||||
pr_list);
|
||||
for (ppr = pr->pr_parent;
|
||||
ppr != NULL;
|
||||
ppr = ppr->pr_parent)
|
||||
ppr->pr_childcount--;
|
||||
/*
|
||||
* Removing a prison frees references
|
||||
* from its parent.
|
||||
*/
|
||||
mtx_unlock(&pr->pr_mtx);
|
||||
flags &= ~PD_LOCKED;
|
||||
pr = pr->pr_parent;
|
||||
flags |= PD_DEREF | PD_DEUREF;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
(void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
lastref = refcount_release(&pr->pr_ref);
|
||||
mtx_unlock(&pr->pr_mtx);
|
||||
}
|
||||
|
||||
if (!lastref)
|
||||
break;
|
||||
|
||||
if (flags & PD_LIST_SLOCKED) {
|
||||
if (!sx_try_upgrade(&allprison_lock)) {
|
||||
sx_sunlock(&allprison_lock);
|
||||
sx_xlock(&allprison_lock);
|
||||
}
|
||||
flags &= ~PD_LIST_SLOCKED;
|
||||
} else if (!(flags & PD_LIST_XLOCKED))
|
||||
sx_xlock(&allprison_lock);
|
||||
flags |= PD_LIST_XLOCKED;
|
||||
|
||||
TAILQ_REMOVE(&allprison, pr, pr_list);
|
||||
LIST_REMOVE(pr, pr_sibling);
|
||||
TAILQ_INSERT_TAIL(&freeprison, pr, pr_list);
|
||||
for (tpr = pr->pr_parent; tpr != NULL; tpr = tpr->pr_parent)
|
||||
tpr->pr_childcount--;
|
||||
|
||||
/* Removing a prison frees a reference on its parent. */
|
||||
pr = pr->pr_parent;
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
flags |= PD_DEREF | PD_DEUREF;
|
||||
break;
|
||||
}
|
||||
|
||||
/* Release all the prison locks. */
|
||||
if (flags & PD_LOCKED)
|
||||
mtx_unlock(&pr->pr_mtx);
|
||||
if (flags & PD_LIST_SLOCKED)
|
||||
sx_sunlock(&allprison_lock);
|
||||
else if (flags & PD_LIST_XLOCKED)
|
||||
@ -2902,10 +2868,47 @@ prison_deref(struct prison *pr, int flags)
|
||||
if (racct_enable)
|
||||
prison_racct_detach(rpr);
|
||||
#endif
|
||||
TAILQ_REMOVE(&freeprison, rpr, pr_list);
|
||||
free(rpr, M_PRISON);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Given the current locking state in the flags, make sure allprison_lock
|
||||
* is held exclusive, and the prison is locked. Return flags indicating
|
||||
* the new state.
|
||||
*/
|
||||
static int
|
||||
prison_lock_xlock(struct prison *pr, int flags)
|
||||
{
|
||||
|
||||
if (!(flags & PD_LIST_XLOCKED)) {
|
||||
/*
|
||||
* Get allprison_lock, which may be an upgrade,
|
||||
* and may require unlocking the prison.
|
||||
*/
|
||||
if (flags & PD_LOCKED) {
|
||||
mtx_unlock(&pr->pr_mtx);
|
||||
flags &= ~PD_LOCKED;
|
||||
}
|
||||
if (flags & PD_LIST_SLOCKED) {
|
||||
if (!sx_try_upgrade(&allprison_lock)) {
|
||||
sx_sunlock(&allprison_lock);
|
||||
sx_xlock(&allprison_lock);
|
||||
}
|
||||
flags &= ~PD_LIST_SLOCKED;
|
||||
} else
|
||||
sx_xlock(&allprison_lock);
|
||||
flags |= PD_LIST_XLOCKED;
|
||||
}
|
||||
if (!(flags & PD_LOCKED)) {
|
||||
/* Lock the prison mutex. */
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
flags |= PD_LOCKED;
|
||||
}
|
||||
return flags;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set or clear a permission bit in the pr_allow field, passing restrictions
|
||||
* (cleared permission) down to child jails.
|
||||
@ -3068,15 +3071,13 @@ prison_ischild(struct prison *pr1, struct prison *pr2)
|
||||
}
|
||||
|
||||
/*
|
||||
* Return true if the prison is currently alive. A prison is alive if it is
|
||||
* valid and holds user references, and it isn't being removed.
|
||||
* Return true if the prison is currently alive. A prison is alive if it
|
||||
* holds user references and it isn't being removed.
|
||||
*/
|
||||
bool
|
||||
prison_isalive(struct prison *pr)
|
||||
{
|
||||
|
||||
if (__predict_false(refcount_load(&pr->pr_ref) == 0))
|
||||
return (false);
|
||||
if (__predict_false(refcount_load(&pr->pr_uref) == 0))
|
||||
return (false);
|
||||
if (__predict_false(pr->pr_flags & PR_REMOVE))
|
||||
@ -3087,7 +3088,9 @@ prison_isalive(struct prison *pr)
|
||||
/*
|
||||
* Return true if the prison is currently valid. A prison is valid if it has
|
||||
* been fully created, and is not being destroyed. Note that dying prisons
|
||||
* are still considered valid.
|
||||
* are still considered valid. Invalid prisons won't be found under normal
|
||||
* circumstances, as they're only put in that state by functions that have
|
||||
* an exclusive hold on allprison_lock.
|
||||
*/
|
||||
bool
|
||||
prison_isvalid(struct prison *pr)
|
||||
@ -3754,10 +3757,6 @@ sysctl_jail_list(SYSCTL_HANDLER_ARGS)
|
||||
cpr->pr_ip6s * sizeof(struct in6_addr));
|
||||
}
|
||||
#endif
|
||||
if (!prison_isvalid(cpr)) {
|
||||
mtx_unlock(&cpr->pr_mtx);
|
||||
continue;
|
||||
}
|
||||
bzero(xp, sizeof(*xp));
|
||||
xp->pr_version = XPRISON_VERSION;
|
||||
xp->pr_id = cpr->pr_id;
|
||||
|
@ -290,7 +290,7 @@ msginit()
|
||||
if (rsv == NULL)
|
||||
rsv = osd_reserve(msg_prison_slot);
|
||||
prison_lock(pr);
|
||||
if (prison_isvalid(pr) && (pr->pr_allow & PR_ALLOW_SYSVIPC)) {
|
||||
if (pr->pr_allow & PR_ALLOW_SYSVIPC) {
|
||||
(void)osd_jail_set_reserved(pr, msg_prison_slot, rsv,
|
||||
&prison0);
|
||||
rsv = NULL;
|
||||
|
@ -321,7 +321,7 @@ seminit(void)
|
||||
if (rsv == NULL)
|
||||
rsv = osd_reserve(sem_prison_slot);
|
||||
prison_lock(pr);
|
||||
if (prison_isvalid(pr) && (pr->pr_allow & PR_ALLOW_SYSVIPC)) {
|
||||
if (pr->pr_allow & PR_ALLOW_SYSVIPC) {
|
||||
(void)osd_jail_set_reserved(pr, sem_prison_slot, rsv,
|
||||
&prison0);
|
||||
rsv = NULL;
|
||||
|
@ -979,7 +979,7 @@ shminit(void)
|
||||
if (rsv == NULL)
|
||||
rsv = osd_reserve(shm_prison_slot);
|
||||
prison_lock(pr);
|
||||
if (prison_isvalid(pr) && (pr->pr_allow & PR_ALLOW_SYSVIPC)) {
|
||||
if (pr->pr_allow & PR_ALLOW_SYSVIPC) {
|
||||
(void)osd_jail_set_reserved(pr, shm_prison_slot, rsv,
|
||||
&prison0);
|
||||
rsv = NULL;
|
||||
|
@ -1564,29 +1564,26 @@ mqfs_prison_remove(void *obj, void *data __unused)
|
||||
const struct prison *pr = obj;
|
||||
struct prison *tpr;
|
||||
struct mqfs_node *pn, *tpn;
|
||||
int found;
|
||||
struct vnode *pr_root;
|
||||
|
||||
found = 0;
|
||||
pr_root = pr->pr_root;
|
||||
if (pr->pr_parent->pr_root == pr_root)
|
||||
return (0);
|
||||
TAILQ_FOREACH(tpr, &allprison, pr_list) {
|
||||
prison_lock(tpr);
|
||||
if (tpr != pr && prison_isvalid(tpr) &&
|
||||
tpr->pr_root == pr->pr_root)
|
||||
found = 1;
|
||||
prison_unlock(tpr);
|
||||
if (tpr != pr && tpr->pr_root == pr_root)
|
||||
return (0);
|
||||
}
|
||||
if (!found) {
|
||||
/*
|
||||
* No jails are rooted in this directory anymore,
|
||||
* so no queues should be either.
|
||||
*/
|
||||
sx_xlock(&mqfs_data.mi_lock);
|
||||
LIST_FOREACH_SAFE(pn, &mqfs_data.mi_root->mn_children,
|
||||
mn_sibling, tpn) {
|
||||
if (pn->mn_pr_root == pr->pr_root)
|
||||
(void)do_unlink(pn, curthread->td_ucred);
|
||||
}
|
||||
sx_xunlock(&mqfs_data.mi_lock);
|
||||
/*
|
||||
* No jails are rooted in this directory anymore,
|
||||
* so no queues should be either.
|
||||
*/
|
||||
sx_xlock(&mqfs_data.mi_lock);
|
||||
LIST_FOREACH_SAFE(pn, &mqfs_data.mi_root->mn_children,
|
||||
mn_sibling, tpn) {
|
||||
if (pn->mn_pr_root == pr_root)
|
||||
(void)do_unlink(pn, curthread->td_ucred);
|
||||
}
|
||||
sx_xunlock(&mqfs_data.mi_lock);
|
||||
return (0);
|
||||
}
|
||||
|
||||
|
@ -155,7 +155,8 @@ struct prison_racct;
|
||||
* (m) locked by pr_mtx
|
||||
* (p) locked by pr_mtx, and also at least shared allprison_lock required
|
||||
* to update
|
||||
* (r) atomic via refcount(9), pr_mtx required to decrement to zero
|
||||
* (r) atomic via refcount(9), pr_mtx and allprison_lock required to
|
||||
* decrement to zero
|
||||
*/
|
||||
struct prison {
|
||||
TAILQ_ENTRY(prison) pr_list; /* (a) all prisons */
|
||||
|
Loading…
Reference in New Issue
Block a user