jail: Use refcount(9) for prison references.
Use refcount(9) for both pr_ref and pr_uref in struct prison. This allows prisons to be held and freed without requiring the prison mutex. An exception to this is that dropping the last reference will still lock the prison, to keep the guarantee that a locked prison remains valid and alive (provided it was at the time it was locked). Among other things, this honors the promise made in a comment in crcopy(9), that it will not block, which hasn't been true for two decades.
commit 6754ae2572
parent e3dd8ed77b
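For readers who don't have refcount(9) paged in, the pattern the commit adopts looks roughly like the sketch below. It is illustrative only: struct obj, its fields, obj_destroy(), and the use of M_TEMP are stand-ins, not code from this commit. The point is that holds and non-final releases are single lock-free atomic operations, while the final release takes the object's mutex, which is what preserves the guarantee that a locked object remains valid until unlocked.

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/refcount.h>

struct obj {
    struct mtx      o_mtx;
    volatile u_int  o_ref;      /* atomic via refcount(9) */
};

static void
obj_destroy(struct obj *o)
{
    /* Hypothetical teardown; M_TEMP is a placeholder malloc type. */
    mtx_destroy(&o->o_mtx);
    free(o, M_TEMP);
}

static void
obj_hold(struct obj *o)
{
    /* No lock needed: bumping a nonzero count is one atomic op. */
    refcount_acquire(&o->o_ref);
}

static void
obj_free(struct obj *o)
{
    /* Fast path: drop any reference but the last one locklessly. */
    if (refcount_release_if_not_last(&o->o_ref))
        return;
    /*
     * Slow path: this looks like the last reference, so take the
     * mutex before the final release.  A concurrent hold can still
     * win the race, in which case this release is not the last.
     */
    mtx_lock(&o->o_mtx);
    if (refcount_release(&o->o_ref)) {
        mtx_unlock(&o->o_mtx);
        obj_destroy(o);
        return;
    }
    mtx_unlock(&o->o_mtx);
}

prison_free() in the diff below has exactly this shape, except that the final teardown is handed to a taskqueue instead of being done inline.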
--- a/sys/kern/kern_jail.c
+++ b/sys/kern/kern_jail.c
@@ -1116,7 +1116,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
 name_again:
         deadpr = NULL;
         FOREACH_PRISON_CHILD(ppr, tpr) {
-            if (tpr != pr && tpr->pr_ref > 0 &&
+            if (tpr != pr &&
                 !strcmp(tpr->pr_name + pnamelen, namelc)) {
                 mtx_lock(&tpr->pr_mtx);
                 if (prison_isalive(tpr)) {
@@ -1199,8 +1199,8 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
                 prison_name(mypr, ppr));
             goto done_deref;
         }
-        ppr->pr_ref++;
-        ppr->pr_uref++;
+        prison_hold(ppr);
+        refcount_acquire(&ppr->pr_uref);
         mtx_unlock(&ppr->pr_mtx);
 
         if (jid == 0 && (jid = get_next_prid(&inspr)) == 0) {
@@ -1315,7 +1315,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
          * Grab a reference for existing prisons, to ensure they
          * continue to exist for the duration of the call.
          */
-        pr->pr_ref++;
+        prison_hold(pr);
         drflags |= PD_DEREF;
 #if defined(VIMAGE) && (defined(INET) || defined(INET6))
         if ((pr->pr_flags & PR_VNET) &&
@@ -1434,7 +1434,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
 #ifdef VIMAGE
                 (tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
 #endif
-                tpr->pr_uref == 0) {
+                refcount_load(&tpr->pr_uref) == 0) {
                 descend = 0;
                 continue;
             }
@@ -1502,7 +1502,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
 #ifdef VIMAGE
                 (tpr != tppr && (tpr->pr_flags & PR_VNET)) ||
 #endif
-                tpr->pr_uref == 0) {
+                refcount_load(&tpr->pr_uref) == 0) {
                 descend = 0;
                 continue;
             }
@@ -1738,11 +1738,11 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
     born = !prison_isalive(pr);
     if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) {
         if (pr_flags & PR_PERSIST) {
-            pr->pr_ref++;
-            pr->pr_uref++;
+            prison_hold(pr);
+            refcount_acquire(&pr->pr_uref);
         } else {
-            pr->pr_ref--;
-            pr->pr_uref--;
+            refcount_release(&pr->pr_uref);
+            refcount_release(&pr->pr_ref);
         }
     }
     pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags;
@@ -1857,8 +1857,8 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
     if (pr_flags & PR_PERSIST) {
         mtx_lock(&pr->pr_mtx);
         drflags |= PD_LOCKED;
-        pr->pr_ref++;
-        pr->pr_uref++;
+        refcount_acquire(&pr->pr_ref);
+        refcount_acquire(&pr->pr_uref);
     } else {
         /* Non-persistent jails need no further changes. */
         pr = NULL;
@@ -1933,7 +1933,8 @@ get_next_prid(struct prison **insprp)
     TAILQ_FOREACH(inspr, &allprison, pr_list) {
         if (inspr->pr_id < jid)
             continue;
-        if (inspr->pr_id > jid || inspr->pr_ref == 0) {
+        if (inspr->pr_id > jid ||
+            refcount_load(&inspr->pr_ref) == 0) {
             /*
              * Found an opening. This may be a gap
              * in the list, or a dead jail with the
@@ -2096,7 +2097,7 @@ kern_jail_get(struct thread *td, struct uio *optuio, int flags)
 
 found_prison:
     /* Get the parameters of the prison. */
-    pr->pr_ref++;
+    prison_hold(pr);
     drflags |= PD_DEREF;
     td->td_retval[0] = pr->pr_id;
     error = vfs_setopt(opts, "jid", &pr->pr_id, sizeof(pr->pr_id));
@@ -2309,7 +2310,7 @@ sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
     }
 
     /* Remove all descendants of this prison, then remove this prison. */
-    pr->pr_ref++;
+    prison_hold(pr);
     if (!LIST_EMPTY(&pr->pr_children)) {
         mtx_unlock(&pr->pr_mtx);
         lpr = NULL;
@@ -2317,7 +2318,7 @@ sys_jail_remove(struct thread *td, struct jail_remove_args *uap)
         mtx_lock(&cpr->pr_mtx);
         if (prison_isvalid(cpr)) {
             tpr = cpr;
-            cpr->pr_ref++;
+            prison_hold(cpr);
         } else {
             /* Already removed - do not do it again. */
             tpr = NULL;
@@ -2351,18 +2352,19 @@ prison_remove_one(struct prison *pr)
 
     /* If the prison was persistent, it is not anymore. */
     if (pr->pr_flags & PR_PERSIST) {
-        pr->pr_ref--;
+        refcount_release(&pr->pr_ref);
         drflags |= PD_DEUREF;
         pr->pr_flags &= ~PR_PERSIST;
     }
 
     /*
      * jail_remove added a reference. If that's the only one, remove
-     * the prison now.
+     * the prison now. refcount(9) doesn't guarantee the cache coherence
+     * of non-zero counters, so force it here.
      */
-    KASSERT(pr->pr_ref > 0,
+    KASSERT(refcount_load(&pr->pr_ref) > 0,
         ("prison_remove_one removing a dead prison (jid=%d)", pr->pr_id));
-    if (pr->pr_ref == 1) {
+    if (atomic_load_acq_int(&pr->pr_ref) == 1) {
         prison_deref(pr, drflags);
         return;
     }
@@ -2440,8 +2442,8 @@ do_jail_attach(struct thread *td, struct prison *pr)
      * a process root from one prison, but attached to the jail
      * of another.
      */
-    pr->pr_ref++;
-    pr->pr_uref++;
+    refcount_acquire(&pr->pr_ref);
+    refcount_acquire(&pr->pr_uref);
     mtx_unlock(&pr->pr_mtx);
 
     /* Let modules do whatever they need to prepare for attaching. */
@@ -2614,19 +2616,21 @@ void
 prison_hold_locked(struct prison *pr)
 {
 
-    mtx_assert(&pr->pr_mtx, MA_OWNED);
-    KASSERT(pr->pr_ref > 0,
-        ("Trying to hold dead prison %p (jid=%d).", pr, pr->pr_id));
-    pr->pr_ref++;
+    /* Locking is no longer required. */
+    prison_hold(pr);
 }
 
 void
 prison_hold(struct prison *pr)
 {
+#ifdef INVARIANTS
+    int was_valid = refcount_acquire_if_not_zero(&pr->pr_ref);
 
-    mtx_lock(&pr->pr_mtx);
-    prison_hold_locked(pr);
-    mtx_unlock(&pr->pr_mtx);
+    KASSERT(was_valid,
+        ("Trying to hold dead prison %p (jid=%d).", pr, pr->pr_id));
+#else
+    refcount_acquire(&pr->pr_ref);
+#endif
 }
 
 /*
@@ -2637,14 +2641,17 @@ prison_hold(struct prison *pr)
 void
 prison_free_locked(struct prison *pr)
 {
-    int ref;
+    int lastref;
 
     mtx_assert(&pr->pr_mtx, MA_OWNED);
-    ref = --pr->pr_ref;
+    KASSERT(refcount_load(&pr->pr_ref) > 0,
+        ("Trying to free dead prison %p (jid=%d).",
+        pr, pr->pr_id));
+    lastref = refcount_release(&pr->pr_ref);
     mtx_unlock(&pr->pr_mtx);
-    if (ref == 0) {
+    if (lastref) {
         /*
-         * Don't remove the last reference in this context,
+         * Don't remove the prison itself in this context,
          * in case there are locks held.
          */
         taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
@@ -2655,6 +2662,16 @@ void
 prison_free(struct prison *pr)
 {
 
+    /*
+     * Locking is only required when releasing the last reference.
+     * This allows assurance that a locked prison will remain valid
+     * until it is unlocked.
+     */
+    KASSERT(refcount_load(&pr->pr_ref) > 0,
+        ("Trying to free dead prison %p (jid=%d).",
+        pr, pr->pr_id));
+    if (refcount_release_if_not_last(&pr->pr_ref))
+        return;
     mtx_lock(&pr->pr_mtx);
     prison_free_locked(pr);
 }
@@ -2670,12 +2687,14 @@ prison_free(struct prison *pr)
 void
 prison_proc_hold(struct prison *pr)
 {
+#ifdef INVARIANTS
+    int was_alive = refcount_acquire_if_not_zero(&pr->pr_uref);
 
-    mtx_lock(&pr->pr_mtx);
-    KASSERT(pr->pr_uref > 0,
+    KASSERT(was_alive,
         ("Cannot add a process to a non-alive prison (jid=%d)", pr->pr_id));
-    pr->pr_uref++;
-    mtx_unlock(&pr->pr_mtx);
+#else
+    refcount_acquire(&pr->pr_uref);
+#endif
 }
 
 /*
@@ -2686,20 +2705,27 @@ prison_proc_hold(struct prison *pr)
 void
 prison_proc_free(struct prison *pr)
 {
+    int lasturef;
 
-    mtx_lock(&pr->pr_mtx);
-    KASSERT(pr->pr_uref > 0,
+    /*
+     * Locking is only required when releasing the last reference.
+     * This allows assurance that a locked prison will remain alive
+     * until it is unlocked.
+     */
+    KASSERT(refcount_load(&pr->pr_uref) > 0,
         ("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id));
-    if (pr->pr_uref > 1)
-        pr->pr_uref--;
-    else {
+    if (refcount_release_if_not_last(&pr->pr_uref))
+        return;
+    mtx_lock(&pr->pr_mtx);
+    lasturef = refcount_release(&pr->pr_uref);
+    if (lasturef) {
         /*
         * Don't remove the last user reference in this context,
         * which is expected to be a process that is not only locked,
         * but also half dead. Add a reference so any calls to
        * prison_free() won't re-submit the task.
         */
-        pr->pr_ref++;
+        refcount_acquire(&pr->pr_ref);
        mtx_unlock(&pr->pr_mtx);
        taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
        return;
@@ -2723,7 +2749,7 @@ prison_complete(void *context, int pending)
      * was added. No references are expected if this is completing a call
      * to prison_free, but prison_deref is still called for the cleanup.
      */
-    prison_deref(pr, pr->pr_uref > 0
+    prison_deref(pr, refcount_load(&pr->pr_uref) > 0
         ? PD_DEREF | PD_DEUREF | PD_LOCKED | PD_LIST_XLOCKED
         : PD_LOCKED | PD_LIST_XLOCKED);
 }
@@ -2740,29 +2766,30 @@ static void
 prison_deref(struct prison *pr, int flags)
 {
     struct prison *ppr, *tpr;
-    int ref, lasturef;
+    int lastref, lasturef;
 
     if (!(flags & PD_LOCKED))
         mtx_lock(&pr->pr_mtx);
     for (;;) {
         if (flags & PD_DEUREF) {
-            KASSERT(pr->pr_uref > 0,
+            KASSERT(refcount_load(&pr->pr_uref) > 0,
                 ("prison_deref PD_DEUREF on a dead prison (jid=%d)",
                 pr->pr_id));
-            pr->pr_uref--;
-            lasturef = pr->pr_uref == 0;
+            lasturef = refcount_release(&pr->pr_uref);
             if (lasturef)
-                pr->pr_ref++;
-            KASSERT(prison0.pr_uref != 0, ("prison0 pr_uref=0"));
+                refcount_acquire(&pr->pr_ref);
+            KASSERT(refcount_load(&prison0.pr_uref) > 0,
+                ("prison0 pr_uref=0"));
         } else
             lasturef = 0;
         if (flags & PD_DEREF) {
-            KASSERT(pr->pr_ref > 0,
+            KASSERT(refcount_load(&pr->pr_ref) > 0,
                 ("prison_deref PD_DEREF on a dead prison (jid=%d)",
                 pr->pr_id));
-            pr->pr_ref--;
+            lastref = refcount_release(&pr->pr_ref);
         }
-        ref = pr->pr_ref;
+        else
+            lastref = refcount_load(&pr->pr_ref) == 0;
        mtx_unlock(&pr->pr_mtx);
 
        /*
@@ -2771,7 +2798,7 @@ prison_deref(struct prison *pr, int flags)
          */
         if (lasturef) {
             if (!(flags & (PD_LIST_SLOCKED | PD_LIST_XLOCKED))) {
-                if (ref > 1) {
+                if (atomic_load_acq_int(&pr->pr_ref) > 1) {
                     sx_slock(&allprison_lock);
                     flags |= PD_LIST_SLOCKED;
                 } else {
@@ -2781,12 +2808,12 @@ prison_deref(struct prison *pr, int flags)
             }
             (void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
             mtx_lock(&pr->pr_mtx);
-            ref = --pr->pr_ref;
+            lastref = refcount_release(&pr->pr_ref);
             mtx_unlock(&pr->pr_mtx);
         }
 
         /* If the prison still has references, nothing else to do. */
-        if (ref > 0) {
+        if (!lastref) {
             if (flags & PD_LIST_SLOCKED)
                 sx_sunlock(&allprison_lock);
             else if (flags & PD_LIST_XLOCKED)
@@ -3008,9 +3035,9 @@ prison_isalive(struct prison *pr)
 {
 
     mtx_assert(&pr->pr_mtx, MA_OWNED);
-    if (__predict_false(pr->pr_ref == 0))
+    if (__predict_false(refcount_load(&pr->pr_ref) == 0))
         return (false);
-    if (__predict_false(pr->pr_uref == 0))
+    if (__predict_false(refcount_load(&pr->pr_uref) == 0))
         return (false);
     return (true);
 }
@@ -3025,7 +3052,7 @@ prison_isvalid(struct prison *pr)
 {
 
     mtx_assert(&pr->pr_mtx, MA_OWNED);
-    if (__predict_false(pr->pr_ref == 0))
+    if (__predict_false(refcount_load(&pr->pr_ref) == 0))
         return (false);
     return (true);
 }
--- a/sys/sys/jail.h
+++ b/sys/sys/jail.h
@@ -150,17 +150,18 @@ struct prison_racct;
  *
  * Lock key:
  *   (a) allprison_lock
+ *   (c) set only during creation before the structure is shared, no mutex
+ *       required to read
  *   (m) locked by pr_mtx
  *   (p) locked by pr_mtx, and also at least shared allprison_lock required
  *       to update
- *   (c) set only during creation before the structure is shared, no mutex
- *       required to read
+ *   (r) atomic via refcount(9), pr_mtx required to decrement to zero
  */
 struct prison {
     TAILQ_ENTRY(prison) pr_list;                /* (a) all prisons */
     int              pr_id;                     /* (c) prison id */
-    int              pr_ref;                    /* (m) refcount */
-    int              pr_uref;                   /* (m) user (alive) refcount */
+    volatile u_int   pr_ref;                    /* (r) refcount */
+    volatile u_int   pr_uref;                   /* (r) user (alive) refcount */
     unsigned         pr_flags;                  /* (p) PR_* flags */
     LIST_HEAD(, prison) pr_children;            /* (a) list of child jails */
     LIST_ENTRY(prison) pr_sibling;              /* (a) next in parent's list */
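With the new (r) key, a reader that only needs a momentary answer can inspect the counters with no lock at all, since refcount_load() is an atomic read; pr_mtx matters only to a caller that must keep the count from dropping to zero while it looks, because the final decrement happens under the lock. A hypothetical lockless check in the spirit of prison_isalive(), illustrative only and not part of this commit:

#include <sys/types.h>
#include <sys/refcount.h>

/*
 * Racy liveness snapshot: both loads are atomic, but the answer can
 * go stale as soon as it is returned unless the caller also holds
 * the mutex that gates the final decrement.
 */
static bool
obj_is_alive(volatile u_int *ref, volatile u_int *uref)
{
    return (refcount_load(ref) > 0 && refcount_load(uref) > 0);
}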