jail: Add pr_state to struct prison

Rather that using references (pr_ref and pr_uref) to deduce the state
of a prison, keep track of its state explicitly.  A prison is either
"invalid" (pr_ref == 0), "alive" (pr_uref > 0) or "dying"
(pr_uref == 0).

State transitions are generally tied to the reference counts, but with
some flexibility: a new prison is "invalid" even though it now starts
with a reference, and jail_remove(2) sets the state to "dying" before
the user reference count drops to zero (which was prviously
accomplished via the PR_REMOVE flag).

pr_state is protected by both the prison mutex and allprison_lock, so
it has the same availablity guarantees as the reference counts do.

Differential Revision:	https://reviews.freebsd.org/D27876
This commit is contained in:
Jamie Gritton 2021-02-21 13:24:47 -08:00
parent 2443068d48
commit 1158508a80
2 changed files with 65 additions and 51 deletions

View File

@ -106,6 +106,7 @@ struct prison prison0 = {
.pr_path = "/",
.pr_securelevel = -1,
.pr_devfs_rsnum = 0,
.pr_state = PRISON_STATE_ALIVE,
.pr_childmax = JAIL_MAX,
.pr_hostuuid = DEFAULT_HOSTUUID,
.pr_children = LIST_HEAD_INITIALIZER(prison0.pr_children),
@ -663,7 +664,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
}
ch_flags |= jsf->new | jsf->disable;
}
if ((flags & (JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH)) == JAIL_CREATE
if ((flags & (JAIL_CREATE | JAIL_ATTACH)) == JAIL_CREATE
&& !(pr_flags & PR_PERSIST)) {
error = EINVAL;
vfs_opterror(opts, "new jail must persist or attach");
@ -1198,6 +1199,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
/* This brings the parent back to life. */
mtx_lock(&ppr->pr_mtx);
refcount_acquire(&ppr->pr_uref);
ppr->pr_state = PRISON_STATE_ALIVE;
mtx_unlock(&ppr->pr_mtx);
error = osd_jail_call(ppr, PR_METHOD_CREATE, opts);
if (error) {
@ -1216,8 +1218,10 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
}
pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
refcount_init(&pr->pr_ref, 0);
pr->pr_state = PRISON_STATE_INVALID;
refcount_init(&pr->pr_ref, 1);
refcount_init(&pr->pr_uref, 0);
drflags |= PD_DEREF;
LIST_INIT(&pr->pr_children);
mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK);
TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
@ -1311,11 +1315,6 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
mtx_lock(&pr->pr_mtx);
drflags |= PD_LOCKED;
/*
* New prisons do not yet have a reference, because we do not
* want others to see the incomplete prison once the
* allprison_lock is downgraded.
*/
} else {
/*
* Grab a reference for existing prisons, to ensure they
@ -1737,14 +1736,17 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
prison_set_allow_locked(pr, tallow, 0);
/*
* Persistent prisons get an extra reference, and prisons losing their
* persist flag lose that reference. Only do this for existing prisons
* for now, so new ones will remain unseen until after the module
* handlers have completed.
* persist flag lose that reference.
*/
born = !prison_isalive(pr);
if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) {
if (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags)) {
if (pr_flags & PR_PERSIST) {
prison_hold(pr);
/*
* This may make a dead prison alive again, but wait
* to label it as such until after OSD calls have had
* a chance to run (and perhaps to fail).
*/
refcount_acquire(&pr->pr_uref);
} else {
drflags |= PD_DEUREF;
@ -1752,7 +1754,6 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
}
}
pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags;
pr->pr_flags &= ~PR_REMOVE;
mtx_unlock(&pr->pr_mtx);
drflags &= ~PD_LOCKED;
@ -1826,15 +1827,20 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
goto done_deref;
}
/*
* A new prison is now ready to be seen; either it has gained a user
* reference via persistence, or is about to gain one via attachment.
*/
if (born) {
drflags = prison_lock_xlock(pr, drflags);
pr->pr_state = PRISON_STATE_ALIVE;
}
/* Attach this process to the prison if requested. */
if (flags & JAIL_ATTACH) {
error = do_jail_attach(td, pr, prison_lock_xlock(pr, drflags));
drflags &= ~(PD_LOCKED | PD_LIST_XLOCKED);
if (error) {
if (created) {
/* do_jail_attach has removed the prison. */
pr = NULL;
}
vfs_opterror(opts, "attach failed");
goto done_deref;
}
@ -1852,22 +1858,6 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
td->td_retval[0] = pr->pr_id;
if (created) {
/*
* Add a reference to newly created persistent prisons
* (which was not done earlier so that the prison would
* not be publicly visible).
*/
if (pr_flags & PR_PERSIST) {
drflags = prison_lock_xlock(pr, drflags);
refcount_acquire(&pr->pr_ref);
refcount_acquire(&pr->pr_uref);
} else {
/* Non-persistent jails need no further changes. */
pr = NULL;
}
}
done_deref:
/* Release any temporary prison holds and/or locks. */
if (pr != NULL)
@ -2332,7 +2322,7 @@ static void
prison_remove_one(struct prison *pr)
{
struct proc *p;
int drflags;
int was_alive, drflags;
drflags = PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED;
@ -2340,7 +2330,8 @@ prison_remove_one(struct prison *pr)
* Mark the prison as doomed, so it doesn't accidentally come back
* to life. It may still be explicitly brought back by jail_set(2).
*/
pr->pr_flags |= PR_REMOVE;
was_alive = pr->pr_state == PRISON_STATE_ALIVE;
pr->pr_state = PRISON_STATE_DYING;
/* If the prison was persistent, it is not anymore. */
if (pr->pr_flags & PR_PERSIST) {
@ -2361,9 +2352,14 @@ prison_remove_one(struct prison *pr)
return;
}
/* Tell modules this prison has died. */
mtx_unlock(&pr->pr_mtx);
drflags &= ~PD_LOCKED;
if (was_alive)
(void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
sx_xunlock(&allprison_lock);
drflags &= ~(PD_LOCKED | PD_LIST_XLOCKED);
drflags &= ~PD_LIST_XLOCKED;
/*
* Kill all processes unfortunate enough to be attached to this prison.
*/
@ -2429,7 +2425,7 @@ do_jail_attach(struct thread *td, struct prison *pr, int drflags)
* a process root from one prison, but attached to the jail
* of another.
*/
refcount_acquire(&pr->pr_ref);
prison_hold(pr);
refcount_acquire(&pr->pr_uref);
drflags |= PD_DEREF | PD_DEUREF;
mtx_unlock(&pr->pr_mtx);
@ -2721,6 +2717,12 @@ prison_proc_free(struct prison *pr)
* prison_free() won't re-submit the task.
*/
prison_hold(pr);
mtx_lock(&pr->pr_mtx);
KASSERT(!(pr->pr_flags & PR_COMPLETE_PROC),
("Redundant last reference in prison_proc_free (jid=%d)",
pr->pr_id));
pr->pr_flags |= PR_COMPLETE_PROC;
mtx_unlock(&pr->pr_mtx);
taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
}
}
@ -2735,14 +2737,14 @@ prison_complete(void *context, int pending)
int drflags;
/*
* This could be called to release the last reference, or the
* last user reference; the existence of a user reference implies
* the latter. There will always be a reference to remove, as
* prison_proc_free adds one.
* This could be called to release the last reference, or the last
* user reference (plus the reference held in prison_proc_free).
*/
drflags = prison_lock_xlock(pr, PD_DEREF);
if (refcount_load(&pr->pr_uref) > 0)
if (pr->pr_flags & PR_COMPLETE_PROC) {
pr->pr_flags &= ~PR_COMPLETE_PROC;
drflags |= PD_DEUREF;
}
prison_deref(pr, drflags);
}
@ -2777,7 +2779,8 @@ prison_deref(struct prison *pr, int flags)
flags |= PD_DEREF;
}
flags = prison_lock_xlock(pr, flags);
if (refcount_release(&pr->pr_uref)) {
if (refcount_release(&pr->pr_uref) &&
pr->pr_state == PRISON_STATE_ALIVE) {
/*
* When the last user references goes,
* this becomes a dying prison.
@ -2785,6 +2788,7 @@ prison_deref(struct prison *pr, int flags)
KASSERT(
refcount_load(&prison0.pr_uref) > 0,
("prison0 pr_uref=0"));
pr->pr_state = PRISON_STATE_DYING;
mtx_unlock(&pr->pr_mtx);
flags &= ~PD_LOCKED;
(void)osd_jail_call(pr,
@ -2812,6 +2816,7 @@ prison_deref(struct prison *pr, int flags)
KASSERT(
refcount_load(&prison0.pr_ref) != 0,
("prison0 pr_ref=0"));
pr->pr_state = PRISON_STATE_INVALID;
TAILQ_REMOVE(&allprison, pr, pr_list);
LIST_REMOVE(pr, pr_sibling);
TAILQ_INSERT_TAIL(&freeprison, pr,
@ -3078,9 +3083,7 @@ bool
prison_isalive(struct prison *pr)
{
if (__predict_false(refcount_load(&pr->pr_uref) == 0))
return (false);
if (__predict_false(pr->pr_flags & PR_REMOVE))
if (__predict_false(pr->pr_state != PRISON_STATE_ALIVE))
return (false);
return (true);
}
@ -3096,6 +3099,8 @@ bool
prison_isvalid(struct prison *pr)
{
if (__predict_false(pr->pr_state == PRISON_STATE_INVALID))
return (false);
if (__predict_false(refcount_load(&pr->pr_ref) == 0))
return (false);
return (true);
@ -3760,8 +3765,7 @@ sysctl_jail_list(SYSCTL_HANDLER_ARGS)
bzero(xp, sizeof(*xp));
xp->pr_version = XPRISON_VERSION;
xp->pr_id = cpr->pr_id;
xp->pr_state = prison_isalive(cpr)
? PRISON_STATE_ALIVE : PRISON_STATE_DYING;
xp->pr_state = cpr->pr_state;
strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path));
strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host));
strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name));
@ -4412,6 +4416,10 @@ db_show_prison(struct prison *pr)
db_printf(" parent = %p\n", pr->pr_parent);
db_printf(" ref = %d\n", pr->pr_ref);
db_printf(" uref = %d\n", pr->pr_uref);
db_printf(" state = %s\n",
pr->pr_state == PRISON_STATE_ALIVE ? "alive" :
pr->pr_state == PRISON_STATE_DYING ? "dying" :
"invalid");
db_printf(" path = %s\n", pr->pr_path);
db_printf(" cpuset = %d\n", pr->pr_cpuset
? pr->pr_cpuset->cs_id : -1);

View File

@ -88,9 +88,11 @@ struct xprison {
};
#define XPRISON_VERSION 3
#define PRISON_STATE_INVALID 0
#define PRISON_STATE_ALIVE 1
#define PRISON_STATE_DYING 2
enum prison_state {
PRISON_STATE_INVALID = 0, /* New prison, not ready to be seen */
PRISON_STATE_ALIVE, /* Current prison, visible to all */
PRISON_STATE_DYING /* Removed but holding resources, */
}; /* optionally visible. */
/*
* Flags for jail_set and jail_get.
@ -155,6 +157,7 @@ struct prison_racct;
* (m) locked by pr_mtx
* (p) locked by pr_mtx, and also at least shared allprison_lock required
* to update
* (q) locked by both pr_mtx and allprison_lock
* (r) atomic via refcount(9), pr_mtx and allprison_lock required to
* decrement to zero
*/
@ -185,7 +188,8 @@ struct prison {
int pr_securelevel; /* (p) securelevel */
int pr_enforce_statfs; /* (p) statfs permission */
int pr_devfs_rsnum; /* (p) devfs ruleset */
int pr_spare[3];
enum prison_state pr_state; /* (q) state in life cycle */
int pr_spare[2];
int pr_osreldate; /* (c) kern.osreldate value */
unsigned long pr_hostid; /* (p) jail hostid */
char pr_name[MAXHOSTNAMELEN]; /* (p) admin jail name */
@ -222,6 +226,8 @@ struct prison_racct {
/* by this jail or an ancestor */
#define PR_IP6 0x04000000 /* IPv6 restricted or disabled */
/* by this jail or an ancestor */
#define PR_COMPLETE_PROC 0x08000000 /* prison_complete called from */
/* prison_proc_free, releases uref */
/*
* Flags for pr_allow