jail: Add pr_state to struct prison
Rather than using references (pr_ref and pr_uref) to deduce the state of a prison, keep track of its state explicitly. A prison is either "invalid" (pr_ref == 0), "alive" (pr_uref > 0) or "dying" (pr_uref == 0). State transitions are generally tied to the reference counts, but with some flexibility: a new prison is "invalid" even though it now starts with a reference, and jail_remove(2) sets the state to "dying" before the user reference count drops to zero (which was previously accomplished via the PR_REMOVE flag). pr_state is protected by both the prison mutex and allprison_lock, so it has the same availability guarantees as the reference counts do. Differential Revision: https://reviews.freebsd.org/D27876
This commit is contained in:
parent
2443068d48
commit
1158508a80
@ -106,6 +106,7 @@ struct prison prison0 = {
|
||||
.pr_path = "/",
|
||||
.pr_securelevel = -1,
|
||||
.pr_devfs_rsnum = 0,
|
||||
.pr_state = PRISON_STATE_ALIVE,
|
||||
.pr_childmax = JAIL_MAX,
|
||||
.pr_hostuuid = DEFAULT_HOSTUUID,
|
||||
.pr_children = LIST_HEAD_INITIALIZER(prison0.pr_children),
|
||||
@ -663,7 +664,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
}
|
||||
ch_flags |= jsf->new | jsf->disable;
|
||||
}
|
||||
if ((flags & (JAIL_CREATE | JAIL_UPDATE | JAIL_ATTACH)) == JAIL_CREATE
|
||||
if ((flags & (JAIL_CREATE | JAIL_ATTACH)) == JAIL_CREATE
|
||||
&& !(pr_flags & PR_PERSIST)) {
|
||||
error = EINVAL;
|
||||
vfs_opterror(opts, "new jail must persist or attach");
|
||||
@ -1198,6 +1199,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
/* This brings the parent back to life. */
|
||||
mtx_lock(&ppr->pr_mtx);
|
||||
refcount_acquire(&ppr->pr_uref);
|
||||
ppr->pr_state = PRISON_STATE_ALIVE;
|
||||
mtx_unlock(&ppr->pr_mtx);
|
||||
error = osd_jail_call(ppr, PR_METHOD_CREATE, opts);
|
||||
if (error) {
|
||||
@ -1216,8 +1218,10 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
}
|
||||
|
||||
pr = malloc(sizeof(*pr), M_PRISON, M_WAITOK | M_ZERO);
|
||||
refcount_init(&pr->pr_ref, 0);
|
||||
pr->pr_state = PRISON_STATE_INVALID;
|
||||
refcount_init(&pr->pr_ref, 1);
|
||||
refcount_init(&pr->pr_uref, 0);
|
||||
drflags |= PD_DEREF;
|
||||
LIST_INIT(&pr->pr_children);
|
||||
mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK);
|
||||
TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
|
||||
@ -1311,11 +1315,6 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
drflags |= PD_LOCKED;
|
||||
/*
|
||||
* New prisons do not yet have a reference, because we do not
|
||||
* want others to see the incomplete prison once the
|
||||
* allprison_lock is downgraded.
|
||||
*/
|
||||
} else {
|
||||
/*
|
||||
* Grab a reference for existing prisons, to ensure they
|
||||
@ -1737,14 +1736,17 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
prison_set_allow_locked(pr, tallow, 0);
|
||||
/*
|
||||
* Persistent prisons get an extra reference, and prisons losing their
|
||||
* persist flag lose that reference. Only do this for existing prisons
|
||||
* for now, so new ones will remain unseen until after the module
|
||||
* handlers have completed.
|
||||
* persist flag lose that reference.
|
||||
*/
|
||||
born = !prison_isalive(pr);
|
||||
if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) {
|
||||
if (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags)) {
|
||||
if (pr_flags & PR_PERSIST) {
|
||||
prison_hold(pr);
|
||||
/*
|
||||
* This may make a dead prison alive again, but wait
|
||||
* to label it as such until after OSD calls have had
|
||||
* a chance to run (and perhaps to fail).
|
||||
*/
|
||||
refcount_acquire(&pr->pr_uref);
|
||||
} else {
|
||||
drflags |= PD_DEUREF;
|
||||
@ -1752,7 +1754,6 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
}
|
||||
}
|
||||
pr->pr_flags = (pr->pr_flags & ~ch_flags) | pr_flags;
|
||||
pr->pr_flags &= ~PR_REMOVE;
|
||||
mtx_unlock(&pr->pr_mtx);
|
||||
drflags &= ~PD_LOCKED;
|
||||
|
||||
@ -1826,15 +1827,20 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
goto done_deref;
|
||||
}
|
||||
|
||||
/*
|
||||
* A new prison is now ready to be seen; either it has gained a user
|
||||
* reference via persistence, or is about to gain one via attachment.
|
||||
*/
|
||||
if (born) {
|
||||
drflags = prison_lock_xlock(pr, drflags);
|
||||
pr->pr_state = PRISON_STATE_ALIVE;
|
||||
}
|
||||
|
||||
/* Attach this process to the prison if requested. */
|
||||
if (flags & JAIL_ATTACH) {
|
||||
error = do_jail_attach(td, pr, prison_lock_xlock(pr, drflags));
|
||||
drflags &= ~(PD_LOCKED | PD_LIST_XLOCKED);
|
||||
if (error) {
|
||||
if (created) {
|
||||
/* do_jail_attach has removed the prison. */
|
||||
pr = NULL;
|
||||
}
|
||||
vfs_opterror(opts, "attach failed");
|
||||
goto done_deref;
|
||||
}
|
||||
@ -1852,22 +1858,6 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
|
||||
|
||||
td->td_retval[0] = pr->pr_id;
|
||||
|
||||
if (created) {
|
||||
/*
|
||||
* Add a reference to newly created persistent prisons
|
||||
* (which was not done earlier so that the prison would
|
||||
* not be publicly visible).
|
||||
*/
|
||||
if (pr_flags & PR_PERSIST) {
|
||||
drflags = prison_lock_xlock(pr, drflags);
|
||||
refcount_acquire(&pr->pr_ref);
|
||||
refcount_acquire(&pr->pr_uref);
|
||||
} else {
|
||||
/* Non-persistent jails need no further changes. */
|
||||
pr = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
done_deref:
|
||||
/* Release any temporary prison holds and/or locks. */
|
||||
if (pr != NULL)
|
||||
@ -2332,7 +2322,7 @@ static void
|
||||
prison_remove_one(struct prison *pr)
|
||||
{
|
||||
struct proc *p;
|
||||
int drflags;
|
||||
int was_alive, drflags;
|
||||
|
||||
drflags = PD_DEREF | PD_LOCKED | PD_LIST_XLOCKED;
|
||||
|
||||
@ -2340,7 +2330,8 @@ prison_remove_one(struct prison *pr)
|
||||
* Mark the prison as doomed, so it doesn't accidentally come back
|
||||
* to life. It may still be explicitly brought back by jail_set(2).
|
||||
*/
|
||||
pr->pr_flags |= PR_REMOVE;
|
||||
was_alive = pr->pr_state == PRISON_STATE_ALIVE;
|
||||
pr->pr_state = PRISON_STATE_DYING;
|
||||
|
||||
/* If the prison was persistent, it is not anymore. */
|
||||
if (pr->pr_flags & PR_PERSIST) {
|
||||
@ -2361,9 +2352,14 @@ prison_remove_one(struct prison *pr)
|
||||
return;
|
||||
}
|
||||
|
||||
/* Tell modules this prison has died. */
|
||||
mtx_unlock(&pr->pr_mtx);
|
||||
drflags &= ~PD_LOCKED;
|
||||
if (was_alive)
|
||||
(void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
|
||||
|
||||
sx_xunlock(&allprison_lock);
|
||||
drflags &= ~(PD_LOCKED | PD_LIST_XLOCKED);
|
||||
drflags &= ~PD_LIST_XLOCKED;
|
||||
/*
|
||||
* Kill all processes unfortunate enough to be attached to this prison.
|
||||
*/
|
||||
@ -2429,7 +2425,7 @@ do_jail_attach(struct thread *td, struct prison *pr, int drflags)
|
||||
* a process root from one prison, but attached to the jail
|
||||
* of another.
|
||||
*/
|
||||
refcount_acquire(&pr->pr_ref);
|
||||
prison_hold(pr);
|
||||
refcount_acquire(&pr->pr_uref);
|
||||
drflags |= PD_DEREF | PD_DEUREF;
|
||||
mtx_unlock(&pr->pr_mtx);
|
||||
@ -2721,6 +2717,12 @@ prison_proc_free(struct prison *pr)
|
||||
* prison_free() won't re-submit the task.
|
||||
*/
|
||||
prison_hold(pr);
|
||||
mtx_lock(&pr->pr_mtx);
|
||||
KASSERT(!(pr->pr_flags & PR_COMPLETE_PROC),
|
||||
("Redundant last reference in prison_proc_free (jid=%d)",
|
||||
pr->pr_id));
|
||||
pr->pr_flags |= PR_COMPLETE_PROC;
|
||||
mtx_unlock(&pr->pr_mtx);
|
||||
taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
|
||||
}
|
||||
}
|
||||
@ -2735,14 +2737,14 @@ prison_complete(void *context, int pending)
|
||||
int drflags;
|
||||
|
||||
/*
|
||||
* This could be called to release the last reference, or the
|
||||
* last user reference; the existence of a user reference implies
|
||||
* the latter. There will always be a reference to remove, as
|
||||
* prison_proc_free adds one.
|
||||
* This could be called to release the last reference, or the last
|
||||
* user reference (plus the reference held in prison_proc_free).
|
||||
*/
|
||||
drflags = prison_lock_xlock(pr, PD_DEREF);
|
||||
if (refcount_load(&pr->pr_uref) > 0)
|
||||
if (pr->pr_flags & PR_COMPLETE_PROC) {
|
||||
pr->pr_flags &= ~PR_COMPLETE_PROC;
|
||||
drflags |= PD_DEUREF;
|
||||
}
|
||||
prison_deref(pr, drflags);
|
||||
}
|
||||
|
||||
@ -2777,7 +2779,8 @@ prison_deref(struct prison *pr, int flags)
|
||||
flags |= PD_DEREF;
|
||||
}
|
||||
flags = prison_lock_xlock(pr, flags);
|
||||
if (refcount_release(&pr->pr_uref)) {
|
||||
if (refcount_release(&pr->pr_uref) &&
|
||||
pr->pr_state == PRISON_STATE_ALIVE) {
|
||||
/*
|
||||
* When the last user references goes,
|
||||
* this becomes a dying prison.
|
||||
@ -2785,6 +2788,7 @@ prison_deref(struct prison *pr, int flags)
|
||||
KASSERT(
|
||||
refcount_load(&prison0.pr_uref) > 0,
|
||||
("prison0 pr_uref=0"));
|
||||
pr->pr_state = PRISON_STATE_DYING;
|
||||
mtx_unlock(&pr->pr_mtx);
|
||||
flags &= ~PD_LOCKED;
|
||||
(void)osd_jail_call(pr,
|
||||
@ -2812,6 +2816,7 @@ prison_deref(struct prison *pr, int flags)
|
||||
KASSERT(
|
||||
refcount_load(&prison0.pr_ref) != 0,
|
||||
("prison0 pr_ref=0"));
|
||||
pr->pr_state = PRISON_STATE_INVALID;
|
||||
TAILQ_REMOVE(&allprison, pr, pr_list);
|
||||
LIST_REMOVE(pr, pr_sibling);
|
||||
TAILQ_INSERT_TAIL(&freeprison, pr,
|
||||
@ -3078,9 +3083,7 @@ bool
|
||||
prison_isalive(struct prison *pr)
|
||||
{
|
||||
|
||||
if (__predict_false(refcount_load(&pr->pr_uref) == 0))
|
||||
return (false);
|
||||
if (__predict_false(pr->pr_flags & PR_REMOVE))
|
||||
if (__predict_false(pr->pr_state != PRISON_STATE_ALIVE))
|
||||
return (false);
|
||||
return (true);
|
||||
}
|
||||
@ -3096,6 +3099,8 @@ bool
|
||||
prison_isvalid(struct prison *pr)
|
||||
{
|
||||
|
||||
if (__predict_false(pr->pr_state == PRISON_STATE_INVALID))
|
||||
return (false);
|
||||
if (__predict_false(refcount_load(&pr->pr_ref) == 0))
|
||||
return (false);
|
||||
return (true);
|
||||
@ -3760,8 +3765,7 @@ sysctl_jail_list(SYSCTL_HANDLER_ARGS)
|
||||
bzero(xp, sizeof(*xp));
|
||||
xp->pr_version = XPRISON_VERSION;
|
||||
xp->pr_id = cpr->pr_id;
|
||||
xp->pr_state = prison_isalive(cpr)
|
||||
? PRISON_STATE_ALIVE : PRISON_STATE_DYING;
|
||||
xp->pr_state = cpr->pr_state;
|
||||
strlcpy(xp->pr_path, prison_path(pr, cpr), sizeof(xp->pr_path));
|
||||
strlcpy(xp->pr_host, cpr->pr_hostname, sizeof(xp->pr_host));
|
||||
strlcpy(xp->pr_name, prison_name(pr, cpr), sizeof(xp->pr_name));
|
||||
@ -4412,6 +4416,10 @@ db_show_prison(struct prison *pr)
|
||||
db_printf(" parent = %p\n", pr->pr_parent);
|
||||
db_printf(" ref = %d\n", pr->pr_ref);
|
||||
db_printf(" uref = %d\n", pr->pr_uref);
|
||||
db_printf(" state = %s\n",
|
||||
pr->pr_state == PRISON_STATE_ALIVE ? "alive" :
|
||||
pr->pr_state == PRISON_STATE_DYING ? "dying" :
|
||||
"invalid");
|
||||
db_printf(" path = %s\n", pr->pr_path);
|
||||
db_printf(" cpuset = %d\n", pr->pr_cpuset
|
||||
? pr->pr_cpuset->cs_id : -1);
|
||||
|
@ -88,9 +88,11 @@ struct xprison {
|
||||
};
|
||||
#define XPRISON_VERSION 3
|
||||
|
||||
#define PRISON_STATE_INVALID 0
|
||||
#define PRISON_STATE_ALIVE 1
|
||||
#define PRISON_STATE_DYING 2
|
||||
enum prison_state {
|
||||
PRISON_STATE_INVALID = 0, /* New prison, not ready to be seen */
|
||||
PRISON_STATE_ALIVE, /* Current prison, visible to all */
|
||||
PRISON_STATE_DYING /* Removed but holding resources, */
|
||||
}; /* optionally visible. */
|
||||
|
||||
/*
|
||||
* Flags for jail_set and jail_get.
|
||||
@ -155,6 +157,7 @@ struct prison_racct;
|
||||
* (m) locked by pr_mtx
|
||||
* (p) locked by pr_mtx, and also at least shared allprison_lock required
|
||||
* to update
|
||||
* (q) locked by both pr_mtx and allprison_lock
|
||||
* (r) atomic via refcount(9), pr_mtx and allprison_lock required to
|
||||
* decrement to zero
|
||||
*/
|
||||
@ -185,7 +188,8 @@ struct prison {
|
||||
int pr_securelevel; /* (p) securelevel */
|
||||
int pr_enforce_statfs; /* (p) statfs permission */
|
||||
int pr_devfs_rsnum; /* (p) devfs ruleset */
|
||||
int pr_spare[3];
|
||||
enum prison_state pr_state; /* (q) state in life cycle */
|
||||
int pr_spare[2];
|
||||
int pr_osreldate; /* (c) kern.osreldate value */
|
||||
unsigned long pr_hostid; /* (p) jail hostid */
|
||||
char pr_name[MAXHOSTNAMELEN]; /* (p) admin jail name */
|
||||
@ -222,6 +226,8 @@ struct prison_racct {
|
||||
/* by this jail or an ancestor */
|
||||
#define PR_IP6 0x04000000 /* IPv6 restricted or disabled */
|
||||
/* by this jail or an ancestor */
|
||||
#define PR_COMPLETE_PROC 0x08000000 /* prison_complete called from */
|
||||
/* prison_proc_free, releases uref */
|
||||
|
||||
/*
|
||||
* Flags for pr_allow
|
||||
|
Loading…
Reference in New Issue
Block a user