MFC r298565:

Add a new jail OSD method, PR_METHOD_REMOVE.  It's called when a jail is
  removed from the user perspective, i.e. when the last pr_uref goes away,
  even though the jail mail still exist in the dying state.  It will also
  be called if either PR_METHOD_CREATE or PR_METHOD_SET fail.

MFC r298683:

  Delay removing the last jail reference in prison_proc_free, and instead
  put it off into the pr_task.  This is similar to prison_free, and in fact
  uses the same task even though they do something slightly different.

MFC r298566:

  Pass the current/new jail to PR_METHOD_CHECK, which pushes the call
  until after the jail is found or created.  This requires unlocking the
  jail for the call and re-locking it afterward, but that works because
  nothing in the jail has been changed yet, and other processes won't
  change the important fields as long as allprison_lock remains held.

  Keep better track of name vs namelc in kern_jail_set.  Name should
  always be the hierarchical name (relative to the caller), and namelc
  the last component.

MFC r298668:

  Use crcopysafe in jail_attach.

PR:		48471
This commit is contained in:
jamie 2016-04-30 03:19:07 +00:00
parent 5a2943388b
commit 0621c28d20
2 changed files with 141 additions and 75 deletions

View File

@ -560,8 +560,9 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
void *op;
#endif
unsigned long hid;
size_t namelen, onamelen;
int created, cuflags, descend, enforce, error, errmsg_len, errmsg_pos;
size_t namelen, onamelen, pnamelen;
int born, created, cuflags, descend, enforce;
int error, errmsg_len, errmsg_pos;
int gotchildmax, gotenforce, gothid, gotrsnum, gotslevel;
int fi, jid, jsys, len, level;
int childmax, osreldt, rsnum, slevel;
@ -584,7 +585,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
error = priv_check(td, PRIV_JAIL_ATTACH);
if (error)
return (error);
mypr = ppr = td->td_ucred->cr_prison;
mypr = td->td_ucred->cr_prison;
if ((flags & JAIL_CREATE) && mypr->pr_childmax == 0)
return (EPERM);
if (flags & ~JAIL_SET_MASK)
@ -611,6 +612,13 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
#endif
g_path = NULL;
cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
if (!cuflags) {
error = EINVAL;
vfs_opterror(opts, "no valid operation (create or update)");
goto done_errmsg;
}
error = vfs_copyopt(opts, "jid", &jid, sizeof(jid));
if (error == ENOENT)
jid = 0;
@ -1020,42 +1028,18 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
}
/*
* Grab the allprison lock before letting modules check their
* parameters. Once we have it, do not let go so we'll have a
* consistent view of the OSD list.
*/
sx_xlock(&allprison_lock);
error = osd_jail_call(NULL, PR_METHOD_CHECK, opts);
if (error)
goto done_unlock_list;
/* By now, all parameters should have been noted. */
TAILQ_FOREACH(opt, opts, link) {
if (!opt->seen && strcmp(opt->name, "errmsg")) {
error = EINVAL;
vfs_opterror(opts, "unknown parameter: %s", opt->name);
goto done_unlock_list;
}
}
/*
* See if we are creating a new record or updating an existing one.
* Find the specified jail, or at least its parent.
* This abuses the file error codes ENOENT and EEXIST.
*/
cuflags = flags & (JAIL_CREATE | JAIL_UPDATE);
if (!cuflags) {
error = EINVAL;
vfs_opterror(opts, "no valid operation (create or update)");
goto done_unlock_list;
}
pr = NULL;
namelc = NULL;
ppr = mypr;
if (cuflags == JAIL_CREATE && jid == 0 && name != NULL) {
namelc = strrchr(name, '.');
jid = strtoul(namelc != NULL ? namelc + 1 : name, &p, 10);
if (*p != '\0')
jid = 0;
}
sx_xlock(&allprison_lock);
if (jid != 0) {
/*
* See if a requested jid already exists. There is an
@ -1121,6 +1105,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* and updates keyed by the name itself (where the name must exist
* because that is the jail being updated).
*/
namelc = NULL;
if (name != NULL) {
namelc = strrchr(name, '.');
if (namelc == NULL)
@ -1131,7 +1116,6 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* parent and child names, and make sure the parent
* exists or matches an already found jail.
*/
*namelc = '\0';
if (pr != NULL) {
if (strncmp(name, ppr->pr_name, namelc - name)
|| ppr->pr_name[namelc - name] != '\0') {
@ -1142,6 +1126,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
goto done_unlock_list;
}
} else {
*namelc = '\0';
ppr = prison_find_name(mypr, name);
if (ppr == NULL) {
error = ENOENT;
@ -1150,17 +1135,18 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
goto done_unlock_list;
}
mtx_unlock(&ppr->pr_mtx);
*namelc = '.';
}
name = ++namelc;
namelc++;
}
if (name[0] != '\0') {
namelen =
if (namelc[0] != '\0') {
pnamelen =
(ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
name_again:
deadpr = NULL;
FOREACH_PRISON_CHILD(ppr, tpr) {
if (tpr != pr && tpr->pr_ref > 0 &&
!strcmp(tpr->pr_name + namelen, name)) {
!strcmp(tpr->pr_name + pnamelen, namelc)) {
if (pr == NULL &&
cuflags != JAIL_CREATE) {
mtx_lock(&tpr->pr_mtx);
@ -1237,7 +1223,8 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
if (ppr->pr_ref == 0) {
mtx_unlock(&ppr->pr_mtx);
error = ENOENT;
vfs_opterror(opts, "parent jail went away!");
vfs_opterror(opts, "jail \"%s\" not found",
prison_name(mypr, ppr));
goto done_unlock_list;
}
ppr->pr_ref++;
@ -1291,8 +1278,8 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
pr->pr_id = jid;
/* Set some default values, and inherit some from the parent. */
if (name == NULL)
name = "";
if (namelc == NULL)
namelc = "";
if (path == NULL) {
path = "/";
root = mypr->pr_root;
@ -1355,6 +1342,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
LIST_INIT(&pr->pr_children);
mtx_init(&pr->pr_mtx, "jail mutex", NULL, MTX_DEF | MTX_DUPOK);
TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
#ifdef VIMAGE
/* Allocate a new vnet if specified. */
@ -1374,7 +1362,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
mtx_lock(&pr->pr_mtx);
/*
* New prisons do not yet have a reference, because we do not
* want other to see the incomplete prison once the
* want others to see the incomplete prison once the
* allprison_lock is downgraded.
*/
} else {
@ -1588,13 +1576,13 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
}
#endif
onamelen = namelen = 0;
if (name != NULL) {
if (namelc != NULL) {
/* Give a default name of the jid. Also allow the name to be
* explicitly the jid - but not any other number, and only in
* normal form (no leading zero/etc).
*/
if (name[0] == '\0')
snprintf(name = numbuf, sizeof(numbuf), "%d", jid);
if (namelc[0] == '\0')
snprintf(namelc = numbuf, sizeof(numbuf), "%d", jid);
else if ((strtoul(namelc, &p, 10) != jid ||
namelc[0] < '1' || namelc[0] > '9') && *p == '\0') {
error = EINVAL;
@ -1606,9 +1594,10 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* Make sure the name isn't too long for the prison or its
* children.
*/
onamelen = strlen(pr->pr_name);
namelen = strlen(name);
if (strlen(ppr->pr_name) + namelen + 2 > sizeof(pr->pr_name)) {
pnamelen = (ppr == &prison0) ? 0 : strlen(ppr->pr_name) + 1;
onamelen = strlen(pr->pr_name + pnamelen);
namelen = strlen(namelc);
if (pnamelen + namelen + 1 > sizeof(pr->pr_name)) {
error = ENAMETOOLONG;
goto done_deref_locked;
}
@ -1625,6 +1614,30 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
goto done_deref_locked;
}
/*
* Let modules check their parameters. This requires unlocking and
* then re-locking the prison, but this is still a valid state as long
* as allprison_lock remains xlocked.
*/
mtx_unlock(&pr->pr_mtx);
error = osd_jail_call(pr, PR_METHOD_CHECK, opts);
if (error != 0) {
prison_deref(pr, created
? PD_LIST_XLOCKED
: PD_DEREF | PD_LIST_XLOCKED);
goto done_releroot;
}
mtx_lock(&pr->pr_mtx);
/* At this point, all valid parameters should have been noted. */
TAILQ_FOREACH(opt, opts, link) {
if (!opt->seen && strcmp(opt->name, "errmsg")) {
error = EINVAL;
vfs_opterror(opts, "unknown parameter: %s", opt->name);
goto done_deref_locked;
}
}
/* Set the parameters of the prison. */
#ifdef INET
redo_ip4 = 0;
@ -1698,12 +1711,12 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend)
tpr->pr_devfs_rsnum = rsnum;
}
if (name != NULL) {
if (namelc != NULL) {
if (ppr == &prison0)
strlcpy(pr->pr_name, name, sizeof(pr->pr_name));
strlcpy(pr->pr_name, namelc, sizeof(pr->pr_name));
else
snprintf(pr->pr_name, sizeof(pr->pr_name), "%s.%s",
ppr->pr_name, name);
ppr->pr_name, namelc);
/* Change this component of child names. */
FOREACH_PRISON_DESCENDANT_LOCKED(pr, tpr, descend) {
bcopy(tpr->pr_name + onamelen, tpr->pr_name + namelen,
@ -1781,6 +1794,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
* for now, so new ones will remain unseen until after the module
* handlers have completed.
*/
born = pr->pr_uref == 0;
if (!created && (ch_flags & PR_PERSIST & (pr_flags ^ pr->pr_flags))) {
if (pr_flags & PR_PERSIST) {
pr->pr_ref++;
@ -1850,15 +1864,20 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
/* Let the modules do their work. */
sx_downgrade(&allprison_lock);
if (created) {
if (born) {
error = osd_jail_call(pr, PR_METHOD_CREATE, opts);
if (error) {
prison_deref(pr, PD_LIST_SLOCKED);
(void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
prison_deref(pr, created
? PD_LIST_SLOCKED
: PD_DEREF | PD_LIST_SLOCKED);
goto done_errmsg;
}
}
error = osd_jail_call(pr, PR_METHOD_SET, opts);
if (error) {
if (born)
(void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
prison_deref(pr, created
? PD_LIST_SLOCKED
: PD_DEREF | PD_LIST_SLOCKED);
@ -1910,7 +1929,7 @@ kern_jail_set(struct thread *td, struct uio *optuio, int flags)
sx_sunlock(&allprison_lock);
}
goto done_errmsg;
goto done_free;
done_deref_locked:
prison_deref(pr, created
@ -2404,7 +2423,6 @@ sys_jail_attach(struct thread *td, struct jail_attach_args *uap)
static int
do_jail_attach(struct thread *td, struct prison *pr)
{
struct prison *ppr;
struct proc *p;
struct ucred *newcred, *oldcred;
int error;
@ -2432,7 +2450,6 @@ do_jail_attach(struct thread *td, struct prison *pr)
/*
* Reparent the newly attached process to this jail.
*/
ppr = td->td_ucred->cr_prison;
p = td->td_proc;
error = cpuset_setproc_update_set(p, pr->pr_cpuset);
if (error)
@ -2451,23 +2468,23 @@ do_jail_attach(struct thread *td, struct prison *pr)
newcred = crget();
PROC_LOCK(p);
oldcred = p->p_ucred;
setsugid(p);
crcopy(newcred, oldcred);
oldcred = crcopysafe(p, newcred);
newcred->cr_prison = pr;
p->p_ucred = newcred;
setsugid(p);
PROC_UNLOCK(p);
#ifdef RACCT
racct_proc_ucred_changed(p, oldcred, newcred);
#endif
prison_deref(oldcred->cr_prison, PD_DEREF | PD_DEUREF);
crfree(oldcred);
prison_deref(ppr, PD_DEREF | PD_DEUREF);
return (0);
e_unlock:
VOP_UNLOCK(pr->pr_root, 0);
e_revert_osd:
/* Tell modules this thread is still in its old jail after all. */
(void)osd_jail_call(ppr, PR_METHOD_ATTACH, td);
(void)osd_jail_call(td->td_ucred->cr_prison, PR_METHOD_ATTACH, td);
prison_deref(pr, PD_DEREF | PD_DEUREF);
return (error);
}
@ -2576,16 +2593,13 @@ prison_allow(struct ucred *cred, unsigned flag)
void
prison_free_locked(struct prison *pr)
{
int ref;
mtx_assert(&pr->pr_mtx, MA_OWNED);
pr->pr_ref--;
if (pr->pr_ref == 0) {
mtx_unlock(&pr->pr_mtx);
TASK_INIT(&pr->pr_task, 0, prison_complete, pr);
taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
return;
}
ref = --pr->pr_ref;
mtx_unlock(&pr->pr_mtx);
if (ref == 0)
taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
}
void
@ -2596,11 +2610,17 @@ prison_free(struct prison *pr)
prison_free_locked(pr);
}
/*
* Complete a call to either prison_free or prison_proc_free.
*/
static void
prison_complete(void *context, int pending)
{
struct prison *pr = context;
prison_deref((struct prison *)context, 0);
mtx_lock(&pr->pr_mtx);
prison_deref(pr, pr->pr_uref
? PD_DEREF | PD_DEUREF | PD_LOCKED : PD_LOCKED);
}
/*
@ -2613,19 +2633,53 @@ static void
prison_deref(struct prison *pr, int flags)
{
struct prison *ppr, *tpr;
int ref, lasturef;
if (!(flags & PD_LOCKED))
mtx_lock(&pr->pr_mtx);
for (;;) {
if (flags & PD_DEUREF) {
KASSERT(pr->pr_uref > 0,
("prison_deref PD_DEUREF on a dead prison (jid=%d)",
pr->pr_id));
pr->pr_uref--;
lasturef = pr->pr_uref == 0;
if (lasturef)
pr->pr_ref++;
KASSERT(prison0.pr_uref != 0, ("prison0 pr_uref=0"));
}
if (flags & PD_DEREF)
} else
lasturef = 0;
if (flags & PD_DEREF) {
KASSERT(pr->pr_ref > 0,
("prison_deref PD_DEREF on a dead prison (jid=%d)",
pr->pr_id));
pr->pr_ref--;
/* If the prison still has references, nothing else to do. */
if (pr->pr_ref > 0) {
}
ref = pr->pr_ref;
mtx_unlock(&pr->pr_mtx);
/*
* Tell the modules if the last user reference was removed
* (even it sticks around in dying state).
*/
if (lasturef) {
if (!(flags & (PD_LIST_SLOCKED | PD_LIST_XLOCKED))) {
if (ref > 1) {
sx_slock(&allprison_lock);
flags |= PD_LIST_SLOCKED;
} else {
sx_xlock(&allprison_lock);
flags |= PD_LIST_XLOCKED;
}
}
(void)osd_jail_call(pr, PR_METHOD_REMOVE, NULL);
mtx_lock(&pr->pr_mtx);
ref = --pr->pr_ref;
mtx_unlock(&pr->pr_mtx);
}
/* If the prison still has references, nothing else to do. */
if (ref > 0) {
if (flags & PD_LIST_SLOCKED)
sx_sunlock(&allprison_lock);
else if (flags & PD_LIST_XLOCKED)
@ -2633,7 +2687,6 @@ prison_deref(struct prison *pr, int flags)
return;
}
mtx_unlock(&pr->pr_mtx);
if (flags & PD_LIST_SLOCKED) {
if (!sx_try_upgrade(&allprison_lock)) {
sx_sunlock(&allprison_lock);
@ -2715,7 +2768,20 @@ prison_proc_free(struct prison *pr)
mtx_lock(&pr->pr_mtx);
KASSERT(pr->pr_uref > 0,
("Trying to kill a process in a dead prison (jid=%d)", pr->pr_id));
prison_deref(pr, PD_DEUREF | PD_LOCKED);
if (pr->pr_uref > 1)
pr->pr_uref--;
else {
/*
* Don't remove the last user reference in this context, which
* is expected to be a process that is not only locked, but
* also half dead.
*/
pr->pr_ref++;
mtx_unlock(&pr->pr_mtx);
taskqueue_enqueue(taskqueue_thread, &pr->pr_task);
return;
}
mtx_unlock(&pr->pr_mtx);
}

View File

@ -149,7 +149,6 @@ struct prison_racct;
* (p) locked by pr_mtx
* (c) set only during creation before the structure is shared, no mutex
* required to read
* (d) set only during destruction of jail, no mutex needed
*/
struct prison {
TAILQ_ENTRY(prison) pr_list; /* (a) all prisons */
@ -161,7 +160,7 @@ struct prison {
LIST_ENTRY(prison) pr_sibling; /* (a) next in parent's list */
struct prison *pr_parent; /* (c) containing jail */
struct mtx pr_mtx;
struct task pr_task; /* (d) destroy task */
struct task pr_task; /* (c) destroy task */
struct osd pr_osd; /* (p) additional data */
struct cpuset *pr_cpuset; /* (p) cpuset */
struct vnet *pr_vnet; /* (c) network stack */
@ -243,7 +242,8 @@ struct prison_racct {
#define PR_METHOD_SET 2
#define PR_METHOD_CHECK 3
#define PR_METHOD_ATTACH 4
#define PR_MAXMETHOD 5
#define PR_METHOD_REMOVE 5
#define PR_MAXMETHOD 6
/*
* Lock/unlock a prison.