kern: cpuset: properly rebase when attaching to a jail

The current logic is a fine choice for a system administrator modifying
process cpusets or a process creating a new cpuset(2), but not ideal for
processes attaching to a jail.

Currently, when a process attaches to a jail, it is handled exactly like any
other process and loses whatever mask it may have had, because
cpuset_setproc() is built entirely around the assumption that non-anonymous
cpusets in the process can simply be replaced with the new parent set.

This change slightly improves the jail attach integration by modifying
cpuset_setproc() callers to indicate whether the process should rebase its
cpuset onto the indicated set (i.e., the new rebase argument; only
cpuset_setproc_update_set passes true).

If we're rebasing and the process currently has a cpuset assigned that is
not the containing jail's root set, then we now create a new base set for it
hanging off the jail's root, with the existing mask applied, instead of
simply using the jail's root set as the new base set.

Note that the common case is that the process doesn't yet have a cpuset
within the jail root, but the system root may freely assign a cpuset from a
jail to a process outside of the jail with no restriction. We assume that
this may have happened, or that it could happen due to a race once we drop
the proc lock, so we must recheck both inside the loop that gathers up
enough free cpusets and again after the loop.

To recap, here's how it worked before in all cases:

0     4 <-- jail              0      4 <-- jail / process
|                             |
1                 ->          1
|
3 <-- process

Here's how it works now:

0     4 <-- jail             0       4 <-- jail
|                            |       |
1                 ->         1       5 <-- process
|
3 <-- process

or

0     4 <-- jail             0       4 <-- jail / process
|                            |
1 <-- process     ->         1

More importantly, in both cases the attaching process retains the mask it
had prior to attaching, or the attach fails with EDEADLK if that would leave
it with no CPUs to run on or with an incompatible domain policy. The author
of this patch considers this almost a security feature: a MAC policy could
grant PRIV_JAIL_ATTACH to an unprivileged user that is restricted to some
subset of the available CPUs, and without this change attaching to a jail
with a wider mask would lift that restriction.
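
For illustration, a minimal userland sketch of that behavior (not part of
this change; it assumes a pre-existing jail with jid 1, must run with
privilege to attach, and uses placeholder values and error handling):

    #include <sys/param.h>
    #include <sys/cpuset.h>
    #include <sys/jail.h>

    #include <err.h>
    #include <stdio.h>

    int
    main(void)
    {
            cpuset_t mask;
            int jid = 1;    /* assumed: jid of an already-running jail */

            /* Restrict ourselves to CPU 1 before attaching. */
            CPU_ZERO(&mask);
            CPU_SET(1, &mask);
            if (cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1,
                sizeof(mask), &mask) != 0)
                    err(1, "cpuset_setaffinity");

            /*
             * With this change the mask above survives the attach; if the
             * jail's set shares no CPUs with it, jail_attach() fails with
             * EDEADLK instead of silently widening the mask.
             */
            if (jail_attach(jid) != 0)
                    err(1, "jail_attach");

            if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1,
                sizeof(mask), &mask) != 0)
                    err(1, "cpuset_getaffinity");
            printf("still limited to CPU 1: %s\n",
                CPU_ISSET(1, &mask) ? "yes" : "no");
            return (0);
    }

Before this change, the same program would come back from jail_attach() with
the jail's full mask instead of the one it set.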

In most cases, it's anticipated that admins will use this to, for example,
run `cpuset -c -l 1 jail -c path=/ command=/long/running/cmd` and avoid the
contortions otherwise needed to spawn a command inside a jail with a more
limited cpuset than the jail's.

Reviewed by:	jamie
MFC after:	1 month (maybe)
Differential Revision:	https://reviews.freebsd.org/D27298

Author:  Kyle Evans
Date:    2020-11-25 03:14:25 +00:00
Commit:  d431dea5ac (parent 30b7c6f977)
Notes:   svn2git, 2020-12-20 02:59:44 +00:00
         svn path=/head/; revision=368011

sys/kern/kern_cpuset.c

@@ -1104,14 +1104,63 @@ cpuset_setproc_setthread(struct cpuset *tdset, struct cpuset *set,
             domainlist);
 }
 
+static int
+cpuset_setproc_newbase(struct thread *td, struct cpuset *set,
+    struct cpuset *nroot, struct cpuset **nsetp,
+    struct setlist *cpusets, struct domainlist *domainlist)
+{
+        struct domainset ndomain;
+        cpuset_t nmask;
+        struct cpuset *pbase;
+        int error;
+
+        pbase = cpuset_getbase(td->td_cpuset);
+
+        /* Copy process mask, then further apply the new root mask. */
+        CPU_COPY(&pbase->cs_mask, &nmask);
+        CPU_AND(&nmask, &nroot->cs_mask);
+
+        domainset_copy(pbase->cs_domain, &ndomain);
+        DOMAINSET_AND(&ndomain.ds_mask, &set->cs_domain->ds_mask);
+
+        /* Policy is too restrictive, will not work. */
+        if (CPU_EMPTY(&nmask) || DOMAINSET_EMPTY(&ndomain.ds_mask))
+                return (EDEADLK);
+
+        /*
+         * Remove pbase from the freelist in advance, it'll be pushed to
+         * cpuset_ids on success.  We assume here that cpuset_create() will not
+         * touch pbase on failure, and we just enqueue it back to the freelist
+         * to remain in a consistent state.
+         */
+        pbase = LIST_FIRST(cpusets);
+        LIST_REMOVE(pbase, cs_link);
+        error = cpuset_create(&pbase, set, &nmask);
+        if (error != 0) {
+                LIST_INSERT_HEAD(cpusets, pbase, cs_link);
+                return (error);
+        }
+
+        /* Duplicates some work from above... oh well. */
+        pbase->cs_domain = domainset_shadow(set->cs_domain, &ndomain,
+            domainlist);
+        *nsetp = pbase;
+        return (0);
+}
+
 /*
- * Handle three cases for updating an entire process.
+ * Handle four cases for updating an entire process.
  *
- * 1) Set is non-null.  This reparents all anonymous sets to the provided
- *    set and replaces all non-anonymous td_cpusets with the provided set.
- * 2) Mask is non-null.  This replaces or creates anonymous sets for every
+ * 1) Set is non-null and the process is not rebasing onto a new root.  This
+ *    reparents all anonymous sets to the provided set and replaces all
+ *    non-anonymous td_cpusets with the provided set.
+ * 2) Set is non-null and the process is rebasing onto a new root.  This
+ *    creates a new base set if the process previously had its own base set,
+ *    then reparents all anonymous sets either to that set or the provided set
+ *    if one was not created.  Non-anonymous sets are similarly replaced.
+ * 3) Mask is non-null.  This replaces or creates anonymous sets for every
  *    thread with the existing base as a parent.
- * 3) domain is non-null.  This creates anonymous sets for every thread
+ * 4) domain is non-null.  This creates anonymous sets for every thread
  *    and replaces the domain set.
  *
  * This is overly complicated because we can't allocate while holding a
@@ -1120,15 +1169,15 @@ cpuset_setproc_setthread(struct cpuset *tdset, struct cpuset *set,
  */
 static int
 cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask,
-    struct domainset *domain)
+    struct domainset *domain, bool rebase)
 {
         struct setlist freelist;
         struct setlist droplist;
         struct domainlist domainlist;
-        struct cpuset *nset;
+        struct cpuset *base, *nset, *nroot, *tdroot;
         struct thread *td;
         struct proc *p;
-        int threads;
+        int needed;
         int nfree;
         int error;
 
@@ -1144,21 +1193,49 @@ cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask,
         nfree = 1;
         LIST_INIT(&droplist);
         nfree = 0;
+        base = set;
+        nroot = NULL;
+        if (set != NULL)
+                nroot = cpuset_getroot(set);
         for (;;) {
                 error = cpuset_which(CPU_WHICH_PID, pid, &p, &td, &nset);
                 if (error)
                         goto out;
-                if (nfree >= p->p_numthreads)
+                tdroot = cpuset_getroot(td->td_cpuset);
+                needed = p->p_numthreads;
+                if (set != NULL && rebase && tdroot != nroot)
+                        needed++;
+                if (nfree >= needed)
                         break;
-                threads = p->p_numthreads;
                 PROC_UNLOCK(p);
-                if (nfree < threads) {
-                        cpuset_freelist_add(&freelist, threads - nfree);
-                        domainset_freelist_add(&domainlist, threads - nfree);
-                        nfree = threads;
+                if (nfree < needed) {
+                        cpuset_freelist_add(&freelist, needed - nfree);
+                        domainset_freelist_add(&domainlist, needed - nfree);
+                        nfree = needed;
                 }
         }
         PROC_LOCK_ASSERT(p, MA_OWNED);
+
+        /*
+         * If we're changing roots and the root set is what has been specified
+         * as the parent, then we'll check if the process was previously using
+         * the root set and, if it wasn't, create a new base with the process's
+         * mask applied to it.
+         */
+        if (set != NULL && rebase && nroot != tdroot) {
+                cpusetid_t base_id, root_id;
+
+                root_id = td->td_ucred->cr_prison->pr_cpuset->cs_id;
+                base_id = cpuset_getbase(td->td_cpuset)->cs_id;
+                if (base_id != root_id) {
+                        error = cpuset_setproc_newbase(td, set, nroot, &base,
+                            &freelist, &domainlist);
+                        if (error != 0)
+                                goto unlock_out;
+                }
+        }
+
         /*
          * Now that the appropriate locks are held and we have enough cpusets,
          * make sure the operation will succeed before applying changes.  The
@@ -1169,7 +1246,7 @@ cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask,
                 thread_lock(td);
                 if (set != NULL)
                         error = cpuset_setproc_test_setthread(td->td_cpuset,
-                            set);
+                            base);
                 else
                         error = cpuset_setproc_test_maskthread(td->td_cpuset,
                             mask, domain);
@@ -1185,7 +1262,7 @@ cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask,
         FOREACH_THREAD_IN_PROC(p, td) {
                 thread_lock(td);
                 if (set != NULL)
-                        error = cpuset_setproc_setthread(td->td_cpuset, set,
+                        error = cpuset_setproc_setthread(td->td_cpuset, base,
                             &nset, &freelist, &domainlist);
                 else
                         error = cpuset_setproc_maskthread(td->td_cpuset, mask,
@@ -1200,6 +1277,8 @@ cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask,
 unlock_out:
         PROC_UNLOCK(p);
 out:
+        if (base != NULL && base != set)
+                cpuset_rel(base);
         while ((nset = LIST_FIRST(&droplist)) != NULL)
                 cpuset_rel_complete(nset);
         cpuset_freelist_free(&freelist);
@@ -1584,7 +1663,7 @@ cpuset_setproc_update_set(struct proc *p, struct cpuset *set)
         KASSERT(set != NULL, ("[%s:%d] invalid set", __func__, __LINE__));
 
         cpuset_ref(set);
-        error = cpuset_setproc(p->p_pid, set, NULL, NULL);
+        error = cpuset_setproc(p->p_pid, set, NULL, NULL, true);
         if (error)
                 return (error);
         cpuset_rel(set);
@@ -1634,7 +1713,7 @@ sys_cpuset(struct thread *td, struct cpuset_args *uap)
                 return (error);
         error = copyout(&set->cs_id, uap->setid, sizeof(set->cs_id));
         if (error == 0)
-                error = cpuset_setproc(-1, set, NULL, NULL);
+                error = cpuset_setproc(-1, set, NULL, NULL, false);
         cpuset_rel(set);
         return (error);
 }
@@ -1668,7 +1747,7 @@ kern_cpuset_setid(struct thread *td, cpuwhich_t which,
         set = cpuset_lookup(setid, td);
         if (set == NULL)
                 return (ESRCH);
-        error = cpuset_setproc(id, set, NULL, NULL);
+        error = cpuset_setproc(id, set, NULL, NULL, false);
         cpuset_rel(set);
         return (error);
 }
@@ -1942,7 +2021,7 @@ kern_cpuset_setaffinity(struct thread *td, cpulevel_t level, cpuwhich_t which,
                 error = cpuset_setthread(id, mask);
                 break;
         case CPU_WHICH_PID:
-                error = cpuset_setproc(id, NULL, mask, NULL);
+                error = cpuset_setproc(id, NULL, mask, NULL, false);
                 break;
         case CPU_WHICH_CPUSET:
         case CPU_WHICH_JAIL:
@@ -2226,7 +2305,7 @@ kern_cpuset_setdomain(struct thread *td, cpulevel_t level, cpuwhich_t which,
                 error = _cpuset_setthread(id, NULL, &domain);
                 break;
         case CPU_WHICH_PID:
-                error = cpuset_setproc(id, NULL, NULL, &domain);
+                error = cpuset_setproc(id, NULL, NULL, &domain, false);
                 break;
         case CPU_WHICH_CPUSET:
         case CPU_WHICH_JAIL: