jail: allow root to implicitly widen its cpuset to attach
The default behavior for attaching processes to jails is that the jail's cpuset augments the attaching process's cpuset, so that it cannot be used to escalate a user's ability to take advantage of more CPUs than the administrator wanted them to. This is problematic when root needs to manage jails that have disjoint sets with whatever process is attaching, as this would otherwise result in a deadlock. Therefore, if we did not have an appropriate common subset of cpus/domains for our new policy, we now allow the process to simply take on the jail set *if* it has the privilege to widen its mask anyways. With the new logic, root can still usefully cpuset a process that attaches to a jail with the desire of maintaining the set it was given pre-attachment while still retaining the ability to manage child jails without jumping through hoops. A test has been added to demonstrate the issue; restricting a process's cpuset down to just the first CPU and attempting to attach to a jail without access to any of the same CPUs previously resulted in EDEADLK and now results in taking on the jail's mask for privileged users. PR: 253724 Reviewed by: jamie (also discussed with) MFC after: 3 days Differential Revision: https://reviews.freebsd.org/D28952
This commit is contained in:
parent
af11c20290
commit
60c4ec806d
@ -1,7 +1,7 @@
|
||||
/*-
|
||||
* SPDX-License-Identifier: BSD-2-Clause
|
||||
*
|
||||
* Copyright (c) 2020 Kyle Evans <kevans@FreeBSD.org>
|
||||
* Copyright (c) 2020-2021 Kyle Evans <kevans@FreeBSD.org>
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
@ -31,6 +31,8 @@ __FBSDID("$FreeBSD");
|
||||
#include <sys/param.h>
|
||||
#include <sys/cpuset.h>
|
||||
#include <sys/jail.h>
|
||||
#include <sys/procdesc.h>
|
||||
#include <sys/select.h>
|
||||
#include <sys/socket.h>
|
||||
#include <sys/uio.h>
|
||||
#include <sys/wait.h>
|
||||
@ -64,6 +66,10 @@ typedef void (*jail_test_cb)(struct jail_test_cb_params *);
|
||||
#define FAILURE_JAILSET 44
|
||||
#define FAILURE_PIDSET 45
|
||||
#define FAILURE_SEND 46
|
||||
#define FAILURE_DEADLK 47
|
||||
#define FAILURE_ATTACH 48
|
||||
#define FAILURE_BADAFFIN 49
|
||||
#define FAILURE_SUCCESS 50
|
||||
|
||||
static const char *
|
||||
do_jail_errstr(int error)
|
||||
@ -80,6 +86,14 @@ do_jail_errstr(int error)
|
||||
return ("Failed to get the pid setid");
|
||||
case FAILURE_SEND:
|
||||
return ("Failed to send(2) cpuset information");
|
||||
case FAILURE_DEADLK:
|
||||
return ("Deadlock hit trying to attach to jail");
|
||||
case FAILURE_ATTACH:
|
||||
return ("jail_attach(2) failed");
|
||||
case FAILURE_BADAFFIN:
|
||||
return ("Unexpected post-attach affinity");
|
||||
case FAILURE_SUCCESS:
|
||||
return ("jail_attach(2) succeeded, but should have failed.");
|
||||
default:
|
||||
return (NULL);
|
||||
}
|
||||
@ -444,6 +458,192 @@ ATF_TC_BODY(jail_attach_plain, tc)
|
||||
do_jail_test(1, false, &jail_attach_plain_pro, &jail_attach_jset_epi);
|
||||
}
|
||||
|
||||
/*
 * Child-side helper: create a jail named "cpuset_<pid>", attach the calling
 * process to it, report the new jid over fd and then stay alive indefinitely.
 *
 * Returns FAILURE_JAIL if jail_set(2) fails and FAILURE_SEND if the jid could
 * not be written to fd in full.  Exits directly with status 42 if the jail
 * name cannot be allocated.  Never returns once the jid has been sent.
 */
static int
jail_attach_disjoint_newjail(int fd)
{
	struct iovec iov[2];
	char *name;
	int jid;

	if (asprintf(&name, "cpuset_%d", getpid()) == -1)
		_exit(42);

	iov[0].iov_base = "name";
	iov[0].iov_len = sizeof("name");

	iov[1].iov_base = name;
	iov[1].iov_len = strlen(name) + 1;

	if ((jid = jail_set(iov, 2, JAIL_CREATE | JAIL_ATTACH)) < 0)
		return (FAILURE_JAIL);

	/*
	 * Signal that we're ready.  If the jid cannot be delivered the peer
	 * would only find out by timing out, so bail out instead and let it
	 * observe our exit right away.
	 */
	if (write(fd, &jid, sizeof(jid)) != (ssize_t)sizeof(jid))
		return (FAILURE_SEND);

	/*
	 * Stay alive without burning a CPU; pause(3) returning after a caught
	 * signal simply puts us back to sleep.
	 */
	for (;;)
		pause();
}
|
||||
|
||||
/*
 * Parent-side helper: wait up to five seconds for the jail creator to report
 * a jid on fd.  The test case is failed if nothing arrives in time, if
 * select(2) errors out, or if the creator's process descriptor (pfd) is
 * reported ready, which is treated as the creator having died.
 *
 * Returns the jid received on fd.
 */
static int
wait_jail(int fd, int pfd)
{
	struct timeval timeout = { .tv_sec = 5, .tv_usec = 0 };
	fd_set lset;
	int jid, nready;

	FD_ZERO(&lset);
	FD_SET(fd, &lset);
	FD_SET(pfd, &lset);

	/* Wait for jid to be written, restarting whenever we get interrupted. */
	for (;;) {
		nready = select(MAX(fd, pfd) + 1, &lset, NULL, NULL, &timeout);
		if (nready != -1 || errno != EINTR)
			break;
	}

	if (nready == 0)
		atf_tc_fail("Jail creator did not respond in time.");

	ATF_REQUIRE_MSG(nready > 0, "Unexpected error %d from select()", errno);

	/* Activity on the process descriptor means the creator is gone. */
	if (FD_ISSET(pfd, &lset))
		atf_tc_fail("Jail creator died unexpectedly.");

	ATF_REQUIRE(FD_ISSET(fd, &lset));
	ATF_REQUIRE_EQ(sizeof(jid), recv(fd, &jid, sizeof(jid), 0));

	return (jid);
}
|
||||
|
||||
/*
 * Child-side helper: attach to jail jid and, when expected_mask is non-NULL,
 * compare the process's cpuset mask after the attach against it.
 *
 * Returns 0 when the masks match.  Otherwise returns FAILURE_DEADLK or
 * FAILURE_ATTACH if jail_attach(2) failed (EDEADLK vs. anything else),
 * FAILURE_SUCCESS if the attach succeeded but no mask was expected,
 * FAILURE_MASK if the mask could not be fetched, and FAILURE_BADAFFIN if the
 * fetched mask differs from expected_mask.
 */
static int
try_attach_child(int jid, cpuset_t *expected_mask)
{
	cpuset_t actual;

	if (jail_attach(jid) == -1)
		return (errno == EDEADLK ? FAILURE_DEADLK : FAILURE_ATTACH);

	/* No expected mask means the attach itself should have failed. */
	if (expected_mask == NULL)
		return (FAILURE_SUCCESS);

	/* Fetch our own post-attach mask and check it against the expected. */
	CPU_ZERO(&actual);
	if (cpuset_getaffinity(CPU_LEVEL_CPUSET, CPU_WHICH_PID, -1,
	    sizeof(actual), &actual) != 0)
		return (FAILURE_MASK);

	return (CPU_CMP(expected_mask, &actual) != 0 ? FAILURE_BADAFFIN : 0);
}
|
||||
|
||||
/*
 * Fork a child that attaches to jail jid and checks its resulting cpuset mask
 * against expected_mask (which must not be NULL), then wait for it.  Any
 * non-zero exit status from the child fails the current test case, using the
 * do_jail_errstr() description when one exists.
 */
static void
try_attach(int jid, cpuset_t *expected_mask)
{
	const char *errstr;
	pid_t pid;
	int error, fail, status;

	ATF_REQUIRE(expected_mask != NULL);
	ATF_REQUIRE((pid = fork()) != -1);
	if (pid == 0)
		_exit(try_attach_child(jid, expected_mask));

	/* Reap the child, retrying if a signal interrupts the wait. */
	do {
		error = waitpid(pid, &status, 0);
	} while (error == -1 && errno == EINTR);

	/* Sanity check the exit info. */
	ATF_REQUIRE_EQ(pid, error);
	ATF_REQUIRE(WIFEXITED(status));

	fail = WEXITSTATUS(status);
	if (fail == 0)
		return;

	errstr = do_jail_errstr(fail);
	if (errstr == NULL)
		atf_tc_fail("Unknown error '%d'", fail);
	else
		atf_tc_fail("%s", errstr);
}
|
||||
|
||||
/*
 * jail_attach_disjoint: a privileged process whose cpuset shares no CPU with
 * the target jail's set must still be able to attach, taking on the jail's
 * mask; when the sets do overlap, only the common CPU is expected afterwards.
 */
ATF_TC(jail_attach_disjoint);
ATF_TC_HEAD(jail_attach_disjoint, tc)
{
	atf_tc_set_md_var(tc, "descr",
	    "Test root attachment into completely disjoint jail cpuset.");
	atf_tc_set_md_var(tc, "require.user", "root");
}
ATF_TC_BODY(jail_attach_disjoint, tc)
{
	cpuset_t emask, jmask, smask;
	cpusetid_t setid;
	pid_t pid;
	int sockpair[2];
	int fcpu, jid, pfd, scpu, sock;

	ATF_REQUIRE_EQ(0, cpuset(&setid));

	skip_ltncpu(2, &jmask);
	fcpu = CPU_FFS(&jmask) - 1;
	ATF_REQUIRE_EQ(0, socketpair(PF_UNIX, SOCK_STREAM, 0, sockpair));

	/* We'll wait on the procdesc, too, so we can fail faster if it dies. */
	ATF_REQUIRE((pid = pdfork(&pfd, 0)) != -1);

	if (pid == 0) {
		/* First child sets up the jail and reports its jid to us. */
		close(sockpair[SP_PARENT]);
		_exit(jail_attach_disjoint_newjail(sockpair[SP_CHILD]));
	}

	close(sockpair[SP_CHILD]);
	sock = sockpair[SP_PARENT];

	ATF_REQUIRE((jid = wait_jail(sock, pfd)) > 0);

	/*
	 * Round one: our set is clamped to the first cpu only, and the jail
	 * gets everything except the first cpu, so the two sets have nothing
	 * in common.
	 */
	CPU_ZERO(&smask);
	CPU_SET(fcpu, &smask);
	CPU_CLR(fcpu, &jmask);

	/* With the first cpu gone, the jail's lowest cpu is our "second". */
	scpu = CPU_FFS(&jmask) - 1;

	ATF_REQUIRE_EQ(0, cpuset_setaffinity(CPU_LEVEL_ROOT, CPU_WHICH_JAIL,
	    jid, sizeof(jmask), &jmask));
	ATF_REQUIRE_EQ(0, cpuset_setaffinity(CPU_LEVEL_CPUSET, CPU_WHICH_CPUSET,
	    setid, sizeof(smask), &smask));

	/* Completely disjoint: the attaching child should get the jail mask. */
	try_attach(jid, &jmask);

	/*
	 * Round two: widen our set to the first and second cpu.  Only the
	 * second cpu is shared with the jail, so only it should be used.
	 */
	CPU_SET(scpu, &smask);
	ATF_REQUIRE_EQ(0, cpuset_setaffinity(CPU_LEVEL_CPUSET, CPU_WHICH_CPUSET,
	    setid, sizeof(smask), &smask));

	CPU_ZERO(&emask);
	CPU_SET(scpu, &emask);
	try_attach(jid, &emask);
}
|
||||
|
||||
ATF_TC(badparent);
|
||||
ATF_TC_HEAD(badparent, tc)
|
||||
{
|
||||
@ -488,6 +688,7 @@ ATF_TP_ADD_TCS(tp)
|
||||
ATF_TP_ADD_TC(tp, jail_attach_newbase_plain);
|
||||
ATF_TP_ADD_TC(tp, jail_attach_prevbase);
|
||||
ATF_TP_ADD_TC(tp, jail_attach_plain);
|
||||
ATF_TP_ADD_TC(tp, jail_attach_disjoint);
|
||||
ATF_TP_ADD_TC(tp, badparent);
|
||||
return (atf_no_error());
|
||||
}
|
||||
|
@ -1255,6 +1255,11 @@ cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask,
|
||||
* as the parent, then we'll check if the process was previously using
|
||||
* the root set and, if it wasn't, create a new base with the process's
|
||||
* mask applied to it.
|
||||
*
|
||||
* If the new root is incompatible with the existing mask, then we allow
|
||||
* the process to take on the new root if and only if they have
|
||||
* privilege to widen their mask anyways. Unprivileged processes get
|
||||
* rejected with EDEADLK.
|
||||
*/
|
||||
if (set != NULL && rebase && nroot != tdroot) {
|
||||
cpusetid_t base_id, root_id;
|
||||
@ -1265,6 +1270,9 @@ cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask,
|
||||
if (base_id != root_id) {
|
||||
error = cpuset_setproc_newbase(td, set, nroot, &base,
|
||||
&freelist, &domainlist);
|
||||
if (error == EDEADLK &&
|
||||
priv_check(td, PRIV_SCHED_CPUSET) == 0)
|
||||
error = 0;
|
||||
if (error != 0)
|
||||
goto unlock_out;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user