Jeff Roberson e5818a53db Implement several enhancements to NUMA policies.
Add a new "interleave" allocation policy which stripes pages across
domains with a stride or width keeping contiguity within a multi-page
region.

Move the kernel to the dedicated numbered cpuset #2 making it possible
to assign kernel threads and memory policy separately from user.  This
also eliminates the need for the complicated interrupt binding code.

Add a sysctl API for viewing and manipulating domainsets.  Refactor some
of the cpuset_t manipulation code using the generic bitset type so that
it can be used for both.  This probably belongs in a dedicated subr file.

Attempt to improve the include situation.

Reviewed by:	kib
Discussed with:	jhb (cpuset parts)
Tested by:	pho (before review feedback)
Sponsored by:	Netflix, Dell/EMC Isilon
Differential Revision:	https://reviews.freebsd.org/D14839
2018-03-29 02:54:50 +00:00

470 lines
11 KiB
C

/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2007, 2008 Jeffrey Roberson <jeff@freebsd.org>
* All rights reserved.
*
* Copyright (c) 2008 Nokia Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/types.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <sys/cpuset.h>
#include <sys/domainset.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <limits.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <unistd.h>
#include <string.h>
/*
 * Command-line state.  Each *flag variable is set to 1 by main() when the
 * option with the same letter is seen on the command line.
 */
static int Cflag;	/* -C: create a new cpuset and move the -p process into it */
static int cflag;	/* -c: operate at CPU_LEVEL_CPUSET */
static int dflag;	/* -d domain: target is a domain (CPU_WHICH_DOMAIN) */
static int gflag;	/* -g: print information instead of modifying anything */
static int iflag;	/* -i: with -g, print the set id rather than the masks */
static int jflag;	/* -j jailid: target is a jail (CPU_WHICH_JAIL) */
static int lflag;	/* -l cpu-list: a CPU mask was supplied */
static int nflag;	/* -n policy:domain-list: a domain policy was supplied */
static int pflag;	/* -p pid: target is a process (CPU_WHICH_PID) */
static int rflag;	/* -r: operate at CPU_LEVEL_ROOT */
static int sflag;	/* -s setid: target is a numbered set (CPU_WHICH_CPUSET) */
static int tflag;	/* -t tid: target is a thread (CPU_WHICH_TID) */
static int xflag;	/* -x irq: target is an interrupt (CPU_WHICH_IRQ) */
/*
 * The object being queried or modified: its id, the level argument and the
 * "which" argument handed to the cpuset_*() calls.
 */
static id_t id;
static cpulevel_t level;
static cpuwhich_t which;
/* Prints the synopsis to stderr and exits; defined at the end of the file. */
static void usage(void);
/*
 * One entry of the policy-name lookup table: a name accepted by the -n
 * option paired with the policy value stored when that name matches.
 */
struct numa_policy {
const char *name;	/* policy name as typed by the user; NULL ends the table */
int policy;		/* matching DOMAINSET_POLICY_* value */
};
/*
 * Names understood by parsedomainlist(), long and short spellings side by
 * side.  The table is scanned in order and ends at the entry whose name is
 * NULL.
 */
static struct numa_policy policies[] = {
	{ .name = "round-robin", .policy = DOMAINSET_POLICY_ROUNDROBIN },
	{ .name = "rr",          .policy = DOMAINSET_POLICY_ROUNDROBIN },
	{ .name = "first-touch", .policy = DOMAINSET_POLICY_FIRSTTOUCH },
	{ .name = "ft",          .policy = DOMAINSET_POLICY_FIRSTTOUCH },
	{ .name = "prefer",      .policy = DOMAINSET_POLICY_PREFER },
	{ .name = "interleave",  .policy = DOMAINSET_POLICY_INTERLEAVE },
	{ .name = "il",          .policy = DOMAINSET_POLICY_INTERLEAVE },
	{ .name = NULL,          .policy = DOMAINSET_POLICY_INVALID }
};

/* Forward declaration; printset() is defined after the parsers. */
static void printset(struct bitset *mask, int size);
/*
 * Parse a comma-separated list of non-negative numbers and inclusive
 * ranges (for example "0,2-4,7") and set the corresponding bits in mask.
 *
 * list	NUL-terminated text to parse.
 * mask	bitset that receives the bits; this function only ever sets bits.
 * size	number of bits in mask, so valid entries are 0 .. size - 1.
 *
 * Empty elements (",,") are accepted and ignored.  A descending range such
 * as "4-2" sets no bits.
 *
 * Does not return on error: an out-of-range entry or a malformed list
 * terminates the program through errx(3) with EXIT_FAILURE.
 */
static void
parselist(char *list, struct bitset *mask, int size)
{
	enum { NONE, NUM, DASH } state;
	long val;
	int lastnum;
	int curnum;
	char *end;
	char *l;

	state = NONE;
	curnum = lastnum = 0;
	for (l = list; *l != '\0';) {
		/* <ctype.h> functions require an unsigned char value. */
		if (isdigit((unsigned char)*l)) {
			/*
			 * strtol() saturates at LONG_MAX on overflow, which
			 * the range check below rejects; atoi() would be
			 * undefined for such input.
			 */
			val = strtol(l, &end, 10);
			/* Bit indices run from 0 to size - 1 inclusive. */
			if (val >= size)
				errx(EXIT_FAILURE,
				    "List entry %ld exceeds maximum of %d",
				    val, size - 1);
			curnum = (int)val;
			l = end;
			switch (state) {
			case NONE:
				/* First number of an element. */
				lastnum = curnum;
				state = NUM;
				break;
			case DASH:
				/* Second number of a range: fill it in. */
				for (; lastnum <= curnum; lastnum++)
					BIT_SET(size, lastnum, mask);
				state = NONE;
				break;
			case NUM:
			default:
				goto parserr;
			}
			continue;
		}
		switch (*l) {
		case ',':
			switch (state) {
			case NONE:
				break;
			case NUM:
				/* A lone number terminated by a comma. */
				BIT_SET(size, curnum, mask);
				state = NONE;
				break;
			case DASH:
				/* "N-," has no upper bound. */
				goto parserr;
			}
			break;
		case '-':
			/* A dash is only valid right after a number. */
			if (state != NUM)
				goto parserr;
			state = DASH;
			break;
		default:
			goto parserr;
		}
		l++;
	}
	/* Flush whatever element was still pending at end of string. */
	switch (state) {
	case NONE:
		break;
	case NUM:
		BIT_SET(size, curnum, mask);
		break;
	case DASH:
		goto parserr;
	}
	return;
parserr:
	errx(EXIT_FAILURE, "Malformed list %s", list);
}
/*
 * Fill in a CPU mask from its textual form.  The word "all" (in any case)
 * loads the mask reported by cpuset_getaffinity() for CPU_LEVEL_ROOT /
 * CPU_WHICH_PID with id -1; any other text is handed to parselist().
 * Exits through err(3) if the affinity query fails.
 */
static void
parsecpulist(char *list, cpuset_t *mask)
{
	if (strcasecmp(list, "all") != 0) {
		parselist(list, (struct bitset *)mask, CPU_SETSIZE);
		return;
	}
	if (cpuset_getaffinity(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1,
	    sizeof(*mask), mask) != 0)
		err(EXIT_FAILURE, "getaffinity");
}
/*
 * Leniently parse a "policy:domain-list" argument.  Accepted shapes:
 *	round-robin:0-4		policy and domains both given
 *	round-robin:all		policy given, domains taken from the root set
 *	0-4			domains given, policy taken from the root set
 *	round-robin		policy given, domains taken from the root set
 *	all			both taken from the root set
 *
 * The resulting domain mask is stored in *mask and the policy in *policyp.
 * Exits through err(3)/errx(3) if the root set cannot be queried or the
 * text is malformed.
 */
static void
parsedomainlist(char *list, domainset_t *mask, int *policyp)
{
	domainset_t rootmask;
	struct numa_policy *np;
	size_t namelen;
	char *rest;
	int chosen;

	/* Start from the root set's policy; a named policy overrides it. */
	if (cpuset_getdomain(CPU_LEVEL_ROOT, CPU_WHICH_PID, -1,
	    sizeof(rootmask), &rootmask, &chosen) != 0)
		err(EXIT_FAILURE, "getdomain");

	rest = list;
	for (np = policies; np->name != NULL; np++) {
		namelen = strlen(np->name);
		if (strncasecmp(rest, np->name, namelen) != 0)
			continue;
		chosen = np->policy;
		rest += namelen;
		/* Only ':' or end of string may follow a policy name. */
		if (*rest == ':')
			rest++;
		else if (*rest != '\0')
			errx(EXIT_FAILURE, "Malformed list %s", list);
		break;
	}
	*policyp = chosen;

	/* No explicit domains, or "all": fall back to the root set's mask. */
	if (*rest == '\0' || strcasecmp(rest, "all") == 0) {
		DOMAINSET_COPY(&rootmask, mask);
		return;
	}
	parselist(rest, (struct bitset *)mask, DOMAINSET_SETSIZE);
}
/*
 * Write the indices of all set bits in mask to stdout as a ", "-separated
 * list followed by a newline.  size is the number of bits in mask.
 */
static void
printset(struct bitset *mask, int size)
{
	int printed;
	int idx;

	printed = 0;
	idx = 0;
	while (idx < size) {
		if (BIT_ISSET(size, idx, mask)) {
			/* Every entry after the first gets a separator. */
			printf(printed ? ", %d" : "%d", idx);
			printed = 1;
		}
		idx++;
	}
	printf("\n");
}
/*
 * Display strings used by printaffinity() and printsetid().
 * whichnames is indexed by the global `which`, levelnames by the global
 * `level`, and policynames by the policy value returned from
 * cpuset_getdomain().
 * NOTE(review): the ordering presumably mirrors the numeric values of the
 * CPU_WHICH_*, CPU_LEVEL_* and DOMAINSET_POLICY_* constants -- confirm
 * against the system headers when adding entries.
 */
static const char *whichnames[] = { NULL, "tid", "pid", "cpuset", "irq", "jail",
"domain" };
static const char *levelnames[] = { NULL, " root", " cpuset", "" };
static const char *policynames[] = { "invalid", "round-robin", "first-touch",
"prefer", "interleave" };
/*
 * Print the CPU mask of the object selected by the globals level/which/id
 * and, unless -d was given, its domain policy and domain mask as well.
 * Never returns: exits with EXIT_SUCCESS, or through err(3) when one of
 * the queries fails.
 */
static void
printaffinity(void)
{
	domainset_t domset;
	cpuset_t cpus;
	int pol;

	if (cpuset_getaffinity(level, which, id, sizeof(cpus), &cpus) != 0)
		err(EXIT_FAILURE, "getaffinity");
	printf("%s %jd%s mask: ", whichnames[which], (intmax_t)id,
	    levelnames[level]);
	printset((struct bitset *)&cpus, CPU_SETSIZE);

	/* The domain query is skipped when the target itself is a domain. */
	if (!dflag) {
		if (cpuset_getdomain(level, which, id, sizeof(domset),
		    &domset, &pol) != 0)
			err(EXIT_FAILURE, "getdomain");
		printf("%s %jd%s domain policy: %s mask: ", whichnames[which],
		    (intmax_t)id, levelnames[level], policynames[pol]);
		printset((struct bitset *)&domset, DOMAINSET_SETSIZE);
	}
	exit(EXIT_SUCCESS);
}
static void
printsetid(void)
{
cpusetid_t setid;
/*
* Only LEVEL_WHICH && WHICH_CPUSET has a numbered id.
*/
if (level == CPU_LEVEL_WHICH && !sflag)
level = CPU_LEVEL_CPUSET;
if (cpuset_getid(level, which, id, &setid))
err(errno, "getid");
printf("%s %jd%s id: %d\n", whichnames[which], (intmax_t)id,
levelnames[level], setid);
}
/*
 * Entry point.  Parses the options and then does one of three things:
 *
 *   -g given:        print the masks (or, with -i, the set id) of the
 *                    selected object and exit.
 *   command given:   place this process in the set named by -s, or in a
 *                    freshly created set, apply any -l / -n masks to it,
 *                    and exec the command.  Exits 127 if exec fails with
 *                    ENOENT and 126 for any other exec failure.
 *   otherwise:       modify an existing process, thread, set, jail or irq.
 *
 * Invalid option combinations end in usage(); failed system calls end in
 * err(3) with EXIT_FAILURE.
 */
int
main(int argc, char *argv[])
{
	domainset_t domains;
	cpusetid_t setid;
	cpuset_t mask;
	int policy;
	lwpid_t tid;
	pid_t pid;
	int ch;

	CPU_ZERO(&mask);
	DOMAINSET_ZERO(&domains);
	policy = DOMAINSET_POLICY_INVALID;
	level = CPU_LEVEL_WHICH;
	which = CPU_WHICH_PID;
	id = pid = tid = setid = -1;
	while ((ch = getopt(argc, argv, "Ccd:gij:l:n:p:rs:t:x:")) != -1) {
		switch (ch) {
		case 'C':
			Cflag = 1;
			break;
		case 'c':
			cflag = 1;
			level = CPU_LEVEL_CPUSET;
			break;
		case 'd':
			dflag = 1;
			which = CPU_WHICH_DOMAIN;
			id = atoi(optarg);
			break;
		case 'g':
			gflag = 1;
			break;
		case 'i':
			iflag = 1;
			break;
		case 'j':
			jflag = 1;
			which = CPU_WHICH_JAIL;
			id = atoi(optarg);
			break;
		case 'l':
			lflag = 1;
			parsecpulist(optarg, &mask);
			break;
		case 'n':
			nflag = 1;
			parsedomainlist(optarg, &domains, &policy);
			break;
		case 'p':
			pflag = 1;
			which = CPU_WHICH_PID;
			id = pid = atoi(optarg);
			break;
		case 'r':
			level = CPU_LEVEL_ROOT;
			rflag = 1;
			break;
		case 's':
			sflag = 1;
			which = CPU_WHICH_CPUSET;
			id = setid = atoi(optarg);
			break;
		case 't':
			tflag = 1;
			which = CPU_WHICH_TID;
			id = tid = atoi(optarg);
			break;
		case 'x':
			xflag = 1;
			which = CPU_WHICH_IRQ;
			id = atoi(optarg);
			break;
		default:
			usage();
		}
	}
	argc -= optind;
	argv += optind;

	/*
	 * Query mode: nothing is modified, so a command and the mask
	 * options make no sense here.
	 */
	if (gflag) {
		if (argc || Cflag || lflag || nflag)
			usage();
		/* Only one identity specifier. */
		if (dflag + jflag + xflag + sflag + pflag + tflag > 1)
			usage();
		if (iflag)
			printsetid();
		else
			printaffinity();
		exit(EXIT_SUCCESS);
	}

	/* -d, -i and -r are only meaningful together with -g. */
	if (dflag || iflag || rflag)
		usage();

	/*
	 * The user wants to run a command with a set and possibly cpumask.
	 */
	if (argc) {
		if (Cflag || pflag || tflag || xflag || jflag)
			usage();
		/*
		 * Join the requested set, or create a new one.  Failures
		 * exit with EXIT_FAILURE like every other error path; the
		 * argument count is not a meaningful exit status.
		 */
		if (sflag) {
			if (cpuset_setid(CPU_WHICH_PID, -1, setid))
				err(EXIT_FAILURE, "setid");
		} else {
			if (cpuset(&setid))
				err(EXIT_FAILURE, "newid");
		}
		if (lflag) {
			if (cpuset_setaffinity(level, CPU_WHICH_PID,
			    -1, sizeof(mask), &mask) != 0)
				err(EXIT_FAILURE, "setaffinity");
		}
		if (nflag) {
			if (cpuset_setdomain(level, CPU_WHICH_PID,
			    -1, sizeof(domains), &domains, policy) != 0)
				err(EXIT_FAILURE, "setdomain");
		}
		errno = 0;
		execvp(*argv, argv);
		/* Only reached when the exec failed. */
		err(errno == ENOENT ? 127 : 126, "%s", *argv);
	}

	/*
	 * We're modifying something that presently exists.
	 */
	/* -C needs a pid and excludes every other kind of target. */
	if (Cflag && (jflag || !pflag || sflag || tflag || xflag))
		usage();
	/* -c only qualifies a mask, so it needs -l or -n. */
	if ((!lflag && !nflag) && cflag)
		usage();
	/* Without a mask the only possible work is a set (re)assignment. */
	if ((!lflag && !nflag) && !(Cflag || sflag))
		usage();
	/* You can only set a mask on a thread. */
	if (tflag && (sflag | pflag | xflag | jflag))
		usage();
	/* You can only set a mask on an irq. */
	if (xflag && (jflag | pflag | sflag | tflag))
		usage();
	if (Cflag) {
		/*
		 * Create a new cpuset and move the specified process
		 * into the set.
		 */
		if (cpuset(&setid) < 0)
			err(EXIT_FAILURE, "newid");
		sflag = 1;
	}
	if (pflag && sflag) {
		if (cpuset_setid(CPU_WHICH_PID, pid, setid))
			err(EXIT_FAILURE, "setid");
		/*
		 * If the user specifies a set and a list we want the mask
		 * to affect the pid and not the set.
		 */
		which = CPU_WHICH_PID;
		id = pid;
	}
	if (lflag) {
		if (cpuset_setaffinity(level, which, id, sizeof(mask),
		    &mask) != 0)
			err(EXIT_FAILURE, "setaffinity");
	}
	if (nflag) {
		if (cpuset_setdomain(level, which, id, sizeof(domains),
		    &domains, policy) != 0)
			err(EXIT_FAILURE, "setdomain");
	}
	exit(EXIT_SUCCESS);
}
/*
 * Print the command synopsis to stderr and exit with status 1.
 * Every form that accepts -l also accepts -n policy:domain-list, matching
 * the checks in main().
 */
static void
usage(void)
{
	fprintf(stderr,
	    "usage: cpuset [-l cpu-list] [-n policy:domain-list] [-s setid] cmd ...\n");
	fprintf(stderr,
	    " cpuset [-l cpu-list] [-n policy:domain-list] [-s setid] -p pid\n");
	fprintf(stderr,
	    " cpuset [-c] [-l cpu-list] [-n policy:domain-list] -C -p pid\n");
	fprintf(stderr,
	    " cpuset [-c] [-l cpu-list] [-n policy:domain-list]\n"
	    " [-j jailid | -p pid | -t tid | -s setid | -x irq]\n");
	fprintf(stderr,
	    " cpuset -g [-cir] [-d domain | -j jailid | -p pid | -t tid | -s setid |\n"
	    " -x irq]\n");
	exit(1);
}