Add files for r327895

Implement 'domainset', a cpuset based NUMA policy mechanism.  This allows
userspace to control NUMA policy administratively and programmatically.

Implement domainset based iterators in the page layer.

Remove the now legacy numa_* syscalls.

Cleanup some header polution created by having seq.h in proc.h.

Reviewed by:  markj, kib
Discussed with:       alc
Tested by:    pho
Sponsored by: Netflix, Dell/EMC Isilon
Differential Revision:        https://reviews.freebsd.org/D13403
This commit is contained in:
Jeff Roberson 2018-01-12 22:57:57 +00:00
parent 3f289c3fcf
commit 7b11a48326
4 changed files with 452 additions and 0 deletions

60
sys/sys/_domainset.h Normal file
View File

@ -0,0 +1,60 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2017, Jeffrey Roberson <jeff@freebsd.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYS__DOMAINSET_H_
#define _SYS__DOMAINSET_H_
#include <sys/_bitset.h>
#ifdef _KERNEL
#define DOMAINSET_SETSIZE MAXMEMDOM
#endif
#define DOMAINSET_MAXSIZE 256
#ifndef DOMAINSET_SETSIZE
#define DOMAINSET_SETSIZE DOMAINSET_MAXSIZE
#endif
BITSET_DEFINE(_domainset, DOMAINSET_SETSIZE);
typedef struct _domainset domainset_t;
/*
* This structure is intended to be embedded in objects which have policy
* attributes. Each object keeps its own iterator so round-robin is
* synchronized and accurate.
*/
struct domainset;
struct domainset_ref {
struct domainset * volatile dr_policy;
int dr_iterator;
};
#endif /* !_SYS__DOMAINSET_H_ */

102
sys/sys/domainset.h Normal file
View File

@ -0,0 +1,102 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2017, Jeffrey Roberson <jeff@freebsd.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYS_DOMAINSETSET_H_
#define _SYS_DOMAINSETSET_H_
#include <sys/_domainset.h>
#include <sys/bitset.h>
#define _NDOMAINSETBITS _BITSET_BITS
#define _NDOMAINSETWORDS __bitset_words(DOMAINSET_SETSIZE)
#define DOMAINSETSETBUFSIZ ((2 + sizeof(long) * 2) * _NDOMAINSETWORDS)
#define DOMAINSET_CLR(n, p) BIT_CLR(DOMAINSET_SETSIZE, n, p)
#define DOMAINSET_COPY(f, t) BIT_COPY(DOMAINSET_SETSIZE, f, t)
#define DOMAINSET_ISSET(n, p) BIT_ISSET(DOMAINSET_SETSIZE, n, p)
#define DOMAINSET_SET(n, p) BIT_SET(DOMAINSET_SETSIZE, n, p)
#define DOMAINSET_ZERO(p) BIT_ZERO(DOMAINSET_SETSIZE, p)
#define DOMAINSET_FILL(p) BIT_FILL(DOMAINSET_SETSIZE, p)
#define DOMAINSET_SETOF(n, p) BIT_SETOF(DOMAINSET_SETSIZE, n, p)
#define DOMAINSET_EMPTY(p) BIT_EMPTY(DOMAINSET_SETSIZE, p)
#define DOMAINSET_ISFULLSET(p) BIT_ISFULLSET(DOMAINSET_SETSIZE, p)
#define DOMAINSET_SUBSET(p, c) BIT_SUBSET(DOMAINSET_SETSIZE, p, c)
#define DOMAINSET_OVERLAP(p, c) BIT_OVERLAP(DOMAINSET_SETSIZE, p, c)
#define DOMAINSET_CMP(p, c) BIT_CMP(DOMAINSET_SETSIZE, p, c)
#define DOMAINSET_OR(d, s) BIT_OR(DOMAINSET_SETSIZE, d, s)
#define DOMAINSET_AND(d, s) BIT_AND(DOMAINSET_SETSIZE, d, s)
#define DOMAINSET_NAND(d, s) BIT_NAND(DOMAINSET_SETSIZE, d, s)
#define DOMAINSET_CLR_ATOMIC(n, p) BIT_CLR_ATOMIC(DOMAINSET_SETSIZE, n, p)
#define DOMAINSET_SET_ATOMIC(n, p) BIT_SET_ATOMIC(DOMAINSET_SETSIZE, n, p)
#define DOMAINSET_SET_ATOMIC_ACQ(n, p) \
BIT_SET_ATOMIC_ACQ(DOMAINSET_SETSIZE, n, p)
#define DOMAINSET_AND_ATOMIC(n, p) BIT_AND_ATOMIC(DOMAINSET_SETSIZE, n, p)
#define DOMAINSET_OR_ATOMIC(d, s) BIT_OR_ATOMIC(DOMAINSET_SETSIZE, d, s)
#define DOMAINSET_COPY_STORE_REL(f, t) \
BIT_COPY_STORE_REL(DOMAINSET_SETSIZE, f, t)
#define DOMAINSET_FFS(p) BIT_FFS(DOMAINSET_SETSIZE, p)
#define DOMAINSET_FLS(p) BIT_FLS(DOMAINSET_SETSIZE, p)
#define DOMAINSET_COUNT(p) BIT_COUNT(DOMAINSET_SETSIZE, p)
#define DOMAINSET_FSET BITSET_FSET(_NDOMAINSETWORDS)
#define DOMAINSET_T_INITIALIZER BITSET_T_INITIALIZER
#define DOMAINSET_POLICY_INVALID 0
#define DOMAINSET_POLICY_ROUNDROBIN 1
#define DOMAINSET_POLICY_FIRSTTOUCH 2
#define DOMAINSET_POLICY_PREFER 3
#define DOMAINSET_POLICY_MAX DOMAINSET_POLICY_PREFER
#ifdef _KERNEL
#include <sys/queue.h>
LIST_HEAD(domainlist, domainset);
struct domainset {
LIST_ENTRY(domainset) ds_link;
domainset_t ds_mask; /* allowed domains. */
uint16_t ds_policy; /* Policy type. */
int16_t ds_prefer; /* Preferred domain or -1. */
uint16_t ds_cnt; /* popcnt from above. */
uint16_t ds_max; /* Maximum domain in set. */
};
void domainset_zero(void);
#else
__BEGIN_DECLS
int cpuset_getdomain(cpulevel_t, cpuwhich_t, id_t, size_t, domainset_t *,
int *);
int cpuset_setdomain(cpulevel_t, cpuwhich_t, id_t, size_t,
const domainset_t *, int);
__END_DECLS
#endif
#endif /* !_SYS_DOMAINSETSET_H_ */

243
sys/vm/vm_domainset.c Normal file
View File

@ -0,0 +1,243 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2017, Jeffrey Roberson <jeff@freebsd.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_vm.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bitset.h>
#include <sys/domainset.h>
#include <sys/proc.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/vmmeter.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_domainset.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
/*
* Iterators are written such that the first nowait pass has as short a
* codepath as possible to eliminate bloat from the allocator. It is
* assumed that most allocations are successful.
*/
/*
* Determine which policy is to be used for this allocation.
*/
static void
vm_domainset_iter_domain(struct vm_domainset_iter *di, struct vm_object *obj)
{
struct domainset *domain;
/*
* object policy takes precedence over thread policy. The policies
* are immutable and unsynchronized. Updates can race but pointer
* loads are assumed to be atomic.
*/
if (obj != NULL && (domain = obj->domain.dr_policy) != NULL) {
di->di_domain = domain;
di->di_iter = &obj->domain.dr_iterator;
} else {
di->di_domain = curthread->td_domain.dr_policy;
di->di_iter = &curthread->td_domain.dr_iterator;
}
}
static void
vm_domainset_iter_rr(struct vm_domainset_iter *di, int *domain)
{
int d;
d = *di->di_iter;
do {
d = (d + 1) % di->di_domain->ds_max;
} while (!DOMAINSET_ISSET(d, &di->di_domain->ds_mask));
*di->di_iter = *domain = d;
}
static void
vm_domainset_iter_prefer(struct vm_domainset_iter *di, int *domain)
{
int d;
d = *di->di_iter;
do {
d = (d + 1) % di->di_domain->ds_max;
} while (!DOMAINSET_ISSET(d, &di->di_domain->ds_mask) ||
d == di->di_domain->ds_prefer);
*di->di_iter = *domain = d;
}
static void
vm_domainset_iter_next(struct vm_domainset_iter *di, int *domain)
{
KASSERT(di->di_n > 0,
("vm_domainset_iter_first: Invalid n %d", di->di_n));
switch (di->di_domain->ds_policy) {
case DOMAINSET_POLICY_FIRSTTOUCH:
/*
* To prevent impossible allocations we convert an invalid
* first-touch to round-robin.
*/
/* FALLTHROUGH */
case DOMAINSET_POLICY_ROUNDROBIN:
vm_domainset_iter_rr(di, domain);
break;
case DOMAINSET_POLICY_PREFER:
vm_domainset_iter_prefer(di, domain);
break;
default:
panic("vm_domainset_iter_first: Unknown policy %d",
di->di_domain->ds_policy);
}
KASSERT(*domain < vm_ndomains,
("vm_domainset_iter_next: Invalid domain %d", *domain));
}
static void
vm_domainset_iter_first(struct vm_domainset_iter *di, int *domain)
{
switch (di->di_domain->ds_policy) {
case DOMAINSET_POLICY_FIRSTTOUCH:
*domain = PCPU_GET(domain);
if (DOMAINSET_ISSET(*domain, &di->di_domain->ds_mask)) {
di->di_n = 1;
break;
}
/*
* To prevent impossible allocations we convert an invalid
* first-touch to round-robin.
*/
/* FALLTHROUGH */
case DOMAINSET_POLICY_ROUNDROBIN:
di->di_n = di->di_domain->ds_cnt;
vm_domainset_iter_rr(di, domain);
break;
case DOMAINSET_POLICY_PREFER:
*domain = di->di_domain->ds_prefer;
di->di_n = di->di_domain->ds_cnt;
break;
default:
panic("vm_domainset_iter_first: Unknown policy %d",
di->di_domain->ds_policy);
}
KASSERT(di->di_n > 0,
("vm_domainset_iter_first: Invalid n %d", di->di_n));
KASSERT(*domain < vm_ndomains,
("vm_domainset_iter_first: Invalid domain %d", *domain));
}
void
vm_domainset_iter_page_init(struct vm_domainset_iter *di, struct vm_object *obj,
int *domain, int *req)
{
vm_domainset_iter_domain(di, obj);
di->di_flags = *req;
*req = (di->di_flags & ~(VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) |
VM_ALLOC_NOWAIT;
vm_domainset_iter_first(di, domain);
}
int
vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *req)
{
/*
* If we exhausted all options with NOWAIT and did a WAITFAIL it
* is time to return an error to the caller.
*/
if ((*req & VM_ALLOC_WAITFAIL) != 0)
return (ENOMEM);
/* If there are more domains to visit we run the iterator. */
if (--di->di_n != 0) {
vm_domainset_iter_next(di, domain);
return (0);
}
/* If we visited all domains and this was a NOWAIT we return error. */
if ((di->di_flags & (VM_ALLOC_WAITOK | VM_ALLOC_WAITFAIL)) == 0)
return (ENOMEM);
/*
* We have visited all domains with non-blocking allocations, try
* from the beginning with a blocking allocation.
*/
vm_domainset_iter_first(di, domain);
*req = di->di_flags;
return (0);
}
void
vm_domainset_iter_malloc_init(struct vm_domainset_iter *di,
struct vm_object *obj, int *domain, int *flags)
{
vm_domainset_iter_domain(di, obj);
di->di_flags = *flags;
*flags = (di->di_flags & ~M_WAITOK) | M_NOWAIT;
vm_domainset_iter_first(di, domain);
}
int
vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags)
{
/* If there are more domains to visit we run the iterator. */
if (--di->di_n != 0) {
vm_domainset_iter_next(di, domain);
return (0);
}
/* If we visited all domains and this was a NOWAIT we return error. */
if ((di->di_flags & M_WAITOK) == 0)
return (ENOMEM);
/*
* We have visited all domains with non-blocking allocations, try
* from the beginning with a blocking allocation.
*/
vm_domainset_iter_first(di, domain);
*flags = di->di_flags;
return (0);
}

47
sys/vm/vm_domainset.h Normal file
View File

@ -0,0 +1,47 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2017, Jeffrey Roberson <jeff@freebsd.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef __VM_DOMAINSET_H__
#define __VM_DOMAINSET_H__
struct vm_domainset_iter {
struct domainset *di_domain;
int *di_iter;
int di_flags;
int di_n;
};
int vm_domainset_iter_page(struct vm_domainset_iter *, int *, int *);
void vm_domainset_iter_page_init(struct vm_domainset_iter *,
struct vm_object *, int *, int *);
int vm_domainset_iter_malloc(struct vm_domainset_iter *, int *, int *);
void vm_domainset_iter_malloc_init(struct vm_domainset_iter *,
struct vm_object *, int *, int *);
#endif /* __VM_DOMAINSET_H__ */