Move VM_NUMA_ALLOC and DEVICE_NUMA under the single global config option NUMA.

Sponsored by:	Netflix, Dell/EMC Isilon
Discussed with:	jhb
Author: Jeff Roberson
Date: 2018-01-14 03:36:03 +00:00
parent b0ae8f91ac
commit b6715dab8f
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=327954
11 changed files with 58 additions and 30 deletions

View File

@ -99,7 +99,6 @@ options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones
# Make an SMP-capable kernel by default
options SMP # Symmetric MultiProcessor Kernel
options DEVICE_NUMA # I/O Device Affinity
options EARLY_AP_STARTUP
# CPU frequency control

View File

@ -93,7 +93,6 @@ options MALLOC_DEBUG_MAXZONES=8 # Separate malloc(9) zones
# Make an SMP-capable kernel by default
options SMP # Symmetric MultiProcessor Kernel
options DEVICE_NUMA # I/O Device Affinity
options EARLY_AP_STARTUP
# CPU frequency control

View File

@ -231,18 +231,14 @@ options EARLY_AP_STARTUP
# A default value should be already present, for every architecture.
options MAXCPU=32
# NUMA enables use of Non-Uniform Memory Access policies in various kernel
# subsystems.
options NUMA
# MAXMEMDOM defines the maximum number of memory domains that can boot in the
# system. A default value should already be defined by every architecture.
options MAXMEMDOM=2
# VM_NUMA_ALLOC enables use of memory domain-aware allocation in the VM
# system.
options VM_NUMA_ALLOC
# DEVICE_NUMA enables reporting of domain affinity of I/O devices via
# bus_get_domain(), etc.
options DEVICE_NUMA
# ADAPTIVE_MUTEXES changes the behavior of blocking mutexes to spin
# if the thread that currently owns the mutex is executing on another
# CPU. This behavior is enabled by default, so this option can be used

View File

@ -95,7 +95,6 @@ COMPAT_LINUXKPI opt_compat.h
COMPILING_LINT opt_global.h
CY_PCI_FASTINTR
DEADLKRES opt_watchdog.h
DEVICE_NUMA
EXT_RESOURCES opt_global.h
DIRECTIO
FILEMON opt_dontuse.h
@ -605,7 +604,6 @@ VM_KMEM_SIZE opt_vm.h
VM_KMEM_SIZE_SCALE opt_vm.h
VM_KMEM_SIZE_MAX opt_vm.h
VM_NRESERVLEVEL opt_vm.h
VM_NUMA_ALLOC opt_vm.h
VM_LEVEL_0_ORDER opt_vm.h
NO_SWAPPING opt_vm.h
MALLOC_MAKE_FAILURES opt_vm.h
@ -621,6 +619,7 @@ DEBUG_REDZONE opt_vm.h
# Standard SMP options
EARLY_AP_STARTUP opt_global.h
SMP opt_global.h
NUMA opt_global.h
# Size of the kernel message buffer
MSGBUF_SIZE opt_msgbuf.h

View File

@ -31,7 +31,6 @@
__FBSDID("$FreeBSD$");
#include "opt_acpi.h"
#include "opt_device_numa.h"
#include <sys/param.h>
#include <sys/kernel.h>
@ -1089,7 +1088,7 @@ acpi_hint_device_unit(device_t acdev, device_t child, const char *name,
static int
acpi_parse_pxm(device_t dev)
{
#ifdef DEVICE_NUMA
#ifdef NUMA
ACPI_HANDLE handle;
ACPI_STATUS status;
int pxm;

View File

@ -2616,7 +2616,7 @@ zone_import(uma_zone_t zone, void **bucket, int max, int domain, int flags)
bucket[i++] = slab_alloc_item(keg, slab);
if (keg->uk_free <= keg->uk_reserve)
break;
#if MAXMEMDOM > 1
#ifdef NUMA
/*
* If the zone is striped we pick a new slab for every
* N allocations. Eliminating this conditional will

View File

@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_page.h>
#include <vm/vm_phys.h>
#ifdef NUMA
/*
* Iterators are written such that the first nowait pass has as short a
* codepath as possible to eliminate bloat from the allocator. It is
@ -241,3 +242,36 @@ vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags)
return (0);
}
#else /* !NUMA */
/*
 * !NUMA stub for the page-allocation domain iterator: with a single
 * memory domain there is nothing to iterate, so return EJUSTRETURN to
 * tell the caller to stop without treating it as an allocation error.
 */
int
vm_domainset_iter_page(struct vm_domainset_iter *di, int *domain, int *flags)
{
return (EJUSTRETURN);
}
/*
 * !NUMA stub: all memory belongs to domain 0, so initialization just
 * fixes the caller's domain selector.  The iterator state (di) and
 * flags are deliberately left untouched.
 */
void
vm_domainset_iter_page_init(struct vm_domainset_iter *di,
struct vm_object *obj, int *domain, int *flags)
{
*domain = 0;
}
/*
 * !NUMA stub for the malloc domain iterator: mirrors the page iterator
 * above — EJUSTRETURN signals "no more domains to try" on kernels
 * without NUMA support.
 */
int
vm_domainset_iter_malloc(struct vm_domainset_iter *di, int *domain, int *flags)
{
return (EJUSTRETURN);
}
/*
 * !NUMA stub: select the sole memory domain (0) for malloc-style
 * allocations.
 * NOTE(review): taking a struct vm_object * here looks copy-pasted from
 * the page-iterator init — verify against the declaration in the header
 * that the malloc variant really carries an object argument.
 */
void
vm_domainset_iter_malloc_init(struct vm_domainset_iter *di,
struct vm_object *obj, int *domain, int *flags)
{
*domain = 0;
}
#endif

View File

@ -1919,16 +1919,13 @@ static void
vm_pageout(void)
{
int error;
#ifdef VM_NUMA_ALLOC
int i;
#endif
swap_pager_swap_init();
error = kthread_add(vm_pageout_laundry_worker, NULL, curproc, NULL,
0, 0, "laundry: dom0");
if (error != 0)
panic("starting laundry for domain 0, error %d", error);
#ifdef VM_NUMA_ALLOC
for (i = 1; i < vm_ndomains; i++) {
error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i,
curproc, NULL, 0, 0, "dom%d", i);
@ -1937,7 +1934,6 @@ vm_pageout(void)
i, error);
}
}
#endif
error = kthread_add(uma_reclaim_worker, NULL, curproc, NULL,
0, 0, "uma");
if (error != 0)

View File

@ -71,7 +71,7 @@ __FBSDID("$FreeBSD$");
_Static_assert(sizeof(long) * NBBY >= VM_PHYSSEG_MAX,
"Too many physsegs.");
#ifdef VM_NUMA_ALLOC
#ifdef NUMA
struct mem_affinity *mem_affinity;
int *mem_locality;
#endif
@ -140,7 +140,7 @@ static int sysctl_vm_phys_segs(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
NULL, 0, sysctl_vm_phys_segs, "A", "Phys Seg Info");
#ifdef VM_NUMA_ALLOC
#ifdef NUMA
static int sysctl_vm_phys_locality(SYSCTL_HANDLER_ARGS);
SYSCTL_OID(_vm, OID_AUTO, phys_locality, CTLTYPE_STRING | CTLFLAG_RD,
NULL, 0, sysctl_vm_phys_locality, "A", "Phys Locality Info");
@ -201,7 +201,7 @@ vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *p1,
int
vm_phys_domain_match(int prefer, vm_paddr_t low, vm_paddr_t high)
{
#ifdef VM_NUMA_ALLOC
#ifdef NUMA
domainset_t mask;
int i;
@ -306,7 +306,7 @@ int
vm_phys_mem_affinity(int f, int t)
{
#ifdef VM_NUMA_ALLOC
#ifdef NUMA
if (mem_locality == NULL)
return (-1);
if (f >= vm_ndomains || t >= vm_ndomains)
@ -317,7 +317,7 @@ vm_phys_mem_affinity(int f, int t)
#endif
}
#ifdef VM_NUMA_ALLOC
#ifdef NUMA
/*
* Outputs the VM locality table.
*/
@ -393,7 +393,7 @@ _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain)
static void
vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end)
{
#ifdef VM_NUMA_ALLOC
#ifdef NUMA
int i;
if (mem_affinity == NULL) {

View File

@ -48,6 +48,11 @@ struct mem_affinity {
vm_paddr_t end;
int domain;
};
#ifdef NUMA
extern struct mem_affinity *mem_affinity;
extern int *mem_locality;
#endif
extern int vm_ndomains;
struct vm_freelist {
struct pglist pl;
@ -62,9 +67,6 @@ struct vm_phys_seg {
struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER];
};
extern struct mem_affinity *mem_affinity;
extern int *mem_locality;
extern int vm_ndomains;
extern struct vm_phys_seg vm_phys_segs[];
extern int vm_phys_nsegs;
@ -101,6 +103,7 @@ int vm_phys_mem_affinity(int f, int t);
static inline int
vm_phys_domidx(vm_page_t m)
{
#ifdef NUMA
int domn, segind;
/* XXXKIB try to assert that the page is managed */
@ -109,6 +112,9 @@ vm_phys_domidx(vm_page_t m)
domn = vm_phys_segs[segind].domain;
KASSERT(domn < vm_ndomains, ("domain %d m %p", domn, m));
return (domn);
#else
return (0);
#endif
}
/*

View File

@ -153,7 +153,7 @@ parse_slit(void)
acpi_unmap_table(slit);
slit = NULL;
#ifdef VM_NUMA_ALLOC
#ifdef NUMA
/* Tell the VM about it! */
mem_locality = vm_locality_table;
#endif
@ -469,7 +469,7 @@ parse_srat(void)
return (-1);
}
#ifdef VM_NUMA_ALLOC
#ifdef NUMA
/* Point vm_phys at our memory affinity table. */
vm_ndomains = ndomain;
mem_affinity = mem_info;