From 8998a23151c1bb92ebe446888a4a51cd1ad0c040 Mon Sep 17 00:00:00 2001 From: markj Date: Sat, 20 Oct 2018 17:36:00 +0000 Subject: [PATCH] Create some global domainsets and refactor NUMA registration. Pre-defined policies are useful when integrating the domainset(9) policy machinery into various kernel memory allocators. The refactoring will make it easier to add NUMA support for other architectures. No functional change intended. Reviewed by: alc, gallatin, jeff, kib Tested by: pho (part of a larger patch) MFC after: 3 days Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D17416 --- share/man/man9/domainset.9 | 13 +++++++++---- sys/kern/kern_cpuset.c | 37 ++++++++++++++++++++++++++++++++----- sys/sys/domainset.h | 8 +++++++- sys/vm/vm_phys.c | 28 ++++++++++++++++++++++++++++ sys/vm/vm_phys.h | 2 ++ sys/x86/acpica/srat.c | 14 ++------------ 6 files changed, 80 insertions(+), 22 deletions(-) diff --git a/share/man/man9/domainset.9 b/share/man/man9/domainset.9 index 20725a737cf9..470f79b250a7 100644 --- a/share/man/man9/domainset.9 +++ b/share/man/man9/domainset.9 @@ -24,14 +24,11 @@ .\" .\" $FreeBSD$ .\" -.Dd March 24, 2018 +.Dd October 20, 2018 .Dt DOMAINSET 9 .Os .Sh NAME .Nm domainset(9) -\(em -.Nm domainset_create , -.Nm sysctl_handle_domainset . .Nd domainset functions and operation .Sh SYNOPSIS .In sys/_domainset.h @@ -46,6 +43,8 @@ struct domainset { }; .Ed .Pp +.Fn DOMAINSET_RR +.Fn DOMAINSET_PREF domain .Ft struct domainset * .Fn domainset_create "const struct domainset *key" .Ft int @@ -99,6 +98,12 @@ efficiency higher and is preferential to round-robin for general use. .El .Pp The +.Fn DOMAINSET_RR +and +.Fn DOMAINSET_PREF +macros provide pointers to global pre-defined policies for use when the +desired policy is known at compile time. +The .Fn domainset_create function takes a partially filled in domainset as a key and returns a valid domainset or NULL. 
diff --git a/sys/kern/kern_cpuset.c b/sys/kern/kern_cpuset.c index 3f4a81ff70d7..af8438e014bd 100644 --- a/sys/kern/kern_cpuset.c +++ b/sys/kern/kern_cpuset.c @@ -119,6 +119,8 @@ __FBSDID("$FreeBSD$"); */ LIST_HEAD(domainlist, domainset); +struct domainset __read_mostly domainset_prefer[MAXMEMDOM]; +struct domainset __read_mostly domainset_roundrobin; static uma_zone_t cpuset_zone; static uma_zone_t domainset_zone; @@ -1368,6 +1370,31 @@ cpuset_setithread(lwpid_t id, int cpu) return _cpuset_setthread(id, &mask, NULL); } +/* + * Initialize static domainsets after NUMA information is available. This is + * called very early during boot. + */ +void +domainset_init(void) +{ + struct domainset *dset; + int i; + + dset = &domainset_roundrobin; + DOMAINSET_COPY(&all_domains, &dset->ds_mask); + dset->ds_policy = DOMAINSET_POLICY_ROUNDROBIN; + dset->ds_prefer = -1; + _domainset_create(dset, NULL); + + for (i = 0; i < vm_ndomains; i++) { + dset = &domainset_prefer[i]; + DOMAINSET_COPY(&all_domains, &dset->ds_mask); + dset->ds_policy = DOMAINSET_POLICY_PREFER; + dset->ds_prefer = i; + _domainset_create(dset, NULL); + } +} + /* * Create the domainset for cpuset 0, 1 and cpuset 2. */ @@ -1375,22 +1402,22 @@ void domainset_zero(void) { struct domainset *dset; - int i; mtx_init(&cpuset_lock, "cpuset", NULL, MTX_SPIN | MTX_RECURSE); dset = &domainset0; - DOMAINSET_ZERO(&dset->ds_mask); - for (i = 0; i < vm_ndomains; i++) - DOMAINSET_SET(i, &dset->ds_mask); + DOMAINSET_COPY(&all_domains, &dset->ds_mask); dset->ds_policy = DOMAINSET_POLICY_FIRSTTOUCH; dset->ds_prefer = -1; - (void)domainset_empty_vm(dset); curthread->td_domain.dr_policy = _domainset_create(dset, NULL); domainset_copy(dset, &domainset2); domainset2.ds_policy = DOMAINSET_POLICY_INTERLEAVE; kernel_object->domain.dr_policy = _domainset_create(&domainset2, NULL); + + /* Remove empty domains from the global policies. 
*/ + LIST_FOREACH(dset, &cpuset_domains, ds_link) + (void)domainset_empty_vm(dset); } /* diff --git a/sys/sys/domainset.h b/sys/sys/domainset.h index 81375ed01850..32b35ac5c1e9 100644 --- a/sys/sys/domainset.h +++ b/sys/sys/domainset.h @@ -32,8 +32,8 @@ #define _SYS_DOMAINSET_H_ #include - #include +#include #define _NDOMAINSETBITS _BITSET_BITS #define _NDOMAINSETWORDS __bitset_words(DOMAINSET_SETSIZE) @@ -96,6 +96,12 @@ struct domainset { domainid_t ds_order[MAXMEMDOM]; /* nth domain table. */ }; +extern struct domainset domainset_prefer[MAXMEMDOM]; +#define DOMAINSET_PREF(domain) (&domainset_prefer[(domain)]) +extern struct domainset domainset_roundrobin; +#define DOMAINSET_RR() (&domainset_roundrobin) + +void domainset_init(void); void domainset_zero(void); /* diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c index 5206ba6e658f..2ed0e5feae33 100644 --- a/sys/vm/vm_phys.c +++ b/sys/vm/vm_phys.c @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -586,6 +587,33 @@ vm_phys_init(void) rw_init(&vm_phys_fictitious_reg_lock, "vmfctr"); } +/* + * Register info about the NUMA topology of the system. + * + * Invoked by platform-dependent code prior to vm_phys_init(). + */ +void +vm_phys_register_domains(int ndomains, struct mem_affinity *affinity, + int *locality) +{ +#ifdef NUMA + int i; + + vm_ndomains = ndomains; + mem_affinity = affinity; + mem_locality = locality; + + for (i = 0; i < vm_ndomains; i++) + DOMAINSET_SET(i, &all_domains); + + domainset_init(); +#else + (void)ndomains; + (void)affinity; + (void)locality; +#endif +} + /* * Split a contiguous, power of two-sized set of physical pages. 
* diff --git a/sys/vm/vm_phys.h b/sys/vm/vm_phys.h index a01a4a82595c..08c1a92d25f8 100644 --- a/sys/vm/vm_phys.h +++ b/sys/vm/vm_phys.h @@ -88,6 +88,8 @@ void vm_phys_free_contig(vm_page_t m, u_long npages); void vm_phys_free_pages(vm_page_t m, int order); void vm_phys_init(void); vm_page_t vm_phys_paddr_to_vm_page(vm_paddr_t pa); +void vm_phys_register_domains(int ndomains, struct mem_affinity *affinity, + int *locality); vm_page_t vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, int options); void vm_phys_set_pool(int pool, vm_page_t m, int order); diff --git a/sys/x86/acpica/srat.c b/sys/x86/acpica/srat.c index 42b506c4447b..9b6d057a586f 100644 --- a/sys/x86/acpica/srat.c +++ b/sys/x86/acpica/srat.c @@ -153,10 +153,6 @@ parse_slit(void) acpi_unmap_table(slit); slit = NULL; -#ifdef NUMA - /* Tell the VM about it! */ - mem_locality = vm_locality_table; -#endif return (0); } @@ -481,13 +477,6 @@ parse_srat(void) return (-1); } -#ifdef NUMA - vm_ndomains = ndomain; - for (int i = 0; i < vm_ndomains; i++) - DOMAINSET_SET(i, &all_domains); - mem_affinity = mem_info; -#endif - return (0); } @@ -511,7 +500,8 @@ parse_acpi_tables(void *dummy) if (parse_srat() < 0) return; init_mem_locality(); - (void) parse_slit(); + (void)parse_slit(); + vm_phys_register_domains(ndomain, mem_info, vm_locality_table); } SYSINIT(parse_acpi_tables, SI_SUB_VM - 1, SI_ORDER_FIRST, parse_acpi_tables, NULL);