Allow empty NUMA memory domains to support Threadripper2
The AMD Threadripper 2990WX is basically a slightly crippled Epyc. Rather than having 4 memory controllers, one per NUMA domain, it has only 2 memory controllers enabled. This means that only 2 of the 4 NUMA domains can be populated with physical memory, and the others are empty.
Add support to FreeBSD for empty NUMA domains by:
- creating empty memory domains when parsing the SRAT table, rather than failing to parse the table
- not running the pageout daemon threads in empty domains
- adding defensive code to UMA to avoid allocating from empty domains
- adding defensive code to cpuset to avoid binding to an empty domain
Thanks to Jeff for suggesting this strategy.
Reviewed by: alc, markj
Approved by: re (gjb@)
Differential Revision: https://reviews.freebsd.org/D1683
This commit is contained in:
parent
15a087e551
commit
30c5525b3c
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=339043
@ -65,7 +65,12 @@ __FBSDID("$FreeBSD$");
|
||||
#include <vm/uma.h>
|
||||
#include <vm/vm.h>
|
||||
#include <vm/vm_object.h>
|
||||
#include <vm/vm_page.h>
|
||||
#include <vm/vm_pageout.h>
|
||||
#include <vm/vm_extern.h>
|
||||
#include <vm/vm_param.h>
|
||||
#include <vm/vm_phys.h>
|
||||
#include <vm/vm_pagequeue.h>
|
||||
|
||||
#ifdef DDB
|
||||
#include <ddb/ddb.h>
|
||||
@ -478,6 +483,26 @@ _domainset_create(struct domainset *domain, struct domainlist *freelist)
|
||||
|
||||
}
|
||||
|
||||
/*
|
||||
* Are any of the domains in the mask empty? If so, silently
|
||||
* remove them. If only empty domains are present, we must
|
||||
* return failure.
|
||||
*/
|
||||
static bool
|
||||
domainset_empty_vm(struct domainset *domain)
|
||||
{
|
||||
int i, max;
|
||||
|
||||
max = DOMAINSET_FLS(&domain->ds_mask) + 1;
|
||||
for (i = 0; i < max; i++) {
|
||||
if (DOMAINSET_ISSET(i, &domain->ds_mask) &&
|
||||
VM_DOMAIN_EMPTY(i))
|
||||
DOMAINSET_CLR(i, &domain->ds_mask);
|
||||
}
|
||||
|
||||
return (DOMAINSET_EMPTY(&domain->ds_mask));
|
||||
}
|
||||
|
||||
/*
|
||||
* Create or lookup a domainset based on the key held in 'domain'.
|
||||
*/
|
||||
@ -1360,6 +1385,7 @@ domainset_zero(void)
|
||||
DOMAINSET_SET(i, &dset->ds_mask);
|
||||
dset->ds_policy = DOMAINSET_POLICY_FIRSTTOUCH;
|
||||
dset->ds_prefer = -1;
|
||||
(void)domainset_empty_vm(dset);
|
||||
curthread->td_domain.dr_policy = _domainset_create(dset, NULL);
|
||||
|
||||
domainset_copy(dset, &domainset2);
|
||||
@ -2087,6 +2113,13 @@ kern_cpuset_setdomain(struct thread *td, cpulevel_t level, cpuwhich_t which,
|
||||
DOMAINSET_FILL(&domain.ds_mask);
|
||||
}
|
||||
|
||||
/*
|
||||
* When given an impossible policy, fall back to interleaving
|
||||
* across all domains
|
||||
*/
|
||||
if (domainset_empty_vm(&domain))
|
||||
domainset_copy(&domainset2, &domain);
|
||||
|
||||
switch (level) {
|
||||
case CPU_LEVEL_ROOT:
|
||||
case CPU_LEVEL_CPUSET:
|
||||
|
@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <vm/vm_pageout.h>
|
||||
#include <vm/vm_param.h>
|
||||
#include <vm/vm_phys.h>
|
||||
#include <vm/vm_pagequeue.h>
|
||||
#include <vm/vm_map.h>
|
||||
#include <vm/vm_kern.h>
|
||||
#include <vm/vm_extern.h>
|
||||
@ -2469,9 +2470,11 @@ uma_zalloc_arg(uma_zone_t zone, void *udata, int flags)
|
||||
if (bucket != NULL)
|
||||
bucket_free(zone, bucket, udata);
|
||||
|
||||
if (zone->uz_flags & UMA_ZONE_NUMA)
|
||||
if (zone->uz_flags & UMA_ZONE_NUMA) {
|
||||
domain = PCPU_GET(domain);
|
||||
else
|
||||
if (VM_DOMAIN_EMPTY(domain))
|
||||
domain = UMA_ANYDOMAIN;
|
||||
} else
|
||||
domain = UMA_ANYDOMAIN;
|
||||
|
||||
/* Short-circuit for zones without buckets and low memory. */
|
||||
@ -2647,7 +2650,11 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, int flags)
|
||||
rdomain = 0;
|
||||
rr = rdomain == UMA_ANYDOMAIN;
|
||||
if (rr) {
|
||||
keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains;
|
||||
start = keg->uk_cursor;
|
||||
do {
|
||||
keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains;
|
||||
domain = keg->uk_cursor;
|
||||
} while (VM_DOMAIN_EMPTY(domain) && domain != start);
|
||||
domain = start = keg->uk_cursor;
|
||||
/* Only block on the second pass. */
|
||||
if ((flags & (M_WAITOK | M_NOVM)) == M_WAITOK)
|
||||
@ -2698,8 +2705,11 @@ keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, int flags)
|
||||
LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link);
|
||||
return (slab);
|
||||
}
|
||||
if (rr)
|
||||
domain = (domain + 1) % vm_ndomains;
|
||||
if (rr) {
|
||||
do {
|
||||
domain = (domain + 1) % vm_ndomains;
|
||||
} while (VM_DOMAIN_EMPTY(domain) && domain != start);
|
||||
}
|
||||
} while (domain != start);
|
||||
|
||||
/* Retry domain scan with blocking. */
|
||||
@ -2903,6 +2913,8 @@ zone_alloc_bucket(uma_zone_t zone, void *udata, int domain, int flags)
|
||||
uma_bucket_t bucket;
|
||||
int max;
|
||||
|
||||
CTR1(KTR_UMA, "zone_alloc:_bucket domain %d)", domain);
|
||||
|
||||
/* Don't wait for buckets, preserve caller's NOVM setting. */
|
||||
bucket = bucket_alloc(zone, udata, M_NOWAIT | (flags & M_NOVM));
|
||||
if (bucket == NULL)
|
||||
@ -2970,6 +2982,11 @@ zone_alloc_item(uma_zone_t zone, void *udata, int domain, int flags)
|
||||
|
||||
item = NULL;
|
||||
|
||||
if (domain != UMA_ANYDOMAIN) {
|
||||
/* avoid allocs targeting empty domains */
|
||||
if (VM_DOMAIN_EMPTY(domain))
|
||||
domain = UMA_ANYDOMAIN;
|
||||
}
|
||||
if (zone->uz_import(zone->uz_arg, &item, 1, domain, flags) != 1)
|
||||
goto fail;
|
||||
atomic_add_long(&zone->uz_allocs, 1);
|
||||
@ -3139,9 +3156,11 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
|
||||
/* We are no longer associated with this CPU. */
|
||||
critical_exit();
|
||||
|
||||
if ((zone->uz_flags & UMA_ZONE_NUMA) != 0)
|
||||
if ((zone->uz_flags & UMA_ZONE_NUMA) != 0) {
|
||||
domain = PCPU_GET(domain);
|
||||
else
|
||||
if (VM_DOMAIN_EMPTY(domain))
|
||||
domain = UMA_ANYDOMAIN;
|
||||
} else
|
||||
domain = 0;
|
||||
zdom = &zone->uz_domain[0];
|
||||
|
||||
@ -3588,7 +3607,9 @@ uma_prealloc(uma_zone_t zone, int items)
|
||||
dom = &keg->uk_domain[slab->us_domain];
|
||||
LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link);
|
||||
slabs--;
|
||||
domain = (domain + 1) % vm_ndomains;
|
||||
do {
|
||||
domain = (domain + 1) % vm_ndomains;
|
||||
} while (VM_DOMAIN_EMPTY(domain));
|
||||
}
|
||||
KEG_UNLOCK(keg);
|
||||
}
|
||||
@ -3678,6 +3699,11 @@ uma_large_malloc_domain(vm_size_t size, int domain, int wait)
|
||||
vm_offset_t addr;
|
||||
uma_slab_t slab;
|
||||
|
||||
if (domain != UMA_ANYDOMAIN) {
|
||||
/* avoid allocs targeting empty domains */
|
||||
if (VM_DOMAIN_EMPTY(domain))
|
||||
domain = UMA_ANYDOMAIN;
|
||||
}
|
||||
slab = zone_alloc_item(slabzone, NULL, domain, wait);
|
||||
if (slab == NULL)
|
||||
return (NULL);
|
||||
|
@ -502,6 +502,8 @@ kmem_back(vm_object_t object, vm_offset_t addr, vm_size_t size, int flags)
|
||||
*/
|
||||
if (vm_ndomains > 1) {
|
||||
domain = (addr >> KVA_QUANTUM_SHIFT) % vm_ndomains;
|
||||
while (VM_DOMAIN_EMPTY(domain))
|
||||
domain++;
|
||||
next = roundup2(addr + 1, KVA_QUANTUM);
|
||||
if (next > end || next < start)
|
||||
next = end;
|
||||
|
@ -2082,6 +2082,13 @@ vm_pageout(void)
|
||||
if (error != 0)
|
||||
panic("starting laundry for domain 0, error %d", error);
|
||||
for (i = 1; i < vm_ndomains; i++) {
|
||||
if (VM_DOMAIN_EMPTY(i)) {
|
||||
if (bootverbose)
|
||||
printf("domain %d empty; skipping pageout\n",
|
||||
i);
|
||||
continue;
|
||||
}
|
||||
|
||||
error = kthread_add(vm_pageout_worker, (void *)(uintptr_t)i,
|
||||
curproc, NULL, 0, 0, "dom%d", i);
|
||||
if (error != 0) {
|
||||
|
@ -151,7 +151,8 @@ struct vm_domain {
|
||||
|
||||
extern struct vm_domain vm_dom[MAXMEMDOM];
|
||||
|
||||
#define VM_DOMAIN(n) (&vm_dom[(n)])
|
||||
#define VM_DOMAIN(n) (&vm_dom[(n)])
|
||||
#define VM_DOMAIN_EMPTY(n) (vm_dom[(n)].vmd_page_count == 0)
|
||||
|
||||
#define vm_pagequeue_assert_locked(pq) mtx_assert(&(pq)->pq_mutex, MA_OWNED)
|
||||
#define vm_pagequeue_lock(pq) mtx_lock(&(pq)->pq_mutex)
|
||||
|
@ -311,8 +311,20 @@ check_domains(void)
|
||||
}
|
||||
for (i = 0; i <= max_apic_id; i++)
|
||||
if (cpus[i].enabled && !cpus[i].has_memory) {
|
||||
printf("SRAT: No memory found for CPU %d\n", i);
|
||||
return (ENXIO);
|
||||
found = 0;
|
||||
for (j = 0; j < num_mem && !found; j++) {
|
||||
if (mem_info[j].domain == cpus[i].domain)
|
||||
found = 1;
|
||||
}
|
||||
if (!found) {
|
||||
if (bootverbose)
|
||||
printf("SRAT: mem dom %d is empty\n",
|
||||
cpus[i].domain);
|
||||
mem_info[num_mem].start = 0;
|
||||
mem_info[num_mem].end = 0;
|
||||
mem_info[num_mem].domain = cpus[i].domain;
|
||||
num_mem++;
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user