Fix boot on systems where NUMA domain 0 is unpopulated.

- Add vm_phys_early_add_seg(), complementing vm_phys_early_alloc(), to
  ensure that segments registered during hammer_time() are placed in the
  right domain.  Otherwise, since the SRAT is not parsed at that point,
  they are simply added to domain 0, which may be incorrect and can leave
  that domain with only a few MB of memory.  (A sketch of this pattern
  follows the commit message.)
- Fix uma_startup1() to try allocating memory for zones from any domain.
  Previously, if domain 0 was unpopulated, the allocation would simply
  fail, resulting in a page fault slightly later during boot.
- Change _vm_phys_domain() to return -1 for addresses not covered by the
  affinity table, and change vm_phys_early_alloc() to handle wildcard
  domains.  This is necessary on amd64, where the page array is dense
  and pmap_page_array_startup() may allocate page table pages for
  non-existent page frames.

Reported and tested by:	Rafael Kitover <rkitover@gmail.com>
Reviewed by:	cem (earlier version), kib
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D25001
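
For illustration, here is a minimal userspace sketch of the deferred-registration
pattern described in the first item above: segment ranges reported before the
SRAT is parsed are stashed in a small static array and only handed to the
domain-aware allocator once NUMA affinity is known.  All names and types below
are simplified stand-ins, not the kernel interfaces; the real code is in the
vm_phys.c diff further down.

/*
 * Userspace sketch of the vm_phys_early_add_seg()/vm_phys_early_startup()
 * pattern.  Names are illustrative only.
 */
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef uint64_t paddr_t;

struct early_seg {
    paddr_t start;
    paddr_t end;
};

#define MAXEARLYSEGS    8
static struct early_seg early_segs[MAXEARLYSEGS];
static int early_nsegs;     /* set to -1 once the segments are replayed */

/* Stand-in for vm_phys_add_seg(): by now the domain can be looked up. */
static void
add_seg_with_domain(paddr_t start, paddr_t end)
{
    printf("registering [%#jx, %#jx) in its proper domain\n",
        (uintmax_t)start, (uintmax_t)end);
}

/* Like vm_phys_early_add_seg(): just record the range for later. */
static void
early_add_seg(paddr_t start, paddr_t end)
{
    if (early_nsegs == -1 || early_nsegs == MAXEARLYSEGS) {
        fprintf(stderr, "early_add_seg: too late or too many\n");
        abort();
    }
    early_segs[early_nsegs].start = start;
    early_segs[early_nsegs].end = end;
    early_nsegs++;
}

/* Like vm_phys_early_startup(): replay the recorded ranges. */
static void
early_startup(void)
{
    int i;

    for (i = 0; i < early_nsegs; i++)
        add_seg_with_domain(early_segs[i].start, early_segs[i].end);
    early_nsegs = -1;
}

int
main(void)
{
    /* Ranges reported by MD startup code before NUMA affinity is known. */
    early_add_seg(0x200000, 0x800000);
    early_add_seg(0x1000000, 0x1400000);

    /* Later, once the SRAT has been parsed. */
    early_startup();
    return (0);
}

The kernel version additionally panics if a segment is registered after
vm_phys_early_startup() has run, as the vm_phys.c diff below shows.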
Author: Mark Johnston
Date:   2020-05-28 19:41:00 +00:00
Commit: 81302f1d77 (parent 537ab18306)
Notes:  svn2git 2020-12-20 02:59:44 +00:00
        svn path=/head/; revision=361595

7 changed files with 59 additions and 18 deletions

sys/amd64/amd64/machdep.c

@@ -1223,7 +1223,7 @@ getmemsize(caddr_t kmdp, u_int64_t first)
  * Tell the physical memory allocator about pages used to store
  * the kernel and preloaded data. See kmem_bootstrap_free().
  */
-vm_phys_add_seg((vm_paddr_t)kernphys, trunc_page(first));
+vm_phys_early_add_seg((vm_paddr_t)kernphys, trunc_page(first));
 bzero(physmap, sizeof(physmap));
 physmap_idx = 0;

sys/amd64/amd64/pmap.c

@@ -1700,7 +1700,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
  * are required for promotion of the corresponding kernel virtual
  * addresses to superpage mappings.
  */
-vm_phys_add_seg(KPTphys, KPTphys + ptoa(nkpt));
+vm_phys_early_add_seg(KPTphys, KPTphys + ptoa(nkpt));
 /*
  * Account for the virtual addresses mapped by create_pagetables().

sys/i386/i386/machdep.c

@@ -1828,7 +1828,7 @@ getmemsize(int first)
  * Tell the physical memory allocator about pages used to store
  * the kernel and preloaded data. See kmem_bootstrap_free().
  */
-vm_phys_add_seg((vm_paddr_t)KERNLOAD, trunc_page(first));
+vm_phys_early_add_seg((vm_paddr_t)KERNLOAD, trunc_page(first));
 TUNABLE_INT_FETCH("hw.above4g_allow", &above4g_allow);
 TUNABLE_INT_FETCH("hw.above24g_allow", &above24g_allow);

sys/i386/i386/pmap.c

@@ -633,7 +633,7 @@ __CONCAT(PMTYPE, bootstrap)(vm_paddr_t firstaddr)
  * are required for promotion of the corresponding kernel virtual
  * addresses to superpage mappings.
  */
-vm_phys_add_seg(KPTphys, KPTphys + ptoa(nkpt));
+vm_phys_early_add_seg(KPTphys, KPTphys + ptoa(nkpt));
 /*
  * Initialize the first available kernel virtual address.

sys/vm/uma_core.c

@@ -2810,6 +2810,7 @@ uma_startup1(vm_offset_t virtual_avail)
 size_t ksize, zsize, size;
 uma_keg_t masterkeg;
 uintptr_t m;
+int domain;
 uint8_t pflag;
 bootstart = bootmem = virtual_avail;
@@ -2827,7 +2828,12 @@ uma_startup1(vm_offset_t virtual_avail)
 /* Allocate the zone of zones, zone of kegs, and zone of zones keg. */
 size = (zsize * 2) + ksize;
-m = (uintptr_t)startup_alloc(NULL, size, 0, &pflag, M_NOWAIT | M_ZERO);
+for (domain = 0; domain < vm_ndomains; domain++) {
+m = (uintptr_t)startup_alloc(NULL, size, domain, &pflag,
+M_NOWAIT | M_ZERO);
+if (m != 0)
+break;
+}
 zones = (uma_zone_t)m;
 m += zsize;
 kegs = (uma_zone_t)m;
@@ -3191,6 +3197,17 @@ item_dtor(uma_zone_t zone, void *item, int size, void *udata,
 }
 }
+static int
+item_domain(void *item)
+{
+int domain;
+domain = _vm_phys_domain(vtophys(item));
+KASSERT(domain >= 0 && domain < vm_ndomains,
+("%s: unknown domain for item %p", __func__, item));
+return (domain);
+}
 #if defined(INVARIANTS) || defined(DEBUG_MEMGUARD) || defined(WITNESS)
 #define UMA_ZALLOC_DEBUG
 static int
@@ -4001,7 +4018,7 @@ uma_zfree_smr(uma_zone_t zone, void *item)
 itemdomain = 0;
 #ifdef NUMA
 if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0)
-itemdomain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
+itemdomain = item_domain(item);
 #endif
 critical_enter();
 do {
@@ -4085,7 +4102,7 @@ uma_zfree_arg(uma_zone_t zone, void *item, void *udata)
 itemdomain = 0;
 #ifdef NUMA
 if ((uz_flags & UMA_ZONE_FIRSTTOUCH) != 0)
-itemdomain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
+itemdomain = item_domain(item);
 #endif
 critical_enter();
 do {
@@ -4159,7 +4176,7 @@ zone_free_cross(uma_zone_t zone, uma_bucket_t bucket, void *udata)
 ZONE_CROSS_LOCK(zone);
 while (bucket->ub_cnt > 0) {
 item = bucket->ub_bucket[bucket->ub_cnt - 1];
-domain = _vm_phys_domain(pmap_kextract((vm_offset_t)item));
+domain = item_domain(item);
 zdom = ZDOM_GET(zone, domain);
 if (zdom->uzd_cross == NULL) {
 zdom->uzd_cross = bucket_alloc(zone, udata, M_NOWAIT);
@@ -4182,8 +4199,7 @@ zone_free_cross(uma_zone_t zone, uma_bucket_t bucket, void *udata)
 while ((b = STAILQ_FIRST(&fullbuckets)) != NULL) {
 STAILQ_REMOVE_HEAD(&fullbuckets, ub_link);
-domain = _vm_phys_domain(pmap_kextract(
-(vm_offset_t)b->ub_bucket[0]));
+domain = item_domain(b->ub_bucket[0]);
 zone_put_bucket(zone, domain, b, udata, true);
 }
 }

sys/vm/vm_phys.c

@@ -82,6 +82,8 @@ domainset_t __read_mostly all_domains = DOMAINSET_T_INITIALIZER(0x1);
 struct vm_phys_seg __read_mostly vm_phys_segs[VM_PHYSSEG_MAX];
 int __read_mostly vm_phys_nsegs;
+static struct vm_phys_seg vm_phys_early_segs[8];
+static int vm_phys_early_nsegs;
 struct vm_phys_fictitious_seg;
 static int vm_phys_fictitious_cmp(struct vm_phys_fictitious_seg *,
@@ -653,18 +655,16 @@ _vm_phys_domain(vm_paddr_t pa)
 #ifdef NUMA
 int i;
-if (vm_ndomains == 1 || mem_affinity == NULL)
+if (vm_ndomains == 1)
 return (0);
-/*
- * Check for any memory that overlaps.
- */
 for (i = 0; mem_affinity[i].end != 0; i++)
 if (mem_affinity[i].start <= pa &&
 mem_affinity[i].end >= pa)
 return (mem_affinity[i].domain);
-#endif
+return (-1);
+#else
 return (0);
+#endif
 }
 /*
@@ -1611,6 +1611,21 @@ vm_phys_avail_split(vm_paddr_t pa, int i)
 return (0);
 }
+void
+vm_phys_early_add_seg(vm_paddr_t start, vm_paddr_t end)
+{
+struct vm_phys_seg *seg;
+if (vm_phys_early_nsegs == -1)
+panic("%s: called after initialization", __func__);
+if (vm_phys_early_nsegs == nitems(vm_phys_early_segs))
+panic("%s: ran out of early segments", __func__);
+seg = &vm_phys_early_segs[vm_phys_early_nsegs++];
+seg->start = start;
+seg->end = end;
+}
 /*
 * This routine allocates NUMA node specific memory before the page
 * allocator is bootstrapped.
@@ -1621,6 +1636,8 @@ vm_phys_early_alloc(int domain, size_t alloc_size)
 int i, mem_index, biggestone;
 vm_paddr_t pa, mem_start, mem_end, size, biggestsize, align;
+KASSERT(domain == -1 || (domain >= 0 && domain < vm_ndomains),
+("%s: invalid domain index %d", __func__, domain));
 /*
 * Search the mem_affinity array for the biggest address
@@ -1633,11 +1650,11 @@ vm_phys_early_alloc(int domain, size_t alloc_size)
 mem_end = -1;
 #ifdef NUMA
 if (mem_affinity != NULL) {
-for (i = 0; ; i++) {
+for (i = 0;; i++) {
 size = mem_affinity[i].end - mem_affinity[i].start;
 if (size == 0)
 break;
-if (mem_affinity[i].domain != domain)
+if (domain != -1 && mem_affinity[i].domain != domain)
 continue;
 if (size > biggestsize) {
 mem_index = i;
@@ -1699,6 +1716,7 @@ vm_phys_early_alloc(int domain, size_t alloc_size)
 void
 vm_phys_early_startup(void)
 {
+struct vm_phys_seg *seg;
 int i;
 for (i = 0; phys_avail[i + 1] != 0; i += 2) {
@@ -1706,6 +1724,12 @@ vm_phys_early_startup(void)
 phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
 }
+for (i = 0; i < vm_phys_early_nsegs; i++) {
+seg = &vm_phys_early_segs[i];
+vm_phys_add_seg(seg->start, seg->end);
+}
+vm_phys_early_nsegs = -1;
 #ifdef NUMA
 /* Force phys_avail to be split by domain. */
 if (mem_affinity != NULL) {

sys/vm/vm_phys.h

@@ -103,6 +103,7 @@ vm_page_t vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low,
 void vm_phys_set_pool(int pool, vm_page_t m, int order);
 boolean_t vm_phys_unfree_page(vm_page_t m);
 int vm_phys_mem_affinity(int f, int t);
+void vm_phys_early_add_seg(vm_paddr_t start, vm_paddr_t end);
 vm_paddr_t vm_phys_early_alloc(int domain, size_t alloc_size);
 void vm_phys_early_startup(void);
 int vm_phys_avail_largest(void);