Encapsulate phys_avail manipulation in a set of simple routines. Add a

NUMA-aware boot-time memory allocator that will be used to allocate
domain-correct structures early in boot.  Code partially submitted by gallatin.

Reviewed by:	gallatin, kib
Tested by:	pho
Sponsored by:	Netflix
Differential Revision:	https://reviews.freebsd.org/D21251
This commit is contained in:
Jeff Roberson 2019-08-18 07:06:31 +00:00
parent f49e79b56b
commit b7565d44df
3 changed files with 228 additions and 18 deletions

View File

@ -538,7 +538,7 @@ vm_page_startup(vm_offset_t vaddr)
char *list, *listend;
vm_offset_t mapped;
vm_paddr_t end, high_avail, low_avail, new_end, page_range, size;
vm_paddr_t biggestsize, last_pa, pa;
vm_paddr_t last_pa, pa;
u_long pagecount;
int biggestone, i, segind;
#ifdef WITNESS
@ -548,22 +548,10 @@ vm_page_startup(vm_offset_t vaddr)
long ii;
#endif
biggestsize = 0;
biggestone = 0;
vaddr = round_page(vaddr);
for (i = 0; phys_avail[i + 1]; i += 2) {
phys_avail[i] = round_page(phys_avail[i]);
phys_avail[i + 1] = trunc_page(phys_avail[i + 1]);
}
for (i = 0; phys_avail[i + 1]; i += 2) {
size = phys_avail[i + 1] - phys_avail[i];
if (size > biggestsize) {
biggestone = i;
biggestsize = size;
}
}
vm_phys_early_startup();
biggestone = vm_phys_avail_largest();
end = phys_avail[biggestone+1];
/*
@ -776,7 +764,8 @@ vm_page_startup(vm_offset_t vaddr)
* physical pages.
*/
for (i = 0; phys_avail[i + 1] != 0; i += 2)
vm_phys_add_seg(phys_avail[i], phys_avail[i + 1]);
if (vm_phys_avail_size(i) != 0)
vm_phys_add_seg(phys_avail[i], phys_avail[i + 1]);
/*
* Initialize the physical memory allocator.

View File

@ -1101,8 +1101,8 @@ vm_phys_free_pages(vm_page_t m, int order)
vm_page_t m_buddy;
KASSERT(m->order == VM_NFREEORDER,
("vm_phys_free_pages: page %p has unexpected order %d",
m, m->order));
("vm_phys_free_pages: page %p(%p) has unexpected order %d",
m, (void *)m->phys_addr, m->order));
KASSERT(m->pool < VM_NFREEPOOL,
("vm_phys_free_pages: page %p has unexpected pool %d",
m, m->pool));
@ -1501,6 +1501,222 @@ vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, u_long npages,
return (m_ret);
}
/*
 * Walk phys_avail in (start, end) pairs until an entry with a zero end is
 * reached and return that entry's index, i.e. the first unused slot (which
 * may be the terminating entry).  Panics if the walk ran past
 * PHYS_AVAIL_ENTRIES before finding the terminator.
 */
static int
vm_phys_avail_count(void)
{
	int slot;

	slot = 0;
	while (phys_avail[slot + 1] != 0)
		slot += 2;
	if (slot > PHYS_AVAIL_ENTRIES)
		panic("Improperly terminated phys_avail %d entries", slot);
	return (slot);
}
/*
 * Assert that the phys_avail entry starting at index 'i' is valid: both the
 * start (phys_avail[i]) and the end (phys_avail[i + 1]) must be page aligned
 * and the end must not precede the start.  Panics on the first violation
 * found; returns normally otherwise.
 */
static void
vm_phys_avail_check(int i)
{
	if (phys_avail[i] & PAGE_MASK)
		panic("Unaligned phys_avail[%d]: %#jx", i,
		    (intmax_t)phys_avail[i]);
	/* Report the misaligned end address itself, not the start. */
	if (phys_avail[i + 1] & PAGE_MASK)
		panic("Unaligned phys_avail[%d + 1]: %#jx", i,
		    (intmax_t)phys_avail[i + 1]);
	/* Fires when the end lies below the start, so say "start > end". */
	if (phys_avail[i + 1] < phys_avail[i])
		panic("phys_avail[%d] start %#jx > end %#jx", i,
		    (intmax_t)phys_avail[i], (intmax_t)phys_avail[i + 1]);
}
/*
 * Look up the phys_avail entry that contains 'pa'.  An entry matches when
 * start <= pa < end.  Returns the index of the entry's start slot, or -1 if
 * no entry before the terminator contains the address.
 */
static int
vm_phys_avail_find(vm_paddr_t pa)
{
	int seg;

	seg = 0;
	while (phys_avail[seg + 1] != 0) {
		if (pa >= phys_avail[seg] && pa < phys_avail[seg + 1])
			return (seg);
		seg += 2;
	}
	return (-1);
}
/*
 * Return the index of the largest phys_avail entry.  When several entries
 * share the largest size the first one wins; index 0 is returned if no
 * entry has a non-zero size.
 */
int
vm_phys_avail_largest(void)
{
	vm_paddr_t best_size, cur_size;
	int best, seg;

	best = 0;
	best_size = 0;
	for (seg = 0; phys_avail[seg + 1] != 0; seg += 2) {
		cur_size = vm_phys_avail_size(seg);
		/* Only a strictly larger entry replaces the current best. */
		if (cur_size <= best_size)
			continue;
		best_size = cur_size;
		best = seg;
	}
	return (best);
}
/*
 * Return the size of the phys_avail entry whose start slot is 'i', computed
 * as its end address minus its start address.
 */
vm_paddr_t
vm_phys_avail_size(int i)
{
	vm_paddr_t span;

	span = phys_avail[i + 1] - phys_avail[i];
	return (span);
}
/*
 * Split the phys_avail entry at index 'i' into two entries, [start, pa) and
 * [pa, end).  'pa' must lie strictly inside the entry; anything else panics.
 * Returns zero on success, or ENOSPC when phys_avail has no room for one
 * more entry.
 */
static int
vm_phys_avail_split(vm_paddr_t pa, int i)
{
	int used;

	vm_phys_avail_check(i);
	if (!(pa > phys_avail[i] && pa < phys_avail[i + 1]))
		panic("vm_phys_avail_split: invalid address");

	used = vm_phys_avail_count();
	if (used >= PHYS_AVAIL_ENTRIES)
		return (ENOSPC);

	/*
	 * Slide entry i and every used slot after it up by one pair, which
	 * leaves a duplicate of entry i's start at slot i.
	 * NOTE(review): the terminating pair itself is not copied; this
	 * appears to rely on the slots past the old terminator already
	 * being zero -- confirm.
	 */
	memmove(&phys_avail[i + 2], &phys_avail[i],
	    (used - i) * sizeof(phys_avail[0]));

	/* Close the first half at 'pa' and open the second half there. */
	phys_avail[i + 1] = pa;
	phys_avail[i + 2] = pa;

	vm_phys_avail_check(i);
	vm_phys_avail_check(i + 2);
	return (0);
}
/*
 * This routine allocates NUMA node specific memory before the page
 * allocator is bootstrapped.
 *
 * 'domain' selects the memory domain to allocate from; it is only consulted
 * when the kernel is built with NUMA and a mem_affinity table is present.
 * 'alloc_size' is rounded up to a whole number of pages.  The memory is
 * removed from phys_avail and its physical address is returned.  Single
 * page requests are taken from the front of the chosen segment, larger
 * requests from its end.  Panics if a multi-page request (plus alignment
 * slack) does not fit in the chosen segment.
 */
vm_paddr_t
vm_phys_early_alloc(int domain, size_t alloc_size)
{
#ifdef NUMA
	int mem_index;
#endif
	int i, biggestone;
	vm_paddr_t pa, mem_start, mem_end, size, biggestsize, align;

	/*
	 * Work in whole pages from the start so that the range filter below
	 * tests the amount that is actually carved off.
	 */
	alloc_size = round_page(alloc_size);

	/*
	 * Search the mem_affinity array for the biggest address
	 * range in the desired domain.  This is used to constrain
	 * the phys_avail selection below.  Without NUMA (or without a
	 * mem_affinity table) the whole address space is eligible.
	 */
	biggestsize = 0;
	mem_start = 0;
	mem_end = -1;
#ifdef NUMA
	mem_index = 0;
	if (mem_affinity != NULL) {
		for (i = 0; ; i++) {
			/* A zero-sized range terminates the table. */
			size = mem_affinity[i].end - mem_affinity[i].start;
			if (size == 0)
				break;
			if (mem_affinity[i].domain != domain)
				continue;
			if (size > biggestsize) {
				mem_index = i;
				biggestsize = size;
			}
		}
		/*
		 * NOTE(review): if no range matches 'domain', mem_index is
		 * still 0 and range 0 is used -- confirm this fallback is
		 * intended.
		 */
		mem_start = mem_affinity[mem_index].start;
		mem_end = mem_affinity[mem_index].end;
	}
#endif

	/*
	 * Now find the biggest physical segment within the desired
	 * numa domain.
	 */
	biggestsize = 0;
	biggestone = 0;
	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
		/* skip regions that are out of range */
		if (phys_avail[i + 1] - alloc_size < mem_start ||
		    phys_avail[i + 1] > mem_end)
			continue;
		size = vm_phys_avail_size(i);
		if (size > biggestsize) {
			biggestone = i;
			biggestsize = size;
		}
	}

	/*
	 * Grab single pages from the front to reduce fragmentation.
	 */
	if (alloc_size == PAGE_SIZE) {
		pa = phys_avail[biggestone];
		phys_avail[biggestone] += PAGE_SIZE;
		vm_phys_avail_check(biggestone);
		return (pa);
	}

	/*
	 * Naturally align large allocations.  'align' is how far the
	 * segment end sits above a multiple of alloc_size.
	 * NOTE(review): the mask only yields natural alignment when
	 * alloc_size is a power of two -- confirm callers guarantee that.
	 */
	align = phys_avail[biggestone + 1] & (alloc_size - 1);
	if (alloc_size + align > biggestsize)
		panic("cannot find a large enough size\n");

	/*
	 * Try to keep the misaligned tail as a separate phys_avail entry.
	 * On success the chosen entry now ends at the aligned address; if
	 * the split fails the tail is simply dropped.
	 */
	if (align != 0 &&
	    vm_phys_avail_split(phys_avail[biggestone + 1] - align,
	    biggestone) != 0) {
		/* Wasting memory. */
		phys_avail[biggestone + 1] -= align;
	}

	/* Carve the allocation off the (now aligned) end of the entry. */
	phys_avail[biggestone + 1] -= alloc_size;
	vm_phys_avail_check(biggestone);
	pa = phys_avail[biggestone + 1];
	return (pa);
}
/*
 * Prepare phys_avail for early allocations: round every entry's start up
 * and its end down to a page boundary and, on NUMA kernels with a
 * mem_affinity table, split entries so that none straddles the start or end
 * of an affinity range.
 */
void
vm_phys_early_startup(void)
{
	int seg;

	/* Shrink each entry inward to whole pages. */
	seg = 0;
	while (phys_avail[seg + 1] != 0) {
		phys_avail[seg] = round_page(phys_avail[seg]);
		phys_avail[seg + 1] = trunc_page(phys_avail[seg + 1]);
		seg += 2;
	}

#ifdef NUMA
	/* Force phys_avail to be split by domain. */
	if (mem_affinity != NULL) {
		int hit, range;

		range = 0;
		while (mem_affinity[range].end != 0) {
			/*
			 * Cut at the lower bound unless an entry already
			 * begins there.  The result of the split (which may
			 * be ENOSPC) is not examined.
			 */
			hit = vm_phys_avail_find(mem_affinity[range].start);
			if (hit != -1 &&
			    phys_avail[hit] != mem_affinity[range].start)
				vm_phys_avail_split(mem_affinity[range].start,
				    hit);

			/* Same treatment for the upper bound. */
			hit = vm_phys_avail_find(mem_affinity[range].end);
			if (hit != -1 &&
			    phys_avail[hit] != mem_affinity[range].end)
				vm_phys_avail_split(mem_affinity[range].end,
				    hit);

			range++;
		}
	}
#endif
}
#ifdef DDB
/*
* Show the number of physical pages in each of the free lists.

View File

@ -103,6 +103,11 @@ vm_page_t vm_phys_scan_contig(int domain, u_long npages, vm_paddr_t low,
void vm_phys_set_pool(int pool, vm_page_t m, int order);
boolean_t vm_phys_unfree_page(vm_page_t m);
int vm_phys_mem_affinity(int f, int t);
/* Allocate memory for 'domain' out of phys_avail before the page allocator is bootstrapped. */
vm_paddr_t vm_phys_early_alloc(int domain, size_t alloc_size);
/* Page-align phys_avail and, under NUMA, split it along mem_affinity ranges. */
void vm_phys_early_startup(void);
/* Return the index of the largest phys_avail entry. */
int vm_phys_avail_largest(void);
/* Return the size (end minus start) of the phys_avail entry at index 'i'. */
vm_paddr_t vm_phys_avail_size(int i);
/*
*