Allocate amd64's page array using pages and page directory pages from the

NUMA domain that the pages describe.  Patch original from gallatin.

Reviewed by:	kib
Tested by:	pho
Sponsored by:	Netflix
Differential Revision:	https://reviews.freebsd.org/D21252
This commit is contained in:
Jeff Roberson 2019-08-18 23:07:56 +00:00
parent 7f32f0e227
commit 3e5e1b5135
6 changed files with 138 additions and 25 deletions

View File

@ -383,6 +383,9 @@ static u_int64_t DMPDphys; /* phys addr of direct mapped level 2 */
static u_int64_t DMPDPphys; /* phys addr of direct mapped level 3 */
static int ndmpdpphys; /* number of DMPDPphys pages */
static uint64_t PAPDPphys; /* phys addr of page array level 3 */
static int npapdpphys; /* number of PAPDPphys pages */
static vm_paddr_t KERNend; /* phys addr of end of bootstrap data */
/*
@ -1427,6 +1430,16 @@ create_pagetables(vm_paddr_t *firstaddr)
pml4_entry_t *p4_p;
uint64_t DMPDkernphys;
npapdpphys = howmany(ptoa(Maxmem) / sizeof(struct vm_page), NBPML4);
if (npapdpphys > NPAPML4E) {
printf("NDMPML4E limits system to %lu GB\n",
(NDMPML4E * 512) * (PAGE_SIZE / sizeof(struct vm_page)));
npapdpphys = NPAPML4E;
Maxmem = atop(NPAPML4E * NBPML4 *
(PAGE_SIZE / sizeof(struct vm_page)));
}
PAPDPphys = allocpages(firstaddr, npapdpphys);
/* Allocate page table pages for the direct map */
ndmpdp = howmany(ptoa(Maxmem), NBPDP);
if (ndmpdp < 4) /* Minimum 4GB of dirmap */
@ -1573,6 +1586,12 @@ create_pagetables(vm_paddr_t *firstaddr)
p4_p[KPML4BASE + i] = KPDPphys + ptoa(i);
p4_p[KPML4BASE + i] |= X86_PG_RW | X86_PG_V;
}
/* Connect the page array slots up to the pml4. */
for (i = 0; i < npapdpphys; i++) {
p4_p[PAPML4I + i] = PAPDPphys + ptoa(i);
p4_p[PAPML4I + i] |= X86_PG_RW | X86_PG_V | pg_nx;
}
}
/*
@ -3387,6 +3406,11 @@ pmap_pinit_pml4(vm_page_t pml4pg)
X86_PG_V;
}
for (i = 0; i < npapdpphys; i++) {
pm_pml4[PAPML4I + i] = (PAPDPphys + ptoa(i)) | X86_PG_RW |
X86_PG_V;
}
/* install self-referential address mapping entry(s) */
pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | X86_PG_V | X86_PG_RW |
X86_PG_A | X86_PG_M;
@ -3743,6 +3767,8 @@ pmap_release(pmap_t pmap)
pmap->pm_pml4[KPML4BASE + i] = 0;
for (i = 0; i < ndmpdpphys; i++)/* Direct Map */
pmap->pm_pml4[DMPML4I + i] = 0;
for (i = 0; i < npapdpphys; i++)
pmap->pm_pml4[PAPML4I + i] = 0;
pmap->pm_pml4[PML4PML4I] = 0; /* Recursive Mapping */
for (i = 0; i < lm_ents; i++) /* Large Map */
pmap->pm_pml4[LMSPML4I + i] = 0;
@ -3780,6 +3806,44 @@ kvm_free(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
0, 0, kvm_free, "LU", "Amount of KVM free");
void
pmap_page_array_startup(long pages)
{
pdp_entry_t *pdpe;
pd_entry_t *pde, newpdir;
vm_offset_t va, start, end;
vm_paddr_t pa;
long pfn;
int domain, i;
vm_page_array_size = pages;
start = va = PA_MIN_ADDRESS;
end = va + (pages * sizeof(struct vm_page));
while (va < end) {
pfn = first_page + ((va - start) / sizeof(struct vm_page));
domain = _vm_phys_domain(ctob(pfn));
pdpe = pmap_pdpe(kernel_pmap, va);
if ((*pdpe & X86_PG_V) == 0) {
pa = vm_phys_early_alloc(domain, PAGE_SIZE);
bzero((void *)PHYS_TO_DMAP(pa), PAGE_SIZE);
*pdpe = (pdp_entry_t)(pa | X86_PG_V | X86_PG_RW |
X86_PG_A | X86_PG_M);
continue; /* try again */
}
pde = pmap_pdpe_to_pde(pdpe, va);
if ((*pde & X86_PG_V) != 0)
panic("Unexpected pde");
pa = vm_phys_early_alloc(domain, NBPDR);
for (i = 0; i < NPDEPG; i++)
dump_add_page(pa + (i * PAGE_SIZE));
newpdir = (pd_entry_t)(pa | X86_PG_V | X86_PG_RW | X86_PG_A |
X86_PG_M | PG_PS | pg_g | pg_nx);
pde_store(pde, newpdir);
va += NBPDR;
}
}
/*
* grow the number of kernel page table entries, if needed
*/

View File

@ -200,6 +200,13 @@
*/
#define NDMPML4E 8
/*
* NPAPML4E is the maximum number of PML4 entries that will be
* used to implement the page array. This should be roughly 3% of
* NDMPML4E owing to 3% overhead for struct vm_page.
*/
#define NPAPML4E 1
/*
* These values control the layout of virtual memory. The starting address
* of the direct map, which is controlled by DMPML4I, must be a multiple of
@ -219,7 +226,8 @@
#define PML4PML4I (NPML4EPG/2) /* Index of recursive pml4 mapping */
#define KPML4BASE (NPML4EPG-NKPML4E) /* KVM at highest addresses */
#define DMPML4I rounddown(KPML4BASE-NDMPML4E, NDMPML4E) /* Below KVM */
#define PAPML4I (KPML4BASE-1-NPAPML4E) /* Below KVM */
#define DMPML4I rounddown(PAPML4I-NDMPML4E, NDMPML4E) /* Below pages */
#define KPML4I (NPML4EPG-1)
#define KPDPI (NPDPEPG-2) /* kernbase at -2GB */
@ -467,6 +475,7 @@ int pmap_pkru_set(pmap_t pmap, vm_offset_t sva, vm_offset_t eva,
u_int keyidx, int flags);
void pmap_thread_init_invl_gen(struct thread *td);
int pmap_vmspace_copy(pmap_t dst_pmap, pmap_t src_pmap);
void pmap_page_array_startup(long count);
#endif /* _KERNEL */
/* Return various clipped indexes for a given VA */

View File

@ -160,7 +160,9 @@
* 0xffff808000000000 - 0xffff847fffffffff large map (can be tuned up)
* 0xffff848000000000 - 0xfffff7ffffffffff unused (large map extends there)
* 0xfffff80000000000 - 0xfffffbffffffffff 4TB direct map
* 0xfffffc0000000000 - 0xfffffdffffffffff unused
* 0xfffffc0000000000 - 0xfffffcffffffffff unused
* 0xfffffd0000000000 - 0xfffffd7fffffffff page array 512GB
* 0xfffffd8000000000 - 0xfffffdffffffffff unused
* 0xfffffe0000000000 - 0xffffffffffffffff 2TB kernel map
*
* Within the kernel map:
@ -175,6 +177,8 @@
#define DMAP_MIN_ADDRESS KVADDR(DMPML4I, 0, 0, 0)
#define DMAP_MAX_ADDRESS KVADDR(DMPML4I + NDMPML4E, 0, 0, 0)
#define PA_MIN_ADDRESS KVADDR(PAPML4I, 0, 0, 0)
#define LARGEMAP_MIN_ADDRESS KVADDR(LMSPML4I, 0, 0, 0)
#define LARGEMAP_MAX_ADDRESS KVADDR(LMEPML4I + 1, 0, 0, 0)
@ -211,6 +215,12 @@
(uintmax_t)x)); \
(x) & ~DMAP_MIN_ADDRESS; })
/*
* amd64 statically allocates the page array address so that it can
* be more easily allocated on the correct memory domains.
*/
#define PMAP_HAS_PAGE_ARRAY 1
/*
* How many physical pages per kmem arena virtual page.
*/

View File

@ -135,7 +135,11 @@ static int vm_pageproc_waiters;
*/
vm_page_t bogus_page;
#ifdef PMAP_HAS_PAGE_ARRAY
vm_page_t vm_page_array = (vm_page_t)PA_MIN_ADDRESS;
#else
vm_page_t vm_page_array;
#endif
long vm_page_array_size;
long first_page;
@ -522,6 +526,31 @@ vm_page_init_page(vm_page_t m, vm_paddr_t pa, int segind)
pmap_page_init(m);
}
#ifndef PMAP_HAS_PAGE_ARRAY
/*
 * Take the memory for the vm_page array from the top of the physical
 * range that ends at "end" and map it using the KVA cursor *vaddr.
 *
 * Stores the mapped address in vm_page_array and "page_range" in
 * vm_page_array_size.  Returns the page-truncated physical address where
 * the array begins, i.e. the new end of the remaining range.
 */
static vm_paddr_t
vm_page_array_alloc(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t page_range)
{
	vm_paddr_t array_pa;
	vm_offset_t array_va;

	/*
	 * Leave one unmapped page of KVA in front of the array so that a
	 * reference to vm_page_array[-1] faults.  The guard only covers
	 * KVM addresses; an out-of-bounds access made through the direct
	 * map is not caught.
	 */
	*vaddr += PAGE_SIZE;

	/*
	 * Reserve the physical memory for the page structures and map it
	 * read/write.
	 */
	array_pa = trunc_page(end - page_range * sizeof(struct vm_page));
	array_va = pmap_map(vaddr, array_pa, end,
	    VM_PROT_READ | VM_PROT_WRITE);
	vm_page_array = (vm_page_t)array_va;
	vm_page_array_size = page_range;

	return (array_pa);
}
#endif
/*
* vm_page_startup:
*
@ -693,6 +722,11 @@ vm_page_startup(vm_offset_t vaddr)
#error "Either VM_PHYSSEG_DENSE or VM_PHYSSEG_SPARSE must be defined."
#endif
#ifdef PMAP_HAS_PAGE_ARRAY
pmap_page_array_startup(size / PAGE_SIZE);
biggestone = vm_phys_avail_largest();
end = new_end = phys_avail[biggestone + 1];
#else
#ifdef VM_PHYSSEG_DENSE
/*
* In the VM_PHYSSEG_DENSE case, the number of pages can account for
@ -723,31 +757,15 @@ vm_page_startup(vm_offset_t vaddr)
}
}
end = new_end;
/*
* Reserve an unmapped guard page to trap access to vm_page_array[-1].
* However, because this page is allocated from KVM, out-of-bounds
* accesses using the direct map will not be trapped.
*/
vaddr += PAGE_SIZE;
/*
* Allocate physical memory for the page structures, and map it.
*/
new_end = trunc_page(end - page_range * sizeof(struct vm_page));
mapped = pmap_map(&vaddr, new_end, end,
VM_PROT_READ | VM_PROT_WRITE);
vm_page_array = (vm_page_t)mapped;
vm_page_array_size = page_range;
new_end = vm_page_array_alloc(&vaddr, end, page_range);
#endif
#if VM_NRESERVLEVEL > 0
/*
* Allocate physical memory for the reservation management system's
* data structures, and map it.
*/
if (high_avail == end)
high_avail = new_end;
new_end = vm_reserv_startup(&vaddr, new_end, high_avail);
new_end = vm_reserv_startup(&vaddr, new_end);
#endif
#if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) || \
defined(__riscv)

View File

@ -1360,10 +1360,23 @@ vm_reserv_size(int level)
* management system's data structures, in particular, the reservation array.
*/
vm_paddr_t
vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t high_water)
vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end)
{
vm_paddr_t new_end;
vm_paddr_t new_end, high_water;
size_t size;
int i;
high_water = phys_avail[1];
for (i = 0; i < vm_phys_nsegs; i++) {
if (vm_phys_segs[i].end > high_water)
high_water = vm_phys_segs[i].end;
}
/* Skip the first chunk. It is already accounted for. */
for (i = 2; phys_avail[i + 1] != 0; i += 2) {
if (phys_avail[i + 1] > high_water)
high_water = phys_avail[i + 1];
}
/*
* Calculate the size (in bytes) of the reservation array. Round up

View File

@ -66,8 +66,7 @@ boolean_t vm_reserv_reclaim_inactive(int domain);
void vm_reserv_rename(vm_page_t m, vm_object_t new_object,
vm_object_t old_object, vm_pindex_t old_object_offset);
int vm_reserv_size(int level);
vm_paddr_t vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end,
vm_paddr_t high_water);
vm_paddr_t vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end);
vm_page_t vm_reserv_to_superpage(vm_page_t m);
#endif /* VM_NRESERVLEVEL > 0 */