From 9d75f0dc75033e8d99c6d9f9328702f4aef27179 Mon Sep 17 00:00:00 2001
From: Mark Johnston
Date: Tue, 3 Sep 2019 13:18:51 +0000
Subject: [PATCH] Map the vm_page array into KVA on amd64.

r351198 allows the kernel to use domain-local memory to back the vm_page
array (up to 2MB boundaries) and reserves a separate PML4 entry for that
purpose.  One consequence of that change is that the vm_page array is no
longer present in minidumps, which only add pages mapped above
VM_MIN_KERNEL_ADDRESS.

To avoid the friction caused by having kernel data structures mapped
below VM_MIN_KERNEL_ADDRESS, map the vm_page array starting at
VM_MIN_KERNEL_ADDRESS instead of using a dedicated PML4 entry.

Reviewed by:	kib
Discussed with:	jeff
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D21491
---
 sys/amd64/amd64/pmap.c      | 38 +++++++++-----------------------------
 sys/amd64/include/vmparam.h | 11 +++++------
 sys/vm/vm_kern.c            | 14 +++++++++++++-
 sys/vm/vm_page.c            |  4 ----
 4 files changed, 27 insertions(+), 40 deletions(-)

diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index ef97fa081c8f..2bba503a3cdc 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -384,9 +384,6 @@ static u_int64_t	DMPDphys;	/* phys addr of direct mapped level 2 */
 static u_int64_t	DMPDPphys;	/* phys addr of direct mapped level 3 */
 static int		ndmpdpphys;	/* number of DMPDPphys pages */
 
-static uint64_t	PAPDPphys;	/* phys addr of page array level 3 */
-static int	npapdpphys;	/* number of PAPDPphys pages */
-
 static vm_paddr_t	KERNend;	/* phys addr of end of bootstrap data */
 
 /*
@@ -1431,16 +1428,6 @@ create_pagetables(vm_paddr_t *firstaddr)
 	pml4_entry_t *p4_p;
 	uint64_t DMPDkernphys;
 
-	npapdpphys = howmany(ptoa(Maxmem) / sizeof(struct vm_page), NBPML4);
-	if (npapdpphys > NPAPML4E) {
-		printf("NDMPML4E limits system to %lu GB\n",
-		    (NDMPML4E * 512) * (PAGE_SIZE / sizeof(struct vm_page)));
-		npapdpphys = NPAPML4E;
-		Maxmem = atop(NPAPML4E * NBPML4 *
-		    (PAGE_SIZE / sizeof(struct vm_page)));
-	}
-	PAPDPphys = allocpages(firstaddr, npapdpphys);
-
 	/* Allocate page table pages for the direct map */
 	ndmpdp = howmany(ptoa(Maxmem), NBPDP);
 	if (ndmpdp < 4)		/* Minimum 4GB of dirmap */
@@ -1587,12 +1574,6 @@
 		p4_p[KPML4BASE + i] = KPDPphys + ptoa(i);
 		p4_p[KPML4BASE + i] |= X86_PG_RW | X86_PG_V;
 	}
-
-	/* Connect the page array slots up to the pml4. */
-	for (i = 0; i < npapdpphys; i++) {
-		p4_p[PAPML4I + i] = PAPDPphys + ptoa(i);
-		p4_p[PAPML4I + i] |= X86_PG_RW | X86_PG_V | pg_nx;
-	}
 }
 
 /*
@@ -3456,11 +3437,6 @@ pmap_pinit_pml4(vm_page_t pml4pg)
 		    X86_PG_V;
 	}
 
-	for (i = 0; i < npapdpphys; i++) {
-		pm_pml4[PAPML4I + i] = (PAPDPphys + ptoa(i)) | X86_PG_RW |
-		    X86_PG_V;
-	}
-
 	/* install self-referential address mapping entry(s) */
 	pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | X86_PG_V | X86_PG_RW |
 	    X86_PG_A | X86_PG_M;
@@ -3817,8 +3793,6 @@ pmap_release(pmap_t pmap)
 		pmap->pm_pml4[KPML4BASE + i] = 0;
 	for (i = 0; i < ndmpdpphys; i++)/* Direct Map */
 		pmap->pm_pml4[DMPML4I + i] = 0;
-	for (i = 0; i < npapdpphys; i++)
-		pmap->pm_pml4[PAPML4I + i] = 0;
 	pmap->pm_pml4[PML4PML4I] = 0;	/* Recursive Mapping */
 	for (i = 0; i < lm_ents; i++)	/* Large Map */
 		pmap->pm_pml4[LMSPML4I + i] = 0;
@@ -3856,6 +3830,10 @@ kvm_free(SYSCTL_HANDLER_ARGS)
 SYSCTL_PROC(_vm, OID_AUTO, kvm_free, CTLTYPE_LONG|CTLFLAG_RD,
     0, 0, kvm_free, "LU", "Amount of KVM free");
 
+/*
+ * Allocate physical memory for the vm_page array and map it into KVA,
+ * attempting to back the vm_pages with domain-local memory.
+ */
 void
 pmap_page_array_startup(long pages)
 {
@@ -3868,14 +3846,15 @@ pmap_page_array_startup(long pages)
 
 	vm_page_array_size = pages;
 
-	start = va = PA_MIN_ADDRESS;
-	end = va + (pages * sizeof(struct vm_page));
+	start = va = VM_MIN_KERNEL_ADDRESS;
+	end = va + pages * sizeof(struct vm_page);
 	while (va < end) {
-		pfn = first_page + ((va - start) / sizeof(struct vm_page));
+		pfn = first_page + (va - start) / sizeof(struct vm_page);
 		domain = _vm_phys_domain(ctob(pfn));
 		pdpe = pmap_pdpe(kernel_pmap, va);
 		if ((*pdpe & X86_PG_V) == 0) {
 			pa = vm_phys_early_alloc(domain, PAGE_SIZE);
+			dump_add_page(pa);
 			bzero((void *)PHYS_TO_DMAP(pa), PAGE_SIZE);
 			*pdpe = (pdp_entry_t)(pa | X86_PG_V | X86_PG_RW |
 			    X86_PG_A | X86_PG_M);
@@ -3892,6 +3871,7 @@
 		pde_store(pde, newpdir);
 		va += NBPDR;
 	}
+	vm_page_array = (vm_page_t)start;
 }
 
 /*
diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h
index 70f0aa25f03d..40f91b67313d 100644
--- a/sys/amd64/include/vmparam.h
+++ b/sys/amd64/include/vmparam.h
@@ -160,13 +160,12 @@
  * 0xffff808000000000 - 0xffff847fffffffff   large map (can be tuned up)
  * 0xffff848000000000 - 0xfffff7ffffffffff   unused (large map extends there)
  * 0xfffff80000000000 - 0xfffffbffffffffff   4TB direct map
- * 0xfffffc0000000000 - 0xfffffcffffffffff   unused
- * 0xfffffd0000000000 - 0xfffffd7fffffffff   page array 512GB
- * 0xfffffd8000000000 - 0xfffffdffffffffff   unused
+ * 0xfffffc0000000000 - 0xfffffdffffffffff   unused
  * 0xfffffe0000000000 - 0xffffffffffffffff   2TB kernel map
  *
  * Within the kernel map:
  *
+ * 0xfffffe0000000000                        vm_page_array
  * 0xffffffff80000000                        KERNBASE
  */
 
@@ -216,10 +215,10 @@
 	(x) & ~DMAP_MIN_ADDRESS; })
 
 /*
- * amd64 statically allocates the page array address so that it can
- * be more easily allocated on the correct memory domains.
+ * amd64 maps the page array into KVA so that it can be more easily
+ * allocated on the correct memory domains.
  */
-#define PMAP_HAS_PAGE_ARRAY 1
+#define	PMAP_HAS_PAGE_ARRAY	1
 
 /*
  * How many physical pages per kmem arena virtual page.
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index 324c9cce10f6..482ead0d1180 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -759,7 +759,7 @@ kmem_init(vm_offset_t start, vm_offset_t end)
 	vm_map_lock(m);
 	/* N.B.: cannot use kgdb to debug, starting with this assignment ... */
 	kernel_map = m;
-	(void) vm_map_insert(m, NULL, (vm_ooffset_t) 0,
+	(void)vm_map_insert(m, NULL, 0,
 #ifdef __amd64__
 	    KERNBASE,
 #else
@@ -767,6 +767,18 @@
 #endif
 	    start, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
 	/* ... and ending with the completion of the above `insert' */
+
+#ifdef __amd64__
+	/*
+	 * Mark KVA used for the page array as allocated.  Other platforms
+	 * that handle vm_page_array allocation can simply adjust virtual_avail
+	 * instead.
+	 */
+	(void)vm_map_insert(m, NULL, 0, (vm_offset_t)vm_page_array,
+	    (vm_offset_t)vm_page_array + round_2mpage(vm_page_array_size *
+	    sizeof(struct vm_page)),
+	    VM_PROT_RW, VM_PROT_RW, MAP_NOFAULT);
+#endif
 	vm_map_unlock(m);
 
 	/*
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 8a7c8765cd15..6078ce5e6b77 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -135,11 +135,7 @@ static int vm_pageproc_waiters;
  */
 vm_page_t bogus_page;
 
-#ifdef PMAP_HAS_PAGE_ARRAY
-vm_page_t vm_page_array = (vm_page_t)PA_MIN_ADDRESS;
-#else
 vm_page_t vm_page_array;
-#endif
 long vm_page_array_size;
 long first_page;
 
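
Notes (editorial sketches; not part of the committed patch):

The core of the new pmap_page_array_startup() is a loop that maps the
array one 2MB page at a time, pulling the backing memory from the NUMA
domain that owns the physical pages described by that slice of the
array.  The sketch below restates that loop as standalone kernel-style
C.  The PDP-page bootstrap is elided, and the pde lookup and the exact
PDE flag set (PG_PS, pg_g, pg_nx) are paraphrased from context the diff
does not show, so treat them as assumptions rather than a verbatim
excerpt.

/*
 * Sketch of the 2MB mapping loop in pmap_page_array_startup().
 * Illustrative only: the PDP-page bootstrap is elided, and the PDE
 * flag set is assumed from context not visible in the diff above.
 */
static void
page_array_map_sketch(long pages)
{
	vm_offset_t va, start, end;
	vm_paddr_t pa;
	pd_entry_t *pde, newpdir;
	long i, pfn;
	int domain;

	start = va = VM_MIN_KERNEL_ADDRESS;
	end = start + pages * sizeof(struct vm_page);
	while (va < end) {
		/*
		 * The vm_pages at this VA describe physical pages starting
		 * at pfn; allocate the backing 2MB chunk from that pfn's
		 * domain so each vm_page is domain-local.
		 */
		pfn = first_page + (va - start) / sizeof(struct vm_page);
		domain = _vm_phys_domain(ctob(pfn));
		pa = vm_phys_early_alloc(domain, NBPDR);

		/*
		 * Register each 4KB backing page for inclusion in
		 * minidumps, mirroring the dump_add_page() calls the
		 * patch adds.  The array contents themselves are
		 * initialized later by vm_page_startup().
		 */
		for (i = 0; i < NBPDR / PAGE_SIZE; i++)
			dump_add_page(pa + ptoa(i));

		/* Install one 2MB superpage mapping for the chunk. */
		newpdir = (pd_entry_t)(pa | X86_PG_V | X86_PG_RW |
		    X86_PG_A | X86_PG_M | PG_PS | pg_g | pg_nx);
		pde = pmap_pde(kernel_pmap, va);
		pde_store(pde, newpdir);
		va += NBPDR;
	}
	vm_page_array = (vm_page_t)start;
}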
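
The vm_map_insert() added to kmem_init() records the array's KVA as
already allocated in the kernel_map, so later kernel map allocations
cannot land on top of it.  Because the array is mapped with 2MB pages,
the reserved range is rounded up with round_2mpage().  The toy program
below is not FreeBSD code: NBPDR and round_2mpage() are userland
stand-ins for the kernel macros, and the 104-byte struct vm_page size
is an assumption made only for illustration.

#include <stdio.h>

/* Userland stand-ins for the kernel's NBPDR and round_2mpage(). */
#define NBPDR		(1UL << 21)	/* bytes mapped by one 2MB PDE */
#define round_2mpage(x)	(((unsigned long)(x) + NBPDR - 1) & ~(NBPDR - 1))

int
main(void)
{
	unsigned long pages = 1000000;	/* hypothetical ~3.8GB machine */
	unsigned long sz = pages * 104;	/* assumed sizeof(struct vm_page) */

	printf("array: %lu bytes; reserved: %lu bytes (%lu 2MB mappings)\n",
	    sz, round_2mpage(sz), round_2mpage(sz) / NBPDR);
	return (0);
}

For these inputs the 104000000-byte array gets a 104857600-byte
reservation, i.e. 50 2MB mappings.  The reservation uses VM_PROT_RW
(read/write) rather than VM_PROT_ALL, matching the non-executable
mapping of the array.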
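
The vmparam.h hunk retires the dedicated 512GB page-array slot at
0xfffffd0000000000; the array now lives at the bottom of the 2TB kernel
map, i.e. at VM_MIN_KERNEL_ADDRESS.  A quick standalone check (again,
not FreeBSD code) confirms the arithmetic in the layout comment:

#include <stdio.h>
#include <stdint.h>

int
main(void)
{
	uint64_t kva = 0xfffffe0000000000ULL;	/* VM_MIN_KERNEL_ADDRESS */
	unsigned slot = (kva >> 39) & 0x1ff;	/* PML4 index = bits 47:39 */

	/* Slots 508-511: 4 slots x 512GB = the 2TB kernel map. */
	printf("kernel map spans PML4 slots %u-511 (%u x 512GB = %uTB)\n",
	    slot, 512 - slot, (512 - slot) / 2);
	return (0);
}

Dropping the separate PML4 slot is also what lets pmap_pinit_pml4() and
pmap_release() shed their PAPML4I bookkeeping in the pmap.c hunks above.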