From 7988971a9993c84fc80a119399712b8d7cd8e58f Mon Sep 17 00:00:00 2001
From: D Scott Phillips
Date: Mon, 21 Sep 2020 22:22:53 +0000
Subject: [PATCH] vm_reserv: Sparsify the vm_reserv_array when VM_PHYSSEG_SPARSE

On an Ampere Altra system, the physical memory is populated sparsely
within the physical address space, with only about 0.4% of physical
addresses backed by RAM in the range [0, last_pa].

This is causing the vm_reserv_array to be over-sized by a few orders
of magnitude, wasting roughly 5 GiB on a system with 256 GiB of RAM.

The sparse allocation of vm_reserv_array is controlled by defining
VM_PHYSSEG_SPARSE, with the dense allocation still remaining for
platforms with VM_PHYSSEG_DENSE.

Reviewed by:	markj, alc, kib
Approved by:	scottl (implicit)
MFC after:	1 week
Sponsored by:	Ampere Computing, Inc.
Differential Revision:	https://reviews.freebsd.org/D26130
---
 sys/vm/vm_phys.h   |  3 ++
 sys/vm/vm_reserv.c | 71 +++++++++++++++++++++++++++++++++++-----------
 2 files changed, 58 insertions(+), 16 deletions(-)

diff --git a/sys/vm/vm_phys.h b/sys/vm/vm_phys.h
index 5ad2ce11445b..36bc455bbe33 100644
--- a/sys/vm/vm_phys.h
+++ b/sys/vm/vm_phys.h
@@ -69,6 +69,9 @@ struct vm_phys_seg {
 	vm_paddr_t	start;
 	vm_paddr_t	end;
 	vm_page_t	first_page;
+#if VM_NRESERVLEVEL > 0
+	vm_reserv_t	first_reserv;
+#endif
 	int		domain;
 	struct vm_freelist (*free_queues)[VM_NFREEPOOL][VM_NFREEORDER_MAX];
 };
diff --git a/sys/vm/vm_reserv.c b/sys/vm/vm_reserv.c
index c08895164171..9c81de5b5c9b 100644
--- a/sys/vm/vm_reserv.c
+++ b/sys/vm/vm_reserv.c
@@ -333,11 +333,17 @@ sysctl_vm_reserv_fullpop(SYSCTL_HANDLER_ARGS)
 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
 		seg = &vm_phys_segs[segind];
 		paddr = roundup2(seg->start, VM_LEVEL_0_SIZE);
+#ifdef VM_PHYSSEG_SPARSE
+		rv = seg->first_reserv + (paddr >> VM_LEVEL_0_SHIFT) -
+		    (seg->start >> VM_LEVEL_0_SHIFT);
+#else
+		rv = &vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT];
+#endif
 		while (paddr + VM_LEVEL_0_SIZE > paddr &&
 		    paddr + VM_LEVEL_0_SIZE <= seg->end) {
-			rv = &vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT];
 			fullpop += rv->popcnt == VM_LEVEL_0_NPAGES;
 			paddr += VM_LEVEL_0_SIZE;
+			rv++;
 		}
 	}
 	return (sysctl_handle_int(oidp, &fullpop, 0, req));
@@ -496,8 +502,15 @@ vm_reserv_depopulate(vm_reserv_t rv, int index)
 static __inline vm_reserv_t
 vm_reserv_from_page(vm_page_t m)
 {
+#ifdef VM_PHYSSEG_SPARSE
+	struct vm_phys_seg *seg;
 
+	seg = &vm_phys_segs[m->segind];
+	return (seg->first_reserv + (VM_PAGE_TO_PHYS(m) >> VM_LEVEL_0_SHIFT) -
+	    (seg->start >> VM_LEVEL_0_SHIFT));
+#else
 	return (&vm_reserv_array[VM_PAGE_TO_PHYS(m) >> VM_LEVEL_0_SHIFT]);
+#endif
 }
 
 /*
@@ -1054,22 +1067,38 @@ vm_reserv_init(void)
 	struct vm_phys_seg *seg;
 	struct vm_reserv *rv;
 	struct vm_reserv_domain *rvd;
+#ifdef VM_PHYSSEG_SPARSE
+	vm_pindex_t used;
+#endif
 	int i, j, segind;
 
 	/*
 	 * Initialize the reservation array.  Specifically, initialize the
 	 * "pages" field for every element that has an underlying superpage.
 	 */
+#ifdef VM_PHYSSEG_SPARSE
+	used = 0;
+#endif
 	for (segind = 0; segind < vm_phys_nsegs; segind++) {
 		seg = &vm_phys_segs[segind];
+#ifdef VM_PHYSSEG_SPARSE
+		seg->first_reserv = &vm_reserv_array[used];
+		used += howmany(seg->end, VM_LEVEL_0_SIZE) -
+		    seg->start / VM_LEVEL_0_SIZE;
+#else
+		seg->first_reserv =
+		    &vm_reserv_array[seg->start >> VM_LEVEL_0_SHIFT];
+#endif
 		paddr = roundup2(seg->start, VM_LEVEL_0_SIZE);
+		rv = seg->first_reserv + (paddr >> VM_LEVEL_0_SHIFT) -
+		    (seg->start >> VM_LEVEL_0_SHIFT);
 		while (paddr + VM_LEVEL_0_SIZE > paddr &&
 		    paddr + VM_LEVEL_0_SIZE <= seg->end) {
-			rv = &vm_reserv_array[paddr >> VM_LEVEL_0_SHIFT];
 			rv->pages = PHYS_TO_VM_PAGE(paddr);
 			rv->domain = seg->domain;
 			mtx_init(&rv->lock, "vm reserv", NULL, MTX_DEF);
 			paddr += VM_LEVEL_0_SIZE;
+			rv++;
 		}
 	}
 	for (i = 0; i < MAXMEMDOM; i++) {
@@ -1400,30 +1429,40 @@ vm_reserv_size(int level)
 vm_paddr_t
 vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end)
 {
-	vm_paddr_t new_end, high_water;
+	vm_paddr_t new_end;
+	vm_pindex_t count;
 	size_t size;
 	int i;
 
-	high_water = phys_avail[1];
+	count = 0;
 	for (i = 0; i < vm_phys_nsegs; i++) {
-		if (vm_phys_segs[i].end > high_water)
-			high_water = vm_phys_segs[i].end;
+#ifdef VM_PHYSSEG_SPARSE
+		count += howmany(vm_phys_segs[i].end, VM_LEVEL_0_SIZE) -
+		    vm_phys_segs[i].start / VM_LEVEL_0_SIZE;
+#else
+		count = MAX(count,
+		    howmany(vm_phys_segs[i].end, VM_LEVEL_0_SIZE));
+#endif
 	}
 
-	/* Skip the first chunk.  It is already accounted for. */
-	for (i = 2; phys_avail[i + 1] != 0; i += 2) {
-		if (phys_avail[i + 1] > high_water)
-			high_water = phys_avail[i + 1];
+	for (i = 0; phys_avail[i + 1] != 0; i += 2) {
+#ifdef VM_PHYSSEG_SPARSE
+		count += howmany(phys_avail[i + 1], VM_LEVEL_0_SIZE) -
+		    phys_avail[i] / VM_LEVEL_0_SIZE;
+#else
+		count = MAX(count,
+		    howmany(phys_avail[i + 1], VM_LEVEL_0_SIZE));
+#endif
 	}
 
 	/*
-	 * Calculate the size (in bytes) of the reservation array.  Round up
-	 * from "high_water" because every small page is mapped to an element
-	 * in the reservation array based on its physical address.  Thus, the
-	 * number of elements in the reservation array can be greater than the
-	 * number of superpages.
+	 * Calculate the size (in bytes) of the reservation array.  Round up
+	 * for partial superpages at the boundaries, since every small page
+	 * is mapped to an element in the reservation array based on its
+	 * physical address.  Thus, the number of elements in the reservation
+	 * array can be greater than the number of superpages.
 	 */
-	size = howmany(high_water, VM_LEVEL_0_SIZE) * sizeof(struct vm_reserv);
+	size = count * sizeof(struct vm_reserv);
 
 	/*
 	 * Allocate and map the physical memory for the reservation array.  The
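
The lookup scheme above deserves a quick illustration. Each vm_phys_seg
now carries first_reserv, a pointer to the reservation entry for the
superpage containing seg->start, so a physical address pa inside the
segment resolves to first_reserv + (pa >> VM_LEVEL_0_SHIFT) -
(seg->start >> VM_LEVEL_0_SHIFT). The standalone userland sketch below
demonstrates that arithmetic with simplified stand-in types; it is not
kernel code, and VM_LEVEL_0_SHIFT = 21 (2 MB superpages) is an
assumption made only for the example.

#include <stdint.h>
#include <stdio.h>

#define	VM_LEVEL_0_SHIFT	21		/* assumed: 2 MB superpages */
#define	VM_LEVEL_0_SIZE		(1UL << VM_LEVEL_0_SHIFT)

struct vm_reserv {
	int popcnt;				/* stand-in for the real struct */
};

struct vm_phys_seg {
	uint64_t start;				/* first physical address */
	uint64_t end;				/* last physical address + 1 */
	struct vm_reserv *first_reserv;		/* entry for the superpage
						   containing "start" */
};

/*
 * Sparse lookup: offset from the segment's first superpage, the same
 * arithmetic as the patched vm_reserv_from_page().
 */
static struct vm_reserv *
reserv_from_paddr(const struct vm_phys_seg *seg, uint64_t pa)
{
	return (seg->first_reserv + (pa >> VM_LEVEL_0_SHIFT) -
	    (seg->start >> VM_LEVEL_0_SHIFT));
}

int
main(void)
{
	static struct vm_reserv array[4];	/* one entry per superpage */
	struct vm_phys_seg seg = {
		.start = 0x80000000UL,		/* hypothetical 2 GiB base */
		.end = 0x80000000UL + 4 * VM_LEVEL_0_SIZE,
		.first_reserv = array,
	};
	/* An address three superpages into the segment maps to array[3]. */
	uint64_t pa = seg.start + 3 * VM_LEVEL_0_SIZE + 0x123;

	printf("index %td\n", reserv_from_paddr(&seg, pa) - array);
	return (0);
}

Because the entry for a segment's first superpage is stored rather than
recomputed from physical address zero, no entries ever need to exist for
the large unpopulated gaps between segments, which is what lets
vm_reserv_startup() size the array by count instead of by high_water.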
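
On the sizing side, vm_reserv_startup() previously allocated
howmany(high_water, VM_LEVEL_0_SIZE) entries, one per superpage of
address space below the highest RAM address; with VM_PHYSSEG_SPARSE it
instead sums the superpage spans of the individual segments and
phys_avail ranges. A standalone sketch of the two counting strategies,
using a hypothetical two-bank layout (the constants and addresses are
illustrative only, not taken from the Altra system described above):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define	VM_LEVEL_0_SIZE	(UINT64_C(1) << 21)	/* assumed: 2 MB superpages */
#define	howmany(x, y)	(((x) + ((y) - 1)) / (y))
#define	MAX(a, b)	((a) > (b) ? (a) : (b))

struct seg {
	uint64_t start, end;
};

int
main(void)
{
	/*
	 * Two hypothetical 128 GiB banks placed far apart in the
	 * address space, as on a sparsely populated machine.
	 */
	struct seg segs[] = {
		{ UINT64_C(0x00080000000), UINT64_C(0x02080000000) },
		{ UINT64_C(0x78000000000), UINT64_C(0x7A000000000) },
	};
	uint64_t dense = 0, sparse = 0;
	int i;

	for (i = 0; i < 2; i++) {
		/* Dense: sized by the highest physical address seen. */
		dense = MAX(dense, howmany(segs[i].end, VM_LEVEL_0_SIZE));
		/* Sparse: sized by the superpages each segment spans. */
		sparse += howmany(segs[i].end, VM_LEVEL_0_SIZE) -
		    segs[i].start / VM_LEVEL_0_SIZE;
	}
	printf("dense: %" PRIu64 " entries, sparse: %" PRIu64 " entries\n",
	    dense, sparse);
	return (0);
}

For this layout the dense count comes to 3,997,696 entries against
131,072 for the sparse count, roughly a 30:1 reduction, which is the
same effect as the roughly 5 GiB savings reported in the commit message.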