From af1c6d3f3013062370692c8e1e9c87bb138fbbd9 Mon Sep 17 00:00:00 2001
From: Konstantin Belousov <kib@FreeBSD.org>
Date: Wed, 3 May 2023 12:41:46 +0300
Subject: [PATCH] amd64: do not leak pcpu pages

Do not preallocate pcpu area backing pages on early startup, only
allocate enough KVA for pcpu[MAXCPU] and the page for the BSP.  Other
pages are allocated after we know the number of cpus and their
assignments to the domains.  PCPUs are not accessed until they are
initialized, which happens on AP startup.

Reviewed by:	markj
Sponsored by:	The FreeBSD Foundation
Differential revision:	https://reviews.freebsd.org/D39945
---
 sys/amd64/amd64/mp_machdep.c | 54 ++++++++++++++++--------------------
 sys/amd64/amd64/pmap.c       | 17 ++++++++----
 2 files changed, 35 insertions(+), 36 deletions(-)

diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index f6c3446e9981..5fdde0bb887d 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -290,30 +290,33 @@ init_secondary(void)
 	init_secondary_tail();
 }
 
+static void
+amd64_mp_alloc_pcpu(void)
+{
+	vm_page_t m;
+	int cpu;
+
+	/* Allocate pcpu areas to the correct domain. */
+	for (cpu = 1; cpu < mp_ncpus; cpu++) {
+#ifdef NUMA
+		m = NULL;
+		if (vm_ndomains > 1) {
+			m = vm_page_alloc_noobj_domain(
+			    acpi_pxm_get_cpu_locality(cpu_apic_ids[cpu]), 0);
+		}
+		if (m == NULL)
+#endif
+			m = vm_page_alloc_noobj(0);
+		if (m == NULL)
+			panic("cannot alloc pcpu page for cpu %d", cpu);
+		pmap_qenter((vm_offset_t)&__pcpu[cpu], &m, 1);
+	}
+}
+
 /*******************************************************************
  * local functions and data
  */
 
-#ifdef NUMA
-static void
-mp_realloc_pcpu(int cpuid, int domain)
-{
-	vm_page_t m;
-	vm_offset_t oa, na;
-
-	oa = (vm_offset_t)&__pcpu[cpuid];
-	if (vm_phys_domain(pmap_kextract(oa)) == domain)
-		return;
-	m = vm_page_alloc_noobj_domain(domain, 0);
-	if (m == NULL)
-		return;
-	na = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
-	pagecopy((void *)oa, (void *)na);
-	pmap_qenter((vm_offset_t)&__pcpu[cpuid], &m, 1);
-	/* XXX old pcpu page leaked. */
-}
-#endif
-
 /*
  * start each AP in our list
  */
@@ -330,6 +333,7 @@ start_all_aps(void)
 	int apic_id, cpu, domain, i;
 	u_char mpbiosreason;
 
+	amd64_mp_alloc_pcpu();
 	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
 
 	MPASS(bootMP_size <= PAGE_SIZE);
@@ -403,16 +407,6 @@ start_all_aps(void)
 	outb(CMOS_REG, BIOS_RESET);
 	outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
 
-	/* Relocate pcpu areas to the correct domain. */
-#ifdef NUMA
-	if (vm_ndomains > 1)
-		for (cpu = 1; cpu < mp_ncpus; cpu++) {
-			apic_id = cpu_apic_ids[cpu];
-			domain = acpi_pxm_get_cpu_locality(apic_id);
-			mp_realloc_pcpu(cpu, domain);
-		}
-#endif
-
 	/* start each AP */
 	domain = 0;
 	for (cpu = 1; cpu < mp_ncpus; cpu++) {
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 55086125fbb9..1009736472dc 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -1902,7 +1902,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 	vm_offset_t va;
 	pt_entry_t *pte, *pcpu_pte;
 	struct region_descriptor r_gdt;
-	uint64_t cr4, pcpu_phys;
+	uint64_t cr4, pcpu0_phys;
 	u_long res;
 	int i;
 
@@ -1917,7 +1917,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 	 */
 	create_pagetables(firstaddr);
 
-	pcpu_phys = allocpages(firstaddr, MAXCPU);
+	pcpu0_phys = allocpages(firstaddr, 1);
 
 	/*
 	 * Add a physical memory segment (vm_phys_seg) corresponding to the
@@ -1995,10 +1995,15 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 	SYSMAP(struct pcpu *, pcpu_pte, __pcpu, MAXCPU);
 	virtual_avail = va;
 
-	for (i = 0; i < MAXCPU; i++) {
-		pcpu_pte[i] = (pcpu_phys + ptoa(i)) | X86_PG_V | X86_PG_RW |
-		    pg_g | pg_nx | X86_PG_M | X86_PG_A;
-	}
+	/*
+	 * Map the BSP PCPU now, the rest of the PCPUs are mapped by
+	 * amd64_mp_alloc_pcpu()/start_all_aps() when we know the
+	 * number of CPUs and NUMA affinity.
+	 */
+	pcpu_pte[0] = pcpu0_phys | X86_PG_V | X86_PG_RW | pg_g | pg_nx |
+	    X86_PG_M | X86_PG_A;
+	for (i = 1; i < MAXCPU; i++)
+		pcpu_pte[i] = 0;
 
 	/*
 	 * Re-initialize PCPU area for BSP after switching.
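
---

The core pattern of the patch is "reserve address space for the maximum configuration, but commit backing pages only for the CPUs that exist, each from its own NUMA domain."  Below is a minimal userspace sketch of that reserve-then-commit split, not kernel code: MAXSLOT, nused, and the plain anonymous mappings are illustrative stand-ins for MAXCPU, mp_ncpus, and the domain-aware vm_page_alloc_noobj_domain()/pmap_qenter() pair used in the actual change.

	/*
	 * Userspace analogue: reserve virtual space for MAXSLOT per-CPU
	 * areas up front, then back only the slots actually in use, one
	 * page at a time.  The kernel version additionally picks each
	 * backing page from the owning CPU's NUMA domain.
	 */
	#include <stdio.h>
	#include <string.h>
	#include <sys/mman.h>
	#include <unistd.h>

	#define	MAXSLOT	256		/* stand-in for MAXCPU */

	int
	main(void)
	{
		long pgsz = sysconf(_SC_PAGESIZE);
		int nused = 4;		/* stand-in for mp_ncpus */

		/* Reserve KVA-like address space only; no backing pages yet. */
		char *area = mmap(NULL, (size_t)MAXSLOT * pgsz, PROT_NONE,
		    MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
		if (area == MAP_FAILED) {
			perror("mmap");
			return (1);
		}

		/* Commit one page per slot in use, cf. amd64_mp_alloc_pcpu(). */
		for (int slot = 0; slot < nused; slot++) {
			if (mprotect(area + (size_t)slot * pgsz, pgsz,
			    PROT_READ | PROT_WRITE) != 0) {
				perror("mprotect");
				return (1);
			}
			/* First touch allocates the zero-filled page. */
			memset(area + (size_t)slot * pgsz, 0, pgsz);
		}

		printf("reserved %d slots, committed %d pages\n", MAXSLOT, nused);
		return (0);
	}

The reason the split works in the kernel, as in this sketch, is that reserving the full contiguous range keeps &__pcpu[cpu] address arithmetic valid for every possible CPU, while the physical pages, the expensive part, are deferred until mp_ncpus and the CPU-to-domain assignments are known.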