amd64: do not leak pcpu pages

Do not preallocate the pcpu area backing pages at early startup; only
allocate enough KVA for pcpu[MAXCPU] plus a single page for the BSP.
The remaining pages are allocated after we know the number of CPUs and
their assignment to NUMA domains.

PCPUs are not accessed until they are initialized, which happens on AP
startup.

Reviewed by:	markj
Sponsored by:	The FreeBSD Foundation
Differential revision:	https://reviews.freebsd.org/D39945
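
For context, the pattern adopted here is to reserve KVA for all MAXCPU pcpu
slots at bootstrap, but to attach a physical page to a slot only once the
corresponding CPU is known to exist, preferring that CPU's NUMA domain.  A
minimal sketch of that pattern follows, assuming the stock FreeBSD
vm_page_alloc_noobj()/pmap_qenter() KPIs; back_pcpu_page() is a hypothetical
helper, the committed version being amd64_mp_alloc_pcpu() in the diff below.

#include <sys/param.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
#include <vm/pmap.h>

/* Sketch only: back one reserved pcpu KVA slot with a physical page. */
static void
back_pcpu_page(int cpu, int domain, vm_offset_t va)
{
	vm_page_t m;

	/* Prefer a page from the CPU's own NUMA domain... */
	m = vm_page_alloc_noobj_domain(domain, 0);
	if (m == NULL)
		/* ...but fall back to any domain rather than fail. */
		m = vm_page_alloc_noobj(0);
	if (m == NULL)
		panic("cannot alloc pcpu page for cpu %d", cpu);
	/* Wire the page into the KVA reserved at bootstrap. */
	pmap_qenter(va, &m, 1);
}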

sys/amd64/amd64/mp_machdep.c

@@ -290,30 +290,33 @@ init_secondary(void)
 	init_secondary_tail();
 }
 
+static void
+amd64_mp_alloc_pcpu(void)
+{
+	vm_page_t m;
+	int cpu;
+
+	/* Allocate pcpu areas to the correct domain. */
+	for (cpu = 1; cpu < mp_ncpus; cpu++) {
+#ifdef NUMA
+		m = NULL;
+		if (vm_ndomains > 1) {
+			m = vm_page_alloc_noobj_domain(
+			    acpi_pxm_get_cpu_locality(cpu_apic_ids[cpu]), 0);
+		}
+		if (m == NULL)
+#endif
+			m = vm_page_alloc_noobj(0);
+		if (m == NULL)
+			panic("cannot alloc pcpu page for cpu %d", cpu);
+		pmap_qenter((vm_offset_t)&__pcpu[cpu], &m, 1);
+	}
+}
+
 /*******************************************************************
  * local functions and data
  */
 
-#ifdef NUMA
-static void
-mp_realloc_pcpu(int cpuid, int domain)
-{
-	vm_page_t m;
-	vm_offset_t oa, na;
-
-	oa = (vm_offset_t)&__pcpu[cpuid];
-	if (vm_phys_domain(pmap_kextract(oa)) == domain)
-		return;
-	m = vm_page_alloc_noobj_domain(domain, 0);
-	if (m == NULL)
-		return;
-	na = PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m));
-	pagecopy((void *)oa, (void *)na);
-	pmap_qenter((vm_offset_t)&__pcpu[cpuid], &m, 1);
-	/* XXX old pcpu page leaked. */
-}
-#endif
-
 /*
  * start each AP in our list
  */
@@ -330,6 +333,7 @@ start_all_aps(void)
 	int apic_id, cpu, domain, i;
 	u_char mpbiosreason;
 
+	amd64_mp_alloc_pcpu();
 	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
 
 	MPASS(bootMP_size <= PAGE_SIZE);
@@ -403,16 +407,6 @@ start_all_aps(void)
 	outb(CMOS_REG, BIOS_RESET);
 	outb(CMOS_DATA, BIOS_WARM);	/* 'warm-start' */
 
-	/* Relocate pcpu areas to the correct domain. */
-#ifdef NUMA
-	if (vm_ndomains > 1)
-		for (cpu = 1; cpu < mp_ncpus; cpu++) {
-			apic_id = cpu_apic_ids[cpu];
-			domain = acpi_pxm_get_cpu_locality(apic_id);
-			mp_realloc_pcpu(cpu, domain);
-		}
-#endif
-
 	/* start each AP */
 	domain = 0;
 	for (cpu = 1; cpu < mp_ncpus; cpu++) {

sys/amd64/amd64/pmap.c

@@ -1902,7 +1902,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 	vm_offset_t va;
 	pt_entry_t *pte, *pcpu_pte;
 	struct region_descriptor r_gdt;
-	uint64_t cr4, pcpu_phys;
+	uint64_t cr4, pcpu0_phys;
 	u_long res;
 	int i;
 
@@ -1917,7 +1917,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 	 */
 	create_pagetables(firstaddr);
 
-	pcpu_phys = allocpages(firstaddr, MAXCPU);
+	pcpu0_phys = allocpages(firstaddr, 1);
 
 	/*
 	 * Add a physical memory segment (vm_phys_seg) corresponding to the
@@ -1995,10 +1995,15 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 	SYSMAP(struct pcpu *, pcpu_pte, __pcpu, MAXCPU);
 	virtual_avail = va;
 
-	for (i = 0; i < MAXCPU; i++) {
-		pcpu_pte[i] = (pcpu_phys + ptoa(i)) | X86_PG_V | X86_PG_RW |
-		    pg_g | pg_nx | X86_PG_M | X86_PG_A;
-	}
+	/*
+	 * Map the BSP PCPU now, the rest of the PCPUs are mapped by
+	 * amd64_mp_alloc_pcpu()/start_all_aps() when we know the
+	 * number of CPUs and NUMA affinity.
+	 */
+	pcpu_pte[0] = pcpu0_phys | X86_PG_V | X86_PG_RW | pg_g | pg_nx |
+	    X86_PG_M | X86_PG_A;
+	for (i = 1; i < MAXCPU; i++)
+		pcpu_pte[i] = 0;
 
 	/*
 	 * Re-initialize PCPU area for BSP after switching.
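
Two properties of the new bootstrap state are worth spelling out.  Slots
1..MAXCPU-1 are zero PTEs, i.e. not present, so a stray access to an AP's
pcpu before amd64_mp_alloc_pcpu() runs faults immediately instead of
silently landing on a preallocated page; this backs the commit message's
note that PCPUs are not accessed until they are initialized.  Presetting
X86_PG_A and X86_PG_M on the BSP mapping spares the MMU from having to set
the accessed/dirty bits itself on a page that is certain to be read and
written.  A small illustrative helper restating the above (not part of the
commit; pg_g, pg_nx, and the X86_PG_* constants are the amd64 pmap names
used in the diff):

static void
init_pcpu_ptes(pt_entry_t *pcpu_pte, vm_paddr_t pcpu0_phys)
{
	int i;

	/* BSP slot: present, writable, global, no-exec, A/D preset. */
	pcpu_pte[0] = pcpu0_phys | X86_PG_V | X86_PG_RW | pg_g | pg_nx |
	    X86_PG_M | X86_PG_A;
	/* AP slots: not present; any premature touch page-faults. */
	for (i = 1; i < MAXCPU; i++)
		pcpu_pte[i] = 0;
}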