From 9dba82a4422a50722d40bf5318e6de204c1e372a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?=
Date: Thu, 5 Apr 2018 14:39:51 +0000
Subject: [PATCH] x86: improve reservation of AP trampoline memory

So that it doesn't rely on physmap[1] containing an address below
1MiB.

Instead scan the full physmap and search for a suitable address to
place the trampoline code (below 1MiB) and the initial memory pages
(below 4GiB).

Sponsored by: Citrix Systems R&D
Reviewed by: kib
Differential Revision: https://reviews.freebsd.org/D14878
---
 sys/amd64/amd64/machdep.c    | 10 ++----
 sys/amd64/amd64/mp_machdep.c | 56 ++++++++++++++++++++++++--------
 sys/amd64/amd64/mpboot.S     |  8 ++++-
 sys/amd64/include/smp.h      |  2 +-
 sys/i386/i386/machdep.c      |  2 +-
 sys/i386/i386/mp_machdep.c   | 16 ---------
 sys/i386/include/smp.h       |  3 --
 sys/x86/include/init.h       |  2 +-
 sys/x86/include/x86_smp.h    |  4 ++-
 sys/x86/x86/mp_x86.c         | 63 ++++++++++++++++++++++++++++++++++++
 10 files changed, 122 insertions(+), 44 deletions(-)

diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index a24546df2317..22cdb3fe6c22 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -1246,14 +1246,10 @@ getmemsize(caddr_t kmdp, u_int64_t first)
 	 * Make hole for "AP -> long mode" bootstrap code. The
 	 * mp_bootaddress vector is only available when the kernel
 	 * is configured to support APs and APs for the system start
-	 * in 32bit mode (e.g. SMP bare metal).
+	 * in real mode (e.g. SMP bare metal).
 	 */
-	if (init_ops.mp_bootaddress) {
-		if (physmap[1] >= 0x100000000)
-			panic(
-	"Basemem segment is not suitable for AP bootstrap code!");
-		physmap[1] = init_ops.mp_bootaddress(physmap[1] / 1024);
-	}
+	if (init_ops.mp_bootaddress)
+		init_ops.mp_bootaddress(physmap, &physmap_idx);
 
 	/*
 	 * Maxmem isn't the "maximum memory", it's one larger than the
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 34012d2b4f96..7ef2c241e042 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -96,24 +96,54 @@ char *nmi_stack;
 
 static int	start_ap(int apic_id);
 
-static u_int	bootMP_size;
-static u_int	boot_address;
-
 /*
- * Calculate usable address in base memory for AP trampoline code.
+ * Reserve memory for the AP trampoline code (via alloc_ap_trampoline())
+ * and for the three pages of initial AP page tables.
+ *
+ * physmap holds start/end physical address pairs and *physmap_idx is
+ * the index of its last pair; both are adjusted as needed to exclude
+ * the memory taken here.
  */
-u_int
-mp_bootaddress(u_int basemem)
+void
+mp_bootaddress(vm_paddr_t *physmap, unsigned int *physmap_idx)
 {
+	unsigned int i;
+	bool allocated;
 
-	bootMP_size = mptramp_end - mptramp_start;
-	boot_address = trunc_page(basemem * 1024);	/* round down to 4k boundary */
-	if (((basemem * 1024) - boot_address) < bootMP_size)
-		boot_address -= PAGE_SIZE;	/* not enough, lower by 4k */
-	/* 3 levels of page table pages */
-	mptramp_pagetables = boot_address - (PAGE_SIZE * 3);
+	alloc_ap_trampoline(physmap, physmap_idx);
 
-	return mptramp_pagetables;
+	allocated = false;
+	/* Walk the pairs from last to first; i wraps past 0 to end the loop. */
+	for (i = *physmap_idx; i <= *physmap_idx; i -= 2) {
+		/*
+		 * Find a memory region big enough below the 4GB boundary to
+		 * store the initial page tables. Note that it needs to be
+		 * aligned to a page boundary.
+		 */
+		if (physmap[i] >= GiB(4) ||
+		    (physmap[i + 1] - round_page(physmap[i])) < (PAGE_SIZE * 3))
+			continue;
+
+		allocated = true;
+		mptramp_pagetables = round_page(physmap[i]);
+		physmap[i] = round_page(physmap[i]) + (PAGE_SIZE * 3);
+		if (physmap[i] == physmap[i + 1] && *physmap_idx != 0) {
+			/* Region fully consumed: close the gap in physmap. */
+			memmove(&physmap[i], &physmap[i + 2],
+			    sizeof(*physmap) * (*physmap_idx - i));
+			*physmap_idx -= 2;
+		}
+		/* One region is enough; do not carve up the remaining ones. */
+		break;
+	}
+
+	if (!allocated) {
+		mptramp_pagetables = trunc_page(boot_address) - (PAGE_SIZE * 3);
+		if (bootverbose)
+			printf(
+"Cannot find enough space for the initial AP page tables, placing them at %#x\n",
+			    mptramp_pagetables);
+	}
 }
 
 /*
diff --git a/sys/amd64/amd64/mpboot.S b/sys/amd64/amd64/mpboot.S
index 8af5a7898c9a..5545fe9290d1 100644
--- a/sys/amd64/amd64/mpboot.S
+++ b/sys/amd64/amd64/mpboot.S
@@ -216,8 +216,14 @@ lgdt_desc:
 	.word	gdtend-gdt		/* Length */
 	.long	gdt-mptramp_start	/* Offset plus %ds << 4 */
 
-	.globl	mptramp_end
 mptramp_end:
+	/*
+	 * The size of the trampoline code that needs to be relocated
+	 * below the 1MiB boundary.
+	 */
+	.globl	bootMP_size
+bootMP_size:
+	.long	mptramp_end - mptramp_start
 
 	/*
 	 * From here on down is executed in the kernel .text section.
diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h
index 64135bc36f58..2ecfe62cf9fb 100644
--- a/sys/amd64/include/smp.h
+++ b/sys/amd64/include/smp.h
@@ -23,7 +23,6 @@
 
 /* global symbols in mpboot.S */
 extern char			mptramp_start[];
-extern char			mptramp_end[];
 extern u_int32_t		mptramp_pagetables;
 
 /* IPI handlers */
@@ -59,6 +58,7 @@ void	invlpg_pcid_handler(void);
 void	invlrng_invpcid_handler(void);
 void	invlrng_pcid_handler(void);
 int	native_start_all_aps(void);
+void	mp_bootaddress(vm_paddr_t *, unsigned int *);
 
 #endif /* !LOCORE */
 #endif /* SMP */
diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c
index 4e70afa62ca8..c1862fd7e158 100644
--- a/sys/i386/i386/machdep.c
+++ b/sys/i386/i386/machdep.c
@@ -1903,7 +1903,7 @@ getmemsize(int first)
 
 #ifdef SMP
 	/* make hole for AP bootstrap code */
-	physmap[1] = mp_bootaddress(physmap[1]);
+	alloc_ap_trampoline(physmap, &physmap_idx);
 #endif
 
 	/*
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index 0263c02556df..338711faf3c1 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -139,22 +139,6 @@ static void	install_ap_tramp(void);
 static int	start_all_aps(void);
 static int	start_ap(int apic_id);
 
-static u_int	boot_address;
-
-/*
- * Calculate usable address in base memory for AP trampoline code.
- */
-u_int
-mp_bootaddress(u_int basemem)
-{
-
-	boot_address = trunc_page(basemem);	/* round down to 4k boundary */
-	if ((basemem - boot_address) < bootMP_size)
-		boot_address -= PAGE_SIZE;	/* not enough, lower by 4k */
-
-	return boot_address;
-}
-
 /*
  * Initialize the IPI handlers and start up the AP's.
  */
diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h
index 971f75c059b8..4fcb55a41996 100644
--- a/sys/i386/include/smp.h
+++ b/sys/i386/include/smp.h
@@ -27,9 +27,6 @@
 #include
 #include
 
-/* global data in mpboot.s */
-extern int bootMP_size;
-
 /* functions in mpboot.s */
 void bootMP(void);
 
diff --git a/sys/x86/include/init.h b/sys/x86/include/init.h
index d6d253d085cc..880cabaa9496 100644
--- a/sys/x86/include/init.h
+++ b/sys/x86/include/init.h
@@ -41,7 +41,7 @@ struct init_ops {
 	void	(*early_clock_source_init)(void);
 	void	(*early_delay)(int);
 	void	(*parse_memmap)(caddr_t, vm_paddr_t *, int *);
-	u_int	(*mp_bootaddress)(u_int);
+	void	(*mp_bootaddress)(vm_paddr_t *, unsigned int *);
 	int	(*start_all_aps)(void);
 	void	(*msi_init)(void);
 };
diff --git a/sys/x86/include/x86_smp.h b/sys/x86/include/x86_smp.h
index b168bfb2d752..f7e28df06ccb 100644
--- a/sys/x86/include/x86_smp.h
+++ b/sys/x86/include/x86_smp.h
@@ -32,6 +32,8 @@ extern int bootAP;
 extern void *dpcpu;
 extern char *bootSTK;
 extern void *bootstacks[];
+extern unsigned int boot_address;
+extern unsigned int bootMP_size;
 extern volatile u_int cpu_ipi_pending[];
 extern volatile int aps_ready;
 extern struct mtx ap_boot_mtx;
@@ -83,6 +85,7 @@ void	assign_cpu_ids(void);
 void	cpu_add(u_int apic_id, char boot_cpu);
 void	cpustop_handler(void);
 void	cpususpend_handler(void);
+void	alloc_ap_trampoline(vm_paddr_t *physmap, unsigned int *physmap_idx);
 void	init_secondary_tail(void);
 void	invltlb_handler(void);
 void	invlpg_handler(void);
@@ -95,7 +98,6 @@ void	ipi_bitmap_handler(struct trapframe frame);
 void	ipi_cpu(int cpu, u_int ipi);
 int	ipi_nmi_handler(void);
 void	ipi_selected(cpuset_t cpus, u_int ipi);
-u_int	mp_bootaddress(u_int);
 void	set_interrupt_apic_ids(void);
 void	smp_cache_flush(void);
 void	smp_masked_invlpg(cpuset_t mask, vm_offset_t addr, struct pmap *pmap);
diff --git a/sys/x86/x86/mp_x86.c b/sys/x86/x86/mp_x86.c
index dd178f1c2604..3fee726fd7b1 100644
--- a/sys/x86/x86/mp_x86.c
+++ b/sys/x86/x86/mp_x86.c
@@ -158,6 +158,8 @@ struct cache_info {
 	int	present;
 } static caches[MAX_CACHE_LEVELS];
 
+unsigned int boot_address;
+
 void
 mem_range_AP_init(void)
 {
@@ -905,6 +907,67 @@ cpu_mp_probe(void)
 	return (mp_ncpus > 1);
 }
 
+/*
+ * Allocate memory for the AP trampoline.
+ *
+ * Search physmap for a region below 1MiB with room for the page-aligned
+ * trampoline, store the chosen address in boot_address and trim the
+ * region so that it no longer covers the trampoline.  *physmap_idx is
+ * the index of the last pair in physmap; it is lowered by two when a
+ * pair is consumed entirely and is not the only one left.
+ */
+void
+alloc_ap_trampoline(vm_paddr_t *physmap, unsigned int *physmap_idx)
+{
+	unsigned int i;
+	bool allocated;
+
+	allocated = false;
+	/* Walk the pairs from last to first; i wraps past 0 to end the loop. */
+	for (i = *physmap_idx; i <= *physmap_idx; i -= 2) {
+		/*
+		 * Find a memory region big enough and below the 1MB boundary
+		 * for the trampoline code.
+		 * NB: needs to be page aligned.
+		 */
+		if (physmap[i] >= MiB(1) ||
+		    (trunc_page(physmap[i + 1]) - round_page(physmap[i])) <
+		    round_page(bootMP_size))
+			continue;
+
+		allocated = true;
+		/*
+		 * Try to steal from the end of the region to mimic previous
+		 * behaviour, else fallback to steal from the start.
+		 */
+		if (physmap[i + 1] < MiB(1)) {
+			boot_address = trunc_page(physmap[i + 1]);
+			if ((physmap[i + 1] - boot_address) < bootMP_size)
+				boot_address -= round_page(bootMP_size);
+			physmap[i + 1] = boot_address;
+		} else {
+			boot_address = round_page(physmap[i]);
+			physmap[i] = boot_address + round_page(bootMP_size);
+		}
+		if (physmap[i] == physmap[i + 1] && *physmap_idx != 0) {
+			/* Region fully consumed: close the gap in physmap. */
+			memmove(&physmap[i], &physmap[i + 2],
+			    sizeof(*physmap) * (*physmap_idx - i));
+			*physmap_idx -= 2;
+		}
+		/* One region is enough; do not carve up the remaining ones. */
+		break;
+	}
+
+	if (!allocated) {
+		boot_address = basemem * 1024 - bootMP_size;
+		if (bootverbose)
+			printf(
+"Cannot find enough space for the boot trampoline, placing it at %#x\n",
+			    boot_address);
+	}
+}
+
 /*
  * AP CPU's call this to initialize themselves.
  */