From 2ba6fe0009f083e66353364b4ae1d4f8e251267d Mon Sep 17 00:00:00 2001 From: Bruce Evans Date: Mon, 18 Dec 2017 13:53:22 +0000 Subject: [PATCH] Remove the permanent double mapping of low physical memory and replace it by a transient double mapping for the one instruction in ACPI wakeup where it is needed (and for many surrounding instructions in ACPI resume). Invalidate the TLB as soon as convenient after undoing the transient mapping. ACPI resume already has the strict ordering needed for this. This fixes the non-trapping of null pointers and other garbage pointers below NBPDR (except transiently). NBPDR is quite large (4MB, or 2MB for PAE). This fixes spurious traps at the first instruction in VM86 bioscalls. The traps are for transiently missing read permission in the first VM86 page (physical page 0) which was just written to at KERNBASE in the kernel. The mechanism is unknown (it is not simply PG_G). locore uses a similar but larger transient double mapping and needs it for 2 instructions instead of 1. Unmap the first PDE in it after the 2 instructions to detect most garbage pointers while bootstrapping. pmap_bootstrap() finishes the unmapping. Remove the avoidance of the double mapping for a recently fixed special case. ACPI resume could use this avoidance (made non-special) to avoid any problems with the transient double mapping, but no such problems are known. Update comments in locore. Many were for old versions of FreeBSD which tried to map low memory r/o except for special cases, or might have allowed access to low memory via physical offsets. Now all kernel maps are r/w, and removal of the double map disallows use of physical offsets again. 
--- sys/i386/i386/locore.s | 74 +++++++++++++++--------------------- sys/x86/acpica/acpi_wakeup.c | 24 ++++++++++++ sys/x86/x86/mp_x86.c | 5 +++ 3 files changed, 60 insertions(+), 43 deletions(-) diff --git a/sys/i386/i386/locore.s b/sys/i386/i386/locore.s index 04b82dfbece5..ab3f79ffdcf6 100644 --- a/sys/i386/i386/locore.s +++ b/sys/i386/i386/locore.s @@ -241,22 +241,30 @@ NON_GPROF_ENTRY(btext) #if defined(PAE) || defined(PAE_TABLES) movl R(IdlePDPT), %eax movl %eax, %cr3 - movl %cr4, %eax - orl $CR4_PAE, %eax - movl %eax, %cr4 + movl %cr4, %edx + orl $CR4_PAE, %edx + movl %edx, %cr4 #else movl R(IdlePTD), %eax movl %eax,%cr3 /* load ptd addr into mmu */ #endif - movl %cr0,%eax /* get control word */ - orl $CR0_PE|CR0_PG,%eax /* enable paging */ - movl %eax,%cr0 /* and let's page NOW! */ + movl %cr0,%edx /* get control word */ + orl $CR0_PE|CR0_PG,%edx /* enable paging */ + movl %edx,%cr0 /* and let's page NOW! */ pushl $begin /* jump to high virtualized address */ ret -/* now running relocated at KERNBASE where the system is linked to run */ begin: + /* + * Now running relocated at KERNBASE where the system is linked to run. + * + * Remove the lowest part of the double mapping of low memory to get + * some null pointer checks. + */ + movl $0,PTD + movl %eax,%cr3 /* invalidate TLB */ + /* set up bootstrap stack */ movl proc0kstack,%eax /* location of in-kernel stack */ @@ -725,14 +733,15 @@ no_kernend: /* * Initialize page table pages mapping physical address zero through the - * end of the kernel. All of the page table entries allow read and write - * access. Write access to the first physical page is required by bios32 - * calls, and write access to the first 1 MB of physical memory is required - * by ACPI for implementing suspend and resume. We do this even - * if we've enabled PSE above, we'll just switch the corresponding kernel - * PDEs before we turn on paging. + * (physical) end of the kernel. 
Many of these pages must be reserved, + * and we reserve them all and map them linearly for convenience. We do + * this even if we've enabled PSE above; we'll just switch the corresponding + * kernel PDEs before we turn on paging. * * XXX: We waste some pages here in the PSE case! + * + * This and all other page table entries allow read and write access for + * various reasons. Kernel mappings never have any access restrictions. */ xorl %eax, %eax movl R(KERNend),%ecx @@ -784,42 +793,21 @@ no_kernend: /* * Create an identity mapping for low physical memory, including the kernel. - * The part of this mapping given by the first PDE (for the first 4 MB or 2 - * MB of physical memory) - * becomes a permanent part of the kernel's address space. The rest of this - * mapping is destroyed in pmap_bootstrap(). Ordinarily, the same page table - * pages are shared by the identity mapping and the kernel's native mapping. - * However, the permanent identity mapping cannot contain PG_G mappings. - * Thus, if the (physical) kernel overlaps the permanent identity mapping - * (and PG_G is enabled), the - * page table for the first PDE must be duplicated and not shared. + * This is only used to map the 2 instructions for jumping to 'begin' in + * locore (we map everything to avoid having to determine where these + * instructions are). ACPI resume will transiently restore the first PDE in + * this mapping (and depend on this PDE's page table created here not being + * destroyed). See pmap_bootstrap() for more details. * - * N.B. Due to errata concerning large pages and physical address zero, - * a PG_PS mapping is not used. + * Note: There are errata concerning large pages and physical address zero, + * so a PG_PS mapping should not be used for PDE 0. Our double mapping + * avoids this automatically by not using PG_PS for PDE #KPDI so that PAT + * bits can be set at the page level for i/o pages below 1 MB. 
*/ movl R(KPTphys), %eax xorl %ebx, %ebx movl $NKPT, %ecx fillkpt(R(IdlePTD), $PG_RW) -#if KERNLOAD < (1 << PDRSHIFT) - testl $PG_G, R(pgeflag) - jz 1f - ALLOCPAGES(1) - movl %esi, %eax - movl $1, %ecx - fillkptphys($PG_RW) /* map the new page table in std map */ - movl %esi, %edi - movl R(IdlePTD), %eax - movl (%eax), %esi /* top bits are 0 for PAE */ - andl $~PAGE_MASK, %esi - movl %edi, (%eax) - orl $PG_V | PG_RW, (%eax) /* finish writing new PTD[0] */ - movl $PAGE_SIZE, %ecx - cld - rep - movsb -1: -#endif /* * Install PDEs for PTs covering enough kva to bootstrap. Then for the PSE diff --git a/sys/x86/acpica/acpi_wakeup.c b/sys/x86/acpica/acpi_wakeup.c index a4c5683aee8e..853ead4a90a7 100644 --- a/sys/x86/acpica/acpi_wakeup.c +++ b/sys/x86/acpica/acpi_wakeup.c @@ -179,6 +179,17 @@ acpi_wakeup_cpus(struct acpi_softc *sc) } } +#ifdef __i386__ + /* + * Remove the identity mapping of low memory for all CPUs and sync + * the TLB for the BSP. The APs are now spinning in + * cpususpend_handler() and we will release them soon. Then each + * will invalidate its TLB. + */ + kernel_pmap->pm_pdir[0] = 0; + invltlb_glob(); +#endif + /* restore the warmstart vector */ *(uint32_t *)WARMBOOT_OFF = mpbioswarmvec; @@ -235,6 +246,19 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state) WAKECODE_FIXUP(wakeup_gdt, uint16_t, pcb->pcb_gdt.rd_limit); WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t, pcb->pcb_gdt.rd_base); +#ifdef __i386__ + /* + * Map some low memory with virt == phys for ACPI wakecode + * to use to jump to high memory after enabling paging. This + * is the same as for similar jump in locore, except the + * jump is a single instruction, and we know its address + * more precisely so only need a single PTD, and we have to + * be careful to use the kernel map (PTD[0] is for curthread + * which may be a user thread in deprecated APIs). 
+ */ + kernel_pmap->pm_pdir[0] = PTD[KPTDI]; +#endif + /* Call ACPICA to enter the desired sleep state */ if (state == ACPI_STATE_S4 && sc->acpi_s4bios) status = AcpiEnterSleepStateS4bios(); diff --git a/sys/x86/x86/mp_x86.c b/sys/x86/x86/mp_x86.c index c14b181913b2..bc2f3526369d 100644 --- a/sys/x86/x86/mp_x86.c +++ b/sys/x86/x86/mp_x86.c @@ -1398,6 +1398,11 @@ cpususpend_handler(void) while (!CPU_ISSET(cpu, &started_cpus)) ia32_pause(); +#ifdef __i386__ + /* Finish removing the identity mapping of low memory for this AP. */ + invltlb_glob(); +#endif + if (cpu_ops.cpu_resume) cpu_ops.cpu_resume(); #ifdef __amd64__