Remove the permanent double mapping of low physical memory and replace

it by a transient double mapping for the one instruction in ACPI wakeup
where it is needed (and for many surrounding instructions in ACPI resume).
Invalidate the TLB as soon as convenient after undoing the transient
mapping.  ACPI resume already has the strict ordering needed for this.

This fixes the non-trapping of null pointers and other garbage pointers
below NBPDR (except transiently).  NBPDR is quite large (4MB, or 2MB for
PAE).

This fixes spurious traps at the first instruction in VM86 bioscalls.
The traps are for transiently missing read permission in the first
VM86 page (physical page 0) which was just written to at KERNBASE in
the kernel.  The mechanism is unknown (it is not simply PG_G).

locore uses a similar but larger transient double mapping and needs
it for 2 instructions instead of 1.  Unmap the first PDE in it after
the 2 instructions to detect most garbage pointers while bootstrapping.
pmap_bootstrap() finishes the unmapping.

Remove the avoidance of the double mapping for a recently fixed special
case.  ACPI resume could use this avoidance (made non-special) to avoid
any problems with the transient double mapping, but no such problems
are known.

Update comments in locore.  Many were for old versions of FreeBSD which
tried to map low memory r/o except for special cases, or might have
allowed access to low memory via physical offsets.  Now all kernel
maps are r/w, and removal of the double map disallows use of physical
offsets again.
This commit is contained in:
Bruce Evans 2017-12-18 13:53:22 +00:00
parent 4a5eb9ac99
commit 2ba6fe0009
3 changed files with 60 additions and 43 deletions

View File

@ -241,22 +241,30 @@ NON_GPROF_ENTRY(btext)
#if defined(PAE) || defined(PAE_TABLES)
movl R(IdlePDPT), %eax
movl %eax, %cr3
movl %cr4, %eax
orl $CR4_PAE, %eax
movl %eax, %cr4
movl %cr4, %edx
orl $CR4_PAE, %edx
movl %edx, %cr4
#else
movl R(IdlePTD), %eax
movl %eax,%cr3 /* load ptd addr into mmu */
#endif
movl %cr0,%eax /* get control word */
orl $CR0_PE|CR0_PG,%eax /* enable paging */
movl %eax,%cr0 /* and let's page NOW! */
movl %cr0,%edx /* get control word */
orl $CR0_PE|CR0_PG,%edx /* enable paging */
movl %edx,%cr0 /* and let's page NOW! */
pushl $begin /* jump to high virtualized address */
ret
/* now running relocated at KERNBASE where the system is linked to run */
begin:
/*
* Now running relocated at KERNBASE where the system is linked to run.
*
* Remove the lowest part of the double mapping of low memory to get
* some null pointer checks.
*/
movl $0,PTD
movl %eax,%cr3 /* invalidate TLB */
/* set up bootstrap stack */
movl proc0kstack,%eax /* location of in-kernel stack */
@ -725,14 +733,15 @@ no_kernend:
/*
* Initialize page table pages mapping physical address zero through the
* end of the kernel. All of the page table entries allow read and write
* access. Write access to the first physical page is required by bios32
* calls, and write access to the first 1 MB of physical memory is required
* by ACPI for implementing suspend and resume. We do this even
* if we've enabled PSE above, we'll just switch the corresponding kernel
* PDEs before we turn on paging.
* (physical) end of the kernel. Many of these pages must be reserved,
* and we reserve them all and map them linearly for convenience. We do
* this even if we've enabled PSE above; we'll just switch the corresponding
* kernel PDEs before we turn on paging.
*
* XXX: We waste some pages here in the PSE case!
*
* This and all other page table entries allow read and write access for
* various reasons. Kernel mappings never have any access restrictions.
*/
xorl %eax, %eax
movl R(KERNend),%ecx
@ -784,42 +793,21 @@ no_kernend:
/*
* Create an identity mapping for low physical memory, including the kernel.
* The part of this mapping given by the first PDE (for the first 4 MB or 2
* MB of physical memory)
* becomes a permanent part of the kernel's address space. The rest of this
* mapping is destroyed in pmap_bootstrap(). Ordinarily, the same page table
* pages are shared by the identity mapping and the kernel's native mapping.
* However, the permanent identity mapping cannot contain PG_G mappings.
* Thus, if the (physical) kernel overlaps the permanent identity mapping
* (and PG_G is enabled), the
* page table for the first PDE must be duplicated and not shared.
* This is only used to map the 2 instructions for jumping to 'begin' in
* locore (we map everything to avoid having to determine where these
* instructions are). ACPI resume will transiently restore the first PDE in
* this mapping (and depend on this PDE's page table created here not being
* destroyed). See pmap_bootstrap() for more details.
*
* N.B. Due to errata concerning large pages and physical address zero,
* a PG_PS mapping is not used.
* Note: There are errata concerning large pages and physical address zero,
* so a PG_PS mapping should not be used for PDE 0. Our double mapping
* avoids this automatically by not using PG_PS for PDE #KPDI so that PAT
* bits can be set at the page level for i/o pages below 1 MB.
*/
movl R(KPTphys), %eax
xorl %ebx, %ebx
movl $NKPT, %ecx
fillkpt(R(IdlePTD), $PG_RW)
#if KERNLOAD < (1 << PDRSHIFT)
testl $PG_G, R(pgeflag)
jz 1f
ALLOCPAGES(1)
movl %esi, %eax
movl $1, %ecx
fillkptphys($PG_RW) /* map the new page table in std map */
movl %esi, %edi
movl R(IdlePTD), %eax
movl (%eax), %esi /* top bits are 0 for PAE */
andl $~PAGE_MASK, %esi
movl %edi, (%eax)
orl $PG_V | PG_RW, (%eax) /* finish writing new PTD[0] */
movl $PAGE_SIZE, %ecx
cld
rep
movsb
1:
#endif
/*
* Install PDEs for PTs covering enough kva to bootstrap. Then for the PSE

View File

@ -179,6 +179,17 @@ acpi_wakeup_cpus(struct acpi_softc *sc)
}
}
#ifdef __i386__
/*
* Remove the identity mapping of low memory for all CPUs and sync
* the TLB for the BSP. The APs are now spinning in
* cpususpend_handler() and we will release them soon. Then each
* will invalidate its TLB.
*/
kernel_pmap->pm_pdir[0] = 0;
invltlb_glob();
#endif
/* restore the warmstart vector */
*(uint32_t *)WARMBOOT_OFF = mpbioswarmvec;
@ -235,6 +246,19 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state)
WAKECODE_FIXUP(wakeup_gdt, uint16_t, pcb->pcb_gdt.rd_limit);
WAKECODE_FIXUP(wakeup_gdt + 2, uint64_t, pcb->pcb_gdt.rd_base);
#ifdef __i386__
/*
* Map some low memory with virt == phys for ACPI wakecode
* to use to jump to high memory after enabling paging. This
* is the same as for similar jump in locore, except the
* jump is a single instruction, and we know its address
* more precisely so only need a single PTD, and we have to
* be careful to use the kernel map (PTD[0] is for curthread
* which may be a user thread in deprecated APIs).
*/
kernel_pmap->pm_pdir[0] = PTD[KPTDI];
#endif
/* Call ACPICA to enter the desired sleep state */
if (state == ACPI_STATE_S4 && sc->acpi_s4bios)
status = AcpiEnterSleepStateS4bios();

View File

@ -1398,6 +1398,11 @@ cpususpend_handler(void)
while (!CPU_ISSET(cpu, &started_cpus))
ia32_pause();
#ifdef __i386__
/* Finish removing the identity mapping of low memory for this AP. */
invltlb_glob();
#endif
if (cpu_ops.cpu_resume)
cpu_ops.cpu_resume();
#ifdef __amd64__