Move the powerpc64 direct map base address from zero to high memory. This
accomplishes a few things:
- Makes NULL an invalid address in the kernel, which is useful for catching
  bugs.
- Lays groundwork for radix-tree translation on POWER9, which requires the
  direct map be at high memory.
- Similarly lays groundwork for a direct map on 64-bit Book-E.

The new base address is chosen because it is the base of the fourth radix
quadrant (the minimum kernel address in that translation mode) and because
all supported CPUs ignore at least the first two bits of addresses in real
mode, which allows direct-map addresses to be used in real-mode handlers.
The latter behavior is required by Linux and is part of the architecture
standard starting with POWER ISA 3.0, so it can be relied upon.
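
As a rough illustration of what this means for physical/virtual address
conversions: the PHYS_TO_DMAP/DMAP_TO_PHYS definitions themselves do not
appear in this diff, so the sketch below is an assumption built from the
vmparam.h constants and from how callers in this change add or strip the
direct-map offset.

/* Sketch only, not the committed macros. DMAP_BASE_ADDRESS is
 * 0xc000000000000000UL per the vmparam.h hunk below. */
#define PHYS_TO_DMAP(pa)	((vm_offset_t)(pa) | DMAP_BASE_ADDRESS)
#define DMAP_TO_PHYS(va)	((vm_paddr_t)(va) & ~DMAP_BASE_ADDRESS)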

Reviewed by:	jhibbits, Breno Leitao
Differential Revision:	D14499
Nathan Whitehorn 2018-03-07 17:08:07 +00:00
parent 2e9c3a4f99
commit f9edb09d70
15 changed files with 169 additions and 68 deletions

View File

@ -489,7 +489,7 @@ ofwfb_init(struct vt_device *vd)
#if defined(__powerpc__)
OF_decode_addr(node, fb_phys, &sc->sc_memt, &sc->fb.fb_vbase,
NULL);
sc->fb.fb_pbase = sc->fb.fb_vbase; /* 1:1 mapped */
sc->fb.fb_pbase = sc->fb.fb_vbase & ~DMAP_BASE_ADDRESS;
#ifdef __powerpc64__
/* Real mode under a hypervisor probably doesn't cover FB */
if (!(mfmsr() & (PSL_HV | PSL_DR)))

View File

@ -455,11 +455,33 @@ va_to_vsid(pmap_t pm, vm_offset_t va)
#endif
/*
* These functions need to provide addresses that both (a) work in real mode
* (or whatever mode/circumstances the kernel is in during early boot (now)) and
* (b) can still, in principle, work once the kernel is going. Because these
* rely on existing mappings/real mode, unmap is a no-op.
*/
vm_offset_t
pmap_early_io_map(vm_paddr_t pa, vm_size_t size)
{
KASSERT(!pmap_bootstrapped, ("Not available after PMAP started!"));
return (pa);
/*
* If we have the MMU up in early boot, assume it is 1:1. Otherwise,
* try to get the address in a memory region compatible with the
* direct map for efficiency later.
*/
if (mfmsr() & PSL_DR)
return (pa);
else
return (DMAP_BASE_ADDRESS + pa);
}
void
pmap_early_io_unmap(vm_offset_t va, vm_size_t size)
{
KASSERT(!pmap_bootstrapped, ("Not available after PMAP started!"));
}
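
A minimal usage sketch of the pair above (hypothetical helper, not part of
this commit; the OF_bootstrap() change later in this diff does essentially
the same thing for the FDT header):

/*
 * Hypothetical early-boot probe: peek at the first word of a physical
 * blob before the pmap is bootstrapped. pmap_early_io_map() hands back
 * either a 1:1 or a direct-map VA depending on whether translation is on.
 */
static uint32_t
early_peek_word(vm_paddr_t pa)
{
	vm_offset_t va;
	uint32_t w;

	va = pmap_early_io_map(pa, PAGE_SIZE);
	w = *(volatile uint32_t *)va;
	pmap_early_io_unmap(va, PAGE_SIZE);	/* currently a no-op */
	return (w);
}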
/* From p3-53 of the MPC7450 RISC Microprocessor Family Reference Manual */

View File

@ -551,7 +551,8 @@ moea64_add_ofw_mappings(mmu_t mmup, phandle_t mmu, size_t sz)
/* If this address is direct-mapped, skip remapping */
if (hw_direct_map &&
translations[i].om_va == PHYS_TO_DMAP(pa_base) &&
moea64_calc_wimg(pa_base + off, VM_MEMATTR_DEFAULT) == LPTE_M)
moea64_calc_wimg(pa_base + off, VM_MEMATTR_DEFAULT)
== LPTE_M)
continue;
PMAP_LOCK(kernel_pmap);
@ -664,23 +665,24 @@ moea64_setup_direct_map(mmu_t mmup, vm_offset_t kernelstart,
}
}
PMAP_UNLOCK(kernel_pmap);
} else {
}
/*
* Make sure the kernel and BPVO pool stay mapped on systems either
* without a direct map or on which the kernel is not already executing
* out of the direct-mapped region.
*/
if (!hw_direct_map || kernelstart < DMAP_BASE_ADDRESS) {
for (pa = kernelstart & ~PAGE_MASK; pa < kernelend;
pa += PAGE_SIZE)
moea64_kenter(mmup, pa, pa);
}
if (!hw_direct_map) {
size = moea64_bpvo_pool_size*sizeof(struct pvo_entry);
off = (vm_offset_t)(moea64_bpvo_pool);
for (pa = off; pa < off + size; pa += PAGE_SIZE)
moea64_kenter(mmup, pa, pa);
/*
* Map certain important things, like ourselves.
*
* NOTE: We do not map the exception vector space. That code is
* used only in real mode, and leaving it unmapped allows us to
* catch NULL pointer dereferences, instead of making NULL a valid
* address.
*/
for (pa = kernelstart & ~PAGE_MASK; pa < kernelend;
pa += PAGE_SIZE)
for (pa = off; pa < off + size; pa += PAGE_SIZE)
moea64_kenter(mmup, pa, pa);
}
ENABLE_TRANS(msr);
@ -826,6 +828,11 @@ moea64_mid_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend)
moea64_bpvo_pool_size*sizeof(struct pvo_entry), 0);
moea64_bpvo_pool_index = 0;
/* Place at address usable through the direct map */
if (hw_direct_map)
moea64_bpvo_pool = (struct pvo_entry *)
PHYS_TO_DMAP((uintptr_t)moea64_bpvo_pool);
/*
* Make sure kernel vsid is allocated as well as VSID 0.
*/
@ -898,12 +905,11 @@ moea64_late_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend
Maxmem = max(Maxmem, powerpc_btop(phys_avail[i + 1]));
/*
* Initialize MMU and remap early physical mappings
* Initialize MMU.
*/
MMU_CPU_BOOTSTRAP(mmup,0);
mtmsr(mfmsr() | PSL_DR | PSL_IR);
pmap_bootstrapped++;
bs_remap_earlyboot();
/*
* Set the start and end of kva.
@ -919,6 +925,11 @@ moea64_late_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend
moea64_bootstrap_slb_prefault(va, 0);
#endif
/*
* Remap any early IO mappings (console framebuffer, etc.)
*/
bs_remap_earlyboot();
/*
* Figure out how far we can extend virtual_end into segment 16
* without running into existing mappings. Segment 16 is guaranteed
@ -1826,10 +1837,11 @@ moea64_kextract(mmu_t mmu, vm_offset_t va)
/*
* Shortcut the direct-mapped case when applicable. We never put
* anything but 1:1 mappings below VM_MIN_KERNEL_ADDRESS.
* anything but 1:1 (or 62-bit aliased) mappings below
* VM_MIN_KERNEL_ADDRESS.
*/
if (va < VM_MIN_KERNEL_ADDRESS)
return (va);
return (va & ~DMAP_BASE_ADDRESS);
PMAP_LOCK(kernel_pmap);
pvo = moea64_pvo_find_va(kernel_pmap, va);
@ -2565,12 +2577,15 @@ moea64_pvo_remove_from_page(mmu_t mmu, struct pvo_entry *pvo)
* Update vm about page writeability/executability if managed
*/
PV_LOCKASSERT(pvo->pvo_pte.pa & LPTE_RPGN);
pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN);
if (pvo->pvo_vaddr & PVO_MANAGED) {
pg = PHYS_TO_VM_PAGE(pvo->pvo_pte.pa & LPTE_RPGN);
if ((pvo->pvo_vaddr & PVO_MANAGED) && pg != NULL) {
LIST_REMOVE(pvo, pvo_vlink);
if (LIST_EMPTY(vm_page_to_pvoh(pg)))
vm_page_aflag_clear(pg, PGA_WRITEABLE | PGA_EXECUTABLE);
if (pg != NULL) {
LIST_REMOVE(pvo, pvo_vlink);
if (LIST_EMPTY(vm_page_to_pvoh(pg)))
vm_page_aflag_clear(pg,
PGA_WRITEABLE | PGA_EXECUTABLE);
}
}
moea64_pvo_entries--;
@ -2677,8 +2692,12 @@ moea64_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size)
vm_offset_t ppa;
int error = 0;
if (hw_direct_map && mem_valid(pa, size) == 0)
return (0);
PMAP_LOCK(kernel_pmap);
key.pvo_vaddr = ppa = pa & ~ADDR_POFF;
ppa = pa & ~ADDR_POFF;
key.pvo_vaddr = DMAP_BASE_ADDRESS + ppa;
for (pvo = RB_FIND(pvo_tree, &kernel_pmap->pmap_pvo, &key);
ppa < pa + size; ppa += PAGE_SIZE,
pvo = RB_NEXT(pvo_tree, &kernel_pmap->pmap_pvo, pvo)) {

View File

@ -401,7 +401,7 @@ moea64_cpu_bootstrap_native(mmu_t mmup, int ap)
*/
__asm __volatile ("ptesync; mtsdr1 %0; isync"
:: "r"((uintptr_t)moea64_pteg_table
:: "r"(((uintptr_t)moea64_pteg_table & ~DMAP_BASE_ADDRESS)
| (uintptr_t)(flsl(moea64_pteg_mask >> 11))));
tlbia();
}
@ -434,6 +434,9 @@ moea64_bootstrap_native(mmu_t mmup, vm_offset_t kernelstart,
*/
moea64_pteg_table = (struct lpte *)moea64_bootstrap_alloc(size, size);
if (hw_direct_map)
moea64_pteg_table =
(struct lpte *)PHYS_TO_DMAP((vm_offset_t)moea64_pteg_table);
DISABLE_TRANS(msr);
bzero(__DEVOLATILE(void *, moea64_pteg_table), moea64_pteg_count *
sizeof(struct lpteg));

View File

@ -207,13 +207,16 @@ kernel_va_to_slbv(vm_offset_t va)
/* Set kernel VSID to deterministic value */
slbv = (KERNEL_VSID((uintptr_t)va >> ADDR_SR_SHFT)) << SLBV_VSID_SHIFT;
/* Figure out if this is a large-page mapping */
if (hw_direct_map && va < VM_MIN_KERNEL_ADDRESS) {
/*
* Figure out if this is a large-page mapping.
*/
if (hw_direct_map && va > DMAP_BASE_ADDRESS && va < DMAP_MAX_ADDRESS) {
/*
* XXX: If we have set up a direct map, assumes
* all physical memory is mapped with large pages.
*/
if (mem_valid(va, 0) == 0)
if (mem_valid(DMAP_TO_PHYS(va), 0) == 0)
slbv |= SLBV_L;
}

View File

@ -43,7 +43,9 @@
#define GET_CPUINFO(r) \
mfsprg0 r
#define GET_TOCBASE(r) \
li r,TRAP_TOCBASE; /* Magic address for TOC */ \
lis r,DMAP_BASE_ADDRESS@highesta; /* To real-mode alias/dmap */ \
sldi r,r,32; \
ori r,r,TRAP_TOCBASE; /* Magic address for TOC */ \
ld r,0(r)
/*
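
The net effect of the three-instruction sequence above, written out as C for
clarity (a sketch only; TRAP_TOCBASE is defined elsewhere and is assumed
here to fit in the low 16 bits loaded by ori):

/*
 * What GET_TOCBASE computes: an address that is simultaneously a valid
 * direct-map VA (translation on) and, because the CPU ignores the top two
 * bits in real mode, a valid real-mode address for the same location.
 */
static uint64_t
get_tocbase_sketch(void)
{
	/* lis r,DMAP_BASE_ADDRESS@highesta; sldi r,r,32; ori r,r,TRAP_TOCBASE */
	uintptr_t slot = DMAP_BASE_ADDRESS | TRAP_TOCBASE;

	/* ld r,0(r) */
	return (*(volatile uint64_t *)slot);
}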

View File

@ -53,7 +53,7 @@
#define KERNEL2_SEGMENT (0xfffff0 + KERNEL2_SR)
#define EMPTY_SEGMENT 0xfffff0
#ifdef __powerpc64__
#define USER_ADDR 0xcffffffff0000000UL
#define USER_ADDR 0xeffffffff0000000UL
#else
#define USER_ADDR ((uintptr_t)USER_SR << ADDR_SR_SHFT)
#endif

View File

@ -83,11 +83,7 @@
#if !defined(LOCORE)
#ifdef __powerpc64__
#define VM_MIN_ADDRESS (0x0000000000000000UL)
#ifdef AIM
#define VM_MAXUSER_ADDRESS (0xfffffffffffff000UL)
#else
#define VM_MAXUSER_ADDRESS (0x7ffffffffffff000UL)
#endif
#define VM_MAXUSER_ADDRESS (0x3ffffffffffff000UL)
#define VM_MAX_ADDRESS (0xffffffffffffffffUL)
#else
#define VM_MIN_ADDRESS ((vm_offset_t)0)
@ -99,7 +95,7 @@
#ifdef BOOKE
#define VM_MIN_ADDRESS 0
#ifdef __powerpc64__
#define VM_MAXUSER_ADDRESS 0x7ffffffffffff000
#define VM_MAXUSER_ADDRESS 0x3ffffffffffff000
#else
#define VM_MAXUSER_ADDRESS 0x7ffff000
#endif
@ -110,8 +106,13 @@
#define FREEBSD32_USRSTACK FREEBSD32_SHAREDPAGE
#ifdef __powerpc64__
#ifdef AIM
#define VM_MIN_KERNEL_ADDRESS 0xe000000000000000UL
#define VM_MAX_KERNEL_ADDRESS 0xe0000001c7ffffffUL
#else
#define VM_MIN_KERNEL_ADDRESS 0xc000000000000000UL
#define VM_MAX_KERNEL_ADDRESS 0xc0000001c7ffffffUL
#endif
#define VM_MAX_SAFE_KERNEL_ADDRESS VM_MAX_KERNEL_ADDRESS
#endif
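
Taken together with the direct-map range defined just below, the constants
in this file imply roughly the following 64-bit AIM layout (a summary of
values appearing in this diff, not an exhaustive memory map):

/*
 * 0x0000000000000000 - 0x3ffffffffffff000  user (VM_MIN_ADDRESS..VM_MAXUSER_ADDRESS)
 * 0xc000000000000000 - 0xcfffffffffffffff  direct map (DMAP_BASE_ADDRESS..DMAP_MAX_ADDRESS)
 * 0xe000000000000000 - 0xe0000001c7ffffff  KVA (VM_MIN_KERNEL_ADDRESS..VM_MAX_KERNEL_ADDRESS)
 * 0xeffffffff0000000                       USER_ADDR (user segment mapping window)
 */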
@ -243,14 +244,17 @@ struct pmap_physseg {
/*
* We (usually) have a direct map of all physical memory, so provide
* a macro to use to get the kernel VA address for a given PA. Returns
* 0 if the direct map is unavailable. The location of the direct map
* may not be 1:1 in future, so use of the macro is recommended.
* a macro to use to get the kernel VA address for a given PA. Check the
* value of PMAP_HAS_DMAP before using.
*/
#ifndef LOCORE
#ifdef __powerpc64__
#define DMAP_BASE_ADDRESS 0x0000000000000000UL
#define DMAP_BASE_ADDRESS 0xc000000000000000UL
#define DMAP_MAX_ADDRESS 0xcfffffffffffffffUL
#else
#define DMAP_BASE_ADDRESS 0x00000000UL
#define DMAP_MAX_ADDRESS 0xbfffffffUL
#endif
#endif
#define PMAP_HAS_DMAP (hw_direct_map)
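
A short example of the check the comment above asks for (hypothetical
helper, not part of this commit; kva_alloc()/pmap_kenter() are used purely
for illustration, and the /dev/mem change later in this diff follows the
same PMAP_HAS_DMAP-gated pattern):

/*
 * Copy one page of physical memory: use the direct map when it exists,
 * otherwise fall back to a transient kernel mapping.
 */
static void
copy_phys_page(vm_paddr_t pa, void *dst)
{
	vm_offset_t va;

	if (PMAP_HAS_DMAP) {
		bcopy((void *)PHYS_TO_DMAP(pa), dst, PAGE_SIZE);
		return;
	}
	va = kva_alloc(PAGE_SIZE);
	pmap_kenter(va, pa);
	bcopy((void *)va, dst, PAGE_SIZE);
	pmap_kremove(va);
	kva_free(va, PAGE_SIZE);
}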

View File

@ -84,20 +84,21 @@ static int openfirmware(void *args);
__inline void
ofw_save_trap_vec(char *save_trap_vec)
{
if (!ofw_real_mode)
if (!ofw_real_mode || !hw_direct_map)
return;
bcopy((void *)EXC_RST, save_trap_vec, EXC_LAST - EXC_RST);
bcopy((void *)PHYS_TO_DMAP(EXC_RST), save_trap_vec, EXC_LAST - EXC_RST);
}
static __inline void
ofw_restore_trap_vec(char *restore_trap_vec)
{
if (!ofw_real_mode)
if (!ofw_real_mode || !hw_direct_map)
return;
bcopy(restore_trap_vec, (void *)EXC_RST, EXC_LAST - EXC_RST);
__syncicache(EXC_RSVD, EXC_LAST - EXC_RSVD);
bcopy(restore_trap_vec, (void *)PHYS_TO_DMAP(EXC_RST),
EXC_LAST - EXC_RST);
__syncicache((void *)PHYS_TO_DMAP(EXC_RSVD), EXC_LAST - EXC_RSVD);
}
/*
@ -381,12 +382,6 @@ OF_initial_setup(void *fdt_ptr, void *junk, int (*openfirm)(void *))
#endif
fdt = fdt_ptr;
#ifdef FDT_DTB_STATIC
/* Check for a statically included blob */
if (fdt == NULL)
fdt = &fdt_static_dtb;
#endif
}
boolean_t
@ -414,14 +409,58 @@ OF_bootstrap()
} else
#endif
if (fdt != NULL) {
status = OF_install(OFW_FDT, 0);
#ifdef AIM
bus_space_tag_t fdt_bt;
vm_offset_t tmp_fdt_ptr;
vm_size_t fdt_size;
uintptr_t fdt_va;
#endif
status = OF_install(OFW_FDT, 0);
if (status != TRUE)
return status;
#ifdef AIM /* AIM-only for now -- Book-E does this remapping in early init */
/* Get the FDT size for mapping if we can */
tmp_fdt_ptr = pmap_early_io_map((vm_paddr_t)fdt, PAGE_SIZE);
if (fdt_check_header((void *)tmp_fdt_ptr) != 0) {
pmap_early_io_unmap(tmp_fdt_ptr, PAGE_SIZE);
return FALSE;
}
fdt_size = fdt_totalsize((void *)tmp_fdt_ptr);
pmap_early_io_unmap(tmp_fdt_ptr, PAGE_SIZE);
/*
* Map this for real. Use bus_space_map() to take advantage
* of its auto-remapping function once the kernel is loaded.
* This is a dirty hack, but what we have.
*/
#ifdef _LITTLE_ENDIAN
fdt_bt = &bs_le_tag;
#else
fdt_bt = &bs_be_tag;
#endif
bus_space_map(fdt_bt, (vm_paddr_t)fdt, fdt_size, 0, &fdt_va);
err = OF_init((void *)fdt_va);
#else
err = OF_init(fdt);
#endif
}
#ifdef FDT_DTB_STATIC
/*
* Check for a statically included blob already in the kernel and
* needing no mapping.
*/
else {
status = OF_install(OFW_FDT, 0);
if (status != TRUE)
return status;
err = OF_init(&fdt_static_dtb);
}
#endif
if (err != 0) {
OF_install(NULL, 0);
status = FALSE;

View File

@ -223,7 +223,7 @@ ofw_real_bounce_alloc(void *junk)
* we have a 32-bit virtual address to give OF.
*/
if (!ofw_real_mode && !hw_direct_map)
if (!ofw_real_mode && (!hw_direct_map || DMAP_BASE_ADDRESS != 0))
pmap_kenter(of_bounce_phys, of_bounce_phys);
mtx_unlock(&of_bounce_mtx);
@ -244,7 +244,7 @@ ofw_real_map(const void *buf, size_t len)
* can use right now is memory mapped by firmware.
*/
if (!pmap_bootstrapped)
return (cell_t)(uintptr_t)buf;
return (cell_t)((uintptr_t)buf & ~DMAP_BASE_ADDRESS);
/*
* XXX: It is possible for us to get called before the VM has
@ -253,7 +253,8 @@ ofw_real_map(const void *buf, size_t len)
* Copy into the emergency buffer, and reset at the end.
*/
of_bounce_virt = emergency_buffer;
of_bounce_phys = (vm_offset_t)of_bounce_virt;
of_bounce_phys = (vm_offset_t)of_bounce_virt &
~DMAP_BASE_ADDRESS;
of_bounce_size = sizeof(emergency_buffer);
}
@ -261,7 +262,8 @@ ofw_real_map(const void *buf, size_t len)
* Make sure the bounce page offset satisfies any reasonable
* alignment constraint.
*/
of_bounce_offset += sizeof(register_t) - (of_bounce_offset % sizeof(register_t));
of_bounce_offset += sizeof(register_t) -
(of_bounce_offset % sizeof(register_t));
if (of_bounce_offset + len > of_bounce_size) {
panic("Oversize Open Firmware call!");

View File

@ -115,7 +115,9 @@ bs_remap_earlyboot(void)
for (i = 0; i < earlyboot_map_idx; i++) {
spa = earlyboot_mappings[i].addr;
if (spa == earlyboot_mappings[i].virt &&
if (hw_direct_map &&
PHYS_TO_DMAP(spa) == earlyboot_mappings[i].virt &&
pmap_dev_direct_mapped(spa, earlyboot_mappings[i].size) == 0)
continue;

View File

@ -222,6 +222,7 @@ ASSYM(TDF_NEEDRESCHED, TDF_NEEDRESCHED);
ASSYM(SF_UC, offsetof(struct sigframe, sf_uc));
ASSYM(KERNBASE, KERNBASE);
ASSYM(DMAP_BASE_ADDRESS, DMAP_BASE_ADDRESS);
ASSYM(MAXCOMLEN, MAXCOMLEN);
#ifdef __powerpc64__

View File

@ -125,8 +125,9 @@ kmem_direct_mapped: v = uio->uio_offset;
break;
}
if (!pmap_dev_direct_mapped(v, cnt)) {
error = uiomove((void *)v, cnt, uio);
if (hw_direct_map && !pmap_dev_direct_mapped(v, cnt)) {
error = uiomove((void *)PHYS_TO_DMAP(v), cnt,
uio);
} else {
m.phys_addr = trunc_page(v);
marr = &m;

View File

@ -94,7 +94,12 @@ uma_small_free(void *mem, vm_size_t size, u_int8_t flags)
pmap_remove(kernel_pmap,(vm_offset_t)mem,
(vm_offset_t)mem + PAGE_SIZE);
m = PHYS_TO_VM_PAGE((vm_offset_t)mem);
if (hw_direct_map)
m = PHYS_TO_VM_PAGE(DMAP_TO_PHYS((vm_offset_t)mem));
else
m = PHYS_TO_VM_PAGE((vm_offset_t)mem);
KASSERT(m != NULL,
("Freeing UMA block at %p with no associated page", mem));
vm_page_unwire_noq(m);
vm_page_free(m);
atomic_subtract_int(&hw_uma_mdpages, 1);

View File

@ -128,9 +128,6 @@ ps3_attach(platform_t plat)
pmap_mmu_install("mmu_ps3", BUS_PROBE_SPECIFIC);
cpu_idle_hook = ps3_cpu_idle;
/* Set a breakpoint to make NULL an invalid address */
lv1_set_dabr(0x7 /* read and write, MMU on */, 2 /* kernel accesses */);
/* Record our PIR at boot for later */
ps3_boot_pir = mfspr(SPR_PIR);
@ -227,7 +224,8 @@ static int
ps3_smp_start_cpu(platform_t plat, struct pcpu *pc)
{
/* kernel is spinning on 0x40 == -1 right now */
volatile uint32_t *secondary_spin_sem = (uint32_t *)PHYS_TO_DMAP(0x40);
volatile uint32_t *secondary_spin_sem =
(uint32_t *)PHYS_TO_DMAP((uintptr_t)0x40);
int remote_pir = pc->pc_hwref;
int timeout;