From 01ad9aa4bf25709f5ef398b5e9d665f373d42ea7 Mon Sep 17 00:00:00 2001
From: jhibbits
Date: Tue, 19 Apr 2016 01:48:18 +0000
Subject: [PATCH] Fix SMP booting for PowerPC Book-E

Summary:
PowerPC Book-E SMP is currently broken for unknown reasons. Pull in
Semihalf changes made circa 2012 for e500mc/e5500, which enable booting
SMP. This eliminates the shared software TLB1 table, replacing it with
the tlb1_read_entry() function.

This does not yet support ePAPR SMP booting, and doesn't handle
resetting CPUs already released (ePAPR boot releases APs to a spin loop
waiting on a specific address). This will be addressed in the near
future by using the MPIC to reset the AP into our own alternate boot
address.

This does include a change to the dpaa/dtsec(4) driver, to mark the
portals as CPU-private.

Test Plan:
Tested on Amiga X5000/20 (P5020). Boots, prints the following
messages:

 Adding CPU 0, pir=0, awake=1
 Waking up CPU 1 (dev=1)
 Adding CPU 1, pir=20, awake=1
 SMP: AP CPU #1 launched

top(1) shows CPU1 active.

Obtained from: Semihalf
Relnotes: Yes
Differential Revision: https://reviews.freebsd.org/D5945
---
 sys/dev/dpaa/portals_common.c          |   2 -
 sys/powerpc/booke/locore.S             | 127 +++++------
 sys/powerpc/booke/pmap.c               | 287 ++++++++++++++-----------
 sys/powerpc/include/tlb.h              |  11 +-
 sys/powerpc/mpc85xx/platform_mpc85xx.c |  22 +-
 sys/powerpc/powerpc/genassym.c         |   2 +-
 sys/powerpc/powerpc/mp_machdep.c       |   3 +
 7 files changed, 245 insertions(+), 209 deletions(-)

diff --git a/sys/dev/dpaa/portals_common.c b/sys/dev/dpaa/portals_common.c
index e540077b7555..61a034948250 100644
--- a/sys/dev/dpaa/portals_common.c
+++ b/sys/dev/dpaa/portals_common.c
@@ -75,8 +75,6 @@ dpaa_portal_alloc_res(device_t dev, struct dpaa_portals_devinfo *di, int cpu)
 sc->sc_rres[0] = bus_alloc_resource(dev, SYS_RES_MEMORY,
 &sc->sc_rrid[0], rle->start + sc->sc_dp_pa,
 rle->end + sc->sc_dp_pa, rle->count, RF_ACTIVE);
- pmap_change_attr((vm_offset_t)rman_get_bushandle(sc->sc_rres[0]),
- rle->count, VM_MEMATTR_CACHEABLE);
 if (sc->sc_rres[0] == NULL) {
 device_printf(dev, "Could not allocate memory.\n");
 return (ENXIO);
diff --git a/sys/powerpc/booke/locore.S b/sys/powerpc/booke/locore.S
index a49d79675b4a..9e47ee690de6 100644
--- a/sys/powerpc/booke/locore.S
+++ b/sys/powerpc/booke/locore.S
@@ -104,6 +104,10 @@ __start:
 mtmsr %r3
 isync
 
+/*
+ * Initial HIDs configuration
+ */
+1:
 mfpvr %r3
 rlwinm %r3, %r3, 16, 16, 31
 
@@ -161,7 +165,6 @@ __start:
 /*
 * Create temporary mapping in AS=1 and switch to it
 */
- addi %r3, %r29, 1
 bl tlb1_temp_mapping_as1
 
 mfmsr %r3
@@ -197,7 +200,7 @@ __start:
 lis %r3, KERNBASE@h
 ori %r3, %r3, KERNBASE@l /* EPN = KERNBASE */
 #ifdef SMP
- ori %r3, %r3, MAS2_M@l /* WIMGE = 0b00100 */
+ ori %r3, %r3, (_TLB_ENTRY_SHARED | MAS2_M)@l /* WIMGE = 0b00100 */
 #endif
 mtspr SPR_MAS2, %r3
 isync
@@ -295,21 +298,19 @@ done_mapping:
 __boot_page:
 bl 1f
 
- .globl bp_ntlb1s
-bp_ntlb1s:
+ .globl bp_trace
+bp_trace:
 .long 0
 
- .globl bp_tlb1
-bp_tlb1:
- .space 4 * 3 * 64
-
- .globl bp_tlb1_end
-bp_tlb1_end:
+ .globl bp_kernload
+bp_kernload:
+ .long 0
 
 /*
 * Initial configuration
 */
-1: mflr %r31 /* r31 hold the address of bp_ntlb1s */
+1:
+ mflr %r31 /* r31 holds the address of bp_trace */
 
 /* Set HIDs */
 mfpvr %r3
@@ -332,20 +333,7 @@ bp_tlb1_end:
 3: mtspr SPR_HID0, %r4
 isync
 
-/*
- * E500mc and E5500 do not have HID1 register, so skip HID1 setup on
- * this core.
- */ - cmpli 0, 0, %r3, FSL_E500mc - beq 1f - cmpli 0, 0, %r3, FSL_E5500 - beq 1f - lis %r3, HID1_E500_DEFAULT_SET@h - ori %r3, %r3, HID1_E500_DEFAULT_SET@l - mtspr SPR_HID1, %r3 - isync -1: /* Enable branch prediction */ li %r3, BUCSR_BPEN mtspr SPR_BUCSR, %r3 @@ -367,7 +355,7 @@ bp_tlb1_end: /* * Create temporary translation in AS=1 and switch to it */ - lwz %r3, 0(%r31) + bl tlb1_temp_mapping_as1 mfmsr %r3 @@ -388,39 +376,46 @@ bp_tlb1_end: /* * Setup final mapping in TLB1[1] and switch to it */ - lwz %r6, 0(%r31) - addi %r5, %r31, 4 - li %r4, 0 - -4: lis %r3, MAS0_TLBSEL1@h - rlwimi %r3, %r4, 16, 12, 15 + /* Final kernel mapping, map in 64 MB of RAM */ + lis %r3, MAS0_TLBSEL1@h /* Select TLB1 */ + li %r4, 0 /* Entry 0 */ + rlwimi %r3, %r4, 16, 4, 15 mtspr SPR_MAS0, %r3 isync - lwz %r3, 0(%r5) - mtspr SPR_MAS1, %r3 + + li %r3, (TLB_SIZE_64M << MAS1_TSIZE_SHIFT)@l + oris %r3, %r3, (MAS1_VALID | MAS1_IPROT)@h + mtspr SPR_MAS1, %r3 /* note TS was not filled, so it's TS=0 */ isync - lwz %r3, 4(%r5) + + lis %r3, KERNBASE@h + ori %r3, %r3, KERNBASE@l /* EPN = KERNBASE */ + ori %r3, %r3, (_TLB_ENTRY_SHARED | MAS2_M)@l /* WIMGE = 0b00100 */ mtspr SPR_MAS2, %r3 isync - lwz %r3, 8(%r5) + + /* Retrieve kernel load [physical] address from bp_kernload */ + bl 4f + .long bp_kernload + .long __boot_page +4: mflr %r3 + lwz %r4, 0(%r3) + lwz %r5, 4(%r3) + rlwinm %r3, %r3, 0, 0, 19 + sub %r4, %r4, %r5 /* offset of bp_kernload within __boot_page */ + lwzx %r3, %r4, %r3 + + /* Set RPN and protection */ + ori %r3, %r3, (MAS3_SX | MAS3_SW | MAS3_SR)@l mtspr SPR_MAS3, %r3 isync tlbwe isync msync - addi %r5, %r5, 12 - addi %r4, %r4, 1 - cmpw %r4, %r6 - blt 4b /* Switch to the final mapping */ bl 5f - .long __boot_page-. -5: mflr %r5 - lwz %r3,0(%r3) - add %r5,%r5,%r3 /* __boot_page in r5 */ - bl 6f -6: mflr %r3 +5: mflr %r3 rlwinm %r3, %r3, 0, 0xfff /* Offset from boot page start */ add %r3, %r3, %r5 /* Make this virtual address */ addi %r3, %r3, 32 @@ -449,6 +444,7 @@ bp_tlb1_end: 1: mflr %r1 lwz %r2,0(%r1) add %r1,%r1,%r2 + stw %r1, 0(%r1) addi %r1, %r1, (TMPSTACKSZ - 16) /* @@ -479,6 +475,7 @@ bp_tlb1_end: 6: b 6b #endif /* SMP */ +#if defined (BOOKE_E500) /* * Invalidate all entries in the given TLB. * @@ -508,7 +505,7 @@ tlb1_find_current: isync tlbsx 0, %r3 mfspr %r17, SPR_MAS0 - rlwinm %r29, %r17, 16, 20, 31 /* MAS0[ESEL] -> r29 */ + rlwinm %r29, %r17, 16, 26, 31 /* MAS0[ESEL] -> r29 */ /* Make sure we have IPROT set on the entry */ mfspr %r17, SPR_MAS1 @@ -541,14 +538,11 @@ tlb1_inval_entry: blr /* - * r3 entry of temp translation - * r29 entry of current translation - * r28 returns temp entry passed in r3 - * r4-r5 scratched + * r29 current entry number + * r28 returned temp entry + * r3-r5 scratched */ tlb1_temp_mapping_as1: - mr %r28, %r3 - /* Read our current translation */ lis %r3, MAS0_TLBSEL1@h /* Select TLB1 */ rlwimi %r3, %r29, 16, 10, 15 /* Select our current entry */ @@ -556,8 +550,14 @@ tlb1_temp_mapping_as1: isync tlbre - /* Prepare and write temp entry */ + /* + * Prepare and write temp entry + * + * FIXME this is not robust against overflow i.e. when the current + * entry is the last in TLB1 + */ lis %r3, MAS0_TLBSEL1@h /* Select TLB1 */ + addi %r28, %r29, 1 /* Use next entry. */ rlwimi %r3, %r28, 16, 10, 15 /* Select temp entry */ mtspr SPR_MAS0, %r3 isync @@ -640,8 +640,19 @@ zero_mas8: mtspr SPR_MAS8, %r20 isync blr +#endif #ifdef SMP +.globl __boot_tlb1 + /* + * The __boot_tlb1 table is used to hold BSP TLB1 entries + * marked with _TLB_ENTRY_SHARED flag during AP bootstrap. 
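+ * Each slot holds one tlb_entry_t as declared in <machine/tlb.h> (the
+ * raw MAS1/MAS2/MAS3/MAS7 words plus the decoded virt/phys/size
+ * fields), which is why the storage below is sized as
+ * TLB1_MAX_ENTRIES * TLB_ENTRY_SIZE.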
+ * The BSP fills in the table in tlb_ap_prep() function. Next, + * AP loads its contents to TLB1 hardware in pmap_bootstrap_ap(). + */ +__boot_tlb1: + .space TLB1_MAX_ENTRIES * TLB_ENTRY_SIZE + __boot_page_padding: /* * Boot page needs to be exactly 4K, with the last word of this page @@ -779,14 +790,8 @@ ENTRY(dataloss_erratum_access) mtspr SPR_L1CSR1, %r11 isync - mflr %r9 - bl 1f - .long 2f-. -1: - mflr %r5 - lwz %r8, 0(%r5) - mtlr %r9 - add %r8, %r8, %r5 + lis %r8, 2f@h + ori %r8, %r8, 2f@l icbtls 0, 0, %r8 addi %r9, %r8, 64 diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c index 42eb631936c2..50cd78ad7bda 100644 --- a/sys/powerpc/booke/pmap.c +++ b/sys/powerpc/booke/pmap.c @@ -110,10 +110,6 @@ extern unsigned char _end[]; extern uint32_t *bootinfo; -#ifdef SMP -extern uint32_t bp_ntlb1s; -#endif - vm_paddr_t kernload; vm_offset_t kernstart; vm_size_t kernsize; @@ -187,11 +183,6 @@ uint32_t tlb1_entries; #define TLB1_ENTRIES (tlb1_entries) #define TLB1_MAXENTRIES 64 -/* In-ram copy of the TLB1 */ -static tlb_entry_t tlb1[TLB1_MAXENTRIES]; - -/* Next free entry in the TLB1 */ -static unsigned int tlb1_idx; static vm_offset_t tlb1_map_base = VM_MAXUSER_ADDRESS + PAGE_SIZE; static tlbtid_t tid_alloc(struct pmap *); @@ -199,7 +190,8 @@ static void tid_flush(tlbtid_t tid); static void tlb_print_entry(int, uint32_t, uint32_t, uint32_t, uint32_t); -static void tlb1_write_entry(unsigned int); +static void tlb1_read_entry(tlb_entry_t *, unsigned int); +static void tlb1_write_entry(tlb_entry_t *, unsigned int); static int tlb1_iomapped(int, vm_paddr_t, vm_size_t, vm_offset_t *); static vm_size_t tlb1_mapin_region(vm_offset_t, vm_paddr_t, vm_size_t); @@ -271,6 +263,7 @@ static vm_offset_t ptbl_buf_pool_vabase; static struct ptbl_buf *ptbl_bufs; #ifdef SMP +extern tlb_entry_t __boot_tlb1[]; void pmap_bootstrap_ap(volatile uint32_t *); #endif @@ -1369,6 +1362,22 @@ mmu_booke_bootstrap(mmu_t mmu, vm_offset_t start, vm_offset_t kernelend) } #ifdef SMP + void +tlb1_ap_prep(void) +{ + tlb_entry_t *e, tmp; + unsigned int i; + + /* Prepare TLB1 image for AP processors */ + e = __boot_tlb1; + for (i = 0; i < TLB1_ENTRIES; i++) { + tlb1_read_entry(&tmp, i); + + if ((tmp.mas1 & MAS1_VALID) && (tmp.mas2 & _TLB_ENTRY_SHARED)) + memcpy(e++, &tmp, sizeof(tmp)); + } +} + void pmap_bootstrap_ap(volatile uint32_t *trcp __unused) { @@ -1376,15 +1385,15 @@ pmap_bootstrap_ap(volatile uint32_t *trcp __unused) /* * Finish TLB1 configuration: the BSP already set up its TLB1 and we - * have the snapshot of its contents in the s/w tlb1[] table, so use - * these values directly to (re)program AP's TLB1 hardware. + * have the snapshot of its contents in the s/w __boot_tlb1[] table + * created by tlb1_ap_prep(), so use these values directly to + * (re)program AP's TLB1 hardware. + * + * Start at index 1 because index 0 has the kernel map. 
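+ * (Slot 0 was programmed by the __boot_page code with the 64 MB
+ * kernel mapping, MAS1_IPROT set, and must be left intact here.)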
*/ - for (i = bp_ntlb1s; i < tlb1_idx; i++) { - /* Skip invalid entries */ - if (!(tlb1[i].mas1 & MAS1_VALID)) - continue; - - tlb1_write_entry(i); + for (i = 1; i < TLB1_ENTRIES; i++) { + if (__boot_tlb1[i].mas1 & MAS1_VALID) + tlb1_write_entry(&__boot_tlb1[i], i); } set_mas4_defaults(); @@ -1429,14 +1438,16 @@ mmu_booke_extract(mmu_t mmu, pmap_t pmap, vm_offset_t va) static vm_paddr_t mmu_booke_kextract(mmu_t mmu, vm_offset_t va) { + tlb_entry_t e; int i; /* Check TLB1 mappings */ - for (i = 0; i < tlb1_idx; i++) { - if (!(tlb1[i].mas1 & MAS1_VALID)) + for (i = 0; i < TLB1_ENTRIES; i++) { + tlb1_read_entry(&e, i); + if (!(e.mas1 & MAS1_VALID)) continue; - if (va >= tlb1[i].virt && va < tlb1[i].virt + tlb1[i].size) - return (tlb1[i].phys + (va - tlb1[i].virt)); + if (va >= e.virt && va < e.virt + e.size) + return (e.phys + (va - e.virt)); } return (pte_vatopa(mmu, kernel_pmap, va)); @@ -2652,7 +2663,7 @@ mmu_booke_dev_direct_mapped(mmu_t mmu, vm_paddr_t pa, vm_size_t size) * This currently does not work for entries that * overlap TLB1 entries. */ - for (i = 0; i < tlb1_idx; i ++) { + for (i = 0; i < TLB1_ENTRIES; i ++) { if (tlb1_iomapped(i, pa, size, &va) == 0) return (0); } @@ -2692,28 +2703,36 @@ mmu_booke_dumpsys_unmap(mmu_t mmu, vm_paddr_t pa, size_t sz, void *va) vm_paddr_t ppa; vm_offset_t ofs; vm_size_t gran; + tlb_entry_t e; + int i; /* Minidumps are based on virtual memory addresses. */ /* Nothing to do... */ if (do_minidump) return; + for (i = 0; i < TLB1_ENTRIES; i++) { + tlb1_read_entry(&e, i); + if (!(e.mas1 & MAS1_VALID)) + break; + } + /* Raw physical memory dumps don't have a virtual address. */ - tlb1_idx--; - tlb1[tlb1_idx].mas1 = 0; - tlb1[tlb1_idx].mas2 = 0; - tlb1[tlb1_idx].mas3 = 0; - tlb1_write_entry(tlb1_idx); + i--; + e.mas1 = 0; + e.mas2 = 0; + e.mas3 = 0; + tlb1_write_entry(&e, i); gran = 256 * 1024 * 1024; ppa = pa & ~(gran - 1); ofs = pa - ppa; if (sz > (gran - ofs)) { - tlb1_idx--; - tlb1[tlb1_idx].mas1 = 0; - tlb1[tlb1_idx].mas2 = 0; - tlb1[tlb1_idx].mas3 = 0; - tlb1_write_entry(tlb1_idx); + i--; + e.mas1 = 0; + e.mas2 = 0; + e.mas3 = 0; + tlb1_write_entry(&e, i); } } @@ -2796,6 +2815,7 @@ mmu_booke_mapdev(mmu_t mmu, vm_paddr_t pa, vm_size_t size) static void * mmu_booke_mapdev_attr(mmu_t mmu, vm_paddr_t pa, vm_size_t size, vm_memattr_t ma) { + tlb_entry_t e; void *res; uintptr_t va, tmpva; vm_size_t sz; @@ -2807,13 +2827,14 @@ mmu_booke_mapdev_attr(mmu_t mmu, vm_paddr_t pa, vm_size_t size, vm_memattr_t ma) * requirement, but now only checks the easy case. 
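+ * (The easy case: the request is entirely contained within one
+ * existing valid entry, whose VA can then be reused; any partial
+ * overlap falls through and gets a fresh mapping below.)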
*/ if (ma == VM_MEMATTR_DEFAULT) { - for (i = 0; i < tlb1_idx; i++) { - if (!(tlb1[i].mas1 & MAS1_VALID)) + for (i = 0; i < TLB1_ENTRIES; i++) { + tlb1_read_entry(&e, i); + if (!(e.mas1 & MAS1_VALID)) continue; - if (pa >= tlb1[i].phys && - (pa + size) <= (tlb1[i].phys + tlb1[i].size)) - return (void *)(tlb1[i].virt + - (vm_offset_t)(pa - tlb1[i].phys)); + if (pa >= e.phys && + (pa + size) <= (e.phys + e.size)) + return (void *)(e.virt + + (vm_offset_t)(pa - e.phys)); } } @@ -2846,9 +2867,10 @@ mmu_booke_mapdev_attr(mmu_t mmu, vm_paddr_t pa, vm_size_t size, vm_memattr_t ma) } while (va % sz != 0); } if (bootverbose) - printf("Wiring VA=%x to PA=%jx (size=%x), " - "using TLB1[%d]\n", va, (uintmax_t)pa, sz, tlb1_idx); - tlb1_set_entry(va, pa, sz, tlb_calc_wimg(pa, ma)); + printf("Wiring VA=%x to PA=%jx (size=%x)\n", + va, (uintmax_t)pa, sz); + tlb1_set_entry(va, pa, sz, + _TLB_ENTRY_SHARED | tlb_calc_wimg(pa, ma)); size -= sz; pa += sz; va += sz; @@ -2912,30 +2934,34 @@ mmu_booke_change_attr(mmu_t mmu, vm_offset_t addr, vm_size_t sz, vm_offset_t va; pte_t *pte; int i, j; + tlb_entry_t e; /* Check TLB1 mappings */ - for (i = 0; i < tlb1_idx; i++) { - if (!(tlb1[i].mas1 & MAS1_VALID)) + for (i = 0; i < TLB1_ENTRIES; i++) { + tlb1_read_entry(&e, i); + if (!(e.mas1 & MAS1_VALID)) continue; - if (addr >= tlb1[i].virt && addr < tlb1[i].virt + tlb1[i].size) + if (addr >= e.virt && addr < e.virt + e.size) break; } - if (i < tlb1_idx) { + if (i < TLB1_ENTRIES) { /* Only allow full mappings to be modified for now. */ /* Validate the range. */ - for (j = i, va = addr; va < addr + sz; va += tlb1[j].size, j++) { - if (va != tlb1[j].virt || (sz - (va - addr) < tlb1[j].size)) + for (j = i, va = addr; va < addr + sz; va += e.size, j++) { + tlb1_read_entry(&e, j); + if (va != e.virt || (sz - (va - addr) < e.size)) return (EINVAL); } - for (va = addr; va < addr + sz; va += tlb1[i].size, i++) { - tlb1[i].mas2 &= ~MAS2_WIMGE_MASK; - tlb1[i].mas2 |= tlb_calc_wimg(tlb1[i].phys, mode); + for (va = addr; va < addr + sz; va += e.size, i++) { + tlb1_read_entry(&e, i); + e.mas2 &= ~MAS2_WIMGE_MASK; + e.mas2 |= tlb_calc_wimg(e.phys, mode); /* * Write it out to the TLB. Should really re-sync with other * cores. */ - tlb1_write_entry(i); + tlb1_write_entry(&e, i); } return (0); } @@ -3118,12 +3144,48 @@ tlb0_print_tlbentries(void) * windows, other devices mappings. */ + /* + * Read an entry from given TLB1 slot. + */ +void +tlb1_read_entry(tlb_entry_t *entry, unsigned int slot) +{ + uint32_t mas0; + + KASSERT((entry != NULL), ("%s(): Entry is NULL!", __func__)); + + mas0 = MAS0_TLBSEL(1) | MAS0_ESEL(slot); + mtspr(SPR_MAS0, mas0); + __asm __volatile("isync; tlbre"); + + entry->mas1 = mfspr(SPR_MAS1); + entry->mas2 = mfspr(SPR_MAS2); + entry->mas3 = mfspr(SPR_MAS3); + + switch ((mfpvr() >> 16) & 0xFFFF) { + case FSL_E500v2: + case FSL_E500mc: + case FSL_E5500: + entry->mas7 = mfspr(SPR_MAS7); + break; + default: + entry->mas7 = 0; + break; + } + + entry->virt = entry->mas2 & MAS2_EPN_MASK; + entry->phys = ((vm_paddr_t)(entry->mas7 & MAS7_RPN) << 32) | + (entry->mas3 & MAS3_RPN); + entry->size = + tsize2size((entry->mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT); +} + /* * Write given entry to TLB1 hardware. * Use 32 bit pa, clear 4 high-order bits of RPN (mas7). 
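+ * (Each MAS write below is followed by an isync so the subsequent
+ * tlbwe sees a consistent MAS image; MAS7, which carries the upper
+ * RPN bits, is written only on cores that implement it: e500v2,
+ * e500mc and e5500.)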
*/ static void -tlb1_write_entry(unsigned int idx) +tlb1_write_entry(tlb_entry_t *e, unsigned int idx) { uint32_t mas0; @@ -3135,11 +3197,11 @@ tlb1_write_entry(unsigned int idx) mtspr(SPR_MAS0, mas0); __asm __volatile("isync"); - mtspr(SPR_MAS1, tlb1[idx].mas1); + mtspr(SPR_MAS1, e->mas1); __asm __volatile("isync"); - mtspr(SPR_MAS2, tlb1[idx].mas2); + mtspr(SPR_MAS2, e->mas2); __asm __volatile("isync"); - mtspr(SPR_MAS3, tlb1[idx].mas3); + mtspr(SPR_MAS3, e->mas3); __asm __volatile("isync"); switch ((mfpvr() >> 16) & 0xFFFF) { case FSL_E500mc: @@ -3148,7 +3210,7 @@ tlb1_write_entry(unsigned int idx) __asm __volatile("isync"); /* FALLTHROUGH */ case FSL_E500v2: - mtspr(SPR_MAS7, tlb1[idx].mas7); + mtspr(SPR_MAS7, e->mas7); __asm __volatile("isync"); break; default: @@ -3207,10 +3269,21 @@ int tlb1_set_entry(vm_offset_t va, vm_paddr_t pa, vm_size_t size, uint32_t flags) { + tlb_entry_t e; uint32_t ts, tid; int tsize, index; - index = atomic_fetchadd_int(&tlb1_idx, 1); + for (index = 0; index < TLB1_ENTRIES; index++) { + tlb1_read_entry(&e, index); + if ((e.mas1 & MAS1_VALID) == 0) + break; + /* Check if we're just updating the flags, and update them. */ + if (e.phys == pa && e.virt == va && e.size == size) { + e.mas2 = (va & MAS2_EPN_MASK) | flags; + tlb1_write_entry(&e, index); + return (0); + } + } if (index >= TLB1_ENTRIES) { printf("tlb1_set_entry: TLB1 full!\n"); return (-1); @@ -3223,23 +3296,18 @@ tlb1_set_entry(vm_offset_t va, vm_paddr_t pa, vm_size_t size, /* XXX TS is hard coded to 0 for now as we only use single address space */ ts = (0 << MAS1_TS_SHIFT) & MAS1_TS_MASK; - /* - * Atomicity is preserved by the atomic increment above since nothing - * is ever removed from tlb1. - */ - - tlb1[index].phys = pa; - tlb1[index].virt = va; - tlb1[index].size = size; - tlb1[index].mas1 = MAS1_VALID | MAS1_IPROT | ts | tid; - tlb1[index].mas1 |= ((tsize << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK); - tlb1[index].mas2 = (va & MAS2_EPN_MASK) | flags; + e.phys = pa; + e.virt = va; + e.size = size; + e.mas1 = MAS1_VALID | MAS1_IPROT | ts | tid; + e.mas1 |= ((tsize << MAS1_TSIZE_SHIFT) & MAS1_TSIZE_MASK); + e.mas2 = (va & MAS2_EPN_MASK) | flags; /* Set supervisor RWX permission bits */ - tlb1[index].mas3 = (pa & MAS3_RPN) | MAS3_SR | MAS3_SW | MAS3_SX; - tlb1[index].mas7 = (pa >> 32) & MAS7_RPN; + e.mas3 = (pa & MAS3_RPN) | MAS3_SR | MAS3_SW | MAS3_SX; + e.mas7 = (pa >> 32) & MAS7_RPN; - tlb1_write_entry(index); + tlb1_write_entry(&e, index); /* * XXX in general TLB1 updates should be propagated between CPUs, @@ -3302,7 +3370,8 @@ tlb1_mapin_region(vm_offset_t va, vm_paddr_t pa, vm_size_t size) for (idx = 0; idx < nents; idx++) { pgsz = pgs[idx]; debugf("%u: %llx -> %x, size=%x\n", idx, pa, va, pgsz); - tlb1_set_entry(va, pa, pgsz, _TLB_ENTRY_MEM); + tlb1_set_entry(va, pa, pgsz, + _TLB_ENTRY_SHARED | _TLB_ENTRY_MEM); pa += pgsz; va += pgsz; } @@ -3326,9 +3395,6 @@ tlb1_init() { uint32_t mas0, mas1, mas2, mas3, mas7; uint32_t tsz; - int i; - - tlb1_idx = 1; tlb1_get_tlbconf(); @@ -3341,27 +3407,11 @@ tlb1_init() mas3 = mfspr(SPR_MAS3); mas7 = mfspr(SPR_MAS7); - tlb1[0].mas1 = mas1; - tlb1[0].mas2 = mfspr(SPR_MAS2); - tlb1[0].mas3 = mas3; - tlb1[0].mas7 = mas7; - tlb1[0].virt = mas2 & MAS2_EPN_MASK; - tlb1[0].phys = ((vm_paddr_t)(mas7 & MAS7_RPN) << 32) | + kernload = ((vm_paddr_t)(mas7 & MAS7_RPN) << 32) | (mas3 & MAS3_RPN); - kernload = tlb1[0].phys; - tsz = (mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; - tlb1[0].size = (tsz > 0) ? 
tsize2size(tsz) : 0; - kernsize += tlb1[0].size; - -#ifdef SMP - bp_ntlb1s = tlb1_idx; -#endif - - /* Purge the remaining entries */ - for (i = tlb1_idx; i < TLB1_ENTRIES; i++) - tlb1_write_entry(i); + kernsize += (tsz > 0) ? tsize2size(tsz) : 0; /* Setup TLB miss defaults */ set_mas4_defaults(); @@ -3373,15 +3423,17 @@ pmap_early_io_map(vm_paddr_t pa, vm_size_t size) vm_paddr_t pa_base; vm_offset_t va, sz; int i; + tlb_entry_t e; KASSERT(!pmap_bootstrapped, ("Do not use after PMAP is up!")); - for (i = 0; i < tlb1_idx; i++) { - if (!(tlb1[i].mas1 & MAS1_VALID)) + for (i = 0; i < TLB1_ENTRIES; i++) { + tlb1_read_entry(&e, i); + if (!(e.mas1 & MAS1_VALID)) continue; - if (pa >= tlb1[i].phys && (pa + size) <= - (tlb1[i].phys + tlb1[i].size)) - return (tlb1[i].virt + (pa - tlb1[i].phys)); + if (pa >= e.phys && (pa + size) <= + (e.phys + e.size)) + return (e.virt + (pa - e.phys)); } pa_base = rounddown(pa, PAGE_SIZE); @@ -3391,16 +3443,13 @@ pmap_early_io_map(vm_paddr_t pa, vm_size_t size) do { sz = 1 << (ilog2(size) & ~1); - tlb1_set_entry(tlb1_map_base, pa_base, sz, _TLB_ENTRY_IO); + tlb1_set_entry(tlb1_map_base, pa_base, sz, + _TLB_ENTRY_SHARED | _TLB_ENTRY_IO); size -= sz; pa_base += sz; tlb1_map_base += sz; } while (size > 0); -#ifdef SMP - bp_ntlb1s = tlb1_idx; -#endif - return (va); } @@ -3449,20 +3498,6 @@ tlb1_print_tlbentries(void) } } -/* - * Print out contents of the in-ram tlb1 table. - */ -void -tlb1_print_entries(void) -{ - int i; - - debugf("tlb1[] table entries:\n"); - for (i = 0; i < TLB1_ENTRIES; i++) - tlb_print_entry(i, tlb1[i].mas1, tlb1[i].mas2, tlb1[i].mas3, - tlb1[i].mas7); -} - /* * Return 0 if the physical IO range is encompassed by one of the * the TLB1 entries, otherwise return related error code. @@ -3475,39 +3510,41 @@ tlb1_iomapped(int i, vm_paddr_t pa, vm_size_t size, vm_offset_t *va) vm_paddr_t pa_end; unsigned int entry_tsize; vm_size_t entry_size; + tlb_entry_t e; *va = (vm_offset_t)NULL; + tlb1_read_entry(&e, i); /* Skip invalid entries */ - if (!(tlb1[i].mas1 & MAS1_VALID)) + if (!(e.mas1 & MAS1_VALID)) return (EINVAL); /* * The entry must be cache-inhibited, guarded, and r/w * so it can function as an i/o page */ - prot = tlb1[i].mas2 & (MAS2_I | MAS2_G); + prot = e.mas2 & (MAS2_I | MAS2_G); if (prot != (MAS2_I | MAS2_G)) return (EPERM); - prot = tlb1[i].mas3 & (MAS3_SR | MAS3_SW); + prot = e.mas3 & (MAS3_SR | MAS3_SW); if (prot != (MAS3_SR | MAS3_SW)) return (EPERM); /* The address should be within the entry range. */ - entry_tsize = (tlb1[i].mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; + entry_tsize = (e.mas1 & MAS1_TSIZE_MASK) >> MAS1_TSIZE_SHIFT; KASSERT((entry_tsize), ("tlb1_iomapped: invalid entry tsize")); entry_size = tsize2size(entry_tsize); - pa_start = (((vm_paddr_t)tlb1[i].mas7 & MAS7_RPN) << 32) | - (tlb1[i].mas3 & MAS3_RPN); + pa_start = (((vm_paddr_t)e.mas7 & MAS7_RPN) << 32) | + (e.mas3 & MAS3_RPN); pa_end = pa_start + entry_size; if ((pa < pa_start) || ((pa + size) > pa_end)) return (ERANGE); /* Return virtual address of this mapping. 
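+ * (The VA is the entry's EPN plus the offset of pa within the
+ * entry's physical window, i.e. pa - pa_start.)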
*/ - *va = (tlb1[i].mas2 & MAS2_EPN_MASK) + (pa - pa_start); + *va = (e.mas2 & MAS2_EPN_MASK) + (pa - pa_start); return (0); } diff --git a/sys/powerpc/include/tlb.h b/sys/powerpc/include/tlb.h index 65293bff02a4..83bcf5c17930 100644 --- a/sys/powerpc/include/tlb.h +++ b/sys/powerpc/include/tlb.h @@ -74,7 +74,7 @@ #define MAS2_M 0x00000004 #define MAS2_G 0x00000002 #define MAS2_E 0x00000001 -#define MAS2_WIMGE_MASK 0x0000001F +#define MAS2_WIMGE_MASK 0x0000007F #define MAS3_RPN 0xFFFFF000 #define MAS3_RPN_SHIFT 12 @@ -120,9 +120,17 @@ */ #define KERNEL_REGION_MAX_TLB_ENTRIES 4 +/* + * Use MAS2_X0 to mark entries which will be copied + * to AP CPUs during SMP bootstrap. As result entries + * marked with _TLB_ENTRY_SHARED will be shared by all CPUs. + */ +#define _TLB_ENTRY_SHARED (MAS2_X0) /* XXX under SMP? */ #define _TLB_ENTRY_IO (MAS2_I | MAS2_G) #define _TLB_ENTRY_MEM (MAS2_M) +#define TLB1_MAX_ENTRIES 64 + #if !defined(LOCORE) typedef struct tlb_entry { vm_paddr_t phys; @@ -211,6 +219,7 @@ struct pmap; void tlb_lock(uint32_t *); void tlb_unlock(uint32_t *); +void tlb1_ap_prep(void); int tlb1_set_entry(vm_offset_t, vm_paddr_t, vm_size_t, uint32_t); #endif /* !LOCORE */ diff --git a/sys/powerpc/mpc85xx/platform_mpc85xx.c b/sys/powerpc/mpc85xx/platform_mpc85xx.c index 3a36e6dbbe77..aec141202448 100644 --- a/sys/powerpc/mpc85xx/platform_mpc85xx.c +++ b/sys/powerpc/mpc85xx/platform_mpc85xx.c @@ -62,9 +62,7 @@ __FBSDID("$FreeBSD$"); extern void *ap_pcpu; extern vm_paddr_t kernload; /* Kernel physical load address */ extern uint8_t __boot_page[]; /* Boot page body */ -extern uint32_t bp_ntlb1s; -extern uint32_t bp_tlb1[]; -extern uint32_t bp_tlb1_end[]; +extern uint32_t bp_kernload; #endif extern uint32_t *bootinfo; @@ -321,10 +319,9 @@ static int mpc85xx_smp_start_cpu(platform_t plat, struct pcpu *pc) { #ifdef SMP - uint32_t *tlb1; vm_paddr_t bptr; uint32_t reg; - int i, timeout; + int timeout; uintptr_t brr; int cpuid; @@ -344,6 +341,7 @@ mpc85xx_smp_start_cpu(platform_t plat, struct pcpu *pc) brr = OCP85XX_EEBPCR; cpuid = pc->pc_cpuid + 24; #endif + bp_kernload = kernload; reg = ccsr_read4(brr); if ((reg & (1 << cpuid)) != 0) { printf("SMP: CPU %d already out of hold-off state!\n", @@ -354,20 +352,6 @@ mpc85xx_smp_start_cpu(platform_t plat, struct pcpu *pc) ap_pcpu = pc; __asm __volatile("msync; isync"); - i = 0; - tlb1 = bp_tlb1; - while (i < bp_ntlb1s && tlb1 < bp_tlb1_end) { - mtspr(SPR_MAS0, MAS0_TLBSEL(1) | MAS0_ESEL(i)); - __asm __volatile("isync; tlbre"); - tlb1[0] = mfspr(SPR_MAS1); - tlb1[1] = mfspr(SPR_MAS2); - tlb1[2] = mfspr(SPR_MAS3); - i++; - tlb1 += 3; - } - if (i < bp_ntlb1s) - bp_ntlb1s = i; - /* Flush caches to have our changes hit DRAM. 
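+ * (bp_kernload and ap_pcpu were stored just above; the AP executes
+ * __boot_page straight from memory once released from hold-off, so
+ * those stores have to reach DRAM first.)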
*/ cpu_flush_dcache(__boot_page, 4096); diff --git a/sys/powerpc/powerpc/genassym.c b/sys/powerpc/powerpc/genassym.c index 1a8cb560d187..2200eea47d81 100644 --- a/sys/powerpc/powerpc/genassym.c +++ b/sys/powerpc/powerpc/genassym.c @@ -125,7 +125,7 @@ ASSYM(PM_PDIR, offsetof(struct pmap, pm_pdir)); ASSYM(PTE_RPN, 0); ASSYM(PTE_FLAGS, sizeof(uint32_t)); #if defined(BOOKE_E500) -ASSYM(TLB0_ENTRY_SIZE, sizeof(struct tlb_entry)); +ASSYM(TLB_ENTRY_SIZE, sizeof(struct tlb_entry)); #endif #endif diff --git a/sys/powerpc/powerpc/mp_machdep.c b/sys/powerpc/powerpc/mp_machdep.c index 2f20b71a3b76..e219683f8b86 100644 --- a/sys/powerpc/powerpc/mp_machdep.c +++ b/sys/powerpc/powerpc/mp_machdep.c @@ -212,6 +212,9 @@ cpu_mp_unleash(void *dummy) cpus = 0; smp_cpus = 0; +#ifdef BOOKE + tlb1_ap_prep(); +#endif STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { cpus++; if (!pc->pc_bsp) {
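With the shared tlb1[] table gone, every TLB1 consumer in pmap.c follows
the same pattern: probe the hardware with tlb1_read_entry(), modify the
entry, and write it back with tlb1_write_entry(). A minimal sketch of
the probe half, assuming only the tlb_entry_t type and the helpers this
diff introduces (find_free_tlb1_slot() is an illustrative name, not a
symbol the patch adds):

/*
 * Sketch only: locate a free TLB1 slot by probing the hardware, the
 * way tlb1_set_entry() does now that the in-RAM shadow table is gone.
 * find_free_tlb1_slot() is not part of this patch.
 */
static int
find_free_tlb1_slot(void)
{
	tlb_entry_t e;
	int i;

	for (i = 0; i < TLB1_ENTRIES; i++) {
		tlb1_read_entry(&e, i);		/* pull MAS1..MAS7 into e */
		if ((e.mas1 & MAS1_VALID) == 0)
			return (i);		/* invalid => slot is free */
	}
	return (-1);				/* TLB1 full */
}

Each probe costs a tlbre instead of a memory read, but there is no
longer a software copy of TLB1 that has to be kept coherent between the
BSP and the APs.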