From a745246822b524c3527fd87eb683cfd8d0d75b58 Mon Sep 17 00:00:00 2001 From: Justin Hibbits Date: Sat, 18 Apr 2015 21:39:17 +0000 Subject: [PATCH] Implement hwpmc(4) for Freescale e500 core. This supports e500v1, e500v2, and e500mc. Tested only on e500v2, but the performance counters are identical across all, with e500mc having some additional events. Relnotes: Yes --- lib/libpmc/libpmc.c | 24 +- sys/conf/files.powerpc | 3 +- sys/dev/hwpmc/hwpmc_e500.c | 660 +++++++++++++++++++++++++++++++++ sys/dev/hwpmc/hwpmc_mpc7xxx.c | 4 +- sys/dev/hwpmc/hwpmc_powerpc.c | 5 + sys/dev/hwpmc/hwpmc_powerpc.h | 1 + sys/dev/hwpmc/hwpmc_ppc970.c | 4 +- sys/dev/hwpmc/pmc_events.h | 167 +++++++++ sys/modules/hwpmc/Makefile | 2 +- sys/powerpc/booke/interrupt.c | 25 ++ sys/powerpc/booke/locore.S | 2 + sys/powerpc/booke/machdep.c | 7 + sys/powerpc/booke/trap_subr.S | 13 + sys/powerpc/include/pmc_mdep.h | 58 ++- 14 files changed, 964 insertions(+), 11 deletions(-) create mode 100644 sys/dev/hwpmc/hwpmc_e500.c diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c index 7d429ebd297c..2ad226855246 100644 --- a/lib/libpmc/libpmc.c +++ b/lib/libpmc/libpmc.c @@ -164,6 +164,7 @@ PMC_CLASSDEP_TABLE(octeon, OCTEON); PMC_CLASSDEP_TABLE(ucf, UCF); PMC_CLASSDEP_TABLE(ppc7450, PPC7450); PMC_CLASSDEP_TABLE(ppc970, PPC970); +PMC_CLASSDEP_TABLE(e500, E500); static struct pmc_event_descr soft_event_table[PMC_EV_DYN_COUNT]; @@ -296,8 +297,9 @@ PMC_MDEP_TABLE(armv7, ARMV7, PMC_CLASS_SOFT, PMC_CLASS_ARMV7); PMC_MDEP_TABLE(mips24k, MIPS24K, PMC_CLASS_SOFT, PMC_CLASS_MIPS24K); PMC_MDEP_TABLE(mips74k, MIPS74K, PMC_CLASS_SOFT, PMC_CLASS_MIPS74K); PMC_MDEP_TABLE(octeon, OCTEON, PMC_CLASS_SOFT, PMC_CLASS_OCTEON); -PMC_MDEP_TABLE(ppc7450, PPC7450, PMC_CLASS_SOFT, PMC_CLASS_PPC7450); -PMC_MDEP_TABLE(ppc970, PPC970, PMC_CLASS_SOFT, PMC_CLASS_PPC970); +PMC_MDEP_TABLE(ppc7450, PPC7450, PMC_CLASS_SOFT, PMC_CLASS_PPC7450, PMC_CLASS_TSC); +PMC_MDEP_TABLE(ppc970, PPC970, PMC_CLASS_SOFT, PMC_CLASS_PPC970, PMC_CLASS_TSC); 
+PMC_MDEP_TABLE(e500, E500, PMC_CLASS_SOFT, PMC_CLASS_E500, PMC_CLASS_TSC); PMC_MDEP_TABLE(generic, SOFT, PMC_CLASS_SOFT); static const struct pmc_event_descr tsc_event_table[] = @@ -368,6 +370,7 @@ PMC_CLASS_TABLE_DESC(octeon, OCTEON, octeon, mips); #if defined(__powerpc__) PMC_CLASS_TABLE_DESC(ppc7450, PPC7450, ppc7450, powerpc); PMC_CLASS_TABLE_DESC(ppc970, PPC970, ppc970, powerpc); +PMC_CLASS_TABLE_DESC(e500, E500, e500, powerpc); #endif static struct pmc_class_descr soft_class_table_descr = @@ -2494,6 +2497,12 @@ static struct pmc_event_alias ppc970_aliases[] = { EV_ALIAS(NULL, NULL) }; +static struct pmc_event_alias e500_aliases[] = { + EV_ALIAS("instructions", "INSTR_COMPLETED"), + EV_ALIAS("cycles", "CYCLES"), + EV_ALIAS(NULL, NULL) +}; + #define POWERPC_KW_OS "os" #define POWERPC_KW_USR "usr" #define POWERPC_KW_ANYTHREAD "anythread" @@ -2949,6 +2958,10 @@ pmc_event_names_of_class(enum pmc_class cl, const char ***eventnames, ev = ppc970_event_table; count = PMC_EVENT_TABLE_SIZE(ppc970); break; + case PMC_CLASS_E500: + ev = e500_event_table; + count = PMC_EVENT_TABLE_SIZE(e500); + break; case PMC_CLASS_SOFT: ev = soft_event_table; count = soft_event_info.pm_nevent; @@ -3245,6 +3258,10 @@ pmc_init(void) PMC_MDEP_INIT(ppc970); pmc_class_table[n] = &ppc970_class_table_descr; break; + case PMC_CPU_PPC_E500: + PMC_MDEP_INIT(e500); + pmc_class_table[n] = &e500_class_table_descr; + break; #endif default: /* @@ -3444,6 +3461,9 @@ _pmc_name_of_event(enum pmc_event pe, enum pmc_cputype cpu) } else if (pe >= PMC_EV_PPC970_FIRST && pe <= PMC_EV_PPC970_LAST) { ev = ppc970_event_table; evfence = ppc970_event_table + PMC_EVENT_TABLE_SIZE(ppc970); + } else if (pe >= PMC_EV_E500_FIRST && pe <= PMC_EV_E500_LAST) { + ev = e500_event_table; + evfence = e500_event_table + PMC_EVENT_TABLE_SIZE(e500); } else if (pe == PMC_EV_TSC_TSC) { ev = tsc_event_table; evfence = tsc_event_table + PMC_EVENT_TABLE_SIZE(tsc); diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc index 
bd20385dae4d..ab585b067f83 100644 --- a/sys/conf/files.powerpc +++ b/sys/conf/files.powerpc @@ -31,8 +31,9 @@ dev/fb/fb.c optional sc dev/fdt/fdt_powerpc.c optional fdt # ofwbus depends on simplebus. dev/fdt/simplebus.c optional aim | fdt -dev/hwpmc/hwpmc_powerpc.c optional hwpmc +dev/hwpmc/hwpmc_e500.c optional hwpmc dev/hwpmc/hwpmc_mpc7xxx.c optional hwpmc +dev/hwpmc/hwpmc_powerpc.c optional hwpmc dev/hwpmc/hwpmc_ppc970.c optional hwpmc dev/iicbus/ad7417.c optional ad7417 powermac dev/iicbus/adm1030.c optional powermac windtunnel | adm1030 powermac diff --git a/sys/dev/hwpmc/hwpmc_e500.c b/sys/dev/hwpmc/hwpmc_e500.c new file mode 100644 index 000000000000..d81b33354ee6 --- /dev/null +++ b/sys/dev/hwpmc/hwpmc_e500.c @@ -0,0 +1,660 @@ +/*- + * Copyright (c) 2015 Justin Hibbits + * Copyright (c) 2005, Joseph Koshy + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include + +#include +#include + +#include + +#include "hwpmc_powerpc.h" + +#define POWERPC_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | \ + PMC_CAP_SYSTEM | PMC_CAP_EDGE | \ + PMC_CAP_THRESHOLD | PMC_CAP_READ | \ + PMC_CAP_WRITE | PMC_CAP_INVERT | \ + PMC_CAP_QUALIFIER) + +#define E500_PMC_HAS_OVERFLOWED(x) (e500_pmcn_read(x) & (0x1 << 31)) + +struct e500_event_code_map { + enum pmc_event pe_ev; /* enum value */ + uint8_t pe_counter_mask; /* Which counter this can be counted in. 
*/ + uint8_t pe_code; /* numeric code */ + uint8_t pe_cpu; /* e500 core (v1,v2,mc), mask */ +}; + +#define E500_MAX_PMCS 4 +#define PMC_PPC_MASK0 0 +#define PMC_PPC_MASK1 1 +#define PMC_PPC_MASK2 2 +#define PMC_PPC_MASK3 3 +#define PMC_PPC_MASK_ALL 0x0f +#define PMC_PPC_E500V1 1 +#define PMC_PPC_E500V2 2 +#define PMC_PPC_E500MC 4 +#define PMC_PPC_E500_ANY 7 +#define PMC_E500_EVENT(id, mask, number, core) \ + [PMC_EV_E500_##id - PMC_EV_E500_FIRST] = \ + { .pe_ev = PMC_EV_E500_##id, .pe_counter_mask = mask, \ + .pe_code = number, .pe_cpu = core } +#define PMC_E500MC_ONLY(id, number) \ + PMC_E500_EVENT(id, PMC_PPC_MASK_ALL, number, PMC_PPC_E500MC) +#define PMC_E500_COMMON(id, number) \ + PMC_E500_EVENT(id, PMC_PPC_MASK_ALL, number, PMC_PPC_E500_ANY) + +static struct e500_event_code_map e500_event_codes[] = { + PMC_E500_COMMON(CYCLES, 1), + PMC_E500_COMMON(INSTR_COMPLETED, 2), + PMC_E500_COMMON(UOPS_COMPLETED, 3), + PMC_E500_COMMON(INSTR_FETCHED, 4), + PMC_E500_COMMON(UOPS_DECODED, 5), + PMC_E500_COMMON(PM_EVENT_TRANSITIONS, 6), + PMC_E500_COMMON(PM_EVENT_CYCLES, 7), + PMC_E500_COMMON(BRANCH_INSTRS_COMPLETED, 8), + PMC_E500_COMMON(LOAD_UOPS_COMPLETED, 9), + PMC_E500_COMMON(STORE_UOPS_COMPLETED, 10), + PMC_E500_COMMON(CQ_REDIRECTS, 11), + PMC_E500_COMMON(BRANCHES_FINISHED, 12), + PMC_E500_COMMON(TAKEN_BRANCHES_FINISHED, 13), + PMC_E500_COMMON(FINISHED_UNCOND_BRANCHES_MISS_BTB, 14), + PMC_E500_COMMON(BRANCH_MISPRED, 15), + PMC_E500_COMMON(BTB_BRANCH_MISPRED_FROM_DIRECTION, 16), + PMC_E500_COMMON(BTB_HITS_PSEUDO_HITS, 17), + PMC_E500_COMMON(CYCLES_DECODE_STALLED, 18), + PMC_E500_COMMON(CYCLES_ISSUE_STALLED, 19), + PMC_E500_COMMON(CYCLES_BRANCH_ISSUE_STALLED, 20), + PMC_E500_COMMON(CYCLES_SU1_SCHED_STALLED, 21), + PMC_E500_COMMON(CYCLES_SU2_SCHED_STALLED, 22), + PMC_E500_COMMON(CYCLES_MU_SCHED_STALLED, 23), + PMC_E500_COMMON(CYCLES_LRU_SCHED_STALLED, 24), + PMC_E500_COMMON(CYCLES_BU_SCHED_STALLED, 25), + PMC_E500_COMMON(TOTAL_TRANSLATED, 26), + 
PMC_E500_COMMON(LOADS_TRANSLATED, 27), + PMC_E500_COMMON(STORES_TRANSLATED, 28), + PMC_E500_COMMON(TOUCHES_TRANSLATED, 29), + PMC_E500_COMMON(CACHEOPS_TRANSLATED, 30), + PMC_E500_COMMON(CACHE_INHIBITED_ACCESS_TRANSLATED, 31), + PMC_E500_COMMON(GUARDED_LOADS_TRANSLATED, 32), + PMC_E500_COMMON(WRITE_THROUGH_STORES_TRANSLATED, 33), + PMC_E500_COMMON(MISALIGNED_LOAD_STORE_ACCESS_TRANSLATED, 34), + PMC_E500_COMMON(TOTAL_ALLOCATED_TO_DLFB, 35), + PMC_E500_COMMON(LOADS_TRANSLATED_ALLOCATED_TO_DLFB, 36), + PMC_E500_COMMON(STORES_COMPLETED_ALLOCATED_TO_DLFB, 37), + PMC_E500_COMMON(TOUCHES_TRANSLATED_ALLOCATED_TO_DLFB, 38), + PMC_E500_COMMON(STORES_COMPLETED, 39), + PMC_E500_COMMON(DATA_L1_CACHE_LOCKS, 40), + PMC_E500_COMMON(DATA_L1_CACHE_RELOADS, 41), + PMC_E500_COMMON(DATA_L1_CACHE_CASTOUTS, 42), + PMC_E500_COMMON(LOAD_MISS_DLFB_FULL, 43), + PMC_E500_COMMON(LOAD_MISS_LDQ_FULL, 44), + PMC_E500_COMMON(LOAD_GUARDED_MISS, 45), + PMC_E500_COMMON(STORE_TRANSLATE_WHEN_QUEUE_FULL, 46), + PMC_E500_COMMON(ADDRESS_COLLISION, 47), + PMC_E500_COMMON(DATA_MMU_MISS, 48), + PMC_E500_COMMON(DATA_MMU_BUSY, 49), + PMC_E500_COMMON(PART2_MISALIGNED_CACHE_ACCESS, 50), + PMC_E500_COMMON(LOAD_MISS_DLFB_FULL_CYCLES, 51), + PMC_E500_COMMON(LOAD_MISS_LDQ_FULL_CYCLES, 52), + PMC_E500_COMMON(LOAD_GUARDED_MISS_CYCLES, 53), + PMC_E500_COMMON(STORE_TRANSLATE_WHEN_QUEUE_FULL_CYCLES, 54), + PMC_E500_COMMON(ADDRESS_COLLISION_CYCLES, 55), + PMC_E500_COMMON(DATA_MMU_MISS_CYCLES, 56), + PMC_E500_COMMON(DATA_MMU_BUSY_CYCLES, 57), + PMC_E500_COMMON(PART2_MISALIGNED_CACHE_ACCESS_CYCLES, 58), + PMC_E500_COMMON(INSTR_L1_CACHE_LOCKS, 59), + PMC_E500_COMMON(INSTR_L1_CACHE_RELOADS, 60), + PMC_E500_COMMON(INSTR_L1_CACHE_FETCHES, 61), + PMC_E500_COMMON(INSTR_MMU_TLB4K_RELOADS, 62), + PMC_E500_COMMON(INSTR_MMU_VSP_RELOADS, 63), + PMC_E500_COMMON(DATA_MMU_TLB4K_RELOADS, 64), + PMC_E500_COMMON(DATA_MMU_VSP_RELOADS, 65), + PMC_E500_COMMON(L2MMU_MISSES, 66), + PMC_E500_COMMON(BIU_MASTER_REQUESTS, 67), + 
PMC_E500_COMMON(BIU_MASTER_INSTR_SIDE_REQUESTS, 68), + PMC_E500_COMMON(BIU_MASTER_DATA_SIDE_REQUESTS, 69), + PMC_E500_COMMON(BIU_MASTER_DATA_SIDE_CASTOUT_REQUESTS, 70), + PMC_E500_COMMON(BIU_MASTER_RETRIES, 71), + PMC_E500_COMMON(SNOOP_REQUESTS, 72), + PMC_E500_COMMON(SNOOP_HITS, 73), + PMC_E500_COMMON(SNOOP_PUSHES, 74), + PMC_E500_COMMON(SNOOP_RETRIES, 75), + PMC_E500_EVENT(DLFB_LOAD_MISS_CYCLES, PMC_PPC_MASK0|PMC_PPC_MASK1, + 76, PMC_PPC_E500_ANY), + PMC_E500_EVENT(ILFB_FETCH_MISS_CYCLES, PMC_PPC_MASK0|PMC_PPC_MASK1, + 77, PMC_PPC_E500_ANY), + PMC_E500_EVENT(EXT_INPU_INTR_LATENCY_CYCLES, PMC_PPC_MASK0|PMC_PPC_MASK1, + 78, PMC_PPC_E500_ANY), + PMC_E500_EVENT(CRIT_INPUT_INTR_LATENCY_CYCLES, PMC_PPC_MASK0|PMC_PPC_MASK1, + 79, PMC_PPC_E500_ANY), + PMC_E500_EVENT(EXT_INPUT_INTR_PENDING_LATENCY_CYCLES, + PMC_PPC_MASK0|PMC_PPC_MASK1, 80, PMC_PPC_E500_ANY), + PMC_E500_EVENT(CRIT_INPUT_INTR_PENDING_LATENCY_CYCLES, + PMC_PPC_MASK0|PMC_PPC_MASK1, 81, PMC_PPC_E500_ANY), + PMC_E500_COMMON(PMC0_OVERFLOW, 82), + PMC_E500_COMMON(PMC1_OVERFLOW, 83), + PMC_E500_COMMON(PMC2_OVERFLOW, 84), + PMC_E500_COMMON(PMC3_OVERFLOW, 85), + PMC_E500_COMMON(INTERRUPTS_TAKEN, 86), + PMC_E500_COMMON(EXT_INPUT_INTR_TAKEN, 87), + PMC_E500_COMMON(CRIT_INPUT_INTR_TAKEN, 88), + PMC_E500_COMMON(SYSCALL_TRAP_INTR, 89), + PMC_E500_EVENT(TLB_BIT_TRANSITIONS, PMC_PPC_MASK_ALL, 90, + PMC_PPC_E500V2 | PMC_PPC_E500MC), + PMC_E500MC_ONLY(L2_LINEFILL_BUFFER, 91), + PMC_E500MC_ONLY(LV2_VS, 92), + PMC_E500MC_ONLY(CASTOUTS_RELEASED, 93), + PMC_E500MC_ONLY(INTV_ALLOCATIONS, 94), + PMC_E500MC_ONLY(DLFB_RETRIES_TO_MBAR, 95), + PMC_E500MC_ONLY(STORE_RETRIES, 96), + PMC_E500MC_ONLY(STASH_L1_HITS, 97), + PMC_E500MC_ONLY(STASH_L2_HITS, 98), + PMC_E500MC_ONLY(STASH_BUSY_1, 99), + PMC_E500MC_ONLY(STASH_BUSY_2, 100), + PMC_E500MC_ONLY(STASH_BUSY_3, 101), + PMC_E500MC_ONLY(STASH_HITS, 102), + PMC_E500MC_ONLY(STASH_HIT_DLFB, 103), + PMC_E500MC_ONLY(STASH_REQUESTS, 106), + PMC_E500MC_ONLY(STASH_REQUESTS_L1, 107), + 
PMC_E500MC_ONLY(STASH_REQUESTS_L2, 108), + PMC_E500MC_ONLY(STALLS_NO_CAQ_OR_COB, 109), + PMC_E500MC_ONLY(L2_CACHE_ACCESSES, 110), + PMC_E500MC_ONLY(L2_HIT_CACHE_ACCESSES, 111), + PMC_E500MC_ONLY(L2_CACHE_DATA_ACCESSES, 112), + PMC_E500MC_ONLY(L2_CACHE_DATA_HITS, 113), + PMC_E500MC_ONLY(L2_CACHE_INSTR_ACCESSES, 114), + PMC_E500MC_ONLY(L2_CACHE_INSTR_HITS, 115), + PMC_E500MC_ONLY(L2_CACHE_ALLOCATIONS, 116), + PMC_E500MC_ONLY(L2_CACHE_DATA_ALLOCATIONS, 117), + PMC_E500MC_ONLY(L2_CACHE_DIRTY_DATA_ALLOCATIONS, 118), + PMC_E500MC_ONLY(L2_CACHE_INSTR_ALLOCATIONS, 119), + PMC_E500MC_ONLY(L2_CACHE_UPDATES, 120), + PMC_E500MC_ONLY(L2_CACHE_CLEAN_UPDATES, 121), + PMC_E500MC_ONLY(L2_CACHE_DIRTY_UPDATES, 122), + PMC_E500MC_ONLY(L2_CACHE_CLEAN_REDUNDANT_UPDATES, 123), + PMC_E500MC_ONLY(L2_CACHE_DIRTY_REDUNDANT_UPDATES, 124), + PMC_E500MC_ONLY(L2_CACHE_LOCKS, 125), + PMC_E500MC_ONLY(L2_CACHE_CASTOUTS, 126), + PMC_E500MC_ONLY(L2_CACHE_DATA_DIRTY_HITS, 127), + PMC_E500MC_ONLY(INSTR_LFB_WENT_HIGH_PRIORITY, 128), + PMC_E500MC_ONLY(SNOOP_THROTTLING_TURNED_ON, 129), + PMC_E500MC_ONLY(L2_CLEAN_LINE_INVALIDATIONS, 130), + PMC_E500MC_ONLY(L2_INCOHERENT_LINE_INVALIDATIONS, 131), + PMC_E500MC_ONLY(L2_COHERENT_LINE_INVALIDATIONS, 132), + PMC_E500MC_ONLY(COHERENT_LOOKUP_MISS_DUE_TO_VALID_BUT_INCOHERENT_MATCHES, 133), + PMC_E500MC_ONLY(IAC1S_DETECTED, 140), + PMC_E500MC_ONLY(IAC2S_DETECTED, 141), + PMC_E500MC_ONLY(DAC1S_DTECTED, 144), + PMC_E500MC_ONLY(DAC2S_DTECTED, 145), + PMC_E500MC_ONLY(DVT0_DETECTED, 148), + PMC_E500MC_ONLY(DVT1_DETECTED, 149), + PMC_E500MC_ONLY(DVT2_DETECTED, 150), + PMC_E500MC_ONLY(DVT3_DETECTED, 151), + PMC_E500MC_ONLY(DVT4_DETECTED, 152), + PMC_E500MC_ONLY(DVT5_DETECTED, 153), + PMC_E500MC_ONLY(DVT6_DETECTED, 154), + PMC_E500MC_ONLY(DVT7_DETECTED, 155), + PMC_E500MC_ONLY(CYCLES_COMPLETION_STALLED_NEXUS_FIFO_FULL, 156), + PMC_E500MC_ONLY(FPU_DOUBLE_PUMP, 160), + PMC_E500MC_ONLY(FPU_FINISH, 161), + PMC_E500MC_ONLY(FPU_DIVIDE_CYCLES, 162), + 
PMC_E500MC_ONLY(FPU_DENORM_INPUT_CYCLES, 163),
+	PMC_E500MC_ONLY(FPU_RESULT_STALL_CYCLES, 164),
+	PMC_E500MC_ONLY(FPU_FPSCR_FULL_STALL, 165),
+	PMC_E500MC_ONLY(FPU_PIPE_SYNC_STALLS, 166),
+	PMC_E500MC_ONLY(FPU_INPUT_DATA_STALLS, 167),
+	PMC_E500MC_ONLY(DECORATED_LOADS, 176),
+	PMC_E500MC_ONLY(DECORATED_STORES, 177),
+	PMC_E500MC_ONLY(LOAD_RETRIES, 178),
+	PMC_E500MC_ONLY(STWCX_SUCCESSES, 179),
+	PMC_E500MC_ONLY(STWCX_FAILURES, 180),
+};
+
+/* Number of rows in e500_event_codes[]. */
+const size_t e500_event_codes_size =
+	sizeof(e500_event_codes) / sizeof(e500_event_codes[0]);
+
+/*
+ * Return the value of counter register PMC<pmc>, read with mfpmr().
+ * Only indices 0..3 exist; any other index panics.
+ */
+static pmc_value_t
+e500_pmcn_read(unsigned int pmc)
+{
+	switch (pmc) {
+	case 0:
+		return mfpmr(PMR_PMC0);
+	case 1:
+		return mfpmr(PMR_PMC1);
+	case 2:
+		return mfpmr(PMR_PMC2);
+	case 3:
+		return mfpmr(PMR_PMC3);
+	default:
+		/* pmc is unsigned, so it is printed with %u. */
+		panic("Invalid PMC number: %u\n", pmc);
+	}
+}
+
+/*
+ * Store val into counter register PMC<pmc> with mtpmr().
+ * Only indices 0..3 exist; any other index panics.
+ */
+static void
+e500_pmcn_write(unsigned int pmc, uint32_t val)
+{
+	switch (pmc) {
+	case 0:
+		mtpmr(PMR_PMC0, val);
+		break;
+	case 1:
+		mtpmr(PMR_PMC1, val);
+		break;
+	case 2:
+		mtpmr(PMR_PMC2, val);
+		break;
+	case 3:
+		mtpmr(PMR_PMC3, val);
+		break;
+	default:
+		/* pmc is unsigned, so it is printed with %u. */
+		panic("Invalid PMC number: %u\n", pmc);
+	}
+}
+
+/*
+ * pcd_read_pmc method: read counter `ri' into *v.
+ *
+ * `cpu' is used only to find the struct pmc attached to the row; the
+ * register itself is read through e500_pmcn_read(ri).  For sampling-mode
+ * PMCs the raw value is passed through
+ * POWERPC_PERFCTR_VALUE_TO_RELOAD_COUNT() (defined outside this file)
+ * before being stored.  A row with no PMC attached trips the KASSERT.
+ * Always returns 0.
+ */
+static int
+e500_read_pmc(int cpu, int ri, pmc_value_t *v)
+{
+	struct pmc *pm;
+	pmc_value_t tmp;
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[powerpc,%d] illegal CPU value %d", __LINE__, cpu));
+	KASSERT(ri >= 0 && ri < E500_MAX_PMCS,
+	    ("[powerpc,%d] illegal row index %d", __LINE__, ri));
+
+	pm = powerpc_pcpu[cpu]->pc_ppcpmcs[ri].phw_pmc;
+	KASSERT(pm != NULL,
+	    ("[core,%d] cpu %d ri %d pmc not configured", __LINE__, cpu,
+		ri));
+
+	tmp = e500_pmcn_read(ri);
+	PMCDBG(MDP,REA,2,"ppc-read id=%d -> %jd", ri, tmp);
+	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
+		*v = POWERPC_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp);
+	else
+		*v = tmp;
+
+	return 0;
+}
+
+/*
+ * pcd_write_pmc method: load counter `ri' with v.
+ *
+ * For sampling-mode PMCs v is first passed through
+ * POWERPC_RELOAD_COUNT_TO_PERFCTR_VALUE() (defined outside this file).
+ * The value is handed to e500_pmcn_write(), whose parameter is uint32_t.
+ * Always returns 0.
+ */
+static int
+e500_write_pmc(int cpu, int ri, pmc_value_t v)
+{
+	struct pmc *pm;
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[powerpc,%d] illegal CPU value %d", 
__LINE__, cpu));
+	KASSERT(ri >= 0 && ri < E500_MAX_PMCS,
+	    ("[powerpc,%d] illegal row-index %d", __LINE__, ri));
+
+	/* The PMC attached to this row decides how v is interpreted. */
+	pm = powerpc_pcpu[cpu]->pc_ppcpmcs[ri].phw_pmc;
+
+	/* Sampling PMCs are given a reload count; convert it first. */
+	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
+		v = POWERPC_RELOAD_COUNT_TO_PERFCTR_VALUE(v);
+
+	PMCDBG(MDP,WRI,1,"powerpc-write cpu=%d ri=%d v=%jx", cpu, ri, v);
+
+	e500_pmcn_write(ri, v);
+
+	return 0;
+}
+
+/*
+ * pcd_config_pmc method: attach `pm' to hardware row `ri' of `cpu', or
+ * detach the current one when pm is NULL.  Attaching over a row that
+ * still has a PMC trips the KASSERT.  No hardware register is touched.
+ * Always returns 0.
+ */
+static int
+e500_config_pmc(int cpu, int ri, struct pmc *pm)
+{
+	struct pmc_hw *phw;
+
+	PMCDBG(MDP,CFG,1, "cpu=%d ri=%d pm=%p", cpu, ri, pm);
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[powerpc,%d] illegal CPU value %d", __LINE__, cpu));
+	KASSERT(ri >= 0 && ri < E500_MAX_PMCS,
+	    ("[powerpc,%d] illegal row-index %d", __LINE__, ri));
+
+	phw = &powerpc_pcpu[cpu]->pc_ppcpmcs[ri];
+
+	KASSERT(pm == NULL || phw->phw_pmc == NULL,
+	    ("[powerpc,%d] pm=%p phw->pm=%p hwpmc not unconfigured",
+	    __LINE__, pm, phw->phw_pmc));
+
+	phw->phw_pmc = pm;
+
+	return 0;
+}
+
+/*
+ * pcd_start_pmc method: program PMLCa<ri> with the control word saved by
+ * e500_allocate_pmc() in pm_powerpc_evsel.  PMLCax_CE is OR-ed in for
+ * sampling-mode PMCs.  The register is written whole, so whatever it held
+ * before is replaced.  Row indices outside 0..3 are silently ignored.
+ * Always returns 0.
+ */
+static int
+e500_start_pmc(int cpu, int ri)
+{
+	uint32_t config;
+	struct pmc *pm;
+	struct pmc_hw *phw;
+
+	phw	= &powerpc_pcpu[cpu]->pc_ppcpmcs[ri];
+	pm	= phw->phw_pmc;
+	config	= pm->pm_md.pm_powerpc.pm_powerpc_evsel;
+
+	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
+		config |= PMLCax_CE;
+
+	/* Enable the PMC. */
+	switch (ri) {
+	case 0:
+		mtpmr(PMR_PMLCa0, config);
+		break;
+	case 1:
+		mtpmr(PMR_PMLCa1, config);
+		break;
+	case 2:
+		mtpmr(PMR_PMLCa2, config);
+		break;
+	case 3:
+		mtpmr(PMR_PMLCa3, config);
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+/*
+ * pcd_stop_pmc method: set PMLCax_FC in PMLCa<ri> with a read-modify-write,
+ * leaving the other bits as read.  Row indices outside 0..3 are silently
+ * ignored.  Always returns 0.
+ */
+static int
+e500_stop_pmc(int cpu, int ri)
+{
+	struct pmc *pm;
+	struct pmc_hw *phw;
+	register_t pmc_pmlc;
+
+	phw = &powerpc_pcpu[cpu]->pc_ppcpmcs[ri];
+	pm = phw->phw_pmc;	/* NOTE(review): assigned but not used below */
+
+	/*
+	 * Disable the PMCs.
+	 */
+	switch (ri) {
+	case 0:
+		pmc_pmlc = mfpmr(PMR_PMLCa0);
+		pmc_pmlc |= PMLCax_FC;
+		mtpmr(PMR_PMLCa0, pmc_pmlc);
+		break;
+	case 1:
+		pmc_pmlc = mfpmr(PMR_PMLCa1);
+		pmc_pmlc |= PMLCax_FC;
+		mtpmr(PMR_PMLCa1, pmc_pmlc);
+		break;
+	case 2:
+		pmc_pmlc = mfpmr(PMR_PMLCa2);
+		pmc_pmlc |= PMLCax_FC;
+		mtpmr(PMR_PMLCa2, pmc_pmlc);
+		break;
+	case 3:
+		pmc_pmlc = mfpmr(PMR_PMLCa3);
+		pmc_pmlc |= PMLCax_FC;
+		mtpmr(PMR_PMLCa3, pmc_pmlc);
+		break;
+	default:
+		/* Not one of the four counters: nothing to do. */
+		break;
+	}
+	return 0;
+}
+
+/*
+ * pcd_pcpu_init method: set up per-CPU state for `cpu'.
+ *
+ * Allocates the struct powerpc_cpu and its array of E500_MAX_PMCS
+ * struct pmc_hw rows (M_WAITOK, zeroed), publishes each row in
+ * pmc_pcpu[cpu]->pc_hwpmcs[] starting at this class's first row index,
+ * and sets PMLCax_FC on every counter via e500_stop_pmc().  PMGC0 is
+ * written with PMGC_FAC set while this happens and without it at the end.
+ * Always returns 0.
+ */
+static int
+e500_pcpu_init(struct pmc_mdep *md, int cpu)
+{
+	int first_ri, i;
+	struct pmc_cpu *pc;
+	struct powerpc_cpu *pac;
+	struct pmc_hw  *phw;
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[powerpc,%d] wrong cpu number %d", __LINE__, cpu));
+	PMCDBG(MDP,INI,1,"powerpc-init cpu=%d", cpu);
+
+	/* Freeze all counters. */
+	mtpmr(PMR_PMGC0, PMGC_FAC | PMGC_PMIE | PMGC_FCECE);
+
+	powerpc_pcpu[cpu] = pac = malloc(sizeof(struct powerpc_cpu), M_PMC,
+	    M_WAITOK|M_ZERO);
+	pac->pc_ppcpmcs = malloc(sizeof(struct pmc_hw) * E500_MAX_PMCS,
+	    M_PMC, M_WAITOK|M_ZERO);
+	pac->pc_class = PMC_CLASS_E500;
+	pc = pmc_pcpu[cpu];
+	/* Row index, within the whole machine, of this class's counter 0. */
+	first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_POWERPC].pcd_ri;
+	KASSERT(pc != NULL, ("[powerpc,%d] NULL per-cpu pointer", __LINE__));
+
+	for (i = 0, phw = pac->pc_ppcpmcs; i < E500_MAX_PMCS; i++, phw++) {
+		phw->phw_state    = PMC_PHW_FLAG_IS_ENABLED |
+		    PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(i);
+		phw->phw_pmc      = NULL;
+		pc->pc_hwpmcs[i + first_ri] = phw;
+
+		/* Initialize the PMC to stopped */
+		e500_stop_pmc(cpu, i);
+	}
+	/* Unfreeze global register. 
*/ + mtpmr(PMR_PMGC0, PMGC_PMIE | PMGC_FCECE); + + return 0; +} + +static int +e500_pcpu_fini(struct pmc_mdep *md, int cpu) +{ + uint32_t pmgc0 = mfpmr(PMR_PMGC0); + + pmgc0 |= PMGC_FAC; + mtpmr(PMR_PMGC0, pmgc0); + mtmsr(mfmsr() & ~PSL_PMM); + + free(powerpc_pcpu[cpu]->pc_ppcpmcs, M_PMC); + free(powerpc_pcpu[cpu], M_PMC); + + return 0; +} + +static int +e500_allocate_pmc(int cpu, int ri, struct pmc *pm, + const struct pmc_op_pmcallocate *a) +{ + enum pmc_event pe; + uint32_t caps, config, counter; + struct e500_event_code_map *ev; + uint16_t vers; + uint8_t pe_cpu_mask; + + KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), + ("[powerpc,%d] illegal CPU value %d", __LINE__, cpu)); + KASSERT(ri >= 0 && ri < E500_MAX_PMCS, + ("[powerpc,%d] illegal row index %d", __LINE__, ri)); + + caps = a->pm_caps; + + pe = a->pm_ev; + config = PMLCax_FCS | PMLCax_FCU | + PMLCax_FCM1 | PMLCax_FCM1; + if (pe < PMC_EV_E500_FIRST || pe > PMC_EV_E500_LAST) + return (EINVAL); + + ev = &e500_event_codes[pe-PMC_EV_E500_FIRST]; + if (ev->pe_code == 0) + return (EINVAL); + + vers = mfpvr() >> 16; + switch (vers) { + case FSL_E500v1: + pe_cpu_mask = ev->pe_code & PMC_PPC_E500V1; + break; + case FSL_E500v2: + pe_cpu_mask = ev->pe_code & PMC_PPC_E500V2; + break; + case FSL_E500mc: + pe_cpu_mask = ev->pe_code & PMC_PPC_E500MC; + break; + } + if (pe_cpu_mask == 0) + return (EINVAL); + + config |= PMLCax_EVENT(ev->pe_code); + counter = ev->pe_counter_mask; + if ((counter & (1 << ri)) == 0) + return (EINVAL); + + if (caps & PMC_CAP_SYSTEM) + config &= ~PMLCax_FCS; + if (caps & PMC_CAP_USER) + config &= ~PMLCax_FCU; + if ((caps & (PMC_CAP_USER | PMC_CAP_SYSTEM)) == 0) + config &= ~(PMLCax_FCS|PMLCax_FCU); + + pm->pm_md.pm_powerpc.pm_powerpc_evsel = config; + + PMCDBG(MDP,ALL,2,"powerpc-allocate ri=%d -> config=0x%x", ri, config); + + return 0; +} + +static int +e500_release_pmc(int cpu, int ri, struct pmc *pmc) +{ + struct pmc_hw *phw; + + KASSERT(cpu >= 0 && cpu < pmc_cpu_max(), + ("[powerpc,%d] illegal CPU 
value %d", __LINE__, cpu));
+	KASSERT(ri >= 0 && ri < E500_MAX_PMCS,
+	    ("[powerpc,%d] illegal row-index %d", __LINE__, ri));
+
+	/* The row must have been detached via e500_config_pmc(.., NULL). */
+	phw = &powerpc_pcpu[cpu]->pc_ppcpmcs[ri];
+	KASSERT(phw->phw_pmc == NULL,
+	    ("[powerpc,%d] PHW pmc %p non-NULL", __LINE__, phw->phw_pmc));
+
+	return 0;
+}
+
+/*
+ * Performance-monitor interrupt handler, installed as pmd_intr.
+ *
+ * Scans the four counters for sampling-mode PMCs whose counter has bit 31
+ * set, passes each running one to pmc_process_interrupt(), and reloads
+ * its counter from pm_reloadcount.  Returns 1 if at least one such
+ * counter was found and 0 otherwise; the matching pmc_stats counter is
+ * bumped either way.
+ */
+static int
+e500_intr(int cpu, struct trapframe *tf)
+{
+	int i, error, retval;
+	uint32_t config;
+	struct pmc *pm;
+	struct powerpc_cpu *pac;
+
+	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
+	    ("[powerpc,%d] out of range CPU %d", __LINE__, cpu));
+
+	PMCDBG(MDP,INT,1, "cpu=%d tf=%p um=%d", cpu, (void *) tf,
+	    TRAPF_USERMODE(tf));
+
+	retval = 0;
+
+	pac = powerpc_pcpu[cpu];
+
+	/* PMGC0 with PMGC_FAC masked off; written back once we are done. */
+	config  = mfpmr(PMR_PMGC0) & ~PMGC_FAC;
+
+	/*
+	 * look for all PMCs that have interrupted:
+	 * - look for a running, sampling PMC which has overflowed
+	 *   and which has a valid 'struct pmc' association
+	 *
+	 * If found, we call a helper to process the interrupt.
+	 */
+
+	for (i = 0; i < E500_MAX_PMCS; i++) {
+		/* Skip rows with no PMC and PMCs that are not sampling. */
+		if ((pm = pac->pc_ppcpmcs[i].phw_pmc) == NULL ||
+		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
+			continue;
+		}
+
+		if (!E500_PMC_HAS_OVERFLOWED(i))
+			continue;
+
+		retval = 1;	/* Found an interrupting PMC. */
+
+		/* Counted as handled, but not logged or reloaded. */
+		if (pm->pm_state != PMC_STATE_RUNNING)
+			continue;
+
+		/* Stop the counter if logging fails. */
+		error = pmc_process_interrupt(cpu, PMC_HR, pm, tf,
+		    TRAPF_USERMODE(tf));
+		if (error != 0)
+			e500_stop_pmc(cpu, i);
+
+		/* reload count. */
+		e500_write_pmc(cpu, i, pm->pm_sc.pm_reloadcount);
+	}
+
+	atomic_add_int(retval ? &pmc_stats.pm_intr_processed :
+	    &pmc_stats.pm_intr_ignored, 1);
+
+	/* Re-enable PERF exceptions. 
*/
+	/* Only when a counter was handled; otherwise PMGC0 is left alone. */
+	if (retval)
+		mtpmr(PMR_PMGC0, config | PMGC_PMIE);
+
+	return (retval);
+}
+
+/*
+ * Fill in the machine-dependent descriptor for e500 cores.
+ *
+ * Sets the CPU type, describes the counter class in the
+ * PMC_MDEP_CLASS_INDEX_POWERPC slot (E500_MAX_PMCS counters, 32 bits
+ * wide, rows starting at the current pmd_npmc), installs the e500_*
+ * methods together with the shared powerpc_describe/powerpc_get_config,
+ * grows pmd_npmc by E500_MAX_PMCS and installs e500_intr as the interrupt
+ * handler.  Always returns 0.
+ */
+int
+pmc_e500_initialize(struct pmc_mdep *pmc_mdep)
+{
+	struct pmc_classdep *pcd;
+
+	pmc_mdep->pmd_cputype = PMC_CPU_PPC_E500;
+
+	pcd = &pmc_mdep->pmd_classdep[PMC_MDEP_CLASS_INDEX_POWERPC];
+	pcd->pcd_caps  = POWERPC_PMC_CAPS;
+	pcd->pcd_class = PMC_CLASS_E500;
+	pcd->pcd_num   = E500_MAX_PMCS;
+	pcd->pcd_ri    = pmc_mdep->pmd_npmc;
+	pcd->pcd_width = 32;
+
+	pcd->pcd_allocate_pmc   = e500_allocate_pmc;
+	pcd->pcd_config_pmc     = e500_config_pmc;
+	pcd->pcd_pcpu_fini      = e500_pcpu_fini;
+	pcd->pcd_pcpu_init      = e500_pcpu_init;
+	pcd->pcd_describe       = powerpc_describe;
+	pcd->pcd_get_config     = powerpc_get_config;
+	pcd->pcd_read_pmc       = e500_read_pmc;
+	pcd->pcd_release_pmc    = e500_release_pmc;
+	pcd->pcd_start_pmc      = e500_start_pmc;
+	pcd->pcd_stop_pmc       = e500_stop_pmc;
+	pcd->pcd_write_pmc      = e500_write_pmc;
+
+	pmc_mdep->pmd_npmc   += E500_MAX_PMCS;
+	pmc_mdep->pmd_intr   =  e500_intr;
+
+	return (0);
+}
diff --git a/sys/dev/hwpmc/hwpmc_mpc7xxx.c b/sys/dev/hwpmc/hwpmc_mpc7xxx.c index 171523b80b50..eaa440e2fbc5 100644 --- a/sys/dev/hwpmc/hwpmc_mpc7xxx.c +++ b/sys/dev/hwpmc/hwpmc_mpc7xxx.c @@ -567,7 +567,7 @@ mpc7xxx_pcpu_init(struct pmc_mdep *md, int cpu) M_PMC, M_WAITOK|M_ZERO); pac->pc_class = PMC_CLASS_PPC7450; pc = pmc_pcpu[cpu]; - first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_PPC7450].pcd_ri; + first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_POWERPC].pcd_ri; KASSERT(pc != NULL, ("[powerpc,%d] NULL per-cpu pointer", __LINE__)); for (i = 0, phw = pac->pc_ppcpmcs; i < MPC7XXX_MAX_PMCS; i++, phw++) { @@ -729,7 +729,7 @@ pmc_mpc7xxx_initialize(struct pmc_mdep *pmc_mdep) pmc_mdep->pmd_cputype = PMC_CPU_PPC_7450; - pcd = &pmc_mdep->pmd_classdep[PMC_MDEP_CLASS_INDEX_PPC7450]; + pcd = &pmc_mdep->pmd_classdep[PMC_MDEP_CLASS_INDEX_POWERPC]; pcd->pcd_caps = POWERPC_PMC_CAPS; pcd->pcd_class = PMC_CLASS_PPC7450; pcd->pcd_num = MPC7XXX_MAX_PMCS; diff --git a/sys/dev/hwpmc/hwpmc_powerpc.c 
b/sys/dev/hwpmc/hwpmc_powerpc.c index ad251d3a42b3..889a5d009ffb 100644 --- a/sys/dev/hwpmc/hwpmc_powerpc.c +++ b/sys/dev/hwpmc/hwpmc_powerpc.c @@ -175,6 +175,11 @@ pmc_md_initialize() case IBM970MP: error = pmc_ppc970_initialize(pmc_mdep); break; + case FSL_E500v1: + case FSL_E500v2: + case FSL_E500mc: + error = pmc_e500_initialize(pmc_mdep); + break; default: error = -1; break; diff --git a/sys/dev/hwpmc/hwpmc_powerpc.h b/sys/dev/hwpmc/hwpmc_powerpc.h index 8f0b8ce1a077..66eaa21d4a86 100644 --- a/sys/dev/hwpmc/hwpmc_powerpc.h +++ b/sys/dev/hwpmc/hwpmc_powerpc.h @@ -51,6 +51,7 @@ struct powerpc_cpu { extern struct powerpc_cpu **powerpc_pcpu; +extern int pmc_e500_initialize(struct pmc_mdep *pmc_mdep); extern int pmc_mpc7xxx_initialize(struct pmc_mdep *pmc_mdep); extern int pmc_ppc970_initialize(struct pmc_mdep *pmc_mdep); diff --git a/sys/dev/hwpmc/hwpmc_ppc970.c b/sys/dev/hwpmc/hwpmc_ppc970.c index c6e8f4c7b6d8..f477be310858 100644 --- a/sys/dev/hwpmc/hwpmc_ppc970.c +++ b/sys/dev/hwpmc/hwpmc_ppc970.c @@ -555,7 +555,7 @@ ppc970_pcpu_init(struct pmc_mdep *md, int cpu) pac->pc_class = PMC_CLASS_PPC970; pc = pmc_pcpu[cpu]; - first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_PPC970].pcd_ri; + first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_POWERPC].pcd_ri; KASSERT(pc != NULL, ("[powerpc,%d] NULL per-cpu pointer", __LINE__)); for (i = 0, phw = pac->pc_ppcpmcs; i < PPC970_MAX_PMCS; i++, phw++) { @@ -661,7 +661,7 @@ pmc_ppc970_initialize(struct pmc_mdep *pmc_mdep) pmc_mdep->pmd_cputype = PMC_CPU_PPC_970; - pcd = &pmc_mdep->pmd_classdep[PMC_MDEP_CLASS_INDEX_PPC970]; + pcd = &pmc_mdep->pmd_classdep[PMC_MDEP_CLASS_INDEX_POWERPC]; pcd->pcd_caps = POWERPC_PMC_CAPS; pcd->pcd_class = PMC_CLASS_PPC970; pcd->pcd_num = PPC970_MAX_PMCS; diff --git a/sys/dev/hwpmc/pmc_events.h b/sys/dev/hwpmc/pmc_events.h index 83182bfbea15..7f878cb9cc20 100644 --- a/sys/dev/hwpmc/pmc_events.h +++ b/sys/dev/hwpmc/pmc_events.h @@ -5379,6 +5379,168 @@ __PMC_EV_ALIAS("IMPC_C0H_TRK_REQUEST.ALL", 
UCP_EVENT_84H_01H) #define PMC_EV_PPC970_FIRST PMC_EV_PPC970_INSTR_COMPLETED #define PMC_EV_PPC970_LAST PMC_EV_PPC970_ADDER + +#define __PMC_EV_E500() \ + __PMC_EV(E500, CYCLES) \ + __PMC_EV(E500, INSTR_COMPLETED) \ + __PMC_EV(E500, UOPS_COMPLETED) \ + __PMC_EV(E500, INSTR_FETCHED) \ + __PMC_EV(E500, UOPS_DECODED) \ + __PMC_EV(E500, PM_EVENT_TRANSITIONS) \ + __PMC_EV(E500, PM_EVENT_CYCLES) \ + __PMC_EV(E500, BRANCH_INSTRS_COMPLETED) \ + __PMC_EV(E500, LOAD_UOPS_COMPLETED) \ + __PMC_EV(E500, STORE_UOPS_COMPLETED) \ + __PMC_EV(E500, CQ_REDIRECTS) \ + __PMC_EV(E500, BRANCHES_FINISHED) \ + __PMC_EV(E500, TAKEN_BRANCHES_FINISHED) \ + __PMC_EV(E500, FINISHED_UNCOND_BRANCHES_MISS_BTB) \ + __PMC_EV(E500, BRANCH_MISPRED) \ + __PMC_EV(E500, BTB_BRANCH_MISPRED_FROM_DIRECTION) \ + __PMC_EV(E500, BTB_HITS_PSEUDO_HITS) \ + __PMC_EV(E500, CYCLES_DECODE_STALLED) \ + __PMC_EV(E500, CYCLES_ISSUE_STALLED) \ + __PMC_EV(E500, CYCLES_BRANCH_ISSUE_STALLED) \ + __PMC_EV(E500, CYCLES_SU1_SCHED_STALLED) \ + __PMC_EV(E500, CYCLES_SU2_SCHED_STALLED) \ + __PMC_EV(E500, CYCLES_MU_SCHED_STALLED) \ + __PMC_EV(E500, CYCLES_LRU_SCHED_STALLED) \ + __PMC_EV(E500, CYCLES_BU_SCHED_STALLED) \ + __PMC_EV(E500, TOTAL_TRANSLATED) \ + __PMC_EV(E500, LOADS_TRANSLATED) \ + __PMC_EV(E500, STORES_TRANSLATED) \ + __PMC_EV(E500, TOUCHES_TRANSLATED) \ + __PMC_EV(E500, CACHEOPS_TRANSLATED) \ + __PMC_EV(E500, CACHE_INHIBITED_ACCESS_TRANSLATED) \ + __PMC_EV(E500, GUARDED_LOADS_TRANSLATED) \ + __PMC_EV(E500, WRITE_THROUGH_STORES_TRANSLATED) \ + __PMC_EV(E500, MISALIGNED_LOAD_STORE_ACCESS_TRANSLATED) \ + __PMC_EV(E500, TOTAL_ALLOCATED_TO_DLFB) \ + __PMC_EV(E500, LOADS_TRANSLATED_ALLOCATED_TO_DLFB) \ + __PMC_EV(E500, STORES_COMPLETED_ALLOCATED_TO_DLFB) \ + __PMC_EV(E500, TOUCHES_TRANSLATED_ALLOCATED_TO_DLFB) \ + __PMC_EV(E500, STORES_COMPLETED) \ + __PMC_EV(E500, DATA_L1_CACHE_LOCKS) \ + __PMC_EV(E500, DATA_L1_CACHE_RELOADS) \ + __PMC_EV(E500, DATA_L1_CACHE_CASTOUTS) \ + __PMC_EV(E500, LOAD_MISS_DLFB_FULL) \ + 
__PMC_EV(E500, LOAD_MISS_LDQ_FULL) \ + __PMC_EV(E500, LOAD_GUARDED_MISS) \ + __PMC_EV(E500, STORE_TRANSLATE_WHEN_QUEUE_FULL) \ + __PMC_EV(E500, ADDRESS_COLLISION) \ + __PMC_EV(E500, DATA_MMU_MISS) \ + __PMC_EV(E500, DATA_MMU_BUSY) \ + __PMC_EV(E500, PART2_MISALIGNED_CACHE_ACCESS) \ + __PMC_EV(E500, LOAD_MISS_DLFB_FULL_CYCLES) \ + __PMC_EV(E500, LOAD_MISS_LDQ_FULL_CYCLES) \ + __PMC_EV(E500, LOAD_GUARDED_MISS_CYCLES) \ + __PMC_EV(E500, STORE_TRANSLATE_WHEN_QUEUE_FULL_CYCLES) \ + __PMC_EV(E500, ADDRESS_COLLISION_CYCLES) \ + __PMC_EV(E500, DATA_MMU_MISS_CYCLES) \ + __PMC_EV(E500, DATA_MMU_BUSY_CYCLES) \ + __PMC_EV(E500, PART2_MISALIGNED_CACHE_ACCESS_CYCLES) \ + __PMC_EV(E500, INSTR_L1_CACHE_LOCKS) \ + __PMC_EV(E500, INSTR_L1_CACHE_RELOADS) \ + __PMC_EV(E500, INSTR_L1_CACHE_FETCHES) \ + __PMC_EV(E500, INSTR_MMU_TLB4K_RELOADS) \ + __PMC_EV(E500, INSTR_MMU_VSP_RELOADS) \ + __PMC_EV(E500, DATA_MMU_TLB4K_RELOADS) \ + __PMC_EV(E500, DATA_MMU_VSP_RELOADS) \ + __PMC_EV(E500, L2MMU_MISSES) \ + __PMC_EV(E500, BIU_MASTER_REQUESTS) \ + __PMC_EV(E500, BIU_MASTER_INSTR_SIDE_REQUESTS) \ + __PMC_EV(E500, BIU_MASTER_DATA_SIDE_REQUESTS) \ + __PMC_EV(E500, BIU_MASTER_DATA_SIDE_CASTOUT_REQUESTS) \ + __PMC_EV(E500, BIU_MASTER_RETRIES) \ + __PMC_EV(E500, SNOOP_REQUESTS) \ + __PMC_EV(E500, SNOOP_HITS) \ + __PMC_EV(E500, SNOOP_PUSHES) \ + __PMC_EV(E500, SNOOP_RETRIES) \ + __PMC_EV(E500, DLFB_LOAD_MISS_CYCLES) \ + __PMC_EV(E500, ILFB_FETCH_MISS_CYCLES) \ + __PMC_EV(E500, EXT_INPU_INTR_LATENCY_CYCLES) \ + __PMC_EV(E500, CRIT_INPUT_INTR_LATENCY_CYCLES) \ + __PMC_EV(E500, EXT_INPUT_INTR_PENDING_LATENCY_CYCLES) \ + __PMC_EV(E500, CRIT_INPUT_INTR_PENDING_LATENCY_CYCLES) \ + __PMC_EV(E500, PMC0_OVERFLOW) \ + __PMC_EV(E500, PMC1_OVERFLOW) \ + __PMC_EV(E500, PMC2_OVERFLOW) \ + __PMC_EV(E500, PMC3_OVERFLOW) \ + __PMC_EV(E500, INTERRUPTS_TAKEN) \ + __PMC_EV(E500, EXT_INPUT_INTR_TAKEN) \ + __PMC_EV(E500, CRIT_INPUT_INTR_TAKEN) \ + __PMC_EV(E500, SYSCALL_TRAP_INTR) \ + __PMC_EV(E500, TLB_BIT_TRANSITIONS) 
\ + __PMC_EV(E500, L2_LINEFILL_BUFFER) \ + __PMC_EV(E500, LV2_VS) \ + __PMC_EV(E500, CASTOUTS_RELEASED) \ + __PMC_EV(E500, INTV_ALLOCATIONS) \ + __PMC_EV(E500, DLFB_RETRIES_TO_MBAR) \ + __PMC_EV(E500, STORE_RETRIES) \ + __PMC_EV(E500, STASH_L1_HITS) \ + __PMC_EV(E500, STASH_L2_HITS) \ + __PMC_EV(E500, STASH_BUSY_1) \ + __PMC_EV(E500, STASH_BUSY_2) \ + __PMC_EV(E500, STASH_BUSY_3) \ + __PMC_EV(E500, STASH_HITS) \ + __PMC_EV(E500, STASH_HIT_DLFB) \ + __PMC_EV(E500, STASH_REQUESTS) \ + __PMC_EV(E500, STASH_REQUESTS_L1) \ + __PMC_EV(E500, STASH_REQUESTS_L2) \ + __PMC_EV(E500, STALLS_NO_CAQ_OR_COB) \ + __PMC_EV(E500, L2_CACHE_ACCESSES) \ + __PMC_EV(E500, L2_HIT_CACHE_ACCESSES) \ + __PMC_EV(E500, L2_CACHE_DATA_ACCESSES) \ + __PMC_EV(E500, L2_CACHE_DATA_HITS) \ + __PMC_EV(E500, L2_CACHE_INSTR_ACCESSES) \ + __PMC_EV(E500, L2_CACHE_INSTR_HITS) \ + __PMC_EV(E500, L2_CACHE_ALLOCATIONS) \ + __PMC_EV(E500, L2_CACHE_DATA_ALLOCATIONS) \ + __PMC_EV(E500, L2_CACHE_DIRTY_DATA_ALLOCATIONS) \ + __PMC_EV(E500, L2_CACHE_INSTR_ALLOCATIONS) \ + __PMC_EV(E500, L2_CACHE_UPDATES) \ + __PMC_EV(E500, L2_CACHE_CLEAN_UPDATES) \ + __PMC_EV(E500, L2_CACHE_DIRTY_UPDATES) \ + __PMC_EV(E500, L2_CACHE_CLEAN_REDUNDANT_UPDATES) \ + __PMC_EV(E500, L2_CACHE_DIRTY_REDUNDANT_UPDATES) \ + __PMC_EV(E500, L2_CACHE_LOCKS) \ + __PMC_EV(E500, L2_CACHE_CASTOUTS) \ + __PMC_EV(E500, L2_CACHE_DATA_DIRTY_HITS) \ + __PMC_EV(E500, INSTR_LFB_WENT_HIGH_PRIORITY) \ + __PMC_EV(E500, SNOOP_THROTTLING_TURNED_ON) \ + __PMC_EV(E500, L2_CLEAN_LINE_INVALIDATIONS) \ + __PMC_EV(E500, L2_INCOHERENT_LINE_INVALIDATIONS) \ + __PMC_EV(E500, L2_COHERENT_LINE_INVALIDATIONS) \ + __PMC_EV(E500, COHERENT_LOOKUP_MISS_DUE_TO_VALID_BUT_INCOHERENT_MATCHES) \ + __PMC_EV(E500, IAC1S_DETECTED) \ + __PMC_EV(E500, IAC2S_DETECTED) \ + __PMC_EV(E500, DAC1S_DTECTED) \ + __PMC_EV(E500, DAC2S_DTECTED) \ + __PMC_EV(E500, DVT0_DETECTED) \ + __PMC_EV(E500, DVT1_DETECTED) \ + __PMC_EV(E500, DVT2_DETECTED) \ + __PMC_EV(E500, DVT3_DETECTED) \ + __PMC_EV(E500, 
DVT4_DETECTED) \ + __PMC_EV(E500, DVT5_DETECTED) \ + __PMC_EV(E500, DVT6_DETECTED) \ + __PMC_EV(E500, DVT7_DETECTED) \ + __PMC_EV(E500, CYCLES_COMPLETION_STALLED_NEXUS_FIFO_FULL) \ + __PMC_EV(E500, FPU_DOUBLE_PUMP) \ + __PMC_EV(E500, FPU_FINISH) \ + __PMC_EV(E500, FPU_DIVIDE_CYCLES) \ + __PMC_EV(E500, FPU_DENORM_INPUT_CYCLES) \ + __PMC_EV(E500, FPU_RESULT_STALL_CYCLES) \ + __PMC_EV(E500, FPU_FPSCR_FULL_STALL) \ + __PMC_EV(E500, FPU_PIPE_SYNC_STALLS) \ + __PMC_EV(E500, FPU_INPUT_DATA_STALLS) \ + __PMC_EV(E500, DECORATED_LOADS) \ + __PMC_EV(E500, DECORATED_STORES) \ + __PMC_EV(E500, LOAD_RETRIES) \ + __PMC_EV(E500, STWCX_SUCCESSES) \ + __PMC_EV(E500, STWCX_FAILURES) \ + +#define PMC_EV_E500_FIRST PMC_EV_E500_CYCLES +#define PMC_EV_E500_LAST PMC_EV_E500_STWCX_FAILURES /* * All known PMC events. * @@ -5400,6 +5562,9 @@ __PMC_EV_ALIAS("IMPC_C0H_TRK_REQUEST.ALL", UCP_EVENT_84H_01H) * 0x11300 0x00FF MIPS 24K events * 0x11400 0x00FF Octeon events * 0x11500 0x00FF MIPS 74K events + * 0x13000 0x00FF MPC7450 events + * 0x13100 0x00FF IBM PPC970 events + * 0x13300 0x00FF Freescale e500 events * 0x14000 0x0100 ARMv7 events * 0x20000 0x1000 Software events */ @@ -5436,6 +5601,8 @@ __PMC_EV_ALIAS("IMPC_C0H_TRK_REQUEST.ALL", UCP_EVENT_84H_01H) __PMC_EV_PPC7450() \ __PMC_EV_BLOCK(PPC970, 0x13100) \ __PMC_EV_PPC970() \ + __PMC_EV_BLOCK(E500, 0x13300) \ + __PMC_EV_E500() \ __PMC_EV_BLOCK(ARMV7, 0x14000) \ __PMC_EV_ARMV7() \ diff --git a/sys/modules/hwpmc/Makefile b/sys/modules/hwpmc/Makefile index c9c60fcf8585..cfae7b6a4c7b 100644 --- a/sys/modules/hwpmc/Makefile +++ b/sys/modules/hwpmc/Makefile @@ -25,7 +25,7 @@ SRCS+= device_if.h bus_if.h .endif .if ${MACHINE_CPUARCH} == "powerpc" -SRCS+= hwpmc_powerpc.c hwpmc_mpc7xxx.c hwpmc_ppc970.c +SRCS+= hwpmc_powerpc.c hwpmc_e500.c hwpmc_mpc7xxx.c hwpmc_ppc970.c .endif .if ${MACHINE_CPUARCH} == "sparc64" diff --git a/sys/powerpc/booke/interrupt.c b/sys/powerpc/booke/interrupt.c index b5f61401cf61..3786c689dff4 100644 --- 
a/sys/powerpc/booke/interrupt.c +++ b/sys/powerpc/booke/interrupt.c @@ -32,6 +32,8 @@ * Interrupts are dispatched to here from locore asm */ +#include "opt_hwpmc_hooks.h" + #include /* RCS ID & Copyright macro defns */ __FBSDID("$FreeBSD$"); @@ -45,6 +47,9 @@ __FBSDID("$FreeBSD$"); #include #include #include +#ifdef HWPMC_HOOKS +#include +#endif #include #include #include @@ -67,6 +72,9 @@ void powerpc_decr_interrupt(struct trapframe *); void powerpc_extr_interrupt(struct trapframe *); void powerpc_crit_interrupt(struct trapframe *); void powerpc_mchk_interrupt(struct trapframe *); +#ifdef HWPMC_HOOKS +void powerpc_pmc_interrupt(struct trapframe *framep); +#endif static void dump_frame(struct trapframe *framep); @@ -142,3 +150,20 @@ powerpc_extr_interrupt(struct trapframe *framep) critical_exit(); framep->srr1 &= ~PSL_WE; } + +#ifdef HWPMC_HOOKS +/* + * Performance Counter interrupt routine: inside a critical section it asserts that a pmc_intr handler is installed and calls it with the current CPU id and the trapframe; after leaving the critical section, if pmc_hook is set and curthread has TDP_CALLCHAIN in td_pflags, it calls pmc_hook with PMC_FN_USER_CALLCHAIN and the same trapframe. + */ +void +powerpc_pmc_interrupt(struct trapframe *framep) +{ + + critical_enter(); + KASSERT(pmc_intr != NULL, ("Performance exception, but no handler!")); + (*pmc_intr)(PCPU_GET(cpuid), framep); + critical_exit(); + if (pmc_hook && (PCPU_GET(curthread)->td_pflags & TDP_CALLCHAIN)) + pmc_hook(PCPU_GET(curthread), PMC_FN_USER_CALLCHAIN, framep); +} +#endif diff --git a/sys/powerpc/booke/locore.S b/sys/powerpc/booke/locore.S index 34f0b6f06b3d..330a61f70551 100644 --- 
extern void *int_debug; +#ifdef HWPMC_HOOKS +extern void *int_performance_counter; +#endif #define SET_TRAP(ivor, handler) \ KASSERT(((uintptr_t)(&handler) & ~0xffffUL) == \ @@ -235,6 +239,9 @@ ivor_setup(void) SET_TRAP(SPR_IVOR13, int_data_tlb_error); SET_TRAP(SPR_IVOR14, int_inst_tlb_error); SET_TRAP(SPR_IVOR15, int_debug); +#ifdef HWPMC_HOOKS + SET_TRAP(SPR_IVOR35, int_performance_counter); +#endif } static void diff --git a/sys/powerpc/booke/trap_subr.S b/sys/powerpc/booke/trap_subr.S index bc6bad3083cb..c585f1426458 100644 --- a/sys/powerpc/booke/trap_subr.S +++ b/sys/powerpc/booke/trap_subr.S @@ -497,6 +497,19 @@ INTERRUPT(int_watchdog) b trap_common +#ifdef HWPMC_HOOKS +/***************************************************************************** + * PMC Interrupt: runs STANDARD_PROLOG and FRAME_SETUP (tagged EXC_PERF) using SPRG3 and PC_TEMPSAVE, passes %r1 + 8 in %r3 to powerpc_pmc_interrupt(), then branches to trapexit. Only assembled when HWPMC_HOOKS is defined. + ****************************************************************************/ +INTERRUPT(int_performance_counter) + STANDARD_PROLOG(SPR_SPRG3, PC_TEMPSAVE, SPR_SRR0, SPR_SRR1) + FRAME_SETUP(SPR_SPRG3, PC_TEMPSAVE, EXC_PERF) + addi %r3, %r1, 8 + bl CNAME(powerpc_pmc_interrupt) + b trapexit +#endif + + /***************************************************************************** * Data TLB miss interrupt * diff --git a/sys/powerpc/include/pmc_mdep.h b/sys/powerpc/include/pmc_mdep.h index 87dc76c6f8c7..eb74b56fe5f1 100644 --- a/sys/powerpc/include/pmc_mdep.h +++ b/sys/powerpc/include/pmc_mdep.h @@ -7,9 +7,7 @@ #ifndef _MACHINE_PMC_MDEP_H_ #define _MACHINE_PMC_MDEP_H_ -#define PMC_MDEP_CLASS_INDEX_CPU 1 -#define PMC_MDEP_CLASS_INDEX_PPC7450 1 -#define PMC_MDEP_CLASS_INDEX_PPC970 1 +#define PMC_MDEP_CLASS_INDEX_POWERPC 1 union pmc_md_op_pmcallocate { uint64_t __pad[4]; @@ -19,6 +17,60 @@ union pmc_md_op_pmcallocate { #define PMCLOG_READADDR PMCLOG_READ32 #define PMCLOG_EMITADDR PMCLOG_EMIT32 +#define mtpmr(reg, val) \ + __asm __volatile("mtpmr %0,%1" : : "K"(reg), "r"(val)) +#define mfpmr(reg) \ + ( { register_t val; \ + __asm __volatile("mfpmr %0,%1" : "=r"(val) : "K"(reg)); \ + val; } ) + +#define 
PMR_PMC0 16 /* NOTE(review): the PMR_* values are presumably the register numbers handed to mtpmr()/mfpmr(); confirm against the e500 core reference manual */ +#define PMR_PMC1 17 +#define PMR_PMC2 18 +#define PMR_PMC3 19 +#define PMR_PMLCa0 144 /* PMLCax_* below are bit masks and field helpers, presumably for the PMLCa0-3 registers (TODO confirm) */ +#define PMLCax_FC 0x80000000 +#define PMLCax_FCS 0x40000000 +#define PMLCax_FCU 0x20000000 +#define PMLCax_FCM1 0x10000000 +#define PMLCax_FCM0 0x08000000 +#define PMLCax_CE 0x04000000 +#define PMLCax_EVENT(x) ((x) << 16) /* positions x at bit 16 and up */ +#define PMLCax_FCGS1 0x00000002 +#define PMLCax_FCGS0 0x00000001 +#define PMR_PMLCa1 145 +#define PMR_PMLCa2 146 +#define PMR_PMLCa3 147 +#define PMR_PMLCb0 272 /* PMLCbx_* below are presumably fields of the PMLCb0-3 registers (TODO confirm) */ +#define PMLCbx_TRIGONCTL(x) ((x) << 28) +#define PMLCbx_TRIGOFFCTL(x) ((x) << 24) +#define PMLCbx_PMCC 0x00800000 +#define PMLCbx_PMP(x) ((x) << 13) +#define PMLCbx_TREHMUL(x) ((x) << 8) /* NOTE(review): name reads like a misspelling of THRESHMUL; left as-is because code outside this hunk may reference it */ +#define PMLCbx_TRESHOLD(x) ((x) << 0) /* NOTE(review): probably meant THRESHOLD; the shift by 0 leaves x unchanged */ +#define PMR_PMLCb1 273 +#define PMR_PMLCb2 274 +#define PMR_PMLCb3 275 +#define PMR_PMGC0 400 /* PMGC_* below are presumably bits and fields of PMGC0 (TODO confirm) */ +#define PMGC_FAC 0x80000000 +#define PMGC_PMIE 0x40000000 +#define PMGC_FCECE 0x20000000 +#define PMGC_TBSEL(x) ((x) << 11) +#define PMGC_TBEE 0x00000100 +#define PMR_UPMC0 0 /* NOTE(review): every PMR_U* value equals its PMR_* counterpart minus 16; presumably these are the user-level aliases - confirm */ +#define PMR_UPMC1 1 +#define PMR_UPMC2 2 +#define PMR_UPMC3 3 +#define PMR_UPMLCa0 128 +#define PMR_UPMLCa1 129 +#define PMR_UPMLCa2 130 +#define PMR_UPMLCa3 131 +#define PMR_UPMLCb0 256 +#define PMR_UPMLCb1 257 +#define PMR_UPMLCb2 258 +#define PMR_UPMLCb3 259 +#define PMR_UPMGC0 384 + #if _KERNEL struct pmc_md_powerpc_pmc {