rpaulo f7db53a9d1 Intel XScale hwpmc(4) support.
This brings hwpmc(4) support for 2nd and 3rd generation XScale cores.
Right now it's enabled by default to make sure we test this a bit.
When the time comes it can be disabled by default.
Tested on Gateworks boards.

A man page is coming.

Obtained from:	//depot/user/rpaulo/xscalepmc/...
2009-12-23 23:16:54 +00:00

679 lines
16 KiB
C

/*-
* Copyright (c) 2009 Rui Paulo <rpaulo@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/pmc.h>
#include <sys/pmckern.h>
#include <machine/pmc_mdep.h>
/*
* Support for the Intel XScale network processors
*
* XScale processors have up to now three generations.
*
* The first generation has two PMC; the event selection, interrupt config
* and overflow flag setup are done by writing to the PMNC register.
* It also has less monitoring events than the latter generations.
*
* The second and third generatiosn have four PMCs, one register for the event
* selection, one register for the interrupt config and one register for
* the overflow flags.
*/
static int xscale_npmcs;
static int xscale_gen; /* XScale Core generation */
struct xscale_event_code_map {
enum pmc_event pe_ev;
uint8_t pe_code;
};
const struct xscale_event_code_map xscale_event_codes[] = {
/* 1st and 2nd Generation XScale cores */
{ PMC_EV_XSCALE_IC_FETCH, 0x00 },
{ PMC_EV_XSCALE_IC_MISS, 0x01 },
{ PMC_EV_XSCALE_DATA_DEPENDENCY_STALLED,0x02 },
{ PMC_EV_XSCALE_ITLB_MISS, 0x03 },
{ PMC_EV_XSCALE_DTLB_MISS, 0x04 },
{ PMC_EV_XSCALE_BRANCH_RETIRED, 0x05 },
{ PMC_EV_XSCALE_BRANCH_MISPRED, 0x06 },
{ PMC_EV_XSCALE_INSTR_RETIRED, 0x07 },
{ PMC_EV_XSCALE_DC_FULL_CYCLE, 0x08 },
{ PMC_EV_XSCALE_DC_FULL_CONTIG, 0x09 },
{ PMC_EV_XSCALE_DC_ACCESS, 0x0a },
{ PMC_EV_XSCALE_DC_MISS, 0x0b },
{ PMC_EV_XSCALE_DC_WRITEBACK, 0x0c },
{ PMC_EV_XSCALE_PC_CHANGE, 0x0d },
/* 3rd Generation XScale cores */
{ PMC_EV_XSCALE_BRANCH_RETIRED_ALL, 0x0e },
{ PMC_EV_XSCALE_INSTR_CYCLE, 0x0f },
{ PMC_EV_XSCALE_CP_STALL, 0x17 },
{ PMC_EV_XSCALE_PC_CHANGE_ALL, 0x18 },
{ PMC_EV_XSCALE_PIPELINE_FLUSH, 0x19 },
{ PMC_EV_XSCALE_BACKEND_STALL, 0x1a },
{ PMC_EV_XSCALE_MULTIPLIER_USE, 0x1b },
{ PMC_EV_XSCALE_MULTIPLIER_STALLED, 0x1c },
{ PMC_EV_XSCALE_DATA_CACHE_STALLED, 0x1e },
{ PMC_EV_XSCALE_L2_CACHE_REQ, 0x20 },
{ PMC_EV_XSCALE_L2_CACHE_MISS, 0x23 },
{ PMC_EV_XSCALE_ADDRESS_BUS_TRANS, 0x40 },
{ PMC_EV_XSCALE_SELF_ADDRESS_BUS_TRANS, 0x41 },
{ PMC_EV_XSCALE_DATA_BUS_TRANS, 0x48 },
};
const int xscale_event_codes_size =
sizeof(xscale_event_codes) / sizeof(xscale_event_codes[0]);
/*
* Per-processor information.
*/
struct xscale_cpu {
struct pmc_hw *pc_xscalepmcs;
};
static struct xscale_cpu **xscale_pcpu;
/*
* Performance Monitor Control Register
*/
static __inline uint32_t
xscale_pmnc_read(void)
{
uint32_t reg;
__asm __volatile("mrc p14, 0, %0, c0, c1, 0" : "=r" (reg));
return (reg);
}
static __inline void
xscale_pmnc_write(uint32_t reg)
{
__asm __volatile("mcr p14, 0, %0, c0, c1, 0" : : "r" (reg));
}
/*
* Clock Counter Register
*/
static __inline uint32_t
xscale_ccnt_read(void)
{
uint32_t reg;
__asm __volatile("mrc p14, 0, %0, c1, c1, 0" : "=r" (reg));
return (reg);
}
static __inline void
xscale_ccnt_write(uint32_t reg)
{
__asm __volatile("mcr p14, 0, %0, c1, c1, 0" : : "r" (reg));
}
/*
* Interrupt Enable Register
*/
static __inline uint32_t
xscale_inten_read(void)
{
uint32_t reg;
__asm __volatile("mrc p14, 0, %0, c4, c1, 0" : "=r" (reg));
return (reg);
}
static __inline void
xscale_inten_write(uint32_t reg)
{
__asm __volatile("mcr p14, 0, %0, c4, c1, 0" : : "r" (reg));
}
/*
* Overflow Flag Register
*/
static __inline uint32_t
xscale_flag_read(void)
{
uint32_t reg;
__asm __volatile("mrc p14, 0, %0, c5, c1, 0" : "=r" (reg));
return (reg);
}
static __inline void
xscale_flag_write(uint32_t reg)
{
__asm __volatile("mcr p14, 0, %0, c5, c1, 0" : : "r" (reg));
}
/*
* Event Selection Register
*/
static __inline uint32_t
xscale_evtsel_read(void)
{
uint32_t reg;
__asm __volatile("mrc p14, 0, %0, c8, c1, 0" : "=r" (reg));
return (reg);
}
static __inline void
xscale_evtsel_write(uint32_t reg)
{
__asm __volatile("mcr p14, 0, %0, c8, c1, 0" : : "r" (reg));
}
/*
* Performance Count Register N
*/
static uint32_t
xscale_pmcn_read(unsigned int pmc)
{
uint32_t reg = 0;
KASSERT(pmc < 4, ("[xscale,%d] illegal PMC number %d", __LINE__, pmc));
switch (pmc) {
case 0:
__asm __volatile("mrc p14, 0, %0, c0, c2, 0" : "=r" (reg));
break;
case 1:
__asm __volatile("mrc p14, 0, %0, c1, c2, 0" : "=r" (reg));
break;
case 2:
__asm __volatile("mrc p14, 0, %0, c2, c2, 0" : "=r" (reg));
break;
case 3:
__asm __volatile("mrc p14, 0, %0, c3, c2, 0" : "=r" (reg));
break;
}
return (reg);
}
static uint32_t
xscale_pmcn_write(unsigned int pmc, uint32_t reg)
{
KASSERT(pmc < 4, ("[xscale,%d] illegal PMC number %d", __LINE__, pmc));
switch (pmc) {
case 0:
__asm __volatile("mcr p14, 0, %0, c0, c2, 0" : : "r" (reg));
break;
case 1:
__asm __volatile("mcr p14, 0, %0, c1, c2, 0" : : "r" (reg));
break;
case 2:
__asm __volatile("mcr p14, 0, %0, c2, c2, 0" : : "r" (reg));
break;
case 3:
__asm __volatile("mcr p14, 0, %0, c3, c2, 0" : : "r" (reg));
break;
}
return (reg);
}
static int
xscale_allocate_pmc(int cpu, int ri, struct pmc *pm,
const struct pmc_op_pmcallocate *a)
{
enum pmc_event pe;
uint32_t caps, config;
int i;
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
("[xscale,%d] illegal CPU value %d", __LINE__, cpu));
KASSERT(ri >= 0 && ri < xscale_npmcs,
("[xscale,%d] illegal row index %d", __LINE__, ri));
caps = a->pm_caps;
if (a->pm_class != PMC_CLASS_XSCALE)
return (EINVAL);
pe = a->pm_ev;
for (i = 0; i < xscale_event_codes_size; i++) {
if (xscale_event_codes[i].pe_ev == pe) {
config = xscale_event_codes[i].pe_code;
break;
}
}
if (i == xscale_event_codes_size)
return EINVAL;
/* Generation 1 has fewer events */
if (xscale_gen == 1 && i > PMC_EV_XSCALE_PC_CHANGE)
return EINVAL;
pm->pm_md.pm_xscale.pm_xscale_evsel = config;
PMCDBG(MDP,ALL,2,"xscale-allocate ri=%d -> config=0x%x", ri, config);
return 0;
}
static int
xscale_read_pmc(int cpu, int ri, pmc_value_t *v)
{
struct pmc *pm;
pmc_value_t tmp;
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
("[xscale,%d] illegal CPU value %d", __LINE__, cpu));
KASSERT(ri >= 0 && ri < xscale_npmcs,
("[xscale,%d] illegal row index %d", __LINE__, ri));
pm = xscale_pcpu[cpu]->pc_xscalepmcs[ri].phw_pmc;
tmp = xscale_pmcn_read(ri);
PMCDBG(MDP,REA,2,"xscale-read id=%d -> %jd", ri, tmp);
if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
*v = XSCALE_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp);
else
*v = tmp;
return 0;
}
static int
xscale_write_pmc(int cpu, int ri, pmc_value_t v)
{
struct pmc *pm;
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
("[xscale,%d] illegal CPU value %d", __LINE__, cpu));
KASSERT(ri >= 0 && ri < xscale_npmcs,
("[xscale,%d] illegal row-index %d", __LINE__, ri));
pm = xscale_pcpu[cpu]->pc_xscalepmcs[ri].phw_pmc;
if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
v = XSCALE_RELOAD_COUNT_TO_PERFCTR_VALUE(v);
PMCDBG(MDP,WRI,1,"xscale-write cpu=%d ri=%d v=%jx", cpu, ri, v);
xscale_pmcn_write(ri, v);
return 0;
}
static int
xscale_config_pmc(int cpu, int ri, struct pmc *pm)
{
struct pmc_hw *phw;
PMCDBG(MDP,CFG,1, "cpu=%d ri=%d pm=%p", cpu, ri, pm);
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
("[xscale,%d] illegal CPU value %d", __LINE__, cpu));
KASSERT(ri >= 0 && ri < xscale_npmcs,
("[xscale,%d] illegal row-index %d", __LINE__, ri));
phw = &xscale_pcpu[cpu]->pc_xscalepmcs[ri];
KASSERT(pm == NULL || phw->phw_pmc == NULL,
("[xscale,%d] pm=%p phw->pm=%p hwpmc not unconfigured",
__LINE__, pm, phw->phw_pmc));
phw->phw_pmc = pm;
return 0;
}
static int
xscale_start_pmc(int cpu, int ri)
{
uint32_t pmnc, config, evtsel;
struct pmc *pm;
struct pmc_hw *phw;
phw = &xscale_pcpu[cpu]->pc_xscalepmcs[ri];
pm = phw->phw_pmc;
config = pm->pm_md.pm_xscale.pm_xscale_evsel;
/*
* Configure the event selection.
*
* On the XScale 2nd Generation there's no EVTSEL register.
*/
if (xscale_npmcs == 2) {
pmnc = xscale_pmnc_read();
switch (ri) {
case 0:
pmnc &= ~XSCALE_PMNC_EVT0_MASK;
pmnc |= (config << 12) & XSCALE_PMNC_EVT0_MASK;
break;
case 1:
pmnc &= ~XSCALE_PMNC_EVT1_MASK;
pmnc |= (config << 20) & XSCALE_PMNC_EVT1_MASK;
break;
default:
/* XXX */
break;
}
xscale_pmnc_write(pmnc);
} else {
evtsel = xscale_evtsel_read();
switch (ri) {
case 0:
evtsel &= ~XSCALE_EVTSEL_EVT0_MASK;
evtsel |= config & XSCALE_EVTSEL_EVT0_MASK;
break;
case 1:
evtsel &= ~XSCALE_EVTSEL_EVT1_MASK;
evtsel |= (config << 8) & XSCALE_EVTSEL_EVT1_MASK;
break;
case 2:
evtsel &= ~XSCALE_EVTSEL_EVT2_MASK;
evtsel |= (config << 16) & XSCALE_EVTSEL_EVT2_MASK;
break;
case 3:
evtsel &= ~XSCALE_EVTSEL_EVT3_MASK;
evtsel |= (config << 24) & XSCALE_EVTSEL_EVT3_MASK;
break;
default:
/* XXX */
break;
}
xscale_evtsel_write(evtsel);
}
/*
* Enable the PMC.
*
* Note that XScale provides only one bit to enable/disable _all_
* performance monitoring units.
*/
pmnc = xscale_pmnc_read();
pmnc |= XSCALE_PMNC_ENABLE;
xscale_pmnc_write(pmnc);
return 0;
}
static int
xscale_stop_pmc(int cpu, int ri)
{
uint32_t pmnc, evtsel;
struct pmc *pm;
struct pmc_hw *phw;
phw = &xscale_pcpu[cpu]->pc_xscalepmcs[ri];
pm = phw->phw_pmc;
/*
* Disable the PMCs.
*
* Note that XScale provides only one bit to enable/disable _all_
* performance monitoring units.
*/
pmnc = xscale_pmnc_read();
pmnc &= ~XSCALE_PMNC_ENABLE;
xscale_pmnc_write(pmnc);
/*
* A value of 0xff makes the corresponding PMU go into
* power saving mode.
*/
if (xscale_npmcs == 2) {
pmnc = xscale_pmnc_read();
switch (ri) {
case 0:
pmnc |= XSCALE_PMNC_EVT0_MASK;
break;
case 1:
pmnc |= XSCALE_PMNC_EVT1_MASK;
break;
default:
/* XXX */
break;
}
xscale_pmnc_write(pmnc);
} else {
evtsel = xscale_evtsel_read();
switch (ri) {
case 0:
evtsel |= XSCALE_EVTSEL_EVT0_MASK;
break;
case 1:
evtsel |= XSCALE_EVTSEL_EVT1_MASK;
break;
case 2:
evtsel |= XSCALE_EVTSEL_EVT2_MASK;
break;
case 3:
evtsel |= XSCALE_EVTSEL_EVT3_MASK;
break;
default:
/* XXX */
break;
}
xscale_evtsel_write(evtsel);
}
return 0;
}
static int
xscale_release_pmc(int cpu, int ri, struct pmc *pmc)
{
struct pmc_hw *phw;
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
("[xscale,%d] illegal CPU value %d", __LINE__, cpu));
KASSERT(ri >= 0 && ri < xscale_npmcs,
("[xscale,%d] illegal row-index %d", __LINE__, ri));
phw = &xscale_pcpu[cpu]->pc_xscalepmcs[ri];
KASSERT(phw->phw_pmc == NULL,
("[xscale,%d] PHW pmc %p non-NULL", __LINE__, phw->phw_pmc));
return 0;
}
static int
xscale_intr(int cpu, struct trapframe *tf)
{
printf("intr\n");
return 0;
}
static int
xscale_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
{
int error;
struct pmc_hw *phw;
char xscale_name[PMC_NAME_MAX];
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
("[xscale,%d], illegal CPU %d", __LINE__, cpu));
KASSERT(ri >= 0 && ri < xscale_npmcs,
("[xscale,%d] row-index %d out of range", __LINE__, ri));
phw = &xscale_pcpu[cpu]->pc_xscalepmcs[ri];
snprintf(xscale_name, sizeof(xscale_name), "XSCALE-%d", ri);
if ((error = copystr(xscale_name, pi->pm_name, PMC_NAME_MAX,
NULL)) != 0)
return error;
pi->pm_class = PMC_CLASS_XSCALE;
if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
pi->pm_enabled = TRUE;
*ppmc = phw->phw_pmc;
} else {
pi->pm_enabled = FALSE;
*ppmc = NULL;
}
return (0);
}
static int
xscale_get_config(int cpu, int ri, struct pmc **ppm)
{
*ppm = xscale_pcpu[cpu]->pc_xscalepmcs[ri].phw_pmc;
return 0;
}
/*
* XXX don't know what we should do here.
*/
static int
xscale_switch_in(struct pmc_cpu *pc, struct pmc_process *pp)
{
return 0;
}
static int
xscale_switch_out(struct pmc_cpu *pc, struct pmc_process *pp)
{
return 0;
}
static int
xscale_pcpu_init(struct pmc_mdep *md, int cpu)
{
int first_ri, i;
struct pmc_cpu *pc;
struct xscale_cpu *pac;
struct pmc_hw *phw;
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
("[xscale,%d] wrong cpu number %d", __LINE__, cpu));
PMCDBG(MDP,INI,1,"xscale-init cpu=%d", cpu);
xscale_pcpu[cpu] = pac = malloc(sizeof(struct xscale_cpu), M_PMC,
M_WAITOK|M_ZERO);
pac->pc_xscalepmcs = malloc(sizeof(struct pmc_hw) * xscale_npmcs,
M_PMC, M_WAITOK|M_ZERO);
pc = pmc_pcpu[cpu];
first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_XSCALE].pcd_ri;
KASSERT(pc != NULL, ("[xscale,%d] NULL per-cpu pointer", __LINE__));
for (i = 0, phw = pac->pc_xscalepmcs; i < xscale_npmcs; i++, phw++) {
phw->phw_state = PMC_PHW_FLAG_IS_ENABLED |
PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(i);
phw->phw_pmc = NULL;
pc->pc_hwpmcs[i + first_ri] = phw;
}
/*
* Disable and put the PMUs into power save mode.
*/
if (xscale_npmcs == 2) {
xscale_pmnc_write(XSCALE_PMNC_EVT1_MASK |
XSCALE_PMNC_EVT0_MASK);
} else {
xscale_evtsel_write(XSCALE_EVTSEL_EVT3_MASK |
XSCALE_EVTSEL_EVT2_MASK | XSCALE_EVTSEL_EVT1_MASK |
XSCALE_EVTSEL_EVT0_MASK);
}
return 0;
}
static int
xscale_pcpu_fini(struct pmc_mdep *md, int cpu)
{
return 0;
}
struct pmc_mdep *
pmc_xscale_initialize()
{
struct pmc_mdep *pmc_mdep;
struct pmc_classdep *pcd;
uint32_t idreg;
/* Get the Core Generation from CP15 */
__asm __volatile("mrc p15, 0, %0, c0, c0, 0" : "=r" (idreg));
xscale_gen = (idreg >> 13) & 0x3;
switch (xscale_gen) {
case 1:
xscale_npmcs = 2;
break;
case 2:
case 3:
xscale_npmcs = 4;
break;
default:
printf("%s: unknown XScale core generation\n", __func__);
return (NULL);
}
PMCDBG(MDP,INI,1,"xscale-init npmcs=%d", xscale_npmcs);
/*
* Allocate space for pointers to PMC HW descriptors and for
* the MDEP structure used by MI code.
*/
xscale_pcpu = malloc(sizeof(struct xscale_cpu *) * pmc_cpu_max(), M_PMC,
M_WAITOK|M_ZERO);
/* Just one class */
pmc_mdep = malloc(sizeof(struct pmc_mdep) + sizeof(struct pmc_classdep),
M_PMC, M_WAITOK|M_ZERO);
pmc_mdep->pmd_cputype = PMC_CPU_INTEL_XSCALE;
pmc_mdep->pmd_nclass = 1;
pcd = &pmc_mdep->pmd_classdep[PMC_MDEP_CLASS_INDEX_XSCALE];
pcd->pcd_caps = XSCALE_PMC_CAPS;
pcd->pcd_class = PMC_CLASS_XSCALE;
pcd->pcd_num = xscale_npmcs;
pcd->pcd_ri = pmc_mdep->pmd_npmc;
pcd->pcd_width = 32;
pcd->pcd_allocate_pmc = xscale_allocate_pmc;
pcd->pcd_config_pmc = xscale_config_pmc;
pcd->pcd_pcpu_fini = xscale_pcpu_fini;
pcd->pcd_pcpu_init = xscale_pcpu_init;
pcd->pcd_describe = xscale_describe;
pcd->pcd_get_config = xscale_get_config;
pcd->pcd_read_pmc = xscale_read_pmc;
pcd->pcd_release_pmc = xscale_release_pmc;
pcd->pcd_start_pmc = xscale_start_pmc;
pcd->pcd_stop_pmc = xscale_stop_pmc;
pcd->pcd_write_pmc = xscale_write_pmc;
pmc_mdep->pmd_intr = xscale_intr;
pmc_mdep->pmd_switch_in = xscale_switch_in;
pmc_mdep->pmd_switch_out = xscale_switch_out;
pmc_mdep->pmd_npmc += xscale_npmcs;
return (pmc_mdep);
}
void
pmc_xscale_finalize(struct pmc_mdep *md)
{
}