5347c3345e
- Update driver interrupt statistics correctly. sys/sys/pmc.h, sys/dev/hwpmc/hwpmc_mod.c: - Fix a bug affecting debug printfs. - Move the 'stalled' flag from being in a bit in the 'pm_flags' field of a 'struct pmc' to a field of its own in the same structure. This flag is updated from the NMI handler and keeping it separate makes it easier to avoid races with other parts of the code. sys/dev/hwpmc/hwpmc_logging.c: - Do arithmetic with 'uintptr_t' types rather that casting to and from 'char *'. Approved by: re (scottl)
1802 lines
52 KiB
C
1802 lines
52 KiB
C
/*-
|
|
* Copyright (c) 2003-2005 Joseph Koshy
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/lock.h>
|
|
#include <sys/mutex.h>
|
|
#include <sys/pmc.h>
|
|
#include <sys/pmckern.h>
|
|
#include <sys/smp.h>
|
|
#include <sys/systm.h>
|
|
|
|
#include <machine/cpufunc.h>
|
|
#include <machine/md_var.h>
|
|
#include <machine/specialreg.h>
|
|
|
|
/*
|
|
* PENTIUM 4 SUPPORT
|
|
*
|
|
* The P4 has 18 PMCs, divided into 4 groups with 4,4,4 and 6 PMCs
|
|
* respectively. Each PMC comprises two model-specific registers:
|
|
* a counter configuration control register (CCCR) and a counter
|
|
* register that holds the actual event counts.
|
|
*
|
|
* Configuring an event requires the use of one of 45 event selection
|
|
* control registers (ESCR). Events are associated with specific
|
|
* ESCRs. Each PMC group has a set of ESCRs it can use.
|
|
*
|
|
* - The BPU counter group (4 PMCs) can use the 16 ESCRs:
|
|
* BPU_ESCR{0,1}, IS_ESCR{0,1}, MOB_ESCR{0,1}, ITLB_ESCR{0,1},
|
|
* PMH_ESCR{0,1}, IX_ESCR{0,1}, FSB_ESCR{0,1}, BSU_ESCR{0,1}.
|
|
*
|
|
* - The MS counter group (4 PMCs) can use the 6 ESCRs: MS_ESCR{0,1},
|
|
* TC_ESCR{0,1}, TBPU_ESCR{0,1}.
|
|
*
|
|
* - The FLAME counter group (4 PMCs) can use the 10 ESCRs:
|
|
* FLAME_ESCR{0,1}, FIRM_ESCR{0,1}, SAAT_ESCR{0,1}, U2L_ESCR{0,1},
|
|
* DAC_ESCR{0,1}.
|
|
*
|
|
* - The IQ counter group (6 PMCs) can use the 13 ESCRs: IQ_ESCR{0,1},
|
|
* ALF_ESCR{0,1}, RAT_ESCR{0,1}, SSU_ESCR0, CRU_ESCR{0,1,2,3,4,5}.
|
|
*
|
|
* Even-numbered ESCRs can be used with counters 0, 1 and 4 (if
|
|
* present) of a counter group. Odd-numbered ESCRs can be used with
|
|
* counters 2, 3 and 5 (if present) of a counter group. The
|
|
* 'p4_escrs[]' table describes these restrictions in a form that
|
|
* function 'p4_allocate()' uses for making allocation decisions.
|
|
*
|
|
* SYSTEM-MODE AND THREAD-MODE ALLOCATION
|
|
*
|
|
* In addition to remembering the state of PMC rows
|
|
* ('FREE', 'STANDALONE', or 'THREAD'), we similarly need to track the
|
|
* state of ESCR rows. If an ESCR is allocated to a system-mode PMC
|
|
* on a CPU we cannot allocate this to a thread-mode PMC. On a
|
|
* multi-cpu (multiple physical CPUs) system, ESCR allocation on each
|
|
* CPU is tracked by the pc_escrs[] array.
|
|
*
|
|
* Each system-mode PMC that is using an ESCR records its row-index in
|
|
* the appropriate entry and system-mode allocation attempts check
|
|
* that an ESCR is available using this array. Process-mode PMCs do
|
|
* not use the pc_escrs[] array, since ESCR row itself would have been
|
|
* marked as in 'THREAD' mode.
|
|
*
|
|
* HYPERTHREADING SUPPORT
|
|
*
|
|
* When HTT is enabled, the FreeBSD kernel treats the two 'logical'
|
|
* cpus as independent CPUs and can schedule kernel threads on them
|
|
* independently. However, the two logical CPUs share the same set of
|
|
* PMC resources. We need to ensure that:
|
|
* - PMCs that use the PMC_F_DESCENDANTS semantics are handled correctly,
|
|
* and,
|
|
* - Threads of multi-threaded processes that get scheduled on the same
|
|
* physical CPU are handled correctly.
|
|
*
|
|
* HTT Detection
|
|
*
|
|
* Not all HTT capable systems will have HTT enabled since users may
|
|
* have turned HTT support off using the appropriate sysctls
|
|
* (machdep.hlt_logical_cpus or machdep.logical_cpus_mask). We detect
|
|
* the presence of HTT by remembering if 'p4_init()' was called for a
|
|
* logical CPU. Note that hwpmc(4) cannot deal with a change in HTT
|
|
* status once it is loaded.
|
|
*
|
|
* Handling HTT READ / WRITE / START / STOP
|
|
*
|
|
* PMC resources are shared across multiple logical CPUs. In each
|
|
* physical CPU's state we keep track of a 'runcount' which reflects
|
|
* the number of PMC-using processes that have been scheduled on the
|
|
* logical CPUs of this physical CPU. Process-mode PMC operations
|
|
* will actually 'start' or 'stop' hardware only if these are the
|
|
* first or last processes respectively to use the hardware. PMC
|
|
* values written by a 'write' operation are saved and are transferred
|
|
* to hardware at PMC 'start' time if the runcount is 0. If the
|
|
* runcount is greater than 0 at the time of a 'start' operation, we
|
|
* keep track of the actual hardware value at the time of the 'start'
|
|
* operation and use this to adjust the final readings at PMC 'stop'
|
|
* or 'read' time.
|
|
*
|
|
* Execution sequences:
|
|
*
|
|
* Case 1: CPUx +...- (no overlap)
|
|
* CPUy +...-
|
|
* RC 0 1 0 1 0
|
|
*
|
|
* Case 2: CPUx +........- (partial overlap)
|
|
* CPUy +........-
|
|
* RC 0 1 2 1 0
|
|
*
|
|
* Case 3: CPUx +..............- (fully overlapped)
|
|
* CPUy +.....-
|
|
* RC 0 1 2 1 0
|
|
*
|
|
* Key:
|
|
* 'CPU[xy]' : one of the two logical processors on a HTT CPU.
|
|
* 'RC' : run count (#threads per physical core).
|
|
* '+' : point in time when a thread is put on a CPU.
|
|
* '-' : point in time where a thread is taken off a CPU.
|
|
*
|
|
* Handling HTT CONFIG
|
|
*
|
|
* Different processes attached to the same PMC may get scheduled on
|
|
* the two logical processors in the package. We keep track of config
|
|
* and de-config operations using the CFGFLAGS fields of the per-physical
|
|
* cpu state.
|
|
*/
|
|
|
|
/*
 * X-macro listing every P4 counter name.  Each user defines P4_PMC(N)
 * to the expansion it needs and then invokes P4_PMCS().  The trailing
 * 'NONE' entry serves as a "no counter" marker.
 */
#define	P4_PMCS()				\
	P4_PMC(BPU_COUNTER0)			\
	P4_PMC(BPU_COUNTER1)			\
	P4_PMC(BPU_COUNTER2)			\
	P4_PMC(BPU_COUNTER3)			\
	P4_PMC(MS_COUNTER0)			\
	P4_PMC(MS_COUNTER1)			\
	P4_PMC(MS_COUNTER2)			\
	P4_PMC(MS_COUNTER3)			\
	P4_PMC(FLAME_COUNTER0)			\
	P4_PMC(FLAME_COUNTER1)			\
	P4_PMC(FLAME_COUNTER2)			\
	P4_PMC(FLAME_COUNTER3)			\
	P4_PMC(IQ_COUNTER0)			\
	P4_PMC(IQ_COUNTER1)			\
	P4_PMC(IQ_COUNTER2)			\
	P4_PMC(IQ_COUNTER3)			\
	P4_PMC(IQ_COUNTER4)			\
	P4_PMC(IQ_COUNTER5)			\
	P4_PMC(NONE)

/*
 * Counter identifiers: one P4_PMC_<name> enumerator per P4_PMCS()
 * entry, numbered from zero in list order.
 */
#undef	P4_PMC
#define	P4_PMC(N)	P4_PMC_##N ,
enum pmc_p4pmc {
	P4_PMCS()
};
|
|
|
|
/*
 * P4 ESCR descriptors
 *
 * X-macro with one P4_ESCR(name, msr, pmc1, pmc2, pmc3) entry per
 * ESCR: the ESCR's name, its MSR address, and the (up to three)
 * counters it can be paired with; unused slots are 'NONE'.
 *
 * Even-numbered ESCRs pair with counters 0, 1 (and 4); odd-numbered
 * ESCRs pair with counters 2, 3 (and 5) of their counter group.
 * IQ_ESCR1 and SSU_ESCR0 previously violated this rule (IQ_ESCR1
 * listed IQ_COUNTER1 and SSU_ESCR0 listed IQ_COUNTER2); both entries
 * now follow it.
 */

#define	P4_ESCRS()							\
    P4_ESCR(BSU_ESCR0,	0x3A0, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(BSU_ESCR1,	0x3A1, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(FSB_ESCR0,	0x3A2, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(FSB_ESCR1,	0x3A3, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(FIRM_ESCR0,	0x3A4, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(FIRM_ESCR1,	0x3A5, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(FLAME_ESCR0, 0x3A6, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(FLAME_ESCR1, 0x3A7, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(DAC_ESCR0,	0x3A8, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(DAC_ESCR1,	0x3A9, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(MOB_ESCR0,	0x3AA, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(MOB_ESCR1,	0x3AB, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(PMH_ESCR0,	0x3AC, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(PMH_ESCR1,	0x3AD, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(SAAT_ESCR0,	0x3AE, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(SAAT_ESCR1,	0x3AF, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(U2L_ESCR0,	0x3B0, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(U2L_ESCR1,	0x3B1, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(BPU_ESCR0,	0x3B2, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(BPU_ESCR1,	0x3B3, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(IS_ESCR0,	0x3B4, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(IS_ESCR1,	0x3B5, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(ITLB_ESCR0,	0x3B6, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(ITLB_ESCR1,	0x3B7, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(CRU_ESCR0,	0x3B8, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(CRU_ESCR1,	0x3B9, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(IQ_ESCR0,	0x3BA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(IQ_ESCR1,	0x3BB, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(RAT_ESCR0,	0x3BC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(RAT_ESCR1,	0x3BD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(SSU_ESCR0,	0x3BE, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(MS_ESCR0,	0x3C0, MS_COUNTER0, MS_COUNTER1, NONE)		\
    P4_ESCR(MS_ESCR1,	0x3C1, MS_COUNTER2, MS_COUNTER3, NONE)		\
    P4_ESCR(TBPU_ESCR0,	0x3C2, MS_COUNTER0, MS_COUNTER1, NONE)		\
    P4_ESCR(TBPU_ESCR1,	0x3C3, MS_COUNTER2, MS_COUNTER3, NONE)		\
    P4_ESCR(TC_ESCR0,	0x3C4, MS_COUNTER0, MS_COUNTER1, NONE)		\
    P4_ESCR(TC_ESCR1,	0x3C5, MS_COUNTER2, MS_COUNTER3, NONE)		\
    P4_ESCR(IX_ESCR0,	0x3C8, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(IX_ESCR1,	0x3C9, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(ALF_ESCR0,	0x3CA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(ALF_ESCR1,	0x3CB, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(CRU_ESCR2,	0x3CC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(CRU_ESCR3,	0x3CD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(CRU_ESCR4,	0x3E0, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(CRU_ESCR5,	0x3E1, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(NONE,	~0,    NONE, NONE, NONE)
|
|
|
|
enum pmc_p4escr {
|
|
#define P4_ESCR(N, MSR, P1, P2, P3) P4_ESCR_##N ,
|
|
P4_ESCRS()
|
|
#undef P4_ESCR
|
|
};
|
|
|
|
struct pmc_p4escr_descr {
|
|
const char pm_escrname[PMC_NAME_MAX];
|
|
u_short pm_escr_msr;
|
|
const enum pmc_p4pmc pm_pmcs[P4_MAX_PMC_PER_ESCR];
|
|
};
|
|
|
|
/*
 * ESCR descriptor table, generated from P4_ESCRS() and therefore in
 * the same order as the enumerators of 'enum pmc_p4escr'.
 */
static struct pmc_p4escr_descr p4_escrs[] =
{
#define	P4_ESCR(N, MSR, P1, P2, P3)					\
	{								\
		.pm_escrname = #N,					\
		.pm_escr_msr = (MSR),					\
		.pm_pmcs     = { P4_PMC_##P1, P4_PMC_##P2, P4_PMC_##P3 } \
	} ,

	P4_ESCRS()

#undef	P4_ESCR
};
|
|
|
|
/*
|
|
* P4 Event descriptor
|
|
*/
|
|
|
|
struct p4_event_descr {
|
|
const enum pmc_event pm_event;
|
|
const uint32_t pm_escr_eventselect;
|
|
const uint32_t pm_cccr_select;
|
|
const char pm_is_ti_event;
|
|
enum pmc_p4escr pm_escrs[P4_MAX_ESCR_PER_EVENT];
|
|
};
|
|
|
|
static struct p4_event_descr p4_events[] = {
|
|
|
|
#define P4_EVDESCR(NAME, ESCREVENTSEL, CCCRSEL, TI_EVENT, ESCR0, ESCR1) \
|
|
{ \
|
|
.pm_event = PMC_EV_P4_##NAME, \
|
|
.pm_escr_eventselect = (ESCREVENTSEL), \
|
|
.pm_cccr_select = (CCCRSEL), \
|
|
.pm_is_ti_event = (TI_EVENT), \
|
|
.pm_escrs = \
|
|
{ \
|
|
P4_ESCR_##ESCR0, \
|
|
P4_ESCR_##ESCR1 \
|
|
} \
|
|
}
|
|
|
|
P4_EVDESCR(TC_DELIVER_MODE, 0x01, 0x01, TRUE, TC_ESCR0, TC_ESCR1),
|
|
P4_EVDESCR(BPU_FETCH_REQUEST, 0x03, 0x00, FALSE, BPU_ESCR0, BPU_ESCR1),
|
|
P4_EVDESCR(ITLB_REFERENCE, 0x18, 0x03, FALSE, ITLB_ESCR0, ITLB_ESCR1),
|
|
P4_EVDESCR(MEMORY_CANCEL, 0x02, 0x05, FALSE, DAC_ESCR0, DAC_ESCR1),
|
|
P4_EVDESCR(MEMORY_COMPLETE, 0x08, 0x02, FALSE, SAAT_ESCR0, SAAT_ESCR1),
|
|
P4_EVDESCR(LOAD_PORT_REPLAY, 0x04, 0x02, FALSE, SAAT_ESCR0, SAAT_ESCR1),
|
|
P4_EVDESCR(STORE_PORT_REPLAY, 0x05, 0x02, FALSE, SAAT_ESCR0, SAAT_ESCR1),
|
|
P4_EVDESCR(MOB_LOAD_REPLAY, 0x03, 0x02, FALSE, MOB_ESCR0, MOB_ESCR1),
|
|
P4_EVDESCR(PAGE_WALK_TYPE, 0x01, 0x04, TRUE, PMH_ESCR0, PMH_ESCR1),
|
|
P4_EVDESCR(BSQ_CACHE_REFERENCE, 0x0C, 0x07, FALSE, BSU_ESCR0, BSU_ESCR1),
|
|
P4_EVDESCR(IOQ_ALLOCATION, 0x03, 0x06, FALSE, FSB_ESCR0, FSB_ESCR1),
|
|
P4_EVDESCR(IOQ_ACTIVE_ENTRIES, 0x1A, 0x06, FALSE, FSB_ESCR1, NONE),
|
|
P4_EVDESCR(FSB_DATA_ACTIVITY, 0x17, 0x06, TRUE, FSB_ESCR0, FSB_ESCR1),
|
|
P4_EVDESCR(BSQ_ALLOCATION, 0x05, 0x07, FALSE, BSU_ESCR0, NONE),
|
|
P4_EVDESCR(BSQ_ACTIVE_ENTRIES, 0x06, 0x07, FALSE, BSU_ESCR1, NONE),
|
|
/* BSQ_ACTIVE_ENTRIES inherits CPU specificity from BSQ_ALLOCATION */
|
|
P4_EVDESCR(SSE_INPUT_ASSIST, 0x34, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
|
|
P4_EVDESCR(PACKED_SP_UOP, 0x08, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
|
|
P4_EVDESCR(PACKED_DP_UOP, 0x0C, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
|
|
P4_EVDESCR(SCALAR_SP_UOP, 0x0A, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
|
|
P4_EVDESCR(SCALAR_DP_UOP, 0x0E, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
|
|
P4_EVDESCR(64BIT_MMX_UOP, 0x02, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
|
|
P4_EVDESCR(128BIT_MMX_UOP, 0x1A, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
|
|
P4_EVDESCR(X87_FP_UOP, 0x04, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
|
|
P4_EVDESCR(X87_SIMD_MOVES_UOP, 0x2E, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
|
|
P4_EVDESCR(GLOBAL_POWER_EVENTS, 0x13, 0x06, FALSE, FSB_ESCR0, FSB_ESCR1),
|
|
P4_EVDESCR(TC_MS_XFER, 0x05, 0x00, FALSE, MS_ESCR0, MS_ESCR1),
|
|
P4_EVDESCR(UOP_QUEUE_WRITES, 0x09, 0x00, FALSE, MS_ESCR0, MS_ESCR1),
|
|
P4_EVDESCR(RETIRED_MISPRED_BRANCH_TYPE,
|
|
0x05, 0x02, FALSE, TBPU_ESCR0, TBPU_ESCR1),
|
|
P4_EVDESCR(RETIRED_BRANCH_TYPE, 0x04, 0x02, FALSE, TBPU_ESCR0, TBPU_ESCR1),
|
|
P4_EVDESCR(RESOURCE_STALL, 0x01, 0x01, FALSE, ALF_ESCR0, ALF_ESCR1),
|
|
P4_EVDESCR(WC_BUFFER, 0x05, 0x05, TRUE, DAC_ESCR0, DAC_ESCR1),
|
|
P4_EVDESCR(B2B_CYCLES, 0x16, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1),
|
|
P4_EVDESCR(BNR, 0x08, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1),
|
|
P4_EVDESCR(SNOOP, 0x06, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1),
|
|
P4_EVDESCR(RESPONSE, 0x04, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1),
|
|
P4_EVDESCR(FRONT_END_EVENT, 0x08, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3),
|
|
P4_EVDESCR(EXECUTION_EVENT, 0x0C, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3),
|
|
P4_EVDESCR(REPLAY_EVENT, 0x09, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3),
|
|
P4_EVDESCR(INSTR_RETIRED, 0x02, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1),
|
|
P4_EVDESCR(UOPS_RETIRED, 0x01, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1),
|
|
P4_EVDESCR(UOP_TYPE, 0x02, 0x02, FALSE, RAT_ESCR0, RAT_ESCR1),
|
|
P4_EVDESCR(BRANCH_RETIRED, 0x06, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3),
|
|
P4_EVDESCR(MISPRED_BRANCH_RETIRED, 0x03, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1),
|
|
P4_EVDESCR(X87_ASSIST, 0x03, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3),
|
|
P4_EVDESCR(MACHINE_CLEAR, 0x02, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3)
|
|
|
|
#undef P4_EVDESCR
|
|
};
|
|
|
|
/* True when the event descriptor pointed to by 'E' has its TI flag set. */
#define	P4_EVENT_IS_TI(E)	((E)->pm_is_ti_event == TRUE)

/* Size of the inclusive event-code range PMC_EV_P4_FIRST..PMC_EV_P4_LAST. */
#define	P4_NEVENTS		(PMC_EV_P4_LAST - PMC_EV_P4_FIRST + 1)
|
|
|
|
/*
|
|
* P4 PMC descriptors
|
|
*/
|
|
|
|
struct p4pmc_descr {
|
|
struct pmc_descr pm_descr; /* common information */
|
|
enum pmc_p4pmc pm_pmcnum; /* PMC number */
|
|
uint32_t pm_pmc_msr; /* PERFCTR MSR address */
|
|
uint32_t pm_cccr_msr; /* CCCR MSR address */
|
|
};
|
|
|
|
static struct p4pmc_descr p4_pmcdesc[P4_NPMCS] = {
|
|
|
|
/*
|
|
* TSC descriptor
|
|
*/
|
|
|
|
{
|
|
.pm_descr =
|
|
{
|
|
.pd_name = "TSC",
|
|
.pd_class = PMC_CLASS_TSC,
|
|
.pd_caps = PMC_CAP_READ | PMC_CAP_WRITE,
|
|
.pd_width = 64
|
|
},
|
|
.pm_pmcnum = ~0,
|
|
.pm_cccr_msr = ~0,
|
|
.pm_pmc_msr = 0x10,
|
|
},
|
|
|
|
/*
|
|
* P4 PMCS
|
|
*/
|
|
|
|
#define P4_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | PMC_CAP_SYSTEM | \
|
|
PMC_CAP_EDGE | PMC_CAP_THRESHOLD | PMC_CAP_READ | PMC_CAP_WRITE | \
|
|
PMC_CAP_INVERT | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE | \
|
|
PMC_CAP_TAGGING | PMC_CAP_CASCADE)
|
|
|
|
#define P4_PMCDESCR(N, PMC, CCCR) \
|
|
{ \
|
|
.pm_descr = \
|
|
{ \
|
|
.pd_name = #N, \
|
|
.pd_class = PMC_CLASS_P4, \
|
|
.pd_caps = P4_PMC_CAPS, \
|
|
.pd_width = 40 \
|
|
}, \
|
|
.pm_pmcnum = P4_PMC_##N, \
|
|
.pm_cccr_msr = (CCCR), \
|
|
.pm_pmc_msr = (PMC) \
|
|
}
|
|
|
|
P4_PMCDESCR(BPU_COUNTER0, 0x300, 0x360),
|
|
P4_PMCDESCR(BPU_COUNTER1, 0x301, 0x361),
|
|
P4_PMCDESCR(BPU_COUNTER2, 0x302, 0x362),
|
|
P4_PMCDESCR(BPU_COUNTER3, 0x303, 0x363),
|
|
P4_PMCDESCR(MS_COUNTER0, 0x304, 0x364),
|
|
P4_PMCDESCR(MS_COUNTER1, 0x305, 0x365),
|
|
P4_PMCDESCR(MS_COUNTER2, 0x306, 0x366),
|
|
P4_PMCDESCR(MS_COUNTER3, 0x307, 0x367),
|
|
P4_PMCDESCR(FLAME_COUNTER0, 0x308, 0x368),
|
|
P4_PMCDESCR(FLAME_COUNTER1, 0x309, 0x369),
|
|
P4_PMCDESCR(FLAME_COUNTER2, 0x30A, 0x36A),
|
|
P4_PMCDESCR(FLAME_COUNTER3, 0x30B, 0x36B),
|
|
P4_PMCDESCR(IQ_COUNTER0, 0x30C, 0x36C),
|
|
P4_PMCDESCR(IQ_COUNTER1, 0x30D, 0x36D),
|
|
P4_PMCDESCR(IQ_COUNTER2, 0x30E, 0x36E),
|
|
P4_PMCDESCR(IQ_COUNTER3, 0x30F, 0x36F),
|
|
P4_PMCDESCR(IQ_COUNTER4, 0x310, 0x370),
|
|
P4_PMCDESCR(IQ_COUNTER5, 0x311, 0x371),
|
|
|
|
#undef P4_PMCDESCR
|
|
};
|
|
|
|
/* HTT support */
#define	P4_NHTT			2	/* logical processors/chip */
#define	P4_HTT_CPU_INDEX_0	0
#define	P4_HTT_CPU_INDEX_1	1

/* Set to 1 by p4_init() once it has been called for a logical CPU. */
static int	p4_system_has_htt;
|
|
|
|
/*
|
|
* Per-CPU data structure for P4 class CPUs
|
|
*
|
|
* [common stuff]
|
|
* [19 struct pmc_hw pointers]
|
|
* [19 struct pmc_hw structures]
|
|
* [45 ESCRs status bytes]
|
|
* [per-cpu spin mutex]
|
|
* [19 flag fields for holding config flags and a runcount]
|
|
* [19*2 hw value fields] (Thread mode PMC support)
|
|
* or
|
|
* [19*2 EIP values] (Sampling mode PMCs)
|
|
* [19*2 pmc value fields] (Thread mode PMC support))
|
|
*/
|
|
|
|
struct p4_cpu {
|
|
struct pmc_cpu pc_common;
|
|
struct pmc_hw *pc_hwpmcs[P4_NPMCS];
|
|
struct pmc_hw pc_p4pmcs[P4_NPMCS];
|
|
char pc_escrs[P4_NESCR];
|
|
struct mtx pc_mtx; /* spin lock */
|
|
uint32_t pc_intrflag; /* NMI handler flags */
|
|
unsigned int pc_intrlock; /* NMI handler spin lock */
|
|
unsigned char pc_flags[P4_NPMCS]; /* 4 bits each: {cfg,run}count */
|
|
union {
|
|
pmc_value_t pc_hw[P4_NPMCS * P4_NHTT];
|
|
uintptr_t pc_ip[P4_NPMCS * P4_NHTT];
|
|
} pc_si;
|
|
pmc_value_t pc_pmc_values[P4_NPMCS * P4_NHTT];
|
|
};
|
|
|
|
/*
|
|
* A 'logical' CPU shares PMC resources with partner 'physical' CPU,
|
|
* except the TSC, which is architectural and hence seperate. The
|
|
* 'logical' CPU descriptor thus has pointers to the physical CPUs
|
|
* descriptor state except for the TSC (rowindex 0) which is not
|
|
* shared.
|
|
*/
|
|
|
|
struct p4_logicalcpu {
|
|
struct pmc_cpu pc_common;
|
|
struct pmc_hw *pc_hwpmcs[P4_NPMCS];
|
|
struct pmc_hw pc_tsc;
|
|
};
|
|
|
|
/*
 * Accessors (usable as lvalues) for the per-row, per-logical-CPU
 * saved values held in a 'struct p4_cpu'.  The backing arrays hold
 * P4_NPMCS * P4_NHTT entries; row 'RI' owns P4_NHTT consecutive
 * slots and the low bit of 'CPU' selects the slot within the row.
 *
 * The previous index expression, (RI) * ((CPU) & 1), evaluated to 0
 * for every row whenever 'CPU' was even, so all rows shared slot 0.
 */
#define	P4_PCPU_SLOT(RI,CPU)		((RI) * P4_NHTT + ((CPU) & 1))
#define	P4_PCPU_PMC_VALUE(PC,RI,CPU)	\
	((PC)->pc_pmc_values[P4_PCPU_SLOT(RI,CPU)])
#define	P4_PCPU_HW_VALUE(PC,RI,CPU)	\
	((PC)->pc_si.pc_hw[P4_PCPU_SLOT(RI,CPU)])
#define	P4_PCPU_SAVED_IP(PC,RI,CPU)	\
	((PC)->pc_si.pc_ip[P4_PCPU_SLOT(RI,CPU)])
|
|
|
|
/*
 * Accessors for the per-row 'pc_flags' byte of a 'struct p4_cpu'.
 * The low nibble (mask 0x0F) holds the run count and the high nibble
 * (mask 0xF0) holds the config flags.
 */

/* Return the bits of row RI's flag byte selected by MASK. */
#define	P4_PCPU_GET_FLAGS(PC,RI,MASK)	((PC)->pc_flags[(RI)] & (MASK))

/* Replace the bits selected by MASK in row RI's flag byte with VAL. */
#define	P4_PCPU_SET_FLAGS(PC,RI,MASK,VAL)	do {			\
	unsigned char _p4bits;						\
	_p4bits = (PC)->pc_flags[(RI)] & ~(MASK);			\
	(PC)->pc_flags[(RI)] = _p4bits | ((VAL) & (MASK));		\
} while (0)

/* Run count: low nibble. */
#define	P4_PCPU_GET_RUNCOUNT(PC,RI)	P4_PCPU_GET_FLAGS(PC,RI,0x0F)
#define	P4_PCPU_SET_RUNCOUNT(PC,RI,V)	P4_PCPU_SET_FLAGS(PC,RI,0x0F,V)

/* Config flags: high nibble, shifted down on read and up on write. */
#define	P4_PCPU_GET_CFGFLAGS(PC,RI)	(P4_PCPU_GET_FLAGS(PC,RI,0xF0) >> 4)
#define	P4_PCPU_SET_CFGFLAGS(PC,RI,C)	P4_PCPU_SET_FLAGS(PC,RI,0xF0,((C) <<4))
|
|
|
|
/*
 * Map CPU number 'C' to its bit in the CFGFLAGS mask: 0x2 for a
 * logical CPU, 0x1 otherwise.  The macro now uses its own argument;
 * it previously ignored 'C' and read a caller variable named 'cpu'.
 */
#define	P4_CPU_TO_FLAG(C)	(pmc_cpu_is_logical(C) ? 0x2 : 0x1)
|
|
|
|
/* Non-zero when bit 'I' of the 'pc_intrflag' word is set. */
#define	P4_PCPU_GET_INTRFLAG(PC,I)	((PC)->pc_intrflag & (1 << (I)))

/* Set bit 'I' of 'pc_intrflag' when 'V' is non-zero, clear it otherwise. */
#define	P4_PCPU_SET_INTRFLAG(PC,I,V)	do {				\
	uint32_t __mask = 1 << (I);					\
	if (!(V))							\
		(PC)->pc_intrflag &= ~__mask;				\
	else								\
		(PC)->pc_intrflag |= __mask;				\
} while (0)
|
|
|
|
/*
 * A minimal spin lock implementation for use inside the NMI handler.
 *
 * We don't want to use a regular spin lock here, because curthread
 * may not be consistent at the time the handler is invoked.
 *
 * Both macros operate on the 'pc_intrlock' word of the per-CPU state
 * passed as 'PC'.  They previously ignored 'PC' and used a caller
 * variable named 'pc' instead.
 */
#define	P4_PCPU_ACQ_INTR_SPINLOCK(PC) do {				\
	while (!atomic_cmpset_acq_int(&(PC)->pc_intrlock, 0, 1))	\
		ia32_pause();						\
} while (0)
/*
 * NOTE: the trailing ';' is retained from the original definition so
 * that existing call sites compile unchanged.
 */
#define	P4_PCPU_REL_INTR_SPINLOCK(PC)					\
	atomic_store_rel_int(&(PC)->pc_intrlock, 0);
|
|
|
|
/* ESCR row disposition */
|
|
static int p4_escrdisp[P4_NESCR];
|
|
|
|
#define P4_ESCR_ROW_DISP_IS_THREAD(E) (p4_escrdisp[(E)] > 0)
|
|
#define P4_ESCR_ROW_DISP_IS_STANDALONE(E) (p4_escrdisp[(E)] < 0)
|
|
#define P4_ESCR_ROW_DISP_IS_FREE(E) (p4_escrdisp[(E)] == 0)
|
|
|
|
/*
 * Mark/unmark helpers for the ESCR row disposition counters.
 * Standalone use counts downwards from zero and thread use counts
 * upwards from zero; the assertions check that a row is never moved
 * across zero into the other mode.
 */

/* Add one standalone user to row E (counter decremented). */
#define	P4_ESCR_MARK_ROW_STANDALONE(E) do {				\
	KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\
		    __LINE__));						\
	atomic_add_int(&p4_escrdisp[(E)], -1);				\
	KASSERT(p4_escrdisp[(E)] >= (-mp_ncpus), ("[p4,%d] row "	\
		"disposition error", __LINE__));			\
} while (0)

/* Remove one standalone user from row E (counter incremented). */
#define	P4_ESCR_UNMARK_ROW_STANDALONE(E) do {				\
	atomic_add_int(&p4_escrdisp[(E)], 1);				\
	KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\
		    __LINE__));						\
} while (0)

/* Add one thread-mode user to row E (counter incremented). */
#define	P4_ESCR_MARK_ROW_THREAD(E) do {					\
	KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error", \
		    __LINE__));						\
	atomic_add_int(&p4_escrdisp[(E)], 1);				\
} while (0)

/* Remove one thread-mode user from row E (counter decremented). */
#define	P4_ESCR_UNMARK_ROW_THREAD(E) do {				\
	atomic_add_int(&p4_escrdisp[(E)], -1);				\
	KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error",\
		    __LINE__));						\
} while (0)
|
|
|
|
/* True when the enable bit is clear in the CCCR read from MSR 'cccr'. */
#define	P4_PMC_IS_STOPPED(cccr)	((rdmsr(cccr) & P4_CCCR_ENABLE) == 0)

/*
 * Map a CPU number to the CPU whose per-CPU state holds the shared
 * PMCs: a logical CPU maps to the same number with bit 0 cleared, any
 * other CPU maps to itself.
 */
#define	P4_TO_PHYSICAL_CPU(cpu)						\
	(pmc_cpu_is_logical(cpu) ? ((cpu) & ~1) : (cpu))

/* CCCR bits other than the per-thread PMI, enable and overflow bits. */
#define	P4_CCCR_Tx_MASK							\
	(~(P4_CCCR_OVF_PMI_T0|P4_CCCR_OVF_PMI_T1|			\
	P4_CCCR_ENABLE|P4_CCCR_OVF))

/* ESCR bits other than the per-thread OS/USR bits. */
#define	P4_ESCR_Tx_MASK							\
	(~(P4_ESCR_T0_OS|P4_ESCR_T0_USR|P4_ESCR_T1_OS|			\
	P4_ESCR_T1_USR))
|
|
|
|
/*
|
|
* support routines
|
|
*/
|
|
|
|
static struct p4_event_descr *
|
|
p4_find_event(enum pmc_event ev)
|
|
{
|
|
int n;
|
|
|
|
for (n = 0; n < P4_NEVENTS; n++)
|
|
if (p4_events[n].pm_event == ev)
|
|
break;
|
|
if (n == P4_NEVENTS)
|
|
return NULL;
|
|
return &p4_events[n];
|
|
}
|
|
|
|
/*
|
|
* Initialize per-cpu state
|
|
*/
|
|
|
|
static int
|
|
p4_init(int cpu)
|
|
{
|
|
int n, phycpu;
|
|
char *pescr;
|
|
struct p4_cpu *pcs;
|
|
struct p4_logicalcpu *plcs;
|
|
struct pmc_hw *phw;
|
|
|
|
KASSERT(cpu >= 0 && cpu < mp_ncpus,
|
|
("[p4,%d] insane cpu number %d", __LINE__, cpu));
|
|
|
|
PMCDBG(MDP,INI,0, "p4-init cpu=%d logical=%d", cpu,
|
|
pmc_cpu_is_logical(cpu) != 0);
|
|
|
|
/*
|
|
* A 'logical' CPU shares its per-cpu state with its physical
|
|
* CPU. The physical CPU would have been initialized prior to
|
|
* the initialization for this cpu.
|
|
*/
|
|
|
|
if (pmc_cpu_is_logical(cpu)) {
|
|
phycpu = P4_TO_PHYSICAL_CPU(cpu);
|
|
pcs = (struct p4_cpu *) pmc_pcpu[phycpu];
|
|
PMCDBG(MDP,INI,1, "p4-init cpu=%d phycpu=%d pcs=%p",
|
|
cpu, phycpu, pcs);
|
|
KASSERT(pcs,
|
|
("[p4,%d] Null Per-Cpu state cpu=%d phycpu=%d", __LINE__,
|
|
cpu, phycpu));
|
|
if (pcs == NULL) /* decline to init */
|
|
return ENXIO;
|
|
|
|
p4_system_has_htt = 1;
|
|
|
|
MALLOC(plcs, struct p4_logicalcpu *,
|
|
sizeof(struct p4_logicalcpu), M_PMC, M_WAITOK|M_ZERO);
|
|
|
|
/* The TSC is architectural state and is not shared */
|
|
plcs->pc_hwpmcs[0] = &plcs->pc_tsc;
|
|
plcs->pc_tsc.phw_state = PMC_PHW_FLAG_IS_ENABLED |
|
|
PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(0) |
|
|
PMC_PHW_FLAG_IS_SHAREABLE;
|
|
|
|
/* Other PMCs are shared with the physical CPU */
|
|
for (n = 1; n < P4_NPMCS; n++)
|
|
plcs->pc_hwpmcs[n] = pcs->pc_hwpmcs[n];
|
|
|
|
pmc_pcpu[cpu] = (struct pmc_cpu *) plcs;
|
|
return 0;
|
|
}
|
|
|
|
MALLOC(pcs, struct p4_cpu *, sizeof(struct p4_cpu), M_PMC,
|
|
M_WAITOK|M_ZERO);
|
|
|
|
if (pcs == NULL)
|
|
return ENOMEM;
|
|
phw = pcs->pc_p4pmcs;
|
|
|
|
for (n = 0; n < P4_NPMCS; n++, phw++) {
|
|
phw->phw_state = PMC_PHW_FLAG_IS_ENABLED |
|
|
PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(n);
|
|
phw->phw_pmc = NULL;
|
|
pcs->pc_hwpmcs[n] = phw;
|
|
}
|
|
|
|
/* Mark the TSC as shareable */
|
|
pcs->pc_hwpmcs[0]->phw_state |= PMC_PHW_FLAG_IS_SHAREABLE;
|
|
|
|
pescr = pcs->pc_escrs;
|
|
for (n = 0; n < P4_NESCR; n++)
|
|
*pescr++ = P4_INVALID_PMC_INDEX;
|
|
pmc_pcpu[cpu] = (struct pmc_cpu *) pcs;
|
|
|
|
mtx_init(&pcs->pc_mtx, "p4-pcpu", "pmc", MTX_SPIN);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Destroy per-cpu state.
|
|
*/
|
|
|
|
static int
|
|
p4_cleanup(int cpu)
|
|
{
|
|
struct p4_cpu *pcs;
|
|
|
|
PMCDBG(MDP,INI,0, "p4-cleanup cpu=%d", cpu);
|
|
|
|
if ((pcs = (struct p4_cpu *) pmc_pcpu[cpu]) == NULL)
|
|
return 0;
|
|
|
|
/*
|
|
* If the CPU is physical we need to teardown the
|
|
* full MD state.
|
|
*/
|
|
if (!pmc_cpu_is_logical(cpu))
|
|
mtx_destroy(&pcs->pc_mtx);
|
|
|
|
FREE(pcs, M_PMC);
|
|
|
|
pmc_pcpu[cpu] = NULL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Context switch in.
|
|
*/
|
|
|
|
static int
|
|
p4_switch_in(struct pmc_cpu *pc, struct pmc_process *pp)
|
|
{
|
|
(void) pc;
|
|
|
|
PMCDBG(MDP,SWI,1, "pc=%p pp=%p enable-msr=%d", pc, pp,
|
|
(pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0);
|
|
|
|
/* enable the RDPMC instruction */
|
|
if (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS)
|
|
load_cr4(rcr4() | CR4_PCE);
|
|
|
|
PMCDBG(MDP,SWI,2, "cr4=0x%x", (uint32_t) rcr4());
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Context switch out.
|
|
*/
|
|
|
|
static int
|
|
p4_switch_out(struct pmc_cpu *pc, struct pmc_process *pp)
|
|
{
|
|
(void) pc;
|
|
(void) pp; /* can be null */
|
|
|
|
PMCDBG(MDP,SWO,1, "pc=%p pp=%p", pc, pp);
|
|
|
|
/* always disallow the RDPMC instruction */
|
|
load_cr4(rcr4() & ~CR4_PCE);
|
|
|
|
PMCDBG(MDP,SWO,2, "cr4=0x%x", (uint32_t) rcr4());
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Read a PMC
|
|
*/
|
|
|
|
static int
|
|
p4_read_pmc(int cpu, int ri, pmc_value_t *v)
|
|
{
|
|
enum pmc_mode mode;
|
|
struct p4pmc_descr *pd;
|
|
struct pmc *pm;
|
|
struct p4_cpu *pc;
|
|
struct pmc_hw *phw;
|
|
pmc_value_t tmp;
|
|
|
|
KASSERT(cpu >= 0 && cpu < mp_ncpus,
|
|
("[p4,%d] illegal CPU value %d", __LINE__, cpu));
|
|
KASSERT(ri >= 0 && ri < P4_NPMCS,
|
|
("[p4,%d] illegal row-index %d", __LINE__, ri));
|
|
|
|
|
|
if (ri == 0) { /* TSC */
|
|
#if DEBUG
|
|
pc = (struct p4_cpu *) pmc_pcpu[cpu];
|
|
phw = pc->pc_hwpmcs[ri];
|
|
pm = phw->phw_pmc;
|
|
|
|
KASSERT(pm, ("[p4,%d] cpu=%d ri=%d not configured", __LINE__,
|
|
cpu, ri));
|
|
KASSERT(PMC_TO_CLASS(pm) == PMC_CLASS_TSC,
|
|
("[p4,%d] cpu=%d ri=%d not a TSC (%d)", __LINE__, cpu, ri,
|
|
PMC_TO_CLASS(pm)));
|
|
KASSERT(PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)),
|
|
("[p4,%d] TSC counter in non-counting mode", __LINE__));
|
|
#endif
|
|
*v = rdtsc();
|
|
PMCDBG(MDP,REA,2, "p4-read -> %jx", *v);
|
|
return 0;
|
|
}
|
|
|
|
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
|
|
phw = pc->pc_hwpmcs[ri];
|
|
pd = &p4_pmcdesc[ri];
|
|
pm = phw->phw_pmc;
|
|
|
|
KASSERT(pm != NULL,
|
|
("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__,
|
|
cpu, ri));
|
|
|
|
KASSERT(pd->pm_descr.pd_class == PMC_TO_CLASS(pm),
|
|
("[p4,%d] class mismatch pd %d != id class %d", __LINE__,
|
|
pd->pm_descr.pd_class, PMC_TO_CLASS(pm)));
|
|
|
|
mode = PMC_TO_MODE(pm);
|
|
|
|
PMCDBG(MDP,REA,1, "p4-read cpu=%d ri=%d mode=%d", cpu, ri, mode);
|
|
|
|
KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
|
|
("[p4,%d] unknown PMC class %d", __LINE__, pd->pm_descr.pd_class));
|
|
|
|
tmp = rdmsr(p4_pmcdesc[ri].pm_pmc_msr);
|
|
|
|
if (PMC_IS_VIRTUAL_MODE(mode)) {
|
|
if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit overflow */
|
|
tmp += (P4_PERFCTR_MASK + 1) -
|
|
P4_PCPU_HW_VALUE(pc,ri,cpu);
|
|
else
|
|
tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu);
|
|
tmp += P4_PCPU_PMC_VALUE(pc,ri,cpu);
|
|
}
|
|
|
|
if (PMC_IS_SAMPLING_MODE(mode)) /* undo transformation */
|
|
*v = P4_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp);
|
|
else
|
|
*v = tmp;
|
|
|
|
PMCDBG(MDP,REA,2, "p4-read -> %jx", *v);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Write a PMC
|
|
*/
|
|
|
|
static int
|
|
p4_write_pmc(int cpu, int ri, pmc_value_t v)
|
|
{
|
|
enum pmc_mode mode;
|
|
struct pmc *pm;
|
|
struct p4_cpu *pc;
|
|
const struct pmc_hw *phw;
|
|
const struct p4pmc_descr *pd;
|
|
|
|
KASSERT(cpu >= 0 && cpu < mp_ncpus,
|
|
("[amd,%d] illegal CPU value %d", __LINE__, cpu));
|
|
KASSERT(ri >= 0 && ri < P4_NPMCS,
|
|
("[amd,%d] illegal row-index %d", __LINE__, ri));
|
|
|
|
|
|
/*
|
|
* The P4's TSC register is writeable, but we don't allow a
|
|
* write as changing the TSC's value could interfere with
|
|
* timekeeping and other system functions.
|
|
*/
|
|
if (ri == 0) {
|
|
#if DEBUG
|
|
pc = (struct p4_cpu *) pmc_pcpu[cpu];
|
|
phw = pc->pc_hwpmcs[ri];
|
|
pm = phw->phw_pmc;
|
|
KASSERT(pm, ("[p4,%d] cpu=%d ri=%d not configured", __LINE__,
|
|
cpu, ri));
|
|
KASSERT(PMC_TO_CLASS(pm) == PMC_CLASS_TSC,
|
|
("[p4,%d] cpu=%d ri=%d not a TSC (%d)", __LINE__,
|
|
cpu, ri, PMC_TO_CLASS(pm)));
|
|
#endif
|
|
return 0;
|
|
}
|
|
|
|
/* Shared PMCs */
|
|
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
|
|
phw = pc->pc_hwpmcs[ri];
|
|
pm = phw->phw_pmc;
|
|
pd = &p4_pmcdesc[ri];
|
|
|
|
KASSERT(pm != NULL,
|
|
("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__,
|
|
cpu, ri));
|
|
|
|
mode = PMC_TO_MODE(pm);
|
|
|
|
PMCDBG(MDP,WRI,1, "p4-write cpu=%d ri=%d mode=%d v=%jx", cpu, ri,
|
|
mode, v);
|
|
|
|
/*
|
|
* write the PMC value to the register/saved value: for
|
|
* sampling mode PMCs, the value to be programmed into the PMC
|
|
* counter is -(C+1) where 'C' is the requested sample rate.
|
|
*/
|
|
if (PMC_IS_SAMPLING_MODE(mode))
|
|
v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(v);
|
|
|
|
if (PMC_IS_SYSTEM_MODE(mode))
|
|
wrmsr(pd->pm_pmc_msr, v);
|
|
else
|
|
P4_PCPU_PMC_VALUE(pc,ri,cpu) = v;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Configure a PMC 'pm' on the given CPU and row-index.
|
|
*
|
|
* 'pm' may be NULL to indicate de-configuration.
|
|
*
|
|
* On HTT systems, a PMC may get configured twice, once for each
|
|
* "logical" CPU. We track this using the CFGFLAGS field of the
|
|
* per-cpu state; this field is a bit mask with one bit each for
|
|
* logical CPUs 0 & 1.
|
|
*/
|
|
|
|
static int
|
|
p4_config_pmc(int cpu, int ri, struct pmc *pm)
|
|
{
|
|
struct pmc_hw *phw;
|
|
struct p4_cpu *pc;
|
|
int cfgflags, cpuflag;
|
|
|
|
KASSERT(cpu >= 0 && cpu < mp_ncpus,
|
|
("[p4,%d] illegal CPU %d", __LINE__, cpu));
|
|
KASSERT(ri >= 0 && ri < P4_NPMCS,
|
|
("[p4,%d] illegal row-index %d", __LINE__, ri));
|
|
|
|
PMCDBG(MDP,CFG,1, "cpu=%d ri=%d pm=%p", cpu, ri, pm);
|
|
|
|
if (ri == 0) { /* TSC */
|
|
pc = (struct p4_cpu *) pmc_pcpu[cpu];
|
|
phw = pc->pc_hwpmcs[ri];
|
|
|
|
KASSERT(pm == NULL || phw->phw_pmc == NULL,
|
|
("[p4,%d] hwpmc doubly config'ed", __LINE__));
|
|
phw->phw_pmc = pm;
|
|
return 0;
|
|
}
|
|
|
|
/* Shared PMCs */
|
|
|
|
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
|
|
phw = pc->pc_hwpmcs[ri];
|
|
|
|
KASSERT(pm == NULL || phw->phw_pmc == NULL ||
|
|
(p4_system_has_htt && phw->phw_pmc == pm),
|
|
("[p4,%d] hwpmc not unconfigured before re-config", __LINE__));
|
|
|
|
mtx_lock_spin(&pc->pc_mtx);
|
|
cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri);
|
|
|
|
KASSERT(cfgflags >= 0 || cfgflags <= 3,
|
|
("[p4,%d] illegal cfgflags cfg=%d on cpu=%d ri=%d", __LINE__,
|
|
cfgflags, cpu, ri));
|
|
|
|
KASSERT(cfgflags == 0 || phw->phw_pmc,
|
|
("[p4,%d] cpu=%d ri=%d pmc configured with zero cfg count",
|
|
__LINE__, cpu, ri));
|
|
|
|
cpuflag = P4_CPU_TO_FLAG(cpu);
|
|
|
|
if (pm) { /* config */
|
|
if (cfgflags == 0)
|
|
phw->phw_pmc = pm;
|
|
|
|
KASSERT(phw->phw_pmc == pm,
|
|
("[p4,%d] cpu=%d ri=%d config %p != hw %p",
|
|
__LINE__, cpu, ri, pm, phw->phw_pmc));
|
|
|
|
cfgflags |= cpuflag;
|
|
} else { /* unconfig */
|
|
cfgflags &= ~cpuflag;
|
|
|
|
if (cfgflags == 0)
|
|
phw->phw_pmc = NULL;
|
|
}
|
|
|
|
KASSERT(cfgflags >= 0 || cfgflags <= 3,
|
|
("[p4,%d] illegal runcount cfg=%d on cpu=%d ri=%d", __LINE__,
|
|
cfgflags, cpu, ri));
|
|
|
|
P4_PCPU_SET_CFGFLAGS(pc,ri,cfgflags);
|
|
|
|
mtx_unlock_spin(&pc->pc_mtx);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Retrieve a configured PMC pointer from hardware state.
|
|
*/
|
|
|
|
static int
|
|
p4_get_config(int cpu, int ri, struct pmc **ppm)
|
|
{
|
|
struct p4_cpu *pc;
|
|
struct pmc_hw *phw;
|
|
int cfgflags;
|
|
|
|
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
|
|
phw = pc->pc_hwpmcs[ri];
|
|
|
|
mtx_lock_spin(&pc->pc_mtx);
|
|
cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri);
|
|
mtx_unlock_spin(&pc->pc_mtx);
|
|
|
|
if (cfgflags & P4_CPU_TO_FLAG(cpu))
|
|
*ppm = phw->phw_pmc; /* PMC config'ed on this CPU */
|
|
else
|
|
*ppm = NULL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Allocate a PMC.
|
|
*
|
|
* The allocation strategy differs between HTT and non-HTT systems.
|
|
*
|
|
* The non-HTT case:
|
|
* - Given the desired event and the PMC row-index, lookup the
|
|
* list of valid ESCRs for the event.
|
|
* - For each valid ESCR:
|
|
* - Check if the ESCR is free and the ESCR row is in a compatible
|
|
* mode (i.e., system or process))
|
|
* - Check if the ESCR is usable with a P4 PMC at the desired row-index.
|
|
* If everything matches, we determine the appropriate bit values for the
|
|
* ESCR and CCCR registers.
|
|
*
|
|
* The HTT case:
|
|
*
|
|
* - Process mode PMCs require special care. The FreeBSD scheduler could
|
|
* schedule any two processes on the same physical CPU. We need to ensure
|
|
* that a given PMC row-index is never allocated to two different
|
|
* PMCs owned by different user-processes.
|
|
* This is ensured by always allocating a PMC from a 'FREE' PMC row
|
|
* if the system has HTT active.
|
|
* - A similar check needs to be done for ESCRs; we do not want two PMCs
|
|
* using the same ESCR to be scheduled at the same time. Thus ESCR
|
|
* allocation is also restricted to FREE rows if the system has HTT
|
|
* enabled.
|
|
* - Thirdly, some events are 'thread-independent' terminology, i.e.,
|
|
* the PMC hardware cannot distinguish between events caused by
|
|
* different logical CPUs. This makes it impossible to assign events
|
|
* to a given thread of execution. If the system has HTT enabled,
|
|
* these events are not allowed for process-mode PMCs.
|
|
*/
|
|
|
|
static int
|
|
p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
|
|
const struct pmc_op_pmcallocate *a)
|
|
{
|
|
int found, n, m;
|
|
uint32_t caps, cccrvalue, escrvalue, tflags;
|
|
enum pmc_p4escr escr;
|
|
struct p4_cpu *pc;
|
|
struct p4_event_descr *pevent;
|
|
const struct p4pmc_descr *pd;
|
|
|
|
KASSERT(cpu >= 0 && cpu < mp_ncpus,
|
|
("[p4,%d] illegal CPU %d", __LINE__, cpu));
|
|
KASSERT(ri >= 0 && ri < P4_NPMCS,
|
|
("[p4,%d] illegal row-index value %d", __LINE__, ri));
|
|
|
|
pd = &p4_pmcdesc[ri];
|
|
|
|
PMCDBG(MDP,ALL,1, "p4-allocate ri=%d class=%d pmccaps=0x%x "
|
|
"reqcaps=0x%x", ri, pd->pm_descr.pd_class, pd->pm_descr.pd_caps,
|
|
pm->pm_caps);
|
|
|
|
/* check class */
|
|
if (pd->pm_descr.pd_class != a->pm_class)
|
|
return EINVAL;
|
|
|
|
/* check requested capabilities */
|
|
caps = a->pm_caps;
|
|
if ((pd->pm_descr.pd_caps & caps) != caps)
|
|
return EPERM;
|
|
|
|
if (pd->pm_descr.pd_class == PMC_CLASS_TSC) {
|
|
/* TSC's are always allocated in system-wide counting mode */
|
|
if (a->pm_ev != PMC_EV_TSC_TSC ||
|
|
a->pm_mode != PMC_MODE_SC)
|
|
return EINVAL;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* If the system has HTT enabled, and the desired allocation
|
|
* mode is process-private, and the PMC row disposition is not
|
|
* FREE (0), decline the allocation.
|
|
*/
|
|
|
|
if (p4_system_has_htt &&
|
|
PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) &&
|
|
pmc_getrowdisp(ri) != 0)
|
|
return EBUSY;
|
|
|
|
KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
|
|
("[p4,%d] unknown PMC class %d", __LINE__,
|
|
pd->pm_descr.pd_class));
|
|
|
|
if (pm->pm_event < PMC_EV_P4_FIRST ||
|
|
pm->pm_event > PMC_EV_P4_LAST)
|
|
return EINVAL;
|
|
|
|
if ((pevent = p4_find_event(pm->pm_event)) == NULL)
|
|
return ESRCH;
|
|
|
|
PMCDBG(MDP,ALL,2, "pevent={ev=%d,escrsel=0x%x,cccrsel=0x%x,isti=%d}",
|
|
pevent->pm_event, pevent->pm_escr_eventselect,
|
|
pevent->pm_cccr_select, pevent->pm_is_ti_event);
|
|
|
|
/*
|
|
* Some PMC events are 'thread independent'and therefore
|
|
* cannot be used for process-private modes if HTT is being
|
|
* used.
|
|
*/
|
|
|
|
if (P4_EVENT_IS_TI(pevent) &&
|
|
PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) &&
|
|
p4_system_has_htt)
|
|
return EINVAL;
|
|
|
|
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
|
|
|
|
found = 0;
|
|
|
|
/* look for a suitable ESCR for this event */
|
|
for (n = 0; n < P4_MAX_ESCR_PER_EVENT && !found; n++) {
|
|
if ((escr = pevent->pm_escrs[n]) == P4_ESCR_NONE)
|
|
break; /* out of ESCRs */
|
|
/*
|
|
* Check ESCR row disposition.
|
|
*
|
|
* If the request is for a system-mode PMC, then the
|
|
* ESCR row should not be in process-virtual mode, and
|
|
* should also be free on the current CPU.
|
|
*/
|
|
|
|
if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
|
|
if (P4_ESCR_ROW_DISP_IS_THREAD(escr) ||
|
|
pc->pc_escrs[escr] != P4_INVALID_PMC_INDEX)
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* If the request is for a process-virtual PMC, and if
|
|
* HTT is not enabled, we can use an ESCR row that is
|
|
* either FREE or already in process mode.
|
|
*
|
|
* If HTT is enabled, then we need to ensure that a
|
|
* given ESCR is never allocated to two PMCS that
|
|
* could run simultaneously on the two logical CPUs of
|
|
* a CPU package. We ensure this be only allocating
|
|
* ESCRs from rows marked as 'FREE'.
|
|
*/
|
|
|
|
if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) {
|
|
if (p4_system_has_htt) {
|
|
if (!P4_ESCR_ROW_DISP_IS_FREE(escr))
|
|
continue;
|
|
} else
|
|
if (P4_ESCR_ROW_DISP_IS_STANDALONE(escr))
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* We found a suitable ESCR for this event. Now check if
|
|
* this escr can work with the PMC at row-index 'ri'.
|
|
*/
|
|
|
|
for (m = 0; m < P4_MAX_PMC_PER_ESCR; m++)
|
|
if (p4_escrs[escr].pm_pmcs[m] == pd->pm_pmcnum) {
|
|
found = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (found == 0)
|
|
return ESRCH;
|
|
|
|
KASSERT((int) escr >= 0 && escr < P4_NESCR,
|
|
("[p4,%d] illegal ESCR value %d", __LINE__, escr));
|
|
|
|
/* mark ESCR row mode */
|
|
if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
|
|
pc->pc_escrs[escr] = ri; /* mark ESCR as in use on this cpu */
|
|
P4_ESCR_MARK_ROW_STANDALONE(escr);
|
|
} else {
|
|
KASSERT(pc->pc_escrs[escr] == P4_INVALID_PMC_INDEX,
|
|
("[p4,%d] escr[%d] already in use", __LINE__, escr));
|
|
P4_ESCR_MARK_ROW_THREAD(escr);
|
|
}
|
|
|
|
pm->pm_md.pm_p4.pm_p4_escrmsr = p4_escrs[escr].pm_escr_msr;
|
|
pm->pm_md.pm_p4.pm_p4_escr = escr;
|
|
|
|
cccrvalue = P4_CCCR_TO_ESCR_SELECT(pevent->pm_cccr_select);
|
|
escrvalue = P4_ESCR_TO_EVENT_SELECT(pevent->pm_escr_eventselect);
|
|
|
|
/* CCCR fields */
|
|
if (caps & PMC_CAP_THRESHOLD)
|
|
cccrvalue |= (a->pm_md.pm_p4.pm_p4_cccrconfig &
|
|
P4_CCCR_THRESHOLD_MASK) | P4_CCCR_COMPARE;
|
|
|
|
if (caps & PMC_CAP_EDGE)
|
|
cccrvalue |= P4_CCCR_EDGE;
|
|
|
|
if (caps & PMC_CAP_INVERT)
|
|
cccrvalue |= P4_CCCR_COMPLEMENT;
|
|
|
|
if (p4_system_has_htt)
|
|
cccrvalue |= a->pm_md.pm_p4.pm_p4_cccrconfig &
|
|
P4_CCCR_ACTIVE_THREAD_MASK;
|
|
else /* no HTT; thread field should be '11b' */
|
|
cccrvalue |= P4_CCCR_TO_ACTIVE_THREAD(0x3);
|
|
|
|
if (caps & PMC_CAP_CASCADE)
|
|
cccrvalue |= P4_CCCR_CASCADE;
|
|
|
|
/* On HTT systems the PMI T0 field may get moved to T1 at pmc start */
|
|
if (caps & PMC_CAP_INTERRUPT)
|
|
cccrvalue |= P4_CCCR_OVF_PMI_T0;
|
|
|
|
/* ESCR fields */
|
|
if (caps & PMC_CAP_QUALIFIER)
|
|
escrvalue |= a->pm_md.pm_p4.pm_p4_escrconfig &
|
|
P4_ESCR_EVENT_MASK_MASK;
|
|
if (caps & PMC_CAP_TAGGING)
|
|
escrvalue |= (a->pm_md.pm_p4.pm_p4_escrconfig &
|
|
P4_ESCR_TAG_VALUE_MASK) | P4_ESCR_TAG_ENABLE;
|
|
if (caps & PMC_CAP_QUALIFIER)
|
|
escrvalue |= (a->pm_md.pm_p4.pm_p4_escrconfig &
|
|
P4_ESCR_EVENT_MASK_MASK);
|
|
|
|
/* HTT: T0_{OS,USR} bits may get moved to T1 at pmc start */
|
|
tflags = 0;
|
|
if (caps & PMC_CAP_SYSTEM)
|
|
tflags |= P4_ESCR_T0_OS;
|
|
if (caps & PMC_CAP_USER)
|
|
tflags |= P4_ESCR_T0_USR;
|
|
if (tflags == 0)
|
|
tflags = (P4_ESCR_T0_OS|P4_ESCR_T0_USR);
|
|
escrvalue |= tflags;
|
|
|
|
pm->pm_md.pm_p4.pm_p4_cccrvalue = cccrvalue;
|
|
pm->pm_md.pm_p4.pm_p4_escrvalue = escrvalue;
|
|
|
|
PMCDBG(MDP,ALL,2, "p4-allocate cccrsel=0x%x cccrval=0x%x "
|
|
"escr=%d escrmsr=0x%x escrval=0x%x", pevent->pm_cccr_select,
|
|
cccrvalue, escr, pm->pm_md.pm_p4.pm_p4_escrmsr, escrvalue);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* release a PMC.
|
|
*/
|
|
|
|
static int
|
|
p4_release_pmc(int cpu, int ri, struct pmc *pm)
|
|
{
|
|
enum pmc_p4escr escr;
|
|
struct pmc_hw *phw;
|
|
struct p4_cpu *pc;
|
|
|
|
if (p4_pmcdesc[ri].pm_descr.pd_class == PMC_CLASS_TSC)
|
|
return 0;
|
|
|
|
escr = pm->pm_md.pm_p4.pm_p4_escr;
|
|
|
|
PMCDBG(MDP,REL,1, "p4-release cpu=%d ri=%d escr=%d", cpu, ri, escr);
|
|
|
|
if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
|
|
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
|
|
phw = pc->pc_hwpmcs[ri];
|
|
|
|
KASSERT(phw->phw_pmc == NULL,
|
|
("[p4,%d] releasing configured PMC ri=%d", __LINE__, ri));
|
|
|
|
P4_ESCR_UNMARK_ROW_STANDALONE(escr);
|
|
KASSERT(pc->pc_escrs[escr] == ri,
|
|
("[p4,%d] escr[%d] not allocated to ri %d", __LINE__,
|
|
escr, ri));
|
|
pc->pc_escrs[escr] = P4_INVALID_PMC_INDEX; /* mark as free */
|
|
} else
|
|
P4_ESCR_UNMARK_ROW_THREAD(escr);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Start a PMC
|
|
*/
|
|
|
|
static int
|
|
p4_start_pmc(int cpu, int ri)
|
|
{
|
|
int rc;
|
|
uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits;
|
|
struct pmc *pm;
|
|
struct p4_cpu *pc;
|
|
struct pmc_hw *phw;
|
|
struct p4pmc_descr *pd;
|
|
|
|
KASSERT(cpu >= 0 && cpu < mp_ncpus,
|
|
("[p4,%d] illegal CPU value %d", __LINE__, cpu));
|
|
KASSERT(ri >= 0 && ri < P4_NPMCS,
|
|
("[p4,%d] illegal row-index %d", __LINE__, ri));
|
|
|
|
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
|
|
phw = pc->pc_hwpmcs[ri];
|
|
pm = phw->phw_pmc;
|
|
pd = &p4_pmcdesc[ri];
|
|
|
|
KASSERT(pm != NULL,
|
|
("[p4,%d] starting cpu%d,pmc%d with null pmc", __LINE__,
|
|
cpu, ri));
|
|
|
|
PMCDBG(MDP,STA,1, "p4-start cpu=%d ri=%d", cpu, ri);
|
|
|
|
if (pd->pm_descr.pd_class == PMC_CLASS_TSC) /* TSC are always on */
|
|
return 0;
|
|
|
|
KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
|
|
("[p4,%d] wrong PMC class %d", __LINE__,
|
|
pd->pm_descr.pd_class));
|
|
|
|
/* retrieve the desired CCCR/ESCR values from the PMC */
|
|
cccrvalue = pm->pm_md.pm_p4.pm_p4_cccrvalue;
|
|
escrvalue = pm->pm_md.pm_p4.pm_p4_escrvalue;
|
|
escrmsr = pm->pm_md.pm_p4.pm_p4_escrmsr;
|
|
|
|
/* extract and zero the logical processor selection bits */
|
|
cccrtbits = cccrvalue & P4_CCCR_OVF_PMI_T0;
|
|
escrtbits = escrvalue & (P4_ESCR_T0_OS|P4_ESCR_T0_USR);
|
|
cccrvalue &= ~P4_CCCR_OVF_PMI_T0;
|
|
escrvalue &= ~(P4_ESCR_T0_OS|P4_ESCR_T0_USR);
|
|
|
|
if (pmc_cpu_is_logical(cpu)) { /* shift T0 bits to T1 position */
|
|
cccrtbits <<= 1;
|
|
escrtbits >>= 2;
|
|
}
|
|
|
|
/* start system mode PMCs directly */
|
|
if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
|
|
wrmsr(escrmsr, escrvalue | escrtbits);
|
|
wrmsr(pd->pm_cccr_msr, cccrvalue | cccrtbits | P4_CCCR_ENABLE);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Thread mode PMCs
|
|
*
|
|
* On HTT machines, the same PMC could be scheduled on the
|
|
* same physical CPU twice (once for each logical CPU), for
|
|
* example, if two threads of a multi-threaded process get
|
|
* scheduled on the same CPU.
|
|
*
|
|
*/
|
|
|
|
mtx_lock_spin(&pc->pc_mtx);
|
|
|
|
rc = P4_PCPU_GET_RUNCOUNT(pc,ri);
|
|
KASSERT(rc == 0 || rc == 1,
|
|
("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri,
|
|
rc));
|
|
|
|
if (rc == 0) { /* 1st CPU and the non-HTT case */
|
|
|
|
KASSERT(P4_PMC_IS_STOPPED(pd->pm_cccr_msr),
|
|
("[p4,%d] cpu=%d ri=%d cccr=0x%x not stopped", __LINE__,
|
|
cpu, ri, pd->pm_cccr_msr));
|
|
|
|
/* write out the low 40 bits of the saved value to hardware */
|
|
wrmsr(pd->pm_pmc_msr,
|
|
P4_PCPU_PMC_VALUE(pc,ri,cpu) & P4_PERFCTR_MASK);
|
|
|
|
} else if (rc == 1) { /* 2nd CPU */
|
|
|
|
/*
|
|
* Stop the PMC and retrieve the CCCR and ESCR values
|
|
* from their MSRs, and turn on the additional T[0/1]
|
|
* bits for the 2nd CPU.
|
|
*/
|
|
|
|
cccrvalue = rdmsr(pd->pm_cccr_msr);
|
|
wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);
|
|
|
|
/* check that the configuration bits read back match the PMC */
|
|
KASSERT((cccrvalue & P4_CCCR_Tx_MASK) ==
|
|
(pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK),
|
|
("[p4,%d] Extra CCCR bits cpu=%d rc=%d ri=%d "
|
|
"cccr=0x%x PMC=0x%x", __LINE__, cpu, rc, ri,
|
|
cccrvalue & P4_CCCR_Tx_MASK,
|
|
pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK));
|
|
KASSERT(cccrvalue & P4_CCCR_ENABLE,
|
|
("[p4,%d] 2nd cpu rc=%d cpu=%d ri=%d not running",
|
|
__LINE__, rc, cpu, ri));
|
|
KASSERT((cccrvalue & cccrtbits) == 0,
|
|
("[p4,%d] CCCR T0/T1 mismatch rc=%d cpu=%d ri=%d"
|
|
"cccrvalue=0x%x tbits=0x%x", __LINE__, rc, cpu, ri,
|
|
cccrvalue, cccrtbits));
|
|
|
|
escrvalue = rdmsr(escrmsr);
|
|
|
|
KASSERT((escrvalue & P4_ESCR_Tx_MASK) ==
|
|
(pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK),
|
|
("[p4,%d] Extra ESCR bits cpu=%d rc=%d ri=%d "
|
|
"escr=0x%x pm=0x%x", __LINE__, cpu, rc, ri,
|
|
escrvalue & P4_ESCR_Tx_MASK,
|
|
pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK));
|
|
KASSERT((escrvalue & escrtbits) == 0,
|
|
("[p4,%d] ESCR T0/T1 mismatch rc=%d cpu=%d ri=%d "
|
|
"escrmsr=0x%x escrvalue=0x%x tbits=0x%x", __LINE__,
|
|
rc, cpu, ri, escrmsr, escrvalue, escrtbits));
|
|
}
|
|
|
|
/* Enable the correct bits for this CPU. */
|
|
escrvalue |= escrtbits;
|
|
cccrvalue |= cccrtbits | P4_CCCR_ENABLE;
|
|
|
|
/* Save HW value at the time of starting hardware */
|
|
P4_PCPU_HW_VALUE(pc,ri,cpu) = rdmsr(pd->pm_pmc_msr);
|
|
|
|
/* Program the ESCR and CCCR and start the PMC */
|
|
wrmsr(escrmsr, escrvalue);
|
|
wrmsr(pd->pm_cccr_msr, cccrvalue);
|
|
|
|
++rc;
|
|
P4_PCPU_SET_RUNCOUNT(pc,ri,rc);
|
|
|
|
mtx_unlock_spin(&pc->pc_mtx);
|
|
|
|
PMCDBG(MDP,STA,2,"p4-start cpu=%d rc=%d ri=%d escr=%d "
|
|
"escrmsr=0x%x escrvalue=0x%x cccr_config=0x%x v=%jx", cpu, rc,
|
|
ri, pm->pm_md.pm_p4.pm_p4_escr, escrmsr, escrvalue,
|
|
cccrvalue, P4_PCPU_HW_VALUE(pc,ri,cpu));
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Stop a PMC.
|
|
*/
|
|
|
|
static int
|
|
p4_stop_pmc(int cpu, int ri)
|
|
{
|
|
int rc;
|
|
uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits;
|
|
struct pmc *pm;
|
|
struct p4_cpu *pc;
|
|
struct pmc_hw *phw;
|
|
struct p4pmc_descr *pd;
|
|
pmc_value_t tmp;
|
|
|
|
KASSERT(cpu >= 0 && cpu < mp_ncpus,
|
|
("[p4,%d] illegal CPU value %d", __LINE__, cpu));
|
|
KASSERT(ri >= 0 && ri < P4_NPMCS,
|
|
("[p4,%d] illegal row index %d", __LINE__, ri));
|
|
|
|
pd = &p4_pmcdesc[ri];
|
|
|
|
if (pd->pm_descr.pd_class == PMC_CLASS_TSC)
|
|
return 0;
|
|
|
|
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
|
|
phw = pc->pc_hwpmcs[ri];
|
|
|
|
KASSERT(phw != NULL,
|
|
("[p4,%d] null phw for cpu%d, ri%d", __LINE__, cpu, ri));
|
|
|
|
pm = phw->phw_pmc;
|
|
|
|
KASSERT(pm != NULL,
|
|
("[p4,%d] null pmc for cpu%d, ri%d", __LINE__, cpu, ri));
|
|
|
|
PMCDBG(MDP,STO,1, "p4-stop cpu=%d ri=%d", cpu, ri);
|
|
|
|
if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
|
|
wrmsr(pd->pm_cccr_msr,
|
|
pm->pm_md.pm_p4.pm_p4_cccrvalue & ~P4_CCCR_ENABLE);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Thread mode PMCs.
|
|
*
|
|
* On HTT machines, this PMC may be in use by two threads
|
|
* running on two logical CPUS. Thus we look at the
|
|
* 'pm_runcount' field and only turn off the appropriate TO/T1
|
|
* bits (and keep the PMC running) if two logical CPUs were
|
|
* using the PMC.
|
|
*
|
|
*/
|
|
|
|
/* bits to mask */
|
|
cccrtbits = P4_CCCR_OVF_PMI_T0;
|
|
escrtbits = P4_ESCR_T0_OS | P4_ESCR_T0_USR;
|
|
if (pmc_cpu_is_logical(cpu)) {
|
|
cccrtbits <<= 1;
|
|
escrtbits >>= 2;
|
|
}
|
|
|
|
mtx_lock_spin(&pc->pc_mtx);
|
|
|
|
rc = P4_PCPU_GET_RUNCOUNT(pc,ri);
|
|
|
|
KASSERT(rc == 2 || rc == 1,
|
|
("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri,
|
|
rc));
|
|
|
|
--rc;
|
|
|
|
P4_PCPU_SET_RUNCOUNT(pc,ri,rc);
|
|
|
|
/* Stop this PMC */
|
|
cccrvalue = rdmsr(pd->pm_cccr_msr);
|
|
wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);
|
|
|
|
escrmsr = pm->pm_md.pm_p4.pm_p4_escrmsr;
|
|
escrvalue = rdmsr(escrmsr);
|
|
|
|
/* The current CPU should be running on this PMC */
|
|
KASSERT(escrvalue & escrtbits,
|
|
("[p4,%d] ESCR T0/T1 mismatch cpu=%d rc=%d ri=%d escrmsr=0x%x "
|
|
"escrvalue=0x%x tbits=0x%x", __LINE__, cpu, rc, ri, escrmsr,
|
|
escrvalue, escrtbits));
|
|
KASSERT(PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)) ||
|
|
(cccrvalue & cccrtbits),
|
|
("[p4,%d] CCCR T0/T1 mismatch cpu=%d ri=%d cccrvalue=0x%x "
|
|
"tbits=0x%x", __LINE__, cpu, ri, cccrvalue, cccrtbits));
|
|
|
|
/* get the current hardware reading */
|
|
tmp = rdmsr(pd->pm_pmc_msr);
|
|
|
|
if (rc == 1) { /* need to keep the PMC running */
|
|
escrvalue &= ~escrtbits;
|
|
cccrvalue &= ~cccrtbits;
|
|
wrmsr(escrmsr, escrvalue);
|
|
wrmsr(pd->pm_cccr_msr, cccrvalue);
|
|
}
|
|
|
|
mtx_unlock_spin(&pc->pc_mtx);
|
|
|
|
PMCDBG(MDP,STO,2, "p4-stop cpu=%d rc=%d ri=%d escrmsr=0x%x "
|
|
"escrval=0x%x cccrval=0x%x v=%jx", cpu, rc, ri, escrmsr,
|
|
escrvalue, cccrvalue, tmp);
|
|
|
|
if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit counter overflow */
|
|
tmp += (P4_PERFCTR_MASK + 1) - P4_PCPU_HW_VALUE(pc,ri,cpu);
|
|
else
|
|
tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu);
|
|
|
|
P4_PCPU_PMC_VALUE(pc,ri,cpu) += tmp;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Handle an interrupt.
|
|
*
|
|
* The hardware sets the CCCR_OVF whenever a counter overflow occurs, so the handler
|
|
* examines all the 18 CCCR registers, processing the counters that have overflowed.
|
|
*
|
|
* On HTT machines, the CCCR register is shared and will interrupt
|
|
* both logical processors if so configured. Thus multiple logical
|
|
* CPUs could enter the NMI service routine at the same time. These
|
|
* will get serialized using a per-cpu spinlock dedicated for use in
|
|
* the NMI handler.
|
|
*/
|
|
|
|
static int
|
|
p4_intr(int cpu, uintptr_t eip, int usermode)
|
|
{
|
|
int i, did_interrupt, error, ri;
|
|
uint32_t cccrval, ovf_mask, ovf_partner;
|
|
struct p4_cpu *pc;
|
|
struct pmc_hw *phw;
|
|
struct pmc *pm;
|
|
pmc_value_t v;
|
|
|
|
PMCDBG(MDP,INT, 1, "cpu=%d eip=%p um=%d", cpu, (void *) eip, usermode);
|
|
|
|
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
|
|
|
|
ovf_mask = pmc_cpu_is_logical(cpu) ?
|
|
P4_CCCR_OVF_PMI_T1 : P4_CCCR_OVF_PMI_T0;
|
|
ovf_mask |= P4_CCCR_OVF;
|
|
if (p4_system_has_htt)
|
|
ovf_partner = pmc_cpu_is_logical(cpu) ? P4_CCCR_OVF_PMI_T0 :
|
|
P4_CCCR_OVF_PMI_T1;
|
|
else
|
|
ovf_partner = 0;
|
|
did_interrupt = 0;
|
|
|
|
if (p4_system_has_htt)
|
|
P4_PCPU_ACQ_INTR_SPINLOCK(pc);
|
|
|
|
/*
|
|
* Loop through all CCCRs, looking for ones that have
|
|
* interrupted this CPU.
|
|
*/
|
|
for (i = 0; i < P4_NPMCS-1; i++) {
|
|
|
|
ri = i + 1; /* row index */
|
|
|
|
/*
|
|
* Check if our partner logical CPU has already marked
|
|
* this PMC has having interrupted it. If so, reset
|
|
* the flag and process the interrupt, but leave the
|
|
* hardware alone.
|
|
*/
|
|
if (p4_system_has_htt && P4_PCPU_GET_INTRFLAG(pc,ri)) {
|
|
P4_PCPU_SET_INTRFLAG(pc,ri,0);
|
|
did_interrupt = 1;
|
|
|
|
/*
|
|
* Ignore de-configured or stopped PMCs.
|
|
* Ignore PMCs not in sampling mode.
|
|
*/
|
|
phw = pc->pc_hwpmcs[ri];
|
|
pm = phw->phw_pmc;
|
|
if (pm == NULL ||
|
|
pm->pm_state != PMC_STATE_RUNNING ||
|
|
!PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
|
|
continue;
|
|
}
|
|
(void) pmc_process_interrupt(cpu, pm, eip, usermode);
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Fresh interrupt. Look for the CCCR_OVF bit
|
|
* and the OVF_Tx bit for this logical
|
|
* processor being set.
|
|
*/
|
|
cccrval = rdmsr(P4_CCCR_MSR_FIRST + i);
|
|
|
|
if ((cccrval & ovf_mask) != ovf_mask)
|
|
continue;
|
|
|
|
/*
|
|
* If the other logical CPU would also have been
|
|
* interrupted due to the PMC being shared, record
|
|
* this fact in the per-cpu saved interrupt flag
|
|
* bitmask.
|
|
*/
|
|
if (p4_system_has_htt && (cccrval & ovf_partner))
|
|
P4_PCPU_SET_INTRFLAG(pc, ri, 1);
|
|
|
|
v = rdmsr(P4_PERFCTR_MSR_FIRST + i);
|
|
|
|
PMCDBG(MDP,INT, 2, "ri=%d v=%jx", ri, v);
|
|
|
|
/* Stop the counter, and reset the overflow bit */
|
|
cccrval &= ~(P4_CCCR_OVF | P4_CCCR_ENABLE);
|
|
wrmsr(P4_CCCR_MSR_FIRST + i, cccrval);
|
|
|
|
did_interrupt = 1;
|
|
|
|
/*
|
|
* Ignore de-configured or stopped PMCs. Ignore PMCs
|
|
* not in sampling mode.
|
|
*/
|
|
phw = pc->pc_hwpmcs[ri];
|
|
pm = phw->phw_pmc;
|
|
|
|
if (pm == NULL ||
|
|
pm->pm_state != PMC_STATE_RUNNING ||
|
|
!PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Process the interrupt. Re-enable the PMC if
|
|
* processing was successful.
|
|
*/
|
|
error = pmc_process_interrupt(cpu, pm, eip, usermode);
|
|
|
|
/*
|
|
* Only the first processor executing the NMI handler
|
|
* in a HTT pair will restart a PMC, and that too
|
|
* only if there were no errors.
|
|
*/
|
|
v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(
|
|
pm->pm_sc.pm_reloadcount);
|
|
wrmsr(P4_PERFCTR_MSR_FIRST + i, v);
|
|
if (error == 0)
|
|
wrmsr(P4_CCCR_MSR_FIRST + i,
|
|
cccrval | P4_CCCR_ENABLE);
|
|
}
|
|
|
|
/* allow the other CPU to proceed */
|
|
if (p4_system_has_htt)
|
|
P4_PCPU_REL_INTR_SPINLOCK(pc);
|
|
|
|
/*
|
|
* On Intel P4 CPUs, the PMC 'pcint' entry in the LAPIC gets
|
|
* masked when a PMC interrupts the CPU. We need to unmask
|
|
* the interrupt source explicitly.
|
|
*/
|
|
|
|
if (did_interrupt)
|
|
pmc_x86_lapic_enable_pmc_interrupt();
|
|
|
|
atomic_add_int(did_interrupt ? &pmc_stats.pm_intr_processed :
|
|
&pmc_stats.pm_intr_ignored, 1);
|
|
|
|
return did_interrupt;
|
|
}
|
|
|
|
/*
|
|
* Describe a CPU's PMC state.
|
|
*/
|
|
|
|
static int
|
|
p4_describe(int cpu, int ri, struct pmc_info *pi,
|
|
struct pmc **ppmc)
|
|
{
|
|
int error;
|
|
size_t copied;
|
|
struct pmc_hw *phw;
|
|
const struct p4pmc_descr *pd;
|
|
|
|
KASSERT(cpu >= 0 && cpu < mp_ncpus,
|
|
("[p4,%d] illegal CPU %d", __LINE__, cpu));
|
|
KASSERT(ri >= 0 && ri < P4_NPMCS,
|
|
("[p4,%d] row-index %d out of range", __LINE__, ri));
|
|
|
|
PMCDBG(MDP,OPS,1,"p4-describe cpu=%d ri=%d", cpu, ri);
|
|
|
|
if (pmc_cpu_is_logical(cpu))
|
|
return EINVAL;
|
|
|
|
phw = pmc_pcpu[cpu]->pc_hwpmcs[ri];
|
|
pd = &p4_pmcdesc[ri];
|
|
|
|
if ((error = copystr(pd->pm_descr.pd_name, pi->pm_name,
|
|
PMC_NAME_MAX, &copied)) != 0)
|
|
return error;
|
|
|
|
pi->pm_class = pd->pm_descr.pd_class;
|
|
|
|
if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
|
|
pi->pm_enabled = TRUE;
|
|
*ppmc = phw->phw_pmc;
|
|
} else {
|
|
pi->pm_enabled = FALSE;
|
|
*ppmc = NULL;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Get MSR# for use with RDPMC.
|
|
*/
|
|
|
|
static int
|
|
p4_get_msr(int ri, uint32_t *msr)
|
|
{
|
|
KASSERT(ri >= 0 && ri < P4_NPMCS,
|
|
("[p4,%d] ri %d out of range", __LINE__, ri));
|
|
|
|
*msr = p4_pmcdesc[ri].pm_pmc_msr - P4_PERFCTR_MSR_FIRST;
|
|
|
|
PMCDBG(MDP,OPS, 1, "ri=%d getmsr=0x%x", ri, *msr);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
int
|
|
pmc_initialize_p4(struct pmc_mdep *pmc_mdep)
|
|
{
|
|
struct p4_event_descr *pe;
|
|
|
|
KASSERT(strcmp(cpu_vendor, "GenuineIntel") == 0,
|
|
("[p4,%d] Initializing non-intel processor", __LINE__));
|
|
|
|
PMCDBG(MDP,INI,1, "%s", "p4-initialize");
|
|
|
|
switch (pmc_mdep->pmd_cputype) {
|
|
case PMC_CPU_INTEL_PIV:
|
|
|
|
pmc_mdep->pmd_npmc = P4_NPMCS;
|
|
pmc_mdep->pmd_classes[1].pm_class = PMC_CLASS_P4;
|
|
pmc_mdep->pmd_classes[1].pm_caps = P4_PMC_CAPS;
|
|
pmc_mdep->pmd_classes[1].pm_width = 40;
|
|
pmc_mdep->pmd_nclasspmcs[1] = 18;
|
|
|
|
pmc_mdep->pmd_init = p4_init;
|
|
pmc_mdep->pmd_cleanup = p4_cleanup;
|
|
pmc_mdep->pmd_switch_in = p4_switch_in;
|
|
pmc_mdep->pmd_switch_out = p4_switch_out;
|
|
pmc_mdep->pmd_read_pmc = p4_read_pmc;
|
|
pmc_mdep->pmd_write_pmc = p4_write_pmc;
|
|
pmc_mdep->pmd_config_pmc = p4_config_pmc;
|
|
pmc_mdep->pmd_get_config = p4_get_config;
|
|
pmc_mdep->pmd_allocate_pmc = p4_allocate_pmc;
|
|
pmc_mdep->pmd_release_pmc = p4_release_pmc;
|
|
pmc_mdep->pmd_start_pmc = p4_start_pmc;
|
|
pmc_mdep->pmd_stop_pmc = p4_stop_pmc;
|
|
pmc_mdep->pmd_intr = p4_intr;
|
|
pmc_mdep->pmd_describe = p4_describe;
|
|
pmc_mdep->pmd_get_msr = p4_get_msr; /* i386 */
|
|
|
|
/* model specific munging */
|
|
if ((cpu_id & 0xFFF) < 0xF27) {
|
|
|
|
/*
|
|
* On P4 and Xeon with CPUID < (Family 15,
|
|
* Model 2, Stepping 7), only one ESCR is
|
|
* available for the IOQ_ALLOCATION event.
|
|
*/
|
|
|
|
pe = p4_find_event(PMC_EV_P4_IOQ_ALLOCATION);
|
|
pe->pm_escrs[1] = P4_ESCR_NONE;
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
KASSERT(0,("[p4,%d] Unknown CPU type", __LINE__));
|
|
return ENOSYS;
|
|
}
|
|
|
|
return 0;
|
|
}
|