a1e1ad22e0
be careful not to fix anything that was already broken; the NFSv4 code is particularly bad in this respect.
1829 lines
53 KiB
C
1829 lines
53 KiB
C
/*-
|
|
* Copyright (c) 2003-2007 Joseph Koshy
|
|
* Copyright (c) 2007 The FreeBSD Foundation
|
|
* All rights reserved.
|
|
*
|
|
* Portions of this software were developed by A. Joseph Koshy under
|
|
* sponsorship from the FreeBSD Foundation and Google, Inc.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/lock.h>
|
|
#include <sys/mutex.h>
|
|
#include <sys/pmc.h>
|
|
#include <sys/pmckern.h>
|
|
#include <sys/smp.h>
|
|
#include <sys/systm.h>
|
|
|
|
#include <machine/cpu.h>
|
|
#include <machine/cpufunc.h>
|
|
#include <machine/md_var.h>
|
|
#include <machine/specialreg.h>
|
|
|
|
/*
|
|
* PENTIUM 4 SUPPORT
|
|
*
|
|
* The P4 has 18 PMCs, divided into 4 groups with 4,4,4 and 6 PMCs
|
|
* respectively. Each PMC comprises two model specific registers:
|
|
* a counter configuration control register (CCCR) and a counter
|
|
* register that holds the actual event counts.
|
|
*
|
|
* Configuring an event requires the use of one of 45 event selection
|
|
* control registers (ESCR). Events are associated with specific
|
|
* ESCRs. Each PMC group has a set of ESCRs it can use.
|
|
*
|
|
* - The BPU counter group (4 PMCs) can use the 16 ESCRs:
|
|
* BPU_ESCR{0,1}, IS_ESCR{0,1}, MOB_ESCR{0,1}, ITLB_ESCR{0,1},
|
|
* PMH_ESCR{0,1}, IX_ESCR{0,1}, FSB_ESCR{0,1}, BSU_ESCR{0,1}.
|
|
*
|
|
* - The MS counter group (4 PMCs) can use the 6 ESCRs: MS_ESCR{0,1},
|
|
* TC_ESCR{0,1}, TBPU_ESCR{0,1}.
|
|
*
|
|
* - The FLAME counter group (4 PMCs) can use the 10 ESCRs:
|
|
* FLAME_ESCR{0,1}, FIRM_ESCR{0,1}, SAAT_ESCR{0,1}, U2L_ESCR{0,1},
|
|
* DAC_ESCR{0,1}.
|
|
*
|
|
* - The IQ counter group (6 PMCs) can use the 13 ESCRs: IQ_ESCR{0,1},
|
|
* ALF_ESCR{0,1}, RAT_ESCR{0,1}, SSU_ESCR0, CRU_ESCR{0,1,2,3,4,5}.
|
|
*
|
|
* Even-numbered ESCRs can be used with counters 0, 1 and 4 (if
|
|
* present) of a counter group. Odd-numbered ESCRs can be used with
|
|
* counters 2, 3 and 5 (if present) of a counter group. The
|
|
* 'p4_escrs[]' table describes these restrictions in a form that
|
|
* function 'p4_allocate()' uses for making allocation decisions.
|
|
*
|
|
* SYSTEM-MODE AND THREAD-MODE ALLOCATION
|
|
*
|
|
* In addition to remembering the state of PMC rows
|
|
* ('FREE','STANDALONE', or 'THREAD'), we similarly need to track the
|
|
* state of ESCR rows. If an ESCR is allocated to a system-mode PMC
|
|
* on a CPU we cannot allocate this to a thread-mode PMC. On a
|
|
* multi-cpu (multiple physical CPUs) system, ESCR allocation on each
|
|
* CPU is tracked by the pc_escrs[] array.
|
|
*
|
|
* Each system-mode PMC that is using an ESCR records its row-index in
|
|
* the appropriate entry and system-mode allocation attempts check
|
|
* that an ESCR is available using this array. Process-mode PMCs do
|
|
* not use the pc_escrs[] array, since ESCR row itself would have been
|
|
* marked as in 'THREAD' mode.
|
|
*
|
|
* HYPERTHREADING SUPPORT
|
|
*
|
|
* When HTT is enabled, the FreeBSD kernel treats the two 'logical'
|
|
* cpus as independent CPUs and can schedule kernel threads on them
|
|
* independently. However, the two logical CPUs share the same set of
|
|
* PMC resources. We need to ensure that:
|
|
* - PMCs that use the PMC_F_DESCENDANTS semantics are handled correctly,
|
|
* and,
|
|
* - Threads of multi-threaded processes that get scheduled on the same
|
|
* physical CPU are handled correctly.
|
|
*
|
|
* HTT Detection
|
|
*
|
|
* Not all HTT capable systems will have HTT enabled. We detect the
|
|
* presence of HTT by detecting if 'p4_init()' was called for a secondary
|
|
* CPU in a HTT pair.
|
|
*
|
|
* Note that hwpmc(4) cannot currently deal with a change in HTT status once
|
|
* loaded.
|
|
*
|
|
* Handling HTT READ / WRITE / START / STOP
|
|
*
|
|
* PMC resources are shared across the CPUs in an HTT pair. We
|
|
* designate the lower numbered CPU in a HTT pair as the 'primary'
|
|
* CPU. In each primary CPU's state we keep track of a 'runcount'
|
|
* which reflects the number of PMC-using processes that have been
|
|
* scheduled on its secondary CPU. Process-mode PMC operations will
|
|
* actually 'start' or 'stop' hardware only if these are the first or
|
|
* last processes respectively to use the hardware. PMC values
|
|
* written by a 'write' operation are saved and are transferred to
|
|
* hardware at PMC 'start' time if the runcount is 0. If the runcount
|
|
* is greater than 0 at the time of a 'start' operation, we keep track
|
|
* of the actual hardware value at the time of the 'start' operation
|
|
* and use this to adjust the final readings at PMC 'stop' or 'read'
|
|
* time.
|
|
*
|
|
* Execution sequences:
|
|
*
|
|
* Case 1: CPUx +...- (no overlap)
|
|
* CPUy +...-
|
|
* RC 0 1 0 1 0
|
|
*
|
|
* Case 2: CPUx +........- (partial overlap)
|
|
* CPUy +........-
|
|
* RC 0 1 2 1 0
|
|
*
|
|
* Case 3: CPUx +..............- (fully overlapped)
|
|
* CPUy +.....-
|
|
* RC 0 1 2 1 0
|
|
*
|
|
* Key:
|
|
* 'CPU[xy]' : one of the two logical processors on a HTT CPU.
|
|
* 'RC' : run count (#threads per physical core).
|
|
* '+' : point in time when a thread is put on a CPU.
|
|
* '-' : point in time where a thread is taken off a CPU.
|
|
*
|
|
* Handling HTT CONFIG
|
|
*
|
|
* Different processes attached to the same PMC may get scheduled on
|
|
* the two logical processors in the package. We keep track of config
|
|
* and de-config operations using the CFGFLAGS fields of the per-physical
|
|
* cpu state.
|
|
*
|
|
* Handling TSCs
|
|
*
|
|
* TSCs are architectural state and each CPU in a HTT pair has its own
|
|
* TSC register.
|
|
*/
|
|
|
|
/*
 * X-macro naming every P4 counter, one group per pair of lines, plus a
 * trailing NONE entry.  Users define P4_PMC(N) before expanding
 * P4_PMCS().
 */
#define	P4_PMCS()							\
	P4_PMC(BPU_COUNTER0)	P4_PMC(BPU_COUNTER1)			\
	P4_PMC(BPU_COUNTER2)	P4_PMC(BPU_COUNTER3)			\
	P4_PMC(MS_COUNTER0)	P4_PMC(MS_COUNTER1)			\
	P4_PMC(MS_COUNTER2)	P4_PMC(MS_COUNTER3)			\
	P4_PMC(FLAME_COUNTER0)	P4_PMC(FLAME_COUNTER1)			\
	P4_PMC(FLAME_COUNTER2)	P4_PMC(FLAME_COUNTER3)			\
	P4_PMC(IQ_COUNTER0)	P4_PMC(IQ_COUNTER1)			\
	P4_PMC(IQ_COUNTER2)	P4_PMC(IQ_COUNTER3)			\
	P4_PMC(IQ_COUNTER4)	P4_PMC(IQ_COUNTER5)			\
	P4_PMC(NONE)

/*
 * Counter identifiers: P4_PMC_BPU_COUNTER0 is 0, the remaining
 * counters follow in P4_PMCS() order, and P4_PMC_NONE comes last.
 * P4_PMC is left defined after the enum.
 */
enum pmc_p4pmc {
#undef	P4_PMC
#define	P4_PMC(N)	P4_PMC_##N ,
	P4_PMCS()
};
|
|
|
|
/*
 * P4 ESCR descriptors
 *
 * X-macro with one P4_ESCR(name, MSR address, pmc1, pmc2, pmc3) entry
 * per ESCR; the last three arguments name the counters the ESCR can be
 * paired with (NONE marks an unused slot).  As described at the top of
 * this file, even-numbered ESCRs pair with counters 0, 1 (and 4) of
 * their group and odd-numbered ESCRs with counters 2, 3 (and 5).
 * The final NONE entry is a sentinel.
 */

#define	P4_ESCRS()							\
    P4_ESCR(BSU_ESCR0,	0x3A0, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(BSU_ESCR1,	0x3A1, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(FSB_ESCR0,	0x3A2, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(FSB_ESCR1,	0x3A3, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(FIRM_ESCR0,	0x3A4, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(FIRM_ESCR1,	0x3A5, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(FLAME_ESCR0, 0x3A6, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(FLAME_ESCR1, 0x3A7, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(DAC_ESCR0,	0x3A8, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(DAC_ESCR1,	0x3A9, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(MOB_ESCR0,	0x3AA, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(MOB_ESCR1,	0x3AB, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(PMH_ESCR0,	0x3AC, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(PMH_ESCR1,	0x3AD, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(SAAT_ESCR0,	0x3AE, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(SAAT_ESCR1,	0x3AF, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(U2L_ESCR0,	0x3B0, FLAME_COUNTER0, FLAME_COUNTER1, NONE)	\
    P4_ESCR(U2L_ESCR1,	0x3B1, FLAME_COUNTER2, FLAME_COUNTER3, NONE)	\
    P4_ESCR(BPU_ESCR0,	0x3B2, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(BPU_ESCR1,	0x3B3, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(IS_ESCR0,	0x3B4, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(IS_ESCR1,	0x3B5, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(ITLB_ESCR0,	0x3B6, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(ITLB_ESCR1,	0x3B7, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(CRU_ESCR0,	0x3B8, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(CRU_ESCR1,	0x3B9, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(IQ_ESCR0,	0x3BA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(IQ_ESCR1,	0x3BB, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(RAT_ESCR0,	0x3BC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(RAT_ESCR1,	0x3BD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(SSU_ESCR0,	0x3BE, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(MS_ESCR0,	0x3C0, MS_COUNTER0, MS_COUNTER1, NONE)		\
    P4_ESCR(MS_ESCR1,	0x3C1, MS_COUNTER2, MS_COUNTER3, NONE)		\
    P4_ESCR(TBPU_ESCR0,	0x3C2, MS_COUNTER0, MS_COUNTER1, NONE)		\
    P4_ESCR(TBPU_ESCR1,	0x3C3, MS_COUNTER2, MS_COUNTER3, NONE)		\
    P4_ESCR(TC_ESCR0,	0x3C4, MS_COUNTER0, MS_COUNTER1, NONE)		\
    P4_ESCR(TC_ESCR1,	0x3C5, MS_COUNTER2, MS_COUNTER3, NONE)		\
    P4_ESCR(IX_ESCR0,	0x3C8, BPU_COUNTER0, BPU_COUNTER1, NONE)	\
    P4_ESCR(IX_ESCR1,	0x3C9, BPU_COUNTER2, BPU_COUNTER3, NONE)	\
    P4_ESCR(ALF_ESCR0,	0x3CA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(ALF_ESCR1,	0x3CB, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(CRU_ESCR2,	0x3CC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(CRU_ESCR3,	0x3CD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(CRU_ESCR4,	0x3E0, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4)	\
    P4_ESCR(CRU_ESCR5,	0x3E1, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5)	\
    P4_ESCR(NONE,		~0,    NONE, NONE, NONE)
|
|
|
|
enum pmc_p4escr {
|
|
#define P4_ESCR(N, MSR, P1, P2, P3) P4_ESCR_##N ,
|
|
P4_ESCRS()
|
|
#undef P4_ESCR
|
|
};
|
|
|
|
struct pmc_p4escr_descr {
|
|
const char pm_escrname[PMC_NAME_MAX];
|
|
u_short pm_escr_msr;
|
|
const enum pmc_p4pmc pm_pmcs[P4_MAX_PMC_PER_ESCR];
|
|
};
|
|
|
|
/*
 * ESCR descriptor table, generated from P4_ESCRS().  Each entry
 * records the stringified ESCR name, its MSR address and the three
 * counter identifiers listed for it.
 */
static struct pmc_p4escr_descr p4_escrs[] =
{
#define	P4_ESCR(N, MSR, P1, P2, P3)		\
	{					\
		.pm_escrname = #N,		\
		.pm_escr_msr = (MSR),		\
		.pm_pmcs =			\
		{				\
			P4_PMC_##P1,		\
			P4_PMC_##P2,		\
			P4_PMC_##P3		\
		}				\
	} ,

	P4_ESCRS()

#undef	P4_ESCR
};
|
|
|
|
/*
|
|
* P4 Event descriptor
|
|
*/
|
|
|
|
struct p4_event_descr {
|
|
const enum pmc_event pm_event;
|
|
const uint32_t pm_escr_eventselect;
|
|
const uint32_t pm_cccr_select;
|
|
const char pm_is_ti_event;
|
|
enum pmc_p4escr pm_escrs[P4_MAX_ESCR_PER_EVENT];
|
|
};
|
|
|
|
static struct p4_event_descr p4_events[] = {
|
|
|
|
#define P4_EVDESCR(NAME, ESCREVENTSEL, CCCRSEL, TI_EVENT, ESCR0, ESCR1) \
|
|
{ \
|
|
.pm_event = PMC_EV_P4_##NAME, \
|
|
.pm_escr_eventselect = (ESCREVENTSEL), \
|
|
.pm_cccr_select = (CCCRSEL), \
|
|
.pm_is_ti_event = (TI_EVENT), \
|
|
.pm_escrs = \
|
|
{ \
|
|
P4_ESCR_##ESCR0, \
|
|
P4_ESCR_##ESCR1 \
|
|
} \
|
|
}
|
|
|
|
P4_EVDESCR(TC_DELIVER_MODE, 0x01, 0x01, TRUE, TC_ESCR0, TC_ESCR1),
|
|
P4_EVDESCR(BPU_FETCH_REQUEST, 0x03, 0x00, FALSE, BPU_ESCR0, BPU_ESCR1),
|
|
P4_EVDESCR(ITLB_REFERENCE, 0x18, 0x03, FALSE, ITLB_ESCR0, ITLB_ESCR1),
|
|
P4_EVDESCR(MEMORY_CANCEL, 0x02, 0x05, FALSE, DAC_ESCR0, DAC_ESCR1),
|
|
P4_EVDESCR(MEMORY_COMPLETE, 0x08, 0x02, FALSE, SAAT_ESCR0, SAAT_ESCR1),
|
|
P4_EVDESCR(LOAD_PORT_REPLAY, 0x04, 0x02, FALSE, SAAT_ESCR0, SAAT_ESCR1),
|
|
P4_EVDESCR(STORE_PORT_REPLAY, 0x05, 0x02, FALSE, SAAT_ESCR0, SAAT_ESCR1),
|
|
P4_EVDESCR(MOB_LOAD_REPLAY, 0x03, 0x02, FALSE, MOB_ESCR0, MOB_ESCR1),
|
|
P4_EVDESCR(PAGE_WALK_TYPE, 0x01, 0x04, TRUE, PMH_ESCR0, PMH_ESCR1),
|
|
P4_EVDESCR(BSQ_CACHE_REFERENCE, 0x0C, 0x07, FALSE, BSU_ESCR0, BSU_ESCR1),
|
|
P4_EVDESCR(IOQ_ALLOCATION, 0x03, 0x06, FALSE, FSB_ESCR0, FSB_ESCR1),
|
|
P4_EVDESCR(IOQ_ACTIVE_ENTRIES, 0x1A, 0x06, FALSE, FSB_ESCR1, NONE),
|
|
P4_EVDESCR(FSB_DATA_ACTIVITY, 0x17, 0x06, TRUE, FSB_ESCR0, FSB_ESCR1),
|
|
P4_EVDESCR(BSQ_ALLOCATION, 0x05, 0x07, FALSE, BSU_ESCR0, NONE),
|
|
P4_EVDESCR(BSQ_ACTIVE_ENTRIES, 0x06, 0x07, FALSE, BSU_ESCR1, NONE),
|
|
/* BSQ_ACTIVE_ENTRIES inherits CPU specificity from BSQ_ALLOCATION */
|
|
P4_EVDESCR(SSE_INPUT_ASSIST, 0x34, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
|
|
P4_EVDESCR(PACKED_SP_UOP, 0x08, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
|
|
P4_EVDESCR(PACKED_DP_UOP, 0x0C, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
|
|
P4_EVDESCR(SCALAR_SP_UOP, 0x0A, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
|
|
P4_EVDESCR(SCALAR_DP_UOP, 0x0E, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
|
|
P4_EVDESCR(64BIT_MMX_UOP, 0x02, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
|
|
P4_EVDESCR(128BIT_MMX_UOP, 0x1A, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
|
|
P4_EVDESCR(X87_FP_UOP, 0x04, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
|
|
P4_EVDESCR(X87_SIMD_MOVES_UOP, 0x2E, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
|
|
P4_EVDESCR(GLOBAL_POWER_EVENTS, 0x13, 0x06, FALSE, FSB_ESCR0, FSB_ESCR1),
|
|
P4_EVDESCR(TC_MS_XFER, 0x05, 0x00, FALSE, MS_ESCR0, MS_ESCR1),
|
|
P4_EVDESCR(UOP_QUEUE_WRITES, 0x09, 0x00, FALSE, MS_ESCR0, MS_ESCR1),
|
|
P4_EVDESCR(RETIRED_MISPRED_BRANCH_TYPE,
|
|
0x05, 0x02, FALSE, TBPU_ESCR0, TBPU_ESCR1),
|
|
P4_EVDESCR(RETIRED_BRANCH_TYPE, 0x04, 0x02, FALSE, TBPU_ESCR0, TBPU_ESCR1),
|
|
P4_EVDESCR(RESOURCE_STALL, 0x01, 0x01, FALSE, ALF_ESCR0, ALF_ESCR1),
|
|
P4_EVDESCR(WC_BUFFER, 0x05, 0x05, TRUE, DAC_ESCR0, DAC_ESCR1),
|
|
P4_EVDESCR(B2B_CYCLES, 0x16, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1),
|
|
P4_EVDESCR(BNR, 0x08, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1),
|
|
P4_EVDESCR(SNOOP, 0x06, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1),
|
|
P4_EVDESCR(RESPONSE, 0x04, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1),
|
|
P4_EVDESCR(FRONT_END_EVENT, 0x08, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3),
|
|
P4_EVDESCR(EXECUTION_EVENT, 0x0C, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3),
|
|
P4_EVDESCR(REPLAY_EVENT, 0x09, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3),
|
|
P4_EVDESCR(INSTR_RETIRED, 0x02, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1),
|
|
P4_EVDESCR(UOPS_RETIRED, 0x01, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1),
|
|
P4_EVDESCR(UOP_TYPE, 0x02, 0x02, FALSE, RAT_ESCR0, RAT_ESCR1),
|
|
P4_EVDESCR(BRANCH_RETIRED, 0x06, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3),
|
|
P4_EVDESCR(MISPRED_BRANCH_RETIRED, 0x03, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1),
|
|
P4_EVDESCR(X87_ASSIST, 0x03, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3),
|
|
P4_EVDESCR(MACHINE_CLEAR, 0x02, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3)
|
|
|
|
#undef P4_EVDESCR
|
|
};
|
|
|
|
/* True when event descriptor 'E' has its pm_is_ti_event flag set to TRUE. */
#define	P4_EVENT_IS_TI(E)	(TRUE == (E)->pm_is_ti_event)

/* Number of event ids in the inclusive PMC_EV_P4_FIRST..PMC_EV_P4_LAST range. */
#define	P4_NEVENTS		(1 + PMC_EV_P4_LAST - PMC_EV_P4_FIRST)
|
|
|
|
/*
|
|
* P4 PMC descriptors
|
|
*/
|
|
|
|
struct p4pmc_descr {
|
|
struct pmc_descr pm_descr; /* common information */
|
|
enum pmc_p4pmc pm_pmcnum; /* PMC number */
|
|
uint32_t pm_pmc_msr; /* PERFCTR MSR address */
|
|
uint32_t pm_cccr_msr; /* CCCR MSR address */
|
|
};
|
|
|
|
static struct p4pmc_descr p4_pmcdesc[P4_NPMCS] = {
|
|
|
|
/*
|
|
* TSC descriptor
|
|
*/
|
|
|
|
{
|
|
.pm_descr =
|
|
{
|
|
.pd_name = "TSC",
|
|
.pd_class = PMC_CLASS_TSC,
|
|
.pd_caps = PMC_CAP_READ | PMC_CAP_WRITE,
|
|
.pd_width = 64
|
|
},
|
|
.pm_pmcnum = ~0,
|
|
.pm_cccr_msr = ~0,
|
|
.pm_pmc_msr = 0x10,
|
|
},
|
|
|
|
/*
|
|
* P4 PMCS
|
|
*/
|
|
|
|
#define P4_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | PMC_CAP_SYSTEM | \
|
|
PMC_CAP_EDGE | PMC_CAP_THRESHOLD | PMC_CAP_READ | PMC_CAP_WRITE | \
|
|
PMC_CAP_INVERT | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE | \
|
|
PMC_CAP_TAGGING | PMC_CAP_CASCADE)
|
|
|
|
#define P4_PMCDESCR(N, PMC, CCCR) \
|
|
{ \
|
|
.pm_descr = \
|
|
{ \
|
|
.pd_name = #N, \
|
|
.pd_class = PMC_CLASS_P4, \
|
|
.pd_caps = P4_PMC_CAPS, \
|
|
.pd_width = 40 \
|
|
}, \
|
|
.pm_pmcnum = P4_PMC_##N, \
|
|
.pm_cccr_msr = (CCCR), \
|
|
.pm_pmc_msr = (PMC) \
|
|
}
|
|
|
|
P4_PMCDESCR(BPU_COUNTER0, 0x300, 0x360),
|
|
P4_PMCDESCR(BPU_COUNTER1, 0x301, 0x361),
|
|
P4_PMCDESCR(BPU_COUNTER2, 0x302, 0x362),
|
|
P4_PMCDESCR(BPU_COUNTER3, 0x303, 0x363),
|
|
P4_PMCDESCR(MS_COUNTER0, 0x304, 0x364),
|
|
P4_PMCDESCR(MS_COUNTER1, 0x305, 0x365),
|
|
P4_PMCDESCR(MS_COUNTER2, 0x306, 0x366),
|
|
P4_PMCDESCR(MS_COUNTER3, 0x307, 0x367),
|
|
P4_PMCDESCR(FLAME_COUNTER0, 0x308, 0x368),
|
|
P4_PMCDESCR(FLAME_COUNTER1, 0x309, 0x369),
|
|
P4_PMCDESCR(FLAME_COUNTER2, 0x30A, 0x36A),
|
|
P4_PMCDESCR(FLAME_COUNTER3, 0x30B, 0x36B),
|
|
P4_PMCDESCR(IQ_COUNTER0, 0x30C, 0x36C),
|
|
P4_PMCDESCR(IQ_COUNTER1, 0x30D, 0x36D),
|
|
P4_PMCDESCR(IQ_COUNTER2, 0x30E, 0x36E),
|
|
P4_PMCDESCR(IQ_COUNTER3, 0x30F, 0x36F),
|
|
P4_PMCDESCR(IQ_COUNTER4, 0x310, 0x370),
|
|
P4_PMCDESCR(IQ_COUNTER5, 0x311, 0x371),
|
|
|
|
#undef P4_PMCDESCR
|
|
};
|
|
|
|
/* HTT support */
|
|
#define P4_NHTT 2 /* logical processors/chip */
|
|
|
|
static int p4_system_has_htt;
|
|
|
|
/*
|
|
* Per-CPU data structure for P4 class CPUs
|
|
*
|
|
* [common stuff]
|
|
* [19 struct pmc_hw pointers]
|
|
* [19 struct pmc_hw structures]
|
|
* [45 ESCRs status bytes]
|
|
* [per-cpu spin mutex]
|
|
* [19 flag fields for holding config flags and a runcount]
|
|
* [19*2 hw value fields] (Thread mode PMC support)
|
|
* or
|
|
* [19*2 EIP values] (Sampling mode PMCs)
|
|
* [19*2 pmc value fields] (Thread mode PMC support))
|
|
*/
|
|
|
|
struct p4_cpu {
|
|
struct pmc_cpu pc_common;
|
|
struct pmc_hw *pc_hwpmcs[P4_NPMCS];
|
|
struct pmc_hw pc_p4pmcs[P4_NPMCS];
|
|
char pc_escrs[P4_NESCR];
|
|
struct mtx pc_mtx; /* spin lock */
|
|
uint32_t pc_intrflag; /* NMI handler flags */
|
|
unsigned int pc_intrlock; /* NMI handler spin lock */
|
|
unsigned char pc_flags[P4_NPMCS]; /* 4 bits each: {cfg,run}count */
|
|
union {
|
|
pmc_value_t pc_hw[P4_NPMCS * P4_NHTT];
|
|
uintptr_t pc_ip[P4_NPMCS * P4_NHTT];
|
|
} pc_si;
|
|
pmc_value_t pc_pmc_values[P4_NPMCS * P4_NHTT];
|
|
};
|
|
|
|
/*
|
|
* A 'logical' CPU shares PMC resources with partner 'physical' CPU,
|
|
* except the TSC, which is architectural and hence seperate. The
|
|
* 'logical' CPU descriptor thus has pointers to the physical CPUs
|
|
* descriptor state except for the TSC (rowindex 0) which is not
|
|
* shared.
|
|
*/
|
|
|
|
struct p4_logicalcpu {
|
|
struct pmc_cpu pc_common;
|
|
struct pmc_hw *pc_hwpmcs[P4_NPMCS];
|
|
struct pmc_hw pc_tsc;
|
|
};
|
|
|
|
/*
 * Accessors (lvalues) for the per-(row-index, logical CPU) save areas
 * in 'struct p4_cpu'.  Each array has P4_NPMCS * P4_NHTT slots; the two
 * logical CPUs' slots for row RI live at RI * P4_NHTT and
 * RI * P4_NHTT + 1, selected by the low bit of the CPU number.
 *
 * (The previous index, RI * (CPU & 1), collapsed every row of an even
 * numbered CPU onto slot 0.)
 */
#define	P4_PCPU_PMC_VALUE(PC,RI,CPU)					\
	(PC)->pc_pmc_values[(RI) * P4_NHTT + ((CPU) & 1)]
#define	P4_PCPU_HW_VALUE(PC,RI,CPU)					\
	(PC)->pc_si.pc_hw[(RI) * P4_NHTT + ((CPU) & 1)]
#define	P4_PCPU_SAVED_IP(PC,RI,CPU)					\
	(PC)->pc_si.pc_ip[(RI) * P4_NHTT + ((CPU) & 1)]
|
|
|
|
/*
 * Accessors for the per-row flag byte pc_flags[RI].  The low nibble
 * holds the run count and the high nibble the config flags.
 */
#define	P4_PCPU_GET_FLAGS(PC,RI,MASK)	((MASK) & (PC)->pc_flags[(RI)])
#define	P4_PCPU_SET_FLAGS(PC,RI,MASK,VAL)	do {			\
	char _newflags = (PC)->pc_flags[(RI)];				\
	/* clear the field, then merge in the masked new value */	\
	_newflags = (_newflags & ~(MASK)) | ((VAL) & (MASK));		\
	(PC)->pc_flags[(RI)] = _newflags;				\
} while (0)

/* Run count: bits 0..3. */
#define	P4_PCPU_GET_RUNCOUNT(PC,RI)	P4_PCPU_GET_FLAGS(PC,RI,0x0F)
#define	P4_PCPU_SET_RUNCOUNT(PC,RI,V)	P4_PCPU_SET_FLAGS(PC,RI,0x0F,V)

/* Config flags: bits 4..7, presented shifted down to bits 0..3. */
#define	P4_PCPU_GET_CFGFLAGS(PC,RI)	(P4_PCPU_GET_FLAGS(PC,RI,0xF0) >> 4)
#define	P4_PCPU_SET_CFGFLAGS(PC,RI,C)	P4_PCPU_SET_FLAGS(PC,RI,0xF0,((C) <<4))
|
|
|
|
/*
 * Config-flag bit for logical CPU 'C': 0x2 for the secondary CPU of an
 * HTT pair, 0x1 otherwise.  Uses the macro argument rather than a
 * caller variable that happens to be named 'cpu'.
 */
#define	P4_CPU_TO_FLAG(C)	(P4_CPU_IS_HTT_SECONDARY(C) ? 0x2 : 0x1)
|
|
|
|
/*
 * Accessors for bit 'I' of the pc_intrflag word.  The getter yields
 * the masked bit (zero or non-zero), not a normalized 0/1.
 */
#define	P4_PCPU_GET_INTRFLAG(PC,I)	((1 << (I)) & (PC)->pc_intrflag)
#define	P4_PCPU_SET_INTRFLAG(PC,I,V)	do {		\
	uint32_t _bit = 1 << (I);			\
	if ((V))					\
		(PC)->pc_intrflag |= _bit;		\
	else						\
		(PC)->pc_intrflag &= ~_bit;		\
} while (0)
|
|
|
|
/*
 * A minimal spin lock implementation for use inside the NMI handler.
 *
 * We don't want to use a regular spin lock here, because curthread
 * may not be consistent at the time the handler is invoked.
 *
 * Both macros operate on the pc_intrlock word of the per-CPU state
 * passed as 'PC' (previously they ignored the argument and relied on
 * the caller having a variable named 'pc').
 */
#define	P4_PCPU_ACQ_INTR_SPINLOCK(PC) do {				\
	/* spin until the lock word goes from 0 to 1 */			\
	while (!atomic_cmpset_acq_int(&(PC)->pc_intrlock, 0, 1))	\
		ia32_pause();						\
} while (0)
/* NOTE: the expansion carries its own trailing ';', as it always has. */
#define	P4_PCPU_REL_INTR_SPINLOCK(PC) 					\
	atomic_store_rel_int(&(PC)->pc_intrlock, 0);
|
|
|
|
/* ESCR row disposition */
|
|
static int p4_escrdisp[P4_NESCR];
|
|
|
|
#define P4_ESCR_ROW_DISP_IS_THREAD(E) (p4_escrdisp[(E)] > 0)
|
|
#define P4_ESCR_ROW_DISP_IS_STANDALONE(E) (p4_escrdisp[(E)] < 0)
|
|
#define P4_ESCR_ROW_DISP_IS_FREE(E) (p4_escrdisp[(E)] == 0)
|
|
|
|
#define P4_ESCR_MARK_ROW_STANDALONE(E) do { \
|
|
KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\
|
|
__LINE__)); \
|
|
atomic_add_int(&p4_escrdisp[(E)], -1); \
|
|
KASSERT(p4_escrdisp[(E)] >= (-pmc_cpu_max_active()), \
|
|
("[p4,%d] row disposition error", __LINE__)); \
|
|
} while (0)
|
|
|
|
#define P4_ESCR_UNMARK_ROW_STANDALONE(E) do { \
|
|
atomic_add_int(&p4_escrdisp[(E)], 1); \
|
|
KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\
|
|
__LINE__)); \
|
|
} while (0)
|
|
|
|
#define P4_ESCR_MARK_ROW_THREAD(E) do { \
|
|
KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error", \
|
|
__LINE__)); \
|
|
atomic_add_int(&p4_escrdisp[(E)], 1); \
|
|
} while (0)
|
|
|
|
#define P4_ESCR_UNMARK_ROW_THREAD(E) do { \
|
|
atomic_add_int(&p4_escrdisp[(E)], -1); \
|
|
KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error", \
|
|
__LINE__)); \
|
|
} while (0)
|
|
|
|
/* True when the enable bit of the CCCR at MSR address 'cccr' is clear. */
#define	P4_PMC_IS_STOPPED(cccr)	(!(rdmsr(cccr) & P4_CCCR_ENABLE))

/* 1 for an odd-numbered CPU on a system with HTT detected, else 0. */
#define	P4_CPU_IS_HTT_SECONDARY(cpu)					\
	(p4_system_has_htt && ((cpu) & 1))
/* With HTT detected, the even-numbered CPU of the pair; else 'cpu' itself. */
#define	P4_TO_HTT_PRIMARY(cpu) 						\
	(!p4_system_has_htt ? (cpu) : ((cpu) & ~1))

/* Masks that clear the per-thread, enable and overflow bits. */
#define	P4_CCCR_Tx_MASK	(~(P4_CCCR_OVF_PMI_T0|P4_CCCR_OVF_PMI_T1|	\
			     P4_CCCR_ENABLE|P4_CCCR_OVF))
#define	P4_ESCR_Tx_MASK	(~(P4_ESCR_T0_OS|P4_ESCR_T0_USR|P4_ESCR_T1_OS|	\
			     P4_ESCR_T1_USR))
|
|
|
|
/*
|
|
* support routines
|
|
*/
|
|
|
|
static struct p4_event_descr *
|
|
p4_find_event(enum pmc_event ev)
|
|
{
|
|
int n;
|
|
|
|
for (n = 0; n < P4_NEVENTS; n++)
|
|
if (p4_events[n].pm_event == ev)
|
|
break;
|
|
if (n == P4_NEVENTS)
|
|
return NULL;
|
|
return &p4_events[n];
|
|
}
|
|
|
|
/*
|
|
* Initialize per-cpu state
|
|
*/
|
|
|
|
static int
|
|
p4_init(int cpu)
|
|
{
|
|
int n, phycpu;
|
|
char *pescr;
|
|
struct p4_cpu *pcs;
|
|
struct p4_logicalcpu *plcs;
|
|
struct pmc_hw *phw;
|
|
|
|
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
|
|
("[p4,%d] insane cpu number %d", __LINE__, cpu));
|
|
|
|
PMCDBG(MDP,INI,0, "p4-init cpu=%d is-primary=%d", cpu,
|
|
pmc_cpu_is_primary(cpu) != 0);
|
|
|
|
/*
|
|
* The two CPUs in an HT pair share their per-cpu state.
|
|
*
|
|
* For HT capable CPUs, we assume that the two logical
|
|
* processors in the HT pair get two consecutive CPU ids
|
|
* starting with an even id #.
|
|
*
|
|
* The primary CPU (the even numbered CPU of the pair) would
|
|
* have been initialized prior to the initialization for the
|
|
* secondary.
|
|
*/
|
|
|
|
if (!pmc_cpu_is_primary(cpu) && (cpu & 1)) {
|
|
|
|
p4_system_has_htt = 1;
|
|
|
|
phycpu = P4_TO_HTT_PRIMARY(cpu);
|
|
pcs = (struct p4_cpu *) pmc_pcpu[phycpu];
|
|
PMCDBG(MDP,INI,1, "p4-init cpu=%d phycpu=%d pcs=%p",
|
|
cpu, phycpu, pcs);
|
|
KASSERT(pcs,
|
|
("[p4,%d] Null Per-Cpu state cpu=%d phycpu=%d", __LINE__,
|
|
cpu, phycpu));
|
|
if (pcs == NULL) /* decline to init */
|
|
return ENXIO;
|
|
|
|
plcs = malloc(sizeof(struct p4_logicalcpu),
|
|
M_PMC, M_WAITOK|M_ZERO);
|
|
|
|
/* The TSC is architectural state and is not shared */
|
|
plcs->pc_hwpmcs[0] = &plcs->pc_tsc;
|
|
plcs->pc_tsc.phw_state = PMC_PHW_FLAG_IS_ENABLED |
|
|
PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(0) |
|
|
PMC_PHW_FLAG_IS_SHAREABLE;
|
|
|
|
/* Other PMCs are shared with the physical CPU */
|
|
for (n = 1; n < P4_NPMCS; n++)
|
|
plcs->pc_hwpmcs[n] = pcs->pc_hwpmcs[n];
|
|
|
|
pmc_pcpu[cpu] = (struct pmc_cpu *) plcs;
|
|
return 0;
|
|
}
|
|
|
|
pcs = malloc(sizeof(struct p4_cpu), M_PMC, M_WAITOK|M_ZERO);
|
|
|
|
if (pcs == NULL)
|
|
return ENOMEM;
|
|
phw = pcs->pc_p4pmcs;
|
|
|
|
for (n = 0; n < P4_NPMCS; n++, phw++) {
|
|
phw->phw_state = PMC_PHW_FLAG_IS_ENABLED |
|
|
PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(n);
|
|
phw->phw_pmc = NULL;
|
|
pcs->pc_hwpmcs[n] = phw;
|
|
}
|
|
|
|
/* Mark the TSC as shareable */
|
|
pcs->pc_hwpmcs[0]->phw_state |= PMC_PHW_FLAG_IS_SHAREABLE;
|
|
|
|
pescr = pcs->pc_escrs;
|
|
for (n = 0; n < P4_NESCR; n++)
|
|
*pescr++ = P4_INVALID_PMC_INDEX;
|
|
pmc_pcpu[cpu] = (struct pmc_cpu *) pcs;
|
|
|
|
mtx_init(&pcs->pc_mtx, "p4-pcpu", "pmc-leaf", MTX_SPIN);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Destroy per-cpu state.
|
|
*/
|
|
|
|
static int
|
|
p4_cleanup(int cpu)
|
|
{
|
|
int i;
|
|
struct p4_cpu *pcs;
|
|
|
|
PMCDBG(MDP,INI,0, "p4-cleanup cpu=%d", cpu);
|
|
|
|
if ((pcs = (struct p4_cpu *) pmc_pcpu[cpu]) == NULL)
|
|
return 0;
|
|
|
|
/* Turn off all PMCs on this CPU */
|
|
for (i = 0; i < P4_NPMCS - 1; i++)
|
|
wrmsr(P4_CCCR_MSR_FIRST + i,
|
|
rdmsr(P4_CCCR_MSR_FIRST + i) & ~P4_CCCR_ENABLE);
|
|
|
|
/*
|
|
* If the CPU is physical we need to teardown the
|
|
* full MD state.
|
|
*/
|
|
if (!P4_CPU_IS_HTT_SECONDARY(cpu))
|
|
mtx_destroy(&pcs->pc_mtx);
|
|
|
|
free(pcs, M_PMC);
|
|
|
|
pmc_pcpu[cpu] = NULL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Context switch in.
|
|
*/
|
|
|
|
static int
|
|
p4_switch_in(struct pmc_cpu *pc, struct pmc_process *pp)
|
|
{
|
|
(void) pc;
|
|
|
|
PMCDBG(MDP,SWI,1, "pc=%p pp=%p enable-msr=%d", pc, pp,
|
|
(pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0);
|
|
|
|
/* enable the RDPMC instruction */
|
|
if (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS)
|
|
load_cr4(rcr4() | CR4_PCE);
|
|
|
|
PMCDBG(MDP,SWI,2, "cr4=0x%x", (uint32_t) rcr4());
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Context switch out.
|
|
*/
|
|
|
|
static int
|
|
p4_switch_out(struct pmc_cpu *pc, struct pmc_process *pp)
|
|
{
|
|
(void) pc;
|
|
(void) pp; /* can be null */
|
|
|
|
PMCDBG(MDP,SWO,1, "pc=%p pp=%p", pc, pp);
|
|
|
|
/* always disallow the RDPMC instruction */
|
|
load_cr4(rcr4() & ~CR4_PCE);
|
|
|
|
PMCDBG(MDP,SWO,2, "cr4=0x%x", (uint32_t) rcr4());
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Read a PMC
|
|
*/
|
|
|
|
static int
|
|
p4_read_pmc(int cpu, int ri, pmc_value_t *v)
|
|
{
|
|
enum pmc_mode mode;
|
|
struct p4pmc_descr *pd;
|
|
struct pmc *pm;
|
|
struct p4_cpu *pc;
|
|
struct pmc_hw *phw;
|
|
pmc_value_t tmp;
|
|
|
|
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
|
|
("[p4,%d] illegal CPU value %d", __LINE__, cpu));
|
|
KASSERT(ri >= 0 && ri < P4_NPMCS,
|
|
("[p4,%d] illegal row-index %d", __LINE__, ri));
|
|
|
|
|
|
if (ri == 0) { /* TSC */
|
|
#ifdef DEBUG
|
|
pc = (struct p4_cpu *) pmc_pcpu[cpu];
|
|
phw = pc->pc_hwpmcs[ri];
|
|
pm = phw->phw_pmc;
|
|
|
|
KASSERT(pm, ("[p4,%d] cpu=%d ri=%d not configured", __LINE__,
|
|
cpu, ri));
|
|
KASSERT(PMC_TO_CLASS(pm) == PMC_CLASS_TSC,
|
|
("[p4,%d] cpu=%d ri=%d not a TSC (%d)", __LINE__, cpu, ri,
|
|
PMC_TO_CLASS(pm)));
|
|
KASSERT(PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)),
|
|
("[p4,%d] TSC counter in non-counting mode", __LINE__));
|
|
#endif
|
|
*v = rdtsc();
|
|
PMCDBG(MDP,REA,2, "p4-read -> %jx", *v);
|
|
return 0;
|
|
}
|
|
|
|
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)];
|
|
phw = pc->pc_hwpmcs[ri];
|
|
pd = &p4_pmcdesc[ri];
|
|
pm = phw->phw_pmc;
|
|
|
|
KASSERT(pm != NULL,
|
|
("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__,
|
|
cpu, ri));
|
|
|
|
KASSERT(pd->pm_descr.pd_class == PMC_TO_CLASS(pm),
|
|
("[p4,%d] class mismatch pd %d != id class %d", __LINE__,
|
|
pd->pm_descr.pd_class, PMC_TO_CLASS(pm)));
|
|
|
|
mode = PMC_TO_MODE(pm);
|
|
|
|
PMCDBG(MDP,REA,1, "p4-read cpu=%d ri=%d mode=%d", cpu, ri, mode);
|
|
|
|
KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
|
|
("[p4,%d] unknown PMC class %d", __LINE__, pd->pm_descr.pd_class));
|
|
|
|
tmp = rdmsr(p4_pmcdesc[ri].pm_pmc_msr);
|
|
|
|
if (PMC_IS_VIRTUAL_MODE(mode)) {
|
|
if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit overflow */
|
|
tmp += (P4_PERFCTR_MASK + 1) -
|
|
P4_PCPU_HW_VALUE(pc,ri,cpu);
|
|
else
|
|
tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu);
|
|
tmp += P4_PCPU_PMC_VALUE(pc,ri,cpu);
|
|
}
|
|
|
|
if (PMC_IS_SAMPLING_MODE(mode)) /* undo transformation */
|
|
*v = P4_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp);
|
|
else
|
|
*v = tmp;
|
|
|
|
PMCDBG(MDP,REA,2, "p4-read -> %jx", *v);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Write a PMC
|
|
*/
|
|
|
|
static int
|
|
p4_write_pmc(int cpu, int ri, pmc_value_t v)
|
|
{
|
|
enum pmc_mode mode;
|
|
struct pmc *pm;
|
|
struct p4_cpu *pc;
|
|
const struct pmc_hw *phw;
|
|
const struct p4pmc_descr *pd;
|
|
|
|
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
|
|
("[amd,%d] illegal CPU value %d", __LINE__, cpu));
|
|
KASSERT(ri >= 0 && ri < P4_NPMCS,
|
|
("[amd,%d] illegal row-index %d", __LINE__, ri));
|
|
|
|
|
|
/*
|
|
* The P4's TSC register is writeable, but we don't allow a
|
|
* write as changing the TSC's value could interfere with
|
|
* timekeeping and other system functions.
|
|
*/
|
|
if (ri == 0) {
|
|
#ifdef DEBUG
|
|
pc = (struct p4_cpu *) pmc_pcpu[cpu];
|
|
phw = pc->pc_hwpmcs[ri];
|
|
pm = phw->phw_pmc;
|
|
KASSERT(pm, ("[p4,%d] cpu=%d ri=%d not configured", __LINE__,
|
|
cpu, ri));
|
|
KASSERT(PMC_TO_CLASS(pm) == PMC_CLASS_TSC,
|
|
("[p4,%d] cpu=%d ri=%d not a TSC (%d)", __LINE__,
|
|
cpu, ri, PMC_TO_CLASS(pm)));
|
|
#endif
|
|
return 0;
|
|
}
|
|
|
|
/* Shared PMCs */
|
|
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)];
|
|
phw = pc->pc_hwpmcs[ri];
|
|
pm = phw->phw_pmc;
|
|
pd = &p4_pmcdesc[ri];
|
|
|
|
KASSERT(pm != NULL,
|
|
("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__,
|
|
cpu, ri));
|
|
|
|
mode = PMC_TO_MODE(pm);
|
|
|
|
PMCDBG(MDP,WRI,1, "p4-write cpu=%d ri=%d mode=%d v=%jx", cpu, ri,
|
|
mode, v);
|
|
|
|
/*
|
|
* write the PMC value to the register/saved value: for
|
|
* sampling mode PMCs, the value to be programmed into the PMC
|
|
* counter is -(C+1) where 'C' is the requested sample rate.
|
|
*/
|
|
if (PMC_IS_SAMPLING_MODE(mode))
|
|
v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(v);
|
|
|
|
if (PMC_IS_SYSTEM_MODE(mode))
|
|
wrmsr(pd->pm_pmc_msr, v);
|
|
else
|
|
P4_PCPU_PMC_VALUE(pc,ri,cpu) = v;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Configure a PMC 'pm' on the given CPU and row-index.
|
|
*
|
|
* 'pm' may be NULL to indicate de-configuration.
|
|
*
|
|
* On HTT systems, a PMC may get configured twice, once for each
|
|
* "logical" CPU. We track this using the CFGFLAGS field of the
|
|
* per-cpu state; this field is a bit mask with one bit each for
|
|
* logical CPUs 0 & 1.
|
|
*/
|
|
|
|
static int
|
|
p4_config_pmc(int cpu, int ri, struct pmc *pm)
|
|
{
|
|
struct pmc_hw *phw;
|
|
struct p4_cpu *pc;
|
|
int cfgflags, cpuflag;
|
|
|
|
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
|
|
("[p4,%d] illegal CPU %d", __LINE__, cpu));
|
|
KASSERT(ri >= 0 && ri < P4_NPMCS,
|
|
("[p4,%d] illegal row-index %d", __LINE__, ri));
|
|
|
|
PMCDBG(MDP,CFG,1, "cpu=%d ri=%d pm=%p", cpu, ri, pm);
|
|
|
|
if (ri == 0) { /* TSC */
|
|
pc = (struct p4_cpu *) pmc_pcpu[cpu];
|
|
phw = pc->pc_hwpmcs[ri];
|
|
|
|
KASSERT(pm == NULL || phw->phw_pmc == NULL,
|
|
("[p4,%d] hwpmc doubly config'ed", __LINE__));
|
|
phw->phw_pmc = pm;
|
|
return 0;
|
|
}
|
|
|
|
/* Shared PMCs */
|
|
|
|
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)];
|
|
phw = pc->pc_hwpmcs[ri];
|
|
|
|
KASSERT(pm == NULL || phw->phw_pmc == NULL ||
|
|
(p4_system_has_htt && phw->phw_pmc == pm),
|
|
("[p4,%d] hwpmc not unconfigured before re-config", __LINE__));
|
|
|
|
mtx_lock_spin(&pc->pc_mtx);
|
|
cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri);
|
|
|
|
KASSERT(cfgflags >= 0 || cfgflags <= 3,
|
|
("[p4,%d] illegal cfgflags cfg=%d on cpu=%d ri=%d", __LINE__,
|
|
cfgflags, cpu, ri));
|
|
|
|
KASSERT(cfgflags == 0 || phw->phw_pmc,
|
|
("[p4,%d] cpu=%d ri=%d pmc configured with zero cfg count",
|
|
__LINE__, cpu, ri));
|
|
|
|
cpuflag = P4_CPU_TO_FLAG(cpu);
|
|
|
|
if (pm) { /* config */
|
|
if (cfgflags == 0)
|
|
phw->phw_pmc = pm;
|
|
|
|
KASSERT(phw->phw_pmc == pm,
|
|
("[p4,%d] cpu=%d ri=%d config %p != hw %p",
|
|
__LINE__, cpu, ri, pm, phw->phw_pmc));
|
|
|
|
cfgflags |= cpuflag;
|
|
} else { /* unconfig */
|
|
cfgflags &= ~cpuflag;
|
|
|
|
if (cfgflags == 0)
|
|
phw->phw_pmc = NULL;
|
|
}
|
|
|
|
KASSERT(cfgflags >= 0 || cfgflags <= 3,
|
|
("[p4,%d] illegal runcount cfg=%d on cpu=%d ri=%d", __LINE__,
|
|
cfgflags, cpu, ri));
|
|
|
|
P4_PCPU_SET_CFGFLAGS(pc,ri,cfgflags);
|
|
|
|
mtx_unlock_spin(&pc->pc_mtx);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Retrieve a configured PMC pointer from hardware state.
|
|
*/
|
|
|
|
static int
|
|
p4_get_config(int cpu, int ri, struct pmc **ppm)
|
|
{
|
|
struct p4_cpu *pc;
|
|
struct pmc_hw *phw;
|
|
int cfgflags;
|
|
|
|
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)];
|
|
phw = pc->pc_hwpmcs[ri];
|
|
|
|
mtx_lock_spin(&pc->pc_mtx);
|
|
cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri);
|
|
mtx_unlock_spin(&pc->pc_mtx);
|
|
|
|
if (cfgflags & P4_CPU_TO_FLAG(cpu))
|
|
*ppm = phw->phw_pmc; /* PMC config'ed on this CPU */
|
|
else
|
|
*ppm = NULL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Allocate a PMC.
|
|
*
|
|
* The allocation strategy differs between HTT and non-HTT systems.
|
|
*
|
|
* The non-HTT case:
|
|
* - Given the desired event and the PMC row-index, lookup the
|
|
* list of valid ESCRs for the event.
|
|
* - For each valid ESCR:
|
|
* - Check if the ESCR is free and the ESCR row is in a compatible
|
|
* mode (i.e., system or process))
|
|
* - Check if the ESCR is usable with a P4 PMC at the desired row-index.
|
|
* If everything matches, we determine the appropriate bit values for the
|
|
* ESCR and CCCR registers.
|
|
*
|
|
* The HTT case:
|
|
*
|
|
* - Process mode PMCs require special care. The FreeBSD scheduler could
|
|
* schedule any two processes on the same physical CPU. We need to ensure
|
|
* that a given PMC row-index is never allocated to two different
|
|
* PMCs owned by different user-processes.
|
|
* This is ensured by always allocating a PMC from a 'FREE' PMC row
|
|
* if the system has HTT active.
|
|
* - A similar check needs to be done for ESCRs; we do not want two PMCs
|
|
* using the same ESCR to be scheduled at the same time. Thus ESCR
|
|
* allocation is also restricted to FREE rows if the system has HTT
|
|
* enabled.
|
|
* - Thirdly, some events are 'thread-independent' terminology, i.e.,
|
|
* the PMC hardware cannot distinguish between events caused by
|
|
* different logical CPUs. This makes it impossible to assign events
|
|
* to a given thread of execution. If the system has HTT enabled,
|
|
* these events are not allowed for process-mode PMCs.
|
|
*/
|
|
|
|
static int
|
|
p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
|
|
const struct pmc_op_pmcallocate *a)
|
|
{
|
|
int found, n, m;
|
|
uint32_t caps, cccrvalue, escrvalue, tflags;
|
|
enum pmc_p4escr escr;
|
|
struct p4_cpu *pc;
|
|
struct p4_event_descr *pevent;
|
|
const struct p4pmc_descr *pd;
|
|
|
|
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
|
|
("[p4,%d] illegal CPU %d", __LINE__, cpu));
|
|
KASSERT(ri >= 0 && ri < P4_NPMCS,
|
|
("[p4,%d] illegal row-index value %d", __LINE__, ri));
|
|
|
|
pd = &p4_pmcdesc[ri];
|
|
|
|
PMCDBG(MDP,ALL,1, "p4-allocate ri=%d class=%d pmccaps=0x%x "
|
|
"reqcaps=0x%x", ri, pd->pm_descr.pd_class, pd->pm_descr.pd_caps,
|
|
pm->pm_caps);
|
|
|
|
/* check class */
|
|
if (pd->pm_descr.pd_class != a->pm_class)
|
|
return EINVAL;
|
|
|
|
/* check requested capabilities */
|
|
caps = a->pm_caps;
|
|
if ((pd->pm_descr.pd_caps & caps) != caps)
|
|
return EPERM;
|
|
|
|
if (pd->pm_descr.pd_class == PMC_CLASS_TSC) {
|
|
/* TSC's are always allocated in system-wide counting mode */
|
|
if (a->pm_ev != PMC_EV_TSC_TSC ||
|
|
a->pm_mode != PMC_MODE_SC)
|
|
return EINVAL;
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* If the system has HTT enabled, and the desired allocation
|
|
* mode is process-private, and the PMC row disposition is not
|
|
* free (0), decline the allocation.
|
|
*/
|
|
|
|
if (p4_system_has_htt &&
|
|
PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) &&
|
|
pmc_getrowdisp(ri) != 0)
|
|
return EBUSY;
|
|
|
|
KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
|
|
("[p4,%d] unknown PMC class %d", __LINE__,
|
|
pd->pm_descr.pd_class));
|
|
|
|
if (pm->pm_event < PMC_EV_P4_FIRST ||
|
|
pm->pm_event > PMC_EV_P4_LAST)
|
|
return EINVAL;
|
|
|
|
if ((pevent = p4_find_event(pm->pm_event)) == NULL)
|
|
return ESRCH;
|
|
|
|
PMCDBG(MDP,ALL,2, "pevent={ev=%d,escrsel=0x%x,cccrsel=0x%x,isti=%d}",
|
|
pevent->pm_event, pevent->pm_escr_eventselect,
|
|
pevent->pm_cccr_select, pevent->pm_is_ti_event);
|
|
|
|
/*
|
|
* Some PMC events are 'thread independent'and therefore
|
|
* cannot be used for process-private modes if HTT is being
|
|
* used.
|
|
*/
|
|
|
|
if (P4_EVENT_IS_TI(pevent) &&
|
|
PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) &&
|
|
p4_system_has_htt)
|
|
return EINVAL;
|
|
|
|
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)];
|
|
|
|
found = 0;
|
|
|
|
/* look for a suitable ESCR for this event */
|
|
for (n = 0; n < P4_MAX_ESCR_PER_EVENT && !found; n++) {
|
|
if ((escr = pevent->pm_escrs[n]) == P4_ESCR_NONE)
|
|
break; /* out of ESCRs */
|
|
/*
|
|
* Check ESCR row disposition.
|
|
*
|
|
* If the request is for a system-mode PMC, then the
|
|
* ESCR row should not be in process-virtual mode, and
|
|
* should also be free on the current CPU.
|
|
*/
|
|
|
|
if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
|
|
if (P4_ESCR_ROW_DISP_IS_THREAD(escr) ||
|
|
pc->pc_escrs[escr] != P4_INVALID_PMC_INDEX)
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* If the request is for a process-virtual PMC, and if
|
|
* HTT is not enabled, we can use an ESCR row that is
|
|
* either FREE or already in process mode.
|
|
*
|
|
* If HTT is enabled, then we need to ensure that a
|
|
* given ESCR is never allocated to two PMCS that
|
|
* could run simultaneously on the two logical CPUs of
|
|
* a CPU package. We ensure this be only allocating
|
|
* ESCRs from rows marked as 'FREE'.
|
|
*/
|
|
|
|
if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) {
|
|
if (p4_system_has_htt) {
|
|
if (!P4_ESCR_ROW_DISP_IS_FREE(escr))
|
|
continue;
|
|
} else
|
|
if (P4_ESCR_ROW_DISP_IS_STANDALONE(escr))
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* We found a suitable ESCR for this event. Now check if
|
|
* this escr can work with the PMC at row-index 'ri'.
|
|
*/
|
|
|
|
for (m = 0; m < P4_MAX_PMC_PER_ESCR; m++)
|
|
if (p4_escrs[escr].pm_pmcs[m] == pd->pm_pmcnum) {
|
|
found = 1;
|
|
break;
|
|
}
|
|
}
|
|
|
|
if (found == 0)
|
|
return ESRCH;
|
|
|
|
KASSERT((int) escr >= 0 && escr < P4_NESCR,
|
|
("[p4,%d] illegal ESCR value %d", __LINE__, escr));
|
|
|
|
/* mark ESCR row mode */
|
|
if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
|
|
pc->pc_escrs[escr] = ri; /* mark ESCR as in use on this cpu */
|
|
P4_ESCR_MARK_ROW_STANDALONE(escr);
|
|
} else {
|
|
KASSERT(pc->pc_escrs[escr] == P4_INVALID_PMC_INDEX,
|
|
("[p4,%d] escr[%d] already in use", __LINE__, escr));
|
|
P4_ESCR_MARK_ROW_THREAD(escr);
|
|
}
|
|
|
|
pm->pm_md.pm_p4.pm_p4_escrmsr = p4_escrs[escr].pm_escr_msr;
|
|
pm->pm_md.pm_p4.pm_p4_escr = escr;
|
|
|
|
cccrvalue = P4_CCCR_TO_ESCR_SELECT(pevent->pm_cccr_select);
|
|
escrvalue = P4_ESCR_TO_EVENT_SELECT(pevent->pm_escr_eventselect);
|
|
|
|
/* CCCR fields */
|
|
if (caps & PMC_CAP_THRESHOLD)
|
|
cccrvalue |= (a->pm_md.pm_p4.pm_p4_cccrconfig &
|
|
P4_CCCR_THRESHOLD_MASK) | P4_CCCR_COMPARE;
|
|
|
|
if (caps & PMC_CAP_EDGE)
|
|
cccrvalue |= P4_CCCR_EDGE;
|
|
|
|
if (caps & PMC_CAP_INVERT)
|
|
cccrvalue |= P4_CCCR_COMPLEMENT;
|
|
|
|
if (p4_system_has_htt)
|
|
cccrvalue |= a->pm_md.pm_p4.pm_p4_cccrconfig &
|
|
P4_CCCR_ACTIVE_THREAD_MASK;
|
|
else /* no HTT; thread field should be '11b' */
|
|
cccrvalue |= P4_CCCR_TO_ACTIVE_THREAD(0x3);
|
|
|
|
if (caps & PMC_CAP_CASCADE)
|
|
cccrvalue |= P4_CCCR_CASCADE;
|
|
|
|
/* On HTT systems the PMI T0 field may get moved to T1 at pmc start */
|
|
if (caps & PMC_CAP_INTERRUPT)
|
|
cccrvalue |= P4_CCCR_OVF_PMI_T0;
|
|
|
|
/* ESCR fields */
|
|
if (caps & PMC_CAP_QUALIFIER)
|
|
escrvalue |= a->pm_md.pm_p4.pm_p4_escrconfig &
|
|
P4_ESCR_EVENT_MASK_MASK;
|
|
if (caps & PMC_CAP_TAGGING)
|
|
escrvalue |= (a->pm_md.pm_p4.pm_p4_escrconfig &
|
|
P4_ESCR_TAG_VALUE_MASK) | P4_ESCR_TAG_ENABLE;
|
|
if (caps & PMC_CAP_QUALIFIER)
|
|
escrvalue |= (a->pm_md.pm_p4.pm_p4_escrconfig &
|
|
P4_ESCR_EVENT_MASK_MASK);
|
|
|
|
/* HTT: T0_{OS,USR} bits may get moved to T1 at pmc start */
|
|
tflags = 0;
|
|
if (caps & PMC_CAP_SYSTEM)
|
|
tflags |= P4_ESCR_T0_OS;
|
|
if (caps & PMC_CAP_USER)
|
|
tflags |= P4_ESCR_T0_USR;
|
|
if (tflags == 0)
|
|
tflags = (P4_ESCR_T0_OS|P4_ESCR_T0_USR);
|
|
escrvalue |= tflags;
|
|
|
|
pm->pm_md.pm_p4.pm_p4_cccrvalue = cccrvalue;
|
|
pm->pm_md.pm_p4.pm_p4_escrvalue = escrvalue;
|
|
|
|
PMCDBG(MDP,ALL,2, "p4-allocate cccrsel=0x%x cccrval=0x%x "
|
|
"escr=%d escrmsr=0x%x escrval=0x%x", pevent->pm_cccr_select,
|
|
cccrvalue, escr, pm->pm_md.pm_p4.pm_p4_escrmsr, escrvalue);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* release a PMC.
|
|
*/
|
|
|
|
static int
|
|
p4_release_pmc(int cpu, int ri, struct pmc *pm)
|
|
{
|
|
enum pmc_p4escr escr;
|
|
struct pmc_hw *phw;
|
|
struct p4_cpu *pc;
|
|
|
|
if (p4_pmcdesc[ri].pm_descr.pd_class == PMC_CLASS_TSC)
|
|
return 0;
|
|
|
|
escr = pm->pm_md.pm_p4.pm_p4_escr;
|
|
|
|
PMCDBG(MDP,REL,1, "p4-release cpu=%d ri=%d escr=%d", cpu, ri, escr);
|
|
|
|
if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
|
|
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)];
|
|
phw = pc->pc_hwpmcs[ri];
|
|
|
|
KASSERT(phw->phw_pmc == NULL,
|
|
("[p4,%d] releasing configured PMC ri=%d", __LINE__, ri));
|
|
|
|
P4_ESCR_UNMARK_ROW_STANDALONE(escr);
|
|
KASSERT(pc->pc_escrs[escr] == ri,
|
|
("[p4,%d] escr[%d] not allocated to ri %d", __LINE__,
|
|
escr, ri));
|
|
pc->pc_escrs[escr] = P4_INVALID_PMC_INDEX; /* mark as free */
|
|
} else
|
|
P4_ESCR_UNMARK_ROW_THREAD(escr);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Start a PMC
|
|
*/
|
|
|
|
static int
|
|
p4_start_pmc(int cpu, int ri)
|
|
{
|
|
int rc;
|
|
uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits;
|
|
struct pmc *pm;
|
|
struct p4_cpu *pc;
|
|
struct pmc_hw *phw;
|
|
struct p4pmc_descr *pd;
|
|
|
|
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
|
|
("[p4,%d] illegal CPU value %d", __LINE__, cpu));
|
|
KASSERT(ri >= 0 && ri < P4_NPMCS,
|
|
("[p4,%d] illegal row-index %d", __LINE__, ri));
|
|
|
|
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)];
|
|
phw = pc->pc_hwpmcs[ri];
|
|
pm = phw->phw_pmc;
|
|
pd = &p4_pmcdesc[ri];
|
|
|
|
KASSERT(pm != NULL,
|
|
("[p4,%d] starting cpu%d,pmc%d with null pmc", __LINE__,
|
|
cpu, ri));
|
|
|
|
PMCDBG(MDP,STA,1, "p4-start cpu=%d ri=%d", cpu, ri);
|
|
|
|
if (pd->pm_descr.pd_class == PMC_CLASS_TSC) /* TSC are always on */
|
|
return 0;
|
|
|
|
KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
|
|
("[p4,%d] wrong PMC class %d", __LINE__,
|
|
pd->pm_descr.pd_class));
|
|
|
|
/* retrieve the desired CCCR/ESCR values from the PMC */
|
|
cccrvalue = pm->pm_md.pm_p4.pm_p4_cccrvalue;
|
|
escrvalue = pm->pm_md.pm_p4.pm_p4_escrvalue;
|
|
escrmsr = pm->pm_md.pm_p4.pm_p4_escrmsr;
|
|
|
|
/* extract and zero the logical processor selection bits */
|
|
cccrtbits = cccrvalue & P4_CCCR_OVF_PMI_T0;
|
|
escrtbits = escrvalue & (P4_ESCR_T0_OS|P4_ESCR_T0_USR);
|
|
cccrvalue &= ~P4_CCCR_OVF_PMI_T0;
|
|
escrvalue &= ~(P4_ESCR_T0_OS|P4_ESCR_T0_USR);
|
|
|
|
if (P4_CPU_IS_HTT_SECONDARY(cpu)) { /* shift T0 bits to T1 position */
|
|
cccrtbits <<= 1;
|
|
escrtbits >>= 2;
|
|
}
|
|
|
|
/* start system mode PMCs directly */
|
|
if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
|
|
wrmsr(escrmsr, escrvalue | escrtbits);
|
|
wrmsr(pd->pm_cccr_msr, cccrvalue | cccrtbits | P4_CCCR_ENABLE);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Thread mode PMCs
|
|
*
|
|
* On HTT machines, the same PMC could be scheduled on the
|
|
* same physical CPU twice (once for each logical CPU), for
|
|
* example, if two threads of a multi-threaded process get
|
|
* scheduled on the same CPU.
|
|
*
|
|
*/
|
|
|
|
mtx_lock_spin(&pc->pc_mtx);
|
|
|
|
rc = P4_PCPU_GET_RUNCOUNT(pc,ri);
|
|
KASSERT(rc == 0 || rc == 1,
|
|
("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri,
|
|
rc));
|
|
|
|
if (rc == 0) { /* 1st CPU and the non-HTT case */
|
|
|
|
KASSERT(P4_PMC_IS_STOPPED(pd->pm_cccr_msr),
|
|
("[p4,%d] cpu=%d ri=%d cccr=0x%x not stopped", __LINE__,
|
|
cpu, ri, pd->pm_cccr_msr));
|
|
|
|
/* write out the low 40 bits of the saved value to hardware */
|
|
wrmsr(pd->pm_pmc_msr,
|
|
P4_PCPU_PMC_VALUE(pc,ri,cpu) & P4_PERFCTR_MASK);
|
|
|
|
} else if (rc == 1) { /* 2nd CPU */
|
|
|
|
/*
|
|
* Stop the PMC and retrieve the CCCR and ESCR values
|
|
* from their MSRs, and turn on the additional T[0/1]
|
|
* bits for the 2nd CPU.
|
|
*/
|
|
|
|
cccrvalue = rdmsr(pd->pm_cccr_msr);
|
|
wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);
|
|
|
|
/* check that the configuration bits read back match the PMC */
|
|
KASSERT((cccrvalue & P4_CCCR_Tx_MASK) ==
|
|
(pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK),
|
|
("[p4,%d] Extra CCCR bits cpu=%d rc=%d ri=%d "
|
|
"cccr=0x%x PMC=0x%x", __LINE__, cpu, rc, ri,
|
|
cccrvalue & P4_CCCR_Tx_MASK,
|
|
pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK));
|
|
KASSERT(cccrvalue & P4_CCCR_ENABLE,
|
|
("[p4,%d] 2nd cpu rc=%d cpu=%d ri=%d not running",
|
|
__LINE__, rc, cpu, ri));
|
|
KASSERT((cccrvalue & cccrtbits) == 0,
|
|
("[p4,%d] CCCR T0/T1 mismatch rc=%d cpu=%d ri=%d"
|
|
"cccrvalue=0x%x tbits=0x%x", __LINE__, rc, cpu, ri,
|
|
cccrvalue, cccrtbits));
|
|
|
|
escrvalue = rdmsr(escrmsr);
|
|
|
|
KASSERT((escrvalue & P4_ESCR_Tx_MASK) ==
|
|
(pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK),
|
|
("[p4,%d] Extra ESCR bits cpu=%d rc=%d ri=%d "
|
|
"escr=0x%x pm=0x%x", __LINE__, cpu, rc, ri,
|
|
escrvalue & P4_ESCR_Tx_MASK,
|
|
pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK));
|
|
KASSERT((escrvalue & escrtbits) == 0,
|
|
("[p4,%d] ESCR T0/T1 mismatch rc=%d cpu=%d ri=%d "
|
|
"escrmsr=0x%x escrvalue=0x%x tbits=0x%x", __LINE__,
|
|
rc, cpu, ri, escrmsr, escrvalue, escrtbits));
|
|
}
|
|
|
|
/* Enable the correct bits for this CPU. */
|
|
escrvalue |= escrtbits;
|
|
cccrvalue |= cccrtbits | P4_CCCR_ENABLE;
|
|
|
|
/* Save HW value at the time of starting hardware */
|
|
P4_PCPU_HW_VALUE(pc,ri,cpu) = rdmsr(pd->pm_pmc_msr);
|
|
|
|
/* Program the ESCR and CCCR and start the PMC */
|
|
wrmsr(escrmsr, escrvalue);
|
|
wrmsr(pd->pm_cccr_msr, cccrvalue);
|
|
|
|
++rc;
|
|
P4_PCPU_SET_RUNCOUNT(pc,ri,rc);
|
|
|
|
mtx_unlock_spin(&pc->pc_mtx);
|
|
|
|
PMCDBG(MDP,STA,2,"p4-start cpu=%d rc=%d ri=%d escr=%d "
|
|
"escrmsr=0x%x escrvalue=0x%x cccr_config=0x%x v=%jx", cpu, rc,
|
|
ri, pm->pm_md.pm_p4.pm_p4_escr, escrmsr, escrvalue,
|
|
cccrvalue, P4_PCPU_HW_VALUE(pc,ri,cpu));
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Stop a PMC.
|
|
*/
|
|
|
|
static int
|
|
p4_stop_pmc(int cpu, int ri)
|
|
{
|
|
int rc;
|
|
uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits;
|
|
struct pmc *pm;
|
|
struct p4_cpu *pc;
|
|
struct pmc_hw *phw;
|
|
struct p4pmc_descr *pd;
|
|
pmc_value_t tmp;
|
|
|
|
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
|
|
("[p4,%d] illegal CPU value %d", __LINE__, cpu));
|
|
KASSERT(ri >= 0 && ri < P4_NPMCS,
|
|
("[p4,%d] illegal row index %d", __LINE__, ri));
|
|
|
|
pd = &p4_pmcdesc[ri];
|
|
|
|
if (pd->pm_descr.pd_class == PMC_CLASS_TSC)
|
|
return 0;
|
|
|
|
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)];
|
|
phw = pc->pc_hwpmcs[ri];
|
|
|
|
KASSERT(phw != NULL,
|
|
("[p4,%d] null phw for cpu%d, ri%d", __LINE__, cpu, ri));
|
|
|
|
pm = phw->phw_pmc;
|
|
|
|
KASSERT(pm != NULL,
|
|
("[p4,%d] null pmc for cpu%d, ri%d", __LINE__, cpu, ri));
|
|
|
|
PMCDBG(MDP,STO,1, "p4-stop cpu=%d ri=%d", cpu, ri);
|
|
|
|
if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
|
|
wrmsr(pd->pm_cccr_msr,
|
|
pm->pm_md.pm_p4.pm_p4_cccrvalue & ~P4_CCCR_ENABLE);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Thread mode PMCs.
|
|
*
|
|
* On HTT machines, this PMC may be in use by two threads
|
|
* running on two logical CPUS. Thus we look at the
|
|
* 'runcount' field and only turn off the appropriate TO/T1
|
|
* bits (and keep the PMC running) if two logical CPUs were
|
|
* using the PMC.
|
|
*
|
|
*/
|
|
|
|
/* bits to mask */
|
|
cccrtbits = P4_CCCR_OVF_PMI_T0;
|
|
escrtbits = P4_ESCR_T0_OS | P4_ESCR_T0_USR;
|
|
if (P4_CPU_IS_HTT_SECONDARY(cpu)) {
|
|
cccrtbits <<= 1;
|
|
escrtbits >>= 2;
|
|
}
|
|
|
|
mtx_lock_spin(&pc->pc_mtx);
|
|
|
|
rc = P4_PCPU_GET_RUNCOUNT(pc,ri);
|
|
|
|
KASSERT(rc == 2 || rc == 1,
|
|
("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri,
|
|
rc));
|
|
|
|
--rc;
|
|
|
|
P4_PCPU_SET_RUNCOUNT(pc,ri,rc);
|
|
|
|
/* Stop this PMC */
|
|
cccrvalue = rdmsr(pd->pm_cccr_msr);
|
|
wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);
|
|
|
|
escrmsr = pm->pm_md.pm_p4.pm_p4_escrmsr;
|
|
escrvalue = rdmsr(escrmsr);
|
|
|
|
/* The current CPU should be running on this PMC */
|
|
KASSERT(escrvalue & escrtbits,
|
|
("[p4,%d] ESCR T0/T1 mismatch cpu=%d rc=%d ri=%d escrmsr=0x%x "
|
|
"escrvalue=0x%x tbits=0x%x", __LINE__, cpu, rc, ri, escrmsr,
|
|
escrvalue, escrtbits));
|
|
KASSERT(PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)) ||
|
|
(cccrvalue & cccrtbits),
|
|
("[p4,%d] CCCR T0/T1 mismatch cpu=%d ri=%d cccrvalue=0x%x "
|
|
"tbits=0x%x", __LINE__, cpu, ri, cccrvalue, cccrtbits));
|
|
|
|
/* get the current hardware reading */
|
|
tmp = rdmsr(pd->pm_pmc_msr);
|
|
|
|
if (rc == 1) { /* need to keep the PMC running */
|
|
escrvalue &= ~escrtbits;
|
|
cccrvalue &= ~cccrtbits;
|
|
wrmsr(escrmsr, escrvalue);
|
|
wrmsr(pd->pm_cccr_msr, cccrvalue);
|
|
}
|
|
|
|
mtx_unlock_spin(&pc->pc_mtx);
|
|
|
|
PMCDBG(MDP,STO,2, "p4-stop cpu=%d rc=%d ri=%d escrmsr=0x%x "
|
|
"escrval=0x%x cccrval=0x%x v=%jx", cpu, rc, ri, escrmsr,
|
|
escrvalue, cccrvalue, tmp);
|
|
|
|
if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit counter overflow */
|
|
tmp += (P4_PERFCTR_MASK + 1) - P4_PCPU_HW_VALUE(pc,ri,cpu);
|
|
else
|
|
tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu);
|
|
|
|
P4_PCPU_PMC_VALUE(pc,ri,cpu) += tmp;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Handle an interrupt.
|
|
*
|
|
* The hardware sets the CCCR_OVF whenever a counter overflow occurs,
|
|
* so the handler examines all the 18 CCCR registers, processing the
|
|
* counters that have overflowed.
|
|
*
|
|
* On HTT machines, the CCCR register is shared and will interrupt
|
|
* both logical processors if so configured. Thus multiple logical
|
|
* CPUs could enter the NMI service routine at the same time. These
|
|
* will get serialized using a per-cpu spinlock dedicated for use in
|
|
* the NMI handler.
|
|
*/
|
|
|
|
static int
|
|
p4_intr(int cpu, struct trapframe *tf)
|
|
{
|
|
uint32_t cccrval, ovf_mask, ovf_partner;
|
|
int i, did_interrupt, error, ri;
|
|
struct pmc_hw *phw;
|
|
struct p4_cpu *pc;
|
|
struct pmc *pm;
|
|
pmc_value_t v;
|
|
|
|
PMCDBG(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
|
|
TRAPF_USERMODE(tf));
|
|
|
|
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)];
|
|
|
|
ovf_mask = P4_CPU_IS_HTT_SECONDARY(cpu) ?
|
|
P4_CCCR_OVF_PMI_T1 : P4_CCCR_OVF_PMI_T0;
|
|
ovf_mask |= P4_CCCR_OVF;
|
|
if (p4_system_has_htt)
|
|
ovf_partner = P4_CPU_IS_HTT_SECONDARY(cpu) ?
|
|
P4_CCCR_OVF_PMI_T0 : P4_CCCR_OVF_PMI_T1;
|
|
else
|
|
ovf_partner = 0;
|
|
did_interrupt = 0;
|
|
|
|
if (p4_system_has_htt)
|
|
P4_PCPU_ACQ_INTR_SPINLOCK(pc);
|
|
|
|
/*
|
|
* Loop through all CCCRs, looking for ones that have
|
|
* interrupted this CPU.
|
|
*/
|
|
for (i = 0; i < P4_NPMCS-1; i++) {
|
|
|
|
ri = i + 1; /* row index */
|
|
|
|
/*
|
|
* Check if our partner logical CPU has already marked
|
|
* this PMC has having interrupted it. If so, reset
|
|
* the flag and process the interrupt, but leave the
|
|
* hardware alone.
|
|
*/
|
|
if (p4_system_has_htt && P4_PCPU_GET_INTRFLAG(pc,ri)) {
|
|
P4_PCPU_SET_INTRFLAG(pc,ri,0);
|
|
did_interrupt = 1;
|
|
|
|
/*
|
|
* Ignore de-configured or stopped PMCs.
|
|
* Ignore PMCs not in sampling mode.
|
|
*/
|
|
phw = pc->pc_hwpmcs[ri];
|
|
pm = phw->phw_pmc;
|
|
if (pm == NULL ||
|
|
pm->pm_state != PMC_STATE_RUNNING ||
|
|
!PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
|
|
continue;
|
|
}
|
|
(void) pmc_process_interrupt(cpu, pm, tf,
|
|
TRAPF_USERMODE(tf));
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Fresh interrupt. Look for the CCCR_OVF bit
|
|
* and the OVF_Tx bit for this logical
|
|
* processor being set.
|
|
*/
|
|
cccrval = rdmsr(P4_CCCR_MSR_FIRST + i);
|
|
|
|
if ((cccrval & ovf_mask) != ovf_mask)
|
|
continue;
|
|
|
|
/*
|
|
* If the other logical CPU would also have been
|
|
* interrupted due to the PMC being shared, record
|
|
* this fact in the per-cpu saved interrupt flag
|
|
* bitmask.
|
|
*/
|
|
if (p4_system_has_htt && (cccrval & ovf_partner))
|
|
P4_PCPU_SET_INTRFLAG(pc, ri, 1);
|
|
|
|
v = rdmsr(P4_PERFCTR_MSR_FIRST + i);
|
|
|
|
PMCDBG(MDP,INT, 2, "ri=%d v=%jx", ri, v);
|
|
|
|
/* Stop the counter, and reset the overflow bit */
|
|
cccrval &= ~(P4_CCCR_OVF | P4_CCCR_ENABLE);
|
|
wrmsr(P4_CCCR_MSR_FIRST + i, cccrval);
|
|
|
|
did_interrupt = 1;
|
|
|
|
/*
|
|
* Ignore de-configured or stopped PMCs. Ignore PMCs
|
|
* not in sampling mode.
|
|
*/
|
|
phw = pc->pc_hwpmcs[ri];
|
|
pm = phw->phw_pmc;
|
|
|
|
if (pm == NULL ||
|
|
pm->pm_state != PMC_STATE_RUNNING ||
|
|
!PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
|
|
continue;
|
|
}
|
|
|
|
/*
|
|
* Process the interrupt. Re-enable the PMC if
|
|
* processing was successful.
|
|
*/
|
|
error = pmc_process_interrupt(cpu, pm, tf,
|
|
TRAPF_USERMODE(tf));
|
|
|
|
/*
|
|
* Only the first processor executing the NMI handler
|
|
* in a HTT pair will restart a PMC, and that too
|
|
* only if there were no errors.
|
|
*/
|
|
v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(
|
|
pm->pm_sc.pm_reloadcount);
|
|
wrmsr(P4_PERFCTR_MSR_FIRST + i, v);
|
|
if (error == 0)
|
|
wrmsr(P4_CCCR_MSR_FIRST + i,
|
|
cccrval | P4_CCCR_ENABLE);
|
|
}
|
|
|
|
/* allow the other CPU to proceed */
|
|
if (p4_system_has_htt)
|
|
P4_PCPU_REL_INTR_SPINLOCK(pc);
|
|
|
|
/*
|
|
* On Intel P4 CPUs, the PMC 'pcint' entry in the LAPIC gets
|
|
* masked when a PMC interrupts the CPU. We need to unmask
|
|
* the interrupt source explicitly.
|
|
*/
|
|
|
|
if (did_interrupt)
|
|
pmc_x86_lapic_enable_pmc_interrupt();
|
|
|
|
atomic_add_int(did_interrupt ? &pmc_stats.pm_intr_processed :
|
|
&pmc_stats.pm_intr_ignored, 1);
|
|
|
|
return (did_interrupt);
|
|
}
|
|
|
|
/*
|
|
* Describe a CPU's PMC state.
|
|
*/
|
|
|
|
static int
|
|
p4_describe(int cpu, int ri, struct pmc_info *pi,
|
|
struct pmc **ppmc)
|
|
{
|
|
int error;
|
|
size_t copied;
|
|
struct pmc_hw *phw;
|
|
const struct p4pmc_descr *pd;
|
|
|
|
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
|
|
("[p4,%d] illegal CPU %d", __LINE__, cpu));
|
|
KASSERT(ri >= 0 && ri < P4_NPMCS,
|
|
("[p4,%d] row-index %d out of range", __LINE__, ri));
|
|
|
|
PMCDBG(MDP,OPS,1,"p4-describe cpu=%d ri=%d", cpu, ri);
|
|
|
|
if (P4_CPU_IS_HTT_SECONDARY(cpu))
|
|
return EINVAL;
|
|
|
|
phw = pmc_pcpu[cpu]->pc_hwpmcs[ri];
|
|
pd = &p4_pmcdesc[ri];
|
|
|
|
if ((error = copystr(pd->pm_descr.pd_name, pi->pm_name,
|
|
PMC_NAME_MAX, &copied)) != 0)
|
|
return error;
|
|
|
|
pi->pm_class = pd->pm_descr.pd_class;
|
|
|
|
if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
|
|
pi->pm_enabled = TRUE;
|
|
*ppmc = phw->phw_pmc;
|
|
} else {
|
|
pi->pm_enabled = FALSE;
|
|
*ppmc = NULL;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Get MSR# for use with RDPMC.
|
|
*/
|
|
|
|
static int
|
|
p4_get_msr(int ri, uint32_t *msr)
|
|
{
|
|
KASSERT(ri >= 0 && ri < P4_NPMCS,
|
|
("[p4,%d] ri %d out of range", __LINE__, ri));
|
|
|
|
*msr = p4_pmcdesc[ri].pm_pmc_msr - P4_PERFCTR_MSR_FIRST;
|
|
|
|
PMCDBG(MDP,OPS, 1, "ri=%d getmsr=0x%x", ri, *msr);
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
int
|
|
pmc_initialize_p4(struct pmc_mdep *pmc_mdep)
|
|
{
|
|
struct p4_event_descr *pe;
|
|
|
|
KASSERT(strcmp(cpu_vendor, "GenuineIntel") == 0,
|
|
("[p4,%d] Initializing non-intel processor", __LINE__));
|
|
|
|
PMCDBG(MDP,INI,1, "%s", "p4-initialize");
|
|
|
|
switch (pmc_mdep->pmd_cputype) {
|
|
case PMC_CPU_INTEL_PIV:
|
|
|
|
pmc_mdep->pmd_npmc = P4_NPMCS;
|
|
pmc_mdep->pmd_classes[1].pm_class = PMC_CLASS_P4;
|
|
pmc_mdep->pmd_classes[1].pm_caps = P4_PMC_CAPS;
|
|
pmc_mdep->pmd_classes[1].pm_width = 40;
|
|
pmc_mdep->pmd_nclasspmcs[1] = 18;
|
|
|
|
pmc_mdep->pmd_init = p4_init;
|
|
pmc_mdep->pmd_cleanup = p4_cleanup;
|
|
pmc_mdep->pmd_switch_in = p4_switch_in;
|
|
pmc_mdep->pmd_switch_out = p4_switch_out;
|
|
pmc_mdep->pmd_read_pmc = p4_read_pmc;
|
|
pmc_mdep->pmd_write_pmc = p4_write_pmc;
|
|
pmc_mdep->pmd_config_pmc = p4_config_pmc;
|
|
pmc_mdep->pmd_get_config = p4_get_config;
|
|
pmc_mdep->pmd_allocate_pmc = p4_allocate_pmc;
|
|
pmc_mdep->pmd_release_pmc = p4_release_pmc;
|
|
pmc_mdep->pmd_start_pmc = p4_start_pmc;
|
|
pmc_mdep->pmd_stop_pmc = p4_stop_pmc;
|
|
pmc_mdep->pmd_intr = p4_intr;
|
|
pmc_mdep->pmd_describe = p4_describe;
|
|
pmc_mdep->pmd_get_msr = p4_get_msr; /* i386 */
|
|
|
|
/* model specific munging */
|
|
if ((cpu_id & 0xFFF) < 0xF27) {
|
|
|
|
/*
|
|
* On P4 and Xeon with CPUID < (Family 15,
|
|
* Model 2, Stepping 7), only one ESCR is
|
|
* available for the IOQ_ALLOCATION event.
|
|
*/
|
|
|
|
pe = p4_find_event(PMC_EV_P4_IOQ_ALLOCATION);
|
|
pe->pm_escrs[1] = P4_ESCR_NONE;
|
|
}
|
|
|
|
break;
|
|
|
|
default:
|
|
KASSERT(0,("[p4,%d] Unknown CPU type", __LINE__));
|
|
return ENOSYS;
|
|
}
|
|
|
|
return 0;
|
|
}
|