freebsd-skq/sys/dev/hwpmc/hwpmc_piv.c
mmacy 2981a3420c hwpmc(9): Make pmclog buffer pcpu and update constants
On non-trivial SMP systems the contention on the pmc_owner mutex leads
to a substantial number of samples captured being from the pmc process
itself. This change a) makes buffers larger to avoid contention on the
global list b) makes the working sample buffer per cpu.

Run pmcstat in the background (default event rate of 64k):
pmcstat -S UNHALTED_CORE_CYCLES -O /dev/null sleep 600 &

Before:
make -j96 buildkernel -s >&/dev/null 3336.68s user 24684.10s system 7442% cpu 6:16.50 total

After:
make -j96 buildkernel -s >&/dev/null 2697.82s user 1347.35s system 6058% cpu 1:06.77 total

For more realistic overhead measurement set the sample rate for ~2khz
on a 2.1Ghz processor:
pmcstat -n 1050000 -S UNHALTED_CORE_CYCLES -O /dev/null sleep 6000 &

Collecting 10 samples of `make -j96 buildkernel` from each:

x before
+ after

real time:
    N           Min           Max        Median           Avg        Stddev
x  10          76.4        127.62        84.845        88.577     15.100031
+  10         59.71         60.79        60.135        60.179    0.29957192
Difference at 95.0% confidence
        -28.398 +/- 10.0344
        -32.0602% +/- 7.69825%
        (Student's t, pooled s = 10.6794)

system time:
    N           Min           Max        Median           Avg        Stddev
x  10       2277.96       6948.53       2949.47      3341.492     1385.2677
+  10        1038.7       1081.06      1070.555      1064.017      15.85404
Difference at 95.0% confidence
        -2277.47 +/- 920.425
        -68.1574% +/- 8.77623%
        (Student's t, pooled s = 979.596)

x no pmc
+ pmc running
real time:

HEAD:
    N           Min           Max        Median           Avg        Stddev
x  10         58.38         59.15         58.86        58.847    0.22504567
+  10          76.4        127.62        84.845        88.577     15.100031
Difference at 95.0% confidence
        29.73 +/- 10.0335
        50.5208% +/- 17.0525%
        (Student's t, pooled s = 10.6785)

patched:
    N           Min           Max        Median           Avg        Stddev
x  10         58.38         59.15         58.86        58.847    0.22504567
+  10         59.71         60.79        60.135        60.179    0.29957192
Difference at 95.0% confidence
        1.332 +/- 0.248939
        2.2635% +/- 0.426506%
        (Student's t, pooled s = 0.264942)

system time:

HEAD:
    N           Min           Max        Median           Avg        Stddev
x  10       1010.15       1073.31      1025.465      1031.524     18.135705
+  10       2277.96       6948.53       2949.47      3341.492     1385.2677
Difference at 95.0% confidence
        2309.97 +/- 920.443
        223.937% +/- 89.3039%
        (Student's t, pooled s = 979.616)

patched:
    N           Min           Max        Median           Avg        Stddev
x  10       1010.15       1073.31      1025.465      1031.524     18.135705
+  10        1038.7       1081.06      1070.555      1064.017      15.85404
Difference at 95.0% confidence
        32.493 +/- 16.0042
        3.15% +/- 1.5794%
        (Student's t, pooled s = 17.0331)

Reviewed by:	jeff@
Approved by:	sbruno@
Differential Revision:	https://reviews.freebsd.org/D15155
2018-05-12 01:26:34 +00:00

1703 lines
50 KiB
C

/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2003-2007 Joseph Koshy
* Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
* Portions of this software were developed by A. Joseph Koshy under
* sponsorship from the FreeBSD Foundation and Google, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/pmc.h>
#include <sys/pmckern.h>
#include <sys/smp.h>
#include <sys/systm.h>
#include <machine/intr_machdep.h>
#if (__FreeBSD_version >= 1100000)
#include <x86/apicvar.h>
#else
#include <machine/apicvar.h>
#endif
#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
/*
* PENTIUM 4 SUPPORT
*
* The P4 has 18 PMCs, divided into 4 groups with 4,4,4 and 6 PMCs
* respectively. Each PMC consists of two model specific registers:
* a counter configuration control register (CCCR) and a counter
* register that holds the actual event counts.
*
* Configuring an event requires the use of one of 45 event selection
* control registers (ESCR). Events are associated with specific
* ESCRs. Each PMC group has a set of ESCRs it can use.
*
* - The BPU counter group (4 PMCs) can use the 16 ESCRs:
* BPU_ESCR{0,1}, IS_ESCR{0,1}, MOB_ESCR{0,1}, ITLB_ESCR{0,1},
* PMH_ESCR{0,1}, IX_ESCR{0,1}, FSB_ESCR{0,1}, BSU_ESCR{0,1}.
*
* - The MS counter group (4 PMCs) can use the 6 ESCRs: MS_ESCR{0,1},
* TC_ESCR{0,1}, TBPU_ESCR{0,1}.
*
* - The FLAME counter group (4 PMCs) can use the 10 ESCRs:
* FLAME_ESCR{0,1}, FIRM_ESCR{0,1}, SAAT_ESCR{0,1}, U2L_ESCR{0,1},
* DAC_ESCR{0,1}.
*
* - The IQ counter group (6 PMCs) can use the 13 ESCRs: IQ_ESCR{0,1},
* ALF_ESCR{0,1}, RAT_ESCR{0,1}, SSU_ESCR0, CRU_ESCR{0,1,2,3,4,5}.
*
* Even-numbered ESCRs can be used with counters 0, 1 and 4 (if
* present) of a counter group. Odd-numbered ESCRs can be used with
* counters 2, 3 and 5 (if present) of a counter group. The
* 'p4_escrs[]' table describes these restrictions in a form that
* function 'p4_allocate()' uses for making allocation decisions.
*
* SYSTEM-MODE AND THREAD-MODE ALLOCATION
*
* In addition to remembering the state of PMC rows
* ('FREE','STANDALONE', or 'THREAD'), we similarly need to track the
* state of ESCR rows. If an ESCR is allocated to a system-mode PMC
* on a CPU we cannot allocate this to a thread-mode PMC. On a
* multi-cpu (multiple physical CPUs) system, ESCR allocation on each
* CPU is tracked by the pc_escrs[] array.
*
* Each system-mode PMC that is using an ESCR records its row-index in
* the appropriate entry and system-mode allocation attempts check
* that an ESCR is available using this array. Process-mode PMCs do
* not use the pc_escrs[] array, since ESCR row itself would have been
* marked as in 'THREAD' mode.
*
* HYPERTHREADING SUPPORT
*
* When HTT is enabled, the FreeBSD kernel treats the two 'logical'
* cpus as independent CPUs and can schedule kernel threads on them
* independently. However, the two logical CPUs share the same set of
* PMC resources. We need to ensure that:
* - PMCs that use the PMC_F_DESCENDANTS semantics are handled correctly,
* and,
* - Threads of multi-threaded processes that get scheduled on the same
* physical CPU are handled correctly.
*
* HTT Detection
*
* Not all HTT capable systems will have HTT enabled. We detect the
* presence of HTT by detecting if 'p4_init()' was called for a secondary
* CPU in a HTT pair.
*
* Note that hwpmc(4) cannot currently deal with a change in HTT status once
* loaded.
*
* Handling HTT READ / WRITE / START / STOP
*
* PMC resources are shared across the CPUs in an HTT pair. We
* designate the lower numbered CPU in a HTT pair as the 'primary'
* CPU. In each primary CPU's state we keep track of a 'runcount'
* which reflects the number of PMC-using processes that have been
* scheduled on its secondary CPU. Process-mode PMC operations will
* actually 'start' or 'stop' hardware only if these are the first or
* last processes respectively to use the hardware. PMC values
* written by a 'write' operation are saved and are transferred to
* hardware at PMC 'start' time if the runcount is 0. If the runcount
* is greater than 0 at the time of a 'start' operation, we keep track
* of the actual hardware value at the time of the 'start' operation
* and use this to adjust the final readings at PMC 'stop' or 'read'
* time.
*
* Execution sequences:
*
* Case 1: CPUx +...- (no overlap)
* CPUy +...-
* RC 0 1 0 1 0
*
* Case 2: CPUx +........- (partial overlap)
* CPUy +........-
* RC 0 1 2 1 0
*
* Case 3: CPUx +..............- (fully overlapped)
* CPUy +.....-
* RC 0 1 2 1 0
*
* Key:
* 'CPU[xy]' : one of the two logical processors on a HTT CPU.
* 'RC' : run count (#threads per physical core).
* '+' : point in time when a thread is put on a CPU.
* '-' : point in time where a thread is taken off a CPU.
*
* Handling HTT CONFIG
*
* Different processes attached to the same PMC may get scheduled on
* the two logical processors in the package. We keep track of config
* and de-config operations using the CFGFLAGS fields of the per-physical
* cpu state.
*/
/*
 * List of P4 counter names.  Every entry is passed through whatever
 * definition of P4_PMC() is in effect where P4_PMCS() is expanded, so
 * the same list can generate different constructs.  The final NONE
 * entry supplies a "no counter" name.
 */
#define P4_PMCS() \
P4_PMC(BPU_COUNTER0) \
P4_PMC(BPU_COUNTER1) \
P4_PMC(BPU_COUNTER2) \
P4_PMC(BPU_COUNTER3) \
P4_PMC(MS_COUNTER0) \
P4_PMC(MS_COUNTER1) \
P4_PMC(MS_COUNTER2) \
P4_PMC(MS_COUNTER3) \
P4_PMC(FLAME_COUNTER0) \
P4_PMC(FLAME_COUNTER1) \
P4_PMC(FLAME_COUNTER2) \
P4_PMC(FLAME_COUNTER3) \
P4_PMC(IQ_COUNTER0) \
P4_PMC(IQ_COUNTER1) \
P4_PMC(IQ_COUNTER2) \
P4_PMC(IQ_COUNTER3) \
P4_PMC(IQ_COUNTER4) \
P4_PMC(IQ_COUNTER5) \
P4_PMC(NONE)
/*
 * Counter identifiers: one P4_PMC_<name> enumerator for each entry of
 * P4_PMCS(), in list order, ending with P4_PMC_NONE.
 */
enum pmc_p4pmc {
#undef P4_PMC
#define P4_PMC(N) P4_PMC_##N ,
P4_PMCS()
};
/*
* P4 ESCR descriptors
*/
/*
 * List of the P4 ESCRs.  Each entry is P4_ESCR(name, MSR address,
 * counter1, counter2, counter3) and is expanded through whatever
 * definition of P4_ESCR() is in effect at the point of use.  Unused
 * counter positions hold NONE, and the final NONE entry supplies a
 * "no ESCR" name.
 *
 * Even-numbered ESCRs pair with counters 0, 1 and 4 of their group and
 * odd-numbered ESCRs with counters 2, 3 and 5.  IQ_ESCR1 and SSU_ESCR0
 * previously broke this rule (listing IQ_COUNTER1 and IQ_COUNTER2
 * respectively); they now follow it like every other row.
 */
#define P4_ESCRS() \
P4_ESCR(BSU_ESCR0, 0x3A0, BPU_COUNTER0, BPU_COUNTER1, NONE) \
P4_ESCR(BSU_ESCR1, 0x3A1, BPU_COUNTER2, BPU_COUNTER3, NONE) \
P4_ESCR(FSB_ESCR0, 0x3A2, BPU_COUNTER0, BPU_COUNTER1, NONE) \
P4_ESCR(FSB_ESCR1, 0x3A3, BPU_COUNTER2, BPU_COUNTER3, NONE) \
P4_ESCR(FIRM_ESCR0, 0x3A4, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \
P4_ESCR(FIRM_ESCR1, 0x3A5, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \
P4_ESCR(FLAME_ESCR0, 0x3A6, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \
P4_ESCR(FLAME_ESCR1, 0x3A7, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \
P4_ESCR(DAC_ESCR0, 0x3A8, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \
P4_ESCR(DAC_ESCR1, 0x3A9, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \
P4_ESCR(MOB_ESCR0, 0x3AA, BPU_COUNTER0, BPU_COUNTER1, NONE) \
P4_ESCR(MOB_ESCR1, 0x3AB, BPU_COUNTER2, BPU_COUNTER3, NONE) \
P4_ESCR(PMH_ESCR0, 0x3AC, BPU_COUNTER0, BPU_COUNTER1, NONE) \
P4_ESCR(PMH_ESCR1, 0x3AD, BPU_COUNTER2, BPU_COUNTER3, NONE) \
P4_ESCR(SAAT_ESCR0, 0x3AE, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \
P4_ESCR(SAAT_ESCR1, 0x3AF, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \
P4_ESCR(U2L_ESCR0, 0x3B0, FLAME_COUNTER0, FLAME_COUNTER1, NONE) \
P4_ESCR(U2L_ESCR1, 0x3B1, FLAME_COUNTER2, FLAME_COUNTER3, NONE) \
P4_ESCR(BPU_ESCR0, 0x3B2, BPU_COUNTER0, BPU_COUNTER1, NONE) \
P4_ESCR(BPU_ESCR1, 0x3B3, BPU_COUNTER2, BPU_COUNTER3, NONE) \
P4_ESCR(IS_ESCR0, 0x3B4, BPU_COUNTER0, BPU_COUNTER1, NONE) \
P4_ESCR(IS_ESCR1, 0x3B5, BPU_COUNTER2, BPU_COUNTER3, NONE) \
P4_ESCR(ITLB_ESCR0, 0x3B6, BPU_COUNTER0, BPU_COUNTER1, NONE) \
P4_ESCR(ITLB_ESCR1, 0x3B7, BPU_COUNTER2, BPU_COUNTER3, NONE) \
P4_ESCR(CRU_ESCR0, 0x3B8, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \
P4_ESCR(CRU_ESCR1, 0x3B9, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \
P4_ESCR(IQ_ESCR0, 0x3BA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \
P4_ESCR(IQ_ESCR1, 0x3BB, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \
P4_ESCR(RAT_ESCR0, 0x3BC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \
P4_ESCR(RAT_ESCR1, 0x3BD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \
P4_ESCR(SSU_ESCR0, 0x3BE, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \
P4_ESCR(MS_ESCR0, 0x3C0, MS_COUNTER0, MS_COUNTER1, NONE) \
P4_ESCR(MS_ESCR1, 0x3C1, MS_COUNTER2, MS_COUNTER3, NONE) \
P4_ESCR(TBPU_ESCR0, 0x3C2, MS_COUNTER0, MS_COUNTER1, NONE) \
P4_ESCR(TBPU_ESCR1, 0x3C3, MS_COUNTER2, MS_COUNTER3, NONE) \
P4_ESCR(TC_ESCR0, 0x3C4, MS_COUNTER0, MS_COUNTER1, NONE) \
P4_ESCR(TC_ESCR1, 0x3C5, MS_COUNTER2, MS_COUNTER3, NONE) \
P4_ESCR(IX_ESCR0, 0x3C8, BPU_COUNTER0, BPU_COUNTER1, NONE) \
P4_ESCR(IX_ESCR1, 0x3C9, BPU_COUNTER2, BPU_COUNTER3, NONE) \
P4_ESCR(ALF_ESCR0, 0x3CA, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \
P4_ESCR(ALF_ESCR1, 0x3CB, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \
P4_ESCR(CRU_ESCR2, 0x3CC, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \
P4_ESCR(CRU_ESCR3, 0x3CD, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \
P4_ESCR(CRU_ESCR4, 0x3E0, IQ_COUNTER0, IQ_COUNTER1, IQ_COUNTER4) \
P4_ESCR(CRU_ESCR5, 0x3E1, IQ_COUNTER2, IQ_COUNTER3, IQ_COUNTER5) \
P4_ESCR(NONE, ~0, NONE, NONE, NONE)
/*
 * ESCR identifiers: one P4_ESCR_<name> enumerator for each entry of
 * P4_ESCRS(), in list order, ending with P4_ESCR_NONE.  The enumerator
 * values therefore match the positions of the same names in any table
 * generated from P4_ESCRS().
 */
enum pmc_p4escr {
#define P4_ESCR(N, MSR, P1, P2, P3) P4_ESCR_##N ,
P4_ESCRS()
#undef P4_ESCR
};
/* Static description of one ESCR. */
struct pmc_p4escr_descr {
const char pm_escrname[PMC_NAME_MAX]; /* ESCR name as a string */
u_short pm_escr_msr; /* MSR address of the ESCR */
const enum pmc_p4pmc pm_pmcs[P4_MAX_PMC_PER_ESCR]; /* counters usable with this ESCR */
};
/*
 * ESCR descriptor table, generated from P4_ESCRS().  Each row records
 * the stringified ESCR name, its MSR address and the three counter
 * identifiers from the list entry.
 */
static struct pmc_p4escr_descr p4_escrs[] =
{
#define P4_ESCR(N, MSR, P1, P2, P3) \
{ \
.pm_escrname = #N, \
.pm_escr_msr = (MSR), \
.pm_pmcs = \
{ \
P4_PMC_##P1, \
P4_PMC_##P2, \
P4_PMC_##P3 \
} \
} ,
P4_ESCRS()
#undef P4_ESCR
};
/*
* P4 Event descriptor
*/
/* Static description of one P4 event. */
struct p4_event_descr {
const enum pmc_event pm_event; /* event identifier (PMC_EV_P4_*) */
const uint32_t pm_escr_eventselect; /* ESCR event select value */
const uint32_t pm_cccr_select; /* CCCR select value */
const char pm_is_ti_event; /* TRUE for 'thread independent' events */
enum pmc_p4escr pm_escrs[P4_MAX_ESCR_PER_EVENT]; /* ESCRs usable for this event */
};
/*
 * Event descriptor table, searched linearly by p4_find_event().
 *
 * Each row is written as
 *   P4_EVDESCR(name, ESCR event select, CCCR select, thread-independent
 *       flag, first ESCR, second ESCR)
 * where 'name' is pasted onto PMC_EV_P4_ and the ESCR names onto
 * P4_ESCR_.  A second ESCR of NONE means only one ESCR can be used.
 */
static struct p4_event_descr p4_events[] = {
#define P4_EVDESCR(NAME, ESCREVENTSEL, CCCRSEL, TI_EVENT, ESCR0, ESCR1) \
{ \
.pm_event = PMC_EV_P4_##NAME, \
.pm_escr_eventselect = (ESCREVENTSEL), \
.pm_cccr_select = (CCCRSEL), \
.pm_is_ti_event = (TI_EVENT), \
.pm_escrs = \
{ \
P4_ESCR_##ESCR0, \
P4_ESCR_##ESCR1 \
} \
}
P4_EVDESCR(TC_DELIVER_MODE, 0x01, 0x01, TRUE, TC_ESCR0, TC_ESCR1),
P4_EVDESCR(BPU_FETCH_REQUEST, 0x03, 0x00, FALSE, BPU_ESCR0, BPU_ESCR1),
P4_EVDESCR(ITLB_REFERENCE, 0x18, 0x03, FALSE, ITLB_ESCR0, ITLB_ESCR1),
P4_EVDESCR(MEMORY_CANCEL, 0x02, 0x05, FALSE, DAC_ESCR0, DAC_ESCR1),
P4_EVDESCR(MEMORY_COMPLETE, 0x08, 0x02, FALSE, SAAT_ESCR0, SAAT_ESCR1),
P4_EVDESCR(LOAD_PORT_REPLAY, 0x04, 0x02, FALSE, SAAT_ESCR0, SAAT_ESCR1),
P4_EVDESCR(STORE_PORT_REPLAY, 0x05, 0x02, FALSE, SAAT_ESCR0, SAAT_ESCR1),
P4_EVDESCR(MOB_LOAD_REPLAY, 0x03, 0x02, FALSE, MOB_ESCR0, MOB_ESCR1),
P4_EVDESCR(PAGE_WALK_TYPE, 0x01, 0x04, TRUE, PMH_ESCR0, PMH_ESCR1),
P4_EVDESCR(BSQ_CACHE_REFERENCE, 0x0C, 0x07, FALSE, BSU_ESCR0, BSU_ESCR1),
P4_EVDESCR(IOQ_ALLOCATION, 0x03, 0x06, FALSE, FSB_ESCR0, FSB_ESCR1),
P4_EVDESCR(IOQ_ACTIVE_ENTRIES, 0x1A, 0x06, FALSE, FSB_ESCR1, NONE),
P4_EVDESCR(FSB_DATA_ACTIVITY, 0x17, 0x06, TRUE, FSB_ESCR0, FSB_ESCR1),
P4_EVDESCR(BSQ_ALLOCATION, 0x05, 0x07, FALSE, BSU_ESCR0, NONE),
P4_EVDESCR(BSQ_ACTIVE_ENTRIES, 0x06, 0x07, FALSE, BSU_ESCR1, NONE),
/* BSQ_ACTIVE_ENTRIES inherits CPU specificity from BSQ_ALLOCATION */
P4_EVDESCR(SSE_INPUT_ASSIST, 0x34, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
P4_EVDESCR(PACKED_SP_UOP, 0x08, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
P4_EVDESCR(PACKED_DP_UOP, 0x0C, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
P4_EVDESCR(SCALAR_SP_UOP, 0x0A, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
P4_EVDESCR(SCALAR_DP_UOP, 0x0E, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
P4_EVDESCR(64BIT_MMX_UOP, 0x02, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
P4_EVDESCR(128BIT_MMX_UOP, 0x1A, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
P4_EVDESCR(X87_FP_UOP, 0x04, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
P4_EVDESCR(X87_SIMD_MOVES_UOP, 0x2E, 0x01, TRUE, FIRM_ESCR0, FIRM_ESCR1),
P4_EVDESCR(GLOBAL_POWER_EVENTS, 0x13, 0x06, FALSE, FSB_ESCR0, FSB_ESCR1),
P4_EVDESCR(TC_MS_XFER, 0x05, 0x00, FALSE, MS_ESCR0, MS_ESCR1),
P4_EVDESCR(UOP_QUEUE_WRITES, 0x09, 0x00, FALSE, MS_ESCR0, MS_ESCR1),
P4_EVDESCR(RETIRED_MISPRED_BRANCH_TYPE,
0x05, 0x02, FALSE, TBPU_ESCR0, TBPU_ESCR1),
P4_EVDESCR(RETIRED_BRANCH_TYPE, 0x04, 0x02, FALSE, TBPU_ESCR0, TBPU_ESCR1),
P4_EVDESCR(RESOURCE_STALL, 0x01, 0x01, FALSE, ALF_ESCR0, ALF_ESCR1),
P4_EVDESCR(WC_BUFFER, 0x05, 0x05, TRUE, DAC_ESCR0, DAC_ESCR1),
P4_EVDESCR(B2B_CYCLES, 0x16, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1),
P4_EVDESCR(BNR, 0x08, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1),
P4_EVDESCR(SNOOP, 0x06, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1),
P4_EVDESCR(RESPONSE, 0x04, 0x03, TRUE, FSB_ESCR0, FSB_ESCR1),
P4_EVDESCR(FRONT_END_EVENT, 0x08, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3),
P4_EVDESCR(EXECUTION_EVENT, 0x0C, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3),
P4_EVDESCR(REPLAY_EVENT, 0x09, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3),
P4_EVDESCR(INSTR_RETIRED, 0x02, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1),
P4_EVDESCR(UOPS_RETIRED, 0x01, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1),
P4_EVDESCR(UOP_TYPE, 0x02, 0x02, FALSE, RAT_ESCR0, RAT_ESCR1),
P4_EVDESCR(BRANCH_RETIRED, 0x06, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3),
P4_EVDESCR(MISPRED_BRANCH_RETIRED, 0x03, 0x04, FALSE, CRU_ESCR0, CRU_ESCR1),
P4_EVDESCR(X87_ASSIST, 0x03, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3),
P4_EVDESCR(MACHINE_CLEAR, 0x02, 0x05, FALSE, CRU_ESCR2, CRU_ESCR3)
#undef P4_EVDESCR
};
/* True if event descriptor E is flagged as a 'thread independent' event. */
#define P4_EVENT_IS_TI(E) ((E)->pm_is_ti_event == TRUE)
/*
 * Number of event codes in the range PMC_EV_P4_FIRST..PMC_EV_P4_LAST.
 * p4_find_event() uses this as the scan bound for p4_events[], so the
 * table must hold at least this many rows.
 */
#define P4_NEVENTS (PMC_EV_P4_LAST - PMC_EV_P4_FIRST + 1)
/*
* P4 PMC descriptors
*/
/* Static description of one P4 counter, indexed by row index in p4_pmcdesc[]. */
struct p4pmc_descr {
struct pmc_descr pm_descr; /* common information */
enum pmc_p4pmc pm_pmcnum; /* PMC number */
uint32_t pm_pmc_msr; /* PERFCTR MSR address */
uint32_t pm_cccr_msr; /* CCCR MSR address */
};
/*
 * Counter descriptor table.  Each row is written as
 * P4_PMCDESCR(name, counter MSR, CCCR MSR); every counter gets class
 * PMC_CLASS_P4, the capability set P4_PMC_CAPS and a width of 40 bits.
 * The rows are in the same order as the entries of P4_PMCS().
 */
static struct p4pmc_descr p4_pmcdesc[P4_NPMCS] = {
#define P4_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | PMC_CAP_SYSTEM | \
PMC_CAP_EDGE | PMC_CAP_THRESHOLD | PMC_CAP_READ | PMC_CAP_WRITE | \
PMC_CAP_INVERT | PMC_CAP_QUALIFIER | PMC_CAP_PRECISE | \
PMC_CAP_TAGGING | PMC_CAP_CASCADE)
#define P4_PMCDESCR(N, PMC, CCCR) \
{ \
.pm_descr = \
{ \
.pd_name = #N, \
.pd_class = PMC_CLASS_P4, \
.pd_caps = P4_PMC_CAPS, \
.pd_width = 40 \
}, \
.pm_pmcnum = P4_PMC_##N, \
.pm_cccr_msr = (CCCR), \
.pm_pmc_msr = (PMC) \
}
P4_PMCDESCR(BPU_COUNTER0, 0x300, 0x360),
P4_PMCDESCR(BPU_COUNTER1, 0x301, 0x361),
P4_PMCDESCR(BPU_COUNTER2, 0x302, 0x362),
P4_PMCDESCR(BPU_COUNTER3, 0x303, 0x363),
P4_PMCDESCR(MS_COUNTER0, 0x304, 0x364),
P4_PMCDESCR(MS_COUNTER1, 0x305, 0x365),
P4_PMCDESCR(MS_COUNTER2, 0x306, 0x366),
P4_PMCDESCR(MS_COUNTER3, 0x307, 0x367),
P4_PMCDESCR(FLAME_COUNTER0, 0x308, 0x368),
P4_PMCDESCR(FLAME_COUNTER1, 0x309, 0x369),
P4_PMCDESCR(FLAME_COUNTER2, 0x30A, 0x36A),
P4_PMCDESCR(FLAME_COUNTER3, 0x30B, 0x36B),
P4_PMCDESCR(IQ_COUNTER0, 0x30C, 0x36C),
P4_PMCDESCR(IQ_COUNTER1, 0x30D, 0x36D),
P4_PMCDESCR(IQ_COUNTER2, 0x30E, 0x36E),
P4_PMCDESCR(IQ_COUNTER3, 0x30F, 0x36F),
P4_PMCDESCR(IQ_COUNTER4, 0x310, 0x370),
P4_PMCDESCR(IQ_COUNTER5, 0x311, 0x371),
#undef P4_PMCDESCR
};
/* HTT support */
#define P4_NHTT 2 /* logical processors/chip */
/*
 * Non-zero once HTT has been detected; p4_pcpu_init() sets it when it
 * is called for a non-primary, odd-numbered CPU.
 */
static int p4_system_has_htt;
/*
* Per-CPU data structure for P4 class CPUs
*
* [P4_NPMCS struct pmc_hw structures]
* [P4_NESCR ESCR status bytes]
* [per-cpu spin mutex]
* [NMI handler flags and NMI handler spin lock]
* [P4_NPMCS flag fields for holding config flags and a runcount]
* [P4_NPMCS*P4_NHTT hw value fields] (Thread mode PMC support)
* or
* [P4_NPMCS*P4_NHTT EIP values] (Sampling mode PMCs)
* [P4_NPMCS*P4_NHTT pmc value fields] (Thread mode PMC support)
*/
struct p4_cpu {
struct pmc_hw pc_p4pmcs[P4_NPMCS]; /* per-row hardware PMC state */
char pc_escrs[P4_NESCR]; /* per-ESCR owner; P4_INVALID_PMC_INDEX when unused */
struct mtx pc_mtx; /* spin lock */
uint32_t pc_intrflag; /* NMI handler flags */
unsigned int pc_intrlock; /* NMI handler spin lock */
unsigned char pc_flags[P4_NPMCS]; /* 4 bits each: {cfg,run}count */
union {
pmc_value_t pc_hw[P4_NPMCS * P4_NHTT]; /* saved hardware values */
uintptr_t pc_ip[P4_NPMCS * P4_NHTT]; /* saved instruction pointers */
} pc_si;
pmc_value_t pc_pmc_values[P4_NPMCS * P4_NHTT]; /* saved PMC values */
};
/*
 * Per-CPU state pointers, indexed by CPU number.  p4_pcpu_init() only
 * fills in the entry of a CPU that owns its own state; code in this
 * file looks entries up through P4_TO_HTT_PRIMARY(cpu).
 */
static struct p4_cpu **p4_pcpu;
/*
 * Accessors for the per-row, per-logical-CPU saved values in a
 * struct p4_cpu.  Each backing array has P4_NPMCS * P4_NHTT entries;
 * row RI owns P4_NHTT consecutive slots and the low bit of CPU picks
 * the logical processor's slot within them.
 *
 * The index used to be (RI) * ((CPU) & 1), which sent every row of an
 * even-numbered CPU to slot 0 and made row 0 of both logical CPUs share
 * it; distinct (row, logical CPU) pairs now get distinct slots.
 */
#define	P4_PCPU_VALUE_INDEX(RI,CPU)	((RI) * P4_NHTT + ((CPU) & 1))
#define	P4_PCPU_PMC_VALUE(PC,RI,CPU)	\
	(PC)->pc_pmc_values[P4_PCPU_VALUE_INDEX(RI,CPU)]
#define	P4_PCPU_HW_VALUE(PC,RI,CPU)	\
	(PC)->pc_si.pc_hw[P4_PCPU_VALUE_INDEX(RI,CPU)]
#define	P4_PCPU_SAVED_IP(PC,RI,CPU)	\
	(PC)->pc_si.pc_ip[P4_PCPU_VALUE_INDEX(RI,CPU)]
/*
 * Accessors for pc_flags[RI], one byte per row: the low nibble holds
 * the run count and the high nibble the config flags.
 *
 * GET_FLAGS yields the bits selected by MASK (unshifted); SET_FLAGS
 * replaces exactly those bits with the matching bits of VAL and leaves
 * the others alone.
 */
#define	P4_PCPU_GET_FLAGS(PC,RI,MASK)	((PC)->pc_flags[(RI)] & (MASK))
#define	P4_PCPU_SET_FLAGS(PC,RI,MASK,VAL)	do {			\
	char _flg = (PC)->pc_flags[(RI)];				\
	_flg = (_flg & ~(MASK)) | ((VAL) & (MASK));			\
	(PC)->pc_flags[(RI)] = _flg;					\
} while (0)

/* Run count: low nibble. */
#define	P4_PCPU_GET_RUNCOUNT(PC,RI)	P4_PCPU_GET_FLAGS(PC,RI,0x0F)
#define	P4_PCPU_SET_RUNCOUNT(PC,RI,V)	P4_PCPU_SET_FLAGS(PC,RI,0x0F,V)

/* Config flags: high nibble, presented shifted down to bits 0..3. */
#define	P4_PCPU_GET_CFGFLAGS(PC,RI)	(P4_PCPU_GET_FLAGS(PC,RI,0xF0) >> 4)
#define	P4_PCPU_SET_CFGFLAGS(PC,RI,C)	P4_PCPU_SET_FLAGS(PC,RI,0xF0,((C) <<4))
/*
 * Config-flag bit for logical CPU C: 0x2 for the secondary CPU of an
 * HTT pair, 0x1 otherwise.  The argument is now actually used; the
 * macro used to read a variable named 'cpu' from the caller's scope
 * regardless of what was passed in.
 */
#define	P4_CPU_TO_FLAG(C)	(P4_CPU_IS_HTT_SECONDARY(C) ? 0x2 : 0x1)
/*
 * Per-row bits in pc_intrflag.  GET yields bit I unshifted (non-zero
 * when set); SET sets bit I when V is non-zero and clears it otherwise.
 */
#define	P4_PCPU_GET_INTRFLAG(PC,I)	((PC)->pc_intrflag & (1 << (I)))
#define	P4_PCPU_SET_INTRFLAG(PC,I,V)	do {				\
	uint32_t _bit = 1 << (I);					\
	if ((V))							\
		(PC)->pc_intrflag |= _bit;				\
	else								\
		(PC)->pc_intrflag &= ~_bit;				\
} while (0)
/*
* A minimal spin lock implementation for use inside the NMI handler.
*
* We don't want to use a regular spin lock here, because curthread
* may not be consistent at the time the handler is invoked.
*/
/*
 * Acquire the NMI-handler lock of per-CPU state PC: spin, pausing
 * between attempts, until pc_intrlock is atomically changed from 0
 * to 1.
 *
 * Both macros now operate on their PC argument; they used to ignore it
 * and reach for a variable named 'pc' in the caller's scope.
 */
#define	P4_PCPU_ACQ_INTR_SPINLOCK(PC) do {				\
	while (!atomic_cmpset_acq_int(&(PC)->pc_intrlock, 0, 1))	\
		ia32_pause();						\
} while (0)
/*
 * Release the NMI-handler lock of PC by storing 0 to pc_intrlock.
 * Wrapped in do/while so that it behaves as a single statement; the
 * caller supplies the terminating semicolon.
 */
#define	P4_PCPU_REL_INTR_SPINLOCK(PC) do {				\
	atomic_store_rel_int(&(PC)->pc_intrlock, 0);			\
} while (0)
/* ESCR row disposition */
/*
 * One signed counter per ESCR row: a positive value means the row is
 * in thread mode, a negative value means standalone mode and zero
 * means free.  The MARK/UNMARK macros move the counter one step at a
 * time with atomic_add_int() and assert that it stays on the expected
 * side of zero.
 */
static int p4_escrdisp[P4_NESCR];
#define P4_ESCR_ROW_DISP_IS_THREAD(E) (p4_escrdisp[(E)] > 0)
#define P4_ESCR_ROW_DISP_IS_STANDALONE(E) (p4_escrdisp[(E)] < 0)
#define P4_ESCR_ROW_DISP_IS_FREE(E) (p4_escrdisp[(E)] == 0)
/* Count one more standalone user of row E (counter decremented). */
#define P4_ESCR_MARK_ROW_STANDALONE(E) do { \
KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\
__LINE__)); \
atomic_add_int(&p4_escrdisp[(E)], -1); \
KASSERT(p4_escrdisp[(E)] >= (-pmc_cpu_max_active()), \
("[p4,%d] row disposition error", __LINE__)); \
} while (0)
/* Drop one standalone user of row E (counter incremented). */
#define P4_ESCR_UNMARK_ROW_STANDALONE(E) do { \
atomic_add_int(&p4_escrdisp[(E)], 1); \
KASSERT(p4_escrdisp[(E)] <= 0, ("[p4,%d] row disposition error",\
__LINE__)); \
} while (0)
/* Count one more thread-mode user of row E (counter incremented). */
#define P4_ESCR_MARK_ROW_THREAD(E) do { \
KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error", \
__LINE__)); \
atomic_add_int(&p4_escrdisp[(E)], 1); \
} while (0)
/* Drop one thread-mode user of row E (counter decremented). */
#define P4_ESCR_UNMARK_ROW_THREAD(E) do { \
atomic_add_int(&p4_escrdisp[(E)], -1); \
KASSERT(p4_escrdisp[(E)] >= 0, ("[p4,%d] row disposition error", \
__LINE__)); \
} while (0)
/* True if the ENABLE bit is clear in the CCCR at MSR address 'cccr'. */
#define P4_PMC_IS_STOPPED(cccr) ((rdmsr(cccr) & P4_CCCR_ENABLE) == 0)
/* With HTT detected, odd-numbered CPUs are the secondary of their pair. */
#define P4_CPU_IS_HTT_SECONDARY(cpu) \
(p4_system_has_htt ? ((cpu) & 1) : 0)
/* With HTT detected, map a CPU to the even-numbered CPU of its pair. */
#define P4_TO_HTT_PRIMARY(cpu) \
(p4_system_has_htt ? ((cpu) & ~1) : (cpu))
/* All CCCR bits except the per-thread overflow-PMI, ENABLE and OVF bits. */
#define P4_CCCR_Tx_MASK (~(P4_CCCR_OVF_PMI_T0|P4_CCCR_OVF_PMI_T1| \
P4_CCCR_ENABLE|P4_CCCR_OVF))
/* All ESCR bits except the per-thread OS/USR bits. */
#define P4_ESCR_Tx_MASK (~(P4_ESCR_T0_OS|P4_ESCR_T0_USR|P4_ESCR_T1_OS| \
P4_ESCR_T1_USR))
/*
* support routines
*/
static struct p4_event_descr *
p4_find_event(enum pmc_event ev)
{
int n;
for (n = 0; n < P4_NEVENTS; n++)
if (p4_events[n].pm_event == ev)
break;
if (n == P4_NEVENTS)
return (NULL);
return (&p4_events[n]);
}
/*
* Initialize per-cpu state
*/
static int
p4_pcpu_init(struct pmc_mdep *md, int cpu)
{
char *pescr;
int n, first_ri, phycpu;
struct pmc_hw *phw;
struct p4_cpu *p4c;
struct pmc_cpu *pc, *plc;
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
("[p4,%d] insane cpu number %d", __LINE__, cpu));
PMCDBG2(MDP,INI,0, "p4-init cpu=%d is-primary=%d", cpu,
pmc_cpu_is_primary(cpu) != 0);
first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_P4].pcd_ri;
/*
* The two CPUs in an HT pair share their per-cpu state.
*
* For HT capable CPUs, we assume that the two logical
* processors in the HT pair get two consecutive CPU ids
* starting with an even id #.
*
* The primary CPU (the even numbered CPU of the pair) would
* have been initialized prior to the initialization for the
* secondary.
*/
if (!pmc_cpu_is_primary(cpu) && (cpu & 1)) {
p4_system_has_htt = 1;
phycpu = P4_TO_HTT_PRIMARY(cpu);
pc = pmc_pcpu[phycpu];
plc = pmc_pcpu[cpu];
KASSERT(plc != pc, ("[p4,%d] per-cpu config error", __LINE__));
PMCDBG3(MDP,INI,1, "p4-init cpu=%d phycpu=%d pc=%p", cpu,
phycpu, pc);
KASSERT(pc, ("[p4,%d] Null Per-Cpu state cpu=%d phycpu=%d",
__LINE__, cpu, phycpu));
/* PMCs are shared with the physical CPU. */
for (n = 0; n < P4_NPMCS; n++)
plc->pc_hwpmcs[n + first_ri] =
pc->pc_hwpmcs[n + first_ri];
return (0);
}
p4c = malloc(sizeof(struct p4_cpu), M_PMC, M_WAITOK|M_ZERO);
pc = pmc_pcpu[cpu];
KASSERT(pc != NULL, ("[p4,%d] cpu %d null per-cpu", __LINE__, cpu));
p4_pcpu[cpu] = p4c;
phw = p4c->pc_p4pmcs;
for (n = 0; n < P4_NPMCS; n++, phw++) {
phw->phw_state = PMC_PHW_FLAG_IS_ENABLED |
PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(n);
phw->phw_pmc = NULL;
pc->pc_hwpmcs[n + first_ri] = phw;
}
pescr = p4c->pc_escrs;
for (n = 0; n < P4_NESCR; n++)
*pescr++ = P4_INVALID_PMC_INDEX;
mtx_init(&p4c->pc_mtx, "p4-pcpu", "pmc-leaf", MTX_SPIN);
return (0);
}
/*
* Destroy per-cpu state.
*/
static int
p4_pcpu_fini(struct pmc_mdep *md, int cpu)
{
int first_ri, i;
struct p4_cpu *p4c;
struct pmc_cpu *pc;
PMCDBG1(MDP,INI,0, "p4-cleanup cpu=%d", cpu);
pc = pmc_pcpu[cpu];
first_ri = md->pmd_classdep[PMC_MDEP_CLASS_INDEX_P4].pcd_ri;
for (i = 0; i < P4_NPMCS; i++)
pc->pc_hwpmcs[i + first_ri] = NULL;
if (!pmc_cpu_is_primary(cpu) && (cpu & 1))
return (0);
p4c = p4_pcpu[cpu];
KASSERT(p4c != NULL, ("[p4,%d] NULL pcpu", __LINE__));
/* Turn off all PMCs on this CPU */
for (i = 0; i < P4_NPMCS - 1; i++)
wrmsr(P4_CCCR_MSR_FIRST + i,
rdmsr(P4_CCCR_MSR_FIRST + i) & ~P4_CCCR_ENABLE);
mtx_destroy(&p4c->pc_mtx);
free(p4c, M_PMC);
p4_pcpu[cpu] = NULL;
return (0);
}
/*
* Read a PMC
*/
static int
p4_read_pmc(int cpu, int ri, pmc_value_t *v)
{
struct pmc *pm;
pmc_value_t tmp;
struct p4_cpu *pc;
enum pmc_mode mode;
struct p4pmc_descr *pd;
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
("[p4,%d] illegal CPU value %d", __LINE__, cpu));
KASSERT(ri >= 0 && ri < P4_NPMCS,
("[p4,%d] illegal row-index %d", __LINE__, ri));
pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
pm = pc->pc_p4pmcs[ri].phw_pmc;
pd = &p4_pmcdesc[ri];
KASSERT(pm != NULL,
("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__, cpu, ri));
KASSERT(pd->pm_descr.pd_class == PMC_TO_CLASS(pm),
("[p4,%d] class mismatch pd %d != id class %d", __LINE__,
pd->pm_descr.pd_class, PMC_TO_CLASS(pm)));
mode = PMC_TO_MODE(pm);
PMCDBG3(MDP,REA,1, "p4-read cpu=%d ri=%d mode=%d", cpu, ri, mode);
KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
("[p4,%d] unknown PMC class %d", __LINE__, pd->pm_descr.pd_class));
tmp = rdmsr(p4_pmcdesc[ri].pm_pmc_msr);
if (PMC_IS_VIRTUAL_MODE(mode)) {
if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit overflow */
tmp += (P4_PERFCTR_MASK + 1) -
P4_PCPU_HW_VALUE(pc,ri,cpu);
else
tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu);
tmp += P4_PCPU_PMC_VALUE(pc,ri,cpu);
}
if (PMC_IS_SAMPLING_MODE(mode)) /* undo transformation */
*v = P4_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp);
else
*v = tmp;
PMCDBG1(MDP,REA,2, "p4-read -> %jx", *v);
return (0);
}
/*
* Write a PMC
*/
static int
p4_write_pmc(int cpu, int ri, pmc_value_t v)
{
enum pmc_mode mode;
struct pmc *pm;
struct p4_cpu *pc;
const struct pmc_hw *phw;
const struct p4pmc_descr *pd;
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
("[amd,%d] illegal CPU value %d", __LINE__, cpu));
KASSERT(ri >= 0 && ri < P4_NPMCS,
("[amd,%d] illegal row-index %d", __LINE__, ri));
pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
phw = &pc->pc_p4pmcs[ri];
pm = phw->phw_pmc;
pd = &p4_pmcdesc[ri];
KASSERT(pm != NULL,
("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__,
cpu, ri));
mode = PMC_TO_MODE(pm);
PMCDBG4(MDP,WRI,1, "p4-write cpu=%d ri=%d mode=%d v=%jx", cpu, ri,
mode, v);
/*
* write the PMC value to the register/saved value: for
* sampling mode PMCs, the value to be programmed into the PMC
* counter is -(C+1) where 'C' is the requested sample rate.
*/
if (PMC_IS_SAMPLING_MODE(mode))
v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(v);
if (PMC_IS_SYSTEM_MODE(mode))
wrmsr(pd->pm_pmc_msr, v);
else
P4_PCPU_PMC_VALUE(pc,ri,cpu) = v;
return (0);
}
/*
* Configure a PMC 'pm' on the given CPU and row-index.
*
* 'pm' may be NULL to indicate de-configuration.
*
* On HTT systems, a PMC may get configured twice, once for each
* "logical" CPU. We track this using the CFGFLAGS field of the
* per-cpu state; this field is a bit mask with one bit each for
* logical CPUs 0 & 1.
*/
static int
p4_config_pmc(int cpu, int ri, struct pmc *pm)
{
struct pmc_hw *phw;
struct p4_cpu *pc;
int cfgflags, cpuflag;
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
("[p4,%d] illegal CPU %d", __LINE__, cpu));
KASSERT(ri >= 0 && ri < P4_NPMCS,
("[p4,%d] illegal row-index %d", __LINE__, ri));
PMCDBG3(MDP,CFG,1, "cpu=%d ri=%d pm=%p", cpu, ri, pm);
pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
phw = &pc->pc_p4pmcs[ri];
KASSERT(pm == NULL || phw->phw_pmc == NULL ||
(p4_system_has_htt && phw->phw_pmc == pm),
("[p4,%d] hwpmc not unconfigured before re-config", __LINE__));
mtx_lock_spin(&pc->pc_mtx);
cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri);
KASSERT((cfgflags & ~0x3) == 0,
("[p4,%d] illegal cfgflags cfg=%#x on cpu=%d ri=%d", __LINE__,
cfgflags, cpu, ri));
KASSERT(cfgflags == 0 || phw->phw_pmc,
("[p4,%d] cpu=%d ri=%d pmc configured with zero cfg count",
__LINE__, cpu, ri));
cpuflag = P4_CPU_TO_FLAG(cpu);
if (pm) { /* config */
if (cfgflags == 0)
phw->phw_pmc = pm;
KASSERT(phw->phw_pmc == pm,
("[p4,%d] cpu=%d ri=%d config %p != hw %p",
__LINE__, cpu, ri, pm, phw->phw_pmc));
cfgflags |= cpuflag;
} else { /* unconfig */
cfgflags &= ~cpuflag;
if (cfgflags == 0)
phw->phw_pmc = NULL;
}
KASSERT((cfgflags & ~0x3) == 0,
("[p4,%d] illegal runcount cfg=%#x on cpu=%d ri=%d", __LINE__,
cfgflags, cpu, ri));
P4_PCPU_SET_CFGFLAGS(pc,ri,cfgflags);
mtx_unlock_spin(&pc->pc_mtx);
return (0);
}
/*
* Retrieve a configured PMC pointer from hardware state.
*/
static int
p4_get_config(int cpu, int ri, struct pmc **ppm)
{
int cfgflags;
struct p4_cpu *pc;
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
("[p4,%d] illegal CPU %d", __LINE__, cpu));
KASSERT(ri >= 0 && ri < P4_NPMCS,
("[p4,%d] illegal row-index %d", __LINE__, ri));
pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
mtx_lock_spin(&pc->pc_mtx);
cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri);
mtx_unlock_spin(&pc->pc_mtx);
if (cfgflags & P4_CPU_TO_FLAG(cpu))
*ppm = pc->pc_p4pmcs[ri].phw_pmc; /* PMC config'ed on this CPU */
else
*ppm = NULL;
return 0;
}
/*
* Allocate a PMC.
*
* The allocation strategy differs between HTT and non-HTT systems.
*
* The non-HTT case:
* - Given the desired event and the PMC row-index, lookup the
* list of valid ESCRs for the event.
* - For each valid ESCR:
* - Check if the ESCR is free and the ESCR row is in a compatible
* mode (i.e., system or process)
* - Check if the ESCR is usable with a P4 PMC at the desired row-index.
* If everything matches, we determine the appropriate bit values for the
* ESCR and CCCR registers.
*
* The HTT case:
*
* - Process mode PMCs require special care. The FreeBSD scheduler could
* schedule any two processes on the same physical CPU. We need to ensure
* that a given PMC row-index is never allocated to two different
* PMCs owned by different user-processes.
* This is ensured by always allocating a PMC from a 'FREE' PMC row
* if the system has HTT active.
* - A similar check needs to be done for ESCRs; we do not want two PMCs
* using the same ESCR to be scheduled at the same time. Thus ESCR
* allocation is also restricted to FREE rows if the system has HTT
* enabled.
* - Thirdly, some events are termed 'thread-independent', i.e.,
* the PMC hardware cannot distinguish between events caused by
* different logical CPUs. This makes it impossible to assign events
* to a given thread of execution. If the system has HTT enabled,
* these events are not allowed for process-mode PMCs.
*/
static int
p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
const struct pmc_op_pmcallocate *a)
{
int found, n, m;
uint32_t caps, cccrvalue, escrvalue, tflags;
enum pmc_p4escr escr;
struct p4_cpu *pc;
struct p4_event_descr *pevent;
const struct p4pmc_descr *pd;
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
("[p4,%d] illegal CPU %d", __LINE__, cpu));
KASSERT(ri >= 0 && ri < P4_NPMCS,
("[p4,%d] illegal row-index value %d", __LINE__, ri));
pd = &p4_pmcdesc[ri];
PMCDBG4(MDP,ALL,1, "p4-allocate ri=%d class=%d pmccaps=0x%x "
"reqcaps=0x%x", ri, pd->pm_descr.pd_class, pd->pm_descr.pd_caps,
pm->pm_caps);
/* check class */
if (pd->pm_descr.pd_class != a->pm_class)
return (EINVAL);
/* check requested capabilities */
caps = a->pm_caps;
if ((pd->pm_descr.pd_caps & caps) != caps)
return (EPERM);
/*
* If the system has HTT enabled, and the desired allocation
* mode is process-private, and the PMC row disposition is not
* FREE (0), decline the allocation.
*/
if (p4_system_has_htt &&
PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) &&
pmc_getrowdisp(ri) != 0)
return (EBUSY);
KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
("[p4,%d] unknown PMC class %d", __LINE__,
pd->pm_descr.pd_class));
if (pm->pm_event < PMC_EV_P4_FIRST ||
pm->pm_event > PMC_EV_P4_LAST)
return (EINVAL);
if ((pevent = p4_find_event(pm->pm_event)) == NULL)
return (ESRCH);
PMCDBG4(MDP,ALL,2, "pevent={ev=%d,escrsel=0x%x,cccrsel=0x%x,isti=%d}",
pevent->pm_event, pevent->pm_escr_eventselect,
pevent->pm_cccr_select, pevent->pm_is_ti_event);
/*
* Some PMC events are 'thread independent'and therefore
* cannot be used for process-private modes if HTT is being
* used.
*/
if (P4_EVENT_IS_TI(pevent) &&
PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) &&
p4_system_has_htt)
return (EINVAL);
pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
found = 0;
/* look for a suitable ESCR for this event */
for (n = 0; n < P4_MAX_ESCR_PER_EVENT && !found; n++) {
if ((escr = pevent->pm_escrs[n]) == P4_ESCR_NONE)
break; /* out of ESCRs */
/*
* Check ESCR row disposition.
*
* If the request is for a system-mode PMC, then the
* ESCR row should not be in process-virtual mode, and
* should also be free on the current CPU.
*/
if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
if (P4_ESCR_ROW_DISP_IS_THREAD(escr) ||
pc->pc_escrs[escr] != P4_INVALID_PMC_INDEX)
continue;
}
/*
* If the request is for a process-virtual PMC, and if
* HTT is not enabled, we can use an ESCR row that is
* either FREE or already in process mode.
*
* If HTT is enabled, then we need to ensure that a
* given ESCR is never allocated to two PMCS that
* could run simultaneously on the two logical CPUs of
* a CPU package. We ensure this be only allocating
* ESCRs from rows marked as 'FREE'.
*/
if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) {
if (p4_system_has_htt) {
if (!P4_ESCR_ROW_DISP_IS_FREE(escr))
continue;
} else
if (P4_ESCR_ROW_DISP_IS_STANDALONE(escr))
continue;
}
/*
* We found a suitable ESCR for this event. Now check if
* this escr can work with the PMC at row-index 'ri'.
*/
for (m = 0; m < P4_MAX_PMC_PER_ESCR; m++)
if (p4_escrs[escr].pm_pmcs[m] == pd->pm_pmcnum) {
found = 1;
break;
}
}
if (found == 0)
return (ESRCH);
KASSERT((int) escr >= 0 && escr < P4_NESCR,
("[p4,%d] illegal ESCR value %d", __LINE__, escr));
/* mark ESCR row mode */
if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
pc->pc_escrs[escr] = ri; /* mark ESCR as in use on this cpu */
P4_ESCR_MARK_ROW_STANDALONE(escr);
} else {
KASSERT(pc->pc_escrs[escr] == P4_INVALID_PMC_INDEX,
("[p4,%d] escr[%d] already in use", __LINE__, escr));
P4_ESCR_MARK_ROW_THREAD(escr);
}
pm->pm_md.pm_p4.pm_p4_escrmsr = p4_escrs[escr].pm_escr_msr;
pm->pm_md.pm_p4.pm_p4_escr = escr;
cccrvalue = P4_CCCR_TO_ESCR_SELECT(pevent->pm_cccr_select);
escrvalue = P4_ESCR_TO_EVENT_SELECT(pevent->pm_escr_eventselect);
/* CCCR fields */
if (caps & PMC_CAP_THRESHOLD)
cccrvalue |= (a->pm_md.pm_p4.pm_p4_cccrconfig &
P4_CCCR_THRESHOLD_MASK) | P4_CCCR_COMPARE;
if (caps & PMC_CAP_EDGE)
cccrvalue |= P4_CCCR_EDGE;
if (caps & PMC_CAP_INVERT)
cccrvalue |= P4_CCCR_COMPLEMENT;
if (p4_system_has_htt)
cccrvalue |= a->pm_md.pm_p4.pm_p4_cccrconfig &
P4_CCCR_ACTIVE_THREAD_MASK;
else /* no HTT; thread field should be '11b' */
cccrvalue |= P4_CCCR_TO_ACTIVE_THREAD(0x3);
if (caps & PMC_CAP_CASCADE)
cccrvalue |= P4_CCCR_CASCADE;
/* On HTT systems the PMI T0 field may get moved to T1 at pmc start */
if (caps & PMC_CAP_INTERRUPT)
cccrvalue |= P4_CCCR_OVF_PMI_T0;
/* ESCR fields */
if (caps & PMC_CAP_QUALIFIER)
escrvalue |= a->pm_md.pm_p4.pm_p4_escrconfig &
P4_ESCR_EVENT_MASK_MASK;
if (caps & PMC_CAP_TAGGING)
escrvalue |= (a->pm_md.pm_p4.pm_p4_escrconfig &
P4_ESCR_TAG_VALUE_MASK) | P4_ESCR_TAG_ENABLE;
if (caps & PMC_CAP_QUALIFIER)
escrvalue |= (a->pm_md.pm_p4.pm_p4_escrconfig &
P4_ESCR_EVENT_MASK_MASK);
/* HTT: T0_{OS,USR} bits may get moved to T1 at pmc start */
tflags = 0;
if (caps & PMC_CAP_SYSTEM)
tflags |= P4_ESCR_T0_OS;
if (caps & PMC_CAP_USER)
tflags |= P4_ESCR_T0_USR;
if (tflags == 0)
tflags = (P4_ESCR_T0_OS|P4_ESCR_T0_USR);
escrvalue |= tflags;
pm->pm_md.pm_p4.pm_p4_cccrvalue = cccrvalue;
pm->pm_md.pm_p4.pm_p4_escrvalue = escrvalue;
PMCDBG5(MDP,ALL,2, "p4-allocate cccrsel=0x%x cccrval=0x%x "
"escr=%d escrmsr=0x%x escrval=0x%x", pevent->pm_cccr_select,
cccrvalue, escr, pm->pm_md.pm_p4.pm_p4_escrmsr, escrvalue);
return (0);
}
/*
* release a PMC.
*/
static int
p4_release_pmc(int cpu, int ri, struct pmc *pm)
{
enum pmc_p4escr escr;
struct p4_cpu *pc;
KASSERT(ri >= 0 && ri < P4_NPMCS,
("[p4,%d] illegal row-index %d", __LINE__, ri));
escr = pm->pm_md.pm_p4.pm_p4_escr;
PMCDBG3(MDP,REL,1, "p4-release cpu=%d ri=%d escr=%d", cpu, ri, escr);
if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
KASSERT(pc->pc_p4pmcs[ri].phw_pmc == NULL,
("[p4,%d] releasing configured PMC ri=%d", __LINE__, ri));
P4_ESCR_UNMARK_ROW_STANDALONE(escr);
KASSERT(pc->pc_escrs[escr] == ri,
("[p4,%d] escr[%d] not allocated to ri %d", __LINE__,
escr, ri));
pc->pc_escrs[escr] = P4_INVALID_PMC_INDEX; /* mark as free */
} else
P4_ESCR_UNMARK_ROW_THREAD(escr);
return (0);
}
/*
 * Start a PMC.
 *
 * Starts the PMC configured at row-index 'ri' on logical CPU 'cpu'.
 * The per-CPU state of the HTT primary for 'cpu' is used throughout.
 *
 * The CCCR/ESCR values saved in the PMC's machine-dependent state are
 * taken as the base configuration.  The T0 selection bits (OVF_PMI_T0
 * in the CCCR, T0_OS/T0_USR in the ESCR) are split out of those values
 * and, when 'cpu' is an HTT secondary, shifted into the T1 positions.
 *
 * System-mode PMCs are programmed and enabled directly, without taking
 * the per-CPU mutex.  For all other PMCs the per-row run count is
 * examined under pc_mtx:
 *   - run count 0: the counter is loaded from the saved per-CPU value.
 *   - run count 1: the PMC is already running for the other logical
 *     CPU; it is disabled, its live CCCR/ESCR contents are read back,
 *     and this CPU's T bits are added to them.
 * In both cases the current hardware counter value is then recorded,
 * the ESCR and CCCR are written (enabling the PMC) and the run count
 * is incremented.
 *
 * Always returns 0.
 */
static int
p4_start_pmc(int cpu, int ri)
{
int rc;
struct pmc *pm;
struct p4_cpu *pc;
struct p4pmc_descr *pd;
uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits;
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
("[p4,%d] illegal CPU value %d", __LINE__, cpu));
KASSERT(ri >= 0 && ri < P4_NPMCS,
("[p4,%d] illegal row-index %d", __LINE__, ri));
/* per-CPU state is kept with the HTT primary of this logical CPU */
pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
pm = pc->pc_p4pmcs[ri].phw_pmc;
pd = &p4_pmcdesc[ri];
KASSERT(pm != NULL,
("[p4,%d] starting cpu%d,pmc%d with null pmc", __LINE__, cpu, ri));
PMCDBG2(MDP,STA,1, "p4-start cpu=%d ri=%d", cpu, ri);
KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
("[p4,%d] wrong PMC class %d", __LINE__,
pd->pm_descr.pd_class));
/* retrieve the desired CCCR/ESCR values from the PMC */
cccrvalue = pm->pm_md.pm_p4.pm_p4_cccrvalue;
escrvalue = pm->pm_md.pm_p4.pm_p4_escrvalue;
escrmsr = pm->pm_md.pm_p4.pm_p4_escrmsr;
/* extract and zero the logical processor selection bits */
cccrtbits = cccrvalue & P4_CCCR_OVF_PMI_T0;
escrtbits = escrvalue & (P4_ESCR_T0_OS|P4_ESCR_T0_USR);
cccrvalue &= ~P4_CCCR_OVF_PMI_T0;
escrvalue &= ~(P4_ESCR_T0_OS|P4_ESCR_T0_USR);
if (P4_CPU_IS_HTT_SECONDARY(cpu)) { /* shift T0 bits to T1 position */
cccrtbits <<= 1;
escrtbits >>= 2;
}
/* start system mode PMCs directly */
if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
/* program the ESCR first, then write the CCCR with ENABLE set */
wrmsr(escrmsr, escrvalue | escrtbits);
wrmsr(pd->pm_cccr_msr, cccrvalue | cccrtbits | P4_CCCR_ENABLE);
return 0;
}
/*
* Thread mode PMCs
*
* On HTT machines, the same PMC could be scheduled on the
* same physical CPU twice (once for each logical CPU), for
* example, if two threads of a multi-threaded process get
* scheduled on the same CPU.
*
*/
mtx_lock_spin(&pc->pc_mtx);
/* number of logical CPUs currently running this PMC row */
rc = P4_PCPU_GET_RUNCOUNT(pc,ri);
KASSERT(rc == 0 || rc == 1,
("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri,
rc));
if (rc == 0) { /* 1st CPU and the non-HTT case */
KASSERT(P4_PMC_IS_STOPPED(pd->pm_cccr_msr),
("[p4,%d] cpu=%d ri=%d cccr=0x%x not stopped", __LINE__,
cpu, ri, pd->pm_cccr_msr));
/* write out the low 40 bits of the saved value to hardware */
wrmsr(pd->pm_pmc_msr,
P4_PCPU_PMC_VALUE(pc,ri,cpu) & P4_PERFCTR_MASK);
} else if (rc == 1) { /* 2nd CPU */
/*
* Stop the PMC and retrieve the CCCR and ESCR values
* from their MSRs, and turn on the additional T[0/1]
* bits for the 2nd CPU.
*/
cccrvalue = rdmsr(pd->pm_cccr_msr);
wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);
/* check that the configuration bits read back match the PMC */
KASSERT((cccrvalue & P4_CCCR_Tx_MASK) ==
(pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK),
("[p4,%d] Extra CCCR bits cpu=%d rc=%d ri=%d "
"cccr=0x%x PMC=0x%x", __LINE__, cpu, rc, ri,
cccrvalue & P4_CCCR_Tx_MASK,
pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK));
/* the PMC must have been running for the other logical CPU */
KASSERT(cccrvalue & P4_CCCR_ENABLE,
("[p4,%d] 2nd cpu rc=%d cpu=%d ri=%d not running",
__LINE__, rc, cpu, ri));
/* this CPU's T bits must not already be set in the live CCCR */
KASSERT((cccrvalue & cccrtbits) == 0,
("[p4,%d] CCCR T0/T1 mismatch rc=%d cpu=%d ri=%d"
"cccrvalue=0x%x tbits=0x%x", __LINE__, rc, cpu, ri,
cccrvalue, cccrtbits));
escrvalue = rdmsr(escrmsr);
KASSERT((escrvalue & P4_ESCR_Tx_MASK) ==
(pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK),
("[p4,%d] Extra ESCR bits cpu=%d rc=%d ri=%d "
"escr=0x%x pm=0x%x", __LINE__, cpu, rc, ri,
escrvalue & P4_ESCR_Tx_MASK,
pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK));
/* likewise, this CPU's T bits must not already be set in the ESCR */
KASSERT((escrvalue & escrtbits) == 0,
("[p4,%d] ESCR T0/T1 mismatch rc=%d cpu=%d ri=%d "
"escrmsr=0x%x escrvalue=0x%x tbits=0x%x", __LINE__,
rc, cpu, ri, escrmsr, escrvalue, escrtbits));
}
/* Enable the correct bits for this CPU. */
escrvalue |= escrtbits;
cccrvalue |= cccrtbits | P4_CCCR_ENABLE;
/* Save HW value at the time of starting hardware */
P4_PCPU_HW_VALUE(pc,ri,cpu) = rdmsr(pd->pm_pmc_msr);
/* Program the ESCR and CCCR and start the PMC */
wrmsr(escrmsr, escrvalue);
wrmsr(pd->pm_cccr_msr, cccrvalue);
/* one more logical CPU is now running this PMC row */
++rc;
P4_PCPU_SET_RUNCOUNT(pc,ri,rc);
mtx_unlock_spin(&pc->pc_mtx);
PMCDBG6(MDP,STA,2,"p4-start cpu=%d rc=%d ri=%d escr=%d "
"escrmsr=0x%x escrvalue=0x%x", cpu, rc,
ri, pm->pm_md.pm_p4.pm_p4_escr, escrmsr, escrvalue);
PMCDBG2(MDP,STA,2,"cccr_config=0x%x v=%jx",
cccrvalue, P4_PCPU_HW_VALUE(pc,ri,cpu));
return (0);
}
/*
 * Stop a PMC.
 *
 * Stops the PMC configured at row-index 'ri' on logical CPU 'cpu'.
 * The per-CPU state of the HTT primary for 'cpu' is used throughout.
 *
 * System-mode PMCs are stopped by writing the PMC's saved CCCR value
 * with the ENABLE bit cleared; no counter value is read back here.
 *
 * For all other PMCs, under pc_mtx, the per-row run count is
 * decremented, the CCCR is disabled and the current counter value is
 * read.  If another logical CPU is still using the PMC (run count is
 * 1 after the decrement), only this CPU's T0/T1 bits are cleared and
 * the ESCR and CCCR are written back.  After the lock is dropped, the
 * number of events counted since the matching start is added to the
 * saved per-CPU PMC value, accounting for a wrap of the 40 bit
 * hardware counter.
 *
 * Always returns 0.
 */
static int
p4_stop_pmc(int cpu, int ri)
{
int rc;
uint32_t cccrvalue, cccrtbits, escrvalue, escrmsr, escrtbits;
struct pmc *pm;
struct p4_cpu *pc;
struct p4pmc_descr *pd;
pmc_value_t tmp;
KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
("[p4,%d] illegal CPU value %d", __LINE__, cpu));
KASSERT(ri >= 0 && ri < P4_NPMCS,
("[p4,%d] illegal row index %d", __LINE__, ri));
pd = &p4_pmcdesc[ri];
/* per-CPU state is kept with the HTT primary of this logical CPU */
pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
pm = pc->pc_p4pmcs[ri].phw_pmc;
KASSERT(pm != NULL,
("[p4,%d] null pmc for cpu%d, ri%d", __LINE__, cpu, ri));
PMCDBG2(MDP,STO,1, "p4-stop cpu=%d ri=%d", cpu, ri);
/* system mode PMCs: write the saved CCCR value with ENABLE cleared */
if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
wrmsr(pd->pm_cccr_msr,
pm->pm_md.pm_p4.pm_p4_cccrvalue & ~P4_CCCR_ENABLE);
return (0);
}
/*
* Thread mode PMCs.
*
* On HTT machines, this PMC may be in use by two threads
* running on two logical CPUS. Thus we look at the
* 'runcount' field and only turn off the appropriate T0/T1
* bits (and keep the PMC running) if two logical CPUs were
* using the PMC.
*
*/
/* bits to mask */
cccrtbits = P4_CCCR_OVF_PMI_T0;
escrtbits = P4_ESCR_T0_OS | P4_ESCR_T0_USR;
if (P4_CPU_IS_HTT_SECONDARY(cpu)) {
/* shift the T0 bit masks to their T1 positions */
cccrtbits <<= 1;
escrtbits >>= 2;
}
mtx_lock_spin(&pc->pc_mtx);
rc = P4_PCPU_GET_RUNCOUNT(pc,ri);
KASSERT(rc == 2 || rc == 1,
("[p4,%d] illegal runcount cpu=%d ri=%d rc=%d", __LINE__, cpu, ri,
rc));
/* rc now holds the number of logical CPUs still using the PMC */
--rc;
P4_PCPU_SET_RUNCOUNT(pc,ri,rc);
/* Stop this PMC */
cccrvalue = rdmsr(pd->pm_cccr_msr);
wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);
escrmsr = pm->pm_md.pm_p4.pm_p4_escrmsr;
escrvalue = rdmsr(escrmsr);
/* The current CPU should be running on this PMC */
KASSERT(escrvalue & escrtbits,
("[p4,%d] ESCR T0/T1 mismatch cpu=%d rc=%d ri=%d escrmsr=0x%x "
"escrvalue=0x%x tbits=0x%x", __LINE__, cpu, rc, ri, escrmsr,
escrvalue, escrtbits));
KASSERT(PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)) ||
(cccrvalue & cccrtbits),
("[p4,%d] CCCR T0/T1 mismatch cpu=%d ri=%d cccrvalue=0x%x "
"tbits=0x%x", __LINE__, cpu, ri, cccrvalue, cccrtbits));
/* get the current hardware reading */
tmp = rdmsr(pd->pm_pmc_msr);
if (rc == 1) { /* need to keep the PMC running */
/*
* cccrvalue was read before ENABLE was cleared above, so
* writing it back restores the enable state it had then,
* minus this CPU's T bits.
*/
escrvalue &= ~escrtbits;
cccrvalue &= ~cccrtbits;
wrmsr(escrmsr, escrvalue);
wrmsr(pd->pm_cccr_msr, cccrvalue);
}
mtx_unlock_spin(&pc->pc_mtx);
PMCDBG5(MDP,STO,2, "p4-stop cpu=%d rc=%d ri=%d escrmsr=0x%x "
"escrval=0x%x", cpu, rc, ri, escrmsr, escrvalue);
PMCDBG2(MDP,STO,2, "cccrval=0x%x v=%jx", cccrvalue, tmp);
/* convert the raw reading into the delta since the PMC was started */
if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit counter overflow */
tmp += (P4_PERFCTR_MASK + 1) - P4_PCPU_HW_VALUE(pc,ri,cpu);
else
tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu);
/* accumulate the delta into the saved per-CPU value */
P4_PCPU_PMC_VALUE(pc,ri,cpu) += tmp;
return 0;
}
/*
 * Handle an interrupt.
 *
 * The hardware sets the CCCR_OVF whenever a counter overflow occurs,
 * so the handler examines all the 18 CCCR registers, processing the
 * counters that have overflowed.
 *
 * On HTT machines, the CCCR register is shared and will interrupt
 * both logical processors if so configured. Thus multiple logical
 * CPUs could enter the NMI service routine at the same time. These
 * will get serialized using a per-cpu spinlock dedicated for use in
 * the NMI handler.
 *
 * 'cpu' is the logical CPU taking the interrupt and 'tf' its trap
 * frame.  Returns non-zero if at least one PMC interrupt was found
 * for this CPU, and zero otherwise.
 */
static int
p4_intr(int cpu, struct trapframe *tf)
{
uint32_t cccrval, ovf_mask, ovf_partner;
int did_interrupt, error, ri;
struct p4_cpu *pc;
struct pmc *pm;
pmc_value_t v;
PMCDBG3(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
TRAPF_USERMODE(tf));
pc = p4_pcpu[P4_TO_HTT_PRIMARY(cpu)];
/* a CCCR interrupted us if OVF and our own OVF_PMI_Tx bit are both set */
ovf_mask = P4_CPU_IS_HTT_SECONDARY(cpu) ?
P4_CCCR_OVF_PMI_T1 : P4_CCCR_OVF_PMI_T0;
ovf_mask |= P4_CCCR_OVF;
/* the OVF_PMI_Tx bit of the other logical CPU, if there is one */
if (p4_system_has_htt)
ovf_partner = P4_CPU_IS_HTT_SECONDARY(cpu) ?
P4_CCCR_OVF_PMI_T0 : P4_CCCR_OVF_PMI_T1;
else
ovf_partner = 0;
did_interrupt = 0;
if (p4_system_has_htt)
P4_PCPU_ACQ_INTR_SPINLOCK(pc);
/*
* Loop through all CCCRs, looking for ones that have
* interrupted this CPU.
*/
for (ri = 0; ri < P4_NPMCS; ri++) {
/*
* Check if our partner logical CPU has already marked
* this PMC as having interrupted it. If so, reset
* the flag and process the interrupt, but leave the
* hardware alone.
*/
if (p4_system_has_htt && P4_PCPU_GET_INTRFLAG(pc,ri)) {
P4_PCPU_SET_INTRFLAG(pc,ri,0);
did_interrupt = 1;
/*
* Ignore de-configured or stopped PMCs.
* Ignore PMCs not in sampling mode.
*/
pm = pc->pc_p4pmcs[ri].phw_pmc;
if (pm == NULL ||
pm->pm_state != PMC_STATE_RUNNING ||
!PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
continue;
}
(void) pmc_process_interrupt(cpu, PMC_HR, pm, tf,
TRAPF_USERMODE(tf));
continue;
}
/*
* Fresh interrupt. Look for the CCCR_OVF bit
* and the OVF_Tx bit for this logical
* processor being set.
*/
cccrval = rdmsr(P4_CCCR_MSR_FIRST + ri);
if ((cccrval & ovf_mask) != ovf_mask)
continue;
/*
* If the other logical CPU would also have been
* interrupted due to the PMC being shared, record
* this fact in the per-cpu saved interrupt flag
* bitmask.
*/
if (p4_system_has_htt && (cccrval & ovf_partner))
P4_PCPU_SET_INTRFLAG(pc, ri, 1);
/* the counter value is read for the debug trace below */
v = rdmsr(P4_PERFCTR_MSR_FIRST + ri);
PMCDBG2(MDP,INT, 2, "ri=%d v=%jx", ri, v);
/* Stop the counter, and reset the overflow bit */
cccrval &= ~(P4_CCCR_OVF | P4_CCCR_ENABLE);
wrmsr(P4_CCCR_MSR_FIRST + ri, cccrval);
did_interrupt = 1;
/*
* Ignore de-configured or stopped PMCs. Ignore PMCs
* not in sampling mode.
*/
pm = pc->pc_p4pmcs[ri].phw_pmc;
if (pm == NULL ||
pm->pm_state != PMC_STATE_RUNNING ||
!PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
continue;
}
/*
* Process the interrupt. Re-enable the PMC if
* processing was successful.
*/
error = pmc_process_interrupt(cpu, PMC_HR, pm, tf,
TRAPF_USERMODE(tf));
/*
* Only the first processor executing the NMI handler
* in a HTT pair will restart a PMC, and that too
* only if there were no errors.
*/
/* the counter is reloaded even when the PMC is left disabled */
v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(
pm->pm_sc.pm_reloadcount);
wrmsr(P4_PERFCTR_MSR_FIRST + ri, v);
if (error == 0)
wrmsr(P4_CCCR_MSR_FIRST + ri,
cccrval | P4_CCCR_ENABLE);
}
/* allow the other CPU to proceed */
if (p4_system_has_htt)
P4_PCPU_REL_INTR_SPINLOCK(pc);
/*
* On Intel P4 CPUs, the PMC 'pcint' entry in the LAPIC gets
* masked when a PMC interrupts the CPU. We need to unmask
* the interrupt source explicitly.
*/
if (did_interrupt)
lapic_reenable_pmc();
/* account for the interrupt in the global PMC statistics */
if (did_interrupt)
counter_u64_add(pmc_stats.pm_intr_processed, 1);
else
counter_u64_add(pmc_stats.pm_intr_ignored, 1);
return (did_interrupt);
}
/*
 * Describe a CPU's PMC state.
 *
 * Copies the name and class of the PMC at row-index 'ri' into 'pi' and
 * reports whether that PMC is enabled on 'cpu'.  When it is enabled,
 * '*ppmc' is set to the PMC recorded for that row; otherwise it is set
 * to NULL.
 *
 * Returns 0 on success, EINVAL if 'cpu' is an HTT secondary CPU, or
 * the error returned by copystr().
 */
static int
p4_describe(int cpu, int ri, struct pmc_info *pi,
    struct pmc **ppmc)
{
	const struct p4pmc_descr *descr;
	size_t ncopied;
	int enabled, rv;

	KASSERT(cpu >= 0 && cpu < pmc_cpu_max(),
	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));
	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] row-index %d out of range", __LINE__, ri));

	PMCDBG2(MDP,OPS,1,"p4-describe cpu=%d ri=%d", cpu, ri);

	/* HTT secondary CPUs are not described. */
	if (P4_CPU_IS_HTT_SECONDARY(cpu))
		return (EINVAL);

	descr = &p4_pmcdesc[ri];

	rv = copystr(descr->pm_descr.pd_name, pi->pm_name,
	    PMC_NAME_MAX, &ncopied);
	if (rv != 0)
		return (rv);

	pi->pm_class = descr->pm_descr.pd_class;

	enabled = (p4_pcpu[cpu]->pc_p4pmcs[ri].phw_state &
	    PMC_PHW_FLAG_IS_ENABLED) != 0;

	pi->pm_enabled = enabled ? TRUE : FALSE;
	*ppmc = enabled ? p4_pcpu[cpu]->pc_p4pmcs[ri].phw_pmc : NULL;

	return (0);
}
/*
 * Get MSR# for use with RDPMC.
 *
 * Stores in '*msr' the offset of the counter MSR for row-index 'ri'
 * from the first P4 performance counter MSR.  Always returns 0.
 */
static int
p4_get_msr(int ri, uint32_t *msr)
{
	uint32_t offset;

	KASSERT(ri >= 0 && ri < P4_NPMCS,
	    ("[p4,%d] ri %d out of range", __LINE__, ri));

	offset = p4_pmcdesc[ri].pm_pmc_msr - P4_PERFCTR_MSR_FIRST;
	*msr = offset;

	PMCDBG2(MDP,OPS, 1, "ri=%d getmsr=0x%x", ri, offset);

	return (0);
}
/*
 * Initialize the P4 class of the machine-dependent PMC descriptor.
 *
 * Allocates the zeroed array of 'ncpus' per-CPU descriptor pointers
 * and, for PMC_CPU_INTEL_PIV, fills in the P4 class descriptor in 'md'
 * (capabilities, counter width, method table), installs the interrupt
 * handler and adds the P4 PMCs to the total PMC count.
 *
 * Returns 0 on success.  For an unrecognized CPU type the per-CPU
 * pointer array is released again and ENOSYS is returned.
 */
int
pmc_p4_initialize(struct pmc_mdep *md, int ncpus)
{
	struct pmc_classdep *pcd;
	struct p4_event_descr *pe;

	KASSERT(md != NULL, ("[p4,%d] md is NULL", __LINE__));
	KASSERT(cpu_vendor_id == CPU_VENDOR_INTEL,
	    ("[p4,%d] Initializing non-intel processor", __LINE__));

	PMCDBG0(MDP,INI,1, "p4-initialize");

	/* Allocate space for pointers to per-cpu descriptors. */
	p4_pcpu = malloc(sizeof(*p4_pcpu) * ncpus, M_PMC, M_ZERO | M_WAITOK);

	/* Fill in the class dependent descriptor. */
	pcd = &md->pmd_classdep[PMC_MDEP_CLASS_INDEX_P4];

	switch (md->pmd_cputype) {
	case PMC_CPU_INTEL_PIV:
		pcd->pcd_caps = P4_PMC_CAPS;
		pcd->pcd_class = PMC_CLASS_P4;
		pcd->pcd_num = P4_NPMCS;
		/* The P4 rows start after the PMCs already registered. */
		pcd->pcd_ri = md->pmd_npmc;
		pcd->pcd_width = 40;

		pcd->pcd_allocate_pmc = p4_allocate_pmc;
		pcd->pcd_config_pmc = p4_config_pmc;
		pcd->pcd_describe = p4_describe;
		pcd->pcd_get_config = p4_get_config;
		pcd->pcd_get_msr = p4_get_msr;
		pcd->pcd_pcpu_fini = p4_pcpu_fini;
		pcd->pcd_pcpu_init = p4_pcpu_init;
		pcd->pcd_read_pmc = p4_read_pmc;
		pcd->pcd_release_pmc = p4_release_pmc;
		pcd->pcd_start_pmc = p4_start_pmc;
		pcd->pcd_stop_pmc = p4_stop_pmc;
		pcd->pcd_write_pmc = p4_write_pmc;

		md->pmd_pcpu_fini = NULL;
		md->pmd_pcpu_init = NULL;
		md->pmd_intr = p4_intr;
		md->pmd_npmc += P4_NPMCS;

		/* model specific configuration */
		if ((cpu_id & 0xFFF) < 0xF27) {
			/*
			 * On P4 and Xeon with CPUID < (Family 15,
			 * Model 2, Stepping 7), only one ESCR is
			 * available for the IOQ_ALLOCATION event.
			 */
			pe = p4_find_event(PMC_EV_P4_IOQ_ALLOCATION);
			pe->pm_escrs[1] = P4_ESCR_NONE;
		}
		break;

	default:
		KASSERT(0,("[p4,%d] Unknown CPU type", __LINE__));
		/*
		 * KASSERT() compiles away without INVARIANTS; do not
		 * leak the per-cpu pointer array on this error path.
		 */
		free(p4_pcpu, M_PMC);
		p4_pcpu = NULL;
		return (ENOSYS);
	}

	return (0);
}
/*
 * Tear down the P4 class state.
 *
 * Frees the array of per-CPU descriptor pointers and resets the
 * pointer to NULL.  With INVARIANTS, every slot of the array is
 * asserted to be NULL before the array is freed.
 */
void
pmc_p4_finalize(struct pmc_mdep *md)
{
#if	defined(INVARIANTS)
	int cpu, ncpus;
#endif

	KASSERT(p4_pcpu != NULL,
	    ("[p4,%d] NULL p4_pcpu", __LINE__));

#if	defined(INVARIANTS)
	for (cpu = 0, ncpus = pmc_cpu_max(); cpu < ncpus; cpu++)
		KASSERT(p4_pcpu[cpu] == NULL, ("[p4,%d] non-null pcpu %d",
		    __LINE__, cpu));
#endif

	free(p4_pcpu, M_PMC);
	p4_pcpu = NULL;
}