hwpmc(9): Make pmclog buffer pcpu and update constants

On non-trivial SMP systems, contention on the pmc_owner mutex leads
to a substantial number of the captured samples being from the pmcstat
process itself. This change a) makes the buffers larger, to avoid
contention on the global list, and b) makes the working sample buffer
per-CPU.
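
The effect on the fast path, as a minimal sketch (condensed from the
hwpmc_logging.c diff below; names as in the driver, error handling
omitted):

	/*
	 * Before: every reserve/despatch pair, on every CPU, serialized
	 * on the owner's spin mutex while carving space out of one
	 * shared current buffer.
	 */
	mtx_lock_spin(&po->po_mtx);
	lh = pmclog_reserve(po, len);	/* carves space from po->po_curbuf */
	/* ... emit record words ... */
	mtx_unlock_spin(&po->po_mtx);	/* dropped in pmclog_release() */

	/*
	 * After: each CPU owns po->po_curbuf[curcpu]; blocking
	 * preemption and migration makes the common case lock-free.
	 * po->po_mtx is only taken briefly when a filled buffer is
	 * queued for I/O.
	 */
	spinlock_enter();
	lh = pmclog_reserve(po, len);	/* uses po->po_curbuf[curcpu] */
	/* ... emit record words ... */
	spinlock_exit();		/* done in PMCLOG_DESPATCH() */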

Run pmcstat in the background (default event rate of 64k):
pmcstat -S UNHALTED_CORE_CYCLES -O /dev/null sleep 600 &

Before:
make -j96 buildkernel -s >&/dev/null 3336.68s user 24684.10s system 7442% cpu 6:16.50 total

After:
make -j96 buildkernel -s >&/dev/null 2697.82s user 1347.35s system 6058% cpu 1:06.77 total

For a more realistic overhead measurement, set the sampling rate to
~2kHz on a 2.1GHz processor (2.1e9 cycles/s / 1050000 cycles/sample =
2000 samples/s):
pmcstat -n 1050000 -S UNHALTED_CORE_CYCLES -O /dev/null sleep 6000 &

Collecting 10 timings of `make -j96 buildkernel` with each kernel:

x before
+ after

real time:
    N           Min           Max        Median           Avg        Stddev
x  10          76.4        127.62        84.845        88.577     15.100031
+  10         59.71         60.79        60.135        60.179    0.29957192
Difference at 95.0% confidence
        -28.398 +/- 10.0344
        -32.0602% +/- 7.69825%
        (Student's t, pooled s = 10.6794)

system time:
    N           Min           Max        Median           Avg        Stddev
x  10       2277.96       6948.53       2949.47      3341.492     1385.2677
+  10        1038.7       1081.06      1070.555      1064.017      15.85404
Difference at 95.0% confidence
        -2277.47 +/- 920.425
        -68.1574% +/- 8.77623%
        (Student's t, pooled s = 979.596)

x no pmc
+ pmc running
real time:

HEAD:
    N           Min           Max        Median           Avg        Stddev
x  10         58.38         59.15         58.86        58.847    0.22504567
+  10          76.4        127.62        84.845        88.577     15.100031
Difference at 95.0% confidence
        29.73 +/- 10.0335
        50.5208% +/- 17.0525%
        (Student's t, pooled s = 10.6785)

patched:
    N           Min           Max        Median           Avg        Stddev
x  10         58.38         59.15         58.86        58.847    0.22504567
+  10         59.71         60.79        60.135        60.179    0.29957192
Difference at 95.0% confidence
        1.332 +/- 0.248939
        2.2635% +/- 0.426506%
        (Student's t, pooled s = 0.264942)

system time:

HEAD:
    N           Min           Max        Median           Avg        Stddev
x  10       1010.15       1073.31      1025.465      1031.524     18.135705
+  10       2277.96       6948.53       2949.47      3341.492     1385.2677
Difference at 95.0% confidence
        2309.97 +/- 920.443
        223.937% +/- 89.3039%
        (Student's t, pooled s = 979.616)

patched:
    N           Min           Max        Median           Avg        Stddev
x  10       1010.15       1073.31      1025.465      1031.524     18.135705
+  10        1038.7       1081.06      1070.555      1064.017      15.85404
Difference at 95.0% confidence
        32.493 +/- 16.0042
        3.15% +/- 1.5794%
        (Student's t, pooled s = 17.0331)

Reviewed by:	jeff@
Approved by:	sbruno@
Differential Revision:	https://reviews.freebsd.org/D15155
commit e6b475e0af (parent 5d4835e4b7)
Author:	Matt Macy
Date:	2018-05-12 01:26:34 +00:00
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=333509
14 changed files with 433 additions and 244 deletions

sys/dev/hwpmc/hwpmc_amd.c

@ -694,8 +694,10 @@ amd_intr(int cpu, struct trapframe *tf)
wrmsr(evsel, config);
}
atomic_add_int(retval ? &pmc_stats.pm_intr_processed :
&pmc_stats.pm_intr_ignored, 1);
if (retval)
counter_u64_add(pmc_stats.pm_intr_processed, 1);
else
counter_u64_add(pmc_stats.pm_intr_ignored, 1);
PMCDBG1(MDP,INT,2, "retval=%d", retval);
return (retval);
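
This and the other interrupt handlers below stop doing atomic_add_int()
on a shared statistics word; counter(9) increments a per-CPU 64-bit
slot and only sums the slots when read. A self-contained sketch of that
KPI (hypothetical counter, not part of this diff):

	#include <sys/param.h>
	#include <sys/systm.h>
	#include <sys/counter.h>
	#include <sys/malloc.h>

	static counter_u64_t intr_seen;	/* hypothetical statistic */

	static void
	stats_init(void)
	{
		/* Allocate the per-CPU backing store. */
		intr_seen = counter_u64_alloc(M_WAITOK);
	}

	static void
	stats_hit(void)
	{
		/* Plain per-CPU increment: no locked op, no shared line. */
		counter_u64_add(intr_seen, 1);
	}

	static uint64_t
	stats_read(void)
	{
		/* The expensive side: sums every CPU's slot. */
		return (counter_u64_fetch(intr_seen));
	}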

sys/dev/hwpmc/hwpmc_core.c

@ -2831,8 +2831,10 @@ core_intr(int cpu, struct trapframe *tf)
if (found_interrupt)
lapic_reenable_pmc();
atomic_add_int(found_interrupt ? &pmc_stats.pm_intr_processed :
&pmc_stats.pm_intr_ignored, 1);
if (found_interrupt)
counter_u64_add(pmc_stats.pm_intr_processed, 1);
else
counter_u64_add(pmc_stats.pm_intr_ignored, 1);
return (found_interrupt);
}
@ -2896,6 +2898,7 @@ core2_intr(int cpu, struct trapframe *tf)
error = pmc_process_interrupt(cpu, PMC_HR, pm, tf,
TRAPF_USERMODE(tf));
if (error)
intrenable &= ~flag;
@ -2955,8 +2958,10 @@ core2_intr(int cpu, struct trapframe *tf)
if (found_interrupt)
lapic_reenable_pmc();
atomic_add_int(found_interrupt ? &pmc_stats.pm_intr_processed :
&pmc_stats.pm_intr_ignored, 1);
if (found_interrupt)
counter_u64_add(pmc_stats.pm_intr_processed, 1);
else
counter_u64_add(pmc_stats.pm_intr_ignored, 1);
return (found_interrupt);
}

sys/dev/hwpmc/hwpmc_e500.c

@ -616,8 +616,10 @@ e500_intr(int cpu, struct trapframe *tf)
e500_write_pmc(cpu, i, pm->pm_sc.pm_reloadcount);
}
atomic_add_int(retval ? &pmc_stats.pm_intr_processed :
&pmc_stats.pm_intr_ignored, 1);
if (retval)
counter_u64_add(pmc_stats.pm_intr_processed, 1);
else
counter_u64_add(pmc_stats.pm_intr_ignored, 1);
/* Re-enable PERF exceptions. */
if (retval)

sys/dev/hwpmc/hwpmc_intel.c

@ -94,6 +94,8 @@ pmc_intel_initialize(void)
model = ((cpu_id & 0xF0000) >> 12) | ((cpu_id & 0xF0) >> 4);
stepping = cpu_id & 0xF;
snprintf(pmc_cpuid, sizeof(pmc_cpuid), "GenuineIntel-%d-%02X",
(cpu_id & 0xF00) >> 8, model);
switch (cpu_id & 0xF00) {
#if defined(__i386__)
case 0x500: /* Pentium family processors */

sys/dev/hwpmc/hwpmc_logging.c

@ -3,6 +3,7 @@
*
* Copyright (c) 2005-2007 Joseph Koshy
* Copyright (c) 2007 The FreeBSD Foundation
* Copyright (c) 2018 Matthew Macy
* All rights reserved.
*
* Portions of this software were developed by A. Joseph Koshy under
@ -50,7 +51,9 @@ __FBSDID("$FreeBSD$");
#include <sys/pmckern.h>
#include <sys/pmclog.h>
#include <sys/proc.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/smp.h>
#include <sys/syscallsubr.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
@ -79,31 +82,28 @@ SYSCTL_INT(_kern_hwpmc, OID_AUTO, logbuffersize, CTLFLAG_RDTUN,
* kern.hwpmc.nbuffer -- number of global log buffers
*/
static int pmc_nlogbuffers = PMC_NLOGBUFFERS;
static int pmc_nlogbuffers_pcpu = PMC_NLOGBUFFERS_PCPU;
#if (__FreeBSD_version < 1100000)
TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "nbuffers", &pmc_nlogbuffers);
TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "nbuffers", &pmc_nlogbuffers_pcpu);
#endif
SYSCTL_INT(_kern_hwpmc, OID_AUTO, nbuffers, CTLFLAG_RDTUN,
&pmc_nlogbuffers, 0, "number of global log buffers");
SYSCTL_INT(_kern_hwpmc, OID_AUTO, nbuffers_pcpu, CTLFLAG_RDTUN,
&pmc_nlogbuffers_pcpu, 0, "number of log buffers per cpu");
/*
* Global log buffer list and associated spin lock.
*/
TAILQ_HEAD(, pmclog_buffer) pmc_bufferlist =
TAILQ_HEAD_INITIALIZER(pmc_bufferlist);
static struct mtx pmc_bufferlist_mtx; /* spin lock */
static struct mtx pmc_kthread_mtx; /* sleep lock */
#define PMCLOG_INIT_BUFFER_DESCRIPTOR(D) do { \
const int __roundup = roundup(sizeof(*D), \
sizeof(uint32_t)); \
(D)->plb_fence = ((char *) (D)) + \
1024*pmclog_buffer_size; \
(D)->plb_base = (D)->plb_ptr = ((char *) (D)) + \
__roundup; \
#define PMCLOG_INIT_BUFFER_DESCRIPTOR(D, buf, domain) do { \
(D)->plb_fence = ((char *) (buf)) + 1024*pmclog_buffer_size; \
(D)->plb_base = (D)->plb_ptr = ((char *) (buf)); \
(D)->plb_domain = domain; \
} while (0)
#define PMCLOG_RESET_BUFFER_DESCRIPTOR(D) do { \
(D)->plb_ptr = (D)->plb_base; \
} while (0)
/*
* Log file record constructors.
@ -114,15 +114,29 @@ static struct mtx pmc_kthread_mtx; /* sleep lock */
((L) & 0xFFFF))
/* reserve LEN bytes of space and initialize the entry header */
#define _PMCLOG_RESERVE(PO,TYPE,LEN,ACTION) do { \
#define _PMCLOG_RESERVE_SAFE(PO,TYPE,LEN,ACTION) do { \
uint32_t *_le; \
int _len = roundup((LEN), sizeof(uint32_t)); \
int _len = roundup((LEN), sizeof(uint32_t)); \
if ((_le = pmclog_reserve((PO), _len)) == NULL) { \
ACTION; \
} \
*_le = _PMCLOG_TO_HEADER(TYPE,_len); \
_le += 3 /* skip over timestamp */
/* reserve LEN bytes of space and initialize the entry header */
#define _PMCLOG_RESERVE(PO,TYPE,LEN,ACTION) do { \
uint32_t *_le; \
int _len = roundup((LEN), sizeof(uint32_t)); \
spinlock_enter(); \
if ((_le = pmclog_reserve((PO), _len)) == NULL) { \
spinlock_exit(); \
ACTION; \
} \
*_le = _PMCLOG_TO_HEADER(TYPE,_len); \
_le += 3 /* skip over timestamp */
#define PMCLOG_RESERVE_SAFE(P,T,L) _PMCLOG_RESERVE_SAFE(P,T,L,return)
#define PMCLOG_RESERVE(P,T,L) _PMCLOG_RESERVE(P,T,L,return)
#define PMCLOG_RESERVE_WITH_ERROR(P,T,L) _PMCLOG_RESERVE(P,T,L, \
error=ENOMEM;goto error)
@ -138,10 +152,20 @@ static struct mtx pmc_kthread_mtx; /* sleep lock */
#define PMCLOG_EMITSTRING(S,L) do { bcopy((S), _le, (L)); } while (0)
#define PMCLOG_EMITNULLSTRING(L) do { bzero(_le, (L)); } while (0)
#define PMCLOG_DESPATCH(PO) \
pmclog_release((PO)); \
#define PMCLOG_DESPATCH_SAFE(PO) \
pmclog_release((PO)); \
} while (0)
#define PMCLOG_DESPATCH(PO) \
pmclog_release((PO)); \
spinlock_exit(); \
} while (0)
#define PMCLOG_DESPATCH_SYNC(PO) \
pmclog_schedule_io((PO)); \
spinlock_exit(); \
} while (0)
/*
* Assertions about the log file format.
@ -180,7 +204,18 @@ struct pmclog_buffer {
char *plb_base;
char *plb_ptr;
char *plb_fence;
};
uint16_t plb_domain;
} __aligned(CACHE_LINE_SIZE);
struct pmc_domain_buffer_header {
struct mtx pdbh_mtx;
TAILQ_HEAD(, pmclog_buffer) pdbh_head;
struct pmclog_buffer *pdbh_plbs;
int pdbh_ncpus;
} __aligned(CACHE_LINE_SIZE);
struct pmc_domain_buffer_header *pmc_dom_hdrs[MAXMEMDOM];
/*
* Prototypes
@ -191,12 +226,28 @@ static void pmclog_loop(void *arg);
static void pmclog_release(struct pmc_owner *po);
static uint32_t *pmclog_reserve(struct pmc_owner *po, int length);
static void pmclog_schedule_io(struct pmc_owner *po);
static void pmclog_schedule_all(struct pmc_owner *po);
static void pmclog_stop_kthread(struct pmc_owner *po);
/*
* Helper functions
*/
static inline void
pmc_plb_rele_unlocked(struct pmclog_buffer *plb)
{
TAILQ_INSERT_HEAD(&pmc_dom_hdrs[plb->plb_domain]->pdbh_head, plb, plb_next);
}
static inline void
pmc_plb_rele(struct pmclog_buffer *plb)
{
mtx_lock_spin(&pmc_dom_hdrs[plb->plb_domain]->pdbh_mtx);
pmc_plb_rele_unlocked(plb);
mtx_unlock_spin(&pmc_dom_hdrs[plb->plb_domain]->pdbh_mtx);
}
/*
* Get a log buffer
*/
@ -205,16 +256,16 @@ static int
pmclog_get_buffer(struct pmc_owner *po)
{
struct pmclog_buffer *plb;
int domain;
mtx_assert(&po->po_mtx, MA_OWNED);
KASSERT(po->po_curbuf == NULL,
KASSERT(po->po_curbuf[curcpu] == NULL,
("[pmclog,%d] po=%p current buffer still valid", __LINE__, po));
mtx_lock_spin(&pmc_bufferlist_mtx);
if ((plb = TAILQ_FIRST(&pmc_bufferlist)) != NULL)
TAILQ_REMOVE(&pmc_bufferlist, plb, plb_next);
mtx_unlock_spin(&pmc_bufferlist_mtx);
domain = PCPU_GET(domain);
mtx_lock_spin(&pmc_dom_hdrs[domain]->pdbh_mtx);
if ((plb = TAILQ_FIRST(&pmc_dom_hdrs[domain]->pdbh_head)) != NULL)
TAILQ_REMOVE(&pmc_dom_hdrs[domain]->pdbh_head, plb, plb_next);
mtx_unlock_spin(&pmc_dom_hdrs[domain]->pdbh_mtx);
PMCDBG2(LOG,GTB,1, "po=%p plb=%p", po, plb);
@ -227,12 +278,12 @@ pmclog_get_buffer(struct pmc_owner *po)
plb->plb_base, plb->plb_fence));
#endif
po->po_curbuf = plb;
po->po_curbuf[curcpu] = plb;
/* update stats */
atomic_add_int(&pmc_stats.pm_buffer_requests, 1);
counter_u64_add(pmc_stats.pm_buffer_requests, 1);
if (plb == NULL)
atomic_add_int(&pmc_stats.pm_buffer_requests_failed, 1);
counter_u64_add(pmc_stats.pm_buffer_requests_failed, 1);
return (plb ? 0 : ENOMEM);
}
@ -421,12 +472,9 @@ pmclog_loop(void *arg)
mtx_lock(&pmc_kthread_mtx);
/* put the used buffer back into the global pool */
PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);
mtx_lock_spin(&pmc_bufferlist_mtx);
TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
mtx_unlock_spin(&pmc_bufferlist_mtx);
PMCLOG_RESET_BUFFER_DESCRIPTOR(lb);
pmc_plb_rele(lb);
lb = NULL;
}
@ -437,11 +485,9 @@ pmclog_loop(void *arg)
/* return the current I/O buffer to the global pool */
if (lb) {
PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);
PMCLOG_RESET_BUFFER_DESCRIPTOR(lb);
mtx_lock_spin(&pmc_bufferlist_mtx);
TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
mtx_unlock_spin(&pmc_bufferlist_mtx);
pmc_plb_rele(lb);
}
/*
@ -460,19 +506,20 @@ pmclog_loop(void *arg)
static void
pmclog_release(struct pmc_owner *po)
{
KASSERT(po->po_curbuf->plb_ptr >= po->po_curbuf->plb_base,
struct pmclog_buffer *plb;
plb = po->po_curbuf[curcpu];
KASSERT(plb->plb_ptr >= plb->plb_base,
("[pmclog,%d] buffer invariants po=%p ptr=%p base=%p", __LINE__,
po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base));
KASSERT(po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
po, plb->plb_ptr, plb->plb_base));
KASSERT(plb->plb_ptr <= plb->plb_fence,
("[pmclog,%d] buffer invariants po=%p ptr=%p fenc=%p", __LINE__,
po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_fence));
po, plb->plb_ptr, plb->plb_fence));
/* schedule an I/O if we've filled a buffer */
if (po->po_curbuf->plb_ptr >= po->po_curbuf->plb_fence)
if (plb->plb_ptr >= plb->plb_fence)
pmclog_schedule_io(po);
mtx_unlock_spin(&po->po_mtx);
PMCDBG1(LOG,REL,1, "po=%p", po);
}
@ -492,36 +539,32 @@ pmclog_reserve(struct pmc_owner *po, int length)
uintptr_t newptr, oldptr;
uint32_t *lh;
struct timespec ts;
struct pmclog_buffer *plb, **pplb;
PMCDBG2(LOG,ALL,1, "po=%p len=%d", po, length);
KASSERT(length % sizeof(uint32_t) == 0,
("[pmclog,%d] length not a multiple of word size", __LINE__));
mtx_lock_spin(&po->po_mtx);
/* No more data when shutdown in progress. */
if (po->po_flags & PMC_PO_SHUTDOWN) {
mtx_unlock_spin(&po->po_mtx);
if (po->po_flags & PMC_PO_SHUTDOWN)
return (NULL);
}
if (po->po_curbuf == NULL)
if (pmclog_get_buffer(po) != 0) {
mtx_unlock_spin(&po->po_mtx);
return (NULL);
}
pplb = &po->po_curbuf[curcpu];
if (*pplb == NULL && pmclog_get_buffer(po) != 0)
goto fail;
KASSERT(po->po_curbuf != NULL,
KASSERT(*pplb != NULL,
("[pmclog,%d] po=%p no current buffer", __LINE__, po));
KASSERT(po->po_curbuf->plb_ptr >= po->po_curbuf->plb_base &&
po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
plb = *pplb;
KASSERT(plb->plb_ptr >= plb->plb_base &&
plb->plb_ptr <= plb->plb_fence,
("[pmclog,%d] po=%p buffer invariants: ptr=%p base=%p fence=%p",
__LINE__, po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base,
po->po_curbuf->plb_fence));
__LINE__, po, plb->plb_ptr, plb->plb_base,
plb->plb_fence));
oldptr = (uintptr_t) po->po_curbuf->plb_ptr;
oldptr = (uintptr_t) plb->plb_ptr;
newptr = oldptr + length;
KASSERT(oldptr != (uintptr_t) NULL,
@ -531,8 +574,8 @@ pmclog_reserve(struct pmc_owner *po, int length)
* If we have space in the current buffer, return a pointer to
* available space with the PO structure locked.
*/
if (newptr <= (uintptr_t) po->po_curbuf->plb_fence) {
po->po_curbuf->plb_ptr = (char *) newptr;
if (newptr <= (uintptr_t) plb->plb_fence) {
plb->plb_ptr = (char *) newptr;
goto done;
}
@ -542,24 +585,23 @@ pmclog_reserve(struct pmc_owner *po, int length)
*/
pmclog_schedule_io(po);
if (pmclog_get_buffer(po) != 0) {
mtx_unlock_spin(&po->po_mtx);
return (NULL);
}
if (pmclog_get_buffer(po) != 0)
goto fail;
KASSERT(po->po_curbuf != NULL,
plb = *pplb;
KASSERT(plb != NULL,
("[pmclog,%d] po=%p no current buffer", __LINE__, po));
KASSERT(po->po_curbuf->plb_ptr != NULL,
KASSERT(plb->plb_ptr != NULL,
("[pmclog,%d] null return from pmc_get_log_buffer", __LINE__));
KASSERT(po->po_curbuf->plb_ptr == po->po_curbuf->plb_base &&
po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
KASSERT(plb->plb_ptr == plb->plb_base &&
plb->plb_ptr <= plb->plb_fence,
("[pmclog,%d] po=%p buffer invariants: ptr=%p base=%p fence=%p",
__LINE__, po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base,
po->po_curbuf->plb_fence));
__LINE__, po, plb->plb_ptr, plb->plb_base,
plb->plb_fence));
oldptr = (uintptr_t) po->po_curbuf->plb_ptr;
oldptr = (uintptr_t) plb->plb_ptr;
done:
lh = (uint32_t *) oldptr;
@ -568,6 +610,8 @@ pmclog_reserve(struct pmc_owner *po, int length)
*lh++ = ts.tv_sec & 0xFFFFFFFF;
*lh++ = ts.tv_nsec & 0xFFFFFFF;
return ((uint32_t *) oldptr);
fail:
return (NULL);
}
/*
@ -579,26 +623,28 @@ pmclog_reserve(struct pmc_owner *po, int length)
static void
pmclog_schedule_io(struct pmc_owner *po)
{
KASSERT(po->po_curbuf != NULL,
("[pmclog,%d] schedule_io with null buffer po=%p", __LINE__, po));
struct pmclog_buffer *plb;
KASSERT(po->po_curbuf->plb_ptr >= po->po_curbuf->plb_base,
plb = po->po_curbuf[curcpu];
po->po_curbuf[curcpu] = NULL;
KASSERT(plb != NULL,
("[pmclog,%d] schedule_io with null buffer po=%p", __LINE__, po));
KASSERT(plb->plb_ptr >= plb->plb_base,
("[pmclog,%d] buffer invariants po=%p ptr=%p base=%p", __LINE__,
po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base));
KASSERT(po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
po, plb->plb_ptr, plb->plb_base));
KASSERT(plb->plb_ptr <= plb->plb_fence,
("[pmclog,%d] buffer invariants po=%p ptr=%p fenc=%p", __LINE__,
po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_fence));
po, plb->plb_ptr, plb->plb_fence));
PMCDBG1(LOG,SIO, 1, "po=%p", po);
mtx_assert(&po->po_mtx, MA_OWNED);
/*
* Add the current buffer to the tail of the buffer list and
* wakeup the helper.
*/
TAILQ_INSERT_TAIL(&po->po_logbuffers, po->po_curbuf, plb_next);
po->po_curbuf = NULL;
mtx_lock_spin(&po->po_mtx);
TAILQ_INSERT_TAIL(&po->po_logbuffers, plb, plb_next);
mtx_unlock_spin(&po->po_mtx);
wakeup_one(po);
}
@ -671,7 +717,7 @@ pmclog_configure_log(struct pmc_mdep *md, struct pmc_owner *po, int logfd)
sizeof(struct pmclog_initialize));
PMCLOG_EMIT32(PMC_VERSION);
PMCLOG_EMIT32(md->pmd_cputype);
PMCLOG_DESPATCH(po);
PMCLOG_DESPATCH_SYNC(po);
return (0);
@ -719,19 +765,22 @@ pmclog_deconfigure_log(struct pmc_owner *po)
/* return all queued log buffers to the global pool */
while ((lb = TAILQ_FIRST(&po->po_logbuffers)) != NULL) {
TAILQ_REMOVE(&po->po_logbuffers, lb, plb_next);
PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);
mtx_lock_spin(&pmc_bufferlist_mtx);
TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
mtx_unlock_spin(&pmc_bufferlist_mtx);
PMCLOG_RESET_BUFFER_DESCRIPTOR(lb);
pmc_plb_rele(lb);
}
/* return the 'current' buffer to the global pool */
if ((lb = po->po_curbuf) != NULL) {
PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);
mtx_lock_spin(&pmc_bufferlist_mtx);
TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
mtx_unlock_spin(&pmc_bufferlist_mtx);
for (int i = 0; i < mp_ncpus; i++) {
thread_lock(curthread);
sched_bind(curthread, i);
thread_unlock(curthread);
/* return the 'current' buffer to the global pool */
if ((lb = po->po_curbuf[curcpu]) != NULL) {
PMCLOG_RESET_BUFFER_DESCRIPTOR(lb);
pmc_plb_rele(lb);
}
}
thread_lock(curthread);
sched_unbind(curthread);
thread_unlock(curthread);
/* drop a reference to the fd */
if (po->po_file != NULL) {
@ -752,7 +801,6 @@ int
pmclog_flush(struct pmc_owner *po)
{
int error;
struct pmclog_buffer *lb;
PMCDBG1(LOG,FLS,1, "po=%p", po);
@ -774,23 +822,45 @@ pmclog_flush(struct pmc_owner *po)
goto error;
}
/*
* Schedule the current buffer if any and not empty.
*/
mtx_lock_spin(&po->po_mtx);
lb = po->po_curbuf;
if (lb && lb->plb_ptr != lb->plb_base) {
pmclog_schedule_io(po);
} else
error = ENOBUFS;
mtx_unlock_spin(&po->po_mtx);
pmclog_schedule_all(po);
error:
mtx_unlock(&pmc_kthread_mtx);
return (error);
}
static void
pmclog_schedule_one_cond(void *arg)
{
struct pmc_owner *po = arg;
struct pmclog_buffer *plb;
spinlock_enter();
/* tell hardclock not to run again */
DPCPU_SET(pmc_sampled, 0);
plb = po->po_curbuf[curcpu];
if (plb && plb->plb_ptr != plb->plb_base)
pmclog_schedule_io(po);
spinlock_exit();
}
static void
pmclog_schedule_all(struct pmc_owner *po)
{
/*
* Schedule the current buffer if any and not empty.
*/
for (int i = 0; i < mp_ncpus; i++) {
thread_lock(curthread);
sched_bind(curthread, i);
thread_unlock(curthread);
pmclog_schedule_one_cond(po);
}
thread_lock(curthread);
sched_unbind(curthread);
thread_unlock(curthread);
}
int
pmclog_close(struct pmc_owner *po)
{
@ -804,19 +874,14 @@ pmclog_close(struct pmc_owner *po)
/*
* Schedule the current buffer.
*/
mtx_lock_spin(&po->po_mtx);
if (po->po_curbuf)
pmclog_schedule_io(po);
else
wakeup_one(po);
mtx_unlock_spin(&po->po_mtx);
pmclog_schedule_all(po);
wakeup_one(po);
/*
* Initiate shutdown: no new data queued,
* thread will close file on last block.
*/
po->po_flags |= PMC_PO_SHUTDOWN;
mtx_unlock(&pmc_kthread_mtx);
return (0);
@ -836,20 +901,20 @@ pmclog_process_callchain(struct pmc *pm, struct pmc_sample *ps)
ps->ps_nsamples * sizeof(uintfptr_t);
po = pm->pm_owner;
flags = PMC_CALLCHAIN_TO_CPUFLAGS(ps->ps_cpu,ps->ps_flags);
PMCLOG_RESERVE(po, CALLCHAIN, recordlen);
PMCLOG_RESERVE_SAFE(po, CALLCHAIN, recordlen);
PMCLOG_EMIT32(ps->ps_pid);
PMCLOG_EMIT32(pm->pm_id);
PMCLOG_EMIT32(flags);
for (n = 0; n < ps->ps_nsamples; n++)
PMCLOG_EMITADDR(ps->ps_pc[n]);
PMCLOG_DESPATCH(po);
PMCLOG_DESPATCH_SAFE(po);
}
void
pmclog_process_closelog(struct pmc_owner *po)
{
PMCLOG_RESERVE(po,CLOSELOG,sizeof(struct pmclog_closelog));
PMCLOG_DESPATCH(po);
PMCLOG_DESPATCH_SYNC(po);
}
void
@ -913,14 +978,14 @@ pmclog_process_pmcallocate(struct pmc *pm)
else
PMCLOG_EMITNULLSTRING(PMC_NAME_MAX);
pmc_soft_ev_release(ps);
PMCLOG_DESPATCH(po);
PMCLOG_DESPATCH_SYNC(po);
} else {
PMCLOG_RESERVE(po, PMCALLOCATE,
sizeof(struct pmclog_pmcallocate));
PMCLOG_EMIT32(pm->pm_id);
PMCLOG_EMIT32(pm->pm_event);
PMCLOG_EMIT32(pm->pm_flags);
PMCLOG_DESPATCH(po);
PMCLOG_DESPATCH_SYNC(po);
}
}
@ -941,7 +1006,7 @@ pmclog_process_pmcattach(struct pmc *pm, pid_t pid, char *path)
PMCLOG_EMIT32(pm->pm_id);
PMCLOG_EMIT32(pid);
PMCLOG_EMITSTRING(path, pathlen);
PMCLOG_DESPATCH(po);
PMCLOG_DESPATCH_SYNC(po);
}
void
@ -956,7 +1021,7 @@ pmclog_process_pmcdetach(struct pmc *pm, pid_t pid)
PMCLOG_RESERVE(po, PMCDETACH, sizeof(struct pmclog_pmcdetach));
PMCLOG_EMIT32(pm->pm_id);
PMCLOG_EMIT32(pid);
PMCLOG_DESPATCH(po);
PMCLOG_DESPATCH_SYNC(po);
}
/*
@ -1081,30 +1146,57 @@ pmclog_process_userlog(struct pmc_owner *po, struct pmc_op_writelog *wl)
void
pmclog_initialize()
{
int n;
int domain, cpu;
struct pcpu *pc;
struct pmclog_buffer *plb;
if (pmclog_buffer_size <= 0) {
if (pmclog_buffer_size <= 0 || pmclog_buffer_size > 16*1024) {
(void) printf("hwpmc: tunable logbuffersize=%d must be "
"greater than zero.\n", pmclog_buffer_size);
"greater than zero and less than or equal to 16MB.\n",
pmclog_buffer_size);
pmclog_buffer_size = PMC_LOG_BUFFER_SIZE;
}
if (pmc_nlogbuffers <= 0) {
if (pmc_nlogbuffers_pcpu <= 0) {
(void) printf("hwpmc: tunable nlogbuffers=%d must be greater "
"than zero.\n", pmc_nlogbuffers);
pmc_nlogbuffers = PMC_NLOGBUFFERS;
"than zero.\n", pmc_nlogbuffers_pcpu);
pmc_nlogbuffers_pcpu = PMC_NLOGBUFFERS_PCPU;
}
/* create global pool of log buffers */
for (n = 0; n < pmc_nlogbuffers; n++) {
plb = malloc(1024 * pmclog_buffer_size, M_PMC,
M_WAITOK|M_ZERO);
PMCLOG_INIT_BUFFER_DESCRIPTOR(plb);
TAILQ_INSERT_HEAD(&pmc_bufferlist, plb, plb_next);
if (pmc_nlogbuffers_pcpu*pmclog_buffer_size > 32*1024) {
(void) printf("hwpmc: memory allocated pcpu must be less than 32MB (is %dK).\n",
pmc_nlogbuffers_pcpu*pmclog_buffer_size);
pmc_nlogbuffers_pcpu = PMC_NLOGBUFFERS_PCPU;
pmclog_buffer_size = PMC_LOG_BUFFER_SIZE;
}
for (domain = 0; domain < vm_ndomains; domain++) {
pmc_dom_hdrs[domain] = malloc_domain(sizeof(struct pmc_domain_buffer_header), M_PMC, domain,
M_WAITOK|M_ZERO);
mtx_init(&pmc_dom_hdrs[domain]->pdbh_mtx, "pmc_bufferlist_mtx", "pmc-leaf", MTX_SPIN);
TAILQ_INIT(&pmc_dom_hdrs[domain]->pdbh_head);
}
CPU_FOREACH(cpu) {
if (CPU_ABSENT(cpu))
continue;
pc = pcpu_find(cpu);
domain = pc->pc_domain;
pmc_dom_hdrs[domain]->pdbh_ncpus++;
}
for (domain = 0; domain < vm_ndomains; domain++) {
int ncpus = pmc_dom_hdrs[domain]->pdbh_ncpus;
int total = ncpus*pmc_nlogbuffers_pcpu;
plb = malloc_domain(sizeof(struct pmclog_buffer)*total, M_PMC, domain, M_WAITOK|M_ZERO);
pmc_dom_hdrs[domain]->pdbh_plbs = plb;
for (int i = 0; i < total; i++, plb++) {
void *buf;
buf = malloc_domain(1024 * pmclog_buffer_size, M_PMC, domain,
M_WAITOK|M_ZERO);
PMCLOG_INIT_BUFFER_DESCRIPTOR(plb, buf, domain);
pmc_plb_rele_unlocked(plb);
}
}
mtx_init(&pmc_bufferlist_mtx, "pmc-buffer-list", "pmc-leaf",
MTX_SPIN);
mtx_init(&pmc_kthread_mtx, "pmc-kthread", "pmc-sleep", MTX_DEF);
}
@ -1118,12 +1210,17 @@ void
pmclog_shutdown()
{
struct pmclog_buffer *plb;
int domain;
mtx_destroy(&pmc_kthread_mtx);
mtx_destroy(&pmc_bufferlist_mtx);
while ((plb = TAILQ_FIRST(&pmc_bufferlist)) != NULL) {
TAILQ_REMOVE(&pmc_bufferlist, plb, plb_next);
free(plb, M_PMC);
for (domain = 0; domain < vm_ndomains; domain++) {
mtx_destroy(&pmc_dom_hdrs[domain]->pdbh_mtx);
while ((plb = TAILQ_FIRST(&pmc_dom_hdrs[domain]->pdbh_head)) != NULL) {
TAILQ_REMOVE(&pmc_dom_hdrs[domain]->pdbh_head, plb, plb_next);
free(plb->plb_base, M_PMC);
}
free(pmc_dom_hdrs[domain]->pdbh_plbs, M_PMC);
free(pmc_dom_hdrs[domain], M_PMC);
}
}
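
pmclog_deconfigure_log() and pmclog_schedule_all() above drain the
per-CPU buffer slots with a bind-to-each-CPU loop: once the thread is
bound to CPU i, curcpu == i and it can touch po_curbuf[curcpu] without
racing the local fast path. The distilled pattern, with a hypothetical
visit() callback:

	#include <sys/param.h>
	#include <sys/proc.h>
	#include <sys/sched.h>
	#include <sys/smp.h>

	static void
	visit_all_cpus(void (*visit)(void *), void *arg)
	{
		for (int i = 0; i < mp_ncpus; i++) {
			thread_lock(curthread);
			sched_bind(curthread, i);	/* migrate to CPU i */
			thread_unlock(curthread);
			visit(arg);		/* runs with curcpu == i */
		}
		thread_lock(curthread);
		sched_unbind(curthread);
		thread_unlock(curthread);
	}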

sys/dev/hwpmc/hwpmc_mod.c

@ -3,6 +3,7 @@
*
* Copyright (c) 2003-2008 Joseph Koshy
* Copyright (c) 2007 The FreeBSD Foundation
* Copyright (c) 2018 Matthew Macy
* All rights reserved.
*
* Portions of this software were developed by A. Joseph Koshy under
@ -138,7 +139,8 @@ static eventhandler_tag pmc_exit_tag, pmc_fork_tag, pmc_kld_load_tag,
pmc_kld_unload_tag;
/* Module statistics */
struct pmc_op_getdriverstats pmc_stats;
struct pmc_driverstats pmc_stats;
/* Machine/processor dependent operations */
static struct pmc_mdep *md;
@ -235,11 +237,34 @@ static void pmc_generic_cpu_finalize(struct pmc_mdep *md);
*/
SYSCTL_DECL(_kern_hwpmc);
SYSCTL_NODE(_kern_hwpmc, OID_AUTO, stats, CTLFLAG_RW, 0, "HWPMC stats");
/* Stats. */
SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_ignored, CTLFLAG_RW,
&pmc_stats.pm_intr_ignored, "# of interrupts ignored");
SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_processed, CTLFLAG_RW,
&pmc_stats.pm_intr_processed, "# of interrupts processed");
SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, intr_bufferfull, CTLFLAG_RW,
&pmc_stats.pm_intr_bufferfull, "# of interrupts where buffer was full");
SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, syscalls, CTLFLAG_RW,
&pmc_stats.pm_syscalls, "# of syscalls");
SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, syscall_errors, CTLFLAG_RW,
&pmc_stats.pm_syscall_errors, "# of syscall_errors");
SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, buffer_requests, CTLFLAG_RW,
&pmc_stats.pm_buffer_requests, "# of buffer requests");
SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, buffer_requests_failed, CTLFLAG_RW,
&pmc_stats.pm_buffer_requests_failed, "# of buffer requests which failed");
SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, log_sweeps, CTLFLAG_RW,
&pmc_stats.pm_log_sweeps, "# of ?");
static int pmc_callchaindepth = PMC_CALLCHAIN_DEPTH;
SYSCTL_INT(_kern_hwpmc, OID_AUTO, callchaindepth, CTLFLAG_RDTUN,
&pmc_callchaindepth, 0, "depth of call chain records");
char pmc_cpuid[64];
SYSCTL_STRING(_kern_hwpmc, OID_AUTO, cpuid, CTLFLAG_RD,
pmc_cpuid, 0, "cpu version string");
#ifdef HWPMC_DEBUG
struct pmc_debugflags pmc_debugflags = PMC_DEBUG_DEFAULT_FLAGS;
char pmc_debugstr[PMC_DEBUG_STRSIZE];
@ -250,6 +275,7 @@ SYSCTL_PROC(_kern_hwpmc, OID_AUTO, debugflags,
0, 0, pmc_debugflags_sysctl_handler, "A", "debug flags");
#endif
/*
* kern.hwpmc.hashrows -- determines the number of rows in the
* of the hash table used to look up threads
@ -1260,7 +1286,7 @@ pmc_process_csw_in(struct thread *td)
continue;
/* increment PMC runcount */
atomic_add_rel_int(&pm->pm_runcount, 1);
counter_u64_add(pm->pm_runcount, 1);
/* configure the HWPMC we are going to use. */
pcd = pmc_ri_to_classdep(md, ri, &adjri);
@ -1311,10 +1337,10 @@ pmc_process_csw_in(struct thread *td)
/* If a sampling mode PMC, reset stalled state. */
if (PMC_TO_MODE(pm) == PMC_MODE_TS)
CPU_CLR_ATOMIC(cpu, &pm->pm_stalled);
pm->pm_pcpu_state[cpu].pps_stalled = 0;
/* Indicate that we desire this to run. */
CPU_SET_ATOMIC(cpu, &pm->pm_cpustate);
pm->pm_pcpu_state[cpu].pps_cpustate = 1;
/* Start the PMC. */
pcd->pcd_start_pmc(cpu, adjri);
@ -1417,12 +1443,12 @@ pmc_process_csw_out(struct thread *td)
* an interrupt re-enables the PMC after this code has
* already checked the pm_stalled flag.
*/
CPU_CLR_ATOMIC(cpu, &pm->pm_cpustate);
if (!CPU_ISSET(cpu, &pm->pm_stalled))
pm->pm_pcpu_state[cpu].pps_cpustate = 0;
if (pm->pm_pcpu_state[cpu].pps_stalled == 0)
pcd->pcd_stop_pmc(cpu, adjri);
/* reduce this PMC's runcount */
atomic_subtract_rel_int(&pm->pm_runcount, 1);
counter_u64_add(pm->pm_runcount, -1);
/*
* If this PMC is associated with this process,
@ -1537,7 +1563,7 @@ pmc_process_mmap(struct thread *td, struct pmckern_map_in *pkm)
/* Inform owners of all system-wide sampling PMCs. */
LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_map_in(po, pid, pkm->pm_address, fullpath);
pmclog_process_map_in(po, pid, pkm->pm_address, fullpath);
if ((pp = pmc_find_process_descriptor(td->td_proc, 0)) == NULL)
goto done;
@ -1993,7 +2019,7 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
* had already processed the interrupt). We don't
* lose the interrupt sample.
*/
CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmc_cpumask);
DPCPU_SET(pmc_sampled, 0);
pmc_process_samples(PCPU_GET(cpuid), PMC_HR);
pmc_process_samples(PCPU_GET(cpuid), PMC_SR);
break;
@ -2191,7 +2217,8 @@ pmc_allocate_pmc_descriptor(void)
struct pmc *pmc;
pmc = malloc(sizeof(struct pmc), M_PMC, M_WAITOK|M_ZERO);
pmc->pm_runcount = counter_u64_alloc(M_WAITOK);
pmc->pm_pcpu_state = malloc(sizeof(struct pmc_pcpu_state)*mp_ncpus, M_PMC, M_WAITOK|M_ZERO);
PMCDBG1(PMC,ALL,1, "allocate-pmc -> pmc=%p", pmc);
return pmc;
@ -2212,10 +2239,12 @@ pmc_destroy_pmc_descriptor(struct pmc *pm)
("[pmc,%d] destroying pmc with targets", __LINE__));
KASSERT(pm->pm_owner == NULL,
("[pmc,%d] destroying pmc attached to an owner", __LINE__));
KASSERT(pm->pm_runcount == 0,
("[pmc,%d] pmc has non-zero run count %d", __LINE__,
pm->pm_runcount));
KASSERT(counter_u64_fetch(pm->pm_runcount) == 0,
("[pmc,%d] pmc has non-zero run count %ld", __LINE__,
(unsigned long)counter_u64_fetch(pm->pm_runcount)));
counter_u64_free(pm->pm_runcount);
free(pm->pm_pcpu_state, M_PMC);
free(pm, M_PMC);
}
@ -2231,13 +2260,13 @@ pmc_wait_for_pmc_idle(struct pmc *pm)
* Loop (with a forced context switch) till the PMC's runcount
* comes down to zero.
*/
while (atomic_load_acq_32(&pm->pm_runcount) > 0) {
while (counter_u64_fetch(pm->pm_runcount) > 0) {
#ifdef HWPMC_DEBUG
maxloop--;
KASSERT(maxloop > 0,
("[pmc,%d] (ri%d, rc%d) waiting too long for "
("[pmc,%d] (ri%d, rc%ld) waiting too long for "
"pmc to be free", __LINE__,
PMC_TO_ROWINDEX(pm), pm->pm_runcount));
PMC_TO_ROWINDEX(pm), (unsigned long)counter_u64_fetch(pm->pm_runcount)));
#endif
pmc_force_context_switch();
}
@ -2295,9 +2324,9 @@ pmc_release_pmc_descriptor(struct pmc *pm)
pmc_select_cpu(cpu);
/* switch off non-stalled CPUs */
CPU_CLR_ATOMIC(cpu, &pm->pm_cpustate);
pm->pm_pcpu_state[cpu].pps_cpustate = 0;
if (pm->pm_state == PMC_STATE_RUNNING &&
!CPU_ISSET(cpu, &pm->pm_stalled)) {
pm->pm_pcpu_state[cpu].pps_stalled == 0) {
phw = pmc_pcpu[cpu]->pc_hwpmcs[ri];
@ -2735,10 +2764,10 @@ pmc_start(struct pmc *pm)
pm->pm_sc.pm_initial)) == 0) {
/* If a sampling mode PMC, reset stalled state. */
if (PMC_IS_SAMPLING_MODE(mode))
CPU_CLR_ATOMIC(cpu, &pm->pm_stalled);
pm->pm_pcpu_state[cpu].pps_stalled = 0;
/* Indicate that we desire this to run. Start it. */
CPU_SET_ATOMIC(cpu, &pm->pm_cpustate);
pm->pm_pcpu_state[cpu].pps_cpustate = 1;
error = pcd->pcd_start_pmc(cpu, adjri);
}
critical_exit();
@ -2802,7 +2831,7 @@ pmc_stop(struct pmc *pm)
ri = PMC_TO_ROWINDEX(pm);
pcd = pmc_ri_to_classdep(md, ri, &adjri);
CPU_CLR_ATOMIC(cpu, &pm->pm_cpustate);
pm->pm_pcpu_state[cpu].pps_cpustate = 0;
critical_enter();
if ((error = pcd->pcd_stop_pmc(cpu, adjri)) == 0)
error = pcd->pcd_read_pmc(cpu, adjri, &pm->pm_sc.pm_initial);
@ -2884,7 +2913,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
pmc_op_to_name[op], arg);
error = 0;
atomic_add_int(&pmc_stats.pm_syscalls, 1);
counter_u64_add(pmc_stats.pm_syscalls, 1);
switch (op) {
@ -3063,8 +3092,16 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
case PMC_OP_GETDRIVERSTATS:
{
struct pmc_op_getdriverstats gms;
bcopy(&pmc_stats, &gms, sizeof(gms));
#define CFETCH(a, b, field) a.field = counter_u64_fetch(b.field)
CFETCH(gms, pmc_stats, pm_intr_ignored);
CFETCH(gms, pmc_stats, pm_intr_processed);
CFETCH(gms, pmc_stats, pm_intr_bufferfull);
CFETCH(gms, pmc_stats, pm_syscalls);
CFETCH(gms, pmc_stats, pm_syscall_errors);
CFETCH(gms, pmc_stats, pm_buffer_requests);
CFETCH(gms, pmc_stats, pm_buffer_requests_failed);
CFETCH(gms, pmc_stats, pm_log_sweeps);
#undef CFETCH
error = copyout(&gms, arg, sizeof(gms));
}
break;
@ -4040,7 +4077,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
sx_xunlock(&pmc_sx);
done_syscall:
if (error)
atomic_add_int(&pmc_stats.pm_syscall_errors, 1);
counter_u64_add(pmc_stats.pm_syscall_errors, 1);
return (error);
}
@ -4115,8 +4152,8 @@ pmc_process_interrupt(int cpu, int ring, struct pmc *pm, struct trapframe *tf,
ps = psb->ps_write;
if (ps->ps_nsamples) { /* in use, reader hasn't caught up */
CPU_SET_ATOMIC(cpu, &pm->pm_stalled);
atomic_add_int(&pmc_stats.pm_intr_bufferfull, 1);
pm->pm_pcpu_state[cpu].pps_stalled = 1;
counter_u64_add(pmc_stats.pm_intr_bufferfull, 1);
PMCDBG6(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d",
cpu, pm, (void *) tf, inuserspace,
(int) (psb->ps_write - psb->ps_samples),
@ -4133,11 +4170,11 @@ pmc_process_interrupt(int cpu, int ring, struct pmc *pm, struct trapframe *tf,
(int) (psb->ps_write - psb->ps_samples),
(int) (psb->ps_read - psb->ps_samples));
KASSERT(pm->pm_runcount >= 0,
("[pmc,%d] pm=%p runcount %d", __LINE__, (void *) pm,
pm->pm_runcount));
KASSERT(counter_u64_fetch(pm->pm_runcount) >= 0,
("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
(unsigned long)counter_u64_fetch(pm->pm_runcount)));
atomic_add_rel_int(&pm->pm_runcount, 1); /* hold onto PMC */
counter_u64_add(pm->pm_runcount, 1); /* hold onto PMC */
ps->ps_pmc = pm;
if ((td = curthread) && td->td_proc)
@ -4180,7 +4217,7 @@ pmc_process_interrupt(int cpu, int ring, struct pmc *pm, struct trapframe *tf,
done:
/* mark CPU as needing processing */
if (callchaindepth != PMC_SAMPLE_INUSE)
CPU_SET_ATOMIC(cpu, &pmc_cpumask);
DPCPU_SET(pmc_sampled, 1);
return (error);
}
@ -4244,8 +4281,8 @@ pmc_capture_user_callchain(int cpu, int ring, struct trapframe *tf)
("[pmc,%d] Retrieving callchain for PMC that doesn't "
"want it", __LINE__));
KASSERT(pm->pm_runcount > 0,
("[pmc,%d] runcount %d", __LINE__, pm->pm_runcount));
KASSERT(counter_u64_fetch(pm->pm_runcount) > 0,
("[pmc,%d] runcount %ld", __LINE__, (unsigned long)counter_u64_fetch(pm->pm_runcount)));
/*
* Retrieve the callchain and mark the sample buffer
@ -4275,9 +4312,7 @@ pmc_capture_user_callchain(int cpu, int ring, struct trapframe *tf)
sched_unpin(); /* Can migrate safely now. */
/* mark CPU as needing processing */
CPU_SET_ATOMIC(cpu, &pmc_cpumask);
return;
DPCPU_SET(pmc_sampled, 1);
}
/*
@ -4309,9 +4344,9 @@ pmc_process_samples(int cpu, int ring)
pm = ps->ps_pmc;
KASSERT(pm->pm_runcount > 0,
("[pmc,%d] pm=%p runcount %d", __LINE__, (void *) pm,
pm->pm_runcount));
KASSERT(counter_u64_fetch(pm->pm_runcount) > 0,
("[pmc,%d] pm=%p runcount %ld", __LINE__, (void *) pm,
(unsigned long)counter_u64_fetch(pm->pm_runcount)));
po = pm->pm_owner;
@ -4326,7 +4361,7 @@ pmc_process_samples(int cpu, int ring)
/* If there is a pending AST wait for completion */
if (ps->ps_nsamples == PMC_SAMPLE_INUSE) {
/* Need a rescan at a later time. */
CPU_SET_ATOMIC(cpu, &pmc_cpumask);
DPCPU_SET(pmc_sampled, 1);
break;
}
@ -4359,7 +4394,7 @@ pmc_process_samples(int cpu, int ring)
entrydone:
ps->ps_nsamples = 0; /* mark entry as free */
atomic_subtract_rel_int(&pm->pm_runcount, 1);
counter_u64_add(pm->pm_runcount, -1);
/* increment read pointer, modulo sample size */
if (++ps == psb->ps_fence)
@ -4368,7 +4403,7 @@ pmc_process_samples(int cpu, int ring)
psb->ps_read = ps;
}
atomic_add_int(&pmc_stats.pm_log_sweeps, 1);
counter_u64_add(pmc_stats.pm_log_sweeps, 1);
/* Do not re-enable stalled PMCs if we failed to process any samples */
if (n == 0)
@ -4390,11 +4425,11 @@ pmc_process_samples(int cpu, int ring)
if (pm == NULL || /* !cfg'ed */
pm->pm_state != PMC_STATE_RUNNING || /* !active */
!PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) || /* !sampling */
!CPU_ISSET(cpu, &pm->pm_cpustate) || /* !desired */
!CPU_ISSET(cpu, &pm->pm_stalled)) /* !stalled */
!pm->pm_pcpu_state[cpu].pps_cpustate || /* !desired */
!pm->pm_pcpu_state[cpu].pps_stalled) /* !stalled */
continue;
CPU_CLR_ATOMIC(cpu, &pm->pm_stalled);
pm->pm_pcpu_state[cpu].pps_stalled = 0;
(*pcd->pcd_start_pmc)(cpu, adjri);
}
}
@ -4513,9 +4548,9 @@ pmc_process_exit(void *arg __unused, struct proc *p)
("[pmc,%d] pm %p != pp_pmcs[%d] %p",
__LINE__, pm, ri, pp->pp_pmcs[ri].pp_pmc));
KASSERT(pm->pm_runcount > 0,
("[pmc,%d] bad runcount ri %d rc %d",
__LINE__, ri, pm->pm_runcount));
KASSERT(counter_u64_fetch(pm->pm_runcount) > 0,
("[pmc,%d] bad runcount ri %d rc %ld",
__LINE__, ri, (unsigned long)counter_u64_fetch(pm->pm_runcount)));
/*
* Change desired state, and then stop if not
@ -4524,9 +4559,9 @@ pmc_process_exit(void *arg __unused, struct proc *p)
* the PMC after this code has already checked
* the pm_stalled flag.
*/
if (CPU_ISSET(cpu, &pm->pm_cpustate)) {
CPU_CLR_ATOMIC(cpu, &pm->pm_cpustate);
if (!CPU_ISSET(cpu, &pm->pm_stalled)) {
if (pm->pm_pcpu_state[cpu].pps_cpustate) {
pm->pm_pcpu_state[cpu].pps_cpustate = 0;
if (!pm->pm_pcpu_state[cpu].pps_stalled) {
(void) pcd->pcd_stop_pmc(cpu, adjri);
pcd->pcd_read_pmc(cpu, adjri,
&newvalue);
@ -4540,9 +4575,9 @@ pmc_process_exit(void *arg __unused, struct proc *p)
}
}
atomic_subtract_rel_int(&pm->pm_runcount,1);
counter_u64_add(pm->pm_runcount, -1);
KASSERT((int) pm->pm_runcount >= 0,
KASSERT((int) counter_u64_fetch(pm->pm_runcount) >= 0,
("[pmc,%d] runcount is %d", __LINE__, ri));
(void) pcd->pcd_config_pmc(cpu, adjri, NULL);
@ -4811,7 +4846,8 @@ static int
pmc_initialize(void)
{
int c, cpu, error, n, ri;
unsigned int maxcpu;
unsigned int maxcpu, domain;
struct pcpu *pc;
struct pmc_binding pb;
struct pmc_sample *ps;
struct pmc_classdep *pcd;
@ -4820,6 +4856,15 @@ pmc_initialize(void)
md = NULL;
error = 0;
pmc_stats.pm_intr_ignored = counter_u64_alloc(M_WAITOK);
pmc_stats.pm_intr_processed = counter_u64_alloc(M_WAITOK);
pmc_stats.pm_intr_bufferfull = counter_u64_alloc(M_WAITOK);
pmc_stats.pm_syscalls = counter_u64_alloc(M_WAITOK);
pmc_stats.pm_syscall_errors = counter_u64_alloc(M_WAITOK);
pmc_stats.pm_buffer_requests = counter_u64_alloc(M_WAITOK);
pmc_stats.pm_buffer_requests_failed = counter_u64_alloc(M_WAITOK);
pmc_stats.pm_log_sweeps = counter_u64_alloc(M_WAITOK);
#ifdef HWPMC_DEBUG
/* parse debug flags first */
if (TUNABLE_STR_FETCH(PMC_SYSCTL_NAME_PREFIX "debugflags",
@ -4927,9 +4972,10 @@ pmc_initialize(void)
for (cpu = 0; cpu < maxcpu; cpu++) {
if (!pmc_cpu_is_active(cpu))
continue;
sb = malloc(sizeof(struct pmc_samplebuffer) +
pmc_nsamples * sizeof(struct pmc_sample), M_PMC,
pc = pcpu_find(cpu);
domain = pc->pc_domain;
sb = malloc_domain(sizeof(struct pmc_samplebuffer) +
pmc_nsamples * sizeof(struct pmc_sample), M_PMC, domain,
M_WAITOK|M_ZERO);
sb->ps_read = sb->ps_write = sb->ps_samples;
sb->ps_fence = sb->ps_samples + pmc_nsamples;
@ -4937,8 +4983,8 @@ pmc_initialize(void)
KASSERT(pmc_pcpu[cpu] != NULL,
("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu));
sb->ps_callchains = malloc(pmc_callchaindepth * pmc_nsamples *
sizeof(uintptr_t), M_PMC, M_WAITOK|M_ZERO);
sb->ps_callchains = malloc_domain(pmc_callchaindepth * pmc_nsamples *
sizeof(uintptr_t), M_PMC, domain, M_WAITOK|M_ZERO);
for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++)
ps->ps_pc = sb->ps_callchains +
@ -4946,8 +4992,8 @@ pmc_initialize(void)
pmc_pcpu[cpu]->pc_sb[PMC_HR] = sb;
sb = malloc(sizeof(struct pmc_samplebuffer) +
pmc_nsamples * sizeof(struct pmc_sample), M_PMC,
sb = malloc_domain(sizeof(struct pmc_samplebuffer) +
pmc_nsamples * sizeof(struct pmc_sample), M_PMC, domain,
M_WAITOK|M_ZERO);
sb->ps_read = sb->ps_write = sb->ps_samples;
sb->ps_fence = sb->ps_samples + pmc_nsamples;
@ -4955,8 +5001,8 @@ pmc_initialize(void)
KASSERT(pmc_pcpu[cpu] != NULL,
("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu));
sb->ps_callchains = malloc(pmc_callchaindepth * pmc_nsamples *
sizeof(uintptr_t), M_PMC, M_WAITOK|M_ZERO);
sb->ps_callchains = malloc_domain(pmc_callchaindepth * pmc_nsamples *
sizeof(uintptr_t), M_PMC, domain, M_WAITOK|M_ZERO);
for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++)
ps->ps_pc = sb->ps_callchains +
@ -5048,7 +5094,8 @@ pmc_cleanup(void)
PMCDBG0(MOD,INI,0, "cleanup");
/* switch off sampling */
CPU_ZERO(&pmc_cpumask);
CPU_FOREACH(cpu)
DPCPU_ID_SET(cpu, pmc_sampled, 0);
pmc_intr = NULL;
sx_xlock(&pmc_sx);
@ -5157,11 +5204,11 @@ pmc_cleanup(void)
KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_SR] != NULL,
("[pmc,%d] Null sw cpu sample buffer cpu=%d", __LINE__,
cpu));
free(pmc_pcpu[cpu]->pc_sb[PMC_HR]->ps_callchains, M_PMC);
free(pmc_pcpu[cpu]->pc_sb[PMC_HR], M_PMC);
free(pmc_pcpu[cpu]->pc_sb[PMC_SR]->ps_callchains, M_PMC);
free(pmc_pcpu[cpu]->pc_sb[PMC_SR], M_PMC);
free(pmc_pcpu[cpu], M_PMC);
free_domain(pmc_pcpu[cpu]->pc_sb[PMC_HR]->ps_callchains, M_PMC);
free_domain(pmc_pcpu[cpu]->pc_sb[PMC_HR], M_PMC);
free_domain(pmc_pcpu[cpu]->pc_sb[PMC_SR]->ps_callchains, M_PMC);
free_domain(pmc_pcpu[cpu]->pc_sb[PMC_SR], M_PMC);
free_domain(pmc_pcpu[cpu], M_PMC);
}
free(pmc_pcpu, M_PMC);
@ -5181,7 +5228,14 @@ pmc_cleanup(void)
}
pmclog_shutdown();
counter_u64_free(pmc_stats.pm_intr_ignored);
counter_u64_free(pmc_stats.pm_intr_processed);
counter_u64_free(pmc_stats.pm_intr_bufferfull);
counter_u64_free(pmc_stats.pm_syscalls);
counter_u64_free(pmc_stats.pm_syscall_errors);
counter_u64_free(pmc_stats.pm_buffer_requests);
counter_u64_free(pmc_stats.pm_buffer_requests_failed);
counter_u64_free(pmc_stats.pm_log_sweeps);
sx_xunlock(&pmc_sx); /* we are done */
}
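
The sample and callchain buffers above are now allocated with
malloc_domain()/free_domain() so that each CPU's buffers live in its
own NUMA domain. A minimal sketch of the pattern as used above (size
and malloc type are placeholders):

	#include <sys/param.h>
	#include <sys/malloc.h>
	#include <sys/pcpu.h>

	static void *
	alloc_cpu_local(int cpu, size_t size, struct malloc_type *type)
	{
		struct pcpu *pc;

		pc = pcpu_find(cpu);
		/* Back the buffer with memory from this CPU's domain. */
		return (malloc_domain(size, type, pc->pc_domain,
		    M_WAITOK | M_ZERO));
	}

Memory obtained this way must be released with free_domain(), as
pmc_cleanup() does above.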

sys/dev/hwpmc/hwpmc_mpc7xxx.c

@ -710,9 +710,10 @@ mpc7xxx_intr(int cpu, struct trapframe *tf)
/* reload count. */
mpc7xxx_write_pmc(cpu, i, pm->pm_sc.pm_reloadcount);
}
atomic_add_int(retval ? &pmc_stats.pm_intr_processed :
&pmc_stats.pm_intr_ignored, 1);
if (retval)
counter_u64_add(pmc_stats.pm_intr_processed, 1);
else
counter_u64_add(pmc_stats.pm_intr_ignored, 1);
/* Re-enable PERF exceptions. */
if (retval)

View File

@ -1545,8 +1545,10 @@ p4_intr(int cpu, struct trapframe *tf)
if (did_interrupt)
lapic_reenable_pmc();
atomic_add_int(did_interrupt ? &pmc_stats.pm_intr_processed :
&pmc_stats.pm_intr_ignored, 1);
if (did_interrupt)
counter_u64_add(pmc_stats.pm_intr_processed, 1);
else
counter_u64_add(pmc_stats.pm_intr_ignored, 1);
return (did_interrupt);
}

sys/dev/hwpmc/hwpmc_ppc970.c

@ -528,8 +528,10 @@ ppc970_intr(int cpu, struct trapframe *tf)
ppc970_write_pmc(cpu, i, pm->pm_sc.pm_reloadcount);
}
atomic_add_int(retval ? &pmc_stats.pm_intr_processed :
&pmc_stats.pm_intr_ignored, 1);
if (retval)
counter_u64_add(pmc_stats.pm_intr_processed, 1);
else
counter_u64_add(pmc_stats.pm_intr_ignored, 1);
/* Re-enable PERF exceptions. */
if (retval)

sys/dev/hwpmc/hwpmc_ppro.c

@ -725,8 +725,10 @@ p6_intr(int cpu, struct trapframe *tf)
if (retval)
lapic_reenable_pmc();
atomic_add_int(retval ? &pmc_stats.pm_intr_processed :
&pmc_stats.pm_intr_ignored, 1);
if (retval)
counter_u64_add(pmc_stats.pm_intr_processed, 1);
else
counter_u64_add(pmc_stats.pm_intr_ignored, 1);
/* restart counters that can be restarted */
P6_SYNC_CTR_STATE(pc);

sys/dev/hwpmc/hwpmc_soft.c

@ -441,9 +441,10 @@ pmc_soft_intr(struct pmckern_soft *ks)
} else
pc->soft_values[ri]++;
}
atomic_add_int(processed ? &pmc_stats.pm_intr_processed :
&pmc_stats.pm_intr_ignored, 1);
if (processed)
counter_u64_add(pmc_stats.pm_intr_processed, 1);
else
counter_u64_add(pmc_stats.pm_intr_ignored, 1);
return (processed);
}

sys/kern/kern_pmc.c

@ -66,8 +66,7 @@ int __read_mostly (*pmc_hook)(struct thread *td, int function, void *arg) = NULL
/* Interrupt handler */
int __read_mostly (*pmc_intr)(int cpu, struct trapframe *tf) = NULL;
/* Bitmask of CPUs requiring servicing at hardclock time */
volatile cpuset_t pmc_cpumask;
DPCPU_DEFINE(uint8_t, pmc_sampled);
/*
* A global count of SS mode PMCs. When non-zero, this means that

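The global pmc_cpumask, previously updated with locked CPU_SET_ATOMIC()
from every CPU's interrupt path, becomes a DPCPU byte: each CPU flags
itself with a plain store into its own per-CPU region. A small sketch
using the names from this diff (wrapper functions are hypothetical):

	#include <sys/param.h>
	#include <sys/pcpu.h>

	/* Defined once, as above: DPCPU_DEFINE(uint8_t, pmc_sampled); */

	static void
	mark_local_cpu_sampled(void)
	{
		/* Local CPU only: plain store, no shared cache line. */
		DPCPU_SET(pmc_sampled, 1);
	}

	static int
	cpu_has_samples(int cpu)
	{
		/* Cross-CPU read, e.g. from the hardclock-time check. */
		return (DPCPU_ID_GET(cpu, pmc_sampled) != 0);
	}
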
sys/sys/pmc.h

@ -36,7 +36,8 @@
#define _SYS_PMC_H_
#include <dev/hwpmc/pmc_events.h>
#include <sys/proc.h>
#include <sys/counter.h>
#include <machine/pmc_mdep.h>
#include <machine/profile.h>
@ -552,6 +553,19 @@ struct pmc_op_configurelog {
*
* Retrieve pmc(4) driver-wide statistics.
*/
#ifdef _KERNEL
struct pmc_driverstats {
counter_u64_t pm_intr_ignored; /* #interrupts ignored */
counter_u64_t pm_intr_processed; /* #interrupts processed */
counter_u64_t pm_intr_bufferfull; /* #interrupts with ENOSPC */
counter_u64_t pm_syscalls; /* #syscalls */
counter_u64_t pm_syscall_errors; /* #syscalls with errors */
counter_u64_t pm_buffer_requests; /* #buffer requests */
counter_u64_t pm_buffer_requests_failed; /* #failed buffer requests */
counter_u64_t pm_log_sweeps; /* #sample buffer processing
passes */
};
#endif
struct pmc_op_getdriverstats {
unsigned int pm_intr_ignored; /* #interrupts ignored */
@ -625,9 +639,9 @@ struct pmc_op_getdyneventinfo {
#define PMC_HASH_SIZE 1024
#define PMC_MTXPOOL_SIZE 2048
#define PMC_LOG_BUFFER_SIZE 4
#define PMC_NLOGBUFFERS 1024
#define PMC_NSAMPLES 1024
#define PMC_LOG_BUFFER_SIZE 128
#define PMC_NLOGBUFFERS_PCPU 8
#define PMC_NSAMPLES 64
#define PMC_CALLCHAIN_DEPTH 32
#define PMC_SYSCTL_NAME_PREFIX "kern." PMC_MODULE_NAME "."
@ -701,7 +715,10 @@ struct pmc_target {
* field is '0'.
*
*/
struct pmc_pcpu_state {
uint8_t pps_stalled;
uint8_t pps_cpustate;
} __aligned(CACHE_LINE_SIZE);
struct pmc {
LIST_HEAD(,pmc_target) pm_targets; /* list of target processes */
LIST_ENTRY(pmc) pm_next; /* owner's list */
@ -735,13 +752,13 @@ struct pmc {
pmc_value_t pm_initial; /* counting PMC modes */
} pm_sc;
volatile cpuset_t pm_stalled; /* marks stalled sampling PMCs */
struct pmc_pcpu_state *pm_pcpu_state;
volatile cpuset_t pm_cpustate; /* CPUs where PMC should be active */
uint32_t pm_caps; /* PMC capabilities */
enum pmc_event pm_event; /* event being measured */
uint32_t pm_flags; /* additional flags PMC_F_... */
struct pmc_owner *pm_owner; /* owner thread state */
int pm_runcount; /* #cpus currently on */
counter_u64_t pm_runcount; /* #cpus currently on */
enum pmc_state pm_state; /* current PMC state */
uint32_t pm_overflowcnt; /* count overflow interrupts */
@ -816,11 +833,11 @@ struct pmc_owner {
struct proc *po_owner; /* owner proc */
uint32_t po_flags; /* (k) flags PMC_PO_* */
struct proc *po_kthread; /* (k) helper kthread */
struct pmclog_buffer *po_curbuf; /* current log buffer */
struct file *po_file; /* file reference */
int po_error; /* recorded error */
short po_sscount; /* # SS PMCs owned */
short po_logprocmaps; /* global mappings done */
struct pmclog_buffer *po_curbuf[MAXCPU]; /* current log buffer */
};
#define PMC_PO_OWNS_LOGFILE 0x00000001 /* has a log file */
@ -1012,7 +1029,10 @@ struct pmc_mdep {
extern struct pmc_cpu **pmc_pcpu;
/* driver statistics */
extern struct pmc_op_getdriverstats pmc_stats;
extern struct pmc_driverstats pmc_stats;
/* cpu model name for pmu lookup */
extern char pmc_cpuid[64];
#if defined(HWPMC_DEBUG)
#include <sys/ktr.h>

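pm_stalled and pm_cpustate similarly move out of shared cpuset_t words
into pm_pcpu_state, an array of cache-line-aligned per-CPU structs, so
each CPU's updates stay in a line no other CPU writes. An illustrative
contrast (hypothetical helper, assuming the struct pmc layout above):

	static void
	mark_cpu_active(struct pmc *pm, int cpu)
	{
		/*
		 * Old: CPU_SET_ATOMIC(cpu, &pm->pm_cpustate) -- a locked
		 * read-modify-write on a cpuset shared by all CPUs.
		 * New: a plain byte store into CPU-private state; the
		 * __aligned(CACHE_LINE_SIZE) padding prevents false
		 * sharing between adjacent CPUs' slots.
		 */
		pm->pm_pcpu_state[cpu].pps_cpustate = 1;
	}
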
sys/sys/pmckern.h

@ -165,7 +165,7 @@ extern int (*pmc_intr)(int _cpu, struct trapframe *_frame);
extern struct sx pmc_sx;
/* Per-cpu flags indicating availability of sampling data */
extern volatile cpuset_t pmc_cpumask;
DPCPU_DECLARE(uint8_t, pmc_sampled);
/* Count of system-wide sampling PMCs in existence */
extern volatile int pmc_ss_count;
@ -220,7 +220,7 @@ do { \
#define PMC_SYSTEM_SAMPLING_ACTIVE() (pmc_ss_count > 0)
/* Check if a CPU has recorded samples. */
#define PMC_CPU_HAS_SAMPLES(C) (__predict_false(CPU_ISSET(C, &pmc_cpumask)))
#define PMC_CPU_HAS_SAMPLES(C) (__predict_false(DPCPU_ID_GET((C), pmc_sampled)))
/*
* Helper functions.