Kernel and hwpmc(4) support for callchain capture.

Sponsored by:	FreeBSD Foundation and Google Inc.
This commit is contained in:
Joseph Koshy 2007-12-07 08:20:17 +00:00
parent 2de2af32a0
commit d07f36b075
18 changed files with 783 additions and 135 deletions

View File

@ -1,8 +1,12 @@
/*-
* Copyright (c) 1989, 1990 William F. Jolitz.
* Copyright (c) 1990 The Regents of the University of California.
* Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
* Portions of this software were developed by A. Joseph Koshy under
* sponsorship from the FreeBSD Foundation and Google, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@ -32,6 +36,7 @@
#include "opt_atpic.h"
#include "opt_compat.h"
#include "opt_hwpmc_hooks.h"
#include <machine/asmacros.h>
#include <machine/psl.h>
@ -40,6 +45,9 @@
#include "assym.s"
.text
#ifdef HWPMC_HOOKS
ENTRY(start_exceptions)
#endif
/*****************************************************************************/
/* Trap handling */
@ -348,6 +356,9 @@ IDTVEC(fast_syscall32)
* execute the NMI handler with interrupts disabled to prevent a
* nested interrupt from executing an 'iretq' instruction and
* inadvertently taking the processor out of NMI mode.
*
* Third, the NMI handler runs on its own stack (tss_ist1), shared
* with the double fault handler.
*/
IDTVEC(nmi)
@ -386,6 +397,61 @@ nmi_calltrap:
movq %rsp, %rdi
call trap
MEXITCOUNT
#ifdef HWPMC_HOOKS
/*
* Check if the current trap was from user mode and if so
* whether the current thread needs a user call chain to be
* captured. We are still in NMI mode at this point.
*/
testb $SEL_RPL_MASK,TF_CS(%rsp)
jz nocallchain
movq PCPU(CURTHREAD),%rax /* curthread present? */
orq %rax,%rax
jz nocallchain
testl $TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */
jz nocallchain
/*
* A user callchain is to be captured, so:
* - Move execution to the regular kernel stack, to allow for
* nested NMI interrupts.
* - Take the processor out of "NMI" mode by faking an "iret".
* - Enable interrupts, so that copyin() can work.
*/
movq %rsp,%rsi /* source stack pointer */
movq $TF_SIZE,%rcx
movq PCPU(RSP0),%rbx
subq %rcx,%rbx
movq %rbx,%rdi /* destination stack pointer */
shrq $3,%rcx /* trap frame size in long words */
cld
rep
movsq /* copy trapframe */
movl %ss,%eax
pushq %rax /* tf_ss */
pushq %rbx /* tf_rsp (on kernel stack) */
pushfq /* tf_rflags */
movl %cs,%eax
pushq %rax /* tf_cs */
pushq $outofnmi /* tf_rip */
iretq
outofnmi:
/*
* At this point the processor has exited NMI mode and is running
* with interrupts turned off on the normal kernel stack.
* We turn interrupts back on, and take the usual 'doreti' exit
* path.
*
* If a pending NMI gets recognized at or after this point, it
* will cause a kernel callchain to be traced. Since this path
* is only taken for NMI interrupts from user space, our `swapgs'
* state is correct for taking the doreti path.
*/
sti
jmp doreti
nocallchain:
#endif
testl %ebx,%ebx
jz nmi_restoreregs
swapgs
@ -556,3 +622,6 @@ doreti_iret_fault:
movq $0,TF_ADDR(%rsp)
FAKE_MCOUNT(TF_RIP(%rsp))
jmp calltrap
#ifdef HWPMC_HOOKS
ENTRY(end_exceptions)
#endif

View File

@ -195,8 +195,7 @@ trap(struct trapframe *frame)
* the NMI was handled by it and we can return immediately.
*/
if (type == T_NMI && pmc_intr &&
(*pmc_intr)(PCPU_GET(cpuid), (uintptr_t) frame->tf_rip,
TRAPF_USERMODE(frame)))
(*pmc_intr)(PCPU_GET(cpuid), frame))
goto out;
#endif

View File

@ -1,7 +1,11 @@
/*-
* Copyright (c) 2003-2005 Joseph Koshy
* Copyright (c) 2003-2007 Joseph Koshy
* Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
* Portions of this software were developed by A. Joseph Koshy under
* sponsorship from the FreeBSD Foundation and Google, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@ -53,10 +57,34 @@ union pmc_md_pmc {
struct pmc;
#define PMC_TRAPFRAME_TO_PC(TF) ((TF)->tf_rip)
#define PMC_TRAPFRAME_TO_FP(TF) ((TF)->tf_rbp)
#define PMC_TRAPFRAME_TO_SP(TF) ((TF)->tf_rsp)
#define PMC_AT_FUNCTION_PROLOGUE_PUSH_BP(I) \
(((I) & 0xffffffff) == 0xe5894855) /* pushq %rbp; movq %rsp,%rbp */
#define PMC_AT_FUNCTION_PROLOGUE_MOV_SP_BP(I) \
(((I) & 0x00ffffff) == 0x00e58948) /* movq %rsp,%rbp */
#define PMC_AT_FUNCTION_EPILOGUE_RET(I) \
(((I) & 0xFF) == 0xC3) /* ret */
#define PMC_IN_TRAP_HANDLER(PC) \
((PC) >= (uintptr_t) start_exceptions && \
(PC) < (uintptr_t) end_exceptions)
#define PMC_IN_KERNEL_STACK(S,START,END) \
((S) >= (START) && (S) < (END))
#define PMC_IN_KERNEL(va) (((va) >= DMAP_MIN_ADDRESS && \
(va) < DMAP_MAX_ADDRESS) || ((va) >= KERNBASE && \
(va) < VM_MAX_KERNEL_ADDRESS))
#define PMC_IN_USERSPACE(va) ((va) <= VM_MAXUSER_ADDRESS)
/*
* Prototypes
*/
void start_exceptions(void), end_exceptions(void);
void pmc_x86_lapic_enable_pmc_interrupt(void);
#endif

View File

@ -32,7 +32,8 @@ S= ../../..
.include "$S/conf/kern.pre.mk"
DDB_ENABLED!= grep DDB opt_ddb.h || true
.if !empty(DDB_ENABLED)
HWPMC_ENABLED!= grep HWPMC opt_hwpmc_hooks.h || true
.if !empty(DDB_ENABLED) || !empty(HWPMC_ENABLED)
CFLAGS+= -fno-omit-frame-pointer
.endif

View File

@ -1,7 +1,11 @@
/*-
* Copyright (c) 2003-2005 Joseph Koshy
* Copyright (c) 2003-2007 Joseph Koshy
* Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
* Portions of this software were developed by A. Joseph Koshy under
* sponsorship from the FreeBSD Foundation and Google, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@ -38,9 +42,9 @@ __FBSDID("$FreeBSD$");
#include <sys/smp.h>
#include <sys/systm.h>
#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/pmc_mdep.h>
#include <machine/specialreg.h>
#ifdef DEBUG
@ -667,7 +671,7 @@ amd_stop_pmc(int cpu, int ri)
*/
static int
amd_intr(int cpu, uintptr_t eip, int usermode)
amd_intr(int cpu, struct trapframe *tf)
{
int i, error, retval, ri;
uint32_t config, evsel, perfctr;
@ -679,8 +683,8 @@ amd_intr(int cpu, uintptr_t eip, int usermode)
KASSERT(cpu >= 0 && cpu < mp_ncpus,
("[amd,%d] out of range CPU %d", __LINE__, cpu));
PMCDBG(MDP,INT,1, "cpu=%d eip=%p um=%d", cpu, (void *) eip,
usermode);
PMCDBG(MDP,INT,1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
TRAPF_USERMODE(tf));
retval = 0;
@ -696,8 +700,8 @@ amd_intr(int cpu, uintptr_t eip, int usermode)
*
* If multiple PMCs interrupt at the same time, the AMD64
* processor appears to deliver as many NMIs as there are
* outstanding PMC interrupts. Thus we need to only process
* one interrupt at a time.
* outstanding PMC interrupts. So we process only one NMI
* interrupt at a time.
*/
for (i = 0; retval == 0 && i < AMD_NPMCS-1; i++) {
@ -717,9 +721,9 @@ amd_intr(int cpu, uintptr_t eip, int usermode)
continue;
}
retval = 1; /* found an interrupting PMC */
retval = 1; /* Found an interrupting PMC. */
/* stop the PMC, reload count */
/* Stop the PMC, reload count. */
evsel = AMD_PMC_EVSEL_0 + i;
perfctr = AMD_PMC_PERFCTR_0 + i;
v = pm->pm_sc.pm_reloadcount;
@ -733,8 +737,8 @@ amd_intr(int cpu, uintptr_t eip, int usermode)
wrmsr(evsel, config & ~AMD_PMC_ENABLE);
wrmsr(perfctr, AMD_RELOAD_COUNT_TO_PERFCTR_VALUE(v));
/* restart the counter if there was no error during logging */
error = pmc_process_interrupt(cpu, pm, eip, usermode);
/* Restart the counter if logging succeeded. */
error = pmc_process_interrupt(cpu, pm, tf, TRAPF_USERMODE(tf));
if (error == 0)
wrmsr(evsel, config | AMD_PMC_ENABLE);
}
@ -742,7 +746,7 @@ amd_intr(int cpu, uintptr_t eip, int usermode)
atomic_add_int(retval ? &pmc_stats.pm_intr_processed :
&pmc_stats.pm_intr_ignored, 1);
return retval;
return (retval);
}
/*

View File

@ -1,7 +1,11 @@
/*-
* Copyright (c) 2005 Joseph Koshy
* Copyright (c) 2005-2007 Joseph Koshy
* Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
* Portions of this software were developed by A. Joseph Koshy under
* sponsorship from the FreeBSD Foundation and Google, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@ -64,7 +68,6 @@ TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "logbuffersize", &pmclog_buffer_size);
SYSCTL_INT(_kern_hwpmc, OID_AUTO, logbuffersize, CTLFLAG_TUN|CTLFLAG_RD,
&pmclog_buffer_size, 0, "size of log buffers in kilobytes");
/*
* kern.hwpmc.nbuffer -- number of global log buffers
*/
@ -96,7 +99,6 @@ static struct mtx pmc_kthread_mtx; /* sleep lock */
/*
* Log file record constructors.
*/
#define _PMCLOG_TO_HEADER(T,L) \
((PMCLOG_HEADER_MAGIC << 24) | \
(PMCLOG_TYPE_ ## T << 16) | \
@ -135,6 +137,8 @@ static struct mtx pmc_kthread_mtx; /* sleep lock */
* Assertions about the log file format.
*/
CTASSERT(sizeof(struct pmclog_callchain) == 6*4 +
PMC_CALLCHAIN_DEPTH_MAX*sizeof(uintfptr_t));
CTASSERT(sizeof(struct pmclog_closelog) == 3*4);
CTASSERT(sizeof(struct pmclog_dropnotify) == 3*4);
CTASSERT(sizeof(struct pmclog_map_in) == PATH_MAX +
@ -710,9 +714,28 @@ pmclog_flush(struct pmc_owner *po)
}
/*
* Send a 'close log' event to the log file.
*/
/*
 * Append a PMCLOG_TYPE_CALLCHAIN record for sample 'ps' to the log
 * owned by PMC 'pm'.  The record is variable length: a fixed header
 * followed by 'ps->ps_nsamples' program counter values.
 */
void
pmclog_process_callchain(struct pmc *pm, struct pmc_sample *ps)
{
int n, recordlen;
uint32_t flags;
struct pmc_owner *po;
PMCDBG(LOG,SAM,1,"pm=%p pid=%d n=%d", pm, ps->ps_pid,
ps->ps_nsamples);
/* Record length depends on the number of captured PC values. */
recordlen = offsetof(struct pmclog_callchain, pl_pc) +
ps->ps_nsamples * sizeof(uintfptr_t);
po = pm->pm_owner;
/* Pack the CPU number and sample flags into one 32 bit field. */
flags = PMC_CALLCHAIN_TO_CPUFLAGS(ps->ps_cpu,ps->ps_flags);
/*
 * NOTE(review): PMCLOG_RESERVE embeds control flow (it can branch to
 * an error exit on allocation failure); the EMIT/DESPATCH calls below
 * must stay in this exact sequence.
 */
PMCLOG_RESERVE(po, CALLCHAIN, recordlen);
PMCLOG_EMIT32(ps->ps_pid);
PMCLOG_EMIT32(pm->pm_id);
PMCLOG_EMIT32(flags);
for (n = 0; n < ps->ps_nsamples; n++)
PMCLOG_EMITADDR(ps->ps_pc[n]);
PMCLOG_DESPATCH(po);
}
void
pmclog_process_closelog(struct pmc_owner *po)
@ -760,24 +783,6 @@ pmclog_process_map_out(struct pmc_owner *po, pid_t pid, uintfptr_t start,
PMCLOG_DESPATCH(po);
}
void
pmclog_process_pcsample(struct pmc *pm, struct pmc_sample *ps)
{
struct pmc_owner *po;
PMCDBG(LOG,SAM,1,"pm=%p pid=%d pc=%p", pm, ps->ps_pid,
(void *) ps->ps_pc);
po = pm->pm_owner;
PMCLOG_RESERVE(po, PCSAMPLE, sizeof(struct pmclog_pcsample));
PMCLOG_EMIT32(ps->ps_pid);
PMCLOG_EMITADDR(ps->ps_pc);
PMCLOG_EMIT32(pm->pm_id);
PMCLOG_EMIT32(ps->ps_usermode);
PMCLOG_DESPATCH(po);
}
void
pmclog_process_pmcallocate(struct pmc *pm)
{

View File

@ -1,7 +1,11 @@
/*-
* Copyright (c) 2003-2006 Joseph Koshy
* Copyright (c) 2003-2007 Joseph Koshy
* Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
* Portions of this software were developed by A. Joseph Koshy under
* sponsorship from the FreeBSD Foundation and Google, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@ -166,6 +170,7 @@ static int pmc_attach_one_process(struct proc *p, struct pmc *pm);
static int pmc_can_allocate_rowindex(struct proc *p, unsigned int ri,
int cpu);
static int pmc_can_attach(struct pmc *pm, struct proc *p);
static void pmc_capture_user_callchain(int cpu, struct trapframe *tf);
static void pmc_cleanup(void);
static int pmc_detach_process(struct proc *p, struct pmc *pm);
static int pmc_detach_one_process(struct proc *p, struct pmc *pm,
@ -180,6 +185,9 @@ static struct pmc_process *pmc_find_process_descriptor(struct proc *p,
static void pmc_force_context_switch(void);
static void pmc_link_target_process(struct pmc *pm,
struct pmc_process *pp);
static void pmc_log_all_process_mappings(struct pmc_owner *po);
static void pmc_log_kernel_mappings(struct pmc *pm);
static void pmc_log_process_mappings(struct pmc_owner *po, struct proc *p);
static void pmc_maybe_remove_owner(struct pmc_owner *po);
static void pmc_process_csw_in(struct thread *td);
static void pmc_process_csw_out(struct thread *td);
@ -205,6 +213,11 @@ static void pmc_unlink_target_process(struct pmc *pmc,
SYSCTL_NODE(_kern, OID_AUTO, hwpmc, CTLFLAG_RW, 0, "HWPMC parameters");
static int pmc_callchaindepth = PMC_CALLCHAIN_DEPTH;
TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "callchaindepth", &pmc_callchaindepth);
SYSCTL_INT(_kern_hwpmc, OID_AUTO, callchaindepth, CTLFLAG_TUN|CTLFLAG_RD,
&pmc_callchaindepth, 0, "depth of call chain records");
#ifdef DEBUG
struct pmc_debugflags pmc_debugflags = PMC_DEBUG_DEFAULT_FLAGS;
char pmc_debugstr[PMC_DEBUG_STRSIZE];
@ -226,7 +239,7 @@ SYSCTL_INT(_kern_hwpmc, OID_AUTO, hashsize, CTLFLAG_TUN|CTLFLAG_RD,
&pmc_hashsize, 0, "rows in hash tables");
/*
* kern.hwpmc.nsamples --- number of PC samples per CPU
* kern.hwpmc.nsamples --- number of PC samples/callchain stacks per CPU
*/
static int pmc_nsamples = PMC_NSAMPLES;
@ -234,6 +247,7 @@ TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "nsamples", &pmc_nsamples);
SYSCTL_INT(_kern_hwpmc, OID_AUTO, nsamples, CTLFLAG_TUN|CTLFLAG_RD,
&pmc_nsamples, 0, "number of PC samples per CPU");
/*
* kern.hwpmc.mtxpoolsize -- number of mutexes in the mutex pool.
*/
@ -957,6 +971,8 @@ pmc_attach_one_process(struct proc *p, struct pmc *pm)
pmclog_process_pmcattach(pm, p->p_pid, fullpath);
if (freepath)
FREE(freepath, M_TEMP);
if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
pmc_log_process_mappings(pm->pm_owner, p);
}
/* mark process as using HWPMCs */
PROC_LOCK(p);
@ -1449,7 +1465,7 @@ pmc_process_kld_unload(struct pmckern_map_out *pkm)
if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_map_out(po, (pid_t) -1,
pkm->pm_address, pkm->pm_address + pkm->pm_size);
/*
* TODO: Notify owners of process-sampling PMCs.
*/
@ -1527,6 +1543,88 @@ pmc_process_munmap(struct thread *td, struct pmckern_map_out *pkm)
pkm->pm_address, pkm->pm_address + pkm->pm_size);
}
/*
* Log mapping information about the kernel.
*/
static void
pmc_log_kernel_mappings(struct pmc *pm)
{
struct pmc_owner *po;
struct pmckern_map_in *km, *kmbase;
sx_assert(&pmc_sx, SX_LOCKED);
/* Only sampling PMCs need mapping information for their samples. */
KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)),
("[pmc,%d] non-sampling PMC (%p) desires mapping information",
__LINE__, (void *) pm));
po = pm->pm_owner;
/* The initial set of mappings is logged at most once per owner. */
if (po->po_flags & PMC_PO_INITIAL_MAPPINGS_DONE)
return;
/*
 * Log the current set of kernel modules.
 */
kmbase = linker_hwpmc_list_objects();
/* The list is terminated by an entry with a NULL 'pm_file'. */
for (km = kmbase; km->pm_file != NULL; km++) {
PMCDBG(LOG,REG,1,"%s %p", (char *) km->pm_file,
(void *) km->pm_address);
pmclog_process_map_in(po, (pid_t) -1, km->pm_address,
km->pm_file);
}
/* The list was allocated by the linker; release it here. */
FREE(kmbase, M_LINKER);
po->po_flags |= PMC_PO_INITIAL_MAPPINGS_DONE;
}
/*
* Log the mappings for a single process.
*/
static void
pmc_log_process_mappings(struct pmc_owner *po, struct proc *p)
{
/*
 * Unimplemented stub: per-process executable mappings are not yet
 * logged.  Callers (pmc_attach_one_process() and
 * pmc_log_all_process_mappings()) invoke this for its eventual
 * side effect only.
 */
}
/*
* Log mappings for all processes in the system.
*/
static void
pmc_log_all_process_mappings(struct pmc_owner *po)
{
struct proc *p, *top;
sx_assert(&pmc_sx, SX_XLOCKED);
/* pfind() returns the process locked; we only needed the pointer. */
if ((p = pfind(1)) == NULL)
panic("[pmc,%d] Cannot find init", __LINE__);
PROC_UNLOCK(p);
/* Hold the process tree shared while walking parent/child links. */
sx_slock(&proctree_lock);
top = p;
/*
 * Pre-order depth-first traversal of the process tree rooted at
 * init (pid 1): visit a node, descend to its first child if any,
 * otherwise climb back up via p_pptr until a sibling is found or
 * the root is reached again.
 */
for (;;) {
pmc_log_process_mappings(po, p);
if (!LIST_EMPTY(&p->p_children))
p = LIST_FIRST(&p->p_children);
else for (;;) {
if (p == top)
goto done;
if (LIST_NEXT(p, p_sibling)) {
p = LIST_NEXT(p, p_sibling);
break;
}
p = p->p_pptr;
}
}
done:
sx_sunlock(&proctree_lock);
}
/*
* The 'hook' invoked from the kernel proper
*/
@ -1543,7 +1641,8 @@ const char *pmc_hooknames[] = {
"KLDLOAD",
"KLDUNLOAD",
"MMAP",
"MUNMAP"
"MUNMAP",
"CALLCHAIN"
};
#endif
@ -1726,6 +1825,14 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
pmc_process_munmap(td, (struct pmckern_map_out *) arg);
break;
case PMC_FN_USER_CALLCHAIN:
/*
* Record a call chain.
*/
pmc_capture_user_callchain(PCPU_GET(cpuid),
(struct trapframe *) arg);
break;
default:
#ifdef DEBUG
KASSERT(0, ("[pmc,%d] unknown hook %d\n", __LINE__, function));
@ -2321,6 +2428,21 @@ pmc_start(struct pmc *pm)
po = pm->pm_owner;
/*
* Disallow PMCSTART if a logfile is required but has not been
* configured yet.
*/
if ((pm->pm_flags & PMC_F_NEEDS_LOGFILE) &&
(po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
return EDOOFUS; /* programming error */
/*
* If this is a sampling mode PMC, log mapping information for
* the kernel modules that are currently loaded.
*/
if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
pmc_log_kernel_mappings(pm);
if (PMC_IS_VIRTUAL_MODE(mode)) {
/*
@ -2332,15 +2454,6 @@ pmc_start(struct pmc *pm)
error = (pm->pm_flags & PMC_F_ATTACH_DONE) ? ESRCH :
pmc_attach_process(po->po_owner, pm);
/*
* Disallow PMCSTART if a logfile is required but has not
* been configured yet.
*/
if (error == 0 && (pm->pm_flags & PMC_F_NEEDS_LOGFILE) &&
(po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
error = EDOOFUS;
/*
* If the PMC is attached to its owner, then force a context
* switch to ensure that the MD state gets set correctly.
@ -2358,13 +2471,7 @@ pmc_start(struct pmc *pm)
/*
* A system-wide PMC.
*/
if ((pm->pm_flags & PMC_F_NEEDS_LOGFILE) &&
(po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
return EDOOFUS; /* programming error */
/*
*
* Add the owner to the global list if this is a system-wide
* sampling PMC.
*/
@ -2378,7 +2485,8 @@ pmc_start(struct pmc *pm)
po->po_sscount++;
}
/* TODO: dump system wide process mappings to the log? */
/* Log mapping information for all processes in the system. */
pmc_log_all_process_mappings(po);
/*
* Move to the CPU associated with this
@ -2554,7 +2662,6 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
struct proc *p;
struct pmc *pm;
struct pmc_owner *po;
struct pmckern_map_in *km, *kmbase;
struct pmc_op_configurelog cl;
sx_assert(&pmc_sx, SX_XLOCKED);
@ -2593,18 +2700,6 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
if (error)
break;
/*
* Log the current set of kernel modules.
*/
kmbase = linker_hwpmc_list_objects();
for (km = kmbase; km->pm_file != NULL; km++) {
PMCDBG(LOG,REG,1,"%s %p", (char *) km->pm_file,
(void *) km->pm_address);
pmclog_process_map_in(po, (pid_t) -1, km->pm_address,
km->pm_file);
}
FREE(kmbase, M_LINKER);
}
break;
@ -2945,7 +3040,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
*/
if ((pa.pm_flags & ~(PMC_F_DESCENDANTS | PMC_F_LOG_PROCCSW |
PMC_F_LOG_PROCEXIT)) != 0) {
PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN)) != 0) {
error = EINVAL;
break;
}
@ -3632,57 +3727,118 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
*/
/*
* Mark the thread as needing callchain capture and post an AST. The
* actual callchain capture will be done in a context where it is safe
* to take page faults.
*/
static void
pmc_post_callchain_ast(void)
{
        struct thread *curtd = curthread;

        /*
         * Flag the current thread for callchain processing in ast().
         * Since the PMC interrupt arrived while the process was
         * executing in user space, 'td_pflags' (a thread-private
         * field) is safe to modify without locks.
         */
        curtd->td_pflags |= TDP_CALLCHAIN;

        /*
         * 'td_flags', by contrast, may be concurrently modified by
         * other CPUs, so the thread lock is required.  Because we
         * entered directly from userland, this CPU cannot already
         * hold the thread's lock, making the acquisition safe even
         * in NMI context.
         */
        thread_lock(curtd);
        curtd->td_flags |= TDF_ASTPENDING;
        thread_unlock(curtd);
}
/*
* Interrupt processing.
*
* Find a free slot in the per-cpu array of PC samples and write the
* current (PMC,PID,PC) triple to it. If an event was successfully
* added, a bit is set in mask 'pmc_cpumask' denoting that the
* DO_SAMPLES hook needs to be invoked from the clock handler.
* Find a free slot in the per-cpu array of samples and capture the
* current callchain there. If a sample was successfully added, a bit
* is set in mask 'pmc_cpumask' denoting that the DO_SAMPLES hook
* needs to be invoked from the clock handler.
*
* This function is meant to be called from an NMI handler. It cannot
* use any of the locking primitives supplied by the OS.
*/
int
pmc_process_interrupt(int cpu, struct pmc *pm, uintfptr_t pc, int usermode)
pmc_process_interrupt(int cpu, struct pmc *pm, struct trapframe *tf,
int inuserspace)
{
int error, ri;
int error, callchaindepth;
struct thread *td;
struct pmc_sample *ps;
struct pmc_samplebuffer *psb;
error = 0;
ri = PMC_TO_ROWINDEX(pm);
/*
* Allocate space for a sample buffer.
*/
psb = pmc_pcpu[cpu]->pc_sb;
ps = psb->ps_write;
if (ps->ps_pc) { /* in use, reader hasn't caught up */
if (ps->ps_nsamples) { /* in use, reader hasn't caught up */
pm->pm_stalled = 1;
atomic_add_int(&pmc_stats.pm_intr_bufferfull, 1);
PMCDBG(SAM,INT,1,"(spc) cpu=%d pm=%p pc=%jx um=%d wr=%d rd=%d",
cpu, pm, (uint64_t) pc, usermode,
PMCDBG(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d",
cpu, pm, (void *) tf, inuserspace,
(int) (psb->ps_write - psb->ps_samples),
(int) (psb->ps_read - psb->ps_samples));
error = ENOMEM;
goto done;
}
/* fill in entry */
PMCDBG(SAM,INT,1,"cpu=%d pm=%p pc=%jx um=%d wr=%d rd=%d", cpu, pm,
(uint64_t) pc, usermode,
/* Fill in entry. */
PMCDBG(SAM,INT,1,"cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm,
(void *) tf, inuserspace,
(int) (psb->ps_write - psb->ps_samples),
(int) (psb->ps_read - psb->ps_samples));
atomic_add_rel_32(&pm->pm_runcount, 1); /* hold onto PMC */
atomic_add_rel_32(&pm->pm_runcount, 1); /* hold onto PMC */
ps->ps_pmc = pm;
if ((td = curthread) && td->td_proc)
ps->ps_pid = td->td_proc->p_pid;
else
ps->ps_pid = -1;
ps->ps_usermode = usermode;
ps->ps_pc = pc; /* mark entry as in use */
ps->ps_cpu = cpu;
ps->ps_flags = inuserspace ? PMC_CC_F_USERSPACE : 0;
callchaindepth = (pm->pm_flags & PMC_F_CALLCHAIN) ?
pmc_callchaindepth : 1;
if (callchaindepth == 1)
ps->ps_pc[0] = PMC_TRAPFRAME_TO_PC(tf);
else {
/*
* Kernel stack traversals can be done immediately,
* while we defer to an AST for user space traversals.
*/
if (!inuserspace)
callchaindepth =
pmc_save_kernel_callchain(ps->ps_pc,
callchaindepth, tf);
else {
pmc_post_callchain_ast();
callchaindepth = PMC_SAMPLE_INUSE;
}
}
ps->ps_nsamples = callchaindepth; /* mark entry as in use */
/* increment write pointer, modulo ring buffer size */
ps++;
@ -3695,7 +3851,50 @@ pmc_process_interrupt(int cpu, struct pmc *pm, uintfptr_t pc, int usermode)
/* mark CPU as needing processing */
atomic_set_rel_int(&pmc_cpumask, (1 << cpu));
return error;
return (error);
}
/*
* Capture a user call chain. This function will be called from ast()
* before control returns to userland and before the process gets
* rescheduled.
*/
static void
pmc_capture_user_callchain(int cpu, struct trapframe *tf)
{
        int n;
        struct pmc *pm;
        struct pmc_sample *sample;
        struct pmc_samplebuffer *sb;

        sb = pmc_pcpu[cpu]->pc_sb;

        /*
         * Scan every slot of this CPU's sample buffer for entries
         * whose callchain capture was deferred at interrupt time
         * (marked PMC_SAMPLE_INUSE) and unwind their user stacks now.
         */
        for (n = 0; n < pmc_nsamples; n++) {
                sample = &sb->ps_samples[n];
                if (sample->ps_nsamples != PMC_SAMPLE_INUSE)
                        continue;

                pm = sample->ps_pmc;
                KASSERT(pm->pm_flags & PMC_F_CALLCHAIN,
                    ("[pmc,%d] Retrieving callchain for PMC that doesn't "
                    "want it", __LINE__));

                /*
                 * Storing the real frame count marks the sample as
                 * 'processable' by the timer tick sweep code.
                 */
                sample->ps_nsamples = pmc_save_user_callchain(sample->ps_pc,
                    pmc_callchaindepth, tf);
        }
}
@ -3722,8 +3921,13 @@ pmc_process_samples(int cpu)
for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */
ps = psb->ps_read;
if (ps->ps_pc == (uintfptr_t) 0) /* no data */
if (ps->ps_nsamples == PMC_SAMPLE_FREE)
break;
if (ps->ps_nsamples == PMC_SAMPLE_INUSE) {
/* Need a rescan at a later time. */
atomic_set_rel_int(&pmc_cpumask, (1 << cpu));
break;
}
pm = ps->ps_pmc;
po = pm->pm_owner;
@ -3736,8 +3940,8 @@ pmc_process_samples(int cpu)
if (pm->pm_state != PMC_STATE_RUNNING)
goto entrydone;
PMCDBG(SAM,OPS,1,"cpu=%d pm=%p pc=%jx um=%d wr=%d rd=%d", cpu,
pm, (uint64_t) ps->ps_pc, ps->ps_usermode,
PMCDBG(SAM,OPS,1,"cpu=%d pm=%p n=%d fl=%x wr=%d rd=%d", cpu,
pm, ps->ps_nsamples, ps->ps_flags,
(int) (psb->ps_write - psb->ps_samples),
(int) (psb->ps_read - psb->ps_samples));
@ -3748,9 +3952,9 @@ pmc_process_samples(int cpu)
* would have done.
*/
if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) {
if (ps->ps_usermode) {
if (ps->ps_flags & PMC_CC_F_USERSPACE) {
td = FIRST_THREAD_IN_PROC(po->po_owner);
addupc_intr(td, ps->ps_pc, 1);
addupc_intr(td, ps->ps_pc[0], 1);
}
goto entrydone;
}
@ -3762,10 +3966,10 @@ pmc_process_samples(int cpu)
* entry to the PMC's owner process.
*/
pmclog_process_pcsample(pm, ps);
pmclog_process_callchain(pm, ps);
entrydone:
ps->ps_pc = (uintfptr_t) 0; /* mark entry as free */
ps->ps_nsamples = 0; /* mark entry as free */
atomic_subtract_rel_32(&pm->pm_runcount, 1);
/* increment read pointer, modulo sample size */
@ -4087,6 +4291,7 @@ pmc_initialize(void)
{
int cpu, error, n;
struct pmc_binding pb;
struct pmc_sample *ps;
struct pmc_samplebuffer *sb;
md = NULL;
@ -4119,17 +4324,24 @@ pmc_initialize(void)
*/
if (pmc_hashsize <= 0) {
(void) printf("hwpmc: tunable hashsize=%d must be greater "
"than zero.\n", pmc_hashsize);
(void) printf("hwpmc: tunable \"hashsize\"=%d must be "
"greater than zero.\n", pmc_hashsize);
pmc_hashsize = PMC_HASH_SIZE;
}
if (pmc_nsamples <= 0 || pmc_nsamples > 65535) {
(void) printf("hwpmc: tunable nsamples=%d out of range.\n",
pmc_nsamples);
(void) printf("hwpmc: tunable \"nsamples\"=%d out of "
"range.\n", pmc_nsamples);
pmc_nsamples = PMC_NSAMPLES;
}
if (pmc_callchaindepth <= 0 ||
pmc_callchaindepth > PMC_CALLCHAIN_DEPTH_MAX) {
(void) printf("hwpmc: tunable \"callchaindepth\"=%d out of "
"range.\n", pmc_callchaindepth);
pmc_callchaindepth = PMC_CALLCHAIN_DEPTH;
}
md = pmc_md_initialize();
if (md == NULL || md->pmd_init == NULL)
@ -4171,6 +4383,14 @@ pmc_initialize(void)
KASSERT(pmc_pcpu[cpu] != NULL,
("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu));
MALLOC(sb->ps_callchains, uintptr_t *,
pmc_callchaindepth * pmc_nsamples * sizeof(uintptr_t),
M_PMC, M_WAITOK|M_ZERO);
for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++)
ps->ps_pc = sb->ps_callchains +
(n * pmc_callchaindepth);
pmc_pcpu[cpu]->pc_sb = sb;
}
@ -4327,6 +4547,7 @@ pmc_cleanup(void)
KASSERT(pmc_pcpu[cpu]->pc_sb != NULL,
("[pmc,%d] Null cpu sample buffer cpu=%d", __LINE__,
cpu));
FREE(pmc_pcpu[cpu]->pc_sb->ps_callchains, M_PMC);
FREE(pmc_pcpu[cpu]->pc_sb, M_PMC);
pmc_pcpu[cpu]->pc_sb = NULL;
}

View File

@ -1,7 +1,11 @@
/*-
* Copyright (c) 2003-2005 Joseph Koshy
* Copyright (c) 2003-2007 Joseph Koshy
* Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
* Portions of this software were developed by A. Joseph Koshy under
* sponsorship from the FreeBSD Foundation and Google, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@ -35,6 +39,7 @@ __FBSDID("$FreeBSD$");
#include <sys/smp.h>
#include <sys/systm.h>
#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
@ -1478,7 +1483,7 @@ p4_stop_pmc(int cpu, int ri)
*
* On HTT machines, this PMC may be in use by two threads
* running on two logical CPUS. Thus we look at the
* 'pm_runcount' field and only turn off the appropriate TO/T1
* 'runcount' field and only turn off the appropriate TO/T1
* bits (and keep the PMC running) if two logical CPUs were
* using the PMC.
*
@ -1562,16 +1567,17 @@ p4_stop_pmc(int cpu, int ri)
*/
static int
p4_intr(int cpu, uintptr_t eip, int usermode)
p4_intr(int cpu, struct trapframe *tf)
{
int i, did_interrupt, error, ri;
uint32_t cccrval, ovf_mask, ovf_partner;
struct p4_cpu *pc;
int i, did_interrupt, error, ri;
struct pmc_hw *phw;
struct p4_cpu *pc;
struct pmc *pm;
pmc_value_t v;
PMCDBG(MDP,INT, 1, "cpu=%d eip=%p um=%d", cpu, (void *) eip, usermode);
PMCDBG(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
TRAPF_USERMODE(tf));
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)];
@ -1579,8 +1585,8 @@ p4_intr(int cpu, uintptr_t eip, int usermode)
P4_CCCR_OVF_PMI_T1 : P4_CCCR_OVF_PMI_T0;
ovf_mask |= P4_CCCR_OVF;
if (p4_system_has_htt)
ovf_partner = P4_CPU_IS_HTT_SECONDARY(cpu) ? P4_CCCR_OVF_PMI_T0 :
P4_CCCR_OVF_PMI_T1;
ovf_partner = P4_CPU_IS_HTT_SECONDARY(cpu) ?
P4_CCCR_OVF_PMI_T0 : P4_CCCR_OVF_PMI_T1;
else
ovf_partner = 0;
did_interrupt = 0;
@ -1617,7 +1623,8 @@ p4_intr(int cpu, uintptr_t eip, int usermode)
!PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
continue;
}
(void) pmc_process_interrupt(cpu, pm, eip, usermode);
(void) pmc_process_interrupt(cpu, pm, tf,
TRAPF_USERMODE(tf));
continue;
}
@ -1667,7 +1674,8 @@ p4_intr(int cpu, uintptr_t eip, int usermode)
* Process the interrupt. Re-enable the PMC if
* processing was successful.
*/
error = pmc_process_interrupt(cpu, pm, eip, usermode);
error = pmc_process_interrupt(cpu, pm, tf,
TRAPF_USERMODE(tf));
/*
* Only the first processor executing the NMI handler
@ -1698,7 +1706,7 @@ p4_intr(int cpu, uintptr_t eip, int usermode)
atomic_add_int(did_interrupt ? &pmc_stats.pm_intr_processed :
&pmc_stats.pm_intr_ignored, 1);
return did_interrupt;
return (did_interrupt);
}
/*

View File

@ -1,7 +1,11 @@
/*-
* Copyright (c) 2003-2005 Joseph Koshy
* Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
* Portions of this software were developed by A. Joseph Koshy under
* sponsorship from the FreeBSD Foundation and Google, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@ -35,6 +39,7 @@ __FBSDID("$FreeBSD$");
#include <sys/smp.h>
#include <sys/systm.h>
#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/pmc_mdep.h>
@ -710,7 +715,7 @@ p6_stop_pmc(int cpu, int ri)
}
static int
p6_intr(int cpu, uintptr_t eip, int usermode)
p6_intr(int cpu, struct trapframe *tf)
{
int i, error, retval, ri;
uint32_t perf0cfg;
@ -745,7 +750,8 @@ p6_intr(int cpu, uintptr_t eip, int usermode)
retval = 1;
error = pmc_process_interrupt(cpu, pm, eip, usermode);
error = pmc_process_interrupt(cpu, pm, tf,
TRAPF_USERMODE(tf));
if (error)
P6_MARK_STOPPED(pc,ri);

View File

@ -1,7 +1,11 @@
/*-
* Copyright (c) 2005, Joseph Koshy
* Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
* Portions of this software were developed by A. Joseph Koshy under
* sponsorship from the FreeBSD Foundation and Google, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@ -30,12 +34,18 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/pmc.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <machine/cpu.h>
#include <machine/apicreg.h>
#include <machine/pmc_mdep.h>
#include <machine/md_var.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/pmap.h>
extern volatile lapic_t *lapic;
void
@ -48,6 +58,187 @@ pmc_x86_lapic_enable_pmc_interrupt(void)
lapic->lvt_pcint = value;
}
/*
* Attempt to walk a user call stack using a too-simple algorithm.
* In the general case we need unwind information associated with
* the executable to be able to walk the user stack.
*
* We are handed a trap frame laid down at the time the PMC interrupt
* was taken. If the application is using frame pointers, the saved
* PC value could be:
* a. at the beginning of a function before the stack frame is laid
* down,
* b. just before a 'ret', after the stack frame has been taken off,
* c. somewhere else in the function with a valid stack frame being
* present,
*
* If the application is not using frame pointers, this algorithm will
* fail to yield an interesting call chain.
*
* TODO: figure out a way to use unwind information.
*/
/*
 * Walk the user stack described by trap frame 'tf', writing up to
 * 'nframes' return addresses into 'cc'.  Returns the number of
 * entries actually captured (always >= 1: the interrupted PC).
 */
int
pmc_save_user_callchain(uintptr_t *cc, int nframes, struct trapframe *tf)
{
        int n;
        uint32_t instr;
        uintptr_t fp, oldfp, pc, r, sp;

        KASSERT(TRAPF_USERMODE(tf), ("[x86,%d] Not a user trap frame tf=%p",
            __LINE__, (void *) tf));

        pc = PMC_TRAPFRAME_TO_PC(tf);
        oldfp = fp = PMC_TRAPFRAME_TO_FP(tf);
        sp = PMC_TRAPFRAME_TO_SP(tf);

        *cc++ = pc; n = 1;      /* the interrupted PC is always entry 0 */

        r = fp + sizeof(uintptr_t); /* points to return address */

        /* A kernel-space PC cannot be dereferenced with copyin(). */
        if (!PMC_IN_USERSPACE(pc))
                return (n);

        if (copyin((void *) pc, &instr, sizeof(instr)) != 0)
                return (n);

        /*
         * Adjust for where we were interrupted within the function:
         * before the frame is set up (push %rbp / ret pending) the
         * return address is still at the stack pointer; just after
         * 'push %rbp' it is one word above it; otherwise a full frame
         * exists and both the return address and the caller's frame
         * pointer can be read through %rbp.
         */
        if (PMC_AT_FUNCTION_PROLOGUE_PUSH_BP(instr) ||
            PMC_AT_FUNCTION_EPILOGUE_RET(instr)) { /* ret */
                if (copyin((void *) sp, &pc, sizeof(pc)) != 0)
                        return (n);
        } else if (PMC_AT_FUNCTION_PROLOGUE_MOV_SP_BP(instr)) {
                sp += sizeof(uintptr_t);
                if (copyin((void *) sp, &pc, sizeof(pc)) != 0)
                        return (n);
        } else if (copyin((void *) r, &pc, sizeof(pc)) != 0 ||
            copyin((void *) fp, &fp, sizeof(fp)) != 0)
                /*
                 * BUG FIX: the original passed 'sizeof(fp) != 0' as
                 * copyin()'s length, copying a single byte of the
                 * saved frame pointer and discarding the error check.
                 */
                return (n);

        for (; n < nframes;) {
                if (pc == 0 || !PMC_IN_USERSPACE(pc))
                        break;

                *cc++ = pc; n++;

                /* Frame pointers must move up the (descending) stack. */
                if (fp < oldfp)
                        break;

                r = fp + sizeof(uintptr_t); /* address of return address */
                oldfp = fp;
                if (copyin((void *) r, &pc, sizeof(pc)) != 0 ||
                    copyin((void *) fp, &fp, sizeof(fp)) != 0)
                        break;
        }

        return (n);
}
/*
 * Walking the kernel call stack.
 *
 * We are handed the trap frame laid down at the time the PMC
 * interrupt was taken.  The saved PC could be:
 * a. in the lowlevel trap handler, meaning that there isn't a C stack
 *    to traverse,
 * b. at the beginning of a function before the stack frame is laid
 *    down,
 * c. just before a 'ret', after the stack frame has been taken off,
 * d. somewhere else in a function with a valid stack frame being
 *    present.
 *
 * In case (d), the previous frame pointer is at [%ebp]/[%rbp] and
 * the return address is at [%ebp+4]/[%rbp+8].
 *
 * For cases (b) and (c), the return address is at [%esp]/[%rsp] and
 * the frame pointer doesn't need to be changed when going up one
 * level in the stack.
 *
 * For case (a), we check if the PC lies in low-level trap handling
 * code, and if so we terminate our trace.
 *
 * Returns the call chain depth recorded (at least 1).  Unlike the
 * user-mode walker, memory is read by direct dereference, so every
 * address is range-checked against the kernel address space and the
 * current thread's kernel stack before it is dereferenced.
 */
int
pmc_save_kernel_callchain(uintptr_t *cc, int nframes, struct trapframe *tf)
{
	int n;
	uint32_t instr;
	uintptr_t fp, pc, r, sp, stackstart, stackend;
	struct thread *td;

	KASSERT(TRAPF_USERMODE(tf) == 0,("[x86,%d] not a kernel backtrace",
	    __LINE__));

	pc = PMC_TRAPFRAME_TO_PC(tf);
	fp = PMC_TRAPFRAME_TO_FP(tf);
	sp = PMC_TRAPFRAME_TO_SP(tf);

	*cc++ = pc;		/* the interrupted PC is always recorded */

	r = fp + sizeof(uintptr_t); /* points to return address */

	/* No thread context: record just the interrupted PC. */
	if ((td = curthread) == NULL)
		return (1);

	if (nframes <= 1)
		return (1);

	/* Bounds of this thread's kernel stack, for pointer validation. */
	stackstart = (uintptr_t) td->td_kstack;
	stackend = (uintptr_t) td->td_kstack + td->td_kstack_pages * PAGE_SIZE;

	/*
	 * Give up for case (a) above, or whenever the saved registers
	 * do not point into the kernel / this thread's kernel stack;
	 * dereferencing them would not be safe.
	 */
	if (PMC_IN_TRAP_HANDLER(pc) ||
	    !PMC_IN_KERNEL(pc) || !PMC_IN_KERNEL(r) ||
	    !PMC_IN_KERNEL_STACK(sp, stackstart, stackend) ||
	    !PMC_IN_KERNEL_STACK(fp, stackstart, stackend))
		return (1);

	/* Safe: pc was range-checked above. */
	instr = *(uint32_t *) pc;

	/*
	 * Determine whether the interrupted function was in the
	 * process of either laying down its stack frame or taking
	 * it off.
	 *
	 * If we haven't started laying down a stack frame, or are
	 * just about to return, then our caller's address is at
	 * *sp, and we don't have a frame to unwind.
	 */
	if (PMC_AT_FUNCTION_PROLOGUE_PUSH_BP(instr) ||
	    PMC_AT_FUNCTION_EPILOGUE_RET(instr))
		pc = *(uintptr_t *) sp;
	else if (PMC_AT_FUNCTION_PROLOGUE_MOV_SP_BP(instr)) {
		/*
		 * The code was midway through laying down a frame.
		 * At this point sp[0] has a frame back pointer,
		 * and the caller's address is therefore at sp[1].
		 */
		sp += sizeof(uintptr_t);
		if (!PMC_IN_KERNEL_STACK(sp, stackstart, stackend))
			return (1);
		pc = *(uintptr_t *) sp;
	} else {
		/*
		 * Not in the function prologue or epilogue.
		 */
		pc = *(uintptr_t *) r;
		fp = *(uintptr_t *) fp;
	}

	/* Walk the frame-pointer chain until it leaves the kernel stack. */
	for (n = 1; n < nframes; n++) {
		*cc++ = pc;

		/* Case (a): reached low-level trap code, stop the trace. */
		if (PMC_IN_TRAP_HANDLER(pc))
			break;

		r = fp + sizeof(uintptr_t);

		/* Validate before the dereferences below. */
		if (!PMC_IN_KERNEL_STACK(fp, stackstart, stackend) ||
		    !PMC_IN_KERNEL(r))
			break;

		pc = *(uintptr_t *) r;
		fp = *(uintptr_t *) fp;
	}

	return (n);
}
static struct pmc_mdep *
pmc_intel_initialize(void)

View File

@ -1,8 +1,12 @@
/*-
* Copyright (c) 1989, 1990 William F. Jolitz.
* Copyright (c) 1990 The Regents of the University of California.
* Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
* Portions of this software were developed by A. Joseph Koshy under
* sponsorship from the FreeBSD Foundation and Google, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@ -31,6 +35,7 @@
*/
#include "opt_apic.h"
#include "opt_hwpmc_hooks.h"
#include "opt_npx.h"
#include <machine/asmacros.h>
@ -42,7 +47,9 @@
#define SEL_RPL_MASK 0x0003
.text
#ifdef HWPMC_HOOKS
ENTRY(start_exceptions)
#endif
/*****************************************************************************/
/* Trap handling */
/*****************************************************************************/
@ -261,8 +268,18 @@ doreti:
FAKE_MCOUNT($bintr) /* init "from" bintr -> doreti */
doreti_next:
/*
* Check if ASTs can be handled now. PSL_VM must be checked first
* since segment registers only have an RPL in non-VM86 mode.
* Check if ASTs can be handled now. ASTs cannot be safely
* processed when returning from an NMI.
*/
cmpb $T_NMI,TF_TRAPNO(%esp)
#ifdef HWPMC_HOOKS
je doreti_nmi
#else
je doreti_exit
#endif
/*
* PSL_VM must be checked first since segment registers only
* have an RPL in non-VM86 mode.
*/
testl $PSL_VM,TF_EFLAGS(%esp) /* are we in vm86 mode? */
jz doreti_notvm86
@ -340,3 +357,32 @@ doreti_popl_fs_fault:
movl $0,TF_ERR(%esp) /* XXX should be the error code */
movl $T_PROTFLT,TF_TRAPNO(%esp)
jmp alltraps_with_regs_pushed
#ifdef HWPMC_HOOKS
doreti_nmi:
	/*
	 * Since we are returning from an NMI, check if the current trap
	 * was from user mode and if so whether the current thread
	 * needs a user call chain capture.  (These checks mirror the
	 * ones done by the amd64 NMI handler.)
	 */
	testb	$SEL_RPL_MASK,TF_CS(%esp)	/* RPL != 0 => from user mode */
	jz	doreti_exit
	movl	PCPU(CURTHREAD),%eax	/* curthread present? */
	orl	%eax,%eax
	jz	doreti_exit
	testl	$TDP_CALLCHAIN,TD_PFLAGS(%eax)	/* flagged for capture? */
	jz	doreti_exit
	/*
	 * Take the processor out of NMI mode by executing a fake "iret":
	 * push the current flags, %cs and a return address on the stack
	 * and 'return' to the next instruction.  While still in NMI mode
	 * a nested interrupt's 'iret' could otherwise drop us out of NMI
	 * mode at an unexpected point.
	 */
	pushfl
	pushl	%cs
	pushl	$outofnmi
	iret
outofnmi:
	/*
	 * Enable interrupts ('sti' sets IF; the original comment here
	 * said "clear", which was misleading) and jump to the AST
	 * handling code, where the TDP_CALLCHAIN AST captures the user
	 * call chain.
	 */
	sti
	jmp	doreti_ast
ENTRY(end_exceptions)
#endif

View File

@ -214,8 +214,7 @@ trap(struct trapframe *frame)
* return immediately.
*/
if (type == T_NMI && pmc_intr &&
(*pmc_intr)(PCPU_GET(cpuid), (uintptr_t) frame->tf_eip,
TRAPF_USERMODE(frame)))
(*pmc_intr)(PCPU_GET(cpuid), frame))
goto out;
#endif

View File

@ -1,7 +1,11 @@
/*-
* Copyright (c) 2003-2005 Joseph Koshy
* Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
* Portions of this software were developed by A. Joseph Koshy under
* sponsorship from the FreeBSD Foundation and Google, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@ -73,10 +77,33 @@ union pmc_md_pmc {
struct pmc;
#define PMC_TRAPFRAME_TO_PC(TF) ((TF)->tf_eip)
#define PMC_TRAPFRAME_TO_FP(TF) ((TF)->tf_ebp)
#define PMC_TRAPFRAME_TO_SP(TF) ((TF)->tf_esp)
#define PMC_IN_KERNEL_STACK(S,START,END) \
((S) >= (START) && (S) < (END))
#define PMC_IN_KERNEL(va) (((va) >= USRSTACK) && \
((va) < VM_MAX_KERNEL_ADDRESS))
#define PMC_IN_USERSPACE(va) ((va) <= VM_MAXUSER_ADDRESS)
#define PMC_IN_TRAP_HANDLER(PC) \
((PC) >= (uintptr_t) start_exceptions && \
(PC) < (uintptr_t) end_exceptions)
/*
 * Instruction-recognition helpers for the simple stack walkers.
 *
 * This is the i386 header (the trapframe accessors above use
 * tf_eip/tf_ebp/tf_esp), so the opcodes matched must be the 32-bit
 * encodings, not the amd64 ones that were here before (0xe5894855 is
 * 'push %rbp; REX.W mov %rsp,%rbp' -- it can never match i386 code):
 *	pushl %ebp	= 0x55
 *	movl  %esp,%ebp	= 0x89 0xe5
 *	ret		= 0xc3
 * The 32-bit word fetched at the PC is interpreted little-endian, so
 * the first instruction byte is the least significant byte of (I).
 */
#define	PMC_AT_FUNCTION_PROLOGUE_PUSH_BP(I)		\
	(((I) & 0x00ffffff) == 0xe58955) /* pushl %ebp; movl %esp,%ebp */
#define	PMC_AT_FUNCTION_PROLOGUE_MOV_SP_BP(I)		\
	(((I) & 0x0000ffff) == 0xe589)	/* movl %esp,%ebp */
#define	PMC_AT_FUNCTION_EPILOGUE_RET(I)			\
	(((I) & 0xFF) == 0xC3)		/* ret */
/*
* Prototypes
*/
void start_exceptions(void), end_exceptions(void);
void pmc_x86_lapic_enable_pmc_interrupt(void);
#endif /* _KERNEL */

View File

@ -1,5 +1,10 @@
/*-
* Copyright (c) 2003-2005, Joseph Koshy
* Copyright (c) 2003-2007 Joseph Koshy
* Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
* Portions of this software were developed by A. Joseph Koshy under
* sponsorship from the FreeBSD Foundation and Google, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -45,7 +50,7 @@ const int pmc_kernel_version = PMC_KERNEL_VERSION;
int (*pmc_hook)(struct thread *td, int function, void *arg) = NULL;
/* Interrupt handler */
int (*pmc_intr)(int cpu, uintptr_t pc, int usermode) = NULL;
int (*pmc_intr)(int cpu, struct trapframe *tf) = NULL;
/* Bitmask of CPUs requiring servicing at hardclock time */
volatile cpumask_t pmc_cpumask;
@ -66,7 +71,14 @@ volatile int pmc_ss_count;
* somewhat more expensive than a simple 'if' check and indirect call.
*/
struct sx pmc_sx;
SX_SYSINIT(pmc, &pmc_sx, "pmc shared lock");
/*
 * Initialize the global pmc_sx lock.
 *
 * An explicit SYSINIT at SI_SUB_LOCK time is used instead of the
 * usual SX_SYSINIT() helper so that a flags argument can be passed
 * to sx_init_flags().
 *
 * NOTE(review): SX_NOWITNESS presumably avoids WITNESS complaints
 * from the lock-order patterns this lock participates in -- confirm
 * against the hwpmc(4) locking design.
 */
static void
pmc_init_sx(void)
{
	sx_init_flags(&pmc_sx, "pmc-sx", SX_NOWITNESS);
}

SYSINIT(pmcsx, SI_SUB_LOCK, SI_ORDER_MIDDLE, pmc_init_sx, NULL);
/*
* Helper functions

View File

@ -2,10 +2,14 @@
* Copyright (C) 1994, David Greenman
* Copyright (c) 1990, 1993
* The Regents of the University of California. All rights reserved.
* Copyright (c) 2007 The FreeBSD Foundation
*
* This code is derived from software contributed to Berkeley by
* the University of Utah, and William Jolitz.
*
* Portions of this software were developed by A. Joseph Koshy under
* sponsorship from the FreeBSD Foundation and Google, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@ -40,6 +44,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_hwpmc_hooks.h"
#include "opt_ktrace.h"
#include "opt_mac.h"
#ifdef __i386__
@ -52,6 +57,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/pmckern.h>
#include <sys/proc.h>
#include <sys/ktr.h>
#include <sys/resourcevar.h>
@ -201,6 +207,13 @@ ast(struct trapframe *framep)
td->td_profil_ticks = 0;
td->td_pflags &= ~TDP_OWEUPC;
}
#if defined(HWPMC_HOOKS)
if (td->td_pflags & TDP_CALLCHAIN) {
PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_USER_CALLCHAIN,
(void *) framep);
td->td_pflags &= ~TDP_CALLCHAIN;
}
#endif
if (flags & TDF_ALRMPEND) {
PROC_LOCK(p);
psignal(p, SIGVTALRM);

View File

@ -541,11 +541,14 @@ struct pmc_op_getmsr {
#include <sys/malloc.h>
#include <sys/sysctl.h>
#include <machine/frame.h>
#define PMC_HASH_SIZE 16
#define PMC_MTXPOOL_SIZE 32
#define PMC_LOG_BUFFER_SIZE 4
#define PMC_NLOGBUFFERS 16
#define PMC_NSAMPLES 32
#define PMC_CALLCHAIN_DEPTH 8
#define PMC_SYSCTL_NAME_PREFIX "kern." PMC_MODULE_NAME "."
@ -652,7 +655,7 @@ struct pmc {
pmc_value_t pm_initial; /* counting PMC modes */
} pm_sc;
uint32_t pm_stalled; /* true for stalled sampling PMCs */
uint32_t pm_stalled; /* marks stalled sampling PMCs */
uint32_t pm_caps; /* PMC capabilities */
enum pmc_event pm_event; /* event being measured */
uint32_t pm_flags; /* additional flags PMC_F_... */
@ -680,6 +683,7 @@ struct pmc {
#define PMC_TO_ROWINDEX(P) PMC_ID_TO_ROWINDEX((P)->pm_id)
#define PMC_TO_CPU(P) PMC_ID_TO_CPU((P)->pm_id)
/*
* struct pmc_process
*
@ -743,6 +747,7 @@ struct pmc_owner {
#define PMC_PO_OWNS_LOGFILE 0x00000001 /* has a log file */
#define PMC_PO_IN_FLUSH 0x00000010 /* in the middle of a flush */
#define PMC_PO_INITIAL_MAPPINGS_DONE 0x00000020
/*
* struct pmc_hw -- describe the state of the PMC hardware
@ -793,15 +798,21 @@ struct pmc_hw {
*/
struct pmc_sample {
uintfptr_t ps_pc; /* PC value at interrupt */
struct pmc *ps_pmc; /* interrupting PMC */
int ps_usermode; /* true for user mode PCs */
uint16_t ps_nsamples; /* callchain depth */
uint8_t ps_cpu; /* cpu number */
uint8_t ps_flags; /* other flags */
pid_t ps_pid; /* process PID or -1 */
struct pmc *ps_pmc; /* interrupting PMC */
uintptr_t *ps_pc; /* (const) callchain start */
};
#define PMC_SAMPLE_FREE ((uint16_t) 0)
#define PMC_SAMPLE_INUSE ((uint16_t) 0xFFFF)
struct pmc_samplebuffer {
struct pmc_sample * volatile ps_read; /* read pointer */
struct pmc_sample * volatile ps_write; /* write pointer */
uintptr_t *ps_callchains; /* all saved call chains */
struct pmc_sample *ps_fence; /* one beyond ps_samples[] */
struct pmc_sample ps_samples[]; /* array of sample entries */
};
@ -881,7 +892,7 @@ struct pmc_mdep {
int (*pmd_stop_pmc)(int _cpu, int _ri);
/* handle a PMC interrupt */
int (*pmd_intr)(int _cpu, uintptr_t _pc, int _usermode);
int (*pmd_intr)(int _cpu, struct trapframe *_tf);
int (*pmd_describe)(int _cpu, int _ri, struct pmc_info *_pi,
struct pmc **_ppmc);
@ -1002,8 +1013,11 @@ MALLOC_DECLARE(M_PMC);
struct pmc_mdep *pmc_md_initialize(void); /* MD init function */
int pmc_getrowdisp(int _ri);
int pmc_process_interrupt(int _cpu, struct pmc *_pm, uintfptr_t _pc,
int _usermode);
int pmc_process_interrupt(int _cpu, struct pmc *_pm,
struct trapframe *_tf, int _inuserspace);
int pmc_save_kernel_callchain(uintptr_t *_cc, int _maxsamples,
struct trapframe *_tf);
int pmc_save_user_callchain(uintptr_t *_cc, int _maxsamples,
struct trapframe *_tf);
#endif /* _KERNEL */
#endif /* _SYS_PMC_H_ */

View File

@ -1,7 +1,11 @@
/*-
* Copyright (c) 2003-2006, Joseph Koshy
* Copyright (c) 2003-2007, Joseph Koshy
* Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
* Portions of this software were developed by A. Joseph Koshy under
* sponsorship from the FreeBSD Foundation and Google, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@ -47,6 +51,7 @@
#define PMC_FN_KLD_UNLOAD 6
#define PMC_FN_MMAP 7
#define PMC_FN_MUNMAP 8
#define PMC_FN_USER_CALLCHAIN 9
struct pmckern_procexec {
int pm_credentialschanged;
@ -65,7 +70,7 @@ struct pmckern_map_out {
/* hook */
extern int (*pmc_hook)(struct thread *_td, int _function, void *_arg);
extern int (*pmc_intr)(int _cpu, uintptr_t _pc, int _usermode);
extern int (*pmc_intr)(int _cpu, struct trapframe *_frame);
/* SX lock protecting the hook */
extern struct sx pmc_sx;

View File

@ -243,13 +243,13 @@ int pmclog_configure_log(struct pmc_owner *_po, int _logfd);
int pmclog_deconfigure_log(struct pmc_owner *_po);
int pmclog_flush(struct pmc_owner *_po);
void pmclog_initialize(void);
void pmclog_process_callchain(struct pmc *_pm, struct pmc_sample *_ps);
void pmclog_process_closelog(struct pmc_owner *po);
void pmclog_process_dropnotify(struct pmc_owner *po);
void pmclog_process_map_in(struct pmc_owner *po, pid_t pid,
uintfptr_t start, const char *path);
void pmclog_process_map_out(struct pmc_owner *po, pid_t pid,
uintfptr_t start, uintfptr_t end);
void pmclog_process_pcsample(struct pmc *_pm, struct pmc_sample *_ps);
void pmclog_process_pmcallocate(struct pmc *_pm);
void pmclog_process_pmcattach(struct pmc *_pm, pid_t _pid, char *_path);
void pmclog_process_pmcdetach(struct pmc *_pm, pid_t _pid);