diff --git a/sys/dev/hwpmc/hwpmc_logging.c b/sys/dev/hwpmc/hwpmc_logging.c index a69e9d0df813..454ac87359a8 100644 --- a/sys/dev/hwpmc/hwpmc_logging.c +++ b/sys/dev/hwpmc/hwpmc_logging.c @@ -250,11 +250,9 @@ pmc_plb_rele(struct pmclog_buffer *plb) mtx_unlock_spin(&pmc_dom_hdrs[plb->plb_domain]->pdbh_mtx); } - /* * Get a log buffer */ - static int pmclog_get_buffer(struct pmc_owner *po) { @@ -345,7 +343,6 @@ pmclog_proc_ignite(void *handle, struct pmc_owner *po) * * This function is executed by each pmc owner's helper thread. */ - static void pmclog_loop(void *arg) { @@ -846,12 +843,15 @@ pmclog_schedule_one_cond(void *arg) { struct pmc_owner *po = arg; struct pmclog_buffer *plb; + int cpu; spinlock_enter(); + cpu = curcpu; /* tell hardclock not to run again */ - if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid))) + if (PMC_CPU_HAS_SAMPLES(cpu)) PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL); - plb = po->po_curbuf[curcpu]; + pmc_flush_samples(cpu); + plb = po->po_curbuf[cpu]; if (plb && plb->plb_ptr != plb->plb_base) pmclog_schedule_io(po, 1); spinlock_exit(); diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c index 690cd31708bc..b1729f8295d0 100644 --- a/sys/dev/hwpmc/hwpmc_mod.c +++ b/sys/dev/hwpmc/hwpmc_mod.c @@ -207,6 +207,8 @@ static int pmc_debugflags_parse(char *newstr, char *fence); #endif static int load(struct module *module, int cmd, void *arg); +static int pmc_add_sample(int cpu, int ring, struct pmc *pm, + struct trapframe *tf, int inuserspace); static void pmc_add_thread_descriptors_from_proc(struct proc *p, struct pmc_process *pp); static int pmc_attach_process(struct proc *p, struct pmc *pm); @@ -248,6 +250,7 @@ static void pmc_process_samples(int cpu, int soft); static void pmc_release_pmc_descriptor(struct pmc *pmc); static void pmc_process_thread_add(struct thread *td); static void pmc_process_thread_delete(struct thread *td); +static void pmc_process_thread_userret(struct thread *td); static void pmc_remove_owner(struct 
pmc_owner *po); static void pmc_remove_process_descriptor(struct pmc_process *pp); static void pmc_restore_cpu_binding(struct pmc_binding *pb); @@ -265,7 +268,7 @@ static int generic_switch_in(struct pmc_cpu *pc, struct pmc_process *pp); static int generic_switch_out(struct pmc_cpu *pc, struct pmc_process *pp); static struct pmc_mdep *pmc_generic_cpu_initialize(void); static void pmc_generic_cpu_finalize(struct pmc_mdep *md); - +static void pmc_post_callchain_callback(void); /* * Kernel tunables and sysctl(8) interface. */ @@ -291,6 +294,10 @@ SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, buffer_requests_failed, CTLFLAG_ &pmc_stats.pm_buffer_requests_failed, "# of buffer requests which failed"); SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, log_sweeps, CTLFLAG_RW, &pmc_stats.pm_log_sweeps, "# of ?"); +SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, merges, CTLFLAG_RW, + &pmc_stats.pm_merges, "# of times kernel stack was found for user trace"); +SYSCTL_COUNTER_U64(_kern_hwpmc_stats, OID_AUTO, overwrites, CTLFLAG_RW, + &pmc_stats.pm_overwrites, "# of times a sample was overwritten before being logged"); static int pmc_callchaindepth = PMC_CALLCHAIN_DEPTH; SYSCTL_INT(_kern_hwpmc, OID_AUTO, callchaindepth, CTLFLAG_RDTUN, @@ -1702,6 +1709,19 @@ pmc_process_thread_delete(struct thread *td) td, PMC_FLAG_REMOVE)); } +/* + * A userret() call for a thread. + */ +static void +pmc_process_thread_userret(struct thread *td) +{ + + thread_lock(td); + curthread->td_flags |= TDF_ASTPENDING; + thread_unlock(td); + pmc_post_callchain_callback(); +} + /* * A mapping change for a process. 
*/ @@ -2030,6 +2050,7 @@ const char *pmc_hooknames[] = { "SOFTSAMPLING", "THR-CREATE", "THR-EXIT", + "THR-USERRET", }; #endif @@ -2194,6 +2215,7 @@ pmc_hook_handler(struct thread *td, int function, void *arg) cpu = PCPU_GET(cpuid); pmc_process_samples(cpu, PMC_HR); pmc_process_samples(cpu, PMC_SR); + pmc_process_samples(cpu, PMC_UR); break; case PMC_FN_MMAP: @@ -2214,6 +2236,11 @@ pmc_hook_handler(struct thread *td, int function, void *arg) pmc_capture_user_callchain(PCPU_GET(cpuid), PMC_HR, (struct trapframe *) arg); + + KASSERT(td->td_pinned == 1, + ("[pmc,%d] invalid td_pinned value", __LINE__)); + sched_unpin(); /* Can migrate safely now. */ + td->td_pflags &= ~TDP_CALLCHAIN; break; @@ -2223,8 +2250,18 @@ pmc_hook_handler(struct thread *td, int function, void *arg) */ KASSERT(td == curthread, ("[pmc,%d] td != curthread", __LINE__)); - pmc_capture_user_callchain(PCPU_GET(cpuid), PMC_SR, + + cpu = PCPU_GET(cpuid); + pmc_capture_user_callchain(cpu, PMC_SR, (struct trapframe *) arg); + pmc_capture_user_callchain(cpu, PMC_UR, + (struct trapframe *) arg); + + KASSERT(td->td_pinned == 1, + ("[pmc,%d] invalid td_pinned value", __LINE__)); + + sched_unpin(); /* Can migrate safely now. */ + td->td_pflags &= ~TDP_CALLCHAIN; break; @@ -2245,6 +2282,12 @@ pmc_hook_handler(struct thread *td, int function, void *arg) pmc_process_thread_delete(td); break; + case PMC_FN_THR_USERRET: + KASSERT(td == curthread, ("[pmc,%d] td != curthread", + __LINE__)); + pmc_process_thread_userret(td); + break; + default: #ifdef HWPMC_DEBUG KASSERT(0, ("[pmc,%d] unknown hook %d\n", __LINE__, function)); @@ -2658,7 +2701,9 @@ pmc_wait_for_pmc_idle(struct pmc *pm) * Loop (with a forced context switch) till the PMC's runcount * comes down to zero. 
*/ + pmclog_flush(pm->pm_owner); while (counter_u64_fetch(pm->pm_runcount) > 0) { + pmclog_flush(pm->pm_owner); #ifdef HWPMC_DEBUG maxloop--; KASSERT(maxloop > 0, @@ -3264,7 +3309,7 @@ pmc_class_to_classdep(enum pmc_class class) return (NULL); } -#ifdef HWPMC_DEBUG +#if defined(HWPMC_DEBUG) && defined(KTR) static const char *pmc_op_to_name[] = { #undef __PMC_OP #define __PMC_OP(N, D) #N , @@ -3806,7 +3851,22 @@ pmc_syscall_handler(struct thread *td, void *syscall_args) */ if ((pa.pm_flags & ~(PMC_F_DESCENDANTS | PMC_F_LOG_PROCCSW | - PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN)) != 0) { + PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN | + PMC_F_USERCALLCHAIN)) != 0) { + error = EINVAL; + break; + } + + /* PMC_F_USERCALLCHAIN is only valid with PMC_F_CALLCHAIN */ + if ((pa.pm_flags & (PMC_F_CALLCHAIN | PMC_F_USERCALLCHAIN)) == + PMC_F_USERCALLCHAIN) { + error = EINVAL; + break; + } + + /* PMC_F_USERCALLCHAIN is only valid for sampling mode */ + if (pa.pm_flags & PMC_F_USERCALLCHAIN && + mode != PMC_MODE_TS && mode != PMC_MODE_SS) { error = EINVAL; break; } @@ -4533,8 +4593,6 @@ pmc_post_callchain_callback(void) } /* - * Interrupt processing. - * * Find a free slot in the per-cpu array of samples and capture the * current callchain there. If a sample was successfully added, a bit * is set in mask 'pmc_cpumask' denoting that the DO_SAMPLES hook @@ -4544,8 +4602,8 @@ pmc_post_callchain_callback(void) * use any of the locking primitives supplied by the OS. 
*/ -int -pmc_process_interrupt(int cpu, int ring, struct pmc *pm, struct trapframe *tf, +static int +pmc_add_sample(int cpu, int ring, struct pmc *pm, struct trapframe *tf, int inuserspace) { int error, callchaindepth; @@ -4561,7 +4619,11 @@ pmc_process_interrupt(int cpu, int ring, struct pmc *pm, struct trapframe *tf, psb = pmc_pcpu[cpu]->pc_sb[ring]; ps = psb->ps_write; - if (ps->ps_nsamples) { /* in use, reader hasn't caught up */ + if (ps->ps_nsamples == PMC_SAMPLE_INUSE) { + counter_u64_add(ps->ps_pmc->pm_runcount, -1); + counter_u64_add(pmc_stats.pm_overwrites, 1); + ps->ps_nsamples = 0; + } else if (ps->ps_nsamples) { /* in use, reader hasn't caught up */ pm->pm_pcpu_state[cpu].pps_stalled = 1; counter_u64_add(pmc_stats.pm_intr_bufferfull, 1); PMCDBG6(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", @@ -4573,7 +4635,6 @@ pmc_process_interrupt(int cpu, int ring, struct pmc *pm, struct trapframe *tf, goto done; } - /* Fill in entry. */ PMCDBG6(SAM,INT,1,"cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm, (void *) tf, inuserspace, @@ -4619,7 +4680,11 @@ pmc_process_interrupt(int cpu, int ring, struct pmc *pm, struct trapframe *tf, } ps->ps_nsamples = callchaindepth; /* mark entry as in use */ - + if (ring == PMC_UR) { + ps->ps_nsamples_actual = callchaindepth; /* mark entry as in use */ + ps->ps_nsamples = PMC_SAMPLE_INUSE; + } else + ps->ps_nsamples = callchaindepth; /* mark entry as in use */ /* increment write pointer, modulo ring buffer size */ ps++; if (ps == psb->ps_fence) @@ -4635,6 +4700,30 @@ pmc_process_interrupt(int cpu, int ring, struct pmc *pm, struct trapframe *tf, return (error); } +/* + * Interrupt processing. + * + * This function is meant to be called from an NMI handler. It cannot + * use any of the locking primitives supplied by the OS. 
+ */ + +int +pmc_process_interrupt(int cpu, int ring, struct pmc *pm, struct trapframe *tf, + int inuserspace) +{ + struct thread *td; + + td = curthread; + if ((pm->pm_flags & PMC_F_USERCALLCHAIN) && + td && td->td_proc && + (td->td_proc->p_flag & P_KPROC) == 0 && + !inuserspace) { + atomic_add_int(&curthread->td_pmcpend, 1); + return (pmc_add_sample(cpu, PMC_UR, pm, tf, 0)); + } + return (pmc_add_sample(cpu, ring, pm, tf, inuserspace)); +} + /* * Capture a user call chain. This function will be called from ast() * before control returns to userland and before the process gets @@ -4648,6 +4737,7 @@ pmc_capture_user_callchain(int cpu, int ring, struct trapframe *tf) struct thread *td; struct pmc_sample *ps, *ps_end; struct pmc_samplebuffer *psb; + int nsamples, nrecords, pass; #ifdef INVARIANTS int ncallchains; int nfree; @@ -4664,6 +4754,11 @@ pmc_capture_user_callchain(int cpu, int ring, struct trapframe *tf) ncallchains = 0; nfree = 0; #endif + nrecords = INT_MAX; + pass = 0; + restart: + if (ring == PMC_UR) + nrecords = atomic_readandclear_int(&td->td_pmcpend); /* * Iterate through all deferred callchain requests. @@ -4675,6 +4770,11 @@ pmc_capture_user_callchain(int cpu, int ring, struct trapframe *tf) ps_end = psb->ps_write; do { #ifdef INVARIANTS + if (ps->ps_nsamples == PMC_SAMPLE_FREE) { + nfree++; + goto next; + } + if ((ps->ps_pmc == NULL) || (ps->ps_pmc->pm_state != PMC_STATE_RUNNING)) nfree++; @@ -4697,37 +4797,91 @@ pmc_capture_user_callchain(int cpu, int ring, struct trapframe *tf) KASSERT(counter_u64_fetch(pm->pm_runcount) > 0, ("[pmc,%d] runcount %ld", __LINE__, (unsigned long)counter_u64_fetch(pm->pm_runcount))); + if (ring == PMC_UR) { + nsamples = ps->ps_nsamples_actual; + counter_u64_add(pmc_stats.pm_merges, 1); + } else + nsamples = 0; + /* * Retrieve the callchain and mark the sample buffer * as 'processable' by the timer tick sweep code. 
*/ - ps->ps_nsamples = pmc_save_user_callchain(ps->ps_pc, - pmc_callchaindepth, tf); -#ifdef INVARIANTS +#ifdef INVARIANTS ncallchains++; #endif + if (__predict_true(nsamples < pmc_callchaindepth - 1)) + nsamples += pmc_save_user_callchain(ps->ps_pc + nsamples, + pmc_callchaindepth - nsamples - 1, tf); + wmb(); + ps->ps_nsamples = nsamples; + if (nrecords-- == 1) + break; next: /* increment the pointer, modulo sample ring size */ if (++ps == psb->ps_fence) ps = psb->ps_samples; } while (ps != ps_end); + if (__predict_false(ring == PMC_UR && td->td_pmcpend)) { + if (pass == 0) { + pass = 1; + goto restart; + } + /* only collect samples for this part once */ + td->td_pmcpend = 0; + } -#ifdef INVARIANTS - KASSERT(ncallchains > 0 || nfree > 0, - ("[pmc,%d] cpu %d didn't find a sample to collect", __LINE__, - cpu)); +#ifdef INVARIANTS + if (ring == PMC_HR) + KASSERT(ncallchains > 0 || nfree > 0, + ("[pmc,%d] cpu %d didn't find a sample to collect", __LINE__, + cpu)); #endif - KASSERT(td->td_pinned == 1, - ("[pmc,%d] invalid td_pinned value", __LINE__)); - sched_unpin(); /* Can migrate safely now. */ - /* mark CPU as needing processing */ DPCPU_SET(pmc_sampled, 1); } + +static void +pmc_flush_ring(int cpu, int ring) +{ + struct pmc *pm; + struct pmc_sample *ps; + struct pmc_samplebuffer *psb; + int n; + + psb = pmc_pcpu[cpu]->pc_sb[ring]; + + for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */ + + ps = psb->ps_read; + if (ps->ps_nsamples == PMC_SAMPLE_FREE) + goto next; + pm = ps->ps_pmc; + counter_u64_add(pm->pm_runcount, -1); + ps->ps_nsamples = PMC_SAMPLE_FREE; + /* increment read pointer, modulo sample size */ + next: + if (++ps == psb->ps_fence) + psb->ps_read = psb->ps_samples; + else + psb->ps_read = ps; + } +} + +void +pmc_flush_samples(int cpu) +{ + int n; + + for (n = 0; n < PMC_NUM_SR; n++) + pmc_flush_ring(cpu, n); +} + + /* * Process saved PC samples. 
*/ @@ -4788,22 +4942,19 @@ pmc_process_samples(int cpu, int ring) * its owner, and if the PC is in user mode, update * profiling statistics like timer-based profiling * would have done. + * + * Otherwise, this is either a sampling-mode PMC that + * is attached to a different process than its owner, + * or a system-wide sampling PMC. Dispatch a log + * entry to the PMC's owner process. */ if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) { if (ps->ps_flags & PMC_CC_F_USERSPACE) { td = FIRST_THREAD_IN_PROC(po->po_owner); addupc_intr(td, ps->ps_pc[0], 1); } - goto entrydone; - } - - /* - * Otherwise, this is either a sampling mode PMC that - * is attached to a different process than its owner, - * or a system-wide sampling PMC. Dispatch a log - * entry to the PMC's owner process. - */ - pmclog_process_callchain(pm, ps); + } else + pmclog_process_callchain(pm, ps); entrydone: ps->ps_nsamples = 0; /* mark entry as free */ @@ -5282,6 +5433,8 @@ pmc_initialize(void) pmc_stats.pm_buffer_requests = counter_u64_alloc(M_WAITOK); pmc_stats.pm_buffer_requests_failed = counter_u64_alloc(M_WAITOK); pmc_stats.pm_log_sweeps = counter_u64_alloc(M_WAITOK); + pmc_stats.pm_merges = counter_u64_alloc(M_WAITOK); + pmc_stats.pm_overwrites = counter_u64_alloc(M_WAITOK); #ifdef HWPMC_DEBUG /* parse debug flags first */ @@ -5427,6 +5580,24 @@ pmc_initialize(void) (n * pmc_callchaindepth); pmc_pcpu[cpu]->pc_sb[PMC_SR] = sb; + + sb = malloc_domain(sizeof(struct pmc_samplebuffer) + + pmc_nsamples * sizeof(struct pmc_sample), M_PMC, domain, + M_WAITOK|M_ZERO); + sb->ps_read = sb->ps_write = sb->ps_samples; + sb->ps_fence = sb->ps_samples + pmc_nsamples; + + KASSERT(pmc_pcpu[cpu] != NULL, + ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu)); + + sb->ps_callchains = malloc_domain(pmc_callchaindepth * pmc_nsamples * + sizeof(uintptr_t), M_PMC, domain, M_WAITOK|M_ZERO); + + for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++) + ps->ps_pc = sb->ps_callchains + + (n * pmc_callchaindepth); + + 
pmc_pcpu[cpu]->pc_sb[PMC_UR] = sb; } /* allocate space for the row disposition array */ @@ -5637,10 +5808,15 @@ pmc_cleanup(void) KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_SR] != NULL, ("[pmc,%d] Null sw cpu sample buffer cpu=%d", __LINE__, cpu)); + KASSERT(pmc_pcpu[cpu]->pc_sb[PMC_UR] != NULL, + ("[pmc,%d] Null userret cpu sample buffer cpu=%d", __LINE__, + cpu)); free_domain(pmc_pcpu[cpu]->pc_sb[PMC_HR]->ps_callchains, M_PMC); free_domain(pmc_pcpu[cpu]->pc_sb[PMC_HR], M_PMC); free_domain(pmc_pcpu[cpu]->pc_sb[PMC_SR]->ps_callchains, M_PMC); free_domain(pmc_pcpu[cpu]->pc_sb[PMC_SR], M_PMC); + free_domain(pmc_pcpu[cpu]->pc_sb[PMC_UR]->ps_callchains, M_PMC); + free_domain(pmc_pcpu[cpu]->pc_sb[PMC_UR], M_PMC); free_domain(pmc_pcpu[cpu], M_PMC); } @@ -5669,6 +5845,8 @@ pmc_cleanup(void) counter_u64_free(pmc_stats.pm_buffer_requests); counter_u64_free(pmc_stats.pm_buffer_requests_failed); counter_u64_free(pmc_stats.pm_log_sweeps); + counter_u64_free(pmc_stats.pm_merges); + counter_u64_free(pmc_stats.pm_overwrites); sx_xunlock(&pmc_sx); /* we are done */ } diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c index 64c9c9251ea3..72a0b8f8d481 100644 --- a/sys/kern/subr_trap.c +++ b/sys/kern/subr_trap.c @@ -145,6 +145,11 @@ userret(struct thread *td, struct trapframe *frame) */ if (p->p_flag & P_PROFIL) addupc_task(td, TRAPF_PC(frame), td->td_pticks * psratio); + +#ifdef HWPMC_HOOKS + if (PMC_THREAD_HAS_SAMPLES(td)) + PMC_CALL_HOOK(td, PMC_FN_THR_USERRET, NULL); +#endif /* * Let the scheduler adjust our priority etc. 
*/ diff --git a/sys/sys/pmc.h b/sys/sys/pmc.h index 73afeb68875f..04f6f968b312 100644 --- a/sys/sys/pmc.h +++ b/sys/sys/pmc.h @@ -369,13 +369,14 @@ enum pmc_ops { #define PMC_F_KGMON 0x00000040 /*OP ALLOCATE kgmon(8) profiling */ /* V2 API */ #define PMC_F_CALLCHAIN 0x00000080 /*OP ALLOCATE capture callchains */ +#define PMC_F_USERCALLCHAIN 0x00000100 /*OP ALLOCATE use userspace stack */ /* internal flags */ #define PMC_F_ATTACHED_TO_OWNER 0x00010000 /*attached to owner*/ #define PMC_F_NEEDS_LOGFILE 0x00020000 /*needs log file */ #define PMC_F_ATTACH_DONE 0x00040000 /*attached at least once */ -#define PMC_CALLCHAIN_DEPTH_MAX 128 +#define PMC_CALLCHAIN_DEPTH_MAX 512 #define PMC_CC_F_USERSPACE 0x01 /*userspace callchain*/ @@ -568,6 +569,8 @@ struct pmc_driverstats { counter_u64_t pm_buffer_requests_failed; /* #failed buffer requests */ counter_u64_t pm_log_sweeps; /* #sample buffer processing passes */ + counter_u64_t pm_merges; /* merged k+u */ + counter_u64_t pm_overwrites; /* UR overwrites */ }; #endif @@ -643,11 +646,11 @@ struct pmc_op_getdyneventinfo { #define PMC_HASH_SIZE 1024 #define PMC_MTXPOOL_SIZE 2048 -#define PMC_LOG_BUFFER_SIZE 128 -#define PMC_NLOGBUFFERS_PCPU 8 -#define PMC_NSAMPLES 64 -#define PMC_CALLCHAIN_DEPTH 32 -#define PMC_THREADLIST_MAX 64 +#define PMC_LOG_BUFFER_SIZE 256 +#define PMC_NLOGBUFFERS_PCPU 32 +#define PMC_NSAMPLES 256 +#define PMC_CALLCHAIN_DEPTH 128 +#define PMC_THREADLIST_MAX 128 #define PMC_SYSCTL_NAME_PREFIX "kern." PMC_MODULE_NAME "." 
@@ -923,9 +926,9 @@ struct pmc_hw { struct pmc_sample { uint16_t ps_nsamples; /* callchain depth */ + uint16_t ps_nsamples_actual; uint16_t ps_cpu; /* cpu number */ uint16_t ps_flags; /* other flags */ - uint8_t ps_pad[2]; lwpid_t ps_tid; /* thread id */ pid_t ps_pid; /* process PID or -1 */ struct thread *ps_td; /* which thread */ @@ -954,7 +957,7 @@ struct pmc_samplebuffer { struct pmc_cpu { uint32_t pc_state; /* physical cpu number + flags */ - struct pmc_samplebuffer *pc_sb[2]; /* space for samples */ + struct pmc_samplebuffer *pc_sb[3]; /* space for samples */ struct pmc_hw *pc_hwpmcs[]; /* 'npmc' pointers */ }; @@ -1203,7 +1206,7 @@ MALLOC_DECLARE(M_PMC); struct pmc_mdep *pmc_md_initialize(void); /* MD init function */ void pmc_md_finalize(struct pmc_mdep *_md); /* MD fini function */ int pmc_getrowdisp(int _ri); -int pmc_process_interrupt(int _cpu, int _soft, struct pmc *_pm, +int pmc_process_interrupt(int _cpu, int _ring, struct pmc *_pm, struct trapframe *_tf, int _inuserspace); int pmc_save_kernel_callchain(uintptr_t *_cc, int _maxsamples, struct trapframe *_tf); @@ -1211,5 +1214,6 @@ int pmc_save_user_callchain(uintptr_t *_cc, int _maxsamples, struct trapframe *_tf); struct pmc_mdep *pmc_mdep_alloc(int nclasses); void pmc_mdep_free(struct pmc_mdep *md); +void pmc_flush_samples(int cpu); #endif /* _KERNEL */ #endif /* _SYS_PMC_H_ */ diff --git a/sys/sys/pmckern.h b/sys/sys/pmckern.h index 1400666ac4d9..d0a19161483a 100644 --- a/sys/sys/pmckern.h +++ b/sys/sys/pmckern.h @@ -62,9 +62,12 @@ #define PMC_FN_SOFT_SAMPLING 11 #define PMC_FN_THR_CREATE 12 #define PMC_FN_THR_EXIT 13 +#define PMC_FN_THR_USERRET 14 #define PMC_HR 0 /* Hardware ring buffer */ #define PMC_SR 1 /* Software ring buffer */ +#define PMC_UR 2 /* userret ring buffer */ +#define PMC_NUM_SR (PMC_UR+1) struct pmckern_procexec { int pm_credentialschanged; @@ -227,6 +230,9 @@ do { \ #define PMC_PROC_IS_USING_PMCS(p) \ (__predict_false(p->p_flag & P_HWPMC)) +#define PMC_THREAD_HAS_SAMPLES(td) \ + 
(__predict_false((td)->td_pmcpend)) + /* Check if a thread have pending user capture. */ #define PMC_IS_PENDING_CALLCHAIN(p) \ (__predict_false((p)->td_pflags & TDP_CALLCHAIN)) diff --git a/sys/sys/proc.h b/sys/sys/proc.h index b51862fba3db..0bc0e1a67b45 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -375,6 +375,7 @@ struct thread { void *td_lkpi_task; /* LinuxKPI task struct pointer */ TAILQ_ENTRY(thread) td_epochq; /* (t) Epoch queue. */ epoch_section_t td_epoch_section; /* (t) epoch section object */ + int td_pmcpend; }; struct thread0_storage { diff --git a/usr.sbin/pmcstat/pmcpl_callgraph.c b/usr.sbin/pmcstat/pmcpl_callgraph.c index 324bee85b9d0..8b5570336c3e 100644 --- a/usr.sbin/pmcstat/pmcpl_callgraph.c +++ b/usr.sbin/pmcstat/pmcpl_callgraph.c @@ -345,7 +345,7 @@ pmcpl_cg_process(struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr, pc = cc[0]; pmcid = pmcr->pr_pmcid; - parent = pmcstat_cgnode_hash_lookup_pc(pp, pmcid, pc, usermode); + child = parent = pmcstat_cgnode_hash_lookup_pc(pp, pmcid, pc, usermode); if (parent == NULL) { pmcstat_stats.ps_callchain_dubious_frames++; pmcr->pr_dubious_frames++; @@ -384,7 +384,7 @@ pmcpl_cg_process(struct pmcstat_process *pp, struct pmcstat_pmcrecord *pmcr, } } if (ppm == NULL) - return; + continue; image = ppm->ppm_image; loadaddress = ppm->ppm_lowpc + image->pi_vaddr - diff --git a/usr.sbin/pmcstat/pmcstat.8 b/usr.sbin/pmcstat/pmcstat.8 index f6ff4fbb32cb..dfd323dcf585 100644 --- a/usr.sbin/pmcstat/pmcstat.8 +++ b/usr.sbin/pmcstat/pmcstat.8 @@ -47,7 +47,7 @@ .Op Fl R Ar logfilename .Op Fl S Ar event-spec .Op Fl T -.Op Fl U Ar event-spec +.Op Fl U .Op Fl W .Op Fl a Ar pathname .Op Fl c Ar cpu-spec @@ -226,8 +226,10 @@ can be used: 'c+a' switch to accumulative mode, 'c+d' switch to delta mode, 'm' merge PMCs, 'n' change view, 'p' show next PMC, ' ' pause, 'q' quit. calltree only: 'f' cost under threshold is seen as a dot. -.It Fl U Ar event-spec -Provide long description of event. 
+.It Fl U +Toggle capturing user-space call traces while in kernel mode. +The default is for sampling PMCs to capture user-space callchain information +while in user-space mode, and kernel callchain information while in kernel mode. .It Fl W Toggle logging the incremental counts seen by the threads of a tracked process each time they are scheduled on a CPU. diff --git a/usr.sbin/pmcstat/pmcstat.c b/usr.sbin/pmcstat/pmcstat.c index 5ba9d75fc915..f73de2b37385 100644 --- a/usr.sbin/pmcstat/pmcstat.c +++ b/usr.sbin/pmcstat/pmcstat.c @@ -374,7 +374,7 @@ pmcstat_show_usage(void) "\t -R file\t read events from \"file\"\n" "\t -S spec\t allocate a system-wide sampling PMC\n" "\t -T\t\t start in top mode\n" - "\t -U spec \t provide long description of counters matching spec\n" + "\t -U \t\n merged user kernel stack capture\n" "\t -W\t\t (toggle) show counts per context switch\n" "\t -a file\t print sampled PCs and callgraph to \"file\"\n" "\t -c cpu-list\t set cpus for subsequent system-wide PMCs\n" @@ -432,7 +432,8 @@ main(int argc, char **argv) int option, npmc; int c, check_driver_stats; int do_callchain, do_descendants, do_logproccsw, do_logprocexit; - int do_print, do_read, do_listcounters, do_descr, do_long_descr; + int do_print, do_read, do_listcounters, do_descr; + int do_userspace; size_t len; int graphdepth; int pipefd[2], rfd; @@ -455,7 +456,7 @@ main(int argc, char **argv) do_callchain = 1; do_descr = 0; do_descendants = 0; - do_long_descr = 0; + do_userspace = 0; do_logproccsw = 0; do_logprocexit = 0; do_listcounters = 0; @@ -510,7 +511,7 @@ main(int argc, char **argv) CPU_COPY(&rootmask, &cpumask); while ((option = getopt(argc, argv, - "CD:EF:G:ILM:NO:P:R:S:TU:WZa:c:def:gi:k:l:m:n:o:p:qr:s:t:u:vw:z:")) != -1) + "CD:EF:G:ILM:NO:P:R:S:TUWZa:c:def:gi:k:l:m:n:o:p:qr:s:t:u:vw:z:")) != -1) switch (option) { case 'a': /* Annotate + callgraph */ args.pa_flags |= FLAG_DO_ANNOTATE; @@ -677,8 +678,11 @@ main(int argc, char **argv) ev->ev_cpu = PMC_CPU_ANY; ev->ev_flags = 
0; - if (do_callchain) + if (do_callchain) { ev->ev_flags |= PMC_F_CALLCHAIN; + if (do_userspace) + ev->ev_flags |= PMC_F_USERCALLCHAIN; + } if (do_descendants) ev->ev_flags |= PMC_F_DESCENDANTS; if (do_logprocexit) @@ -776,9 +780,9 @@ main(int argc, char **argv) do_descr = 1; event = optarg; break; - case 'U': - do_long_descr = 1; - event = optarg; + case 'U': /* toggle user-space callchain capture */ + do_userspace = !do_userspace; + args.pa_required |= FLAG_HAS_SAMPLING_PMCS; break; case 'v': /* verbose */ args.pa_verbosity++; @@ -816,17 +820,15 @@ main(int argc, char **argv) break; } - if ((do_listcounters | do_descr | do_long_descr) && + if ((do_listcounters | do_descr) && pmc_pmu_enabled() == 0) errx(EX_USAGE, "pmu features not supported on host or hwpmc not loaded"); if (do_listcounters) { pmc_pmu_print_counters(NULL); } else if (do_descr) { pmc_pmu_print_counter_desc(event); - } else if (do_long_descr) { - pmc_pmu_print_counter_desc_long(event); } - if (do_listcounters | do_descr | do_long_descr) + if (do_listcounters | do_descr) exit(0); args.pa_argc = (argc -= optind);