Reimplement stack capture of running threads on i386 and amd64.

After r355784 the td_oncpu field is no longer synchronized by the thread
lock, so the stack capture interrupt cannot be delievered precisely.
Fix this using a loop which drops the thread lock and restarts if the
wrong thread was sampled from the stack capture interrupt handler.

Change the implementation to use a regular interrupt instead of an NMI.
Now that we drop the thread lock, there is no advantage to the latter.

Simplify the KPIs.  Remove stack_save_td_running() and add a return
value to stack_save_td().  On platforms that do not support stack
capture of running threads, stack_save_td() returns EOPNOTSUPP.  If the
target thread is running in user mode, stack_save_td() returns EBUSY.

Reviewed by:	kib
Reported by:	mjg, pho
Tested by:	pho
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D23355
This commit is contained in:
Mark Johnston 2020-01-31 15:43:33 +00:00
parent 80ba579b00
commit 1c29da0279
18 changed files with 150 additions and 186 deletions

View File

@ -27,7 +27,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd October 6, 2017
.Dd January 31, 2020
.Dt STACK 9
.Os
.Sh NAME
@ -65,10 +65,8 @@ In the kernel configuration file:
.Fn stack_sbuf_print_ddb "struct sbuf sb*" "const struct stack *st"
.Ft void
.Fn stack_save "struct stack *st"
.Ft void
.Fn stack_save_td "struct stack *st" "struct thread *td"
.Ft int
.Fn stack_save_td_running "struct stack *st" "struct thread *td"
.Fn stack_save_td "struct stack *st" "struct thread *td"
.Sh DESCRIPTION
The
.Nm
@ -93,18 +91,17 @@ argument is passed to
Memory associated with a trace is freed by calling
.Fn stack_destroy .
.Pp
A trace of the current kernel thread's call stack may be captured using
A trace of the current thread's kernel call stack may be captured using
.Fn stack_save .
.Fn stack_save_td
and
.Fn stack_save_td_running
can also be used to capture the stack of a caller-specified thread.
Callers of these functions must own the thread lock of the specified thread.
can be used to capture the kernel stack of a caller-specified thread.
Callers of these functions must own the thread lock of the specified thread,
and the thread's stack must not be swapped out.
.Fn stack_save_td
can capture the stack of a kernel thread that is not running or
swapped out at the time of the call.
.Fn stack_save_td_running
can capture the stack of a running kernel thread.
can capture the kernel stack of a running thread, though note that this is
not implemented on all platforms.
If the thread is running, the caller must also hold the process lock for the
target thread.
.Pp
.Fn stack_print
and
@ -157,11 +154,11 @@ Otherwise the
does not contain space to record additional frames, and a non-zero value is
returned.
.Pp
.Fn stack_save_td_running
.Fn stack_save_td
returns 0 when the stack capture was successful and a non-zero error number
otherwise.
In particular,
.Er EAGAIN
.Er EBUSY
is returned if the thread was running in user mode at the time that the
capture was attempted, and
.Er EOPNOTSUPP

View File

@ -52,7 +52,6 @@ __FBSDID("$FreeBSD$");
#include "opt_hwpmc_hooks.h"
#include "opt_isa.h"
#include "opt_kdb.h"
#include "opt_stack.h"
#include <sys/param.h>
#include <sys/bus.h>
@ -228,11 +227,6 @@ trap(struct trapframe *frame)
(*pmc_intr)(frame) != 0)
return;
#endif
#ifdef STACK
if (stack_nmi_handler(frame) != 0)
return;
#endif
}
if ((frame->tf_rflags & PSL_I) == 0) {

View File

@ -30,8 +30,11 @@ __FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/stack.h>
#include <machine/pcb.h>
#include <machine/stack.h>
@ -63,13 +66,17 @@ stack_save(struct stack *st)
stack_capture(st, &state);
}
void
int
stack_save_td(struct stack *st, struct thread *td)
{
struct unwind_state state;
KASSERT(!TD_IS_SWAPPED(td), ("stack_save_td: swapped"));
KASSERT(!TD_IS_RUNNING(td), ("stack_save_td: running"));
THREAD_LOCK_ASSERT(td, MA_OWNED);
KASSERT(!TD_IS_SWAPPED(td),
("stack_save_td: thread %p is swapped", td));
if (TD_IS_RUNNING(td))
return (EOPNOTSUPP);
state.registers[FP] = td->td_pcb->pcb_regs.sf_r11;
state.registers[SP] = td->td_pcb->pcb_regs.sf_sp;
@ -77,15 +84,5 @@ stack_save_td(struct stack *st, struct thread *td)
state.registers[PC] = td->td_pcb->pcb_regs.sf_pc;
stack_capture(st, &state);
}
int
stack_save_td_running(struct stack *st, struct thread *td)
{
if (td == curthread) {
stack_save(st);
return (0);
}
return (EOPNOTSUPP);
return (0);
}

View File

@ -33,6 +33,8 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/stack.h>
@ -55,28 +57,24 @@ stack_capture(struct stack *st, struct unwind_state *frame)
}
}
void
int
stack_save_td(struct stack *st, struct thread *td)
{
struct unwind_state frame;
if (TD_IS_SWAPPED(td))
panic("stack_save_td: swapped");
THREAD_LOCK_ASSERT(td, MA_OWNED);
KASSERT(!TD_IS_SWAPPED(td),
("stack_save_td: thread %p is swapped", td));
if (TD_IS_RUNNING(td))
panic("stack_save_td: running");
return (EOPNOTSUPP);
frame.sp = td->td_pcb->pcb_sp;
frame.fp = td->td_pcb->pcb_x[29];
frame.pc = td->td_pcb->pcb_x[30];
stack_capture(st, &frame);
}
int
stack_save_td_running(struct stack *st, struct thread *td)
{
return (EOPNOTSUPP);
return (0);
}
void

View File

@ -52,7 +52,6 @@ __FBSDID("$FreeBSD$");
#include "opt_hwpmc_hooks.h"
#include "opt_isa.h"
#include "opt_kdb.h"
#include "opt_stack.h"
#include "opt_trap.h"
#include <sys/param.h>
@ -248,11 +247,6 @@ trap(struct trapframe *frame)
(*pmc_intr)(frame) != 0)
return;
#endif
#ifdef STACK
if (stack_nmi_handler(frame) != 0)
return;
#endif
}
if (type == T_MCHK) {

View File

@ -2669,17 +2669,12 @@ sysctl_kern_proc_kstack(SYSCTL_HANDLER_ARGS)
sizeof(kkstp->kkst_trace), SBUF_FIXEDLEN);
thread_lock(td);
kkstp->kkst_tid = td->td_tid;
if (TD_IS_SWAPPED(td)) {
if (TD_IS_SWAPPED(td))
kkstp->kkst_state = KKST_STATE_SWAPPED;
} else if (TD_IS_RUNNING(td)) {
if (stack_save_td_running(st, td) == 0)
kkstp->kkst_state = KKST_STATE_STACKOK;
else
kkstp->kkst_state = KKST_STATE_RUNNING;
} else {
else if (stack_save_td(st, td) == 0)
kkstp->kkst_state = KKST_STATE_STACKOK;
stack_save_td(st, td);
}
else
kkstp->kkst_state = KKST_STATE_RUNNING;
thread_unlock(td);
PROC_UNLOCK(p);
stack_sbuf_print(&sb, st);

View File

@ -432,9 +432,8 @@ kdb_backtrace_thread(struct thread *td)
struct stack st;
printf("KDB: stack backtrace of thread %d:\n", td->td_tid);
stack_zero(&st);
stack_save_td(&st, td);
stack_print_ddb(&st);
if (stack_save_td(&st, td) == 0)
stack_print_ddb(&st);
}
#endif
}

View File

@ -1245,7 +1245,7 @@ sleepq_sbuf_print_stacks(struct sbuf *sb, const void *wchan, int queue,
goto loop_end;
/* Note the td_lock is equal to the sleepq_lock here. */
stack_save_td(st[stack_idx], td);
(void)stack_save_td(st[stack_idx], td);
sbuf_printf(td_infos[stack_idx], "%d: %s %p",
td->td_tid, td->td_name, td);

View File

@ -340,12 +340,8 @@ tty_info(struct tty *tp)
if (tty_info_kstacks) {
if (TD_IS_SWAPPED(td))
sterr = ENOENT;
else if (TD_IS_RUNNING(td))
sterr = stack_save_td_running(&stack, td);
else {
stack_save_td(&stack, td);
sterr = 0;
}
else
sterr = stack_save_td(&stack, td);
}
#endif
thread_unlock(td);

View File

@ -29,9 +29,10 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/systm.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/stack.h>
@ -127,26 +128,22 @@ stack_capture(struct stack *st, u_register_t pc, u_register_t sp)
return;
}
void
int
stack_save_td(struct stack *st, struct thread *td)
{
u_register_t pc, sp;
if (TD_IS_SWAPPED(td))
panic("stack_save_td: swapped");
THREAD_LOCK_ASSERT(td, MA_OWNED);
KASSERT(!TD_IS_SWAPPED(td),
("stack_save_td: thread %p is swapped", td));
if (TD_IS_RUNNING(td))
panic("stack_save_td: running");
return (EOPNOTSUPP);
pc = td->td_pcb->pcb_regs.pc;
sp = td->td_pcb->pcb_regs.sp;
stack_capture(st, pc, sp);
}
int
stack_save_td_running(struct stack *st, struct thread *td)
{
return (EOPNOTSUPP);
return (0);
}
void

View File

@ -30,6 +30,8 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/stack.h>
#include <sys/systm.h>
@ -86,25 +88,21 @@ stack_capture(struct stack *st, vm_offset_t frame)
}
}
void
int
stack_save_td(struct stack *st, struct thread *td)
{
vm_offset_t frame;
if (TD_IS_SWAPPED(td))
panic("stack_save_td: swapped");
THREAD_LOCK_ASSERT(td, MA_OWNED);
KASSERT(!TD_IS_SWAPPED(td),
("stack_save_td: thread %p is swapped", td));
if (TD_IS_RUNNING(td))
panic("stack_save_td: running");
return (EOPNOTSUPP);
frame = td->td_pcb->pcb_sp;
stack_capture(st, frame);
}
int
stack_save_td_running(struct stack *st, struct thread *td)
{
return (EOPNOTSUPP);
return (0);
}
void

View File

@ -37,6 +37,8 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/stack.h>
@ -60,28 +62,24 @@ stack_capture(struct stack *st, struct unwind_state *frame)
}
}
void
int
stack_save_td(struct stack *st, struct thread *td)
{
struct unwind_state frame;
if (TD_IS_SWAPPED(td))
panic("stack_save_td: swapped");
THREAD_LOCK_ASSERT(td, MA_OWNED);
KASSERT(!TD_IS_SWAPPED(td),
("stack_save_td: thread %p is swapped", td));
if (TD_IS_RUNNING(td))
panic("stack_save_td: running");
return (EOPNOTSUPP);
frame.sp = td->td_pcb->pcb_sp;
frame.fp = td->td_pcb->pcb_s[0];
frame.pc = td->td_pcb->pcb_ra;
stack_capture(st, &frame);
}
int
stack_save_td_running(struct stack *st, struct thread *td)
{
return (EOPNOTSUPP);
return (0);
}
void

View File

@ -32,6 +32,8 @@ __FBSDID("$FreeBSD$");
#include "opt_kstack_pages.h"
#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/stack.h>
#include <sys/systm.h>
@ -72,23 +74,19 @@ stack_capture(struct stack *st, struct frame *frame)
}
}
void
int
stack_save_td(struct stack *st, struct thread *td)
{
if (TD_IS_SWAPPED(td))
panic("stack_save_td: swapped");
THREAD_LOCK_ASSERT(td, MA_OWNED);
KASSERT(!TD_IS_SWAPPED(td),
("stack_save_td: thread %p is swapped", td));
if (TD_IS_RUNNING(td))
panic("stack_save_td: running");
return (EOPNOTSUPP);
stack_capture(st, (struct frame *)(td->td_pcb->pcb_sp + SPOFF));
}
int
stack_save_td_running(struct stack *st, struct thread *td)
{
return (EOPNOTSUPP);
return (0);
}
void

View File

@ -67,7 +67,6 @@ void stack_ktr(u_int, const char *, int, const struct stack *,
/* MD Routines. */
struct thread;
void stack_save(struct stack *);
void stack_save_td(struct stack *, struct thread *);
int stack_save_td_running(struct stack *, struct thread *);
int stack_save_td(struct stack *, struct thread *);
#endif

View File

@ -123,20 +123,20 @@
#define IPI_AST 0 /* Generate software trap. */
#define IPI_PREEMPT 1
#define IPI_HARDCLOCK 2
#define IPI_BITMAP_LAST IPI_HARDCLOCK
#define IPI_TRACE 3 /* Collect stack trace. */
#define IPI_BITMAP_LAST IPI_TRACE
#define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST)
#define IPI_STOP (APIC_IPI_INTS + 6) /* Stop CPU until restarted. */
#define IPI_SUSPEND (APIC_IPI_INTS + 7) /* Suspend CPU until restarted. */
#define IPI_DYN_FIRST (APIC_IPI_INTS + 8)
#define IPI_DYN_LAST (253) /* IPIs allocated at runtime */
#define IPI_DYN_LAST (254) /* IPIs allocated at runtime */
/*
* IPI_STOP_HARD does not need to occupy a slot in the IPI vector space since
* it is delivered using an NMI anyways.
*/
#define IPI_NMI_FIRST 254
#define IPI_TRACE 254 /* Interrupt for tracing. */
#define IPI_NMI_FIRST 255
#define IPI_STOP_HARD 255 /* Stop CPU with a NMI. */
/*

View File

@ -55,7 +55,7 @@ struct i386_frame {
#endif /* __amd64__ */
#ifdef _KERNEL
int stack_nmi_handler(struct trapframe *);
void stack_capture_intr(void);
#endif
#endif /* !_X86_STACK_H */

View File

@ -31,10 +31,12 @@ __FBSDID("$FreeBSD$");
#include "opt_apic.h"
#endif
#include "opt_cpu.h"
#include "opt_ddb.h"
#include "opt_kstack_pages.h"
#include "opt_pmap.h"
#include "opt_sched.h"
#include "opt_smp.h"
#include "opt_stack.h"
#include <sys/param.h>
#include <sys/systm.h>
@ -75,6 +77,7 @@ __FBSDID("$FreeBSD$");
#include <machine/psl.h>
#include <machine/smp.h>
#include <machine/specialreg.h>
#include <machine/stack.h>
#include <x86/ucode.h>
static MALLOC_DEFINE(M_CPUS, "cpus", "CPU items");
@ -1284,6 +1287,10 @@ ipi_bitmap_handler(struct trapframe frame)
td->td_intr_nesting_level++;
oldframe = td->td_intr_frame;
td->td_intr_frame = &frame;
#if defined(STACK) || defined(DDB)
if (ipi_bitmap & (1 << IPI_TRACE))
stack_capture_intr();
#endif
if (ipi_bitmap & (1 << IPI_PREEMPT)) {
#ifdef COUNT_IPIS
(*ipi_preempt_counts[cpu])++;

View File

@ -63,14 +63,11 @@ typedef struct i386_frame *x86_frame_t;
typedef struct amd64_frame *x86_frame_t;
#endif
#ifdef STACK
static struct stack *nmi_stack;
static volatile struct thread *nmi_pending;
#ifdef SMP
static struct mtx nmi_lock;
MTX_SYSINIT(nmi_lock, &nmi_lock, "stack_nmi", MTX_SPIN);
#endif
static struct stack *stack_intr_stack;
static struct thread *stack_intr_td;
static struct mtx intr_lock;
MTX_SYSINIT(intr_lock, &intr_lock, "stack intr", MTX_DEF);
#endif
static void
@ -97,74 +94,74 @@ stack_capture(struct thread *td, struct stack *st, register_t fp)
}
}
int
stack_nmi_handler(struct trapframe *tf)
{
#ifdef STACK
/* Don't consume an NMI that wasn't meant for us. */
if (nmi_stack == NULL || curthread != nmi_pending)
return (0);
if (!TRAPF_USERMODE(tf) && (TF_FLAGS(tf) & PSL_I) != 0)
stack_capture(curthread, nmi_stack, TF_FP(tf));
else
/* We were running in usermode or had interrupts disabled. */
nmi_stack->depth = 0;
atomic_store_rel_ptr((long *)&nmi_pending, (long)NULL);
return (1);
#else
return (0);
#endif
}
#ifdef SMP
void
stack_capture_intr(void)
{
struct thread *td;
td = curthread;
stack_capture(td, stack_intr_stack, TF_FP(td->td_intr_frame));
atomic_store_rel_ptr((void *)&stack_intr_td, (uintptr_t)td);
}
#endif
int
stack_save_td(struct stack *st, struct thread *td)
{
int cpuid, error;
bool done;
if (TD_IS_SWAPPED(td))
panic("stack_save_td: swapped");
if (TD_IS_RUNNING(td))
panic("stack_save_td: running");
stack_capture(td, st, PCB_FP(td->td_pcb));
}
int
stack_save_td_running(struct stack *st, struct thread *td)
{
#ifdef STACK
THREAD_LOCK_ASSERT(td, MA_OWNED);
MPASS(TD_IS_RUNNING(td));
KASSERT(!TD_IS_SWAPPED(td),
("stack_save_td: thread %p is swapped", td));
if (TD_IS_RUNNING(td) && td != curthread)
PROC_LOCK_ASSERT(td->td_proc, MA_OWNED);
if (td == curthread) {
stack_save(st);
return (0);
}
for (done = false, error = 0; !done;) {
if (!TD_IS_RUNNING(td)) {
/*
* The thread will not start running so long as we hold
* its lock.
*/
stack_capture(td, st, PCB_FP(td->td_pcb));
error = 0;
break;
}
#ifdef SMP
mtx_lock_spin(&nmi_lock);
thread_unlock(td);
cpuid = atomic_load_int(&td->td_oncpu);
if (cpuid == NOCPU) {
cpu_spinwait();
} else {
mtx_lock(&intr_lock);
stack_intr_td = NULL;
stack_intr_stack = st;
ipi_cpu(cpuid, IPI_TRACE);
while (atomic_load_acq_ptr((void *)&stack_intr_td) ==
(uintptr_t)NULL)
cpu_spinwait();
if (stack_intr_td == td) {
done = true;
error = st->depth > 0 ? 0 : EBUSY;
}
stack_intr_td = NULL;
mtx_unlock(&intr_lock);
}
thread_lock(td);
#else
(void)cpuid;
KASSERT(0, ("%s: multiple running threads", __func__));
#endif
}
nmi_stack = st;
nmi_pending = td;
ipi_cpu(td->td_oncpu, IPI_TRACE);
while ((void *)atomic_load_acq_ptr((long *)&nmi_pending) != NULL)
cpu_spinwait();
nmi_stack = NULL;
mtx_unlock_spin(&nmi_lock);
if (st->depth == 0)
return (EAGAIN);
#else /* !SMP */
KASSERT(0, ("curthread isn't running"));
#endif /* SMP */
return (0);
#else /* !STACK */
return (EOPNOTSUPP);
#endif /* STACK */
return (error);
}
void