kinst: Add per-CPU interrupt trampolines

In the common case, kinst emulates a traced instruction by copying it to
a trampoline, where it is followed by a jump back to the original code,
and pointing the interrupted thread's %rip at the trampoline.  In
particular, the trampoline is executed with the same CPU context as the
original instruction, so if interrupts are enabled at the point where
the probe fires, they will be enabled when the trampoline is
subsequently executed.

It can happen that an interrupt is raised while a thread is executing a
kinst trampoline.  In that case, it is possible that the interrupt
handler will trigger a kinst probe, so we must ensure that the thread
does not recurse and overwrite its trampoline before it is finished
executing the original contents, otherwise an attempt to trace code
called from interrupt handlers can crash the kernel.

To that end, add a per-CPU trampoline, used when the probe fired with
interrupts disabled.  Note that this is not quite complete since it does
not handle the possibility of kinst probes firing while executing an NMI
handler.

Also ensure that we do not trace instructions which set IF, since in
that case it is not clear which trampoline (the per-thread trampoline or
the per-CPU trampoline) we should use, and since such instructions are
rare.

Reported and tested by:	Domagoj Stolfa
Reviewed by:	christos
Fixes:		f0bc4ed144fc ("kinst: Initial revision")
Differential Revision:	https://reviews.freebsd.org/D37619
This commit is contained in:
Mark Johnston 2022-12-08 15:03:51 -05:00
parent 778b74378f
commit 84d7fe4a6f
3 changed files with 78 additions and 5 deletions

View File

@ -6,6 +6,7 @@
*/
#include <sys/param.h>
#include <sys/pcpu.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
@ -39,6 +40,14 @@
#define KINST_F_JMP 0x0008 /* instruction is a %rip-relative jmp */
#define KINST_F_MOD_DIRECT 0x0010 /* operand is not a memory address */
/*
* Per-CPU trampolines used when the interrupted thread is executing with
* interrupts disabled. If an interrupt is raised while executing a trampoline,
* the interrupt thread cannot safely overwrite its trampoline if it hits a
* kinst probe while executing the interrupt handler.
*/
DPCPU_DEFINE_STATIC(uint8_t *, intr_tramp);
/*
* Map ModR/M register bits to a trapframe offset.
*/
@ -185,7 +194,10 @@ kinst_invop(uintptr_t addr, struct trapframe *frame, uintptr_t scratch)
}
return (DTRACE_INVOP_CALL);
} else {
tramp = curthread->t_kinst;
if ((frame->tf_rflags & PSL_I) == 0)
tramp = DPCPU_GET(intr_tramp);
else
tramp = curthread->t_kinst;
if (tramp == NULL) {
/*
* A trampoline allocation failed, so this probe is
@ -495,7 +507,7 @@ kinst_make_probe(linker_file_t lf, int symindx, linker_symval_t *symval,
struct kinst_probe *kp;
dtrace_kinst_probedesc_t *pd;
const char *func;
int error, n, off;
int error, instrsize, n, off;
uint8_t *instr, *limit;
pd = opaque;
@ -510,17 +522,37 @@ kinst_make_probe(linker_file_t lf, int symindx, linker_symval_t *symval,
/*
* Ignore functions not beginning with the usual function prologue.
* These might correspond to assembly routines with which we should not
* meddle.
* These might correspond to exception handlers with which we should not
* meddle. This does however exclude functions which can be safely
* traced, such as cpu_switch().
*/
if (*instr != KINST_PUSHL_RBP)
return (0);
n = 0;
while (instr < limit) {
instrsize = dtrace_instr_size(instr);
off = (int)(instr - (uint8_t *)symval->value);
if (pd->kpd_off != -1 && off != pd->kpd_off) {
instr += dtrace_instr_size(instr);
instr += instrsize;
continue;
}
/*
* Check for instructions which may enable interrupts. Such
* instructions are tricky to trace since it is unclear whether
* to use the per-thread or per-CPU trampolines. Since they are
* rare, we don't bother to implement special handling for them.
*
* If the caller specified an offset, return an error, otherwise
* silently ignore the instruction so that it remains possible
* to enable all instructions in a function.
*/
if (instrsize == 1 &&
(instr[0] == KINST_POPF || instr[0] == KINST_STI)) {
if (pd->kpd_off != -1)
return (EINVAL);
instr += instrsize;
continue;
}
@ -554,3 +586,34 @@ kinst_make_probe(linker_file_t lf, int symindx, linker_symval_t *symval,
return (0);
}
int
kinst_md_init(void)
{
uint8_t *tramp;
int cpu;
CPU_FOREACH(cpu) {
tramp = kinst_trampoline_alloc(M_WAITOK);
if (tramp == NULL)
return (ENOMEM);
DPCPU_ID_SET(cpu, intr_tramp, tramp);
}
return (0);
}
void
kinst_md_deinit(void)
{
uint8_t *tramp;
int cpu;
CPU_FOREACH(cpu) {
tramp = DPCPU_ID_GET(cpu, intr_tramp);
if (tramp != NULL) {
kinst_trampoline_dealloc(DPCPU_ID_GET(cpu, intr_tramp));
DPCPU_ID_SET(cpu, intr_tramp, NULL);
}
}
}

View File

@ -180,10 +180,16 @@ kinst_load(void *dummy)
error = kinst_trampoline_init();
if (error != 0)
return (error);
error = kinst_md_init();
if (error != 0) {
kinst_trampoline_deinit();
return (error);
}
error = dtrace_register("kinst", &kinst_attr, DTRACE_PRIV_USER, NULL,
&kinst_pops, NULL, &kinst_id);
if (error != 0) {
kinst_md_deinit();
kinst_trampoline_deinit();
return (error);
}
@ -201,6 +207,7 @@ static int
kinst_unload(void *dummy)
{
free(kinst_probetab, M_KINST);
kinst_md_deinit();
kinst_trampoline_deinit();
dtrace_invop_remove(kinst_invop);
destroy_dev(kinst_cdev);

View File

@ -57,6 +57,9 @@ int kinst_trampoline_deinit(void);
uint8_t *kinst_trampoline_alloc(int);
void kinst_trampoline_dealloc(uint8_t *);
int kinst_md_init(void);
void kinst_md_deinit(void);
#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_KINST);
#endif /* MALLOC_DECLARE */