Allow swi_sched() to be called from NMI context.

For the purpose of handling hardware errors reported via NMIs, I need a way
to escape NMI context, which is too restrictive to do anything significant.

To do this, the change introduces a new swi_sched() flag, SWI_FROMNMI, which
makes it careful about the KPIs it uses.  On platforms that allow sending
IPIs from NMI context (x86 for now) it immediately wakes clk_intr_event via
the new IPI_SWI; otherwise it works just like SWI_DELAY.  To handle the
delayed SWIs, this patch calls clk_intr_event on every hardclock() tick.

MFC after:	2 weeks
Sponsored by:	iXsystems, Inc.
Differential Revision:	https://reviews.freebsd.org/D25754
This commit is contained in:
Alexander Motin 2020-07-25 15:19:38 +00:00
parent 3024e8af1e
commit aba10e131f
13 changed files with 102 additions and 11 deletions

View File

@ -23,7 +23,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd April 19, 2012
.Dd July 25, 2020
.Dt SWI 9
.Os
.Sh NAME
@ -132,7 +132,7 @@ The
.Fa flags
argument specifies how and when the handler should be run and is a mask of one
or more of the following flags:
.Bl -tag -width SWI_DELAY
.Bl -tag -width SWI_FROMNMI
.It Dv SWI_DELAY
Specifies that the kernel should mark the specified handler as needing to run,
but the kernel should not schedule the software interrupt thread to run.
@ -146,6 +146,13 @@ functionality performed by
.Fn setdelayed
in earlier versions of
.Fx .
.It Dv SWI_FROMNMI
Specifies that
.Fn swi_sched
is called from NMI context and should be careful about the KPIs it uses.
On platforms that allow sending IPIs from NMI context it immediately wakes
.Va clk_intr_event
via an IPI; otherwise it works just like
.Dv SWI_DELAY .
.El
.Pp
The

View File

@ -205,6 +205,16 @@ IDTVEC(spuriousint)
call as_lapic_eoi
jmp doreti
/*
* Executed by a CPU when it receives an IPI_SWI.
*
* Calls as_lapic_eoi first, then the C routine ipi_swi_handler, and
* finally leaves through the common doreti path.
* NOTE(review): as_lapic_eoi presumably acknowledges the interrupt at the
* local APIC -- confirm against its definition.
*/
INTR_HANDLER ipi_swi
call as_lapic_eoi
FAKE_MCOUNT(TF_RIP(%rsp))
call ipi_swi_handler
MEXITCOUNT
jmp doreti
/*
* Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU.
*

View File

@ -223,6 +223,10 @@ cpu_mp_start(void)
setidt(IPI_SUSPEND, pti ? IDTVEC(cpususpend_pti) : IDTVEC(cpususpend),
SDT_SYSIGT, SEL_KPL, 0);
/* Install an IPI for calling delayed SWI */
setidt(IPI_SWI, pti ? IDTVEC(ipi_swi_pti) : IDTVEC(ipi_swi),
SDT_SYSIGT, SEL_KPL, 0);
/* Set boot_cpu_id if needed. */
if (boot_cpu_id == -1) {
boot_cpu_id = PCPU_GET(apic_id);

View File

@ -32,6 +32,7 @@ inthand_t
IDTVEC(invlop_pti),
IDTVEC(invlop),
IDTVEC(ipi_intr_bitmap_handler_pti),
IDTVEC(ipi_swi_pti),
IDTVEC(cpustop_pti),
IDTVEC(cpususpend_pti),
IDTVEC(rendezvous_pti);

View File

@ -308,6 +308,23 @@ IDTVEC(cpususpend)
call *%eax
jmp doreti
/*
* Executed by a CPU when it receives an IPI_SWI.
*
* After the entry sequence (PUSH_FRAME, SET_KERNEL_SREGS, cld, KENTER) it
* calls as_lapic_eoi, then calls the C routine ipi_swi_handler indirectly
* through %eax, and finally leaves through the common doreti path.
* NOTE(review): as_lapic_eoi presumably acknowledges the interrupt at the
* local APIC -- confirm against its definition.
*/
.text
SUPERALIGN_TEXT
IDTVEC(ipi_swi)
PUSH_FRAME
SET_KERNEL_SREGS
cld
KENTER
call as_lapic_eoi
FAKE_MCOUNT(TF_EIP(%esp))
movl $ipi_swi_handler, %eax
call *%eax
MEXITCOUNT
jmp doreti
/*
* Executed by a CPU when it receives a RENDEZVOUS IPI from another CPU.
*

View File

@ -188,6 +188,10 @@ cpu_mp_start(void)
setidt(IPI_SUSPEND, IDTVEC(cpususpend),
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* Install an IPI for calling delayed SWI */
setidt(IPI_SWI, IDTVEC(ipi_swi),
SDT_SYS386IGT, SEL_KPL, GSEL(GCODE_SEL, SEL_KPL));
/* Set boot_cpu_id if needed. */
if (boot_cpu_id == -1) {
boot_cpu_id = PCPU_GET(apic_id);

View File

@ -508,6 +508,7 @@ hardclock(int cnt, int usermode)
if (i > 0 && i <= newticks)
watchdog_fire();
}
intr_event_handle(clk_intr_event, NULL);
}
if (curcpu == CPU_FIRST())
cpu_tick_calibration();

View File

@ -60,6 +60,7 @@ __FBSDID("$FreeBSD$");
#include <machine/atomic.h>
#include <machine/cpu.h>
#include <machine/md_var.h>
#include <machine/smp.h>
#include <machine/stdarg.h>
#ifdef DDB
#include <ddb/ddb.h>
@ -85,6 +86,7 @@ struct intr_entropy {
uintptr_t event;
};
struct intr_event *clk_intr_event;
struct intr_event *tty_intr_event;
void *vm_ih;
struct proc *intrproc;
@ -1018,7 +1020,7 @@ swi_add(struct intr_event **eventp, const char *name, driver_intr_t handler,
void *arg, int pri, enum intr_type flags, void **cookiep)
{
struct intr_event *ie;
int error;
int error = 0;
if (flags & INTR_ENTROPY)
return (EINVAL);
@ -1036,8 +1038,10 @@ swi_add(struct intr_event **eventp, const char *name, driver_intr_t handler,
if (eventp != NULL)
*eventp = ie;
}
error = intr_event_add_handler(ie, name, NULL, handler, arg,
PI_SWI(pri), flags, cookiep);
if (handler != NULL) {
error = intr_event_add_handler(ie, name, NULL, handler, arg,
PI_SWI(pri), flags, cookiep);
}
return (error);
}
@ -1055,9 +1059,11 @@ swi_sched(void *cookie, int flags)
CTR3(KTR_INTR, "swi_sched: %s %s need=%d", ie->ie_name, ih->ih_name,
ih->ih_need);
entropy.event = (uintptr_t)ih;
entropy.td = curthread;
random_harvest_queue(&entropy, sizeof(entropy), RANDOM_SWI);
if ((flags & SWI_FROMNMI) == 0) {
entropy.event = (uintptr_t)ih;
entropy.td = curthread;
random_harvest_queue(&entropy, sizeof(entropy), RANDOM_SWI);
}
/*
* Set ih_need for this handler so that if the ithread is already
@ -1066,7 +1072,16 @@ swi_sched(void *cookie, int flags)
*/
ih->ih_need = 1;
if (!(flags & SWI_DELAY)) {
if (flags & SWI_DELAY)
return;
if (flags & SWI_FROMNMI) {
#if defined(SMP) && (defined(__i386__) || defined(__amd64__))
KASSERT(ie == clk_intr_event,
("SWI_FROMNMI used not with clk_intr_event"));
ipi_self_from_nmi(IPI_SWI);
#endif
} else {
VM_CNT_INC(v_soft);
error = intr_event_schedule_thread(ie);
KASSERT(error == 0, ("stray software interrupt"));
@ -1346,6 +1361,8 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame)
CK_SLIST_FOREACH(ih, &ie->ie_handlers, ih_next) {
if ((ih->ih_flags & IH_SUSP) != 0)
continue;
if ((ie->ie_flags & IE_SOFT) != 0 && ih->ih_need == 0)
continue;
if (ih->ih_filter == NULL) {
thread = true;
continue;
@ -1570,6 +1587,9 @@ static void
start_softintr(void *dummy)
{
if (swi_add(&clk_intr_event, "clk", NULL, NULL, SWI_CLOCK,
INTR_MPSAFE, NULL))
panic("died while creating clk swi ithread");
if (swi_add(NULL, "vm", swi_vm, NULL, SWI_VM, INTR_MPSAFE, &vm_ih))
panic("died while creating vm swi ithread");
}

View File

@ -133,7 +133,8 @@ struct intr_event {
#define IE_SOFT 0x000001 /* Software interrupt. */
#define IE_ADDING_THREAD 0x000004 /* Currently building an ithread. */
/* Flags to pass to sched_swi. */
/* Flags to pass to swi_sched. */
#define SWI_FROMNMI 0x1
#define SWI_DELAY 0x2
/*
@ -151,6 +152,7 @@ struct intr_event {
struct proc;
extern struct intr_event *clk_intr_event;
extern struct intr_event *tty_intr_event;
extern void *vm_ih;

View File

@ -130,7 +130,8 @@
#define IPI_STOP (APIC_IPI_INTS + 6) /* Stop CPU until restarted. */
#define IPI_SUSPEND (APIC_IPI_INTS + 7) /* Suspend CPU until restarted. */
#define IPI_DYN_FIRST (APIC_IPI_INTS + 8)
#define IPI_SWI (APIC_IPI_INTS + 8) /* Run clk_intr_event. */
#define IPI_DYN_FIRST (APIC_IPI_INTS + 9)
#define IPI_DYN_LAST (254) /* IPIs allocated at runtime */
/*

View File

@ -76,6 +76,7 @@ extern u_long *ipi_rendezvous_counts[MAXCPU];
/* IPI handlers */
inthand_t
IDTVEC(ipi_intr_bitmap_handler), /* Bitmap based IPIs */
IDTVEC(ipi_swi), /* Runs delayed SWI */
IDTVEC(cpustop), /* CPU stops & waits to be restarted */
IDTVEC(cpususpend), /* CPU suspends & waits to be resumed */
IDTVEC(rendezvous); /* handle CPU rendezvous */
@ -96,6 +97,7 @@ void ipi_all_but_self(u_int ipi);
void ipi_bitmap_handler(struct trapframe frame);
void ipi_cpu(int cpu, u_int ipi);
int ipi_nmi_handler(void);
void ipi_swi_handler(struct trapframe frame);
void ipi_selected(cpuset_t cpus, u_int ipi);
void ipi_self_from_nmi(u_int vector);
void set_interrupt_apic_ids(void);

View File

@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$");
#ifdef GPROF
#include <sys/gmon.h>
#endif
#include <sys/interrupt.h>
#include <sys/kdb.h>
#include <sys/kernel.h>
#include <sys/ktr.h>
@ -1620,6 +1621,16 @@ cpususpend_handler(void)
CPU_CLR_ATOMIC(cpu, &toresume_cpus);
}
/*
* Handle an IPI_SWI by waking the delayed SWI thread: clk_intr_event is
* passed to intr_event_handle() together with the address of the trap
* frame this handler received by value.
*/
void
ipi_swi_handler(struct trapframe frame)
{
intr_event_handle(clk_intr_event, &frame);
}
/*
* This is called once the rest of the system is up and running and we're
* ready to let the AP's out of the pen.

View File

@ -76,6 +76,7 @@ static driver_filter_t xen_invlcache;
static driver_filter_t xen_ipi_bitmap_handler;
static driver_filter_t xen_cpustop_handler;
static driver_filter_t xen_cpususpend_handler;
static driver_filter_t xen_ipi_swi_handler;
#endif
/*---------------------------------- Macros ----------------------------------*/
@ -103,6 +104,7 @@ static struct xen_ipi_handler xen_ipis[] =
[IPI_TO_IDX(IPI_BITMAP_VECTOR)] = { xen_ipi_bitmap_handler, "b" },
[IPI_TO_IDX(IPI_STOP)] = { xen_cpustop_handler, "st" },
[IPI_TO_IDX(IPI_SUSPEND)] = { xen_cpususpend_handler, "sp" },
[IPI_TO_IDX(IPI_SWI)] = { xen_ipi_swi_handler, "sw" },
};
#endif
@ -522,6 +524,15 @@ xen_cpususpend_handler(void *arg)
return (FILTER_HANDLED);
}
/*
 * Xen filter for IPI_SWI.  The opaque argument is interpreted as a pointer
 * to the trap frame, which is forwarded by value to ipi_swi_handler().
 * Always reports the interrupt as handled.
 */
static int
xen_ipi_swi_handler(void *arg)
{
	ipi_swi_handler(*(struct trapframe *)arg);

	return (FILTER_HANDLED);
}
/*----------------------------- XEN PV IPI setup -----------------------------*/
/*
* Those functions are provided outside of the Xen PV APIC implementation