freebsd-dev/sys/x86/xen/xen_apic.c
Konstantin Belousov dc43978aa5 amd64: allow parallel shootdown IPIs
Stop using smp_ipi_mtx to protect global shootdown state, and
move/multiply the global state into pcpu.  Now each CPU can initiate
shootdown IPI independently from other CPUs.  Initiator enters
critical section, then fills its local PCPU shootdown info
(pc_smp_tlb_XXX), then clears scoreboard generation at location (cpu,
my_cpuid) for each target cpu.  After that IPI is sent to all targets
which scan for zeroed scoreboard generation words.  Upon finding such
word the shootdown data is read from corresponding cpu' pcpu, and
generation is set.  Meantime initiator loops waiting for all zeroed
generations in scoreboard to update.

Initiator does not disable interrupts, which should allow
non-invalidation IPIs from deadlocking, it only needs to disable
preemption to pin itself to the instance of the pcpu smp_tlb data.

The generation is set before the actual invalidation is performed in
handler. It is safe because target CPU cannot return to userspace
before handler finishes. In principle only NMI can preempt the
handler, but NMI would see the kernel handler frame and not touch
not-invalidated user page table.

Handlers loop until they do not see zeroed scoreboard generations.
This, together with hardware keeping one pending IPI in LAPIC IRR
should prevent lost shootdowns.

Notes.
1. The code does protect writes to LAPIC ICR with exclusion. I believe
   this is fine because we in fact do not send IPIs from interrupt
   handlers. More for !x2APIC mode where ICR access for write requires
   two registers write, we disable interrupts around it. If considered
   incorrect, I can add per-cpu spinlock around ipi_send().
2. Scoreboard lines owned by given target CPU can be padded to the
   cache line, to reduce ping-pong.

Reviewed by:	markj (previous version)
Discussed with:	alc
Tested by:	pho
Sponsored by:	The FreeBSD Foundation
MFC after:	3 weeks
Differential revision:	https://reviews.freebsd.org/D25510
2020-07-14 20:37:50 +00:00

574 lines
12 KiB
C

/*
* Copyright (c) 2014 Roger Pau Monné <roger.pau@citrix.com>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/cpufunc.h>
#include <machine/cpu.h>
#include <machine/intr_machdep.h>
#include <machine/md_var.h>
#include <machine/smp.h>
#include <x86/apicreg.h>
#include <x86/apicvar.h>
#include <xen/xen-os.h>
#include <xen/features.h>
#include <xen/gnttab.h>
#include <xen/hypervisor.h>
#include <xen/hvm.h>
#include <xen/xen_intr.h>
#include <xen/interface/vcpu.h>
/*--------------------------------- Macros -----------------------------------*/
#define XEN_APIC_UNSUPPORTED \
panic("%s: not available in Xen PV port.", __func__)
/*--------------------------- Forward Declarations ---------------------------*/
#ifdef SMP
static driver_filter_t xen_smp_rendezvous_action;
#ifdef __amd64__
static driver_filter_t xen_invlop;
#else
static driver_filter_t xen_invltlb;
static driver_filter_t xen_invlpg;
static driver_filter_t xen_invlrng;
static driver_filter_t xen_invlcache;
#endif
static driver_filter_t xen_ipi_bitmap_handler;
static driver_filter_t xen_cpustop_handler;
static driver_filter_t xen_cpususpend_handler;
#endif
/*---------------------------------- Macros ----------------------------------*/
#define IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS)
/*--------------------------------- Xen IPIs ---------------------------------*/
#ifdef SMP
struct xen_ipi_handler
{
driver_filter_t *filter;
const char *description;
};
static struct xen_ipi_handler xen_ipis[] =
{
[IPI_TO_IDX(IPI_RENDEZVOUS)] = { xen_smp_rendezvous_action, "r" },
#ifdef __amd64__
[IPI_TO_IDX(IPI_INVLOP)] = { xen_invlop, "itlb"},
#else
[IPI_TO_IDX(IPI_INVLTLB)] = { xen_invltlb, "itlb"},
[IPI_TO_IDX(IPI_INVLPG)] = { xen_invlpg, "ipg" },
[IPI_TO_IDX(IPI_INVLRNG)] = { xen_invlrng, "irg" },
[IPI_TO_IDX(IPI_INVLCACHE)] = { xen_invlcache, "ic" },
#endif
[IPI_TO_IDX(IPI_BITMAP_VECTOR)] = { xen_ipi_bitmap_handler, "b" },
[IPI_TO_IDX(IPI_STOP)] = { xen_cpustop_handler, "st" },
[IPI_TO_IDX(IPI_SUSPEND)] = { xen_cpususpend_handler, "sp" },
};
#endif
/*------------------------------- Per-CPU Data -------------------------------*/
#ifdef SMP
DPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]);
#endif
/*------------------------------- Xen PV APIC --------------------------------*/
static void
xen_pv_lapic_create(u_int apic_id, int boot_cpu)
{
#ifdef SMP
cpu_add(apic_id, boot_cpu);
#endif
}
static void
xen_pv_lapic_init(vm_paddr_t addr)
{
}
static void
xen_pv_lapic_setup(int boot)
{
}
static void
xen_pv_lapic_dump(const char *str)
{
printf("cpu%d %s XEN PV LAPIC\n", PCPU_GET(cpuid), str);
}
static void
xen_pv_lapic_disable(void)
{
}
static bool
xen_pv_lapic_is_x2apic(void)
{
return (false);
}
static void
xen_pv_lapic_eoi(void)
{
XEN_APIC_UNSUPPORTED;
}
static int
xen_pv_lapic_id(void)
{
return (PCPU_GET(apic_id));
}
static int
xen_pv_lapic_intr_pending(u_int vector)
{
XEN_APIC_UNSUPPORTED;
return (0);
}
static u_int
xen_pv_apic_cpuid(u_int apic_id)
{
#ifdef SMP
return (apic_cpuids[apic_id]);
#else
return (0);
#endif
}
static u_int
xen_pv_apic_alloc_vector(u_int apic_id, u_int irq)
{
XEN_APIC_UNSUPPORTED;
return (0);
}
static u_int
xen_pv_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align)
{
XEN_APIC_UNSUPPORTED;
return (0);
}
static void
xen_pv_apic_disable_vector(u_int apic_id, u_int vector)
{
XEN_APIC_UNSUPPORTED;
}
static void
xen_pv_apic_enable_vector(u_int apic_id, u_int vector)
{
XEN_APIC_UNSUPPORTED;
}
static void
xen_pv_apic_free_vector(u_int apic_id, u_int vector, u_int irq)
{
XEN_APIC_UNSUPPORTED;
}
static void
xen_pv_lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id)
{
XEN_APIC_UNSUPPORTED;
}
static int
xen_pv_lapic_enable_pmc(void)
{
XEN_APIC_UNSUPPORTED;
return (0);
}
static void
xen_pv_lapic_disable_pmc(void)
{
XEN_APIC_UNSUPPORTED;
}
static void
xen_pv_lapic_reenable_pmc(void)
{
XEN_APIC_UNSUPPORTED;
}
static void
xen_pv_lapic_enable_cmc(void)
{
}
#ifdef SMP
static void
xen_pv_lapic_ipi_raw(register_t icrlo, u_int dest)
{
XEN_APIC_UNSUPPORTED;
}
#define PCPU_ID_GET(id, field) (pcpu_find(id)->pc_##field)
static void
send_nmi(int dest)
{
unsigned int cpu;
/*
* NMIs are not routed over event channels, and instead delivered as on
* native using the exception vector (#2). Triggering them can be done
* using the local APIC, or an hypercall as a shortcut like it's done
* below.
*/
switch(dest) {
case APIC_IPI_DEST_SELF:
HYPERVISOR_vcpu_op(VCPUOP_send_nmi, PCPU_GET(vcpu_id), NULL);
break;
case APIC_IPI_DEST_ALL:
CPU_FOREACH(cpu)
HYPERVISOR_vcpu_op(VCPUOP_send_nmi,
PCPU_ID_GET(cpu, vcpu_id), NULL);
break;
case APIC_IPI_DEST_OTHERS:
CPU_FOREACH(cpu)
if (cpu != PCPU_GET(cpuid))
HYPERVISOR_vcpu_op(VCPUOP_send_nmi,
PCPU_ID_GET(cpu, vcpu_id), NULL);
break;
default:
HYPERVISOR_vcpu_op(VCPUOP_send_nmi,
PCPU_ID_GET(apic_cpuid(dest), vcpu_id), NULL);
break;
}
}
#undef PCPU_ID_GET
static void
xen_pv_lapic_ipi_vectored(u_int vector, int dest)
{
xen_intr_handle_t *ipi_handle;
int ipi_idx, to_cpu, self;
if (vector >= IPI_NMI_FIRST) {
send_nmi(dest);
return;
}
ipi_idx = IPI_TO_IDX(vector);
if (ipi_idx >= nitems(xen_ipis))
panic("IPI out of range");
switch(dest) {
case APIC_IPI_DEST_SELF:
ipi_handle = DPCPU_GET(ipi_handle);
xen_intr_signal(ipi_handle[ipi_idx]);
break;
case APIC_IPI_DEST_ALL:
CPU_FOREACH(to_cpu) {
ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle);
xen_intr_signal(ipi_handle[ipi_idx]);
}
break;
case APIC_IPI_DEST_OTHERS:
self = PCPU_GET(cpuid);
CPU_FOREACH(to_cpu) {
if (to_cpu != self) {
ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle);
xen_intr_signal(ipi_handle[ipi_idx]);
}
}
break;
default:
to_cpu = apic_cpuid(dest);
ipi_handle = DPCPU_ID_GET(to_cpu, ipi_handle);
xen_intr_signal(ipi_handle[ipi_idx]);
break;
}
}
static int
xen_pv_lapic_ipi_wait(int delay)
{
XEN_APIC_UNSUPPORTED;
return (0);
}
#endif /* SMP */
static int
xen_pv_lapic_ipi_alloc(inthand_t *ipifunc)
{
XEN_APIC_UNSUPPORTED;
return (-1);
}
static void
xen_pv_lapic_ipi_free(int vector)
{
XEN_APIC_UNSUPPORTED;
}
static int
xen_pv_lapic_set_lvt_mask(u_int apic_id, u_int lvt, u_char masked)
{
XEN_APIC_UNSUPPORTED;
return (0);
}
static int
xen_pv_lapic_set_lvt_mode(u_int apic_id, u_int lvt, uint32_t mode)
{
XEN_APIC_UNSUPPORTED;
return (0);
}
static int
xen_pv_lapic_set_lvt_polarity(u_int apic_id, u_int lvt, enum intr_polarity pol)
{
XEN_APIC_UNSUPPORTED;
return (0);
}
static int
xen_pv_lapic_set_lvt_triggermode(u_int apic_id, u_int lvt,
enum intr_trigger trigger)
{
XEN_APIC_UNSUPPORTED;
return (0);
}
/* Xen apic_ops implementation */
struct apic_ops xen_apic_ops = {
.create = xen_pv_lapic_create,
.init = xen_pv_lapic_init,
.xapic_mode = xen_pv_lapic_disable,
.is_x2apic = xen_pv_lapic_is_x2apic,
.setup = xen_pv_lapic_setup,
.dump = xen_pv_lapic_dump,
.disable = xen_pv_lapic_disable,
.eoi = xen_pv_lapic_eoi,
.id = xen_pv_lapic_id,
.intr_pending = xen_pv_lapic_intr_pending,
.set_logical_id = xen_pv_lapic_set_logical_id,
.cpuid = xen_pv_apic_cpuid,
.alloc_vector = xen_pv_apic_alloc_vector,
.alloc_vectors = xen_pv_apic_alloc_vectors,
.enable_vector = xen_pv_apic_enable_vector,
.disable_vector = xen_pv_apic_disable_vector,
.free_vector = xen_pv_apic_free_vector,
.enable_pmc = xen_pv_lapic_enable_pmc,
.disable_pmc = xen_pv_lapic_disable_pmc,
.reenable_pmc = xen_pv_lapic_reenable_pmc,
.enable_cmc = xen_pv_lapic_enable_cmc,
#ifdef SMP
.ipi_raw = xen_pv_lapic_ipi_raw,
.ipi_vectored = xen_pv_lapic_ipi_vectored,
.ipi_wait = xen_pv_lapic_ipi_wait,
#endif
.ipi_alloc = xen_pv_lapic_ipi_alloc,
.ipi_free = xen_pv_lapic_ipi_free,
.set_lvt_mask = xen_pv_lapic_set_lvt_mask,
.set_lvt_mode = xen_pv_lapic_set_lvt_mode,
.set_lvt_polarity = xen_pv_lapic_set_lvt_polarity,
.set_lvt_triggermode = xen_pv_lapic_set_lvt_triggermode,
};
#ifdef SMP
/*---------------------------- XEN PV IPI Handlers ---------------------------*/
/*
* These are C clones of the ASM functions found in apic_vector.
*/
static int
xen_ipi_bitmap_handler(void *arg)
{
struct trapframe *frame;
frame = arg;
ipi_bitmap_handler(*frame);
return (FILTER_HANDLED);
}
static int
xen_smp_rendezvous_action(void *arg)
{
#ifdef COUNT_IPIS
(*ipi_rendezvous_counts[PCPU_GET(cpuid)])++;
#endif /* COUNT_IPIS */
smp_rendezvous_action();
return (FILTER_HANDLED);
}
#ifdef __amd64__
static int
xen_invlop(void *arg)
{
invlop_handler();
return (FILTER_HANDLED);
}
#else /* __i386__ */
static int
xen_invltlb(void *arg)
{
invltlb_handler();
return (FILTER_HANDLED);
}
static int
xen_invlpg(void *arg)
{
invlpg_handler();
return (FILTER_HANDLED);
}
static int
xen_invlrng(void *arg)
{
invlrng_handler();
return (FILTER_HANDLED);
}
static int
xen_invlcache(void *arg)
{
invlcache_handler();
return (FILTER_HANDLED);
}
#endif /* __amd64__ */
static int
xen_cpustop_handler(void *arg)
{
cpustop_handler();
return (FILTER_HANDLED);
}
static int
xen_cpususpend_handler(void *arg)
{
cpususpend_handler();
return (FILTER_HANDLED);
}
/*----------------------------- XEN PV IPI setup -----------------------------*/
/*
* Those functions are provided outside of the Xen PV APIC implementation
* so PVHVM guests can also use PV IPIs without having an actual Xen PV APIC,
* because on PVHVM there's an emulated LAPIC provided by Xen.
*/
static void
xen_cpu_ipi_init(int cpu)
{
xen_intr_handle_t *ipi_handle;
const struct xen_ipi_handler *ipi;
int idx, rc;
ipi_handle = DPCPU_ID_GET(cpu, ipi_handle);
for (ipi = xen_ipis, idx = 0; idx < nitems(xen_ipis); ipi++, idx++) {
if (ipi->filter == NULL) {
ipi_handle[idx] = NULL;
continue;
}
rc = xen_intr_alloc_and_bind_ipi(cpu, ipi->filter,
INTR_TYPE_TTY, &ipi_handle[idx]);
if (rc != 0)
panic("Unable to allocate a XEN IPI port");
xen_intr_describe(ipi_handle[idx], "%s", ipi->description);
}
}
static void
xen_setup_cpus(void)
{
int i;
if (!xen_vector_callback_enabled)
return;
CPU_FOREACH(i)
xen_cpu_ipi_init(i);
/* Set the xen pv ipi ops to replace the native ones */
if (xen_hvm_domain())
apic_ops.ipi_vectored = xen_pv_lapic_ipi_vectored;
}
/* Switch to using PV IPIs as soon as the vcpu_id is set. */
SYSINIT(xen_setup_cpus, SI_SUB_SMP, SI_ORDER_SECOND, xen_setup_cpus, NULL);
#endif /* SMP */