Add support for suspend/resume/migration operations when running as a
Xen PVHVM guest.

Submitted by:	Roger Pau Monné
Sponsored by:	Citrix Systems R&D
Reviewed by:	gibbs
Approved by:	re (blanket Xen)
MFC after:	2 weeks

sys/amd64/amd64/mp_machdep.c:
sys/i386/i386/mp_machdep.c:
	- Make sure there are no MMU-related IPIs pending on migration.
	- Reset pending IPI_BITMAP on resume.
	- Init vcpu_info on resume.
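
	  A condensed sketch of the Xen-specific fragment added to the
	  cpususpend_handler() resume path (full context in the
	  mp_machdep.c diffs below):

		#ifdef XENHVM
			/* Xen does not preserve pending event channels across
			 * migration, so drop any IPIs recorded in the bitmap. */
			cpu_ipi_pending[cpu] = 0;
			/* Re-register this CPU's vcpu_info with the hypervisor. */
			xen_hvm_init_cpu();
		#endif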

sys/amd64/include/intr_machdep.h:
sys/i386/include/intr_machdep.h:
sys/x86/acpica/acpi_wakeup.c:
sys/x86/x86/intr_machdep.c:
sys/x86/isa/atpic.c:
sys/x86/x86/io_apic.c:
sys/x86/x86/local_apic.c:
	- Add a "suspend_cancelled" parameter to pic_resume().  For the
	  Xen PIC, restoration of interrupt services differs between
	  the aborted suspend and normal resume cases, so we must provide
	  this information.
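
	  For illustration, a PIC driver adapts by taking the new parameter
	  in its resume method; a minimal sketch, where mypic_resume() and
	  mypic_reprogram() are hypothetical driver methods:

		static void
		mypic_resume(struct pic *pic, bool suspend_cancelled)
		{
			/* A cancelled suspend never tore down hardware state,
			 * so the full reprogramming pass can be skipped. */
			if (suspend_cancelled)
				return;
			mypic_reprogram(pic);	/* as on a normal resume */
		}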

sys/dev/acpica/acpi_timer.c:
sys/dev/xen/timer/timer.c:
sys/timetc.h:
	- Don't swap out "suspend safe" timers across a suspend/resume
	  cycle.  This includes the Xen PV and ACPI timers.
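
	  A driver opts in by flagging its timecounter, and the suspend
	  handler honors the flag; condensed from the acpi_timer.c and
	  timer.c diffs below:

		sc->tc.tc_flags = TC_FLAGS_SUSPEND_SAFE;

		if ((timecounter->tc_flags & TC_FLAGS_SUSPEND_SAFE) != 0)
			return;	/* Keep the current timecounter in place. */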

sys/dev/xen/control/control.c:
	- Perform proper suspend/resume process for PVHVM:
		- Suspend all APs before going into suspension; this allows us
		  to reset each AP's vcpu_info on resume.
		- Reset shared info page and callback on resume.
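
	  In outline, the PVHVM suspend/resume sequence becomes (condensed
	  from the control.c diff below):

		suspend_cpus(cpu_suspend_map);	/* park the APs */
		disable_intr();
		intr_suspend();
		xen_hvm_suspend();
		suspend_cancelled = HYPERVISOR_suspend(0);
		xen_hvm_resume(suspend_cancelled != 0);
		intr_resume(suspend_cancelled != 0);
		enable_intr();
		gnttab_resume();		/* reset grant table info */
		resume_cpus(cpu_suspend_map);	/* restart the APs */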

sys/dev/xen/timer/timer.c:
	- Implement suspend/resume support for the PV timer. Since FreeBSD
	  doesn't perform a per-CPU resume of the timer, we need to call
	  smp_rendezvous() to resume the timer correctly on each CPU.
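
	  The per-CPU restart runs via a rendezvous callback; condensed from
	  the timer.c diff below:

		static void
		xentimer_percpu_resume(void *arg)
		{
			device_t dev = (device_t)arg;
			struct xentimer_softc *sc = device_get_softc(dev);

			/* Executed on each CPU: restart the event timer. */
			xentimer_et_start(&sc->et, sc->et.et_min_period, 0);
		}

		/* From xentimer_resume(), after resetting the uptime: */
		smp_rendezvous(NULL, xentimer_percpu_resume, NULL, dev);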

sys/dev/xen/xenpci/xenpci.c:
	- Don't reset the PCI interrupt on each suspend/resume.

sys/kern/subr_smp.c:
	- When suspending a PVHVM domain, make sure there are no MMU IPIs
	  in flight, or we will get a lockup on resume because pending
	  event channels are not carried over on migration.
	- Implement a generic version of restart_cpus that can be used by
	  suspended and stopped CPUs.
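
	  restart_cpus() and the new resume_cpus() become thin wrappers that
	  differ only in the IPI type, and thus in which CPU set is waited
	  on; see the subr_smp.c diff below:

		int
		restart_cpus(cpuset_t map)
		{
			return (generic_restart_cpus(map, IPI_STOP));
		}

		int
		resume_cpus(cpuset_t map)
		{
			return (generic_restart_cpus(map, IPI_SUSPEND));
		}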

sys/x86/xen/hvm.c:
	- Implement resume support for the hypercall page and shared info.
	- Clear vcpu_info so it can be reset by APs when resuming from
	  suspension.
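
	  Clearing vcpu_info lets xen_hvm_init_cpu() distinguish a real
	  resume from a cancelled suspend; condensed from the hvm.c diff
	  below:

		if (DPCPU_GET(vcpu_info) != NULL) {
			/* Resuming from a cancelled suspend or failed
			 * migration; the pre-suspend registration is
			 * still valid. */
			return;
		}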

sys/dev/xen/xenpci/xenpci.c:
sys/x86/xen/hvm.c:
sys/x86/xen/xen_intr.c:
	- Support UP kernel configurations.
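
	  On UP kernels the SMP-only paths compile out and CPU-binding
	  requests fail cleanly; sketched from the xen_intr.c diff below:

		static int
		xen_intr_assign_cpu(struct intsrc *base_isrc, u_int apic_id)
		{
		#ifdef SMP
			/* ... rebind the event channel to the target vCPU ... */
		#else
			return (EOPNOTSUPP);
		#endif
		}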

sys/x86/xen/xen_intr.c:
	- Properly rebind per-CPU VIRQs and IPIs on resume.
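
	  The heart of the rebind re-requests the event channel from the
	  hypervisor and restores the port mapping; condensed from
	  xen_rebind_virq() in the xen_intr.c diff below:

		struct evtchn_bind_virq bind_virq =
		    { .virq = isrc->xi_virq, .vcpu = acpi_id };

		error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
		    &bind_virq);
		if (error != 0)
			panic("unable to rebind xen VIRQ#%d: %d",
			    isrc->xi_virq, error);
		isrc->xi_port = bind_virq.port;
		xen_intr_port_to_isrc[bind_virq.port] = isrc;
		evtchn_unmask_port(bind_virq.port);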

Committed as 428b7ca290 (parent e96ca45522) by Justin T. Gibbs,
2013-09-20 05:06:03 +00:00.
19 changed files with 420 additions and 122 deletions

sys/amd64/amd64/mp_machdep.c:

@ -1468,6 +1468,10 @@ cpususpend_handler(void)
cpu = PCPU_GET(cpuid);
#ifdef XENHVM
mtx_assert(&smp_ipi_mtx, MA_NOTOWNED);
#endif
if (savectx(susppcbs[cpu])) {
ctx_fpusave(susppcbs[cpu]->pcb_fpususpend);
wbinvd();
@ -1486,11 +1490,23 @@ cpususpend_handler(void)
while (!CPU_ISSET(cpu, &started_cpus))
ia32_pause();
#ifdef XENHVM
/*
* Reset pending bitmap IPIs, because Xen doesn't preserve pending
* event channels on migration.
*/
cpu_ipi_pending[cpu] = 0;
/* register vcpu_info area */
xen_hvm_init_cpu();
#endif
/* Resume MCA and local APIC */
mca_resume();
lapic_setup(0);
CPU_CLR_ATOMIC(cpu, &started_cpus);
/* Indicate that we are resumed */
CPU_CLR_ATOMIC(cpu, &suspended_cpus);
}
/*

sys/amd64/include/intr_machdep.h:

@ -102,7 +102,7 @@ struct pic {
int (*pic_vector)(struct intsrc *);
int (*pic_source_pending)(struct intsrc *);
void (*pic_suspend)(struct pic *);
void (*pic_resume)(struct pic *);
void (*pic_resume)(struct pic *, bool suspend_cancelled);
int (*pic_config_intr)(struct intsrc *, enum intr_trigger,
enum intr_polarity);
int (*pic_assign_cpu)(struct intsrc *, u_int apic_id);
@ -170,7 +170,7 @@ struct intsrc *intr_lookup_source(int vector);
int intr_register_pic(struct pic *pic);
int intr_register_source(struct intsrc *isrc);
int intr_remove_handler(void *cookie);
void intr_resume(void);
void intr_resume(bool suspend_cancelled);
void intr_suspend(void);
void intrcnt_add(const char *name, u_long **countp);
void nexus_add_irq(u_long irq);

sys/dev/acpica/acpi_timer.c:

@ -189,6 +189,7 @@ acpi_timer_probe(device_t dev)
else
acpi_timer_timecounter.tc_counter_mask = 0x00ffffff;
acpi_timer_timecounter.tc_frequency = acpi_timer_frequency;
acpi_timer_timecounter.tc_flags = TC_FLAGS_SUSPEND_SAFE;
if (testenv("debug.acpi.timer_test"))
acpi_timer_boot_test();
@ -285,6 +286,14 @@ acpi_timer_suspend_handler(struct timecounter *newtc)
acpi_timer_eh = NULL;
}
if ((timecounter->tc_flags & TC_FLAGS_SUSPEND_SAFE) != 0) {
/*
* If we are using a suspend safe timecounter, don't
* save/restore it across suspend/resume.
*/
return;
}
KASSERT(newtc == &acpi_timer_timecounter,
("acpi_timer_suspend_handler: wrong timecounter"));

sys/dev/xen/control/control.c:

@ -119,11 +119,9 @@ __FBSDID("$FreeBSD$");
#include <sys/taskqueue.h>
#include <sys/types.h>
#include <sys/vnode.h>
#ifndef XENHVM
#include <sys/sched.h>
#include <sys/smp.h>
#endif
#include <sys/eventhandler.h>
#include <geom/geom.h>
@ -140,6 +138,10 @@ __FBSDID("$FreeBSD$");
#include <xen/gnttab.h>
#include <xen/xen_intr.h>
#ifdef XENHVM
#include <xen/hvm.h>
#endif
#include <xen/interface/event_channel.h>
#include <xen/interface/grant_table.h>
@ -199,7 +201,7 @@ extern void xencons_resume(void);
static void
xctrl_suspend()
{
int i, j, k, fpp;
int i, j, k, fpp, suspend_cancelled;
unsigned long max_pfn, start_info_mfn;
EVENTHANDLER_INVOKE(power_suspend);
@ -264,7 +266,7 @@ xctrl_suspend()
*/
start_info_mfn = VTOMFN(xen_start_info);
pmap_suspend();
HYPERVISOR_suspend(start_info_mfn);
suspend_cancelled = HYPERVISOR_suspend(start_info_mfn);
pmap_resume();
pmap_kenter_ma((vm_offset_t) shared_info, xen_start_info->shared_info);
@ -287,7 +289,7 @@ xctrl_suspend()
HYPERVISOR_shared_info->arch.max_pfn = max_pfn;
gnttab_resume();
intr_resume();
intr_resume(suspend_cancelled != 0);
local_irq_enable();
xencons_resume();
@ -331,16 +333,31 @@ xen_pv_shutdown_final(void *arg, int howto)
}
#else
extern void xenpci_resume(void);
/* HVM mode suspension. */
static void
xctrl_suspend()
{
#ifdef SMP
cpuset_t cpu_suspend_map;
#endif
int suspend_cancelled;
EVENTHANDLER_INVOKE(power_suspend);
if (smp_started) {
thread_lock(curthread);
sched_bind(curthread, 0);
thread_unlock(curthread);
}
KASSERT((PCPU_GET(cpuid) == 0), ("Not running on CPU#0"));
/*
* Clear our XenStore node so the toolstack knows we are
* responding to the suspend request.
*/
xs_write(XST_NIL, "control", "shutdown", "");
/*
* Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE
* drivers need this.
@ -353,31 +370,67 @@ xctrl_suspend()
}
mtx_unlock(&Giant);
#ifdef SMP
if (smp_started) {
/*
* Suspend other CPUs. This prevents IPIs while we
* are resuming, and will allow us to reset per-cpu
* vcpu_info on resume.
*/
cpu_suspend_map = all_cpus;
CPU_CLR(PCPU_GET(cpuid), &cpu_suspend_map);
if (!CPU_EMPTY(&cpu_suspend_map))
suspend_cpus(cpu_suspend_map);
}
#endif
/*
* Prevent any races with evtchn_interrupt() handler.
*/
disable_intr();
intr_suspend();
xen_hvm_suspend();
suspend_cancelled = HYPERVISOR_suspend(0);
intr_resume();
xen_hvm_resume(suspend_cancelled != 0);
intr_resume(suspend_cancelled != 0);
enable_intr();
/*
* Re-enable interrupts and put the scheduler back to normal.
* Reset grant table info.
*/
enable_intr();
gnttab_resume();
#ifdef SMP
if (smp_started && !CPU_EMPTY(&cpu_suspend_map)) {
/*
* Now that event channels have been initialized,
* resume CPUs.
*/
resume_cpus(cpu_suspend_map);
}
#endif
/*
* FreeBSD really needs to add DEVICE_SUSPEND_CANCEL or
* similar.
*/
mtx_lock(&Giant);
if (!suspend_cancelled)
DEVICE_RESUME(root_bus);
DEVICE_RESUME(root_bus);
mtx_unlock(&Giant);
if (smp_started) {
thread_lock(curthread);
sched_unbind(curthread);
thread_unlock(curthread);
}
EVENTHANDLER_INVOKE(power_resume);
if (bootverbose)
printf("System resumed after suspension\n");
}
#endif

sys/dev/xen/timer/timer.c:

@ -1,4 +1,4 @@
/**
/*-
* Copyright (c) 2009 Adrian Chadd
* Copyright (c) 2012 Spectra Logic Corporation
* All rights reserved.
@ -57,6 +57,7 @@ __FBSDID("$FreeBSD$");
#include <machine/cpufunc.h>
#include <machine/clock.h>
#include <machine/_inttypes.h>
#include <machine/smp.h>
#include "clock_if.h"
@ -316,7 +317,7 @@ xentimer_settime(device_t dev __unused, struct timespec *ts)
* Don't return EINVAL here; just silently fail if the domain isn't
* privileged enough to set the TOD.
*/
return(0);
return (0);
}
/**
@ -339,7 +340,7 @@ xentimer_gettime(device_t dev, struct timespec *ts)
xen_fetch_uptime(&u_ts);
timespecadd(ts, &u_ts);
return(0);
return (0);
}
/**
@ -457,8 +458,9 @@ xentimer_attach(device_t dev)
/* Bind an event channel to a VIRQ on each VCPU. */
CPU_FOREACH(i) {
struct xentimer_pcpu_data *pcpu = DPCPU_ID_PTR(i, xentimer_pcpu);
struct xentimer_pcpu_data *pcpu;
pcpu = DPCPU_ID_PTR(i, xentimer_pcpu);
error = HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, i, NULL);
if (error) {
device_printf(dev, "Error disabling Xen periodic timer "
@ -493,6 +495,7 @@ xentimer_attach(device_t dev)
/* Register the timecounter. */
sc->tc.tc_name = "XENTIMER";
sc->tc.tc_quality = XENTIMER_QUALITY;
sc->tc.tc_flags = TC_FLAGS_SUSPEND_SAFE;
/*
* The underlying resolution is in nanoseconds, since the timer info
* scales TSC frequencies using a fraction that represents time in
@ -523,75 +526,60 @@ xentimer_detach(device_t dev)
return (EBUSY);
}
/**
* The following device methods are disabled because they wouldn't work
* properly.
*/
#ifdef NOTYET
static void
xentimer_percpu_resume(void *arg)
{
device_t dev = (device_t) arg;
struct xentimer_softc *sc = device_get_softc(dev);
xentimer_et_start(&sc->et, sc->et.et_min_period, 0);
}
static int
xentimer_resume(device_t dev)
{
struct xentimer_softc *sc = device_get_softc(dev);
int error = 0;
int error;
int i;
device_printf(sc->dev, "%s", __func__);
/* Disable the periodic timer */
CPU_FOREACH(i) {
struct xentimer_pcpu_data *pcpu = DPCPU_ID_PTR(i, xentimer_pcpu);
/* Skip inactive timers. */
if (pcpu->timer == 0)
continue;
/*
* XXX This won't actually work, because Xen requires that
* singleshot timers be set while running on the given CPU.
*/
error = xentimer_vcpu_start_timer(i, pcpu->timer);
if (error == -ETIME) {
/* Event time has already passed; process. */
xentimer_intr(sc);
} else if (error != 0) {
panic("%s: error %d restarting vcpu %d\n",
__func__, error, i);
error = HYPERVISOR_vcpu_op(VCPUOP_stop_periodic_timer, i, NULL);
if (error != 0) {
device_printf(dev,
"Error disabling Xen periodic timer on CPU %d\n",
i);
return (error);
}
}
return (error);
/* Reset the last uptime value */
xen_timer_last_time = 0;
/* Reset the RTC clock */
inittodr(time_second);
/* Kick the timers on all CPUs */
smp_rendezvous(NULL, xentimer_percpu_resume, NULL, dev);
if (bootverbose)
device_printf(dev, "resumed operation after suspension\n");
return (0);
}
static int
xentimer_suspend(device_t dev)
{
struct xentimer_softc *sc = device_get_softc(dev);
int error = 0;
int i;
device_printf(sc->dev, "%s", __func__);
CPU_FOREACH(i) {
struct xentimer_pcpu_data *pcpu = DPCPU_ID_PTR(i, xentimer_pcpu);
/* Skip inactive timers. */
if (pcpu->timer == 0)
continue;
error = xentimer_vcpu_stop_timer(i);
if (error)
panic("Error %d stopping VCPU %d timer\n", error, i);
}
return (error);
return (0);
}
#endif
static device_method_t xentimer_methods[] = {
DEVMETHOD(device_identify, xentimer_identify),
DEVMETHOD(device_probe, xentimer_probe),
DEVMETHOD(device_attach, xentimer_attach),
DEVMETHOD(device_detach, xentimer_detach),
#ifdef NOTYET
DEVMETHOD(device_suspend, xentimer_suspend),
DEVMETHOD(device_resume, xentimer_resume),
#endif
/* clock interface */
DEVMETHOD(clock_gettime, xentimer_gettime),
DEVMETHOD(clock_settime, xentimer_settime),

sys/dev/xen/xenpci/xenpci.c:

@ -77,6 +77,7 @@ xenpci_irq_init(device_t device, struct xenpci_softc *scp)
if (error)
return error;
#ifdef SMP
/*
* When using the PCI event delivery callback we cannot assign
* events to specific vCPUs, so all events are delivered to vCPU#0 by
@ -88,6 +89,7 @@ xenpci_irq_init(device_t device, struct xenpci_softc *scp)
scp->res_irq, 0);
if (error)
return error;
#endif
xen_hvm_set_callback(device);
return (0);
@ -309,28 +311,12 @@ xenpci_detach(device_t dev)
static int
xenpci_suspend(device_t dev)
{
struct xenpci_softc *scp = device_get_softc(dev);
device_t parent = device_get_parent(dev);
if (scp->intr_cookie != NULL) {
if (BUS_TEARDOWN_INTR(parent, dev, scp->res_irq,
scp->intr_cookie) != 0)
printf("intr teardown failed.. continuing\n");
scp->intr_cookie = NULL;
}
return (bus_generic_suspend(dev));
}
static int
xenpci_resume(device_t dev)
{
struct xenpci_softc *scp = device_get_softc(dev);
device_t parent = device_get_parent(dev);
BUS_SETUP_INTR(parent, dev, scp->res_irq,
INTR_MPSAFE|INTR_TYPE_MISC, xenpci_intr_filter, NULL,
/*trap_frame*/NULL, &scp->intr_cookie);
xen_hvm_set_callback(dev);
return (bus_generic_resume(dev));
}

sys/i386/i386/mp_machdep.c:

@ -1529,6 +1529,10 @@ cpususpend_handler(void)
cpu = PCPU_GET(cpuid);
#ifdef XENHVM
mtx_assert(&smp_ipi_mtx, MA_NOTOWNED);
#endif
if (savectx(susppcbs[cpu])) {
wbinvd();
CPU_SET_ATOMIC(cpu, &suspended_cpus);
@ -1545,10 +1549,22 @@ cpususpend_handler(void)
while (!CPU_ISSET(cpu, &started_cpus))
ia32_pause();
#ifdef XENHVM
/*
* Reset pending bitmap IPIs, because Xen doesn't preserve pending
* event channels on migration.
*/
cpu_ipi_pending[cpu] = 0;
/* register vcpu_info area */
xen_hvm_init_cpu();
#endif
/* Resume MCA and local APIC */
mca_resume();
lapic_setup(0);
/* Indicate that we are resumed */
CPU_CLR_ATOMIC(cpu, &suspended_cpus);
CPU_CLR_ATOMIC(cpu, &started_cpus);
}
/*

sys/i386/include/intr_machdep.h:

@ -108,7 +108,7 @@ struct pic {
int (*pic_vector)(struct intsrc *);
int (*pic_source_pending)(struct intsrc *);
void (*pic_suspend)(struct pic *);
void (*pic_resume)(struct pic *);
void (*pic_resume)(struct pic *, bool suspend_cancelled);
int (*pic_config_intr)(struct intsrc *, enum intr_trigger,
enum intr_polarity);
int (*pic_assign_cpu)(struct intsrc *, u_int apic_id);
@ -166,7 +166,7 @@ struct intsrc *intr_lookup_source(int vector);
int intr_register_pic(struct pic *pic);
int intr_register_source(struct intsrc *isrc);
int intr_remove_handler(void *cookie);
void intr_resume(void);
void intr_resume(bool suspend_cancelled);
void intr_suspend(void);
void intrcnt_add(const char *name, u_long **countp);
void nexus_add_irq(u_long irq);

sys/kern/subr_smp.c:

@ -225,6 +225,18 @@ generic_stop_cpus(cpuset_t map, u_int type)
CTR2(KTR_SMP, "stop_cpus(%s) with %u type",
cpusetobj_strprint(cpusetbuf, &map), type);
#ifdef XENHVM
/*
* When migrating a PVHVM domain we need to make sure there are
* no IPIs in progress. IPIs that have been issued, but not
* yet delivered (not pending on a vCPU) will be lost in the
* IPI rebinding process, violating FreeBSD's assumption of
* reliable IPI delivery.
*/
if (type == IPI_SUSPEND)
mtx_lock_spin(&smp_ipi_mtx);
#endif
if (stopping_cpu != PCPU_GET(cpuid))
while (atomic_cmpset_int(&stopping_cpu, NOCPU,
PCPU_GET(cpuid)) == 0)
@ -252,6 +264,11 @@ generic_stop_cpus(cpuset_t map, u_int type)
}
}
#ifdef XENHVM
if (type == IPI_SUSPEND)
mtx_unlock_spin(&smp_ipi_mtx);
#endif
stopping_cpu = NOCPU;
return (1);
}
@ -292,28 +309,60 @@ suspend_cpus(cpuset_t map)
* 0: NA
* 1: ok
*/
int
restart_cpus(cpuset_t map)
static int
generic_restart_cpus(cpuset_t map, u_int type)
{
#ifdef KTR
char cpusetbuf[CPUSETBUFSIZ];
#endif
volatile cpuset_t *cpus;
KASSERT(
#if defined(__amd64__) || defined(__i386__)
type == IPI_STOP || type == IPI_STOP_HARD || type == IPI_SUSPEND,
#else
type == IPI_STOP || type == IPI_STOP_HARD,
#endif
("%s: invalid stop type", __func__));
if (!smp_started)
return 0;
CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map));
#if defined(__amd64__) || defined(__i386__)
if (type == IPI_SUSPEND)
cpus = &suspended_cpus;
else
#endif
cpus = &stopped_cpus;
/* signal other cpus to restart */
CPU_COPY_STORE_REL(&map, &started_cpus);
/* wait for each to clear its bit */
while (CPU_OVERLAP(&stopped_cpus, &map))
while (CPU_OVERLAP(cpus, &map))
cpu_spinwait();
return 1;
}
int
restart_cpus(cpuset_t map)
{
return (generic_restart_cpus(map, IPI_STOP));
}
#if defined(__amd64__) || defined(__i386__)
int
resume_cpus(cpuset_t map)
{
return (generic_restart_cpus(map, IPI_SUSPEND));
}
#endif
/*
* All-CPU rendezvous. CPUs are signalled, all execute the setup function
* (if specified), rendezvous, execute the action function (if specified),

sys/sys/smp.h:

@ -166,6 +166,7 @@ int stop_cpus(cpuset_t);
int stop_cpus_hard(cpuset_t);
#if defined(__amd64__) || defined(__i386__)
int suspend_cpus(cpuset_t);
int resume_cpus(cpuset_t);
#endif
void smp_rendezvous_action(void);

sys/sys/timetc.h:

@ -59,6 +59,10 @@ struct timecounter {
*/
u_int tc_flags;
#define TC_FLAGS_C3STOP 1 /* Timer dies in C3. */
#define TC_FLAGS_SUSPEND_SAFE 2 /*
* Timer functional across
* suspend/resume.
*/
void *tc_priv;
/* Pointer to the timecounter's private parts. */

sys/x86/acpica/acpi_wakeup.c:

@ -266,7 +266,7 @@ acpi_wakeup_machdep(struct acpi_softc *sc, int state, int sleep_result,
restart_cpus(suspcpus);
#endif
mca_resume();
intr_resume();
intr_resume(/*suspend_cancelled*/false);
AcpiSetFirmwareWakingVector(0);
} else {

sys/x86/isa/atpic.c:

@ -123,7 +123,7 @@ static void atpic_eoi_slave(struct intsrc *isrc);
static void atpic_enable_intr(struct intsrc *isrc);
static void atpic_disable_intr(struct intsrc *isrc);
static int atpic_vector(struct intsrc *isrc);
static void atpic_resume(struct pic *pic);
static void atpic_resume(struct pic *pic, bool suspend_cancelled);
static int atpic_source_pending(struct intsrc *isrc);
static int atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
enum intr_polarity pol);
@ -276,7 +276,7 @@ atpic_source_pending(struct intsrc *isrc)
}
static void
atpic_resume(struct pic *pic)
atpic_resume(struct pic *pic, bool suspend_cancelled)
{
struct atpic *ap = (struct atpic *)pic;

sys/x86/x86/intr_machdep.c:

@ -279,7 +279,7 @@ intr_execute_handlers(struct intsrc *isrc, struct trapframe *frame)
}
void
intr_resume(void)
intr_resume(bool suspend_cancelled)
{
struct pic *pic;
@ -289,7 +289,7 @@ intr_resume(void)
mtx_lock(&intr_table_lock);
TAILQ_FOREACH(pic, &pics, pics) {
if (pic->pic_resume != NULL)
pic->pic_resume(pic);
pic->pic_resume(pic, suspend_cancelled);
}
mtx_unlock(&intr_table_lock);
}

sys/x86/x86/io_apic.c:

@ -119,7 +119,7 @@ static int ioapic_vector(struct intsrc *isrc);
static int ioapic_source_pending(struct intsrc *isrc);
static int ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
enum intr_polarity pol);
static void ioapic_resume(struct pic *pic);
static void ioapic_resume(struct pic *pic, bool suspend_cancelled);
static int ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id);
static void ioapic_program_intpin(struct ioapic_intsrc *intpin);
@ -486,7 +486,7 @@ ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
}
static void
ioapic_resume(struct pic *pic)
ioapic_resume(struct pic *pic, bool suspend_cancelled)
{
struct ioapic *io = (struct ioapic *)pic;
int i;

sys/x86/x86/local_apic.c:

@ -161,7 +161,7 @@ static u_long lapic_timer_divisor;
static struct eventtimer lapic_et;
static void lapic_enable(void);
static void lapic_resume(struct pic *pic);
static void lapic_resume(struct pic *pic, bool suspend_cancelled);
static void lapic_timer_oneshot(struct lapic *,
u_int count, int enable_int);
static void lapic_timer_periodic(struct lapic *,
@ -566,7 +566,7 @@ lapic_enable(void)
/* Reset the local APIC on the BSP during resume. */
static void
lapic_resume(struct pic *pic)
lapic_resume(struct pic *pic, bool suspend_cancelled)
{
lapic_setup(0);

sys/x86/xen/hvm.c:

@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
#include <xen/interface/vcpu.h>
/*--------------------------- Forward Declarations ---------------------------*/
#ifdef SMP
static driver_filter_t xen_smp_rendezvous_action;
static driver_filter_t xen_invltlb;
static driver_filter_t xen_invlpg;
@ -70,6 +71,7 @@ static driver_filter_t xen_ipi_bitmap_handler;
static driver_filter_t xen_cpustop_handler;
static driver_filter_t xen_cpususpend_handler;
static driver_filter_t xen_cpustophard_handler;
#endif
/*---------------------------- Extern Declarations ---------------------------*/
/* Variables used by mp_machdep to perform the MMU related IPIs */
@ -93,6 +95,12 @@ extern void pmap_lazyfix_action(void);
#define IPI_TO_IDX(ipi) ((ipi) - APIC_IPI_INTS)
/*-------------------------------- Local Types -------------------------------*/
enum xen_hvm_init_type {
XEN_HVM_INIT_COLD,
XEN_HVM_INIT_CANCELLED_SUSPEND,
XEN_HVM_INIT_RESUME
};
struct xen_ipi_handler
{
driver_filter_t *filter;
@ -104,6 +112,7 @@ enum xen_domain_type xen_domain_type = XEN_NATIVE;
static MALLOC_DEFINE(M_XENHVM, "xen_hvm", "Xen HVM PV Support");
#ifdef SMP
static struct xen_ipi_handler xen_ipis[] =
{
[IPI_TO_IDX(IPI_RENDEZVOUS)] = { xen_smp_rendezvous_action, "r" },
@ -119,6 +128,7 @@ static struct xen_ipi_handler xen_ipis[] =
[IPI_TO_IDX(IPI_SUSPEND)] = { xen_cpususpend_handler, "sp" },
[IPI_TO_IDX(IPI_STOP_HARD)] = { xen_cpustophard_handler, "sth" },
};
#endif
/**
* If non-zero, the hypervisor has been configured to use a direct
@ -129,13 +139,16 @@ int xen_vector_callback_enabled;
/*------------------------------- Per-CPU Data -------------------------------*/
DPCPU_DEFINE(struct vcpu_info, vcpu_local_info);
DPCPU_DEFINE(struct vcpu_info *, vcpu_info);
#ifdef SMP
DPCPU_DEFINE(xen_intr_handle_t, ipi_handle[nitems(xen_ipis)]);
#endif
/*------------------ Hypervisor Access Shared Memory Regions -----------------*/
/** Hypercall table accessed via HYPERVISOR_*_op() methods. */
char *hypercall_stubs;
shared_info_t *HYPERVISOR_shared_info;
#ifdef SMP
/*---------------------------- XEN PV IPI Handlers ---------------------------*/
/*
* This are C clones of the ASM functions found in apic_vector.s
@ -496,6 +509,7 @@ xen_init_ipis(void)
/* Set the xen pv ipi ops to replace the native ones */
cpu_ops.ipi_vectored = xen_ipi_vectored;
}
#endif
/*---------------------- XEN Hypervisor Probe and Setup ----------------------*/
static uint32_t
@ -579,6 +593,9 @@ xen_hvm_set_callback(device_t dev)
struct xen_hvm_param xhp;
int irq;
if (xen_vector_callback_enabled)
return;
xhp.domid = DOMID_SELF;
xhp.index = HVM_PARAM_CALLBACK_IRQ;
if (xen_feature(XENFEAT_hvm_callback_vector) != 0) {
@ -637,41 +654,83 @@ xen_hvm_disable_emulated_devices(void)
outw(XEN_MAGIC_IOPORT, XMI_UNPLUG_IDE_DISKS|XMI_UNPLUG_NICS);
}
static void
xen_hvm_init(enum xen_hvm_init_type init_type)
{
int error;
int i;
if (init_type == XEN_HVM_INIT_CANCELLED_SUSPEND)
return;
error = xen_hvm_init_hypercall_stubs();
switch (init_type) {
case XEN_HVM_INIT_COLD:
if (error != 0)
return;
setup_xen_features();
break;
case XEN_HVM_INIT_RESUME:
if (error != 0)
panic("Unable to init Xen hypercall stubs on resume");
break;
default:
panic("Unsupported HVM initialization type");
}
/* Clear any stale vcpu_info. */
CPU_FOREACH(i)
DPCPU_ID_SET(i, vcpu_info, NULL);
xen_vector_callback_enabled = 0;
xen_domain_type = XEN_HVM_DOMAIN;
xen_hvm_init_shared_info_page();
xen_hvm_set_callback(NULL);
xen_hvm_disable_emulated_devices();
}
void
xen_hvm_suspend(void)
{
}
void
xen_hvm_resume(void)
xen_hvm_resume(bool suspend_cancelled)
{
xen_hvm_init_hypercall_stubs();
xen_hvm_init_shared_info_page();
xen_hvm_init(suspend_cancelled ?
XEN_HVM_INIT_CANCELLED_SUSPEND : XEN_HVM_INIT_RESUME);
/* Register vcpu_info area for CPU#0. */
xen_hvm_init_cpu();
}
static void
xen_hvm_init(void *dummy __unused)
xen_hvm_sysinit(void *arg __unused)
{
xen_hvm_init(XEN_HVM_INIT_COLD);
}
if (xen_hvm_init_hypercall_stubs() != 0)
return;
xen_domain_type = XEN_HVM_DOMAIN;
setup_xen_features();
xen_hvm_init_shared_info_page();
xen_hvm_set_callback(NULL);
xen_hvm_disable_emulated_devices();
}
void xen_hvm_init_cpu(void)
void
xen_hvm_init_cpu(void)
{
struct vcpu_register_vcpu_info info;
struct vcpu_info *vcpu_info;
int cpu, rc;
cpu = PCPU_GET(acpi_id);
if (DPCPU_GET(vcpu_info) != NULL) {
/*
* vcpu_info is already set. We're resuming
* from a failed migration and our pre-suspend
* configuration is still valid.
*/
return;
}
vcpu_info = DPCPU_PTR(vcpu_local_info);
cpu = PCPU_GET(acpi_id);
info.mfn = vtophys(vcpu_info) >> PAGE_SHIFT;
info.offset = vtophys(vcpu_info) - trunc_page(vtophys(vcpu_info));
@ -682,6 +741,8 @@ void xen_hvm_init_cpu(void)
DPCPU_SET(vcpu_info, vcpu_info);
}
SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_init, NULL);
SYSINIT(xen_hvm_init, SI_SUB_HYPERVISOR, SI_ORDER_FIRST, xen_hvm_sysinit, NULL);
#ifdef SMP
SYSINIT(xen_init_ipis, SI_SUB_SMP, SI_ORDER_FIRST, xen_init_ipis, NULL);
#endif
SYSINIT(xen_hvm_init_cpu, SI_SUB_INTR, SI_ORDER_FIRST, xen_hvm_init_cpu, NULL);

sys/x86/xen/xen_intr.c:

@ -120,7 +120,7 @@ struct xenisrc {
#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0]))
static void xen_intr_suspend(struct pic *);
static void xen_intr_resume(struct pic *);
static void xen_intr_resume(struct pic *, bool suspend_cancelled);
static void xen_intr_enable_source(struct intsrc *isrc);
static void xen_intr_disable_source(struct intsrc *isrc, int eoi);
static void xen_intr_eoi_source(struct intsrc *isrc);
@ -334,7 +334,7 @@ xen_intr_release_isrc(struct xenisrc *isrc)
evtchn_cpu_mask_port(isrc->xi_cpu, isrc->xi_port);
evtchn_cpu_unmask_port(0, isrc->xi_port);
if (isrc->xi_close != 0) {
if (isrc->xi_close != 0 && is_valid_evtchn(isrc->xi_port)) {
struct evtchn_close close = { .port = isrc->xi_port };
if (HYPERVISOR_event_channel_op(EVTCHNOP_close, &close))
panic("EVTCHNOP_close failed");
@ -408,6 +408,7 @@ xen_intr_bind_isrc(struct xenisrc **isrcp, evtchn_port_t local_port,
return (error);
}
*isrcp = isrc;
evtchn_unmask_port(local_port);
return (0);
}
@ -571,6 +572,9 @@ xen_intr_init(void *dummy __unused)
struct xen_intr_pcpu_data *pcpu;
int i;
if (!xen_domain())
return (0);
mtx_init(&xen_intr_isrc_lock, "xen-irq-lock", NULL, MTX_DEF);
/*
@ -602,20 +606,116 @@ xen_intr_suspend(struct pic *unused)
{
}
static void
xen_rebind_ipi(struct xenisrc *isrc)
{
#ifdef SMP
int cpu = isrc->xi_cpu;
int acpi_id = pcpu_find(cpu)->pc_acpi_id;
int error;
struct evtchn_bind_ipi bind_ipi = { .vcpu = acpi_id };
error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi,
&bind_ipi);
if (error != 0)
panic("unable to rebind xen IPI: %d", error);
isrc->xi_port = bind_ipi.port;
isrc->xi_cpu = 0;
xen_intr_port_to_isrc[bind_ipi.port] = isrc;
error = xen_intr_assign_cpu(&isrc->xi_intsrc,
cpu_apic_ids[cpu]);
if (error)
panic("unable to bind xen IPI to CPU#%d: %d",
cpu, error);
evtchn_unmask_port(bind_ipi.port);
#else
panic("Resume IPI event channel on UP");
#endif
}
static void
xen_rebind_virq(struct xenisrc *isrc)
{
int cpu = isrc->xi_cpu;
int acpi_id = pcpu_find(cpu)->pc_acpi_id;
int error;
struct evtchn_bind_virq bind_virq = { .virq = isrc->xi_virq,
.vcpu = acpi_id };
error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq,
&bind_virq);
if (error != 0)
panic("unable to rebind xen VIRQ#%d: %d", isrc->xi_virq, error);
isrc->xi_port = bind_virq.port;
isrc->xi_cpu = 0;
xen_intr_port_to_isrc[bind_virq.port] = isrc;
#ifdef SMP
error = xen_intr_assign_cpu(&isrc->xi_intsrc,
cpu_apic_ids[cpu]);
if (error)
panic("unable to bind xen VIRQ#%d to CPU#%d: %d",
isrc->xi_virq, cpu, error);
#endif
evtchn_unmask_port(bind_virq.port);
}
/**
* Return this PIC to service after being suspended.
*/
static void
xen_intr_resume(struct pic *unused)
xen_intr_resume(struct pic *unused, bool suspend_cancelled)
{
u_int port;
shared_info_t *s = HYPERVISOR_shared_info;
struct xenisrc *isrc;
u_int isrc_idx;
int i;
/*
* Mask events for all ports. They will be unmasked after
* drivers have re-registered their handlers.
*/
for (port = 0; port < NR_EVENT_CHANNELS; port++)
evtchn_mask_port(port);
if (suspend_cancelled)
return;
/* Reset the per-CPU masks */
CPU_FOREACH(i) {
struct xen_intr_pcpu_data *pcpu;
pcpu = DPCPU_ID_PTR(i, xen_intr_pcpu);
memset(pcpu->evtchn_enabled,
i == 0 ? ~0 : 0, sizeof(pcpu->evtchn_enabled));
}
/* Mask all event channels. */
for (i = 0; i < nitems(s->evtchn_mask); i++)
atomic_store_rel_long(&s->evtchn_mask[i], ~0);
/* Remove port -> isrc mappings */
memset(xen_intr_port_to_isrc, 0, sizeof(xen_intr_port_to_isrc));
/* Free unused isrcs and rebind VIRQs and IPIs */
for (isrc_idx = 0; isrc_idx < xen_intr_isrc_count; isrc_idx++) {
u_int vector;
vector = FIRST_EVTCHN_INT + isrc_idx;
isrc = (struct xenisrc *)intr_lookup_source(vector);
if (isrc != NULL) {
isrc->xi_port = 0;
switch (isrc->xi_type) {
case EVTCHN_TYPE_IPI:
xen_rebind_ipi(isrc);
break;
case EVTCHN_TYPE_VIRQ:
xen_rebind_virq(isrc);
break;
default:
isrc->xi_cpu = 0;
break;
}
}
}
}
/**
@ -693,6 +793,7 @@ xen_intr_config_intr(struct intsrc *isrc, enum intr_trigger trig,
static int
xen_intr_assign_cpu(struct intsrc *base_isrc, u_int apic_id)
{
#ifdef SMP
struct evtchn_bind_vcpu bind_vcpu;
struct xenisrc *isrc;
u_int to_cpu, acpi_id;
@ -749,6 +850,9 @@ xen_intr_assign_cpu(struct intsrc *base_isrc, u_int apic_id)
}
mtx_unlock(&xen_intr_isrc_lock);
return (0);
#else
return (EOPNOTSUPP);
#endif
}
/*------------------- Virtual Interrupt Source PIC Functions -----------------*/
@ -979,8 +1083,11 @@ xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu,
error = xen_intr_bind_isrc(&isrc, bind_virq.port, EVTCHN_TYPE_VIRQ, dev,
filter, handler, arg, flags, port_handlep);
#ifdef SMP
if (error == 0)
error = intr_event_bind(isrc->xi_intsrc.is_event, cpu);
#endif
if (error != 0) {
evtchn_close_t close = { .port = bind_virq.port };
@ -991,6 +1098,7 @@ xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu,
return (error);
}
#ifdef SMP
if (isrc->xi_cpu != cpu) {
/*
* Too early in the boot process for the generic interrupt
@ -1000,12 +1108,15 @@ xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu,
*/
xen_intr_assign_cpu(&isrc->xi_intsrc, cpu_apic_ids[cpu]);
}
#endif
/*
* The Event Channel API opened this port, so it is
* responsible for closing it automatically on unbind.
*/
isrc->xi_close = 1;
isrc->xi_virq = virq;
return (0);
}
@ -1014,6 +1125,7 @@ xen_intr_alloc_and_bind_ipi(device_t dev, u_int cpu,
driver_filter_t filter, enum intr_type flags,
xen_intr_handle_t *port_handlep)
{
#ifdef SMP
int acpi_id = pcpu_find(cpu)->pc_acpi_id;
struct xenisrc *isrc;
struct evtchn_bind_ipi bind_ipi = { .vcpu = acpi_id };
@ -1063,6 +1175,9 @@ xen_intr_alloc_and_bind_ipi(device_t dev, u_int cpu,
*/
isrc->xi_close = 1;
return (0);
#else
return (EOPNOTSUPP);
#endif
}
int

sys/xen/hvm.h:

@ -93,6 +93,6 @@ enum {
void xen_hvm_set_callback(device_t);
void xen_hvm_suspend(void);
void xen_hvm_resume(void);
void xen_hvm_resume(bool suspend_cancelled);
void xen_hvm_init_cpu(void);
#endif /* __XEN_HVM_H__ */