Roger Pau Monné 4e4e43dc9e xen: allow limiting the amount of duplicated pending xenstore watches
Xenstore watches received are queued in a list and processed in a
deferred thread. Such queuing was done without any checking, so a
guest could potentially trigger a resource starvation against the
FreeBSD kernel if such kernel is watching any user-controlled xenstore
path.

Allowing limiting the amount of pending events a watch can accumulate
to prevent a remote guest from triggering this resource starvation
issue.

For the PV device backends and frontends this limitation is only
applied to the other end /state node, which is limited to 1 pending
event, the rest of the watched paths can still have unlimited pending
watches because they are either local or controlled by a privileged
domain.

The xenstore user-space device gets special treatment as it's not
possible for the kernel to know whether the paths being watched by
user-space processes are controlled by a guest domain. For this reason
watches set by the xenstore user-space device are limited to 1000
pending events. Note this can be modified using the
max_pending_watch_events sysctl of the device.

This is XSA-349.

Sponsored by:	Citrix Systems R&D
MFC after:	3 days
2020-12-30 11:18:26 +01:00

488 lines
14 KiB
C

/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD AND BSD-4-Clause
*
* Copyright (c) 2010 Justin T. Gibbs, Spectra Logic Corporation
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions, and the following disclaimer,
* without modification.
* 2. Redistributions in binary form must reproduce at minimum a disclaimer
* substantially similar to the "NO WARRANTY" disclaimer below
* ("Disclaimer") and any redistribution must be conditioned upon
* including a substantially similar Disclaimer requirement for further
* binary redistribution.
*
* NO WARRANTY
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
* STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
* IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGES.
*/
/*-
* PV suspend/resume support:
*
* Copyright (c) 2004 Christian Limpach.
* Copyright (c) 2004-2006,2008 Kip Macy
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. All advertising materials mentioning features or use of this software
* must display the following acknowledgement:
* This product includes software developed by Christian Limpach.
* 4. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/*-
* HVM suspend/resume support:
*
* Copyright (c) 2008 Citrix Systems, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
/**
* \file control.c
*
* \brief Device driver to repond to control domain events that impact
* this VM.
*/
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/bio.h>
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/disk.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/kdb.h>
#include <sys/module.h>
#include <sys/mount.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/reboot.h>
#include <sys/rman.h>
#include <sys/sched.h>
#include <sys/taskqueue.h>
#include <sys/types.h>
#include <sys/vnode.h>
#include <sys/sched.h>
#include <sys/smp.h>
#include <sys/eventhandler.h>
#include <sys/timetc.h>
#include <geom/geom.h>
#include <machine/_inttypes.h>
#include <machine/intr_machdep.h>
#include <x86/apicvar.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/vm_kern.h>
#include <xen/xen-os.h>
#include <xen/blkif.h>
#include <xen/evtchn.h>
#include <xen/gnttab.h>
#include <xen/xen_intr.h>
#include <xen/hvm.h>
#include <xen/interface/event_channel.h>
#include <xen/interface/grant_table.h>
#include <xen/xenbus/xenbusvar.h>
bool xen_suspend_cancelled;
/*--------------------------- Forward Declarations --------------------------*/
/** Function signature for shutdown event handlers. */
typedef void (xctrl_shutdown_handler_t)(void);
static xctrl_shutdown_handler_t xctrl_poweroff;
static xctrl_shutdown_handler_t xctrl_reboot;
static xctrl_shutdown_handler_t xctrl_suspend;
static xctrl_shutdown_handler_t xctrl_crash;
/*-------------------------- Private Data Structures -------------------------*/
/** Element type for lookup table of event name to handler. */
struct xctrl_shutdown_reason {
const char *name;
xctrl_shutdown_handler_t *handler;
};
/** Lookup table for shutdown event name to handler. */
static const struct xctrl_shutdown_reason xctrl_shutdown_reasons[] = {
{ "poweroff", xctrl_poweroff },
{ "reboot", xctrl_reboot },
{ "suspend", xctrl_suspend },
{ "crash", xctrl_crash },
{ "halt", xctrl_poweroff },
};
struct xctrl_softc {
struct xs_watch xctrl_watch;
};
/*------------------------------ Event Handlers ------------------------------*/
static void
xctrl_poweroff()
{
shutdown_nice(RB_POWEROFF|RB_HALT);
}
static void
xctrl_reboot()
{
shutdown_nice(0);
}
static void
xctrl_suspend()
{
#ifdef SMP
cpuset_t cpu_suspend_map;
#endif
EVENTHANDLER_INVOKE(power_suspend_early);
xs_lock();
stop_all_proc();
xs_unlock();
suspend_all_fs();
EVENTHANDLER_INVOKE(power_suspend);
#ifdef EARLY_AP_STARTUP
MPASS(mp_ncpus == 1 || smp_started);
thread_lock(curthread);
sched_bind(curthread, 0);
thread_unlock(curthread);
#else
if (smp_started) {
thread_lock(curthread);
sched_bind(curthread, 0);
thread_unlock(curthread);
}
#endif
KASSERT((PCPU_GET(cpuid) == 0), ("Not running on CPU#0"));
/*
* Be sure to hold Giant across DEVICE_SUSPEND/RESUME since non-MPSAFE
* drivers need this.
*/
mtx_lock(&Giant);
if (DEVICE_SUSPEND(root_bus) != 0) {
mtx_unlock(&Giant);
printf("%s: device_suspend failed\n", __func__);
return;
}
#ifdef SMP
#ifdef EARLY_AP_STARTUP
/*
* Suspend other CPUs. This prevents IPIs while we
* are resuming, and will allow us to reset per-cpu
* vcpu_info on resume.
*/
cpu_suspend_map = all_cpus;
CPU_CLR(PCPU_GET(cpuid), &cpu_suspend_map);
if (!CPU_EMPTY(&cpu_suspend_map))
suspend_cpus(cpu_suspend_map);
#else
CPU_ZERO(&cpu_suspend_map); /* silence gcc */
if (smp_started) {
/*
* Suspend other CPUs. This prevents IPIs while we
* are resuming, and will allow us to reset per-cpu
* vcpu_info on resume.
*/
cpu_suspend_map = all_cpus;
CPU_CLR(PCPU_GET(cpuid), &cpu_suspend_map);
if (!CPU_EMPTY(&cpu_suspend_map))
suspend_cpus(cpu_suspend_map);
}
#endif
#endif
/*
* Prevent any races with evtchn_interrupt() handler.
*/
disable_intr();
intr_suspend();
xen_hvm_suspend();
xen_suspend_cancelled = !!HYPERVISOR_suspend(0);
if (!xen_suspend_cancelled) {
xen_hvm_resume(false);
}
intr_resume(xen_suspend_cancelled != 0);
enable_intr();
/*
* Reset grant table info.
*/
if (!xen_suspend_cancelled) {
gnttab_resume(NULL);
}
#ifdef SMP
if (!CPU_EMPTY(&cpu_suspend_map)) {
/*
* Now that event channels have been initialized,
* resume CPUs.
*/
resume_cpus(cpu_suspend_map);
/* Send an IPI_BITMAP in case there are pending bitmap IPIs. */
lapic_ipi_vectored(IPI_BITMAP_VECTOR, APIC_IPI_DEST_ALL);
}
#endif
/*
* FreeBSD really needs to add DEVICE_SUSPEND_CANCEL or
* similar.
*/
DEVICE_RESUME(root_bus);
mtx_unlock(&Giant);
/*
* Warm up timecounter again and reset system clock.
*/
timecounter->tc_get_timecount(timecounter);
inittodr(time_second);
#ifdef EARLY_AP_STARTUP
thread_lock(curthread);
sched_unbind(curthread);
thread_unlock(curthread);
#else
if (smp_started) {
thread_lock(curthread);
sched_unbind(curthread);
thread_unlock(curthread);
}
#endif
resume_all_fs();
resume_all_proc();
EVENTHANDLER_INVOKE(power_resume);
if (bootverbose)
printf("System resumed after suspension\n");
}
static void
xctrl_crash()
{
panic("Xen directed crash");
}
static void
xen_pv_shutdown_final(void *arg, int howto)
{
/*
* Inform the hypervisor that shutdown is complete.
* This is not necessary in HVM domains since Xen
* emulates ACPI in that mode and FreeBSD's ACPI
* support will request this transition.
*/
if (howto & (RB_HALT | RB_POWEROFF))
HYPERVISOR_shutdown(SHUTDOWN_poweroff);
else
HYPERVISOR_shutdown(SHUTDOWN_reboot);
}
/*------------------------------ Event Reception -----------------------------*/
static void
xctrl_on_watch_event(struct xs_watch *watch, const char **vec, unsigned int len)
{
const struct xctrl_shutdown_reason *reason;
const struct xctrl_shutdown_reason *last_reason;
char *result;
int error;
int result_len;
error = xs_read(XST_NIL, "control", "shutdown",
&result_len, (void **)&result);
if (error != 0 || result_len == 0)
return;
/* Acknowledge the request by writing back an empty string. */
error = xs_write(XST_NIL, "control", "shutdown", "");
if (error != 0)
printf("unable to ack shutdown request, proceeding anyway\n");
reason = xctrl_shutdown_reasons;
last_reason = reason + nitems(xctrl_shutdown_reasons);
while (reason < last_reason) {
if (!strcmp(result, reason->name)) {
reason->handler();
break;
}
reason++;
}
free(result, M_XENSTORE);
}
/*------------------ Private Device Attachment Functions --------------------*/
/**
* \brief Identify instances of this device type in the system.
*
* \param driver The driver performing this identify action.
* \param parent The NewBus parent device for any devices this method adds.
*/
static void
xctrl_identify(driver_t *driver __unused, device_t parent)
{
/*
* A single device instance for our driver is always present
* in a system operating under Xen.
*/
BUS_ADD_CHILD(parent, 0, driver->name, 0);
}
/**
* \brief Probe for the existence of the Xen Control device
*
* \param dev NewBus device_t for this Xen control instance.
*
* \return Always returns 0 indicating success.
*/
static int
xctrl_probe(device_t dev)
{
device_set_desc(dev, "Xen Control Device");
return (BUS_PROBE_NOWILDCARD);
}
/**
* \brief Attach the Xen control device.
*
* \param dev NewBus device_t for this Xen control instance.
*
* \return On success, 0. Otherwise an errno value indicating the
* type of failure.
*/
static int
xctrl_attach(device_t dev)
{
struct xctrl_softc *xctrl;
xctrl = device_get_softc(dev);
/* Activate watch */
xctrl->xctrl_watch.node = "control/shutdown";
xctrl->xctrl_watch.callback = xctrl_on_watch_event;
xctrl->xctrl_watch.callback_data = (uintptr_t)xctrl;
/*
* We don't care about the path updated, just about the value changes
* on that single node, hence there's no need to queue more that one
* event.
*/
xctrl->xctrl_watch.max_pending = 1;
xs_register_watch(&xctrl->xctrl_watch);
if (xen_pv_domain())
EVENTHANDLER_REGISTER(shutdown_final, xen_pv_shutdown_final, NULL,
SHUTDOWN_PRI_LAST);
return (0);
}
/**
* \brief Detach the Xen control device.
*
* \param dev NewBus device_t for this Xen control device instance.
*
* \return On success, 0. Otherwise an errno value indicating the
* type of failure.
*/
static int
xctrl_detach(device_t dev)
{
struct xctrl_softc *xctrl;
xctrl = device_get_softc(dev);
/* Release watch */
xs_unregister_watch(&xctrl->xctrl_watch);
return (0);
}
/*-------------------- Private Device Attachment Data -----------------------*/
static device_method_t xctrl_methods[] = {
/* Device interface */
DEVMETHOD(device_identify, xctrl_identify),
DEVMETHOD(device_probe, xctrl_probe),
DEVMETHOD(device_attach, xctrl_attach),
DEVMETHOD(device_detach, xctrl_detach),
DEVMETHOD_END
};
DEFINE_CLASS_0(xctrl, xctrl_driver, xctrl_methods, sizeof(struct xctrl_softc));
devclass_t xctrl_devclass;
DRIVER_MODULE(xctrl, xenstore, xctrl_driver, xctrl_devclass, NULL, NULL);