xen: allow limiting the amount of duplicated pending xenstore watches
Xenstore watches received are queued in a list and processed in a deferred thread. Such queuing was done without any checking, so a guest could potentially trigger a resource starvation against the FreeBSD kernel if such kernel is watching any user-controlled xenstore path. Allow limiting the amount of pending events a watch can accumulate in order to prevent a remote guest from triggering this resource starvation issue. For the PV device backends and frontends this limitation is only applied to the other end /state node, which is limited to 1 pending event; the rest of the watched paths can still have unlimited pending watches because they are either local or controlled by a privileged domain. The xenstore user-space device gets special treatment, as it's not possible for the kernel to know whether the paths being watched by user-space processes are controlled by a guest domain. For this reason watches set by the xenstore user-space device are limited to 1000 pending events. Note this can be modified using the max_pending_watch_events sysctl of the device. This is XSA-349. Sponsored by: Citrix Systems R&D MFC after: 3 days
This commit is contained in:
parent
2ae75536d3
commit
4e4e43dc9e
@ -312,7 +312,8 @@ set_new_target(unsigned long target)
|
||||
|
||||
static struct xs_watch target_watch =
|
||||
{
|
||||
.node = "memory/target"
|
||||
.node = "memory/target",
|
||||
.max_pending = 1,
|
||||
};
|
||||
|
||||
/* React to a change in the target key */
|
||||
|
@ -3746,6 +3746,12 @@ xbb_attach(device_t dev)
|
||||
xbb->hotplug_watch.callback = xbb_attach_disk;
|
||||
KASSERT(xbb->hotplug_watch.node == NULL, ("watch node already setup"));
|
||||
xbb->hotplug_watch.node = strdup(sbuf_data(watch_path), M_XENBLOCKBACK);
|
||||
/*
|
||||
* We don't care about the path updated, just about the value changes
|
||||
* on that single node, hence there's no need to queue more than one
|
||||
* event.
|
||||
*/
|
||||
xbb->hotplug_watch.max_pending = 1;
|
||||
sbuf_delete(watch_path);
|
||||
error = xs_register_watch(&xbb->hotplug_watch);
|
||||
if (error != 0) {
|
||||
|
@ -434,6 +434,12 @@ xctrl_attach(device_t dev)
|
||||
xctrl->xctrl_watch.node = "control/shutdown";
|
||||
xctrl->xctrl_watch.callback = xctrl_on_watch_event;
|
||||
xctrl->xctrl_watch.callback_data = (uintptr_t)xctrl;
|
||||
/*
|
||||
* We don't care about the path updated, just about the value changes
|
||||
* on that single node, hence there's no need to queue more than one
|
||||
* event.
|
||||
*/
|
||||
xctrl->xctrl_watch.max_pending = 1;
|
||||
xs_register_watch(&xctrl->xctrl_watch);
|
||||
|
||||
if (xen_pv_domain())
|
||||
|
@ -655,12 +655,17 @@ xs_process_msg(enum xsd_sockmsg_type *type)
|
||||
mtx_lock(&xs.registered_watches_lock);
|
||||
msg->u.watch.handle = find_watch(
|
||||
msg->u.watch.vec[XS_WATCH_TOKEN]);
|
||||
if (msg->u.watch.handle != NULL) {
|
||||
mtx_lock(&xs.watch_events_lock);
|
||||
mtx_lock(&xs.watch_events_lock);
|
||||
if (msg->u.watch.handle != NULL &&
|
||||
(!msg->u.watch.handle->max_pending ||
|
||||
msg->u.watch.handle->pending <
|
||||
msg->u.watch.handle->max_pending)) {
|
||||
msg->u.watch.handle->pending++;
|
||||
TAILQ_INSERT_TAIL(&xs.watch_events, msg, list);
|
||||
wakeup(&xs.watch_events);
|
||||
mtx_unlock(&xs.watch_events_lock);
|
||||
} else {
|
||||
mtx_unlock(&xs.watch_events_lock);
|
||||
free(msg->u.watch.vec, M_XENSTORE);
|
||||
free(msg, M_XENSTORE);
|
||||
}
|
||||
@ -981,8 +986,10 @@ xenwatch_thread(void *unused)
|
||||
|
||||
mtx_lock(&xs.watch_events_lock);
|
||||
msg = TAILQ_FIRST(&xs.watch_events);
|
||||
if (msg)
|
||||
if (msg) {
|
||||
TAILQ_REMOVE(&xs.watch_events, msg, list);
|
||||
msg->u.watch.handle->pending--;
|
||||
}
|
||||
mtx_unlock(&xs.watch_events_lock);
|
||||
|
||||
if (msg != NULL) {
|
||||
@ -1577,6 +1584,7 @@ xs_register_watch(struct xs_watch *watch)
|
||||
char token[sizeof(watch) * 2 + 1];
|
||||
int error;
|
||||
|
||||
watch->pending = 0;
|
||||
sprintf(token, "%lX", (long)watch);
|
||||
|
||||
mtx_lock(&xs.registered_watches_lock);
|
||||
|
@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/conf.h>
|
||||
#include <sys/module.h>
|
||||
#include <sys/selinfo.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <sys/poll.h>
|
||||
|
||||
#include <xen/xen-os.h>
|
||||
@ -52,6 +53,8 @@ __FBSDID("$FreeBSD$");
|
||||
#include <xen/xenstore/xenstorevar.h>
|
||||
#include <xen/xenstore/xenstore_internal.h>
|
||||
|
||||
static unsigned int max_pending_watches = 1000;
|
||||
|
||||
struct xs_dev_transaction {
|
||||
LIST_ENTRY(xs_dev_transaction) list;
|
||||
struct xs_transaction handle;
|
||||
@ -333,6 +336,7 @@ xs_dev_write(struct cdev *dev, struct uio *uio, int ioflag)
|
||||
watch->watch.node = strdup(wpath, M_XENSTORE);
|
||||
watch->watch.callback = xs_dev_watch_cb;
|
||||
watch->watch.callback_data = (uintptr_t)watch;
|
||||
watch->watch.max_pending = max_pending_watches;
|
||||
watch->token = strdup(wtoken, M_XENSTORE);
|
||||
watch->user = u;
|
||||
|
||||
@ -509,6 +513,17 @@ static int
|
||||
xs_dev_attach(device_t dev)
|
||||
{
|
||||
struct cdev *xs_cdev;
|
||||
struct sysctl_ctx_list *sysctl_ctx;
|
||||
struct sysctl_oid *sysctl_tree;
|
||||
|
||||
sysctl_ctx = device_get_sysctl_ctx(dev);
|
||||
sysctl_tree = device_get_sysctl_tree(dev);
|
||||
if (sysctl_ctx == NULL || sysctl_tree == NULL)
|
||||
return (EINVAL);
|
||||
|
||||
SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
|
||||
"max_pending_watch_events", CTLFLAG_RW, &max_pending_watches, 0,
|
||||
"maximum amount of pending watch events to be delivered");
|
||||
|
||||
xs_cdev = make_dev_credf(MAKEDEV_ETERNAL, &xs_dev_cdevsw, 0, NULL,
|
||||
UID_ROOT, GID_WHEEL, 0400, "xen/xenstore");
|
||||
|
@ -701,10 +701,21 @@ xenbusb_add_device(device_t dev, const char *type, const char *id)
|
||||
ivars->xd_otherend_watch.node = statepath;
|
||||
ivars->xd_otherend_watch.callback = xenbusb_otherend_watch_cb;
|
||||
ivars->xd_otherend_watch.callback_data = (uintptr_t)ivars;
|
||||
/*
|
||||
* Other end state node watch, limit to one pending event
|
||||
* to prevent frontends from queuing too many events that
|
||||
* could cause resource starvation.
|
||||
*/
|
||||
ivars->xd_otherend_watch.max_pending = 1;
|
||||
|
||||
ivars->xd_local_watch.node = ivars->xd_node;
|
||||
ivars->xd_local_watch.callback = xenbusb_local_watch_cb;
|
||||
ivars->xd_local_watch.callback_data = (uintptr_t)ivars;
|
||||
/*
|
||||
* Watch our local path, only writable by us or a privileged
|
||||
* domain, no need to limit.
|
||||
*/
|
||||
ivars->xd_local_watch.max_pending = 0;
|
||||
|
||||
mtx_lock(&xbs->xbs_lock);
|
||||
xbs->xbs_connecting_children++;
|
||||
@ -763,6 +774,12 @@ xenbusb_attach(device_t dev, char *bus_node, u_int id_components)
|
||||
xbs->xbs_device_watch.node = bus_node;
|
||||
xbs->xbs_device_watch.callback = xenbusb_devices_changed;
|
||||
xbs->xbs_device_watch.callback_data = (uintptr_t)xbs;
|
||||
/*
|
||||
* Allow for unlimited pending watches, as those are local paths
|
||||
* either controlled by the guest or only writable by privileged
|
||||
* domains.
|
||||
*/
|
||||
xbs->xbs_device_watch.max_pending = 0;
|
||||
|
||||
TASK_INIT(&xbs->xbs_probe_children, 0, xenbusb_probe_children_cb, dev);
|
||||
|
||||
|
@ -70,6 +70,15 @@ struct xs_watch
|
||||
|
||||
/* Callback client data untouched by the XenStore watch mechanism. */
|
||||
uintptr_t callback_data;
|
||||
|
||||
/* Maximum number of pending watch events to be delivered. */
|
||||
unsigned int max_pending;
|
||||
|
||||
/*
|
||||
* Private counter used by xenstore to keep track of the pending
|
||||
* watches. Protected by xs.watch_events_lock.
|
||||
*/
|
||||
unsigned int pending;
|
||||
};
|
||||
LIST_HEAD(xs_watch_list, xs_watch);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user