freebsd-dev/sys/dev/xen/xenstore/xenstore_dev.c
Roger Pau Monné 4e4e43dc9e xen: allow limiting the amount of duplicated pending xenstore watches
Xenstore watches received are queued in a list and processed in a
deferred thread. Such queuing was done without any checking, so a
guest could potentially trigger resource starvation against the
FreeBSD kernel if the kernel is watching any user-controlled xenstore
path.

Allow limiting the number of pending events a watch can accumulate in
order to prevent a remote guest from triggering this resource
starvation issue.

For the PV device backends and frontends this limitation is only
applied to the other end's /state node, which is limited to 1 pending
event; the rest of the watched paths can still have unlimited pending
watches because they are either local or controlled by a privileged
domain.

The xenstore user-space device gets special treatment as it's not
possible for the kernel to know whether the paths being watched by
user-space processes are controlled by a guest domain. For this
reason, watches set by the xenstore user-space device are limited to
1000 pending events. Note that this limit can be adjusted using the
max_pending_watch_events sysctl of the device.

This is XSA-349.

Sponsored by:	Citrix Systems R&D
MFC after:	3 days
2020-12-30 11:18:26 +01:00
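
As an illustration of the mechanism described above, the sketch below shows how
a kernel consumer might cap a watch on a guest-controlled node at a single
pending event. It reuses the struct xs_watch fields and the xs_register_watch()
call that appear in xs_dev_write() further down; the function names, callback
body and path argument are hypothetical and not part of this file.

/* Illustrative sketch only; not part of xenstore_dev.c. */
static void
example_state_changed(struct xs_watch *watch, const char **vec,
    unsigned int len)
{
	/* React to the state change reported in vec[XS_WATCH_PATH]. */
}

static int
example_watch_otherend_state(struct xs_watch *watch, const char *state_path)
{
	watch->node = strdup(state_path, M_XENSTORE);
	watch->callback = example_state_changed;
	watch->callback_data = (uintptr_t)watch;
	/* Guest-controlled path: queue at most one pending event (XSA-349). */
	watch->max_pending = 1;

	return (xs_register_watch(watch));
}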

551 lines
13 KiB
C

/*
 * xenstore_dev.c
 *
 * Driver giving user-space access to the kernel's connection to the
 * XenStore service.
 *
 * Copyright (c) 2005, Christian Limpach
 * Copyright (c) 2005, Rusty Russell, IBM Corporation
 *
 * This file may be distributed separately from the Linux kernel, or
 * incorporated into other software packages, subject to the following license:
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this source file (the "Software"), to deal in the Software without
 * restriction, including without limitation the rights to use, copy, modify,
 * merge, publish, distribute, sublicense, and/or sell copies of the Software,
 * and to permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/cdefs.h>
#include <sys/errno.h>
#include <sys/uio.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/conf.h>
#include <sys/module.h>
#include <sys/selinfo.h>
#include <sys/sysctl.h>
#include <sys/poll.h>

#include <xen/xen-os.h>
#include <xen/hypervisor.h>

#include <xen/xenstore/xenstorevar.h>
#include <xen/xenstore/xenstore_internal.h>
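
/*
 * Default cap on the number of watch events that a watch registered
 * through this device may have pending at once.  Exposed to userspace
 * as the max_pending_watch_events sysctl in xs_dev_attach() (XSA-349).
 */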
static unsigned int max_pending_watches = 1000;

struct xs_dev_transaction {
	LIST_ENTRY(xs_dev_transaction) list;
	struct xs_transaction handle;
};

struct xs_dev_watch {
	LIST_ENTRY(xs_dev_watch) list;
	struct xs_watch watch;
	char *token;
	struct xs_dev_data *user;
};

struct xs_dev_data {
	/* In-progress transaction. */
	LIST_HEAD(, xs_dev_transaction) transactions;

	/* Active watches. */
	LIST_HEAD(, xs_dev_watch) watches;

	/* Partial request. */
	unsigned int len;
	union {
		struct xsd_sockmsg msg;
		char buffer[PAGE_SIZE];
	} u;

	/* Response queue. */
#define MASK_READ_IDX(idx) ((idx)&(PAGE_SIZE-1))
	char read_buffer[PAGE_SIZE];
	unsigned int read_cons, read_prod;

	/* Serializes writes to the read buffer. */
	struct mtx lock;

	/* Polling structure (for reads only ATM). */
	struct selinfo ev_rsel;
};
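
/*
 * Copy a response or watch event into the per-open ring buffer and wake
 * any threads sleeping in read or waiting in poll/select.  Callers hold
 * u->lock while queuing.
 */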
static void
xs_queue_reply(struct xs_dev_data *u, const char *data, unsigned int len)
{
	unsigned int i;

	for (i = 0; i < len; i++, u->read_prod++)
		u->read_buffer[MASK_READ_IDX(u->read_prod)] = data[i];

	KASSERT((u->read_prod - u->read_cons) <= sizeof(u->read_buffer),
	    ("xenstore reply too big"));

	wakeup(u);
	selwakeup(&u->ev_rsel);
}

static const char *
xs_dev_error_to_string(int error)
{
	unsigned int i;

	for (i = 0; i < nitems(xsd_errors); i++)
		if (xsd_errors[i].errnum == error)
			return (xsd_errors[i].errstring);

	return (NULL);
}

static void
xs_dev_return_error(struct xs_dev_data *u, int error, int req_id, int tx_id)
{
	struct xsd_sockmsg msg;
	const char *payload;

	msg.type = XS_ERROR;
	msg.req_id = req_id;
	msg.tx_id = tx_id;
	payload = NULL;

	payload = xs_dev_error_to_string(error);
	if (payload == NULL)
		payload = xs_dev_error_to_string(EINVAL);
	KASSERT(payload != NULL, ("Unable to find string for EINVAL errno"));

	msg.len = strlen(payload) + 1;

	mtx_lock(&u->lock);
	xs_queue_reply(u, (char *)&msg, sizeof(msg));
	xs_queue_reply(u, payload, msg.len);
	mtx_unlock(&u->lock);
}

static int
xs_dev_watch_message_parse_string(const char **p, const char *end,
    const char **string_r)
{
	const char *nul;

	nul = memchr(*p, 0, end - *p);
	if (!nul)
		return (EINVAL);

	*string_r = *p;
	*p = nul+1;

	return (0);
}

static int
xs_dev_watch_message_parse(const struct xsd_sockmsg *msg, const char **path_r,
    const char **token_r)
{
	const char *p, *end;
	int error;

	p = (const char *)msg + sizeof(*msg);
	end = p + msg->len;
	KASSERT(p <= end, ("payload overflow"));

	error = xs_dev_watch_message_parse_string(&p, end, path_r);
	if (error)
		return (error);
	error = xs_dev_watch_message_parse_string(&p, end, token_r);
	if (error)
		return (error);

	return (0);
}

static struct xs_dev_watch *
xs_dev_find_watch(struct xs_dev_data *u, const char *token)
{
	struct xs_dev_watch *watch;

	LIST_FOREACH(watch, &u->watches, list)
		if (strcmp(watch->token, token) == 0)
			return (watch);

	return (NULL);
}
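
/*
 * Watch callback: build an XS_WATCH_EVENT message whose payload is the
 * fired path followed by the user-supplied token, and queue it on the
 * owning file descriptor's read buffer.
 */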
static void
xs_dev_watch_cb(struct xs_watch *watch, const char **vec, unsigned int len)
{
	struct xs_dev_watch *dwatch;
	struct xsd_sockmsg msg;
	char *payload;

	dwatch = (struct xs_dev_watch *)watch->callback_data;
	msg.type = XS_WATCH_EVENT;
	msg.req_id = msg.tx_id = 0;
	msg.len = strlen(vec[XS_WATCH_PATH]) + strlen(dwatch->token) + 2;

	payload = malloc(msg.len, M_XENSTORE, M_WAITOK);
	strcpy(payload, vec[XS_WATCH_PATH]);
	strcpy(&payload[strlen(vec[XS_WATCH_PATH]) + 1], dwatch->token);
	mtx_lock(&dwatch->user->lock);
	xs_queue_reply(dwatch->user, (char *)&msg, sizeof(msg));
	xs_queue_reply(dwatch->user, payload, msg.len);
	mtx_unlock(&dwatch->user->lock);
	free(payload, M_XENSTORE);
}

static struct xs_dev_transaction *
xs_dev_find_transaction(struct xs_dev_data *u, uint32_t tx_id)
{
	struct xs_dev_transaction *trans;

	LIST_FOREACH(trans, &u->transactions, list)
		if (trans->handle.id == tx_id)
			return (trans);

	return (NULL);
}
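
/*
 * Copy queued replies and watch events out to userspace one byte at a
 * time, sleeping (interruptibly, with a periodic timeout) until data
 * becomes available.
 */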
static int
xs_dev_read(struct cdev *dev, struct uio *uio, int ioflag)
{
	int error;
	struct xs_dev_data *u;

	error = devfs_get_cdevpriv((void **)&u);
	if (error != 0)
		return (error);

	while (u->read_prod == u->read_cons) {
		error = tsleep(u, PCATCH, "xsdread", hz/10);
		if (error && error != EWOULDBLOCK)
			return (error);
	}

	while (uio->uio_resid > 0) {
		if (u->read_cons == u->read_prod)
			break;
		error = uiomove(&u->read_buffer[MASK_READ_IDX(u->read_cons)],
		    1, uio);
		if (error)
			return (error);
		u->read_cons++;
	}

	return (0);
}
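
/*
 * Accumulate userspace writes until a complete xsd_sockmsg (header plus
 * payload) has been buffered, then dispatch it: most request types are
 * forwarded to xenstored, while XS_WATCH and XS_UNWATCH are handled here
 * so that watch events can be routed back to this file descriptor.
 * Failures are reported back to the caller as XS_ERROR messages.
 */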
static int
xs_dev_write(struct cdev *dev, struct uio *uio, int ioflag)
{
	int error;
	const char *wpath, *wtoken;
	struct xs_dev_data *u;
	struct xs_dev_transaction *trans;
	struct xs_dev_watch *watch;
	void *reply;
	static const char *ok = "OK";
	int len = uio->uio_resid;

	error = devfs_get_cdevpriv((void **)&u);
	if (error != 0)
		return (error);

	if ((len + u->len) > sizeof(u->u.buffer))
		return (EINVAL);

	error = uiomove(u->u.buffer + u->len, len, uio);
	if (error)
		return (error);

	u->len += len;
	if (u->len < (sizeof(u->u.msg) + u->u.msg.len))
		return (0);

	switch (u->u.msg.type) {
	case XS_TRANSACTION_START:
	case XS_TRANSACTION_END:
	case XS_DIRECTORY:
	case XS_READ:
	case XS_GET_PERMS:
	case XS_RELEASE:
	case XS_GET_DOMAIN_PATH:
	case XS_WRITE:
	case XS_MKDIR:
	case XS_RM:
	case XS_SET_PERMS:
		/* Check that this transaction id is not hijacked. */
		if (u->u.msg.tx_id != 0 &&
		    xs_dev_find_transaction(u, u->u.msg.tx_id) == NULL) {
			error = EINVAL;
			break;
		}
		error = xs_dev_request_and_reply(&u->u.msg, &reply);
		if (!error) {
			if (u->u.msg.type == XS_TRANSACTION_START) {
				trans = malloc(sizeof(*trans), M_XENSTORE,
				    M_WAITOK);
				trans->handle.id = strtoul(reply, NULL, 0);
				LIST_INSERT_HEAD(&u->transactions, trans, list);
			} else if (u->u.msg.type == XS_TRANSACTION_END) {
				trans = xs_dev_find_transaction(u,
				    u->u.msg.tx_id);
				KASSERT(trans != NULL,
				    ("Unable to find transaction"));
				LIST_REMOVE(trans, list);
				free(trans, M_XENSTORE);
			}
			mtx_lock(&u->lock);
			xs_queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
			xs_queue_reply(u, (char *)reply, u->u.msg.len);
			mtx_unlock(&u->lock);
			free(reply, M_XENSTORE);
		}
		break;
	case XS_WATCH:
		u->u.msg.tx_id = 0;
		error = xs_dev_watch_message_parse(&u->u.msg, &wpath, &wtoken);
		if (error)
			break;
		if (xs_dev_find_watch(u, wtoken) != NULL) {
			error = EINVAL;
			break;
		}

		watch = malloc(sizeof(*watch), M_XENSTORE, M_WAITOK);
		watch->watch.node = strdup(wpath, M_XENSTORE);
		watch->watch.callback = xs_dev_watch_cb;
		watch->watch.callback_data = (uintptr_t)watch;
		watch->watch.max_pending = max_pending_watches;
		watch->token = strdup(wtoken, M_XENSTORE);
		watch->user = u;

		error = xs_register_watch(&watch->watch);
		if (error != 0) {
			free(watch->token, M_XENSTORE);
			free(watch->watch.node, M_XENSTORE);
			free(watch, M_XENSTORE);
			break;
		}

		LIST_INSERT_HEAD(&u->watches, watch, list);
		u->u.msg.len = sizeof(ok);
		mtx_lock(&u->lock);
		xs_queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
		xs_queue_reply(u, ok, sizeof(ok));
		mtx_unlock(&u->lock);
		break;
	case XS_UNWATCH:
		u->u.msg.tx_id = 0;
		error = xs_dev_watch_message_parse(&u->u.msg, &wpath, &wtoken);
		if (error)
			break;
		watch = xs_dev_find_watch(u, wtoken);
		if (watch == NULL) {
			error = EINVAL;
			break;
		}

		LIST_REMOVE(watch, list);
		xs_unregister_watch(&watch->watch);
		free(watch->watch.node, M_XENSTORE);
		free(watch->token, M_XENSTORE);
		free(watch, M_XENSTORE);

		u->u.msg.len = sizeof(ok);
		mtx_lock(&u->lock);
		xs_queue_reply(u, (char *)&u->u.msg, sizeof(u->u.msg));
		xs_queue_reply(u, ok, sizeof(ok));
		mtx_unlock(&u->lock);
		break;
	default:
		error = EINVAL;
		break;
	}

	if (error != 0)
		xs_dev_return_error(u, error, u->u.msg.req_id, u->u.msg.tx_id);

	/* Reset the write buffer. */
	u->len = 0;
	return (0);
}

static int
xs_dev_poll(struct cdev *dev, int events, struct thread *td)
{
	struct xs_dev_data *u;
	int error, mask;

	error = devfs_get_cdevpriv((void **)&u);
	if (error != 0)
		return (POLLERR);

	/* we can always write */
	mask = events & (POLLOUT | POLLWRNORM);

	if (events & (POLLIN | POLLRDNORM)) {
		if (u->read_cons != u->read_prod) {
			mask |= events & (POLLIN | POLLRDNORM);
		} else {
			/* Record that someone is waiting */
			selrecord(td, &u->ev_rsel);
		}
	}

	return (mask);
}
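
/*
 * cdevpriv destructor: runs when the last reference to an open instance
 * goes away.  Aborts any transactions still in progress, unregisters the
 * remaining watches and releases the per-open state.
 */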
static void
xs_dev_dtor(void *arg)
{
	struct xs_dev_data *u = arg;
	struct xs_dev_transaction *trans, *tmpt;
	struct xs_dev_watch *watch, *tmpw;

	seldrain(&u->ev_rsel);

	LIST_FOREACH_SAFE(trans, &u->transactions, list, tmpt) {
		xs_transaction_end(trans->handle, 1);
		LIST_REMOVE(trans, list);
		free(trans, M_XENSTORE);
	}

	LIST_FOREACH_SAFE(watch, &u->watches, list, tmpw) {
		LIST_REMOVE(watch, list);
		xs_unregister_watch(&watch->watch);
		free(watch->watch.node, M_XENSTORE);
		free(watch->token, M_XENSTORE);
		free(watch, M_XENSTORE);
	}
	mtx_destroy(&u->lock);

	free(u, M_XENSTORE);
}

static int
xs_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
{
	struct xs_dev_data *u;
	int error;

	u = malloc(sizeof(*u), M_XENSTORE, M_WAITOK|M_ZERO);
	mtx_init(&u->lock, "xsdev_lock", NULL, MTX_DEF);
	LIST_INIT(&u->transactions);
	LIST_INIT(&u->watches);
	error = devfs_set_cdevpriv(u, xs_dev_dtor);
	if (error != 0)
		free(u, M_XENSTORE);

	return (error);
}

static struct cdevsw xs_dev_cdevsw = {
	.d_version = D_VERSION,
	.d_read = xs_dev_read,
	.d_write = xs_dev_write,
	.d_open = xs_dev_open,
	.d_poll = xs_dev_poll,
	.d_name = "xs_dev",
};

/*------------------ Private Device Attachment Functions --------------------*/
/**
 * \brief Identify instances of this device type in the system.
 *
 * \param driver  The driver performing this identify action.
 * \param parent  The NewBus parent device for any devices this method adds.
 */
static void
xs_dev_identify(driver_t *driver __unused, device_t parent)
{
	/*
	 * A single device instance for our driver is always present
	 * in a system operating under Xen.
	 */
	BUS_ADD_CHILD(parent, 0, driver->name, 0);
}

/**
 * \brief Probe for the existence of the Xenstore device
 *
 * \param dev  NewBus device_t for this instance.
 *
 * \return  Always returns 0 indicating success.
 */
static int
xs_dev_probe(device_t dev)
{
	device_set_desc(dev, "Xenstore user-space device");
	return (0);
}

/**
 * \brief Attach the Xenstore device.
 *
 * \param dev  NewBus device_t for this instance.
 *
 * \return  On success, 0. Otherwise an errno value indicating the
 *          type of failure.
 */
static int
xs_dev_attach(device_t dev)
{
	struct cdev *xs_cdev;
	struct sysctl_ctx_list *sysctl_ctx;
	struct sysctl_oid *sysctl_tree;

	sysctl_ctx = device_get_sysctl_ctx(dev);
	sysctl_tree = device_get_sysctl_tree(dev);
	if (sysctl_ctx == NULL || sysctl_tree == NULL)
		return (EINVAL);

	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
	    "max_pending_watch_events", CTLFLAG_RW, &max_pending_watches, 0,
	    "maximum amount of pending watch events to be delivered");

	xs_cdev = make_dev_credf(MAKEDEV_ETERNAL, &xs_dev_cdevsw, 0, NULL,
	    UID_ROOT, GID_WHEEL, 0400, "xen/xenstore");
	if (xs_cdev == NULL)
		return (EINVAL);

	return (0);
}

/*-------------------- Private Device Attachment Data -----------------------*/
static device_method_t xs_dev_methods[] = {
	/* Device interface */
	DEVMETHOD(device_identify, xs_dev_identify),
	DEVMETHOD(device_probe, xs_dev_probe),
	DEVMETHOD(device_attach, xs_dev_attach),

	DEVMETHOD_END
};

DEFINE_CLASS_0(xs_dev, xs_dev_driver, xs_dev_methods, 0);
devclass_t xs_dev_devclass;

DRIVER_MODULE(xs_dev, xenstore, xs_dev_driver, xs_dev_devclass,
    NULL, NULL);