xenstore: remove the suspend sx lock

There's no need to prevent suspend while doing xenstore transactions,
callers of transactions are supposed to be prepared for a transaction
to fail.

This fixes a bug that could be triggered from the xenstore user-space
device, since starting a transaction from user-space would result in
returning there with a sx lock held, that causes a WITNESS check to
trigger.

Tested by:      Nathan Friess <nathan.friess@gmail.com>
Sponsored by:   Citrix Systems R&D
This commit is contained in:
Roger Pau Monné 2018-05-24 10:16:11 +00:00
parent c90a8cf80a
commit 5f8f664619

View File

@ -201,18 +201,6 @@ struct xs_softc {
/** Lock protecting the watch calback list. */
struct mtx watch_events_lock;
/**
* Sleepable lock used to prevent VM suspension while a
* xenstore transaction is outstanding.
*
* Each active transaction holds a shared lock on the
* suspend mutex. Our suspend method blocks waiting
* to acquire an exclusive lock. This guarantees that
* suspend processing will only proceed once all active
* transactions have been retired.
*/
struct sx suspend_mutex;
/**
* The processid of the xenwatch thread.
*/
@ -710,50 +698,6 @@ xs_rcv_thread(void *arg __unused)
}
/*---------------- XenStore Message Request/Reply Processing -----------------*/
/**
* Filter invoked before transmitting any message to the XenStore service.
*
* The role of the filter may expand, but currently serves to manage
* the interactions of messages with transaction state.
*
* \param request_msg_type The message type for the request.
*/
static inline void
xs_request_filter(uint32_t request_msg_type)
{
if (request_msg_type == XS_TRANSACTION_START)
sx_slock(&xs.suspend_mutex);
}
/**
* Filter invoked after transmitting any message to the XenStore service.
*
* The role of the filter may expand, but currently serves to manage
* the interactions of messages with transaction state.
*
* \param request_msg_type The message type for the original request.
* \param reply_msg_type The message type for any received reply.
* \param request_reply_error The error status from the attempt to send
* the request or retrieve the reply.
*/
static inline void
xs_reply_filter(uint32_t request_msg_type,
uint32_t reply_msg_type, int request_reply_error)
{
/*
* The count of transactions drops if we attempted
* to end a transaction (even if that attempt fails
* in error), we receive a transaction end acknowledgement,
* or if our attempt to begin a transaction fails.
*/
if (request_msg_type == XS_TRANSACTION_END
|| (request_reply_error == 0 && reply_msg_type == XS_TRANSACTION_END)
|| (request_msg_type == XS_TRANSACTION_START
&& (request_reply_error != 0 || reply_msg_type == XS_ERROR)))
sx_sunlock(&xs.suspend_mutex);
}
#define xsd_error_count (sizeof(xsd_errors) / sizeof(xsd_errors[0]))
/**
@ -843,15 +787,12 @@ xs_dev_request_and_reply(struct xsd_sockmsg *msg, void **result)
int error;
request_type = msg->type;
xs_request_filter(request_type);
sx_xlock(&xs.request_mutex);
if ((error = xs_write_store(msg, sizeof(*msg) + msg->len)) == 0)
error = xs_read_reply(&msg->type, &msg->len, result);
sx_xunlock(&xs.request_mutex);
xs_reply_filter(request_type, msg->type, error);
return (error);
}
@ -887,8 +828,6 @@ xs_talkv(struct xs_transaction t, enum xsd_sockmsg_type request_type,
for (i = 0; i < num_vecs; i++)
msg.len += iovec[i].iov_len;
xs_request_filter(request_type);
sx_xlock(&xs.request_mutex);
error = xs_write_store(&msg, sizeof(msg));
if (error) {
@ -908,7 +847,6 @@ xs_talkv(struct xs_transaction t, enum xsd_sockmsg_type request_type,
error_lock_held:
sx_xunlock(&xs.request_mutex);
xs_reply_filter(request_type, msg.type, error);
if (error)
return (error);
@ -1206,7 +1144,6 @@ xs_attach(device_t dev)
mtx_init(&xs.reply_lock, "reply lock", NULL, MTX_DEF);
sx_init(&xs.xenwatch_mutex, "xenwatch");
sx_init(&xs.request_mutex, "xenstore request");
sx_init(&xs.suspend_mutex, "xenstore suspend");
mtx_init(&xs.registered_watches_lock, "watches", NULL, MTX_DEF);
mtx_init(&xs.watch_events_lock, "watch events", NULL, MTX_DEF);
@ -1249,7 +1186,6 @@ xs_suspend(device_t dev)
if (error != 0)
return (error);
sx_xlock(&xs.suspend_mutex);
sx_xlock(&xs.request_mutex);
return (0);
@ -1269,16 +1205,16 @@ xs_resume(device_t dev __unused)
sx_xunlock(&xs.request_mutex);
/*
* No need for registered_watches_lock: the suspend_mutex
* is sufficient.
* NB: since xenstore childs have not been resumed yet, there's
* no need to hold any watch mutex. Having clients try to add or
* remove watches at this point (before xenstore is resumed) is
* clearly a violantion of the resume order.
*/
LIST_FOREACH(watch, &xs.registered_watches, list) {
sprintf(token, "%lX", (long)watch);
xs_watch(watch->node, token);
}
sx_xunlock(&xs.suspend_mutex);
/* Resume child Xen devices. */
bus_generic_resume(dev);
@ -1631,8 +1567,6 @@ xs_register_watch(struct xs_watch *watch)
sprintf(token, "%lX", (long)watch);
sx_slock(&xs.suspend_mutex);
mtx_lock(&xs.registered_watches_lock);
KASSERT(find_watch(token) == NULL, ("watch already registered"));
LIST_INSERT_HEAD(&xs.registered_watches, watch, list);
@ -1650,8 +1584,6 @@ xs_register_watch(struct xs_watch *watch)
mtx_unlock(&xs.registered_watches_lock);
}
sx_sunlock(&xs.suspend_mutex);
return (error);
}
@ -1664,12 +1596,9 @@ xs_unregister_watch(struct xs_watch *watch)
sprintf(token, "%lX", (long)watch);
sx_slock(&xs.suspend_mutex);
mtx_lock(&xs.registered_watches_lock);
if (find_watch(token) == NULL) {
mtx_unlock(&xs.registered_watches_lock);
sx_sunlock(&xs.suspend_mutex);
return;
}
LIST_REMOVE(watch, list);
@ -1680,8 +1609,6 @@ xs_unregister_watch(struct xs_watch *watch)
log(LOG_WARNING, "XENSTORE Failed to release watch %s: %i\n",
watch->node, error);
sx_sunlock(&xs.suspend_mutex);
/* Cancel pending watch events. */
mtx_lock(&xs.watch_events_lock);
TAILQ_FOREACH_SAFE(msg, &xs.watch_events, list, tmp) {