Add suspend/resume support to the Xen blkfront driver.

Sponsored by: BQ Internet

sys/dev/xen/blkfront/block.h:
sys/dev/xen/blkfront/blkfront.c:
	Remove now unused blkif_vdev_t from the blkfront soft.

sys/dev/xen/blkfront/blkfront.c:
	o In blkfront_suspend(), indicate the desire to suspend
	  by changing the softc connected state to SUSPENDED, and
	  then wait for any I/O pending on the remote peer to
	  drain.  Cancel suspend processing if I/O does not
	  drain within 30 seconds.
	o Enable and update blkfront_resume().  Since I/O is
	  drained prior to the suspension of the VM, the complicated
	  recovery process performed by other Xen blkfront
	  implementations is avoided.  We simply tear down the
	  connection to our old peer, and then re-connect.
	o In blkif_initialize(), fix a resource leak and botched
	  return if we cannot allocate shadow memory for our
	  requests.
	o In blkfront_backend_changed(), correct our response to
	  the XenbusStateInitialised state.  This state indicates
	  that our backend peer has published sufficient data for
	  blkfront to publish ring information and other XenStore
	  data, not that a connection can occur.  Blkfront now
	  will only perform connection processing in response to
	  the XenbusStateConnected state.  This corrects an issue
	  where blkfront connected before the backend was ready
	  during resume processing.

Approved by:	re
MFC after:	1 week
This commit is contained in:
Justin T. Gibbs 2011-09-21 00:02:44 +00:00
parent 2ca7463bc7
commit 06a630f65d
2 changed files with 51 additions and 46 deletions

View File

@ -77,11 +77,8 @@ static int blkfront_detach(device_t);
static int setup_blkring(struct xb_softc *);
static void blkif_int(void *);
static void blkfront_initialize(struct xb_softc *);
#if 0
static void blkif_recover(struct xb_softc *);
#endif
static int blkif_completion(struct xb_command *);
static void blkif_free(struct xb_softc *, int);
static void blkif_free(struct xb_softc *);
static void blkif_queue_cb(void *, bus_dma_segment_t *, int, int);
MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data");
@ -452,9 +449,6 @@ blkfront_attach(device_t dev)
sc->vdevice = vdevice;
sc->connected = BLKIF_STATE_DISCONNECTED;
/* Front end dir is a number, which is used as the id. */
sc->handle = strtoul(strrchr(xenbus_get_node(dev),'/')+1, NULL, 0);
/* Wait for backend device to publish its protocol capabilities. */
xenbus_set_state(dev, XenbusStateInitialising);
@ -465,29 +459,40 @@ static int
blkfront_suspend(device_t dev)
{
struct xb_softc *sc = device_get_softc(dev);
int retval;
int saved_state;
/* Prevent new requests being issued until we fix things up. */
mtx_lock(&sc->xb_io_lock);
saved_state = sc->connected;
sc->connected = BLKIF_STATE_SUSPENDED;
/* Wait for outstanding I/O to drain. */
retval = 0;
while (TAILQ_EMPTY(&sc->cm_busy) == 0) {
if (msleep(&sc->cm_busy, &sc->xb_io_lock,
PRIBIO, "blkf_susp", 30 * hz) == EWOULDBLOCK) {
retval = EBUSY;
break;
}
}
mtx_unlock(&sc->xb_io_lock);
return (0);
if (retval != 0)
sc->connected = saved_state;
return (retval);
}
static int
blkfront_resume(device_t dev)
{
#if 0
struct xb_softc *sc = device_get_softc(dev);
DPRINTK("blkfront_resume: %s\n", xenbus_get_node(dev));
/* XXX This can't work!!! */
blkif_free(sc, 1);
blkif_free(sc);
blkfront_initialize(sc);
if (sc->connected == BLKIF_STATE_SUSPENDED)
blkif_recover(sc);
#endif
return (0);
}
@ -499,8 +504,10 @@ blkfront_initialize(struct xb_softc *sc)
int error;
int i;
if (xenbus_get_state(sc->xb_dev) != XenbusStateInitialising)
return;
if (xenbus_get_state(sc->xb_dev) != XenbusStateInitialising) {
/* Initialization has already been performed. */
return;
}
/*
* Protocol defaults valid even if negotiation for a
@ -593,8 +600,10 @@ blkfront_initialize(struct xb_softc *sc)
sc->shadow = malloc(sizeof(*sc->shadow) * sc->max_requests,
M_XENBLOCKFRONT, M_NOWAIT|M_ZERO);
if (sc->shadow == NULL) {
bus_dma_tag_destroy(sc->xb_io_dmat);
xenbus_dev_fatal(sc->xb_dev, error,
"Cannot allocate request structures\n");
return;
}
for (i = 0; i < sc->max_requests; i++) {
@ -755,10 +764,10 @@ blkfront_backend_changed(device_t dev, XenbusState backend_state)
break;
case XenbusStateInitWait:
case XenbusStateInitialised:
blkfront_initialize(sc);
break;
case XenbusStateInitialised:
case XenbusStateConnected:
blkfront_initialize(sc);
blkfront_connect(sc);
@ -775,7 +784,7 @@ blkfront_backend_changed(device_t dev, XenbusState backend_state)
}
/*
** Invoked when the backend is finally 'ready' (and has told produced
** Invoked when the backend is finally 'ready' (and has published
** the details about the physical device - #sectors, size, etc).
*/
static void
@ -809,13 +818,15 @@ blkfront_connect(struct xb_softc *sc)
if (!err || feature_barrier)
sc->xb_flags |= XB_BARRIER;
device_printf(dev, "%juMB <%s> at %s",
(uintmax_t) sectors / (1048576 / sector_size),
device_get_desc(dev),
xenbus_get_node(dev));
bus_print_child_footer(device_get_parent(dev), dev);
if (sc->xb_disk == NULL) {
device_printf(dev, "%juMB <%s> at %s",
(uintmax_t) sectors / (1048576 / sector_size),
device_get_desc(dev),
xenbus_get_node(dev));
bus_print_child_footer(device_get_parent(dev), dev);
xlvbd_add(sc, sectors, sc->vdevice, binfo, sector_size);
xlvbd_add(sc, sectors, sc->vdevice, binfo, sector_size);
}
(void)xenbus_set_state(dev, XenbusStateConnected);
@ -825,7 +836,6 @@ blkfront_connect(struct xb_softc *sc)
xb_startio(sc);
sc->xb_flags |= XB_READY;
mtx_unlock(&sc->xb_io_lock);
}
/**
@ -859,7 +869,7 @@ blkfront_detach(device_t dev)
DPRINTK("blkfront_remove: %s removed\n", xenbus_get_node(dev));
blkif_free(sc, 0);
blkif_free(sc);
mtx_destroy(&sc->xb_io_lock);
return 0;
@ -1140,6 +1150,9 @@ xb_startio(struct xb_softc *sc)
mtx_assert(&sc->xb_io_lock, MA_OWNED);
if (sc->connected != BLKIF_STATE_CONNECTED)
return;
while (RING_FREE_REQUESTS(&sc->ring) >= sc->max_request_blocks) {
if (sc->xb_flags & XB_FROZEN)
break;
@ -1174,7 +1187,7 @@ blkif_int(void *xsc)
mtx_lock(&sc->xb_io_lock);
if (unlikely(sc->connected != BLKIF_STATE_CONNECTED)) {
if (unlikely(sc->connected == BLKIF_STATE_DISCONNECTED)) {
mtx_unlock(&sc->xb_io_lock);
return;
}
@ -1232,19 +1245,21 @@ blkif_int(void *xsc)
xb_startio(sc);
if (unlikely(sc->connected == BLKIF_STATE_SUSPENDED))
wakeup(&sc->cm_busy);
mtx_unlock(&sc->xb_io_lock);
}
static void
blkif_free(struct xb_softc *sc, int suspend)
blkif_free(struct xb_softc *sc)
{
uint8_t *sring_page_ptr;
int i;
/* Prevent new requests being issued until we fix things up. */
mtx_lock(&sc->xb_io_lock);
sc->connected = suspend ?
BLKIF_STATE_SUSPENDED : BLKIF_STATE_DISCONNECTED;
sc->connected = BLKIF_STATE_DISCONNECTED;
mtx_unlock(&sc->xb_io_lock);
/* Free resources associated with old device channel. */
@ -1276,6 +1291,12 @@ blkif_free(struct xb_softc *sc, int suspend)
}
free(sc->shadow, M_XENBLOCKFRONT);
sc->shadow = NULL;
bus_dma_tag_destroy(sc->xb_io_dmat);
xb_initq_free(sc);
xb_initq_ready(sc);
xb_initq_complete(sc);
}
if (sc->irq) {
@ -1292,21 +1313,6 @@ blkif_completion(struct xb_command *s)
return (BLKIF_SEGS_TO_BLOCKS(s->nseg));
}
#if 0
static void
blkif_recover(struct xb_softc *sc)
{
/*
* XXX The whole concept of not quiescing and completing all i/o
* during suspend, and then hoping to recover and replay the
* resulting abandoned I/O during resume, is laughable. At best,
* it invalidates the i/o ordering rules required by just about
* every filesystem, and at worst it'll corrupt data. The code
* has been removed until further notice.
*/
}
#endif
/* ** Driver registration ** */
static device_method_t blkfront_methods[] = {
/* Device interface */

View File

@ -142,7 +142,6 @@ struct xb_softc {
#define XB_READY (1 << 2) /* Is ready */
#define XB_FROZEN (1 << 3) /* Waiting for resources */
int vdevice;
blkif_vdev_t handle;
int connected;
u_int ring_pages;
uint32_t max_requests;