In the Xen block front driver, take advantage of backends that support
cache flush and write barrier commands.

sys/dev/xen/blkfront/block.h:
	Add a per-command flag that specifies that the I/O queue must
	be frozen after this command is dispatched.  This is used
	to implement "single-stepping".

	Remove the unused per-command flag that indicates a polled
	command.

	Add block device instance flags to record backend features.

	Add a block device instance flag to indicate the I/O queue
	is frozen until all outstanding I/O completes.

	Enhance the queue API to allow the number of elements in a
	queue to be interrogated.

	Prefer "inline" to "__inline".

sys/dev/xen/blkfront/blkfront.c:
	Formalize queue freeze semantics by adding methods for both
	global and command-associated queue freezing.

	Provide a mechanism to freeze the I/O queue until all outstanding
	I/O completes.  Use this to implement barrier semantics
	(BIO_ORDERED) when the backend does not support
	BLKIF_OP_WRITE_BARRIER commands.

	Implement BIO_FLUSH as either a BLKIF_OP_FLUSH_DISKCACHE
	command or a zero-byte write barrier.  Currently, all publicly
	available backends perform a diskcache flush when processing
	barrier commands, and this frontend behavior matches what
	is done in Linux.

	Simplify code by using new queue length API.

	Report backend features during device attach and via sysctl.

Submitted by:	Roger Pau Monné
Submitted by:	gibbs (Merge with new driver queue API, sysctl support)
This commit is contained in:
Justin T. Gibbs 2013-06-26 20:39:07 +00:00
parent fafb835a0b
commit 9985113b61
2 changed files with 175 additions and 27 deletions

View File

@ -111,6 +111,26 @@ xbd_thaw(struct xbd_softc *sc, xbd_flag_t xbd_flag)
sc->xbd_qfrozen_cnt--;
}
/*
 * Freeze the I/O queue on behalf of a single command.
 *
 * A command holds at most one freeze at a time: when XBDCF_FROZEN is
 * already set this is a no-op and cm_flag is not recorded.  Otherwise
 * the command is tagged with XBDCF_FROZEN together with the caller's
 * reason flag, and the queue is frozen via xbd_freeze(sc, XBDF_NONE).
 */
static void
xbd_cm_freeze(struct xbd_softc *sc, struct xbd_command *cm, xbdc_flag_t cm_flag)
{
	if ((cm->cm_flags & XBDCF_FROZEN) == 0) {
		/* Mark the command first so a later thaw can find the hold. */
		cm->cm_flags |= cm_flag | XBDCF_FROZEN;
		xbd_freeze(sc, XBDF_NONE);
	}
}
/*
 * Release the queue freeze held by a command, if any.
 *
 * Commands without XBDCF_FROZEN set are left alone.  Otherwise the
 * XBDCF_FROZEN marker is cleared and the queue is thawed via
 * xbd_thaw(sc, XBDF_NONE).  Other command flags are not modified.
 */
static void
xbd_cm_thaw(struct xbd_softc *sc, struct xbd_command *cm)
{
	if ((cm->cm_flags & XBDCF_FROZEN) != 0) {
		cm->cm_flags &= ~XBDCF_FROZEN;
		xbd_thaw(sc, XBDF_NONE);
	}
}
static inline void
xbd_flush_requests(struct xbd_softc *sc)
{
@ -263,8 +283,7 @@ xbd_queue_request(struct xbd_softc *sc, struct xbd_command *cm)
* we just attempted to map, so we can't rely on bus dma
* blocking for it too.
*/
xbd_freeze(sc, XBDF_NONE);
cm->cm_flags |= XBDCF_FROZEN|XBDCF_ASYNC_MAPPING;
xbd_cm_freeze(sc, cm, XBDCF_ASYNC_MAPPING);
return (0);
}
@ -318,10 +337,46 @@ xbd_bio_command(struct xbd_softc *sc)
cm->cm_bp = bp;
cm->cm_data = bp->bio_data;
cm->cm_datalen = bp->bio_bcount;
cm->cm_operation = (bp->bio_cmd == BIO_READ) ?
BLKIF_OP_READ : BLKIF_OP_WRITE;
cm->cm_sector_number = (blkif_sector_t)bp->bio_pblkno;
switch (bp->bio_cmd) {
case BIO_READ:
cm->cm_operation = BLKIF_OP_READ;
break;
case BIO_WRITE:
cm->cm_operation = BLKIF_OP_WRITE;
if ((bp->bio_flags & BIO_ORDERED) != 0) {
if ((sc->xbd_flags & XBDF_BARRIER) != 0) {
cm->cm_operation = BLKIF_OP_WRITE_BARRIER;
} else {
/*
* Single step this command.
*/
cm->cm_flags |= XBDCF_Q_FREEZE;
if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
/*
* Wait for in-flight requests to
* finish.
*/
xbd_freeze(sc, XBDF_WAIT_IDLE);
xbd_requeue_cm(cm, XBD_Q_READY);
return (NULL);
}
}
}
break;
case BIO_FLUSH:
if ((sc->xbd_flags & XBDF_FLUSH) != 0)
cm->cm_operation = BLKIF_OP_FLUSH_DISKCACHE;
else if ((sc->xbd_flags & XBDF_BARRIER) != 0)
cm->cm_operation = BLKIF_OP_WRITE_BARRIER;
else
panic("flush request, but no flush support available");
break;
default:
panic("unknown bio command %d", bp->bio_cmd);
}
return (cm);
}
@ -356,6 +411,14 @@ xbd_startio(struct xbd_softc *sc)
if (cm == NULL)
break;
if ((cm->cm_flags & XBDCF_Q_FREEZE) != 0) {
/*
* Single step command. Future work is
* held off until this command completes.
*/
xbd_cm_freeze(sc, cm, XBDCF_Q_FREEZE);
}
if ((error = xbd_queue_request(sc, cm)) != 0) {
printf("xbd_queue_request returned %d\n", error);
break;
@ -425,7 +488,8 @@ xbd_int(void *xsc)
if (cm->cm_operation == BLKIF_OP_READ)
op = BUS_DMASYNC_POSTREAD;
else if (cm->cm_operation == BLKIF_OP_WRITE)
else if (cm->cm_operation == BLKIF_OP_WRITE ||
cm->cm_operation == BLKIF_OP_WRITE_BARRIER)
op = BUS_DMASYNC_POSTWRITE;
else
op = 0;
@ -436,10 +500,7 @@ xbd_int(void *xsc)
* Release any hold this command has on future command
* dispatch.
*/
if ((cm->cm_flags & XBDCF_FROZEN) != 0) {
xbd_thaw(sc, XBDF_NONE);
cm->cm_flags &= ~XBDCF_FROZEN;
}
xbd_cm_thaw(sc, cm);
/*
* Directly call the i/o complete routine to save an
@ -465,6 +526,9 @@ xbd_int(void *xsc)
sc->xbd_ring.sring->rsp_event = i + 1;
}
if (xbd_queue_length(sc, XBD_Q_BUSY) == 0)
xbd_thaw(sc, XBDF_WAIT_IDLE);
xbd_startio(sc);
if (unlikely(sc->xbd_state == XBD_STATE_SUSPENDED))
@ -483,13 +547,13 @@ xbd_quiesce(struct xbd_softc *sc)
int mtd;
// While there are outstanding requests
while (!TAILQ_EMPTY(&sc->xbd_cm_q[XBD_Q_BUSY].q_tailq)) {
while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, mtd);
if (mtd) {
/* Received request completions, update queue. */
xbd_int(sc);
}
if (!TAILQ_EMPTY(&sc->xbd_cm_q[XBD_Q_BUSY].q_tailq)) {
if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
/*
* Still pending requests, wait for the disk i/o
* to complete.
@ -750,11 +814,55 @@ xbd_free_ring(struct xbd_softc *sc)
}
/*-------------------------- Initialization/Teardown -------------------------*/
/*
 * Format the negotiated backend features as a comma separated list.
 *
 * The text is written into the caller supplied buffer "features" of
 * "len" bytes using a fixed-length sbuf.  "flush" is emitted when
 * XBDF_FLUSH is set and "write_barrier" when XBDF_BARRIER is set.
 *
 * Returns the value of sbuf_len() on the finished sbuf.
 */
static int
xbd_feature_string(struct xbd_softc *sc, char *features, size_t len)
{
	struct sbuf sb;
	int need_separator = 0;

	sbuf_new(&sb, features, len, SBUF_FIXEDLEN);

	if ((sc->xbd_flags & XBDF_FLUSH) != 0) {
		sbuf_printf(&sb, "flush");
		need_separator = 1;
	}

	if ((sc->xbd_flags & XBDF_BARRIER) != 0) {
		/* Only separate when an earlier feature was emitted. */
		if (need_separator)
			sbuf_printf(&sb, ", ");
		sbuf_printf(&sb, "write_barrier");
	}

	(void) sbuf_finish(&sb);

	return (sbuf_len(&sb));
}
static int
xbd_sysctl_features(SYSCTL_HANDLER_ARGS)
{
char features[80];
struct xbd_softc *sc = arg1;
int error;
int len;
error = sysctl_wire_old_buffer(req, 0);
if (error != 0)
return (error);
len = xbd_feature_string(sc, features, sizeof(features));
/* len is -1 on error, which will make the SYSCTL_OUT a no-op. */
return (SYSCTL_OUT(req, features, len + 1/*NUL*/));
}
static void
xbd_setup_sysctl(struct xbd_softc *xbd)
{
struct sysctl_ctx_list *sysctl_ctx = NULL;
struct sysctl_oid *sysctl_tree = NULL;
struct sysctl_oid_list *children;
sysctl_ctx = device_get_sysctl_ctx(xbd->xbd_dev);
if (sysctl_ctx == NULL)
@ -764,22 +872,31 @@ xbd_setup_sysctl(struct xbd_softc *xbd)
if (sysctl_tree == NULL)
return;
SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
children = SYSCTL_CHILDREN(sysctl_tree);
SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
"max_requests", CTLFLAG_RD, &xbd->xbd_max_requests, -1,
"maximum outstanding requests (negotiated)");
SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
"max_requests", CTLFLAG_RD, &xbd->xbd_max_requests, -1,
"maximum outstanding requests (negotiated)");
SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
"max_request_segments", CTLFLAG_RD,
&xbd->xbd_max_request_segments, 0,
"maximum number of pages per requests (negotiated)");
SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
"max_request_size", CTLFLAG_RD, &xbd->xbd_max_request_size, 0,
"maximum size in bytes of a request (negotiated)");
SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
"ring_pages", CTLFLAG_RD, &xbd->xbd_ring_pages, 0,
"communication channel pages (negotiated)");
SYSCTL_ADD_PROC(sysctl_ctx, children, OID_AUTO,
"features", CTLTYPE_STRING|CTLFLAG_RD, xbd, 0,
xbd_sysctl_features, "A", "protocol features (negotiated)");
}
/*
@ -854,6 +971,7 @@ int
xbd_instance_create(struct xbd_softc *sc, blkif_sector_t sectors,
int vdevice, uint16_t vdisk_info, unsigned long sector_size)
{
char features[80];
int unit, error = 0;
const char *name;
@ -861,9 +979,14 @@ xbd_instance_create(struct xbd_softc *sc, blkif_sector_t sectors,
sc->xbd_unit = unit;
if (strcmp(name, "xbd"))
if (strcmp(name, "xbd") != 0)
device_printf(sc->xbd_dev, "attaching as %s%d\n", name, unit);
if (xbd_feature_string(sc, features, sizeof(features)) > 0) {
device_printf(sc->xbd_dev, "features: %s\n",
features);
}
sc->xbd_disk = disk_alloc();
sc->xbd_disk->d_unit = sc->xbd_unit;
sc->xbd_disk->d_open = xbd_open;
@ -878,6 +1001,11 @@ xbd_instance_create(struct xbd_softc *sc, blkif_sector_t sectors,
sc->xbd_disk->d_mediasize = sectors * sector_size;
sc->xbd_disk->d_maxsize = sc->xbd_max_request_size;
sc->xbd_disk->d_flags = 0;
if ((sc->xbd_flags & (XBDF_FLUSH|XBDF_BARRIER)) != 0) {
sc->xbd_disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
device_printf(sc->xbd_dev,
"synchronize cache commands enabled.\n");
}
disk_create(sc->xbd_disk, DISK_VERSION);
return error;
@ -1183,7 +1311,7 @@ xbd_connect(struct xbd_softc *sc)
device_t dev = sc->xbd_dev;
unsigned long sectors, sector_size;
unsigned int binfo;
int err, feature_barrier;
int err, feature_barrier, feature_flush;
if (sc->xbd_state == XBD_STATE_CONNECTED ||
sc->xbd_state == XBD_STATE_SUSPENDED)
@ -1205,9 +1333,15 @@ xbd_connect(struct xbd_softc *sc)
err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
"feature-barrier", "%lu", &feature_barrier,
NULL);
if (!err || feature_barrier)
if (err == 0 && feature_barrier != 0)
sc->xbd_flags |= XBDF_BARRIER;
err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
"feature-flush-cache", "%lu", &feature_flush,
NULL);
if (err == 0 && feature_flush != 0)
sc->xbd_flags |= XBDF_FLUSH;
if (sc->xbd_disk == NULL) {
device_printf(dev, "%juMB <%s> at %s",
(uintmax_t) sectors / (1048576 / sector_size),
@ -1339,7 +1473,7 @@ xbd_suspend(device_t dev)
/* Wait for outstanding I/O to drain. */
retval = 0;
while (TAILQ_EMPTY(&sc->xbd_cm_q[XBD_Q_BUSY].q_tailq) == 0) {
while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
if (msleep(&sc->xbd_cm_q[XBD_Q_BUSY], &sc->xbd_io_lock,
PRIBIO, "blkf_susp", 30 * hz) == EWOULDBLOCK) {
retval = EBUSY;

View File

@ -94,8 +94,11 @@
typedef enum {
XBDCF_Q_MASK = 0xFF,
/* This command has contributed to xbd_qfrozen_cnt. */
XBDCF_FROZEN = 1<<8,
XBDCF_POLLED = 1<<9,
/* Freeze the command queue on dispatch (i.e. single step command). */
XBDCF_Q_FREEZE = 1<<9,
/* Bus DMA returned EINPROGRESS for this command. */
XBDCF_ASYNC_MAPPING = 1<<10,
XBDCF_INITIALIZER = XBDCF_Q_MASK
} xbdc_flag_t;
@ -147,9 +150,14 @@ typedef enum {
XBDF_NONE = 0,
XBDF_OPEN = 1 << 0, /* drive is open (can't shut down) */
XBDF_BARRIER = 1 << 1, /* backend supports barriers */
XBDF_READY = 1 << 2, /* Is ready */
XBDF_CM_SHORTAGE = 1 << 3, /* Free cm resource shortage active. */
XBDF_GNT_SHORTAGE = 1 << 4 /* Grant ref resource shortage active */
XBDF_FLUSH = 1 << 2, /* backend supports flush */
XBDF_READY = 1 << 3, /* Is ready */
XBDF_CM_SHORTAGE = 1 << 4, /* Free cm resource shortage active. */
XBDF_GNT_SHORTAGE = 1 << 5, /* Grant ref resource shortage active */
XBDF_WAIT_IDLE = 1 << 6 /*
* No new work until outstanding work
* completes.
*/
} xbd_flag_t;
/*
@ -206,6 +214,12 @@ xbd_removed_qentry(struct xbd_softc *sc, xbd_q_index_t index)
sc->xbd_cm_q[index].q_length--;
}
/*
 * Report the number of entries currently accounted to one of the
 * softc's queues, as tracked by that queue's q_length counter.
 */
static inline uint32_t
xbd_queue_length(struct xbd_softc *sc, xbd_q_index_t index)
{
	uint32_t length;

	length = sc->xbd_cm_q[index].q_length;
	return (length);
}
static inline void
xbd_initq_cm(struct xbd_softc *sc, xbd_q_index_t index)
{
@ -289,27 +303,27 @@ xbd_remove_cm(struct xbd_command *cm, xbd_q_index_t expected_index)
xbd_removed_qentry(cm->cm_sc, index);
}
static __inline void
static inline void
xbd_initq_bio(struct xbd_softc *sc)
{
bioq_init(&sc->xbd_bioq);
}
static __inline void
static inline void
xbd_enqueue_bio(struct xbd_softc *sc, struct bio *bp)
{
bioq_insert_tail(&sc->xbd_bioq, bp);
xbd_added_qentry(sc, XBD_Q_BIO);
}
static __inline void
static inline void
xbd_requeue_bio(struct xbd_softc *sc, struct bio *bp)
{
bioq_insert_head(&sc->xbd_bioq, bp);
xbd_added_qentry(sc, XBD_Q_BIO);
}
static __inline struct bio *
static inline struct bio *
xbd_dequeue_bio(struct xbd_softc *sc)
{
struct bio *bp;