diff --git a/sys/dev/xen/blkfront/blkfront.c b/sys/dev/xen/blkfront/blkfront.c
index 0b2f2d9fba75..9f1b08297f3c 100644
--- a/sys/dev/xen/blkfront/blkfront.c
+++ b/sys/dev/xen/blkfront/blkfront.c
@@ -111,6 +111,26 @@ xbd_thaw(struct xbd_softc *sc, xbd_flag_t xbd_flag)
 	sc->xbd_qfrozen_cnt--;
 }
 
+static void
+xbd_cm_freeze(struct xbd_softc *sc, struct xbd_command *cm, xbdc_flag_t cm_flag)
+{
+	if ((cm->cm_flags & XBDCF_FROZEN) != 0)
+		return;
+
+	cm->cm_flags |= XBDCF_FROZEN|cm_flag;
+	xbd_freeze(sc, XBDF_NONE);
+}
+
+static void
+xbd_cm_thaw(struct xbd_softc *sc, struct xbd_command *cm)
+{
+	if ((cm->cm_flags & XBDCF_FROZEN) == 0)
+		return;
+
+	cm->cm_flags &= ~XBDCF_FROZEN;
+	xbd_thaw(sc, XBDF_NONE);
+}
+
 static inline void
 xbd_flush_requests(struct xbd_softc *sc)
 {
@@ -263,8 +283,7 @@ xbd_queue_request(struct xbd_softc *sc, struct xbd_command *cm)
 		 * we just attempted to map, so we can't rely on bus dma
 		 * blocking for it too.
 		 */
-		xbd_freeze(sc, XBDF_NONE);
-		cm->cm_flags |= XBDCF_FROZEN|XBDCF_ASYNC_MAPPING;
+		xbd_cm_freeze(sc, cm, XBDCF_ASYNC_MAPPING);
 		return (0);
 	}
 
@@ -318,10 +337,46 @@ xbd_bio_command(struct xbd_softc *sc)
 	cm->cm_bp = bp;
 	cm->cm_data = bp->bio_data;
 	cm->cm_datalen = bp->bio_bcount;
-	cm->cm_operation = (bp->bio_cmd == BIO_READ) ?
-	    BLKIF_OP_READ : BLKIF_OP_WRITE;
 	cm->cm_sector_number = (blkif_sector_t)bp->bio_pblkno;
 
+	switch (bp->bio_cmd) {
+	case BIO_READ:
+		cm->cm_operation = BLKIF_OP_READ;
+		break;
+	case BIO_WRITE:
+		cm->cm_operation = BLKIF_OP_WRITE;
+		if ((bp->bio_flags & BIO_ORDERED) != 0) {
+			if ((sc->xbd_flags & XBDF_BARRIER) != 0) {
+				cm->cm_operation = BLKIF_OP_WRITE_BARRIER;
+			} else {
+				/*
+				 * Single step this command.
+				 */
+				cm->cm_flags |= XBDCF_Q_FREEZE;
+				if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
+					/*
+					 * Wait for in-flight requests to
+					 * finish.
+					 */
+					xbd_freeze(sc, XBDF_WAIT_IDLE);
+					xbd_requeue_cm(cm, XBD_Q_READY);
+					return (NULL);
+				}
+			}
+		}
+		break;
+	case BIO_FLUSH:
+		if ((sc->xbd_flags & XBDF_FLUSH) != 0)
+			cm->cm_operation = BLKIF_OP_FLUSH_DISKCACHE;
+		else if ((sc->xbd_flags & XBDF_BARRIER) != 0)
+			cm->cm_operation = BLKIF_OP_WRITE_BARRIER;
+		else
+			panic("flush request, but no flush support available");
+		break;
+	default:
+		panic("unknown bio command %d", bp->bio_cmd);
+	}
+
 	return (cm);
 }
 
@@ -356,6 +411,14 @@ xbd_startio(struct xbd_softc *sc)
 		if (cm == NULL)
 			break;
 
+		if ((cm->cm_flags & XBDCF_Q_FREEZE) != 0) {
+			/*
+			 * Single step command.  Future work is
+			 * held off until this command completes.
+			 */
+			xbd_cm_freeze(sc, cm, XBDCF_Q_FREEZE);
+		}
+
 		if ((error = xbd_queue_request(sc, cm)) != 0) {
 			printf("xbd_queue_request returned %d\n", error);
 			break;
@@ -425,7 +488,8 @@ xbd_int(void *xsc)
 
 		if (cm->cm_operation == BLKIF_OP_READ)
 			op = BUS_DMASYNC_POSTREAD;
-		else if (cm->cm_operation == BLKIF_OP_WRITE)
+		else if (cm->cm_operation == BLKIF_OP_WRITE ||
+		    cm->cm_operation == BLKIF_OP_WRITE_BARRIER)
			op = BUS_DMASYNC_POSTWRITE;
 		else
 			op = 0;
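
The xbd_cm_freeze()/xbd_cm_thaw() helpers introduced above centralize a
pattern that was previously open-coded at each call site: a command may
contribute at most one reference to sc->xbd_qfrozen_cnt, with XBDCF_FROZEN
recording whether it has already done so.  The following self-contained
userland sketch models just that counting discipline; the model_* names and
types are illustrative stand-ins, not the driver's real definitions.

	#include <stdio.h>

	#define CF_FROZEN	(1u << 8)

	struct model_softc { unsigned int qfrozen_cnt; };
	struct model_cmd   { unsigned int flags; };

	/* Mirrors xbd_cm_freeze(): at most one reference per command. */
	static void
	model_cm_freeze(struct model_softc *sc, struct model_cmd *cm)
	{
		if ((cm->flags & CF_FROZEN) != 0)
			return;
		cm->flags |= CF_FROZEN;
		sc->qfrozen_cnt++;	/* as in xbd_freeze(sc, XBDF_NONE) */
	}

	/* Mirrors xbd_cm_thaw(): only drops the reference this command took. */
	static void
	model_cm_thaw(struct model_softc *sc, struct model_cmd *cm)
	{
		if ((cm->flags & CF_FROZEN) == 0)
			return;
		cm->flags &= ~CF_FROZEN;
		sc->qfrozen_cnt--;	/* as in xbd_thaw(sc, XBDF_NONE) */
	}

	int
	main(void)
	{
		struct model_softc sc = { 0 };
		struct model_cmd cm = { 0 };

		model_cm_freeze(&sc, &cm);
		model_cm_freeze(&sc, &cm);	/* no-op: already frozen */
		printf("qfrozen_cnt = %u\n", sc.qfrozen_cnt);	/* 1 */
		model_cm_thaw(&sc, &cm);
		printf("qfrozen_cnt = %u\n", sc.qfrozen_cnt);	/* 0 */
		return (0);
	}

Because both helpers are idempotent per command, a command frozen for an
async mapping and later single-stepped still holds exactly one queue
reference, and completion releases exactly what was taken.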
@@ -436,10 +500,7 @@ xbd_int(void *xsc)
 		 * Release any hold this command has on future command
 		 * dispatch.
 		 */
-		if ((cm->cm_flags & XBDCF_FROZEN) != 0) {
-			xbd_thaw(sc, XBDF_NONE);
-			cm->cm_flags &= ~XBDCF_FROZEN;
-		}
+		xbd_cm_thaw(sc, cm);
 
 		/*
 		 * Directly call the i/o complete routine to save an
@@ -465,6 +526,9 @@ xbd_int(void *xsc)
 		sc->xbd_ring.sring->rsp_event = i + 1;
 	}
 
+	if (xbd_queue_length(sc, XBD_Q_BUSY) == 0)
+		xbd_thaw(sc, XBDF_WAIT_IDLE);
+
 	xbd_startio(sc);
 
 	if (unlikely(sc->xbd_state == XBD_STATE_SUSPENDED))
@@ -483,13 +547,13 @@ xbd_quiesce(struct xbd_softc *sc)
 	int		mtd;
 
 	// While there are outstanding requests
-	while (!TAILQ_EMPTY(&sc->xbd_cm_q[XBD_Q_BUSY].q_tailq)) {
+	while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
 		RING_FINAL_CHECK_FOR_RESPONSES(&sc->xbd_ring, mtd);
 		if (mtd) {
 			/* Received request completions, update queue. */
 			xbd_int(sc);
 		}
-		if (!TAILQ_EMPTY(&sc->xbd_cm_q[XBD_Q_BUSY].q_tailq)) {
+		if (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
 			/*
 			 * Still pending requests, wait for the disk i/o
 			 * to complete.
@@ -750,11 +814,55 @@ xbd_free_ring(struct xbd_softc *sc)
 }
 
 /*-------------------------- Initialization/Teardown -------------------------*/
+static int
+xbd_feature_string(struct xbd_softc *sc, char *features, size_t len)
+{
+	struct sbuf sb;
+	int feature_cnt;
+
+	sbuf_new(&sb, features, len, SBUF_FIXEDLEN);
+
+	feature_cnt = 0;
+	if ((sc->xbd_flags & XBDF_FLUSH) != 0) {
+		sbuf_printf(&sb, "flush");
+		feature_cnt++;
+	}
+
+	if ((sc->xbd_flags & XBDF_BARRIER) != 0) {
+		if (feature_cnt != 0)
+			sbuf_printf(&sb, ", ");
+		sbuf_printf(&sb, "write_barrier");
+		feature_cnt++;
+	}
+
+	(void) sbuf_finish(&sb);
+	return (sbuf_len(&sb));
+}
+
+static int
+xbd_sysctl_features(SYSCTL_HANDLER_ARGS)
+{
+	char features[80];
+	struct xbd_softc *sc = arg1;
+	int error;
+	int len;
+
+	error = sysctl_wire_old_buffer(req, 0);
+	if (error != 0)
+		return (error);
+
+	len = xbd_feature_string(sc, features, sizeof(features));
+
+	/* len is -1 on error, which will make the SYSCTL_OUT a no-op. */
+	return (SYSCTL_OUT(req, features, len + 1/*NUL*/));
+}
+
 static void
 xbd_setup_sysctl(struct xbd_softc *xbd)
 {
 	struct sysctl_ctx_list *sysctl_ctx = NULL;
 	struct sysctl_oid *sysctl_tree = NULL;
+	struct sysctl_oid_list *children;
 
 	sysctl_ctx = device_get_sysctl_ctx(xbd->xbd_dev);
 	if (sysctl_ctx == NULL)
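
xbd_feature_string() above leans on sbuf(9) to build the comma-separated
feature list safely in a fixed-size buffer.  For readers without the kernel
headers handy, here is a userland approximation of the same "separator only
after the first item" logic using snprintf(); the buffer is assumed large
enough for all features, and only the XBDF_* values are copied from
block.h, everything else is illustrative.

	#include <stdio.h>

	#define XBDF_BARRIER	(1 << 1)
	#define XBDF_FLUSH	(1 << 2)

	static int
	feature_string(unsigned int flags, char *buf, size_t len)
	{
		int cnt = 0;
		int off = 0;

		buf[0] = '\0';
		if ((flags & XBDF_FLUSH) != 0) {
			off += snprintf(buf + off, len - off, "flush");
			cnt++;
		}
		if ((flags & XBDF_BARRIER) != 0) {
			if (cnt != 0)
				off += snprintf(buf + off, len - off, ", ");
			off += snprintf(buf + off, len - off, "write_barrier");
			cnt++;
		}
		return (off);
	}

	int
	main(void)
	{
		char buf[80];

		(void)feature_string(XBDF_FLUSH | XBDF_BARRIER,
		    buf, sizeof(buf));
		printf("features: %s\n", buf);	/* "flush, write_barrier" */
		return (0);
	}

The kernel version gets truncation handling for free from sbuf(9), which is
why the driver code needs no explicit offset bookkeeping.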
@@ -764,22 +872,27 @@ xbd_setup_sysctl(struct xbd_softc *xbd)
 	if (sysctl_tree == NULL)
 		return;
 
-	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
+	children = SYSCTL_CHILDREN(sysctl_tree);
+	SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
 	    "max_requests", CTLFLAG_RD, &xbd->xbd_max_requests, -1,
 	    "maximum outstanding requests (negotiated)");
 
-	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
+	SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
 	    "max_request_segments", CTLFLAG_RD,
 	    &xbd->xbd_max_request_segments, 0,
 	    "maximum number of pages per requests (negotiated)");
 
-	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
+	SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
 	    "max_request_size", CTLFLAG_RD, &xbd->xbd_max_request_size, 0,
 	    "maximum size in bytes of a request (negotiated)");
 
-	SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
+	SYSCTL_ADD_UINT(sysctl_ctx, children, OID_AUTO,
 	    "ring_pages", CTLFLAG_RD, &xbd->xbd_ring_pages, 0,
 	    "communication channel pages (negotiated)");
+
+	SYSCTL_ADD_PROC(sysctl_ctx, children, OID_AUTO,
+	    "features", CTLTYPE_STRING|CTLFLAG_RD, xbd, 0,
+	    xbd_sysctl_features, "A", "protocol features (negotiated)");
 }
 
 /*
@@ -854,6 +967,7 @@ int
 xbd_instance_create(struct xbd_softc *sc, blkif_sector_t sectors,
     int vdevice, uint16_t vdisk_info, unsigned long sector_size)
 {
+	char features[80];
 	int unit, error = 0;
 	const char *name;
 
@@ -861,9 +975,14 @@ xbd_instance_create(struct xbd_softc *sc, blkif_sector_t sectors,
 
 	sc->xbd_unit = unit;
 
-	if (strcmp(name, "xbd"))
+	if (strcmp(name, "xbd") != 0)
 		device_printf(sc->xbd_dev, "attaching as %s%d\n", name, unit);
 
+	if (xbd_feature_string(sc, features, sizeof(features)) > 0) {
+		device_printf(sc->xbd_dev, "features: %s\n",
+		    features);
+	}
+
 	sc->xbd_disk = disk_alloc();
 	sc->xbd_disk->d_unit = sc->xbd_unit;
 	sc->xbd_disk->d_open = xbd_open;
@@ -878,6 +997,11 @@ xbd_instance_create(struct xbd_softc *sc, blkif_sector_t sectors,
 	sc->xbd_disk->d_mediasize = sectors * sector_size;
 	sc->xbd_disk->d_maxsize = sc->xbd_max_request_size;
 	sc->xbd_disk->d_flags = 0;
+	if ((sc->xbd_flags & (XBDF_FLUSH|XBDF_BARRIER)) != 0) {
+		sc->xbd_disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
+		device_printf(sc->xbd_dev,
+		    "synchronize cache commands enabled.\n");
+	}
 	disk_create(sc->xbd_disk, DISK_VERSION);
 
 	return error;
@@ -1183,7 +1307,7 @@ xbd_connect(struct xbd_softc *sc)
 	device_t dev = sc->xbd_dev;
 	unsigned long sectors, sector_size;
 	unsigned int binfo;
-	int err, feature_barrier;
+	int err, feature_barrier, feature_flush;
 
 	if (sc->xbd_state == XBD_STATE_CONNECTED ||
 	    sc->xbd_state == XBD_STATE_SUSPENDED)
@@ -1205,9 +1329,15 @@ xbd_connect(struct xbd_softc *sc)
 
 	err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
 	    "feature-barrier", "%lu", &feature_barrier,
 	    NULL);
-	if (!err || feature_barrier)
+	if (err == 0 && feature_barrier != 0)
 		sc->xbd_flags |= XBDF_BARRIER;
 
+	err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
+	    "feature-flush-cache", "%lu", &feature_flush,
+	    NULL);
+	if (err == 0 && feature_flush != 0)
+		sc->xbd_flags |= XBDF_FLUSH;
+
 	if (sc->xbd_disk == NULL) {
 		device_printf(dev, "%juMB <%s> at %s",
 		    (uintmax_t) sectors / (1048576 / sector_size),
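
The two xs_gather() probes above decide which of the negotiated operations
BIO_FLUSH maps to in xbd_bio_command(): a real cache flush is preferred and
an empty write barrier is the fallback.  The stand-alone sketch below
restates that decision table; the enum and function are illustrative, only
the XBDF_* flags and the preference order come from the patch.

	#include <stdio.h>

	#define XBDF_BARRIER	(1 << 1)
	#define XBDF_FLUSH	(1 << 2)

	enum flush_mapping {
		MAP_FLUSH_DISKCACHE,	/* BLKIF_OP_FLUSH_DISKCACHE */
		MAP_WRITE_BARRIER,	/* BLKIF_OP_WRITE_BARRIER */
		MAP_NONE		/* no support; the driver panics */
	};

	/* Mirrors the BIO_FLUSH case in xbd_bio_command(). */
	static enum flush_mapping
	flush_mapping(unsigned int xbd_flags)
	{
		if ((xbd_flags & XBDF_FLUSH) != 0)
			return (MAP_FLUSH_DISKCACHE);
		if ((xbd_flags & XBDF_BARRIER) != 0)
			return (MAP_WRITE_BARRIER);
		return (MAP_NONE);
	}

	int
	main(void)
	{
		printf("flush+barrier -> %d\n",
		    flush_mapping(XBDF_FLUSH | XBDF_BARRIER));
		printf("barrier only  -> %d\n", flush_mapping(XBDF_BARRIER));
		printf("neither       -> %d\n", flush_mapping(0));
		return (0);
	}

In practice the MAP_NONE row should be unreachable from GEOM:
xbd_instance_create() only sets DISKFLAG_CANFLUSHCACHE when one of the two
flags is present, so BIO_FLUSH is never issued to an unsupporting backend
and the panic() acts as a consistency check.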
@@ -1339,7 +1469,7 @@ xbd_suspend(device_t dev)
 
 	/* Wait for outstanding I/O to drain. */
 	retval = 0;
-	while (TAILQ_EMPTY(&sc->xbd_cm_q[XBD_Q_BUSY].q_tailq) == 0) {
+	while (xbd_queue_length(sc, XBD_Q_BUSY) != 0) {
 		if (msleep(&sc->xbd_cm_q[XBD_Q_BUSY], &sc->xbd_io_lock,
 		    PRIBIO, "blkf_susp", 30 * hz) == EWOULDBLOCK) {
 			retval = EBUSY;
diff --git a/sys/dev/xen/blkfront/block.h b/sys/dev/xen/blkfront/block.h
index 7cfe24171f84..0f7d6cb124d8 100644
--- a/sys/dev/xen/blkfront/block.h
+++ b/sys/dev/xen/blkfront/block.h
@@ -94,8 +94,11 @@ typedef enum {
 	XBDCF_Q_MASK		= 0xFF,
 
+	/* This command has contributed to xbd_qfrozen_cnt. */
 	XBDCF_FROZEN		= 1<<8,
-	XBDCF_POLLED		= 1<<9,
+	/* Freeze the command queue on dispatch (i.e. single step command). */
+	XBDCF_Q_FREEZE		= 1<<9,
+	/* Bus DMA returned EINPROGRESS for this command. */
 	XBDCF_ASYNC_MAPPING	= 1<<10,
 	XBDCF_INITIALIZER	= XBDCF_Q_MASK
 } xbdc_flag_t;
@@ -147,9 +150,14 @@ typedef enum {
 	XBDF_NONE	  = 0,
 	XBDF_OPEN	  = 1 << 0, /* drive is open (can't shut down) */
 	XBDF_BARRIER	  = 1 << 1, /* backend supports barriers */
-	XBDF_READY	  = 1 << 2, /* Is ready */
-	XBDF_CM_SHORTAGE  = 1 << 3, /* Free cm resource shortage active. */
-	XBDF_GNT_SHORTAGE = 1 << 4  /* Grant ref resource shortage active */
+	XBDF_FLUSH	  = 1 << 2, /* backend supports flush */
+	XBDF_READY	  = 1 << 3, /* Is ready */
+	XBDF_CM_SHORTAGE  = 1 << 4, /* Free cm resource shortage active. */
+	XBDF_GNT_SHORTAGE = 1 << 5, /* Grant ref resource shortage active */
+	XBDF_WAIT_IDLE	  = 1 << 6  /*
+				     * No new work until outstanding work
+				     * completes.
+				     */
 } xbd_flag_t;
 
 /*
@@ -206,6 +214,12 @@ xbd_removed_qentry(struct xbd_softc *sc, xbd_q_index_t index)
 	sc->xbd_cm_q[index].q_length--;
 }
 
+static inline uint32_t
+xbd_queue_length(struct xbd_softc *sc, xbd_q_index_t index)
+{
+	return (sc->xbd_cm_q[index].q_length);
+}
+
 static inline void
 xbd_initq_cm(struct xbd_softc *sc, xbd_q_index_t index)
 {
@@ -289,27 +303,27 @@ xbd_remove_cm(struct xbd_command *cm, xbd_q_index_t expected_index)
 	xbd_removed_qentry(cm->cm_sc, index);
 }
 
-static __inline void
+static inline void
 xbd_initq_bio(struct xbd_softc *sc)
 {
 	bioq_init(&sc->xbd_bioq);
 }
 
-static __inline void
+static inline void
 xbd_enqueue_bio(struct xbd_softc *sc, struct bio *bp)
 {
 	bioq_insert_tail(&sc->xbd_bioq, bp);
 	xbd_added_qentry(sc, XBD_Q_BIO);
 }
 
-static __inline void
+static inline void
 xbd_requeue_bio(struct xbd_softc *sc, struct bio *bp)
 {
 	bioq_insert_head(&sc->xbd_bioq, bp);
 	xbd_added_qentry(sc, XBD_Q_BIO);
 }
 
-static __inline struct bio *
+static inline struct bio *
 xbd_dequeue_bio(struct xbd_softc *sc)
 {
 	struct bio *bp;
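
The TAILQ_EMPTY() tests in xbd_quiesce(), xbd_int() and xbd_suspend() now
go through the new xbd_queue_length() accessor, which reads the q_length
counter already maintained by xbd_added_qentry() and xbd_removed_qentry()
rather than touching the tailq head.  A stripped-down illustration of the
pattern follows; the struct and helper names here are placeholders, not the
driver's definitions.

	#include <stdint.h>
	#include <stdio.h>

	struct cm_q {
		uint32_t q_length;	/* kept in sync on insert/remove */
	};

	static inline void
	q_added(struct cm_q *q)
	{
		q->q_length++;
	}

	static inline void
	q_removed(struct cm_q *q)
	{
		q->q_length--;
	}

	/*
	 * Counterpart of xbd_queue_length(): an emptiness test that
	 * never walks or dereferences the queue itself.
	 */
	static inline uint32_t
	q_length(const struct cm_q *q)
	{
		return (q->q_length);
	}

	int
	main(void)
	{
		struct cm_q busy = { 0 };

		q_added(&busy);
		printf("busy: %s\n", q_length(&busy) != 0 ? "yes" : "no");
		q_removed(&busy);
		printf("busy: %s\n", q_length(&busy) != 0 ? "yes" : "no");
		return (0);
	}

Keeping one length counter per queue also gives xbd_int() a cheap idle test
for releasing XBDF_WAIT_IDLE once the busy queue drains to zero.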