Add support for Xen blkif indirect segment I/Os.

This makes it possible for the blkfront driver to perform I/Os of up to
2 MB, subject to support from the blkback to which it is connected and
the initiation of such large I/Os by the rest of the kernel. In
practice, the I/O size is increased from 40 kB to 128 kB.

The changes to xen/interface/io/blkif.h consist merely of merging
updates from the upstream Xen repository.

In dev/xen/blkfront/block.h we add some convenience macros and
structure fields used for indirect-page I/Os: the device records its
negotiated limit on the number of indirect pages used, while each I/O
command structure gains permanently allocated page(s) for indirect page
references and the Xen grant references for those pages.

In dev/xen/blkfront/blkfront.c we now check in xbd_queue_cb whether a
request is small enough to handle without an indirection page, and
either follow the previous behaviour or use new code for issuing an
indirect segment I/O. In xbd_connect we read the size of indirect
segment I/Os supported by the backend and select the maximum size we
will use; we then allocate the pages and Xen grant references for each
I/O command structure. In xbd_free those grants and pages are released.

A new loader tunable, hw.xbd.xbd_enable_indirect, can be set to 0 in
order to disable this functionality; it works by pretending that the
backend does not support this feature. Some backends exhibit a loss of
performance with large I/Os, so users may wish to test with and without
this functionality enabled.

Reviewed by:	royger
MFC after:	3 days
Relnotes:	yes
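As a rough illustration of the arithmetic behind the new XBD_MAX_SEGMENTS_PER_PAGE
and XBD_INDIRECT_SEGS_TO_PAGES macros (not part of the commit; it assumes 4 kB
pages, an 8-byte struct blkif_request_segment, and that XBD_SIZE_TO_SEGS computes
size / PAGE_SIZE + 1), the standalone sketch below shows why a 128 kB MAXPHYS
transfer needs only a single indirect page, and why one indirect page by itself
can describe up to 2 MB of I/O:

#include <stdio.h>

#define EXAMPLE_PAGE_SIZE    4096   /* assumed 4 kB pages */
#define EXAMPLE_SEGMENT_SIZE 8      /* assumed sizeof(struct blkif_request_segment) */
#define SEGMENTS_PER_PAGE    (EXAMPLE_PAGE_SIZE / EXAMPLE_SEGMENT_SIZE)  /* 512 */

/* Same ceiling division as XBD_INDIRECT_SEGS_TO_PAGES() in block.h. */
static unsigned int
indirect_segs_to_pages(unsigned int segs)
{
        return ((segs + SEGMENTS_PER_PAGE - 1) / SEGMENTS_PER_PAGE);
}

int
main(void)
{
        /* 128 kB MAXPHYS -> 33 segments, assuming XBD_SIZE_TO_SEGS(size) == size / PAGE_SIZE + 1. */
        unsigned int maxphys_segs = (128 * 1024) / EXAMPLE_PAGE_SIZE + 1;

        printf("%u segments need %u indirect page(s)\n",
            maxphys_segs, indirect_segs_to_pages(maxphys_segs));
        /* One full indirect page describes 512 x 4 kB = 2 MB of data. */
        printf("one indirect page covers up to %u kB\n",
            SEGMENTS_PER_PAGE * EXAMPLE_PAGE_SIZE / 1024);
        return (0);
}

For testing, the tunable mentioned above is read at boot (CTLFLAG_RDTUN, so it
is not writable at runtime); setting hw.xbd.xbd_enable_indirect=0 from the
loader makes the frontend behave as if the backend had not advertised
feature-max-indirect-segments.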
parent 79c1792263
commit d40e2b9930
dev/xen/blkfront/blkfront.c

@@ -84,6 +84,11 @@ static void xbd_startio(struct xbd_softc *sc);
 /*---------------------------- Global Static Data ----------------------------*/
 static MALLOC_DEFINE(M_XENBLOCKFRONT, "xbd", "Xen Block Front driver data");
 
+static int xbd_enable_indirect = 1;
+SYSCTL_NODE(_hw, OID_AUTO, xbd, CTLFLAG_RD, 0, "xbd driver parameters");
+SYSCTL_INT(_hw_xbd, OID_AUTO, xbd_enable_indirect, CTLFLAG_RDTUN,
+    &xbd_enable_indirect, 0, "Enable xbd indirect segments");
+
 /*---------------------------- Command Processing ----------------------------*/
 static void
 xbd_freeze(struct xbd_softc *sc, xbd_flag_t xbd_flag)
@@ -205,7 +210,6 @@ xbd_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
 {
         struct xbd_softc *sc;
         struct xbd_command *cm;
-        blkif_request_t *ring_req;
         int op;
 
         cm = arg;
@@ -218,22 +222,47 @@ xbd_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
                 return;
         }
 
-        KASSERT(nsegs <= BLKIF_MAX_SEGMENTS_PER_REQUEST,
+        KASSERT(nsegs <= sc->xbd_max_request_segments,
             ("Too many segments in a blkfront I/O"));
 
-        /* Fill out a communications ring structure. */
-        ring_req = RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt);
-        sc->xbd_ring.req_prod_pvt++;
-        ring_req->id = cm->cm_id;
-        ring_req->operation = cm->cm_operation;
-        ring_req->sector_number = cm->cm_sector_number;
-        ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk;
-        ring_req->nr_segments = nsegs;
-        cm->cm_nseg = nsegs;
-        xbd_mksegarray(segs, nsegs, &cm->cm_gref_head,
-            xenbus_get_otherend_id(sc->xbd_dev),
-            cm->cm_operation == BLKIF_OP_WRITE,
-            cm->cm_sg_refs, ring_req->seg);
+        if (nsegs <= BLKIF_MAX_SEGMENTS_PER_REQUEST) {
+                blkif_request_t *ring_req;
+
+                /* Fill out a blkif_request_t structure. */
+                ring_req = (blkif_request_t *)
+                    RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt);
+                sc->xbd_ring.req_prod_pvt++;
+                ring_req->id = cm->cm_id;
+                ring_req->operation = cm->cm_operation;
+                ring_req->sector_number = cm->cm_sector_number;
+                ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk;
+                ring_req->nr_segments = nsegs;
+                cm->cm_nseg = nsegs;
+                xbd_mksegarray(segs, nsegs, &cm->cm_gref_head,
+                    xenbus_get_otherend_id(sc->xbd_dev),
+                    cm->cm_operation == BLKIF_OP_WRITE,
+                    cm->cm_sg_refs, ring_req->seg);
+        } else {
+                blkif_request_indirect_t *ring_req;
+
+                /* Fill out a blkif_request_indirect_t structure. */
+                ring_req = (blkif_request_indirect_t *)
+                    RING_GET_REQUEST(&sc->xbd_ring, sc->xbd_ring.req_prod_pvt);
+                sc->xbd_ring.req_prod_pvt++;
+                ring_req->id = cm->cm_id;
+                ring_req->operation = BLKIF_OP_INDIRECT;
+                ring_req->indirect_op = cm->cm_operation;
+                ring_req->sector_number = cm->cm_sector_number;
+                ring_req->handle = (blkif_vdev_t)(uintptr_t)sc->xbd_disk;
+                ring_req->nr_segments = nsegs;
+                cm->cm_nseg = nsegs;
+                xbd_mksegarray(segs, nsegs, &cm->cm_gref_head,
+                    xenbus_get_otherend_id(sc->xbd_dev),
+                    cm->cm_operation == BLKIF_OP_WRITE,
+                    cm->cm_sg_refs, cm->cm_indirectionpages);
+                memcpy(ring_req->indirect_grefs, &cm->cm_indirectionrefs,
+                    sizeof(grant_ref_t) * sc->xbd_max_request_indirectpages);
+        }
 
         if (cm->cm_operation == BLKIF_OP_READ)
                 op = BUS_DMASYNC_PREREAD;
@@ -1015,6 +1044,16 @@ xbd_free(struct xbd_softc *sc)
                         cm->cm_sg_refs = NULL;
                 }
 
+                if (cm->cm_indirectionpages != NULL) {
+                        gnttab_end_foreign_access_references(
+                            sc->xbd_max_request_indirectpages,
+                            &cm->cm_indirectionrefs[0]);
+                        contigfree(cm->cm_indirectionpages, PAGE_SIZE *
+                            sc->xbd_max_request_indirectpages,
+                            M_XENBLOCKFRONT);
+                        cm->cm_indirectionpages = NULL;
+                }
+
                 bus_dmamap_destroy(sc->xbd_io_dmat, cm->cm_map);
         }
         free(sc->xbd_shadow, M_XENBLOCKFRONT);
@@ -1051,9 +1090,6 @@ xbd_initialize(struct xbd_softc *sc)
          */
         max_ring_page_order = 0;
         sc->xbd_ring_pages = 1;
-        sc->xbd_max_request_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST;
-        sc->xbd_max_request_size =
-            XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments);
 
         /*
          * Protocol negotiation.
@@ -1167,7 +1203,7 @@ xbd_connect(struct xbd_softc *sc)
         unsigned long sectors, sector_size;
         unsigned int binfo;
         int err, feature_barrier, feature_flush;
-        int i;
+        int i, j;
 
         if (sc->xbd_state == XBD_STATE_CONNECTED ||
             sc->xbd_state == XBD_STATE_SUSPENDED)
@@ -1198,6 +1234,22 @@ xbd_connect(struct xbd_softc *sc)
         if (err == 0 && feature_flush != 0)
                 sc->xbd_flags |= XBDF_FLUSH;
 
+        err = xs_gather(XST_NIL, xenbus_get_otherend_path(dev),
+            "feature-max-indirect-segments", "%" PRIu32,
+            &sc->xbd_max_request_segments, NULL);
+        if ((err != 0) || (xbd_enable_indirect == 0))
+                sc->xbd_max_request_segments = 0;
+        if (sc->xbd_max_request_segments > XBD_MAX_INDIRECT_SEGMENTS)
+                sc->xbd_max_request_segments = XBD_MAX_INDIRECT_SEGMENTS;
+        if (sc->xbd_max_request_segments > XBD_SIZE_TO_SEGS(MAXPHYS))
+                sc->xbd_max_request_segments = XBD_SIZE_TO_SEGS(MAXPHYS);
+        sc->xbd_max_request_indirectpages =
+            XBD_INDIRECT_SEGS_TO_PAGES(sc->xbd_max_request_segments);
+        if (sc->xbd_max_request_segments < BLKIF_MAX_SEGMENTS_PER_REQUEST)
+                sc->xbd_max_request_segments = BLKIF_MAX_SEGMENTS_PER_REQUEST;
+        sc->xbd_max_request_size =
+            XBD_SEGS_TO_SIZE(sc->xbd_max_request_segments);
+
         /* Allocate datastructures based on negotiated values. */
         err = bus_dma_tag_create(
             bus_get_dma_tag(sc->xbd_dev),       /* parent */
@@ -1230,6 +1282,7 @@ xbd_connect(struct xbd_softc *sc)
 
         for (i = 0; i < sc->xbd_max_requests; i++) {
                 struct xbd_command *cm;
+                void * indirectpages;
 
                 cm = &sc->xbd_shadow[i];
                 cm->cm_sg_refs = malloc(
@@ -1242,6 +1295,24 @@ xbd_connect(struct xbd_softc *sc)
                 cm->cm_sc = sc;
                 if (bus_dmamap_create(sc->xbd_io_dmat, 0, &cm->cm_map) != 0)
                         break;
+                if (sc->xbd_max_request_indirectpages > 0) {
+                        indirectpages = contigmalloc(
+                            PAGE_SIZE * sc->xbd_max_request_indirectpages,
+                            M_XENBLOCKFRONT, M_ZERO, 0, ~0, PAGE_SIZE, 0);
+                } else {
+                        indirectpages = NULL;
+                }
+                for (j = 0; j < sc->xbd_max_request_indirectpages; j++) {
+                        if (gnttab_grant_foreign_access(
+                            xenbus_get_otherend_id(sc->xbd_dev),
+                            (vtomach(indirectpages) >> PAGE_SHIFT) + j,
+                            1 /* grant read-only access */,
+                            &cm->cm_indirectionrefs[j]))
+                                break;
+                }
+                if (j < sc->xbd_max_request_indirectpages)
+                        break;
+                cm->cm_indirectionpages = indirectpages;
                 xbd_free_command(cm);
         }
 
dev/xen/blkfront/block.h

@@ -75,11 +75,25 @@
     __CONST_RING_SIZE(blkif, PAGE_SIZE * XBD_MAX_RING_PAGES)
 
 /**
- * The maximum mapped region size per request we will allow in a negotiated
- * block-front/back communication channel.
+ * The maximum number of blkif segments which can be provided per indirect
+ * page in an indirect request.
  */
-#define XBD_MAX_REQUEST_SIZE \
-    MIN(MAXPHYS, XBD_SEGS_TO_SIZE(BLKIF_MAX_SEGMENTS_PER_REQUEST))
+#define XBD_MAX_SEGMENTS_PER_PAGE \
+    (PAGE_SIZE / sizeof(struct blkif_request_segment))
+
+/**
+ * The maximum number of blkif segments which can be provided in an indirect
+ * request.
+ */
+#define XBD_MAX_INDIRECT_SEGMENTS \
+    (BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST * XBD_MAX_SEGMENTS_PER_PAGE)
+
+/**
+ * Compute the number of indirect segment pages required for an I/O with the
+ * specified number of indirect segments.
+ */
+#define XBD_INDIRECT_SEGS_TO_PAGES(segs) \
+    ((segs + XBD_MAX_SEGMENTS_PER_PAGE - 1) / XBD_MAX_SEGMENTS_PER_PAGE)
 
 typedef enum {
         XBDCF_Q_MASK = 0xFF,
@@ -111,6 +125,8 @@ struct xbd_command {
         blkif_sector_t cm_sector_number;
         int cm_status;
         xbd_cbcf_t *cm_complete;
+        void *cm_indirectionpages;
+        grant_ref_t cm_indirectionrefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
 };
 
 typedef enum {
@@ -165,6 +181,7 @@ struct xbd_softc {
         uint32_t xbd_max_requests;
         uint32_t xbd_max_request_segments;
         uint32_t xbd_max_request_size;
+        uint32_t xbd_max_request_indirectpages;
         grant_ref_t xbd_ring_ref[XBD_MAX_RING_PAGES];
         blkif_front_ring_t xbd_ring;
         xen_intr_handle_t xen_intr_handle;
xen/interface/io/blkif.h

@@ -97,6 +97,28 @@
  *
  * The type of the backing device/object.
  *
+ *
+ * direct-io-safe
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *
+ *      The underlying storage is not affected by the direct IO memory
+ *      lifetime bug. See:
+ *        http://lists.xen.org/archives/html/xen-devel/2012-12/msg01154.html
+ *
+ *      Therefore this option gives the backend permission to use
+ *      O_DIRECT, notwithstanding that bug.
+ *
+ *      That is, if this option is enabled, use of O_DIRECT is safe,
+ *      in circumstances where we would normally have avoided it as a
+ *      workaround for that bug. This option is not relevant for all
+ *      backends, and even not necessarily supported for those for
+ *      which it is relevant. A backend which knows that it is not
+ *      affected by the bug can ignore this option.
+ *
+ *      This option doesn't require a backend to use O_DIRECT, so it
+ *      should not be used to try to control the caching behaviour.
+ *
  *--------------------------------- Features ---------------------------------
  *
  * feature-barrier
@@ -126,6 +148,34 @@
  * of this type may still be returned at any time with the
  * BLKIF_RSP_EOPNOTSUPP result code.
  *
+ * feature-persistent
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *      Notes: 7
+ *
+ *      A value of "1" indicates that the backend can keep the grants used
+ *      by the frontend driver mapped, so the same set of grants should be
+ *      used in all transactions. The maximum number of grants the backend
+ *      can map persistently depends on the implementation, but ideally it
+ *      should be RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST. Using this
+ *      feature the backend doesn't need to unmap each grant, preventing
+ *      costly TLB flushes. The backend driver should only map grants
+ *      persistently if the frontend supports it. If a backend driver chooses
+ *      to use the persistent protocol when the frontend doesn't support it,
+ *      it will probably hit the maximum number of persistently mapped grants
+ *      (due to the fact that the frontend won't be reusing the same grants),
+ *      and fall back to non-persistent mode. Backend implementations may
+ *      shrink or expand the number of persistently mapped grants without
+ *      notifying the frontend depending on memory constraints (this might
+ *      cause a performance degradation).
+ *
+ *      If a backend driver wants to limit the maximum number of persistently
+ *      mapped grants to a value less than RING_SIZE *
+ *      BLKIF_MAX_SEGMENTS_PER_REQUEST a LRU strategy should be used to
+ *      discard the grants that are less commonly used. Using a LRU in the
+ *      backend driver paired with a LIFO queue in the frontend will
+ *      allow us to have better performance in this scenario.
+ *
  *----------------------- Request Transport Parameters ------------------------
  *
  * max-ring-page-order
@@ -147,6 +197,16 @@
  *
  *------------------------- Backend Device Properties -------------------------
  *
+ * discard-enable
+ *      Values:         0/1 (boolean)
+ *      Default Value:  1
+ *
+ *      This optional property, set by the toolstack, instructs the backend
+ *      to offer discard to the frontend. If the property is missing the
+ *      backend should offer discard if the backing storage actually supports
+ *      it. This optional property, set by the toolstack, requests that the
+ *      backend offer, or not offer, discard to the frontend.
+ *
  * discard-alignment
  *      Values:         <uint32_t>
  *      Default Value:  0
@@ -166,6 +226,7 @@
  * discard-secure
  *      Values:         0/1 (boolean)
  *      Default Value:  0
+ *      Notes: 10
  *
  *      A value of "1" indicates that the backend can process BLKIF_OP_DISCARD
  *      requests with the BLKIF_DISCARD_SECURE flag set.
@@ -180,13 +241,17 @@
  * sector-size
  *      Values:         <uint32_t>
  *
- *      The size, in bytes, of the individually addressible data blocks
- *      on the backend device.
+ *      The logical sector size, in bytes, of the backend device.
+ *
+ * physical-sector-size
+ *      Values:         <uint32_t>
+ *
+ *      The physical sector size, in bytes, of the backend device.
  *
  * sectors
  *      Values:         <uint64_t>
  *
- *      The size of the backend device, expressed in units of its native
+ *      The size of the backend device, expressed in units of its logical
  *      sector size ("sector-size").
  *
  *****************************************************************************
@@ -243,6 +308,27 @@
  * The size of the frontend allocated request ring buffer in units of
  * machine pages. The value must be a power of 2.
  *
+ * feature-persistent
+ *      Values:         0/1 (boolean)
+ *      Default Value:  0
+ *      Notes: 7, 8, 9
+ *
+ *      A value of "1" indicates that the frontend will reuse the same grants
+ *      for all transactions, allowing the backend to map them with write
+ *      access (even when it should be read-only). If the frontend hits the
+ *      maximum number of allowed persistently mapped grants, it can fallback
+ *      to non persistent mode. This will cause a performance degradation,
+ *      since the the backend driver will still try to map those grants
+ *      persistently. Since the persistent grants protocol is compatible with
+ *      the previous protocol, a frontend driver can choose to work in
+ *      persistent mode even when the backend doesn't support it.
+ *
+ *      It is recommended that the frontend driver stores the persistently
+ *      mapped grants in a LIFO queue, so a subset of all persistently mapped
+ *      grants gets used commonly. This is done in case the backend driver
+ *      decides to limit the maximum number of persistently mapped grants
+ *      to a value less than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
+ *
  *------------------------- Virtual Device Properties -------------------------
  *
  * device-type
@@ -262,17 +348,23 @@
  * -----
  * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer
  *     PV drivers.
- * (2) Multi-page ring buffer scheme first used in some Red Hat distributions
+ * (2) Multi-page ring buffer scheme first used in some RedHat distributions
  *     including a distribution deployed on certain nodes of the Amazon
  *     EC2 cluster.
  * (3) Support for multi-page ring buffers was implemented independently,
- *     in slightly different forms, by both Citrix and Red Hat/Amazon.
+ *     in slightly different forms, by both Citrix and RedHat/Amazon.
  *     For full interoperability, block front and backends should publish
  *     identical ring parameters, adjusted for unit differences, to the
  *     XenStore nodes used in both schemes.
- * (4) Devices that support discard functionality may internally allocate
- *     space (discardable extents) in units that are larger than the
- *     exported logical block size.
+ * (4) Devices that support discard functionality may internally allocate space
+ *     (discardable extents) in units that are larger than the exported logical
+ *     block size. If the backing device has such discardable extents the
+ *     backend should provide both discard-granularity and discard-alignment.
+ *     Providing just one of the two may be considered an error by the frontend.
+ *     Backends supporting discard should include discard-granularity and
+ *     discard-alignment even if it supports discarding individual sectors.
+ *     Frontends should assume discard-alignment == 0 and discard-granularity
+ *     == sector size if these keys are missing.
  * (5) The discard-alignment parameter allows a physical device to be
  *     partitioned into virtual devices that do not necessarily begin or
  *     end on a discardable extent boundary.
@@ -280,6 +372,19 @@
  *     'ring-ref' is used to communicate the grant reference for this
  *     page to the backend. When using a multi-page ring, the 'ring-ref'
  *     node is not created. Instead 'ring-ref0' - 'ring-refN' are used.
+ * (7) When using persistent grants data has to be copied from/to the page
+ *     where the grant is currently mapped. The overhead of doing this copy
+ *     however doesn't suppress the speed improvement of not having to unmap
+ *     the grants.
+ * (8) The frontend driver has to allow the backend driver to map all grants
+ *     with write access, even when they should be mapped read-only, since
+ *     further requests may reuse these grants and require write permissions.
+ * (9) Linux implementation doesn't have a limit on the maximum number of
+ *     grants that can be persistently mapped in the frontend driver, but
+ *     due to the frontent driver implementation it should never be bigger
+ *     than RING_SIZE * BLKIF_MAX_SEGMENTS_PER_REQUEST.
+ *(10) The discard-secure property may be present and will be set to 1 if the
+ *     backing device supports secure discard.
  */
 
 /*
@@ -403,6 +508,30 @@
  */
 #define BLKIF_OP_DISCARD           5
 
+/*
+ * Recognized if "feature-max-indirect-segments" in present in the backend
+ * xenbus info. The "feature-max-indirect-segments" node contains the maximum
+ * number of segments allowed by the backend per request. If the node is
+ * present, the frontend might use blkif_request_indirect structs in order to
+ * issue requests with more than BLKIF_MAX_SEGMENTS_PER_REQUEST (11). The
+ * maximum number of indirect segments is fixed by the backend, but the
+ * frontend can issue requests with any number of indirect segments as long as
+ * it's less than the number provided by the backend. The indirect_grefs field
+ * in blkif_request_indirect should be filled by the frontend with the
+ * grant references of the pages that are holding the indirect segments.
+ * These pages are filled with an array of blkif_request_segment that hold the
+ * information about the segments. The number of indirect pages to use is
+ * determined by the number of segments an indirect request contains. Every
+ * indirect page can contain a maximum of
+ * (PAGE_SIZE / sizeof(struct blkif_request_segment)) segments, so to
+ * calculate the number of indirect pages to use we have to do
+ * ceil(indirect_segments / (PAGE_SIZE / sizeof(struct blkif_request_segment))).
+ *
+ * If a backend does not recognize BLKIF_OP_INDIRECT, it should *not*
+ * create the "feature-max-indirect-segments" node!
+ */
+#define BLKIF_OP_INDIRECT          6
+
 /*
  * Maximum scatter/gather segments per request.
  * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
@@ -410,12 +539,18 @@
  */
 #define BLKIF_MAX_SEGMENTS_PER_REQUEST 11
 
+/*
+ * Maximum number of indirect pages to use per request.
+ */
+#define BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST 8
+
 /*
  * NB. first_sect and last_sect in blkif_request_segment, as well as
  * sector_number in blkif_request, are always expressed in 512-byte units.
  * However they must be properly aligned to the real sector size of the
- * physical disk, which is reported in the "sector-size" node in the backend
- * xenbus info. Also the xenbus "sectors" node is expressed in 512-byte units.
+ * physical disk, which is reported in the "physical-sector-size" node in
+ * the backend xenbus info. Also the xenbus "sectors" node is expressed in
+ * 512-byte units.
  */
 struct blkif_request_segment {
     grant_ref_t gref;        /* reference to I/O buffer frame */
@@ -453,6 +588,20 @@ struct blkif_request_discard {
 };
 typedef struct blkif_request_discard blkif_request_discard_t;
 
+struct blkif_request_indirect {
+    uint8_t        operation;    /* BLKIF_OP_INDIRECT */
+    uint8_t        indirect_op;  /* BLKIF_OP_{READ/WRITE} */
+    uint16_t       nr_segments;  /* number of segments */
+    uint64_t       id;           /* private guest value, echoed in resp */
+    blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
+    blkif_vdev_t   handle;       /* same as for read/write requests */
+    grant_ref_t    indirect_grefs[BLKIF_MAX_INDIRECT_PAGES_PER_REQUEST];
+#ifdef __i386__
+    uint64_t       pad;          /* Make it 64 byte aligned on i386 */
+#endif
+};
+typedef struct blkif_request_indirect blkif_request_indirect_t;
+
 struct blkif_response {
     uint64_t        id;              /* copied from request */
     uint8_t         operation;       /* copied from request */
@@ -484,7 +633,7 @@ DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
 /*
  * Local variables:
  * mode: C
- * c-set-style: "BSD"
+ * c-file-style: "BSD"
  * c-basic-offset: 4
  * tab-width: 4
 * indent-tabs-mode: nil