Enhance documentation, improve interoperability, and fix defects in
FreeBSD's front and back Xen blkif interface drivers.

sys/dev/xen/blkfront/block.h:
sys/dev/xen/blkfront/blkfront.c:
sys/dev/xen/blkback/blkback.c:
	Replace the FreeBSD-specific multi-page ring implementation with
	support for both the Citrix and Amazon/RedHat versions of this
	extension.

sys/dev/xen/blkfront/blkfront.c:
	o Add a per-instance sysctl tree that exposes all negotiated
	  transport parameters (ring pages, max number of requests,
	  max request size, max number of segments); an example of the
	  resulting nodes follows this list.
	o In blkfront_vdevice_to_unit() add a missing return statement
	  so that we properly identify the unit number for high-numbered
	  xvd devices.
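
	  An illustrative example of the resulting nodes (the dev.xbd.0
	  prefix and all values shown are hypothetical; they depend on
	  how the driver attaches and on what the peer negotiates):

		# sysctl dev.xbd.0
		dev.xbd.0.ring_pages: 4
		dev.xbd.0.max_request_size: 131072
		dev.xbd.0.max_request_segments: 33
		dev.xbd.0.max_requests: 128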

sys/dev/xen/blkback/blkback.c:
	o Add static dtrace probes for several events in this driver.
	o Defer connection shutdown processing until the front-end
	  enters the closed state.  This avoids prematurely tearing
	  down the connection when buggy front-ends transition to the
	  closing state, even though the device is open and they
	  veto the close request from the tool stack.
	o Add nodes for maximum request size and the number of active
	  ring pages to the existing per-instance sysctl tree.
	o Miscellaneous style cleanup.

sys/xen/interface/io/blkif.h:
	o Add extensive documentation of the XenStore nodes used to
	  implement the blkif interface.
	o Document the startup sequence between a front and back driver.
	o Add structures and documentation for the "discard" feature
	  (AKA Trim).
	o Clean up some definitions related to FreeBSD's request
	  number/size/segment-limit extension.

sys/dev/xen/blkfront/blkfront.c:
sys/dev/xen/blkback/blkback.c:
sys/xen/xenbus/xenbusvar.h:
	Add the convenience function xenbus_get_otherend_state() and
	use it to simplify some logic in both block-front and block-back.

MFC after:	1 day
Commit 8b8bfa3567 by Justin T. Gibbs, 2012-02-15 06:45:49 +00:00
Parent: 03a67b59f8
5 changed files with 743 additions and 128 deletions

sys/dev/xen/blkback/blkback.c

@ -40,6 +40,8 @@ __FBSDID("$FreeBSD$");
* a FreeBSD domain to other domains.
*/
#include "opt_kdtrace.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
@ -63,6 +65,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/bitstring.h>
#include <sys/sdt.h>
#include <geom/geom.h>
@ -124,7 +127,7 @@ __FBSDID("$FreeBSD$");
static MALLOC_DEFINE(M_XENBLOCKBACK, "xbbd", "Xen Block Back Driver Data");
#ifdef XBB_DEBUG
#define DPRINTF(fmt, args...) \
printf("xbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTF(fmt, args...) do {} while(0)
@ -134,7 +137,7 @@ static MALLOC_DEFINE(M_XENBLOCKBACK, "xbbd", "Xen Block Back Driver Data");
* The maximum mapped region size per request we will allow in a negotiated
* block-front/back communication channel.
*/
#define XBB_MAX_REQUEST_SIZE \
MIN(MAXPHYS, BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE)
/**
@ -142,9 +145,9 @@ static MALLOC_DEFINE(M_XENBLOCKBACK, "xbbd", "Xen Block Back Driver Data");
* segment blocks) per request we will allow in a negotiated block-front/back
* communication channel.
*/
#define XBB_MAX_SEGMENTS_PER_REQUEST \
(MIN(UIO_MAXIOV, \
MIN(BLKIF_MAX_SEGMENTS_PER_REQUEST, \
(XBB_MAX_REQUEST_SIZE / PAGE_SIZE) + 1)))
/**
@ -980,9 +983,10 @@ xbb_get_gntaddr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector)
static uint8_t *
xbb_get_kva(struct xbb_softc *xbb, int nr_pages)
{
intptr_t first_clear;
intptr_t num_clear;
uint8_t *free_kva;
int i;
KASSERT(nr_pages != 0, ("xbb_get_kva of zero length"));
@ -1681,19 +1685,19 @@ xbb_dispatch_io(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist)
req_ring_idx++;
switch (xbb->abi) {
case BLKIF_PROTOCOL_NATIVE:
sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.native,
req_ring_idx);
break;
case BLKIF_PROTOCOL_X86_32:
{
sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.x86_32,
req_ring_idx);
break;
}
case BLKIF_PROTOCOL_X86_64:
{
sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.x86_64,
req_ring_idx);
break;
}
default:
@ -1817,8 +1821,8 @@ xbb_run_queue(void *context, int pending)
struct xbb_xen_reqlist *reqlist;
xbb = (struct xbb_softc *)context;
rings = &xbb->rings;
/*
* Work gather and dispatch loop. Note that we have a bias here
@ -2032,6 +2036,13 @@ xbb_intr(void *arg)
taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task);
}
SDT_PROVIDER_DEFINE(xbb);
SDT_PROBE_DEFINE1(xbb, kernel, xbb_dispatch_dev, flush, flush, "int");
SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_dev, read, read, "int", "uint64_t",
"uint64_t");
SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_dev, write, write, "int",
"uint64_t", "uint64_t");
/*----------------------------- Backend Handlers -----------------------------*/
/**
* Backend handler for character device access.
@ -2087,6 +2098,9 @@ xbb_dispatch_dev(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist,
nreq->pendcnt = 1;
SDT_PROBE1(xbb, kernel, xbb_dispatch_dev, flush,
device_get_unit(xbb->dev));
(*dev_data->csw->d_strategy)(bio);
return (0);
@ -2181,6 +2195,17 @@ xbb_dispatch_dev(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist,
bios[bio_idx]->bio_bcount);
}
#endif
if (operation == BIO_READ) {
SDT_PROBE3(xbb, kernel, xbb_dispatch_dev, read,
device_get_unit(xbb->dev),
bios[bio_idx]->bio_offset,
bios[bio_idx]->bio_length);
} else if (operation == BIO_WRITE) {
SDT_PROBE3(xbb, kernel, xbb_dispatch_dev, write,
device_get_unit(xbb->dev),
bios[bio_idx]->bio_offset,
bios[bio_idx]->bio_length);
}
(*dev_data->csw->d_strategy)(bios[bio_idx]);
}
@ -2193,6 +2218,12 @@ xbb_dispatch_dev(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist,
return (error);
}
SDT_PROBE_DEFINE1(xbb, kernel, xbb_dispatch_file, flush, flush, "int");
SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_file, read, read, "int", "uint64_t",
"uint64_t");
SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_file, write, write, "int",
"uint64_t", "uint64_t");
/**
* Backend handler for file access.
*
@ -2237,6 +2268,9 @@ xbb_dispatch_file(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist,
case BIO_FLUSH: {
struct mount *mountpoint;
SDT_PROBE1(xbb, kernel, xbb_dispatch_file, flush,
device_get_unit(xbb->dev));
vfs_is_locked = VFS_LOCK_GIANT(xbb->vn->v_mount);
(void) vn_start_write(xbb->vn, &mountpoint, V_WAIT);
@ -2336,6 +2370,10 @@ xbb_dispatch_file(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist,
switch (operation) {
case BIO_READ:
SDT_PROBE3(xbb, kernel, xbb_dispatch_file, read,
device_get_unit(xbb->dev), xuio.uio_offset,
xuio.uio_resid);
vn_lock(xbb->vn, LK_EXCLUSIVE | LK_RETRY);
/*
@ -2366,6 +2404,10 @@ xbb_dispatch_file(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist,
case BIO_WRITE: {
struct mount *mountpoint;
SDT_PROBE3(xbb, kernel, xbb_dispatch_file, write,
device_get_unit(xbb->dev), xuio.uio_offset,
xuio.uio_resid);
(void)vn_start_write(xbb->vn, &mountpoint, V_WAIT);
vn_lock(xbb->vn, LK_EXCLUSIVE | LK_RETRY);
@ -3028,6 +3070,8 @@ xbb_collect_frontend_info(struct xbb_softc *xbb)
const char *otherend_path;
int error;
u_int ring_idx;
u_int ring_page_order;
size_t ring_size;
otherend_path = xenbus_get_otherend_path(xbb->dev);
@ -3035,23 +3079,19 @@ xbb_collect_frontend_info(struct xbb_softc *xbb)
* Protocol defaults valid even if all negotiation fails.
*/
xbb->ring_config.ring_pages = 1;
xbb->max_requests = BLKIF_MAX_RING_REQUESTS(PAGE_SIZE);
xbb->max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK;
xbb->max_request_size = xbb->max_request_segments * PAGE_SIZE;
/*
* Mandatory data (used in all versions of the protocol) first.
*/
error = xs_scanf(XST_NIL, otherend_path,
"event-channel", NULL, "%" PRIu32,
&xbb->ring_config.evtchn);
if (error != 0) {
xenbus_dev_fatal(xbb->dev, error,
"Unable to retrieve ring information from "
"frontend %s. Unable to connect.",
"Unable to retrieve event-channel information "
"from frontend %s. Unable to connect.",
xenbus_get_otherend_path(xbb->dev));
return (error);
}
@ -3065,10 +3105,20 @@ xbb_collect_frontend_info(struct xbb_softc *xbb)
* we must use independent calls in order to guarantee
* we don't miss information in a sparsely populated front-end
* tree.
*
* \note xs_scanf() does not update variables for unmatched
* fields.
*/
ring_page_order = 0;
(void)xs_scanf(XST_NIL, otherend_path,
"ring-pages", NULL, "%u",
"ring-page-order", NULL, "%u",
&ring_page_order);
xbb->ring_config.ring_pages = 1 << ring_page_order;
(void)xs_scanf(XST_NIL, otherend_path,
"num-ring-pages", NULL, "%u",
&xbb->ring_config.ring_pages);
ring_size = PAGE_SIZE * xbb->ring_config.ring_pages;
xbb->max_requests = BLKIF_MAX_RING_REQUESTS(ring_size);
(void)xs_scanf(XST_NIL, otherend_path,
"max-requests", NULL, "%u",
@ -3116,22 +3166,39 @@ xbb_collect_frontend_info(struct xbb_softc *xbb)
return (EINVAL);
}
if (xbb->ring_config.ring_pages == 1) {
error = xs_gather(XST_NIL, otherend_path,
"ring-ref", "%" PRIu32,
&xbb->ring_config.ring_ref[0],
NULL);
if (error != 0) {
xenbus_dev_fatal(xbb->dev, error,
"Failed to retriev grant reference "
"for page %u of shared ring. Unable "
"to connect.", ring_idx);
"Unable to retrieve ring information "
"from frontend %s. Unable to "
"connect.",
xenbus_get_otherend_path(xbb->dev));
return (error);
}
} else {
/* Multi-page ring format. */
for (ring_idx = 0; ring_idx < xbb->ring_config.ring_pages;
ring_idx++) {
char ring_ref_name[]= "ring_refXX";
snprintf(ring_ref_name, sizeof(ring_ref_name),
"ring-ref%u", ring_idx);
error = xs_scanf(XST_NIL, otherend_path,
ring_ref_name, NULL, "%" PRIu32,
&xbb->ring_config.ring_ref[ring_idx]);
if (error != 0) {
xenbus_dev_fatal(xbb->dev, error,
"Failed to retriev grant "
"reference for page %u of "
"shared ring. Unable "
"to connect.", ring_idx);
return (error);
}
}
}
error = xs_gather(XST_NIL, otherend_path,
@ -3197,8 +3264,8 @@ xbb_alloc_requests(struct xbb_softc *xbb)
static int
xbb_alloc_request_lists(struct xbb_softc *xbb)
{
struct xbb_xen_reqlist *reqlist;
int i;
/*
* If no requests can be merged, we need 1 request list per
@ -3318,7 +3385,7 @@ xbb_publish_backend_info(struct xbb_softc *xbb)
static void
xbb_connect(struct xbb_softc *xbb)
{
int error;
if (xenbus_get_state(xbb->dev) == XenbusStateConnected)
return;
@ -3399,7 +3466,8 @@ xbb_connect(struct xbb_softc *xbb)
static int
xbb_shutdown(struct xbb_softc *xbb)
{
XenbusState frontState;
int error;
DPRINTF("\n");
@ -3413,6 +3481,20 @@ xbb_shutdown(struct xbb_softc *xbb)
if ((xbb->flags & XBBF_IN_SHUTDOWN) != 0)
return (EAGAIN);
xbb->flags |= XBBF_IN_SHUTDOWN;
mtx_unlock(&xbb->lock);
if (xenbus_get_state(xbb->dev) < XenbusStateClosing)
xenbus_set_state(xbb->dev, XenbusStateClosing);
frontState = xenbus_get_otherend_state(xbb->dev);
mtx_lock(&xbb->lock);
xbb->flags &= ~XBBF_IN_SHUTDOWN;
/* The front can submit I/O until entering the closed state. */
if (frontState < XenbusStateClosed)
return (EAGAIN);
DPRINTF("\n");
/* Indicate shutdown is in progress. */
@ -3434,19 +3516,6 @@ xbb_shutdown(struct xbb_softc *xbb)
DPRINTF("\n");
/* Indicate to xbb_detach() that it is safe to proceed. */
wakeup(xbb);
@ -3573,6 +3642,16 @@ xbb_setup_sysctl(struct xbb_softc *xbb)
"max_request_segments", CTLFLAG_RD,
&xbb->max_request_segments, 0,
"maximum number of pages per requests (negotiated)");
SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
"max_request_size", CTLFLAG_RD,
&xbb->max_request_size, 0,
"maximum size in bytes of a request (negotiated)");
SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
"ring_pages", CTLFLAG_RD,
&xbb->ring_config.ring_pages, 0,
"communication channel pages (negotiated)");
}
/**
@ -3587,6 +3666,7 @@ xbb_attach(device_t dev)
{
struct xbb_softc *xbb;
int error;
u_int max_ring_page_order;
DPRINTF("Attaching to %s\n", xenbus_get_node(dev));
@ -3621,6 +3701,10 @@ xbb_attach(device_t dev)
return (error);
}
/*
* Amazon EC2 client compatibility. They refer to max-ring-pages
* instead of max-ring-page-order.
*/
error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev),
"max-ring-pages", "%zu", XBB_MAX_RING_PAGES);
if (error) {
@ -3629,6 +3713,15 @@ xbb_attach(device_t dev)
return (error);
}
max_ring_page_order = flsl(XBB_MAX_RING_PAGES) - 1;
error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev),
"max-ring-page-order", "%u", max_ring_page_order);
if (error) {
xbb_attach_failed(xbb, error, "writing %s/max-ring-page-order",
xenbus_get_node(xbb->dev));
return (error);
}
error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev),
"max-requests", "%u", XBB_MAX_REQUESTS);
if (error) {
@ -3862,12 +3955,16 @@ xbb_frontend_changed(device_t dev, XenbusState frontend_state)
xbb_connect(xbb);
break;
case XenbusStateClosing:
/*
* Frontend has acknowledged Closing request.
* Wait for Closed state.
*/
break;
case XenbusStateClosed:
mtx_lock(&xbb->lock);
xbb_shutdown(xbb);
mtx_unlock(&xbb->lock);
xenbus_set_state(xbb->dev, XenbusStateClosed);
break;
default:
xenbus_dev_fatal(xbb->dev, EINVAL, "saw state %d at frontend",

sys/dev/xen/blkfront/blkfront.c

@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/module.h>
#include <sys/sysctl.h>
#include <machine/bus.h>
#include <sys/rman.h>
@ -139,7 +140,7 @@ static int xb_dump(void *, void *, vm_offset_t, off_t, size_t);
* with blkfront as the emulated drives, easing transition slightly.
*/
static void
blkfront_vdevice_to_unit(uint32_t vdevice, int *unit, const char **name)
{
static struct vdev_info {
int major;
@ -186,6 +187,7 @@ blkfront_vdevice_to_unit(int vdevice, int *unit, const char **name)
if (vdevice & (1 << 28)) {
*unit = (vdevice & ((1 << 28) - 1)) >> 8;
*name = "xbd";
return;
}
for (i = 0; info[i].major; i++) {
@ -226,7 +228,7 @@ xlvbd_add(struct xb_softc *sc, blkif_sector_t sectors,
sc->xb_disk->d_sectorsize = sector_size;
sc->xb_disk->d_mediasize = sectors * sector_size;
sc->xb_disk->d_maxsize = sc->max_request_size - PAGE_SIZE;
sc->xb_disk->d_flags = 0;
disk_create(sc->xb_disk, DISK_VERSION_00);
@ -407,6 +409,40 @@ blkfront_probe(device_t dev)
return (ENXIO);
}
static void
xb_setup_sysctl(struct xb_softc *xb)
{
struct sysctl_ctx_list *sysctl_ctx = NULL;
struct sysctl_oid *sysctl_tree = NULL;
sysctl_ctx = device_get_sysctl_ctx(xb->xb_dev);
if (sysctl_ctx == NULL)
return;
sysctl_tree = device_get_sysctl_tree(xb->xb_dev);
if (sysctl_tree == NULL)
return;
SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
"max_requests", CTLFLAG_RD, &xb->max_requests, -1,
"maximum outstanding requests (negotiated)");
SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
"max_request_segments", CTLFLAG_RD,
&xb->max_request_segments, 0,
"maximum number of pages per requests (negotiated)");
SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
"max_request_size", CTLFLAG_RD,
&xb->max_request_size, 0,
"maximum size in bytes of a request (negotiated)");
SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
"ring_pages", CTLFLAG_RD,
&xb->ring_pages, 0,
"communication channel pages (negotiated)");
}
/*
* Setup supplies the backend dir, virtual device. We place an event
* channel and shared frame entries. We watch backend to wait if it's
@ -417,14 +453,14 @@ blkfront_attach(device_t dev)
{
struct xb_softc *sc;
const char *name;
uint32_t vdevice;
int error;
int i;
int unit;
/* FIXME: Use dynamic device id if this is not set. */
error = xs_scanf(XST_NIL, xenbus_get_node(dev),
"virtual-device", NULL, "%i", &vdevice);
"virtual-device", NULL, "%" PRIu32, &vdevice);
if (error) {
xenbus_dev_fatal(dev, error, "reading virtual-device");
device_printf(dev, "Couldn't determine virtual device.\n");
@ -449,6 +485,8 @@ blkfront_attach(device_t dev)
sc->vdevice = vdevice;
sc->connected = BLKIF_STATE_DISCONNECTED;
xb_setup_sysctl(sc);
/* Wait for backend device to publish its protocol capabilities. */
xenbus_set_state(dev, XenbusStateInitialising);
@ -501,6 +539,7 @@ blkfront_initialize(struct xb_softc *sc)
{
const char *otherend_path;
const char *node_path;
uint32_t max_ring_page_order;
int error;
int i;
@ -513,8 +552,8 @@ blkfront_initialize(struct xb_softc *sc)
* Protocol defaults valid even if negotiation for a
* setting fails.
*/
max_ring_page_order = 0;
sc->ring_pages = 1;
sc->max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK;
sc->max_request_size = (sc->max_request_segments - 1) * PAGE_SIZE;
sc->max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->max_request_segments);
@ -526,13 +565,25 @@ blkfront_initialize(struct xb_softc *sc)
* we must use independent calls in order to guarantee
* we don't miss information in a sparsely populated back-end
* tree.
*
* \note xs_scanf() does not update variables for unmatched
* fields.
*/
otherend_path = xenbus_get_otherend_path(sc->xb_dev);
node_path = xenbus_get_node(sc->xb_dev);
/* Support both backend schemes for relaying ring page limits. */
(void)xs_scanf(XST_NIL, otherend_path,
"max-ring-page-order", NULL, "%" PRIu32,
&max_ring_page_order);
sc->ring_pages = 1 << max_ring_page_order;
(void)xs_scanf(XST_NIL, otherend_path,
"max-ring-pages", NULL, "%" PRIu32,
&sc->ring_pages);
if (sc->ring_pages < 1)
sc->ring_pages = 1;
sc->max_requests = BLKIF_MAX_RING_REQUESTS(sc->ring_pages * PAGE_SIZE);
(void)xs_scanf(XST_NIL, otherend_path,
"max-requests", NULL, "%" PRIu32,
&sc->max_requests);
@ -552,6 +603,16 @@ blkfront_initialize(struct xb_softc *sc)
sc->ring_pages = XBF_MAX_RING_PAGES;
}
if (powerof2(sc->ring_pages) == 0) {
uint32_t new_page_limit;
new_page_limit = 0x01 << (fls(sc->ring_pages) - 1);
device_printf(sc->xb_dev, "Back-end specified ring-pages of "
"%u is not a power of 2. Limited to %u.\n",
sc->ring_pages, new_page_limit);
sc->ring_pages = new_page_limit;
}
if (sc->max_requests > XBF_MAX_REQUESTS) {
device_printf(sc->xb_dev, "Back-end specified max_requests of "
"%u limited to front-end limit of %u.\n",
@ -625,11 +686,20 @@ blkfront_initialize(struct xb_softc *sc)
if (setup_blkring(sc) != 0)
return;
/* Support both backend schemes for relaying ring page limits. */
error = xs_printf(XST_NIL, node_path,
"ring-pages","%u", sc->ring_pages);
"num-ring-pages","%u", sc->ring_pages);
if (error) {
xenbus_dev_fatal(sc->xb_dev, error,
"writing %s/ring-pages",
"writing %s/num-ring-pages",
node_path);
return;
}
error = xs_printf(XST_NIL, node_path,
"ring-page-order","%u", fls(sc->ring_pages) - 1);
if (error) {
xenbus_dev_fatal(sc->xb_dev, error,
"writing %s/ring-page-order",
node_path);
return;
}
@ -711,25 +781,31 @@ setup_blkring(struct xb_softc *sc)
return (error);
}
}
if (sc->ring_pages == 1) {
error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
ring_ref_name, "%u", sc->ring_ref[i]);
"ring-ref", "%u", sc->ring_ref[0]);
if (error) {
xenbus_dev_fatal(sc->xb_dev, error, "writing %s/%s",
xenbus_get_node(sc->xb_dev),
ring_ref_name);
xenbus_dev_fatal(sc->xb_dev, error,
"writing %s/ring-ref",
xenbus_get_node(sc->xb_dev));
return (error);
}
} else {
for (i = 0; i < sc->ring_pages; i++) {
char ring_ref_name[]= "ring_refXX";
snprintf(ring_ref_name, sizeof(ring_ref_name),
"ring-ref%u", i);
error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
ring_ref_name, "%u", sc->ring_ref[i]);
if (error) {
xenbus_dev_fatal(sc->xb_dev, error,
"writing %s/%s",
xenbus_get_node(sc->xb_dev),
ring_ref_name);
return (error);
}
}
}
error = bind_listening_port_to_irqhandler(
@ -795,7 +871,7 @@ blkfront_connect(struct xb_softc *sc)
unsigned int binfo;
int err, feature_barrier;
if ((sc->connected == BLKIF_STATE_CONNECTED) ||
(sc->connected == BLKIF_STATE_SUSPENDED) )
return;
@ -923,15 +999,13 @@ blkif_close(struct disk *dp)
return (ENXIO);
sc->xb_flags &= ~XB_OPEN;
if (--(sc->users) == 0) {
/*
* Check whether we have been instructed to close. We will
* have ignored this request initially, as the device was
* still mounted.
*/
if (xenbus_get_otherend_state(sc->xb_dev) == XenbusStateClosing)
blkfront_closing(sc->xb_dev);
}
return (0);
}
@ -1033,7 +1107,7 @@ blkif_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
struct xb_command *cm;
blkif_request_t *ring_req;
struct blkif_request_segment *sg;
struct blkif_request_segment *last_block_sg;
grant_ref_t *sg_ref;
vm_paddr_t buffer_ma;
uint64_t fsect, lsect;
@ -1104,12 +1178,12 @@ blkif_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
nsegs--;
}
block_segs = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK);
if (block_segs == 0)
break;
sg = BLKRING_GET_SEG_BLOCK(&sc->ring, sc->ring.req_prod_pvt);
sc->ring.req_prod_pvt++;
last_block_sg = sg + block_segs;
}
if (cm->operation == BLKIF_OP_READ)

sys/dev/xen/blkfront/block.h

@ -49,7 +49,7 @@
* guarantee we can handle an unaligned transfer without the need to
* use a bounce buffer..
*/
#define XBF_MAX_REQUEST_SIZE \
MIN(MAXPHYS, (BLKIF_MAX_SEGMENTS_PER_REQUEST - 1) * PAGE_SIZE)
/**
@ -57,8 +57,8 @@
* segment blocks) per request we will allow in a negotiated block-front/back
* communication channel.
*/
#define XBF_MAX_SEGMENTS_PER_REQUEST \
(MIN(BLKIF_MAX_SEGMENTS_PER_REQUEST, \
(XBF_MAX_REQUEST_SIZE / PAGE_SIZE) + 1))
/**

sys/xen/interface/io/blkif.h

@ -1,8 +1,8 @@
/******************************************************************************
* blkif.h
*
* Unified block-device I/O interface for Xen guest OSes.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
@ -22,6 +22,7 @@
* DEALINGS IN THE SOFTWARE.
*
* Copyright (c) 2003-2004, Keir Fraser
* Copyright (c) 2012, Spectra Logic Corporation
*/
#ifndef __XEN_PUBLIC_IO_BLKIF_H__
@ -35,7 +36,7 @@
* notification can be made conditional on req_event (i.e., the generic
* hold-off mechanism provided by the ring macros). Backends must set
* req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
*
* Back->front notifications: When enqueuing a new response, sending a
* notification can be made conditional on rsp_event (i.e., the generic
* hold-off mechanism provided by the ring macros). Frontends must set
@ -47,38 +48,414 @@
#endif
#define blkif_sector_t uint64_t
/*
* Feature and Parameter Negotiation
* =================================
* The two halves of a Xen block driver utilize nodes within the XenStore to
* communicate capabilities and to negotiate operating parameters. This
* section enumerates these nodes which reside in the respective front and
* backend portions of the XenStore, following the XenBus convention.
*
* All data in the XenStore is stored as strings. Nodes specifying numeric
* values are encoded in decimal. Integer value ranges listed below are
* expressed as fixed sized integer types capable of storing the conversion
* of a properly formatted node string, without loss of information.
*
* Any specified default value is in effect if the corresponding XenBus node
* is not present in the XenStore.
*
* XenStore nodes in sections marked "PRIVATE" are solely for use by the
* driver side whose XenBus tree contains them.
*
* See the XenBus state transition diagram below for details on when XenBus
* nodes must be published and when they can be queried.
*
*****************************************************************************
* Backend XenBus Nodes
*****************************************************************************
*
*------------------ Backend Device Identification (PRIVATE) ------------------
*
* mode
* Values: "r" (read only), "w" (writable)
*
* The read or write access permissions to the backing store to be
* granted to the frontend.
*
* params
* Values: string
*
* A free formatted string providing sufficient information for the
* backend driver to open the backing device. (e.g. the path to the
* file or block device representing the backing store.)
*
* type
* Values: "file", "phy", "tap"
*
* The type of the backing device/object.
*
*--------------------------------- Features ---------------------------------
*
* feature-barrier
* Values: 0/1 (boolean)
* Default Value: 0
*
* A value of "1" indicates that the backend can process requests
* containing the BLKIF_OP_WRITE_BARRIER request opcode. Requests
* of this type may still be returned at any time with the
* BLKIF_RSP_EOPNOTSUPP result code.
*
* feature-flush-cache
* Values: 0/1 (boolean)
* Default Value: 0
*
* A value of "1" indicates that the backend can process requests
* containing the BLKIF_OP_FLUSH_DISKCACHE request opcode. Requests
* of this type may still be returned at any time with the
* BLKIF_RSP_EOPNOTSUPP result code.
*
* feature-discard
* Values: 0/1 (boolean)
* Default Value: 0
*
* A value of "1" indicates that the backend can process requests
* containing the BLKIF_OP_DISCARD request opcode. Requests
* of this type may still be returned at any time with the
* BLKIF_RSP_EOPNOTSUPP result code.
*
*----------------------- Request Transport Parameters ------------------------
*
* max-ring-page-order
* Values: <uint32_t>
* Default Value: 0
* Notes: 1, 3
*
* The maximum supported size of the request ring buffer in units of
* lb(machine pages). (e.g. 0 == 1 page, 1 == 2 pages, 2 == 4 pages,
* etc.).
*
* max-ring-pages
* Values: <uint32_t>
* Default Value: 1
* Notes: 2, 3
*
* The maximum supported size of the request ring buffer in units of
* machine pages. The value must be a power of 2.
*
* max-requests
* Values: <uint32_t>
* Default Value: BLKIF_MAX_RING_REQUESTS(PAGE_SIZE)
* Maximum Value: BLKIF_MAX_RING_REQUESTS(PAGE_SIZE * max-ring-pages)
*
* The maximum number of concurrent, logical requests that will be
* issued by the backend.
*
* Note: A logical request may span multiple ring entries.
*
* max-request-segments
* Values: <uint8_t>
* Default Value: BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK
* Maximum Value: BLKIF_MAX_SEGMENTS_PER_REQUEST
*
* The maximum value of blkif_request.nr_segments supported by
* the backend.
*
* max-request-size
* Values: <uint32_t>
* Default Value: BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK * PAGE_SIZE
* Maximum Value: BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE
*
* The maximum amount of data, in bytes, that can be referenced by a
* request type that accesses frontend memory (currently BLKIF_OP_READ,
* BLKIF_OP_WRITE, or BLKIF_OP_WRITE_BARRIER).
*
*------------------------- Backend Device Properties -------------------------
*
* discard-alignment
* Values: <uint32_t>
* Default Value: 0
* Notes: 4, 5
*
* The offset, in bytes from the beginning of the virtual block device,
* to the first, addressable, discard extent on the underlying device.
*
* discard-granularity
* Values: <uint32_t>
* Default Value: <"sector-size">
* Notes: 4
*
* The size, in bytes, of the individually addressable discard extents
* of the underlying device.
*
* discard-secure
* Values: 0/1 (boolean)
* Default Value: 0
*
* A value of "1" indicates that the backend can process BLKIF_OP_DISCARD
* requests with the BLKIF_DISCARD_SECURE flag set.
*
* info
* Values: <uint32_t> (bitmap)
*
* A collection of bit flags describing attributes of the backing
* device. The VDISK_* macros define the meaning of each bit
* location.
*
* sector-size
* Values: <uint32_t>
*
* The native sector size, in bytes, of the backend device.
*
* sectors
* Values: <uint64_t>
*
* The size of the backend device, expressed in units of its native
* sector size ("sector-size").
*
*****************************************************************************
* Frontend XenBus Nodes
*****************************************************************************
*
*----------------------- Request Transport Parameters -----------------------
*
* event-channel
* Values: <uint32_t>
*
* The identifier of the Xen event channel used to signal activity
* in the ring buffer.
*
* ring-ref
* Values: <uint32_t>
* Notes: 6
*
* The Xen grant reference granting permission for the backend to map
* the sole page in a single page sized ring buffer.
*
* ring-ref%u
* Values: <uint32_t>
* Notes: 6
*
* For a frontend providing a multi-page ring, a "num-ring-pages" sized
* list of nodes, each containing a Xen grant reference granting
* permission for the backend to map the page of the ring located
* at page index "%u". Page indexes are zero based.
*
* protocol
* Values: string (XEN_IO_PROTO_ABI_*)
* Default Value: XEN_IO_PROTO_ABI_NATIVE
*
* The machine ABI rules governing the format of all ring request and
* response structures.
*
* ring-page-order
* Values: <uint32_t>
* Default Value: 0
* Maximum Value: MAX(ffs(max-ring-pages) - 1, max-ring-page-order)
* Notes: 1, 3
*
* The size of the frontend allocated request ring buffer in units
* of lb(machine pages). (e.g. 0 == 1 page, 1 == 2 pages, 2 == 4 pages,
* etc.).
*
* num-ring-pages
* Values: <uint32_t>
* Default Value: 1
* Maximum Value: MAX(max-ring-pages,(0x1 << max-ring-page-order))
* Notes: 2, 3
*
* The size of the frontend allocated request ring buffer in units of
* machine pages. The value must be a power of 2.
*
* max-requests
* Values: <uint32_t>
* Default Value: BLKIF_MAX_RING_REQUESTS(PAGE_SIZE)
* Maximum Value: BLKIF_MAX_RING_REQUESTS(PAGE_SIZE * max-ring-pages)
*
* The maximum number of concurrent, logical requests that will be
* issued by the frontend.
*
* Note: A logical request may span multiple ring entries.
*
* max-request-segments
* Values: <uint8_t>
* Default Value: BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK
* Maximum Value: MIN(255, backend/max-request-segments)
*
* The maximum value the frontend will set in the
* blkif_request.nr_segments field.
*
* max-request-size
* Values: <uint32_t>
* Default Value: BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK * PAGE_SIZE
* Maximum Value: max-request-segments * PAGE_SIZE
*
* The maximum amount of data, in bytes, that can be referenced by
* a request type that accesses frontend memory (currently BLKIF_OP_READ,
* BLKIF_OP_WRITE, or BLKIF_OP_WRITE_BARRIER).
*
*------------------------- Virtual Device Properties -------------------------
*
* device-type
* Values: "disk", "cdrom", "floppy", etc.
*
* virtual-device
* Values: <uint32_t>
*
* A value indicating the physical device to virtualize within the
* frontend's domain. (e.g. "The first ATA disk", "The third SCSI
* disk", etc.)
*
* See docs/misc/vbd-interface.txt for details on the format of this
* value.
*
* Notes
* -----
* (1) Multi-page ring buffer scheme first developed in the Citrix XenServer
* PV drivers.
* (2) Multi-page ring buffer scheme first used in some RedHat distributions
* including a distribution deployed on certain nodes of the Amazon
* EC2 cluster.
* (3) Support for multi-page ring buffers was implemented independently,
* in slightly different forms, by both Citrix and RedHat/Amazon.
* For full interoperability, block front and backends should publish
* identical ring parameters, adjusted for unit differences, to the
* XenStore nodes used in both schemes.
* (4) Devices that support discard functionality may internally allocate
* space (discardable extents) in units that are larger than the
* exported logical block size.
* (5) The discard-alignment parameter allows a physical device to be
* partitioned into virtual devices that do not necessarily begin or
* end on a discardable extent boundary.
* (6) When there is only a single page allocated to the request ring,
* 'ring-ref' is used to communicate the grant reference for this
* page to the backend. When using a multi-page ring, the 'ring-ref'
* node is not created. Instead 'ring-ref0' - 'ring-refN' are used.
*/
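
For illustration, a frontend that supports both multi-page ring schemes
could negotiate its ring size as follows (a minimal sketch patterned on
the FreeBSD blkfront changes in this commit; otherend_path, node_path,
and all error handling are elided assumptions):

	uint32_t ring_page_order = 0;
	uint32_t ring_pages = 1;

	/* Citrix scheme: the limit is advertised as lb(pages). */
	(void)xs_scanf(XST_NIL, otherend_path, "max-ring-page-order",
	    NULL, "%u", &ring_page_order);
	ring_pages = 1 << ring_page_order;

	/* Amazon/RedHat scheme: the limit is advertised directly in pages. */
	(void)xs_scanf(XST_NIL, otherend_path, "max-ring-pages",
	    NULL, "%u", &ring_pages);

	/* Publish the chosen size using both conventions. */
	(void)xs_printf(XST_NIL, node_path, "num-ring-pages", "%u",
	    ring_pages);
	(void)xs_printf(XST_NIL, node_path, "ring-page-order", "%u",
	    fls(ring_pages) - 1);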
/*
* STATE DIAGRAMS
*
*****************************************************************************
* Startup *
*****************************************************************************
*
* Tool stack creates front and back nodes with state XenbusStateInitialising.
*
* Front Back
* ================================= =====================================
* XenbusStateInitialising XenbusStateInitialising
* o Query virtual device o Query backend device identification
* properties. data.
* o Setup OS device instance. o Open and validate backend device.
* o Publish backend features and
* transport parameters.
* |
* |
* V
* XenbusStateInitWait
*
* o Query backend features and
* transport parameters.
* o Allocate and initialize the
* request ring.
* o Publish transport parameters
* that will be in effect during
* this connection.
* |
* |
* V
* XenbusStateInitialised
*
* o Query frontend transport parameters.
* o Connect to the request ring and
* event channel.
* o Publish backend device properties.
* |
* |
* V
* XenbusStateConnected
*
* o Query backend device properties.
* o Finalize OS virtual device
* instance.
* |
* |
* V
* XenbusStateConnected
*
* Note: Drivers that do not support any optional features, or the negotiation
* of transport parameters, can skip certain states in the state machine:
*
* o A frontend may transition to XenbusStateInitialised without
* waiting for the backend to enter XenbusStateInitWait. In this
* case, default transport parameters are in effect and any
* transport parameters published by the frontend must contain
* their default values.
*
* o A backend may transition to XenbusStateInitialised, bypassing
* XenbusStateInitWait, without waiting for the frontend to first
* enter the XenbusStateInitialised state. In this case, default
* transport parameters are in effect and any transport parameters
* published by the backend must contain their default values.
*
* Drivers that support optional features and/or transport parameter
* negotiation must tolerate these additional state transition paths.
* In general this means performing the work of any skipped state
* transition, if it has not already been performed, in addition to the
* work associated with entry into the current state.
*/
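
The following sketch shows the shape of a frontend's handler for backend
state changes during this sequence (the function name is hypothetical;
real drivers, including the ones in this commit, carry considerably more
logic in each case):

	static void
	blkfront_backend_changed(device_t dev, XenbusState backend_state)
	{
		switch (backend_state) {
		case XenbusStateInitWait:
			/* Backend limits are published: negotiate transport
			 * parameters, allocate the ring, then advance to
			 * XenbusStateInitialised. */
			break;
		case XenbusStateConnected:
			/* Backend device properties are valid: finalize the
			 * OS disk instance, then advance to
			 * XenbusStateConnected. */
			break;
		case XenbusStateClosing:
		case XenbusStateClosed:
			/* Tear down once the device is no longer open. */
			break;
		default:
			break;
		}
	}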
/*
* REQUEST CODES.
*/
#define BLKIF_OP_READ 0
#define BLKIF_OP_WRITE 1
/*
* All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER
* operation code ("barrier request") must be completed prior to the
* execution of the barrier request. All writes issued after the barrier
* request must not execute until after the completion of the barrier request.
*
* Optional. See "feature-barrier" XenBus node documentation above.
*/
#define BLKIF_OP_WRITE_BARRIER 2
/*
* Commit any uncommitted contents of the backing device's volatile cache
* to stable storage.
*
* Optional. See "feature-flush-cache" XenBus node documentation above.
*/
#define BLKIF_OP_FLUSH_DISKCACHE 3
/*
* Used in SLES sources for device specific command packet
* contained within the request. Reserved for that purpose.
*/
#define BLKIF_OP_RESERVED_1 4
/*
* Indicate to the backend device that a region of storage is no longer in
* use, and may be discarded at any time without impact to the client. If
* the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the
* discarded region on the device must be rendered unrecoverable before the
* command returns.
*
* This operation is analogous to performing a trim (ATA) or unmap (SCSI)
* command on a native device.
*
* More information about trim/unmap operations can be found at:
* http://t13.org/Documents/UploadedDocuments/docs2008/
* e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
* http://www.seagate.com/staticfiles/support/disc/manuals/
* Interface%20manuals/100293068c.pdf
*
* Optional. See "feature-discard", "discard-alignment",
* "discard-granularity", and "discard-secure" in the XenBus node
* documentation above.
*/
#define BLKIF_OP_DISCARD 5
/*
* Maximum scatter/gather segments associated with a request header block.
* This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
* NB. This could be 12 if the ring indexes weren't stored in the same page.
*/
#define BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK 11
@ -92,6 +469,13 @@
*/
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 255
/*
* NB. first_sect and last_sect in blkif_request_segment, as well as
* sector_number in blkif_request, are always expressed in 512-byte units.
* However they must be properly aligned to the real sector size of the
* physical disk, which is reported in the "sector-size" node in the backend
* xenbus info. Also the xenbus "sectors" node is expressed in 512-byte units.
*/
struct blkif_request_segment {
grant_ref_t gref; /* reference to I/O buffer frame */
/* @first_sect: first sector in frame to transfer (inclusive). */
@ -100,16 +484,60 @@ struct blkif_request_segment {
};
typedef struct blkif_request_segment blkif_request_segment_t;
/*
* Starting ring element for any I/O request.
*
* One or more segment blocks can be inserted into the request ring
* just after a blkif_request_t, allowing requests to operate on
* up to BLKIF_MAX_SEGMENTS_PER_REQUEST.
*
* BLKIF_SEGS_TO_BLOCKS() can be used on blkif_request.nr_segments
* to determine the number of contiguous ring entries associated
* with this request.
*
* Note: Due to the way Xen request rings operate, the producer and
* consumer indices of the ring must be incremented by the
* BLKIF_SEGS_TO_BLOCKS() value of the associated request.
* (e.g. a response to a 3 ring entry request must also consume
* 3 entries in the ring, even though only the first ring entry
* in the response has any data.)
*/
struct blkif_request {
uint8_t operation; /* BLKIF_OP_??? */
uint8_t nr_segments; /* number of segments */
blkif_vdev_t handle; /* only for read/write requests */
uint64_t id; /* private guest value, echoed in resp */
blkif_sector_t sector_number;/* start sector idx on disk (r/w only) */
blkif_request_segment_t seg[BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK];
};
typedef struct blkif_request blkif_request_t;
/*
* A segment block is a ring request structure that contains only
* segment data.
*
* sizeof(struct blkif_segment_block) <= sizeof(struct blkif_request)
*/
struct blkif_segment_block {
blkif_request_segment_t seg[BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK];
};
typedef struct blkif_segment_block blkif_segment_block_t;
/*
* Cast to this structure when blkif_request.operation == BLKIF_OP_DISCARD
* sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request)
*/
struct blkif_request_discard {
uint8_t operation; /* BLKIF_OP_DISCARD */
uint8_t flag; /* BLKIF_DISCARD_SECURE or zero */
#define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0 */
blkif_vdev_t handle; /* same as for read/write requests */
uint64_t id; /* private guest value, echoed in resp */
blkif_sector_t sector_number;/* start sector idx on disk */
uint64_t nr_sectors; /* number of contiguous sectors to discard*/
};
typedef struct blkif_request_discard blkif_request_discard_t;
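
As an example, a frontend could fill a ring slot for a secure discard of
8 sectors starting at sector 1024 like this (a sketch; the softc "sc" and
"request_id" are hypothetical, and ring bookkeeping is elided):

	struct blkif_request_discard *req;

	req = (struct blkif_request_discard *)
	    RING_GET_REQUEST(&sc->ring, sc->ring.req_prod_pvt);
	req->operation = BLKIF_OP_DISCARD;
	req->flag = BLKIF_DISCARD_SECURE; /* only if discard-secure is 1 */
	req->handle = sc->handle;
	req->id = request_id;		/* private value, echoed in response */
	req->sector_number = 1024;	/* 512-byte units */
	req->nr_sectors = 8;
	sc->ring.req_prod_pvt++;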
struct blkif_response {
uint64_t id; /* copied from request */
uint8_t operation; /* copied from request */
@ -130,24 +558,26 @@ typedef struct blkif_response blkif_response_t;
/*
* Generate blkif ring structures and types.
*/
DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);
/*
* Index to, and treat as a segment block, an entry in the ring.
*/
#define BLKRING_GET_SEG_BLOCK(_r, _idx) \
(((blkif_segment_block_t *)RING_GET_REQUEST(_r, _idx))->seg)
/*
* The number of ring request blocks required to handle an I/O
* request containing _segs segments.
*/
#define BLKIF_SEGS_TO_BLOCKS(_segs) \
((((_segs - BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK) \
+ (BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK - 1)) \
/ BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK) + /*header_block*/1)
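
For example, assuming for illustration that
BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK is 14, a 30 segment request occupies
BLKIF_SEGS_TO_BLOCKS(30) = (((30 - 11) + 13) / 14) + 1 = 2 + 1 = 3 ring
entries: a header block carrying 11 segments and two segment blocks
carrying the remaining 19.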
#define VDISK_CDROM 0x1
#define VDISK_REMOVABLE 0x2
#define VDISK_READONLY 0x4
#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */

sys/xen/xenbus/xenbusvar.h

@ -103,6 +103,20 @@ XENBUS_ACCESSOR(otherend_path, OTHEREND_PATH, const char *)
*/
XenbusState xenbus_read_driver_state(const char *path);
/**
* Return the state of the "other end" (peer) of a XenBus device.
*
* \param dev The XenBus device whose peer to query.
*
* \return The current state of the peer device or XenbusStateClosed if no
* state can be read.
*/
static inline XenbusState
xenbus_get_otherend_state(device_t dev)
{
return (xenbus_read_driver_state(xenbus_get_otherend_path(dev)));
}
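
For example, following the deferred-shutdown logic this commit adds to
blkback, a backend can use the helper to avoid tearing down a connection
the peer is still using:

	/* The peer can submit I/O until it enters the closed state. */
	if (xenbus_get_otherend_state(dev) < XenbusStateClosed)
		return (EAGAIN);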
/**
* Initialize and register a watch on the given path (client supplied storage).
*