Enhance documentation, improve interoperability, and fix defects in
FreeBSD's front and back Xen blkif interface drivers.

sys/dev/xen/blkfront/block.h:
sys/dev/xen/blkfront/blkfront.c:
sys/dev/xen/blkback/blkback.c:
	Replace the FreeBSD-specific multi-page ring implementation with
	support for both the Citrix and Amazon/RedHat versions of this
	extension.

sys/dev/xen/blkfront/blkfront.c:
	o Add a per-instance sysctl tree that exposes all negotiated
	  transport parameters (ring pages, max number of requests,
	  max request size, max number of segments).
	o In blkfront_vdevice_to_unit() add a missing return statement
	  so that we properly identify the unit number for high-numbered
	  xvd devices.

sys/dev/xen/blkback/blkback.c:
	o Add static DTrace probes for several events in this driver.
	o Defer connection shutdown processing until the front-end
	  enters the closed state.  This avoids prematurely tearing down
	  the connection when buggy front-ends transition to the closing
	  state, even though the device is open and they veto the close
	  request from the tool stack.
	o Add nodes for maximum request size and the number of active
	  ring pages to the existing, per-instance, sysctl tree.
	o Miscellaneous style cleanup.

sys/xen/interface/io/blkif.h:
	o Add extensive documentation of the XenStore nodes used to
	  implement the blkif interface.
	o Document the startup sequence between a front and back driver.
	o Add structures and documentation for the "discard" feature
	  (AKA TRIM).
	o Clean up some definitions related to FreeBSD's request
	  number/size/segment-limit extension.

sys/dev/xen/blkfront/blkfront.c:
sys/dev/xen/blkback/blkback.c:
sys/xen/xenbus/xenbusvar.h:
	Add the convenience function xenbus_get_otherend_state() and use
	it to simplify some logic in both block-front and block-back.

MFC after: 1 day
This commit is contained in:
parent 03a67b59f8, commit 8b8bfa3567
sys/dev/xen/blkback/blkback.c:

@@ -40,6 +40,8 @@ __FBSDID("$FreeBSD$");
 * a FreeBSD domain to other domains.
 */

#include "opt_kdtrace.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>

@@ -63,6 +65,7 @@ __FBSDID("$FreeBSD$");
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/bitstring.h>
#include <sys/sdt.h>

#include <geom/geom.h>

@@ -124,7 +127,7 @@ __FBSDID("$FreeBSD$");
static MALLOC_DEFINE(M_XENBLOCKBACK, "xbbd", "Xen Block Back Driver Data");

#ifdef XBB_DEBUG
#define DPRINTF(fmt, args...) \
    printf("xbb(%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTF(fmt, args...) do {} while(0)

@@ -134,7 +137,7 @@ static MALLOC_DEFINE(M_XENBLOCKBACK, "xbbd", "Xen Block Back Driver Data");
 * The maximum mapped region size per request we will allow in a negotiated
 * block-front/back communication channel.
 */
#define XBB_MAX_REQUEST_SIZE \
    MIN(MAXPHYS, BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE)

/**
@@ -142,9 +145,9 @@ static MALLOC_DEFINE(M_XENBLOCKBACK, "xbbd", "Xen Block Back Driver Data");
 * segment blocks) per request we will allow in a negotiated block-front/back
 * communication channel.
 */
#define XBB_MAX_SEGMENTS_PER_REQUEST \
    (MIN(UIO_MAXIOV, \
         MIN(BLKIF_MAX_SEGMENTS_PER_REQUEST, \
             (XBB_MAX_REQUEST_SIZE / PAGE_SIZE) + 1)))

/**
@@ -980,9 +983,10 @@ xbb_get_gntaddr(struct xbb_xen_reqlist *reqlist, int pagenr, int sector)
static uint8_t *
xbb_get_kva(struct xbb_softc *xbb, int nr_pages)
{
-    intptr_t first_clear, num_clear;
+    intptr_t first_clear;
+    intptr_t num_clear;
     uint8_t *free_kva;
     int      i;

     KASSERT(nr_pages != 0, ("xbb_get_kva of zero length"));

@@ -1681,19 +1685,19 @@ xbb_dispatch_io(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist)
            req_ring_idx++;
            switch (xbb->abi) {
            case BLKIF_PROTOCOL_NATIVE:
-               sg = BLKRING_GET_SG_REQUEST(&xbb->rings.native,
-                                           req_ring_idx);
+               sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.native,
+                                          req_ring_idx);
                break;
            case BLKIF_PROTOCOL_X86_32:
            {
-               sg = BLKRING_GET_SG_REQUEST(&xbb->rings.x86_32,
-                                           req_ring_idx);
+               sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.x86_32,
+                                          req_ring_idx);
                break;
            }
            case BLKIF_PROTOCOL_X86_64:
            {
-               sg = BLKRING_GET_SG_REQUEST(&xbb->rings.x86_64,
-                                           req_ring_idx);
+               sg = BLKRING_GET_SEG_BLOCK(&xbb->rings.x86_64,
+                                          req_ring_idx);
                break;
            }
            default:

@@ -1817,8 +1821,8 @@ xbb_run_queue(void *context, int pending)
    struct xbb_xen_reqlist *reqlist;

    xbb   = (struct xbb_softc *)context;
    rings = &xbb->rings;

    /*
     * Work gather and dispatch loop.  Note that we have a bias here

@@ -2032,6 +2036,13 @@ xbb_intr(void *arg)
    taskqueue_enqueue(xbb->io_taskqueue, &xbb->io_task);
}

SDT_PROVIDER_DEFINE(xbb);
SDT_PROBE_DEFINE1(xbb, kernel, xbb_dispatch_dev, flush, flush, "int");
SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_dev, read, read, "int", "uint64_t",
    "uint64_t");
SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_dev, write, write, "int",
    "uint64_t", "uint64_t");
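[Note: the probes above are static DTrace probes and are compiled in only when
the kernel is built with KDTRACE_HOOKS, hence the new opt_kdtrace.h include.
Assuming a stock dtrace(1) in the control domain, they can be listed with
`dtrace -l -P xbb` and traced with one-liners such as
`dtrace -n 'xbb*:::read { @sizes[probename] = quantize(arg2); }'`.  The exact
probe-name spelling depends on how SDT maps the four-part definition, so treat
these invocations as illustrative rather than exact.]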
/*----------------------------- Backend Handlers -----------------------------*/
/**
 * Backend handler for character device access.

@@ -2087,6 +2098,9 @@ xbb_dispatch_dev(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist,

        nreq->pendcnt = 1;

        SDT_PROBE1(xbb, kernel, xbb_dispatch_dev, flush,
                   device_get_unit(xbb->dev));

        (*dev_data->csw->d_strategy)(bio);

        return (0);

@@ -2181,6 +2195,17 @@ xbb_dispatch_dev(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist,
                       bios[bio_idx]->bio_bcount);
        }
#endif
        if (operation == BIO_READ) {
            SDT_PROBE3(xbb, kernel, xbb_dispatch_dev, read,
                       device_get_unit(xbb->dev),
                       bios[bio_idx]->bio_offset,
                       bios[bio_idx]->bio_length);
        } else if (operation == BIO_WRITE) {
            SDT_PROBE3(xbb, kernel, xbb_dispatch_dev, write,
                       device_get_unit(xbb->dev),
                       bios[bio_idx]->bio_offset,
                       bios[bio_idx]->bio_length);
        }
        (*dev_data->csw->d_strategy)(bios[bio_idx]);
    }

@@ -2193,6 +2218,12 @@ xbb_dispatch_dev(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist,
    return (error);
}

SDT_PROBE_DEFINE1(xbb, kernel, xbb_dispatch_file, flush, flush, "int");
SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_file, read, read, "int", "uint64_t",
    "uint64_t");
SDT_PROBE_DEFINE3(xbb, kernel, xbb_dispatch_file, write, write, "int",
    "uint64_t", "uint64_t");

/**
 * Backend handler for file access.
 *

@@ -2237,6 +2268,9 @@ xbb_dispatch_file(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist,
    case BIO_FLUSH: {
        struct mount *mountpoint;

        SDT_PROBE1(xbb, kernel, xbb_dispatch_file, flush,
                   device_get_unit(xbb->dev));

        vfs_is_locked = VFS_LOCK_GIANT(xbb->vn->v_mount);

        (void) vn_start_write(xbb->vn, &mountpoint, V_WAIT);

@@ -2336,6 +2370,10 @@ xbb_dispatch_file(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist,
    switch (operation) {
    case BIO_READ:

        SDT_PROBE3(xbb, kernel, xbb_dispatch_file, read,
                   device_get_unit(xbb->dev), xuio.uio_offset,
                   xuio.uio_resid);

        vn_lock(xbb->vn, LK_EXCLUSIVE | LK_RETRY);

        /*

@@ -2366,6 +2404,10 @@ xbb_dispatch_file(struct xbb_softc *xbb, struct xbb_xen_reqlist *reqlist,
    case BIO_WRITE: {
        struct mount *mountpoint;

        SDT_PROBE3(xbb, kernel, xbb_dispatch_file, write,
                   device_get_unit(xbb->dev), xuio.uio_offset,
                   xuio.uio_resid);

        (void)vn_start_write(xbb->vn, &mountpoint, V_WAIT);

        vn_lock(xbb->vn, LK_EXCLUSIVE | LK_RETRY);
@@ -3028,6 +3070,8 @@ xbb_collect_frontend_info(struct xbb_softc *xbb)
    const char *otherend_path;
    int         error;
    u_int       ring_idx;
    u_int       ring_page_order;
    size_t      ring_size;

    otherend_path = xenbus_get_otherend_path(xbb->dev);

@@ -3035,23 +3079,19 @@ xbb_collect_frontend_info(struct xbb_softc *xbb)
     * Protocol defaults valid even if all negotiation fails.
     */
    xbb->ring_config.ring_pages = 1;
    xbb->max_requests         = BLKIF_MAX_RING_REQUESTS(PAGE_SIZE);
    xbb->max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK;
    xbb->max_request_size     = xbb->max_request_segments * PAGE_SIZE;

    /*
     * Mandatory data (used in all versions of the protocol) first.
     */
-   error = xs_gather(XST_NIL, otherend_path,
-                     "ring-ref", "%" PRIu32,
-                     &xbb->ring_config.ring_ref[0],
-                     "event-channel", "%" PRIu32,
-                     &xbb->ring_config.evtchn,
-                     NULL);
+   error = xs_scanf(XST_NIL, otherend_path,
+                    "event-channel", NULL, "%" PRIu32,
+                    &xbb->ring_config.evtchn);
    if (error != 0) {
        xenbus_dev_fatal(xbb->dev, error,
-                        "Unable to retrieve ring information from "
-                        "frontend %s.  Unable to connect.",
+                        "Unable to retrieve event-channel information "
+                        "from frontend %s.  Unable to connect.",
                         xenbus_get_otherend_path(xbb->dev));
        return (error);
    }

@@ -3065,10 +3105,20 @@ xbb_collect_frontend_info(struct xbb_softc *xbb)
     * we must use independent calls in order to guarantee
     * we don't miss information in a sparsely populated front-end
     * tree.
     *
     * \note xs_scanf() does not update variables for unmatched
     *       fields.
     */
    ring_page_order = 0;
    (void)xs_scanf(XST_NIL, otherend_path,
-                  "ring-pages", NULL, "%u",
+                  "ring-page-order", NULL, "%u",
+                  &ring_page_order);
+   xbb->ring_config.ring_pages = 1 << ring_page_order;
+   (void)xs_scanf(XST_NIL, otherend_path,
+                  "num-ring-pages", NULL, "%u",
                   &xbb->ring_config.ring_pages);
    ring_size = PAGE_SIZE * xbb->ring_config.ring_pages;
    xbb->max_requests = BLKIF_MAX_RING_REQUESTS(ring_size);

    (void)xs_scanf(XST_NIL, otherend_path,
                   "max-requests", NULL, "%u",

@@ -3116,22 +3166,39 @@ xbb_collect_frontend_info(struct xbb_softc *xbb)
        return (EINVAL);
    }
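[To make the dual ring-size negotiation concrete, here is a minimal user-space
sketch of the arithmetic the backend performs above.  The 4 KiB page size, the
112-byte blkif ring entry, and the helper names are illustrative assumptions
modeled on the standard Xen ring macros; the sketch also ignores the small
ring header, which does not change the rounded results for these sizes.]

    #include <stdio.h>

    /* Illustrative stand-ins for BLKIF_MAX_RING_REQUESTS() (assumption:
     * a blkif ring entry is 112 bytes and ring sizes round down to a
     * power of 2, as in <xen/interface/io/ring.h>). */
    #define PAGE_SIZE   4096u
    #define ENTRY_SIZE   112u

    static unsigned int
    ring_requests(unsigned int ring_bytes)
    {
            unsigned int n = ring_bytes / ENTRY_SIZE;
            unsigned int p = 1;

            while (p * 2 <= n)      /* round down to a power of 2 */
                    p *= 2;
            return (p);
    }

    int
    main(void)
    {
            unsigned int ring_page_order = 2;                /* Citrix node */
            unsigned int ring_pages = 1u << ring_page_order; /* == num-ring-pages */

            /* 4 pages -> 16384 bytes -> 128 concurrent requests;
             * a single page yields 32. */
            printf("%u pages, %u requests\n", ring_pages,
                ring_requests(ring_pages * PAGE_SIZE));
            return (0);
    }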
-   /* If using a multi-page ring, pull in the remaining references. */
-   for (ring_idx = 1; ring_idx < xbb->ring_config.ring_pages; ring_idx++) {
-       char ring_ref_name[]= "ring_refXX";
-
-       snprintf(ring_ref_name, sizeof(ring_ref_name),
-                "ring-ref%u", ring_idx);
-       error = xs_scanf(XST_NIL, otherend_path,
-                        ring_ref_name, NULL, "%" PRIu32,
-                        &xbb->ring_config.ring_ref[ring_idx]);
-       if (error != 0) {
-           xenbus_dev_fatal(xbb->dev, error,
-                            "Failed to retrieve grant reference "
-                            "for page %u of shared ring.  Unable "
-                            "to connect.", ring_idx);
-           return (error);
-       }
-   }
+   if (xbb->ring_config.ring_pages == 1) {
+       error = xs_gather(XST_NIL, otherend_path,
+                         "ring-ref", "%" PRIu32,
+                         &xbb->ring_config.ring_ref[0],
+                         NULL);
+       if (error != 0) {
+           xenbus_dev_fatal(xbb->dev, error,
+                            "Unable to retrieve ring information "
+                            "from frontend %s.  Unable to "
+                            "connect.",
+                            xenbus_get_otherend_path(xbb->dev));
+           return (error);
+       }
+   } else {
+       /* Multi-page ring format. */
+       for (ring_idx = 0; ring_idx < xbb->ring_config.ring_pages;
+            ring_idx++) {
+           char ring_ref_name[]= "ring_refXX";
+
+           snprintf(ring_ref_name, sizeof(ring_ref_name),
+                    "ring-ref%u", ring_idx);
+           error = xs_scanf(XST_NIL, otherend_path,
+                            ring_ref_name, NULL, "%" PRIu32,
+                            &xbb->ring_config.ring_ref[ring_idx]);
+           if (error != 0) {
+               xenbus_dev_fatal(xbb->dev, error,
+                                "Failed to retrieve grant "
+                                "reference for page %u of "
+                                "shared ring.  Unable "
+                                "to connect.", ring_idx);
+               return (error);
+           }
+       }
+   }

    error = xs_gather(XST_NIL, otherend_path,
@@ -3197,8 +3264,8 @@ xbb_alloc_requests(struct xbb_softc *xbb)
static int
xbb_alloc_request_lists(struct xbb_softc *xbb)
{
-   int i;
    struct xbb_xen_reqlist *reqlist;
+   int i;

    /*
     * If no requests can be merged, we need 1 request list per

@@ -3318,7 +3385,7 @@ xbb_publish_backend_info(struct xbb_softc *xbb)
static void
xbb_connect(struct xbb_softc *xbb)
{
    int error;

    if (xenbus_get_state(xbb->dev) == XenbusStateConnected)
        return;

@@ -3399,7 +3466,8 @@ xbb_connect(struct xbb_softc *xbb)
static int
xbb_shutdown(struct xbb_softc *xbb)
{
-   int error;
+   XenbusState frontState;
+   int         error;

    DPRINTF("\n");

@@ -3413,6 +3481,20 @@ xbb_shutdown(struct xbb_softc *xbb)
    if ((xbb->flags & XBBF_IN_SHUTDOWN) != 0)
        return (EAGAIN);

    xbb->flags |= XBBF_IN_SHUTDOWN;
    mtx_unlock(&xbb->lock);

    if (xenbus_get_state(xbb->dev) < XenbusStateClosing)
        xenbus_set_state(xbb->dev, XenbusStateClosing);

    frontState = xenbus_get_otherend_state(xbb->dev);
    mtx_lock(&xbb->lock);
    xbb->flags &= ~XBBF_IN_SHUTDOWN;

    /* The front can submit I/O until entering the closed state. */
    if (frontState < XenbusStateClosed)
        return (EAGAIN);

    DPRINTF("\n");

    /* Indicate shutdown is in progress. */

@@ -3434,19 +3516,6 @@ xbb_shutdown(struct xbb_softc *xbb)

    DPRINTF("\n");

-   /*
-    * Before unlocking mutex, set this flag to prevent other threads from
-    * getting into this function
-    */
-   xbb->flags |= XBBF_IN_SHUTDOWN;
-   mtx_unlock(&xbb->lock);
-
-   if (xenbus_get_state(xbb->dev) < XenbusStateClosing)
-       xenbus_set_state(xbb->dev, XenbusStateClosing);
-
-   mtx_lock(&xbb->lock);
-   xbb->flags &= ~XBBF_IN_SHUTDOWN;
-
    /* Indicate to xbb_detach() that it is safe to proceed. */
    wakeup(xbb);

@@ -3573,6 +3642,16 @@ xbb_setup_sysctl(struct xbb_softc *xbb)
                   "max_request_segments", CTLFLAG_RD,
                   &xbb->max_request_segments, 0,
                   "maximum number of pages per requests (negotiated)");

    SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
                    "max_request_size", CTLFLAG_RD,
                    &xbb->max_request_size, 0,
                    "maximum size in bytes of a request (negotiated)");

    SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
                    "ring_pages", CTLFLAG_RD,
                    &xbb->ring_config.ring_pages, 0,
                    "communication channel pages (negotiated)");
}
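[With both halves exporting their negotiated values, a transport-parameter
mismatch can be diagnosed from userland, for example with
`sysctl dev.xbbd.0 | grep -E 'max_request|ring_pages'` on the backend host and
the corresponding query under the frontend device's tree.  The
`dev.<driver>.<unit>` prefixes are assumptions based on how
device_get_sysctl_tree() names per-device nodes; check `sysctl dev` output for
the exact names on a given system.]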
/**
@@ -3587,6 +3666,7 @@ xbb_attach(device_t dev)
{
    struct xbb_softc *xbb;
    int               error;
    u_int             max_ring_page_order;

    DPRINTF("Attaching to %s\n", xenbus_get_node(dev));

@@ -3621,6 +3701,10 @@ xbb_attach(device_t dev)
        return (error);
    }

    /*
     * Amazon EC2 client compatibility.  They refer to max-ring-pages
     * instead of to max-ring-page-order.
     */
    error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev),
                      "max-ring-pages", "%zu", XBB_MAX_RING_PAGES);
    if (error) {

@@ -3629,6 +3713,15 @@ xbb_attach(device_t dev)
        return (error);
    }

    max_ring_page_order = flsl(XBB_MAX_RING_PAGES) - 1;
    error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev),
                      "max-ring-page-order", "%u", max_ring_page_order);
    if (error) {
        xbb_attach_failed(xbb, error, "writing %s/max-ring-page-order",
                          xenbus_get_node(xbb->dev));
        return (error);
    }

    error = xs_printf(XST_NIL, xenbus_get_node(xbb->dev),
                      "max-requests", "%u", XBB_MAX_REQUESTS);
    if (error) {

@@ -3862,12 +3955,16 @@ xbb_frontend_changed(device_t dev, XenbusState frontend_state)
        xbb_connect(xbb);
        break;
    case XenbusStateClosing:
        /*
         * Frontend has acknowledged Closing request.
         * Wait for Closed state.
         */
        break;
    case XenbusStateClosed:
        mtx_lock(&xbb->lock);
        xbb_shutdown(xbb);
        mtx_unlock(&xbb->lock);
-       if (frontend_state == XenbusStateClosed)
-           xenbus_set_state(xbb->dev, XenbusStateClosed);
+       xenbus_set_state(xbb->dev, XenbusStateClosed);
        break;
    default:
        xenbus_dev_fatal(xbb->dev, EINVAL, "saw state %d at frontend",
sys/dev/xen/blkfront/blkfront.c:

@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
#include <sys/bus.h>
#include <sys/conf.h>
#include <sys/module.h>
#include <sys/sysctl.h>

#include <machine/bus.h>
#include <sys/rman.h>

@@ -139,7 +140,7 @@ static int xb_dump(void *, void *, vm_offset_t, off_t, size_t);
 * with blkfront as the emulated drives, easing transition slightly.
 */
static void
-blkfront_vdevice_to_unit(int vdevice, int *unit, const char **name)
+blkfront_vdevice_to_unit(uint32_t vdevice, int *unit, const char **name)
{
    static struct vdev_info {
        int major;

@@ -186,6 +187,7 @@ blkfront_vdevice_to_unit(int vdevice, int *unit, const char **name)
    if (vdevice & (1 << 28)) {
        *unit = (vdevice & ((1 << 28) - 1)) >> 8;
        *name = "xbd";
+       return;
    }

    for (i = 0; info[i].major; i++) {

@@ -226,7 +228,7 @@ xlvbd_add(struct xb_softc *sc, blkif_sector_t sectors,
    sc->xb_disk->d_sectorsize = sector_size;

    sc->xb_disk->d_mediasize = sectors * sector_size;
-   sc->xb_disk->d_maxsize = sc->max_request_size;
+   sc->xb_disk->d_maxsize = sc->max_request_size - PAGE_SIZE;
    sc->xb_disk->d_flags = 0;
    disk_create(sc->xb_disk, DISK_VERSION_00);

@@ -407,6 +409,40 @@ blkfront_probe(device_t dev)
    return (ENXIO);
}

static void
xb_setup_sysctl(struct xb_softc *xb)
{
    struct sysctl_ctx_list *sysctl_ctx = NULL;
    struct sysctl_oid      *sysctl_tree = NULL;

    sysctl_ctx = device_get_sysctl_ctx(xb->xb_dev);
    if (sysctl_ctx == NULL)
        return;

    sysctl_tree = device_get_sysctl_tree(xb->xb_dev);
    if (sysctl_tree == NULL)
        return;

    SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
                    "max_requests", CTLFLAG_RD, &xb->max_requests, -1,
                    "maximum outstanding requests (negotiated)");

    SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
                    "max_request_segments", CTLFLAG_RD,
                    &xb->max_request_segments, 0,
                    "maximum number of pages per requests (negotiated)");

    SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
                    "max_request_size", CTLFLAG_RD,
                    &xb->max_request_size, 0,
                    "maximum size in bytes of a request (negotiated)");

    SYSCTL_ADD_UINT(sysctl_ctx, SYSCTL_CHILDREN(sysctl_tree), OID_AUTO,
                    "ring_pages", CTLFLAG_RD,
                    &xb->ring_pages, 0,
                    "communication channel pages (negotiated)");
}

/*
 * Setup supplies the backend dir, virtual device.  We place an event
 * channel and shared frame entries.  We watch backend to wait if it's

@@ -417,14 +453,14 @@ blkfront_attach(device_t dev)
{
    struct xb_softc *sc;
    const char *name;
+   uint32_t vdevice;
    int error;
-   int vdevice;
    int i;
    int unit;

    /* FIXME: Use dynamic device id if this is not set. */
    error = xs_scanf(XST_NIL, xenbus_get_node(dev),
-                    "virtual-device", NULL, "%i", &vdevice);
+                    "virtual-device", NULL, "%" PRIu32, &vdevice);
    if (error) {
        xenbus_dev_fatal(dev, error, "reading virtual-device");
        device_printf(dev, "Couldn't determine virtual device.\n");

@@ -449,6 +485,8 @@ blkfront_attach(device_t dev)
    sc->vdevice = vdevice;
    sc->connected = BLKIF_STATE_DISCONNECTED;

    xb_setup_sysctl(sc);

    /* Wait for backend device to publish its protocol capabilities. */
    xenbus_set_state(dev, XenbusStateInitialising);

@@ -501,6 +539,7 @@ blkfront_initialize(struct xb_softc *sc)
{
    const char *otherend_path;
    const char *node_path;
    uint32_t max_ring_page_order;
    int error;
    int i;

@@ -513,8 +552,8 @@ blkfront_initialize(struct xb_softc *sc)
     * Protocol defaults valid even if negotiation for a
     * setting fails.
     */
    max_ring_page_order = 0;
    sc->ring_pages = 1;
    sc->max_requests = BLKIF_MAX_RING_REQUESTS(PAGE_SIZE);
    sc->max_request_segments = BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK;
    sc->max_request_size = (sc->max_request_segments - 1) * PAGE_SIZE;
    sc->max_request_blocks = BLKIF_SEGS_TO_BLOCKS(sc->max_request_segments);

@@ -526,13 +565,25 @@ blkfront_initialize(struct xb_softc *sc)
     * we must use independent calls in order to guarantee
     * we don't miss information in a sparsely populated back-end
     * tree.
     *
     * \note xs_scanf() does not update variables for unmatched
     *       fields.
     */
    otherend_path = xenbus_get_otherend_path(sc->xb_dev);
    node_path = xenbus_get_node(sc->xb_dev);

    /* Support both backend schemes for relaying ring page limits. */
    (void)xs_scanf(XST_NIL, otherend_path,
-                  "max-ring-pages", NULL, "%" PRIu32,
+                  "max-ring-page-order", NULL, "%" PRIu32,
+                  &max_ring_page_order);
+   sc->ring_pages = 1 << max_ring_page_order;
+   (void)xs_scanf(XST_NIL, otherend_path,
+                  "max-ring-pages", NULL, "%" PRIu32,
                   &sc->ring_pages);
    if (sc->ring_pages < 1)
        sc->ring_pages = 1;

    sc->max_requests = BLKIF_MAX_RING_REQUESTS(sc->ring_pages * PAGE_SIZE);
    (void)xs_scanf(XST_NIL, otherend_path,
                   "max-requests", NULL, "%" PRIu32,
                   &sc->max_requests);

@@ -552,6 +603,16 @@ blkfront_initialize(struct xb_softc *sc)
        sc->ring_pages = XBF_MAX_RING_PAGES;
    }

    if (powerof2(sc->ring_pages) == 0) {
        uint32_t new_page_limit;

        new_page_limit = 0x01 << (fls(sc->ring_pages) - 1);
        device_printf(sc->xb_dev, "Back-end specified ring-pages of "
                      "%u is not a power of 2.  Limited to %u.\n",
                      sc->ring_pages, new_page_limit);
        sc->ring_pages = new_page_limit;
    }
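[As a concrete example of the clamp above: a backend that illegally advertises
a ring-pages value of 6 yields fls(6) - 1 = 2, so the front end silently
limits itself to 1 << 2 = 4 ring pages, the largest power of 2 not exceeding
the advertised value.]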
    if (sc->max_requests > XBF_MAX_REQUESTS) {
        device_printf(sc->xb_dev, "Back-end specified max_requests of "
                      "%u limited to front-end limit of %u.\n",

@@ -625,11 +686,20 @@ blkfront_initialize(struct xb_softc *sc)
    if (setup_blkring(sc) != 0)
        return;

    /* Support both backend schemes for relaying ring page limits. */
    error = xs_printf(XST_NIL, node_path,
-                     "ring-pages","%u", sc->ring_pages);
+                     "num-ring-pages","%u", sc->ring_pages);
    if (error) {
        xenbus_dev_fatal(sc->xb_dev, error,
-                        "writing %s/ring-pages",
+                        "writing %s/num-ring-pages",
                         node_path);
        return;
    }
    error = xs_printf(XST_NIL, node_path,
                      "ring-page-order","%u", fls(sc->ring_pages) - 1);
    if (error) {
        xenbus_dev_fatal(sc->xb_dev, error,
                         "writing %s/ring-page-order",
                         node_path);
        return;
    }

@@ -711,25 +781,31 @@ setup_blkring(struct xb_softc *sc)
            return (error);
        }
    }
-   error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
-                     "ring-ref","%u", sc->ring_ref[0]);
-   if (error) {
-       xenbus_dev_fatal(sc->xb_dev, error, "writing %s/ring-ref",
-                        xenbus_get_node(sc->xb_dev));
-       return (error);
-   }
-   for (i = 1; i < sc->ring_pages; i++) {
-       char ring_ref_name[]= "ring_refXX";
-
-       snprintf(ring_ref_name, sizeof(ring_ref_name), "ring-ref%u", i);
-       error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
-                         ring_ref_name, "%u", sc->ring_ref[i]);
-       if (error) {
-           xenbus_dev_fatal(sc->xb_dev, error, "writing %s/%s",
-                            xenbus_get_node(sc->xb_dev),
-                            ring_ref_name);
-           return (error);
-       }
-   }
+   if (sc->ring_pages == 1) {
+       error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
+                         "ring-ref", "%u", sc->ring_ref[0]);
+       if (error) {
+           xenbus_dev_fatal(sc->xb_dev, error,
+                            "writing %s/ring-ref",
+                            xenbus_get_node(sc->xb_dev));
+           return (error);
+       }
+   } else {
+       for (i = 0; i < sc->ring_pages; i++) {
+           char ring_ref_name[]= "ring_refXX";
+
+           snprintf(ring_ref_name, sizeof(ring_ref_name),
+                    "ring-ref%u", i);
+           error = xs_printf(XST_NIL, xenbus_get_node(sc->xb_dev),
+                             ring_ref_name, "%u", sc->ring_ref[i]);
+           if (error) {
+               xenbus_dev_fatal(sc->xb_dev, error,
+                                "writing %s/%s",
+                                xenbus_get_node(sc->xb_dev),
+                                ring_ref_name);
+               return (error);
+           }
+       }
+   }

    error = bind_listening_port_to_irqhandler(

@@ -795,7 +871,7 @@ blkfront_connect(struct xb_softc *sc)
    unsigned int binfo;
    int err, feature_barrier;

    if( (sc->connected == BLKIF_STATE_CONNECTED) ||
        (sc->connected == BLKIF_STATE_SUSPENDED) )
        return;

@@ -923,15 +999,13 @@ blkif_close(struct disk *dp)
        return (ENXIO);
    sc->xb_flags &= ~XB_OPEN;
    if (--(sc->users) == 0) {
-       /* Check whether we have been instructed to close.  We will
-          have ignored this request initially, as the device was
-          still mounted. */
-       device_t dev = sc->xb_dev;
-       XenbusState state =
-           xenbus_read_driver_state(xenbus_get_otherend_path(dev));
-
-       if (state == XenbusStateClosing)
-           blkfront_closing(dev);
+       /*
+        * Check whether we have been instructed to close.  We will
+        * have ignored this request initially, as the device was
+        * still mounted.
+        */
+       if (xenbus_get_otherend_state(sc->xb_dev) == XenbusStateClosing)
+           blkfront_closing(sc->xb_dev);
    }
    return (0);
}

@@ -1033,7 +1107,7 @@ blkif_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
    struct xb_command *cm;
    blkif_request_t *ring_req;
    struct blkif_request_segment *sg;
    struct blkif_request_segment *last_block_sg;
    grant_ref_t *sg_ref;
    vm_paddr_t buffer_ma;
    uint64_t fsect, lsect;

@@ -1104,12 +1178,12 @@ blkif_queue_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
        nsegs--;
    }
    block_segs = MIN(nsegs, BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK);
    if (block_segs == 0)
        break;

-   sg = BLKRING_GET_SG_REQUEST(&sc->ring, sc->ring.req_prod_pvt);
+   sg = BLKRING_GET_SEG_BLOCK(&sc->ring, sc->ring.req_prod_pvt);
    sc->ring.req_prod_pvt++;
    last_block_sg = sg + block_segs;
}

if (cm->operation == BLKIF_OP_READ)

sys/dev/xen/blkfront/block.h:

@@ -49,7 +49,7 @@
 * guarantee we can handle an unaligned transfer without the need to
 * use a bounce buffer.
 */
#define XBF_MAX_REQUEST_SIZE \
    MIN(MAXPHYS, (BLKIF_MAX_SEGMENTS_PER_REQUEST - 1) * PAGE_SIZE)

/**
@@ -57,8 +57,8 @@
 * segment blocks) per request we will allow in a negotiated block-front/back
 * communication channel.
 */
#define XBF_MAX_SEGMENTS_PER_REQUEST \
    (MIN(BLKIF_MAX_SEGMENTS_PER_REQUEST, \
         (XBF_MAX_REQUEST_SIZE / PAGE_SIZE) + 1))

/**
sys/xen/interface/io/blkif.h:

@@ -1,8 +1,8 @@
/******************************************************************************
 * blkif.h
 *
 * Unified block-device I/O interface for Xen guest OSes.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the

@@ -22,6 +22,7 @@
 * DEALINGS IN THE SOFTWARE.
 *
 * Copyright (c) 2003-2004, Keir Fraser
 * Copyright (c) 2012, Spectra Logic Corporation
 */

#ifndef __XEN_PUBLIC_IO_BLKIF_H__

@@ -35,7 +36,7 @@
 * notification can be made conditional on req_event (i.e., the generic
 * hold-off mechanism provided by the ring macros).  Backends must set
 * req_event appropriately (e.g., using RING_FINAL_CHECK_FOR_REQUESTS()).
 *
 * Back->front notifications: When enqueuing a new response, sending a
 * notification can be made conditional on rsp_event (i.e., the generic
 * hold-off mechanism provided by the ring macros).  Frontends must set

@@ -47,38 +48,414 @@
#endif
#define blkif_sector_t uint64_t

/*
 * Feature and Parameter Negotiation
 * =================================
 * The two halves of a Xen block driver utilize nodes within the XenStore to
 * communicate capabilities and to negotiate operating parameters.  This
 * section enumerates these nodes which reside in the respective front and
 * backend portions of the XenStore, following the XenBus convention.
 *
 * All data in the XenStore is stored as strings.  Nodes specifying numeric
 * values are encoded in decimal.  Integer value ranges listed below are
 * expressed as fixed sized integer types capable of storing the conversion
 * of a properly formatted node string, without loss of information.
 *
 * Any specified default value is in effect if the corresponding XenBus node
 * is not present in the XenStore.
 *
 * XenStore nodes in sections marked "PRIVATE" are solely for use by the
 * driver side whose XenBus tree contains them.
 *
 * See the XenBus state transition diagram below for details on when XenBus
 * nodes must be published and when they can be queried.
 *
 *****************************************************************************
 *                            Backend XenBus Nodes
 *****************************************************************************
 *
 *------------------ Backend Device Identification (PRIVATE) ------------------
 *
 * mode
 *      Values:         "r" (read only), "w" (writable)
 *
 *      The read or write access permissions to the backing store to be
 *      granted to the frontend.
 *
 * params
 *      Values:         string
 *
 *      A free formatted string providing sufficient information for the
 *      backend driver to open the backing device.  (e.g. the path to the
 *      file or block device representing the backing store.)
 *
 * type
 *      Values:         "file", "phy", "tap"
 *
 *      The type of the backing device/object.
 *
 *--------------------------------- Features ---------------------------------
 *
 * feature-barrier
 *      Values:         0/1 (boolean)
 *      Default Value:  0
 *
 *      A value of "1" indicates that the backend can process requests
 *      containing the BLKIF_OP_WRITE_BARRIER request opcode.  Requests
 *      of this type may still be returned at any time with the
 *      BLKIF_RSP_EOPNOTSUPP result code.
 *
 * feature-flush-cache
 *      Values:         0/1 (boolean)
 *      Default Value:  0
 *
 *      A value of "1" indicates that the backend can process requests
 *      containing the BLKIF_OP_FLUSH_DISKCACHE request opcode.  Requests
 *      of this type may still be returned at any time with the
 *      BLKIF_RSP_EOPNOTSUPP result code.
 *
 * feature-discard
 *      Values:         0/1 (boolean)
 *      Default Value:  0
 *
 *      A value of "1" indicates that the backend can process requests
 *      containing the BLKIF_OP_DISCARD request opcode.  Requests
 *      of this type may still be returned at any time with the
 *      BLKIF_RSP_EOPNOTSUPP result code.
 *
 *----------------------- Request Transport Parameters ------------------------
 *
 * max-ring-page-order
 *      Values:         <uint32_t>
 *      Default Value:  0
 *      Notes:          1, 3
 *
 *      The maximum supported size of the request ring buffer in units of
 *      lb(machine pages). (e.g. 0 == 1 page, 1 == 2 pages, 2 == 4 pages,
 *      etc.).
 *
 * max-ring-pages
 *      Values:         <uint32_t>
 *      Default Value:  1
 *      Notes:          2, 3
 *
 *      The maximum supported size of the request ring buffer in units of
 *      machine pages.  The value must be a power of 2.
 *
 * max-requests
 *      Values:         <uint32_t>
 *      Default Value:  BLKIF_MAX_RING_REQUESTS(PAGE_SIZE)
 *      Maximum Value:  BLKIF_MAX_RING_REQUESTS(PAGE_SIZE * max-ring-pages)
 *
 *      The maximum number of concurrent, logical requests that will be
 *      issued by the backend.
 *
 *      Note: A logical request may span multiple ring entries.
 *
 * max-request-segments
 *      Values:         <uint8_t>
 *      Default Value:  BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK
 *      Maximum Value:  BLKIF_MAX_SEGMENTS_PER_REQUEST
 *
 *      The maximum value of blkif_request.nr_segments supported by
 *      the backend.
 *
 * max-request-size
 *      Values:         <uint32_t>
 *      Default Value:  BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK * PAGE_SIZE
 *      Maximum Value:  BLKIF_MAX_SEGMENTS_PER_REQUEST * PAGE_SIZE
 *
 *      The maximum amount of data, in bytes, that can be referenced by a
 *      request type that accesses frontend memory (currently BLKIF_OP_READ,
 *      BLKIF_OP_WRITE, or BLKIF_OP_WRITE_BARRIER).
 *
 *------------------------- Backend Device Properties -------------------------
 *
 * discard-alignment
 *      Values:         <uint32_t>
 *      Default Value:  0
 *      Notes:          4, 5
 *
 *      The offset, in bytes from the beginning of the virtual block device,
 *      to the first, addressable, discard extent on the underlying device.
 *
 * discard-granularity
 *      Values:         <uint32_t>
 *      Default Value:  <"sector-size">
 *      Notes:          4
 *
 *      The size, in bytes, of the individually addressable discard extents
 *      of the underlying device.
 *
 * discard-secure
 *      Values:         0/1 (boolean)
 *      Default Value:  0
 *
 *      A value of "1" indicates that the backend can process BLKIF_OP_DISCARD
 *      requests with the BLKIF_DISCARD_SECURE flag set.
 *
 * info
 *      Values:         <uint32_t> (bitmap)
 *
 *      A collection of bit flags describing attributes of the backing
 *      device.  The VDISK_* macros define the meaning of each bit
 *      location.
 *
 * sector-size
 *      Values:         <uint32_t>
 *
 *      The native sector size, in bytes, of the backend device.
 *
 * sectors
 *      Values:         <uint64_t>
 *
 *      The size of the backend device, expressed in units of its native
 *      sector size ("sector-size").
 *
 *****************************************************************************
 *                            Frontend XenBus Nodes
 *****************************************************************************
 *
 *----------------------- Request Transport Parameters -----------------------
 *
 * event-channel
 *      Values:         <uint32_t>
 *
 *      The identifier of the Xen event channel used to signal activity
 *      in the ring buffer.
 *
 * ring-ref
 *      Values:         <uint32_t>
 *      Notes:          6
 *
 *      The Xen grant reference granting permission for the backend to map
 *      the sole page in a single page sized ring buffer.
 *
 * ring-ref%u
 *      Values:         <uint32_t>
 *      Notes:          6
 *
 *      For a frontend providing a multi-page ring, a "num-ring-pages" sized
 *      list of nodes, each containing a Xen grant reference granting
 *      permission for the backend to map the page of the ring located
 *      at page index "%u".  Page indexes are zero based.
 *
 * protocol
 *      Values:         string (XEN_IO_PROTO_ABI_*)
 *      Default Value:  XEN_IO_PROTO_ABI_NATIVE
 *
 *      The machine ABI rules governing the format of all ring request and
 *      response structures.
 *
 * ring-page-order
 *      Values:         <uint32_t>
 *      Default Value:  0
 *      Maximum Value:  MAX(ffs(max-ring-pages) - 1, max-ring-page-order)
 *      Notes:          1, 3
 *
 *      The size of the frontend allocated request ring buffer in units
 *      of lb(machine pages). (e.g. 0 == 1 page, 1 == 2 pages, 2 == 4 pages,
 *      etc.).
 *
 * num-ring-pages
 *      Values:         <uint32_t>
 *      Default Value:  1
 *      Maximum Value:  MAX(max-ring-pages, (0x1 << max-ring-page-order))
 *      Notes:          2, 3
 *
 *      The size of the frontend allocated request ring buffer in units of
 *      machine pages.  The value must be a power of 2.
 *
 * max-requests
 *      Values:         <uint32_t>
 *      Default Value:  BLKIF_MAX_RING_REQUESTS(PAGE_SIZE)
 *      Maximum Value:  BLKIF_MAX_RING_REQUESTS(PAGE_SIZE * max-ring-pages)
 *
 *      The maximum number of concurrent, logical requests that will be
 *      issued by the frontend.
 *
 *      Note: A logical request may span multiple ring entries.
 *
 * max-request-segments
 *      Values:         <uint8_t>
 *      Default Value:  BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK
 *      Maximum Value:  MIN(255, backend/max-request-segments)
 *
 *      The maximum value the frontend will set in the
 *      blkif_request.nr_segments field.
 *
 * max-request-size
 *      Values:         <uint32_t>
 *      Default Value:  BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK * PAGE_SIZE
 *      Maximum Value:  max-request-segments * PAGE_SIZE
 *
 *      The maximum amount of data, in bytes, that can be referenced by
 *      a request type that accesses frontend memory (currently BLKIF_OP_READ,
 *      BLKIF_OP_WRITE, or BLKIF_OP_WRITE_BARRIER).
 *
 *------------------------- Virtual Device Properties -------------------------
 *
 * device-type
 *      Values:         "disk", "cdrom", "floppy", etc.
 *
 * virtual-device
 *      Values:         <uint32_t>
 *
 *      A value indicating the physical device to virtualize within the
 *      frontend's domain.  (e.g. "The first ATA disk", "The third SCSI
 *      disk", etc.)
 *
 *      See docs/misc/vbd-interface.txt for details on the format of this
 *      value.
 *
 * Notes
 * -----
 * (1) Multi-page ring buffer scheme first developed in the Citrix XenServer
 *     PV drivers.
 * (2) Multi-page ring buffer scheme first used in some RedHat distributions
 *     including a distribution deployed on certain nodes of the Amazon
 *     EC2 cluster.
 * (3) Support for multi-page ring buffers was implemented independently,
 *     in slightly different forms, by both Citrix and RedHat/Amazon.
 *     For full interoperability, block front and backends should publish
 *     identical ring parameters, adjusted for unit differences, to the
 *     XenStore nodes used in both schemes.
 * (4) Devices that support discard functionality may internally allocate
 *     space (discardable extents) in units that are larger than the
 *     exported logical block size.
 * (5) The discard-alignment parameter allows a physical device to be
 *     partitioned into virtual devices that do not necessarily begin or
 *     end on a discardable extent boundary.
 * (6) When there is only a single page allocated to the request ring,
 *     'ring-ref' is used to communicate the grant reference for this
 *     page to the backend.  When using a multi-page ring, the 'ring-ref'
 *     node is not created.  Instead 'ring-ref0' - 'ring-refN' are used.
 */
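[As an illustration of the nodes described above, a negotiated two-page ring
might look roughly like this in the XenStore.  The paths, domain IDs, device
numbers, grant references, and the exact set of nodes shown are hypothetical;
real trees vary by tool stack and configuration:

    /local/domain/0/backend/vbd/7/51712/
        mode = "w"
        type = "phy"
        feature-barrier = "1"
        feature-flush-cache = "1"
        max-ring-page-order = "2"
        max-ring-pages = "4"
        sector-size = "512"
        sectors = "41943040"

    /local/domain/7/device/vbd/51712/
        event-channel = "9"
        protocol = "x86_64-abi"
        ring-page-order = "1"
        num-ring-pages = "2"
        ring-ref0 = "8"
        ring-ref1 = "9"
]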
/*
 * STATE DIAGRAMS
 *
 *****************************************************************************
 *                                   Startup                                 *
 *****************************************************************************
 *
 * Tool stack creates front and back nodes with state XenbusStateInitialising.
 *
 * Front                                Back
 * =================================    =====================================
 * XenbusStateInitialising              XenbusStateInitialising
 *  o Query virtual device               o Query backend device identification
 *    properties.                          data.
 *  o Setup OS device instance.          o Open and validate backend device.
 *                                       o Publish backend features and
 *                                         transport parameters.
 *                                                      |
 *                                                      |
 *                                                      V
 *                                      XenbusStateInitWait
 *
 * o Query backend features and
 *   transport parameters.
 * o Allocate and initialize the
 *   request ring.
 * o Publish transport parameters
 *   that will be in effect during
 *   this connection.
 *              |
 *              |
 *              V
 * XenbusStateInitialised
 *
 *                                       o Query frontend transport parameters.
 *                                       o Connect to the request ring and
 *                                         event channel.
 *                                       o Publish backend device properties.
 *                                                      |
 *                                                      |
 *                                                      V
 *                                      XenbusStateConnected
 *
 * o Query backend device properties.
 * o Finalize OS virtual device
 *   instance.
 *              |
 *              |
 *              V
 * XenbusStateConnected
 *
 * Note: Drivers that do not support any optional features, or the negotiation
 *       of transport parameters, can skip certain states in the state machine:
 *
 *       o A frontend may transition to XenbusStateInitialised without
 *         waiting for the backend to enter XenbusStateInitWait.  In this
 *         case, default transport parameters are in effect and any
 *         transport parameters published by the frontend must contain
 *         their default values.
 *
 *       o A backend may transition to XenbusStateInitialised, bypassing
 *         XenbusStateInitWait, without waiting for the frontend to first
 *         enter the XenbusStateInitialised state.  In this case, default
 *         transport parameters are in effect and any transport parameters
 *         published by the backend must contain their default values.
 *
 *       Drivers that support optional features and/or transport parameter
 *       negotiation must tolerate these additional state transition paths.
 *       In general this means performing the work of any skipped state
 *       transition, if it has not already been performed, in addition to the
 *       work associated with entry into the current state.
 */

/*
 * REQUEST CODES.
 */
#define BLKIF_OP_READ           0
#define BLKIF_OP_WRITE          1
/*
- * Recognised only if "feature-barrier" is present in backend xenbus info.
- * The "feature-barrier" node contains a boolean indicating whether barrier
- * requests are likely to succeed or fail.  Either way, a barrier request
- * may fail at any time with BLKIF_RSP_EOPNOTSUPP if it is unsupported by
- * the underlying block-device hardware.  The boolean simply indicates whether
- * or not it is worthwhile for the frontend to attempt barrier requests.
- * If a backend does not recognise BLKIF_OP_WRITE_BARRIER, it should *not*
- * create the "feature-barrier" node!
+ * All writes issued prior to a request with the BLKIF_OP_WRITE_BARRIER
+ * operation code ("barrier request") must be completed prior to the
+ * execution of the barrier request.  All writes issued after the barrier
+ * request must not execute until after the completion of the barrier request.
+ *
+ * Optional.  See "feature-barrier" XenBus node documentation above.
 */
#define BLKIF_OP_WRITE_BARRIER  2
/*
- * Recognised if "feature-flush-cache" is present in backend xenbus
- * info.  A flush will ask the underlying storage hardware to flush its
- * non-volatile caches as appropriate.  The "feature-flush-cache" node
- * contains a boolean indicating whether flush requests are likely to
- * succeed or fail.  Either way, a flush request may fail at any time
- * with BLKIF_RSP_EOPNOTSUPP if it is unsupported by the underlying
- * block-device hardware.  The boolean simply indicates whether or not it
- * is worthwhile for the frontend to attempt flushes.  If a backend does
- * not recognise BLKIF_OP_WRITE_FLUSH_CACHE, it should *not* create the
- * "feature-flush-cache" node!
+ * Commit any uncommitted contents of the backing device's volatile cache
+ * to stable storage.
+ *
+ * Optional.  See "feature-flush-cache" XenBus node documentation above.
 */
#define BLKIF_OP_FLUSH_DISKCACHE 3
/*
 * Used in SLES sources for device specific command packet
 * contained within the request.  Reserved for that purpose.
 */
#define BLKIF_OP_RESERVED_1     4
/*
 * Indicate to the backend device that a region of storage is no longer in
 * use, and may be discarded at any time without impact to the client.  If
 * the BLKIF_DISCARD_SECURE flag is set on the request, all copies of the
 * discarded region on the device must be rendered unrecoverable before the
 * command returns.
 *
 * This operation is analogous to performing a trim (ATA) or unmap (SCSI)
 * command on a native device.
 *
 * More information about trim/unmap operations can be found at:
 * http://t13.org/Documents/UploadedDocuments/docs2008/
 *     e07154r6-Data_Set_Management_Proposal_for_ATA-ACS2.doc
 * http://www.seagate.com/staticfiles/support/disc/manuals/
 *     Interface%20manuals/100293068c.pdf
 *
 * Optional.  See "feature-discard", "discard-alignment",
 * "discard-granularity", and "discard-secure" in the XenBus node
 * documentation above.
 */
#define BLKIF_OP_DISCARD        5

/*
 * Maximum scatter/gather segments associated with a request header block.
 * This is carefully chosen so that sizeof(blkif_ring_t) <= PAGE_SIZE.
 * NB. This could be 12 if the ring indexes weren't stored in the same page.
 */
#define BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK 11

@@ -92,6 +469,13 @@
 */
#define BLKIF_MAX_SEGMENTS_PER_REQUEST 255

/*
 * NB. first_sect and last_sect in blkif_request_segment, as well as
 * sector_number in blkif_request, are always expressed in 512-byte units.
 * However they must be properly aligned to the real sector size of the
 * physical disk, which is reported in the "sector-size" node in the backend
 * xenbus info.  Also the xenbus "sectors" node is expressed in 512-byte units.
 */
struct blkif_request_segment {
    grant_ref_t gref;        /* reference to I/O buffer frame        */
    /* @first_sect: first sector in frame to transfer (inclusive).   */

@@ -100,16 +484,60 @@ struct blkif_request_segment {
};
typedef struct blkif_request_segment blkif_request_segment_t;

/*
 * Starting ring element for any I/O request.
 *
 * One or more segment blocks can be inserted into the request ring
 * just after a blkif_request_t, allowing requests to operate on
 * up to BLKIF_MAX_SEGMENTS_PER_REQUEST.
 *
 * BLKIF_SEGS_TO_BLOCKS() can be used on blkif_request.nr_segments
 * to determine the number of contiguous ring entries associated
 * with this request.
 *
 * Note:  Due to the way Xen request rings operate, the producer and
 *        consumer indices of the ring must be incremented by the
 *        BLKIF_SEGS_TO_BLOCKS() value of the associated request.
 *        (e.g. a response to a 3 ring entry request must also consume
 *        3 entries in the ring, even though only the first ring entry
 *        in the response has any data.)
 */
struct blkif_request {
    uint8_t        operation;    /* BLKIF_OP_???                         */
    uint8_t        nr_segments;  /* number of segments                   */
    blkif_vdev_t   handle;       /* only for read/write requests         */
    uint64_t       id;           /* private guest value, echoed in resp  */
    blkif_sector_t sector_number;/* start sector idx on disk (r/w only)  */
-   struct blkif_request_segment seg[BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK];
+   blkif_request_segment_t seg[BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK];
};
typedef struct blkif_request blkif_request_t;

/*
 * A segment block is a ring request structure that contains only
 * segment data.
 *
 * sizeof(struct blkif_segment_block) <= sizeof(struct blkif_request)
 */
struct blkif_segment_block {
    blkif_request_segment_t seg[BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK];
};
typedef struct blkif_segment_block blkif_segment_block_t;

/*
 * Cast to this structure when blkif_request.operation == BLKIF_OP_DISCARD
 * sizeof(struct blkif_request_discard) <= sizeof(struct blkif_request)
 */
struct blkif_request_discard {
    uint8_t        operation;    /* BLKIF_OP_DISCARD                     */
    uint8_t        flag;         /* BLKIF_DISCARD_SECURE or zero         */
#define BLKIF_DISCARD_SECURE (1<<0) /* ignored if discard-secure=0       */
    blkif_vdev_t   handle;       /* same as for read/write requests      */
    uint64_t       id;           /* private guest value, echoed in resp  */
    blkif_sector_t sector_number;/* start sector idx on disk             */
    uint64_t       nr_sectors;   /* number of contiguous sectors to discard*/
};
typedef struct blkif_request_discard blkif_request_discard_t;
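[A minimal sketch of how a frontend might fill in the discard variant defined
above.  The helper name is hypothetical and ring insertion, notification, and
error handling are elided; it assumes this header has been included so the
blkif types and the BLKIF_* constants are in scope:

    /* Hypothetical helper: populate a ring slot with a discard request. */
    static void
    blkif_prepare_discard(struct blkif_request_discard *req,
        blkif_vdev_t handle, uint64_t id, blkif_sector_t start,
        uint64_t nsectors, int secure)
    {
            req->operation = BLKIF_OP_DISCARD;
            /* BLKIF_DISCARD_SECURE is honored only if discard-secure=1. */
            req->flag = secure ? BLKIF_DISCARD_SECURE : 0;
            req->handle = handle;
            req->id = id;                /* echoed back in the response  */
            req->sector_number = start;  /* 512-byte units               */
            req->nr_sectors = nsectors;  /* contiguous extent to discard */
    }
]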
struct blkif_response {
    uint64_t        id;              /* copied from request */
    uint8_t         operation;       /* copied from request */

@@ -130,24 +558,26 @@ typedef struct blkif_response blkif_response_t;
/*
 * Generate blkif ring structures and types.
 */
DEFINE_RING_TYPES(blkif, struct blkif_request, struct blkif_response);

-#define BLKRING_GET_SG_REQUEST(_r, _idx) \
-    ((struct blkif_request_segment *)RING_GET_REQUEST(_r, _idx))
-
-#define VDISK_CDROM        0x1
-#define VDISK_REMOVABLE    0x2
-#define VDISK_READONLY     0x4
+/*
+ * Index to, and treat as a segment block, an entry in the ring.
+ */
+#define BLKRING_GET_SEG_BLOCK(_r, _idx) \
+    (((blkif_segment_block_t *)RING_GET_REQUEST(_r, _idx))->seg)

/*
 * The number of ring request blocks required to handle an I/O
 * request containing _segs segments.
 */
#define BLKIF_SEGS_TO_BLOCKS(_segs) \
    ((((_segs - BLKIF_MAX_SEGMENTS_PER_HEADER_BLOCK) \
      + (BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK - 1)) \
      / BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK) + /*header_block*/1)
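[A worked example of the macro above, assuming, for illustration only, that a
segment block holds 11 segments (the real constant is
BLKIF_MAX_SEGMENTS_PER_SEGMENT_BLOCK and may differ):

    /*
     *   nr_segments = 40
     *   header block:             11 segments
     *   remaining:                40 - 11 = 29 segments
     *   segment blocks:           ceil(29 / 11) = 3
     *   BLKIF_SEGS_TO_BLOCKS(40): 3 + 1 = 4 ring entries
     *
     * Both the producer and consumer indices must advance by 4 for
     * this request, even though only the first entry of the response
     * carries data.
     */
]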
#define VDISK_CDROM        0x1
#define VDISK_REMOVABLE    0x2
#define VDISK_READONLY     0x4

#endif /* __XEN_PUBLIC_IO_BLKIF_H__ */
sys/xen/xenbus/xenbusvar.h:

@@ -103,6 +103,20 @@ XENBUS_ACCESSOR(otherend_path, OTHEREND_PATH, const char *)
 */
XenbusState xenbus_read_driver_state(const char *path);

/**
 * Return the state of the "other end" (peer) of a XenBus device.
 *
 * \param dev   The XenBus device whose peer to query.
 *
 * \return  The current state of the peer device or XenbusStateClosed if no
 *          state can be read.
 */
static inline XenbusState
xenbus_get_otherend_state(device_t dev)
{
    return (xenbus_read_driver_state(xenbus_get_otherend_path(dev)));
}
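[The accessor collapses the former two-step lookup (fetch the peer path, then
read its state node) into one call, as seen in blkif_close() above.  A sketch
of the deferred-teardown pattern it enables, modeled on xbb_shutdown() with
locking and cleanup elided; the function name is illustrative:

    /* Defer connection teardown until the peer has fully closed. */
    static int
    example_shutdown(device_t dev)
    {
            if (xenbus_get_otherend_state(dev) < XenbusStateClosed)
                    return (EAGAIN);  /* peer may still submit I/O */
            /* ... proceed with connection teardown ... */
            return (0);
    }
]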
/**
 * Initialize and register a watch on the given path (client supplied storage).
 *