/*
 * Copyright (c) 2006, Cisco Systems, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of Cisco Systems, Inc. nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>
#include <sys/namei.h>
#include <sys/proc.h>
#include <sys/filedesc.h>
#include <sys/vnode.h>
#include <sys/fcntl.h>
#include <sys/disk.h>
#include <sys/bio.h>

#include <sys/module.h>
#include <sys/bus.h>
#include <sys/sysctl.h>

#include <geom/geom.h>

#include <vm/vm_extern.h>
#include <vm/vm_kern.h>

#include <machine/xen-os.h>
#include <machine/hypervisor.h>
#include <machine/hypervisor-ifs.h>
#include <machine/xen_intr.h>
#include <machine/evtchn.h>
#include <machine/xenbus.h>
#include <machine/gnttab.h>
#include <machine/xen-public/memory.h>
#include <dev/xen/xenbus/xenbus_comms.h>

#if XEN_BLKBACK_DEBUG
#define DPRINTF(fmt, args...) \
	printf("blkback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)
#else
#define DPRINTF(fmt, args...) ((void)0)
#endif

#define WPRINTF(fmt, args...) \
	printf("blkback (%s:%d): " fmt, __FUNCTION__, __LINE__, ##args)

#define BLKBACK_INVALID_HANDLE (~0)

struct ring_ref {
	vm_offset_t va;
	grant_handle_t handle;
	uint64_t bus_addr;
};

typedef struct blkback_info {

	/* Schedule lists */
	STAILQ_ENTRY(blkback_info) next_req;
	int on_req_sched_list;

	struct xenbus_device *xdev;
	XenbusState frontend_state;

	domid_t domid;

	int state;
	int ring_connected;
	struct ring_ref rr;
	blkif_back_ring_t ring;
	evtchn_port_t evtchn;
	int irq;
	void *irq_cookie;

	int ref_cnt;

	int handle;
	char *mode;
	char *type;
	char *dev_name;

	struct vnode *vn;
	struct cdev *cdev;
	struct cdevsw *csw;
	u_int sector_size;
	int sector_size_shift;
	off_t media_size;
	u_int media_num_sectors;
	int major;
	int minor;
	int read_only;

	struct mtx blk_ring_lock;

	device_t ndev;

	/* Stats */
	int st_rd_req;
	int st_wr_req;
	int st_oo_req;
	int st_err_req;
} blkif_t;

/*
 * These are rather arbitrary. They are fairly large because adjacent requests
 * pulled from a communication ring are quite likely to end up being part of
 * the same scatter/gather request at the disc.
 *
 * ** TRY INCREASING 'blkif_reqs' IF WRITE SPEEDS SEEM TOO LOW **
 *
 * This will increase the chances of being able to write whole tracks.
 * 64 should be enough to keep us competitive with Linux.
 */
static int blkif_reqs = 64;
TUNABLE_INT("xen.vbd.blkif_reqs", &blkif_reqs);

static int mmap_pages;

/*
 * Each outstanding request that we've passed to the lower device layers has a
 * 'pending_req' allocated to it. Each bio that completes decrements
 * the pendcnt towards zero. When it hits zero, the specified domain has a
 * response queued for it, with the saved 'id' passed back.
 */
typedef struct pending_req {
	blkif_t *blkif;
	uint64_t id;
	int nr_pages;
	int pendcnt;
	unsigned short operation;
	int status;
	STAILQ_ENTRY(pending_req) free_list;
} pending_req_t;

static pending_req_t *pending_reqs;
static STAILQ_HEAD(pending_reqs_list, pending_req) pending_free =
	STAILQ_HEAD_INITIALIZER(pending_free);
static struct mtx pending_free_lock;

static STAILQ_HEAD(blkback_req_sched_list, blkback_info) req_sched_list =
	STAILQ_HEAD_INITIALIZER(req_sched_list);
static struct mtx req_sched_list_lock;

static unsigned long mmap_vstart;
static unsigned long *pending_vaddrs;
static grant_handle_t *pending_grant_handles;

static struct task blk_req_task;

/* Protos */
static void disconnect_ring(blkif_t *blkif);
static int vbd_add_dev(struct xenbus_device *xdev);

static inline int vaddr_pagenr(pending_req_t *req, int seg)
{
	return (req - pending_reqs) * BLKIF_MAX_SEGMENTS_PER_REQUEST + seg;
}

static inline unsigned long vaddr(pending_req_t *req, int seg)
{
	return pending_vaddrs[vaddr_pagenr(req, seg)];
}

#define pending_handle(_req, _seg) \
	(pending_grant_handles[vaddr_pagenr(_req, _seg)])

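/*
 * Allocate a contiguous kernel virtual range of nr_pages pages and return
 * the machine frames backing it to Xen (XENMEM_decrease_reservation), so
 * the range is left empty and can later be populated by grant mappings of
 * frontend pages.  The update_va_mapping and memory_op hypercalls are
 * batched, 16 pages per multicall.
 */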
static unsigned long
alloc_empty_page_range(unsigned long nr_pages)
{
	void *pages;
	int i = 0, j = 0;
	multicall_entry_t mcl[17];
	unsigned long mfn_list[16];
	struct xen_memory_reservation reservation = {
		.extent_start = mfn_list,
		.nr_extents = 0,
		.address_bits = 0,
		.extent_order = 0,
		.domid = DOMID_SELF
	};

	pages = malloc(nr_pages*PAGE_SIZE, M_DEVBUF, M_NOWAIT);
	if (pages == NULL)
		return 0;

	memset(mcl, 0, sizeof(mcl));

	while (i < nr_pages) {
		unsigned long va = (unsigned long)pages + (i++ * PAGE_SIZE);

		mcl[j].op = __HYPERVISOR_update_va_mapping;
		mcl[j].args[0] = va;

		mfn_list[j++] = vtomach(va) >> PAGE_SHIFT;

		xen_phys_machine[(vtophys(va) >> PAGE_SHIFT)] = INVALID_P2M_ENTRY;

		if (j == 16 || i == nr_pages) {
			mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = UVMF_TLB_FLUSH|UVMF_LOCAL;

			reservation.nr_extents = j;

			mcl[j].op = __HYPERVISOR_memory_op;
			mcl[j].args[0] = XENMEM_decrease_reservation;
			mcl[j].args[1] = (unsigned long)&reservation;

			(void)HYPERVISOR_multicall(mcl, j+1);

			mcl[j-1].args[MULTI_UVMFLAGS_INDEX] = 0;
			j = 0;
		}
	}

	return (unsigned long)pages;
}

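/*
 * pending_req structures come from a fixed-size pool protected by
 * pending_free_lock.  alloc_req() returns NULL when the pool is exhausted;
 * free_req() kicks the request task again once a structure becomes free.
 */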
static pending_req_t *
alloc_req(void)
{
	pending_req_t *req;
	mtx_lock(&pending_free_lock);
	if ((req = STAILQ_FIRST(&pending_free))) {
		STAILQ_REMOVE(&pending_free, req, pending_req, free_list);
		STAILQ_NEXT(req, free_list) = NULL;
	}
	mtx_unlock(&pending_free_lock);
	return req;
}

static void
free_req(pending_req_t *req)
{
	int was_empty;

	mtx_lock(&pending_free_lock);
	was_empty = STAILQ_EMPTY(&pending_free);
	STAILQ_INSERT_TAIL(&pending_free, req, free_list);
	mtx_unlock(&pending_free_lock);
	if (was_empty)
		taskqueue_enqueue(taskqueue_swi, &blk_req_task);
}

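/*
 * Unmap every grant-mapped segment page of a completed request with a
 * single batched GNTTABOP_unmap_grant_ref hypercall, resetting each slot's
 * handle to BLKBACK_INVALID_HANDLE.
 */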
static void
fast_flush_area(pending_req_t *req)
{
	struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	unsigned int i, invcount = 0;
	grant_handle_t handle;
	int ret;

	for (i = 0; i < req->nr_pages; i++) {
		handle = pending_handle(req, i);
		if (handle == BLKBACK_INVALID_HANDLE)
			continue;
		unmap[invcount].host_addr = vaddr(req, i);
		unmap[invcount].dev_bus_addr = 0;
		unmap[invcount].handle = handle;
		pending_handle(req, i) = BLKBACK_INVALID_HANDLE;
		invcount++;
	}

	ret = HYPERVISOR_grant_table_op(
		GNTTABOP_unmap_grant_ref, unmap, invcount);
	PANIC_IF(ret);
}

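/*
 * Reference counting for the interface: the last blkif_put() tears down the
 * ring mapping and frees the blkif along with the strings it owns.
 */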
static void
blkif_get(blkif_t *blkif)
{
	atomic_add_int(&blkif->ref_cnt, 1);
}

static void
blkif_put(blkif_t *blkif)
{
	if (atomic_fetchadd_int(&blkif->ref_cnt, -1) == 1) {
		DPRINTF("Removing %x\n", (unsigned int)blkif);
		disconnect_ring(blkif);
		if (blkif->mode)
			free(blkif->mode, M_DEVBUF);
		if (blkif->type)
			free(blkif->type, M_DEVBUF);
		if (blkif->dev_name)
			free(blkif->dev_name, M_DEVBUF);
		free(blkif, M_DEVBUF);
	}
}

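/*
 * Allocate and initialize a blkif for the given xenbus device.  On success
 * the blkif takes ownership of the mode, type and params strings (they are
 * freed by the final blkif_put()).  Any mode other than "w" makes the
 * device read-only.
 */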
static int
blkif_create(struct xenbus_device *xdev, long handle, char *mode, char *type, char *params)
{
	blkif_t *blkif;

	blkif = (blkif_t *)malloc(sizeof(*blkif), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (!blkif)
		return ENOMEM;

	DPRINTF("Created %x\n", (unsigned int)blkif);

	blkif->ref_cnt = 1;
	blkif->domid = xdev->otherend_id;
	blkif->handle = handle;
	blkif->mode = mode;
	blkif->type = type;
	blkif->dev_name = params;
	blkif->xdev = xdev;
	xdev->data = blkif;

	mtx_init(&blkif->blk_ring_lock, "blk_ring_lock", "blkback ring lock", MTX_DEF);

	if (strcmp(mode, "w"))
		blkif->read_only = 1;

	return 0;
}

static void
add_to_req_schedule_list_tail(blkif_t *blkif)
{
	if (!blkif->on_req_sched_list) {
		mtx_lock(&req_sched_list_lock);
		if (!blkif->on_req_sched_list && (blkif->state == XenbusStateConnected)) {
			blkif_get(blkif);
			STAILQ_INSERT_TAIL(&req_sched_list, blkif, next_req);
			blkif->on_req_sched_list = 1;
			taskqueue_enqueue(taskqueue_swi, &blk_req_task);
		}
		mtx_unlock(&req_sched_list_lock);
	}
}

/*
 * This routine does not call blkif_get(), does not schedule the blk_req_task
 * to run, and assumes that the state is connected.
 */
static void
add_to_req_schedule_list_tail2(blkif_t *blkif)
{
	mtx_lock(&req_sched_list_lock);
	if (!blkif->on_req_sched_list) {
		STAILQ_INSERT_TAIL(&req_sched_list, blkif, next_req);
		blkif->on_req_sched_list = 1;
	}
	mtx_unlock(&req_sched_list_lock);
}

/* Removes blkif from front of list and does not call blkif_put() (caller must) */
static blkif_t *
remove_from_req_schedule_list(void)
{
	blkif_t *blkif;

	mtx_lock(&req_sched_list_lock);

	if ((blkif = STAILQ_FIRST(&req_sched_list))) {
		STAILQ_REMOVE(&req_sched_list, blkif, blkback_info, next_req);
		STAILQ_NEXT(blkif, next_req) = NULL;
		blkif->on_req_sched_list = 0;
	}

	mtx_unlock(&req_sched_list_lock);

	return blkif;
}

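/*
 * Queue a response on the shared ring under blk_ring_lock and, if the
 * frontend requested it, signal the event channel.  If more requests are
 * already waiting on the ring, reschedule this interface instead of waiting
 * for another interrupt.
 */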
static void
make_response(blkif_t *blkif, uint64_t id,
	unsigned short op, int st)
{
	blkif_response_t *resp;
	blkif_back_ring_t *blk_ring = &blkif->ring;
	int more_to_do = 0;
	int notify;

	mtx_lock(&blkif->blk_ring_lock);

	/* Place on the response ring for the relevant domain. */
	resp = RING_GET_RESPONSE(blk_ring, blk_ring->rsp_prod_pvt);
	resp->id = id;
	resp->operation = op;
	resp->status = st;
	blk_ring->rsp_prod_pvt++;
	RING_PUSH_RESPONSES_AND_CHECK_NOTIFY(blk_ring, notify);

	if (blk_ring->rsp_prod_pvt == blk_ring->req_cons) {
		/*
		 * Tail check for pending requests. Allows frontend to avoid
		 * notifications if requests are already in flight (lower
		 * overheads and promotes batching).
		 */
		RING_FINAL_CHECK_FOR_REQUESTS(blk_ring, more_to_do);

	} else if (RING_HAS_UNCONSUMED_REQUESTS(blk_ring))
		more_to_do = 1;

	mtx_unlock(&blkif->blk_ring_lock);

	if (more_to_do)
		add_to_req_schedule_list_tail(blkif);

	if (notify)
		notify_remote_via_irq(blkif->irq);
}

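/*
 * bio completion callback.  The last bio of a request to complete unmaps
 * the granted pages, queues the response and drops the request's
 * references.
 */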
static void
end_block_io_op(struct bio *bio)
{
	pending_req_t *pending_req = bio->bio_caller2;

	if (bio->bio_error) {
		DPRINTF("BIO returned error %d for operation on device %s\n",
			bio->bio_error, pending_req->blkif->dev_name);
		pending_req->status = BLKIF_RSP_ERROR;
		pending_req->blkif->st_err_req++;
	}

#if 0
	printf("done: bio=%x error=%x completed=%llu resid=%lu flags=%x\n",
		(unsigned int)bio, bio->bio_error, bio->bio_completed, bio->bio_resid, bio->bio_flags);
#endif

	if (atomic_fetchadd_int(&pending_req->pendcnt, -1) == 1) {
		fast_flush_area(pending_req);
		make_response(pending_req->blkif, pending_req->id,
			pending_req->operation, pending_req->status);
		blkif_put(pending_req->blkif);
		free_req(pending_req);
	}

	g_destroy_bio(bio);
}

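/*
 * Dispatch a single read/write request from the ring: validate the segment
 * list, grant-map the frontend's pages into our pre-allocated VA slots,
 * build one bio per segment and push them down via the disk's d_strategy.
 * Completion is handled asynchronously in end_block_io_op().
 */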
static void
dispatch_rw_block_io(blkif_t *blkif, blkif_request_t *req, pending_req_t *pending_req)
{
	struct gnttab_map_grant_ref map[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	struct {
		unsigned long buf; unsigned int nsec;
	} seg[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	unsigned int nseg = req->nr_segments, nr_sects = 0;
	struct bio *biolist[BLKIF_MAX_SEGMENTS_PER_REQUEST];
	int operation, ret, i, nbio = 0;

	/* Check that number of segments is sane. */
	if (unlikely(nseg == 0) ||
		unlikely(nseg > BLKIF_MAX_SEGMENTS_PER_REQUEST)) {
		DPRINTF("Bad number of segments in request (%d)\n", nseg);
		goto fail_response;
	}

	if (req->operation == BLKIF_OP_WRITE) {
		if (blkif->read_only) {
			DPRINTF("Attempt to write to read only device %s\n", blkif->dev_name);
			goto fail_response;
		}
		operation = BIO_WRITE;
	} else
		operation = BIO_READ;

	pending_req->blkif = blkif;
	pending_req->id = req->id;
	pending_req->operation = req->operation;
	pending_req->status = BLKIF_RSP_OKAY;
	pending_req->nr_pages = nseg;

	for (i = 0; i < nseg; i++) {
		seg[i].nsec = req->seg[i].last_sect -
			req->seg[i].first_sect + 1;

		if ((req->seg[i].last_sect >= (PAGE_SIZE >> 9)) ||
			(seg[i].nsec <= 0))
			goto fail_response;
		nr_sects += seg[i].nsec;

		map[i].host_addr = vaddr(pending_req, i);
		map[i].dom = blkif->domid;
		map[i].ref = req->seg[i].gref;
		map[i].flags = GNTMAP_host_map;
		if (operation == BIO_WRITE)
			map[i].flags |= GNTMAP_readonly;
	}

	/* Convert to the disk's sector size */
	nr_sects = (nr_sects << 9) >> blkif->sector_size_shift;

	ret = HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, map, nseg);
	PANIC_IF(ret);

	for (i = 0; i < nseg; i++) {
		if (unlikely(map[i].status != 0)) {
			DPRINTF("invalid buffer -- could not remap it\n");
			goto fail_flush;
		}

		pending_handle(pending_req, i) = map[i].handle;
#if 0
		/* Can't do this in FreeBSD since vtophys() returns the pfn */
		/* of the remote domain who loaned us the machine page - DPT */
		xen_phys_machine[(vtophys(vaddr(pending_req, i)) >> PAGE_SHIFT)] =
			map[i].dev_bus_addr >> PAGE_SHIFT;
#endif
		seg[i].buf = map[i].dev_bus_addr |
			(req->seg[i].first_sect << 9);
	}

	if (req->sector_number + nr_sects > blkif->media_num_sectors) {
		DPRINTF("%s of [%llu,%llu] extends past end of device %s\n",
			operation == BIO_READ ? "read" : "write",
			req->sector_number,
			req->sector_number + nr_sects, blkif->dev_name);
		goto fail_flush;
	}

	for (i = 0; i < nseg; i++) {
		struct bio *bio;

		if ((int)seg[i].nsec & ((blkif->sector_size >> 9) - 1)) {
			DPRINTF("Misaligned I/O request from domain %d\n", blkif->domid);
			goto fail_put_bio;
		}

		bio = biolist[nbio++] = g_new_bio();
		if (unlikely(bio == NULL))
			goto fail_put_bio;

		bio->bio_cmd = operation;
		bio->bio_offset = req->sector_number << blkif->sector_size_shift;
		bio->bio_length = seg[i].nsec << 9;
		bio->bio_bcount = bio->bio_length;
		bio->bio_data = (caddr_t)(vaddr(pending_req, i) | (seg[i].buf & PAGE_MASK));
		bio->bio_done = end_block_io_op;
		bio->bio_caller2 = pending_req;
		bio->bio_dev = blkif->cdev;

		req->sector_number += (seg[i].nsec << 9) >> blkif->sector_size_shift;
#if 0
		printf("new: bio=%x cmd=%d sect=%llu nsect=%u iosize_max=%u @ %08lx\n",
			(unsigned int)bio, req->operation, req->sector_number, seg[i].nsec,
			blkif->cdev->si_iosize_max, seg[i].buf);
#endif
	}

	pending_req->pendcnt = nbio;
	blkif_get(blkif);

	for (i = 0; i < nbio; i++)
		(*blkif->csw->d_strategy)(biolist[i]);

	return;

fail_put_bio:
	/*
	 * Free the bios allocated so far; a failed g_new_bio() left NULL in
	 * its biolist slot, which we skip.
	 */
	for (i = 0; i < nbio; i++)
		if (biolist[i] != NULL)
			g_destroy_bio(biolist[i]);
fail_flush:
	fast_flush_area(pending_req);
fail_response:
	make_response(blkif, req->id, req->operation, BLKIF_RSP_ERROR);
	free_req(pending_req);
}

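/*
 * Task handler: drain the schedule list, consuming as many ring requests as
 * possible for each interface.  If the pending_req pool runs dry, requeue
 * the interface; free_req() re-enqueues this task when a slot frees up.
 */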
static void
blk_req_action(void *context, int pending)
{
	blkif_t *blkif;

	DPRINTF("\n");

	while (!STAILQ_EMPTY(&req_sched_list)) {
		blkif_back_ring_t *blk_ring;
		RING_IDX rc, rp;

		blkif = remove_from_req_schedule_list();

		blk_ring = &blkif->ring;
		rc = blk_ring->req_cons;
		rp = blk_ring->sring->req_prod;
		rmb(); /* Ensure we see queued requests up to 'rp'. */

		while ((rc != rp) && !RING_REQUEST_CONS_OVERFLOW(blk_ring, rc)) {
			blkif_request_t *req;
			pending_req_t *pending_req;

			pending_req = alloc_req();
			if (pending_req == NULL)
				goto out_of_preqs;

			req = RING_GET_REQUEST(blk_ring, rc);
			blk_ring->req_cons = ++rc; /* before make_response() */

			switch (req->operation) {
			case BLKIF_OP_READ:
				blkif->st_rd_req++;
				dispatch_rw_block_io(blkif, req, pending_req);
				break;
			case BLKIF_OP_WRITE:
				blkif->st_wr_req++;
				dispatch_rw_block_io(blkif, req, pending_req);
				break;
			default:
				blkif->st_err_req++;
				DPRINTF("error: unknown block io operation [%d]\n",
					req->operation);
				make_response(blkif, req->id, req->operation,
					BLKIF_RSP_ERROR);
				free_req(pending_req);
				break;
			}
		}

		blkif_put(blkif);
	}

	return;

out_of_preqs:
	/* We ran out of pending req structs */
	/* Just requeue interface and wait to be rescheduled to run when one is freed */
	add_to_req_schedule_list_tail2(blkif);
	blkif->st_oo_req++;
}

/* Handle interrupt from a frontend */
static void
blkback_intr(void *arg)
{
	blkif_t *blkif = arg;
	DPRINTF("%x\n", (unsigned int)blkif);
	add_to_req_schedule_list_tail(blkif);
}

/* Map grant ref for ring */
static int
map_ring(grant_ref_t ref, domid_t dom, struct ring_ref *ring)
{
	struct gnttab_map_grant_ref op;

	ring->va = kmem_alloc_nofault(kernel_map, PAGE_SIZE);
	if (ring->va == 0)
		return ENOMEM;

	op.host_addr = ring->va;
	op.flags = GNTMAP_host_map;
	op.ref = ref;
	op.dom = dom;
	HYPERVISOR_grant_table_op(GNTTABOP_map_grant_ref, &op, 1);
	if (op.status) {
		WPRINTF("grant table op err=%d\n", op.status);
		kmem_free(kernel_map, ring->va, PAGE_SIZE);
		ring->va = 0;
		return EACCES;
	}

	ring->handle = op.handle;
	ring->bus_addr = op.dev_bus_addr;

	return 0;
}

/* Unmap grant ref for ring */
static void
unmap_ring(struct ring_ref *ring)
{
	struct gnttab_unmap_grant_ref op;

	op.host_addr = ring->va;
	op.dev_bus_addr = ring->bus_addr;
	op.handle = ring->handle;
	HYPERVISOR_grant_table_op(GNTTABOP_unmap_grant_ref, &op, 1);
	if (op.status)
		WPRINTF("grant table op err=%d\n", op.status);

	kmem_free(kernel_map, ring->va, PAGE_SIZE);
	ring->va = 0;
}

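/*
 * Connect to the frontend's I/O ring: read ring-ref and event-channel from
 * the frontend's xenbus directory, map the shared ring page, and bind the
 * interdomain event channel to blkback_intr().
 */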
static int
connect_ring(blkif_t *blkif)
{
	struct xenbus_device *xdev = blkif->xdev;
	blkif_sring_t *ring;
	unsigned long ring_ref;
	evtchn_port_t evtchn;
	evtchn_op_t op = { .cmd = EVTCHNOP_bind_interdomain };
	int err;

	if (blkif->ring_connected)
		return 0;

	/* Grab frontend data and map its memory */
	err = xenbus_gather(NULL, xdev->otherend,
		"ring-ref", "%lu", &ring_ref,
		"event-channel", "%u", &evtchn, NULL);
	if (err) {
		xenbus_dev_fatal(xdev, err,
			"reading %s/ring-ref and event-channel",
			xdev->otherend);
		return err;
	}

	err = map_ring(ring_ref, blkif->domid, &blkif->rr);
	if (err) {
		xenbus_dev_fatal(xdev, err, "mapping ring");
		return err;
	}
	ring = (blkif_sring_t *)blkif->rr.va;
	BACK_RING_INIT(&blkif->ring, ring, PAGE_SIZE);

	op.u.bind_interdomain.remote_dom = blkif->domid;
	op.u.bind_interdomain.remote_port = evtchn;
	err = HYPERVISOR_event_channel_op(&op);
	if (err) {
		unmap_ring(&blkif->rr);
		xenbus_dev_fatal(xdev, err, "binding event channel");
		return err;
	}
	blkif->evtchn = op.u.bind_interdomain.local_port;

	/* bind evtchn to irq handler */
	blkif->irq =
		bind_evtchn_to_irqhandler(blkif->evtchn, "blkback",
			blkback_intr, blkif, INTR_TYPE_NET|INTR_MPSAFE, &blkif->irq_cookie);

	blkif->ring_connected = 1;

	DPRINTF("%x rings connected! evtchn=%d irq=%d\n",
		(unsigned int)blkif, blkif->evtchn, blkif->irq);

	return 0;
}

static void
disconnect_ring(blkif_t *blkif)
{
	DPRINTF("\n");

	if (blkif->ring_connected) {
		unbind_from_irqhandler(blkif->irq, blkif->irq_cookie);
		blkif->irq = 0;
		unmap_ring(&blkif->rr);
		blkif->ring_connected = 0;
	}
}

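/*
 * Publish the device geometry (sectors, info flags, sector-size) to the
 * backend's xenbus directory in one transaction, retrying on EAGAIN, then
 * switch the backend to XenbusStateConnected.  Requires both a connected
 * ring and an opened backing device.
 */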
static void
connect(blkif_t *blkif)
{
	struct xenbus_transaction *xbt;
	struct xenbus_device *xdev = blkif->xdev;
	int err;

	if (!blkif->ring_connected ||
		blkif->vn == NULL ||
		blkif->state == XenbusStateConnected)
		return;

	DPRINTF("%s\n", xdev->otherend);

	/* Supply the information about the device the frontend needs */
again:
	xbt = xenbus_transaction_start();
	if (IS_ERR(xbt)) {
		xenbus_dev_fatal(xdev, PTR_ERR(xbt),
			"Error writing configuration for backend "
			"(start transaction)");
		return;
	}

	err = xenbus_printf(xbt, xdev->nodename, "sectors", "%u",
		blkif->media_num_sectors);
	if (err) {
		xenbus_dev_fatal(xdev, err, "writing %s/sectors",
			xdev->nodename);
		goto abort;
	}

	err = xenbus_printf(xbt, xdev->nodename, "info", "%u",
		blkif->read_only ? VDISK_READONLY : 0);
	if (err) {
		xenbus_dev_fatal(xdev, err, "writing %s/info",
			xdev->nodename);
		goto abort;
	}
	err = xenbus_printf(xbt, xdev->nodename, "sector-size", "%u",
		blkif->sector_size);
	if (err) {
		xenbus_dev_fatal(xdev, err, "writing %s/sector-size",
			xdev->nodename);
		goto abort;
	}

	err = xenbus_transaction_end(xbt, 0);
	if (err == -EAGAIN)
		goto again;
	if (err)
		xenbus_dev_fatal(xdev, err, "ending transaction");

	err = xenbus_switch_state(xdev, NULL, XenbusStateConnected);
	if (err)
		xenbus_dev_fatal(xdev, err, "switching %s to Connected state",
			xdev->nodename);

	blkif->state = XenbusStateConnected;

	return;

abort:
	xenbus_transaction_end(xbt, 1);
}

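/*
 * xenbus probe: parse the vbd handle from the frontend path, read the mode,
 * type and params nodes, create the blkif and attach the newbus vbd device
 * that represents it.
 */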
static int
blkback_probe(struct xenbus_device *xdev, const struct xenbus_device_id *id)
{
	int err;
	char *p, *mode = NULL, *type = NULL, *params = NULL;
	long handle;

	DPRINTF("node=%s\n", xdev->nodename);

	p = strrchr(xdev->otherend, '/') + 1;
	handle = strtoul(p, NULL, 0);

	mode = xenbus_read(NULL, xdev->nodename, "mode", NULL);
	if (IS_ERR(mode)) {
		xenbus_dev_fatal(xdev, PTR_ERR(mode), "reading mode");
		err = PTR_ERR(mode);
		mode = NULL;
		goto error;
	}

	type = xenbus_read(NULL, xdev->nodename, "type", NULL);
	if (IS_ERR(type)) {
		xenbus_dev_fatal(xdev, PTR_ERR(type), "reading type");
		err = PTR_ERR(type);
		type = NULL;
		goto error;
	}

	params = xenbus_read(NULL, xdev->nodename, "params", NULL);
	if (IS_ERR(params)) {
		xenbus_dev_fatal(xdev, PTR_ERR(params), "reading params");
		err = PTR_ERR(params);
		params = NULL;
		goto error;
	}

	err = blkif_create(xdev, handle, mode, type, params);
	if (err) {
		xenbus_dev_fatal(xdev, err, "creating blkif");
		goto error;
	}

	err = vbd_add_dev(xdev);
	if (err) {
		blkif_put((blkif_t *)xdev->data);
		xenbus_dev_fatal(xdev, err, "adding vbd device");
	}

	return err;

error:
	if (mode)
		free(mode, M_DEVBUF);
	if (type)
		free(type, M_DEVBUF);
	if (params)
		free(params, M_DEVBUF);
	return err;
}

static int
blkback_remove(struct xenbus_device *xdev)
{
	blkif_t *blkif = xdev->data;
	device_t ndev;

	DPRINTF("node=%s\n", xdev->nodename);

	blkif->state = XenbusStateClosing;

	if ((ndev = blkif->ndev)) {
		blkif->ndev = NULL;
		mtx_lock(&Giant);
		device_detach(ndev);
		mtx_unlock(&Giant);
	}

	xdev->data = NULL;
	blkif->xdev = NULL;
	blkif_put(blkif);

	return 0;
}

static int
blkback_resume(struct xenbus_device *xdev)
{
	DPRINTF("node=%s\n", xdev->nodename);
	return 0;
}

static void
frontend_changed(struct xenbus_device *xdev,
	XenbusState frontend_state)
{
	blkif_t *blkif = xdev->data;

	DPRINTF("state=%d\n", frontend_state);

	blkif->frontend_state = frontend_state;

	switch (frontend_state) {
	case XenbusStateInitialising:
		break;
	case XenbusStateInitialised:
	case XenbusStateConnected:
		connect_ring(blkif);
		connect(blkif);
		break;
	case XenbusStateClosing:
		xenbus_switch_state(xdev, NULL, XenbusStateClosing);
		break;
	case XenbusStateClosed:
		xenbus_remove_device(xdev);
		break;
	case XenbusStateUnknown:
	case XenbusStateInitWait:
		xenbus_dev_fatal(xdev, EINVAL, "saw state %d at frontend",
			frontend_state);
		break;
	}
}

/* ** Driver registration ** */

static struct xenbus_device_id blkback_ids[] = {
	{ "vbd" },
	{ "" }
};

static struct xenbus_driver blkback = {
	.name = "blkback",
	.ids = blkback_ids,
	.probe = blkback_probe,
	.remove = blkback_remove,
	.resume = blkback_resume,
	.otherend_changed = frontend_changed,
};

static void
blkback_init(void *unused)
{
	int i;

	TASK_INIT(&blk_req_task, 0, blk_req_action, NULL);
	mtx_init(&req_sched_list_lock, "blk_req_sched_lock", "blkback req sched lock", MTX_DEF);

	mtx_init(&pending_free_lock, "blk_pending_req_lock", "blkback pending request lock", MTX_DEF);

	mmap_pages = blkif_reqs * BLKIF_MAX_SEGMENTS_PER_REQUEST;
	pending_reqs = malloc(sizeof(pending_reqs[0]) *
		blkif_reqs, M_DEVBUF, M_ZERO|M_NOWAIT);
	pending_grant_handles = malloc(sizeof(pending_grant_handles[0]) *
		mmap_pages, M_DEVBUF, M_NOWAIT);
	pending_vaddrs = malloc(sizeof(pending_vaddrs[0]) *
		mmap_pages, M_DEVBUF, M_NOWAIT);
	mmap_vstart = alloc_empty_page_range(mmap_pages);
	if (!pending_reqs || !pending_grant_handles || !pending_vaddrs || !mmap_vstart) {
		if (pending_reqs)
			free(pending_reqs, M_DEVBUF);
		if (pending_grant_handles)
			free(pending_grant_handles, M_DEVBUF);
		if (pending_vaddrs)
			free(pending_vaddrs, M_DEVBUF);
		WPRINTF("out of memory\n");
		return;
	}

	for (i = 0; i < mmap_pages; i++) {
		pending_vaddrs[i] = mmap_vstart + (i << PAGE_SHIFT);
		pending_grant_handles[i] = BLKBACK_INVALID_HANDLE;
	}

	for (i = 0; i < blkif_reqs; i++) {
		STAILQ_INSERT_TAIL(&pending_free, &pending_reqs[i], free_list);
	}

	DPRINTF("registering %s\n", blkback.name);
	xenbus_register_backend(&blkback);
}

SYSINIT(xbbedev, SI_SUB_PSEUDO, SI_ORDER_ANY, blkback_init, NULL)

static void
close_device(blkif_t *blkif)
{
	DPRINTF("closing dev=%s\n", blkif->dev_name);
	if (blkif->vn) {
		int flags = FREAD;

		if (!blkif->read_only)
			flags |= FWRITE;

		if (blkif->csw) {
			dev_relthread(blkif->cdev);
			blkif->csw = NULL;
		}

		(void)vn_close(blkif->vn, flags, NOCRED, curthread);
		blkif->vn = NULL;
	}
}

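/*
 * Open the backing store named in "params" via the vnode interface.  If the
 * bare name fails to resolve, retry with "/dev/" prepended.  Only disk
 * vnodes are accepted; the sector and media sizes are queried through the
 * device's d_ioctl (DIOCGSECTORSIZE/DIOCGMEDIASIZE).
 */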
static int
open_device(blkif_t *blkif)
{
	struct nameidata nd;
	struct vattr vattr;
	struct cdev *dev;
	struct cdevsw *devsw;
	int flags = FREAD, err = 0;

	DPRINTF("opening dev=%s\n", blkif->dev_name);

	if (!blkif->read_only)
		flags |= FWRITE;

	if (!curthread->td_proc->p_fd->fd_cdir) {
		curthread->td_proc->p_fd->fd_cdir = rootvnode;
		VREF(rootvnode);
	}
	if (!curthread->td_proc->p_fd->fd_rdir) {
		curthread->td_proc->p_fd->fd_rdir = rootvnode;
		VREF(rootvnode);
	}
	if (!curthread->td_proc->p_fd->fd_jdir) {
		curthread->td_proc->p_fd->fd_jdir = rootvnode;
		VREF(rootvnode);
	}

again:
	NDINIT(&nd, LOOKUP, FOLLOW, UIO_SYSSPACE, blkif->dev_name, curthread);
	err = vn_open(&nd, &flags, 0, -1);
	if (err) {
		if (blkif->dev_name[0] != '/') {
			char *dev_path = "/dev/";
			char *dev_name;

			/* Try adding device path at beginning of name */
			dev_name = malloc(strlen(blkif->dev_name) + strlen(dev_path) + 1, M_DEVBUF, M_NOWAIT);
			if (dev_name) {
				sprintf(dev_name, "%s%s", dev_path, blkif->dev_name);
				free(blkif->dev_name, M_DEVBUF);
				blkif->dev_name = dev_name;
				goto again;
			}
		}
		xenbus_dev_fatal(blkif->xdev, err, "error opening device %s", blkif->dev_name);
		return err;
	}
	NDFREE(&nd, NDF_ONLY_PNBUF);

	blkif->vn = nd.ni_vp;

	/* We only support disks for now */
	if (!vn_isdisk(blkif->vn, &err)) {
		xenbus_dev_fatal(blkif->xdev, err, "device %s is not a disk", blkif->dev_name);
		VOP_UNLOCK(blkif->vn, 0, curthread);
		goto error;
	}

	blkif->cdev = blkif->vn->v_rdev;
	blkif->csw = dev_refthread(blkif->cdev);
	PANIC_IF(blkif->csw == NULL);

	err = VOP_GETATTR(blkif->vn, &vattr, NOCRED);
	if (err) {
		xenbus_dev_fatal(blkif->xdev, err,
			"error getting vnode attributes for device %s", blkif->dev_name);
		VOP_UNLOCK(blkif->vn, 0, curthread);
		goto error;
	}

	VOP_UNLOCK(blkif->vn, 0, curthread);

	dev = blkif->vn->v_rdev;
	devsw = dev->si_devsw;
	if (!devsw->d_ioctl) {
		err = ENODEV;
		xenbus_dev_fatal(blkif->xdev, err,
			"no d_ioctl for device %s!", blkif->dev_name);
		goto error;
	}

	err = (*devsw->d_ioctl)(dev, DIOCGSECTORSIZE, (caddr_t)&blkif->sector_size, FREAD, curthread);
	if (err) {
		xenbus_dev_fatal(blkif->xdev, err,
			"error calling ioctl DIOCGSECTORSIZE for device %s", blkif->dev_name);
		goto error;
	}
	blkif->sector_size_shift = fls(blkif->sector_size) - 1;

	err = (*devsw->d_ioctl)(dev, DIOCGMEDIASIZE, (caddr_t)&blkif->media_size, FREAD, curthread);
	if (err) {
		xenbus_dev_fatal(blkif->xdev, err,
			"error calling ioctl DIOCGMEDIASIZE for device %s", blkif->dev_name);
		goto error;
	}
	blkif->media_num_sectors = blkif->media_size >> blkif->sector_size_shift;

	blkif->major = major(vattr.va_rdev);
	blkif->minor = minor(vattr.va_rdev);

	DPRINTF("opened dev=%s major=%d minor=%d sector_size=%u media_size=%lld\n",
		blkif->dev_name, blkif->major, blkif->minor, blkif->sector_size, blkif->media_size);

	return 0;

error:
	close_device(blkif);
	return err;
}

static int
vbd_add_dev(struct xenbus_device *xdev)
{
	blkif_t *blkif = xdev->data;
	device_t nexus, ndev;
	devclass_t dc;
	int err = 0;

	mtx_lock(&Giant);

	/* We will add a vbd device as a child of nexus0 (for now) */
	if (!(dc = devclass_find("nexus")) ||
		!(nexus = devclass_get_device(dc, 0))) {
		WPRINTF("could not find nexus0!\n");
		err = ENOENT;
		goto done;
	}

	/* Create a newbus device representing the vbd */
	ndev = BUS_ADD_CHILD(nexus, 0, "vbd", blkif->handle);
	if (!ndev) {
		WPRINTF("could not create newbus device vbd%d!\n", blkif->handle);
		err = EFAULT;
		goto done;
	}

	blkif_get(blkif);
	device_set_ivars(ndev, blkif);
	blkif->ndev = ndev;

	device_probe_and_attach(ndev);

done:
	mtx_unlock(&Giant);

	return err;
}

enum {
	VBD_SYSCTL_DOMID,
	VBD_SYSCTL_ST_RD_REQ,
	VBD_SYSCTL_ST_WR_REQ,
	VBD_SYSCTL_ST_OO_REQ,
	VBD_SYSCTL_ST_ERR_REQ,
	VBD_SYSCTL_RING,
};

static char *
vbd_sysctl_ring_info(blkif_t *blkif, int cmd)
{
	char *buf = malloc(256, M_DEVBUF, M_WAITOK);
	if (buf) {
		if (!blkif->ring_connected)
			sprintf(buf, "ring not connected\n");
		else {
			blkif_back_ring_t *ring = &blkif->ring;
			sprintf(buf, "nr_ents=%x req_cons=%x"
				" req_prod=%x req_event=%x"
				" rsp_prod=%x rsp_event=%x",
				ring->nr_ents, ring->req_cons,
				ring->sring->req_prod, ring->sring->req_event,
				ring->sring->rsp_prod, ring->sring->rsp_event);
		}
	}
	return buf;
}

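/*
 * Shared sysctl handler for the per-vbd statistics nodes; arg2 selects
 * which field to report.  Integer stats are returned directly, the ring
 * dump as a string allocated by vbd_sysctl_ring_info().
 */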
static int
vbd_sysctl_handler(SYSCTL_HANDLER_ARGS)
{
	device_t dev = (device_t)arg1;
	blkif_t *blkif = (blkif_t *)device_get_ivars(dev);
	const char *value;
	char *buf = NULL;
	int err;

	switch (arg2) {
	case VBD_SYSCTL_DOMID:
		return sysctl_handle_int(oidp, NULL, blkif->domid, req);
	case VBD_SYSCTL_ST_RD_REQ:
		return sysctl_handle_int(oidp, NULL, blkif->st_rd_req, req);
	case VBD_SYSCTL_ST_WR_REQ:
		return sysctl_handle_int(oidp, NULL, blkif->st_wr_req, req);
	case VBD_SYSCTL_ST_OO_REQ:
		return sysctl_handle_int(oidp, NULL, blkif->st_oo_req, req);
	case VBD_SYSCTL_ST_ERR_REQ:
		return sysctl_handle_int(oidp, NULL, blkif->st_err_req, req);
	case VBD_SYSCTL_RING:
		value = buf = vbd_sysctl_ring_info(blkif, arg2);
		break;
	default:
		return (EINVAL);
	}

	err = SYSCTL_OUT(req, value, strlen(value));
	if (buf != NULL)
		free(buf, M_DEVBUF);

	return err;
}

/* Newbus vbd device driver probe */
static int
vbd_probe(device_t dev)
{
	DPRINTF("vbd%d\n", device_get_unit(dev));
	return 0;
}

/* Newbus vbd device driver attach */
static int
vbd_attach(device_t dev)
{
	blkif_t *blkif = (blkif_t *)device_get_ivars(dev);

	DPRINTF("%s\n", blkif->dev_name);

	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		OID_AUTO, "domid", CTLTYPE_INT|CTLFLAG_RD,
		dev, VBD_SYSCTL_DOMID, vbd_sysctl_handler, "I",
		"domid of frontend");
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		OID_AUTO, "rd_reqs", CTLTYPE_INT|CTLFLAG_RD,
		dev, VBD_SYSCTL_ST_RD_REQ, vbd_sysctl_handler, "I",
		"number of read reqs");
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		OID_AUTO, "wr_reqs", CTLTYPE_INT|CTLFLAG_RD,
		dev, VBD_SYSCTL_ST_WR_REQ, vbd_sysctl_handler, "I",
		"number of write reqs");
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		OID_AUTO, "oo_reqs", CTLTYPE_INT|CTLFLAG_RD,
		dev, VBD_SYSCTL_ST_OO_REQ, vbd_sysctl_handler, "I",
		"number of deferred reqs");
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		OID_AUTO, "err_reqs", CTLTYPE_INT|CTLFLAG_RD,
		dev, VBD_SYSCTL_ST_ERR_REQ, vbd_sysctl_handler, "I",
		"number of reqs that returned error");
#if XEN_BLKBACK_DEBUG
	SYSCTL_ADD_PROC(device_get_sysctl_ctx(dev), SYSCTL_CHILDREN(device_get_sysctl_tree(dev)),
		OID_AUTO, "ring", CTLTYPE_STRING|CTLFLAG_RD,
		dev, VBD_SYSCTL_RING, vbd_sysctl_handler, "A",
		"req ring info");
#endif

	if (!open_device(blkif))
		connect(blkif);

	return bus_generic_attach(dev);
}

/* Newbus vbd device driver detach */
static int
vbd_detach(device_t dev)
{
	blkif_t *blkif = (blkif_t *)device_get_ivars(dev);

	DPRINTF("%s\n", blkif->dev_name);

	close_device(blkif);

	bus_generic_detach(dev);

	blkif_put(blkif);

	return 0;
}

static device_method_t vbd_methods[] = {
	/* Device interface */
	DEVMETHOD(device_probe, vbd_probe),
	DEVMETHOD(device_attach, vbd_attach),
	DEVMETHOD(device_detach, vbd_detach),
	DEVMETHOD(device_shutdown, bus_generic_shutdown),
	DEVMETHOD(device_suspend, bus_generic_suspend),
	DEVMETHOD(device_resume, bus_generic_resume),
	{0, 0}
};

static devclass_t vbd_devclass;

static driver_t vbd_driver = {
	"vbd",
	vbd_methods,
	0,
};

DRIVER_MODULE(vbd, nexus, vbd_driver, vbd_devclass, 0, 0);

/*
 * Local variables:
 * mode: C
 * c-set-style: "BSD"
 * c-basic-offset: 4
 * tab-width: 4
 * indent-tabs-mode: t
 * End:
 */