/*-
 * Copyright (c) 2011, Bryan Venteicher <bryanv@daemoninthecloset.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Driver for VirtIO block devices. */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/bio.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/sglist.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>
#include <sys/taskqueue.h>

#include <geom/geom_disk.h>
#include <vm/uma.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/block/virtio_blk.h>

#include "virtio_if.h"

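/*
 * Each bio handed down by GEOM is wrapped in a vtblk_request while it
 * travels through the virtqueue: vbr_hdr is read by the host and
 * vbr_ack is the status byte the host writes back on completion (see
 * vtblk_execute_request() for the exact segment layout).
 */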
struct vtblk_request {
	struct virtio_blk_outhdr	 vbr_hdr;
	struct bio			*vbr_bp;
	uint8_t				 vbr_ack;

	TAILQ_ENTRY(vtblk_request)	 vbr_link;
};

struct vtblk_softc {
	device_t		 vtblk_dev;
	struct mtx		 vtblk_mtx;
	uint64_t		 vtblk_features;
	uint32_t		 vtblk_flags;
#define VTBLK_FLAG_INDIRECT	0x0001
#define VTBLK_FLAG_READONLY	0x0002
#define VTBLK_FLAG_DETACHING	0x0004
#define VTBLK_FLAG_SUSPENDED	0x0008
#define VTBLK_FLAG_DUMPING	0x0010

	struct virtqueue	*vtblk_vq;
	struct sglist		*vtblk_sglist;
	struct disk		*vtblk_disk;

	struct bio_queue_head	 vtblk_bioq;
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_free;
	TAILQ_HEAD(, vtblk_request)
				 vtblk_req_ready;

	struct taskqueue	*vtblk_tq;
	struct task		 vtblk_intr_task;

	int			 vtblk_max_nsegs;
	int			 vtblk_request_count;

	struct vtblk_request	 vtblk_dump_request;
};

static struct virtio_feature_desc vtblk_feature_desc[] = {
	{ VIRTIO_BLK_F_BARRIER,		"HostBarrier"	},
	{ VIRTIO_BLK_F_SIZE_MAX,	"MaxSegSize"	},
	{ VIRTIO_BLK_F_SEG_MAX,		"MaxNumSegs"	},
	{ VIRTIO_BLK_F_GEOMETRY,	"DiskGeometry"	},
	{ VIRTIO_BLK_F_RO,		"ReadOnly"	},
	{ VIRTIO_BLK_F_BLK_SIZE,	"BlockSize"	},
	{ VIRTIO_BLK_F_SCSI,		"SCSICmds"	},
	{ VIRTIO_BLK_F_FLUSH,		"FlushCmd"	},
	{ VIRTIO_BLK_F_TOPOLOGY,	"Topology"	},

	{ 0, NULL }
};

static int	vtblk_modevent(module_t, int, void *);

static int	vtblk_probe(device_t);
static int	vtblk_attach(device_t);
static int	vtblk_detach(device_t);
static int	vtblk_suspend(device_t);
static int	vtblk_resume(device_t);
static int	vtblk_shutdown(device_t);

static void	vtblk_negotiate_features(struct vtblk_softc *);
static int	vtblk_maximum_segments(struct vtblk_softc *,
		    struct virtio_blk_config *);
static int	vtblk_alloc_virtqueue(struct vtblk_softc *);
static void	vtblk_alloc_disk(struct vtblk_softc *,
		    struct virtio_blk_config *);
static void	vtblk_create_disk(struct vtblk_softc *);

static int	vtblk_open(struct disk *);
static int	vtblk_close(struct disk *);
static int	vtblk_ioctl(struct disk *, u_long, void *, int,
		    struct thread *);
static int	vtblk_dump(void *, void *, vm_offset_t, off_t, size_t);
static void	vtblk_strategy(struct bio *);

static void	vtblk_startio(struct vtblk_softc *);
static struct vtblk_request * vtblk_bio_request(struct vtblk_softc *);
static int	vtblk_execute_request(struct vtblk_softc *,
		    struct vtblk_request *);

static int	vtblk_vq_intr(void *);
static void	vtblk_intr_task(void *, int);

static void	vtblk_stop(struct vtblk_softc *);

static void	vtblk_get_ident(struct vtblk_softc *);
static void	vtblk_prepare_dump(struct vtblk_softc *);
static int	vtblk_write_dump(struct vtblk_softc *, void *, off_t, size_t);
static int	vtblk_flush_dump(struct vtblk_softc *);
static int	vtblk_poll_request(struct vtblk_softc *,
		    struct vtblk_request *);

static void	vtblk_drain_vq(struct vtblk_softc *, int);
static void	vtblk_drain(struct vtblk_softc *);

static int	vtblk_alloc_requests(struct vtblk_softc *);
static void	vtblk_free_requests(struct vtblk_softc *);
static struct vtblk_request * vtblk_dequeue_request(struct vtblk_softc *);
static void	vtblk_enqueue_request(struct vtblk_softc *,
		    struct vtblk_request *);

static struct vtblk_request * vtblk_dequeue_ready(struct vtblk_softc *);
static void	vtblk_enqueue_ready(struct vtblk_softc *,
		    struct vtblk_request *);

static void	vtblk_bio_error(struct bio *, int);

/* Tunables. */
static int vtblk_no_ident = 0;
TUNABLE_INT("hw.vtblk.no_ident", &vtblk_no_ident);

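/*
 * For example, adding the line below to /boot/loader.conf skips the
 * VIRTIO_BLK_T_GET_ID request at attach time, should a host mishandle it:
 *
 *   hw.vtblk.no_ident="1"
 */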
/* Features desired/implemented by this driver. */
#define VTBLK_FEATURES \
    (VIRTIO_BLK_F_BARRIER	| \
     VIRTIO_BLK_F_SIZE_MAX	| \
     VIRTIO_BLK_F_SEG_MAX	| \
     VIRTIO_BLK_F_GEOMETRY	| \
     VIRTIO_BLK_F_RO		| \
     VIRTIO_BLK_F_BLK_SIZE	| \
     VIRTIO_BLK_F_FLUSH		| \
     VIRTIO_RING_F_INDIRECT_DESC)

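/*
 * These are only the features we offer; vtblk_negotiate_features()
 * stores the subset the host also supports in vtblk_features, so a
 * bit being present above does not guarantee it is active.
 */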
#define VTBLK_MTX(_sc)		&(_sc)->vtblk_mtx
#define VTBLK_LOCK_INIT(_sc, _name) \
				mtx_init(VTBLK_MTX((_sc)), (_name), \
				    "VTBLK Lock", MTX_DEF)
#define VTBLK_LOCK(_sc)		mtx_lock(VTBLK_MTX((_sc)))
#define VTBLK_UNLOCK(_sc)	mtx_unlock(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_DESTROY(_sc)	mtx_destroy(VTBLK_MTX((_sc)))
#define VTBLK_LOCK_ASSERT(_sc)	mtx_assert(VTBLK_MTX((_sc)), MA_OWNED)
#define VTBLK_LOCK_ASSERT_NOTOWNED(_sc) \
				mtx_assert(VTBLK_MTX((_sc)), MA_NOTOWNED)

#define VTBLK_BIO_SEGMENTS(_bp)	sglist_count((_bp)->bio_data, (_bp)->bio_bcount)

#define VTBLK_DISK_NAME		"vtbd"

/*
 * Each block request uses at least two segments - one for the header
 * and one for the status.
 */
#define VTBLK_MIN_SEGMENTS	2

static uma_zone_t vtblk_req_zone;

static device_method_t vtblk_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe,		vtblk_probe),
	DEVMETHOD(device_attach,	vtblk_attach),
	DEVMETHOD(device_detach,	vtblk_detach),
	DEVMETHOD(device_suspend,	vtblk_suspend),
	DEVMETHOD(device_resume,	vtblk_resume),
	DEVMETHOD(device_shutdown,	vtblk_shutdown),

	{ 0, 0 }
};

static driver_t vtblk_driver = {
	"vtblk",
	vtblk_methods,
	sizeof(struct vtblk_softc)
};
static devclass_t vtblk_devclass;

DRIVER_MODULE(virtio_blk, virtio_pci, vtblk_driver, vtblk_devclass,
    vtblk_modevent, 0);
MODULE_VERSION(virtio_blk, 1);
MODULE_DEPEND(virtio_blk, virtio, 1, 1, 1);

static int
vtblk_modevent(module_t mod, int type, void *unused)
{
	int error;

	error = 0;

	switch (type) {
	case MOD_LOAD:
		vtblk_req_zone = uma_zcreate("vtblk_request",
		    sizeof(struct vtblk_request),
		    NULL, NULL, NULL, NULL, 0, 0);
		break;
	case MOD_QUIESCE:
	case MOD_UNLOAD:
		if (uma_zone_get_cur(vtblk_req_zone) > 0)
			error = EBUSY;
		else if (type == MOD_UNLOAD) {
			uma_zdestroy(vtblk_req_zone);
			vtblk_req_zone = NULL;
		}
		break;
	case MOD_SHUTDOWN:
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}

static int
vtblk_probe(device_t dev)
{

	if (virtio_get_device_type(dev) != VIRTIO_ID_BLOCK)
		return (ENXIO);

	device_set_desc(dev, "VirtIO Block Adapter");

	return (BUS_PROBE_DEFAULT);
}

static int
vtblk_attach(device_t dev)
{
	struct vtblk_softc *sc;
	struct virtio_blk_config blkcfg;
	int error;

	sc = device_get_softc(dev);
	sc->vtblk_dev = dev;

	VTBLK_LOCK_INIT(sc, device_get_nameunit(dev));

	bioq_init(&sc->vtblk_bioq);
	TAILQ_INIT(&sc->vtblk_req_free);
	TAILQ_INIT(&sc->vtblk_req_ready);

	virtio_set_feature_desc(dev, vtblk_feature_desc);
	vtblk_negotiate_features(sc);

	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
		sc->vtblk_flags |= VTBLK_FLAG_INDIRECT;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_RO))
		sc->vtblk_flags |= VTBLK_FLAG_READONLY;

	/* Get local copy of config. */
	virtio_read_device_config(dev, 0, &blkcfg,
	    sizeof(struct virtio_blk_config));

	/*
	 * With the current sglist(9) implementation, it is not easy
	 * for us to support a maximum segment size as adjacent
	 * segments are coalesced. For now, just make sure it's larger
	 * than the maximum supported transfer size.
	 */
	if (virtio_with_feature(dev, VIRTIO_BLK_F_SIZE_MAX)) {
		if (blkcfg.size_max < MAXPHYS) {
			error = ENOTSUP;
			device_printf(dev, "host requires unsupported "
			    "maximum segment size feature\n");
			goto fail;
		}
	}

	sc->vtblk_max_nsegs = vtblk_maximum_segments(sc, &blkcfg);

	/*
	 * Allocate the working sglist. The number of segments may be
	 * too large to safely store on the stack.
	 */
	sc->vtblk_sglist = sglist_alloc(sc->vtblk_max_nsegs, M_NOWAIT);
	if (sc->vtblk_sglist == NULL) {
		error = ENOMEM;
		device_printf(dev, "cannot allocate sglist\n");
		goto fail;
	}

	error = vtblk_alloc_virtqueue(sc);
	if (error) {
		device_printf(dev, "cannot allocate virtqueue\n");
		goto fail;
	}

	error = vtblk_alloc_requests(sc);
	if (error) {
		device_printf(dev, "cannot preallocate requests\n");
		goto fail;
	}

	vtblk_alloc_disk(sc, &blkcfg);

	TASK_INIT(&sc->vtblk_intr_task, 0, vtblk_intr_task, sc);
	sc->vtblk_tq = taskqueue_create_fast("vtblk_taskq", M_NOWAIT,
	    taskqueue_thread_enqueue, &sc->vtblk_tq);
	if (sc->vtblk_tq == NULL) {
		error = ENOMEM;
		device_printf(dev, "cannot allocate taskqueue\n");
		goto fail;
	}
	taskqueue_start_threads(&sc->vtblk_tq, 1, PI_DISK, "%s taskq",
	    device_get_nameunit(dev));

	error = virtio_setup_intr(dev, INTR_TYPE_BIO | INTR_ENTROPY);
	if (error) {
		device_printf(dev, "cannot set up virtqueue interrupt\n");
		goto fail;
	}

	vtblk_create_disk(sc);

	virtqueue_enable_intr(sc->vtblk_vq);

fail:
	if (error)
		vtblk_detach(dev);

	return (error);
}

static int
vtblk_detach(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	sc->vtblk_flags |= VTBLK_FLAG_DETACHING;
	if (device_is_attached(dev))
		vtblk_stop(sc);
	VTBLK_UNLOCK(sc);

	if (sc->vtblk_tq != NULL) {
		taskqueue_drain(sc->vtblk_tq, &sc->vtblk_intr_task);
		taskqueue_free(sc->vtblk_tq);
		sc->vtblk_tq = NULL;
	}

	vtblk_drain(sc);

	if (sc->vtblk_disk != NULL) {
		disk_destroy(sc->vtblk_disk);
		sc->vtblk_disk = NULL;
	}

	if (sc->vtblk_sglist != NULL) {
		sglist_free(sc->vtblk_sglist);
		sc->vtblk_sglist = NULL;
	}

	VTBLK_LOCK_DESTROY(sc);

	return (0);
}

static int
vtblk_suspend(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	sc->vtblk_flags |= VTBLK_FLAG_SUSPENDED;
	/* TODO Wait for any inflight IO to complete? */
	VTBLK_UNLOCK(sc);

	return (0);
}

static int
vtblk_resume(device_t dev)
{
	struct vtblk_softc *sc;

	sc = device_get_softc(dev);

	VTBLK_LOCK(sc);
	sc->vtblk_flags &= ~VTBLK_FLAG_SUSPENDED;
	/* TODO Resume IO? */
	VTBLK_UNLOCK(sc);

	return (0);
}

static int
vtblk_shutdown(device_t dev)
{

	return (0);
}

static int
vtblk_open(struct disk *dp)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (sc->vtblk_flags & VTBLK_FLAG_DETACHING ? ENXIO : 0);
}

static int
vtblk_close(struct disk *dp)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (0);
}

static int
vtblk_ioctl(struct disk *dp, u_long cmd, void *addr, int flag,
    struct thread *td)
{
	struct vtblk_softc *sc;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	return (ENOTTY);
}

static int
vtblk_dump(void *arg, void *virtual, vm_offset_t physical, off_t offset,
    size_t length)
{
	struct disk *dp;
	struct vtblk_softc *sc;
	int error;

	dp = arg;
	error = 0;

	if ((sc = dp->d_drv1) == NULL)
		return (ENXIO);

	VTBLK_LOCK(sc);

	if ((sc->vtblk_flags & VTBLK_FLAG_DUMPING) == 0) {
		vtblk_prepare_dump(sc);
		sc->vtblk_flags |= VTBLK_FLAG_DUMPING;
	}

	if (length > 0)
		error = vtblk_write_dump(sc, virtual, offset, length);
	else if (virtual == NULL && offset == 0)
		error = vtblk_flush_dump(sc);

	VTBLK_UNLOCK(sc);

	return (error);
}

static void
vtblk_strategy(struct bio *bp)
{
	struct vtblk_softc *sc;

	if ((sc = bp->bio_disk->d_drv1) == NULL) {
		vtblk_bio_error(bp, EINVAL);
		return;
	}

	/*
	 * Fail any write if RO. Unfortunately, there does not seem to
	 * be a better way to report our read-only status to GEOM above.
	 */
	if (sc->vtblk_flags & VTBLK_FLAG_READONLY &&
	    (bp->bio_cmd == BIO_WRITE || bp->bio_cmd == BIO_FLUSH)) {
		vtblk_bio_error(bp, EROFS);
		return;
	}

	/*
	 * Prevent read/write buffers spanning too many segments from
	 * getting into the queue. This should only trip if d_maxsize
	 * was incorrectly set.
	 */
	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
		KASSERT(VTBLK_BIO_SEGMENTS(bp) <= sc->vtblk_max_nsegs -
		    VTBLK_MIN_SEGMENTS,
		    ("bio spanned too many segments: %d, max: %d",
		    VTBLK_BIO_SEGMENTS(bp),
		    sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS));
	}

	VTBLK_LOCK(sc);
	if ((sc->vtblk_flags & VTBLK_FLAG_DETACHING) == 0) {
		bioq_disksort(&sc->vtblk_bioq, bp);
		vtblk_startio(sc);
	} else
		vtblk_bio_error(bp, ENXIO);
	VTBLK_UNLOCK(sc);
}

static void
vtblk_negotiate_features(struct vtblk_softc *sc)
{
	device_t dev;
	uint64_t features;

	dev = sc->vtblk_dev;
	features = VTBLK_FEATURES;

	sc->vtblk_features = virtio_negotiate_features(dev, features);
}

static int
vtblk_maximum_segments(struct vtblk_softc *sc,
    struct virtio_blk_config *blkcfg)
{
	device_t dev;
	int nsegs;

	dev = sc->vtblk_dev;
	nsegs = VTBLK_MIN_SEGMENTS;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_SEG_MAX)) {
		nsegs += MIN(blkcfg->seg_max, MAXPHYS / PAGE_SIZE + 1);
		if (sc->vtblk_flags & VTBLK_FLAG_INDIRECT)
			nsegs = MIN(nsegs, VIRTIO_MAX_INDIRECT);
	} else
		nsegs += 1;

	return (nsegs);
}

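/*
 * A worked example (a sketch; the values depend on the build): with a
 * 4 KB PAGE_SIZE and the common 128 KB MAXPHYS, a transfer needs at
 * most MAXPHYS / PAGE_SIZE + 1 = 33 data segments, the extra one
 * covering a non-page-aligned buffer. If the host advertises
 * seg_max >= 33, vtblk_maximum_segments() returns 2 + 33 = 35: the
 * header and status segments plus the data segments.
 */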
static int
vtblk_alloc_virtqueue(struct vtblk_softc *sc)
{
	device_t dev;
	struct vq_alloc_info vq_info;

	dev = sc->vtblk_dev;

	VQ_ALLOC_INFO_INIT(&vq_info, sc->vtblk_max_nsegs,
	    vtblk_vq_intr, sc, &sc->vtblk_vq,
	    "%s request", device_get_nameunit(dev));

	return (virtio_alloc_virtqueues(dev, 0, 1, &vq_info));
}

static void
vtblk_alloc_disk(struct vtblk_softc *sc, struct virtio_blk_config *blkcfg)
{
	device_t dev;
	struct disk *dp;

	dev = sc->vtblk_dev;

	sc->vtblk_disk = dp = disk_alloc();
	dp->d_open = vtblk_open;
	dp->d_close = vtblk_close;
	dp->d_ioctl = vtblk_ioctl;
	dp->d_strategy = vtblk_strategy;
	dp->d_name = VTBLK_DISK_NAME;
	dp->d_unit = device_get_unit(dev);
	dp->d_drv1 = sc;

	if ((sc->vtblk_flags & VTBLK_FLAG_READONLY) == 0)
		dp->d_dump = vtblk_dump;

	/* Capacity is always in 512-byte units. */
	dp->d_mediasize = blkcfg->capacity * 512;

	if (virtio_with_feature(dev, VIRTIO_BLK_F_BLK_SIZE))
		dp->d_sectorsize = blkcfg->blk_size;
	else
		dp->d_sectorsize = 512;

	/*
	 * The VirtIO maximum I/O size is given in terms of segments.
	 * However, FreeBSD limits I/O size by logical buffer size, not
	 * by physically contiguous pages. Therefore, we have to assume
	 * no pages are contiguous. This may impose an artificially low
	 * maximum I/O size. But in practice, since QEMU advertises 128
	 * segments, this gives us a maximum I/O size of 125 * PAGE_SIZE,
	 * which is typically greater than MAXPHYS. Eventually we should
	 * just advertise MAXPHYS and split buffers that are too big.
	 *
	 * Note we must subtract one additional segment in case of
	 * non-page-aligned buffers.
	 */
	dp->d_maxsize = (sc->vtblk_max_nsegs - VTBLK_MIN_SEGMENTS - 1) *
	    PAGE_SIZE;
	if (dp->d_maxsize < PAGE_SIZE)
		dp->d_maxsize = PAGE_SIZE; /* XXX */

	if (virtio_with_feature(dev, VIRTIO_BLK_F_GEOMETRY)) {
		dp->d_fwsectors = blkcfg->geometry.sectors;
		dp->d_fwheads = blkcfg->geometry.heads;
	}

	if (virtio_with_feature(dev, VIRTIO_BLK_F_FLUSH))
		dp->d_flags |= DISKFLAG_CANFLUSHCACHE;
}

static void
vtblk_create_disk(struct vtblk_softc *sc)
{
	struct disk *dp;

	dp = sc->vtblk_disk;

	/*
	 * Retrieving the identification string must be done after
	 * the virtqueue interrupt is set up, otherwise it will hang.
	 */
	vtblk_get_ident(sc);

	device_printf(sc->vtblk_dev, "%juMB (%ju %u byte sectors)\n",
	    (uintmax_t) dp->d_mediasize >> 20,
	    (uintmax_t) dp->d_mediasize / dp->d_sectorsize,
	    dp->d_sectorsize);

	disk_create(dp, DISK_VERSION);
}

static void
vtblk_startio(struct vtblk_softc *sc)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int enq;

	vq = sc->vtblk_vq;
	enq = 0;

	VTBLK_LOCK_ASSERT(sc);

	if (sc->vtblk_flags & VTBLK_FLAG_SUSPENDED)
		return;

	while (!virtqueue_full(vq)) {
		if ((req = vtblk_dequeue_ready(sc)) == NULL)
			req = vtblk_bio_request(sc);
		if (req == NULL)
			break;

		if (vtblk_execute_request(sc, req) != 0) {
			vtblk_enqueue_ready(sc, req);
			break;
		}

		enq++;
	}

	if (enq > 0)
		virtqueue_notify(vq);
}

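/*
 * Turn the bio at the head of the queue into a request. Note the
 * request sector is always in 512-byte units, independent of the
 * d_sectorsize advertised to GEOM, which is why bio_offset is divided
 * by 512 below rather than by the configured block size.
 */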
static struct vtblk_request *
vtblk_bio_request(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bp;

	bioq = &sc->vtblk_bioq;

	if (bioq_first(bioq) == NULL)
		return (NULL);

	req = vtblk_dequeue_request(sc);
	if (req == NULL)
		return (NULL);

	bp = bioq_takefirst(bioq);
	req->vbr_bp = bp;
	req->vbr_ack = -1;
	req->vbr_hdr.ioprio = 1;

	switch (bp->bio_cmd) {
	case BIO_FLUSH:
		req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
		break;
	case BIO_READ:
		req->vbr_hdr.type = VIRTIO_BLK_T_IN;
		req->vbr_hdr.sector = bp->bio_offset / 512;
		break;
	case BIO_WRITE:
		req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
		req->vbr_hdr.sector = bp->bio_offset / 512;
		break;
	default:
		KASSERT(0, ("bio with unhandled cmd: %d", bp->bio_cmd));
		req->vbr_hdr.type = -1;
		break;
	}

	if (bp->bio_flags & BIO_ORDERED)
		req->vbr_hdr.type |= VIRTIO_BLK_T_BARRIER;

	return (req);
}

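/*
 * The descriptor chain built below always looks like this (a sketch;
 * the number of data segments varies with the buffer):
 *
 *   [0]      vbr_hdr   - device-readable request header
 *   [1..n]   bio_data  - device-writable for BIO_READ, readable for
 *                        BIO_WRITE; absent for BIO_FLUSH
 *   [n+1]    vbr_ack   - device-writable status byte
 *
 * virtqueue_enqueue() is told where the readable segments end and the
 * writable ones begin via its last two arguments.
 */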
static int
vtblk_execute_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	struct sglist *sg;
	struct bio *bp;
	int writable, error;

	sg = sc->vtblk_sglist;
	bp = req->vbr_bp;
	writable = 0;

	VTBLK_LOCK_ASSERT(sc);

	sglist_reset(sg);
	error = sglist_append(sg, &req->vbr_hdr,
	    sizeof(struct virtio_blk_outhdr));
	KASSERT(error == 0, ("error adding header to sglist"));
	KASSERT(sg->sg_nseg == 1,
	    ("header spanned multiple segments: %d", sg->sg_nseg));

	if (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE) {
		error = sglist_append(sg, bp->bio_data, bp->bio_bcount);
		KASSERT(error == 0, ("error adding buffer to sglist"));

		/* BIO_READ means the host writes into our buffer. */
		if (bp->bio_cmd == BIO_READ)
			writable += sg->sg_nseg - 1;
	}

	error = sglist_append(sg, &req->vbr_ack, sizeof(uint8_t));
	KASSERT(error == 0, ("error adding ack to sglist"));
	writable++;

	KASSERT(sg->sg_nseg >= VTBLK_MIN_SEGMENTS,
	    ("fewer than min segments: %d", sg->sg_nseg));

	error = virtqueue_enqueue(sc->vtblk_vq, req, sg,
	    sg->sg_nseg - writable, writable);

	return (error);
}

static int
vtblk_vq_intr(void *xsc)
{
	struct vtblk_softc *sc;

	sc = xsc;

	virtqueue_disable_intr(sc->vtblk_vq);
	taskqueue_enqueue_fast(sc->vtblk_tq, &sc->vtblk_intr_task);

	return (1);
}

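/*
 * Completions are handled in taskqueue context rather than in the
 * interrupt filter above: the filter only disables further virtqueue
 * interrupts and schedules this task. If virtqueue_enable_intr()
 * reports that more buffers arrived while interrupts were off, the
 * task re-queues itself so no completion is lost.
 */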
static void
vtblk_intr_task(void *arg, int pending)
{
	struct vtblk_softc *sc;
	struct vtblk_request *req;
	struct virtqueue *vq;
	struct bio *bp;

	sc = arg;
	vq = sc->vtblk_vq;

	VTBLK_LOCK(sc);
	if (sc->vtblk_flags & VTBLK_FLAG_DETACHING) {
		VTBLK_UNLOCK(sc);
		return;
	}

	while ((req = virtqueue_dequeue(vq, NULL)) != NULL) {
		bp = req->vbr_bp;

		if (req->vbr_ack == VIRTIO_BLK_S_OK)
			bp->bio_resid = 0;
		else {
			bp->bio_flags |= BIO_ERROR;
			if (req->vbr_ack == VIRTIO_BLK_S_UNSUPP)
				bp->bio_error = ENOTSUP;
			else
				bp->bio_error = EIO;
		}

		biodone(bp);
		vtblk_enqueue_request(sc, req);
	}

	vtblk_startio(sc);

	if (virtqueue_enable_intr(vq) != 0) {
		virtqueue_disable_intr(vq);
		VTBLK_UNLOCK(sc);
		taskqueue_enqueue_fast(sc->vtblk_tq,
		    &sc->vtblk_intr_task);
		return;
	}

	VTBLK_UNLOCK(sc);
}

static void
vtblk_stop(struct vtblk_softc *sc)
{

	virtqueue_disable_intr(sc->vtblk_vq);
	virtio_stop(sc->vtblk_dev);
}

static void
vtblk_get_ident(struct vtblk_softc *sc)
{
	struct bio buf;
	struct disk *dp;
	struct vtblk_request *req;
	int len, error;

	dp = sc->vtblk_disk;
	len = MIN(VIRTIO_BLK_ID_BYTES, DISK_IDENT_SIZE);

	if (vtblk_no_ident != 0)
		return;

	req = vtblk_dequeue_request(sc);
	if (req == NULL)
		return;

	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_GET_ID;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = 0;

	req->vbr_bp = &buf;
	bzero(&buf, sizeof(struct bio));

	buf.bio_cmd = BIO_READ;
	buf.bio_data = dp->d_ident;
	buf.bio_bcount = len;

	VTBLK_LOCK(sc);
	error = vtblk_poll_request(sc, req);
	VTBLK_UNLOCK(sc);

	vtblk_enqueue_request(sc, req);

	if (error) {
		device_printf(sc->vtblk_dev,
		    "error getting device identifier: %d\n", error);
	}
}

static void
vtblk_prepare_dump(struct vtblk_softc *sc)
{
	device_t dev;
	struct virtqueue *vq;

	dev = sc->vtblk_dev;
	vq = sc->vtblk_vq;

	vtblk_stop(sc);

	/*
	 * Drain all requests caught in-flight in the virtqueue,
	 * skipping biodone(). When dumping, only one request is
	 * outstanding at a time, and we just poll the virtqueue
	 * for the response.
	 */
	vtblk_drain_vq(sc, 1);

	if (virtio_reinit(dev, sc->vtblk_features) != 0)
		panic("cannot reinit VirtIO block device during dump");

	virtqueue_disable_intr(vq);
	virtio_reinit_complete(dev);
}

static int
vtblk_write_dump(struct vtblk_softc *sc, void *virtual, off_t offset,
    size_t length)
{
	struct bio buf;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_OUT;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = offset / 512;

	req->vbr_bp = &buf;
	bzero(&buf, sizeof(struct bio));

	buf.bio_cmd = BIO_WRITE;
	buf.bio_data = virtual;
	buf.bio_bcount = length;

	return (vtblk_poll_request(sc, req));
}

static int
vtblk_flush_dump(struct vtblk_softc *sc)
{
	struct bio buf;
	struct vtblk_request *req;

	req = &sc->vtblk_dump_request;
	req->vbr_ack = -1;
	req->vbr_hdr.type = VIRTIO_BLK_T_FLUSH;
	req->vbr_hdr.ioprio = 1;
	req->vbr_hdr.sector = 0;

	req->vbr_bp = &buf;
	bzero(&buf, sizeof(struct bio));

	buf.bio_cmd = BIO_FLUSH;

	return (vtblk_poll_request(sc, req));
}

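/*
 * Synchronously execute a single request and spin for its completion.
 * This is used for the ident request at attach time and for the dump
 * path, where normal completion handling is unavailable; the virtqueue
 * must be otherwise empty so the polled response belongs to this
 * request.
 */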
static int
vtblk_poll_request(struct vtblk_softc *sc, struct vtblk_request *req)
{
	device_t dev;
	struct virtqueue *vq;
	struct vtblk_request *r;
	int error;

	dev = sc->vtblk_dev;
	vq = sc->vtblk_vq;

	if (!virtqueue_empty(vq))
		return (EBUSY);

	error = vtblk_execute_request(sc, req);
	if (error)
		return (error);

	virtqueue_notify(vq);

	r = virtqueue_poll(vq, NULL);
	KASSERT(r == req, ("unexpected request response"));

	if (req->vbr_ack != VIRTIO_BLK_S_OK) {
		error = req->vbr_ack == VIRTIO_BLK_S_UNSUPP ? ENOTSUP : EIO;
		if (bootverbose)
			device_printf(dev,
			    "vtblk_poll_request: IO error: %d\n", error);
	}

	return (error);
}

static void
vtblk_drain_vq(struct vtblk_softc *sc, int skip_done)
{
	struct virtqueue *vq;
	struct vtblk_request *req;
	int last;

	vq = sc->vtblk_vq;
	last = 0;

	while ((req = virtqueue_drain(vq, &last)) != NULL) {
		if (!skip_done)
			vtblk_bio_error(req->vbr_bp, ENXIO);

		vtblk_enqueue_request(sc, req);
	}

	KASSERT(virtqueue_empty(vq), ("virtqueue not empty"));
}

static void
vtblk_drain(struct vtblk_softc *sc)
{
	struct bio_queue_head *bioq;
	struct vtblk_request *req;
	struct bio *bp;

	bioq = &sc->vtblk_bioq;

	if (sc->vtblk_vq != NULL)
		vtblk_drain_vq(sc, 0);

	while ((req = vtblk_dequeue_ready(sc)) != NULL) {
		vtblk_bio_error(req->vbr_bp, ENXIO);
		vtblk_enqueue_request(sc, req);
	}

	while (bioq_first(bioq) != NULL) {
		bp = bioq_takefirst(bioq);
		vtblk_bio_error(bp, ENXIO);
	}

	vtblk_free_requests(sc);
}

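/*
 * Example sizing (a sketch; actual numbers depend on the host): with a
 * 128-entry virtqueue and no indirect descriptor support, each request
 * occupies at least VTBLK_MIN_SEGMENTS ring descriptors, so only
 * 128 / 2 = 64 requests are preallocated below. With indirect
 * descriptors, a request consumes a single ring slot and all 128 are
 * allocated.
 */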
static int
vtblk_alloc_requests(struct vtblk_softc *sc)
{
	struct vtblk_request *req;
	int i, size;

	size = virtqueue_size(sc->vtblk_vq);

	/*
	 * Preallocate sufficient requests to keep the virtqueue full. Each
	 * request consumes VTBLK_MIN_SEGMENTS or more descriptors so reduce
	 * the number allocated when indirect descriptors are not available.
	 */
	if ((sc->vtblk_flags & VTBLK_FLAG_INDIRECT) == 0)
		size /= VTBLK_MIN_SEGMENTS;

	for (i = 0; i < size; i++) {
		req = uma_zalloc(vtblk_req_zone, M_NOWAIT);
		if (req == NULL)
			return (ENOMEM);

		sc->vtblk_request_count++;
		vtblk_enqueue_request(sc, req);
	}

	return (0);
}

static void
vtblk_free_requests(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	while ((req = vtblk_dequeue_request(sc)) != NULL) {
		sc->vtblk_request_count--;
		uma_zfree(vtblk_req_zone, req);
	}

	KASSERT(sc->vtblk_request_count == 0, ("leaked requests"));
}

static struct vtblk_request *
vtblk_dequeue_request(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = TAILQ_FIRST(&sc->vtblk_req_free);
	if (req != NULL)
		TAILQ_REMOVE(&sc->vtblk_req_free, req, vbr_link);

	return (req);
}

static void
vtblk_enqueue_request(struct vtblk_softc *sc, struct vtblk_request *req)
{

	bzero(req, sizeof(struct vtblk_request));
	TAILQ_INSERT_HEAD(&sc->vtblk_req_free, req, vbr_link);
}

static struct vtblk_request *
vtblk_dequeue_ready(struct vtblk_softc *sc)
{
	struct vtblk_request *req;

	req = TAILQ_FIRST(&sc->vtblk_req_ready);
	if (req != NULL)
		TAILQ_REMOVE(&sc->vtblk_req_ready, req, vbr_link);

	return (req);
}

static void
vtblk_enqueue_ready(struct vtblk_softc *sc, struct vtblk_request *req)
{

	TAILQ_INSERT_HEAD(&sc->vtblk_req_ready, req, vbr_link);
}

static void
vtblk_bio_error(struct bio *bp, int error)
{

	biofinish(bp, NULL, error);
}