b8a587074f
a8af6270bd96be6ccd86f70b60fa6512b710e4f0 virtio_blk: Include function name in panic string cbdb03a694b76c5253d7ae3a59b9995b9afbb67a virtio_balloon: Do the notify outside of the lock By the time we return from virtqueue_notify(), the descriptor will be in the used ring so we shouldn't have to sleep. 10ba392e60692529a5cbc1e9987e4064e0128447 virtio: Use DEVMETHOD_END 80cbcc4d6552cac758be67f0c99c36f23ce62110 virtqueue: Add support for VIRTIO_F_RING_EVENT_IDX This can be used to reduce the number of guest/host and host/guest interrupts by delaying the interrupt until a certain index value is reached. Actual use by the network driver will come along later. 8fc465969acc0c58477153e4c3530390db436c02 virtqueue: Simplify virtqueue_nused() Since the values just wrap naturally at UINT16_MAX, we can just subtract the two values directly, rather than doing 2's complement math. a8aa22f25959e2767d006cd621b69050e7ffb0ae virtio_blk: Remove debugging crud from 75dd732a There seems to be an issue with Qemu (or FreeBSD VirtIO) that sets the PCI register space for the device config to bogus values. This only seems to happen after unloading and reloading the module. d404800661cb2a9769c033f8a50b2133934501aa virtio_blk: Use better variable name 75dd732a97743d96e7c63f7ced3c2169696dadd3 virtio_blk: Partially revert 92ba40e65 Just use the virtqueue to determine if any requests are still inflight. 06661ed66b7a9efaea240f99f414c368f1bbcdc7 virtio_blk: error if allowed too few segments Should never happen unless the host provides use with a bogus seg_max value. 
4b33e5085bc87a818433d7e664a0a2c8f56a1a89 virtio_blk: Sort function declarations 426b9f5cac892c9c64cc7631966461514f7e08c6 virtio_blk: Cleanup whitespace 617c23e12c61e3c2233d942db713c6b8ff0bd112 virtio_blk: Call disk_err() on error'd completed requests 081a5712d4b2e0abf273be4d26affcf3870263a9 virtio_blk: ASSERT the ready and inflight request queues are empty a9be2631a4f770a84145c18ee03a3f103bed4ca8 virtio_blk: Simplify check for too many segments At the cost of a small style violation. e00ec09da014f2e60cc75542d0ab78898672d521 virtio_blk: Add beginnings of suspend/resume Still not sure if we need to virtio_stop()/virtio_reinit() the device before/after a suspend. Don't start additional IO when marked as suspending. 47c71dc6ce8c238aa59ce8afd4bda5aa294bc884 virtio_blk: Panic when dealt an unhandled BIO cmd 1055544f90fb8c0cc6a2395f5b6104039606aafe virtio_blk: Add VQ enqueue/dequeue wrappers Wrapper functions managed the added/removing to the in-flight list of requests. Normally biodone() any completed IO when draining the virtqueue. 92ba40e65b3bb5e4acb9300ece711f1ea8f3f7f4 virtio_blk: Add in-flight list of requests 74f6d260e075443544522c0833dc2712dd93f49b virtio_blk: Rename VTBLK_FLAG_DETACHING to VTBLK_FLAG_DETACH 7aa549050f6fc6551c09c6362ed6b2a0728956ef virtio_blk: Finish all BIOs through vtblk_finish_bio() Also properly set bio_resid in the case of errors. Most geom_disk providers seem to do the same. 9eef6d0e6f7e5dd362f71ba097f2e2e4c3744882 Added function to translate VirtIO status to error code ef06adc337f31e1129d6d5f26de6d8d1be27bcd2 Reset dumping flag when given unexpected parameters 393b3e390c644193a2e392220dcc6a6c50b212d9 Added missing VTBLK_LOCK() in dump handler Obtained from: Bryan Venteicher bryanv at daemoninthecloset dot org
810 lines
19 KiB
C
810 lines
19 KiB
C
/*-
|
|
* Copyright (c) 2011, Bryan Venteicher <bryanv@daemoninthecloset.org>
|
|
* All rights reserved.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice unmodified, this list of conditions, and the following
|
|
* disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
|
|
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
|
|
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
|
|
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
|
|
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
|
|
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
|
|
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
|
*/
|
|
|
|
/*
|
|
* Implements the virtqueue interface as basically described
|
|
* in the original VirtIO paper.
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/malloc.h>
|
|
#include <sys/sglist.h>
|
|
#include <vm/vm.h>
|
|
#include <vm/pmap.h>
|
|
|
|
#include <machine/cpu.h>
|
|
#include <machine/bus.h>
|
|
#include <machine/atomic.h>
|
|
#include <machine/resource.h>
|
|
#include <sys/bus.h>
|
|
#include <sys/rman.h>
|
|
|
|
#include <dev/virtio/virtio.h>
|
|
#include <dev/virtio/virtqueue.h>
|
|
#include <dev/virtio/virtio_ring.h>
|
|
|
|
#include "virtio_bus_if.h"
|
|
|
|
struct virtqueue {
|
|
device_t vq_dev;
|
|
char vq_name[VIRTQUEUE_MAX_NAME_SZ];
|
|
uint16_t vq_queue_index;
|
|
uint16_t vq_nentries;
|
|
uint32_t vq_flags;
|
|
#define VIRTQUEUE_FLAG_INDIRECT 0x0001
|
|
#define VIRTQUEUE_FLAG_EVENT_IDX 0x0002
|
|
|
|
int vq_alignment;
|
|
int vq_ring_size;
|
|
void *vq_ring_mem;
|
|
int vq_max_indirect_size;
|
|
int vq_indirect_mem_size;
|
|
virtqueue_intr_t *vq_intrhand;
|
|
void *vq_intrhand_arg;
|
|
|
|
struct vring vq_ring;
|
|
uint16_t vq_free_cnt;
|
|
uint16_t vq_queued_cnt;
|
|
/*
|
|
* Head of the free chain in the descriptor table. If
|
|
* there are no free descriptors, this will be set to
|
|
* VQ_RING_DESC_CHAIN_END.
|
|
*/
|
|
uint16_t vq_desc_head_idx;
|
|
/*
|
|
* Last consumed descriptor in the used table,
|
|
* trails vq_ring.used->idx.
|
|
*/
|
|
uint16_t vq_used_cons_idx;
|
|
|
|
struct vq_desc_extra {
|
|
void *cookie;
|
|
struct vring_desc *indirect;
|
|
vm_paddr_t indirect_paddr;
|
|
uint16_t ndescs;
|
|
} vq_descx[0];
|
|
};
|
|
|
|
/*
 * A virtqueue holds at most 2^15 entries, so 2^15 can never be a valid
 * index into the descriptor table.  That value therefore terminates the
 * free descriptor chain, which lets the assertions below check that
 * vq_free_cnt is being maintained correctly.
 */
#define VQ_RING_DESC_CHAIN_END 32768

/* KASSERT() whose message is prefixed with the function and queue name. */
#define VQASSERT(_vq, _exp, _msg, ...)					\
    KASSERT((_exp),("%s: %s - "_msg, __func__, (_vq)->vq_name,		\
	##__VA_ARGS__))

/* Assert that _idx lies within the descriptor table of _vq. */
#define VQ_RING_ASSERT_VALID_IDX(_vq, _idx)				\
    VQASSERT((_vq), (_idx) < (_vq)->vq_nentries,			\
	"invalid ring index: %d, max: %d", (_idx),			\
	(_vq)->vq_nentries)

/* Assert that the free chain head of _vq is the chain terminator. */
#define VQ_RING_ASSERT_CHAIN_TERM(_vq)					\
    VQASSERT((_vq), (_vq)->vq_desc_head_idx ==				\
	VQ_RING_DESC_CHAIN_END, "full ring terminated "			\
	"incorrectly: head idx: %d", (_vq)->vq_desc_head_idx)
|
|
|
|
/* Indirect descriptor list management. */
static int	virtqueue_init_indirect(struct virtqueue *vq,
		    int indirect_size);
static void	virtqueue_free_indirect(struct virtqueue *vq);
static void	virtqueue_init_indirect_list(struct virtqueue *vq,
		    struct vring_desc *indirect);

/* Ring manipulation helpers. */
static void	vq_ring_init(struct virtqueue *vq);
static void	vq_ring_update_avail(struct virtqueue *vq, uint16_t desc_idx);
static uint16_t	vq_ring_enqueue_segments(struct virtqueue *vq,
		    struct vring_desc *desc, uint16_t head_idx,
		    struct sglist *sg, int readable, int writable);
static int	vq_ring_use_indirect(struct virtqueue *vq, int needed);
static void	vq_ring_enqueue_indirect(struct virtqueue *vq, void *cookie,
		    struct sglist *sg, int readable, int writable);
static int	vq_ring_must_notify_host(struct virtqueue *vq);
static void	vq_ring_notify_host(struct virtqueue *vq);
static void	vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx);
|
|
|
|
uint64_t
|
|
virtqueue_filter_features(uint64_t features)
|
|
{
|
|
uint64_t mask;
|
|
|
|
mask = (1 << VIRTIO_TRANSPORT_F_START) - 1;
|
|
mask |= VIRTIO_RING_F_INDIRECT_DESC;
|
|
mask |= VIRTIO_RING_F_EVENT_IDX;
|
|
|
|
return (features & mask);
|
|
}
|
|
|
|
int
|
|
virtqueue_alloc(device_t dev, uint16_t queue, uint16_t size, int align,
|
|
vm_paddr_t highaddr, struct vq_alloc_info *info, struct virtqueue **vqp)
|
|
{
|
|
struct virtqueue *vq;
|
|
int error;
|
|
|
|
*vqp = NULL;
|
|
error = 0;
|
|
|
|
if (size == 0) {
|
|
device_printf(dev,
|
|
"virtqueue %d (%s) does not exist (size is zero)\n",
|
|
queue, info->vqai_name);
|
|
return (ENODEV);
|
|
} else if (!powerof2(size)) {
|
|
device_printf(dev,
|
|
"virtqueue %d (%s) size is not a power of 2: %d\n",
|
|
queue, info->vqai_name, size);
|
|
return (ENXIO);
|
|
} else if (info->vqai_maxindirsz > VIRTIO_MAX_INDIRECT) {
|
|
device_printf(dev, "virtqueue %d (%s) requested too many "
|
|
"indirect descriptors: %d, max %d\n",
|
|
queue, info->vqai_name, info->vqai_maxindirsz,
|
|
VIRTIO_MAX_INDIRECT);
|
|
return (EINVAL);
|
|
}
|
|
|
|
vq = malloc(sizeof(struct virtqueue) +
|
|
size * sizeof(struct vq_desc_extra), M_DEVBUF, M_NOWAIT | M_ZERO);
|
|
if (vq == NULL) {
|
|
device_printf(dev, "cannot allocate virtqueue\n");
|
|
return (ENOMEM);
|
|
}
|
|
|
|
vq->vq_dev = dev;
|
|
strlcpy(vq->vq_name, info->vqai_name, sizeof(vq->vq_name));
|
|
vq->vq_queue_index = queue;
|
|
vq->vq_alignment = align;
|
|
vq->vq_nentries = size;
|
|
vq->vq_free_cnt = size;
|
|
vq->vq_intrhand = info->vqai_intr;
|
|
vq->vq_intrhand_arg = info->vqai_intr_arg;
|
|
|
|
if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_RING_F_EVENT_IDX) != 0)
|
|
vq->vq_flags |= VIRTQUEUE_FLAG_EVENT_IDX;
|
|
|
|
if (info->vqai_maxindirsz > 1) {
|
|
error = virtqueue_init_indirect(vq, info->vqai_maxindirsz);
|
|
if (error)
|
|
goto fail;
|
|
}
|
|
|
|
vq->vq_ring_size = round_page(vring_size(size, align));
|
|
vq->vq_ring_mem = contigmalloc(vq->vq_ring_size, M_DEVBUF,
|
|
M_NOWAIT | M_ZERO, 0, highaddr, PAGE_SIZE, 0);
|
|
if (vq->vq_ring_mem == NULL) {
|
|
device_printf(dev,
|
|
"cannot allocate memory for virtqueue ring\n");
|
|
error = ENOMEM;
|
|
goto fail;
|
|
}
|
|
|
|
vq_ring_init(vq);
|
|
virtqueue_disable_intr(vq);
|
|
|
|
*vqp = vq;
|
|
|
|
fail:
|
|
if (error)
|
|
virtqueue_free(vq);
|
|
|
|
return (error);
|
|
}
|
|
|
|
static int
|
|
virtqueue_init_indirect(struct virtqueue *vq, int indirect_size)
|
|
{
|
|
device_t dev;
|
|
struct vq_desc_extra *dxp;
|
|
int i, size;
|
|
|
|
dev = vq->vq_dev;
|
|
|
|
if (VIRTIO_BUS_WITH_FEATURE(dev, VIRTIO_RING_F_INDIRECT_DESC) == 0) {
|
|
/*
|
|
* Indirect descriptors requested by the driver but not
|
|
* negotiated. Return zero to keep the initialization
|
|
* going: we'll run fine without.
|
|
*/
|
|
if (bootverbose)
|
|
device_printf(dev, "virtqueue %d (%s) requested "
|
|
"indirect descriptors but not negotiated\n",
|
|
vq->vq_queue_index, vq->vq_name);
|
|
return (0);
|
|
}
|
|
|
|
size = indirect_size * sizeof(struct vring_desc);
|
|
vq->vq_max_indirect_size = indirect_size;
|
|
vq->vq_indirect_mem_size = size;
|
|
vq->vq_flags |= VIRTQUEUE_FLAG_INDIRECT;
|
|
|
|
for (i = 0; i < vq->vq_nentries; i++) {
|
|
dxp = &vq->vq_descx[i];
|
|
|
|
dxp->indirect = malloc(size, M_DEVBUF, M_NOWAIT);
|
|
if (dxp->indirect == NULL) {
|
|
device_printf(dev, "cannot allocate indirect list\n");
|
|
return (ENOMEM);
|
|
}
|
|
|
|
dxp->indirect_paddr = vtophys(dxp->indirect);
|
|
virtqueue_init_indirect_list(vq, dxp->indirect);
|
|
}
|
|
|
|
return (0);
|
|
}
|
|
|
|
static void
|
|
virtqueue_free_indirect(struct virtqueue *vq)
|
|
{
|
|
struct vq_desc_extra *dxp;
|
|
int i;
|
|
|
|
for (i = 0; i < vq->vq_nentries; i++) {
|
|
dxp = &vq->vq_descx[i];
|
|
|
|
if (dxp->indirect == NULL)
|
|
break;
|
|
|
|
free(dxp->indirect, M_DEVBUF);
|
|
dxp->indirect = NULL;
|
|
dxp->indirect_paddr = 0;
|
|
}
|
|
|
|
vq->vq_flags &= ~VIRTQUEUE_FLAG_INDIRECT;
|
|
vq->vq_indirect_mem_size = 0;
|
|
}
|
|
|
|
static void
|
|
virtqueue_init_indirect_list(struct virtqueue *vq,
|
|
struct vring_desc *indirect)
|
|
{
|
|
int i;
|
|
|
|
bzero(indirect, vq->vq_indirect_mem_size);
|
|
|
|
for (i = 0; i < vq->vq_max_indirect_size - 1; i++)
|
|
indirect[i].next = i + 1;
|
|
indirect[i].next = VQ_RING_DESC_CHAIN_END;
|
|
}
|
|
|
|
int
|
|
virtqueue_reinit(struct virtqueue *vq, uint16_t size)
|
|
{
|
|
struct vq_desc_extra *dxp;
|
|
int i;
|
|
|
|
if (vq->vq_nentries != size) {
|
|
device_printf(vq->vq_dev,
|
|
"%s: '%s' changed size; old=%hu, new=%hu\n",
|
|
__func__, vq->vq_name, vq->vq_nentries, size);
|
|
return (EINVAL);
|
|
}
|
|
|
|
/* Warn if the virtqueue was not properly cleaned up. */
|
|
if (vq->vq_free_cnt != vq->vq_nentries) {
|
|
device_printf(vq->vq_dev,
|
|
"%s: warning, '%s' virtqueue not empty, "
|
|
"leaking %d entries\n", __func__, vq->vq_name,
|
|
vq->vq_nentries - vq->vq_free_cnt);
|
|
}
|
|
|
|
vq->vq_desc_head_idx = 0;
|
|
vq->vq_used_cons_idx = 0;
|
|
vq->vq_queued_cnt = 0;
|
|
vq->vq_free_cnt = vq->vq_nentries;
|
|
|
|
/* To be safe, reset all our allocated memory. */
|
|
bzero(vq->vq_ring_mem, vq->vq_ring_size);
|
|
for (i = 0; i < vq->vq_nentries; i++) {
|
|
dxp = &vq->vq_descx[i];
|
|
dxp->cookie = NULL;
|
|
dxp->ndescs = 0;
|
|
if (vq->vq_flags & VIRTQUEUE_FLAG_INDIRECT)
|
|
virtqueue_init_indirect_list(vq, dxp->indirect);
|
|
}
|
|
|
|
vq_ring_init(vq);
|
|
virtqueue_disable_intr(vq);
|
|
|
|
return (0);
|
|
}
|
|
|
|
void
|
|
virtqueue_free(struct virtqueue *vq)
|
|
{
|
|
|
|
if (vq->vq_free_cnt != vq->vq_nentries) {
|
|
device_printf(vq->vq_dev, "%s: freeing non-empty virtqueue, "
|
|
"leaking %d entries\n", vq->vq_name,
|
|
vq->vq_nentries - vq->vq_free_cnt);
|
|
}
|
|
|
|
if (vq->vq_flags & VIRTQUEUE_FLAG_INDIRECT)
|
|
virtqueue_free_indirect(vq);
|
|
|
|
if (vq->vq_ring_mem != NULL) {
|
|
contigfree(vq->vq_ring_mem, vq->vq_ring_size, M_DEVBUF);
|
|
vq->vq_ring_size = 0;
|
|
vq->vq_ring_mem = NULL;
|
|
}
|
|
|
|
free(vq, M_DEVBUF);
|
|
}
|
|
|
|
vm_paddr_t
|
|
virtqueue_paddr(struct virtqueue *vq)
|
|
{
|
|
|
|
return (vtophys(vq->vq_ring_mem));
|
|
}
|
|
|
|
int
|
|
virtqueue_size(struct virtqueue *vq)
|
|
{
|
|
|
|
return (vq->vq_nentries);
|
|
}
|
|
|
|
int
|
|
virtqueue_empty(struct virtqueue *vq)
|
|
{
|
|
|
|
return (vq->vq_nentries == vq->vq_free_cnt);
|
|
}
|
|
|
|
int
|
|
virtqueue_full(struct virtqueue *vq)
|
|
{
|
|
|
|
return (vq->vq_free_cnt == 0);
|
|
}
|
|
|
|
void
|
|
virtqueue_notify(struct virtqueue *vq)
|
|
{
|
|
/* Ensure updated avail->idx is visible to host. */
|
|
mb();
|
|
|
|
if (vq_ring_must_notify_host(vq))
|
|
vq_ring_notify_host(vq);
|
|
vq->vq_queued_cnt = 0;
|
|
}
|
|
|
|
int
|
|
virtqueue_nused(struct virtqueue *vq)
|
|
{
|
|
uint16_t used_idx, nused;
|
|
|
|
used_idx = vq->vq_ring.used->idx;
|
|
|
|
nused = (uint16_t)(used_idx - vq->vq_used_cons_idx);
|
|
VQASSERT(vq, nused <= vq->vq_nentries, "used more than available");
|
|
|
|
return (nused);
|
|
}
|
|
|
|
int
|
|
virtqueue_intr(struct virtqueue *vq)
|
|
{
|
|
|
|
if (vq->vq_intrhand == NULL ||
|
|
vq->vq_used_cons_idx == vq->vq_ring.used->idx)
|
|
return (0);
|
|
|
|
vq->vq_intrhand(vq->vq_intrhand_arg);
|
|
|
|
return (1);
|
|
}
|
|
|
|
int
|
|
virtqueue_enable_intr(struct virtqueue *vq)
|
|
{
|
|
|
|
/*
|
|
* Enable interrupts, making sure we get the latest
|
|
* index of what's already been consumed.
|
|
*/
|
|
vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
|
|
if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX)
|
|
vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx;
|
|
else
|
|
vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
|
|
|
|
mb();
|
|
|
|
/*
|
|
* Additional items may have been consumed in the time between
|
|
* since we last checked and enabled interrupts above. Let our
|
|
* caller know so it processes the new entries.
|
|
*/
|
|
if (vq->vq_used_cons_idx != vq->vq_ring.used->idx)
|
|
return (1);
|
|
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
virtqueue_postpone_intr(struct virtqueue *vq)
|
|
{
|
|
uint16_t ndesc;
|
|
|
|
/*
|
|
* Postpone until at least half of the available descriptors
|
|
* have been consumed.
|
|
*
|
|
* XXX Adaptive factor? (Linux uses 3/4)
|
|
*/
|
|
ndesc = (uint16_t)(vq->vq_ring.avail->idx - vq->vq_used_cons_idx) / 2;
|
|
|
|
if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX)
|
|
vring_used_event(&vq->vq_ring) = vq->vq_used_cons_idx + ndesc;
|
|
else
|
|
vq->vq_ring.avail->flags &= ~VRING_AVAIL_F_NO_INTERRUPT;
|
|
|
|
mb();
|
|
|
|
/*
|
|
* Enough items may have already been consumed to meet our
|
|
* threshold since we last checked. Let our caller know so
|
|
* it processes the new entries.
|
|
*/
|
|
if (virtqueue_nused(vq) > ndesc)
|
|
return (1);
|
|
|
|
return (0);
|
|
}
|
|
|
|
void
|
|
virtqueue_disable_intr(struct virtqueue *vq)
|
|
{
|
|
|
|
/*
|
|
* Note this is only considered a hint to the host.
|
|
*/
|
|
if ((vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) == 0)
|
|
vq->vq_ring.avail->flags |= VRING_AVAIL_F_NO_INTERRUPT;
|
|
}
|
|
|
|
int
|
|
virtqueue_enqueue(struct virtqueue *vq, void *cookie, struct sglist *sg,
|
|
int readable, int writable)
|
|
{
|
|
struct vq_desc_extra *dxp;
|
|
int needed;
|
|
uint16_t head_idx, idx;
|
|
|
|
needed = readable + writable;
|
|
|
|
VQASSERT(vq, cookie != NULL, "enqueuing with no cookie");
|
|
VQASSERT(vq, needed == sg->sg_nseg,
|
|
"segment count mismatch, %d, %d", needed, sg->sg_nseg);
|
|
VQASSERT(vq,
|
|
needed <= vq->vq_nentries || needed <= vq->vq_max_indirect_size,
|
|
"too many segments to enqueue: %d, %d/%d", needed,
|
|
vq->vq_nentries, vq->vq_max_indirect_size);
|
|
|
|
if (needed < 1)
|
|
return (EINVAL);
|
|
if (vq->vq_free_cnt == 0)
|
|
return (ENOSPC);
|
|
|
|
if (vq_ring_use_indirect(vq, needed)) {
|
|
vq_ring_enqueue_indirect(vq, cookie, sg, readable, writable);
|
|
return (0);
|
|
} else if (vq->vq_free_cnt < needed)
|
|
return (EMSGSIZE);
|
|
|
|
head_idx = vq->vq_desc_head_idx;
|
|
VQ_RING_ASSERT_VALID_IDX(vq, head_idx);
|
|
dxp = &vq->vq_descx[head_idx];
|
|
|
|
VQASSERT(vq, dxp->cookie == NULL,
|
|
"cookie already exists for index %d", head_idx);
|
|
dxp->cookie = cookie;
|
|
dxp->ndescs = needed;
|
|
|
|
idx = vq_ring_enqueue_segments(vq, vq->vq_ring.desc, head_idx,
|
|
sg, readable, writable);
|
|
|
|
vq->vq_desc_head_idx = idx;
|
|
vq->vq_free_cnt -= needed;
|
|
if (vq->vq_free_cnt == 0)
|
|
VQ_RING_ASSERT_CHAIN_TERM(vq);
|
|
else
|
|
VQ_RING_ASSERT_VALID_IDX(vq, idx);
|
|
|
|
vq_ring_update_avail(vq, head_idx);
|
|
|
|
return (0);
|
|
}
|
|
|
|
void *
|
|
virtqueue_dequeue(struct virtqueue *vq, uint32_t *len)
|
|
{
|
|
struct vring_used_elem *uep;
|
|
void *cookie;
|
|
uint16_t used_idx, desc_idx;
|
|
|
|
if (vq->vq_used_cons_idx == vq->vq_ring.used->idx)
|
|
return (NULL);
|
|
|
|
used_idx = vq->vq_used_cons_idx++ & (vq->vq_nentries - 1);
|
|
uep = &vq->vq_ring.used->ring[used_idx];
|
|
|
|
mb();
|
|
desc_idx = (uint16_t) uep->id;
|
|
if (len != NULL)
|
|
*len = uep->len;
|
|
|
|
vq_ring_free_chain(vq, desc_idx);
|
|
|
|
cookie = vq->vq_descx[desc_idx].cookie;
|
|
VQASSERT(vq, cookie != NULL, "no cookie for index %d", desc_idx);
|
|
vq->vq_descx[desc_idx].cookie = NULL;
|
|
|
|
return (cookie);
|
|
}
|
|
|
|
/*
 * Spin until virtqueue_dequeue() yields an entry and return its cookie.
 * 'len' is passed through to virtqueue_dequeue().
 */
void *
virtqueue_poll(struct virtqueue *vq, uint32_t *len)
{
	void *cookie;

	for (;;) {
		cookie = virtqueue_dequeue(vq, len);
		if (cookie != NULL)
			break;
		cpu_spinwait();
	}

	return (cookie);
}
|
|
|
|
void *
|
|
virtqueue_drain(struct virtqueue *vq, int *last)
|
|
{
|
|
void *cookie;
|
|
int idx;
|
|
|
|
cookie = NULL;
|
|
idx = *last;
|
|
|
|
while (idx < vq->vq_nentries && cookie == NULL) {
|
|
if ((cookie = vq->vq_descx[idx].cookie) != NULL) {
|
|
vq->vq_descx[idx].cookie = NULL;
|
|
/* Free chain to keep free count consistent. */
|
|
vq_ring_free_chain(vq, idx);
|
|
}
|
|
idx++;
|
|
}
|
|
|
|
*last = idx;
|
|
|
|
return (cookie);
|
|
}
|
|
|
|
void
|
|
virtqueue_dump(struct virtqueue *vq)
|
|
{
|
|
|
|
if (vq == NULL)
|
|
return;
|
|
|
|
printf("VQ: %s - size=%d; free=%d; used=%d; queued=%d; "
|
|
"desc_head_idx=%d; avail.idx=%d; used_cons_idx=%d; "
|
|
"used.idx=%d; avail.flags=0x%x; used.flags=0x%x\n",
|
|
vq->vq_name, vq->vq_nentries, vq->vq_free_cnt,
|
|
virtqueue_nused(vq), vq->vq_queued_cnt, vq->vq_desc_head_idx,
|
|
vq->vq_ring.avail->idx, vq->vq_used_cons_idx,
|
|
vq->vq_ring.used->idx, vq->vq_ring.avail->flags,
|
|
vq->vq_ring.used->flags);
|
|
}
|
|
|
|
static void
|
|
vq_ring_init(struct virtqueue *vq)
|
|
{
|
|
struct vring *vr;
|
|
char *ring_mem;
|
|
int i, size;
|
|
|
|
ring_mem = vq->vq_ring_mem;
|
|
size = vq->vq_nentries;
|
|
vr = &vq->vq_ring;
|
|
|
|
vring_init(vr, size, ring_mem, vq->vq_alignment);
|
|
|
|
for (i = 0; i < size - 1; i++)
|
|
vr->desc[i].next = i + 1;
|
|
vr->desc[i].next = VQ_RING_DESC_CHAIN_END;
|
|
}
|
|
|
|
static void
|
|
vq_ring_update_avail(struct virtqueue *vq, uint16_t desc_idx)
|
|
{
|
|
uint16_t avail_idx;
|
|
|
|
/*
|
|
* Place the head of the descriptor chain into the next slot and make
|
|
* it usable to the host. The chain is made available now rather than
|
|
* deferring to virtqueue_notify() in the hopes that if the host is
|
|
* currently running on another CPU, we can keep it processing the new
|
|
* descriptor.
|
|
*/
|
|
avail_idx = vq->vq_ring.avail->idx & (vq->vq_nentries - 1);
|
|
vq->vq_ring.avail->ring[avail_idx] = desc_idx;
|
|
|
|
mb();
|
|
vq->vq_ring.avail->idx++;
|
|
|
|
/* Keep pending count until virtqueue_notify(). */
|
|
vq->vq_queued_cnt++;
|
|
}
|
|
|
|
static uint16_t
|
|
vq_ring_enqueue_segments(struct virtqueue *vq, struct vring_desc *desc,
|
|
uint16_t head_idx, struct sglist *sg, int readable, int writable)
|
|
{
|
|
struct sglist_seg *seg;
|
|
struct vring_desc *dp;
|
|
int i, needed;
|
|
uint16_t idx;
|
|
|
|
needed = readable + writable;
|
|
|
|
for (i = 0, idx = head_idx, seg = sg->sg_segs;
|
|
i < needed;
|
|
i++, idx = dp->next, seg++) {
|
|
VQASSERT(vq, idx != VQ_RING_DESC_CHAIN_END,
|
|
"premature end of free desc chain");
|
|
|
|
dp = &desc[idx];
|
|
dp->addr = seg->ss_paddr;
|
|
dp->len = seg->ss_len;
|
|
dp->flags = 0;
|
|
|
|
if (i < needed - 1)
|
|
dp->flags |= VRING_DESC_F_NEXT;
|
|
if (i >= readable)
|
|
dp->flags |= VRING_DESC_F_WRITE;
|
|
}
|
|
|
|
return (idx);
|
|
}
|
|
|
|
static int
|
|
vq_ring_use_indirect(struct virtqueue *vq, int needed)
|
|
{
|
|
|
|
if ((vq->vq_flags & VIRTQUEUE_FLAG_INDIRECT) == 0)
|
|
return (0);
|
|
|
|
if (vq->vq_max_indirect_size < needed)
|
|
return (0);
|
|
|
|
if (needed < 2)
|
|
return (0);
|
|
|
|
return (1);
|
|
}
|
|
|
|
static void
|
|
vq_ring_enqueue_indirect(struct virtqueue *vq, void *cookie,
|
|
struct sglist *sg, int readable, int writable)
|
|
{
|
|
struct vring_desc *dp;
|
|
struct vq_desc_extra *dxp;
|
|
int needed;
|
|
uint16_t head_idx;
|
|
|
|
needed = readable + writable;
|
|
VQASSERT(vq, needed <= vq->vq_max_indirect_size,
|
|
"enqueuing too many indirect descriptors");
|
|
|
|
head_idx = vq->vq_desc_head_idx;
|
|
VQ_RING_ASSERT_VALID_IDX(vq, head_idx);
|
|
dp = &vq->vq_ring.desc[head_idx];
|
|
dxp = &vq->vq_descx[head_idx];
|
|
|
|
VQASSERT(vq, dxp->cookie == NULL,
|
|
"cookie already exists for index %d", head_idx);
|
|
dxp->cookie = cookie;
|
|
dxp->ndescs = 1;
|
|
|
|
dp->addr = dxp->indirect_paddr;
|
|
dp->len = needed * sizeof(struct vring_desc);
|
|
dp->flags = VRING_DESC_F_INDIRECT;
|
|
|
|
vq_ring_enqueue_segments(vq, dxp->indirect, 0,
|
|
sg, readable, writable);
|
|
|
|
vq->vq_desc_head_idx = dp->next;
|
|
vq->vq_free_cnt--;
|
|
if (vq->vq_free_cnt == 0)
|
|
VQ_RING_ASSERT_CHAIN_TERM(vq);
|
|
else
|
|
VQ_RING_ASSERT_VALID_IDX(vq, vq->vq_desc_head_idx);
|
|
|
|
vq_ring_update_avail(vq, head_idx);
|
|
}
|
|
|
|
static int
|
|
vq_ring_must_notify_host(struct virtqueue *vq)
|
|
{
|
|
uint16_t new_idx, prev_idx, event_idx;
|
|
|
|
if (vq->vq_flags & VIRTQUEUE_FLAG_EVENT_IDX) {
|
|
new_idx = vq->vq_ring.avail->idx;
|
|
prev_idx = new_idx - vq->vq_queued_cnt;
|
|
event_idx = vring_avail_event(&vq->vq_ring);
|
|
|
|
return (vring_need_event(event_idx, new_idx, prev_idx) != 0);
|
|
}
|
|
|
|
return ((vq->vq_ring.used->flags & VRING_USED_F_NO_NOTIFY) == 0);
|
|
}
|
|
|
|
static void
|
|
vq_ring_notify_host(struct virtqueue *vq)
|
|
{
|
|
|
|
VIRTIO_BUS_NOTIFY_VQ(vq->vq_dev, vq->vq_queue_index);
|
|
}
|
|
|
|
static void
|
|
vq_ring_free_chain(struct virtqueue *vq, uint16_t desc_idx)
|
|
{
|
|
struct vring_desc *dp;
|
|
struct vq_desc_extra *dxp;
|
|
|
|
VQ_RING_ASSERT_VALID_IDX(vq, desc_idx);
|
|
dp = &vq->vq_ring.desc[desc_idx];
|
|
dxp = &vq->vq_descx[desc_idx];
|
|
|
|
if (vq->vq_free_cnt == 0)
|
|
VQ_RING_ASSERT_CHAIN_TERM(vq);
|
|
|
|
vq->vq_free_cnt += dxp->ndescs;
|
|
dxp->ndescs--;
|
|
|
|
if ((dp->flags & VRING_DESC_F_INDIRECT) == 0) {
|
|
while (dp->flags & VRING_DESC_F_NEXT) {
|
|
VQ_RING_ASSERT_VALID_IDX(vq, dp->next);
|
|
dp = &vq->vq_ring.desc[dp->next];
|
|
dxp->ndescs--;
|
|
}
|
|
}
|
|
VQASSERT(vq, dxp->ndescs == 0, "failed to free entire desc chain");
|
|
|
|
/*
|
|
* We must append the existing free chain, if any, to the end of
|
|
* newly freed chain. If the virtqueue was completely used, then
|
|
* head would be VQ_RING_DESC_CHAIN_END (ASSERTed above).
|
|
*/
|
|
dp->next = vq->vq_desc_head_idx;
|
|
vq->vq_desc_head_idx = desc_idx;
|
|
}
|