/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
/* Driver for the VirtIO PCI interface. */
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/malloc.h>
#include <sys/endian.h>
#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pcireg.h>
#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/pci/virtio_pci.h>
#include "virtio_bus_if.h"
#include "virtio_if.h"
/* State for one allocated interrupt: legacy INTx, MSI, or a single MSIX vector. */
struct vtpci_interrupt {
	struct resource		*vti_irq;	/* IRQ resource from the parent bus */
	int			 vti_rid;	/* resource id it was allocated with */
	void			*vti_handler;	/* cookie returned by bus_setup_intr() */
};

/* Per-virtqueue bookkeeping kept by the transport. */
struct vtpci_virtqueue {
	struct virtqueue	*vtv_vq;	/* the virtqueue itself */
	int			 vtv_no_intr;	/* child provided no interrupt callback */
};

/* Per-device softc for the virtio_pci transport. */
struct vtpci_softc {
	device_t			 vtpci_dev;
	struct resource			*vtpci_res;	/* BAR(0) I/O port window */
	struct resource			*vtpci_msix_res; /* BAR(1) MSIX table memory */
	uint64_t			 vtpci_features; /* negotiated feature bits */
	uint32_t			 vtpci_flags;
/* Capability limitations discovered at attach. */
#define VTPCI_FLAG_NO_MSI	0x0001
#define VTPCI_FLAG_NO_MSIX	0x0002
/* Which interrupt style is currently in use (mutually exclusive). */
#define VTPCI_FLAG_LEGACY	0x1000
#define VTPCI_FLAG_MSI		0x2000
#define VTPCI_FLAG_MSIX		0x4000
#define VTPCI_FLAG_SHARED_MSIX	0x8000
#define VTPCI_FLAG_ITYPE_MASK	0xF000

	/* This "bus" will only ever have one child. */
	device_t			 vtpci_child_dev;
	struct virtio_feature_desc	*vtpci_child_feat_desc;

	int				 vtpci_nvqs;
	struct vtpci_virtqueue		*vtpci_vqs;

	/*
	 * Ideally, each virtqueue that the driver provides a callback for will
	 * receive its own MSIX vector. If there are not sufficient vectors
	 * available, then attempt to have all the VQs share one vector. For
	 * MSIX, the configuration changed notifications must be on their own
	 * vector.
	 *
	 * If MSIX is not available, we will attempt to have the whole device
	 * share one MSI vector, and then, finally, one legacy interrupt.
	 */
	struct vtpci_interrupt		 vtpci_device_interrupt;
	struct vtpci_interrupt		*vtpci_msix_vq_interrupts;
	int				 vtpci_nmsix_resources;
};
/*
 * Forward declarations for the device, bus, and virtio-bus methods
 * implemented below.
 */
static int	vtpci_probe(device_t);
static int	vtpci_attach(device_t);
static int	vtpci_detach(device_t);
static int	vtpci_suspend(device_t);
static int	vtpci_resume(device_t);
static int	vtpci_shutdown(device_t);
static void	vtpci_driver_added(device_t, driver_t *);
static void	vtpci_child_detached(device_t, device_t);
static int	vtpci_read_ivar(device_t, device_t, int, uintptr_t *);
static int	vtpci_write_ivar(device_t, device_t, int, uintptr_t);
static uint64_t	vtpci_negotiate_features(device_t, uint64_t);
static int	vtpci_with_feature(device_t, uint64_t);
static int	vtpci_alloc_virtqueues(device_t, int, int,
		    struct vq_alloc_info *);
static int	vtpci_setup_intr(device_t, enum intr_type);
static void	vtpci_stop(device_t);
static int	vtpci_reinit(device_t, uint64_t);
static void	vtpci_reinit_complete(device_t);
static void	vtpci_notify_virtqueue(device_t, uint16_t);
static uint8_t	vtpci_get_status(device_t);
static void	vtpci_set_status(device_t, uint8_t);
static void	vtpci_read_dev_config(device_t, bus_size_t, void *, int);
static void	vtpci_write_dev_config(device_t, bus_size_t, void *, int);
static void	vtpci_describe_features(struct vtpci_softc *, const char *,
		    uint64_t);
static void	vtpci_probe_and_attach_child(struct vtpci_softc *);
static int	vtpci_alloc_msix(struct vtpci_softc *, int);
static int	vtpci_alloc_msi(struct vtpci_softc *);
static int	vtpci_alloc_intr_msix_pervq(struct vtpci_softc *);
static int	vtpci_alloc_intr_msix_shared(struct vtpci_softc *);
static int	vtpci_alloc_intr_msi(struct vtpci_softc *);
static int	vtpci_alloc_intr_legacy(struct vtpci_softc *);
static int	vtpci_alloc_interrupt(struct vtpci_softc *, int, int,
		    struct vtpci_interrupt *);
static int	vtpci_alloc_intr_resources(struct vtpci_softc *);
static int	vtpci_setup_legacy_interrupt(struct vtpci_softc *,
		    enum intr_type);
static int	vtpci_setup_pervq_msix_interrupts(struct vtpci_softc *,
		    enum intr_type);
static int	vtpci_setup_msix_interrupts(struct vtpci_softc *,
		    enum intr_type);
static int	vtpci_setup_interrupts(struct vtpci_softc *, enum intr_type);
static int	vtpci_register_msix_vector(struct vtpci_softc *, int,
		    struct vtpci_interrupt *);
static int	vtpci_set_host_msix_vectors(struct vtpci_softc *);
static int	vtpci_reinit_virtqueue(struct vtpci_softc *, int);
static void	vtpci_free_interrupt(struct vtpci_softc *,
		    struct vtpci_interrupt *);
static void	vtpci_free_interrupts(struct vtpci_softc *);
static void	vtpci_free_virtqueues(struct vtpci_softc *);
static void	vtpci_release_child_resources(struct vtpci_softc *);
static void	vtpci_cleanup_setup_intr_attempt(struct vtpci_softc *);
static void	vtpci_reset(struct vtpci_softc *);
static void	vtpci_select_virtqueue(struct vtpci_softc *, int);
static void	vtpci_legacy_intr(void *);
static int	vtpci_vq_shared_intr_filter(void *);
static void	vtpci_vq_shared_intr(void *);
static int	vtpci_vq_intr_filter(void *);
static void	vtpci_vq_intr(void *);
static void	vtpci_config_intr(void *);

/* MSI uses the same single-interrupt setup path as legacy INTx. */
#define vtpci_setup_msi_interrupt vtpci_setup_legacy_interrupt

/*
 * When MSIX is enabled, the device-specific config region begins at a
 * later offset within BAR(0); this yields the correct base either way.
 */
#define VIRTIO_PCI_CONFIG(_sc) \
    VIRTIO_PCI_CONFIG_OFF((((_sc)->vtpci_flags & VTPCI_FLAG_MSIX)) != 0)
/*
 * I/O port read/write wrappers.
 */
#define vtpci_read_config_1(sc, o)	bus_read_1((sc)->vtpci_res, (o))
#define vtpci_read_config_2(sc, o)	bus_read_2((sc)->vtpci_res, (o))
#define vtpci_read_config_4(sc, o)	bus_read_4((sc)->vtpci_res, (o))
#define vtpci_write_config_1(sc, o, v)	bus_write_1((sc)->vtpci_res, (o), (v))
#define vtpci_write_config_2(sc, o, v)	bus_write_2((sc)->vtpci_res, (o), (v))
#define vtpci_write_config_4(sc, o, v)	bus_write_4((sc)->vtpci_res, (o), (v))

/*
 * Legacy VirtIO header is always PCI endianness (little), so if we
 * are in a BE machine we need to swap bytes from LE to BE when reading
 * and from BE to LE when writing.
 * If we are in a LE machine, there will be no swaps.
 */
#define vtpci_read_header_2(sc, o)	le16toh(vtpci_read_config_2(sc, o))
#define vtpci_read_header_4(sc, o)	le32toh(vtpci_read_config_4(sc, o))
#define vtpci_write_header_2(sc, o, v)	vtpci_write_config_2(sc, o, (htole16(v)))
#define vtpci_write_header_4(sc, o, v)	vtpci_write_config_4(sc, o, (htole32(v)))

/* Tunables. */
/* Set hw.virtio.pci.disable_msix=1 to force MSI or legacy interrupts. */
static int vtpci_disable_msix = 0;
TUNABLE_INT("hw.virtio.pci.disable_msix", &vtpci_disable_msix);
static device_method_t vtpci_methods[] = {
/* Device interface. */
DEVMETHOD(device_probe, vtpci_probe),
DEVMETHOD(device_attach, vtpci_attach),
DEVMETHOD(device_detach, vtpci_detach),
DEVMETHOD(device_suspend, vtpci_suspend),
DEVMETHOD(device_resume, vtpci_resume),
DEVMETHOD(device_shutdown, vtpci_shutdown),
/* Bus interface. */
DEVMETHOD(bus_driver_added, vtpci_driver_added),
DEVMETHOD(bus_child_detached, vtpci_child_detached),
DEVMETHOD(bus_child_pnpinfo_str, virtio_child_pnpinfo_str),
DEVMETHOD(bus_read_ivar, vtpci_read_ivar),
DEVMETHOD(bus_write_ivar, vtpci_write_ivar),
/* VirtIO bus interface. */
DEVMETHOD(virtio_bus_negotiate_features, vtpci_negotiate_features),
DEVMETHOD(virtio_bus_with_feature, vtpci_with_feature),
DEVMETHOD(virtio_bus_alloc_virtqueues, vtpci_alloc_virtqueues),
DEVMETHOD(virtio_bus_setup_intr, vtpci_setup_intr),
DEVMETHOD(virtio_bus_stop, vtpci_stop),
DEVMETHOD(virtio_bus_reinit, vtpci_reinit),
DEVMETHOD(virtio_bus_reinit_complete, vtpci_reinit_complete),
DEVMETHOD(virtio_bus_notify_vq, vtpci_notify_virtqueue),
DEVMETHOD(virtio_bus_read_device_config, vtpci_read_dev_config),
DEVMETHOD(virtio_bus_write_device_config, vtpci_write_dev_config),
Catch up with Bryan Venteicher's virtio git repo: a8af6270bd96be6ccd86f70b60fa6512b710e4f0 virtio_blk: Include function name in panic string cbdb03a694b76c5253d7ae3a59b9995b9afbb67a virtio_balloon: Do the notify outside of the lock By the time we return from virtqueue_notify(), the descriptor will be in the used ring so we shouldn't have to sleep. 10ba392e60692529a5cbc1e9987e4064e0128447 virtio: Use DEVMETHOD_END 80cbcc4d6552cac758be67f0c99c36f23ce62110 virtqueue: Add support for VIRTIO_F_RING_EVENT_IDX This can be used to reduce the number of guest/host and host/guest interrupts by delaying the interrupt until a certain index value is reached. Actual use by the network driver will come along later. 8fc465969acc0c58477153e4c3530390db436c02 virtqueue: Simplify virtqueue_nused() Since the values just wrap naturally at UINT16_MAX, we can just subtract the two values directly, rather than doing 2's complement math. a8aa22f25959e2767d006cd621b69050e7ffb0ae virtio_blk: Remove debugging crud from 75dd732a There seems to be an issue with Qemu (or FreeBSD VirtIO) that sets the PCI register space for the device config to bogus values. This only seems to happen after unloading and reloading the module. d404800661cb2a9769c033f8a50b2133934501aa virtio_blk: Use better variable name 75dd732a97743d96e7c63f7ced3c2169696dadd3 virtio_blk: Partially revert 92ba40e65 Just use the virtqueue to determine if any requests are still inflight. 06661ed66b7a9efaea240f99f414c368f1bbcdc7 virtio_blk: error if allowed too few segments Should never happen unless the host provides use with a bogus seg_max value. 
4b33e5085bc87a818433d7e664a0a2c8f56a1a89 virtio_blk: Sort function declarations 426b9f5cac892c9c64cc7631966461514f7e08c6 virtio_blk: Cleanup whitespace 617c23e12c61e3c2233d942db713c6b8ff0bd112 virtio_blk: Call disk_err() on error'd completed requests 081a5712d4b2e0abf273be4d26affcf3870263a9 virtio_blk: ASSERT the ready and inflight request queues are empty a9be2631a4f770a84145c18ee03a3f103bed4ca8 virtio_blk: Simplify check for too many segments At the cost of a small style violation. e00ec09da014f2e60cc75542d0ab78898672d521 virtio_blk: Add beginnings of suspend/resume Still not sure if we need to virtio_stop()/virtio_reinit() the device before/after a suspend. Don't start additional IO when marked as suspending. 47c71dc6ce8c238aa59ce8afd4bda5aa294bc884 virtio_blk: Panic when dealt an unhandled BIO cmd 1055544f90fb8c0cc6a2395f5b6104039606aafe virtio_blk: Add VQ enqueue/dequeue wrappers Wrapper functions managed the added/removing to the in-flight list of requests. Normally biodone() any completed IO when draining the virtqueue. 92ba40e65b3bb5e4acb9300ece711f1ea8f3f7f4 virtio_blk: Add in-flight list of requests 74f6d260e075443544522c0833dc2712dd93f49b virtio_blk: Rename VTBLK_FLAG_DETACHING to VTBLK_FLAG_DETACH 7aa549050f6fc6551c09c6362ed6b2a0728956ef virtio_blk: Finish all BIOs through vtblk_finish_bio() Also properly set bio_resid in the case of errors. Most geom_disk providers seem to do the same. 9eef6d0e6f7e5dd362f71ba097f2e2e4c3744882 Added function to translate VirtIO status to error code ef06adc337f31e1129d6d5f26de6d8d1be27bcd2 Reset dumping flag when given unexpected parameters 393b3e390c644193a2e392220dcc6a6c50b212d9 Added missing VTBLK_LOCK() in dump handler Obtained from: Bryan Venteicher bryanv at daemoninthecloset dot org
2012-04-14 05:48:04 +00:00
DEVMETHOD_END
};
/* Newbus driver glue: virtio_pci attaches beneath the pci bus. */
static driver_t vtpci_driver = {
	"virtio_pci",
	vtpci_methods,
	sizeof(struct vtpci_softc)
};

devclass_t vtpci_devclass;

DRIVER_MODULE(virtio_pci, pci, vtpci_driver, vtpci_devclass, 0, 0);
MODULE_VERSION(virtio_pci, 1);
MODULE_DEPEND(virtio_pci, pci, 1, 1, 1);
MODULE_DEPEND(virtio_pci, virtio, 1, 1, 1);
/*
 * Match legacy VirtIO PCI devices by vendor id, device id range, and
 * ABI revision, and build a human-readable description from the
 * subdevice id.
 */
static int
vtpci_probe(device_t dev)
{
	const char *name;
	char desc[36];

	/* Reject anything that is not a legacy-ABI VirtIO PCI device. */
	if (pci_get_vendor(dev) != VIRTIO_PCI_VENDORID ||
	    pci_get_device(dev) < VIRTIO_PCI_DEVICEID_MIN ||
	    pci_get_device(dev) > VIRTIO_PCI_DEVICEID_MAX ||
	    pci_get_revid(dev) != VIRTIO_PCI_ABI_VERSION)
		return (ENXIO);

	name = virtio_device_name(pci_get_subdevice(dev));
	if (name == NULL)
		name = "Unknown";

	snprintf(desc, sizeof(desc), "VirtIO PCI %s adapter", name);
	device_set_desc_copy(dev, desc);

	return (BUS_PROBE_DEFAULT);
}
/*
 * Attach: map BAR(0), discover MSI/MSIX capabilities, reset the device,
 * ACK it, and create the (single) child device for the class driver.
 */
static int
vtpci_attach(device_t dev)
{
	struct vtpci_softc *sc;
	device_t child;
	int rid;

	sc = device_get_softc(dev);
	sc->vtpci_dev = dev;

	pci_enable_busmaster(dev);

	/* Legacy VirtIO registers live in the BAR(0) I/O port window. */
	rid = PCIR_BAR(0);
	sc->vtpci_res = bus_alloc_resource_any(dev, SYS_RES_IOPORT, &rid,
	    RF_ACTIVE);
	if (sc->vtpci_res == NULL) {
		device_printf(dev, "cannot map I/O space\n");
		return (ENXIO);
	}

	/*
	 * For legacy VirtIO, the device-specific configuration is guest
	 * endian, while the common configuration header is always
	 * PCI (little) endian and will be handled specifically in
	 * other parts of this file via functions
	 * 'vtpci_[read|write]_header_[2|4]'
	 */
#if _BYTE_ORDER == _BIG_ENDIAN
	rman_set_bustag(sc->vtpci_res, &bs_be_tag);
#endif

	if (pci_find_cap(dev, PCIY_MSI, NULL) != 0)
		sc->vtpci_flags |= VTPCI_FLAG_NO_MSI;

	/* BAR(1) holds the MSIX table; only map it if the cap exists. */
	if (pci_find_cap(dev, PCIY_MSIX, NULL) == 0) {
		rid = PCIR_BAR(1);
		sc->vtpci_msix_res = bus_alloc_resource_any(dev,
		    SYS_RES_MEMORY, &rid, RF_ACTIVE);
	}

	if (sc->vtpci_msix_res == NULL)
		sc->vtpci_flags |= VTPCI_FLAG_NO_MSIX;

	vtpci_reset(sc);

	/* Tell the host we've noticed this device. */
	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);

	if ((child = device_add_child(dev, NULL, -1)) == NULL) {
		device_printf(dev, "cannot create child device\n");
		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED);
		vtpci_detach(dev);
		return (ENOMEM);
	}

	sc->vtpci_child_dev = child;
	vtpci_probe_and_attach_child(sc);

	return (0);
}
/*
 * Detach: delete the child first (it may veto), then reset the host
 * device and release the BAR resources acquired in attach.
 */
static int
vtpci_detach(device_t dev)
{
	struct vtpci_softc *sc;
	device_t child;
	int error;

	sc = device_get_softc(dev);

	if ((child = sc->vtpci_child_dev) != NULL) {
		error = device_delete_child(dev, child);
		if (error)
			return (error);
		sc->vtpci_child_dev = NULL;
	}

	vtpci_reset(sc);

	if (sc->vtpci_msix_res != NULL) {
		bus_release_resource(dev, SYS_RES_MEMORY, PCIR_BAR(1),
		    sc->vtpci_msix_res);
		sc->vtpci_msix_res = NULL;
	}

	if (sc->vtpci_res != NULL) {
		bus_release_resource(dev, SYS_RES_IOPORT, PCIR_BAR(0),
		    sc->vtpci_res);
		sc->vtpci_res = NULL;
	}

	return (0);
}
/* Suspend/resume simply forward to the generic bus implementations. */
static int
vtpci_suspend(device_t dev)
{
	return (bus_generic_suspend(dev));
}

static int
vtpci_resume(device_t dev)
{
	return (bus_generic_resume(dev));
}

static int
vtpci_shutdown(device_t dev)
{
	(void) bus_generic_shutdown(dev);

	/* Forcibly stop the host device. */
	vtpci_stop(dev);

	return (0);
}
/* A new driver was loaded; retry probing/attaching our child. */
static void
vtpci_driver_added(device_t dev, driver_t *driver)
{
	struct vtpci_softc *sc;

	sc = device_get_softc(dev);

	vtpci_probe_and_attach_child(sc);
}

/* Child went away: reset the device and reclaim its IRQs/virtqueues. */
static void
vtpci_child_detached(device_t dev, device_t child)
{
	struct vtpci_softc *sc;

	sc = device_get_softc(dev);

	vtpci_reset(sc);
	vtpci_release_child_resources(sc);
}
/*
 * Export PCI identification details to our child via instance
 * variables. Only our own child may query them.
 */
static int
vtpci_read_ivar(device_t dev, device_t child, int index, uintptr_t *result)
{
	struct vtpci_softc *sc = device_get_softc(dev);

	if (child != sc->vtpci_child_dev)
		return (ENOENT);

	switch (index) {
	case VIRTIO_IVAR_DEVTYPE:	/* FALLTHROUGH */
	case VIRTIO_IVAR_SUBDEVICE:
		/* For legacy VirtIO, the subdevice id is the device type. */
		*result = pci_get_subdevice(dev);
		break;
	case VIRTIO_IVAR_VENDOR:
		*result = pci_get_vendor(dev);
		break;
	case VIRTIO_IVAR_DEVICE:
		*result = pci_get_device(dev);
		break;
	case VIRTIO_IVAR_SUBVENDOR:
		*result = pci_get_subvendor(dev);
		break;
	default:
		return (ENOENT);
	}

	return (0);
}
/*
 * Accept writable instance variables from our child. Only the feature
 * description table may be set.
 */
static int
vtpci_write_ivar(device_t dev, device_t child, int index, uintptr_t value)
{
	struct vtpci_softc *sc = device_get_softc(dev);

	if (child != sc->vtpci_child_dev)
		return (ENOENT);

	if (index != VIRTIO_IVAR_FEATURE_DESC)
		return (ENOENT);

	sc->vtpci_child_feat_desc = (void *) value;
	return (0);
}
/*
 * Negotiate features with the host: intersect what the host offers
 * with what the child driver and the virtqueue layer support, then
 * write the result back to the guest-features register.
 */
static uint64_t
vtpci_negotiate_features(device_t dev, uint64_t child_features)
{
	struct vtpci_softc *sc;
	uint64_t host_features, features;

	sc = device_get_softc(dev);

	host_features = vtpci_read_header_4(sc, VIRTIO_PCI_HOST_FEATURES);
	vtpci_describe_features(sc, "host", host_features);

	/*
	 * Limit negotiated features to what the driver, virtqueue, and
	 * host all support.
	 */
	features = host_features & child_features;
	features = virtqueue_filter_features(features);
	sc->vtpci_features = features;

	vtpci_describe_features(sc, "negotiated", features);
	vtpci_write_header_4(sc, VIRTIO_PCI_GUEST_FEATURES, features);

	return (features);
}
/* Report whether any of the given feature bits were negotiated. */
static int
vtpci_with_feature(device_t dev, uint64_t feature)
{
	struct vtpci_softc *sc = device_get_softc(dev);

	return ((sc->vtpci_features & feature) ? 1 : 0);
}
/*
 * Allocate and register the child's virtqueues. For each queue: select
 * it, read the host-advertised size, allocate the ring, and publish its
 * physical address via the queue PFN register. On any failure all
 * queues allocated so far are torn down.
 */
static int
vtpci_alloc_virtqueues(device_t dev, int flags, int nvqs,
    struct vq_alloc_info *vq_info)
{
	struct vtpci_softc *sc;
	struct virtqueue *vq;
	struct vtpci_virtqueue *vqx;
	struct vq_alloc_info *info;
	int idx, error;
	uint16_t size;

	sc = device_get_softc(dev);

	if (sc->vtpci_nvqs != 0)
		return (EALREADY);
	if (nvqs <= 0)
		return (EINVAL);

	sc->vtpci_vqs = malloc(nvqs * sizeof(struct vtpci_virtqueue),
	    M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->vtpci_vqs == NULL)
		return (ENOMEM);

	/* nvqs > 0, so the loop body runs at least once and sets 'error'. */
	for (idx = 0; idx < nvqs; idx++) {
		vqx = &sc->vtpci_vqs[idx];
		info = &vq_info[idx];

		vtpci_select_virtqueue(sc, idx);
		size = vtpci_read_header_2(sc, VIRTIO_PCI_QUEUE_NUM);

		error = virtqueue_alloc(dev, idx, size, VIRTIO_PCI_VRING_ALIGN,
		    ~(vm_paddr_t)0, info, &vq);
		if (error) {
			device_printf(dev,
			    "cannot allocate virtqueue %d: %d\n", idx, error);
			break;
		}

		/* Tell the host where the ring lives (page frame number). */
		vtpci_write_header_4(sc, VIRTIO_PCI_QUEUE_PFN,
		    virtqueue_paddr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);

		vqx->vtv_vq = *info->vqai_vq = vq;
		vqx->vtv_no_intr = info->vqai_intr == NULL;

		sc->vtpci_nvqs++;
	}

	if (error)
		vtpci_free_virtqueues(sc);

	return (error);
}
/*
 * Set up interrupts for the child, falling back through progressively
 * less desirable configurations: per-VQ MSIX, shared MSIX, MSI, and
 * finally legacy INTx.
 */
static int
vtpci_setup_intr(device_t dev, enum intr_type type)
{
	struct vtpci_softc *sc;
	int attempt, error;

	sc = device_get_softc(dev);

	for (attempt = 0; attempt < 5; attempt++) {
		/*
		 * Start with the most desirable interrupt configuration and
		 * fallback towards less desirable ones.
		 */
		switch (attempt) {
		case 0:
			error = vtpci_alloc_intr_msix_pervq(sc);
			break;
		case 1:
			error = vtpci_alloc_intr_msix_shared(sc);
			break;
		case 2:
			error = vtpci_alloc_intr_msi(sc);
			break;
		case 3:
			error = vtpci_alloc_intr_legacy(sc);
			break;
		default:
			device_printf(dev,
			    "exhausted all interrupt allocation attempts\n");
			return (ENXIO);
		}

		if (error == 0 && vtpci_setup_interrupts(sc, type) == 0)
			break;

		/* Undo any partial allocation before the next attempt. */
		vtpci_cleanup_setup_intr_attempt(sc);
	}

	if (bootverbose) {
		if (sc->vtpci_flags & VTPCI_FLAG_LEGACY)
			device_printf(dev, "using legacy interrupt\n");
		else if (sc->vtpci_flags & VTPCI_FLAG_MSI)
			device_printf(dev, "using MSI interrupt\n");
		else if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX)
			device_printf(dev, "using shared MSIX interrupts\n");
		else
			device_printf(dev, "using per VQ MSIX interrupts\n");
	}

	return (0);
}
/* Stop the device by resetting it to its uninitialized state. */
static void
vtpci_stop(device_t dev)
{
	struct vtpci_softc *sc = device_get_softc(dev);

	vtpci_reset(sc);
}
/*
 * Re-drive device initialization after a stop, reusing the previously
 * allocated virtqueues. The device is not usable again until the child
 * calls vtpci_reinit_complete().
 */
static int
vtpci_reinit(device_t dev, uint64_t features)
{
	struct vtpci_softc *sc;
	int idx, error;

	sc = device_get_softc(dev);

	/*
	 * Redrive the device initialization. This is a bit of an abuse of
	 * the specification, but VirtualBox, QEMU/KVM, and BHyVe seem to
	 * play nice.
	 *
	 * We do not allow the host device to change from what was originally
	 * negotiated beyond what the guest driver changed. MSIX state should
	 * not change, number of virtqueues and their size remain the same, etc.
	 * This will need to be rethought when we want to support migration.
	 */

	if (vtpci_get_status(dev) != VIRTIO_CONFIG_STATUS_RESET)
		vtpci_stop(dev);

	/*
	 * Quickly drive the status through ACK and DRIVER. The device
	 * does not become usable again until vtpci_reinit_complete().
	 */
	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER);

	vtpci_negotiate_features(dev, features);

	for (idx = 0; idx < sc->vtpci_nvqs; idx++) {
		error = vtpci_reinit_virtqueue(sc, idx);
		if (error)
			return (error);
	}

	if (sc->vtpci_flags & VTPCI_FLAG_MSIX) {
		error = vtpci_set_host_msix_vectors(sc);
		if (error)
			return (error);
	}

	return (0);
}

/* Mark the device fully initialized and usable again. */
static void
vtpci_reinit_complete(device_t dev)
{
	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK);
}
/* Kick the host: write the queue index into the notify register. */
static void
vtpci_notify_virtqueue(device_t dev, uint16_t queue)
{
	struct vtpci_softc *sc = device_get_softc(dev);

	vtpci_write_header_2(sc, VIRTIO_PCI_QUEUE_NOTIFY, queue);
}

/* Read the device status register. */
static uint8_t
vtpci_get_status(device_t dev)
{
	struct vtpci_softc *sc = device_get_softc(dev);

	return (vtpci_read_config_1(sc, VIRTIO_PCI_STATUS));
}

/*
 * Update the device status register. Any write other than a reset
 * preserves the status bits that are already set.
 */
static void
vtpci_set_status(device_t dev, uint8_t status)
{
	struct vtpci_softc *sc = device_get_softc(dev);

	if (status != VIRTIO_CONFIG_STATUS_RESET)
		status |= vtpci_get_status(dev);

	vtpci_write_config_1(sc, VIRTIO_PCI_STATUS, status);
}
/*
 * Copy 'length' bytes out of the device-specific config region into
 * 'dst', using the widest access (4/2/1 bytes) the remaining length
 * allows. Device config is guest endian, so no byte swapping is done.
 */
static void
vtpci_read_dev_config(device_t dev, bus_size_t offset,
    void *dst, int length)
{
	struct vtpci_softc *sc;
	bus_size_t off;
	uint8_t *d;
	int size;

	sc = device_get_softc(dev);
	off = VIRTIO_PCI_CONFIG(sc) + offset;

	for (d = dst; length > 0; d += size, off += size, length -= size) {
		if (length >= 4) {
			size = 4;
			*(uint32_t *)d = vtpci_read_config_4(sc, off);
		} else if (length >= 2) {
			size = 2;
			*(uint16_t *)d = vtpci_read_config_2(sc, off);
		} else {
			size = 1;
			*d = vtpci_read_config_1(sc, off);
		}
	}
}

/*
 * Mirror of vtpci_read_dev_config(): copy 'length' bytes from 'src'
 * into the device-specific config region.
 */
static void
vtpci_write_dev_config(device_t dev, bus_size_t offset,
    void *src, int length)
{
	struct vtpci_softc *sc;
	bus_size_t off;
	uint8_t *s;
	int size;

	sc = device_get_softc(dev);
	off = VIRTIO_PCI_CONFIG(sc) + offset;

	for (s = src; length > 0; s += size, off += size, length -= size) {
		if (length >= 4) {
			size = 4;
			vtpci_write_config_4(sc, off, *(uint32_t *)s);
		} else if (length >= 2) {
			size = 2;
			vtpci_write_config_2(sc, off, *(uint16_t *)s);
		} else {
			size = 1;
			vtpci_write_config_1(sc, off, *s);
		}
	}
}
/*
 * Print a feature-bit description, but only while booting verbosely and
 * before the child has attached (afterwards the output is just noise).
 */
static void
vtpci_describe_features(struct vtpci_softc *sc, const char *msg,
    uint64_t features)
{
	device_t dev, child;

	dev = sc->vtpci_dev;
	child = sc->vtpci_child_dev;

	if (device_is_attached(child) || bootverbose == 0)
		return;

	virtio_describe(dev, msg, features, sc->vtpci_child_feat_desc);
}

/*
 * Drive the child through probe and attach, updating the VirtIO status
 * register to reflect the outcome; on failure the device is reset and
 * re-ACKed so a later driver load can try again.
 */
static void
vtpci_probe_and_attach_child(struct vtpci_softc *sc)
{
	device_t dev, child;

	dev = sc->vtpci_dev;
	child = sc->vtpci_child_dev;

	if (child == NULL)
		return;
	if (device_get_state(child) != DS_NOTPRESENT)
		return;
	if (device_probe(child) != 0)
		return;

	vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER);
	if (device_attach(child) != 0) {
		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_FAILED);
		vtpci_reset(sc);
		vtpci_release_child_resources(sc);
		/* Reset status for future attempt. */
		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_ACK);
	} else {
		vtpci_set_status(dev, VIRTIO_CONFIG_STATUS_DRIVER_OK);
		VIRTIO_ATTACH_COMPLETED(child);
	}
}
/*
 * Allocate nvectors + 1 MSIX messages (the extra one is for config
 * change notifications). Returns 0 on success, non-zero otherwise.
 */
static int
vtpci_alloc_msix(struct vtpci_softc *sc, int nvectors)
{
	device_t dev;
	int nmsix, cnt, required;

	dev = sc->vtpci_dev;

	/* Allocate an additional vector for the config changes. */
	required = nvectors + 1;

	nmsix = pci_msix_count(dev);
	if (nmsix < required)
		return (1);

	cnt = required;
	if (pci_alloc_msix(dev, &cnt) == 0 && cnt >= required) {
		sc->vtpci_nmsix_resources = required;
		return (0);
	}

	pci_release_msi(dev);

	return (1);
}

/* Allocate a single MSI message. Returns 0 on success, non-zero otherwise. */
static int
vtpci_alloc_msi(struct vtpci_softc *sc)
{
	device_t dev;
	int nmsi, cnt, required;

	dev = sc->vtpci_dev;
	required = 1;

	nmsi = pci_msi_count(dev);
	if (nmsi < required)
		return (1);

	cnt = required;
	if (pci_alloc_msi(dev, &cnt) == 0 && cnt >= required)
		return (0);

	pci_release_msi(dev);

	return (1);
}
/*
 * Interrupt-allocation strategies, tried in order by vtpci_setup_intr().
 * Each sets the corresponding VTPCI_FLAG_* on success.
 */

/* One MSIX vector per virtqueue that has an interrupt callback. */
static int
vtpci_alloc_intr_msix_pervq(struct vtpci_softc *sc)
{
	int i, nvectors, error;

	if (vtpci_disable_msix != 0 ||
	    sc->vtpci_flags & VTPCI_FLAG_NO_MSIX)
		return (ENOTSUP);

	for (nvectors = 0, i = 0; i < sc->vtpci_nvqs; i++) {
		if (sc->vtpci_vqs[i].vtv_no_intr == 0)
			nvectors++;
	}

	error = vtpci_alloc_msix(sc, nvectors);
	if (error)
		return (error);

	sc->vtpci_flags |= VTPCI_FLAG_MSIX;

	return (0);
}

/* All virtqueues share one MSIX vector (plus one for config changes). */
static int
vtpci_alloc_intr_msix_shared(struct vtpci_softc *sc)
{
	int error;

	if (vtpci_disable_msix != 0 ||
	    sc->vtpci_flags & VTPCI_FLAG_NO_MSIX)
		return (ENOTSUP);

	error = vtpci_alloc_msix(sc, 1);
	if (error)
		return (error);

	sc->vtpci_flags |= VTPCI_FLAG_MSIX | VTPCI_FLAG_SHARED_MSIX;

	return (0);
}

/* Whole device on a single MSI message. */
static int
vtpci_alloc_intr_msi(struct vtpci_softc *sc)
{
	int error;

	/* Only BHyVe supports MSI. */
	if (sc->vtpci_flags & VTPCI_FLAG_NO_MSI)
		return (ENOTSUP);

	error = vtpci_alloc_msi(sc);
	if (error)
		return (error);

	sc->vtpci_flags |= VTPCI_FLAG_MSI;

	return (0);
}

/* Last resort: shared legacy INTx. Cannot fail. */
static int
vtpci_alloc_intr_legacy(struct vtpci_softc *sc)
{
	sc->vtpci_flags |= VTPCI_FLAG_LEGACY;

	return (0);
}
/* Allocate one SYS_RES_IRQ resource and record it in 'intr'. */
static int
vtpci_alloc_interrupt(struct vtpci_softc *sc, int rid, int flags,
    struct vtpci_interrupt *intr)
{
	struct resource *irq;

	irq = bus_alloc_resource_any(sc->vtpci_dev, SYS_RES_IRQ, &rid, flags);
	if (irq == NULL)
		return (ENXIO);

	intr->vti_irq = irq;
	intr->vti_rid = rid;

	return (0);
}

/*
 * Allocate the IRQ resources required by the selected interrupt style:
 * one (rid 0, shareable) for legacy, one (rid 1) for MSI, or one per
 * MSIX message starting at rid 1 for MSIX.
 */
static int
vtpci_alloc_intr_resources(struct vtpci_softc *sc)
{
	struct vtpci_interrupt *intr;
	int i, rid, flags, nvq_intrs, error;

	rid = 0;
	flags = RF_ACTIVE;

	if (sc->vtpci_flags & VTPCI_FLAG_LEGACY)
		flags |= RF_SHAREABLE;
	else
		rid = 1;

	/*
	 * For legacy and MSI interrupts, this single resource handles all
	 * interrupts. For MSIX, this resource is used for the configuration
	 * changed interrupt.
	 */
	intr = &sc->vtpci_device_interrupt;
	error = vtpci_alloc_interrupt(sc, rid, flags, intr);
	if (error || sc->vtpci_flags & (VTPCI_FLAG_LEGACY | VTPCI_FLAG_MSI))
		return (error);

	/* Subtract one for the configuration changed interrupt. */
	nvq_intrs = sc->vtpci_nmsix_resources - 1;

	intr = sc->vtpci_msix_vq_interrupts = malloc(nvq_intrs *
	    sizeof(struct vtpci_interrupt), M_DEVBUF, M_NOWAIT | M_ZERO);
	if (sc->vtpci_msix_vq_interrupts == NULL)
		return (ENOMEM);

	for (i = 0, rid++; i < nvq_intrs; i++, rid++, intr++) {
		error = vtpci_alloc_interrupt(sc, rid, flags, intr);
		if (error)
			return (error);
	}

	return (0);
}
/*
 * Wire the single legacy (or MSI, via the vtpci_setup_msi_interrupt
 * alias) interrupt to vtpci_legacy_intr.
 */
static int
vtpci_setup_legacy_interrupt(struct vtpci_softc *sc, enum intr_type type)
{
	struct vtpci_interrupt *intr;
	int error;

	intr = &sc->vtpci_device_interrupt;
	error = bus_setup_intr(sc->vtpci_dev, intr->vti_irq, type, NULL,
	    vtpci_legacy_intr, sc, &intr->vti_handler);

	return (error);
}

/*
 * Attach a filter + handler to each per-VQ MSIX vector; queues without
 * a callback consume no vector.
 */
static int
vtpci_setup_pervq_msix_interrupts(struct vtpci_softc *sc, enum intr_type type)
{
	struct vtpci_virtqueue *vqx;
	struct vtpci_interrupt *intr;
	int i, error;

	intr = sc->vtpci_msix_vq_interrupts;

	for (i = 0; i < sc->vtpci_nvqs; i++) {
		vqx = &sc->vtpci_vqs[i];

		if (vqx->vtv_no_intr)
			continue;

		error = bus_setup_intr(sc->vtpci_dev, intr->vti_irq, type,
		    vtpci_vq_intr_filter, vtpci_vq_intr, vqx->vtv_vq,
		    &intr->vti_handler);
		if (error)
			return (error);

		intr++;
	}

	return (0);
}

/*
 * MSIX setup: the config-change handler always gets its own vector;
 * the virtqueues get either one shared vector or one vector each.
 * Finishes by programming the host's vector registers.
 */
static int
vtpci_setup_msix_interrupts(struct vtpci_softc *sc, enum intr_type type)
{
	device_t dev;
	struct vtpci_interrupt *intr;
	int error;

	dev = sc->vtpci_dev;
	intr = &sc->vtpci_device_interrupt;

	error = bus_setup_intr(dev, intr->vti_irq, type, NULL,
	    vtpci_config_intr, sc, &intr->vti_handler);
	if (error)
		return (error);

	if (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) {
		intr = sc->vtpci_msix_vq_interrupts;
		error = bus_setup_intr(dev, intr->vti_irq, type,
		    vtpci_vq_shared_intr_filter, vtpci_vq_shared_intr, sc,
		    &intr->vti_handler);
	} else
		error = vtpci_setup_pervq_msix_interrupts(sc, type);

	return (error ? error : vtpci_set_host_msix_vectors(sc));
}
/*
 * Allocate the IRQ resources for the interrupt style selected by the
 * flags and dispatch to the matching setup routine.
 */
static int
vtpci_setup_interrupts(struct vtpci_softc *sc, enum intr_type type)
{
	int error;

	type |= INTR_MPSAFE;
	KASSERT(sc->vtpci_flags & VTPCI_FLAG_ITYPE_MASK,
	    ("%s: no interrupt type selected %#x", __func__, sc->vtpci_flags));

	error = vtpci_alloc_intr_resources(sc);
	if (error)
		return (error);

	if (sc->vtpci_flags & VTPCI_FLAG_LEGACY)
		error = vtpci_setup_legacy_interrupt(sc, type);
	else if (sc->vtpci_flags & VTPCI_FLAG_MSI)
		error = vtpci_setup_msi_interrupt(sc, type);
	else
		error = vtpci_setup_msix_interrupts(sc, type);

	return (error);
}
/*
 * Program one MSIX vector register ('offset' selects config or queue
 * vector) and read it back to confirm the host accepted it; a NULL
 * 'intr' disables the vector.
 */
static int
vtpci_register_msix_vector(struct vtpci_softc *sc, int offset,
    struct vtpci_interrupt *intr)
{
	device_t dev;
	uint16_t vector;

	dev = sc->vtpci_dev;

	if (intr != NULL) {
		/* Map from guest rid to host vector. */
		vector = intr->vti_rid - 1;
	} else
		vector = VIRTIO_MSI_NO_VECTOR;

	vtpci_write_header_2(sc, offset, vector);

	/* Read vector to determine if the host had sufficient resources. */
	if (vtpci_read_header_2(sc, offset) != vector) {
		device_printf(dev,
		    "insufficient host resources for MSIX interrupts\n");
		return (ENODEV);
	}

	return (0);
}

/*
 * Program the host's config-change vector and each virtqueue's vector.
 * With shared MSIX, every interrupting queue uses the first VQ vector.
 */
static int
vtpci_set_host_msix_vectors(struct vtpci_softc *sc)
{
	struct vtpci_interrupt *intr, *tintr;
	int idx, offset, error;

	intr = &sc->vtpci_device_interrupt;
	offset = VIRTIO_MSI_CONFIG_VECTOR;

	error = vtpci_register_msix_vector(sc, offset, intr);
	if (error)
		return (error);

	intr = sc->vtpci_msix_vq_interrupts;
	offset = VIRTIO_MSI_QUEUE_VECTOR;

	for (idx = 0; idx < sc->vtpci_nvqs; idx++) {
		vtpci_select_virtqueue(sc, idx);

		if (sc->vtpci_vqs[idx].vtv_no_intr)
			tintr = NULL;
		else
			tintr = intr;

		error = vtpci_register_msix_vector(sc, offset, tintr);
		if (error)
			break;

		/*
		 * For shared MSIX, all the virtqueues share the first
		 * interrupt.
		 */
		if (!sc->vtpci_vqs[idx].vtv_no_intr &&
		    (sc->vtpci_flags & VTPCI_FLAG_SHARED_MSIX) == 0)
			intr++;
	}

	return (error);
}
/*
 * Re-initialize an already-allocated virtqueue after a device reset and
 * republish its ring address to the host.
 */
static int
vtpci_reinit_virtqueue(struct vtpci_softc *sc, int idx)
{
	struct vtpci_virtqueue *vqx;
	struct virtqueue *vq;
	int error;
	uint16_t size;

	vqx = &sc->vtpci_vqs[idx];
	vq = vqx->vtv_vq;

	KASSERT(vq != NULL, ("%s: vq %d not allocated", __func__, idx));

	vtpci_select_virtqueue(sc, idx);
	size = vtpci_read_header_2(sc, VIRTIO_PCI_QUEUE_NUM);

	error = virtqueue_reinit(vq, size);
	if (error)
		return (error);

	vtpci_write_header_4(sc, VIRTIO_PCI_QUEUE_PFN,
	    virtqueue_paddr(vq) >> VIRTIO_PCI_QUEUE_ADDR_SHIFT);

	return (0);
}
/* Tear down one interrupt: remove the handler, release the IRQ resource. */
static void
vtpci_free_interrupt(struct vtpci_softc *sc, struct vtpci_interrupt *intr)
{
	device_t dev;

	dev = sc->vtpci_dev;

	if (intr->vti_handler != NULL) {
		bus_teardown_intr(dev, intr->vti_irq, intr->vti_handler);
		intr->vti_handler = NULL;
	}

	if (intr->vti_irq != NULL) {
		bus_release_resource(dev, SYS_RES_IRQ, intr->vti_rid,
		    intr->vti_irq);
		intr->vti_irq = NULL;
		intr->vti_rid = -1;
	}
}

/*
 * Release every allocated interrupt (device/config plus any per-VQ MSIX
 * vectors), return the MSI/MSIX messages to the PCI layer, and clear
 * the interrupt-type flags so a new style can be selected.
 */
static void
vtpci_free_interrupts(struct vtpci_softc *sc)
{
	struct vtpci_interrupt *intr;
	int i, nvq_intrs;

	vtpci_free_interrupt(sc, &sc->vtpci_device_interrupt);

	if (sc->vtpci_nmsix_resources != 0) {
		nvq_intrs = sc->vtpci_nmsix_resources - 1;
		sc->vtpci_nmsix_resources = 0;

		intr = sc->vtpci_msix_vq_interrupts;
		if (intr != NULL) {
			for (i = 0; i < nvq_intrs; i++, intr++)
				vtpci_free_interrupt(sc, intr);

			free(sc->vtpci_msix_vq_interrupts, M_DEVBUF);
			sc->vtpci_msix_vq_interrupts = NULL;
		}
	}

	if (sc->vtpci_flags & (VTPCI_FLAG_MSI | VTPCI_FLAG_MSIX))
		pci_release_msi(sc->vtpci_dev);

	sc->vtpci_flags &= ~VTPCI_FLAG_ITYPE_MASK;
}
/*
 * Unpublish (PFN = 0) and free every virtqueue, then release the
 * bookkeeping array.
 */
static void
vtpci_free_virtqueues(struct vtpci_softc *sc)
{
	struct vtpci_virtqueue *vqx;
	int idx;

	for (idx = 0; idx < sc->vtpci_nvqs; idx++) {
		vqx = &sc->vtpci_vqs[idx];

		vtpci_select_virtqueue(sc, idx);
		vtpci_write_header_4(sc, VIRTIO_PCI_QUEUE_PFN, 0);

		virtqueue_free(vqx->vtv_vq);
		vqx->vtv_vq = NULL;
	}

	free(sc->vtpci_vqs, M_DEVBUF);
	sc->vtpci_vqs = NULL;
	sc->vtpci_nvqs = 0;
}

/* Reclaim everything allocated on behalf of the child device. */
static void
vtpci_release_child_resources(struct vtpci_softc *sc)
{
	vtpci_free_interrupts(sc);
	vtpci_free_virtqueues(sc);
}
/*
 * Undo a failed interrupt-setup attempt: disable all host MSIX vectors
 * (config and every queue) and free whatever interrupts were allocated.
 */
static void
vtpci_cleanup_setup_intr_attempt(struct vtpci_softc *sc)
{
	int idx;

	if (sc->vtpci_flags & VTPCI_FLAG_MSIX) {
		vtpci_write_header_2(sc, VIRTIO_MSI_CONFIG_VECTOR,
		    VIRTIO_MSI_NO_VECTOR);

		for (idx = 0; idx < sc->vtpci_nvqs; idx++) {
			vtpci_select_virtqueue(sc, idx);
			vtpci_write_header_2(sc, VIRTIO_MSI_QUEUE_VECTOR,
			    VIRTIO_MSI_NO_VECTOR);
		}
	}

	vtpci_free_interrupts(sc);
}
static void
vtpci_reset(struct vtpci_softc *sc)
{
	/*
	 * Setting the status to RESET sets the host device to
	 * the original, uninitialized state.
	 */
	vtpci_set_status(sc->vtpci_dev, VIRTIO_CONFIG_STATUS_RESET);
}

/* Make virtqueue 'idx' current for subsequent QUEUE_* register accesses. */
static void
vtpci_select_virtqueue(struct vtpci_softc *sc, int idx)
{
	vtpci_write_header_2(sc, VIRTIO_PCI_QUEUE_SEL, idx);
}
/*
 * Legacy/MSI interrupt handler: the ISR register says whether the
 * interrupt was a config change, a virtqueue notification, or both.
 */
static void
vtpci_legacy_intr(void *xsc)
{
	struct vtpci_softc *sc;
	struct vtpci_virtqueue *vqx;
	int i;
	uint8_t isr;

	sc = xsc;
	vqx = &sc->vtpci_vqs[0];

	/* Reading the ISR also clears it. */
	isr = vtpci_read_config_1(sc, VIRTIO_PCI_ISR);

	if (isr & VIRTIO_PCI_ISR_CONFIG)
		vtpci_config_intr(sc);

	if (isr & VIRTIO_PCI_ISR_INTR) {
		/* Service every virtqueue that registered a callback. */
		for (i = 0; i < sc->vtpci_nvqs; i++, vqx++) {
			if (vqx->vtv_no_intr == 0)
				virtqueue_intr(vqx->vtv_vq);
		}
	}
}
/*
 * Filter for the shared-MSIX vector: poll every virtqueue's filter and
 * schedule the threaded handler if any of them has work.
 */
static int
vtpci_vq_shared_intr_filter(void *xsc)
{
	struct vtpci_softc *sc;
	struct vtpci_virtqueue *vqx;
	int i, rc;

	rc = 0;
	sc = xsc;
	vqx = &sc->vtpci_vqs[0];

	for (i = 0; i < sc->vtpci_nvqs; i++, vqx++) {
		if (vqx->vtv_no_intr == 0)
			rc |= virtqueue_intr_filter(vqx->vtv_vq);
	}

	return (rc ? FILTER_SCHEDULE_THREAD : FILTER_STRAY);
}

/* Threaded handler for the shared-MSIX vector: service all virtqueues. */
static void
vtpci_vq_shared_intr(void *xsc)
{
	struct vtpci_softc *sc;
	struct vtpci_virtqueue *vqx;
	int i;

	sc = xsc;
	vqx = &sc->vtpci_vqs[0];

	for (i = 0; i < sc->vtpci_nvqs; i++, vqx++) {
		if (vqx->vtv_no_intr == 0)
			virtqueue_intr(vqx->vtv_vq);
	}
}
/*
 * Per-virtqueue MSIX filter: ask the queue whether its threaded handler
 * needs to run.
 */
static int
vtpci_vq_intr_filter(void *xvq)
{
	struct virtqueue *vq = xvq;

	return (virtqueue_intr_filter(vq) != 0 ?
	    FILTER_SCHEDULE_THREAD : FILTER_STRAY);
}

/* Per-virtqueue MSIX threaded handler. */
static void
vtpci_vq_intr(void *xvq)
{
	struct virtqueue *vq = xvq;

	virtqueue_intr(vq);
}

/* Configuration-change interrupt: forward the event to the child driver. */
static void
vtpci_config_intr(void *xsc)
{
	struct vtpci_softc *sc = xsc;
	device_t child = sc->vtpci_child_dev;

	if (child != NULL)
		VIRTIO_CONFIG_CHANGE(child);
}