844a6f0c53
- use PCI_VENDOR and PCI_DEVICE ids from a publicly allocated range (thanks to RedHat) - export memory pool information through PCI registers - improve mechanism for configuring passthrough on different hypervisors Code is from Vincenzo Maffione as a follow up to his GSOC work.
306 lines
11 KiB
C
306 lines
11 KiB
C
/*
 * Copyright (C) 2013-2016 Luigi Rizzo
 * Copyright (C) 2013-2016 Giuseppe Lettieri
 * Copyright (C) 2013-2016 Vincenzo Maffione
 * Copyright (C) 2015 Stefano Garzarella
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *   1. Redistributions of source code must retain the above copyright
 *      notice, this list of conditions and the following disclaimer.
 *   2. Redistributions in binary form must reproduce the above copyright
 *      notice, this list of conditions and the following disclaimer in the
 *      documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */
|
|
|
|
#ifndef NETMAP_VIRT_H
|
|
#define NETMAP_VIRT_H
|
|
|
|
/*
 * ptnetmap_memdev: device used to expose memory into the guest VM
 *
 * These macros are used in the hypervisor frontend (QEMU, bhyve) and in the
 * guest device driver.
 */
|
|
|
|
/*
 * PCI identifiers and PCI BARs for the ptnetmap memdev
 * and ptnetmap network interface.
 */
#define PTNETMAP_MEMDEV_NAME		"ptnetmap-memdev"
#define PTNETMAP_PCI_VENDOR_ID		0x1b36	/* QEMU virtual devices */
#define PTNETMAP_PCI_DEVICE_ID		0x000c	/* memory device */
#define PTNETMAP_PCI_NETIF_ID		0x000d	/* ptnet network interface */
#define PTNETMAP_IO_PCI_BAR		0
#define PTNETMAP_MEM_PCI_BAR		1
#define PTNETMAP_MSIX_PCI_BAR		2
|
|
|
|
/* Registers for the ptnetmap memdev */
#define PTNET_MDEV_IO_MEMSIZE_LO	0	/* netmap memory size (low) */
#define PTNET_MDEV_IO_MEMSIZE_HI	4	/* netmap memory size (high) */
#define PTNET_MDEV_IO_MEMID		8	/* memory allocator ID in the host */
/*
 * Memory pool information, one OFS/OBJNUM/OBJSZ triple per pool (if, ring,
 * buf). NOTE(review): presumably these mirror the *_pool_offset,
 * *_pool_objtotal and *_pool_objsize fields of struct netmap_pools_info;
 * confirm against the hypervisor frontend.
 */
#define PTNET_MDEV_IO_IF_POOL_OFS	64
#define PTNET_MDEV_IO_IF_POOL_OBJNUM	68
#define PTNET_MDEV_IO_IF_POOL_OBJSZ	72
#define PTNET_MDEV_IO_RING_POOL_OFS	76
#define PTNET_MDEV_IO_RING_POOL_OBJNUM	80
#define PTNET_MDEV_IO_RING_POOL_OBJSZ	84
#define PTNET_MDEV_IO_BUF_POOL_OFS	88
#define PTNET_MDEV_IO_BUF_POOL_OBJNUM	92
#define PTNET_MDEV_IO_BUF_POOL_OBJSZ	96
#define PTNET_MDEV_IO_END		100
|
|
|
|
/*
 * ptnetmap configuration
 *
 * The ptnet kthreads (running in host kernel-space) need to be configured
 * in order to know how to intercept guest kicks (I/O register writes) and
 * how to inject MSI-X interrupts to the guest. The configuration may vary
 * depending on the hypervisor. Currently, we support QEMU/KVM on Linux and
 * bhyve on FreeBSD.
 * The configuration is passed by the hypervisor to the host netmap module
 * by means of an ioctl() with nr_cmd=NETMAP_PT_HOST_CREATE, and it is
 * specified by the ptnetmap_cfg struct. This struct contains a header
 * with general information and an array of entries whose size depends
 * on the hypervisor. The NETMAP_PT_HOST_CREATE command is issued every
 * time the kthreads are started.
 */
struct ptnetmap_cfg {
#define PTNETMAP_CFGTYPE_QEMU		0x1
#define PTNETMAP_CFGTYPE_BHYVE		0x2
	uint16_t cfgtype;	/* how to interpret the cfg entries */
	uint16_t entry_size;	/* size of a config entry */
	uint32_t num_rings;	/* number of config entries */
	void *ptrings;		/* ptrings inside CSB */

	/* Configuration entries are allocated right after the struct. */
};
|
|
|
|
/* Configuration of a ptnetmap ring for QEMU. */
struct ptnetmap_cfgentry_qemu {
	uint32_t ioeventfd;	/* to intercept guest register access */
	uint32_t irqfd;		/* to inject guest interrupts */
};
|
|
|
|
/* Configuration of a ptnetmap ring for bhyve. */
struct ptnetmap_cfgentry_bhyve {
	uint64_t wchan;		/* tsleep() parameter, to wake up kthread */
	uint32_t ioctl_fd;	/* ioctl fd */
	/* ioctl parameters to send irq */
	uint32_t ioctl_cmd;
	/* vmm.ko MSIX parameters for IOCTL */
	struct {
		uint64_t msg_data;
		uint64_t addr;
	} ioctl_data;
};
|
|
|
|
/*
 * Structure filled-in by the kernel when asked for allocator info
 * through NETMAP_POOLS_INFO_GET. Used by hypervisors supporting
 * ptnetmap.
 */
struct netmap_pools_info {
	uint64_t memsize;	/* same as nmr->nr_memsize */
	uint32_t memid;		/* same as nmr->nr_arg2 */
	uint32_t if_pool_offset;
	uint32_t if_pool_objtotal;
	uint32_t if_pool_objsize;
	uint32_t ring_pool_offset;
	uint32_t ring_pool_objtotal;
	uint32_t ring_pool_objsize;
	uint32_t buf_pool_offset;
	uint32_t buf_pool_objtotal;
	uint32_t buf_pool_objsize;
};
|
|
|
|
/*
|
|
* Pass a pointer to a userspace buffer to be passed to kernelspace for write
|
|
* or read. Used by NETMAP_PT_HOST_CREATE and NETMAP_POOLS_INFO_GET.
|
|
*/
|
|
static inline void
|
|
nmreq_pointer_put(struct nmreq *nmr, void *userptr)
|
|
{
|
|
uintptr_t *pp = (uintptr_t *)&nmr->nr_arg1;
|
|
*pp = (uintptr_t)userptr;
|
|
}
|
|
|
|
/* ptnetmap features */
#define PTNETMAP_F_VNET_HDR		1
|
|
|
|
/* I/O registers for the ptnet device. */
#define PTNET_IO_PTFEAT			0
#define PTNET_IO_PTCTL			4
#define PTNET_IO_MAC_LO			8
#define PTNET_IO_MAC_HI			12
#define PTNET_IO_CSBBAH			16
#define PTNET_IO_CSBBAL			20
#define PTNET_IO_NIFP_OFS		24
#define PTNET_IO_NUM_TX_RINGS		28
#define PTNET_IO_NUM_RX_RINGS		32
#define PTNET_IO_NUM_TX_SLOTS		36
#define PTNET_IO_NUM_RX_SLOTS		40
#define PTNET_IO_VNET_HDR_LEN		44
#define PTNET_IO_HOSTMEMID		48
#define PTNET_IO_END			52
#define PTNET_IO_KICK_BASE		128
#define PTNET_IO_MASK			0xff
|
|
|
|
/* ptnetmap control commands (values for PTCTL register) */
#define PTNETMAP_PTCTL_CREATE		1
#define PTNETMAP_PTCTL_DELETE		2
|
|
|
|
/* If defined, CSB is allocated by the guest, not by the host. */
#define PTNET_CSB_ALLOC
|
|
|
|
/*
 * ptnetmap ring fields shared between guest and host.
 *
 * NOTE(review): the per-field tags presumably read as G = guest, H = host,
 * R = reads, W = writes (e.g. "GW+ HR+" = written by the guest, read by the
 * host); confirm with the authors.
 */
struct ptnet_ring {
	/* XXX revise the layout to minimize cache bounces. */
	uint32_t head;		  /* GW+ HR+ the head of the guest netmap_ring */
	uint32_t cur;		  /* GW+ HR+ the cur of the guest netmap_ring */
	uint32_t guest_need_kick; /* GW+ HR+ host-->guest notification enable */
	uint32_t sync_flags;	  /* GW+ HR+ the flags of the guest [tx|rx]sync() */
	uint32_t hwcur;		  /* GR+ HW+ the hwcur of the host netmap_kring */
	uint32_t hwtail;	  /* GR+ HW+ the hwtail of the host netmap_kring */
	uint32_t host_need_kick;  /* GR+ HW+ guest-->host notification enable */
	char pad[4];
};
|
|
|
|
/* CSB for the ptnet device. */
|
|
struct ptnet_csb {
|
|
#define NETMAP_VIRT_CSB_SIZE 4096
|
|
struct ptnet_ring rings[NETMAP_VIRT_CSB_SIZE/sizeof(struct ptnet_ring)];
|
|
};
|
|
|
|
#ifdef WITH_PTNETMAP_GUEST
|
|
|
|
/* ptnetmap_memdev routines used to talk with ptnetmap_memdev device driver */
|
|
struct ptnetmap_memdev;
|
|
int nm_os_pt_memdev_iomap(struct ptnetmap_memdev *, vm_paddr_t *, void **,
|
|
uint64_t *);
|
|
void nm_os_pt_memdev_iounmap(struct ptnetmap_memdev *);
|
|
uint32_t nm_os_pt_memdev_ioread(struct ptnetmap_memdev *, unsigned int);
|
|
|
|
/* Guest driver: Write kring pointers (cur, head) to the CSB.
|
|
* This routine is coupled with ptnetmap_host_read_kring_csb(). */
|
|
static inline void
|
|
ptnetmap_guest_write_kring_csb(struct ptnet_ring *ptr, uint32_t cur,
|
|
uint32_t head)
|
|
{
|
|
/*
|
|
* We need to write cur and head to the CSB but we cannot do it atomically.
|
|
* There is no way we can prevent the host from reading the updated value
|
|
* of one of the two and the old value of the other. However, if we make
|
|
* sure that the host never reads a value of head more recent than the
|
|
* value of cur we are safe. We can allow the host to read a value of cur
|
|
* more recent than the value of head, since in the netmap ring cur can be
|
|
* ahead of head and cur cannot wrap around head because it must be behind
|
|
* tail. Inverting the order of writes below could instead result into the
|
|
* host to think head went ahead of cur, which would cause the sync
|
|
* prologue to fail.
|
|
*
|
|
* The following memory barrier scheme is used to make this happen:
|
|
*
|
|
* Guest Host
|
|
*
|
|
* STORE(cur) LOAD(head)
|
|
* mb() <-----------> mb()
|
|
* STORE(head) LOAD(cur)
|
|
*/
|
|
ptr->cur = cur;
|
|
mb();
|
|
ptr->head = head;
|
|
}
|
|
|
|
/* Guest driver: Read kring pointers (hwcur, hwtail) from the CSB.
|
|
* This routine is coupled with ptnetmap_host_write_kring_csb(). */
|
|
static inline void
|
|
ptnetmap_guest_read_kring_csb(struct ptnet_ring *ptr, struct netmap_kring *kring)
|
|
{
|
|
/*
|
|
* We place a memory barrier to make sure that the update of hwtail never
|
|
* overtakes the update of hwcur.
|
|
* (see explanation in ptnetmap_host_write_kring_csb).
|
|
*/
|
|
kring->nr_hwtail = ptr->hwtail;
|
|
mb();
|
|
kring->nr_hwcur = ptr->hwcur;
|
|
}
|
|
|
|
#endif /* WITH_PTNETMAP_GUEST */
|
|
|
|
#ifdef WITH_PTNETMAP_HOST
|
|
/*
 * ptnetmap kernel thread routines
 */
|
|
|
|
/*
 * Functions to read and write CSB fields in the host.
 *
 * CSB_READ(csb, field, r) stores the value of csb->field into r;
 * CSB_WRITE(csb, field, v) stores v into csb->field.
 * The csb argument is parenthesized in the expansion so that an arbitrary
 * pointer expression (e.g. "base + i") can be passed safely.
 */
#if defined (linux)
#define CSB_READ(csb, field, r) (get_user(r, &(csb)->field))
#define CSB_WRITE(csb, field, v) (put_user(v, &(csb)->field))
#else  /* ! linux */
#define CSB_READ(csb, field, r) ((r) = fuword32(&(csb)->field))
#define CSB_WRITE(csb, field, v) (suword32(&(csb)->field, (v)))
#endif /* ! linux */
|
|
|
|
/* Host netmap: Write kring pointers (hwcur, hwtail) to the CSB.
|
|
* This routine is coupled with ptnetmap_guest_read_kring_csb(). */
|
|
static inline void
|
|
ptnetmap_host_write_kring_csb(struct ptnet_ring __user *ptr, uint32_t hwcur,
|
|
uint32_t hwtail)
|
|
{
|
|
/*
|
|
* The same scheme used in ptnetmap_guest_write_kring_csb() applies here.
|
|
* We allow the guest to read a value of hwcur more recent than the value
|
|
* of hwtail, since this would anyway result in a consistent view of the
|
|
* ring state (and hwcur can never wraparound hwtail, since hwcur must be
|
|
* behind head).
|
|
*
|
|
* The following memory barrier scheme is used to make this happen:
|
|
*
|
|
* Guest Host
|
|
*
|
|
* STORE(hwcur) LOAD(hwtail)
|
|
* mb() <-------------> mb()
|
|
* STORE(hwtail) LOAD(hwcur)
|
|
*/
|
|
CSB_WRITE(ptr, hwcur, hwcur);
|
|
mb();
|
|
CSB_WRITE(ptr, hwtail, hwtail);
|
|
}
|
|
|
|
/* Host netmap: Read kring pointers (head, cur, sync_flags) from the CSB.
|
|
* This routine is coupled with ptnetmap_guest_write_kring_csb(). */
|
|
static inline void
|
|
ptnetmap_host_read_kring_csb(struct ptnet_ring __user *ptr,
|
|
struct netmap_ring *shadow_ring,
|
|
uint32_t num_slots)
|
|
{
|
|
/*
|
|
* We place a memory barrier to make sure that the update of head never
|
|
* overtakes the update of cur.
|
|
* (see explanation in ptnetmap_guest_write_kring_csb).
|
|
*/
|
|
CSB_READ(ptr, head, shadow_ring->head);
|
|
mb();
|
|
CSB_READ(ptr, cur, shadow_ring->cur);
|
|
CSB_READ(ptr, sync_flags, shadow_ring->flags);
|
|
}
|
|
|
|
#endif /* WITH_PTNETMAP_HOST */
|
|
|
|
#endif /* NETMAP_VIRT_H */
|