Add netmap support for bhyve
Submitted by: btw
MFC after: 1 week
Differential Revision: https://reviews.freebsd.org/D4826
This commit is contained in:
parent
11f9ca696e
commit
b60204754b
@ -36,6 +36,10 @@ __FBSDID("$FreeBSD$");
|
||||
#include <sys/ioctl.h>
|
||||
#include <machine/atomic.h>
|
||||
#include <net/ethernet.h>
|
||||
#ifndef NETMAP_WITH_LIBS
|
||||
#define NETMAP_WITH_LIBS
|
||||
#endif
|
||||
#include <net/netmap_user.h>
|
||||
|
||||
#include <errno.h>
|
||||
#include <fcntl.h>
|
||||
@ -133,6 +137,8 @@ struct pci_vtnet_softc {
|
||||
struct mevent *vsc_mevp;
|
||||
|
||||
int vsc_tapfd;
|
||||
struct nm_desc *vsc_nmd;
|
||||
|
||||
int vsc_rx_ready;
|
||||
volatile int resetting; /* set and checked outside lock */
|
||||
|
||||
@ -149,6 +155,10 @@ struct pci_vtnet_softc {
|
||||
pthread_mutex_t tx_mtx;
|
||||
pthread_cond_t tx_cond;
|
||||
int tx_in_progress;
|
||||
|
||||
void (*pci_vtnet_rx)(struct pci_vtnet_softc *sc);
|
||||
void (*pci_vtnet_tx)(struct pci_vtnet_softc *sc, struct iovec *iov,
|
||||
int iovcnt, int len);
|
||||
};
|
||||
|
||||
static void pci_vtnet_reset(void *);
|
||||
@ -371,14 +381,208 @@ pci_vtnet_tap_rx(struct pci_vtnet_softc *sc)
|
||||
vq_endchains(vq, 1);
|
||||
}
|
||||
|
||||
static int
|
||||
pci_vtnet_netmap_writev(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
|
||||
{
|
||||
int r, i;
|
||||
int len = 0;
|
||||
|
||||
for (r = nmd->cur_tx_ring; ; ) {
|
||||
struct netmap_ring *ring = NETMAP_TXRING(nmd->nifp, r);
|
||||
uint32_t cur, idx;
|
||||
char *buf;
|
||||
|
||||
if (nm_ring_empty(ring)) {
|
||||
r++;
|
||||
if (r > nmd->last_tx_ring)
|
||||
r = nmd->first_tx_ring;
|
||||
if (r == nmd->cur_rx_ring)
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
cur = ring->cur;
|
||||
idx = ring->slot[cur].buf_idx;
|
||||
buf = NETMAP_BUF(ring, idx);
|
||||
|
||||
for (i = 0; i < iovcnt; i++) {
|
||||
memcpy(&buf[len], iov[i].iov_base, iov[i].iov_len);
|
||||
len += iov[i].iov_len;
|
||||
}
|
||||
ring->slot[cur].len = len;
|
||||
ring->head = ring->cur = nm_ring_next(ring, cur);
|
||||
nmd->cur_tx_ring = r;
|
||||
ioctl(nmd->fd, NIOCTXSYNC, NULL);
|
||||
break;
|
||||
}
|
||||
|
||||
return (len);
|
||||
}
|
||||
|
||||
static inline int
|
||||
pci_vtnet_netmap_readv(struct nm_desc *nmd, struct iovec *iov, int iovcnt)
|
||||
{
|
||||
int len = 0;
|
||||
int i = 0;
|
||||
int r;
|
||||
|
||||
for (r = nmd->cur_rx_ring; ; ) {
|
||||
struct netmap_ring *ring = NETMAP_RXRING(nmd->nifp, r);
|
||||
uint32_t cur, idx;
|
||||
char *buf;
|
||||
size_t left;
|
||||
|
||||
if (nm_ring_empty(ring)) {
|
||||
r++;
|
||||
if (r > nmd->last_rx_ring)
|
||||
r = nmd->first_rx_ring;
|
||||
if (r == nmd->cur_rx_ring)
|
||||
break;
|
||||
continue;
|
||||
}
|
||||
cur = ring->cur;
|
||||
idx = ring->slot[cur].buf_idx;
|
||||
buf = NETMAP_BUF(ring, idx);
|
||||
left = ring->slot[cur].len;
|
||||
|
||||
for (i = 0; i < iovcnt && left > 0; i++) {
|
||||
if (iov[i].iov_len > left)
|
||||
iov[i].iov_len = left;
|
||||
memcpy(iov[i].iov_base, &buf[len], iov[i].iov_len);
|
||||
len += iov[i].iov_len;
|
||||
left -= iov[i].iov_len;
|
||||
}
|
||||
ring->head = ring->cur = nm_ring_next(ring, cur);
|
||||
nmd->cur_rx_ring = r;
|
||||
ioctl(nmd->fd, NIOCRXSYNC, NULL);
|
||||
break;
|
||||
}
|
||||
for (; i < iovcnt; i++)
|
||||
iov[i].iov_len = 0;
|
||||
|
||||
return (len);
|
||||
}
|
||||
|
||||
/*
|
||||
* Called to send a buffer chain out to the vale port
|
||||
*/
|
||||
static void
|
||||
pci_vtnet_tap_callback(int fd, enum ev_type type, void *param)
|
||||
pci_vtnet_netmap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
|
||||
int len)
|
||||
{
|
||||
static char pad[60]; /* all zero bytes */
|
||||
|
||||
if (sc->vsc_nmd == NULL)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If the length is < 60, pad out to that and add the
|
||||
* extra zero'd segment to the iov. It is guaranteed that
|
||||
* there is always an extra iov available by the caller.
|
||||
*/
|
||||
if (len < 60) {
|
||||
iov[iovcnt].iov_base = pad;
|
||||
iov[iovcnt].iov_len = 60 - len;
|
||||
iovcnt++;
|
||||
}
|
||||
(void) pci_vtnet_netmap_writev(sc->vsc_nmd, iov, iovcnt);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_netmap_rx(struct pci_vtnet_softc *sc)
|
||||
{
|
||||
struct iovec iov[VTNET_MAXSEGS], *riov;
|
||||
struct vqueue_info *vq;
|
||||
void *vrx;
|
||||
int len, n;
|
||||
uint16_t idx;
|
||||
|
||||
/*
|
||||
* Should never be called without a valid netmap descriptor
|
||||
*/
|
||||
assert(sc->vsc_nmd != NULL);
|
||||
|
||||
/*
|
||||
* But, will be called when the rx ring hasn't yet
|
||||
* been set up or the guest is resetting the device.
|
||||
*/
|
||||
if (!sc->vsc_rx_ready || sc->resetting) {
|
||||
/*
|
||||
* Drop the packet and try later.
|
||||
*/
|
||||
(void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Check for available rx buffers
|
||||
*/
|
||||
vq = &sc->vsc_queues[VTNET_RXQ];
|
||||
if (!vq_has_descs(vq)) {
|
||||
/*
|
||||
* Drop the packet and try later. Interrupt on
|
||||
* empty, if that's negotiated.
|
||||
*/
|
||||
(void) nm_nextpkt(sc->vsc_nmd, (void *)dummybuf);
|
||||
vq_endchains(vq, 1);
|
||||
return;
|
||||
}
|
||||
|
||||
do {
|
||||
/*
|
||||
* Get descriptor chain.
|
||||
*/
|
||||
n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
|
||||
assert(n >= 1 && n <= VTNET_MAXSEGS);
|
||||
|
||||
/*
|
||||
* Get a pointer to the rx header, and use the
|
||||
* data immediately following it for the packet buffer.
|
||||
*/
|
||||
vrx = iov[0].iov_base;
|
||||
riov = rx_iov_trim(iov, &n, sc->rx_vhdrlen);
|
||||
|
||||
len = pci_vtnet_netmap_readv(sc->vsc_nmd, riov, n);
|
||||
|
||||
if (len == 0) {
|
||||
/*
|
||||
* No more packets, but still some avail ring
|
||||
* entries. Interrupt if needed/appropriate.
|
||||
*/
|
||||
vq_endchains(vq, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* The only valid field in the rx packet header is the
|
||||
* number of buffers if merged rx bufs were negotiated.
|
||||
*/
|
||||
memset(vrx, 0, sc->rx_vhdrlen);
|
||||
|
||||
if (sc->rx_merge) {
|
||||
struct virtio_net_rxhdr *vrxh;
|
||||
|
||||
vrxh = vrx;
|
||||
vrxh->vrh_bufs = 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Release this chain and handle more chains.
|
||||
*/
|
||||
vq_relchain(vq, idx, len + sc->rx_vhdrlen);
|
||||
} while (vq_has_descs(vq));
|
||||
|
||||
/* Interrupt if needed, including for NOTIFY_ON_EMPTY. */
|
||||
vq_endchains(vq, 1);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_rx_callback(int fd, enum ev_type type, void *param)
|
||||
{
|
||||
struct pci_vtnet_softc *sc = param;
|
||||
|
||||
pthread_mutex_lock(&sc->rx_mtx);
|
||||
sc->rx_in_progress = 1;
|
||||
pci_vtnet_tap_rx(sc);
|
||||
sc->pci_vtnet_rx(sc);
|
||||
sc->rx_in_progress = 0;
|
||||
pthread_mutex_unlock(&sc->rx_mtx);
|
||||
|
||||
@ -421,7 +625,7 @@ pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
|
||||
}
|
||||
|
||||
DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, n));
|
||||
pci_vtnet_tap_tx(sc, &iov[1], n - 1, plen);
|
||||
sc->pci_vtnet_tx(sc, &iov[1], n - 1, plen);
|
||||
|
||||
/* chain is processed, release it and set tlen */
|
||||
vq_relchain(vq, idx, tlen);
|
||||
@ -532,6 +736,67 @@ pci_vtnet_parsemac(char *mac_str, uint8_t *mac_addr)
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_tap_setup(struct pci_vtnet_softc *sc, char *devname)
|
||||
{
|
||||
char tbuf[80];
|
||||
|
||||
strcpy(tbuf, "/dev/");
|
||||
strlcat(tbuf, devname, sizeof(tbuf));
|
||||
|
||||
sc->pci_vtnet_rx = pci_vtnet_tap_rx;
|
||||
sc->pci_vtnet_tx = pci_vtnet_tap_tx;
|
||||
|
||||
sc->vsc_tapfd = open(tbuf, O_RDWR);
|
||||
if (sc->vsc_tapfd == -1) {
|
||||
WPRINTF(("open of tap device %s failed\n", tbuf));
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* Set non-blocking and register for read
|
||||
* notifications with the event loop
|
||||
*/
|
||||
int opt = 1;
|
||||
if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
|
||||
WPRINTF(("tap device O_NONBLOCK failed\n"));
|
||||
close(sc->vsc_tapfd);
|
||||
sc->vsc_tapfd = -1;
|
||||
}
|
||||
|
||||
sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
|
||||
EVF_READ,
|
||||
pci_vtnet_rx_callback,
|
||||
sc);
|
||||
if (sc->vsc_mevp == NULL) {
|
||||
WPRINTF(("Could not register event\n"));
|
||||
close(sc->vsc_tapfd);
|
||||
sc->vsc_tapfd = -1;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
pci_vtnet_netmap_setup(struct pci_vtnet_softc *sc, char *ifname)
|
||||
{
|
||||
sc->pci_vtnet_rx = pci_vtnet_netmap_rx;
|
||||
sc->pci_vtnet_tx = pci_vtnet_netmap_tx;
|
||||
|
||||
sc->vsc_nmd = nm_open(ifname, NULL, 0, 0);
|
||||
if (sc->vsc_nmd == NULL) {
|
||||
WPRINTF(("open of netmap device %s failed\n", ifname));
|
||||
return;
|
||||
}
|
||||
|
||||
sc->vsc_mevp = mevent_add(sc->vsc_nmd->fd,
|
||||
EVF_READ,
|
||||
pci_vtnet_rx_callback,
|
||||
sc);
|
||||
if (sc->vsc_mevp == NULL) {
|
||||
WPRINTF(("Could not register event\n"));
|
||||
nm_close(sc->vsc_nmd);
|
||||
sc->vsc_nmd = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
@ -567,8 +832,8 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
*/
|
||||
mac_provided = 0;
|
||||
sc->vsc_tapfd = -1;
|
||||
sc->vsc_nmd = NULL;
|
||||
if (opts != NULL) {
|
||||
char tbuf[80];
|
||||
int err;
|
||||
|
||||
devname = vtopts = strdup(opts);
|
||||
@ -583,36 +848,12 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
|
||||
mac_provided = 1;
|
||||
}
|
||||
|
||||
strcpy(tbuf, "/dev/");
|
||||
strlcat(tbuf, devname, sizeof(tbuf));
|
||||
if (strncmp(devname, "vale", 4) == 0)
|
||||
pci_vtnet_netmap_setup(sc, devname);
|
||||
if (strncmp(devname, "tap", 3) == 0)
|
||||
pci_vtnet_tap_setup(sc, devname);
|
||||
|
||||
free(devname);
|
||||
|
||||
sc->vsc_tapfd = open(tbuf, O_RDWR);
|
||||
if (sc->vsc_tapfd == -1) {
|
||||
WPRINTF(("open of tap device %s failed\n", tbuf));
|
||||
} else {
|
||||
/*
|
||||
* Set non-blocking and register for read
|
||||
* notifications with the event loop
|
||||
*/
|
||||
int opt = 1;
|
||||
if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
|
||||
WPRINTF(("tap device O_NONBLOCK failed\n"));
|
||||
close(sc->vsc_tapfd);
|
||||
sc->vsc_tapfd = -1;
|
||||
}
|
||||
|
||||
sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
|
||||
EVF_READ,
|
||||
pci_vtnet_tap_callback,
|
||||
sc);
|
||||
if (sc->vsc_mevp == NULL) {
|
||||
WPRINTF(("Could not register event\n"));
|
||||
close(sc->vsc_tapfd);
|
||||
sc->vsc_tapfd = -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
|
Loading…
Reference in New Issue
Block a user