bhyve: move virtio-net header processing to pci_virtio_net
This patch cleans up the API between the net frontends (e1000, virtio-net)
and the net backends (tap and netmap). We move the virtio-net header
stripping/prepending to the virtio-net code, where this functionality
belongs. In this way, the netbe_send() and netbe_recv() signatures can
take a const struct iovec * rather than a struct iovec *.

Reviewed by:	grehan, bcr, aleksandr.fedorov@itglobal.com
MFC after:	1 week
Differential Revision:	https://reviews.freebsd.org/D23342
commit 66c662b005
parent 6b197e58f6
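For orientation, the sketch below (not part of the diff; the example_* names and the fixed 10-byte header are illustrative, corresponding to sizeof(struct virtio_net_rxhdr) - 2) shows the contract this change establishes: backends only move bytes, so they can take a const struct iovec * (readv()/writev() already do), while any virtio-net header prepending or stripping happens in the frontend before the backend is called.

#include <string.h>
#include <sys/uio.h>
#include <unistd.h>

/* Legacy virtio-net header length: sizeof(struct virtio_net_rxhdr) - 2. */
#define	EXAMPLE_VNET_HDR_LEN	10

/* Backend receive: it only moves bytes, so the iovec array can be const. */
static ssize_t
example_be_recv(int fd, const struct iovec *iov, int iovcnt)
{
	return (readv(fd, iov, iovcnt));
}

/*
 * Frontend receive: the guest expects a virtio-net header that the
 * backend does not supply, so zero the header in the first fragment,
 * advance past it, and account for it in the returned length.
 */
static ssize_t
example_fe_recv(int fd, struct iovec *iov, int iovcnt)
{
	ssize_t len;

	if (iov[0].iov_len < EXAMPLE_VNET_HDR_LEN)
		return (-1);	/* no room for the header: drop the chain */
	memset(iov[0].iov_base, 0, EXAMPLE_VNET_HDR_LEN);
	iov[0].iov_base = (char *)iov[0].iov_base + EXAMPLE_VNET_HDR_LEN;
	iov[0].iov_len -= EXAMPLE_VNET_HDR_LEN;

	len = example_be_recv(fd, iov, iovcnt);
	return (len > 0 ? len + EXAMPLE_VNET_HDR_LEN : len);
}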
--- a/usr.sbin/bhyve/net_backends.c
+++ b/usr.sbin/bhyve/net_backends.c
@@ -99,7 +99,8 @@ struct net_backend {
 	 * vector provided by the caller has 'iovcnt' elements and contains
 	 * the packet to send.
 	 */
-	ssize_t (*send)(struct net_backend *be, struct iovec *iov, int iovcnt);
+	ssize_t (*send)(struct net_backend *be, const struct iovec *iov,
+	    int iovcnt);
 
 	/*
 	 * Called to receive a packet from the backend. When the function
@@ -108,7 +109,8 @@ struct net_backend {
 	 * The function returns 0 if the backend doesn't have a new packet to
 	 * receive.
 	 */
-	ssize_t (*recv)(struct net_backend *be, struct iovec *iov, int iovcnt);
+	ssize_t (*recv)(struct net_backend *be, const struct iovec *iov,
+	    int iovcnt);
 
 	/*
 	 * Ask the backend to enable or disable receive operation in the
@@ -238,13 +240,13 @@ error:
  * Called to send a buffer chain out to the tap device
  */
 static ssize_t
-tap_send(struct net_backend *be, struct iovec *iov, int iovcnt)
+tap_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
 {
 	return (writev(be->fd, iov, iovcnt));
 }
 
 static ssize_t
-tap_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
+tap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
 {
 	ssize_t ret;
 
@@ -458,7 +460,7 @@ netmap_cleanup(struct net_backend *be)
 }
 
 static ssize_t
-netmap_send(struct net_backend *be, struct iovec *iov,
+netmap_send(struct net_backend *be, const struct iovec *iov,
 	    int iovcnt)
 {
 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
@@ -538,7 +540,7 @@ txsync:
 }
 
 static ssize_t
-netmap_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
+netmap_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
 {
 	struct netmap_priv *priv = (struct netmap_priv *)be->opaque;
 	struct netmap_slot *slot = NULL;
@@ -749,42 +751,10 @@ netbe_set_cap(struct net_backend *be, uint64_t features,
 	return (ret);
 }
 
-static __inline struct iovec *
-iov_trim(struct iovec *iov, int *iovcnt, unsigned int tlen)
-{
-	struct iovec *riov;
-
-	/* XXX short-cut: assume first segment is >= tlen */
-	assert(iov[0].iov_len >= tlen);
-
-	iov[0].iov_len -= tlen;
-	if (iov[0].iov_len == 0) {
-		assert(*iovcnt > 1);
-		*iovcnt -= 1;
-		riov = &iov[1];
-	} else {
-		iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + tlen);
-		riov = &iov[0];
-	}
-
-	return (riov);
-}
-
 ssize_t
-netbe_send(struct net_backend *be, struct iovec *iov, int iovcnt)
+netbe_send(struct net_backend *be, const struct iovec *iov, int iovcnt)
 {
 
-	assert(be != NULL);
-	if (be->be_vnet_hdr_len != be->fe_vnet_hdr_len) {
-		/*
-		 * The frontend uses a virtio-net header, but the backend
-		 * does not. We ignore it (as it must be all zeroes) and
-		 * strip it.
-		 */
-		assert(be->be_vnet_hdr_len == 0);
-		iov = iov_trim(iov, &iovcnt, be->fe_vnet_hdr_len);
-	}
-
 	return (be->send(be, iov, iovcnt));
 }
 
@@ -794,46 +764,10 @@ netbe_send(struct net_backend *be, struct iovec *iov, int iovcnt)
  * the length of the packet just read. Return -1 in case of errors.
  */
 ssize_t
-netbe_recv(struct net_backend *be, struct iovec *iov, int iovcnt)
+netbe_recv(struct net_backend *be, const struct iovec *iov, int iovcnt)
 {
-	/* Length of prepended virtio-net header. */
-	unsigned int hlen = be->fe_vnet_hdr_len;
-	int ret;
-
-	assert(be != NULL);
-
-	if (hlen && hlen != be->be_vnet_hdr_len) {
-		/*
-		 * The frontend uses a virtio-net header, but the backend
-		 * does not. We need to prepend a zeroed header.
-		 */
-		struct virtio_net_rxhdr *vh;
-
-		assert(be->be_vnet_hdr_len == 0);
-
-		/*
-		 * Get a pointer to the rx header, and use the
-		 * data immediately following it for the packet buffer.
-		 */
-		vh = iov[0].iov_base;
-		iov = iov_trim(iov, &iovcnt, hlen);
-
-		/*
-		 * The only valid field in the rx packet header is the
-		 * number of buffers if merged rx bufs were negotiated.
-		 */
-		memset(vh, 0, hlen);
-		if (hlen == VNET_HDR_LEN) {
-			vh->vrh_bufs = 1;
-		}
-	}
-
-	ret = be->recv(be, iov, iovcnt);
-	if (ret > 0) {
-		ret += hlen;
-	}
-
-	return (ret);
+
+	return (be->recv(be, iov, iovcnt));
 }
 
 /*
@@ -871,3 +805,10 @@ netbe_rx_enable(struct net_backend *be)
 
 	return be->recv_enable(be);
 }
+
+size_t
+netbe_get_vnet_hdr_len(struct net_backend *be)
+{
+
+	return (be->be_vnet_hdr_len);
+}
--- a/usr.sbin/bhyve/net_backends.h
+++ b/usr.sbin/bhyve/net_backends.h
@@ -43,8 +43,9 @@ void	netbe_cleanup(net_backend_t *be);
 uint64_t netbe_get_cap(net_backend_t *be);
 int	 netbe_set_cap(net_backend_t *be, uint64_t cap,
 	    unsigned vnet_hdr_len);
-ssize_t	netbe_send(net_backend_t *be, struct iovec *iov, int iovcnt);
-ssize_t	netbe_recv(net_backend_t *be, struct iovec *iov, int iovcnt);
+size_t	netbe_get_vnet_hdr_len(net_backend_t *be);
+ssize_t	netbe_send(net_backend_t *be, const struct iovec *iov, int iovcnt);
+ssize_t	netbe_recv(net_backend_t *be, const struct iovec *iov, int iovcnt);
 ssize_t	netbe_rx_discard(net_backend_t *be);
 void	netbe_rx_disable(net_backend_t *be);
 void	netbe_rx_enable(net_backend_t *be);
--- a/usr.sbin/bhyve/net_utils.c
+++ b/usr.sbin/bhyve/net_utils.c
@@ -44,21 +44,19 @@ int
 net_parsemac(char *mac_str, uint8_t *mac_addr)
 {
 	struct ether_addr *ea;
 	char *tmpstr;
 	char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 };
 
 	tmpstr = strsep(&mac_str,"=");
+	if (mac_str == NULL)
+		return (EINVAL);
 
-	if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) {
-		ea = ether_aton(mac_str);
+	ea = ether_aton(mac_str);
 
-		if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
-		    memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
-			EPRINTLN("Invalid MAC %s", mac_str);
-			return (EINVAL);
-		} else
-			memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
-	}
+	if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) ||
+	    memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) {
+		EPRINTLN("Invalid MAC %s", mac_str);
+		return (EINVAL);
+	} else
+		memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN);
 
 	return (0);
 }
--- a/usr.sbin/bhyve/pci_e82545.c
+++ b/usr.sbin/bhyve/pci_e82545.c
@@ -2328,18 +2328,36 @@ e82545_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
 	mac_provided = 0;
 	sc->esc_be = NULL;
 	if (opts != NULL) {
-		int err;
+		int err = 0;
 
 		devname = vtopts = strdup(opts);
 		(void) strsep(&vtopts, ",");
 
-		if (vtopts != NULL) {
-			err = net_parsemac(vtopts, sc->esc_mac.octet);
-			if (err != 0) {
-				free(devname);
-				return (err);
+		/*
+		 * Parse the list of options in the form
+		 *     key1=value1,...,keyN=valueN.
+		 */
+		while (vtopts != NULL) {
+			char *value = vtopts;
+			char *key;
+
+			key = strsep(&value, "=");
+			if (value == NULL)
+				break;
+			vtopts = value;
+			(void) strsep(&vtopts, ",");
+
+			if (strcmp(key, "mac") == 0) {
+				err = net_parsemac(value, sc->esc_mac.octet);
+				if (err)
+					break;
+				mac_provided = 1;
 			}
-			mac_provided = 1;
+		}
+
+		if (err) {
+			free(devname);
+			return (err);
 		}
 
 		err = netbe_init(&sc->esc_be, devname, e82545_rx_callback, sc);
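With the rewritten option parsing above, the configuration string after the device name is a comma-separated key=value list, so a MAC address is supplied the same way for both frontends. An illustrative invocation (slot, tap name, and MAC are placeholders):

	bhyve ... -s 2:0,e1000,tap0,mac=02:00:00:00:00:01 ...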
--- a/usr.sbin/bhyve/pci_virtio_net.c
+++ b/usr.sbin/bhyve/pci_virtio_net.c
@@ -117,6 +117,9 @@ struct pci_vtnet_softc {
 	pthread_cond_t	tx_cond;
 	int		tx_in_progress;
 
+	size_t		vhdrlen;
+	size_t		be_vhdrlen;
+
 	struct virtio_net_config vsc_config;
 	struct virtio_consts vsc_consts;
 };
@@ -180,6 +183,38 @@ pci_vtnet_reset(void *vsc)
 	pthread_mutex_unlock(&sc->rx_mtx);
 }
 
+static __inline struct iovec *
+iov_trim_hdr(struct iovec *iov, int *iovcnt, unsigned int hlen)
+{
+	struct iovec *riov;
+
+	if (iov[0].iov_len < hlen) {
+		/*
+		 * Not enough header space in the first fragment.
+		 * That's not ok for us.
+		 */
+		return NULL;
+	}
+
+	iov[0].iov_len -= hlen;
+	if (iov[0].iov_len == 0) {
+		*iovcnt -= 1;
+		if (*iovcnt == 0) {
+			/*
+			 * Only space for the header. That's not
+			 * enough for us.
+			 */
+			return NULL;
+		}
+		riov = &iov[1];
+	} else {
+		iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + hlen);
+		riov = &iov[0];
+	}
+
+	return (riov);
+}
+
 struct virtio_mrg_rxbuf_info {
 	uint16_t idx;
 	uint16_t pad;
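The trim semantics of iov_trim_hdr() can be illustrated with a small test (hypothetical; it assumes the function above is in scope and is not part of the commit):

#include <assert.h>
#include <sys/uio.h>

int
main(void)
{
	char hdr[10], payload[1500];
	struct iovec iov[2] = {
		{ .iov_base = hdr, .iov_len = sizeof(hdr) },
		{ .iov_base = payload, .iov_len = sizeof(payload) },
	};
	int iovcnt = 2;
	struct iovec *riov;

	/* The header exactly fills iov[0], so the trimmed view is &iov[1]. */
	riov = iov_trim_hdr(iov, &iovcnt, sizeof(hdr));
	assert(riov == &iov[1] && iovcnt == 1);

	/* A first fragment shorter than the header is rejected. */
	iovcnt = 1;
	assert(iov_trim_hdr(&iov[1], &iovcnt, 2000) == NULL);
	return (0);
}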
@@ -189,31 +224,34 @@ struct virtio_mrg_rxbuf_info {
 static void
 pci_vtnet_rx(struct pci_vtnet_softc *sc)
 {
+	int prepend_hdr_len = sc->vhdrlen - sc->be_vhdrlen;
 	struct virtio_mrg_rxbuf_info info[VTNET_MAXSEGS];
 	struct iovec iov[VTNET_MAXSEGS + 1];
 	struct vqueue_info *vq;
-	uint32_t cur_iov_bytes;
-	struct iovec *cur_iov;
-	uint16_t cur_iov_len;
+	uint32_t riov_bytes;
+	struct iovec *riov;
+	int riov_len;
 	uint32_t ulen;
 	int n_chains;
 	int len;
 
 	vq = &sc->vsc_queues[VTNET_RXQ];
 	for (;;) {
+		struct virtio_net_rxhdr *hdr;
+
 		/*
 		 * Get a descriptor chain to store the next ingress
 		 * packet. In case of mergeable rx buffers, get as
 		 * many chains as necessary in order to make room
 		 * for a maximum sized LRO packet.
 		 */
-		cur_iov_bytes = 0;
-		cur_iov_len = 0;
-		cur_iov = iov;
+		riov_bytes = 0;
+		riov_len = 0;
+		riov = iov;
 		n_chains = 0;
 		do {
-			int n = vq_getchain(vq, &info[n_chains].idx, cur_iov,
-			    VTNET_MAXSEGS - cur_iov_len, NULL);
+			int n = vq_getchain(vq, &info[n_chains].idx, riov,
+			    VTNET_MAXSEGS - riov_len, NULL);
 
 			if (n == 0) {
 				/*
@@ -239,20 +277,42 @@ pci_vtnet_rx(struct pci_vtnet_softc *sc)
 				vq_kick_disable(vq);
 				continue;
 			}
-			assert(n >= 1 && cur_iov_len + n <= VTNET_MAXSEGS);
-			cur_iov_len += n;
+			assert(n >= 1 && riov_len + n <= VTNET_MAXSEGS);
+			riov_len += n;
 			if (!sc->rx_merge) {
 				n_chains = 1;
 				break;
 			}
-			info[n_chains].len = (uint32_t)count_iov(cur_iov, n);
-			cur_iov_bytes += info[n_chains].len;
-			cur_iov += n;
+			info[n_chains].len = (uint32_t)count_iov(riov, n);
+			riov_bytes += info[n_chains].len;
+			riov += n;
 			n_chains++;
-		} while (cur_iov_bytes < VTNET_MAX_PKT_LEN &&
-		    cur_iov_len < VTNET_MAXSEGS);
+		} while (riov_bytes < VTNET_MAX_PKT_LEN &&
+		    riov_len < VTNET_MAXSEGS);
 
-		len = netbe_recv(sc->vsc_be, iov, cur_iov_len);
+		riov = iov;
+		hdr = riov[0].iov_base;
+		if (prepend_hdr_len > 0) {
+			/*
+			 * The frontend uses a virtio-net header, but the
+			 * backend does not. We need to prepend a zeroed
+			 * header.
+			 */
+			riov = iov_trim_hdr(riov, &riov_len, prepend_hdr_len);
+			if (riov == NULL) {
+				/*
+				 * The first collected chain is nonsensical,
+				 * as it is not even enough to store the
+				 * virtio-net header. Just drop it.
+				 */
+				vq_relchain(vq, info[0].idx, 0);
+				vq_retchains(vq, n_chains - 1);
+				continue;
+			}
+			memset(hdr, 0, prepend_hdr_len);
+		}
+
+		len = netbe_recv(sc->vsc_be, riov, riov_len);
 
 		if (len <= 0) {
 			/*
@@ -266,18 +326,18 @@ pci_vtnet_rx(struct pci_vtnet_softc *sc)
 			return;
 		}
 
-		ulen = (uint32_t)len;	/* avoid too many casts below */
+		ulen = (uint32_t)(len + prepend_hdr_len);
 
-		/* Publish the used buffers to the guest. */
+		/*
+		 * Publish the used buffers to the guest, reporting the
+		 * number of bytes that we wrote.
+		 */
 		if (!sc->rx_merge) {
 			vq_relchain(vq, info[0].idx, ulen);
 		} else {
-			struct virtio_net_rxhdr *hdr = iov[0].iov_base;
 			uint32_t iolen;
 			int i = 0;
 
-			assert(iov[0].iov_len >= sizeof(*hdr));
-
 			do {
 				iolen = info[i].len;
 				if (iolen > ulen) {
@@ -333,6 +393,7 @@ static void
 pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
 {
 	struct iovec iov[VTNET_MAXSEGS + 1];
+	struct iovec *siov = iov;
 	uint16_t idx;
 	ssize_t len;
 	int n;
@@ -344,10 +405,34 @@ pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
 	n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
 	assert(n >= 1 && n <= VTNET_MAXSEGS);
 
-	len = netbe_send(sc->vsc_be, iov, n);
+	if (sc->vhdrlen != sc->be_vhdrlen) {
+		/*
+		 * The frontend uses a virtio-net header, but the backend
+		 * does not. We simply strip the header and ignore it, as
+		 * it should be zero-filled.
+		 */
+		siov = iov_trim_hdr(siov, &n, sc->vhdrlen);
+	}
 
-	/* chain is processed, release it and set len */
-	vq_relchain(vq, idx, len > 0 ? len : 0);
+	if (siov == NULL) {
+		/* The chain is nonsensical. Just drop it. */
+		len = 0;
+	} else {
+		len = netbe_send(sc->vsc_be, siov, n);
+		if (len < 0) {
+			/*
+			 * If send failed, report that 0 bytes
+			 * were read.
+			 */
+			len = 0;
+		}
+	}
+
+	/*
+	 * Return the processed chain to the guest, reporting
+	 * the number of bytes that we read.
+	 */
+	vq_relchain(vq, idx, len);
 }
 
 /* Called on TX kick. */
@@ -466,19 +551,38 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
 	if (opts != NULL) {
 		char *devname;
 		char *vtopts;
-		int err;
+		int err = 0;
 
 		/* Get the device name. */
 		devname = vtopts = strdup(opts);
 		(void) strsep(&vtopts, ",");
 
-		if (vtopts != NULL) {
-			err = net_parsemac(vtopts, sc->vsc_config.mac);
-			if (err != 0) {
-				free(devname);
-				free(sc);
-				return (err);
+		/*
+		 * Parse the list of options in the form
+		 *     key1=value1,...,keyN=valueN.
+		 */
+		while (vtopts != NULL) {
+			char *value = vtopts;
+			char *key;
+
+			key = strsep(&value, "=");
+			if (value == NULL)
+				break;
+			vtopts = value;
+			(void) strsep(&vtopts, ",");
+
+			if (strcmp(key, "mac") == 0) {
+				err = net_parsemac(value, sc->vsc_config.mac);
+				if (err)
+					break;
+				mac_provided = 1;
 			}
-			mac_provided = 1;
+		}
+
+		if (err) {
+			free(devname);
+			free(sc);
+			return (err);
 		}
 
 		err = netbe_init(&sc->vsc_be, devname, pci_vtnet_rx_callback,
@@ -520,6 +624,7 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
 	sc->resetting = 0;
 
 	sc->rx_merge = 0;
+	sc->vhdrlen = sizeof(struct virtio_net_rxhdr) - 2;
 	pthread_mutex_init(&sc->rx_mtx, NULL);
 
 	/*
@@ -574,24 +679,25 @@ static void
 pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features)
 {
 	struct pci_vtnet_softc *sc = vsc;
-	unsigned int rx_vhdrlen;
 
 	sc->vsc_features = negotiated_features;
 
 	if (negotiated_features & VIRTIO_NET_F_MRG_RXBUF) {
-		rx_vhdrlen = sizeof(struct virtio_net_rxhdr);
+		sc->vhdrlen = sizeof(struct virtio_net_rxhdr);
 		sc->rx_merge = 1;
 	} else {
 		/*
 		 * Without mergeable rx buffers, virtio-net header is 2
 		 * bytes shorter than sizeof(struct virtio_net_rxhdr).
 		 */
-		rx_vhdrlen = sizeof(struct virtio_net_rxhdr) - 2;
+		sc->vhdrlen = sizeof(struct virtio_net_rxhdr) - 2;
 		sc->rx_merge = 0;
 	}
 
 	/* Tell the backend to enable some capabilities it has advertised. */
-	netbe_set_cap(sc->vsc_be, negotiated_features, rx_vhdrlen);
+	netbe_set_cap(sc->vsc_be, negotiated_features, sc->vhdrlen);
+	sc->be_vhdrlen = netbe_get_vnet_hdr_len(sc->vsc_be);
+	assert(sc->be_vhdrlen == 0 || sc->be_vhdrlen == sc->vhdrlen);
 }
 
 static struct pci_devemu pci_de_vnet = {
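To summarize the negotiation logic above: the frontend header length follows the VIRTIO_NET_F_MRG_RXBUF feature bit, and the backend header length must either match it or be zero (in which case the frontend compensates). A sketch of the selection rule, assuming the feature flag and struct virtio_net_rxhdr definitions from the bhyve sources:

/* Frontend virtio-net header length after feature negotiation (sketch). */
static size_t
example_fe_vnet_hdr_len(uint64_t negotiated_features)
{
	/*
	 * With mergeable rx buffers the header ends with the 16-bit
	 * vrh_bufs count; without them it is 2 bytes shorter.
	 */
	if (negotiated_features & VIRTIO_NET_F_MRG_RXBUF)
		return (sizeof(struct virtio_net_rxhdr));	/* 12 bytes */
	return (sizeof(struct virtio_net_rxhdr) - 2);		/* 10 bytes */
}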