net/netvsc: support integrated VF

Integrate accelerated networking support into netvsc PMD.
This allows netvsc to manage VF without using failsafe or vdev_netvsc.
For the exception vswitch path some tests like transmit
get a 22% increase in packets/sec.
For the VF path, the code is slightly shorter but has no
real change in performance.

Pro:
   * using netvsc is more like other DPDK NICs
   * the exception packet uses less CPU
   * much smaller code size
   * no locking required on VF transmit/receive path
   * no legacy Linux network device to get mangled by userspace
   * much simpler (1K vs 9K) LOC
   * unified extended statistics

Con:
   * using netvsc has a more complex startup model
   * no bifurcated driver support
   * no flow support (since host does not have flow API).
   * no tunnel offload support
   * no receive interrupt support

Signed-off-by: Stephen Hemminger <sthemmin@microsoft.com>
This commit is contained in:
Stephen Hemminger 2018-08-30 15:35:12 -07:00 committed by Ferruh Yigit
parent f6ddcf80ad
commit dc7680e859
12 changed files with 798 additions and 83 deletions

View File

@ -34,14 +34,10 @@ In this release, the hyper PMD driver provides the basic functionality of packet
* The maximum number of queues is limited by the host (currently 64).
When used with 4.16 kernel only a single queue is available.
.. note::
This driver is intended for use with **Hyper-V only** and is
not recommended for use on Azure because accelerated Networking
(SR-IOV) is not supported.
On Azure, use the :doc:`vdev_netvsc` which
automatically configures the necessary TAP and failsave drivers.
* This driver supports SR-IOV network acceleration.
If SR-IOV is enabled then the driver will transparently manage the interface,
and send and receive packets using the VF path.
The VDEV_NETVSC and FAILSAFE drivers are *not* used when using netvsc PMD.
Installation
------------

View File

@ -61,6 +61,12 @@ New Features
* Match items: destination MAC address.
* Action items: push/pop/rewrite vlan header.
* **Added support for SR-IOV in netvsc PMD.**
The ``netvsc`` poll mode driver now supports the Accelerated Networking
SR-IOV option in Hyper-V and Azure. This is an alternative to the previous
vdev_netvsc, tap, and failsafe drivers combination.
API Changes
-----------
@ -135,7 +141,7 @@ The libraries prepended with a plus sign were incremented in this version.
librte_bus_fslmc.so.1
librte_bus_pci.so.1
librte_bus_vdev.so.1
librte_bus_vmbus.so.1
+ librte_bus_vmbus.so.1
librte_cfgfile.so.2
librte_cmdline.so.2
librte_common_octeontx.so.1
@ -172,6 +178,7 @@ The libraries prepended with a plus sign were incremented in this version.
librte_pmd_ring.so.2
librte_pmd_softnic.so.1
librte_pmd_vhost.so.2
+ librte_pmd_netvsc.so.1
librte_port.so.3
librte_power.so.1
librte_rawdev.so.1
@ -198,6 +205,10 @@ Known Issues
Also, make sure to start the actual text at the margin.
=========================================================
* When using SR-IOV (VF) support with netvsc PMD and the Mellanox mlx5 bifurcated
driver, the Linux netvsc device must be brought up before the netvsc device is
unbound and passed to the DPDK.
Tested Platforms
----------------
@ -217,4 +228,3 @@ Tested Platforms
This section is a comment. Do not overwrite or remove it.
Also, make sure to start the actual text at the margin.
=========================================================

View File

@ -15,6 +15,7 @@ SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_ethdev.c
SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_rxtx.c
SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_rndis.c
SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_nvs.c
SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_vf.c
LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring
LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs

View File

@ -192,7 +192,7 @@ static int hn_parse_args(const struct rte_eth_dev *dev)
*/
int
hn_dev_link_update(struct rte_eth_dev *dev,
__rte_unused int wait_to_complete)
int wait_to_complete)
{
struct hn_data *hv = dev->data->dev_private;
struct rte_eth_link link, old;
@ -206,6 +206,8 @@ hn_dev_link_update(struct rte_eth_dev *dev,
hn_rndis_get_linkspeed(hv);
hn_vf_link_update(dev, wait_to_complete);
link = (struct rte_eth_link) {
.link_duplex = ETH_LINK_FULL_DUPLEX,
.link_autoneg = ETH_LINK_SPEED_FIXED,
@ -244,6 +246,7 @@ static void hn_dev_info_get(struct rte_eth_dev *dev,
dev_info->max_tx_queues = hv->max_queues;
hn_rndis_get_offload(hv, dev_info);
hn_vf_info_get(hv, dev_info);
}
static void
@ -394,7 +397,7 @@ static int hn_dev_configure(struct rte_eth_dev *dev)
}
}
return 0;
return hn_vf_configure(dev, dev_conf);
}
static int hn_dev_stats_get(struct rte_eth_dev *dev,
@ -402,6 +405,8 @@ static int hn_dev_stats_get(struct rte_eth_dev *dev,
{
unsigned int i;
hn_vf_stats_get(dev, stats);
for (i = 0; i < dev->data->nb_tx_queues; i++) {
const struct hn_tx_queue *txq = dev->data->tx_queues[i];
@ -464,18 +469,38 @@ hn_dev_stats_reset(struct rte_eth_dev *dev)
}
}
/*
 * Reset extended statistics: clears the synthetic device counters via
 * hn_dev_stats_reset() and then asks the VF layer to reset its xstats
 * via hn_vf_xstats_reset().
 */
static void
hn_dev_xstats_reset(struct rte_eth_dev *dev)
{
hn_dev_stats_reset(dev);
hn_vf_xstats_reset(dev);
}
/*
 * Total number of extended statistics exposed by the port: one set of
 * hn_stat_strings per Tx queue and per Rx queue, plus whatever the VF
 * layer reports.
 *
 * Returns the count, or the negative value returned by
 * hn_vf_xstats_get_names().
 */
static int
hn_dev_xstats_count(struct rte_eth_dev *dev)
{
	int synth_total, vf_total;

	synth_total = dev->data->nb_tx_queues * RTE_DIM(hn_stat_strings);
	synth_total += dev->data->nb_rx_queues * RTE_DIM(hn_stat_strings);

	/* A NULL array with size 0 only queries how many names exist */
	vf_total = hn_vf_xstats_get_names(dev, NULL, 0);

	return (vf_total < 0) ? vf_total : synth_total + vf_total;
}
static int
hn_dev_xstats_get_names(struct rte_eth_dev *dev,
struct rte_eth_xstat_name *xstats_names,
__rte_unused unsigned int limit)
unsigned int limit)
{
unsigned int i, t, count = 0;
PMD_INIT_FUNC_TRACE();
int ret;
if (!xstats_names)
return dev->data->nb_tx_queues * RTE_DIM(hn_stat_strings)
+ dev->data->nb_rx_queues * RTE_DIM(hn_stat_strings);
return hn_dev_xstats_count(dev);
/* Note: limit checked in rte_eth_xstats_names() */
for (i = 0; i < dev->data->nb_tx_queues; i++) {
@ -484,6 +509,9 @@ hn_dev_xstats_get_names(struct rte_eth_dev *dev,
if (!txq)
continue;
if (count >= limit)
break;
for (t = 0; t < RTE_DIM(hn_stat_strings); t++)
snprintf(xstats_names[count++].name,
RTE_ETH_XSTATS_NAME_SIZE,
@ -496,6 +524,9 @@ hn_dev_xstats_get_names(struct rte_eth_dev *dev,
if (!rxq)
continue;
if (count >= limit)
break;
for (t = 0; t < RTE_DIM(hn_stat_strings); t++)
snprintf(xstats_names[count++].name,
RTE_ETH_XSTATS_NAME_SIZE,
@ -503,7 +534,12 @@ hn_dev_xstats_get_names(struct rte_eth_dev *dev,
hn_stat_strings[t].name);
}
return count;
ret = hn_vf_xstats_get_names(dev, xstats_names + count,
limit - count);
if (ret < 0)
return ret;
return count + ret;
}
static int
@ -512,11 +548,9 @@ hn_dev_xstats_get(struct rte_eth_dev *dev,
unsigned int n)
{
unsigned int i, t, count = 0;
const unsigned int nstats =
dev->data->nb_tx_queues * RTE_DIM(hn_stat_strings)
+ dev->data->nb_rx_queues * RTE_DIM(hn_stat_strings);
const unsigned int nstats = hn_dev_xstats_count(dev);
const char *stats;
int ret;
PMD_INIT_FUNC_TRACE();
@ -547,20 +581,33 @@ hn_dev_xstats_get(struct rte_eth_dev *dev,
(stats + hn_stat_strings[t].offset);
}
return count;
ret = hn_vf_xstats_get(dev, xstats + count, n - count);
if (ret < 0)
return ret;
return count + ret;
}
static int
hn_dev_start(struct rte_eth_dev *dev)
{
struct hn_data *hv = dev->data->dev_private;
int error;
PMD_INIT_FUNC_TRACE();
return hn_rndis_set_rxfilter(hv,
NDIS_PACKET_TYPE_BROADCAST |
NDIS_PACKET_TYPE_ALL_MULTICAST |
NDIS_PACKET_TYPE_DIRECTED);
error = hn_rndis_set_rxfilter(hv,
NDIS_PACKET_TYPE_BROADCAST |
NDIS_PACKET_TYPE_ALL_MULTICAST |
NDIS_PACKET_TYPE_DIRECTED);
if (error)
return error;
error = hn_vf_start(dev);
if (error)
hn_rndis_set_rxfilter(hv, 0);
return error;
}
static void
@ -571,12 +618,15 @@ hn_dev_stop(struct rte_eth_dev *dev)
PMD_INIT_FUNC_TRACE();
hn_rndis_set_rxfilter(hv, 0);
hn_vf_stop(dev);
}
static void
hn_dev_close(struct rte_eth_dev *dev __rte_unused)
{
PMD_INIT_LOG(DEBUG, "close");
hn_vf_close(dev);
}
static const struct eth_dev_ops hn_eth_dev_ops = {
@ -585,8 +635,7 @@ static const struct eth_dev_ops hn_eth_dev_ops = {
.dev_stop = hn_dev_stop,
.dev_close = hn_dev_close,
.dev_infos_get = hn_dev_info_get,
.txq_info_get = hn_dev_tx_queue_info,
.rxq_info_get = hn_dev_rx_queue_info,
.dev_supported_ptypes_get = hn_vf_supported_ptypes,
.promiscuous_enable = hn_dev_promiscuous_enable,
.promiscuous_disable = hn_dev_promiscuous_disable,
.allmulticast_enable = hn_dev_allmulticast_enable,
@ -598,10 +647,10 @@ static const struct eth_dev_ops hn_eth_dev_ops = {
.rx_queue_release = hn_dev_rx_queue_release,
.link_update = hn_dev_link_update,
.stats_get = hn_dev_stats_get,
.stats_reset = hn_dev_stats_reset,
.xstats_get = hn_dev_xstats_get,
.xstats_get_names = hn_dev_xstats_get_names,
.stats_reset = hn_dev_stats_reset,
.xstats_reset = hn_dev_stats_reset,
.xstats_reset = hn_dev_xstats_reset,
};
/*
@ -679,6 +728,14 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
if (err)
return err;
strlcpy(hv->owner.name, eth_dev->device->name,
RTE_ETH_MAX_OWNER_NAME_LEN);
err = rte_eth_dev_owner_new(&hv->owner.id);
if (err) {
PMD_INIT_LOG(ERR, "Can not get owner id");
return err;
}
/* Initialize primary channel input for control operations */
err = rte_vmbus_chan_open(vmbus, &hv->channels[0]);
if (err)
@ -714,6 +771,15 @@ eth_hn_dev_init(struct rte_eth_dev *eth_dev)
hv->max_queues = RTE_MIN(rxr_cnt, (unsigned int)max_chan);
/* If VF was reported but not added, do it now */
if (hv->vf_present && !hv->vf_dev) {
PMD_INIT_LOG(DEBUG, "Adding VF device");
err = hn_vf_add(eth_dev, hv);
if (err)
goto failed;
}
return 0;
failed:
@ -743,6 +809,7 @@ eth_hn_dev_uninit(struct rte_eth_dev *eth_dev)
hn_detach(hv);
rte_vmbus_chan_close(hv->primary->chan);
rte_free(hv->primary);
rte_eth_dev_owner_delete(hv->owner.id);
eth_dev->data->mac_addrs = NULL;

View File

@ -532,10 +532,19 @@ void
hn_nvs_set_datapath(struct hn_data *hv, uint32_t path)
{
struct hn_nvs_datapath dp;
int error;
PMD_DRV_LOG(DEBUG, "set datapath %s",
path ? "VF" : "Synthetic");
memset(&dp, 0, sizeof(dp));
dp.type = NVS_TYPE_SET_DATAPATH;
dp.active_path = path;
hn_nvs_req_send(hv, &dp, sizeof(dp));
error = hn_nvs_req_send(hv, &dp, sizeof(dp));
if (error) {
PMD_DRV_LOG(ERR,
"send set datapath failed: %d",
error);
}
}

View File

@ -105,6 +105,12 @@ struct hn_nvs_ndis_init {
uint8_t rsvd[28];
} __rte_packed;
/*
 * Message carried by an NVS_TYPE_VFASSOC_NOTE notification.
 * hn_nvs_handle_vfassoc() treats a non-zero 'allocated' as "add the VF"
 * and zero as "remove the VF"; 'serial' is only logged there.
 * The structure is packed, so the layout below is the wire layout.
 */
struct hn_nvs_vf_association {
uint32_t type; /* NVS_TYPE_VFASSOC_NOTE */
uint32_t allocated;
uint32_t serial;
} __rte_packed;
#define NVS_DATAPATH_SYNTHETIC 0
#define NVS_DATAPATH_VF 1
@ -207,6 +213,9 @@ void hn_nvs_detach(struct hn_data *hv);
void hn_nvs_ack_rxbuf(struct vmbus_channel *chan, uint64_t tid);
int hn_nvs_alloc_subchans(struct hn_data *hv, uint32_t *nsubch);
void hn_nvs_set_datapath(struct hn_data *hv, uint32_t path);
void hn_nvs_handle_vfassoc(struct rte_eth_dev *dev,
const struct vmbus_chanpkt_hdr *hdr,
const void *data);
static inline int
hn_nvs_send(struct vmbus_channel *chan, uint16_t flags,

View File

@ -913,6 +913,37 @@ int hn_rndis_get_offload(struct hn_data *hv,
return 0;
}
uint32_t
hn_rndis_get_ptypes(struct hn_data *hv)
{
struct ndis_offload hwcaps;
uint32_t ptypes;
int error;
memset(&hwcaps, 0, sizeof(hwcaps));
error = hn_rndis_query_hwcaps(hv, &hwcaps);
if (error) {
PMD_DRV_LOG(ERR, "hwcaps query failed: %d", error);
return RTE_PTYPE_L2_ETHER;
}
ptypes = RTE_PTYPE_L2_ETHER;
if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4)
ptypes |= RTE_PTYPE_L3_IPV4;
if ((hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_TCP4) ||
(hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_TCP6))
ptypes |= RTE_PTYPE_L4_TCP;
if ((hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_UDP4) ||
(hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_UDP6))
ptypes |= RTE_PTYPE_L4_UDP;
return ptypes;
}
int
hn_rndis_set_rxfilter(struct hn_data *hv, uint32_t filter)
{

View File

@ -24,6 +24,7 @@ int hn_rndis_query_rsscaps(struct hn_data *hv,
unsigned int *rxr_cnt0);
int hn_rndis_conf_rss(struct hn_data *hv,
const struct rte_eth_rss_conf *rss_conf);
uint32_t hn_rndis_get_ptypes(struct hn_data *hv);
#ifdef RTE_LIBRTE_NETVSC_DEBUG_DUMP
void hn_rndis_dump(const void *buf);

View File

@ -217,6 +217,7 @@ hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
struct hn_data *hv = dev->data->dev_private;
struct hn_tx_queue *txq;
uint32_t tx_free_thresh;
int err;
PMD_INIT_FUNC_TRACE();
@ -246,8 +247,14 @@ hn_dev_tx_queue_setup(struct rte_eth_dev *dev,
hn_reset_txagg(txq);
dev->data->tx_queues[queue_idx] = txq;
err = hn_vf_tx_queue_setup(dev, queue_idx, nb_desc,
socket_id, tx_conf);
if (err) {
rte_free(txq);
return err;
}
dev->data->tx_queues[queue_idx] = txq;
return 0;
}
@ -270,17 +277,6 @@ hn_dev_tx_queue_release(void *arg)
rte_free(txq);
}
void
hn_dev_tx_queue_info(struct rte_eth_dev *dev, uint16_t queue_idx,
struct rte_eth_txq_info *qinfo)
{
struct hn_data *hv = dev->data->dev_private;
struct hn_tx_queue *txq = dev->data->rx_queues[queue_idx];
qinfo->conf.tx_free_thresh = txq->free_thresh;
qinfo->nb_desc = hv->tx_pool->size;
}
static void
hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id,
unsigned long xactid, const struct hn_nvs_rndis_ack *ack)
@ -713,6 +709,35 @@ hn_nvs_handle_rxbuf(struct rte_eth_dev *dev,
hn_rx_buf_release(rxb);
}
/*
* Called when NVS inband events are received.
* Send up a two part message with port_id and the NVS message
* to the pipe to the netvsc-vf-event control thread.
*/
static void hn_nvs_handle_notify(struct rte_eth_dev *dev,
const struct vmbus_chanpkt_hdr *pkt,
const void *data)
{
const struct hn_nvs_hdr *hdr = data;
switch (hdr->type) {
case NVS_TYPE_TXTBL_NOTE:
/* Transmit indirection table has locking problems
* in DPDK and therefore not implemented
*/
PMD_DRV_LOG(DEBUG, "host notify of transmit indirection table");
break;
case NVS_TYPE_VFASSOC_NOTE:
hn_nvs_handle_vfassoc(dev, pkt, data);
break;
default:
PMD_DRV_LOG(INFO,
"got notify, nvs type %u", hdr->type);
}
}
struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv,
uint16_t queue_id,
unsigned int socket_id)
@ -744,13 +769,14 @@ int
hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
uint16_t queue_idx, uint16_t nb_desc,
unsigned int socket_id,
const struct rte_eth_rxconf *rx_conf __rte_unused,
const struct rte_eth_rxconf *rx_conf,
struct rte_mempool *mp)
{
struct hn_data *hv = dev->data->dev_private;
char ring_name[RTE_RING_NAMESIZE];
struct hn_rx_queue *rxq;
unsigned int count;
int error = -ENOMEM;
PMD_INIT_FUNC_TRACE();
@ -780,6 +806,11 @@ hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
if (!rxq->rx_ring)
goto fail;
error = hn_vf_rx_queue_setup(dev, queue_idx, nb_desc,
socket_id, rx_conf, mp);
if (error)
goto fail;
dev->data->rx_queues[queue_idx] = rxq;
return 0;
@ -787,7 +818,7 @@ hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
rte_ring_free(rxq->rx_ring);
rte_free(rxq->event_buf);
rte_free(rxq);
return -ENOMEM;
return error;
}
void
@ -804,6 +835,9 @@ hn_dev_rx_queue_release(void *arg)
rxq->rx_ring = NULL;
rxq->mb_pool = NULL;
hn_vf_rx_queue_release(rxq->hv, rxq->queue_id);
/* Keep primary queue to allow for control operations */
if (rxq != rxq->hv->primary) {
rte_free(rxq->event_buf);
rte_free(rxq);
@ -818,32 +852,6 @@ hn_dev_tx_done_cleanup(void *arg, uint32_t free_cnt)
return hn_process_events(txq->hv, txq->queue_id, free_cnt);
}
void
hn_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_idx,
struct rte_eth_rxq_info *qinfo)
{
struct hn_rx_queue *rxq = dev->data->rx_queues[queue_idx];
qinfo->mp = rxq->mb_pool;
qinfo->scattered_rx = 1;
qinfo->nb_desc = rte_ring_get_capacity(rxq->rx_ring);
}
static void
hn_nvs_handle_notify(const struct vmbus_chanpkt_hdr *pkthdr,
const void *data)
{
const struct hn_nvs_hdr *hdr = data;
if (unlikely(vmbus_chanpkt_datalen(pkthdr) < sizeof(*hdr))) {
PMD_DRV_LOG(ERR, "invalid nvs notify");
return;
}
PMD_DRV_LOG(INFO,
"got notify, nvs type %u", hdr->type);
}
/*
* Process pending events on the channel.
* Called from both Rx queue poll and Tx cleanup
@ -916,7 +924,7 @@ uint32_t hn_process_events(struct hn_data *hv, uint16_t queue_id,
break;
case VMBUS_CHANPKT_TYPE_INBAND:
hn_nvs_handle_notify(pkt, data);
hn_nvs_handle_notify(dev, pkt, data);
break;
default:
@ -1275,7 +1283,9 @@ uint16_t
hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
{
struct hn_tx_queue *txq = ptxq;
uint16_t queue_id = txq->queue_id;
struct hn_data *hv = txq->hv;
struct rte_eth_dev *vf_dev;
bool need_sig = false;
uint16_t nb_tx;
int ret;
@ -1283,6 +1293,15 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
if (unlikely(hv->closed))
return 0;
/* Transmit over VF if present and up */
vf_dev = hv->vf_dev;
rte_compiler_barrier();
if (vf_dev && vf_dev->data->dev_started) {
void *sub_q = vf_dev->data->tx_queues[queue_id];
return (*vf_dev->tx_pkt_burst)(sub_q, tx_pkts, nb_pkts);
}
if (rte_mempool_avail_count(hv->tx_pool) <= txq->free_thresh)
hn_process_events(hv, txq->queue_id, 0);
@ -1304,7 +1323,7 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
if (unlikely(!pkt))
break;
hn_encap(pkt, txq->queue_id, m);
hn_encap(pkt, queue_id, m);
hn_append_to_chim(txq, pkt, m);
rte_pktmbuf_free(m);
@ -1331,7 +1350,7 @@ hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
txd->data_size += m->pkt_len;
++txd->packets;
hn_encap(pkt, txq->queue_id, m);
hn_encap(pkt, queue_id, m);
ret = hn_xmit_sg(txq, txd, m, &need_sig);
if (unlikely(ret != 0)) {
@ -1360,15 +1379,36 @@ hn_recv_pkts(void *prxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts)
{
struct hn_rx_queue *rxq = prxq;
struct hn_data *hv = rxq->hv;
struct rte_eth_dev *vf_dev;
uint16_t nb_rcv;
if (unlikely(hv->closed))
return 0;
/* If ring is empty then process more */
if (rte_ring_count(rxq->rx_ring) < nb_pkts)
vf_dev = hv->vf_dev;
rte_compiler_barrier();
if (vf_dev && vf_dev->data->dev_started) {
/* Normally, with SR-IOV the ring buffer will be empty */
hn_process_events(hv, rxq->queue_id, 0);
/* Get mbufs off staging ring */
return rte_ring_sc_dequeue_burst(rxq->rx_ring, (void **)rx_pkts,
nb_pkts, NULL);
/* Get mbufs some bufs off of staging ring */
nb_rcv = rte_ring_sc_dequeue_burst(rxq->rx_ring,
(void **)rx_pkts,
nb_pkts / 2, NULL);
/* And rest off of VF */
nb_rcv += rte_eth_rx_burst(vf_dev->data->port_id,
rxq->queue_id,
rx_pkts + nb_rcv, nb_pkts - nb_rcv);
} else {
/* If receive ring is not full then get more */
if (rte_ring_count(rxq->rx_ring) < nb_pkts)
hn_process_events(hv, rxq->queue_id, 0);
nb_rcv = rte_ring_sc_dequeue_burst(rxq->rx_ring,
(void **)rx_pkts,
nb_pkts, NULL);
}
return nb_rcv;
}

View File

@ -94,8 +94,11 @@ struct hn_rx_bufinfo {
struct hn_data {
struct rte_vmbus_device *vmbus;
struct hn_rx_queue *primary;
struct rte_eth_dev *vf_dev; /* Subordinate device */
rte_spinlock_t vf_lock;
uint16_t port_id;
bool closed;
bool vf_present;
uint32_t link_status;
uint32_t link_speed;
@ -124,6 +127,10 @@ struct hn_data {
uint8_t rndis_resp[256];
struct ether_addr mac_addr;
struct rte_eth_dev_owner owner;
struct rte_intr_handle vf_intr;
struct vmbus_channel *channels[HN_MAX_CHANNELS];
};
@ -160,5 +167,37 @@ int hn_dev_rx_queue_setup(struct rte_eth_dev *dev,
const struct rte_eth_rxconf *rx_conf,
struct rte_mempool *mp);
void hn_dev_rx_queue_release(void *arg);
void hn_dev_rx_queue_info(struct rte_eth_dev *dev, uint16_t queue_idx,
struct rte_eth_rxq_info *qinfo);
void hn_vf_info_get(struct hn_data *hv,
struct rte_eth_dev_info *info);
int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv);
int hn_vf_configure(struct rte_eth_dev *dev,
const struct rte_eth_conf *dev_conf);
const uint32_t *hn_vf_supported_ptypes(struct rte_eth_dev *dev);
int hn_vf_start(struct rte_eth_dev *dev);
void hn_vf_reset(struct rte_eth_dev *dev);
void hn_vf_stop(struct rte_eth_dev *dev);
void hn_vf_close(struct rte_eth_dev *dev);
int hn_vf_link_update(struct rte_eth_dev *dev,
int wait_to_complete);
int hn_vf_tx_queue_setup(struct rte_eth_dev *dev,
uint16_t queue_idx, uint16_t nb_desc,
unsigned int socket_id,
const struct rte_eth_txconf *tx_conf);
void hn_vf_tx_queue_release(struct hn_data *hv, uint16_t queue_id);
int hn_vf_rx_queue_setup(struct rte_eth_dev *dev,
uint16_t queue_idx, uint16_t nb_desc,
unsigned int socket_id,
const struct rte_eth_rxconf *rx_conf,
struct rte_mempool *mp);
void hn_vf_rx_queue_release(struct hn_data *hv, uint16_t queue_id);
int hn_vf_stats_get(struct rte_eth_dev *dev, struct rte_eth_stats *stats);
void hn_vf_stats_reset(struct rte_eth_dev *dev);
int hn_vf_xstats_get_names(struct rte_eth_dev *dev,
struct rte_eth_xstat_name *xstats_names,
unsigned int size);
int hn_vf_xstats_get(struct rte_eth_dev *dev,
struct rte_eth_xstat *xstats,
unsigned int n);
void hn_vf_xstats_reset(struct rte_eth_dev *dev);

512
drivers/net/netvsc/hn_vf.c Normal file
View File

@ -0,0 +1,512 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (c) 2018 Microsoft Corp.
* All rights reserved.
*/
#include <stdio.h>
#include <stdint.h>
#include <string.h>
#include <stdbool.h>
#include <errno.h>
#include <unistd.h>
#include <dirent.h>
#include <sys/types.h>
#include <sys/fcntl.h>
#include <sys/uio.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_ethdev_driver.h>
#include <rte_lcore.h>
#include <rte_memory.h>
#include <rte_bus_vmbus.h>
#include <rte_pci.h>
#include <rte_bus_pci.h>
#include <rte_log.h>
#include <rte_string_fns.h>
#include "hn_logs.h"
#include "hn_var.h"
#include "hn_nvs.h"
/*
 * Search the ports visited by RTE_ETH_FOREACH_DEV for one whose first
 * MAC address equals that of the synthetic device.
 *
 * Returns the matching port id, or -ENOENT when no other port carries
 * the same address.
 */
static int hn_vf_match(const struct rte_eth_dev *dev)
{
	const struct ether_addr *mac = dev->data->mac_addrs;
	int i;

	RTE_ETH_FOREACH_DEV(i) {
		const struct rte_eth_dev *vf_dev = &rte_eth_devices[i];
		const struct ether_addr *vf_mac = vf_dev->data->mac_addrs;

		/* Never pair the synthetic device with itself */
		if (vf_dev == dev)
			continue;

		if (is_same_ether_addr(mac, vf_mac))
			return i;
	}
	return -ENOENT;
}
/*
 * Take ownership of ethdev port port_id on behalf of this netvsc device
 * and publish the port through *vf_dev.
 *
 * Returns 0 on success, -EBUSY when the port already has an owner, or
 * the negative value returned by the ethdev owner API.
 */
static int hn_vf_attach(struct hn_data *hv, uint16_t port_id,
			struct rte_eth_dev **vf_dev)
{
	struct rte_eth_dev_owner owner = { .id = RTE_ETH_DEV_NO_OWNER };
	int ret;

	ret = rte_eth_dev_owner_get(port_id, &owner);
	if (ret < 0) {
		PMD_DRV_LOG(ERR, "Can not find owner for port %u", port_id);
		return ret;
	}

	if (owner.id != RTE_ETH_DEV_NO_OWNER) {
		PMD_DRV_LOG(ERR, "Port %u already owned by other device %s",
			    port_id, owner.name);
		return -EBUSY;
	}

	ret = rte_eth_dev_owner_set(port_id, &hv->owner);
	if (ret < 0) {
		PMD_DRV_LOG(ERR, "Can not set owner for port %u", port_id);
		return ret;
	}

	PMD_DRV_LOG(DEBUG, "Attach VF device %u", port_id);

	/*
	 * Write barrier before the pointer is stored. The transmit and
	 * receive paths read hv->vf_dev without taking vf_lock.
	 * NOTE(review): presumably the intent is that ownership is visible
	 * before the datapath can observe the VF - confirm.
	 */
	rte_smp_wmb();
	*vf_dev = &rte_eth_devices[port_id];
	return 0;
}
/* Add new VF device to synthetic device */
int hn_vf_add(struct rte_eth_dev *dev, struct hn_data *hv)
{
int port, err;
port = hn_vf_match(dev);
if (port < 0) {
PMD_DRV_LOG(NOTICE, "No matching MAC found");
return port;
}
rte_spinlock_lock(&hv->vf_lock);
if (hv->vf_dev) {
PMD_DRV_LOG(ERR, "VF already attached");
err = -EBUSY;
} else {
err = hn_vf_attach(hv, port, &hv->vf_dev);
}
if (err == 0) {
dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
hv->vf_intr = (struct rte_intr_handle) {
.fd = -1,
.type = RTE_INTR_HANDLE_EXT,
};
dev->intr_handle = &hv->vf_intr;
hn_nvs_set_datapath(hv, NVS_DATAPATH_VF);
}
rte_spinlock_unlock(&hv->vf_lock);
return err;
}
/*
 * Detach the VF, if one is attached: switch the datapath back to the
 * synthetic path, forget the VF and release ownership of its port.
 * Logs an error when no VF is attached.
 */
static void hn_vf_remove(struct hn_data *hv)
{
	struct rte_eth_dev *sub;

	rte_spinlock_lock(&hv->vf_lock);

	sub = hv->vf_dev;
	if (sub == NULL) {
		PMD_DRV_LOG(ERR, "VF path not active");
	} else {
		/* Stop incoming packets from arriving on VF */
		hn_nvs_set_datapath(hv, NVS_DATAPATH_SYNTHETIC);
		hv->vf_dev = NULL;

		/* Give back ownership */
		rte_eth_dev_owner_unset(sub->data->port_id, hv->owner.id);
	}

	rte_spinlock_unlock(&hv->vf_lock);
}
/*
 * Handle a VF association message from the host.
 *
 * Records whether a VF is present in hv->vf_present. If the port is in
 * the RTE_ETH_DEV_ATTACHED state, the VF is added or removed right away;
 * otherwise only the flag is updated.
 * Messages shorter than struct hn_nvs_vf_association are dropped.
 */
void
hn_nvs_handle_vfassoc(struct rte_eth_dev *dev,
		      const struct vmbus_chanpkt_hdr *hdr,
		      const void *data)
{
	const struct hn_nvs_vf_association *msg = data;
	struct hn_data *hv = dev->data->dev_private;

	if (unlikely(vmbus_chanpkt_datalen(hdr) < sizeof(*msg))) {
		PMD_DRV_LOG(ERR, "invalid vf association NVS");
		return;
	}

	PMD_DRV_LOG(DEBUG, "VF serial %u %s port %u",
		    msg->serial,
		    msg->allocated ? "add to" : "remove from",
		    dev->data->port_id);

	hv->vf_present = msg->allocated;

	/* Nothing more to do until the port is attached */
	if (dev->state != RTE_ETH_DEV_ATTACHED)
		return;

	if (!msg->allocated) {
		hn_vf_remove(hv);
		return;
	}

	hn_vf_add(dev, hv);
}
/*
* Merge the info from the VF and synthetic path.
* use the default config of the VF
* and the minimum number of queues and buffer sizes.
*/
static void hn_vf_info_merge(struct rte_eth_dev *vf_dev,
struct rte_eth_dev_info *info)
{
struct rte_eth_dev_info vf_info;
rte_eth_dev_info_get(vf_dev->data->port_id, &vf_info);
info->speed_capa = vf_info.speed_capa;
info->default_rxportconf = vf_info.default_rxportconf;
info->default_txportconf = vf_info.default_txportconf;
info->max_rx_queues = RTE_MIN(vf_info.max_rx_queues,
info->max_rx_queues);
info->rx_offload_capa &= vf_info.rx_offload_capa;
info->rx_queue_offload_capa &= vf_info.rx_queue_offload_capa;
info->flow_type_rss_offloads &= vf_info.flow_type_rss_offloads;
info->max_tx_queues = RTE_MIN(vf_info.max_tx_queues,
info->max_tx_queues);
info->tx_offload_capa &= vf_info.tx_offload_capa;
info->tx_queue_offload_capa &= vf_info.tx_queue_offload_capa;
info->min_rx_bufsize = RTE_MAX(vf_info.min_rx_bufsize,
info->min_rx_bufsize);
info->max_rx_pktlen = RTE_MAX(vf_info.max_rx_pktlen,
info->max_rx_pktlen);
}
void hn_vf_info_get(struct hn_data *hv, struct rte_eth_dev_info *info)
{
struct rte_eth_dev *vf_dev;
rte_spinlock_lock(&hv->vf_lock);
vf_dev = hv->vf_dev;
if (vf_dev)
hn_vf_info_merge(vf_dev, info);
rte_spinlock_unlock(&hv->vf_lock);
}
/*
 * Ask the VF driver, if a VF is attached and implements link_update,
 * to refresh its link state.
 *
 * dev is the synthetic netvsc port; the VF's own rte_eth_dev is looked
 * up under hv->vf_lock and is what gets passed to the VF's link_update
 * op. A driver op expects its own device: handing it the netvsc device
 * would make it treat netvsc's dev_private as its own.
 *
 * Returns the VF op's return value, or 0 when there is no VF or no op.
 */
int hn_vf_link_update(struct rte_eth_dev *dev,
		      int wait_to_complete)
{
	struct hn_data *hv = dev->data->dev_private;
	struct rte_eth_dev *vf_dev;
	int ret = 0;

	rte_spinlock_lock(&hv->vf_lock);
	vf_dev = hv->vf_dev;
	if (vf_dev && vf_dev->dev_ops->link_update)
		ret = (*vf_dev->dev_ops->link_update)(vf_dev, wait_to_complete);
	rte_spinlock_unlock(&hv->vf_lock);

	return ret;
}
/*
 * Ethdev event callback, called when the VF has link state interrupts
 * enabled.
 *
 * cb_arg is the synthetic netvsc device. Events other than
 * RTE_ETH_EVENT_INTR_LSC are ignored. For an LSC event the netvsc link
 * is refreshed, and the event is passed on to the netvsc port's own
 * callbacks only when hn_dev_link_update() returns non-zero (zero is
 * treated as "no change").
 */
static int hn_vf_lsc_event(uint16_t port_id __rte_unused,
			   enum rte_eth_event_type event,
			   void *cb_arg, void *out __rte_unused)
{
	struct rte_eth_dev *synth = cb_arg;

	if (event == RTE_ETH_EVENT_INTR_LSC &&
	    hn_dev_link_update(synth, 0) != 0)
		return _rte_eth_dev_callback_process(synth,
						     RTE_ETH_EVENT_INTR_LSC,
						     NULL);

	/* Not an LSC event, or the link state did not change */
	return 0;
}
static int _hn_vf_configure(struct rte_eth_dev *dev,
struct rte_eth_dev *vf_dev,
const struct rte_eth_conf *dev_conf)
{
struct rte_eth_conf vf_conf = *dev_conf;
uint16_t vf_port = vf_dev->data->port_id;
int ret;
if (dev_conf->intr_conf.lsc &&
(vf_dev->data->dev_flags & RTE_ETH_DEV_INTR_LSC)) {
PMD_DRV_LOG(DEBUG, "enabling LSC for VF %u",
vf_port);
vf_conf.intr_conf.lsc = 1;
} else {
PMD_DRV_LOG(DEBUG, "disabling LSC for VF %u",
vf_port);
vf_conf.intr_conf.lsc = 0;
}
ret = rte_eth_dev_configure(vf_port,
dev->data->nb_rx_queues,
dev->data->nb_tx_queues,
&vf_conf);
if (ret) {
PMD_DRV_LOG(ERR,
"VF configuration failed: %d", ret);
} else if (vf_conf.intr_conf.lsc) {
ret = rte_eth_dev_callback_register(vf_port,
RTE_ETH_DEV_INTR_LSC,
hn_vf_lsc_event, dev);
if (ret)
PMD_DRV_LOG(ERR,
"Failed to register LSC callback for VF %u",
vf_port);
}
return ret;
}
/*
* Configure VF if present.
* Force VF to have same number of queues as synthetic device
*/
int hn_vf_configure(struct rte_eth_dev *dev,
const struct rte_eth_conf *dev_conf)
{
struct hn_data *hv = dev->data->dev_private;
struct rte_eth_dev *vf_dev;
int ret = 0;
rte_spinlock_lock(&hv->vf_lock);
vf_dev = hv->vf_dev;
if (vf_dev)
ret = _hn_vf_configure(dev, vf_dev, dev_conf);
rte_spinlock_unlock(&hv->vf_lock);
return ret;
}
/*
 * Report the packet types supported by the VF driver.
 *
 * Returns the VF's dev_supported_ptypes_get() result, or NULL when no
 * VF is attached or the VF driver does not implement the op.
 */
const uint32_t *hn_vf_supported_ptypes(struct rte_eth_dev *dev)
{
	struct hn_data *hv = dev->data->dev_private;
	const uint32_t *result = NULL;
	struct rte_eth_dev *sub;

	rte_spinlock_lock(&hv->vf_lock);
	sub = hv->vf_dev;
	if (sub != NULL) {
		if (sub->dev_ops->dev_supported_ptypes_get != NULL)
			result = sub->dev_ops->dev_supported_ptypes_get(sub);
	}
	rte_spinlock_unlock(&hv->vf_lock);

	return result;
}
int hn_vf_start(struct rte_eth_dev *dev)
{
struct hn_data *hv = dev->data->dev_private;
struct rte_eth_dev *vf_dev;
int ret = 0;
rte_spinlock_lock(&hv->vf_lock);
vf_dev = hv->vf_dev;
if (vf_dev)
ret = rte_eth_dev_start(vf_dev->data->port_id);
rte_spinlock_unlock(&hv->vf_lock);
return ret;
}
void hn_vf_stop(struct rte_eth_dev *dev)
{
struct hn_data *hv = dev->data->dev_private;
struct rte_eth_dev *vf_dev;
rte_spinlock_lock(&hv->vf_lock);
vf_dev = hv->vf_dev;
if (vf_dev)
rte_eth_dev_stop(vf_dev->data->port_id);
rte_spinlock_unlock(&hv->vf_lock);
}
/*
 * If a VF is present, cascade an ethdev call down to it.
 *
 * dev  - the synthetic netvsc device (struct rte_eth_dev *)
 * func - function or macro taking the VF's port id as its only argument;
 *        its return value, if any, is discarded
 *
 * The call is made with hv->vf_lock held. The body is wrapped in
 * do { } while (0) so that the macro followed by ';' is exactly one
 * statement and can be used safely in an unbraced if/else.
 */
#define VF_ETHDEV_FUNC(dev, func)				\
	do {							\
		struct hn_data *hv = (dev)->data->dev_private;	\
		struct rte_eth_dev *vf_dev;			\
		rte_spinlock_lock(&hv->vf_lock);		\
		vf_dev = hv->vf_dev;				\
		if (vf_dev)					\
			func(vf_dev->data->port_id);		\
		rte_spinlock_unlock(&hv->vf_lock);		\
	} while (0)
/*
 * Call rte_eth_dev_reset() on the VF port, if a VF is attached.
 * The lookup and the call are done by VF_ETHDEV_FUNC under hv->vf_lock;
 * the return value of rte_eth_dev_reset() is not propagated.
 */
void hn_vf_reset(struct rte_eth_dev *dev)
{
VF_ETHDEV_FUNC(dev, rte_eth_dev_reset);
}
/*
 * Call rte_eth_dev_close() on the VF port, if a VF is attached.
 * The lookup and the call are done by VF_ETHDEV_FUNC under hv->vf_lock.
 */
void hn_vf_close(struct rte_eth_dev *dev)
{
VF_ETHDEV_FUNC(dev, rte_eth_dev_close);
}
/*
 * Call rte_eth_stats_reset() on the VF port, if a VF is attached.
 * The lookup and the call are done by VF_ETHDEV_FUNC under hv->vf_lock.
 */
void hn_vf_stats_reset(struct rte_eth_dev *dev)
{
VF_ETHDEV_FUNC(dev, rte_eth_stats_reset);
}
int hn_vf_tx_queue_setup(struct rte_eth_dev *dev,
uint16_t queue_idx, uint16_t nb_desc,
unsigned int socket_id,
const struct rte_eth_txconf *tx_conf)
{
struct hn_data *hv = dev->data->dev_private;
struct rte_eth_dev *vf_dev;
int ret = 0;
rte_spinlock_lock(&hv->vf_lock);
vf_dev = hv->vf_dev;
if (vf_dev)
ret = rte_eth_tx_queue_setup(vf_dev->data->port_id,
queue_idx, nb_desc,
socket_id, tx_conf);
rte_spinlock_unlock(&hv->vf_lock);
return ret;
}
/*
 * Release the VF's transmit queue with index queue_id by calling the VF
 * driver's tx_queue_release op directly. Does nothing when no VF is
 * attached or the op is not implemented.
 */
void hn_vf_tx_queue_release(struct hn_data *hv, uint16_t queue_id)
{
	struct rte_eth_dev *sub;

	rte_spinlock_lock(&hv->vf_lock);
	sub = hv->vf_dev;
	if (sub != NULL && sub->dev_ops->tx_queue_release != NULL)
		sub->dev_ops->tx_queue_release(sub->data->tx_queues[queue_id]);
	rte_spinlock_unlock(&hv->vf_lock);
}
int hn_vf_rx_queue_setup(struct rte_eth_dev *dev,
uint16_t queue_idx, uint16_t nb_desc,
unsigned int socket_id,
const struct rte_eth_rxconf *rx_conf,
struct rte_mempool *mp)
{
struct hn_data *hv = dev->data->dev_private;
struct rte_eth_dev *vf_dev;
int ret = 0;
rte_spinlock_lock(&hv->vf_lock);
vf_dev = hv->vf_dev;
if (vf_dev)
ret = rte_eth_rx_queue_setup(vf_dev->data->port_id,
queue_idx, nb_desc,
socket_id, rx_conf, mp);
rte_spinlock_unlock(&hv->vf_lock);
return ret;
}
/*
 * Release the VF's receive queue with index queue_id by calling the VF
 * driver's rx_queue_release op directly. Does nothing when no VF is
 * attached or the op is not implemented.
 */
void hn_vf_rx_queue_release(struct hn_data *hv, uint16_t queue_id)
{
	struct rte_eth_dev *sub;

	rte_spinlock_lock(&hv->vf_lock);
	sub = hv->vf_dev;
	if (sub != NULL && sub->dev_ops->rx_queue_release != NULL)
		sub->dev_ops->rx_queue_release(sub->data->rx_queues[queue_id]);
	rte_spinlock_unlock(&hv->vf_lock);
}
int hn_vf_stats_get(struct rte_eth_dev *dev,
struct rte_eth_stats *stats)
{
struct hn_data *hv = dev->data->dev_private;
struct rte_eth_dev *vf_dev;
int ret = 0;
rte_spinlock_lock(&hv->vf_lock);
vf_dev = hv->vf_dev;
if (vf_dev)
ret = rte_eth_stats_get(vf_dev->data->port_id, stats);
rte_spinlock_unlock(&hv->vf_lock);
return ret;
}
/*
 * Retrieve the VF's extended statistic names and prefix each with "vf_".
 *
 * names - array to fill, or NULL to only query the number of names
 * n     - number of entries available in names
 *
 * Returns whatever the VF driver's xstats_get_names op returns, or 0
 * when no VF is attached or the op is not implemented.
 *
 * Only the first n entries are ever rewritten with the prefix: the VF
 * driver may report a count larger than the space it was given, and
 * entries past n do not belong to the caller's array.
 */
int hn_vf_xstats_get_names(struct rte_eth_dev *dev,
			   struct rte_eth_xstat_name *names,
			   unsigned int n)
{
	struct hn_data *hv = dev->data->dev_private;
	struct rte_eth_dev *vf_dev;
	int i, count = 0;
	char tmp[RTE_ETH_XSTATS_NAME_SIZE];

	rte_spinlock_lock(&hv->vf_lock);
	vf_dev = hv->vf_dev;
	if (vf_dev && vf_dev->dev_ops->xstats_get_names)
		count = vf_dev->dev_ops->xstats_get_names(vf_dev, names, n);
	rte_spinlock_unlock(&hv->vf_lock);

	/* add vf_ prefix to xstat names */
	if (names) {
		for (i = 0; i < count && (unsigned int)i < n; i++) {
			/* Build in a scratch buffer: source and target overlap */
			snprintf(tmp, sizeof(tmp), "vf_%s", names[i].name);
			strlcpy(names[i].name, tmp, sizeof(names[i].name));
		}
	}

	return count;
}
/*
 * Retrieve the VF's extended statistics into xstats (n entries).
 *
 * Returns the VF driver's xstats_get result, or 0 when no VF is attached
 * or the op is not implemented.
 */
int hn_vf_xstats_get(struct rte_eth_dev *dev,
		     struct rte_eth_xstat *xstats,
		     unsigned int n)
{
	struct hn_data *hv = dev->data->dev_private;
	struct rte_eth_dev *sub;
	int filled = 0;

	rte_spinlock_lock(&hv->vf_lock);
	sub = hv->vf_dev;
	if (sub != NULL) {
		if (sub->dev_ops->xstats_get != NULL)
			filled = sub->dev_ops->xstats_get(sub, xstats, n);
	}
	rte_spinlock_unlock(&hv->vf_lock);

	return filled;
}
/*
 * Reset the VF's extended statistics through the VF driver's
 * xstats_reset op. Does nothing when no VF is attached or the op is not
 * implemented.
 */
void hn_vf_xstats_reset(struct rte_eth_dev *dev)
{
	struct hn_data *hv = dev->data->dev_private;
	struct rte_eth_dev *sub;

	rte_spinlock_lock(&hv->vf_lock);
	sub = hv->vf_dev;
	if (sub != NULL && sub->dev_ops->xstats_reset != NULL)
		sub->dev_ops->xstats_reset(sub);
	rte_spinlock_unlock(&hv->vf_lock);
}

View File

@ -3,7 +3,7 @@
build = dpdk_conf.has('RTE_LIBRTE_VMBUS_BUS')
version = 2
sources = files('hn_ethdev.c', 'hn_rxtx.c', 'hn_rndis.c', 'hn_nvs.c')
sources = files('hn_ethdev.c', 'hn_rxtx.c', 'hn_rndis.c', 'hn_nvs.c', 'hn_vf.c')
deps += ['bus_vmbus' ]