From 4e9c73e96e834dbfaa51ada48ad11e7873556808 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Fri, 13 Jul 2018 10:06:43 -0700 Subject: [PATCH] net/netvsc: add Hyper-V network device The driver supports Hyper-V networking directly like virtio for KVM or vmxnet3 for VMware. This code is based off of the FreeBSD driver. The file and variable names are kept the same to help with understanding (with most of the BSD style warts removed). This version supports the latest NetVSP 6.1 version and older versions. Signed-off-by: Haiyang Zhang Signed-off-by: Stephen Hemminger --- MAINTAINERS | 6 + config/common_base | 9 + config/common_linuxapp | 3 + drivers/bus/vmbus/rte_bus_vmbus_version.map | 1 - drivers/net/Makefile | 1 + drivers/net/meson.build | 1 + drivers/net/netvsc/Makefile | 23 + drivers/net/netvsc/hn_ethdev.c | 759 ++++++++++ drivers/net/netvsc/hn_logs.h | 36 + drivers/net/netvsc/hn_nvs.c | 546 +++++++ drivers/net/netvsc/hn_nvs.h | 229 +++ drivers/net/netvsc/hn_rndis.c | 1099 ++++++++++++++ drivers/net/netvsc/hn_rndis.h | 32 + drivers/net/netvsc/hn_rxtx.c | 1329 +++++++++++++++++ drivers/net/netvsc/hn_var.h | 153 ++ drivers/net/netvsc/meson.build | 9 + drivers/net/netvsc/ndis.h | 378 +++++ drivers/net/netvsc/rndis.h | 414 +++++ drivers/net/netvsc/rte_pmd_netvsc_version.map | 5 + mk/rte.app.mk | 1 + 20 files changed, 5033 insertions(+), 1 deletion(-) create mode 100644 drivers/net/netvsc/Makefile create mode 100644 drivers/net/netvsc/hn_ethdev.c create mode 100644 drivers/net/netvsc/hn_logs.h create mode 100644 drivers/net/netvsc/hn_nvs.c create mode 100644 drivers/net/netvsc/hn_nvs.h create mode 100644 drivers/net/netvsc/hn_rndis.c create mode 100644 drivers/net/netvsc/hn_rndis.h create mode 100644 drivers/net/netvsc/hn_rxtx.c create mode 100644 drivers/net/netvsc/hn_var.h create mode 100644 drivers/net/netvsc/meson.build create mode 100644 drivers/net/netvsc/ndis.h create mode 100644 drivers/net/netvsc/rndis.h create mode 100644 drivers/net/netvsc/rte_pmd_netvsc_version.map diff --git a/MAINTAINERS b/MAINTAINERS index 61d27a329f..0f3bc74002 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -607,6 +607,12 @@ F: drivers/net/vdev_netvsc/ F: doc/guides/nics/vdev_netvsc.rst F: doc/guides/nics/features/vdev_netvsc.ini +Microsoft Hyper-V netvsc - EXPERIMENTAL +M: Stephen Hemminger +M: K. Y. Srinivasan +M: Haiyang Zhang +F: drivers/net/netvsc/ + Netcope szedata2 M: Matej Vido F: drivers/net/szedata2/ diff --git a/config/common_base b/config/common_base index 8f8190a659..201cdf698f 100644 --- a/config/common_base +++ b/config/common_base @@ -404,6 +404,15 @@ CONFIG_RTE_LIBRTE_MVPP2_PMD=n # CONFIG_RTE_LIBRTE_VMBUS=n +# +# Compile native PMD for Hyper-V/Azure +# +CONFIG_RTE_LIBRTE_NETVSC_PMD=n +CONFIG_RTE_LIBRTE_NETVSC_DEBUG_RX=n +CONFIG_RTE_LIBRTE_NETVSC_DEBUG_TX=n +CONFIG_RTE_LIBRTE_NETVSC_DEBUG_DUMP=n + +# # Compile virtual device driver for NetVSC on Hyper-V/Azure # CONFIG_RTE_LIBRTE_VDEV_NETVSC_PMD=n diff --git a/config/common_linuxapp b/config/common_linuxapp index 37e8f6958c..9c5ea9d899 100644 --- a/config/common_linuxapp +++ b/config/common_linuxapp @@ -26,6 +26,9 @@ CONFIG_RTE_LIBRTE_POWER=y CONFIG_RTE_VIRTIO_USER=y CONFIG_RTE_PROC_INFO=y +CONFIG_RTE_LIBRTE_VMBUS=y +CONFIG_RTE_LIBRTE_NETVSC_PMD=y + # NXP DPAA BUS and drivers CONFIG_RTE_LIBRTE_DPAA_BUS=y CONFIG_RTE_LIBRTE_DPAA_MEMPOOL=y diff --git a/drivers/bus/vmbus/rte_bus_vmbus_version.map b/drivers/bus/vmbus/rte_bus_vmbus_version.map index aa6264530b..5324fef466 100644 --- a/drivers/bus/vmbus/rte_bus_vmbus_version.map +++ b/drivers/bus/vmbus/rte_bus_vmbus_version.map @@ -17,7 +17,6 @@ DPDK_18.08 { rte_vmbus_map_device; rte_vmbus_max_channels; rte_vmbus_probe; - rte_vmbus_probe_one; rte_vmbus_register; rte_vmbus_scan; rte_vmbus_sub_channel_index; diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 1ae0eaffbd..664398de98 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -33,6 +33,7 @@ DIRS-$(CONFIG_RTE_LIBRTE_LIO_PMD) += liquidio DIRS-$(CONFIG_RTE_LIBRTE_MLX4_PMD) += mlx4 DIRS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5 DIRS-$(CONFIG_RTE_LIBRTE_MVPP2_PMD) += mvpp2 +DIRS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += netvsc DIRS-$(CONFIG_RTE_LIBRTE_NFP_PMD) += nfp DIRS-$(CONFIG_RTE_LIBRTE_BNXT_PMD) += bnxt DIRS-$(CONFIG_RTE_LIBRTE_PMD_NULL) += null diff --git a/drivers/net/meson.build b/drivers/net/meson.build index d19b195f8b..9c28ed4da4 100644 --- a/drivers/net/meson.build +++ b/drivers/net/meson.build @@ -19,6 +19,7 @@ drivers = ['af_packet', 'kni', 'liquidio', 'mvpp2', + 'netvsc', 'nfp', 'null', 'octeontx', 'pcap', 'ring', 'sfc', diff --git a/drivers/net/netvsc/Makefile b/drivers/net/netvsc/Makefile new file mode 100644 index 0000000000..3c713af3c8 --- /dev/null +++ b/drivers/net/netvsc/Makefile @@ -0,0 +1,23 @@ +# SPDX-License-Identifier: BSD-3-Clause + +include $(RTE_SDK)/mk/rte.vars.mk + +LIB = librte_pmd_netvsc.a + +CFLAGS += -O3 $(WERROR_FLAGS) +CFLAGS += -DALLOW_EXPERIMENTAL_API + +EXPORT_MAP := rte_pmd_netvsc_version.map + +LIBABIVER := 1 + +SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_ethdev.c +SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_rxtx.c +SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_rndis.c +SRCS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += hn_nvs.c + +LDLIBS += -lrte_eal -lrte_mbuf -lrte_mempool -lrte_ring +LDLIBS += -lrte_ethdev -lrte_net -lrte_kvargs +LDLIBS += -lrte_bus_vmbus + +include $(RTE_SDK)/mk/rte.lib.mk diff --git a/drivers/net/netvsc/hn_ethdev.c b/drivers/net/netvsc/hn_ethdev.c new file mode 100644 index 0000000000..47ed760b82 --- /dev/null +++ b/drivers/net/netvsc/hn_ethdev.c @@ -0,0 +1,759 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016-2018 Microsoft Corporation + * Copyright(c) 2013-2016 Brocade Communications Systems, Inc. + * All rights reserved. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hn_logs.h" +#include "hn_var.h" +#include "hn_rndis.h" +#include "hn_nvs.h" +#include "ndis.h" + +#define HN_TX_OFFLOAD_CAPS (DEV_TX_OFFLOAD_IPV4_CKSUM | \ + DEV_TX_OFFLOAD_TCP_CKSUM | \ + DEV_TX_OFFLOAD_UDP_CKSUM | \ + DEV_TX_OFFLOAD_TCP_TSO | \ + DEV_TX_OFFLOAD_MULTI_SEGS | \ + DEV_TX_OFFLOAD_VLAN_INSERT) + +#define HN_RX_OFFLOAD_CAPS (DEV_RX_OFFLOAD_CHECKSUM | \ + DEV_RX_OFFLOAD_VLAN_STRIP | \ + DEV_RX_OFFLOAD_CRC_STRIP) + +int hn_logtype_init; +int hn_logtype_driver; + +struct hn_xstats_name_off { + char name[RTE_ETH_XSTATS_NAME_SIZE]; + unsigned int offset; +}; + +static const struct hn_xstats_name_off hn_stat_strings[] = { + { "good_packets", offsetof(struct hn_stats, packets) }, + { "good_bytes", offsetof(struct hn_stats, bytes) }, + { "errors", offsetof(struct hn_stats, errors) }, + { "allocation_failed", offsetof(struct hn_stats, nomemory) }, + { "multicast_packets", offsetof(struct hn_stats, multicast) }, + { "broadcast_packets", offsetof(struct hn_stats, broadcast) }, + { "undersize_packets", offsetof(struct hn_stats, size_bins[0]) }, + { "size_64_packets", offsetof(struct hn_stats, size_bins[1]) }, + { "size_65_127_packets", offsetof(struct hn_stats, size_bins[2]) }, + { "size_128_255_packets", offsetof(struct hn_stats, size_bins[3]) }, + { "size_256_511_packets", offsetof(struct hn_stats, size_bins[4]) }, + { "size_512_1023_packets", offsetof(struct hn_stats, size_bins[5]) }, + { "size_1024_1518_packets", offsetof(struct hn_stats, size_bins[6]) }, + { "size_1519_max_packets", offsetof(struct hn_stats, size_bins[7]) }, +}; + +static struct rte_eth_dev * +eth_dev_vmbus_allocate(struct rte_vmbus_device *dev, size_t private_data_size) +{ + struct rte_eth_dev *eth_dev; + const char *name; + + if (!dev) + return NULL; + + name = dev->device.name; + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) { + eth_dev = rte_eth_dev_allocate(name); + if (!eth_dev) { + PMD_DRV_LOG(NOTICE, "can not allocate rte ethdev"); + return NULL; + } + + if (private_data_size) { + eth_dev->data->dev_private = + rte_zmalloc_socket(name, private_data_size, + RTE_CACHE_LINE_SIZE, dev->device.numa_node); + if (!eth_dev->data->dev_private) { + PMD_DRV_LOG(NOTICE, "can not allocate driver data"); + rte_eth_dev_release_port(eth_dev); + return NULL; + } + } + } else { + eth_dev = rte_eth_dev_attach_secondary(name); + if (!eth_dev) { + PMD_DRV_LOG(NOTICE, "can not attach secondary"); + return NULL; + } + } + + eth_dev->device = &dev->device; + eth_dev->intr_handle = &dev->intr_handle; + + return eth_dev; +} + +static void +eth_dev_vmbus_release(struct rte_eth_dev *eth_dev) +{ + /* free ether device */ + rte_eth_dev_release_port(eth_dev); + + if (rte_eal_process_type() == RTE_PROC_PRIMARY) + rte_free(eth_dev->data->dev_private); + + eth_dev->data->dev_private = NULL; + + /* + * Secondary process will check the name to attach. + * Clear this field to avoid attaching a released ports. + */ + eth_dev->data->name[0] = '\0'; + + eth_dev->device = NULL; + eth_dev->intr_handle = NULL; +} + +/* Update link status. + * Note: the DPDK definition of "wait_to_complete" + * means block this call until link is up. + * which is not worth supporting. + */ +static int +hn_dev_link_update(struct rte_eth_dev *dev, + __rte_unused int wait_to_complete) +{ + struct hn_data *hv = dev->data->dev_private; + struct rte_eth_link link, old; + int error; + + old = dev->data->dev_link; + + error = hn_rndis_get_linkstatus(hv); + if (error) + return error; + + hn_rndis_get_linkspeed(hv); + + link = (struct rte_eth_link) { + .link_duplex = ETH_LINK_FULL_DUPLEX, + .link_autoneg = ETH_LINK_SPEED_FIXED, + .link_speed = hv->link_speed / 10000, + }; + + if (hv->link_status == NDIS_MEDIA_STATE_CONNECTED) + link.link_status = ETH_LINK_UP; + else + link.link_status = ETH_LINK_DOWN; + + if (old.link_status == link.link_status) + return 0; + + PMD_INIT_LOG(DEBUG, "Port %d is %s", dev->data->port_id, + (link.link_status == ETH_LINK_UP) ? "up" : "down"); + + return rte_eth_linkstatus_set(dev, &link); +} + +static void hn_dev_info_get(struct rte_eth_dev *dev, + struct rte_eth_dev_info *dev_info) +{ + struct hn_data *hv = dev->data->dev_private; + + dev_info->speed_capa = ETH_LINK_SPEED_10G; + dev_info->min_rx_bufsize = HN_MIN_RX_BUF_SIZE; + dev_info->max_rx_pktlen = HN_MAX_XFER_LEN; + dev_info->max_mac_addrs = 1; + + dev_info->hash_key_size = NDIS_HASH_KEYSIZE_TOEPLITZ; + dev_info->flow_type_rss_offloads = + ETH_RSS_IPV4 | ETH_RSS_IPV6 | ETH_RSS_TCP | ETH_RSS_UDP; + + dev_info->max_rx_queues = hv->max_queues; + dev_info->max_tx_queues = hv->max_queues; + + hn_rndis_get_offload(hv, dev_info); +} + +static void +hn_dev_promiscuous_enable(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + + hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_PROMISCUOUS); +} + +static void +hn_dev_promiscuous_disable(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + uint32_t filter; + + filter = NDIS_PACKET_TYPE_DIRECTED | NDIS_PACKET_TYPE_BROADCAST; + if (dev->data->all_multicast) + filter |= NDIS_PACKET_TYPE_ALL_MULTICAST; + hn_rndis_set_rxfilter(hv, filter); +} + +static void +hn_dev_allmulticast_enable(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + + hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_DIRECTED | + NDIS_PACKET_TYPE_ALL_MULTICAST | + NDIS_PACKET_TYPE_BROADCAST); +} + +static void +hn_dev_allmulticast_disable(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + + hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_DIRECTED | + NDIS_PACKET_TYPE_BROADCAST); +} + +/* Setup shared rx/tx queue data */ +static int hn_subchan_configure(struct hn_data *hv, + uint32_t subchan) +{ + struct vmbus_channel *primary = hn_primary_chan(hv); + int err; + unsigned int retry = 0; + + PMD_DRV_LOG(DEBUG, + "open %u subchannels", subchan); + + /* Send create sub channels command */ + err = hn_nvs_alloc_subchans(hv, &subchan); + if (err) + return err; + + while (subchan > 0) { + struct vmbus_channel *new_sc; + uint16_t chn_index; + + err = rte_vmbus_subchan_open(primary, &new_sc); + if (err == -ENOENT && ++retry < 1000) { + /* This can happen if not ready yet */ + rte_delay_ms(10); + continue; + } + + if (err) { + PMD_DRV_LOG(ERR, + "open subchannel failed: %d", err); + return err; + } + + retry = 0; + chn_index = rte_vmbus_sub_channel_index(new_sc); + if (chn_index == 0 || chn_index > hv->max_queues) { + PMD_DRV_LOG(ERR, + "Invalid subchannel offermsg channel %u", + chn_index); + return -EIO; + } + + PMD_DRV_LOG(DEBUG, "new sub channel %u", chn_index); + hv->channels[chn_index] = new_sc; + --subchan; + } + + return err; +} + +static int hn_dev_configure(struct rte_eth_dev *dev) +{ + const struct rte_eth_conf *dev_conf = &dev->data->dev_conf; + const struct rte_eth_rxmode *rxmode = &dev_conf->rxmode; + const struct rte_eth_txmode *txmode = &dev_conf->txmode; + + const struct rte_eth_rss_conf *rss_conf = + &dev_conf->rx_adv_conf.rss_conf; + struct hn_data *hv = dev->data->dev_private; + uint64_t unsupported; + int err, subchan; + + PMD_INIT_FUNC_TRACE(); + + unsupported = txmode->offloads & ~HN_TX_OFFLOAD_CAPS; + if (unsupported) { + PMD_DRV_LOG(NOTICE, + "unsupported TX offload: %#" PRIx64, + unsupported); + return -EINVAL; + } + + unsupported = rxmode->offloads & ~HN_RX_OFFLOAD_CAPS; + if (unsupported) { + PMD_DRV_LOG(NOTICE, + "unsupported RX offload: %#" PRIx64, + rxmode->offloads); + return -EINVAL; + } + + err = hn_rndis_conf_offload(hv, txmode->offloads, + rxmode->offloads); + if (err) { + PMD_DRV_LOG(NOTICE, + "offload configure failed"); + return err; + } + + hv->num_queues = RTE_MAX(dev->data->nb_rx_queues, + dev->data->nb_tx_queues); + subchan = hv->num_queues - 1; + if (subchan > 0) { + err = hn_subchan_configure(hv, subchan); + if (err) { + PMD_DRV_LOG(NOTICE, + "subchannel configuration failed"); + return err; + } + + err = hn_rndis_conf_rss(hv, rss_conf); + if (err) { + PMD_DRV_LOG(NOTICE, + "rss configuration failed"); + return err; + } + } + + return 0; +} + +static int hn_dev_stats_get(struct rte_eth_dev *dev, + struct rte_eth_stats *stats) +{ + unsigned int i; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + const struct hn_tx_queue *txq = dev->data->tx_queues[i]; + + if (!txq) + continue; + + stats->opackets += txq->stats.packets; + stats->obytes += txq->stats.bytes; + stats->oerrors += txq->stats.errors + txq->stats.nomemory; + + if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { + stats->q_opackets[i] = txq->stats.packets; + stats->q_obytes[i] = txq->stats.bytes; + } + } + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + const struct hn_rx_queue *rxq = dev->data->rx_queues[i]; + + if (!rxq) + continue; + + stats->ipackets += rxq->stats.packets; + stats->ibytes += rxq->stats.bytes; + stats->ierrors += rxq->stats.errors; + stats->imissed += rxq->ring_full; + + if (i < RTE_ETHDEV_QUEUE_STAT_CNTRS) { + stats->q_ipackets[i] = rxq->stats.packets; + stats->q_ibytes[i] = rxq->stats.bytes; + } + } + + stats->rx_nombuf = dev->data->rx_mbuf_alloc_failed; + return 0; +} + +static void +hn_dev_stats_reset(struct rte_eth_dev *dev) +{ + unsigned int i; + + PMD_INIT_FUNC_TRACE(); + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + struct hn_tx_queue *txq = dev->data->tx_queues[i]; + + if (!txq) + continue; + memset(&txq->stats, 0, sizeof(struct hn_stats)); + } + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + struct hn_rx_queue *rxq = dev->data->rx_queues[i]; + + if (!rxq) + continue; + + memset(&rxq->stats, 0, sizeof(struct hn_stats)); + rxq->ring_full = 0; + } +} + +static int +hn_dev_xstats_get_names(struct rte_eth_dev *dev, + struct rte_eth_xstat_name *xstats_names, + __rte_unused unsigned int limit) +{ + unsigned int i, t, count = 0; + + PMD_INIT_FUNC_TRACE(); + + if (!xstats_names) + return dev->data->nb_tx_queues * RTE_DIM(hn_stat_strings) + + dev->data->nb_rx_queues * RTE_DIM(hn_stat_strings); + + /* Note: limit checked in rte_eth_xstats_names() */ + for (i = 0; i < dev->data->nb_tx_queues; i++) { + const struct hn_tx_queue *txq = dev->data->tx_queues[i]; + + if (!txq) + continue; + + for (t = 0; t < RTE_DIM(hn_stat_strings); t++) + snprintf(xstats_names[count++].name, + RTE_ETH_XSTATS_NAME_SIZE, + "tx_q%u_%s", i, hn_stat_strings[t].name); + } + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + const struct hn_rx_queue *rxq = dev->data->rx_queues[i]; + + if (!rxq) + continue; + + for (t = 0; t < RTE_DIM(hn_stat_strings); t++) + snprintf(xstats_names[count++].name, + RTE_ETH_XSTATS_NAME_SIZE, + "rx_q%u_%s", i, + hn_stat_strings[t].name); + } + + return count; +} + +static int +hn_dev_xstats_get(struct rte_eth_dev *dev, + struct rte_eth_xstat *xstats, + unsigned int n) +{ + unsigned int i, t, count = 0; + + const unsigned int nstats = + dev->data->nb_tx_queues * RTE_DIM(hn_stat_strings) + + dev->data->nb_rx_queues * RTE_DIM(hn_stat_strings); + const char *stats; + + PMD_INIT_FUNC_TRACE(); + + if (n < nstats) + return nstats; + + for (i = 0; i < dev->data->nb_tx_queues; i++) { + const struct hn_tx_queue *txq = dev->data->tx_queues[i]; + + if (!txq) + continue; + + stats = (const char *)&txq->stats; + for (t = 0; t < RTE_DIM(hn_stat_strings); t++) + xstats[count++].value = *(const uint64_t *) + (stats + hn_stat_strings[t].offset); + } + + for (i = 0; i < dev->data->nb_rx_queues; i++) { + const struct hn_rx_queue *rxq = dev->data->rx_queues[i]; + + if (!rxq) + continue; + + stats = (const char *)&rxq->stats; + for (t = 0; t < RTE_DIM(hn_stat_strings); t++) + xstats[count++].value = *(const uint64_t *) + (stats + hn_stat_strings[t].offset); + } + + return count; +} + +static int +hn_dev_start(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + + PMD_INIT_FUNC_TRACE(); + + /* check if lsc interrupt feature is enabled */ + if (dev->data->dev_conf.intr_conf.lsc) { + PMD_DRV_LOG(ERR, "link status not supported yet"); + return -ENOTSUP; + } + + return hn_rndis_set_rxfilter(hv, + NDIS_PACKET_TYPE_BROADCAST | + NDIS_PACKET_TYPE_ALL_MULTICAST | + NDIS_PACKET_TYPE_DIRECTED); +} + +static void +hn_dev_stop(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + + PMD_INIT_FUNC_TRACE(); + + hn_rndis_set_rxfilter(hv, 0); +} + +static void +hn_dev_close(struct rte_eth_dev *dev __rte_unused) +{ + PMD_INIT_LOG(DEBUG, "close"); +} + +static const struct eth_dev_ops hn_eth_dev_ops = { + .dev_configure = hn_dev_configure, + .dev_start = hn_dev_start, + .dev_stop = hn_dev_stop, + .dev_close = hn_dev_close, + .dev_infos_get = hn_dev_info_get, + .promiscuous_enable = hn_dev_promiscuous_enable, + .promiscuous_disable = hn_dev_promiscuous_disable, + .allmulticast_enable = hn_dev_allmulticast_enable, + .allmulticast_disable = hn_dev_allmulticast_disable, + .tx_queue_setup = hn_dev_tx_queue_setup, + .tx_queue_release = hn_dev_tx_queue_release, + .rx_queue_setup = hn_dev_rx_queue_setup, + .rx_queue_release = hn_dev_rx_queue_release, + .link_update = hn_dev_link_update, + .stats_get = hn_dev_stats_get, + .xstats_get = hn_dev_xstats_get, + .xstats_get_names = hn_dev_xstats_get_names, + .stats_reset = hn_dev_stats_reset, + .xstats_reset = hn_dev_stats_reset, +}; + +/* + * Setup connection between PMD and kernel. + */ +static int +hn_attach(struct hn_data *hv, unsigned int mtu) +{ + int error; + + /* Attach NVS */ + error = hn_nvs_attach(hv, mtu); + if (error) + goto failed_nvs; + + /* Attach RNDIS */ + error = hn_rndis_attach(hv); + if (error) + goto failed_rndis; + + /* + * NOTE: + * Under certain conditions on certain versions of Hyper-V, + * the RNDIS rxfilter is _not_ zero on the hypervisor side + * after the successful RNDIS initialization. + */ + hn_rndis_set_rxfilter(hv, NDIS_PACKET_TYPE_NONE); + return 0; +failed_rndis: + hn_nvs_detach(hv); +failed_nvs: + return error; +} + +static void +hn_detach(struct hn_data *hv) +{ + hn_nvs_detach(hv); + hn_rndis_detach(hv); +} + +static int +eth_hn_dev_init(struct rte_eth_dev *eth_dev) +{ + struct hn_data *hv = eth_dev->data->dev_private; + struct rte_device *device = eth_dev->device; + struct rte_vmbus_device *vmbus; + unsigned int rxr_cnt; + int err, max_chan; + + PMD_INIT_FUNC_TRACE(); + + vmbus = container_of(device, struct rte_vmbus_device, device); + eth_dev->dev_ops = &hn_eth_dev_ops; + eth_dev->tx_pkt_burst = &hn_xmit_pkts; + eth_dev->rx_pkt_burst = &hn_recv_pkts; + + /* + * for secondary processes, we don't initialize any further as primary + * has already done this work. + */ + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + /* Since Hyper-V only supports one MAC address, just use local data */ + eth_dev->data->mac_addrs = &hv->mac_addr; + + hv->vmbus = vmbus; + hv->rxbuf_res = &vmbus->resource[HV_RECV_BUF_MAP]; + hv->chim_res = &vmbus->resource[HV_SEND_BUF_MAP]; + hv->port_id = eth_dev->data->port_id; + + /* Initialize primary channel input for control operations */ + err = rte_vmbus_chan_open(vmbus, &hv->channels[0]); + if (err) + return err; + + hv->primary = hn_rx_queue_alloc(hv, 0, + eth_dev->device->numa_node); + + if (!hv->primary) + return -ENOMEM; + + err = hn_attach(hv, ETHER_MTU); + if (err) + goto failed; + + err = hn_tx_pool_init(eth_dev); + if (err) + goto failed; + + err = hn_rndis_get_eaddr(hv, hv->mac_addr.addr_bytes); + if (err) + goto failed; + + max_chan = rte_vmbus_max_channels(vmbus); + PMD_INIT_LOG(DEBUG, "VMBus max channels %d", max_chan); + if (max_chan <= 0) + goto failed; + + if (hn_rndis_query_rsscaps(hv, &rxr_cnt) != 0) + rxr_cnt = 1; + + hv->max_queues = RTE_MIN(rxr_cnt, (unsigned int)max_chan); + + return 0; + +failed: + PMD_INIT_LOG(NOTICE, "device init failed"); + + hn_detach(hv); + return err; +} + +static int +eth_hn_dev_uninit(struct rte_eth_dev *eth_dev) +{ + struct hn_data *hv = eth_dev->data->dev_private; + + PMD_INIT_FUNC_TRACE(); + + if (rte_eal_process_type() != RTE_PROC_PRIMARY) + return 0; + + hn_dev_stop(eth_dev); + hn_dev_close(eth_dev); + + eth_dev->dev_ops = NULL; + eth_dev->tx_pkt_burst = NULL; + eth_dev->rx_pkt_burst = NULL; + + hn_detach(hv); + rte_vmbus_chan_close(hv->primary->chan); + rte_free(hv->primary); + + eth_dev->data->mac_addrs = NULL; + + return 0; +} + +static int eth_hn_probe(struct rte_vmbus_driver *drv __rte_unused, + struct rte_vmbus_device *dev) +{ + struct rte_eth_dev *eth_dev; + int ret; + + PMD_INIT_FUNC_TRACE(); + + eth_dev = eth_dev_vmbus_allocate(dev, sizeof(struct hn_data)); + if (!eth_dev) + return -ENOMEM; + + ret = eth_hn_dev_init(eth_dev); + if (ret) + eth_dev_vmbus_release(eth_dev); + else + rte_eth_dev_probing_finish(eth_dev); + + return ret; +} + +static int eth_hn_remove(struct rte_vmbus_device *dev) +{ + struct rte_eth_dev *eth_dev; + int ret; + + PMD_INIT_FUNC_TRACE(); + + eth_dev = rte_eth_dev_allocated(dev->device.name); + if (!eth_dev) + return -ENODEV; + + ret = eth_hn_dev_uninit(eth_dev); + if (ret) + return ret; + + eth_dev_vmbus_release(eth_dev); + return 0; +} + +/* Network device GUID */ +static const rte_uuid_t hn_net_ids[] = { + /* f8615163-df3e-46c5-913f-f2d2f965ed0e */ + RTE_UUID_INIT(0xf8615163, 0xdf3e, 0x46c5, 0x913f, 0xf2d2f965ed0eULL), + { 0 } +}; + +static struct rte_vmbus_driver rte_netvsc_pmd = { + .id_table = hn_net_ids, + .probe = eth_hn_probe, + .remove = eth_hn_remove, +}; + +RTE_PMD_REGISTER_VMBUS(net_netvsc, rte_netvsc_pmd); +RTE_PMD_REGISTER_KMOD_DEP(net_netvsc, "* uio_hv_generic"); + +RTE_INIT(hn_init_log); +static void +hn_init_log(void) +{ + hn_logtype_init = rte_log_register("pmd.net.netvsc.init"); + if (hn_logtype_init >= 0) + rte_log_set_level(hn_logtype_init, RTE_LOG_NOTICE); + hn_logtype_driver = rte_log_register("pmd.net.netvsc.driver"); + if (hn_logtype_driver >= 0) + rte_log_set_level(hn_logtype_driver, RTE_LOG_NOTICE); +} diff --git a/drivers/net/netvsc/hn_logs.h b/drivers/net/netvsc/hn_logs.h new file mode 100644 index 0000000000..cddadef09c --- /dev/null +++ b/drivers/net/netvsc/hn_logs.h @@ -0,0 +1,36 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ + +#ifndef _HN_LOGS_H_ +#define _HN_LOGS_H_ + +#include + +extern int hn_logtype_init; +extern int hn_logtype_driver; + +#define PMD_INIT_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, hn_logtype_init, "%s(): " fmt "\n",\ + __func__, ## args) +#define PMD_INIT_FUNC_TRACE() PMD_INIT_LOG(DEBUG, " >>") + +#ifdef RTE_LIBRTE_NETVSC_DEBUG_RX +#define PMD_RX_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, hn_logtype_driver, \ + "%s() rx: " fmt "\n", __func__, ## args) +#else +#define PMD_RX_LOG(level, fmt, args...) do { } while (0) +#endif + +#ifdef RTE_LIBRTE_NETVSC_DEBUG_TX +#define PMD_TX_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, hn_logtype_driver, \ + "%s() tx: " fmt "\n", __func__, ## args) +#else +#define PMD_TX_LOG(level, fmt, args...) do { } while (0) +#endif + +#define PMD_DRV_LOG(level, fmt, args...) \ + rte_log(RTE_LOG_ ## level, hn_logtype_driver, "%s(): " fmt "\n", \ + __func__, ## args) + +#endif /* _HN_LOGS_H_ */ diff --git a/drivers/net/netvsc/hn_nvs.c b/drivers/net/netvsc/hn_nvs.c new file mode 100644 index 0000000000..77d3b839fd --- /dev/null +++ b/drivers/net/netvsc/hn_nvs.c @@ -0,0 +1,546 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018 Microsoft Corp. + * Copyright (c) 2010-2012 Citrix Inc. + * Copyright (c) 2012 NetApp Inc. + * All rights reserved. + */ + +/* + * Network Virtualization Service. + */ + + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hn_logs.h" +#include "hn_var.h" +#include "hn_nvs.h" + +static const uint32_t hn_nvs_version[] = { + NVS_VERSION_61, + NVS_VERSION_6, + NVS_VERSION_5, + NVS_VERSION_4, + NVS_VERSION_2, + NVS_VERSION_1 +}; + +static int hn_nvs_req_send(struct hn_data *hv, + void *req, uint32_t reqlen) +{ + return rte_vmbus_chan_send(hn_primary_chan(hv), + VMBUS_CHANPKT_TYPE_INBAND, + req, reqlen, 0, + VMBUS_CHANPKT_FLAG_NONE, NULL); +} + +static int +hn_nvs_execute(struct hn_data *hv, + void *req, uint32_t reqlen, + void *resp, uint32_t resplen, + uint32_t type) +{ + struct vmbus_channel *chan = hn_primary_chan(hv); + char buffer[NVS_RESPSIZE_MAX]; + const struct hn_nvs_hdr *hdr; + uint32_t len; + int ret; + + /* Send request to ring buffer */ + ret = rte_vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_INBAND, + req, reqlen, 0, + VMBUS_CHANPKT_FLAG_RC, NULL); + + if (ret) { + PMD_DRV_LOG(ERR, "send request failed: %d", ret); + return ret; + } + + retry: + len = sizeof(buffer); + ret = rte_vmbus_chan_recv(chan, buffer, &len, NULL); + if (ret == -EAGAIN) { + rte_delay_us(HN_CHAN_INTERVAL_US); + goto retry; + } + + if (ret < 0) { + PMD_DRV_LOG(ERR, "recv response failed: %d", ret); + return ret; + } + + hdr = (struct hn_nvs_hdr *)buffer; + if (hdr->type != type) { + PMD_DRV_LOG(ERR, "unexpected NVS resp %#x, expect %#x", + hdr->type, type); + return -EINVAL; + } + + if (len < resplen) { + PMD_DRV_LOG(ERR, + "invalid NVS resp len %u (expect %u)", + len, resplen); + return -EINVAL; + } + + memcpy(resp, buffer, resplen); + + /* All pass! */ + return 0; +} + +static int +hn_nvs_doinit(struct hn_data *hv, uint32_t nvs_ver) +{ + struct hn_nvs_init init; + struct hn_nvs_init_resp resp; + uint32_t status; + int error; + + memset(&init, 0, sizeof(init)); + init.type = NVS_TYPE_INIT; + init.ver_min = nvs_ver; + init.ver_max = nvs_ver; + + error = hn_nvs_execute(hv, &init, sizeof(init), + &resp, sizeof(resp), + NVS_TYPE_INIT_RESP); + if (error) + return error; + + status = resp.status; + if (status != NVS_STATUS_OK) { + /* Not fatal, try other versions */ + PMD_INIT_LOG(DEBUG, "nvs init failed for ver 0x%x", + nvs_ver); + return -EINVAL; + } + + return 0; +} + +static int +hn_nvs_conn_rxbuf(struct hn_data *hv) +{ + struct hn_nvs_rxbuf_conn conn; + struct hn_nvs_rxbuf_connresp resp; + uint32_t status; + int error; + + /* Kernel has already setup RXBUF on primary channel. */ + + /* + * Connect RXBUF to NVS. + */ + conn.type = NVS_TYPE_RXBUF_CONN; + conn.gpadl = hv->rxbuf_res->phys_addr; + conn.sig = NVS_RXBUF_SIG; + PMD_DRV_LOG(DEBUG, "connect rxbuff va=%p gpad=%#" PRIx64, + hv->rxbuf_res->addr, + hv->rxbuf_res->phys_addr); + + error = hn_nvs_execute(hv, &conn, sizeof(conn), + &resp, sizeof(resp), + NVS_TYPE_RXBUF_CONNRESP); + if (error) { + PMD_DRV_LOG(ERR, + "exec nvs rxbuf conn failed: %d", + error); + return error; + } + + status = resp.status; + if (status != NVS_STATUS_OK) { + PMD_DRV_LOG(ERR, + "nvs rxbuf conn failed: %x", status); + return -EIO; + } + if (resp.nsect != 1) { + PMD_DRV_LOG(ERR, + "nvs rxbuf response num sections %u != 1", + resp.nsect); + return -EIO; + } + + PMD_DRV_LOG(INFO, + "receive buffer size %u count %u", + resp.nvs_sect[0].slotsz, + resp.nvs_sect[0].slotcnt); + hv->rxbuf_section_cnt = resp.nvs_sect[0].slotcnt; + + hv->rxbuf_info = rte_calloc("HN_RXBUF_INFO", hv->rxbuf_section_cnt, + sizeof(*hv->rxbuf_info), RTE_CACHE_LINE_SIZE); + if (!hv->rxbuf_info) { + PMD_DRV_LOG(ERR, + "could not allocate rxbuf info"); + return -ENOMEM; + } + + return 0; +} + +static void +hn_nvs_disconn_rxbuf(struct hn_data *hv) +{ + struct hn_nvs_rxbuf_disconn disconn; + int error; + + /* + * Disconnect RXBUF from NVS. + */ + memset(&disconn, 0, sizeof(disconn)); + disconn.type = NVS_TYPE_RXBUF_DISCONN; + disconn.sig = NVS_RXBUF_SIG; + + /* NOTE: No response. */ + error = hn_nvs_req_send(hv, &disconn, sizeof(disconn)); + if (error) { + PMD_DRV_LOG(ERR, + "send nvs rxbuf disconn failed: %d", + error); + } + + rte_free(hv->rxbuf_info); + /* + * Linger long enough for NVS to disconnect RXBUF. + */ + rte_delay_ms(200); +} + +static void +hn_nvs_disconn_chim(struct hn_data *hv) +{ + int error; + + if (hv->chim_cnt != 0) { + struct hn_nvs_chim_disconn disconn; + + /* Disconnect chimney sending buffer from NVS. */ + memset(&disconn, 0, sizeof(disconn)); + disconn.type = NVS_TYPE_CHIM_DISCONN; + disconn.sig = NVS_CHIM_SIG; + + /* NOTE: No response. */ + error = hn_nvs_req_send(hv, &disconn, sizeof(disconn)); + + if (error) { + PMD_DRV_LOG(ERR, + "send nvs chim disconn failed: %d", error); + } + + hv->chim_cnt = 0; + /* + * Linger long enough for NVS to disconnect chimney + * sending buffer. + */ + rte_delay_ms(200); + } +} + +static int +hn_nvs_conn_chim(struct hn_data *hv) +{ + struct hn_nvs_chim_conn chim; + struct hn_nvs_chim_connresp resp; + uint32_t sectsz; + unsigned long len = hv->chim_res->len; + int error; + + /* Connect chimney sending buffer to NVS */ + memset(&chim, 0, sizeof(chim)); + chim.type = NVS_TYPE_CHIM_CONN; + chim.gpadl = hv->chim_res->phys_addr; + chim.sig = NVS_CHIM_SIG; + PMD_DRV_LOG(DEBUG, "connect send buf va=%p gpad=%#" PRIx64, + hv->chim_res->addr, + hv->chim_res->phys_addr); + + error = hn_nvs_execute(hv, &chim, sizeof(chim), + &resp, sizeof(resp), + NVS_TYPE_CHIM_CONNRESP); + if (error) { + PMD_DRV_LOG(ERR, "exec nvs chim conn failed"); + goto cleanup; + } + + if (resp.status != NVS_STATUS_OK) { + PMD_DRV_LOG(ERR, "nvs chim conn failed: %x", + resp.status); + error = -EIO; + goto cleanup; + } + + sectsz = resp.sectsz; + if (sectsz == 0 || sectsz & (sizeof(uint32_t) - 1)) { + /* Can't use chimney sending buffer; done! */ + PMD_DRV_LOG(NOTICE, + "invalid chimney sending buffer section size: %u", + sectsz); + return 0; + } + + hv->chim_szmax = sectsz; + hv->chim_cnt = len / sectsz; + + PMD_DRV_LOG(INFO, "send buffer %lu section size:%u, count:%u", + len, hv->chim_szmax, hv->chim_cnt); + + if (len % hv->chim_szmax != 0) { + PMD_DRV_LOG(NOTICE, + "chimney sending sections are not properly aligned"); + } + + /* Done! */ + return 0; + +cleanup: + hn_nvs_disconn_chim(hv); + return error; +} + +/* + * Configure MTU and enable VLAN. + */ +static int +hn_nvs_conf_ndis(struct hn_data *hv, unsigned int mtu) +{ + struct hn_nvs_ndis_conf conf; + int error; + + memset(&conf, 0, sizeof(conf)); + conf.type = NVS_TYPE_NDIS_CONF; + conf.mtu = mtu + ETHER_HDR_LEN; + conf.caps = NVS_NDIS_CONF_VLAN; + + /* TODO enable SRIOV */ + //if (hv->nvs_ver >= NVS_VERSION_5) + // conf.caps |= NVS_NDIS_CONF_SRIOV; + + /* NOTE: No response. */ + error = hn_nvs_req_send(hv, &conf, sizeof(conf)); + if (error) { + PMD_DRV_LOG(ERR, + "send nvs ndis conf failed: %d", error); + return error; + } + + return 0; +} + +static int +hn_nvs_init_ndis(struct hn_data *hv) +{ + struct hn_nvs_ndis_init ndis; + int error; + + memset(&ndis, 0, sizeof(ndis)); + ndis.type = NVS_TYPE_NDIS_INIT; + ndis.ndis_major = NDIS_VERSION_MAJOR(hv->ndis_ver); + ndis.ndis_minor = NDIS_VERSION_MINOR(hv->ndis_ver); + + /* NOTE: No response. */ + error = hn_nvs_req_send(hv, &ndis, sizeof(ndis)); + if (error) + PMD_DRV_LOG(ERR, + "send nvs ndis init failed: %d", error); + + return error; +} + +static int +hn_nvs_init(struct hn_data *hv) +{ + unsigned int i; + int error; + + /* + * Find the supported NVS version and set NDIS version accordingly. + */ + for (i = 0; i < RTE_DIM(hn_nvs_version); ++i) { + error = hn_nvs_doinit(hv, hn_nvs_version[i]); + if (error) { + PMD_INIT_LOG(DEBUG, "version %#x error %d", + hn_nvs_version[i], error); + continue; + } + + hv->nvs_ver = hn_nvs_version[i]; + + /* Set NDIS version according to NVS version. */ + hv->ndis_ver = NDIS_VERSION_6_30; + if (hv->nvs_ver <= NVS_VERSION_4) + hv->ndis_ver = NDIS_VERSION_6_1; + + PMD_INIT_LOG(DEBUG, + "NVS version %#x, NDIS version %u.%u", + hv->nvs_ver, NDIS_VERSION_MAJOR(hv->ndis_ver), + NDIS_VERSION_MINOR(hv->ndis_ver)); + return 0; + } + + PMD_DRV_LOG(ERR, + "no NVS compatible version available"); + return -ENXIO; +} + +int +hn_nvs_attach(struct hn_data *hv, unsigned int mtu) +{ + int error; + + /* + * Initialize NVS. + */ + error = hn_nvs_init(hv); + if (error) + return error; + + /** Configure NDIS before initializing it. */ + if (hv->nvs_ver >= NVS_VERSION_2) { + error = hn_nvs_conf_ndis(hv, mtu); + if (error) + return error; + } + + /* + * Initialize NDIS. + */ + error = hn_nvs_init_ndis(hv); + if (error) + return error; + + /* + * Connect RXBUF. + */ + error = hn_nvs_conn_rxbuf(hv); + if (error) + return error; + + /* + * Connect chimney sending buffer. + */ + error = hn_nvs_conn_chim(hv); + if (error) { + hn_nvs_disconn_rxbuf(hv); + return error; + } + + return 0; +} + +void +hn_nvs_detach(struct hn_data *hv __rte_unused) +{ + PMD_INIT_FUNC_TRACE(); + + /* NOTE: there are no requests to stop the NVS. */ + hn_nvs_disconn_rxbuf(hv); + hn_nvs_disconn_chim(hv); +} + +/* + * Ack the consumed RXBUF associated w/ this channel packet, + * so that this RXBUF can be recycled by the hypervisor. + */ +void +hn_nvs_ack_rxbuf(struct vmbus_channel *chan, uint64_t tid) +{ + unsigned int retries = 0; + struct hn_nvs_rndis_ack ack = { + .type = NVS_TYPE_RNDIS_ACK, + .status = NVS_STATUS_OK, + }; + int error; + + PMD_RX_LOG(DEBUG, "ack RX id %" PRIu64, tid); + + again: + error = rte_vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_COMP, + &ack, sizeof(ack), tid, + VMBUS_CHANPKT_FLAG_NONE, NULL); + + if (error == 0) + return; + + if (error == -EAGAIN) { + /* + * NOTE: + * This should _not_ happen in real world, since the + * consumption of the TX bufring from the TX path is + * controlled. + */ + PMD_RX_LOG(NOTICE, "RXBUF ack retry"); + if (++retries < 10) { + rte_delay_ms(1); + goto again; + } + } + /* RXBUF leaks! */ + PMD_DRV_LOG(ERR, "RXBUF ack failed"); +} + +int +hn_nvs_alloc_subchans(struct hn_data *hv, uint32_t *nsubch) +{ + struct hn_nvs_subch_req req; + struct hn_nvs_subch_resp resp; + int error; + + memset(&req, 0, sizeof(req)); + req.type = NVS_TYPE_SUBCH_REQ; + req.op = NVS_SUBCH_OP_ALLOC; + req.nsubch = *nsubch; + + error = hn_nvs_execute(hv, &req, sizeof(req), + &resp, sizeof(resp), + NVS_TYPE_SUBCH_RESP); + if (error) + return error; + + if (resp.status != NVS_STATUS_OK) { + PMD_INIT_LOG(ERR, + "nvs subch alloc failed: %#x", + resp.status); + return -EIO; + } + + if (resp.nsubch > *nsubch) { + PMD_INIT_LOG(NOTICE, + "%u subchans are allocated, requested %u", + resp.nsubch, *nsubch); + } + *nsubch = resp.nsubch; + + return 0; +} + +void +hn_nvs_set_datapath(struct hn_data *hv, uint32_t path) +{ + struct hn_nvs_datapath dp; + + memset(&dp, 0, sizeof(dp)); + dp.type = NVS_TYPE_SET_DATAPATH; + dp.active_path = path; + + hn_nvs_req_send(hv, &dp, sizeof(dp)); +} diff --git a/drivers/net/netvsc/hn_nvs.h b/drivers/net/netvsc/hn_nvs.h new file mode 100644 index 0000000000..984a9c11c5 --- /dev/null +++ b/drivers/net/netvsc/hn_nvs.h @@ -0,0 +1,229 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018 Microsoft Corp. + * All rights reserved. + */ + +/* + * The indirection table message is the largest message + * received from host, and that is 112 bytes. + */ +#define NVS_RESPSIZE_MAX 256 + +/* + * NDIS protocol version numbers + */ +#define NDIS_VERSION_6_1 0x00060001 +#define NDIS_VERSION_6_20 0x00060014 +#define NDIS_VERSION_6_30 0x0006001e +#define NDIS_VERSION_MAJOR(ver) (((ver) & 0xffff0000) >> 16) +#define NDIS_VERSION_MINOR(ver) ((ver) & 0xffff) + +/* + * NVS versions. + */ +#define NVS_VERSION_1 0x00002 +#define NVS_VERSION_2 0x30002 +#define NVS_VERSION_4 0x40000 +#define NVS_VERSION_5 0x50000 +#define NVS_VERSION_6 0x60000 +#define NVS_VERSION_61 0x60001 + +#define NVS_RXBUF_SIG 0xcafe +#define NVS_CHIM_SIG 0xface + +#define NVS_CHIM_IDX_INVALID 0xffffffff + +#define NVS_RNDIS_MTYPE_DATA 0 +#define NVS_RNDIS_MTYPE_CTRL 1 + +/* + * NVS message transacion status codes. + */ +#define NVS_STATUS_OK 1 +#define NVS_STATUS_FAILED 2 + +/* + * NVS request/response message types. + */ +#define NVS_TYPE_INIT 1 +#define NVS_TYPE_INIT_RESP 2 + +#define NVS_TYPE_NDIS_INIT 100 +#define NVS_TYPE_RXBUF_CONN 101 +#define NVS_TYPE_RXBUF_CONNRESP 102 +#define NVS_TYPE_RXBUF_DISCONN 103 +#define NVS_TYPE_CHIM_CONN 104 +#define NVS_TYPE_CHIM_CONNRESP 105 +#define NVS_TYPE_CHIM_DISCONN 106 +#define NVS_TYPE_RNDIS 107 +#define NVS_TYPE_RNDIS_ACK 108 + +#define NVS_TYPE_NDIS_CONF 125 +#define NVS_TYPE_VFASSOC_NOTE 128 /* notification */ +#define NVS_TYPE_SET_DATAPATH 129 +#define NVS_TYPE_SUBCH_REQ 133 +#define NVS_TYPE_SUBCH_RESP 133 /* same as SUBCH_REQ */ +#define NVS_TYPE_TXTBL_NOTE 134 /* notification */ + + +/* NVS message common header */ +struct hn_nvs_hdr { + uint32_t type; +} __rte_packed; + +struct hn_nvs_init { + uint32_t type; /* NVS_TYPE_INIT */ + uint32_t ver_min; + uint32_t ver_max; + uint8_t rsvd[28]; +} __rte_packed; + +struct hn_nvs_init_resp { + uint32_t type; /* NVS_TYPE_INIT_RESP */ + uint32_t ver; /* deprecated */ + uint32_t rsvd; + uint32_t status; /* NVS_STATUS_ */ +} __rte_packed; + +/* No response */ +struct hn_nvs_ndis_conf { + uint32_t type; /* NVS_TYPE_NDIS_CONF */ + uint32_t mtu; + uint32_t rsvd; + uint64_t caps; /* NVS_NDIS_CONF_ */ + uint8_t rsvd1[20]; +} __rte_packed; + +#define NVS_NDIS_CONF_SRIOV 0x0004 +#define NVS_NDIS_CONF_VLAN 0x0008 + +/* No response */ +struct hn_nvs_ndis_init { + uint32_t type; /* NVS_TYPE_NDIS_INIT */ + uint32_t ndis_major; /* NDIS_VERSION_MAJOR_ */ + uint32_t ndis_minor; /* NDIS_VERSION_MINOR_ */ + uint8_t rsvd[28]; +} __rte_packed; + +#define NVS_DATAPATH_SYNTHETIC 0 +#define NVS_DATAPATH_VF 1 + +/* No response */ +struct hn_nvs_datapath { + uint32_t type; /* NVS_TYPE_SET_DATAPATH */ + uint32_t active_path;/* NVS_DATAPATH_* */ + uint8_t rsvd[32]; +} __rte_packed; + +struct hn_nvs_rxbuf_conn { + uint32_t type; /* NVS_TYPE_RXBUF_CONN */ + uint32_t gpadl; /* RXBUF vmbus GPADL */ + uint16_t sig; /* NVS_RXBUF_SIG */ + uint8_t rsvd[30]; +} __rte_packed; + +struct hn_nvs_rxbuf_sect { + uint32_t start; + uint32_t slotsz; + uint32_t slotcnt; + uint32_t end; +} __rte_packed; + +struct hn_nvs_rxbuf_connresp { + uint32_t type; /* NVS_TYPE_RXBUF_CONNRESP */ + uint32_t status; /* NVS_STATUS_ */ + uint32_t nsect; /* # of elem in nvs_sect */ + struct hn_nvs_rxbuf_sect nvs_sect[1]; +} __rte_packed; + +/* No response */ +struct hn_nvs_rxbuf_disconn { + uint32_t type; /* NVS_TYPE_RXBUF_DISCONN */ + uint16_t sig; /* NVS_RXBUF_SIG */ + uint8_t rsvd[34]; +} __rte_packed; + +struct hn_nvs_chim_conn { + uint32_t type; /* NVS_TYPE_CHIM_CONN */ + uint32_t gpadl; /* chimney buf vmbus GPADL */ + uint16_t sig; /* NDIS_NVS_CHIM_SIG */ + uint8_t rsvd[30]; +} __rte_packed; + +struct hn_nvs_chim_connresp { + uint32_t type; /* NVS_TYPE_CHIM_CONNRESP */ + uint32_t status; /* NVS_STATUS_ */ + uint32_t sectsz; /* section size */ +} __rte_packed; + +/* No response */ +struct hn_nvs_chim_disconn { + uint32_t type; /* NVS_TYPE_CHIM_DISCONN */ + uint16_t sig; /* NVS_CHIM_SIG */ + uint8_t rsvd[34]; +} __rte_packed; + +#define NVS_SUBCH_OP_ALLOC 1 + +struct hn_nvs_subch_req { + uint32_t type; /* NVS_TYPE_SUBCH_REQ */ + uint32_t op; /* NVS_SUBCH_OP_ */ + uint32_t nsubch; + uint8_t rsvd[28]; +} __rte_packed; + +struct hn_nvs_subch_resp { + uint32_t type; /* NVS_TYPE_SUBCH_RESP */ + uint32_t status; /* NVS_STATUS_ */ + uint32_t nsubch; + uint8_t rsvd[28]; +} __rte_packed; + +struct hn_nvs_rndis { + uint32_t type; /* NVS_TYPE_RNDIS */ + uint32_t rndis_mtype;/* NVS_RNDIS_MTYPE_ */ + /* + * Chimney sending buffer index and size. + * + * NOTE: + * If nvs_chim_idx is set to NVS_CHIM_IDX_INVALID + * and nvs_chim_sz is set to 0, then chimney sending + * buffer is _not_ used by this RNDIS message. + */ + uint32_t chim_idx; + uint32_t chim_sz; + uint8_t rsvd[24]; +} __rte_packed; + +struct hn_nvs_rndis_ack { + uint32_t type; /* NVS_TYPE_RNDIS_ACK */ + uint32_t status; /* NVS_STATUS_ */ + uint8_t rsvd[32]; +} __rte_packed; + + +int hn_nvs_attach(struct hn_data *hv, unsigned int mtu); +void hn_nvs_detach(struct hn_data *hv); +void hn_nvs_ack_rxbuf(struct vmbus_channel *chan, uint64_t tid); +int hn_nvs_alloc_subchans(struct hn_data *hv, uint32_t *nsubch); +void hn_nvs_set_datapath(struct hn_data *hv, uint32_t path); + +static inline int +hn_nvs_send(struct vmbus_channel *chan, uint16_t flags, + void *nvs_msg, int nvs_msglen, uintptr_t sndc, + bool *need_sig) +{ + return rte_vmbus_chan_send(chan, VMBUS_CHANPKT_TYPE_INBAND, + nvs_msg, nvs_msglen, (uint64_t)sndc, + flags, need_sig); +} + +static inline int +hn_nvs_send_sglist(struct vmbus_channel *chan, + struct vmbus_gpa sg[], unsigned int sglen, + void *nvs_msg, int nvs_msglen, + uintptr_t sndc, bool *need_sig) +{ + return rte_vmbus_chan_send_sglist(chan, sg, sglen, nvs_msg, nvs_msglen, + (uint64_t)sndc, need_sig); +} diff --git a/drivers/net/netvsc/hn_rndis.c b/drivers/net/netvsc/hn_rndis.c new file mode 100644 index 0000000000..bde3396933 --- /dev/null +++ b/drivers/net/netvsc/hn_rndis.c @@ -0,0 +1,1099 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2009-2018 Microsoft Corp. + * Copyright (c) 2010-2012 Citrix Inc. + * Copyright (c) 2012 NetApp Inc. + * All rights reserved. + */ + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hn_logs.h" +#include "hn_var.h" +#include "hn_nvs.h" +#include "hn_rndis.h" +#include "ndis.h" + +#define HN_RNDIS_XFER_SIZE 0x4000 + +#define HN_NDIS_TXCSUM_CAP_IP4 \ + (NDIS_TXCSUM_CAP_IP4 | NDIS_TXCSUM_CAP_IP4OPT) +#define HN_NDIS_TXCSUM_CAP_TCP4 \ + (NDIS_TXCSUM_CAP_TCP4 | NDIS_TXCSUM_CAP_TCP4OPT) +#define HN_NDIS_TXCSUM_CAP_TCP6 \ + (NDIS_TXCSUM_CAP_TCP6 | NDIS_TXCSUM_CAP_TCP6OPT | \ + NDIS_TXCSUM_CAP_IP6EXT) +#define HN_NDIS_TXCSUM_CAP_UDP6 \ + (NDIS_TXCSUM_CAP_UDP6 | NDIS_TXCSUM_CAP_IP6EXT) +#define HN_NDIS_LSOV2_CAP_IP6 \ + (NDIS_LSOV2_CAP_IP6EXT | NDIS_LSOV2_CAP_TCP6OPT) + +/* Get unique request id */ +static inline uint32_t +hn_rndis_rid(struct hn_data *hv) +{ + uint32_t rid; + + do { + rid = rte_atomic32_add_return(&hv->rndis_req_id, 1); + } while (rid == 0); + + return rid; +} + +static void *hn_rndis_alloc(struct hn_data *hv, size_t size) +{ + return rte_zmalloc_socket("RNDIS", size, PAGE_SIZE, + hv->vmbus->device.numa_node); +} + +#ifdef RTE_LIBRTE_NETVSC_DEBUG_DUMP +void hn_rndis_dump(const void *buf) +{ + const union { + struct rndis_msghdr hdr; + struct rndis_packet_msg pkt; + struct rndis_init_req init_request; + struct rndis_init_comp init_complete; + struct rndis_halt_req halt; + struct rndis_query_req query_request; + struct rndis_query_comp query_complete; + struct rndis_set_req set_request; + struct rndis_set_comp set_complete; + struct rndis_reset_req reset_request; + struct rndis_reset_comp reset_complete; + struct rndis_keepalive_req keepalive_request; + struct rndis_keepalive_comp keepalive_complete; + struct rndis_status_msg indicate_status; + } *rndis_msg = buf; + + switch (rndis_msg->hdr.type) { + case RNDIS_PACKET_MSG: { + const struct rndis_pktinfo *ppi; + unsigned int ppi_len; + + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_MSG_PACKET (len %u, data %u:%u, # oob %u %u:%u, pkt %u:%u)\n", + rndis_msg->pkt.len, + rndis_msg->pkt.dataoffset, + rndis_msg->pkt.datalen, + rndis_msg->pkt.oobdataelements, + rndis_msg->pkt.oobdataoffset, + rndis_msg->pkt.oobdatalen, + rndis_msg->pkt.pktinfooffset, + rndis_msg->pkt.pktinfolen); + + ppi = (const struct rndis_pktinfo *) + ((const char *)buf + + RNDIS_PACKET_MSG_OFFSET_ABS(rndis_msg->pkt.pktinfooffset)); + + ppi_len = rndis_msg->pkt.pktinfolen; + while (ppi_len > 0) { + const void *ppi_data; + + ppi_data = ppi->data; + + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + " PPI (size %u, type %u, offs %u data %#x)\n", + ppi->size, ppi->type, ppi->offset, + *(const uint32_t *)ppi_data); + if (ppi->size == 0) + break; + ppi_len -= ppi->size; + ppi = (const struct rndis_pktinfo *) + ((const char *)ppi + ppi->size); + } + break; + } + case RNDIS_INITIALIZE_MSG: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_MSG_INIT (len %u id %#x, ver %u.%u max xfer %u)\n", + rndis_msg->init_request.len, + rndis_msg->init_request.rid, + rndis_msg->init_request.ver_major, + rndis_msg->init_request.ver_minor, + rndis_msg->init_request.max_xfersz); + break; + + case RNDIS_INITIALIZE_CMPLT: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_MSG_INIT_C (len %u, id %#x, status 0x%x, vers %u.%u, " + "flags %d, max xfer %u, max pkts %u, aligned %u)\n", + rndis_msg->init_complete.len, + rndis_msg->init_complete.rid, + rndis_msg->init_complete.status, + rndis_msg->init_complete.ver_major, + rndis_msg->init_complete.ver_minor, + rndis_msg->init_complete.devflags, + rndis_msg->init_complete.pktmaxsz, + rndis_msg->init_complete.pktmaxcnt, + rndis_msg->init_complete.align); + break; + + case RNDIS_HALT_MSG: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_HALT (len %u id %#x)\n", + rndis_msg->halt.len, rndis_msg->halt.rid); + break; + + case RNDIS_QUERY_MSG: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_QUERY (len %u, id %#x, oid %#x, info %u:%u)\n", + rndis_msg->query_request.len, + rndis_msg->query_request.rid, + rndis_msg->query_request.oid, + rndis_msg->query_request.infobuflen, + rndis_msg->query_request.infobufoffset); + break; + + case RNDIS_QUERY_CMPLT: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_MSG_QUERY_C (len %u, id %#x, status 0x%x, buf %u:%u)\n", + rndis_msg->query_complete.len, + rndis_msg->query_complete.rid, + rndis_msg->query_complete.status, + rndis_msg->query_complete.infobuflen, + rndis_msg->query_complete.infobufoffset); + break; + + case RNDIS_SET_MSG: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_SET (len %u, id %#x, oid %#x, info %u:%u)\n", + rndis_msg->set_request.len, + rndis_msg->set_request.rid, + rndis_msg->set_request.oid, + rndis_msg->set_request.infobuflen, + rndis_msg->set_request.infobufoffset); + break; + + case RNDIS_SET_CMPLT: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_MSG_SET_C (len %u, id 0x%x, status 0x%x)\n", + rndis_msg->set_complete.len, + rndis_msg->set_complete.rid, + rndis_msg->set_complete.status); + break; + + case RNDIS_INDICATE_STATUS_MSG: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_MSG_INDICATE (len %u, status %#x, buf len %u, buf offset %u)\n", + rndis_msg->indicate_status.len, + rndis_msg->indicate_status.status, + rndis_msg->indicate_status.stbuflen, + rndis_msg->indicate_status.stbufoffset); + break; + + case RNDIS_RESET_MSG: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_RESET (len %u, id %#x)\n", + rndis_msg->reset_request.len, + rndis_msg->reset_request.rid); + break; + + case RNDIS_RESET_CMPLT: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_RESET_C (len %u, status %#x address %#x)\n", + rndis_msg->reset_complete.len, + rndis_msg->reset_complete.status, + rndis_msg->reset_complete.adrreset); + break; + + case RNDIS_KEEPALIVE_MSG: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_KEEPALIVE (len %u, id %#x)\n", + rndis_msg->keepalive_request.len, + rndis_msg->keepalive_request.rid); + break; + + case RNDIS_KEEPALIVE_CMPLT: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS_KEEPALIVE_C (len %u, id %#x address %#x)\n", + rndis_msg->keepalive_complete.len, + rndis_msg->keepalive_complete.rid, + rndis_msg->keepalive_complete.status); + break; + + default: + rte_log(RTE_LOG_DEBUG, hn_logtype_driver, + "RNDIS type %#x len %u\n", + rndis_msg->hdr.type, + rndis_msg->hdr.len); + break; + } +} +#endif + +static int hn_nvs_send_rndis_ctrl(struct vmbus_channel *chan, + const void *req, uint32_t reqlen) + +{ + struct hn_nvs_rndis nvs_rndis = { + .type = NVS_TYPE_RNDIS, + .rndis_mtype = NVS_RNDIS_MTYPE_CTRL, + .chim_idx = NVS_CHIM_IDX_INVALID, + .chim_sz = 0 + }; + struct vmbus_gpa sg; + rte_iova_t addr; + + addr = rte_malloc_virt2iova(req); + if (unlikely(addr == RTE_BAD_IOVA)) { + PMD_DRV_LOG(ERR, "RNDIS send request can not get iova"); + return -EINVAL; + } + + if (unlikely(reqlen > PAGE_SIZE)) { + PMD_DRV_LOG(ERR, "RNDIS request %u greater than page size", + reqlen); + return -EINVAL; + } + + sg.page = addr / PAGE_SIZE; + sg.ofs = addr & PAGE_MASK; + sg.len = reqlen; + + if (sg.ofs + reqlen > PAGE_SIZE) { + PMD_DRV_LOG(ERR, "RNDIS request crosses page bounary"); + return -EINVAL; + } + + hn_rndis_dump(req); + + return hn_nvs_send_sglist(chan, &sg, 1, + &nvs_rndis, sizeof(nvs_rndis), 0U, NULL); +} + +void hn_rndis_link_status(struct hn_data *hv __rte_unused, const void *msg) +{ + const struct rndis_status_msg *indicate = msg; + + hn_rndis_dump(msg); + + PMD_DRV_LOG(DEBUG, "link status %#x", indicate->status); + + switch (indicate->status) { + case RNDIS_STATUS_LINK_SPEED_CHANGE: + case RNDIS_STATUS_NETWORK_CHANGE: + case RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG: + /* ignore not in DPDK API */ + break; + + case RNDIS_STATUS_MEDIA_CONNECT: + case RNDIS_STATUS_MEDIA_DISCONNECT: + /* TODO handle as LSC interrupt */ + break; + default: + PMD_DRV_LOG(NOTICE, "unknown RNDIS indication: %#x", + indicate->status); + } +} + +/* Callback from hn_process_events when response is visible */ +void hn_rndis_receive_response(struct hn_data *hv, + const void *data, uint32_t len) +{ + const struct rndis_init_comp *hdr = data; + + hn_rndis_dump(data); + + if (len < sizeof(3 * sizeof(uint32_t))) { + PMD_DRV_LOG(ERR, + "missing RNDIS header %u", len); + return; + } + + if (len < hdr->len) { + PMD_DRV_LOG(ERR, + "truncated RNDIS response %u", len); + return; + } + + if (len > sizeof(hv->rndis_resp)) { + PMD_DRV_LOG(NOTICE, + "RNDIS response exceeds buffer"); + len = sizeof(hv->rndis_resp); + } + + if (hdr->rid == 0) { + PMD_DRV_LOG(NOTICE, + "RNDIS response id zero!"); + } + + memcpy(hv->rndis_resp, data, len); + + /* make sure response copied before update */ + rte_smp_wmb(); + + if (rte_atomic32_cmpset(&hv->rndis_pending, hdr->rid, 0) == 0) { + PMD_DRV_LOG(ERR, + "received id %#x pending id %#x", + hdr->rid, (uint32_t)hv->rndis_pending); + } +} + +/* Do request/response transaction */ +static int hn_rndis_exec1(struct hn_data *hv, + const void *req, uint32_t reqlen, + void *comp, uint32_t comp_len) +{ + const struct rndis_halt_req *hdr = req; + uint32_t rid = hdr->rid; + struct vmbus_channel *chan = hn_primary_chan(hv); + int error; + + if (comp_len > sizeof(hv->rndis_resp)) { + PMD_DRV_LOG(ERR, + "Expected completion size %u exceeds buffer %zu", + comp_len, sizeof(hv->rndis_resp)); + return -EIO; + } + + if (comp != NULL && + rte_atomic32_cmpset(&hv->rndis_pending, 0, rid) == 0) { + PMD_DRV_LOG(ERR, + "Request already pending"); + return -EBUSY; + } + + error = hn_nvs_send_rndis_ctrl(chan, req, reqlen); + if (error) { + PMD_DRV_LOG(ERR, "RNDIS ctrl send failed: %d", error); + return error; + } + + if (comp) { + /* Poll primary channel until response received */ + while (hv->rndis_pending == rid) + hn_process_events(hv, 0); + + memcpy(comp, hv->rndis_resp, comp_len); + } + + return 0; +} + +/* Do transaction and validate response */ +static int hn_rndis_execute(struct hn_data *hv, uint32_t rid, + const void *req, uint32_t reqlen, + void *comp, uint32_t comp_len, uint32_t comp_type) +{ + const struct rndis_comp_hdr *hdr = comp; + int ret; + + memset(comp, 0, comp_len); + + ret = hn_rndis_exec1(hv, req, reqlen, comp, comp_len); + if (ret < 0) + return ret; + /* + * Check this RNDIS complete message. + */ + if (unlikely(hdr->type != comp_type)) { + PMD_DRV_LOG(ERR, + "unexpected RNDIS response complete %#x expect %#x", + hdr->type, comp_type); + + return -ENXIO; + } + if (unlikely(hdr->rid != rid)) { + PMD_DRV_LOG(ERR, + "RNDIS comp rid mismatch %#x, expect %#x", + hdr->rid, rid); + return -EINVAL; + } + + /* All pass! */ + return 0; +} + +static int +hn_rndis_query(struct hn_data *hv, uint32_t oid, + const void *idata, uint32_t idlen, + void *odata, uint32_t odlen) +{ + struct rndis_query_req *req; + struct rndis_query_comp *comp; + uint32_t reqlen, comp_len; + int error = -EIO; + unsigned int ofs; + uint32_t rid; + + reqlen = sizeof(*req) + idlen; + req = hn_rndis_alloc(hv, reqlen); + if (req == NULL) + return -ENOMEM; + + comp_len = sizeof(*comp) + odlen; + comp = rte_zmalloc("QUERY", comp_len, PAGE_SIZE); + if (!comp) { + error = -ENOMEM; + goto done; + } + comp->status = RNDIS_STATUS_PENDING; + + rid = hn_rndis_rid(hv); + + req->type = RNDIS_QUERY_MSG; + req->len = reqlen; + req->rid = rid; + req->oid = oid; + req->infobufoffset = RNDIS_QUERY_REQ_INFOBUFOFFSET; + req->infobuflen = idlen; + + /* Input data immediately follows RNDIS query. */ + memcpy(req + 1, idata, idlen); + + error = hn_rndis_execute(hv, rid, req, reqlen, + comp, comp_len, RNDIS_QUERY_CMPLT); + + if (error) + goto done; + + if (comp->status != RNDIS_STATUS_SUCCESS) { + PMD_DRV_LOG(ERR, "RNDIS query 0x%08x failed: status 0x%08x", + oid, comp->status); + error = -EINVAL; + goto done; + } + + if (comp->infobuflen == 0 || comp->infobufoffset == 0) { + /* No output data! */ + PMD_DRV_LOG(ERR, "RNDIS query 0x%08x, no data", oid); + error = 0; + goto done; + } + + /* + * Check output data length and offset. + */ + /* ofs is the offset from the beginning of comp. */ + ofs = RNDIS_QUERY_COMP_INFOBUFOFFSET_ABS(comp->infobufoffset); + if (ofs < sizeof(*comp) || ofs + comp->infobuflen > comp_len) { + PMD_DRV_LOG(ERR, "RNDIS query invalid comp ib off/len, %u/%u", + comp->infobufoffset, comp->infobuflen); + error = -EINVAL; + goto done; + } + + /* Save output data. */ + if (comp->infobuflen < odlen) + odlen = comp->infobuflen; + + /* ofs is the offset from the beginning of comp. */ + memcpy(odata, (const char *)comp + ofs, odlen); + + error = 0; +done: + rte_free(comp); + rte_free(req); + return error; +} + +static int +hn_rndis_halt(struct hn_data *hv) +{ + struct rndis_halt_req *halt; + + halt = hn_rndis_alloc(hv, sizeof(*halt)); + if (halt == NULL) + return -ENOMEM; + + halt->type = RNDIS_HALT_MSG; + halt->len = sizeof(*halt); + halt->rid = hn_rndis_rid(hv); + + /* No RNDIS completion; rely on NVS message send completion */ + hn_rndis_exec1(hv, halt, sizeof(*halt), NULL, 0); + + rte_free(halt); + + PMD_INIT_LOG(DEBUG, "RNDIS halt done"); + return 0; +} + +static int +hn_rndis_query_hwcaps(struct hn_data *hv, struct ndis_offload *caps) +{ + struct ndis_offload in; + uint32_t caps_len, size; + int error; + + memset(caps, 0, sizeof(*caps)); + memset(&in, 0, sizeof(in)); + in.ndis_hdr.ndis_type = NDIS_OBJTYPE_OFFLOAD; + + if (hv->ndis_ver >= NDIS_VERSION_6_30) { + in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_3; + size = NDIS_OFFLOAD_SIZE; + } else if (hv->ndis_ver >= NDIS_VERSION_6_1) { + in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_2; + size = NDIS_OFFLOAD_SIZE_6_1; + } else { + in.ndis_hdr.ndis_rev = NDIS_OFFLOAD_REV_1; + size = NDIS_OFFLOAD_SIZE_6_0; + } + in.ndis_hdr.ndis_size = size; + + caps_len = NDIS_OFFLOAD_SIZE; + error = hn_rndis_query(hv, OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES, + &in, size, caps, caps_len); + if (error) + return error; + + /* Preliminary verification. */ + if (caps->ndis_hdr.ndis_type != NDIS_OBJTYPE_OFFLOAD) { + PMD_DRV_LOG(NOTICE, "invalid NDIS objtype 0x%02x", + caps->ndis_hdr.ndis_type); + return -EINVAL; + } + if (caps->ndis_hdr.ndis_rev < NDIS_OFFLOAD_REV_1) { + PMD_DRV_LOG(NOTICE, "invalid NDIS objrev 0x%02x", + caps->ndis_hdr.ndis_rev); + return -EINVAL; + } + if (caps->ndis_hdr.ndis_size > caps_len) { + PMD_DRV_LOG(NOTICE, "invalid NDIS objsize %u, data size %u", + caps->ndis_hdr.ndis_size, caps_len); + return -EINVAL; + } else if (caps->ndis_hdr.ndis_size < NDIS_OFFLOAD_SIZE_6_0) { + PMD_DRV_LOG(NOTICE, "invalid NDIS objsize %u", + caps->ndis_hdr.ndis_size); + return -EINVAL; + } + + return 0; +} + +int +hn_rndis_query_rsscaps(struct hn_data *hv, + unsigned int *rxr_cnt0) +{ + struct ndis_rss_caps in, caps; + unsigned int indsz, rxr_cnt; + uint32_t caps_len; + int error; + + *rxr_cnt0 = 0; + + if (hv->ndis_ver < NDIS_VERSION_6_20) { + PMD_DRV_LOG(DEBUG, "RSS not supported on this host"); + return -EOPNOTSUPP; + } + + memset(&in, 0, sizeof(in)); + in.ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_CAPS; + in.ndis_hdr.ndis_rev = NDIS_RSS_CAPS_REV_2; + in.ndis_hdr.ndis_size = NDIS_RSS_CAPS_SIZE; + + caps_len = NDIS_RSS_CAPS_SIZE; + error = hn_rndis_query(hv, OID_GEN_RECEIVE_SCALE_CAPABILITIES, + &in, NDIS_RSS_CAPS_SIZE, + &caps, caps_len); + if (error) + return error; + + PMD_INIT_LOG(DEBUG, "RX rings %u indirect %u caps %#x", + caps.ndis_nrxr, caps.ndis_nind, caps.ndis_caps); + /* + * Preliminary verification. + */ + if (caps.ndis_hdr.ndis_type != NDIS_OBJTYPE_RSS_CAPS) { + PMD_DRV_LOG(ERR, "invalid NDIS objtype 0x%02x", + caps.ndis_hdr.ndis_type); + return -EINVAL; + } + if (caps.ndis_hdr.ndis_rev < NDIS_RSS_CAPS_REV_1) { + PMD_DRV_LOG(ERR, "invalid NDIS objrev 0x%02x", + caps.ndis_hdr.ndis_rev); + return -EINVAL; + } + if (caps.ndis_hdr.ndis_size > caps_len) { + PMD_DRV_LOG(ERR, + "invalid NDIS objsize %u, data size %u", + caps.ndis_hdr.ndis_size, caps_len); + return -EINVAL; + } else if (caps.ndis_hdr.ndis_size < NDIS_RSS_CAPS_SIZE_6_0) { + PMD_DRV_LOG(ERR, "invalid NDIS objsize %u", + caps.ndis_hdr.ndis_size); + return -EINVAL; + } + + /* + * Save information for later RSS configuration. + */ + if (caps.ndis_nrxr == 0) { + PMD_DRV_LOG(ERR, "0 RX rings!?"); + return -EINVAL; + } + rxr_cnt = caps.ndis_nrxr; + + if (caps.ndis_hdr.ndis_size == NDIS_RSS_CAPS_SIZE && + caps.ndis_hdr.ndis_rev >= NDIS_RSS_CAPS_REV_2) { + if (caps.ndis_nind > NDIS_HASH_INDCNT) { + PMD_DRV_LOG(ERR, + "too many RSS indirect table entries %u", + caps.ndis_nind); + return -EOPNOTSUPP; + } + if (!rte_is_power_of_2(caps.ndis_nind)) { + PMD_DRV_LOG(ERR, + "RSS indirect table size is not power-of-2 %u", + caps.ndis_nind); + } + + indsz = caps.ndis_nind; + } else { + indsz = NDIS_HASH_INDCNT; + } + + if (indsz < rxr_cnt) { + PMD_DRV_LOG(NOTICE, + "# of RX rings (%d) > RSS indirect table size %d", + rxr_cnt, indsz); + rxr_cnt = indsz; + } + + hv->rss_offloads = 0; + if (caps.ndis_caps & NDIS_RSS_CAP_IPV4) + hv->rss_offloads |= ETH_RSS_IPV4 + | ETH_RSS_NONFRAG_IPV4_TCP + | ETH_RSS_NONFRAG_IPV4_UDP; + if (caps.ndis_caps & NDIS_RSS_CAP_IPV6) + hv->rss_offloads |= ETH_RSS_IPV6 + | ETH_RSS_NONFRAG_IPV6_TCP; + if (caps.ndis_caps & NDIS_RSS_CAP_IPV6_EX) + hv->rss_offloads |= ETH_RSS_IPV6_EX + | ETH_RSS_IPV6_TCP_EX; + + /* Commit! */ + *rxr_cnt0 = rxr_cnt; + + return 0; +} + +static int +hn_rndis_set(struct hn_data *hv, uint32_t oid, const void *data, uint32_t dlen) +{ + struct rndis_set_req *req; + struct rndis_set_comp comp; + uint32_t reqlen, comp_len; + uint32_t rid; + int error; + + reqlen = sizeof(*req) + dlen; + req = rte_zmalloc("RNDIS_SET", reqlen, PAGE_SIZE); + if (!req) + return -ENOMEM; + + rid = hn_rndis_rid(hv); + req->type = RNDIS_SET_MSG; + req->len = reqlen; + req->rid = rid; + req->oid = oid; + req->infobuflen = dlen; + req->infobufoffset = RNDIS_SET_REQ_INFOBUFOFFSET; + + /* Data immediately follows RNDIS set. */ + memcpy(req + 1, data, dlen); + + comp_len = sizeof(comp); + error = hn_rndis_execute(hv, rid, req, reqlen, + &comp, comp_len, + RNDIS_SET_CMPLT); + if (error) { + PMD_DRV_LOG(ERR, "exec RNDIS set %#" PRIx32 " failed", + oid); + error = EIO; + goto done; + } + + if (comp.status != RNDIS_STATUS_SUCCESS) { + PMD_DRV_LOG(ERR, + "RNDIS set %#" PRIx32 " failed: status %#" PRIx32, + oid, comp.status); + error = EIO; + goto done; + } + +done: + rte_free(req); + return error; +} + +int hn_rndis_conf_offload(struct hn_data *hv, + uint64_t tx_offloads, uint64_t rx_offloads) +{ + struct ndis_offload_params params; + struct ndis_offload hwcaps; + int error; + + error = hn_rndis_query_hwcaps(hv, &hwcaps); + if (error) { + PMD_DRV_LOG(ERR, "hwcaps query failed: %d", error); + return error; + } + + /* NOTE: 0 means "no change" */ + memset(¶ms, 0, sizeof(params)); + + params.ndis_hdr.ndis_type = NDIS_OBJTYPE_DEFAULT; + if (hv->ndis_ver < NDIS_VERSION_6_30) { + params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_2; + params.ndis_hdr.ndis_size = NDIS_OFFLOAD_PARAMS_SIZE_6_1; + } else { + params.ndis_hdr.ndis_rev = NDIS_OFFLOAD_PARAMS_REV_3; + params.ndis_hdr.ndis_size = NDIS_OFFLOAD_PARAMS_SIZE; + } + + if (tx_offloads & DEV_TX_OFFLOAD_TCP_CKSUM) { + if (hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_TCP4) + params.ndis_tcp4csum = NDIS_OFFLOAD_PARAM_TX; + else + goto unsupported; + + if (hwcaps.ndis_csum.ndis_ip6_txcsum & NDIS_TXCSUM_CAP_TCP6) + params.ndis_tcp6csum = NDIS_OFFLOAD_PARAM_TX; + else + goto unsupported; + } + + if (rx_offloads & DEV_RX_OFFLOAD_TCP_CKSUM) { + if ((hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_TCP4) + == NDIS_RXCSUM_CAP_TCP4) + params.ndis_tcp4csum |= NDIS_OFFLOAD_PARAM_RX; + else + goto unsupported; + + if ((hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_TCP6) + == NDIS_RXCSUM_CAP_TCP6) + params.ndis_tcp6csum |= NDIS_OFFLOAD_PARAM_RX; + else + goto unsupported; + } + + if (tx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM) { + if (hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) + params.ndis_udp4csum = NDIS_OFFLOAD_PARAM_TX; + else + goto unsupported; + + if ((hwcaps.ndis_csum.ndis_ip6_txcsum & NDIS_TXCSUM_CAP_UDP6) + == NDIS_TXCSUM_CAP_UDP6) + params.ndis_udp6csum = NDIS_OFFLOAD_PARAM_TX; + else + goto unsupported; + } + + if (rx_offloads & DEV_TX_OFFLOAD_UDP_CKSUM) { + if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_UDP4) + params.ndis_udp4csum |= NDIS_OFFLOAD_PARAM_RX; + else + goto unsupported; + + if (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_UDP6) + params.ndis_udp6csum |= NDIS_OFFLOAD_PARAM_RX; + else + goto unsupported; + } + + if (tx_offloads & DEV_TX_OFFLOAD_IPV4_CKSUM) { + if ((hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_IP4) + == NDIS_TXCSUM_CAP_IP4) + params.ndis_ip4csum = NDIS_OFFLOAD_PARAM_TX; + else + goto unsupported; + } + if (rx_offloads & DEV_RX_OFFLOAD_IPV4_CKSUM) { + if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4) + params.ndis_ip4csum |= NDIS_OFFLOAD_PARAM_RX; + else + goto unsupported; + } + + if (tx_offloads & DEV_TX_OFFLOAD_TCP_TSO) { + if (hwcaps.ndis_lsov2.ndis_ip4_encap & NDIS_OFFLOAD_ENCAP_8023) + params.ndis_lsov2_ip4 = NDIS_OFFLOAD_LSOV2_ON; + else + goto unsupported; + + if ((hwcaps.ndis_lsov2.ndis_ip6_opts & HN_NDIS_LSOV2_CAP_IP6) + == HN_NDIS_LSOV2_CAP_IP6) + params.ndis_lsov2_ip6 = NDIS_OFFLOAD_LSOV2_ON; + else + goto unsupported; + } + + error = hn_rndis_set(hv, OID_TCP_OFFLOAD_PARAMETERS, ¶ms, + params.ndis_hdr.ndis_size); + if (error) { + PMD_DRV_LOG(ERR, "offload config failed"); + return error; + } + + return 0; + unsupported: + PMD_DRV_LOG(NOTICE, + "offload tx:%" PRIx64 " rx:%" PRIx64 " not supported by this version", + tx_offloads, rx_offloads); + return -EINVAL; +} + +int hn_rndis_get_offload(struct hn_data *hv, + struct rte_eth_dev_info *dev_info) +{ + struct ndis_offload hwcaps; + int error; + + memset(&hwcaps, 0, sizeof(hwcaps)); + + error = hn_rndis_query_hwcaps(hv, &hwcaps); + if (error) { + PMD_DRV_LOG(ERR, "hwcaps query failed: %d", error); + return error; + } + + dev_info->tx_offload_capa = DEV_TX_OFFLOAD_MULTI_SEGS | + DEV_TX_OFFLOAD_VLAN_INSERT; + + if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HN_NDIS_TXCSUM_CAP_IP4) + == HN_NDIS_TXCSUM_CAP_IP4) + dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_IPV4_CKSUM; + + if ((hwcaps.ndis_csum.ndis_ip4_txcsum & HN_NDIS_TXCSUM_CAP_TCP4) + == HN_NDIS_TXCSUM_CAP_TCP4 && + (hwcaps.ndis_csum.ndis_ip6_txcsum & HN_NDIS_TXCSUM_CAP_TCP6) + == HN_NDIS_TXCSUM_CAP_TCP6) + dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_CKSUM; + + if ((hwcaps.ndis_csum.ndis_ip4_txcsum & NDIS_TXCSUM_CAP_UDP4) && + (hwcaps.ndis_csum.ndis_ip6_txcsum & NDIS_TXCSUM_CAP_UDP6)) + dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_UDP_CKSUM; + + if ((hwcaps.ndis_lsov2.ndis_ip4_encap & NDIS_OFFLOAD_ENCAP_8023) && + (hwcaps.ndis_lsov2.ndis_ip6_opts & HN_NDIS_LSOV2_CAP_IP6) + == HN_NDIS_LSOV2_CAP_IP6) + dev_info->tx_offload_capa |= DEV_TX_OFFLOAD_TCP_TSO; + + dev_info->rx_offload_capa = DEV_RX_OFFLOAD_VLAN_STRIP | + DEV_RX_OFFLOAD_CRC_STRIP; + + if (hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_IP4) + dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_IPV4_CKSUM; + + if ((hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_TCP4) && + (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_TCP6)) + dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_TCP_CKSUM; + + if ((hwcaps.ndis_csum.ndis_ip4_rxcsum & NDIS_RXCSUM_CAP_UDP4) && + (hwcaps.ndis_csum.ndis_ip6_rxcsum & NDIS_RXCSUM_CAP_UDP6)) + dev_info->rx_offload_capa |= DEV_RX_OFFLOAD_UDP_CKSUM; + + return 0; +} + +int +hn_rndis_set_rxfilter(struct hn_data *hv, uint32_t filter) +{ + int error; + + error = hn_rndis_set(hv, OID_GEN_CURRENT_PACKET_FILTER, + &filter, sizeof(filter)); + if (error) { + PMD_DRV_LOG(ERR, "set RX filter %#" PRIx32 " failed: %d", + filter, error); + } else { + PMD_DRV_LOG(DEBUG, "set RX filter %#" PRIx32 " done", filter); + } + + return error; +} + +/* The default RSS key. + * This value is the same as MLX5 so that flows will be + * received on same path for both VF ans synthetic NIC. + */ +static const uint8_t rss_default_key[NDIS_HASH_KEYSIZE_TOEPLITZ] = { + 0x2c, 0xc6, 0x81, 0xd1, 0x5b, 0xdb, 0xf4, 0xf7, + 0xfc, 0xa2, 0x83, 0x19, 0xdb, 0x1a, 0x3e, 0x94, + 0x6b, 0x9e, 0x38, 0xd9, 0x2c, 0x9c, 0x03, 0xd1, + 0xad, 0x99, 0x44, 0xa7, 0xd9, 0x56, 0x3d, 0x59, + 0x06, 0x3c, 0x25, 0xf3, 0xfc, 0x1f, 0xdc, 0x2a, +}; + +int hn_rndis_conf_rss(struct hn_data *hv, + const struct rte_eth_rss_conf *rss_conf) +{ + struct ndis_rssprm_toeplitz rssp; + struct ndis_rss_params *prm = &rssp.rss_params; + const uint8_t *rss_key = rss_conf->rss_key ? : rss_default_key; + uint32_t rss_hash; + unsigned int i; + int error; + + PMD_INIT_FUNC_TRACE(); + + memset(&rssp, 0, sizeof(rssp)); + + prm->ndis_hdr.ndis_type = NDIS_OBJTYPE_RSS_PARAMS; + prm->ndis_hdr.ndis_rev = NDIS_RSS_PARAMS_REV_2; + prm->ndis_hdr.ndis_size = sizeof(*prm); + prm->ndis_flags = 0; + + rss_hash = NDIS_HASH_FUNCTION_TOEPLITZ; + if (rss_conf->rss_hf & ETH_RSS_IPV4) + rss_hash |= NDIS_HASH_IPV4; + if (rss_conf->rss_hf & ETH_RSS_NONFRAG_IPV4_TCP) + rss_hash |= NDIS_HASH_TCP_IPV4; + if (rss_conf->rss_hf & ETH_RSS_IPV6) + rss_hash |= NDIS_HASH_IPV6; + if (rss_conf->rss_hf & ETH_RSS_NONFRAG_IPV6_TCP) + rss_hash |= NDIS_HASH_TCP_IPV6; + + prm->ndis_hash = rss_hash; + prm->ndis_indsize = sizeof(rssp.rss_ind[0]) * NDIS_HASH_INDCNT; + prm->ndis_indoffset = offsetof(struct ndis_rssprm_toeplitz, rss_ind[0]); + prm->ndis_keysize = NDIS_HASH_KEYSIZE_TOEPLITZ; + prm->ndis_keyoffset = offsetof(struct ndis_rssprm_toeplitz, rss_key[0]); + + for (i = 0; i < NDIS_HASH_INDCNT; i++) + rssp.rss_ind[i] = i % hv->num_queues; + + /* Set hask key values */ + memcpy(&rssp.rss_key, rss_key, NDIS_HASH_KEYSIZE_TOEPLITZ); + + error = hn_rndis_set(hv, OID_GEN_RECEIVE_SCALE_PARAMETERS, + &rssp, sizeof(rssp)); + if (error) { + PMD_DRV_LOG(ERR, + "RSS config num queues=%u failed: %d", + hv->num_queues, error); + } + return error; +} + +static int hn_rndis_init(struct hn_data *hv) +{ + struct rndis_init_req *req; + struct rndis_init_comp comp; + uint32_t comp_len, rid; + int error; + + req = hn_rndis_alloc(hv, sizeof(*req)); + if (!req) { + PMD_DRV_LOG(ERR, "no memory for RNDIS init"); + return -ENXIO; + } + + rid = hn_rndis_rid(hv); + req->type = RNDIS_INITIALIZE_MSG; + req->len = sizeof(*req); + req->rid = rid; + req->ver_major = RNDIS_VERSION_MAJOR; + req->ver_minor = RNDIS_VERSION_MINOR; + req->max_xfersz = HN_RNDIS_XFER_SIZE; + + comp_len = RNDIS_INIT_COMP_SIZE_MIN; + error = hn_rndis_execute(hv, rid, req, sizeof(*req), + &comp, comp_len, + RNDIS_INITIALIZE_CMPLT); + if (error) + goto done; + + if (comp.status != RNDIS_STATUS_SUCCESS) { + PMD_DRV_LOG(ERR, "RNDIS init failed: status 0x%08x", + comp.status); + error = -EIO; + goto done; + } + + hv->rndis_agg_size = comp.pktmaxsz; + hv->rndis_agg_pkts = comp.pktmaxcnt; + hv->rndis_agg_align = 1U << comp.align; + + if (hv->rndis_agg_align < sizeof(uint32_t)) { + /* + * The RNDIS packet message encap assumes that the RNDIS + * packet message is at least 4 bytes aligned. Fix up the + * alignment here, if the remote side sets the alignment + * too low. + */ + PMD_DRV_LOG(NOTICE, + "fixup RNDIS aggpkt align: %u -> %zu", + hv->rndis_agg_align, sizeof(uint32_t)); + hv->rndis_agg_align = sizeof(uint32_t); + } + + PMD_INIT_LOG(INFO, + "RNDIS ver %u.%u, aggpkt size %u, aggpkt cnt %u, aggpkt align %u", + comp.ver_major, comp.ver_minor, + hv->rndis_agg_size, hv->rndis_agg_pkts, + hv->rndis_agg_align); + error = 0; +done: + rte_free(req); + return error; +} + +int +hn_rndis_get_eaddr(struct hn_data *hv, uint8_t *eaddr) +{ + uint32_t eaddr_len; + int error; + + eaddr_len = ETHER_ADDR_LEN; + error = hn_rndis_query(hv, OID_802_3_PERMANENT_ADDRESS, NULL, 0, + eaddr, eaddr_len); + if (error) + return error; + + PMD_DRV_LOG(INFO, "MAC address %02x:%02x:%02x:%02x:%02x:%02x", + eaddr[0], eaddr[1], eaddr[2], + eaddr[3], eaddr[4], eaddr[5]); + return 0; +} + +int +hn_rndis_get_linkstatus(struct hn_data *hv) +{ + return hn_rndis_query(hv, OID_GEN_MEDIA_CONNECT_STATUS, NULL, 0, + &hv->link_status, sizeof(uint32_t)); +} + +int +hn_rndis_get_linkspeed(struct hn_data *hv) +{ + return hn_rndis_query(hv, OID_GEN_LINK_SPEED, NULL, 0, + &hv->link_speed, sizeof(uint32_t)); +} + +int +hn_rndis_attach(struct hn_data *hv) +{ + /* Initialize RNDIS. */ + return hn_rndis_init(hv); +} + +void +hn_rndis_detach(struct hn_data *hv) +{ + /* Halt the RNDIS. */ + hn_rndis_halt(hv); +} diff --git a/drivers/net/netvsc/hn_rndis.h b/drivers/net/netvsc/hn_rndis.h new file mode 100644 index 0000000000..89e2e6ba0f --- /dev/null +++ b/drivers/net/netvsc/hn_rndis.h @@ -0,0 +1,32 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ + +#include "rndis.h" + +struct hn_data; + +void hn_rndis_receive_response(struct hn_data *hv, + const void *data, uint32_t len); +void hn_rndis_link_status(struct hn_data *hv, const void *data); +int hn_rndis_attach(struct hn_data *hv); +void hn_rndis_detach(struct hn_data *hv); +int hn_rndis_get_eaddr(struct hn_data *hv, uint8_t *eaddr); +int hn_rndis_get_linkstatus(struct hn_data *hv); +int hn_rndis_get_linkspeed(struct hn_data *hv); +int hn_rndis_set_rxfilter(struct hn_data *hv, uint32_t filter); +void hn_rndis_rx_ctrl(struct hn_data *hv, const void *data, + int dlen); +int hn_rndis_get_offload(struct hn_data *hv, + struct rte_eth_dev_info *dev_info); +int hn_rndis_conf_offload(struct hn_data *hv, + uint64_t tx_offloads, + uint64_t rx_offloads); +int hn_rndis_query_rsscaps(struct hn_data *hv, + unsigned int *rxr_cnt0); +int hn_rndis_conf_rss(struct hn_data *hv, + const struct rte_eth_rss_conf *rss_conf); + +#ifdef RTE_LIBRTE_NETVSC_DEBUG_DUMP +void hn_rndis_dump(const void *buf); +#else +#define hn_rndis_dump(buf) +#endif diff --git a/drivers/net/netvsc/hn_rxtx.c b/drivers/net/netvsc/hn_rxtx.c new file mode 100644 index 0000000000..6d2f41c4c0 --- /dev/null +++ b/drivers/net/netvsc/hn_rxtx.c @@ -0,0 +1,1329 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright(c) 2016-2018 Microsoft Corporation + * Copyright(c) 2013-2016 Brocade Communications Systems, Inc. + * All rights reserved. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "hn_logs.h" +#include "hn_var.h" +#include "hn_rndis.h" +#include "hn_nvs.h" +#include "ndis.h" + +#define HN_NVS_SEND_MSG_SIZE \ + (sizeof(struct vmbus_chanpkt_hdr) + sizeof(struct hn_nvs_rndis)) + +#define HN_TXD_CACHE_SIZE 32 /* per cpu tx_descriptor pool cache */ +#define HN_TXCOPY_THRESHOLD 512 + +#define HN_RXCOPY_THRESHOLD 256 +#define HN_RXQ_EVENT_DEFAULT 1024 + +struct hn_rxinfo { + uint32_t vlan_info; + uint32_t csum_info; + uint32_t hash_info; + uint32_t hash_value; +}; + +#define HN_RXINFO_VLAN 0x0001 +#define HN_RXINFO_CSUM 0x0002 +#define HN_RXINFO_HASHINF 0x0004 +#define HN_RXINFO_HASHVAL 0x0008 +#define HN_RXINFO_ALL \ + (HN_RXINFO_VLAN | \ + HN_RXINFO_CSUM | \ + HN_RXINFO_HASHINF | \ + HN_RXINFO_HASHVAL) + +#define HN_NDIS_VLAN_INFO_INVALID 0xffffffff +#define HN_NDIS_RXCSUM_INFO_INVALID 0 +#define HN_NDIS_HASH_INFO_INVALID 0 + +/* + * Per-transmit book keeping. + * A slot in transmit ring (chim_index) is reserved for each transmit. + * + * There are two types of transmit: + * - buffered transmit where chimney buffer is used and RNDIS header + * is in the buffer. mbuf == NULL for this case. + * + * - direct transmit where RNDIS header is in the in rndis_pkt + * mbuf is freed after transmit. + * + * Descriptors come from per-port pool which is used + * to limit number of outstanding requests per device. + */ +struct hn_txdesc { + struct rte_mbuf *m; + + uint16_t queue_id; + uint16_t chim_index; + uint32_t chim_size; + uint32_t data_size; + uint32_t packets; + + struct rndis_packet_msg *rndis_pkt; +}; + +#define HN_RNDIS_PKT_LEN \ + (sizeof(struct rndis_packet_msg) + \ + RNDIS_PKTINFO_SIZE(NDIS_HASH_VALUE_SIZE) + \ + RNDIS_PKTINFO_SIZE(NDIS_VLAN_INFO_SIZE) + \ + RNDIS_PKTINFO_SIZE(NDIS_LSO2_INFO_SIZE) + \ + RNDIS_PKTINFO_SIZE(NDIS_TXCSUM_INFO_SIZE)) + +/* Minimum space required for a packet */ +#define HN_PKTSIZE_MIN(align) \ + RTE_ALIGN(ETHER_MIN_LEN + HN_RNDIS_PKT_LEN, align) + +#define DEFAULT_TX_FREE_THRESH 32U + +static void +hn_update_packet_stats(struct hn_stats *stats, const struct rte_mbuf *m) +{ + uint32_t s = m->pkt_len; + const struct ether_addr *ea; + + if (s == 64) { + stats->size_bins[1]++; + } else if (s > 64 && s < 1024) { + uint32_t bin; + + /* count zeros, and offset into correct bin */ + bin = (sizeof(s) * 8) - __builtin_clz(s) - 5; + stats->size_bins[bin]++; + } else { + if (s < 64) + stats->size_bins[0]++; + else if (s < 1519) + stats->size_bins[6]++; + else if (s >= 1519) + stats->size_bins[7]++; + } + + ea = rte_pktmbuf_mtod(m, const struct ether_addr *); + if (is_multicast_ether_addr(ea)) { + if (is_broadcast_ether_addr(ea)) + stats->broadcast++; + else + stats->multicast++; + } +} + +static inline unsigned int hn_rndis_pktlen(const struct rndis_packet_msg *pkt) +{ + return pkt->pktinfooffset + pkt->pktinfolen; +} + +static inline uint32_t +hn_rndis_pktmsg_offset(uint32_t ofs) +{ + return ofs - offsetof(struct rndis_packet_msg, dataoffset); +} + +static void hn_txd_init(struct rte_mempool *mp __rte_unused, + void *opaque, void *obj, unsigned int idx) +{ + struct hn_txdesc *txd = obj; + struct rte_eth_dev *dev = opaque; + struct rndis_packet_msg *pkt; + + memset(txd, 0, sizeof(*txd)); + txd->chim_index = idx; + + pkt = rte_malloc_socket("RNDIS_TX", HN_RNDIS_PKT_LEN, + rte_align32pow2(HN_RNDIS_PKT_LEN), + dev->device->numa_node); + if (!pkt) + rte_exit(EXIT_FAILURE, "can not allocate RNDIS header"); + + txd->rndis_pkt = pkt; +} + +/* + * Unlike Linux and FreeBSD, this driver uses a mempool + * to limit outstanding transmits and reserve buffers + */ +int +hn_tx_pool_init(struct rte_eth_dev *dev) +{ + struct hn_data *hv = dev->data->dev_private; + char name[RTE_MEMPOOL_NAMESIZE]; + struct rte_mempool *mp; + + snprintf(name, sizeof(name), + "hn_txd_%u", dev->data->port_id); + + PMD_INIT_LOG(DEBUG, "create a TX send pool %s n=%u size=%zu socket=%d", + name, hv->chim_cnt, sizeof(struct hn_txdesc), + dev->device->numa_node); + + mp = rte_mempool_create(name, hv->chim_cnt, sizeof(struct hn_txdesc), + HN_TXD_CACHE_SIZE, 0, + NULL, NULL, + hn_txd_init, dev, + dev->device->numa_node, 0); + if (!mp) { + PMD_DRV_LOG(ERR, + "mempool %s create failed: %d", name, rte_errno); + return -rte_errno; + } + + hv->tx_pool = mp; + return 0; +} + +static void hn_reset_txagg(struct hn_tx_queue *txq) +{ + txq->agg_szleft = txq->agg_szmax; + txq->agg_pktleft = txq->agg_pktmax; + txq->agg_txd = NULL; + txq->agg_prevpkt = NULL; +} + +int +hn_dev_tx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, uint16_t nb_desc __rte_unused, + unsigned int socket_id, + const struct rte_eth_txconf *tx_conf) + +{ + struct hn_data *hv = dev->data->dev_private; + struct hn_tx_queue *txq; + uint32_t tx_free_thresh; + + PMD_INIT_FUNC_TRACE(); + + txq = rte_zmalloc_socket("HN_TXQ", sizeof(*txq), RTE_CACHE_LINE_SIZE, + socket_id); + if (!txq) + return -ENOMEM; + + txq->hv = hv; + txq->chan = hv->channels[queue_idx]; + txq->port_id = dev->data->port_id; + txq->queue_id = queue_idx; + + tx_free_thresh = tx_conf->tx_free_thresh; + if (tx_free_thresh == 0) + tx_free_thresh = RTE_MIN(hv->chim_cnt / 4, + DEFAULT_TX_FREE_THRESH); + + if (tx_free_thresh >= hv->chim_cnt - 3) + tx_free_thresh = hv->chim_cnt - 3; + + txq->free_thresh = tx_free_thresh; + + txq->agg_szmax = RTE_MIN(hv->chim_szmax, hv->rndis_agg_size); + txq->agg_pktmax = hv->rndis_agg_pkts; + txq->agg_align = hv->rndis_agg_align; + + hn_reset_txagg(txq); + + dev->data->tx_queues[queue_idx] = txq; + + return 0; +} + +void +hn_dev_tx_queue_release(void *arg) +{ + struct hn_tx_queue *txq = arg; + struct hn_txdesc *txd; + + PMD_INIT_FUNC_TRACE(); + + if (!txq) + return; + + /* If any pending data is still present just drop it */ + txd = txq->agg_txd; + if (txd) + rte_mempool_put(txq->hv->tx_pool, txd); + + rte_free(txq); +} + +static void +hn_nvs_send_completed(struct rte_eth_dev *dev, uint16_t queue_id, + unsigned long xactid, const struct hn_nvs_rndis_ack *ack) +{ + struct hn_txdesc *txd = (struct hn_txdesc *)xactid; + struct hn_tx_queue *txq; + + /* Control packets are sent with xacid == 0 */ + if (!txd) + return; + + txq = dev->data->tx_queues[queue_id]; + if (likely(ack->status == NVS_STATUS_OK)) { + PMD_TX_LOG(DEBUG, "port %u:%u complete tx %u packets %u bytes %u", + txq->port_id, txq->queue_id, txd->chim_index, + txd->packets, txd->data_size); + txq->stats.bytes += txd->data_size; + txq->stats.packets += txd->packets; + } else { + PMD_TX_LOG(NOTICE, "port %u:%u complete tx %u failed status %u", + txq->port_id, txq->queue_id, txd->chim_index, ack->status); + ++txq->stats.errors; + } + + rte_pktmbuf_free(txd->m); + + rte_mempool_put(txq->hv->tx_pool, txd); +} + +/* Handle transmit completion events */ +static void +hn_nvs_handle_comp(struct rte_eth_dev *dev, uint16_t queue_id, + const struct vmbus_chanpkt_hdr *pkt, + const void *data) +{ + const struct hn_nvs_hdr *hdr = data; + + switch (hdr->type) { + case NVS_TYPE_RNDIS_ACK: + hn_nvs_send_completed(dev, queue_id, pkt->xactid, data); + break; + + default: + PMD_TX_LOG(NOTICE, + "unexpected send completion type %u", + hdr->type); + } +} + +/* Parse per-packet info (meta data) */ +static int +hn_rndis_rxinfo(const void *info_data, unsigned int info_dlen, + struct hn_rxinfo *info) +{ + const struct rndis_pktinfo *pi = info_data; + uint32_t mask = 0; + + while (info_dlen != 0) { + const void *data; + uint32_t dlen; + + if (unlikely(info_dlen < sizeof(*pi))) + return -EINVAL; + + if (unlikely(info_dlen < pi->size)) + return -EINVAL; + info_dlen -= pi->size; + + if (unlikely(pi->size & RNDIS_PKTINFO_SIZE_ALIGNMASK)) + return -EINVAL; + if (unlikely(pi->size < pi->offset)) + return -EINVAL; + + dlen = pi->size - pi->offset; + data = pi->data; + + switch (pi->type) { + case NDIS_PKTINFO_TYPE_VLAN: + if (unlikely(dlen < NDIS_VLAN_INFO_SIZE)) + return -EINVAL; + info->vlan_info = *((const uint32_t *)data); + mask |= HN_RXINFO_VLAN; + break; + + case NDIS_PKTINFO_TYPE_CSUM: + if (unlikely(dlen < NDIS_RXCSUM_INFO_SIZE)) + return -EINVAL; + info->csum_info = *((const uint32_t *)data); + mask |= HN_RXINFO_CSUM; + break; + + case NDIS_PKTINFO_TYPE_HASHVAL: + if (unlikely(dlen < NDIS_HASH_VALUE_SIZE)) + return -EINVAL; + info->hash_value = *((const uint32_t *)data); + mask |= HN_RXINFO_HASHVAL; + break; + + case NDIS_PKTINFO_TYPE_HASHINF: + if (unlikely(dlen < NDIS_HASH_INFO_SIZE)) + return -EINVAL; + info->hash_info = *((const uint32_t *)data); + mask |= HN_RXINFO_HASHINF; + break; + + default: + goto next; + } + + if (mask == HN_RXINFO_ALL) + break; /* All found; done */ +next: + pi = (const struct rndis_pktinfo *) + ((const uint8_t *)pi + pi->size); + } + + /* + * Final fixup. + * - If there is no hash value, invalidate the hash info. + */ + if (!(mask & HN_RXINFO_HASHVAL)) + info->hash_info = HN_NDIS_HASH_INFO_INVALID; + return 0; +} + +/* + * Ack the consumed RXBUF associated w/ this channel packet, + * so that this RXBUF can be recycled by the hypervisor. + */ +static void hn_rx_buf_release(struct hn_rx_bufinfo *rxb) +{ + struct rte_mbuf_ext_shared_info *shinfo = &rxb->shinfo; + struct hn_data *hv = rxb->hv; + + if (rte_mbuf_ext_refcnt_update(shinfo, -1) == 0) { + hn_nvs_ack_rxbuf(rxb->chan, rxb->xactid); + --hv->rxbuf_outstanding; + } +} + +static void hn_rx_buf_free_cb(void *buf __rte_unused, void *opaque) +{ + hn_rx_buf_release(opaque); +} + +static struct hn_rx_bufinfo *hn_rx_buf_init(const struct hn_rx_queue *rxq, + const struct vmbus_chanpkt_rxbuf *pkt) +{ + struct hn_rx_bufinfo *rxb; + + rxb = rxq->hv->rxbuf_info + pkt->hdr.xactid; + rxb->chan = rxq->chan; + rxb->xactid = pkt->hdr.xactid; + rxb->hv = rxq->hv; + + rxb->shinfo.free_cb = hn_rx_buf_free_cb; + rxb->shinfo.fcb_opaque = rxb; + rte_mbuf_ext_refcnt_set(&rxb->shinfo, 1); + return rxb; +} + +static void hn_rxpkt(struct hn_rx_queue *rxq, struct hn_rx_bufinfo *rxb, + uint8_t *data, unsigned int headroom, unsigned int dlen, + const struct hn_rxinfo *info) +{ + struct hn_data *hv = rxq->hv; + struct rte_mbuf *m; + + m = rte_pktmbuf_alloc(rxq->mb_pool); + if (unlikely(!m)) { + struct rte_eth_dev *dev = + &rte_eth_devices[rxq->port_id]; + + dev->data->rx_mbuf_alloc_failed++; + return; + } + + /* + * For large packets, avoid copy if possible but need to keep + * some space available in receive area for later packets. + */ + if (dlen >= HN_RXCOPY_THRESHOLD && + hv->rxbuf_outstanding < hv->rxbuf_section_cnt / 2) { + struct rte_mbuf_ext_shared_info *shinfo; + const void *rxbuf; + rte_iova_t iova; + + /* + * Build an external mbuf that points to recveive area. + * Use refcount to handle multiple packets in same + * receive buffer section. + */ + rxbuf = hv->rxbuf_res->addr; + iova = rte_mem_virt2iova(rxbuf) + RTE_PTR_DIFF(data, rxbuf); + shinfo = &rxb->shinfo; + + if (rte_mbuf_ext_refcnt_update(shinfo, 1) == 1) + ++hv->rxbuf_outstanding; + + rte_pktmbuf_attach_extbuf(m, data, iova, + dlen + headroom, shinfo); + m->data_off = headroom; + } else { + /* Mbuf's in pool must be large enough to hold small packets */ + if (unlikely(rte_pktmbuf_tailroom(m) < dlen)) { + rte_pktmbuf_free_seg(m); + ++rxq->stats.errors; + return; + } + rte_memcpy(rte_pktmbuf_mtod(m, void *), + data + headroom, dlen); + } + + m->port = rxq->port_id; + m->pkt_len = dlen; + m->data_len = dlen; + + if (info->vlan_info != HN_NDIS_VLAN_INFO_INVALID) { + m->vlan_tci = info->vlan_info; + m->ol_flags |= PKT_RX_VLAN_STRIPPED | PKT_RX_VLAN; + } + + if (info->csum_info != HN_NDIS_RXCSUM_INFO_INVALID) { + if (info->csum_info & NDIS_RXCSUM_INFO_IPCS_OK) + m->ol_flags |= PKT_RX_IP_CKSUM_GOOD; + + if (info->csum_info & (NDIS_RXCSUM_INFO_UDPCS_OK + | NDIS_RXCSUM_INFO_TCPCS_OK)) + m->ol_flags |= PKT_RX_L4_CKSUM_GOOD; + } + + if (info->hash_info != HN_NDIS_HASH_INFO_INVALID) { + m->ol_flags |= PKT_RX_RSS_HASH; + m->hash.rss = info->hash_value; + } + + PMD_RX_LOG(DEBUG, "port %u:%u RX id %" PRIu64 " size %u ol_flags %#" PRIx64, + rxq->port_id, rxq->queue_id, rxb->xactid, + m->pkt_len, m->ol_flags); + + ++rxq->stats.packets; + rxq->stats.bytes += m->pkt_len; + hn_update_packet_stats(&rxq->stats, m); + + if (unlikely(rte_ring_sp_enqueue(rxq->rx_ring, m) != 0)) { + ++rxq->ring_full; + rte_pktmbuf_free(m); + } +} + +static void hn_rndis_rx_data(struct hn_rx_queue *rxq, + struct hn_rx_bufinfo *rxb, + void *data, uint32_t dlen) +{ + unsigned int data_off, data_len, pktinfo_off, pktinfo_len; + const struct rndis_packet_msg *pkt = data; + struct hn_rxinfo info = { + .vlan_info = HN_NDIS_VLAN_INFO_INVALID, + .csum_info = HN_NDIS_RXCSUM_INFO_INVALID, + .hash_info = HN_NDIS_HASH_INFO_INVALID, + }; + int err; + + hn_rndis_dump(pkt); + + if (unlikely(dlen < sizeof(*pkt))) + goto error; + + if (unlikely(dlen < pkt->len)) + goto error; /* truncated RNDIS from host */ + + if (unlikely(pkt->len < pkt->datalen + + pkt->oobdatalen + pkt->pktinfolen)) + goto error; + + if (unlikely(pkt->datalen == 0)) + goto error; + + /* Check offsets. */ + if (unlikely(pkt->dataoffset < RNDIS_PACKET_MSG_OFFSET_MIN)) + goto error; + + if (likely(pkt->pktinfooffset > 0) && + unlikely(pkt->pktinfooffset < RNDIS_PACKET_MSG_OFFSET_MIN || + (pkt->pktinfooffset & RNDIS_PACKET_MSG_OFFSET_ALIGNMASK))) + goto error; + + data_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset); + data_len = pkt->datalen; + pktinfo_off = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->pktinfooffset); + pktinfo_len = pkt->pktinfolen; + + if (likely(pktinfo_len > 0)) { + err = hn_rndis_rxinfo((const uint8_t *)pkt + pktinfo_off, + pktinfo_len, &info); + if (err) + goto error; + } + + if (unlikely(data_off + data_len > pkt->len)) + goto error; + + if (unlikely(data_len < ETHER_HDR_LEN)) + goto error; + + hn_rxpkt(rxq, rxb, data, data_off, data_len, &info); + return; +error: + ++rxq->stats.errors; +} + +static void +hn_rndis_receive(const struct rte_eth_dev *dev, struct hn_rx_queue *rxq, + struct hn_rx_bufinfo *rxb, void *buf, uint32_t len) +{ + const struct rndis_msghdr *hdr = buf; + + switch (hdr->type) { + case RNDIS_PACKET_MSG: + if (dev->data->dev_started) + hn_rndis_rx_data(rxq, rxb, buf, len); + break; + + case RNDIS_INDICATE_STATUS_MSG: + hn_rndis_link_status(rxq->hv, buf); + break; + + case RNDIS_INITIALIZE_CMPLT: + case RNDIS_QUERY_CMPLT: + case RNDIS_SET_CMPLT: + hn_rndis_receive_response(rxq->hv, buf, len); + break; + + default: + PMD_DRV_LOG(NOTICE, + "unexpected RNDIS message (type %#x len %u)", + hdr->type, len); + break; + } +} + +static void +hn_nvs_handle_rxbuf(struct rte_eth_dev *dev, + struct hn_data *hv, + struct hn_rx_queue *rxq, + const struct vmbus_chanpkt_hdr *hdr, + const void *buf) +{ + const struct vmbus_chanpkt_rxbuf *pkt; + const struct hn_nvs_hdr *nvs_hdr = buf; + uint32_t rxbuf_sz = hv->rxbuf_res->len; + char *rxbuf = hv->rxbuf_res->addr; + unsigned int i, hlen, count; + struct hn_rx_bufinfo *rxb; + + /* At minimum we need type header */ + if (unlikely(vmbus_chanpkt_datalen(hdr) < sizeof(*nvs_hdr))) { + PMD_RX_LOG(ERR, "invalid receive nvs RNDIS"); + return; + } + + /* Make sure that this is a RNDIS message. */ + if (unlikely(nvs_hdr->type != NVS_TYPE_RNDIS)) { + PMD_RX_LOG(ERR, "nvs type %u, not RNDIS", + nvs_hdr->type); + return; + } + + hlen = vmbus_chanpkt_getlen(hdr->hlen); + if (unlikely(hlen < sizeof(*pkt))) { + PMD_RX_LOG(ERR, "invalid rxbuf chanpkt"); + return; + } + + pkt = container_of(hdr, const struct vmbus_chanpkt_rxbuf, hdr); + if (unlikely(pkt->rxbuf_id != NVS_RXBUF_SIG)) { + PMD_RX_LOG(ERR, "invalid rxbuf_id 0x%08x", + pkt->rxbuf_id); + return; + } + + count = pkt->rxbuf_cnt; + if (unlikely(hlen < offsetof(struct vmbus_chanpkt_rxbuf, + rxbuf[count]))) { + PMD_RX_LOG(ERR, "invalid rxbuf_cnt %u", count); + return; + } + + if (pkt->hdr.xactid > hv->rxbuf_section_cnt) { + PMD_RX_LOG(ERR, "invalid rxbuf section id %" PRIx64, + pkt->hdr.xactid); + return; + } + + /* Setup receive buffer info to allow for callback */ + rxb = hn_rx_buf_init(rxq, pkt); + + /* Each range represents 1 RNDIS pkt that contains 1 Ethernet frame */ + for (i = 0; i < count; ++i) { + unsigned int ofs, len; + + ofs = pkt->rxbuf[i].ofs; + len = pkt->rxbuf[i].len; + + if (unlikely(ofs + len > rxbuf_sz)) { + PMD_RX_LOG(ERR, + "%uth RNDIS msg overflow ofs %u, len %u", + i, ofs, len); + continue; + } + + if (unlikely(len == 0)) { + PMD_RX_LOG(ERR, "%uth RNDIS msg len %u", i, len); + continue; + } + + hn_rndis_receive(dev, rxq, rxb, + rxbuf + ofs, len); + } + + /* Send ACK now if external mbuf not used */ + hn_rx_buf_release(rxb); +} + +struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv, + uint16_t queue_id, + unsigned int socket_id) +{ + struct hn_rx_queue *rxq; + + rxq = rte_zmalloc_socket("HN_RXQ", sizeof(*rxq), + RTE_CACHE_LINE_SIZE, socket_id); + if (rxq) { + rxq->hv = hv; + rxq->chan = hv->channels[queue_id]; + rte_spinlock_init(&rxq->ring_lock); + rxq->port_id = hv->port_id; + rxq->queue_id = queue_id; + + rxq->event_sz = HN_RXQ_EVENT_DEFAULT; + rxq->event_buf = rte_malloc_socket("RX_EVENTS", + rxq->event_sz, + RTE_CACHE_LINE_SIZE, + socket_id); + if (!rxq->event_buf) { + rte_free(rxq); + rxq = NULL; + } + } + return rxq; +} + +int +hn_dev_rx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, uint16_t nb_desc, + unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf __rte_unused, + struct rte_mempool *mp) +{ + struct hn_data *hv = dev->data->dev_private; + uint32_t qmax = hv->rxbuf_section_cnt; + char ring_name[RTE_RING_NAMESIZE]; + struct hn_rx_queue *rxq; + unsigned int count; + size_t size; + int err = -ENOMEM; + + PMD_INIT_FUNC_TRACE(); + + if (nb_desc == 0 || nb_desc > qmax) + nb_desc = qmax; + + if (queue_idx == 0) { + rxq = hv->primary; + } else { + rxq = hn_rx_queue_alloc(hv, queue_idx, socket_id); + if (!rxq) + return -ENOMEM; + } + + rxq->mb_pool = mp; + + count = rte_align32pow2(nb_desc); + size = sizeof(struct rte_ring) + count * sizeof(void *); + rxq->rx_ring = rte_malloc_socket("RX_RING", size, + RTE_CACHE_LINE_SIZE, + socket_id); + if (!rxq->rx_ring) + goto fail; + + /* + * Staging ring from receive event logic to rx_pkts. + * rx_pkts assumes caller is handling multi-thread issue. + * event logic has locking. + */ + snprintf(ring_name, sizeof(ring_name), + "hn_rx_%u_%u", dev->data->port_id, queue_idx); + err = rte_ring_init(rxq->rx_ring, ring_name, + count, 0); + if (err) + goto fail; + + dev->data->rx_queues[queue_idx] = rxq; + return 0; + +fail: + rte_free(rxq->rx_ring); + rte_free(rxq->event_buf); + rte_free(rxq); + return -ENOMEM; +} + +void +hn_dev_rx_queue_release(void *arg) +{ + struct hn_rx_queue *rxq = arg; + + PMD_INIT_FUNC_TRACE(); + + if (!rxq) + return; + + rte_free(rxq->rx_ring); + rxq->rx_ring = NULL; + rxq->mb_pool = NULL; + + if (rxq != rxq->hv->primary) { + rte_free(rxq->event_buf); + rte_free(rxq); + } +} + +static void +hn_nvs_handle_notify(const struct vmbus_chanpkt_hdr *pkthdr, + const void *data) +{ + const struct hn_nvs_hdr *hdr = data; + + if (unlikely(vmbus_chanpkt_datalen(pkthdr) < sizeof(*hdr))) { + PMD_DRV_LOG(ERR, "invalid nvs notify"); + return; + } + + PMD_DRV_LOG(INFO, + "got notify, nvs type %u", hdr->type); +} + +/* + * Process pending events on the channel. + * Called from both Rx queue poll and Tx cleanup + */ +void hn_process_events(struct hn_data *hv, uint16_t queue_id) +{ + struct rte_eth_dev *dev = &rte_eth_devices[hv->port_id]; + struct hn_rx_queue *rxq; + int ret = 0; + + rxq = queue_id == 0 ? hv->primary : dev->data->rx_queues[queue_id]; + + /* If no pending data then nothing to do */ + if (rte_vmbus_chan_rx_empty(rxq->chan)) + return; + + /* + * Since channel is shared between Rx and TX queue need to have a lock + * since DPDK does not force same CPU to be used for Rx/Tx. + */ + if (unlikely(!rte_spinlock_trylock(&rxq->ring_lock))) + return; + + for (;;) { + const struct vmbus_chanpkt_hdr *pkt; + uint32_t len = rxq->event_sz; + const void *data; + + ret = rte_vmbus_chan_recv_raw(rxq->chan, rxq->event_buf, &len); + if (ret == -EAGAIN) + break; /* ring is empty */ + + if (ret == -ENOBUFS) { + /* expanded buffer needed */ + len = rte_align32pow2(len); + PMD_DRV_LOG(DEBUG, "expand event buf to %u", len); + + rxq->event_buf = rte_realloc(rxq->event_buf, + len, RTE_CACHE_LINE_SIZE); + if (rxq->event_buf) { + rxq->event_sz = len; + continue; + } + + rte_exit(EXIT_FAILURE, "can not expand event buf!\n"); + break; + } + + if (ret != 0) { + PMD_DRV_LOG(ERR, "vmbus ring buffer error: %d", ret); + break; + } + + pkt = (const struct vmbus_chanpkt_hdr *)rxq->event_buf; + data = (char *)rxq->event_buf + vmbus_chanpkt_getlen(pkt->hlen); + + switch (pkt->type) { + case VMBUS_CHANPKT_TYPE_COMP: + hn_nvs_handle_comp(dev, queue_id, pkt, data); + break; + + case VMBUS_CHANPKT_TYPE_RXBUF: + hn_nvs_handle_rxbuf(dev, hv, rxq, pkt, data); + break; + + case VMBUS_CHANPKT_TYPE_INBAND: + hn_nvs_handle_notify(pkt, data); + break; + + default: + PMD_DRV_LOG(ERR, "unknown chan pkt %u", pkt->type); + break; + } + } + rte_spinlock_unlock(&rxq->ring_lock); + + if (unlikely(ret != -EAGAIN)) + PMD_DRV_LOG(ERR, "channel receive failed: %d", ret); +} + +static void hn_append_to_chim(struct hn_tx_queue *txq, + struct rndis_packet_msg *pkt, + const struct rte_mbuf *m) +{ + struct hn_txdesc *txd = txq->agg_txd; + uint8_t *buf = (uint8_t *)pkt; + unsigned int data_offs; + + hn_rndis_dump(pkt); + + data_offs = RNDIS_PACKET_MSG_OFFSET_ABS(pkt->dataoffset); + txd->chim_size += pkt->len; + txd->data_size += m->pkt_len; + ++txd->packets; + hn_update_packet_stats(&txq->stats, m); + + for (; m; m = m->next) { + uint16_t len = rte_pktmbuf_data_len(m); + + rte_memcpy(buf + data_offs, + rte_pktmbuf_mtod(m, const char *), len); + data_offs += len; + } +} + +/* + * Send pending aggregated data in chimney buffer (if any). + * Returns error if send was unsuccessful because channel ring buffer + * was full. + */ +static int hn_flush_txagg(struct hn_tx_queue *txq, bool *need_sig) + +{ + struct hn_txdesc *txd = txq->agg_txd; + struct hn_nvs_rndis rndis; + int ret; + + if (!txd) + return 0; + + rndis = (struct hn_nvs_rndis) { + .type = NVS_TYPE_RNDIS, + .rndis_mtype = NVS_RNDIS_MTYPE_DATA, + .chim_idx = txd->chim_index, + .chim_sz = txd->chim_size, + }; + + PMD_TX_LOG(DEBUG, "port %u:%u tx %u size %u", + txq->port_id, txq->queue_id, txd->chim_index, txd->chim_size); + + ret = hn_nvs_send(txq->chan, VMBUS_CHANPKT_FLAG_RC, + &rndis, sizeof(rndis), (uintptr_t)txd, need_sig); + + if (likely(ret == 0)) + hn_reset_txagg(txq); + else + PMD_TX_LOG(NOTICE, "port %u:%u send failed: %d", + txq->port_id, txq->queue_id, ret); + + return ret; +} + +static struct hn_txdesc *hn_new_txd(struct hn_data *hv, + struct hn_tx_queue *txq) +{ + struct hn_txdesc *txd; + + if (rte_mempool_get(hv->tx_pool, (void **)&txd)) { + ++txq->stats.nomemory; + PMD_TX_LOG(DEBUG, "tx pool exhausted!"); + return NULL; + } + + txd->m = NULL; + txd->queue_id = txq->queue_id; + txd->packets = 0; + txd->data_size = 0; + txd->chim_size = 0; + + return txd; +} + +static void * +hn_try_txagg(struct hn_data *hv, struct hn_tx_queue *txq, uint32_t pktsize) +{ + struct hn_txdesc *agg_txd = txq->agg_txd; + struct rndis_packet_msg *pkt; + void *chim; + + if (agg_txd) { + unsigned int padding, olen; + + /* + * Update the previous RNDIS packet's total length, + * it can be increased due to the mandatory alignment + * padding for this RNDIS packet. And update the + * aggregating txdesc's chimney sending buffer size + * accordingly. + * + * Zero-out the padding, as required by the RNDIS spec. + */ + pkt = txq->agg_prevpkt; + olen = pkt->len; + padding = RTE_ALIGN(olen, txq->agg_align) - olen; + if (padding > 0) { + agg_txd->chim_size += padding; + pkt->len += padding; + memset((uint8_t *)pkt + olen, 0, padding); + } + + chim = (uint8_t *)pkt + pkt->len; + + txq->agg_pktleft--; + txq->agg_szleft -= pktsize; + if (txq->agg_szleft < HN_PKTSIZE_MIN(txq->agg_align)) { + /* + * Probably can't aggregate more packets, + * flush this aggregating txdesc proactively. + */ + txq->agg_pktleft = 0; + } + } else { + agg_txd = hn_new_txd(hv, txq); + if (!agg_txd) + return NULL; + + chim = (uint8_t *)hv->chim_res->addr + + agg_txd->chim_index * hv->chim_szmax; + + txq->agg_txd = agg_txd; + txq->agg_pktleft = txq->agg_pktmax - 1; + txq->agg_szleft = txq->agg_szmax - pktsize; + } + txq->agg_prevpkt = chim; + + return chim; +} + +static inline void * +hn_rndis_pktinfo_append(struct rndis_packet_msg *pkt, + uint32_t pi_dlen, uint32_t pi_type) +{ + const uint32_t pi_size = RNDIS_PKTINFO_SIZE(pi_dlen); + struct rndis_pktinfo *pi; + + /* + * Per-packet-info does not move; it only grows. + * + * NOTE: + * pktinfooffset in this phase counts from the beginning + * of rndis_packet_msg. + */ + pi = (struct rndis_pktinfo *)((uint8_t *)pkt + hn_rndis_pktlen(pkt)); + + pkt->pktinfolen += pi_size; + + pi->size = pi_size; + pi->type = pi_type; + pi->offset = RNDIS_PKTINFO_OFFSET; + + return pi->data; +} + +/* Put RNDIS header and packet info on packet */ +static void hn_encap(struct rndis_packet_msg *pkt, + uint16_t queue_id, + const struct rte_mbuf *m) +{ + unsigned int hlen = m->l2_len + m->l3_len; + uint32_t *pi_data; + uint32_t pkt_hlen; + + pkt->type = RNDIS_PACKET_MSG; + pkt->len = m->pkt_len; + pkt->dataoffset = 0; + pkt->datalen = m->pkt_len; + pkt->oobdataoffset = 0; + pkt->oobdatalen = 0; + pkt->oobdataelements = 0; + pkt->pktinfooffset = sizeof(*pkt); + pkt->pktinfolen = 0; + pkt->vchandle = 0; + pkt->reserved = 0; + + /* + * Set the hash value for this packet, to the queue_id to cause + * TX done event for this packet on the right channel. + */ + pi_data = hn_rndis_pktinfo_append(pkt, NDIS_HASH_VALUE_SIZE, + NDIS_PKTINFO_TYPE_HASHVAL); + *pi_data = queue_id; + + if (m->ol_flags & PKT_TX_VLAN_PKT) { + pi_data = hn_rndis_pktinfo_append(pkt, NDIS_VLAN_INFO_SIZE, + NDIS_PKTINFO_TYPE_VLAN); + *pi_data = m->vlan_tci; + } + + if (m->ol_flags & PKT_TX_TCP_SEG) { + pi_data = hn_rndis_pktinfo_append(pkt, NDIS_LSO2_INFO_SIZE, + NDIS_PKTINFO_TYPE_LSO); + + if (m->ol_flags & PKT_TX_IPV6) { + *pi_data = NDIS_LSO2_INFO_MAKEIPV6(hlen, + m->tso_segsz); + } else { + *pi_data = NDIS_LSO2_INFO_MAKEIPV4(hlen, + m->tso_segsz); + } + } else if (m->ol_flags & + (PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM)) { + pi_data = hn_rndis_pktinfo_append(pkt, NDIS_TXCSUM_INFO_SIZE, + NDIS_PKTINFO_TYPE_CSUM); + *pi_data = 0; + + if (m->ol_flags & PKT_TX_IPV6) + *pi_data |= NDIS_TXCSUM_INFO_IPV6; + if (m->ol_flags & PKT_TX_IPV4) { + *pi_data |= NDIS_TXCSUM_INFO_IPV4; + + if (m->ol_flags & PKT_TX_IP_CKSUM) + *pi_data |= NDIS_TXCSUM_INFO_IPCS; + } + + if (m->ol_flags & PKT_TX_TCP_CKSUM) + *pi_data |= NDIS_TXCSUM_INFO_MKTCPCS(hlen); + else if (m->ol_flags & PKT_TX_UDP_CKSUM) + *pi_data |= NDIS_TXCSUM_INFO_MKUDPCS(hlen); + } + + pkt_hlen = pkt->pktinfooffset + pkt->pktinfolen; + /* Fixup RNDIS packet message total length */ + pkt->len += pkt_hlen; + + /* Convert RNDIS packet message offsets */ + pkt->dataoffset = hn_rndis_pktmsg_offset(pkt_hlen); + pkt->pktinfooffset = hn_rndis_pktmsg_offset(pkt->pktinfooffset); +} + +/* How many scatter gather list elements ar needed */ +static unsigned int hn_get_slots(const struct rte_mbuf *m) +{ + unsigned int slots = 1; /* for RNDIS header */ + + while (m) { + unsigned int size = rte_pktmbuf_data_len(m); + unsigned int offs = rte_mbuf_data_iova(m) & PAGE_MASK; + + slots += (offs + size + PAGE_SIZE - 1) / PAGE_SIZE; + m = m->next; + } + + return slots; +} + +/* Build scatter gather list from chained mbuf */ +static unsigned int hn_fill_sg(struct vmbus_gpa *sg, + const struct rte_mbuf *m) +{ + unsigned int segs = 0; + + while (m) { + rte_iova_t addr = rte_mbuf_data_iova(m); + unsigned int page = addr / PAGE_SIZE; + unsigned int offset = addr & PAGE_MASK; + unsigned int len = rte_pktmbuf_data_len(m); + + while (len > 0) { + unsigned int bytes = RTE_MIN(len, PAGE_SIZE - offset); + + sg[segs].page = page; + sg[segs].ofs = offset; + sg[segs].len = bytes; + segs++; + + ++page; + offset = 0; + len -= bytes; + } + m = m->next; + } + + return segs; +} + +/* Transmit directly from mbuf */ +static int hn_xmit_sg(struct hn_tx_queue *txq, + const struct hn_txdesc *txd, const struct rte_mbuf *m, + bool *need_sig) +{ + struct vmbus_gpa sg[hn_get_slots(m)]; + struct hn_nvs_rndis nvs_rndis = { + .type = NVS_TYPE_RNDIS, + .rndis_mtype = NVS_RNDIS_MTYPE_DATA, + .chim_sz = txd->chim_size, + }; + rte_iova_t addr; + unsigned int segs; + + /* attach aggregation data if present */ + if (txd->chim_size > 0) + nvs_rndis.chim_idx = txd->chim_index; + else + nvs_rndis.chim_idx = NVS_CHIM_IDX_INVALID; + + hn_rndis_dump(txd->rndis_pkt); + + /* pass IOVA of rndis header in first segment */ + addr = rte_malloc_virt2iova(txd->rndis_pkt); + if (unlikely(addr == RTE_BAD_IOVA)) { + PMD_DRV_LOG(ERR, "RNDIS transmit can not get iova"); + return -EINVAL; + } + + sg[0].page = addr / PAGE_SIZE; + sg[0].ofs = addr & PAGE_MASK; + sg[0].len = RNDIS_PACKET_MSG_OFFSET_ABS(hn_rndis_pktlen(txd->rndis_pkt)); + segs = 1; + + hn_update_packet_stats(&txq->stats, m); + + segs += hn_fill_sg(sg + 1, m); + + PMD_TX_LOG(DEBUG, "port %u:%u tx %u segs %u size %u", + txq->port_id, txq->queue_id, txd->chim_index, + segs, nvs_rndis.chim_sz); + + return hn_nvs_send_sglist(txq->chan, sg, segs, + &nvs_rndis, sizeof(nvs_rndis), + (uintptr_t)txd, need_sig); +} + +uint16_t +hn_xmit_pkts(void *ptxq, struct rte_mbuf **tx_pkts, uint16_t nb_pkts) +{ + struct hn_tx_queue *txq = ptxq; + struct hn_data *hv = txq->hv; + bool need_sig = false; + uint16_t nb_tx; + int ret; + + if (unlikely(hv->closed)) + return 0; + + if (rte_mempool_avail_count(hv->tx_pool) <= txq->free_thresh) + hn_process_events(hv, txq->queue_id); + + for (nb_tx = 0; nb_tx < nb_pkts; nb_tx++) { + struct rte_mbuf *m = tx_pkts[nb_tx]; + uint32_t pkt_size = m->pkt_len + HN_RNDIS_PKT_LEN; + struct rndis_packet_msg *pkt; + + /* For small packets aggregate them in chimney buffer */ + if (m->pkt_len < HN_TXCOPY_THRESHOLD && pkt_size <= txq->agg_szmax) { + /* If this packet will not fit, then flush */ + if (txq->agg_pktleft == 0 || + RTE_ALIGN(pkt_size, txq->agg_align) > txq->agg_szleft) { + if (hn_flush_txagg(txq, &need_sig)) + goto fail; + } + + pkt = hn_try_txagg(hv, txq, pkt_size); + if (unlikely(!pkt)) + goto fail; + + hn_encap(pkt, txq->queue_id, m); + hn_append_to_chim(txq, pkt, m); + + rte_pktmbuf_free(m); + + /* if buffer is full, flush */ + if (txq->agg_pktleft == 0 && + hn_flush_txagg(txq, &need_sig)) + goto fail; + } else { + struct hn_txdesc *txd; + + /* can send chimney data and large packet at once */ + txd = txq->agg_txd; + if (txd) { + hn_reset_txagg(txq); + } else { + txd = hn_new_txd(hv, txq); + if (unlikely(!txd)) + goto fail; + } + + pkt = txd->rndis_pkt; + txd->m = m; + txd->data_size += m->pkt_len; + ++txd->packets; + + hn_encap(pkt, txq->queue_id, m); + + ret = hn_xmit_sg(txq, txd, m, &need_sig); + if (unlikely(ret != 0)) { + PMD_TX_LOG(NOTICE, "sg send failed: %d", ret); + ++txq->stats.errors; + rte_mempool_put(hv->tx_pool, txd); + goto fail; + } + } + } + + /* If partial buffer left, then try and send it. + * if that fails, then reuse it on next send. + */ + hn_flush_txagg(txq, &need_sig); + +fail: + if (need_sig) + rte_vmbus_chan_signal_tx(txq->chan); + + return nb_tx; +} + +uint16_t +hn_recv_pkts(void *prxq, struct rte_mbuf **rx_pkts, uint16_t nb_pkts) +{ + struct hn_rx_queue *rxq = prxq; + struct hn_data *hv = rxq->hv; + + if (unlikely(hv->closed)) + return 0; + + /* Get all outstanding receive completions */ + hn_process_events(hv, rxq->queue_id); + + /* Get mbufs off staging ring */ + return rte_ring_sc_dequeue_burst(rxq->rx_ring, (void **)rx_pkts, + nb_pkts, NULL); +} diff --git a/drivers/net/netvsc/hn_var.h b/drivers/net/netvsc/hn_var.h new file mode 100644 index 0000000000..f0358c5822 --- /dev/null +++ b/drivers/net/netvsc/hn_var.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2009-2018 Microsoft Corp. + * Copyright (c) 2016 Brocade Communications Systems, Inc. + * Copyright (c) 2012 NetApp Inc. + * Copyright (c) 2012 Citrix Inc. + * All rights reserved. + */ + +/* + * Tunable ethdev params + */ +#define HN_MIN_RX_BUF_SIZE 1024 +#define HN_MAX_XFER_LEN 2048 +#define HN_MAX_MAC_ADDRS 1 +#define HN_MAX_CHANNELS 64 + +/* Claimed to be 12232B */ +#define HN_MTU_MAX (9 * 1024) + +/* Retry interval */ +#define HN_CHAN_INTERVAL_US 100 + +/* Buffers need to be aligned */ +#ifndef PAGE_SIZE +#define PAGE_SIZE 4096 +#endif + +#ifndef PAGE_MASK +#define PAGE_MASK (PAGE_SIZE - 1) +#endif + +struct hn_data; +struct hn_txdesc; + +struct hn_stats { + uint64_t packets; + uint64_t bytes; + uint64_t errors; + uint64_t nomemory; + uint64_t multicast; + uint64_t broadcast; + /* Size bins in array as RFC 2819, undersized [0], 64 [1], etc */ + uint64_t size_bins[8]; +}; + +struct hn_tx_queue { + struct hn_data *hv; + struct vmbus_channel *chan; + uint16_t port_id; + uint16_t queue_id; + uint32_t free_thresh; + + /* Applied packet transmission aggregation limits. */ + uint32_t agg_szmax; + uint32_t agg_pktmax; + uint32_t agg_align; + + /* Packet transmission aggregation states */ + struct hn_txdesc *agg_txd; + uint32_t agg_pktleft; + uint32_t agg_szleft; + struct rndis_packet_msg *agg_prevpkt; + + struct hn_stats stats; +}; + +struct hn_rx_queue { + struct hn_data *hv; + struct vmbus_channel *chan; + struct rte_mempool *mb_pool; + struct rte_ring *rx_ring; + void *event_buf; + + rte_spinlock_t ring_lock; + uint32_t event_sz; + uint16_t port_id; + uint16_t queue_id; + struct hn_stats stats; + uint64_t ring_full; +}; + + +/* multi-packet data from host */ +struct hn_rx_bufinfo { + struct vmbus_channel *chan; + struct hn_data *hv; + uint64_t xactid; + struct rte_mbuf_ext_shared_info shinfo; +} __rte_cache_aligned; + +struct hn_data { + struct rte_vmbus_device *vmbus; + struct hn_rx_queue *primary; + uint16_t port_id; + bool closed; + uint32_t link_status; + uint32_t link_speed; + + struct rte_mem_resource *rxbuf_res; /* UIO resource for Rx */ + struct hn_rx_bufinfo *rxbuf_info; + uint32_t rxbuf_section_cnt; /* # of Rx sections */ + volatile uint32_t rxbuf_outstanding; + uint16_t max_queues; /* Max available queues */ + uint16_t num_queues; + uint64_t rss_offloads; + + struct rte_mem_resource *chim_res; /* UIO resource for Tx */ + struct rte_mempool *tx_pool; /* Tx descriptors */ + uint32_t chim_szmax; /* Max size per buffer */ + uint32_t chim_cnt; /* Max packets per buffer */ + + uint32_t nvs_ver; + uint32_t ndis_ver; + uint32_t rndis_agg_size; + uint32_t rndis_agg_pkts; + uint32_t rndis_agg_align; + + volatile uint32_t rndis_pending; + rte_atomic32_t rndis_req_id; + uint8_t rndis_resp[256]; + + struct ether_addr mac_addr; + struct vmbus_channel *channels[HN_MAX_CHANNELS]; +}; + +static inline struct vmbus_channel * +hn_primary_chan(const struct hn_data *hv) +{ + return hv->channels[0]; +} + +void hn_process_events(struct hn_data *hv, uint16_t queue_id); + +uint16_t hn_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, + uint16_t nb_pkts); +uint16_t hn_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts, + uint16_t nb_pkts); + +int hn_tx_pool_init(struct rte_eth_dev *dev); +int hn_dev_tx_queue_setup(struct rte_eth_dev *dev, uint16_t queue_idx, + uint16_t nb_desc, unsigned int socket_id, + const struct rte_eth_txconf *tx_conf); +void hn_dev_tx_queue_release(void *arg); + +struct hn_rx_queue *hn_rx_queue_alloc(struct hn_data *hv, + uint16_t queue_id, + unsigned int socket_id); +int hn_dev_rx_queue_setup(struct rte_eth_dev *dev, + uint16_t queue_idx, uint16_t nb_desc, + unsigned int socket_id, + const struct rte_eth_rxconf *rx_conf, + struct rte_mempool *mp); +void hn_dev_rx_queue_release(void *arg); diff --git a/drivers/net/netvsc/meson.build b/drivers/net/netvsc/meson.build new file mode 100644 index 0000000000..a15b504870 --- /dev/null +++ b/drivers/net/netvsc/meson.build @@ -0,0 +1,9 @@ +# SPDX-License-Identifier: BSD-3-Clause +# Copyright(c) 2018 Microsoft Corporation + +version = 2 +sources = files('hn_ethdev.c', 'hn_rxtx.c', 'hn_rndis.c', 'hn_nvs.c') + +deps += ['bus_vmbus' ] + +allow_experimental_apis = true diff --git a/drivers/net/netvsc/ndis.h b/drivers/net/netvsc/ndis.h new file mode 100644 index 0000000000..2e7ca99b15 --- /dev/null +++ b/drivers/net/netvsc/ndis.h @@ -0,0 +1,378 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018 Microsoft Corp. + * All rights reserved. + */ + +#ifndef _NET_NDIS_H_ +#define _NET_NDIS_H_ + +#define NDIS_MEDIA_STATE_CONNECTED 0 +#define NDIS_MEDIA_STATE_DISCONNECTED 1 + +#define NDIS_NETCHANGE_TYPE_POSSIBLE 1 +#define NDIS_NETCHANGE_TYPE_DEFINITE 2 +#define NDIS_NETCHANGE_TYPE_FROMMEDIA 3 + +#define NDIS_OFFLOAD_SET_NOCHG 0 +#define NDIS_OFFLOAD_SET_ON 1 +#define NDIS_OFFLOAD_SET_OFF 2 + +/* a.k.a GRE MAC */ +#define NDIS_ENCAP_TYPE_NVGRE 0x00000001 + +#define NDIS_HASH_FUNCTION_MASK 0x000000FF /* see hash function */ +#define NDIS_HASH_TYPE_MASK 0x00FFFF00 /* see hash type */ + +/* hash function */ +#define NDIS_HASH_FUNCTION_TOEPLITZ 0x00000001 + +/* hash type */ +#define NDIS_HASH_IPV4 0x00000100 +#define NDIS_HASH_TCP_IPV4 0x00000200 +#define NDIS_HASH_IPV6 0x00000400 +#define NDIS_HASH_IPV6_EX 0x00000800 +#define NDIS_HASH_TCP_IPV6 0x00001000 +#define NDIS_HASH_TCP_IPV6_EX 0x00002000 + +#define NDIS_HASH_KEYSIZE_TOEPLITZ 40 +#define NDIS_HASH_INDCNT 128 + +#define NDIS_OBJTYPE_DEFAULT 0x80 +#define NDIS_OBJTYPE_RSS_CAPS 0x88 +#define NDIS_OBJTYPE_RSS_PARAMS 0x89 +#define NDIS_OBJTYPE_OFFLOAD 0xa7 + +struct ndis_object_hdr { + uint8_t ndis_type; /* NDIS_OBJTYPE_ */ + uint8_t ndis_rev; /* type specific */ + uint16_t ndis_size; /* incl. this hdr */ +} __rte_packed; + +/* + * OID_TCP_OFFLOAD_PARAMETERS + * ndis_type: NDIS_OBJTYPE_DEFAULT + */ +struct ndis_offload_params { + struct ndis_object_hdr ndis_hdr; + uint8_t ndis_ip4csum; /* NDIS_OFFLOAD_PARAM_ */ + uint8_t ndis_tcp4csum; /* NDIS_OFFLOAD_PARAM_ */ + uint8_t ndis_udp4csum; /* NDIS_OFFLOAD_PARAM_ */ + uint8_t ndis_tcp6csum; /* NDIS_OFFLOAD_PARAM_ */ + uint8_t ndis_udp6csum; /* NDIS_OFFLOAD_PARAM_ */ + uint8_t ndis_lsov1; /* NDIS_OFFLOAD_PARAM_ */ + uint8_t ndis_ipsecv1; /* NDIS_OFFLOAD_IPSECV1_ */ + uint8_t ndis_lsov2_ip4; /* NDIS_OFFLOAD_LSOV2_ */ + uint8_t ndis_lsov2_ip6; /* NDIS_OFFLOAD_LSOV2_ */ + uint8_t ndis_tcp4conn; /* 0 */ + uint8_t ndis_tcp6conn; /* 0 */ + uint32_t ndis_flags; /* 0 */ + /* NDIS >= 6.1 */ + uint8_t ndis_ipsecv2; /* NDIS_OFFLOAD_IPSECV2_ */ + uint8_t ndis_ipsecv2_ip4;/* NDIS_OFFLOAD_IPSECV2_ */ + /* NDIS >= 6.30 */ + uint8_t ndis_rsc_ip4; /* NDIS_OFFLOAD_RSC_ */ + uint8_t ndis_rsc_ip6; /* NDIS_OFFLOAD_RSC_ */ + uint8_t ndis_encap; /* NDIS_OFFLOAD_SET_ */ + uint8_t ndis_encap_types;/* NDIS_ENCAP_TYPE_ */ +}; + +#define NDIS_OFFLOAD_PARAMS_SIZE sizeof(struct ndis_offload_params) +#define NDIS_OFFLOAD_PARAMS_SIZE_6_1 \ + offsetof(struct ndis_offload_params, ndis_rsc_ip4) + +#define NDIS_OFFLOAD_PARAMS_REV_2 2 /* NDIS 6.1 */ +#define NDIS_OFFLOAD_PARAMS_REV_3 3 /* NDIS 6.30 */ + +#define NDIS_OFFLOAD_PARAM_NOCHG 0 /* common */ +#define NDIS_OFFLOAD_PARAM_OFF 1 +#define NDIS_OFFLOAD_PARAM_TX 2 +#define NDIS_OFFLOAD_PARAM_RX 3 +#define NDIS_OFFLOAD_PARAM_TXRX 4 + +/* NDIS_OFFLOAD_PARAM_NOCHG */ +#define NDIS_OFFLOAD_LSOV1_OFF 1 +#define NDIS_OFFLOAD_LSOV1_ON 2 + +/* NDIS_OFFLOAD_PARAM_NOCHG */ +#define NDIS_OFFLOAD_IPSECV1_OFF 1 +#define NDIS_OFFLOAD_IPSECV1_AH 2 +#define NDIS_OFFLOAD_IPSECV1_ESP 3 +#define NDIS_OFFLOAD_IPSECV1_AH_ESP 4 + +/* NDIS_OFFLOAD_PARAM_NOCHG */ +#define NDIS_OFFLOAD_LSOV2_OFF 1 +#define NDIS_OFFLOAD_LSOV2_ON 2 + +/* NDIS_OFFLOAD_PARAM_NOCHG */ +#define NDIS_OFFLOAD_IPSECV2_OFF 1 +#define NDIS_OFFLOAD_IPSECV2_AH 2 +#define NDIS_OFFLOAD_IPSECV2_ESP 3 +#define NDIS_OFFLOAD_IPSECV2_AH_ESP 4 + +/* NDIS_OFFLOAD_PARAM_NOCHG */ +#define NDIS_OFFLOAD_RSC_OFF 1 +#define NDIS_OFFLOAD_RSC_ON 2 + +/* + * OID_GEN_RECEIVE_SCALE_CAPABILITIES + * ndis_type: NDIS_OBJTYPE_RSS_CAPS + */ +struct ndis_rss_caps { + struct ndis_object_hdr ndis_hdr; + uint32_t ndis_caps; /* NDIS_RSS_CAP_ */ + uint32_t ndis_nmsi; /* # of MSIs */ + uint32_t ndis_nrxr; /* # of RX rings */ + /* NDIS >= 6.30 */ + uint16_t ndis_nind; /* # of indtbl ent. */ + uint16_t ndis_pad; +} __rte_packed; + +#define NDIS_RSS_CAPS_SIZE \ + offsetof(struct ndis_rss_caps, ndis_pad) +#define NDIS_RSS_CAPS_SIZE_6_0 \ + offsetof(struct ndis_rss_caps, ndis_nind) + +#define NDIS_RSS_CAPS_REV_1 1 /* NDIS 6.{0,1,20} */ +#define NDIS_RSS_CAPS_REV_2 2 /* NDIS 6.30 */ + +#define NDIS_RSS_CAP_MSI 0x01000000 +#define NDIS_RSS_CAP_CLASSIFY_ISR 0x02000000 +#define NDIS_RSS_CAP_CLASSIFY_DPC 0x04000000 +#define NDIS_RSS_CAP_MSIX 0x08000000 +#define NDIS_RSS_CAP_IPV4 0x00000100 +#define NDIS_RSS_CAP_IPV6 0x00000200 +#define NDIS_RSS_CAP_IPV6_EX 0x00000400 +#define NDIS_RSS_CAP_HASH_TOEPLITZ NDIS_HASH_FUNCTION_TOEPLITZ +#define NDIS_RSS_CAP_HASHFUNC_MASK NDIS_HASH_FUNCTION_MASK + +/* + * OID_GEN_RECEIVE_SCALE_PARAMETERS + * ndis_type: NDIS_OBJTYPE_RSS_PARAMS + */ +struct ndis_rss_params { + struct ndis_object_hdr ndis_hdr; + uint16_t ndis_flags; /* NDIS_RSS_FLAG_ */ + uint16_t ndis_bcpu; /* base cpu 0 */ + uint32_t ndis_hash; /* NDIS_HASH_ */ + uint16_t ndis_indsize; /* indirect table */ + uint32_t ndis_indoffset; + uint16_t ndis_keysize; /* hash key */ + uint32_t ndis_keyoffset; + /* NDIS >= 6.20 */ + uint32_t ndis_cpumaskoffset; + uint32_t ndis_cpumaskcnt; + uint32_t ndis_cpumaskentsz; +}; + +#define NDIS_RSS_PARAMS_SIZE sizeof(struct ndis_rss_params) +#define NDIS_RSS_PARAMS_SIZE_6_0 \ + offsetof(struct ndis_rss_params, ndis_cpumaskoffset) + +#define NDIS_RSS_PARAMS_REV_1 1 /* NDIS 6.0 */ +#define NDIS_RSS_PARAMS_REV_2 2 /* NDIS 6.20 */ + +#define NDIS_RSS_FLAG_NONE 0x0000 +#define NDIS_RSS_FLAG_BCPU_UNCHG 0x0001 +#define NDIS_RSS_FLAG_HASH_UNCHG 0x0002 +#define NDIS_RSS_FLAG_IND_UNCHG 0x0004 +#define NDIS_RSS_FLAG_KEY_UNCHG 0x0008 +#define NDIS_RSS_FLAG_DISABLE 0x0010 + +/* non-standard convenient struct */ +struct ndis_rssprm_toeplitz { + struct ndis_rss_params rss_params; + /* Indirect table */ + uint32_t rss_ind[NDIS_HASH_INDCNT]; + /* Toeplitz hash key */ + uint8_t rss_key[NDIS_HASH_KEYSIZE_TOEPLITZ]; +}; + +#define NDIS_RSSPRM_TOEPLITZ_SIZE(nind) \ + offsetof(struct ndis_rssprm_toeplitz, rss_ind[nind]) + +/* + * OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES + * ndis_type: NDIS_OBJTYPE_OFFLOAD + */ + +#define NDIS_OFFLOAD_ENCAP_NONE 0x0000 +#define NDIS_OFFLOAD_ENCAP_NULL 0x0001 +#define NDIS_OFFLOAD_ENCAP_8023 0x0002 +#define NDIS_OFFLOAD_ENCAP_8023PQ 0x0004 +#define NDIS_OFFLOAD_ENCAP_8023PQ_OOB 0x0008 +#define NDIS_OFFLOAD_ENCAP_RFC1483 0x0010 + +struct ndis_csum_offload { + uint32_t ndis_ip4_txenc; /*NDIS_OFFLOAD_ENCAP_*/ + uint32_t ndis_ip4_txcsum; +#define NDIS_TXCSUM_CAP_IP4OPT 0x001 +#define NDIS_TXCSUM_CAP_TCP4OPT 0x004 +#define NDIS_TXCSUM_CAP_TCP4 0x010 +#define NDIS_TXCSUM_CAP_UDP4 0x040 +#define NDIS_TXCSUM_CAP_IP4 0x100 + uint32_t ndis_ip4_rxenc; /*NDIS_OFFLOAD_ENCAP_*/ + uint32_t ndis_ip4_rxcsum; +#define NDIS_RXCSUM_CAP_IP4OPT 0x001 +#define NDIS_RXCSUM_CAP_TCP4OPT 0x004 +#define NDIS_RXCSUM_CAP_TCP4 0x010 +#define NDIS_RXCSUM_CAP_UDP4 0x040 +#define NDIS_RXCSUM_CAP_IP4 0x100 + uint32_t ndis_ip6_txenc; /*NDIS_OFFLOAD_ENCAP_*/ + uint32_t ndis_ip6_txcsum; +#define NDIS_TXCSUM_CAP_IP6EXT 0x001 +#define NDIS_TXCSUM_CAP_TCP6OPT 0x004 +#define NDIS_TXCSUM_CAP_TCP6 0x010 +#define NDIS_TXCSUM_CAP_UDP6 0x040 + uint32_t ndis_ip6_rxenc; /*NDIS_OFFLOAD_ENCAP_*/ + uint32_t ndis_ip6_rxcsum; +#define NDIS_RXCSUM_CAP_IP6EXT 0x001 +#define NDIS_RXCSUM_CAP_TCP6OPT 0x004 +#define NDIS_RXCSUM_CAP_TCP6 0x010 +#define NDIS_RXCSUM_CAP_UDP6 0x040 +}; + +struct ndis_lsov1_offload { + uint32_t ndis_encap; /*NDIS_OFFLOAD_ENCAP_*/ + uint32_t ndis_maxsize; + uint32_t ndis_minsegs; + uint32_t ndis_opts; +}; + +struct ndis_ipsecv1_offload { + uint32_t ndis_encap; /*NDIS_OFFLOAD_ENCAP_*/ + uint32_t ndis_ah_esp; + uint32_t ndis_xport_tun; + uint32_t ndis_ip4_opts; + uint32_t ndis_flags; + uint32_t ndis_ip4_ah; + uint32_t ndis_ip4_esp; +}; + +struct ndis_lsov2_offload { + uint32_t ndis_ip4_encap; /*NDIS_OFFLOAD_ENCAP_*/ + uint32_t ndis_ip4_maxsz; + uint32_t ndis_ip4_minsg; + uint32_t ndis_ip6_encap; /*NDIS_OFFLOAD_ENCAP_*/ + uint32_t ndis_ip6_maxsz; + uint32_t ndis_ip6_minsg; + uint32_t ndis_ip6_opts; +#define NDIS_LSOV2_CAP_IP6EXT 0x001 +#define NDIS_LSOV2_CAP_TCP6OPT 0x004 +}; + +struct ndis_ipsecv2_offload { + uint32_t ndis_encap; /*NDIS_OFFLOAD_ENCAP_*/ + uint16_t ndis_ip6; + uint16_t ndis_ip4opt; + uint16_t ndis_ip6ext; + uint16_t ndis_ah; + uint16_t ndis_esp; + uint16_t ndis_ah_esp; + uint16_t ndis_xport; + uint16_t ndis_tun; + uint16_t ndis_xport_tun; + uint16_t ndis_lso; + uint16_t ndis_extseq; + uint32_t ndis_udp_esp; + uint32_t ndis_auth; + uint32_t ndis_crypto; + uint32_t ndis_sa_caps; +}; + +struct ndis_rsc_offload { + uint16_t ndis_ip4; + uint16_t ndis_ip6; +}; + +struct ndis_encap_offload { + uint32_t ndis_flags; + uint32_t ndis_maxhdr; +}; + +struct ndis_offload { + struct ndis_object_hdr ndis_hdr; + struct ndis_csum_offload ndis_csum; + struct ndis_lsov1_offload ndis_lsov1; + struct ndis_ipsecv1_offload ndis_ipsecv1; + struct ndis_lsov2_offload ndis_lsov2; + uint32_t ndis_flags; + /* NDIS >= 6.1 */ + struct ndis_ipsecv2_offload ndis_ipsecv2; + /* NDIS >= 6.30 */ + struct ndis_rsc_offload ndis_rsc; + struct ndis_encap_offload ndis_encap_gre; +}; + +#define NDIS_OFFLOAD_SIZE sizeof(struct ndis_offload) +#define NDIS_OFFLOAD_SIZE_6_0 offsetof(struct ndis_offload, ndis_ipsecv2) +#define NDIS_OFFLOAD_SIZE_6_1 offsetof(struct ndis_offload, ndis_rsc) + +#define NDIS_OFFLOAD_REV_1 1 /* NDIS 6.0 */ +#define NDIS_OFFLOAD_REV_2 2 /* NDIS 6.1 */ +#define NDIS_OFFLOAD_REV_3 3 /* NDIS 6.30 */ + +/* + * Per-packet-info + */ + +/* VLAN */ +#define NDIS_VLAN_INFO_SIZE sizeof(uint32_t) +#define NDIS_VLAN_INFO_PRI_MASK 0x0007 +#define NDIS_VLAN_INFO_CFI_MASK 0x0008 +#define NDIS_VLAN_INFO_ID_MASK 0xfff0 +#define NDIS_VLAN_INFO_MAKE(id, pri, cfi) \ + (((pri) & NDIS_VLAN_INFO_PRI_MASK) | \ + (((cfi) & 0x1) << 3) | (((id) & 0xfff) << 4)) +#define NDIS_VLAN_INFO_ID(inf) (((inf) & NDIS_VLAN_INFO_ID_MASK) >> 4) +#define NDIS_VLAN_INFO_CFI(inf) (((inf) & NDIS_VLAN_INFO_CFI_MASK) >> 3) +#define NDIS_VLAN_INFO_PRI(inf) ((inf) & NDIS_VLAN_INFO_PRI_MASK) + +/* Reception checksum */ +#define NDIS_RXCSUM_INFO_SIZE sizeof(uint32_t) +#define NDIS_RXCSUM_INFO_TCPCS_FAILED 0x0001 +#define NDIS_RXCSUM_INFO_UDPCS_FAILED 0x0002 +#define NDIS_RXCSUM_INFO_IPCS_FAILED 0x0004 +#define NDIS_RXCSUM_INFO_TCPCS_OK 0x0008 +#define NDIS_RXCSUM_INFO_UDPCS_OK 0x0010 +#define NDIS_RXCSUM_INFO_IPCS_OK 0x0020 +#define NDIS_RXCSUM_INFO_LOOPBACK 0x0040 +#define NDIS_RXCSUM_INFO_TCPCS_INVAL 0x0080 +#define NDIS_RXCSUM_INFO_IPCS_INVAL 0x0100 + +/* LSOv2 */ +#define NDIS_LSO2_INFO_SIZE sizeof(uint32_t) +#define NDIS_LSO2_INFO_MSS_MASK 0x000fffff +#define NDIS_LSO2_INFO_THOFF_MASK 0x3ff00000 +#define NDIS_LSO2_INFO_ISLSO2 0x40000000 +#define NDIS_LSO2_INFO_ISIPV6 0x80000000 + +#define NDIS_LSO2_INFO_MAKE(thoff, mss) \ + ((((uint32_t)(mss)) & NDIS_LSO2_INFO_MSS_MASK) | \ + ((((uint32_t)(thoff)) & 0x3ff) << 20) | \ + NDIS_LSO2_INFO_ISLSO2) + +#define NDIS_LSO2_INFO_MAKEIPV4(thoff, mss) \ + NDIS_LSO2_INFO_MAKE((thoff), (mss)) + +#define NDIS_LSO2_INFO_MAKEIPV6(thoff, mss) \ + (NDIS_LSO2_INFO_MAKE((thoff), (mss)) | NDIS_LSO2_INFO_ISIPV6) + +/* Transmission checksum */ +#define NDIS_TXCSUM_INFO_SIZE sizeof(uint32_t) +#define NDIS_TXCSUM_INFO_IPV4 0x00000001 +#define NDIS_TXCSUM_INFO_IPV6 0x00000002 +#define NDIS_TXCSUM_INFO_TCPCS 0x00000004 +#define NDIS_TXCSUM_INFO_UDPCS 0x00000008 +#define NDIS_TXCSUM_INFO_IPCS 0x00000010 +#define NDIS_TXCSUM_INFO_THOFF 0x03ff0000 + +#define NDIS_TXCSUM_INFO_MKL4CS(thoff, flag) \ + ((((uint32_t)(thoff)) << 16) | (flag)) + +#define NDIS_TXCSUM_INFO_MKTCPCS(thoff) \ + NDIS_TXCSUM_INFO_MKL4CS((thoff), NDIS_TXCSUM_INFO_TCPCS) + +#define NDIS_TXCSUM_INFO_MKUDPCS(thoff) \ + NDIS_TXCSUM_INFO_MKL4CS((thoff), NDIS_TXCSUM_INFO_UDPCS) + +#endif /* !_NET_NDIS_H_ */ diff --git a/drivers/net/netvsc/rndis.h b/drivers/net/netvsc/rndis.h new file mode 100644 index 0000000000..eac9a99fd8 --- /dev/null +++ b/drivers/net/netvsc/rndis.h @@ -0,0 +1,414 @@ +/* SPDX-License-Identifier: BSD-3-Clause + * Copyright (c) 2018 Microsoft Corp. + * Copyright (c) 2010 Jonathan Armani + * Copyright (c) 2010 Fabien Romano + * Copyright (c) 2010 Michael Knudsen + * All rights reserved. + */ + +#ifndef _NET_RNDIS_H_ +#define _NET_RNDIS_H_ + +/* Canonical major/minor version as of 22th Aug. 2016. */ +#define RNDIS_VERSION_MAJOR 0x00000001 +#define RNDIS_VERSION_MINOR 0x00000000 + +#define RNDIS_STATUS_SUCCESS 0x00000000 +#define RNDIS_STATUS_PENDING 0x00000103 + +#define RNDIS_STATUS_ONLINE 0x40010003 +#define RNDIS_STATUS_RESET_START 0x40010004 +#define RNDIS_STATUS_RESET_END 0x40010005 +#define RNDIS_STATUS_RING_STATUS 0x40010006 +#define RNDIS_STATUS_CLOSED 0x40010007 +#define RNDIS_STATUS_WAN_LINE_UP 0x40010008 +#define RNDIS_STATUS_WAN_LINE_DOWN 0x40010009 +#define RNDIS_STATUS_WAN_FRAGMENT 0x4001000A +#define RNDIS_STATUS_MEDIA_CONNECT 0x4001000B +#define RNDIS_STATUS_MEDIA_DISCONNECT 0x4001000C +#define RNDIS_STATUS_HARDWARE_LINE_UP 0x4001000D +#define RNDIS_STATUS_HARDWARE_LINE_DOWN 0x4001000E +#define RNDIS_STATUS_INTERFACE_UP 0x4001000F +#define RNDIS_STATUS_INTERFACE_DOWN 0x40010010 +#define RNDIS_STATUS_MEDIA_BUSY 0x40010011 +#define RNDIS_STATUS_MEDIA_SPECIFIC_INDICATION 0x40010012 +#define RNDIS_STATUS_WW_INDICATION RDIA_SPECIFIC_INDICATION +#define RNDIS_STATUS_LINK_SPEED_CHANGE 0x40010013 +#define RNDIS_STATUS_NETWORK_CHANGE 0x40010018 +#define RNDIS_STATUS_TASK_OFFLOAD_CURRENT_CONFIG 0x40020006 + +#define RNDIS_STATUS_FAILURE 0xC0000001 +#define RNDIS_STATUS_RESOURCES 0xC000009A +#define RNDIS_STATUS_NOT_SUPPORTED 0xC00000BB +#define RNDIS_STATUS_CLOSING 0xC0010002 +#define RNDIS_STATUS_BAD_VERSION 0xC0010004 +#define RNDIS_STATUS_BAD_CHARACTERISTICS 0xC0010005 +#define RNDIS_STATUS_ADAPTER_NOT_FOUND 0xC0010006 +#define RNDIS_STATUS_OPEN_FAILED 0xC0010007 +#define RNDIS_STATUS_DEVICE_FAILED 0xC0010008 +#define RNDIS_STATUS_MULTICAST_FULL 0xC0010009 +#define RNDIS_STATUS_MULTICAST_EXISTS 0xC001000A +#define RNDIS_STATUS_MULTICAST_NOT_FOUND 0xC001000B +#define RNDIS_STATUS_REQUEST_ABORTED 0xC001000C +#define RNDIS_STATUS_RESET_IN_PROGRESS 0xC001000D +#define RNDIS_STATUS_CLOSING_INDICATING 0xC001000E +#define RNDIS_STATUS_INVALID_PACKET 0xC001000F +#define RNDIS_STATUS_OPEN_LIST_FULL 0xC0010010 +#define RNDIS_STATUS_ADAPTER_NOT_READY 0xC0010011 +#define RNDIS_STATUS_ADAPTER_NOT_OPEN 0xC0010012 +#define RNDIS_STATUS_NOT_INDICATING 0xC0010013 +#define RNDIS_STATUS_INVALID_LENGTH 0xC0010014 +#define RNDIS_STATUS_INVALID_DATA 0xC0010015 +#define RNDIS_STATUS_BUFFER_TOO_SHORT 0xC0010016 +#define RNDIS_STATUS_INVALID_OID 0xC0010017 +#define RNDIS_STATUS_ADAPTER_REMOVED 0xC0010018 +#define RNDIS_STATUS_UNSUPPORTED_MEDIA 0xC0010019 +#define RNDIS_STATUS_GROUP_ADDRESS_IN_US 0xC001001A +#define RNDIS_STATUS_FILE_NOT_FOUND 0xC001001B +#define RNDIS_STATUS_ERROR_READING_FILE 0xC001001C +#define RNDIS_STATUS_ALREADY_MAPPED 0xC001001D +#define RNDIS_STATUS_RESOURCE_CONFLICT 0xC001001E +#define RNDIS_STATUS_NO_CABLE 0xC001001F + +#define OID_GEN_SUPPORTED_LIST 0x00010101 +#define OID_GEN_HARDWARE_STATUS 0x00010102 +#define OID_GEN_MEDIA_SUPPORTED 0x00010103 +#define OID_GEN_MEDIA_IN_USE 0x00010104 +#define OID_GEN_MAXIMUM_LOOKAHEAD 0x00010105 +#define OID_GEN_MAXIMUM_FRAME_SIZE 0x00010106 +#define OID_GEN_LINK_SPEED 0x00010107 +#define OID_GEN_TRANSMIT_BUFFER_SPACE 0x00010108 +#define OID_GEN_RECEIVE_BUFFER_SPACE 0x00010109 +#define OID_GEN_TRANSMIT_BLOCK_SIZE 0x0001010A +#define OID_GEN_RECEIVE_BLOCK_SIZE 0x0001010B +#define OID_GEN_VENDOR_ID 0x0001010C +#define OID_GEN_VENDOR_DESCRIPTION 0x0001010D +#define OID_GEN_CURRENT_PACKET_FILTER 0x0001010E +#define OID_GEN_CURRENT_LOOKAHEAD 0x0001010F +#define OID_GEN_DRIVER_VERSION 0x00010110 +#define OID_GEN_MAXIMUM_TOTAL_SIZE 0x00010111 +#define OID_GEN_PROTOCOL_OPTIONS 0x00010112 +#define OID_GEN_MAC_OPTIONS 0x00010113 +#define OID_GEN_MEDIA_CONNECT_STATUS 0x00010114 +#define OID_GEN_MAXIMUM_SEND_PACKETS 0x00010115 +#define OID_GEN_VENDOR_DRIVER_VERSION 0x00010116 +#define OID_GEN_SUPPORTED_GUIDS 0x00010117 +#define OID_GEN_NETWORK_LAYER_ADDRESSES 0x00010118 +#define OID_GEN_TRANSPORT_HEADER_OFFSET 0x00010119 +#define OID_GEN_RECEIVE_SCALE_CAPABILITIES 0x00010203 +#define OID_GEN_RECEIVE_SCALE_PARAMETERS 0x00010204 +#define OID_GEN_MACHINE_NAME 0x0001021A +#define OID_GEN_RNDIS_CONFIG_PARAMETER 0x0001021B +#define OID_GEN_VLAN_ID 0x0001021C + +#define OID_802_3_PERMANENT_ADDRESS 0x01010101 +#define OID_802_3_CURRENT_ADDRESS 0x01010102 +#define OID_802_3_MULTICAST_LIST 0x01010103 +#define OID_802_3_MAXIMUM_LIST_SIZE 0x01010104 +#define OID_802_3_MAC_OPTIONS 0x01010105 +#define OID_802_3_RCV_ERROR_ALIGNMENT 0x01020101 +#define OID_802_3_XMIT_ONE_COLLISION 0x01020102 +#define OID_802_3_XMIT_MORE_COLLISIONS 0x01020103 +#define OID_802_3_XMIT_DEFERRED 0x01020201 +#define OID_802_3_XMIT_MAX_COLLISIONS 0x01020202 +#define OID_802_3_RCV_OVERRUN 0x01020203 +#define OID_802_3_XMIT_UNDERRUN 0x01020204 +#define OID_802_3_XMIT_HEARTBEAT_FAILURE 0x01020205 +#define OID_802_3_XMIT_TIMES_CRS_LOST 0x01020206 +#define OID_802_3_XMIT_LATE_COLLISIONS 0x01020207 + +#define OID_TCP_OFFLOAD_PARAMETERS 0xFC01020C +#define OID_TCP_OFFLOAD_HARDWARE_CAPABILITIES 0xFC01020D + +#define RNDIS_MEDIUM_802_3 0x00000000 + +/* Device flags */ +#define RNDIS_DF_CONNECTIONLESS 0x00000001 +#define RNDIS_DF_CONNECTION_ORIENTED 0x00000002 + +/* + * Common RNDIS message header. + */ +struct rndis_msghdr { + uint32_t type; + uint32_t len; +}; + +/* + * RNDIS data message + */ +#define RNDIS_PACKET_MSG 0x00000001 + +struct rndis_packet_msg { + uint32_t type; + uint32_t len; + uint32_t dataoffset; + uint32_t datalen; + uint32_t oobdataoffset; + uint32_t oobdatalen; + uint32_t oobdataelements; + uint32_t pktinfooffset; + uint32_t pktinfolen; + uint32_t vchandle; + uint32_t reserved; +}; + +/* + * Minimum value for dataoffset, oobdataoffset, and + * pktinfooffset. + */ +#define RNDIS_PACKET_MSG_OFFSET_MIN \ + (sizeof(struct rndis_packet_msg) - \ + offsetof(struct rndis_packet_msg, dataoffset)) + +/* Offset from the beginning of rndis_packet_msg. */ +#define RNDIS_PACKET_MSG_OFFSET_ABS(ofs) \ + ((ofs) + offsetof(struct rndis_packet_msg, dataoffset)) + +#define RNDIS_PACKET_MSG_OFFSET_ALIGN 4 +#define RNDIS_PACKET_MSG_OFFSET_ALIGNMASK \ + (RNDIS_PACKET_MSG_OFFSET_ALIGN - 1) + +/* Per-packet-info for RNDIS data message */ +struct rndis_pktinfo { + uint32_t size; + uint32_t type; /* NDIS_PKTINFO_TYPE_ */ + uint32_t offset; + uint8_t data[]; +}; + +#define RNDIS_PKTINFO_OFFSET \ + offsetof(struct rndis_pktinfo, data[0]) +#define RNDIS_PKTINFO_SIZE_ALIGN 4 +#define RNDIS_PKTINFO_SIZE_ALIGNMASK (RNDIS_PKTINFO_SIZE_ALIGN - 1) + +#define NDIS_PKTINFO_TYPE_CSUM 0 +#define NDIS_PKTINFO_TYPE_IPSEC 1 +#define NDIS_PKTINFO_TYPE_LSO 2 +#define NDIS_PKTINFO_TYPE_CLASSIFY 3 +/* reserved 4 */ +#define NDIS_PKTINFO_TYPE_SGLIST 5 +#define NDIS_PKTINFO_TYPE_VLAN 6 +#define NDIS_PKTINFO_TYPE_ORIG 7 +#define NDIS_PKTINFO_TYPE_PKT_CANCELID 8 +#define NDIS_PKTINFO_TYPE_ORIG_NBLIST 9 +#define NDIS_PKTINFO_TYPE_CACHE_NBLIST 10 +#define NDIS_PKTINFO_TYPE_PKT_PAD 11 + +/* RNDIS extension */ + +/* Per-packet hash info */ +#define NDIS_HASH_INFO_SIZE sizeof(uint32_t) +#define NDIS_PKTINFO_TYPE_HASHINF NDIS_PKTINFO_TYPE_ORIG_NBLIST +/* NDIS_HASH_ */ + +/* Per-packet hash value */ +#define NDIS_HASH_VALUE_SIZE sizeof(uint32_t) +#define NDIS_PKTINFO_TYPE_HASHVAL NDIS_PKTINFO_TYPE_PKT_CANCELID + +/* Per-packet-info size */ +#define RNDIS_PKTINFO_SIZE(dlen) offsetof(struct rndis_pktinfo, data[dlen]) + +/* + * RNDIS control messages + */ + +/* + * Common header for RNDIS completion messages. + * + * NOTE: It does not apply to RNDIS_RESET_CMPLT. + */ +struct rndis_comp_hdr { + uint32_t type; + uint32_t len; + uint32_t rid; + uint32_t status; +}; + +/* Initialize the device. */ +#define RNDIS_INITIALIZE_MSG 0x00000002 +#define RNDIS_INITIALIZE_CMPLT 0x80000002 + +struct rndis_init_req { + uint32_t type; + uint32_t len; + uint32_t rid; + uint32_t ver_major; + uint32_t ver_minor; + uint32_t max_xfersz; +}; + +struct rndis_init_comp { + uint32_t type; + uint32_t len; + uint32_t rid; + uint32_t status; + uint32_t ver_major; + uint32_t ver_minor; + uint32_t devflags; + uint32_t medium; + uint32_t pktmaxcnt; + uint32_t pktmaxsz; + uint32_t align; + uint32_t aflistoffset; + uint32_t aflistsz; +}; + +#define RNDIS_INIT_COMP_SIZE_MIN \ + offsetof(struct rndis_init_comp, aflistsz) + +/* Halt the device. No response sent. */ +#define RNDIS_HALT_MSG 0x00000003 + +struct rndis_halt_req { + uint32_t type; + uint32_t len; + uint32_t rid; +}; + +/* Send a query object. */ +#define RNDIS_QUERY_MSG 0x00000004 +#define RNDIS_QUERY_CMPLT 0x80000004 + +struct rndis_query_req { + uint32_t type; + uint32_t len; + uint32_t rid; + uint32_t oid; + uint32_t infobuflen; + uint32_t infobufoffset; + uint32_t devicevchdl; +}; + +#define RNDIS_QUERY_REQ_INFOBUFOFFSET \ + (sizeof(struct rndis_query_req) - \ + offsetof(struct rndis_query_req, rid)) + +struct rndis_query_comp { + uint32_t type; + uint32_t len; + uint32_t rid; + uint32_t status; + uint32_t infobuflen; + uint32_t infobufoffset; +}; + +/* infobuf offset from the beginning of rndis_query_comp. */ +#define RNDIS_QUERY_COMP_INFOBUFOFFSET_ABS(ofs) \ + ((ofs) + offsetof(struct rndis_query_comp, rid)) + +/* Send a set object request. */ +#define RNDIS_SET_MSG 0x00000005 +#define RNDIS_SET_CMPLT 0x80000005 + +struct rndis_set_req { + uint32_t type; + uint32_t len; + uint32_t rid; + uint32_t oid; + uint32_t infobuflen; + uint32_t infobufoffset; + uint32_t devicevchdl; +}; + +#define RNDIS_SET_REQ_INFOBUFOFFSET \ + (sizeof(struct rndis_set_req) - \ + offsetof(struct rndis_set_req, rid)) + +struct rndis_set_comp { + uint32_t type; + uint32_t len; + uint32_t rid; + uint32_t status; +}; + +/* + * Parameter used by OID_GEN_RNDIS_CONFIG_PARAMETER. + */ +#define RNDIS_SET_PARAM_NUMERIC 0x00000000 +#define RNDIS_SET_PARAM_STRING 0x00000002 + +struct rndis_set_parameter { + uint32_t nameoffset; + uint32_t namelen; + uint32_t type; + uint32_t valueoffset; + uint32_t valuelen; +}; + +/* Perform a soft reset on the device. */ +#define RNDIS_RESET_MSG 0x00000006 +#define RNDIS_RESET_CMPLT 0x80000006 + +struct rndis_reset_req { + uint32_t type; + uint32_t len; + uint32_t rid; +}; + +struct rndis_reset_comp { + uint32_t type; + uint32_t len; + uint32_t status; + uint32_t adrreset; +}; + +/* 802.3 link-state or undefined message error. Sent by device. */ +#define RNDIS_INDICATE_STATUS_MSG 0x00000007 + +struct rndis_status_msg { + uint32_t type; + uint32_t len; + uint32_t status; + uint32_t stbuflen; + uint32_t stbufoffset; + /* rndis_diag_info */ +}; + +/* stbuf offset from the beginning of rndis_status_msg. */ +#define RNDIS_STBUFOFFSET_ABS(ofs) \ + ((ofs) + offsetof(struct rndis_status_msg, status)) + +/* + * Immediately after rndis_status_msg.stbufoffset, if a control + * message is malformatted, or a packet message contains inappropriate + * content. + */ +struct rndis_diag_info { + uint32_t diagstatus; + uint32_t erroffset; +}; + +/* Keepalive message. May be sent by device. */ +#define RNDIS_KEEPALIVE_MSG 0x00000008 +#define RNDIS_KEEPALIVE_CMPLT 0x80000008 + +struct rndis_keepalive_req { + uint32_t type; + uint32_t len; + uint32_t rid; +}; + +struct rndis_keepalive_comp { + uint32_t type; + uint32_t len; + uint32_t rid; + uint32_t status; +}; + +/* Packet filter bits used by OID_GEN_CURRENT_PACKET_FILTER */ +#define NDIS_PACKET_TYPE_NONE 0x00000000 +#define NDIS_PACKET_TYPE_DIRECTED 0x00000001 +#define NDIS_PACKET_TYPE_MULTICAST 0x00000002 +#define NDIS_PACKET_TYPE_ALL_MULTICAST 0x00000004 +#define NDIS_PACKET_TYPE_BROADCAST 0x00000008 +#define NDIS_PACKET_TYPE_SOURCE_ROUTING 0x00000010 +#define NDIS_PACKET_TYPE_PROMISCUOUS 0x00000020 +#define NDIS_PACKET_TYPE_SMT 0x00000040 +#define NDIS_PACKET_TYPE_ALL_LOCAL 0x00000080 +#define NDIS_PACKET_TYPE_GROUP 0x00001000 +#define NDIS_PACKET_TYPE_ALL_FUNCTIONAL 0x00002000 +#define NDIS_PACKET_TYPE_FUNCTIONAL 0x00004000 +#define NDIS_PACKET_TYPE_MAC_FRAME 0x00008000 + +#endif /* !_NET_RNDIS_H_ */ diff --git a/drivers/net/netvsc/rte_pmd_netvsc_version.map b/drivers/net/netvsc/rte_pmd_netvsc_version.map new file mode 100644 index 0000000000..d534019a6b --- /dev/null +++ b/drivers/net/netvsc/rte_pmd_netvsc_version.map @@ -0,0 +1,5 @@ +/* SPDX-License-Identifier: BSD-3-Clause */ + +DPDK_18.08 { + local: *; +}; diff --git a/mk/rte.app.mk b/mk/rte.app.mk index 3e7476815f..d21fb5719c 100644 --- a/mk/rte.app.mk +++ b/mk/rte.app.mk @@ -177,6 +177,7 @@ endif # $(CONFIG_RTE_LIBRTE_VHOST) _LDLIBS-$(CONFIG_RTE_LIBRTE_VMXNET3_PMD) += -lrte_pmd_vmxnet3_uio _LDLIBS-$(CONFIG_RTE_LIBRTE_VMBUS) += -lrte_bus_vmbus +_LDLIBS-$(CONFIG_RTE_LIBRTE_NETVSC_PMD) += -lrte_pmd_netvsc ifeq ($(CONFIG_RTE_LIBRTE_BBDEV),y) _LDLIBS-$(CONFIG_RTE_LIBRTE_PMD_BBDEV_NULL) += -lrte_pmd_bbdev_null