net/mlx5: support Sub-Function
Introduce SF support. Similar to VF, SF on auxiliary bus is a portion of hardware PF, no representor or bonding parameters for SF. Devargs to support SF: -a auxiliary:mlx5_core.sf.8,dv_flow_en=1 New global syntax to support SF: -a bus=auxiliary,name=mlx5_core.sf.8/class=eth/driver=mlx5,dv_flow_en=1 Signed-off-by: Xueming Li <xuemingl@nvidia.com> Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
This commit is contained in:
parent
a7f34989e9
commit
919488fbfa
@ -112,6 +112,8 @@ Features
|
||||
- Flow integrity offload API.
|
||||
- Connection tracking.
|
||||
- Sub-Function representors.
|
||||
- Sub-Function.
|
||||
|
||||
|
||||
Limitations
|
||||
-----------
|
||||
@ -1478,40 +1480,52 @@ the DPDK application.
|
||||
|
||||
echo switchdev > /sys/class/net/<net device>/compat/devlink/mode
|
||||
|
||||
Sub-Function representor
|
||||
------------------------
|
||||
Sub-Function support
|
||||
--------------------
|
||||
|
||||
Sub-Function is a portion of the PCI device, a SF netdev has its own
|
||||
dedicated queues(txq, rxq). A SF netdev supports E-Switch representation
|
||||
offload similar to existing PF and VF representors. A SF shares PCI
|
||||
level resources with other SFs and/or with its parent PCI function.
|
||||
dedicated queues (txq, rxq).
|
||||
A SF shares PCI level resources with other SFs and/or with its parent PCI function.
|
||||
|
||||
0. Requirement::
|
||||
|
||||
OFED version >= 5.4-0.3.3.0
|
||||
|
||||
1. Configure SF feature::
|
||||
|
||||
mlxconfig -d <mst device> set PF_BAR2_SIZE=<0/1/2/3> PF_BAR2_ENABLE=1
|
||||
# Run mlxconfig on both PFs on host and ECPFs on BlueField.
|
||||
mlxconfig -d <mst device> set PER_PF_NUM_SF=1 PF_TOTAL_SF=252 PF_SF_BAR_SIZE=12
|
||||
|
||||
Value of PF_BAR2_SIZE:
|
||||
2. Enable switchdev mode::
|
||||
|
||||
0: 8 SFs
|
||||
1: 16 SFs
|
||||
2: 32 SFs
|
||||
3: 64 SFs
|
||||
mlxdevm dev eswitch set pci/<DBDF> mode switchdev
|
||||
|
||||
2. Reset the FW::
|
||||
3. Add SF port::
|
||||
|
||||
mlxfwreset -d <mst device> reset
|
||||
mlxdevm port add pci/<DBDF> flavour pcisf pfnum 0 sfnum <sfnum>
|
||||
|
||||
3. Enable switchdev mode::
|
||||
Get SFID from output: pci/<DBDF>/<SFID>
|
||||
|
||||
echo switchdev > /sys/class/net/<net device>/compat/devlink/mode
|
||||
4. Modify MAC address::
|
||||
|
||||
4. Create SF::
|
||||
mlxdevm port function set pci/<DBDF>/<SFID> hw_addr <MAC>
|
||||
|
||||
mlnx-sf -d <PCI_BDF> -a create
|
||||
5. Activate SF port::
|
||||
|
||||
5. Probe SF representor::
|
||||
mlxdevm port function set pci/<DBDF>/<SFID> state active
|
||||
|
||||
testpmd> port attach <PCI_BDF>,representor=sf0,dv_flow_en=1
|
||||
6. Devargs to probe SF device::
|
||||
|
||||
auxiliary:mlx5_core.sf.<num>,dv_flow_en=1
|
||||
|
||||
Sub-Function representor support
|
||||
--------------------------------
|
||||
|
||||
A SF netdev supports E-Switch representation offload
|
||||
similar to PF and VF representors.
|
||||
Use <sfnum> to probe SF representor::
|
||||
|
||||
testpmd> port attach <PCI_BDF>,representor=sf<sfnum>,dv_flow_en=1
|
||||
|
||||
Performance tuning
|
||||
------------------
|
||||
|
@ -88,6 +88,7 @@ New Features
|
||||
|
||||
* **Updated Mellanox mlx5 driver.**
|
||||
|
||||
* Added Sub-Function support based on auxiliary bus.
|
||||
* Added support for meter hierarchy.
|
||||
* Added devargs options ``allow_duplicate_pattern``.
|
||||
* Added matching on IPv4 Internet Header Length (IHL).
|
||||
|
@ -128,6 +128,17 @@ struct ethtool_link_settings {
|
||||
#define ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT 2 /* 66 - 64 */
|
||||
#endif
|
||||
|
||||
/* Get interface index from SubFunction device name. */
|
||||
int
|
||||
mlx5_auxiliary_get_ifindex(const char *sf_name)
|
||||
{
|
||||
char if_name[IF_NAMESIZE] = { 0 };
|
||||
|
||||
if (mlx5_auxiliary_get_child_name(sf_name, "/net",
|
||||
if_name, sizeof(if_name)) != 0)
|
||||
return -rte_errno;
|
||||
return if_nametoindex(if_name);
|
||||
}
|
||||
|
||||
/**
|
||||
* Get interface name from private structure.
|
||||
@ -1619,4 +1630,3 @@ mlx5_get_mac(struct rte_eth_dev *dev, uint8_t (*mac)[RTE_ETHER_ADDR_LEN])
|
||||
memcpy(mac, request.ifr_hwaddr.sa_data, RTE_ETHER_ADDR_LEN);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
@ -20,6 +20,7 @@
|
||||
#include <ethdev_pci.h>
|
||||
#include <rte_pci.h>
|
||||
#include <rte_bus_pci.h>
|
||||
#include <rte_bus_auxiliary.h>
|
||||
#include <rte_common.h>
|
||||
#include <rte_kvargs.h>
|
||||
#include <rte_rwlock.h>
|
||||
@ -2061,6 +2062,27 @@ mlx5_device_bond_pci_match(const struct ibv_device *ibv_dev,
|
||||
return pf;
|
||||
}
|
||||
|
||||
static void
|
||||
mlx5_os_config_default(struct mlx5_dev_config *config)
|
||||
{
|
||||
memset(config, 0, sizeof(*config));
|
||||
config->mps = MLX5_ARG_UNSET;
|
||||
config->dbnc = MLX5_ARG_UNSET;
|
||||
config->rx_vec_en = 1;
|
||||
config->txq_inline_max = MLX5_ARG_UNSET;
|
||||
config->txq_inline_min = MLX5_ARG_UNSET;
|
||||
config->txq_inline_mpw = MLX5_ARG_UNSET;
|
||||
config->txqs_inline = MLX5_ARG_UNSET;
|
||||
config->vf_nl_en = 1;
|
||||
config->mr_ext_memseg_en = 1;
|
||||
config->mprq.max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN;
|
||||
config->mprq.min_rxqs_num = MLX5_MPRQ_MIN_RXQS;
|
||||
config->dv_esw_en = 1;
|
||||
config->dv_flow_en = 1;
|
||||
config->decap_en = 1;
|
||||
config->log_hp_size = MLX5_ARG_UNSET;
|
||||
}
|
||||
|
||||
/**
|
||||
* Register a PCI device within bonding.
|
||||
*
|
||||
@ -2485,23 +2507,8 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev,
|
||||
uint32_t restore;
|
||||
|
||||
/* Default configuration. */
|
||||
memset(&dev_config, 0, sizeof(struct mlx5_dev_config));
|
||||
mlx5_os_config_default(&dev_config);
|
||||
dev_config.vf = dev_config_vf;
|
||||
dev_config.mps = MLX5_ARG_UNSET;
|
||||
dev_config.dbnc = MLX5_ARG_UNSET;
|
||||
dev_config.rx_vec_en = 1;
|
||||
dev_config.txq_inline_max = MLX5_ARG_UNSET;
|
||||
dev_config.txq_inline_min = MLX5_ARG_UNSET;
|
||||
dev_config.txq_inline_mpw = MLX5_ARG_UNSET;
|
||||
dev_config.txqs_inline = MLX5_ARG_UNSET;
|
||||
dev_config.vf_nl_en = 1;
|
||||
dev_config.mr_ext_memseg_en = 1;
|
||||
dev_config.mprq.max_memcpy_len = MLX5_MPRQ_MEMCPY_DEFAULT_LEN;
|
||||
dev_config.mprq.min_rxqs_num = MLX5_MPRQ_MIN_RXQS;
|
||||
dev_config.dv_esw_en = 1;
|
||||
dev_config.dv_flow_en = 1;
|
||||
dev_config.decap_en = 1;
|
||||
dev_config.log_hp_size = MLX5_ARG_UNSET;
|
||||
dev_config.allow_duplicate_pattern = 1;
|
||||
list[i].numa_node = pci_dev->device.numa_node;
|
||||
list[i].eth_dev = mlx5_dev_spawn(&pci_dev->device,
|
||||
@ -2560,6 +2567,35 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev,
|
||||
return ret;
|
||||
}
|
||||
|
||||
static int
|
||||
mlx5_os_parse_eth_devargs(struct rte_device *dev,
|
||||
struct rte_eth_devargs *eth_da)
|
||||
{
|
||||
int ret = 0;
|
||||
|
||||
if (dev->devargs == NULL)
|
||||
return 0;
|
||||
memset(eth_da, 0, sizeof(*eth_da));
|
||||
/* Parse representor information first from class argument. */
|
||||
if (dev->devargs->cls_str)
|
||||
ret = rte_eth_devargs_parse(dev->devargs->cls_str, eth_da);
|
||||
if (ret != 0) {
|
||||
DRV_LOG(ERR, "failed to parse device arguments: %s",
|
||||
dev->devargs->cls_str);
|
||||
return -rte_errno;
|
||||
}
|
||||
if (eth_da->type == RTE_ETH_REPRESENTOR_NONE) {
|
||||
/* Parse legacy device argument */
|
||||
ret = rte_eth_devargs_parse(dev->devargs->args, eth_da);
|
||||
if (ret) {
|
||||
DRV_LOG(ERR, "failed to parse device arguments: %s",
|
||||
dev->devargs->args);
|
||||
return -rte_errno;
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Callback to register a PCI device.
|
||||
*
|
||||
@ -2574,31 +2610,13 @@ mlx5_os_pci_probe_pf(struct rte_pci_device *pci_dev,
|
||||
static int
|
||||
mlx5_os_pci_probe(struct rte_pci_device *pci_dev)
|
||||
{
|
||||
struct rte_eth_devargs eth_da = { .type = RTE_ETH_REPRESENTOR_NONE };
|
||||
struct rte_eth_devargs eth_da = { .nb_ports = 0 };
|
||||
int ret = 0;
|
||||
uint16_t p;
|
||||
|
||||
if (pci_dev->device.devargs) {
|
||||
/* Parse representor information from device argument. */
|
||||
if (pci_dev->device.devargs->cls_str)
|
||||
ret = rte_eth_devargs_parse
|
||||
(pci_dev->device.devargs->cls_str, &eth_da);
|
||||
if (ret) {
|
||||
DRV_LOG(ERR, "failed to parse device arguments: %s",
|
||||
pci_dev->device.devargs->cls_str);
|
||||
return -rte_errno;
|
||||
}
|
||||
if (eth_da.type == RTE_ETH_REPRESENTOR_NONE) {
|
||||
/* Support legacy device argument */
|
||||
ret = rte_eth_devargs_parse
|
||||
(pci_dev->device.devargs->args, &eth_da);
|
||||
if (ret) {
|
||||
DRV_LOG(ERR, "failed to parse device arguments: %s",
|
||||
pci_dev->device.devargs->args);
|
||||
return -rte_errno;
|
||||
}
|
||||
}
|
||||
}
|
||||
ret = mlx5_os_parse_eth_devargs(&pci_dev->device, &eth_da);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
|
||||
if (eth_da.nb_ports > 0) {
|
||||
/* Iterate all ports if devargs pf is a range: "pf[0-1]vf[...]". */
|
||||
@ -2611,10 +2629,56 @@ mlx5_os_pci_probe(struct rte_pci_device *pci_dev)
|
||||
return ret;
|
||||
}
|
||||
|
||||
/* Probe a single SF device on auxiliary bus, no representor support. */
|
||||
static int
|
||||
mlx5_os_auxiliary_probe(struct rte_device *dev)
|
||||
{
|
||||
struct rte_eth_devargs eth_da = { .nb_ports = 0 };
|
||||
struct mlx5_dev_config config;
|
||||
struct mlx5_dev_spawn_data spawn = { .pf_bond = -1 };
|
||||
struct rte_auxiliary_device *adev = RTE_DEV_TO_AUXILIARY(dev);
|
||||
struct rte_eth_dev *eth_dev;
|
||||
int ret = 0;
|
||||
|
||||
/* Parse ethdev devargs. */
|
||||
ret = mlx5_os_parse_eth_devargs(dev, ð_da);
|
||||
if (ret != 0)
|
||||
return ret;
|
||||
/* Set default config data. */
|
||||
mlx5_os_config_default(&config);
|
||||
config.sf = 1;
|
||||
/* Init spawn data. */
|
||||
spawn.max_port = 1;
|
||||
spawn.phys_port = 1;
|
||||
spawn.phys_dev = mlx5_os_get_ibv_dev(dev);
|
||||
if (spawn.phys_dev == NULL)
|
||||
return -rte_errno;
|
||||
ret = mlx5_auxiliary_get_ifindex(dev->name);
|
||||
if (ret < 0) {
|
||||
DRV_LOG(ERR, "failed to get ethdev ifindex: %s", dev->name);
|
||||
return ret;
|
||||
}
|
||||
spawn.ifindex = ret;
|
||||
spawn.numa_node = dev->numa_node;
|
||||
/* Spawn device. */
|
||||
eth_dev = mlx5_dev_spawn(dev, &spawn, &config, ð_da);
|
||||
if (eth_dev == NULL)
|
||||
return -rte_errno;
|
||||
/* Post create. */
|
||||
eth_dev->intr_handle = &adev->intr_handle;
|
||||
if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
|
||||
eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_LSC;
|
||||
eth_dev->data->dev_flags |= RTE_ETH_DEV_INTR_RMV;
|
||||
eth_dev->data->numa_node = dev->numa_node;
|
||||
}
|
||||
rte_eth_dev_probing_finish(eth_dev);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**
|
||||
* Net class driver callback to probe a device.
|
||||
*
|
||||
* This function probe PCI bus device(s).
|
||||
* This function probes PCI bus device(s) or a single SF on auxiliary bus.
|
||||
*
|
||||
* @param[in] dev
|
||||
* Pointer to the generic device.
|
||||
@ -2637,7 +2701,8 @@ mlx5_os_net_probe(struct rte_device *dev)
|
||||
}
|
||||
if (mlx5_dev_is_pci(dev))
|
||||
return mlx5_os_pci_probe(RTE_DEV_TO_PCI(dev));
|
||||
return 0;
|
||||
else
|
||||
return mlx5_os_auxiliary_probe(dev);
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -19,4 +19,6 @@ enum {
|
||||
|
||||
#define MLX5_NAMESIZE IF_NAMESIZE
|
||||
|
||||
int mlx5_auxiliary_get_ifindex(const char *sf_name);
|
||||
|
||||
#endif /* RTE_PMD_MLX5_OS_H_ */
|
||||
|
@ -399,6 +399,24 @@ mlx5_is_hpf(struct rte_eth_dev *dev)
|
||||
MLX5_REPRESENTOR_REPR(-1) == repr;
|
||||
}
|
||||
|
||||
/**
|
||||
* Decide whether representor ID is a SF port representor.
|
||||
*
|
||||
* @param dev
|
||||
* Pointer to Ethernet device structure.
|
||||
*
|
||||
* @return
|
||||
* Non-zero if HPF, otherwise 0.
|
||||
*/
|
||||
bool
|
||||
mlx5_is_sf_repr(struct rte_eth_dev *dev)
|
||||
{
|
||||
struct mlx5_priv *priv = dev->data->dev_private;
|
||||
int type = MLX5_REPRESENTOR_TYPE(priv->representor_id);
|
||||
|
||||
return priv->representor != 0 && type == RTE_ETH_REPRESENTOR_SF;
|
||||
}
|
||||
|
||||
/**
|
||||
* Initialize the ASO aging management structure.
|
||||
*
|
||||
@ -2335,7 +2353,10 @@ mlx5_eth_find_next(uint16_t port_id, struct rte_device *odev)
|
||||
(dev->device == odev ||
|
||||
(dev->device->driver &&
|
||||
dev->device->driver->name &&
|
||||
!strcmp(dev->device->driver->name, MLX5_PCI_DRIVER_NAME))))
|
||||
((strcmp(dev->device->driver->name,
|
||||
MLX5_PCI_DRIVER_NAME) == 0) ||
|
||||
(strcmp(dev->device->driver->name,
|
||||
MLX5_AUXILIARY_DRIVER_NAME) == 0)))))
|
||||
break;
|
||||
port_id++;
|
||||
}
|
||||
|
@ -243,6 +243,7 @@ struct mlx5_dev_config {
|
||||
unsigned int hw_fcs_strip:1; /* FCS stripping is supported. */
|
||||
unsigned int hw_padding:1; /* End alignment padding is supported. */
|
||||
unsigned int vf:1; /* This is a VF. */
|
||||
unsigned int sf:1; /* This is a SF. */
|
||||
unsigned int tunnel_en:1;
|
||||
/* Whether tunnel stateless offloads are supported. */
|
||||
unsigned int mpls_en:1; /* MPLS over GRE/UDP is enabled. */
|
||||
@ -1466,6 +1467,7 @@ int mlx5_udp_tunnel_port_add(struct rte_eth_dev *dev,
|
||||
uint16_t mlx5_eth_find_next(uint16_t port_id, struct rte_device *odev);
|
||||
int mlx5_dev_close(struct rte_eth_dev *dev);
|
||||
bool mlx5_is_hpf(struct rte_eth_dev *dev);
|
||||
bool mlx5_is_sf_repr(struct rte_eth_dev *dev);
|
||||
void mlx5_age_event_prepare(struct mlx5_dev_ctx_shared *sh);
|
||||
|
||||
/* Macro to iterate over all valid ports for mlx5 driver. */
|
||||
|
@ -159,7 +159,7 @@ mlx5_mac_addr_set(struct rte_eth_dev *dev, struct rte_ether_addr *mac_addr)
|
||||
* Configuring the VF instead of its representor,
|
||||
* need to skip the special case of HPF on Bluefield.
|
||||
*/
|
||||
if (priv->representor && !mlx5_is_hpf(dev)) {
|
||||
if (priv->representor && !mlx5_is_hpf(dev) && !mlx5_is_sf_repr(dev)) {
|
||||
DRV_LOG(DEBUG, "VF represented by port %u setting primary MAC address",
|
||||
dev->data->port_id);
|
||||
if (priv->pf_bond >= 0) {
|
||||
|
@ -36,7 +36,7 @@ mlx5_promiscuous_enable(struct rte_eth_dev *dev)
|
||||
dev->data->port_id);
|
||||
return 0;
|
||||
}
|
||||
if (priv->config.vf) {
|
||||
if (priv->config.vf || priv->config.sf) {
|
||||
ret = mlx5_os_set_promisc(dev, 1);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -69,7 +69,7 @@ mlx5_promiscuous_disable(struct rte_eth_dev *dev)
|
||||
int ret;
|
||||
|
||||
dev->data->promiscuous = 0;
|
||||
if (priv->config.vf) {
|
||||
if (priv->config.vf || priv->config.sf) {
|
||||
ret = mlx5_os_set_promisc(dev, 0);
|
||||
if (ret)
|
||||
return ret;
|
||||
@ -109,7 +109,7 @@ mlx5_allmulticast_enable(struct rte_eth_dev *dev)
|
||||
dev->data->port_id);
|
||||
return 0;
|
||||
}
|
||||
if (priv->config.vf) {
|
||||
if (priv->config.vf || priv->config.sf) {
|
||||
ret = mlx5_os_set_allmulti(dev, 1);
|
||||
if (ret)
|
||||
goto error;
|
||||
@ -142,7 +142,7 @@ mlx5_allmulticast_disable(struct rte_eth_dev *dev)
|
||||
int ret;
|
||||
|
||||
dev->data->all_multicast = 0;
|
||||
if (priv->config.vf) {
|
||||
if (priv->config.vf || priv->config.sf) {
|
||||
ret = mlx5_os_set_allmulti(dev, 0);
|
||||
if (ret)
|
||||
goto error;
|
||||
|
@ -1259,7 +1259,7 @@ mlx5_traffic_enable(struct rte_eth_dev *dev)
|
||||
}
|
||||
mlx5_txq_release(dev, i);
|
||||
}
|
||||
if (priv->config.dv_esw_en && !priv->config.vf) {
|
||||
if (priv->config.dv_esw_en && !priv->config.vf && !priv->config.sf) {
|
||||
if (mlx5_flow_create_esw_table_zero_flow(dev))
|
||||
priv->fdb_def_rule = 1;
|
||||
else
|
||||
|
@ -922,20 +922,18 @@ mlx5_match_devx_devices_to_addr(struct devx_device_bdf *devx_bdf,
|
||||
/**
|
||||
* DPDK callback to register a PCI device.
|
||||
*
|
||||
* This function spawns Ethernet devices out of a given PCI device.
|
||||
* This function spawns Ethernet devices out of a given device.
|
||||
*
|
||||
* @param[in] pci_drv
|
||||
* PCI driver structure (mlx5_driver).
|
||||
* @param[in] pci_dev
|
||||
* PCI device information.
|
||||
* @param[in] dev
|
||||
* Pointer to the generic device.
|
||||
*
|
||||
* @return
|
||||
* 0 on success, a negative errno value otherwise and rte_errno is set.
|
||||
*/
|
||||
int
|
||||
mlx5_os_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
|
||||
struct rte_pci_device *pci_dev)
|
||||
mlx5_os_net_probe(struct rte_device *dev)
|
||||
{
|
||||
struct rte_pci_device *pci_dev = RTE_DEV_TO_PCI(dev);
|
||||
struct devx_device_bdf *devx_bdf_devs, *orig_devx_bdf_devs;
|
||||
/*
|
||||
* Number of found IB Devices matching with requested PCI BDF.
|
||||
|
Loading…
Reference in New Issue
Block a user