net/i40e: add option to use latest vector path

For IA, the AVX2 vector path is only recommended on later platforms
(identified by AVX512 support, e.g. SKL). This is because performance
benchmarks show a degradation in some cases when running the AVX2 vector
path on earlier platforms (BDW/HSW). However, a performance gain is still
observed with some real workloads.

So this patch introduces the new devarg use-latest-supported-vec to
force the driver to always select the latest supported vec path. Apps
are then able to take the AVX2 path on earlier platforms. This logic can
also be reused if an AVX512 vec path is added in the future.

This patch only affects IA platforms. The selected vec path would be
like the following:
  Without devarg/devarg = 0:
  Machine	vPMD
  AVX512F	AVX2
  AVX2	SSE4.2
  SSE4.2	SSE4.2
  <SSE4.2	Not Supported

  With devarg = 1
  Machine	vPMD
  AVX512F	AVX2
  AVX2	AVX2
  SSE4.2	SSE4.2
  <SSE4.2	Not Supported

Other platforms can also apply the same logic if necessary in future.

Signed-off-by: Xiaoyun Li <xiaoyun.li@intel.com>
Reviewed-by: Ferruh Yigit <ferruh.yigit@intel.com>
Acked-by: Qi Zhang <qi.z.zhang@intel.com>
This commit is contained in:
Xiaoyun Li 2018-09-18 10:22:40 +08:00 committed by Ferruh Yigit
parent c322c0e558
commit c11f654042
5 changed files with 157 additions and 68 deletions

View File

@ -163,6 +163,15 @@ Runtime Config Options
Currently hot-plugging of representor ports is not supported so all required
representors must be specified on the creation of the PF.
- ``Use latest supported vector`` (default ``disable``)
The latest supported vector path may not always give the best performance, so it
is only recommended for use on later platforms. However, users may still want the
latest vector path because it can perform better with some real workloads. The
``devargs`` parameter ``use-latest-supported-vec`` is provided for this, for example::
-w 84:00.0,use-latest-supported-vec=1
Driver compilation and testing
------------------------------

View File

@ -61,6 +61,11 @@ New Features
* Match items: destination MAC address.
* Action items: push/pop/rewrite vlan header.
* **Added a devarg to use the latest supported vector path in i40e.**
A new devarg ``use-latest-supported-vec`` was introduced to allow users to
choose the latest vector path that the platform supports. For example, users
can use the AVX2 vector path on BDW/HSW to get better performance.
* **Added support for SR-IOV in netvsc PMD.**
The ``netvsc`` poll mode driver now supports the Accelerated Networking

View File

@ -44,6 +44,7 @@
#define ETH_I40E_FLOATING_VEB_LIST_ARG "floating_veb_list"
#define ETH_I40E_SUPPORT_MULTI_DRIVER "support-multi-driver"
#define ETH_I40E_QUEUE_NUM_PER_VF_ARG "queue-num-per-vf"
#define ETH_I40E_USE_LATEST_VEC "use-latest-supported-vec"
#define I40E_CLEAR_PXE_WAIT_MS 200
@ -409,6 +410,7 @@ static const char *const valid_keys[] = {
ETH_I40E_FLOATING_VEB_LIST_ARG,
ETH_I40E_SUPPORT_MULTI_DRIVER,
ETH_I40E_QUEUE_NUM_PER_VF_ARG,
ETH_I40E_USE_LATEST_VEC,
NULL};
static const struct rte_pci_id pci_id_i40e_map[] = {
@ -1202,6 +1204,64 @@ i40e_aq_debug_write_global_register(struct i40e_hw *hw,
return i40e_aq_debug_write_register(hw, reg_addr, reg_val, cmd_details);
}
/*
 * kvargs handler for the "use-latest-supported-vec" devarg.
 *
 * Parses value as a base-10 integer and stores the result, normalised to
 * 0 or 1, in the use_latest_vec field of the i40e_adapter passed via opaque.
 * Any non-zero number enables the option; numbers other than 0 and 1 are
 * accepted with a warning and treated as 1, as the warning text states.
 *
 * Returns 0 on success, or -EINVAL when value is missing or is not a
 * plain decimal number (the adapter is left untouched in that case).
 */
static int
i40e_parse_latest_vec_handler(__rte_unused const char *key,
			      const char *value,
			      void *opaque)
{
	struct i40e_adapter *ad = (struct i40e_adapter *)opaque;
	char *end;
	long parsed;

	if (!value || !ad)
		return -EINVAL;

	/* strtol (unlike atoi) lets us detect empty or trailing-garbage input */
	parsed = strtol(value, &end, 10);
	if (end == value || *end != '\0') {
		PMD_DRV_LOG(WARNING, "Invalid value \"%s\", should be 0 or 1!",
			    value);
		return -EINVAL;
	}

	if (parsed != 0 && parsed != 1)
		PMD_DRV_LOG(WARNING, "Value should be 0 or 1, set it as 1!");

	/*
	 * Store a normalised flag rather than a truncating cast: casting e.g.
	 * 256 to uint8_t would silently yield 0 and disable the option.
	 */
	ad->use_latest_vec = (parsed != 0) ? 1 : 0;

	return 0;
}
/*
 * Read the "use-latest-supported-vec" devarg for this port.
 *
 * The adapter flag is first reset to false. When the device has devargs and
 * the key is present, the value is handed to i40e_parse_latest_vec_handler.
 *
 * Returns 0 when the key is absent or was processed successfully, -EINVAL
 * when the devargs string cannot be parsed or the handler reports failure.
 */
static int
i40e_use_latest_vec(struct rte_eth_dev *dev)
{
	struct i40e_adapter *adapter =
		I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
	struct rte_kvargs *args;
	int occurrences;
	int ret = 0;

	adapter->use_latest_vec = false;

	if (!dev->device->devargs)
		return 0;

	args = rte_kvargs_parse(dev->device->devargs->args, valid_keys);
	if (!args)
		return -EINVAL;

	occurrences = rte_kvargs_count(args, ETH_I40E_USE_LATEST_VEC);
	if (occurrences) {
		/* Duplicated keys are tolerated but reported to the user. */
		if (occurrences > 1)
			PMD_DRV_LOG(WARNING, "More than one argument \"%s\" and only "
				    "the first invalid or last valid one is used !",
				    ETH_I40E_USE_LATEST_VEC);

		if (rte_kvargs_process(args, ETH_I40E_USE_LATEST_VEC,
				       i40e_parse_latest_vec_handler,
				       adapter) < 0)
			ret = -EINVAL;
	}

	/* Single release point for the parsed list on every path past parsing. */
	rte_kvargs_free(args);
	return ret;
}
#define I40E_ALARM_INTERVAL 50000 /* us */
static int
@ -1266,6 +1326,8 @@ eth_i40e_dev_init(struct rte_eth_dev *dev, void *init_params __rte_unused)
/* Check if need to support multi-driver */
i40e_support_multi_driver(dev);
/* Check if users want the latest supported vec path */
i40e_use_latest_vec(dev);
/* Make sure all is clean before doing PF reset */
i40e_clear_hw(hw);
@ -12599,4 +12661,5 @@ RTE_PMD_REGISTER_PARAM_STRING(net_i40e,
ETH_I40E_FLOATING_VEB_ARG "=1"
ETH_I40E_FLOATING_VEB_LIST_ARG "=<string>"
ETH_I40E_QUEUE_NUM_PER_VF_ARG "=1|2|4|8|16"
ETH_I40E_SUPPORT_MULTI_DRIVER "=1");
ETH_I40E_SUPPORT_MULTI_DRIVER "=1"
ETH_I40E_USE_LATEST_VEC "=0|1");

View File

@ -1078,6 +1078,9 @@ struct i40e_adapter {
uint64_t pctypes_tbl[I40E_FLOW_TYPE_MAX] __rte_cache_min_aligned;
uint64_t flow_types_mask;
uint64_t pctypes_mask;
/* For devargs */
uint8_t use_latest_vec;
};
/**

View File

@ -2909,6 +2909,35 @@ i40e_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
qinfo->conf.offloads = txq->offloads;
}
/*
 * Pick the newest vector Rx burst function available.
 *
 * On x86 builds the AVX2 variants are returned when the AVX2 CPU flag is
 * reported as enabled; otherwise the default vector variants are used.
 * scatter selects between the scattered and non-scattered callback.
 */
static eth_rx_burst_t
i40e_get_latest_rx_vec(bool scatter)
{
#ifdef RTE_ARCH_X86
	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2)) {
		if (scatter)
			return i40e_recv_scattered_pkts_vec_avx2;
		return i40e_recv_pkts_vec_avx2;
	}
#endif
	if (scatter)
		return i40e_recv_scattered_pkts_vec;
	return i40e_recv_pkts_vec;
}
/*
 * Pick the recommended vector Rx burst function.
 *
 * scatter selects between the scattered and non-scattered callback.
 */
static eth_rx_burst_t
i40e_get_recommend_rx_vec(bool scatter)
{
#ifdef RTE_ARCH_X86
	/*
	 * AVX frequency can differ from the base frequency, so the AVX2
	 * version is limited to later platforms (detected here through the
	 * AVX512F flag) rather than every CPU that could theoretically run it.
	 */
	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F)) {
		if (scatter)
			return i40e_recv_scattered_pkts_vec_avx2;
		return i40e_recv_pkts_vec_avx2;
	}
#endif
	if (scatter)
		return i40e_recv_scattered_pkts_vec;
	return i40e_recv_pkts_vec;
}
void __attribute__((cold))
i40e_set_rx_function(struct rte_eth_dev *dev)
{
@ -2940,57 +2969,17 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
}
}
if (dev->data->scattered_rx) {
/* Set the non-LRO scattered callback: there are Vector and
* single allocation versions.
*/
if (ad->rx_vec_allowed) {
PMD_INIT_LOG(DEBUG, "Using Vector Scattered Rx "
"callback (port=%d).",
dev->data->port_id);
dev->rx_pkt_burst = i40e_recv_scattered_pkts_vec;
#ifdef RTE_ARCH_X86
/*
* since AVX frequency can be different to base
* frequency, limit use of AVX2 version to later
* plaforms, not all those that could theoretically
* run it.
*/
if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
dev->rx_pkt_burst =
i40e_recv_scattered_pkts_vec_avx2;
#endif
} else {
PMD_INIT_LOG(DEBUG, "Using a Scattered with bulk "
"allocation callback (port=%d).",
dev->data->port_id);
dev->rx_pkt_burst = i40e_recv_scattered_pkts;
}
/* If parameters allow we are going to choose between the following
* callbacks:
* - Vector
* - Bulk Allocation
* - Single buffer allocation (the simplest one)
*/
} else if (ad->rx_vec_allowed) {
PMD_INIT_LOG(DEBUG, "Vector rx enabled, please make sure RX "
"burst size no less than %d (port=%d).",
RTE_I40E_DESCS_PER_LOOP,
dev->data->port_id);
dev->rx_pkt_burst = i40e_recv_pkts_vec;
#ifdef RTE_ARCH_X86
/*
* since AVX frequency can be different to base
* frequency, limit use of AVX2 version to later
* plaforms, not all those that could theoretically
* run it.
*/
if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
dev->rx_pkt_burst = i40e_recv_pkts_vec_avx2;
#endif
} else if (ad->rx_bulk_alloc_allowed) {
if (ad->rx_vec_allowed) {
/* Vec Rx path */
PMD_INIT_LOG(DEBUG, "Vector Rx path will be used on port=%d.",
dev->data->port_id);
if (ad->use_latest_vec)
dev->rx_pkt_burst =
i40e_get_latest_rx_vec(dev->data->scattered_rx);
else
dev->rx_pkt_burst =
i40e_get_recommend_rx_vec(dev->data->scattered_rx);
} else if (!dev->data->scattered_rx && ad->rx_bulk_alloc_allowed) {
PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
"satisfied. Rx Burst Bulk Alloc function "
"will be used on port=%d.",
@ -2998,12 +2987,12 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
dev->rx_pkt_burst = i40e_recv_pkts_bulk_alloc;
} else {
PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are not "
"satisfied, or Scattered Rx is requested "
"(port=%d).",
/* Simple Rx Path. */
PMD_INIT_LOG(DEBUG, "Simple Rx path will be used on port=%d.",
dev->data->port_id);
dev->rx_pkt_burst = i40e_recv_pkts;
dev->rx_pkt_burst = dev->data->scattered_rx ?
i40e_recv_scattered_pkts :
i40e_recv_pkts;
}
/* Propagate information about RX function choice through all queues. */
@ -3049,6 +3038,31 @@ i40e_set_tx_function_flag(struct rte_eth_dev *dev, struct i40e_tx_queue *txq)
txq->queue_id);
}
/*
 * Pick the newest vector Tx burst function available: the AVX2 variant on
 * x86 builds when the AVX2 CPU flag is reported as enabled, otherwise the
 * default vector variant.
 */
static eth_tx_burst_t
i40e_get_latest_tx_vec(void)
{
	eth_tx_burst_t burst_fn = i40e_xmit_pkts_vec;

#ifdef RTE_ARCH_X86
	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX2))
		burst_fn = i40e_xmit_pkts_vec_avx2;
#endif
	return burst_fn;
}
/*
 * Pick the recommended vector Tx burst function.
 */
static eth_tx_burst_t
i40e_get_recommend_tx_vec(void)
{
	eth_tx_burst_t burst_fn = i40e_xmit_pkts_vec;

#ifdef RTE_ARCH_X86
	/*
	 * AVX frequency can differ from the base frequency, so the AVX2
	 * version is limited to later platforms (detected here through the
	 * AVX512F flag) rather than every CPU that could theoretically run it.
	 */
	if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
		burst_fn = i40e_xmit_pkts_vec_avx2;
#endif
	return burst_fn;
}
void __attribute__((cold))
i40e_set_tx_function(struct rte_eth_dev *dev)
{
@ -3073,17 +3087,12 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
if (ad->tx_simple_allowed) {
if (ad->tx_vec_allowed) {
PMD_INIT_LOG(DEBUG, "Vector tx finally be used.");
dev->tx_pkt_burst = i40e_xmit_pkts_vec;
#ifdef RTE_ARCH_X86
/*
* since AVX frequency can be different to base
* frequency, limit use of AVX2 version to later
* plaforms, not all those that could theoretically
* run it.
*/
if (rte_cpu_get_flag_enabled(RTE_CPUFLAG_AVX512F))
dev->tx_pkt_burst = i40e_xmit_pkts_vec_avx2;
#endif
if (ad->use_latest_vec)
dev->tx_pkt_burst =
i40e_get_latest_tx_vec();
else
dev->tx_pkt_burst =
i40e_get_recommend_tx_vec();
} else {
PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
dev->tx_pkt_burst = i40e_xmit_pkts_simple;