net/i40e: add AVX512 vector path

Add AVX512 support for i40e PMD. This patch adds i40e_rxtx_vec_avx512.c
to support i40e AVX512 vPMD.

This patch aims to enable AVX512 on i40e vPMD. Main changes are focus
on Rx path compared with AVX2 vPMD.

Signed-off-by: Leyi Rong <leyi.rong@intel.com>
Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
Acked-by: Wenzhuo Lu <wenzhuo.lu@intel.com>
This commit is contained in:
Leyi Rong 2021-01-14 14:39:50 +08:00 committed by Ferruh Yigit
parent 6ada10deac
commit e6a6a13891
4 changed files with 1130 additions and 22 deletions

View File

@ -1767,6 +1767,10 @@ i40e_dev_supported_ptypes_get(struct rte_eth_dev *dev)
dev->rx_pkt_burst == i40e_recv_scattered_pkts ||
dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec ||
dev->rx_pkt_burst == i40e_recv_pkts_vec ||
#ifdef CC_AVX512_SUPPORT
dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec_avx512 ||
dev->rx_pkt_burst == i40e_recv_pkts_vec_avx512 ||
#endif
dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec_avx2 ||
dev->rx_pkt_burst == i40e_recv_pkts_vec_avx2)
return ptypes;
@ -3162,6 +3166,7 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
uint16_t rx_using_sse, i;
bool use_avx2 = false;
bool use_avx512 = false;
/* In order to allow Vector Rx there are a few configuration
* conditions to be met and Rx Bulk Allocation should be allowed.
*/
@ -3185,28 +3190,51 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
}
}
use_avx2 = get_avx_supported(0);
use_avx512 = get_avx_supported(1);
if (!use_avx512)
use_avx2 = get_avx_supported(0);
}
}
if (ad->rx_vec_allowed &&
rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
if (dev->data->scattered_rx) {
PMD_INIT_LOG(DEBUG,
"Using %sVector Scattered Rx (port %d).",
use_avx2 ? "avx2 " : "",
dev->data->port_id);
dev->rx_pkt_burst = use_avx2 ?
i40e_recv_scattered_pkts_vec_avx2 :
i40e_recv_scattered_pkts_vec;
if (use_avx512) {
#ifdef CC_AVX512_SUPPORT
PMD_DRV_LOG(NOTICE,
"Using AVX512 Vector Scattered Rx (port %d).",
dev->data->port_id);
dev->rx_pkt_burst =
i40e_recv_scattered_pkts_vec_avx512;
#endif
} else {
PMD_INIT_LOG(DEBUG,
"Using %sVector Scattered Rx (port %d).",
use_avx2 ? "avx2 " : "",
dev->data->port_id);
dev->rx_pkt_burst = use_avx2 ?
i40e_recv_scattered_pkts_vec_avx2 :
i40e_recv_scattered_pkts_vec;
}
} else {
PMD_INIT_LOG(DEBUG,
"Using %sVector Rx (port %d).",
use_avx2 ? "avx2 " : "",
dev->data->port_id);
dev->rx_pkt_burst = use_avx2 ?
i40e_recv_pkts_vec_avx2 :
i40e_recv_pkts_vec;
if (use_avx512) {
#ifdef CC_AVX512_SUPPORT
PMD_DRV_LOG(NOTICE,
"Using AVX512 Vector Rx (port %d).",
dev->data->port_id);
dev->rx_pkt_burst =
i40e_recv_pkts_vec_avx512;
#endif
} else {
PMD_INIT_LOG(DEBUG,
"Using %sVector Rx (port %d).",
use_avx2 ? "avx2 " : "",
dev->data->port_id);
dev->rx_pkt_burst = use_avx2 ?
i40e_recv_pkts_vec_avx2 :
i40e_recv_pkts_vec;
}
}
} else if (!dev->data->scattered_rx && ad->rx_bulk_alloc_allowed) {
PMD_INIT_LOG(DEBUG, "Rx Burst Bulk Alloc Preconditions are "
@ -3229,6 +3257,10 @@ i40e_set_rx_function(struct rte_eth_dev *dev)
rx_using_sse =
(dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec ||
dev->rx_pkt_burst == i40e_recv_pkts_vec ||
#ifdef CC_AVX512_SUPPORT
dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec_avx512 ||
dev->rx_pkt_burst == i40e_recv_pkts_vec_avx512 ||
#endif
dev->rx_pkt_burst == i40e_recv_scattered_pkts_vec_avx2 ||
dev->rx_pkt_burst == i40e_recv_pkts_vec_avx2);
@ -3249,6 +3281,10 @@ static const struct {
{ i40e_recv_pkts_bulk_alloc, "Scalar Bulk Alloc" },
{ i40e_recv_pkts, "Scalar" },
#ifdef RTE_ARCH_X86
#ifdef CC_AVX512_SUPPORT
{ i40e_recv_scattered_pkts_vec_avx512, "Vector AVX512 Scattered" },
{ i40e_recv_pkts_vec_avx512, "Vector AVX512" },
#endif
{ i40e_recv_scattered_pkts_vec_avx2, "Vector AVX2 Scattered" },
{ i40e_recv_pkts_vec_avx2, "Vector AVX2" },
{ i40e_recv_scattered_pkts_vec, "Vector SSE Scattered" },
@ -3315,6 +3351,7 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
I40E_DEV_PRIVATE_TO_ADAPTER(dev->data->dev_private);
int i;
bool use_avx2 = false;
bool use_avx512 = false;
if (rte_eal_process_type() == RTE_PROC_PRIMARY) {
if (ad->tx_vec_allowed) {
@ -3328,19 +3365,30 @@ i40e_set_tx_function(struct rte_eth_dev *dev)
}
}
use_avx2 = get_avx_supported(0);
use_avx512 = get_avx_supported(1);
if (!use_avx512)
use_avx2 = get_avx_supported(0);
}
}
if (ad->tx_simple_allowed) {
if (ad->tx_vec_allowed &&
rte_vect_get_max_simd_bitwidth() >= RTE_VECT_SIMD_128) {
PMD_INIT_LOG(DEBUG, "Using %sVector Tx (port %d).",
use_avx2 ? "avx2 " : "",
dev->data->port_id);
dev->tx_pkt_burst = use_avx2 ?
i40e_xmit_pkts_vec_avx2 :
i40e_xmit_pkts_vec;
if (use_avx512) {
#ifdef CC_AVX512_SUPPORT
PMD_DRV_LOG(NOTICE, "Using AVX512 Vector Tx (port %d).",
dev->data->port_id);
dev->tx_pkt_burst = i40e_xmit_pkts_vec_avx512;
#endif
} else {
PMD_INIT_LOG(DEBUG, "Using %sVector Tx (port %d).",
use_avx2 ? "avx2 " : "",
dev->data->port_id);
dev->tx_pkt_burst = use_avx2 ?
i40e_xmit_pkts_vec_avx2 :
i40e_xmit_pkts_vec;
}
} else {
PMD_INIT_LOG(DEBUG, "Simple tx finally be used.");
dev->tx_pkt_burst = i40e_xmit_pkts_simple;
@ -3360,6 +3408,9 @@ static const struct {
{ i40e_xmit_pkts_simple, "Scalar Simple" },
{ i40e_xmit_pkts, "Scalar" },
#ifdef RTE_ARCH_X86
#ifdef CC_AVX512_SUPPORT
{ i40e_xmit_pkts_vec_avx512, "Vector AVX512" },
#endif
{ i40e_xmit_pkts_vec_avx2, "Vector AVX2" },
{ i40e_xmit_pkts_vec, "Vector SSE" },
#elif defined(RTE_ARCH_ARM64)

View File

@ -249,6 +249,15 @@ uint16_t i40e_recv_scattered_pkts_vec_avx2(void *rx_queue,
uint16_t i40e_xmit_pkts_vec_avx2(void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
int i40e_get_monitor_addr(void *rx_queue, struct rte_power_monitor_cond *pmc);
uint16_t i40e_recv_pkts_vec_avx512(void *rx_queue,
struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);
uint16_t i40e_recv_scattered_pkts_vec_avx512(void *rx_queue,
struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);
uint16_t i40e_xmit_pkts_vec_avx512(void *tx_queue,
struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
/* For each value it means, datasheet of hardware can tell more details
*

File diff suppressed because it is too large Load Diff

View File

@ -45,6 +45,30 @@ if arch_subdir == 'x86'
c_args: [cflags, '-mavx2'])
objs += i40e_avx2_lib.extract_objects('i40e_rxtx_vec_avx2.c')
endif
i40e_avx512_cpu_support = (
cc.get_define('__AVX512F__', args: machine_args) != '' and
cc.get_define('__AVX512BW__', args: machine_args) != '')
i40e_avx512_cc_support = (
not machine_args.contains('-mno-avx512f') and
cc.has_argument('-mavx512f') and
cc.has_argument('-mavx512bw'))
if i40e_avx512_cpu_support == true or i40e_avx512_cc_support == true
cflags += ['-DCC_AVX512_SUPPORT']
avx512_args = [cflags, '-mavx512f', '-mavx512bw']
if cc.has_argument('-march=skylake-avx512')
avx512_args += '-march=skylake-avx512'
endif
i40e_avx512_lib = static_library('i40e_avx512_lib',
'i40e_rxtx_vec_avx512.c',
dependencies: [static_rte_ethdev,
static_rte_kvargs, static_rte_hash],
include_directories: includes,
c_args: avx512_args)
objs += i40e_avx512_lib.extract_objects('i40e_rxtx_vec_avx512.c')
endif
elif arch_subdir == 'ppc'
dpdk_conf.set('RTE_LIBRTE_I40E_INC_VECTOR', 1)
sources += files('i40e_rxtx_vec_altivec.c')