net/hns3: support NEON Tx
This patch adds NEON vector instructions to optimize Tx burst process. Signed-off-by: Huisong Li <lihuisong@huawei.com> Signed-off-by: Wei Hu (Xavier) <xavier.huwei@huawei.com> Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
This commit is contained in:
parent
7ef933908f
commit
e31f123db0
@ -2353,6 +2353,8 @@ hns3_dev_configure(struct rte_eth_dev *dev)
|
||||
|
||||
hns->rx_simple_allowed = true;
|
||||
hns->tx_simple_allowed = true;
|
||||
hns->tx_vec_allowed = true;
|
||||
|
||||
hns3_init_rx_ptype_tble(dev);
|
||||
hw->adapter_state = HNS3_NIC_CONFIGURED;
|
||||
|
||||
|
@ -644,6 +644,8 @@ struct hns3_adapter {
|
||||
|
||||
bool rx_simple_allowed;
|
||||
bool tx_simple_allowed;
|
||||
bool tx_vec_allowed;
|
||||
|
||||
struct hns3_ptype_table ptype_tbl __rte_cache_min_aligned;
|
||||
};
|
||||
|
||||
|
@ -823,6 +823,8 @@ hns3vf_dev_configure(struct rte_eth_dev *dev)
|
||||
|
||||
hns->rx_simple_allowed = true;
|
||||
hns->tx_simple_allowed = true;
|
||||
hns->tx_vec_allowed = true;
|
||||
|
||||
hns3_init_rx_ptype_tble(dev);
|
||||
|
||||
hw->adapter_state = HNS3_NIC_CONFIGURED;
|
||||
|
@ -95,6 +95,8 @@ hns3_tx_queue_release(void *queue)
|
||||
rte_memzone_free(txq->mz);
|
||||
if (txq->sw_ring)
|
||||
rte_free(txq->sw_ring);
|
||||
if (txq->free)
|
||||
rte_free(txq->free);
|
||||
rte_free(txq);
|
||||
}
|
||||
}
|
||||
@ -1020,6 +1022,7 @@ hns3_fake_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx,
|
||||
|
||||
/* Don't need alloc sw_ring, because upper applications don't use it */
|
||||
txq->sw_ring = NULL;
|
||||
txq->free = NULL;
|
||||
|
||||
txq->hns = hns;
|
||||
txq->tx_deferred_start = false;
|
||||
@ -2052,6 +2055,15 @@ hns3_tx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
|
||||
txq->tx_bd_ready = txq->nb_tx_desc - 1;
|
||||
txq->tx_free_thresh = tx_free_thresh;
|
||||
txq->tx_rs_thresh = tx_rs_thresh;
|
||||
txq->free = rte_zmalloc_socket("hns3 TX mbuf free array",
|
||||
sizeof(struct rte_mbuf *) * txq->tx_rs_thresh,
|
||||
RTE_CACHE_LINE_SIZE, socket_id);
|
||||
if (!txq->free) {
|
||||
hns3_err(hw, "failed to allocate tx mbuf free array!");
|
||||
hns3_tx_queue_release(txq);
|
||||
return -ENOMEM;
|
||||
}
|
||||
|
||||
txq->port_id = dev->data->port_id;
|
||||
txq->pvid_state = hw->port_base_vlan_cfg.state;
|
||||
txq->configured = true;
|
||||
@ -3105,6 +3117,20 @@ hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
|
||||
return nb_tx;
|
||||
}
|
||||
|
||||
int __rte_weak
|
||||
hns3_tx_check_vec_support(__rte_unused struct rte_eth_dev *dev)
|
||||
{
|
||||
return -ENOTSUP;
|
||||
}
|
||||
|
||||
uint16_t __rte_weak
|
||||
hns3_xmit_pkts_vec(__rte_unused void *tx_queue,
|
||||
__rte_unused struct rte_mbuf **tx_pkts,
|
||||
__rte_unused uint16_t nb_pkts)
|
||||
{
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
hns3_tx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id,
|
||||
struct rte_eth_burst_mode *mode)
|
||||
@ -3116,6 +3142,8 @@ hns3_tx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id,
|
||||
info = "Scalar Simple";
|
||||
else if (pkt_burst == hns3_xmit_pkts)
|
||||
info = "Scalar";
|
||||
else if (pkt_burst == hns3_xmit_pkts_vec)
|
||||
info = "Vector Neon";
|
||||
|
||||
if (info == NULL)
|
||||
return -EINVAL;
|
||||
@ -3131,6 +3159,11 @@ hns3_get_tx_function(struct rte_eth_dev *dev, eth_tx_prep_t *prep)
|
||||
uint64_t offloads = dev->data->dev_conf.txmode.offloads;
|
||||
struct hns3_adapter *hns = dev->data->dev_private;
|
||||
|
||||
if (hns->tx_vec_allowed && hns3_tx_check_vec_support(dev) == 0) {
|
||||
*prep = NULL;
|
||||
return hns3_xmit_pkts_vec;
|
||||
}
|
||||
|
||||
if (hns->tx_simple_allowed &&
|
||||
offloads == (offloads & DEV_TX_OFFLOAD_MBUF_FAST_FREE)) {
|
||||
*prep = NULL;
|
||||
|
@ -17,6 +17,10 @@
|
||||
#define HNS3_DEFAULT_TX_RS_THRESH 32
|
||||
#define HNS3_TX_FAST_FREE_AHEAD 64
|
||||
|
||||
#define HNS3_UINT8_BIT 8
|
||||
#define HNS3_UINT16_BIT 16
|
||||
#define HNS3_UINT32_BIT 32
|
||||
|
||||
#define HNS3_512_BD_BUF_SIZE 512
|
||||
#define HNS3_1K_BD_BUF_SIZE 1024
|
||||
#define HNS3_2K_BD_BUF_SIZE 2048
|
||||
@ -132,6 +136,13 @@
|
||||
#define HNS3_L3_LEN_UNIT 2UL
|
||||
#define HNS3_L4_LEN_UNIT 2UL
|
||||
|
||||
#define HNS3_TXD_DEFAULT_BDTYPE 0
|
||||
#define HNS3_TXD_VLD_CMD (0x1 << HNS3_TXD_VLD_B)
|
||||
#define HNS3_TXD_FE_CMD (0x1 << HNS3_TXD_FE_B)
|
||||
#define HNS3_TXD_DEFAULT_VLD_FE_BDTYPE \
|
||||
(HNS3_TXD_VLD_CMD | HNS3_TXD_FE_CMD | HNS3_TXD_DEFAULT_BDTYPE)
|
||||
#define HNS3_TXD_SEND_SIZE_SHIFT 16
|
||||
|
||||
enum hns3_pkt_l2t_type {
|
||||
HNS3_L2_TYPE_UNICAST,
|
||||
HNS3_L2_TYPE_MULTICAST,
|
||||
@ -317,9 +328,13 @@ struct hns3_tx_queue {
|
||||
* all descriptors are cleared. and then free all mbufs in the batch.
|
||||
* - tx_rs_thresh
|
||||
* Number of mbufs released at a time.
|
||||
|
||||
*
|
||||
* - free
|
||||
* Tx mbuf free array used for preserving temporarily address of mbuf
|
||||
* released back to mempool, when releasing mbuf in batches.
|
||||
*/
|
||||
uint16_t tx_rs_thresh;
|
||||
struct rte_mbuf **free;
|
||||
|
||||
/*
|
||||
* port based vlan configuration state.
|
||||
@ -558,6 +573,8 @@ uint16_t hns3_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
|
||||
uint16_t nb_pkts);
|
||||
uint16_t hns3_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
|
||||
uint16_t nb_pkts);
|
||||
uint16_t hns3_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
|
||||
uint16_t nb_pkts);
|
||||
int hns3_tx_burst_mode_get(struct rte_eth_dev *dev,
|
||||
__rte_unused uint16_t queue_id,
|
||||
struct rte_eth_burst_mode *mode);
|
||||
@ -577,6 +594,7 @@ int hns3_restore_gro_conf(struct hns3_hw *hw);
|
||||
void hns3_update_all_queues_pvid_state(struct hns3_hw *hw);
|
||||
void hns3_rx_scattered_reset(struct rte_eth_dev *dev);
|
||||
void hns3_rx_scattered_calc(struct rte_eth_dev *dev);
|
||||
int hns3_tx_check_vec_support(struct rte_eth_dev *dev);
|
||||
void hns3_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
|
||||
struct rte_eth_rxq_info *qinfo);
|
||||
void hns3_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
|
||||
|
47
drivers/net/hns3/hns3_rxtx_vec.c
Normal file
47
drivers/net/hns3/hns3_rxtx_vec.c
Normal file
@ -0,0 +1,47 @@
|
||||
/* SPDX-License-Identifier: BSD-3-Clause
|
||||
* Copyright(c) 2020 Hisilicon Limited.
|
||||
*/
|
||||
|
||||
#include <rte_io.h>
|
||||
#include <rte_ethdev_driver.h>
|
||||
|
||||
#include "hns3_ethdev.h"
|
||||
#include "hns3_rxtx.h"
|
||||
#include "hns3_rxtx_vec.h"
|
||||
|
||||
#if defined RTE_ARCH_ARM64
|
||||
#include "hns3_rxtx_vec_neon.h"
|
||||
#endif
|
||||
|
||||
int
|
||||
hns3_tx_check_vec_support(struct rte_eth_dev *dev)
|
||||
{
|
||||
struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode;
|
||||
|
||||
/* Only support DEV_TX_OFFLOAD_MBUF_FAST_FREE */
|
||||
if (txmode->offloads != DEV_TX_OFFLOAD_MBUF_FAST_FREE)
|
||||
return -ENOTSUP;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
uint16_t
|
||||
hns3_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
|
||||
{
|
||||
struct hns3_tx_queue *txq = (struct hns3_tx_queue *)tx_queue;
|
||||
uint16_t nb_tx = 0;
|
||||
|
||||
while (nb_pkts) {
|
||||
uint16_t ret, new_burst;
|
||||
|
||||
new_burst = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
|
||||
ret = hns3_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
|
||||
new_burst);
|
||||
nb_tx += ret;
|
||||
nb_pkts -= ret;
|
||||
if (ret < new_burst)
|
||||
break;
|
||||
}
|
||||
|
||||
return nb_tx;
|
||||
}
|
57
drivers/net/hns3/hns3_rxtx_vec.h
Normal file
57
drivers/net/hns3/hns3_rxtx_vec.h
Normal file
@ -0,0 +1,57 @@
|
||||
/* SPDX-License-Identifier: BSD-3-Clause
|
||||
* Copyright(c) 2020 Hisilicon Limited.
|
||||
*/
|
||||
|
||||
#ifndef _HNS3_RXTX_VEC_H_
|
||||
#define _HNS3_RXTX_VEC_H_
|
||||
|
||||
#include "hns3_rxtx.h"
|
||||
#include "hns3_ethdev.h"
|
||||
|
||||
static inline void
|
||||
hns3_tx_free_buffers(struct hns3_tx_queue *txq)
|
||||
{
|
||||
struct rte_mbuf **free = txq->free;
|
||||
struct hns3_entry *tx_entry;
|
||||
struct hns3_desc *tx_desc;
|
||||
struct rte_mbuf *m;
|
||||
int nb_free = 0;
|
||||
int i;
|
||||
|
||||
/*
|
||||
* All mbufs can be released only when the VLD bits of all
|
||||
* descriptors in a batch are cleared.
|
||||
*/
|
||||
tx_desc = &txq->tx_ring[txq->next_to_clean];
|
||||
for (i = 0; i < txq->tx_rs_thresh; i++, tx_desc++) {
|
||||
if (tx_desc->tx.tp_fe_sc_vld_ra_ri &
|
||||
rte_le_to_cpu_16(BIT(HNS3_TXD_VLD_B)))
|
||||
return;
|
||||
}
|
||||
|
||||
tx_entry = &txq->sw_ring[txq->next_to_clean];
|
||||
for (i = 0; i < txq->tx_rs_thresh; i++, tx_entry++) {
|
||||
m = rte_pktmbuf_prefree_seg(tx_entry->mbuf);
|
||||
tx_entry->mbuf = NULL;
|
||||
|
||||
if (m == NULL)
|
||||
continue;
|
||||
|
||||
if (nb_free && m->pool != free[0]->pool) {
|
||||
rte_mempool_put_bulk(free[0]->pool, (void **)free,
|
||||
nb_free);
|
||||
nb_free = 0;
|
||||
}
|
||||
free[nb_free++] = m;
|
||||
}
|
||||
|
||||
if (nb_free)
|
||||
rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
|
||||
|
||||
/* Update numbers of available descriptor due to buffer freed */
|
||||
txq->tx_bd_ready += txq->tx_rs_thresh;
|
||||
txq->next_to_clean += txq->tx_rs_thresh;
|
||||
if (txq->next_to_clean >= txq->nb_tx_desc)
|
||||
txq->next_to_clean = 0;
|
||||
}
|
||||
#endif /* _HNS3_RXTX_VEC_H_ */
|
85
drivers/net/hns3/hns3_rxtx_vec_neon.h
Normal file
85
drivers/net/hns3/hns3_rxtx_vec_neon.h
Normal file
@ -0,0 +1,85 @@
|
||||
/* SPDX-License-Identifier: BSD-3-Clause
|
||||
* Copyright(c) 2020 Hisilicon Limited.
|
||||
*/
|
||||
|
||||
#ifndef _HNS3_RXTX_VEC_NEON_H_
|
||||
#define _HNS3_RXTX_VEC_NEON_H_
|
||||
|
||||
#include <arm_neon.h>
|
||||
|
||||
#pragma GCC diagnostic ignored "-Wcast-qual"
|
||||
|
||||
static inline void
|
||||
hns3_vec_tx(volatile struct hns3_desc *desc, struct rte_mbuf *pkt)
|
||||
{
|
||||
uint64x2_t val1 = {
|
||||
pkt->buf_iova + pkt->data_off,
|
||||
((uint64_t)pkt->data_len) << HNS3_TXD_SEND_SIZE_SHIFT
|
||||
};
|
||||
uint64x2_t val2 = {
|
||||
0,
|
||||
((uint64_t)HNS3_TXD_DEFAULT_VLD_FE_BDTYPE) << HNS3_UINT32_BIT
|
||||
};
|
||||
vst1q_u64((uint64_t *)&desc->addr, val1);
|
||||
vst1q_u64((uint64_t *)&desc->tx.outer_vlan_tag, val2);
|
||||
}
|
||||
|
||||
static uint16_t
|
||||
hns3_xmit_fixed_burst_vec(void *__restrict tx_queue,
|
||||
struct rte_mbuf **__restrict tx_pkts,
|
||||
uint16_t nb_pkts)
|
||||
{
|
||||
struct hns3_tx_queue *txq = (struct hns3_tx_queue *)tx_queue;
|
||||
volatile struct hns3_desc *tx_desc;
|
||||
struct hns3_entry *tx_entry;
|
||||
uint16_t next_to_use;
|
||||
uint16_t nb_commit;
|
||||
uint16_t nb_tx;
|
||||
uint16_t n, i;
|
||||
|
||||
if (txq->tx_bd_ready < txq->tx_free_thresh)
|
||||
hns3_tx_free_buffers(txq);
|
||||
|
||||
nb_commit = RTE_MIN(txq->tx_bd_ready, nb_pkts);
|
||||
if (unlikely(nb_commit == 0)) {
|
||||
txq->queue_full_cnt++;
|
||||
return 0;
|
||||
}
|
||||
nb_tx = nb_commit;
|
||||
|
||||
next_to_use = txq->next_to_use;
|
||||
tx_desc = &txq->tx_ring[next_to_use];
|
||||
tx_entry = &txq->sw_ring[next_to_use];
|
||||
|
||||
/*
|
||||
* We need to deal with n descriptors first for better performance,
|
||||
* if nb_commit is greater than the difference between txq->nb_tx_desc
|
||||
* and next_to_use in sw_ring and tx_ring.
|
||||
*/
|
||||
n = txq->nb_tx_desc - next_to_use;
|
||||
if (nb_commit >= n) {
|
||||
for (i = 0; i < n; i++, tx_pkts++, tx_desc++) {
|
||||
hns3_vec_tx(tx_desc, *tx_pkts);
|
||||
tx_entry[i].mbuf = *tx_pkts;
|
||||
}
|
||||
|
||||
nb_commit -= n;
|
||||
next_to_use = 0;
|
||||
tx_desc = &txq->tx_ring[next_to_use];
|
||||
tx_entry = &txq->sw_ring[next_to_use];
|
||||
}
|
||||
|
||||
for (i = 0; i < nb_commit; i++, tx_pkts++, tx_desc++) {
|
||||
hns3_vec_tx(tx_desc, *tx_pkts);
|
||||
tx_entry[i].mbuf = *tx_pkts;
|
||||
}
|
||||
|
||||
next_to_use += nb_commit;
|
||||
txq->next_to_use = next_to_use;
|
||||
txq->tx_bd_ready -= nb_tx;
|
||||
|
||||
hns3_write_reg_opt(txq->io_tail_reg, nb_tx);
|
||||
|
||||
return nb_tx;
|
||||
}
|
||||
#endif /* _HNS3_RXTX_VEC_NEON_H_ */
|
@ -28,3 +28,7 @@ sources = files('hns3_cmd.c',
|
||||
'hns3_mp.c')
|
||||
|
||||
deps += ['hash']
|
||||
|
||||
if arch_subdir == 'arm' and dpdk_conf.get('RTE_ARCH_64')
|
||||
sources += files('hns3_rxtx_vec.c')
|
||||
endif
|
||||
|
Loading…
Reference in New Issue
Block a user