a3d4f4d291
This patch adds NEON vector instructions to optimize Rx burst process. Signed-off-by: Chengwen Feng <fengchengwen@huawei.com> Signed-off-by: Wei Hu (Xavier) <xavier.huwei@huawei.com> Signed-off-by: Huisong Li <lihuisong@huawei.com>
215 lines
5.4 KiB
C
215 lines
5.4 KiB
C
/* SPDX-License-Identifier: BSD-3-Clause
|
|
* Copyright(c) 2020 Hisilicon Limited.
|
|
*/
|
|
|
|
#include <rte_io.h>
|
|
#include <rte_ethdev_driver.h>
|
|
|
|
#include "hns3_ethdev.h"
|
|
#include "hns3_rxtx.h"
|
|
#include "hns3_rxtx_vec.h"
|
|
|
|
#if defined RTE_ARCH_ARM64
|
|
#include "hns3_rxtx_vec_neon.h"
|
|
#endif
|
|
|
|
int
|
|
hns3_tx_check_vec_support(struct rte_eth_dev *dev)
|
|
{
|
|
struct rte_eth_txmode *txmode = &dev->data->dev_conf.txmode;
|
|
|
|
/* Only support DEV_TX_OFFLOAD_MBUF_FAST_FREE */
|
|
if (txmode->offloads != DEV_TX_OFFLOAD_MBUF_FAST_FREE)
|
|
return -ENOTSUP;
|
|
|
|
return 0;
|
|
}
|
|
|
|
uint16_t
|
|
hns3_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
|
|
{
|
|
struct hns3_tx_queue *txq = (struct hns3_tx_queue *)tx_queue;
|
|
uint16_t nb_tx = 0;
|
|
|
|
while (nb_pkts) {
|
|
uint16_t ret, new_burst;
|
|
|
|
new_burst = RTE_MIN(nb_pkts, txq->tx_rs_thresh);
|
|
ret = hns3_xmit_fixed_burst_vec(tx_queue, &tx_pkts[nb_tx],
|
|
new_burst);
|
|
nb_tx += ret;
|
|
nb_pkts -= ret;
|
|
if (ret < new_burst)
|
|
break;
|
|
}
|
|
|
|
return nb_tx;
|
|
}
|
|
|
|
static inline void
|
|
hns3_rxq_rearm_mbuf(struct hns3_rx_queue *rxq)
|
|
{
|
|
#define REARM_LOOP_STEP_NUM 4
|
|
struct hns3_entry *rxep = &rxq->sw_ring[rxq->rx_rearm_start];
|
|
struct hns3_desc *rxdp = rxq->rx_ring + rxq->rx_rearm_start;
|
|
uint64_t dma_addr;
|
|
int i;
|
|
|
|
if (unlikely(rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
|
|
HNS3_DEFAULT_RXQ_REARM_THRESH) < 0)) {
|
|
rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
|
|
return;
|
|
}
|
|
|
|
for (i = 0; i < HNS3_DEFAULT_RXQ_REARM_THRESH; i += REARM_LOOP_STEP_NUM,
|
|
rxep += REARM_LOOP_STEP_NUM, rxdp += REARM_LOOP_STEP_NUM) {
|
|
if (likely(i <
|
|
HNS3_DEFAULT_RXQ_REARM_THRESH - REARM_LOOP_STEP_NUM)) {
|
|
rte_prefetch_non_temporal(rxep[4].mbuf);
|
|
rte_prefetch_non_temporal(rxep[5].mbuf);
|
|
rte_prefetch_non_temporal(rxep[6].mbuf);
|
|
rte_prefetch_non_temporal(rxep[7].mbuf);
|
|
}
|
|
|
|
dma_addr = rte_mbuf_data_iova_default(rxep[0].mbuf);
|
|
rxdp[0].addr = rte_cpu_to_le_64(dma_addr);
|
|
rxdp[0].rx.bd_base_info = 0;
|
|
|
|
dma_addr = rte_mbuf_data_iova_default(rxep[1].mbuf);
|
|
rxdp[1].addr = rte_cpu_to_le_64(dma_addr);
|
|
rxdp[1].rx.bd_base_info = 0;
|
|
|
|
dma_addr = rte_mbuf_data_iova_default(rxep[2].mbuf);
|
|
rxdp[2].addr = rte_cpu_to_le_64(dma_addr);
|
|
rxdp[2].rx.bd_base_info = 0;
|
|
|
|
dma_addr = rte_mbuf_data_iova_default(rxep[3].mbuf);
|
|
rxdp[3].addr = rte_cpu_to_le_64(dma_addr);
|
|
rxdp[3].rx.bd_base_info = 0;
|
|
}
|
|
|
|
rxq->rx_rearm_start += HNS3_DEFAULT_RXQ_REARM_THRESH;
|
|
if (rxq->rx_rearm_start >= rxq->nb_rx_desc)
|
|
rxq->rx_rearm_start = 0;
|
|
|
|
rxq->rx_rearm_nb -= HNS3_DEFAULT_RXQ_REARM_THRESH;
|
|
|
|
hns3_write_reg_opt(rxq->io_head_reg, HNS3_DEFAULT_RXQ_REARM_THRESH);
|
|
}
|
|
|
|
uint16_t
|
|
hns3_recv_pkts_vec(void *__restrict rx_queue,
|
|
struct rte_mbuf **__restrict rx_pkts,
|
|
uint16_t nb_pkts)
|
|
{
|
|
struct hns3_rx_queue *rxq = rx_queue;
|
|
struct hns3_desc *rxdp = &rxq->rx_ring[rxq->next_to_use];
|
|
uint64_t bd_err_mask; /* bit mask indicate whick pkts is error */
|
|
uint16_t nb_rx;
|
|
|
|
nb_pkts = RTE_MIN(nb_pkts, HNS3_DEFAULT_RX_BURST);
|
|
nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, HNS3_DEFAULT_DESCS_PER_LOOP);
|
|
|
|
rte_prefetch_non_temporal(rxdp);
|
|
|
|
if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH)
|
|
hns3_rxq_rearm_mbuf(rxq);
|
|
|
|
if (unlikely(!(rxdp->rx.bd_base_info &
|
|
rte_cpu_to_le_32(1u << HNS3_RXD_VLD_B))))
|
|
return 0;
|
|
|
|
rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 0].mbuf);
|
|
rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 1].mbuf);
|
|
rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 2].mbuf);
|
|
rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 3].mbuf);
|
|
|
|
bd_err_mask = 0;
|
|
nb_rx = hns3_recv_burst_vec(rxq, rx_pkts, nb_pkts, &bd_err_mask);
|
|
if (unlikely(bd_err_mask))
|
|
nb_rx = hns3_rx_reassemble_pkts(rx_pkts, nb_rx, bd_err_mask);
|
|
|
|
return nb_rx;
|
|
}
|
|
|
|
static void
|
|
hns3_rxq_vec_setup_rearm_data(struct hns3_rx_queue *rxq)
|
|
{
|
|
uintptr_t p;
|
|
struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */
|
|
|
|
mb_def.nb_segs = 1;
|
|
mb_def.data_off = RTE_PKTMBUF_HEADROOM;
|
|
mb_def.port = rxq->port_id;
|
|
rte_mbuf_refcnt_set(&mb_def, 1);
|
|
|
|
/* prevent compiler reordering: rearm_data covers previous fields */
|
|
rte_compiler_barrier();
|
|
p = (uintptr_t)&mb_def.rearm_data;
|
|
rxq->mbuf_initializer = *(uint64_t *)p;
|
|
}
|
|
|
|
void
|
|
hns3_rxq_vec_setup(struct hns3_rx_queue *rxq)
|
|
{
|
|
struct hns3_entry *sw_ring = &rxq->sw_ring[rxq->nb_rx_desc];
|
|
unsigned int i;
|
|
|
|
memset(&rxq->rx_ring[rxq->nb_rx_desc], 0,
|
|
sizeof(struct hns3_desc) * HNS3_DEFAULT_RX_BURST);
|
|
|
|
memset(&rxq->fake_mbuf, 0, sizeof(rxq->fake_mbuf));
|
|
for (i = 0; i < HNS3_DEFAULT_RX_BURST; i++)
|
|
sw_ring[i].mbuf = &rxq->fake_mbuf;
|
|
|
|
hns3_rxq_vec_setup_rearm_data(rxq);
|
|
|
|
memset(rxq->offset_table, 0, sizeof(rxq->offset_table));
|
|
}
|
|
|
|
#ifndef RTE_LIBRTE_IEEE1588
|
|
static int
|
|
hns3_rxq_vec_check(struct hns3_rx_queue *rxq, void *arg)
|
|
{
|
|
uint32_t min_vec_bds = HNS3_DEFAULT_RXQ_REARM_THRESH +
|
|
HNS3_DEFAULT_RX_BURST;
|
|
|
|
if (rxq->nb_rx_desc < min_vec_bds)
|
|
return -ENOTSUP;
|
|
|
|
if (rxq->nb_rx_desc % HNS3_DEFAULT_RXQ_REARM_THRESH)
|
|
return -ENOTSUP;
|
|
|
|
RTE_SET_USED(arg);
|
|
return 0;
|
|
}
|
|
#endif
|
|
|
|
int
|
|
hns3_rx_check_vec_support(struct rte_eth_dev *dev)
|
|
{
|
|
#ifndef RTE_LIBRTE_IEEE1588
|
|
struct rte_fdir_conf *fconf = &dev->data->dev_conf.fdir_conf;
|
|
struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
|
|
uint64_t offloads_mask = DEV_RX_OFFLOAD_TCP_LRO |
|
|
DEV_RX_OFFLOAD_VLAN;
|
|
|
|
if (dev->data->scattered_rx)
|
|
return -ENOTSUP;
|
|
|
|
if (fconf->mode != RTE_FDIR_MODE_NONE)
|
|
return -ENOTSUP;
|
|
|
|
if (rxmode->offloads & offloads_mask)
|
|
return -ENOTSUP;
|
|
|
|
if (hns3_rxq_iterate(dev, hns3_rxq_vec_check, NULL) != 0)
|
|
return -ENOTSUP;
|
|
|
|
return 0;
|
|
#else
|
|
RTE_SET_USED(dev);
|
|
return -ENOTSUP;
|
|
#endif
|
|
}
|