net/hns3: support NEON Rx

This patch adds NEON vector instructions to optimize Rx burst process.

Signed-off-by: Chengwen Feng <fengchengwen@huawei.com>
Signed-off-by: Wei Hu (Xavier) <xavier.huwei@huawei.com>
Signed-off-by: Huisong Li <lihuisong@huawei.com>
This commit is contained in:
Wei Hu (Xavier) 2020-09-09 17:23:37 +08:00 committed by Ferruh Yigit
parent e31f123db0
commit a3d4f4d291
8 changed files with 514 additions and 8 deletions

View File

@ -2352,6 +2352,7 @@ hns3_dev_configure(struct rte_eth_dev *dev)
goto cfg_err;
hns->rx_simple_allowed = true;
hns->rx_vec_allowed = true;
hns->tx_simple_allowed = true;
hns->tx_vec_allowed = true;

View File

@ -643,6 +643,7 @@ struct hns3_adapter {
};
bool rx_simple_allowed;
bool rx_vec_allowed;
bool tx_simple_allowed;
bool tx_vec_allowed;

View File

@ -822,6 +822,7 @@ hns3vf_dev_configure(struct rte_eth_dev *dev)
goto cfg_err;
hns->rx_simple_allowed = true;
hns->rx_vec_allowed = true;
hns->tx_simple_allowed = true;
hns->tx_vec_allowed = true;

View File

@ -41,9 +41,19 @@ hns3_rx_queue_release_mbufs(struct hns3_rx_queue *rxq)
if (rxq->sw_ring == NULL)
return;
for (i = 0; i < rxq->nb_rx_desc; i++)
if (rxq->sw_ring[i].mbuf)
rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
if (rxq->rx_rearm_nb == 0) {
for (i = 0; i < rxq->nb_rx_desc; i++) {
if (rxq->sw_ring[i].mbuf != NULL)
rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
}
} else {
for (i = rxq->next_to_use;
i != rxq->rx_rearm_start;
i = (i + 1) % rxq->nb_rx_desc) {
if (rxq->sw_ring[i].mbuf != NULL)
rte_pktmbuf_free_seg(rxq->sw_ring[i].mbuf);
}
}
for (i = 0; i < rxq->bulk_mbuf_num; i++)
rte_pktmbuf_free_seg(rxq->bulk_mbuf[i]);
@ -661,10 +671,13 @@ hns3_dev_rx_queue_start(struct hns3_adapter *hns, uint16_t idx)
}
rxq->next_to_use = 0;
rxq->rx_rearm_start = 0;
rxq->rx_free_hold = 0;
rxq->rx_rearm_nb = 0;
rxq->pkt_first_seg = NULL;
rxq->pkt_last_seg = NULL;
hns3_init_rx_queue_hw(rxq);
hns3_rxq_vec_setup(rxq);
return 0;
}
@ -678,6 +691,8 @@ hns3_fake_rx_queue_start(struct hns3_adapter *hns, uint16_t idx)
rxq = (struct hns3_rx_queue *)hw->fkq_data.rx_queues[idx];
rxq->next_to_use = 0;
rxq->rx_free_hold = 0;
rxq->rx_rearm_start = 0;
rxq->rx_rearm_nb = 0;
hns3_init_rx_queue_hw(rxq);
}
@ -860,6 +875,40 @@ hns3_stop_queues(struct hns3_adapter *hns, bool reset_queue)
return 0;
}
/*
* Iterate over all Rx Queue, and call the callback() function for each Rx
* queue.
*
* @param[in] dev
* The target eth dev.
* @param[in] callback
* The function to call for each queue.
* if callback function return nonzero will stop iterate and return it's value
* @param[in] arg
* The arguments to provide the callback function with.
*
* @return
* 0 on success, otherwise with errno set.
*/
int
hns3_rxq_iterate(struct rte_eth_dev *dev,
int (*callback)(struct hns3_rx_queue *, void *), void *arg)
{
uint32_t i;
int ret;
if (dev->data->rx_queues == NULL)
return -EINVAL;
for (i = 0; i < dev->data->nb_rx_queues; i++) {
ret = callback(dev->data->rx_queues[i], arg);
if (ret != 0)
return ret;
}
return 0;
}
static void*
hns3_alloc_rxq_and_dma_zone(struct rte_eth_dev *dev,
struct hns3_queue_info *q_info)
@ -880,7 +929,13 @@ hns3_alloc_rxq_and_dma_zone(struct rte_eth_dev *dev,
/* Allocate rx ring hardware descriptors. */
rxq->queue_id = q_info->idx;
rxq->nb_rx_desc = q_info->nb_desc;
rx_desc = rxq->nb_rx_desc * sizeof(struct hns3_desc);
/*
* Allocate a litter more memory because rx vector functions
* don't check boundaries each time.
*/
rx_desc = (rxq->nb_rx_desc + HNS3_DEFAULT_RX_BURST) *
sizeof(struct hns3_desc);
rx_mz = rte_eth_dma_zone_reserve(dev, q_info->ring_name, q_info->idx,
rx_desc, HNS3_RING_BASE_ALIGN,
q_info->socket_id);
@ -1329,7 +1384,8 @@ hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
conf->rx_free_thresh : HNS3_DEFAULT_RX_FREE_THRESH;
rxq->rx_deferred_start = conf->rx_deferred_start;
rx_entry_len = sizeof(struct hns3_entry) * rxq->nb_rx_desc;
rx_entry_len = (rxq->nb_rx_desc + HNS3_DEFAULT_RX_BURST) *
sizeof(struct hns3_entry);
rxq->sw_ring = rte_zmalloc_socket("hns3 RX sw ring", rx_entry_len,
RTE_CACHE_LINE_SIZE, socket_id);
if (rxq->sw_ring == NULL) {
@ -1340,6 +1396,8 @@ hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
rxq->next_to_use = 0;
rxq->rx_free_hold = 0;
rxq->rx_rearm_start = 0;
rxq->rx_rearm_nb = 0;
rxq->pkt_first_seg = NULL;
rxq->pkt_last_seg = NULL;
rxq->port_id = dev->data->port_id;
@ -1431,7 +1489,8 @@ hns3_dev_supported_ptypes_get(struct rte_eth_dev *dev)
};
if (dev->rx_pkt_burst == hns3_recv_pkts ||
dev->rx_pkt_burst == hns3_recv_scattered_pkts)
dev->rx_pkt_burst == hns3_recv_scattered_pkts ||
dev->rx_pkt_burst == hns3_recv_pkts_vec)
return ptypes;
return NULL;
@ -1915,6 +1974,25 @@ hns3_recv_scattered_pkts(void *rx_queue,
return nb_rx;
}
void __rte_weak
hns3_rxq_vec_setup(__rte_unused struct hns3_rx_queue *rxq)
{
}
int __rte_weak
hns3_rx_check_vec_support(__rte_unused struct rte_eth_dev *dev)
{
return -ENOTSUP;
}
uint16_t __rte_weak
hns3_recv_pkts_vec(__rte_unused void *tx_queue,
__rte_unused struct rte_mbuf **tx_pkts,
__rte_unused uint16_t nb_pkts)
{
return 0;
}
int
hns3_rx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id,
struct rte_eth_burst_mode *mode)
@ -1925,6 +2003,7 @@ hns3_rx_burst_mode_get(struct rte_eth_dev *dev, __rte_unused uint16_t queue_id,
} burst_infos[] = {
{ hns3_recv_pkts, "Scalar" },
{ hns3_recv_scattered_pkts, "Scalar Scattered" },
{ hns3_recv_pkts_vec, "Vector Neon" },
};
eth_rx_burst_t pkt_burst = dev->rx_pkt_burst;
@ -1949,6 +2028,9 @@ hns3_get_rx_function(struct rte_eth_dev *dev)
struct hns3_adapter *hns = dev->data->dev_private;
uint64_t offloads = dev->data->dev_conf.rxmode.offloads;
if (hns->rx_vec_allowed && hns3_rx_check_vec_support(dev) == 0)
return hns3_recv_pkts_vec;
if (hns->rx_simple_allowed && !dev->data->scattered_rx &&
(offloads & DEV_RX_OFFLOAD_TCP_LRO) == 0)
return hns3_recv_pkts;

View File

@ -17,6 +17,18 @@
#define HNS3_DEFAULT_TX_RS_THRESH 32
#define HNS3_TX_FAST_FREE_AHEAD 64
#define HNS3_DEFAULT_RX_BURST 32
#if (HNS3_DEFAULT_RX_BURST > 64)
#error "PMD HNS3: HNS3_DEFAULT_RX_BURST must <= 64\n"
#endif
#define HNS3_DEFAULT_DESCS_PER_LOOP 4
#define HNS3_SVE_DEFAULT_DESCS_PER_LOOP 8
#if (HNS3_DEFAULT_DESCS_PER_LOOP > HNS3_SVE_DEFAULT_DESCS_PER_LOOP)
#define HNS3_VECTOR_RX_OFFSET_TABLE_LEN HNS3_DEFAULT_DESCS_PER_LOOP
#else
#define HNS3_VECTOR_RX_OFFSET_TABLE_LEN HNS3_SVE_DEFAULT_DESCS_PER_LOOP
#endif
#define HNS3_DEFAULT_RXQ_REARM_THRESH 64
#define HNS3_UINT8_BIT 8
#define HNS3_UINT16_BIT 16
#define HNS3_UINT32_BIT 32
@ -236,7 +248,13 @@ struct hns3_desc {
uint16_t ot_vlan_tag;
};
};
uint32_t bd_base_info;
union {
uint32_t bd_base_info;
struct {
uint16_t bdtype_vld_udp0;
uint16_t fe_lum_crcp_l3l4p;
};
};
} rx;
};
} __rte_packed;
@ -270,7 +288,8 @@ struct hns3_rx_queue {
uint16_t rx_free_thresh;
uint16_t next_to_use; /* index of next BD to be polled */
uint16_t rx_free_hold; /* num of BDs waited to passed to hardware */
uint16_t rx_rearm_start; /* index of BD that driver re-arming from */
uint16_t rx_rearm_nb; /* number of remaining BDs to be re-armed */
/*
* port based vlan configuration state.
* value range: HNS3_PORT_BASE_VLAN_DISABLE / HNS3_PORT_BASE_VLAN_ENABLE
@ -292,6 +311,11 @@ struct hns3_rx_queue {
struct rte_mbuf *bulk_mbuf[HNS3_BULK_ALLOC_MBUF_NUM];
uint16_t bulk_mbuf_num;
/* offset_table: used for vector, to solve execute re-order problem */
uint8_t offset_table[HNS3_VECTOR_RX_OFFSET_TABLE_LEN + 1];
uint64_t mbuf_initializer; /* value to init mbufs used with vector rx */
struct rte_mbuf fake_mbuf; /* fake mbuf used with vector rx */
};
struct hns3_tx_queue {
@ -554,6 +578,8 @@ int hns3_dev_rx_queue_intr_disable(struct rte_eth_dev *dev, uint16_t queue_id);
void hns3_enable_all_queues(struct hns3_hw *hw, bool en);
int hns3_start_queues(struct hns3_adapter *hns, bool reset_queue);
int hns3_stop_queues(struct hns3_adapter *hns, bool reset_queue);
int hns3_rxq_iterate(struct rte_eth_dev *dev,
int (*callback)(struct hns3_rx_queue *, void *), void *arg);
void hns3_dev_release_mbufs(struct hns3_adapter *hns);
int hns3_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t nb_desc,
unsigned int socket, const struct rte_eth_rxconf *conf,
@ -564,9 +590,12 @@ uint16_t hns3_recv_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);
uint16_t hns3_recv_scattered_pkts(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);
uint16_t hns3_recv_pkts_vec(void *rx_queue, struct rte_mbuf **rx_pkts,
uint16_t nb_pkts);
int hns3_rx_burst_mode_get(struct rte_eth_dev *dev,
__rte_unused uint16_t queue_id,
struct rte_eth_burst_mode *mode);
int hns3_rx_check_vec_support(struct rte_eth_dev *dev);
uint16_t hns3_prep_pkts(__rte_unused void *tx_queue, struct rte_mbuf **tx_pkts,
uint16_t nb_pkts);
uint16_t hns3_xmit_pkts_simple(void *tx_queue, struct rte_mbuf **tx_pkts,
@ -594,7 +623,9 @@ int hns3_restore_gro_conf(struct hns3_hw *hw);
void hns3_update_all_queues_pvid_state(struct hns3_hw *hw);
void hns3_rx_scattered_reset(struct rte_eth_dev *dev);
void hns3_rx_scattered_calc(struct rte_eth_dev *dev);
int hns3_rx_check_vec_support(struct rte_eth_dev *dev);
int hns3_tx_check_vec_support(struct rte_eth_dev *dev);
void hns3_rxq_vec_setup(struct hns3_rx_queue *rxq);
void hns3_rxq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,
struct rte_eth_rxq_info *qinfo);
void hns3_txq_info_get(struct rte_eth_dev *dev, uint16_t queue_id,

View File

@ -45,3 +45,170 @@ hns3_xmit_pkts_vec(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
return nb_tx;
}
static inline void
hns3_rxq_rearm_mbuf(struct hns3_rx_queue *rxq)
{
#define REARM_LOOP_STEP_NUM 4
struct hns3_entry *rxep = &rxq->sw_ring[rxq->rx_rearm_start];
struct hns3_desc *rxdp = rxq->rx_ring + rxq->rx_rearm_start;
uint64_t dma_addr;
int i;
if (unlikely(rte_mempool_get_bulk(rxq->mb_pool, (void *)rxep,
HNS3_DEFAULT_RXQ_REARM_THRESH) < 0)) {
rte_eth_devices[rxq->port_id].data->rx_mbuf_alloc_failed++;
return;
}
for (i = 0; i < HNS3_DEFAULT_RXQ_REARM_THRESH; i += REARM_LOOP_STEP_NUM,
rxep += REARM_LOOP_STEP_NUM, rxdp += REARM_LOOP_STEP_NUM) {
if (likely(i <
HNS3_DEFAULT_RXQ_REARM_THRESH - REARM_LOOP_STEP_NUM)) {
rte_prefetch_non_temporal(rxep[4].mbuf);
rte_prefetch_non_temporal(rxep[5].mbuf);
rte_prefetch_non_temporal(rxep[6].mbuf);
rte_prefetch_non_temporal(rxep[7].mbuf);
}
dma_addr = rte_mbuf_data_iova_default(rxep[0].mbuf);
rxdp[0].addr = rte_cpu_to_le_64(dma_addr);
rxdp[0].rx.bd_base_info = 0;
dma_addr = rte_mbuf_data_iova_default(rxep[1].mbuf);
rxdp[1].addr = rte_cpu_to_le_64(dma_addr);
rxdp[1].rx.bd_base_info = 0;
dma_addr = rte_mbuf_data_iova_default(rxep[2].mbuf);
rxdp[2].addr = rte_cpu_to_le_64(dma_addr);
rxdp[2].rx.bd_base_info = 0;
dma_addr = rte_mbuf_data_iova_default(rxep[3].mbuf);
rxdp[3].addr = rte_cpu_to_le_64(dma_addr);
rxdp[3].rx.bd_base_info = 0;
}
rxq->rx_rearm_start += HNS3_DEFAULT_RXQ_REARM_THRESH;
if (rxq->rx_rearm_start >= rxq->nb_rx_desc)
rxq->rx_rearm_start = 0;
rxq->rx_rearm_nb -= HNS3_DEFAULT_RXQ_REARM_THRESH;
hns3_write_reg_opt(rxq->io_head_reg, HNS3_DEFAULT_RXQ_REARM_THRESH);
}
uint16_t
hns3_recv_pkts_vec(void *__restrict rx_queue,
struct rte_mbuf **__restrict rx_pkts,
uint16_t nb_pkts)
{
struct hns3_rx_queue *rxq = rx_queue;
struct hns3_desc *rxdp = &rxq->rx_ring[rxq->next_to_use];
uint64_t bd_err_mask; /* bit mask indicate whick pkts is error */
uint16_t nb_rx;
nb_pkts = RTE_MIN(nb_pkts, HNS3_DEFAULT_RX_BURST);
nb_pkts = RTE_ALIGN_FLOOR(nb_pkts, HNS3_DEFAULT_DESCS_PER_LOOP);
rte_prefetch_non_temporal(rxdp);
if (rxq->rx_rearm_nb > HNS3_DEFAULT_RXQ_REARM_THRESH)
hns3_rxq_rearm_mbuf(rxq);
if (unlikely(!(rxdp->rx.bd_base_info &
rte_cpu_to_le_32(1u << HNS3_RXD_VLD_B))))
return 0;
rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 0].mbuf);
rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 1].mbuf);
rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 2].mbuf);
rte_prefetch0(rxq->sw_ring[rxq->next_to_use + 3].mbuf);
bd_err_mask = 0;
nb_rx = hns3_recv_burst_vec(rxq, rx_pkts, nb_pkts, &bd_err_mask);
if (unlikely(bd_err_mask))
nb_rx = hns3_rx_reassemble_pkts(rx_pkts, nb_rx, bd_err_mask);
return nb_rx;
}
static void
hns3_rxq_vec_setup_rearm_data(struct hns3_rx_queue *rxq)
{
uintptr_t p;
struct rte_mbuf mb_def = { .buf_addr = 0 }; /* zeroed mbuf */
mb_def.nb_segs = 1;
mb_def.data_off = RTE_PKTMBUF_HEADROOM;
mb_def.port = rxq->port_id;
rte_mbuf_refcnt_set(&mb_def, 1);
/* prevent compiler reordering: rearm_data covers previous fields */
rte_compiler_barrier();
p = (uintptr_t)&mb_def.rearm_data;
rxq->mbuf_initializer = *(uint64_t *)p;
}
void
hns3_rxq_vec_setup(struct hns3_rx_queue *rxq)
{
struct hns3_entry *sw_ring = &rxq->sw_ring[rxq->nb_rx_desc];
unsigned int i;
memset(&rxq->rx_ring[rxq->nb_rx_desc], 0,
sizeof(struct hns3_desc) * HNS3_DEFAULT_RX_BURST);
memset(&rxq->fake_mbuf, 0, sizeof(rxq->fake_mbuf));
for (i = 0; i < HNS3_DEFAULT_RX_BURST; i++)
sw_ring[i].mbuf = &rxq->fake_mbuf;
hns3_rxq_vec_setup_rearm_data(rxq);
memset(rxq->offset_table, 0, sizeof(rxq->offset_table));
}
#ifndef RTE_LIBRTE_IEEE1588
static int
hns3_rxq_vec_check(struct hns3_rx_queue *rxq, void *arg)
{
uint32_t min_vec_bds = HNS3_DEFAULT_RXQ_REARM_THRESH +
HNS3_DEFAULT_RX_BURST;
if (rxq->nb_rx_desc < min_vec_bds)
return -ENOTSUP;
if (rxq->nb_rx_desc % HNS3_DEFAULT_RXQ_REARM_THRESH)
return -ENOTSUP;
RTE_SET_USED(arg);
return 0;
}
#endif
int
hns3_rx_check_vec_support(struct rte_eth_dev *dev)
{
#ifndef RTE_LIBRTE_IEEE1588
struct rte_fdir_conf *fconf = &dev->data->dev_conf.fdir_conf;
struct rte_eth_rxmode *rxmode = &dev->data->dev_conf.rxmode;
uint64_t offloads_mask = DEV_RX_OFFLOAD_TCP_LRO |
DEV_RX_OFFLOAD_VLAN;
if (dev->data->scattered_rx)
return -ENOTSUP;
if (fconf->mode != RTE_FDIR_MODE_NONE)
return -ENOTSUP;
if (rxmode->offloads & offloads_mask)
return -ENOTSUP;
if (hns3_rxq_iterate(dev, hns3_rxq_vec_check, NULL) != 0)
return -ENOTSUP;
return 0;
#else
RTE_SET_USED(dev);
return -ENOTSUP;
#endif
}

View File

@ -54,4 +54,24 @@ hns3_tx_free_buffers(struct hns3_tx_queue *txq)
if (txq->next_to_clean >= txq->nb_tx_desc)
txq->next_to_clean = 0;
}
static inline uint16_t
hns3_rx_reassemble_pkts(struct rte_mbuf **rx_pkts,
uint16_t nb_pkts,
uint64_t pkt_err_mask)
{
uint16_t count, i;
uint64_t mask;
count = 0;
for (i = 0; i < nb_pkts; i++) {
mask = ((uint64_t)1u) << i;
if (pkt_err_mask & mask)
rte_pktmbuf_free_seg(rx_pkts[i]);
else
rx_pkts[count++] = rx_pkts[i];
}
return count;
}
#endif /* _HNS3_RXTX_VEC_H_ */

View File

@ -82,4 +82,207 @@ hns3_xmit_fixed_burst_vec(void *__restrict tx_queue,
return nb_tx;
}
static inline uint32_t
hns3_desc_parse_field(struct hns3_rx_queue *rxq,
struct hns3_entry *sw_ring,
struct hns3_desc *rxdp,
uint32_t bd_vld_num)
{
uint32_t l234_info, ol_info, bd_base_info;
struct rte_mbuf *pkt;
uint32_t retcode = 0;
uint32_t cksum_err;
int ret, i;
for (i = 0; i < (int)bd_vld_num; i++) {
pkt = sw_ring[i].mbuf;
/* init rte_mbuf.rearm_data last 64-bit */
pkt->ol_flags = PKT_RX_RSS_HASH;
l234_info = rxdp[i].rx.l234_info;
ol_info = rxdp[i].rx.ol_info;
bd_base_info = rxdp[i].rx.bd_base_info;
ret = hns3_handle_bdinfo(rxq, pkt, bd_base_info,
l234_info, &cksum_err);
if (unlikely(ret)) {
retcode |= 1u << i;
continue;
}
pkt->packet_type = hns3_rx_calc_ptype(rxq, l234_info, ol_info);
if (likely(bd_base_info & BIT(HNS3_RXD_L3L4P_B)))
hns3_rx_set_cksum_flag(pkt, pkt->packet_type,
cksum_err);
}
return retcode;
}
static inline uint16_t
hns3_recv_burst_vec(struct hns3_rx_queue *__restrict rxq,
struct rte_mbuf **__restrict rx_pkts,
uint16_t nb_pkts,
uint64_t *bd_err_mask)
{
uint16_t rx_id = rxq->next_to_use;
struct hns3_entry *sw_ring = &rxq->sw_ring[rx_id];
struct hns3_desc *rxdp = &rxq->rx_ring[rx_id];
uint32_t bd_valid_num, parse_retcode;
uint16_t nb_rx = 0;
int pos, offset;
/* mask to shuffle from desc to mbuf's rx_descriptor_fields1 */
uint8x16_t shuf_desc_fields_msk = {
0xff, 0xff, 0xff, 0xff, /* packet type init zero */
22, 23, 0xff, 0xff, /* rx.pkt_len to rte_mbuf.pkt_len */
20, 21, /* size to rte_mbuf.data_len */
0xff, 0xff, /* rte_mbuf.vlan_tci init zero */
8, 9, 10, 11, /* rx.rss_hash to rte_mbuf.hash.rss */
};
uint16x8_t crc_adjust = {
0, 0, /* ignore pkt_type field */
rxq->crc_len, /* sub crc on pkt_len */
0, /* ignore high-16bits of pkt_len */
rxq->crc_len, /* sub crc on data_len */
0, 0, 0, /* ignore non-length fields */
};
for (pos = 0; pos < nb_pkts; pos += HNS3_DEFAULT_DESCS_PER_LOOP,
rxdp += HNS3_DEFAULT_DESCS_PER_LOOP) {
uint64x2x2_t descs[HNS3_DEFAULT_DESCS_PER_LOOP];
uint8x16x2_t pkt_mbuf1, pkt_mbuf2, pkt_mbuf3, pkt_mbuf4;
uint8x16_t pkt_mb1, pkt_mb2, pkt_mb3, pkt_mb4;
uint64x2_t mbp1, mbp2;
uint16x4_t bd_vld = {0};
uint16x8_t tmp;
uint64_t stat;
/* calc how many bd valid */
bd_vld = vset_lane_u16(rxdp[0].rx.bdtype_vld_udp0, bd_vld, 0);
bd_vld = vset_lane_u16(rxdp[1].rx.bdtype_vld_udp0, bd_vld, 1);
bd_vld = vset_lane_u16(rxdp[2].rx.bdtype_vld_udp0, bd_vld, 2);
bd_vld = vset_lane_u16(rxdp[3].rx.bdtype_vld_udp0, bd_vld, 3);
/* load 2 mbuf pointer */
mbp1 = vld1q_u64((uint64_t *)&sw_ring[pos]);
bd_vld = vshl_n_u16(bd_vld,
HNS3_UINT16_BIT - 1 - HNS3_RXD_VLD_B);
bd_vld = vreinterpret_u16_s16(
vshr_n_s16(vreinterpret_s16_u16(bd_vld),
HNS3_UINT16_BIT - 1));
stat = ~vget_lane_u64(vreinterpret_u64_u16(bd_vld), 0);
/* load 2 mbuf pointer again */
mbp2 = vld1q_u64((uint64_t *)&sw_ring[pos + 2]);
if (likely(stat == 0))
bd_valid_num = HNS3_DEFAULT_DESCS_PER_LOOP;
else
bd_valid_num = __builtin_ctzl(stat) / HNS3_UINT16_BIT;
if (bd_valid_num == 0)
break;
/* use offset to control below data load oper ordering */
offset = rxq->offset_table[bd_valid_num];
/* store 2 mbuf pointer into rx_pkts */
vst1q_u64((uint64_t *)&rx_pkts[pos], mbp1);
/* read first two descs */
descs[0] = vld2q_u64((uint64_t *)(rxdp + offset));
descs[1] = vld2q_u64((uint64_t *)(rxdp + offset + 1));
/* store 2 mbuf pointer into rx_pkts again */
vst1q_u64((uint64_t *)&rx_pkts[pos + 2], mbp2);
/* read remains two descs */
descs[2] = vld2q_u64((uint64_t *)(rxdp + offset + 2));
descs[3] = vld2q_u64((uint64_t *)(rxdp + offset + 3));
pkt_mbuf1.val[0] = vreinterpretq_u8_u64(descs[0].val[0]);
pkt_mbuf1.val[1] = vreinterpretq_u8_u64(descs[0].val[1]);
pkt_mbuf2.val[0] = vreinterpretq_u8_u64(descs[1].val[0]);
pkt_mbuf2.val[1] = vreinterpretq_u8_u64(descs[1].val[1]);
/* pkt 1,2 convert format from desc to pktmbuf */
pkt_mb1 = vqtbl2q_u8(pkt_mbuf1, shuf_desc_fields_msk);
pkt_mb2 = vqtbl2q_u8(pkt_mbuf2, shuf_desc_fields_msk);
/* store the first 8 bytes of pkt 1,2 mbuf's rearm_data */
*(uint64_t *)&sw_ring[pos + 0].mbuf->rearm_data =
rxq->mbuf_initializer;
*(uint64_t *)&sw_ring[pos + 1].mbuf->rearm_data =
rxq->mbuf_initializer;
/* pkt 1,2 remove crc */
tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb1), crc_adjust);
pkt_mb1 = vreinterpretq_u8_u16(tmp);
tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb2), crc_adjust);
pkt_mb2 = vreinterpretq_u8_u16(tmp);
pkt_mbuf3.val[0] = vreinterpretq_u8_u64(descs[2].val[0]);
pkt_mbuf3.val[1] = vreinterpretq_u8_u64(descs[2].val[1]);
pkt_mbuf4.val[0] = vreinterpretq_u8_u64(descs[3].val[0]);
pkt_mbuf4.val[1] = vreinterpretq_u8_u64(descs[3].val[1]);
/* pkt 3,4 convert format from desc to pktmbuf */
pkt_mb3 = vqtbl2q_u8(pkt_mbuf3, shuf_desc_fields_msk);
pkt_mb4 = vqtbl2q_u8(pkt_mbuf4, shuf_desc_fields_msk);
/* pkt 1,2 save to rx_pkts mbuf */
vst1q_u8((void *)&sw_ring[pos + 0].mbuf->rx_descriptor_fields1,
pkt_mb1);
vst1q_u8((void *)&sw_ring[pos + 1].mbuf->rx_descriptor_fields1,
pkt_mb2);
/* pkt 3,4 remove crc */
tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb3), crc_adjust);
pkt_mb3 = vreinterpretq_u8_u16(tmp);
tmp = vsubq_u16(vreinterpretq_u16_u8(pkt_mb4), crc_adjust);
pkt_mb4 = vreinterpretq_u8_u16(tmp);
/* store the first 8 bytes of pkt 3,4 mbuf's rearm_data */
*(uint64_t *)&sw_ring[pos + 2].mbuf->rearm_data =
rxq->mbuf_initializer;
*(uint64_t *)&sw_ring[pos + 3].mbuf->rearm_data =
rxq->mbuf_initializer;
/* pkt 3,4 save to rx_pkts mbuf */
vst1q_u8((void *)&sw_ring[pos + 2].mbuf->rx_descriptor_fields1,
pkt_mb3);
vst1q_u8((void *)&sw_ring[pos + 3].mbuf->rx_descriptor_fields1,
pkt_mb4);
rte_prefetch_non_temporal(rxdp + HNS3_DEFAULT_DESCS_PER_LOOP);
parse_retcode = hns3_desc_parse_field(rxq, &sw_ring[pos],
&rxdp[offset], bd_valid_num);
if (unlikely(parse_retcode))
(*bd_err_mask) |= ((uint64_t)parse_retcode) << pos;
rte_prefetch0(sw_ring[pos +
HNS3_DEFAULT_DESCS_PER_LOOP + 0].mbuf);
rte_prefetch0(sw_ring[pos +
HNS3_DEFAULT_DESCS_PER_LOOP + 1].mbuf);
rte_prefetch0(sw_ring[pos +
HNS3_DEFAULT_DESCS_PER_LOOP + 2].mbuf);
rte_prefetch0(sw_ring[pos +
HNS3_DEFAULT_DESCS_PER_LOOP + 3].mbuf);
nb_rx += bd_valid_num;
if (bd_valid_num < HNS3_DEFAULT_DESCS_PER_LOOP)
break;
}
rxq->rx_rearm_nb += nb_rx;
rxq->next_to_use += nb_rx;
if (rxq->next_to_use >= rxq->nb_rx_desc)
rxq->next_to_use = 0;
return nb_rx;
}
#endif /* _HNS3_RXTX_VEC_NEON_H_ */