net/mlx5: implement vectorized MPRQ burst
MPRQ (Multi-Packet Rx Queue) currently processes one packet at a time using simple scalar instructions. MPRQ works by posting a single large buffer (consisting of multiple fixed-size strides) to receive multiple packets at once on this buffer. An Rx packet is then either copied into a user-provided mbuf, or the PMD attaches the Rx packet to the mbuf via a pointer to an external buffer.

There is an opportunity to speed up packet reception by processing 4 packets simultaneously using SIMD (single instruction, multiple data) extensions. Allocate mbufs in batches for every MPRQ buffer and process the packets in groups of 4 until all the strides are exhausted. Then switch to another MPRQ buffer and repeat the process all over again.

The vectorized MPRQ burst routine is engaged automatically when the mprq_en=1 devarg is specified and vectorization is not disabled explicitly with the rx_vec_en=0 devarg. There is one limitation: LRO is not supported, and the scalar MPRQ burst is selected if it is enabled.

Signed-off-by: Alexander Kozyrev <akozyrev@nvidia.com>
Acked-by: Viacheslav Ovsiienko <viacheslavo@nvidia.com>
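As a usage sketch outside the commit itself (the PCI address below is a placeholder), the new path can be tried with testpmd on a recent DPDK by enabling MPRQ and leaving Rx vectorization at its default:

    dpdk-testpmd -a 0000:03:00.0,mprq_en=1,rx_vec_en=1 -- --rxq=2 --txq=2

Here mprq_en=1 requests Multi-Packet RQ, rx_vec_en=1 keeps vectorization enabled (it is the default), and LRO has to stay disabled, otherwise the scalar MPRQ burst is selected as noted above.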
parent 1ded26239a
commit 0f20acbf5e
@@ -437,10 +437,17 @@ mlx5_rxq_create_devx_cq_resources(struct rte_eth_dev *dev, uint16_t idx)
if (priv->config.cqe_comp && !rxq_data->hw_timestamp &&
!rxq_data->lro) {
cq_attr.cqe_comp_en = 1u;
cq_attr.mini_cqe_res_format =
mlx5_rxq_mprq_enabled(rxq_data) ?
MLX5_CQE_RESP_FORMAT_CSUM_STRIDX :
MLX5_CQE_RESP_FORMAT_HASH;
/*
* Select CSUM miniCQE format only for non-vectorized MPRQ
* Rx burst, use HASH miniCQE format for everything else.
*/
if (mlx5_rxq_check_vec_support(rxq_data) < 0 &&
mlx5_rxq_mprq_enabled(rxq_data))
cq_attr.mini_cqe_res_format =
MLX5_CQE_RESP_FORMAT_CSUM_STRIDX;
else
cq_attr.mini_cqe_res_format =
MLX5_CQE_RESP_FORMAT_HASH;
/*
* For vectorized Rx, it must not be doubled in order to
* make cq_ci and rq_ci aligned.
@@ -427,7 +427,8 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)

if (dev->rx_pkt_burst == mlx5_rx_burst ||
dev->rx_pkt_burst == mlx5_rx_burst_mprq ||
dev->rx_pkt_burst == mlx5_rx_burst_vec)
dev->rx_pkt_burst == mlx5_rx_burst_vec ||
dev->rx_pkt_burst == mlx5_rx_burst_mprq_vec)
return ptypes;
return NULL;
}
@@ -486,11 +487,22 @@ mlx5_select_rx_function(struct rte_eth_dev *dev)

MLX5_ASSERT(dev != NULL);
if (mlx5_check_vec_rx_support(dev) > 0) {
rx_pkt_burst = mlx5_rx_burst_vec;
DRV_LOG(DEBUG, "port %u selected Rx vectorized function",
dev->data->port_id);
if (mlx5_mprq_enabled(dev)) {
rx_pkt_burst = mlx5_rx_burst_mprq_vec;
DRV_LOG(DEBUG, "port %u selected vectorized"
" MPRQ Rx function", dev->data->port_id);
} else {
rx_pkt_burst = mlx5_rx_burst_vec;
DRV_LOG(DEBUG, "port %u selected vectorized"
" SPRQ Rx function", dev->data->port_id);
}
} else if (mlx5_mprq_enabled(dev)) {
rx_pkt_burst = mlx5_rx_burst_mprq;
DRV_LOG(DEBUG, "port %u selected MPRQ Rx function",
dev->data->port_id);
} else {
DRV_LOG(DEBUG, "port %u selected SPRQ Rx function",
dev->data->port_id);
}
return rx_pkt_burst;
}
@@ -173,7 +173,7 @@ rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
rxq->mprq_repl = buf;
}
DRV_LOG(DEBUG,
"port %u Rx queue %u allocated and configured %u segments",
"port %u MPRQ queue %u allocated and configured %u segments",
rxq->port_id, rxq->idx, wqe_n);
return 0;
error:
@@ -185,7 +185,7 @@ rxq_alloc_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
(*rxq->mprq_bufs)[i]);
(*rxq->mprq_bufs)[i] = NULL;
}
DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
DRV_LOG(DEBUG, "port %u MPRQ queue %u failed, freed everything",
rxq->port_id, rxq->idx);
rte_errno = err; /* Restore rte_errno. */
return -rte_errno;
@@ -204,7 +204,9 @@ static int
rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
{
const unsigned int sges_n = 1 << rxq_ctrl->rxq.sges_n;
unsigned int elts_n = 1 << rxq_ctrl->rxq.elts_n;
unsigned int elts_n = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
(1 << rxq_ctrl->rxq.elts_n) * (1 << rxq_ctrl->rxq.strd_num_n) :
(1 << rxq_ctrl->rxq.elts_n);
unsigned int i;
int err;

@@ -262,7 +264,7 @@ rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
(*rxq->elts)[elts_n + j] = &rxq->fake_mbuf;
}
DRV_LOG(DEBUG,
"port %u Rx queue %u allocated and configured %u segments"
"port %u SPRQ queue %u allocated and configured %u segments"
" (max %u packets)",
PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx, elts_n,
elts_n / (1 << rxq_ctrl->rxq.sges_n));
@@ -275,7 +277,7 @@ rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
rte_pktmbuf_free_seg((*rxq_ctrl->rxq.elts)[i]);
(*rxq_ctrl->rxq.elts)[i] = NULL;
}
DRV_LOG(DEBUG, "port %u Rx queue %u failed, freed everything",
DRV_LOG(DEBUG, "port %u SPRQ queue %u failed, freed everything",
PORT_ID(rxq_ctrl->priv), rxq_ctrl->rxq.idx);
rte_errno = err; /* Restore rte_errno. */
return -rte_errno;
@@ -293,8 +295,15 @@ rxq_alloc_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
int
rxq_alloc_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
{
return mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
rxq_alloc_elts_mprq(rxq_ctrl) : rxq_alloc_elts_sprq(rxq_ctrl);
int ret = 0;

/**
* For MPRQ we need to allocate both MPRQ buffers
* for WQEs and simple mbufs for vector processing.
*/
if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
ret = rxq_alloc_elts_mprq(rxq_ctrl);
return (ret || rxq_alloc_elts_sprq(rxq_ctrl));
}

/**
@@ -309,11 +318,10 @@ rxq_free_elts_mprq(struct mlx5_rxq_ctrl *rxq_ctrl)
struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
uint16_t i;

DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing WRs",
rxq->port_id, rxq->idx);
DRV_LOG(DEBUG, "port %u Multi-Packet Rx queue %u freeing %d WRs",
rxq->port_id, rxq->idx, (1u << rxq->elts_n));
if (rxq->mprq_bufs == NULL)
return;
MLX5_ASSERT(mlx5_rxq_check_vec_support(rxq) < 0);
for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
if ((*rxq->mprq_bufs)[i] != NULL)
mlx5_mprq_buf_free((*rxq->mprq_bufs)[i]);
@@ -335,25 +343,27 @@ static void
rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
{
struct mlx5_rxq_data *rxq = &rxq_ctrl->rxq;
const uint16_t q_n = (1 << rxq->elts_n);
const uint16_t q_n = mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq) ?
(1 << rxq->elts_n) * (1 << rxq->strd_num_n) :
(1 << rxq->elts_n);
const uint16_t q_mask = q_n - 1;
uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
uint16_t i;

DRV_LOG(DEBUG, "port %u Rx queue %u freeing WRs",
PORT_ID(rxq_ctrl->priv), rxq->idx);
DRV_LOG(DEBUG, "port %u Rx queue %u freeing %d WRs",
PORT_ID(rxq_ctrl->priv), rxq->idx, q_n);
if (rxq->elts == NULL)
return;
/**
* Some mbuf in the Ring belongs to the application. They cannot be
* freed.
* Some mbuf in the Ring belongs to the application.
* They cannot be freed.
*/
if (mlx5_rxq_check_vec_support(rxq) > 0) {
for (i = 0; i < used; ++i)
(*rxq->elts)[(rxq->rq_ci + i) & q_mask] = NULL;
rxq->rq_pi = rxq->rq_ci;
}
for (i = 0; (i != (1u << rxq->elts_n)); ++i) {
for (i = 0; i != q_n; ++i) {
if ((*rxq->elts)[i] != NULL)
rte_pktmbuf_free_seg((*rxq->elts)[i]);
(*rxq->elts)[i] = NULL;
@@ -369,10 +379,13 @@ rxq_free_elts_sprq(struct mlx5_rxq_ctrl *rxq_ctrl)
static void
rxq_free_elts(struct mlx5_rxq_ctrl *rxq_ctrl)
{
/*
* For MPRQ we need to allocate both MPRQ buffers
* for WQEs and simple mbufs for vector processing.
*/
if (mlx5_rxq_mprq_enabled(&rxq_ctrl->rxq))
rxq_free_elts_mprq(rxq_ctrl);
else
rxq_free_elts_sprq(rxq_ctrl);
rxq_free_elts_sprq(rxq_ctrl);
}

/**
@@ -1334,20 +1347,10 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_rxq_ctrl *tmpl;
unsigned int mb_len = rte_pktmbuf_data_room_size(mp);
unsigned int mprq_stride_nums;
unsigned int mprq_stride_size;
unsigned int mprq_stride_cap;
struct mlx5_dev_config *config = &priv->config;
/*
* Always allocate extra slots, even if eventually
* the vector Rx will not be used.
*/
uint16_t desc_n =
desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;
uint64_t offloads = conf->offloads |
dev->data->dev_conf.rxmode.offloads;
unsigned int lro_on_queue = !!(offloads & DEV_RX_OFFLOAD_TCP_LRO);
const int mprq_en = mlx5_check_mprq_support(dev) > 0;
unsigned int max_rx_pkt_len = lro_on_queue ?
dev->data->dev_conf.rxmode.max_lro_pkt_size :
dev->data->dev_conf.rxmode.max_rx_pkt_len;
@@ -1355,6 +1358,21 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
RTE_PKTMBUF_HEADROOM;
unsigned int max_lro_size = 0;
unsigned int first_mb_free_size = mb_len - RTE_PKTMBUF_HEADROOM;
const int mprq_en = mlx5_check_mprq_support(dev) > 0;
unsigned int mprq_stride_nums = config->mprq.stride_num_n ?
config->mprq.stride_num_n : MLX5_MPRQ_STRIDE_NUM_N;
unsigned int mprq_stride_size = non_scatter_min_mbuf_size <=
(1U << config->mprq.max_stride_size_n) ?
log2above(non_scatter_min_mbuf_size) : MLX5_MPRQ_STRIDE_SIZE_N;
unsigned int mprq_stride_cap = (config->mprq.stride_num_n ?
(1U << config->mprq.stride_num_n) : (1U << mprq_stride_nums)) *
(config->mprq.stride_size_n ?
(1U << config->mprq.stride_size_n) : (1U << mprq_stride_size));
/*
* Always allocate extra slots, even if eventually
* the vector Rx will not be used.
*/
uint16_t desc_n = desc + config->rx_vec_en * MLX5_VPMD_DESCS_PER_LOOP;

if (non_scatter_min_mbuf_size > mb_len && !(offloads &
DEV_RX_OFFLOAD_SCATTER)) {
@@ -1366,8 +1384,11 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
rte_errno = ENOSPC;
return NULL;
}
tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO, sizeof(*tmpl) +
desc_n * sizeof(struct rte_mbuf *), 0, socket);
tmpl = mlx5_malloc(MLX5_MEM_RTE | MLX5_MEM_ZERO,
sizeof(*tmpl) + desc_n * sizeof(struct rte_mbuf *) +
(desc >> mprq_stride_nums) * sizeof(struct mlx5_mprq_buf *),
0, socket);

if (!tmpl) {
rte_errno = ENOMEM;
return NULL;
@@ -1381,15 +1402,6 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
tmpl->socket = socket;
if (dev->data->dev_conf.intr_conf.rxq)
tmpl->irq = 1;
mprq_stride_nums = config->mprq.stride_num_n ?
config->mprq.stride_num_n : MLX5_MPRQ_STRIDE_NUM_N;
mprq_stride_size = non_scatter_min_mbuf_size <=
(1U << config->mprq.max_stride_size_n) ?
log2above(non_scatter_min_mbuf_size) : MLX5_MPRQ_STRIDE_SIZE_N;
mprq_stride_cap = (config->mprq.stride_num_n ?
(1U << config->mprq.stride_num_n) : (1U << mprq_stride_nums)) *
(config->mprq.stride_size_n ?
(1U << config->mprq.stride_size_n) : (1U << mprq_stride_size));
/*
* This Rx queue can be configured as a Multi-Packet RQ if all of the
* following conditions are met:
@@ -1543,9 +1555,11 @@ mlx5_rxq_new(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
tmpl->rxq.mp = mp;
tmpl->rxq.elts_n = log2above(desc);
tmpl->rxq.rq_repl_thresh =
MLX5_VPMD_RXQ_RPLNSH_THRESH(1 << tmpl->rxq.elts_n);
MLX5_VPMD_RXQ_RPLNSH_THRESH(desc_n);
tmpl->rxq.elts =
(struct rte_mbuf *(*)[1 << tmpl->rxq.elts_n])(tmpl + 1);
(struct rte_mbuf *(*)[desc_n])(tmpl + 1);
tmpl->rxq.mprq_bufs =
(struct mlx5_mprq_buf *(*)[desc])(*tmpl->rxq.elts + desc_n);
#ifndef RTE_ARCH_64
tmpl->rxq.uar_lock_cq = &priv->sh->uar_lock_cq;
#endif
@@ -19,12 +19,12 @@
#include <mlx5_prm.h>
#include <mlx5_common.h>

#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
#include "mlx5.h"
#include "mlx5_mr.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"

/* TX burst subroutines return codes. */
enum mlx5_txcmp_code {
@@ -93,10 +93,6 @@ static __rte_always_inline void
rxq_cq_to_mbuf(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt,
volatile struct mlx5_cqe *cqe, uint32_t rss_hash_res);

static __rte_always_inline void
mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx,
const unsigned int strd_n);

static int
mlx5_queue_state_modify(struct rte_eth_dev *dev,
struct mlx5_mp_arg_queue_state_modify *sm);
@@ -584,7 +580,14 @@ mlx5_rx_burst_mode_get(struct rte_eth_dev *dev,
struct rte_eth_burst_mode *mode)
{
eth_rx_burst_t pkt_burst = dev->rx_pkt_burst;
struct mlx5_priv *priv = dev->data->dev_private;
struct mlx5_rxq_data *rxq;

rxq = (*priv->rxqs)[rx_queue_id];
if (!rxq) {
rte_errno = EINVAL;
return -rte_errno;
}
if (pkt_burst == mlx5_rx_burst) {
snprintf(mode->info, sizeof(mode->info), "%s", "Scalar");
} else if (pkt_burst == mlx5_rx_burst_mprq) {
@@ -598,6 +601,16 @@ mlx5_rx_burst_mode_get(struct rte_eth_dev *dev,
snprintf(mode->info, sizeof(mode->info), "%s", "Vector AltiVec");
#else
return -EINVAL;
#endif
} else if (pkt_burst == mlx5_rx_burst_mprq_vec) {
#if defined RTE_ARCH_X86_64
snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector SSE");
#elif defined RTE_ARCH_ARM64
snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector Neon");
#elif defined RTE_ARCH_PPC_64
snprintf(mode->info, sizeof(mode->info), "%s", "MPRQ Vector AltiVec");
#else
return -EINVAL;
#endif
} else {
return -EINVAL;
@@ -866,6 +879,8 @@ mlx5_rxq_initialize(struct mlx5_rxq_data *rxq)
rxq->zip = (struct rxq_zip){
.ai = 0,
};
rxq->elts_ci = mlx5_rxq_mprq_enabled(rxq) ?
(wqe_n >> rxq->sges_n) * (1 << rxq->strd_num_n) : 0;
/* Update doorbell counter. */
rxq->rq_ci = wqe_n >> rxq->sges_n;
rte_io_wmb();
@@ -969,7 +984,8 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
{
const uint16_t cqe_n = 1 << rxq->cqe_n;
const uint16_t cqe_mask = cqe_n - 1;
const unsigned int wqe_n = 1 << rxq->elts_n;
const uint16_t wqe_n = 1 << rxq->elts_n;
const uint16_t strd_n = 1 << rxq->strd_num_n;
struct mlx5_rxq_ctrl *rxq_ctrl =
container_of(rxq, struct mlx5_rxq_ctrl, rxq);
union {
@@ -1033,21 +1049,27 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
&sm))
return -1;
if (vec) {
const uint16_t q_mask = wqe_n - 1;
uint16_t elt_idx;
const uint32_t elts_n =
mlx5_rxq_mprq_enabled(rxq) ?
wqe_n * strd_n : wqe_n;
const uint32_t e_mask = elts_n - 1;
uint32_t elts_ci =
mlx5_rxq_mprq_enabled(rxq) ?
rxq->elts_ci : rxq->rq_ci;
uint32_t elt_idx;
struct rte_mbuf **elt;
int i;
unsigned int n = wqe_n - (rxq->rq_ci -
unsigned int n = elts_n - (elts_ci -
rxq->rq_pi);

for (i = 0; i < (int)n; ++i) {
elt_idx = (rxq->rq_ci + i) & q_mask;
elt_idx = (elts_ci + i) & e_mask;
elt = &(*rxq->elts)[elt_idx];
*elt = rte_mbuf_raw_alloc(rxq->mp);
if (!*elt) {
for (i--; i >= 0; --i) {
elt_idx = (rxq->rq_ci +
i) & q_mask;
elt_idx = (elts_ci +
i) & elts_n;
elt = &(*rxq->elts)
[elt_idx];
rte_pktmbuf_free_seg
@@ -1056,7 +1078,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
return -1;
}
}
for (i = 0; i < (int)wqe_n; ++i) {
for (i = 0; i < (int)elts_n; ++i) {
elt = &(*rxq->elts)[i];
DATA_LEN(*elt) =
(uint16_t)((*elt)->buf_len -
@@ -1064,7 +1086,7 @@ mlx5_rx_err_handle(struct mlx5_rxq_data *rxq, uint8_t vec)
}
/* Padding with a fake mbuf for vec Rx. */
for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
(*rxq->elts)[wqe_n + i] =
(*rxq->elts)[elts_n + i] =
&rxq->fake_mbuf;
}
mlx5_rxq_initialize(rxq);
@@ -1545,31 +1567,6 @@ mlx5_mprq_buf_free(struct mlx5_mprq_buf *buf)
mlx5_mprq_buf_free_cb(NULL, buf);
}

static inline void
mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx,
const unsigned int strd_n)
{
struct mlx5_mprq_buf *rep = rxq->mprq_repl;
volatile struct mlx5_wqe_data_seg *wqe =
&((volatile struct mlx5_wqe_mprq *)rxq->wqes)[rq_idx].dseg;
void *addr;

MLX5_ASSERT(rep != NULL);
/* Replace MPRQ buf. */
(*rxq->mprq_bufs)[rq_idx] = rep;
/* Replace WQE. */
addr = mlx5_mprq_buf_addr(rep, strd_n);
wqe->addr = rte_cpu_to_be_64((uintptr_t)addr);
/* If there's only one MR, no need to replace LKey in WQE. */
if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))
wqe->lkey = mlx5_rx_addr2mr(rxq, (uintptr_t)addr);
/* Stash a mbuf for next replacement. */
if (likely(!rte_mempool_get(rxq->mprq_mp, (void **)&rep)))
rxq->mprq_repl = rep;
else
rxq->mprq_repl = NULL;
}

/**
* DPDK callback for RX with Multi-Packet RQ support.
*
@@ -1587,12 +1584,9 @@ uint16_t
mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
struct mlx5_rxq_data *rxq = dpdk_rxq;
const unsigned int strd_n = 1 << rxq->strd_num_n;
const unsigned int strd_sz = 1 << rxq->strd_sz_n;
const unsigned int strd_shift =
MLX5_MPRQ_STRIDE_SHIFT_BYTE * rxq->strd_shift_en;
const unsigned int cq_mask = (1 << rxq->cqe_n) - 1;
const unsigned int wq_mask = (1 << rxq->elts_n) - 1;
const uint32_t strd_n = 1 << rxq->strd_num_n;
const uint32_t cq_mask = (1 << rxq->cqe_n) - 1;
const uint32_t wq_mask = (1 << rxq->elts_n) - 1;
volatile struct mlx5_cqe *cqe = &(*rxq->cqes)[rxq->cq_ci & cq_mask];
unsigned int i = 0;
uint32_t rq_ci = rxq->rq_ci;
@@ -1601,37 +1595,18 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)

while (i < pkts_n) {
struct rte_mbuf *pkt;
void *addr;
int ret;
uint32_t len;
uint16_t strd_cnt;
uint16_t strd_idx;
uint32_t offset;
uint32_t byte_cnt;
int32_t hdrm_overlap;
volatile struct mlx5_mini_cqe8 *mcqe = NULL;
uint32_t rss_hash_res = 0;
enum mlx5_rqx_code rxq_code;

if (consumed_strd == strd_n) {
/* Replace WQE only if the buffer is still in use. */
if (__atomic_load_n(&buf->refcnt,
__ATOMIC_RELAXED) > 1) {
mprq_buf_replace(rxq, rq_ci & wq_mask, strd_n);
/* Release the old buffer. */
mlx5_mprq_buf_free(buf);
} else if (unlikely(rxq->mprq_repl == NULL)) {
struct mlx5_mprq_buf *rep;

/*
* Currently, the MPRQ mempool is out of buffer
* and doing memcpy regardless of the size of Rx
* packet. Retry allocation to get back to
* normal.
*/
if (!rte_mempool_get(rxq->mprq_mp,
(void **)&rep))
rxq->mprq_repl = rep;
}
/* Replace WQE if the buffer is still in use. */
mprq_buf_replace(rxq, rq_ci & wq_mask);
/* Advance to the next WQE. */
consumed_strd = 0;
++rq_ci;
@@ -1667,122 +1642,23 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
MLX5_ASSERT((int)len >= (rxq->crc_present << 2));
if (rxq->crc_present)
len -= RTE_ETHER_CRC_LEN;
offset = strd_idx * strd_sz + strd_shift;
addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n), offset);
hdrm_overlap = len + RTE_PKTMBUF_HEADROOM - strd_cnt * strd_sz;
/*
* Memcpy packets to the target mbuf if:
* - The size of packet is smaller than mprq_max_memcpy_len.
* - Out of buffer in the Mempool for Multi-Packet RQ.
* - The packet's stride overlaps a headroom and scatter is off.
*/
if (len <= rxq->mprq_max_memcpy_len ||
rxq->mprq_repl == NULL ||
(hdrm_overlap > 0 && !rxq->strd_scatter_en)) {
if (likely(rte_pktmbuf_tailroom(pkt) >= len)) {
rte_memcpy(rte_pktmbuf_mtod(pkt, void *),
addr, len);
DATA_LEN(pkt) = len;
} else if (rxq->strd_scatter_en) {
struct rte_mbuf *prev = pkt;
uint32_t seg_len =
RTE_MIN(rte_pktmbuf_tailroom(pkt), len);
uint32_t rem_len = len - seg_len;

rte_memcpy(rte_pktmbuf_mtod(pkt, void *),
addr, seg_len);
DATA_LEN(pkt) = seg_len;
while (rem_len) {
struct rte_mbuf *next =
rte_pktmbuf_alloc(rxq->mp);

if (unlikely(next == NULL)) {
rte_pktmbuf_free(pkt);
++rxq->stats.rx_nombuf;
goto out;
}
NEXT(prev) = next;
SET_DATA_OFF(next, 0);
addr = RTE_PTR_ADD(addr, seg_len);
seg_len = RTE_MIN
(rte_pktmbuf_tailroom(next),
rem_len);
rte_memcpy
(rte_pktmbuf_mtod(next, void *),
addr, seg_len);
DATA_LEN(next) = seg_len;
rem_len -= seg_len;
prev = next;
++NB_SEGS(pkt);
}
} else {
rte_pktmbuf_free_seg(pkt);
rxq_code = mprq_buf_to_pkt(rxq, pkt, len, buf,
strd_idx, strd_cnt);
if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
rte_pktmbuf_free_seg(pkt);
if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
++rxq->stats.idropped;
continue;
}
} else {
rte_iova_t buf_iova;
struct rte_mbuf_ext_shared_info *shinfo;
uint16_t buf_len = strd_cnt * strd_sz;
void *buf_addr;

/* Increment the refcnt of the whole chunk. */
__atomic_add_fetch(&buf->refcnt, 1, __ATOMIC_RELAXED);
MLX5_ASSERT(__atomic_load_n(&buf->refcnt,
__ATOMIC_RELAXED) <= strd_n + 1);
buf_addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM);
/*
* MLX5 device doesn't use iova but it is necessary in a
* case where the Rx packet is transmitted via a
* different PMD.
*/
buf_iova = rte_mempool_virt2iova(buf) +
RTE_PTR_DIFF(buf_addr, buf);
shinfo = &buf->shinfos[strd_idx];
rte_mbuf_ext_refcnt_set(shinfo, 1);
/*
* EXT_ATTACHED_MBUF will be set to pkt->ol_flags when
* attaching the stride to mbuf and more offload flags
* will be added below by calling rxq_cq_to_mbuf().
* Other fields will be overwritten.
*/
rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova,
buf_len, shinfo);
/* Set mbuf head-room. */
SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM);
MLX5_ASSERT(pkt->ol_flags == EXT_ATTACHED_MBUF);
MLX5_ASSERT(rte_pktmbuf_tailroom(pkt) >=
len - (hdrm_overlap > 0 ? hdrm_overlap : 0));
DATA_LEN(pkt) = len;
/*
* Copy the last fragment of a packet (up to headroom
* size bytes) in case there is a stride overlap with
* a next packet's headroom. Allocate a separate mbuf
* to store this fragment and link it. Scatter is on.
*/
if (hdrm_overlap > 0) {
MLX5_ASSERT(rxq->strd_scatter_en);
struct rte_mbuf *seg =
rte_pktmbuf_alloc(rxq->mp);

if (unlikely(seg == NULL)) {
rte_pktmbuf_free_seg(pkt);
++rxq->stats.rx_nombuf;
break;
}
SET_DATA_OFF(seg, 0);
rte_memcpy(rte_pktmbuf_mtod(seg, void *),
RTE_PTR_ADD(addr, len - hdrm_overlap),
hdrm_overlap);
DATA_LEN(seg) = hdrm_overlap;
DATA_LEN(pkt) = len - hdrm_overlap;
NEXT(pkt) = seg;
NB_SEGS(pkt) = 2;
if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
++rxq->stats.rx_nombuf;
break;
}
}
rxq_cq_to_mbuf(rxq, pkt, cqe, rss_hash_res);
if (cqe->lro_num_seg > 1) {
mlx5_lro_update_hdr(addr, cqe, len);
mlx5_lro_update_hdr(rte_pktmbuf_mtod(pkt, uint8_t *),
cqe, len);
pkt->ol_flags |= PKT_RX_LRO;
pkt->tso_segsz = len / cqe->lro_num_seg;
}
@@ -1796,7 +1672,6 @@ mlx5_rx_burst_mprq(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
*(pkts++) = pkt;
++i;
}
out:
/* Update the consumer indexes. */
rxq->consumed_strd = consumed_strd;
rte_io_wmb();
@@ -1878,6 +1753,14 @@ mlx5_rx_burst_vec(void *dpdk_txq __rte_unused,
return 0;
}

__rte_weak uint16_t
mlx5_rx_burst_mprq_vec(void *dpdk_txq __rte_unused,
struct rte_mbuf **pkts __rte_unused,
uint16_t pkts_n __rte_unused)
{
return 0;
}

__rte_weak int
mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq __rte_unused)
{
@@ -30,6 +30,7 @@
#include "mlx5_utils.h"
#include "mlx5.h"
#include "mlx5_autoconf.h"
#include "mlx5_mr.h"

/* Support tunnel matching. */
#define MLX5_FLOW_TUNNEL 10
@@ -94,6 +95,12 @@ enum mlx5_rxq_err_state {
MLX5_RXQ_ERR_STATE_NEED_READY,
};

enum mlx5_rqx_code {
MLX5_RXQ_CODE_EXIT = 0,
MLX5_RXQ_CODE_NOMBUF,
MLX5_RXQ_CODE_DROPPED,
};

/* RX queue descriptor. */
struct mlx5_rxq_data {
unsigned int csum:1; /* Enable checksum offloading. */
@@ -116,6 +123,7 @@ struct mlx5_rxq_data {
volatile uint32_t *rq_db;
volatile uint32_t *cq_db;
uint16_t port_id;
uint32_t elts_ci;
uint32_t rq_ci;
uint16_t consumed_strd; /* Number of consumed strides in WQE. */
uint32_t rq_pi;
@@ -130,11 +138,8 @@ struct mlx5_rxq_data {
uint16_t mprq_max_memcpy_len; /* Maximum size of packet to memcpy. */
volatile void *wqes;
volatile struct mlx5_cqe(*cqes)[];
RTE_STD_C11
union {
struct rte_mbuf *(*elts)[];
struct mlx5_mprq_buf *(*mprq_bufs)[];
};
struct rte_mbuf *(*elts)[];
struct mlx5_mprq_buf *(*mprq_bufs)[];
struct rte_mempool *mp;
struct rte_mempool *mprq_mp; /* Mempool for Multi-Packet RQ. */
struct mlx5_mprq_buf *mprq_repl; /* Stashed mbuf for replenish. */
@@ -423,6 +428,8 @@ int mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq_data);
int mlx5_check_vec_rx_support(struct rte_eth_dev *dev);
uint16_t mlx5_rx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts,
uint16_t pkts_n);
uint16_t mlx5_rx_burst_mprq_vec(void *dpdk_txq, struct rte_mbuf **pkts,
uint16_t pkts_n);

/* mlx5_mr.c */

@@ -700,4 +707,187 @@ mlx5_timestamp_set(struct rte_mbuf *mbuf, int offset,
*RTE_MBUF_DYNFIELD(mbuf, offset, rte_mbuf_timestamp_t *) = timestamp;
}

/**
* Replace MPRQ buffer.
*
* @param rxq
* Pointer to Rx queue structure.
* @param rq_idx
* RQ index to replace.
*/
static __rte_always_inline void
mprq_buf_replace(struct mlx5_rxq_data *rxq, uint16_t rq_idx)
{
const uint32_t strd_n = 1 << rxq->strd_num_n;
struct mlx5_mprq_buf *rep = rxq->mprq_repl;
volatile struct mlx5_wqe_data_seg *wqe =
&((volatile struct mlx5_wqe_mprq *)rxq->wqes)[rq_idx].dseg;
struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_idx];
void *addr;

if (__atomic_load_n(&buf->refcnt, __ATOMIC_RELAXED) > 1) {
MLX5_ASSERT(rep != NULL);
/* Replace MPRQ buf. */
(*rxq->mprq_bufs)[rq_idx] = rep;
/* Replace WQE. */
addr = mlx5_mprq_buf_addr(rep, strd_n);
wqe->addr = rte_cpu_to_be_64((uintptr_t)addr);
/* If there's only one MR, no need to replace LKey in WQE. */
if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh) > 1))
wqe->lkey = mlx5_rx_addr2mr(rxq, (uintptr_t)addr);
/* Stash a mbuf for next replacement. */
if (likely(!rte_mempool_get(rxq->mprq_mp, (void **)&rep)))
rxq->mprq_repl = rep;
else
rxq->mprq_repl = NULL;
/* Release the old buffer. */
mlx5_mprq_buf_free(buf);
} else if (unlikely(rxq->mprq_repl == NULL)) {
struct mlx5_mprq_buf *rep;

/*
* Currently, the MPRQ mempool is out of buffer
* and doing memcpy regardless of the size of Rx
* packet. Retry allocation to get back to
* normal.
*/
if (!rte_mempool_get(rxq->mprq_mp, (void **)&rep))
rxq->mprq_repl = rep;
}
}

/**
* Attach or copy MPRQ buffer content to a packet.
*
* @param rxq
* Pointer to Rx queue structure.
* @param pkt
* Pointer to a packet to fill.
* @param len
* Packet length.
* @param buf
* Pointer to a MPRQ buffer to take the data from.
* @param strd_idx
* Stride index to start from.
* @param strd_cnt
* Number of strides to consume.
*/
static __rte_always_inline enum mlx5_rqx_code
mprq_buf_to_pkt(struct mlx5_rxq_data *rxq, struct rte_mbuf *pkt, uint32_t len,
struct mlx5_mprq_buf *buf, uint16_t strd_idx, uint16_t strd_cnt)
{
const uint32_t strd_n = 1 << rxq->strd_num_n;
const uint16_t strd_sz = 1 << rxq->strd_sz_n;
const uint16_t strd_shift =
MLX5_MPRQ_STRIDE_SHIFT_BYTE * rxq->strd_shift_en;
const int32_t hdrm_overlap =
len + RTE_PKTMBUF_HEADROOM - strd_cnt * strd_sz;
const uint32_t offset = strd_idx * strd_sz + strd_shift;
void *addr = RTE_PTR_ADD(mlx5_mprq_buf_addr(buf, strd_n), offset);

/*
* Memcpy packets to the target mbuf if:
* - The size of packet is smaller than mprq_max_memcpy_len.
* - Out of buffer in the Mempool for Multi-Packet RQ.
* - The packet's stride overlaps a headroom and scatter is off.
*/
if (len <= rxq->mprq_max_memcpy_len ||
rxq->mprq_repl == NULL ||
(hdrm_overlap > 0 && !rxq->strd_scatter_en)) {
if (likely(len <=
(uint32_t)(pkt->buf_len - RTE_PKTMBUF_HEADROOM))) {
rte_memcpy(rte_pktmbuf_mtod(pkt, void *),
addr, len);
DATA_LEN(pkt) = len;
} else if (rxq->strd_scatter_en) {
struct rte_mbuf *prev = pkt;
uint32_t seg_len = RTE_MIN(len, (uint32_t)
(pkt->buf_len - RTE_PKTMBUF_HEADROOM));
uint32_t rem_len = len - seg_len;

rte_memcpy(rte_pktmbuf_mtod(pkt, void *),
addr, seg_len);
DATA_LEN(pkt) = seg_len;
while (rem_len) {
struct rte_mbuf *next =
rte_pktmbuf_alloc(rxq->mp);

if (unlikely(next == NULL))
return MLX5_RXQ_CODE_NOMBUF;
NEXT(prev) = next;
SET_DATA_OFF(next, 0);
addr = RTE_PTR_ADD(addr, seg_len);
seg_len = RTE_MIN(rem_len, (uint32_t)
(next->buf_len - RTE_PKTMBUF_HEADROOM));
rte_memcpy
(rte_pktmbuf_mtod(next, void *),
addr, seg_len);
DATA_LEN(next) = seg_len;
rem_len -= seg_len;
prev = next;
++NB_SEGS(pkt);
}
} else {
return MLX5_RXQ_CODE_DROPPED;
}
} else {
rte_iova_t buf_iova;
struct rte_mbuf_ext_shared_info *shinfo;
uint16_t buf_len = strd_cnt * strd_sz;
void *buf_addr;

/* Increment the refcnt of the whole chunk. */
__atomic_add_fetch(&buf->refcnt, 1, __ATOMIC_RELAXED);
MLX5_ASSERT(__atomic_load_n(&buf->refcnt,
__ATOMIC_RELAXED) <= strd_n + 1);
buf_addr = RTE_PTR_SUB(addr, RTE_PKTMBUF_HEADROOM);
/*
* MLX5 device doesn't use iova but it is necessary in a
* case where the Rx packet is transmitted via a
* different PMD.
*/
buf_iova = rte_mempool_virt2iova(buf) +
RTE_PTR_DIFF(buf_addr, buf);
shinfo = &buf->shinfos[strd_idx];
rte_mbuf_ext_refcnt_set(shinfo, 1);
/*
* EXT_ATTACHED_MBUF will be set to pkt->ol_flags when
* attaching the stride to mbuf and more offload flags
* will be added below by calling rxq_cq_to_mbuf().
* Other fields will be overwritten.
*/
rte_pktmbuf_attach_extbuf(pkt, buf_addr, buf_iova,
buf_len, shinfo);
/* Set mbuf head-room. */
SET_DATA_OFF(pkt, RTE_PKTMBUF_HEADROOM);
MLX5_ASSERT(pkt->ol_flags == EXT_ATTACHED_MBUF);
MLX5_ASSERT(rte_pktmbuf_tailroom(pkt) >=
len - (hdrm_overlap > 0 ? hdrm_overlap : 0));
DATA_LEN(pkt) = len;
/*
* Copy the last fragment of a packet (up to headroom
* size bytes) in case there is a stride overlap with
* a next packet's headroom. Allocate a separate mbuf
* to store this fragment and link it. Scatter is on.
*/
if (hdrm_overlap > 0) {
MLX5_ASSERT(rxq->strd_scatter_en);
struct rte_mbuf *seg =
rte_pktmbuf_alloc(rxq->mp);

if (unlikely(seg == NULL))
return MLX5_RXQ_CODE_NOMBUF;
SET_DATA_OFF(seg, 0);
rte_memcpy(rte_pktmbuf_mtod(seg, void *),
RTE_PTR_ADD(addr, len - hdrm_overlap),
hdrm_overlap);
DATA_LEN(seg) = hdrm_overlap;
DATA_LEN(pkt) = len - hdrm_overlap;
NEXT(pkt) = seg;
NB_SEGS(pkt) = 2;
}
}
return MLX5_RXQ_CODE_EXIT;
}

#endif /* RTE_PMD_MLX5_RXTX_H_ */
@@ -77,6 +77,177 @@ rxq_handle_pending_error(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
return n;
}

/**
* Replenish buffers for RX in bulk.
*
* @param rxq
* Pointer to RX queue structure.
*/
static inline void
mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
const uint16_t q_n = 1 << rxq->elts_n;
const uint16_t q_mask = q_n - 1;
uint16_t n = q_n - (rxq->rq_ci - rxq->rq_pi);
uint16_t elts_idx = rxq->rq_ci & q_mask;
struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
volatile struct mlx5_wqe_data_seg *wq =
&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];
unsigned int i;

if (n >= rxq->rq_repl_thresh) {
MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n));
MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) >
MLX5_VPMD_DESCS_PER_LOOP);
/* Not to cross queue end. */
n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
rxq->stats.rx_nombuf += n;
return;
}
for (i = 0; i < n; ++i) {
void *buf_addr;

/*
* In order to support the mbufs with external attached
* data buffer we should use the buf_addr pointer
* instead of rte_mbuf_buf_addr(). It touches the mbuf
* itself and may impact the performance.
*/
buf_addr = elts[i]->buf_addr;
wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
RTE_PKTMBUF_HEADROOM);
/* If there's a single MR, no need to replace LKey. */
if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh)
> 1))
wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
}
rxq->rq_ci += n;
/* Prevent overflowing into consumed mbufs. */
elts_idx = rxq->rq_ci & q_mask;
for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
rte_io_wmb();
*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
}
}

/**
* Replenish buffers for MPRQ RX in bulk.
*
* @param rxq
* Pointer to RX queue structure.
*/
static inline void
mlx5_rx_mprq_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
const uint16_t wqe_n = 1 << rxq->elts_n;
const uint32_t strd_n = 1 << rxq->strd_num_n;
const uint32_t elts_n = wqe_n * strd_n;
const uint32_t wqe_mask = elts_n - 1;
uint32_t n = elts_n - (rxq->elts_ci - rxq->rq_pi);
uint32_t elts_idx = rxq->elts_ci & wqe_mask;
struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];

/* Not to cross queue end. */
if (n >= rxq->rq_repl_thresh) {
MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n));
MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(elts_n) >
MLX5_VPMD_DESCS_PER_LOOP);
n = RTE_MIN(n, elts_n - elts_idx);
if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
rxq->stats.rx_nombuf += n;
return;
}
rxq->elts_ci += n;
}
}

/**
* Copy or attach MPRQ buffers to RX SW ring.
*
* @param rxq
* Pointer to RX queue structure.
* @param pkts
* Pointer to array of packets to be stored.
* @param pkts_n
* Number of packets to be stored.
*
* @return
* Number of packets successfully copied/attached (<= pkts_n).
*/
static inline uint16_t
rxq_copy_mprq_mbuf_v(struct mlx5_rxq_data *rxq,
struct rte_mbuf **pkts, uint16_t pkts_n)
{
const uint16_t wqe_n = 1 << rxq->elts_n;
const uint16_t wqe_mask = wqe_n - 1;
const uint16_t strd_sz = 1 << rxq->strd_sz_n;
const uint32_t strd_n = 1 << rxq->strd_num_n;
const uint32_t elts_n = wqe_n * strd_n;
const uint32_t elts_mask = elts_n - 1;
uint32_t elts_idx = rxq->rq_pi & elts_mask;
struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
uint32_t rq_ci = rxq->rq_ci;
struct mlx5_mprq_buf *buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
uint16_t copied = 0;
uint16_t i = 0;

for (i = 0; i < pkts_n; ++i) {
uint16_t strd_cnt;
enum mlx5_rqx_code rxq_code;

if (rxq->consumed_strd == strd_n) {
/* Replace WQE if the buffer is still in use. */
mprq_buf_replace(rxq, rq_ci & wqe_mask);
/* Advance to the next WQE. */
rxq->consumed_strd = 0;
rq_ci++;
buf = (*rxq->mprq_bufs)[rq_ci & wqe_mask];
}

if (!elts[i]->pkt_len) {
rxq->consumed_strd = strd_n;
rte_pktmbuf_free_seg(elts[i]);
#ifdef MLX5_PMD_SOFT_COUNTERS
rxq->stats.ipackets -= 1;
#endif
continue;
}
strd_cnt = (elts[i]->pkt_len / strd_sz) +
((elts[i]->pkt_len % strd_sz) ? 1 : 0);
rxq_code = mprq_buf_to_pkt(rxq, elts[i], elts[i]->pkt_len,
buf, rxq->consumed_strd, strd_cnt);
rxq->consumed_strd += strd_cnt;
if (unlikely(rxq_code != MLX5_RXQ_CODE_EXIT)) {
rte_pktmbuf_free_seg(elts[i]);
#ifdef MLX5_PMD_SOFT_COUNTERS
rxq->stats.ipackets -= 1;
rxq->stats.ibytes -= elts[i]->pkt_len;
#endif
if (rxq_code == MLX5_RXQ_CODE_NOMBUF) {
++rxq->stats.rx_nombuf;
break;
}
if (rxq_code == MLX5_RXQ_CODE_DROPPED) {
++rxq->stats.idropped;
continue;
}
}
pkts[copied++] = elts[i];
}
rxq->rq_pi += i;
rxq->cq_ci += i;
rte_io_wmb();
*rxq->cq_db = rte_cpu_to_be_32(rxq->cq_ci);
if (rq_ci != rxq->rq_ci) {
rxq->rq_ci = rq_ci;
rte_io_wmb();
*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
}
return copied;
}

/**
* Receive burst of packets. An errored completion also consumes a mbuf, but the
* packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
@@ -204,7 +375,142 @@ mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
bool no_cq = false;

do {
nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn, &err, &no_cq);
nb_rx = rxq_burst_v(rxq, pkts + tn, pkts_n - tn,
&err, &no_cq);
if (unlikely(err | rxq->err_state))
nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
tn += nb_rx;
if (unlikely(no_cq))
break;
} while (tn != pkts_n);
return tn;
}

/**
* Receive burst of packets. An errored completion also consumes a mbuf, but the
* packet_type is set to be RTE_PTYPE_ALL_MASK. Marked mbufs should be freed
* before returning to application.
*
* @param rxq
* Pointer to RX queue structure.
* @param[out] pkts
* Array to store received packets.
* @param pkts_n
* Maximum number of packets in array.
* @param[out] err
* Pointer to a flag. Set non-zero value if pkts array has at least one error
* packet to handle.
* @param[out] no_cq
* Pointer to a boolean. Set true if no new CQE seen.
*
* @return
* Number of packets received including errors (<= pkts_n).
*/
static inline uint16_t
rxq_burst_mprq_v(struct mlx5_rxq_data *rxq, struct rte_mbuf **pkts,
uint16_t pkts_n, uint64_t *err, bool *no_cq)
{
const uint16_t q_n = 1 << rxq->cqe_n;
const uint16_t q_mask = q_n - 1;
const uint16_t wqe_n = 1 << rxq->elts_n;
const uint32_t strd_n = 1 << rxq->strd_num_n;
const uint32_t elts_n = wqe_n * strd_n;
const uint32_t elts_mask = elts_n - 1;
volatile struct mlx5_cqe *cq;
struct rte_mbuf **elts;
uint64_t comp_idx = MLX5_VPMD_DESCS_PER_LOOP;
uint16_t nocmp_n = 0;
uint16_t rcvd_pkt = 0;
uint16_t cp_pkt = 0;
unsigned int cq_idx = rxq->cq_ci & q_mask;
unsigned int elts_idx;

MLX5_ASSERT(rxq->sges_n == 0);
cq = &(*rxq->cqes)[cq_idx];
rte_prefetch0(cq);
rte_prefetch0(cq + 1);
rte_prefetch0(cq + 2);
rte_prefetch0(cq + 3);
pkts_n = RTE_MIN(pkts_n, MLX5_VPMD_RX_MAX_BURST);
mlx5_rx_mprq_replenish_bulk_mbuf(rxq);
/* See if there're unreturned mbufs from compressed CQE. */
rcvd_pkt = rxq->decompressed;
if (rcvd_pkt > 0) {
rcvd_pkt = RTE_MIN(rcvd_pkt, pkts_n);
cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, rcvd_pkt);
rxq->decompressed -= rcvd_pkt;
pkts += cp_pkt;
}
elts_idx = rxq->rq_pi & elts_mask;
elts = &(*rxq->elts)[elts_idx];
/* Not to overflow pkts array. */
pkts_n = RTE_ALIGN_FLOOR(pkts_n - cp_pkt, MLX5_VPMD_DESCS_PER_LOOP);
/* Not to cross queue end. */
pkts_n = RTE_MIN(pkts_n, elts_n - elts_idx);
pkts_n = RTE_MIN(pkts_n, q_n - cq_idx);
/* Not to move past the allocated mbufs. */
pkts_n = RTE_MIN(pkts_n, rxq->elts_ci - rxq->rq_pi);
if (!pkts_n) {
*no_cq = !cp_pkt;
return cp_pkt;
}
/* At this point, there shouldn't be any remaining packets. */
MLX5_ASSERT(rxq->decompressed == 0);
/* Process all the CQEs */
nocmp_n = rxq_cq_process_v(rxq, cq, elts, pkts, pkts_n, err, &comp_idx);
/* If no new CQE seen, return without updating cq_db. */
if (unlikely(!nocmp_n && comp_idx == MLX5_VPMD_DESCS_PER_LOOP)) {
*no_cq = true;
return cp_pkt;
}
/* Update the consumer indexes for non-compressed CQEs. */
MLX5_ASSERT(nocmp_n <= pkts_n);
cp_pkt = rxq_copy_mprq_mbuf_v(rxq, pkts, nocmp_n);
rcvd_pkt += cp_pkt;
/* Decompress the last CQE if compressed. */
if (comp_idx < MLX5_VPMD_DESCS_PER_LOOP) {
MLX5_ASSERT(comp_idx == (nocmp_n % MLX5_VPMD_DESCS_PER_LOOP));
rxq->decompressed = rxq_cq_decompress_v(rxq, &cq[nocmp_n],
&elts[nocmp_n]);
/* Return more packets if needed. */
if (nocmp_n < pkts_n) {
uint16_t n = rxq->decompressed;

n = RTE_MIN(n, pkts_n - nocmp_n);
cp_pkt = rxq_copy_mprq_mbuf_v(rxq, &pkts[cp_pkt], n);
rcvd_pkt += cp_pkt;
rxq->decompressed -= n;
}
}
*no_cq = !rcvd_pkt;
return rcvd_pkt;
}

/**
* DPDK callback for vectorized MPRQ RX.
*
* @param dpdk_rxq
* Generic pointer to RX queue structure.
* @param[out] pkts
* Array to store received packets.
* @param pkts_n
* Maximum number of packets in array.
*
* @return
* Number of packets successfully received (<= pkts_n).
*/
uint16_t
mlx5_rx_burst_mprq_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
struct mlx5_rxq_data *rxq = dpdk_rxq;
uint16_t nb_rx = 0;
uint16_t tn = 0;
uint64_t err = 0;
bool no_cq = false;

do {
nb_rx = rxq_burst_mprq_v(rxq, pkts + tn, pkts_n - tn,
&err, &no_cq);
if (unlikely(err | rxq->err_state))
nb_rx = rxq_handle_pending_error(rxq, pkts + tn, nb_rx);
tn += nb_rx;
@@ -229,8 +535,6 @@ mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq)
struct mlx5_rxq_ctrl *ctrl =
container_of(rxq, struct mlx5_rxq_ctrl, rxq);

if (mlx5_mprq_enabled(ETH_DEV(ctrl->priv)))
return -ENOTSUP;
if (!ctrl->priv->config.rx_vec_en || rxq->sges_n != 0)
return -ENOTSUP;
if (rxq->lro)
@@ -257,8 +561,6 @@ mlx5_check_vec_rx_support(struct rte_eth_dev *dev)
return -ENOTSUP;
if (!priv->config.rx_vec_en)
return -ENOTSUP;
if (mlx5_mprq_enabled(dev))
return -ENOTSUP;
/* All the configured queues should support. */
for (i = 0; i < priv->rxqs_n; ++i) {
struct mlx5_rxq_data *rxq = (*priv->rxqs)[i];
@@ -12,7 +12,6 @@
#include <mlx5_prm.h>

#include "mlx5_autoconf.h"

#include "mlx5_mr.h"

/* HW checksum offload capabilities of vectorized Tx. */
@@ -68,59 +67,4 @@ S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, sop_drop_qpn) ==
S_ASSERT_MLX5_CQE(offsetof(struct mlx5_cqe, op_own) ==
offsetof(struct mlx5_cqe, sop_drop_qpn) + 7);

/**
* Replenish buffers for RX in bulk.
*
* @param rxq
* Pointer to RX queue structure.
*/
static inline void
mlx5_rx_replenish_bulk_mbuf(struct mlx5_rxq_data *rxq)
{
const uint16_t q_n = 1 << rxq->elts_n;
const uint16_t q_mask = q_n - 1;
uint16_t n = q_n - (rxq->rq_ci - rxq->rq_pi);
uint16_t elts_idx = rxq->rq_ci & q_mask;
struct rte_mbuf **elts = &(*rxq->elts)[elts_idx];
volatile struct mlx5_wqe_data_seg *wq =
&((volatile struct mlx5_wqe_data_seg *)rxq->wqes)[elts_idx];
unsigned int i;

if (n >= rxq->rq_repl_thresh) {
MLX5_ASSERT(n >= MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n));
MLX5_ASSERT(MLX5_VPMD_RXQ_RPLNSH_THRESH(q_n) >
MLX5_VPMD_DESCS_PER_LOOP);
/* Not to cross queue end. */
n = RTE_MIN(n - MLX5_VPMD_DESCS_PER_LOOP, q_n - elts_idx);
if (rte_mempool_get_bulk(rxq->mp, (void *)elts, n) < 0) {
rxq->stats.rx_nombuf += n;
return;
}
for (i = 0; i < n; ++i) {
void *buf_addr;

/*
* In order to support the mbufs with external attached
* data buffer we should use the buf_addr pointer
* instead of rte_mbuf_buf_addr(). It touches the mbuf
* itself and may impact the performance.
*/
buf_addr = elts[i]->buf_addr;
wq[i].addr = rte_cpu_to_be_64((uintptr_t)buf_addr +
RTE_PKTMBUF_HEADROOM);
/* If there's a single MR, no need to replace LKey. */
if (unlikely(mlx5_mr_btree_len(&rxq->mr_ctrl.cache_bh)
> 1))
wq[i].lkey = mlx5_rx_mb2mr(rxq, elts[i]);
}
rxq->rq_ci += n;
/* Prevent overflowing into consumed mbufs. */
elts_idx = rxq->rq_ci & q_mask;
for (i = 0; i < MLX5_VPMD_DESCS_PER_LOOP; ++i)
(*rxq->elts)[elts_idx + i] = &rxq->fake_mbuf;
rte_io_wmb();
*rxq->rq_db = rte_cpu_to_be_32(rxq->rq_ci);
}
}

#endif /* RTE_PMD_MLX5_RXTX_VEC_H_ */