net/mlx5: add vectorized Rx/Tx burst for x86
The vectorized burst routines require the x86_64 architecture. If all the
conditions are met, the vectorized burst functions are enabled automatically.
The decision is made individually for Rx and Tx; there is no PMD option to
force a selection.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
commit 6cb559d67b
parent f3d2dcc856
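As a quick orientation before the diff, here is a minimal, self-contained sketch of the selection policy described in the commit message. It is not driver code: the check_*() helpers are hypothetical stand-ins for priv_check_raw_vec_tx_support(), priv_check_vec_tx_support() and priv_check_vec_rx_support(), and only the precedence mirrors the patch (raw vectorized Tx, then vectorized Tx, then Enhanced MPW; vectorized Rx, then scalar Rx).

/*
 * Sketch only: models the automatic, per-Rx/Tx burst selection this patch
 * performs in priv_select_tx_function()/priv_select_rx_function().
 * The check_*() helpers return fixed results here; the real checkers
 * inspect device and queue configuration, and the Tx choice is further
 * gated on Enhanced MPW being active.
 */
#include <stdio.h>

static int check_raw_vec_tx(void) { return 1; } /* assumed: conditions met */
static int check_vec_tx(void) { return 1; }     /* assumed: conditions met */
static int check_vec_rx(void) { return 1; }     /* assumed: conditions met */

int
main(void)
{
    const char *tx_burst = "mlx5_tx_burst_empw";
    const char *rx_burst = "mlx5_rx_burst";

    if (check_vec_tx() > 0)
        tx_burst = (check_raw_vec_tx() > 0) ?
                   "mlx5_tx_burst_raw_vec" : "mlx5_tx_burst_vec";
    if (check_vec_rx() > 0)
        rx_burst = "mlx5_rx_burst_vec";
    printf("selected Tx burst: %s\n", tx_burst);
    printf("selected Rx burst: %s\n", rx_burst);
    return 0;
}

In the actual hunks below, the vectorized Tx choice applies only when priv->mps == MLX5_MPW_ENHANCED, and the Rx path calls priv_prep_vec_rx_function() before switching to mlx5_rx_burst_vec.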
@@ -39,6 +39,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxq.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_txq.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxtx.c
+ifeq ($(CONFIG_RTE_ARCH_X86_64),y)
+SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxtx_vec_sse.c
+endif
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_trigger.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_ethdev.c
 SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mac.c

@@ -89,4 +89,22 @@
 /* Maximum Packet headers size (L2+L3+L4) for TSO. */
 #define MLX5_MAX_TSO_HEADER 128
 
+/* Default minimum number of Tx queues for vectorized Tx. */
+#define MLX5_VPMD_MIN_TXQS 4
+
+/* Threshold of buffer replenishment for vectorized Rx. */
+#define MLX5_VPMD_RXQ_RPLNSH_THRESH 64U
+
+/* Maximum size of burst for vectorized Rx. */
+#define MLX5_VPMD_RX_MAX_BURST MLX5_VPMD_RXQ_RPLNSH_THRESH
+
+/*
+ * Maximum size of burst for vectorized Tx. This is related to the maximum size
+ * of Enhanced MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
+ */
+#define MLX5_VPMD_TX_MAX_BURST 32U
+
+/* Number of packets vectorized Rx can simultaneously process in a loop. */
+#define MLX5_VPMD_DESCS_PER_LOOP 4
+
 #endif /* RTE_PMD_MLX5_DEFS_H_ */

@@ -723,7 +723,8 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
 
     };
 
-    if (dev->rx_pkt_burst == mlx5_rx_burst)
+    if (dev->rx_pkt_burst == mlx5_rx_burst ||
+        dev->rx_pkt_burst == mlx5_rx_burst_vec)
         return ptypes;
     return NULL;
 }

@@ -1585,9 +1586,16 @@ priv_select_tx_function(struct priv *priv)
     priv->dev->tx_pkt_burst = mlx5_tx_burst;
     /* Select appropriate TX function. */
     if (priv->mps == MLX5_MPW_ENHANCED) {
-        priv->dev->tx_pkt_burst =
-            mlx5_tx_burst_empw;
-        DEBUG("selected Enhanced MPW TX function");
+        if (priv_check_vec_tx_support(priv) > 0) {
+            if (priv_check_raw_vec_tx_support(priv) > 0)
+                priv->dev->tx_pkt_burst = mlx5_tx_burst_raw_vec;
+            else
+                priv->dev->tx_pkt_burst = mlx5_tx_burst_vec;
+            DEBUG("selected Enhanced MPW TX vectorized function");
+        } else {
+            priv->dev->tx_pkt_burst = mlx5_tx_burst_empw;
+            DEBUG("selected Enhanced MPW TX function");
+        }
     } else if (priv->mps && priv->txq_inline) {
         priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline;
         DEBUG("selected MPW inline TX function");
@@ -1606,5 +1614,11 @@ priv_select_tx_function(struct priv *priv)
 void
 priv_select_rx_function(struct priv *priv)
 {
-    priv->dev->rx_pkt_burst = mlx5_rx_burst;
+    if (priv_check_vec_rx_support(priv) > 0) {
+        priv_prep_vec_rx_function(priv);
+        priv->dev->rx_pkt_burst = mlx5_rx_burst_vec;
+        DEBUG("selected RX vectorized function");
+    } else {
+        priv->dev->rx_pkt_burst = mlx5_rx_burst;
+    }
 }

@@ -633,6 +633,41 @@ priv_rehash_flows(struct priv *priv)
     return 0;
 }
 
+/**
+ * Unlike regular Rx function, vPMD Rx doesn't replace mbufs immediately when
+ * receiving packets. Instead it replaces later in bulk. In rxq->elts[], entries
+ * from rq_pi to rq_ci are owned by device but the rest is already delivered to
+ * application. In order not to reuse those mbufs by rxq_alloc_elts(), this
+ * function must be called to replace used mbufs.
+ *
+ * @param rxq
+ *   Pointer to RX queue structure.
+ *
+ * @return
+ *   0 on success, errno value on failure.
+ */
+static int
+rxq_trim_elts(struct rxq *rxq)
+{
+    const uint16_t q_n = (1 << rxq->elts_n);
+    const uint16_t q_mask = q_n - 1;
+    uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
+    uint16_t i;
+
+    if (!rxq->trim_elts)
+        return 0;
+    for (i = 0; i < used; ++i) {
+        struct rte_mbuf *buf;
+        buf = rte_pktmbuf_alloc(rxq->mp);
+        if (!buf)
+            return ENOMEM;
+        (*rxq->elts)[(rxq->rq_ci + i) & q_mask] = buf;
+    }
+    rxq->rq_pi = rxq->rq_ci;
+    rxq->trim_elts = 0;
+    return 0;
+}
+
 /**
  * Allocate RX queue elements.
  *
@@ -800,6 +835,7 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl)
         return err;
     }
     /* Snatch mbufs from original queue. */
+    claim_zero(rxq_trim_elts(&rxq_ctrl->rxq));
     claim_zero(rxq_alloc_elts(rxq_ctrl, elts_n, rxq_ctrl->rxq.elts));
     for (i = 0; i != elts_n; ++i) {
         struct rte_mbuf *buf = (*rxq_ctrl->rxq.elts)[i];

@@ -860,6 +896,7 @@ rxq_setup(struct rxq_ctrl *tmpl)
     tmpl->rxq.cqe_n = log2above(cq_info.cqe_cnt);
     tmpl->rxq.cq_ci = 0;
     tmpl->rxq.rq_ci = 0;
+    tmpl->rxq.rq_pi = 0;
     tmpl->rxq.cq_db = cq_info.dbrec;
     tmpl->rxq.wqes =
         (volatile struct mlx5_wqe_data_seg (*)[])

@@ -993,7 +1030,12 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
     if (priv->cqe_comp) {
         attr.cq.comp_mask |= IBV_EXP_CQ_INIT_ATTR_FLAGS;
         attr.cq.flags |= IBV_EXP_CQ_COMPRESSED_CQE;
-        cqe_n = (desc * 2) - 1; /* Double the number of CQEs. */
+        /*
+         * For vectorized Rx, it must not be doubled in order to
+         * make cq_ci and rq_ci aligned.
+         */
+        if (rxq_check_vec_support(&tmpl.rxq) < 0)
+            cqe_n = (desc * 2) - 1; /* Double the number of CQEs. */
     }
     tmpl.cq = ibv_exp_create_cq(priv->ctx, cqe_n, NULL, tmpl.channel, 0,
                                 &attr.cq);

@@ -1103,7 +1145,9 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
     if (rxq_ctrl->rxq.elts_n) {
         assert(1 << rxq_ctrl->rxq.elts_n == desc);
         assert(rxq_ctrl->rxq.elts != tmpl.rxq.elts);
-        ret = rxq_alloc_elts(&tmpl, desc, rxq_ctrl->rxq.elts);
+        ret = rxq_trim_elts(&rxq_ctrl->rxq);
+        if (!ret)
+            ret = rxq_alloc_elts(&tmpl, desc, rxq_ctrl->rxq.elts);
     } else
         ret = rxq_alloc_elts(&tmpl, desc, NULL);
     if (ret) {

@@ -1165,6 +1209,7 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
     struct priv *priv = dev->data->dev_private;
     struct rxq *rxq = (*priv->rxqs)[idx];
     struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
+    const uint16_t desc_pad = MLX5_VPMD_DESCS_PER_LOOP; /* For vPMD. */
     int ret;
 
     if (mlx5_is_secondary())

@@ -1198,7 +1243,8 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
     if (rxq_ctrl->rxq.elts_n != log2above(desc)) {
         rxq_ctrl = rte_realloc(rxq_ctrl,
                                sizeof(*rxq_ctrl) +
-                               desc * sizeof(struct rte_mbuf *),
+                               (desc + desc_pad) *
+                               sizeof(struct rte_mbuf *),
                                RTE_CACHE_LINE_SIZE);
         if (!rxq_ctrl) {
             ERROR("%p: unable to reallocate queue index %u",

@@ -1209,7 +1255,8 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
         }
     } else {
         rxq_ctrl = rte_calloc_socket("RXQ", 1, sizeof(*rxq_ctrl) +
-                                     desc * sizeof(struct rte_mbuf *),
+                                     (desc + desc_pad) *
+                                     sizeof(struct rte_mbuf *),
                                      0, socket);
         if (rxq_ctrl == NULL) {
             ERROR("%p: unable to allocate queue index %u",

@@ -69,19 +69,6 @@
 #include "mlx5_defs.h"
 #include "mlx5_prm.h"
 
-static __rte_always_inline int
-check_cqe(volatile struct mlx5_cqe *cqe,
-          unsigned int cqes_n, const uint16_t ci);
-
-static __rte_always_inline void
-txq_complete(struct txq *txq);
-
-static __rte_always_inline uint32_t
-txq_mb2mr(struct txq *txq, struct rte_mbuf *mb);
-
-static __rte_always_inline void
-mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe);
-
 static __rte_always_inline uint32_t
 rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe);

@@ -92,101 +79,29 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
 static __rte_always_inline uint32_t
 rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe);
 
-#ifndef NDEBUG
-
-/**
- * Verify or set magic value in CQE.
- *
- * @param cqe
- *   Pointer to CQE.
- *
- * @return
- *   0 the first time.
+/*
+ * The index to the array should have:
+ * bit[1:0] = l3_hdr_type, bit[2] = tunneled, bit[3] = outer_l3_type
  */
-static inline int
-check_cqe_seen(volatile struct mlx5_cqe *cqe)
-{
-    static const uint8_t magic[] = "seen";
-    volatile uint8_t (*buf)[sizeof(cqe->rsvd0)] = &cqe->rsvd0;
-    int ret = 1;
-    unsigned int i;
-
-    for (i = 0; i < sizeof(magic) && i < sizeof(*buf); ++i)
-        if (!ret || (*buf)[i] != magic[i]) {
-            ret = 0;
-            (*buf)[i] = magic[i];
-        }
-    return ret;
-}
-
-#endif /* NDEBUG */
-
-/**
- * Check whether CQE is valid.
- *
- * @param cqe
- *   Pointer to CQE.
- * @param cqes_n
- *   Size of completion queue.
- * @param ci
- *   Consumer index.
- *
- * @return
- *   0 on success, 1 on failure.
- */
-static inline int
-check_cqe(volatile struct mlx5_cqe *cqe,
-          unsigned int cqes_n, const uint16_t ci)
-{
-    uint16_t idx = ci & cqes_n;
-    uint8_t op_own = cqe->op_own;
-    uint8_t op_owner = MLX5_CQE_OWNER(op_own);
-    uint8_t op_code = MLX5_CQE_OPCODE(op_own);
-
-    if (unlikely((op_owner != (!!(idx))) || (op_code == MLX5_CQE_INVALID)))
-        return 1; /* No CQE. */
-#ifndef NDEBUG
-    if ((op_code == MLX5_CQE_RESP_ERR) ||
-        (op_code == MLX5_CQE_REQ_ERR)) {
-        volatile struct mlx5_err_cqe *err_cqe = (volatile void *)cqe;
-        uint8_t syndrome = err_cqe->syndrome;
-
-        if ((syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR) ||
-            (syndrome == MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR))
-            return 0;
-        if (!check_cqe_seen(cqe))
-            ERROR("unexpected CQE error %u (0x%02x)"
-                  " syndrome 0x%02x",
-                  op_code, op_code, syndrome);
-        return 1;
-    } else if ((op_code != MLX5_CQE_RESP_SEND) &&
-               (op_code != MLX5_CQE_REQ)) {
-        if (!check_cqe_seen(cqe))
-            ERROR("unexpected CQE opcode %u (0x%02x)",
-                  op_code, op_code);
-        return 1;
-    }
-#endif /* NDEBUG */
-    return 0;
-}
-
-/**
- * Return the address of the WQE.
- *
- * @param txq
- *   Pointer to TX queue structure.
- * @param wqe_ci
- *   WQE consumer index.
- *
- * @return
- *   WQE address.
- */
-static inline uintptr_t *
-tx_mlx5_wqe(struct txq *txq, uint16_t ci)
-{
-    ci &= ((1 << txq->wqe_n) - 1);
-    return (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE);
-}
+const uint32_t mlx5_ptype_table[] = {
+    RTE_PTYPE_UNKNOWN,
+    RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, /* b0001 */
+    RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, /* b0010 */
+    RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+    RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+    RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, /* b0101 */
+    RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
+    RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, /* b0110 */
+    RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+    RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, /* b1001 */
+    RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, /* b1010 */
+    RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
+    RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+    RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, /* b1101 */
+    RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
+    RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, /* b1110 */
+    RTE_PTYPE_ALL_MASK /* b1111 */
+};
 
 /**
  * Return the size of tailroom of WQ.

@@ -244,174 +159,6 @@ mlx5_copy_to_wq(void *dst, const void *src, size_t n,
     return ret;
 }
 
-/**
- * Manage TX completions.
- *
- * When sending a burst, mlx5_tx_burst() posts several WRs.
- *
- * @param txq
- *   Pointer to TX queue structure.
- */
-static inline void
-txq_complete(struct txq *txq)
-{
-    const uint16_t elts_n = 1 << txq->elts_n;
-    const uint16_t elts_m = elts_n - 1;
-    const unsigned int cqe_n = 1 << txq->cqe_n;
-    const unsigned int cqe_cnt = cqe_n - 1;
-    uint16_t elts_free = txq->elts_tail;
-    uint16_t elts_tail;
-    uint16_t cq_ci = txq->cq_ci;
-    volatile struct mlx5_cqe *cqe = NULL;
-    volatile struct mlx5_wqe_ctrl *ctrl;
-    struct rte_mbuf *m, *free[elts_n];
-    struct rte_mempool *pool = NULL;
-    unsigned int blk_n = 0;
-
-    do {
-        volatile struct mlx5_cqe *tmp;
-
-        tmp = &(*txq->cqes)[cq_ci & cqe_cnt];
-        if (check_cqe(tmp, cqe_n, cq_ci))
-            break;
-        cqe = tmp;
-#ifndef NDEBUG
-        if (MLX5_CQE_FORMAT(cqe->op_own) == MLX5_COMPRESSED) {
-            if (!check_cqe_seen(cqe))
-                ERROR("unexpected compressed CQE, TX stopped");
-            return;
-        }
-        if ((MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) ||
-            (MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_REQ_ERR)) {
-            if (!check_cqe_seen(cqe))
-                ERROR("unexpected error CQE, TX stopped");
-            return;
-        }
-#endif /* NDEBUG */
-        ++cq_ci;
-    } while (1);
-    if (unlikely(cqe == NULL))
-        return;
-    txq->wqe_pi = ntohs(cqe->wqe_counter);
-    ctrl = (volatile struct mlx5_wqe_ctrl *)
-        tx_mlx5_wqe(txq, txq->wqe_pi);
-    elts_tail = ctrl->ctrl3;
-    assert((elts_tail & elts_m) < (1 << txq->wqe_n));
-    /* Free buffers. */
-    while (elts_free != elts_tail) {
-        m = rte_pktmbuf_prefree_seg((*txq->elts)[elts_free++ & elts_m]);
-        if (likely(m != NULL)) {
-            if (likely(m->pool == pool)) {
-                free[blk_n++] = m;
-            } else {
-                if (likely(pool != NULL))
-                    rte_mempool_put_bulk(pool,
-                                         (void *)free,
-                                         blk_n);
-                free[0] = m;
-                pool = m->pool;
-                blk_n = 1;
-            }
-        }
-    }
-    if (blk_n)
-        rte_mempool_put_bulk(pool, (void *)free, blk_n);
-#ifndef NDEBUG
-    elts_free = txq->elts_tail;
-    /* Poisoning. */
-    while (elts_free != elts_tail) {
-        memset(&(*txq->elts)[elts_free & elts_m],
-               0x66,
-               sizeof((*txq->elts)[elts_free & elts_m]));
-        ++elts_free;
-    }
-#endif
-    txq->cq_ci = cq_ci;
-    txq->elts_tail = elts_tail;
-    /* Update the consumer index. */
-    rte_wmb();
-    *txq->cq_db = htonl(cq_ci);
-}
-
-/**
- * Get Memory Pool (MP) from mbuf. If mbuf is indirect, the pool from which
- * the cloned mbuf is allocated is returned instead.
- *
- * @param buf
- *   Pointer to mbuf.
- *
- * @return
- *   Memory pool where data is located for given mbuf.
- */
-static struct rte_mempool *
-txq_mb2mp(struct rte_mbuf *buf)
-{
-    if (unlikely(RTE_MBUF_INDIRECT(buf)))
-        return rte_mbuf_from_indirect(buf)->pool;
-    return buf->pool;
-}
-
-/**
- * Get Memory Region (MR) <-> rte_mbuf association from txq->mp2mr[].
- * Add MP to txq->mp2mr[] if it's not registered yet. If mp2mr[] is full,
- * remove an entry first.
- *
- * @param txq
- *   Pointer to TX queue structure.
- * @param[in] mp
- *   Memory Pool for which a Memory Region lkey must be returned.
- *
- * @return
- *   mr->lkey on success, (uint32_t)-1 on failure.
- */
-static inline uint32_t
-txq_mb2mr(struct txq *txq, struct rte_mbuf *mb)
-{
-    uint16_t i = txq->mr_cache_idx;
-    uintptr_t addr = rte_pktmbuf_mtod(mb, uintptr_t);
-
-    assert(i < RTE_DIM(txq->mp2mr));
-    if (likely(txq->mp2mr[i].start <= addr && txq->mp2mr[i].end >= addr))
-        return txq->mp2mr[i].lkey;
-    for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
-        if (unlikely(txq->mp2mr[i].mr == NULL)) {
-            /* Unknown MP, add a new MR for it. */
-            break;
-        }
-        if (txq->mp2mr[i].start <= addr &&
-            txq->mp2mr[i].end >= addr) {
-            assert(txq->mp2mr[i].lkey != (uint32_t)-1);
-            assert(htonl(txq->mp2mr[i].mr->lkey) ==
-                   txq->mp2mr[i].lkey);
-            txq->mr_cache_idx = i;
-            return txq->mp2mr[i].lkey;
-        }
-    }
-    txq->mr_cache_idx = 0;
-    return txq_mp2mr_reg(txq, txq_mb2mp(mb), i);
-}
-
-/**
- * Ring TX queue doorbell.
- *
- * @param txq
- *   Pointer to TX queue structure.
- * @param wqe
- *   Pointer to the last WQE posted in the NIC.
- */
-static inline void
-mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe)
-{
-    uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
-    volatile uint64_t *src = ((volatile uint64_t *)wqe);
-
-    rte_wmb();
-    *txq->qp_db = htonl(txq->wqe_ci);
-    /* Ensure ordering between DB record and BF copy. */
-    rte_wmb();
-    *dst = *src;
-}
-
 /**
  * DPDK callback to check the status of a tx descriptor.
  *
@@ -429,7 +176,7 @@ mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
     struct txq *txq = tx_queue;
     uint16_t used;
 
-    txq_complete(txq);
+    mlx5_tx_complete(txq);
     used = txq->elts_head - txq->elts_tail;
     if (offset < used)
         return RTE_ETH_TX_DESC_FULL;

@@ -525,7 +272,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
     /* Prefetch first packet cacheline. */
     rte_prefetch0(*pkts);
     /* Start processing. */
-    txq_complete(txq);
+    mlx5_tx_complete(txq);
     max_elts = (elts_n - (elts_head - txq->elts_tail));
     max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
     if (unlikely(!max_wqe))

@@ -773,7 +520,7 @@ use_dseg:
     naddr = htonll(addr);
     *dseg = (rte_v128u32_t){
         htonl(length),
-        txq_mb2mr(txq, buf),
+        mlx5_tx_mb2mr(txq, buf),
         naddr,
         naddr >> 32,
     };

@@ -812,7 +559,7 @@ next_seg:
     naddr = htonll(rte_pktmbuf_mtod(buf, uintptr_t));
     *dseg = (rte_v128u32_t){
         htonl(length),
-        txq_mb2mr(txq, buf),
+        mlx5_tx_mb2mr(txq, buf),
         naddr,
         naddr >> 32,
     };

@@ -991,7 +738,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
     rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
     rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
     /* Start processing. */
-    txq_complete(txq);
+    mlx5_tx_complete(txq);
     max_elts = (elts_n - (elts_head - txq->elts_tail));
     max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
     if (unlikely(!max_wqe))

@@ -1054,7 +801,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
     addr = rte_pktmbuf_mtod(buf, uintptr_t);
     *dseg = (struct mlx5_wqe_data_seg){
         .byte_count = htonl(DATA_LEN(buf)),
-        .lkey = txq_mb2mr(txq, buf),
+        .lkey = mlx5_tx_mb2mr(txq, buf),
         .addr = htonll(addr),
     };
 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)

@@ -1214,7 +961,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
     rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
     rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
     /* Start processing. */
-    txq_complete(txq);
+    mlx5_tx_complete(txq);
     max_elts = (elts_n - (elts_head - txq->elts_tail));
     do {
         struct rte_mbuf *buf = *(pkts++);

@@ -1300,7 +1047,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
     addr = rte_pktmbuf_mtod(buf, uintptr_t);
     *dseg = (struct mlx5_wqe_data_seg){
         .byte_count = htonl(DATA_LEN(buf)),
-        .lkey = txq_mb2mr(txq, buf),
+        .lkey = mlx5_tx_mb2mr(txq, buf),
         .addr = htonll(addr),
     };
 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)

@@ -1495,7 +1242,7 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
     if (unlikely(!pkts_n))
         return 0;
     /* Start processing. */
-    txq_complete(txq);
+    mlx5_tx_complete(txq);
     max_elts = (elts_n - (elts_head - txq->elts_tail));
     /* A CQE slot must always be available. */
     assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));

@@ -1607,7 +1354,7 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
     addr = rte_pktmbuf_mtod(buf, uintptr_t);
     *dseg = (struct mlx5_wqe_data_seg){
         .byte_count = htonl(DATA_LEN(buf)),
-        .lkey = txq_mb2mr(txq, buf),
+        .lkey = mlx5_tx_mb2mr(txq, buf),
         .addr = htonll(addr),
     };
 #if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)

@@ -1690,7 +1437,7 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
     naddr = htonll(addr);
     *dseg = (rte_v128u32_t) {
         htonl(length),
-        txq_mb2mr(txq, buf),
+        mlx5_tx_mb2mr(txq, buf),
         naddr,
         naddr >> 32,
     };

@@ -2138,3 +1885,71 @@ removed_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
     (void)pkts_n;
     return 0;
 }
+
+/*
+ * Vectorized Rx/Tx routines are not compiled in when required vector
+ * instructions are not supported on a target architecture. The following null
+ * stubs are needed for linkage when those are not included outside of this file
+ * (e.g. mlx5_rxtx_vec_sse.c for x86).
+ */
+
+uint16_t __attribute__((weak))
+mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+    (void)dpdk_txq;
+    (void)pkts;
+    (void)pkts_n;
+    return 0;
+}
+
+uint16_t __attribute__((weak))
+mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+    (void)dpdk_txq;
+    (void)pkts;
+    (void)pkts_n;
+    return 0;
+}
+
+uint16_t __attribute__((weak))
+mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
+{
+    (void)dpdk_rxq;
+    (void)pkts;
+    (void)pkts_n;
+    return 0;
+}
+
+int __attribute__((weak))
+priv_check_raw_vec_tx_support(struct priv *priv)
+{
+    (void)priv;
+    return -ENOTSUP;
+}
+
+int __attribute__((weak))
+priv_check_vec_tx_support(struct priv *priv)
+{
+    (void)priv;
+    return -ENOTSUP;
+}
+
+int __attribute__((weak))
+rxq_check_vec_support(struct rxq *rxq)
+{
+    (void)rxq;
+    return -ENOTSUP;
+}
+
+int __attribute__((weak))
+priv_check_vec_rx_support(struct priv *priv)
+{
+    (void)priv;
+    return -ENOTSUP;
+}
+
+void __attribute__((weak))
+priv_prep_vec_rx_function(struct priv *priv)
+{
+    (void)priv;
+}

@@ -115,10 +115,13 @@ struct rxq {
     unsigned int port_id:8;
     unsigned int rss_hash:1; /* RSS hash result is enabled. */
     unsigned int mark:1; /* Marked flow available on the queue. */
-    unsigned int :8; /* Remaining bits. */
+    unsigned int pending_err:1; /* CQE error needs to be handled. */
+    unsigned int trim_elts:1; /* Whether elts needs clean-up. */
+    unsigned int :6; /* Remaining bits. */
     volatile uint32_t *rq_db;
     volatile uint32_t *cq_db;
     uint16_t rq_ci;
+    uint16_t rq_pi;
     uint16_t cq_ci;
     volatile struct mlx5_wqe_data_seg(*wqes)[];
     volatile struct mlx5_cqe(*cqes)[];
@@ -126,6 +129,8 @@ struct rxq {
     struct rte_mbuf *(*elts)[];
     struct rte_mempool *mp;
     struct mlx5_rxq_stats stats;
+    uint64_t mbuf_initializer; /* Default rearm_data for vectorized Rx. */
+    struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */
 } __rte_cache_aligned;
 
 /* RX queue control descriptor. */

@@ -261,6 +266,7 @@ struct txq {
     uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
     uint16_t inline_max_packet_sz; /* Max packet size for inlining. */
     uint32_t qp_num_8s; /* QP number shifted by 8. */
+    uint32_t flags; /* Flags for Tx Queue. */
     volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
     volatile void *wqes; /* Work queue (use volatile to write into). */
     volatile uint32_t *qp_db; /* Work queue doorbell. */

@@ -328,6 +334,8 @@ uint16_t mlx5_tx_burst_secondary_setup(void *, struct rte_mbuf **, uint16_t);
 
 /* mlx5_rxtx.c */
 
+extern const uint32_t mlx5_ptype_table[];
+
 uint16_t mlx5_tx_burst(void *, struct rte_mbuf **, uint16_t);
 uint16_t mlx5_tx_burst_mpw(void *, struct rte_mbuf **, uint16_t);
 uint16_t mlx5_tx_burst_mpw_inline(void *, struct rte_mbuf **, uint16_t);

@@ -338,10 +346,282 @@ uint16_t removed_rx_burst(void *, struct rte_mbuf **, uint16_t);
 int mlx5_rx_descriptor_status(void *, uint16_t);
 int mlx5_tx_descriptor_status(void *, uint16_t);
 
+/* Vectorized version of mlx5_rxtx.c */
+int priv_check_raw_vec_tx_support(struct priv *);
+int priv_check_vec_tx_support(struct priv *);
+int rxq_check_vec_support(struct rxq *);
+int priv_check_vec_rx_support(struct priv *);
+void priv_prep_vec_rx_function(struct priv *);
+uint16_t mlx5_tx_burst_raw_vec(void *, struct rte_mbuf **, uint16_t);
+uint16_t mlx5_tx_burst_vec(void *, struct rte_mbuf **, uint16_t);
+uint16_t mlx5_rx_burst_vec(void *, struct rte_mbuf **, uint16_t);
+
 /* mlx5_mr.c */
 
 struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, struct rte_mempool *);
 void txq_mp2mr_iter(struct rte_mempool *, void *);
 uint32_t txq_mp2mr_reg(struct txq *, struct rte_mempool *, unsigned int);
 
+#ifndef NDEBUG
+/**
+ * Verify or set magic value in CQE.
+ *
+ * @param cqe
+ *   Pointer to CQE.
+ *
+ * @return
+ *   0 the first time.
+ */
+static inline int
+check_cqe_seen(volatile struct mlx5_cqe *cqe)
+{
+    static const uint8_t magic[] = "seen";
+    volatile uint8_t (*buf)[sizeof(cqe->rsvd0)] = &cqe->rsvd0;
+    int ret = 1;
+    unsigned int i;
+
+    for (i = 0; i < sizeof(magic) && i < sizeof(*buf); ++i)
+        if (!ret || (*buf)[i] != magic[i]) {
+            ret = 0;
+            (*buf)[i] = magic[i];
+        }
+    return ret;
+}
+#endif /* NDEBUG */
+
+/**
+ * Check whether CQE is valid.
+ *
+ * @param cqe
+ *   Pointer to CQE.
+ * @param cqes_n
+ *   Size of completion queue.
+ * @param ci
+ *   Consumer index.
+ *
+ * @return
+ *   0 on success, 1 on failure.
+ */
+static __rte_always_inline int
+check_cqe(volatile struct mlx5_cqe *cqe,
+          unsigned int cqes_n, const uint16_t ci)
+{
+    uint16_t idx = ci & cqes_n;
+    uint8_t op_own = cqe->op_own;
+    uint8_t op_owner = MLX5_CQE_OWNER(op_own);
+    uint8_t op_code = MLX5_CQE_OPCODE(op_own);
+
+    if (unlikely((op_owner != (!!(idx))) || (op_code == MLX5_CQE_INVALID)))
+        return 1; /* No CQE. */
+#ifndef NDEBUG
+    if ((op_code == MLX5_CQE_RESP_ERR) ||
+        (op_code == MLX5_CQE_REQ_ERR)) {
+        volatile struct mlx5_err_cqe *err_cqe = (volatile void *)cqe;
+        uint8_t syndrome = err_cqe->syndrome;
+
+        if ((syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR) ||
+            (syndrome == MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR))
+            return 0;
+        if (!check_cqe_seen(cqe))
+            ERROR("unexpected CQE error %u (0x%02x)"
+                  " syndrome 0x%02x",
+                  op_code, op_code, syndrome);
+        return 1;
+    } else if ((op_code != MLX5_CQE_RESP_SEND) &&
+               (op_code != MLX5_CQE_REQ)) {
+        if (!check_cqe_seen(cqe))
+            ERROR("unexpected CQE opcode %u (0x%02x)",
+                  op_code, op_code);
+        return 1;
+    }
+#endif /* NDEBUG */
+    return 0;
+}
+
+/**
+ * Return the address of the WQE.
+ *
+ * @param txq
+ *   Pointer to TX queue structure.
+ * @param wqe_ci
+ *   WQE consumer index.
+ *
+ * @return
+ *   WQE address.
+ */
+static inline uintptr_t *
+tx_mlx5_wqe(struct txq *txq, uint16_t ci)
+{
+    ci &= ((1 << txq->wqe_n) - 1);
+    return (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE);
+}
+
+/**
+ * Manage TX completions.
+ *
+ * When sending a burst, mlx5_tx_burst() posts several WRs.
+ *
+ * @param txq
+ *   Pointer to TX queue structure.
+ */
+static __rte_always_inline void
+mlx5_tx_complete(struct txq *txq)
+{
+    const uint16_t elts_n = 1 << txq->elts_n;
+    const uint16_t elts_m = elts_n - 1;
+    const unsigned int cqe_n = 1 << txq->cqe_n;
+    const unsigned int cqe_cnt = cqe_n - 1;
+    uint16_t elts_free = txq->elts_tail;
+    uint16_t elts_tail;
+    uint16_t cq_ci = txq->cq_ci;
+    volatile struct mlx5_cqe *cqe = NULL;
+    volatile struct mlx5_wqe_ctrl *ctrl;
+    struct rte_mbuf *m, *free[elts_n];
+    struct rte_mempool *pool = NULL;
+    unsigned int blk_n = 0;
+
+    do {
+        volatile struct mlx5_cqe *tmp;
+
+        tmp = &(*txq->cqes)[cq_ci & cqe_cnt];
+        if (check_cqe(tmp, cqe_n, cq_ci))
+            break;
+        cqe = tmp;
+#ifndef NDEBUG
+        if (MLX5_CQE_FORMAT(cqe->op_own) == MLX5_COMPRESSED) {
+            if (!check_cqe_seen(cqe))
+                ERROR("unexpected compressed CQE, TX stopped");
+            return;
+        }
+        if ((MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) ||
+            (MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_REQ_ERR)) {
+            if (!check_cqe_seen(cqe))
+                ERROR("unexpected error CQE, TX stopped");
+            return;
+        }
+#endif /* NDEBUG */
+        ++cq_ci;
+    } while (1);
+    if (unlikely(cqe == NULL))
+        return;
+    txq->wqe_pi = ntohs(cqe->wqe_counter);
+    ctrl = (volatile struct mlx5_wqe_ctrl *)
+        tx_mlx5_wqe(txq, txq->wqe_pi);
+    elts_tail = ctrl->ctrl3;
+    assert((elts_tail & elts_m) < (1 << txq->wqe_n));
+    /* Free buffers. */
+    while (elts_free != elts_tail) {
+        m = rte_pktmbuf_prefree_seg((*txq->elts)[elts_free++ & elts_m]);
+        if (likely(m != NULL)) {
+            if (likely(m->pool == pool)) {
+                free[blk_n++] = m;
+            } else {
+                if (likely(pool != NULL))
+                    rte_mempool_put_bulk(pool,
+                                         (void *)free,
+                                         blk_n);
+                free[0] = m;
+                pool = m->pool;
+                blk_n = 1;
+            }
+        }
+    }
+    if (blk_n)
+        rte_mempool_put_bulk(pool, (void *)free, blk_n);
+#ifndef NDEBUG
+    elts_free = txq->elts_tail;
+    /* Poisoning. */
+    while (elts_free != elts_tail) {
+        memset(&(*txq->elts)[elts_free & elts_m],
+               0x66,
+               sizeof((*txq->elts)[elts_free & elts_m]));
+        ++elts_free;
+    }
+#endif
+    txq->cq_ci = cq_ci;
+    txq->elts_tail = elts_tail;
+    /* Update the consumer index. */
+    rte_wmb();
+    *txq->cq_db = htonl(cq_ci);
+}
+
+/**
+ * Get Memory Pool (MP) from mbuf. If mbuf is indirect, the pool from which
+ * the cloned mbuf is allocated is returned instead.
+ *
+ * @param buf
+ *   Pointer to mbuf.
+ *
+ * @return
+ *   Memory pool where data is located for given mbuf.
+ */
+static struct rte_mempool *
+mlx5_tx_mb2mp(struct rte_mbuf *buf)
+{
+    if (unlikely(RTE_MBUF_INDIRECT(buf)))
+        return rte_mbuf_from_indirect(buf)->pool;
+    return buf->pool;
+}
+
+/**
+ * Get Memory Region (MR) <-> rte_mbuf association from txq->mp2mr[].
+ * Add MP to txq->mp2mr[] if it's not registered yet. If mp2mr[] is full,
+ * remove an entry first.
+ *
+ * @param txq
+ *   Pointer to TX queue structure.
+ * @param[in] mp
+ *   Memory Pool for which a Memory Region lkey must be returned.
+ *
+ * @return
+ *   mr->lkey on success, (uint32_t)-1 on failure.
+ */
+static __rte_always_inline uint32_t
+mlx5_tx_mb2mr(struct txq *txq, struct rte_mbuf *mb)
+{
+    uint16_t i = txq->mr_cache_idx;
+    uintptr_t addr = rte_pktmbuf_mtod(mb, uintptr_t);
+
+    assert(i < RTE_DIM(txq->mp2mr));
+    if (likely(txq->mp2mr[i].start <= addr && txq->mp2mr[i].end >= addr))
+        return txq->mp2mr[i].lkey;
+    for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
+        if (unlikely(txq->mp2mr[i].mr == NULL)) {
+            /* Unknown MP, add a new MR for it. */
+            break;
+        }
+        if (txq->mp2mr[i].start <= addr &&
+            txq->mp2mr[i].end >= addr) {
+            assert(txq->mp2mr[i].lkey != (uint32_t)-1);
+            assert(htonl(txq->mp2mr[i].mr->lkey) ==
+                   txq->mp2mr[i].lkey);
+            txq->mr_cache_idx = i;
+            return txq->mp2mr[i].lkey;
+        }
+    }
+    txq->mr_cache_idx = 0;
+    return txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
+}
+
+/**
+ * Ring TX queue doorbell.
+ *
+ * @param txq
+ *   Pointer to TX queue structure.
+ * @param wqe
+ *   Pointer to the last WQE posted in the NIC.
+ */
+static __rte_always_inline void
+mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe)
+{
+    uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
+    volatile uint64_t *src = ((volatile uint64_t *)wqe);
+
+    rte_wmb();
+    *txq->qp_db = htonl(txq->wqe_ci);
+    /* Ensure ordering between DB record and BF copy. */
+    rte_wmb();
+    *dst = *src;
+}
+
 #endif /* RTE_PMD_MLX5_RXTX_H_ */

drivers/net/mlx5/mlx5_rxtx_vec_sse.c (new file, 1394 lines)
File diff suppressed because it is too large.

@@ -243,7 +243,7 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
         ERROR("MLX5_ENABLE_CQE_COMPRESSION must never be set");
         goto error;
     }
-    (void)conf; /* Thresholds configuration (ignored). */
+    tmpl.txq.flags = conf->txq_flags;
     assert(desc > MLX5_TX_COMP_THRESH);
     tmpl.txq.elts_n = log2above(desc);
     if (priv->mps == MLX5_MPW_ENHANCED)