net/mlx5: add vectorized Rx/Tx burst for x86

To enable the vectorized burst routines, the target must be the x86_64
architecture. If all conditions are met, the vectorized burst functions are
enabled automatically. The decision is made individually for Rx and Tx;
there is no PMD option to force a selection.

Signed-off-by: Yongseok Koh <yskoh@mellanox.com>
Acked-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
Author: Yongseok Koh, 2017-07-06 11:41:10 -07:00; committed by Ferruh Yigit
Parent: f3d2dcc856
Commit: 6cb559d67b
8 changed files with 1868 additions and 297 deletions


@ -39,6 +39,9 @@ SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxq.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_txq.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxtx.c
ifeq ($(CONFIG_RTE_ARCH_X86_64),y)
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_rxtx_vec_sse.c
endif
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_trigger.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_ethdev.c
SRCS-$(CONFIG_RTE_LIBRTE_MLX5_PMD) += mlx5_mac.c


@ -89,4 +89,22 @@
/* Maximum Packet headers size (L2+L3+L4) for TSO. */
#define MLX5_MAX_TSO_HEADER 128
/* Default minimum number of Tx queues for vectorized Tx. */
#define MLX5_VPMD_MIN_TXQS 4
/* Threshold of buffer replenishment for vectorized Rx. */
#define MLX5_VPMD_RXQ_RPLNSH_THRESH 64U
/* Maximum size of burst for vectorized Rx. */
#define MLX5_VPMD_RX_MAX_BURST MLX5_VPMD_RXQ_RPLNSH_THRESH
/*
* Maximum size of burst for vectorized Tx. This is related to the maximum size
* of Enhanced MPW (eMPW) WQE, as vectorized Tx is supported with eMPW.
*/
#define MLX5_VPMD_TX_MAX_BURST 32U
/* Number of packets vectorized Rx can simultaneously process in a loop. */
#define MLX5_VPMD_DESCS_PER_LOOP 4
#endif /* RTE_PMD_MLX5_DEFS_H_ */
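
For illustration only (not part of the patch): MLX5_VPMD_RX_MAX_BURST equals the
replenishment threshold, and the vectorized loop handles MLX5_VPMD_DESCS_PER_LOOP
descriptors per iteration, so a burst request ends up capped and aligned to a
multiple of four. A minimal standalone sketch of that clamping, with a
hypothetical helper name:

#include <stdint.h>
#include <stdio.h>

#define MLX5_VPMD_RX_MAX_BURST 64U
#define MLX5_VPMD_DESCS_PER_LOOP 4

/* Hypothetical helper: cap a burst request and round it down to a whole
 * number of vectorized iterations. */
static uint16_t
vec_rx_effective_burst(uint16_t pkts_n)
{
	uint16_t n = pkts_n < MLX5_VPMD_RX_MAX_BURST ?
		     pkts_n : MLX5_VPMD_RX_MAX_BURST;

	return n - (n % MLX5_VPMD_DESCS_PER_LOOP);
}

int
main(void)
{
	printf("70 -> %u\n", (unsigned int)vec_rx_effective_burst(70)); /* 64 */
	printf("30 -> %u\n", (unsigned int)vec_rx_effective_burst(30)); /* 28 */
	return 0;
}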


@ -723,7 +723,8 @@ mlx5_dev_supported_ptypes_get(struct rte_eth_dev *dev)
};
if (dev->rx_pkt_burst == mlx5_rx_burst)
if (dev->rx_pkt_burst == mlx5_rx_burst ||
dev->rx_pkt_burst == mlx5_rx_burst_vec)
return ptypes;
return NULL;
}
@ -1585,9 +1586,16 @@ priv_select_tx_function(struct priv *priv)
priv->dev->tx_pkt_burst = mlx5_tx_burst;
/* Select appropriate TX function. */
if (priv->mps == MLX5_MPW_ENHANCED) {
priv->dev->tx_pkt_burst =
mlx5_tx_burst_empw;
DEBUG("selected Enhanced MPW TX function");
if (priv_check_vec_tx_support(priv) > 0) {
if (priv_check_raw_vec_tx_support(priv) > 0)
priv->dev->tx_pkt_burst = mlx5_tx_burst_raw_vec;
else
priv->dev->tx_pkt_burst = mlx5_tx_burst_vec;
DEBUG("selected Enhanced MPW TX vectorized function");
} else {
priv->dev->tx_pkt_burst = mlx5_tx_burst_empw;
DEBUG("selected Enhanced MPW TX function");
}
} else if (priv->mps && priv->txq_inline) {
priv->dev->tx_pkt_burst = mlx5_tx_burst_mpw_inline;
DEBUG("selected MPW inline TX function");
@ -1606,5 +1614,11 @@ priv_select_tx_function(struct priv *priv)
void
priv_select_rx_function(struct priv *priv)
{
priv->dev->rx_pkt_burst = mlx5_rx_burst;
if (priv_check_vec_rx_support(priv) > 0) {
priv_prep_vec_rx_function(priv);
priv->dev->rx_pkt_burst = mlx5_rx_burst_vec;
DEBUG("selected RX vectorized function");
} else {
priv->dev->rx_pkt_burst = mlx5_rx_burst;
}
}


@ -633,6 +633,41 @@ priv_rehash_flows(struct priv *priv)
return 0;
}
/**
* Unlike the regular Rx function, vPMD Rx does not replace mbufs immediately when
* receiving packets; it replaces them later in bulk. In rxq->elts[], entries
* from rq_pi to rq_ci are owned by the device, but the rest have already been
* delivered to the application. To keep rxq_alloc_elts() from reusing those
* mbufs, this function must be called to replace the used ones.
*
* @param rxq
* Pointer to RX queue structure.
*
* @return
* 0 on success, errno value on failure.
*/
static int
rxq_trim_elts(struct rxq *rxq)
{
const uint16_t q_n = (1 << rxq->elts_n);
const uint16_t q_mask = q_n - 1;
uint16_t used = q_n - (rxq->rq_ci - rxq->rq_pi);
uint16_t i;
if (!rxq->trim_elts)
return 0;
for (i = 0; i < used; ++i) {
struct rte_mbuf *buf;
buf = rte_pktmbuf_alloc(rxq->mp);
if (!buf)
return ENOMEM;
(*rxq->elts)[(rxq->rq_ci + i) & q_mask] = buf;
}
rxq->rq_pi = rxq->rq_ci;
rxq->trim_elts = 0;
return 0;
}
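
The counter arithmetic above (used = q_n - (rq_ci - rq_pi), indices masked with
q_mask) works because rq_ci and rq_pi are free-running uint16_t counters and the
ring size is a power of two, so the subtraction stays correct across wraparound.
A standalone sketch (not part of the patch) of that arithmetic:

#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	const uint16_t q_n = 1 << 9;              /* 512-entry ring. */
	const uint16_t q_mask = q_n - 1;
	uint16_t rq_ci = 65530;                   /* Counters wrap freely. */
	uint16_t rq_pi = 65500;
	uint16_t owned_by_dev = rq_ci - rq_pi;    /* 30 entries still with HW. */
	uint16_t to_replace = q_n - owned_by_dev; /* 482 entries to refill. */

	printf("device owns %u, to replace %u\n",
	       (unsigned int)owned_by_dev, (unsigned int)to_replace);
	printf("slot of rq_ci + 5: %u\n",
	       (unsigned int)((uint16_t)(rq_ci + 5) & q_mask));
	return 0;
}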
/**
* Allocate RX queue elements.
*
@ -800,6 +835,7 @@ rxq_rehash(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl)
return err;
}
/* Snatch mbufs from original queue. */
claim_zero(rxq_trim_elts(&rxq_ctrl->rxq));
claim_zero(rxq_alloc_elts(rxq_ctrl, elts_n, rxq_ctrl->rxq.elts));
for (i = 0; i != elts_n; ++i) {
struct rte_mbuf *buf = (*rxq_ctrl->rxq.elts)[i];
@ -860,6 +896,7 @@ rxq_setup(struct rxq_ctrl *tmpl)
tmpl->rxq.cqe_n = log2above(cq_info.cqe_cnt);
tmpl->rxq.cq_ci = 0;
tmpl->rxq.rq_ci = 0;
tmpl->rxq.rq_pi = 0;
tmpl->rxq.cq_db = cq_info.dbrec;
tmpl->rxq.wqes =
(volatile struct mlx5_wqe_data_seg (*)[])
@ -993,7 +1030,12 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
if (priv->cqe_comp) {
attr.cq.comp_mask |= IBV_EXP_CQ_INIT_ATTR_FLAGS;
attr.cq.flags |= IBV_EXP_CQ_COMPRESSED_CQE;
cqe_n = (desc * 2) - 1; /* Double the number of CQEs. */
/*
* For vectorized Rx, the CQE count must not be doubled, so that
* cq_ci and rq_ci stay aligned.
*/
if (rxq_check_vec_support(&tmpl.rxq) < 0)
cqe_n = (desc * 2) - 1; /* Double the number of CQEs. */
}
tmpl.cq = ibv_exp_create_cq(priv->ctx, cqe_n, NULL, tmpl.channel, 0,
&attr.cq);
@ -1103,7 +1145,9 @@ rxq_ctrl_setup(struct rte_eth_dev *dev, struct rxq_ctrl *rxq_ctrl,
if (rxq_ctrl->rxq.elts_n) {
assert(1 << rxq_ctrl->rxq.elts_n == desc);
assert(rxq_ctrl->rxq.elts != tmpl.rxq.elts);
ret = rxq_alloc_elts(&tmpl, desc, rxq_ctrl->rxq.elts);
ret = rxq_trim_elts(&rxq_ctrl->rxq);
if (!ret)
ret = rxq_alloc_elts(&tmpl, desc, rxq_ctrl->rxq.elts);
} else
ret = rxq_alloc_elts(&tmpl, desc, NULL);
if (ret) {
@ -1165,6 +1209,7 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
struct priv *priv = dev->data->dev_private;
struct rxq *rxq = (*priv->rxqs)[idx];
struct rxq_ctrl *rxq_ctrl = container_of(rxq, struct rxq_ctrl, rxq);
const uint16_t desc_pad = MLX5_VPMD_DESCS_PER_LOOP; /* For vPMD. */
int ret;
if (mlx5_is_secondary())
@ -1198,7 +1243,8 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
if (rxq_ctrl->rxq.elts_n != log2above(desc)) {
rxq_ctrl = rte_realloc(rxq_ctrl,
sizeof(*rxq_ctrl) +
desc * sizeof(struct rte_mbuf *),
(desc + desc_pad) *
sizeof(struct rte_mbuf *),
RTE_CACHE_LINE_SIZE);
if (!rxq_ctrl) {
ERROR("%p: unable to reallocate queue index %u",
@ -1209,7 +1255,8 @@ mlx5_rx_queue_setup(struct rte_eth_dev *dev, uint16_t idx, uint16_t desc,
}
} else {
rxq_ctrl = rte_calloc_socket("RXQ", 1, sizeof(*rxq_ctrl) +
desc * sizeof(struct rte_mbuf *),
(desc + desc_pad) *
sizeof(struct rte_mbuf *),
0, socket);
if (rxq_ctrl == NULL) {
ERROR("%p: unable to allocate queue index %u",


@ -69,19 +69,6 @@
#include "mlx5_defs.h"
#include "mlx5_prm.h"
static __rte_always_inline int
check_cqe(volatile struct mlx5_cqe *cqe,
unsigned int cqes_n, const uint16_t ci);
static __rte_always_inline void
txq_complete(struct txq *txq);
static __rte_always_inline uint32_t
txq_mb2mr(struct txq *txq, struct rte_mbuf *mb);
static __rte_always_inline void
mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe);
static __rte_always_inline uint32_t
rxq_cq_to_pkt_type(volatile struct mlx5_cqe *cqe);
@ -92,101 +79,29 @@ mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe *cqe,
static __rte_always_inline uint32_t
rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe *cqe);
#ifndef NDEBUG
/**
* Verify or set magic value in CQE.
*
* @param cqe
* Pointer to CQE.
*
* @return
* 0 the first time.
/*
* The index to the array should have:
* bit[1:0] = l3_hdr_type, bit[2] = tunneled, bit[3] = outer_l3_type
*/
static inline int
check_cqe_seen(volatile struct mlx5_cqe *cqe)
{
static const uint8_t magic[] = "seen";
volatile uint8_t (*buf)[sizeof(cqe->rsvd0)] = &cqe->rsvd0;
int ret = 1;
unsigned int i;
for (i = 0; i < sizeof(magic) && i < sizeof(*buf); ++i)
if (!ret || (*buf)[i] != magic[i]) {
ret = 0;
(*buf)[i] = magic[i];
}
return ret;
}
#endif /* NDEBUG */
/**
* Check whether CQE is valid.
*
* @param cqe
* Pointer to CQE.
* @param cqes_n
* Size of completion queue.
* @param ci
* Consumer index.
*
* @return
* 0 on success, 1 on failure.
*/
static inline int
check_cqe(volatile struct mlx5_cqe *cqe,
unsigned int cqes_n, const uint16_t ci)
{
uint16_t idx = ci & cqes_n;
uint8_t op_own = cqe->op_own;
uint8_t op_owner = MLX5_CQE_OWNER(op_own);
uint8_t op_code = MLX5_CQE_OPCODE(op_own);
if (unlikely((op_owner != (!!(idx))) || (op_code == MLX5_CQE_INVALID)))
return 1; /* No CQE. */
#ifndef NDEBUG
if ((op_code == MLX5_CQE_RESP_ERR) ||
(op_code == MLX5_CQE_REQ_ERR)) {
volatile struct mlx5_err_cqe *err_cqe = (volatile void *)cqe;
uint8_t syndrome = err_cqe->syndrome;
if ((syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR) ||
(syndrome == MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR))
return 0;
if (!check_cqe_seen(cqe))
ERROR("unexpected CQE error %u (0x%02x)"
" syndrome 0x%02x",
op_code, op_code, syndrome);
return 1;
} else if ((op_code != MLX5_CQE_RESP_SEND) &&
(op_code != MLX5_CQE_REQ)) {
if (!check_cqe_seen(cqe))
ERROR("unexpected CQE opcode %u (0x%02x)",
op_code, op_code);
return 1;
}
#endif /* NDEBUG */
return 0;
}
/**
* Return the address of the WQE.
*
* @param txq
* Pointer to TX queue structure.
* @param wqe_ci
* WQE consumer index.
*
* @return
* WQE address.
*/
static inline uintptr_t *
tx_mlx5_wqe(struct txq *txq, uint16_t ci)
{
ci &= ((1 << txq->wqe_n) - 1);
return (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE);
}
const uint32_t mlx5_ptype_table[] = {
RTE_PTYPE_UNKNOWN,
RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, /* b0001 */
RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, /* b0010 */
RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, /* b0101 */
RTE_PTYPE_L3_IPV4_EXT_UNKNOWN |
RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, /* b0110 */
RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
RTE_PTYPE_L3_IPV6_EXT_UNKNOWN, /* b1001 */
RTE_PTYPE_L3_IPV4_EXT_UNKNOWN, /* b1010 */
RTE_PTYPE_UNKNOWN, RTE_PTYPE_UNKNOWN,
RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
RTE_PTYPE_INNER_L3_IPV6_EXT_UNKNOWN, /* b1101 */
RTE_PTYPE_L3_IPV6_EXT_UNKNOWN |
RTE_PTYPE_INNER_L3_IPV4_EXT_UNKNOWN, /* b1110 */
RTE_PTYPE_ALL_MASK /* b1111 */
};
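
A hypothetical helper (the function name and the way the three fields are
obtained are assumptions; only the index layout comes from the comment above)
showing how the 4-bit index selects an entry of mlx5_ptype_table[]:

static inline uint32_t
ptype_from_bits(unsigned int l3_hdr_type, unsigned int tunneled,
		unsigned int outer_l3_type)
{
	/* bit[1:0] = l3_hdr_type, bit[2] = tunneled, bit[3] = outer_l3_type. */
	unsigned int idx = (l3_hdr_type & 0x3) |
			   ((tunneled & 0x1) << 2) |
			   ((outer_l3_type & 0x1) << 3);

	return mlx5_ptype_table[idx];
}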
/**
* Return the size of tailroom of WQ.
@ -244,174 +159,6 @@ mlx5_copy_to_wq(void *dst, const void *src, size_t n,
return ret;
}
/**
* Manage TX completions.
*
* When sending a burst, mlx5_tx_burst() posts several WRs.
*
* @param txq
* Pointer to TX queue structure.
*/
static inline void
txq_complete(struct txq *txq)
{
const uint16_t elts_n = 1 << txq->elts_n;
const uint16_t elts_m = elts_n - 1;
const unsigned int cqe_n = 1 << txq->cqe_n;
const unsigned int cqe_cnt = cqe_n - 1;
uint16_t elts_free = txq->elts_tail;
uint16_t elts_tail;
uint16_t cq_ci = txq->cq_ci;
volatile struct mlx5_cqe *cqe = NULL;
volatile struct mlx5_wqe_ctrl *ctrl;
struct rte_mbuf *m, *free[elts_n];
struct rte_mempool *pool = NULL;
unsigned int blk_n = 0;
do {
volatile struct mlx5_cqe *tmp;
tmp = &(*txq->cqes)[cq_ci & cqe_cnt];
if (check_cqe(tmp, cqe_n, cq_ci))
break;
cqe = tmp;
#ifndef NDEBUG
if (MLX5_CQE_FORMAT(cqe->op_own) == MLX5_COMPRESSED) {
if (!check_cqe_seen(cqe))
ERROR("unexpected compressed CQE, TX stopped");
return;
}
if ((MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) ||
(MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_REQ_ERR)) {
if (!check_cqe_seen(cqe))
ERROR("unexpected error CQE, TX stopped");
return;
}
#endif /* NDEBUG */
++cq_ci;
} while (1);
if (unlikely(cqe == NULL))
return;
txq->wqe_pi = ntohs(cqe->wqe_counter);
ctrl = (volatile struct mlx5_wqe_ctrl *)
tx_mlx5_wqe(txq, txq->wqe_pi);
elts_tail = ctrl->ctrl3;
assert((elts_tail & elts_m) < (1 << txq->wqe_n));
/* Free buffers. */
while (elts_free != elts_tail) {
m = rte_pktmbuf_prefree_seg((*txq->elts)[elts_free++ & elts_m]);
if (likely(m != NULL)) {
if (likely(m->pool == pool)) {
free[blk_n++] = m;
} else {
if (likely(pool != NULL))
rte_mempool_put_bulk(pool,
(void *)free,
blk_n);
free[0] = m;
pool = m->pool;
blk_n = 1;
}
}
}
if (blk_n)
rte_mempool_put_bulk(pool, (void *)free, blk_n);
#ifndef NDEBUG
elts_free = txq->elts_tail;
/* Poisoning. */
while (elts_free != elts_tail) {
memset(&(*txq->elts)[elts_free & elts_m],
0x66,
sizeof((*txq->elts)[elts_free & elts_m]));
++elts_free;
}
#endif
txq->cq_ci = cq_ci;
txq->elts_tail = elts_tail;
/* Update the consumer index. */
rte_wmb();
*txq->cq_db = htonl(cq_ci);
}
/**
* Get Memory Pool (MP) from mbuf. If mbuf is indirect, the pool from which
* the cloned mbuf is allocated is returned instead.
*
* @param buf
* Pointer to mbuf.
*
* @return
* Memory pool where data is located for given mbuf.
*/
static struct rte_mempool *
txq_mb2mp(struct rte_mbuf *buf)
{
if (unlikely(RTE_MBUF_INDIRECT(buf)))
return rte_mbuf_from_indirect(buf)->pool;
return buf->pool;
}
/**
* Get Memory Region (MR) <-> rte_mbuf association from txq->mp2mr[].
* Add MP to txq->mp2mr[] if it's not registered yet. If mp2mr[] is full,
* remove an entry first.
*
* @param txq
* Pointer to TX queue structure.
* @param[in] mp
* Memory Pool for which a Memory Region lkey must be returned.
*
* @return
* mr->lkey on success, (uint32_t)-1 on failure.
*/
static inline uint32_t
txq_mb2mr(struct txq *txq, struct rte_mbuf *mb)
{
uint16_t i = txq->mr_cache_idx;
uintptr_t addr = rte_pktmbuf_mtod(mb, uintptr_t);
assert(i < RTE_DIM(txq->mp2mr));
if (likely(txq->mp2mr[i].start <= addr && txq->mp2mr[i].end >= addr))
return txq->mp2mr[i].lkey;
for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
if (unlikely(txq->mp2mr[i].mr == NULL)) {
/* Unknown MP, add a new MR for it. */
break;
}
if (txq->mp2mr[i].start <= addr &&
txq->mp2mr[i].end >= addr) {
assert(txq->mp2mr[i].lkey != (uint32_t)-1);
assert(htonl(txq->mp2mr[i].mr->lkey) ==
txq->mp2mr[i].lkey);
txq->mr_cache_idx = i;
return txq->mp2mr[i].lkey;
}
}
txq->mr_cache_idx = 0;
return txq_mp2mr_reg(txq, txq_mb2mp(mb), i);
}
/**
* Ring TX queue doorbell.
*
* @param txq
* Pointer to TX queue structure.
* @param wqe
* Pointer to the last WQE posted in the NIC.
*/
static inline void
mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe)
{
uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
volatile uint64_t *src = ((volatile uint64_t *)wqe);
rte_wmb();
*txq->qp_db = htonl(txq->wqe_ci);
/* Ensure ordering between DB record and BF copy. */
rte_wmb();
*dst = *src;
}
/**
* DPDK callback to check the status of a tx descriptor.
*
@ -429,7 +176,7 @@ mlx5_tx_descriptor_status(void *tx_queue, uint16_t offset)
struct txq *txq = tx_queue;
uint16_t used;
txq_complete(txq);
mlx5_tx_complete(txq);
used = txq->elts_head - txq->elts_tail;
if (offset < used)
return RTE_ETH_TX_DESC_FULL;
@ -525,7 +272,7 @@ mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
/* Prefetch first packet cacheline. */
rte_prefetch0(*pkts);
/* Start processing. */
txq_complete(txq);
mlx5_tx_complete(txq);
max_elts = (elts_n - (elts_head - txq->elts_tail));
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
if (unlikely(!max_wqe))
@ -773,7 +520,7 @@ use_dseg:
naddr = htonll(addr);
*dseg = (rte_v128u32_t){
htonl(length),
txq_mb2mr(txq, buf),
mlx5_tx_mb2mr(txq, buf),
naddr,
naddr >> 32,
};
@ -812,7 +559,7 @@ next_seg:
naddr = htonll(rte_pktmbuf_mtod(buf, uintptr_t));
*dseg = (rte_v128u32_t){
htonl(length),
txq_mb2mr(txq, buf),
mlx5_tx_mb2mr(txq, buf),
naddr,
naddr >> 32,
};
@ -991,7 +738,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
/* Start processing. */
txq_complete(txq);
mlx5_tx_complete(txq);
max_elts = (elts_n - (elts_head - txq->elts_tail));
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
if (unlikely(!max_wqe))
@ -1054,7 +801,7 @@ mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
addr = rte_pktmbuf_mtod(buf, uintptr_t);
*dseg = (struct mlx5_wqe_data_seg){
.byte_count = htonl(DATA_LEN(buf)),
.lkey = txq_mb2mr(txq, buf),
.lkey = mlx5_tx_mb2mr(txq, buf),
.addr = htonll(addr),
};
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
@ -1214,7 +961,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci));
rte_prefetch0(tx_mlx5_wqe(txq, txq->wqe_ci + 1));
/* Start processing. */
txq_complete(txq);
mlx5_tx_complete(txq);
max_elts = (elts_n - (elts_head - txq->elts_tail));
do {
struct rte_mbuf *buf = *(pkts++);
@ -1300,7 +1047,7 @@ mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
addr = rte_pktmbuf_mtod(buf, uintptr_t);
*dseg = (struct mlx5_wqe_data_seg){
.byte_count = htonl(DATA_LEN(buf)),
.lkey = txq_mb2mr(txq, buf),
.lkey = mlx5_tx_mb2mr(txq, buf),
.addr = htonll(addr),
};
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
@ -1495,7 +1242,7 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
if (unlikely(!pkts_n))
return 0;
/* Start processing. */
txq_complete(txq);
mlx5_tx_complete(txq);
max_elts = (elts_n - (elts_head - txq->elts_tail));
/* A CQE slot must always be available. */
assert((1u << txq->cqe_n) - (txq->cq_pi - txq->cq_ci));
@ -1607,7 +1354,7 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
addr = rte_pktmbuf_mtod(buf, uintptr_t);
*dseg = (struct mlx5_wqe_data_seg){
.byte_count = htonl(DATA_LEN(buf)),
.lkey = txq_mb2mr(txq, buf),
.lkey = mlx5_tx_mb2mr(txq, buf),
.addr = htonll(addr),
};
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
@ -1690,7 +1437,7 @@ mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
naddr = htonll(addr);
*dseg = (rte_v128u32_t) {
htonl(length),
txq_mb2mr(txq, buf),
mlx5_tx_mb2mr(txq, buf),
naddr,
naddr >> 32,
};
@ -2138,3 +1885,71 @@ removed_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
(void)pkts_n;
return 0;
}
/*
* Vectorized Rx/Tx routines are not compiled in when the required vector
* instructions are not supported on the target architecture. The following
* null stubs are needed for linkage when the vectorized routines are not
* built in from a separate file (e.g. mlx5_rxtx_vec_sse.c for x86).
*/
uint16_t __attribute__((weak))
mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
(void)dpdk_txq;
(void)pkts;
(void)pkts_n;
return 0;
}
uint16_t __attribute__((weak))
mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
(void)dpdk_txq;
(void)pkts;
(void)pkts_n;
return 0;
}
uint16_t __attribute__((weak))
mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
(void)dpdk_rxq;
(void)pkts;
(void)pkts_n;
return 0;
}
int __attribute__((weak))
priv_check_raw_vec_tx_support(struct priv *priv)
{
(void)priv;
return -ENOTSUP;
}
int __attribute__((weak))
priv_check_vec_tx_support(struct priv *priv)
{
(void)priv;
return -ENOTSUP;
}
int __attribute__((weak))
rxq_check_vec_support(struct rxq *rxq)
{
(void)rxq;
return -ENOTSUP;
}
int __attribute__((weak))
priv_check_vec_rx_support(struct priv *priv)
{
(void)priv;
return -ENOTSUP;
}
void __attribute__((weak))
priv_prep_vec_rx_function(struct priv *priv)
{
(void)priv;
}
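
The weak attribute is what makes these stubs safe to keep here: on x86_64 builds
mlx5_rxtx_vec_sse.c provides strong definitions that override them at link time,
while on other targets the stubs satisfy the references and their -ENOTSUP
returns make the "> 0" tests in priv_select_rx_function()/priv_select_tx_function()
fall back to the scalar paths. A tiny standalone illustration of the mechanism
(file and function names are made up):

/* stub.c */
int __attribute__((weak))
vec_supported(void)
{
	return 0;	/* Fallback when the vectorized object is not linked. */
}

/* vec_sse.c -- built only for x86_64; the strong definition wins at link time. */
int
vec_supported(void)
{
	return 1;
}

/* main.c */
#include <stdio.h>

int vec_supported(void);

int
main(void)
{
	/* Prints 1 when vec_sse.o is linked in, 0 otherwise. */
	printf("%d\n", vec_supported());
	return 0;
}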


@ -115,10 +115,13 @@ struct rxq {
unsigned int port_id:8;
unsigned int rss_hash:1; /* RSS hash result is enabled. */
unsigned int mark:1; /* Marked flow available on the queue. */
unsigned int :8; /* Remaining bits. */
unsigned int pending_err:1; /* CQE error needs to be handled. */
unsigned int trim_elts:1; /* Whether elts needs clean-up. */
unsigned int :6; /* Remaining bits. */
volatile uint32_t *rq_db;
volatile uint32_t *cq_db;
uint16_t rq_ci;
uint16_t rq_pi;
uint16_t cq_ci;
volatile struct mlx5_wqe_data_seg(*wqes)[];
volatile struct mlx5_cqe(*cqes)[];
@ -126,6 +129,8 @@ struct rxq {
struct rte_mbuf *(*elts)[];
struct rte_mempool *mp;
struct mlx5_rxq_stats stats;
uint64_t mbuf_initializer; /* Default rearm_data for vectorized Rx. */
struct rte_mbuf fake_mbuf; /* elts padding for vectorized Rx. */
} __rte_cache_aligned;
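
mbuf_initializer caches the 64-bit rearm_data word of a template mbuf so
vectorized Rx can reset the per-packet fields (refcnt, nb_segs, port, data_off)
of each received mbuf with a single store, while fake_mbuf backs the padding
entries of elts[] so the 4-descriptor loop can read past the ring end without
faulting. A hedged sketch of how such an initializer could be built (an
assumption for illustration, not the code added by mlx5_rxtx_vec_sse.c):

static void
rxq_build_mbuf_initializer(struct rxq *rxq)
{
	struct rte_mbuf mb_def = {
		.nb_segs = 1,
		.data_off = RTE_PKTMBUF_HEADROOM,
		.port = rxq->port_id,
	};

	rte_mbuf_refcnt_set(&mb_def, 1);
	/* rearm_data is the 64-bit word covering the fields set above. */
	rxq->mbuf_initializer = *(uint64_t *)&mb_def.rearm_data;
}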
/* RX queue control descriptor. */
@ -261,6 +266,7 @@ struct txq {
uint16_t max_inline; /* Multiple of RTE_CACHE_LINE_SIZE to inline. */
uint16_t inline_max_packet_sz; /* Max packet size for inlining. */
uint32_t qp_num_8s; /* QP number shifted by 8. */
uint32_t flags; /* Flags for Tx Queue. */
volatile struct mlx5_cqe (*cqes)[]; /* Completion queue. */
volatile void *wqes; /* Work queue (use volatile to write into). */
volatile uint32_t *qp_db; /* Work queue doorbell. */
@ -328,6 +334,8 @@ uint16_t mlx5_tx_burst_secondary_setup(void *, struct rte_mbuf **, uint16_t);
/* mlx5_rxtx.c */
extern const uint32_t mlx5_ptype_table[];
uint16_t mlx5_tx_burst(void *, struct rte_mbuf **, uint16_t);
uint16_t mlx5_tx_burst_mpw(void *, struct rte_mbuf **, uint16_t);
uint16_t mlx5_tx_burst_mpw_inline(void *, struct rte_mbuf **, uint16_t);
@ -338,10 +346,282 @@ uint16_t removed_rx_burst(void *, struct rte_mbuf **, uint16_t);
int mlx5_rx_descriptor_status(void *, uint16_t);
int mlx5_tx_descriptor_status(void *, uint16_t);
/* Vectorized version of mlx5_rxtx.c */
int priv_check_raw_vec_tx_support(struct priv *);
int priv_check_vec_tx_support(struct priv *);
int rxq_check_vec_support(struct rxq *);
int priv_check_vec_rx_support(struct priv *);
void priv_prep_vec_rx_function(struct priv *);
uint16_t mlx5_tx_burst_raw_vec(void *, struct rte_mbuf **, uint16_t);
uint16_t mlx5_tx_burst_vec(void *, struct rte_mbuf **, uint16_t);
uint16_t mlx5_rx_burst_vec(void *, struct rte_mbuf **, uint16_t);
/* mlx5_mr.c */
struct ibv_mr *mlx5_mp2mr(struct ibv_pd *, struct rte_mempool *);
void txq_mp2mr_iter(struct rte_mempool *, void *);
uint32_t txq_mp2mr_reg(struct txq *, struct rte_mempool *, unsigned int);
#ifndef NDEBUG
/**
* Verify or set magic value in CQE.
*
* @param cqe
* Pointer to CQE.
*
* @return
* 0 the first time.
*/
static inline int
check_cqe_seen(volatile struct mlx5_cqe *cqe)
{
static const uint8_t magic[] = "seen";
volatile uint8_t (*buf)[sizeof(cqe->rsvd0)] = &cqe->rsvd0;
int ret = 1;
unsigned int i;
for (i = 0; i < sizeof(magic) && i < sizeof(*buf); ++i)
if (!ret || (*buf)[i] != magic[i]) {
ret = 0;
(*buf)[i] = magic[i];
}
return ret;
}
#endif /* NDEBUG */
/**
* Check whether CQE is valid.
*
* @param cqe
* Pointer to CQE.
* @param cqes_n
* Size of completion queue.
* @param ci
* Consumer index.
*
* @return
* 0 on success, 1 on failure.
*/
static __rte_always_inline int
check_cqe(volatile struct mlx5_cqe *cqe,
unsigned int cqes_n, const uint16_t ci)
{
uint16_t idx = ci & cqes_n;
uint8_t op_own = cqe->op_own;
uint8_t op_owner = MLX5_CQE_OWNER(op_own);
uint8_t op_code = MLX5_CQE_OPCODE(op_own);
if (unlikely((op_owner != (!!(idx))) || (op_code == MLX5_CQE_INVALID)))
return 1; /* No CQE. */
#ifndef NDEBUG
if ((op_code == MLX5_CQE_RESP_ERR) ||
(op_code == MLX5_CQE_REQ_ERR)) {
volatile struct mlx5_err_cqe *err_cqe = (volatile void *)cqe;
uint8_t syndrome = err_cqe->syndrome;
if ((syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR) ||
(syndrome == MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR))
return 0;
if (!check_cqe_seen(cqe))
ERROR("unexpected CQE error %u (0x%02x)"
" syndrome 0x%02x",
op_code, op_code, syndrome);
return 1;
} else if ((op_code != MLX5_CQE_RESP_SEND) &&
(op_code != MLX5_CQE_REQ)) {
if (!check_cqe_seen(cqe))
ERROR("unexpected CQE opcode %u (0x%02x)",
op_code, op_code);
return 1;
}
#endif /* NDEBUG */
return 0;
}
/**
* Return the address of the WQE.
*
* @param txq
* Pointer to TX queue structure.
* @param wqe_ci
* WQE consumer index.
*
* @return
* WQE address.
*/
static inline uintptr_t *
tx_mlx5_wqe(struct txq *txq, uint16_t ci)
{
ci &= ((1 << txq->wqe_n) - 1);
return (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE);
}
/**
* Manage TX completions.
*
* When sending a burst, mlx5_tx_burst() posts several WRs.
*
* @param txq
* Pointer to TX queue structure.
*/
static __rte_always_inline void
mlx5_tx_complete(struct txq *txq)
{
const uint16_t elts_n = 1 << txq->elts_n;
const uint16_t elts_m = elts_n - 1;
const unsigned int cqe_n = 1 << txq->cqe_n;
const unsigned int cqe_cnt = cqe_n - 1;
uint16_t elts_free = txq->elts_tail;
uint16_t elts_tail;
uint16_t cq_ci = txq->cq_ci;
volatile struct mlx5_cqe *cqe = NULL;
volatile struct mlx5_wqe_ctrl *ctrl;
struct rte_mbuf *m, *free[elts_n];
struct rte_mempool *pool = NULL;
unsigned int blk_n = 0;
do {
volatile struct mlx5_cqe *tmp;
tmp = &(*txq->cqes)[cq_ci & cqe_cnt];
if (check_cqe(tmp, cqe_n, cq_ci))
break;
cqe = tmp;
#ifndef NDEBUG
if (MLX5_CQE_FORMAT(cqe->op_own) == MLX5_COMPRESSED) {
if (!check_cqe_seen(cqe))
ERROR("unexpected compressed CQE, TX stopped");
return;
}
if ((MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) ||
(MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_REQ_ERR)) {
if (!check_cqe_seen(cqe))
ERROR("unexpected error CQE, TX stopped");
return;
}
#endif /* NDEBUG */
++cq_ci;
} while (1);
if (unlikely(cqe == NULL))
return;
txq->wqe_pi = ntohs(cqe->wqe_counter);
ctrl = (volatile struct mlx5_wqe_ctrl *)
tx_mlx5_wqe(txq, txq->wqe_pi);
elts_tail = ctrl->ctrl3;
assert((elts_tail & elts_m) < (1 << txq->wqe_n));
/* Free buffers. */
while (elts_free != elts_tail) {
m = rte_pktmbuf_prefree_seg((*txq->elts)[elts_free++ & elts_m]);
if (likely(m != NULL)) {
if (likely(m->pool == pool)) {
free[blk_n++] = m;
} else {
if (likely(pool != NULL))
rte_mempool_put_bulk(pool,
(void *)free,
blk_n);
free[0] = m;
pool = m->pool;
blk_n = 1;
}
}
}
if (blk_n)
rte_mempool_put_bulk(pool, (void *)free, blk_n);
#ifndef NDEBUG
elts_free = txq->elts_tail;
/* Poisoning. */
while (elts_free != elts_tail) {
memset(&(*txq->elts)[elts_free & elts_m],
0x66,
sizeof((*txq->elts)[elts_free & elts_m]));
++elts_free;
}
#endif
txq->cq_ci = cq_ci;
txq->elts_tail = elts_tail;
/* Update the consumer index. */
rte_wmb();
*txq->cq_db = htonl(cq_ci);
}
/**
* Get Memory Pool (MP) from mbuf. If mbuf is indirect, the pool from which
* the cloned mbuf is allocated is returned instead.
*
* @param buf
* Pointer to mbuf.
*
* @return
* Memory pool where data is located for given mbuf.
*/
static struct rte_mempool *
mlx5_tx_mb2mp(struct rte_mbuf *buf)
{
if (unlikely(RTE_MBUF_INDIRECT(buf)))
return rte_mbuf_from_indirect(buf)->pool;
return buf->pool;
}
/**
* Get Memory Region (MR) <-> rte_mbuf association from txq->mp2mr[].
* Add MP to txq->mp2mr[] if it's not registered yet. If mp2mr[] is full,
* remove an entry first.
*
* @param txq
* Pointer to TX queue structure.
* @param[in] mp
* Memory Pool for which a Memory Region lkey must be returned.
*
* @return
* mr->lkey on success, (uint32_t)-1 on failure.
*/
static __rte_always_inline uint32_t
mlx5_tx_mb2mr(struct txq *txq, struct rte_mbuf *mb)
{
uint16_t i = txq->mr_cache_idx;
uintptr_t addr = rte_pktmbuf_mtod(mb, uintptr_t);
assert(i < RTE_DIM(txq->mp2mr));
if (likely(txq->mp2mr[i].start <= addr && txq->mp2mr[i].end >= addr))
return txq->mp2mr[i].lkey;
for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
if (unlikely(txq->mp2mr[i].mr == NULL)) {
/* Unknown MP, add a new MR for it. */
break;
}
if (txq->mp2mr[i].start <= addr &&
txq->mp2mr[i].end >= addr) {
assert(txq->mp2mr[i].lkey != (uint32_t)-1);
assert(htonl(txq->mp2mr[i].mr->lkey) ==
txq->mp2mr[i].lkey);
txq->mr_cache_idx = i;
return txq->mp2mr[i].lkey;
}
}
txq->mr_cache_idx = 0;
return txq_mp2mr_reg(txq, mlx5_tx_mb2mp(mb), i);
}
/**
* Ring TX queue doorbell.
*
* @param txq
* Pointer to TX queue structure.
* @param wqe
* Pointer to the last WQE posted in the NIC.
*/
static __rte_always_inline void
mlx5_tx_dbrec(struct txq *txq, volatile struct mlx5_wqe *wqe)
{
uint64_t *dst = (uint64_t *)((uintptr_t)txq->bf_reg);
volatile uint64_t *src = ((volatile uint64_t *)wqe);
rte_wmb();
*txq->qp_db = htonl(txq->wqe_ci);
/* Ensure ordering between DB record and BF copy. */
rte_wmb();
*dst = *src;
}
#endif /* RTE_PMD_MLX5_RXTX_H_ */

File diff suppressed because it is too large.


@ -243,7 +243,7 @@ txq_ctrl_setup(struct rte_eth_dev *dev, struct txq_ctrl *txq_ctrl,
ERROR("MLX5_ENABLE_CQE_COMPRESSION must never be set");
goto error;
}
(void)conf; /* Thresholds configuration (ignored). */
tmpl.txq.flags = conf->txq_flags;
assert(desc > MLX5_TX_COMP_THRESH);
tmpl.txq.elts_n = log2above(desc);
if (priv->mps == MLX5_MPW_ENHANCED)
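
Keeping conf->txq_flags in struct txq lets the Tx path selection inspect
per-queue constraints later. A check along these lines (the helper name is
hypothetical; the exact conditions live in mlx5_rxtx_vec_sse.c) would only
allow the raw vectorized Tx path when multi-segment packets and offloads are
disabled for the queue:

static int
txq_flags_allow_raw_vec(const struct txq *txq)
{
	/* Raw (no-inline) vectorized Tx wants single-segment, no-offload queues. */
	return (txq->flags & ETH_TXQ_FLAGS_NOMULTSEGS) &&
	       (txq->flags & ETH_TXQ_FLAGS_NOOFFLOADS);
}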