net/mlx5: remove Tx datapath implementation

This patch removes the existing Tx datapath code
as a preparation step before introducing the new
implementation. The following entities are being
removed:

- deprecated devargs support
- tx_burst() routines
- related PRM definitions
- SQ configuration code
- Tx routine selection code
- incompatible Tx completion code

The following devargs are deprecated and ignored:
- "txq_inline" is going to be converted to "txq_inline_max"
  for compatibility issue
- "tx_vec_en"
- "txqs_max_vec"
- "txq_mpw_hdr_dseg_en"
- "txq_max_inline_len" is going to be converted
  to "txq_inline_mpw" for compatibility issue

The deprecated devarg keys are recognized by the PMD
and ignored or converted to the new ones so that
device probing is not blocked.
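For illustration only, the warn-and-ignore behavior described above can be pictured as a small key-mapping helper; the function name below is hypothetical and this is not the PMD's actual parser:

#include <stddef.h>
#include <string.h>

/* Hypothetical sketch: map a deprecated Tx devarg key to its replacement,
 * or to NULL when the key is recognized but simply ignored. */
static const char *
deprecated_txq_key_replacement(const char *key)
{
	if (strcmp(key, "txq_inline") == 0)
		return "txq_inline_max";	/* converted */
	if (strcmp(key, "txq_max_inline_len") == 0)
		return "txq_inline_mpw";	/* converted */
	if (strcmp(key, "tx_vec_en") == 0 ||
	    strcmp(key, "txqs_max_vec") == 0 ||
	    strcmp(key, "txq_mpw_hdr_dseg_en") == 0)
		return NULL;			/* ignored, probing continues */
	return key;				/* not deprecated */
}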

Signed-off-by: Viacheslav Ovsiienko <viacheslavo@mellanox.com>
Acked-by: Yongseok Koh <yskoh@mellanox.com>
Commit a6bd4911ad (parent 42280dd91b)
Authored by Viacheslav Ovsiienko on 2019-07-21 14:24:53 +00:00,
committed by Ferruh Yigit
12 changed files with 69 additions and 2733 deletions


@@ -350,13 +350,8 @@ Run-time configuration
- ``txq_inline`` parameter [int]
Amount of data to be inlined during TX operations. Improves latency.
Can improve PPS performance when PCI back pressure is detected and may be
useful for scenarios involving heavy traffic on many queues.
Because additional software logic is necessary to handle this mode, this
option should be used with care, as it can lower performance when back
pressure is not expected.
Amount of data to be inlined during TX operations. This parameter is
deprecated and ignored, kept for compatibility issue.
- ``txqs_min_inline`` parameter [int]
@@ -378,16 +373,8 @@ Run-time configuration
- ``txqs_max_vec`` parameter [int]
Enable vectorized Tx only when the number of TX queues is less than or
equal to this value. Effective only when ``tx_vec_en`` is enabled.
On ConnectX-5:
- Set to 8 by default on ARMv8.
- Set to 4 by default otherwise.
On BlueField
- Set to 16 by default.
equal to this value. This parameter is deprecated and ignored, kept
for compatibility issue to not prevent driver from probing.
- ``txq_mpw_en`` parameter [int]
@@ -418,7 +405,8 @@ Run-time configuration
- ``txq_mpw_hdr_dseg_en`` parameter [int]
A nonzero value enables including two pointers in the first block of TX
descriptor. This can be used to lessen CPU load for memory copy.
descriptor. The parameter is deprecated and ignored, kept for compatibility
issue.
Effective only when Enhanced MPS is supported. Disabled by default.
@@ -427,14 +415,14 @@ Run-time configuration
Maximum size of packet to be inlined. This limits the size of packet to
be inlined. If the size of a packet is larger than configured value, the
packet isn't inlined even though there's enough space remained in the
descriptor. Instead, the packet is included with pointer.
Effective only when Enhanced MPS is supported. The default value is 256.
descriptor. Instead, the packet is included with pointer. This parameter
is deprecated.
- ``tx_vec_en`` parameter [int]
A nonzero value enables Tx vector on ConnectX-5, ConnectX-6 and BlueField NICs if the number of
global Tx queues on the port is less than ``txqs_max_vec``.
A nonzero value enables Tx vector on ConnectX-5, ConnectX-6 and BlueField
NICs if the number of global Tx queues on the port is less than
``txqs_max_vec``. The parameter is deprecated and ignored.
This option cannot be used with certain offloads such as ``DEV_TX_OFFLOAD_TCP_TSO,
DEV_TX_OFFLOAD_VXLAN_TNL_TSO, DEV_TX_OFFLOAD_GRE_TNL_TSO, DEV_TX_OFFLOAD_VLAN_INSERT``.
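For reference, the Tx offloads named above are requested through the standard ethdev configuration; a minimal sketch with arbitrary queue counts (illustrative only, not tied to this driver):

#include <rte_ethdev.h>

/* Illustrative only: request TSO-related Tx offloads on a port. Under the
 * old datapath these flags were among those excluding vectorized Tx. */
static int
configure_port_with_tso(uint16_t port_id)
{
	struct rte_eth_conf conf = {
		.txmode = {
			.offloads = DEV_TX_OFFLOAD_TCP_TSO |
				    DEV_TX_OFFLOAD_VXLAN_TNL_TSO,
		},
	};

	/* One Rx queue and one Tx queue, chosen arbitrarily for the sketch. */
	return rte_eth_dev_configure(port_id, 1, 1, &conf);
}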


@@ -69,7 +69,7 @@
/* Device parameter to set the minimum number of Rx queues to enable MPRQ. */
#define MLX5_RXQS_MIN_MPRQ "rxqs_min_mprq"
/* Device parameter to configure inline send. */
/* Device parameter to configure inline send. Deprecated, ignored.*/
#define MLX5_TXQ_INLINE "txq_inline"
/*
@@ -80,20 +80,29 @@
/*
* Device parameter to configure the number of TX queues threshold for
* enabling vectorized Tx.
* enabling vectorized Tx, deprecated, ignored (no vectorized Tx routines).
*/
#define MLX5_TXQS_MAX_VEC "txqs_max_vec"
/* Device parameter to enable multi-packet send WQEs. */
#define MLX5_TXQ_MPW_EN "txq_mpw_en"
/* Device parameter to include 2 dsegs in the title WQEBB. */
/*
* Device parameter to include 2 dsegs in the title WQEBB.
* Deprecated, ignored.
*/
#define MLX5_TXQ_MPW_HDR_DSEG_EN "txq_mpw_hdr_dseg_en"
/* Device parameter to limit the size of inlining packet. */
/*
* Device parameter to limit the size of inlining packet.
* Deprecated, ignored.
*/
#define MLX5_TXQ_MAX_INLINE_LEN "txq_max_inline_len"
/* Device parameter to enable hardware Tx vector. */
/*
* Device parameter to enable hardware Tx vector.
* Deprecated, ignored (no vectorized Tx routines anymore).
*/
#define MLX5_TX_VEC_EN "tx_vec_en"
/* Device parameter to enable hardware Rx vector. */
@@ -997,19 +1006,19 @@ mlx5_args_check(const char *key, const char *val, void *opaque)
} else if (strcmp(MLX5_RXQS_MIN_MPRQ, key) == 0) {
config->mprq.min_rxqs_num = tmp;
} else if (strcmp(MLX5_TXQ_INLINE, key) == 0) {
config->txq_inline = tmp;
DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key);
} else if (strcmp(MLX5_TXQS_MIN_INLINE, key) == 0) {
config->txqs_inline = tmp;
} else if (strcmp(MLX5_TXQS_MAX_VEC, key) == 0) {
config->txqs_vec = tmp;
DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key);
} else if (strcmp(MLX5_TXQ_MPW_EN, key) == 0) {
config->mps = !!tmp;
} else if (strcmp(MLX5_TXQ_MPW_HDR_DSEG_EN, key) == 0) {
config->mpw_hdr_dseg = !!tmp;
DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key);
} else if (strcmp(MLX5_TXQ_MAX_INLINE_LEN, key) == 0) {
config->inline_max_packet_sz = tmp;
DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key);
} else if (strcmp(MLX5_TX_VEC_EN, key) == 0) {
config->tx_vec_en = !!tmp;
DRV_LOG(WARNING, "%s: deprecated parameter, ignored", key);
} else if (strcmp(MLX5_RX_VEC_EN, key) == 0) {
config->rx_vec_en = !!tmp;
} else if (strcmp(MLX5_L3_VXLAN_EN, key) == 0) {
@@ -2016,12 +2025,8 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
dev_config = (struct mlx5_dev_config){
.hw_padding = 0,
.mps = MLX5_ARG_UNSET,
.tx_vec_en = 1,
.rx_vec_en = 1,
.txq_inline = MLX5_ARG_UNSET,
.txqs_inline = MLX5_ARG_UNSET,
.txqs_vec = MLX5_ARG_UNSET,
.inline_max_packet_sz = MLX5_ARG_UNSET,
.vf_nl_en = 1,
.mr_ext_memseg_en = 1,
.mprq = {
@@ -2034,9 +2039,6 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
};
/* Device specific configuration. */
switch (pci_dev->id.device_id) {
case PCI_DEVICE_ID_MELLANOX_CONNECTX5BF:
dev_config.txqs_vec = MLX5_VPMD_MAX_TXQS_BLUEFIELD;
break;
case PCI_DEVICE_ID_MELLANOX_CONNECTX4VF:
case PCI_DEVICE_ID_MELLANOX_CONNECTX4LXVF:
case PCI_DEVICE_ID_MELLANOX_CONNECTX5VF:
@@ -2046,9 +2048,6 @@ mlx5_pci_probe(struct rte_pci_driver *pci_drv __rte_unused,
default:
break;
}
/* Set architecture-dependent default value if unset. */
if (dev_config.txqs_vec == MLX5_ARG_UNSET)
dev_config.txqs_vec = MLX5_VPMD_MAX_TXQS;
for (i = 0; i != ns; ++i) {
uint32_t restore;


@@ -198,9 +198,7 @@ struct mlx5_dev_config {
unsigned int cqe_comp:1; /* CQE compression is enabled. */
unsigned int cqe_pad:1; /* CQE padding is enabled. */
unsigned int tso:1; /* Whether TSO is supported. */
unsigned int tx_vec_en:1; /* Tx vector is enabled. */
unsigned int rx_vec_en:1; /* Rx vector is enabled. */
unsigned int mpw_hdr_dseg:1; /* Enable DSEGs in the title WQEBB. */
unsigned int mr_ext_memseg_en:1;
/* Whether memseg should be extended for MR creation. */
unsigned int l3_vxlan_en:1; /* Enable L3 VXLAN flow creation. */
@@ -224,10 +222,7 @@ struct mlx5_dev_config {
unsigned int tso_max_payload_sz; /* Maximum TCP payload for TSO. */
unsigned int ind_table_max_size; /* Maximum indirection table size. */
unsigned int max_dump_files_num; /* Maximum dump files per queue. */
int txq_inline; /* Maximum packet size for inlining. */
int txqs_inline; /* Queue number threshold for inlining. */
int txqs_vec; /* Queue number threshold for vectorized Tx. */
int inline_max_packet_sz; /* Max packet size for inlining. */
struct mlx5_hca_attr hca_attr; /* HCA attributes. */
};


@@ -60,15 +60,6 @@
/* Maximum Packet headers size (L2+L3+L4) for TSO. */
#define MLX5_MAX_TSO_HEADER 192
/* Default maximum number of Tx queues for vectorized Tx. */
#if defined(RTE_ARCH_ARM64)
#define MLX5_VPMD_MAX_TXQS 8
#define MLX5_VPMD_MAX_TXQS_BLUEFIELD 16
#else
#define MLX5_VPMD_MAX_TXQS 4
#define MLX5_VPMD_MAX_TXQS_BLUEFIELD MLX5_VPMD_MAX_TXQS
#endif
/* Threshold of buffer replenishment for vectorized Rx. */
#define MLX5_VPMD_RXQ_RPLNSH_THRESH(n) \
(RTE_MIN(MLX5_VPMD_RX_MAX_BURST, (unsigned int)(n) >> 2))
@@ -76,13 +67,6 @@
/* Maximum size of burst for vectorized Rx. */
#define MLX5_VPMD_RX_MAX_BURST 64U
/*
* Maximum size of burst for vectorized Tx. This is related to the maximum size
* of Enhanced MPW (eMPW) WQE as vectorized Tx is supported with eMPW.
* Careful when changing, large value can cause WQE DS to overlap.
*/
#define MLX5_VPMD_TX_MAX_BURST 32U
/* Number of packets vectorized Rx can simultaneously process in a loop. */
#define MLX5_VPMD_DESCS_PER_LOOP 4


@@ -1652,64 +1652,6 @@ mlx5_set_link_up(struct rte_eth_dev *dev)
return mlx5_set_flags(dev, ~IFF_UP, IFF_UP);
}
/**
* Configure the TX function to use.
*
* @param dev
* Pointer to private data structure.
*
* @return
* Pointer to selected Tx burst function.
*/
eth_tx_burst_t
mlx5_select_tx_function(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
eth_tx_burst_t tx_pkt_burst = mlx5_tx_burst;
struct mlx5_dev_config *config = &priv->config;
uint64_t tx_offloads = dev->data->dev_conf.txmode.offloads;
int tso = !!(tx_offloads & (DEV_TX_OFFLOAD_TCP_TSO |
DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
DEV_TX_OFFLOAD_GRE_TNL_TSO |
DEV_TX_OFFLOAD_IP_TNL_TSO |
DEV_TX_OFFLOAD_UDP_TNL_TSO));
int swp = !!(tx_offloads & (DEV_TX_OFFLOAD_IP_TNL_TSO |
DEV_TX_OFFLOAD_UDP_TNL_TSO |
DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM));
int vlan_insert = !!(tx_offloads & DEV_TX_OFFLOAD_VLAN_INSERT);
assert(priv != NULL);
/* Select appropriate TX function. */
if (vlan_insert || tso || swp)
return tx_pkt_burst;
if (config->mps == MLX5_MPW_ENHANCED) {
if (mlx5_check_vec_tx_support(dev) > 0) {
if (mlx5_check_raw_vec_tx_support(dev) > 0)
tx_pkt_burst = mlx5_tx_burst_raw_vec;
else
tx_pkt_burst = mlx5_tx_burst_vec;
DRV_LOG(DEBUG,
"port %u selected enhanced MPW Tx vectorized"
" function",
dev->data->port_id);
} else {
tx_pkt_burst = mlx5_tx_burst_empw;
DRV_LOG(DEBUG,
"port %u selected enhanced MPW Tx function",
dev->data->port_id);
}
} else if (config->mps && (config->txq_inline > 0)) {
tx_pkt_burst = mlx5_tx_burst_mpw_inline;
DRV_LOG(DEBUG, "port %u selected MPW inline Tx function",
dev->data->port_id);
} else if (config->mps) {
tx_pkt_burst = mlx5_tx_burst_mpw;
DRV_LOG(DEBUG, "port %u selected MPW Tx function",
dev->data->port_id);
}
return tx_pkt_burst;
}
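For context, the selector's return value became the port's burst callback, which rte_eth_tx_burst() then dispatches to; a simplified sketch of that call pattern (not the exact driver code):

#include <rte_ethdev.h>

/* Declaration of the selector removed above. */
eth_tx_burst_t mlx5_select_tx_function(struct rte_eth_dev *dev);

/* Simplified illustration: install the chosen routine as the port's
 * tx_pkt_burst callback. */
static void
install_tx_burst(struct rte_eth_dev *dev)
{
	dev->tx_pkt_burst = mlx5_select_tx_function(dev);
}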
/**
* Configure the RX function to use.
*


@@ -39,32 +39,12 @@
/* Invalidate a CQE. */
#define MLX5_CQE_INVALIDATE (MLX5_CQE_INVALID << 4)
/* Maximum number of packets a multi-packet WQE can handle. */
#define MLX5_MPW_DSEG_MAX 5
/* WQE DWORD size */
#define MLX5_WQE_DWORD_SIZE 16
/* WQE size */
#define MLX5_WQE_SIZE (4 * MLX5_WQE_DWORD_SIZE)
/* Max size of a WQE session. */
#define MLX5_WQE_SIZE_MAX 960U
/* Compute the number of DS. */
#define MLX5_WQE_DS(n) \
(((n) + MLX5_WQE_DWORD_SIZE - 1) / MLX5_WQE_DWORD_SIZE)
/* Room for inline data in multi-packet WQE. */
#define MLX5_MWQE64_INL_DATA 28
/* Default minimum number of Tx queues for inlining packets. */
#define MLX5_EMPW_MIN_TXQS 8
/* Default max packet length to be inlined. */
#define MLX5_EMPW_MAX_INLINE_LEN (4U * MLX5_WQE_SIZE)
#define MLX5_OPC_MOD_ENHANCED_MPSW 0
#define MLX5_OPCODE_ENHANCED_MPSW 0x29
@@ -164,47 +144,11 @@ enum mlx5_completion_mode {
MLX5_COMP_CQE_AND_EQE = 0x3,
};
/* Subset of struct mlx5_wqe_eth_seg. */
struct mlx5_wqe_eth_seg_small {
uint32_t rsvd0;
uint8_t cs_flags;
uint8_t rsvd1;
uint16_t mss;
uint32_t flow_table_metadata;
uint16_t inline_hdr_sz;
uint8_t inline_hdr[2];
} __rte_aligned(MLX5_WQE_DWORD_SIZE);
struct mlx5_wqe_inl_small {
uint32_t byte_cnt;
uint8_t raw;
} __rte_aligned(MLX5_WQE_DWORD_SIZE);
struct mlx5_wqe_ctrl {
uint32_t ctrl0;
uint32_t ctrl1;
uint32_t ctrl2;
uint32_t ctrl3;
} __rte_aligned(MLX5_WQE_DWORD_SIZE);
/* Small common part of the WQE. */
struct mlx5_wqe {
uint32_t ctrl[4];
struct mlx5_wqe_eth_seg_small eseg;
};
/* Vectorize WQE header. */
struct mlx5_wqe_v {
rte_v128u32_t ctrl;
rte_v128u32_t eseg;
};
/* WQE. */
struct mlx5_wqe64 {
struct mlx5_wqe hdr;
uint8_t raw[32];
} __rte_aligned(MLX5_WQE_SIZE);
/* MPW mode. */
enum mlx5_mpw_mode {
MLX5_MPW_DISABLED,
@@ -212,27 +156,6 @@ enum mlx5_mpw_mode {
MLX5_MPW_ENHANCED, /* Enhanced Multi-Packet Send WQE, a.k.a MPWv2. */
};
/* MPW session status. */
enum mlx5_mpw_state {
MLX5_MPW_STATE_OPENED,
MLX5_MPW_INL_STATE_OPENED,
MLX5_MPW_ENHANCED_STATE_OPENED,
MLX5_MPW_STATE_CLOSED,
};
/* MPW session descriptor. */
struct mlx5_mpw {
enum mlx5_mpw_state state;
unsigned int pkts_n;
unsigned int len;
unsigned int total_len;
volatile struct mlx5_wqe *wqe;
union {
volatile struct mlx5_wqe_data_seg *dseg[MLX5_MPW_DSEG_MAX];
volatile uint8_t *raw;
} data;
};
/* WQE for Multi-Packet RQ. */
struct mlx5_wqe_mprq {
struct mlx5_wqe_srq_next_seg next_seg;
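As a small worked example of the removed MLX5_WQE_DS() macro, which rounds a byte count up to 16-byte data segments (constants copied from the definitions above):

#include <stdio.h>

/* Constants as they appeared in the removed PRM definitions. */
#define MLX5_WQE_DWORD_SIZE 16
#define MLX5_WQE_SIZE (4 * MLX5_WQE_DWORD_SIZE)
#define MLX5_WQE_DS(n) \
	(((n) + MLX5_WQE_DWORD_SIZE - 1) / MLX5_WQE_DWORD_SIZE)

int
main(void)
{
	/* 40 bytes of inline data occupy 3 data segments (48 bytes). */
	printf("DS(40) = %d\n", MLX5_WQE_DS(40));
	/* A full 64-byte WQE is exactly 4 data segments. */
	printf("DS(%d) = %d\n", MLX5_WQE_SIZE, MLX5_WQE_DS(MLX5_WQE_SIZE));
	return 0;
}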

File diff suppressed because it is too large.


@@ -329,14 +329,6 @@ extern uint8_t mlx5_swp_types_table[];
void mlx5_set_ptype_table(void);
void mlx5_set_cksum_table(void);
void mlx5_set_swp_types_table(void);
uint16_t mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts,
uint16_t pkts_n);
uint16_t mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts,
uint16_t pkts_n);
uint16_t mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
uint16_t pkts_n);
uint16_t mlx5_tx_burst_empw(void *dpdk_txq, struct rte_mbuf **pkts,
uint16_t pkts_n);
__rte_noinline uint16_t mlx5_tx_error_cqe_handle(struct mlx5_txq_data *txq,
volatile struct mlx5_err_cqe *err_cqe);
uint16_t mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n);
@@ -360,14 +352,8 @@ int mlx5_queue_state_modify_primary(struct rte_eth_dev *dev,
const struct mlx5_mp_arg_queue_state_modify *sm);
/* Vectorized version of mlx5_rxtx.c */
int mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev);
int mlx5_check_vec_tx_support(struct rte_eth_dev *dev);
int mlx5_rxq_check_vec_support(struct mlx5_rxq_data *rxq_data);
int mlx5_check_vec_rx_support(struct rte_eth_dev *dev);
uint16_t mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,
uint16_t pkts_n);
uint16_t mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts,
uint16_t pkts_n);
uint16_t mlx5_rx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts,
uint16_t pkts_n);
@@ -477,122 +463,6 @@ check_cqe(volatile struct mlx5_cqe *cqe, const uint16_t cqes_n,
return MLX5_CQE_STATUS_SW_OWN;
}
/**
* Return the address of the WQE.
*
* @param txq
* Pointer to TX queue structure.
* @param wqe_ci
* WQE consumer index.
*
* @return
* WQE address.
*/
static inline uintptr_t *
tx_mlx5_wqe(struct mlx5_txq_data *txq, uint16_t ci)
{
ci &= ((1 << txq->wqe_n) - 1);
return (uintptr_t *)((uintptr_t)txq->wqes + ci * MLX5_WQE_SIZE);
}
/**
* Handle the next CQE.
*
* @param txq
* Pointer to TX queue structure.
*
* @return
* The last Tx buffer element to free.
*/
static __rte_always_inline uint16_t
mlx5_tx_cqe_handle(struct mlx5_txq_data *txq)
{
const unsigned int cqe_n = 1 << txq->cqe_n;
const unsigned int cqe_cnt = cqe_n - 1;
uint16_t last_elts;
union {
volatile struct mlx5_cqe *cqe;
volatile struct mlx5_err_cqe *err_cqe;
} u = {
.cqe = &(*txq->cqes)[txq->cq_ci & cqe_cnt],
};
int ret = check_cqe(u.cqe, cqe_n, txq->cq_ci);
if (unlikely(ret != MLX5_CQE_STATUS_SW_OWN)) {
if (unlikely(ret == MLX5_CQE_STATUS_ERR))
last_elts = mlx5_tx_error_cqe_handle(txq, u.err_cqe);
else
/* Do not release buffers. */
return txq->elts_tail;
} else {
uint16_t new_wqe_pi = rte_be_to_cpu_16(u.cqe->wqe_counter);
volatile struct mlx5_wqe_ctrl *ctrl =
(volatile struct mlx5_wqe_ctrl *)
tx_mlx5_wqe(txq, new_wqe_pi);
/* Release completion burst buffers. */
last_elts = ctrl->ctrl3;
txq->wqe_pi = new_wqe_pi;
txq->cq_ci++;
}
rte_compiler_barrier();
*txq->cq_db = rte_cpu_to_be_32(txq->cq_ci);
return last_elts;
}
/**
* Manage TX completions.
*
* When sending a burst, mlx5_tx_burst() posts several WRs.
*
* @param txq
* Pointer to TX queue structure.
*/
static __rte_always_inline void
mlx5_tx_complete(struct mlx5_txq_data *txq)
{
const uint16_t elts_n = 1 << txq->elts_n;
const uint16_t elts_m = elts_n - 1;
uint16_t elts_free = txq->elts_tail;
uint16_t elts_tail;
struct rte_mbuf *m, *free[elts_n];
struct rte_mempool *pool = NULL;
unsigned int blk_n = 0;
elts_tail = mlx5_tx_cqe_handle(txq);
assert((elts_tail & elts_m) < (1 << txq->wqe_n));
/* Free buffers. */
while (elts_free != elts_tail) {
m = rte_pktmbuf_prefree_seg((*txq->elts)[elts_free++ & elts_m]);
if (likely(m != NULL)) {
if (likely(m->pool == pool)) {
free[blk_n++] = m;
} else {
if (likely(pool != NULL))
rte_mempool_put_bulk(pool,
(void *)free,
blk_n);
free[0] = m;
pool = m->pool;
blk_n = 1;
}
}
}
if (blk_n)
rte_mempool_put_bulk(pool, (void *)free, blk_n);
#ifndef NDEBUG
elts_free = txq->elts_tail;
/* Poisoning. */
while (elts_free != elts_tail) {
memset(&(*txq->elts)[elts_free & elts_m],
0x66,
sizeof((*txq->elts)[elts_free & elts_m]));
++elts_free;
}
#endif
txq->elts_tail = elts_tail;
}
/**
* Get Memory Pool (MP) from mbuf. If mbuf is indirect, the pool from which the
* cloned mbuf is allocated is returned instead.
@@ -710,147 +580,4 @@ mlx5_tx_dbrec(struct mlx5_txq_data *txq, volatile struct mlx5_wqe *wqe)
mlx5_tx_dbrec_cond_wmb(txq, wqe, 1);
}
/**
* Convert mbuf to Verb SWP.
*
* @param txq_data
* Pointer to the Tx queue.
* @param buf
* Pointer to the mbuf.
* @param offsets
* Pointer to the SWP header offsets.
* @param swp_types
* Pointer to the SWP header types.
*/
static __rte_always_inline void
txq_mbuf_to_swp(struct mlx5_txq_data *txq, struct rte_mbuf *buf,
uint8_t *offsets, uint8_t *swp_types)
{
const uint64_t vlan = buf->ol_flags & PKT_TX_VLAN_PKT;
const uint64_t tunnel = buf->ol_flags & PKT_TX_TUNNEL_MASK;
const uint64_t tso = buf->ol_flags & PKT_TX_TCP_SEG;
const uint64_t csum_flags = buf->ol_flags & PKT_TX_L4_MASK;
const uint64_t inner_ip =
buf->ol_flags & (PKT_TX_IPV4 | PKT_TX_IPV6);
const uint64_t ol_flags_mask = PKT_TX_L4_MASK | PKT_TX_IPV6 |
PKT_TX_OUTER_IPV6;
uint16_t idx;
uint16_t off;
if (likely(!txq->swp_en || (tunnel != PKT_TX_TUNNEL_UDP &&
tunnel != PKT_TX_TUNNEL_IP)))
return;
/*
* The index should have:
* bit[0:1] = PKT_TX_L4_MASK
* bit[4] = PKT_TX_IPV6
* bit[8] = PKT_TX_OUTER_IPV6
* bit[9] = PKT_TX_OUTER_UDP
*/
idx = (buf->ol_flags & ol_flags_mask) >> 52;
if (tunnel == PKT_TX_TUNNEL_UDP)
idx |= 1 << 9;
*swp_types = mlx5_swp_types_table[idx];
/*
* Set offsets for SW parser. Since ConnectX-5, SW parser just
* complements HW parser. SW parser starts to engage only if HW parser
* can't reach a header. For the older devices, HW parser will not kick
* in if any of SWP offsets is set. Therefore, all of the L3 offsets
* should be set regardless of HW offload.
*/
off = buf->outer_l2_len + (vlan ? sizeof(struct rte_vlan_hdr) : 0);
offsets[1] = off >> 1; /* Outer L3 offset. */
off += buf->outer_l3_len;
if (tunnel == PKT_TX_TUNNEL_UDP)
offsets[0] = off >> 1; /* Outer L4 offset. */
if (inner_ip) {
off += buf->l2_len;
offsets[3] = off >> 1; /* Inner L3 offset. */
if (csum_flags == PKT_TX_TCP_CKSUM || tso ||
csum_flags == PKT_TX_UDP_CKSUM) {
off += buf->l3_len;
offsets[2] = off >> 1; /* Inner L4 offset. */
}
}
}
/**
* Convert the Checksum offloads to Verbs.
*
* @param buf
* Pointer to the mbuf.
*
* @return
* Converted checksum flags.
*/
static __rte_always_inline uint8_t
txq_ol_cksum_to_cs(struct rte_mbuf *buf)
{
uint32_t idx;
uint8_t is_tunnel = !!(buf->ol_flags & PKT_TX_TUNNEL_MASK);
const uint64_t ol_flags_mask = PKT_TX_TCP_SEG | PKT_TX_L4_MASK |
PKT_TX_IP_CKSUM | PKT_TX_OUTER_IP_CKSUM;
/*
* The index should have:
* bit[0] = PKT_TX_TCP_SEG
* bit[2:3] = PKT_TX_UDP_CKSUM, PKT_TX_TCP_CKSUM
* bit[4] = PKT_TX_IP_CKSUM
* bit[8] = PKT_TX_OUTER_IP_CKSUM
* bit[9] = tunnel
*/
idx = ((buf->ol_flags & ol_flags_mask) >> 50) | (!!is_tunnel << 9);
return mlx5_cksum_table[idx];
}
/**
* Count the number of contiguous single segment packets.
*
* @param pkts
* Pointer to array of packets.
* @param pkts_n
* Number of packets.
*
* @return
* Number of contiguous single segment packets.
*/
static __rte_always_inline unsigned int
txq_count_contig_single_seg(struct rte_mbuf **pkts, uint16_t pkts_n)
{
unsigned int pos;
if (!pkts_n)
return 0;
/* Count the number of contiguous single segment packets. */
for (pos = 0; pos < pkts_n; ++pos)
if (NB_SEGS(pkts[pos]) > 1)
break;
return pos;
}
/**
* Count the number of contiguous multi-segment packets.
*
* @param pkts
* Pointer to array of packets.
* @param pkts_n
* Number of packets.
*
* @return
* Number of contiguous multi-segment packets.
*/
static __rte_always_inline unsigned int
txq_count_contig_multi_seg(struct rte_mbuf **pkts, uint16_t pkts_n)
{
unsigned int pos;
if (!pkts_n)
return 0;
/* Count the number of contiguous multi-segment packets. */
for (pos = 0; pos < pkts_n; ++pos)
if (NB_SEGS(pkts[pos]) == 1)
break;
return pos;
}
#endif /* RTE_PMD_MLX5_RXTX_H_ */
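The removed mlx5_tx_complete() above returns transmitted mbufs to their mempools in pool-grouped batches; a minimal standalone sketch of that recycling pattern (generic helper, not the driver's exact code):

#include <rte_common.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

/* Sketch of pool-grouped bulk freeing: consecutive mbufs from the same
 * mempool are batched into a single rte_mempool_put_bulk() call. */
static void
bulk_free_mbufs(struct rte_mbuf **mbufs, unsigned int n)
{
	struct rte_mempool *pool = NULL;
	void *batch[64]; /* illustrative batch size */
	unsigned int blk_n = 0;
	unsigned int i;

	for (i = 0; i < n; ++i) {
		struct rte_mbuf *m = rte_pktmbuf_prefree_seg(mbufs[i]);

		if (m == NULL)
			continue; /* still referenced, nothing to free */
		if (m->pool != pool || blk_n == RTE_DIM(batch)) {
			if (blk_n)
				rte_mempool_put_bulk(pool, batch, blk_n);
			pool = m->pool;
			blk_n = 0;
		}
		batch[blk_n++] = m;
	}
	if (blk_n)
		rte_mempool_put_bulk(pool, batch, blk_n);
}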


@@ -39,138 +39,6 @@
#error "This should not be compiled if SIMD instructions are not supported."
#endif
/**
* Count the number of packets having same ol_flags and same metadata (if
* PKT_TX_METADATA is set in ol_flags), and calculate cs_flags.
*
* @param pkts
* Pointer to array of packets.
* @param pkts_n
* Number of packets.
* @param cs_flags
* Pointer of flags to be returned.
* @param metadata
* Pointer of metadata to be returned.
* @param txq_offloads
* Offloads enabled on Tx queue
*
* @return
* Number of packets having same ol_flags and metadata, if relevant.
*/
static inline unsigned int
txq_calc_offload(struct rte_mbuf **pkts, uint16_t pkts_n, uint8_t *cs_flags,
rte_be32_t *metadata, const uint64_t txq_offloads)
{
unsigned int pos;
const uint64_t cksum_ol_mask =
PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM |
PKT_TX_UDP_CKSUM | PKT_TX_TUNNEL_GRE |
PKT_TX_TUNNEL_VXLAN | PKT_TX_OUTER_IP_CKSUM;
rte_be32_t p0_metadata, pn_metadata;
if (!pkts_n)
return 0;
p0_metadata = pkts[0]->ol_flags & PKT_TX_METADATA ?
pkts[0]->tx_metadata : 0;
/* Count the number of packets having same offload parameters. */
for (pos = 1; pos < pkts_n; ++pos) {
/* Check if packet has same checksum flags. */
if ((txq_offloads & MLX5_VEC_TX_CKSUM_OFFLOAD_CAP) &&
((pkts[pos]->ol_flags ^ pkts[0]->ol_flags) & cksum_ol_mask))
break;
/* Check if packet has same metadata. */
if (txq_offloads & DEV_TX_OFFLOAD_MATCH_METADATA) {
pn_metadata = pkts[pos]->ol_flags & PKT_TX_METADATA ?
pkts[pos]->tx_metadata : 0;
if (pn_metadata != p0_metadata)
break;
}
}
*cs_flags = txq_ol_cksum_to_cs(pkts[0]);
*metadata = p0_metadata;
return pos;
}
/**
* DPDK callback for vectorized TX.
*
* @param dpdk_txq
* Generic pointer to TX queue structure.
* @param[in] pkts
* Packets to transmit.
* @param pkts_n
* Number of packets in array.
*
* @return
* Number of packets successfully transmitted (<= pkts_n).
*/
uint16_t
mlx5_tx_burst_raw_vec(void *dpdk_txq, struct rte_mbuf **pkts,
uint16_t pkts_n)
{
struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
uint16_t nb_tx = 0;
while (pkts_n > nb_tx) {
uint16_t n;
uint16_t ret;
n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
ret = txq_burst_v(txq, &pkts[nb_tx], n, 0, 0);
nb_tx += ret;
if (!ret)
break;
}
return nb_tx;
}
/**
* DPDK callback for vectorized TX with multi-seg packets and offload.
*
* @param dpdk_txq
* Generic pointer to TX queue structure.
* @param[in] pkts
* Packets to transmit.
* @param pkts_n
* Number of packets in array.
*
* @return
* Number of packets successfully transmitted (<= pkts_n).
*/
uint16_t
mlx5_tx_burst_vec(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
struct mlx5_txq_data *txq = (struct mlx5_txq_data *)dpdk_txq;
uint16_t nb_tx = 0;
while (pkts_n > nb_tx) {
uint8_t cs_flags = 0;
uint16_t n;
uint16_t ret;
rte_be32_t metadata = 0;
/* Transmit multi-seg packets in the head of pkts list. */
if ((txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS) &&
NB_SEGS(pkts[nb_tx]) > 1)
nb_tx += txq_scatter_v(txq,
&pkts[nb_tx],
pkts_n - nb_tx);
n = RTE_MIN((uint16_t)(pkts_n - nb_tx), MLX5_VPMD_TX_MAX_BURST);
if (txq->offloads & DEV_TX_OFFLOAD_MULTI_SEGS)
n = txq_count_contig_single_seg(&pkts[nb_tx], n);
if (txq->offloads & (MLX5_VEC_TX_CKSUM_OFFLOAD_CAP |
DEV_TX_OFFLOAD_MATCH_METADATA))
n = txq_calc_offload(&pkts[nb_tx], n,
&cs_flags, &metadata,
txq->offloads);
ret = txq_burst_v(txq, &pkts[nb_tx], n, cs_flags, metadata);
nb_tx += ret;
if (!ret)
break;
}
return nb_tx;
}
/**
* Skip error packets.
*
@@ -242,49 +110,6 @@ mlx5_rx_burst_vec(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
return nb_rx;
}
/**
* Check Tx queue flags are set for raw vectorized Tx.
*
* @param dev
* Pointer to Ethernet device.
*
* @return
* 1 if supported, negative errno value if not.
*/
int __attribute__((cold))
mlx5_check_raw_vec_tx_support(struct rte_eth_dev *dev)
{
uint64_t offloads = dev->data->dev_conf.txmode.offloads;
/* Doesn't support any offload. */
if (offloads)
return -ENOTSUP;
return 1;
}
/**
* Check a device can support vectorized TX.
*
* @param dev
* Pointer to Ethernet device.
*
* @return
* 1 if supported, negative errno value if not.
*/
int __attribute__((cold))
mlx5_check_vec_tx_support(struct rte_eth_dev *dev)
{
struct mlx5_priv *priv = dev->data->dev_private;
uint64_t offloads = dev->data->dev_conf.txmode.offloads;
if (!priv->config.tx_vec_en ||
priv->txqs_n > (unsigned int)priv->config.txqs_vec ||
priv->config.mps != MLX5_MPW_ENHANCED ||
offloads & ~MLX5_VEC_TX_OFFLOAD_CAP)
return -ENOTSUP;
return 1;
}
/**
* Check a RX queue can support vectorized RX.
*


@@ -26,295 +26,6 @@
#pragma GCC diagnostic ignored "-Wcast-qual"
/**
* Fill in buffer descriptors in a multi-packet send descriptor.
*
* @param txq
* Pointer to TX queue structure.
* @param dseg
* Pointer to buffer descriptor to be written.
* @param pkts
* Pointer to array of packets to be sent.
* @param n
* Number of packets to be filled.
*/
static inline void
txq_wr_dseg_v(struct mlx5_txq_data *txq, uint8_t *dseg,
struct rte_mbuf **pkts, unsigned int n)
{
unsigned int pos;
uintptr_t addr;
const uint8x16_t dseg_shuf_m = {
3, 2, 1, 0, /* length, bswap32 */
4, 5, 6, 7, /* lkey */
15, 14, 13, 12, /* addr, bswap64 */
11, 10, 9, 8
};
#ifdef MLX5_PMD_SOFT_COUNTERS
uint32_t tx_byte = 0;
#endif
for (pos = 0; pos < n; ++pos, dseg += MLX5_WQE_DWORD_SIZE) {
uint8x16_t desc;
struct rte_mbuf *pkt = pkts[pos];
addr = rte_pktmbuf_mtod(pkt, uintptr_t);
desc = vreinterpretq_u8_u32((uint32x4_t) {
DATA_LEN(pkt),
mlx5_tx_mb2mr(txq, pkt),
addr,
addr >> 32 });
desc = vqtbl1q_u8(desc, dseg_shuf_m);
vst1q_u8(dseg, desc);
#ifdef MLX5_PMD_SOFT_COUNTERS
tx_byte += DATA_LEN(pkt);
#endif
}
#ifdef MLX5_PMD_SOFT_COUNTERS
txq->stats.obytes += tx_byte;
#endif
}
/**
* Send multi-segmented packets until it encounters a single segment packet in
* the pkts list.
*
* @param txq
* Pointer to TX queue structure.
* @param pkts
* Pointer to array of packets to be sent.
* @param pkts_n
* Number of packets to be sent.
*
* @return
* Number of packets successfully transmitted (<= pkts_n).
*/
static uint16_t
txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
uint16_t pkts_n)
{
uint16_t elts_head = txq->elts_head;
const uint16_t elts_n = 1 << txq->elts_n;
const uint16_t elts_m = elts_n - 1;
const uint16_t wq_n = 1 << txq->wqe_n;
const uint16_t wq_mask = wq_n - 1;
const unsigned int nb_dword_per_wqebb =
MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE;
const unsigned int nb_dword_in_hdr =
sizeof(struct mlx5_wqe) / MLX5_WQE_DWORD_SIZE;
unsigned int n;
volatile struct mlx5_wqe *wqe = NULL;
bool metadata_ol =
txq->offloads & DEV_TX_OFFLOAD_MATCH_METADATA ? true : false;
assert(elts_n > pkts_n);
mlx5_tx_complete(txq);
if (unlikely(!pkts_n))
return 0;
for (n = 0; n < pkts_n; ++n) {
struct rte_mbuf *buf = pkts[n];
unsigned int segs_n = buf->nb_segs;
unsigned int ds = nb_dword_in_hdr;
unsigned int len = PKT_LEN(buf);
uint16_t wqe_ci = txq->wqe_ci;
const uint8x16_t ctrl_shuf_m = {
3, 2, 1, 0, /* bswap32 */
7, 6, 5, 4, /* bswap32 */
11, 10, 9, 8, /* bswap32 */
12, 13, 14, 15
};
uint8_t cs_flags;
uint16_t max_elts;
uint16_t max_wqe;
uint8x16_t *t_wqe;
uint8_t *dseg;
uint8x16_t ctrl;
rte_be32_t metadata =
metadata_ol && (buf->ol_flags & PKT_TX_METADATA) ?
buf->tx_metadata : 0;
assert(segs_n);
max_elts = elts_n - (elts_head - txq->elts_tail);
max_wqe = wq_n - (txq->wqe_ci - txq->wqe_pi);
/*
* A MPW session consumes 2 WQEs at most to
* include MLX5_MPW_DSEG_MAX pointers.
*/
if (segs_n == 1 ||
max_elts < segs_n || max_wqe < 2)
break;
wqe = &((volatile struct mlx5_wqe64 *)
txq->wqes)[wqe_ci & wq_mask].hdr;
cs_flags = txq_ol_cksum_to_cs(buf);
/* Title WQEBB pointer. */
t_wqe = (uint8x16_t *)wqe;
dseg = (uint8_t *)(wqe + 1);
do {
if (!(ds++ % nb_dword_per_wqebb)) {
dseg = (uint8_t *)
&((volatile struct mlx5_wqe64 *)
txq->wqes)[++wqe_ci & wq_mask];
}
txq_wr_dseg_v(txq, dseg, &buf, 1);
dseg += MLX5_WQE_DWORD_SIZE;
(*txq->elts)[elts_head++ & elts_m] = buf;
buf = buf->next;
} while (--segs_n);
++wqe_ci;
/* Fill CTRL in the header. */
ctrl = vreinterpretq_u8_u32((uint32x4_t) {
MLX5_OPC_MOD_MPW << 24 |
txq->wqe_ci << 8 | MLX5_OPCODE_TSO,
txq->qp_num_8s | ds, 4, 0});
ctrl = vqtbl1q_u8(ctrl, ctrl_shuf_m);
vst1q_u8((void *)t_wqe, ctrl);
/* Fill ESEG in the header. */
vst1q_u32((void *)(t_wqe + 1),
((uint32x4_t){ 0,
rte_cpu_to_be_16(len) << 16 | cs_flags,
metadata, 0 }));
txq->wqe_ci = wqe_ci;
}
if (!n)
return 0;
txq->elts_comp += (uint16_t)(elts_head - txq->elts_head);
txq->elts_head = elts_head;
if (txq->elts_comp >= MLX5_TX_COMP_THRESH) {
/* A CQE slot must always be available. */
assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<
MLX5_COMP_MODE_OFFSET);
wqe->ctrl[3] = txq->elts_head;
txq->elts_comp = 0;
}
#ifdef MLX5_PMD_SOFT_COUNTERS
txq->stats.opackets += n;
#endif
mlx5_tx_dbrec(txq, wqe);
return n;
}
/**
* Send burst of packets with Enhanced MPW. If it encounters a multi-seg packet,
* it returns to make it processed by txq_scatter_v(). All the packets in
* the pkts list should be single segment packets having same offload flags.
* This must be checked by txq_count_contig_single_seg() and txq_calc_offload().
*
* @param txq
* Pointer to TX queue structure.
* @param pkts
* Pointer to array of packets to be sent.
* @param pkts_n
* Number of packets to be sent (<= MLX5_VPMD_TX_MAX_BURST).
* @param cs_flags
* Checksum offload flags to be written in the descriptor.
* @param metadata
* Metadata value to be written in the descriptor.
*
* @return
* Number of packets successfully transmitted (<= pkts_n).
*/
static inline uint16_t
txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
uint8_t cs_flags, rte_be32_t metadata)
{
struct rte_mbuf **elts;
uint16_t elts_head = txq->elts_head;
const uint16_t elts_n = 1 << txq->elts_n;
const uint16_t elts_m = elts_n - 1;
const unsigned int nb_dword_per_wqebb =
MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE;
const unsigned int nb_dword_in_hdr =
sizeof(struct mlx5_wqe) / MLX5_WQE_DWORD_SIZE;
unsigned int n = 0;
unsigned int pos;
uint16_t max_elts;
uint16_t max_wqe;
uint32_t comp_req;
const uint16_t wq_n = 1 << txq->wqe_n;
const uint16_t wq_mask = wq_n - 1;
uint16_t wq_idx = txq->wqe_ci & wq_mask;
volatile struct mlx5_wqe64 *wq =
&((volatile struct mlx5_wqe64 *)txq->wqes)[wq_idx];
volatile struct mlx5_wqe *wqe = (volatile struct mlx5_wqe *)wq;
const uint8x16_t ctrl_shuf_m = {
3, 2, 1, 0, /* bswap32 */
7, 6, 5, 4, /* bswap32 */
11, 10, 9, 8, /* bswap32 */
12, 13, 14, 15
};
uint8x16_t *t_wqe;
uint8_t *dseg;
uint8x16_t ctrl;
/* Make sure all packets can fit into a single WQE. */
assert(elts_n > pkts_n);
mlx5_tx_complete(txq);
max_elts = (elts_n - (elts_head - txq->elts_tail));
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
pkts_n = RTE_MIN((unsigned int)RTE_MIN(pkts_n, max_wqe), max_elts);
if (unlikely(!pkts_n))
return 0;
elts = &(*txq->elts)[elts_head & elts_m];
/* Loop for available tailroom first. */
n = RTE_MIN(elts_n - (elts_head & elts_m), pkts_n);
for (pos = 0; pos < (n & -2); pos += 2)
vst1q_u64((void *)&elts[pos], vld1q_u64((void *)&pkts[pos]));
if (n & 1)
elts[pos] = pkts[pos];
/* Check if it crosses the end of the queue. */
if (unlikely(n < pkts_n)) {
elts = &(*txq->elts)[0];
for (pos = 0; pos < pkts_n - n; ++pos)
elts[pos] = pkts[n + pos];
}
txq->elts_head += pkts_n;
/* Save title WQEBB pointer. */
t_wqe = (uint8x16_t *)wqe;
dseg = (uint8_t *)(wqe + 1);
/* Calculate the number of entries to the end. */
n = RTE_MIN(
(wq_n - wq_idx) * nb_dword_per_wqebb - nb_dword_in_hdr,
pkts_n);
/* Fill DSEGs. */
txq_wr_dseg_v(txq, dseg, pkts, n);
/* Check if it crosses the end of the queue. */
if (n < pkts_n) {
dseg = (uint8_t *)txq->wqes;
txq_wr_dseg_v(txq, dseg, &pkts[n], pkts_n - n);
}
if (txq->elts_comp + pkts_n < MLX5_TX_COMP_THRESH) {
txq->elts_comp += pkts_n;
comp_req = MLX5_COMP_ONLY_FIRST_ERR << MLX5_COMP_MODE_OFFSET;
} else {
/* A CQE slot must always be available. */
assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
/* Request a completion. */
txq->elts_comp = 0;
comp_req = MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET;
}
/* Fill CTRL in the header. */
ctrl = vreinterpretq_u8_u32((uint32x4_t) {
MLX5_OPC_MOD_ENHANCED_MPSW << 24 |
txq->wqe_ci << 8 | MLX5_OPCODE_ENHANCED_MPSW,
txq->qp_num_8s | (pkts_n + 2),
comp_req,
txq->elts_head });
ctrl = vqtbl1q_u8(ctrl, ctrl_shuf_m);
vst1q_u8((void *)t_wqe, ctrl);
/* Fill ESEG in the header. */
vst1q_u32((void *)(t_wqe + 1),
((uint32x4_t) { 0, cs_flags, metadata, 0 }));
#ifdef MLX5_PMD_SOFT_COUNTERS
txq->stats.opackets += pkts_n;
#endif
txq->wqe_ci += (nb_dword_in_hdr + pkts_n + (nb_dword_per_wqebb - 1)) /
nb_dword_per_wqebb;
/* Ring QP doorbell. */
mlx5_tx_dbrec_cond_wmb(txq, wqe, pkts_n < MLX5_VPMD_TX_MAX_BURST);
return pkts_n;
}
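To make the final wqe_ci advance above concrete, here is a standalone arithmetic check; the header and WQEBB sizes follow from the structures shown earlier in this diff (64-byte WQEBB of four 16-byte DWORDs, 32-byte CTRL+ESEG header):

#include <stdio.h>

#define NB_DWORD_PER_WQEBB 4	/* MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE */
#define NB_DWORD_IN_HDR    2	/* sizeof(struct mlx5_wqe) / MLX5_WQE_DWORD_SIZE */

int
main(void)
{
	unsigned int pkts_n = 32; /* MLX5_VPMD_TX_MAX_BURST */
	unsigned int wqebbs = (NB_DWORD_IN_HDR + pkts_n +
			       (NB_DWORD_PER_WQEBB - 1)) / NB_DWORD_PER_WQEBB;

	/* 2 header DWORDs + 32 pointer DWORDs round up to 9 WQEBBs. */
	printf("eMPW burst of %u packets consumes %u WQEBBs\n",
	       pkts_n, wqebbs);
	return 0;
}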
/**
* Store free buffers to RX SW ring.
*


@@ -28,290 +28,6 @@
#pragma GCC diagnostic ignored "-Wcast-qual"
#endif
/**
* Fill in buffer descriptors in a multi-packet send descriptor.
*
* @param txq
* Pointer to TX queue structure.
* @param dseg
* Pointer to buffer descriptor to be written.
* @param pkts
* Pointer to array of packets to be sent.
* @param n
* Number of packets to be filled.
*/
static inline void
txq_wr_dseg_v(struct mlx5_txq_data *txq, __m128i *dseg,
struct rte_mbuf **pkts, unsigned int n)
{
unsigned int pos;
uintptr_t addr;
const __m128i shuf_mask_dseg =
_mm_set_epi8(8, 9, 10, 11, /* addr, bswap64 */
12, 13, 14, 15,
7, 6, 5, 4, /* lkey */
0, 1, 2, 3 /* length, bswap32 */);
#ifdef MLX5_PMD_SOFT_COUNTERS
uint32_t tx_byte = 0;
#endif
for (pos = 0; pos < n; ++pos, ++dseg) {
__m128i desc;
struct rte_mbuf *pkt = pkts[pos];
addr = rte_pktmbuf_mtod(pkt, uintptr_t);
desc = _mm_set_epi32(addr >> 32,
addr,
mlx5_tx_mb2mr(txq, pkt),
DATA_LEN(pkt));
desc = _mm_shuffle_epi8(desc, shuf_mask_dseg);
_mm_store_si128(dseg, desc);
#ifdef MLX5_PMD_SOFT_COUNTERS
tx_byte += DATA_LEN(pkt);
#endif
}
#ifdef MLX5_PMD_SOFT_COUNTERS
txq->stats.obytes += tx_byte;
#endif
}
/**
* Send multi-segmented packets until it encounters a single segment packet in
* the pkts list.
*
* @param txq
* Pointer to TX queue structure.
* @param pkts
* Pointer to array of packets to be sent.
* @param pkts_n
* Number of packets to be sent.
*
* @return
* Number of packets successfully transmitted (<= pkts_n).
*/
static uint16_t
txq_scatter_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts,
uint16_t pkts_n)
{
uint16_t elts_head = txq->elts_head;
const uint16_t elts_n = 1 << txq->elts_n;
const uint16_t elts_m = elts_n - 1;
const uint16_t wq_n = 1 << txq->wqe_n;
const uint16_t wq_mask = wq_n - 1;
const unsigned int nb_dword_per_wqebb =
MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE;
const unsigned int nb_dword_in_hdr =
sizeof(struct mlx5_wqe) / MLX5_WQE_DWORD_SIZE;
unsigned int n;
volatile struct mlx5_wqe *wqe = NULL;
bool metadata_ol =
txq->offloads & DEV_TX_OFFLOAD_MATCH_METADATA ? true : false;
assert(elts_n > pkts_n);
mlx5_tx_complete(txq);
if (unlikely(!pkts_n))
return 0;
for (n = 0; n < pkts_n; ++n) {
struct rte_mbuf *buf = pkts[n];
unsigned int segs_n = buf->nb_segs;
unsigned int ds = nb_dword_in_hdr;
unsigned int len = PKT_LEN(buf);
uint16_t wqe_ci = txq->wqe_ci;
const __m128i shuf_mask_ctrl =
_mm_set_epi8(15, 14, 13, 12,
8, 9, 10, 11, /* bswap32 */
4, 5, 6, 7, /* bswap32 */
0, 1, 2, 3 /* bswap32 */);
uint8_t cs_flags;
uint16_t max_elts;
uint16_t max_wqe;
__m128i *t_wqe, *dseg;
__m128i ctrl;
rte_be32_t metadata =
metadata_ol && (buf->ol_flags & PKT_TX_METADATA) ?
buf->tx_metadata : 0;
assert(segs_n);
max_elts = elts_n - (elts_head - txq->elts_tail);
max_wqe = wq_n - (txq->wqe_ci - txq->wqe_pi);
/*
* A MPW session consumes 2 WQEs at most to
* include MLX5_MPW_DSEG_MAX pointers.
*/
if (segs_n == 1 ||
max_elts < segs_n || max_wqe < 2)
break;
if (segs_n > MLX5_MPW_DSEG_MAX) {
txq->stats.oerrors++;
break;
}
wqe = &((volatile struct mlx5_wqe64 *)
txq->wqes)[wqe_ci & wq_mask].hdr;
cs_flags = txq_ol_cksum_to_cs(buf);
/* Title WQEBB pointer. */
t_wqe = (__m128i *)wqe;
dseg = (__m128i *)(wqe + 1);
do {
if (!(ds++ % nb_dword_per_wqebb)) {
dseg = (__m128i *)
&((volatile struct mlx5_wqe64 *)
txq->wqes)[++wqe_ci & wq_mask];
}
txq_wr_dseg_v(txq, dseg++, &buf, 1);
(*txq->elts)[elts_head++ & elts_m] = buf;
buf = buf->next;
} while (--segs_n);
++wqe_ci;
/* Fill CTRL in the header. */
ctrl = _mm_set_epi32(0, 4, txq->qp_num_8s | ds,
MLX5_OPC_MOD_MPW << 24 |
txq->wqe_ci << 8 | MLX5_OPCODE_TSO);
ctrl = _mm_shuffle_epi8(ctrl, shuf_mask_ctrl);
_mm_store_si128(t_wqe, ctrl);
/* Fill ESEG in the header. */
_mm_store_si128(t_wqe + 1,
_mm_set_epi32(0, metadata,
(rte_cpu_to_be_16(len) << 16) |
cs_flags, 0));
txq->wqe_ci = wqe_ci;
}
if (!n)
return 0;
txq->elts_comp += (uint16_t)(elts_head - txq->elts_head);
txq->elts_head = elts_head;
if (txq->elts_comp >= MLX5_TX_COMP_THRESH) {
/* A CQE slot must always be available. */
assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
wqe->ctrl[2] = rte_cpu_to_be_32(MLX5_COMP_ALWAYS <<
MLX5_COMP_MODE_OFFSET);
wqe->ctrl[3] = txq->elts_head;
txq->elts_comp = 0;
}
#ifdef MLX5_PMD_SOFT_COUNTERS
txq->stats.opackets += n;
#endif
mlx5_tx_dbrec(txq, wqe);
return n;
}
/**
* Send burst of packets with Enhanced MPW. If it encounters a multi-seg packet,
* it returns to make it processed by txq_scatter_v(). All the packets in
* the pkts list should be single segment packets having same offload flags.
* This must be checked by txq_count_contig_single_seg() and txq_calc_offload().
*
* @param txq
* Pointer to TX queue structure.
* @param pkts
* Pointer to array of packets to be sent.
* @param pkts_n
* Number of packets to be sent (<= MLX5_VPMD_TX_MAX_BURST).
* @param cs_flags
* Checksum offload flags to be written in the descriptor.
* @param metadata
* Metadata value to be written in the descriptor.
*
* @return
* Number of packets successfully transmitted (<= pkts_n).
*/
static inline uint16_t
txq_burst_v(struct mlx5_txq_data *txq, struct rte_mbuf **pkts, uint16_t pkts_n,
uint8_t cs_flags, rte_be32_t metadata)
{
struct rte_mbuf **elts;
uint16_t elts_head = txq->elts_head;
const uint16_t elts_n = 1 << txq->elts_n;
const uint16_t elts_m = elts_n - 1;
const unsigned int nb_dword_per_wqebb =
MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE;
const unsigned int nb_dword_in_hdr =
sizeof(struct mlx5_wqe) / MLX5_WQE_DWORD_SIZE;
unsigned int n = 0;
unsigned int pos;
uint16_t max_elts;
uint16_t max_wqe;
uint32_t comp_req;
const uint16_t wq_n = 1 << txq->wqe_n;
const uint16_t wq_mask = wq_n - 1;
uint16_t wq_idx = txq->wqe_ci & wq_mask;
volatile struct mlx5_wqe64 *wq =
&((volatile struct mlx5_wqe64 *)txq->wqes)[wq_idx];
volatile struct mlx5_wqe *wqe = (volatile struct mlx5_wqe *)wq;
const __m128i shuf_mask_ctrl =
_mm_set_epi8(15, 14, 13, 12,
8, 9, 10, 11, /* bswap32 */
4, 5, 6, 7, /* bswap32 */
0, 1, 2, 3 /* bswap32 */);
__m128i *t_wqe, *dseg;
__m128i ctrl;
/* Make sure all packets can fit into a single WQE. */
assert(elts_n > pkts_n);
mlx5_tx_complete(txq);
max_elts = (elts_n - (elts_head - txq->elts_tail));
max_wqe = (1u << txq->wqe_n) - (txq->wqe_ci - txq->wqe_pi);
pkts_n = RTE_MIN((unsigned int)RTE_MIN(pkts_n, max_wqe), max_elts);
assert(pkts_n <= MLX5_DSEG_MAX - nb_dword_in_hdr);
if (unlikely(!pkts_n))
return 0;
elts = &(*txq->elts)[elts_head & elts_m];
/* Loop for available tailroom first. */
n = RTE_MIN(elts_n - (elts_head & elts_m), pkts_n);
for (pos = 0; pos < (n & -2); pos += 2)
_mm_storeu_si128((__m128i *)&elts[pos],
_mm_loadu_si128((__m128i *)&pkts[pos]));
if (n & 1)
elts[pos] = pkts[pos];
/* Check if it crosses the end of the queue. */
if (unlikely(n < pkts_n)) {
elts = &(*txq->elts)[0];
for (pos = 0; pos < pkts_n - n; ++pos)
elts[pos] = pkts[n + pos];
}
txq->elts_head += pkts_n;
/* Save title WQEBB pointer. */
t_wqe = (__m128i *)wqe;
dseg = (__m128i *)(wqe + 1);
/* Calculate the number of entries to the end. */
n = RTE_MIN(
(wq_n - wq_idx) * nb_dword_per_wqebb - nb_dword_in_hdr,
pkts_n);
/* Fill DSEGs. */
txq_wr_dseg_v(txq, dseg, pkts, n);
/* Check if it crosses the end of the queue. */
if (n < pkts_n) {
dseg = (__m128i *)txq->wqes;
txq_wr_dseg_v(txq, dseg, &pkts[n], pkts_n - n);
}
if (txq->elts_comp + pkts_n < MLX5_TX_COMP_THRESH) {
txq->elts_comp += pkts_n;
comp_req = MLX5_COMP_ONLY_FIRST_ERR << MLX5_COMP_MODE_OFFSET;
} else {
/* A CQE slot must always be available. */
assert((1u << txq->cqe_n) - (txq->cq_pi++ - txq->cq_ci));
/* Request a completion. */
txq->elts_comp = 0;
comp_req = MLX5_COMP_ALWAYS << MLX5_COMP_MODE_OFFSET;
}
/* Fill CTRL in the header. */
ctrl = _mm_set_epi32(txq->elts_head, comp_req,
txq->qp_num_8s | (pkts_n + 2),
MLX5_OPC_MOD_ENHANCED_MPSW << 24 |
txq->wqe_ci << 8 | MLX5_OPCODE_ENHANCED_MPSW);
ctrl = _mm_shuffle_epi8(ctrl, shuf_mask_ctrl);
_mm_store_si128(t_wqe, ctrl);
/* Fill ESEG in the header. */
_mm_store_si128(t_wqe + 1, _mm_set_epi32(0, metadata, cs_flags, 0));
#ifdef MLX5_PMD_SOFT_COUNTERS
txq->stats.opackets += pkts_n;
#endif
txq->wqe_ci += (nb_dword_in_hdr + pkts_n + (nb_dword_per_wqebb - 1)) /
nb_dword_per_wqebb;
/* Ring QP doorbell. */
mlx5_tx_dbrec_cond_wmb(txq, wqe, pkts_n < MLX5_VPMD_TX_MAX_BURST);
return pkts_n;
}
/**
* Store free buffers to RX SW ring.
*


@@ -364,25 +364,6 @@ error:
return -rte_errno;
}
/**
* Check if the burst function is using eMPW.
*
* @param tx_pkt_burst
* Tx burst function pointer.
*
* @return
* 1 if the burst function is using eMPW, 0 otherwise.
*/
static int
is_empw_burst_func(eth_tx_burst_t tx_pkt_burst)
{
if (tx_pkt_burst == mlx5_tx_burst_raw_vec ||
tx_pkt_burst == mlx5_tx_burst_vec ||
tx_pkt_burst == mlx5_tx_burst_empw)
return 1;
return 0;
}
/**
* Create the Tx queue Verbs object.
*
@@ -414,7 +395,6 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
struct mlx5dv_cq cq_info;
struct mlx5dv_obj obj;
const int desc = 1 << txq_data->elts_n;
eth_tx_burst_t tx_pkt_burst = mlx5_select_tx_function(dev);
int ret = 0;
assert(txq_data);
@@ -432,8 +412,6 @@ mlx5_txq_ibv_new(struct rte_eth_dev *dev, uint16_t idx)
.comp_mask = 0,
};
cqe_n = desc / MLX5_TX_COMP_THRESH + 1;
if (is_empw_burst_func(tx_pkt_burst))
cqe_n += MLX5_TX_COMP_THRESH_INLINE_DIV;
tmpl.cq = mlx5_glue->create_cq(priv->sh->ctx, cqe_n, NULL, NULL, 0);
if (tmpl.cq == NULL) {
DRV_LOG(ERR, "port %u Tx queue %u CQ creation failure",
@@ -698,93 +676,7 @@ txq_calc_wqebb_cnt(struct mlx5_txq_ctrl *txq_ctrl)
static void
txq_set_params(struct mlx5_txq_ctrl *txq_ctrl)
{
struct mlx5_priv *priv = txq_ctrl->priv;
struct mlx5_dev_config *config = &priv->config;
const unsigned int max_tso_inline =
((MLX5_MAX_TSO_HEADER + (RTE_CACHE_LINE_SIZE - 1)) /
RTE_CACHE_LINE_SIZE);
unsigned int txq_inline;
unsigned int txqs_inline;
unsigned int inline_max_packet_sz;
eth_tx_burst_t tx_pkt_burst =
mlx5_select_tx_function(ETH_DEV(priv));
int is_empw_func = is_empw_burst_func(tx_pkt_burst);
int tso = !!(txq_ctrl->txq.offloads & (DEV_TX_OFFLOAD_TCP_TSO |
DEV_TX_OFFLOAD_VXLAN_TNL_TSO |
DEV_TX_OFFLOAD_GRE_TNL_TSO |
DEV_TX_OFFLOAD_IP_TNL_TSO |
DEV_TX_OFFLOAD_UDP_TNL_TSO));
txq_inline = (config->txq_inline == MLX5_ARG_UNSET) ?
0 : config->txq_inline;
txqs_inline = (config->txqs_inline == MLX5_ARG_UNSET) ?
0 : config->txqs_inline;
inline_max_packet_sz =
(config->inline_max_packet_sz == MLX5_ARG_UNSET) ?
0 : config->inline_max_packet_sz;
if (is_empw_func) {
if (config->txq_inline == MLX5_ARG_UNSET)
txq_inline = MLX5_WQE_SIZE_MAX - MLX5_WQE_SIZE;
if (config->txqs_inline == MLX5_ARG_UNSET)
txqs_inline = MLX5_EMPW_MIN_TXQS;
if (config->inline_max_packet_sz == MLX5_ARG_UNSET)
inline_max_packet_sz = MLX5_EMPW_MAX_INLINE_LEN;
txq_ctrl->txq.mpw_hdr_dseg = config->mpw_hdr_dseg;
txq_ctrl->txq.inline_max_packet_sz = inline_max_packet_sz;
}
if (txq_inline && priv->txqs_n >= txqs_inline) {
unsigned int ds_cnt;
txq_ctrl->txq.max_inline =
((txq_inline + (RTE_CACHE_LINE_SIZE - 1)) /
RTE_CACHE_LINE_SIZE);
if (is_empw_func) {
/* To minimize the size of data set, avoid requesting
* too large WQ.
*/
txq_ctrl->max_inline_data =
((RTE_MIN(txq_inline,
inline_max_packet_sz) +
(RTE_CACHE_LINE_SIZE - 1)) /
RTE_CACHE_LINE_SIZE) * RTE_CACHE_LINE_SIZE;
} else {
txq_ctrl->max_inline_data =
txq_ctrl->txq.max_inline * RTE_CACHE_LINE_SIZE;
}
/*
* Check if the inline size is too large in a way which
* can make the WQE DS to overflow.
* Considering in calculation:
* WQE CTRL (1 DS)
* WQE ETH (1 DS)
* Inline part (N DS)
*/
ds_cnt = 2 + (txq_ctrl->txq.max_inline / MLX5_WQE_DWORD_SIZE);
if (ds_cnt > MLX5_DSEG_MAX) {
unsigned int max_inline = (MLX5_DSEG_MAX - 2) *
MLX5_WQE_DWORD_SIZE;
max_inline = max_inline - (max_inline %
RTE_CACHE_LINE_SIZE);
DRV_LOG(WARNING,
"port %u txq inline is too large (%d) setting"
" it to the maximum possible: %d\n",
PORT_ID(priv), txq_inline, max_inline);
txq_ctrl->txq.max_inline = max_inline /
RTE_CACHE_LINE_SIZE;
}
}
if (tso) {
txq_ctrl->max_tso_header = max_tso_inline * RTE_CACHE_LINE_SIZE;
txq_ctrl->txq.max_inline = RTE_MAX(txq_ctrl->txq.max_inline,
max_tso_inline);
txq_ctrl->txq.tso_en = 1;
}
txq_ctrl->txq.tunnel_en = config->tunnel_en | config->swp;
txq_ctrl->txq.swp_en = ((DEV_TX_OFFLOAD_IP_TNL_TSO |
DEV_TX_OFFLOAD_UDP_TNL_TSO |
DEV_TX_OFFLOAD_OUTER_IPV4_CKSUM) &
txq_ctrl->txq.offloads) && config->swp;
(void)txq_ctrl;
}
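As a small worked example of the TSO header rounding removed above: with MLX5_MAX_TSO_HEADER = 192 (see the mlx5_defs.h hunk in this commit) and an assumed 64-byte RTE_CACHE_LINE_SIZE, the header budget rounds to exactly 3 cache lines:

#include <stdio.h>

#define MAX_TSO_HEADER 192	/* MLX5_MAX_TSO_HEADER from mlx5_defs.h */
#define CACHE_LINE_SIZE 64	/* assumption: typical RTE_CACHE_LINE_SIZE */

int
main(void)
{
	unsigned int max_tso_inline =
		(MAX_TSO_HEADER + CACHE_LINE_SIZE - 1) / CACHE_LINE_SIZE;

	/* 192 bytes round up to 3 cache lines (192 bytes). */
	printf("max_tso_inline = %u cache lines (%u bytes)\n",
	       max_tso_inline, max_tso_inline * CACHE_LINE_SIZE);
	return 0;
}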
/**