e2f116ee3c
The PMD only uses power-of-two numbers of Completion Queue Elements (CQEs); storing the element count as its log2 reduces the size of the field that holds it. Signed-off-by: Nelio Laranjeiro <nelio.laranjeiro@6wind.com>
1454 lines
38 KiB
C
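Illustrative note (not part of the original commit): because every ring size in this driver is a power of two, a field such as txq->elts_n or txq->cqe_n can hold the log2 of the size, and the real count and wrap-around mask are recovered with a shift. The hypothetical helper below (names are made up for the example) shows the idea; the code that follows does the same with "1 << txq->elts_n" and masks like "(1 << txq->cqe_n) - 1".

	#include <stdint.h>

	/* Hypothetical sketch: advance an index in a ring whose size is
	 * stored as its log2. */
	static inline unsigned int
	ring_next(unsigned int head, uint8_t n_log2)
	{
		const unsigned int n = 1u << n_log2;	/* actual element count */
		const unsigned int mask = n - 1;	/* power-of-two wrap mask */

		return (head + 1) & mask;
	}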
/*-
 *   BSD LICENSE
 *
 *   Copyright 2015 6WIND S.A.
 *   Copyright 2015 Mellanox.
 *
 *   Redistribution and use in source and binary forms, with or without
 *   modification, are permitted provided that the following conditions
 *   are met:
 *
 *     * Redistributions of source code must retain the above copyright
 *       notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above copyright
 *       notice, this list of conditions and the following disclaimer in
 *       the documentation and/or other materials provided with the
 *       distribution.
 *     * Neither the name of 6WIND S.A. nor the names of its
 *       contributors may be used to endorse or promote products derived
 *       from this software without specific prior written permission.
 *
 *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <assert.h>
#include <stdint.h>
#include <string.h>
#include <stdlib.h>

/* Verbs header. */
/* ISO C doesn't support unnamed structs/unions, disabling -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <infiniband/verbs.h>
#include <infiniband/mlx5_hw.h>
#include <infiniband/arch.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

/* DPDK headers don't like -pedantic. */
#ifdef PEDANTIC
#pragma GCC diagnostic ignored "-Wpedantic"
#endif
#include <rte_mbuf.h>
#include <rte_mempool.h>
#include <rte_prefetch.h>
#include <rte_common.h>
#include <rte_branch_prediction.h>
#include <rte_ether.h>
#ifdef PEDANTIC
#pragma GCC diagnostic error "-Wpedantic"
#endif

#include "mlx5.h"
#include "mlx5_utils.h"
#include "mlx5_rxtx.h"
#include "mlx5_autoconf.h"
#include "mlx5_defs.h"
#include "mlx5_prm.h"

#ifndef NDEBUG

/**
 * Verify or set magic value in CQE.
 *
 * @param cqe
 *   Pointer to CQE.
 *
 * @return
 *   0 the first time.
 */
static inline int
check_cqe64_seen(volatile struct mlx5_cqe64 *cqe)
{
	static const uint8_t magic[] = "seen";
	volatile uint8_t (*buf)[sizeof(cqe->rsvd40)] = &cqe->rsvd40;
	int ret = 1;
	unsigned int i;

	for (i = 0; i < sizeof(magic) && i < sizeof(*buf); ++i)
		if (!ret || (*buf)[i] != magic[i]) {
			ret = 0;
			(*buf)[i] = magic[i];
		}
	return ret;
}

#endif /* NDEBUG */

static inline int
check_cqe64(volatile struct mlx5_cqe64 *cqe,
	    unsigned int cqes_n, const uint16_t ci)
	    __attribute__((always_inline));

/**
 * Check whether CQE is valid.
 *
 * @param cqe
 *   Pointer to CQE.
 * @param cqes_n
 *   Size of completion queue.
 * @param ci
 *   Consumer index.
 *
 * @return
 *   0 on success, 1 on failure.
 */
static inline int
check_cqe64(volatile struct mlx5_cqe64 *cqe,
	    unsigned int cqes_n, const uint16_t ci)
{
	uint16_t idx = ci & cqes_n;
	uint8_t op_own = cqe->op_own;
	uint8_t op_owner = MLX5_CQE_OWNER(op_own);
	uint8_t op_code = MLX5_CQE_OPCODE(op_own);

	if (unlikely((op_owner != (!!(idx))) || (op_code == MLX5_CQE_INVALID)))
		return 1; /* No CQE. */
#ifndef NDEBUG
	if ((op_code == MLX5_CQE_RESP_ERR) ||
	    (op_code == MLX5_CQE_REQ_ERR)) {
		volatile struct mlx5_err_cqe *err_cqe = (volatile void *)cqe;
		uint8_t syndrome = err_cqe->syndrome;

		if ((syndrome == MLX5_CQE_SYNDROME_LOCAL_LENGTH_ERR) ||
		    (syndrome == MLX5_CQE_SYNDROME_REMOTE_ABORTED_ERR))
			return 0;
		if (!check_cqe64_seen(cqe))
			ERROR("unexpected CQE error %u (0x%02x)"
			      " syndrome 0x%02x",
			      op_code, op_code, syndrome);
		return 1;
	} else if ((op_code != MLX5_CQE_RESP_SEND) &&
		   (op_code != MLX5_CQE_REQ)) {
		if (!check_cqe64_seen(cqe))
			ERROR("unexpected CQE opcode %u (0x%02x)",
			      op_code, op_code);
		return 1;
	}
#endif /* NDEBUG */
	return 0;
}

static inline void
txq_complete(struct txq *txq) __attribute__((always_inline));

/**
 * Manage TX completions.
 *
 * When sending a burst, mlx5_tx_burst() posts several WRs.
 *
 * @param txq
 *   Pointer to TX queue structure.
 */
static inline void
txq_complete(struct txq *txq)
{
	const unsigned int elts_n = 1 << txq->elts_n;
	const unsigned int cqe_n = 1 << txq->cqe_n;
	const unsigned int cqe_cnt = cqe_n - 1;
	uint16_t elts_free = txq->elts_tail;
	uint16_t elts_tail;
	uint16_t cq_ci = txq->cq_ci;
	volatile struct mlx5_cqe64 *cqe = NULL;
	volatile struct mlx5_wqe *wqe;

	do {
		volatile struct mlx5_cqe64 *tmp;

		tmp = &(*txq->cqes)[cq_ci & cqe_cnt].cqe64;
		if (check_cqe64(tmp, cqe_n, cq_ci))
			break;
		cqe = tmp;
#ifndef NDEBUG
		if (MLX5_CQE_FORMAT(cqe->op_own) == MLX5_COMPRESSED) {
			if (!check_cqe64_seen(cqe))
				ERROR("unexpected compressed CQE, TX stopped");
			return;
		}
		if ((MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_RESP_ERR) ||
		    (MLX5_CQE_OPCODE(cqe->op_own) == MLX5_CQE_REQ_ERR)) {
			if (!check_cqe64_seen(cqe))
				ERROR("unexpected error CQE, TX stopped");
			return;
		}
#endif /* NDEBUG */
		++cq_ci;
	} while (1);
	if (unlikely(cqe == NULL))
		return;
	wqe = &(*txq->wqes)[htons(cqe->wqe_counter) & (txq->wqe_n - 1)].hdr;
	elts_tail = wqe->ctrl[3];
	assert(elts_tail < txq->wqe_n);
	/* Free buffers. */
	while (elts_free != elts_tail) {
		struct rte_mbuf *elt = (*txq->elts)[elts_free];
		unsigned int elts_free_next =
			(elts_free + 1) & (elts_n - 1);
		struct rte_mbuf *elt_next = (*txq->elts)[elts_free_next];

#ifndef NDEBUG
		/* Poisoning. */
		memset(&(*txq->elts)[elts_free],
		       0x66,
		       sizeof((*txq->elts)[elts_free]));
#endif
		RTE_MBUF_PREFETCH_TO_FREE(elt_next);
		/* Only one segment needs to be freed. */
		rte_pktmbuf_free_seg(elt);
		elts_free = elts_free_next;
	}
	txq->cq_ci = cq_ci;
	txq->elts_tail = elts_tail;
	/* Update the consumer index. */
	rte_wmb();
	*txq->cq_db = htonl(cq_ci);
}

/**
 * Get Memory Pool (MP) from mbuf. If mbuf is indirect, the pool from which
 * the cloned mbuf is allocated is returned instead.
 *
 * @param buf
 *   Pointer to mbuf.
 *
 * @return
 *   Memory pool where data is located for given mbuf.
 */
static struct rte_mempool *
txq_mb2mp(struct rte_mbuf *buf)
{
	if (unlikely(RTE_MBUF_INDIRECT(buf)))
		return rte_mbuf_from_indirect(buf)->pool;
	return buf->pool;
}

static inline uint32_t
txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
	__attribute__((always_inline));

/**
 * Get Memory Region (MR) <-> Memory Pool (MP) association from txq->mp2mr[].
 * Add MP to txq->mp2mr[] if it's not registered yet. If mp2mr[] is full,
 * remove an entry first.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param[in] mp
 *   Memory Pool for which a Memory Region lkey must be returned.
 *
 * @return
 *   mr->lkey on success, (uint32_t)-1 on failure.
 */
static inline uint32_t
txq_mp2mr(struct txq *txq, struct rte_mempool *mp)
{
	unsigned int i;
	uint32_t lkey = (uint32_t)-1;

	for (i = 0; (i != RTE_DIM(txq->mp2mr)); ++i) {
		if (unlikely(txq->mp2mr[i].mp == NULL)) {
			/* Unknown MP, add a new MR for it. */
			break;
		}
		if (txq->mp2mr[i].mp == mp) {
			assert(txq->mp2mr[i].lkey != (uint32_t)-1);
			assert(htonl(txq->mp2mr[i].mr->lkey) ==
			       txq->mp2mr[i].lkey);
			lkey = txq->mp2mr[i].lkey;
			break;
		}
	}
	if (unlikely(lkey == (uint32_t)-1))
		lkey = txq_mp2mr_reg(txq, mp, i);
	return lkey;
}

/**
 * Write a regular WQE.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param wqe
 *   Pointer to the WQE to fill.
 * @param buf
 *   Buffer.
 * @param length
 *   Packet length.
 *
 * @return ds
 *   Number of DS elements consumed.
 */
static inline unsigned int
mlx5_wqe_write(struct txq *txq, volatile struct mlx5_wqe *wqe,
	       struct rte_mbuf *buf, uint32_t length)
{
	uint8_t *raw = (uint8_t *)(uintptr_t)&wqe->eseg.inline_hdr[0];
	uint16_t ds;
	uint16_t pkt_inline_sz = MLX5_WQE_DWORD_SIZE;
	uintptr_t addr = rte_pktmbuf_mtod(buf, uintptr_t);
	struct mlx5_wqe_data_seg *dseg = NULL;

	assert(length >= MLX5_WQE_DWORD_SIZE);
	/* Start with the known and common part of the WQE structure. */
	wqe->ctrl[0] = htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND);
	wqe->ctrl[2] = 0;
	wqe->ctrl[3] = 0;
	wqe->eseg.rsvd0 = 0;
	wqe->eseg.rsvd1 = 0;
	wqe->eseg.mss = 0;
	wqe->eseg.rsvd2 = 0;
	/* Start by copying the Ethernet header. */
	rte_mov16((uint8_t *)raw, (uint8_t *)addr);
	length -= MLX5_WQE_DWORD_SIZE;
	addr += MLX5_WQE_DWORD_SIZE;
	/* Replace the Ethernet type by the VLAN if necessary. */
	if (buf->ol_flags & PKT_TX_VLAN_PKT) {
		uint32_t vlan = htonl(0x81000000 | buf->vlan_tci);

		memcpy((uint8_t *)(raw + MLX5_WQE_DWORD_SIZE - sizeof(vlan)),
		       &vlan, sizeof(vlan));
		addr -= sizeof(vlan);
		length += sizeof(vlan);
	}
	/* Inline if enough room. */
	if (txq->max_inline != 0) {
		uintptr_t end = (uintptr_t)&(*txq->wqes)[txq->wqe_n];
		uint16_t max_inline = txq->max_inline * RTE_CACHE_LINE_SIZE;
		uint16_t room;

		raw += MLX5_WQE_DWORD_SIZE;
		room = end - (uintptr_t)raw;
		if (room > max_inline) {
			uintptr_t addr_end = (addr + max_inline) &
				~(RTE_CACHE_LINE_SIZE - 1);
			uint16_t copy_b = ((addr_end - addr) > length) ?
					  length :
					  (addr_end - addr);

			rte_memcpy((void *)raw, (void *)addr, copy_b);
			addr += copy_b;
			length -= copy_b;
			pkt_inline_sz += copy_b;
			/* Sanity check. */
			assert(addr <= addr_end);
		}
		/* Store the inlined packet size in the WQE. */
		wqe->eseg.inline_hdr_sz = htons(pkt_inline_sz);
		/*
		 * 2 DWORDs consumed by the WQE header + 1 DSEG +
		 * the size of the inline part of the packet.
		 */
		ds = 2 + MLX5_WQE_DS(pkt_inline_sz - 2);
		if (length > 0) {
			dseg = (struct mlx5_wqe_data_seg *)
				((uintptr_t)wqe + (ds * MLX5_WQE_DWORD_SIZE));
			if ((uintptr_t)dseg >= end)
				dseg = (struct mlx5_wqe_data_seg *)
					((uintptr_t)&(*txq->wqes)[0]);
			goto use_dseg;
		}
	} else {
		/* Add the remaining packet as a simple ds. */
		ds = 3;
		/*
		 * No inlining has been done in the packet, only the
		 * Ethernet header has been stored.
		 */
		wqe->eseg.inline_hdr_sz = htons(MLX5_WQE_DWORD_SIZE);
		dseg = (struct mlx5_wqe_data_seg *)
			((uintptr_t)wqe + (ds * MLX5_WQE_DWORD_SIZE));
use_dseg:
		*dseg = (struct mlx5_wqe_data_seg) {
			.addr = htonll(addr),
			.byte_count = htonl(length),
			.lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
		};
		++ds;
	}
	wqe->ctrl[1] = htonl(txq->qp_num_8s | ds);
	return ds;
}
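/*
 * Worked example for the DS accounting in mlx5_wqe_write() above
 * (illustrative numbers; assumes MLX5_WQE_DS() rounds its argument up to
 * 16-byte units): if 50 bytes of the packet end up inlined
 * (pkt_inline_sz == 50), then ds = 2 + MLX5_WQE_DS(48) = 2 + 3 = 5
 * 16-byte units, 2 for the control and Ethernet segments and 3 for the
 * inlined data; one more unit is added at use_dseg if a data segment is
 * still needed for the non-inlined remainder.
 */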

/**
 * Ring TX queue doorbell.
 *
 * @param txq
 *   Pointer to TX queue structure.
 */
static inline void
mlx5_tx_dbrec(struct txq *txq)
{
	uint8_t *dst = (uint8_t *)((uintptr_t)txq->bf_reg + txq->bf_offset);
	uint32_t data[4] = {
		htonl((txq->wqe_ci << 8) | MLX5_OPCODE_SEND),
		htonl(txq->qp_num_8s),
		0,
		0,
	};
	rte_wmb();
	*txq->qp_db = htonl(txq->wqe_ci);
	/* Ensure ordering between DB record and BF copy. */
	rte_wmb();
	rte_mov16(dst, (uint8_t *)data);
	txq->bf_offset ^= txq->bf_buf_size;
}

/**
 * Prefetch a CQE.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param cqe_ci
 *   CQE consumer index.
 */
static inline void
tx_prefetch_cqe(struct txq *txq, uint16_t ci)
{
	volatile struct mlx5_cqe *cqe;

	cqe = &(*txq->cqes)[ci & ((1 << txq->cqe_n) - 1)];
	rte_prefetch0(cqe);
}

/**
 * Prefetch a WQE.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param wqe_ci
 *   WQE consumer index.
 */
static inline void
tx_prefetch_wqe(struct txq *txq, uint16_t ci)
{
	volatile struct mlx5_wqe64 *wqe;

	wqe = &(*txq->wqes)[ci & (txq->wqe_n - 1)];
	rte_prefetch0(wqe);
}

/**
 * DPDK callback for TX.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
mlx5_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct txq *txq = (struct txq *)dpdk_txq;
	uint16_t elts_head = txq->elts_head;
	const unsigned int elts_n = 1 << txq->elts_n;
	unsigned int i = 0;
	unsigned int j = 0;
	unsigned int max;
	unsigned int comp;
	volatile struct mlx5_wqe *wqe = NULL;

	if (unlikely(!pkts_n))
		return 0;
	/* Prefetch first packet cacheline. */
	tx_prefetch_cqe(txq, txq->cq_ci);
	tx_prefetch_cqe(txq, txq->cq_ci + 1);
	rte_prefetch0(*pkts);
	/* Start processing. */
	txq_complete(txq);
	max = (elts_n - (elts_head - txq->elts_tail));
	if (max > elts_n)
		max -= elts_n;
	do {
		struct rte_mbuf *buf = *(pkts++);
		unsigned int elts_head_next;
		uint32_t length;
		unsigned int segs_n = buf->nb_segs;
		volatile struct mlx5_wqe_data_seg *dseg;
		unsigned int ds = 0;

		/*
		 * Make sure there is enough room to store this packet and
		 * that one ring entry remains unused.
		 */
		assert(segs_n);
		if (max < segs_n + 1)
			break;
		max -= segs_n;
		--pkts_n;
		elts_head_next = (elts_head + 1) & (elts_n - 1);
		wqe = &(*txq->wqes)[txq->wqe_ci & (txq->wqe_n - 1)].hdr;
		tx_prefetch_wqe(txq, txq->wqe_ci);
		tx_prefetch_wqe(txq, txq->wqe_ci + 1);
		if (pkts_n)
			rte_prefetch0(*pkts);
		length = DATA_LEN(buf);
		/* Update element. */
		(*txq->elts)[elts_head] = buf;
		/* Prefetch next buffer data. */
		if (pkts_n)
			rte_prefetch0(rte_pktmbuf_mtod(*pkts,
						       volatile void *));
		/* Should we enable HW CKSUM offload? */
		if (buf->ol_flags &
		    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM)) {
			wqe->eseg.cs_flags =
				MLX5_ETH_WQE_L3_CSUM |
				MLX5_ETH_WQE_L4_CSUM;
		} else {
			wqe->eseg.cs_flags = 0;
		}
		ds = mlx5_wqe_write(txq, wqe, buf, length);
		if (segs_n == 1)
			goto skip_segs;
		dseg = (volatile struct mlx5_wqe_data_seg *)
			(((uintptr_t)wqe) + ds * MLX5_WQE_DWORD_SIZE);
		while (--segs_n) {
			/*
			 * Spill on next WQE when the current one does not
			 * have enough room left. Size of WQE must be a
			 * multiple of data segment size.
			 */
			assert(!(MLX5_WQE_SIZE % MLX5_WQE_DWORD_SIZE));
			if (!(ds % (MLX5_WQE_SIZE / MLX5_WQE_DWORD_SIZE)))
				dseg = (volatile void *)
					&(*txq->wqes)[txq->wqe_ci++ &
						      (txq->wqe_n - 1)];
			else
				++dseg;
			++ds;
			buf = buf->next;
			assert(buf);
			/* Store segment information. */
			dseg->byte_count = htonl(DATA_LEN(buf));
			dseg->lkey = txq_mp2mr(txq, txq_mb2mp(buf));
			dseg->addr = htonll(rte_pktmbuf_mtod(buf, uintptr_t));
			(*txq->elts)[elts_head_next] = buf;
			elts_head_next = (elts_head_next + 1) & (elts_n - 1);
#ifdef MLX5_PMD_SOFT_COUNTERS
			length += DATA_LEN(buf);
#endif
			++j;
		}
		/* Update DS field in WQE. */
		wqe->ctrl[1] &= htonl(0xffffffc0);
		wqe->ctrl[1] |= htonl(ds & 0x3f);
skip_segs:
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment sent bytes counter. */
		txq->stats.obytes += length;
#endif
		/* Increment consumer index. */
		txq->wqe_ci += (ds + 3) / 4;
		elts_head = elts_head_next;
		++i;
	} while (pkts_n);
	/* Take a shortcut if nothing must be sent. */
	if (unlikely(i == 0))
		return 0;
	/* Check whether completion threshold has been reached. */
	comp = txq->elts_comp + i + j;
	if (comp >= MLX5_TX_COMP_THRESH) {
		/* Request completion on last WQE. */
		wqe->ctrl[2] = htonl(8);
		/* Save elts_head in unused "immediate" field of WQE. */
		wqe->ctrl[3] = elts_head;
		txq->elts_comp = 0;
	} else {
		txq->elts_comp = comp;
	}
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += i;
#endif
	/* Ring QP doorbell. */
	mlx5_tx_dbrec(txq);
	txq->elts_head = elts_head;
	return i;
}

/**
 * Open an MPW session.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param mpw
 *   Pointer to MPW session structure.
 * @param length
 *   Packet length.
 */
static inline void
mlx5_mpw_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
{
	uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1);
	volatile struct mlx5_wqe_data_seg (*dseg)[MLX5_MPW_DSEG_MAX] =
		(volatile struct mlx5_wqe_data_seg (*)[])
		(uintptr_t)&(*txq->wqes)[(idx + 1) & (txq->wqe_n - 1)];

	mpw->state = MLX5_MPW_STATE_OPENED;
	mpw->pkts_n = 0;
	mpw->len = length;
	mpw->total_len = 0;
	mpw->wqe = (volatile struct mlx5_wqe *)&(*txq->wqes)[idx].hdr;
	mpw->wqe->eseg.mss = htons(length);
	mpw->wqe->eseg.inline_hdr_sz = 0;
	mpw->wqe->eseg.rsvd0 = 0;
	mpw->wqe->eseg.rsvd1 = 0;
	mpw->wqe->eseg.rsvd2 = 0;
	mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
				  (txq->wqe_ci << 8) | MLX5_OPCODE_LSO_MPW);
	mpw->wqe->ctrl[2] = 0;
	mpw->wqe->ctrl[3] = 0;
	mpw->data.dseg[0] = (volatile struct mlx5_wqe_data_seg *)
		(((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));
	mpw->data.dseg[1] = (volatile struct mlx5_wqe_data_seg *)
		(((uintptr_t)mpw->wqe) + (3 * MLX5_WQE_DWORD_SIZE));
	mpw->data.dseg[2] = &(*dseg)[0];
	mpw->data.dseg[3] = &(*dseg)[1];
	mpw->data.dseg[4] = &(*dseg)[2];
}

/**
 * Close an MPW session.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param mpw
 *   Pointer to MPW session structure.
 */
static inline void
mlx5_mpw_close(struct txq *txq, struct mlx5_mpw *mpw)
{
	unsigned int num = mpw->pkts_n;

	/*
	 * Store size in multiple of 16 bytes. Control and Ethernet segments
	 * count as 2.
	 */
	mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | (2 + num));
	mpw->state = MLX5_MPW_STATE_CLOSED;
	if (num < 3)
		++txq->wqe_ci;
	else
		txq->wqe_ci += 2;
	tx_prefetch_wqe(txq, txq->wqe_ci);
	tx_prefetch_wqe(txq, txq->wqe_ci + 1);
}

/**
 * DPDK callback for TX with MPW support.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
mlx5_tx_burst_mpw(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct txq *txq = (struct txq *)dpdk_txq;
	uint16_t elts_head = txq->elts_head;
	const unsigned int elts_n = 1 << txq->elts_n;
	unsigned int i = 0;
	unsigned int j = 0;
	unsigned int max;
	unsigned int comp;
	struct mlx5_mpw mpw = {
		.state = MLX5_MPW_STATE_CLOSED,
	};

	if (unlikely(!pkts_n))
		return 0;
	/* Prefetch first packet cacheline. */
	tx_prefetch_cqe(txq, txq->cq_ci);
	tx_prefetch_wqe(txq, txq->wqe_ci);
	tx_prefetch_wqe(txq, txq->wqe_ci + 1);
	/* Start processing. */
	txq_complete(txq);
	max = (elts_n - (elts_head - txq->elts_tail));
	if (max > elts_n)
		max -= elts_n;
	do {
		struct rte_mbuf *buf = *(pkts++);
		unsigned int elts_head_next;
		uint32_t length;
		unsigned int segs_n = buf->nb_segs;
		uint32_t cs_flags = 0;

		/*
		 * Make sure there is enough room to store this packet and
		 * that one ring entry remains unused.
		 */
		assert(segs_n);
		if (max < segs_n + 1)
			break;
		/* Do not bother with large packets MPW cannot handle. */
		if (segs_n > MLX5_MPW_DSEG_MAX)
			break;
		max -= segs_n;
		--pkts_n;
		/* Should we enable HW CKSUM offload? */
		if (buf->ol_flags &
		    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
			cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
		/* Retrieve packet information. */
		length = PKT_LEN(buf);
		assert(length);
		/* Start new session if packet differs. */
		if ((mpw.state == MLX5_MPW_STATE_OPENED) &&
		    ((mpw.len != length) ||
		     (segs_n != 1) ||
		     (mpw.wqe->eseg.cs_flags != cs_flags)))
			mlx5_mpw_close(txq, &mpw);
		if (mpw.state == MLX5_MPW_STATE_CLOSED) {
			mlx5_mpw_new(txq, &mpw, length);
			mpw.wqe->eseg.cs_flags = cs_flags;
		}
		/* Multi-segment packets must be alone in their MPW. */
		assert((segs_n == 1) || (mpw.pkts_n == 0));
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
		length = 0;
#endif
		do {
			volatile struct mlx5_wqe_data_seg *dseg;
			uintptr_t addr;

			elts_head_next = (elts_head + 1) & (elts_n - 1);
			assert(buf);
			(*txq->elts)[elts_head] = buf;
			dseg = mpw.data.dseg[mpw.pkts_n];
			addr = rte_pktmbuf_mtod(buf, uintptr_t);
			*dseg = (struct mlx5_wqe_data_seg){
				.byte_count = htonl(DATA_LEN(buf)),
				.lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
				.addr = htonll(addr),
			};
			elts_head = elts_head_next;
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
			length += DATA_LEN(buf);
#endif
			buf = buf->next;
			++mpw.pkts_n;
			++j;
		} while (--segs_n);
		assert(length == mpw.len);
		if (mpw.pkts_n == MLX5_MPW_DSEG_MAX)
			mlx5_mpw_close(txq, &mpw);
		elts_head = elts_head_next;
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment sent bytes counter. */
		txq->stats.obytes += length;
#endif
		++i;
	} while (pkts_n);
	/* Take a shortcut if nothing must be sent. */
	if (unlikely(i == 0))
		return 0;
	/* Check whether completion threshold has been reached. */
	/* "j" includes both packets and segments. */
	comp = txq->elts_comp + j;
	if (comp >= MLX5_TX_COMP_THRESH) {
		volatile struct mlx5_wqe *wqe = mpw.wqe;

		/* Request completion on last WQE. */
		wqe->ctrl[2] = htonl(8);
		/* Save elts_head in unused "immediate" field of WQE. */
		wqe->ctrl[3] = elts_head;
		txq->elts_comp = 0;
	} else {
		txq->elts_comp = comp;
	}
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += i;
#endif
	/* Ring QP doorbell. */
	if (mpw.state == MLX5_MPW_STATE_OPENED)
		mlx5_mpw_close(txq, &mpw);
	mlx5_tx_dbrec(txq);
	txq->elts_head = elts_head;
	return i;
}

/**
 * Open an MPW inline session.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param mpw
 *   Pointer to MPW session structure.
 * @param length
 *   Packet length.
 */
static inline void
mlx5_mpw_inline_new(struct txq *txq, struct mlx5_mpw *mpw, uint32_t length)
{
	uint16_t idx = txq->wqe_ci & (txq->wqe_n - 1);
	struct mlx5_wqe_inl_small *inl;

	mpw->state = MLX5_MPW_INL_STATE_OPENED;
	mpw->pkts_n = 0;
	mpw->len = length;
	mpw->total_len = 0;
	mpw->wqe = (volatile struct mlx5_wqe *)&(*txq->wqes)[idx].hdr;
	mpw->wqe->ctrl[0] = htonl((MLX5_OPC_MOD_MPW << 24) |
				  (txq->wqe_ci << 8) |
				  MLX5_OPCODE_LSO_MPW);
	mpw->wqe->ctrl[2] = 0;
	mpw->wqe->ctrl[3] = 0;
	mpw->wqe->eseg.mss = htons(length);
	mpw->wqe->eseg.inline_hdr_sz = 0;
	mpw->wqe->eseg.cs_flags = 0;
	mpw->wqe->eseg.rsvd0 = 0;
	mpw->wqe->eseg.rsvd1 = 0;
	mpw->wqe->eseg.rsvd2 = 0;
	inl = (struct mlx5_wqe_inl_small *)
		(((uintptr_t)mpw->wqe) + 2 * MLX5_WQE_DWORD_SIZE);
	mpw->data.raw = (uint8_t *)&inl->raw;
}

/**
 * Close an MPW inline session.
 *
 * @param txq
 *   Pointer to TX queue structure.
 * @param mpw
 *   Pointer to MPW session structure.
 */
static inline void
mlx5_mpw_inline_close(struct txq *txq, struct mlx5_mpw *mpw)
{
	unsigned int size;
	struct mlx5_wqe_inl_small *inl = (struct mlx5_wqe_inl_small *)
		(((uintptr_t)mpw->wqe) + (2 * MLX5_WQE_DWORD_SIZE));

	size = MLX5_WQE_SIZE - MLX5_MWQE64_INL_DATA + mpw->total_len;
	/*
	 * Store size in multiple of 16 bytes. Control and Ethernet segments
	 * count as 2.
	 */
	mpw->wqe->ctrl[1] = htonl(txq->qp_num_8s | MLX5_WQE_DS(size));
	mpw->state = MLX5_MPW_STATE_CLOSED;
	inl->byte_cnt = htonl(mpw->total_len | MLX5_INLINE_SEG);
	txq->wqe_ci += (size + (MLX5_WQE_SIZE - 1)) / MLX5_WQE_SIZE;
}

/**
 * DPDK callback for TX with MPW inline support.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
mlx5_tx_burst_mpw_inline(void *dpdk_txq, struct rte_mbuf **pkts,
			 uint16_t pkts_n)
{
	struct txq *txq = (struct txq *)dpdk_txq;
	uint16_t elts_head = txq->elts_head;
	const unsigned int elts_n = 1 << txq->elts_n;
	unsigned int i = 0;
	unsigned int j = 0;
	unsigned int max;
	unsigned int comp;
	unsigned int inline_room = txq->max_inline * RTE_CACHE_LINE_SIZE;
	struct mlx5_mpw mpw = {
		.state = MLX5_MPW_STATE_CLOSED,
	};

	if (unlikely(!pkts_n))
		return 0;
	/* Prefetch first packet cacheline. */
	tx_prefetch_cqe(txq, txq->cq_ci);
	tx_prefetch_wqe(txq, txq->wqe_ci);
	tx_prefetch_wqe(txq, txq->wqe_ci + 1);
	/* Start processing. */
	txq_complete(txq);
	max = (elts_n - (elts_head - txq->elts_tail));
	if (max > elts_n)
		max -= elts_n;
	do {
		struct rte_mbuf *buf = *(pkts++);
		unsigned int elts_head_next;
		uintptr_t addr;
		uint32_t length;
		unsigned int segs_n = buf->nb_segs;
		uint32_t cs_flags = 0;

		/*
		 * Make sure there is enough room to store this packet and
		 * that one ring entry remains unused.
		 */
		assert(segs_n);
		if (max < segs_n + 1)
			break;
		/* Do not bother with large packets MPW cannot handle. */
		if (segs_n > MLX5_MPW_DSEG_MAX)
			break;
		max -= segs_n;
		--pkts_n;
		/* Should we enable HW CKSUM offload? */
		if (buf->ol_flags &
		    (PKT_TX_IP_CKSUM | PKT_TX_TCP_CKSUM | PKT_TX_UDP_CKSUM))
			cs_flags = MLX5_ETH_WQE_L3_CSUM | MLX5_ETH_WQE_L4_CSUM;
		/* Retrieve packet information. */
		length = PKT_LEN(buf);
		/* Start new session if packet differs. */
		if (mpw.state == MLX5_MPW_STATE_OPENED) {
			if ((mpw.len != length) ||
			    (segs_n != 1) ||
			    (mpw.wqe->eseg.cs_flags != cs_flags))
				mlx5_mpw_close(txq, &mpw);
		} else if (mpw.state == MLX5_MPW_INL_STATE_OPENED) {
			if ((mpw.len != length) ||
			    (segs_n != 1) ||
			    (length > inline_room) ||
			    (mpw.wqe->eseg.cs_flags != cs_flags)) {
				mlx5_mpw_inline_close(txq, &mpw);
				inline_room =
					txq->max_inline * RTE_CACHE_LINE_SIZE;
			}
		}
		if (mpw.state == MLX5_MPW_STATE_CLOSED) {
			if ((segs_n != 1) ||
			    (length > inline_room)) {
				mlx5_mpw_new(txq, &mpw, length);
				mpw.wqe->eseg.cs_flags = cs_flags;
			} else {
				mlx5_mpw_inline_new(txq, &mpw, length);
				mpw.wqe->eseg.cs_flags = cs_flags;
			}
		}
		/* Multi-segment packets must be alone in their MPW. */
		assert((segs_n == 1) || (mpw.pkts_n == 0));
		if (mpw.state == MLX5_MPW_STATE_OPENED) {
			assert(inline_room ==
			       txq->max_inline * RTE_CACHE_LINE_SIZE);
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
			length = 0;
#endif
			do {
				volatile struct mlx5_wqe_data_seg *dseg;

				elts_head_next =
					(elts_head + 1) & (elts_n - 1);
				assert(buf);
				(*txq->elts)[elts_head] = buf;
				dseg = mpw.data.dseg[mpw.pkts_n];
				addr = rte_pktmbuf_mtod(buf, uintptr_t);
				*dseg = (struct mlx5_wqe_data_seg){
					.byte_count = htonl(DATA_LEN(buf)),
					.lkey = txq_mp2mr(txq, txq_mb2mp(buf)),
					.addr = htonll(addr),
				};
				elts_head = elts_head_next;
#if defined(MLX5_PMD_SOFT_COUNTERS) || !defined(NDEBUG)
				length += DATA_LEN(buf);
#endif
				buf = buf->next;
				++mpw.pkts_n;
				++j;
			} while (--segs_n);
			assert(length == mpw.len);
			if (mpw.pkts_n == MLX5_MPW_DSEG_MAX)
				mlx5_mpw_close(txq, &mpw);
		} else {
			unsigned int max;

			assert(mpw.state == MLX5_MPW_INL_STATE_OPENED);
			assert(length <= inline_room);
			assert(length == DATA_LEN(buf));
			elts_head_next = (elts_head + 1) & (elts_n - 1);
			addr = rte_pktmbuf_mtod(buf, uintptr_t);
			(*txq->elts)[elts_head] = buf;
			/* Maximum number of bytes before wrapping. */
			max = ((uintptr_t)&(*txq->wqes)[txq->wqe_n] -
			       (uintptr_t)mpw.data.raw);
			if (length > max) {
				rte_memcpy((void *)(uintptr_t)mpw.data.raw,
					   (void *)addr,
					   max);
				mpw.data.raw =
					(volatile void *)&(*txq->wqes)[0];
				rte_memcpy((void *)(uintptr_t)mpw.data.raw,
					   (void *)(addr + max),
					   length - max);
				mpw.data.raw += length - max;
			} else {
				rte_memcpy((void *)(uintptr_t)mpw.data.raw,
					   (void *)addr,
					   length);
				mpw.data.raw += length;
			}
			if ((uintptr_t)mpw.data.raw ==
			    (uintptr_t)&(*txq->wqes)[txq->wqe_n])
				mpw.data.raw =
					(volatile void *)&(*txq->wqes)[0];
			++mpw.pkts_n;
			++j;
			if (mpw.pkts_n == MLX5_MPW_DSEG_MAX) {
				mlx5_mpw_inline_close(txq, &mpw);
				inline_room =
					txq->max_inline * RTE_CACHE_LINE_SIZE;
			} else {
				inline_room -= length;
			}
		}
		mpw.total_len += length;
		elts_head = elts_head_next;
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment sent bytes counter. */
		txq->stats.obytes += length;
#endif
		++i;
	} while (pkts_n);
	/* Take a shortcut if nothing must be sent. */
	if (unlikely(i == 0))
		return 0;
	/* Check whether completion threshold has been reached. */
	/* "j" includes both packets and segments. */
	comp = txq->elts_comp + j;
	if (comp >= MLX5_TX_COMP_THRESH) {
		volatile struct mlx5_wqe *wqe = mpw.wqe;

		/* Request completion on last WQE. */
		wqe->ctrl[2] = htonl(8);
		/* Save elts_head in unused "immediate" field of WQE. */
		wqe->ctrl[3] = elts_head;
		txq->elts_comp = 0;
	} else {
		txq->elts_comp = comp;
	}
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment sent packets counter. */
	txq->stats.opackets += i;
#endif
	/* Ring QP doorbell. */
	if (mpw.state == MLX5_MPW_INL_STATE_OPENED)
		mlx5_mpw_inline_close(txq, &mpw);
	else if (mpw.state == MLX5_MPW_STATE_OPENED)
		mlx5_mpw_close(txq, &mpw);
	mlx5_tx_dbrec(txq);
	txq->elts_head = elts_head;
	return i;
}

/**
 * Translate RX completion flags to packet type.
 *
 * @param[in] cqe
 *   Pointer to CQE.
 *
 * @note: fix mlx5_dev_supported_ptypes_get() if any change here.
 *
 * @return
 *   Packet type for struct rte_mbuf.
 */
static inline uint32_t
rxq_cq_to_pkt_type(volatile struct mlx5_cqe64 *cqe)
{
	uint32_t pkt_type;
	uint8_t flags = cqe->l4_hdr_type_etc;
	uint8_t info = cqe->rsvd0[0];

	if (info & IBV_EXP_CQ_RX_TUNNEL_PACKET)
		pkt_type =
			TRANSPOSE(flags,
				  IBV_EXP_CQ_RX_OUTER_IPV4_PACKET,
				  RTE_PTYPE_L3_IPV4) |
			TRANSPOSE(flags,
				  IBV_EXP_CQ_RX_OUTER_IPV6_PACKET,
				  RTE_PTYPE_L3_IPV6) |
			TRANSPOSE(flags,
				  IBV_EXP_CQ_RX_IPV4_PACKET,
				  RTE_PTYPE_INNER_L3_IPV4) |
			TRANSPOSE(flags,
				  IBV_EXP_CQ_RX_IPV6_PACKET,
				  RTE_PTYPE_INNER_L3_IPV6);
	else
		pkt_type =
			TRANSPOSE(flags,
				  MLX5_CQE_L3_HDR_TYPE_IPV6,
				  RTE_PTYPE_L3_IPV6) |
			TRANSPOSE(flags,
				  MLX5_CQE_L3_HDR_TYPE_IPV4,
				  RTE_PTYPE_L3_IPV4);
	return pkt_type;
}

/**
 * Get size of the next packet for a given CQE. For compressed CQEs, the
 * consumer index is updated only once all packets of the current one have
 * been processed.
 *
 * @param rxq
 *   Pointer to RX queue.
 * @param cqe
 *   CQE to process.
 *
 * @return
 *   Packet size in bytes (0 if there is none), -1 in case of completion
 *   with error.
 */
static inline int
mlx5_rx_poll_len(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe,
		 uint16_t cqe_cnt)
{
	struct rxq_zip *zip = &rxq->zip;
	uint16_t cqe_n = cqe_cnt + 1;
	int len = 0;

	/* Process compressed data in the CQE and mini arrays. */
	if (zip->ai) {
		volatile struct mlx5_mini_cqe8 (*mc)[8] =
			(volatile struct mlx5_mini_cqe8 (*)[8])
			(uintptr_t)(&(*rxq->cqes)[zip->ca & cqe_cnt].cqe64);

		len = ntohl((*mc)[zip->ai & 7].byte_cnt);
		if ((++zip->ai & 7) == 0) {
			/*
			 * Increment consumer index to skip the number of
			 * CQEs consumed. Hardware leaves holes in the CQ
			 * ring for software use.
			 */
			zip->ca = zip->na;
			zip->na += 8;
		}
		if (unlikely(rxq->zip.ai == rxq->zip.cqe_cnt)) {
			uint16_t idx = rxq->cq_ci;
			uint16_t end = zip->cq_ci;

			while (idx != end) {
				(*rxq->cqes)[idx & cqe_cnt].cqe64.op_own =
					MLX5_CQE_INVALIDATE;
				++idx;
			}
			rxq->cq_ci = zip->cq_ci;
			zip->ai = 0;
		}
	/* No compressed data, get next CQE and verify if it is compressed. */
	} else {
		int ret;
		int8_t op_own;

		ret = check_cqe64(cqe, cqe_n, rxq->cq_ci);
		if (unlikely(ret == 1))
			return 0;
		++rxq->cq_ci;
		op_own = cqe->op_own;
		if (MLX5_CQE_FORMAT(op_own) == MLX5_COMPRESSED) {
			volatile struct mlx5_mini_cqe8 (*mc)[8] =
				(volatile struct mlx5_mini_cqe8 (*)[8])
				(uintptr_t)(&(*rxq->cqes)[rxq->cq_ci &
							  cqe_cnt].cqe64);

			/* Fix endianness. */
			zip->cqe_cnt = ntohl(cqe->byte_cnt);
			/*
			 * Current mini array position is the one returned by
			 * check_cqe64().
			 *
			 * If completion comprises several mini arrays, as a
			 * special case the second one is located 7 CQEs after
			 * the initial CQE instead of 8 for subsequent ones.
			 */
			zip->ca = rxq->cq_ci & cqe_cnt;
			zip->na = zip->ca + 7;
			/* Compute the next non compressed CQE. */
			--rxq->cq_ci;
			zip->cq_ci = rxq->cq_ci + zip->cqe_cnt;
			/* Get packet size to return. */
			len = ntohl((*mc)[0].byte_cnt);
			zip->ai = 1;
		} else {
			len = ntohl(cqe->byte_cnt);
		}
		/* Error while receiving packet. */
		if (unlikely(MLX5_CQE_OPCODE(op_own) == MLX5_CQE_RESP_ERR))
			return -1;
	}
	return len;
}
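/*
 * Illustration with made-up numbers: when a compressed CQE announces
 * byte_cnt == 24, the session above walks 24 mini-CQEs eight at a time
 * (zip->ai counts consumed entries, zip->ca/zip->na track the current and
 * next mini-CQE arrays). rxq->cq_ci only jumps to zip->cq_ci once all 24
 * have been returned, and the CQ entries skipped in the meantime are
 * stamped MLX5_CQE_INVALIDATE so a later poll does not mistake them for
 * valid completions.
 */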

/**
 * Translate RX completion flags to offload flags.
 *
 * @param[in] rxq
 *   Pointer to RX queue structure.
 * @param[in] cqe
 *   Pointer to CQE.
 *
 * @return
 *   Offload flags (ol_flags) for struct rte_mbuf.
 */
static inline uint32_t
rxq_cq_to_ol_flags(struct rxq *rxq, volatile struct mlx5_cqe64 *cqe)
{
	uint32_t ol_flags = 0;
	uint8_t l3_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L3_HDR_TYPE_MASK;
	uint8_t l4_hdr = (cqe->l4_hdr_type_etc) & MLX5_CQE_L4_HDR_TYPE_MASK;
	uint8_t info = cqe->rsvd0[0];

	if ((l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV4) ||
	    (l3_hdr == MLX5_CQE_L3_HDR_TYPE_IPV6))
		ol_flags |=
			(!(cqe->hds_ip_ext & MLX5_CQE_L3_OK) *
			 PKT_RX_IP_CKSUM_BAD);
	if ((l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP) ||
	    (l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_EMP_ACK) ||
	    (l4_hdr == MLX5_CQE_L4_HDR_TYPE_TCP_ACK) ||
	    (l4_hdr == MLX5_CQE_L4_HDR_TYPE_UDP))
		ol_flags |=
			(!(cqe->hds_ip_ext & MLX5_CQE_L4_OK) *
			 PKT_RX_L4_CKSUM_BAD);
	/*
	 * PKT_RX_IP_CKSUM_BAD and PKT_RX_L4_CKSUM_BAD are used in place
	 * of PKT_RX_EIP_CKSUM_BAD because the latter is not functional
	 * (its value is 0).
	 */
	if ((info & IBV_EXP_CQ_RX_TUNNEL_PACKET) && (rxq->csum_l2tun))
		ol_flags |=
			TRANSPOSE(~cqe->l4_hdr_type_etc,
				  IBV_EXP_CQ_RX_OUTER_IP_CSUM_OK,
				  PKT_RX_IP_CKSUM_BAD) |
			TRANSPOSE(~cqe->l4_hdr_type_etc,
				  IBV_EXP_CQ_RX_OUTER_TCP_UDP_CSUM_OK,
				  PKT_RX_L4_CKSUM_BAD);
	return ol_flags;
}

/**
 * DPDK callback for RX.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
mlx5_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	struct rxq *rxq = dpdk_rxq;
	const unsigned int wqe_cnt = (1 << rxq->elts_n) - 1;
	const unsigned int cqe_cnt = (1 << rxq->cqe_n) - 1;
	const unsigned int sges_n = rxq->sges_n;
	struct rte_mbuf *pkt = NULL;
	struct rte_mbuf *seg = NULL;
	volatile struct mlx5_cqe64 *cqe =
		&(*rxq->cqes)[rxq->cq_ci & cqe_cnt].cqe64;
	unsigned int i = 0;
	unsigned int rq_ci = rxq->rq_ci << sges_n;
	int len;

	while (pkts_n) {
		unsigned int idx = rq_ci & wqe_cnt;
		volatile struct mlx5_wqe_data_seg *wqe = &(*rxq->wqes)[idx];
		struct rte_mbuf *rep = (*rxq->elts)[idx];

		if (pkt)
			NEXT(seg) = rep;
		seg = rep;
		rte_prefetch0(seg);
		rte_prefetch0(cqe);
		rte_prefetch0(wqe);
		rep = rte_mbuf_raw_alloc(rxq->mp);
		if (unlikely(rep == NULL)) {
			++rxq->stats.rx_nombuf;
			if (!pkt) {
				/*
				 * no buffers before we even started,
				 * bail out silently.
				 */
				break;
			}
			while (pkt != seg) {
				assert(pkt != (*rxq->elts)[idx]);
				seg = NEXT(pkt);
				rte_mbuf_refcnt_set(pkt, 0);
				__rte_mbuf_raw_free(pkt);
				pkt = seg;
			}
			break;
		}
		if (!pkt) {
			cqe = &(*rxq->cqes)[rxq->cq_ci & cqe_cnt].cqe64;
			len = mlx5_rx_poll_len(rxq, cqe, cqe_cnt);
			if (len == 0) {
				rte_mbuf_refcnt_set(rep, 0);
				__rte_mbuf_raw_free(rep);
				break;
			}
			if (unlikely(len == -1)) {
				/* RX error, packet is likely too large. */
				rte_mbuf_refcnt_set(rep, 0);
				__rte_mbuf_raw_free(rep);
				++rxq->stats.idropped;
				goto skip;
			}
			pkt = seg;
			assert(len >= (rxq->crc_present << 2));
			/* Update packet information. */
			pkt->packet_type = 0;
			pkt->ol_flags = 0;
			if (rxq->csum | rxq->csum_l2tun | rxq->vlan_strip |
			    rxq->crc_present) {
				if (rxq->csum) {
					pkt->packet_type =
						rxq_cq_to_pkt_type(cqe);
					pkt->ol_flags =
						rxq_cq_to_ol_flags(rxq, cqe);
				}
				if (cqe->l4_hdr_type_etc &
				    MLX5_CQE_VLAN_STRIPPED) {
					pkt->ol_flags |= PKT_RX_VLAN_PKT |
						PKT_RX_VLAN_STRIPPED;
					pkt->vlan_tci = ntohs(cqe->vlan_info);
				}
				if (rxq->crc_present)
					len -= ETHER_CRC_LEN;
			}
			PKT_LEN(pkt) = len;
		}
		DATA_LEN(rep) = DATA_LEN(seg);
		PKT_LEN(rep) = PKT_LEN(seg);
		SET_DATA_OFF(rep, DATA_OFF(seg));
		NB_SEGS(rep) = NB_SEGS(seg);
		PORT(rep) = PORT(seg);
		NEXT(rep) = NULL;
		(*rxq->elts)[idx] = rep;
		/*
		 * Fill NIC descriptor with the new buffer. The lkey and size
		 * of the buffers are already known, only the buffer address
		 * changes.
		 */
		wqe->addr = htonll(rte_pktmbuf_mtod(rep, uintptr_t));
		if (len > DATA_LEN(seg)) {
			len -= DATA_LEN(seg);
			++NB_SEGS(pkt);
			++rq_ci;
			continue;
		}
		DATA_LEN(seg) = len;
#ifdef MLX5_PMD_SOFT_COUNTERS
		/* Increment bytes counter. */
		rxq->stats.ibytes += PKT_LEN(pkt);
#endif
		/* Return packet. */
		*(pkts++) = pkt;
		pkt = NULL;
		--pkts_n;
		++i;
skip:
		/* Align consumer index to the next stride. */
		rq_ci >>= sges_n;
		++rq_ci;
		rq_ci <<= sges_n;
	}
	if (unlikely((i == 0) && ((rq_ci >> sges_n) == rxq->rq_ci)))
		return 0;
	/* Update the consumer index. */
	rxq->rq_ci = rq_ci >> sges_n;
	rte_wmb();
	*rxq->cq_db = htonl(rxq->cq_ci);
	rte_wmb();
	*rxq->rq_db = htonl(rxq->rq_ci);
#ifdef MLX5_PMD_SOFT_COUNTERS
	/* Increment packets counter. */
	rxq->stats.ipackets += i;
#endif
	return i;
}

/**
 * Dummy DPDK callback for TX.
 *
 * This function is used to temporarily replace the real callback during
 * unsafe control operations on the queue, or in case of error.
 *
 * @param dpdk_txq
 *   Generic pointer to TX queue structure.
 * @param[in] pkts
 *   Packets to transmit.
 * @param pkts_n
 *   Number of packets in array.
 *
 * @return
 *   Number of packets successfully transmitted (<= pkts_n).
 */
uint16_t
removed_tx_burst(void *dpdk_txq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	(void)dpdk_txq;
	(void)pkts;
	(void)pkts_n;
	return 0;
}

/**
 * Dummy DPDK callback for RX.
 *
 * This function is used to temporarily replace the real callback during
 * unsafe control operations on the queue, or in case of error.
 *
 * @param dpdk_rxq
 *   Generic pointer to RX queue structure.
 * @param[out] pkts
 *   Array to store received packets.
 * @param pkts_n
 *   Maximum number of packets in array.
 *
 * @return
 *   Number of packets successfully received (<= pkts_n).
 */
uint16_t
removed_rx_burst(void *dpdk_rxq, struct rte_mbuf **pkts, uint16_t pkts_n)
{
	(void)dpdk_rxq;
	(void)pkts;
	(void)pkts_n;
	return 0;
}