net/bnxt: optimize Tx completion handling

Avoid copying mbuf pointers to a separate array for bulk
mbuf free when handling transmit completions for vector
mode transmit.
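The core of the change, as a minimal illustrative C sketch rather than the driver's code (tx_free_direct, sw_ring, mask, and size are hypothetical stand-ins for the driver's tx_buf_ring state, and a single mempool is assumed, which holds under DEV_TX_OFFLOAD_MBUF_FAST_FREE): completed mbuf pointers are handed to rte_mempool_put_bulk() directly out of the software ring, in at most two contiguous runs, instead of first being staged into a temporary array.

#include <string.h>
#include <rte_common.h>
#include <rte_mbuf.h>
#include <rte_mempool.h>

static void
tx_free_direct(struct rte_mbuf **sw_ring, uint32_t mask, uint32_t size,
	       uint32_t raw_cons, uint32_t n)
{
	uint32_t cons = raw_cons & mask;
	/* Contiguous run from cons up to the end of the ring. */
	uint32_t num = RTE_MIN(n, size - cons);
	struct rte_mempool *pool = sw_ring[cons]->pool;

	rte_mempool_put_bulk(pool, (void **)&sw_ring[cons], num);
	memset(&sw_ring[cons], 0, num * sizeof(struct rte_mbuf *));

	/* Wrapped remainder, if the run crossed the end of the ring. */
	if (n - num) {
		rte_mempool_put_bulk(pool, (void **)&sw_ring[0], n - num);
		memset(&sw_ring[0], 0, (n - num) * sizeof(struct rte_mbuf *));
	}
}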

Signed-off-by: Lance Richardson <lance.richardson@broadcom.com>
Reviewed-by: Ajit Khaparde <ajit.khaparde@broadcom.com>
commit 527b10089c (parent 87a8fa1287)
Lance Richardson, 2021-03-06 10:19:11 -05:00, committed by Ajit Khaparde
8 changed files with 103 additions and 81 deletions

drivers/net/bnxt/bnxt_ethdev.c

@@ -3189,7 +3189,7 @@ bnxt_tx_descriptor_status_op(void *tx_queue, uint16_t offset)
 	struct bnxt_tx_queue *txq = (struct bnxt_tx_queue *)tx_queue;
 	struct bnxt_tx_ring_info *txr;
 	struct bnxt_cp_ring_info *cpr;
-	struct bnxt_sw_tx_bd *tx_buf;
+	struct rte_mbuf **tx_buf;
 	struct tx_pkt_cmpl *txcmp;
 	uint32_t cons, cp_cons;
 	int rc;
@@ -3219,7 +3219,7 @@ bnxt_tx_descriptor_status_op(void *tx_queue, uint16_t offset)
 			return RTE_ETH_TX_DESC_UNAVAIL;
 	}
 	tx_buf = &txr->tx_buf_ring[cons];
-	if (tx_buf->mbuf == NULL)
+	if (*tx_buf == NULL)
 		return RTE_ETH_TX_DESC_DONE;
 
 	return RTE_ETH_TX_DESC_FULL;

drivers/net/bnxt/bnxt_ring.c

@@ -230,7 +230,7 @@ int bnxt_alloc_rings(struct bnxt *bp, uint16_t qidx,
 			tx_ring->vmem =
 				(void **)((char *)mz->addr + tx_vmem_start);
 			tx_ring_info->tx_buf_ring =
-				(struct bnxt_sw_tx_bd *)tx_ring->vmem;
+				(struct rte_mbuf **)tx_ring->vmem;
 		}
 	}
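This re-cast saves memory as well as cycles: each software-ring slot shrinks from a padded struct to a bare pointer (8 bytes on LP64 targets). A compile-time sketch of that size relation; the struct is re-declared here purely for illustration, since this commit deletes it:

struct rte_mbuf;			/* opaque here; full type in rte_mbuf.h */

struct bnxt_sw_tx_bd {			/* the layout this commit removes */
	struct rte_mbuf *mbuf;
	unsigned short nr_bds;		/* padding brings sizeof to 16 on LP64 */
};

_Static_assert(sizeof(struct bnxt_sw_tx_bd) > sizeof(struct rte_mbuf *),
	       "bare mbuf pointers keep the software ring denser");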

drivers/net/bnxt/bnxt_rxtx_vec_common.h

@@ -100,57 +100,78 @@ bnxt_rxq_rearm(struct bnxt_rx_queue *rxq, struct bnxt_rx_ring_info *rxr)
  * is enabled.
  */
 static inline void
-bnxt_tx_cmp_vec_fast(struct bnxt_tx_queue *txq, int nr_pkts)
+bnxt_tx_cmp_vec_fast(struct bnxt_tx_queue *txq, uint32_t nr_pkts)
 {
 	struct bnxt_tx_ring_info *txr = txq->tx_ring;
-	struct rte_mbuf **free = txq->free;
 	uint16_t cons, raw_cons = txr->tx_raw_cons;
-	unsigned int blk = 0;
-	uint32_t ring_mask = txr->tx_ring_struct->ring_mask;
+	uint32_t ring_mask, ring_size, num;
+	struct rte_mempool *pool;
 
-	while (nr_pkts--) {
-		struct bnxt_sw_tx_bd *tx_buf;
+	ring_mask = txr->tx_ring_struct->ring_mask;
+	ring_size = txr->tx_ring_struct->ring_size;
 
-		cons = raw_cons++ & ring_mask;
-		tx_buf = &txr->tx_buf_ring[cons];
-		free[blk++] = tx_buf->mbuf;
-		tx_buf->mbuf = NULL;
+	cons = raw_cons & ring_mask;
+	num = RTE_MIN(nr_pkts, ring_size - cons);
+	pool = txr->tx_buf_ring[cons]->pool;
+
+	rte_mempool_put_bulk(pool, (void **)&txr->tx_buf_ring[cons], num);
+	memset(&txr->tx_buf_ring[cons], 0, num * sizeof(struct rte_mbuf *));
+	raw_cons += num;
+	num = nr_pkts - num;
+	if (num) {
+		cons = raw_cons & ring_mask;
+		rte_mempool_put_bulk(pool, (void **)&txr->tx_buf_ring[cons],
+				     num);
+		memset(&txr->tx_buf_ring[cons], 0,
+		       num * sizeof(struct rte_mbuf *));
+		raw_cons += num;
 	}
-	if (blk)
-		rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
 
 	txr->tx_raw_cons = raw_cons;
 }
 
 static inline void
-bnxt_tx_cmp_vec(struct bnxt_tx_queue *txq, int nr_pkts)
+bnxt_tx_cmp_vec(struct bnxt_tx_queue *txq, uint32_t nr_pkts)
 {
 	struct bnxt_tx_ring_info *txr = txq->tx_ring;
-	struct rte_mbuf **free = txq->free;
 	uint16_t cons, raw_cons = txr->tx_raw_cons;
-	unsigned int blk = 0;
-	uint32_t ring_mask = txr->tx_ring_struct->ring_mask;
+	uint32_t ring_mask, ring_size, num, blk;
+	struct rte_mempool *pool;
+
+	ring_mask = txr->tx_ring_struct->ring_mask;
+	ring_size = txr->tx_ring_struct->ring_size;
 
-	while (nr_pkts--) {
-		struct bnxt_sw_tx_bd *tx_buf;
+	while (nr_pkts) {
 		struct rte_mbuf *mbuf;
 
-		cons = raw_cons++ & ring_mask;
-		tx_buf = &txr->tx_buf_ring[cons];
-		mbuf = rte_pktmbuf_prefree_seg(tx_buf->mbuf);
-		if (unlikely(mbuf == NULL))
-			continue;
-		tx_buf->mbuf = NULL;
+		cons = raw_cons & ring_mask;
+		num = RTE_MIN(nr_pkts, ring_size - cons);
+		pool = txr->tx_buf_ring[cons]->pool;
 
-		if (blk && mbuf->pool != free[0]->pool) {
-			rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
-			blk = 0;
+		blk = 0;
+		do {
+			mbuf = txr->tx_buf_ring[cons + blk];
+			mbuf = rte_pktmbuf_prefree_seg(mbuf);
+			if (!mbuf || mbuf->pool != pool)
+				break;
+			blk++;
+		} while (blk < num);
+
+		if (blk) {
+			rte_mempool_put_bulk(pool,
+					     (void **)&txr->tx_buf_ring[cons],
+					     blk);
+			memset(&txr->tx_buf_ring[cons], 0,
+			       blk * sizeof(struct rte_mbuf *));
+			raw_cons += blk;
+			nr_pkts -= blk;
+		}
+		if (!mbuf) {
+			/* Skip freeing mbufs with non-zero reference count. */
+			raw_cons++;
+			nr_pkts--;
 		}
-		free[blk++] = mbuf;
 	}
-	if (blk)
-		rte_mempool_put_bulk(free[0]->pool, (void **)free, blk);
 
 	txr->tx_raw_cons = raw_cons;
 }
 
 #endif /* _BNXT_RXTX_VEC_COMMON_H_ */
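The non-fast path above can only batch a run of mbufs that are both reclaimable and from the same mempool; the "if (!mbuf)" branch exists because rte_pktmbuf_prefree_seg() returns NULL when another reference to the segment is still outstanding. A minimal sketch of that contract (try_reclaim is a hypothetical helper, not driver code):

#include <rte_mbuf.h>
#include <rte_mempool.h>

/* Returns 1 if the segment was reclaimed into its pool, 0 if it is still
 * referenced elsewhere and must be left alone (the driver simply advances
 * past its ring slot in that case).
 */
static inline int
try_reclaim(struct rte_mbuf *m)
{
	struct rte_mbuf *owned = rte_pktmbuf_prefree_seg(m);

	if (owned == NULL)
		return 0;
	rte_mempool_put(owned->pool, owned);
	return 1;
}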

drivers/net/bnxt/bnxt_rxtx_vec_neon.c

@@ -346,7 +346,7 @@ bnxt_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 	uint16_t tx_prod, tx_raw_prod = txr->tx_raw_prod;
 	struct rte_mbuf *tx_mbuf;
 	struct tx_bd_long *txbd = NULL;
-	struct bnxt_sw_tx_bd *tx_buf;
+	struct rte_mbuf **tx_buf;
 	uint16_t to_send;
 
 	nb_pkts = RTE_MIN(nb_pkts, bnxt_tx_avail(txq));
@@ -362,8 +362,7 @@ bnxt_xmit_fixed_burst_vec(void *tx_queue, struct rte_mbuf **tx_pkts,
 		tx_prod = RING_IDX(txr->tx_ring_struct, tx_raw_prod);
 		tx_buf = &txr->tx_buf_ring[tx_prod];
-		tx_buf->mbuf = tx_mbuf;
-		tx_buf->nr_bds = 1;
+		*tx_buf = tx_mbuf;
 
 		txbd = &txr->tx_desc_ring[tx_prod];
 		txbd->address = tx_mbuf->buf_iova + tx_mbuf->data_off;

drivers/net/bnxt/bnxt_rxtx_vec_sse.c

@@ -321,12 +321,11 @@ bnxt_handle_tx_cp_vec(struct bnxt_tx_queue *txq)
 
 static inline void
 bnxt_xmit_one(struct rte_mbuf *mbuf, struct tx_bd_long *txbd,
-	      struct bnxt_sw_tx_bd *tx_buf)
+	      struct rte_mbuf **tx_buf)
 {
 	__m128i desc;
 
-	tx_buf->mbuf = mbuf;
-	tx_buf->nr_bds = 1;
+	*tx_buf = mbuf;
 
 	desc = _mm_set_epi64x(mbuf->buf_iova + mbuf->data_off,
 			      bnxt_xmit_flags_len(mbuf->data_len,
@@ -343,7 +342,7 @@ bnxt_xmit_fixed_burst_vec(struct bnxt_tx_queue *txq, struct rte_mbuf **tx_pkts,
 	struct bnxt_tx_ring_info *txr = txq->tx_ring;
 	uint16_t tx_prod, tx_raw_prod = txr->tx_raw_prod;
 	struct tx_bd_long *txbd;
-	struct bnxt_sw_tx_bd *tx_buf;
+	struct rte_mbuf **tx_buf;
 	uint16_t to_send;
 
 	tx_prod = RING_IDX(txr->tx_ring_struct, tx_raw_prod);
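bnxt_xmit_one above writes the 16-byte Tx descriptor with a single vector store. A standalone sketch of that pattern under simplified assumptions (toy_bd and desc_store are illustrative, not driver symbols; the real tx_bd_long layout has more fields):

#include <stdint.h>
#include <emmintrin.h>		/* SSE2 intrinsics */

struct toy_bd {			/* simplified 16-byte descriptor */
	uint64_t flags_len;	/* low quadword */
	uint64_t addr;		/* high quadword */
};

static inline void
desc_store(struct toy_bd *bd, uint64_t addr, uint64_t flags_len)
{
	/* _mm_set_epi64x takes (high, low): the buffer address lands in
	 * the upper 8 bytes, flags/length in the lower, so the whole
	 * descriptor is written with one 16-byte store.
	 */
	__m128i d = _mm_set_epi64x((int64_t)addr, (int64_t)flags_len);

	_mm_storeu_si128((__m128i *)bd, d);
}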

drivers/net/bnxt/bnxt_txq.c

@@ -24,7 +24,7 @@ void bnxt_free_txq_stats(struct bnxt_tx_queue *txq)
 
 static void bnxt_tx_queue_release_mbufs(struct bnxt_tx_queue *txq)
 {
-	struct bnxt_sw_tx_bd *sw_ring;
+	struct rte_mbuf **sw_ring;
 	uint16_t i;
 
 	if (!txq || !txq->tx_ring)
@@ -33,9 +33,9 @@ static void bnxt_tx_queue_release_mbufs(struct bnxt_tx_queue *txq)
 	sw_ring = txq->tx_ring->tx_buf_ring;
 	if (sw_ring) {
 		for (i = 0; i < txq->tx_ring->tx_ring_struct->ring_size; i++) {
-			if (sw_ring[i].mbuf) {
-				rte_pktmbuf_free_seg(sw_ring[i].mbuf);
-				sw_ring[i].mbuf = NULL;
+			if (sw_ring[i]) {
+				rte_pktmbuf_free_seg(sw_ring[i]);
+				sw_ring[i] = NULL;
 			}
 		}
 	}

drivers/net/bnxt/bnxt_txr.c

@@ -76,7 +76,7 @@ int bnxt_init_tx_ring_struct(struct bnxt_tx_queue *txq, unsigned int socket_id)
 	ring->ring_mask = ring->ring_size - 1;
 	ring->bd = (void *)txr->tx_desc_ring;
 	ring->bd_dma = txr->tx_desc_mapping;
-	ring->vmem_size = ring->ring_size * sizeof(struct bnxt_sw_tx_bd);
+	ring->vmem_size = ring->ring_size * sizeof(struct rte_mbuf *);
 	ring->vmem = (void **)&txr->tx_buf_ring;
 	ring->fw_ring_id = INVALID_HW_RING_ID;
@@ -104,6 +104,21 @@ int bnxt_init_tx_ring_struct(struct bnxt_tx_queue *txq, unsigned int socket_id)
 	return 0;
 }
 
+static bool
+bnxt_xmit_need_long_bd(struct rte_mbuf *tx_pkt, struct bnxt_tx_queue *txq)
+{
+	if (tx_pkt->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM |
+				PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM |
+				PKT_TX_VLAN_PKT | PKT_TX_OUTER_IP_CKSUM |
+				PKT_TX_TUNNEL_GRE | PKT_TX_TUNNEL_VXLAN |
+				PKT_TX_TUNNEL_GENEVE | PKT_TX_IEEE1588_TMST |
+				PKT_TX_QINQ_PKT) ||
+	    (BNXT_TRUFLOW_EN(txq->bp) &&
+	     (txq->bp->tx_cfa_action || txq->vfr_tx_cfa_action)))
+		return true;
+
+	return false;
+}
+
 static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 				struct bnxt_tx_queue *txq,
 				uint16_t *coal_pkts,
@@ -116,10 +131,10 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 	struct tx_bd_long_hi *txbd1 = NULL;
 	uint32_t vlan_tag_flags;
 	bool long_bd = false;
-	unsigned short nr_bds = 0;
+	unsigned short nr_bds;
 	uint16_t prod;
 	struct rte_mbuf *m_seg;
-	struct bnxt_sw_tx_bd *tx_buf;
+	struct rte_mbuf **tx_buf;
 	static const uint32_t lhint_arr[4] = {
 		TX_BD_LONG_FLAGS_LHINT_LT512,
 		TX_BD_LONG_FLAGS_LHINT_LT1K,
@@ -130,17 +145,9 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 	if (unlikely(is_bnxt_in_error(txq->bp)))
 		return -EIO;
 
-	if (tx_pkt->ol_flags & (PKT_TX_TCP_SEG | PKT_TX_TCP_CKSUM |
-				PKT_TX_UDP_CKSUM | PKT_TX_IP_CKSUM |
-				PKT_TX_VLAN_PKT | PKT_TX_OUTER_IP_CKSUM |
-				PKT_TX_TUNNEL_GRE | PKT_TX_TUNNEL_VXLAN |
-				PKT_TX_TUNNEL_GENEVE | PKT_TX_IEEE1588_TMST |
-				PKT_TX_QINQ_PKT) ||
-	    (BNXT_TRUFLOW_EN(txq->bp) &&
-	     (txq->bp->tx_cfa_action || txq->vfr_tx_cfa_action)))
-		long_bd = true;
-
+	long_bd = bnxt_xmit_need_long_bd(tx_pkt, txq);
 	nr_bds = long_bd + tx_pkt->nb_segs;
+
 	if (unlikely(bnxt_tx_avail(txq) < nr_bds))
 		return -ENOMEM;
@@ -172,8 +179,7 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 	prod = RING_IDX(ring, txr->tx_raw_prod);
 	tx_buf = &txr->tx_buf_ring[prod];
-	tx_buf->mbuf = tx_pkt;
-	tx_buf->nr_bds = nr_bds;
+	*tx_buf = tx_pkt;
 
 	txbd = &txr->tx_desc_ring[prod];
 	txbd->opaque = *coal_pkts;
@@ -185,7 +191,7 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 		txbd->flags_type |= TX_BD_LONG_FLAGS_LHINT_GTE2K;
 	else
 		txbd->flags_type |= lhint_arr[tx_pkt->pkt_len >> 9];
-	txbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova(tx_buf->mbuf));
+	txbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova(tx_pkt));
 	*last_txbd = txbd;
 
 	if (long_bd) {
@@ -193,18 +199,18 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 		vlan_tag_flags = 0;
 
 		/* HW can accelerate only outer vlan in QinQ mode */
-		if (tx_buf->mbuf->ol_flags & PKT_TX_QINQ_PKT) {
+		if (tx_pkt->ol_flags & PKT_TX_QINQ_PKT) {
 			vlan_tag_flags = TX_BD_LONG_CFA_META_KEY_VLAN_TAG |
-				tx_buf->mbuf->vlan_tci_outer;
+				tx_pkt->vlan_tci_outer;
 			outer_tpid_bd = txq->bp->outer_tpid_bd &
 				BNXT_OUTER_TPID_BD_MASK;
 			vlan_tag_flags |= outer_tpid_bd;
-		} else if (tx_buf->mbuf->ol_flags & PKT_TX_VLAN_PKT) {
+		} else if (tx_pkt->ol_flags & PKT_TX_VLAN_PKT) {
 			/* shurd: Should this mask at
 			 * TX_BD_LONG_CFA_META_VLAN_VID_MASK?
 			 */
 			vlan_tag_flags = TX_BD_LONG_CFA_META_KEY_VLAN_TAG |
-				tx_buf->mbuf->vlan_tci;
+				tx_pkt->vlan_tci;
 			/* Currently supports 8021Q, 8021AD vlan offloads
 			 * QINQ1, QINQ2, QINQ3 vlan headers are deprecated
 			 */
@@ -325,7 +331,7 @@ static uint16_t bnxt_start_xmit(struct rte_mbuf *tx_pkt,
 		prod = RING_IDX(ring, txr->tx_raw_prod);
 		tx_buf = &txr->tx_buf_ring[prod];
-		tx_buf->mbuf = m_seg;
+		*tx_buf = m_seg;
 
 		txbd = &txr->tx_desc_ring[prod];
 		txbd->address = rte_cpu_to_le_64(rte_mbuf_data_iova(m_seg));
@@ -356,16 +362,17 @@ static void bnxt_tx_cmp_fast(struct bnxt_tx_queue *txq, int nr_pkts)
 	int i, j;
 
 	for (i = 0; i < nr_pkts; i++) {
-		struct bnxt_sw_tx_bd *tx_buf;
+		struct rte_mbuf **tx_buf;
 		unsigned short nr_bds;
 
 		tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
-		nr_bds = tx_buf->nr_bds;
+		nr_bds = (*tx_buf)->nb_segs +
+			 bnxt_xmit_need_long_bd(*tx_buf, txq);
 		for (j = 0; j < nr_bds; j++) {
-			if (tx_buf->mbuf) {
+			if (*tx_buf) {
 				/* Add mbuf to the bulk free array */
-				free[blk++] = tx_buf->mbuf;
-				tx_buf->mbuf = NULL;
+				free[blk++] = *tx_buf;
+				*tx_buf = NULL;
 			}
 			raw_cons = RING_NEXT(raw_cons);
 			tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
@@ -389,14 +396,15 @@ static void bnxt_tx_cmp(struct bnxt_tx_queue *txq, int nr_pkts)
 
 	for (i = 0; i < nr_pkts; i++) {
 		struct rte_mbuf *mbuf;
-		struct bnxt_sw_tx_bd *tx_buf;
+		struct rte_mbuf **tx_buf;
 		unsigned short nr_bds;
 
 		tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
-		nr_bds = tx_buf->nr_bds;
+		nr_bds = (*tx_buf)->nb_segs +
+			 bnxt_xmit_need_long_bd(*tx_buf, txq);
 		for (j = 0; j < nr_bds; j++) {
-			mbuf = tx_buf->mbuf;
-			tx_buf->mbuf = NULL;
+			mbuf = *tx_buf;
+			*tx_buf = NULL;
 			raw_cons = RING_NEXT(raw_cons);
 			tx_buf = &txr->tx_buf_ring[RING_IDX(ring, raw_cons)];
 
 			if (!mbuf)	/* long_bd's tx_buf ? */
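One behavioral consequence of dropping the cached nr_bds field: the scalar completion paths above now re-derive each packet's descriptor count from the mbuf itself, trading the former per-slot load for a recomputation from fields the completion path touches anyway. A hedged sketch of that arithmetic (tx_bds_for_pkt is illustrative, not a driver symbol; need_long_bd stands in for bnxt_xmit_need_long_bd()):

#include <stdbool.h>
#include <rte_mbuf.h>

static inline unsigned short
tx_bds_for_pkt(const struct rte_mbuf *m, bool need_long_bd)
{
	/* One buffer descriptor per data segment, plus the extra "hi"
	 * descriptor when checksum/TSO/VLAN/tunnel offloads are in play.
	 */
	return m->nb_segs + need_long_bd;
}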

drivers/net/bnxt/bnxt_txr.h

@@ -17,18 +17,13 @@ struct bnxt_tx_ring_info {
 	struct bnxt_db_info     tx_db;
 
 	struct tx_bd_long	*tx_desc_ring;
-	struct bnxt_sw_tx_bd	*tx_buf_ring;
+	struct rte_mbuf		**tx_buf_ring;
 
 	rte_iova_t		tx_desc_mapping;
 
 	struct bnxt_ring	*tx_ring_struct;
 };
 
-struct bnxt_sw_tx_bd {
-	struct rte_mbuf		*mbuf; /* mbuf associated with TX descriptor */
-	unsigned short		nr_bds;
-};
-
 static inline uint32_t bnxt_tx_bds_in_hw(struct bnxt_tx_queue *txq)
 {
 	return ((txq->tx_ring->tx_raw_prod - txq->tx_ring->tx_raw_cons) &