ixgbe: speed up non-vector Tx

The freeing of mbufs in ixgbe is one of the observable hot spots
under load. Optimize it by doing a bulk free of mbufs, using code similar
to i40e and fm10k.

Drop the no-longer-needed micro-optimization for the no-refcount flag
(ETH_TXQ_FLAGS_NOREFCOUNT).

Signed-off-by: Stephen Hemminger <stephen@networkplumber.org>
Acked-by: Konstantin Ananyev <konstantin.ananyev@intel.com>
This commit is contained in:
Stephen Hemminger 2015-11-13 08:10:13 -08:00 committed by Thomas Monjalon
parent 4a41c17dba
commit 06554d381d
2 changed files with 21 additions and 13 deletions

View File

@ -127,7 +127,8 @@ ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
{
struct ixgbe_tx_entry *txep;
uint32_t status;
int i;
int i, nb_free = 0;
struct rte_mbuf *m, *free[RTE_IXGBE_TX_MAX_FREE_BUF_SZ];
/* check DD bit on threshold descriptor */
status = txq->tx_ring[txq->tx_next_dd].wb.status;
@ -140,20 +141,27 @@ ixgbe_tx_free_bufs(struct ixgbe_tx_queue *txq)
*/
txep = &(txq->sw_ring[txq->tx_next_dd - (txq->tx_rs_thresh - 1)]);
/* free buffers one at a time */
if ((txq->txq_flags & (uint32_t)ETH_TXQ_FLAGS_NOREFCOUNT) != 0) {
for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
txep->mbuf->next = NULL;
rte_mempool_put(txep->mbuf->pool, txep->mbuf);
txep->mbuf = NULL;
}
} else {
for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
rte_pktmbuf_free_seg(txep->mbuf);
txep->mbuf = NULL;
for (i = 0; i < txq->tx_rs_thresh; ++i, ++txep) {
/* free buffers one at a time */
m = __rte_pktmbuf_prefree_seg(txep->mbuf);
txep->mbuf = NULL;
if (unlikely(m == NULL))
continue;
if (nb_free >= RTE_IXGBE_TX_MAX_FREE_BUF_SZ ||
(nb_free > 0 && m->pool != free[0]->pool)) {
rte_mempool_put_bulk(free[0]->pool,
(void **)free, nb_free);
nb_free = 0;
}
free[nb_free++] = m;
}
if (nb_free > 0)
rte_mempool_put_bulk(free[0]->pool, (void **)free, nb_free);
/* buffers were freed, update counters */
txq->nb_tx_free = (uint16_t)(txq->nb_tx_free + txq->tx_rs_thresh);
txq->tx_next_dd = (uint16_t)(txq->tx_next_dd + txq->tx_rs_thresh);

View File

@ -58,13 +58,13 @@
#define RTE_PMD_IXGBE_TX_MAX_BURST 32
#define RTE_PMD_IXGBE_RX_MAX_BURST 32
#define RTE_IXGBE_TX_MAX_FREE_BUF_SZ 64
#define RTE_IXGBE_DESCS_PER_LOOP 4
#ifdef RTE_IXGBE_INC_VECTOR
#define RTE_IXGBE_RXQ_REARM_THRESH 32
#define RTE_IXGBE_MAX_RX_BURST RTE_IXGBE_RXQ_REARM_THRESH
#define RTE_IXGBE_TX_MAX_FREE_BUF_SZ 64
#endif
#define RX_RING_SZ ((IXGBE_MAX_RING_DESC + RTE_IXGBE_DESCS_PER_LOOP - 1) * \