diff --git a/drivers/net/sfc/sfc_ethdev.c b/drivers/net/sfc/sfc_ethdev.c
index 4a4c4dda87..68717a6529 100644
--- a/drivers/net/sfc/sfc_ethdev.c
+++ b/drivers/net/sfc/sfc_ethdev.c
@@ -387,6 +387,7 @@ sfc_eth_dev_init(struct rte_eth_dev *dev)
 
 	dev->dev_ops = &sfc_eth_dev_ops;
 	dev->rx_pkt_burst = &sfc_recv_pkts;
+	dev->tx_pkt_burst = &sfc_xmit_pkts;
 
 	sfc_adapter_unlock(sa);
 
@@ -425,6 +426,7 @@ sfc_eth_dev_uninit(struct rte_eth_dev *dev)
 
 	dev->dev_ops = NULL;
 	dev->rx_pkt_burst = NULL;
+	dev->tx_pkt_burst = NULL;
 
 	sfc_kvargs_cleanup(sa);
 
diff --git a/drivers/net/sfc/sfc_ev.c b/drivers/net/sfc/sfc_ev.c
index 075172addf..96b95cccbe 100644
--- a/drivers/net/sfc/sfc_ev.c
+++ b/drivers/net/sfc/sfc_ev.c
@@ -140,12 +140,30 @@ sfc_ev_rx(void *arg, __rte_unused uint32_t label, uint32_t id,
 }
 
 static boolean_t
-sfc_ev_tx(void *arg, __rte_unused uint32_t label, __rte_unused uint32_t id)
+sfc_ev_tx(void *arg, __rte_unused uint32_t label, uint32_t id)
 {
 	struct sfc_evq *evq = arg;
+	struct sfc_txq *txq;
+	unsigned int stop;
+	unsigned int delta;
 
-	sfc_err(evq->sa, "EVQ %u unexpected Tx event", evq->evq_index);
-	return B_TRUE;
+	txq = evq->txq;
+
+	SFC_ASSERT(txq != NULL);
+	SFC_ASSERT(txq->evq == evq);
+
+	if (unlikely((txq->state & SFC_TXQ_STARTED) == 0))
+		goto done;
+
+	stop = (id + 1) & txq->ptr_mask;
+	id = txq->pending & txq->ptr_mask;
+
+	delta = (stop >= id) ? (stop - id) : (txq->ptr_mask + 1 - id + stop);
+
+	txq->pending += delta;
+
+done:
+	return B_FALSE;
 }
 
 static boolean_t
diff --git a/drivers/net/sfc/sfc_tweak.h b/drivers/net/sfc/sfc_tweak.h
index 8538d565c4..8a60f35120 100644
--- a/drivers/net/sfc/sfc_tweak.h
+++ b/drivers/net/sfc/sfc_tweak.h
@@ -41,4 +41,11 @@
  */
#define SFC_RX_REFILL_BULK (RTE_CACHE_LINE_SIZE / sizeof(efx_qword_t))
 
+/**
+ * Make the transmit path reap at least once per burst;
+ * this improves cache locality since well-timed reaping allows the same
+ * mbufs to be reused to send subsequent bursts in certain cases
+ */
+#define SFC_TX_XMIT_PKTS_REAP_AT_LEAST_ONCE 0
+
 #endif /* _SFC_TWEAK_H_ */
diff --git a/drivers/net/sfc/sfc_tx.c b/drivers/net/sfc/sfc_tx.c
index 3f38066536..a2406109d4 100644
--- a/drivers/net/sfc/sfc_tx.c
+++ b/drivers/net/sfc/sfc_tx.c
@@ -32,6 +32,7 @@
 #include "sfc_log.h"
 #include "sfc_ev.h"
 #include "sfc_tx.h"
+#include "sfc_tweak.h"
 
 /*
  * Maximum number of TX queue flush attempts in case of
@@ -526,3 +527,117 @@ sfc_tx_stop(struct sfc_adapter *sa)
 
 	efx_tx_fini(sa->nic);
 }
+
+uint16_t
+sfc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts, uint16_t nb_pkts)
+{
+	struct sfc_txq *txq = (struct sfc_txq *)tx_queue;
+	unsigned int added = txq->added;
+	unsigned int pushed = added;
+	unsigned int pkts_sent = 0;
+	efx_desc_t *pend = &txq->pend_desc[0];
+	const unsigned int hard_max_fill = EFX_TXQ_LIMIT(txq->ptr_mask + 1);
+	const unsigned int soft_max_fill = hard_max_fill -
+					   SFC_TX_MAX_PKT_DESC;
+	unsigned int fill_level = added - txq->completed;
+	boolean_t reap_done;
+	int rc __rte_unused;
+	struct rte_mbuf **pktp;
+
+	if (unlikely((txq->state & SFC_TXQ_RUNNING) == 0))
+		goto done;
+
+	/*
+	 * If there is insufficient space for even a single packet,
+	 * reap now; otherwise, avoid reaping on every burst
+	 * so as not to increase latency
+	 */
+	reap_done = (fill_level > soft_max_fill);
+
+	if (reap_done) {
+		sfc_tx_reap(txq);
+		/*
+		 * Recalculate fill level since 'txq->completed'
+		 * might have changed on reap
+		 */
+		fill_level = added - txq->completed;
+	}
+
+	for (pkts_sent = 0, pktp = &tx_pkts[0];
+	     (pkts_sent < nb_pkts) && (fill_level <= soft_max_fill);
+	     pkts_sent++, pktp++) {
+		struct rte_mbuf *m_seg = *pktp;
+		size_t pkt_len = m_seg->pkt_len;
+		unsigned int pkt_descs = 0;
+
+		for (; m_seg != NULL; m_seg = m_seg->next) {
+			efsys_dma_addr_t next_frag;
+			size_t seg_len;
+
+			seg_len = m_seg->data_len;
+			next_frag = rte_mbuf_data_dma_addr(m_seg);
+
+			do {
+				efsys_dma_addr_t frag_addr = next_frag;
+				size_t frag_len;
+
+				next_frag = RTE_ALIGN(frag_addr + 1,
+						      SFC_TX_SEG_BOUNDARY);
+				frag_len = MIN(next_frag - frag_addr, seg_len);
+				seg_len -= frag_len;
+				pkt_len -= frag_len;
+
+				efx_tx_qdesc_dma_create(txq->common,
+							frag_addr, frag_len,
+							(pkt_len == 0),
+							pend++);
+
+				pkt_descs++;
+			} while (seg_len != 0);
+		}
+
+		added += pkt_descs;
+
+		fill_level += pkt_descs;
+		if (unlikely(fill_level > hard_max_fill)) {
+			/*
+			 * Our estimate of the maximum number of descriptors
+			 * required to send a packet seems to be wrong.
+			 * Try to reap (if we haven't yet).
+			 */
+			if (!reap_done) {
+				sfc_tx_reap(txq);
+				reap_done = B_TRUE;
+				fill_level = added - txq->completed;
+				if (fill_level > hard_max_fill) {
+					pend -= pkt_descs;
+					break;
+				}
+			} else {
+				pend -= pkt_descs;
+				break;
+			}
+		}
+
+		/* Assign mbuf to the last used desc */
+		txq->sw_ring[(added - 1) & txq->ptr_mask].mbuf = *pktp;
+	}
+
+	if (likely(pkts_sent > 0)) {
+		rc = efx_tx_qdesc_post(txq->common, txq->pend_desc,
+				       pend - &txq->pend_desc[0],
+				       txq->completed, &txq->added);
+		SFC_ASSERT(rc == 0);
+
+		if (likely(pushed != txq->added))
+			efx_tx_qpush(txq->common, txq->added, pushed);
+	}
+
+#if SFC_TX_XMIT_PKTS_REAP_AT_LEAST_ONCE
+	if (!reap_done)
+		sfc_tx_reap(txq);
+#endif
+
+done:
+	return pkts_sent;
+}
diff --git a/drivers/net/sfc/sfc_tx.h b/drivers/net/sfc/sfc_tx.h
index d74de00d9e..fe2736b167 100644
--- a/drivers/net/sfc/sfc_tx.h
+++ b/drivers/net/sfc/sfc_tx.h
@@ -39,6 +39,21 @@
 extern "C" {
 #endif
 
+/**
+ * Estimated maximum number of descriptors needed to transmit a packet;
+ * it is derived from the expectation that a packet consists of a header
+ * plus a couple of data segments, one of which crosses a 4K page boundary;
+ * the transmit path uses it to avoid redundant reaping and, thus,
+ * to avoid increased latency
+ */
+#define SFC_TX_MAX_PKT_DESC	4
+
+/**
+ * A segment must not cross a 4K boundary
+ * (this is a requirement of the NIC TX descriptors)
+ */
+#define SFC_TX_SEG_BOUNDARY	4096
+
 struct sfc_adapter;
 struct sfc_evq;
 
@@ -100,6 +115,9 @@ void sfc_tx_qstop(struct sfc_adapter *sa, unsigned int sw_index);
 int sfc_tx_start(struct sfc_adapter *sa);
 void sfc_tx_stop(struct sfc_adapter *sa);
 
+uint16_t sfc_xmit_pkts(void *tx_queue, struct rte_mbuf **tx_pkts,
+		       uint16_t nb_pkts);
+
 #ifdef __cplusplus
 }
 #endif
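
Note (not part of the patch): the inner do/while loop in sfc_xmit_pkts()
splits each mbuf data segment into DMA fragments so that no fragment crosses
a SFC_TX_SEG_BOUNDARY (4K) boundary, as the NIC TX descriptors require.
Below is a minimal standalone sketch of that arithmetic; split_segment(),
ALIGN_UP and the printf output are illustrative stand-ins (not driver API),
with ALIGN_UP assumed to mirror DPDK's RTE_ALIGN round-up behaviour.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define SEG_BOUNDARY	4096	/* same value as SFC_TX_SEG_BOUNDARY */
#define ALIGN_UP(x, a)	(((x) + (a) - 1) & ~((uint64_t)(a) - 1))
#define MIN(a, b)	((a) < (b) ? (a) : (b))

static void
split_segment(uint64_t addr, size_t len)
{
	uint64_t next_frag = addr;

	do {
		uint64_t frag_addr = next_frag;
		size_t frag_len;

		/* First address past frag_addr that lies on a 4K boundary */
		next_frag = ALIGN_UP(frag_addr + 1, SEG_BOUNDARY);
		/* Fragment runs to that boundary or to the end of the data */
		frag_len = MIN(next_frag - frag_addr, len);
		len -= frag_len;

		printf("fragment: addr=0x%" PRIx64 " len=%zu\n",
		       frag_addr, frag_len);
	} while (len != 0);
}

int
main(void)
{
	/*
	 * A segment that starts 100 bytes before a 4K boundary and is
	 * 5000 bytes long yields fragments of 100, 4096 and 804 bytes.
	 */
	split_segment(4096 - 100, 5000);
	return 0;
}

This is also why SFC_TX_MAX_PKT_DESC is 4 rather than 3: a header plus two
data segments needs three descriptors, and one boundary crossing adds one more.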