From bf89cbedd2d9a48e2c0c4b5ce4da628b3a71db5c Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Thu, 8 Oct 2015 19:16:05 +0530 Subject: [PATCH] cxgbe: optimize forwarding performance for 40G Update sge initialization with respect to free-list manager configuration and ingress arbiter. Also update refill logic to refill mbufs only after a certain threshold for rx. Optimize tx packet prefetch. Approx. 3 MPPS improvement seen in forwarding performance after the optimization. Signed-off-by: Rahul Lakkireddy Signed-off-by: Kumar Sanghvi --- doc/guides/rel_notes/release_2_2.rst | 4 ++++ drivers/net/cxgbe/base/t4_regs.h | 16 ++++++++++++++++ drivers/net/cxgbe/cxgbe_main.c | 7 +++++++ drivers/net/cxgbe/sge.c | 8 ++++---- 4 files changed, 31 insertions(+), 4 deletions(-) diff --git a/doc/guides/rel_notes/release_2_2.rst b/doc/guides/rel_notes/release_2_2.rst index 89cba4dbd3..3068ef8f54 100644 --- a/doc/guides/rel_notes/release_2_2.rst +++ b/doc/guides/rel_notes/release_2_2.rst @@ -4,6 +4,10 @@ DPDK Release 2.2 New Features ------------ +* **Enhanced support for the Chelsio CXGBE driver.** + + * Optimize forwarding performance for Chelsio T5 40GbE cards. 
+ Resolved Issues --------------- diff --git a/drivers/net/cxgbe/base/t4_regs.h b/drivers/net/cxgbe/base/t4_regs.h index cd28b593a8..9057e40930 100644 --- a/drivers/net/cxgbe/base/t4_regs.h +++ b/drivers/net/cxgbe/base/t4_regs.h @@ -266,6 +266,18 @@ #define A_SGE_FL_BUFFER_SIZE2 0x104c #define A_SGE_FL_BUFFER_SIZE3 0x1050 +#define A_SGE_FLM_CFG 0x1090 + +#define S_CREDITCNT 4 +#define M_CREDITCNT 0x3U +#define V_CREDITCNT(x) ((x) << S_CREDITCNT) +#define G_CREDITCNT(x) (((x) >> S_CREDITCNT) & M_CREDITCNT) + +#define S_CREDITCNTPACKING 2 +#define M_CREDITCNTPACKING 0x3U +#define V_CREDITCNTPACKING(x) ((x) << S_CREDITCNTPACKING) +#define G_CREDITCNTPACKING(x) (((x) >> S_CREDITCNTPACKING) & M_CREDITCNTPACKING) + #define A_SGE_CONM_CTRL 0x1094 #define S_EGRTHRESHOLD 8 @@ -361,6 +373,10 @@ #define A_SGE_CONTROL2 0x1124 +#define S_IDMAARBROUNDROBIN 19 +#define V_IDMAARBROUNDROBIN(x) ((x) << S_IDMAARBROUNDROBIN) +#define F_IDMAARBROUNDROBIN V_IDMAARBROUNDROBIN(1U) + #define S_INGPACKBOUNDARY 16 #define M_INGPACKBOUNDARY 0x7U #define V_INGPACKBOUNDARY(x) ((x) << S_INGPACKBOUNDARY) diff --git a/drivers/net/cxgbe/cxgbe_main.c b/drivers/net/cxgbe/cxgbe_main.c index 3755444042..316b87d0c1 100644 --- a/drivers/net/cxgbe/cxgbe_main.c +++ b/drivers/net/cxgbe/cxgbe_main.c @@ -422,6 +422,13 @@ static int adap_init0_tweaks(struct adapter *adapter) t4_set_reg_field(adapter, A_SGE_CONTROL, V_PKTSHIFT(M_PKTSHIFT), V_PKTSHIFT(rx_dma_offset)); + t4_set_reg_field(adapter, A_SGE_FLM_CFG, + V_CREDITCNT(M_CREDITCNT) | V_CREDITCNTPACKING(M_CREDITCNTPACKING), + V_CREDITCNT(3) | V_CREDITCNTPACKING(1)); + + t4_set_reg_field(adapter, A_SGE_CONTROL2, V_IDMAARBROUNDROBIN(1U), + V_IDMAARBROUNDROBIN(1U)); + /* * Don't include the "IP Pseudo Header" in CPL_RX_PKT checksums: Linux * adds the pseudo header itself. 
diff --git a/drivers/net/cxgbe/sge.c b/drivers/net/cxgbe/sge.c index 6eb12441bf..69ab487973 100644 --- a/drivers/net/cxgbe/sge.c +++ b/drivers/net/cxgbe/sge.c @@ -286,8 +286,7 @@ static void unmap_rx_buf(struct sge_fl *q) static inline void ring_fl_db(struct adapter *adap, struct sge_fl *q) { - /* see if we have exceeded q->size / 4 */ - if (q->pend_cred >= (q->size / 4)) { + if (q->pend_cred >= 64) { u32 val = adap->params.arch.sge_fl_db; if (is_t4(adap->params.chip)) @@ -1054,7 +1053,6 @@ int t4_eth_xmit(struct sge_eth_txq *txq, struct rte_mbuf *mbuf) return 0; } - rte_prefetch0(&((&txq->q)->sdesc->mbuf->pool)); pi = (struct port_info *)txq->eth_dev->data->dev_private; adap = pi->adapter; @@ -1070,6 +1068,7 @@ int t4_eth_xmit(struct sge_eth_txq *txq, struct rte_mbuf *mbuf) txq->stats.mapping_err++; goto out_free; } + rte_prefetch0((volatile void *)addr); return tx_do_packet_coalesce(txq, mbuf, cflits, adap, pi, addr); } else { @@ -1454,7 +1453,8 @@ static int process_responses(struct sge_rspq *q, int budget, unsigned int params; u32 val; - __refill_fl(q->adapter, &rxq->fl); + if (fl_cap(&rxq->fl) - rxq->fl.avail >= 64) + __refill_fl(q->adapter, &rxq->fl); params = V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX); q->next_intr_params = params; val = V_CIDXINC(cidx_inc) | V_SEINTARM(params);