cxgbe(4): Split sge_nm_rxq into three cachelines.

This reduces the lines bouncing around between the driver rx ithread and
the netmap rxsync thread.  There is no net change in the size of the
struct (it continues to waste a lot of space).

This kind of split was originally proposed in D17869 by Marc De La
Gueronniere @ Verisign, Inc.

MFC after:	1 week
Sponsored by:	Chelsio Communications
Commit: aa301e5ffe (parent: 4b156472c6)
Author: Navdeep Parhar
Date:   2020-03-20 05:12:16 +0000

3 changed files, 20 insertions(+), 14 deletions(-)

File: sys/dev/cxgbe/adapter.h  (inferred from `struct sge_nm_rxq` — verify against the repository)

@@ -685,7 +685,9 @@ struct sge_wrq {
 #define INVALID_NM_RXQ_CNTXT_ID ((uint16_t)(-1))
 struct sge_nm_rxq {
-	volatile int nm_state;	/* NM_OFF, NM_ON, or NM_BUSY */
+	/* Items used by the driver rx ithread are in this cacheline. */
+	volatile int nm_state __aligned(CACHE_LINE_SIZE);	/* NM_OFF, NM_ON, or NM_BUSY */
+	u_int nid;		/* netmap ring # for this queue */
 	struct vi_info *vi;
 	struct iq_desc *iq_desc;
@@ -694,19 +696,22 @@ struct sge_nm_rxq {
 	uint16_t iq_cidx;
 	uint16_t iq_sidx;
 	uint8_t iq_gen;
-
-	__be64  *fl_desc;
-	uint16_t fl_cntxt_id;
-	uint32_t fl_cidx;
-	uint32_t fl_pidx;
 	uint32_t fl_sidx;
+
+	/* Items used by netmap rxsync are in this cacheline. */
+	__be64  *fl_desc __aligned(CACHE_LINE_SIZE);
+	uint16_t fl_cntxt_id;
+	uint32_t fl_pidx;
+	uint32_t fl_sidx2;	/* copy of fl_sidx */
 	uint32_t fl_db_val;
+	u_int fl_db_saved;
 	u_int fl_hwidx:4;
-	u_int fl_db_saved;
-	u_int nid;		/* netmap ring # for this queue */
-
-	/* infrequently used items after this */
+
+	/*
+	 * fl_cidx is used by both the ithread and rxsync, the rest are not used
+	 * in the rx fast path.
+	 */
+	uint32_t fl_cidx __aligned(CACHE_LINE_SIZE);
 
 	bus_dma_tag_t iq_desc_tag;
 	bus_dmamap_t iq_desc_map;
@@ -716,7 +721,7 @@ struct sge_nm_rxq {
 	bus_dma_tag_t fl_desc_tag;
 	bus_dmamap_t fl_desc_map;
 	bus_addr_t fl_ba;
-} __aligned(CACHE_LINE_SIZE);
+};
 
 #define INVALID_NM_TXQ_CNTXT_ID ((u_int)(-1))
 struct sge_nm_txq {

File: sys/dev/cxgbe/t4_netmap.c  (inferred from `cxgbe_netmap_rxsync` — verify against the repository)

@@ -921,7 +921,7 @@ cxgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
 		MPASS((n & 7) == 0);
 		IDXINCR(kring->nr_hwcur, n, kring->nkr_num_slots);
-		IDXINCR(nm_rxq->fl_pidx, n, nm_rxq->fl_sidx);
+		IDXINCR(nm_rxq->fl_pidx, n, nm_rxq->fl_sidx2);
 
 		while (n > 0) {
 			for (i = 0; i < 8; i++, fl_pidx++, slot++) {
@@ -929,10 +929,10 @@ cxgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
 				MPASS(ba != 0);
 				nm_rxq->fl_desc[fl_pidx] = htobe64(ba | hwidx);
 				slot->flags &= ~NS_BUF_CHANGED;
-				MPASS(fl_pidx <= nm_rxq->fl_sidx);
+				MPASS(fl_pidx <= nm_rxq->fl_sidx2);
 			}
 			n -= 8;
-			if (fl_pidx == nm_rxq->fl_sidx) {
+			if (fl_pidx == nm_rxq->fl_sidx2) {
 				fl_pidx = 0;
 				slot = &ring->slot[0];
 			}

File: sys/dev/cxgbe/t4_sge.c  (inferred from `alloc_nm_rxq` — verify against the repository)

@@ -3655,6 +3655,7 @@ alloc_nm_rxq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int intr_idx,
 	nm_rxq->iq_gen = F_RSPD_GEN;
 	nm_rxq->fl_pidx = nm_rxq->fl_cidx = 0;
 	nm_rxq->fl_sidx = na->num_rx_desc;
+	nm_rxq->fl_sidx2 = nm_rxq->fl_sidx;	/* copy for rxsync cacheline */
 	nm_rxq->intr_idx = intr_idx;
 	nm_rxq->iq_cntxt_id = INVALID_NM_RXQ_CNTXT_ID;