MFC r279243-r279246, r279251, r279691, r279700, and r279701.

r279243: cxgbe(4): request an automatic tx update when a netmap txq idles.

r279244: cxgbe(4): wait for the hardware to catch up before destroying a
netmap txq.

r279245: cxgbe(4): do not set the netmap rxq interrupts on a hair-trigger.

r279246: cxgbe(4): set up congestion management for netmap rx queues. The
hw.cxgbe.cong_drop knob controls the response of the chip when netmap
queues are congested.

r279251: cxgbe(4): allow tx hardware checksumming on the netmap interface.
It is disabled by default, but users can set IFCAP_TXCSUM on the netmap
ifnet (ifconfig ncxl0 txcsum) to override netmap and force the hardware to
calculate and insert proper IP and L4 checksums in outbound frames.

r279691: cxgbe(4): provide the correct size of the freelists associated
with netmap rx queues to the chip. This fixes many problems with native
netmap rx on ncxl/ncxgbe interfaces.

r279700: cxgbe(4): knobs to experiment with the interrupt coalescing timer
for netmap rx queues, and the "batchiness" of rx updates sent to the chip.
These knobs will probably become per-rxq in the near future and will be
documented only after their final form is decided.

r279701: cxgbe(4): experimental rx packet sink for netmap queues. This is
not intended for general use.

(A sample tunable setup for the new knobs follows the commit metadata
below.)
commit 3713ef6b03
parent 406b6dc8fb
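The knobs touched by this MFC all live under the hw.cxgbe sysctl tree. As
a usage sketch (the knob names and defaults are taken from the diff below;
the values themselves are illustrative only): nm_black_hole is a boot-time
tunable (CTLFLAG_RDTUN), while nm_rx_ndesc and nm_holdoff_tmr_idx are
CTLFLAG_RWTUN and can also be changed at runtime with sysctl(8).

	# /boot/loader.conf (illustrative values)
	hw.cxgbe.cong_drop="1"           # response to congestion (r279246)
	hw.cxgbe.nm_black_hole="1"       # 1 = black hole, 2 = + buffer packing (r279701)
	hw.cxgbe.nm_rx_ndesc="256"       # rx descriptors between hw cidx updates (r279700)
	hw.cxgbe.nm_holdoff_tmr_idx="2"  # holdoff timer index for netmap rxqs (r279700)

Tx checksumming on the netmap ifnet is per-interface rather than a sysctl:
ifconfig ncxl0 txcsum (r279251).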
sys/dev/cxgbe/adapter.h

@@ -1022,6 +1022,7 @@ void t4_wrq_tx_locked(struct adapter *, struct sge_wrq *, struct wrqe *);
 int t4_eth_tx(struct ifnet *, struct sge_txq *, struct mbuf *);
 void t4_update_fl_bufsize(struct ifnet *);
 int can_resume_tx(struct sge_eq *);
+int tnl_cong(struct port_info *);
 
 /* t4_tracer.c */
 struct t4_tracer;
sys/dev/cxgbe/t4_netmap.c

@@ -58,6 +58,25 @@ extern int fl_pad; /* XXXNM */
 extern int spg_len;	/* XXXNM */
 extern int fl_pktshift;	/* XXXNM */
 
+SYSCTL_NODE(_hw, OID_AUTO, cxgbe, CTLFLAG_RD, 0, "cxgbe netmap parameters");
+
+/*
+ * 0 = normal netmap rx
+ * 1 = black hole
+ * 2 = supermassive black hole (buffer packing enabled)
+ */
+int black_hole = 0;
+SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_black_hole, CTLFLAG_RDTUN, &black_hole, 0,
+    "Sink incoming packets.");
+
+int rx_ndesc = 256;
+SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_rx_ndesc, CTLFLAG_RWTUN,
+    &rx_ndesc, 0, "# of rx descriptors after which the hw cidx is updated.");
+
+int holdoff_tmr_idx = 2;
+SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_holdoff_tmr_idx, CTLFLAG_RWTUN,
+    &holdoff_tmr_idx, 0, "Holdoff timer index for netmap rx queues.");
+
 /* netmap ifnet routines */
 static void cxgbe_nm_init(void *);
 static int cxgbe_nm_ioctl(struct ifnet *, unsigned long, caddr_t);
@@ -226,9 +245,9 @@ cxgbe_nm_qflush(struct ifnet *ifp)
 }
 
 static int
-alloc_nm_rxq_hwq(struct port_info *pi, struct sge_nm_rxq *nm_rxq)
+alloc_nm_rxq_hwq(struct port_info *pi, struct sge_nm_rxq *nm_rxq, int cong)
 {
-	int rc, cntxt_id;
+	int rc, cntxt_id, i;
 	__be32 v;
 	struct adapter *sc = pi->adapter;
 	struct netmap_adapter *na = NA(pi->nm_ifp);
@@ -267,14 +286,20 @@ alloc_nm_rxq_hwq(struct port_info *pi, struct sge_nm_rxq *nm_rxq)
 	    V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4));
 	c.iqsize = htobe16(pi->qsize_rxq);
 	c.iqaddr = htobe64(nm_rxq->iq_ba);
-	c.iqns_to_fl0congen =
+	if (cong >= 0) {
+		c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN |
+		    V_FW_IQ_CMD_FL0CNGCHMAP(cong) | F_FW_IQ_CMD_FL0CONGCIF |
+		    F_FW_IQ_CMD_FL0CONGEN);
+	}
+	c.iqns_to_fl0congen |=
 	    htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
 		F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO |
-		(fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0));
+		(fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) |
+		(black_hole == 2 ? F_FW_IQ_CMD_FL0PACKEN : 0));
 	c.fl0dcaen_to_fl0cidxfthresh =
 	    htobe16(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_64B) |
 		V_FW_IQ_CMD_FL0FBMAX(X_FETCHBURSTMAX_512B));
-	c.fl0size = htobe16(na->num_rx_desc + spg_len / EQ_ESIZE);
+	c.fl0size = htobe16(na->num_rx_desc / 8 + spg_len / EQ_ESIZE);
 	c.fl0addr = htobe64(nm_rxq->fl_ba);
 
 	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
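The c.fl0size change just above is the r279691 fix: the freelist size handed
to the chip is in units of hardware descriptors, and each hardware
descriptor holds eight 8-byte buffer addresses (the same 8-at-a-time
granularity asserted further down with MPASS((na->num_rx_desc & 7) == 0)).
As a worked example with assumed values of spg_len = 64 and EQ_ESIZE = 64:
for 1024 netmap rx descriptors, the old code programmed 1024 + 1 = 1025
while the fixed code programs 1024 / 8 + 1 = 129.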
@@ -310,8 +335,37 @@ alloc_nm_rxq_hwq(struct port_info *pi, struct sge_nm_rxq *nm_rxq)
 	if (is_t5(sc))
 		nm_rxq->fl_db_val |= F_DBTYPE;
 
-	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_SEINTARM(F_QINTR_CNT_EN) |
-	    V_INGRESSQID(nm_rxq->iq_cntxt_id));
+	if (is_t5(sc) && cong >= 0) {
+		uint32_t param, val;
+
+		param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
+		    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
+		    V_FW_PARAMS_PARAM_YZ(nm_rxq->iq_cntxt_id);
+		if (cong == 0)
+			val = 1 << 19;
+		else {
+			val = 2 << 19;
+			for (i = 0; i < 4; i++) {
+				if (cong & (1 << i))
+					val |= 1 << (i << 2);
+			}
+		}
+
+		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
+		if (rc != 0) {
+			/* report error but carry on */
+			device_printf(sc->dev,
+			    "failed to set congestion manager context for "
+			    "ingress queue %d: %d\n", nm_rxq->iq_cntxt_id, rc);
+		}
+	}
+
+	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
+	    V_INGRESSQID(nm_rxq->iq_cntxt_id) |
+	    V_SEINTARM(V_QINTR_TIMER_IDX(holdoff_tmr_idx)));
 
 	return (rc);
 }
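The val computed in the hunk above packs the congestion-manager context for
the firmware: a mode field starting at bit 19 (1 when cong == 0, 2
otherwise) plus one enable bit per congestion channel at every fourth bit.
A minimal standalone sketch of just that bit arithmetic, compilable outside
the kernel (the field interpretation is an assumption read off the hunk
above, not taken from firmware documentation):

	#include <stdint.h>
	#include <stdio.h>

	/*
	 * Mirror of the val computation in alloc_nm_rxq_hwq() above:
	 * cong == 0 selects mode 1; any other value selects mode 2 and
	 * treats the low 4 bits of cong as a map of congestion channels,
	 * setting one bit per enabled channel at bits 0, 4, 8, and 12.
	 */
	static uint32_t
	conm_ctxt_val(int cong)
	{
		uint32_t val;
		int i;

		if (cong == 0)
			return (1 << 19);
		val = 2 << 19;
		for (i = 0; i < 4; i++) {
			if (cong & (1 << i))
				val |= 1 << (i << 2);
		}
		return (val);
	}

	int
	main(void)
	{
		/* channels 0 and 2 congested: expect 0x00100101 */
		printf("cong=0x0 -> 0x%08x\n", conm_ctxt_val(0x0));
		printf("cong=0x5 -> 0x%08x\n", conm_ctxt_val(0x5));
		return (0);
	}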
@@ -351,7 +405,8 @@ alloc_nm_txq_hwq(struct port_info *pi, struct sge_nm_txq *nm_txq)
 	    V_FW_EQ_ETH_CMD_VFN(0));
 	c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC |
 	    F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
-	c.autoequiqe_to_viid = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->nm_viid));
+	c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUEQE |
+	    V_FW_EQ_ETH_CMD_VIID(pi->nm_viid));
 	c.fetchszm_to_iqid =
 	    htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) |
 		V_FW_EQ_ETH_CMD_PCIECHN(pi->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO |
@@ -448,7 +503,7 @@ cxgbe_netmap_on(struct adapter *sc, struct port_info *pi, struct ifnet *ifp,
 	nm_set_native_flags(na);
 
 	for_each_nm_rxq(pi, i, nm_rxq) {
-		alloc_nm_rxq_hwq(pi, nm_rxq);
+		alloc_nm_rxq_hwq(pi, nm_rxq, tnl_cong(pi));
 		nm_rxq->fl_hwidx = hwidx;
 		slot = netmap_reset(na, NR_RX, i, 0);
 		MPASS(slot != NULL);	/* XXXNM: error check, not assert */
@@ -456,13 +511,14 @@ cxgbe_netmap_on(struct adapter *sc, struct port_info *pi, struct ifnet *ifp,
 		/* We deal with 8 bufs at a time */
 		MPASS((na->num_rx_desc & 7) == 0);
 		MPASS(na->num_rx_desc == nm_rxq->fl_sidx);
-		for (j = 0; j < nm_rxq->fl_sidx - 8; j++) {
+		for (j = 0; j < nm_rxq->fl_sidx; j++) {
 			uint64_t ba;
 
 			PNMB(na, &slot[j], &ba);
 			MPASS(ba != 0);
 			nm_rxq->fl_desc[j] = htobe64(ba | hwidx);
 		}
-		nm_rxq->fl_pidx = j;
+		j = nm_rxq->fl_pidx = nm_rxq->fl_sidx - 8;
+		MPASS((j & 7) == 0);
 		j /= 8;	/* driver pidx to hardware pidx */
 		wmb();
@@ -513,12 +569,17 @@ cxgbe_netmap_off(struct adapter *sc, struct port_info *pi, struct ifnet *ifp,
 		if_printf(ifp, "netmap disable_vi failed: %d\n", rc);
 	nm_clear_native_flags(na);
 
-	/*
-	 * XXXNM: We need to make sure that the tx queues are quiet and won't
-	 * request any more SGE_EGR_UPDATEs.
-	 */
-
 	for_each_nm_txq(pi, i, nm_txq) {
+		struct sge_qstat *spg = (void *)&nm_txq->desc[nm_txq->sidx];
+
+		/* Wait for hw pidx to catch up ... */
+		while (be16toh(nm_txq->pidx) != spg->pidx)
+			pause("nmpidx", 1);
+
+		/* ... and then for the cidx. */
+		while (spg->pidx != spg->cidx)
+			pause("nmcidx", 1);
+
 		free_nm_txq_hwq(pi, nm_txq);
 	}
 	for_each_nm_rxq(pi, i, nm_rxq) {
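The wait loops above are the substance of r279244: spg points at the
queue's status page (one slot past the last hardware descriptor), so the
driver first waits until the hardware has fetched everything that was
queued (the status-page pidx reaches the driver's pidx) and then until the
hardware has finished transmitting it all (the status-page cidx catches up
with pidx), sleeping a tick at a time with pause(9) before freeing the
queue.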
@@ -641,7 +702,7 @@ int lazy_tx_credit_flush = 1;
  */
 static void
 cxgbe_nm_tx(struct adapter *sc, struct sge_nm_txq *nm_txq,
-    struct netmap_kring *kring, int npkt, int npkt_remaining)
+    struct netmap_kring *kring, int npkt, int npkt_remaining, int txcsum)
 {
 	struct netmap_ring *ring = kring->ring;
 	struct netmap_slot *slot;
@@ -668,6 +729,7 @@ cxgbe_nm_tx(struct adapter *sc, struct sge_nm_txq *nm_txq,
 	for (i = 0; i < n; i++) {
 		slot = &ring->slot[kring->nr_hwcur];
 		PNMB(kring->na, slot, &ba);
+		MPASS(ba != 0);
 
 		cpl->ctrl0 = nm_txq->cpl_ctrl0;
 		cpl->pack = 0;
@@ -677,10 +739,12 @@ cxgbe_nm_tx(struct adapter *sc, struct sge_nm_txq *nm_txq,
 		 * checksum offloading, TCP segmentation offloading,
 		 * encryption, VLAN encapsulation/decapsulation, etc."
 		 *
-		 * XXXNM: it makes sense to enable checksum offload.
+		 * So the ncxl interfaces have tx hardware checksumming
+		 * disabled by default. But you can override netmap by
+		 * enabling IFCAP_TXCSUM on the interface manually.
 		 */
-		cpl->ctrl1 = htobe64(F_TXPKT_IPCSUM_DIS |
-		    F_TXPKT_L4CSUM_DIS);
+		cpl->ctrl1 = txcsum ? 0 :
+		    htobe64(F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS);
 
 		usgl = (void *)(cpl + 1);
 		usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
@@ -793,7 +857,7 @@ cxgbe_netmap_txsync(struct netmap_kring *kring, int flags)
 	struct sge_nm_txq *nm_txq = &sc->sge.nm_txq[pi->first_nm_txq + kring->ring_id];
 	const u_int head = kring->rhead;
 	u_int reclaimed = 0;
-	int n, d, npkt_remaining, ndesc_remaining;
+	int n, d, npkt_remaining, ndesc_remaining, txcsum;
 
 	/*
 	 * Tx was at kring->nr_hwcur last time around and now we need to advance
@@ -804,6 +868,7 @@ cxgbe_netmap_txsync(struct netmap_kring *kring, int flags)
 
 	npkt_remaining = head >= kring->nr_hwcur ? head - kring->nr_hwcur :
 	    kring->nkr_num_slots - kring->nr_hwcur + head;
+	txcsum = ifp->if_capenable & (IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6);
 	while (npkt_remaining) {
 		reclaimed += reclaim_nm_tx_desc(nm_txq);
 		ndesc_remaining = contiguous_ndesc_available(nm_txq);
@@ -827,7 +892,7 @@ cxgbe_netmap_txsync(struct netmap_kring *kring, int flags)
 
 		/* Send n packets and update nm_txq->pidx and kring->nr_hwcur */
 		npkt_remaining -= n;
-		cxgbe_nm_tx(sc, nm_txq, kring, n, npkt_remaining);
+		cxgbe_nm_tx(sc, nm_txq, kring, n, npkt_remaining, txcsum);
 	}
 	MPASS(npkt_remaining == 0);
 	MPASS(kring->nr_hwcur == head);
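This is the tx-side plumbing for r279251: txcsum is latched from
if_capenable once per txsync call and passed down to cxgbe_nm_tx(), which
either leaves cpl->ctrl1 at 0 (the hardware inserts IP and L4 checksums) or
sets F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS, so toggling
ifconfig ncxl0 txcsum takes effect at the next sync rather than mid-batch.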
@@ -861,6 +926,9 @@ cxgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
 	u_int n;
 	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
 
+	if (black_hole)
+		return (0);	/* No updates ever. */
+
 	if (netmap_no_pendintr || force_update) {
 		kring->nr_hwtail = atomic_load_acq_32(&nm_rxq->fl_cidx);
 		kring->nr_kflags &= ~NKR_PENDINTR;
@@ -890,6 +958,7 @@ cxgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
 		while (n > 0) {
 			for (i = 0; i < 8; i++, fl_pidx++, slot++) {
 				PNMB(na, slot, &ba);
+				MPASS(ba != 0);
 				nm_rxq->fl_desc[fl_pidx] = htobe64(ba | hwidx);
 				slot->flags &= ~NS_BUF_CHANGED;
 				MPASS(fl_pidx <= nm_rxq->fl_sidx);
@@ -1064,10 +1133,10 @@ t4_nm_intr(void *arg)
 	struct netmap_ring *ring = kring->ring;
 	struct iq_desc *d = &nm_rxq->iq_desc[nm_rxq->iq_cidx];
 	uint32_t lq;
-	u_int n = 0;
-	int processed = 0;
+	u_int n = 0, work = 0;
 	uint8_t opcode;
 	uint32_t fl_cidx = atomic_load_acq_32(&nm_rxq->fl_cidx);
+	u_int fl_credits = fl_cidx & 7;
 
 	while ((d->rsp.u.type_gen & F_RSPD_GEN) == nm_rxq->iq_gen) {
@@ -1078,8 +1147,10 @@ t4_nm_intr(void *arg)
 
 		switch (G_RSPD_TYPE(d->rsp.u.type_gen)) {
 		case X_RSPD_TYPE_FLBUF:
-			/* No buffer packing so new buf every time */
-			MPASS(lq & F_RSPD_NEWBUF);
+			if (black_hole != 2) {
+				/* No buffer packing so new buf every time */
+				MPASS(lq & F_RSPD_NEWBUF);
+			}
 
 			/* fall through */
 
@@ -1095,7 +1166,9 @@ t4_nm_intr(void *arg)
 		case CPL_RX_PKT:
 			ring->slot[fl_cidx].len = G_RSPD_LEN(lq) - fl_pktshift;
 			ring->slot[fl_cidx].flags = kring->nkr_slot_flags;
-			if (__predict_false(++fl_cidx == nm_rxq->fl_sidx))
+			fl_cidx += (lq & F_RSPD_NEWBUF) ? 1 : 0;
+			fl_credits += (lq & F_RSPD_NEWBUF) ? 1 : 0;
+			if (__predict_false(fl_cidx == nm_rxq->fl_sidx))
 				fl_cidx = 0;
 			break;
 		default:
@@ -1121,18 +1194,37 @@ t4_nm_intr(void *arg)
 			nm_rxq->iq_gen ^= F_RSPD_GEN;
 		}
 
-		if (__predict_false(++n == 64)) {	/* XXXNM: tune */
+		if (__predict_false(++n == rx_ndesc)) {
+			atomic_store_rel_32(&nm_rxq->fl_cidx, fl_cidx);
+			if (black_hole && fl_credits >= 8) {
+				fl_credits /= 8;
+				IDXINCR(nm_rxq->fl_pidx, fl_credits * 8,
+				    nm_rxq->fl_sidx);
+				t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
+				    nm_rxq->fl_db_val | V_PIDX(fl_credits));
+				fl_credits = fl_cidx & 7;
+			} else if (!black_hole) {
+				netmap_rx_irq(ifp, nm_rxq->nid, &work);
+				MPASS(work != 0);
+			}
 			t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
 			    V_CIDXINC(n) | V_INGRESSQID(nm_rxq->iq_cntxt_id) |
 			    V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
 			n = 0;
 		}
 	}
-	if (fl_cidx != nm_rxq->fl_cidx) {
-		atomic_store_rel_32(&nm_rxq->fl_cidx, fl_cidx);
-		netmap_rx_irq(ifp, nm_rxq->nid, &processed);
-	}
+
+	atomic_store_rel_32(&nm_rxq->fl_cidx, fl_cidx);
+	if (black_hole) {
+		fl_credits /= 8;
+		IDXINCR(nm_rxq->fl_pidx, fl_credits * 8, nm_rxq->fl_sidx);
+		t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
+		    nm_rxq->fl_db_val | V_PIDX(fl_credits));
+	} else
+		netmap_rx_irq(ifp, nm_rxq->nid, &work);
+
 	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(n) |
-	    V_INGRESSQID((u32)nm_rxq->iq_cntxt_id) | V_SEINTARM(F_QINTR_CNT_EN));
+	    V_INGRESSQID((u32)nm_rxq->iq_cntxt_id) |
+	    V_SEINTARM(V_QINTR_TIMER_IDX(holdoff_tmr_idx)));
 }
 #endif
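The reworked interrupt handler above is where r279700 and r279701 meet: the
hard-coded 64-descriptor batch becomes the nm_rx_ndesc knob, the final
re-arm uses the nm_holdoff_tmr_idx timer instead of F_QINTR_CNT_EN, and in
black-hole mode the handler never calls netmap_rx_irq() at all; it recycles
freelist buffers itself, eight at a time, by advancing fl_pidx and writing
the freelist doorbell.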
sys/dev/cxgbe/t4_sge.c

@@ -2563,7 +2563,7 @@ free_mgmtq(struct adapter *sc)
 	return free_wrq(sc, &sc->sge.mgmtq);
 }
 
-static inline int
+int
 tnl_cong(struct port_info *pi)
 {
 