cxgbe(4): Add another setting to the knob that controls congestion.

hw.cxgbe.cong_drop=2 will generate backpressure *and* drop frames for
queues that are congested.

MFC after:	2 weeks
Sponsored by:	Chelsio Communications
Navdeep Parhar 2022-09-09 15:16:22 -07:00
parent c387ff0045
commit df275ae509
4 changed files with 84 additions and 70 deletions
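cong_drop is a read-only tunable (CTLFLAG_RDTUN), so the mode is selected at boot; t4_sge_modload() resets out-of-range values to 0. A minimal usage sketch, assuming the tunable is set from loader.conf(5):

# /boot/loader.conf
hw.cxgbe.cong_drop="2"		# backpressure the pipeline *and* drop frames

# Read the effective value back after boot:
# sysctl hw.cxgbe.cong_drop
hw.cxgbe.cong_drop: 2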

share/man/man4/cxgbe.4

@@ -31,7 +31,7 @@
.\"
.\" $FreeBSD$
.\"
-.Dd November 10, 2021
+.Dd September 12, 2022
.Dt CXGBE 4
.Os
.Sh NAME
@@ -277,6 +277,7 @@ Controls the hardware response to congestion.
0 instructs the hardware to backpressure its pipeline on congestion.
This usually results in the port emitting PAUSE frames.
1 instructs the hardware to drop frames destined for congested queues.
+2 instructs the hardware to both backpressure the pipeline and drop frames.
.It Va hw.cxgbe.pause_settings
PAUSE frame settings.
Bit 0 is rx_pause, bit 1 is tx_pause, bit 2 is pause_autoneg.

sys/dev/cxgbe/adapter.h

@@ -428,7 +428,7 @@ struct sge_iq {
int8_t intr_pktc_idx; /* packet count threshold index */
uint8_t gen; /* generation bit */
uint8_t intr_params; /* interrupt holdoff parameters */
-int8_t cong; /* congestion settings */
+int8_t cong_drop; /* congestion drop settings for the queue */
uint16_t qsize; /* size (# of entries) of the queue */
uint16_t sidx; /* index of the entry with the status page */
uint16_t cidx; /* consumer index */
@@ -1384,7 +1384,7 @@ struct mbuf *alloc_wr_mbuf(int, int);
int parse_pkt(struct mbuf **, bool);
void *start_wrq_wr(struct sge_wrq *, int, struct wrq_cookie *);
void commit_wrq_wr(struct sge_wrq *, void *, struct wrq_cookie *);
-int tnl_cong(struct port_info *, int);
+int t4_sge_set_conm_context(struct adapter *, int, int, int);
void t4_register_an_handler(an_handler_t);
void t4_register_fw_msg_handler(int, fw_msg_handler_t);
void t4_register_cpl_handler(int, cpl_handler_t);

sys/dev/cxgbe/t4_netmap.c

@@ -276,14 +276,17 @@ free_nm_txq(struct vi_info *vi, struct sge_nm_txq *nm_txq)
}
static int
-alloc_nm_rxq_hwq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int cong)
+alloc_nm_rxq_hwq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq)
{
-int rc, cntxt_id, i;
+int rc, cntxt_id;
__be32 v;
struct adapter *sc = vi->adapter;
+struct port_info *pi = vi->pi;
struct sge_params *sp = &sc->params.sge;
struct netmap_adapter *na = NA(vi->ifp);
struct fw_iq_cmd c;
+const int cong_drop = nm_cong_drop;
+const int cong_map = pi->rx_e_chan_map;
MPASS(na != NULL);
MPASS(nm_rxq->iq_desc != NULL);
@@ -313,15 +316,15 @@ alloc_nm_rxq_hwq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int cong)
V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
V_FW_IQ_CMD_VIID(vi->viid) |
V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
-c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(vi->pi->tx_chan) |
+c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
F_FW_IQ_CMD_IQGTSMODE |
V_FW_IQ_CMD_IQINTCNTTHRESH(0) |
V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4));
c.iqsize = htobe16(vi->qsize_rxq);
c.iqaddr = htobe64(nm_rxq->iq_ba);
-if (cong >= 0) {
+if (cong_drop != -1) {
c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN |
-V_FW_IQ_CMD_FL0CNGCHMAP(cong) | F_FW_IQ_CMD_FL0CONGCIF |
+V_FW_IQ_CMD_FL0CNGCHMAP(cong_map) | F_FW_IQ_CMD_FL0CONGCIF |
F_FW_IQ_CMD_FL0CONGEN);
}
c.iqns_to_fl0congen |=
@@ -373,29 +376,9 @@ alloc_nm_rxq_hwq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int cong)
nm_rxq->fl_db_val = V_QID(nm_rxq->fl_cntxt_id) |
sc->chip_params->sge_fl_db;
-if (chip_id(sc) >= CHELSIO_T5 && cong >= 0) {
-uint32_t param, val;
-param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
-V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
-V_FW_PARAMS_PARAM_YZ(nm_rxq->iq_cntxt_id);
-if (cong == 0)
-val = 1 << 19;
-else {
-val = 2 << 19;
-for (i = 0; i < 4; i++) {
-if (cong & (1 << i))
-val |= 1 << (i << 2);
-}
-}
-rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
-if (rc != 0) {
-/* report error but carry on */
-device_printf(sc->dev,
-"failed to set congestion manager context for "
-"ingress queue %d: %d\n", nm_rxq->iq_cntxt_id, rc);
-}
+if (chip_id(sc) >= CHELSIO_T5 && cong_drop != -1) {
+t4_sge_set_conm_context(sc, nm_rxq->iq_cntxt_id, cong_drop,
+cong_map);
}
t4_write_reg(sc, sc->sge_gts_reg,
@@ -749,7 +732,7 @@ cxgbe_netmap_on(struct adapter *sc, struct vi_info *vi, struct ifnet *ifp,
if (!nm_kring_pending_on(kring))
continue;
-alloc_nm_rxq_hwq(vi, nm_rxq, tnl_cong(vi->pi, nm_cong_drop));
+alloc_nm_rxq_hwq(vi, nm_rxq);
nm_rxq->fl_hwidx = hwidx;
slot = netmap_reset(na, NR_RX, i, 0);
MPASS(slot != NULL); /* XXXNM: error check, not assert */

sys/dev/cxgbe/t4_sge.c

@@ -124,10 +124,11 @@ SYSCTL_INT(_hw_cxgbe, OID_AUTO, spg_len, CTLFLAG_RDTUN, &spg_len, 0,
* -1: no congestion feedback (not recommended).
* 0: backpressure the channel instead of dropping packets right away.
* 1: no backpressure, drop packets for the congested queue immediately.
+* 2: both backpressure and drop.
*/
static int cong_drop = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, cong_drop, CTLFLAG_RDTUN, &cong_drop, 0,
"Congestion control for RX queues (0 = backpressure, 1 = drop");
"Congestion control for NIC RX queues (0 = backpressure, 1 = drop, 2 = both");
/*
* Deliver multiple frames in the same free list buffer if they fit.
@@ -554,7 +555,7 @@ t4_sge_modload(void)
spg_len = len;
}
-if (cong_drop < -1 || cong_drop > 1) {
+if (cong_drop < -1 || cong_drop > 2) {
printf("Invalid hw.cxgbe.cong_drop value (%d),"
" using 0 instead.\n", cong_drop);
cong_drop = 0;
@@ -3382,7 +3383,7 @@ init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
iq->qsize = roundup2(qsize, 16); /* See FW_IQ_CMD/iqsize */
iq->sidx = iq->qsize - sc->params.sge.spg_len / IQ_ESIZE;
iq->intr_idx = intr_idx;
-iq->cong = cong;
+iq->cong_drop = cong;
}
static inline void
@@ -3548,9 +3549,10 @@ free_iq_fl(struct adapter *sc, struct sge_iq *iq, struct sge_fl *fl)
static int
alloc_iq_fl_hwq(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl)
{
-int rc, i, cntxt_id;
+int rc, cntxt_id, cong_map;
struct fw_iq_cmd c;
struct adapter *sc = vi->adapter;
+struct port_info *pi = vi->pi;
__be32 v = 0;
MPASS (!(iq->flags & IQ_HW_ALLOCATED));
@@ -3582,15 +3584,17 @@ alloc_iq_fl_hwq(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl)
V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
V_FW_IQ_CMD_VIID(vi->viid) |
V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
-c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(vi->pi->tx_chan) |
+c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
F_FW_IQ_CMD_IQGTSMODE |
V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) |
V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4));
c.iqsize = htobe16(iq->qsize);
c.iqaddr = htobe64(iq->ba);
c.iqns_to_fl0congen = htobe32(V_FW_IQ_CMD_IQTYPE(iq->qtype));
-if (iq->cong >= 0)
+if (iq->cong_drop != -1) {
+cong_map = iq->qtype == IQ_ETH ? pi->rx_e_chan_map : 0;
c.iqns_to_fl0congen |= htobe32(F_FW_IQ_CMD_IQFLINTCONGEN);
+}
if (fl) {
bzero(fl->desc, fl->sidx * EQ_ESIZE + sc->params.sge.spg_len);
@@ -3600,9 +3604,9 @@ alloc_iq_fl_hwq(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl)
(fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) |
(fl->flags & FL_BUF_PACKING ? F_FW_IQ_CMD_FL0PACKEN :
0));
-if (iq->cong >= 0) {
+if (iq->cong_drop != -1) {
c.iqns_to_fl0congen |=
-htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(iq->cong) |
+htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong_map) |
F_FW_IQ_CMD_FL0CONGCIF |
F_FW_IQ_CMD_FL0CONGEN);
}
@@ -3636,6 +3640,8 @@ alloc_iq_fl_hwq(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl)
if (fl) {
u_int qid;
#ifdef INVARIANTS
+int i;
MPASS(!(fl->flags & FL_BUF_RESUME));
for (i = 0; i < fl->sidx * 8; i++)
MPASS(fl->sdesc[i].cl == NULL);
@@ -3675,28 +3681,10 @@ alloc_iq_fl_hwq(struct vi_info *vi, struct sge_iq *iq, struct sge_fl *fl)
FL_UNLOCK(fl);
}
-if (chip_id(sc) >= CHELSIO_T5 && !(sc->flags & IS_VF) && iq->cong >= 0) {
-uint32_t param, val;
-param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
-V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
-V_FW_PARAMS_PARAM_YZ(iq->cntxt_id);
-if (iq->cong == 0)
-val = 1 << 19;
-else {
-val = 2 << 19;
-for (i = 0; i < 4; i++) {
-if (iq->cong & (1 << i))
-val |= 1 << (i << 2);
-}
-}
-rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
-if (rc != 0) {
-/* report error but carry on */
-CH_ERR(sc, "failed to set congestion manager context "
-"for ingress queue %d: %d\n", iq->cntxt_id, rc);
-}
+if (chip_id(sc) >= CHELSIO_T5 && !(sc->flags & IS_VF) &&
+iq->cong_drop != -1) {
+t4_sge_set_conm_context(sc, iq->cntxt_id, iq->cong_drop,
+cong_map);
}
/* Enable IQ interrupts */
@@ -3920,15 +3908,57 @@ free_ctrlq(struct adapter *sc, int idx)
}
int
-tnl_cong(struct port_info *pi, int drop)
+t4_sge_set_conm_context(struct adapter *sc, int cntxt_id, int cong_drop,
+int cong_map)
{
+const int cng_ch_bits_log = sc->chip_params->cng_ch_bits_log;
+uint32_t param, val;
+uint16_t ch_map;
+int cong_mode, rc, i;
-if (drop == -1)
-return (-1);
-else if (drop == 1)
-return (0);
-else
-return (pi->rx_e_chan_map);
+if (chip_id(sc) < CHELSIO_T5)
+return (ENOTSUP);
+/* Convert the driver knob to the mode understood by the firmware. */
+switch (cong_drop) {
+case -1:
+cong_mode = X_CONMCTXT_CNGTPMODE_DISABLE;
+break;
+case 0:
+cong_mode = X_CONMCTXT_CNGTPMODE_CHANNEL;
+break;
+case 1:
+cong_mode = X_CONMCTXT_CNGTPMODE_QUEUE;
+break;
+case 2:
+cong_mode = X_CONMCTXT_CNGTPMODE_BOTH;
+break;
+default:
+MPASS(0);
+CH_ERR(sc, "cong_drop = %d is invalid (ingress queue %d).\n",
+cong_drop, cntxt_id);
+return (EINVAL);
+}
+param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
+V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
+V_FW_PARAMS_PARAM_YZ(cntxt_id);
+val = V_CONMCTXT_CNGTPMODE(cong_mode);
+if (cong_mode == X_CONMCTXT_CNGTPMODE_CHANNEL ||
+cong_mode == X_CONMCTXT_CNGTPMODE_BOTH) {
+for (i = 0, ch_map = 0; i < 4; i++) {
+if (cong_map & (1 << i))
+ch_map |= 1 << (i << cng_ch_bits_log);
+}
+val |= V_CONMCTXT_CNGCHMAP(ch_map);
+}
+rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
+if (rc != 0) {
+CH_ERR(sc, "failed to set congestion manager context "
+"for ingress queue %d: %d\n", cntxt_id, rc);
+}
+return (rc);
}
/*
@@ -3960,7 +3990,7 @@ alloc_rxq(struct vi_info *vi, struct sge_rxq *rxq, int idx, int intr_idx,
"rx queue");
init_iq(&rxq->iq, sc, vi->tmr_idx, vi->pktc_idx, vi->qsize_rxq,
-intr_idx, tnl_cong(vi->pi, cong_drop), IQ_ETH);
+intr_idx, cong_drop, IQ_ETH);
#if defined(INET) || defined(INET6)
if (ifp->if_capenable & IFCAP_LRO)
rxq->iq.flags |= IQ_LRO_ENABLED;
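
One detail of the new t4_sge_set_conm_context() helper worth calling out: the loop places each congested channel's bit at position i << cng_ch_bits_log in the CNGCHMAP field. A small standalone sketch of that expansion, assuming cng_ch_bits_log is 2 (the shift the old open-coded version hard-wired as i << 2):

#include <stdio.h>

/*
 * Mirrors the ch_map loop in t4_sge_set_conm_context(): each bit set in
 * cong_map names a congested channel, and that channel's bit is moved to
 * position i << cng_ch_bits_log in the congestion channel map.
 */
static unsigned int
expand_cong_map(unsigned int cong_map, int cng_ch_bits_log)
{
	unsigned int ch_map = 0;
	int i;

	for (i = 0; i < 4; i++) {
		if (cong_map & (1 << i))
			ch_map |= 1 << (i << cng_ch_bits_log);
	}
	return (ch_map);
}

int
main(void)
{
	/* Channels 0 and 1 congested (0x3) -> 0x11 with 4-bit slots. */
	printf("ch_map = 0x%x\n", expand_cong_map(0x3, 2));
	return (0);
}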