cxgbe(4): let the PF driver use VM work requests for transmit.

This allows the PF interfaces to communicate with the VF interfaces over
the internal switch in the ASIC.  Fix the GL limits for VM work requests
while here.

MFC after:	3 days
Sponsored by:	Chelsio Communications
Navdeep Parhar 2020-09-22 04:16:40 +00:00
parent 54fab0fbc4
commit 30e3f2b4ea
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=365993
4 changed files with 124 additions and 22 deletions

View File

@ -351,6 +351,17 @@ This tunable is for specialized applications only and should not be used in
normal operation.
The capabilities for which hardware resources have been reserved are listed in
dev.<nexus>.X.*caps sysctls.
.It Va hw.cxgbe.tx_vm_wr
Setting this to 1 instructs the driver to use VM work requests to transmit data.
This lets PF interfaces transmit frames to VF interfaces over the internal
switch in the ASIC.
Note that the
.Xr cxgbev 4
VF driver always uses VM work requests and is not affected by this tunable.
The default value is 0 and should be changed only if PF and VF interfaces need
to communicate with each other.
Different interfaces can be assigned different values using the
dev.<port>.X.tx_vm_wr sysctl when the interface is administratively down.
.El
.Sh SUPPORT
For general information and support,
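As a usage illustration (not part of the commit), the tunable and per-interface sysctl documented in the hunk above can also be driven from userland with sysctlbyname(3). The node name below (dev.cc.0) is a hypothetical T6 port; substitute the real dev.<port>.X node. The per-interface knob can only be written while the interface is administratively down (the handler returns EBUSY otherwise), and the global default can be set at boot via the hw.cxgbe.tx_vm_wr loader tunable since it is CTLFLAG_RWTUN.

/*
 * Minimal sketch: enable VM work requests on one (assumed) interface.
 * "dev.cc.0.tx_vm_wr" is an illustrative OID, not taken from the commit.
 */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
	int val = 1;

	if (sysctlbyname("dev.cc.0.tx_vm_wr", NULL, NULL, &val,
	    sizeof(val)) == -1)
		err(1, "dev.cc.0.tx_vm_wr");	/* EBUSY if the interface is up */
	printf("VM work requests enabled\n");
	return (0);
}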

View File

@ -118,6 +118,8 @@ enum {
SGE_MAX_WR_NDESC = SGE_MAX_WR_LEN / EQ_ESIZE, /* max WR size in desc */
TX_SGL_SEGS = 39,
TX_SGL_SEGS_TSO = 38,
TX_SGL_SEGS_VM = 38,
TX_SGL_SEGS_VM_TSO = 37,
TX_SGL_SEGS_EO_TSO = 30, /* XXX: lower for IPv6. */
TX_SGL_SEGS_VXLAN_TSO = 37,
TX_WR_FLITS = SGE_MAX_WR_LEN / 8
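The VM variants above sit one segment below their plain Ethernet counterparts, presumably because a VM work request carries the larger fw_eth_tx_pkt_vm_wr header; these are the gather-list (GL) limits the commit message says it fixes. A rough standalone sanity check of the numbers, under the assumptions that SGE_MAX_WR_LEN is 512 bytes, that fw_eth_tx_pkt_wr, cpl_tx_pkt_core, cpl_tx_pkt_lso_core, and ulptx_sgl are 16 bytes each, and that fw_eth_tx_pkt_vm_wr is 32 bytes:

/*
 * Standalone sketch (not from the commit) reproducing the SGL limits.
 * The struct sizes passed to max_sgl_segs() are assumptions; the segment
 * accounting mirrors the txpkt_len16()/txpkt_vm_len16() style formula.
 */
#include <stdio.h>

static int
max_sgl_segs(int hdr_bytes)
{
	int nsegs;

	for (nsegs = 1;; nsegs++) {
		/*
		 * The first segment is embedded in ulptx_sgl (16 bytes);
		 * each further pair of segments takes 24 bytes and an odd
		 * leftover segment takes 16.
		 */
		int rest = nsegs - 1;
		int wr_len = hdr_bytes + 16 +
		    8 * ((3 * rest) / 2 + (rest & 1));

		if (wr_len > 512)	/* assumed SGE_MAX_WR_LEN */
			return (nsegs - 1);
	}
}

int
main(void)
{
	printf("TX_SGL_SEGS        = %d\n", max_sgl_segs(16 + 16));		/* 39 */
	printf("TX_SGL_SEGS_TSO    = %d\n", max_sgl_segs(16 + 16 + 16));	/* 38 */
	printf("TX_SGL_SEGS_VM     = %d\n", max_sgl_segs(32 + 16));		/* 38 */
	printf("TX_SGL_SEGS_VM_TSO = %d\n", max_sgl_segs(32 + 16 + 16));	/* 37 */
	return (0);
}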
@ -173,6 +175,7 @@ enum {
DOOMED = (1 << 0),
VI_INIT_DONE = (1 << 1),
VI_SYSCTL_CTX = (1 << 2),
TX_USES_VM_WR = (1 << 3),
/* adapter debug_flags */
DF_DUMP_MBOX = (1 << 0), /* Log all mbox cmd/rpl. */
@ -1267,7 +1270,7 @@ void t4_intr_evt(void *);
void t4_wrq_tx_locked(struct adapter *, struct sge_wrq *, struct wrqe *);
void t4_update_fl_bufsize(struct ifnet *);
struct mbuf *alloc_wr_mbuf(int, int);
int parse_pkt(struct adapter *, struct mbuf **);
int parse_pkt(struct mbuf **, bool);
void *start_wrq_wr(struct sge_wrq *, int, struct wrq_cookie *);
void commit_wrq_wr(struct sge_wrq *, void *, struct wrq_cookie *);
int tnl_cong(struct port_info *, int);

View File

@ -591,6 +591,10 @@ static int t4_panic_on_fatal_err = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, panic_on_fatal_err, CTLFLAG_RDTUN,
&t4_panic_on_fatal_err, 0, "panic on fatal errors");
static int t4_tx_vm_wr = 0;
SYSCTL_INT(_hw_cxgbe, OID_AUTO, tx_vm_wr, CTLFLAG_RWTUN, &t4_tx_vm_wr, 0,
"Use VM work requests to transmit packets.");
#ifdef TCP_OFFLOAD
/*
* TOE tunables.
@ -695,6 +699,7 @@ static int sysctl_bitfield_8b(SYSCTL_HANDLER_ARGS);
static int sysctl_bitfield_16b(SYSCTL_HANDLER_ARGS);
static int sysctl_btphy(SYSCTL_HANDLER_ARGS);
static int sysctl_noflowq(SYSCTL_HANDLER_ARGS);
static int sysctl_tx_vm_wr(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS);
static int sysctl_holdoff_pktc_idx(SYSCTL_HANDLER_ARGS);
static int sysctl_qsize_rxq(SYSCTL_HANDLER_ARGS);
@ -1723,6 +1728,8 @@ cxgbe_vi_attach(device_t dev, struct vi_info *vi)
vi->xact_addr_filt = -1;
callout_init(&vi->tick, 1);
if (sc->flags & IS_VF || t4_tx_vm_wr != 0)
vi->flags |= TX_USES_VM_WR;
/* Allocate an ifnet and set it up */
ifp = if_alloc_dev(IFT_ETHER, dev);
@ -1775,7 +1782,10 @@ cxgbe_vi_attach(device_t dev, struct vi_info *vi)
#endif
ifp->if_hw_tsomax = IP_MAXPACKET;
ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_TSO;
if (vi->flags & TX_USES_VM_WR)
ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_VM_TSO;
else
ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_TSO;
#ifdef RATELIMIT
if (is_ethoffload(sc) && vi->nofldtxq != 0)
ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_EO_TSO;
@ -2174,7 +2184,7 @@ cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
{
struct vi_info *vi = ifp->if_softc;
struct port_info *pi = vi->pi;
struct adapter *sc = pi->adapter;
struct adapter *sc;
struct sge_txq *txq;
#ifdef RATELIMIT
struct cxgbe_snd_tag *cst;
@ -2194,7 +2204,7 @@ cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
return (ENETDOWN);
}
rc = parse_pkt(sc, &m);
rc = parse_pkt(&m, vi->flags & TX_USES_VM_WR);
if (__predict_false(rc != 0)) {
MPASS(m == NULL); /* was freed already */
atomic_add_int(&pi->tx_parse_error, 1); /* rare, atomic is ok */
@ -2209,6 +2219,7 @@ cxgbe_transmit(struct ifnet *ifp, struct mbuf *m)
#endif
/* Select a txq. */
sc = vi->adapter;
txq = &sc->sge.txq[vi->first_txq];
if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
txq += ((m->m_pkthdr.flowid % (vi->ntxq - vi->rsrv_noflowq)) +
@ -6818,6 +6829,16 @@ vi_sysctls(struct vi_info *vi)
"Reserve queue 0 for non-flowid packets");
}
if (vi->adapter->flags & IS_VF) {
MPASS(vi->flags & TX_USES_VM_WR);
SYSCTL_ADD_UINT(ctx, children, OID_AUTO, "tx_vm_wr", CTLFLAG_RD,
NULL, 1, "use VM work requests for transmit");
} else {
SYSCTL_ADD_PROC(ctx, children, OID_AUTO, "tx_vm_wr",
CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, vi, 0,
sysctl_tx_vm_wr, "I", "use VM work requestes for transmit");
}
#ifdef TCP_OFFLOAD
if (vi->nofldrxq != 0) {
SYSCTL_ADD_INT(ctx, children, OID_AUTO, "nofldrxq", CTLFLAG_RD,
@ -7248,6 +7269,63 @@ sysctl_noflowq(SYSCTL_HANDLER_ARGS)
return (rc);
}
static int
sysctl_tx_vm_wr(SYSCTL_HANDLER_ARGS)
{
struct vi_info *vi = arg1;
struct adapter *sc = vi->adapter;
int rc, val, i;
MPASS(!(sc->flags & IS_VF));
val = vi->flags & TX_USES_VM_WR ? 1 : 0;
rc = sysctl_handle_int(oidp, &val, 0, req);
if (rc != 0 || req->newptr == NULL)
return (rc);
if (val != 0 && val != 1)
return (EINVAL);
rc = begin_synchronized_op(sc, vi, HOLD_LOCK | SLEEP_OK | INTR_OK,
"t4txvm");
if (rc)
return (rc);
if (vi->ifp->if_drv_flags & IFF_DRV_RUNNING) {
/*
* We don't want parse_pkt to run with one setting (VF or PF)
* and then eth_tx to see a different setting but still use
* stale information calculated by parse_pkt.
*/
rc = EBUSY;
} else {
struct port_info *pi = vi->pi;
struct sge_txq *txq;
uint32_t ctrl0;
uint8_t npkt = sc->params.max_pkts_per_eth_tx_pkts_wr;
if (val) {
vi->flags |= TX_USES_VM_WR;
vi->ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_VM_TSO;
ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
V_TXPKT_INTF(pi->tx_chan));
if (!(sc->flags & IS_VF))
npkt--;
} else {
vi->flags &= ~TX_USES_VM_WR;
vi->ifp->if_hw_tsomaxsegcount = TX_SGL_SEGS_TSO;
ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(sc->pf) |
V_TXPKT_VF(vi->vin) | V_TXPKT_VF_VLD(vi->vfvld));
}
for_each_txq(vi, i, txq) {
txq->cpl_ctrl0 = ctrl0;
txq->txp.max_npkt = npkt;
}
}
end_synchronized_op(sc, LOCK_HELD);
return (rc);
}
static int
sysctl_holdoff_tmr_idx(SYSCTL_HANDLER_ARGS)
{

View File

@ -278,7 +278,7 @@ static void add_fl_to_sfl(struct adapter *, struct sge_fl *);
static inline void get_pkt_gl(struct mbuf *, struct sglist *);
static inline u_int txpkt_len16(u_int, const u_int);
static inline u_int txpkt_vm_len16(u_int, const u_int);
static inline void calculate_mbuf_len16(struct adapter *, struct mbuf *);
static inline void calculate_mbuf_len16(struct mbuf *, bool);
static inline u_int txpkts0_len16(u_int);
static inline u_int txpkts1_len16(void);
static u_int write_raw_wr(struct sge_txq *, void *, struct mbuf *, u_int);
@ -2323,6 +2323,8 @@ set_mbuf_len16(struct mbuf *m, uint8_t len16)
{
M_ASSERTPKTHDR(m);
if (!(mbuf_cflags(m) & MC_TLS))
MPASS(len16 > 0 && len16 <= SGE_MAX_WR_LEN / 16);
m->m_pkthdr.PH_loc.eight[0] = len16;
}
@ -2657,9 +2659,15 @@ count_mbuf_nsegs(struct mbuf *m, int skip, uint8_t *cflags)
* The maximum number of segments that can fit in a WR.
*/
static int
max_nsegs_allowed(struct mbuf *m)
max_nsegs_allowed(struct mbuf *m, bool vm_wr)
{
if (vm_wr) {
if (needs_tso(m))
return (TX_SGL_SEGS_VM_TSO);
return (TX_SGL_SEGS_VM);
}
if (needs_tso(m)) {
if (needs_vxlan_tso(m))
return (TX_SGL_SEGS_VXLAN_TSO);
@ -2676,7 +2684,7 @@ max_nsegs_allowed(struct mbuf *m)
* b) it may get defragged up if the gather list is too long for the hardware.
*/
int
parse_pkt(struct adapter *sc, struct mbuf **mp)
parse_pkt(struct mbuf **mp, bool vm_wr)
{
struct mbuf *m0 = *mp, *m;
int rc, nsegs, defragged = 0, offset;
@ -2728,7 +2736,7 @@ parse_pkt(struct adapter *sc, struct mbuf **mp)
return (0);
}
#endif
if (nsegs > max_nsegs_allowed(m0)) {
if (nsegs > max_nsegs_allowed(m0, vm_wr)) {
if (defragged++ > 0) {
rc = EFBIG;
goto fail;
@ -2756,7 +2764,7 @@ parse_pkt(struct adapter *sc, struct mbuf **mp)
}
set_mbuf_nsegs(m0, nsegs);
set_mbuf_cflags(m0, cflags);
calculate_mbuf_len16(sc, m0);
calculate_mbuf_len16(m0, vm_wr);
#ifdef RATELIMIT
/*
@ -3168,7 +3176,7 @@ eth_tx(struct mp_ring *r, u_int cidx, u_int pidx, bool *coalescing)
if (txp->npkt > 0 || remaining > 1 || txp->score > 3 ||
atomic_load_int(&txq->eq.equiq) != 0) {
if (sc->flags & IS_VF)
if (vi->flags & TX_USES_VM_WR)
rc = add_to_txpkts_vf(sc, txq, m0, avail, &snd);
else
rc = add_to_txpkts_pf(sc, txq, m0, avail, &snd);
@ -3184,14 +3192,14 @@ eth_tx(struct mp_ring *r, u_int cidx, u_int pidx, bool *coalescing)
if (txp->score++ >= 10)
txp->score = 10;
MPASS(avail >= tx_len16_to_desc(txp->len16));
if (sc->flags & IS_VF)
if (vi->flags & TX_USES_VM_WR)
n = write_txpkts_vm_wr(sc, txq);
else
n = write_txpkts_wr(sc, txq);
} else {
MPASS(avail >=
tx_len16_to_desc(mbuf_len16(txp->mb[0])));
if (sc->flags & IS_VF)
if (vi->flags & TX_USES_VM_WR)
n = write_txpkt_vm_wr(sc, txq,
txp->mb[0]);
else
@ -3241,7 +3249,7 @@ eth_tx(struct mp_ring *r, u_int cidx, u_int pidx, bool *coalescing)
#endif
} else {
ETHER_BPF_MTAP(ifp, m0);
if (sc->flags & IS_VF)
if (vi->flags & TX_USES_VM_WR)
n = write_txpkt_vm_wr(sc, txq, m0);
else
n = write_txpkt_wr(sc, txq, m0, avail);
@ -3285,14 +3293,14 @@ eth_tx(struct mp_ring *r, u_int cidx, u_int pidx, bool *coalescing)
ETHER_BPF_MTAP(ifp, txp->mb[i]);
if (txp->npkt > 1) {
MPASS(avail >= tx_len16_to_desc(txp->len16));
if (sc->flags & IS_VF)
if (vi->flags & TX_USES_VM_WR)
n = write_txpkts_vm_wr(sc, txq);
else
n = write_txpkts_wr(sc, txq);
} else {
MPASS(avail >=
tx_len16_to_desc(mbuf_len16(txp->mb[0])));
if (sc->flags & IS_VF)
if (vi->flags & TX_USES_VM_WR)
n = write_txpkt_vm_wr(sc, txq, txp->mb[0]);
else
n = write_txpkt_wr(sc, txq, txp->mb[0], avail);
@ -4431,7 +4439,7 @@ alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx,
TASK_INIT(&txq->tx_reclaim_task, 0, tx_reclaim, eq);
txq->ifp = vi->ifp;
txq->gl = sglist_alloc(TX_SGL_SEGS, M_WAITOK);
if (sc->flags & IS_VF)
if (vi->flags & TX_USES_VM_WR)
txq->cpl_ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT_XT) |
V_TXPKT_INTF(pi->tx_chan));
else
@ -4447,6 +4455,8 @@ alloc_txq(struct vi_info *vi, struct sge_txq *txq, int idx,
MPASS(nitems(txp->mb) >= sc->params.max_pkts_per_eth_tx_pkts_wr);
txq->txp.max_npkt = min(nitems(txp->mb),
sc->params.max_pkts_per_eth_tx_pkts_wr);
if (vi->flags & TX_USES_VM_WR && !(sc->flags & IS_VF))
txq->txp.max_npkt--;
snprintf(name, sizeof(name), "%d", idx);
oid = SYSCTL_ADD_NODE(&vi->ctx, children, OID_AUTO, name,
@ -4796,9 +4806,11 @@ get_pkt_gl(struct mbuf *m, struct sglist *gl)
KASSERT(gl->sg_nseg == mbuf_nsegs(m),
("%s: nsegs changed for mbuf %p from %d to %d", __func__, m,
mbuf_nsegs(m), gl->sg_nseg));
KASSERT(gl->sg_nseg > 0 && gl->sg_nseg <= max_nsegs_allowed(m),
#if 0 /* vm_wr not readily available here. */
KASSERT(gl->sg_nseg > 0 && gl->sg_nseg <= max_nsegs_allowed(m, vm_wr),
("%s: %d segments, should have been 1 <= nsegs <= %d", __func__,
gl->sg_nseg, max_nsegs_allowed(m)));
gl->sg_nseg, max_nsegs_allowed(m, vm_wr)));
#endif
}
/*
@ -4839,12 +4851,12 @@ txpkt_vm_len16(u_int nsegs, const u_int extra)
}
static inline void
calculate_mbuf_len16(struct adapter *sc, struct mbuf *m)
calculate_mbuf_len16(struct mbuf *m, bool vm_wr)
{
const int lso = sizeof(struct cpl_tx_pkt_lso_core);
const int tnl_lso = sizeof(struct cpl_tx_tnl_lso);
if (sc->flags & IS_VF) {
if (vm_wr) {
if (needs_tso(m))
set_mbuf_len16(m, txpkt_vm_len16(mbuf_nsegs(m), lso));
else
@ -5348,8 +5360,6 @@ add_to_txpkts_vf(struct adapter *sc, struct sge_txq *txq, struct mbuf *m,
{
struct txpkts *txp = &txq->txp;
MPASS(sc->flags & IS_VF);
/* Cannot have TSO and coalesce at the same time. */
if (cannot_use_txpkts(m)) {
cannot_coalesce: