bnxt(4) Enable LRO support, redux

iflib - reset fl->ifl_fragidx to 0 in iflib_fl_bufs_free().  Not resetting
it caused the panic in em/igb when adding the interface to a bridge device.

iflib - Handle out of order packet delivery from hardware in support of LRO

Out-of-order updates to rxds were addressed in r315217, but the fix was
incomplete.  When refilling buffers, iflib does not take out-of-order
descriptors into account: the "idx" variable in the _iflib_fl_refill
routine is incremented sequentially, so buffers are refilled sequentially.
Refilling sequentially overwrites SGEs that are *IN USE* by other
connections.  The fix is to maintain a bitmap of rx descriptors that
distinguishes used entries from unused ones, and to refill only at the
unused indices.  This patch also fixes a few bugs in bnxt related to the
same feature.

Submitted by:	bhargava.marreddy@broadcom.com
Reviewed by:	venkatkumar.duvvuru@broadcom.com shurd
Differential Revision:	https://reviews.freebsd.org/D10681
This commit is contained in:
sbruno 2017-07-03 18:23:35 +00:00
parent 1aaa174dd2
commit 247f67d439
5 changed files with 81 additions and 50 deletions

View File

@ -438,6 +438,7 @@ struct bnxt_ring {
uint32_t ring_size; /* Must be a power of two */
uint16_t id; /* Logical ID */
uint16_t phys_id;
struct bnxt_full_tpa_start *tpa_start;
};
struct bnxt_cp_ring {
@ -564,7 +565,6 @@ struct bnxt_softc {
struct sysctl_ctx_list hw_stats;
struct sysctl_oid *hw_stats_oid;
struct bnxt_full_tpa_start *tpa_start;
struct bnxt_ver_info *ver_info;
struct bnxt_nvram_info *nvm_info;
bool wol;

View File

@ -935,7 +935,7 @@ bnxt_hwrm_vnic_tpa_cfg(struct bnxt_softc *softc, struct bnxt_vnic_info *vnic,
/* TODO: Calculate this based on ring size? */
req.max_agg_segs = htole16(3);
/* Base this in the allocated TPA start size... */
req.max_aggs = htole16(2);
req.max_aggs = htole16(7);
/*
* TODO: max_agg_timer?
* req.mag_agg_timer = htole32(XXX);

View File

@ -264,6 +264,7 @@ bnxt_isc_rxd_refill(void *sc, if_rxd_update_t iru)
uint8_t flid;
uint64_t *paddrs;
caddr_t *vaddrs;
qidx_t *frag_idxs;
rxqid = iru->iru_qsidx;
count = iru->iru_count;
@ -272,6 +273,7 @@ bnxt_isc_rxd_refill(void *sc, if_rxd_update_t iru)
flid = iru->iru_flidx;
vaddrs = iru->iru_vaddrs;
paddrs = iru->iru_paddrs;
frag_idxs = iru->iru_idxs;
if (flid == 0) {
rx_ring = &softc->rx_rings[rxqid];
@ -287,8 +289,8 @@ bnxt_isc_rxd_refill(void *sc, if_rxd_update_t iru)
rxbd[pidx].flags_type = htole16(type);
rxbd[pidx].len = htole16(len);
/* No need to byte-swap the opaque value */
rxbd[pidx].opaque = ((rxqid & 0xff) << 24) | (flid << 16)
| pidx;
rxbd[pidx].opaque = (((rxqid & 0xff) << 24) | (flid << 16)
| (frag_idxs[i]));
rxbd[pidx].addr = htole64(paddrs[i]);
if (++pidx == rx_ring->ring_size)
pidx = 0;
@ -329,7 +331,6 @@ bnxt_isc_rxd_available(void *sc, uint16_t rxqid, qidx_t idx, qidx_t budget)
struct bnxt_softc *softc = (struct bnxt_softc *)sc;
struct bnxt_cp_ring *cpr = &softc->rx_cp_rings[rxqid];
struct rx_pkt_cmpl *rcp;
struct rx_tpa_start_cmpl *rtpa;
struct rx_tpa_end_cmpl *rtpae;
struct cmpl_base *cmp = (struct cmpl_base *)cpr->ring.vaddr;
int avail = 0;
@ -338,7 +339,6 @@ bnxt_isc_rxd_available(void *sc, uint16_t rxqid, qidx_t idx, qidx_t budget)
uint8_t ags;
int i;
uint16_t type;
uint8_t agg_id;
for (;;) {
NEXT_CP_CONS_V(&cpr->ring, cons, v_bit);
@ -388,18 +388,11 @@ bnxt_isc_rxd_available(void *sc, uint16_t rxqid, qidx_t idx, qidx_t budget)
avail++;
break;
case CMPL_BASE_TYPE_RX_TPA_START:
rtpa = (void *)&cmp[cons];
agg_id = (rtpa->agg_id &
RX_TPA_START_CMPL_AGG_ID_MASK) >>
RX_TPA_START_CMPL_AGG_ID_SFT;
softc->tpa_start[agg_id].low = *rtpa;
NEXT_CP_CONS_V(&cpr->ring, cons, v_bit);
CMPL_PREFETCH_NEXT(cpr, cons);
if (!CMP_VALID(&cmp[cons], v_bit))
goto cmpl_invalid;
softc->tpa_start[agg_id].high =
((struct rx_tpa_start_cmpl_hi *)cmp)[cons];
break;
case CMPL_BASE_TYPE_RX_AGG:
break;
@ -549,7 +542,7 @@ bnxt_pkt_get_tpa(struct bnxt_softc *softc, if_rxd_info_t ri,
/* Get the agg_id */
agg_id = (agend->agg_id & RX_TPA_END_CMPL_AGG_ID_MASK) >>
RX_TPA_END_CMPL_AGG_ID_SFT;
tpas = &softc->tpa_start[agg_id];
tpas = &(softc->rx_rings[ri->iri_qsidx].tpa_start[agg_id]);
/* Extract from the first 16-byte BD */
if (le16toh(tpas->low.flags_type) & RX_TPA_START_CMPL_FLAGS_RSS_VALID) {
@ -563,8 +556,8 @@ bnxt_pkt_get_tpa(struct bnxt_softc *softc, if_rxd_info_t ri,
RX_TPA_END_CMPL_AGG_BUFS_SFT;
ri->iri_nfrags = ags + 1;
/* No need to byte-swap the opaque value */
ri->iri_frags[0].irf_flid = (tpas->low.opaque >> 16) & 0xff;
ri->iri_frags[0].irf_idx = tpas->low.opaque & 0xffff;
ri->iri_frags[0].irf_flid = ((tpas->low.opaque >> 16) & 0xff);
ri->iri_frags[0].irf_idx = (tpas->low.opaque & 0xffff);
ri->iri_frags[0].irf_len = le16toh(tpas->low.len);
ri->iri_len = le16toh(tpas->low.len);
@ -600,8 +593,8 @@ bnxt_pkt_get_tpa(struct bnxt_softc *softc, if_rxd_info_t ri,
acp = &((struct rx_abuf_cmpl *)cpr->ring.vaddr)[cpr->cons];
/* No need to byte-swap the opaque value */
ri->iri_frags[i].irf_flid = (acp->opaque >> 16) & 0xff;
ri->iri_frags[i].irf_idx = acp->opaque & 0xffff;
ri->iri_frags[i].irf_flid = ((acp->opaque >> 16) & 0xff);
ri->iri_frags[i].irf_idx = (acp->opaque & 0xffff);
ri->iri_frags[i].irf_len = le16toh(acp->len);
ri->iri_len += le16toh(acp->len);
}
@ -609,8 +602,8 @@ bnxt_pkt_get_tpa(struct bnxt_softc *softc, if_rxd_info_t ri,
/* And finally, the empty BD at the end... */
ri->iri_nfrags++;
/* No need to byte-swap the opaque value */
ri->iri_frags[i].irf_flid = (agend->opaque >> 16) % 0xff;
ri->iri_frags[i].irf_idx = agend->opaque & 0xffff;
ri->iri_frags[i].irf_flid = ((agend->opaque >> 16) & 0xff);
ri->iri_frags[i].irf_idx = (agend->opaque & 0xffff);
ri->iri_frags[i].irf_len = le16toh(agend->len);
ri->iri_len += le16toh(agend->len);
@ -623,9 +616,12 @@ bnxt_isc_rxd_pkt_get(void *sc, if_rxd_info_t ri)
{
struct bnxt_softc *softc = (struct bnxt_softc *)sc;
struct bnxt_cp_ring *cpr = &softc->rx_cp_rings[ri->iri_qsidx];
struct cmpl_base *cmp_q = (struct cmpl_base *)cpr->ring.vaddr;
struct cmpl_base *cmp;
struct rx_tpa_start_cmpl *rtpa;
uint16_t flags_type;
uint16_t type;
uint8_t agg_id;
for (;;) {
NEXT_CP_CONS_V(&cpr->ring, cpr->cons, cpr->v_bit);
@ -642,9 +638,18 @@ bnxt_isc_rxd_pkt_get(void *sc, if_rxd_info_t ri)
case CMPL_BASE_TYPE_RX_TPA_END:
return bnxt_pkt_get_tpa(softc, ri, cpr, flags_type);
case CMPL_BASE_TYPE_RX_TPA_START:
rtpa = (void *)&cmp_q[cpr->cons];
agg_id = (rtpa->agg_id &
RX_TPA_START_CMPL_AGG_ID_MASK) >>
RX_TPA_START_CMPL_AGG_ID_SFT;
softc->rx_rings[ri->iri_qsidx].tpa_start[agg_id].low = *rtpa;
NEXT_CP_CONS_V(&cpr->ring, cpr->cons, cpr->v_bit);
ri->iri_cidx = RING_NEXT(&cpr->ring, ri->iri_cidx);
CMPL_PREFETCH_NEXT(cpr, cpr->cons);
softc->rx_rings[ri->iri_qsidx].tpa_start[agg_id].high =
((struct rx_tpa_start_cmpl_hi *)cmp_q)[cpr->cons];
break;
default:
device_printf(softc->dev,

View File

@ -506,6 +506,17 @@ bnxt_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs,
softc->rx_rings[i].vaddr = vaddrs[i * nrxqs + 1];
softc->rx_rings[i].paddr = paddrs[i * nrxqs + 1];
/* Allocate the TPA start buffer */
softc->rx_rings[i].tpa_start = malloc(sizeof(struct bnxt_full_tpa_start) *
(RX_TPA_START_CMPL_AGG_ID_MASK >> RX_TPA_START_CMPL_AGG_ID_SFT),
M_DEVBUF, M_NOWAIT | M_ZERO);
if (softc->rx_rings[i].tpa_start == NULL) {
rc = -ENOMEM;
device_printf(softc->dev,
"Unable to allocate space for TPA\n");
goto tpa_alloc_fail;
}
/* Allocate the AG ring */
softc->ag_rings[i].phys_id = (uint16_t)HWRM_NA_SIGNATURE;
softc->ag_rings[i].softc = softc;
@ -571,7 +582,10 @@ rss_grp_alloc_fail:
iflib_dma_free(&softc->vnic_info.rss_hash_key_tbl);
rss_hash_alloc_fail:
iflib_dma_free(&softc->vnic_info.mc_list);
tpa_alloc_fail:
mc_list_alloc_fail:
for (i = i - 1; i >= 0; i--)
free(softc->rx_rings[i].tpa_start, M_DEVBUF);
iflib_dma_free(&softc->rx_stats);
hw_stats_alloc_fail:
free(softc->grp_info, M_DEVBUF);
@ -635,16 +649,6 @@ bnxt_attach_pre(if_ctx_t ctx)
if (rc)
goto dma_fail;
/* Allocate the TPA start buffer */
softc->tpa_start = malloc(sizeof(struct bnxt_full_tpa_start) *
(RX_TPA_START_CMPL_AGG_ID_MASK >> RX_TPA_START_CMPL_AGG_ID_SFT),
M_DEVBUF, M_NOWAIT | M_ZERO);
if (softc->tpa_start == NULL) {
rc = ENOMEM;
device_printf(softc->dev,
"Unable to allocate space for TPA\n");
goto tpa_failed;
}
/* Get firmware version and compare with driver */
softc->ver_info = malloc(sizeof(struct bnxt_ver_info),
@ -814,8 +818,6 @@ nvm_alloc_fail:
ver_fail:
free(softc->ver_info, M_DEVBUF);
ver_alloc_fail:
free(softc->tpa_start, M_DEVBUF);
tpa_failed:
bnxt_free_hwrm_dma_mem(softc);
dma_fail:
BNXT_HWRM_LOCK_DESTROY(softc);
@ -877,7 +879,8 @@ bnxt_detach(if_ctx_t ctx)
SLIST_FOREACH_SAFE(tag, &softc->vnic_info.vlan_tags, next, tmp)
free(tag, M_DEVBUF);
iflib_dma_free(&softc->def_cp_ring_mem);
free(softc->tpa_start, M_DEVBUF);
for (i = 0; i < softc->nrxqsets; i++)
free(softc->rx_rings[i].tpa_start, M_DEVBUF);
free(softc->ver_info, M_DEVBUF);
free(softc->nvm_info, M_DEVBUF);
@ -1009,14 +1012,17 @@ bnxt_init(if_ctx_t ctx)
if (rc)
goto fail;
#ifdef notyet
/* Enable LRO/TPA/GRO */
/*
* Enable LRO/TPA/GRO
* TBD:
* Enable / Disable HW_LRO based on
* ifconfig lro / ifconfig -lro setting
*/
rc = bnxt_hwrm_vnic_tpa_cfg(softc, &softc->vnic_info,
(if_getcapenable(iflib_get_ifp(ctx)) & IFCAP_LRO) ?
HWRM_VNIC_TPA_CFG_INPUT_FLAGS_TPA : 0);
if (rc)
goto fail;
#endif
for (i = 0; i < softc->ntxqsets; i++) {
/* Allocate the statistics context */

View File

@ -93,6 +93,7 @@ __FBSDID("$FreeBSD$");
#include <x86/iommu/busdma_dmar.h>
#endif
#include <sys/bitstring.h>
/*
* enable accounting of every mbuf as it comes in to and goes out of
* iflib's software descriptor references
@ -381,6 +382,8 @@ struct iflib_fl {
#endif
/* implicit pad */
bitstr_t *ifl_rx_bitmap;
qidx_t ifl_fragidx;
/* constant */
qidx_t ifl_size;
uint16_t ifl_buf_size;
@ -1797,7 +1800,8 @@ static void
_iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
{
struct mbuf *m;
int idx, pidx = fl->ifl_pidx;
int idx, frag_idx = fl->ifl_fragidx;
int pidx = fl->ifl_pidx;
caddr_t cl, *sd_cl;
struct mbuf **sd_m;
uint8_t *sd_flags;
@ -1840,8 +1844,11 @@ _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
*
* If the cluster is still set then we know a minimum sized packet was received
*/
if ((cl = sd_cl[idx]) == NULL) {
if ((cl = sd_cl[idx] = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL)
bit_ffc_at(fl->ifl_rx_bitmap, frag_idx, fl->ifl_size, &frag_idx);
if ((frag_idx < 0) || (frag_idx >= fl->ifl_size))
bit_ffc(fl->ifl_rx_bitmap, fl->ifl_size, &frag_idx);
if ((cl = sd_cl[frag_idx]) == NULL) {
if ((cl = sd_cl[frag_idx] = m_cljget(NULL, M_NOWAIT, fl->ifl_buf_size)) == NULL)
break;
#if MEMORY_LOGGING
fl->ifl_cl_enqueued++;
@ -1867,10 +1874,11 @@ _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
cb_arg.error = 0;
q = fl->ifl_rxq;
MPASS(sd_map != NULL);
MPASS(sd_map[idx] != NULL);
err = bus_dmamap_load(fl->ifl_desc_tag, sd_map[idx],
MPASS(sd_map[frag_idx] != NULL);
err = bus_dmamap_load(fl->ifl_desc_tag, sd_map[frag_idx],
cl, fl->ifl_buf_size, _rxq_refill_cb, &cb_arg, 0);
bus_dmamap_sync(fl->ifl_desc_tag, sd_map[idx], BUS_DMASYNC_PREREAD);
bus_dmamap_sync(fl->ifl_desc_tag, sd_map[frag_idx],
BUS_DMASYNC_PREREAD);
if (err != 0 || cb_arg.error) {
/*
@ -1884,12 +1892,13 @@ _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
}
bus_addr = cb_arg.seg.ds_addr;
}
sd_flags[idx] |= RX_SW_DESC_INUSE;
bit_set(fl->ifl_rx_bitmap, frag_idx);
sd_flags[frag_idx] |= RX_SW_DESC_INUSE;
MPASS(sd_m[idx] == NULL);
sd_cl[idx] = cl;
sd_m[idx] = m;
fl->ifl_rxd_idxs[i] = idx;
MPASS(sd_m[frag_idx] == NULL);
sd_cl[frag_idx] = cl;
sd_m[frag_idx] = m;
fl->ifl_rxd_idxs[i] = frag_idx;
fl->ifl_bus_addrs[i] = bus_addr;
fl->ifl_vm_addrs[i] = cl;
fl->ifl_credits++;
@ -1905,8 +1914,8 @@ _iflib_fl_refill(if_ctx_t ctx, iflib_fl_t fl, int count)
ctx->isc_rxd_refill(ctx->ifc_softc, &iru);
i = 0;
pidx = idx;
fl->ifl_pidx = idx;
}
fl->ifl_pidx = idx;
}
done:
@ -1920,6 +1929,7 @@ done:
bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
ctx->isc_rxd_flush(ctx->ifc_softc, fl->ifl_rxq->ifr_id, fl->ifl_id, pidx);
fl->ifl_fragidx = frag_idx;
}
static __inline void
@ -1983,7 +1993,7 @@ iflib_fl_bufs_free(iflib_fl_t fl)
/*
* Reset free list values
*/
fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = 0;;
fl->ifl_credits = fl->ifl_cidx = fl->ifl_pidx = fl->ifl_gen = fl->ifl_fragidx = 0;
bzero(idi->idi_vaddr, idi->idi_size);
}
@ -1999,6 +2009,7 @@ iflib_fl_setup(iflib_fl_t fl)
if_ctx_t ctx = rxq->ifr_ctx;
if_softc_ctx_t sctx = &ctx->ifc_softc_ctx;
bit_nclear(fl->ifl_rx_bitmap, 0, fl->ifl_size);
/*
** Free current RX buffer structs and their mbufs
*/
@ -2348,6 +2359,7 @@ rxd_frag_to_sd(iflib_rxq_t rxq, if_rxd_frag_t irf, int unload, if_rxsd_t sd)
if (map != NULL)
bus_dmamap_sync(fl->ifl_ifdi->idi_tag, fl->ifl_ifdi->idi_map,
BUS_DMASYNC_PREREAD | BUS_DMASYNC_PREWRITE);
bit_clear(fl->ifl_rx_bitmap, cidx);
}
static struct mbuf *
@ -4243,8 +4255,9 @@ iflib_device_deregister(if_ctx_t ctx)
iflib_txq_t txq;
iflib_rxq_t rxq;
device_t dev = ctx->ifc_dev;
int i;
int i, j;
struct taskqgroup *tqg;
iflib_fl_t fl;
/* Make sure VLANS are not using driver */
if (if_vlantrunkinuse(ifp)) {
@ -4279,6 +4292,10 @@ iflib_device_deregister(if_ctx_t ctx)
for (i = 0, rxq = ctx->ifc_rxqs; i < NRXQSETS(ctx); i++, rxq++) {
if (rxq->ifr_task.gt_uniq != NULL)
taskqgroup_detach(tqg, &rxq->ifr_task);
for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++)
free(fl->ifl_rx_bitmap, M_IFLIB);
}
tqg = qgroup_if_config_tqg;
if (ctx->ifc_admin_task.gt_uniq != NULL)
@ -4672,6 +4689,9 @@ iflib_queues_alloc(if_ctx_t ctx)
err = ENOMEM;
goto err_rx_desc;
}
for (j = 0, fl = rxq->ifr_fl; j < rxq->ifr_nfl; j++, fl++)
fl->ifl_rx_bitmap = bit_alloc(fl->ifl_size, M_IFLIB, M_WAITOK|M_ZERO);
}
/* TXQs */