- Simplify mb_free_ext_fast
- increase asserts for mbuf accounting - track outstanding mbufs (maps very closely to leaked) - actually only create one thread per port if !multiq Oddly enough this fixes the use-after-free - move txq_segs to stack in t3_encap - add checks that pidx doesn't move past cidx - simplify mbuf free logic in collapse mbufs routine
This commit is contained in:
parent
3c58f6ddbd
commit
139edb19d9
@ -298,8 +298,7 @@ struct sge_txq {
|
||||
unsigned long txq_frees;
|
||||
struct mtx lock;
|
||||
struct sg_ent txq_sgl[TX_MAX_SEGS / 2 + 1];
|
||||
bus_dma_segment_t txq_segs[TX_MAX_SEGS];
|
||||
#define TXQ_NAME_LEN 32
|
||||
#define TXQ_NAME_LEN 32
|
||||
char lockbuf[TXQ_NAME_LEN];
|
||||
};
|
||||
|
||||
|
@ -422,6 +422,7 @@ cxgb_pcpu_start_(struct sge_qset *qs, struct mbuf *immpkt, int tx_flush)
|
||||
txq = &qs->txq[TXQ_ETH];
|
||||
|
||||
mtx_assert(&txq->lock, MA_OWNED);
|
||||
KASSERT(qs->idx == 0, ("invalid qs %d", qs->idx));
|
||||
|
||||
retry:
|
||||
if (!pi->link_config.link_ok)
|
||||
@ -667,13 +668,14 @@ cxgb_pcpu_startup_threads(struct adapter *sc)
|
||||
#else
|
||||
nqsets = 1;
|
||||
#endif
|
||||
for (j = 0; j < pi->nqsets; ++j) {
|
||||
for (j = 0; j < nqsets; ++j) {
|
||||
struct sge_qset *qs;
|
||||
|
||||
qs = &sc->sge.qs[pi->first_qset + j];
|
||||
qs->port = pi;
|
||||
qs->qs_cpuid = ((pi->first_qset + j) % mp_ncpus);
|
||||
device_printf(sc->dev, "starting thread for %d\n", qs->qs_cpuid);
|
||||
device_printf(sc->dev, "starting thread for %d\n",
|
||||
qs->qs_cpuid);
|
||||
|
||||
kproc_create(cxgb_pcpu_start_proc, qs, &p,
|
||||
RFNOWAIT, 0, "cxgbsp");
|
||||
@ -686,11 +688,18 @@ void
|
||||
cxgb_pcpu_shutdown_threads(struct adapter *sc)
|
||||
{
|
||||
int i, j;
|
||||
int nqsets;
|
||||
|
||||
#ifdef IFNET_MULTIQUEUE
|
||||
nqsets = pi->nqsets;
|
||||
#else
|
||||
nqsets = 1;
|
||||
#endif
|
||||
|
||||
for (i = 0; i < sc->params.nports; i++) {
|
||||
struct port_info *pi = &sc->port[i];
|
||||
int first = pi->first_qset;
|
||||
for (j = 0; j < pi->nqsets; j++) {
|
||||
for (j = 0; j < nqsets; j++) {
|
||||
struct sge_qset *qs = &sc->sge.qs[first + j];
|
||||
|
||||
qs->qs_flags |= QS_EXITING;
|
||||
|
@ -927,7 +927,17 @@ txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
|
||||
txq->unacked &= 7;
|
||||
txqs->pidx = txq->pidx;
|
||||
txq->pidx += ndesc;
|
||||
|
||||
#ifdef INVARIANTS
|
||||
if (((txqs->pidx > txq->cidx) &&
|
||||
(txq->pidx < txqs->pidx) &&
|
||||
(txq->pidx >= txq->cidx)) ||
|
||||
((txqs->pidx < txq->cidx) &&
|
||||
(txq->pidx >= txq-> cidx)) ||
|
||||
((txqs->pidx < txq->cidx) &&
|
||||
(txq->cidx < txqs->pidx)))
|
||||
panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
|
||||
txqs->pidx, txq->pidx, txq->cidx);
|
||||
#endif
|
||||
if (txq->pidx >= txq->size) {
|
||||
txq->pidx -= txq->size;
|
||||
txq->gen ^= 1;
|
||||
@ -1205,23 +1215,23 @@ t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
|
||||
struct work_request_hdr *wrp;
|
||||
struct tx_sw_desc *txsd;
|
||||
struct sg_ent *sgp, *sgl;
|
||||
bus_dma_segment_t *segs;
|
||||
uint32_t wr_hi, wr_lo, sgl_flits;
|
||||
bus_dma_segment_t segs[TX_MAX_SEGS];
|
||||
|
||||
struct tx_desc *txd;
|
||||
struct mbuf_vec *mv;
|
||||
struct mbuf_iovec *mi;
|
||||
|
||||
DPRINTF("t3_encap cpu=%d ", curcpu);
|
||||
KASSERT(qs->idx == 0, ("invalid qs %d", qs->idx));
|
||||
|
||||
mi = NULL;
|
||||
pi = qs->port;
|
||||
sc = pi->adapter;
|
||||
txq = &qs->txq[TXQ_ETH];
|
||||
txsd = &txq->sdesc[txq->pidx];
|
||||
txd = &txq->desc[txq->pidx];
|
||||
txsd = &txq->sdesc[txq->pidx];
|
||||
sgl = txq->txq_sgl;
|
||||
segs = txq->txq_segs;
|
||||
m0 = *m;
|
||||
|
||||
DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset);
|
||||
@ -1240,6 +1250,8 @@ t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
|
||||
#endif
|
||||
KASSERT(txsd->mi.mi_base == NULL, ("overwrting valid entry mi_base==%p",
|
||||
txsd->mi.mi_base));
|
||||
if (cxgb_debug)
|
||||
printf("uipc_mvec PIO_LEN=%ld\n", PIO_LEN);
|
||||
|
||||
if (count > 1) {
|
||||
panic("count > 1 not support in CVS\n");
|
||||
@ -1253,7 +1265,7 @@ t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
|
||||
}
|
||||
KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d count=%d", nsegs, count));
|
||||
|
||||
if (m0->m_pkthdr.len > PIO_LEN) {
|
||||
if (!(m0->m_pkthdr.len <= PIO_LEN)) {
|
||||
mi_collapse_mbuf(&txsd->mi, m0);
|
||||
mi = &txsd->mi;
|
||||
}
|
||||
@ -1393,8 +1405,11 @@ t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
|
||||
write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
|
||||
check_ring_tx_db(pi->adapter, txq);
|
||||
|
||||
if ((m0->m_type == MT_DATA) && ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT)) {
|
||||
if ((m0->m_type == MT_DATA) &&
|
||||
((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT) &&
|
||||
(m0->m_ext.ext_type != EXT_PACKET)) {
|
||||
m0->m_flags &= ~M_EXT ;
|
||||
mbufs_outstanding--;
|
||||
m_free(m0);
|
||||
}
|
||||
|
||||
@ -1797,6 +1812,7 @@ t3_free_tx_desc(struct sge_txq *q, int reclaimable)
|
||||
cidx = q->cidx;
|
||||
txsd = &q->sdesc[cidx];
|
||||
DPRINTF("reclaiming %d WR\n", reclaimable);
|
||||
mtx_assert(&q->lock, MA_OWNED);
|
||||
while (reclaimable--) {
|
||||
DPRINTF("cidx=%d d=%p\n", cidx, txsd);
|
||||
if (txsd->mi.mi_base != NULL) {
|
||||
@ -2208,6 +2224,7 @@ t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
|
||||
}
|
||||
|
||||
init_qset_cntxt(q, id);
|
||||
q->idx = id;
|
||||
|
||||
if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
|
||||
sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
|
||||
@ -3203,7 +3220,11 @@ t3_add_attach_sysctls(adapter_t *sc)
|
||||
SYSCTL_ADD_INT(ctx, children, OID_AUTO,
|
||||
"ext_freed",
|
||||
CTLFLAG_RD, &cxgb_ext_freed,
|
||||
0, "#times a cluster was freed through ext_free");
|
||||
0, "#times a cluster was freed through ext_free");
|
||||
SYSCTL_ADD_INT(ctx, children, OID_AUTO,
|
||||
"mbufs_outstanding",
|
||||
CTLFLAG_RD, &mbufs_outstanding,
|
||||
0, "#mbufs in flight in the driver");
|
||||
}
|
||||
|
||||
|
||||
|
@ -45,6 +45,7 @@ void cxgb_cache_refill(void);
|
||||
extern int cxgb_cached_allocations;
|
||||
extern int cxgb_cached;
|
||||
extern int cxgb_ext_freed;
|
||||
extern int mbufs_outstanding;
|
||||
|
||||
#define mtomv(m) ((struct mbuf_vec *)((m)->m_pktdat))
|
||||
#define M_IOVEC 0x100000 /* mbuf immediate data area is used for cluster ptrs */
|
||||
@ -162,7 +163,7 @@ m_explode(struct mbuf *m)
|
||||
static __inline void
|
||||
busdma_map_mbuf_fast(struct mbuf *m, bus_dma_segment_t *seg)
|
||||
{
|
||||
seg->ds_addr = pmap_kextract((vm_offset_t)m->m_data);
|
||||
seg->ds_addr = pmap_kextract(mtod(m, vm_offset_t));
|
||||
seg->ds_len = m->m_len;
|
||||
}
|
||||
|
||||
@ -229,11 +230,17 @@ m_free_iovec(struct mbuf *m, int type)
|
||||
static __inline void
|
||||
m_freem_iovec(struct mbuf_iovec *mi)
|
||||
{
|
||||
struct mbuf *m;
|
||||
struct mbuf *m = (struct mbuf *)mi->mi_base;
|
||||
|
||||
switch (mi->mi_type) {
|
||||
case EXT_MBUF:
|
||||
m_free_fast((struct mbuf *)mi->mi_base);
|
||||
#ifdef PIO_LEN
|
||||
KASSERT(m->m_pkthdr.len > PIO_LEN, ("freeing PIO buf"));
|
||||
#endif
|
||||
KASSERT((mi->mi_flags & M_NOFREE) == 0, ("no free set on mbuf"));
|
||||
KASSERT(m->m_next == NULL, ("freeing chain"));
|
||||
mbufs_outstanding--;
|
||||
m_free_fast(m);
|
||||
break;
|
||||
case EXT_IOVEC:
|
||||
case EXT_CLIOVEC:
|
||||
|
@ -75,6 +75,7 @@ extern uint32_t mb_free_vec_free;
|
||||
|
||||
uma_zone_t zone_miovec;
|
||||
static int mi_inited = 0;
|
||||
int mbufs_outstanding = 0;
|
||||
|
||||
void
|
||||
mi_init(void)
|
||||
@ -198,12 +199,18 @@ busdma_map_sg_collapse(struct mbuf **m, bus_dma_segment_t *segs, int *nsegs)
|
||||
struct mbuf *marray[TX_MAX_SEGS];
|
||||
int i, type, seg_count, defragged = 0, err = 0;
|
||||
struct mbuf_vec *mv;
|
||||
int skipped, freed, outstanding;
|
||||
|
||||
|
||||
|
||||
KASSERT(n->m_pkthdr.len, ("packet has zero header len"));
|
||||
|
||||
if (n->m_flags & M_PKTHDR && !SLIST_EMPTY(&n->m_pkthdr.tags))
|
||||
m_tag_delete_chain(n, NULL);
|
||||
|
||||
if (cxgb_debug)
|
||||
printf("cxgb_sge PIO_LEN=%ld\n", PIO_LEN);
|
||||
|
||||
if (n->m_pkthdr.len <= PIO_LEN)
|
||||
return (0);
|
||||
retry:
|
||||
@ -211,16 +218,21 @@ busdma_map_sg_collapse(struct mbuf **m, bus_dma_segment_t *segs, int *nsegs)
|
||||
if (n->m_next == NULL) {
|
||||
busdma_map_mbuf_fast(n, segs);
|
||||
*nsegs = 1;
|
||||
if ((n->m_flags & M_NOFREE) == 0)
|
||||
mbufs_outstanding++;
|
||||
return (0);
|
||||
}
|
||||
skipped = freed = outstanding = 0;
|
||||
while (n && seg_count < TX_MAX_SEGS) {
|
||||
marray[seg_count] = n;
|
||||
|
||||
|
||||
/*
|
||||
* firmware doesn't like empty segments
|
||||
*/
|
||||
if (__predict_true(n->m_len != 0))
|
||||
if (__predict_true(n->m_len != 0))
|
||||
seg_count++;
|
||||
else
|
||||
skipped++;
|
||||
|
||||
n = n->m_next;
|
||||
}
|
||||
@ -265,24 +277,35 @@ busdma_map_sg_collapse(struct mbuf **m, bus_dma_segment_t *segs, int *nsegs)
|
||||
}
|
||||
n = *m;
|
||||
while (n) {
|
||||
if (n->m_ext.ext_type == EXT_PACKET)
|
||||
if (n->m_len == 0)
|
||||
/* do nothing - free if mbuf or cluster */;
|
||||
else if (((n->m_flags & M_EXT) == 0) ||
|
||||
((n->m_flags & M_EXT) && (n->m_ext.ext_type == EXT_PACKET)) ||
|
||||
(n->m_flags & M_NOFREE))
|
||||
goto skip;
|
||||
else if (n->m_len == 0)
|
||||
/* do nothing */;
|
||||
else if ((n->m_flags & (M_EXT|M_NOFREE)) == M_EXT)
|
||||
n->m_flags &= ~M_EXT;
|
||||
else
|
||||
goto skip;
|
||||
mhead = n->m_next;
|
||||
m_free(n);
|
||||
n = mhead;
|
||||
freed++;
|
||||
continue;
|
||||
skip:
|
||||
/*
|
||||
* is an immediate mbuf or is from the packet zone
|
||||
*/
|
||||
outstanding++;
|
||||
n = n->m_next;
|
||||
}
|
||||
*nsegs = seg_count;
|
||||
*m = m0;
|
||||
DPRINTF("pktlen=%d m0=%p *m=%p m=%p\n", m0->m_pkthdr.len, m0, *m, m);
|
||||
mbufs_outstanding += outstanding;
|
||||
KASSERT(outstanding + freed == skipped + seg_count,
|
||||
("outstanding=%d freed=%d skipped=%d seg_count=%d",
|
||||
outstanding, freed, skipped, seg_count));
|
||||
|
||||
|
||||
return (0);
|
||||
err_out:
|
||||
m_freem(*m);
|
||||
@ -325,6 +348,7 @@ busdma_map_sg_vec(struct mbuf **m, struct mbuf **mret, bus_dma_segment_t *segs,
|
||||
(*mp)->m_next = (*mp)->m_nextpkt = NULL;
|
||||
if (((*mp)->m_flags & (M_EXT|M_NOFREE)) == M_EXT) {
|
||||
(*mp)->m_flags &= ~M_EXT;
|
||||
mbufs_outstanding--;
|
||||
m_free(*mp);
|
||||
}
|
||||
}
|
||||
@ -336,29 +360,17 @@ busdma_map_sg_vec(struct mbuf **m, struct mbuf **mret, bus_dma_segment_t *segs,
|
||||
void
|
||||
mb_free_ext_fast(struct mbuf_iovec *mi, int type, int idx)
|
||||
{
|
||||
u_int cnt;
|
||||
int dofree;
|
||||
caddr_t cl;
|
||||
|
||||
/* Account for lazy ref count assign. */
|
||||
dofree = (mi->mi_refcnt == NULL);
|
||||
|
||||
/*
|
||||
* This is tricky. We need to make sure to decrement the
|
||||
* refcount in a safe way but to also clean up if we're the
|
||||
* last reference. This method seems to do it without race.
|
||||
*/
|
||||
while (dofree == 0) {
|
||||
cnt = *(mi->mi_refcnt);
|
||||
if (cnt == 1) {
|
||||
dofree = 1;
|
||||
break;
|
||||
}
|
||||
if (atomic_cmpset_int(mi->mi_refcnt, cnt, cnt - 1)) {
|
||||
if (cnt == 1)
|
||||
dofree = 1;
|
||||
break;
|
||||
}
|
||||
if (dofree == 0) {
|
||||
KASSERT(mi->mi_type != EXT_MBUF,
|
||||
("refcnt must be null for mbuf"));
|
||||
if (*(mi->mi_refcnt) == 1 ||
|
||||
atomic_fetchadd_int(mi->mi_refcnt, -1) == 1)
|
||||
dofree = 1;
|
||||
}
|
||||
if (dofree == 0)
|
||||
return;
|
||||
@ -366,6 +378,8 @@ mb_free_ext_fast(struct mbuf_iovec *mi, int type, int idx)
|
||||
cl = mi->mi_base;
|
||||
switch (type) {
|
||||
case EXT_MBUF:
|
||||
KASSERT((mi->mi_flags & M_NOFREE) == 0, ("no free set on mbuf"));
|
||||
mbufs_outstanding--;
|
||||
m_free_fast((struct mbuf *)cl);
|
||||
break;
|
||||
case EXT_CLUSTER:
|
||||
@ -395,6 +409,7 @@ mb_free_ext_fast(struct mbuf_iovec *mi, int type, int idx)
|
||||
mi->mi_ext.ext_args);
|
||||
break;
|
||||
case EXT_PACKET:
|
||||
mbufs_outstanding--;
|
||||
if (*(mi->mi_refcnt) == 0)
|
||||
*(mi->mi_refcnt) = 1;
|
||||
uma_zfree(zone_pack, mi->mi_mbuf);
|
||||
|
Loading…
Reference in New Issue
Block a user