[atheros] [if_arge] Various fixes to avoid TX stalls and bad sized packets

This is stuff I've been running for a couple years.  It's inspired by changes
I found in the linux ag71xx ethernet driver.

* Delay between stopping DMA and checking to see if it's stopped; this gives
  the hardware time to do its thing.

* Non-final frames in the chain need to be a multiple of 4 bytes in size.
  Ensure this is the case when assembling a TX DMA list.

* Add counters for tx/rx underflow and too-short packets.

* Log if TX/RX DMA couldn't be stopped when resetting the MAC.

* Add some more debugging / logging around TX/RX ring bits.

Tested:

* AR7240, AR7241
* AR9344 (TL-WDR3600/TL-WDR4300 APs)
* AR9331 (Carambola 2)
This commit is contained in:
Adrian Chadd 2020-05-10 03:36:11 +00:00
parent 134a253edc
commit b8aa77b74d
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=360865
2 changed files with 66 additions and 3 deletions

View File

@ -332,6 +332,11 @@ arge_attach_sysctl(device_t dev)
"tx_pkts_unaligned_len", CTLFLAG_RW, &sc->stats.tx_pkts_unaligned_len,
0, "number of TX unaligned packets (len)");
SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
"tx_pkts_unaligned_tooshort", CTLFLAG_RW,
&sc->stats.tx_pkts_unaligned_tooshort,
0, "number of TX unaligned packets (mbuf length < 4 bytes)");
SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
"tx_pkts_nosegs", CTLFLAG_RW, &sc->stats.tx_pkts_nosegs,
0, "number of TX packets fail with no ring slots avail");
@ -347,6 +352,13 @@ arge_attach_sysctl(device_t dev)
SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
"intr_ok", CTLFLAG_RW, &sc->stats.intr_ok,
0, "number of OK interrupts");
SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
"tx_underflow", CTLFLAG_RW, &sc->stats.tx_underflow,
0, "Number of TX underflows");
SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
"rx_overflow", CTLFLAG_RW, &sc->stats.rx_overflow,
0, "Number of RX overflows");
#ifdef ARGE_DEBUG
SYSCTL_ADD_UINT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO, "tx_prod",
CTLFLAG_RW, &sc->arge_cdata.arge_tx_prod, 0, "");
@ -1365,15 +1377,24 @@ arge_set_pll(struct arge_softc *sc, int media, int duplex)
static void
arge_reset_dma(struct arge_softc *sc)
{
uint32_t val;
ARGEDEBUG(sc, ARGE_DBG_RESET, "%s: called\n", __func__);
ARGE_WRITE(sc, AR71XX_DMA_RX_CONTROL, 0);
ARGE_WRITE(sc, AR71XX_DMA_TX_CONTROL, 0);
/* Give hardware a chance to finish */
DELAY(1000);
ARGE_WRITE(sc, AR71XX_DMA_RX_DESC, 0);
ARGE_WRITE(sc, AR71XX_DMA_TX_DESC, 0);
ARGEDEBUG(sc, ARGE_DBG_RESET, "%s: RX_STATUS=%08x, TX_STATUS=%08x\n",
__func__,
ARGE_READ(sc, AR71XX_DMA_RX_STATUS),
ARGE_READ(sc, AR71XX_DMA_TX_STATUS));
/* Clear all possible RX interrupts */
while(ARGE_READ(sc, AR71XX_DMA_RX_STATUS) & DMA_RX_STATUS_PKT_RECVD)
ARGE_WRITE(sc, AR71XX_DMA_RX_STATUS, DMA_RX_STATUS_PKT_RECVD);
@ -1397,6 +1418,24 @@ arge_reset_dma(struct arge_softc *sc)
* flushed to RAM before underlying buffers are freed.
*/
arge_flush_ddr(sc);
/* Check if we cleared RX status */
val = ARGE_READ(sc, AR71XX_DMA_RX_STATUS);
if (val != 0) {
device_printf(sc->arge_dev,
"%s: unable to clear DMA_RX_STATUS: %08x\n",
__func__, val);
}
/* Check if we cleared TX status */
val = ARGE_READ(sc, AR71XX_DMA_TX_STATUS);
/* Mask out reserved bits */
val = val & 0x00ffffff;
if (val != 0) {
device_printf(sc->arge_dev,
"%s: unable to clear DMA_TX_STATUS: %08x\n",
__func__, val);
}
}
static void
@ -1417,9 +1456,13 @@ arge_init_locked(struct arge_softc *sc)
ARGE_LOCK_ASSERT(sc);
ARGEDEBUG(sc, ARGE_DBG_RESET, "%s: called\n", __func__);
if ((ifp->if_flags & IFF_UP) && (ifp->if_drv_flags & IFF_DRV_RUNNING))
return;
ARGEDEBUG(sc, ARGE_DBG_RESET, "%s: init'ing\n", __func__);
/* Init circular RX list. */
if (arge_rx_ring_init(sc) != 0) {
device_printf(sc->arge_dev,
@ -1431,6 +1474,7 @@ arge_init_locked(struct arge_softc *sc)
/* Init tx descriptors. */
arge_tx_ring_init(sc);
/* Restart DMA */
arge_reset_dma(sc);
if (sc->arge_miibus) {
@ -1452,6 +1496,11 @@ arge_init_locked(struct arge_softc *sc)
arge_update_link_locked(sc);
}
ARGEDEBUG(sc, ARGE_DBG_RESET, "%s: desc ring; TX=0x%x, RX=0x%x\n",
__func__,
ARGE_TX_RING_ADDR(sc, 0),
ARGE_RX_RING_ADDR(sc, 0));
ARGE_WRITE(sc, AR71XX_DMA_TX_DESC, ARGE_TX_RING_ADDR(sc, 0));
ARGE_WRITE(sc, AR71XX_DMA_RX_DESC, ARGE_RX_RING_ADDR(sc, 0));
@ -1495,6 +1544,15 @@ arge_mbuf_chain_is_tx_aligned(struct arge_softc *sc, struct mbuf *m0)
sc->stats.tx_pkts_unaligned_len++;
return 0;
}
/*
* All chips have this requirement for length being greater
* than 4.
*/
if ((m->m_next != NULL) && ((m->m_len < 4))) {
sc->stats.tx_pkts_unaligned_tooshort++;
return 0;
}
}
return 1;
}
@ -1582,6 +1640,11 @@ arge_encap(struct arge_softc *sc, struct mbuf **m_head)
tmp |= ARGE_DESC_EMPTY;
desc->packet_ctrl = tmp;
ARGEDEBUG(sc, ARGE_DBG_TX, " [%d / %d] addr=0x%x, len=%d\n",
i,
prod,
(uint32_t) txsegs[i].ds_addr, (int) txsegs[i].ds_len);
/* XXX Note: only relevant for older MACs; but check length! */
if ((sc->arge_hw_flags & ARGE_HW_FLG_TX_DESC_ALIGN_4BYTE) &&
(txsegs[i].ds_addr & 3))
@ -2606,7 +2669,7 @@ arge_intr(void *arg)
}
/*
* If we've finished TXing and there's space for more packets
* If we've finished RX /or/ TX and there's space for more packets
* to be queued for TX, do so. Otherwise we may end up in a
* situation where the interface send queue was filled
* whilst the hardware queue was full, then the hardware
@ -2620,8 +2683,7 @@ arge_intr(void *arg)
* after a TX underrun, then having the hardware queue added
* to below.
*/
if (status & (DMA_INTR_TX_PKT_SENT | DMA_INTR_TX_UNDERRUN) &&
(ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
if ((ifp->if_drv_flags & IFF_DRV_OACTIVE) == 0) {
if (!IFQ_IS_EMPTY(&ifp->if_snd))
arge_start_locked(ifp);
}

View File

@ -206,6 +206,7 @@ struct arge_softc {
uint32_t tx_pkts_unaligned;
uint32_t tx_pkts_unaligned_start;
uint32_t tx_pkts_unaligned_len;
uint32_t tx_pkts_unaligned_tooshort;
uint32_t tx_pkts_nosegs;
uint32_t tx_pkts_aligned;
uint32_t rx_overflow;