o In re_newbuf() and re_encap(), if re_dma_map_desc() aborts the
  mapping operation because it ran out of free descriptors, or if
  there are too many segments in the first place, call
  bus_dmamap_unload() in order to unload the already loaded segments
  (see the first sketch after this list).
  For the attempt to map the defragmented mbuf (chain) in re_encap(),
  this introduces re_dma_map_desc() setting arg.rl_maxsegs to 0 as a
  new failure mode; previously we just ignored this case, corrupting
  our view of the TX ring.
o In re_txeof():
  - Don't clear IFF_DRV_OACTIVE unless there are more than
    RL_TX_DESC_THLD (4) free TX descriptors. Further down the road
    re_encap() will bail unless more than that many descriptors are
    free, causing re_start() to abort and prepend the dequeued mbuf
    again, so it makes no sense to pretend we could process mbufs
    when in fact we won't. While at it, replace this magic 4 with a
    macro RL_TX_DESC_THLD throughout the driver.
  - Don't cancel the watchdog timeout as soon as there's at least one
    free TX descriptor; instead, only cancel it once all descriptors
    have been handled. It's perfectly normal, especially in the
    DEVICE_POLLING case, for re_txeof() to be called when only part
    of the enqueued TX descriptors have been handled, which caused
    the watchdog to be disarmed prematurely (see the second sketch
    after this list).
o In re_encap():
  - If m_defrag() fails, just drop the packet like other NIC drivers
    do. This should only happen when there's an mbuf shortage, in
    which case it was previously possible to end up with an IFQ full
    of packets that couldn't be processed, because they couldn't be
    defragmented while taking up all the mbufs themselves. This
    includes adjusting re_start() not to try to prepend the mbuf
    (chain) if re_encap() has freed it (see the third sketch after
    this list).
  - Remove duplicate initialization of members of struct
    rl_dmaload_arg to values that haven't changed since the first
    attempt to map the fragmented mbuf chain.
    While at it, remove an unused member from struct rl_dmaload_arg.
o In re_start(), remove an orphaned, banal comment; the corresponding
  code was moved to re_attach() some time ago.
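
The first sketch below illustrates the re_dma_map_desc() failure
convention described above. It is a minimal sketch written for this
summary, not the committed code: only the error handling mirrors the
diff below, the callback body is elided, and the signature is assumed
to be the usual bus_dmamap_load_mbuf(9) callback.

	static void
	re_dma_map_desc(void *xarg, bus_dma_segment_t *segs, int nseg,
	    bus_size_t mapsize, int error)
	{
		struct rl_dmaload_arg *arg = xarg;

		if (error != 0)
			return;
		if (nseg > arg->rl_maxsegs) {
			/* Out of descriptors; report failure to the caller. */
			arg->rl_maxsegs = 0;
			return;
		}
		/* ... fill nseg descriptors in arg->rl_ring from segs[] ... */
	}

	/*
	 * Caller side, as in re_newbuf(): the segments are already
	 * loaded by the time the callback gives up, so the caller has
	 * to unload the mapping before bailing out.
	 */
	error = bus_dmamap_load_mbuf(sc->rl_ldata.rl_mtag, map, m,
	    re_dma_map_desc, &arg, BUS_DMA_NOWAIT);
	if (error != 0 || arg.rl_maxsegs == 0) {
		if (arg.rl_maxsegs == 0)
			bus_dmamap_unload(sc->rl_ldata.rl_mtag, map);
		return (ENOMEM);
	}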
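
The second sketch condenses the reworked tail of re_txeof(), using the
names from the diff below: IFF_DRV_OACTIVE is only cleared when a
subsequent re_encap() could actually succeed, and the watchdog is only
disarmed once every descriptor has been reaped.

	if (sc->rl_ldata.rl_tx_free > RL_TX_DESC_THLD)
		ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;

	if (sc->rl_ldata.rl_tx_free < RL_TX_DESC_CNT) {
		/* Not done yet; kick the chip and leave the watchdog armed. */
		CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
	} else
		sc->rl_watchdog_timer = 0;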
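
The third sketch shows the matching adjustment to the dequeue loop in
re_start(), again condensed from the diff below: on failure, re_encap()
either left the mbuf (chain) alone (ring full) or freed it and NULLed
the pointer (m_defrag() failure), and only in the former case may the
mbuf be prepended back onto the IFQ.

	if (re_encap(sc, &m_head, &idx)) {
		if (m_head == NULL)
			break;		/* packet was dropped by re_encap() */
		IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
		ifp->if_drv_flags |= IFF_DRV_OACTIVE;
		break;
	}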

With these changes, re(4) now survives a full day of hammering out
packets here (the test only ended when it was stopped).

Reviewed by:	yongari
MFC after:	2 weeks
commit b4b958792b (parent fce8985370)
Author:	Marius Strobl
Date:	2007-01-16 20:35:23 +00:00

2 changed files with 44 additions and 48 deletions

@@ -1487,7 +1487,6 @@ re_newbuf(sc, idx, m)
*/
m_adj(m, RE_ETHER_ALIGN);
#endif
arg.sc = sc;
arg.rl_idx = idx;
arg.rl_maxsegs = 1;
arg.rl_flags = 0;
@@ -1499,6 +1498,9 @@ re_newbuf(sc, idx, m)
if (error || arg.rl_maxsegs != 1) {
if (n != NULL)
m_freem(n);
if (arg.rl_maxsegs == 0)
bus_dmamap_unload(sc->rl_ldata.rl_mtag,
sc->rl_ldata.rl_rx_dmamap[idx]);
return (ENOMEM);
}
@@ -1781,13 +1783,11 @@ re_txeof(sc)
idx = sc->rl_ldata.rl_tx_considx;
/* Invalidate the TX descriptor list */
bus_dmamap_sync(sc->rl_ldata.rl_tx_list_tag,
sc->rl_ldata.rl_tx_list_map,
BUS_DMASYNC_POSTREAD);
while (sc->rl_ldata.rl_tx_free < RL_TX_DESC_CNT) {
txstat = le32toh(sc->rl_ldata.rl_tx_list[idx].rl_cmdstat);
if (txstat & RL_TDESC_CMD_OWN)
break;
@@ -1800,7 +1800,6 @@ re_txeof(sc)
* be the only place where the TX status bits
* are valid.
*/
if (txstat & RL_TDESC_CMD_EOF) {
m_freem(sc->rl_ldata.rl_tx_mbuf[idx]);
sc->rl_ldata.rl_tx_mbuf[idx] = NULL;
@@ -1817,37 +1816,35 @@ re_txeof(sc)
sc->rl_ldata.rl_tx_free++;
RL_DESC_INC(idx);
}
sc->rl_ldata.rl_tx_considx = idx;
/* No changes made to the TX ring, so no flush needed */
if (sc->rl_ldata.rl_tx_free) {
sc->rl_ldata.rl_tx_considx = idx;
if (sc->rl_ldata.rl_tx_free > RL_TX_DESC_THLD)
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
sc->rl_watchdog_timer = 0;
}
/*
* Some chips will ignore a second TX request issued while an
* existing transmission is in progress. If the transmitter goes
* idle but there are still packets waiting to be sent, we need
* to restart the channel here to flush them out. This only seems
* to be required with the PCIe devices.
*/
if (sc->rl_ldata.rl_tx_free < RL_TX_DESC_CNT)
CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
if (sc->rl_ldata.rl_tx_free < RL_TX_DESC_CNT) {
/*
* Some chips will ignore a second TX request issued
* while an existing transmission is in progress. If
* the transmitter goes idle but there are still
* packets waiting to be sent, we need to restart the
* channel here to flush them out. This only seems to
* be required with the PCIe devices.
*/
CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
#ifdef RE_TX_MODERATION
/*
* If not all descriptors have been released yet,
* reload the timer so that we will eventually get another
* interrupt that will cause us to re-enter this routine.
* This is done in case the transmitter has gone idle.
*/
if (sc->rl_ldata.rl_tx_free != RL_TX_DESC_CNT)
/*
* If not all descriptors have been reaped yet, reload
* the timer so that we will eventually get another
* interrupt that will cause us to re-enter this routine.
* This is done in case the transmitter has gone idle.
*/
CSR_WRITE_4(sc, RL_TIMERCNT, 1);
#endif
} else
sc->rl_watchdog_timer = 0;
}
static void
@@ -2028,7 +2025,7 @@ re_encap(sc, m_head, idx)
RL_LOCK_ASSERT(sc);
if (sc->rl_ldata.rl_tx_free <= 4)
if (sc->rl_ldata.rl_tx_free <= RL_TX_DESC_THLD)
return (EFBIG);
/*
@@ -2053,11 +2050,10 @@ re_encap(sc, m_head, idx)
arg.rl_flags |= RL_TDESC_CMD_UDPCSUM;
}
arg.sc = sc;
arg.rl_idx = *idx;
arg.rl_maxsegs = sc->rl_ldata.rl_tx_free;
if (arg.rl_maxsegs > 4)
arg.rl_maxsegs -= 4;
if (arg.rl_maxsegs > RL_TX_DESC_THLD)
arg.rl_maxsegs -= RL_TX_DESC_THLD;
arg.rl_ring = sc->rl_ldata.rl_tx_list;
map = sc->rl_ldata.rl_tx_dmamap[*idx];
@@ -2076,7 +2072,6 @@ re_encap(sc, m_head, idx)
* below can assemble the packet into a single buffer that's
* padded out to the minimum frame size.
*/
if (arg.rl_flags && (*m_head)->m_pkthdr.len < RL_MIN_FRAMELEN)
error = EFBIG;
else
@@ -2091,17 +2086,20 @@ re_encap(sc, m_head, idx)
/* Too many segments to map, coalesce into a single mbuf */
if (error || arg.rl_maxsegs == 0) {
if (arg.rl_maxsegs == 0)
bus_dmamap_unload(sc->rl_ldata.rl_mtag, map);
m_new = m_defrag(*m_head, M_DONTWAIT);
if (m_new == NULL)
if (m_new == NULL) {
m_freem(*m_head);
*m_head = NULL;
return (ENOBUFS);
else
*m_head = m_new;
}
*m_head = m_new;
/*
* Manually pad short frames, and zero the pad space
* to avoid leaking data.
*/
if (m_new->m_pkthdr.len < RL_MIN_FRAMELEN) {
bzero(mtod(m_new, char *) + m_new->m_pkthdr.len,
RL_MIN_FRAMELEN - m_new->m_pkthdr.len);
@@ -2110,16 +2108,17 @@ re_encap(sc, m_head, idx)
m_new->m_len = m_new->m_pkthdr.len;
}
arg.sc = sc;
arg.rl_idx = *idx;
/* Note that we'll run over RL_TX_DESC_THLD here. */
arg.rl_maxsegs = sc->rl_ldata.rl_tx_free;
arg.rl_ring = sc->rl_ldata.rl_tx_list;
error = bus_dmamap_load_mbuf(sc->rl_ldata.rl_mtag, map,
*m_head, re_dma_map_desc, &arg, BUS_DMA_NOWAIT);
if (error) {
device_printf(sc->rl_dev, "can't map mbuf (error %d)\n",
error);
if (error || arg.rl_maxsegs == 0) {
device_printf(sc->rl_dev,
"can't map defragmented mbuf (error %d)\n", error);
m_freem(m_new);
*m_head = NULL;
if (arg.rl_maxsegs == 0)
bus_dmamap_unload(sc->rl_ldata.rl_mtag, map);
return (EFBIG);
}
}
@@ -2201,6 +2200,8 @@ re_start(ifp)
break;
if (re_encap(sc, &m_head, &idx)) {
if (m_head == NULL)
break;
IFQ_DRV_PREPEND(&ifp->if_snd, m_head);
ifp->if_drv_flags |= IFF_DRV_OACTIVE;
break;
@@ -2232,11 +2233,6 @@ re_start(ifp)
sc->rl_ldata.rl_tx_prodidx = idx;
/*
* RealTek put the TX poll request register in a different
* location on the 8169 gigE chip. I don't know why.
*/
CSR_WRITE_1(sc, sc->rl_txstart, RL_TXSTART_START);
#ifdef RE_TX_MODERATION

@@ -643,6 +643,7 @@ struct rl_stats {
#endif
#define RL_TX_DESC_CNT 64
#define RL_TX_DESC_THLD 4
#define RL_RX_DESC_CNT RL_TX_DESC_CNT
#define RL_RX_LIST_SZ (RL_RX_DESC_CNT * sizeof(struct rl_desc))
@@ -671,7 +672,6 @@ struct rl_stats {
struct rl_softc;
struct rl_dmaload_arg {
struct rl_softc *sc;
int rl_idx;
int rl_maxsegs;
uint32_t rl_flags;