It appears that under certain circumstances that I still can't quite pin

down, the dc driver and receiver can fall out of sync with one another,
resulting in a condition where the chip continues to receive packets
but the driver never notices. Normally, the receive handler checks each
descriptor starting from the current producer index to see if the chip
has relinquished ownership, indicating that a packet has been received.
The driver hands the packet off to ether_input() and then prepares the
descriptor to receive another frame before moving on to the next
descriptor in the ring. But sometimes, the chip appears to skip a
descriptor. This leaves the driver testing the status word in a descriptor
that never gets updated. The driver still gets "RX done" interrupts but
never advances further into the RX ring, until the ring fills up and the
chip interrupts again to signal an error condition. Sometimes, the
driver will remain in this desynchronized state, resulting in spotty
performance until the interface is reset.

Fortunately, it's fairly simple to detect this condition: if we call
the rxeof routine but the number of received packets doesn't increase,
we suspect that there could be a problem. In this case, we call a new
routine called dc_rx_resync(), which scans ahead in the RX ring to see
if there's a frame waiting for us somewhere beyond what the driver thinks
is the current producer index. If it finds one, it bumps up the index
and calls the rxeof handler again to snarf up the packet and bring the
driver back in sync with the chip. (It may actually do this several times
in the event that there's more than one "hole" in the ring.)

So far the only card supported by if_dc which has exhibited this problem
is a LinkSys LNE100TX v2.0 (82c115 PNIC II), and it only seems to happen
on one particular system; however, the fix is general enough and has low
enough overhead that we may as well apply it for all supported chipsets.
I also implemented the same fix for the 3Com xl driver, which is apparently
vulnerable to the same problem.

Problem originally noted and patch tested by: Matt Dillon
This commit is contained in:
wpaul 2000-01-03 15:28:47 +00:00
parent 8aa0982899
commit cd6f97abb5
3 changed files with 138 additions and 5 deletions

View File

@ -198,6 +198,7 @@ static int dc_newbuf __P((struct dc_softc *, int, struct mbuf *));
static int dc_encap __P((struct dc_softc *, struct mbuf *,
u_int32_t *));
static void dc_pnic_rx_bug_war __P((struct dc_softc *, int));
static int dc_rx_resync __P((struct dc_softc *));
static void dc_rxeof __P((struct dc_softc *));
static void dc_txeof __P((struct dc_softc *));
static void dc_tick __P((void *));
@ -1918,6 +1919,42 @@ static void dc_pnic_rx_bug_war(sc, idx)
return;
}
/*
 * Resynchronize the driver's RX producer index with the chip.
 *
 * Walk the RX ring, starting at the driver's current producer index,
 * looking for the first descriptor whose DC_RXSTAT_OWN bit is clear,
 * i.e. a dirty descriptor that the chip has handed back to us. The
 * rxeof routine can fall out of sync with the chip's current descriptor
 * pointer; this may happen sometimes as a result of a "no RX buffer
 * available" condition, where the chip consumes all of the RX buffers
 * before the driver has a chance to process the RX ring.
 *
 * Returns 0 if all DC_RX_LIST_CNT descriptors are still owned by the
 * chip (the ring really is empty). Otherwise the producer index is
 * moved to the descriptor that was found and EAGAIN is returned. This
 * routine may need to be called more than once to bring the driver
 * back in sync with the chip; we should still be getting RX DONE
 * interrupts to drive the search for new packets, so we should catch
 * up eventually.
 */
static int dc_rx_resync(sc)
	struct dc_softc		*sc;
{
	struct dc_desc		*desc;
	int			idx, scanned;

	idx = sc->dc_cdata.dc_rx_prod;
	scanned = 0;

	while (scanned < DC_RX_LIST_CNT) {
		desc = &sc->dc_ldata->dc_rx_list[idx];
		if ((desc->dc_status & DC_RXSTAT_OWN) == 0) {
			/* We've fallen behind the chip: catch it. */
			sc->dc_cdata.dc_rx_prod = idx;
			return(EAGAIN);
		}
		DC_INC(idx, DC_RX_LIST_CNT);
		scanned++;
	}

	/* Every descriptor is still owned by the chip: nothing to do. */
	return(0);
}
/*
* A frame has been uploaded: pass the resulting mbuf chain up to
* the higher level protocols.
@ -2204,8 +2241,15 @@ static void dc_intr(arg)
CSR_WRITE_4(sc, DC_ISR, status);
if (status & DC_ISR_RX_OK)
if (status & DC_ISR_RX_OK) {
int curpkts;
curpkts = ifp->if_ipackets;
dc_rxeof(sc);
if (curpkts == ifp->if_ipackets) {
while(dc_rx_resync(sc))
dc_rxeof(sc);
}
}
if (status & (DC_ISR_TX_OK|DC_ISR_TX_NOBUF))
dc_txeof(sc);
@ -2241,8 +2285,15 @@ static void dc_intr(arg)
}
if ((status & DC_ISR_RX_WATDOGTIMEO)
|| (status & DC_ISR_RX_NOBUF))
|| (status & DC_ISR_RX_NOBUF)) {
int curpkts;
curpkts = ifp->if_ipackets;
dc_rxeof(sc);
if (curpkts == ifp->if_ipackets) {
while(dc_rx_resync(sc))
dc_rxeof(sc);
}
}
if (status & DC_ISR_BUS_ERR) {
dc_reset(sc);

View File

@ -198,6 +198,7 @@ static int dc_newbuf __P((struct dc_softc *, int, struct mbuf *));
static int dc_encap __P((struct dc_softc *, struct mbuf *,
u_int32_t *));
static void dc_pnic_rx_bug_war __P((struct dc_softc *, int));
static int dc_rx_resync __P((struct dc_softc *));
static void dc_rxeof __P((struct dc_softc *));
static void dc_txeof __P((struct dc_softc *));
static void dc_tick __P((void *));
@ -1918,6 +1919,42 @@ static void dc_pnic_rx_bug_war(sc, idx)
return;
}
/*
 * Resynchronize the driver's RX producer index with the chip.
 *
 * Walk the RX ring, starting at the driver's current producer index,
 * looking for the first descriptor whose DC_RXSTAT_OWN bit is clear,
 * i.e. a dirty descriptor that the chip has handed back to us. The
 * rxeof routine can fall out of sync with the chip's current descriptor
 * pointer; this may happen sometimes as a result of a "no RX buffer
 * available" condition, where the chip consumes all of the RX buffers
 * before the driver has a chance to process the RX ring.
 *
 * Returns 0 if all DC_RX_LIST_CNT descriptors are still owned by the
 * chip (the ring really is empty). Otherwise the producer index is
 * moved to the descriptor that was found and EAGAIN is returned. This
 * routine may need to be called more than once to bring the driver
 * back in sync with the chip; we should still be getting RX DONE
 * interrupts to drive the search for new packets, so we should catch
 * up eventually.
 */
static int dc_rx_resync(sc)
	struct dc_softc		*sc;
{
	struct dc_desc		*desc;
	int			idx, scanned;

	idx = sc->dc_cdata.dc_rx_prod;
	scanned = 0;

	while (scanned < DC_RX_LIST_CNT) {
		desc = &sc->dc_ldata->dc_rx_list[idx];
		if ((desc->dc_status & DC_RXSTAT_OWN) == 0) {
			/* We've fallen behind the chip: catch it. */
			sc->dc_cdata.dc_rx_prod = idx;
			return(EAGAIN);
		}
		DC_INC(idx, DC_RX_LIST_CNT);
		scanned++;
	}

	/* Every descriptor is still owned by the chip: nothing to do. */
	return(0);
}
/*
* A frame has been uploaded: pass the resulting mbuf chain up to
* the higher level protocols.
@ -2204,8 +2241,15 @@ static void dc_intr(arg)
CSR_WRITE_4(sc, DC_ISR, status);
if (status & DC_ISR_RX_OK)
if (status & DC_ISR_RX_OK) {
int curpkts;
curpkts = ifp->if_ipackets;
dc_rxeof(sc);
if (curpkts == ifp->if_ipackets) {
while(dc_rx_resync(sc))
dc_rxeof(sc);
}
}
if (status & (DC_ISR_TX_OK|DC_ISR_TX_NOBUF))
dc_txeof(sc);
@ -2241,8 +2285,15 @@ static void dc_intr(arg)
}
if ((status & DC_ISR_RX_WATDOGTIMEO)
|| (status & DC_ISR_RX_NOBUF))
|| (status & DC_ISR_RX_NOBUF)) {
int curpkts;
curpkts = ifp->if_ipackets;
dc_rxeof(sc);
if (curpkts == ifp->if_ipackets) {
while(dc_rx_resync(sc))
dc_rxeof(sc);
}
}
if (status & DC_ISR_BUS_ERR) {
dc_reset(sc);

View File

@ -205,6 +205,7 @@ static int xl_encap_90xB __P((struct xl_softc *, struct xl_chain *,
struct mbuf * ));
static void xl_rxeof __P((struct xl_softc *));
static int xl_rx_resync __P((struct xl_softc *));
static void xl_txeof __P((struct xl_softc *));
static void xl_txeof_90xB __P((struct xl_softc *));
static void xl_txeoc __P((struct xl_softc *));
@ -1676,6 +1677,28 @@ static int xl_newbuf(sc, c)
return(0);
}
/*
 * Search the RX ring for a descriptor with a non-zero status word,
 * starting at the current RX head and examining at most
 * XL_RX_LIST_CNT entries. If one is found, the RX head is moved to
 * it and EAGAIN is returned so that the caller can run the rxeof
 * handler again. If none is found, the head is left where it was
 * and 0 is returned.
 */
static int xl_rx_resync(sc)
	struct xl_softc		*sc;
{
	struct xl_chain_onefrag	*cur;
	int			remaining;

	cur = sc->xl_cdata.xl_rx_head;

	for (remaining = XL_RX_LIST_CNT; remaining > 0; remaining--) {
		if (cur->xl_ptr->xl_status != 0) {
			/* Found a descriptor with status set: resume here. */
			sc->xl_cdata.xl_rx_head = cur;
			return(EAGAIN);
		}
		cur = cur->xl_next;
	}

	/* No entry in the ring has a status word set. */
	return(0);
}
/*
* A frame has been uploaded: pass the resulting mbuf chain up to
* the higher level protocols.
@ -1981,8 +2004,16 @@ static void xl_intr(arg)
CSR_WRITE_2(sc, XL_COMMAND,
XL_CMD_INTR_ACK|(status & XL_INTRS));
if (status & XL_STAT_UP_COMPLETE)
if (status & XL_STAT_UP_COMPLETE) {
int curpkts;
curpkts = ifp->if_ipackets;
xl_rxeof(sc);
if (curpkts == ifp->if_ipackets) {
while (xl_rx_resync(sc))
xl_rxeof(sc);
}
}
if (status & XL_STAT_DOWN_COMPLETE) {
if (sc->xl_type == XL_TYPE_905B)