This commit adds a software workaround for a hardware bug in certain PNIC
chip revisions. (A buggy Taiwanese chip? I'm just shocked; shocked I tell
you.) So far I have only observed the anomalous behavior on boards with
PCI revision 33 chips. At the moment, this seems to include only the
Netgear FA310-TX rev D1 boards with chips labeled NGMC169B. (Possibly this
means it's an 82c169B part from Lite-On.)

The bug only manifests itself in promiscuous mode, and usually only at
10Mbps half-duplex. (I have not observed the problem in full-duplex mode,
and I don't think it ever happens at 100Mbps.) The bug appears to be in
the receiver DMA engine. Normally, the chip is programmed with a linked
list of receiver descriptors, each with a receive buffer capable of holding
a complete full-sized ethernet frame. During periods of heavy traffic
(e.g. ping -c 100 -f 8100 <otherhost>), the receiver will sometimes appear
to upload its entire FIFO memory contents instead of just uploading the
desired received frame. The uploaded data will span several receive
buffers, in spite of the fact that the chip has been told to only use
one descriptor per frame, and appears to consist of previously transmitted
frames with the correct received frame appended to the end.

Unfortunately, there is no way to determine exactly how much data is
uploaded when this happens; the chip doesn't tell you anything except the
size of the desired received frame, and the amount of bogus data varies.
Sometimes, the desired frame is also split across multiple buffers.

The workaround is ugly and nasty. The driver assembles all of the data
from the bogus frames into a single buffer. The receive buffers are always
zeroed out, and we program the chip to always include the receive CRC
at the end of each frame. We therefore know that we can start from the
end of the buffer and scan back until we encounter a non-zero data byte,
and say conclusively that this is the end of the desired frame. We can
then subtract the frame length from this address to determine the real
start of the frame, and copy it into an mbuf and pass it on.

This is kludgy and time consuming, but it's better than dropping frames.
It's not too bad since the problem only happens at 10Mbps.

The workaround is only enabled for chips with PCI revision == 33. The
LinkSys LNE100TX and Matrox FastNIC 10/100 cards use a revision 32 chip
and work fine in promiscuous mode. Netgear support has confirmed that
they "have some previous knowledge of problems in promiscuous mode" but
didn't have a workaround. The people at Lite-On who would be able to
suggest a possible fix are on vacation. So, I decided to implement a
workaround of my own until I hear from them. I suppose this problem made
it through Netgear's QA department since Windows doesn't normally use
promiscuous mode, and if Windows doesn't need the feature then it can't
possibly be important, right? Grrr.
This commit is contained in:
Bill Paul 1998-12-31 17:19:21 +00:00
parent 56f7a84010
commit d1b5b058f7
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=42213
2 changed files with 179 additions and 13 deletions

View File

@ -29,7 +29,7 @@
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*
* $Id: if_pn.c,v 1.3 1998/12/07 21:58:46 archie Exp $
* $Id: if_pn.c,v 1.35 1998/12/31 16:51:01 wpaul Exp $
*/
/*
@ -91,11 +91,13 @@
/* #define PN_BACKGROUND_AUTONEG */
#define PN_PROMISC_BUG_WAR
#include <pci/if_pnreg.h>
#ifndef lint
static const char rcsid[] =
"$Id: if_pn.c,v 1.3 1998/12/07 21:58:46 archie Exp $";
"$Id: if_pn.c,v 1.35 1998/12/31 16:51:01 wpaul Exp $";
#endif
/*
@ -132,6 +134,10 @@ static int pn_newbuf __P((struct pn_softc *,
static int pn_encap __P((struct pn_softc *, struct pn_chain *,
struct mbuf *));
#ifdef PN_PROMISC_BUG_WAR
static void pn_promisc_bug_war __P((struct pn_softc *,
struct pn_chain_onefrag *));
#endif
static void pn_rxeof __P((struct pn_softc *));
static void pn_rxeoc __P((struct pn_softc *));
static void pn_txeof __P((struct pn_softc *));
@ -785,6 +791,9 @@ pn_attach(config_id, unit)
caddr_t roundptr;
struct pn_type *p;
u_int16_t phy_vid, phy_did, phy_sts;
#ifdef PN_PROMISC_BUG_WAR
u_int32_t revision = 0;
#endif
s = splimp();
@ -886,7 +895,7 @@ pn_attach(config_id, unit)
if (sc->pn_ldata_ptr == NULL) {
free(sc, M_DEVBUF);
printf("pn%d: no memory for list buffers!\n", unit);
return;
goto fail;
}
sc->pn_ldata = (struct pn_list_data *)sc->pn_ldata_ptr;
@ -902,6 +911,20 @@ pn_attach(config_id, unit)
sc->pn_ldata = (struct pn_list_data *)roundptr;
bzero(sc->pn_ldata, sizeof(struct pn_list_data));
#ifdef PN_PROMISC_BUG_WAR
revision = pci_conf_read(config_id, PN_PCI_REVISION) & 0x000000FF;
if (revision == PN_169B_REV) {
sc->pn_promisc_war = 1;
sc->pn_promisc_buf = malloc(PN_RXLEN * 5, M_DEVBUF, M_NOWAIT);
if (sc->pn_promisc_buf == NULL) {
printf("pn%d: no memory for workaround buffer\n", unit);
goto fail;
}
} else {
sc->pn_promisc_war = 0;
}
#endif
ifp = &sc->arpcom.ac_if;
ifp->if_softc = sc;
ifp->if_unit = unit;
@ -1078,14 +1101,130 @@ static int pn_newbuf(sc, c)
return(ENOBUFS);
}
/*
* Zero the buffer. This is part of the workaround for the
* promiscuous mode bug in the revision 33 PNIC chips.
*/
bzero((char *)mtod(m_new, char *), MCLBYTES);
m_new->m_len = m_new->m_pkthdr.len = MCLBYTES;
c->pn_mbuf = m_new;
c->pn_ptr->pn_status = PN_RXSTAT;
c->pn_ptr->pn_data = vtophys(mtod(m_new, caddr_t));
c->pn_ptr->pn_ctl = PN_RXCTL_RLINK | (MCLBYTES - 1);
c->pn_ptr->pn_ctl = PN_RXCTL_RLINK | PN_RXLEN;
return(0);
}
#ifdef PN_PROMISC_BUG_WAR
/*
* Grrrrr.
* Revision 33 of the PNIC chip has a terrible bug in it that manifests
* itself when you enable promiscuous mode. Sometimes instead of uploading
* one complete frame, it uploads its entire FIFO memory. The frame we
* want is at the end of this whole mess, but we never know exactly
* how much data has been uploaded, so finding it can be hard.
*
* There is only one way to do it reliably, and it's disgusting.
* Here's what we know:
*
* - We know there will always be somewhere between one and three extra
* descriptors uploaded.
*
* - We know the desired received frame will always be at the end of the
* total data upload.
*
* - We know the size of the desired received frame because it will be
* provided in the length field of the status word in the last descriptor.
*
* Here's what we do:
*
* - When we allocate buffers for the receive ring, we bzero() them.
* This means that we know that the buffer contents should be all
* zeros, except for data uploaded by the chip.
*
* - We also force the PNIC chip to upload frames that include the
* ethernet CRC at the end.
*
* - We gather all of the bogus frame data into a single buffer.
*
* - We then position a pointer at the end of this buffer and scan
* backwards until we encounter the first non-zero byte of data.
* This is the end of the received frame. We know we will encounter
* some data at the end of the frame because the CRC will always be
* there, so even if the sender transmits a packet of all zeros,
* we won't be fooled.
*
* - We know the size of the actual received frame, so we subtract
* that value from the current pointer location. This brings us
* to the start of the actual received packet.
*
* - We copy this into an mbuf and pass it on, along with the actual
* frame length.
*
* The performance hit is tremendous, but it beats dropping frames all
* the time.
*/
#define PN_WHOLEFRAME (PN_RXSTAT_FIRSTFRAG|PN_RXSTAT_LASTFRAG)
static void pn_promisc_bug_war(sc, cur_rx)
struct pn_softc *sc;
struct pn_chain_onefrag *cur_rx;
{
struct pn_chain_onefrag *c;
unsigned char *ptr;
int total_len;
u_int32_t rxstat = 0;
c = sc->pn_promisc_bug_save;
ptr = sc->pn_promisc_buf;
bzero(ptr, sizeof(PN_RXLEN * 5));
/* Copy all the bytes from the bogus buffers. */
while ((c->pn_ptr->pn_status & PN_WHOLEFRAME) != PN_WHOLEFRAME) {
rxstat = c->pn_ptr->pn_status;
m_copydata(c->pn_mbuf, 0, PN_RXLEN, ptr);
ptr += PN_RXLEN - 2; /* round down to 32-bit boundary */
if (c == cur_rx)
break;
if (rxstat & PN_RXSTAT_LASTFRAG)
break;
c->pn_ptr->pn_status = PN_RXSTAT;
c->pn_ptr->pn_ctl = PN_RXCTL_RLINK | PN_RXLEN;
bzero((char *)mtod(c->pn_mbuf, char *), MCLBYTES);
c = c->pn_nextdesc;
}
/* Find the length of the actual receive frame. */
total_len = PN_RXBYTES(rxstat);
/* Scan backwards until we hit a non-zero byte. */
while(*ptr == 0x00) {
ptr--;
}
if ((u_int32_t)(ptr) & 0x3)
ptr -= 1;
/* Now find the start of the frame. */
ptr -= total_len;
if (ptr < sc->pn_promisc_buf)
ptr = sc->pn_promisc_buf;
/*
* Now copy the salvaged frame to the last mbuf and fake up
* the status word to make it look like a successful
* frame reception.
*/
m_copyback(cur_rx->pn_mbuf, 0, total_len, ptr);
cur_rx->pn_mbuf->m_len = c->pn_mbuf->m_pkthdr.len = MCLBYTES;
cur_rx->pn_ptr->pn_status |= PN_RXSTAT_FIRSTFRAG;
return;
}
#endif
/*
* A frame has been uploaded: pass the resulting mbuf chain up to
* the higher level protocols.
@ -1107,6 +1246,24 @@ static void pn_rxeof(sc)
cur_rx = sc->pn_cdata.pn_rx_head;
sc->pn_cdata.pn_rx_head = cur_rx->pn_nextdesc;
#ifdef PN_PROMISC_BUG_WAR
/*
* XXX The PNIC seems to have a bug that manifests
* when the promiscuous mode bit is set: we have to
* watch for it and work around it.
*/
if (sc->pn_promisc_war && ifp->if_flags & IFF_PROMISC) {
if ((rxstat & PN_WHOLEFRAME) != PN_WHOLEFRAME) {
if (rxstat & PN_RXSTAT_FIRSTFRAG)
sc->pn_promisc_bug_save = cur_rx;
if ((rxstat & PN_RXSTAT_LASTFRAG) == 0)
continue;
pn_promisc_bug_war(sc, cur_rx);
rxstat = cur_rx->pn_ptr->pn_status;
}
}
#endif
/*
* If an error occurs, update stats, clear the
* status word and leave the mbuf cluster in place:
@ -1118,8 +1275,8 @@ static void pn_rxeof(sc)
if (rxstat & PN_RXSTAT_COLLSEEN)
ifp->if_collisions++;
cur_rx->pn_ptr->pn_status = PN_RXSTAT;
cur_rx->pn_ptr->pn_ctl =
PN_RXCTL_RLINK | (MCLBYTES - 1);
cur_rx->pn_ptr->pn_ctl = PN_RXCTL_RLINK | PN_RXLEN;
bzero((char *)mtod(cur_rx->pn_mbuf, char *), MCLBYTES);
continue;
}
@ -1127,6 +1284,9 @@ static void pn_rxeof(sc)
m = cur_rx->pn_mbuf;
total_len = PN_RXBYTES(cur_rx->pn_ptr->pn_status);
/* Trim off the CRC. */
total_len -= ETHER_CRC_LEN;
/*
* Try to conjure up a new mbuf cluster. If that
* fails, it means we have an out of memory condition and
@ -1136,10 +1296,9 @@ static void pn_rxeof(sc)
*/
if (pn_newbuf(sc, cur_rx) == ENOBUFS) {
ifp->if_ierrors++;
cur_rx->pn_ptr->pn_status =
PN_RXSTAT_FIRSTFRAG|PN_RXSTAT_LASTFRAG;
cur_rx->pn_ptr->pn_ctl =
PN_RXCTL_RLINK | (MCLBYTES - 1);
cur_rx->pn_ptr->pn_status = PN_RXSTAT;
cur_rx->pn_ptr->pn_ctl = PN_RXCTL_RLINK | PN_RXLEN;
bzero((char *)mtod(cur_rx->pn_mbuf, char *), MCLBYTES);
continue;
}
@ -1300,7 +1459,6 @@ static void pn_intr(arg)
if ((status & PN_INTRS) == 0)
break;
if (status & PN_ISR_RX_OK)
pn_rxeof(sc);
@ -1550,7 +1708,7 @@ static void pn_init(xsc)
CSR_WRITE_4(sc, PN_BUSCTL, PN_BUSCTL_CONFIG);
PN_CLRBIT(sc, PN_NETCFG, PN_NETCFG_TX_IMMEDIATE);
PN_SETBIT(sc, PN_NETCFG, PN_NETCFG_NO_RXCRC);
PN_CLRBIT(sc, PN_NETCFG, PN_NETCFG_NO_RXCRC);
PN_CLRBIT(sc, PN_NETCFG, PN_NETCFG_HEARTBEAT);
PN_CLRBIT(sc, PN_NETCFG, PN_NETCFG_STORENFWD);
PN_CLRBIT(sc, PN_NETCFG, PN_NETCFG_TX_BACKOFF);

View File

@ -29,7 +29,7 @@
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGE.
*
* $Id: if_pnreg.h,v 1.1 1998/12/04 18:01:21 wpaul Exp $
* $Id: if_pnreg.h,v 1.15 1998/12/31 16:51:01 wpaul Exp $
*/
/*
@ -350,6 +350,7 @@ struct pn_desc {
#define PN_TX_LIST_CNT 64
#define PN_MIN_FRAMELEN 60
#define PN_FRAMELEN 1536
#define PN_RXLEN 1518
/*
* A tx 'super descriptor' is actually 16 regular descriptors
@ -440,6 +441,12 @@ struct pn_softc {
u_int8_t pn_want_auto;
u_int8_t pn_autoneg;
caddr_t pn_ldata_ptr;
#ifdef PN_PROMISC_BUG_WAR
#define PN_169B_REV 33
u_int8_t pn_promisc_war;
struct pn_chain_onefrag *pn_promisc_bug_save;
unsigned char *pn_promisc_buf;
#endif
struct pn_list_data *pn_ldata;
struct pn_chain_data pn_cdata;
};
@ -516,6 +523,7 @@ struct pn_softc {
#define PN_PCI_DEVICE_ID 0x02
#define PN_PCI_COMMAND 0x04
#define PN_PCI_STATUS 0x06
#define PN_PCI_REVISION 0x08
#define PN_PCI_CLASSCODE 0x09
#define PN_PCI_LATENCY_TIMER 0x0D
#define PN_PCI_HEADER_TYPE 0x0E