Fixes for the D-Link DFE-580 card.

This fixes pretty much every issue I could find:
     -  Watchdog timeouts were due to starting the TX DMA engine
        before we had a packet ready for it.  So the first packet
        sent never got out; only if we sent more than one packet
        at a time did the others make it out and not blow up.
        Of course, resetting the chip then caused us not to transmit
        the first packet again, i.e. a catch-22.  This required logic
        changes.
     -  Combine interrupts on TX packets being queued up.
     -  Don't keep running around the RX ring, since we might get
        out of sync; only go around once per receive.
     -  Let the RX engine recover via the poll interface which is
        similar to the TX interface.  This way the chip wakes
        up with no effort when we read enough packets.
     -  Do better hand-shaking on RX & TX packets so they don't
        start off too soon.
     -  Force a duplex setting when the link comes up after
        an ste_init or it will default to half-duplex and be
        really slow.  This only happens on subsequent ste_init.
        The first one worked.
     -  Don't call stat_update for every overflow.  We only monitor
        the collisions so the tick interval is good enough for that.
        Just read in the collision stats to minimize bus reads.
     -  Don't read the miibus every tick since it uses delays and
        delays are not good for performance.
     -  Tie link events directly to the miibus code so the port
        gets set correctly if someone changes the port settings.
     -  Reduce the extreme number of {R,T}FD's.  They would consume
        130K of kernel memory for each NIC.
     -  Set the TX_THRESH to wait for the DMA engine to complete
        before running the TX FIFO.  This hurts peak TX performance,
        but under bi-directional load the DMA engine can't keep up
        with the FIFO.  Testing shows that we end up in that case
        anyway (a la dc(4) issues, but worse since the RX engine hogs
	everything).
     -  When stopping the card, do a reset, since the reset verifies the
	card has stopped.  Otherwise, on heavy RX load, the RX DMA engine
	is still stuffing packets into memory.  If that happens after
	we free the DMA area, stray bits get scribbled into memory and
	bad things happen.

This card still has seemingly unfixable issues under heavy RX load in
which the card takes over the PCI bus.

Sponsored by:	Vernier Networks
MFC after:	1 week
This commit is contained in:
Doug Ambrisko 2002-08-07 22:31:27 +00:00
parent 130be1f120
commit d44ef39e51
2 changed files with 111 additions and 83 deletions

View File

@ -45,6 +45,7 @@
#include <net/ethernet.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_vlan_var.h>
#include <net/bpf.h>
@ -643,6 +644,10 @@ static void ste_intr(xsc)
ste_stats_update(sc);
}
if (status & STE_ISR_LINKEVENT)
mii_pollstat(device_get_softc(sc->ste_miibus));
if (status & STE_ISR_HOSTERR) {
ste_reset(sc);
ste_init(sc);
@ -671,14 +676,17 @@ static void ste_rxeof(sc)
struct mbuf *m;
struct ifnet *ifp;
struct ste_chain_onefrag *cur_rx;
int total_len = 0;
int total_len = 0, count=0;
u_int32_t rxstat;
ifp = &sc->arpcom.ac_if;
again:
while((rxstat = sc->ste_cdata.ste_rx_head->ste_ptr->ste_status)
& STE_RXSTAT_DMADONE) {
if ((STE_RX_LIST_CNT - count) < 3) {
break;
}
while((rxstat = sc->ste_cdata.ste_rx_head->ste_ptr->ste_status)) {
cur_rx = sc->ste_cdata.ste_rx_head;
sc->ste_cdata.ste_rx_head = cur_rx->ste_next;
@ -732,29 +740,9 @@ static void ste_rxeof(sc)
/* Remove header from mbuf and pass it on. */
m_adj(m, sizeof(struct ether_header));
ether_input(ifp, eh, m);
}
/*
* Handle the 'end of channel' condition. When the upload
* engine hits the end of the RX ring, it will stall. This
* is our cue to flush the RX ring, reload the uplist pointer
* register and unstall the engine.
* XXX This is actually a little goofy. With the ThunderLAN
* chip, you get an interrupt when the receiver hits the end
* of the receive ring, which tells you exactly when you
* you need to reload the ring pointer. Here we have to
* fake it. I'm mad at myself for not being clever enough
* to avoid the use of a goto here.
*/
if (CSR_READ_4(sc, STE_RX_DMALIST_PTR) == 0 ||
CSR_READ_4(sc, STE_DMACTL) & STE_DMACTL_RXDMA_STOPPED) {
STE_SETBIT4(sc, STE_DMACTL, STE_DMACTL_RXDMA_STALL);
ste_wait(sc);
CSR_WRITE_4(sc, STE_RX_DMALIST_PTR,
vtophys(&sc->ste_ldata->ste_rx_list[0]));
sc->ste_cdata.ste_rx_head = &sc->ste_cdata.ste_rx_chain[0];
STE_SETBIT4(sc, STE_DMACTL, STE_DMACTL_RXDMA_UNSTALL);
goto again;
cur_rx->ste_ptr->ste_status = 0;
count++;
}
return;
@ -838,11 +826,8 @@ static void ste_stats_update(xsc)
void *xsc;
{
struct ste_softc *sc;
struct ste_stats stats;
struct ifnet *ifp;
struct mii_data *mii;
int i;
u_int8_t *p;
sc = xsc;
STE_LOCK(sc);
@ -850,22 +835,23 @@ static void ste_stats_update(xsc)
ifp = &sc->arpcom.ac_if;
mii = device_get_softc(sc->ste_miibus);
p = (u_int8_t *)&stats;
ifp->if_collisions += CSR_READ_1(sc, STE_LATE_COLLS)
+ CSR_READ_1(sc, STE_MULTI_COLLS)
+ CSR_READ_1(sc, STE_SINGLE_COLLS);
for (i = 0; i < sizeof(stats); i++) {
*p = CSR_READ_1(sc, STE_STATS + i);
p++;
}
ifp->if_collisions += stats.ste_single_colls +
stats.ste_multi_colls + stats.ste_late_colls;
mii_tick(mii);
if (!sc->ste_link && mii->mii_media_status & IFM_ACTIVE &&
IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE) {
sc->ste_link++;
if (ifp->if_snd.ifq_head != NULL)
ste_start(ifp);
if (!sc->ste_link) {
mii_pollstat(mii);
if (mii->mii_media_status & IFM_ACTIVE &&
IFM_SUBTYPE(mii->mii_media_active) != IFM_NONE) {
sc->ste_link++;
/*
* we don't get a call-back on re-init so do it
* otherwise we get stuck in the wrong link state
*/
ste_miibus_statchg(sc->ste_dev);
if (ifp->if_snd.ifq_head != NULL)
ste_start(ifp);
}
}
sc->ste_stat_ch = timeout(ste_stats_update, sc, hz);
@ -913,6 +899,7 @@ static int ste_attach(dev)
sc = device_get_softc(dev);
unit = device_get_unit(dev);
bzero(sc, sizeof(struct ste_softc));
sc->ste_dev = dev;
mtx_init(&sc->ste_mtx, device_get_nameunit(dev), MTX_NETWORK_LOCK,
MTX_DEF | MTX_RECURSE);
@ -1065,10 +1052,18 @@ static int ste_attach(dev)
ifp->if_baudrate = 10000000;
ifp->if_snd.ifq_maxlen = STE_TX_LIST_CNT - 1;
sc->ste_tx_thresh = STE_TXSTART_THRESH;
/*
* Call MI attach routine.
*/
ether_ifattach(ifp, ETHER_BPF_SUPPORTED);
/*
* Tell the upper layer(s) we support long frames.
*/
ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header);
STE_UNLOCK(sc);
return(0);
@ -1134,7 +1129,7 @@ static int ste_newbuf(sc, c, m)
c->ste_mbuf = m_new;
c->ste_ptr->ste_status = 0;
c->ste_ptr->ste_frag.ste_addr = vtophys(mtod(m_new, caddr_t));
c->ste_ptr->ste_frag.ste_len = 1536 | STE_FRAG_LAST;
c->ste_ptr->ste_frag.ste_len = (1536 + EVL_ENCAPLEN) | STE_FRAG_LAST;
return(0);
}
@ -1164,7 +1159,7 @@ static int ste_init_rx_list(sc)
ld->ste_rx_list[i].ste_next =
vtophys(&ld->ste_rx_list[i + 1]);
}
ld->ste_rx_list[i].ste_status = 0;
}
cd->ste_rx_head = &cd->ste_rx_chain[0];
@ -1183,6 +1178,8 @@ static void ste_init_tx_list(sc)
ld = sc->ste_ldata;
for (i = 0; i < STE_TX_LIST_CNT; i++) {
cd->ste_tx_chain[i].ste_ptr = &ld->ste_tx_list[i];
cd->ste_tx_chain[i].ste_ptr->ste_next = 0;
cd->ste_tx_chain[i].ste_ptr->ste_ctl = 0;
cd->ste_tx_chain[i].ste_phys = vtophys(&ld->ste_tx_list[i]);
if (i == (STE_TX_LIST_CNT - 1))
cd->ste_tx_chain[i].ste_next =
@ -1198,10 +1195,6 @@ static void ste_init_tx_list(sc)
&cd->ste_tx_chain[i - 1];
}
bzero((char *)ld->ste_tx_list,
sizeof(struct ste_desc) * STE_TX_LIST_CNT);
cd->ste_tx_prod = 0;
cd->ste_tx_cons = 0;
cd->ste_tx_cnt = 0;
@ -1238,6 +1231,9 @@ static void ste_init(xsc)
return;
}
/* Set RX polling interval */
CSR_WRITE_1(sc, STE_RX_DMAPOLL_PERIOD, 1);
/* Init TX descriptors */
ste_init_tx_list(sc);
@ -1277,20 +1273,21 @@ static void ste_init(xsc)
STE_SETBIT4(sc, STE_DMACTL, STE_DMACTL_RXDMA_UNSTALL);
STE_SETBIT4(sc, STE_DMACTL, STE_DMACTL_RXDMA_UNSTALL);
/* Set TX polling interval */
CSR_WRITE_1(sc, STE_TX_DMAPOLL_PERIOD, 64);
/* Set TX polling interval (defer until we TX first packet) */
CSR_WRITE_1(sc, STE_TX_DMAPOLL_PERIOD, 0);
/* Load address of the TX list */
STE_SETBIT4(sc, STE_DMACTL, STE_DMACTL_TXDMA_STALL);
ste_wait(sc);
CSR_WRITE_4(sc, STE_TX_DMALIST_PTR,
vtophys(&sc->ste_ldata->ste_tx_list[0]));
CSR_WRITE_4(sc, STE_TX_DMALIST_PTR, 0);
STE_SETBIT4(sc, STE_DMACTL, STE_DMACTL_TXDMA_UNSTALL);
STE_SETBIT4(sc, STE_DMACTL, STE_DMACTL_TXDMA_UNSTALL);
ste_wait(sc);
sc->ste_tx_prev_idx=-1;
/* Enable receiver and transmitter */
CSR_WRITE_2(sc, STE_MACCTL0, 0);
CSR_WRITE_2(sc, STE_MACCTL1, 0);
STE_SETBIT2(sc, STE_MACCTL1, STE_MACCTL1_TX_ENABLE);
STE_SETBIT2(sc, STE_MACCTL1, STE_MACCTL1_RX_ENABLE);
@ -1301,6 +1298,9 @@ static void ste_init(xsc)
CSR_WRITE_2(sc, STE_ISR, 0xFFFF);
CSR_WRITE_2(sc, STE_IMR, STE_INTRS);
/* Accept VLAN length packets */
CSR_WRITE_2(sc, STE_MAX_FRAMELEN, ETHER_MAX_LEN + EVL_ENCAPLEN);
ste_ifmedia_upd(ifp);
ifp->if_flags |= IFF_RUNNING;
@ -1330,6 +1330,11 @@ static void ste_stop(sc)
STE_SETBIT2(sc, STE_DMACTL, STE_DMACTL_TXDMA_STALL);
STE_SETBIT2(sc, STE_DMACTL, STE_DMACTL_RXDMA_STALL);
ste_wait(sc);
/*
* Try really hard to stop the RX engine or under heavy RX
* data chip will write into de-allocated memory.
*/
ste_reset(sc);
sc->ste_link = 0;
@ -1347,6 +1352,8 @@ static void ste_stop(sc)
}
}
bzero(sc->ste_ldata, sizeof(struct ste_list_data));
ifp->if_flags &= ~(IFF_RUNNING|IFF_OACTIVE);
STE_UNLOCK(sc);
@ -1410,8 +1417,10 @@ static int ste_ioctl(ifp, command, data)
sc->ste_if_flags & IFF_PROMISC) {
STE_CLRBIT1(sc, STE_RX_MODE,
STE_RXMODE_PROMISC);
} else if (!(ifp->if_flags & IFF_RUNNING)) {
sc->ste_tx_thresh = STE_MIN_FRAMELEN;
}
if (!(ifp->if_flags & IFF_RUNNING)) {
sc->ste_tx_thresh = STE_MIN_FRAMELEN * 2;
sc->ste_tx_thresh = STE_TXSTART_THRESH;
ste_init(sc);
}
} else {
@ -1454,14 +1463,13 @@ static int ste_encap(sc, c, m_head)
d = c->ste_ptr;
d->ste_ctl = 0;
d->ste_next = 0;
for (m = m_head, frag = 0; m != NULL; m = m->m_next) {
if (m->m_len != 0) {
if (frag == STE_MAXFRAGS)
break;
total_len += m->m_len;
f = &c->ste_ptr->ste_frags[frag];
f = &d->ste_frags[frag];
f->ste_addr = vtophys(mtod(m, vm_offset_t));
f->ste_len = m->m_len;
frag++;
@ -1469,8 +1477,8 @@ static int ste_encap(sc, c, m_head)
}
c->ste_mbuf = m_head;
c->ste_ptr->ste_frags[frag - 1].ste_len |= STE_FRAG_LAST;
c->ste_ptr->ste_ctl = total_len;
d->ste_frags[frag - 1].ste_len |= STE_FRAG_LAST;
d->ste_ctl = 1;
return(0);
}
@ -1480,7 +1488,7 @@ static void ste_start(ifp)
{
struct ste_softc *sc;
struct mbuf *m_head = NULL;
struct ste_chain *prev = NULL, *cur_tx = NULL, *start_tx;
struct ste_chain *cur_tx = NULL;
int idx;
sc = ifp->if_softc;
@ -1497,7 +1505,6 @@ static void ste_start(ifp)
}
idx = sc->ste_cdata.ste_tx_prod;
start_tx = &sc->ste_cdata.ste_tx_chain[idx];
while(sc->ste_cdata.ste_tx_chain[idx].ste_mbuf == NULL) {
@ -1514,9 +1521,32 @@ static void ste_start(ifp)
ste_encap(sc, cur_tx, m_head);
if (prev != NULL)
prev->ste_ptr->ste_next = cur_tx->ste_phys;
prev = cur_tx;
cur_tx->ste_ptr->ste_next = 0;
if(sc->ste_tx_prev_idx < 0){
cur_tx->ste_ptr->ste_ctl = STE_TXCTL_DMAINTR | 1;
/* Load address of the TX list */
STE_SETBIT4(sc, STE_DMACTL, STE_DMACTL_TXDMA_STALL);
ste_wait(sc);
CSR_WRITE_4(sc, STE_TX_DMALIST_PTR,
vtophys(&sc->ste_ldata->ste_tx_list[0]));
/* Set TX polling interval to start TX engine */
CSR_WRITE_1(sc, STE_TX_DMAPOLL_PERIOD, 64);
STE_SETBIT4(sc, STE_DMACTL, STE_DMACTL_TXDMA_UNSTALL);
STE_SETBIT4(sc, STE_DMACTL, STE_DMACTL_TXDMA_UNSTALL);
ste_wait(sc);
}else{
cur_tx->ste_ptr->ste_ctl = STE_TXCTL_DMAINTR | 1;
sc->ste_cdata.ste_tx_chain[
sc->ste_tx_prev_idx].ste_ptr->ste_next
= cur_tx->ste_phys;
}
sc->ste_tx_prev_idx=idx;
/*
* If there's a BPF listener, bounce a copy of this frame
@ -1527,20 +1557,10 @@ static void ste_start(ifp)
STE_INC(idx, STE_TX_LIST_CNT);
sc->ste_cdata.ste_tx_cnt++;
ifp->if_timer = 5;
sc->ste_cdata.ste_tx_prod = idx;
}
if (cur_tx == NULL) {
STE_UNLOCK(sc);
return;
}
cur_tx->ste_ptr->ste_ctl |= STE_TXCTL_DMAINTR;
/* Start transmission */
sc->ste_cdata.ste_tx_prod = idx;
start_tx->ste_prev->ste_ptr->ste_next = start_tx->ste_phys;
ifp->if_timer = 5;
STE_UNLOCK(sc);
return;

View File

@ -93,6 +93,10 @@
#define STE_MAR3 0x66
#define STE_STATS 0x68
#define STE_LATE_COLLS 0x75
#define STE_MULTI_COLLS 0x76
#define STE_SINGLE_COLLS 0x77
#define STE_DMACTL_RXDMA_STOPPED 0x00000001
#define STE_DMACTL_TXDMA_CMPREQ 0x00000002
#define STE_DMACTL_TXDMA_STOPPED 0x00000004
@ -224,13 +228,13 @@
* The number of bytes that must be present in the TX FIFO before
* transmission begins. Value should be in increments of 4 bytes.
*/
#define STE_TXSTART_THRESH 0x1FFF
#define STE_TXSTART_THRESH 0x1FFC
/*
* Number of bytes that must be present in the RX FIFO before
* an RX EARLY interrupt is generated.
*/
#define STE_RXEARLY_THRESH 0x1FFF
#define STE_RXEARLY_THRESH 0x1FFC
#define STE_WAKEEVENT_WAKEPKT_ENB 0x01
#define STE_WAKEEVENT_MAGICPKT_ENB 0x02
@ -272,8 +276,9 @@
#define STE_IMR_RX_DMADONE 0x0400
#define STE_INTRS \
(STE_IMR_RX_DMADONE|STE_IMR_TX_DMADONE|STE_IMR_STATS_OFLOW| \
STE_IMR_TX_DONE|STE_IMR_HOSTERR|STE_IMR_RX_EARLY)
(STE_IMR_RX_DMADONE|STE_IMR_TX_DMADONE| \
STE_IMR_TX_DONE|STE_IMR_HOSTERR| \
STE_IMR_LINKEVENT)
#define STE_ISR_INTLATCH 0x0001
#define STE_ISR_HOSTERR 0x0002
@ -406,7 +411,7 @@ struct ste_frag {
#define STE_FRAG_LAST 0x80000000
#define STE_FRAG_LEN 0x00001FFF
#define STE_MAXFRAGS 63
#define STE_MAXFRAGS 8
struct ste_desc {
u_int32_t ste_next;
@ -460,9 +465,10 @@ struct ste_desc_onefrag {
#define STE_MIN_FRAMELEN 60
#define STE_PACKET_SIZE 1536
#define ETHER_ALIGN 2
#define STE_RX_LIST_CNT 128
#define STE_TX_LIST_CNT 256
#define STE_RX_LIST_CNT 64
#define STE_TX_LIST_CNT 64
#define STE_INC(x, y) (x) = (x + 1) % y
#define STE_NEXT(x, y) (x + 1) % y
struct ste_type {
u_int16_t ste_vid;
@ -509,10 +515,12 @@ struct ste_softc {
void *ste_intrhand;
struct ste_type *ste_info;
device_t ste_miibus;
device_t ste_dev;
int ste_unit;
int ste_tx_thresh;
u_int8_t ste_link;
int ste_if_flags;
int ste_tx_prev_idx;
struct ste_list_data *ste_ldata;
struct ste_chain_data ste_cdata;
struct callout_handle ste_stat_ch;