Implement the replacement EDMA FIFO code.

(Yes, the previous code temporarily broke EDMA TX. I'm sorry; I should've
actually set up ATH_BUF_FIFOEND on frames so txq->axq_fifo_depth was
cleared!)

This code implements a whole bunch of sorely needed EDMA TX improvements
along with CABQ TX support.

The specifics:

* When filling/refilling the FIFO, use the new TXQ staging queue
  for FIFO frames

* Tag frames with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND correctly.
  For now the non-CABQ transmit path pushes one frame into the TXQ
  staging queue without setting up the intermediary link pointers
  to chain them together, so draining frames from the txq staging
  queue to the FIFO queue occurs AMPDU / MPDU at a time.

* In the CABQ case, manually tag the list with ATH_BUF_FIFOPTR and
  ATH_BUF_FIFOEND so a chain of frames is pushed into the FIFO
  at once.

* Now that frames are in a FIFO pending queue, we can top up the
  FIFO after completing a single frame.  This means we can keep
  it filled rather than waiting for it to drain and _then_ adding
  more frames.

* The EDMA restart routine now walks the FIFO queue in the TXQ
  rather than the pending queue and re-initialises the FIFO with
  that.

* When restarting EDMA, we may have partially completed sending
  a list.  So stamp the first frame that we see in a list with
  ATH_BUF_FIFOPTR and push _that_ into the hardware.

* When completing frames, only check those on the FIFO queue.
  We should never ever queue frames from the pending queue
  direct to the hardware, so there's no point in checking.

* Until I figure out what's going on, if a TXSTATUS for an empty
  queue pops up, complain loudly and continue.
  This will stop the panics that people are seeing.  I'll add
  some code later which will assist in ensuring I'm populating
  each descriptor with the correct queue ID.

* When considering whether to queue frames to the hardware queue
  directly or software queue frames, make sure the depth of
  the FIFO is taken into account now.

* When completing frames, tag them with ATH_BUF_BUSY if they're
  not the final frame in a FIFO list.  The same holding descriptor
  behaviour is required when handling descriptors linked together
  with a link pointer as the hardware will re-read the previous
  descriptor to refresh the link pointer before continuing.

* .. and if we complete the FIFO list (ie, the buffer has
  ATH_BUF_FIFOEND set), then we don't need the holding buffer
  any longer.  Thus, free it.

Tested:

* AR9380/AR9580, STA and hostap
* AR9280, STA/hostap

TODO:

* I don't yet trust that the EDMA restart routine is totally correct
  in all circumstances.  I'll continue to thrash this out under heavy
  multiple-TXQ traffic load and fix whatever pops up.
This commit is contained in:
Adrian Chadd 2013-03-26 20:04:45 +00:00
parent 941433323c
commit 92e84e43a6
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=248750
3 changed files with 347 additions and 59 deletions

View File

@ -474,6 +474,10 @@ ath_beacon_proc(void *arg, int pending)
vap = sc->sc_bslot[slot];
if (vap != NULL && vap->iv_state >= IEEE80211_S_RUN) {
bf = ath_beacon_generate(sc, vap);
/*
* XXX TODO: this should use settxdesclinkptr()
* otherwise it won't work for EDMA chipsets!
*/
if (bf != NULL) {
/* XXX should do this using the ds */
*bflink = bf->bf_daddr;
@ -482,6 +486,10 @@ ath_beacon_proc(void *arg, int pending)
}
}
}
/*
* XXX TODO: this should use settxdesclinkptr()
* otherwise it won't work for EDMA chipsets!
*/
*bflink = 0; /* terminate list */
}
@ -540,17 +548,99 @@ ath_beacon_proc(void *arg, int pending)
}
}
/*
* Start CABQ transmission - this assumes that all frames are prepped
* and ready in the CABQ.
*
* XXX TODO: methodize this; for the EDMA case it should only push
* into the hardware if the FIFO isn't full _AND_ then it should
* tag the final buffer in the queue as ATH_BUF_FIFOEND so the FIFO
* depth is correctly accounted for.
*/
void
ath_beacon_cabq_start(struct ath_softc *sc)
/*
 * EDMA variant of the CABQ start: hand the entire CABQ staging list to
 * the hardware as a single FIFO entry and start the queue.
 *
 * The caller must hold the CABQ TXQ lock (asserted below).  Nothing is
 * pushed if the staging list is empty, or if axq_fifo_depth has already
 * reached HAL_TXFIFO_DEPTH; in the latter case a warning is logged and
 * the staged frames are left where they are.
 */
static void
ath_beacon_cabq_start_edma(struct ath_softc *sc)
{
	struct ath_buf *bf, *bf_last;
	struct ath_txq *cabq = sc->sc_cabq;
#if 0
	struct ath_buf *bfi;
	int i = 0;
#endif

	ATH_TXQ_LOCK_ASSERT(cabq);

	if (TAILQ_EMPTY(&cabq->axq_q))
		return;

	/* Head and tail of the staged list; these get the FIFO tags below. */
	bf = TAILQ_FIRST(&cabq->axq_q);
	bf_last = TAILQ_LAST(&cabq->axq_q, axq_q_s);

	/*
	 * This is a dirty, dirty hack to push the contents of
	 * the cabq staging queue into the FIFO.
	 *
	 * This ideally should live in the EDMA code file
	 * and only push things into the CABQ if there's a FIFO
	 * slot.
	 *
	 * We can't treat this like a normal TX queue because
	 * in the case of multi-VAP traffic, we may have to flush
	 * the CABQ each new (staggered) beacon that goes out.
	 * But for non-staggered beacons, we could in theory
	 * handle multicast traffic for all VAPs in one FIFO
	 * push.  Just keep all of this in mind if you're wondering
	 * how to correctly/better handle multi-VAP CABQ traffic
	 * with EDMA.
	 */

	/*
	 * Is the CABQ FIFO free? If not, complain loudly and
	 * don't queue anything.  Maybe we'll flush the CABQ
	 * traffic, maybe we won't.  But that'll happen next
	 * beacon interval.
	 */
	if (cabq->axq_fifo_depth >= HAL_TXFIFO_DEPTH) {
		device_printf(sc->sc_dev,
		    "%s: Q%d: CAB FIFO queue=%d?\n",
		    __func__,
		    cabq->axq_qnum,
		    cabq->axq_fifo_depth);
		return;
	}

	/*
	 * Ok, so here's the gymnastics required to make this
	 * all sensible.
	 */

	/*
	 * Tag the first/last buffer appropriately.
	 */
	bf->bf_flags |= ATH_BUF_FIFOPTR;
	bf_last->bf_flags |= ATH_BUF_FIFOEND;

#if 0
	/* Debug aid: dump every staged buffer, not just the head. */
	i = 0;
	TAILQ_FOREACH(bfi, &cabq->axq_q, bf_list) {
		ath_printtxbuf(sc, bfi, cabq->axq_qnum, i, 0);
		i++;
	}
#endif

	/*
	 * We now need to push this set of frames onto the tail
	 * of the FIFO queue.  We don't adjust the aggregate
	 * count, only the queue depth counter(s).
	 * We also need to blank the link pointer now.
	 */
	TAILQ_CONCAT(&cabq->fifo.axq_q, &cabq->axq_q, bf_list);
	cabq->axq_link = NULL;
	cabq->fifo.axq_depth += cabq->axq_depth;
	cabq->axq_depth = 0;

	/* Bump FIFO queue: the whole list counts as one FIFO entry */
	cabq->axq_fifo_depth++;

	/* Push the first entry into the hardware */
	ath_hal_puttxbuf(sc->sc_ah, cabq->axq_qnum, bf->bf_daddr);

	/* NB: gated by beacon so safe to start here */
	ath_hal_txstart(sc->sc_ah, cabq->axq_qnum);
}
static void
ath_beacon_cabq_start_legacy(struct ath_softc *sc)
{
struct ath_buf *bf;
struct ath_txq *cabq = sc->sc_cabq;
@ -567,6 +657,26 @@ ath_beacon_cabq_start(struct ath_softc *sc)
ath_hal_txstart(sc->sc_ah, cabq->axq_qnum);
}
/*
* Start CABQ transmission - this assumes that all frames are prepped
* and ready in the CABQ.
*/
void
ath_beacon_cabq_start(struct ath_softc *sc)
{
	struct ath_txq *q = sc->sc_cabq;

	ATH_TXQ_LOCK_ASSERT(q);

	/* Nothing staged in the CABQ; leave the hardware alone. */
	if (TAILQ_EMPTY(&q->axq_q)) {
		return;
	}

	/* Non-EDMA chips take the legacy path; EDMA chips the FIFO path. */
	if (!sc->sc_isedma) {
		ath_beacon_cabq_start_legacy(sc);
		return;
	}
	ath_beacon_cabq_start_edma(sc);
}
struct ath_buf *
ath_beacon_generate(struct ath_softc *sc, struct ieee80211vap *vap)
{
@ -637,9 +747,6 @@ ath_beacon_generate(struct ath_softc *sc, struct ieee80211vap *vap)
/*
* Move frames from the s/w mcast q to the h/w cab q.
*
* XXX TODO: This should be methodized - the EDMA
* CABQ setup code may look different!
*
* XXX TODO: if we chain together multiple VAPs
* worth of CABQ traffic, should we keep the
* MORE data bit set on the last frame of each

View File

@ -1816,7 +1816,8 @@ ath_tx_start(struct ath_softc *sc, struct ieee80211_node *ni,
* XXX duplicated in ath_raw_xmit().
*/
if (IEEE80211_IS_MULTICAST(wh->i_addr1)) {
if (sc->sc_cabq->axq_depth > sc->sc_txq_mcastq_maxdepth) {
if (sc->sc_cabq->axq_depth + sc->sc_cabq->fifo.axq_depth
> sc->sc_txq_mcastq_maxdepth) {
sc->sc_stats.ast_tx_mcastq_overflow++;
r = ENOBUFS;
}
@ -2219,7 +2220,8 @@ ath_raw_xmit(struct ieee80211_node *ni, struct mbuf *m,
* XXX duplicated in ath_tx_start().
*/
if (IEEE80211_IS_MULTICAST(wh->i_addr1)) {
if (sc->sc_cabq->axq_depth > sc->sc_txq_mcastq_maxdepth) {
if (sc->sc_cabq->axq_depth + sc->sc_cabq->fifo.axq_depth
> sc->sc_txq_mcastq_maxdepth) {
sc->sc_stats.ast_tx_mcastq_overflow++;
error = ENOBUFS;
}
@ -2845,7 +2847,7 @@ ath_tx_swq(struct ath_softc *sc, struct ieee80211_node *ni, struct ath_txq *txq,
*
* Otherwise, schedule the TID.
*/
if (txq->axq_depth < sc->sc_hwq_limit) {
if (txq->axq_depth + txq->fifo.axq_depth < sc->sc_hwq_limit) {
bf = ATH_TID_FIRST(atid);
ATH_TID_REMOVE(atid, bf, bf_list);
@ -2869,7 +2871,7 @@ ath_tx_swq(struct ath_softc *sc, struct ieee80211_node *ni, struct ath_txq *txq,
ath_tx_tid_sched(sc, atid);
}
} else if (txq->axq_depth < sc->sc_hwq_limit) {
} else if (txq->axq_depth + txq->fifo.axq_depth < sc->sc_hwq_limit) {
/* AMPDU not running, attempt direct dispatch */
DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: xmit_normal\n", __func__);
/* See if clrdmask needs to be set */

View File

@ -136,19 +136,65 @@ MALLOC_DECLARE(M_ATHDEV);
static void ath_edma_tx_processq(struct ath_softc *sc, int dosched);
/*
* Push some frames into the TX FIFO if we have space.
*/
static void
ath_edma_tx_fifo_fill(struct ath_softc *sc, struct ath_txq *txq)
{
struct ath_buf *bf;
struct ath_buf *bf, *bf_last;
int i = 0;
ATH_TXQ_LOCK_ASSERT(txq);
DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: called\n", __func__);
DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: Q%d: called\n",
__func__,
txq->axq_qnum);
TAILQ_FOREACH(bf, &txq->axq_q, bf_list) {
if (txq->axq_fifo_depth >= HAL_TXFIFO_DEPTH)
break;
/*
* We have space in the FIFO - so let's push a frame
* into it.
*/
/*
* Remove it from the normal list
*/
ATH_TXQ_REMOVE(txq, bf, bf_list);
/*
* XXX for now, we only dequeue a frame at a time, so
* that's only one buffer. Later on when we just
* push this staging _list_ into the queue, we'll
* set bf_last to the end pointer in the list.
*/
bf_last = bf;
DPRINTF(sc, ATH_DEBUG_TX_PROC,
"%s: Q%d: depth=%d; pushing %p->%p\n",
__func__,
txq->axq_qnum,
txq->axq_fifo_depth,
bf,
bf_last);
/*
* Append it to the FIFO staging list
*/
ATH_TXQ_INSERT_TAIL(&txq->fifo, bf, bf_list);
/*
* Set fifo start / fifo end flags appropriately
*
*/
bf->bf_flags |= ATH_BUF_FIFOPTR;
bf_last->bf_flags |= ATH_BUF_FIFOEND;
/*
* Push _into_ the FIFO.
*/
ath_hal_puttxbuf(sc->sc_ah, txq->axq_qnum, bf->bf_daddr);
#ifdef ATH_DEBUG
if (sc->sc_debug & ATH_DEBUG_XMIT_DESC)
@ -175,14 +221,115 @@ ath_edma_tx_fifo_fill(struct ath_softc *sc, struct ath_txq *txq)
static void
ath_edma_dma_restart(struct ath_softc *sc, struct ath_txq *txq)
{
struct ath_buf *bf;
int i = 0;
int fifostart = 1;
int old_fifo_depth;
DPRINTF(sc, ATH_DEBUG_RESET, "%s: called: txq=%p, qnum=%d\n",
DPRINTF(sc, ATH_DEBUG_RESET, "%s: Q%d: called\n",
__func__,
txq,
txq->axq_qnum);
ATH_TXQ_LOCK_ASSERT(txq);
ath_edma_tx_fifo_fill(sc, txq);
/*
* Let's log if the tracked FIFO depth doesn't match
* what we actually push in.
*/
old_fifo_depth = txq->axq_fifo_depth;
txq->axq_fifo_depth = 0;
/*
* Walk the FIFO staging list, looking for "head" entries.
* Since we may have a partially completed list of frames,
* we push the first frame we see into the FIFO and re-mark
* it as the head entry. We then skip entries until we see
* FIFO end, at which point we get ready to push another
* entry into the FIFO.
*/
TAILQ_FOREACH(bf, &txq->fifo.axq_q, bf_list) {
/*
* If we're looking for FIFOEND and we haven't found
* it, skip.
*
* If we're looking for FIFOEND and we've found it,
* reset for another descriptor.
*/
#ifdef ATH_DEBUG
if (sc->sc_debug & ATH_DEBUG_XMIT_DESC)
ath_printtxbuf(sc, bf, txq->axq_qnum, i, 0);
#endif/* ATH_DEBUG */
#ifdef ATH_DEBUG_ALQ
if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
ath_tx_alq_post(sc, bf);
#endif /* ATH_DEBUG_ALQ */
if (fifostart == 0) {
if (bf->bf_flags & ATH_BUF_FIFOEND)
fifostart = 1;
continue;
}
/* Make sure we're not overflowing the FIFO! */
if (txq->axq_fifo_depth >= HAL_TXFIFO_DEPTH) {
device_printf(sc->sc_dev,
"%s: Q%d: more frames in the queue; FIFO depth=%d?!\n",
__func__,
txq->axq_qnum,
txq->axq_fifo_depth);
}
#if 0
DPRINTF(sc, ATH_DEBUG_RESET,
"%s: Q%d: depth=%d: pushing bf=%p; start=%d, end=%d\n",
__func__,
txq->axq_qnum,
txq->axq_fifo_depth,
bf,
!! (bf->bf_flags & ATH_BUF_FIFOPTR),
!! (bf->bf_flags & ATH_BUF_FIFOEND));
#endif
/*
* Set this to be the first buffer in the FIFO
* list - even if it's also the last buffer in
* a FIFO list!
*/
bf->bf_flags |= ATH_BUF_FIFOPTR;
/* Push it into the FIFO and bump the FIFO count */
ath_hal_puttxbuf(sc->sc_ah, txq->axq_qnum, bf->bf_daddr);
txq->axq_fifo_depth++;
/*
* If this isn't the last entry either, let's
* clear fifostart so we continue looking for
* said last entry.
*/
if (! (bf->bf_flags & ATH_BUF_FIFOEND))
fifostart = 0;
i++;
}
/* Only bother starting the queue if there's something in it */
if (i > 0)
ath_hal_txstart(sc->sc_ah, txq->axq_qnum);
DPRINTF(sc, ATH_DEBUG_RESET, "%s: Q%d: FIFO depth was %d, is %d\n",
__func__,
txq->axq_qnum,
old_fifo_depth,
txq->axq_fifo_depth);
/* And now, let's check! */
if (txq->axq_fifo_depth != old_fifo_depth) {
device_printf(sc->sc_dev,
"%s: Q%d: FIFO depth should be %d, is %d\n",
__func__,
txq->axq_qnum,
old_fifo_depth,
txq->axq_fifo_depth);
}
}
/*
@ -201,7 +348,6 @@ static void
ath_edma_xmit_handoff_hw(struct ath_softc *sc, struct ath_txq *txq,
struct ath_buf *bf)
{
struct ath_hal *ah = sc->sc_ah;
ATH_TXQ_LOCK(txq);
@ -220,20 +366,18 @@ ath_edma_xmit_handoff_hw(struct ath_softc *sc, struct ath_txq *txq,
/* Push and update frame stats */
ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
/* Only schedule to the FIFO if there's space */
if (txq->axq_fifo_depth < HAL_TXFIFO_DEPTH) {
#ifdef ATH_DEBUG
if (sc->sc_debug & ATH_DEBUG_XMIT_DESC)
ath_printtxbuf(sc, bf, txq->axq_qnum, 0, 0);
#endif /* ATH_DEBUG */
#ifdef ATH_DEBUG_ALQ
if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
ath_tx_alq_post(sc, bf);
#endif /* ATH_DEBUG_ALQ */
ath_hal_puttxbuf(ah, txq->axq_qnum, bf->bf_daddr);
txq->axq_fifo_depth++;
ath_hal_txstart(ah, txq->axq_qnum);
}
/* For now, set the link pointer in the last descriptor
* to be NULL.
*
* Later on, when it comes time to handling multiple descriptors
* in one FIFO push, we can link descriptors together this way.
*/
/*
* Finally, call the FIFO schedule routine to schedule some
* frames to the FIFO.
*/
ath_edma_tx_fifo_fill(sc, txq);
ATH_TXQ_UNLOCK(txq);
}
@ -274,7 +418,6 @@ ath_edma_xmit_handoff_mcast(struct ath_softc *sc, struct ath_txq *txq,
bf_last->bf_lastds,
bf->bf_daddr);
}
#ifdef ATH_DEBUG_ALQ
if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
ath_tx_alq_post(sc, bf);
@ -434,8 +577,10 @@ ath_edma_tx_proc(void *arg, int npending)
{
struct ath_softc *sc = (struct ath_softc *) arg;
#if 0
DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: called, npending=%d\n",
__func__, npending);
#endif
ath_edma_tx_processq(sc, 1);
}
@ -469,15 +614,16 @@ ath_edma_tx_processq(struct ath_softc *sc, int dosched)
status = ath_hal_txprocdesc(ah, NULL, (void *) &ts);
ATH_TXSTATUS_UNLOCK(sc);
if (status == HAL_EINPROGRESS)
break;
#ifdef ATH_DEBUG
if (sc->sc_debug & ATH_DEBUG_TX_PROC)
if (ts.ts_queue_id != sc->sc_bhalq)
ath_printtxstatbuf(sc, NULL, txstatus, ts.ts_queue_id,
idx, (status == HAL_OK));
#endif
if (status == HAL_EINPROGRESS)
break;
/*
* If there is an error with this descriptor, continue
* processing.
@ -519,11 +665,25 @@ ath_edma_tx_processq(struct ath_softc *sc, int dosched)
txq = &sc->sc_txq[ts.ts_queue_id];
ATH_TXQ_LOCK(txq);
bf = TAILQ_FIRST(&txq->axq_q);
bf = ATH_TXQ_FIRST(&txq->fifo);
DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: qcuid=%d, bf=%p\n",
/*
* Work around the situation where I'm seeing notifications
* for Q1 when no frames are available. That needs to be
* debugged but not by crashing _here_.
*/
if (bf == NULL) {
device_printf(sc->sc_dev, "%s: Q%d: empty?\n",
__func__,
ts.ts_queue_id);
continue;
}
DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: Q%d, bf=%p, start=%d, end=%d\n",
__func__,
ts.ts_queue_id, bf);
ts.ts_queue_id, bf,
!! (bf->bf_flags & ATH_BUF_FIFOPTR),
!! (bf->bf_flags & ATH_BUF_FIFOEND));
/* XXX TODO: actually output debugging info about this */
@ -541,13 +701,43 @@ ath_edma_tx_processq(struct ath_softc *sc, int dosched)
#endif
/* This removes the buffer and decrements the queue depth */
ATH_TXQ_REMOVE(txq, bf, bf_list);
ATH_TXQ_REMOVE(&txq->fifo, bf, bf_list);
if (bf->bf_state.bfs_aggr)
txq->axq_aggr_depth--;
txq->axq_fifo_depth --;
/*
* If this was the end of a FIFO set, decrement FIFO depth
*/
if (bf->bf_flags & ATH_BUF_FIFOEND)
txq->axq_fifo_depth--;
/*
* If this isn't the final buffer in a FIFO set, mark
* the buffer as busy so it goes onto the holding queue.
*/
if (! (bf->bf_flags & ATH_BUF_FIFOEND))
bf->bf_flags |= ATH_BUF_BUSY;
DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: Q%d: FIFO depth is now %d (%d)\n",
__func__,
txq->axq_qnum,
txq->axq_fifo_depth,
txq->fifo.axq_depth);
/* XXX assert FIFO depth >= 0 */
ATH_TXQ_UNLOCK(txq);
/*
* Outside of the TX lock - if the buffer is the
* end buffer in this FIFO, we don't need a holding
* buffer any longer.
*/
if (bf->bf_flags & ATH_BUF_FIFOEND) {
ATH_TXBUF_LOCK(sc);
ath_txq_freeholdingbuf(sc, txq);
ATH_TXBUF_UNLOCK(sc);
}
/*
* First we need to make sure ts_rate is valid.
*
@ -617,21 +807,10 @@ ath_edma_tx_processq(struct ath_softc *sc, int dosched)
/*
* Now that there's space in the FIFO, let's push some
* more frames into it.
*
* Unfortunately for now, the txq has FIFO and non-FIFO
* frames in the same linked list, so there's no way
* to quickly/easily populate frames without walking
* the queue and skipping 'axq_fifo_depth' frames.
*
* So for now, let's only repopulate the FIFO once it
* is empty. It's sucky for performance but it's enough
* to begin validating that things are somewhat
* working.
*/
ATH_TXQ_LOCK(txq);
if (dosched && txq->axq_fifo_depth == 0) {
if (dosched)
ath_edma_tx_fifo_fill(sc, txq);
}
ATH_TXQ_UNLOCK(txq);
}