freebsd-nq/sys/dev/ath/if_ath_tx_edma.c
Adrian Chadd 6eb9f206a6 [ath] [ar9300] ensure the software scheduler is called to form more aggregates for EDMA chips
When investigating performance on UDP TX on the AR9380 I found that the
following sequence was occuring:

* INTR
* EINPROGRESS - nothing yet
* INTR
* TXSTATUS - process a TX completion for an aggregate
* INTR, INTR
* TXSTATUS - process a TX completion for an aggregate
* TXD, TXD ... populate frames from the hardware queue and submit

What should be happening is a completed TXSTATUS fires off more packets
that are queued on active TIDs.

What /was/ happening was after that first TXSTATUS the TX queue hardware queue
was still empty, so it didn't push anything into the FIFO.  Only after the
second TXSTATUS did any progress get made.

This is one of two commits - it ensures that the software TX queue scheduler
is called /after/ TX completion, otherwise no frames from the software staging
queues will be processed into the hardware queues.

The second commit will fix it so it populates aggregate frames correctly
when the above occurs - right now ath_txq_sched() is called, but it doesn't
populate anything because its pre-check conditions are wrong.

Whilst here, add/tweak debugging.

Tested:

* AR9380 STA (testing device) -> AR9580 hostap
2017-01-23 04:20:37 +00:00

1081 lines
28 KiB
C

/*-
* Copyright (c) 2012 Adrian Chadd <adrian@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer,
* without modification.
* 2. Redistributions in binary form must reproduce at minimum a disclaimer
* similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
* redistribution must be conditioned upon including a substantially
* similar Disclaimer requirement for further binary redistribution.
*
* NO WARRANTY
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
* IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGES.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
/*
* Driver for the Atheros Wireless LAN controller.
*
* This software is derived from work of Atsushi Onoe; his contribution
* is greatly appreciated.
*/
#include "opt_inet.h"
#include "opt_ath.h"
/*
* This is needed for register operations which are performed
* by the driver - eg, calls to ath_hal_gettsf32().
*
* It's also required for any AH_DEBUG checks in here, eg the
* module dependencies.
*/
#include "opt_ah.h"
#include "opt_wlan.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/errno.h>
#include <sys/callout.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kthread.h>
#include <sys/taskqueue.h>
#include <sys/priv.h>
#include <sys/module.h>
#include <sys/ktr.h>
#include <sys/smp.h> /* for mp_ncpus */
#include <machine/bus.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_llc.h>
#include <net80211/ieee80211_var.h>
#include <net80211/ieee80211_regdomain.h>
#ifdef IEEE80211_SUPPORT_SUPERG
#include <net80211/ieee80211_superg.h>
#endif
#ifdef IEEE80211_SUPPORT_TDMA
#include <net80211/ieee80211_tdma.h>
#endif
#include <net/bpf.h>
#ifdef INET
#include <netinet/in.h>
#include <netinet/if_ether.h>
#endif
#include <dev/ath/if_athvar.h>
#include <dev/ath/ath_hal/ah_devid.h> /* XXX for softled */
#include <dev/ath/ath_hal/ah_diagcodes.h>
#include <dev/ath/if_ath_debug.h>
#include <dev/ath/if_ath_misc.h>
#include <dev/ath/if_ath_tsf.h>
#include <dev/ath/if_ath_tx.h>
#include <dev/ath/if_ath_sysctl.h>
#include <dev/ath/if_ath_led.h>
#include <dev/ath/if_ath_keycache.h>
#include <dev/ath/if_ath_rx.h>
#include <dev/ath/if_ath_beacon.h>
#include <dev/ath/if_athdfs.h>
#include <dev/ath/if_ath_descdma.h>
#ifdef ATH_TX99_DIAG
#include <dev/ath/ath_tx99/ath_tx99.h>
#endif
#include <dev/ath/if_ath_tx_edma.h>
#ifdef ATH_DEBUG_ALQ
#include <dev/ath/if_ath_alq.h>
#endif
/*
* some general macros
*/
#define INCR(_l, _sz) (_l) ++; (_l) &= ((_sz) - 1)
#define DECR(_l, _sz) (_l) --; (_l) &= ((_sz) - 1)
/*
* XXX doesn't belong here, and should be tunable
*/
#define ATH_TXSTATUS_RING_SIZE 512
MALLOC_DECLARE(M_ATHDEV);
static void ath_edma_tx_processq(struct ath_softc *sc, int dosched);
#ifdef ATH_DEBUG_ALQ
static void
ath_tx_alq_edma_push(struct ath_softc *sc, int txq, int nframes,
int fifo_depth, int frame_cnt)
{
struct if_ath_alq_tx_fifo_push aq;
aq.txq = htobe32(txq);
aq.nframes = htobe32(nframes);
aq.fifo_depth = htobe32(fifo_depth);
aq.frame_cnt = htobe32(frame_cnt);
if_ath_alq_post(&sc->sc_alq, ATH_ALQ_TX_FIFO_PUSH,
sizeof(aq),
(const char *) &aq);
}
#endif /* ATH_DEBUG_ALQ */
/*
* XXX TODO: push an aggregate as a single FIFO slot, even though
* it may not meet the TXOP for say, DBA-gated traffic in TDMA mode.
*
* The TX completion code handles a TX FIFO slot having multiple frames,
* aggregate or otherwise, but it may just make things easier to deal
* with.
*
* XXX TODO: track the number of aggregate subframes and put that in the
* push alq message.
*/
static void
ath_tx_edma_push_staging_list(struct ath_softc *sc, struct ath_txq *txq,
int limit)
{
struct ath_buf *bf, *bf_last;
struct ath_buf *bfi, *bfp;
int i, sqdepth;
TAILQ_HEAD(axq_q_f_s, ath_buf) sq;
ATH_TXQ_LOCK_ASSERT(txq);
DPRINTF(sc, ATH_DEBUG_XMIT | ATH_DEBUG_TX_PROC,
"%s: called; TXQ=%d, fifo.depth=%d, axq_q empty=%d\n",
__func__,
txq->axq_qnum,
txq->axq_fifo_depth,
!! (TAILQ_EMPTY(&txq->axq_q)));
/*
* Don't bother doing any work if it's full.
*/
if (txq->axq_fifo_depth >= HAL_TXFIFO_DEPTH)
return;
if (TAILQ_EMPTY(&txq->axq_q))
return;
TAILQ_INIT(&sq);
/*
* First pass - walk sq, queue up to 'limit' entries,
* subtract them from the staging queue.
*/
sqdepth = 0;
for (i = 0; i < limit; i++) {
/* Grab the head entry */
bf = ATH_TXQ_FIRST(txq);
if (bf == NULL)
break;
ATH_TXQ_REMOVE(txq, bf, bf_list);
/* Queue it into our staging list */
TAILQ_INSERT_TAIL(&sq, bf, bf_list);
/* Ensure the flags are cleared */
bf->bf_flags &= ~(ATH_BUF_FIFOPTR | ATH_BUF_FIFOEND);
sqdepth++;
}
/*
* Ok, so now we have a staging list of up to 'limit'
* frames from the txq. Now let's wrap that up
* into its own list and pass that to the hardware
* as one FIFO entry.
*/
bf = TAILQ_FIRST(&sq);
bf_last = TAILQ_LAST(&sq, axq_q_s);
/*
* Ok, so here's the gymnastics reqiured to make this
* all sensible.
*/
/*
* Tag the first/last buffer appropriately.
*/
bf->bf_flags |= ATH_BUF_FIFOPTR;
bf_last->bf_flags |= ATH_BUF_FIFOEND;
/*
* Walk the descriptor list and link them appropriately.
*/
bfp = NULL;
TAILQ_FOREACH(bfi, &sq, bf_list) {
if (bfp != NULL) {
ath_hal_settxdesclink(sc->sc_ah, bfp->bf_lastds,
bfi->bf_daddr);
}
bfp = bfi;
}
i = 0;
TAILQ_FOREACH(bfi, &sq, bf_list) {
#ifdef ATH_DEBUG
if (sc->sc_debug & ATH_DEBUG_XMIT_DESC)
ath_printtxbuf(sc, bfi, txq->axq_qnum, i, 0);
#endif/* ATH_DEBUG */
#ifdef ATH_DEBUG_ALQ
if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
ath_tx_alq_post(sc, bfi);
#endif /* ATH_DEBUG_ALQ */
i++;
}
/*
* We now need to push this set of frames onto the tail
* of the FIFO queue. We don't adjust the aggregate
* count, only the queue depth counter(s).
* We also need to blank the link pointer now.
*/
TAILQ_CONCAT(&txq->fifo.axq_q, &sq, bf_list);
/* Bump total queue tracking in FIFO queue */
txq->fifo.axq_depth += sqdepth;
/* Bump FIFO queue */
txq->axq_fifo_depth++;
DPRINTF(sc, ATH_DEBUG_XMIT | ATH_DEBUG_TX_PROC,
"%s: queued %d packets; depth=%d, fifo depth=%d\n",
__func__, sqdepth, txq->fifo.axq_depth, txq->axq_fifo_depth);
/* Push the first entry into the hardware */
ath_hal_puttxbuf(sc->sc_ah, txq->axq_qnum, bf->bf_daddr);
/* Push start on the DMA if it's not already started */
ath_hal_txstart(sc->sc_ah, txq->axq_qnum);
#ifdef ATH_DEBUG_ALQ
ath_tx_alq_edma_push(sc, txq->axq_qnum, sqdepth,
txq->axq_fifo_depth,
txq->fifo.axq_depth);
#endif /* ATH_DEBUG_ALQ */
}
#define TX_BATCH_SIZE 32
/*
* Push some frames into the TX FIFO if we have space.
*/
static void
ath_edma_tx_fifo_fill(struct ath_softc *sc, struct ath_txq *txq)
{
ATH_TXQ_LOCK_ASSERT(txq);
DPRINTF(sc, ATH_DEBUG_TX_PROC,
"%s: Q%d: called; fifo.depth=%d, fifo depth=%d, depth=%d, aggr_depth=%d\n",
__func__,
txq->axq_qnum,
txq->fifo.axq_depth,
txq->axq_fifo_depth,
txq->axq_depth,
txq->axq_aggr_depth);
/*
* For now, push up to 32 frames per TX FIFO slot.
* If more are in the hardware queue then they'll
* get populated when we try to send another frame
* or complete a frame - so at most there'll be
* 32 non-AMPDU frames per node/TID anyway.
*
* Note that the hardware staging queue will limit
* how many frames in total we will have pushed into
* here.
*
* Later on, we'll want to push less frames into
* the TX FIFO since we don't want to necessarily
* fill tens or hundreds of milliseconds of potential
* frames.
*
* However, we need more frames right now because of
* how the MAC implements the frame scheduling policy.
* It only ungates a single FIFO entry at a time,
* and will run that until CHNTIME expires or the
* end of that FIFO entry descriptor list is reached.
* So for TDMA we suffer a big performance penalty -
* single TX FIFO entries mean the MAC only sends out
* one frame per DBA event, which turned out on average
* 6ms per TX frame.
*
* So, for aggregates it's okay - it'll push two at a
* time and this will just do them more efficiently.
* For non-aggregates it'll do 4 at a time, up to the
* non-aggr limit (non_aggr, which is 32.) They should
* be time based rather than a hard count, but I also
* do need sleep.
*/
/*
* Do some basic, basic batching to the hardware
* queue.
*
* If we have TX_BATCH_SIZE entries in the staging
* queue, then let's try to send them all in one hit.
*
* Ensure we don't push more than TX_BATCH_SIZE worth
* in, otherwise we end up draining 8 slots worth of
* 32 frames into the hardware queue and then we don't
* attempt to push more frames in until we empty the
* FIFO.
*/
if (txq->axq_depth >= TX_BATCH_SIZE / 2 &&
txq->fifo.axq_depth <= TX_BATCH_SIZE) {
ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE);
}
/*
* Aggregate check: if we have less than two FIFO slots
* busy and we have some aggregate frames, queue it.
*
* Now, ideally we'd just check to see if the scheduler
* has given us aggregate frames and push them into the FIFO
* as individual slots, as honestly we should just be pushing
* a single aggregate in as one FIFO slot.
*
* Let's do that next once I know this works.
*/
else if (txq->axq_aggr_depth > 0 && txq->axq_fifo_depth < 2)
ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE);
/*
*
* If we have less, and the TXFIFO isn't empty, let's
* wait until we've finished sending the FIFO.
*
* If we have less, and the TXFIFO is empty, then
* send them.
*/
else if (txq->axq_fifo_depth == 0) {
ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE);
}
}
/*
* Re-initialise the DMA FIFO with the current contents of
* said TXQ.
*
* This should only be called as part of the chip reset path, as it
* assumes the FIFO is currently empty.
*/
static void
ath_edma_dma_restart(struct ath_softc *sc, struct ath_txq *txq)
{
struct ath_buf *bf;
int i = 0;
int fifostart = 1;
int old_fifo_depth;
DPRINTF(sc, ATH_DEBUG_RESET, "%s: Q%d: called\n",
__func__,
txq->axq_qnum);
ATH_TXQ_LOCK_ASSERT(txq);
/*
* Let's log if the tracked FIFO depth doesn't match
* what we actually push in.
*/
old_fifo_depth = txq->axq_fifo_depth;
txq->axq_fifo_depth = 0;
/*
* Walk the FIFO staging list, looking for "head" entries.
* Since we may have a partially completed list of frames,
* we push the first frame we see into the FIFO and re-mark
* it as the head entry. We then skip entries until we see
* FIFO end, at which point we get ready to push another
* entry into the FIFO.
*/
TAILQ_FOREACH(bf, &txq->fifo.axq_q, bf_list) {
/*
* If we're looking for FIFOEND and we haven't found
* it, skip.
*
* If we're looking for FIFOEND and we've found it,
* reset for another descriptor.
*/
#ifdef ATH_DEBUG
if (sc->sc_debug & ATH_DEBUG_XMIT_DESC)
ath_printtxbuf(sc, bf, txq->axq_qnum, i, 0);
#endif/* ATH_DEBUG */
#ifdef ATH_DEBUG_ALQ
if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
ath_tx_alq_post(sc, bf);
#endif /* ATH_DEBUG_ALQ */
if (fifostart == 0) {
if (bf->bf_flags & ATH_BUF_FIFOEND)
fifostart = 1;
continue;
}
/* Make sure we're not overflowing the FIFO! */
if (txq->axq_fifo_depth >= HAL_TXFIFO_DEPTH) {
device_printf(sc->sc_dev,
"%s: Q%d: more frames in the queue; FIFO depth=%d?!\n",
__func__,
txq->axq_qnum,
txq->axq_fifo_depth);
}
#if 0
DPRINTF(sc, ATH_DEBUG_RESET,
"%s: Q%d: depth=%d: pushing bf=%p; start=%d, end=%d\n",
__func__,
txq->axq_qnum,
txq->axq_fifo_depth,
bf,
!! (bf->bf_flags & ATH_BUF_FIFOPTR),
!! (bf->bf_flags & ATH_BUF_FIFOEND));
#endif
/*
* Set this to be the first buffer in the FIFO
* list - even if it's also the last buffer in
* a FIFO list!
*/
bf->bf_flags |= ATH_BUF_FIFOPTR;
/* Push it into the FIFO and bump the FIFO count */
ath_hal_puttxbuf(sc->sc_ah, txq->axq_qnum, bf->bf_daddr);
txq->axq_fifo_depth++;
/*
* If this isn't the last entry either, let's
* clear fifostart so we continue looking for
* said last entry.
*/
if (! (bf->bf_flags & ATH_BUF_FIFOEND))
fifostart = 0;
i++;
}
/* Only bother starting the queue if there's something in it */
if (i > 0)
ath_hal_txstart(sc->sc_ah, txq->axq_qnum);
DPRINTF(sc, ATH_DEBUG_RESET, "%s: Q%d: FIFO depth was %d, is %d\n",
__func__,
txq->axq_qnum,
old_fifo_depth,
txq->axq_fifo_depth);
/* And now, let's check! */
if (txq->axq_fifo_depth != old_fifo_depth) {
device_printf(sc->sc_dev,
"%s: Q%d: FIFO depth should be %d, is %d\n",
__func__,
txq->axq_qnum,
old_fifo_depth,
txq->axq_fifo_depth);
}
}
/*
* Hand off this frame to a hardware queue.
*
* Things are a bit hairy in the EDMA world. The TX FIFO is only
* 8 entries deep, so we need to keep track of exactly what we've
* pushed into the FIFO and what's just sitting in the TX queue,
* waiting to go out.
*
* So this is split into two halves - frames get appended to the
* TXQ; then a scheduler is called to push some frames into the
* actual TX FIFO.
*/
static void
ath_edma_xmit_handoff_hw(struct ath_softc *sc, struct ath_txq *txq,
struct ath_buf *bf)
{
ATH_TXQ_LOCK(txq);
KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0,
("%s: busy status 0x%x", __func__, bf->bf_flags));
/*
* XXX TODO: write a hard-coded check to ensure that
* the queue id in the TX descriptor matches txq->axq_qnum.
*/
/* Update aggr stats */
if (bf->bf_state.bfs_aggr)
txq->axq_aggr_depth++;
/* Push and update frame stats */
ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
/*
* Finally, call the FIFO schedule routine to schedule some
* frames to the FIFO.
*/
ath_edma_tx_fifo_fill(sc, txq);
ATH_TXQ_UNLOCK(txq);
}
/*
* Hand off this frame to a multicast software queue.
*
* The EDMA TX CABQ will get a list of chained frames, chained
* together using the next pointer. The single head of that
* particular queue is pushed to the hardware CABQ.
*/
static void
ath_edma_xmit_handoff_mcast(struct ath_softc *sc, struct ath_txq *txq,
struct ath_buf *bf)
{
ATH_TX_LOCK_ASSERT(sc);
KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0,
("%s: busy status 0x%x", __func__, bf->bf_flags));
ATH_TXQ_LOCK(txq);
/*
* XXX this is mostly duplicated in ath_tx_handoff_mcast().
*/
if (ATH_TXQ_LAST(txq, axq_q_s) != NULL) {
struct ath_buf *bf_last = ATH_TXQ_LAST(txq, axq_q_s);
struct ieee80211_frame *wh;
/* mark previous frame */
wh = mtod(bf_last->bf_m, struct ieee80211_frame *);
wh->i_fc[1] |= IEEE80211_FC1_MORE_DATA;
/* re-sync buffer to memory */
bus_dmamap_sync(sc->sc_dmat, bf_last->bf_dmamap,
BUS_DMASYNC_PREWRITE);
/* link descriptor */
ath_hal_settxdesclink(sc->sc_ah,
bf_last->bf_lastds,
bf->bf_daddr);
}
#ifdef ATH_DEBUG_ALQ
if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
ath_tx_alq_post(sc, bf);
#endif /* ATH_DEBUG_ALQ */
ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
ATH_TXQ_UNLOCK(txq);
}
/*
* Handoff this frame to the hardware.
*
* For the multicast queue, this will treat it as a software queue
* and append it to the list, after updating the MORE_DATA flag
* in the previous frame. The cabq processing code will ensure
* that the queue contents gets transferred over.
*
* For the hardware queues, this will queue a frame to the queue
* like before, then populate the FIFO from that. Since the
* EDMA hardware has 8 FIFO slots per TXQ, this ensures that
* frames such as management frames don't get prematurely dropped.
*
* This does imply that a similar flush-hwq-to-fifoq method will
* need to be called from the processq function, before the
* per-node software scheduler is called.
*/
static void
ath_edma_xmit_handoff(struct ath_softc *sc, struct ath_txq *txq,
struct ath_buf *bf)
{
DPRINTF(sc, ATH_DEBUG_XMIT_DESC,
"%s: called; bf=%p, txq=%p, qnum=%d\n",
__func__,
bf,
txq,
txq->axq_qnum);
if (txq->axq_qnum == ATH_TXQ_SWQ)
ath_edma_xmit_handoff_mcast(sc, txq, bf);
else
ath_edma_xmit_handoff_hw(sc, txq, bf);
}
static int
ath_edma_setup_txfifo(struct ath_softc *sc, int qnum)
{
struct ath_tx_edma_fifo *te = &sc->sc_txedma[qnum];
te->m_fifo = malloc(sizeof(struct ath_buf *) * HAL_TXFIFO_DEPTH,
M_ATHDEV,
M_NOWAIT | M_ZERO);
if (te->m_fifo == NULL) {
device_printf(sc->sc_dev, "%s: malloc failed\n",
__func__);
return (-ENOMEM);
}
/*
* Set initial "empty" state.
*/
te->m_fifo_head = te->m_fifo_tail = te->m_fifo_depth = 0;
return (0);
}
static int
ath_edma_free_txfifo(struct ath_softc *sc, int qnum)
{
struct ath_tx_edma_fifo *te = &sc->sc_txedma[qnum];
/* XXX TODO: actually deref the ath_buf entries? */
free(te->m_fifo, M_ATHDEV);
return (0);
}
static int
ath_edma_dma_txsetup(struct ath_softc *sc)
{
int error;
int i;
error = ath_descdma_alloc_desc(sc, &sc->sc_txsdma,
NULL, "txcomp", sc->sc_tx_statuslen, ATH_TXSTATUS_RING_SIZE);
if (error != 0)
return (error);
ath_hal_setuptxstatusring(sc->sc_ah,
(void *) sc->sc_txsdma.dd_desc,
sc->sc_txsdma.dd_desc_paddr,
ATH_TXSTATUS_RING_SIZE);
for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
ath_edma_setup_txfifo(sc, i);
}
return (0);
}
static int
ath_edma_dma_txteardown(struct ath_softc *sc)
{
int i;
for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
ath_edma_free_txfifo(sc, i);
}
ath_descdma_cleanup(sc, &sc->sc_txsdma, NULL);
return (0);
}
/*
* Drain all TXQs, potentially after completing the existing completed
* frames.
*/
static void
ath_edma_tx_drain(struct ath_softc *sc, ATH_RESET_TYPE reset_type)
{
int i;
DPRINTF(sc, ATH_DEBUG_RESET, "%s: called\n", __func__);
(void) ath_stoptxdma(sc);
/*
* If reset type is noloss, the TX FIFO needs to be serviced
* and those frames need to be handled.
*
* Otherwise, just toss everything in each TX queue.
*/
if (reset_type == ATH_RESET_NOLOSS) {
ath_edma_tx_processq(sc, 0);
for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
if (ATH_TXQ_SETUP(sc, i)) {
ATH_TXQ_LOCK(&sc->sc_txq[i]);
/*
* Free the holding buffer; DMA is now
* stopped.
*/
ath_txq_freeholdingbuf(sc, &sc->sc_txq[i]);
/*
* Reset the link pointer to NULL; there's
* no frames to chain DMA to.
*/
sc->sc_txq[i].axq_link = NULL;
ATH_TXQ_UNLOCK(&sc->sc_txq[i]);
}
}
} else {
for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
if (ATH_TXQ_SETUP(sc, i))
ath_tx_draintxq(sc, &sc->sc_txq[i]);
}
}
/* XXX dump out the TX completion FIFO contents */
/* XXX dump out the frames */
sc->sc_wd_timer = 0;
}
/*
* TX completion tasklet.
*/
static void
ath_edma_tx_proc(void *arg, int npending)
{
struct ath_softc *sc = (struct ath_softc *) arg;
ATH_PCU_LOCK(sc);
sc->sc_txproc_cnt++;
ATH_PCU_UNLOCK(sc);
ATH_LOCK(sc);
ath_power_set_power_state(sc, HAL_PM_AWAKE);
ATH_UNLOCK(sc);
#if 0
DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: called, npending=%d\n",
__func__, npending);
#endif
ath_edma_tx_processq(sc, 1);
ATH_PCU_LOCK(sc);
sc->sc_txproc_cnt--;
ATH_PCU_UNLOCK(sc);
ATH_LOCK(sc);
ath_power_restore_power_state(sc);
ATH_UNLOCK(sc);
ath_tx_kick(sc);
}
/*
* Process the TX status queue.
*/
static void
ath_edma_tx_processq(struct ath_softc *sc, int dosched)
{
struct ath_hal *ah = sc->sc_ah;
HAL_STATUS status;
struct ath_tx_status ts;
struct ath_txq *txq;
struct ath_buf *bf;
struct ieee80211_node *ni;
int nacked = 0;
int idx;
int i;
#ifdef ATH_DEBUG
/* XXX */
uint32_t txstatus[32];
#endif
DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: called\n", __func__);
for (idx = 0; ; idx++) {
bzero(&ts, sizeof(ts));
ATH_TXSTATUS_LOCK(sc);
#ifdef ATH_DEBUG
ath_hal_gettxrawtxdesc(ah, txstatus);
#endif
status = ath_hal_txprocdesc(ah, NULL, (void *) &ts);
ATH_TXSTATUS_UNLOCK(sc);
if (status == HAL_EINPROGRESS) {
DPRINTF(sc, ATH_DEBUG_TX_PROC,
"%s: (%d): EINPROGRESS\n",
__func__, idx);
break;
}
#ifdef ATH_DEBUG
if (sc->sc_debug & ATH_DEBUG_TX_PROC)
if (ts.ts_queue_id != sc->sc_bhalq)
ath_printtxstatbuf(sc, NULL, txstatus, ts.ts_queue_id,
idx, (status == HAL_OK));
#endif
/*
* If there is an error with this descriptor, continue
* processing.
*
* XXX TBD: log some statistics?
*/
if (status == HAL_EIO) {
device_printf(sc->sc_dev, "%s: invalid TX status?\n",
__func__);
break;
}
#if defined(ATH_DEBUG_ALQ) && defined(ATH_DEBUG)
if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS)) {
if_ath_alq_post(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS,
sc->sc_tx_statuslen,
(char *) txstatus);
}
#endif /* ATH_DEBUG_ALQ */
/*
* At this point we have a valid status descriptor.
* The QID and descriptor ID (which currently isn't set)
* is part of the status.
*
* We then assume that the descriptor in question is the
* -head- of the given QID. Eventually we should verify
* this by using the descriptor ID.
*/
/*
* The beacon queue is not currently a "real" queue.
* Frames aren't pushed onto it and the lock isn't setup.
* So skip it for now; the beacon handling code will
* free and alloc more beacon buffers as appropriate.
*/
if (ts.ts_queue_id == sc->sc_bhalq)
continue;
txq = &sc->sc_txq[ts.ts_queue_id];
ATH_TXQ_LOCK(txq);
bf = ATH_TXQ_FIRST(&txq->fifo);
/*
* Work around the situation where I'm seeing notifications
* for Q1 when no frames are available. That needs to be
* debugged but not by crashing _here_.
*/
if (bf == NULL) {
device_printf(sc->sc_dev, "%s: Q%d: empty?\n",
__func__,
ts.ts_queue_id);
ATH_TXQ_UNLOCK(txq);
continue;
}
DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: Q%d, bf=%p, start=%d, end=%d\n",
__func__,
ts.ts_queue_id, bf,
!! (bf->bf_flags & ATH_BUF_FIFOPTR),
!! (bf->bf_flags & ATH_BUF_FIFOEND));
/* XXX TODO: actually output debugging info about this */
#if 0
/* XXX assert the buffer/descriptor matches the status descid */
if (ts.ts_desc_id != bf->bf_descid) {
device_printf(sc->sc_dev,
"%s: mismatched descid (qid=%d, tsdescid=%d, "
"bfdescid=%d\n",
__func__,
ts.ts_queue_id,
ts.ts_desc_id,
bf->bf_descid);
}
#endif
/* This removes the buffer and decrements the queue depth */
ATH_TXQ_REMOVE(&txq->fifo, bf, bf_list);
if (bf->bf_state.bfs_aggr)
txq->axq_aggr_depth--;
/*
* If this was the end of a FIFO set, decrement FIFO depth
*/
if (bf->bf_flags & ATH_BUF_FIFOEND)
txq->axq_fifo_depth--;
/*
* If this isn't the final buffer in a FIFO set, mark
* the buffer as busy so it goes onto the holding queue.
*/
if (! (bf->bf_flags & ATH_BUF_FIFOEND))
bf->bf_flags |= ATH_BUF_BUSY;
DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: Q%d: FIFO depth is now %d (%d)\n",
__func__,
txq->axq_qnum,
txq->axq_fifo_depth,
txq->fifo.axq_depth);
/* XXX assert FIFO depth >= 0 */
ATH_TXQ_UNLOCK(txq);
/*
* Outside of the TX lock - if the buffer is end
* end buffer in this FIFO, we don't need a holding
* buffer any longer.
*/
if (bf->bf_flags & ATH_BUF_FIFOEND) {
ATH_TXQ_LOCK(txq);
ath_txq_freeholdingbuf(sc, txq);
ATH_TXQ_UNLOCK(txq);
}
/*
* First we need to make sure ts_rate is valid.
*
* Pre-EDMA chips pass the whole TX descriptor to
* the proctxdesc function which will then fill out
* ts_rate based on the ts_finaltsi (final TX index)
* in the TX descriptor. However the TX completion
* FIFO doesn't have this information. So here we
* do a separate HAL call to populate that information.
*
* The same problem exists with ts_longretry.
* The FreeBSD HAL corrects ts_longretry in the HAL layer;
* the AR9380 HAL currently doesn't. So until the HAL
* is imported and this can be added, we correct for it
* here.
*/
/* XXX TODO */
/* XXX faked for now. Ew. */
if (ts.ts_finaltsi < 4) {
ts.ts_rate =
bf->bf_state.bfs_rc[ts.ts_finaltsi].ratecode;
switch (ts.ts_finaltsi) {
case 3: ts.ts_longretry +=
bf->bf_state.bfs_rc[2].tries;
case 2: ts.ts_longretry +=
bf->bf_state.bfs_rc[1].tries;
case 1: ts.ts_longretry +=
bf->bf_state.bfs_rc[0].tries;
}
} else {
device_printf(sc->sc_dev, "%s: finaltsi=%d\n",
__func__,
ts.ts_finaltsi);
ts.ts_rate = bf->bf_state.bfs_rc[0].ratecode;
}
/*
* XXX This is terrible.
*
* Right now, some code uses the TX status that is
* passed in here, but the completion handlers in the
* software TX path also use bf_status.ds_txstat.
* Ew. That should all go away.
*
* XXX It's also possible the rate control completion
* routine is called twice.
*/
memcpy(&bf->bf_status, &ts, sizeof(ts));
ni = bf->bf_node;
/* Update RSSI */
/* XXX duplicate from ath_tx_processq */
if (ni != NULL && ts.ts_status == 0 &&
((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0)) {
nacked++;
sc->sc_stats.ast_tx_rssi = ts.ts_rssi;
ATH_RSSI_LPF(sc->sc_halstats.ns_avgtxrssi,
ts.ts_rssi);
}
/* Handle frame completion and rate control update */
ath_tx_process_buf_completion(sc, txq, &ts, bf);
/* NB: bf is invalid at this point */
}
sc->sc_wd_timer = 0;
/*
* XXX It's inefficient to do this if the FIFO queue is full,
* but there's no easy way right now to only populate
* the txq task for _one_ TXQ. This should be fixed.
*/
if (dosched) {
/* Attempt to schedule more hardware frames to the TX FIFO */
for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
if (ATH_TXQ_SETUP(sc, i)) {
ATH_TX_LOCK(sc);
ath_txq_sched(sc, &sc->sc_txq[i]);
ATH_TX_UNLOCK(sc);
ATH_TXQ_LOCK(&sc->sc_txq[i]);
ath_edma_tx_fifo_fill(sc, &sc->sc_txq[i]);
ATH_TXQ_UNLOCK(&sc->sc_txq[i]);
}
}
/* Kick software scheduler */
ath_tx_swq_kick(sc);
}
DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: end\n", __func__);
}
static void
ath_edma_attach_comp_func(struct ath_softc *sc)
{
TASK_INIT(&sc->sc_txtask, 0, ath_edma_tx_proc, sc);
}
void
ath_xmit_setup_edma(struct ath_softc *sc)
{
/* Fetch EDMA field and buffer sizes */
(void) ath_hal_gettxdesclen(sc->sc_ah, &sc->sc_tx_desclen);
(void) ath_hal_gettxstatuslen(sc->sc_ah, &sc->sc_tx_statuslen);
(void) ath_hal_getntxmaps(sc->sc_ah, &sc->sc_tx_nmaps);
if (bootverbose) {
device_printf(sc->sc_dev, "TX descriptor length: %d\n",
sc->sc_tx_desclen);
device_printf(sc->sc_dev, "TX status length: %d\n",
sc->sc_tx_statuslen);
device_printf(sc->sc_dev, "TX buffers per descriptor: %d\n",
sc->sc_tx_nmaps);
}
sc->sc_tx.xmit_setup = ath_edma_dma_txsetup;
sc->sc_tx.xmit_teardown = ath_edma_dma_txteardown;
sc->sc_tx.xmit_attach_comp_func = ath_edma_attach_comp_func;
sc->sc_tx.xmit_dma_restart = ath_edma_dma_restart;
sc->sc_tx.xmit_handoff = ath_edma_xmit_handoff;
sc->sc_tx.xmit_drain = ath_edma_tx_drain;
}