freebsd-skq/sys/dev/ath/if_ath_tx_edma.c

1062 lines
27 KiB
C
Raw Normal View History

/*-
* Copyright (c) 2012 Adrian Chadd <adrian@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer,
* without modification.
* 2. Redistributions in binary form must reproduce at minimum a disclaimer
* similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
* redistribution must be conditioned upon including a substantially
* similar Disclaimer requirement for further binary redistribution.
*
* NO WARRANTY
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
* IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGES.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
/*
* Driver for the Atheros Wireless LAN controller.
*
* This software is derived from work of Atsushi Onoe; his contribution
* is greatly appreciated.
*/
#include "opt_inet.h"
#include "opt_ath.h"
/*
* This is needed for register operations which are performed
* by the driver - eg, calls to ath_hal_gettsf32().
*
* It's also required for any AH_DEBUG checks in here, eg the
* module dependencies.
*/
#include "opt_ah.h"
#include "opt_wlan.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/errno.h>
#include <sys/callout.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kthread.h>
#include <sys/taskqueue.h>
#include <sys/priv.h>
#include <sys/module.h>
#include <sys/ktr.h>
#include <sys/smp.h> /* for mp_ncpus */
#include <machine/bus.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_llc.h>
#include <net80211/ieee80211_var.h>
#include <net80211/ieee80211_regdomain.h>
#ifdef IEEE80211_SUPPORT_SUPERG
#include <net80211/ieee80211_superg.h>
#endif
#ifdef IEEE80211_SUPPORT_TDMA
#include <net80211/ieee80211_tdma.h>
#endif
#include <net/bpf.h>
#ifdef INET
#include <netinet/in.h>
#include <netinet/if_ether.h>
#endif
#include <dev/ath/if_athvar.h>
#include <dev/ath/ath_hal/ah_devid.h> /* XXX for softled */
#include <dev/ath/ath_hal/ah_diagcodes.h>
#include <dev/ath/if_ath_debug.h>
#include <dev/ath/if_ath_misc.h>
#include <dev/ath/if_ath_tsf.h>
#include <dev/ath/if_ath_tx.h>
#include <dev/ath/if_ath_sysctl.h>
#include <dev/ath/if_ath_led.h>
#include <dev/ath/if_ath_keycache.h>
#include <dev/ath/if_ath_rx.h>
#include <dev/ath/if_ath_beacon.h>
#include <dev/ath/if_athdfs.h>
#include <dev/ath/if_ath_descdma.h>
#ifdef ATH_TX99_DIAG
#include <dev/ath/ath_tx99/ath_tx99.h>
#endif
#include <dev/ath/if_ath_tx_edma.h>
#ifdef ATH_DEBUG_ALQ
#include <dev/ath/if_ath_alq.h>
#endif
/*
* some general macros
*/
#define INCR(_l, _sz) (_l) ++; (_l) &= ((_sz) - 1)
#define DECR(_l, _sz) (_l) --; (_l) &= ((_sz) - 1)
/*
* XXX doesn't belong here, and should be tunable
*/
#define ATH_TXSTATUS_RING_SIZE 512
MALLOC_DECLARE(M_ATHDEV);
static void ath_edma_tx_processq(struct ath_softc *sc, int dosched);
[ath] add support for batching frames to the general TX queues. It turns out the frame scheduling policies (eg DBA_GATED) operate on a single TX FIFO entry. ASAP scheduling is fine; those frames always go out. DBA-gated sets the TX queue ready when the DBA timer fires, which triggers a beacon transmit. Normally this is used for content-after-beacon queue (CABQ) work, which needs to burst out immediately after a beacon. (eg broadcast, multicast, etc frames.) This is a general policy that you can use for any queue, and Sam's TDMA code uses it. When DBA_GATED is used and something like say, an 11e TX burst window, it only operates on a single TX FIFO entry. If you have a single frame per TX FIFO entry and say, a 2.5ms long burst window (eg TDMA!) then it'll only burst a single frame every 2.5ms. If there's no gating (eg ASAP) then the burst window is fine, and multiple TX FIFO slots get used. The CABQ code does pack in a list of frames (ie, the whole cabq) but up until this commit, the normal TX queues didn't. It showed up when I started to debug TDMA on the AR9380 and later. This commit doesn't fix the TDMA case - that's still broken here, because all I'm doing here is allowing 'some' frames to be bursting, but I'm certainly not filling the whole TX FIFO slot entry with frames. Doing that 'properly' kind of requires me to take into account how long packets should take to transmit and say, doing 1.5 or something times that per TX FIFO slot, as if you partially transmit a slot, when it's next gated it'll just finish that TX FIFO slot, then not advance to the next one. Now, I /also/ think queuing a new packet restarts DMA, but you have to push new frames into the TX FIFO. I need to experiment some more with this because if it's really the case, I will be able to do TDMA support without the egregious hacks I have in my local tree. Sam's TDMA code for previous chips would just kick the TXE bit to push along DMA again, but we can't do that for EDMA chips - we /have/ to push a new frame into the TX FIFO to restart DMA. Ugh. Tested: * AR9380, STA mode * AR9380, hostap mode * AR9580, hostap mode Approved by: re (gjb)
2016-06-19 03:45:32 +00:00
#ifdef ATH_DEBUG_ALQ
static void
[ath] add support for batching frames to the general TX queues. It turns out the frame scheduling policies (eg DBA_GATED) operate on a single TX FIFO entry. ASAP scheduling is fine; those frames always go out. DBA-gated sets the TX queue ready when the DBA timer fires, which triggers a beacon transmit. Normally this is used for content-after-beacon queue (CABQ) work, which needs to burst out immediately after a beacon. (eg broadcast, multicast, etc frames.) This is a general policy that you can use for any queue, and Sam's TDMA code uses it. When DBA_GATED is used and something like say, an 11e TX burst window, it only operates on a single TX FIFO entry. If you have a single frame per TX FIFO entry and say, a 2.5ms long burst window (eg TDMA!) then it'll only burst a single frame every 2.5ms. If there's no gating (eg ASAP) then the burst window is fine, and multiple TX FIFO slots get used. The CABQ code does pack in a list of frames (ie, the whole cabq) but up until this commit, the normal TX queues didn't. It showed up when I started to debug TDMA on the AR9380 and later. This commit doesn't fix the TDMA case - that's still broken here, because all I'm doing here is allowing 'some' frames to be bursting, but I'm certainly not filling the whole TX FIFO slot entry with frames. Doing that 'properly' kind of requires me to take into account how long packets should take to transmit and say, doing 1.5 or something times that per TX FIFO slot, as if you partially transmit a slot, when it's next gated it'll just finish that TX FIFO slot, then not advance to the next one. Now, I /also/ think queuing a new packet restarts DMA, but you have to push new frames into the TX FIFO. I need to experiment some more with this because if it's really the case, I will be able to do TDMA support without the egregious hacks I have in my local tree. Sam's TDMA code for previous chips would just kick the TXE bit to push along DMA again, but we can't do that for EDMA chips - we /have/ to push a new frame into the TX FIFO to restart DMA. Ugh. Tested: * AR9380, STA mode * AR9380, hostap mode * AR9580, hostap mode Approved by: re (gjb)
2016-06-19 03:45:32 +00:00
ath_tx_alq_edma_push(struct ath_softc *sc, int txq, int nframes,
int fifo_depth, int frame_cnt)
{
struct if_ath_alq_tx_fifo_push aq;
aq.txq = htobe32(txq);
aq.nframes = htobe32(nframes);
aq.fifo_depth = htobe32(fifo_depth);
aq.frame_cnt = htobe32(frame_cnt);
if_ath_alq_post(&sc->sc_alq, ATH_ALQ_TX_FIFO_PUSH,
sizeof(aq),
(const char *) &aq);
}
#endif /* ATH_DEBUG_ALQ */
/*
* XXX TODO: push an aggregate as a single FIFO slot, even though
* it may not meet the TXOP for say, DBA-gated traffic in TDMA mode.
*
* The TX completion code handles a TX FIFO slot having multiple frames,
* aggregate or otherwise, but it may just make things easier to deal
* with.
*
* XXX TODO: track the number of aggregate subframes and put that in the
* push alq message.
*/
[ath] add support for batching frames to the general TX queues. It turns out the frame scheduling policies (eg DBA_GATED) operate on a single TX FIFO entry. ASAP scheduling is fine; those frames always go out. DBA-gated sets the TX queue ready when the DBA timer fires, which triggers a beacon transmit. Normally this is used for content-after-beacon queue (CABQ) work, which needs to burst out immediately after a beacon. (eg broadcast, multicast, etc frames.) This is a general policy that you can use for any queue, and Sam's TDMA code uses it. When DBA_GATED is used and something like say, an 11e TX burst window, it only operates on a single TX FIFO entry. If you have a single frame per TX FIFO entry and say, a 2.5ms long burst window (eg TDMA!) then it'll only burst a single frame every 2.5ms. If there's no gating (eg ASAP) then the burst window is fine, and multiple TX FIFO slots get used. The CABQ code does pack in a list of frames (ie, the whole cabq) but up until this commit, the normal TX queues didn't. It showed up when I started to debug TDMA on the AR9380 and later. This commit doesn't fix the TDMA case - that's still broken here, because all I'm doing here is allowing 'some' frames to be bursting, but I'm certainly not filling the whole TX FIFO slot entry with frames. Doing that 'properly' kind of requires me to take into account how long packets should take to transmit and say, doing 1.5 or something times that per TX FIFO slot, as if you partially transmit a slot, when it's next gated it'll just finish that TX FIFO slot, then not advance to the next one. Now, I /also/ think queuing a new packet restarts DMA, but you have to push new frames into the TX FIFO. I need to experiment some more with this because if it's really the case, I will be able to do TDMA support without the egregious hacks I have in my local tree. Sam's TDMA code for previous chips would just kick the TXE bit to push along DMA again, but we can't do that for EDMA chips - we /have/ to push a new frame into the TX FIFO to restart DMA. Ugh. Tested: * AR9380, STA mode * AR9380, hostap mode * AR9580, hostap mode Approved by: re (gjb)
2016-06-19 03:45:32 +00:00
static void
ath_tx_edma_push_staging_list(struct ath_softc *sc, struct ath_txq *txq,
int limit)
{
Implement the replacement EDMA FIFO code. (Yes, the previous code temporarily broke EDMA TX. I'm sorry; I should've actually setup ATH_BUF_FIFOEND on frames so txq->axq_fifo_depth was cleared!) This code implements a whole bunch of sorely needed EDMA TX improvements along with CABQ TX support. The specifics: * When filling/refilling the FIFO, use the new TXQ staging queue for FIFO frames * Tag frames with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND correctly. For now the non-CABQ transmit path pushes one frame into the TXQ staging queue without setting up the intermediary link pointers to chain them together, so draining frames from the txq staging queue to the FIFO queue occurs AMPDU / MPDU at a time. * In the CABQ case, manually tag the list with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND so a chain of frames is pushed into the FIFO at once. * Now that frames are in a FIFO pending queue, we can top up the FIFO after completing a single frame. This means we can keep it filled rather than waiting for it drain and _then_ adding more frames. * The EDMA restart routine now walks the FIFO queue in the TXQ rather than the pending queue and re-initialises the FIFO with that. * When restarting EDMA, we may have partially completed sending a list. So stamp the first frame that we see in a list with ATH_BUF_FIFOPTR and push _that_ into the hardware. * When completing frames, only check those on the FIFO queue. We should never ever queue frames from the pending queue direct to the hardware, so there's no point in checking. * Until I figure out what's going on, make sure if the TXSTATUS for an empty queue pops up, complain loudly and continue. This will stop the panics that people are seeing. I'll add some code later which will assist in ensuring I'm populating each descriptor with the correct queue ID. * When considering whether to queue frames to the hardware queue directly or software queue frames, make sure the depth of the FIFO is taken into account now. * When completing frames, tag them with ATH_BUF_BUSY if they're not the final frame in a FIFO list. The same holding descriptor behaviour is required when handling descriptors linked together with a link pointer as the hardware will re-read the previous descriptor to refresh the link pointer before contiuning. * .. and if we complete the FIFO list (ie, the buffer has ATH_BUF_FIFOEND set), then we don't need the holding buffer any longer. Thus, free it. Tested: * AR9380/AR9580, STA and hostap * AR9280, STA/hostap TODO: * I don't yet trust that the EDMA restart routine is totally correct in all circumstances. I'll continue to thrash this out under heavy multiple-TXQ traffic load and fix whatever pops up.
2013-03-26 20:04:45 +00:00
struct ath_buf *bf, *bf_last;
[ath] add support for batching frames to the general TX queues. It turns out the frame scheduling policies (eg DBA_GATED) operate on a single TX FIFO entry. ASAP scheduling is fine; those frames always go out. DBA-gated sets the TX queue ready when the DBA timer fires, which triggers a beacon transmit. Normally this is used for content-after-beacon queue (CABQ) work, which needs to burst out immediately after a beacon. (eg broadcast, multicast, etc frames.) This is a general policy that you can use for any queue, and Sam's TDMA code uses it. When DBA_GATED is used and something like say, an 11e TX burst window, it only operates on a single TX FIFO entry. If you have a single frame per TX FIFO entry and say, a 2.5ms long burst window (eg TDMA!) then it'll only burst a single frame every 2.5ms. If there's no gating (eg ASAP) then the burst window is fine, and multiple TX FIFO slots get used. The CABQ code does pack in a list of frames (ie, the whole cabq) but up until this commit, the normal TX queues didn't. It showed up when I started to debug TDMA on the AR9380 and later. This commit doesn't fix the TDMA case - that's still broken here, because all I'm doing here is allowing 'some' frames to be bursting, but I'm certainly not filling the whole TX FIFO slot entry with frames. Doing that 'properly' kind of requires me to take into account how long packets should take to transmit and say, doing 1.5 or something times that per TX FIFO slot, as if you partially transmit a slot, when it's next gated it'll just finish that TX FIFO slot, then not advance to the next one. Now, I /also/ think queuing a new packet restarts DMA, but you have to push new frames into the TX FIFO. I need to experiment some more with this because if it's really the case, I will be able to do TDMA support without the egregious hacks I have in my local tree. Sam's TDMA code for previous chips would just kick the TXE bit to push along DMA again, but we can't do that for EDMA chips - we /have/ to push a new frame into the TX FIFO to restart DMA. Ugh. Tested: * AR9380, STA mode * AR9380, hostap mode * AR9580, hostap mode Approved by: re (gjb)
2016-06-19 03:45:32 +00:00
struct ath_buf *bfi, *bfp;
int i, sqdepth;
TAILQ_HEAD(axq_q_f_s, ath_buf) sq;
Overhaul the TXQ locking (again!) as part of some beacon/cabq timing related issues. Moving the TX locking under one lock made things easier to progress on but it had one important side-effect - it increased the latency when handling CABQ setup when sending beacons. This commit introduces a bunch of new changes and a few unrelated changs that are just easier to lump in here. The aim is to have the CABQ locking separate from other locking. The CABQ transmit path in the beacon process thus doesn't have to grab the general TX lock, reducing lock contention/latency and making it more likely that we'll make the beacon TX timing. The second half of this commit is the CABQ related setup changes needed for sane looking EDMA CABQ support. Right now the EDMA TX code naively assumes that only one frame (MPDU or A-MPDU) is being pushed into each FIFO slot. For the CABQ this isn't true - a whole list of frames is being pushed in - and thus CABQ handling breaks very quickly. The aim here is to setup the CABQ list and then push _that list_ to the hardware for transmission. I can then extend the EDMA TX code to stamp that list as being "one" FIFO entry (likely by tagging the last buffer in that list as "FIFO END") so the EDMA TX completion code correctly tracks things. Major: * Migrate the per-TXQ add/removal locking back to per-TXQ, rather than a single lock. * Leave the software queue side of things under the ATH_TX_LOCK lock, (continuing) to serialise things as they are. * Add a new function which is called whenever there's a beacon miss, to print out some debugging. This is primarily designed to help me figure out if the beacon miss events are due to a noisy environment, issues with the PHY/MAC, or other. * Move the CABQ setup/enable to occur _after_ all the VAPs have been looked at. This means that for multiple VAPS in bursted mode, the CABQ gets primed once all VAPs are checked, rather than being primed on the first VAP and then having frames appended after this. Minor: * Add a (disabled) twiddle to let me enable/disable cabq traffic. It's primarily there to let me easily debug what's going on with beacon and CABQ setup/traffic; there's some DMA engine hangs which I'm finally trying to trace down. * Clear bf_next when flushing frames; it should quieten some warnings that show up when a node goes away. Tested: * AR9280, STA/hostap, up to 4 vaps (staggered) * AR5416, STA/hostap, up to 4 vaps (staggered) TODO: * (Lots) more AR9380 and later testing, as I may have missed something here. * Leverage this to fix CABQ hanling for AR9380 and later chips. * Force bursted beaconing on the chips that default to staggered beacons and ensure the CABQ stuff is all sane (eg, the MORE bits that aren't being correctly set when chaining descriptors.)
2013-03-24 00:03:12 +00:00
ATH_TXQ_LOCK_ASSERT(txq);
[ath] add support for batching frames to the general TX queues. It turns out the frame scheduling policies (eg DBA_GATED) operate on a single TX FIFO entry. ASAP scheduling is fine; those frames always go out. DBA-gated sets the TX queue ready when the DBA timer fires, which triggers a beacon transmit. Normally this is used for content-after-beacon queue (CABQ) work, which needs to burst out immediately after a beacon. (eg broadcast, multicast, etc frames.) This is a general policy that you can use for any queue, and Sam's TDMA code uses it. When DBA_GATED is used and something like say, an 11e TX burst window, it only operates on a single TX FIFO entry. If you have a single frame per TX FIFO entry and say, a 2.5ms long burst window (eg TDMA!) then it'll only burst a single frame every 2.5ms. If there's no gating (eg ASAP) then the burst window is fine, and multiple TX FIFO slots get used. The CABQ code does pack in a list of frames (ie, the whole cabq) but up until this commit, the normal TX queues didn't. It showed up when I started to debug TDMA on the AR9380 and later. This commit doesn't fix the TDMA case - that's still broken here, because all I'm doing here is allowing 'some' frames to be bursting, but I'm certainly not filling the whole TX FIFO slot entry with frames. Doing that 'properly' kind of requires me to take into account how long packets should take to transmit and say, doing 1.5 or something times that per TX FIFO slot, as if you partially transmit a slot, when it's next gated it'll just finish that TX FIFO slot, then not advance to the next one. Now, I /also/ think queuing a new packet restarts DMA, but you have to push new frames into the TX FIFO. I need to experiment some more with this because if it's really the case, I will be able to do TDMA support without the egregious hacks I have in my local tree. Sam's TDMA code for previous chips would just kick the TXE bit to push along DMA again, but we can't do that for EDMA chips - we /have/ to push a new frame into the TX FIFO to restart DMA. Ugh. Tested: * AR9380, STA mode * AR9380, hostap mode * AR9580, hostap mode Approved by: re (gjb)
2016-06-19 03:45:32 +00:00
/*
* Don't bother doing any work if it's full.
*/
if (txq->axq_fifo_depth >= HAL_TXFIFO_DEPTH)
return;
[ath] add support for batching frames to the general TX queues. It turns out the frame scheduling policies (eg DBA_GATED) operate on a single TX FIFO entry. ASAP scheduling is fine; those frames always go out. DBA-gated sets the TX queue ready when the DBA timer fires, which triggers a beacon transmit. Normally this is used for content-after-beacon queue (CABQ) work, which needs to burst out immediately after a beacon. (eg broadcast, multicast, etc frames.) This is a general policy that you can use for any queue, and Sam's TDMA code uses it. When DBA_GATED is used and something like say, an 11e TX burst window, it only operates on a single TX FIFO entry. If you have a single frame per TX FIFO entry and say, a 2.5ms long burst window (eg TDMA!) then it'll only burst a single frame every 2.5ms. If there's no gating (eg ASAP) then the burst window is fine, and multiple TX FIFO slots get used. The CABQ code does pack in a list of frames (ie, the whole cabq) but up until this commit, the normal TX queues didn't. It showed up when I started to debug TDMA on the AR9380 and later. This commit doesn't fix the TDMA case - that's still broken here, because all I'm doing here is allowing 'some' frames to be bursting, but I'm certainly not filling the whole TX FIFO slot entry with frames. Doing that 'properly' kind of requires me to take into account how long packets should take to transmit and say, doing 1.5 or something times that per TX FIFO slot, as if you partially transmit a slot, when it's next gated it'll just finish that TX FIFO slot, then not advance to the next one. Now, I /also/ think queuing a new packet restarts DMA, but you have to push new frames into the TX FIFO. I need to experiment some more with this because if it's really the case, I will be able to do TDMA support without the egregious hacks I have in my local tree. Sam's TDMA code for previous chips would just kick the TXE bit to push along DMA again, but we can't do that for EDMA chips - we /have/ to push a new frame into the TX FIFO to restart DMA. Ugh. Tested: * AR9380, STA mode * AR9380, hostap mode * AR9580, hostap mode Approved by: re (gjb)
2016-06-19 03:45:32 +00:00
if (TAILQ_EMPTY(&txq->axq_q))
return;
Implement the replacement EDMA FIFO code. (Yes, the previous code temporarily broke EDMA TX. I'm sorry; I should've actually setup ATH_BUF_FIFOEND on frames so txq->axq_fifo_depth was cleared!) This code implements a whole bunch of sorely needed EDMA TX improvements along with CABQ TX support. The specifics: * When filling/refilling the FIFO, use the new TXQ staging queue for FIFO frames * Tag frames with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND correctly. For now the non-CABQ transmit path pushes one frame into the TXQ staging queue without setting up the intermediary link pointers to chain them together, so draining frames from the txq staging queue to the FIFO queue occurs AMPDU / MPDU at a time. * In the CABQ case, manually tag the list with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND so a chain of frames is pushed into the FIFO at once. * Now that frames are in a FIFO pending queue, we can top up the FIFO after completing a single frame. This means we can keep it filled rather than waiting for it drain and _then_ adding more frames. * The EDMA restart routine now walks the FIFO queue in the TXQ rather than the pending queue and re-initialises the FIFO with that. * When restarting EDMA, we may have partially completed sending a list. So stamp the first frame that we see in a list with ATH_BUF_FIFOPTR and push _that_ into the hardware. * When completing frames, only check those on the FIFO queue. We should never ever queue frames from the pending queue direct to the hardware, so there's no point in checking. * Until I figure out what's going on, make sure if the TXSTATUS for an empty queue pops up, complain loudly and continue. This will stop the panics that people are seeing. I'll add some code later which will assist in ensuring I'm populating each descriptor with the correct queue ID. * When considering whether to queue frames to the hardware queue directly or software queue frames, make sure the depth of the FIFO is taken into account now. * When completing frames, tag them with ATH_BUF_BUSY if they're not the final frame in a FIFO list. The same holding descriptor behaviour is required when handling descriptors linked together with a link pointer as the hardware will re-read the previous descriptor to refresh the link pointer before contiuning. * .. and if we complete the FIFO list (ie, the buffer has ATH_BUF_FIFOEND set), then we don't need the holding buffer any longer. Thus, free it. Tested: * AR9380/AR9580, STA and hostap * AR9280, STA/hostap TODO: * I don't yet trust that the EDMA restart routine is totally correct in all circumstances. I'll continue to thrash this out under heavy multiple-TXQ traffic load and fix whatever pops up.
2013-03-26 20:04:45 +00:00
[ath] add support for batching frames to the general TX queues. It turns out the frame scheduling policies (eg DBA_GATED) operate on a single TX FIFO entry. ASAP scheduling is fine; those frames always go out. DBA-gated sets the TX queue ready when the DBA timer fires, which triggers a beacon transmit. Normally this is used for content-after-beacon queue (CABQ) work, which needs to burst out immediately after a beacon. (eg broadcast, multicast, etc frames.) This is a general policy that you can use for any queue, and Sam's TDMA code uses it. When DBA_GATED is used and something like say, an 11e TX burst window, it only operates on a single TX FIFO entry. If you have a single frame per TX FIFO entry and say, a 2.5ms long burst window (eg TDMA!) then it'll only burst a single frame every 2.5ms. If there's no gating (eg ASAP) then the burst window is fine, and multiple TX FIFO slots get used. The CABQ code does pack in a list of frames (ie, the whole cabq) but up until this commit, the normal TX queues didn't. It showed up when I started to debug TDMA on the AR9380 and later. This commit doesn't fix the TDMA case - that's still broken here, because all I'm doing here is allowing 'some' frames to be bursting, but I'm certainly not filling the whole TX FIFO slot entry with frames. Doing that 'properly' kind of requires me to take into account how long packets should take to transmit and say, doing 1.5 or something times that per TX FIFO slot, as if you partially transmit a slot, when it's next gated it'll just finish that TX FIFO slot, then not advance to the next one. Now, I /also/ think queuing a new packet restarts DMA, but you have to push new frames into the TX FIFO. I need to experiment some more with this because if it's really the case, I will be able to do TDMA support without the egregious hacks I have in my local tree. Sam's TDMA code for previous chips would just kick the TXE bit to push along DMA again, but we can't do that for EDMA chips - we /have/ to push a new frame into the TX FIFO to restart DMA. Ugh. Tested: * AR9380, STA mode * AR9380, hostap mode * AR9580, hostap mode Approved by: re (gjb)
2016-06-19 03:45:32 +00:00
TAILQ_INIT(&sq);
Implement the replacement EDMA FIFO code. (Yes, the previous code temporarily broke EDMA TX. I'm sorry; I should've actually setup ATH_BUF_FIFOEND on frames so txq->axq_fifo_depth was cleared!) This code implements a whole bunch of sorely needed EDMA TX improvements along with CABQ TX support. The specifics: * When filling/refilling the FIFO, use the new TXQ staging queue for FIFO frames * Tag frames with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND correctly. For now the non-CABQ transmit path pushes one frame into the TXQ staging queue without setting up the intermediary link pointers to chain them together, so draining frames from the txq staging queue to the FIFO queue occurs AMPDU / MPDU at a time. * In the CABQ case, manually tag the list with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND so a chain of frames is pushed into the FIFO at once. * Now that frames are in a FIFO pending queue, we can top up the FIFO after completing a single frame. This means we can keep it filled rather than waiting for it drain and _then_ adding more frames. * The EDMA restart routine now walks the FIFO queue in the TXQ rather than the pending queue and re-initialises the FIFO with that. * When restarting EDMA, we may have partially completed sending a list. So stamp the first frame that we see in a list with ATH_BUF_FIFOPTR and push _that_ into the hardware. * When completing frames, only check those on the FIFO queue. We should never ever queue frames from the pending queue direct to the hardware, so there's no point in checking. * Until I figure out what's going on, make sure if the TXSTATUS for an empty queue pops up, complain loudly and continue. This will stop the panics that people are seeing. I'll add some code later which will assist in ensuring I'm populating each descriptor with the correct queue ID. * When considering whether to queue frames to the hardware queue directly or software queue frames, make sure the depth of the FIFO is taken into account now. * When completing frames, tag them with ATH_BUF_BUSY if they're not the final frame in a FIFO list. The same holding descriptor behaviour is required when handling descriptors linked together with a link pointer as the hardware will re-read the previous descriptor to refresh the link pointer before contiuning. * .. and if we complete the FIFO list (ie, the buffer has ATH_BUF_FIFOEND set), then we don't need the holding buffer any longer. Thus, free it. Tested: * AR9380/AR9580, STA and hostap * AR9280, STA/hostap TODO: * I don't yet trust that the EDMA restart routine is totally correct in all circumstances. I'll continue to thrash this out under heavy multiple-TXQ traffic load and fix whatever pops up.
2013-03-26 20:04:45 +00:00
[ath] add support for batching frames to the general TX queues. It turns out the frame scheduling policies (eg DBA_GATED) operate on a single TX FIFO entry. ASAP scheduling is fine; those frames always go out. DBA-gated sets the TX queue ready when the DBA timer fires, which triggers a beacon transmit. Normally this is used for content-after-beacon queue (CABQ) work, which needs to burst out immediately after a beacon. (eg broadcast, multicast, etc frames.) This is a general policy that you can use for any queue, and Sam's TDMA code uses it. When DBA_GATED is used and something like say, an 11e TX burst window, it only operates on a single TX FIFO entry. If you have a single frame per TX FIFO entry and say, a 2.5ms long burst window (eg TDMA!) then it'll only burst a single frame every 2.5ms. If there's no gating (eg ASAP) then the burst window is fine, and multiple TX FIFO slots get used. The CABQ code does pack in a list of frames (ie, the whole cabq) but up until this commit, the normal TX queues didn't. It showed up when I started to debug TDMA on the AR9380 and later. This commit doesn't fix the TDMA case - that's still broken here, because all I'm doing here is allowing 'some' frames to be bursting, but I'm certainly not filling the whole TX FIFO slot entry with frames. Doing that 'properly' kind of requires me to take into account how long packets should take to transmit and say, doing 1.5 or something times that per TX FIFO slot, as if you partially transmit a slot, when it's next gated it'll just finish that TX FIFO slot, then not advance to the next one. Now, I /also/ think queuing a new packet restarts DMA, but you have to push new frames into the TX FIFO. I need to experiment some more with this because if it's really the case, I will be able to do TDMA support without the egregious hacks I have in my local tree. Sam's TDMA code for previous chips would just kick the TXE bit to push along DMA again, but we can't do that for EDMA chips - we /have/ to push a new frame into the TX FIFO to restart DMA. Ugh. Tested: * AR9380, STA mode * AR9380, hostap mode * AR9580, hostap mode Approved by: re (gjb)
2016-06-19 03:45:32 +00:00
/*
* First pass - walk sq, queue up to 'limit' entries,
* subtract them from the staging queue.
*/
sqdepth = 0;
for (i = 0; i < limit; i++) {
/* Grab the head entry */
bf = ATH_TXQ_FIRST(txq);
if (bf == NULL)
break;
Implement the replacement EDMA FIFO code. (Yes, the previous code temporarily broke EDMA TX. I'm sorry; I should've actually setup ATH_BUF_FIFOEND on frames so txq->axq_fifo_depth was cleared!) This code implements a whole bunch of sorely needed EDMA TX improvements along with CABQ TX support. The specifics: * When filling/refilling the FIFO, use the new TXQ staging queue for FIFO frames * Tag frames with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND correctly. For now the non-CABQ transmit path pushes one frame into the TXQ staging queue without setting up the intermediary link pointers to chain them together, so draining frames from the txq staging queue to the FIFO queue occurs AMPDU / MPDU at a time. * In the CABQ case, manually tag the list with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND so a chain of frames is pushed into the FIFO at once. * Now that frames are in a FIFO pending queue, we can top up the FIFO after completing a single frame. This means we can keep it filled rather than waiting for it drain and _then_ adding more frames. * The EDMA restart routine now walks the FIFO queue in the TXQ rather than the pending queue and re-initialises the FIFO with that. * When restarting EDMA, we may have partially completed sending a list. So stamp the first frame that we see in a list with ATH_BUF_FIFOPTR and push _that_ into the hardware. * When completing frames, only check those on the FIFO queue. We should never ever queue frames from the pending queue direct to the hardware, so there's no point in checking. * Until I figure out what's going on, make sure if the TXSTATUS for an empty queue pops up, complain loudly and continue. This will stop the panics that people are seeing. I'll add some code later which will assist in ensuring I'm populating each descriptor with the correct queue ID. * When considering whether to queue frames to the hardware queue directly or software queue frames, make sure the depth of the FIFO is taken into account now. * When completing frames, tag them with ATH_BUF_BUSY if they're not the final frame in a FIFO list. The same holding descriptor behaviour is required when handling descriptors linked together with a link pointer as the hardware will re-read the previous descriptor to refresh the link pointer before contiuning. * .. and if we complete the FIFO list (ie, the buffer has ATH_BUF_FIFOEND set), then we don't need the holding buffer any longer. Thus, free it. Tested: * AR9380/AR9580, STA and hostap * AR9280, STA/hostap TODO: * I don't yet trust that the EDMA restart routine is totally correct in all circumstances. I'll continue to thrash this out under heavy multiple-TXQ traffic load and fix whatever pops up.
2013-03-26 20:04:45 +00:00
ATH_TXQ_REMOVE(txq, bf, bf_list);
[ath] add support for batching frames to the general TX queues. It turns out the frame scheduling policies (eg DBA_GATED) operate on a single TX FIFO entry. ASAP scheduling is fine; those frames always go out. DBA-gated sets the TX queue ready when the DBA timer fires, which triggers a beacon transmit. Normally this is used for content-after-beacon queue (CABQ) work, which needs to burst out immediately after a beacon. (eg broadcast, multicast, etc frames.) This is a general policy that you can use for any queue, and Sam's TDMA code uses it. When DBA_GATED is used and something like say, an 11e TX burst window, it only operates on a single TX FIFO entry. If you have a single frame per TX FIFO entry and say, a 2.5ms long burst window (eg TDMA!) then it'll only burst a single frame every 2.5ms. If there's no gating (eg ASAP) then the burst window is fine, and multiple TX FIFO slots get used. The CABQ code does pack in a list of frames (ie, the whole cabq) but up until this commit, the normal TX queues didn't. It showed up when I started to debug TDMA on the AR9380 and later. This commit doesn't fix the TDMA case - that's still broken here, because all I'm doing here is allowing 'some' frames to be bursting, but I'm certainly not filling the whole TX FIFO slot entry with frames. Doing that 'properly' kind of requires me to take into account how long packets should take to transmit and say, doing 1.5 or something times that per TX FIFO slot, as if you partially transmit a slot, when it's next gated it'll just finish that TX FIFO slot, then not advance to the next one. Now, I /also/ think queuing a new packet restarts DMA, but you have to push new frames into the TX FIFO. I need to experiment some more with this because if it's really the case, I will be able to do TDMA support without the egregious hacks I have in my local tree. Sam's TDMA code for previous chips would just kick the TXE bit to push along DMA again, but we can't do that for EDMA chips - we /have/ to push a new frame into the TX FIFO to restart DMA. Ugh. Tested: * AR9380, STA mode * AR9380, hostap mode * AR9580, hostap mode Approved by: re (gjb)
2016-06-19 03:45:32 +00:00
/* Queue it into our staging list */
TAILQ_INSERT_TAIL(&sq, bf, bf_list);
/* Ensure the flags are cleared */
bf->bf_flags &= ~(ATH_BUF_FIFOPTR | ATH_BUF_FIFOEND);
[ath] add support for batching frames to the general TX queues. It turns out the frame scheduling policies (eg DBA_GATED) operate on a single TX FIFO entry. ASAP scheduling is fine; those frames always go out. DBA-gated sets the TX queue ready when the DBA timer fires, which triggers a beacon transmit. Normally this is used for content-after-beacon queue (CABQ) work, which needs to burst out immediately after a beacon. (eg broadcast, multicast, etc frames.) This is a general policy that you can use for any queue, and Sam's TDMA code uses it. When DBA_GATED is used and something like say, an 11e TX burst window, it only operates on a single TX FIFO entry. If you have a single frame per TX FIFO entry and say, a 2.5ms long burst window (eg TDMA!) then it'll only burst a single frame every 2.5ms. If there's no gating (eg ASAP) then the burst window is fine, and multiple TX FIFO slots get used. The CABQ code does pack in a list of frames (ie, the whole cabq) but up until this commit, the normal TX queues didn't. It showed up when I started to debug TDMA on the AR9380 and later. This commit doesn't fix the TDMA case - that's still broken here, because all I'm doing here is allowing 'some' frames to be bursting, but I'm certainly not filling the whole TX FIFO slot entry with frames. Doing that 'properly' kind of requires me to take into account how long packets should take to transmit and say, doing 1.5 or something times that per TX FIFO slot, as if you partially transmit a slot, when it's next gated it'll just finish that TX FIFO slot, then not advance to the next one. Now, I /also/ think queuing a new packet restarts DMA, but you have to push new frames into the TX FIFO. I need to experiment some more with this because if it's really the case, I will be able to do TDMA support without the egregious hacks I have in my local tree. Sam's TDMA code for previous chips would just kick the TXE bit to push along DMA again, but we can't do that for EDMA chips - we /have/ to push a new frame into the TX FIFO to restart DMA. Ugh. Tested: * AR9380, STA mode * AR9380, hostap mode * AR9580, hostap mode Approved by: re (gjb)
2016-06-19 03:45:32 +00:00
sqdepth++;
}
/*
* Ok, so now we have a staging list of up to 'limit'
* frames from the txq. Now let's wrap that up
* into its own list and pass that to the hardware
* as one FIFO entry.
*/
bf = TAILQ_FIRST(&sq);
bf_last = TAILQ_LAST(&sq, axq_q_s);
/*
* Ok, so here's the gymnastics reqiured to make this
* all sensible.
*/
/*
* Tag the first/last buffer appropriately.
*/
bf->bf_flags |= ATH_BUF_FIFOPTR;
bf_last->bf_flags |= ATH_BUF_FIFOEND;
/*
* Walk the descriptor list and link them appropriately.
*/
bfp = NULL;
TAILQ_FOREACH(bfi, &sq, bf_list) {
if (bfp != NULL) {
ath_hal_settxdesclink(sc->sc_ah, bfp->bf_lastds,
bfi->bf_daddr);
}
bfp = bfi;
}
i = 0;
TAILQ_FOREACH(bfi, &sq, bf_list) {
#ifdef ATH_DEBUG
if (sc->sc_debug & ATH_DEBUG_XMIT_DESC)
[ath] add support for batching frames to the general TX queues. It turns out the frame scheduling policies (eg DBA_GATED) operate on a single TX FIFO entry. ASAP scheduling is fine; those frames always go out. DBA-gated sets the TX queue ready when the DBA timer fires, which triggers a beacon transmit. Normally this is used for content-after-beacon queue (CABQ) work, which needs to burst out immediately after a beacon. (eg broadcast, multicast, etc frames.) This is a general policy that you can use for any queue, and Sam's TDMA code uses it. When DBA_GATED is used and something like say, an 11e TX burst window, it only operates on a single TX FIFO entry. If you have a single frame per TX FIFO entry and say, a 2.5ms long burst window (eg TDMA!) then it'll only burst a single frame every 2.5ms. If there's no gating (eg ASAP) then the burst window is fine, and multiple TX FIFO slots get used. The CABQ code does pack in a list of frames (ie, the whole cabq) but up until this commit, the normal TX queues didn't. It showed up when I started to debug TDMA on the AR9380 and later. This commit doesn't fix the TDMA case - that's still broken here, because all I'm doing here is allowing 'some' frames to be bursting, but I'm certainly not filling the whole TX FIFO slot entry with frames. Doing that 'properly' kind of requires me to take into account how long packets should take to transmit and say, doing 1.5 or something times that per TX FIFO slot, as if you partially transmit a slot, when it's next gated it'll just finish that TX FIFO slot, then not advance to the next one. Now, I /also/ think queuing a new packet restarts DMA, but you have to push new frames into the TX FIFO. I need to experiment some more with this because if it's really the case, I will be able to do TDMA support without the egregious hacks I have in my local tree. Sam's TDMA code for previous chips would just kick the TXE bit to push along DMA again, but we can't do that for EDMA chips - we /have/ to push a new frame into the TX FIFO to restart DMA. Ugh. Tested: * AR9380, STA mode * AR9380, hostap mode * AR9580, hostap mode Approved by: re (gjb)
2016-06-19 03:45:32 +00:00
ath_printtxbuf(sc, bfi, txq->axq_qnum, i, 0);
#endif/* ATH_DEBUG */
#ifdef ATH_DEBUG_ALQ
if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
[ath] add support for batching frames to the general TX queues. It turns out the frame scheduling policies (eg DBA_GATED) operate on a single TX FIFO entry. ASAP scheduling is fine; those frames always go out. DBA-gated sets the TX queue ready when the DBA timer fires, which triggers a beacon transmit. Normally this is used for content-after-beacon queue (CABQ) work, which needs to burst out immediately after a beacon. (eg broadcast, multicast, etc frames.) This is a general policy that you can use for any queue, and Sam's TDMA code uses it. When DBA_GATED is used and something like say, an 11e TX burst window, it only operates on a single TX FIFO entry. If you have a single frame per TX FIFO entry and say, a 2.5ms long burst window (eg TDMA!) then it'll only burst a single frame every 2.5ms. If there's no gating (eg ASAP) then the burst window is fine, and multiple TX FIFO slots get used. The CABQ code does pack in a list of frames (ie, the whole cabq) but up until this commit, the normal TX queues didn't. It showed up when I started to debug TDMA on the AR9380 and later. This commit doesn't fix the TDMA case - that's still broken here, because all I'm doing here is allowing 'some' frames to be bursting, but I'm certainly not filling the whole TX FIFO slot entry with frames. Doing that 'properly' kind of requires me to take into account how long packets should take to transmit and say, doing 1.5 or something times that per TX FIFO slot, as if you partially transmit a slot, when it's next gated it'll just finish that TX FIFO slot, then not advance to the next one. Now, I /also/ think queuing a new packet restarts DMA, but you have to push new frames into the TX FIFO. I need to experiment some more with this because if it's really the case, I will be able to do TDMA support without the egregious hacks I have in my local tree. Sam's TDMA code for previous chips would just kick the TXE bit to push along DMA again, but we can't do that for EDMA chips - we /have/ to push a new frame into the TX FIFO to restart DMA. Ugh. Tested: * AR9380, STA mode * AR9380, hostap mode * AR9580, hostap mode Approved by: re (gjb)
2016-06-19 03:45:32 +00:00
ath_tx_alq_post(sc, bfi);
#endif /* ATH_DEBUG_ALQ */
i++;
}
[ath] add support for batching frames to the general TX queues. It turns out the frame scheduling policies (eg DBA_GATED) operate on a single TX FIFO entry. ASAP scheduling is fine; those frames always go out. DBA-gated sets the TX queue ready when the DBA timer fires, which triggers a beacon transmit. Normally this is used for content-after-beacon queue (CABQ) work, which needs to burst out immediately after a beacon. (eg broadcast, multicast, etc frames.) This is a general policy that you can use for any queue, and Sam's TDMA code uses it. When DBA_GATED is used and something like say, an 11e TX burst window, it only operates on a single TX FIFO entry. If you have a single frame per TX FIFO entry and say, a 2.5ms long burst window (eg TDMA!) then it'll only burst a single frame every 2.5ms. If there's no gating (eg ASAP) then the burst window is fine, and multiple TX FIFO slots get used. The CABQ code does pack in a list of frames (ie, the whole cabq) but up until this commit, the normal TX queues didn't. It showed up when I started to debug TDMA on the AR9380 and later. This commit doesn't fix the TDMA case - that's still broken here, because all I'm doing here is allowing 'some' frames to be bursting, but I'm certainly not filling the whole TX FIFO slot entry with frames. Doing that 'properly' kind of requires me to take into account how long packets should take to transmit and say, doing 1.5 or something times that per TX FIFO slot, as if you partially transmit a slot, when it's next gated it'll just finish that TX FIFO slot, then not advance to the next one. Now, I /also/ think queuing a new packet restarts DMA, but you have to push new frames into the TX FIFO. I need to experiment some more with this because if it's really the case, I will be able to do TDMA support without the egregious hacks I have in my local tree. Sam's TDMA code for previous chips would just kick the TXE bit to push along DMA again, but we can't do that for EDMA chips - we /have/ to push a new frame into the TX FIFO to restart DMA. Ugh. Tested: * AR9380, STA mode * AR9380, hostap mode * AR9580, hostap mode Approved by: re (gjb)
2016-06-19 03:45:32 +00:00
/*
* We now need to push this set of frames onto the tail
* of the FIFO queue. We don't adjust the aggregate
* count, only the queue depth counter(s).
* We also need to blank the link pointer now.
*/
TAILQ_CONCAT(&txq->fifo.axq_q, &sq, bf_list);
/* Bump total queue tracking in FIFO queue */
txq->fifo.axq_depth += sqdepth;
/* Bump FIFO queue */
txq->axq_fifo_depth++;
DPRINTF(sc, ATH_DEBUG_XMIT | ATH_DEBUG_TX_PROC,
[ath] add support for batching frames to the general TX queues. It turns out the frame scheduling policies (eg DBA_GATED) operate on a single TX FIFO entry. ASAP scheduling is fine; those frames always go out. DBA-gated sets the TX queue ready when the DBA timer fires, which triggers a beacon transmit. Normally this is used for content-after-beacon queue (CABQ) work, which needs to burst out immediately after a beacon. (eg broadcast, multicast, etc frames.) This is a general policy that you can use for any queue, and Sam's TDMA code uses it. When DBA_GATED is used and something like say, an 11e TX burst window, it only operates on a single TX FIFO entry. If you have a single frame per TX FIFO entry and say, a 2.5ms long burst window (eg TDMA!) then it'll only burst a single frame every 2.5ms. If there's no gating (eg ASAP) then the burst window is fine, and multiple TX FIFO slots get used. The CABQ code does pack in a list of frames (ie, the whole cabq) but up until this commit, the normal TX queues didn't. It showed up when I started to debug TDMA on the AR9380 and later. This commit doesn't fix the TDMA case - that's still broken here, because all I'm doing here is allowing 'some' frames to be bursting, but I'm certainly not filling the whole TX FIFO slot entry with frames. Doing that 'properly' kind of requires me to take into account how long packets should take to transmit and say, doing 1.5 or something times that per TX FIFO slot, as if you partially transmit a slot, when it's next gated it'll just finish that TX FIFO slot, then not advance to the next one. Now, I /also/ think queuing a new packet restarts DMA, but you have to push new frames into the TX FIFO. I need to experiment some more with this because if it's really the case, I will be able to do TDMA support without the egregious hacks I have in my local tree. Sam's TDMA code for previous chips would just kick the TXE bit to push along DMA again, but we can't do that for EDMA chips - we /have/ to push a new frame into the TX FIFO to restart DMA. Ugh. Tested: * AR9380, STA mode * AR9380, hostap mode * AR9580, hostap mode Approved by: re (gjb)
2016-06-19 03:45:32 +00:00
"%s: queued %d packets; depth=%d, fifo depth=%d\n",
__func__, sqdepth, txq->fifo.axq_depth, txq->axq_fifo_depth);
/* Push the first entry into the hardware */
ath_hal_puttxbuf(sc->sc_ah, txq->axq_qnum, bf->bf_daddr);
/* Push start on the DMA if it's not already started */
ath_hal_txstart(sc->sc_ah, txq->axq_qnum);
#ifdef ATH_DEBUG_ALQ
ath_tx_alq_edma_push(sc, txq->axq_qnum, sqdepth,
txq->axq_fifo_depth,
txq->fifo.axq_depth);
#endif /* ATH_DEBUG_ALQ */
}
#define TX_BATCH_SIZE 32
[ath] add support for batching frames to the general TX queues. It turns out the frame scheduling policies (eg DBA_GATED) operate on a single TX FIFO entry. ASAP scheduling is fine; those frames always go out. DBA-gated sets the TX queue ready when the DBA timer fires, which triggers a beacon transmit. Normally this is used for content-after-beacon queue (CABQ) work, which needs to burst out immediately after a beacon. (eg broadcast, multicast, etc frames.) This is a general policy that you can use for any queue, and Sam's TDMA code uses it. When DBA_GATED is used and something like say, an 11e TX burst window, it only operates on a single TX FIFO entry. If you have a single frame per TX FIFO entry and say, a 2.5ms long burst window (eg TDMA!) then it'll only burst a single frame every 2.5ms. If there's no gating (eg ASAP) then the burst window is fine, and multiple TX FIFO slots get used. The CABQ code does pack in a list of frames (ie, the whole cabq) but up until this commit, the normal TX queues didn't. It showed up when I started to debug TDMA on the AR9380 and later. This commit doesn't fix the TDMA case - that's still broken here, because all I'm doing here is allowing 'some' frames to be bursting, but I'm certainly not filling the whole TX FIFO slot entry with frames. Doing that 'properly' kind of requires me to take into account how long packets should take to transmit and say, doing 1.5 or something times that per TX FIFO slot, as if you partially transmit a slot, when it's next gated it'll just finish that TX FIFO slot, then not advance to the next one. Now, I /also/ think queuing a new packet restarts DMA, but you have to push new frames into the TX FIFO. I need to experiment some more with this because if it's really the case, I will be able to do TDMA support without the egregious hacks I have in my local tree. Sam's TDMA code for previous chips would just kick the TXE bit to push along DMA again, but we can't do that for EDMA chips - we /have/ to push a new frame into the TX FIFO to restart DMA. Ugh. Tested: * AR9380, STA mode * AR9380, hostap mode * AR9580, hostap mode Approved by: re (gjb)
2016-06-19 03:45:32 +00:00
/*
* Push some frames into the TX FIFO if we have space.
*/
static void
ath_edma_tx_fifo_fill(struct ath_softc *sc, struct ath_txq *txq)
{
ATH_TXQ_LOCK_ASSERT(txq);
DPRINTF(sc, ATH_DEBUG_TX_PROC,
"%s: Q%d: called; fifo.depth=%d, fifo depth=%d, depth=%d, aggr_depth=%d\n",
[ath] add support for batching frames to the general TX queues. It turns out the frame scheduling policies (eg DBA_GATED) operate on a single TX FIFO entry. ASAP scheduling is fine; those frames always go out. DBA-gated sets the TX queue ready when the DBA timer fires, which triggers a beacon transmit. Normally this is used for content-after-beacon queue (CABQ) work, which needs to burst out immediately after a beacon. (eg broadcast, multicast, etc frames.) This is a general policy that you can use for any queue, and Sam's TDMA code uses it. When DBA_GATED is used and something like say, an 11e TX burst window, it only operates on a single TX FIFO entry. If you have a single frame per TX FIFO entry and say, a 2.5ms long burst window (eg TDMA!) then it'll only burst a single frame every 2.5ms. If there's no gating (eg ASAP) then the burst window is fine, and multiple TX FIFO slots get used. The CABQ code does pack in a list of frames (ie, the whole cabq) but up until this commit, the normal TX queues didn't. It showed up when I started to debug TDMA on the AR9380 and later. This commit doesn't fix the TDMA case - that's still broken here, because all I'm doing here is allowing 'some' frames to be bursting, but I'm certainly not filling the whole TX FIFO slot entry with frames. Doing that 'properly' kind of requires me to take into account how long packets should take to transmit and say, doing 1.5 or something times that per TX FIFO slot, as if you partially transmit a slot, when it's next gated it'll just finish that TX FIFO slot, then not advance to the next one. Now, I /also/ think queuing a new packet restarts DMA, but you have to push new frames into the TX FIFO. I need to experiment some more with this because if it's really the case, I will be able to do TDMA support without the egregious hacks I have in my local tree. Sam's TDMA code for previous chips would just kick the TXE bit to push along DMA again, but we can't do that for EDMA chips - we /have/ to push a new frame into the TX FIFO to restart DMA. Ugh. Tested: * AR9380, STA mode * AR9380, hostap mode * AR9580, hostap mode Approved by: re (gjb)
2016-06-19 03:45:32 +00:00
__func__,
txq->axq_qnum,
txq->fifo.axq_depth,
txq->axq_fifo_depth,
txq->axq_depth,
txq->axq_aggr_depth);
[ath] add support for batching frames to the general TX queues. It turns out the frame scheduling policies (eg DBA_GATED) operate on a single TX FIFO entry. ASAP scheduling is fine; those frames always go out. DBA-gated sets the TX queue ready when the DBA timer fires, which triggers a beacon transmit. Normally this is used for content-after-beacon queue (CABQ) work, which needs to burst out immediately after a beacon. (eg broadcast, multicast, etc frames.) This is a general policy that you can use for any queue, and Sam's TDMA code uses it. When DBA_GATED is used and something like say, an 11e TX burst window, it only operates on a single TX FIFO entry. If you have a single frame per TX FIFO entry and say, a 2.5ms long burst window (eg TDMA!) then it'll only burst a single frame every 2.5ms. If there's no gating (eg ASAP) then the burst window is fine, and multiple TX FIFO slots get used. The CABQ code does pack in a list of frames (ie, the whole cabq) but up until this commit, the normal TX queues didn't. It showed up when I started to debug TDMA on the AR9380 and later. This commit doesn't fix the TDMA case - that's still broken here, because all I'm doing here is allowing 'some' frames to be bursting, but I'm certainly not filling the whole TX FIFO slot entry with frames. Doing that 'properly' kind of requires me to take into account how long packets should take to transmit and say, doing 1.5 or something times that per TX FIFO slot, as if you partially transmit a slot, when it's next gated it'll just finish that TX FIFO slot, then not advance to the next one. Now, I /also/ think queuing a new packet restarts DMA, but you have to push new frames into the TX FIFO. I need to experiment some more with this because if it's really the case, I will be able to do TDMA support without the egregious hacks I have in my local tree. Sam's TDMA code for previous chips would just kick the TXE bit to push along DMA again, but we can't do that for EDMA chips - we /have/ to push a new frame into the TX FIFO to restart DMA. Ugh. Tested: * AR9380, STA mode * AR9380, hostap mode * AR9580, hostap mode Approved by: re (gjb)
2016-06-19 03:45:32 +00:00
/*
* For now, push up to 32 frames per TX FIFO slot.
[ath] add support for batching frames to the general TX queues. It turns out the frame scheduling policies (eg DBA_GATED) operate on a single TX FIFO entry. ASAP scheduling is fine; those frames always go out. DBA-gated sets the TX queue ready when the DBA timer fires, which triggers a beacon transmit. Normally this is used for content-after-beacon queue (CABQ) work, which needs to burst out immediately after a beacon. (eg broadcast, multicast, etc frames.) This is a general policy that you can use for any queue, and Sam's TDMA code uses it. When DBA_GATED is used and something like say, an 11e TX burst window, it only operates on a single TX FIFO entry. If you have a single frame per TX FIFO entry and say, a 2.5ms long burst window (eg TDMA!) then it'll only burst a single frame every 2.5ms. If there's no gating (eg ASAP) then the burst window is fine, and multiple TX FIFO slots get used. The CABQ code does pack in a list of frames (ie, the whole cabq) but up until this commit, the normal TX queues didn't. It showed up when I started to debug TDMA on the AR9380 and later. This commit doesn't fix the TDMA case - that's still broken here, because all I'm doing here is allowing 'some' frames to be bursting, but I'm certainly not filling the whole TX FIFO slot entry with frames. Doing that 'properly' kind of requires me to take into account how long packets should take to transmit and say, doing 1.5 or something times that per TX FIFO slot, as if you partially transmit a slot, when it's next gated it'll just finish that TX FIFO slot, then not advance to the next one. Now, I /also/ think queuing a new packet restarts DMA, but you have to push new frames into the TX FIFO. I need to experiment some more with this because if it's really the case, I will be able to do TDMA support without the egregious hacks I have in my local tree. Sam's TDMA code for previous chips would just kick the TXE bit to push along DMA again, but we can't do that for EDMA chips - we /have/ to push a new frame into the TX FIFO to restart DMA. Ugh. Tested: * AR9380, STA mode * AR9380, hostap mode * AR9580, hostap mode Approved by: re (gjb)
2016-06-19 03:45:32 +00:00
* If more are in the hardware queue then they'll
* get populated when we try to send another frame
* or complete a frame - so at most there'll be
* 32 non-AMPDU frames per node/TID anyway.
[ath] add support for batching frames to the general TX queues. It turns out the frame scheduling policies (eg DBA_GATED) operate on a single TX FIFO entry. ASAP scheduling is fine; those frames always go out. DBA-gated sets the TX queue ready when the DBA timer fires, which triggers a beacon transmit. Normally this is used for content-after-beacon queue (CABQ) work, which needs to burst out immediately after a beacon. (eg broadcast, multicast, etc frames.) This is a general policy that you can use for any queue, and Sam's TDMA code uses it. When DBA_GATED is used and something like say, an 11e TX burst window, it only operates on a single TX FIFO entry. If you have a single frame per TX FIFO entry and say, a 2.5ms long burst window (eg TDMA!) then it'll only burst a single frame every 2.5ms. If there's no gating (eg ASAP) then the burst window is fine, and multiple TX FIFO slots get used. The CABQ code does pack in a list of frames (ie, the whole cabq) but up until this commit, the normal TX queues didn't. It showed up when I started to debug TDMA on the AR9380 and later. This commit doesn't fix the TDMA case - that's still broken here, because all I'm doing here is allowing 'some' frames to be bursting, but I'm certainly not filling the whole TX FIFO slot entry with frames. Doing that 'properly' kind of requires me to take into account how long packets should take to transmit and say, doing 1.5 or something times that per TX FIFO slot, as if you partially transmit a slot, when it's next gated it'll just finish that TX FIFO slot, then not advance to the next one. Now, I /also/ think queuing a new packet restarts DMA, but you have to push new frames into the TX FIFO. I need to experiment some more with this because if it's really the case, I will be able to do TDMA support without the egregious hacks I have in my local tree. Sam's TDMA code for previous chips would just kick the TXE bit to push along DMA again, but we can't do that for EDMA chips - we /have/ to push a new frame into the TX FIFO to restart DMA. Ugh. Tested: * AR9380, STA mode * AR9380, hostap mode * AR9580, hostap mode Approved by: re (gjb)
2016-06-19 03:45:32 +00:00
*
* Note that the hardware staging queue will limit
* how many frames in total we will have pushed into
* here.
*
* Later on, we'll want to push less frames into
* the TX FIFO since we don't want to necessarily
* fill tens or hundreds of milliseconds of potential
* frames.
*
* However, we need more frames right now because of
* how the MAC implements the frame scheduling policy.
* It only ungates a single FIFO entry at a time,
* and will run that until CHNTIME expires or the
* end of that FIFO entry descriptor list is reached.
* So for TDMA we suffer a big performance penalty -
* single TX FIFO entries mean the MAC only sends out
* one frame per DBA event, which turned out on average
* 6ms per TX frame.
*
* So, for aggregates it's okay - it'll push two at a
* time and this will just do them more efficiently.
* For non-aggregates it'll do 4 at a time, up to the
* non-aggr limit (non_aggr, which is 32.) They should
* be time based rather than a hard count, but I also
* do need sleep.
*/
/*
* Do some basic, basic batching to the hardware
* queue.
*
* If we have TX_BATCH_SIZE entries in the staging
* queue, then let's try to send them all in one hit.
*
* Ensure we don't push more than TX_BATCH_SIZE worth
* in, otherwise we end up draining 8 slots worth of
* 32 frames into the hardware queue and then we don't
* attempt to push more frames in until we empty the
* FIFO.
*/
if (txq->axq_depth >= TX_BATCH_SIZE / 2 &&
txq->fifo.axq_depth <= TX_BATCH_SIZE) {
ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE);
}
/*
* Aggregate check: if we have less than two FIFO slots
* busy and we have some aggregate frames, queue it.
*
* Now, ideally we'd just check to see if the scheduler
* has given us aggregate frames and push them into the FIFO
* as individual slots, as honestly we should just be pushing
* a single aggregate in as one FIFO slot.
*
* Let's do that next once I know this works.
*/
else if (txq->axq_aggr_depth > 0 && txq->axq_fifo_depth < 2)
ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE);
/*
*
* If we have less, and the TXFIFO isn't empty, let's
* wait until we've finished sending the FIFO.
*
* If we have less, and the TXFIFO is empty, then
* send them.
*/
else if (txq->axq_fifo_depth == 0) {
ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE);
}
}
/*
* Re-initialise the DMA FIFO with the current contents of
2012-08-11 22:20:28 +00:00
* said TXQ.
*
* This should only be called as part of the chip reset path, as it
* assumes the FIFO is currently empty.
*/
static void
ath_edma_dma_restart(struct ath_softc *sc, struct ath_txq *txq)
{
Implement the replacement EDMA FIFO code. (Yes, the previous code temporarily broke EDMA TX. I'm sorry; I should've actually setup ATH_BUF_FIFOEND on frames so txq->axq_fifo_depth was cleared!) This code implements a whole bunch of sorely needed EDMA TX improvements along with CABQ TX support. The specifics: * When filling/refilling the FIFO, use the new TXQ staging queue for FIFO frames * Tag frames with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND correctly. For now the non-CABQ transmit path pushes one frame into the TXQ staging queue without setting up the intermediary link pointers to chain them together, so draining frames from the txq staging queue to the FIFO queue occurs AMPDU / MPDU at a time. * In the CABQ case, manually tag the list with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND so a chain of frames is pushed into the FIFO at once. * Now that frames are in a FIFO pending queue, we can top up the FIFO after completing a single frame. This means we can keep it filled rather than waiting for it drain and _then_ adding more frames. * The EDMA restart routine now walks the FIFO queue in the TXQ rather than the pending queue and re-initialises the FIFO with that. * When restarting EDMA, we may have partially completed sending a list. So stamp the first frame that we see in a list with ATH_BUF_FIFOPTR and push _that_ into the hardware. * When completing frames, only check those on the FIFO queue. We should never ever queue frames from the pending queue direct to the hardware, so there's no point in checking. * Until I figure out what's going on, make sure if the TXSTATUS for an empty queue pops up, complain loudly and continue. This will stop the panics that people are seeing. I'll add some code later which will assist in ensuring I'm populating each descriptor with the correct queue ID. * When considering whether to queue frames to the hardware queue directly or software queue frames, make sure the depth of the FIFO is taken into account now. * When completing frames, tag them with ATH_BUF_BUSY if they're not the final frame in a FIFO list. The same holding descriptor behaviour is required when handling descriptors linked together with a link pointer as the hardware will re-read the previous descriptor to refresh the link pointer before contiuning. * .. and if we complete the FIFO list (ie, the buffer has ATH_BUF_FIFOEND set), then we don't need the holding buffer any longer. Thus, free it. Tested: * AR9380/AR9580, STA and hostap * AR9280, STA/hostap TODO: * I don't yet trust that the EDMA restart routine is totally correct in all circumstances. I'll continue to thrash this out under heavy multiple-TXQ traffic load and fix whatever pops up.
2013-03-26 20:04:45 +00:00
struct ath_buf *bf;
int i = 0;
int fifostart = 1;
int old_fifo_depth;
Implement the replacement EDMA FIFO code. (Yes, the previous code temporarily broke EDMA TX. I'm sorry; I should've actually setup ATH_BUF_FIFOEND on frames so txq->axq_fifo_depth was cleared!) This code implements a whole bunch of sorely needed EDMA TX improvements along with CABQ TX support. The specifics: * When filling/refilling the FIFO, use the new TXQ staging queue for FIFO frames * Tag frames with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND correctly. For now the non-CABQ transmit path pushes one frame into the TXQ staging queue without setting up the intermediary link pointers to chain them together, so draining frames from the txq staging queue to the FIFO queue occurs AMPDU / MPDU at a time. * In the CABQ case, manually tag the list with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND so a chain of frames is pushed into the FIFO at once. * Now that frames are in a FIFO pending queue, we can top up the FIFO after completing a single frame. This means we can keep it filled rather than waiting for it drain and _then_ adding more frames. * The EDMA restart routine now walks the FIFO queue in the TXQ rather than the pending queue and re-initialises the FIFO with that. * When restarting EDMA, we may have partially completed sending a list. So stamp the first frame that we see in a list with ATH_BUF_FIFOPTR and push _that_ into the hardware. * When completing frames, only check those on the FIFO queue. We should never ever queue frames from the pending queue direct to the hardware, so there's no point in checking. * Until I figure out what's going on, make sure if the TXSTATUS for an empty queue pops up, complain loudly and continue. This will stop the panics that people are seeing. I'll add some code later which will assist in ensuring I'm populating each descriptor with the correct queue ID. * When considering whether to queue frames to the hardware queue directly or software queue frames, make sure the depth of the FIFO is taken into account now. * When completing frames, tag them with ATH_BUF_BUSY if they're not the final frame in a FIFO list. The same holding descriptor behaviour is required when handling descriptors linked together with a link pointer as the hardware will re-read the previous descriptor to refresh the link pointer before contiuning. * .. and if we complete the FIFO list (ie, the buffer has ATH_BUF_FIFOEND set), then we don't need the holding buffer any longer. Thus, free it. Tested: * AR9380/AR9580, STA and hostap * AR9280, STA/hostap TODO: * I don't yet trust that the EDMA restart routine is totally correct in all circumstances. I'll continue to thrash this out under heavy multiple-TXQ traffic load and fix whatever pops up.
2013-03-26 20:04:45 +00:00
DPRINTF(sc, ATH_DEBUG_RESET, "%s: Q%d: called\n",
__func__,
txq->axq_qnum);
Overhaul the TXQ locking (again!) as part of some beacon/cabq timing related issues. Moving the TX locking under one lock made things easier to progress on but it had one important side-effect - it increased the latency when handling CABQ setup when sending beacons. This commit introduces a bunch of new changes and a few unrelated changs that are just easier to lump in here. The aim is to have the CABQ locking separate from other locking. The CABQ transmit path in the beacon process thus doesn't have to grab the general TX lock, reducing lock contention/latency and making it more likely that we'll make the beacon TX timing. The second half of this commit is the CABQ related setup changes needed for sane looking EDMA CABQ support. Right now the EDMA TX code naively assumes that only one frame (MPDU or A-MPDU) is being pushed into each FIFO slot. For the CABQ this isn't true - a whole list of frames is being pushed in - and thus CABQ handling breaks very quickly. The aim here is to setup the CABQ list and then push _that list_ to the hardware for transmission. I can then extend the EDMA TX code to stamp that list as being "one" FIFO entry (likely by tagging the last buffer in that list as "FIFO END") so the EDMA TX completion code correctly tracks things. Major: * Migrate the per-TXQ add/removal locking back to per-TXQ, rather than a single lock. * Leave the software queue side of things under the ATH_TX_LOCK lock, (continuing) to serialise things as they are. * Add a new function which is called whenever there's a beacon miss, to print out some debugging. This is primarily designed to help me figure out if the beacon miss events are due to a noisy environment, issues with the PHY/MAC, or other. * Move the CABQ setup/enable to occur _after_ all the VAPs have been looked at. This means that for multiple VAPS in bursted mode, the CABQ gets primed once all VAPs are checked, rather than being primed on the first VAP and then having frames appended after this. Minor: * Add a (disabled) twiddle to let me enable/disable cabq traffic. It's primarily there to let me easily debug what's going on with beacon and CABQ setup/traffic; there's some DMA engine hangs which I'm finally trying to trace down. * Clear bf_next when flushing frames; it should quieten some warnings that show up when a node goes away. Tested: * AR9280, STA/hostap, up to 4 vaps (staggered) * AR5416, STA/hostap, up to 4 vaps (staggered) TODO: * (Lots) more AR9380 and later testing, as I may have missed something here. * Leverage this to fix CABQ hanling for AR9380 and later chips. * Force bursted beaconing on the chips that default to staggered beacons and ensure the CABQ stuff is all sane (eg, the MORE bits that aren't being correctly set when chaining descriptors.)
2013-03-24 00:03:12 +00:00
ATH_TXQ_LOCK_ASSERT(txq);
Implement the replacement EDMA FIFO code. (Yes, the previous code temporarily broke EDMA TX. I'm sorry; I should've actually setup ATH_BUF_FIFOEND on frames so txq->axq_fifo_depth was cleared!) This code implements a whole bunch of sorely needed EDMA TX improvements along with CABQ TX support. The specifics: * When filling/refilling the FIFO, use the new TXQ staging queue for FIFO frames * Tag frames with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND correctly. For now the non-CABQ transmit path pushes one frame into the TXQ staging queue without setting up the intermediary link pointers to chain them together, so draining frames from the txq staging queue to the FIFO queue occurs AMPDU / MPDU at a time. * In the CABQ case, manually tag the list with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND so a chain of frames is pushed into the FIFO at once. * Now that frames are in a FIFO pending queue, we can top up the FIFO after completing a single frame. This means we can keep it filled rather than waiting for it drain and _then_ adding more frames. * The EDMA restart routine now walks the FIFO queue in the TXQ rather than the pending queue and re-initialises the FIFO with that. * When restarting EDMA, we may have partially completed sending a list. So stamp the first frame that we see in a list with ATH_BUF_FIFOPTR and push _that_ into the hardware. * When completing frames, only check those on the FIFO queue. We should never ever queue frames from the pending queue direct to the hardware, so there's no point in checking. * Until I figure out what's going on, make sure if the TXSTATUS for an empty queue pops up, complain loudly and continue. This will stop the panics that people are seeing. I'll add some code later which will assist in ensuring I'm populating each descriptor with the correct queue ID. * When considering whether to queue frames to the hardware queue directly or software queue frames, make sure the depth of the FIFO is taken into account now. * When completing frames, tag them with ATH_BUF_BUSY if they're not the final frame in a FIFO list. The same holding descriptor behaviour is required when handling descriptors linked together with a link pointer as the hardware will re-read the previous descriptor to refresh the link pointer before contiuning. * .. and if we complete the FIFO list (ie, the buffer has ATH_BUF_FIFOEND set), then we don't need the holding buffer any longer. Thus, free it. Tested: * AR9380/AR9580, STA and hostap * AR9280, STA/hostap TODO: * I don't yet trust that the EDMA restart routine is totally correct in all circumstances. I'll continue to thrash this out under heavy multiple-TXQ traffic load and fix whatever pops up.
2013-03-26 20:04:45 +00:00
/*
* Let's log if the tracked FIFO depth doesn't match
* what we actually push in.
*/
old_fifo_depth = txq->axq_fifo_depth;
txq->axq_fifo_depth = 0;
/*
* Walk the FIFO staging list, looking for "head" entries.
* Since we may have a partially completed list of frames,
* we push the first frame we see into the FIFO and re-mark
* it as the head entry. We then skip entries until we see
* FIFO end, at which point we get ready to push another
* entry into the FIFO.
*/
TAILQ_FOREACH(bf, &txq->fifo.axq_q, bf_list) {
/*
* If we're looking for FIFOEND and we haven't found
* it, skip.
*
* If we're looking for FIFOEND and we've found it,
* reset for another descriptor.
*/
#ifdef ATH_DEBUG
if (sc->sc_debug & ATH_DEBUG_XMIT_DESC)
ath_printtxbuf(sc, bf, txq->axq_qnum, i, 0);
#endif/* ATH_DEBUG */
#ifdef ATH_DEBUG_ALQ
if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
ath_tx_alq_post(sc, bf);
#endif /* ATH_DEBUG_ALQ */
if (fifostart == 0) {
if (bf->bf_flags & ATH_BUF_FIFOEND)
fifostart = 1;
continue;
}
/* Make sure we're not overflowing the FIFO! */
if (txq->axq_fifo_depth >= HAL_TXFIFO_DEPTH) {
device_printf(sc->sc_dev,
"%s: Q%d: more frames in the queue; FIFO depth=%d?!\n",
__func__,
txq->axq_qnum,
txq->axq_fifo_depth);
}
#if 0
DPRINTF(sc, ATH_DEBUG_RESET,
"%s: Q%d: depth=%d: pushing bf=%p; start=%d, end=%d\n",
__func__,
txq->axq_qnum,
txq->axq_fifo_depth,
bf,
!! (bf->bf_flags & ATH_BUF_FIFOPTR),
!! (bf->bf_flags & ATH_BUF_FIFOEND));
#endif
/*
* Set this to be the first buffer in the FIFO
* list - even if it's also the last buffer in
* a FIFO list!
*/
bf->bf_flags |= ATH_BUF_FIFOPTR;
/* Push it into the FIFO and bump the FIFO count */
ath_hal_puttxbuf(sc->sc_ah, txq->axq_qnum, bf->bf_daddr);
txq->axq_fifo_depth++;
/*
* If this isn't the last entry either, let's
* clear fifostart so we continue looking for
* said last entry.
*/
if (! (bf->bf_flags & ATH_BUF_FIFOEND))
fifostart = 0;
i++;
}
/* Only bother starting the queue if there's something in it */
if (i > 0)
ath_hal_txstart(sc->sc_ah, txq->axq_qnum);
DPRINTF(sc, ATH_DEBUG_RESET, "%s: Q%d: FIFO depth was %d, is %d\n",
__func__,
txq->axq_qnum,
old_fifo_depth,
txq->axq_fifo_depth);
/* And now, let's check! */
if (txq->axq_fifo_depth != old_fifo_depth) {
device_printf(sc->sc_dev,
"%s: Q%d: FIFO depth should be %d, is %d\n",
__func__,
txq->axq_qnum,
old_fifo_depth,
txq->axq_fifo_depth);
}
}
2012-08-11 22:20:28 +00:00
/*
* Hand off this frame to a hardware queue.
*
* Things are a bit hairy in the EDMA world. The TX FIFO is only
* 8 entries deep, so we need to keep track of exactly what we've
* pushed into the FIFO and what's just sitting in the TX queue,
* waiting to go out.
*
* So this is split into two halves - frames get appended to the
* TXQ; then a scheduler is called to push some frames into the
* actual TX FIFO.
*/
static void
ath_edma_xmit_handoff_hw(struct ath_softc *sc, struct ath_txq *txq,
struct ath_buf *bf)
{
ATH_TXQ_LOCK(txq);
2012-08-11 22:20:28 +00:00
KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0,
("%s: busy status 0x%x", __func__, bf->bf_flags));
/*
* XXX TODO: write a hard-coded check to ensure that
* the queue id in the TX descriptor matches txq->axq_qnum.
*/
/* Update aggr stats */
if (bf->bf_state.bfs_aggr)
txq->axq_aggr_depth++;
/* Push and update frame stats */
ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
Implement the replacement EDMA FIFO code. (Yes, the previous code temporarily broke EDMA TX. I'm sorry; I should've actually setup ATH_BUF_FIFOEND on frames so txq->axq_fifo_depth was cleared!) This code implements a whole bunch of sorely needed EDMA TX improvements along with CABQ TX support. The specifics: * When filling/refilling the FIFO, use the new TXQ staging queue for FIFO frames * Tag frames with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND correctly. For now the non-CABQ transmit path pushes one frame into the TXQ staging queue without setting up the intermediary link pointers to chain them together, so draining frames from the txq staging queue to the FIFO queue occurs AMPDU / MPDU at a time. * In the CABQ case, manually tag the list with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND so a chain of frames is pushed into the FIFO at once. * Now that frames are in a FIFO pending queue, we can top up the FIFO after completing a single frame. This means we can keep it filled rather than waiting for it drain and _then_ adding more frames. * The EDMA restart routine now walks the FIFO queue in the TXQ rather than the pending queue and re-initialises the FIFO with that. * When restarting EDMA, we may have partially completed sending a list. So stamp the first frame that we see in a list with ATH_BUF_FIFOPTR and push _that_ into the hardware. * When completing frames, only check those on the FIFO queue. We should never ever queue frames from the pending queue direct to the hardware, so there's no point in checking. * Until I figure out what's going on, make sure if the TXSTATUS for an empty queue pops up, complain loudly and continue. This will stop the panics that people are seeing. I'll add some code later which will assist in ensuring I'm populating each descriptor with the correct queue ID. * When considering whether to queue frames to the hardware queue directly or software queue frames, make sure the depth of the FIFO is taken into account now. * When completing frames, tag them with ATH_BUF_BUSY if they're not the final frame in a FIFO list. The same holding descriptor behaviour is required when handling descriptors linked together with a link pointer as the hardware will re-read the previous descriptor to refresh the link pointer before contiuning. * .. and if we complete the FIFO list (ie, the buffer has ATH_BUF_FIFOEND set), then we don't need the holding buffer any longer. Thus, free it. Tested: * AR9380/AR9580, STA and hostap * AR9280, STA/hostap TODO: * I don't yet trust that the EDMA restart routine is totally correct in all circumstances. I'll continue to thrash this out under heavy multiple-TXQ traffic load and fix whatever pops up.
2013-03-26 20:04:45 +00:00
/*
* Finally, call the FIFO schedule routine to schedule some
* frames to the FIFO.
*/
ath_edma_tx_fifo_fill(sc, txq);
ATH_TXQ_UNLOCK(txq);
2012-08-11 22:20:28 +00:00
}
/*
* Hand off this frame to a multicast software queue.
*
* The EDMA TX CABQ will get a list of chained frames, chained
* together using the next pointer. The single head of that
* particular queue is pushed to the hardware CABQ.
2012-08-11 22:20:28 +00:00
*/
static void
ath_edma_xmit_handoff_mcast(struct ath_softc *sc, struct ath_txq *txq,
struct ath_buf *bf)
{
ATH_TX_LOCK_ASSERT(sc);
2012-08-11 22:20:28 +00:00
KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0,
("%s: busy status 0x%x", __func__, bf->bf_flags));
ATH_TXQ_LOCK(txq);
2012-08-11 22:20:28 +00:00
/*
* XXX this is mostly duplicated in ath_tx_handoff_mcast().
*/
if (ATH_TXQ_LAST(txq, axq_q_s) != NULL) {
2012-08-11 22:20:28 +00:00
struct ath_buf *bf_last = ATH_TXQ_LAST(txq, axq_q_s);
struct ieee80211_frame *wh;
/* mark previous frame */
wh = mtod(bf_last->bf_m, struct ieee80211_frame *);
wh->i_fc[1] |= IEEE80211_FC1_MORE_DATA;
/* re-sync buffer to memory */
2012-08-11 22:20:28 +00:00
bus_dmamap_sync(sc->sc_dmat, bf_last->bf_dmamap,
BUS_DMASYNC_PREWRITE);
/* link descriptor */
ath_hal_settxdesclink(sc->sc_ah,
bf_last->bf_lastds,
bf->bf_daddr);
2012-08-11 22:20:28 +00:00
}
#ifdef ATH_DEBUG_ALQ
if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
ath_tx_alq_post(sc, bf);
#endif /* ATH_DEBUG_ALQ */
2012-08-11 22:20:28 +00:00
ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
ATH_TXQ_UNLOCK(txq);
2012-08-11 22:20:28 +00:00
}
/*
* Handoff this frame to the hardware.
*
* For the multicast queue, this will treat it as a software queue
* and append it to the list, after updating the MORE_DATA flag
* in the previous frame. The cabq processing code will ensure
* that the queue contents gets transferred over.
*
* For the hardware queues, this will queue a frame to the queue
* like before, then populate the FIFO from that. Since the
* EDMA hardware has 8 FIFO slots per TXQ, this ensures that
* frames such as management frames don't get prematurely dropped.
*
* This does imply that a similar flush-hwq-to-fifoq method will
* need to be called from the processq function, before the
* per-node software scheduler is called.
*/
static void
ath_edma_xmit_handoff(struct ath_softc *sc, struct ath_txq *txq,
struct ath_buf *bf)
{
DPRINTF(sc, ATH_DEBUG_XMIT_DESC,
"%s: called; bf=%p, txq=%p, qnum=%d\n",
__func__,
bf,
txq,
txq->axq_qnum);
2012-08-11 22:20:28 +00:00
if (txq->axq_qnum == ATH_TXQ_SWQ)
ath_edma_xmit_handoff_mcast(sc, txq, bf);
else
ath_edma_xmit_handoff_hw(sc, txq, bf);
}
static int
ath_edma_setup_txfifo(struct ath_softc *sc, int qnum)
{
struct ath_tx_edma_fifo *te = &sc->sc_txedma[qnum];
te->m_fifo = malloc(sizeof(struct ath_buf *) * HAL_TXFIFO_DEPTH,
M_ATHDEV,
M_NOWAIT | M_ZERO);
if (te->m_fifo == NULL) {
device_printf(sc->sc_dev, "%s: malloc failed\n",
__func__);
return (-ENOMEM);
}
/*
* Set initial "empty" state.
*/
te->m_fifo_head = te->m_fifo_tail = te->m_fifo_depth = 0;
return (0);
}
static int
ath_edma_free_txfifo(struct ath_softc *sc, int qnum)
{
struct ath_tx_edma_fifo *te = &sc->sc_txedma[qnum];
/* XXX TODO: actually deref the ath_buf entries? */
free(te->m_fifo, M_ATHDEV);
return (0);
}
static int
ath_edma_dma_txsetup(struct ath_softc *sc)
{
int error;
int i;
error = ath_descdma_alloc_desc(sc, &sc->sc_txsdma,
NULL, "txcomp", sc->sc_tx_statuslen, ATH_TXSTATUS_RING_SIZE);
if (error != 0)
return (error);
ath_hal_setuptxstatusring(sc->sc_ah,
(void *) sc->sc_txsdma.dd_desc,
sc->sc_txsdma.dd_desc_paddr,
ATH_TXSTATUS_RING_SIZE);
for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
ath_edma_setup_txfifo(sc, i);
}
return (0);
}
static int
ath_edma_dma_txteardown(struct ath_softc *sc)
{
int i;
for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
ath_edma_free_txfifo(sc, i);
}
ath_descdma_cleanup(sc, &sc->sc_txsdma, NULL);
return (0);
}
2012-08-11 22:20:28 +00:00
/*
* Drain all TXQs, potentially after completing the existing completed
* frames.
2012-08-11 22:20:28 +00:00
*/
static void
ath_edma_tx_drain(struct ath_softc *sc, ATH_RESET_TYPE reset_type)
{
int i;
DPRINTF(sc, ATH_DEBUG_RESET, "%s: called\n", __func__);
(void) ath_stoptxdma(sc);
/*
* If reset type is noloss, the TX FIFO needs to be serviced
* and those frames need to be handled.
*
* Otherwise, just toss everything in each TX queue.
*/
2012-11-08 17:46:27 +00:00
if (reset_type == ATH_RESET_NOLOSS) {
ath_edma_tx_processq(sc, 0);
for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
if (ATH_TXQ_SETUP(sc, i)) {
ATH_TXQ_LOCK(&sc->sc_txq[i]);
/*
* Free the holding buffer; DMA is now
* stopped.
*/
ath_txq_freeholdingbuf(sc, &sc->sc_txq[i]);
/*
* Reset the link pointer to NULL; there's
* no frames to chain DMA to.
*/
sc->sc_txq[i].axq_link = NULL;
ATH_TXQ_UNLOCK(&sc->sc_txq[i]);
}
}
2012-11-08 17:46:27 +00:00
} else {
for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
if (ATH_TXQ_SETUP(sc, i))
ath_tx_draintxq(sc, &sc->sc_txq[i]);
}
}
/* XXX dump out the TX completion FIFO contents */
/* XXX dump out the frames */
sc->sc_wd_timer = 0;
}
2012-08-11 22:20:28 +00:00
/*
* TX completion tasklet.
2012-08-11 22:20:28 +00:00
*/
static void
ath_edma_tx_proc(void *arg, int npending)
{
struct ath_softc *sc = (struct ath_softc *) arg;
ATH_PCU_LOCK(sc);
sc->sc_txproc_cnt++;
ATH_PCU_UNLOCK(sc);
ATH_LOCK(sc);
ath_power_set_power_state(sc, HAL_PM_AWAKE);
ATH_UNLOCK(sc);
Implement the replacement EDMA FIFO code. (Yes, the previous code temporarily broke EDMA TX. I'm sorry; I should've actually setup ATH_BUF_FIFOEND on frames so txq->axq_fifo_depth was cleared!) This code implements a whole bunch of sorely needed EDMA TX improvements along with CABQ TX support. The specifics: * When filling/refilling the FIFO, use the new TXQ staging queue for FIFO frames * Tag frames with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND correctly. For now the non-CABQ transmit path pushes one frame into the TXQ staging queue without setting up the intermediary link pointers to chain them together, so draining frames from the txq staging queue to the FIFO queue occurs AMPDU / MPDU at a time. * In the CABQ case, manually tag the list with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND so a chain of frames is pushed into the FIFO at once. * Now that frames are in a FIFO pending queue, we can top up the FIFO after completing a single frame. This means we can keep it filled rather than waiting for it drain and _then_ adding more frames. * The EDMA restart routine now walks the FIFO queue in the TXQ rather than the pending queue and re-initialises the FIFO with that. * When restarting EDMA, we may have partially completed sending a list. So stamp the first frame that we see in a list with ATH_BUF_FIFOPTR and push _that_ into the hardware. * When completing frames, only check those on the FIFO queue. We should never ever queue frames from the pending queue direct to the hardware, so there's no point in checking. * Until I figure out what's going on, make sure if the TXSTATUS for an empty queue pops up, complain loudly and continue. This will stop the panics that people are seeing. I'll add some code later which will assist in ensuring I'm populating each descriptor with the correct queue ID. * When considering whether to queue frames to the hardware queue directly or software queue frames, make sure the depth of the FIFO is taken into account now. * When completing frames, tag them with ATH_BUF_BUSY if they're not the final frame in a FIFO list. The same holding descriptor behaviour is required when handling descriptors linked together with a link pointer as the hardware will re-read the previous descriptor to refresh the link pointer before contiuning. * .. and if we complete the FIFO list (ie, the buffer has ATH_BUF_FIFOEND set), then we don't need the holding buffer any longer. Thus, free it. Tested: * AR9380/AR9580, STA and hostap * AR9280, STA/hostap TODO: * I don't yet trust that the EDMA restart routine is totally correct in all circumstances. I'll continue to thrash this out under heavy multiple-TXQ traffic load and fix whatever pops up.
2013-03-26 20:04:45 +00:00
#if 0
DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: called, npending=%d\n",
__func__, npending);
Implement the replacement EDMA FIFO code. (Yes, the previous code temporarily broke EDMA TX. I'm sorry; I should've actually setup ATH_BUF_FIFOEND on frames so txq->axq_fifo_depth was cleared!) This code implements a whole bunch of sorely needed EDMA TX improvements along with CABQ TX support. The specifics: * When filling/refilling the FIFO, use the new TXQ staging queue for FIFO frames * Tag frames with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND correctly. For now the non-CABQ transmit path pushes one frame into the TXQ staging queue without setting up the intermediary link pointers to chain them together, so draining frames from the txq staging queue to the FIFO queue occurs AMPDU / MPDU at a time. * In the CABQ case, manually tag the list with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND so a chain of frames is pushed into the FIFO at once. * Now that frames are in a FIFO pending queue, we can top up the FIFO after completing a single frame. This means we can keep it filled rather than waiting for it drain and _then_ adding more frames. * The EDMA restart routine now walks the FIFO queue in the TXQ rather than the pending queue and re-initialises the FIFO with that. * When restarting EDMA, we may have partially completed sending a list. So stamp the first frame that we see in a list with ATH_BUF_FIFOPTR and push _that_ into the hardware. * When completing frames, only check those on the FIFO queue. We should never ever queue frames from the pending queue direct to the hardware, so there's no point in checking. * Until I figure out what's going on, make sure if the TXSTATUS for an empty queue pops up, complain loudly and continue. This will stop the panics that people are seeing. I'll add some code later which will assist in ensuring I'm populating each descriptor with the correct queue ID. * When considering whether to queue frames to the hardware queue directly or software queue frames, make sure the depth of the FIFO is taken into account now. * When completing frames, tag them with ATH_BUF_BUSY if they're not the final frame in a FIFO list. The same holding descriptor behaviour is required when handling descriptors linked together with a link pointer as the hardware will re-read the previous descriptor to refresh the link pointer before contiuning. * .. and if we complete the FIFO list (ie, the buffer has ATH_BUF_FIFOEND set), then we don't need the holding buffer any longer. Thus, free it. Tested: * AR9380/AR9580, STA and hostap * AR9280, STA/hostap TODO: * I don't yet trust that the EDMA restart routine is totally correct in all circumstances. I'll continue to thrash this out under heavy multiple-TXQ traffic load and fix whatever pops up.
2013-03-26 20:04:45 +00:00
#endif
ath_edma_tx_processq(sc, 1);
ATH_PCU_LOCK(sc);
sc->sc_txproc_cnt--;
ATH_PCU_UNLOCK(sc);
ATH_LOCK(sc);
ath_power_restore_power_state(sc);
ATH_UNLOCK(sc);
ath_tx_kick(sc);
}
/*
* Process the TX status queue.
*/
static void
ath_edma_tx_processq(struct ath_softc *sc, int dosched)
{
2012-08-11 22:20:28 +00:00
struct ath_hal *ah = sc->sc_ah;
HAL_STATUS status;
struct ath_tx_status ts;
struct ath_txq *txq;
struct ath_buf *bf;
struct ieee80211_node *ni;
int nacked = 0;
int idx;
int i;
#ifdef ATH_DEBUG
/* XXX */
uint32_t txstatus[32];
#endif
for (idx = 0; ; idx++) {
bzero(&ts, sizeof(ts));
2012-08-11 22:20:28 +00:00
ATH_TXSTATUS_LOCK(sc);
2012-11-04 00:46:01 +00:00
#ifdef ATH_DEBUG
ath_hal_gettxrawtxdesc(ah, txstatus);
2012-11-04 00:46:01 +00:00
#endif
status = ath_hal_txprocdesc(ah, NULL, (void *) &ts);
2012-08-11 22:20:28 +00:00
ATH_TXSTATUS_UNLOCK(sc);
Implement the replacement EDMA FIFO code. (Yes, the previous code temporarily broke EDMA TX. I'm sorry; I should've actually setup ATH_BUF_FIFOEND on frames so txq->axq_fifo_depth was cleared!) This code implements a whole bunch of sorely needed EDMA TX improvements along with CABQ TX support. The specifics: * When filling/refilling the FIFO, use the new TXQ staging queue for FIFO frames * Tag frames with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND correctly. For now the non-CABQ transmit path pushes one frame into the TXQ staging queue without setting up the intermediary link pointers to chain them together, so draining frames from the txq staging queue to the FIFO queue occurs AMPDU / MPDU at a time. * In the CABQ case, manually tag the list with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND so a chain of frames is pushed into the FIFO at once. * Now that frames are in a FIFO pending queue, we can top up the FIFO after completing a single frame. This means we can keep it filled rather than waiting for it drain and _then_ adding more frames. * The EDMA restart routine now walks the FIFO queue in the TXQ rather than the pending queue and re-initialises the FIFO with that. * When restarting EDMA, we may have partially completed sending a list. So stamp the first frame that we see in a list with ATH_BUF_FIFOPTR and push _that_ into the hardware. * When completing frames, only check those on the FIFO queue. We should never ever queue frames from the pending queue direct to the hardware, so there's no point in checking. * Until I figure out what's going on, make sure if the TXSTATUS for an empty queue pops up, complain loudly and continue. This will stop the panics that people are seeing. I'll add some code later which will assist in ensuring I'm populating each descriptor with the correct queue ID. * When considering whether to queue frames to the hardware queue directly or software queue frames, make sure the depth of the FIFO is taken into account now. * When completing frames, tag them with ATH_BUF_BUSY if they're not the final frame in a FIFO list. The same holding descriptor behaviour is required when handling descriptors linked together with a link pointer as the hardware will re-read the previous descriptor to refresh the link pointer before contiuning. * .. and if we complete the FIFO list (ie, the buffer has ATH_BUF_FIFOEND set), then we don't need the holding buffer any longer. Thus, free it. Tested: * AR9380/AR9580, STA and hostap * AR9280, STA/hostap TODO: * I don't yet trust that the EDMA restart routine is totally correct in all circumstances. I'll continue to thrash this out under heavy multiple-TXQ traffic load and fix whatever pops up.
2013-03-26 20:04:45 +00:00
if (status == HAL_EINPROGRESS)
break;
#ifdef ATH_DEBUG
if (sc->sc_debug & ATH_DEBUG_TX_PROC)
Implement the replacement EDMA FIFO code. (Yes, the previous code temporarily broke EDMA TX. I'm sorry; I should've actually setup ATH_BUF_FIFOEND on frames so txq->axq_fifo_depth was cleared!) This code implements a whole bunch of sorely needed EDMA TX improvements along with CABQ TX support. The specifics: * When filling/refilling the FIFO, use the new TXQ staging queue for FIFO frames * Tag frames with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND correctly. For now the non-CABQ transmit path pushes one frame into the TXQ staging queue without setting up the intermediary link pointers to chain them together, so draining frames from the txq staging queue to the FIFO queue occurs AMPDU / MPDU at a time. * In the CABQ case, manually tag the list with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND so a chain of frames is pushed into the FIFO at once. * Now that frames are in a FIFO pending queue, we can top up the FIFO after completing a single frame. This means we can keep it filled rather than waiting for it drain and _then_ adding more frames. * The EDMA restart routine now walks the FIFO queue in the TXQ rather than the pending queue and re-initialises the FIFO with that. * When restarting EDMA, we may have partially completed sending a list. So stamp the first frame that we see in a list with ATH_BUF_FIFOPTR and push _that_ into the hardware. * When completing frames, only check those on the FIFO queue. We should never ever queue frames from the pending queue direct to the hardware, so there's no point in checking. * Until I figure out what's going on, make sure if the TXSTATUS for an empty queue pops up, complain loudly and continue. This will stop the panics that people are seeing. I'll add some code later which will assist in ensuring I'm populating each descriptor with the correct queue ID. * When considering whether to queue frames to the hardware queue directly or software queue frames, make sure the depth of the FIFO is taken into account now. * When completing frames, tag them with ATH_BUF_BUSY if they're not the final frame in a FIFO list. The same holding descriptor behaviour is required when handling descriptors linked together with a link pointer as the hardware will re-read the previous descriptor to refresh the link pointer before contiuning. * .. and if we complete the FIFO list (ie, the buffer has ATH_BUF_FIFOEND set), then we don't need the holding buffer any longer. Thus, free it. Tested: * AR9380/AR9580, STA and hostap * AR9280, STA/hostap TODO: * I don't yet trust that the EDMA restart routine is totally correct in all circumstances. I'll continue to thrash this out under heavy multiple-TXQ traffic load and fix whatever pops up.
2013-03-26 20:04:45 +00:00
if (ts.ts_queue_id != sc->sc_bhalq)
ath_printtxstatbuf(sc, NULL, txstatus, ts.ts_queue_id,
idx, (status == HAL_OK));
#endif
/*
* If there is an error with this descriptor, continue
* processing.
*
* XXX TBD: log some statistics?
*/
if (status == HAL_EIO) {
device_printf(sc->sc_dev, "%s: invalid TX status?\n",
__func__);
break;
}
#if defined(ATH_DEBUG_ALQ) && defined(ATH_DEBUG)
if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS)) {
if_ath_alq_post(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS,
sc->sc_tx_statuslen,
(char *) txstatus);
}
#endif /* ATH_DEBUG_ALQ */
2012-08-11 22:20:28 +00:00
/*
* At this point we have a valid status descriptor.
* The QID and descriptor ID (which currently isn't set)
* is part of the status.
*
* We then assume that the descriptor in question is the
* -head- of the given QID. Eventually we should verify
* this by using the descriptor ID.
*/
/*
* The beacon queue is not currently a "real" queue.
* Frames aren't pushed onto it and the lock isn't setup.
* So skip it for now; the beacon handling code will
* free and alloc more beacon buffers as appropriate.
*/
if (ts.ts_queue_id == sc->sc_bhalq)
continue;
2012-08-11 22:20:28 +00:00
txq = &sc->sc_txq[ts.ts_queue_id];
Overhaul the TXQ locking (again!) as part of some beacon/cabq timing related issues. Moving the TX locking under one lock made things easier to progress on but it had one important side-effect - it increased the latency when handling CABQ setup when sending beacons. This commit introduces a bunch of new changes and a few unrelated changs that are just easier to lump in here. The aim is to have the CABQ locking separate from other locking. The CABQ transmit path in the beacon process thus doesn't have to grab the general TX lock, reducing lock contention/latency and making it more likely that we'll make the beacon TX timing. The second half of this commit is the CABQ related setup changes needed for sane looking EDMA CABQ support. Right now the EDMA TX code naively assumes that only one frame (MPDU or A-MPDU) is being pushed into each FIFO slot. For the CABQ this isn't true - a whole list of frames is being pushed in - and thus CABQ handling breaks very quickly. The aim here is to setup the CABQ list and then push _that list_ to the hardware for transmission. I can then extend the EDMA TX code to stamp that list as being "one" FIFO entry (likely by tagging the last buffer in that list as "FIFO END") so the EDMA TX completion code correctly tracks things. Major: * Migrate the per-TXQ add/removal locking back to per-TXQ, rather than a single lock. * Leave the software queue side of things under the ATH_TX_LOCK lock, (continuing) to serialise things as they are. * Add a new function which is called whenever there's a beacon miss, to print out some debugging. This is primarily designed to help me figure out if the beacon miss events are due to a noisy environment, issues with the PHY/MAC, or other. * Move the CABQ setup/enable to occur _after_ all the VAPs have been looked at. This means that for multiple VAPS in bursted mode, the CABQ gets primed once all VAPs are checked, rather than being primed on the first VAP and then having frames appended after this. Minor: * Add a (disabled) twiddle to let me enable/disable cabq traffic. It's primarily there to let me easily debug what's going on with beacon and CABQ setup/traffic; there's some DMA engine hangs which I'm finally trying to trace down. * Clear bf_next when flushing frames; it should quieten some warnings that show up when a node goes away. Tested: * AR9280, STA/hostap, up to 4 vaps (staggered) * AR5416, STA/hostap, up to 4 vaps (staggered) TODO: * (Lots) more AR9380 and later testing, as I may have missed something here. * Leverage this to fix CABQ hanling for AR9380 and later chips. * Force bursted beaconing on the chips that default to staggered beacons and ensure the CABQ stuff is all sane (eg, the MORE bits that aren't being correctly set when chaining descriptors.)
2013-03-24 00:03:12 +00:00
ATH_TXQ_LOCK(txq);
Implement the replacement EDMA FIFO code. (Yes, the previous code temporarily broke EDMA TX. I'm sorry; I should've actually setup ATH_BUF_FIFOEND on frames so txq->axq_fifo_depth was cleared!) This code implements a whole bunch of sorely needed EDMA TX improvements along with CABQ TX support. The specifics: * When filling/refilling the FIFO, use the new TXQ staging queue for FIFO frames * Tag frames with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND correctly. For now the non-CABQ transmit path pushes one frame into the TXQ staging queue without setting up the intermediary link pointers to chain them together, so draining frames from the txq staging queue to the FIFO queue occurs AMPDU / MPDU at a time. * In the CABQ case, manually tag the list with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND so a chain of frames is pushed into the FIFO at once. * Now that frames are in a FIFO pending queue, we can top up the FIFO after completing a single frame. This means we can keep it filled rather than waiting for it drain and _then_ adding more frames. * The EDMA restart routine now walks the FIFO queue in the TXQ rather than the pending queue and re-initialises the FIFO with that. * When restarting EDMA, we may have partially completed sending a list. So stamp the first frame that we see in a list with ATH_BUF_FIFOPTR and push _that_ into the hardware. * When completing frames, only check those on the FIFO queue. We should never ever queue frames from the pending queue direct to the hardware, so there's no point in checking. * Until I figure out what's going on, make sure if the TXSTATUS for an empty queue pops up, complain loudly and continue. This will stop the panics that people are seeing. I'll add some code later which will assist in ensuring I'm populating each descriptor with the correct queue ID. * When considering whether to queue frames to the hardware queue directly or software queue frames, make sure the depth of the FIFO is taken into account now. * When completing frames, tag them with ATH_BUF_BUSY if they're not the final frame in a FIFO list. The same holding descriptor behaviour is required when handling descriptors linked together with a link pointer as the hardware will re-read the previous descriptor to refresh the link pointer before contiuning. * .. and if we complete the FIFO list (ie, the buffer has ATH_BUF_FIFOEND set), then we don't need the holding buffer any longer. Thus, free it. Tested: * AR9380/AR9580, STA and hostap * AR9280, STA/hostap TODO: * I don't yet trust that the EDMA restart routine is totally correct in all circumstances. I'll continue to thrash this out under heavy multiple-TXQ traffic load and fix whatever pops up.
2013-03-26 20:04:45 +00:00
bf = ATH_TXQ_FIRST(&txq->fifo);
Implement the replacement EDMA FIFO code. (Yes, the previous code temporarily broke EDMA TX. I'm sorry; I should've actually setup ATH_BUF_FIFOEND on frames so txq->axq_fifo_depth was cleared!) This code implements a whole bunch of sorely needed EDMA TX improvements along with CABQ TX support. The specifics: * When filling/refilling the FIFO, use the new TXQ staging queue for FIFO frames * Tag frames with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND correctly. For now the non-CABQ transmit path pushes one frame into the TXQ staging queue without setting up the intermediary link pointers to chain them together, so draining frames from the txq staging queue to the FIFO queue occurs AMPDU / MPDU at a time. * In the CABQ case, manually tag the list with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND so a chain of frames is pushed into the FIFO at once. * Now that frames are in a FIFO pending queue, we can top up the FIFO after completing a single frame. This means we can keep it filled rather than waiting for it drain and _then_ adding more frames. * The EDMA restart routine now walks the FIFO queue in the TXQ rather than the pending queue and re-initialises the FIFO with that. * When restarting EDMA, we may have partially completed sending a list. So stamp the first frame that we see in a list with ATH_BUF_FIFOPTR and push _that_ into the hardware. * When completing frames, only check those on the FIFO queue. We should never ever queue frames from the pending queue direct to the hardware, so there's no point in checking. * Until I figure out what's going on, make sure if the TXSTATUS for an empty queue pops up, complain loudly and continue. This will stop the panics that people are seeing. I'll add some code later which will assist in ensuring I'm populating each descriptor with the correct queue ID. * When considering whether to queue frames to the hardware queue directly or software queue frames, make sure the depth of the FIFO is taken into account now. * When completing frames, tag them with ATH_BUF_BUSY if they're not the final frame in a FIFO list. The same holding descriptor behaviour is required when handling descriptors linked together with a link pointer as the hardware will re-read the previous descriptor to refresh the link pointer before contiuning. * .. and if we complete the FIFO list (ie, the buffer has ATH_BUF_FIFOEND set), then we don't need the holding buffer any longer. Thus, free it. Tested: * AR9380/AR9580, STA and hostap * AR9280, STA/hostap TODO: * I don't yet trust that the EDMA restart routine is totally correct in all circumstances. I'll continue to thrash this out under heavy multiple-TXQ traffic load and fix whatever pops up.
2013-03-26 20:04:45 +00:00
/*
* Work around the situation where I'm seeing notifications
* for Q1 when no frames are available. That needs to be
* debugged but not by crashing _here_.
*/
if (bf == NULL) {
device_printf(sc->sc_dev, "%s: Q%d: empty?\n",
__func__,
ts.ts_queue_id);
ATH_TXQ_UNLOCK(txq);
Implement the replacement EDMA FIFO code. (Yes, the previous code temporarily broke EDMA TX. I'm sorry; I should've actually setup ATH_BUF_FIFOEND on frames so txq->axq_fifo_depth was cleared!) This code implements a whole bunch of sorely needed EDMA TX improvements along with CABQ TX support. The specifics: * When filling/refilling the FIFO, use the new TXQ staging queue for FIFO frames * Tag frames with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND correctly. For now the non-CABQ transmit path pushes one frame into the TXQ staging queue without setting up the intermediary link pointers to chain them together, so draining frames from the txq staging queue to the FIFO queue occurs AMPDU / MPDU at a time. * In the CABQ case, manually tag the list with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND so a chain of frames is pushed into the FIFO at once. * Now that frames are in a FIFO pending queue, we can top up the FIFO after completing a single frame. This means we can keep it filled rather than waiting for it drain and _then_ adding more frames. * The EDMA restart routine now walks the FIFO queue in the TXQ rather than the pending queue and re-initialises the FIFO with that. * When restarting EDMA, we may have partially completed sending a list. So stamp the first frame that we see in a list with ATH_BUF_FIFOPTR and push _that_ into the hardware. * When completing frames, only check those on the FIFO queue. We should never ever queue frames from the pending queue direct to the hardware, so there's no point in checking. * Until I figure out what's going on, make sure if the TXSTATUS for an empty queue pops up, complain loudly and continue. This will stop the panics that people are seeing. I'll add some code later which will assist in ensuring I'm populating each descriptor with the correct queue ID. * When considering whether to queue frames to the hardware queue directly or software queue frames, make sure the depth of the FIFO is taken into account now. * When completing frames, tag them with ATH_BUF_BUSY if they're not the final frame in a FIFO list. The same holding descriptor behaviour is required when handling descriptors linked together with a link pointer as the hardware will re-read the previous descriptor to refresh the link pointer before contiuning. * .. and if we complete the FIFO list (ie, the buffer has ATH_BUF_FIFOEND set), then we don't need the holding buffer any longer. Thus, free it. Tested: * AR9380/AR9580, STA and hostap * AR9280, STA/hostap TODO: * I don't yet trust that the EDMA restart routine is totally correct in all circumstances. I'll continue to thrash this out under heavy multiple-TXQ traffic load and fix whatever pops up.
2013-03-26 20:04:45 +00:00
continue;
}
DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: Q%d, bf=%p, start=%d, end=%d\n",
__func__,
Implement the replacement EDMA FIFO code. (Yes, the previous code temporarily broke EDMA TX. I'm sorry; I should've actually setup ATH_BUF_FIFOEND on frames so txq->axq_fifo_depth was cleared!) This code implements a whole bunch of sorely needed EDMA TX improvements along with CABQ TX support. The specifics: * When filling/refilling the FIFO, use the new TXQ staging queue for FIFO frames * Tag frames with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND correctly. For now the non-CABQ transmit path pushes one frame into the TXQ staging queue without setting up the intermediary link pointers to chain them together, so draining frames from the txq staging queue to the FIFO queue occurs AMPDU / MPDU at a time. * In the CABQ case, manually tag the list with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND so a chain of frames is pushed into the FIFO at once. * Now that frames are in a FIFO pending queue, we can top up the FIFO after completing a single frame. This means we can keep it filled rather than waiting for it drain and _then_ adding more frames. * The EDMA restart routine now walks the FIFO queue in the TXQ rather than the pending queue and re-initialises the FIFO with that. * When restarting EDMA, we may have partially completed sending a list. So stamp the first frame that we see in a list with ATH_BUF_FIFOPTR and push _that_ into the hardware. * When completing frames, only check those on the FIFO queue. We should never ever queue frames from the pending queue direct to the hardware, so there's no point in checking. * Until I figure out what's going on, make sure if the TXSTATUS for an empty queue pops up, complain loudly and continue. This will stop the panics that people are seeing. I'll add some code later which will assist in ensuring I'm populating each descriptor with the correct queue ID. * When considering whether to queue frames to the hardware queue directly or software queue frames, make sure the depth of the FIFO is taken into account now. * When completing frames, tag them with ATH_BUF_BUSY if they're not the final frame in a FIFO list. The same holding descriptor behaviour is required when handling descriptors linked together with a link pointer as the hardware will re-read the previous descriptor to refresh the link pointer before contiuning. * .. and if we complete the FIFO list (ie, the buffer has ATH_BUF_FIFOEND set), then we don't need the holding buffer any longer. Thus, free it. Tested: * AR9380/AR9580, STA and hostap * AR9280, STA/hostap TODO: * I don't yet trust that the EDMA restart routine is totally correct in all circumstances. I'll continue to thrash this out under heavy multiple-TXQ traffic load and fix whatever pops up.
2013-03-26 20:04:45 +00:00
ts.ts_queue_id, bf,
!! (bf->bf_flags & ATH_BUF_FIFOPTR),
!! (bf->bf_flags & ATH_BUF_FIFOEND));
/* XXX TODO: actually output debugging info about this */
#if 0
/* XXX assert the buffer/descriptor matches the status descid */
if (ts.ts_desc_id != bf->bf_descid) {
device_printf(sc->sc_dev,
"%s: mismatched descid (qid=%d, tsdescid=%d, "
"bfdescid=%d\n",
__func__,
ts.ts_queue_id,
ts.ts_desc_id,
bf->bf_descid);
}
#endif
/* This removes the buffer and decrements the queue depth */
Implement the replacement EDMA FIFO code. (Yes, the previous code temporarily broke EDMA TX. I'm sorry; I should've actually setup ATH_BUF_FIFOEND on frames so txq->axq_fifo_depth was cleared!) This code implements a whole bunch of sorely needed EDMA TX improvements along with CABQ TX support. The specifics: * When filling/refilling the FIFO, use the new TXQ staging queue for FIFO frames * Tag frames with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND correctly. For now the non-CABQ transmit path pushes one frame into the TXQ staging queue without setting up the intermediary link pointers to chain them together, so draining frames from the txq staging queue to the FIFO queue occurs AMPDU / MPDU at a time. * In the CABQ case, manually tag the list with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND so a chain of frames is pushed into the FIFO at once. * Now that frames are in a FIFO pending queue, we can top up the FIFO after completing a single frame. This means we can keep it filled rather than waiting for it drain and _then_ adding more frames. * The EDMA restart routine now walks the FIFO queue in the TXQ rather than the pending queue and re-initialises the FIFO with that. * When restarting EDMA, we may have partially completed sending a list. So stamp the first frame that we see in a list with ATH_BUF_FIFOPTR and push _that_ into the hardware. * When completing frames, only check those on the FIFO queue. We should never ever queue frames from the pending queue direct to the hardware, so there's no point in checking. * Until I figure out what's going on, make sure if the TXSTATUS for an empty queue pops up, complain loudly and continue. This will stop the panics that people are seeing. I'll add some code later which will assist in ensuring I'm populating each descriptor with the correct queue ID. * When considering whether to queue frames to the hardware queue directly or software queue frames, make sure the depth of the FIFO is taken into account now. * When completing frames, tag them with ATH_BUF_BUSY if they're not the final frame in a FIFO list. The same holding descriptor behaviour is required when handling descriptors linked together with a link pointer as the hardware will re-read the previous descriptor to refresh the link pointer before contiuning. * .. and if we complete the FIFO list (ie, the buffer has ATH_BUF_FIFOEND set), then we don't need the holding buffer any longer. Thus, free it. Tested: * AR9380/AR9580, STA and hostap * AR9280, STA/hostap TODO: * I don't yet trust that the EDMA restart routine is totally correct in all circumstances. I'll continue to thrash this out under heavy multiple-TXQ traffic load and fix whatever pops up.
2013-03-26 20:04:45 +00:00
ATH_TXQ_REMOVE(&txq->fifo, bf, bf_list);
if (bf->bf_state.bfs_aggr)
txq->axq_aggr_depth--;
Implement the replacement EDMA FIFO code. (Yes, the previous code temporarily broke EDMA TX. I'm sorry; I should've actually setup ATH_BUF_FIFOEND on frames so txq->axq_fifo_depth was cleared!) This code implements a whole bunch of sorely needed EDMA TX improvements along with CABQ TX support. The specifics: * When filling/refilling the FIFO, use the new TXQ staging queue for FIFO frames * Tag frames with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND correctly. For now the non-CABQ transmit path pushes one frame into the TXQ staging queue without setting up the intermediary link pointers to chain them together, so draining frames from the txq staging queue to the FIFO queue occurs AMPDU / MPDU at a time. * In the CABQ case, manually tag the list with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND so a chain of frames is pushed into the FIFO at once. * Now that frames are in a FIFO pending queue, we can top up the FIFO after completing a single frame. This means we can keep it filled rather than waiting for it drain and _then_ adding more frames. * The EDMA restart routine now walks the FIFO queue in the TXQ rather than the pending queue and re-initialises the FIFO with that. * When restarting EDMA, we may have partially completed sending a list. So stamp the first frame that we see in a list with ATH_BUF_FIFOPTR and push _that_ into the hardware. * When completing frames, only check those on the FIFO queue. We should never ever queue frames from the pending queue direct to the hardware, so there's no point in checking. * Until I figure out what's going on, make sure if the TXSTATUS for an empty queue pops up, complain loudly and continue. This will stop the panics that people are seeing. I'll add some code later which will assist in ensuring I'm populating each descriptor with the correct queue ID. * When considering whether to queue frames to the hardware queue directly or software queue frames, make sure the depth of the FIFO is taken into account now. * When completing frames, tag them with ATH_BUF_BUSY if they're not the final frame in a FIFO list. The same holding descriptor behaviour is required when handling descriptors linked together with a link pointer as the hardware will re-read the previous descriptor to refresh the link pointer before contiuning. * .. and if we complete the FIFO list (ie, the buffer has ATH_BUF_FIFOEND set), then we don't need the holding buffer any longer. Thus, free it. Tested: * AR9380/AR9580, STA and hostap * AR9280, STA/hostap TODO: * I don't yet trust that the EDMA restart routine is totally correct in all circumstances. I'll continue to thrash this out under heavy multiple-TXQ traffic load and fix whatever pops up.
2013-03-26 20:04:45 +00:00
/*
* If this was the end of a FIFO set, decrement FIFO depth
*/
if (bf->bf_flags & ATH_BUF_FIFOEND)
txq->axq_fifo_depth--;
/*
* If this isn't the final buffer in a FIFO set, mark
* the buffer as busy so it goes onto the holding queue.
*/
if (! (bf->bf_flags & ATH_BUF_FIFOEND))
bf->bf_flags |= ATH_BUF_BUSY;
DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: Q%d: FIFO depth is now %d (%d)\n",
__func__,
txq->axq_qnum,
txq->axq_fifo_depth,
txq->fifo.axq_depth);
/* XXX assert FIFO depth >= 0 */
Overhaul the TXQ locking (again!) as part of some beacon/cabq timing related issues. Moving the TX locking under one lock made things easier to progress on but it had one important side-effect - it increased the latency when handling CABQ setup when sending beacons. This commit introduces a bunch of new changes and a few unrelated changs that are just easier to lump in here. The aim is to have the CABQ locking separate from other locking. The CABQ transmit path in the beacon process thus doesn't have to grab the general TX lock, reducing lock contention/latency and making it more likely that we'll make the beacon TX timing. The second half of this commit is the CABQ related setup changes needed for sane looking EDMA CABQ support. Right now the EDMA TX code naively assumes that only one frame (MPDU or A-MPDU) is being pushed into each FIFO slot. For the CABQ this isn't true - a whole list of frames is being pushed in - and thus CABQ handling breaks very quickly. The aim here is to setup the CABQ list and then push _that list_ to the hardware for transmission. I can then extend the EDMA TX code to stamp that list as being "one" FIFO entry (likely by tagging the last buffer in that list as "FIFO END") so the EDMA TX completion code correctly tracks things. Major: * Migrate the per-TXQ add/removal locking back to per-TXQ, rather than a single lock. * Leave the software queue side of things under the ATH_TX_LOCK lock, (continuing) to serialise things as they are. * Add a new function which is called whenever there's a beacon miss, to print out some debugging. This is primarily designed to help me figure out if the beacon miss events are due to a noisy environment, issues with the PHY/MAC, or other. * Move the CABQ setup/enable to occur _after_ all the VAPs have been looked at. This means that for multiple VAPS in bursted mode, the CABQ gets primed once all VAPs are checked, rather than being primed on the first VAP and then having frames appended after this. Minor: * Add a (disabled) twiddle to let me enable/disable cabq traffic. It's primarily there to let me easily debug what's going on with beacon and CABQ setup/traffic; there's some DMA engine hangs which I'm finally trying to trace down. * Clear bf_next when flushing frames; it should quieten some warnings that show up when a node goes away. Tested: * AR9280, STA/hostap, up to 4 vaps (staggered) * AR5416, STA/hostap, up to 4 vaps (staggered) TODO: * (Lots) more AR9380 and later testing, as I may have missed something here. * Leverage this to fix CABQ hanling for AR9380 and later chips. * Force bursted beaconing on the chips that default to staggered beacons and ensure the CABQ stuff is all sane (eg, the MORE bits that aren't being correctly set when chaining descriptors.)
2013-03-24 00:03:12 +00:00
ATH_TXQ_UNLOCK(txq);
Implement the replacement EDMA FIFO code. (Yes, the previous code temporarily broke EDMA TX. I'm sorry; I should've actually setup ATH_BUF_FIFOEND on frames so txq->axq_fifo_depth was cleared!) This code implements a whole bunch of sorely needed EDMA TX improvements along with CABQ TX support. The specifics: * When filling/refilling the FIFO, use the new TXQ staging queue for FIFO frames * Tag frames with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND correctly. For now the non-CABQ transmit path pushes one frame into the TXQ staging queue without setting up the intermediary link pointers to chain them together, so draining frames from the txq staging queue to the FIFO queue occurs AMPDU / MPDU at a time. * In the CABQ case, manually tag the list with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND so a chain of frames is pushed into the FIFO at once. * Now that frames are in a FIFO pending queue, we can top up the FIFO after completing a single frame. This means we can keep it filled rather than waiting for it drain and _then_ adding more frames. * The EDMA restart routine now walks the FIFO queue in the TXQ rather than the pending queue and re-initialises the FIFO with that. * When restarting EDMA, we may have partially completed sending a list. So stamp the first frame that we see in a list with ATH_BUF_FIFOPTR and push _that_ into the hardware. * When completing frames, only check those on the FIFO queue. We should never ever queue frames from the pending queue direct to the hardware, so there's no point in checking. * Until I figure out what's going on, make sure if the TXSTATUS for an empty queue pops up, complain loudly and continue. This will stop the panics that people are seeing. I'll add some code later which will assist in ensuring I'm populating each descriptor with the correct queue ID. * When considering whether to queue frames to the hardware queue directly or software queue frames, make sure the depth of the FIFO is taken into account now. * When completing frames, tag them with ATH_BUF_BUSY if they're not the final frame in a FIFO list. The same holding descriptor behaviour is required when handling descriptors linked together with a link pointer as the hardware will re-read the previous descriptor to refresh the link pointer before contiuning. * .. and if we complete the FIFO list (ie, the buffer has ATH_BUF_FIFOEND set), then we don't need the holding buffer any longer. Thus, free it. Tested: * AR9380/AR9580, STA and hostap * AR9280, STA/hostap TODO: * I don't yet trust that the EDMA restart routine is totally correct in all circumstances. I'll continue to thrash this out under heavy multiple-TXQ traffic load and fix whatever pops up.
2013-03-26 20:04:45 +00:00
/*
* Outside of the TX lock - if the buffer is end
* end buffer in this FIFO, we don't need a holding
* buffer any longer.
*/
if (bf->bf_flags & ATH_BUF_FIFOEND) {
ATH_TXQ_LOCK(txq);
Implement the replacement EDMA FIFO code. (Yes, the previous code temporarily broke EDMA TX. I'm sorry; I should've actually setup ATH_BUF_FIFOEND on frames so txq->axq_fifo_depth was cleared!) This code implements a whole bunch of sorely needed EDMA TX improvements along with CABQ TX support. The specifics: * When filling/refilling the FIFO, use the new TXQ staging queue for FIFO frames * Tag frames with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND correctly. For now the non-CABQ transmit path pushes one frame into the TXQ staging queue without setting up the intermediary link pointers to chain them together, so draining frames from the txq staging queue to the FIFO queue occurs AMPDU / MPDU at a time. * In the CABQ case, manually tag the list with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND so a chain of frames is pushed into the FIFO at once. * Now that frames are in a FIFO pending queue, we can top up the FIFO after completing a single frame. This means we can keep it filled rather than waiting for it drain and _then_ adding more frames. * The EDMA restart routine now walks the FIFO queue in the TXQ rather than the pending queue and re-initialises the FIFO with that. * When restarting EDMA, we may have partially completed sending a list. So stamp the first frame that we see in a list with ATH_BUF_FIFOPTR and push _that_ into the hardware. * When completing frames, only check those on the FIFO queue. We should never ever queue frames from the pending queue direct to the hardware, so there's no point in checking. * Until I figure out what's going on, make sure if the TXSTATUS for an empty queue pops up, complain loudly and continue. This will stop the panics that people are seeing. I'll add some code later which will assist in ensuring I'm populating each descriptor with the correct queue ID. * When considering whether to queue frames to the hardware queue directly or software queue frames, make sure the depth of the FIFO is taken into account now. * When completing frames, tag them with ATH_BUF_BUSY if they're not the final frame in a FIFO list. The same holding descriptor behaviour is required when handling descriptors linked together with a link pointer as the hardware will re-read the previous descriptor to refresh the link pointer before contiuning. * .. and if we complete the FIFO list (ie, the buffer has ATH_BUF_FIFOEND set), then we don't need the holding buffer any longer. Thus, free it. Tested: * AR9380/AR9580, STA and hostap * AR9280, STA/hostap TODO: * I don't yet trust that the EDMA restart routine is totally correct in all circumstances. I'll continue to thrash this out under heavy multiple-TXQ traffic load and fix whatever pops up.
2013-03-26 20:04:45 +00:00
ath_txq_freeholdingbuf(sc, txq);
ATH_TXQ_UNLOCK(txq);
Implement the replacement EDMA FIFO code. (Yes, the previous code temporarily broke EDMA TX. I'm sorry; I should've actually setup ATH_BUF_FIFOEND on frames so txq->axq_fifo_depth was cleared!) This code implements a whole bunch of sorely needed EDMA TX improvements along with CABQ TX support. The specifics: * When filling/refilling the FIFO, use the new TXQ staging queue for FIFO frames * Tag frames with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND correctly. For now the non-CABQ transmit path pushes one frame into the TXQ staging queue without setting up the intermediary link pointers to chain them together, so draining frames from the txq staging queue to the FIFO queue occurs AMPDU / MPDU at a time. * In the CABQ case, manually tag the list with ATH_BUF_FIFOPTR and ATH_BUF_FIFOEND so a chain of frames is pushed into the FIFO at once. * Now that frames are in a FIFO pending queue, we can top up the FIFO after completing a single frame. This means we can keep it filled rather than waiting for it drain and _then_ adding more frames. * The EDMA restart routine now walks the FIFO queue in the TXQ rather than the pending queue and re-initialises the FIFO with that. * When restarting EDMA, we may have partially completed sending a list. So stamp the first frame that we see in a list with ATH_BUF_FIFOPTR and push _that_ into the hardware. * When completing frames, only check those on the FIFO queue. We should never ever queue frames from the pending queue direct to the hardware, so there's no point in checking. * Until I figure out what's going on, make sure if the TXSTATUS for an empty queue pops up, complain loudly and continue. This will stop the panics that people are seeing. I'll add some code later which will assist in ensuring I'm populating each descriptor with the correct queue ID. * When considering whether to queue frames to the hardware queue directly or software queue frames, make sure the depth of the FIFO is taken into account now. * When completing frames, tag them with ATH_BUF_BUSY if they're not the final frame in a FIFO list. The same holding descriptor behaviour is required when handling descriptors linked together with a link pointer as the hardware will re-read the previous descriptor to refresh the link pointer before contiuning. * .. and if we complete the FIFO list (ie, the buffer has ATH_BUF_FIFOEND set), then we don't need the holding buffer any longer. Thus, free it. Tested: * AR9380/AR9580, STA and hostap * AR9280, STA/hostap TODO: * I don't yet trust that the EDMA restart routine is totally correct in all circumstances. I'll continue to thrash this out under heavy multiple-TXQ traffic load and fix whatever pops up.
2013-03-26 20:04:45 +00:00
}
/*
* First we need to make sure ts_rate is valid.
*
* Pre-EDMA chips pass the whole TX descriptor to
* the proctxdesc function which will then fill out
* ts_rate based on the ts_finaltsi (final TX index)
* in the TX descriptor. However the TX completion
* FIFO doesn't have this information. So here we
* do a separate HAL call to populate that information.
*
* The same problem exists with ts_longretry.
* The FreeBSD HAL corrects ts_longretry in the HAL layer;
* the AR9380 HAL currently doesn't. So until the HAL
* is imported and this can be added, we correct for it
* here.
*/
/* XXX TODO */
/* XXX faked for now. Ew. */
if (ts.ts_finaltsi < 4) {
ts.ts_rate =
bf->bf_state.bfs_rc[ts.ts_finaltsi].ratecode;
switch (ts.ts_finaltsi) {
case 3: ts.ts_longretry +=
bf->bf_state.bfs_rc[2].tries;
case 2: ts.ts_longretry +=
bf->bf_state.bfs_rc[1].tries;
case 1: ts.ts_longretry +=
bf->bf_state.bfs_rc[0].tries;
}
} else {
device_printf(sc->sc_dev, "%s: finaltsi=%d\n",
__func__,
ts.ts_finaltsi);
ts.ts_rate = bf->bf_state.bfs_rc[0].ratecode;
}
/*
* XXX This is terrible.
*
* Right now, some code uses the TX status that is
* passed in here, but the completion handlers in the
* software TX path also use bf_status.ds_txstat.
* Ew. That should all go away.
*
* XXX It's also possible the rate control completion
* routine is called twice.
*/
memcpy(&bf->bf_status, &ts, sizeof(ts));
ni = bf->bf_node;
/* Update RSSI */
/* XXX duplicate from ath_tx_processq */
if (ni != NULL && ts.ts_status == 0 &&
((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0)) {
nacked++;
sc->sc_stats.ast_tx_rssi = ts.ts_rssi;
ATH_RSSI_LPF(sc->sc_halstats.ns_avgtxrssi,
ts.ts_rssi);
}
/* Handle frame completion and rate control update */
ath_tx_process_buf_completion(sc, txq, &ts, bf);
/* NB: bf is invalid at this point */
2012-08-11 22:20:28 +00:00
}
sc->sc_wd_timer = 0;
/*
* XXX It's inefficient to do this if the FIFO queue is full,
* but there's no easy way right now to only populate
* the txq task for _one_ TXQ. This should be fixed.
*/
if (dosched) {
/* Attempt to schedule more hardware frames to the TX FIFO */
for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
if (ATH_TXQ_SETUP(sc, i)) {
ATH_TXQ_LOCK(&sc->sc_txq[i]);
ath_edma_tx_fifo_fill(sc, &sc->sc_txq[i]);
ATH_TXQ_UNLOCK(&sc->sc_txq[i]);
}
}
/* Kick software scheduler */
ath_tx_swq_kick(sc);
}
}
static void
ath_edma_attach_comp_func(struct ath_softc *sc)
{
TASK_INIT(&sc->sc_txtask, 0, ath_edma_tx_proc, sc);
}
void
ath_xmit_setup_edma(struct ath_softc *sc)
{
/* Fetch EDMA field and buffer sizes */
(void) ath_hal_gettxdesclen(sc->sc_ah, &sc->sc_tx_desclen);
(void) ath_hal_gettxstatuslen(sc->sc_ah, &sc->sc_tx_statuslen);
(void) ath_hal_getntxmaps(sc->sc_ah, &sc->sc_tx_nmaps);
if (bootverbose) {
device_printf(sc->sc_dev, "TX descriptor length: %d\n",
sc->sc_tx_desclen);
device_printf(sc->sc_dev, "TX status length: %d\n",
sc->sc_tx_statuslen);
device_printf(sc->sc_dev, "TX buffers per descriptor: %d\n",
sc->sc_tx_nmaps);
}
sc->sc_tx.xmit_setup = ath_edma_dma_txsetup;
sc->sc_tx.xmit_teardown = ath_edma_dma_txteardown;
sc->sc_tx.xmit_attach_comp_func = ath_edma_attach_comp_func;
sc->sc_tx.xmit_dma_restart = ath_edma_dma_restart;
sc->sc_tx.xmit_handoff = ath_edma_xmit_handoff;
sc->sc_tx.xmit_drain = ath_edma_tx_drain;
}