freebsd-dev/sys/dev/ath/if_ath_tx_edma.c

695 lines
18 KiB
C
Raw Normal View History

/*-
* Copyright (c) 2012 Adrian Chadd <adrian@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer,
* without modification.
* 2. Redistributions in binary form must reproduce at minimum a disclaimer
* similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
* redistribution must be conditioned upon including a substantially
* similar Disclaimer requirement for further binary redistribution.
*
* NO WARRANTY
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
* THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
* OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
* SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
* INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
* IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
* ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
* THE POSSIBILITY OF SUCH DAMAGES.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
/*
* Driver for the Atheros Wireless LAN controller.
*
* This software is derived from work of Atsushi Onoe; his contribution
* is greatly appreciated.
*/
#include "opt_inet.h"
#include "opt_ath.h"
/*
* This is needed for register operations which are performed
* by the driver - eg, calls to ath_hal_gettsf32().
*
* It's also required for any AH_DEBUG checks in here, eg the
* module dependencies.
*/
#include "opt_ah.h"
#include "opt_wlan.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysctl.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/kernel.h>
#include <sys/socket.h>
#include <sys/sockio.h>
#include <sys/errno.h>
#include <sys/callout.h>
#include <sys/bus.h>
#include <sys/endian.h>
#include <sys/kthread.h>
#include <sys/taskqueue.h>
#include <sys/priv.h>
#include <sys/module.h>
#include <sys/ktr.h>
#include <sys/smp.h> /* for mp_ncpus */
#include <machine/bus.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_media.h>
#include <net/if_types.h>
#include <net/if_arp.h>
#include <net/ethernet.h>
#include <net/if_llc.h>
#include <net80211/ieee80211_var.h>
#include <net80211/ieee80211_regdomain.h>
#ifdef IEEE80211_SUPPORT_SUPERG
#include <net80211/ieee80211_superg.h>
#endif
#ifdef IEEE80211_SUPPORT_TDMA
#include <net80211/ieee80211_tdma.h>
#endif
#include <net/bpf.h>
#ifdef INET
#include <netinet/in.h>
#include <netinet/if_ether.h>
#endif
#include <dev/ath/if_athvar.h>
#include <dev/ath/ath_hal/ah_devid.h> /* XXX for softled */
#include <dev/ath/ath_hal/ah_diagcodes.h>
#include <dev/ath/if_ath_debug.h>
#include <dev/ath/if_ath_misc.h>
#include <dev/ath/if_ath_tsf.h>
#include <dev/ath/if_ath_tx.h>
#include <dev/ath/if_ath_sysctl.h>
#include <dev/ath/if_ath_led.h>
#include <dev/ath/if_ath_keycache.h>
#include <dev/ath/if_ath_rx.h>
#include <dev/ath/if_ath_beacon.h>
#include <dev/ath/if_athdfs.h>
#ifdef ATH_TX99_DIAG
#include <dev/ath/ath_tx99/ath_tx99.h>
#endif
#include <dev/ath/if_ath_tx_edma.h>
#ifdef ATH_DEBUG_ALQ
#include <dev/ath/if_ath_alq.h>
#endif
/*
 * some general macros
 *
 * INCR()/DECR() step the ring index (_l) forward or backward by one and
 * wrap it with a mask of ((_sz) - 1), so (_sz) must be a power of two.
 *
 * Each macro expands to a single statement (do/while(0)) so that it
 * stays intact when used as the body of an unbraced if/else or loop.
 * Note that (_l) is evaluated more than once.
 */
#define INCR(_l, _sz) do { (_l)++; (_l) &= ((_sz) - 1); } while (0)
#define DECR(_l, _sz) do { (_l)--; (_l) &= ((_sz) - 1); } while (0)
/*
 * Number of entries in the TX status ring that ath_edma_dma_txsetup()
 * allocates and hands to the HAL.
 *
 * XXX doesn't belong here, and should be tunable
 */
#define ATH_TXSTATUS_RING_SIZE 512
/* malloc(9) type used for the per-queue TX FIFO arrays allocated below. */
MALLOC_DECLARE(M_ATHDEV);
/* Forward declaration: ath_edma_tx_drain() calls this before its definition. */
static void ath_edma_tx_processq(struct ath_softc *sc, int dosched);
static void
ath_edma_tx_fifo_fill(struct ath_softc *sc, struct ath_txq *txq)
{
struct ath_buf *bf;
int i = 0;
Overhaul the TXQ locking (again!) as part of some beacon/cabq timing related issues. Moving the TX locking under one lock made things easier to progress on but it had one important side-effect - it increased the latency when handling CABQ setup when sending beacons. This commit introduces a bunch of new changes and a few unrelated changs that are just easier to lump in here. The aim is to have the CABQ locking separate from other locking. The CABQ transmit path in the beacon process thus doesn't have to grab the general TX lock, reducing lock contention/latency and making it more likely that we'll make the beacon TX timing. The second half of this commit is the CABQ related setup changes needed for sane looking EDMA CABQ support. Right now the EDMA TX code naively assumes that only one frame (MPDU or A-MPDU) is being pushed into each FIFO slot. For the CABQ this isn't true - a whole list of frames is being pushed in - and thus CABQ handling breaks very quickly. The aim here is to setup the CABQ list and then push _that list_ to the hardware for transmission. I can then extend the EDMA TX code to stamp that list as being "one" FIFO entry (likely by tagging the last buffer in that list as "FIFO END") so the EDMA TX completion code correctly tracks things. Major: * Migrate the per-TXQ add/removal locking back to per-TXQ, rather than a single lock. * Leave the software queue side of things under the ATH_TX_LOCK lock, (continuing) to serialise things as they are. * Add a new function which is called whenever there's a beacon miss, to print out some debugging. This is primarily designed to help me figure out if the beacon miss events are due to a noisy environment, issues with the PHY/MAC, or other. * Move the CABQ setup/enable to occur _after_ all the VAPs have been looked at. This means that for multiple VAPS in bursted mode, the CABQ gets primed once all VAPs are checked, rather than being primed on the first VAP and then having frames appended after this. 
Minor: * Add a (disabled) twiddle to let me enable/disable cabq traffic. It's primarily there to let me easily debug what's going on with beacon and CABQ setup/traffic; there's some DMA engine hangs which I'm finally trying to trace down. * Clear bf_next when flushing frames; it should quieten some warnings that show up when a node goes away. Tested: * AR9280, STA/hostap, up to 4 vaps (staggered) * AR5416, STA/hostap, up to 4 vaps (staggered) TODO: * (Lots) more AR9380 and later testing, as I may have missed something here. * Leverage this to fix CABQ hanling for AR9380 and later chips. * Force bursted beaconing on the chips that default to staggered beacons and ensure the CABQ stuff is all sane (eg, the MORE bits that aren't being correctly set when chaining descriptors.)
2013-03-24 00:03:12 +00:00
ATH_TXQ_LOCK_ASSERT(txq);
DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: called\n", __func__);
TAILQ_FOREACH(bf, &txq->axq_q, bf_list) {
if (txq->axq_fifo_depth >= HAL_TXFIFO_DEPTH)
break;
ath_hal_puttxbuf(sc->sc_ah, txq->axq_qnum, bf->bf_daddr);
#ifdef ATH_DEBUG
if (sc->sc_debug & ATH_DEBUG_XMIT_DESC)
ath_printtxbuf(sc, bf, txq->axq_qnum, i, 0);
#endif/* ATH_DEBUG */
#ifdef ATH_DEBUG_ALQ
if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
ath_tx_alq_post(sc, bf);
#endif /* ATH_DEBUG_ALQ */
txq->axq_fifo_depth++;
i++;
}
if (i > 0)
ath_hal_txstart(sc->sc_ah, txq->axq_qnum);
}
/*
 * Re-initialise the DMA FIFO with the current contents of
 * said TXQ.
 *
 * This should only be called as part of the chip reset path, as it
 * assumes the FIFO is currently empty.
 *
 * The caller must hold the TXQ lock.
 *
 * NOTE(review): axq_fifo_depth is not reset here, and
 * ath_edma_tx_fifo_fill() stops pushing once that counter reaches
 * HAL_TXFIFO_DEPTH.  Confirm that the reset path zeroes the counter
 * before calling this, otherwise a stale count limits the refill.
 */
static void
ath_edma_dma_restart(struct ath_softc *sc, struct ath_txq *txq)
{

	DPRINTF(sc, ATH_DEBUG_RESET, "%s: called: txq=%p, qnum=%d\n",
	    __func__,
	    txq,
	    txq->axq_qnum);

	ATH_TXQ_LOCK_ASSERT(txq);
	ath_edma_tx_fifo_fill(sc, txq);
}
2012-08-11 22:20:28 +00:00
/*
* Hand off this frame to a hardware queue.
*
* Things are a bit hairy in the EDMA world. The TX FIFO is only
* 8 entries deep, so we need to keep track of exactly what we've
* pushed into the FIFO and what's just sitting in the TX queue,
* waiting to go out.
*
* So this is split into two halves - frames get appended to the
* TXQ; then a scheduler is called to push some frames into the
* actual TX FIFO.
*/
static void
ath_edma_xmit_handoff_hw(struct ath_softc *sc, struct ath_txq *txq,
struct ath_buf *bf)
{
struct ath_hal *ah = sc->sc_ah;
ATH_TXQ_LOCK(txq);
2012-08-11 22:20:28 +00:00
KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0,
("%s: busy status 0x%x", __func__, bf->bf_flags));
/*
* XXX TODO: write a hard-coded check to ensure that
* the queue id in the TX descriptor matches txq->axq_qnum.
*/
/* Update aggr stats */
if (bf->bf_state.bfs_aggr)
txq->axq_aggr_depth++;
/* Push and update frame stats */
ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
/* Only schedule to the FIFO if there's space */
if (txq->axq_fifo_depth < HAL_TXFIFO_DEPTH) {
#ifdef ATH_DEBUG
if (sc->sc_debug & ATH_DEBUG_XMIT_DESC)
ath_printtxbuf(sc, bf, txq->axq_qnum, 0, 0);
#endif /* ATH_DEBUG */
#ifdef ATH_DEBUG_ALQ
if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
ath_tx_alq_post(sc, bf);
#endif /* ATH_DEBUG_ALQ */
2012-08-11 22:20:28 +00:00
ath_hal_puttxbuf(ah, txq->axq_qnum, bf->bf_daddr);
txq->axq_fifo_depth++;
2012-08-11 22:20:28 +00:00
ath_hal_txstart(ah, txq->axq_qnum);
}
ATH_TXQ_UNLOCK(txq);
2012-08-11 22:20:28 +00:00
}
/*
* Hand off this frame to a multicast software queue.
*
* The EDMA TX CABQ will get a list of chained frames, chained
* together using the next pointer. The single head of that
* particular queue is pushed to the hardware CABQ.
2012-08-11 22:20:28 +00:00
*/
static void
ath_edma_xmit_handoff_mcast(struct ath_softc *sc, struct ath_txq *txq,
struct ath_buf *bf)
{
Overhaul the TXQ locking (again!) as part of some beacon/cabq timing related issues. Moving the TX locking under one lock made things easier to progress on but it had one important side-effect - it increased the latency when handling CABQ setup when sending beacons. This commit introduces a bunch of new changes and a few unrelated changs that are just easier to lump in here. The aim is to have the CABQ locking separate from other locking. The CABQ transmit path in the beacon process thus doesn't have to grab the general TX lock, reducing lock contention/latency and making it more likely that we'll make the beacon TX timing. The second half of this commit is the CABQ related setup changes needed for sane looking EDMA CABQ support. Right now the EDMA TX code naively assumes that only one frame (MPDU or A-MPDU) is being pushed into each FIFO slot. For the CABQ this isn't true - a whole list of frames is being pushed in - and thus CABQ handling breaks very quickly. The aim here is to setup the CABQ list and then push _that list_ to the hardware for transmission. I can then extend the EDMA TX code to stamp that list as being "one" FIFO entry (likely by tagging the last buffer in that list as "FIFO END") so the EDMA TX completion code correctly tracks things. Major: * Migrate the per-TXQ add/removal locking back to per-TXQ, rather than a single lock. * Leave the software queue side of things under the ATH_TX_LOCK lock, (continuing) to serialise things as they are. * Add a new function which is called whenever there's a beacon miss, to print out some debugging. This is primarily designed to help me figure out if the beacon miss events are due to a noisy environment, issues with the PHY/MAC, or other. * Move the CABQ setup/enable to occur _after_ all the VAPs have been looked at. This means that for multiple VAPS in bursted mode, the CABQ gets primed once all VAPs are checked, rather than being primed on the first VAP and then having frames appended after this. 
Minor: * Add a (disabled) twiddle to let me enable/disable cabq traffic. It's primarily there to let me easily debug what's going on with beacon and CABQ setup/traffic; there's some DMA engine hangs which I'm finally trying to trace down. * Clear bf_next when flushing frames; it should quieten some warnings that show up when a node goes away. Tested: * AR9280, STA/hostap, up to 4 vaps (staggered) * AR5416, STA/hostap, up to 4 vaps (staggered) TODO: * (Lots) more AR9380 and later testing, as I may have missed something here. * Leverage this to fix CABQ hanling for AR9380 and later chips. * Force bursted beaconing on the chips that default to staggered beacons and ensure the CABQ stuff is all sane (eg, the MORE bits that aren't being correctly set when chaining descriptors.)
2013-03-24 00:03:12 +00:00
ATH_TXQ_LOCK_ASSERT(txq);
2012-08-11 22:20:28 +00:00
KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0,
("%s: busy status 0x%x", __func__, bf->bf_flags));
ATH_TXQ_LOCK(txq);
2012-08-11 22:20:28 +00:00
/*
* XXX this is mostly duplicated in ath_tx_handoff_mcast().
*/
if (ATH_TXQ_FIRST(txq) != NULL) {
struct ath_buf *bf_last = ATH_TXQ_LAST(txq, axq_q_s);
struct ieee80211_frame *wh;
/* mark previous frame */
wh = mtod(bf_last->bf_m, struct ieee80211_frame *);
wh->i_fc[1] |= IEEE80211_FC1_MORE_DATA;
/* sync descriptor to memory */
bus_dmamap_sync(sc->sc_dmat, bf_last->bf_dmamap,
BUS_DMASYNC_PREWRITE);
/* link descriptor */
*txq->axq_link = bf->bf_daddr;
2012-08-11 22:20:28 +00:00
}
#ifdef ATH_DEBUG_ALQ
if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
ath_tx_alq_post(sc, bf);
#endif /* ATH_DEBUG_ALQ */
2012-08-11 22:20:28 +00:00
ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
ath_hal_gettxdesclinkptr(sc->sc_ah, bf->bf_lastds, &txq->axq_link);
ATH_TXQ_UNLOCK(txq);
2012-08-11 22:20:28 +00:00
}
/*
* Handoff this frame to the hardware.
*
* For the multicast queue, this will treat it as a software queue
* and append it to the list, after updating the MORE_DATA flag
* in the previous frame. The cabq processing code will ensure
* that the queue contents gets transferred over.
*
* For the hardware queues, this will queue a frame to the queue
* like before, then populate the FIFO from that. Since the
* EDMA hardware has 8 FIFO slots per TXQ, this ensures that
* frames such as management frames don't get prematurely dropped.
*
* This does imply that a similar flush-hwq-to-fifoq method will
* need to be called from the processq function, before the
* per-node software scheduler is called.
*/
static void
ath_edma_xmit_handoff(struct ath_softc *sc, struct ath_txq *txq,
struct ath_buf *bf)
{
DPRINTF(sc, ATH_DEBUG_XMIT_DESC,
"%s: called; bf=%p, txq=%p, qnum=%d\n",
__func__,
bf,
txq,
txq->axq_qnum);
2012-08-11 22:20:28 +00:00
if (txq->axq_qnum == ATH_TXQ_SWQ)
ath_edma_xmit_handoff_mcast(sc, txq, bf);
else
ath_edma_xmit_handoff_hw(sc, txq, bf);
#if 0
/*
* XXX For now this is a placeholder; free the buffer
* and inform the stack that the TX failed.
*/
ath_tx_default_comp(sc, bf, 1);
2012-08-11 22:20:28 +00:00
#endif
}
/*
 * Allocate the zero-filled software FIFO array (HAL_TXFIFO_DEPTH
 * ath_buf pointers) for TX queue 'qnum' and mark it empty.
 *
 * Returns 0 on success, or ENOMEM if the allocation fails.
 */
static int
ath_edma_setup_txfifo(struct ath_softc *sc, int qnum)
{
	struct ath_tx_edma_fifo *te = &sc->sc_txedma[qnum];

	te->m_fifo = malloc(sizeof(struct ath_buf *) * HAL_TXFIFO_DEPTH,
	    M_ATHDEV,
	    M_NOWAIT | M_ZERO);
	if (te->m_fifo == NULL) {
		device_printf(sc->sc_dev, "%s: malloc failed\n",
		    __func__);
		/* Kernel error codes are positive errno values. */
		return (ENOMEM);
	}

	/*
	 * Set initial "empty" state.
	 */
	te->m_fifo_head = te->m_fifo_tail = te->m_fifo_depth = 0;

	return (0);
}
static int
ath_edma_free_txfifo(struct ath_softc *sc, int qnum)
{
struct ath_tx_edma_fifo *te = &sc->sc_txedma[qnum];
/* XXX TODO: actually deref the ath_buf entries? */
free(te->m_fifo, M_ATHDEV);
return (0);
}
static int
ath_edma_dma_txsetup(struct ath_softc *sc)
{
int error;
int i;
error = ath_descdma_alloc_desc(sc, &sc->sc_txsdma,
NULL, "txcomp", sc->sc_tx_statuslen, ATH_TXSTATUS_RING_SIZE);
if (error != 0)
return (error);
ath_hal_setuptxstatusring(sc->sc_ah,
(void *) sc->sc_txsdma.dd_desc,
sc->sc_txsdma.dd_desc_paddr,
ATH_TXSTATUS_RING_SIZE);
for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
ath_edma_setup_txfifo(sc, i);
}
return (0);
}
/*
 * Undo ath_edma_dma_txsetup(): free every per-queue software FIFO
 * array, then release the TX status ring.
 *
 * Always returns 0.
 */
static int
ath_edma_dma_txteardown(struct ath_softc *sc)
{
	int qnum = 0;

	while (qnum < HAL_NUM_TX_QUEUES) {
		ath_edma_free_txfifo(sc, qnum);
		qnum++;
	}

	ath_descdma_cleanup(sc, &sc->sc_txsdma, NULL);

	return (0);
}
2012-08-11 22:20:28 +00:00
/*
* Drain all TXQs, potentially after completing the existing completed
* frames.
2012-08-11 22:20:28 +00:00
*/
static void
ath_edma_tx_drain(struct ath_softc *sc, ATH_RESET_TYPE reset_type)
{
struct ifnet *ifp = sc->sc_ifp;
int i;
DPRINTF(sc, ATH_DEBUG_RESET, "%s: called\n", __func__);
(void) ath_stoptxdma(sc);
/*
* If reset type is noloss, the TX FIFO needs to be serviced
* and those frames need to be handled.
*
* Otherwise, just toss everything in each TX queue.
*/
2012-11-08 17:46:27 +00:00
if (reset_type == ATH_RESET_NOLOSS) {
ath_edma_tx_processq(sc, 0);
} else {
for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
if (ATH_TXQ_SETUP(sc, i))
ath_tx_draintxq(sc, &sc->sc_txq[i]);
}
}
/* XXX dump out the TX completion FIFO contents */
/* XXX dump out the frames */
IF_LOCK(&ifp->if_snd);
ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
IF_UNLOCK(&ifp->if_snd);
sc->sc_wd_timer = 0;
}
2012-08-11 22:20:28 +00:00
/*
 * TX completion tasklet.
 *
 * 'arg' is the driver softc; 'npending' is only logged.  All the work
 * is done by ath_edma_tx_processq() with FIFO rescheduling enabled.
 */
static void
ath_edma_tx_proc(void *arg, int npending)
{
	struct ath_softc *sc;

	sc = (struct ath_softc *) arg;

	DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: called, npending=%d\n",
	    __func__, npending);

	ath_edma_tx_processq(sc, 1);
}
/*
* Process the TX status queue.
*/
static void
ath_edma_tx_processq(struct ath_softc *sc, int dosched)
{
2012-08-11 22:20:28 +00:00
struct ath_hal *ah = sc->sc_ah;
HAL_STATUS status;
struct ath_tx_status ts;
struct ath_txq *txq;
struct ath_buf *bf;
struct ieee80211_node *ni;
int nacked = 0;
int idx;
#ifdef ATH_DEBUG
/* XXX */
uint32_t txstatus[32];
#endif
for (idx = 0; ; idx++) {
bzero(&ts, sizeof(ts));
2012-08-11 22:20:28 +00:00
ATH_TXSTATUS_LOCK(sc);
2012-11-04 00:46:01 +00:00
#ifdef ATH_DEBUG
ath_hal_gettxrawtxdesc(ah, txstatus);
2012-11-04 00:46:01 +00:00
#endif
status = ath_hal_txprocdesc(ah, NULL, (void *) &ts);
2012-08-11 22:20:28 +00:00
ATH_TXSTATUS_UNLOCK(sc);
#ifdef ATH_DEBUG
if (sc->sc_debug & ATH_DEBUG_TX_PROC)
ath_printtxstatbuf(sc, NULL, txstatus, ts.ts_queue_id,
idx, (status == HAL_OK));
#endif
if (status == HAL_EINPROGRESS)
2012-08-11 22:20:28 +00:00
break;
/*
* If there is an error with this descriptor, continue
* processing.
*
* XXX TBD: log some statistics?
*/
if (status == HAL_EIO) {
device_printf(sc->sc_dev, "%s: invalid TX status?\n",
__func__);
continue;
}
#ifdef ATH_DEBUG_ALQ
if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS))
if_ath_alq_post(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS,
sc->sc_tx_statuslen,
(char *) txstatus);
#endif /* ATH_DEBUG_ALQ */
2012-08-11 22:20:28 +00:00
/*
* At this point we have a valid status descriptor.
* The QID and descriptor ID (which currently isn't set)
* is part of the status.
*
* We then assume that the descriptor in question is the
* -head- of the given QID. Eventually we should verify
* this by using the descriptor ID.
*/
/*
* The beacon queue is not currently a "real" queue.
* Frames aren't pushed onto it and the lock isn't setup.
* So skip it for now; the beacon handling code will
* free and alloc more beacon buffers as appropriate.
*/
if (ts.ts_queue_id == sc->sc_bhalq)
continue;
2012-08-11 22:20:28 +00:00
txq = &sc->sc_txq[ts.ts_queue_id];
Overhaul the TXQ locking (again!) as part of some beacon/cabq timing related issues. Moving the TX locking under one lock made things easier to progress on but it had one important side-effect - it increased the latency when handling CABQ setup when sending beacons. This commit introduces a bunch of new changes and a few unrelated changs that are just easier to lump in here. The aim is to have the CABQ locking separate from other locking. The CABQ transmit path in the beacon process thus doesn't have to grab the general TX lock, reducing lock contention/latency and making it more likely that we'll make the beacon TX timing. The second half of this commit is the CABQ related setup changes needed for sane looking EDMA CABQ support. Right now the EDMA TX code naively assumes that only one frame (MPDU or A-MPDU) is being pushed into each FIFO slot. For the CABQ this isn't true - a whole list of frames is being pushed in - and thus CABQ handling breaks very quickly. The aim here is to setup the CABQ list and then push _that list_ to the hardware for transmission. I can then extend the EDMA TX code to stamp that list as being "one" FIFO entry (likely by tagging the last buffer in that list as "FIFO END") so the EDMA TX completion code correctly tracks things. Major: * Migrate the per-TXQ add/removal locking back to per-TXQ, rather than a single lock. * Leave the software queue side of things under the ATH_TX_LOCK lock, (continuing) to serialise things as they are. * Add a new function which is called whenever there's a beacon miss, to print out some debugging. This is primarily designed to help me figure out if the beacon miss events are due to a noisy environment, issues with the PHY/MAC, or other. * Move the CABQ setup/enable to occur _after_ all the VAPs have been looked at. This means that for multiple VAPS in bursted mode, the CABQ gets primed once all VAPs are checked, rather than being primed on the first VAP and then having frames appended after this. 
Minor: * Add a (disabled) twiddle to let me enable/disable cabq traffic. It's primarily there to let me easily debug what's going on with beacon and CABQ setup/traffic; there's some DMA engine hangs which I'm finally trying to trace down. * Clear bf_next when flushing frames; it should quieten some warnings that show up when a node goes away. Tested: * AR9280, STA/hostap, up to 4 vaps (staggered) * AR5416, STA/hostap, up to 4 vaps (staggered) TODO: * (Lots) more AR9380 and later testing, as I may have missed something here. * Leverage this to fix CABQ hanling for AR9380 and later chips. * Force bursted beaconing on the chips that default to staggered beacons and ensure the CABQ stuff is all sane (eg, the MORE bits that aren't being correctly set when chaining descriptors.)
2013-03-24 00:03:12 +00:00
ATH_TXQ_LOCK(txq);
bf = TAILQ_FIRST(&txq->axq_q);
DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: qcuid=%d, bf=%p\n",
__func__,
ts.ts_queue_id, bf);
/* XXX TODO: actually output debugging info about this */
#if 0
/* XXX assert the buffer/descriptor matches the status descid */
if (ts.ts_desc_id != bf->bf_descid) {
device_printf(sc->sc_dev,
"%s: mismatched descid (qid=%d, tsdescid=%d, "
"bfdescid=%d\n",
__func__,
ts.ts_queue_id,
ts.ts_desc_id,
bf->bf_descid);
}
#endif
/* This removes the buffer and decrements the queue depth */
ATH_TXQ_REMOVE(txq, bf, bf_list);
if (bf->bf_state.bfs_aggr)
txq->axq_aggr_depth--;
txq->axq_fifo_depth --;
/* XXX assert FIFO depth >= 0 */
Overhaul the TXQ locking (again!) as part of some beacon/cabq timing related issues. Moving the TX locking under one lock made things easier to progress on but it had one important side-effect - it increased the latency when handling CABQ setup when sending beacons. This commit introduces a bunch of new changes and a few unrelated changs that are just easier to lump in here. The aim is to have the CABQ locking separate from other locking. The CABQ transmit path in the beacon process thus doesn't have to grab the general TX lock, reducing lock contention/latency and making it more likely that we'll make the beacon TX timing. The second half of this commit is the CABQ related setup changes needed for sane looking EDMA CABQ support. Right now the EDMA TX code naively assumes that only one frame (MPDU or A-MPDU) is being pushed into each FIFO slot. For the CABQ this isn't true - a whole list of frames is being pushed in - and thus CABQ handling breaks very quickly. The aim here is to setup the CABQ list and then push _that list_ to the hardware for transmission. I can then extend the EDMA TX code to stamp that list as being "one" FIFO entry (likely by tagging the last buffer in that list as "FIFO END") so the EDMA TX completion code correctly tracks things. Major: * Migrate the per-TXQ add/removal locking back to per-TXQ, rather than a single lock. * Leave the software queue side of things under the ATH_TX_LOCK lock, (continuing) to serialise things as they are. * Add a new function which is called whenever there's a beacon miss, to print out some debugging. This is primarily designed to help me figure out if the beacon miss events are due to a noisy environment, issues with the PHY/MAC, or other. * Move the CABQ setup/enable to occur _after_ all the VAPs have been looked at. This means that for multiple VAPS in bursted mode, the CABQ gets primed once all VAPs are checked, rather than being primed on the first VAP and then having frames appended after this. 
Minor: * Add a (disabled) twiddle to let me enable/disable cabq traffic. It's primarily there to let me easily debug what's going on with beacon and CABQ setup/traffic; there's some DMA engine hangs which I'm finally trying to trace down. * Clear bf_next when flushing frames; it should quieten some warnings that show up when a node goes away. Tested: * AR9280, STA/hostap, up to 4 vaps (staggered) * AR5416, STA/hostap, up to 4 vaps (staggered) TODO: * (Lots) more AR9380 and later testing, as I may have missed something here. * Leverage this to fix CABQ hanling for AR9380 and later chips. * Force bursted beaconing on the chips that default to staggered beacons and ensure the CABQ stuff is all sane (eg, the MORE bits that aren't being correctly set when chaining descriptors.)
2013-03-24 00:03:12 +00:00
ATH_TXQ_UNLOCK(txq);
/*
* First we need to make sure ts_rate is valid.
*
* Pre-EDMA chips pass the whole TX descriptor to
* the proctxdesc function which will then fill out
* ts_rate based on the ts_finaltsi (final TX index)
* in the TX descriptor. However the TX completion
* FIFO doesn't have this information. So here we
* do a separate HAL call to populate that information.
*
* The same problem exists with ts_longretry.
* The FreeBSD HAL corrects ts_longretry in the HAL layer;
* the AR9380 HAL currently doesn't. So until the HAL
* is imported and this can be added, we correct for it
* here.
*/
/* XXX TODO */
/* XXX faked for now. Ew. */
if (ts.ts_finaltsi < 4) {
ts.ts_rate =
bf->bf_state.bfs_rc[ts.ts_finaltsi].ratecode;
switch (ts.ts_finaltsi) {
case 3: ts.ts_longretry +=
bf->bf_state.bfs_rc[2].tries;
case 2: ts.ts_longretry +=
bf->bf_state.bfs_rc[1].tries;
case 1: ts.ts_longretry +=
bf->bf_state.bfs_rc[0].tries;
}
} else {
device_printf(sc->sc_dev, "%s: finaltsi=%d\n",
__func__,
ts.ts_finaltsi);
ts.ts_rate = bf->bf_state.bfs_rc[0].ratecode;
}
/*
* XXX This is terrible.
*
* Right now, some code uses the TX status that is
* passed in here, but the completion handlers in the
* software TX path also use bf_status.ds_txstat.
* Ew. That should all go away.
*
* XXX It's also possible the rate control completion
* routine is called twice.
*/
memcpy(&bf->bf_status, &ts, sizeof(ts));
ni = bf->bf_node;
/* Update RSSI */
/* XXX duplicate from ath_tx_processq */
if (ni != NULL && ts.ts_status == 0 &&
((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0)) {
nacked++;
sc->sc_stats.ast_tx_rssi = ts.ts_rssi;
ATH_RSSI_LPF(sc->sc_halstats.ns_avgtxrssi,
ts.ts_rssi);
}
/* Handle frame completion and rate control update */
ath_tx_process_buf_completion(sc, txq, &ts, bf);
/* bf is invalid at this point */
/*
* Now that there's space in the FIFO, let's push some
* more frames into it.
*
* Unfortunately for now, the txq has FIFO and non-FIFO
* frames in the same linked list, so there's no way
* to quickly/easily populate frames without walking
* the queue and skipping 'axq_fifo_depth' frames.
*
* So for now, let's only repopulate the FIFO once it
* is empty. It's sucky for performance but it's enough
* to begin validating that things are somewhat
* working.
*/
Overhaul the TXQ locking (again!) as part of some beacon/cabq timing related issues. Moving the TX locking under one lock made things easier to progress on but it had one important side-effect - it increased the latency when handling CABQ setup when sending beacons. This commit introduces a bunch of new changes and a few unrelated changes that are just easier to lump in here. The aim is to have the CABQ locking separate from other locking. The CABQ transmit path in the beacon process thus doesn't have to grab the general TX lock, reducing lock contention/latency and making it more likely that we'll make the beacon TX timing. The second half of this commit is the CABQ related setup changes needed for sane looking EDMA CABQ support. Right now the EDMA TX code naively assumes that only one frame (MPDU or A-MPDU) is being pushed into each FIFO slot. For the CABQ this isn't true - a whole list of frames is being pushed in - and thus CABQ handling breaks very quickly. The aim here is to setup the CABQ list and then push _that list_ to the hardware for transmission. I can then extend the EDMA TX code to stamp that list as being "one" FIFO entry (likely by tagging the last buffer in that list as "FIFO END") so the EDMA TX completion code correctly tracks things. Major: * Migrate the per-TXQ add/removal locking back to per-TXQ, rather than a single lock. * Leave the software queue side of things under the ATH_TX_LOCK lock, (continuing) to serialise things as they are. * Add a new function which is called whenever there's a beacon miss, to print out some debugging. This is primarily designed to help me figure out if the beacon miss events are due to a noisy environment, issues with the PHY/MAC, or other. * Move the CABQ setup/enable to occur _after_ all the VAPs have been looked at. This means that for multiple VAPs in bursted mode, the CABQ gets primed once all VAPs are checked, rather than being primed on the first VAP and then having frames appended after this. 
Minor: * Add a (disabled) twiddle to let me enable/disable cabq traffic. It's primarily there to let me easily debug what's going on with beacon and CABQ setup/traffic; there's some DMA engine hangs which I'm finally trying to trace down. * Clear bf_next when flushing frames; it should quieten some warnings that show up when a node goes away. Tested: * AR9280, STA/hostap, up to 4 vaps (staggered) * AR5416, STA/hostap, up to 4 vaps (staggered) TODO: * (Lots) more AR9380 and later testing, as I may have missed something here. * Leverage this to fix CABQ handling for AR9380 and later chips. * Force bursted beaconing on the chips that default to staggered beacons and ensure the CABQ stuff is all sane (eg, the MORE bits that aren't being correctly set when chaining descriptors.)
2013-03-24 00:03:12 +00:00
ATH_TXQ_LOCK(txq);
if (dosched && txq->axq_fifo_depth == 0) {
ath_edma_tx_fifo_fill(sc, txq);
}
Overhaul the TXQ locking (again!) as part of some beacon/cabq timing related issues. Moving the TX locking under one lock made things easier to progress on but it had one important side-effect - it increased the latency when handling CABQ setup when sending beacons. This commit introduces a bunch of new changes and a few unrelated changes that are just easier to lump in here. The aim is to have the CABQ locking separate from other locking. The CABQ transmit path in the beacon process thus doesn't have to grab the general TX lock, reducing lock contention/latency and making it more likely that we'll make the beacon TX timing. The second half of this commit is the CABQ related setup changes needed for sane looking EDMA CABQ support. Right now the EDMA TX code naively assumes that only one frame (MPDU or A-MPDU) is being pushed into each FIFO slot. For the CABQ this isn't true - a whole list of frames is being pushed in - and thus CABQ handling breaks very quickly. The aim here is to setup the CABQ list and then push _that list_ to the hardware for transmission. I can then extend the EDMA TX code to stamp that list as being "one" FIFO entry (likely by tagging the last buffer in that list as "FIFO END") so the EDMA TX completion code correctly tracks things. Major: * Migrate the per-TXQ add/removal locking back to per-TXQ, rather than a single lock. * Leave the software queue side of things under the ATH_TX_LOCK lock, (continuing) to serialise things as they are. * Add a new function which is called whenever there's a beacon miss, to print out some debugging. This is primarily designed to help me figure out if the beacon miss events are due to a noisy environment, issues with the PHY/MAC, or other. * Move the CABQ setup/enable to occur _after_ all the VAPs have been looked at. This means that for multiple VAPs in bursted mode, the CABQ gets primed once all VAPs are checked, rather than being primed on the first VAP and then having frames appended after this. 
Minor: * Add a (disabled) twiddle to let me enable/disable cabq traffic. It's primarily there to let me easily debug what's going on with beacon and CABQ setup/traffic; there's some DMA engine hangs which I'm finally trying to trace down. * Clear bf_next when flushing frames; it should quieten some warnings that show up when a node goes away. Tested: * AR9280, STA/hostap, up to 4 vaps (staggered) * AR5416, STA/hostap, up to 4 vaps (staggered) TODO: * (Lots) more AR9380 and later testing, as I may have missed something here. * Leverage this to fix CABQ handling for AR9380 and later chips. * Force bursted beaconing on the chips that default to staggered beacons and ensure the CABQ stuff is all sane (eg, the MORE bits that aren't being correctly set when chaining descriptors.)
2013-03-24 00:03:12 +00:00
ATH_TXQ_UNLOCK(txq);
2012-08-11 22:20:28 +00:00
}
sc->sc_wd_timer = 0;
if (idx > 0) {
IF_LOCK(&sc->sc_ifp->if_snd);
sc->sc_ifp->if_drv_flags &= ~IFF_DRV_OACTIVE;
IF_UNLOCK(&sc->sc_ifp->if_snd);
}
/* Kick software scheduler */
/*
* XXX It's inefficient to do this if the FIFO queue is full,
* but there's no easy way right now to only populate
* the txq task for _one_ TXQ. This should be fixed.
*/
if (dosched)
ath_tx_swq_kick(sc);
}
/*
 * Attach the EDMA TX completion handler.
 *
 * Initialises the softc's sc_txtask so that running the task calls
 * ath_edma_tx_proc() with the softc as its context argument.
 */
static void
ath_edma_attach_comp_func(struct ath_softc *sc)
{
	struct task *txtask = &sc->sc_txtask;

	TASK_INIT(txtask, 0, ath_edma_tx_proc, sc);
}
/*
 * Prepare the softc for EDMA-based transmit.
 *
 * Queries the HAL for the TX descriptor length, the TX status length
 * and the number of TX buffers per descriptor, logs each value, and
 * then installs the EDMA implementations of the sc_tx method hooks.
 */
void
ath_xmit_setup_edma(struct ath_softc *sc)
{
	struct ath_hal *ah = sc->sc_ah;

	/*
	 * Ask the HAL for the EDMA field and buffer sizes.  The return
	 * values are deliberately discarded.
	 */
	(void) ath_hal_gettxdesclen(ah, &sc->sc_tx_desclen);
	(void) ath_hal_gettxstatuslen(ah, &sc->sc_tx_statuslen);
	(void) ath_hal_getntxmaps(ah, &sc->sc_tx_nmaps);

	/* Report what was fetched. */
	device_printf(sc->sc_dev, "TX descriptor length: %d\n",
	    sc->sc_tx_desclen);
	device_printf(sc->sc_dev, "TX status length: %d\n",
	    sc->sc_tx_statuslen);
	device_printf(sc->sc_dev, "TX buffers per descriptor: %d\n",
	    sc->sc_tx_nmaps);

	/* DMA lifecycle hooks. */
	sc->sc_tx.xmit_setup = ath_edma_dma_txsetup;
	sc->sc_tx.xmit_teardown = ath_edma_dma_txteardown;
	sc->sc_tx.xmit_dma_restart = ath_edma_dma_restart;

	/* Frame submission, queue drain and completion hooks. */
	sc->sc_tx.xmit_handoff = ath_edma_xmit_handoff;
	sc->sc_tx.xmit_drain = ath_edma_tx_drain;
	sc->sc_tx.xmit_attach_comp_func = ath_edma_attach_comp_func;
}