Push the actual TX processing into the ath taskqueue, rather than having

it run out of multiple concurrent contexts.

Right now the ath(4) TX processing is a bit hairy. Specifically:

* It was running out of ath_start(), which could occur from multiple
  concurrent sending processes (as if_start() can be started from multiple
  sending threads nowdays.. sigh)

* during RX if fast frames are enabled (so not really at the moment, not
  until I fix this particular feature again..)

* during ath_reset() - so anything which calls that

* during ath_tx_proc*() in the ath taskqueue - ie, TX is attempted again
  after TX completion, as there's now hopefully some ath_bufs available.

* Then, the ic_raw_xmit() method can queue raw frames for transmission
  at any time, from any net80211 TX context. Ew.

This has caused packet ordering issues in the past - specifically,
there's absolutely no guarantee that preemption won't occuring _during_
ath_start() by the TX completion processing, which will call ath_start()
again. It's a mess - 802.11 really, really wants things to be in
sequence or things go all kinds of loopy.

So:

* create a new task struct for TX'ing;
* make the if_start method simply queue the task on the ath taskqueue;
* make ath_start() just be called by the new TX task;
* make ath_tx_kick() just schedule the ath TX task, rather than directly
  calling ath_start().

Now yes, this means that I've taken a step backwards in terms of
concurrency - TX -and- RX now occur in the same single-task taskqueue.
But there's nothing stopping me from separating out the TX / TX completion
code into a separate taskqueue which runs in parallel with the RX path,
if that ends up being appropriate for some platforms.

This fixes the CCMP/seqno concurrency issues that creep up when you
transmit large amounts of uni-directional UDP traffic (>200MBit) on a
FreeBSD STA -> AP, as now there's only one TX context no matter what's
going on (TX completion->retry/software queue,
userland->net80211->ath_start(), TX completion -> ath_start());
but it won't fix any concurrency issues between raw transmitted frames
and non-raw transmitted frames (eg EAPOL frames on TID 16 and any other
TID 16 multicast traffic that gets put on the CABQ.)  That is going to
require a bunch more re-architecture before it's feasible to fix.

In any case, this is a big step towards making the majority of the TX
path locking irrelevant, as now almost all TX activity occurs in the
taskqueue.

Phew.
This commit is contained in:
Adrian Chadd 2012-10-14 20:44:08 +00:00
parent 516f67965a
commit 8e7393944d
3 changed files with 36 additions and 15 deletions

View File

@ -142,6 +142,7 @@ static void ath_init(void *);
static void ath_stop_locked(struct ifnet *);
static void ath_stop(struct ifnet *);
static int ath_reset_vap(struct ieee80211vap *, u_long);
static void ath_start_queue(struct ifnet *ifp);
static int ath_media_change(struct ifnet *);
static void ath_watchdog(void *);
static int ath_ioctl(struct ifnet *, u_long, caddr_t);
@ -420,6 +421,7 @@ ath_attach(u_int16_t devid, struct ath_softc *sc)
TASK_INIT(&sc->sc_resettask,0, ath_reset_proc, sc);
TASK_INIT(&sc->sc_txqtask,0, ath_txq_sched_tasklet, sc);
TASK_INIT(&sc->sc_fataltask,0, ath_fatal_proc, sc);
TASK_INIT(&sc->sc_txsndtask, 0, ath_start_task, sc);
/*
* Allocate hardware transmit queues: one queue for
@ -531,7 +533,7 @@ ath_attach(u_int16_t devid, struct ath_softc *sc)
ifp->if_softc = sc;
ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
ifp->if_start = ath_start;
ifp->if_start = ath_start_queue;
ifp->if_ioctl = ath_ioctl;
ifp->if_init = ath_init;
IFQ_SET_MAXLEN(&ifp->if_snd, ifqmaxlen);
@ -2246,7 +2248,7 @@ ath_reset(struct ifnet *ifp, ATH_RESET_TYPE reset_type)
* XXX should this be done by the caller, rather than
* ath_reset() ?
*/
ath_start(ifp); /* restart xmit */
ath_tx_kick(sc); /* restart xmit */
return 0;
}
@ -2428,17 +2430,19 @@ ath_getbuf(struct ath_softc *sc, ath_buf_type_t btype)
return bf;
}
void
ath_start(struct ifnet *ifp)
static void
ath_start_queue(struct ifnet *ifp)
{
struct ath_softc *sc = ifp->if_softc;
struct ieee80211_node *ni;
struct ath_buf *bf;
struct mbuf *m, *next;
ath_bufhead frags;
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->sc_invalid)
return;
ath_tx_kick(sc);
}
void
ath_start_task(void *arg, int npending)
{
struct ath_softc *sc = (struct ath_softc *) arg;
struct ifnet *ifp = sc->sc_ifp;
/* XXX is it ok to hold the ATH_LOCK here? */
ATH_PCU_LOCK(sc);
@ -2455,6 +2459,25 @@ ath_start(struct ifnet *ifp)
sc->sc_txstart_cnt++;
ATH_PCU_UNLOCK(sc);
ath_start(sc->sc_ifp);
ATH_PCU_LOCK(sc);
sc->sc_txstart_cnt--;
ATH_PCU_UNLOCK(sc);
}
void
ath_start(struct ifnet *ifp)
{
struct ath_softc *sc = ifp->if_softc;
struct ieee80211_node *ni;
struct ath_buf *bf;
struct mbuf *m, *next;
ath_bufhead frags;
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->sc_invalid)
return;
for (;;) {
ATH_TXBUF_LOCK(sc);
if (sc->sc_txbuf_cnt <= sc->sc_txq_data_minfree) {
@ -2549,10 +2572,6 @@ ath_start(struct ifnet *ifp)
sc->sc_wd_timer = 5;
}
ATH_PCU_LOCK(sc);
sc->sc_txstart_cnt--;
ATH_PCU_UNLOCK(sc);
}
static int

View File

@ -115,12 +115,13 @@ extern int ath_stoptxdma(struct ath_softc *sc);
* we can kill this.
*/
extern void ath_start(struct ifnet *ifp);
extern void ath_start_task(void *arg, int npending);
static inline void
ath_tx_kick(struct ath_softc *sc)
{
ath_start(sc->sc_ifp);
taskqueue_enqueue(sc->sc_tq, &sc->sc_txsndtask);
}
#endif

View File

@ -632,6 +632,7 @@ struct ath_softc {
struct ath_txq *sc_ac2q[5]; /* WME AC -> h/w q map */
struct task sc_txtask; /* tx int processing */
struct task sc_txqtask; /* tx proc processing */
struct task sc_txsndtask; /* tx send processing */
struct ath_descdma sc_txcompdma; /* TX EDMA completion */
struct mtx sc_txcomplock; /* TX EDMA completion lock */