I give up - introduce a TX lock to serialise TX operations.

I've tried serialising TX using queues and such but unfortunately
due to how this interacts with the locking going on elsewhere in the
networking stack, the TX task gets delayed, resulting in quite a
noticable throughput loss:

* baseline TCP for 2x2 11n HT40 is ~ 170mbit/sec;
* TCP for TX task in the ath taskq, with the RX also going on - 80mbit/sec;
* TCP for TX task in a separate, second taskq - 100mbit/sec.

So for now I'm going with the Linux wireless stack approach - lock tx
early.  The linux code does in the wireless stack, before the 802.11
state stuff happens and before it's punted to the driver.
But TX locking needs to also occur at the driver layer as the TX
completion code _also_ begins to drain the ifnet TX queue.

Whilst I'm here, add some KTR traces for the TX path.

Note:

* This really should be done at the net80211 layer (as well, at least.)
  But that'll have to wait for a little more thought to happen.
This commit is contained in:
adrian 2012-10-31 06:27:58 +00:00
parent 62321d8c39
commit 2ef1c4bedf
6 changed files with 51 additions and 3 deletions

View File

@ -424,7 +424,6 @@ ath_attach(u_int16_t devid, struct ath_softc *sc)
TASK_INIT(&sc->sc_resettask,0, ath_reset_proc, sc); TASK_INIT(&sc->sc_resettask,0, ath_reset_proc, sc);
TASK_INIT(&sc->sc_txqtask,0, ath_txq_sched_tasklet, sc); TASK_INIT(&sc->sc_txqtask,0, ath_txq_sched_tasklet, sc);
TASK_INIT(&sc->sc_fataltask,0, ath_fatal_proc, sc); TASK_INIT(&sc->sc_fataltask,0, ath_fatal_proc, sc);
TASK_INIT(&sc->sc_txsndtask, 0, ath_start_task, sc);
/* /*
* Allocate hardware transmit queues: one queue for * Allocate hardware transmit queues: one queue for
@ -2447,7 +2446,9 @@ ath_start_queue(struct ifnet *ifp)
{ {
struct ath_softc *sc = ifp->if_softc; struct ath_softc *sc = ifp->if_softc;
ATH_KTR(sc, ATH_KTR_TX, 0, "ath_start_queue: start");
ath_tx_kick(sc); ath_tx_kick(sc);
ATH_KTR(sc, ATH_KTR_TX, 0, "ath_start_queue: finished");
} }
void void
@ -2456,6 +2457,8 @@ ath_start_task(void *arg, int npending)
struct ath_softc *sc = (struct ath_softc *) arg; struct ath_softc *sc = (struct ath_softc *) arg;
struct ifnet *ifp = sc->sc_ifp; struct ifnet *ifp = sc->sc_ifp;
ATH_KTR(sc, ATH_KTR_TX, 0, "ath_start_task: start");
/* XXX is it ok to hold the ATH_LOCK here? */ /* XXX is it ok to hold the ATH_LOCK here? */
ATH_PCU_LOCK(sc); ATH_PCU_LOCK(sc);
if (sc->sc_inreset_cnt > 0) { if (sc->sc_inreset_cnt > 0) {
@ -2466,6 +2469,7 @@ ath_start_task(void *arg, int npending)
sc->sc_stats.ast_tx_qstop++; sc->sc_stats.ast_tx_qstop++;
ifp->if_drv_flags |= IFF_DRV_OACTIVE; ifp->if_drv_flags |= IFF_DRV_OACTIVE;
IF_UNLOCK(&ifp->if_snd); IF_UNLOCK(&ifp->if_snd);
ATH_KTR(sc, ATH_KTR_TX, 0, "ath_start_task: OACTIVE, finish");
return; return;
} }
sc->sc_txstart_cnt++; sc->sc_txstart_cnt++;
@ -2476,6 +2480,7 @@ ath_start_task(void *arg, int npending)
ATH_PCU_LOCK(sc); ATH_PCU_LOCK(sc);
sc->sc_txstart_cnt--; sc->sc_txstart_cnt--;
ATH_PCU_UNLOCK(sc); ATH_PCU_UNLOCK(sc);
ATH_KTR(sc, ATH_KTR_TX, 0, "ath_start_task: finished");
} }
void void
@ -2486,10 +2491,13 @@ ath_start(struct ifnet *ifp)
struct ath_buf *bf; struct ath_buf *bf;
struct mbuf *m, *next; struct mbuf *m, *next;
ath_bufhead frags; ath_bufhead frags;
int npkts = 0;
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->sc_invalid) if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->sc_invalid)
return; return;
ATH_KTR(sc, ATH_KTR_TX, 0, "ath_start: called");
for (;;) { for (;;) {
ATH_TXBUF_LOCK(sc); ATH_TXBUF_LOCK(sc);
if (sc->sc_txbuf_cnt <= sc->sc_txq_data_minfree) { if (sc->sc_txbuf_cnt <= sc->sc_txq_data_minfree) {
@ -2517,6 +2525,7 @@ ath_start(struct ifnet *ifp)
break; break;
} }
ni = (struct ieee80211_node *) m->m_pkthdr.rcvif; ni = (struct ieee80211_node *) m->m_pkthdr.rcvif;
npkts ++;
/* /*
* Check for fragmentation. If this frame * Check for fragmentation. If this frame
* has been broken up verify we have enough * has been broken up verify we have enough
@ -2590,6 +2599,7 @@ ath_start(struct ifnet *ifp)
sc->sc_wd_timer = 5; sc->sc_wd_timer = 5;
} }
ATH_KTR(sc, ATH_KTR_TX, 1, "ath_start: finished; npkts=%d", npkts);
} }
static int static int

View File

@ -194,6 +194,7 @@ ath_ahb_attach(device_t dev)
ATH_LOCK_INIT(sc); ATH_LOCK_INIT(sc);
ATH_PCU_LOCK_INIT(sc); ATH_PCU_LOCK_INIT(sc);
ATH_RX_LOCK_INIT(sc); ATH_RX_LOCK_INIT(sc);
ATH_TX_LOCK_INIT(sc);
ATH_TXSTATUS_LOCK_INIT(sc); ATH_TXSTATUS_LOCK_INIT(sc);
error = ath_attach(AR9130_DEVID, sc); error = ath_attach(AR9130_DEVID, sc);
@ -202,6 +203,7 @@ ath_ahb_attach(device_t dev)
ATH_TXSTATUS_LOCK_DESTROY(sc); ATH_TXSTATUS_LOCK_DESTROY(sc);
ATH_RX_LOCK_DESTROY(sc); ATH_RX_LOCK_DESTROY(sc);
ATH_TX_LOCK_DESTROY(sc);
ATH_PCU_LOCK_DESTROY(sc); ATH_PCU_LOCK_DESTROY(sc);
ATH_LOCK_DESTROY(sc); ATH_LOCK_DESTROY(sc);
bus_dma_tag_destroy(sc->sc_dmat); bus_dma_tag_destroy(sc->sc_dmat);
@ -244,6 +246,7 @@ ath_ahb_detach(device_t dev)
ATH_TXSTATUS_LOCK_DESTROY(sc); ATH_TXSTATUS_LOCK_DESTROY(sc);
ATH_RX_LOCK_DESTROY(sc); ATH_RX_LOCK_DESTROY(sc);
ATH_TX_LOCK_DESTROY(sc);
ATH_PCU_LOCK_DESTROY(sc); ATH_PCU_LOCK_DESTROY(sc);
ATH_LOCK_DESTROY(sc); ATH_LOCK_DESTROY(sc);

View File

@ -124,7 +124,9 @@ static inline void
ath_tx_kick(struct ath_softc *sc) ath_tx_kick(struct ath_softc *sc)
{ {
taskqueue_enqueue(sc->sc_tq, &sc->sc_txsndtask); ATH_TX_LOCK(sc);
ath_start(sc->sc_ifp);
ATH_TX_UNLOCK(sc);
} }
#endif #endif

View File

@ -250,6 +250,7 @@ ath_pci_attach(device_t dev)
ATH_LOCK_INIT(sc); ATH_LOCK_INIT(sc);
ATH_PCU_LOCK_INIT(sc); ATH_PCU_LOCK_INIT(sc);
ATH_RX_LOCK_INIT(sc); ATH_RX_LOCK_INIT(sc);
ATH_TX_LOCK_INIT(sc);
ATH_TXSTATUS_LOCK_INIT(sc); ATH_TXSTATUS_LOCK_INIT(sc);
error = ath_attach(pci_get_device(dev), sc); error = ath_attach(pci_get_device(dev), sc);
@ -259,6 +260,7 @@ ath_pci_attach(device_t dev)
ATH_TXSTATUS_LOCK_DESTROY(sc); ATH_TXSTATUS_LOCK_DESTROY(sc);
ATH_PCU_LOCK_DESTROY(sc); ATH_PCU_LOCK_DESTROY(sc);
ATH_RX_LOCK_DESTROY(sc); ATH_RX_LOCK_DESTROY(sc);
ATH_TX_LOCK_DESTROY(sc);
ATH_LOCK_DESTROY(sc); ATH_LOCK_DESTROY(sc);
bus_dma_tag_destroy(sc->sc_dmat); bus_dma_tag_destroy(sc->sc_dmat);
bad3: bad3:
@ -300,6 +302,7 @@ ath_pci_detach(device_t dev)
ATH_TXSTATUS_LOCK_DESTROY(sc); ATH_TXSTATUS_LOCK_DESTROY(sc);
ATH_PCU_LOCK_DESTROY(sc); ATH_PCU_LOCK_DESTROY(sc);
ATH_RX_LOCK_DESTROY(sc); ATH_RX_LOCK_DESTROY(sc);
ATH_TX_LOCK_DESTROY(sc);
ATH_LOCK_DESTROY(sc); ATH_LOCK_DESTROY(sc);
return (0); return (0);

View File

@ -2153,6 +2153,8 @@ ath_raw_xmit(struct ieee80211_node *ni, struct mbuf *m,
sc->sc_txstart_cnt++; sc->sc_txstart_cnt++;
ATH_PCU_UNLOCK(sc); ATH_PCU_UNLOCK(sc);
ATH_TX_LOCK(sc);
if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->sc_invalid) { if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->sc_invalid) {
DPRINTF(sc, ATH_DEBUG_XMIT, "%s: discard frame, %s", __func__, DPRINTF(sc, ATH_DEBUG_XMIT, "%s: discard frame, %s", __func__,
(ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ? (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ?
@ -2230,6 +2232,8 @@ ath_raw_xmit(struct ieee80211_node *ni, struct mbuf *m,
sc->sc_txstart_cnt--; sc->sc_txstart_cnt--;
ATH_PCU_UNLOCK(sc); ATH_PCU_UNLOCK(sc);
ATH_TX_UNLOCK(sc);
return 0; return 0;
bad2: bad2:
ATH_KTR(sc, ATH_KTR_TX, 3, "ath_raw_xmit: bad2: m=%p, params=%p, " ATH_KTR(sc, ATH_KTR_TX, 3, "ath_raw_xmit: bad2: m=%p, params=%p, "
@ -2241,6 +2245,9 @@ ath_raw_xmit(struct ieee80211_node *ni, struct mbuf *m,
ath_returnbuf_head(sc, bf); ath_returnbuf_head(sc, bf);
ATH_TXBUF_UNLOCK(sc); ATH_TXBUF_UNLOCK(sc);
bad: bad:
ATH_TX_UNLOCK(sc);
ATH_PCU_LOCK(sc); ATH_PCU_LOCK(sc);
sc->sc_txstart_cnt--; sc->sc_txstart_cnt--;
ATH_PCU_UNLOCK(sc); ATH_PCU_UNLOCK(sc);

View File

@ -531,6 +531,8 @@ struct ath_softc {
char sc_pcu_mtx_name[32]; char sc_pcu_mtx_name[32];
struct mtx sc_rx_mtx; /* RX access mutex */ struct mtx sc_rx_mtx; /* RX access mutex */
char sc_rx_mtx_name[32]; char sc_rx_mtx_name[32];
struct mtx sc_tx_mtx; /* TX access mutex */
char sc_tx_mtx_name[32];
struct taskqueue *sc_tq; /* private task queue */ struct taskqueue *sc_tq; /* private task queue */
struct ath_hal *sc_ah; /* Atheros HAL */ struct ath_hal *sc_ah; /* Atheros HAL */
struct ath_ratectrl *sc_rc; /* tx rate control support */ struct ath_ratectrl *sc_rc; /* tx rate control support */
@ -663,7 +665,6 @@ struct ath_softc {
struct ath_txq *sc_ac2q[5]; /* WME AC -> h/w q map */ struct ath_txq *sc_ac2q[5]; /* WME AC -> h/w q map */
struct task sc_txtask; /* tx int processing */ struct task sc_txtask; /* tx int processing */
struct task sc_txqtask; /* tx proc processing */ struct task sc_txqtask; /* tx proc processing */
struct task sc_txsndtask; /* tx send processing */
struct ath_descdma sc_txcompdma; /* TX EDMA completion */ struct ath_descdma sc_txcompdma; /* TX EDMA completion */
struct mtx sc_txcomplock; /* TX EDMA completion lock */ struct mtx sc_txcomplock; /* TX EDMA completion lock */
@ -778,6 +779,28 @@ struct ath_softc {
#define ATH_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_mtx, MA_OWNED) #define ATH_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_mtx, MA_OWNED)
#define ATH_UNLOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_mtx, MA_NOTOWNED) #define ATH_UNLOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_mtx, MA_NOTOWNED)
/*
* The TX lock is non-reentrant and serialises the TX send operations.
* (ath_start(), ath_raw_xmit().) It doesn't yet serialise the TX
* completion operations; thus it can't be used (yet!) to protect
* hardware / software TXQ operations.
*/
#define ATH_TX_LOCK_INIT(_sc) do {\
snprintf((_sc)->sc_tx_mtx_name, \
sizeof((_sc)->sc_tx_mtx_name), \
"%s TX lock", \
device_get_nameunit((_sc)->sc_dev)); \
mtx_init(&(_sc)->sc_tx_mtx, (_sc)->sc_tx_mtx_name, \
NULL, MTX_DEF); \
} while (0)
#define ATH_TX_LOCK_DESTROY(_sc) mtx_destroy(&(_sc)->sc_tx_mtx)
#define ATH_TX_LOCK(_sc) mtx_lock(&(_sc)->sc_tx_mtx)
#define ATH_TX_UNLOCK(_sc) mtx_unlock(&(_sc)->sc_tx_mtx)
#define ATH_TX_LOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_tx_mtx, \
MA_OWNED)
#define ATH_TX_UNLOCK_ASSERT(_sc) mtx_assert(&(_sc)->sc_tx_mtx, \
MA_NOTOWNED)
/* /*
* The PCU lock is non-recursive and should be treated as a spinlock. * The PCU lock is non-recursive and should be treated as a spinlock.
* Although currently the interrupt code is run in netisr context and * Although currently the interrupt code is run in netisr context and