Implement a separate hardware queue threshold for aggregate and non-aggr
traffic. When transmitting non-aggregate traffic, we need to keep the hardware busy whilst transmitting, or small bursts in txdone/TX latency will kill us. This restores non-aggregate iperf performance, especially when doing TDMA. Tested: * AR5416<->AR5416, TDMA * AR5416 STA <-> AR9280 AP
This commit is contained in:
parent
c283b0b0ae
commit
e1aea355e7
@ -839,7 +839,8 @@ ath_attach(u_int16_t devid, struct ath_softc *sc)
|
||||
/*
|
||||
* Initial aggregation settings.
|
||||
*/
|
||||
sc->sc_hwq_limit = ATH_AGGR_MIN_QDEPTH;
|
||||
sc->sc_hwq_limit_aggr = ATH_AGGR_MIN_QDEPTH;
|
||||
sc->sc_hwq_limit_nonaggr = ATH_NONAGGR_MIN_QDEPTH;
|
||||
sc->sc_tid_hwq_lo = ATH_AGGR_SCHED_LOW;
|
||||
sc->sc_tid_hwq_hi = ATH_AGGR_SCHED_HIGH;
|
||||
sc->sc_aggr_limit = ATH_AGGR_MAXSIZE;
|
||||
|
@ -722,8 +722,11 @@ ath_sysctlattach(struct ath_softc *sc)
|
||||
"mask of error frames to pass when monitoring");
|
||||
|
||||
SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
|
||||
"hwq_limit", CTLFLAG_RW, &sc->sc_hwq_limit, 0,
|
||||
"Hardware queue depth before software-queuing TX frames");
|
||||
"hwq_limit_nonaggr", CTLFLAG_RW, &sc->sc_hwq_limit_nonaggr, 0,
|
||||
"Hardware non-AMPDU queue depth before software-queuing TX frames");
|
||||
SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
|
||||
"hwq_limit_aggr", CTLFLAG_RW, &sc->sc_hwq_limit_aggr, 0,
|
||||
"Hardware AMPDU queue depth before software-queuing TX frames");
|
||||
SYSCTL_ADD_INT(ctx, SYSCTL_CHILDREN(tree), OID_AUTO,
|
||||
"tid_hwq_lo", CTLFLAG_RW, &sc->sc_tid_hwq_lo, 0,
|
||||
"");
|
||||
|
@ -3108,9 +3108,15 @@ ath_tx_swq(struct ath_softc *sc, struct ieee80211_node *ni,
|
||||
* the head frame in the list. Don't schedule the
|
||||
* TID - let it build some more frames first?
|
||||
*
|
||||
* When running A-MPDU, always just check the hardware
|
||||
* queue depth against the aggregate frame limit.
|
||||
* We don't want to burst a large number of single frames
|
||||
* out to the hardware; we want to aggressively hold back.
|
||||
*
|
||||
* Otherwise, schedule the TID.
|
||||
*/
|
||||
if (txq->axq_depth + txq->fifo.axq_depth < sc->sc_hwq_limit) {
|
||||
/* XXX TXQ locking */
|
||||
if (txq->axq_depth + txq->fifo.axq_depth < sc->sc_hwq_limit_aggr) {
|
||||
bf = ATH_TID_FIRST(atid);
|
||||
ATH_TID_REMOVE(atid, bf, bf_list);
|
||||
|
||||
@ -3134,7 +3140,22 @@ ath_tx_swq(struct ath_softc *sc, struct ieee80211_node *ni,
|
||||
|
||||
ath_tx_tid_sched(sc, atid);
|
||||
}
|
||||
} else if (txq->axq_depth + txq->fifo.axq_depth < sc->sc_hwq_limit) {
|
||||
/*
|
||||
* If we're not doing A-MPDU, be prepared to direct dispatch
|
||||
* up to both limits if possible. This particular corner
|
||||
* case may end up with packet starvation between aggregate
|
||||
 * traffic and non-aggregate traffic: we want to ensure
|
||||
* that non-aggregate stations get a few frames queued to the
|
||||
* hardware before the aggregate station(s) get their chance.
|
||||
*
|
||||
* So if you only ever see a couple of frames direct dispatched
|
||||
* to the hardware from a non-AMPDU client, check both here
|
||||
* and in the software queue dispatcher to ensure that those
|
||||
* non-AMPDU stations get a fair chance to transmit.
|
||||
*/
|
||||
/* XXX TXQ locking */
|
||||
} else if ((txq->axq_depth + txq->fifo.axq_depth < sc->sc_hwq_limit_nonaggr) &&
|
||||
(txq->axq_aggr_depth < sc->sc_hwq_limit_aggr)) {
|
||||
/* AMPDU not running, attempt direct dispatch */
|
||||
DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: xmit_normal\n", __func__);
|
||||
/* See if clrdmask needs to be set */
|
||||
@ -5339,7 +5360,8 @@ ath_tx_tid_hw_queue_aggr(struct ath_softc *sc, struct ath_node *an,
|
||||
*
|
||||
* XXX locking on txq here?
|
||||
*/
|
||||
if (txq->axq_aggr_depth >= sc->sc_hwq_limit ||
|
||||
/* XXX TXQ locking */
|
||||
if (txq->axq_aggr_depth >= sc->sc_hwq_limit_aggr ||
|
||||
(status == ATH_AGGR_BAW_CLOSED ||
|
||||
status == ATH_AGGR_LEAK_CLOSED))
|
||||
break;
|
||||
@ -5348,6 +5370,15 @@ ath_tx_tid_hw_queue_aggr(struct ath_softc *sc, struct ath_node *an,
|
||||
|
||||
/*
|
||||
* Schedule some packets from the given node/TID to the hardware.
|
||||
*
|
||||
* XXX TODO: this routine doesn't enforce the maximum TXQ depth.
|
||||
* It just dumps frames into the TXQ. We should limit how deep
|
||||
* the transmit queue can grow for frames dispatched to the given
|
||||
* TXQ.
|
||||
*
|
||||
* To avoid locking issues, either we need to own the TXQ lock
|
||||
* at this point, or we need to pass in the maximum frame count
|
||||
* from the caller.
|
||||
*/
|
||||
void
|
||||
ath_tx_tid_hw_queue_norm(struct ath_softc *sc, struct ath_node *an,
|
||||
@ -5452,8 +5483,16 @@ ath_txq_sched(struct ath_softc *sc, struct ath_txq *txq)
|
||||
* Don't schedule if the hardware queue is busy.
|
||||
* This (hopefully) gives some more time to aggregate
|
||||
* some packets in the aggregation queue.
|
||||
*
|
||||
* XXX It doesn't stop a parallel sender from sneaking
|
||||
* in transmitting a frame!
|
||||
*/
|
||||
if (txq->axq_aggr_depth >= sc->sc_hwq_limit) {
|
||||
/* XXX TXQ locking */
|
||||
if (txq->axq_aggr_depth + txq->fifo.axq_depth >= sc->sc_hwq_limit_aggr) {
|
||||
sc->sc_aggr_stats.aggr_sched_nopkt++;
|
||||
return;
|
||||
}
|
||||
if (txq->axq_depth >= sc->sc_hwq_limit_nonaggr) {
|
||||
sc->sc_aggr_stats.aggr_sched_nopkt++;
|
||||
return;
|
||||
}
|
||||
@ -5489,7 +5528,11 @@ ath_txq_sched(struct ath_softc *sc, struct ath_txq *txq)
|
||||
* packets. If we aren't running aggregation then
|
||||
* we should still limit the hardware queue depth.
|
||||
*/
|
||||
if (txq->axq_depth >= sc->sc_hwq_limit) {
|
||||
/* XXX TXQ locking */
|
||||
if (txq->axq_aggr_depth + txq->fifo.axq_depth >= sc->sc_hwq_limit_aggr) {
|
||||
break;
|
||||
}
|
||||
if (txq->axq_depth >= sc->sc_hwq_limit_nonaggr) {
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -47,6 +47,7 @@
|
||||
* How 'busy' to try and keep the hardware txq
|
||||
*/
|
||||
#define ATH_AGGR_MIN_QDEPTH 2
|
||||
#define ATH_NONAGGR_MIN_QDEPTH 32
|
||||
|
||||
/*
|
||||
* Watermark for scheduling TIDs in order to maximise aggregation.
|
||||
|
@ -814,16 +814,21 @@ struct ath_softc {
|
||||
int sc_txq_node_psq_maxdepth;
|
||||
|
||||
/*
|
||||
* Aggregation twiddles
|
||||
* Software queue twiddles
|
||||
*
|
||||
* hwq_limit: how busy to keep the hardware queue - don't schedule
|
||||
* further packets to the hardware, regardless of the TID
|
||||
* hwq_limit_nonaggr:
|
||||
* when to begin limiting non-aggregate frames to the
|
||||
* hardware queue, regardless of the TID.
|
||||
* hwq_limit_aggr:
|
||||
* when to begin limiting A-MPDU frames to the
|
||||
* hardware queue, regardless of the TID.
|
||||
* tid_hwq_lo: how low the per-TID hwq count has to be before the
|
||||
* TID will be scheduled again
|
||||
* tid_hwq_hi: how many frames to queue to the HWQ before the TID
|
||||
* stops being scheduled.
|
||||
*/
|
||||
int sc_hwq_limit;
|
||||
int sc_hwq_limit_nonaggr;
|
||||
int sc_hwq_limit_aggr;
|
||||
int sc_tid_hwq_lo;
|
||||
int sc_tid_hwq_hi;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user