diff --git a/sys/dev/ath/if_ath_tx.c b/sys/dev/ath/if_ath_tx.c
index e6a31e5d62dd..0a248bedc5a6 100644
--- a/sys/dev/ath/if_ath_tx.c
+++ b/sys/dev/ath/if_ath_tx.c
@@ -3129,7 +3129,21 @@ ath_tx_swq(struct ath_softc *sc, struct ieee80211_node *ni,
 		ATH_TID_INSERT_TAIL(atid, bf, bf_list);
 		/* XXX sched? */
 	} else if (ath_tx_ampdu_running(sc, an, tid)) {
-		/* AMPDU running, attempt direct dispatch if possible */
+		/*
+		 * AMPDU running, queue a single frame if the hardware queue
+		 * isn't busy.
+		 *
+		 * If the hardware queue is busy sending an aggregate frame,
+		 * then just hold off so we can queue more aggregate frames.
+		 *
+		 * Otherwise we may end up with single frames leaking through
+		 * because we are dispatching them too quickly.
+		 *
+		 * TODO: maybe we should treat this as two policies - minimise
+		 * latency, or maximise throughput.  Then for BE/BK we can
+		 * maximise throughput, and VO/VI (if AMPDU is enabled!)
+		 * minimise latency.
+		 */
 
 		/*
 		 * Always queue the frame to the tail of the list.
@@ -3138,18 +3152,18 @@ ath_tx_swq(struct ath_softc *sc, struct ieee80211_node *ni,
 
 		/*
 		 * If the hardware queue isn't busy, direct dispatch
-		 * the head frame in the list.  Don't schedule the
-		 * TID - let it build some more frames first?
+		 * the head frame in the list.
 		 *
-		 * When running A-MPDU, always just check the hardware
-		 * queue depth against the aggregate frame limit.
-		 * We don't want to burst a large number of single frames
-		 * out to the hardware; we want to aggressively hold back.
+		 * Note: if we're, say, configured to do ADDBA but not A-MPDU,
+		 * then maybe we still want to queue two non-aggregate frames
+		 * to the hardware.  (Again with the per-TID policy
+		 * configuration.)
 		 *
 		 * Otherwise, schedule the TID.
 		 */
 		/* XXX TXQ locking */
-		if (txq->axq_depth + txq->fifo.axq_depth < sc->sc_hwq_limit_aggr) {
+		if (txq->axq_depth + txq->fifo.axq_depth == 0) {
+
 			bf = ATH_TID_FIRST(atid);
 			ATH_TID_REMOVE(atid, bf, bf_list);
 
@@ -5615,19 +5629,40 @@ ath_txq_sched(struct ath_softc *sc, struct ath_txq *txq)
 	ATH_TX_LOCK_ASSERT(sc);
 
 	/*
-	 * Don't schedule if the hardware queue is busy.
-	 * This (hopefully) gives some more time to aggregate
-	 * some packets in the aggregation queue.
+	 * For non-EDMA chips, aggr frames that have been built are
+	 * in axq_aggr_depth, whether they've been scheduled or not.
+	 * There's no FIFO, so txq->axq_depth is what's been scheduled
+	 * to the hardware.
 	 *
-	 * XXX It doesn't stop a parallel sender from sneaking
-	 * in transmitting a frame!
+	 * For EDMA chips, we do it in two stages.  The existing code
+	 * builds a list of frames to go to the hardware and the EDMA
+	 * code turns it into a single entry to push into the FIFO.
+	 * That way we don't take up one packet per FIFO slot.
+	 * We do push one aggregate per FIFO slot though, just to keep
+	 * things simple.
+	 *
+	 * The FIFO depth is what's in the hardware; the txq->axq_depth
+	 * is what's been scheduled to the FIFO.
+	 *
+	 * fifo.axq_depth is the number of frames (or aggregates) pushed
+	 * into the EDMA FIFO.  For multi-frame lists, this is the number
+	 * of frames pushed in.
+	 * axq_fifo_depth is the number of FIFO slots currently busy.
 	 */
-	/* XXX TXQ locking */
-	if (txq->axq_aggr_depth + txq->fifo.axq_depth >= sc->sc_hwq_limit_aggr) {
+
+	/* For EDMA and non-EDMA, check built/scheduled against aggr limit */
+	if (txq->axq_aggr_depth >= sc->sc_hwq_limit_aggr) {
 		sc->sc_aggr_stats.aggr_sched_nopkt++;
 		return;
 	}
-	if (txq->axq_depth >= sc->sc_hwq_limit_nonaggr) {
+
+	/*
+	 * For non-EDMA chips, axq_depth is the "what's scheduled to
+	 * the hardware list".  For EDMA it's "What's built for the hardware"
+	 * and fifo.axq_depth is how many frames have been dispatched
+	 * already to the hardware.
+	 */
+	if (txq->axq_depth + txq->fifo.axq_depth >= sc->sc_hwq_limit_nonaggr) {
 		sc->sc_aggr_stats.aggr_sched_nopkt++;
 		return;
 	}
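
Note: the behavioural change in the ath_tx_swq() hunks above is that, with A-MPDU running, a frame is only direct-dispatched when the hardware queue is completely idle, rather than merely under sc_hwq_limit_aggr. A minimal standalone sketch of that gate, using hypothetical stand-in types and field names rather than the real ath(4) structures, might look like this:

#include <stdbool.h>

/* Stand-in for the queue depth fields consulted by the patched check. */
struct txq_sketch {
	int axq_depth;		/* frames on the hardware list */
	int fifo_axq_depth;	/* frames/aggregates pushed into the EDMA FIFO */
};

/*
 * Old behaviour: allow direct dispatch while the hardware queue is below
 * the aggregate limit, which lets single frames keep trickling out and
 * starves the aggregation path.
 */
static bool
may_direct_dispatch_old(const struct txq_sketch *txq, int hwq_limit_aggr)
{
	return (txq->axq_depth + txq->fifo_axq_depth < hwq_limit_aggr);
}

/*
 * New behaviour: only bypass the software TID queue when the hardware
 * queue is completely idle, so queued frames can be formed into
 * aggregates instead of leaking out one at a time.
 */
static bool
may_direct_dispatch_new(const struct txq_sketch *txq)
{
	return (txq->axq_depth + txq->fifo_axq_depth == 0);
}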
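
Similarly, the ath_txq_sched() hunk splits the early-exit test in two: aggregates that have been built (axq_aggr_depth) are compared against the aggregate limit, while everything handed towards the hardware (axq_depth plus the EDMA FIFO contents) is compared against the non-aggregate limit. A sketch of that pair of checks, again with hypothetical stand-in names:

#include <stdbool.h>

/* Stand-in depth counters and limits; not the real ath(4) structures. */
struct sched_txq_sketch {
	int axq_depth;		/* built (EDMA) or scheduled (non-EDMA) frames */
	int axq_aggr_depth;	/* aggregates built, scheduled or not */
	int fifo_axq_depth;	/* entries already pushed into the EDMA FIFO */
};

/*
 * Return true when the scheduler should bail out early, mirroring the
 * two "aggr_sched_nopkt" checks in the patched ath_txq_sched().
 */
static bool
txq_sched_blocked(const struct sched_txq_sketch *txq,
    int hwq_limit_aggr, int hwq_limit_nonaggr)
{
	/* Built/scheduled aggregates against the aggregate limit. */
	if (txq->axq_aggr_depth >= hwq_limit_aggr)
		return (true);

	/* Everything heading to the hardware against the non-aggr limit. */
	if (txq->axq_depth + txq->fifo_axq_depth >= hwq_limit_nonaggr)
		return (true);

	return (false);
}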