From e81d909274198da222c9efb11ccbbc651ebee09e Mon Sep 17 00:00:00 2001 From: Adrian Chadd Date: Sat, 13 Jun 2020 23:35:22 +0000 Subject: [PATCH] [net80211] Handle offloaded AMSDU in AMPDU reordering. In the 11n world, most NICs did A-MPDU receive/transmit offloading but not A-MSDU offloading. So, the net80211 A-MPDU receive path would just receive MPDUs, do the reordering bit, pass it up to the rest of net80211 for crypto decap and then do A-MSDU decap before throwing ethernet frames up to the rest of the system. However 11ac and 11ax NICs are increasingly doing A-MSDU offload (and newer 11ax stuff does socket offload, but hey I don't want to scare people JUST yet) - so although A-MPDU reordering may be done in the OS, A-MSDUs look like a normal MPDU. This means that all the MSDUs are actually faked into a set of MPDUs with matching 802.11 header - the sequence number, QoS header and any encryption verification bits (like IV) are just copied. This shows up as MASSIVE packet loss in net80211, cause after the first MPDU we just toss the rest. (And don't get me started about ethernet decap with A-MPDU host reordering; we'll have to cross that bridge for later 11ac and 11ax bits too.) Anyway, this work changes each A-MPDU reorder slot into an mbufq. The mbufq is treated as a whole set of frames to pass up to the stack and reordered/de-duped as a group. The last frame in the reorder list is checked to see if it's an A-MSDU final frame so any duplicates are correctly tossed rather than double-received. Other than that, the rest of the logic is unchanged. The previous commit did a small subset of this - if there wasn't any reordering going on then it'd accept the A-MSDUs. This is the rest of the needed work. This is a no-op for 11n NICs doing A-MPDU reordering but needing software A-MSDU decap - they aren't tagged as A-MSDU and so any subsequent frames added to the reorder slot are tossed. Tested: * QCA9880 (ath10k/athp) - STA/AP mode; * RT3593 (if_rsu) - 11n STA+DWDS mode (I'm committing through it rn); * QCA9380 (if_ath) - STA/AP mode. --- sys/net80211/ieee80211_ht.c | 285 ++++++++++++++++++++++++++++-------- sys/net80211/ieee80211_ht.h | 4 +- 2 files changed, 226 insertions(+), 63 deletions(-) diff --git a/sys/net80211/ieee80211_ht.c b/sys/net80211/ieee80211_ht.c index e818cddea907..1827f012f4e0 100644 --- a/sys/net80211/ieee80211_ht.c +++ b/sys/net80211/ieee80211_ht.c @@ -517,6 +517,22 @@ ieee80211_decap_amsdu(struct ieee80211_node *ni, struct mbuf *m) return m; /* last delivered by caller */ } +static void +ampdu_rx_purge_slot(struct ieee80211_rx_ampdu *rap, int i) +{ + struct mbuf *m; + + /* Walk the queue, removing frames as appropriate */ + while (mbufq_len(&rap->rxa_mq[i]) != 0) { + m = mbufq_dequeue(&rap->rxa_mq[i]); + if (m == NULL) + break; + rap->rxa_qbytes -= m->m_pkthdr.len; + rap->rxa_qframes--; + m_freem(m); + } +} + /* * Add the given frame to the current RX reorder slot. * @@ -528,16 +544,94 @@ static int ampdu_rx_add_slot(struct ieee80211_rx_ampdu *rap, int off, int tid, ieee80211_seq rxseq, struct ieee80211_node *ni, - struct mbuf *m) + struct mbuf *m, + const struct ieee80211_rx_stats *rxs) { + const struct ieee80211_rx_stats *rxs_final = NULL; struct ieee80211vap *vap = ni->ni_vap; + int toss_dup; +#define PROCESS 0 /* caller should process frame */ +#define CONSUMED 1 /* frame consumed, caller does nothing */ - if (rap->rxa_m[off] == NULL) { - rap->rxa_m[off] = m; + /* + * Figure out if this is a duplicate frame for the given slot. + * + * We're assuming that the driver will hand us all the frames + * for a given AMSDU decap pass and if we get /a/ frame + * for an AMSDU decap then we'll get all of them. + * + * The tricksy bit is that we don't know when the /end/ of + * the decap pass is, because we aren't tracking state here + * per-slot to know that we've finished receiving the frame list. + * + * The driver sets RX_F_AMSDU and RX_F_AMSDU_MORE to tell us + * what's going on; so ideally we'd just check the frame at the + * end of the reassembly slot to see if its F_AMSDU w/ no F_AMSDU_MORE - + * that means we've received the whole AMSDU decap pass. + */ + + /* + * Get the rxs of the final mbuf in the slot, if one exists. + */ + if (mbufq_len(&rap->rxa_mq[off]) != 0) { + rxs_final = ieee80211_get_rx_params_ptr(mbufq_last(&rap->rxa_mq[off])); + } + + /* Default to tossing the duplicate frame */ + toss_dup = 1; + + /* + * Check to see if the final frame has F_AMSDU and F_AMSDU set, AND + * this frame has F_AMSDU set (MORE or otherwise.) That's a sign + * that more can come. + */ + + if ((rxs != NULL) && (rxs_final != NULL) && + ieee80211_check_rxseq_amsdu(rxs) && + ieee80211_check_rxseq_amsdu(rxs_final)) { + if (! ieee80211_check_rxseq_amsdu_more(rxs_final)) { + /* + * amsdu_more() returning 0 means "it's not the + * final frame" so we can append more + * frames here. + */ + toss_dup = 0; + } + } + + /* + * If the list is empty OR we have determined we can put more + * driver decap'ed AMSDU frames in here, then insert. + */ + if ((mbufq_len(&rap->rxa_mq[off]) == 0) || (toss_dup == 0)) { + if (mbufq_enqueue(&rap->rxa_mq[off], m) != 0) { + IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_INPUT | IEEE80211_MSG_11N, + ni->ni_macaddr, + "a-mpdu queue fail", + "seqno %u tid %u BA win <%u:%u> off=%d, qlen=%d, maxqlen=%d", + rxseq, tid, rap->rxa_start, + IEEE80211_SEQ_ADD(rap->rxa_start, rap->rxa_wnd-1), + off, + mbufq_len(&rap->rxa_mq[off]), + rap->rxa_mq[off].mq_maxlen); + /* XXX error count */ + m_freem(m); + return CONSUMED; + } rap->rxa_qframes++; rap->rxa_qbytes += m->m_pkthdr.len; vap->iv_stats.is_ampdu_rx_reorder++; - return (0); + /* + * Statistics for AMSDU decap. + */ + if (rxs != NULL && ieee80211_check_rxseq_amsdu(rxs)) { + if (ieee80211_check_rxseq_amsdu_more(rxs)) { + /* more=1, AMSDU, end of batch */ + IEEE80211_NODE_STAT(ni, rx_amsdu_more_end); + } else { + IEEE80211_NODE_STAT(ni, rx_amsdu_more); + } + } } else { IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_INPUT | IEEE80211_MSG_11N, @@ -545,26 +639,27 @@ ampdu_rx_add_slot(struct ieee80211_rx_ampdu *rap, int off, int tid, "seqno %u tid %u BA win <%u:%u>", rxseq, tid, rap->rxa_start, IEEE80211_SEQ_ADD(rap->rxa_start, rap->rxa_wnd-1)); + if (rxs != NULL) { + IEEE80211_DISCARD_MAC(vap, + IEEE80211_MSG_INPUT | IEEE80211_MSG_11N, + ni->ni_macaddr, "a-mpdu duplicate", + "seqno %d tid %u pktflags 0x%08x\n", + rxseq, tid, rxs->c_pktflags); + } + if (rxs_final != NULL) { + IEEE80211_DISCARD_MAC(vap, + IEEE80211_MSG_INPUT | IEEE80211_MSG_11N, + ni->ni_macaddr, "a-mpdu duplicate", + "final: pktflags 0x%08x\n", + rxs_final->c_pktflags); + } vap->iv_stats.is_rx_dup++; IEEE80211_NODE_STAT(ni, rx_dup); m_freem(m); - return (-1); } -} - -static void -ampdu_rx_purge_slot(struct ieee80211_rx_ampdu *rap, int i) -{ - struct mbuf *m; - - m = rap->rxa_m[i]; - if (m == NULL) - return; - - rap->rxa_m[i] = NULL; - rap->rxa_qbytes -= m->m_pkthdr.len; - rap->rxa_qframes--; - m_freem(m); + return CONSUMED; +#undef CONSUMED +#undef PROCESS } /* @@ -585,6 +680,18 @@ ampdu_rx_purge(struct ieee80211_rx_ampdu *rap) rap->rxa_qbytes, rap->rxa_qframes)); } +static void +ieee80211_ampdu_rx_init_rap(struct ieee80211_node *ni, + struct ieee80211_rx_ampdu *rap) +{ + int i; + + /* XXX TODO: ensure the queues are empty */ + memset(rap, 0, sizeof(*rap)); + for (i = 0; i < IEEE80211_AGGR_BAWMAX; i++) + mbufq_init(&rap->rxa_mq[i], 256); +} + /* * Start A-MPDU rx/re-order processing for the specified TID. */ @@ -602,7 +709,7 @@ ampdu_rx_start(struct ieee80211_node *ni, struct ieee80211_rx_ampdu *rap, */ ampdu_rx_purge(rap); } - memset(rap, 0, sizeof(*rap)); + ieee80211_ampdu_rx_init_rap(ni, rap); rap->rxa_wnd = (bufsiz == 0) ? IEEE80211_AGGR_BAWMAX : min(bufsiz, IEEE80211_AGGR_BAWMAX); rap->rxa_start = MS(baseqctl, IEEE80211_BASEQ_START); @@ -638,7 +745,8 @@ ieee80211_ampdu_rx_start_ext(struct ieee80211_node *ni, int tid, int seq, int ba ampdu_rx_purge(rap); } - memset(rap, 0, sizeof(*rap)); + ieee80211_ampdu_rx_init_rap(ni, rap); + rap->rxa_wnd = (baw== 0) ? IEEE80211_AGGR_BAWMAX : min(baw, IEEE80211_AGGR_BAWMAX); if (seq == -1) { @@ -708,18 +816,20 @@ ampdu_dispatch_slot(struct ieee80211_rx_ampdu *rap, struct ieee80211_node *ni, int i) { struct mbuf *m; + int n = 0; - if (rap->rxa_m[i] == NULL) - return (0); + while (mbufq_len(&rap->rxa_mq[i]) != 0) { + m = mbufq_dequeue(&rap->rxa_mq[i]); + if (m == NULL) + break; + n++; - m = rap->rxa_m[i]; - rap->rxa_m[i] = NULL; - rap->rxa_qbytes -= m->m_pkthdr.len; - rap->rxa_qframes--; + rap->rxa_qbytes -= m->m_pkthdr.len; + rap->rxa_qframes--; - ampdu_dispatch(ni, m); - - return (1); + ampdu_dispatch(ni, m); + } + return (n); } static void @@ -728,22 +838,22 @@ ampdu_rx_moveup(struct ieee80211_rx_ampdu *rap, struct ieee80211_node *ni, { struct ieee80211vap *vap = ni->ni_vap; + /* + * If frames remain, copy the mbuf pointers down so + * they correspond to the offsets in the new window. + */ if (rap->rxa_qframes != 0) { int n = rap->rxa_qframes, j; - - if (winstart != -1) { - /* - * NB: in window-sliding mode, loop assumes i > 0 - * and/or rxa_m[0] is NULL - */ - KASSERT(rap->rxa_m[0] == NULL, - ("%s: BA window slot 0 occupied", __func__)); - } for (j = i+1; j < rap->rxa_wnd; j++) { - if (rap->rxa_m[j] != NULL) { - rap->rxa_m[j-i] = rap->rxa_m[j]; - rap->rxa_m[j] = NULL; - if (--n == 0) + /* + * Concat the list contents over, which will + * blank the source list for us. + */ + if (mbufq_len(&rap->rxa_mq[j]) != 0) { + n = n - mbufq_len(&rap->rxa_mq[j]); + mbufq_concat(&rap->rxa_mq[j-i], &rap->rxa_mq[j]); + KASSERT(n >= 0, ("%s: n < 0 (%d)", __func__, n)); + if (n == 0) break; } } @@ -768,18 +878,18 @@ static void ampdu_rx_dispatch(struct ieee80211_rx_ampdu *rap, struct ieee80211_node *ni) { struct ieee80211vap *vap = ni->ni_vap; - int i; + int i, r, r2; /* flush run of frames */ + r2 = 0; for (i = 1; i < rap->rxa_wnd; i++) { - if (ampdu_dispatch_slot(rap, ni, i) == 0) + r = ampdu_dispatch_slot(rap, ni, i); + if (r == 0) break; + r2 += r; } - /* - * If frames remain, copy the mbuf pointers down so - * they correspond to the offsets in the new window. - */ + /* move up frames */ ampdu_rx_moveup(rap, ni, i, -1); /* @@ -787,7 +897,14 @@ ampdu_rx_dispatch(struct ieee80211_rx_ampdu *rap, struct ieee80211_node *ni) * reflect the frames just dispatched. */ rap->rxa_start = IEEE80211_SEQ_ADD(rap->rxa_start, i); - vap->iv_stats.is_ampdu_rx_oor += i; + vap->iv_stats.is_ampdu_rx_oor += r2; + + IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, + "%s: moved slot up %d slots to start at %d (%d frames)", + __func__, + i, + rap->rxa_start, + r2); } /* @@ -796,14 +913,20 @@ ampdu_rx_dispatch(struct ieee80211_rx_ampdu *rap, struct ieee80211_node *ni) static void ampdu_rx_flush(struct ieee80211_node *ni, struct ieee80211_rx_ampdu *rap) { - struct ieee80211vap *vap = ni->ni_vap; int i, r; for (i = 0; i < rap->rxa_wnd; i++) { r = ampdu_dispatch_slot(rap, ni, i); if (r == 0) continue; - vap->iv_stats.is_ampdu_rx_oor += r; + ni->ni_vap->iv_stats.is_ampdu_rx_oor += r; + + IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, + "%s: moved slot up %d slots to start at %d (%d frames)", + __func__, + 1, + rap->rxa_start, + r); if (rap->rxa_qframes == 0) break; @@ -832,14 +955,23 @@ ampdu_rx_flush_upto(struct ieee80211_node *ni, */ seqno = rap->rxa_start; for (i = 0; i < rap->rxa_wnd; i++) { - r = ampdu_dispatch_slot(rap, ni, i); - if (r == 0) { + if ((r = mbufq_len(&rap->rxa_mq[i])) != 0) { + (void) ampdu_dispatch_slot(rap, ni, i); + } else { if (!IEEE80211_SEQ_BA_BEFORE(seqno, winstart)) break; } vap->iv_stats.is_ampdu_rx_oor += r; seqno = IEEE80211_SEQ_INC(seqno); + + IEEE80211_NOTE(ni->ni_vap, IEEE80211_MSG_11N, ni, + "%s: moved slot up %d slots to start at %d (%d frames)", + __func__, + 1, + seqno, + r); } + /* * If frames remain, copy the mbuf pointers down so * they correspond to the offsets in the new window. @@ -862,6 +994,10 @@ ampdu_rx_flush_upto(struct ieee80211_node *ni, * this frame completes a run, flush any pending frames. We * return 1 if the frame is consumed. A 0 is returned if * the frame should be processed normally by the caller. + * + * A-MSDU: handle hardware decap'ed A-MSDU frames that are + * pretending to be MPDU's. They're dispatched directly if + * able; or attempted to put into the receive reordering slot. */ int ieee80211_ampdu_reorder(struct ieee80211_node *ni, struct mbuf *m, @@ -875,7 +1011,8 @@ ieee80211_ampdu_reorder(struct ieee80211_node *ni, struct mbuf *m, ieee80211_seq rxseq; uint8_t tid; int off; - int amsdu_more = ieee80211_check_rxseq_amsdu_more(rxs); + int amsdu = ieee80211_check_rxseq_amsdu(rxs); + int amsdu_end = ieee80211_check_rxseq_amsdu_more(rxs); KASSERT((m->m_flags & (M_AMPDU | M_AMPDU_MPDU)) == M_AMPDU, ("!a-mpdu or already re-ordered, flags 0x%x", m->m_flags)); @@ -944,7 +1081,7 @@ ieee80211_ampdu_reorder(struct ieee80211_node *ni, struct mbuf *m, /* * Dispatch as many packets as we can. */ - KASSERT(rap->rxa_m[0] == NULL, ("unexpected dup")); + KASSERT((mbufq_len(&rap->rxa_mq[0]) == 0), ("unexpected dup")); ampdu_dispatch(ni, m); ampdu_rx_dispatch(rap, ni); return CONSUMED; @@ -953,8 +1090,16 @@ ieee80211_ampdu_reorder(struct ieee80211_node *ni, struct mbuf *m, * In order; advance window if needed and notify * caller to dispatch directly. */ - if (amsdu_more) + if (amsdu) { + if (amsdu_end) { + rap->rxa_start = IEEE80211_SEQ_INC(rxseq); + IEEE80211_NODE_STAT(ni, rx_amsdu_more_end); + } else { + IEEE80211_NODE_STAT(ni, rx_amsdu_more); + } + } else { rap->rxa_start = IEEE80211_SEQ_INC(rxseq); + } return PROCESS; } } @@ -998,8 +1143,24 @@ ieee80211_ampdu_reorder(struct ieee80211_node *ni, struct mbuf *m, rap->rxa_qframes; ampdu_rx_flush(ni, rap); } - if (amsdu_more) - rap->rxa_start = IEEE80211_SEQ_INC(rxseq); + /* + * Advance the window if needed and notify + * the caller to dispatch directly. + */ + if (amsdu) { + if (amsdu_end) { + rap->rxa_start = + IEEE80211_SEQ_INC(rxseq); + IEEE80211_NODE_STAT(ni, + rx_amsdu_more_end); + } else { + IEEE80211_NODE_STAT(ni, + rx_amsdu_more); + } + } else { + rap->rxa_start = + IEEE80211_SEQ_INC(rxseq); + } return PROCESS; } } else { @@ -1010,8 +1171,7 @@ ieee80211_ampdu_reorder(struct ieee80211_node *ni, struct mbuf *m, } /* save packet - this consumes, no matter what */ - ampdu_rx_add_slot(rap, off, tid, rxseq, ni, m); - + ampdu_rx_add_slot(rap, off, tid, rxseq, ni, m, rxs); return CONSUMED; } if (off < IEEE80211_SEQ_BA_RANGE) { @@ -1175,6 +1335,7 @@ ieee80211_ht_node_init(struct ieee80211_node *ni) tap->txa_ni = ni; ieee80211_txampdu_init_pps(tap); /* NB: further initialization deferred */ + ieee80211_ampdu_rx_init_rap(ni, &ni->ni_rx_ampdu[tid]); } ni->ni_flags |= IEEE80211_NODE_HT | IEEE80211_NODE_AMPDU | IEEE80211_NODE_AMSDU; diff --git a/sys/net80211/ieee80211_ht.h b/sys/net80211/ieee80211_ht.h index 05fb433ad4a1..c867d3a50513 100644 --- a/sys/net80211/ieee80211_ht.h +++ b/sys/net80211/ieee80211_ht.h @@ -33,6 +33,8 @@ * 802.11n protocol implementation definitions. */ +#include + #define IEEE80211_AGGR_BAWMAX 64 /* max block ack window size */ /* threshold for aging overlapping non-HT bss */ #define IEEE80211_NONHT_PRESENT_AGE msecs_to_ticks(60*1000) @@ -169,7 +171,7 @@ struct ieee80211_rx_ampdu { uint16_t rxa_wnd; /* BA window size */ int rxa_age; /* age of oldest frame in window */ int rxa_nframes; /* frames since ADDBA */ - struct mbuf *rxa_m[IEEE80211_AGGR_BAWMAX]; + struct mbufq rxa_mq[IEEE80211_AGGR_BAWMAX]; void *rxa_private; uint64_t rxa_pad[3]; };