freebsd-dev/sys/net80211/ieee80211_superg.c

1054 lines
29 KiB
C
Raw Normal View History

/*-
* Copyright (c) 2002-2009 Sam Leffler, Errno Consulting
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_wlan.h"
#ifdef IEEE80211_SUPPORT_SUPERG
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/kernel.h>
#include <sys/endian.h>
#include <sys/socket.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_llc.h>
#include <net/if_media.h>
#include <net/bpf.h>
#include <net/ethernet.h>
#include <net80211/ieee80211_var.h>
#include <net80211/ieee80211_input.h>
#include <net80211/ieee80211_phy.h>
#include <net80211/ieee80211_superg.h>
/*
* Atheros fast-frame encapsulation format.
* FF max payload:
* 802.2 + FFHDR + HPAD + 802.3 + 802.2 + 1500 + SPAD + 802.3 + 802.2 + 1500:
* 8 + 4 + 4 + 14 + 8 + 1500 + 6 + 14 + 8 + 1500
* = 3066
*/
/* fast frame header is 32-bits */
#define ATH_FF_PROTO 0x0000003f /* protocol */
#define ATH_FF_PROTO_S 0
#define ATH_FF_FTYPE 0x000000c0 /* frame type */
#define ATH_FF_FTYPE_S 6
#define ATH_FF_HLEN32 0x00000300 /* optional hdr length */
#define ATH_FF_HLEN32_S 8
#define ATH_FF_SEQNUM 0x001ffc00 /* sequence number */
#define ATH_FF_SEQNUM_S 10
#define ATH_FF_OFFSET 0xffe00000 /* offset to 2nd payload */
#define ATH_FF_OFFSET_S 21
#define ATH_FF_MAX_HDR_PAD 4
#define ATH_FF_MAX_SEP_PAD 6
#define ATH_FF_MAX_HDR 30
#define ATH_FF_PROTO_L2TUNNEL 0 /* L2 tunnel protocol */
#define ATH_FF_ETH_TYPE 0x88bd /* Ether type for encapsulated frames */
#define ATH_FF_SNAP_ORGCODE_0 0x00
#define ATH_FF_SNAP_ORGCODE_1 0x03
#define ATH_FF_SNAP_ORGCODE_2 0x7f
#define ATH_FF_TXQMIN 2 /* min txq depth for staging */
#define ATH_FF_TXQMAX 50 /* maximum # of queued frames allowed */
#define ATH_FF_STAGEMAX 5 /* max waiting period for staged frame*/
#define ETHER_HEADER_COPY(dst, src) \
memcpy(dst, src, sizeof(struct ether_header))
static int ieee80211_ffppsmin = 2; /* pps threshold for ff aggregation */
SYSCTL_INT(_net_wlan, OID_AUTO, ffppsmin, CTLFLAG_RW,
&ieee80211_ffppsmin, 0, "min packet rate before fast-frame staging");
static int ieee80211_ffagemax = -1; /* max time frames held on stage q */
SYSCTL_PROC(_net_wlan, OID_AUTO, ffagemax, CTLTYPE_INT | CTLFLAG_RW,
&ieee80211_ffagemax, 0, ieee80211_sysctl_msecs_ticks, "I",
"max hold time for fast-frame staging (ms)");
void
ieee80211_superg_attach(struct ieee80211com *ic)
{
struct ieee80211_superg *sg;
IEEE80211_FF_LOCK_INIT(ic, ic->ic_name);
[net80211] Initial A-MSDU support for testing / evaluation A-MSDU is another 11n aggregation mechanism where multiple ethernet frames get LLC encapsulated (so they have a length field), padded, and put in a single MPDU (802.11 MAC frame.) This means it gets sent out as a single frame, with a single seqno, it's acked as one frame, etc. It turns out that, hah, atheros fast frames is almost but not quite like this, so I'm reusing all of the current superg/fast-frames stuff in order to actually transmit A-MSDU. Yes, this means that A-MSDU frames are also only aggregated two at a time, so it's not necessarily a huge win, but it's better than nothing. This doesn't do anything by default - the driver needs to say it does A-MSDU as well as set the AMSDU software TX capability so this code path gets exercised. For now, the only driver that enables this is urtwn. I'll enable it for rsu at some point soon. Tested: * Add an amsdu encap path to aggregate two frames, same as the fast-frames path. * Always do the superg init/teardown and node init/teardown stuff, regardless of whether the nodes are doing fast-frames (the ATH capability stuff.) That way we can reuse it for amsdu. * Don't do AMSDU for multicast/broadcast and EAPOL frames. * If we're doing A-MPDU, then don't bother doing FF/A-MSDU. We can likely do both together, but I don't want to change behaviour. * Teach the fast frames approx txtime logic to support the 11n rates. But, since we don't currently have a full "current rate" support, assume it's HT20, long-gi, etc. That way we overshoot on the TX time estimation, so we're always inside the requirements. (And we only aggregate two frames for now, so we're not really going to exceed that.) * Drop the maximum FF age default down to 2ms, otherwise we end up with some very annoyingly large latencies. TODO: * We only aggregate two ethernet frames, so I'm not checking the max A-MSDU size. But when it comes time to support >2 frames, we should obey that. Tested: * urtwn(4)
2016-04-06 01:21:51 +00:00
sg = (struct ieee80211_superg *) IEEE80211_MALLOC(
sizeof(struct ieee80211_superg), M_80211_VAP,
IEEE80211_M_NOWAIT | IEEE80211_M_ZERO);
if (sg == NULL) {
printf("%s: cannot allocate SuperG state block\n",
__func__);
return;
}
[net80211] Initial A-MSDU support for testing / evaluation A-MSDU is another 11n aggregation mechanism where multiple ethernet frames get LLC encapsulated (so they have a length field), padded, and put in a single MPDU (802.11 MAC frame.) This means it gets sent out as a single frame, with a single seqno, it's acked as one frame, etc. It turns out that, hah, atheros fast frames is almost but not quite like this, so I'm reusing all of the current superg/fast-frames stuff in order to actually transmit A-MSDU. Yes, this means that A-MSDU frames are also only aggregated two at a time, so it's not necessarily a huge win, but it's better than nothing. This doesn't do anything by default - the driver needs to say it does A-MSDU as well as set the AMSDU software TX capability so this code path gets exercised. For now, the only driver that enables this is urtwn. I'll enable it for rsu at some point soon. Tested: * Add an amsdu encap path to aggregate two frames, same as the fast-frames path. * Always do the superg init/teardown and node init/teardown stuff, regardless of whether the nodes are doing fast-frames (the ATH capability stuff.) That way we can reuse it for amsdu. * Don't do AMSDU for multicast/broadcast and EAPOL frames. * If we're doing A-MPDU, then don't bother doing FF/A-MSDU. We can likely do both together, but I don't want to change behaviour. * Teach the fast frames approx txtime logic to support the 11n rates. But, since we don't currently have a full "current rate" support, assume it's HT20, long-gi, etc. That way we overshoot on the TX time estimation, so we're always inside the requirements. (And we only aggregate two frames for now, so we're not really going to exceed that.) * Drop the maximum FF age default down to 2ms, otherwise we end up with some very annoyingly large latencies. TODO: * We only aggregate two ethernet frames, so I'm not checking the max A-MSDU size. But when it comes time to support >2 frames, we should obey that. Tested: * urtwn(4)
2016-04-06 01:21:51 +00:00
ic->ic_superg = sg;
/*
* Default to not being so aggressive for FF/AMSDU
* aging, otherwise we may hold a frame around
* for way too long before we expire it out.
*/
ieee80211_ffagemax = msecs_to_ticks(2);
}
void
ieee80211_superg_detach(struct ieee80211com *ic)
{
IEEE80211_FF_LOCK_DESTROY(ic);
if (ic->ic_superg != NULL) {
IEEE80211_FREE(ic->ic_superg, M_80211_VAP);
ic->ic_superg = NULL;
}
}
void
ieee80211_superg_vattach(struct ieee80211vap *vap)
{
struct ieee80211com *ic = vap->iv_ic;
if (ic->ic_superg == NULL) /* NB: can't do fast-frames w/o state */
vap->iv_caps &= ~IEEE80211_C_FF;
if (vap->iv_caps & IEEE80211_C_FF)
vap->iv_flags |= IEEE80211_F_FF;
/* NB: we only implement sta mode */
if (vap->iv_opmode == IEEE80211_M_STA &&
(vap->iv_caps & IEEE80211_C_TURBOP))
vap->iv_flags |= IEEE80211_F_TURBOP;
}
void
ieee80211_superg_vdetach(struct ieee80211vap *vap)
{
}
#define ATH_OUI_BYTES 0x00, 0x03, 0x7f
/*
* Add a WME information element to a frame.
*/
uint8_t *
ieee80211_add_ath(uint8_t *frm, uint8_t caps, ieee80211_keyix defkeyix)
{
static const struct ieee80211_ath_ie info = {
.ath_id = IEEE80211_ELEMID_VENDOR,
.ath_len = sizeof(struct ieee80211_ath_ie) - 2,
.ath_oui = { ATH_OUI_BYTES },
.ath_oui_type = ATH_OUI_TYPE,
.ath_oui_subtype= ATH_OUI_SUBTYPE,
.ath_version = ATH_OUI_VERSION,
};
struct ieee80211_ath_ie *ath = (struct ieee80211_ath_ie *) frm;
memcpy(frm, &info, sizeof(info));
ath->ath_capability = caps;
if (defkeyix != IEEE80211_KEYIX_NONE) {
ath->ath_defkeyix[0] = (defkeyix & 0xff);
ath->ath_defkeyix[1] = ((defkeyix >> 8) & 0xff);
} else {
ath->ath_defkeyix[0] = 0xff;
ath->ath_defkeyix[1] = 0x7f;
}
return frm + sizeof(info);
}
#undef ATH_OUI_BYTES
uint8_t *
ieee80211_add_athcaps(uint8_t *frm, const struct ieee80211_node *bss)
{
const struct ieee80211vap *vap = bss->ni_vap;
return ieee80211_add_ath(frm,
vap->iv_flags & IEEE80211_F_ATHEROS,
((vap->iv_flags & IEEE80211_F_WPA) == 0 &&
bss->ni_authmode != IEEE80211_AUTH_8021X) ?
vap->iv_def_txkey : IEEE80211_KEYIX_NONE);
}
void
ieee80211_parse_ath(struct ieee80211_node *ni, uint8_t *ie)
{
const struct ieee80211_ath_ie *ath =
(const struct ieee80211_ath_ie *) ie;
ni->ni_ath_flags = ath->ath_capability;
ni->ni_ath_defkeyix = le16dec(&ath->ath_defkeyix);
}
int
ieee80211_parse_athparams(struct ieee80211_node *ni, uint8_t *frm,
const struct ieee80211_frame *wh)
{
struct ieee80211vap *vap = ni->ni_vap;
const struct ieee80211_ath_ie *ath;
u_int len = frm[1];
int capschanged;
uint16_t defkeyix;
if (len < sizeof(struct ieee80211_ath_ie)-2) {
IEEE80211_DISCARD_IE(vap,
IEEE80211_MSG_ELEMID | IEEE80211_MSG_SUPERG,
wh, "Atheros", "too short, len %u", len);
return -1;
}
ath = (const struct ieee80211_ath_ie *)frm;
capschanged = (ni->ni_ath_flags != ath->ath_capability);
defkeyix = le16dec(ath->ath_defkeyix);
if (capschanged || defkeyix != ni->ni_ath_defkeyix) {
ni->ni_ath_flags = ath->ath_capability;
ni->ni_ath_defkeyix = defkeyix;
IEEE80211_NOTE(vap, IEEE80211_MSG_SUPERG, ni,
"ath ie change: new caps 0x%x defkeyix 0x%x",
ni->ni_ath_flags, ni->ni_ath_defkeyix);
}
if (IEEE80211_ATH_CAP(vap, ni, ATHEROS_CAP_TURBO_PRIME)) {
uint16_t curflags, newflags;
/*
* Check for turbo mode switch. Calculate flags
* for the new mode and effect the switch.
*/
newflags = curflags = vap->iv_ic->ic_bsschan->ic_flags;
/* NB: BOOST is not in ic_flags, so get it from the ie */
if (ath->ath_capability & ATHEROS_CAP_BOOST)
newflags |= IEEE80211_CHAN_TURBO;
else
newflags &= ~IEEE80211_CHAN_TURBO;
if (newflags != curflags)
ieee80211_dturbo_switch(vap, newflags);
}
return capschanged;
}
/*
* Decap the encapsulated frame pair and dispatch the first
* for delivery. The second frame is returned for delivery
* via the normal path.
*/
struct mbuf *
ieee80211_ff_decap(struct ieee80211_node *ni, struct mbuf *m)
{
#define FF_LLC_SIZE (sizeof(struct ether_header) + sizeof(struct llc))
#define MS(x,f) (((x) & f) >> f##_S)
struct ieee80211vap *vap = ni->ni_vap;
struct llc *llc;
uint32_t ath;
struct mbuf *n;
int framelen;
/* NB: we assume caller does this check for us */
KASSERT(IEEE80211_ATH_CAP(vap, ni, IEEE80211_NODE_FF),
("ff not negotiated"));
/*
* Check for fast-frame tunnel encapsulation.
*/
if (m->m_pkthdr.len < 3*FF_LLC_SIZE)
return m;
if (m->m_len < FF_LLC_SIZE &&
(m = m_pullup(m, FF_LLC_SIZE)) == NULL) {
IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ANY,
ni->ni_macaddr, "fast-frame",
"%s", "m_pullup(llc) failed");
vap->iv_stats.is_rx_tooshort++;
return NULL;
}
llc = (struct llc *)(mtod(m, uint8_t *) +
sizeof(struct ether_header));
if (llc->llc_snap.ether_type != htons(ATH_FF_ETH_TYPE))
return m;
m_adj(m, FF_LLC_SIZE);
m_copydata(m, 0, sizeof(uint32_t), (caddr_t) &ath);
if (MS(ath, ATH_FF_PROTO) != ATH_FF_PROTO_L2TUNNEL) {
IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ANY,
ni->ni_macaddr, "fast-frame",
"unsupport tunnel protocol, header 0x%x", ath);
vap->iv_stats.is_ff_badhdr++;
m_freem(m);
return NULL;
}
/* NB: skip header and alignment padding */
m_adj(m, roundup(sizeof(uint32_t) - 2, 4) + 2);
vap->iv_stats.is_ff_decap++;
/*
* Decap the first frame, bust it apart from the
* second and deliver; then decap the second frame
* and return it to the caller for normal delivery.
*/
m = ieee80211_decap1(m, &framelen);
if (m == NULL) {
IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ANY,
ni->ni_macaddr, "fast-frame", "%s", "first decap failed");
vap->iv_stats.is_ff_tooshort++;
return NULL;
}
n = m_split(m, framelen, M_NOWAIT);
if (n == NULL) {
IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ANY,
ni->ni_macaddr, "fast-frame",
"%s", "unable to split encapsulated frames");
vap->iv_stats.is_ff_split++;
m_freem(m); /* NB: must reclaim */
return NULL;
}
/* XXX not right for WDS */
vap->iv_deliver_data(vap, ni, m); /* 1st of pair */
/*
* Decap second frame.
*/
m_adj(n, roundup2(framelen, 4) - framelen); /* padding */
n = ieee80211_decap1(n, &framelen);
if (n == NULL) {
IEEE80211_DISCARD_MAC(vap, IEEE80211_MSG_ANY,
ni->ni_macaddr, "fast-frame", "%s", "second decap failed");
vap->iv_stats.is_ff_tooshort++;
}
/* XXX verify framelen against mbuf contents */
return n; /* 2nd delivered by caller */
#undef MS
#undef FF_LLC_SIZE
}
/*
* Fast frame encapsulation. There must be two packets
* chained with m_nextpkt. We do header adjustment for
* each, add the tunnel encapsulation, and then concatenate
* the mbuf chains to form a single frame for transmission.
*/
struct mbuf *
ieee80211_ff_encap(struct ieee80211vap *vap, struct mbuf *m1, int hdrspace,
struct ieee80211_key *key)
{
struct mbuf *m2;
struct ether_header eh1, eh2;
struct llc *llc;
struct mbuf *m;
int pad;
m2 = m1->m_nextpkt;
if (m2 == NULL) {
IEEE80211_DPRINTF(vap, IEEE80211_MSG_SUPERG,
"%s: only one frame\n", __func__);
goto bad;
}
m1->m_nextpkt = NULL;
[net80211] Initial A-MSDU support for testing / evaluation A-MSDU is another 11n aggregation mechanism where multiple ethernet frames get LLC encapsulated (so they have a length field), padded, and put in a single MPDU (802.11 MAC frame.) This means it gets sent out as a single frame, with a single seqno, it's acked as one frame, etc. It turns out that, hah, atheros fast frames is almost but not quite like this, so I'm reusing all of the current superg/fast-frames stuff in order to actually transmit A-MSDU. Yes, this means that A-MSDU frames are also only aggregated two at a time, so it's not necessarily a huge win, but it's better than nothing. This doesn't do anything by default - the driver needs to say it does A-MSDU as well as set the AMSDU software TX capability so this code path gets exercised. For now, the only driver that enables this is urtwn. I'll enable it for rsu at some point soon. Tested: * Add an amsdu encap path to aggregate two frames, same as the fast-frames path. * Always do the superg init/teardown and node init/teardown stuff, regardless of whether the nodes are doing fast-frames (the ATH capability stuff.) That way we can reuse it for amsdu. * Don't do AMSDU for multicast/broadcast and EAPOL frames. * If we're doing A-MPDU, then don't bother doing FF/A-MSDU. We can likely do both together, but I don't want to change behaviour. * Teach the fast frames approx txtime logic to support the 11n rates. But, since we don't currently have a full "current rate" support, assume it's HT20, long-gi, etc. That way we overshoot on the TX time estimation, so we're always inside the requirements. (And we only aggregate two frames for now, so we're not really going to exceed that.) * Drop the maximum FF age default down to 2ms, otherwise we end up with some very annoyingly large latencies. TODO: * We only aggregate two ethernet frames, so I'm not checking the max A-MSDU size. But when it comes time to support >2 frames, we should obey that. Tested: * urtwn(4)
2016-04-06 01:21:51 +00:00
/*
[net80211] Initial A-MSDU support for testing / evaluation A-MSDU is another 11n aggregation mechanism where multiple ethernet frames get LLC encapsulated (so they have a length field), padded, and put in a single MPDU (802.11 MAC frame.) This means it gets sent out as a single frame, with a single seqno, it's acked as one frame, etc. It turns out that, hah, atheros fast frames is almost but not quite like this, so I'm reusing all of the current superg/fast-frames stuff in order to actually transmit A-MSDU. Yes, this means that A-MSDU frames are also only aggregated two at a time, so it's not necessarily a huge win, but it's better than nothing. This doesn't do anything by default - the driver needs to say it does A-MSDU as well as set the AMSDU software TX capability so this code path gets exercised. For now, the only driver that enables this is urtwn. I'll enable it for rsu at some point soon. Tested: * Add an amsdu encap path to aggregate two frames, same as the fast-frames path. * Always do the superg init/teardown and node init/teardown stuff, regardless of whether the nodes are doing fast-frames (the ATH capability stuff.) That way we can reuse it for amsdu. * Don't do AMSDU for multicast/broadcast and EAPOL frames. * If we're doing A-MPDU, then don't bother doing FF/A-MSDU. We can likely do both together, but I don't want to change behaviour. * Teach the fast frames approx txtime logic to support the 11n rates. But, since we don't currently have a full "current rate" support, assume it's HT20, long-gi, etc. That way we overshoot on the TX time estimation, so we're always inside the requirements. (And we only aggregate two frames for now, so we're not really going to exceed that.) * Drop the maximum FF age default down to 2ms, otherwise we end up with some very annoyingly large latencies. TODO: * We only aggregate two ethernet frames, so I'm not checking the max A-MSDU size. But when it comes time to support >2 frames, we should obey that. Tested: * urtwn(4)
2016-04-06 01:21:51 +00:00
* Adjust to include 802.11 header requirement.
*/
KASSERT(m1->m_len >= sizeof(eh1), ("no ethernet header!"));
ETHER_HEADER_COPY(&eh1, mtod(m1, caddr_t));
[net80211] Initial A-MSDU support for testing / evaluation A-MSDU is another 11n aggregation mechanism where multiple ethernet frames get LLC encapsulated (so they have a length field), padded, and put in a single MPDU (802.11 MAC frame.) This means it gets sent out as a single frame, with a single seqno, it's acked as one frame, etc. It turns out that, hah, atheros fast frames is almost but not quite like this, so I'm reusing all of the current superg/fast-frames stuff in order to actually transmit A-MSDU. Yes, this means that A-MSDU frames are also only aggregated two at a time, so it's not necessarily a huge win, but it's better than nothing. This doesn't do anything by default - the driver needs to say it does A-MSDU as well as set the AMSDU software TX capability so this code path gets exercised. For now, the only driver that enables this is urtwn. I'll enable it for rsu at some point soon. Tested: * Add an amsdu encap path to aggregate two frames, same as the fast-frames path. * Always do the superg init/teardown and node init/teardown stuff, regardless of whether the nodes are doing fast-frames (the ATH capability stuff.) That way we can reuse it for amsdu. * Don't do AMSDU for multicast/broadcast and EAPOL frames. * If we're doing A-MPDU, then don't bother doing FF/A-MSDU. We can likely do both together, but I don't want to change behaviour. * Teach the fast frames approx txtime logic to support the 11n rates. But, since we don't currently have a full "current rate" support, assume it's HT20, long-gi, etc. That way we overshoot on the TX time estimation, so we're always inside the requirements. (And we only aggregate two frames for now, so we're not really going to exceed that.) * Drop the maximum FF age default down to 2ms, otherwise we end up with some very annoyingly large latencies. TODO: * We only aggregate two ethernet frames, so I'm not checking the max A-MSDU size. But when it comes time to support >2 frames, we should obey that. Tested: * urtwn(4)
2016-04-06 01:21:51 +00:00
m1 = ieee80211_mbuf_adjust(vap, hdrspace, key, m1);
if (m1 == NULL) {
[net80211] Initial A-MSDU support for testing / evaluation A-MSDU is another 11n aggregation mechanism where multiple ethernet frames get LLC encapsulated (so they have a length field), padded, and put in a single MPDU (802.11 MAC frame.) This means it gets sent out as a single frame, with a single seqno, it's acked as one frame, etc. It turns out that, hah, atheros fast frames is almost but not quite like this, so I'm reusing all of the current superg/fast-frames stuff in order to actually transmit A-MSDU. Yes, this means that A-MSDU frames are also only aggregated two at a time, so it's not necessarily a huge win, but it's better than nothing. This doesn't do anything by default - the driver needs to say it does A-MSDU as well as set the AMSDU software TX capability so this code path gets exercised. For now, the only driver that enables this is urtwn. I'll enable it for rsu at some point soon. Tested: * Add an amsdu encap path to aggregate two frames, same as the fast-frames path. * Always do the superg init/teardown and node init/teardown stuff, regardless of whether the nodes are doing fast-frames (the ATH capability stuff.) That way we can reuse it for amsdu. * Don't do AMSDU for multicast/broadcast and EAPOL frames. * If we're doing A-MPDU, then don't bother doing FF/A-MSDU. We can likely do both together, but I don't want to change behaviour. * Teach the fast frames approx txtime logic to support the 11n rates. But, since we don't currently have a full "current rate" support, assume it's HT20, long-gi, etc. That way we overshoot on the TX time estimation, so we're always inside the requirements. (And we only aggregate two frames for now, so we're not really going to exceed that.) * Drop the maximum FF age default down to 2ms, otherwise we end up with some very annoyingly large latencies. TODO: * We only aggregate two ethernet frames, so I'm not checking the max A-MSDU size. But when it comes time to support >2 frames, we should obey that. Tested: * urtwn(4)
2016-04-06 01:21:51 +00:00
printf("%s: failed initial mbuf_adjust\n", __func__);
/* NB: ieee80211_mbuf_adjust handles msgs+statistics */
m_freem(m2);
goto bad;
}
/*
* Copy second frame's Ethernet header out of line
[net80211] Initial A-MSDU support for testing / evaluation A-MSDU is another 11n aggregation mechanism where multiple ethernet frames get LLC encapsulated (so they have a length field), padded, and put in a single MPDU (802.11 MAC frame.) This means it gets sent out as a single frame, with a single seqno, it's acked as one frame, etc. It turns out that, hah, atheros fast frames is almost but not quite like this, so I'm reusing all of the current superg/fast-frames stuff in order to actually transmit A-MSDU. Yes, this means that A-MSDU frames are also only aggregated two at a time, so it's not necessarily a huge win, but it's better than nothing. This doesn't do anything by default - the driver needs to say it does A-MSDU as well as set the AMSDU software TX capability so this code path gets exercised. For now, the only driver that enables this is urtwn. I'll enable it for rsu at some point soon. Tested: * Add an amsdu encap path to aggregate two frames, same as the fast-frames path. * Always do the superg init/teardown and node init/teardown stuff, regardless of whether the nodes are doing fast-frames (the ATH capability stuff.) That way we can reuse it for amsdu. * Don't do AMSDU for multicast/broadcast and EAPOL frames. * If we're doing A-MPDU, then don't bother doing FF/A-MSDU. We can likely do both together, but I don't want to change behaviour. * Teach the fast frames approx txtime logic to support the 11n rates. But, since we don't currently have a full "current rate" support, assume it's HT20, long-gi, etc. That way we overshoot on the TX time estimation, so we're always inside the requirements. (And we only aggregate two frames for now, so we're not really going to exceed that.) * Drop the maximum FF age default down to 2ms, otherwise we end up with some very annoyingly large latencies. TODO: * We only aggregate two ethernet frames, so I'm not checking the max A-MSDU size. But when it comes time to support >2 frames, we should obey that. Tested: * urtwn(4)
2016-04-06 01:21:51 +00:00
* and adjust for possible padding in case there isn't room
* at the end of first frame.
*/
KASSERT(m2->m_len >= sizeof(eh2), ("no ethernet header!"));
ETHER_HEADER_COPY(&eh2, mtod(m2, caddr_t));
[net80211] Initial A-MSDU support for testing / evaluation A-MSDU is another 11n aggregation mechanism where multiple ethernet frames get LLC encapsulated (so they have a length field), padded, and put in a single MPDU (802.11 MAC frame.) This means it gets sent out as a single frame, with a single seqno, it's acked as one frame, etc. It turns out that, hah, atheros fast frames is almost but not quite like this, so I'm reusing all of the current superg/fast-frames stuff in order to actually transmit A-MSDU. Yes, this means that A-MSDU frames are also only aggregated two at a time, so it's not necessarily a huge win, but it's better than nothing. This doesn't do anything by default - the driver needs to say it does A-MSDU as well as set the AMSDU software TX capability so this code path gets exercised. For now, the only driver that enables this is urtwn. I'll enable it for rsu at some point soon. Tested: * Add an amsdu encap path to aggregate two frames, same as the fast-frames path. * Always do the superg init/teardown and node init/teardown stuff, regardless of whether the nodes are doing fast-frames (the ATH capability stuff.) That way we can reuse it for amsdu. * Don't do AMSDU for multicast/broadcast and EAPOL frames. * If we're doing A-MPDU, then don't bother doing FF/A-MSDU. We can likely do both together, but I don't want to change behaviour. * Teach the fast frames approx txtime logic to support the 11n rates. But, since we don't currently have a full "current rate" support, assume it's HT20, long-gi, etc. That way we overshoot on the TX time estimation, so we're always inside the requirements. (And we only aggregate two frames for now, so we're not really going to exceed that.) * Drop the maximum FF age default down to 2ms, otherwise we end up with some very annoyingly large latencies. TODO: * We only aggregate two ethernet frames, so I'm not checking the max A-MSDU size. But when it comes time to support >2 frames, we should obey that. Tested: * urtwn(4)
2016-04-06 01:21:51 +00:00
m2 = ieee80211_mbuf_adjust(vap, 4, NULL, m2);
if (m2 == NULL) {
/* NB: ieee80211_mbuf_adjust handles msgs+statistics */
[net80211] Initial A-MSDU support for testing / evaluation A-MSDU is another 11n aggregation mechanism where multiple ethernet frames get LLC encapsulated (so they have a length field), padded, and put in a single MPDU (802.11 MAC frame.) This means it gets sent out as a single frame, with a single seqno, it's acked as one frame, etc. It turns out that, hah, atheros fast frames is almost but not quite like this, so I'm reusing all of the current superg/fast-frames stuff in order to actually transmit A-MSDU. Yes, this means that A-MSDU frames are also only aggregated two at a time, so it's not necessarily a huge win, but it's better than nothing. This doesn't do anything by default - the driver needs to say it does A-MSDU as well as set the AMSDU software TX capability so this code path gets exercised. For now, the only driver that enables this is urtwn. I'll enable it for rsu at some point soon. Tested: * Add an amsdu encap path to aggregate two frames, same as the fast-frames path. * Always do the superg init/teardown and node init/teardown stuff, regardless of whether the nodes are doing fast-frames (the ATH capability stuff.) That way we can reuse it for amsdu. * Don't do AMSDU for multicast/broadcast and EAPOL frames. * If we're doing A-MPDU, then don't bother doing FF/A-MSDU. We can likely do both together, but I don't want to change behaviour. * Teach the fast frames approx txtime logic to support the 11n rates. But, since we don't currently have a full "current rate" support, assume it's HT20, long-gi, etc. That way we overshoot on the TX time estimation, so we're always inside the requirements. (And we only aggregate two frames for now, so we're not really going to exceed that.) * Drop the maximum FF age default down to 2ms, otherwise we end up with some very annoyingly large latencies. TODO: * We only aggregate two ethernet frames, so I'm not checking the max A-MSDU size. But when it comes time to support >2 frames, we should obey that. Tested: * urtwn(4)
2016-04-06 01:21:51 +00:00
printf("%s: failed second \n", __func__);
goto bad;
}
/*
* Now do tunnel encapsulation. First, each
* frame gets a standard encapsulation.
*/
m1 = ieee80211_ff_encap1(vap, m1, &eh1);
if (m1 == NULL)
goto bad;
m2 = ieee80211_ff_encap1(vap, m2, &eh2);
if (m2 == NULL)
goto bad;
/*
* Pad leading frame to a 4-byte boundary. If there
* is space at the end of the first frame, put it
* there; otherwise prepend to the front of the second
* frame. We know doing the second will always work
* because we reserve space above. We prefer appending
* as this typically has better DMA alignment properties.
*/
for (m = m1; m->m_next != NULL; m = m->m_next)
;
pad = roundup2(m1->m_pkthdr.len, 4) - m1->m_pkthdr.len;
if (pad) {
if (M_TRAILINGSPACE(m) < pad) { /* prepend to second */
m2->m_data -= pad;
m2->m_len += pad;
m2->m_pkthdr.len += pad;
} else { /* append to first */
m->m_len += pad;
m1->m_pkthdr.len += pad;
}
}
/*
[net80211] Initial A-MSDU support for testing / evaluation A-MSDU is another 11n aggregation mechanism where multiple ethernet frames get LLC encapsulated (so they have a length field), padded, and put in a single MPDU (802.11 MAC frame.) This means it gets sent out as a single frame, with a single seqno, it's acked as one frame, etc. It turns out that, hah, atheros fast frames is almost but not quite like this, so I'm reusing all of the current superg/fast-frames stuff in order to actually transmit A-MSDU. Yes, this means that A-MSDU frames are also only aggregated two at a time, so it's not necessarily a huge win, but it's better than nothing. This doesn't do anything by default - the driver needs to say it does A-MSDU as well as set the AMSDU software TX capability so this code path gets exercised. For now, the only driver that enables this is urtwn. I'll enable it for rsu at some point soon. Tested: * Add an amsdu encap path to aggregate two frames, same as the fast-frames path. * Always do the superg init/teardown and node init/teardown stuff, regardless of whether the nodes are doing fast-frames (the ATH capability stuff.) That way we can reuse it for amsdu. * Don't do AMSDU for multicast/broadcast and EAPOL frames. * If we're doing A-MPDU, then don't bother doing FF/A-MSDU. We can likely do both together, but I don't want to change behaviour. * Teach the fast frames approx txtime logic to support the 11n rates. But, since we don't currently have a full "current rate" support, assume it's HT20, long-gi, etc. That way we overshoot on the TX time estimation, so we're always inside the requirements. (And we only aggregate two frames for now, so we're not really going to exceed that.) * Drop the maximum FF age default down to 2ms, otherwise we end up with some very annoyingly large latencies. TODO: * We only aggregate two ethernet frames, so I'm not checking the max A-MSDU size. But when it comes time to support >2 frames, we should obey that. Tested: * urtwn(4)
2016-04-06 01:21:51 +00:00
* A-MSDU's are just appended; the "I'm A-MSDU!" bit is in the
* QoS header.
*
* XXX optimize by prepending together
*/
m->m_next = m2; /* NB: last mbuf from above */
m1->m_pkthdr.len += m2->m_pkthdr.len;
M_PREPEND(m1, sizeof(uint32_t)+2, M_NOWAIT);
if (m1 == NULL) { /* XXX cannot happen */
IEEE80211_DPRINTF(vap, IEEE80211_MSG_SUPERG,
"%s: no space for tunnel header\n", __func__);
vap->iv_stats.is_tx_nobuf++;
return NULL;
}
memset(mtod(m1, void *), 0, sizeof(uint32_t)+2);
M_PREPEND(m1, sizeof(struct llc), M_NOWAIT);
if (m1 == NULL) { /* XXX cannot happen */
IEEE80211_DPRINTF(vap, IEEE80211_MSG_SUPERG,
"%s: no space for llc header\n", __func__);
vap->iv_stats.is_tx_nobuf++;
return NULL;
}
llc = mtod(m1, struct llc *);
llc->llc_dsap = llc->llc_ssap = LLC_SNAP_LSAP;
llc->llc_control = LLC_UI;
llc->llc_snap.org_code[0] = ATH_FF_SNAP_ORGCODE_0;
llc->llc_snap.org_code[1] = ATH_FF_SNAP_ORGCODE_1;
llc->llc_snap.org_code[2] = ATH_FF_SNAP_ORGCODE_2;
llc->llc_snap.ether_type = htons(ATH_FF_ETH_TYPE);
vap->iv_stats.is_ff_encap++;
return m1;
bad:
[net80211] Initial A-MSDU support for testing / evaluation A-MSDU is another 11n aggregation mechanism where multiple ethernet frames get LLC encapsulated (so they have a length field), padded, and put in a single MPDU (802.11 MAC frame.) This means it gets sent out as a single frame, with a single seqno, it's acked as one frame, etc. It turns out that, hah, atheros fast frames is almost but not quite like this, so I'm reusing all of the current superg/fast-frames stuff in order to actually transmit A-MSDU. Yes, this means that A-MSDU frames are also only aggregated two at a time, so it's not necessarily a huge win, but it's better than nothing. This doesn't do anything by default - the driver needs to say it does A-MSDU as well as set the AMSDU software TX capability so this code path gets exercised. For now, the only driver that enables this is urtwn. I'll enable it for rsu at some point soon. Tested: * Add an amsdu encap path to aggregate two frames, same as the fast-frames path. * Always do the superg init/teardown and node init/teardown stuff, regardless of whether the nodes are doing fast-frames (the ATH capability stuff.) That way we can reuse it for amsdu. * Don't do AMSDU for multicast/broadcast and EAPOL frames. * If we're doing A-MPDU, then don't bother doing FF/A-MSDU. We can likely do both together, but I don't want to change behaviour. * Teach the fast frames approx txtime logic to support the 11n rates. But, since we don't currently have a full "current rate" support, assume it's HT20, long-gi, etc. That way we overshoot on the TX time estimation, so we're always inside the requirements. (And we only aggregate two frames for now, so we're not really going to exceed that.) * Drop the maximum FF age default down to 2ms, otherwise we end up with some very annoyingly large latencies. TODO: * We only aggregate two ethernet frames, so I'm not checking the max A-MSDU size. But when it comes time to support >2 frames, we should obey that. Tested: * urtwn(4)
2016-04-06 01:21:51 +00:00
vap->iv_stats.is_ff_encapfail++;
if (m1 != NULL)
m_freem(m1);
if (m2 != NULL)
m_freem(m2);
return NULL;
}
/*
* A-MSDU encapsulation.
*
* This assumes just two frames for now, since we're borrowing the
* same queuing code and infrastructure as fast-frames.
*
* There must be two packets chained with m_nextpkt.
* We do header adjustment for each, and then concatenate the mbuf chains
* to form a single frame for transmission.
*/
struct mbuf *
ieee80211_amsdu_encap(struct ieee80211vap *vap, struct mbuf *m1, int hdrspace,
struct ieee80211_key *key)
{
struct mbuf *m2;
struct ether_header eh1, eh2;
struct mbuf *m;
int pad;
m2 = m1->m_nextpkt;
if (m2 == NULL) {
IEEE80211_DPRINTF(vap, IEEE80211_MSG_SUPERG,
"%s: only one frame\n", __func__);
goto bad;
}
m1->m_nextpkt = NULL;
/*
* Include A-MSDU header in adjusting header layout.
*/
KASSERT(m1->m_len >= sizeof(eh1), ("no ethernet header!"));
ETHER_HEADER_COPY(&eh1, mtod(m1, caddr_t));
m1 = ieee80211_mbuf_adjust(vap,
hdrspace + sizeof(struct llc) + sizeof(uint32_t) +
sizeof(struct ether_header),
key, m1);
if (m1 == NULL) {
/* NB: ieee80211_mbuf_adjust handles msgs+statistics */
m_freem(m2);
goto bad;
}
/*
* Copy second frame's Ethernet header out of line
* and adjust for encapsulation headers. Note that
* we make room for padding in case there isn't room
* at the end of first frame.
*/
KASSERT(m2->m_len >= sizeof(eh2), ("no ethernet header!"));
ETHER_HEADER_COPY(&eh2, mtod(m2, caddr_t));
m2 = ieee80211_mbuf_adjust(vap, 4, NULL, m2);
if (m2 == NULL) {
/* NB: ieee80211_mbuf_adjust handles msgs+statistics */
goto bad;
}
/*
* Now do tunnel encapsulation. First, each
* frame gets a standard encapsulation.
*/
m1 = ieee80211_ff_encap1(vap, m1, &eh1);
if (m1 == NULL)
goto bad;
m2 = ieee80211_ff_encap1(vap, m2, &eh2);
if (m2 == NULL)
goto bad;
/*
* Pad leading frame to a 4-byte boundary. If there
* is space at the end of the first frame, put it
* there; otherwise prepend to the front of the second
* frame. We know doing the second will always work
* because we reserve space above. We prefer appending
* as this typically has better DMA alignment properties.
*/
for (m = m1; m->m_next != NULL; m = m->m_next)
;
pad = roundup2(m1->m_pkthdr.len, 4) - m1->m_pkthdr.len;
if (pad) {
if (M_TRAILINGSPACE(m) < pad) { /* prepend to second */
m2->m_data -= pad;
m2->m_len += pad;
m2->m_pkthdr.len += pad;
} else { /* append to first */
m->m_len += pad;
m1->m_pkthdr.len += pad;
}
}
/*
* Now, stick 'em together.
*/
m->m_next = m2; /* NB: last mbuf from above */
m1->m_pkthdr.len += m2->m_pkthdr.len;
vap->iv_stats.is_amsdu_encap++;
return m1;
bad:
vap->iv_stats.is_amsdu_encapfail++;
if (m1 != NULL)
m_freem(m1);
if (m2 != NULL)
m_freem(m2);
return NULL;
}
[net80211] Initial A-MSDU support for testing / evaluation A-MSDU is another 11n aggregation mechanism where multiple ethernet frames get LLC encapsulated (so they have a length field), padded, and put in a single MPDU (802.11 MAC frame.) This means it gets sent out as a single frame, with a single seqno, it's acked as one frame, etc. It turns out that, hah, atheros fast frames is almost but not quite like this, so I'm reusing all of the current superg/fast-frames stuff in order to actually transmit A-MSDU. Yes, this means that A-MSDU frames are also only aggregated two at a time, so it's not necessarily a huge win, but it's better than nothing. This doesn't do anything by default - the driver needs to say it does A-MSDU as well as set the AMSDU software TX capability so this code path gets exercised. For now, the only driver that enables this is urtwn. I'll enable it for rsu at some point soon. Tested: * Add an amsdu encap path to aggregate two frames, same as the fast-frames path. * Always do the superg init/teardown and node init/teardown stuff, regardless of whether the nodes are doing fast-frames (the ATH capability stuff.) That way we can reuse it for amsdu. * Don't do AMSDU for multicast/broadcast and EAPOL frames. * If we're doing A-MPDU, then don't bother doing FF/A-MSDU. We can likely do both together, but I don't want to change behaviour. * Teach the fast frames approx txtime logic to support the 11n rates. But, since we don't currently have a full "current rate" support, assume it's HT20, long-gi, etc. That way we overshoot on the TX time estimation, so we're always inside the requirements. (And we only aggregate two frames for now, so we're not really going to exceed that.) * Drop the maximum FF age default down to 2ms, otherwise we end up with some very annoyingly large latencies. TODO: * We only aggregate two ethernet frames, so I'm not checking the max A-MSDU size. But when it comes time to support >2 frames, we should obey that. Tested: * urtwn(4)
2016-04-06 01:21:51 +00:00
static void
ff_transmit(struct ieee80211_node *ni, struct mbuf *m)
{
struct ieee80211vap *vap = ni->ni_vap;
Bring over my initial work from the net80211 TX locking branch. This patchset implements a new TX lock, covering both the per-VAP (and thus per-node) TX locking and the serialisation through to the underlying physical device. This implements the hard requirement that frames to the underlying physical device are scheduled to the underlying device in the same order that they are processed at the VAP layer. This includes adding extra encapsulation state (such as sequence numbers and CCMP IV numbers.) Any order mismatch here will result in dropped packets at the receiver. There are multiple transmit contexts from the upper protocol layers as well as the "raw" interface via the management and BPF transmit paths. All of these need to be correctly serialised or bad behaviour will result under load. The specifics: * add a new TX IC lock - it will eventually just be used for serialisation to the underlying physical device but for now it's used for both the VAP encapsulation/serialisation and the physical device dispatch. This lock is specifically non-recursive. * Methodize the parent transmit, vap transmit and ic_raw_xmit function pointers; use lock assertions in the parent/vap transmit routines. * Add a lock assertion in ieee80211_encap() - the TX lock must be held here to guarantee sensible behaviour. * Refactor out the packet sending code from ieee80211_start() - now ieee80211_start() is just a loop over the ifnet queue and it dispatches each VAP packet send through ieee80211_start_pkt(). Yes, I will likely rename ieee80211_start_pkt() to something that better reflects its status as a VAP packet transmit path. More on that later. * Add locking around the management and BAR TX sending - to ensure that encapsulation and TX are done hand-in-hand. * Add locking in the mesh code - again, to ensure that encapsulation and mesh transmit are done hand-in-hand. * Add locking around the power save queue and ageq handling, when dispatching to the parent interface. * Add locking around the WDS handoff. * Add a note in the mesh dispatch code that the TX path needs to be re-thought-out - right now it's doing a direct parent device transmit rather than going via the vap layer. It may "work", but it's likely incorrect (as it bypasses any possible per-node power save and aggregation handling.) Why not a per-VAP or per-node lock? Because in order to ensure per-VAP ordering, we'd have to hold the VAP lock across parent->if_transmit(). There are a few problems with this: * There's some state being setup during each driver transmit - specifically, the encryption encap / CCMP IV setup. That should eventually be dragged back into the encapsulation phase but for now it lives in the driver TX path. This should be locked. * Two drivers (ath, iwn) re-use the node->ni_txseqs array in order to allocate sequence numbers when doing transmit aggregation. This should also be locked. * Drivers may have multiple frames queued already - so when one calls if_transmit(), it may end up dispatching multiple frames for different VAPs/nodes, each needing a different lock when handling that particular end destination. So to be "correct" locking-wise, we'd end up needing to grab a VAP or node lock inside the driver TX path when setting up crypto / AMPDU sequence numbers, and we may already _have_ a TX lock held - mostly for the same destination vap/node, but sometimes it'll be for others. That could lead to LORs and thus deadlocks. So for now, I'm sticking with an IC TX lock. It has the advantage of papering over the above and it also has the added advantage that I can assert that it's being held when doing a parent device transmit. I'll look at splitting the locks out a bit more later on. General outstanding net80211 TX path issues / TODO: * Look into separating out the VAP serialisation and the IC handoff. It's going to be tricky as parent->if_transmit() doesn't give me the opportunity to split queuing from driver dispatch. See above. * Work with monthadar to fix up the mesh transmit path so it doesn't go via the parent interface when retransmitting frames. * Push the encryption handling back into the driver, if it's at all architectually sane to do so. I know it's possible - it's what mac80211 in Linux does. * Make ieee80211_raw_xmit() queue a frame into VAP or parent queue rather than doing a short-cut direct into the driver. There are QoS issues here - you do want your management frames to be encapsulated and pushed onto the stack sooner than the (large, bursty) amount of data frames that are queued. But there has to be a saner way to do this. * Fragments are still broken - drivers need to be upgraded to an if_transmit() implementation and then fragmentation handling needs to be properly fixed. Tested: * STA - AR5416, AR9280, Intel 5300 abgn wifi * Hostap - AR5416, AR9160, AR9280 * Mesh - some testing by monthadar@, more to come.
2013-03-08 20:23:55 +00:00
struct ieee80211com *ic = ni->ni_ic;
IEEE80211_TX_LOCK_ASSERT(ic);
Bring over my initial work from the net80211 TX locking branch. This patchset implements a new TX lock, covering both the per-VAP (and thus per-node) TX locking and the serialisation through to the underlying physical device. This implements the hard requirement that frames to the underlying physical device are scheduled to the underlying device in the same order that they are processed at the VAP layer. This includes adding extra encapsulation state (such as sequence numbers and CCMP IV numbers.) Any order mismatch here will result in dropped packets at the receiver. There are multiple transmit contexts from the upper protocol layers as well as the "raw" interface via the management and BPF transmit paths. All of these need to be correctly serialised or bad behaviour will result under load. The specifics: * add a new TX IC lock - it will eventually just be used for serialisation to the underlying physical device but for now it's used for both the VAP encapsulation/serialisation and the physical device dispatch. This lock is specifically non-recursive. * Methodize the parent transmit, vap transmit and ic_raw_xmit function pointers; use lock assertions in the parent/vap transmit routines. * Add a lock assertion in ieee80211_encap() - the TX lock must be held here to guarantee sensible behaviour. * Refactor out the packet sending code from ieee80211_start() - now ieee80211_start() is just a loop over the ifnet queue and it dispatches each VAP packet send through ieee80211_start_pkt(). Yes, I will likely rename ieee80211_start_pkt() to something that better reflects its status as a VAP packet transmit path. More on that later. * Add locking around the management and BAR TX sending - to ensure that encapsulation and TX are done hand-in-hand. * Add locking in the mesh code - again, to ensure that encapsulation and mesh transmit are done hand-in-hand. * Add locking around the power save queue and ageq handling, when dispatching to the parent interface. * Add locking around the WDS handoff. * Add a note in the mesh dispatch code that the TX path needs to be re-thought-out - right now it's doing a direct parent device transmit rather than going via the vap layer. It may "work", but it's likely incorrect (as it bypasses any possible per-node power save and aggregation handling.) Why not a per-VAP or per-node lock? Because in order to ensure per-VAP ordering, we'd have to hold the VAP lock across parent->if_transmit(). There are a few problems with this: * There's some state being setup during each driver transmit - specifically, the encryption encap / CCMP IV setup. That should eventually be dragged back into the encapsulation phase but for now it lives in the driver TX path. This should be locked. * Two drivers (ath, iwn) re-use the node->ni_txseqs array in order to allocate sequence numbers when doing transmit aggregation. This should also be locked. * Drivers may have multiple frames queued already - so when one calls if_transmit(), it may end up dispatching multiple frames for different VAPs/nodes, each needing a different lock when handling that particular end destination. So to be "correct" locking-wise, we'd end up needing to grab a VAP or node lock inside the driver TX path when setting up crypto / AMPDU sequence numbers, and we may already _have_ a TX lock held - mostly for the same destination vap/node, but sometimes it'll be for others. That could lead to LORs and thus deadlocks. So for now, I'm sticking with an IC TX lock. It has the advantage of papering over the above and it also has the added advantage that I can assert that it's being held when doing a parent device transmit. I'll look at splitting the locks out a bit more later on. General outstanding net80211 TX path issues / TODO: * Look into separating out the VAP serialisation and the IC handoff. It's going to be tricky as parent->if_transmit() doesn't give me the opportunity to split queuing from driver dispatch. See above. * Work with monthadar to fix up the mesh transmit path so it doesn't go via the parent interface when retransmitting frames. * Push the encryption handling back into the driver, if it's at all architectually sane to do so. I know it's possible - it's what mac80211 in Linux does. * Make ieee80211_raw_xmit() queue a frame into VAP or parent queue rather than doing a short-cut direct into the driver. There are QoS issues here - you do want your management frames to be encapsulated and pushed onto the stack sooner than the (large, bursty) amount of data frames that are queued. But there has to be a saner way to do this. * Fragments are still broken - drivers need to be upgraded to an if_transmit() implementation and then fragmentation handling needs to be properly fixed. Tested: * STA - AR5416, AR9280, Intel 5300 abgn wifi * Hostap - AR5416, AR9160, AR9280 * Mesh - some testing by monthadar@, more to come.
2013-03-08 20:23:55 +00:00
/* encap and xmit */
m = ieee80211_encap(vap, ni, m);
if (m != NULL)
(void) ieee80211_parent_xmitpkt(ic, m);
else
ieee80211_free_node(ni);
}
/*
* Flush frames to device; note we re-use the linked list
* the frames were stored on and use the sentinel (unchanged)
* which may be non-NULL.
*/
static void
ff_flush(struct mbuf *head, struct mbuf *last)
{
struct mbuf *m, *next;
struct ieee80211_node *ni;
struct ieee80211vap *vap;
for (m = head; m != last; m = next) {
next = m->m_nextpkt;
m->m_nextpkt = NULL;
ni = (struct ieee80211_node *) m->m_pkthdr.rcvif;
vap = ni->ni_vap;
IEEE80211_NOTE(vap, IEEE80211_MSG_SUPERG, ni,
"%s: flush frame, age %u", __func__, M_AGE_GET(m));
vap->iv_stats.is_ff_flush++;
ff_transmit(ni, m);
}
}
/*
* Age frames on the staging queue.
*/
void
ieee80211_ff_age(struct ieee80211com *ic, struct ieee80211_stageq *sq,
int quanta)
{
struct mbuf *m, *head;
struct ieee80211_node *ni;
IEEE80211_FF_LOCK(ic);
if (sq->depth == 0) {
IEEE80211_FF_UNLOCK(ic);
return; /* nothing to do */
}
KASSERT(sq->head != NULL, ("stageq empty"));
head = sq->head;
while ((m = sq->head) != NULL && M_AGE_GET(m) < quanta) {
int tid = WME_AC_TO_TID(M_WME_GETAC(m));
Migrate the fast-frames transmit support away from using the txa_private field and into a separate fast-frames staging pointer in ieee80211_node. The A-MPDU TX path allows txa_private to be used by drivers. So it will clash with any attempt to use fast-frames. Now, fast-frames is not really anything special - it's just a custom ethernet frame type that contains two MSDUs into one MPDU. So all the NIC has to support doing is transmitting up to a 4KiB frame with an arbitrary ethertype and bam! Fast-frames. However, using txa_private means we can /either/ do fast-frames or A-MPDU TX, so fast frames has been turned off in the Atheros HAL for 11n chipsets. This is a bit silly - it actually means that 802.11 performance to/from 11abg Atheros chips is actually better than between an 11abg atheros device and an 11n Atheros device. So: * create a new mbuf staging queue for fast frames. It only queues a single frame in the staging queue (and there's a top-level ic staging queue used for expiry/tracking) so it's just an mbuf pointer per TID. * Still use the ampdu TX packet counter to determine whether to do aggregation or not. It'll double count if we start doing both A-MPDU TX and fast frames, but that's not all that important right now. * Initialise the pps tracker so ticks isn't zero. This ensures that fast-frames actually gets used - without it, the ticks math overflows and the pps math always sets txa_pps=0. This is the same bug that plagued A-MPDU TX starting logic. This actually allows fast-frames transmit to occur between the AR9331 (in 11n HT/20 mode) and AR9170 (if_otus) in 11bg mode. Now, this is a great big no-op on atheros 11n hardware, so don't worry. It may mean you start seeing more reliable fast-frames transmission on 11abg hardware which may expose some more amusing bugs. TODO: * further testing and debugging of all of this before flipping on fast-frames in if_ath (for 11n) and if_otus.
2015-09-28 00:59:07 +00:00
/* clear staging ref to frame */
ni = (struct ieee80211_node *) m->m_pkthdr.rcvif;
Migrate the fast-frames transmit support away from using the txa_private field and into a separate fast-frames staging pointer in ieee80211_node. The A-MPDU TX path allows txa_private to be used by drivers. So it will clash with any attempt to use fast-frames. Now, fast-frames is not really anything special - it's just a custom ethernet frame type that contains two MSDUs into one MPDU. So all the NIC has to support doing is transmitting up to a 4KiB frame with an arbitrary ethertype and bam! Fast-frames. However, using txa_private means we can /either/ do fast-frames or A-MPDU TX, so fast frames has been turned off in the Atheros HAL for 11n chipsets. This is a bit silly - it actually means that 802.11 performance to/from 11abg Atheros chips is actually better than between an 11abg atheros device and an 11n Atheros device. So: * create a new mbuf staging queue for fast frames. It only queues a single frame in the staging queue (and there's a top-level ic staging queue used for expiry/tracking) so it's just an mbuf pointer per TID. * Still use the ampdu TX packet counter to determine whether to do aggregation or not. It'll double count if we start doing both A-MPDU TX and fast frames, but that's not all that important right now. * Initialise the pps tracker so ticks isn't zero. This ensures that fast-frames actually gets used - without it, the ticks math overflows and the pps math always sets txa_pps=0. This is the same bug that plagued A-MPDU TX starting logic. This actually allows fast-frames transmit to occur between the AR9331 (in 11n HT/20 mode) and AR9170 (if_otus) in 11bg mode. Now, this is a great big no-op on atheros 11n hardware, so don't worry. It may mean you start seeing more reliable fast-frames transmission on 11abg hardware which may expose some more amusing bugs. TODO: * further testing and debugging of all of this before flipping on fast-frames in if_ath (for 11n) and if_otus.
2015-09-28 00:59:07 +00:00
KASSERT(ni->ni_tx_superg[tid] == m, ("staging queue empty"));
ni->ni_tx_superg[tid] = NULL;
sq->head = m->m_nextpkt;
sq->depth--;
}
if (m == NULL)
sq->tail = NULL;
else
M_AGE_SUB(m, quanta);
IEEE80211_FF_UNLOCK(ic);
Bring over my initial work from the net80211 TX locking branch. This patchset implements a new TX lock, covering both the per-VAP (and thus per-node) TX locking and the serialisation through to the underlying physical device. This implements the hard requirement that frames to the underlying physical device are scheduled to the underlying device in the same order that they are processed at the VAP layer. This includes adding extra encapsulation state (such as sequence numbers and CCMP IV numbers.) Any order mismatch here will result in dropped packets at the receiver. There are multiple transmit contexts from the upper protocol layers as well as the "raw" interface via the management and BPF transmit paths. All of these need to be correctly serialised or bad behaviour will result under load. The specifics: * add a new TX IC lock - it will eventually just be used for serialisation to the underlying physical device but for now it's used for both the VAP encapsulation/serialisation and the physical device dispatch. This lock is specifically non-recursive. * Methodize the parent transmit, vap transmit and ic_raw_xmit function pointers; use lock assertions in the parent/vap transmit routines. * Add a lock assertion in ieee80211_encap() - the TX lock must be held here to guarantee sensible behaviour. * Refactor out the packet sending code from ieee80211_start() - now ieee80211_start() is just a loop over the ifnet queue and it dispatches each VAP packet send through ieee80211_start_pkt(). Yes, I will likely rename ieee80211_start_pkt() to something that better reflects its status as a VAP packet transmit path. More on that later. * Add locking around the management and BAR TX sending - to ensure that encapsulation and TX are done hand-in-hand. * Add locking in the mesh code - again, to ensure that encapsulation and mesh transmit are done hand-in-hand. * Add locking around the power save queue and ageq handling, when dispatching to the parent interface. * Add locking around the WDS handoff. * Add a note in the mesh dispatch code that the TX path needs to be re-thought-out - right now it's doing a direct parent device transmit rather than going via the vap layer. It may "work", but it's likely incorrect (as it bypasses any possible per-node power save and aggregation handling.) Why not a per-VAP or per-node lock? Because in order to ensure per-VAP ordering, we'd have to hold the VAP lock across parent->if_transmit(). There are a few problems with this: * There's some state being setup during each driver transmit - specifically, the encryption encap / CCMP IV setup. That should eventually be dragged back into the encapsulation phase but for now it lives in the driver TX path. This should be locked. * Two drivers (ath, iwn) re-use the node->ni_txseqs array in order to allocate sequence numbers when doing transmit aggregation. This should also be locked. * Drivers may have multiple frames queued already - so when one calls if_transmit(), it may end up dispatching multiple frames for different VAPs/nodes, each needing a different lock when handling that particular end destination. So to be "correct" locking-wise, we'd end up needing to grab a VAP or node lock inside the driver TX path when setting up crypto / AMPDU sequence numbers, and we may already _have_ a TX lock held - mostly for the same destination vap/node, but sometimes it'll be for others. That could lead to LORs and thus deadlocks. So for now, I'm sticking with an IC TX lock. It has the advantage of papering over the above and it also has the added advantage that I can assert that it's being held when doing a parent device transmit. I'll look at splitting the locks out a bit more later on. General outstanding net80211 TX path issues / TODO: * Look into separating out the VAP serialisation and the IC handoff. It's going to be tricky as parent->if_transmit() doesn't give me the opportunity to split queuing from driver dispatch. See above. * Work with monthadar to fix up the mesh transmit path so it doesn't go via the parent interface when retransmitting frames. * Push the encryption handling back into the driver, if it's at all architectually sane to do so. I know it's possible - it's what mac80211 in Linux does. * Make ieee80211_raw_xmit() queue a frame into VAP or parent queue rather than doing a short-cut direct into the driver. There are QoS issues here - you do want your management frames to be encapsulated and pushed onto the stack sooner than the (large, bursty) amount of data frames that are queued. But there has to be a saner way to do this. * Fragments are still broken - drivers need to be upgraded to an if_transmit() implementation and then fragmentation handling needs to be properly fixed. Tested: * STA - AR5416, AR9280, Intel 5300 abgn wifi * Hostap - AR5416, AR9160, AR9280 * Mesh - some testing by monthadar@, more to come.
2013-03-08 20:23:55 +00:00
IEEE80211_TX_LOCK(ic);
ff_flush(head, m);
Bring over my initial work from the net80211 TX locking branch. This patchset implements a new TX lock, covering both the per-VAP (and thus per-node) TX locking and the serialisation through to the underlying physical device. This implements the hard requirement that frames to the underlying physical device are scheduled to the underlying device in the same order that they are processed at the VAP layer. This includes adding extra encapsulation state (such as sequence numbers and CCMP IV numbers.) Any order mismatch here will result in dropped packets at the receiver. There are multiple transmit contexts from the upper protocol layers as well as the "raw" interface via the management and BPF transmit paths. All of these need to be correctly serialised or bad behaviour will result under load. The specifics: * add a new TX IC lock - it will eventually just be used for serialisation to the underlying physical device but for now it's used for both the VAP encapsulation/serialisation and the physical device dispatch. This lock is specifically non-recursive. * Methodize the parent transmit, vap transmit and ic_raw_xmit function pointers; use lock assertions in the parent/vap transmit routines. * Add a lock assertion in ieee80211_encap() - the TX lock must be held here to guarantee sensible behaviour. * Refactor out the packet sending code from ieee80211_start() - now ieee80211_start() is just a loop over the ifnet queue and it dispatches each VAP packet send through ieee80211_start_pkt(). Yes, I will likely rename ieee80211_start_pkt() to something that better reflects its status as a VAP packet transmit path. More on that later. * Add locking around the management and BAR TX sending - to ensure that encapsulation and TX are done hand-in-hand. * Add locking in the mesh code - again, to ensure that encapsulation and mesh transmit are done hand-in-hand. * Add locking around the power save queue and ageq handling, when dispatching to the parent interface. * Add locking around the WDS handoff. * Add a note in the mesh dispatch code that the TX path needs to be re-thought-out - right now it's doing a direct parent device transmit rather than going via the vap layer. It may "work", but it's likely incorrect (as it bypasses any possible per-node power save and aggregation handling.) Why not a per-VAP or per-node lock? Because in order to ensure per-VAP ordering, we'd have to hold the VAP lock across parent->if_transmit(). There are a few problems with this: * There's some state being setup during each driver transmit - specifically, the encryption encap / CCMP IV setup. That should eventually be dragged back into the encapsulation phase but for now it lives in the driver TX path. This should be locked. * Two drivers (ath, iwn) re-use the node->ni_txseqs array in order to allocate sequence numbers when doing transmit aggregation. This should also be locked. * Drivers may have multiple frames queued already - so when one calls if_transmit(), it may end up dispatching multiple frames for different VAPs/nodes, each needing a different lock when handling that particular end destination. So to be "correct" locking-wise, we'd end up needing to grab a VAP or node lock inside the driver TX path when setting up crypto / AMPDU sequence numbers, and we may already _have_ a TX lock held - mostly for the same destination vap/node, but sometimes it'll be for others. That could lead to LORs and thus deadlocks. So for now, I'm sticking with an IC TX lock. It has the advantage of papering over the above and it also has the added advantage that I can assert that it's being held when doing a parent device transmit. I'll look at splitting the locks out a bit more later on. General outstanding net80211 TX path issues / TODO: * Look into separating out the VAP serialisation and the IC handoff. It's going to be tricky as parent->if_transmit() doesn't give me the opportunity to split queuing from driver dispatch. See above. * Work with monthadar to fix up the mesh transmit path so it doesn't go via the parent interface when retransmitting frames. * Push the encryption handling back into the driver, if it's at all architectually sane to do so. I know it's possible - it's what mac80211 in Linux does. * Make ieee80211_raw_xmit() queue a frame into VAP or parent queue rather than doing a short-cut direct into the driver. There are QoS issues here - you do want your management frames to be encapsulated and pushed onto the stack sooner than the (large, bursty) amount of data frames that are queued. But there has to be a saner way to do this. * Fragments are still broken - drivers need to be upgraded to an if_transmit() implementation and then fragmentation handling needs to be properly fixed. Tested: * STA - AR5416, AR9280, Intel 5300 abgn wifi * Hostap - AR5416, AR9160, AR9280 * Mesh - some testing by monthadar@, more to come.
2013-03-08 20:23:55 +00:00
IEEE80211_TX_UNLOCK(ic);
}
static void
stageq_add(struct ieee80211com *ic, struct ieee80211_stageq *sq, struct mbuf *m)
{
int age = ieee80211_ffagemax;
IEEE80211_FF_LOCK_ASSERT(ic);
if (sq->tail != NULL) {
sq->tail->m_nextpkt = m;
age -= M_AGE_GET(sq->head);
} else
sq->head = m;
KASSERT(age >= 0, ("age %d", age));
M_AGE_SET(m, age);
m->m_nextpkt = NULL;
sq->tail = m;
sq->depth++;
}
static void
stageq_remove(struct ieee80211com *ic, struct ieee80211_stageq *sq, struct mbuf *mstaged)
{
struct mbuf *m, *mprev;
IEEE80211_FF_LOCK_ASSERT(ic);
mprev = NULL;
for (m = sq->head; m != NULL; m = m->m_nextpkt) {
if (m == mstaged) {
if (mprev == NULL)
sq->head = m->m_nextpkt;
else
mprev->m_nextpkt = m->m_nextpkt;
if (sq->tail == m)
sq->tail = mprev;
sq->depth--;
return;
}
mprev = m;
}
printf("%s: packet not found\n", __func__);
}
static uint32_t
ff_approx_txtime(struct ieee80211_node *ni,
const struct mbuf *m1, const struct mbuf *m2)
{
struct ieee80211com *ic = ni->ni_ic;
struct ieee80211vap *vap = ni->ni_vap;
uint32_t framelen;
[net80211] Initial A-MSDU support for testing / evaluation A-MSDU is another 11n aggregation mechanism where multiple ethernet frames get LLC encapsulated (so they have a length field), padded, and put in a single MPDU (802.11 MAC frame.) This means it gets sent out as a single frame, with a single seqno, it's acked as one frame, etc. It turns out that, hah, atheros fast frames is almost but not quite like this, so I'm reusing all of the current superg/fast-frames stuff in order to actually transmit A-MSDU. Yes, this means that A-MSDU frames are also only aggregated two at a time, so it's not necessarily a huge win, but it's better than nothing. This doesn't do anything by default - the driver needs to say it does A-MSDU as well as set the AMSDU software TX capability so this code path gets exercised. For now, the only driver that enables this is urtwn. I'll enable it for rsu at some point soon. Tested: * Add an amsdu encap path to aggregate two frames, same as the fast-frames path. * Always do the superg init/teardown and node init/teardown stuff, regardless of whether the nodes are doing fast-frames (the ATH capability stuff.) That way we can reuse it for amsdu. * Don't do AMSDU for multicast/broadcast and EAPOL frames. * If we're doing A-MPDU, then don't bother doing FF/A-MSDU. We can likely do both together, but I don't want to change behaviour. * Teach the fast frames approx txtime logic to support the 11n rates. But, since we don't currently have a full "current rate" support, assume it's HT20, long-gi, etc. That way we overshoot on the TX time estimation, so we're always inside the requirements. (And we only aggregate two frames for now, so we're not really going to exceed that.) * Drop the maximum FF age default down to 2ms, otherwise we end up with some very annoyingly large latencies. TODO: * We only aggregate two ethernet frames, so I'm not checking the max A-MSDU size. But when it comes time to support >2 frames, we should obey that. Tested: * urtwn(4)
2016-04-06 01:21:51 +00:00
uint32_t frame_time;
/*
* Approximate the frame length to be transmitted. A swag to add
* the following maximal values to the skb payload:
* - 32: 802.11 encap + CRC
* - 24: encryption overhead (if wep bit)
* - 4 + 6: fast-frame header and padding
* - 16: 2 LLC FF tunnel headers
* - 14: 1 802.3 FF tunnel header (mbuf already accounts for 2nd)
*/
framelen = m1->m_pkthdr.len + 32 +
ATH_FF_MAX_HDR_PAD + ATH_FF_MAX_SEP_PAD + ATH_FF_MAX_HDR;
if (vap->iv_flags & IEEE80211_F_PRIVACY)
framelen += 24;
if (m2 != NULL)
framelen += m2->m_pkthdr.len;
[net80211] Initial A-MSDU support for testing / evaluation A-MSDU is another 11n aggregation mechanism where multiple ethernet frames get LLC encapsulated (so they have a length field), padded, and put in a single MPDU (802.11 MAC frame.) This means it gets sent out as a single frame, with a single seqno, it's acked as one frame, etc. It turns out that, hah, atheros fast frames is almost but not quite like this, so I'm reusing all of the current superg/fast-frames stuff in order to actually transmit A-MSDU. Yes, this means that A-MSDU frames are also only aggregated two at a time, so it's not necessarily a huge win, but it's better than nothing. This doesn't do anything by default - the driver needs to say it does A-MSDU as well as set the AMSDU software TX capability so this code path gets exercised. For now, the only driver that enables this is urtwn. I'll enable it for rsu at some point soon. Tested: * Add an amsdu encap path to aggregate two frames, same as the fast-frames path. * Always do the superg init/teardown and node init/teardown stuff, regardless of whether the nodes are doing fast-frames (the ATH capability stuff.) That way we can reuse it for amsdu. * Don't do AMSDU for multicast/broadcast and EAPOL frames. * If we're doing A-MPDU, then don't bother doing FF/A-MSDU. We can likely do both together, but I don't want to change behaviour. * Teach the fast frames approx txtime logic to support the 11n rates. But, since we don't currently have a full "current rate" support, assume it's HT20, long-gi, etc. That way we overshoot on the TX time estimation, so we're always inside the requirements. (And we only aggregate two frames for now, so we're not really going to exceed that.) * Drop the maximum FF age default down to 2ms, otherwise we end up with some very annoyingly large latencies. TODO: * We only aggregate two ethernet frames, so I'm not checking the max A-MSDU size. But when it comes time to support >2 frames, we should obey that. Tested: * urtwn(4)
2016-04-06 01:21:51 +00:00
/*
* For now, we assume non-shortgi, 20MHz, just because I want to
* at least test 802.11n.
*/
if (ni->ni_txrate & IEEE80211_RATE_MCS)
frame_time = ieee80211_compute_duration_ht(framelen,
ni->ni_txrate,
IEEE80211_HT_RC_2_STREAMS(ni->ni_txrate),
0, /* isht40 */
0); /* isshortgi */
else
frame_time = ieee80211_compute_duration(ic->ic_rt, framelen,
ni->ni_txrate, 0);
return (frame_time);
}
/*
* Check if the supplied frame can be partnered with an existing
* or pending frame. Return a reference to any frame that should be
* sent on return; otherwise return NULL.
*/
struct mbuf *
ieee80211_ff_check(struct ieee80211_node *ni, struct mbuf *m)
{
struct ieee80211vap *vap = ni->ni_vap;
struct ieee80211com *ic = ni->ni_ic;
struct ieee80211_superg *sg = ic->ic_superg;
const int pri = M_WME_GETAC(m);
struct ieee80211_stageq *sq;
struct ieee80211_tx_ampdu *tap;
struct mbuf *mstaged;
uint32_t txtime, limit;
Bring over my initial work from the net80211 TX locking branch. This patchset implements a new TX lock, covering both the per-VAP (and thus per-node) TX locking and the serialisation through to the underlying physical device. This implements the hard requirement that frames to the underlying physical device are scheduled to the underlying device in the same order that they are processed at the VAP layer. This includes adding extra encapsulation state (such as sequence numbers and CCMP IV numbers.) Any order mismatch here will result in dropped packets at the receiver. There are multiple transmit contexts from the upper protocol layers as well as the "raw" interface via the management and BPF transmit paths. All of these need to be correctly serialised or bad behaviour will result under load. The specifics: * add a new TX IC lock - it will eventually just be used for serialisation to the underlying physical device but for now it's used for both the VAP encapsulation/serialisation and the physical device dispatch. This lock is specifically non-recursive. * Methodize the parent transmit, vap transmit and ic_raw_xmit function pointers; use lock assertions in the parent/vap transmit routines. * Add a lock assertion in ieee80211_encap() - the TX lock must be held here to guarantee sensible behaviour. * Refactor out the packet sending code from ieee80211_start() - now ieee80211_start() is just a loop over the ifnet queue and it dispatches each VAP packet send through ieee80211_start_pkt(). Yes, I will likely rename ieee80211_start_pkt() to something that better reflects its status as a VAP packet transmit path. More on that later. * Add locking around the management and BAR TX sending - to ensure that encapsulation and TX are done hand-in-hand. * Add locking in the mesh code - again, to ensure that encapsulation and mesh transmit are done hand-in-hand. * Add locking around the power save queue and ageq handling, when dispatching to the parent interface. * Add locking around the WDS handoff. * Add a note in the mesh dispatch code that the TX path needs to be re-thought-out - right now it's doing a direct parent device transmit rather than going via the vap layer. It may "work", but it's likely incorrect (as it bypasses any possible per-node power save and aggregation handling.) Why not a per-VAP or per-node lock? Because in order to ensure per-VAP ordering, we'd have to hold the VAP lock across parent->if_transmit(). There are a few problems with this: * There's some state being setup during each driver transmit - specifically, the encryption encap / CCMP IV setup. That should eventually be dragged back into the encapsulation phase but for now it lives in the driver TX path. This should be locked. * Two drivers (ath, iwn) re-use the node->ni_txseqs array in order to allocate sequence numbers when doing transmit aggregation. This should also be locked. * Drivers may have multiple frames queued already - so when one calls if_transmit(), it may end up dispatching multiple frames for different VAPs/nodes, each needing a different lock when handling that particular end destination. So to be "correct" locking-wise, we'd end up needing to grab a VAP or node lock inside the driver TX path when setting up crypto / AMPDU sequence numbers, and we may already _have_ a TX lock held - mostly for the same destination vap/node, but sometimes it'll be for others. That could lead to LORs and thus deadlocks. So for now, I'm sticking with an IC TX lock. It has the advantage of papering over the above and it also has the added advantage that I can assert that it's being held when doing a parent device transmit. I'll look at splitting the locks out a bit more later on. General outstanding net80211 TX path issues / TODO: * Look into separating out the VAP serialisation and the IC handoff. It's going to be tricky as parent->if_transmit() doesn't give me the opportunity to split queuing from driver dispatch. See above. * Work with monthadar to fix up the mesh transmit path so it doesn't go via the parent interface when retransmitting frames. * Push the encryption handling back into the driver, if it's at all architectually sane to do so. I know it's possible - it's what mac80211 in Linux does. * Make ieee80211_raw_xmit() queue a frame into VAP or parent queue rather than doing a short-cut direct into the driver. There are QoS issues here - you do want your management frames to be encapsulated and pushed onto the stack sooner than the (large, bursty) amount of data frames that are queued. But there has to be a saner way to do this. * Fragments are still broken - drivers need to be upgraded to an if_transmit() implementation and then fragmentation handling needs to be properly fixed. Tested: * STA - AR5416, AR9280, Intel 5300 abgn wifi * Hostap - AR5416, AR9160, AR9280 * Mesh - some testing by monthadar@, more to come.
2013-03-08 20:23:55 +00:00
IEEE80211_TX_UNLOCK_ASSERT(ic);
IEEE80211_LOCK(ic);
limit = IEEE80211_TXOP_TO_US(
ic->ic_wme.wme_chanParams.cap_wmeParams[pri].wmep_txopLimit);
IEEE80211_UNLOCK(ic);
/*
* Check if the supplied frame can be aggregated.
*
* NB: we allow EAPOL frames to be aggregated with other ucast traffic.
* Do 802.1x EAPOL frames proceed in the clear? Then they couldn't
* be aggregated with other types of frames when encryption is on?
*/
IEEE80211_FF_LOCK(ic);
tap = &ni->ni_tx_ampdu[WME_AC_TO_TID(pri)];
Migrate the fast-frames transmit support away from using the txa_private field and into a separate fast-frames staging pointer in ieee80211_node. The A-MPDU TX path allows txa_private to be used by drivers. So it will clash with any attempt to use fast-frames. Now, fast-frames is not really anything special - it's just a custom ethernet frame type that contains two MSDUs into one MPDU. So all the NIC has to support doing is transmitting up to a 4KiB frame with an arbitrary ethertype and bam! Fast-frames. However, using txa_private means we can /either/ do fast-frames or A-MPDU TX, so fast frames has been turned off in the Atheros HAL for 11n chipsets. This is a bit silly - it actually means that 802.11 performance to/from 11abg Atheros chips is actually better than between an 11abg atheros device and an 11n Atheros device. So: * create a new mbuf staging queue for fast frames. It only queues a single frame in the staging queue (and there's a top-level ic staging queue used for expiry/tracking) so it's just an mbuf pointer per TID. * Still use the ampdu TX packet counter to determine whether to do aggregation or not. It'll double count if we start doing both A-MPDU TX and fast frames, but that's not all that important right now. * Initialise the pps tracker so ticks isn't zero. This ensures that fast-frames actually gets used - without it, the ticks math overflows and the pps math always sets txa_pps=0. This is the same bug that plagued A-MPDU TX starting logic. This actually allows fast-frames transmit to occur between the AR9331 (in 11n HT/20 mode) and AR9170 (if_otus) in 11bg mode. Now, this is a great big no-op on atheros 11n hardware, so don't worry. It may mean you start seeing more reliable fast-frames transmission on 11abg hardware which may expose some more amusing bugs. TODO: * further testing and debugging of all of this before flipping on fast-frames in if_ath (for 11n) and if_otus.
2015-09-28 00:59:07 +00:00
mstaged = ni->ni_tx_superg[WME_AC_TO_TID(pri)];
/* XXX NOTE: reusing packet counter state from A-MPDU */
/*
* XXX NOTE: this means we're double-counting; it should just
* be done in ieee80211_output.c once for both superg and A-MPDU.
*/
ieee80211_txampdu_count_packet(tap);
/*
* When not in station mode never aggregate a multicast
* frame; this insures, for example, that a combined frame
* does not require multiple encryption keys.
*/
if (vap->iv_opmode != IEEE80211_M_STA &&
ETHER_IS_MULTICAST(mtod(m, struct ether_header *)->ether_dhost)) {
/* XXX flush staged frame? */
IEEE80211_FF_UNLOCK(ic);
return m;
}
/*
* If there is no frame to combine with and the pps is
* too low; then do not attempt to aggregate this frame.
*/
if (mstaged == NULL &&
ieee80211_txampdu_getpps(tap) < ieee80211_ffppsmin) {
IEEE80211_FF_UNLOCK(ic);
return m;
}
sq = &sg->ff_stageq[pri];
/*
* Check the txop limit to insure the aggregate fits.
*/
if (limit != 0 &&
(txtime = ff_approx_txtime(ni, m, mstaged)) > limit) {
/*
* Aggregate too long, return to the caller for direct
* transmission. In addition, flush any pending frame
* before sending this one.
*/
IEEE80211_DPRINTF(vap, IEEE80211_MSG_SUPERG,
"%s: txtime %u exceeds txop limit %u\n",
__func__, txtime, limit);
Migrate the fast-frames transmit support away from using the txa_private field and into a separate fast-frames staging pointer in ieee80211_node. The A-MPDU TX path allows txa_private to be used by drivers. So it will clash with any attempt to use fast-frames. Now, fast-frames is not really anything special - it's just a custom ethernet frame type that contains two MSDUs into one MPDU. So all the NIC has to support doing is transmitting up to a 4KiB frame with an arbitrary ethertype and bam! Fast-frames. However, using txa_private means we can /either/ do fast-frames or A-MPDU TX, so fast frames has been turned off in the Atheros HAL for 11n chipsets. This is a bit silly - it actually means that 802.11 performance to/from 11abg Atheros chips is actually better than between an 11abg atheros device and an 11n Atheros device. So: * create a new mbuf staging queue for fast frames. It only queues a single frame in the staging queue (and there's a top-level ic staging queue used for expiry/tracking) so it's just an mbuf pointer per TID. * Still use the ampdu TX packet counter to determine whether to do aggregation or not. It'll double count if we start doing both A-MPDU TX and fast frames, but that's not all that important right now. * Initialise the pps tracker so ticks isn't zero. This ensures that fast-frames actually gets used - without it, the ticks math overflows and the pps math always sets txa_pps=0. This is the same bug that plagued A-MPDU TX starting logic. This actually allows fast-frames transmit to occur between the AR9331 (in 11n HT/20 mode) and AR9170 (if_otus) in 11bg mode. Now, this is a great big no-op on atheros 11n hardware, so don't worry. It may mean you start seeing more reliable fast-frames transmission on 11abg hardware which may expose some more amusing bugs. TODO: * further testing and debugging of all of this before flipping on fast-frames in if_ath (for 11n) and if_otus.
2015-09-28 00:59:07 +00:00
ni->ni_tx_superg[WME_AC_TO_TID(pri)] = NULL;
if (mstaged != NULL)
stageq_remove(ic, sq, mstaged);
IEEE80211_FF_UNLOCK(ic);
if (mstaged != NULL) {
Bring over my initial work from the net80211 TX locking branch. This patchset implements a new TX lock, covering both the per-VAP (and thus per-node) TX locking and the serialisation through to the underlying physical device. This implements the hard requirement that frames to the underlying physical device are scheduled to the underlying device in the same order that they are processed at the VAP layer. This includes adding extra encapsulation state (such as sequence numbers and CCMP IV numbers.) Any order mismatch here will result in dropped packets at the receiver. There are multiple transmit contexts from the upper protocol layers as well as the "raw" interface via the management and BPF transmit paths. All of these need to be correctly serialised or bad behaviour will result under load. The specifics: * add a new TX IC lock - it will eventually just be used for serialisation to the underlying physical device but for now it's used for both the VAP encapsulation/serialisation and the physical device dispatch. This lock is specifically non-recursive. * Methodize the parent transmit, vap transmit and ic_raw_xmit function pointers; use lock assertions in the parent/vap transmit routines. * Add a lock assertion in ieee80211_encap() - the TX lock must be held here to guarantee sensible behaviour. * Refactor out the packet sending code from ieee80211_start() - now ieee80211_start() is just a loop over the ifnet queue and it dispatches each VAP packet send through ieee80211_start_pkt(). Yes, I will likely rename ieee80211_start_pkt() to something that better reflects its status as a VAP packet transmit path. More on that later. * Add locking around the management and BAR TX sending - to ensure that encapsulation and TX are done hand-in-hand. * Add locking in the mesh code - again, to ensure that encapsulation and mesh transmit are done hand-in-hand. * Add locking around the power save queue and ageq handling, when dispatching to the parent interface. * Add locking around the WDS handoff. * Add a note in the mesh dispatch code that the TX path needs to be re-thought-out - right now it's doing a direct parent device transmit rather than going via the vap layer. It may "work", but it's likely incorrect (as it bypasses any possible per-node power save and aggregation handling.) Why not a per-VAP or per-node lock? Because in order to ensure per-VAP ordering, we'd have to hold the VAP lock across parent->if_transmit(). There are a few problems with this: * There's some state being setup during each driver transmit - specifically, the encryption encap / CCMP IV setup. That should eventually be dragged back into the encapsulation phase but for now it lives in the driver TX path. This should be locked. * Two drivers (ath, iwn) re-use the node->ni_txseqs array in order to allocate sequence numbers when doing transmit aggregation. This should also be locked. * Drivers may have multiple frames queued already - so when one calls if_transmit(), it may end up dispatching multiple frames for different VAPs/nodes, each needing a different lock when handling that particular end destination. So to be "correct" locking-wise, we'd end up needing to grab a VAP or node lock inside the driver TX path when setting up crypto / AMPDU sequence numbers, and we may already _have_ a TX lock held - mostly for the same destination vap/node, but sometimes it'll be for others. That could lead to LORs and thus deadlocks. So for now, I'm sticking with an IC TX lock. It has the advantage of papering over the above and it also has the added advantage that I can assert that it's being held when doing a parent device transmit. I'll look at splitting the locks out a bit more later on. General outstanding net80211 TX path issues / TODO: * Look into separating out the VAP serialisation and the IC handoff. It's going to be tricky as parent->if_transmit() doesn't give me the opportunity to split queuing from driver dispatch. See above. * Work with monthadar to fix up the mesh transmit path so it doesn't go via the parent interface when retransmitting frames. * Push the encryption handling back into the driver, if it's at all architectually sane to do so. I know it's possible - it's what mac80211 in Linux does. * Make ieee80211_raw_xmit() queue a frame into VAP or parent queue rather than doing a short-cut direct into the driver. There are QoS issues here - you do want your management frames to be encapsulated and pushed onto the stack sooner than the (large, bursty) amount of data frames that are queued. But there has to be a saner way to do this. * Fragments are still broken - drivers need to be upgraded to an if_transmit() implementation and then fragmentation handling needs to be properly fixed. Tested: * STA - AR5416, AR9280, Intel 5300 abgn wifi * Hostap - AR5416, AR9160, AR9280 * Mesh - some testing by monthadar@, more to come.
2013-03-08 20:23:55 +00:00
IEEE80211_TX_LOCK(ic);
IEEE80211_NOTE(vap, IEEE80211_MSG_SUPERG, ni,
"%s: flush staged frame", __func__);
/* encap and xmit */
ff_transmit(ni, mstaged);
Bring over my initial work from the net80211 TX locking branch. This patchset implements a new TX lock, covering both the per-VAP (and thus per-node) TX locking and the serialisation through to the underlying physical device. This implements the hard requirement that frames to the underlying physical device are scheduled to the underlying device in the same order that they are processed at the VAP layer. This includes adding extra encapsulation state (such as sequence numbers and CCMP IV numbers.) Any order mismatch here will result in dropped packets at the receiver. There are multiple transmit contexts from the upper protocol layers as well as the "raw" interface via the management and BPF transmit paths. All of these need to be correctly serialised or bad behaviour will result under load. The specifics: * add a new TX IC lock - it will eventually just be used for serialisation to the underlying physical device but for now it's used for both the VAP encapsulation/serialisation and the physical device dispatch. This lock is specifically non-recursive. * Methodize the parent transmit, vap transmit and ic_raw_xmit function pointers; use lock assertions in the parent/vap transmit routines. * Add a lock assertion in ieee80211_encap() - the TX lock must be held here to guarantee sensible behaviour. * Refactor out the packet sending code from ieee80211_start() - now ieee80211_start() is just a loop over the ifnet queue and it dispatches each VAP packet send through ieee80211_start_pkt(). Yes, I will likely rename ieee80211_start_pkt() to something that better reflects its status as a VAP packet transmit path. More on that later. * Add locking around the management and BAR TX sending - to ensure that encapsulation and TX are done hand-in-hand. * Add locking in the mesh code - again, to ensure that encapsulation and mesh transmit are done hand-in-hand. * Add locking around the power save queue and ageq handling, when dispatching to the parent interface. * Add locking around the WDS handoff. * Add a note in the mesh dispatch code that the TX path needs to be re-thought-out - right now it's doing a direct parent device transmit rather than going via the vap layer. It may "work", but it's likely incorrect (as it bypasses any possible per-node power save and aggregation handling.) Why not a per-VAP or per-node lock? Because in order to ensure per-VAP ordering, we'd have to hold the VAP lock across parent->if_transmit(). There are a few problems with this: * There's some state being setup during each driver transmit - specifically, the encryption encap / CCMP IV setup. That should eventually be dragged back into the encapsulation phase but for now it lives in the driver TX path. This should be locked. * Two drivers (ath, iwn) re-use the node->ni_txseqs array in order to allocate sequence numbers when doing transmit aggregation. This should also be locked. * Drivers may have multiple frames queued already - so when one calls if_transmit(), it may end up dispatching multiple frames for different VAPs/nodes, each needing a different lock when handling that particular end destination. So to be "correct" locking-wise, we'd end up needing to grab a VAP or node lock inside the driver TX path when setting up crypto / AMPDU sequence numbers, and we may already _have_ a TX lock held - mostly for the same destination vap/node, but sometimes it'll be for others. That could lead to LORs and thus deadlocks. So for now, I'm sticking with an IC TX lock. It has the advantage of papering over the above and it also has the added advantage that I can assert that it's being held when doing a parent device transmit. I'll look at splitting the locks out a bit more later on. General outstanding net80211 TX path issues / TODO: * Look into separating out the VAP serialisation and the IC handoff. It's going to be tricky as parent->if_transmit() doesn't give me the opportunity to split queuing from driver dispatch. See above. * Work with monthadar to fix up the mesh transmit path so it doesn't go via the parent interface when retransmitting frames. * Push the encryption handling back into the driver, if it's at all architectually sane to do so. I know it's possible - it's what mac80211 in Linux does. * Make ieee80211_raw_xmit() queue a frame into VAP or parent queue rather than doing a short-cut direct into the driver. There are QoS issues here - you do want your management frames to be encapsulated and pushed onto the stack sooner than the (large, bursty) amount of data frames that are queued. But there has to be a saner way to do this. * Fragments are still broken - drivers need to be upgraded to an if_transmit() implementation and then fragmentation handling needs to be properly fixed. Tested: * STA - AR5416, AR9280, Intel 5300 abgn wifi * Hostap - AR5416, AR9160, AR9280 * Mesh - some testing by monthadar@, more to come.
2013-03-08 20:23:55 +00:00
IEEE80211_TX_UNLOCK(ic);
}
return m; /* NB: original frame */
}
/*
* An aggregation candidate. If there's a frame to partner
* with then combine and return for processing. Otherwise
* save this frame and wait for a partner to show up (or
* the frame to be flushed). Note that staged frames also
* hold their node reference.
*/
if (mstaged != NULL) {
Migrate the fast-frames transmit support away from using the txa_private field and into a separate fast-frames staging pointer in ieee80211_node. The A-MPDU TX path allows txa_private to be used by drivers. So it will clash with any attempt to use fast-frames. Now, fast-frames is not really anything special - it's just a custom ethernet frame type that contains two MSDUs into one MPDU. So all the NIC has to support doing is transmitting up to a 4KiB frame with an arbitrary ethertype and bam! Fast-frames. However, using txa_private means we can /either/ do fast-frames or A-MPDU TX, so fast frames has been turned off in the Atheros HAL for 11n chipsets. This is a bit silly - it actually means that 802.11 performance to/from 11abg Atheros chips is actually better than between an 11abg atheros device and an 11n Atheros device. So: * create a new mbuf staging queue for fast frames. It only queues a single frame in the staging queue (and there's a top-level ic staging queue used for expiry/tracking) so it's just an mbuf pointer per TID. * Still use the ampdu TX packet counter to determine whether to do aggregation or not. It'll double count if we start doing both A-MPDU TX and fast frames, but that's not all that important right now. * Initialise the pps tracker so ticks isn't zero. This ensures that fast-frames actually gets used - without it, the ticks math overflows and the pps math always sets txa_pps=0. This is the same bug that plagued A-MPDU TX starting logic. This actually allows fast-frames transmit to occur between the AR9331 (in 11n HT/20 mode) and AR9170 (if_otus) in 11bg mode. Now, this is a great big no-op on atheros 11n hardware, so don't worry. It may mean you start seeing more reliable fast-frames transmission on 11abg hardware which may expose some more amusing bugs. TODO: * further testing and debugging of all of this before flipping on fast-frames in if_ath (for 11n) and if_otus.
2015-09-28 00:59:07 +00:00
ni->ni_tx_superg[WME_AC_TO_TID(pri)] = NULL;
stageq_remove(ic, sq, mstaged);
IEEE80211_FF_UNLOCK(ic);
IEEE80211_NOTE(vap, IEEE80211_MSG_SUPERG, ni,
"%s: aggregate fast-frame", __func__);
/*
* Release the node reference; we only need
* the one already in mstaged.
*/
KASSERT(mstaged->m_pkthdr.rcvif == (void *)ni,
("rcvif %p ni %p", mstaged->m_pkthdr.rcvif, ni));
ieee80211_free_node(ni);
m->m_nextpkt = NULL;
mstaged->m_nextpkt = m;
mstaged->m_flags |= M_FF; /* NB: mark for encap work */
} else {
KASSERT(ni->ni_tx_superg[WME_AC_TO_TID(pri)] == NULL,
Migrate the fast-frames transmit support away from using the txa_private field and into a separate fast-frames staging pointer in ieee80211_node. The A-MPDU TX path allows txa_private to be used by drivers. So it will clash with any attempt to use fast-frames. Now, fast-frames is not really anything special - it's just a custom ethernet frame type that contains two MSDUs into one MPDU. So all the NIC has to support doing is transmitting up to a 4KiB frame with an arbitrary ethertype and bam! Fast-frames. However, using txa_private means we can /either/ do fast-frames or A-MPDU TX, so fast frames has been turned off in the Atheros HAL for 11n chipsets. This is a bit silly - it actually means that 802.11 performance to/from 11abg Atheros chips is actually better than between an 11abg atheros device and an 11n Atheros device. So: * create a new mbuf staging queue for fast frames. It only queues a single frame in the staging queue (and there's a top-level ic staging queue used for expiry/tracking) so it's just an mbuf pointer per TID. * Still use the ampdu TX packet counter to determine whether to do aggregation or not. It'll double count if we start doing both A-MPDU TX and fast frames, but that's not all that important right now. * Initialise the pps tracker so ticks isn't zero. This ensures that fast-frames actually gets used - without it, the ticks math overflows and the pps math always sets txa_pps=0. This is the same bug that plagued A-MPDU TX starting logic. This actually allows fast-frames transmit to occur between the AR9331 (in 11n HT/20 mode) and AR9170 (if_otus) in 11bg mode. Now, this is a great big no-op on atheros 11n hardware, so don't worry. It may mean you start seeing more reliable fast-frames transmission on 11abg hardware which may expose some more amusing bugs. TODO: * further testing and debugging of all of this before flipping on fast-frames in if_ath (for 11n) and if_otus.
2015-09-28 00:59:07 +00:00
("ni_tx_superg[]: %p",
ni->ni_tx_superg[WME_AC_TO_TID(pri)]));
ni->ni_tx_superg[WME_AC_TO_TID(pri)] = m;
stageq_add(ic, sq, m);
IEEE80211_FF_UNLOCK(ic);
IEEE80211_NOTE(vap, IEEE80211_MSG_SUPERG, ni,
"%s: stage frame, %u queued", __func__, sq->depth);
/* NB: mstaged is NULL */
}
return mstaged;
}
[net80211] Initial A-MSDU support for testing / evaluation A-MSDU is another 11n aggregation mechanism where multiple ethernet frames get LLC encapsulated (so they have a length field), padded, and put in a single MPDU (802.11 MAC frame.) This means it gets sent out as a single frame, with a single seqno, it's acked as one frame, etc. It turns out that, hah, atheros fast frames is almost but not quite like this, so I'm reusing all of the current superg/fast-frames stuff in order to actually transmit A-MSDU. Yes, this means that A-MSDU frames are also only aggregated two at a time, so it's not necessarily a huge win, but it's better than nothing. This doesn't do anything by default - the driver needs to say it does A-MSDU as well as set the AMSDU software TX capability so this code path gets exercised. For now, the only driver that enables this is urtwn. I'll enable it for rsu at some point soon. Tested: * Add an amsdu encap path to aggregate two frames, same as the fast-frames path. * Always do the superg init/teardown and node init/teardown stuff, regardless of whether the nodes are doing fast-frames (the ATH capability stuff.) That way we can reuse it for amsdu. * Don't do AMSDU for multicast/broadcast and EAPOL frames. * If we're doing A-MPDU, then don't bother doing FF/A-MSDU. We can likely do both together, but I don't want to change behaviour. * Teach the fast frames approx txtime logic to support the 11n rates. But, since we don't currently have a full "current rate" support, assume it's HT20, long-gi, etc. That way we overshoot on the TX time estimation, so we're always inside the requirements. (And we only aggregate two frames for now, so we're not really going to exceed that.) * Drop the maximum FF age default down to 2ms, otherwise we end up with some very annoyingly large latencies. TODO: * We only aggregate two ethernet frames, so I'm not checking the max A-MSDU size. But when it comes time to support >2 frames, we should obey that. Tested: * urtwn(4)
2016-04-06 01:21:51 +00:00
struct mbuf *
ieee80211_amsdu_check(struct ieee80211_node *ni, struct mbuf *m)
{
/*
* XXX TODO: actually enforce the node support
* and HTCAP requirements for the maximum A-MSDU
* size.
*/
/* First: software A-MSDU transmit? */
if (! ieee80211_amsdu_tx_ok(ni))
return (m);
/* Next - EAPOL? Nope, don't aggregate; we don't QoS encap them */
if (m->m_flags & (M_EAPOL | M_MCAST | M_BCAST))
return (m);
/* Next - needs to be a data frame, non-broadcast, etc */
if (ETHER_IS_MULTICAST(mtod(m, struct ether_header *)->ether_dhost))
return (m);
return (ieee80211_ff_check(ni, m));
}
void
ieee80211_ff_node_init(struct ieee80211_node *ni)
{
/*
* Clean FF state on re-associate. This handles the case
* where a station leaves w/o notifying us and then returns
* before node is reaped for inactivity.
*/
ieee80211_ff_node_cleanup(ni);
}
[net80211] remove node scan lock / generation number + fix few LORs Drop scan generation number and node table scan lock - the only place where ni_scangen is checked is in ieee80211_timeout_stations() (and it is used to prevent duplicate checking of the same node); node scan lock protects only this variable + node table scan generation number. This will fix (at least) next LOR (hostap mode): lock order reversal: 1st 0xc175f84c urtwm0_scan_loc (urtwm0_scan_loc) @ /usr/src/sys/modules/wlan/../../net80211/ieee80211_node.c:2019 2nd 0xc175e018 urtwm0_com_lock (urtwm0_com_lock) @ /usr/src/sys/modules/wlan/../../net80211/ieee80211_node.c:2693 stack backtrace: #0 0xa070d1c5 at witness_debugger+0x75 #1 0xa070d0f6 at witness_checkorder+0xd46 #2 0xa0694cce at __mtx_lock_flags+0x9e #3 0xb03ad9ef at ieee80211_node_leave+0x12f #4 0xb03afd13 at ieee80211_timeout_stations+0x483 #5 0xb03aa1c2 at ieee80211_node_timeout+0x42 #6 0xa06c6fa1 at softclock_call_cc+0x1e1 #7 0xa06c7518 at softclock+0xc8 #8 0xa06789ae at intr_event_execute_handlers+0x8e #9 0xa0678fa0 at ithread_loop+0x90 #10 0xa0675fbe at fork_exit+0x7e #11 0xa08af910 at fork_trampoline+0x8 In addition to the above: * switch to ieee80211_iterate_nodes(); * do not assert that node table lock is held, while calling node_age(); that's not really needed (there are no resources, which can be protected by this lock) + this fixes LOR/deadlock between ieee80211_timeout_stations() and ieee80211_set_tim() (easy to reproduce in HOSTAP mode while sending something to an STA with enabled power management). Tested: * (avos) urtwn0, hostap mode * (adrian) AR9380, STA mode * (adrian) AR9380, AR9331, AR9580, hostap mode Notes: * This changes the net80211 internals, so you have to recompile all of it and the wifi drivers. Submitted by: avos Approved by: re (delphij) Differential Revision: https://reviews.freebsd.org/D6833
2016-06-19 07:31:02 +00:00
/*
* Note: this comlock acquisition LORs with the node lock:
*
* 1: sta_join1 -> NODE_LOCK -> node_free -> node_cleanup -> ff_node_cleanup -> COM_LOCK
* 2: TBD
*/
void
ieee80211_ff_node_cleanup(struct ieee80211_node *ni)
{
struct ieee80211com *ic = ni->ni_ic;
struct ieee80211_superg *sg = ic->ic_superg;
struct mbuf *m, *next_m, *head;
int tid;
IEEE80211_FF_LOCK(ic);
head = NULL;
for (tid = 0; tid < WME_NUM_TID; tid++) {
int ac = TID_TO_WME_AC(tid);
Migrate the fast-frames transmit support away from using the txa_private field and into a separate fast-frames staging pointer in ieee80211_node. The A-MPDU TX path allows txa_private to be used by drivers. So it will clash with any attempt to use fast-frames. Now, fast-frames is not really anything special - it's just a custom ethernet frame type that contains two MSDUs into one MPDU. So all the NIC has to support doing is transmitting up to a 4KiB frame with an arbitrary ethertype and bam! Fast-frames. However, using txa_private means we can /either/ do fast-frames or A-MPDU TX, so fast frames has been turned off in the Atheros HAL for 11n chipsets. This is a bit silly - it actually means that 802.11 performance to/from 11abg Atheros chips is actually better than between an 11abg atheros device and an 11n Atheros device. So: * create a new mbuf staging queue for fast frames. It only queues a single frame in the staging queue (and there's a top-level ic staging queue used for expiry/tracking) so it's just an mbuf pointer per TID. * Still use the ampdu TX packet counter to determine whether to do aggregation or not. It'll double count if we start doing both A-MPDU TX and fast frames, but that's not all that important right now. * Initialise the pps tracker so ticks isn't zero. This ensures that fast-frames actually gets used - without it, the ticks math overflows and the pps math always sets txa_pps=0. This is the same bug that plagued A-MPDU TX starting logic. This actually allows fast-frames transmit to occur between the AR9331 (in 11n HT/20 mode) and AR9170 (if_otus) in 11bg mode. Now, this is a great big no-op on atheros 11n hardware, so don't worry. It may mean you start seeing more reliable fast-frames transmission on 11abg hardware which may expose some more amusing bugs. TODO: * further testing and debugging of all of this before flipping on fast-frames in if_ath (for 11n) and if_otus.
2015-09-28 00:59:07 +00:00
/*
* XXX Initialise the packet counter.
*
* This may be double-work for 11n stations;
* but without it we never setup things.
*/
ieee80211_txampdu_init_pps(&ni->ni_tx_ampdu[tid]);
m = ni->ni_tx_superg[tid];
if (m != NULL) {
Migrate the fast-frames transmit support away from using the txa_private field and into a separate fast-frames staging pointer in ieee80211_node. The A-MPDU TX path allows txa_private to be used by drivers. So it will clash with any attempt to use fast-frames. Now, fast-frames is not really anything special - it's just a custom ethernet frame type that contains two MSDUs into one MPDU. So all the NIC has to support doing is transmitting up to a 4KiB frame with an arbitrary ethertype and bam! Fast-frames. However, using txa_private means we can /either/ do fast-frames or A-MPDU TX, so fast frames has been turned off in the Atheros HAL for 11n chipsets. This is a bit silly - it actually means that 802.11 performance to/from 11abg Atheros chips is actually better than between an 11abg atheros device and an 11n Atheros device. So: * create a new mbuf staging queue for fast frames. It only queues a single frame in the staging queue (and there's a top-level ic staging queue used for expiry/tracking) so it's just an mbuf pointer per TID. * Still use the ampdu TX packet counter to determine whether to do aggregation or not. It'll double count if we start doing both A-MPDU TX and fast frames, but that's not all that important right now. * Initialise the pps tracker so ticks isn't zero. This ensures that fast-frames actually gets used - without it, the ticks math overflows and the pps math always sets txa_pps=0. This is the same bug that plagued A-MPDU TX starting logic. This actually allows fast-frames transmit to occur between the AR9331 (in 11n HT/20 mode) and AR9170 (if_otus) in 11bg mode. Now, this is a great big no-op on atheros 11n hardware, so don't worry. It may mean you start seeing more reliable fast-frames transmission on 11abg hardware which may expose some more amusing bugs. TODO: * further testing and debugging of all of this before flipping on fast-frames in if_ath (for 11n) and if_otus.
2015-09-28 00:59:07 +00:00
ni->ni_tx_superg[tid] = NULL;
stageq_remove(ic, &sg->ff_stageq[ac], m);
m->m_nextpkt = head;
head = m;
}
}
IEEE80211_FF_UNLOCK(ic);
/*
* Free mbufs, taking care to not dereference the mbuf after
* we free it (hence grabbing m_nextpkt before we free it.)
*/
m = head;
while (m != NULL) {
next_m = m->m_nextpkt;
m_freem(m);
ieee80211_free_node(ni);
m = next_m;
}
}
/*
* Switch between turbo and non-turbo operating modes.
* Use the specified channel flags to locate the new
* channel, update 802.11 state, and then call back into
* the driver to effect the change.
*/
void
ieee80211_dturbo_switch(struct ieee80211vap *vap, int newflags)
{
struct ieee80211com *ic = vap->iv_ic;
struct ieee80211_channel *chan;
chan = ieee80211_find_channel(ic, ic->ic_bsschan->ic_freq, newflags);
if (chan == NULL) { /* XXX should not happen */
IEEE80211_DPRINTF(vap, IEEE80211_MSG_SUPERG,
"%s: no channel with freq %u flags 0x%x\n",
__func__, ic->ic_bsschan->ic_freq, newflags);
return;
}
IEEE80211_DPRINTF(vap, IEEE80211_MSG_SUPERG,
"%s: %s -> %s (freq %u flags 0x%x)\n", __func__,
ieee80211_phymode_name[ieee80211_chan2mode(ic->ic_bsschan)],
ieee80211_phymode_name[ieee80211_chan2mode(chan)],
chan->ic_freq, chan->ic_flags);
ic->ic_bsschan = chan;
ic->ic_prevchan = ic->ic_curchan;
ic->ic_curchan = chan;
ic->ic_rt = ieee80211_get_ratetable(chan);
ic->ic_set_channel(ic);
ieee80211_radiotap_chan_change(ic);
/* NB: do not need to reset ERP state 'cuz we're in sta mode */
}
/*
* Return the current ``state'' of an Atheros capbility.
* If associated in station mode report the negotiated
* setting. Otherwise report the current setting.
*/
static int
getathcap(struct ieee80211vap *vap, int cap)
{
if (vap->iv_opmode == IEEE80211_M_STA &&
vap->iv_state == IEEE80211_S_RUN)
return IEEE80211_ATH_CAP(vap, vap->iv_bss, cap) != 0;
else
return (vap->iv_flags & cap) != 0;
}
static int
superg_ioctl_get80211(struct ieee80211vap *vap, struct ieee80211req *ireq)
{
switch (ireq->i_type) {
case IEEE80211_IOC_FF:
ireq->i_val = getathcap(vap, IEEE80211_F_FF);
break;
case IEEE80211_IOC_TURBOP:
ireq->i_val = getathcap(vap, IEEE80211_F_TURBOP);
break;
default:
return ENOSYS;
}
return 0;
}
IEEE80211_IOCTL_GET(superg, superg_ioctl_get80211);
static int
superg_ioctl_set80211(struct ieee80211vap *vap, struct ieee80211req *ireq)
{
switch (ireq->i_type) {
case IEEE80211_IOC_FF:
if (ireq->i_val) {
if ((vap->iv_caps & IEEE80211_C_FF) == 0)
return EOPNOTSUPP;
vap->iv_flags |= IEEE80211_F_FF;
} else
vap->iv_flags &= ~IEEE80211_F_FF;
return ENETRESET;
case IEEE80211_IOC_TURBOP:
if (ireq->i_val) {
if ((vap->iv_caps & IEEE80211_C_TURBOP) == 0)
return EOPNOTSUPP;
vap->iv_flags |= IEEE80211_F_TURBOP;
} else
vap->iv_flags &= ~IEEE80211_F_TURBOP;
return ENETRESET;
default:
return ENOSYS;
}
}
IEEE80211_IOCTL_SET(superg, superg_ioctl_set80211);
#endif /* IEEE80211_SUPPORT_SUPERG */