LACP: When suppressing distributing, return ENOBUFS

When links come and go, lacp goes into a "suppress distributing" mode
where it drops traffic for 3 seconds. When in this mode, lagg/lacp
historically drops traffic with ENETDOWN. That return value causes TCP
to close any connection where it gets that value back from the lower
parts of the stack.  This means that any TCP connection with active
traffic during a 3-second window when an LACP link comes or goes
would get closed.

TCP treats return values of ENOBUFS as transient errors, and re-schedules
transmission later. So rather than returning ENETDOWN, let's
return ENOBUFS instead.  This allows TCP connections to be preserved.

I've tested this by repeatedly bouncing links on a Netflix CDN server
under a moderate (20Gb/s) load and observed ENOBUFS reported back to
the TCP stack (as reported by a RACK TCP sysctl).

Reviewed by:	jhb, jtl, rrs
Sponsored by:	Netflix
Differential Revision:	https://reviews.freebsd.org/D27188
This commit is contained in:
Andrew Gallatin 2020-11-18 14:55:49 +00:00
parent 4f8f476e73
commit 8732245d29
3 changed files with 15 additions and 8 deletions

View File

@ -832,7 +832,8 @@ lacp_stop(struct lagg_softc *sc)
}
struct lagg_port *
lacp_select_tx_port_by_hash(struct lagg_softc *sc, uint32_t hash, uint8_t numa_domain)
lacp_select_tx_port_by_hash(struct lagg_softc *sc, uint32_t hash,
uint8_t numa_domain, int *err)
{
struct lacp_softc *lsc = LACP_SOFTC(sc);
struct lacp_portmap *pm;
@ -842,12 +843,14 @@ lacp_select_tx_port_by_hash(struct lagg_softc *sc, uint32_t hash, uint8_t numa_d
if (__predict_false(lsc->lsc_suppress_distributing)) {
LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__));
*err = ENOBUFS;
return (NULL);
}
pm = &lsc->lsc_pmap[lsc->lsc_activemap];
if (pm->pm_count == 0) {
LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__));
*err = ENETDOWN;
return (NULL);
}
@ -879,7 +882,7 @@ lacp_select_tx_port_by_hash(struct lagg_softc *sc, uint32_t hash, uint8_t numa_d
}
struct lagg_port *
lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m, int *err)
{
struct lacp_softc *lsc = LACP_SOFTC(sc);
uint32_t hash;
@ -892,7 +895,7 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
hash = m_ether_tcpip_hash(sc->sc_flags, m, lsc->lsc_hashkey);
numa_domain = m->m_pkthdr.numa_domain;
return (lacp_select_tx_port_by_hash(sc, hash, numa_domain));
return (lacp_select_tx_port_by_hash(sc, hash, numa_domain, err));
}
/*

View File

@ -292,8 +292,10 @@ struct lacp_softc {
#define LACP_LOCK_ASSERT(_lsc) mtx_assert(&(_lsc)->lsc_mtx, MA_OWNED)
struct mbuf *lacp_input(struct lagg_port *, struct mbuf *);
struct lagg_port *lacp_select_tx_port(struct lagg_softc *, struct mbuf *);
struct lagg_port *lacp_select_tx_port_by_hash(struct lagg_softc *, uint32_t, uint8_t);
struct lagg_port *lacp_select_tx_port(struct lagg_softc *, struct mbuf *,
int *);
struct lagg_port *lacp_select_tx_port_by_hash(struct lagg_softc *, uint32_t,
uint8_t, int *);
void lacp_attach(struct lagg_softc *);
void lacp_detach(void *);
void lacp_init(struct lagg_softc *);

View File

@ -1763,6 +1763,7 @@ lookup_snd_tag_port(struct ifnet *ifp, uint32_t flowid, uint32_t flowtype,
struct lagg_port *lp;
struct lagg_lb *lb;
uint32_t hash, p;
int err;
sc = ifp->if_softc;
@ -1783,7 +1784,7 @@ lookup_snd_tag_port(struct ifnet *ifp, uint32_t flowid, uint32_t flowtype,
flowtype == M_HASHTYPE_NONE)
return (NULL);
hash = flowid >> sc->flowid_shift;
return (lacp_select_tx_port_by_hash(sc, hash, numa_domain));
return (lacp_select_tx_port_by_hash(sc, hash, numa_domain, &err));
default:
return (NULL);
}
@ -2580,12 +2581,13 @@ static int
lagg_lacp_start(struct lagg_softc *sc, struct mbuf *m)
{
struct lagg_port *lp;
int err;
lp = lacp_select_tx_port(sc, m);
lp = lacp_select_tx_port(sc, m, &err);
if (lp == NULL) {
if_inc_counter(sc->sc_ifp, IFCOUNTER_OERRORS, 1);
m_freem(m);
return (ENETDOWN);
return (err);
}
/* Send mbuf */