make lacp's use_numa hashing aware of send tags
When I did the use_numa support, I missed the fact that there is a separate hash function for send tag NIC selection. So when use_numa is enabled, ktls offload does not work properly: it does not reliably allocate a send tag on the proper egress NIC, because different egress NICs are selected for send-tag allocation and for packet transmit. To fix this, this change: - refactors lacp_select_tx_port_by_hash() and lacp_select_tx_port() so that lacp_select_tx_port_by_hash() is always called by lacp_select_tx_port() - pre-shifts flowids to convert them to hashes when calling lacp_select_tx_port_by_hash() - adds a numa_domain field to if_snd_tag_alloc_params - plumbs the numa domain into places where we allocate send tags In testing with a NIC TLS setup on a NUMA machine, I see thousands of output errors before the change when enabling kern.ipc.tls.ifnet.permitted=1. After the change, I see no errors, and I see the NIC sysctl counters showing active TLS offload sessions. Reviewed by: rrs, hselasky, jhb Sponsored by: Netflix
This commit is contained in:
parent
11fcae0558
commit
3ea844b1da
@ -800,6 +800,7 @@ ktls_alloc_snd_tag(struct inpcb *inp, struct ktls_session *tls, bool force,
|
||||
params.hdr.type = IF_SND_TAG_TYPE_TLS;
|
||||
params.hdr.flowid = inp->inp_flowid;
|
||||
params.hdr.flowtype = inp->inp_flowtype;
|
||||
params.hdr.numa_domain = inp->inp_numa_domain;
|
||||
params.tls.inp = inp;
|
||||
params.tls.tls = tls;
|
||||
INP_RUNLOCK(inp);
|
||||
|
@ -832,13 +832,12 @@ lacp_stop(struct lagg_softc *sc)
|
||||
}
|
||||
|
||||
struct lagg_port *
|
||||
lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
|
||||
lacp_select_tx_port_by_hash(struct lagg_softc *sc, uint32_t hash, uint8_t numa_domain)
|
||||
{
|
||||
struct lacp_softc *lsc = LACP_SOFTC(sc);
|
||||
struct lacp_portmap *pm;
|
||||
struct lacp_port *lp;
|
||||
struct lacp_port **map;
|
||||
uint32_t hash;
|
||||
int count;
|
||||
|
||||
if (__predict_false(lsc->lsc_suppress_distributing)) {
|
||||
@ -854,10 +853,10 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
|
||||
|
||||
#ifdef NUMA
|
||||
if ((sc->sc_opts & LAGG_OPT_USE_NUMA) &&
|
||||
pm->pm_num_dom > 1 && m->m_pkthdr.numa_domain < MAXMEMDOM) {
|
||||
count = pm->pm_numa[m->m_pkthdr.numa_domain].count;
|
||||
pm->pm_num_dom > 1 && numa_domain < MAXMEMDOM) {
|
||||
count = pm->pm_numa[numa_domain].count;
|
||||
if (count > 0) {
|
||||
map = pm->pm_numa[m->m_pkthdr.numa_domain].map;
|
||||
map = pm->pm_numa[numa_domain].map;
|
||||
} else {
|
||||
/* No ports on this domain; use global hash. */
|
||||
map = pm->pm_map;
|
||||
@ -869,11 +868,6 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
|
||||
map = pm->pm_map;
|
||||
count = pm->pm_count;
|
||||
}
|
||||
if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
|
||||
M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
|
||||
hash = m->m_pkthdr.flowid >> sc->flowid_shift;
|
||||
else
|
||||
hash = m_ether_tcpip_hash(sc->sc_flags, m, lsc->lsc_hashkey);
|
||||
|
||||
hash %= count;
|
||||
lp = map[hash];
|
||||
@ -884,33 +878,22 @@ lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
|
||||
return (lp->lp_lagg);
|
||||
}
|
||||
|
||||
#if defined(RATELIMIT) || defined(KERN_TLS)
|
||||
struct lagg_port *
|
||||
lacp_select_tx_port_by_hash(struct lagg_softc *sc, uint32_t flowid)
|
||||
lacp_select_tx_port(struct lagg_softc *sc, struct mbuf *m)
|
||||
{
|
||||
struct lacp_softc *lsc = LACP_SOFTC(sc);
|
||||
struct lacp_portmap *pm;
|
||||
struct lacp_port *lp;
|
||||
uint32_t hash;
|
||||
uint8_t numa_domain;
|
||||
|
||||
if (__predict_false(lsc->lsc_suppress_distributing)) {
|
||||
LACP_DPRINTF((NULL, "%s: waiting transit\n", __func__));
|
||||
return (NULL);
|
||||
}
|
||||
if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) &&
|
||||
M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
|
||||
hash = m->m_pkthdr.flowid >> sc->flowid_shift;
|
||||
else
|
||||
hash = m_ether_tcpip_hash(sc->sc_flags, m, lsc->lsc_hashkey);
|
||||
|
||||
pm = &lsc->lsc_pmap[lsc->lsc_activemap];
|
||||
if (pm->pm_count == 0) {
|
||||
LACP_DPRINTF((NULL, "%s: no active aggregator\n", __func__));
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
hash = flowid >> sc->flowid_shift;
|
||||
hash %= pm->pm_count;
|
||||
lp = pm->pm_map[hash];
|
||||
|
||||
return (lp->lp_lagg);
|
||||
numa_domain = m->m_pkthdr.numa_domain;
|
||||
return (lacp_select_tx_port_by_hash(sc, hash, numa_domain));
|
||||
}
|
||||
#endif
|
||||
|
||||
/*
|
||||
* lacp_suppress_distributing: drop transmit packets for a while
|
||||
|
@ -293,9 +293,7 @@ struct lacp_softc {
|
||||
|
||||
struct mbuf *lacp_input(struct lagg_port *, struct mbuf *);
|
||||
struct lagg_port *lacp_select_tx_port(struct lagg_softc *, struct mbuf *);
|
||||
#if defined(RATELIMIT) || defined(KERN_TLS)
|
||||
struct lagg_port *lacp_select_tx_port_by_hash(struct lagg_softc *, uint32_t);
|
||||
#endif
|
||||
struct lagg_port *lacp_select_tx_port_by_hash(struct lagg_softc *, uint32_t, uint8_t);
|
||||
void lacp_attach(struct lagg_softc *);
|
||||
void lacp_detach(void *);
|
||||
void lacp_init(struct lagg_softc *);
|
||||
|
@ -1609,12 +1609,13 @@ mst_to_lst(struct m_snd_tag *mst)
|
||||
* contents.
|
||||
*/
|
||||
static struct lagg_port *
|
||||
lookup_snd_tag_port(struct ifnet *ifp, uint32_t flowid, uint32_t flowtype)
|
||||
lookup_snd_tag_port(struct ifnet *ifp, uint32_t flowid, uint32_t flowtype,
|
||||
uint8_t numa_domain)
|
||||
{
|
||||
struct lagg_softc *sc;
|
||||
struct lagg_port *lp;
|
||||
struct lagg_lb *lb;
|
||||
uint32_t p;
|
||||
uint32_t hash, p;
|
||||
|
||||
sc = ifp->if_softc;
|
||||
|
||||
@ -1634,7 +1635,8 @@ lookup_snd_tag_port(struct ifnet *ifp, uint32_t flowid, uint32_t flowtype)
|
||||
if ((sc->sc_opts & LAGG_OPT_USE_FLOWID) == 0 ||
|
||||
flowtype == M_HASHTYPE_NONE)
|
||||
return (NULL);
|
||||
return (lacp_select_tx_port_by_hash(sc, flowid));
|
||||
hash = flowid >> sc->flowid_shift;
|
||||
return (lacp_select_tx_port_by_hash(sc, hash, numa_domain));
|
||||
default:
|
||||
return (NULL);
|
||||
}
|
||||
@ -1654,7 +1656,8 @@ lagg_snd_tag_alloc(struct ifnet *ifp,
|
||||
sc = ifp->if_softc;
|
||||
|
||||
LAGG_RLOCK();
|
||||
lp = lookup_snd_tag_port(ifp, params->hdr.flowid, params->hdr.flowtype);
|
||||
lp = lookup_snd_tag_port(ifp, params->hdr.flowid,
|
||||
params->hdr.flowtype, params->hdr.numa_domain);
|
||||
if (lp == NULL) {
|
||||
LAGG_RUNLOCK();
|
||||
return (EOPNOTSUPP);
|
||||
|
@ -198,6 +198,7 @@ struct if_snd_tag_alloc_header {
|
||||
uint32_t type; /* send tag type, see IF_SND_TAG_XXX */
|
||||
uint32_t flowid; /* mbuf hash value */
|
||||
uint32_t flowtype; /* mbuf hash type */
|
||||
uint8_t numa_domain; /* numa domain of associated inp */
|
||||
};
|
||||
|
||||
struct if_snd_tag_alloc_rate_limit {
|
||||
|
@ -3258,6 +3258,7 @@ in_pcbattach_txrtlmt(struct inpcb *inp, struct ifnet *ifp,
|
||||
IF_SND_TAG_TYPE_UNLIMITED : IF_SND_TAG_TYPE_RATE_LIMIT,
|
||||
.rate_limit.hdr.flowid = flowid,
|
||||
.rate_limit.hdr.flowtype = flowtype,
|
||||
.rate_limit.hdr.numa_domain = inp->inp_numa_domain,
|
||||
.rate_limit.max_rate = max_pacing_rate,
|
||||
.rate_limit.flags = M_NOWAIT,
|
||||
};
|
||||
|
@ -1006,6 +1006,7 @@ rt_find_real_interface(struct ifnet *ifp, struct inpcb *inp, int *error)
|
||||
union if_snd_tag_alloc_params params = {
|
||||
.rate_limit.hdr.type = IF_SND_TAG_TYPE_RATE_LIMIT,
|
||||
.rate_limit.hdr.flowid = 1,
|
||||
.rate_limit.hdr.numa_domain = inp->inp_numa_domain,
|
||||
.rate_limit.max_rate = COMMON_RATE,
|
||||
.rate_limit.flags = M_NOWAIT,
|
||||
};
|
||||
|
Loading…
Reference in New Issue
Block a user