Track TCP connection's NUMA domain in the inpcb

Drivers can now pass up NUMA domain information via the
mbuf numa_domain field.  TCP's syncache_socket() then
records that domain in the inpcb, and ip_output() and
ip6_output() feed it back into transmitted mbufs. This
mechanism is nearly identical to how RSS hash values are
tracked in inp_flowid.

Follow-on changes will use this information for LACP egress
port selection, binding TCP pacers to the appropriate NUMA
domain, etc.

Reviewed by:	markj, kib, slavash, bz, scottl, jtl, tuexen
Sponsored by:	Netflix
Differential Revision:	https://reviews.freebsd.org/D20028
This commit is contained in:
Andrew Gallatin 2019-04-25 15:37:28 +00:00
parent e77f4eb2a0
commit 50575ce11c
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=346677
8 changed files with 22 additions and 1 deletions

View File

@ -2046,6 +2046,9 @@ t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0)
rxq->vlan_extraction++;
}
#ifdef NUMA
m0->m_pkthdr.numa_domain = ifp->if_numa_domain;
#endif
#if defined(INET) || defined(INET6)
if (iq->flags & IQ_LRO_ENABLED) {
if (sort_before_lro(lro)) {

View File

@ -520,6 +520,9 @@ mlx5e_poll_rx_cq(struct mlx5e_rq *rq, int budget)
mlx5e_build_rx_mbuf(cqe, rq, mb, byte_cnt);
rq->stats.bytes += byte_cnt;
rq->stats.packets++;
#ifdef NUMA
mb->m_pkthdr.numa_domain = rq->ifp->if_numa_domain;
#endif
#if !defined(HAVE_TCP_LRO_RX)
tcp_lro_queue_mbuf(&rq->lro, mb);

View File

@ -341,6 +341,9 @@ m_pkthdr_init(struct mbuf *m, int how)
#endif
m->m_data = m->m_pktdat;
bzero(&m->m_pkthdr, sizeof(m->m_pkthdr));
#ifdef NUMA
m->m_pkthdr.numa_domain = M_NODOM;
#endif
#ifdef MAC
/* If the label init fails, fail the alloc */
error = mac_mbuf_init(m, how);

View File

@ -510,6 +510,9 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
if (inp == NULL)
return (ENOBUFS);
bzero(&inp->inp_start_zero, inp_zero_size);
#ifdef NUMA
inp->inp_numa_domain = M_NODOM;
#endif
inp->inp_pcbinfo = pcbinfo;
inp->inp_socket = so;
inp->inp_cred = crhold(so->so_cred);

View File

@ -272,7 +272,7 @@ struct inpcb {
inp_hpts_calls :1, /* (i) from output hpts */
inp_input_calls :1, /* (i) from input hpts */
inp_spare_bits2 : 4;
uint8_t inp_spare_byte; /* Compiler hole */
uint8_t inp_numa_domain; /* numa domain */
void *inp_ppcb; /* (i) pointer to per-protocol pcb */
struct socket *inp_socket; /* (i) back pointer to socket */
uint32_t inp_hptsslot; /* Hpts wheel slot this tcb is Lock(i&b) */

View File

@ -247,6 +247,9 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
m->m_pkthdr.flowid = inp->inp_flowid;
M_HASHTYPE_SET(m, inp->inp_flowtype);
}
#ifdef NUMA
m->m_pkthdr.numa_domain = inp->inp_numa_domain;
#endif
}
if (ro == NULL) {

View File

@ -777,6 +777,9 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
if (m != NULL && M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) {
inp->inp_flowid = m->m_pkthdr.flowid;
inp->inp_flowtype = M_HASHTYPE_GET(m);
#ifdef NUMA
inp->inp_numa_domain = m->m_pkthdr.numa_domain;
#endif
}
/*

View File

@ -322,6 +322,9 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
m->m_pkthdr.flowid = inp->inp_flowid;
M_HASHTYPE_SET(m, inp->inp_flowtype);
}
#ifdef NUMA
m->m_pkthdr.numa_domain = inp->inp_numa_domain;
#endif
}
#if defined(IPSEC) || defined(IPSEC_SUPPORT)