From 50575ce11cbe09fd4d7cfe1e11d8dcca209515b9 Mon Sep 17 00:00:00 2001 From: Andrew Gallatin Date: Thu, 25 Apr 2019 15:37:28 +0000 Subject: [PATCH] Track TCP connection's NUMA domain in the inpcb Drivers can now pass up NUMA domain information via the mbuf NUMA domain field. This information is then used by TCP syncache_socket() to associate that information with the inpcb. The domain information is then fed back into transmitted mbufs in ip{6}_output(). This mechanism is nearly identical to what is done to track RSS hash values in the inp_flowid. Follow-on changes will use this information for LACP egress port selection, binding TCP pacers to the appropriate NUMA domain, etc. Reviewed by: markj, kib, slavash, bz, scottl, jtl, tuexen Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D20028 --- sys/dev/cxgbe/t4_sge.c | 3 +++ sys/dev/mlx5/mlx5_en/mlx5_en_rx.c | 3 +++ sys/kern/uipc_mbuf.c | 3 +++ sys/netinet/in_pcb.c | 3 +++ sys/netinet/in_pcb.h | 2 +- sys/netinet/ip_output.c | 3 +++ sys/netinet/tcp_syncache.c | 3 +++ sys/netinet6/ip6_output.c | 3 +++ 8 files changed, 22 insertions(+), 1 deletion(-) diff --git a/sys/dev/cxgbe/t4_sge.c b/sys/dev/cxgbe/t4_sge.c index f69d0b4b9a06..0ac08d207cd4 100644 --- a/sys/dev/cxgbe/t4_sge.c +++ b/sys/dev/cxgbe/t4_sge.c @@ -2046,6 +2046,9 @@ t4_eth_rx(struct sge_iq *iq, const struct rss_header *rss, struct mbuf *m0) rxq->vlan_extraction++; } +#ifdef NUMA + m0->m_pkthdr.numa_domain = ifp->if_numa_domain; +#endif #if defined(INET) || defined(INET6) if (iq->flags & IQ_LRO_ENABLED) { if (sort_before_lro(lro)) { diff --git a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c index ab0bedc01b05..969bd685cbec 100644 --- a/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c +++ b/sys/dev/mlx5/mlx5_en/mlx5_en_rx.c @@ -520,6 +520,9 @@ mlx5e_poll_rx_cq(struct mlx5e_rq *rq, int budget) mlx5e_build_rx_mbuf(cqe, rq, mb, byte_cnt); rq->stats.bytes += byte_cnt; rq->stats.packets++; +#ifdef NUMA + mb->m_pkthdr.numa_domain = 
rq->ifp->if_numa_domain; +#endif #if !defined(HAVE_TCP_LRO_RX) tcp_lro_queue_mbuf(&rq->lro, mb); diff --git a/sys/kern/uipc_mbuf.c b/sys/kern/uipc_mbuf.c index ddc846c3efb9..96e7418f0bca 100644 --- a/sys/kern/uipc_mbuf.c +++ b/sys/kern/uipc_mbuf.c @@ -341,6 +341,9 @@ m_pkthdr_init(struct mbuf *m, int how) #endif m->m_data = m->m_pktdat; bzero(&m->m_pkthdr, sizeof(m->m_pkthdr)); +#ifdef NUMA + m->m_pkthdr.numa_domain = M_NODOM; +#endif #ifdef MAC /* If the label init fails, fail the alloc */ error = mac_mbuf_init(m, how); diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index 5c4e6d904718..af91d18ae2a8 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -510,6 +510,9 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo) if (inp == NULL) return (ENOBUFS); bzero(&inp->inp_start_zero, inp_zero_size); +#ifdef NUMA + inp->inp_numa_domain = M_NODOM; +#endif inp->inp_pcbinfo = pcbinfo; inp->inp_socket = so; inp->inp_cred = crhold(so->so_cred); diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 70e9d90d2180..d074ad7e0a21 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -272,7 +272,7 @@ struct inpcb { inp_hpts_calls :1, /* (i) from output hpts */ inp_input_calls :1, /* (i) from input hpts */ inp_spare_bits2 : 4; - uint8_t inp_spare_byte; /* Compiler hole */ + uint8_t inp_numa_domain; /* numa domain */ void *inp_ppcb; /* (i) pointer to per-protocol pcb */ struct socket *inp_socket; /* (i) back pointer to socket */ uint32_t inp_hptsslot; /* Hpts wheel slot this tcb is Lock(i&b) */ diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 45b928037293..48d3aa1aed03 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -247,6 +247,9 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, m->m_pkthdr.flowid = inp->inp_flowid; M_HASHTYPE_SET(m, inp->inp_flowtype); } +#ifdef NUMA + m->m_pkthdr.numa_domain = inp->inp_numa_domain; +#endif } if (ro == NULL) { diff --git 
a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 1d4cad70b50d..7f22973d8a0e 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -777,6 +777,9 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) if (m != NULL && M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) { inp->inp_flowid = m->m_pkthdr.flowid; inp->inp_flowtype = M_HASHTYPE_GET(m); +#ifdef NUMA + inp->inp_numa_domain = m->m_pkthdr.numa_domain; +#endif } /* diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index 2161a51e50f1..4ce7f896cbe3 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -322,6 +322,9 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, m->m_pkthdr.flowid = inp->inp_flowid; M_HASHTYPE_SET(m, inp->inp_flowtype); } +#ifdef NUMA + m->m_pkthdr.numa_domain = inp->inp_numa_domain; +#endif } #if defined(IPSEC) || defined(IPSEC_SUPPORT)