From 2510235150927f9f2137261f7afdee04e7277cd8 Mon Sep 17 00:00:00 2001 From: Mike Karels Date: Mon, 18 May 2020 22:53:12 +0000 Subject: [PATCH] Allow TCP to reuse local port with different destinations Previously, tcp_connect() would bind a local port before connecting, forcing the local port to be unique across all outgoing TCP connections for the address family. Instead, choose a local port after selecting the destination and the local address, requiring only that the tuple is unique and does not match a wildcard binding. Reviewed by: tuexen (rscheff, rrs previous version) MFC after: 1 month Sponsored by: Forcepoint LLC Differential Revision: https://reviews.freebsd.org/D24781 --- sys/netinet/in_pcb.c | 128 ++++++++++++++++++++++++++++----------- sys/netinet/in_pcb.h | 3 + sys/netinet/tcp_usrreq.c | 23 ++++++- sys/netinet6/in6_pcb.c | 31 +++++++--- sys/netinet6/in6_pcb.h | 4 ++ 5 files changed, 140 insertions(+), 49 deletions(-) diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index d9df083ab369..cf4a5c12ade7 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -610,13 +610,15 @@ in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) } #endif -/* - * Select a local port (number) to use. - */ #if defined(INET) || defined(INET6) +/* + * Assign a local port like in_pcb_lport(), but also used with connect() + * and a foreign address and port. If fsa is non-NULL, choose a local port + * that is unused with those, otherwise one that is completely unused. 
+ */ int -in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp, - struct ucred *cred, int lookupflags) +in_pcb_lport_dest(struct inpcb *inp, struct sockaddr *lsa, u_short *lportp, + struct sockaddr *fsa, u_short fport, struct ucred *cred, int lookupflags) { struct inpcbinfo *pcbinfo; struct inpcb *tmpinp; @@ -624,7 +626,10 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp, int count, dorandom, error; u_short aux, first, last, lport; #ifdef INET - struct in_addr laddr; + struct in_addr laddr, faddr; +#endif +#ifdef INET6 + struct in6_addr *laddr6, *faddr6; #endif pcbinfo = inp->inp_pcbinfo; @@ -685,15 +690,22 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp, } #ifdef INET - /* Make the compiler happy. */ - laddr.s_addr = 0; + laddr.s_addr = INADDR_ANY; if ((inp->inp_vflag & (INP_IPV4|INP_IPV6)) == INP_IPV4) { - KASSERT(laddrp != NULL, ("%s: laddrp NULL for v4 inp %p", - __func__, inp)); - laddr = *laddrp; + laddr = ((struct sockaddr_in *)lsa)->sin_addr; + if (fsa != NULL) + faddr = ((struct sockaddr_in *)fsa)->sin_addr; } #endif - tmpinp = NULL; /* Make compiler happy. 
*/ +#ifdef INET6 + if (lsa != NULL && lsa->sa_family == AF_INET6) { + laddr6 = &((struct sockaddr_in6 *)lsa)->sin6_addr; + if (fsa != NULL) + faddr6 = &((struct sockaddr_in6 *)fsa)->sin6_addr; + } +#endif + + tmpinp = NULL; lport = *lportp; if (dorandom) @@ -709,29 +721,61 @@ in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp, *lastport = first; lport = htons(*lastport); -#ifdef INET6 - if ((inp->inp_vflag & INP_IPV6) != 0) - tmpinp = in6_pcblookup_local(pcbinfo, - &inp->in6p_laddr, lport, lookupflags, cred); -#endif -#if defined(INET) && defined(INET6) - else -#endif -#ifdef INET - tmpinp = in_pcblookup_local(pcbinfo, laddr, - lport, lookupflags, cred); -#endif - } while (tmpinp != NULL); + if (fsa != NULL) { #ifdef INET - if ((inp->inp_vflag & (INP_IPV4|INP_IPV6)) == INP_IPV4) - laddrp->s_addr = laddr.s_addr; + if (lsa->sa_family == AF_INET) { + tmpinp = in_pcblookup_hash_locked(pcbinfo, + faddr, fport, laddr, lport, lookupflags, + NULL); + } #endif +#ifdef INET6 + if (lsa->sa_family == AF_INET6) { + tmpinp = in6_pcblookup_hash_locked(pcbinfo, + faddr6, fport, laddr6, lport, lookupflags, + NULL); + } +#endif + } else { +#ifdef INET6 + if ((inp->inp_vflag & INP_IPV6) != 0) + tmpinp = in6_pcblookup_local(pcbinfo, + &inp->in6p_laddr, lport, lookupflags, cred); +#endif +#if defined(INET) && defined(INET6) + else +#endif +#ifdef INET + tmpinp = in_pcblookup_local(pcbinfo, laddr, + lport, lookupflags, cred); +#endif + } + } while (tmpinp != NULL); + *lportp = lport; return (0); } +/* + * Select a local port (number) to use; laddrp may be NULL. + */ +int +in_pcb_lport(struct inpcb *inp, struct in_addr *laddrp, u_short *lportp, + struct ucred *cred, int lookupflags) +{ + struct sockaddr_in laddr; + + if (laddrp) { + bzero(&laddr, sizeof(laddr)); + laddr.sin_family = AF_INET; + laddr.sin_addr = *laddrp; + } + return (in_pcb_lport_dest(inp, laddrp ? (struct sockaddr *) &laddr : + NULL, lportp, NULL, 0, cred, lookupflags)); +} + /* * Return cached socket options. 
*/ @@ -1344,16 +1388,26 @@ in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam, if (error) return (error); } - oinp = in_pcblookup_hash_locked(inp->inp_pcbinfo, faddr, fport, - laddr, lport, 0, NULL); - if (oinp != NULL) { - if (oinpp != NULL) - *oinpp = oinp; - return (EADDRINUSE); - } - if (lport == 0) { - error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport, - cred); + if (lport != 0) { + oinp = in_pcblookup_hash_locked(inp->inp_pcbinfo, faddr, + fport, laddr, lport, 0, NULL); + if (oinp != NULL) { + if (oinpp != NULL) + *oinpp = oinp; + return (EADDRINUSE); + } + } else { + struct sockaddr_in lsin, fsin; + + bzero(&lsin, sizeof(lsin)); + bzero(&fsin, sizeof(fsin)); + lsin.sin_family = AF_INET; + lsin.sin_addr = laddr; + fsin.sin_family = AF_INET; + fsin.sin_addr = faddr; + error = in_pcb_lport_dest(inp, (struct sockaddr *) &lsin, + &lport, (struct sockaddr *)& fsin, fport, cred, + INPLOOKUP_WILDCARD); if (error) return (error); } diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index b874bc49c632..9138ee186f6b 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -824,6 +824,9 @@ void in_pcbgroup_update_mbuf(struct inpcb *, struct mbuf *); void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *); int in_pcballoc(struct socket *, struct inpcbinfo *); int in_pcbbind(struct inpcb *, struct sockaddr *, struct ucred *); +int in_pcb_lport_dest(struct inpcb *inp, struct sockaddr *lsa, + u_short *lportp, struct sockaddr *fsa, u_short fport, + struct ucred *cred, int lookupflags); int in_pcb_lport(struct inpcb *, struct in_addr *, u_short *, struct ucred *, int); int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *, diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 211a2d78aeb5..53718f5defad 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -146,6 +146,16 @@ static int tcp_pru_options_support(struct tcpcb *tp, int flags); #define TCPDEBUG2(req) #endif +/* + * tcp_require_unique_port 
requires a globally-unique source port for each + * outgoing connection. The default is to require the 4-tuple to be unique. + */ +VNET_DEFINE(int, tcp_require_unique_port) = 0; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, require_unique_port, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_require_unique_port), 0, + "Require globally-unique ephemeral port for outgoing connections"); +#define V_tcp_require_unique_port VNET(tcp_require_unique_port) + /* * TCP attaches to socket via pru_attach(), reserving space, * and an internet control block. @@ -1514,7 +1524,7 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td) INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK(&V_tcbinfo); - if (inp->inp_lport == 0) { + if (V_tcp_require_unique_port && inp->inp_lport == 0) { error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); if (error) goto out; @@ -1535,6 +1545,15 @@ tcp_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td) error = EADDRINUSE; goto out; } + /* Handle initial bind if it hadn't been done in advance. 
*/ + if (inp->inp_lport == 0) { + inp->inp_lport = lport; + if (in_pcbinshash(inp) != 0) { + inp->inp_lport = 0; + error = EAGAIN; + goto out; + } + } inp->inp_laddr = laddr; in_pcbrehash(inp); INP_HASH_WUNLOCK(&V_tcbinfo); @@ -1574,7 +1593,7 @@ tcp6_connect(struct tcpcb *tp, struct sockaddr *nam, struct thread *td) INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK(&V_tcbinfo); - if (inp->inp_lport == 0) { + if (V_tcp_require_unique_port && inp->inp_lport == 0) { error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred); if (error) goto out; diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index edde97d6f26c..3444ca1b2fe7 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -113,9 +113,6 @@ __FBSDID("$FreeBSD$"); #include #include -static struct inpcb *in6_pcblookup_hash_locked(struct inpcbinfo *, - struct in6_addr *, u_int, struct in6_addr *, u_int, int, struct ifnet *); - int in6_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred) @@ -417,9 +414,12 @@ in6_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam, { struct inpcbinfo *pcbinfo = inp->inp_pcbinfo; struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam; - struct in6_addr addr6; + struct sockaddr_in6 laddr6; int error; + bzero(&laddr6, sizeof(laddr6)); + laddr6.sin6_family = AF_INET6; + INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); @@ -427,25 +427,36 @@ in6_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam, * Call inner routine, to assign local interface address. * in6_pcbladdr() may automatically fill in sin6_scope_id. */ - if ((error = in6_pcbladdr(inp, nam, &addr6)) != 0) + if ((error = in6_pcbladdr(inp, nam, &laddr6.sin6_addr)) != 0) return (error); if (in6_pcblookup_hash_locked(pcbinfo, &sin6->sin6_addr, sin6->sin6_port, IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr) - ? &addr6 : &inp->in6p_laddr, + ? 
&laddr6.sin6_addr : &inp->in6p_laddr, inp->inp_lport, 0, NULL) != NULL) { return (EADDRINUSE); } if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { if (inp->inp_lport == 0) { + /* + * rehash was required to be true in the past for + * this case; retain that convention. However, + * we now call in_pcb_lport_dest rather than + * in6_pcbbind; the former does not insert into + * the hash table, the latter does. Change rehash + * to false to do the in_pcbinshash below. + */ KASSERT(rehash == true, ("Rehashing required for unbound inps")); - error = in6_pcbbind(inp, (struct sockaddr *)0, cred); + rehash = false; + error = in_pcb_lport_dest(inp, + (struct sockaddr *) &laddr6, &inp->inp_lport, + (struct sockaddr *) sin6, sin6->sin6_port, cred, 0); if (error) return (error); } - inp->in6p_laddr = addr6; + inp->in6p_laddr = laddr6.sin6_addr; } inp->in6p_faddr = sin6->sin6_addr; inp->inp_fport = sin6->sin6_port; @@ -1122,9 +1133,9 @@ in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, #endif /* PCBGROUP */ /* - * Lookup PCB in hash list. + * Lookup PCB in hash list. Used in in_pcb.c as well as here. */ -static struct inpcb * +struct inpcb * in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport_arg, struct in6_addr *laddr, u_int lport_arg, int lookupflags, struct ifnet *ifp) diff --git a/sys/netinet6/in6_pcb.h b/sys/netinet6/in6_pcb.h index e01d6f751f79..d2df04402b2f 100644 --- a/sys/netinet6/in6_pcb.h +++ b/sys/netinet6/in6_pcb.h @@ -92,6 +92,10 @@ struct inpcb * in6_pcblookup_local(struct inpcbinfo *, struct in6_addr *, u_short, int, struct ucred *); +struct inpcb * + in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, + struct in6_addr *faddr, u_int fport_arg, struct in6_addr *laddr, + u_int lport_arg, int lookupflags, struct ifnet *ifp); struct inpcb * in6_pcblookup(struct inpcbinfo *, struct in6_addr *, u_int, struct in6_addr *, u_int, int,