diff --git a/share/man/man4/tcp.4 b/share/man/man4/tcp.4 index cbb8021226fe..873cfe4b822a 100644 --- a/share/man/man4/tcp.4 +++ b/share/man/man4/tcp.4 @@ -34,7 +34,7 @@ .\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd April 17, 2021 +.Dd April 18, 2021 .Dt TCP 4 .Os .Sh NAME @@ -329,6 +329,9 @@ currently executing. This is typically used after a process or thread inherits a listen socket from its parent, and sets its CPU affinity to a particular core. .El +.It Dv TCP_REMOTE_UDP_ENCAPS_PORT +Set and get the remote UDP encapsulation port. +It can only be set on a closed TCP socket. .El .Pp The option level for the @@ -755,6 +758,16 @@ A CSV list of template_spec=percent key-value pairs which controls the per template sampling rates when .Xr stats 3 sampling is enabled. +.It Va udp_tunneling_port +The local UDP encapsulation port. +A value of 0 indicates that UDP encapsulation is disabled. +The default is 0. +.It Va udp_tunneling_overhead +The overhead taken into account when using UDP encapsulation. +Since MSS clamping by middleboxes will most likely not work, values larger than +8 (the size of the UDP header) are also supported. +Supported values are between 8 and 1024. +The default is 8. .El .Sh ERRORS A socket operation may fail with one of the following errors returned: diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h index 0b71bd4658f8..d2bf1f8431fd 100644 --- a/sys/netinet/tcp.h +++ b/sys/netinet/tcp.h @@ -183,6 +183,7 @@ struct tcphdr { #define TCP_RXTLS_MODE 42 /* Receive TLS mode */ #define TCP_CONGESTION 64 /* get/set congestion control algorithm */ #define TCP_CCALGOOPT 65 /* get/set cc algorithm specific options */ +#define TCP_REMOTE_UDP_ENCAPS_PORT 71 /* Enable TCP over UDP tunneling via the specified port */ #define TCP_DELACK 72 /* socket option for delayed ack */ #define TCP_FIN_IS_RST 73 /* A fin from the peer is treated has a RST */ #define TCP_LOG_LIMIT 74 /* Limit to number of records in tcp-log */ diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index ed184de4a4bf..8592f3313725 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -123,6 +123,7 @@ __FBSDID("$FreeBSD$"); #ifdef TCP_OFFLOAD #include #endif +#include #include @@ -567,7 +568,7 @@ cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos) */ #ifdef INET6 int -tcp6_input(struct mbuf **mp, int *offp, int proto) +tcp6_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port) { struct mbuf *m; struct in6_ifaddr *ia6; @@ -597,12 +598,19 @@ tcp6_input(struct mbuf **mp, int *offp, int proto) } *mp = m; - return (tcp_input(mp, offp, proto)); + return (tcp_input_with_port(mp, offp, proto, port)); +} + +int +tcp6_input(struct mbuf **mp, int *offp, int proto) +{ + + return(tcp6_input_with_port(mp, offp, proto, 0)); } #endif /* INET6 */ int -tcp_input(struct mbuf **mp, int *offp, int proto) +tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port) { struct mbuf *m = *mp; struct tcphdr *th = NULL; @@ -659,6 +667,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto) ip6 = mtod(m, struct ip6_hdr *); th = (struct tcphdr *)((caddr_t)ip6 + off0); tlen = sizeof(*ip6) + ntohs(ip6->ip6_plen) - off0; + if (port) + goto skip6_csum; if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID_IPV6) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) th->th_sum = m->m_pkthdr.csum_data; @@ -672,7 +682,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto) TCPSTAT_INC(tcps_rcvbadsum); goto drop; } - + skip6_csum: /* * Be proactive about unspecified IPv6 address in source. * As we use all-zero to indicate unbounded/unconnected pcb, @@ -713,6 +723,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto) tlen = ntohs(ip->ip_len) - off0; iptos = ip->ip_tos; + if (port) + goto skip_csum; if (m->m_pkthdr.csum_flags & CSUM_DATA_VALID) { if (m->m_pkthdr.csum_flags & CSUM_PSEUDO_HDR) th->th_sum = m->m_pkthdr.csum_data; @@ -742,8 +754,8 @@ tcp_input(struct mbuf **mp, int *offp, int proto) ip->ip_v = IPVERSION; ip->ip_hl = off0 >> 2; } - - if (th->th_sum) { + skip_csum: + if (th->th_sum && (port == 0)) { TCPSTAT_INC(tcps_rcvbadsum); goto drop; } @@ -1004,6 +1016,11 @@ tcp_input(struct mbuf **mp, int *offp, int proto) goto dropwithreset; } + if ((tp->t_port != port) && (tp->t_state > TCPS_LISTEN)) { + rstreason = BANDLIM_RST_CLOSEDPORT; + goto dropwithreset; + } + #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) { tcp_offload_input(tp, m); @@ -1074,7 +1091,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto) * NB: syncache_expand() doesn't unlock * inp and tcpinfo locks. */ - rstreason = syncache_expand(&inc, &to, th, &so, m); + rstreason = syncache_expand(&inc, &to, th, &so, m, port); if (rstreason < 0) { /* * A failing TCP MD5 signature comparison @@ -1156,7 +1173,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto) * causes. */ if (thflags & TH_RST) { - syncache_chkrst(&inc, th, m); + syncache_chkrst(&inc, th, m, port); goto dropunlock; } /* @@ -1178,7 +1195,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto) log(LOG_DEBUG, "%s; %s: Listen socket: " "SYN|ACK invalid, segment rejected\n", s, __func__); - syncache_badack(&inc); /* XXX: Not needed! */ + syncache_badack(&inc, port); /* XXX: Not needed! */ TCPSTAT_INC(tcps_badsyn); rstreason = BANDLIM_RST_OPENPORT; goto dropwithreset; @@ -1337,7 +1354,7 @@ tcp_input(struct mbuf **mp, int *offp, int proto) TCP_PROBE3(debug__input, tp, th, m); tcp_dooptions(&to, optp, optlen, TO_SYN); if ((so = syncache_add(&inc, &to, th, inp, so, m, NULL, NULL, - iptos)) != NULL) + iptos, port)) != NULL) goto tfo_socket_result; /* @@ -1468,6 +1485,12 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, struct socket *so, return (newsize); } +int +tcp_input(struct mbuf **mp, int *offp, int proto) +{ + return(tcp_input_with_port(mp, offp, proto, 0)); +} + void tcp_handle_wakeup(struct tcpcb *tp, struct socket *so) { @@ -3672,11 +3695,13 @@ tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer, sizeof (struct ip6_hdr) + sizeof (struct tcphdr) : sizeof (struct tcpiphdr); #else - const size_t min_protoh = sizeof(struct tcpiphdr); + size_t min_protoh = sizeof(struct tcpiphdr); #endif INP_WLOCK_ASSERT(tp->t_inpcb); + if (tp->t_port) + min_protoh += V_tcp_udp_tunneling_overhead; if (mtuoffer != -1) { KASSERT(offer == -1, ("%s: conflict", __func__)); offer = mtuoffer - min_protoh; diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index e23cdc749e98..5bda2be14df0 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -101,6 +101,8 @@ __FBSDID("$FreeBSD$"); #include +#include +#include #include #include @@ -207,7 +209,7 @@ tcp_output(struct tcpcb *tp) #endif struct tcphdr *th; u_char opt[TCP_MAXOLEN]; - unsigned ipoptlen, optlen, hdrlen; + unsigned ipoptlen, optlen, hdrlen, ulen; #if defined(IPSEC) || defined(IPSEC_SUPPORT) unsigned ipsec_optlen = 0; #endif @@ -216,6 +218,7 @@ tcp_output(struct tcpcb *tp) struct sackhole *p; int tso, mtu; struct tcpopt to; + struct udphdr *udp = NULL; unsigned int wanted_cookie = 0; unsigned int dont_sendalot = 0; #if 0 @@ -558,6 +561,7 @@ tcp_output(struct tcpcb *tp) #endif if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg && + (tp->t_port == 0) && ((tp->t_flags & TF_SIGNATURE) == 0) && tp->rcv_numsacks == 0 && sack_rxmit == 0 && ipoptlen == 0 && !(flags & TH_SYN)) @@ -800,6 +804,8 @@ tcp_output(struct tcpcb *tp) /* Maximum segment size. */ if (flags & TH_SYN) { to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc); + if (tp->t_port) + to.to_mss -= V_tcp_udp_tunneling_overhead; to.to_flags |= TOF_MSS; /* @@ -887,7 +893,14 @@ tcp_output(struct tcpcb *tp) !(to.to_flags & TOF_FASTOPEN)) len = 0; } - + if (tp->t_port) { + if (V_tcp_udp_tunneling_port == 0) { + /* The port was removed?? */ + SOCKBUF_UNLOCK(&so->so_snd); + return (EHOSTUNREACH); + } + hdrlen += sizeof(struct udphdr); + } /* * Adjust data length if insertion of options will * bump the packet length beyond the t_maxseg length. @@ -1140,8 +1153,17 @@ tcp_output(struct tcpcb *tp) #ifdef INET6 if (isipv6) { ip6 = mtod(m, struct ip6_hdr *); - th = (struct tcphdr *)(ip6 + 1); - tcpip_fillheaders(tp->t_inpcb, ip6, th); + if (tp->t_port) { + udp = (struct udphdr *)((caddr_t)ip6 + ipoptlen + sizeof(struct ip6_hdr)); + udp->uh_sport = htons(V_tcp_udp_tunneling_port); + udp->uh_dport = tp->t_port; + ulen = hdrlen + len - sizeof(struct ip6_hdr); + udp->uh_ulen = htons(ulen); + th = (struct tcphdr *)(udp + 1); + } else { + th = (struct tcphdr *)(ip6 + 1); + } + tcpip_fillheaders(tp->t_inpcb, tp->t_port, ip6, th); } else #endif /* INET6 */ { @@ -1149,8 +1171,16 @@ tcp_output(struct tcpcb *tp) #ifdef TCPDEBUG ipov = (struct ipovly *)ip; #endif - th = (struct tcphdr *)(ip + 1); - tcpip_fillheaders(tp->t_inpcb, ip, th); + if (tp->t_port) { + udp = (struct udphdr *)((caddr_t)ip + ipoptlen + sizeof(struct ip)); + udp->uh_sport = htons(V_tcp_udp_tunneling_port); + udp->uh_dport = tp->t_port; + ulen = hdrlen + len - sizeof(struct ip); + udp->uh_ulen = htons(ulen); + th = (struct tcphdr *)(udp + 1); + } else + th = (struct tcphdr *)(ip + 1); + tcpip_fillheaders(tp->t_inpcb, tp->t_port, ip, th); } /* @@ -1309,7 +1339,6 @@ tcp_output(struct tcpcb *tp) * checksum extended header and data. */ m->m_pkthdr.len = hdrlen + len; /* in6_cksum() need this */ - m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (to.to_flags & TOF_SIGNATURE) { @@ -1336,9 +1365,19 @@ tcp_output(struct tcpcb *tp) * There is no need to fill in ip6_plen right now. * It will be filled later by ip6_output. */ - m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; - th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) + - optlen + len, IPPROTO_TCP, 0); + if (tp->t_port) { + m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; + m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); + udp->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, 0); + th->th_sum = htons(0); + UDPSTAT_INC(udps_opackets); + } else { + m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; + m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); + th->th_sum = in6_cksum_pseudo(ip6, + sizeof(struct tcphdr) + optlen + len, IPPROTO_TCP, + 0); + } } #endif #if defined(INET6) && defined(INET) @@ -1346,9 +1385,20 @@ tcp_output(struct tcpcb *tp) #endif #ifdef INET { - m->m_pkthdr.csum_flags = CSUM_TCP; - th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, - htons(sizeof(struct tcphdr) + IPPROTO_TCP + len + optlen)); + if (tp->t_port) { + m->m_pkthdr.csum_flags = CSUM_UDP; + m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); + udp->uh_sum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP)); + th->th_sum = htons(0); + UDPSTAT_INC(udps_opackets); + } else { + m->m_pkthdr.csum_flags = CSUM_TCP; + m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); + th->th_sum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) + + IPPROTO_TCP + len + optlen)); + } /* IP version must be set here for ipv4/ipv6 checking later */ KASSERT(ip->ip_v == IPVERSION, @@ -1473,8 +1523,10 @@ tcp_output(struct tcpcb *tp) * NB: Don't set DF on small MTU/MSS to have a safe fallback. */ if (V_path_mtu_discovery && tp->t_maxseg > V_tcp_minmss) { - ip->ip_off |= htons(IP_DF); tp->t_flags2 |= TF2_PLPMTU_PMTUD; + if (tp->t_port == 0 || len < V_tcp_minmss) { + ip->ip_off |= htons(IP_DF); + } } else { tp->t_flags2 &= ~TF2_PLPMTU_PMTUD; } diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c index 673dee911c87..febac7ad424c 100644 --- a/sys/netinet/tcp_stacks/bbr.c +++ b/sys/netinet/tcp_stacks/bbr.c @@ -11969,14 +11969,10 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv) #endif struct tcp_bbr *bbr; struct tcphdr *th; -#ifdef NETFLIX_TCPOUDP struct udphdr *udp = NULL; -#endif u_char opt[TCP_MAXOLEN]; unsigned ipoptlen, optlen, hdrlen; -#ifdef NETFLIX_TCPOUDP unsigned ulen; -#endif uint32_t bbr_seq; uint32_t delay_calc=0; uint8_t doing_tlp = 0; @@ -12991,10 +12987,8 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv) /* Maximum segment size. */ if (flags & TH_SYN) { to.to_mss = tcp_mssopt(&inp->inp_inc); -#ifdef NETFLIX_TCPOUDP if (tp->t_port) to.to_mss -= V_tcp_udp_tunneling_overhead; -#endif to.to_flags |= TOF_MSS; /* * On SYN or SYN|ACK transmits on TFO connections, @@ -13063,7 +13057,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv) !(to.to_flags & TOF_FASTOPEN)) len = 0; } -#ifdef NETFLIX_TCPOUDP if (tp->t_port) { if (V_tcp_udp_tunneling_port == 0) { /* The port was removed?? */ @@ -13072,7 +13065,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv) } hdrlen += sizeof(struct udphdr); } -#endif #ifdef INET6 if (isipv6) ipoptlen = ip6_optlen(tp->t_inpcb); @@ -13408,7 +13400,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv) #ifdef INET6 if (isipv6) { ip6 = mtod(m, struct ip6_hdr *); -#ifdef NETFLIX_TCPOUDP if (tp->t_port) { udp = (struct udphdr *)((caddr_t)ip6 + ipoptlen + sizeof(struct ip6_hdr)); udp->uh_sport = htons(V_tcp_udp_tunneling_port); @@ -13417,17 +13408,9 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv) udp->uh_ulen = htons(ulen); th = (struct tcphdr *)(udp + 1); } else { -#endif th = (struct tcphdr *)(ip6 + 1); - -#ifdef NETFLIX_TCPOUDP } -#endif - tcpip_fillheaders(inp, -#ifdef NETFLIX_TCPOUDP - tp->t_port, -#endif - ip6, th); + tcpip_fillheaders(inp, tp->t_port, ip6, th); } else #endif /* INET6 */ { @@ -13435,7 +13418,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv) #ifdef TCPDEBUG ipov = (struct ipovly *)ip; #endif -#ifdef NETFLIX_TCPOUDP if (tp->t_port) { udp = (struct udphdr *)((caddr_t)ip + ipoptlen + sizeof(struct ip)); udp->uh_sport = htons(V_tcp_udp_tunneling_port); @@ -13443,14 +13425,10 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv) ulen = hdrlen + len - sizeof(struct ip); udp->uh_ulen = htons(ulen); th = (struct tcphdr *)(udp + 1); - } else -#endif + } else { th = (struct tcphdr *)(ip + 1); - tcpip_fillheaders(inp, -#ifdef NETFLIX_TCPOUDP - tp->t_port, -#endif - ip, th); + } + tcpip_fillheaders(inp, tp->t_port, ip, th); } /* * If we are doing retransmissions, then snd_nxt will not reflect @@ -13600,7 +13578,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv) * ip6_plen is not need to be filled now, and will be filled * in ip6_output. */ -#ifdef NETFLIX_TCPOUDP if (tp->t_port) { m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); @@ -13608,14 +13585,11 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv) th->th_sum = htons(0); UDPSTAT_INC(udps_opackets); } else { -#endif csum_flags = m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); th->th_sum = in6_cksum_pseudo(ip6, sizeof(struct tcphdr) + optlen + len, IPPROTO_TCP, 0); -#ifdef NETFLIX_TCPOUDP } -#endif } #endif #if defined(INET6) && defined(INET) @@ -13623,7 +13597,6 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv) #endif #ifdef INET { -#ifdef NETFLIX_TCPOUDP if (tp->t_port) { m->m_pkthdr.csum_flags = CSUM_UDP; m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); @@ -13632,15 +13605,12 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv) th->th_sum = htons(0); UDPSTAT_INC(udps_opackets); } else { -#endif csum_flags = m->m_pkthdr.csum_flags = CSUM_TCP; m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, htons(sizeof(struct tcphdr) + IPPROTO_TCP + len + optlen)); -#ifdef NETFLIX_TCPOUDP } -#endif /* IP version must be set here for ipv4/ipv6 checking later */ KASSERT(ip->ip_v == IPVERSION, ("%s: IP version incorrect: %d", __func__, ip->ip_v)); diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c index 0079bf8b6400..d2093e1afab7 100644 --- a/sys/netinet/tcp_stacks/rack.c +++ b/sys/netinet/tcp_stacks/rack.c @@ -13008,10 +13008,8 @@ rack_output(struct tcpcb *tp) if (flags & TH_SYN) { tp->snd_nxt = tp->iss; to.to_mss = tcp_mssopt(&inp->inp_inc); -#ifdef NETFLIX_TCPOUDP if (tp->t_port) to.to_mss -= V_tcp_udp_tunneling_overhead; -#endif to.to_flags |= TOF_MSS; /* @@ -13088,7 +13086,6 @@ rack_output(struct tcpcb *tp) !(to.to_flags & TOF_FASTOPEN)) len = 0; } -#ifdef NETFLIX_TCPOUDP if (tp->t_port) { if (V_tcp_udp_tunneling_port == 0) { /* The port was removed?? */ @@ -13097,7 +13094,6 @@ rack_output(struct tcpcb *tp) } hdrlen += sizeof(struct udphdr); } -#endif #ifdef INET6 if (isipv6) ipoptlen = ip6_optlen(tp->t_inpcb); @@ -13372,7 +13368,6 @@ rack_output(struct tcpcb *tp) #ifdef INET6 if (isipv6) { ip6 = mtod(m, struct ip6_hdr *); -#ifdef NETFLIX_TCPOUDP if (tp->t_port) { udp = (struct udphdr *)((caddr_t)ip6 + ipoptlen + sizeof(struct ip6_hdr)); udp->uh_sport = htons(V_tcp_udp_tunneling_port); @@ -13380,14 +13375,10 @@ rack_output(struct tcpcb *tp) ulen = hdrlen + len - sizeof(struct ip6_hdr); udp->uh_ulen = htons(ulen); th = (struct tcphdr *)(udp + 1); - } else -#endif + } else { th = (struct tcphdr *)(ip6 + 1); - tcpip_fillheaders(inp, -#ifdef NETFLIX_TCPOUDP - tp->t_port, -#endif - ip6, th); + } + tcpip_fillheaders(inp, tp->t_port, ip6, th); } else #endif /* INET6 */ { @@ -13395,7 +13386,6 @@ rack_output(struct tcpcb *tp) #ifdef TCPDEBUG ipov = (struct ipovly *)ip; #endif -#ifdef NETFLIX_TCPOUDP if (tp->t_port) { udp = (struct udphdr *)((caddr_t)ip + ipoptlen + sizeof(struct ip)); udp->uh_sport = htons(V_tcp_udp_tunneling_port); @@ -13403,14 +13393,10 @@ rack_output(struct tcpcb *tp) ulen = hdrlen + len - sizeof(struct ip); udp->uh_ulen = htons(ulen); th = (struct tcphdr *)(udp + 1); - } else -#endif + } else { th = (struct tcphdr *)(ip + 1); - tcpip_fillheaders(inp, -#ifdef NETFLIX_TCPOUDP - tp->t_port, -#endif - ip, th); + } + tcpip_fillheaders(inp, tp->t_port, ip, th); } /* * Fill in fields, remembering maximum advertised window for use in diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index e973555efbcb..1ebc7357def3 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -126,6 +126,8 @@ __FBSDID("$FreeBSD$"); #ifdef TCP_OFFLOAD #include #endif +#include +#include #include @@ -501,6 +503,80 @@ tcp_switch_back_to_default(struct tcpcb *tp) } } +static void +tcp_recv_udp_tunneled_packet(struct mbuf *m, int off, struct inpcb *inp, + const struct sockaddr *sa, void *ctx) +{ + struct ip *iph; +#ifdef INET6 + struct ip6_hdr *ip6; +#endif + struct udphdr *uh; + struct tcphdr *th; + int thlen; + uint16_t port; + + TCPSTAT_INC(tcps_tunneled_pkts); + if ((m->m_flags & M_PKTHDR) == 0) { + /* Can't handle one that is not a pkt hdr */ + TCPSTAT_INC(tcps_tunneled_errs); + goto out; + } + thlen = sizeof(struct tcphdr); + if (m->m_len < off + sizeof(struct udphdr) + thlen && + (m = m_pullup(m, off + sizeof(struct udphdr) + thlen)) == NULL) { + TCPSTAT_INC(tcps_tunneled_errs); + goto out; + } + iph = mtod(m, struct ip *); + uh = (struct udphdr *)((caddr_t)iph + off); + th = (struct tcphdr *)(uh + 1); + thlen = th->th_off << 2; + if (m->m_len < off + sizeof(struct udphdr) + thlen) { + m = m_pullup(m, off + sizeof(struct udphdr) + thlen); + if (m == NULL) { + TCPSTAT_INC(tcps_tunneled_errs); + goto out; + } else { + iph = mtod(m, struct ip *); + uh = (struct udphdr *)((caddr_t)iph + off); + th = (struct tcphdr *)(uh + 1); + } + } + m->m_pkthdr.tcp_tun_port = port = uh->uh_sport; + bcopy(th, uh, m->m_len - off); + m->m_len -= sizeof(struct udphdr); + m->m_pkthdr.len -= sizeof(struct udphdr); + /* + * We use the same algorithm for + * both UDP and TCP for c-sum. So + * the code in tcp_input will skip + * the checksum. So we do nothing + * with the flag (m->m_pkthdr.csum_flags). + */ + switch (iph->ip_v) { +#ifdef INET + case IPVERSION: + iph->ip_len = htons(ntohs(iph->ip_len) - sizeof(struct udphdr)); + tcp_input_with_port(&m, &off, IPPROTO_TCP, port); + break; +#endif +#ifdef INET6 + case IPV6_VERSION >> 4: + ip6 = mtod(m, struct ip6_hdr *); + ip6->ip6_plen = htons(ntohs(ip6->ip6_plen) - sizeof(struct udphdr)); + tcp6_input_with_port(&m, &off, IPPROTO_TCP, port); + break; +#endif + default: + goto out; + break; + } + return; +out: + m_freem(m); +} + static int sysctl_net_inet_default_tcp_functions(SYSCTL_HANDLER_ARGS) { @@ -598,6 +674,183 @@ SYSCTL_PROC(_net_inet_tcp, OID_AUTO, functions_available, NULL, 0, sysctl_net_inet_list_available, "A", "list available TCP Function sets"); +VNET_DEFINE(int, tcp_udp_tunneling_port) = TCP_TUNNELING_PORT_DEFAULT; + +#ifdef INET +VNET_DEFINE(struct socket *, udp4_tun_socket) = NULL; +#define V_udp4_tun_socket VNET(udp4_tun_socket) +#endif +#ifdef INET6 +VNET_DEFINE(struct socket *, udp6_tun_socket) = NULL; +#define V_udp6_tun_socket VNET(udp6_tun_socket) +#endif + +static void +tcp_over_udp_stop(void) +{ + /* + * This function assumes sysctl caller holds inp_rinfo_lock() + * for writting! + */ +#ifdef INET + if (V_udp4_tun_socket != NULL) { + soclose(V_udp4_tun_socket); + V_udp4_tun_socket = NULL; + } +#endif +#ifdef INET6 + if (V_udp6_tun_socket != NULL) { + soclose(V_udp6_tun_socket); + V_udp6_tun_socket = NULL; + } +#endif +} + +static int +tcp_over_udp_start(void) +{ + uint16_t port; + int ret; +#ifdef INET + struct sockaddr_in sin; +#endif +#ifdef INET6 + struct sockaddr_in6 sin6; +#endif + /* + * This function assumes sysctl caller holds inp_info_rlock() + * for writting! + */ + port = V_tcp_udp_tunneling_port; + if (ntohs(port) == 0) { + /* Must have a port set */ + return (EINVAL); + } +#ifdef INET + if (V_udp4_tun_socket != NULL) { + /* Already running -- must stop first */ + return (EALREADY); + } +#endif +#ifdef INET6 + if (V_udp6_tun_socket != NULL) { + /* Already running -- must stop first */ + return (EALREADY); + } +#endif +#ifdef INET + if ((ret = socreate(PF_INET, &V_udp4_tun_socket, + SOCK_DGRAM, IPPROTO_UDP, + curthread->td_ucred, curthread))) { + tcp_over_udp_stop(); + return (ret); + } + /* Call the special UDP hook. */ + if ((ret = udp_set_kernel_tunneling(V_udp4_tun_socket, + tcp_recv_udp_tunneled_packet, + tcp_ctlinput_viaudp, + NULL))) { + tcp_over_udp_stop(); + return (ret); + } + /* Ok, we have a socket, bind it to the port. */ + memset(&sin, 0, sizeof(struct sockaddr_in)); + sin.sin_len = sizeof(struct sockaddr_in); + sin.sin_family = AF_INET; + sin.sin_port = htons(port); + if ((ret = sobind(V_udp4_tun_socket, + (struct sockaddr *)&sin, curthread))) { + tcp_over_udp_stop(); + return (ret); + } +#endif +#ifdef INET6 + if ((ret = socreate(PF_INET6, &V_udp6_tun_socket, + SOCK_DGRAM, IPPROTO_UDP, + curthread->td_ucred, curthread))) { + tcp_over_udp_stop(); + return (ret); + } + /* Call the special UDP hook. */ + if ((ret = udp_set_kernel_tunneling(V_udp6_tun_socket, + tcp_recv_udp_tunneled_packet, + tcp6_ctlinput_viaudp, + NULL))) { + tcp_over_udp_stop(); + return (ret); + } + /* Ok, we have a socket, bind it to the port. */ + memset(&sin6, 0, sizeof(struct sockaddr_in6)); + sin6.sin6_len = sizeof(struct sockaddr_in6); + sin6.sin6_family = AF_INET6; + sin6.sin6_port = htons(port); + if ((ret = sobind(V_udp6_tun_socket, + (struct sockaddr *)&sin6, curthread))) { + tcp_over_udp_stop(); + return (ret); + } +#endif + return (0); +} + +static int +sysctl_net_inet_tcp_udp_tunneling_port_check(SYSCTL_HANDLER_ARGS) +{ + int error; + uint32_t old, new; + + old = V_tcp_udp_tunneling_port; + new = old; + error = sysctl_handle_int(oidp, &new, 0, req); + if ((error == 0) && + (req->newptr != NULL)) { + if ((new < TCP_TUNNELING_PORT_MIN) || + (new > TCP_TUNNELING_PORT_MAX)) { + error = EINVAL; + } else { + V_tcp_udp_tunneling_port = new; + if (old != 0) { + tcp_over_udp_stop(); + } + if (new != 0) { + error = tcp_over_udp_start(); + } + } + } + return (error); +} + +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, udp_tunneling_port, + CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, + &VNET_NAME(tcp_udp_tunneling_port), + 0, &sysctl_net_inet_tcp_udp_tunneling_port_check, "IU", + "Tunneling port for tcp over udp"); + +VNET_DEFINE(int, tcp_udp_tunneling_overhead) = TCP_TUNNELING_OVERHEAD_DEFAULT; + +static int +sysctl_net_inet_tcp_udp_tunneling_overhead_check(SYSCTL_HANDLER_ARGS) +{ + int error, new; + + new = V_tcp_udp_tunneling_overhead; + error = sysctl_handle_int(oidp, &new, 0, req); + if (error == 0 && req->newptr) { + if ((new < TCP_TUNNELING_OVERHEAD_MIN) || + (new > TCP_TUNNELING_OVERHEAD_MAX)) + error = EINVAL; + else + V_tcp_udp_tunneling_overhead = new; + } + return (error); +} + +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, udp_tunneling_overhead, + CTLFLAG_VNET | CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, + &VNET_NAME(tcp_udp_tunneling_overhead), + 0, &sysctl_net_inet_tcp_udp_tunneling_overhead_check, "IU", + "MSS reduction when using tcp over udp"); + /* * Exports one (struct tcp_function_info) for each alias/name. */ @@ -1314,7 +1567,7 @@ tcp_fini(void *xtp) * of the tcpcb each time to conserve mbufs. */ void -tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr) +tcpip_fillheaders(struct inpcb *inp, uint16_t port, void *ip_ptr, void *tcp_ptr) { struct tcphdr *th = (struct tcphdr *)tcp_ptr; @@ -1329,7 +1582,10 @@ tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr) (inp->inp_flow & IPV6_FLOWINFO_MASK); ip6->ip6_vfc = (ip6->ip6_vfc & ~IPV6_VERSION_MASK) | (IPV6_VERSION & IPV6_VERSION_MASK); - ip6->ip6_nxt = IPPROTO_TCP; + if (port == 0) + ip6->ip6_nxt = IPPROTO_TCP; + else + ip6->ip6_nxt = IPPROTO_UDP; ip6->ip6_plen = htons(sizeof(struct tcphdr)); ip6->ip6_src = inp->in6p_laddr; ip6->ip6_dst = inp->in6p_faddr; @@ -1351,7 +1607,10 @@ tcpip_fillheaders(struct inpcb *inp, void *ip_ptr, void *tcp_ptr) ip->ip_off = 0; ip->ip_ttl = inp->inp_ip_ttl; ip->ip_sum = 0; - ip->ip_p = IPPROTO_TCP; + if (port == 0) + ip->ip_p = IPPROTO_TCP; + else + ip->ip_p = IPPROTO_UDP; ip->ip_src = inp->inp_laddr; ip->ip_dst = inp->inp_faddr; } @@ -1381,7 +1640,7 @@ tcpip_maketemplate(struct inpcb *inp) t = malloc(sizeof(*t), M_TEMP, M_NOWAIT); if (t == NULL) return (NULL); - tcpip_fillheaders(inp, (void *)&t->tt_ipgen, (void *)&t->tt_t); + tcpip_fillheaders(inp, 0, (void *)&t->tt_ipgen, (void *)&t->tt_t); return (t); } @@ -1407,14 +1666,16 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, struct inpcb *inp; struct ip *ip; struct mbuf *optm; + struct udphdr *uh = NULL; struct tcphdr *nth; u_char *optp; #ifdef INET6 struct ip6_hdr *ip6; int isipv6; #endif /* INET6 */ - int optlen, tlen, win; + int optlen, tlen, win, ulen; bool incl_opts; + uint16_t port; KASSERT(tp != NULL || m != NULL, ("tcp_respond: tp and m both NULL")); NET_EPOCH_ASSERT(); @@ -1432,6 +1693,19 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, } else inp = NULL; + if (m != NULL) { +#ifdef INET6 + if (isipv6 && ip6 && (ip6->ip6_nxt == IPPROTO_UDP)) + port = m->m_pkthdr.tcp_tun_port; + else +#endif + if (ip && (ip->ip_p == IPPROTO_UDP)) + port = m->m_pkthdr.tcp_tun_port; + else + port = 0; + } else + port = tp->t_port; + incl_opts = false; win = 0; if (tp != NULL) { @@ -1454,16 +1728,30 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, sizeof(struct ip6_hdr)); ip6 = mtod(m, struct ip6_hdr *); nth = (struct tcphdr *)(ip6 + 1); + if (port) { + /* Insert a UDP header */ + uh = (struct udphdr *)nth; + uh->uh_sport = htons(V_tcp_udp_tunneling_port); + uh->uh_dport = port; + nth = (struct tcphdr *)(uh + 1); + } } else #endif /* INET6 */ { bcopy((caddr_t)ip, mtod(m, caddr_t), sizeof(struct ip)); ip = mtod(m, struct ip *); nth = (struct tcphdr *)(ip + 1); + if (port) { + /* Insert a UDP header */ + uh = (struct udphdr *)nth; + uh->uh_sport = htons(V_tcp_udp_tunneling_port); + uh->uh_dport = port; + nth = (struct tcphdr *)(uh + 1); + } } bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr)); flags = TH_ACK; - } else if (!M_WRITABLE(m)) { + } else if ((!M_WRITABLE(m)) || (port != 0)) { struct mbuf *n; /* Can't reuse 'm', allocate a new mbuf. */ @@ -1489,6 +1777,13 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, ip6 = mtod(n, struct ip6_hdr *); xchg(ip6->ip6_dst, ip6->ip6_src, struct in6_addr); nth = (struct tcphdr *)(ip6 + 1); + if (port) { + /* Insert a UDP header */ + uh = (struct udphdr *)nth; + uh->uh_sport = htons(V_tcp_udp_tunneling_port); + uh->uh_dport = port; + nth = (struct tcphdr *)(uh + 1); + } } else #endif /* INET6 */ { @@ -1496,6 +1791,13 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, ip = mtod(n, struct ip *); xchg(ip->ip_dst.s_addr, ip->ip_src.s_addr, uint32_t); nth = (struct tcphdr *)(ip + 1); + if (port) { + /* Insert a UDP header */ + uh = (struct udphdr *)nth; + uh->uh_sport = htons(V_tcp_udp_tunneling_port); + uh->uh_dport = port; + nth = (struct tcphdr *)(uh + 1); + } } bcopy((caddr_t)th, (caddr_t)nth, sizeof(struct tcphdr)); xchg(nth->th_dport, nth->th_sport, uint16_t); @@ -1544,6 +1846,8 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, #ifdef INET tlen = sizeof (struct tcpiphdr); #endif + if (port) + tlen += sizeof (struct udphdr); #ifdef INVARIANTS m->m_len = 0; KASSERT(M_TRAILINGSPACE(m) >= tlen, @@ -1587,9 +1891,16 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, optlen = 0; #ifdef INET6 if (isipv6) { + if (uh) { + ulen = tlen - sizeof(struct ip6_hdr); + uh->uh_ulen = htons(ulen); + } ip6->ip6_flow = 0; ip6->ip6_vfc = IPV6_VERSION; - ip6->ip6_nxt = IPPROTO_TCP; + if (port) + ip6->ip6_nxt = IPPROTO_UDP; + else + ip6->ip6_nxt = IPPROTO_TCP; ip6->ip6_plen = htons(tlen - sizeof(*ip6)); } #endif @@ -1598,8 +1909,17 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, #endif #ifdef INET { + if (uh) { + ulen = tlen - sizeof(struct ip); + uh->uh_ulen = htons(ulen); + } ip->ip_len = htons(tlen); ip->ip_ttl = V_ip_defttl; + if (port) { + ip->ip_p = IPPROTO_UDP; + } else { + ip->ip_p = IPPROTO_TCP; + } if (V_path_mtu_discovery) ip->ip_off |= htons(IP_DF); } @@ -1643,12 +1963,19 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, } #endif - m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); #ifdef INET6 if (isipv6) { - m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; - nth->th_sum = in6_cksum_pseudo(ip6, - tlen - sizeof(struct ip6_hdr), IPPROTO_TCP, 0); + if (port) { + m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; + m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); + uh->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, 0); + nth->th_sum = 0; + } else { + m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; + m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); + nth->th_sum = in6_cksum_pseudo(ip6, + tlen - sizeof(struct ip6_hdr), IPPROTO_TCP, 0); + } ip6->ip6_hlim = in6_selecthlim(tp != NULL ? tp->t_inpcb : NULL, NULL); } @@ -1658,9 +1985,18 @@ tcp_respond(struct tcpcb *tp, void *ipgen, struct tcphdr *th, struct mbuf *m, #endif #ifdef INET { - m->m_pkthdr.csum_flags = CSUM_TCP; - nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, - htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p))); + if (port) { + uh->uh_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htons(ulen + IPPROTO_UDP)); + m->m_pkthdr.csum_flags = CSUM_UDP; + m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); + nth->th_sum = 0; + } else { + m->m_pkthdr.csum_flags = CSUM_TCP; + m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); + nth->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htons((u_short)(tlen - sizeof(struct ip) + ip->ip_p))); + } } #endif /* INET */ #ifdef TCPDEBUG @@ -2460,8 +2796,8 @@ SYSCTL_PROC(_net_inet6_tcp6, OID_AUTO, getcred, #endif /* INET6 */ #ifdef INET -void -tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) +static void +tcp_ctlinput_with_port(int cmd, struct sockaddr *sa, void *vip, uint16_t port) { struct ip *ip = vip; struct tcphdr *th; @@ -2515,6 +2851,9 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) !(inp->inp_flags & INP_DROPPED) && !(inp->inp_socket == NULL)) { tp = intotcpcb(inp); + if (tp->t_port != port) { + goto out; + } if (SEQ_GEQ(ntohl(icmp_tcp_seq), tp->snd_una) && SEQ_LT(ntohl(icmp_tcp_seq), tp->snd_max)) { if (cmd == PRC_MSGSIZE) { @@ -2561,17 +2900,61 @@ tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) inc.inc_lport = th->th_sport; inc.inc_faddr = faddr; inc.inc_laddr = ip->ip_src; - syncache_unreach(&inc, icmp_tcp_seq); + syncache_unreach(&inc, icmp_tcp_seq, port); } out: if (inp != NULL) INP_WUNLOCK(inp); } + +void +tcp_ctlinput(int cmd, struct sockaddr *sa, void *vip) +{ + tcp_ctlinput_with_port(cmd, sa, vip, htons(0)); +} + +void +tcp_ctlinput_viaudp(int cmd, struct sockaddr *sa, void *vip, void *unused) +{ + /* Its a tunneled TCP over UDP icmp */ + struct ip *outer_ip, *inner_ip; + struct icmp *icmp; + struct udphdr *udp; + struct tcphdr *th, ttemp; + int i_hlen, o_len; + uint16_t port; + + inner_ip = (struct ip *)vip; + icmp = (struct icmp *)((caddr_t)inner_ip - + (sizeof(struct icmp) - sizeof(struct ip))); + outer_ip = (struct ip *)((caddr_t)icmp - sizeof(struct ip)); + i_hlen = inner_ip->ip_hl << 2; + o_len = ntohs(outer_ip->ip_len); + if (o_len < + (sizeof(struct ip) + 8 + i_hlen + sizeof(struct udphdr) + offsetof(struct tcphdr, th_ack))) { + /* Not enough data present */ + return; + } + /* Ok lets strip out the inner udphdr header by copying up on top of it the tcp hdr */ + udp = (struct udphdr *)(((caddr_t)inner_ip) + i_hlen); + if (ntohs(udp->uh_sport) != V_tcp_udp_tunneling_port) { + return; + } + port = udp->uh_dport; + th = (struct tcphdr *)(udp + 1); + memcpy(&ttemp, th, sizeof(struct tcphdr)); + memcpy(udp, &ttemp, sizeof(struct tcphdr)); + /* Now adjust down the size of the outer IP header */ + o_len -= sizeof(struct udphdr); + outer_ip->ip_len = htons(o_len); + /* Now call in to the normal handling code */ + tcp_ctlinput_with_port(cmd, sa, vip, port); +} #endif /* INET */ #ifdef INET6 -void -tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d) +static void +tcp6_ctlinput_with_port(int cmd, struct sockaddr *sa, void *d, uint16_t port) { struct in6_addr *dst; struct inpcb *(*notify)(struct inpcb *, int) = tcp_notify; @@ -2661,6 +3044,9 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d) !(inp->inp_flags & INP_DROPPED) && !(inp->inp_socket == NULL)) { tp = intotcpcb(inp); + if (tp->t_port != port) { + goto out; + } if (SEQ_GEQ(ntohl(icmp_tcp_seq), tp->snd_una) && SEQ_LT(ntohl(icmp_tcp_seq), tp->snd_max)) { if (cmd == PRC_MSGSIZE) { @@ -2710,12 +3096,45 @@ tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d) inc.inc_lport = t_ports.th_sport; inc.inc6_faddr = *dst; inc.inc6_laddr = ip6->ip6_src; - syncache_unreach(&inc, icmp_tcp_seq); + syncache_unreach(&inc, icmp_tcp_seq, port); } out: if (inp != NULL) INP_WUNLOCK(inp); } + +void +tcp6_ctlinput(int cmd, struct sockaddr *sa, void *d) +{ + tcp6_ctlinput_with_port(cmd, sa, d, htons(0)); +} + +void +tcp6_ctlinput_viaudp(int cmd, struct sockaddr *sa, void *d, void *unused) +{ + struct ip6ctlparam *ip6cp; + struct mbuf *m; + struct udphdr *udp; + uint16_t port; + + ip6cp = (struct ip6ctlparam *)d; + m = m_pulldown(ip6cp->ip6c_m, ip6cp->ip6c_off, sizeof(struct udphdr), NULL); + if (m == NULL) { + return; + } + udp = mtod(m, struct udphdr *); + if (ntohs(udp->uh_sport) != V_tcp_udp_tunneling_port) { + return; + } + port = udp->uh_dport; + m_adj(m, sizeof(struct udphdr)); + if ((m->m_flags & M_PKTHDR) == 0) { + ip6cp->ip6c_m->m_pkthdr.len -= sizeof(struct udphdr); + } + /* Now call in to the normal handling code */ + tcp6_ctlinput_with_port(cmd, sa, d, port); +} + #endif /* INET6 */ static uint32_t @@ -3448,11 +3867,13 @@ void tcp_inptoxtp(const struct inpcb *inp, struct xtcpcb *xt) { struct tcpcb *tp = intotcpcb(inp); + struct tcptw *tw = intotw(inp); sbintime_t now; bzero(xt, sizeof(*xt)); if (inp->inp_flags & INP_TIMEWAIT) { xt->t_state = TCPS_TIME_WAIT; + xt->xt_encaps_port = tw->t_port; } else { xt->t_state = tp->t_state; xt->t_logstate = tp->t_logstate; @@ -3484,6 +3905,7 @@ tcp_inptoxtp(const struct inpcb *inp, struct xtcpcb *xt) #undef COPYTIMER xt->t_rcvtime = 1000 * (ticks - tp->t_rcvtime) / hz; + xt->xt_encaps_port = tp->t_port; bcopy(tp->t_fb->tfb_tcp_block_name, xt->xt_stack, TCP_FUNCTION_NAME_LEN_MAX); bcopy(CC_ALGO(tp)->name, xt->xt_cc, diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 4cd8411af8d5..35d9c091ab96 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -96,6 +96,8 @@ __FBSDID("$FreeBSD$"); #ifdef TCP_OFFLOAD #include #endif +#include +#include #include @@ -143,14 +145,14 @@ static tcp_seq syncookie_generate(struct syncache_head *, struct syncache *); static struct syncache *syncookie_lookup(struct in_conninfo *, struct syncache_head *, struct syncache *, struct tcphdr *, struct tcpopt *, - struct socket *); + struct socket *, uint16_t); static void syncache_pause(struct in_conninfo *); static void syncache_unpause(void *); static void syncookie_reseed(void *); #ifdef INVARIANTS static int syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch, struct syncache *sc, struct tcphdr *th, struct tcpopt *to, - struct socket *lso); + struct socket *lso, uint16_t port); #endif /* @@ -610,7 +612,8 @@ syncache_lookup(struct in_conninfo *inc, struct syncache_head **schp) * If required send a challenge ACK. */ void -syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th, struct mbuf *m) +syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th, struct mbuf *m, + uint16_t port) { struct syncache *sc; struct syncache_head *sch; @@ -650,6 +653,16 @@ syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th, struct mbuf *m) goto done; } + /* The remote UDP encaps port does not match. */ + if (sc->sc_port != port) { + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) + log(LOG_DEBUG, "%s; %s: Spurious RST with matching " + "syncache entry but non-matching UDP encaps port, " + "segment ignored\n", s, __func__); + TCPSTAT_INC(tcps_badrst); + goto done; + } + /* * If the RST bit is set, check the sequence number to see * if this is a valid reset segment. @@ -716,7 +729,7 @@ syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th, struct mbuf *m) } void -syncache_badack(struct in_conninfo *inc) +syncache_badack(struct in_conninfo *inc, uint16_t port) { struct syncache *sc; struct syncache_head *sch; @@ -725,7 +738,7 @@ syncache_badack(struct in_conninfo *inc) return; sc = syncache_lookup(inc, &sch); /* returns locked sch */ SCH_LOCK_ASSERT(sch); - if (sc != NULL) { + if ((sc != NULL) && (sc->sc_port == port)) { syncache_drop(sc, sch); TCPSTAT_INC(tcps_sc_badack); } @@ -733,7 +746,7 @@ syncache_badack(struct in_conninfo *inc) } void -syncache_unreach(struct in_conninfo *inc, tcp_seq th_seq) +syncache_unreach(struct in_conninfo *inc, tcp_seq th_seq, uint16_t port) { struct syncache *sc; struct syncache_head *sch; @@ -745,6 +758,10 @@ syncache_unreach(struct in_conninfo *inc, tcp_seq th_seq) if (sc == NULL) goto done; + /* If the port != sc_port, then it's a bogus ICMP msg */ + if (port != sc->sc_port) + goto done; + /* If the sequence number != sc_iss, then it's a bogus ICMP msg */ if (ntohl(th_seq) != sc->sc_iss) goto done; @@ -951,6 +968,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) tcp_state_change(tp, TCPS_SYN_RECEIVED); tp->iss = sc->sc_iss; tp->irs = sc->sc_irs; + tp->t_port = sc->sc_port; tcp_rcvseqinit(tp); tcp_sendseqinit(tp); blk = sototcpcb(lso)->t_fb; @@ -1071,7 +1089,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) */ int syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, - struct socket **lsop, struct mbuf *m) + struct socket **lsop, struct mbuf *m, uint16_t port) { struct syncache *sc; struct syncache_head *sch; @@ -1099,7 +1117,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, * values with the reconstructed values from the cookie. */ if (sc != NULL) - syncookie_cmp(inc, sch, sc, th, to, *lsop); + syncookie_cmp(inc, sch, sc, th, to, *lsop, port); #endif if (sc == NULL) { @@ -1133,7 +1151,7 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, goto failed; } bzero(&scs, sizeof(scs)); - sc = syncookie_lookup(inc, sch, &scs, th, to, *lsop); + sc = syncookie_lookup(inc, sch, &scs, th, to, *lsop, port); if (locked) SCH_UNLOCK(sch); if (sc == NULL) { @@ -1160,6 +1178,10 @@ syncache_expand(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, } #endif /* TCP_SIGNATURE */ } else { + if (sc->sc_port != port) { + SCH_UNLOCK(sch); + return (0); + } #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) /* * If listening socket requested TCP digests, check that @@ -1380,7 +1402,7 @@ syncache_tfo_expand(struct syncache *sc, struct socket *lso, struct mbuf *m, struct socket * syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, struct inpcb *inp, struct socket *so, struct mbuf *m, void *tod, - void *todctx, uint8_t iptos) + void *todctx, uint8_t iptos, uint16_t port) { struct tcpcb *tp; struct socket *rv = NULL; @@ -1640,6 +1662,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, sc->sc_label = maclabel; #endif sc->sc_cred = cred; + sc->sc_port = port; cred = NULL; sc->sc_ipopts = ipopts; bcopy(inc, &sc->sc_inc, sizeof(struct in_conninfo)); @@ -1797,8 +1820,9 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags) struct ip *ip = NULL; struct mbuf *m; struct tcphdr *th = NULL; + struct udphdr *udp = NULL; int optlen, error = 0; /* Make compiler happy */ - u_int16_t hlen, tlen, mssopt; + u_int16_t hlen, tlen, mssopt, ulen; struct tcpopt to; #ifdef INET6 struct ip6_hdr *ip6 = NULL; @@ -1812,9 +1836,14 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags) #endif sizeof(struct ip); tlen = hlen + sizeof(struct tcphdr); - + if (sc->sc_port) { + tlen += sizeof(struct udphdr); + } /* Determine MSS we advertize to other end of connection. */ - mssopt = max(tcp_mssopt(&sc->sc_inc), V_tcp_minmss); + mssopt = tcp_mssopt(&sc->sc_inc); + if (sc->sc_port) + mssopt -= V_tcp_udp_tunneling_overhead; + mssopt = max(mssopt, V_tcp_minmss); /* XXX: Assume that the entire packet will fit in a header mbuf. */ KASSERT(max_linkhdr + tlen + TCP_MAXOLEN <= MHLEN, @@ -1836,7 +1865,6 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags) if (sc->sc_inc.inc_flags & INC_ISIPV6) { ip6 = mtod(m, struct ip6_hdr *); ip6->ip6_vfc = IPV6_VERSION; - ip6->ip6_nxt = IPPROTO_TCP; ip6->ip6_src = sc->sc_inc.inc6_laddr; ip6->ip6_dst = sc->sc_inc.inc6_faddr; ip6->ip6_plen = htons(tlen - hlen); @@ -1844,9 +1872,18 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags) /* Zero out traffic class and flow label. */ ip6->ip6_flow &= ~IPV6_FLOWINFO_MASK; ip6->ip6_flow |= sc->sc_flowlabel; + if (sc->sc_port != 0) { + ip6->ip6_nxt = IPPROTO_UDP; + udp = (struct udphdr *)(ip6 + 1); + udp->uh_sport = htons(V_tcp_udp_tunneling_port); + udp->uh_dport = sc->sc_port; + ulen = (tlen - sizeof(struct ip6_hdr)); + th = (struct tcphdr *)(udp + 1); + } else { + ip6->ip6_nxt = IPPROTO_TCP; + th = (struct tcphdr *)(ip6 + 1); + } ip6->ip6_flow |= htonl(sc->sc_ip_tos << 20); - - th = (struct tcphdr *)(ip6 + 1); } #endif #if defined(INET6) && defined(INET) @@ -1861,7 +1898,6 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags) ip->ip_id = 0; ip->ip_off = 0; ip->ip_sum = 0; - ip->ip_p = IPPROTO_TCP; ip->ip_src = sc->sc_inc.inc_laddr; ip->ip_dst = sc->sc_inc.inc_faddr; ip->ip_ttl = sc->sc_ip_ttl; @@ -1876,8 +1912,17 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags) */ if (V_path_mtu_discovery && ((sc->sc_flags & SCF_UNREACH) == 0)) ip->ip_off |= htons(IP_DF); - - th = (struct tcphdr *)(ip + 1); + if (sc->sc_port == 0) { + ip->ip_p = IPPROTO_TCP; + th = (struct tcphdr *)(ip + 1); + } else { + ip->ip_p = IPPROTO_UDP; + udp = (struct udphdr *)(ip + 1); + udp->uh_sport = htons(V_tcp_udp_tunneling_port); + udp->uh_dport = sc->sc_port; + ulen = (tlen - sizeof(struct ip)); + th = (struct tcphdr *)(udp + 1); + } } #endif /* INET */ th->th_sport = sc->sc_inc.inc_lport; @@ -1957,8 +2002,11 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags) } else optlen = 0; + if (udp) { + ulen += optlen; + udp->uh_ulen = htons(ulen); + } M_SETFIB(m, sc->sc_inc.inc_fibnum); - m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); /* * If we have peer's SYN and it has a flowid, then let's assign it to * our SYN|ACK. ip6_output() and ip_output() will not assign flowid @@ -1970,9 +2018,18 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags) } #ifdef INET6 if (sc->sc_inc.inc_flags & INC_ISIPV6) { - m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; - th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen, - IPPROTO_TCP, 0); + if (sc->sc_port) { + m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; + m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); + udp->uh_sum = in6_cksum_pseudo(ip6, ulen, + IPPROTO_UDP, 0); + th->th_sum = htons(0); + } else { + m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; + m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); + th->th_sum = in6_cksum_pseudo(ip6, tlen + optlen - hlen, + IPPROTO_TCP, 0); + } ip6->ip6_hlim = sc->sc_ip_ttl; #ifdef TCP_OFFLOAD if (ADDED_BY_TOE(sc)) { @@ -1992,9 +2049,18 @@ syncache_respond(struct syncache *sc, const struct mbuf *m0, int flags) #endif #ifdef INET { - m->m_pkthdr.csum_flags = CSUM_TCP; - th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, - htons(tlen + optlen - hlen + IPPROTO_TCP)); + if (sc->sc_port) { + m->m_pkthdr.csum_flags = CSUM_UDP; + m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); + udp->uh_sum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP)); + th->th_sum = htons(0); + } else { + m->m_pkthdr.csum_flags = CSUM_TCP; + m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); + th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htons(tlen + optlen - hlen + IPPROTO_TCP)); + } #ifdef TCP_OFFLOAD if (ADDED_BY_TOE(sc)) { struct toedev *tod = sc->sc_tod; @@ -2224,7 +2290,7 @@ syncookie_generate(struct syncache_head *sch, struct syncache *sc) static struct syncache * syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch, struct syncache *sc, struct tcphdr *th, struct tcpopt *to, - struct socket *lso) + struct socket *lso, uint16_t port) { uint32_t hash; uint8_t *secbits; @@ -2310,6 +2376,8 @@ syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch, sc->sc_rxmits = 0; + sc->sc_port = port; + TCPSTAT_INC(tcps_sc_recvcookie); return (sc); } @@ -2318,13 +2386,13 @@ syncookie_lookup(struct in_conninfo *inc, struct syncache_head *sch, static int syncookie_cmp(struct in_conninfo *inc, struct syncache_head *sch, struct syncache *sc, struct tcphdr *th, struct tcpopt *to, - struct socket *lso) + struct socket *lso, uint16_t port) { struct syncache scs, *scx; char *s; bzero(&scs, sizeof(scs)); - scx = syncookie_lookup(inc, sch, &scs, th, to, lso); + scx = syncookie_lookup(inc, sch, &scs, th, to, lso, port); if ((s = tcp_log_addrs(inc, th, NULL, NULL)) == NULL) return (0); @@ -2510,6 +2578,7 @@ syncache_pcblist(struct sysctl_req *req) xt.xt_inp.inp_vflag = INP_IPV6; else xt.xt_inp.inp_vflag = INP_IPV4; + xt.xt_encaps_port = sc->sc_port; bcopy(&sc->sc_inc, &xt.xt_inp.inp_inc, sizeof (struct in_conninfo)); error = SYSCTL_OUT(req, &xt, sizeof xt); diff --git a/sys/netinet/tcp_syncache.h b/sys/netinet/tcp_syncache.h index 03e34a89c112..a16a80c483d5 100644 --- a/sys/netinet/tcp_syncache.h +++ b/sys/netinet/tcp_syncache.h @@ -40,14 +40,15 @@ void syncache_init(void); #ifdef VIMAGE void syncache_destroy(void); #endif -void syncache_unreach(struct in_conninfo *, tcp_seq); +void syncache_unreach(struct in_conninfo *, tcp_seq, uint16_t); int syncache_expand(struct in_conninfo *, struct tcpopt *, - struct tcphdr *, struct socket **, struct mbuf *); + struct tcphdr *, struct socket **, struct mbuf *, uint16_t); struct socket * syncache_add(struct in_conninfo *, struct tcpopt *, struct tcphdr *, struct inpcb *, struct socket *, struct mbuf *, - void *, void *, uint8_t); -void syncache_chkrst(struct in_conninfo *, struct tcphdr *, struct mbuf *); -void syncache_badack(struct in_conninfo *); + void *, void *, uint8_t, uint16_t); +void syncache_chkrst(struct in_conninfo *, struct tcphdr *, struct mbuf *, + uint16_t); +void syncache_badack(struct in_conninfo *, uint16_t); int syncache_pcblist(struct sysctl_req *); struct syncache { @@ -55,6 +56,7 @@ struct syncache { struct in_conninfo sc_inc; /* addresses */ int sc_rxttime; /* retransmit time */ u_int16_t sc_rxmits; /* retransmit counter */ + u_int16_t sc_port; /* remote UDP encaps port */ u_int32_t sc_tsreflect; /* timestamp to reflect */ u_int32_t sc_tsoff; /* ts offset w/ syncookies */ u_int32_t sc_flowlabel; /* IPv6 flowlabel */ diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c index f98927b196fc..b62386ddca05 100644 --- a/sys/netinet/tcp_timewait.c +++ b/sys/netinet/tcp_timewait.c @@ -93,6 +93,8 @@ __FBSDID("$FreeBSD$"); #include #endif +#include +#include #include #include @@ -318,6 +320,7 @@ tcp_twstart(struct tcpcb *tp) } tw->snd_nxt = tp->snd_nxt; + tw->t_port = tp->t_port; tw->rcv_nxt = tp->rcv_nxt; tw->iss = tp->iss; tw->irs = tp->irs; @@ -436,12 +439,32 @@ tcp_twcheck(struct inpcb *inp, struct tcpopt *to, struct tcphdr *th, * while in TIME_WAIT, drop the old connection * and start over if the sequence numbers * are above the previous ones. + * Allow UDP port number changes in this case. */ if ((thflags & TH_SYN) && SEQ_GT(th->th_seq, tw->rcv_nxt)) { tcp_twclose(tw, 0); return (1); } + /* + * Send RST if UDP port numbers don't match + */ + if (tw->t_port != m->m_pkthdr.tcp_tun_port) { + if (th->th_flags & TH_ACK) { + tcp_respond(NULL, mtod(m, void *), th, m, + (tcp_seq)0, th->th_ack, TH_RST); + } else { + if (th->th_flags & TH_SYN) + tlen++; + if (th->th_flags & TH_FIN) + tlen++; + tcp_respond(NULL, mtod(m, void *), th, m, + th->th_seq+tlen, (tcp_seq)0, TH_RST|TH_ACK); + } + INP_WUNLOCK(inp); + return (0); + } + /* * Drop the segment if it does not contain an ACK. */ @@ -555,13 +578,14 @@ tcp_twrespond(struct tcptw *tw, int flags) #ifdef INET struct ip *ip = NULL; #endif - u_int hdrlen, optlen; + u_int hdrlen, optlen, ulen; int error = 0; /* Keep compiler happy */ struct tcpopt to; #ifdef INET6 struct ip6_hdr *ip6 = NULL; int isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6; #endif + struct udphdr *udp = NULL; hdrlen = 0; /* Keep compiler happy */ INP_WLOCK_ASSERT(inp); @@ -579,8 +603,16 @@ tcp_twrespond(struct tcptw *tw, int flags) if (isipv6) { hdrlen = sizeof(struct ip6_hdr) + sizeof(struct tcphdr); ip6 = mtod(m, struct ip6_hdr *); - th = (struct tcphdr *)(ip6 + 1); - tcpip_fillheaders(inp, ip6, th); + if (tw->t_port) { + udp = (struct udphdr *)(ip6 + 1); + hdrlen += sizeof(struct udphdr); + udp->uh_sport = htons(V_tcp_udp_tunneling_port); + udp->uh_dport = tw->t_port; + ulen = (hdrlen - sizeof(struct ip6_hdr)); + th = (struct tcphdr *)(udp + 1); + } else + th = (struct tcphdr *)(ip6 + 1); + tcpip_fillheaders(inp, tw->t_port, ip6, th); } #endif #if defined(INET6) && defined(INET) @@ -590,8 +622,16 @@ tcp_twrespond(struct tcptw *tw, int flags) { hdrlen = sizeof(struct tcpiphdr); ip = mtod(m, struct ip *); - th = (struct tcphdr *)(ip + 1); - tcpip_fillheaders(inp, ip, th); + if (tw->t_port) { + udp = (struct udphdr *)(ip + 1); + hdrlen += sizeof(struct udphdr); + udp->uh_sport = htons(V_tcp_udp_tunneling_port); + udp->uh_dport = tw->t_port; + ulen = (hdrlen - sizeof(struct ip)); + th = (struct tcphdr *)(udp + 1); + } else + th = (struct tcphdr *)(ip + 1); + tcpip_fillheaders(inp, tw->t_port, ip, th); } #endif to.to_flags = 0; @@ -607,6 +647,10 @@ tcp_twrespond(struct tcptw *tw, int flags) } optlen = tcp_addoptions(&to, (u_char *)(th + 1)); + if (udp) { + ulen += optlen; + udp->uh_ulen = htons(ulen); + } m->m_len = hdrlen + optlen; m->m_pkthdr.len = m->m_len; @@ -618,12 +662,19 @@ tcp_twrespond(struct tcptw *tw, int flags) th->th_flags = flags; th->th_win = htons(tw->last_win); - m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); #ifdef INET6 if (isipv6) { - m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; - th->th_sum = in6_cksum_pseudo(ip6, - sizeof(struct tcphdr) + optlen, IPPROTO_TCP, 0); + if (tw->t_port) { + m->m_pkthdr.csum_flags = CSUM_UDP_IPV6; + m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); + udp->uh_sum = in6_cksum_pseudo(ip6, ulen, IPPROTO_UDP, 0); + th->th_sum = htons(0); + } else { + m->m_pkthdr.csum_flags = CSUM_TCP_IPV6; + m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); + th->th_sum = in6_cksum_pseudo(ip6, + sizeof(struct tcphdr) + optlen, IPPROTO_TCP, 0); + } ip6->ip6_hlim = in6_selecthlim(inp, NULL); TCP_PROBE5(send, NULL, NULL, ip6, NULL, th); error = ip6_output(m, inp->in6p_outputopts, NULL, @@ -635,9 +686,18 @@ tcp_twrespond(struct tcptw *tw, int flags) #endif #ifdef INET { - m->m_pkthdr.csum_flags = CSUM_TCP; - th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, - htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP)); + if (tw->t_port) { + m->m_pkthdr.csum_flags = CSUM_UDP; + m->m_pkthdr.csum_data = offsetof(struct udphdr, uh_sum); + udp->uh_sum = in_pseudo(ip->ip_src.s_addr, + ip->ip_dst.s_addr, htons(ulen + IPPROTO_UDP)); + th->th_sum = htons(0); + } else { + m->m_pkthdr.csum_flags = CSUM_TCP; + m->m_pkthdr.csum_data = offsetof(struct tcphdr, th_sum); + th->th_sum = in_pseudo(ip->ip_src.s_addr, ip->ip_dst.s_addr, + htons(sizeof(struct tcphdr) + optlen + IPPROTO_TCP)); + } ip->ip_len = htons(m->m_pkthdr.len); if (V_path_mtu_discovery) ip->ip_off |= htons(IP_DF); diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index 31b580bfafcc..c4cfb5ea199f 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -2049,6 +2049,31 @@ tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp } goto unlock_and_done; + case TCP_REMOTE_UDP_ENCAPS_PORT: + INP_WUNLOCK(inp); + error = sooptcopyin(sopt, &optval, sizeof optval, + sizeof optval); + if (error) + return (error); + if ((optval < TCP_TUNNELING_PORT_MIN) || + (optval > TCP_TUNNELING_PORT_MAX)) { + /* Its got to be in range */ + return (EINVAL); + } + if ((V_tcp_udp_tunneling_port == 0) && (optval != 0)) { + /* You have to have enabled a UDP tunneling port first */ + return (EINVAL); + } + INP_WLOCK_RECHECK(inp); + if (tp->t_state != TCPS_CLOSED) { + /* You can't change after you are connected */ + error = EINVAL; + } else { + /* Ok we are all good set the port */ + tp->t_port = htons(optval); + } + goto unlock_and_done; + case TCP_MAXSEG: INP_WUNLOCK(inp); error = sooptcopyin(sopt, &optval, sizeof optval, @@ -2388,6 +2413,11 @@ tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct inpcb *inp INP_WUNLOCK(inp); error = sooptcopyout(sopt, &optval, sizeof optval); break; + case TCP_REMOTE_UDP_ENCAPS_PORT: + optval = ntohs(tp->t_port); + INP_WUNLOCK(inp); + error = sooptcopyout(sopt, &optval, sizeof optval); + break; case TCP_NOOPT: optval = tp->t_flags & TF_NOOPT; INP_WUNLOCK(inp); diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 4d28cab80d89..dfd2f239d007 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -282,6 +282,16 @@ struct tcptemp { struct tcphdr tt_t; }; +/* Enable TCP/UDP tunneling port */ +#define TCP_TUNNELING_PORT_MIN 0 +#define TCP_TUNNELING_PORT_MAX 65535 +#define TCP_TUNNELING_PORT_DEFAULT 0 + +/* Enable TCP/UDP tunneling port */ +#define TCP_TUNNELING_OVERHEAD_MIN sizeof(struct udphdr) +#define TCP_TUNNELING_OVERHEAD_MAX 1024 +#define TCP_TUNNELING_OVERHEAD_DEFAULT TCP_TUNNELING_OVERHEAD_MIN + /* Minimum map entries limit value, if set */ #define TCP_MIN_MAP_ENTRIES_LIMIT 128 @@ -502,6 +512,8 @@ struct in_conninfo; struct tcptw { struct inpcb *tw_inpcb; /* XXX back pointer to internet pcb */ + uint32_t t_port:16, /* UDP port number if TCPoUDP */ + t_unused:16; tcp_seq snd_nxt; tcp_seq rcv_nxt; tcp_seq iss; @@ -678,7 +690,10 @@ struct tcpstat { uint64_t tcps_pmtud_blackhole_activated_min_mss; /* BH at min MSS Count */ uint64_t tcps_pmtud_blackhole_failed; /* Black Hole Failure Count */ - uint64_t _pad[12]; /* 6 UTO, 6 TBD */ + uint64_t tcps_tunneled_pkts; /* Packets encap's in UDP received */ + uint64_t tcps_tunneled_errs; /* Packets that had errors that were UDP encaped */ + + uint64_t _pad[10]; /* 6 UTO, 6 TBD */ }; #define tcps_rcvmemdrop tcps_rcvreassfull /* compat */ @@ -776,7 +791,9 @@ struct xtcpcb { uint32_t t_rcv_wnd; /* (s) */ uint32_t t_snd_wnd; /* (s) */ uint32_t xt_ecn; /* (s) */ - int32_t spare32[26]; + uint16_t xt_encaps_port; /* (s) */ + int16_t spare16; + int32_t spare32[25]; } __aligned(8); #ifdef _KERNEL @@ -867,6 +884,8 @@ VNET_DECLARE(int, tcp_sack_globalmaxholes); VNET_DECLARE(int, tcp_sack_maxholes); VNET_DECLARE(int, tcp_sc_rst_sock_fail); VNET_DECLARE(int, tcp_sendspace); +VNET_DECLARE(int, tcp_udp_tunneling_overhead); +VNET_DECLARE(int, tcp_udp_tunneling_port); VNET_DECLARE(struct inpcbhead, tcb); VNET_DECLARE(struct inpcbinfo, tcbinfo); @@ -929,6 +948,7 @@ void tcp_twstart(struct tcpcb *); void tcp_twclose(struct tcptw *, int); void tcp_ctlinput(int, struct sockaddr *, void *); int tcp_ctloutput(struct socket *, struct sockopt *); +void tcp_ctlinput_viaudp(int, struct sockaddr *, void *, void *); struct tcpcb * tcp_drop(struct tcpcb *, int); void tcp_drain(void); @@ -963,6 +983,7 @@ void hhook_run_tcp_est_in(struct tcpcb *tp, int tcp_input(struct mbuf **, int *, int); int tcp_autorcvbuf(struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, int); +int tcp_input_with_port(struct mbuf **, int *, int, uint16_t); void tcp_handle_wakeup(struct tcpcb *, struct socket *); void tcp_do_segment(struct mbuf *, struct tcphdr *, struct socket *, struct tcpcb *, int, int, uint8_t); @@ -1033,7 +1054,7 @@ void tcp_setpersist(struct tcpcb *); void tcp_slowtimo(void); struct tcptemp * tcpip_maketemplate(struct inpcb *); -void tcpip_fillheaders(struct inpcb *, void *, void *); +void tcpip_fillheaders(struct inpcb *, uint16_t, void *, void *); void tcp_timer_activate(struct tcpcb *, uint32_t, u_int); int tcp_timer_suspend(struct tcpcb *, uint32_t); void tcp_timers_unsuspend(struct tcpcb *, uint32_t); diff --git a/sys/netinet/toecore.c b/sys/netinet/toecore.c index 480aa64c1bf7..d8d499a6fde3 100644 --- a/sys/netinet/toecore.c +++ b/sys/netinet/toecore.c @@ -352,7 +352,7 @@ toe_syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, INP_RLOCK_ASSERT(inp); (void )syncache_add(inc, to, th, inp, inp->inp_socket, NULL, tod, - todctx, iptos); + todctx, iptos, htons(0)); } int @@ -362,7 +362,7 @@ toe_syncache_expand(struct in_conninfo *inc, struct tcpopt *to, NET_EPOCH_ASSERT(); - return (syncache_expand(inc, to, th, lsop, NULL)); + return (syncache_expand(inc, to, th, lsop, NULL, htons(0))); } /* diff --git a/sys/netinet6/tcp6_var.h b/sys/netinet6/tcp6_var.h index 7c758fbd3479..2e411963676e 100644 --- a/sys/netinet6/tcp6_var.h +++ b/sys/netinet6/tcp6_var.h @@ -74,8 +74,10 @@ VNET_DECLARE(int, tcp_v6mssdflt); /* XXX */ struct ip6_hdr; void tcp6_ctlinput(int, struct sockaddr *, void *); +void tcp6_ctlinput_viaudp(int, struct sockaddr *, void *, void *); void tcp6_init(void); int tcp6_input(struct mbuf **, int *, int); +int tcp6_input_with_port(struct mbuf **, int *, int, uint16_t); extern struct pr_usrreqs tcp6_usrreqs; diff --git a/sys/sys/mbuf.h b/sys/sys/mbuf.h index 371ae8feae46..dd92103cb1fd 100644 --- a/sys/sys/mbuf.h +++ b/sys/sys/mbuf.h @@ -198,6 +198,7 @@ struct pkthdr { } PH_loc; }; #define ether_vtag PH_per.sixteen[0] +#define tcp_tun_port PH_per.sixteen[0] /* outbound */ #define PH_vt PH_per #define vt_nrecs sixteen[0] /* mld and v6-ND */ #define tso_segsz PH_per.sixteen[1] /* inbound after LRO */ diff --git a/usr.bin/netstat/inet.c b/usr.bin/netstat/inet.c index 95b0d8931b26..49478c4a9247 100644 --- a/usr.bin/netstat/inet.c +++ b/usr.bin/netstat/inet.c @@ -664,6 +664,10 @@ tcp_stats(u_long off, const char *name, int af1 __unused, int proto __unused) "{N:(for} {:received-ack-bytes/%ju} {N:/byte%s})\n"); p(tcps_rcvdupack, "\t\t{:received-duplicate-acks/%ju} " "{N:/duplicate ack%s}\n"); + p(tcps_tunneled_pkts, "\t\t{:received-udp-tunneled-pkts/%ju} " + "{N:/UDP tunneled pkt%s}\n"); + p(tcps_tunneled_errs, "\t\t{:received-bad-udp-tunneled-pkts/%ju} " + "{N:/UDP tunneled pkt cnt with error%s}\n"); p(tcps_rcvacktoomuch, "\t\t{:received-acks-for-unsent-data/%ju} " "{N:/ack%s for unsent data}\n"); p2(tcps_rcvpack, tcps_rcvbyte, "\t\t" diff --git a/usr.bin/sockstat/sockstat.1 b/usr.bin/sockstat/sockstat.1 index 8521c50348c9..f602ad467f9f 100644 --- a/usr.bin/sockstat/sockstat.1 +++ b/usr.bin/sockstat/sockstat.1 @@ -27,7 +27,7 @@ .\" .\" $FreeBSD$ .\" -.Dd December 30, 2020 +.Dd March 28, 2021 .Dt SOCKSTAT 1 .Os .Sh NAME @@ -98,7 +98,7 @@ Display the protocol state, if applicable. This is currently only implemented for SCTP and TCP. .It Fl U Display the remote UDP encapsulation port number, if applicable. -This is currently only implemented for SCTP. +This is currently only implemented for SCTP and TCP. .It Fl u Show .Dv AF_LOCAL @@ -163,7 +163,7 @@ The address the foreign end of the socket is bound to (see .It Li ENCAPS The remote UDP encapsulation port number if .Fl U -is specified (only for SCTP). +is specified (only for SCTP or TCP). .It Li PATH STATE The path state if .Fl s diff --git a/usr.bin/sockstat/sockstat.c b/usr.bin/sockstat/sockstat.c index 26f31d96b8e0..109b254b7438 100644 --- a/usr.bin/sockstat/sockstat.c +++ b/usr.bin/sockstat/sockstat.c @@ -710,6 +710,8 @@ gather_inet(int proto) sockaddr(&faddr->address, sock->family, &xip->in6p_faddr, xip->inp_fport); } + if (proto == IPPROTO_TCP) + faddr->encaps_port = xtp->xt_encaps_port; laddr->next = NULL; faddr->next = NULL; sock->laddr = laddr; @@ -1087,10 +1089,13 @@ displaysock(struct sock *s, int pos) } if (opt_U) { if (faddr != NULL && - s->proto == IPPROTO_SCTP && - s->state != SCTP_CLOSED && - s->state != SCTP_BOUND && - s->state != SCTP_LISTEN) { + ((s->proto == IPPROTO_SCTP && + s->state != SCTP_CLOSED && + s->state != SCTP_BOUND && + s->state != SCTP_LISTEN) || + (s->proto == IPPROTO_TCP && + s->state != TCPS_CLOSED && + s->state != TCPS_LISTEN))) { while (pos < offset) pos += xprintf(" "); pos += xprintf("%u",