From ca800abb78cb6010f8fa04bd0ba6cb26e7722d92 Mon Sep 17 00:00:00 2001 From: pkelsey Date: Mon, 28 Dec 2015 02:43:12 +0000 Subject: [PATCH] MFC r292706: Implementation of server-side TCP Fast Open (TFO) [RFC7413]. TFO is disabled by default in the kernel build. See the top comment in sys/netinet/tcp_fastopen.c for implementation particulars. Differential Revision: https://reviews.freebsd.org/D4350 Sponsored by: Verisign, Inc. --- sys/conf/files | 1 + sys/conf/options | 2 + sys/netinet/tcp.h | 5 + sys/netinet/tcp_fastopen.c | 442 +++++++++++++++++++++++++++++++++++++ sys/netinet/tcp_fastopen.h | 47 ++++ sys/netinet/tcp_input.c | 92 +++++++- sys/netinet/tcp_output.c | 71 +++++- sys/netinet/tcp_subr.c | 21 ++ sys/netinet/tcp_syncache.c | 137 +++++++++++- sys/netinet/tcp_syncache.h | 6 +- sys/netinet/tcp_timer.c | 3 +- sys/netinet/tcp_usrreq.c | 57 +++++ sys/netinet/tcp_var.h | 16 +- 13 files changed, 881 insertions(+), 19 deletions(-) create mode 100644 sys/netinet/tcp_fastopen.c create mode 100644 sys/netinet/tcp_fastopen.h diff --git a/sys/conf/files b/sys/conf/files index 73582b8f9f3b..18230401ac27 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -3503,6 +3503,7 @@ netinet/sctp_usrreq.c optional inet sctp | inet6 sctp netinet/sctputil.c optional inet sctp | inet6 sctp netinet/siftr.c optional inet siftr alq | inet6 siftr alq netinet/tcp_debug.c optional tcpdebug +netinet/tcp_fastopen.c optional inet tcp_rfc7413 | inet6 tcp_rfc7413 netinet/tcp_hostcache.c optional inet | inet6 netinet/tcp_input.c optional inet | inet6 netinet/tcp_lro.c optional inet | inet6 diff --git a/sys/conf/options b/sys/conf/options index 413b44e8e191..0699c2c18b15 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -442,6 +442,8 @@ SLIP_IFF_OPTS opt_slip.h TCPDEBUG SIFTR TCP_OFFLOAD opt_inet.h # Enable code to dispatch TCP offloading +TCP_RFC7413 opt_inet.h +TCP_RFC7413_MAX_KEYS opt_inet.h TCP_SIGNATURE opt_inet.h VLAN_ARRAY opt_vlan.h XBONEHACK diff --git a/sys/netinet/tcp.h 
b/sys/netinet/tcp.h index fb2f8108d4f6..7e3a1a36abc6 100644 --- a/sys/netinet/tcp.h +++ b/sys/netinet/tcp.h @@ -97,6 +97,10 @@ struct tcphdr { #define TCPOLEN_TSTAMP_APPA (TCPOLEN_TIMESTAMP+2) /* appendix A */ #define TCPOPT_SIGNATURE 19 /* Keyed MD5: RFC 2385 */ #define TCPOLEN_SIGNATURE 18 +#define TCPOPT_FAST_OPEN 34 +#define TCPOLEN_FAST_OPEN_EMPTY 2 +#define TCPOLEN_FAST_OPEN_MIN 6 +#define TCPOLEN_FAST_OPEN_MAX 18 /* Miscellaneous constants */ #define MAX_SACK_BLKS 6 /* Max # SACK blocks stored at receiver side */ @@ -165,6 +169,7 @@ struct tcphdr { #define TCP_KEEPIDLE 256 /* L,N,X start keeplives after this period */ #define TCP_KEEPINTVL 512 /* L,N interval between keepalives */ #define TCP_KEEPCNT 1024 /* L,N number of keepalives before close */ +#define TCP_FASTOPEN 1025 /* enable TFO / was created via TFO */ /* Start of reserved space for third-party user-settable options. */ #define TCP_VENDOR SO_VENDOR diff --git a/sys/netinet/tcp_fastopen.c b/sys/netinet/tcp_fastopen.c new file mode 100644 index 000000000000..482320e5b730 --- /dev/null +++ b/sys/netinet/tcp_fastopen.c @@ -0,0 +1,442 @@ +/*- + * Copyright (c) 2015 Patrick Kelsey + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * This is a server-side implementation of TCP Fast Open (TFO) [RFC7413]. + * + * This implementation is currently considered to be experimental and is not + * included in kernel builds by default. To include this code, add the + * following line to your kernel config: + * + * options TCP_RFC7413 + * + * The generated TFO cookies are the 64-bit output of + * SipHash24(<16-byte-key><client-ip>). Multiple concurrent valid keys are + * supported so that time-based rolling cookie invalidation policies can be + * implemented in the system. The default number of concurrent keys is 2. + * This can be adjusted in the kernel config as follows: + * + * options TCP_RFC7413_MAX_KEYS=<num-keys> + * + * + * The following TFO-specific sysctls are defined: + * + * net.inet.tcp.fastopen.acceptany (RW, default 0) + * When non-zero, all client-supplied TFO cookies will be considered to + * be valid. + * + * net.inet.tcp.fastopen.autokey (RW, default 120) + * When this and net.inet.tcp.fastopen.enabled are non-zero, a new key + * will be automatically generated after this many seconds. + * + * net.inet.tcp.fastopen.enabled (RW, default 0) + * When zero, no new TFO connections can be created. On the transition + * from enabled to disabled, all installed keys are removed. On the + * transition from disabled to enabled, if net.inet.tcp.fastopen.autokey + * is non-zero and there are no keys installed, a new key will be + * generated immediately.
The transition from enabled to disabled does + * not affect any TFO connections in progress; it only prevents new ones + * from being made. + * + * net.inet.tcp.fastopen.keylen (RO) + * The key length in bytes. + * + * net.inet.tcp.fastopen.maxkeys (RO) + * The maximum number of keys supported. + * + * net.inet.tcp.fastopen.numkeys (RO) + * The current number of keys installed. + * + * net.inet.tcp.fastopen.setkey (WO) + * Install a new key by writing net.inet.tcp.fastopen.keylen bytes to this + * sysctl. + * + * + * In order for TFO connections to be created via a listen socket, that + * socket must have the TCP_FASTOPEN socket option set on it. This option + * can be set on the socket either before or after the listen() is invoked. + * Clearing this option on a listen socket after it has been set has no + * effect on existing TFO connections or TFO connections in progress; it + * only prevents new TFO connections from being made. + * + * For passively-created sockets, the TCP_FASTOPEN socket option can be + * queried to determine whether the connection was established using TFO. + * Note that connections that are established via a TFO SYN, but that fall + * back to using a non-TFO SYN|ACK will have the TCP_FASTOPEN socket option + * set. + * + * Per the RFC, this implementation limits the number of TFO connections + * that can be in the SYN_RECEIVED state on a per listen-socket basis. + * Whenever this limit is exceeded, requests for new TFO connections are + * serviced as non-TFO requests. Without such a limit, given a valid TFO + * cookie, an attacker could keep the listen queue in an overflow condition + * using a TFO SYN flood. This implementation sets the limit at half the + * configured listen backlog. 
+ * + */ + +#include +__FBSDID("$FreeBSD$"); + +#include "opt_inet.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +#include +#include +#include +#include + + +#define TCP_FASTOPEN_KEY_LEN SIPHASH_KEY_LENGTH + +#if !defined(TCP_RFC7413_MAX_KEYS) || (TCP_RFC7413_MAX_KEYS < 1) +#define TCP_FASTOPEN_MAX_KEYS 2 +#else +#define TCP_FASTOPEN_MAX_KEYS TCP_RFC7413_MAX_KEYS +#endif + +struct tcp_fastopen_keylist { + unsigned int newest; + uint8_t key[TCP_FASTOPEN_MAX_KEYS][TCP_FASTOPEN_KEY_LEN]; +}; + +struct tcp_fastopen_callout { + struct callout c; + struct vnet *v; +}; + +SYSCTL_NODE(_net_inet_tcp, OID_AUTO, fastopen, CTLFLAG_RW, 0, "TCP Fast Open"); + +static VNET_DEFINE(int, tcp_fastopen_acceptany) = 0; +#define V_tcp_fastopen_acceptany VNET(tcp_fastopen_acceptany) +SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, acceptany, + CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_fastopen_acceptany), 0, + "Accept any non-empty cookie"); + +static VNET_DEFINE(unsigned int, tcp_fastopen_autokey) = 120; +#define V_tcp_fastopen_autokey VNET(tcp_fastopen_autokey) +static int sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS); +SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, autokey, + CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0, + &sysctl_net_inet_tcp_fastopen_autokey, "IU", + "Number of seconds between auto-generation of a new key; zero disables"); + +VNET_DEFINE(unsigned int, tcp_fastopen_enabled) = 0; +static int sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS); +SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, enabled, + CTLFLAG_VNET | CTLTYPE_UINT | CTLFLAG_RW, NULL, 0, + &sysctl_net_inet_tcp_fastopen_enabled, "IU", + "Enable/disable TCP Fast Open processing"); + +SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, keylen, + CTLFLAG_RD, SYSCTL_NULL_INT_PTR, TCP_FASTOPEN_KEY_LEN, + "Key length in bytes"); + +SYSCTL_INT(_net_inet_tcp_fastopen, OID_AUTO, maxkeys, + CTLFLAG_RD, SYSCTL_NULL_INT_PTR, 
TCP_FASTOPEN_MAX_KEYS, + "Maximum number of keys supported"); + +static VNET_DEFINE(unsigned int, tcp_fastopen_numkeys) = 0; +#define V_tcp_fastopen_numkeys VNET(tcp_fastopen_numkeys) +SYSCTL_UINT(_net_inet_tcp_fastopen, OID_AUTO, numkeys, + CTLFLAG_VNET | CTLFLAG_RD, &VNET_NAME(tcp_fastopen_numkeys), 0, + "Number of keys installed"); + +static int sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS); +SYSCTL_PROC(_net_inet_tcp_fastopen, OID_AUTO, setkey, + CTLFLAG_VNET | CTLTYPE_OPAQUE | CTLFLAG_WR, NULL, 0, + &sysctl_net_inet_tcp_fastopen_setkey, "", + "Install a new key"); + +static VNET_DEFINE(struct rmlock, tcp_fastopen_keylock); +#define V_tcp_fastopen_keylock VNET(tcp_fastopen_keylock) + +#define TCP_FASTOPEN_KEYS_RLOCK(t) rm_rlock(&V_tcp_fastopen_keylock, (t)) +#define TCP_FASTOPEN_KEYS_RUNLOCK(t) rm_runlock(&V_tcp_fastopen_keylock, (t)) +#define TCP_FASTOPEN_KEYS_WLOCK() rm_wlock(&V_tcp_fastopen_keylock) +#define TCP_FASTOPEN_KEYS_WUNLOCK() rm_wunlock(&V_tcp_fastopen_keylock) + +static VNET_DEFINE(struct tcp_fastopen_keylist, tcp_fastopen_keys); +#define V_tcp_fastopen_keys VNET(tcp_fastopen_keys) + +static VNET_DEFINE(struct tcp_fastopen_callout, tcp_fastopen_autokey_ctx); +#define V_tcp_fastopen_autokey_ctx VNET(tcp_fastopen_autokey_ctx) + +static VNET_DEFINE(uma_zone_t, counter_zone); +#define V_counter_zone VNET(counter_zone) + +/* + * Set up the TFO state of the current vnet: the pending-counter zone, the + * key lock, and the autokey callout that is tied to that lock. + */ +void +tcp_fastopen_init(void) +{ + V_counter_zone = uma_zcreate("tfo", sizeof(unsigned int), + NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + rm_init(&V_tcp_fastopen_keylock, "tfo_keylock"); + callout_init_rm(&V_tcp_fastopen_autokey_ctx.c, + &V_tcp_fastopen_keylock, 0); + /* + * tcp_fastopen_autokey_callout() enters the vnet recorded here; + * without it the callout would be handed a NULL vnet. + */ + V_tcp_fastopen_autokey_ctx.v = curvnet; + /* The first key installed advances "newest" and wraps it to slot 0. */ + V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS - 1; +} + +void +tcp_fastopen_destroy(void) +{ + callout_drain(&V_tcp_fastopen_autokey_ctx.c); + rm_destroy(&V_tcp_fastopen_keylock); + uma_zdestroy(V_counter_zone); +} + +unsigned int * +tcp_fastopen_alloc_counter(void) +{ + unsigned int *counter; + counter = uma_zalloc(V_counter_zone,
M_NOWAIT); + if (counter) + *counter = 1; + return (counter); +} + +void +tcp_fastopen_decrement_counter(unsigned int *counter) +{ + if (*counter == 1) + uma_zfree(V_counter_zone, counter); + else + atomic_subtract_int(counter, 1); +} + +static void +tcp_fastopen_addkey_locked(uint8_t *key) +{ + + V_tcp_fastopen_keys.newest++; + if (V_tcp_fastopen_keys.newest == TCP_FASTOPEN_MAX_KEYS) + V_tcp_fastopen_keys.newest = 0; + memcpy(V_tcp_fastopen_keys.key[V_tcp_fastopen_keys.newest], key, + TCP_FASTOPEN_KEY_LEN); + if (V_tcp_fastopen_numkeys < TCP_FASTOPEN_MAX_KEYS) + V_tcp_fastopen_numkeys++; +} + +static void +tcp_fastopen_autokey_locked(void) +{ + uint8_t newkey[TCP_FASTOPEN_KEY_LEN]; + + arc4rand(newkey, TCP_FASTOPEN_KEY_LEN, 0); + tcp_fastopen_addkey_locked(newkey); +} + +static void +tcp_fastopen_autokey_callout(void *arg) +{ + struct tcp_fastopen_callout *ctx = arg; + + CURVNET_SET(ctx->v); + tcp_fastopen_autokey_locked(); + callout_reset(&ctx->c, V_tcp_fastopen_autokey * hz, + tcp_fastopen_autokey_callout, ctx); + CURVNET_RESTORE(); +} + + +static uint64_t +tcp_fastopen_make_cookie(uint8_t key[SIPHASH_KEY_LENGTH], struct in_conninfo *inc) +{ + SIPHASH_CTX ctx; + uint64_t siphash; + + SipHash24_Init(&ctx); + SipHash_SetKey(&ctx, key); + switch (inc->inc_flags & INC_ISIPV6) { +#ifdef INET + case 0: + SipHash_Update(&ctx, &inc->inc_faddr, sizeof(inc->inc_faddr)); + break; +#endif +#ifdef INET6 + case INC_ISIPV6: + SipHash_Update(&ctx, &inc->inc6_faddr, sizeof(inc->inc6_faddr)); + break; +#endif + } + SipHash_Final((u_int8_t *)&siphash, &ctx); + + return (siphash); +} + + +/* + * Return values: + * -1 the cookie is invalid and no valid cookie is available + * 0 the cookie is invalid and the latest cookie has been returned + * 1 the cookie is valid and the latest cookie has been returned + */ +int +tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie, + unsigned int len, uint64_t *latest_cookie) +{ + struct rm_priotracker tracker; + unsigned int i,
key_index; + uint64_t cur_cookie; + int rv = 0; + + if (V_tcp_fastopen_acceptany) { + *latest_cookie = 0; + return (1); + } + + /* + * Hold the key lock for every access to the key list so that a + * concurrent key install or removal is never seen half-done. + */ + TCP_FASTOPEN_KEYS_RLOCK(&tracker); + key_index = V_tcp_fastopen_keys.newest; + if (V_tcp_fastopen_numkeys == 0) { + /* No key, so no cookie can be checked or handed back. */ + rv = -1; + } else if (len != TCP_FASTOPEN_COOKIE_LEN) { + *latest_cookie = tcp_fastopen_make_cookie( + V_tcp_fastopen_keys.key[key_index], inc); + } else { + /* Check against each available key, newest to oldest. */ + for (i = 0; i < V_tcp_fastopen_numkeys; i++) { + cur_cookie = tcp_fastopen_make_cookie( + V_tcp_fastopen_keys.key[key_index], inc); + if (i == 0) + *latest_cookie = cur_cookie; + if (memcmp(cookie, &cur_cookie, + TCP_FASTOPEN_COOKIE_LEN) == 0) { + rv = 1; + break; + } + if (key_index == 0) + key_index = TCP_FASTOPEN_MAX_KEYS - 1; + else + key_index--; + } + } + TCP_FASTOPEN_KEYS_RUNLOCK(&tracker); + + return (rv); +} + +static int +sysctl_net_inet_tcp_fastopen_autokey(SYSCTL_HANDLER_ARGS) +{ + int error; + unsigned int new; + + new = V_tcp_fastopen_autokey; + error = sysctl_handle_int(oidp, &new, 0, req); + if (error == 0 && req->newptr) { + if (new > (INT_MAX / hz)) + return (EINVAL); + + TCP_FASTOPEN_KEYS_WLOCK(); + if (V_tcp_fastopen_enabled) { + if (V_tcp_fastopen_autokey && !new) + callout_stop(&V_tcp_fastopen_autokey_ctx.c); + else if (new) + callout_reset(&V_tcp_fastopen_autokey_ctx.c, + new * hz, tcp_fastopen_autokey_callout, + &V_tcp_fastopen_autokey_ctx); + } + V_tcp_fastopen_autokey = new; + TCP_FASTOPEN_KEYS_WUNLOCK(); + } + + return (error); +} + +static int +sysctl_net_inet_tcp_fastopen_enabled(SYSCTL_HANDLER_ARGS) +{ + int error; + unsigned int new; + + new = V_tcp_fastopen_enabled; + error = sysctl_handle_int(oidp, &new, 0, req); + if (error == 0 && req->newptr) { + if (V_tcp_fastopen_enabled && !new) { + /* enabled -> disabled */ + TCP_FASTOPEN_KEYS_WLOCK(); + V_tcp_fastopen_numkeys = 0; + V_tcp_fastopen_keys.newest = TCP_FASTOPEN_MAX_KEYS -
1; + if (V_tcp_fastopen_autokey) + callout_stop(&V_tcp_fastopen_autokey_ctx.c); + V_tcp_fastopen_enabled = 0; + TCP_FASTOPEN_KEYS_WUNLOCK(); + } else if (!V_tcp_fastopen_enabled && new) { + /* disabled -> enabled */ + TCP_FASTOPEN_KEYS_WLOCK(); + if (V_tcp_fastopen_autokey && + (V_tcp_fastopen_numkeys == 0)) { + tcp_fastopen_autokey_locked(); + callout_reset(&V_tcp_fastopen_autokey_ctx.c, + V_tcp_fastopen_autokey * hz, + tcp_fastopen_autokey_callout, + &V_tcp_fastopen_autokey_ctx); + } + V_tcp_fastopen_enabled = 1; + TCP_FASTOPEN_KEYS_WUNLOCK(); + } + } + return (error); +} + +static int +sysctl_net_inet_tcp_fastopen_setkey(SYSCTL_HANDLER_ARGS) +{ + int error; + uint8_t newkey[TCP_FASTOPEN_KEY_LEN]; + + if (req->oldptr != NULL || req->oldlen != 0) + return (EINVAL); + if (req->newptr == NULL) + return (EPERM); + if (req->newlen != sizeof(newkey)) + return (EINVAL); + error = SYSCTL_IN(req, newkey, sizeof(newkey)); + if (error) + return (error); + + TCP_FASTOPEN_KEYS_WLOCK(); + tcp_fastopen_addkey_locked(newkey); + TCP_FASTOPEN_KEYS_WUNLOCK(); + + return (0); +} diff --git a/sys/netinet/tcp_fastopen.h b/sys/netinet/tcp_fastopen.h new file mode 100644 index 000000000000..c64ba2c04d5d --- /dev/null +++ b/sys/netinet/tcp_fastopen.h @@ -0,0 +1,47 @@ +/*- + * Copyright (c) 2015 Patrick Kelsey + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _TCP_FASTOPEN_H_ +#define _TCP_FASTOPEN_H_ + +#ifdef _KERNEL + +#define TCP_FASTOPEN_COOKIE_LEN 8 /* tied to SipHash24 64-bit output */ + +VNET_DECLARE(unsigned int, tcp_fastopen_enabled); +#define V_tcp_fastopen_enabled VNET(tcp_fastopen_enabled) + +void tcp_fastopen_init(void); +void tcp_fastopen_destroy(void); +unsigned int *tcp_fastopen_alloc_counter(void); +void tcp_fastopen_decrement_counter(unsigned int *counter); +int tcp_fastopen_check_cookie(struct in_conninfo *inc, uint8_t *cookie, + unsigned int len, uint64_t *latest_cookie); +#endif /* _KERNEL */ + +#endif /* _TCP_FASTOPEN_H_ */ diff --git a/sys/netinet/tcp_input.c b/sys/netinet/tcp_input.c index 0500f2506518..b824ba27dc76 100644 --- a/sys/netinet/tcp_input.c +++ b/sys/netinet/tcp_input.c @@ -98,6 +98,9 @@ __FBSDID("$FreeBSD$"); #include #include #include +#ifdef TCP_RFC7413 +#include +#endif #include #include #include @@ -1072,6 +1075,9 @@ relocked: rstreason = BANDLIM_RST_OPENPORT; goto dropwithreset; } +#ifdef TCP_RFC7413 +new_tfo_socket: +#endif if (so == NULL) { /* * We completed the 3-way handshake @@ -1329,7 +1335,12 @@ relocked: (void *)tcp_saveipgen, &tcp_savetcp, 0); #endif 
tcp_dooptions(&to, optp, optlen, TO_SYN); +#ifdef TCP_RFC7413 + if (syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL)) + goto new_tfo_socket; +#else syncache_add(&inc, &to, th, inp, &so, m, NULL, NULL); +#endif /* * Entry added to syncache and mbuf consumed. * Everything already unlocked by syncache_add(). @@ -1439,6 +1450,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, struct in_conninfo *inc; struct mbuf *mfree; struct tcpopt to; + int tfo_syn; #ifdef TCPDEBUG /* @@ -1878,6 +1890,28 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, rstreason = BANDLIM_RST_OPENPORT; goto dropwithreset; } +#ifdef TCP_RFC7413 + if (tp->t_flags & TF_FASTOPEN) { + /* + * When a TFO connection is in SYN_RECEIVED, the + * only valid packets are the initial SYN, a + * retransmit/copy of the initial SYN (possibly with + * a subset of the original data), a valid ACK, a + * FIN, or a RST. + */ + if ((thflags & (TH_SYN|TH_ACK)) == (TH_SYN|TH_ACK)) { + rstreason = BANDLIM_RST_OPENPORT; + goto dropwithreset; + } else if (thflags & TH_SYN) { + /* non-initial SYN is ignored */ + if ((tcp_timer_active(tp, TT_DELACK) || + tcp_timer_active(tp, TT_REXMT))) + goto drop; + } else if (!(thflags & (TH_ACK|TH_FIN|TH_RST))) { + goto drop; + } + } +#endif break; /* @@ -2318,9 +2352,16 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, */ if ((thflags & TH_ACK) == 0) { if (tp->t_state == TCPS_SYN_RECEIVED || - (tp->t_flags & TF_NEEDSYN)) + (tp->t_flags & TF_NEEDSYN)) { +#ifdef TCP_RFC7413 + if (tp->t_state == TCPS_SYN_RECEIVED && + tp->t_flags & TF_FASTOPEN) { + tp->snd_wnd = tiwin; + cc_conn_init(tp); + } +#endif goto step6; - else if (tp->t_flags & TF_ACKNOW) + } else if (tp->t_flags & TF_ACKNOW) goto dropafterack; else goto drop; @@ -2359,7 +2400,27 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, tcp_state_change(tp, TCPS_ESTABLISHED); TCP_PROBE5(accept__established, NULL, tp, mtod(m, const char *), tp, th); - 
cc_conn_init(tp); +#ifdef TCP_RFC7413 + if (tp->t_tfo_pending) { + tcp_fastopen_decrement_counter(tp->t_tfo_pending); + tp->t_tfo_pending = NULL; + + /* + * Account for the ACK of our SYN prior to + * regular ACK processing below. + */ + tp->snd_una++; + } + /* + * TFO connections call cc_conn_init() during SYN + * processing. Calling it again here for such + * connections is not harmless as it would undo the + * snd_cwnd reduction that occurs when a TFO SYN|ACK + * is retransmitted. + */ + if (!(tp->t_flags & TF_FASTOPEN)) +#endif + cc_conn_init(tp); tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); } /* @@ -2890,9 +2951,12 @@ dodata: /* XXX */ * case PRU_RCVD). If a FIN has already been received on this * connection then we just ignore the text. */ - if ((tlen || (thflags & TH_FIN)) && + tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) && + (tp->t_flags & TF_FASTOPEN)); + if ((tlen || (thflags & TH_FIN) || tfo_syn) && TCPS_HAVERCVDFIN(tp->t_state) == 0) { tcp_seq save_start = th->th_seq; + m_adj(m, drop_hdrlen); /* delayed header drop */ /* * Insert segment which includes th into TCP reassembly queue @@ -2908,8 +2972,9 @@ dodata: /* XXX */ */ if (th->th_seq == tp->rcv_nxt && LIST_EMPTY(&tp->t_segq) && - TCPS_HAVEESTABLISHED(tp->t_state)) { - if (DELAY_ACK(tp, tlen)) + (TCPS_HAVEESTABLISHED(tp->t_state) || + tfo_syn)) { + if (DELAY_ACK(tp, tlen) || tfo_syn) tp->t_flags |= TF_DELACK; else tp->t_flags |= TF_ACKNOW; @@ -3262,6 +3327,26 @@ tcp_dooptions(struct tcpopt *to, u_char *cp, int cnt, int flags) to->to_sacks = cp + 2; TCPSTAT_INC(tcps_sack_rcv_blocks); break; +#ifdef TCP_RFC7413 + case TCPOPT_FAST_OPEN: + /* + * The option is either empty (a cookie request) or + * carries a cookie of bounded length; ignore it for + * any other length. + */ + if ((optlen != TCPOLEN_FAST_OPEN_EMPTY) && + ((optlen < TCPOLEN_FAST_OPEN_MIN) || + (optlen > TCPOLEN_FAST_OPEN_MAX))) + continue; + if (!(flags & TO_SYN)) + continue; + if (!V_tcp_fastopen_enabled) + continue; + to->to_flags |= TOF_FASTOPEN; + to->to_tfo_len = optlen - 2; + to->to_tfo_cookie = to->to_tfo_len ?
cp + 2 : NULL; + break; +#endif default: continue; } diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index f1196e10ca01..c7dc547b320a 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -69,6 +69,9 @@ __FBSDID("$FreeBSD$"); #include #include #endif +#ifdef TCP_RFC7413 +#include <netinet/tcp_fastopen.h> +#endif #define TCPOUTFLAGS #include #include @@ -202,6 +205,17 @@ tcp_output(struct tcpcb *tp) return (tcp_offload_output(tp)); #endif +#ifdef TCP_RFC7413 + /* + * For TFO connections in SYN_RECEIVED, only allow the initial + * SYN|ACK and those sent by the retransmit timer. + */ + if ((tp->t_flags & TF_FASTOPEN) && + (tp->t_state == TCPS_SYN_RECEIVED) && + SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN|ACK sent */ + (tp->snd_nxt != tp->snd_una)) /* not a retransmit */ + return (0); +#endif /* * Determine length of data that should be transmitted, * and flags that will be used. @@ -387,6 +401,15 @@ after_sack_rexmit: if ((flags & TH_SYN) && SEQ_GT(tp->snd_nxt, tp->snd_una)) { if (tp->t_state != TCPS_SYN_RECEIVED) flags &= ~TH_SYN; +#ifdef TCP_RFC7413 + /* + * When sending additional segments following a TFO SYN|ACK, + * do not include the SYN bit. + */ + if ((tp->t_flags & TF_FASTOPEN) && + (tp->t_state == TCPS_SYN_RECEIVED)) + flags &= ~TH_SYN; +#endif off--, len++; } @@ -400,6 +423,17 @@ after_sack_rexmit: flags &= ~TH_FIN; } +#ifdef TCP_RFC7413 + /* + * When retransmitting SYN|ACK on a passively-created TFO socket, + * don't include data, as the presence of data may have caused the + * original SYN|ACK to have been dropped by a middlebox.
+ */ + if ((tp->t_flags & TF_FASTOPEN) && + (((tp->t_state == TCPS_SYN_RECEIVED) && (tp->t_rxtshift > 0)) || + (flags & TH_RST))) + len = 0; +#endif if (len <= 0) { /* * If FIN has been sent but not acked, @@ -718,6 +752,22 @@ send: tp->snd_nxt = tp->iss; to.to_mss = tcp_mssopt(&tp->t_inpcb->inp_inc); to.to_flags |= TOF_MSS; +#ifdef TCP_RFC7413 + /* + * Only include the TFO option on the first + * transmission of the SYN|ACK on a + * passively-created TFO socket, as the presence of + * the TFO option may have caused the original + * SYN|ACK to have been dropped by a middlebox. + */ + if ((tp->t_flags & TF_FASTOPEN) && + (tp->t_state == TCPS_SYN_RECEIVED) && + (tp->t_rxtshift == 0)) { + to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN; + to.to_tfo_cookie = (u_char *)&tp->t_tfo_cookie; + to.to_flags |= TOF_FASTOPEN; + } +#endif } /* Window scaling. */ if ((flags & TH_SYN) && (tp->t_flags & TF_REQ_SCALE)) { @@ -997,7 +1047,7 @@ send: * give data to the user when a buffer fills or * a PUSH comes in.) */ - if (off + len == so->so_snd.sb_cc) + if ((off + len == so->so_snd.sb_cc) && !(flags & TH_SYN)) flags |= TH_PUSH; SOCKBUF_UNLOCK(&so->so_snd); } else { @@ -1694,6 +1744,25 @@ tcp_addoptions(struct tcpopt *to, u_char *optp) TCPSTAT_INC(tcps_sack_send_blocks); break; } +#ifdef TCP_RFC7413 + case TOF_FASTOPEN: + { + int total_len; + + /* XXX is there any point to aligning this option? 
*/ + total_len = TCPOLEN_FAST_OPEN_EMPTY + to->to_tfo_len; + if (TCP_MAXOLEN - optlen < total_len) + continue; + *optp++ = TCPOPT_FAST_OPEN; + *optp++ = total_len; + if (to->to_tfo_len > 0) { + bcopy(to->to_tfo_cookie, optp, to->to_tfo_len); + optp += to->to_tfo_len; + } + optlen += total_len; + break; + } +#endif default: panic("%s: unknown TCP option type", __func__); break; diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 7bdd573aaeb0..3ce21a955f34 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -83,6 +83,9 @@ __FBSDID("$FreeBSD$"); #include #endif +#ifdef TCP_RFC7413 +#include +#endif #include #include #include @@ -427,6 +430,10 @@ tcp_init(void) SHUTDOWN_PRI_DEFAULT); EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL, EVENTHANDLER_PRI_ANY); + +#ifdef TCP_RFC7413 + tcp_fastopen_init(); +#endif } #ifdef VIMAGE @@ -434,6 +441,9 @@ void tcp_destroy(void) { +#ifdef TCP_RFC7413 + tcp_fastopen_destroy(); +#endif tcp_hc_destroy(); syncache_destroy(); tcp_tw_destroy(); @@ -1101,6 +1111,17 @@ tcp_close(struct tcpcb *tp) #ifdef TCP_OFFLOAD if (tp->t_state == TCPS_LISTEN) tcp_offload_listen_stop(tp); +#endif +#ifdef TCP_RFC7413 + /* + * This releases the TFO pending counter resource for TFO listen + * sockets as well as passively-created TFO sockets that transition + * from SYN_RECEIVED to CLOSED. 
+ */ + if (tp->t_tfo_pending) { + tcp_fastopen_decrement_counter(tp->t_tfo_pending); + tp->t_tfo_pending = NULL; + } #endif in_pcbdrop(inp); TCPSTAT_INC(tcps_closed); diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 9896788f45f4..8d2cc984617a 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -78,6 +78,9 @@ __FBSDID("$FreeBSD$"); #include #endif #include +#ifdef TCP_RFC7413 +#include <netinet/tcp_fastopen.h> +#endif #include #include #include @@ -1089,6 +1092,43 @@ failed: return (0); } +#ifdef TCP_RFC7413 +/* + * Create the socket for a SYN that carried a valid TFO cookie. On return + * *lsop is the new socket, or NULL if one could not be created; a new + * socket takes over the listen socket's pending counter reference. + */ +static void +syncache_tfo_expand(struct syncache *sc, struct socket **lsop, struct mbuf *m, + uint64_t response_cookie) +{ + struct inpcb *inp; + struct tcpcb *tp; + unsigned int *pending_counter; + + /* + * Global TCP locks are held because we manipulate the PCB lists + * and create a new socket. + */ + INP_INFO_WLOCK_ASSERT(&V_tcbinfo); + + pending_counter = intotcpcb(sotoinpcb(*lsop))->t_tfo_pending; + *lsop = syncache_socket(sc, *lsop, m); + if (*lsop == NULL) { + TCPSTAT_INC(tcps_sc_aborted); + atomic_subtract_int(pending_counter, 1); + } else { + inp = sotoinpcb(*lsop); + tp = intotcpcb(inp); + tp->t_flags |= TF_FASTOPEN; + tp->t_tfo_cookie = response_cookie; + tp->snd_max = tp->iss; + tp->snd_nxt = tp->iss; + tp->t_tfo_pending = pending_counter; + TCPSTAT_INC(tcps_sc_completed); + } +} +#endif /* TCP_RFC7413 */ + /* * Given a LISTEN socket and an inbound SYN request, add * this to the syn cache, and send back a segment: @@ -1101,8 +1137,15 @@ failed: * DoS attack, an attacker could send data which would eventually * consume all available buffer space if it were ACKed. By not ACKing * the data, we avoid this DoS scenario. + * + * The exception to the above is when a SYN with a valid TCP Fast Open (TFO) + * cookie is processed, V_tcp_fastopen_enabled set to true, and the + * TCP_FASTOPEN socket option is set.
In this case, a new socket is created + * and returned via lsop, the mbuf is not freed so that tcp_input() can + * queue its data to the socket, and 1 is returned to indicate the + * TFO-socket-creation path was taken. */ -void +int syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, struct inpcb *inp, struct socket **lsop, struct mbuf *m, void *tod, void *todctx) @@ -1115,6 +1158,7 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, u_int ltflags; int win, sb_hiwat, ip_ttl, ip_tos; char *s; + int rv = 0; #ifdef INET6 int autoflowlabel = 0; #endif @@ -1123,6 +1167,11 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, #endif struct syncache scs; struct ucred *cred; +#ifdef TCP_RFC7413 + uint64_t tfo_response_cookie; + int tfo_cookie_valid = 0; + int tfo_response_cookie_valid = 0; +#endif INP_INFO_WLOCK_ASSERT(&V_tcbinfo); INP_WLOCK_ASSERT(inp); /* listen socket */ @@ -1148,6 +1197,37 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, sb_hiwat = so->so_rcv.sb_hiwat; ltflags = (tp->t_flags & (TF_NOOPT | TF_SIGNATURE)); +#ifdef TCP_RFC7413 + if (V_tcp_fastopen_enabled && (tp->t_flags & TF_FASTOPEN) && + (tp->t_tfo_pending != NULL) && (to->to_flags & TOF_FASTOPEN)) { + /* + * Limit the number of pending TFO connections to + * approximately half of the queue limit. This prevents TFO + * SYN floods from starving the service by filling the + * listen queue with bogus TFO connections. + */ + if (atomic_fetchadd_int(tp->t_tfo_pending, 1) <= + (so->so_qlimit / 2)) { + int result; + + result = tcp_fastopen_check_cookie(inc, + to->to_tfo_cookie, to->to_tfo_len, + &tfo_response_cookie); + tfo_cookie_valid = (result > 0); + tfo_response_cookie_valid = (result >= 0); + /* + * Only a SYN that goes on to create a TFO socket + * keeps its slot in the pending count. Release it + * now, or the count only ever grows and TFO ends + * up being refused on this listen socket. + */ + if (!tfo_cookie_valid) + atomic_subtract_int(tp->t_tfo_pending, 1); + } else + atomic_subtract_int(tp->t_tfo_pending, 1); + } +#endif + /* By the time we drop the lock these should no longer be used.
*/ so = NULL; tp = NULL; @@ -1160,9 +1232,16 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, } else mac_syncache_create(maclabel, inp); #endif +#ifdef TCP_RFC7413 + if (!tfo_cookie_valid) { + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_tcbinfo); + } +#else INP_WUNLOCK(inp); INP_INFO_WUNLOCK(&V_tcbinfo); - +#endif + /* * Remember the IP options, if any. */ @@ -1190,6 +1269,18 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, sc = syncache_lookup(inc, &sch); /* returns locked entry */ SCH_LOCK_ASSERT(sch); if (sc != NULL) { +#ifdef TCP_RFC7413 + if (tfo_cookie_valid) { + /* + * A duplicate SYN creates no TFO socket, so give + * back the pending slot that was taken for it. + */ + atomic_subtract_int( + intotcpcb(inp)->t_tfo_pending, 1); + INP_WUNLOCK(inp); + INP_INFO_WUNLOCK(&V_tcbinfo); + } +#endif TCPSTAT_INC(tcps_sc_dupsyn); if (ipopts) { /* @@ -1232,6 +1317,14 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, goto done; } +#ifdef TCP_RFC7413 + if (tfo_cookie_valid) { + bzero(&scs, sizeof(scs)); + sc = &scs; + goto skip_alloc; + } +#endif + sc = uma_zalloc(V_tcp_syncache.zone, M_NOWAIT | M_ZERO); if (sc == NULL) { /* @@ -1255,7 +1348,13 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, } } } - + +#ifdef TCP_RFC7413 +skip_alloc: + if (!tfo_cookie_valid && tfo_response_cookie_valid) + sc->sc_tfo_cookie = &tfo_response_cookie; +#endif + /* * Fill in the syncache values. */ @@ -1365,6 +1464,15 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, #endif SCH_UNLOCK(sch); +#ifdef TCP_RFC7413 + if (tfo_cookie_valid) { + syncache_tfo_expand(sc, lsop, m, tfo_response_cookie); + /* INP_WUNLOCK(inp) will be performed by the caller */ + rv = 1; + goto tfo_done; + } +#endif + /* * Do a standard 3-way handshake.
*/ @@ -1382,17 +1490,20 @@ syncache_add(struct in_conninfo *inc, struct tcpopt *to, struct tcphdr *th, } done: + if (m) { + *lsop = NULL; + m_freem(m); + } +#ifdef TCP_RFC7413 +tfo_done: +#endif if (cred != NULL) crfree(cred); #ifdef MAC if (sc == &scs) mac_syncache_destroy(&maclabel); #endif - if (m) { - - *lsop = NULL; - m_freem(m); - } + return (rv); } static int @@ -1520,6 +1631,16 @@ syncache_respond(struct syncache *sc) if (sc->sc_flags & SCF_SIGNATURE) to.to_flags |= TOF_SIGNATURE; #endif + +#ifdef TCP_RFC7413 + if (sc->sc_tfo_cookie) { + to.to_flags |= TOF_FASTOPEN; + to.to_tfo_len = TCP_FASTOPEN_COOKIE_LEN; + to.to_tfo_cookie = sc->sc_tfo_cookie; + /* don't send cookie again when retransmitting response */ + sc->sc_tfo_cookie = NULL; + } +#endif optlen = tcp_addoptions(&to, (u_char *)(th + 1)); /* Adjust headers by option size. */ diff --git a/sys/netinet/tcp_syncache.h b/sys/netinet/tcp_syncache.h index fb9a6c64c452..d56afb693030 100644 --- a/sys/netinet/tcp_syncache.h +++ b/sys/netinet/tcp_syncache.h @@ -41,7 +41,7 @@ void syncache_destroy(void); void syncache_unreach(struct in_conninfo *, struct tcphdr *); int syncache_expand(struct in_conninfo *, struct tcpopt *, struct tcphdr *, struct socket **, struct mbuf *); -void syncache_add(struct in_conninfo *, struct tcpopt *, +int syncache_add(struct in_conninfo *, struct tcpopt *, struct tcphdr *, struct inpcb *, struct socket **, struct mbuf *, void *, void *); void syncache_chkrst(struct in_conninfo *, struct tcphdr *); @@ -74,7 +74,9 @@ struct syncache { #endif struct label *sc_label; /* MAC label reference */ struct ucred *sc_cred; /* cred cache for jail checks */ - +#ifdef TCP_RFC7413 + void *sc_tfo_cookie; /* for TCP Fast Open response */ +#endif void *sc_pspare; /* TCP_SIGNATURE */ u_int32_t sc_spare[2]; /* UTO */ }; diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c index 3dc3a8198ece..c66cb76bd060 100644 --- a/sys/netinet/tcp_timer.c +++ b/sys/netinet/tcp_timer.c @@ -596,7 +596,8 @@ 
tcp_timer_rexmt(void * xtp) } else tp->t_flags &= ~TF_PREVVALID; TCPSTAT_INC(tcps_rexmttimeo); - if (tp->t_state == TCPS_SYN_SENT) + if ((tp->t_state == TCPS_SYN_SENT) || + (tp->t_state == TCPS_SYN_RECEIVED)) rexmt = TCPTV_RTOBASE * tcp_syn_backoff[tp->t_rxtshift]; else rexmt = TCP_REXMTVAL(tp) * tcp_backoff[tp->t_rxtshift]; diff --git a/sys/netinet/tcp_usrreq.c b/sys/netinet/tcp_usrreq.c index c848306c8f07..1ccbf9a8f267 100644 --- a/sys/netinet/tcp_usrreq.c +++ b/sys/netinet/tcp_usrreq.c @@ -79,6 +79,9 @@ __FBSDID("$FreeBSD$"); #include #include #endif +#ifdef TCP_RFC7413 +#include +#endif #include #include #include @@ -391,6 +394,10 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td) } SOCK_UNLOCK(so); +#ifdef TCP_RFC7413 + if (tp->t_flags & TF_FASTOPEN) + tp->t_tfo_pending = tcp_fastopen_alloc_counter(); +#endif out: TCPDEBUG2(PRU_LISTEN); INP_WUNLOCK(inp); @@ -436,6 +443,10 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td) } SOCK_UNLOCK(so); +#ifdef TCP_RFC7413 + if (tp->t_flags & TF_FASTOPEN) + tp->t_tfo_pending = tcp_fastopen_alloc_counter(); +#endif out: TCPDEBUG2(PRU_LISTEN); INP_WUNLOCK(inp); @@ -791,6 +802,18 @@ tcp_usr_rcvd(struct socket *so, int flags) } tp = intotcpcb(inp); TCPDEBUG1(); +#ifdef TCP_RFC7413 + /* + * For passively-created TFO connections, don't attempt a window + * update while still in SYN_RECEIVED as this may trigger an early + * SYN|ACK. It is preferable to have the SYN|ACK be sent along with + * application response data, or failing that, when the DELACK timer + * expires. 
+ */ + if ((tp->t_flags & TF_FASTOPEN) && + (tp->t_state == TCPS_SYN_RECEIVED)) + goto out; +#endif #ifdef TCP_OFFLOAD if (tp->t_flags & TF_TOE) tcp_offload_rcvd(tp); @@ -1558,6 +1581,29 @@ unlock_and_done: TP_MAXIDLE(tp)); goto unlock_and_done; +#ifdef TCP_RFC7413 + case TCP_FASTOPEN: + INP_WUNLOCK(inp); + if (!V_tcp_fastopen_enabled) + return (EPERM); + + error = sooptcopyin(sopt, &optval, sizeof optval, + sizeof optval); + if (error) + return (error); + + INP_WLOCK_RECHECK(inp); + if (optval) { + tp->t_flags |= TF_FASTOPEN; + if ((tp->t_state == TCPS_LISTEN) && + (tp->t_tfo_pending == NULL)) + tp->t_tfo_pending = + tcp_fastopen_alloc_counter(); + } else + tp->t_flags &= ~TF_FASTOPEN; + goto unlock_and_done; +#endif + default: INP_WUNLOCK(inp); error = ENOPROTOOPT; @@ -1628,6 +1674,13 @@ unlock_and_done: INP_WUNLOCK(inp); error = sooptcopyout(sopt, &ui, sizeof(ui)); break; +#ifdef TCP_RFC7413 + case TCP_FASTOPEN: + optval = tp->t_flags & TF_FASTOPEN; + INP_WUNLOCK(inp); + error = sooptcopyout(sopt, &optval, sizeof optval); + break; +#endif default: INP_WUNLOCK(inp); error = ENOPROTOOPT; @@ -1951,6 +2004,10 @@ db_print_tflags(u_int t_flags) db_printf("%sTF_ECN_PERMIT", comma ? ", " : ""); comma = 1; } + if (t_flags & TF_FASTOPEN) { + db_printf("%sTF_FASTOPEN", comma ? ", " : ""); + comma = 1; + } } static void diff --git a/sys/netinet/tcp_var.h b/sys/netinet/tcp_var.h index 758f2c59f81a..cff81c57e66b 100644 --- a/sys/netinet/tcp_var.h +++ b/sys/netinet/tcp_var.h @@ -213,8 +213,18 @@ struct tcpcb { u_int t_flags2; /* More tcpcb flags storage */ uint32_t t_ispare[6]; /* 5 UTO, 1 TBD */ +#if defined(_KERNEL) && defined(TCP_RFC7413) + void *t_pspare2[3]; /* 1 TCP_SIGNATURE, 2 TBD */ + unsigned int *t_tfo_pending; /* TCP Fast Open pending counter */ +#else void *t_pspare2[4]; /* 1 TCP_SIGNATURE, 3 TBD */ +#endif +#if defined(_KERNEL) && defined(TCP_RFC7413) + uint64_t _pad[4]; /* 4 TBD (1-2 CC/RTT?) 
*/ + uint64_t t_tfo_cookie; /* TCP Fast Open cookie */ +#else uint64_t _pad[5]; /* 5 TBD (1-2 CC/RTT?) */ +#endif uint32_t t_tsomaxsegcount; /* TSO maximum segment count */ uint32_t t_tsomaxsegsize; /* TSO maximum segment size in bytes */ }; @@ -251,6 +261,7 @@ struct tcpcb { #define TF_ECN_SND_ECE 0x10000000 /* ECN ECE in queue */ #define TF_CONGRECOVERY 0x20000000 /* congestion recovery mode */ #define TF_WASCRECOVERY 0x40000000 /* was in congestion recovery */ +#define TF_FASTOPEN 0x80000000 /* TCP Fast Open indication */ #define IN_FASTRECOVERY(t_flags) (t_flags & TF_FASTRECOVERY) #define ENTER_FASTRECOVERY(t_flags) t_flags |= TF_FASTRECOVERY @@ -310,14 +321,17 @@ struct tcpopt { #define TOF_TS 0x0010 /* timestamp */ #define TOF_SIGNATURE 0x0040 /* TCP-MD5 signature option (RFC2385) */ #define TOF_SACK 0x0080 /* Peer sent SACK option */ -#define TOF_MAXOPT 0x0100 +#define TOF_FASTOPEN 0x0100 /* TCP Fast Open (TFO) cookie */ +#define TOF_MAXOPT 0x0200 u_int32_t to_tsval; /* new timestamp */ u_int32_t to_tsecr; /* reflected timestamp */ u_char *to_sacks; /* pointer to the first SACK blocks */ u_char *to_signature; /* pointer to the TCP-MD5 signature */ + u_char *to_tfo_cookie; /* pointer to the TFO cookie */ u_int16_t to_mss; /* maximum segment size */ u_int8_t to_wscale; /* window scaling */ u_int8_t to_nsacks; /* number of SACK blocks */ + u_int8_t to_tfo_len; /* TFO cookie length */ u_int32_t to_spare; /* UTO */ };