The net.inet.tcp.nolocaltimewait=1 optimization prevents local TCP connections

from entering the TIME_WAIT state. However, it omits sending the ACK for the
FIN, which results in RST. This becomes a bigger deal if the sysctl
net.inet.tcp.blackhole is 2. In this case RST isn't send, so the other side of
the connection (also local) keeps retransmitting FINs.

To fix that in tcp_twstart() we will not call tcp_close() immediately. Instead
we will allocate a tcptw on stack and proceed to the end of the function all
the way to tcp_twrespond(), to generate the correct ACK, then we will drop the
last PCB reference.

While here, make a few tiny improvements:
- use bools for boolean variable
- staticize nolocaltimewait
- remove pointless acquisiton of socket lock

Reported by:	jtl
Reviewed by:	jtl
Sponsored by:	Netflix
Differential Revision:	https://reviews.freebsd.org/D14697
This commit is contained in:
Gleb Smirnoff 2018-03-21 20:59:30 +00:00
parent 5f8cfbe134
commit dd388cfd9b
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=331322
2 changed files with 30 additions and 29 deletions

View File

@ -172,7 +172,7 @@ SYSCTL_PROC(_net_inet_tcp, OID_AUTO, maxtcptw, CTLTYPE_INT|CTLFLAG_RW,
&maxtcptw, 0, sysctl_maxtcptw, "IU",
"Maximum number of compressed TCP TIME_WAIT entries");
VNET_DEFINE(int, nolocaltimewait) = 0;
static VNET_DEFINE(int, nolocaltimewait) = 0;
#define V_nolocaltimewait VNET(nolocaltimewait)
SYSCTL_INT(_net_inet_tcp, OID_AUTO, nolocaltimewait, CTLFLAG_VNET | CTLFLAG_RW,
&VNET_NAME(nolocaltimewait), 0,
@ -225,12 +225,12 @@ tcp_tw_destroy(void)
void
tcp_twstart(struct tcpcb *tp)
{
struct tcptw *tw;
struct tcptw twlocal, *tw;
struct inpcb *inp = tp->t_inpcb;
int acknow;
struct socket *so;
bool acknow, local;
#ifdef INET6
int isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
bool isipv6 = inp->inp_inc.inc_flags & INC_ISIPV6;
#endif
INP_INFO_RLOCK_ASSERT(&V_tcbinfo);
@ -241,25 +241,18 @@ tcp_twstart(struct tcpcb *tp)
"(inp->inp_flags & INP_DROPPED) != 0"));
if (V_nolocaltimewait) {
int error = 0;
#ifdef INET6
if (isipv6)
error = in6_localaddr(&inp->in6p_faddr);
local = in6_localaddr(&inp->in6p_faddr);
#endif
#if defined(INET6) && defined(INET)
else
#endif
#ifdef INET
error = in_localip(inp->inp_faddr);
local = in_localip(inp->inp_faddr);
#endif
if (error) {
tp = tcp_close(tp);
if (tp != NULL)
INP_WUNLOCK(inp);
return;
}
}
} else
local = false;
/*
* For use only by DTrace. We do not reference the state
@ -267,7 +260,10 @@ tcp_twstart(struct tcpcb *tp)
*/
tcp_state_change(tp, TCPS_TIME_WAIT);
tw = uma_zalloc(V_tcptw_zone, M_NOWAIT);
if (local)
tw = &twlocal;
else
tw = uma_zalloc(V_tcptw_zone, M_NOWAIT);
if (tw == NULL) {
/*
* Reached limit on total number of TIMEWAIT connections
@ -286,11 +282,10 @@ tcp_twstart(struct tcpcb *tp)
}
}
/*
* The tcptw will hold a reference on its inpcb until tcp_twclose
* is called
* For !local case the tcptw will hold a reference on its inpcb
* until tcp_twclose is called.
*/
tw->tw_inpcb = inp;
in_pcbref(inp); /* Reference from tw */
/*
* Recover last window size sent.
@ -337,16 +332,19 @@ tcp_twstart(struct tcpcb *tp)
tcp_discardcb(tp);
so = inp->inp_socket;
soisdisconnected(so);
tw->tw_cred = crhold(so->so_cred);
SOCK_LOCK(so);
tw->tw_so_options = so->so_options;
SOCK_UNLOCK(so);
inp->inp_flags |= INP_TIMEWAIT;
if (acknow)
tcp_twrespond(tw, TH_ACK);
inp->inp_ppcb = tw;
inp->inp_flags |= INP_TIMEWAIT;
TCPSTATES_INC(TCPS_TIME_WAIT);
tcp_tw_2msl_reset(tw, 0);
if (local)
in_pcbdrop(inp);
else {
in_pcbref(inp); /* Reference from tw */
tw->tw_cred = crhold(so->so_cred);
inp->inp_ppcb = tw;
TCPSTATES_INC(TCPS_TIME_WAIT);
tcp_tw_2msl_reset(tw, 0);
}
/*
* If the inpcb owns the sole reference to the socket, then we can

View File

@ -198,15 +198,18 @@ tcp_detach(struct socket *so, struct inpcb *inp)
* XXXRW: Would it be cleaner to free the tcptw here?
*
* Astute question indeed, from twtcp perspective there are
* three cases to consider:
* four cases to consider:
*
* #1 tcp_detach is called at tcptw creation time by
* tcp_twstart, then do not discard the newly created tcptw
* and leave inpcb present until timewait ends
* #2 tcp_detach is called at timewait end (or reuse) by
* #2 tcp_detach is called at tcptw creation time by
* tcp_twstart, but connection is local and tw will be
* discarded immediately
* #3 tcp_detach is called at timewait end (or reuse) by
* tcp_twclose, then the tcptw has already been discarded
* (or reused) and inpcb is freed here
* #3 tcp_detach is called() after timewait ends (or reuse)
* #4 tcp_detach is called() after timewait ends (or reuse)
* (e.g. by soclose), then tcptw has already been discarded
* (or reused) and inpcb is freed here
*