tcp: address a wire level race with 2 ACKs at the end of TCP handshake
Imagine we are in SYN-RCVD state and two ACKs arrive at the same time, both valid, e.g. coming from the same host and with valid sequence. First packet would locate the listening socket in the inpcb database, write-lock it and start expanding the syncache entry into a socket. Meanwhile second packet would wait on the write lock of the listening socket. First packet will create a new ESTABLISHED socket, free the syncache entry and unlock the listening socket. Second packet would call into syncache_expand(), but this time it will fail as there is no syncache entry. Second packet would generate RST, effectively resetting the remote connection. It seems to me, that it is impossible to solve this problem with just rearranging locks, as the race happens at a wire level. To solve the problem, for an ACK packet arrived on a listening socket, that failed syncache lookup, perform a second non-wildcard lookup right away. That lookup may find the new born socket. Otherwise, we indeed send RST. Tested by: kp Reviewed by: tuexen, rrs PR: 265154 Differential revision: https://reviews.freebsd.org/D36066
This commit is contained in:
parent
f998535a66
commit
d88eb4654f
@ -833,8 +833,9 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
|
||||
* PCB, be it a listening one or a synchronized one. The packet
|
||||
* shall not modify its state.
|
||||
*/
|
||||
lookupflag = (thflags & (TH_ACK|TH_SYN)) == TH_SYN ?
|
||||
INPLOOKUP_RLOCKPCB : INPLOOKUP_WLOCKPCB;
|
||||
lookupflag = INPLOOKUP_WILDCARD |
|
||||
((thflags & (TH_ACK|TH_SYN)) == TH_SYN ?
|
||||
INPLOOKUP_RLOCKPCB : INPLOOKUP_WLOCKPCB);
|
||||
findpcb:
|
||||
#ifdef INET6
|
||||
if (isipv6 && fwd_tag != NULL) {
|
||||
@ -857,13 +858,12 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
|
||||
inp = in6_pcblookup(&V_tcbinfo, &ip6->ip6_src,
|
||||
th->th_sport, &next_hop6->sin6_addr,
|
||||
next_hop6->sin6_port ? ntohs(next_hop6->sin6_port) :
|
||||
th->th_dport, INPLOOKUP_WILDCARD | lookupflag,
|
||||
m->m_pkthdr.rcvif);
|
||||
th->th_dport, lookupflag, m->m_pkthdr.rcvif);
|
||||
}
|
||||
} else if (isipv6) {
|
||||
inp = in6_pcblookup_mbuf(&V_tcbinfo, &ip6->ip6_src,
|
||||
th->th_sport, &ip6->ip6_dst, th->th_dport,
|
||||
INPLOOKUP_WILDCARD | lookupflag, m->m_pkthdr.rcvif, m);
|
||||
th->th_sport, &ip6->ip6_dst, th->th_dport, lookupflag,
|
||||
m->m_pkthdr.rcvif, m);
|
||||
}
|
||||
#endif /* INET6 */
|
||||
#if defined(INET6) && defined(INET)
|
||||
@ -889,13 +889,12 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
|
||||
inp = in_pcblookup(&V_tcbinfo, ip->ip_src,
|
||||
th->th_sport, next_hop->sin_addr,
|
||||
next_hop->sin_port ? ntohs(next_hop->sin_port) :
|
||||
th->th_dport, INPLOOKUP_WILDCARD | lookupflag,
|
||||
m->m_pkthdr.rcvif);
|
||||
th->th_dport, lookupflag, m->m_pkthdr.rcvif);
|
||||
}
|
||||
} else
|
||||
inp = in_pcblookup_mbuf(&V_tcbinfo, ip->ip_src,
|
||||
th->th_sport, ip->ip_dst, th->th_dport,
|
||||
INPLOOKUP_WILDCARD | lookupflag, m->m_pkthdr.rcvif, m);
|
||||
th->th_sport, ip->ip_dst, th->th_dport, lookupflag,
|
||||
m->m_pkthdr.rcvif, m);
|
||||
#endif /* INET */
|
||||
|
||||
/*
|
||||
@ -904,6 +903,11 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
|
||||
* XXX MRT Send RST using which routing table?
|
||||
*/
|
||||
if (inp == NULL) {
|
||||
if (rstreason != 0) {
|
||||
/* We came here after second (safety) lookup. */
|
||||
MPASS((lookupflag & INPLOOKUP_WILDCARD) == 0);
|
||||
goto dropwithreset;
|
||||
}
|
||||
/*
|
||||
* Log communication attempts to ports that are not
|
||||
* in use.
|
||||
@ -1095,13 +1099,26 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
|
||||
goto dropunlock;
|
||||
} else if (rstreason == 0) {
|
||||
/*
|
||||
* No syncache entry or ACK was not
|
||||
* for our SYN/ACK. Send a RST.
|
||||
* No syncache entry, or ACK was not for our
|
||||
* SYN/ACK. Do our protection against double
|
||||
* ACK. If peer sent us 2 ACKs, then for the
|
||||
* first one syncache_expand() successfully
|
||||
* converted syncache entry into a socket,
|
||||
* while we were waiting on the inpcb lock. We
|
||||
* don't want to sent RST for the second ACK,
|
||||
* so we perform second lookup without wildcard
|
||||
* match, hoping to find the new socket. If
|
||||
* the ACK is stray indeed, rstreason would
|
||||
* hint the above code that the lookup was a
|
||||
* second attempt.
|
||||
*
|
||||
* NB: syncache did its own logging
|
||||
* of the failure cause.
|
||||
*/
|
||||
INP_WUNLOCK(inp);
|
||||
rstreason = BANDLIM_RST_OPENPORT;
|
||||
goto dropwithreset;
|
||||
lookupflag &= ~INPLOOKUP_WILDCARD;
|
||||
goto findpcb;
|
||||
}
|
||||
tfo_socket_result:
|
||||
if (so == NULL) {
|
||||
@ -1390,7 +1407,7 @@ tcp_input_with_port(struct mbuf **mp, int *offp, int proto, uint16_t port)
|
||||
* to upgrade the lock, because calling convention for stacks is
|
||||
* write-lock on PCB. If upgrade fails, drop the SYN.
|
||||
*/
|
||||
if (lookupflag == INPLOOKUP_RLOCKPCB && INP_TRY_UPGRADE(inp) == 0)
|
||||
if ((lookupflag & INPLOOKUP_RLOCKPCB) && INP_TRY_UPGRADE(inp) == 0)
|
||||
goto dropunlock;
|
||||
|
||||
tp->t_fb->tfb_tcp_do_segment(m, th, so, tp, drop_hdrlen, tlen, iptos);
|
||||
|
Loading…
Reference in New Issue
Block a user