Fix race when accepting TCP connections.

When expanding a SYN-cache entry to a socket/inp a two step approach was
taken:
1) The local address was filled in, then the inp was added to the hash
   table.
2) The remote address was filled in and the inp was relocated in the
   hash table.
Before the epoch changes, a write lock was held when this happens and
the code looking up entries was holding a corresponding read lock.
Since the read lock is gone away after the introduction of the
epochs, the half populated inp was found during lookup.
This resulted in processing TCP segments in the context of the wrong
TCP connection.
This patch changes the above procedure in a way that the inp is fully
populated before inserted into the hash table.

Thanks to Paul <devgs@ukr.net> for reporting the issue on the net@
mailing list and for testing the patch!

Reviewed by:		rrs@
MFC after:		1 week
Sponsored by:		Netflix, Inc.
Differential Revision:	https://reviews.freebsd.org/D22971
This commit is contained in:
Michael Tuexen 2020-01-12 17:52:32 +00:00
parent c6feea3b89
commit fe1274ee39
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=356663
5 changed files with 31 additions and 53 deletions

View File

@ -972,7 +972,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
*/
int
in_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam,
struct ucred *cred, struct mbuf *m)
struct ucred *cred, struct mbuf *m, bool rehash)
{
u_short lport, fport;
in_addr_t laddr, faddr;
@ -991,6 +991,8 @@ in_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam,
/* Do the initial binding of the local address if required. */
if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) {
KASSERT(rehash == true,
("Rehashing required for unbound inps"));
inp->inp_lport = lport;
inp->inp_laddr.s_addr = laddr;
if (in_pcbinshash(inp) != 0) {
@ -1005,7 +1007,11 @@ in_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam,
inp->inp_laddr.s_addr = laddr;
inp->inp_faddr.s_addr = faddr;
inp->inp_fport = fport;
in_pcbrehash_mbuf(inp, m);
if (rehash) {
in_pcbrehash_mbuf(inp, m);
} else {
in_pcbinshash_mbuf(inp, m);
}
if (anonport)
inp->inp_flags |= INP_ANONPORT;
@ -1016,7 +1022,7 @@ int
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
{
return (in_pcbconnect_mbuf(inp, nam, cred, NULL));
return (in_pcbconnect_mbuf(inp, nam, cred, NULL, true));
}
/*
@ -2500,7 +2506,7 @@ in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr,
* Insert PCB onto various hash lists.
*/
static int
in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update)
in_pcbinshash_internal(struct inpcb *inp, struct mbuf *m)
{
struct inpcbhead *pcbhash;
struct inpcbporthead *pcbporthash;
@ -2566,35 +2572,27 @@ in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update)
CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
inp->inp_flags |= INP_INHASHLIST;
#ifdef PCBGROUP
if (do_pcbgroup_update)
if (m != NULL) {
in_pcbgroup_update_mbuf(inp, m);
} else {
in_pcbgroup_update(inp);
}
#endif
return (0);
}
/*
* For now, there are two public interfaces to insert an inpcb into the hash
* lists -- one that does update pcbgroups, and one that doesn't. The latter
* is used only in the TCP syncache, where in_pcbinshash is called before the
* full 4-tuple is set for the inpcb, and we don't want to install in the
* pcbgroup until later.
*
* XXXRW: This seems like a misfeature. in_pcbinshash should always update
* connection groups, and partially initialised inpcbs should not be exposed
* to either reservation hash tables or pcbgroups.
*/
int
in_pcbinshash(struct inpcb *inp)
{
return (in_pcbinshash_internal(inp, 1));
return (in_pcbinshash_internal(inp, NULL));
}
int
in_pcbinshash_nopcbgroup(struct inpcb *inp)
in_pcbinshash_mbuf(struct inpcb *inp, struct mbuf *m)
{
return (in_pcbinshash_internal(inp, 0));
return (in_pcbinshash_internal(inp, m));
}
/*

View File

@ -832,7 +832,7 @@ int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
u_short *, struct ucred *);
int in_pcbconnect(struct inpcb *, struct sockaddr *, struct ucred *);
int in_pcbconnect_mbuf(struct inpcb *, struct sockaddr *, struct ucred *,
struct mbuf *);
struct mbuf *, bool);
int in_pcbconnect_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
u_short *, in_addr_t *, u_short *, struct inpcb **,
struct ucred *);
@ -841,7 +841,7 @@ void in_pcbdisconnect(struct inpcb *);
void in_pcbdrop(struct inpcb *);
void in_pcbfree(struct inpcb *);
int in_pcbinshash(struct inpcb *);
int in_pcbinshash_nopcbgroup(struct inpcb *);
int in_pcbinshash_mbuf(struct inpcb *, struct mbuf *);
int in_pcbladdr(struct inpcb *, struct in_addr *, struct in_addr *,
struct ucred *);
struct inpcb *

View File

@ -841,33 +841,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
#endif
}
/*
* Install in the reservation hash table for now, but don't yet
* install a connection group since the full 4-tuple isn't yet
* configured.
*/
inp->inp_lport = sc->sc_inc.inc_lport;
if ((error = in_pcbinshash_nopcbgroup(inp)) != 0) {
/*
* Undo the assignments above if we failed to
* put the PCB on the hash lists.
*/
#ifdef INET6
if (sc->sc_inc.inc_flags & INC_ISIPV6)
inp->in6p_laddr = in6addr_any;
else
#endif
inp->inp_laddr.s_addr = INADDR_ANY;
inp->inp_lport = 0;
if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: in_pcbinshash failed "
"with error %i\n",
s, __func__, error);
free(s, M_TCPLOG);
}
INP_HASH_WUNLOCK(&V_tcbinfo);
goto abort;
}
#ifdef INET6
if (inp->inp_vflag & INP_IPV6PROTO) {
struct inpcb *oinp = sotoinpcb(lso);
@ -900,7 +874,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
inp->in6p_laddr = sc->sc_inc.inc6_laddr;
if ((error = in6_pcbconnect_mbuf(inp, (struct sockaddr *)&sin6,
thread0.td_ucred, m)) != 0) {
thread0.td_ucred, m, false)) != 0) {
inp->in6p_laddr = laddr6;
if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: in6_pcbconnect failed "
@ -940,7 +914,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
if (inp->inp_laddr.s_addr == INADDR_ANY)
inp->inp_laddr = sc->sc_inc.inc_laddr;
if ((error = in_pcbconnect_mbuf(inp, (struct sockaddr *)&sin,
thread0.td_ucred, m)) != 0) {
thread0.td_ucred, m, false)) != 0) {
inp->inp_laddr = laddr;
if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
log(LOG_DEBUG, "%s; %s: in_pcbconnect failed "

View File

@ -411,7 +411,7 @@ in6_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
*/
int
in6_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam,
struct ucred *cred, struct mbuf *m)
struct ucred *cred, struct mbuf *m, bool rehash)
{
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
@ -437,6 +437,8 @@ in6_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam,
}
if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
if (inp->inp_lport == 0) {
KASSERT(rehash == true,
("Rehashing required for unbound inps"));
error = in6_pcbbind(inp, (struct sockaddr *)0, cred);
if (error)
return (error);
@ -451,7 +453,11 @@ in6_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam,
inp->inp_flow |=
(htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
in_pcbrehash_mbuf(inp, m);
if (rehash) {
in_pcbrehash_mbuf(inp, m);
} else {
in_pcbinshash_mbuf(inp, m);
}
return (0);
}
@ -460,7 +466,7 @@ int
in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
{
return (in6_pcbconnect_mbuf(inp, nam, cred, NULL));
return (in6_pcbconnect_mbuf(inp, nam, cred, NULL, true));
}
void

View File

@ -86,7 +86,7 @@ void in6_losing(struct inpcb *);
int in6_pcbbind(struct inpcb *, struct sockaddr *, struct ucred *);
int in6_pcbconnect(struct inpcb *, struct sockaddr *, struct ucred *);
int in6_pcbconnect_mbuf(struct inpcb *, struct sockaddr *,
struct ucred *, struct mbuf *);
struct ucred *, struct mbuf *, bool);
void in6_pcbdisconnect(struct inpcb *);
struct inpcb *
in6_pcblookup_local(struct inpcbinfo *,