Fix race when accepting TCP connections.
When expanding a SYN-cache entry to a socket/inp a two step approach was taken: 1) The local address was filled in, then the inp was added to the hash table. 2) The remote address was filled in and the inp was relocated in the hash table. Before the epoch changes, a write lock was held when this happens and the code looking up entries was holding a corresponding read lock. Since the read lock is gone away after the introduction of the epochs, the half populated inp was found during lookup. This resulted in processing TCP segments in the context of the wrong TCP connection. This patch changes the above procedure in a way that the inp is fully populated before inserted into the hash table. Thanks to Paul <devgs@ukr.net> for reporting the issue on the net@ mailing list and for testing the patch! Reviewed by: rrs@ MFC after: 1 week Sponsored by: Netflix, Inc. Differential Revision: https://reviews.freebsd.org/D22971
This commit is contained in:
parent
2b23aff29a
commit
58c0da1e84
@ -972,7 +972,7 @@ in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
|
||||
*/
|
||||
int
|
||||
in_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam,
|
||||
struct ucred *cred, struct mbuf *m)
|
||||
struct ucred *cred, struct mbuf *m, bool rehash)
|
||||
{
|
||||
u_short lport, fport;
|
||||
in_addr_t laddr, faddr;
|
||||
@ -991,6 +991,8 @@ in_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam,
|
||||
|
||||
/* Do the initial binding of the local address if required. */
|
||||
if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) {
|
||||
KASSERT(rehash == true,
|
||||
("Rehashing required for unbound inps"));
|
||||
inp->inp_lport = lport;
|
||||
inp->inp_laddr.s_addr = laddr;
|
||||
if (in_pcbinshash(inp) != 0) {
|
||||
@ -1005,7 +1007,11 @@ in_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam,
|
||||
inp->inp_laddr.s_addr = laddr;
|
||||
inp->inp_faddr.s_addr = faddr;
|
||||
inp->inp_fport = fport;
|
||||
in_pcbrehash_mbuf(inp, m);
|
||||
if (rehash) {
|
||||
in_pcbrehash_mbuf(inp, m);
|
||||
} else {
|
||||
in_pcbinshash_mbuf(inp, m);
|
||||
}
|
||||
|
||||
if (anonport)
|
||||
inp->inp_flags |= INP_ANONPORT;
|
||||
@ -1016,7 +1022,7 @@ int
|
||||
in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
|
||||
{
|
||||
|
||||
return (in_pcbconnect_mbuf(inp, nam, cred, NULL));
|
||||
return (in_pcbconnect_mbuf(inp, nam, cred, NULL, true));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -2500,7 +2506,7 @@ in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr,
|
||||
* Insert PCB onto various hash lists.
|
||||
*/
|
||||
static int
|
||||
in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update)
|
||||
in_pcbinshash_internal(struct inpcb *inp, struct mbuf *m)
|
||||
{
|
||||
struct inpcbhead *pcbhash;
|
||||
struct inpcbporthead *pcbporthash;
|
||||
@ -2566,35 +2572,27 @@ in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update)
|
||||
CK_LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
|
||||
inp->inp_flags |= INP_INHASHLIST;
|
||||
#ifdef PCBGROUP
|
||||
if (do_pcbgroup_update)
|
||||
if (m != NULL) {
|
||||
in_pcbgroup_update_mbuf(inp, m);
|
||||
} else {
|
||||
in_pcbgroup_update(inp);
|
||||
}
|
||||
#endif
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* For now, there are two public interfaces to insert an inpcb into the hash
|
||||
* lists -- one that does update pcbgroups, and one that doesn't. The latter
|
||||
* is used only in the TCP syncache, where in_pcbinshash is called before the
|
||||
* full 4-tuple is set for the inpcb, and we don't want to install in the
|
||||
* pcbgroup until later.
|
||||
*
|
||||
* XXXRW: This seems like a misfeature. in_pcbinshash should always update
|
||||
* connection groups, and partially initialised inpcbs should not be exposed
|
||||
* to either reservation hash tables or pcbgroups.
|
||||
*/
|
||||
int
|
||||
in_pcbinshash(struct inpcb *inp)
|
||||
{
|
||||
|
||||
return (in_pcbinshash_internal(inp, 1));
|
||||
return (in_pcbinshash_internal(inp, NULL));
|
||||
}
|
||||
|
||||
int
|
||||
in_pcbinshash_nopcbgroup(struct inpcb *inp)
|
||||
in_pcbinshash_mbuf(struct inpcb *inp, struct mbuf *m)
|
||||
{
|
||||
|
||||
return (in_pcbinshash_internal(inp, 0));
|
||||
return (in_pcbinshash_internal(inp, m));
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -832,7 +832,7 @@ int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
|
||||
u_short *, struct ucred *);
|
||||
int in_pcbconnect(struct inpcb *, struct sockaddr *, struct ucred *);
|
||||
int in_pcbconnect_mbuf(struct inpcb *, struct sockaddr *, struct ucred *,
|
||||
struct mbuf *);
|
||||
struct mbuf *, bool);
|
||||
int in_pcbconnect_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
|
||||
u_short *, in_addr_t *, u_short *, struct inpcb **,
|
||||
struct ucred *);
|
||||
@ -841,7 +841,7 @@ void in_pcbdisconnect(struct inpcb *);
|
||||
void in_pcbdrop(struct inpcb *);
|
||||
void in_pcbfree(struct inpcb *);
|
||||
int in_pcbinshash(struct inpcb *);
|
||||
int in_pcbinshash_nopcbgroup(struct inpcb *);
|
||||
int in_pcbinshash_mbuf(struct inpcb *, struct mbuf *);
|
||||
int in_pcbladdr(struct inpcb *, struct in_addr *, struct in_addr *,
|
||||
struct ucred *);
|
||||
struct inpcb *
|
||||
|
@ -841,33 +841,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Install in the reservation hash table for now, but don't yet
|
||||
* install a connection group since the full 4-tuple isn't yet
|
||||
* configured.
|
||||
*/
|
||||
inp->inp_lport = sc->sc_inc.inc_lport;
|
||||
if ((error = in_pcbinshash_nopcbgroup(inp)) != 0) {
|
||||
/*
|
||||
* Undo the assignments above if we failed to
|
||||
* put the PCB on the hash lists.
|
||||
*/
|
||||
#ifdef INET6
|
||||
if (sc->sc_inc.inc_flags & INC_ISIPV6)
|
||||
inp->in6p_laddr = in6addr_any;
|
||||
else
|
||||
#endif
|
||||
inp->inp_laddr.s_addr = INADDR_ANY;
|
||||
inp->inp_lport = 0;
|
||||
if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
|
||||
log(LOG_DEBUG, "%s; %s: in_pcbinshash failed "
|
||||
"with error %i\n",
|
||||
s, __func__, error);
|
||||
free(s, M_TCPLOG);
|
||||
}
|
||||
INP_HASH_WUNLOCK(&V_tcbinfo);
|
||||
goto abort;
|
||||
}
|
||||
#ifdef INET6
|
||||
if (inp->inp_vflag & INP_IPV6PROTO) {
|
||||
struct inpcb *oinp = sotoinpcb(lso);
|
||||
@ -900,7 +874,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
|
||||
if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr))
|
||||
inp->in6p_laddr = sc->sc_inc.inc6_laddr;
|
||||
if ((error = in6_pcbconnect_mbuf(inp, (struct sockaddr *)&sin6,
|
||||
thread0.td_ucred, m)) != 0) {
|
||||
thread0.td_ucred, m, false)) != 0) {
|
||||
inp->in6p_laddr = laddr6;
|
||||
if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
|
||||
log(LOG_DEBUG, "%s; %s: in6_pcbconnect failed "
|
||||
@ -940,7 +914,7 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
|
||||
if (inp->inp_laddr.s_addr == INADDR_ANY)
|
||||
inp->inp_laddr = sc->sc_inc.inc_laddr;
|
||||
if ((error = in_pcbconnect_mbuf(inp, (struct sockaddr *)&sin,
|
||||
thread0.td_ucred, m)) != 0) {
|
||||
thread0.td_ucred, m, false)) != 0) {
|
||||
inp->inp_laddr = laddr;
|
||||
if ((s = tcp_log_addrs(&sc->sc_inc, NULL, NULL, NULL))) {
|
||||
log(LOG_DEBUG, "%s; %s: in_pcbconnect failed "
|
||||
|
@ -411,7 +411,7 @@ in6_pcbladdr(struct inpcb *inp, struct sockaddr *nam,
|
||||
*/
|
||||
int
|
||||
in6_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam,
|
||||
struct ucred *cred, struct mbuf *m)
|
||||
struct ucred *cred, struct mbuf *m, bool rehash)
|
||||
{
|
||||
struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
|
||||
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)nam;
|
||||
@ -437,6 +437,8 @@ in6_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam,
|
||||
}
|
||||
if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
|
||||
if (inp->inp_lport == 0) {
|
||||
KASSERT(rehash == true,
|
||||
("Rehashing required for unbound inps"));
|
||||
error = in6_pcbbind(inp, (struct sockaddr *)0, cred);
|
||||
if (error)
|
||||
return (error);
|
||||
@ -451,7 +453,11 @@ in6_pcbconnect_mbuf(struct inpcb *inp, struct sockaddr *nam,
|
||||
inp->inp_flow |=
|
||||
(htonl(ip6_randomflowlabel()) & IPV6_FLOWLABEL_MASK);
|
||||
|
||||
in_pcbrehash_mbuf(inp, m);
|
||||
if (rehash) {
|
||||
in_pcbrehash_mbuf(inp, m);
|
||||
} else {
|
||||
in_pcbinshash_mbuf(inp, m);
|
||||
}
|
||||
|
||||
return (0);
|
||||
}
|
||||
@ -460,7 +466,7 @@ int
|
||||
in6_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
|
||||
{
|
||||
|
||||
return (in6_pcbconnect_mbuf(inp, nam, cred, NULL));
|
||||
return (in6_pcbconnect_mbuf(inp, nam, cred, NULL, true));
|
||||
}
|
||||
|
||||
void
|
||||
|
@ -86,7 +86,7 @@ void in6_losing(struct inpcb *);
|
||||
int in6_pcbbind(struct inpcb *, struct sockaddr *, struct ucred *);
|
||||
int in6_pcbconnect(struct inpcb *, struct sockaddr *, struct ucred *);
|
||||
int in6_pcbconnect_mbuf(struct inpcb *, struct sockaddr *,
|
||||
struct ucred *, struct mbuf *);
|
||||
struct ucred *, struct mbuf *, bool);
|
||||
void in6_pcbdisconnect(struct inpcb *);
|
||||
struct inpcb *
|
||||
in6_pcblookup_local(struct inpcbinfo *,
|
||||
|
Loading…
Reference in New Issue
Block a user