In the current world order, solisten() implements the state transition of

a socket from a regular socket to a listening socket able to accept new
connections.  As part of this state transition, solisten() calls into the
protocol to update protocol-layer state.  There were several bugs in this
implementation that could result in a race wherein a TCP SYN received
in the interval between the protocol state transition and the shortly
following socket layer transition would result in a panic in the TCP code,
as the socket would be in the TCPS_LISTEN state, but the socket would not
have the SO_ACCEPTCONN flag set.

This change does the following:

- Pushes the socket state transition from the socket layer solisten() to
  to socket "library" routines called from the protocol.  This permits
  the socket routines to be called while holding the protocol mutexes,
  preventing a race exposing the incomplete socket state transition to TCP
  after the TCP state transition has completed.  The check for a socket
  layer state transition is performed by solisten_proto_check(), and the
  actual transition is performed by solisten_proto().

- Holds the socket lock for the duration of the socket state test and set,
  and over the protocol layer state transition, which is now possible as
  the socket lock is acquired by the protocol layer, rather than vice
  versa.  This prevents additional state related races in the socket
  layer.

This permits the dual transition of socket layer and protocol layer state
to occur while holding locks for both layers, making the two changes
atomic with respect to one another.  Similar changes are likely require
elsewhere in the socket/protocol code.

Reported by:		Peter Holm <peter@holm.cc>
Review and fixes from:	emax, Antoine Brodin <antoine.brodin@laposte.net>
Philosophical head nod:	gnn
This commit is contained in:
Robert Watson 2005-02-21 21:58:17 +00:00
parent f94ec97d48
commit 0daccb9c94
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=142190
11 changed files with 143 additions and 60 deletions

View File

@ -1,6 +1,6 @@
/*-
* Copyright (c) 2004 The FreeBSD Foundation
* Copyright (c) 2004-2005 Robert Watson
* Copyright (c) 2004-2005 Robert N. M. Watson
* Copyright (c) 1982, 1986, 1988, 1990, 1993
* The Regents of the University of California. All rights reserved.
*
@ -271,6 +271,18 @@ sodealloc(struct socket *so)
mtx_unlock(&so_global_mtx);
}
/*
* solisten() transitions a socket from a non-listening state to a listening
* state, but can also be used to update the listen queue depth on an
* existing listen socket. The protocol will call back into the sockets
* layer using solisten_proto_check() and solisten_proto() to check and set
* socket-layer listen state. Call backs are used so that the protocol can
* acquire both protocol and socket layer locks in whatever order is reuiqred
* by the protocol.
*
* Protocol implementors are advised to hold the socket lock across the
* socket-layer test and set to avoid races at the socket layer.
*/
int
solisten(so, backlog, td)
struct socket *so;
@ -279,28 +291,44 @@ solisten(so, backlog, td)
{
int error;
/*
* XXXRW: Ordering issue here -- perhaps we need to set
* SO_ACCEPTCONN before the call to pru_listen()?
* XXXRW: General atomic test-and-set concerns here also.
*/
if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING |
SS_ISDISCONNECTING))
return (EINVAL);
error = (*so->so_proto->pr_usrreqs->pru_listen)(so, td);
if (error)
return (error);
ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_options |= SO_ACCEPTCONN;
SOCK_UNLOCK(so);
/*
* XXXRW: The following state adjustment should occur in
* solisten_proto(), but we don't currently pass the backlog request
* to the protocol via pru_listen().
*/
if (backlog < 0 || backlog > somaxconn)
backlog = somaxconn;
so->so_qlimit = backlog;
ACCEPT_UNLOCK();
return (0);
}
int
solisten_proto_check(so)
struct socket *so;
{
SOCK_LOCK_ASSERT(so);
if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING |
SS_ISDISCONNECTING))
return (EINVAL);
return (0);
}
void
solisten_proto(so)
struct socket *so;
{
SOCK_LOCK_ASSERT(so);
so->so_options |= SO_ACCEPTCONN;
}
/*
* Attempt to free a socket. This should really be sotryfree().
*

View File

@ -124,7 +124,7 @@ static void unp_mark(struct file *);
static void unp_discard(struct file *);
static void unp_freerights(struct file **, int);
static int unp_internalize(struct mbuf **, struct thread *);
static int unp_listen(struct unpcb *, struct thread *);
static int unp_listen(struct socket *, struct unpcb *, struct thread *);
static int
uipc_abort(struct socket *so)
@ -284,7 +284,7 @@ uipc_listen(struct socket *so, struct thread *td)
UNP_UNLOCK();
return (EINVAL);
}
error = unp_listen(unp, td);
error = unp_listen(so, unp, td);
UNP_UNLOCK();
return (error);
}
@ -1708,16 +1708,21 @@ unp_dispose(struct mbuf *m)
}
static int
unp_listen(struct unpcb *unp, struct thread *td)
unp_listen(struct socket *so, struct unpcb *unp, struct thread *td)
{
int error;
UNP_LOCK_ASSERT();
/*
* XXXRW: Why populate the local peer cred with our own credential?
*/
cru2x(td->td_ucred, &unp->unp_peercred);
unp->unp_flags |= UNP_HAVEPCCACHED;
return (0);
SOCK_LOCK(so);
error = solisten_proto_check(so);
if (error == 0) {
cru2x(td->td_ucred, &unp->unp_peercred);
unp->unp_flags |= UNP_HAVEPCCACHED;
solisten_proto(so);
}
SOCK_UNLOCK(so);
return (error);
}
static void

View File

@ -524,6 +524,7 @@ atm_cm_connect(epp, token, ap, copp)
* atm_cm_release().
*
* Arguments:
* so optional socket pointer -- if present, will set listen state
* epp pointer to endpoint definition structure
* token endpoint's listen instance token
* ap pointer to listening connection attributes
@ -535,7 +536,8 @@ atm_cm_connect(epp, token, ap, copp)
*
*/
int
atm_cm_listen(epp, token, ap, copp)
atm_cm_listen(so, epp, token, ap, copp)
struct socket *so;
Atm_endpoint *epp;
void *token;
Atm_attributes *ap;
@ -718,7 +720,13 @@ atm_cm_listen(epp, token, ap, copp)
/*
* Now try to register the listening connection
*/
if (so != NULL)
SOCK_LOCK(so);
s = splnet();
if (so != NULL)
err = solisten_proto_check(so);
if (err)
goto donex;
if (atm_cm_match(cop->co_lattr, NULL) != NULL) {
/*
* Can't have matching listeners
@ -728,9 +736,13 @@ atm_cm_listen(epp, token, ap, copp)
}
cop->co_state = COS_LISTEN;
LINK2TAIL(cop, Atm_connection, atm_listen_queue, co_next);
if (so != NULL)
solisten_proto(so);
donex:
(void) splx(s);
if (so != NULL)
SOCK_UNLOCK(so);
done:
if (err) {

View File

@ -350,7 +350,7 @@ atm_sock_listen(so, epp)
/*
* Start listening for incoming calls
*/
return (atm_cm_listen(epp, atp, &atp->atp_attr, &atp->atp_conn));
return (atm_cm_listen(so, epp, atp, &atp->atp_attr, &atp->atp_conn));
}

View File

@ -81,8 +81,8 @@ void atm_aal5_init(void);
/* atm_cm.c */
int atm_cm_connect(Atm_endpoint *, void *, Atm_attributes *,
Atm_connection **);
int atm_cm_listen(Atm_endpoint *, void *, Atm_attributes *,
Atm_connection **);
int atm_cm_listen(struct socket *, Atm_endpoint *, void *,
Atm_attributes *, Atm_connection **);
int atm_cm_addllc(Atm_endpoint *, void *, struct attr_llc *,
Atm_connection *, Atm_connection **);
int atm_cm_addparty(Atm_connection *, int, struct t_atm_sap *);

View File

@ -522,8 +522,8 @@ ipatm_start()
/*
* Now start listening
*/
if ((err = atm_cm_listen(&ipatm_endpt, (void *)(intptr_t)i,
&ipatm_listeners[i].attr,
if ((err = atm_cm_listen(NULL, &ipatm_endpt,
(void *)(intptr_t)i, &ipatm_listeners[i].attr,
&ipatm_listeners[i].conn)) != 0)
goto done;
}

View File

@ -2409,13 +2409,28 @@ int
ng_btsocket_l2cap_listen(struct socket *so, struct thread *td)
{
ng_btsocket_l2cap_pcb_p pcb = so2l2cap_pcb(so);
int error;
if (pcb == NULL)
return (EINVAL);
if (ng_btsocket_l2cap_node == NULL)
return (EINVAL);
return ((pcb->psm == 0)? EDESTADDRREQ : 0);
SOCK_LOCK(so);
error = solisten_proto_check(so);
if (error != 0)
goto out;
if (pcb == NULL) {
error = EINVAL;
goto out;
}
if (ng_btsocket_l2cap_node == NULL) {
error = EINVAL;
goto out;
}
if (pcb->psm == 0) {
error = EDESTADDRREQ;
goto out;
}
solisten_proto(so);
out:
SOCK_UNLOCK(so);
return (error);
} /* ng_btsocket_listen */
/*

View File

@ -800,6 +800,7 @@ ng_btsocket_rfcomm_listen(struct socket *so, struct thread *td)
ng_btsocket_rfcomm_session_p s = NULL;
struct socket *l2so = NULL;
int error;
int socreate_error;
if (pcb == NULL)
return (EINVAL);
@ -816,14 +817,18 @@ ng_btsocket_rfcomm_listen(struct socket *so, struct thread *td)
* session.
*/
error = socreate(PF_BLUETOOTH, &l2so, SOCK_SEQPACKET,
socreate_error = socreate(PF_BLUETOOTH, &l2so, SOCK_SEQPACKET,
BLUETOOTH_PROTO_L2CAP, td->td_ucred, td);
/*
* Look for the session in LISTENING state. There can only be one.
/*
* Transition the socket and session into the LISTENING state. Check
* for collisions first, as there can only be one.
*/
mtx_lock(&ng_btsocket_rfcomm_sessions_mtx);
SOCK_LOCK(so);
error = solisten_proto_check(so);
if (error != 0)
goto out;
LIST_FOREACH(s, &ng_btsocket_rfcomm_sessions, next)
if (s->state == NG_BTSOCKET_RFCOMM_SESSION_LISTENING)
@ -835,10 +840,9 @@ ng_btsocket_rfcomm_listen(struct socket *so, struct thread *td)
* L2CAP socket. If l2so == NULL then error has the error code
* from socreate()
*/
if (l2so == NULL) {
mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx);
return (error);
error = socreate_error;
goto out;
}
/*
@ -848,21 +852,23 @@ ng_btsocket_rfcomm_listen(struct socket *so, struct thread *td)
* XXX FIXME Note that currently there is no way to adjust MTU
* for the default session.
*/
error = ng_btsocket_rfcomm_session_create(&s, l2so,
NG_HCI_BDADDR_ANY, NULL, td);
if (error != 0) {
mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx);
soclose(l2so);
return (error);
}
} else if (l2so != NULL)
soclose(l2so); /* we don't need new L2CAP socket */
if (error != 0)
goto out;
l2so = NULL;
}
solisten_proto(so);
out:
SOCK_UNLOCK(so);
mtx_unlock(&ng_btsocket_rfcomm_sessions_mtx);
return (0);
/*
* If we still have an l2so reference here, it's unneeded, so release
* it.
*/
if (l2so != NULL)
soclose(l2so);
return (error);
} /* ng_btsocket_listen */
/*

View File

@ -302,10 +302,15 @@ tcp_usr_listen(struct socket *so, struct thread *td)
const int inirw = INI_WRITE;
COMMON_START();
if (inp->inp_lport == 0)
SOCK_LOCK(so);
error = solisten_proto_check(so);
if (error == 0 && inp->inp_lport == 0)
error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
if (error == 0)
if (error == 0) {
tp->t_state = TCPS_LISTEN;
solisten_proto(so);
}
SOCK_UNLOCK(so);
COMMON_END(PRU_LISTEN);
}
@ -319,14 +324,19 @@ tcp6_usr_listen(struct socket *so, struct thread *td)
const int inirw = INI_WRITE;
COMMON_START();
if (inp->inp_lport == 0) {
SOCK_LOCK(so);
error = solisten_proto_check(so);
if (error == 0 && inp->inp_lport == 0) {
inp->inp_vflag &= ~INP_IPV4;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
inp->inp_vflag |= INP_IPV4;
error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
}
if (error == 0)
if (error == 0) {
tp->t_state = TCPS_LISTEN;
solisten_proto(so);
}
SOCK_UNLOCK(so);
COMMON_END(PRU_LISTEN);
}
#endif /* INET6 */

View File

@ -1532,10 +1532,15 @@ spx_listen(so, td)
IPX_LIST_LOCK();
IPX_LOCK(ipxp);
if (ipxp->ipxp_lport == 0)
SOCK_LOCK(so);
error = solisten_proto_check(so);
if (error == 0 && ipxp->ipxp_lport == 0)
error = ipx_pcbbind(ipxp, NULL, td);
if (error == 0)
if (error == 0) {
cb->s_state = TCPS_LISTEN;
solisten_proto(so);
}
SOCK_UNLOCK(so);
IPX_UNLOCK(ipxp);
IPX_LIST_UNLOCK();
return (error);

View File

@ -513,6 +513,8 @@ void soisconnecting(struct socket *so);
void soisdisconnected(struct socket *so);
void soisdisconnecting(struct socket *so);
int solisten(struct socket *so, int backlog, struct thread *td);
void solisten_proto(struct socket *so);
int solisten_proto_check(struct socket *so);
struct socket *
sonewconn(struct socket *head, int connstatus);
int sooptcopyin(struct sockopt *sopt, void *buf, size_t len, size_t minlen);