socket: Properly interlock when transitioning to a listening socket

Currently, most protocols implement pru_listen with something like the
following:

	SOCK_LOCK(so);
	error = solisten_proto_check(so);
	if (error) {
		SOCK_UNLOCK(so);
		return (error);
	}
	solisten_proto(so);
	SOCK_UNLOCK(so);

solisten_proto_check() fails if the socket is connected or connecting.
However, the socket lock is not used during I/O, so this pattern is
racy.

The change modifies solisten_proto_check() to additionally acquire
socket buffer locks, and the calling thread holds them until
solisten_proto() or solisten_proto_abort() is called.  Now that the
socket buffer locks are preserved across a listen(2), this change allows
socket I/O paths to properly interlock with listen(2).

This fixes a large number of syzbot reports, only one is listed below
and the rest will be dup'ed to it.

Reported by:	syzbot+9fece8a63c0e27273821@syzkaller.appspotmail.com
Reviewed by:	tuexen, gallatin
MFC after:	1 month
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D31659
This commit is contained in:
Mark Johnston 2021-09-07 14:49:53 -04:00
parent c67f3b8b78
commit bd4a39cc93
7 changed files with 130 additions and 46 deletions

View File

@ -418,12 +418,14 @@ soalloc(struct vnet *vnet)
* a feature to change class of an existing lock, so we use DUPOK.
*/
mtx_init(&so->so_lock, "socket", NULL, MTX_DEF | MTX_DUPOK);
so->so_snd.sb_mtx = &so->so_snd_mtx;
so->so_rcv.sb_mtx = &so->so_rcv_mtx;
SOCKBUF_LOCK_INIT(&so->so_snd, "so_snd");
SOCKBUF_LOCK_INIT(&so->so_rcv, "so_rcv");
so->so_rcv.sb_sel = &so->so_rdsel;
so->so_snd.sb_sel = &so->so_wrsel;
sx_init(&so->so_snd.sb_sx, "so_snd_sx");
sx_init(&so->so_rcv.sb_sx, "so_rcv_sx");
sx_init(&so->so_snd_sx, "so_snd_sx");
sx_init(&so->so_rcv_sx, "so_rcv_sx");
TAILQ_INIT(&so->so_snd.sb_aiojobq);
TAILQ_INIT(&so->so_rcv.sb_aiojobq);
TASK_INIT(&so->so_snd.sb_aiotask, 0, soaio_snd, so);
@ -487,8 +489,8 @@ sodealloc(struct socket *so)
if (so->so_snd.sb_hiwat)
(void)chgsbsize(so->so_cred->cr_uidinfo,
&so->so_snd.sb_hiwat, 0, RLIM_INFINITY);
sx_destroy(&so->so_snd.sb_sx);
sx_destroy(&so->so_rcv.sb_sx);
sx_destroy(&so->so_snd_sx);
sx_destroy(&so->so_rcv_sx);
SOCKBUF_LOCK_DESTROY(&so->so_snd);
SOCKBUF_LOCK_DESTROY(&so->so_rcv);
}
@ -899,18 +901,48 @@ solisten(struct socket *so, int backlog, struct thread *td)
return (error);
}
/*
* Prepare for a call to solisten_proto(). Acquire all socket buffer locks in
* order to interlock with socket I/O.
*/
int
solisten_proto_check(struct socket *so)
{
SOCK_LOCK_ASSERT(so);
if (so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING |
SS_ISDISCONNECTING))
if ((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING |
SS_ISDISCONNECTING)) != 0)
return (EINVAL);
/*
* Sleeping is not permitted here, so simply fail if userspace is
* attempting to transmit or receive on the socket. This kind of
* transient failure is not ideal, but it should occur only if userspace
* is misusing the socket interfaces.
*/
if (!sx_try_xlock(&so->so_snd_sx))
return (EAGAIN);
if (!sx_try_xlock(&so->so_rcv_sx)) {
sx_xunlock(&so->so_snd_sx);
return (EAGAIN);
}
mtx_lock(&so->so_snd_mtx);
mtx_lock(&so->so_rcv_mtx);
return (0);
}
/*
* Undo the setup done by solisten_proto_check().
*/
void
solisten_proto_abort(struct socket *so)
{
mtx_unlock(&so->so_snd_mtx);
mtx_unlock(&so->so_rcv_mtx);
sx_xunlock(&so->so_snd_sx);
sx_xunlock(&so->so_rcv_sx);
}
void
solisten_proto(struct socket *so, int backlog)
{
@ -920,6 +952,9 @@ solisten_proto(struct socket *so, int backlog)
sbintime_t sbrcv_timeo, sbsnd_timeo;
SOCK_LOCK_ASSERT(so);
KASSERT((so->so_state & (SS_ISCONNECTED | SS_ISCONNECTING |
SS_ISDISCONNECTING)) == 0,
("%s: bad socket state %p", __func__, so));
if (SOLISTENING(so))
goto listening;
@ -938,10 +973,6 @@ solisten_proto(struct socket *so, int backlog)
sbdestroy(&so->so_snd, so);
sbdestroy(&so->so_rcv, so);
sx_destroy(&so->so_snd.sb_sx);
sx_destroy(&so->so_rcv.sb_sx);
SOCKBUF_LOCK_DESTROY(&so->so_snd);
SOCKBUF_LOCK_DESTROY(&so->so_rcv);
#ifdef INVARIANTS
bzero(&so->so_rcv,
@ -974,6 +1005,11 @@ listening:
if (backlog < 0 || backlog > somaxconn)
backlog = somaxconn;
so->sol_qlimit = backlog;
mtx_unlock(&so->so_snd_mtx);
mtx_unlock(&so->so_rcv_mtx);
sx_xunlock(&so->so_snd_sx);
sx_xunlock(&so->so_rcv_sx);
}
/*

View File

@ -890,13 +890,17 @@ uipc_listen(struct socket *so, int backlog, struct thread *td)
if (so->so_type != SOCK_STREAM && so->so_type != SOCK_SEQPACKET)
return (EOPNOTSUPP);
/*
* Synchronize with concurrent connection attempts.
*/
error = 0;
unp = sotounpcb(so);
KASSERT(unp != NULL, ("uipc_listen: unp == NULL"));
UNP_PCB_LOCK(unp);
if (unp->unp_vnode == NULL) {
/* Already connected or not bound to an address. */
error = unp->unp_conn != NULL ? EINVAL : EDESTADDRREQ;
if (unp->unp_conn != NULL || (unp->unp_flags & UNP_CONNECTING) != 0)
error = EINVAL;
else if (unp->unp_vnode == NULL)
error = EDESTADDRREQ;
if (error != 0) {
UNP_PCB_UNLOCK(unp);
return (error);
}
@ -1523,6 +1527,7 @@ unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
bcopy(soun->sun_path, buf, len);
buf[len] = 0;
error = 0;
unp = sotounpcb(so);
UNP_PCB_LOCK(unp);
for (;;) {
@ -1538,13 +1543,16 @@ unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
* lock the peer socket, to ensure that unp_conn cannot
* transition between two valid sockets while locks are dropped.
*/
if (unp->unp_conn != NULL) {
UNP_PCB_UNLOCK(unp);
return (EISCONN);
if (SOLISTENING(so))
error = EOPNOTSUPP;
else if (unp->unp_conn != NULL)
error = EISCONN;
else if ((unp->unp_flags & UNP_CONNECTING) != 0) {
error = EALREADY;
}
if ((unp->unp_flags & UNP_CONNECTING) != 0) {
if (error != 0) {
UNP_PCB_UNLOCK(unp);
return (EALREADY);
return (error);
}
if (unp->unp_pairbusy > 0) {
unp->unp_flags |= UNP_WAITING;

View File

@ -2506,14 +2506,17 @@ ng_btsocket_l2cap_listen(struct socket *so, int backlog, struct thread *td)
if (error != 0)
goto out;
if (pcb == NULL) {
solisten_proto_abort(so);
error = EINVAL;
goto out;
}
if (ng_btsocket_l2cap_node == NULL) {
solisten_proto_abort(so);
error = EINVAL;
goto out;
}
if (pcb->psm == 0) {
solisten_proto_abort(so);
error = EADDRNOTAVAIL;
goto out;
}

View File

@ -894,6 +894,7 @@ ng_btsocket_rfcomm_listen(struct socket *so, int backlog, struct thread *td)
* from socreate()
*/
if (l2so == NULL) {
solisten_proto_abort(so);
error = socreate_error;
goto out;
}
@ -907,8 +908,10 @@ ng_btsocket_rfcomm_listen(struct socket *so, int backlog, struct thread *td)
*/
error = ng_btsocket_rfcomm_session_create(&s, l2so,
NG_HCI_BDADDR_ANY, NULL, td);
if (error != 0)
if (error != 0) {
solisten_proto_abort(so);
goto out;
}
l2so = NULL;
}
SOCK_LOCK(so);

View File

@ -7201,7 +7201,8 @@ sctp_listen(struct socket *so, int backlog, struct thread *p)
}
}
}
SCTP_INP_RLOCK(inp);
SCTP_INP_INFO_WLOCK();
SCTP_INP_WLOCK(inp);
#ifdef SCTP_LOCK_LOGGING
if (SCTP_BASE_SYSCTL(sctp_logging_level) & SCTP_LOCK_LOGGING_ENABLE) {
sctp_log_lock(inp, (struct sctp_tcb *)NULL, SCTP_LOG_LOCK_SOCK);
@ -7209,10 +7210,9 @@ sctp_listen(struct socket *so, int backlog, struct thread *p)
#endif
SOCK_LOCK(so);
error = solisten_proto_check(so);
SOCK_UNLOCK(so);
if (error) {
SCTP_INP_RUNLOCK(inp);
return (error);
SOCK_UNLOCK(so);
goto out;
}
if ((sctp_is_feature_on(inp, SCTP_PCB_FLAGS_PORTREUSE)) &&
(inp->sctp_flags & SCTP_PCB_FLAGS_IN_TCPPOOL)) {
@ -7223,39 +7223,44 @@ sctp_listen(struct socket *so, int backlog, struct thread *p)
* move the guy that was listener to the TCP Pool.
*/
if (sctp_swap_inpcb_for_listen(inp)) {
SCTP_INP_RUNLOCK(inp);
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
return (EADDRINUSE);
SOCK_UNLOCK(so);
solisten_proto_abort(so);
error = EADDRINUSE;
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
goto out;
}
}
if ((inp->sctp_flags & SCTP_PCB_FLAGS_TCPTYPE) &&
(inp->sctp_flags & SCTP_PCB_FLAGS_CONNECTED)) {
/* We are already connected AND the TCP model */
SCTP_INP_RUNLOCK(inp);
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, EADDRINUSE);
return (EADDRINUSE);
}
SCTP_INP_RUNLOCK(inp);
if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) {
/* We must do a bind. */
if ((error = sctp_inpcb_bind(so, NULL, NULL, p))) {
/* bind error, probably perm */
return (error);
}
}
SCTP_INP_WLOCK(inp);
if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) == 0) {
SOCK_LOCK(so);
solisten_proto(so, backlog);
SOCK_UNLOCK(so);
solisten_proto_abort(so);
error = EADDRINUSE;
SCTP_LTRACE_ERR_RET(inp, NULL, NULL, SCTP_FROM_SCTP_USRREQ, error);
goto out;
}
if (inp->sctp_flags & SCTP_PCB_FLAGS_UNBOUND) {
if ((error = sctp_inpcb_bind_locked(inp, NULL, NULL, p))) {
SOCK_UNLOCK(so);
solisten_proto_abort(so);
/* bind error, probably perm */
goto out;
}
}
if ((inp->sctp_flags & SCTP_PCB_FLAGS_UDPTYPE) == 0) {
solisten_proto(so, backlog);
} else {
solisten_proto_abort(so);
}
SOCK_UNLOCK(so);
if (backlog > 0) {
inp->sctp_flags |= SCTP_PCB_FLAGS_ACCEPTING;
} else {
inp->sctp_flags &= ~SCTP_PCB_FLAGS_ACCEPTING;
}
out:
SCTP_INP_WUNLOCK(inp);
SCTP_INP_INFO_WUNLOCK();
return (error);
}

View File

@ -457,10 +457,15 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
TCPDEBUG1();
SOCK_LOCK(so);
error = solisten_proto_check(so);
if (error != 0) {
SOCK_UNLOCK(so);
goto out;
}
if (inp->inp_lport == 0) {
INP_HASH_WLOCK(&V_tcbinfo);
if (error == 0 && inp->inp_lport == 0)
error = in_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
error = in_pcbbind(inp, NULL, td->td_ucred);
INP_HASH_WUNLOCK(&V_tcbinfo);
}
if (error == 0) {
tcp_state_change(tp, TCPS_LISTEN);
solisten_proto(so, backlog);
@ -468,6 +473,8 @@ tcp_usr_listen(struct socket *so, int backlog, struct thread *td)
if ((so->so_options & SO_NO_OFFLOAD) == 0)
tcp_offload_listen_start(tp);
#endif
} else {
solisten_proto_abort(so);
}
SOCK_UNLOCK(so);
@ -504,12 +511,16 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
TCPDEBUG1();
SOCK_LOCK(so);
error = solisten_proto_check(so);
if (error != 0) {
SOCK_UNLOCK(so);
goto out;
}
INP_HASH_WLOCK(&V_tcbinfo);
if (error == 0 && inp->inp_lport == 0) {
if (inp->inp_lport == 0) {
inp->inp_vflag &= ~INP_IPV4;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
inp->inp_vflag |= INP_IPV4;
error = in6_pcbbind(inp, (struct sockaddr *)0, td->td_ucred);
error = in6_pcbbind(inp, NULL, td->td_ucred);
}
INP_HASH_WUNLOCK(&V_tcbinfo);
if (error == 0) {
@ -519,6 +530,8 @@ tcp6_usr_listen(struct socket *so, int backlog, struct thread *td)
if ((so->so_options & SO_NO_OFFLOAD) == 0)
tcp_offload_listen_start(tp);
#endif
} else {
solisten_proto_abort(so);
}
SOCK_UNLOCK(so);
@ -581,6 +594,10 @@ tcp_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
error = ECONNREFUSED;
goto out;
}
if (SOLISTENING(so)) {
error = EOPNOTSUPP;
goto out;
}
tp = intotcpcb(inp);
TCPDEBUG1();
NET_EPOCH_ENTER(et);
@ -643,6 +660,10 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *nam, struct thread *td)
error = ECONNREFUSED;
goto out;
}
if (SOLISTENING(so)) {
error = EINVAL;
goto out;
}
tp = intotcpcb(inp);
TCPDEBUG1();
#ifdef INET
@ -1021,6 +1042,10 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
TCPDEBUG1();
if (nam != NULL && tp->t_state < TCPS_SYN_SENT) {
if (tp->t_state == TCPS_LISTEN) {
error = EINVAL;
goto out;
}
switch (nam->sa_family) {
#ifdef INET
case AF_INET:
@ -1119,6 +1144,9 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf *m,
sbappendstream(&so->so_snd, m, flags);
m = NULL;
if (nam && tp->t_state < TCPS_SYN_SENT) {
KASSERT(tp->t_state == TCPS_CLOSED,
("%s: tp %p is listening", __func__, tp));
/*
* Do implied connect if not yet connected,
* initialize window to default value, and

View File

@ -453,6 +453,7 @@ void sofree(struct socket *so);
void sohasoutofband(struct socket *so);
int solisten(struct socket *so, int backlog, struct thread *td);
void solisten_proto(struct socket *so, int backlog);
void solisten_proto_abort(struct socket *so);
int solisten_proto_check(struct socket *so);
int solisten_dequeue(struct socket *, struct socket **, int);
struct socket *