From 81158452be7803fda8aa36feb4464d5301dce85c Mon Sep 17 00:00:00 2001 From: Robert Watson Date: Mon, 18 Oct 2004 22:19:43 +0000 Subject: [PATCH] Push acquisition of the accept mutex out of sofree() into the caller (sorele()/sotryfree()): - This permits the caller to acquire the accept mutex before the socket mutex, avoiding sofree() having to drop the socket mutex and re-order, which could lead to races permitting more than one thread to enter sofree() after a socket is ready to be free'd. - This also covers clearing of the so_pcb weak socket reference from the protocol to the socket, preventing races in clearing and evaluation of the reference such that sofree() might be called more than once on the same socket. This appears to close a race I was able to easily trigger by repeatedly opening and resetting TCP connections to a host, in which the tcp_close() code called as a result of the RST raced with the close() of the accepted socket in the user process resulting in simultaneous attempts to de-allocate the same socket. The new locking increases the overhead for operations that may potentially free the socket, so we will want to revise the synchronization strategy here as we normalize the reference counting model for sockets. The use of the accept mutex in freeing of sockets that are not listen sockets is primarily motivated by the potential need to remove the socket from the incomplete connection queue on its parent (listen) socket, so cleaning up the reference model here may allow us to substantially weaken the synchronization requirements. RELENG_5_3 candidate. MFC after: 3 days Reviewed by: dwhite Discussed with: gnn, dwhite, green Reported by: Marc UBM Bocklet Reported by: Vlad --- sys/kern/kern_descrip.c | 1 + sys/kern/uipc_socket.c | 7 ++++--- sys/kern/uipc_usrreq.c | 1 + sys/net/raw_cb.c | 1 + sys/net/raw_usrreq.c | 1 + sys/netatalk/ddp_pcb.c | 1 + sys/netatm/atm_socket.c | 1 + sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c | 1 + sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c | 2 ++ .../bluetooth/socket/ng_btsocket_l2cap_raw.c | 1 + sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c | 1 + sys/netinet/in_pcb.c | 1 + sys/netinet/tcp_subr.c | 1 + sys/netinet/tcp_timewait.c | 1 + sys/netinet6/in6_pcb.c | 1 + sys/netipx/ipx_pcb.c | 1 + sys/netipx/ipx_usrreq.c | 1 + sys/netnatm/natm.c | 2 ++ sys/sys/socketvar.h | 12 ++++++++++-- 19 files changed, 33 insertions(+), 5 deletions(-) diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 769b8db72a24..9a448b4e938f 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -2063,6 +2063,7 @@ fputsock(struct socket *so) { NET_ASSERT_GIANT(); + ACCEPT_LOCK(); SOCK_LOCK(so); sorele(so); } diff --git a/sys/kern/uipc_socket.c b/sys/kern/uipc_socket.c index 17ecd6a39c18..1a74cffdd27a 100644 --- a/sys/kern/uipc_socket.c +++ b/sys/kern/uipc_socket.c @@ -227,6 +227,7 @@ socreate(dom, aso, type, proto, cred, td) SOCK_UNLOCK(so); error = (*prp->pr_usrreqs->pru_attach)(so, proto, td); if (error) { + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_state |= SS_NOFDREF; sorele(so); @@ -333,9 +334,8 @@ sofree(so) { struct socket *head; - SOCK_UNLOCK(so); - ACCEPT_LOCK(); - SOCK_LOCK(so); + ACCEPT_LOCK_ASSERT(); + SOCK_LOCK_ASSERT(so); if (so->so_pcb != NULL || (so->so_state & SS_NOFDREF) == 0 || so->so_count != 0) { @@ -467,6 +467,7 @@ drop: error = error2; } discard: + ACCEPT_LOCK(); SOCK_LOCK(so); KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF")); so->so_state |= SS_NOFDREF; diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index 07cdbc7a9f7a..6b97f94ad235 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -140,6 +140,7 @@ uipc_abort(struct socket *so) unp_drop(unp, ECONNABORTED); unp_detach(unp); UNP_UNLOCK_ASSERT(); + ACCEPT_LOCK(); SOCK_LOCK(so); sotryfree(so); return (0); diff --git a/sys/net/raw_cb.c b/sys/net/raw_cb.c index 04b45163a3ba..b6ab2083a0fe 100644 --- a/sys/net/raw_cb.c +++ b/sys/net/raw_cb.c @@ -98,6 +98,7 @@ raw_detach(rp) { struct socket *so = rp->rcb_socket; + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = 0; sotryfree(so); diff --git a/sys/net/raw_usrreq.c b/sys/net/raw_usrreq.c index 7517743dfa86..f444b571ba42 100644 --- a/sys/net/raw_usrreq.c +++ b/sys/net/raw_usrreq.c @@ -147,6 +147,7 @@ raw_uabort(struct socket *so) return EINVAL; raw_disconnect(rp); soisdisconnected(so); + ACCEPT_LOCK(); SOCK_LOCK(so); sotryfree(so); return 0; diff --git a/sys/netatalk/ddp_pcb.c b/sys/netatalk/ddp_pcb.c index 8073d553af8b..dd69850cd8fd 100644 --- a/sys/netatalk/ddp_pcb.c +++ b/sys/netatalk/ddp_pcb.c @@ -282,6 +282,7 @@ at_pcbdetach(struct socket *so, struct ddpcb *ddp) DDP_LOCK_ASSERT(ddp); soisdisconnected(so); + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netatm/atm_socket.c b/sys/netatm/atm_socket.c index 6a027e2a0909..99dbaf6ceac6 100644 --- a/sys/netatm/atm_socket.c +++ b/sys/netatm/atm_socket.c @@ -173,6 +173,7 @@ atm_sock_detach(so) /* * Break links and free control blocks */ + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c b/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c index d4df5bda7586..7c347c370a9d 100644 --- a/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_hci_raw.c @@ -1417,6 +1417,7 @@ ng_btsocket_hci_raw_detach(struct socket *so) bzero(pcb, sizeof(*pcb)); FREE(pcb, M_NETGRAPH_BTSOCKET_HCI_RAW); + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c index f52bafa2113f..cb91fd79a856 100644 --- a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap.c @@ -1804,6 +1804,7 @@ ng_btsocket_l2cap_rtclean(void *context, int pending) FREE(pcb, M_NETGRAPH_BTSOCKET_L2CAP); soisdisconnected(so); + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); @@ -2347,6 +2348,7 @@ ng_btsocket_l2cap_detach(struct socket *so) FREE(pcb, M_NETGRAPH_BTSOCKET_L2CAP); soisdisconnected(so); + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c index 8103f2790bbf..97e924748e55 100644 --- a/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_l2cap_raw.c @@ -1129,6 +1129,7 @@ ng_btsocket_l2cap_raw_detach(struct socket *so) bzero(pcb, sizeof(*pcb)); FREE(pcb, M_NETGRAPH_BTSOCKET_L2CAP_RAW); + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c index a81afac7c378..1ba72114a1ea 100644 --- a/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c +++ b/sys/netgraph/bluetooth/socket/ng_btsocket_rfcomm.c @@ -724,6 +724,7 @@ ng_btsocket_rfcomm_detach(struct socket *so) FREE(pcb, M_NETGRAPH_BTSOCKET_RFCOMM); soisdisconnected(so); + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index ec2565556178..11e5b0dd0d58 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -687,6 +687,7 @@ in_pcbdetach(inp) inp->inp_gencnt = ++ipi->ipi_gencnt; in_pcbremlists(inp); if (so) { + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 7619916a3415..03610b390d98 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -1680,6 +1680,7 @@ tcp_twstart(tp) } tcp_discardcb(tp); so = inp->inp_socket; + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; tw->tw_cred = crhold(so->so_cred); diff --git a/sys/netinet/tcp_timewait.c b/sys/netinet/tcp_timewait.c index 7619916a3415..03610b390d98 100644 --- a/sys/netinet/tcp_timewait.c +++ b/sys/netinet/tcp_timewait.c @@ -1680,6 +1680,7 @@ tcp_twstart(tp) } tcp_discardcb(tp); so = inp->inp_socket; + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; tw->tw_cred = crhold(so->so_cred); diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index 9bcbeb82d3ce..e7941030aa8c 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -436,6 +436,7 @@ in6_pcbdetach(inp) in_pcbremlists(inp); if (so) { + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/netipx/ipx_pcb.c b/sys/netipx/ipx_pcb.c index 2be7e1df92c4..b1e2cabd3dcf 100644 --- a/sys/netipx/ipx_pcb.c +++ b/sys/netipx/ipx_pcb.c @@ -268,6 +268,7 @@ ipx_pcbdetach(ipxp) { struct socket *so = ipxp->ipxp_socket; + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = 0; sotryfree(so); diff --git a/sys/netipx/ipx_usrreq.c b/sys/netipx/ipx_usrreq.c index f07cecb87544..cff94bc4ba36 100644 --- a/sys/netipx/ipx_usrreq.c +++ b/sys/netipx/ipx_usrreq.c @@ -424,6 +424,7 @@ ipx_usr_abort(so) ipx_pcbdetach(ipxp); splx(s); soisdisconnected(so); + ACCEPT_LOCK(); SOCK_LOCK(so); sotryfree(so); return (0); diff --git a/sys/netnatm/natm.c b/sys/netnatm/natm.c index 5613b6a9e660..f999f32346a8 100644 --- a/sys/netnatm/natm.c +++ b/sys/netnatm/natm.c @@ -135,6 +135,7 @@ natm_usr_detach(struct socket *so) * we turn on 'drain' *before* we sofree. */ npcb_free(npcb, NPCB_DESTROY); /* drain */ + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); @@ -464,6 +465,7 @@ struct proc *p; */ npcb_free(npcb, NPCB_DESTROY); /* drain */ + ACCEPT_LOCK(); SOCK_LOCK(so); so->so_pcb = NULL; sotryfree(so); diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index cc2481474c41..391ed3f7b802 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -158,6 +158,8 @@ struct socket { * until such time as it proves to be a good idea. */ extern struct mtx accept_mtx; +#define ACCEPT_LOCK_ASSERT() mtx_assert(&accept_mtx, MA_OWNED) +#define ACCEPT_UNLOCK_ASSERT() mtx_assert(&accept_mtx, MA_NOTOWNED) #define ACCEPT_LOCK() mtx_lock(&accept_mtx) #define ACCEPT_UNLOCK() mtx_unlock(&accept_mtx) @@ -344,21 +346,27 @@ struct xsocket { } while (0) #define sorele(so) do { \ + ACCEPT_LOCK_ASSERT(); \ SOCK_LOCK_ASSERT(so); \ if ((so)->so_count <= 0) \ panic("sorele"); \ if (--(so)->so_count == 0) \ sofree(so); \ - else \ + else { \ SOCK_UNLOCK(so); \ + ACCEPT_UNLOCK(); \ + } \ } while (0) #define sotryfree(so) do { \ + ACCEPT_LOCK_ASSERT(); \ SOCK_LOCK_ASSERT(so); \ if ((so)->so_count == 0) \ sofree(so); \ - else \ + else { \ SOCK_UNLOCK(so); \ + ACCEPT_UNLOCK(); \ + } \ } while(0) /*