Push acquisition of the accept mutex out of sofree() into the caller

(sorele()/sotryfree()):

- This permits the caller to acquire the accept mutex before the socket
  mutex, avoiding sofree() having to drop the socket mutex and re-order,
  which could lead to races permitting more than one thread to enter
  sofree() after a socket is ready to be freed.

- This also covers clearing of the so_pcb weak socket reference from
  the protocol to the socket, preventing races in clearing and
  evaluating the reference that could cause sofree() to be called
  more than once on the same socket.

This appears to close a race that I was able to trigger easily by
repeatedly opening and resetting TCP connections to a host: the
tcp_close() code, called as a result of the RST, raced with the close()
of the accepted socket in the user process, resulting in simultaneous
attempts to de-allocate the same socket.  The new locking increases
the overhead for operations that may potentially free the socket, so we
will want to revise the synchronization strategy here as we normalize
the reference counting model for sockets.  The use of the accept mutex
in freeing of sockets that are not listen sockets is primarily
motivated by the potential need to remove the socket from the
incomplete connection queue on its parent (listen) socket, so cleaning
up the reference model here may allow us to substantially weaken the
synchronization requirements.

RELENG_5_3 candidate.

MFC after:	3 days
Reviewed by:	dwhite
Discussed with:	gnn, dwhite, green
Reported by:	Marc UBM Bocklet <ubm at u-boot-man dot de>
Reported by:	Vlad <marchenko at gmail dot com>
This commit is contained in:
rwatson 2004-10-18 22:19:43 +00:00
parent d2f67f65f7
commit 4b81ce6dd2
19 changed files with 33 additions and 5 deletions

View File

@ -2063,6 +2063,7 @@ fputsock(struct socket *so)
{
NET_ASSERT_GIANT();
ACCEPT_LOCK();
SOCK_LOCK(so);
sorele(so);
}

View File

@ -227,6 +227,7 @@ socreate(dom, aso, type, proto, cred, td)
SOCK_UNLOCK(so);
error = (*prp->pr_usrreqs->pru_attach)(so, proto, td);
if (error) {
ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_state |= SS_NOFDREF;
sorele(so);
@ -333,9 +334,8 @@ sofree(so)
{
struct socket *head;
SOCK_UNLOCK(so);
ACCEPT_LOCK();
SOCK_LOCK(so);
ACCEPT_LOCK_ASSERT();
SOCK_LOCK_ASSERT(so);
if (so->so_pcb != NULL || (so->so_state & SS_NOFDREF) == 0 ||
so->so_count != 0) {
@ -467,6 +467,7 @@ drop:
error = error2;
}
discard:
ACCEPT_LOCK();
SOCK_LOCK(so);
KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF"));
so->so_state |= SS_NOFDREF;

View File

@ -140,6 +140,7 @@ uipc_abort(struct socket *so)
unp_drop(unp, ECONNABORTED);
unp_detach(unp);
UNP_UNLOCK_ASSERT();
ACCEPT_LOCK();
SOCK_LOCK(so);
sotryfree(so);
return (0);

View File

@ -98,6 +98,7 @@ raw_detach(rp)
{
struct socket *so = rp->rcb_socket;
ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = 0;
sotryfree(so);

View File

@ -147,6 +147,7 @@ raw_uabort(struct socket *so)
return EINVAL;
raw_disconnect(rp);
soisdisconnected(so);
ACCEPT_LOCK();
SOCK_LOCK(so);
sotryfree(so);
return 0;

View File

@ -282,6 +282,7 @@ at_pcbdetach(struct socket *so, struct ddpcb *ddp)
DDP_LOCK_ASSERT(ddp);
soisdisconnected(so);
ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
sotryfree(so);

View File

@ -173,6 +173,7 @@ atm_sock_detach(so)
/*
* Break links and free control blocks
*/
ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
sotryfree(so);

View File

@ -1417,6 +1417,7 @@ ng_btsocket_hci_raw_detach(struct socket *so)
bzero(pcb, sizeof(*pcb));
FREE(pcb, M_NETGRAPH_BTSOCKET_HCI_RAW);
ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
sotryfree(so);

View File

@ -1804,6 +1804,7 @@ ng_btsocket_l2cap_rtclean(void *context, int pending)
FREE(pcb, M_NETGRAPH_BTSOCKET_L2CAP);
soisdisconnected(so);
ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
sotryfree(so);
@ -2347,6 +2348,7 @@ ng_btsocket_l2cap_detach(struct socket *so)
FREE(pcb, M_NETGRAPH_BTSOCKET_L2CAP);
soisdisconnected(so);
ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
sotryfree(so);

View File

@ -1129,6 +1129,7 @@ ng_btsocket_l2cap_raw_detach(struct socket *so)
bzero(pcb, sizeof(*pcb));
FREE(pcb, M_NETGRAPH_BTSOCKET_L2CAP_RAW);
ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
sotryfree(so);

View File

@ -724,6 +724,7 @@ ng_btsocket_rfcomm_detach(struct socket *so)
FREE(pcb, M_NETGRAPH_BTSOCKET_RFCOMM);
soisdisconnected(so);
ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
sotryfree(so);

View File

@ -687,6 +687,7 @@ in_pcbdetach(inp)
inp->inp_gencnt = ++ipi->ipi_gencnt;
in_pcbremlists(inp);
if (so) {
ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
sotryfree(so);

View File

@ -1680,6 +1680,7 @@ tcp_twstart(tp)
}
tcp_discardcb(tp);
so = inp->inp_socket;
ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
tw->tw_cred = crhold(so->so_cred);

View File

@ -1680,6 +1680,7 @@ tcp_twstart(tp)
}
tcp_discardcb(tp);
so = inp->inp_socket;
ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
tw->tw_cred = crhold(so->so_cred);

View File

@ -436,6 +436,7 @@ in6_pcbdetach(inp)
in_pcbremlists(inp);
if (so) {
ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
sotryfree(so);

View File

@ -268,6 +268,7 @@ ipx_pcbdetach(ipxp)
{
struct socket *so = ipxp->ipxp_socket;
ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = 0;
sotryfree(so);

View File

@ -424,6 +424,7 @@ ipx_usr_abort(so)
ipx_pcbdetach(ipxp);
splx(s);
soisdisconnected(so);
ACCEPT_LOCK();
SOCK_LOCK(so);
sotryfree(so);
return (0);

View File

@ -135,6 +135,7 @@ natm_usr_detach(struct socket *so)
* we turn on 'drain' *before* we sofree.
*/
npcb_free(npcb, NPCB_DESTROY); /* drain */
ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
sotryfree(so);
@ -464,6 +465,7 @@ struct proc *p;
*/
npcb_free(npcb, NPCB_DESTROY); /* drain */
ACCEPT_LOCK();
SOCK_LOCK(so);
so->so_pcb = NULL;
sotryfree(so);

View File

@ -158,6 +158,8 @@ struct socket {
* until such time as it proves to be a good idea.
*/
extern struct mtx accept_mtx;
#define ACCEPT_LOCK_ASSERT() mtx_assert(&accept_mtx, MA_OWNED)
#define ACCEPT_UNLOCK_ASSERT() mtx_assert(&accept_mtx, MA_NOTOWNED)
#define ACCEPT_LOCK() mtx_lock(&accept_mtx)
#define ACCEPT_UNLOCK() mtx_unlock(&accept_mtx)
@ -344,21 +346,27 @@ struct xsocket {
} while (0)
#define sorele(so) do { \
ACCEPT_LOCK_ASSERT(); \
SOCK_LOCK_ASSERT(so); \
if ((so)->so_count <= 0) \
panic("sorele"); \
if (--(so)->so_count == 0) \
sofree(so); \
else \
else { \
SOCK_UNLOCK(so); \
ACCEPT_UNLOCK(); \
} \
} while (0)
#define sotryfree(so) do { \
ACCEPT_LOCK_ASSERT(); \
SOCK_LOCK_ASSERT(so); \
if ((so)->so_count == 0) \
sofree(so); \
else \
else { \
SOCK_UNLOCK(so); \
ACCEPT_UNLOCK(); \
} \
} while(0)
/*