Fix race conditions when enumerating pcb lists by moving the initialization
(and, where appropriate, the destruction) of the pcb mutex to the init/fini functions of the pcb zones. This allows locking of the pcb entries and a race-free comparison of the generation count. Rearrange locking a bit to avoid an extra locking operation to update the generation count in in_pcballoc() (in_pcballoc() now returns the pcb locked). I am planning to convert pcb list handling from a type-safe to a reference-count model soon (as this allows really freeing the PCBs). Reviewed by: rwatson@, mohans@ MFC after: 1 week
This commit is contained in:
parent
0c6d6356ba
commit
d915b28015
@ -167,19 +167,20 @@ SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtime, CTLFLAG_RW,
|
||||
|
||||
/*
|
||||
* Allocate a PCB and associate it with the socket.
|
||||
* On success return with the PCB locked.
|
||||
*/
|
||||
int
|
||||
in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, const char *type)
|
||||
in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
|
||||
{
|
||||
struct inpcb *inp;
|
||||
int error;
|
||||
|
||||
INP_INFO_WLOCK_ASSERT(pcbinfo);
|
||||
error = 0;
|
||||
inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT | M_ZERO);
|
||||
inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
|
||||
if (inp == NULL)
|
||||
return (ENOBUFS);
|
||||
inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
|
||||
bzero(inp,inp_zero_size);
|
||||
inp->inp_pcbinfo = pcbinfo;
|
||||
inp->inp_socket = so;
|
||||
#ifdef MAC
|
||||
@ -209,11 +210,13 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, const char *type)
|
||||
LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
|
||||
pcbinfo->ipi_count++;
|
||||
so->so_pcb = (caddr_t)inp;
|
||||
INP_LOCK_INIT(inp, "inp", type);
|
||||
#ifdef INET6
|
||||
if (ip6_auto_flowlabel)
|
||||
inp->inp_flags |= IN6P_AUTOFLOWLABEL;
|
||||
#endif
|
||||
INP_LOCK(inp);
|
||||
inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
|
||||
|
||||
#if defined(IPSEC) || defined(FAST_IPSEC) || defined(MAC)
|
||||
out:
|
||||
if (error != 0)
|
||||
@ -721,10 +724,11 @@ in_pcbfree(struct inpcb *inp)
|
||||
(void)m_free(inp->inp_options);
|
||||
ip_freemoptions(inp->inp_moptions);
|
||||
inp->inp_vflag = 0;
|
||||
INP_LOCK_DESTROY(inp);
|
||||
|
||||
#ifdef MAC
|
||||
mac_destroy_inpcb(inp);
|
||||
#endif
|
||||
INP_UNLOCK(inp);
|
||||
uma_zfree(ipi->ipi_zone, inp);
|
||||
}
|
||||
|
||||
|
@ -166,6 +166,7 @@ struct inpcb {
|
||||
} inp_depend6;
|
||||
LIST_ENTRY(inpcb) inp_portlist;
|
||||
struct inpcbport *inp_phd; /* head of this list */
|
||||
#define inp_zero_size offsetof(struct inpcb, inp_gencnt)
|
||||
inp_gen_t inp_gencnt; /* generation count of this instance */
|
||||
struct mtx inp_mtx;
|
||||
|
||||
@ -342,7 +343,7 @@ extern int ipport_hilastauto;
|
||||
extern struct callout ipport_tick_callout;
|
||||
|
||||
void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
|
||||
int in_pcballoc(struct socket *, struct inpcbinfo *, const char *);
|
||||
int in_pcballoc(struct socket *, struct inpcbinfo *);
|
||||
int in_pcbbind(struct inpcb *, struct sockaddr *, struct ucred *);
|
||||
int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
|
||||
u_short *, struct ucred *);
|
||||
|
@ -123,6 +123,22 @@ div_zone_change(void *tag)
|
||||
uma_zone_set_max(divcbinfo.ipi_zone, maxsockets);
|
||||
}
|
||||
|
||||
/*
 * Item-init callback handed to uma_zcreate() for the "divcb" zone in
 * div_init().  Initializes the lock embedded in the inpcb stored at `mem`
 * (lock name "inp", type "divinp"), so the lock is no longer set up in
 * in_pcballoc().  `size` and `flags` are unused.  Always returns 0.
 */
static int
|
||||
div_inpcb_init(void *mem, int size, int flags)
|
||||
{
|
||||
struct inpcb *inp = (struct inpcb *) mem;
|
||||
INP_LOCK_INIT(inp, "inp", "divinp");
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * Item-fini callback handed to uma_zcreate() for the "divcb" zone in
 * div_init().  Destroys the lock embedded in the inpcb stored at `mem`.
 * `size` is unused.
 */
static void
|
||||
div_inpcb_fini(void *mem, int size)
|
||||
{
|
||||
struct inpcb *inp = (struct inpcb *) mem;
|
||||
INP_LOCK_DESTROY(inp);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
div_init(void)
|
||||
{
|
||||
@ -137,7 +153,7 @@ div_init(void)
|
||||
divcbinfo.hashbase = hashinit(1, M_PCB, &divcbinfo.hashmask);
|
||||
divcbinfo.porthashbase = hashinit(1, M_PCB, &divcbinfo.porthashmask);
|
||||
divcbinfo.ipi_zone = uma_zcreate("divcb", sizeof(struct inpcb),
|
||||
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
|
||||
NULL, NULL, div_inpcb_init, div_inpcb_fini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
|
||||
uma_zone_set_max(divcbinfo.ipi_zone, maxsockets);
|
||||
EVENTHANDLER_REGISTER(maxsockets_change, div_zone_change,
|
||||
NULL, EVENTHANDLER_PRI_ANY);
|
||||
@ -409,13 +425,12 @@ div_attach(struct socket *so, int proto, struct thread *td)
|
||||
if (error)
|
||||
return error;
|
||||
INP_INFO_WLOCK(&divcbinfo);
|
||||
error = in_pcballoc(so, &divcbinfo, "divinp");
|
||||
error = in_pcballoc(so, &divcbinfo);
|
||||
if (error) {
|
||||
INP_INFO_WUNLOCK(&divcbinfo);
|
||||
return error;
|
||||
}
|
||||
inp = (struct inpcb *)so->so_pcb;
|
||||
INP_LOCK(inp);
|
||||
INP_INFO_WUNLOCK(&divcbinfo);
|
||||
inp->inp_ip_p = proto;
|
||||
inp->inp_vflag |= INP_IPV4;
|
||||
@ -567,6 +582,7 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
|
||||
error = 0;
|
||||
for (i = 0; i < n; i++) {
|
||||
inp = inp_list[i];
|
||||
INP_LOCK(inp);
|
||||
if (inp->inp_gencnt <= gencnt) {
|
||||
struct xinpcb xi;
|
||||
bzero(&xi, sizeof(xi));
|
||||
@ -575,8 +591,10 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
|
||||
bcopy(inp, &xi.xi_inp, sizeof *inp);
|
||||
if (inp->inp_socket)
|
||||
sotoxsocket(inp->inp_socket, &xi.xi_socket);
|
||||
INP_UNLOCK(inp);
|
||||
error = SYSCTL_OUT(req, &xi, sizeof xi);
|
||||
}
|
||||
} else
|
||||
INP_UNLOCK(inp);
|
||||
}
|
||||
if (!error) {
|
||||
/*
|
||||
|
@ -123,6 +123,14 @@ rip_zone_change(void *tag)
|
||||
uma_zone_set_max(ripcbinfo.ipi_zone, maxsockets);
|
||||
}
|
||||
|
||||
/*
 * Item-init callback handed to uma_zcreate() for the "ripcb" zone in
 * rip_init().  Initializes the lock embedded in the inpcb stored at `mem`
 * (lock name "inp", type "rawinp").  `size` and `flags` are unused.
 * Always returns 0.
 */
static int
|
||||
rip_inpcb_init(void *mem, int size, int flags)
|
||||
{
|
||||
struct inpcb *inp = (struct inpcb *) mem;
|
||||
INP_LOCK_INIT(inp, "inp", "rawinp");
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
rip_init()
|
||||
{
|
||||
@ -137,7 +145,7 @@ rip_init()
|
||||
ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask);
|
||||
ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask);
|
||||
ripcbinfo.ipi_zone = uma_zcreate("ripcb", sizeof(struct inpcb),
|
||||
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
|
||||
NULL, NULL, rip_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
|
||||
uma_zone_set_max(ripcbinfo.ipi_zone, maxsockets);
|
||||
EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change,
|
||||
NULL, EVENTHANDLER_PRI_ANY);
|
||||
@ -599,13 +607,12 @@ rip_attach(struct socket *so, int proto, struct thread *td)
|
||||
if (error)
|
||||
return error;
|
||||
INP_INFO_WLOCK(&ripcbinfo);
|
||||
error = in_pcballoc(so, &ripcbinfo, "rawinp");
|
||||
error = in_pcballoc(so, &ripcbinfo);
|
||||
if (error) {
|
||||
INP_INFO_WUNLOCK(&ripcbinfo);
|
||||
return error;
|
||||
}
|
||||
inp = (struct inpcb *)so->so_pcb;
|
||||
INP_LOCK(inp);
|
||||
INP_INFO_WUNLOCK(&ripcbinfo);
|
||||
inp->inp_vflag |= INP_IPV4;
|
||||
inp->inp_ip_p = proto;
|
||||
@ -836,6 +843,7 @@ rip_pcblist(SYSCTL_HANDLER_ARGS)
|
||||
error = 0;
|
||||
for (i = 0; i < n; i++) {
|
||||
inp = inp_list[i];
|
||||
INP_LOCK(inp);
|
||||
if (inp->inp_gencnt <= gencnt) {
|
||||
struct xinpcb xi;
|
||||
bzero(&xi, sizeof(xi));
|
||||
@ -844,8 +852,10 @@ rip_pcblist(SYSCTL_HANDLER_ARGS)
|
||||
bcopy(inp, &xi.xi_inp, sizeof *inp);
|
||||
if (inp->inp_socket)
|
||||
sotoxsocket(inp->inp_socket, &xi.xi_socket);
|
||||
INP_UNLOCK(inp);
|
||||
error = SYSCTL_OUT(req, &xi, sizeof xi);
|
||||
}
|
||||
} else
|
||||
INP_UNLOCK(inp);
|
||||
}
|
||||
if (!error) {
|
||||
/*
|
||||
|
@ -262,6 +262,14 @@ tcp_zone_change(void *tag)
|
||||
uma_zone_set_max(tcptw_zone, maxsockets / 5);
|
||||
}
|
||||
|
||||
/*
 * Item-init callback handed to uma_zcreate() for the "inpcb" zone in
 * tcp_init().  Initializes the lock embedded in the inpcb stored at `mem`
 * (lock name "inp", type "tcpinp").  `size` and `flags` are unused.
 * Always returns 0.
 */
static int
|
||||
tcp_inpcb_init(void *mem, int size, int flags)
|
||||
{
|
||||
struct inpcb *inp = (struct inpcb *) mem;
|
||||
INP_LOCK_INIT(inp, "inp", "tcpinp");
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
tcp_init(void)
|
||||
{
|
||||
@ -290,7 +298,7 @@ tcp_init(void)
|
||||
tcbinfo.porthashbase = hashinit(hashsize, M_PCB,
|
||||
&tcbinfo.porthashmask);
|
||||
tcbinfo.ipi_zone = uma_zcreate("inpcb", sizeof(struct inpcb),
|
||||
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
|
||||
NULL, NULL, tcp_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
|
||||
uma_zone_set_max(tcbinfo.ipi_zone, maxsockets);
|
||||
#ifdef INET6
|
||||
#define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr))
|
||||
@ -989,6 +997,7 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
|
||||
error = 0;
|
||||
for (i = 0; i < n; i++) {
|
||||
inp = inp_list[i];
|
||||
INP_LOCK(inp);
|
||||
if (inp->inp_gencnt <= gencnt) {
|
||||
struct xtcpcb xt;
|
||||
void *inp_ppcb;
|
||||
@ -1012,8 +1021,11 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
|
||||
xt.xt_socket.xso_protocol = IPPROTO_TCP;
|
||||
}
|
||||
xt.xt_inp.inp_gencnt = inp->inp_gencnt;
|
||||
INP_UNLOCK(inp);
|
||||
error = SYSCTL_OUT(req, &xt, sizeof xt);
|
||||
}
|
||||
} else
|
||||
INP_UNLOCK(inp);
|
||||
|
||||
}
|
||||
if (!error) {
|
||||
/*
|
||||
|
@ -262,6 +262,14 @@ tcp_zone_change(void *tag)
|
||||
uma_zone_set_max(tcptw_zone, maxsockets / 5);
|
||||
}
|
||||
|
||||
/*
 * Item-init callback handed to uma_zcreate() for the "inpcb" zone in
 * tcp_init().  Initializes the lock embedded in the inpcb stored at `mem`
 * (lock name "inp", type "tcpinp").  `size` and `flags` are unused.
 * Always returns 0.
 */
static int
|
||||
tcp_inpcb_init(void *mem, int size, int flags)
|
||||
{
|
||||
struct inpcb *inp = (struct inpcb *) mem;
|
||||
INP_LOCK_INIT(inp, "inp", "tcpinp");
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
tcp_init(void)
|
||||
{
|
||||
@ -290,7 +298,7 @@ tcp_init(void)
|
||||
tcbinfo.porthashbase = hashinit(hashsize, M_PCB,
|
||||
&tcbinfo.porthashmask);
|
||||
tcbinfo.ipi_zone = uma_zcreate("inpcb", sizeof(struct inpcb),
|
||||
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
|
||||
NULL, NULL, tcp_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
|
||||
uma_zone_set_max(tcbinfo.ipi_zone, maxsockets);
|
||||
#ifdef INET6
|
||||
#define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr))
|
||||
@ -989,6 +997,7 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
|
||||
error = 0;
|
||||
for (i = 0; i < n; i++) {
|
||||
inp = inp_list[i];
|
||||
INP_LOCK(inp);
|
||||
if (inp->inp_gencnt <= gencnt) {
|
||||
struct xtcpcb xt;
|
||||
void *inp_ppcb;
|
||||
@ -1012,8 +1021,11 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
|
||||
xt.xt_socket.xso_protocol = IPPROTO_TCP;
|
||||
}
|
||||
xt.xt_inp.inp_gencnt = inp->inp_gencnt;
|
||||
INP_UNLOCK(inp);
|
||||
error = SYSCTL_OUT(req, &xt, sizeof xt);
|
||||
}
|
||||
} else
|
||||
INP_UNLOCK(inp);
|
||||
|
||||
}
|
||||
if (!error) {
|
||||
/*
|
||||
|
@ -1401,13 +1401,12 @@ tcp_attach(so)
|
||||
return (error);
|
||||
}
|
||||
INP_INFO_WLOCK(&tcbinfo);
|
||||
error = in_pcballoc(so, &tcbinfo, "tcpinp");
|
||||
error = in_pcballoc(so, &tcbinfo);
|
||||
if (error) {
|
||||
INP_INFO_WUNLOCK(&tcbinfo);
|
||||
return (error);
|
||||
}
|
||||
inp = sotoinpcb(so);
|
||||
INP_LOCK(inp);
|
||||
#ifdef INET6
|
||||
if (isipv6) {
|
||||
inp->inp_vflag |= INP_IPV6;
|
||||
|
@ -137,6 +137,14 @@ udp_zone_change(void *tag)
|
||||
uma_zone_set_max(udbinfo.ipi_zone, maxsockets);
|
||||
}
|
||||
|
||||
/*
 * Item-init callback handed to uma_zcreate() for the "udpcb" zone in
 * udp_init().  Initializes the lock embedded in the inpcb stored at `mem`
 * (lock name "inp", type "udpinp").  `size` and `flags` are unused.
 * Always returns 0.
 */
static int
|
||||
udp_inpcb_init(void *mem, int size, int flags)
|
||||
{
|
||||
struct inpcb *inp = (struct inpcb *) mem;
|
||||
INP_LOCK_INIT(inp, "inp", "udpinp");
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
udp_init()
|
||||
{
|
||||
@ -147,7 +155,7 @@ udp_init()
|
||||
udbinfo.porthashbase = hashinit(UDBHASHSIZE, M_PCB,
|
||||
&udbinfo.porthashmask);
|
||||
udbinfo.ipi_zone = uma_zcreate("udpcb", sizeof(struct inpcb), NULL,
|
||||
NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
|
||||
NULL, udp_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
|
||||
uma_zone_set_max(udbinfo.ipi_zone, maxsockets);
|
||||
EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL,
|
||||
EVENTHANDLER_PRI_ANY);
|
||||
@ -633,6 +641,7 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
|
||||
error = 0;
|
||||
for (i = 0; i < n; i++) {
|
||||
inp = inp_list[i];
|
||||
INP_LOCK(inp);
|
||||
if (inp->inp_gencnt <= gencnt) {
|
||||
struct xinpcb xi;
|
||||
bzero(&xi, sizeof(xi));
|
||||
@ -642,8 +651,10 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
|
||||
if (inp->inp_socket)
|
||||
sotoxsocket(inp->inp_socket, &xi.xi_socket);
|
||||
xi.xi_inp.inp_gencnt = inp->inp_gencnt;
|
||||
INP_UNLOCK(inp);
|
||||
error = SYSCTL_OUT(req, &xi, sizeof xi);
|
||||
}
|
||||
} else
|
||||
INP_UNLOCK(inp);
|
||||
}
|
||||
if (!error) {
|
||||
/*
|
||||
@ -966,14 +977,13 @@ udp_attach(struct socket *so, int proto, struct thread *td)
|
||||
if (error)
|
||||
return error;
|
||||
INP_INFO_WLOCK(&udbinfo);
|
||||
error = in_pcballoc(so, &udbinfo, "udpinp");
|
||||
error = in_pcballoc(so, &udbinfo);
|
||||
if (error) {
|
||||
INP_INFO_WUNLOCK(&udbinfo);
|
||||
return error;
|
||||
}
|
||||
|
||||
inp = (struct inpcb *)so->so_pcb;
|
||||
INP_LOCK(inp);
|
||||
INP_INFO_WUNLOCK(&udbinfo);
|
||||
inp->inp_vflag |= INP_IPV4;
|
||||
inp->inp_ip_ttl = ip_defttl;
|
||||
|
@ -455,7 +455,7 @@ in6_pcbfree(struct inpcb *inp)
|
||||
(void)m_free(inp->inp_options);
|
||||
ip_freemoptions(inp->inp_moptions);
|
||||
inp->inp_vflag = 0;
|
||||
INP_LOCK_DESTROY(inp);
|
||||
INP_UNLOCK(inp);
|
||||
uma_zfree(ipi->ipi_zone, inp);
|
||||
}
|
||||
|
||||
|
@ -561,14 +561,13 @@ rip6_attach(struct socket *so, int proto, struct thread *td)
|
||||
if (filter == NULL)
|
||||
return ENOMEM;
|
||||
INP_INFO_WLOCK(&ripcbinfo);
|
||||
error = in_pcballoc(so, &ripcbinfo, "raw6inp");
|
||||
error = in_pcballoc(so, &ripcbinfo);
|
||||
if (error) {
|
||||
INP_INFO_WUNLOCK(&ripcbinfo);
|
||||
FREE(filter, M_PCB);
|
||||
return error;
|
||||
}
|
||||
inp = (struct inpcb *)so->so_pcb;
|
||||
INP_LOCK(inp);
|
||||
INP_INFO_WUNLOCK(&ripcbinfo);
|
||||
inp->inp_vflag |= INP_IPV6;
|
||||
inp->in6p_ip6_nxt = (long)proto;
|
||||
|
@ -503,13 +503,12 @@ udp6_attach(struct socket *so, int proto, struct thread *td)
|
||||
return error;
|
||||
}
|
||||
INP_INFO_WLOCK(&udbinfo);
|
||||
error = in_pcballoc(so, &udbinfo, "udp6inp");
|
||||
error = in_pcballoc(so, &udbinfo);
|
||||
if (error) {
|
||||
INP_INFO_WUNLOCK(&udbinfo);
|
||||
return error;
|
||||
}
|
||||
inp = (struct inpcb *)so->so_pcb;
|
||||
INP_LOCK(inp);
|
||||
INP_INFO_WUNLOCK(&udbinfo);
|
||||
inp->inp_vflag |= INP_IPV6;
|
||||
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)
|
||||
|
Loading…
x
Reference in New Issue
Block a user