Fix race conditions on enumerating pcb lists by moving the initialization

( and where appropriate the destruction) of the pcb mutex to the init/finit
functions of the pcb zones.
This allows locking of the pcb entries and race condition free comparison
of the generation count.
Rearrange locking a bit to avoid extra locking operation to update the generation
count in in_pcballoc(). (in_pcballoc now returns the pcb locked)

I am planning to convert pcb list handling from a type safe to a reference count
model soon. ( As this allows really freeing the PCBs)

Reviewed by:	rwatson@, mohans@
MFC after:	1 week
This commit is contained in:
Stephan Uphoff 2006-07-18 22:34:27 +00:00
parent 0c6d6356ba
commit d915b28015
11 changed files with 93 additions and 29 deletions

View File

@ -167,19 +167,20 @@ SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtime, CTLFLAG_RW,
/*
* Allocate a PCB and associate it with the socket.
* On success return with the PCB locked.
*/
int
in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, const char *type)
in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
{
struct inpcb *inp;
int error;
INP_INFO_WLOCK_ASSERT(pcbinfo);
error = 0;
inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT | M_ZERO);
inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
if (inp == NULL)
return (ENOBUFS);
inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
bzero(inp,inp_zero_size);
inp->inp_pcbinfo = pcbinfo;
inp->inp_socket = so;
#ifdef MAC
@ -209,11 +210,13 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, const char *type)
LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
pcbinfo->ipi_count++;
so->so_pcb = (caddr_t)inp;
INP_LOCK_INIT(inp, "inp", type);
#ifdef INET6
if (ip6_auto_flowlabel)
inp->inp_flags |= IN6P_AUTOFLOWLABEL;
#endif
INP_LOCK(inp);
inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
#if defined(IPSEC) || defined(FAST_IPSEC) || defined(MAC)
out:
if (error != 0)
@ -721,10 +724,11 @@ in_pcbfree(struct inpcb *inp)
(void)m_free(inp->inp_options);
ip_freemoptions(inp->inp_moptions);
inp->inp_vflag = 0;
INP_LOCK_DESTROY(inp);
#ifdef MAC
mac_destroy_inpcb(inp);
#endif
INP_UNLOCK(inp);
uma_zfree(ipi->ipi_zone, inp);
}

View File

@ -166,6 +166,7 @@ struct inpcb {
} inp_depend6;
LIST_ENTRY(inpcb) inp_portlist;
struct inpcbport *inp_phd; /* head of this list */
#define inp_zero_size offsetof(struct inpcb, inp_gencnt)
inp_gen_t inp_gencnt; /* generation count of this instance */
struct mtx inp_mtx;
@ -342,7 +343,7 @@ extern int ipport_hilastauto;
extern struct callout ipport_tick_callout;
void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
int in_pcballoc(struct socket *, struct inpcbinfo *, const char *);
int in_pcballoc(struct socket *, struct inpcbinfo *);
int in_pcbbind(struct inpcb *, struct sockaddr *, struct ucred *);
int in_pcbbind_setup(struct inpcb *, struct sockaddr *, in_addr_t *,
u_short *, struct ucred *);

View File

@ -123,6 +123,22 @@ div_zone_change(void *tag)
uma_zone_set_max(divcbinfo.ipi_zone, maxsockets);
}
static int
div_inpcb_init(void *mem, int size, int flags)
{
struct inpcb *inp = (struct inpcb *) mem;
INP_LOCK_INIT(inp, "inp", "divinp");
return (0);
}
static void
div_inpcb_fini(void *mem, int size)
{
struct inpcb *inp = (struct inpcb *) mem;
INP_LOCK_DESTROY(inp);
}
void
div_init(void)
{
@ -137,7 +153,7 @@ div_init(void)
divcbinfo.hashbase = hashinit(1, M_PCB, &divcbinfo.hashmask);
divcbinfo.porthashbase = hashinit(1, M_PCB, &divcbinfo.porthashmask);
divcbinfo.ipi_zone = uma_zcreate("divcb", sizeof(struct inpcb),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
NULL, NULL, div_inpcb_init, div_inpcb_fini, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
uma_zone_set_max(divcbinfo.ipi_zone, maxsockets);
EVENTHANDLER_REGISTER(maxsockets_change, div_zone_change,
NULL, EVENTHANDLER_PRI_ANY);
@ -409,13 +425,12 @@ div_attach(struct socket *so, int proto, struct thread *td)
if (error)
return error;
INP_INFO_WLOCK(&divcbinfo);
error = in_pcballoc(so, &divcbinfo, "divinp");
error = in_pcballoc(so, &divcbinfo);
if (error) {
INP_INFO_WUNLOCK(&divcbinfo);
return error;
}
inp = (struct inpcb *)so->so_pcb;
INP_LOCK(inp);
INP_INFO_WUNLOCK(&divcbinfo);
inp->inp_ip_p = proto;
inp->inp_vflag |= INP_IPV4;
@ -567,6 +582,7 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
error = 0;
for (i = 0; i < n; i++) {
inp = inp_list[i];
INP_LOCK(inp);
if (inp->inp_gencnt <= gencnt) {
struct xinpcb xi;
bzero(&xi, sizeof(xi));
@ -575,8 +591,10 @@ div_pcblist(SYSCTL_HANDLER_ARGS)
bcopy(inp, &xi.xi_inp, sizeof *inp);
if (inp->inp_socket)
sotoxsocket(inp->inp_socket, &xi.xi_socket);
INP_UNLOCK(inp);
error = SYSCTL_OUT(req, &xi, sizeof xi);
}
} else
INP_UNLOCK(inp);
}
if (!error) {
/*

View File

@ -123,6 +123,14 @@ rip_zone_change(void *tag)
uma_zone_set_max(ripcbinfo.ipi_zone, maxsockets);
}
static int
rip_inpcb_init(void *mem, int size, int flags)
{
struct inpcb *inp = (struct inpcb *) mem;
INP_LOCK_INIT(inp, "inp", "rawinp");
return (0);
}
void
rip_init()
{
@ -137,7 +145,7 @@ rip_init()
ripcbinfo.hashbase = hashinit(1, M_PCB, &ripcbinfo.hashmask);
ripcbinfo.porthashbase = hashinit(1, M_PCB, &ripcbinfo.porthashmask);
ripcbinfo.ipi_zone = uma_zcreate("ripcb", sizeof(struct inpcb),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
NULL, NULL, rip_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
uma_zone_set_max(ripcbinfo.ipi_zone, maxsockets);
EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change,
NULL, EVENTHANDLER_PRI_ANY);
@ -599,13 +607,12 @@ rip_attach(struct socket *so, int proto, struct thread *td)
if (error)
return error;
INP_INFO_WLOCK(&ripcbinfo);
error = in_pcballoc(so, &ripcbinfo, "rawinp");
error = in_pcballoc(so, &ripcbinfo);
if (error) {
INP_INFO_WUNLOCK(&ripcbinfo);
return error;
}
inp = (struct inpcb *)so->so_pcb;
INP_LOCK(inp);
INP_INFO_WUNLOCK(&ripcbinfo);
inp->inp_vflag |= INP_IPV4;
inp->inp_ip_p = proto;
@ -836,6 +843,7 @@ rip_pcblist(SYSCTL_HANDLER_ARGS)
error = 0;
for (i = 0; i < n; i++) {
inp = inp_list[i];
INP_LOCK(inp);
if (inp->inp_gencnt <= gencnt) {
struct xinpcb xi;
bzero(&xi, sizeof(xi));
@ -844,8 +852,10 @@ rip_pcblist(SYSCTL_HANDLER_ARGS)
bcopy(inp, &xi.xi_inp, sizeof *inp);
if (inp->inp_socket)
sotoxsocket(inp->inp_socket, &xi.xi_socket);
INP_UNLOCK(inp);
error = SYSCTL_OUT(req, &xi, sizeof xi);
}
} else
INP_UNLOCK(inp);
}
if (!error) {
/*

View File

@ -262,6 +262,14 @@ tcp_zone_change(void *tag)
uma_zone_set_max(tcptw_zone, maxsockets / 5);
}
static int
tcp_inpcb_init(void *mem, int size, int flags)
{
struct inpcb *inp = (struct inpcb *) mem;
INP_LOCK_INIT(inp, "inp", "tcpinp");
return (0);
}
void
tcp_init(void)
{
@ -290,7 +298,7 @@ tcp_init(void)
tcbinfo.porthashbase = hashinit(hashsize, M_PCB,
&tcbinfo.porthashmask);
tcbinfo.ipi_zone = uma_zcreate("inpcb", sizeof(struct inpcb),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
NULL, NULL, tcp_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
uma_zone_set_max(tcbinfo.ipi_zone, maxsockets);
#ifdef INET6
#define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr))
@ -989,6 +997,7 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
error = 0;
for (i = 0; i < n; i++) {
inp = inp_list[i];
INP_LOCK(inp);
if (inp->inp_gencnt <= gencnt) {
struct xtcpcb xt;
void *inp_ppcb;
@ -1012,8 +1021,11 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
xt.xt_socket.xso_protocol = IPPROTO_TCP;
}
xt.xt_inp.inp_gencnt = inp->inp_gencnt;
INP_UNLOCK(inp);
error = SYSCTL_OUT(req, &xt, sizeof xt);
}
} else
INP_UNLOCK(inp);
}
if (!error) {
/*

View File

@ -262,6 +262,14 @@ tcp_zone_change(void *tag)
uma_zone_set_max(tcptw_zone, maxsockets / 5);
}
static int
tcp_inpcb_init(void *mem, int size, int flags)
{
struct inpcb *inp = (struct inpcb *) mem;
INP_LOCK_INIT(inp, "inp", "tcpinp");
return (0);
}
void
tcp_init(void)
{
@ -290,7 +298,7 @@ tcp_init(void)
tcbinfo.porthashbase = hashinit(hashsize, M_PCB,
&tcbinfo.porthashmask);
tcbinfo.ipi_zone = uma_zcreate("inpcb", sizeof(struct inpcb),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
NULL, NULL, tcp_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
uma_zone_set_max(tcbinfo.ipi_zone, maxsockets);
#ifdef INET6
#define TCP_MINPROTOHDR (sizeof(struct ip6_hdr) + sizeof(struct tcphdr))
@ -989,6 +997,7 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
error = 0;
for (i = 0; i < n; i++) {
inp = inp_list[i];
INP_LOCK(inp);
if (inp->inp_gencnt <= gencnt) {
struct xtcpcb xt;
void *inp_ppcb;
@ -1012,8 +1021,11 @@ tcp_pcblist(SYSCTL_HANDLER_ARGS)
xt.xt_socket.xso_protocol = IPPROTO_TCP;
}
xt.xt_inp.inp_gencnt = inp->inp_gencnt;
INP_UNLOCK(inp);
error = SYSCTL_OUT(req, &xt, sizeof xt);
}
} else
INP_UNLOCK(inp);
}
if (!error) {
/*

View File

@ -1401,13 +1401,12 @@ tcp_attach(so)
return (error);
}
INP_INFO_WLOCK(&tcbinfo);
error = in_pcballoc(so, &tcbinfo, "tcpinp");
error = in_pcballoc(so, &tcbinfo);
if (error) {
INP_INFO_WUNLOCK(&tcbinfo);
return (error);
}
inp = sotoinpcb(so);
INP_LOCK(inp);
#ifdef INET6
if (isipv6) {
inp->inp_vflag |= INP_IPV6;

View File

@ -137,6 +137,14 @@ udp_zone_change(void *tag)
uma_zone_set_max(udbinfo.ipi_zone, maxsockets);
}
static int
udp_inpcb_init(void *mem, int size, int flags)
{
struct inpcb *inp = (struct inpcb *) mem;
INP_LOCK_INIT(inp, "inp", "udpinp");
return (0);
}
void
udp_init()
{
@ -147,7 +155,7 @@ udp_init()
udbinfo.porthashbase = hashinit(UDBHASHSIZE, M_PCB,
&udbinfo.porthashmask);
udbinfo.ipi_zone = uma_zcreate("udpcb", sizeof(struct inpcb), NULL,
NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
NULL, udp_inpcb_init, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
uma_zone_set_max(udbinfo.ipi_zone, maxsockets);
EVENTHANDLER_REGISTER(maxsockets_change, udp_zone_change, NULL,
EVENTHANDLER_PRI_ANY);
@ -633,6 +641,7 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
error = 0;
for (i = 0; i < n; i++) {
inp = inp_list[i];
INP_LOCK(inp);
if (inp->inp_gencnt <= gencnt) {
struct xinpcb xi;
bzero(&xi, sizeof(xi));
@ -642,8 +651,10 @@ udp_pcblist(SYSCTL_HANDLER_ARGS)
if (inp->inp_socket)
sotoxsocket(inp->inp_socket, &xi.xi_socket);
xi.xi_inp.inp_gencnt = inp->inp_gencnt;
INP_UNLOCK(inp);
error = SYSCTL_OUT(req, &xi, sizeof xi);
}
} else
INP_UNLOCK(inp);
}
if (!error) {
/*
@ -966,14 +977,13 @@ udp_attach(struct socket *so, int proto, struct thread *td)
if (error)
return error;
INP_INFO_WLOCK(&udbinfo);
error = in_pcballoc(so, &udbinfo, "udpinp");
error = in_pcballoc(so, &udbinfo);
if (error) {
INP_INFO_WUNLOCK(&udbinfo);
return error;
}
inp = (struct inpcb *)so->so_pcb;
INP_LOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
inp->inp_vflag |= INP_IPV4;
inp->inp_ip_ttl = ip_defttl;

View File

@ -455,7 +455,7 @@ in6_pcbfree(struct inpcb *inp)
(void)m_free(inp->inp_options);
ip_freemoptions(inp->inp_moptions);
inp->inp_vflag = 0;
INP_LOCK_DESTROY(inp);
INP_UNLOCK(inp);
uma_zfree(ipi->ipi_zone, inp);
}

View File

@ -561,14 +561,13 @@ rip6_attach(struct socket *so, int proto, struct thread *td)
if (filter == NULL)
return ENOMEM;
INP_INFO_WLOCK(&ripcbinfo);
error = in_pcballoc(so, &ripcbinfo, "raw6inp");
error = in_pcballoc(so, &ripcbinfo);
if (error) {
INP_INFO_WUNLOCK(&ripcbinfo);
FREE(filter, M_PCB);
return error;
}
inp = (struct inpcb *)so->so_pcb;
INP_LOCK(inp);
INP_INFO_WUNLOCK(&ripcbinfo);
inp->inp_vflag |= INP_IPV6;
inp->in6p_ip6_nxt = (long)proto;

View File

@ -503,13 +503,12 @@ udp6_attach(struct socket *so, int proto, struct thread *td)
return error;
}
INP_INFO_WLOCK(&udbinfo);
error = in_pcballoc(so, &udbinfo, "udp6inp");
error = in_pcballoc(so, &udbinfo);
if (error) {
INP_INFO_WUNLOCK(&udbinfo);
return error;
}
inp = (struct inpcb *)so->so_pcb;
INP_LOCK(inp);
INP_INFO_WUNLOCK(&udbinfo);
inp->inp_vflag |= INP_IPV6;
if ((inp->inp_flags & IN6P_IPV6_V6ONLY) == 0)