Introduce in_multi_mtx, which will protect IPv4-layer multicast address

lists, as well as accessor macros.  For now, this is a recursive mutex
due code sequences where IPv4 multicast calls into IGMP calls into
ip_output(), which then tests for a multicast forwarding case.

For support macros in in_var.h to check multicast address lists, assert
that in_multi_mtx is held.

Acquire in_multi_mtx around iteration over the IPv4 multicast address
lists, such as in ip_input() and ip_output().

Acquire in_multi_mtx when manipulating the IPv4 layer multicast addresses,
as well as over the manipulation of ifnet multicast address lists in order
to keep the two layers in sync.

Lock down accesses to IPv4 multicast addresses in IGMP, or assert the
lock when performing IGMP join/leave events.

Eliminate spl's associated with IPv4 multicast addresses, portions of
IGMP that weren't previously expunged by IGMP locking.

Add in_multi_mtx, igmp_mtx, and if_addr_mtx lock order to hard-coded
lock order in WITNESS, in that order.

Problem reported by:	Ed Maste <emaste at phaedrus dot sandvine dot ca>
MFC after:		10 days
This commit is contained in:
Robert Watson 2005-08-03 19:29:47 +00:00
parent fa2b8debfe
commit dd5a318ba3
6 changed files with 56 additions and 18 deletions

View File

@ -285,6 +285,13 @@ static struct witness_order_list_entry order_lists[] = {
{ "rtentry", &lock_class_mtx_sleep },
{ "ifaddr", &lock_class_mtx_sleep },
{ NULL, NULL },
/*
* Multicast - protocol locks before interface locks.
*/
{ "in_multi_mtx", &lock_class_mtx_sleep },
{ "igmp_mtx", &lock_class_mtx_sleep },
{ "if_addr_mtx", &lock_class_mtx_sleep },
{ NULL, NULL },
/*
* UNIX Domain Sockets
*/

View File

@ -286,6 +286,7 @@ igmp_input(register struct mbuf *m, int off)
* - Use the value specified in the query message as
* the maximum timeout.
*/
IN_MULTI_LOCK();
IN_FIRST_MULTI(step, inm);
while (inm != NULL) {
if (inm->inm_ifp == ifp &&
@ -301,6 +302,7 @@ igmp_input(register struct mbuf *m, int off)
}
IN_NEXT_MULTI(step, inm);
}
IN_MULTI_UNLOCK();
break;
@ -343,14 +345,15 @@ igmp_input(register struct mbuf *m, int off)
* If we belong to the group being reported, stop
* our timer for that group.
*/
IN_MULTI_LOCK();
IN_LOOKUP_MULTI(igmp->igmp_group, ifp, inm);
if (inm != NULL) {
inm->inm_timer = 0;
++igmpstat.igps_rcv_ourreports;
inm->inm_state = IGMP_OTHERMEMBER;
}
IN_MULTI_UNLOCK();
break;
}
@ -365,7 +368,8 @@ igmp_input(register struct mbuf *m, int off)
void
igmp_joingroup(struct in_multi *inm)
{
int s = splnet();
IN_MULTI_LOCK_ASSERT();
if (inm->inm_addr.s_addr == igmp_all_hosts_group
|| inm->inm_ifp->if_flags & IFF_LOOPBACK) {
@ -384,13 +388,14 @@ igmp_joingroup(struct in_multi *inm)
}
/* XXX handling of failure case? */
}
splx(s);
}
void
igmp_leavegroup(struct in_multi *inm)
{
IN_MULTI_LOCK_ASSERT();
if (inm->inm_state == IGMP_IREPORTEDLAST &&
inm->inm_addr.s_addr != igmp_all_hosts_group &&
!(inm->inm_ifp->if_flags & IFF_LOOPBACK) &&
@ -403,7 +408,6 @@ igmp_fasttimo(void)
{
register struct in_multi *inm;
struct in_multistep step;
int s;
/*
* Quick check to see if any work needs to be done, in order
@ -413,7 +417,7 @@ igmp_fasttimo(void)
if (!igmp_timers_are_running)
return;
s = splnet();
IN_MULTI_LOCK();
igmp_timers_are_running = 0;
IN_FIRST_MULTI(step, inm);
while (inm != NULL) {
@ -427,13 +431,12 @@ igmp_fasttimo(void)
}
IN_NEXT_MULTI(step, inm);
}
splx(s);
IN_MULTI_UNLOCK();
}
void
igmp_slowtimo(void)
{
int s = splnet();
struct router_info *rti;
IGMP_PRINTF("[igmp.c,_slowtimo] -- > entering \n");
@ -447,7 +450,6 @@ igmp_slowtimo(void)
}
mtx_unlock(&igmp_mtx);
IGMP_PRINTF("[igmp.c,_slowtimo] -- > exiting \n");
splx(s);
}
static void
@ -458,6 +460,8 @@ igmp_sendpkt(struct in_multi *inm, int type, unsigned long addr)
struct ip *ip;
struct ip_moptions imo;
IN_MULTI_LOCK_ASSERT();
MGETHDR(m, M_DONTWAIT, MT_HEADER);
if (m == NULL)
return;

View File

@ -68,7 +68,16 @@ static int subnetsarelocal = 0;
SYSCTL_INT(_net_inet_ip, OID_AUTO, subnets_are_local, CTLFLAG_RW,
&subnetsarelocal, 0, "Treat all subnets as directly connected");
/*
* The IPv4 multicast list (in_multihead and associated structures) are
* protected by the global in_multi_mtx. See in_var.h for more details. For
* now, in_multi_mtx is marked as recursible due to IGMP's calling back into
* ip_output() to send IGMP packets while holding the lock; this probably is
* not quite desirable.
*/
struct in_multihead in_multihead; /* XXX BSS initialization */
struct mtx in_multi_mtx;
MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF | MTX_RECURSE);
extern struct inpcbinfo ripcbinfo;
extern struct inpcbinfo udbinfo;
@ -949,8 +958,8 @@ in_addmulti(ap, ifp)
int error;
struct sockaddr_in sin;
struct ifmultiaddr *ifma;
int s = splnet();
IN_MULTI_LOCK();
/*
* Call generic routine to add membership or increment
* refcount. It wants addresses in the form of a sockaddr,
@ -962,7 +971,7 @@ in_addmulti(ap, ifp)
sin.sin_addr = *ap;
error = if_addmulti(ifp, (struct sockaddr *)&sin, &ifma);
if (error) {
splx(s);
IN_MULTI_UNLOCK();
return 0;
}
@ -971,16 +980,14 @@ in_addmulti(ap, ifp)
* a new record. Otherwise, we are done.
*/
if (ifma->ifma_protospec != NULL) {
splx(s);
IN_MULTI_UNLOCK();
return ifma->ifma_protospec;
}
/* XXX - if_addmulti uses M_WAITOK. Can this really be called
at interrupt time? If so, need to fix if_addmulti. XXX */
inm = (struct in_multi *)malloc(sizeof(*inm), M_IPMADDR,
M_NOWAIT | M_ZERO);
if (inm == NULL) {
splx(s);
IN_MULTI_UNLOCK();
return (NULL);
}
@ -994,7 +1001,7 @@ in_addmulti(ap, ifp)
* Let IGMP know that we have joined a new IP multicast group.
*/
igmp_joingroup(inm);
splx(s);
IN_MULTI_UNLOCK();
return (inm);
}
@ -1005,10 +1012,11 @@ void
in_delmulti(inm)
register struct in_multi *inm;
{
struct ifmultiaddr *ifma = inm->inm_ifma;
struct ifmultiaddr *ifma;
struct in_multi my_inm;
int s = splnet();
IN_MULTI_LOCK();
ifma = inm->inm_ifma;
my_inm.inm_ifp = NULL ; /* don't send the leave msg */
if (ifma->ifma_refcount == 1) {
/*
@ -1026,5 +1034,5 @@ in_delmulti(inm)
if_delmulti(ifma->ifma_ifp, ifma->ifma_addr);
if (my_inm.inm_ifp != NULL)
igmp_leavegroup(&my_inm);
splx(s);
IN_MULTI_UNLOCK();
}

View File

@ -166,6 +166,17 @@ SYSCTL_DECL(_net_inet_raw);
extern LIST_HEAD(in_multihead, in_multi) in_multihead;
/*
* Lock macros for IPv4 layer multicast address lists. IPv4 lock goes
* before link layer multicast locks in the lock order. In most cases,
* consumers of IN_*_MULTI() macros should acquire the locks before
* calling them; users of the in_{add,del}multi() functions should not.
*/
extern struct mtx in_multi_mtx;
#define IN_MULTI_LOCK() mtx_lock(&in_multi_mtx)
#define IN_MULTI_UNLOCK() mtx_unlock(&in_multi_mtx)
#define IN_MULTI_LOCK_ASSERT() mtx_assert(&in_multi_mtx, MA_OWNED)
/*
* Structure used by macros below to remember position when stepping through
* all of the in_multi records.
@ -185,6 +196,7 @@ struct in_multistep {
do { \
struct ifmultiaddr *ifma; \
\
IN_MULTI_LOCK_ASSERT(); \
IF_ADDR_LOCK(ifp); \
TAILQ_FOREACH(ifma, &((ifp)->if_multiaddrs), ifma_link) { \
if (ifma->ifma_addr->sa_family == AF_INET \
@ -207,6 +219,7 @@ do { \
/* struct in_multistep step; */ \
/* struct in_multi *inm; */ \
do { \
IN_MULTI_LOCK_ASSERT(); \
if (((inm) = (step).i_inm) != NULL) \
(step).i_inm = LIST_NEXT((step).i_inm, inm_link); \
} while(0)
@ -215,6 +228,7 @@ do { \
/* struct in_multistep step; */ \
/* struct in_multi *inm; */ \
do { \
IN_MULTI_LOCK_ASSERT(); \
(step).i_inm = LIST_FIRST(&in_multihead); \
IN_NEXT_MULTI((step), (inm)); \
} while(0)

View File

@ -607,7 +607,9 @@ passin:
* See if we belong to the destination multicast group on the
* arrival interface.
*/
IN_MULTI_LOCK();
IN_LOOKUP_MULTI(ip->ip_dst, m->m_pkthdr.rcvif, inm);
IN_MULTI_UNLOCK();
if (inm == NULL) {
ipstat.ips_notmember++;
m_freem(m);

View File

@ -291,9 +291,11 @@ again:
ip->ip_src = IA_SIN(ia)->sin_addr;
}
IN_MULTI_LOCK();
IN_LOOKUP_MULTI(ip->ip_dst, ifp, inm);
if (inm != NULL &&
(imo == NULL || imo->imo_multicast_loop)) {
IN_MULTI_UNLOCK();
/*
* If we belong to the destination multicast group
* on the outgoing interface, and the caller did not
@ -302,6 +304,7 @@ again:
ip_mloopback(ifp, m, dst, hlen);
}
else {
IN_MULTI_UNLOCK();
/*
* If we are acting as a multicast router, perform
* multicast forwarding as if the packet had just