diff --git a/share/man/man4/Makefile b/share/man/man4/Makefile index 61352e9f143c..9d6debbbff34 100644 --- a/share/man/man4/Makefile +++ b/share/man/man4/Makefile @@ -191,6 +191,7 @@ MAN= aac.4 \ meteor.4 \ mfi.4 \ miibus.4 \ + mld.4 \ mlx.4 \ mly.4 \ mmc.4 \ diff --git a/share/man/man4/multicast.4 b/share/man/man4/multicast.4 index 4fbe5b6bc56d..2f0954ee9a3e 100644 --- a/share/man/man4/multicast.4 +++ b/share/man/man4/multicast.4 @@ -25,7 +25,7 @@ .\" .\" $FreeBSD$ .\" -.Dd February 13, 2009 +.Dd May 27, 2009 .Dt MULTICAST 4 .Os .\" @@ -962,6 +962,7 @@ after the previous upcall. .Xr intro 4 , .Xr ip 4 , .Xr ip6 4 , +.Xr mld 4 , .Xr pim 4 .\" .Sh HISTORY @@ -1002,6 +1003,8 @@ monitoring were implemented by in collaboration with .An Chris Brown (NextHop). +The IGMPv3 and MLDv2 multicast support was implemented by +.An Bruce Simpson . .Pp This manual page was written by .An Pavlin Radoslavov diff --git a/sys/netinet6/icmp6.c b/sys/netinet6/icmp6.c index 1c8a132ce3f0..f49a40796ece 100644 --- a/sys/netinet6/icmp6.c +++ b/sys/netinet6/icmp6.c @@ -403,6 +403,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) INIT_VNET_INET6(curvnet); INIT_VPROCG(TD_TO_VPROCG(curthread)); /* XXX V_hostname needs this */ struct mbuf *m = *mp, *n; + struct ifnet *ifp; struct ip6_hdr *ip6, *nip6; struct icmp6_hdr *icmp6, *nicmp6; int off = *offp; @@ -410,6 +411,8 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) int code, sum, noff; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; + ifp = m->m_pkthdr.rcvif; + #ifndef PULLDOWN_TEST IP6_EXTHDR_CHECK(m, off, sizeof(struct icmp6_hdr), IPPROTO_DONE); /* m might change if M_LOOP. So, call mtod after this */ @@ -431,10 +434,8 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) * Note: SSM filters are not applied for ICMPv6 traffic. 
*/ if (IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { - struct ifnet *ifp; - struct in6_multi *inm; + struct in6_multi *inm; - ifp = m->m_pkthdr.rcvif; inm = in6m_lookup(ifp, &ip6->ip6_dst); if (inm == NULL) { IP6STAT_INC(ip6s_notmember); @@ -483,19 +484,19 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) } ICMP6STAT_INC(icp6s_inhist[icmp6->icmp6_type]); - icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_msg); + icmp6_ifstat_inc(ifp, ifs6_in_msg); if (icmp6->icmp6_type < ICMP6_INFOMSG_MASK) - icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_error); + icmp6_ifstat_inc(ifp, ifs6_in_error); switch (icmp6->icmp6_type) { case ICMP6_DST_UNREACH: - icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_dstunreach); + icmp6_ifstat_inc(ifp, ifs6_in_dstunreach); switch (code) { case ICMP6_DST_UNREACH_NOROUTE: code = PRC_UNREACH_NET; break; case ICMP6_DST_UNREACH_ADMIN: - icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_adminprohib); + icmp6_ifstat_inc(ifp, ifs6_in_adminprohib); code = PRC_UNREACH_PROTOCOL; /* is this a good code? 
*/ break; case ICMP6_DST_UNREACH_ADDR: @@ -515,7 +516,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) break; case ICMP6_PACKET_TOO_BIG: - icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_pkttoobig); + icmp6_ifstat_inc(ifp, ifs6_in_pkttoobig); /* validation is made in icmp6_mtudisc_update */ @@ -529,7 +530,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) break; case ICMP6_TIME_EXCEEDED: - icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_timeexceed); + icmp6_ifstat_inc(ifp, ifs6_in_timeexceed); switch (code) { case ICMP6_TIME_EXCEED_TRANSIT: code = PRC_TIMXCEED_INTRANS; @@ -544,7 +545,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) break; case ICMP6_PARAM_PROB: - icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_paramprob); + icmp6_ifstat_inc(ifp, ifs6_in_paramprob); switch (code) { case ICMP6_PARAMPROB_NEXTHEADER: code = PRC_UNREACH_PROTOCOL; @@ -560,7 +561,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) break; case ICMP6_ECHO_REQUEST: - icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echo); + icmp6_ifstat_inc(ifp, ifs6_in_echo); if (code != 0) goto badcode; if ((n = m_copy(m, 0, M_COPYALL)) == NULL) { @@ -623,7 +624,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) break; case ICMP6_ECHO_REPLY: - icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_echoreply); + icmp6_ifstat_inc(ifp, ifs6_in_echoreply); if (code != 0) goto badcode; break; @@ -633,11 +634,15 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) case MLD_LISTENER_DONE: case MLDV2_LISTENER_REPORT: /* - * Drop MLD traffic which is not link-local. + * Drop MLD traffic which is not link-local, has a hop limit + * other than 1, or which does not have the + * IPv6 HBH Router Alert option. + * As IPv6 HBH options are stripped in ip6_input() we must + * check an mbuf header flag. * XXX Should we also sanity check that these messages * were directed to a link-local multicast prefix? 
*/ - if (ip6->ip6_hlim != 1) + if ((ip6->ip6_hlim != 1) || (m->m_flags & M_RTALERT_MLD) == 0) goto freeit; if (mld_input(m, off, icmp6len) != 0) return (IPPROTO_DONE); @@ -748,7 +753,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) break; case ND_ROUTER_SOLICIT: - icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routersolicit); + icmp6_ifstat_inc(ifp, ifs6_in_routersolicit); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_router_solicit)) @@ -764,7 +769,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) break; case ND_ROUTER_ADVERT: - icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_routeradvert); + icmp6_ifstat_inc(ifp, ifs6_in_routeradvert); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_router_advert)) @@ -780,7 +785,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) break; case ND_NEIGHBOR_SOLICIT: - icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighborsolicit); + icmp6_ifstat_inc(ifp, ifs6_in_neighborsolicit); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_solicit)) @@ -796,7 +801,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) break; case ND_NEIGHBOR_ADVERT: - icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_neighboradvert); + icmp6_ifstat_inc(ifp, ifs6_in_neighboradvert); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_neighbor_advert)) @@ -812,7 +817,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) break; case ND_REDIRECT: - icmp6_ifstat_inc(m->m_pkthdr.rcvif, ifs6_in_redirect); + icmp6_ifstat_inc(ifp, ifs6_in_redirect); if (code != 0) goto badcode; if (icmp6len < sizeof(struct nd_redirect)) @@ -840,7 +845,7 @@ icmp6_input(struct mbuf **mp, int *offp, int proto) "icmp6_input: unknown type %d(src=%s, dst=%s, ifid=%d)\n", icmp6->icmp6_type, ip6_sprintf(ip6bufs, &ip6->ip6_src), ip6_sprintf(ip6bufd, &ip6->ip6_dst), - m->m_pkthdr.rcvif ? m->m_pkthdr.rcvif->if_index : 0)); + ifp ? 
ifp->if_index : 0)); if (icmp6->icmp6_type < ICMP6_ECHO_REQUEST) { /* ICMPv6 error: MUST deliver it by spec... */ code = PRC_NCMDS; diff --git a/sys/netinet6/in6.h b/sys/netinet6/in6.h index ecec6e4963bf..751569c6151e 100644 --- a/sys/netinet6/in6.h +++ b/sys/netinet6/in6.h @@ -619,6 +619,7 @@ struct ip6_mtuinfo { #define M_DECRYPTED M_PROTO3 #define M_LOOP M_PROTO4 #define M_AUTHIPDGM M_PROTO5 +#define M_RTALERT_MLD M_PROTO6 #ifdef _KERNEL struct cmsghdr; diff --git a/sys/netinet6/in6_mcast.c b/sys/netinet6/in6_mcast.c index 80dfcf19bac5..678dda024d7b 100644 --- a/sys/netinet6/in6_mcast.c +++ b/sys/netinet6/in6_mcast.c @@ -305,6 +305,10 @@ im6o_match_group(const struct ip6_moptions *imo, const struct ifnet *ifp, * Find an IPv6 multicast source entry for this imo which matches * the given group index for this socket, and source address. * + * XXX TODO: The scope ID, if present in src, is stripped before + * any comparison. We SHOULD enforce scope/zone checks where the source + * filter entry has a link scope. + * * NOTE: This does not check if the entry is in-mode, merely if * it exists, which may not be the desired behaviour. */ @@ -328,6 +332,7 @@ im6o_match_source(const struct ip6_moptions *imo, const size_t gidx, psa = (const sockunion_t *)src; find.im6s_addr = psa->sin6.sin6_addr; + in6_clearscope(&find.im6s_addr); /* XXX */ ims = RB_FIND(ip6_msource_tree, &imf->im6f_sources, &find); return ((struct in6_msource *)ims); @@ -1159,6 +1164,20 @@ in6_mc_join_locked(struct ifnet *ifp, const struct in6_addr *mcaddr, char ip6tbuf[INET6_ADDRSTRLEN]; #endif +#ifdef INVARIANTS + /* + * Sanity: Check scope zone ID was set for ifp, if and + * only if group is scoped to an interface. 
+ */ + KASSERT(IN6_IS_ADDR_MULTICAST(mcaddr), + ("%s: not a multicast address", __func__)); + if (IN6_IS_ADDR_MC_LINKLOCAL(mcaddr) || + IN6_IS_ADDR_MC_INTFACELOCAL(mcaddr)) { + KASSERT(mcaddr->s6_addr16[1] != 0, + ("%s: scope zone ID not set", __func__)); + } +#endif + IN6_MULTI_LOCK_ASSERT(); CTR4(KTR_MLD, "%s: join %s on %p(%s))", __func__, @@ -1360,6 +1379,8 @@ in6p_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr)) return (EINVAL); + (void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL); + /* * Check if we are actually a member of this group. */ @@ -1566,19 +1587,26 @@ in6p_get_source_filters(struct inpcb *inp, struct sockopt *sopt) if (error) return (error); - if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) + if (msfr.msfr_group.ss_family != AF_INET6 || + msfr.msfr_group.ss_len != sizeof(struct sockaddr_in6)) return (EINVAL); + gsa = (sockunion_t *)&msfr.msfr_group; + if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr)) + return (EINVAL); + + if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) + return (EADDRNOTAVAIL); ifp = ifnet_byindex(msfr.msfr_ifindex); if (ifp == NULL) - return (EINVAL); + return (EADDRNOTAVAIL); + (void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL); INP_WLOCK(inp); /* * Lookup group on the socket. */ - gsa = (sockunion_t *)&msfr.msfr_group; idx = im6o_match_group(imo, ifp, &gsa->sa); if (idx == -1 || imo->im6o_mfilters == NULL) { INP_WUNLOCK(inp); @@ -1803,6 +1831,12 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt) ssa = (sockunion_t *)&gsr.gsr_source; ssa->ss.ss_family = AF_UNSPEC; + /* + * Chew everything into struct group_source_req. + * Overwrite the port field if present, as the sockaddr + * being copied in may be matched with a binary comparison. + * Ignore passed-in scope ID. 
+ */ switch (sopt->sopt_name) { case IPV6_JOIN_GROUP: { struct ipv6_mreq mreq; @@ -1846,16 +1880,20 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt) gsa->sin6.sin6_len != sizeof(struct sockaddr_in6)) return (EINVAL); - /* - * Overwrite the port field if present, as the sockaddr - * being copied in may be matched with a binary comparison. - */ - gsa->sin6.sin6_port = 0; if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { if (ssa->sin6.sin6_family != AF_INET6 || ssa->sin6.sin6_len != sizeof(struct sockaddr_in6)) return (EINVAL); + if (IN6_IS_ADDR_MULTICAST(&ssa->sin6.sin6_addr)) + return (EINVAL); + /* + * TODO: Validate embedded scope ID in source + * list entry against passed-in ifp, if and only + * if source list filter entry is iface or node local. + */ + in6_clearscope(&ssa->sin6.sin6_addr); ssa->sin6.sin6_port = 0; + ssa->sin6.sin6_scope_id = 0; } if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) @@ -1870,34 +1908,22 @@ in6p_join_group(struct inpcb *inp, struct sockopt *sopt) break; } -#ifdef notyet - /* - * FIXME: Check for unspecified address (all groups). - * Do we have a normative reference for this 'feature'? - * - * We use the unspecified address to specify to accept - * all multicast addresses. Only super user is allowed - * to do this. - * XXX-BZ might need a better PRIV_NETINET_x for this - */ - if (IN6_IS_ADDR_UNSPECIFIED(&gsa->sin6.sin6_addr)) { - error = priv_check(curthread, PRIV_NETINET_MROUTE); - if (error) - break; - } else -#endif if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr)) return (EINVAL); if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) return (EADDRNOTAVAIL); -#ifdef notyet + gsa->sin6.sin6_port = 0; + gsa->sin6.sin6_scope_id = 0; + /* - * FIXME: Set interface scope in group address. + * Always set the scope zone ID on memberships created from userland. + * Use the passed-in ifp to do this. + * XXX The in6_setscope() return value is meaningless. + * XXX SCOPE6_LOCK() is taken by in6_setscope(). 
*/ - (void)in6_setscope(&gsa->sin6.sin_addr, ifp, NULL); -#endif + (void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL); /* * MCAST_JOIN_SOURCE on an exclusive membership is an error. @@ -2031,6 +2057,8 @@ static int in6p_leave_group(struct inpcb *inp, struct sockopt *sopt) { INIT_VNET_NET(curvnet); + INIT_VNET_INET6(curvnet); + struct ipv6_mreq mreq; struct group_source_req gsr; sockunion_t *gsa, *ssa; struct ifnet *ifp; @@ -2038,6 +2066,7 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt) struct ip6_moptions *imo; struct in6_msource *ims; struct in6_multi *inm; + uint32_t ifindex; size_t idx; int error, is_final; #ifdef KTR @@ -2045,6 +2074,7 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt) #endif ifp = NULL; + ifindex = 0; error = 0; is_final = 1; @@ -2054,39 +2084,26 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt) ssa = (sockunion_t *)&gsr.gsr_source; ssa->ss.ss_family = AF_UNSPEC; + /* + * Chew everything passed in up into a struct group_source_req + * as that is easier to process. + * Note: Any embedded scope ID in the multicast group passed + * in by userland is ignored, the interface index is the recommended + * mechanism to specify an interface; see below. + */ switch (sopt->sopt_name) { - case IPV6_LEAVE_GROUP: { - struct ipv6_mreq mreq; - + case IPV6_LEAVE_GROUP: error = sooptcopyin(sopt, &mreq, sizeof(struct ipv6_mreq), sizeof(struct ipv6_mreq)); if (error) return (error); - gsa->sin6.sin6_family = AF_INET6; gsa->sin6.sin6_len = sizeof(struct sockaddr_in6); gsa->sin6.sin6_addr = mreq.ipv6mr_multiaddr; - - if (mreq.ipv6mr_interface == 0) { -#ifdef notyet - /* - * FIXME: Resolve scope ambiguity when interface - * index is unspecified. 
- */ - ifp = in6p_lookup_mcast_ifp(inp, &gsa->sin6); -#else - return (EADDRNOTAVAIL); -#endif - } else { - if (mreq.ipv6mr_interface < 0 || - V_if_index < mreq.ipv6mr_interface) - return (EADDRNOTAVAIL); - ifp = ifnet_byindex(mreq.ipv6mr_interface); - } - - CTR3(KTR_MLD, "%s: ipv6mr_interface = %d, ifp = %p", - __func__, mreq.ipv6mr_interface, ifp); - } break; + gsa->sin6.sin6_port = 0; + gsa->sin6.sin6_scope_id = 0; + ifindex = mreq.ipv6mr_interface; + break; case MCAST_LEAVE_GROUP: case MCAST_LEAVE_SOURCE_GROUP: @@ -2105,17 +2122,22 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt) if (gsa->sin6.sin6_family != AF_INET6 || gsa->sin6.sin6_len != sizeof(struct sockaddr_in6)) return (EINVAL); - if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { if (ssa->sin6.sin6_family != AF_INET6 || ssa->sin6.sin6_len != sizeof(struct sockaddr_in6)) return (EINVAL); + if (IN6_IS_ADDR_MULTICAST(&ssa->sin6.sin6_addr)) + return (EINVAL); + /* + * TODO: Validate embedded scope ID in source + * list entry against passed-in ifp, if and only + * if source list filter entry is iface or node local. + */ + in6_clearscope(&ssa->sin6.sin6_addr); } - - if (gsr.gsr_interface == 0 || V_if_index < gsr.gsr_interface) - return (EADDRNOTAVAIL); - - ifp = ifnet_byindex(gsr.gsr_interface); + gsa->sin6.sin6_port = 0; + gsa->sin6.sin6_scope_id = 0; + ifindex = gsr.gsr_interface; break; default: @@ -2128,14 +2150,39 @@ in6p_leave_group(struct inpcb *inp, struct sockopt *sopt) if (!IN6_IS_ADDR_MULTICAST(&gsa->sin6.sin6_addr)) return (EINVAL); -#ifdef notyet /* - * FIXME: Need to embed ifp's scope ID in the address - * handed down to MLD. - * See KAME IPV6_LEAVE_GROUP implementation. + * Validate interface index if provided. If no interface index + * was provided separately, attempt to look the membership up + * from the default scope as a last resort to disambiguate + * the membership we are being asked to leave. + * XXX SCOPE6 lock potentially taken here. 
*/ - (void)in6_setscope(&mreq->ipv6mr_multiaddr, ifp, NULL); -#endif + if (ifindex != 0) { + if (ifindex < 0 || V_if_index < ifindex) + return (EADDRNOTAVAIL); + ifp = ifnet_byindex(ifindex); + if (ifp == NULL) + return (EADDRNOTAVAIL); + (void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL); + } else { + error = sa6_embedscope(&gsa->sin6, V_ip6_use_defzone); + if (error) + return (EADDRNOTAVAIL); + /* + * XXX For now, stomp on zone ID for the corner case. + * This is not the 'KAME way', but we need to see the ifp + * directly until such time as this implementation is + * refactored, assuming the scope IDs are the way to go. + */ + ifindex = ntohs(gsa->sin6.sin6_addr.s6_addr16[1]); + KASSERT(ifindex != 0, ("%s: bad zone ID", __func__)); + ifp = ifnet_byindex(ifindex); + if (ifp == NULL) + return (EADDRNOTAVAIL); + } + + CTR2(KTR_MLD, "%s: ifp = %p", __func__, ifp); + KASSERT(ifp != NULL, ("%s: ifp did not resolve", __func__)); /* * Find the membership in the membership array. @@ -2312,10 +2359,10 @@ in6p_set_source_filters(struct inpcb *inp, struct sockopt *sopt) if (msfr.msfr_ifindex == 0 || V_if_index < msfr.msfr_ifindex) return (EADDRNOTAVAIL); - ifp = ifnet_byindex(msfr.msfr_ifindex); if (ifp == NULL) return (EADDRNOTAVAIL); + (void)in6_setscope(&gsa->sin6.sin6_addr, ifp, NULL); /* * Take the INP write lock. @@ -2393,6 +2440,16 @@ in6p_set_source_filters(struct inpcb *inp, struct sockopt *sopt) error = EINVAL; break; } + if (IN6_IS_ADDR_MULTICAST(&psin->sin6_addr)) { + error = EINVAL; + break; + } + /* + * TODO: Validate embedded scope ID in source + * list entry against passed-in ifp, if and only + * if source list filter entry is iface or node local. 
+ */ + in6_clearscope(&psin->sin6_addr); error = im6f_get_source(imf, psin, &lims); if (error) break; @@ -2560,7 +2617,7 @@ static int sysctl_ip6_mcast_filters(SYSCTL_HANDLER_ARGS) { INIT_VNET_NET(curvnet); - struct in6_addr *pgina; + struct in6_addr mcaddr; struct in6_addr src; struct ifnet *ifp; struct ifmultiaddr *ifma; @@ -2591,10 +2648,10 @@ sysctl_ip6_mcast_filters(SYSCTL_HANDLER_ARGS) return (ENOENT); } - pgina = (struct in6_addr *)&name[1]; - if (!IN6_IS_ADDR_MULTICAST(pgina)) { + memcpy(&mcaddr, &name[1], sizeof(struct in6_addr)); + if (!IN6_IS_ADDR_MULTICAST(&mcaddr)) { CTR2(KTR_MLD, "%s: group %s is not multicast", - __func__, ip6_sprintf(ip6tbuf, pgina)); + __func__, ip6_sprintf(ip6tbuf, &mcaddr)); return (EINVAL); } @@ -2604,6 +2661,10 @@ sysctl_ip6_mcast_filters(SYSCTL_HANDLER_ARGS) __func__, ifindex); return (ENOENT); } + /* + * Internal MLD lookups require that scope/zone ID is set. + */ + (void)in6_setscope(&mcaddr, ifp, NULL); retval = sysctl_wire_old_buffer(req, sizeof(uint32_t) + (in6_mcast_maxgrpsrc * sizeof(struct in6_addr))); @@ -2618,7 +2679,7 @@ sysctl_ip6_mcast_filters(SYSCTL_HANDLER_ARGS) ifma->ifma_protospec == NULL) continue; inm = (struct in6_multi *)ifma->ifma_protospec; - if (!IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, pgina)) + if (!IN6_ARE_ADDR_EQUAL(&inm->in6m_addr, &mcaddr)) continue; fmode = inm->in6m_st[1].iss_fmode; retval = SYSCTL_OUT(req, &fmode, sizeof(uint32_t)); diff --git a/sys/netinet6/ip6_input.c b/sys/netinet6/ip6_input.c index b0cf3448e4f8..749886011a3c 100644 --- a/sys/netinet6/ip6_input.c +++ b/sys/netinet6/ip6_input.c @@ -773,10 +773,11 @@ ip6_input(struct mbuf *m) * case we should pass the packet to the multicast routing * daemon. 
*/ - if (rtalert != ~0 && V_ip6_forwarding) { + if (rtalert != ~0) { switch (rtalert) { case IP6OPT_RTALERT_MLD: - ours = 1; + if (V_ip6_forwarding) + ours = 1; break; default: /* @@ -820,6 +821,9 @@ ip6_input(struct mbuf *m) * The packet is returned (relatively) intact; if * ip6_mforward() returns a non-zero value, the packet * must be discarded, else it may be accepted below. + * + * XXX TODO: Check hlim and multicast scope here to avoid + * unnecessarily calling into ip6_mforward(). */ if (ip6_mforward && ip6_mforward(ip6, m->m_pkthdr.rcvif, m)) { @@ -882,6 +886,14 @@ ip6_input(struct mbuf *m) if (ip6_ipsec_input(m, nxt)) goto bad; #endif /* IPSEC */ + + /* + * Use mbuf flags to propagate Router Alert option to + * ICMPv6 layer, as hop-by-hop options have been stripped. + */ + if (nxt == IPPROTO_ICMPV6 && rtalert != ~0) + m->m_flags |= M_RTALERT_MLD; + nxt = (*inet6sw[ip6_protox[nxt]].pr_input)(&m, &off, nxt); } goto out; diff --git a/sys/netinet6/mld6.c b/sys/netinet6/mld6.c index 1eeeba14ec0e..31278a802457 100644 --- a/sys/netinet6/mld6.c +++ b/sys/netinet6/mld6.c @@ -122,9 +122,9 @@ static void mld_slowtimo_vnet(void); static void mld_sysinit(void); static void mld_sysuninit(void); static int mld_v1_input_query(struct ifnet *, const struct ip6_hdr *, - const struct mld_hdr *); + /*const*/ struct mld_hdr *); static int mld_v1_input_report(struct ifnet *, const struct ip6_hdr *, - const struct mld_hdr *); + /*const*/ struct mld_hdr *); static void mld_v1_process_group_timer(struct in6_multi *, const int); static void mld_v1_process_querier_timers(struct mld_ifinfo *); static int mld_v1_transmit_report(struct in6_multi *, const int); @@ -239,6 +239,11 @@ SYSCTL_V_PROC(V_NET, vnet_inet6, _net_inet6_mld, OID_AUTO, gsrdelay, SYSCTL_NODE(_net_inet6_mld, OID_AUTO, ifinfo, CTLFLAG_RD | CTLFLAG_MPSAFE, sysctl_mld_ifinfo, "Per-interface MLDv2 state"); +static int mld_v1enable = 1; +SYSCTL_INT(_net_inet6_mld, OID_AUTO, v1enable, CTLFLAG_RW, + &mld_v1enable, 0, "Enable 
fallback to MLDv1"); +TUNABLE_INT("net.inet6.mld.v1enable", &mld_v1enable); + /* * Packed Router Alert option structure declaration. */ @@ -615,36 +620,97 @@ mli_delete_locked(const struct ifnet *ifp) /* * Process a received MLDv1 general or address-specific query. * Assumes that the query header has been pulled up to sizeof(mld_hdr). + * + * NOTE: Can't be fully const correct as we temporarily embed scope ID in + * mld_addr. This is OK as we own the mbuf chain. */ static int mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, - const struct mld_hdr *mld) + /*const*/ struct mld_hdr *mld) { struct ifmultiaddr *ifma; struct mld_ifinfo *mli; struct in6_multi *inm; + int is_general_query; uint16_t timer; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif + is_general_query = 0; + + if (!mld_v1enable) { + CTR3(KTR_MLD, "ignore v1 query %s on ifp %p(%s)", + ip6_sprintf(ip6tbuf, &mld->mld_addr), + ifp, ifp->if_xname); + return (0); + } + + /* + * RFC3810 Section 6.2: MLD queries must originate from + * a router's link-local address. + */ + if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { + CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)", + ip6_sprintf(ip6tbuf, &ip6->ip6_src), + ifp, ifp->if_xname); + return (0); + } + + /* + * Do address field validation upfront before we accept + * the query. + */ + if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) { + /* + * MLDv1 General Query. + * If this was not sent to the all-nodes group, ignore it. + */ + struct in6_addr dst; + + dst = ip6->ip6_dst; + in6_clearscope(&dst); + if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes)) + return (EINVAL); + is_general_query = 1; + } else { + /* + * Embed scope ID of receiving interface in MLD query for + * lookup whilst we don't hold other locks. 
+ */ + in6_setscope(&mld->mld_addr, ifp, NULL); + } + IN6_MULTI_LOCK(); MLD_LOCK(); IF_ADDR_LOCK(ifp); - mli = MLD_IFINFO(ifp); - KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp)); - /* * Switch to MLDv1 host compatibility mode. */ + mli = MLD_IFINFO(ifp); + KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp)); mld_set_version(mli, MLD_VERSION_1); - timer = ntohs(mld->mld_maxdelay) * PR_FASTHZ / MLD_TIMER_SCALE; + timer = (ntohs(mld->mld_maxdelay) * PR_FASTHZ) / MLD_TIMER_SCALE; if (timer == 0) timer = 1; - if (!IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) { + if (is_general_query) { + /* + * For each reporting group joined on this + * interface, kick the report timer. + */ + CTR2(KTR_MLD, "process v1 general query on ifp %p(%s)", + ifp, ifp->if_xname); + TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { + if (ifma->ifma_addr->sa_family != AF_INET6 || + ifma->ifma_protospec == NULL) + continue; + inm = (struct in6_multi *)ifma->ifma_protospec; + mld_v1_update_group(inm, timer); + } + } else { /* * MLDv1 Group-Specific Query. * If this is a group-specific MLDv1 query, we need only @@ -657,32 +723,8 @@ mld_v1_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, ifp, ifp->if_xname); mld_v1_update_group(inm, timer); } - } else { - /* - * MLDv1 General Query. - * If this was not sent to the all-nodes group, ignore it. - */ - struct in6_addr dst; - - dst = ip6->ip6_dst; - in6_clearscope(&dst); - if (IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes)) { - /* - * For each reporting group joined on this - * interface, kick the report timer. - */ - CTR2(KTR_MLD, - "process v1 general query on ifp %p(%s)", - ifp, ifp->if_xname); - - TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { - if (ifma->ifma_addr->sa_family != AF_INET6 || - ifma->ifma_protospec == NULL) - continue; - inm = (struct in6_multi *)ifma->ifma_protospec; - mld_v1_update_group(inm, timer); - } - } + /* XXX Clear embedded scope ID as userland won't expect it. 
*/ + in6_clearscope(&mld->mld_addr); } IF_ADDR_UNLOCK(ifp); @@ -769,18 +811,38 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, struct mldv2_query *mld; struct in6_multi *inm; uint32_t maxdelay, nsrc, qqi; + int is_general_query; uint16_t timer; uint8_t qrv; +#ifdef KTR + char ip6tbuf[INET6_ADDRSTRLEN]; +#endif - CTR2(KTR_MLD, "process v2 query on ifp %p(%s)", ifp, ifp->if_xname); + is_general_query = 0; + + /* + * RFC3810 Section 6.2: MLD queries must originate from + * a router's link-local address. + */ + if (!IN6_IS_SCOPE_LINKLOCAL(&ip6->ip6_src)) { + CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)", + ip6_sprintf(ip6tbuf, &ip6->ip6_src), + ifp, ifp->if_xname); + return (0); + } + + CTR2(KTR_MLD, "input v2 query on ifp %p(%s)", ifp, ifp->if_xname); mld = (struct mldv2_query *)(mtod(m, uint8_t *) + off); maxdelay = ntohs(mld->mld_maxdelay); /* in 1/10ths of a second */ if (maxdelay >= 32678) { - maxdelay = (MLD_MRC_MANT(mld->mld_maxdelay) | 0x1000) << - (MLD_MRC_EXP(mld->mld_maxdelay) + 3); + maxdelay = (MLD_MRC_MANT(maxdelay) | 0x1000) << + (MLD_MRC_EXP(maxdelay) + 3); } + timer = (maxdelay * PR_FASTHZ) / MLD_TIMER_SCALE; + if (timer == 0) + timer = 1; qrv = MLD_QRV(mld->mld_misc); if (qrv < 2) { @@ -795,10 +857,6 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, (MLD_QQIC_EXP(mld->mld_qqi) + 3); } - timer = maxdelay * PR_FASTHZ / MLD_TIMER_SCALE; - if (timer == 0) - timer = 1; - nsrc = ntohs(mld->mld_numsrc); if (nsrc > MLD_MAX_GS_SOURCES) return (EMSGSIZE); @@ -806,6 +864,33 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, (nsrc * sizeof(struct in6_addr))) return (EMSGSIZE); + /* + * Do further input validation upfront to avoid resetting timers + * should we need to discard this query. + */ + if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) { + /* + * General Queries SHOULD be directed to ff02::1. + * A general query with a source list has undefined + * behaviour; discard it. 
+ */ + struct in6_addr dst; + + dst = ip6->ip6_dst; + in6_clearscope(&dst); + if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes) || + nsrc > 0) + return (EINVAL); + is_general_query = 1; + } else { + /* + * Embed scope ID of receiving interface in MLD query for + * lookup whilst we don't hold other locks (due to KAME + * locking lameness). We own this mbuf chain just now. + */ + in6_setscope(&mld->mld_addr, ifp, NULL); + } + IN6_MULTI_LOCK(); MLD_LOCK(); IF_ADDR_LOCK(ifp); @@ -813,8 +898,15 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, mli = MLD_IFINFO(ifp); KASSERT(mli != NULL, ("%s: no mld_ifinfo for ifp %p", __func__, ifp)); - mld_set_version(mli, MLD_VERSION_2); + /* + * Discard the v2 query if we're in Compatibility Mode. + * The RFC is pretty clear that hosts need to stay in MLDv1 mode + * until the Old Version Querier Present timer expires. + */ + if (mli->mli_version != MLD_VERSION_2) + goto out_locked; + mld_set_version(mli, MLD_VERSION_2); mli->mli_rv = qrv; mli->mli_qi = qqi; mli->mli_qri = maxdelay; @@ -822,39 +914,20 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, CTR4(KTR_MLD, "%s: qrv %d qi %d maxdelay %d", __func__, qrv, qqi, maxdelay); - if (IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) { + if (is_general_query) { /* * MLDv2 General Query. * * Schedule a current-state report on this ifp for * all groups, possibly containing source lists. * - * Strip scope ID embedded by ip6_input(). We do not need - * to do this for the MLD payload. - */ - struct in6_addr dst; - - dst = ip6->ip6_dst; - in6_clearscope(&dst); - if (!IN6_ARE_ADDR_EQUAL(&dst, &in6addr_linklocal_allnodes) || - nsrc > 0) { - /* - * General Queries SHOULD be directed to ff02::1. - * A general query with a source list has undefined - * behaviour; discard it. 
- */ - goto out_locked; - } - - CTR2(KTR_MLD, "process v2 general query on ifp %p(%s)", - ifp, ifp->if_xname); - - /* * If there is a pending General Query response * scheduled earlier than the selected delay, do * not schedule any other reports. * Otherwise, reset the interface timer. */ + CTR2(KTR_MLD, "process v2 general query on ifp %p(%s)", + ifp, ifp->if_xname); if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) { mli->mli_v2_timer = MLD_RANDOM_DELAY(timer); V_interface_timers_running6 = 1; @@ -890,6 +963,9 @@ mld_v2_input_query(struct ifnet *ifp, const struct ip6_hdr *ip6, */ if (mli->mli_v2_timer == 0 || mli->mli_v2_timer >= timer) mld_v2_process_group_query(inm, mli, timer, m, off); + + /* XXX Clear embedded scope ID as userland won't expect it. */ + in6_clearscope(&mld->mld_addr); } out_locked: @@ -1017,27 +1093,57 @@ mld_v2_process_group_query(struct in6_multi *inm, struct mld_ifinfo *mli, /* * Process a received MLDv1 host membership report. * Assumes mld points to mld_hdr in pulled up mbuf chain. + * + * NOTE: Can't be fully const correct as we temporarily embed scope ID in + * mld_addr. This is OK as we own the mbuf chain. */ static int mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6, - const struct mld_hdr *mld) + /*const*/ struct mld_hdr *mld) { + struct in6_addr src, dst; struct in6_ifaddr *ia; struct in6_multi *inm; - struct in6_addr src, dst; #ifdef KTR char ip6tbuf[INET6_ADDRSTRLEN]; #endif + if (!mld_v1enable) { + CTR3(KTR_MLD, "ignore v1 report %s on ifp %p(%s)", + ip6_sprintf(ip6tbuf, &mld->mld_addr), + ifp, ifp->if_xname); + return (0); + } + if (ifp->if_flags & IFF_LOOPBACK) return (0); - if (!IN6_IS_ADDR_MULTICAST(&mld->mld_addr)) - return (EINVAL); + /* + * MLDv1 reports must originate from a host's link-local address, + * or the unspecified address (when booting). 
+ */ + src = ip6->ip6_src; + in6_clearscope(&src); + if (!IN6_IS_SCOPE_LINKLOCAL(&src) && !IN6_IS_ADDR_UNSPECIFIED(&src)) { + CTR3(KTR_MLD, "ignore v1 query src %s on ifp %p(%s)", + ip6_sprintf(ip6tbuf, &ip6->ip6_src), + ifp, ifp->if_xname); + return (EINVAL); + } + + /* + * RFC2710 Section 4: MLDv1 reports must pertain to a multicast + * group, and must be directed to the group itself. + */ dst = ip6->ip6_dst; in6_clearscope(&dst); - if (!IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) + if (!IN6_IS_ADDR_MULTICAST(&mld->mld_addr) || + !IN6_ARE_ADDR_EQUAL(&mld->mld_addr, &dst)) { + CTR3(KTR_MLD, "ignore v1 query dst %s on ifp %p(%s)", + ip6_sprintf(ip6tbuf, &ip6->ip6_dst), + ifp, ifp->if_xname); return (EINVAL); + } /* * Make sure we don't hear our own membership report, as fast @@ -1050,8 +1156,6 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6, * performed for the on-wire address. */ ia = in6ifa_ifpforlinklocal(ifp, IN6_IFF_NOTREADY|IN6_IFF_ANYCAST); - src = ip6->ip6_src; - in6_clearscope(&src); if ((ia && IN6_ARE_ADDR_EQUAL(&ip6->ip6_src, IA6_IN6(ia))) || (ia == NULL && IN6_IS_ADDR_UNSPECIFIED(&src))) return (0); @@ -1059,6 +1163,13 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6, CTR3(KTR_MLD, "process v1 report %s on ifp %p(%s)", ip6_sprintf(ip6tbuf, &mld->mld_addr), ifp, ifp->if_xname); + /* + * Embed scope ID of receiving interface in MLD report for lookup + * whilst we don't hold other locks (due to KAME locking lameness). + */ + if (!IN6_IS_ADDR_UNSPECIFIED(&mld->mld_addr)) + in6_setscope(&mld->mld_addr, ifp, NULL); + IN6_MULTI_LOCK(); MLD_LOCK(); IF_ADDR_LOCK(ifp); @@ -1113,6 +1224,9 @@ mld_v1_input_report(struct ifnet *ifp, const struct ip6_hdr *ip6, IF_ADDR_UNLOCK(ifp); IN6_MULTI_UNLOCK(); + /* XXX Clear embedded scope ID as userland won't expect it. 
*/ + in6_clearscope(&mld->mld_addr); + return (0); } @@ -1156,6 +1270,10 @@ mld_input(struct mbuf *m, int off, int icmp6len) return (IPPROTO_DONE); } + /* + * Userland needs to see all of this traffic for implementing + * the endpoint discovery portion of multicast routing. + */ switch (mld->mld_type) { case MLD_LISTENER_QUERY: icmp6_ifstat_inc(ifp, ifs6_in_mldquery); @@ -1171,7 +1289,7 @@ mld_input(struct mbuf *m, int off, int icmp6len) case MLD_LISTENER_REPORT: icmp6_ifstat_inc(ifp, ifs6_in_mldreport); if (mld_v1_input_report(ifp, ip6, mld) != 0) - return (0); /* Userland needs to see it. */ + return (0); break; case MLDV2_LISTENER_REPORT: icmp6_ifstat_inc(ifp, ifs6_in_mldreport); @@ -1290,8 +1408,16 @@ mld_fasttimo_vnet(void) inm = (struct in6_multi *)ifma->ifma_protospec; switch (mli->mli_version) { case MLD_VERSION_1: + /* + * XXX Drop IF_ADDR lock temporarily to + * avoid recursion caused by a potential + * call by in6ifa_ifpforlinklocal(). + * rwlock candidate? + */ + IF_ADDR_UNLOCK(ifp); mld_v1_process_group_timer(inm, mli->mli_version); + IF_ADDR_LOCK(ifp); break; case MLD_VERSION_2: mld_v2_process_group_timers(mli, &qrq, @@ -1510,7 +1636,7 @@ mld_set_version(struct mld_ifinfo *mli, const int version) * Compute the "Older Version Querier Present" timer as per * Section 9.12. */ - old_version_timer = mli->mli_rv * mli->mli_qi + mli->mli_qri; + old_version_timer = (mli->mli_rv * mli->mli_qi) + mli->mli_qri; old_version_timer *= PR_SLOWHZ; mli->mli_v1_timer = old_version_timer; } @@ -1643,7 +1769,7 @@ mld_v1_process_querier_timers(struct mld_ifinfo *mli) MLD_LOCK_ASSERT(); - if (mli->mli_v1_timer == 0 && mli->mli_version != MLD_VERSION_2) { + if (mli->mli_version != MLD_VERSION_2 && --mli->mli_v1_timer == 0) { /* * MLDv1 Querier Present timer expired; revert to MLDv2. 
*/ diff --git a/usr.sbin/ifmcstat/ifmcstat.8 b/usr.sbin/ifmcstat/ifmcstat.8 index 5805183fa4d3..5a6956d52e91 100644 --- a/usr.sbin/ifmcstat/ifmcstat.8 +++ b/usr.sbin/ifmcstat/ifmcstat.8 @@ -30,7 +30,7 @@ .\" .\" $FreeBSD$ .\" -.Dd February 28, 2009 +.Dd May 27, 2009 .Dt IFMCSTAT 8 .Os .Sh NAME @@ -71,8 +71,8 @@ Source lists for each group will also be printed. .Pp If specified twice, and .Xr kvm 3 -is in use, the IGMP timers for each interface -and the IGMP source list counters for each group +is in use, the control plane timers for each interface +and the source list counters for each group will also be printed. .El .Pp @@ -82,9 +82,9 @@ has been built with support for .Xr kvm 3 : .Bl -tag -width Fl .It Fl K -forces the use of +attempts to use .Xr kvm 3 -to be disabled. +to retrieve the multicast group information. .It Fl M Ar core extracts values associated with the name list from the specified core, instead of the default @@ -94,6 +94,11 @@ extracts the name list from the specified kernel instead of the default, which is the kernel image the system has booted from. .El .Sh IMPLEMENTATION NOTES +.Nm +will always print the embedded scope IDs of IPv6 multicast group +memberships. +This is because memberships are always scoped to an interface. +.Pp When run with the .Fl v option, @@ -119,13 +124,6 @@ is more limited. This support is recommended for debugging purposes. It requires super-user privilege if used to inspect a running kernel. .Pp -The -.Xr kvm 3 -back-end will be used by default if -.Nm -is run with super-user privileges, unless the -.Fl K -option is specified. 
.Sh SEE ALSO .Xr getifaddrs 3 , .Xr getifmaddrs 3 , diff --git a/usr.sbin/ifmcstat/ifmcstat.c b/usr.sbin/ifmcstat/ifmcstat.c index 0aea6fdc1ca7..e45e36d143c3 100644 --- a/usr.sbin/ifmcstat/ifmcstat.c +++ b/usr.sbin/ifmcstat/ifmcstat.c @@ -264,13 +264,13 @@ main(int argc, char **argv) usage(); #ifdef WITH_KVM - if (!Kflag) + if (Kflag) error = ifmcstat_kvm(kernel, core); /* * If KVM failed, and user did not explicitly specify a core file, * or force KVM backend to be disabled, try the sysctl backend. */ - if (Kflag || (error != 0 && (core == NULL && kernel == NULL))) + if (!Kflag || (error != 0 && (core == NULL && kernel == NULL))) #endif error = ifmcstat_getifmaddrs(); if (error != 0)