From 71498f308b2324dbd94e94fd8c4ae41bf4bd663b Mon Sep 17 00:00:00 2001 From: Bruce M Simpson Date: Tue, 12 Jun 2007 16:24:56 +0000 Subject: [PATCH] Import rewrite of IPv4 socket multicast layer to support source-specific and protocol-independent host mode multicast. The code is written to accommodate IPv6, IGMPv3 and MLDv2 with only a little additional work. This change only pertains to FreeBSD's use as a multicast end-station and does not concern multicast routing; for an IGMPv3/MLDv2 router implementation, consider the XORP project. The work is based on Wilbert de Graaf's IGMPv3 code drop for FreeBSD 4.6, which is available at: http://www.kloosterhof.com/wilbert/igmpv3.html Summary * IPv4 multicast socket processing is now moved out of ip_output.c into a new module, in_mcast.c. * The in_mcast.c module implements the IPv4 legacy any-source API in terms of the protocol-independent source-specific API. * Source filters are lazy allocated as the common case does not use them. They are part of per inpcb state and are covered by the inpcb lock. * struct ip_mreqn is now supported to allow applications to specify multicast joins by interface index in the legacy IPv4 any-source API. * In UDP, an incoming multicast datagram only requires that the source port matches the 4-tuple if the socket was already bound by source port. An unbound socket SHOULD be able to receive multicasts sent from an ephemeral source port. * The UDP socket multicast filter mode defaults to exclusive, that is, sources present in the per-socket list will be blocked from delivery. * The RFC 3678 userland functions have been added to libc: setsourcefilter, getsourcefilter, setipv4sourcefilter, getipv4sourcefilter. * Definitions for IGMPv3 are merged but not yet used. * struct sockaddr_storage is now referenced from <netinet/in.h>. It is therefore defined there if not already declared in the same way as for the C99 types. 
* The RFC 1724 hack (specify 0.0.0.0/8 addresses to IP_MULTICAST_IF which are then interpreted as interface indexes) is now deprecated. * A patch for the Rhyolite.com routed in the FreeBSD base system is available in the -net archives. This only affects individuals running RIPv1 or RIPv2 via point-to-point and/or unnumbered interfaces. * Make IPv6 detach path similar to IPv4's in code flow; functionally same. * Bump __FreeBSD_version to 700048; see UPDATING. This work was financially supported by another FreeBSD committer. Obtained from: p4://bms_netdev Submitted by: Wilbert de Graaf (original work) Reviewed by: rwatson (locking), silence from fenner, net@ (but with encouragement) --- UPDATING | 19 + lib/libc/net/Makefile.inc | 5 +- lib/libc/net/Symbol.map | 4 + share/man/man4/ip.4 | 16 +- sys/conf/files | 1 + sys/netinet/igmp.h | 39 +- sys/netinet/igmp_var.h | 56 +- sys/netinet/in.c | 165 +- sys/netinet/in.h | 124 +- sys/netinet/in_mcast.c | 1786 +++++++++++++++++ sys/netinet/in_pcb.c | 3 +- sys/netinet/in_var.h | 48 + sys/netinet/ip_carp.c | 3 + sys/netinet/ip_output.c | 500 +---- sys/netinet/ip_var.h | 37 +- sys/netinet/sctp_pcb.c | 2 +- sys/netinet/udp_usrreq.c | 119 +- sys/netinet/udp_var.h | 1 + sys/netinet6/in6.h | 20 + sys/netinet6/in6_ifattach.c | 32 +- sys/netinet6/in6_pcb.c | 3 +- sys/sys/param.h | 2 +- sys/sys/socket.h | 3 + .../regression/netinet/ipsockopt/ipsockopt.c | 4 +- usr.bin/netstat/inet.c | 5 +- usr.sbin/mtest/mtest.c | 38 +- 26 files changed, 2310 insertions(+), 725 deletions(-) create mode 100644 sys/netinet/in_mcast.c diff --git a/UPDATING b/UPDATING index d509867a86b0..a6cfbe20abac 100644 --- a/UPDATING +++ b/UPDATING @@ -21,6 +21,25 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 7.x IS SLOW: developers choose to disable these features on build machines to maximize performance. +20070612: + The IPv4 multicast socket code has been considerably modified, and + moved to the file sys/netinet/in_mcast.c. 
Initial support for the + RFC 3678 Source-Specific Multicast Socket API has been added to + the IPv4 network stack. + + Strict multicast and broadcast reception is now the default for + UDP/IPv4 sockets; the net.inet.udp.strict_mcast_mship sysctl variable + has now been removed. + + The RFC 1724 hack for interface selection has been removed; the use + of the Linux-derived ip_mreqn structure with IP_MULTICAST_IF has + been added to replace it. Consumers such as routed will soon be + updated to reflect this. + + These changes affect users who are running routed(8) or rdisc(8) + from the FreeBSD base system on point-to-point or unnumbered + interfaces. + 20070610: The net80211 layer has changed significantly and all wireless drivers that depend on it need to be recompiled. Further these diff --git a/lib/libc/net/Makefile.inc b/lib/libc/net/Makefile.inc index 3592f50160ed..23c1c9d28b15 100644 --- a/lib/libc/net/Makefile.inc +++ b/lib/libc/net/Makefile.inc @@ -14,7 +14,7 @@ SRCS+= base64.c ether_addr.c eui64.c \ ip6opt.c linkaddr.c map_v4v6.c name6.c ntoh.c \ nsdispatch.c nslexer.c nsparser.c nss_compat.c \ rcmd.c rcmdsh.c recv.c rthdr.c sctp_sys_calls.c send.c \ - sockatmark.c vars.c + sockatmark.c sourcefilter.c vars.c .if ${MK_NS_CACHING} != "no" SRCS+= nscache.c nscachedcli.c @@ -52,6 +52,7 @@ MAN+= byteorder.3 ethers.3 eui64.3 \ inet6_opt_init.3 inet6_option_space.3 inet6_rth_space.3 \ inet6_rthdr_space.3 linkaddr.3 \ nsdispatch.3 rcmd.3 rcmdsh.3 resolver.3 sockatmark.3 \ + setsourcefilter.3 \ sctp_bindx.3 sctp_connectx.3 sctp_freepaddrs.3 \ sctp_getaddrlen.3 sctp_getassocid.3 sctp_getpaddrs.3 \ sctp_opt_info.3 sctp_recvmsg.3 sctp_send.3 sctp_sendmsg.3 \ @@ -121,6 +122,8 @@ MLINKS+=resolver.3 dn_comp.3 resolver.3 dn_expand.3 resolver.3 res_init.3 \ resolver.3 res_search.3 resolver.3 res_send.3 resolver.3 dn_skipname.3 \ resolver.3 ns_get16.3 resolver.3 ns_get32.3 \ resolver.3 ns_put16.3 resolver.3 ns_put32.3 +MLINKS+=sourcefilter.3 setipv4sourcefilter.3 
getipv4sourcefilter.3 \ + sourcefilter.3 setsourcefilter.3 getsourcefilter.3 .if ${MK_HESIOD} != "no" SRCS+= hesiod.c diff --git a/lib/libc/net/Symbol.map b/lib/libc/net/Symbol.map index 7fb0f3b54f5b..de44cc576149 100644 --- a/lib/libc/net/Symbol.map +++ b/lib/libc/net/Symbol.map @@ -137,6 +137,10 @@ FBSD_1.0 { sctp_send; sctp_sendx; sctp_recvmsg; + setipv4sourcefilter; + getipv4sourcefilter; + getsourcefilter; + setsourcefilter; }; FBSDprivate_1.0 { diff --git a/share/man/man4/ip.4 b/share/man/man4/ip.4 index e531f74684f6..833c4632c01f 100644 --- a/share/man/man4/ip.4 +++ b/share/man/man4/ip.4 @@ -32,7 +32,7 @@ .\" @(#)ip.4 8.2 (Berkeley) 11/30/93 .\" $FreeBSD$ .\" -.Dd March 18, 2007 +.Dd April 9, 2007 .Dt IP 4 .Os .Sh NAME @@ -420,6 +420,16 @@ where "addr" is the local address of the desired interface or .Dv INADDR_ANY to specify the default interface. +.Pp +To specify an interface by index, an instance of +.Vt ip_mreqn +should be passed instead. +The +.Vt imr_ifindex +member should be set to the index of the desired interface, +or 0 to specify the default interface. +The kernel differentiates between these two structures by their size. +.\" An interface's local IP address and multicast capability can be obtained via the .Dv SIOCGIFCONF @@ -672,3 +682,7 @@ The .Nm protocol appeared in .Bx 4.2 . +The +.Vt ip_mreqn +structure appeared in +.Tn Linux 2.4 . 
diff --git a/sys/conf/files b/sys/conf/files index c08878a11d97..08309bcc684b 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -1801,6 +1801,7 @@ netinet/ip_carp.c optional carp netinet/in_gif.c optional gif inet netinet/ip_gre.c optional gre inet netinet/ip_id.c optional inet +netinet/in_mcast.c optional inet netinet/in_pcb.c optional inet netinet/in_proto.c optional inet \ compile-with "${NORMAL_C} -I$S/contrib/pf" diff --git a/sys/netinet/igmp.h b/sys/netinet/igmp.h index 05a70fc4c145..4c2f9e0b387f 100644 --- a/sys/netinet/igmp.h +++ b/sys/netinet/igmp.h @@ -55,7 +55,42 @@ struct igmp { struct in_addr igmp_group; /* group address being reported */ }; /* (zero for queries) */ -#define IGMP_MINLEN 8 +struct igmpv3 { + u_char igmp_type; /* version & type of IGMP message */ + u_char igmp_code; /* subtype for routing msgs */ + u_short igmp_cksum; /* IP-style checksum */ + struct in_addr igmp_group; /* group address being reported */ + /* (zero for queries) */ + u_char igmp_misc; /* reserved/suppress/robustness */ + u_char igmp_qqi; /* querier's query interval */ + u_short igmp_numsrc; /* number of sources */ + /*struct in_addr igmp_sources[1];*/ /* source addresses */ +}; + +struct igmp_grouprec { + u_char ig_type; /* record type */ + u_char ig_datalen; /* length of auxiliary data */ + u_short ig_numsrc; /* number of sources */ + struct in_addr ig_group; /* group address being reported */ + /*struct in_addr ig_sources[1];*/ /* source addresses */ +}; + +struct igmp_report { + u_char ir_type; /* record type */ + u_char ir_rsv1; /* reserved */ + u_short ir_cksum; /* checksum */ + u_short ir_rsv2; /* reserved */ + u_short ir_numgrps; /* number of group records */ + struct igmp_grouprec ir_groups[1]; /* group records */ +}; + +#define IGMP_MINLEN 8 +#define IGMP_HDRLEN 8 +#define IGMP_GRPREC_HDRLEN 8 +#define IGMP_PREPEND 0 + +#define IGMP_QRV(pigmp) ((pigmp)->igmp_misc & (0x07)) /* XXX */ +#define IGMP_MAXSOURCES(len) (((len) - 12) >> 2) /* XXX */ /* * Message types, 
including version number. @@ -71,6 +106,8 @@ struct igmp { #define IGMP_MTRACE_RESP 0x1e /* traceroute resp.(to sender)*/ #define IGMP_MTRACE 0x1f /* mcast traceroute messages */ +#define IGMP_V3_MEMBERSHIP_REPORT 0x22 /* Ver. 3 membership report */ + #define IGMP_MAX_HOST_REPORT_DELAY 10 /* max delay for response to */ /* query (in seconds) according */ /* to RFC1112 */ diff --git a/sys/netinet/igmp_var.h b/sys/netinet/igmp_var.h index aa083a804cb4..11c3769a6f65 100644 --- a/sys/netinet/igmp_var.h +++ b/sys/netinet/igmp_var.h @@ -56,6 +56,7 @@ struct igmpstat { u_int igps_rcv_badreports; /* received invalid reports */ u_int igps_rcv_ourreports; /* received reports for our groups */ u_int igps_snd_reports; /* sent membership reports */ + u_int igps_rcv_toolong; /* received with too many bytes */ }; #ifdef _KERNEL @@ -67,6 +68,13 @@ struct igmpstat { #define IGMP_OTHERMEMBER 0 #define IGMP_IREPORTEDLAST 1 +/* + * State masks for IGMPv3 + */ +#define IGMP_V3_NONEXISTENT 0x01 +#define IGMP_V3_OTHERMEMBER 0x02 +#define IGMP_V3_IREPORTEDLAST 0x04 + /* * We must remember what version the subnet's querier is. 
* We conveniently use the IGMP message type for the proper @@ -74,6 +82,7 @@ struct igmpstat { */ #define IGMP_V1_ROUTER IGMP_V1_MEMBERSHIP_REPORT #define IGMP_V2_ROUTER IGMP_V2_MEMBERSHIP_REPORT +#define IGMP_V3_ROUTER IGMP_V3_MEMBERSHIP_REPORT /* * Revert to new router if we haven't heard from an old router in @@ -81,6 +90,51 @@ struct igmpstat { */ #define IGMP_AGE_THRESHOLD 540 +/* + * IGMPv3 protocol defaults + */ +#define IGMP_INIT_ROBVAR 2 /* Robustness */ +#define IGMP_MAX_ROBVAR 7 +#define IGMP_INIT_QRYINT 125 /* Querier's Query interval */ +#define IGMP_MAX_QRYINT 255 +#define IGMP_INIT_QRYRSP 10 /* Query Response interval */ +#define IGMP_DEF_QRYMRT 10 +#define IGMP_UNSOL_INT 1 /* Unsolicited Report interval */ + +/* + * IGMPv3 report types + */ +#define IGMP_REPORT_MODE_IN 1 /* mode-is-include */ +#define IGMP_REPORT_MODE_EX 2 /* mode-is-exclude */ +#define IGMP_REPORT_TO_IN 3 /* change-to-include */ +#define IGMP_REPORT_TO_EX 4 /* change-to-exclude */ +#define IGMP_REPORT_ALLOW_NEW 5 /* allow-new-sources */ +#define IGMP_REPORT_BLOCK_OLD 6 /* block-old-sources */ + +/* + * Report types + */ +#define IGMP_MASK_CUR_STATE 0x01 /* Report current-state */ +#define IGMP_MASK_ALLOW_NEW 0x02 /* Report source as allow-new */ +#define IGMP_MASK_BLOCK_OLD 0x04 /* Report source as block-old */ +#define IGMP_MASK_TO_IN 0x08 /* Report source as to_in */ +#define IGMP_MASK_TO_EX 0x10 /* Report source as to_ex */ +#define IGMP_MASK_STATE_T1 0x20 /* State at T1 */ +#define IGMP_MASK_STATE_T2 0x40 /* State at T2 */ +#define IGMP_MASK_IF_STATE 0x80 /* Report current-state per interface */ + +#define IGMP_MASK_STATE_TX (IGMP_MASK_STATE_T1 | IGMP_MASK_STATE_T2) +#define IGMP_MASK_PENDING (IGMP_MASK_CUR_STATE | \ + IGMP_MASK_ALLOW_NEW | \ + IGMP_MASK_BLOCK_OLD) + +/* + * List identifiers + */ +#define IGMP_EXCLUDE_LIST 1 /* exclude list used to tag report */ +#define IGMP_INCLUDE_LIST 2 /* include list used to tag report */ +#define IGMP_RECORDED_LIST 3 /* recorded list 
used to tag report */ + void igmp_init(void); void igmp_input(struct mbuf *, int); void igmp_joingroup(struct in_multi *); @@ -100,6 +154,6 @@ SYSCTL_DECL(_net_inet_igmp); #define IGMPCTL_NAMES { \ { 0, 0 }, \ - { "stats", CTLTYPE_STRUCT }, \ + { "stats", CTLTYPE_STRUCT } \ } #endif diff --git a/sys/netinet/in.c b/sys/netinet/in.c index dd20e00cd4b5..d0c36fa2f2b9 100644 --- a/sys/netinet/in.c +++ b/sys/netinet/in.c @@ -49,10 +49,7 @@ #include #include #include - -#include - -static MALLOC_DEFINE(M_IPMADDR, "in_multi", "internet multicast address"); +#include static int in_mask2len(struct in_addr *); static void in_len2mask(struct in_addr *, int); @@ -74,17 +71,6 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, same_prefix_carp_only, CTLFLAG_RW, &sameprefixcarponly, 0, "Refuse to create same prefixes on different interfaces"); -/* - * The IPv4 multicast list (in_multihead and associated structures) are - * protected by the global in_multi_mtx. See in_var.h for more details. For - * now, in_multi_mtx is marked as recursible due to IGMP's calling back into - * ip_output() to send IGMP packets while holding the lock; this probably is - * not quite desirable. - */ -struct in_multihead in_multihead; /* XXX BSS initialization */ -struct mtx in_multi_mtx; -MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF | MTX_RECURSE); - extern struct inpcbinfo ripcbinfo; extern struct inpcbinfo udbinfo; @@ -976,155 +962,6 @@ in_broadcast(struct in_addr in, struct ifnet *ifp) #undef ia } -/* - * Add an address to the list of IP multicast addresses for a given interface. - */ -struct in_multi * -in_addmulti(struct in_addr *ap, struct ifnet *ifp) -{ - struct in_multi *inm; - - inm = NULL; - - IFF_LOCKGIANT(ifp); - IN_MULTI_LOCK(); - - IN_LOOKUP_MULTI(*ap, ifp, inm); - if (inm != NULL) { - /* - * If we already joined this group, just bump the - * refcount and return it. 
- */ - KASSERT(inm->inm_refcount >= 1, - ("%s: bad refcount %d", __func__, inm->inm_refcount)); - ++inm->inm_refcount; - } else do { - struct sockaddr_in sin; - struct ifmultiaddr *ifma; - struct in_multi *ninm; - int error; - - bzero(&sin, sizeof sin); - sin.sin_family = AF_INET; - sin.sin_len = sizeof(struct sockaddr_in); - sin.sin_addr = *ap; - - /* - * Check if a link-layer group is already associated - * with this network-layer group on the given ifnet. - * If so, bump the refcount on the existing network-layer - * group association and return it. - */ - error = if_addmulti(ifp, (struct sockaddr *)&sin, &ifma); - if (error) - break; - if (ifma->ifma_protospec != NULL) { - inm = (struct in_multi *)ifma->ifma_protospec; -#ifdef INVARIANTS - if (inm->inm_ifma != ifma || inm->inm_ifp != ifp || - inm->inm_addr.s_addr != ap->s_addr) - panic("%s: ifma is inconsistent", __func__); -#endif - ++inm->inm_refcount; - break; - } - - /* - * A new membership is needed; construct it and - * perform the IGMP join. - */ - ninm = malloc(sizeof(*ninm), M_IPMADDR, M_NOWAIT | M_ZERO); - if (ninm == NULL) { - if_delmulti_ifma(ifma); - break; - } - ninm->inm_addr = *ap; - ninm->inm_ifp = ifp; - ninm->inm_ifma = ifma; - ninm->inm_refcount = 1; - ifma->ifma_protospec = ninm; - LIST_INSERT_HEAD(&in_multihead, ninm, inm_link); - - igmp_joingroup(ninm); - - inm = ninm; - } while (0); - - IN_MULTI_UNLOCK(); - IFF_UNLOCKGIANT(ifp); - - return (inm); -} - -/* - * Delete a multicast address record. - * It is OK to call this routine if the underlying ifnet went away. - * - * XXX: To deal with the ifp going away, we cheat; the link-layer code in net - * will set ifma_ifp to NULL when the associated ifnet instance is detached - * from the system. - * The only reason we need to violate layers and check ifma_ifp here at all - * is because certain hardware drivers still require Giant to be held, - * and it must always be taken before other locks. 
- */ -void -in_delmulti(struct in_multi *inm) -{ - struct ifnet *ifp; - - KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__)); - ifp = inm->inm_ifma->ifma_ifp; - - if (ifp != NULL) { - /* - * Sanity check that netinet's notion of ifp is the - * same as net's. - */ - KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__)); - IFF_LOCKGIANT(ifp); - } - - IN_MULTI_LOCK(); - in_delmulti_locked(inm); - IN_MULTI_UNLOCK(); - - if (ifp != NULL) - IFF_UNLOCKGIANT(ifp); -} - -/* - * Delete a multicast address record, with locks held. - * - * It is OK to call this routine if the ifp went away. - * Assumes that caller holds the IN_MULTI lock, and that - * Giant was taken before other locks if required by the hardware. - */ -void -in_delmulti_locked(struct in_multi *inm) -{ - struct ifmultiaddr *ifma; - - IN_MULTI_LOCK_ASSERT(); - KASSERT(inm->inm_refcount >= 1, ("%s: freeing freed inm", __func__)); - - if (--inm->inm_refcount == 0) { - igmp_leavegroup(inm); - - ifma = inm->inm_ifma; -#ifdef DIAGNOSTIC - printf("%s: purging ifma %p\n", __func__, ifma); -#endif - KASSERT(ifma->ifma_protospec == inm, - ("%s: ifma_protospec != inm", __func__)); - ifma->ifma_protospec = NULL; - - LIST_REMOVE(inm, inm_link); - free(inm, M_IPMADDR); - - if_delmulti_ifma(ifma); - } -} - /* * Delete all IPv4 multicast address records, and associated link-layer * multicast address records, associated with ifp. diff --git a/sys/netinet/in.h b/sys/netinet/in.h index 89181e0931ca..1fca43d43792 100644 --- a/sys/netinet/in.h +++ b/sys/netinet/in.h @@ -84,6 +84,33 @@ struct in_addr { #define _STRUCT_IN_ADDR_DECLARED #endif +#ifndef _SOCKLEN_T_DECLARED +typedef __socklen_t socklen_t; +#define _SOCKLEN_T_DECLARED +#endif + +/* Avoid collision with original definition in sys/socket.h. 
*/ +#ifndef _STRUCT_SOCKADDR_STORAGE_DECLARED +/* + * RFC 2553: protocol-independent placeholder for socket addresses + */ +#define _SS_MAXSIZE 128U +#define _SS_ALIGNSIZE (sizeof(__int64_t)) +#define _SS_PAD1SIZE (_SS_ALIGNSIZE - sizeof(unsigned char) - \ + sizeof(sa_family_t)) +#define _SS_PAD2SIZE (_SS_MAXSIZE - sizeof(unsigned char) - \ + sizeof(sa_family_t) - _SS_PAD1SIZE - _SS_ALIGNSIZE) + +struct sockaddr_storage { + unsigned char ss_len; /* address length */ + sa_family_t ss_family; /* address family */ + char __ss_pad1[_SS_PAD1SIZE]; + __int64_t __ss_align; /* force desired struct alignment */ + char __ss_pad2[_SS_PAD2SIZE]; +}; +#define _STRUCT_SOCKADDR_STORAGE_DECLARED +#endif + /* Socket address, internet style. */ struct sockaddr_in { uint8_t sin_len; @@ -390,7 +417,8 @@ __END_DECLS #define IP_RECVDSTADDR 7 /* bool; receive IP dst addr w/dgram */ #define IP_SENDSRCADDR IP_RECVDSTADDR /* cmsg_type to set src addr */ #define IP_RETOPTS 8 /* ip_opts; set/get IP options */ -#define IP_MULTICAST_IF 9 /* u_char; set/get IP multicast i/f */ +#define IP_MULTICAST_IF 9 /* struct in_addr *or* struct ip_mreqn; + * set/get IP multicast i/f */ #define IP_MULTICAST_TTL 10 /* u_char; set/get IP multicast ttl */ #define IP_MULTICAST_LOOP 11 /* u_char; set/get IP multicast loopback */ #define IP_ADD_MEMBERSHIP 12 /* ip_mreq; add an IP group membership */ @@ -435,6 +463,23 @@ __END_DECLS #define IP_MINTTL 66 /* minimum TTL for packet or drop */ #define IP_DONTFRAG 67 /* don't fragment packet */ +/* IPv4 Source Filter Multicast API [RFC3678] */ +#define IP_ADD_SOURCE_MEMBERSHIP 70 /* join a source-specific group */ +#define IP_DROP_SOURCE_MEMBERSHIP 71 /* drop a single source */ +#define IP_BLOCK_SOURCE 72 /* block a source */ +#define IP_UNBLOCK_SOURCE 73 /* unblock a source */ + +/* The following option is private; do not use it from user applications. 
*/ +#define IP_MSFILTER 74 /* set/get filter list */ + +/* Protocol Independent Multicast API [RFC3678] */ +#define MCAST_JOIN_GROUP 80 /* join an any-source group */ +#define MCAST_LEAVE_GROUP 81 /* leave all sources for group */ +#define MCAST_JOIN_SOURCE_GROUP 82 /* join a source-specific group */ +#define MCAST_LEAVE_SOURCE_GROUP 83 /* leave a single source */ +#define MCAST_BLOCK_SOURCE 84 /* block a source */ +#define MCAST_UNBLOCK_SOURCE 85 /* unblock a source */ + /* * Defaults and limits for options */ @@ -448,6 +493,7 @@ __END_DECLS */ #define IP_MIN_MEMBERSHIPS 31 #define IP_MAX_MEMBERSHIPS 4095 +#define IP_MAX_SOURCE_FILTER 1024 /* # of filters per socket, per group */ /* * Argument structure for IP_ADD_MEMBERSHIP and IP_DROP_MEMBERSHIP. @@ -457,6 +503,82 @@ struct ip_mreq { struct in_addr imr_interface; /* local IP address of interface */ }; +/* + * Modified argument structure for IP_MULTICAST_IF, obtained from Linux. + * This is used to specify an interface index for multicast sends, as + * the IPv4 legacy APIs do not support this (unless IP_SENDIF is available). + */ +struct ip_mreqn { + struct in_addr imr_multiaddr; /* IP multicast address of group */ + struct in_addr imr_address; /* local IP address of interface */ + int imr_ifindex; /* Interface index; cast to uint32_t */ +}; + +/* + * Argument structure for IPv4 Multicast Source Filter APIs. [RFC3678] + */ +struct ip_mreq_source { + struct in_addr imr_multiaddr; /* IP multicast address of group */ + struct in_addr imr_sourceaddr; /* IP address of source */ + struct in_addr imr_interface; /* local IP address of interface */ +}; + +/* + * Argument structures for Protocol-Independent Multicast Source + * Filter APIs. 
[RFC3678] + */ +struct group_req { + uint32_t gr_interface; /* interface index */ + struct sockaddr_storage gr_group; /* group address */ +}; + +struct group_source_req { + uint32_t gsr_interface; /* interface index */ + struct sockaddr_storage gsr_group; /* group address */ + struct sockaddr_storage gsr_source; /* source address */ +}; + +#ifndef __MSFILTERREQ_DEFINED +#define __MSFILTERREQ_DEFINED +/* + * The following structure is private; do not use it from user applications. + * It is used to communicate IP_MSFILTER/IPV6_MSFILTER information between + * the RFC 3678 libc functions and the kernel. + */ +struct __msfilterreq { + uint32_t msfr_ifindex; /* interface index */ + uint32_t msfr_fmode; /* filter mode for group */ + uint32_t msfr_nsrcs; /* # of sources in msfr_srcs */ + struct sockaddr_storage msfr_group; /* group address */ + struct sockaddr_storage *msfr_srcs; /* pointer to the first member + * of a contiguous array of + * sources to filter in full. + */ +}; +#endif + +struct sockaddr; + +/* + * Advanced (Full-state) APIs [RFC3678] + * The RFC specifies uint_t for the 6th argument to [sg]etsourcefilter(). + * We use uint32_t here to be consistent. + */ +int setipv4sourcefilter(int, struct in_addr, struct in_addr, uint32_t, + uint32_t, struct in_addr *); +int getipv4sourcefilter(int, struct in_addr, struct in_addr, uint32_t *, + uint32_t *, struct in_addr *); +int setsourcefilter(int, uint32_t, struct sockaddr *, socklen_t, + uint32_t, uint32_t, struct sockaddr_storage *); +int getsourcefilter(int, uint32_t, struct sockaddr *, socklen_t, + uint32_t *, uint32_t *, struct sockaddr_storage *); + +/* + * Filter modes; also used to represent per-socket filter mode internally. 
+ */ +#define MCAST_INCLUDE 1 /* fmode: include these source(s) */ +#define MCAST_EXCLUDE 2 /* fmode: exclude these source(s) */ + /* * Argument for IP_PORTRANGE: * - which range to search when port is unspecified at bind() or connect() diff --git a/sys/netinet/in_mcast.c b/sys/netinet/in_mcast.c new file mode 100644 index 000000000000..0f0dc2d12a44 --- /dev/null +++ b/sys/netinet/in_mcast.c @@ -0,0 +1,1786 @@ +/*- + * Copyright (c) 2007 Bruce M. Simpson. + * Copyright (c) 2005 Robert N. M. Watson. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +/* + * IPv4 multicast socket, group, and socket option processing module. + * Until further notice, this file requires INET to compile. + * TODO: Make this infrastructure independent of address family. + * TODO: Teach netinet6 to use this code. + * TODO: Hook up SSM logic to IGMPv3/MLDv2. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#ifndef __SOCKUNION_DECLARED +union sockunion { + struct sockaddr_storage ss; + struct sockaddr sa; + struct sockaddr_dl sdl; + struct sockaddr_in sin; +#ifdef INET6 + struct sockaddr_in6 sin6; +#endif +}; +typedef union sockunion sockunion_t; +#define __SOCKUNION_DECLARED +#endif /* __SOCKUNION_DECLARED */ + +static MALLOC_DEFINE(M_IPMADDR, "in_multi", "IPv4 multicast group"); +static MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "IPv4 multicast options"); +static MALLOC_DEFINE(M_IPMSOURCE, "in_msource", "IPv4 multicast source filter"); + +/* + * The IPv4 multicast list (in_multihead and associated structures) are + * protected by the global in_multi_mtx. See in_var.h for more details. For + * now, in_multi_mtx is marked as recursible due to IGMP's calling back into + * ip_output() to send IGMP packets while holding the lock; this probably is + * not quite desirable. 
+ */ +struct in_multihead in_multihead; /* XXX BSS initialization */ +struct mtx in_multi_mtx; +MTX_SYSINIT(in_multi_mtx, &in_multi_mtx, "in_multi_mtx", MTX_DEF | MTX_RECURSE); + +/* + * Functions with non-static linkage defined in this file should be + * declared in in_var.h: + * imo_match_group() + * imo_match_source() + * in_addmulti() + * in_delmulti() + * in_delmulti_locked() + * and ip_var.h: + * inp_freemoptions() + * inp_getmoptions() + * inp_setmoptions() + */ +static int imo_grow(struct ip_moptions *); +static int imo_join_source(struct ip_moptions *, size_t, sockunion_t *); +static int imo_leave_source(struct ip_moptions *, size_t, sockunion_t *); +static int inp_change_source_filter(struct inpcb *, struct sockopt *); +static struct ip_moptions * + inp_findmoptions(struct inpcb *); +static int inp_get_source_filters(struct inpcb *, struct sockopt *); +static int inp_join_group(struct inpcb *, struct sockopt *); +static int inp_leave_group(struct inpcb *, struct sockopt *); +static int inp_set_multicast_if(struct inpcb *, struct sockopt *); +static int inp_set_source_filters(struct inpcb *, struct sockopt *); + +/* + * Resize the ip_moptions vector to the next power-of-two minus 1. + * May be called with locks held; do not sleep. 
+ *
+ * Both the membership vector and the source filter vector are resized.
+ * A vector which was resized successfully is kept even if the other
+ * allocation fails; imo_max_memberships only advances once both have grown.
+ * Returns 0 on success, or ETOOMANYREFS if IP_MAX_MEMBERSHIPS would be
+ * exceeded or memory could not be allocated.
+ */
+static int
+imo_grow(struct ip_moptions *imo)
+{
+	struct in_multi		**nmships;
+	struct in_mfilter	 *nmfilters;
+	struct in_mfilter	 *omfilters;
+	struct in_mfilter	 *imf;
+	struct in_msource	 *ims;
+	size_t			  idx;
+	size_t			  newmax;
+	size_t			  nold;
+	size_t			  oldmax;
+
+	omfilters = imo->imo_mfilters;
+	oldmax = imo->imo_max_memberships;
+	newmax = ((oldmax + 1) * 2) - 1;
+
+	if (newmax > IP_MAX_MEMBERSHIPS)
+		return (ETOOMANYREFS);
+
+	/*
+	 * A successful realloc() may move the allocation, after which the
+	 * old pointer is no longer valid.  Store each new pointer as soon
+	 * as it is obtained so that a failure of the other allocation can
+	 * never leave imo referring to released memory.  A vector that is
+	 * larger than imo_max_memberships is harmless.
+	 */
+	nmships = (struct in_multi **)realloc(imo->imo_membership,
+	    sizeof(struct in_multi *) * newmax, M_IPMOPTS, M_NOWAIT);
+	if (nmships != NULL)
+		imo->imo_membership = nmships;
+
+	nmfilters = (struct in_mfilter *)realloc(omfilters,
+	    sizeof(struct in_mfilter) * newmax, M_IPMSOURCE, M_NOWAIT);
+	if (nmfilters != NULL) {
+		imo->imo_mfilters = nmfilters;
+
+		/*
+		 * If there was no previous filter vector, none of the
+		 * slots carry valid contents and all must be initialized.
+		 */
+		nold = (omfilters != NULL) ? oldmax : 0;
+
+		/*
+		 * TAILQ heads are self-referential: the tqh_last of an
+		 * empty head, and the tqe_prev of the first entry of a
+		 * non-empty one, point back into the head itself.  The
+		 * vector may have been relocated, so re-seat them.
+		 */
+		for (idx = 0; idx < nold; idx++) {
+			imf = &nmfilters[idx];
+			ims = TAILQ_FIRST(&imf->imf_sources);
+			if (ims == NULL)
+				TAILQ_INIT(&imf->imf_sources);
+			else
+				ims->ims_next.tqe_prev =
+				    &imf->imf_sources.tqh_first;
+		}
+
+		/* Initialize newly allocated source filter heads. */
+		for (idx = nold; idx < newmax; idx++) {
+			nmfilters[idx].imf_fmode = MCAST_EXCLUDE;
+			nmfilters[idx].imf_nsources = 0;
+			TAILQ_INIT(&nmfilters[idx].imf_sources);
+		}
+	}
+
+	if (nmships == NULL || nmfilters == NULL)
+		return (ETOOMANYREFS);
+
+	imo->imo_max_memberships = newmax;
+
+	return (0);
+}
+
+/*
+ * Add a source to a multicast filter list.
+ * Assumes the associated inpcb is locked.
+ *
+ * gidx is the index of the group in the membership vector, as returned
+ * by imo_match_group().
+ * Returns 0 on success, ENOBUFS if the filter list is full or memory
+ * could not be allocated, or EADDRNOTAVAIL if the source is already
+ * present in the list.
+ */
+static int
+imo_join_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
+{
+	struct in_msource	*nims;
+	struct in_mfilter	*imf;
+
+	KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
+	KASSERT(imo->imo_mfilters != NULL,
+	    ("%s: imo_mfilters vector not allocated", __func__));
+
+	imf = &imo->imo_mfilters[gidx];
+	if (imf->imf_nsources == IP_MAX_SOURCE_FILTER)
+		return (ENOBUFS);
+
+	if (imo_match_source(imo, gidx, &src->sa) != NULL)
+		return (EADDRNOTAVAIL);
+
+	/* Do not sleep with inp lock held. */
+	MALLOC(nims, struct in_msource *, sizeof(struct in_msource),
+	    M_IPMSOURCE, M_NOWAIT | M_ZERO);
+	if (nims == NULL)
+		return (ENOBUFS);
+
+	nims->ims_addr = src->ss;
+	TAILQ_INSERT_TAIL(&imf->imf_sources, nims, ims_next);
+	imf->imf_nsources++;
+
+	return (0);
+}
+
+/*
+ * Remove a source from a multicast filter list.
+ * The caller is presumably expected to hold the inpcb lock, as for
+ * imo_join_source().
+ *
+ * Returns 0 on success, or EADDRNOTAVAIL if the source is not present
+ * in the list.  Removal must remain possible when the list is full, so
+ * unlike imo_join_source() there is no check against
+ * IP_MAX_SOURCE_FILTER here.
+ */
+static int
+imo_leave_source(struct ip_moptions *imo, size_t gidx, sockunion_t *src)
+{
+	struct in_msource	*ims;
+	struct in_mfilter	*imf;
+
+	KASSERT(src->ss.ss_family == AF_INET, ("%s: !AF_INET", __func__));
+	KASSERT(imo->imo_mfilters != NULL,
+	    ("%s: imo_mfilters vector not allocated", __func__));
+
+	imf = &imo->imo_mfilters[gidx];
+
+	ims = imo_match_source(imo, gidx, &src->sa);
+	if (ims == NULL)
+		return (EADDRNOTAVAIL);
+
+	TAILQ_REMOVE(&imf->imf_sources, ims, ims_next);
+	FREE(ims, M_IPMSOURCE);
+	imf->imf_nsources--;
+
+	return (0);
+}
+
+/*
+ * Find an IPv4 multicast group entry for this ip_moptions instance
+ * which matches the specified group, and optionally an interface.
+ * If ifp is NULL, a membership on any interface matches.
+ * Return its index into the array, or -1 if not found.
+ * The return type is unsigned, so "not found" is (size_t)-1.
+ */
+size_t
+imo_match_group(struct ip_moptions *imo, struct ifnet *ifp,
+    struct sockaddr *group)
+{
+	sockunion_t	*gsa;
+	struct in_multi	*inm;
+	size_t		 idx;
+	size_t		 nmships;
+
+	gsa = (sockunion_t *)group;
+
+	/* The imo_membership array may be lazy allocated. */
+	if (imo->imo_membership == NULL || imo->imo_num_memberships == 0)
+		return ((size_t)-1);
+
+	nmships = imo->imo_num_memberships;
+	for (idx = 0; idx < nmships; idx++) {
+		inm = imo->imo_membership[idx];
+		if (inm == NULL)
+			continue;
+		if ((ifp == NULL || inm->inm_ifp == ifp) &&
+		    inm->inm_addr.s_addr == gsa->sin.sin_addr.s_addr)
+			return (idx);
+	}
+
+	return ((size_t)-1);
+}
+
+/*
+ * Find a multicast source entry for this imo which matches
+ * the given group index for this socket, and source address.
+ * Returns NULL if there is no filter vector or no matching source.
+ */
+struct in_msource *
+imo_match_source(struct ip_moptions *imo, size_t gidx, struct sockaddr *src)
+{
+	struct in_mfilter	*imf;
+	struct in_msource	*ims;
+
+	KASSERT(src->sa_family == AF_INET, ("%s: !AF_INET", __func__));
+	KASSERT(gidx != -1 && gidx < imo->imo_num_memberships,
+	    ("%s: invalid index %d\n", __func__, (int)gidx));
+
+	/* The imo_mfilters array may be lazy allocated. */
+	if (imo->imo_mfilters == NULL)
+		return (NULL);
+
+	imf = &imo->imo_mfilters[gidx];
+	TAILQ_FOREACH(ims, &imf->imf_sources, ims_next) {
+		/*
+		 * Perform bitwise comparison of two IPv4 addresses.
+		 * TODO: Do the same for IPv6.
+		 * Do not use sa_equal() for this as it is not aware of
+		 * deeper structure in sockaddr_in or sockaddr_in6.
+		 */
+		if (((struct sockaddr_in *)&ims->ims_addr)->sin_addr.s_addr ==
+		    ((struct sockaddr_in *)src)->sin_addr.s_addr)
+			return (ims);
+	}
+
+	return (NULL);
+}
+
+/*
+ * Join an IPv4 multicast group.
+ */ +struct in_multi * +in_addmulti(struct in_addr *ap, struct ifnet *ifp) +{ + struct in_multi *inm; + + inm = NULL; + + IFF_LOCKGIANT(ifp); + IN_MULTI_LOCK(); + + IN_LOOKUP_MULTI(*ap, ifp, inm); + if (inm != NULL) { + /* + * If we already joined this group, just bump the + * refcount and return it. + */ + KASSERT(inm->inm_refcount >= 1, + ("%s: bad refcount %d", __func__, inm->inm_refcount)); + ++inm->inm_refcount; + } else do { + sockunion_t gsa; + struct ifmultiaddr *ifma; + struct in_multi *ninm; + int error; + + memset(&gsa, 0, sizeof(gsa)); + gsa.sin.sin_family = AF_INET; + gsa.sin.sin_len = sizeof(struct sockaddr_in); + gsa.sin.sin_addr = *ap; + + /* + * Check if a link-layer group is already associated + * with this network-layer group on the given ifnet. + * If so, bump the refcount on the existing network-layer + * group association and return it. + */ + error = if_addmulti(ifp, &gsa.sa, &ifma); + if (error) + break; + if (ifma->ifma_protospec != NULL) { + inm = (struct in_multi *)ifma->ifma_protospec; +#ifdef INVARIANTS + if (inm->inm_ifma != ifma || inm->inm_ifp != ifp || + inm->inm_addr.s_addr != ap->s_addr) + panic("%s: ifma is inconsistent", __func__); +#endif + ++inm->inm_refcount; + break; + } + + /* + * A new membership is needed; construct it and + * perform the IGMP join. + */ + ninm = malloc(sizeof(*ninm), M_IPMADDR, M_NOWAIT | M_ZERO); + if (ninm == NULL) { + if_delmulti_ifma(ifma); + break; + } + ninm->inm_addr = *ap; + ninm->inm_ifp = ifp; + ninm->inm_ifma = ifma; + ninm->inm_refcount = 1; + ifma->ifma_protospec = ninm; + LIST_INSERT_HEAD(&in_multihead, ninm, inm_link); + + igmp_joingroup(ninm); + + inm = ninm; + } while (0); + + IN_MULTI_UNLOCK(); + IFF_UNLOCKGIANT(ifp); + + return (inm); +} + +/* + * Leave an IPv4 multicast group. + * It is OK to call this routine if the underlying ifnet went away. 
+ * + * XXX: To deal with the ifp going away, we cheat; the link-layer code in net + * will set ifma_ifp to NULL when the associated ifnet instance is detached + * from the system. + * + * The only reason we need to violate layers and check ifma_ifp here at all + * is because certain hardware drivers still require Giant to be held, + * and it must always be taken before other locks. + */ +void +in_delmulti(struct in_multi *inm) +{ + struct ifnet *ifp; + + KASSERT(inm != NULL, ("%s: inm is NULL", __func__)); + KASSERT(inm->inm_ifma != NULL, ("%s: no ifma", __func__)); + ifp = inm->inm_ifma->ifma_ifp; + + if (ifp != NULL) { + /* + * Sanity check that netinet's notion of ifp is the + * same as net's. + */ + KASSERT(inm->inm_ifp == ifp, ("%s: bad ifp", __func__)); + IFF_LOCKGIANT(ifp); + } + + IN_MULTI_LOCK(); + in_delmulti_locked(inm); + IN_MULTI_UNLOCK(); + + if (ifp != NULL) + IFF_UNLOCKGIANT(ifp); +} + +/* + * Delete a multicast address record, with locks held. + * + * It is OK to call this routine if the ifp went away. + * Assumes that caller holds the IN_MULTI lock, and that + * Giant was taken before other locks if required by the hardware. + */ +void +in_delmulti_locked(struct in_multi *inm) +{ + struct ifmultiaddr *ifma; + + IN_MULTI_LOCK_ASSERT(); + KASSERT(inm->inm_refcount >= 1, ("%s: freeing freed inm", __func__)); + + if (--inm->inm_refcount == 0) { + igmp_leavegroup(inm); + + ifma = inm->inm_ifma; +#ifdef DIAGNOSTIC + if (bootverbose) + printf("%s: purging ifma %p\n", __func__, ifma); +#endif + KASSERT(ifma->ifma_protospec == inm, + ("%s: ifma_protospec != inm", __func__)); + ifma->ifma_protospec = NULL; + + LIST_REMOVE(inm, inm_link); + free(inm, M_IPMADDR); + + if_delmulti_ifma(ifma); + } +} + +/* + * Block or unblock an ASM/SSM multicast source on an inpcb. 
+ */ +static int +inp_change_source_filter(struct inpcb *inp, struct sockopt *sopt) +{ + struct group_source_req gsr; + sockunion_t *gsa, *ssa; + struct ifnet *ifp; + struct in_mfilter *imf; + struct ip_moptions *imo; + struct in_msource *ims; + size_t idx; + int error; + int block; + + ifp = NULL; + error = 0; + block = 0; + + memset(&gsr, 0, sizeof(struct group_source_req)); + gsa = (sockunion_t *)&gsr.gsr_group; + ssa = (sockunion_t *)&gsr.gsr_source; + + switch (sopt->sopt_name) { + case IP_BLOCK_SOURCE: + case IP_UNBLOCK_SOURCE: { + struct ip_mreq_source mreqs; + + error = sooptcopyin(sopt, &mreqs, + sizeof(struct ip_mreq_source), + sizeof(struct ip_mreq_source)); + if (error) + return (error); + + gsa->sin.sin_family = AF_INET; + gsa->sin.sin_len = sizeof(struct sockaddr_in); + gsa->sin.sin_addr = mreqs.imr_multiaddr; + + ssa->sin.sin_family = AF_INET; + ssa->sin.sin_len = sizeof(struct sockaddr_in); + ssa->sin.sin_addr = mreqs.imr_sourceaddr; + + if (mreqs.imr_interface.s_addr != INADDR_ANY) + INADDR_TO_IFP(mreqs.imr_interface, ifp); + + if (sopt->sopt_name == IP_BLOCK_SOURCE) + block = 1; + +#ifdef DIAGNOSTIC + if (bootverbose) { + printf("%s: imr_interface = %s, ifp = %p\n", + __func__, inet_ntoa(mreqs.imr_interface), ifp); + } +#endif + break; + } + + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + error = sooptcopyin(sopt, &gsr, + sizeof(struct group_source_req), + sizeof(struct group_source_req)); + if (error) + return (error); + + if (gsa->sin.sin_family != AF_INET || + gsa->sin.sin_len != sizeof(struct sockaddr_in)) + return (EINVAL); + + if (ssa->sin.sin_family != AF_INET || + ssa->sin.sin_len != sizeof(struct sockaddr_in)) + return (EINVAL); + + if (gsr.gsr_interface == 0 || if_index < gsr.gsr_interface) + return (EADDRNOTAVAIL); + + ifp = ifnet_byindex(gsr.gsr_interface); + + if (sopt->sopt_name == MCAST_BLOCK_SOURCE) + block = 1; + break; + + default: +#ifdef DIAGNOSTIC + if (bootverbose) { + printf("%s: unknown sopt_name %d\n", __func__, 
+ sopt->sopt_name); + } +#endif + return (EOPNOTSUPP); + break; + } + + /* XXX INET6 */ + if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) + return (EINVAL); + + /* + * Check if we are actually a member of this group. + */ + imo = inp_findmoptions(inp); + idx = imo_match_group(imo, ifp, &gsa->sa); + if (idx == -1 || imo->imo_mfilters == NULL) { + error = EADDRNOTAVAIL; + goto out_locked; + } + + KASSERT(imo->imo_mfilters != NULL, + ("%s: imo_mfilters not allocated", __func__)); + imf = &imo->imo_mfilters[idx]; + + /* + * SSM multicast truth table for block/unblock operations. + * + * Operation Filter Mode Entry exists? Action + * + * block exclude no add source to filter + * unblock include no add source to filter + * block include no EINVAL + * unblock exclude no EINVAL + * block exclude yes EADDRNOTAVAIL + * unblock include yes EADDRNOTAVAIL + * block include yes remove source from filter + * unblock exclude yes remove source from filter + * + * FreeBSD does not explicitly distinguish between ASM and SSM + * mode sockets; all sockets are assumed to have a filter list. + */ +#ifdef DIAGNOSTIC + if (bootverbose) { + printf("%s: imf_fmode is %s\n", __func__, + imf->imf_fmode == MCAST_INCLUDE ? "include" : "exclude"); + } +#endif + ims = imo_match_source(imo, idx, &ssa->sa); + if (ims == NULL) { + if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) || + (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) { +#ifdef DIAGNOSTIC + if (bootverbose) { + printf("%s: adding %s to filter list\n", + __func__, inet_ntoa(ssa->sin.sin_addr)); + } +#endif + error = imo_join_source(imo, idx, ssa); + } + if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) || + (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) { + /* + * If the socket is in inclusive mode: + * the source is already blocked as it has no entry. + * If the socket is in exclusive mode: + * the source is already unblocked as it has no entry. 
+ */ +#ifdef DIAGNOSTIC + if (bootverbose) { + printf("%s: ims %p; %s already [un]blocked\n", + __func__, ims, + inet_ntoa(ssa->sin.sin_addr)); + } +#endif + error = EINVAL; + } + } else { + if ((block == 1 && imf->imf_fmode == MCAST_EXCLUDE) || + (block == 0 && imf->imf_fmode == MCAST_INCLUDE)) { + /* + * If the socket is in exclusive mode: + * the source is already blocked as it has an entry. + * If the socket is in inclusive mode: + * the source is already unblocked as it has an entry. + */ +#ifdef DIAGNOSTIC + if (bootverbose) { + printf("%s: ims %p; %s already [un]blocked\n", + __func__, ims, + inet_ntoa(ssa->sin.sin_addr)); + } +#endif + error = EADDRNOTAVAIL; + } + if ((block == 1 && imf->imf_fmode == MCAST_INCLUDE) || + (block == 0 && imf->imf_fmode == MCAST_EXCLUDE)) { +#ifdef DIAGNOSTIC + if (bootverbose) { + printf("%s: removing %s from filter list\n", + __func__, inet_ntoa(ssa->sin.sin_addr)); + } +#endif + error = imo_leave_source(imo, idx, ssa); + } + } + +out_locked: + INP_UNLOCK(inp); + return (error); +} + +/* + * Given an inpcb, return its multicast options structure pointer. Accepts + * an unlocked inpcb pointer, but will return it locked. May sleep. 
+ */ +static struct ip_moptions * +inp_findmoptions(struct inpcb *inp) +{ + struct ip_moptions *imo; + struct in_multi **immp; + struct in_mfilter *imfp; + size_t idx; + + INP_LOCK(inp); + if (inp->inp_moptions != NULL) + return (inp->inp_moptions); + + INP_UNLOCK(inp); + + imo = (struct ip_moptions *)malloc(sizeof(*imo), M_IPMOPTS, + M_WAITOK); + immp = (struct in_multi **)malloc(sizeof(*immp) * IP_MIN_MEMBERSHIPS, + M_IPMOPTS, M_WAITOK | M_ZERO); + imfp = (struct in_mfilter *)malloc( + sizeof(struct in_mfilter) * IP_MIN_MEMBERSHIPS, + M_IPMSOURCE, M_WAITOK); + + imo->imo_multicast_ifp = NULL; + imo->imo_multicast_addr.s_addr = INADDR_ANY; + imo->imo_multicast_vif = -1; + imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; + imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; + imo->imo_num_memberships = 0; + imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; + imo->imo_membership = immp; + + /* Initialize per-group source filters. */ + for (idx = 0; idx < IP_MIN_MEMBERSHIPS; idx++) { + imfp[idx].imf_fmode = MCAST_EXCLUDE; + imfp[idx].imf_nsources = 0; + TAILQ_INIT(&imfp[idx].imf_sources); + } + imo->imo_mfilters = imfp; + + INP_LOCK(inp); + if (inp->inp_moptions != NULL) { + free(imfp, M_IPMSOURCE); + free(immp, M_IPMOPTS); + free(imo, M_IPMOPTS); + return (inp->inp_moptions); + } + inp->inp_moptions = imo; + return (imo); +} + +/* + * Discard the IP multicast options (and source filters). 
+ */ +void +inp_freemoptions(struct ip_moptions *imo) +{ + struct in_mfilter *imf; + struct in_msource *ims, *tims; + size_t idx, nmships; + + KASSERT(imo != NULL, ("%s: ip_moptions is NULL", __func__)); + + nmships = imo->imo_num_memberships; + for (idx = 0; idx < nmships; ++idx) { + in_delmulti(imo->imo_membership[idx]); + + if (imo->imo_mfilters != NULL) { + imf = &imo->imo_mfilters[idx]; + TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, + ims_next, tims) { + TAILQ_REMOVE(&imf->imf_sources, ims, ims_next); + FREE(ims, M_IPMSOURCE); + imf->imf_nsources--; + } + KASSERT(imf->imf_nsources == 0, + ("%s: did not free all imf_nsources", __func__)); + } + } + + if (imo->imo_mfilters != NULL) + free(imo->imo_mfilters, M_IPMSOURCE); + free(imo->imo_membership, M_IPMOPTS); + free(imo, M_IPMOPTS); +} + +/* + * Atomically get source filters on a socket for an IPv4 multicast group. + * Called with INP lock held; returns with lock released. + */ +static int +inp_get_source_filters(struct inpcb *inp, struct sockopt *sopt) +{ + struct __msfilterreq msfr; + sockunion_t *gsa; + struct ifnet *ifp; + struct ip_moptions *imo; + struct in_mfilter *imf; + struct in_msource *ims; + struct sockaddr_storage *ptss; + struct sockaddr_storage *tss; + int error; + size_t idx; + + INP_LOCK_ASSERT(inp); + + imo = inp->inp_moptions; + KASSERT(imo != NULL, ("%s: null ip_moptions", __func__)); + + INP_UNLOCK(inp); + + error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), + sizeof(struct __msfilterreq)); + if (error) + return (error); + + if (msfr.msfr_ifindex == 0 || if_index < msfr.msfr_ifindex) + return (EINVAL); + + ifp = ifnet_byindex(msfr.msfr_ifindex); + if (ifp == NULL) + return (EINVAL); + + INP_LOCK(inp); + + /* + * Lookup group on the socket. 
+ */ + gsa = (sockunion_t *)&msfr.msfr_group; + idx = imo_match_group(imo, ifp, &gsa->sa); + if (idx == -1 || imo->imo_mfilters == NULL) { + INP_UNLOCK(inp); + return (EADDRNOTAVAIL); + } + + imf = &imo->imo_mfilters[idx]; + msfr.msfr_fmode = imf->imf_fmode; + msfr.msfr_nsrcs = imf->imf_nsources; + + /* + * If the user specified a buffer, copy out the source filter + * entries to userland gracefully. + * msfr.msfr_nsrcs is always set to the total number of filter + * entries which the kernel currently has for this group. + */ + tss = NULL; + if (msfr.msfr_srcs != NULL && msfr.msfr_nsrcs > 0) { + /* + * Make a copy of the source vector so that we do not + * thrash the inpcb lock whilst copying it out. + * We only copy out the number of entries which userland + * has asked for, but we always tell userland how big the + * buffer really needs to be. + */ + MALLOC(tss, struct sockaddr_storage *, + sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, + M_TEMP, M_NOWAIT); + if (tss == NULL) { + error = ENOBUFS; + } else { + ptss = tss; + TAILQ_FOREACH(ims, &imf->imf_sources, ims_next) { + memcpy(ptss++, &ims->ims_addr, + sizeof(struct sockaddr_storage)); + } + } + } + + INP_UNLOCK(inp); + + if (tss != NULL) { + error = copyout(tss, msfr.msfr_srcs, + sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); + FREE(tss, M_TEMP); + } + + if (error) + return (error); + + error = sooptcopyout(sopt, &msfr, sizeof(struct __msfilterreq)); + + return (error); +} + +/* + * Return the IP multicast options in response to user getsockopt(). 
+ */ +int +inp_getmoptions(struct inpcb *inp, struct sockopt *sopt) +{ + struct ip_mreqn mreqn; + struct ip_moptions *imo; + struct ifnet *ifp; + struct in_ifaddr *ia; + int error, optval; + u_char coptval; + + INP_LOCK(inp); + imo = inp->inp_moptions; + + error = 0; + switch (sopt->sopt_name) { + case IP_MULTICAST_VIF: + if (imo != NULL) + optval = imo->imo_multicast_vif; + else + optval = -1; + INP_UNLOCK(inp); + error = sooptcopyout(sopt, &optval, sizeof(int)); + break; + + case IP_MULTICAST_IF: + memset(&mreqn, 0, sizeof(struct ip_mreqn)); + if (imo != NULL) { + ifp = imo->imo_multicast_ifp; + if (imo->imo_multicast_addr.s_addr != INADDR_ANY) { + mreqn.imr_address = imo->imo_multicast_addr; + } else if (ifp != NULL) { + mreqn.imr_ifindex = ifp->if_index; + IFP_TO_IA(ifp, ia); + if (ia != NULL) { + mreqn.imr_address = + IA_SIN(ia)->sin_addr; + } + } + } + INP_UNLOCK(inp); + if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { + error = sooptcopyout(sopt, &mreqn, + sizeof(struct ip_mreqn)); + } else { + error = sooptcopyout(sopt, &mreqn.imr_address, + sizeof(struct in_addr)); + } + break; + + case IP_MULTICAST_TTL: + if (imo == 0) + optval = coptval = IP_DEFAULT_MULTICAST_TTL; + else + optval = coptval = imo->imo_multicast_ttl; + INP_UNLOCK(inp); + if (sopt->sopt_valsize == sizeof(u_char)) + error = sooptcopyout(sopt, &coptval, sizeof(u_char)); + else + error = sooptcopyout(sopt, &optval, sizeof(int)); + break; + + case IP_MULTICAST_LOOP: + if (imo == 0) + optval = coptval = IP_DEFAULT_MULTICAST_LOOP; + else + optval = coptval = imo->imo_multicast_loop; + INP_UNLOCK(inp); + if (sopt->sopt_valsize == sizeof(u_char)) + error = sooptcopyout(sopt, &coptval, sizeof(u_char)); + else + error = sooptcopyout(sopt, &optval, sizeof(int)); + break; + + case IP_MSFILTER: + if (imo == NULL) { + error = EADDRNOTAVAIL; + INP_UNLOCK(inp); + } else { + error = inp_get_source_filters(inp, sopt); + } + break; + + default: + INP_UNLOCK(inp); + error = ENOPROTOOPT; + break; + } + + 
INP_UNLOCK_ASSERT(inp); + + return (error); +} + +/* + * Join an IPv4 multicast group, possibly with a source. + */ +static int +inp_join_group(struct inpcb *inp, struct sockopt *sopt) +{ + struct group_source_req gsr; + sockunion_t *gsa, *ssa; + struct ifnet *ifp; + struct in_mfilter *imf; + struct ip_moptions *imo; + struct in_multi *inm; + size_t idx; + int error; + + ifp = NULL; + error = 0; + + memset(&gsr, 0, sizeof(struct group_source_req)); + gsa = (sockunion_t *)&gsr.gsr_group; + gsa->ss.ss_family = AF_UNSPEC; + ssa = (sockunion_t *)&gsr.gsr_source; + ssa->ss.ss_family = AF_UNSPEC; + + switch (sopt->sopt_name) { + case IP_ADD_MEMBERSHIP: + case IP_ADD_SOURCE_MEMBERSHIP: { + struct ip_mreq_source mreqs; + + if (sopt->sopt_name == IP_ADD_MEMBERSHIP) { + error = sooptcopyin(sopt, &mreqs, + sizeof(struct ip_mreq), + sizeof(struct ip_mreq)); + /* + * Do argument switcharoo from ip_mreq into + * ip_mreq_source to avoid using two instances. + */ + mreqs.imr_interface = mreqs.imr_sourceaddr; + mreqs.imr_sourceaddr.s_addr = INADDR_ANY; + } else if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { + error = sooptcopyin(sopt, &mreqs, + sizeof(struct ip_mreq_source), + sizeof(struct ip_mreq_source)); + } + if (error) + return (error); + + gsa->sin.sin_family = AF_INET; + gsa->sin.sin_len = sizeof(struct sockaddr_in); + gsa->sin.sin_addr = mreqs.imr_multiaddr; + + if (sopt->sopt_name == IP_ADD_SOURCE_MEMBERSHIP) { + ssa->sin.sin_family = AF_INET; + ssa->sin.sin_len = sizeof(struct sockaddr_in); + ssa->sin.sin_addr = mreqs.imr_sourceaddr; + } + + /* + * Obtain ifp. If no interface address was provided, + * use the interface of the route to the given multicast + * address (usually this is the default route). 
+ */ + if (mreqs.imr_interface.s_addr != INADDR_ANY) { + INADDR_TO_IFP(mreqs.imr_interface, ifp); + } else { + struct route ro; + + ro.ro_rt = NULL; + *(struct sockaddr_in *)&ro.ro_dst = gsa->sin; + rtalloc_ign(&ro, RTF_CLONING); + if (ro.ro_rt == NULL) { +#ifdef DIAGNOSTIC + printf("%s: no route to %s\n", __func__, + inet_ntoa(gsa->sin.sin_addr)); +#endif + return (EADDRNOTAVAIL); + } + ifp = ro.ro_rt->rt_ifp; + KASSERT(ifp != NULL, ("%s: null ifp", __func__)); + RTFREE(ro.ro_rt); + } +#ifdef DIAGNOSTIC + if (bootverbose) { + printf("%s: imr_interface = %s, ifp = %p\n", + __func__, inet_ntoa(mreqs.imr_interface), ifp); + } +#endif + break; + } + + case MCAST_JOIN_GROUP: + case MCAST_JOIN_SOURCE_GROUP: + if (sopt->sopt_name == MCAST_JOIN_GROUP) { + error = sooptcopyin(sopt, &gsr, + sizeof(struct group_req), + sizeof(struct group_req)); + } else if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { + error = sooptcopyin(sopt, &gsr, + sizeof(struct group_source_req), + sizeof(struct group_source_req)); + } + if (error) + return (error); + + if (gsa->sin.sin_family != AF_INET || + gsa->sin.sin_len != sizeof(struct sockaddr_in)) + return (EINVAL); + + /* + * Overwrite the port field if present, as the sockaddr + * being copied in may be matched with a binary comparison. + * XXX INET6 + */ + gsa->sin.sin_port = 0; + if (sopt->sopt_name == MCAST_JOIN_SOURCE_GROUP) { + if (ssa->sin.sin_family != AF_INET || + ssa->sin.sin_len != sizeof(struct sockaddr_in)) + return (EINVAL); + ssa->sin.sin_port = 0; + } + + /* + * Obtain the ifp. 
+ */ + if (gsr.gsr_interface == 0 || if_index < gsr.gsr_interface) + return (EADDRNOTAVAIL); + ifp = ifnet_byindex(gsr.gsr_interface); + + break; + + default: +#ifdef DIAGNOSTIC + if (bootverbose) { + printf("%s: unknown sopt_name %d\n", __func__, + sopt->sopt_name); + } +#endif + return (EOPNOTSUPP); + break; + } + + if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) + return (EINVAL); + + if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) + return (EADDRNOTAVAIL); + + /* + * Check if we already hold membership of this group for this inpcb. + * If so, we do not need to perform the initial join. + */ + imo = inp_findmoptions(inp); + idx = imo_match_group(imo, ifp, &gsa->sa); + if (idx != -1) { + if (ssa->ss.ss_family != AF_UNSPEC) { + /* + * Attempting to join an ASM group (when already + * an ASM or SSM member) is an error. + */ + error = EADDRNOTAVAIL; + } else { + imf = &imo->imo_mfilters[idx]; + if (imf->imf_nsources == 0) { + /* + * Attempting to join an SSM group (when + * already an ASM member) is an error. + */ + error = EINVAL; + } else { + /* + * Attempting to join an SSM group (when + * already an SSM member) means "add this + * source to the inclusive filter list". + */ + error = imo_join_source(imo, idx, ssa); + } + } + goto out_locked; + } + + /* + * Call imo_grow() to reallocate the membership and source filter + * vectors if they are full. If the size would exceed the hard limit, + * then we know we've really run out of entries. We keep the INP + * lock held to avoid introducing a race condition. + */ + if (imo->imo_num_memberships == imo->imo_max_memberships) { + error = imo_grow(imo); + if (error) + goto out_locked; + } + + /* + * So far, so good: perform the layer 3 join, layer 2 join, + * and make an IGMP announcement if needed. 
+ */ + inm = in_addmulti(&gsa->sin.sin_addr, ifp); + if (inm == NULL) { + error = ENOBUFS; + goto out_locked; + } + idx = imo->imo_num_memberships; + imo->imo_membership[idx] = inm; + imo->imo_num_memberships++; + + KASSERT(imo->imo_mfilters != NULL, + ("%s: imf_mfilters vector was not allocated", __func__)); + imf = &imo->imo_mfilters[idx]; + KASSERT(TAILQ_EMPTY(&imf->imf_sources), + ("%s: imf_sources not empty", __func__)); + + /* + * If this is a new SSM group join (i.e. a source was specified + * with this group), add this source to the filter list. + */ + if (ssa->ss.ss_family != AF_UNSPEC) { + /* + * An initial SSM join implies that this socket's membership + * of the multicast group is now in inclusive mode. + */ + imf->imf_fmode = MCAST_INCLUDE; + + error = imo_join_source(imo, idx, ssa); + if (error) { + /* + * Drop inp lock before calling in_delmulti(), + * to prevent a lock order reversal. + */ + --imo->imo_num_memberships; + INP_UNLOCK(inp); + in_delmulti(inm); + return (error); + } + } + +out_locked: + INP_UNLOCK(inp); + return (error); +} + +/* + * Leave an IPv4 multicast group on an inpcb, possibly with a source. 
+ */ +static int +inp_leave_group(struct inpcb *inp, struct sockopt *sopt) +{ + struct group_source_req gsr; + struct ip_mreq_source mreqs; + sockunion_t *gsa, *ssa; + struct ifnet *ifp; + struct in_mfilter *imf; + struct ip_moptions *imo; + struct in_msource *ims, *tims; + struct in_multi *inm; + size_t idx; + int error; + + ifp = NULL; + error = 0; + + memset(&gsr, 0, sizeof(struct group_source_req)); + gsa = (sockunion_t *)&gsr.gsr_group; + gsa->ss.ss_family = AF_UNSPEC; + ssa = (sockunion_t *)&gsr.gsr_source; + ssa->ss.ss_family = AF_UNSPEC; + + switch (sopt->sopt_name) { + case IP_DROP_MEMBERSHIP: + case IP_DROP_SOURCE_MEMBERSHIP: + if (sopt->sopt_name == IP_DROP_MEMBERSHIP) { + error = sooptcopyin(sopt, &mreqs, + sizeof(struct ip_mreq), + sizeof(struct ip_mreq)); + /* + * Swap interface and sourceaddr arguments, + * as ip_mreq and ip_mreq_source are laid + * out differently. + */ + mreqs.imr_interface = mreqs.imr_sourceaddr; + mreqs.imr_sourceaddr.s_addr = INADDR_ANY; + } else if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { + error = sooptcopyin(sopt, &mreqs, + sizeof(struct ip_mreq_source), + sizeof(struct ip_mreq_source)); + } + if (error) + return (error); + + gsa->sin.sin_family = AF_INET; + gsa->sin.sin_len = sizeof(struct sockaddr_in); + gsa->sin.sin_addr = mreqs.imr_multiaddr; + + if (sopt->sopt_name == IP_DROP_SOURCE_MEMBERSHIP) { + ssa->sin.sin_family = AF_INET; + ssa->sin.sin_len = sizeof(struct sockaddr_in); + ssa->sin.sin_addr = mreqs.imr_sourceaddr; + } + + if (gsa->sin.sin_addr.s_addr != INADDR_ANY) + INADDR_TO_IFP(mreqs.imr_interface, ifp); + +#ifdef DIAGNOSTIC + if (bootverbose) { + printf("%s: imr_interface = %s, ifp = %p\n", + __func__, inet_ntoa(mreqs.imr_interface), ifp); + } +#endif + break; + + case MCAST_LEAVE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + if (sopt->sopt_name == MCAST_LEAVE_GROUP) { + error = sooptcopyin(sopt, &gsr, + sizeof(struct group_req), + sizeof(struct group_req)); + } else if (sopt->sopt_name == 
MCAST_LEAVE_SOURCE_GROUP) { + error = sooptcopyin(sopt, &gsr, + sizeof(struct group_source_req), + sizeof(struct group_source_req)); + } + if (error) + return (error); + + if (gsa->sin.sin_family != AF_INET || + gsa->sin.sin_len != sizeof(struct sockaddr_in)) + return (EINVAL); + + if (sopt->sopt_name == MCAST_LEAVE_SOURCE_GROUP) { + if (ssa->sin.sin_family != AF_INET || + ssa->sin.sin_len != sizeof(struct sockaddr_in)) + return (EINVAL); + } + + if (gsr.gsr_interface == 0 || if_index < gsr.gsr_interface) + return (EADDRNOTAVAIL); + + ifp = ifnet_byindex(gsr.gsr_interface); + break; + + default: +#ifdef DIAGNOSTIC + if (bootverbose) { + printf("%s: unknown sopt_name %d\n", __func__, + sopt->sopt_name); + } +#endif + return (EOPNOTSUPP); + break; + } + + if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) + return (EINVAL); + + /* + * Find the membership in the membership array. + */ + imo = inp_findmoptions(inp); + idx = imo_match_group(imo, ifp, &gsa->sa); + if (idx == -1) { + error = EADDRNOTAVAIL; + goto out_locked; + } + imf = &imo->imo_mfilters[idx]; + + /* + * If we were instructed only to leave a given source, do so. + */ + if (ssa->ss.ss_family != AF_UNSPEC) { + if (imf->imf_nsources == 0 || + imf->imf_fmode == MCAST_EXCLUDE) { + /* + * Attempting to SSM leave an ASM group + * is an error; should use *_BLOCK_SOURCE instead. + * Attempting to SSM leave a source in a group when + * the socket is in 'exclude mode' is also an error. + */ + error = EINVAL; + } else { + error = imo_leave_source(imo, idx, ssa); + } + /* + * If an error occurred, or this source is not the last + * source in the group, do not leave the whole group. + */ + if (error || imf->imf_nsources > 0) + goto out_locked; + } + + /* + * Give up the multicast address record to which the membership points. + */ + inm = imo->imo_membership[idx]; + in_delmulti(inm); + + /* + * Free any source filters for this group if they exist. + * Revert inpcb to the default MCAST_EXCLUDE state. 
+ */ + if (imo->imo_mfilters != NULL) { + TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) { + TAILQ_REMOVE(&imf->imf_sources, ims, ims_next); + FREE(ims, M_IPMSOURCE); + imf->imf_nsources--; + } + KASSERT(imf->imf_nsources == 0, + ("%s: imf_nsources not 0", __func__)); + KASSERT(TAILQ_EMPTY(&imf->imf_sources), + ("%s: imf_sources not empty", __func__)); + imf->imf_fmode = MCAST_EXCLUDE; + } + + /* + * Remove the gap in the membership array. + */ + for (++idx; idx < imo->imo_num_memberships; ++idx) + imo->imo_membership[idx-1] = imo->imo_membership[idx]; + imo->imo_num_memberships--; + +out_locked: + INP_UNLOCK(inp); + return (error); +} + +/* + * Select the interface for transmitting IPv4 multicast datagrams. + * + * Either an instance of struct in_addr or an instance of struct ip_mreqn + * may be passed to this socket option. An address of INADDR_ANY or an + * interface index of 0 is used to remove a previous selection. + * When no interface is selected, one is chosen for every send. + */ +static int +inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) +{ + struct in_addr addr; + struct ip_mreqn mreqn; + struct ifnet *ifp; + struct ip_moptions *imo; + int error; + + if (sopt->sopt_valsize == sizeof(struct ip_mreqn)) { + /* + * An interface index was specified using the + * Linux-derived ip_mreqn structure. + */ + error = sooptcopyin(sopt, &mreqn, sizeof(struct ip_mreqn), + sizeof(struct ip_mreqn)); + if (error) + return (error); + + if (mreqn.imr_ifindex < 0 || if_index < mreqn.imr_ifindex) + return (EINVAL); + + if (mreqn.imr_ifindex == 0) { + ifp = NULL; + } else { + ifp = ifnet_byindex(mreqn.imr_ifindex); + if (ifp == NULL) + return (EADDRNOTAVAIL); + } + } else { + /* + * An interface was specified by IPv4 address. + * This is the traditional BSD usage. 
+ */ + error = sooptcopyin(sopt, &addr, sizeof(struct in_addr), + sizeof(struct in_addr)); + if (error) + return (error); + if (addr.s_addr == INADDR_ANY) { + ifp = NULL; + } else { + INADDR_TO_IFP(addr, ifp); + if (ifp == NULL) + return (EADDRNOTAVAIL); + } +#ifdef DIAGNOSTIC + if (bootverbose) { + printf("%s: ifp = %p, addr = %s\n", + __func__, ifp, inet_ntoa(addr)); /* XXX INET6 */ + } +#endif + } + + /* Reject interfaces which do not support multicast. */ + if (ifp != NULL && (ifp->if_flags & IFF_MULTICAST) == 0) + return (EOPNOTSUPP); + + imo = inp_findmoptions(inp); + imo->imo_multicast_ifp = ifp; + imo->imo_multicast_addr.s_addr = INADDR_ANY; + INP_UNLOCK(inp); + + return (0); +} + +/* + * Atomically set source filters on a socket for an IPv4 multicast group. + */ +static int +inp_set_source_filters(struct inpcb *inp, struct sockopt *sopt) +{ + struct __msfilterreq msfr; + sockunion_t *gsa; + struct ifnet *ifp; + struct in_mfilter *imf; + struct ip_moptions *imo; + struct in_msource *ims, *tims; + size_t idx; + int error; + + error = sooptcopyin(sopt, &msfr, sizeof(struct __msfilterreq), + sizeof(struct __msfilterreq)); + if (error) + return (error); + + if (msfr.msfr_nsrcs > IP_MAX_SOURCE_FILTER || + (msfr.msfr_fmode != MCAST_EXCLUDE && + msfr.msfr_fmode != MCAST_INCLUDE)) + return (EINVAL); + + if (msfr.msfr_group.ss_family != AF_INET || + msfr.msfr_group.ss_len != sizeof(struct sockaddr_in)) + return (EINVAL); + + gsa = (sockunion_t *)&msfr.msfr_group; + if (!IN_MULTICAST(ntohl(gsa->sin.sin_addr.s_addr))) + return (EINVAL); + + gsa->sin.sin_port = 0; /* ignore port */ + + if (msfr.msfr_ifindex == 0 || if_index < msfr.msfr_ifindex) + return (EADDRNOTAVAIL); + + ifp = ifnet_byindex(msfr.msfr_ifindex); + if (ifp == NULL) + return (EADDRNOTAVAIL); + + /* + * Take the INP lock. + * Check if this socket is a member of this group. 
+ */ + imo = inp_findmoptions(inp); + idx = imo_match_group(imo, ifp, &gsa->sa); + if (idx == -1 || imo->imo_mfilters == NULL) { + error = EADDRNOTAVAIL; + goto out_locked; + } + imf = &imo->imo_mfilters[idx]; + +#ifdef DIAGNOSTIC + if (bootverbose) + printf("%s: clearing source list\n", __func__); +#endif + + /* + * Remove any existing source filters. + */ + TAILQ_FOREACH_SAFE(ims, &imf->imf_sources, ims_next, tims) { + TAILQ_REMOVE(&imf->imf_sources, ims, ims_next); + FREE(ims, M_IPMSOURCE); + imf->imf_nsources--; + } + KASSERT(imf->imf_nsources == 0, + ("%s: source list not cleared", __func__)); + + /* + * Apply any new source filters, if present. + */ + if (msfr.msfr_nsrcs > 0) { + struct in_msource **pnims; + struct in_msource *nims; + struct sockaddr_storage *kss; + struct sockaddr_storage *pkss; + sockunion_t *psu; + int i, j; + + /* + * Drop the inp lock so we may sleep if we need to + * in order to satisfy a malloc request. + * We will re-take it before changing socket state. + */ + INP_UNLOCK(inp); +#ifdef DIAGNOSTIC + if (bootverbose) { + printf("%s: loading %lu source list entries\n", + __func__, (unsigned long)msfr.msfr_nsrcs); + } +#endif + /* + * Make a copy of the user-space source vector so + * that we may copy them with a single copyin. This + * allows us to deal with page faults up-front. + */ + MALLOC(kss, struct sockaddr_storage *, + sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs, + M_TEMP, M_WAITOK); + error = copyin(msfr.msfr_srcs, kss, + sizeof(struct sockaddr_storage) * msfr.msfr_nsrcs); + if (error) { + FREE(kss, M_TEMP); + return (error); + } + + /* + * Perform argument checking on every sockaddr_storage + * structure in the vector provided to us. Overwrite + * fields which should not apply to source entries. + * TODO: Check for duplicate sources on this pass. 
+ */ + psu = (sockunion_t *)kss; + for (i = 0; i < msfr.msfr_nsrcs; i++, psu++) { + switch (psu->ss.ss_family) { + case AF_INET: + if (psu->sin.sin_len != + sizeof(struct sockaddr_in)) { + error = EINVAL; + } else { + psu->sin.sin_port = 0; + } + break; +#ifdef notyet + case AF_INET6; + if (psu->sin6.sin6_len != + sizeof(struct sockaddr_in6)) { + error = EINVAL; + } else { + psu->sin6.sin6_port = 0; + psu->sin6.sin6_flowinfo = 0; + } + break; +#endif + default: + error = EAFNOSUPPORT; + break; + } + if (error) + break; + } + if (error) { + FREE(kss, M_TEMP); + return (error); + } + + /* + * Allocate a block to track all the in_msource + * entries we are about to allocate, in case we + * abruptly need to free them. + */ + MALLOC(pnims, struct in_msource **, + sizeof(struct in_msource *) * msfr.msfr_nsrcs, + M_TEMP, M_WAITOK | M_ZERO); + + /* + * Allocate up to nsrcs individual chunks. + * If we encounter an error, backtrack out of + * all allocations cleanly; updates must be atomic. + */ + pkss = kss; + nims = NULL; + for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) { + MALLOC(nims, struct in_msource *, + sizeof(struct in_msource) * msfr.msfr_nsrcs, + M_IPMSOURCE, M_WAITOK | M_ZERO); + pnims[i] = nims; + } + if (i < msfr.msfr_nsrcs) { + for (j = 0; j < i; j++) { + if (pnims[j] != NULL) + FREE(pnims[j], M_IPMSOURCE); + } + FREE(pnims, M_TEMP); + FREE(kss, M_TEMP); + return (ENOBUFS); + } + + INP_UNLOCK_ASSERT(inp); + + /* + * Finally, apply the filters to the socket. + * Re-take the inp lock; we are changing socket state. + */ + pkss = kss; + INP_LOCK(inp); + for (i = 0; i < msfr.msfr_nsrcs; i++, pkss++) { + memcpy(&(pnims[i]->ims_addr), pkss, + sizeof(struct sockaddr_storage)); + TAILQ_INSERT_TAIL(&imf->imf_sources, pnims[i], + ims_next); + imf->imf_nsources++; + } + FREE(pnims, M_TEMP); + FREE(kss, M_TEMP); + } + + /* + * Update the filter mode on the socket before releasing the inpcb. 
+	 */
+	INP_LOCK_ASSERT(inp);
+	imf->imf_fmode = msfr.msfr_fmode;
+
+out_locked:
+	INP_UNLOCK(inp);
+	return (error);
+}
+
+/*
+ * Set the IP multicast options in response to user setsockopt().
+ * Returns 0 or an errno: EOPNOTSUPP for an unknown option (or for
+ * IP_MULTICAST_VIF while legal_vif_num is NULL), EINVAL for an illegal
+ * VIF or a TTL above 255, otherwise the result of sooptcopyin() or of
+ * the per-option helper. The inpcb must be unlocked on return. Several
+ * options duplicate the unicast API, but the code cannot be merged: the
+ * multicast API's effects must stay separate and distinct.
+ */
+int
+inp_setmoptions(struct inpcb *inp, struct sockopt *sopt)
+{
+	struct ip_moptions	*imo;
+	int			 error;
+
+	error = 0;
+
+	switch (sopt->sopt_name) {
+	case IP_MULTICAST_VIF: {
+		int vifi;
+		/*
+		 * Select a multicast VIF for transmission.
+		 * Only useful if multicast forwarding is active.
+		 */
+		if (legal_vif_num == NULL) {
+			error = EOPNOTSUPP;
+			break;
+		}
+		error = sooptcopyin(sopt, &vifi, sizeof(int), sizeof(int));
+		if (error)
+			break;
+		if (!legal_vif_num(vifi) && (vifi != -1)) {
+			error = EINVAL;
+			break;
+		}
+		imo = inp_findmoptions(inp);
+		imo->imo_multicast_vif = vifi;
+		INP_UNLOCK(inp);
+		break;
+	}
+
+	case IP_MULTICAST_IF:
+		error = inp_set_multicast_if(inp, sopt);
+		break;
+
+	case IP_MULTICAST_TTL: {
+		u_char ttl;
+
+		/*
+		 * Set the IP time-to-live for outgoing multicast packets.
+		 * The original multicast API required a char argument,
+		 * which is inconsistent with the rest of the socket API.
+		 * We allow either a char or an int.
+		 */
+		if (sopt->sopt_valsize == sizeof(u_char)) {
+			error = sooptcopyin(sopt, &ttl, sizeof(u_char),
+			    sizeof(u_char));
+			if (error)
+				break;
+		} else {
+			u_int ittl;
+
+			error = sooptcopyin(sopt, &ittl, sizeof(u_int),
+			    sizeof(u_int));
+			if (error)
+				break;
+			if (ittl > 255) {
+				error = EINVAL;
+				break;
+			}
+			ttl = (u_char)ittl;
+		}
+		imo = inp_findmoptions(inp);
+		imo->imo_multicast_ttl = ttl;
+		INP_UNLOCK(inp);
+		break;
+	}
+
+	case IP_MULTICAST_LOOP: {
+		u_char loop;
+
+		/*
+		 * Set the loopback flag for outgoing multicast packets.
+		 * Any non-zero value enables loopback; test the full int
+		 * before narrowing so that e.g. 0x100 is not read as 0.
+		 * As with the TTL, either a char or an int is accepted.
+		 */
+		if (sopt->sopt_valsize == sizeof(u_char)) {
+			error = sooptcopyin(sopt, &loop, sizeof(u_char),
+			    sizeof(u_char));
+			if (error)
+				break;
+		} else {
+			u_int iloop;
+
+			error = sooptcopyin(sopt, &iloop, sizeof(u_int),
+			    sizeof(u_int));
+			if (error)
+				break;
+			loop = (iloop != 0);
+		}
+		imo = inp_findmoptions(inp);
+		imo->imo_multicast_loop = !!loop;
+		INP_UNLOCK(inp);
+		break;
+	}
+
+	case IP_ADD_MEMBERSHIP:
+	case IP_ADD_SOURCE_MEMBERSHIP:
+	case MCAST_JOIN_GROUP:
+	case MCAST_JOIN_SOURCE_GROUP:
+		error = inp_join_group(inp, sopt);
+		break;
+
+	case IP_DROP_MEMBERSHIP:
+	case IP_DROP_SOURCE_MEMBERSHIP:
+	case MCAST_LEAVE_GROUP:
+	case MCAST_LEAVE_SOURCE_GROUP:
+		error = inp_leave_group(inp, sopt);
+		break;
+
+	case IP_BLOCK_SOURCE:
+	case IP_UNBLOCK_SOURCE:
+	case MCAST_BLOCK_SOURCE:
+	case MCAST_UNBLOCK_SOURCE:
+		error = inp_change_source_filter(inp, sopt);
+		break;
+
+	case IP_MSFILTER:
+		error = inp_set_source_filters(inp, sopt);
+		break;
+
+	default:
+		error = EOPNOTSUPP;
+		break;
+	}
+
+	INP_UNLOCK_ASSERT(inp);
+
+	return (error);
+}
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index 61f2894706c4..e91ac55a96ab 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -735,7 +735,8 @@ in_pcbfree(struct inpcb *inp)
 	in_pcbremlists(inp);
if (inp->inp_options) (void)m_free(inp->inp_options); - ip_freemoptions(inp->inp_moptions); + if (inp->inp_moptions != NULL) + inp_freemoptions(inp->inp_moptions); inp->inp_vflag = 0; #ifdef MAC diff --git a/sys/netinet/in_var.h b/sys/netinet/in_var.h index 7605199c8d88..47a160a8b7c0 100644 --- a/sys/netinet/in_var.h +++ b/sys/netinet/in_var.h @@ -147,6 +147,12 @@ struct router_info { int rti_type; /* type of router which is querier on this interface */ int rti_time; /* # of slow timeouts since last old query */ SLIST_ENTRY(router_info) rti_list; +#ifdef notyet + int rti_timev1; /* IGMPv1 querier present */ + int rti_timev2; /* IGMPv2 querier present */ + int rti_timer; /* report to general query */ + int rti_qrv; /* querier robustness */ +#endif }; /* @@ -166,8 +172,44 @@ struct in_multi { u_int inm_state; /* state of the membership */ struct router_info *inm_rti; /* router info*/ u_int inm_refcount; /* reference count */ +#ifdef notyet /* IGMPv3 source-specific multicast fields */ + TAILQ_HEAD(, in_msfentry) inm_msf; /* all active source filters */ + TAILQ_HEAD(, in_msfentry) inm_msf_record; /* recorded sources */ + TAILQ_HEAD(, in_msfentry) inm_msf_exclude; /* exclude sources */ + TAILQ_HEAD(, in_msfentry) inm_msf_include; /* include sources */ + /* XXX: should this lot go to the router_info structure? */ + /* XXX: can/should these be callouts? */ + /* IGMP protocol timers */ + int32_t inm_ti_curstate; /* current state timer */ + int32_t inm_ti_statechg; /* state change timer */ + /* IGMP report timers */ + uint16_t inm_rpt_statechg; /* state change report timer */ + uint16_t inm_rpt_toxx; /* fmode change report timer */ + /* IGMP protocol state */ + uint16_t inm_fmode; /* filter mode */ + uint32_t inm_recsrc_count; /* # of recorded sources */ + uint16_t inm_exclude_sock_count; /* # of exclude-mode sockets */ + uint16_t inm_gass_count; /* # of g-a-s queries */ +#endif }; +#ifdef notyet +/* + * Internet multicast source filter list. 
This list is used to store + * IP multicast source addresses for each membership on an interface. + * TODO: Allocate these structures using UMA. + * TODO: Find an easier way of linking the struct into two lists at once. + */ +struct in_msfentry { + TAILQ_ENTRY(in_msfentry) isf_link; /* next filter in all-list */ + TAILQ_ENTRY(in_msfentry) isf_next; /* next filter in queue */ + struct in_addr isf_addr; /* the address of this source */ + uint16_t isf_refcount; /* reference count */ + uint16_t isf_reporttag; /* what to report to the IGMP router */ + uint16_t isf_rexmit; /* retransmission state/count */ +}; +#endif + #ifdef _KERNEL #ifdef SYSCTL_DECL @@ -246,6 +288,12 @@ do { \ } while(0) struct route; +struct ip_moptions; + +size_t imo_match_group(struct ip_moptions *, struct ifnet *, + struct sockaddr *); +struct in_msource *imo_match_source(struct ip_moptions *, size_t, + struct sockaddr *); struct in_multi *in_addmulti(struct in_addr *, struct ifnet *); void in_delmulti(struct in_multi *); void in_delmulti_locked(struct in_multi *); diff --git a/sys/netinet/ip_carp.c b/sys/netinet/ip_carp.c index 32b868084a53..4cabe30bfac4 100644 --- a/sys/netinet/ip_carp.c +++ b/sys/netinet/ip_carp.c @@ -380,6 +380,7 @@ carp_clone_create(struct if_clone *ifc, int unit, caddr_t params) sc->sc_imo.imo_membership = (struct in_multi **)malloc( (sizeof(struct in_multi *) * IP_MIN_MEMBERSHIPS), M_CARP, M_WAITOK); + sc->sc_imo.imo_mfilters = NULL; sc->sc_imo.imo_max_memberships = IP_MIN_MEMBERSHIPS; sc->sc_imo.imo_multicast_vif = -1; @@ -1397,6 +1398,8 @@ carp_multicast_cleanup(struct carp_softc *sc) imo->imo_membership[n] = NULL; } } + KASSERT(imo->imo_mfilters == NULL, + ("%s: imo_mfilters != NULL", __func__)); imo->imo_num_memberships = 0; imo->imo_multicast_ifp = NULL; } diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index 4c4a51f5442e..2b800dc35e56 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -73,8 +73,6 @@ #include -static 
MALLOC_DEFINE(M_IPMOPTS, "ip_moptions", "internet multicast options"); - #define print_ip(x, a, y) printf("%s %d.%d.%d.%d%s",\ x, (ntohl(a.s_addr)>>24)&0xFF,\ (ntohl(a.s_addr)>>16)&0xFF,\ @@ -89,11 +87,8 @@ SYSCTL_INT(_net_inet_ip, OID_AUTO, mbuf_frag_size, CTLFLAG_RW, &mbuf_frag_size, 0, "Fragment outgoing mbufs to this size"); #endif -static struct ifnet *ip_multicast_if(struct in_addr *, int *); static void ip_mloopback (struct ifnet *, struct mbuf *, struct sockaddr_in *, int); -static int ip_getmoptions(struct inpcb *, struct sockopt *); -static int ip_setmoptions(struct inpcb *, struct sockopt *); extern struct protosw inetsw[]; @@ -930,13 +925,28 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) break; #undef OPTSET + /* + * Multicast socket options are processed by the in_mcast + * module. + */ case IP_MULTICAST_IF: case IP_MULTICAST_VIF: case IP_MULTICAST_TTL: case IP_MULTICAST_LOOP: case IP_ADD_MEMBERSHIP: case IP_DROP_MEMBERSHIP: - error = ip_setmoptions(inp, sopt); + case IP_ADD_SOURCE_MEMBERSHIP: + case IP_DROP_SOURCE_MEMBERSHIP: + case IP_BLOCK_SOURCE: + case IP_UNBLOCK_SOURCE: + case IP_MSFILTER: + case MCAST_JOIN_GROUP: + case MCAST_LEAVE_GROUP: + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + error = inp_setmoptions(inp, sopt); break; case IP_PORTRANGE: @@ -1095,11 +1105,16 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) error = sooptcopyout(sopt, &optval, sizeof optval); break; + /* + * Multicast socket options are processed by the in_mcast + * module. + */ case IP_MULTICAST_IF: case IP_MULTICAST_VIF: case IP_MULTICAST_TTL: case IP_MULTICAST_LOOP: - error = ip_getmoptions(inp, sopt); + case IP_MSFILTER: + error = inp_getmoptions(inp, sopt); break; #if defined(IPSEC) || defined(FAST_IPSEC) @@ -1131,477 +1146,6 @@ ip_ctloutput(struct socket *so, struct sockopt *sopt) return (error); } -/* - * XXX - * The whole multicast option thing needs to be re-thought. 
- * Several of these options are equally applicable to non-multicast - * transmission, and one (IP_MULTICAST_TTL) totally duplicates a - * standard option (IP_TTL). - */ - -/* - * following RFC1724 section 3.3, 0.0.0.0/8 is interpreted as interface index. - */ -static struct ifnet * -ip_multicast_if(struct in_addr *a, int *ifindexp) -{ - int ifindex; - struct ifnet *ifp; - - if (ifindexp) - *ifindexp = 0; - if (ntohl(a->s_addr) >> 24 == 0) { - ifindex = ntohl(a->s_addr) & 0xffffff; - if (ifindex < 0 || if_index < ifindex) - return NULL; - ifp = ifnet_byindex(ifindex); - if (ifindexp) - *ifindexp = ifindex; - } else { - INADDR_TO_IFP(*a, ifp); - } - return ifp; -} - -/* - * Given an inpcb, return its multicast options structure pointer. Accepts - * an unlocked inpcb pointer, but will return it locked. May sleep. - */ -static struct ip_moptions * -ip_findmoptions(struct inpcb *inp) -{ - struct ip_moptions *imo; - struct in_multi **immp; - - INP_LOCK(inp); - if (inp->inp_moptions != NULL) - return (inp->inp_moptions); - - INP_UNLOCK(inp); - - imo = (struct ip_moptions*)malloc(sizeof(*imo), M_IPMOPTS, M_WAITOK); - immp = (struct in_multi **)malloc((sizeof(*immp) * IP_MIN_MEMBERSHIPS), - M_IPMOPTS, M_WAITOK); - - imo->imo_multicast_ifp = NULL; - imo->imo_multicast_addr.s_addr = INADDR_ANY; - imo->imo_multicast_vif = -1; - imo->imo_multicast_ttl = IP_DEFAULT_MULTICAST_TTL; - imo->imo_multicast_loop = IP_DEFAULT_MULTICAST_LOOP; - imo->imo_num_memberships = 0; - imo->imo_max_memberships = IP_MIN_MEMBERSHIPS; - imo->imo_membership = immp; - - INP_LOCK(inp); - if (inp->inp_moptions != NULL) { - free(immp, M_IPMOPTS); - free(imo, M_IPMOPTS); - return (inp->inp_moptions); - } - inp->inp_moptions = imo; - return (imo); -} - -/* - * Set the IP multicast options in response to user setsockopt(). 
- */ -static int -ip_setmoptions(struct inpcb *inp, struct sockopt *sopt) -{ - int error = 0; - int i; - struct in_addr addr; - struct ip_mreq mreq; - struct ifnet *ifp; - struct ip_moptions *imo; - struct route ro; - struct sockaddr_in *dst; - int ifindex; - int s; - - switch (sopt->sopt_name) { - /* store an index number for the vif you wanna use in the send */ - case IP_MULTICAST_VIF: - if (legal_vif_num == 0) { - error = EOPNOTSUPP; - break; - } - error = sooptcopyin(sopt, &i, sizeof i, sizeof i); - if (error) - break; - if (!legal_vif_num(i) && (i != -1)) { - error = EINVAL; - break; - } - imo = ip_findmoptions(inp); - imo->imo_multicast_vif = i; - INP_UNLOCK(inp); - break; - - case IP_MULTICAST_IF: - /* - * Select the interface for outgoing multicast packets. - */ - error = sooptcopyin(sopt, &addr, sizeof addr, sizeof addr); - if (error) - break; - /* - * INADDR_ANY is used to remove a previous selection. - * When no interface is selected, a default one is - * chosen every time a multicast packet is sent. - */ - imo = ip_findmoptions(inp); - if (addr.s_addr == INADDR_ANY) { - imo->imo_multicast_ifp = NULL; - INP_UNLOCK(inp); - break; - } - /* - * The selected interface is identified by its local - * IP address. Find the interface and confirm that - * it supports multicasting. - */ - s = splimp(); - ifp = ip_multicast_if(&addr, &ifindex); - if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { - INP_UNLOCK(inp); - splx(s); - error = EADDRNOTAVAIL; - break; - } - imo->imo_multicast_ifp = ifp; - if (ifindex) - imo->imo_multicast_addr = addr; - else - imo->imo_multicast_addr.s_addr = INADDR_ANY; - INP_UNLOCK(inp); - splx(s); - break; - - case IP_MULTICAST_TTL: - /* - * Set the IP time-to-live for outgoing multicast packets. - * The original multicast API required a char argument, - * which is inconsistent with the rest of the socket API. - * We allow either a char or an int. 
- */ - if (sopt->sopt_valsize == 1) { - u_char ttl; - error = sooptcopyin(sopt, &ttl, 1, 1); - if (error) - break; - imo = ip_findmoptions(inp); - imo->imo_multicast_ttl = ttl; - INP_UNLOCK(inp); - } else { - u_int ttl; - error = sooptcopyin(sopt, &ttl, sizeof ttl, - sizeof ttl); - if (error) - break; - if (ttl > 255) - error = EINVAL; - else { - imo = ip_findmoptions(inp); - imo->imo_multicast_ttl = ttl; - INP_UNLOCK(inp); - } - } - break; - - case IP_MULTICAST_LOOP: - /* - * Set the loopback flag for outgoing multicast packets. - * Must be zero or one. The original multicast API required a - * char argument, which is inconsistent with the rest - * of the socket API. We allow either a char or an int. - */ - if (sopt->sopt_valsize == 1) { - u_char loop; - error = sooptcopyin(sopt, &loop, 1, 1); - if (error) - break; - imo = ip_findmoptions(inp); - imo->imo_multicast_loop = !!loop; - INP_UNLOCK(inp); - } else { - u_int loop; - error = sooptcopyin(sopt, &loop, sizeof loop, - sizeof loop); - if (error) - break; - imo = ip_findmoptions(inp); - imo->imo_multicast_loop = !!loop; - INP_UNLOCK(inp); - } - break; - - case IP_ADD_MEMBERSHIP: - /* - * Add a multicast group membership. - * Group must be a valid IP multicast address. - */ - error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); - if (error) - break; - - if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { - error = EINVAL; - break; - } - s = splimp(); - /* - * If no interface address was provided, use the interface of - * the route to the given multicast address. 
- */ - if (mreq.imr_interface.s_addr == INADDR_ANY) { - bzero((caddr_t)&ro, sizeof(ro)); - dst = (struct sockaddr_in *)&ro.ro_dst; - dst->sin_len = sizeof(*dst); - dst->sin_family = AF_INET; - dst->sin_addr = mreq.imr_multiaddr; - rtalloc_ign(&ro, RTF_CLONING); - if (ro.ro_rt == NULL) { - error = EADDRNOTAVAIL; - splx(s); - break; - } - ifp = ro.ro_rt->rt_ifp; - RTFREE(ro.ro_rt); - } - else { - ifp = ip_multicast_if(&mreq.imr_interface, NULL); - } - - /* - * See if we found an interface, and confirm that it - * supports multicast. - */ - if (ifp == NULL || (ifp->if_flags & IFF_MULTICAST) == 0) { - error = EADDRNOTAVAIL; - splx(s); - break; - } - /* - * See if the membership already exists or if all the - * membership slots are full. - */ - imo = ip_findmoptions(inp); - for (i = 0; i < imo->imo_num_memberships; ++i) { - if (imo->imo_membership[i]->inm_ifp == ifp && - imo->imo_membership[i]->inm_addr.s_addr - == mreq.imr_multiaddr.s_addr) - break; - } - if (i < imo->imo_num_memberships) { - INP_UNLOCK(inp); - error = EADDRINUSE; - splx(s); - break; - } - if (imo->imo_num_memberships == imo->imo_max_memberships) { - struct in_multi **nmships, **omships; - size_t newmax; - /* - * Resize the vector to next power-of-two minus 1. If the - * size would exceed the maximum then we know we've really - * run out of entries. Otherwise, we realloc() the vector - * with the INP lock held to avoid introducing a race. - */ - nmships = NULL; - omships = imo->imo_membership; - newmax = ((imo->imo_max_memberships + 1) * 2) - 1; - if (newmax <= IP_MAX_MEMBERSHIPS) { - nmships = (struct in_multi **)realloc(omships, -sizeof(*nmships) * newmax, M_IPMOPTS, M_NOWAIT); - if (nmships != NULL) { - imo->imo_membership = nmships; - imo->imo_max_memberships = newmax; - } - } - if (nmships == NULL) { - INP_UNLOCK(inp); - error = ETOOMANYREFS; - splx(s); - break; - } - } - /* - * Everything looks good; add a new record to the multicast - * address list for the given interface. 
- */ - if ((imo->imo_membership[i] = - in_addmulti(&mreq.imr_multiaddr, ifp)) == NULL) { - INP_UNLOCK(inp); - error = ENOBUFS; - splx(s); - break; - } - ++imo->imo_num_memberships; - INP_UNLOCK(inp); - splx(s); - break; - - case IP_DROP_MEMBERSHIP: - /* - * Drop a multicast group membership. - * Group must be a valid IP multicast address. - */ - error = sooptcopyin(sopt, &mreq, sizeof mreq, sizeof mreq); - if (error) - break; - - if (!IN_MULTICAST(ntohl(mreq.imr_multiaddr.s_addr))) { - error = EINVAL; - break; - } - - s = splimp(); - /* - * If an interface address was specified, get a pointer - * to its ifnet structure. - */ - if (mreq.imr_interface.s_addr == INADDR_ANY) - ifp = NULL; - else { - ifp = ip_multicast_if(&mreq.imr_interface, NULL); - if (ifp == NULL) { - error = EADDRNOTAVAIL; - splx(s); - break; - } - } - /* - * Find the membership in the membership array. - */ - imo = ip_findmoptions(inp); - for (i = 0; i < imo->imo_num_memberships; ++i) { - if ((ifp == NULL || - imo->imo_membership[i]->inm_ifp == ifp) && - imo->imo_membership[i]->inm_addr.s_addr == - mreq.imr_multiaddr.s_addr) - break; - } - if (i == imo->imo_num_memberships) { - INP_UNLOCK(inp); - error = EADDRNOTAVAIL; - splx(s); - break; - } - /* - * Give up the multicast address record to which the - * membership points. - */ - in_delmulti(imo->imo_membership[i]); - /* - * Remove the gap in the membership array. - */ - for (++i; i < imo->imo_num_memberships; ++i) - imo->imo_membership[i-1] = imo->imo_membership[i]; - --imo->imo_num_memberships; - INP_UNLOCK(inp); - splx(s); - break; - - default: - error = EOPNOTSUPP; - break; - } - - return (error); -} - -/* - * Return the IP multicast options in response to user getsockopt(). 
- */ -static int -ip_getmoptions(struct inpcb *inp, struct sockopt *sopt) -{ - struct ip_moptions *imo; - struct in_addr addr; - struct in_ifaddr *ia; - int error, optval; - u_char coptval; - - INP_LOCK(inp); - imo = inp->inp_moptions; - - error = 0; - switch (sopt->sopt_name) { - case IP_MULTICAST_VIF: - if (imo != NULL) - optval = imo->imo_multicast_vif; - else - optval = -1; - INP_UNLOCK(inp); - error = sooptcopyout(sopt, &optval, sizeof optval); - break; - - case IP_MULTICAST_IF: - if (imo == NULL || imo->imo_multicast_ifp == NULL) - addr.s_addr = INADDR_ANY; - else if (imo->imo_multicast_addr.s_addr) { - /* return the value user has set */ - addr = imo->imo_multicast_addr; - } else { - IFP_TO_IA(imo->imo_multicast_ifp, ia); - addr.s_addr = (ia == NULL) ? INADDR_ANY - : IA_SIN(ia)->sin_addr.s_addr; - } - INP_UNLOCK(inp); - error = sooptcopyout(sopt, &addr, sizeof addr); - break; - - case IP_MULTICAST_TTL: - if (imo == 0) - optval = coptval = IP_DEFAULT_MULTICAST_TTL; - else - optval = coptval = imo->imo_multicast_ttl; - INP_UNLOCK(inp); - if (sopt->sopt_valsize == 1) - error = sooptcopyout(sopt, &coptval, 1); - else - error = sooptcopyout(sopt, &optval, sizeof optval); - break; - - case IP_MULTICAST_LOOP: - if (imo == 0) - optval = coptval = IP_DEFAULT_MULTICAST_LOOP; - else - optval = coptval = imo->imo_multicast_loop; - INP_UNLOCK(inp); - if (sopt->sopt_valsize == 1) - error = sooptcopyout(sopt, &coptval, 1); - else - error = sooptcopyout(sopt, &optval, sizeof optval); - break; - - default: - INP_UNLOCK(inp); - error = ENOPROTOOPT; - break; - } - INP_UNLOCK_ASSERT(inp); - - return (error); -} - -/* - * Discard the IP multicast options. 
- */ -void -ip_freemoptions(struct ip_moptions *imo) -{ - register int i; - - if (imo != NULL) { - for (i = 0; i < imo->imo_num_memberships; ++i) - in_delmulti(imo->imo_membership[i]); - free(imo->imo_membership, M_IPMOPTS); - free(imo, M_IPMOPTS); - } -} - /* * Routine called from ip_output() to loop back a copy of an IP multicast * packet to the input queue of a specified interface. Note that this diff --git a/sys/netinet/ip_var.h b/sys/netinet/ip_var.h index 1ad36bb94110..eef4e1f736a1 100644 --- a/sys/netinet/ip_var.h +++ b/sys/netinet/ip_var.h @@ -78,9 +78,29 @@ struct ipoption { char ipopt_list[MAX_IPOPTLEN]; /* options proper */ }; +/* + * Multicast source list entry. + */ +struct in_msource { + TAILQ_ENTRY(in_msource) ims_next; /* next source */ + struct sockaddr_storage ims_addr; /* address of this source */ +}; + +/* + * Multicast filter descriptor; there is one instance per group membership + * on a socket, allocated as an expandable vector hung off ip_moptions. + * struct in_multi contains separate IPv4-stack-wide state for IGMPv3. + */ +struct in_mfilter { + uint16_t imf_fmode; /* filter mode for this socket/group */ + uint16_t imf_nsources; /* # of sources for this socket/group */ + TAILQ_HEAD(, in_msource) imf_sources; /* source list */ +}; + /* * Structure attached to inpcb.ip_moptions and * passed to ip_output when IP multicast options are in use. + * This structure is lazy-allocated. */ struct ip_moptions { struct ifnet *imo_multicast_ifp; /* ifp for outgoing multicasts */ @@ -91,6 +111,7 @@ struct ip_moptions { u_short imo_num_memberships; /* no. memberships this socket */ u_short imo_max_memberships; /* max memberships this socket */ struct in_multi **imo_membership; /* group memberships */ + struct in_mfilter *imo_mfilters; /* source filters */ }; struct ipstat { @@ -127,12 +148,11 @@ struct ipstat { #ifdef _KERNEL -/* - * Flags passed to ip_output as last parameter. 
- */ -#define IP_FORWARDING 0x01 /* most of ip header exists */ -#define IP_RAWOUTPUT 0x02 /* raw ip header exists */ -#define IP_SENDONES 0x04 /* send all-ones broadcast */ +/* flags passed to ip_output as last parameter */ +#define IP_FORWARDING 0x1 /* most of ip header exists */ +#define IP_RAWOUTPUT 0x2 /* raw ip header exists */ +#define IP_SENDONES 0x4 /* send all-ones broadcast */ +#define IP_SENDTOIF 0x8 /* send on specific ifnet */ #define IP_ROUTETOIF SO_DONTROUTE /* 0x10 bypass routing tables */ #define IP_ALLOWBROADCAST SO_BROADCAST /* 0x20 can send broadcast packets */ @@ -167,12 +187,15 @@ extern u_long (*ip_mcast_src)(int); extern int rsvp_on; extern struct pr_usrreqs rip_usrreqs; +void inp_freemoptions(struct ip_moptions *); +int inp_getmoptions(struct inpcb *, struct sockopt *); +int inp_setmoptions(struct inpcb *, struct sockopt *); + int ip_ctloutput(struct socket *, struct sockopt *sopt); void ip_drain(void); void ip_fini(void *xtp); int ip_fragment(struct ip *ip, struct mbuf **m_frag, int mtu, u_long if_hwassist_flags, int sw_csum); -void ip_freemoptions(struct ip_moptions *); void ip_forward(struct mbuf *m, int srcrt); void ip_init(void); extern int diff --git a/sys/netinet/sctp_pcb.c b/sys/netinet/sctp_pcb.c index 5423208e046f..1db8d333ed4b 100644 --- a/sys/netinet/sctp_pcb.c +++ b/sys/netinet/sctp_pcb.c @@ -2791,7 +2791,7 @@ sctp_inpcb_free(struct sctp_inpcb *inp, int immediate, int from) ip_pcb->inp_options = 0; } if (ip_pcb->inp_moptions) { - ip_freemoptions(ip_pcb->inp_moptions); + inp_freemoptions(ip_pcb->inp_moptions); ip_pcb->inp_moptions = 0; } #ifdef INET6 diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index f6031d6d524d..16796996eb15 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -113,10 +113,6 @@ static int blackhole = 0; SYSCTL_INT(_net_inet_udp, OID_AUTO, blackhole, CTLFLAG_RW, &blackhole, 0, "Do not send port unreachables for refused connects"); -static int strict_mcast_mship = 0; 
-SYSCTL_INT(_net_inet_udp, OID_AUTO, strict_mcast_mship, CTLFLAG_RW, - &strict_mcast_mship, 0, "Only send multicast to member sockets"); - struct inpcbhead udb; /* from udp_var.h */ struct inpcbinfo udbinfo; @@ -176,6 +172,7 @@ udp_input(struct mbuf *m, int off) int iphlen = off; struct ip *ip; struct udphdr *uh; + struct ifnet *ifp; struct inpcb *inp; int len; struct ip save_ip; @@ -184,6 +181,7 @@ udp_input(struct mbuf *m, int off) struct m_tag *fwd_tag; #endif + ifp = m->m_pkthdr.rcvif; udpstat.udps_ipackets++; /* @@ -301,25 +299,10 @@ udp_input(struct mbuf *m, int off) INP_INFO_RLOCK(&udbinfo); if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) || - in_broadcast(ip->ip_dst, m->m_pkthdr.rcvif)) { + in_broadcast(ip->ip_dst, ifp)) { struct inpcb *last; + struct ip_moptions *imo; - /* - * Deliver a multicast or broadcast datagram to *all* sockets - * for which the local and remote addresses and ports match - * those of the incoming datagram. This allows more than one - * process to receive multi/broadcasts on the same port. - * (This really ought to be done for unicast datagrams as - * well, but that would cause problems with existing - * applications that open both address-specific sockets and a - * wildcard socket listening to the same port -- they would - * end up receiving duplicates of every unicast datagram. - * Those applications open the multiple sockets to overcome - * an inadequacy of the UDP socket interface, but for - * backwards compatibility we avoid the problem here rather - * than fixing the interface. Maybe 4.5BSD will remedy - * this?) 
- */ last = NULL; LIST_FOREACH(inp, &udb, inp_list) { if (inp->inp_lport != uh->uh_dport) @@ -328,45 +311,83 @@ udp_input(struct mbuf *m, int off) if ((inp->inp_vflag & INP_IPV4) == 0) continue; #endif - if (inp->inp_laddr.s_addr != INADDR_ANY) { - if (inp->inp_laddr.s_addr != ip->ip_dst.s_addr) + if (inp->inp_laddr.s_addr != INADDR_ANY && + inp->inp_laddr.s_addr != ip->ip_dst.s_addr) continue; - } - if (inp->inp_faddr.s_addr != INADDR_ANY) { - if (inp->inp_faddr.s_addr != - ip->ip_src.s_addr || - inp->inp_fport != uh->uh_sport) + if (inp->inp_faddr.s_addr != INADDR_ANY && + inp->inp_faddr.s_addr != ip->ip_src.s_addr) continue; - } + /* + * XXX: Do not check source port of incoming datagram + * unless inp_connect() has been called to bind the + * fport part of the 4-tuple; the source could be + * trying to talk to us with an ephemeral port. + */ + if (inp->inp_fport != 0 && + inp->inp_fport != uh->uh_sport) + continue; + + INP_LOCK(inp); /* - * Check multicast packets to make sure they are only - * sent to sockets with multicast memberships for the - * packet's destination address and arrival interface + * Handle socket delivery policy for any-source + * and source-specific multicast. 
[RFC3678] */ -#define MSHIP(_inp, n) ((_inp)->inp_moptions->imo_membership[(n)]) -#define NMSHIPS(_inp) ((_inp)->inp_moptions->imo_num_memberships) - INP_LOCK(inp); - if (strict_mcast_mship && inp->inp_moptions != NULL) { - int mship, foundmship = 0; + imo = inp->inp_moptions; + if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr)) && + imo != NULL) { + struct sockaddr_in sin; + struct in_msource *ims; + int blocked, mode; + size_t idx; - for (mship = 0; mship < NMSHIPS(inp); - mship++) { - if (MSHIP(inp, mship)->inm_addr.s_addr - == ip->ip_dst.s_addr && - MSHIP(inp, mship)->inm_ifp - == m->m_pkthdr.rcvif) { - foundmship = 1; - break; + bzero(&sin, sizeof(struct sockaddr_in)); + sin.sin_len = sizeof(struct sockaddr_in); + sin.sin_family = AF_INET; + sin.sin_addr = ip->ip_dst; + + blocked = 0; + idx = imo_match_group(imo, ifp, + (struct sockaddr *)&sin); + if (idx == -1) { + /* + * No group membership for this socket. + * Do not bump udps_noportbcast, as + * this will happen further down. + */ + blocked++; + } else { + /* + * Check for a multicast source filter + * entry on this socket for this group. + * MCAST_EXCLUDE is the default + * behaviour. It means default accept; + * entries, if present, denote sources + * to be excluded from delivery. + */ + ims = imo_match_source(imo, idx, + (struct sockaddr *)&udp_in); + mode = imo->imo_mfilters[idx].imf_fmode; + if ((ims != NULL && + mode == MCAST_EXCLUDE) || + (ims == NULL && + mode == MCAST_INCLUDE)) { +#ifdef DIAGNOSTIC + if (bootverbose) { + printf("%s: blocked by" + " source filter\n", + __func__); + } +#endif + udpstat.udps_filtermcast++; + blocked++; } } - if (foundmship == 0) { + if (blocked != 0) { INP_UNLOCK(inp); continue; } } -#undef NMSHIPS -#undef MSHIP if (last != NULL) { struct mbuf *n; @@ -410,7 +431,7 @@ udp_input(struct mbuf *m, int off) * Locate pcb for datagram. 
*/ inp = in_pcblookup_hash(&udbinfo, ip->ip_src, uh->uh_sport, - ip->ip_dst, uh->uh_dport, 1, m->m_pkthdr.rcvif); + ip->ip_dst, uh->uh_dport, 1, ifp); if (inp == NULL) { if (udp_log_in_vain) { char buf[4*sizeof "123"]; diff --git a/sys/netinet/udp_var.h b/sys/netinet/udp_var.h index 015cc4e2a375..fc4a21c6ab16 100644 --- a/sys/netinet/udp_var.h +++ b/sys/netinet/udp_var.h @@ -68,6 +68,7 @@ struct udpstat { u_long udps_fastout; /* output packets on fast path */ /* of no socket on port, arrived as multicast */ u_long udps_noportmcast; + u_long udps_filtermcast; /* blocked by multicast filter */ }; /* diff --git a/sys/netinet6/in6.h b/sys/netinet6/in6.h index 4df2b8713f69..380b8c3410e9 100644 --- a/sys/netinet6/in6.h +++ b/sys/netinet6/in6.h @@ -467,6 +467,14 @@ struct route_in6 { * the source address. */ +/* + * The following option is private; do not use it from user applications. + * It is deliberately defined to the same value as IP_MSFILTER. + */ +#define IPV6_MSFILTER 74 /* struct __msfilterreq; + * set/get multicast source filter list. + */ + /* to define items, should talk with KAME guys first, for *BSD compatibility */ #define IPV6_RTHDR_LOOSE 0 /* this hop need not be a neighbor. XXX old spec */ @@ -487,6 +495,18 @@ struct ipv6_mreq { unsigned int ipv6mr_interface; }; +#ifdef notyet +/* + * Argument structure for IPV6_ADD_SOURCE_MEMBERSHIP, + * IPV6_DROP_SOURCE_MEMBERSHIP, IPV6_BLOCK_SOURCE, and IPV6_UNBLOCK_SOURCE. 
+ */ +struct ipv6_mreq_source { + struct in6_addr ipv6mr_multiaddr; + struct in6_addr ipv6mr_sourceaddr; + uint32_t ipv6mr_interface; +}; +#endif + /* * IPV6_PKTINFO: Packet information(RFC2292 sec 5) */ diff --git a/sys/netinet6/in6_ifattach.c b/sys/netinet6/in6_ifattach.c index b78b5c972004..e00e7fab546a 100644 --- a/sys/netinet6/in6_ifattach.c +++ b/sys/netinet6/in6_ifattach.c @@ -76,6 +76,7 @@ static int generate_tmp_ifid __P((u_int8_t *, const u_int8_t *, u_int8_t *)); static int get_ifid __P((struct ifnet *, struct ifnet *, struct in6_addr *)); static int in6_ifattach_linklocal __P((struct ifnet *, struct ifnet *)); static int in6_ifattach_loopback __P((struct ifnet *)); +static void in6_purgemaddrs __P((struct ifnet *)); #define EUI64_GBIT 0x01 #define EUI64_UBIT 0x02 @@ -798,18 +799,10 @@ in6_ifdetach(ifp) IFAFREE(&oia->ia_ifa); } - /* leave from all multicast groups joined */ - in6_pcbpurgeif0(&udbinfo, ifp); in6_pcbpurgeif0(&ripcbinfo, ifp); - - for (in6m = LIST_FIRST(&in6_multihead); in6m; in6m = in6m_next) { - in6m_next = LIST_NEXT(in6m, in6m_entry); - if (in6m->in6m_ifp != ifp) - continue; - in6_delmulti(in6m); - in6m = NULL; - } + /* leave from all multicast groups joined */ + in6_purgemaddrs(ifp); /* * remove neighbor management table. 
we call it twice just to make
@@ -898,3 +891,26 @@ in6_tmpaddrtimer(ignored_arg)
 
 	splx(s);
 }
+
+/*
+ * Leave every IPv6 multicast group recorded on in6_multihead for the
+ * given interface; the walk is bracketed by IFF_LOCKGIANT/UNLOCKGIANT.
+ */
+static void
+in6_purgemaddrs(ifp)
+	struct ifnet *ifp;
+{
+	struct in6_multi *cur, *next;
+
+#ifdef DIAGNOSTIC
+	printf("%s: purging ifp %p\n", __func__, ifp);
+#endif
+
+	IFF_LOCKGIANT(ifp);
+	LIST_FOREACH_SAFE(cur, &in6_multihead, in6m_entry, next) {
+		if (cur->in6m_ifp != ifp)
+			continue;
+		in6_delmulti(cur);
+	}
+	IFF_UNLOCKGIANT(ifp);
+}
diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c
index 5ea647ecd5fd..863e53fd1659 100644
--- a/sys/netinet6/in6_pcb.c
+++ b/sys/netinet6/in6_pcb.c
@@ -456,7 +456,8 @@ in6_pcbfree(struct inpcb *inp)
 	/* Check and free IPv4 related resources in case of mapped addr */
 	if (inp->inp_options)
 		(void)m_free(inp->inp_options);
-	ip_freemoptions(inp->inp_moptions);
+	if (inp->inp_moptions != NULL)
+		inp_freemoptions(inp->inp_moptions);
 	inp->inp_vflag = 0;
 	INP_UNLOCK(inp);
 	uma_zfree(ipi->ipi_zone, inp);
diff --git a/sys/sys/param.h b/sys/sys/param.h
index 4d01ed4c2f7e..c77057303e22 100644
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@@ -57,7 +57,7 @@
  *		is created, otherwise 1.
*/ #undef __FreeBSD_version -#define __FreeBSD_version 700047 /* Master, propagated to newvers */ +#define __FreeBSD_version 700048 /* Master, propagated to newvers */ #ifndef LOCORE #include diff --git a/sys/sys/socket.h b/sys/sys/socket.h index ac1b1ed5017b..79d180b53956 100644 --- a/sys/sys/socket.h +++ b/sys/sys/socket.h @@ -234,6 +234,7 @@ struct sockproto { }; #endif +#ifndef _STRUCT_SOCKADDR_STORAGE_DECLARED /* * RFC 2553: protocol-independent placeholder for socket addresses */ @@ -251,6 +252,8 @@ struct sockaddr_storage { __int64_t __ss_align; /* force desired struct alignment */ char __ss_pad2[_SS_PAD2SIZE]; }; +#define _STRUCT_SOCKADDR_STORAGE_DECLARED +#endif #if __BSD_VISIBLE /* diff --git a/tools/regression/netinet/ipsockopt/ipsockopt.c b/tools/regression/netinet/ipsockopt/ipsockopt.c index ffcb48efab2a..d03ddf652f42 100644 --- a/tools/regression/netinet/ipsockopt/ipsockopt.c +++ b/tools/regression/netinet/ipsockopt/ipsockopt.c @@ -679,7 +679,7 @@ test_ip_multicast_membership(int sock, const char *socktypename) * this usually maps to the interface to which the default * route is pointing. 
*/ - for (i = 0; i < nmcastgroups; i++) { + for (i = 1; i < nmcastgroups+1; i++) { mreq.imr_multiaddr.s_addr = htonl((basegroup + i)); mreq.imr_interface.s_addr = INADDR_ANY; inet_ntop(AF_INET, &mreq.imr_multiaddr, addrbuf, sizeof(addrbuf)); @@ -692,7 +692,7 @@ test_ip_multicast_membership(int sock, const char *socktypename) sock, socktypename, addrbuf, "INADDR_ANY"); } } - for (i = 0; i < nmcastgroups; i++) { + for (i = 1; i < nmcastgroups+1; i++) { mreq.imr_multiaddr.s_addr = htonl((basegroup + i)); mreq.imr_interface.s_addr = INADDR_ANY; inet_ntop(AF_INET, &mreq.imr_multiaddr, addrbuf, sizeof(addrbuf)); diff --git a/usr.bin/netstat/inet.c b/usr.bin/netstat/inet.c index 2d96cb669677..1be7b874b58f 100644 --- a/usr.bin/netstat/inet.c +++ b/usr.bin/netstat/inet.c @@ -513,7 +513,7 @@ udp_stats(u_long off __unused, const char *name, int af1 __unused) p1a(udps_nosum, "\t%lu with no checksum\n"); p1a(udps_noport, "\t%lu dropped due to no socket\n"); p(udps_noportbcast, - "\t%lu broadcast/multicast datagram%s dropped due to no socket\n"); + "\t%lu broadcast/multicast datagram%s undelivered\n"); p1a(udps_fullsock, "\t%lu dropped due to full socket buffers\n"); p1a(udpps_pcbhashmiss, "\t%lu not for hashed pcb\n"); delivered = udpstat.udps_ipackets - @@ -526,6 +526,9 @@ udp_stats(u_long off __unused, const char *name, int af1 __unused) if (delivered || sflag <= 1) printf("\t%lu delivered\n", delivered); p(udps_opackets, "\t%lu datagram%s output\n"); + /* the next statistic is cumulative in udps_noportbcast */ + p(udps_filtermcast, + "\t%lu time%s multicast source filter matched\n"); #undef p #undef p1a } diff --git a/usr.sbin/mtest/mtest.c b/usr.sbin/mtest/mtest.c index 747a1f38d6d4..28c0fc6ced33 100644 --- a/usr.sbin/mtest/mtest.c +++ b/usr.sbin/mtest/mtest.c @@ -55,6 +55,15 @@ __FBSDID("$FreeBSD$"); #include #include +/* The following two socket options are private to the kernel and libc. 
*/ + +#ifndef IP_SETMSFILTER +#define IP_SETMSFILTER 74 /* atomically set filter list */ +#endif +#ifndef IP_GETMSFILTER +#define IP_GETMSFILTER 75 /* get filter list */ +#endif + static void process_file(char *, int); static void process_cmd(char*, int, FILE *fp); static void usage(void); @@ -135,14 +144,14 @@ process_cmd(char *cmd, int s, FILE *fp __unused) { char str1[STR_SIZE]; char str2[STR_SIZE]; -#ifdef WITH_IGMPV3 char str3[STR_SIZE]; +#ifdef WITH_IGMPV3 char filtbuf[IP_MSFILTER_SIZE(MAX_ADDRS)]; #endif struct ifreq ifr; struct ip_mreq imr; -#ifdef WITH_IGMPV3 struct ip_mreq_source imrs; +#ifdef WITH_IGMPV3 struct ip_msfilter *imsfp; #endif char *line; @@ -256,6 +265,7 @@ process_cmd(char *cmd, int s, FILE *fp __unused) */ case 'i': case 'e': + /* XXX: SIOCSIPMSFILTER will be made an internal API. */ if ((sscanf(line, "%s %s %d", str1, str2, &n)) != 3) { printf("-1\n"); break; @@ -284,10 +294,13 @@ process_cmd(char *cmd, int s, FILE *fp __unused) else printf("ok\n"); break; +#endif /* WITH_IGMPV3 */ /* * Allow or block traffic from a source, using the * delta based api. + * XXX: Currently we allow this to be used with the ASM-only + * implementation of RFC3678 in FreeBSD 7. */ case 't': case 'b': @@ -302,6 +315,8 @@ process_cmd(char *cmd, int s, FILE *fp __unused) break; } +#ifdef WITH_IGMPV3 + /* XXX: SIOCSIPMSFILTER will be made an internal API. */ /* First determine out current filter mode. */ imsfp = (struct ip_msfilter *)filtbuf; imsfp->imsf_multiaddr.s_addr = imrs.imr_multiaddr.s_addr; @@ -325,13 +340,22 @@ process_cmd(char *cmd, int s, FILE *fp __unused) opt = (*cmd == 't') ? IP_ADD_SOURCE_MEMBERSHIP : IP_DROP_SOURCE_MEMBERSHIP; } +#else /* !WITH_IGMPV3 */ + /* + * Don't look before we leap; we may only block or unblock + * sources on a socket in exclude mode. + */ + opt = (*cmd == 't') ? 
IP_UNBLOCK_SOURCE : IP_BLOCK_SOURCE; +#endif /* WITH_IGMPV3 */ if (setsockopt(s, IPPROTO_IP, opt, &imrs, sizeof(imrs)) == -1) warn("ioctl IP_ADD_SOURCE_MEMBERSHIP/IP_DROP_SOURCE_MEMBERSHIP/IP_UNBLOCK_SOURCE/IP_BLOCK_SOURCE"); else printf("ok\n"); break; +#ifdef WITH_IGMPV3 case 'g': + /* XXX: SIOCSIPMSFILTER will be made an internal API. */ if ((sscanf(line, "%s %s %d", str1, str2, &n)) != 3) { printf("-1\n"); break; @@ -360,11 +384,11 @@ process_cmd(char *cmd, int s, FILE *fp __unused) printf("%s\n", inet_ntoa(imsfp->imsf_slist[i])); } break; -#else /* !WITH_IGMPV3 */ +#endif /* WITH_IGMPV3 */ + +#ifndef WITH_IGMPV3 case 'i': case 'e': - case 't': - case 'b': case 'g': printf("warning: IGMPv3 is not supported by this version " "of FreeBSD; command ignored.\n"); @@ -389,11 +413,15 @@ usage(void) printf("d ifname e.e.e.e.e.e - delete ether multicast address\n"); printf("m ifname 1/0 - set/clear ether allmulti flag\n"); printf("p ifname 1/0 - set/clear ether promisc flag\n"); +#ifdef WITH_IGMPV3 printf("i g.g.g.g i.i.i.i n - set n include mode src filter\n"); printf("e g.g.g.g i.i.i.i n - set n exclude mode src filter\n"); +#endif /* WITH_IGMPV3 */ printf("t g.g.g.g i.i.i.i s.s.s.s - allow traffic from src\n"); printf("b g.g.g.g i.i.i.i s.s.s.s - block traffic from src\n"); +#ifdef WITH_IGMPV3 printf("g g.g.g.g i.i.i.i n - get and show n src filters\n"); +#endif /* WITH_IGMPV3 */ printf("f filename - read command(s) from file\n"); printf("s seconds - sleep for some time\n"); printf("q - quit\n");