diff --git a/UPDATING b/UPDATING index 6b490fcb7e01..3864a0d8cb67 100644 --- a/UPDATING +++ b/UPDATING @@ -22,6 +22,13 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 8.x IS SLOW: to maximize performance. (To disable malloc debugging, run ln -s aj /etc/malloc.conf.) +20090319: + The multicast forwarding code has been cleaned up. netstat(1) + only relies on KVM now for printing bandwidth upcall meters. + The IPv4 and IPv6 modules are split into ip_mroute_mod and + ip6_mroute_mod respectively. The config(5) options for statically + compiling this code remain the same, i.e. 'options MROUTING'. + 20090315: Support for the IFF_NEEDSGIANT network interface flag has been removed, which means that non-MPSAFE network device drivers are no diff --git a/sys/modules/ip6_mroute_mod/Makefile b/sys/modules/ip6_mroute_mod/Makefile new file mode 100644 index 000000000000..f361c9df229c --- /dev/null +++ b/sys/modules/ip6_mroute_mod/Makefile @@ -0,0 +1,19 @@ +# $FreeBSD$ + +.include + +.PATH: ${.CURDIR}/../../netinet6 + +KMOD= ip6_mroute + +SRCS= ip6_mroute.c +SRCS+= opt_inet6.h opt_mac.h opt_mrouting.h + +.if !defined(KERNBUILDDIR) +opt_inet6.h: + echo "#define INET6 1" > ${.TARGET} +opt_mrouting.h: + echo "#define MROUTING 1" > ${.TARGET} +.endif + +.include diff --git a/sys/modules/ip_mroute_mod/Makefile b/sys/modules/ip_mroute_mod/Makefile index 3ca0a3dc20e9..141eceb6831b 100644 --- a/sys/modules/ip_mroute_mod/Makefile +++ b/sys/modules/ip_mroute_mod/Makefile @@ -8,21 +8,12 @@ KMOD= ip_mroute SRCS= ip_mroute.c SRCS+= opt_inet.h opt_mac.h opt_mrouting.h opt_route.h -SRCS+= opt_inet6.h - -.if ${MK_INET6_SUPPORT} != "no" -SRCS+= ip6_mroute.c -.endif .if !defined(KERNBUILDDIR) opt_inet.h: echo "#define INET 1" > ${.TARGET} opt_mrouting.h: echo "#define MROUTING 1" > ${.TARGET} -.if ${MK_INET6_SUPPORT} != "no" -opt_inet6.h: - echo "#define INET6 1" > ${.TARGET} -.endif .endif .include diff --git a/sys/netinet/ip_mroute.c b/sys/netinet/ip_mroute.c index 36a5efdd5d37..d4b0828d3e66 100644 --- a/sys/netinet/ip_mroute.c +++ b/sys/netinet/ip_mroute.c @@ -53,11 +53,24 @@ * bandwidth metering and signaling */ +/* + * TODO: Prefix functions with ipmf_. + * TODO: Maintain a refcount on if_allmulti() in ifnet or in the protocol + * domain attachment (if_afdata) so we can track consumers of that service. + * TODO: Deprecate routing socket path for SIOCGETSGCNT and SIOCGETVIFCNT, + * move it to socket options. + * TODO: Rototile log_debug to use KTR. + * TODO: Cleanup LSRR removal further. + * TODO: Push RSVP stubs into raw_ip.c. + * TODO: Use bitstring.h for vif set. + * TODO: Fix mrt6_ioctl dangling ref when dynamically loaded. + * TODO: Sync ip6_mroute.c with this file. + */ + #include __FBSDID("$FreeBSD$"); #include "opt_inet.h" -#include "opt_inet6.h" #include "opt_mac.h" #include "opt_mrouting.h" @@ -81,9 +94,11 @@ __FBSDID("$FreeBSD$"); #include #include #include + #include #include #include + #include #include #include @@ -98,96 +113,84 @@ __FBSDID("$FreeBSD$"); #include #include -#ifdef INET6 -#include -#include -#include -#include -#endif #include #include -/* - * Control debugging code for rsvp and multicast routing code. - * Can only set them with the debugger. - */ -static u_int rsvpdebug; /* non-zero enables debugging */ - -static u_int mrtdebug; /* any set of the flags below */ #define DEBUG_MFC 0x02 #define DEBUG_FORWARD 0x04 #define DEBUG_EXPIRE 0x08 #define DEBUG_XMIT 0x10 #define DEBUG_PIM 0x20 +static u_int mrtdebug; +SYSCTL_INT(_debug, OID_AUTO, mrtdebug, CTLFLAG_RW, &mrtdebug, 0, + "Enable/disable IPv4 multicast forwarding debugging flags"); #define VIFI_INVALID ((vifi_t) -1) +#define M_HASCL(m) ((m)->m_flags & M_EXT) -#define M_HASCL(m) ((m)->m_flags & M_EXT) - -static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast routing tables"); +static MALLOC_DEFINE(M_MRTABLE, "mroutetbl", "multicast forwarding cache"); /* * Locking. We use two locks: one for the virtual interface table and * one for the forwarding table. These locks may be nested in which case * the VIF lock must always be taken first. Note that each lock is used * to cover not only the specific data structure but also related data - * structures. It may be better to add more fine-grained locking later; - * it's not clear how performance-critical this code is. - * - * XXX: This module could particularly benefit from being cleaned - * up to use the macros. - * + * structures. */ -static struct mrtstat mrtstat; -SYSCTL_STRUCT(_net_inet_ip, OID_AUTO, mrtstat, CTLFLAG_RW, - &mrtstat, mrtstat, - "Multicast Routing Statistics (struct mrtstat, netinet/ip_mroute.h)"); - -static struct mfc *mfctable[MFCTBLSIZ]; -SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, mfctable, CTLFLAG_RD, - &mfctable, sizeof(mfctable), "S,*mfc[MFCTBLSIZ]", - "Multicast Forwarding Table (struct *mfc[MFCTBLSIZ], netinet/ip_mroute.h)"); - static struct mtx mrouter_mtx; #define MROUTER_LOCK() mtx_lock(&mrouter_mtx) #define MROUTER_UNLOCK() mtx_unlock(&mrouter_mtx) #define MROUTER_LOCK_ASSERT() mtx_assert(&mrouter_mtx, MA_OWNED) -#define MROUTER_LOCK_INIT() \ +#define MROUTER_LOCK_INIT() \ mtx_init(&mrouter_mtx, "IPv4 multicast forwarding", NULL, MTX_DEF) #define MROUTER_LOCK_DESTROY() mtx_destroy(&mrouter_mtx) +static struct mrtstat mrtstat; +SYSCTL_STRUCT(_net_inet_ip, OID_AUTO, mrtstat, CTLFLAG_RW, + &mrtstat, mrtstat, + "IPv4 Multicast Forwarding Statistics (struct mrtstat, " + "netinet/ip_mroute.h)"); + +static u_long mfchash; +#define MFCHASH(a, g) \ + ((((a).s_addr >> 20) ^ ((a).s_addr >> 10) ^ (a).s_addr ^ \ + ((g).s_addr >> 20) ^ ((g).s_addr >> 10) ^ (g).s_addr) & mfchash) +#define MFCHASHSIZE 256 + +static u_char *nexpire; /* 0..mfchashsize-1 */ +static u_long mfchashsize; /* Hash size */ +LIST_HEAD(mfchashhdr, mfc) *mfchashtbl; + static struct mtx mfc_mtx; -#define MFC_LOCK() mtx_lock(&mfc_mtx) -#define MFC_UNLOCK() mtx_unlock(&mfc_mtx) +#define MFC_LOCK() mtx_lock(&mfc_mtx) +#define MFC_UNLOCK() mtx_unlock(&mfc_mtx) #define MFC_LOCK_ASSERT() mtx_assert(&mfc_mtx, MA_OWNED) -#define MFC_LOCK_INIT() mtx_init(&mfc_mtx, "mroute mfc table", NULL, MTX_DEF) +#define MFC_LOCK_INIT() \ + mtx_init(&mfc_mtx, "IPv4 multicast forwarding cache", NULL, MTX_DEF) #define MFC_LOCK_DESTROY() mtx_destroy(&mfc_mtx) +static vifi_t numvifs; static struct vif viftable[MAXVIFS]; SYSCTL_OPAQUE(_net_inet_ip, OID_AUTO, viftable, CTLFLAG_RD, &viftable, sizeof(viftable), "S,vif[MAXVIFS]", - "Multicast Virtual Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)"); + "IPv4 Multicast Interfaces (struct vif[MAXVIFS], netinet/ip_mroute.h)"); static struct mtx vif_mtx; -#define VIF_LOCK() mtx_lock(&vif_mtx) -#define VIF_UNLOCK() mtx_unlock(&vif_mtx) +#define VIF_LOCK() mtx_lock(&vif_mtx) +#define VIF_UNLOCK() mtx_unlock(&vif_mtx) #define VIF_LOCK_ASSERT() mtx_assert(&vif_mtx, MA_OWNED) -#define VIF_LOCK_INIT() mtx_init(&vif_mtx, "mroute vif table", NULL, MTX_DEF) +#define VIF_LOCK_INIT() \ + mtx_init(&vif_mtx, "IPv4 multicast interfaces", NULL, MTX_DEF) #define VIF_LOCK_DESTROY() mtx_destroy(&vif_mtx) -static u_char nexpire[MFCTBLSIZ]; - static eventhandler_tag if_detach_event_tag = NULL; static struct callout expire_upcalls_ch; - #define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ #define UPCALL_EXPIRE 6 /* number of timeouts */ -#define ENCAP_TTL 64 - /* * Bandwidth meter variables and constants */ @@ -223,7 +226,7 @@ SYSCTL_ULONG(_net_inet_pim, OID_AUTO, squelch_wholepkt, CTLFLAG_RW, "Disable IGMP_WHOLEPKT notifications if rendezvous point is unspecified"); extern struct domain inetdomain; -struct protosw in_pim_protosw = { +static const struct protosw in_pim_protosw = { .pr_type = SOCK_RAW, .pr_domain = &inetdomain, .pr_protocol = IPPROTO_PIM, @@ -235,18 +238,6 @@ struct protosw in_pim_protosw = { }; static const struct encaptab *pim_encap_cookie; -#ifdef INET6 -/* ip6_mroute.c glue */ -extern struct in6_protosw in6_pim_protosw; -static const struct encaptab *pim6_encap_cookie; - -extern int X_ip6_mrouter_set(struct socket *, struct sockopt *); -extern int X_ip6_mrouter_get(struct socket *, struct sockopt *); -extern int X_ip6_mrouter_done(void); -extern int X_ip6_mforward(struct ip6_hdr *, struct ifnet *, struct mbuf *); -extern int X_mrt6_ioctl(int, caddr_t); -#endif - static int pim_encapcheck(const struct mbuf *, int, int, void *); /* @@ -264,6 +255,7 @@ struct pim_encap_pimhdr { struct pim pim; uint32_t flags; }; +#define PIM_ENCAP_TTL 64 static struct ip pim_encap_iphdr = { #if BYTE_ORDER == LITTLE_ENDIAN @@ -277,7 +269,7 @@ static struct ip pim_encap_iphdr = { sizeof(struct ip), /* total length */ 0, /* id */ 0, /* frag offset */ - ENCAP_TTL, + PIM_ENCAP_TTL, IPPROTO_PIM, 0, /* checksum */ }; @@ -297,129 +289,97 @@ static vifi_t reg_vif_num = VIFI_INVALID; /* * Private variables. */ -static vifi_t numvifs; -static u_long X_ip_mcast_src(int vifi); -static int X_ip_mforward(struct ip *ip, struct ifnet *ifp, - struct mbuf *m, struct ip_moptions *imo); +static u_long X_ip_mcast_src(int); +static int X_ip_mforward(struct ip *, struct ifnet *, struct mbuf *, + struct ip_moptions *); static int X_ip_mrouter_done(void); -static int X_ip_mrouter_get(struct socket *so, struct sockopt *m); -static int X_ip_mrouter_set(struct socket *so, struct sockopt *m); -static int X_legal_vif_num(int vif); -static int X_mrt_ioctl(int cmd, caddr_t data, int fibnum); +static int X_ip_mrouter_get(struct socket *, struct sockopt *); +static int X_ip_mrouter_set(struct socket *, struct sockopt *); +static int X_legal_vif_num(int); +static int X_mrt_ioctl(int, caddr_t, int); -static int get_sg_cnt(struct sioc_sg_req *); -static int get_vif_cnt(struct sioc_vif_req *); -static void if_detached_event(void *arg __unused, struct ifnet *); -static int ip_mrouter_init(struct socket *, int); -static int add_vif(struct vifctl *); -static int del_vif_locked(vifi_t); -static int del_vif(vifi_t); -static int add_mfc(struct mfcctl2 *); -static int del_mfc(struct mfcctl2 *); -static int set_api_config(uint32_t *); /* chose API capabilities */ -static int socket_send(struct socket *, struct mbuf *, struct sockaddr_in *); -static int set_assert(int); -static void expire_upcalls(void *); -static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t); -static void phyint_send(struct ip *, struct vif *, struct mbuf *); -static void send_packet(struct vif *, struct mbuf *); +static int add_bw_upcall(struct bw_upcall *); +static int add_mfc(struct mfcctl2 *); +static int add_vif(struct vifctl *); +static void bw_meter_prepare_upcall(struct bw_meter *, struct timeval *); +static void bw_meter_process(void); +static void bw_meter_receive_packet(struct bw_meter *, int, + struct timeval *); +static void bw_upcalls_send(void); +static int del_bw_upcall(struct bw_upcall *); +static int del_mfc(struct mfcctl2 *); +static int del_vif(vifi_t); +static int del_vif_locked(vifi_t); +static void expire_bw_meter_process(void *); +static void expire_bw_upcalls_send(void *); +static void expire_mfc(struct mfc *); +static void expire_upcalls(void *); +static void free_bw_list(struct bw_meter *); +static int get_sg_cnt(struct sioc_sg_req *); +static int get_vif_cnt(struct sioc_vif_req *); +static void if_detached_event(void *, struct ifnet *); +static int ip_mdq(struct mbuf *, struct ifnet *, struct mfc *, vifi_t); +static int ip_mrouter_init(struct socket *, int); +static __inline struct mfc * + mfc_find(struct in_addr *, struct in_addr *); +static void phyint_send(struct ip *, struct vif *, struct mbuf *); +static struct mbuf * + pim_register_prepare(struct ip *, struct mbuf *); +static int pim_register_send(struct ip *, struct vif *, + struct mbuf *, struct mfc *); +static int pim_register_send_rp(struct ip *, struct vif *, + struct mbuf *, struct mfc *); +static int pim_register_send_upcall(struct ip *, struct vif *, + struct mbuf *, struct mfc *); +static void schedule_bw_meter(struct bw_meter *, struct timeval *); +static void send_packet(struct vif *, struct mbuf *); +static int set_api_config(uint32_t *); +static int set_assert(int); +static int socket_send(struct socket *, struct mbuf *, + struct sockaddr_in *); +static void unschedule_bw_meter(struct bw_meter *); /* - * Bandwidth monitoring - */ -static void free_bw_list(struct bw_meter *list); -static int add_bw_upcall(struct bw_upcall *); -static int del_bw_upcall(struct bw_upcall *); -static void bw_meter_receive_packet(struct bw_meter *x, int plen, - struct timeval *nowp); -static void bw_meter_prepare_upcall(struct bw_meter *x, struct timeval *nowp); -static void bw_upcalls_send(void); -static void schedule_bw_meter(struct bw_meter *x, struct timeval *nowp); -static void unschedule_bw_meter(struct bw_meter *x); -static void bw_meter_process(void); -static void expire_bw_upcalls_send(void *); -static void expire_bw_meter_process(void *); - -static int pim_register_send(struct ip *, struct vif *, - struct mbuf *, struct mfc *); -static int pim_register_send_rp(struct ip *, struct vif *, - struct mbuf *, struct mfc *); -static int pim_register_send_upcall(struct ip *, struct vif *, - struct mbuf *, struct mfc *); -static struct mbuf *pim_register_prepare(struct ip *, struct mbuf *); - -/* - * whether or not special PIM assert processing is enabled. - */ -static int pim_assert; -/* - * Rate limit for assert notification messages, in usec - */ -#define ASSERT_MSG_TIME 3000000 - -/* - * Kernel multicast routing API capabilities and setup. + * Kernel multicast forwarding API capabilities and setup. * If more API capabilities are added to the kernel, they should be * recorded in `mrt_api_support'. */ +#define MRT_API_VERSION 0x0305 + +static const int mrt_api_version = MRT_API_VERSION; static const uint32_t mrt_api_support = (MRT_MFC_FLAGS_DISABLE_WRONGVIF | MRT_MFC_FLAGS_BORDER_VIF | MRT_MFC_RP | MRT_MFC_BW_UPCALL); static uint32_t mrt_api_config = 0; -/* - * Hash function for a source, group entry - */ -#define MFCHASH(a, g) MFCHASHMOD(((a) >> 20) ^ ((a) >> 10) ^ (a) ^ \ - ((g) >> 20) ^ ((g) >> 10) ^ (g)) +static int pim_assert_enabled; +static struct timeval pim_assert_interval = { 3, 0 }; /* Rate limit */ /* - * Find a route for a given origin IP address and Multicast group address - * Statistics are updated by the caller if needed - * (mrtstat.mrts_mfc_lookups and mrtstat.mrts_mfc_misses) + * Find a route for a given origin IP address and multicast group address. + * Statistics must be updated by the caller. */ -static struct mfc * -mfc_find(in_addr_t o, in_addr_t g) +static __inline struct mfc * +mfc_find(struct in_addr *o, struct in_addr *g) { - struct mfc *rt; + struct mfc *rt; - MFC_LOCK_ASSERT(); + MFC_LOCK_ASSERT(); - for (rt = mfctable[MFCHASH(o,g)]; rt; rt = rt->mfc_next) - if ((rt->mfc_origin.s_addr == o) && - (rt->mfc_mcastgrp.s_addr == g) && (rt->mfc_stall == NULL)) - break; - return rt; + LIST_FOREACH(rt, &mfchashtbl[MFCHASH(*o, *g)], mfc_hash) { + if (in_hosteq(rt->mfc_origin, *o) && + in_hosteq(rt->mfc_mcastgrp, *g) && + TAILQ_EMPTY(&rt->mfc_stall)) + break; + } + + return (rt); } /* - * Macros to compute elapsed time efficiently - * Borrowed from Van Jacobson's scheduling code - */ -#define TV_DELTA(a, b, delta) { \ - int xxs; \ - delta = (a).tv_usec - (b).tv_usec; \ - if ((xxs = (a).tv_sec - (b).tv_sec)) { \ - switch (xxs) { \ - case 2: \ - delta += 1000000; \ - /* FALLTHROUGH */ \ - case 1: \ - delta += 1000000; \ - break; \ - default: \ - delta += (1000000 * xxs); \ - } \ - } \ -} - -#define TV_LT(a, b) (((a).tv_usec < (b).tv_usec && \ - (a).tv_sec <= (b).tv_sec) || (a).tv_sec < (b).tv_sec) - -/* - * Handle MRT setsockopt commands to modify the multicast routing tables. + * Handle MRT setsockopt commands to modify the multicast forwarding tables. */ static int X_ip_mrouter_set(struct socket *so, struct sockopt *sopt) @@ -526,15 +486,15 @@ static int X_ip_mrouter_get(struct socket *so, struct sockopt *sopt) { int error; - static int version = 0x0305; /* !!! why is this here? XXX */ switch (sopt->sopt_name) { case MRT_VERSION: - error = sooptcopyout(sopt, &version, sizeof version); + error = sooptcopyout(sopt, &mrt_api_version, sizeof mrt_api_version); break; case MRT_ASSERT: - error = sooptcopyout(sopt, &pim_assert, sizeof pim_assert); + error = sooptcopyout(sopt, &pim_assert_enabled, + sizeof pim_assert_enabled); break; case MRT_API_SUPPORT: @@ -556,7 +516,7 @@ X_ip_mrouter_get(struct socket *so, struct sockopt *sopt) * Handle ioctl commands to obtain information from the cache */ static int -X_mrt_ioctl(int cmd, caddr_t data, int fibnum) +X_mrt_ioctl(int cmd, caddr_t data, int fibnum __unused) { int error = 0; @@ -593,7 +553,7 @@ get_sg_cnt(struct sioc_sg_req *req) struct mfc *rt; MFC_LOCK(); - rt = mfc_find(req->src.s_addr, req->grp.s_addr); + rt = mfc_find(&req->src, &req->grp); if (rt == NULL) { MFC_UNLOCK(); req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; @@ -632,10 +592,8 @@ get_vif_cnt(struct sioc_vif_req *req) static void ip_mrouter_reset(void) { - bzero((caddr_t)mfctable, sizeof(mfctable)); - bzero((caddr_t)nexpire, sizeof(nexpire)); - pim_assert = 0; + pim_assert_enabled = 0; mrt_api_config = 0; callout_init(&expire_upcalls_ch, CALLOUT_MPSAFE); @@ -652,55 +610,40 @@ if_detached_event(void *arg __unused, struct ifnet *ifp) INIT_VNET_INET(curvnet); vifi_t vifi; int i; - struct mfc *mfc; - struct mfc *nmfc; - struct mfc **ppmfc; /* Pointer to previous node's next-pointer */ - struct rtdetq *pq; - struct rtdetq *npq; MROUTER_LOCK(); + if (V_ip_mrouter == NULL) { MROUTER_UNLOCK(); + return; } + VIF_LOCK(); + MFC_LOCK(); + /* * Tear down multicast forwarder state associated with this ifnet. * 1. Walk the vif list, matching vifs against this ifnet. * 2. Walk the multicast forwarding cache (mfc) looking for * inner matches with this vif's index. - * 3. Free any pending mbufs for this mfc. - * 4. Free the associated mfc entry and state associated with this vif. - * Be very careful about unlinking from a singly-linked list whose - * "head node" is a pointer in a simple array. - * 5. Free vif state. This should disable ALLMULTI on the interface. + * 3. Expire any matching multicast forwarding cache entries. + * 4. Free vif state. This should disable ALLMULTI on the interface. */ - VIF_LOCK(); - MFC_LOCK(); for (vifi = 0; vifi < numvifs; vifi++) { if (viftable[vifi].v_ifp != ifp) continue; - for (i = 0; i < MFCTBLSIZ; i++) { - ppmfc = &mfctable[i]; - for (mfc = mfctable[i]; mfc != NULL; ) { - nmfc = mfc->mfc_next; - if (mfc->mfc_parent == vifi) { - for (pq = mfc->mfc_stall; pq != NULL; ) { - npq = pq->next; - m_freem(pq->m); - free(pq, M_MRTABLE); - pq = npq; - } - free_bw_list(mfc->mfc_bw_meter); - free(mfc, M_MRTABLE); - *ppmfc = nmfc; - } else { - ppmfc = &mfc->mfc_next; + for (i = 0; i < mfchashsize; i++) { + struct mfc *rt, *nrt; + for (rt = LIST_FIRST(&mfchashtbl[i]); rt; rt = nrt) { + nrt = LIST_NEXT(rt, mfc_hash); + if (rt->mfc_parent == vifi) { + expire_mfc(rt); + } } - mfc = nmfc; - } } del_vif_locked(vifi); } + MFC_UNLOCK(); VIF_UNLOCK(); @@ -708,7 +651,7 @@ if_detached_event(void *arg __unused, struct ifnet *ifp) } /* - * Enable multicast routing + * Enable multicast forwarding. */ static int ip_mrouter_init(struct socket *so, int version) @@ -739,6 +682,8 @@ ip_mrouter_init(struct socket *so, int version) return (ENOMEM); } + mfchashtbl = hashinit_flags(mfchashsize, M_MRTABLE, &mfchash, HASH_NOWAIT); + callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, NULL); callout_reset(&bw_upcalls_ch, BW_UPCALLS_PERIOD, @@ -756,7 +701,7 @@ ip_mrouter_init(struct socket *so, int version) } /* - * Disable multicast routing + * Disable multicast forwarding. */ static int X_ip_mrouter_done(void) @@ -766,8 +711,6 @@ X_ip_mrouter_done(void) int i; struct ifnet *ifp; struct ifreq ifr; - struct mfc *rt; - struct rtdetq *rte; MROUTER_LOCK(); @@ -783,12 +726,13 @@ X_ip_mrouter_done(void) mrt_api_config = 0; VIF_LOCK(); + /* * For each phyint in use, disable promiscuous reception of all IP * multicasts. */ for (vifi = 0; vifi < numvifs; vifi++) { - if (viftable[vifi].v_lcl_addr.s_addr != 0 && + if (!in_nullhost(viftable[vifi].v_lcl_addr) && !(viftable[vifi].v_flags & (VIFF_TUNNEL | VIFF_REGISTER))) { struct sockaddr_in *so = (struct sockaddr_in *)&(ifr.ifr_addr); @@ -801,38 +745,37 @@ X_ip_mrouter_done(void) } bzero((caddr_t)viftable, sizeof(viftable)); numvifs = 0; - pim_assert = 0; + pim_assert_enabled = 0; + VIF_UNLOCK(); + EVENTHANDLER_DEREGISTER(ifnet_departure_event, if_detach_event_tag); - /* - * Free all multicast forwarding cache entries. - */ callout_stop(&expire_upcalls_ch); callout_stop(&bw_upcalls_ch); callout_stop(&bw_meter_ch); MFC_LOCK(); - for (i = 0; i < MFCTBLSIZ; i++) { - for (rt = mfctable[i]; rt != NULL; ) { - struct mfc *nr = rt->mfc_next; - for (rte = rt->mfc_stall; rte != NULL; ) { - struct rtdetq *n = rte->next; - - m_freem(rte->m); - free(rte, M_MRTABLE); - rte = n; - } - free_bw_list(rt->mfc_bw_meter); - free(rt, M_MRTABLE); - rt = nr; + /* + * Free all multicast forwarding cache entries. + * Do not use hashdestroy(), as we must perform other cleanup. + */ + for (i = 0; i < mfchashsize; i++) { + struct mfc *rt, *nrt; + for (rt = LIST_FIRST(&mfchashtbl[i]); rt; rt = nrt) { + nrt = LIST_NEXT(rt, mfc_hash); + expire_mfc(rt); } } - bzero((caddr_t)mfctable, sizeof(mfctable)); - bzero((caddr_t)nexpire, sizeof(nexpire)); + free(mfchashtbl, M_MRTABLE); + mfchashtbl = NULL; + + bzero(nexpire, sizeof(nexpire[0]) * mfchashsize); + bw_upcalls_n = 0; bzero(bw_meter_timers, sizeof(bw_meter_timers)); + MFC_UNLOCK(); reg_vif_num = VIFI_INVALID; @@ -854,7 +797,7 @@ set_assert(int i) if ((i != 1) && (i != 0)) return EINVAL; - pim_assert = i; + pim_assert_enabled = i; return 0; } @@ -878,17 +821,22 @@ set_api_config(uint32_t *apival) *apival = 0; return EPERM; } - if (pim_assert) { + if (pim_assert_enabled) { *apival = 0; return EPERM; } - for (i = 0; i < MFCTBLSIZ; i++) { - if (mfctable[i] != NULL) { + + MFC_LOCK(); + + for (i = 0; i < mfchashsize; i++) { + if (LIST_FIRST(&mfchashtbl[i]) != NULL) { *apival = 0; return EPERM; } } + MFC_UNLOCK(); + mrt_api_config = *apival & mrt_api_support; *apival = mrt_api_config; @@ -918,11 +866,11 @@ add_vif(struct vifctl *vifcp) VIF_UNLOCK(); return EINVAL; } - if (vifp->v_lcl_addr.s_addr != INADDR_ANY) { + if (!in_nullhost(vifp->v_lcl_addr)) { VIF_UNLOCK(); return EADDRINUSE; } - if (vifcp->vifc_lcl_addr.s_addr == INADDR_ANY) { + if (in_nullhost(vifcp->vifc_lcl_addr)) { VIF_UNLOCK(); return EADDRNOTAVAIL; } @@ -978,8 +926,6 @@ add_vif(struct vifctl *vifcp) vifp->v_lcl_addr = vifcp->vifc_lcl_addr; vifp->v_rmt_addr = vifcp->vifc_rmt_addr; vifp->v_ifp = ifp; - vifp->v_rsvp_on = 0; - vifp->v_rsvpd = NULL; /* initialize per vif pkt counters */ vifp->v_pkt_in = 0; vifp->v_pkt_out = 0; @@ -988,7 +934,8 @@ add_vif(struct vifctl *vifcp) bzero(&vifp->v_route, sizeof(vifp->v_route)); /* Adjust numvifs up if the vifi is higher than numvifs */ - if (numvifs <= vifcp->vifc_vifi) numvifs = vifcp->vifc_vifi + 1; + if (numvifs <= vifcp->vifc_vifi) + numvifs = vifcp->vifc_vifi + 1; VIF_UNLOCK(); @@ -1017,7 +964,7 @@ del_vif_locked(vifi_t vifi) return EINVAL; } vifp = &viftable[vifi]; - if (vifp->v_lcl_addr.s_addr == INADDR_ANY) { + if (in_nullhost(vifp->v_lcl_addr)) { return EADDRNOTAVAIL; } @@ -1034,7 +981,7 @@ del_vif_locked(vifi_t vifi) /* Adjust numvifs down */ for (vifi = numvifs; vifi > 0; vifi--) - if (viftable[vifi-1].v_lcl_addr.s_addr != INADDR_ANY) + if (!in_nullhost(viftable[vifi-1].v_lcl_addr)) break; numvifs = vifi; @@ -1089,9 +1036,25 @@ init_mfc_params(struct mfc *rt, struct mfcctl2 *mfccp) rt->mfc_pkt_cnt = 0; rt->mfc_byte_cnt = 0; rt->mfc_wrong_if = 0; - rt->mfc_last_assert.tv_sec = rt->mfc_last_assert.tv_usec = 0; + timevalclear(&rt->mfc_last_assert); } +static void +expire_mfc(struct mfc *rt) +{ + struct rtdetq *rte, *nrte; + + free_bw_list(rt->mfc_bw_meter); + + TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) { + m_freem(rte->m); + TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link); + free(rte, M_MRTABLE); + } + + LIST_REMOVE(rt, mfc_hash); + free(rt, M_MRTABLE); +} /* * Add an mfc entry @@ -1100,14 +1063,14 @@ static int add_mfc(struct mfcctl2 *mfccp) { struct mfc *rt; - u_long hash; - struct rtdetq *rte; + struct rtdetq *rte, *nrte; + u_long hash = 0; u_short nstl; VIF_LOCK(); MFC_LOCK(); - rt = mfc_find(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); + rt = mfc_find(&mfccp->mfcc_origin, &mfccp->mfcc_mcastgrp); /* If an entry already exists, just update the fields */ if (rt) { @@ -1120,47 +1083,48 @@ add_mfc(struct mfcctl2 *mfccp) update_mfc_params(rt, mfccp); MFC_UNLOCK(); VIF_UNLOCK(); - return 0; + return (0); } /* * Find the entry for which the upcall was made and update */ - hash = MFCHASH(mfccp->mfcc_origin.s_addr, mfccp->mfcc_mcastgrp.s_addr); - for (rt = mfctable[hash], nstl = 0; rt; rt = rt->mfc_next) { + nstl = 0; + hash = MFCHASH(mfccp->mfcc_origin, mfccp->mfcc_mcastgrp); + LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) { + if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) && + in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp) && + !TAILQ_EMPTY(&rt->mfc_stall)) { + if (nstl++) { + log(LOG_ERR, "add_mfc %s o %lx g %lx p %x dbx %p\n", + "multiple kernel entries", + (u_long)ntohl(mfccp->mfcc_origin.s_addr), + (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), + mfccp->mfcc_parent, + (void *)TAILQ_FIRST(&rt->mfc_stall)); + } - if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && - (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr) && - (rt->mfc_stall != NULL)) { + if (mrtdebug & DEBUG_MFC) { + log(LOG_DEBUG,"add_mfc o %lx g %lx p %x dbg %p\n", + (u_long)ntohl(mfccp->mfcc_origin.s_addr), + (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), + mfccp->mfcc_parent, + (void *)TAILQ_FIRST(&rt->mfc_stall)); + } - if (nstl++) - log(LOG_ERR, "add_mfc %s o %lx g %lx p %x dbx %p\n", - "multiple kernel entries", - (u_long)ntohl(mfccp->mfcc_origin.s_addr), - (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), - mfccp->mfcc_parent, (void *)rt->mfc_stall); + init_mfc_params(rt, mfccp); + rt->mfc_expire = 0; /* Don't clean this guy up */ + nexpire[hash]--; - if (mrtdebug & DEBUG_MFC) - log(LOG_DEBUG,"add_mfc o %lx g %lx p %x dbg %p\n", - (u_long)ntohl(mfccp->mfcc_origin.s_addr), - (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), - mfccp->mfcc_parent, (void *)rt->mfc_stall); - - init_mfc_params(rt, mfccp); - - rt->mfc_expire = 0; /* Don't clean this guy up */ - nexpire[hash]--; - - /* free packets Qed at the end of this entry */ - for (rte = rt->mfc_stall; rte != NULL; ) { - struct rtdetq *n = rte->next; - - ip_mdq(rte->m, rte->ifp, rt, -1); - m_freem(rte->m); - free(rte, M_MRTABLE); - rte = n; - } - rt->mfc_stall = NULL; + /* Free queued packets, but attempt to forward them first. */ + TAILQ_FOREACH_SAFE(rte, &rt->mfc_stall, rte_link, nrte) { + if (rte->ifp != NULL) + ip_mdq(rte->m, rte->ifp, rt, -1); + m_freem(rte->m); + TAILQ_REMOVE(&rt->mfc_stall, rte, rte_link); + rt->mfc_nstall--; + free(rte, M_MRTABLE); + } } } @@ -1168,43 +1132,50 @@ add_mfc(struct mfcctl2 *mfccp) * It is possible that an entry is being inserted without an upcall */ if (nstl == 0) { + /* + * No mfc; make a new one + */ if (mrtdebug & DEBUG_MFC) log(LOG_DEBUG,"add_mfc no upcall h %lu o %lx g %lx p %x\n", hash, (u_long)ntohl(mfccp->mfcc_origin.s_addr), (u_long)ntohl(mfccp->mfcc_mcastgrp.s_addr), mfccp->mfcc_parent); - for (rt = mfctable[hash]; rt != NULL; rt = rt->mfc_next) { - if ((rt->mfc_origin.s_addr == mfccp->mfcc_origin.s_addr) && - (rt->mfc_mcastgrp.s_addr == mfccp->mfcc_mcastgrp.s_addr)) { - init_mfc_params(rt, mfccp); - if (rt->mfc_expire) - nexpire[hash]--; - rt->mfc_expire = 0; - break; /* XXX */ - } + LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) { + if (in_hosteq(rt->mfc_origin, mfccp->mfcc_origin) && + in_hosteq(rt->mfc_mcastgrp, mfccp->mfcc_mcastgrp)) { + init_mfc_params(rt, mfccp); + if (rt->mfc_expire) + nexpire[hash]--; + rt->mfc_expire = 0; + break; /* XXX */ + } } + if (rt == NULL) { /* no upcall, so make a new entry */ rt = (struct mfc *)malloc(sizeof(*rt), M_MRTABLE, M_NOWAIT); if (rt == NULL) { MFC_UNLOCK(); VIF_UNLOCK(); - return ENOBUFS; + return (ENOBUFS); } init_mfc_params(rt, mfccp); - rt->mfc_expire = 0; - rt->mfc_stall = NULL; + TAILQ_INIT(&rt->mfc_stall); + rt->mfc_nstall = 0; + rt->mfc_expire = 0; rt->mfc_bw_meter = NULL; + /* insert new entry at head of hash chain */ - rt->mfc_next = mfctable[hash]; - mfctable[hash] = rt; + LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash); } } + MFC_UNLOCK(); VIF_UNLOCK(); - return 0; + + return (0); } /* @@ -1216,49 +1187,40 @@ del_mfc(struct mfcctl2 *mfccp) struct in_addr origin; struct in_addr mcastgrp; struct mfc *rt; - struct mfc **nptr; - u_long hash; - struct bw_meter *list; origin = mfccp->mfcc_origin; mcastgrp = mfccp->mfcc_mcastgrp; - if (mrtdebug & DEBUG_MFC) + if (mrtdebug & DEBUG_MFC) { log(LOG_DEBUG,"del_mfc orig %lx mcastgrp %lx\n", - (u_long)ntohl(origin.s_addr), (u_long)ntohl(mcastgrp.s_addr)); + (u_long)ntohl(origin.s_addr), + (u_long)ntohl(mcastgrp.s_addr)); + } MFC_LOCK(); - hash = MFCHASH(origin.s_addr, mcastgrp.s_addr); - for (nptr = &mfctable[hash]; (rt = *nptr) != NULL; nptr = &rt->mfc_next) - if (origin.s_addr == rt->mfc_origin.s_addr && - mcastgrp.s_addr == rt->mfc_mcastgrp.s_addr && - rt->mfc_stall == NULL) - break; + rt = mfc_find(&origin, &mcastgrp); if (rt == NULL) { MFC_UNLOCK(); return EADDRNOTAVAIL; } - *nptr = rt->mfc_next; - /* * free the bw_meter entries */ - list = rt->mfc_bw_meter; + free_bw_list(rt->mfc_bw_meter); rt->mfc_bw_meter = NULL; + LIST_REMOVE(rt, mfc_hash); free(rt, M_MRTABLE); - free_bw_list(list); - MFC_UNLOCK(); - return 0; + return (0); } /* - * Send a message to the routing daemon on the multicast routing socket + * Send a message to the routing daemon on the multicast routing socket. */ static int socket_send(struct socket *s, struct mbuf *mm, struct sockaddr_in *src) @@ -1329,26 +1291,11 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, if (imo && ((vifi = imo->imo_multicast_vif) < numvifs)) { if (ip->ip_ttl < MAXTTL) ip->ip_ttl++; /* compensate for -1 in *_send routines */ - if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { - struct vif *vifp = viftable + vifi; - - printf("Sending IPPROTO_RSVP from %lx to %lx on vif %d (%s%s)\n", - (long)ntohl(ip->ip_src.s_addr), (long)ntohl(ip->ip_dst.s_addr), - vifi, - (vifp->v_flags & VIFF_TUNNEL) ? "tunnel on " : "", - vifp->v_ifp->if_xname); - } error = ip_mdq(m, ifp, NULL, vifi); MFC_UNLOCK(); VIF_UNLOCK(); return error; } - if (rsvpdebug && ip->ip_p == IPPROTO_RSVP) { - printf("Warning: IPPROTO_RSVP from %lx to %lx without vif option\n", - (long)ntohl(ip->ip_src.s_addr), (long)ntohl(ip->ip_dst.s_addr)); - if (!imo) - printf("In fact, no options were specified at all\n"); - } /* * Don't forward a packet with time-to-live of zero or one, @@ -1364,7 +1311,7 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, * Determine forwarding vifs from the forwarding cache table */ ++mrtstat.mrts_mfc_lookups; - rt = mfc_find(ip->ip_src.s_addr, ip->ip_dst.s_addr); + rt = mfc_find(&ip->ip_src, &ip->ip_dst); /* Entry exists, so forward if necessary */ if (rt != NULL) { @@ -1396,12 +1343,14 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, * just going to fail anyway. Make sure to pullup the header so * that other people can't step on it. */ - rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE, M_NOWAIT); + rte = (struct rtdetq *)malloc((sizeof *rte), M_MRTABLE, + M_NOWAIT|M_ZERO); if (rte == NULL) { MFC_UNLOCK(); VIF_UNLOCK(); return ENOBUFS; } + mb0 = m_copypacket(m, M_DONTWAIT); if (mb0 && (M_HASCL(mb0) || mb0->m_len < hlen)) mb0 = m_pullup(mb0, hlen); @@ -1413,12 +1362,12 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, } /* is there an upcall waiting for this flow ? */ - hash = MFCHASH(ip->ip_src.s_addr, ip->ip_dst.s_addr); - for (rt = mfctable[hash]; rt; rt = rt->mfc_next) { - if ((ip->ip_src.s_addr == rt->mfc_origin.s_addr) && - (ip->ip_dst.s_addr == rt->mfc_mcastgrp.s_addr) && - (rt->mfc_stall != NULL)) - break; + hash = MFCHASH(ip->ip_src, ip->ip_dst); + LIST_FOREACH(rt, &mfchashtbl[hash], mfc_hash) { + if (in_hosteq(ip->ip_src, rt->mfc_origin) && + in_hosteq(ip->ip_dst, rt->mfc_mcastgrp) && + !TAILQ_EMPTY(&rt->mfc_stall)) + break; } if (rt == NULL) { @@ -1431,7 +1380,8 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, * Locate the vifi for the incoming interface for this packet. * If none found, drop packet. */ - for (vifi=0; vifi < numvifs && viftable[vifi].v_ifp != ifp; vifi++) + for (vifi = 0; vifi < numvifs && + viftable[vifi].v_ifp != ifp; vifi++) ; if (vifi >= numvifs) /* vif not found, drop packet */ goto non_fatal; @@ -1482,45 +1432,32 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, } rt->mfc_parent = -1; - rt->mfc_rp.s_addr = INADDR_ANY; /* clear the RP address */ - + /* clear the RP address */ + rt->mfc_rp.s_addr = INADDR_ANY; rt->mfc_bw_meter = NULL; /* link into table */ - rt->mfc_next = mfctable[hash]; - mfctable[hash] = rt; - rt->mfc_stall = rte; + LIST_INSERT_HEAD(&mfchashtbl[hash], rt, mfc_hash); + TAILQ_INSERT_HEAD(&rt->mfc_stall, rte, rte_link); + rt->mfc_nstall++; } else { - /* determine if q has overflowed */ - int npkts = 0; - struct rtdetq **p; - - /* - * XXX ouch! we need to append to the list, but we - * only have a pointer to the front, so we have to - * scan the entire list every time. - */ - for (p = &rt->mfc_stall; *p != NULL; p = &(*p)->next) - npkts++; - - if (npkts > MAX_UPQ) { + /* determine if queue has overflowed */ + if (rt->mfc_nstall > MAX_UPQ) { mrtstat.mrts_upq_ovflw++; non_fatal: free(rte, M_MRTABLE); m_freem(mb0); MFC_UNLOCK(); VIF_UNLOCK(); - return 0; + return (0); } - - /* Add this entry to the end of the queue */ - *p = rte; + TAILQ_INSERT_TAIL(&rt->mfc_stall, rte, rte_link); + rt->mfc_nstall++; } rte->m = mb0; rte->ifp = ifp; - rte->next = NULL; MFC_UNLOCK(); VIF_UNLOCK(); @@ -1535,58 +1472,46 @@ X_ip_mforward(struct ip *ip, struct ifnet *ifp, struct mbuf *m, static void expire_upcalls(void *unused) { - struct rtdetq *rte; - struct mfc *mfc, **nptr; int i; MFC_LOCK(); - for (i = 0; i < MFCTBLSIZ; i++) { + + for (i = 0; i < mfchashsize; i++) { + struct mfc *rt, *nrt; + if (nexpire[i] == 0) continue; - nptr = &mfctable[i]; - for (mfc = *nptr; mfc != NULL; mfc = *nptr) { - /* - * Skip real cache entries - * Make sure it wasn't marked to not expire (shouldn't happen) - * If it expires now - */ - if (mfc->mfc_stall != NULL && mfc->mfc_expire != 0 && - --mfc->mfc_expire == 0) { - if (mrtdebug & DEBUG_EXPIRE) - log(LOG_DEBUG, "expire_upcalls: expiring (%lx %lx)\n", - (u_long)ntohl(mfc->mfc_origin.s_addr), - (u_long)ntohl(mfc->mfc_mcastgrp.s_addr)); - /* - * drop all the packets - * free the mbuf with the pkt, if, timing info - */ - for (rte = mfc->mfc_stall; rte; ) { - struct rtdetq *n = rte->next; - m_freem(rte->m); - free(rte, M_MRTABLE); - rte = n; - } - ++mrtstat.mrts_cache_cleanups; - nexpire[i]--; + for (rt = LIST_FIRST(&mfchashtbl[i]); rt; rt = nrt) { + nrt = LIST_NEXT(rt, mfc_hash); + + if (TAILQ_EMPTY(&rt->mfc_stall)) + continue; + + if (rt->mfc_expire == 0 || --rt->mfc_expire > 0) + continue; /* * free the bw_meter entries */ - while (mfc->mfc_bw_meter != NULL) { - struct bw_meter *x = mfc->mfc_bw_meter; + while (rt->mfc_bw_meter != NULL) { + struct bw_meter *x = rt->mfc_bw_meter; - mfc->mfc_bw_meter = x->bm_mfc_next; + rt->mfc_bw_meter = x->bm_mfc_next; free(x, M_BWMETER); } - *nptr = mfc->mfc_next; - free(mfc, M_MRTABLE); - } else { - nptr = &mfc->mfc_next; + ++mrtstat.mrts_cache_cleanups; + if (mrtdebug & DEBUG_EXPIRE) { + log(LOG_DEBUG, "expire_upcalls: expiring (%lx %lx)\n", + (u_long)ntohl(rt->mfc_origin.s_addr), + (u_long)ntohl(rt->mfc_mcastgrp.s_addr)); + } + + expire_mfc(rt); } - } } + MFC_UNLOCK(); callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, NULL); @@ -1637,9 +1562,7 @@ ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif) * can complete the SPT switch, regardless of the type * of the iif (broadcast media, GRE tunnel, etc). */ - if (pim_assert && (vifi < numvifs) && viftable[vifi].v_ifp) { - struct timeval now; - u_long delta; + if (pim_assert_enabled && (vifi < numvifs) && viftable[vifi].v_ifp) { if (ifp == &multicast_register_if) pimstat.pims_rcv_registers_wrongiif++; @@ -1653,11 +1576,7 @@ ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif) if (rt->mfc_flags[vifi] & MRT_MFC_FLAGS_DISABLE_WRONGVIF) return 0; /* WRONGVIF disabled: ignore the packet */ - GET_TIME(now); - - TV_DELTA(now, rt->mfc_last_assert, delta); - - if (delta > ASSERT_MSG_TIME) { + if (ratecheck(&rt->mfc_last_assert, &pim_assert_interval)) { struct sockaddr_in k_igmpsrc = { sizeof k_igmpsrc, AF_INET }; struct igmpmsg *im; int hlen = ip->ip_hl << 2; @@ -1668,8 +1587,6 @@ ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif) if (mm == NULL) return ENOBUFS; - rt->mfc_last_assert = now; - im = mtod(mm, struct igmpmsg *); im->im_msgtype = IGMPMSG_WRONGVIF; im->im_mbz = 0; @@ -1689,8 +1606,9 @@ ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif) return 0; } + /* If I sourced this packet, it counts as output, else it was input. */ - if (ip->ip_src.s_addr == viftable[vifi].v_lcl_addr.s_addr) { + if (in_hosteq(ip->ip_src, viftable[vifi].v_lcl_addr)) { viftable[vifi].v_pkt_out++; viftable[vifi].v_bytes_out += plen; } else { @@ -1723,7 +1641,7 @@ ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif) struct bw_meter *x; struct timeval now; - GET_TIME(now); + microtime(&now); MFC_LOCK_ASSERT(); for (x = rt->mfc_bw_meter; x != NULL; x = x->bm_mfc_next) bw_meter_receive_packet(x, plen, &now); @@ -1733,13 +1651,23 @@ ip_mdq(struct mbuf *m, struct ifnet *ifp, struct mfc *rt, vifi_t xmt_vif) } /* - * check if a vif number is legal/ok. This is used by ip_output. + * Check if a vif number is legal/ok. This is used by in_mcast.c. */ static int X_legal_vif_num(int vif) { - /* XXX unlocked, matter? */ - return (vif >= 0 && vif < numvifs); + int ret; + + ret = 0; + if (vif < 0) + return (ret); + + VIF_LOCK(); + if (vif < numvifs) + ret = 1; + VIF_UNLOCK(); + + return (ret); } /* @@ -1748,11 +1676,18 @@ X_legal_vif_num(int vif) static u_long X_ip_mcast_src(int vifi) { - /* XXX unlocked, matter? */ - if (vifi >= 0 && vifi < numvifs) - return viftable[vifi].v_lcl_addr.s_addr; - else - return INADDR_ANY; + in_addr_t addr; + + addr = INADDR_ANY; + if (vifi < 0) + return (addr); + + VIF_LOCK(); + if (vifi < numvifs) + addr = viftable[vifi].v_lcl_addr.s_addr; + VIF_UNLOCK(); + + return (addr); } static void @@ -1807,167 +1742,29 @@ send_packet(struct vif *vifp, struct mbuf *m) } } +/* + * Stubs for old RSVP socket shim implementation. + */ + static int -X_ip_rsvp_vif(struct socket *so, struct sockopt *sopt) +X_ip_rsvp_vif(struct socket *so __unused, struct sockopt *sopt __unused) { - INIT_VNET_INET(curvnet); - int error, vifi; - if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) - return EOPNOTSUPP; - - error = sooptcopyin(sopt, &vifi, sizeof vifi, sizeof vifi); - if (error) - return error; - - VIF_LOCK(); - - if (vifi < 0 || vifi >= numvifs) { /* Error if vif is invalid */ - VIF_UNLOCK(); - return EADDRNOTAVAIL; - } - - if (sopt->sopt_name == IP_RSVP_VIF_ON) { - /* Check if socket is available. */ - if (viftable[vifi].v_rsvpd != NULL) { - VIF_UNLOCK(); - return EADDRINUSE; - } - - viftable[vifi].v_rsvpd = so; - /* This may seem silly, but we need to be sure we don't over-increment - * the RSVP counter, in case something slips up. - */ - if (!viftable[vifi].v_rsvp_on) { - viftable[vifi].v_rsvp_on = 1; - V_rsvp_on++; - } - } else { /* must be VIF_OFF */ - /* - * XXX as an additional consistency check, one could make sure - * that viftable[vifi].v_rsvpd == so, otherwise passing so as - * first parameter is pretty useless. - */ - viftable[vifi].v_rsvpd = NULL; - /* - * This may seem silly, but we need to be sure we don't over-decrement - * the RSVP counter, in case something slips up. - */ - if (viftable[vifi].v_rsvp_on) { - viftable[vifi].v_rsvp_on = 0; - V_rsvp_on--; - } - } - VIF_UNLOCK(); - return 0; + return (EOPNOTSUPP); } static void -X_ip_rsvp_force_done(struct socket *so) +X_ip_rsvp_force_done(struct socket *so __unused) { - INIT_VNET_INET(curvnet); - int vifi; - /* Don't bother if it is not the right type of socket. */ - if (so->so_type != SOCK_RAW || so->so_proto->pr_protocol != IPPROTO_RSVP) - return; - - VIF_LOCK(); - - /* The socket may be attached to more than one vif...this - * is perfectly legal. - */ - for (vifi = 0; vifi < numvifs; vifi++) { - if (viftable[vifi].v_rsvpd == so) { - viftable[vifi].v_rsvpd = NULL; - /* This may seem silly, but we need to be sure we don't - * over-decrement the RSVP counter, in case something slips up. - */ - if (viftable[vifi].v_rsvp_on) { - viftable[vifi].v_rsvp_on = 0; - V_rsvp_on--; - } - } - } - - VIF_UNLOCK(); } static void -X_rsvp_input(struct mbuf *m, int off) +X_rsvp_input(struct mbuf *m, int off __unused) { - INIT_VNET_INET(curvnet); - int vifi; - struct ip *ip = mtod(m, struct ip *); - struct sockaddr_in rsvp_src = { sizeof rsvp_src, AF_INET }; - struct ifnet *ifp; - if (rsvpdebug) - printf("rsvp_input: rsvp_on %d\n", V_rsvp_on); - - /* Can still get packets with rsvp_on = 0 if there is a local member - * of the group to which the RSVP packet is addressed. But in this - * case we want to throw the packet away. - */ - if (!V_rsvp_on) { - m_freem(m); - return; - } - - if (rsvpdebug) - printf("rsvp_input: check vifs\n"); - -#ifdef DIAGNOSTIC - M_ASSERTPKTHDR(m); -#endif - - ifp = m->m_pkthdr.rcvif; - - VIF_LOCK(); - /* Find which vif the packet arrived on. */ - for (vifi = 0; vifi < numvifs; vifi++) - if (viftable[vifi].v_ifp == ifp) - break; - - if (vifi == numvifs || viftable[vifi].v_rsvpd == NULL) { - /* - * Drop the lock here to avoid holding it across rip_input. - * This could make rsvpdebug printfs wrong. If you care, - * record the state of stuff before dropping the lock. - */ - VIF_UNLOCK(); - /* - * If the old-style non-vif-associated socket is set, - * then use it. Otherwise, drop packet since there - * is no specific socket for this vif. - */ - if (V_ip_rsvpd != NULL) { - if (rsvpdebug) - printf("rsvp_input: Sending packet up old-style socket\n"); - rip_input(m, off); /* xxx */ - } else { - if (rsvpdebug && vifi == numvifs) - printf("rsvp_input: Can't find vif for packet.\n"); - else if (rsvpdebug && viftable[vifi].v_rsvpd == NULL) - printf("rsvp_input: No socket defined for vif %d\n",vifi); - m_freem(m); - } - return; - } - rsvp_src.sin_addr = ip->ip_src; - - if (rsvpdebug && m) - printf("rsvp_input: m->m_len = %d, sbspace() = %ld\n", - m->m_len,sbspace(&(viftable[vifi].v_rsvpd->so_rcv))); - - if (socket_send(viftable[vifi].v_rsvpd, m, &rsvp_src) < 0) { - if (rsvpdebug) - printf("rsvp_input: Failed to append to socket\n"); - } else { - if (rsvpdebug) - printf("rsvp_input: send packet up\n"); - } - VIF_UNLOCK(); + if (!V_rsvp_on) + m_freem(m); } /* @@ -2033,7 +1830,7 @@ add_bw_upcall(struct bw_upcall *req) * Find if we have already same bw_meter entry */ MFC_LOCK(); - mfc = mfc_find(req->bu_src.s_addr, req->bu_dst.s_addr); + mfc = mfc_find(&req->bu_src, &req->bu_dst); if (mfc == NULL) { MFC_UNLOCK(); return EADDRNOTAVAIL; @@ -2058,7 +1855,7 @@ add_bw_upcall(struct bw_upcall *req) /* Set the new bw_meter entry */ x->bm_threshold.b_time = req->bu_threshold.b_time; - GET_TIME(now); + microtime(&now); x->bm_start_time = now; x->bm_threshold.b_packets = req->bu_threshold.b_packets; x->bm_threshold.b_bytes = req->bu_threshold.b_bytes; @@ -2103,8 +1900,9 @@ del_bw_upcall(struct bw_upcall *req) return EOPNOTSUPP; MFC_LOCK(); + /* Find the corresponding MFC entry */ - mfc = mfc_find(req->bu_src.s_addr, req->bu_dst.s_addr); + mfc = mfc_find(&req->bu_src, &req->bu_dst); if (mfc == NULL) { MFC_UNLOCK(); return EADDRNOTAVAIL; @@ -2446,7 +2244,7 @@ bw_meter_process() int i; struct timeval now, process_endtime; - GET_TIME(now); + microtime(&now); if (last_tv_sec == now.tv_sec) return; /* nothing to do */ @@ -2572,7 +2370,7 @@ pim_register_send(struct ip *ip, struct vif *vifp, struct mbuf *m, * rendezvous point was unspecified, and we were told not to. */ if (pim_squelch_wholepkt != 0 && (mrt_api_config & MRT_MFC_RP) && - (rt->mfc_rp.s_addr == INADDR_ANY)) + in_nullhost(rt->mfc_rp)) return 0; mb_copy = pim_register_prepare(ip, m); @@ -2589,8 +2387,7 @@ pim_register_send(struct ip *ip, struct vif *vifp, struct mbuf *m, mm = m_pullup(mm, sizeof(struct ip)); if (mm != NULL) { ip = mtod(mm, struct ip *); - if ((mrt_api_config & MRT_MFC_RP) && - (rt->mfc_rp.s_addr != INADDR_ANY)) { + if ((mrt_api_config & MRT_MFC_RP) && !in_nullhost(rt->mfc_rp)) { pim_register_send_rp(ip, vifp, mm, rt); } else { pim_register_send_upcall(ip, vifp, mm, rt); @@ -2723,7 +2520,7 @@ pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy, VIF_LOCK_ASSERT(); - if ((vifi >= numvifs) || (viftable[vifi].v_lcl_addr.s_addr == 0)) { + if ((vifi >= numvifs) || in_nullhost(viftable[vifi].v_lcl_addr)) { m_freem(mb_copy); return EADDRNOTAVAIL; /* The iif vif is invalid */ } @@ -2779,7 +2576,7 @@ pim_register_send_rp(struct ip *ip, struct vif *vifp, struct mbuf *mb_copy, } /* - * pim_encapcheck() is called by the encap[46]_input() path at runtime to + * pim_encapcheck() is called by the encap4_input() path at runtime to * determine if a packet is for PIM; allowing PIM to be dynamically loaded * into the kernel. */ @@ -3037,10 +2834,37 @@ pim_input(struct mbuf *m, int off) return; } -/* - * XXX: This is common code for dealing with initialization for both - * the IPv4 and IPv6 multicast forwarding paths. It could do with cleanup. - */ +static int +sysctl_mfctable(SYSCTL_HANDLER_ARGS) +{ + struct mfc *rt; + int error, i; + + if (req->newptr) + return (EPERM); + if (mfchashtbl == NULL) /* XXX unlocked */ + return (0); + error = sysctl_wire_old_buffer(req, 0); + if (error) + return (error); + + MFC_LOCK(); + for (i = 0; i < mfchashsize; i++) { + LIST_FOREACH(rt, &mfchashtbl[i], mfc_hash) { + error = SYSCTL_OUT(req, rt, sizeof(struct mfc)); + if (error) + goto out_locked; + } + } +out_locked: + MFC_UNLOCK(); + return (error); +} + +SYSCTL_NODE(_net_inet_ip, OID_AUTO, mfctable, CTLFLAG_RD, sysctl_mfctable, + "IPv4 Multicast Forwarding Table (struct *mfc[mfchashsize], " + "netinet/ip_mroute.h)"); + static int ip_mroute_modevent(module_t mod, int type, void *unused) { @@ -3051,9 +2875,20 @@ ip_mroute_modevent(module_t mod, int type, void *unused) MROUTER_LOCK_INIT(); MFC_LOCK_INIT(); VIF_LOCK_INIT(); - ip_mrouter_reset(); + + mfchashsize = MFCHASHSIZE; + if (TUNABLE_ULONG_FETCH("net.inet.ip.mfchashsize", &mfchashsize) && + !powerof2(mfchashsize)) { + printf("WARNING: %s not a power of 2; using default\n", + "net.inet.ip.mfchashsize"); + mfchashsize = MFCHASHSIZE; + } + MALLOC(nexpire, u_char *, mfchashsize, M_MRTABLE, M_WAITOK|M_ZERO); + + pim_squelch_wholepkt = 0; TUNABLE_ULONG_FETCH("net.inet.pim.squelch_wholepkt", &pim_squelch_wholepkt); + ip_mrouter_reset(); pim_encap_cookie = encap_attach_func(AF_INET, IPPROTO_PIM, pim_encapcheck, &in_pim_protosw, NULL); @@ -3065,36 +2900,12 @@ ip_mroute_modevent(module_t mod, int type, void *unused) return (EINVAL); } -#ifdef INET6 - pim6_encap_cookie = encap_attach_func(AF_INET6, IPPROTO_PIM, - pim_encapcheck, (struct protosw *)&in6_pim_protosw, NULL); - if (pim6_encap_cookie == NULL) { - printf("ip_mroute: unable to attach pim6 encap\n"); - if (pim_encap_cookie) { - encap_detach(pim_encap_cookie); - pim_encap_cookie = NULL; - } - VIF_LOCK_DESTROY(); - MFC_LOCK_DESTROY(); - MROUTER_LOCK_DESTROY(); - return (EINVAL); - } -#endif - ip_mcast_src = X_ip_mcast_src; ip_mforward = X_ip_mforward; ip_mrouter_done = X_ip_mrouter_done; ip_mrouter_get = X_ip_mrouter_get; ip_mrouter_set = X_ip_mrouter_set; -#ifdef INET6 - ip6_mforward = X_ip6_mforward; - ip6_mrouter_done = X_ip6_mrouter_done; - ip6_mrouter_get = X_ip6_mrouter_get; - ip6_mrouter_set = X_ip6_mrouter_set; - mrt6_ioctl = X_mrt6_ioctl; -#endif - ip_rsvp_force_done = X_ip_rsvp_force_done; ip_rsvp_vif = X_ip_rsvp_vif; @@ -3112,31 +2923,18 @@ ip_mroute_modevent(module_t mod, int type, void *unused) * just loaded and then unloaded w/o starting up a user * process we still need to cleanup. */ - if (V_ip_mrouter -#ifdef INET6 - || ip6_mrouter -#endif - ) - return EINVAL; - -#ifdef INET6 - if (pim6_encap_cookie) { - encap_detach(pim6_encap_cookie); - pim6_encap_cookie = NULL; - } - X_ip6_mrouter_done(); - ip6_mforward = NULL; - ip6_mrouter_done = NULL; - ip6_mrouter_get = NULL; - ip6_mrouter_set = NULL; - mrt6_ioctl = NULL; -#endif + if (V_ip_mrouter != NULL) + return (EINVAL); if (pim_encap_cookie) { encap_detach(pim_encap_cookie); pim_encap_cookie = NULL; } X_ip_mrouter_done(); + + FREE(nexpire, M_MRTABLE); + nexpire = NULL; + ip_mcast_src = NULL; ip_mforward = NULL; ip_mrouter_done = NULL; @@ -3166,4 +2964,5 @@ static moduledata_t ip_mroutemod = { ip_mroute_modevent, 0 }; + DECLARE_MODULE(ip_mroute, ip_mroutemod, SI_SUB_PSEUDO, SI_ORDER_ANY); diff --git a/sys/netinet/ip_mroute.h b/sys/netinet/ip_mroute.h index 4043e4439d9c..a16796535075 100644 --- a/sys/netinet/ip_mroute.h +++ b/sys/netinet/ip_mroute.h @@ -70,9 +70,6 @@ #define MRT_ADD_BW_UPCALL 111 /* create bandwidth monitor */ #define MRT_DEL_BW_UPCALL 112 /* delete bandwidth monitor */ - -#define GET_TIME(t) microtime(&t) - /* * Types and macros for handling bitmaps with one bit per virtual interface. */ @@ -253,8 +250,6 @@ struct sioc_vif_req { struct vif { u_char v_flags; /* VIFF_ flags defined above */ u_char v_threshold; /* min ttl required to forward on vif*/ - u_int v_rate_limit; /* ignored; kept for compatibility */ - struct tbf *v_tbf; /* ignored; kept for compatibility */ struct in_addr v_lcl_addr; /* local interface address */ struct in_addr v_rmt_addr; /* remote address (tunnels only) */ struct ifnet *v_ifp; /* pointer to interface */ @@ -263,16 +258,13 @@ struct vif { u_long v_bytes_in; /* # bytes in on interface */ u_long v_bytes_out; /* # bytes out on interface */ struct route v_route; /* cached route */ - u_int v_rsvp_on; /* RSVP listening on this vif */ - struct socket *v_rsvpd; /* RSVP daemon socket */ }; /* * The kernel's multicast forwarding cache entry structure - * (A field for the type of service (mfc_tos) is to be added - * at a future point) */ struct mfc { + LIST_ENTRY(mfc) mfc_hash; struct in_addr mfc_origin; /* IP origin of mcasts */ struct in_addr mfc_mcastgrp; /* multicast group associated*/ vifi_t mfc_parent; /* incoming vif */ @@ -282,11 +274,11 @@ struct mfc { u_long mfc_wrong_if; /* wrong if for src-grp */ int mfc_expire; /* time to clean entry up */ struct timeval mfc_last_assert; /* last time I sent an assert*/ - struct rtdetq *mfc_stall; /* q of packets awaiting mfc */ - struct mfc *mfc_next; /* next mfc entry */ uint8_t mfc_flags[MAXVIFS]; /* the MRT_MFC_FLAGS_* flags */ struct in_addr mfc_rp; /* the RP address */ struct bw_meter *mfc_bw_meter; /* list of bandwidth meters */ + u_long mfc_nstall; /* # of packets awaiting mfc */ + TAILQ_HEAD(, rtdetq) mfc_stall; /* q of packets awaiting mfc */ }; /* @@ -311,19 +303,11 @@ struct igmpmsg { * Argument structure used for pkt info. while upcall is made */ struct rtdetq { + TAILQ_ENTRY(rtdetq) rte_link; struct mbuf *m; /* A copy of the packet */ struct ifnet *ifp; /* Interface pkt came in on */ vifi_t xmt_vif; /* Saved copy of imo_multicast_vif */ - struct rtdetq *next; /* Next in list of packets */ }; - -#define MFCTBLSIZ 256 -#if (MFCTBLSIZ & (MFCTBLSIZ - 1)) == 0 /* from sys:route.h */ -#define MFCHASHMOD(h) ((h) & (MFCTBLSIZ - 1)) -#else -#define MFCHASHMOD(h) ((h) % MFCTBLSIZ) -#endif - #define MAX_UPQ 4 /* max. no of pkts in upcall Q */ /* diff --git a/sys/netinet6/ip6_mroute.c b/sys/netinet6/ip6_mroute.c index 18e7aca1f296..29201d65d6ae 100644 --- a/sys/netinet6/ip6_mroute.c +++ b/sys/netinet6/ip6_mroute.c @@ -92,6 +92,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -114,6 +115,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -130,20 +132,18 @@ static MALLOC_DEFINE(M_MRTABLE6, "mf6c", "multicast forwarding cache entry"); /* XXX: this is a very common idiom; move to ? */ #define M_HASCL(m) ((m)->m_flags & M_EXT) -static int ip6_mdq(struct mbuf *, struct ifnet *, struct mf6c *); -static void phyint_send(struct ip6_hdr *, struct mif6 *, struct mbuf *); - -static void pim6_init(void); -static int set_pim6(int *); -static int socket_send __P((struct socket *, struct mbuf *, - struct sockaddr_in6 *)); -static int register_send __P((struct ip6_hdr *, struct mif6 *, - struct mbuf *)); +static int ip6_mdq(struct mbuf *, struct ifnet *, struct mf6c *); +static void phyint_send(struct ip6_hdr *, struct mif6 *, struct mbuf *); +static void pim6_init(void); +static int register_send(struct ip6_hdr *, struct mif6 *, struct mbuf *); +static int set_pim6(int *); +static int socket_send(struct socket *, struct mbuf *, + struct sockaddr_in6 *); extern struct domain inet6domain; -/* XXX: referenced from ip_mroute.c for dynamically loading this code. */ -struct ip6protosw in6_pim_protosw = { +static const struct encaptab *pim6_encap_cookie; +static const struct ip6protosw in6_pim_protosw = { .pr_type = SOCK_RAW, .pr_domain = &inet6domain, .pr_protocol = IPPROTO_PIM, @@ -154,6 +154,7 @@ struct ip6protosw in6_pim_protosw = { .pr_init = pim6_init, .pr_usrreqs = &rip6_usrreqs }; +static int pim6_encapcheck(const struct mbuf *, int, int, void *); #ifdef VIMAGE_GLOBALS static int ip6_mrouter_ver; @@ -171,18 +172,48 @@ SYSCTL_STRUCT(_net_inet6_ip6, OID_AUTO, mrt6stat, CTLFLAG_RW, #define NO_RTE_FOUND 0x1 #define RTE_FOUND 0x2 +static struct mtx mrouter6_mtx; +#define MROUTER6_LOCK() mtx_lock(&mrouter6_mtx) +#define MROUTER6_UNLOCK() mtx_unlock(&mrouter6_mtx) +#define MROUTER6_LOCK_ASSERT() do { \ + mtx_assert(&mrouter6_mtx, MA_OWNED); \ + NET_ASSERT_GIANT(); \ +} while (0) +#define MROUTER6_LOCK_INIT() \ + mtx_init(&mrouter6_mtx, "IPv6 multicast forwarding", NULL, MTX_DEF) +#define MROUTER6_LOCK_DESTROY() mtx_destroy(&mrouter6_mtx) + static struct mf6c *mf6ctable[MF6CTBLSIZ]; SYSCTL_OPAQUE(_net_inet6_ip6, OID_AUTO, mf6ctable, CTLFLAG_RD, &mf6ctable, sizeof(mf6ctable), "S,*mf6ctable[MF6CTBLSIZ]", - "Multicast Forwarding Table (struct *mf6ctable[MF6CTBLSIZ], " + "IPv6 Multicast Forwarding Table (struct *mf6ctable[MF6CTBLSIZ], " "netinet6/ip6_mroute.h)"); +static struct mtx mfc6_mtx; +#define MFC6_LOCK() mtx_lock(&mfc6_mtx) +#define MFC6_UNLOCK() mtx_unlock(&mfc6_mtx) +#define MFC6_LOCK_ASSERT() do { \ + mtx_assert(&mfc6_mtx, MA_OWNED); \ + NET_ASSERT_GIANT(); \ +} while (0) +#define MFC6_LOCK_INIT() \ + mtx_init(&mfc6_mtx, "IPv6 multicast forwarding cache", NULL, MTX_DEF) +#define MFC6_LOCK_DESTROY() mtx_destroy(&mfc6_mtx) + static u_char n6expire[MF6CTBLSIZ]; static struct mif6 mif6table[MAXMIFS]; SYSCTL_OPAQUE(_net_inet6_ip6, OID_AUTO, mif6table, CTLFLAG_RD, - &mif6table, sizeof(mif6table), "S,vif[MAXMIFS]", - "Multicast Interfaces (struct mif[MAXMIFS], netinet6/ip6_mroute.h)"); + &mif6table, sizeof(mif6table), "S,mif6[MAXMIFS]", + "IPv6 Multicast Interfaces (struct mif6[MAXMIFS], netinet6/ip6_mroute.h)"); + +static struct mtx mif6_mtx; +#define MIF6_LOCK() mtx_lock(&mif6_mtx) +#define MIF6_UNLOCK() mtx_unlock(&mif6_mtx) +#define MIF6_LOCK_ASSERT() mtx_assert(&mif6_mtx, MA_OWNED) +#define MIF6_LOCK_INIT() \ + mtx_init(&mif6_mtx, "IPv6 multicast interfaces", NULL, MTX_DEF) +#define MIF6_LOCK_DESTROY() mtx_destroy(&mif6_mtx) #ifdef MRT6DEBUG #ifdef VIMAGE_GLOBALS @@ -200,11 +231,9 @@ static void expire_upcalls(void *); #define EXPIRE_TIMEOUT (hz / 4) /* 4x / second */ #define UPCALL_EXPIRE 6 /* number of timeouts */ -#ifdef INET -#ifdef MROUTING -extern struct socket *ip_mrouter; -#endif -#endif +/* + * XXX TODO: maintain a count to if_allmulti() calls in struct ifnet. + */ /* * 'Interfaces' associated with decapsulator (so we can tell @@ -298,21 +327,22 @@ static u_long upcall_data[UPCALL_MAX + 1]; static void collate(); #endif /* UPCALL_TIMING */ -static int get_sg_cnt(struct sioc_sg_req6 *); -static int get_mif6_cnt(struct sioc_mif_req6 *); static int ip6_mrouter_init(struct socket *, int, int); -static int add_m6if(struct mif6ctl *); -static int del_m6if(mifi_t *); static int add_m6fc(struct mf6cctl *); +static int add_m6if(struct mif6ctl *); static int del_m6fc(struct mf6cctl *); +static int del_m6if(mifi_t *); +static int del_m6if_locked(mifi_t *); +static int get_mif6_cnt(struct sioc_mif_req6 *); +static int get_sg_cnt(struct sioc_sg_req6 *); static struct callout expire_upcalls_ch; -int X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m); +int X_ip6_mforward(struct ip6_hdr *, struct ifnet *, struct mbuf *); int X_ip6_mrouter_done(void); -int X_ip6_mrouter_set(struct socket *so, struct sockopt *sopt); -int X_ip6_mrouter_get(struct socket *so, struct sockopt *sopt); -int X_mrt6_ioctl(int cmd, caddr_t data); +int X_ip6_mrouter_set(struct socket *, struct sockopt *); +int X_ip6_mrouter_get(struct socket *, struct sockopt *); +int X_mrt6_ioctl(int, caddr_t); static void pim6_init(void) @@ -419,14 +449,24 @@ X_ip6_mrouter_get(struct socket *so, struct sockopt *sopt) int X_mrt6_ioctl(int cmd, caddr_t data) { + int ret; + + ret = EINVAL; + switch (cmd) { case SIOCGETSGCNT_IN6: - return (get_sg_cnt((struct sioc_sg_req6 *)data)); + ret = get_sg_cnt((struct sioc_sg_req6 *)data); + break; + case SIOCGETMIFCNT_IN6: - return (get_mif6_cnt((struct sioc_mif_req6 *)data)); + ret = get_mif6_cnt((struct sioc_mif_req6 *)data); + break; + default: - return (EINVAL); + break; } + + return (ret); } /* @@ -436,22 +476,24 @@ static int get_sg_cnt(struct sioc_sg_req6 *req) { struct mf6c *rt; - int s; + int ret; + + ret = 0; + + MFC6_LOCK(); - s = splnet(); MF6CFIND(req->src.sin6_addr, req->grp.sin6_addr, rt); - splx(s); - if (rt != NULL) { + if (rt == NULL) { + ret = ESRCH; + } else { req->pktcnt = rt->mf6c_pkt_cnt; req->bytecnt = rt->mf6c_byte_cnt; req->wrong_if = rt->mf6c_wrong_if; - } else - return (ESRCH); -#if 0 - req->pktcnt = req->bytecnt = req->wrong_if = 0xffffffff; -#endif + } - return (0); + MFC6_UNLOCK(); + + return (ret); } /* @@ -460,17 +502,26 @@ get_sg_cnt(struct sioc_sg_req6 *req) static int get_mif6_cnt(struct sioc_mif_req6 *req) { - mifi_t mifi = req->mifi; + mifi_t mifi; + int ret; - if (mifi >= nummifs) - return (EINVAL); + ret = 0; + mifi = req->mifi; - req->icount = mif6table[mifi].m6_pkt_in; - req->ocount = mif6table[mifi].m6_pkt_out; - req->ibytes = mif6table[mifi].m6_bytes_in; - req->obytes = mif6table[mifi].m6_bytes_out; + MIF6_LOCK(); - return (0); + if (mifi >= nummifs) { + ret = EINVAL; + } else { + req->icount = mif6table[mifi].m6_pkt_in; + req->ocount = mif6table[mifi].m6_pkt_out; + req->ibytes = mif6table[mifi].m6_bytes_in; + req->obytes = mif6table[mifi].m6_bytes_out; + } + + MIF6_UNLOCK(); + + return (ret); } static int @@ -507,8 +558,12 @@ ip6_mrouter_init(struct socket *so, int v, int cmd) if (v != 1) return (ENOPROTOOPT); - if (ip6_mrouter != NULL) + MROUTER6_LOCK(); + + if (ip6_mrouter != NULL) { + MROUTER6_UNLOCK(); return (EADDRINUSE); + } ip6_mrouter = so; V_ip6_mrouter_ver = cmd; @@ -522,6 +577,8 @@ ip6_mrouter_init(struct socket *so, int v, int cmd) callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, NULL); + MROUTER6_UNLOCK(); + #ifdef MRT6DEBUG if (V_mrt6debug) log(LOG_DEBUG, "ip6_mrouter_init\n"); @@ -531,7 +588,7 @@ ip6_mrouter_init(struct socket *so, int v, int cmd) } /* - * Disable multicast routing + * Disable IPv6 multicast forwarding. */ int X_ip6_mrouter_done(void) @@ -541,31 +598,22 @@ X_ip6_mrouter_done(void) int i; struct mf6c *rt; struct rtdetq *rte; - int s; - s = splnet(); + MROUTER6_LOCK(); + + if (ip6_mrouter == NULL) { + MROUTER6_UNLOCK(); + return (EINVAL); + } /* * For each phyint in use, disable promiscuous reception of all IPv6 * multicasts. */ -#ifdef INET -#ifdef MROUTING - /* - * If there is still IPv4 multicast routing daemon, - * we remain interfaces to receive all muliticasted packets. - * XXX: there may be an interface in which the IPv4 multicast - * daemon is not interested... - */ - if (!V_ip_mrouter) -#endif -#endif - { - for (mifi = 0; mifi < nummifs; mifi++) { - if (mif6table[mifi].m6_ifp && - !(mif6table[mifi].m6_flags & MIFF_REGISTER)) { - if_allmulti(mif6table[mifi].m6_ifp, 0); - } + for (mifi = 0; mifi < nummifs; mifi++) { + if (mif6table[mifi].m6_ifp && + !(mif6table[mifi].m6_flags & MIFF_REGISTER)) { + if_allmulti(mif6table[mifi].m6_ifp, 0); } } bzero((caddr_t)mif6table, sizeof(mif6table)); @@ -578,6 +626,7 @@ X_ip6_mrouter_done(void) /* * Free all multicast forwarding cache entries. */ + MFC6_LOCK(); for (i = 0; i < MF6CTBLSIZ; i++) { rt = mf6ctable[i]; while (rt) { @@ -595,8 +644,8 @@ X_ip6_mrouter_done(void) free(frt, M_MRTABLE6); } } - bzero((caddr_t)mf6ctable, sizeof(mf6ctable)); + MFC6_UNLOCK(); /* * Reset register interface @@ -611,7 +660,7 @@ X_ip6_mrouter_done(void) ip6_mrouter = NULL; V_ip6_mrouter_ver = 0; - splx(s); + MROUTER6_UNLOCK(); #ifdef MRT6DEBUG if (V_mrt6debug) @@ -632,15 +681,24 @@ add_m6if(struct mif6ctl *mifcp) INIT_VNET_NET(curvnet); struct mif6 *mifp; struct ifnet *ifp; - int error, s; + int error; - if (mifcp->mif6c_mifi >= MAXMIFS) + MIF6_LOCK(); + + if (mifcp->mif6c_mifi >= MAXMIFS) { + MIF6_UNLOCK(); return (EINVAL); + } mifp = mif6table + mifcp->mif6c_mifi; - if (mifp->m6_ifp) + if (mifp->m6_ifp != NULL) { + MIF6_UNLOCK(); return (EADDRINUSE); /* XXX: is it appropriate? */ - if (mifcp->mif6c_pifi == 0 || mifcp->mif6c_pifi > V_if_index) + } + if (mifcp->mif6c_pifi == 0 || mifcp->mif6c_pifi > V_if_index) { + MIF6_UNLOCK(); return (ENXIO); + } + ifp = ifnet_byindex(mifcp->mif6c_pifi); if (mifcp->mif6c_flags & MIFF_REGISTER) { @@ -661,21 +719,20 @@ add_m6if(struct mif6ctl *mifcp) } else { ifp = multicast_register_if6; } - - } /* if REGISTER */ - else { + } else { /* Make sure the interface supports multicast */ - if ((ifp->if_flags & IFF_MULTICAST) == 0) + if ((ifp->if_flags & IFF_MULTICAST) == 0) { + MIF6_UNLOCK(); return (EOPNOTSUPP); + } - s = splnet(); error = if_allmulti(ifp, 1); - splx(s); - if (error) + if (error) { + MIF6_UNLOCK(); return (error); + } } - s = splnet(); mifp->m6_flags = mifcp->mif6c_flags; mifp->m6_ifp = ifp; @@ -684,12 +741,14 @@ add_m6if(struct mif6ctl *mifcp) mifp->m6_pkt_out = 0; mifp->m6_bytes_in = 0; mifp->m6_bytes_out = 0; - splx(s); + bzero(&mifp->m6_route, sizeof(mifp->m6_route)); /* Adjust nummifs up if the mifi is higher than nummifs */ if (nummifs <= mifcp->mif6c_mifi) nummifs = mifcp->mif6c_mifi + 1; + MIF6_UNLOCK(); + #ifdef MRT6DEBUG if (V_mrt6debug) log(LOG_DEBUG, @@ -705,27 +764,22 @@ add_m6if(struct mif6ctl *mifcp) * Delete a mif from the mif table */ static int -del_m6if(mifi_t *mifip) +del_m6if_locked(mifi_t *mifip) { struct mif6 *mifp = mif6table + *mifip; mifi_t mifi; struct ifnet *ifp; - int s; + + MIF6_LOCK_ASSERT(); if (*mifip >= nummifs) return (EINVAL); if (mifp->m6_ifp == NULL) return (EINVAL); - s = splnet(); - if (!(mifp->m6_flags & MIFF_REGISTER)) { - /* - * XXX: what if there is yet IPv4 multicast daemon - * using the interface? - */ + /* XXX: TODO: Maintain an ALLMULTI refcount in struct ifnet. */ ifp = mifp->m6_ifp; - if_allmulti(ifp, 0); } else { if (reg_mif_num != (mifi_t)-1 && @@ -745,8 +799,6 @@ del_m6if(mifi_t *mifip) break; nummifs = mifi; - splx(s); - #ifdef MRT6DEBUG if (V_mrt6debug) log(LOG_DEBUG, "del_m6if %d, nummifs %d\n", *mifip, nummifs); @@ -755,6 +807,18 @@ del_m6if(mifi_t *mifip) return (0); } +static int +del_m6if(mifi_t *mifip) +{ + int cc; + + MIF6_LOCK(); + cc = del_m6if_locked(mifip); + MIF6_UNLOCK(); + + return (cc); +} + /* * Add an mfc entry */ @@ -765,9 +829,10 @@ add_m6fc(struct mf6cctl *mfccp) u_long hash; struct rtdetq *rte; u_short nstl; - int s; char ip6bufo[INET6_ADDRSTRLEN], ip6bufg[INET6_ADDRSTRLEN]; + MFC6_LOCK(); + MF6CFIND(mfccp->mf6cc_origin.sin6_addr, mfccp->mf6cc_mcastgrp.sin6_addr, rt); @@ -783,17 +848,16 @@ add_m6fc(struct mf6cctl *mfccp) } #endif - s = splnet(); rt->mf6c_parent = mfccp->mf6cc_parent; rt->mf6c_ifset = mfccp->mf6cc_ifset; - splx(s); + + MFC6_UNLOCK(); return (0); } /* * Find the entry for which the upcall was made and update */ - s = splnet(); hash = MF6CHASH(mfccp->mf6cc_origin.sin6_addr, mfccp->mf6cc_mcastgrp.sin6_addr); for (rt = mf6ctable[hash], nstl = 0; rt; rt = rt->mf6c_next) { @@ -891,7 +955,7 @@ add_m6fc(struct mf6cctl *mfccp) rt = (struct mf6c *)malloc(sizeof(*rt), M_MRTABLE6, M_NOWAIT); if (rt == NULL) { - splx(s); + MFC6_UNLOCK(); return (ENOBUFS); } @@ -912,7 +976,8 @@ add_m6fc(struct mf6cctl *mfccp) mf6ctable[hash] = rt; } } - splx(s); + + MFC6_UNLOCK(); return (0); } @@ -953,7 +1018,6 @@ del_m6fc(struct mf6cctl *mfccp) struct mf6c *rt; struct mf6c **nptr; u_long hash; - int s; origin = mfccp->mf6cc_origin; mcastgrp = mfccp->mf6cc_mcastgrp; @@ -968,7 +1032,7 @@ del_m6fc(struct mf6cctl *mfccp) } #endif - s = splnet(); + MFC6_LOCK(); nptr = &mf6ctable[hash]; while ((rt = *nptr) != NULL) { @@ -982,14 +1046,14 @@ del_m6fc(struct mf6cctl *mfccp) nptr = &rt->mf6c_next; } if (rt == NULL) { - splx(s); + MFC6_UNLOCK(); return (EADDRNOTAVAIL); } *nptr = rt->mf6c_next; free(rt, M_MRTABLE6); - splx(s); + MFC6_UNLOCK(); return (0); } @@ -1035,7 +1099,6 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m) struct mf6c *rt; struct mif6 *mifp; struct mbuf *mm; - int s; mifi_t mifi; char ip6bufs[INET6_ADDRSTRLEN], ip6bufd[INET6_ADDRSTRLEN]; @@ -1078,15 +1141,16 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m) return (0); } + MFC6_LOCK(); + /* * Determine forwarding mifs from the forwarding cache table */ - s = splnet(); MF6CFIND(ip6->ip6_src, ip6->ip6_dst, rt); /* Entry exists, so forward if necessary */ if (rt) { - splx(s); + MFC6_UNLOCK(); return (ip6_mdq(m, ifp, rt)); } else { /* @@ -1120,7 +1184,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m) rte = (struct rtdetq *)malloc(sizeof(*rte), M_MRTABLE6, M_NOWAIT); if (rte == NULL) { - splx(s); + MFC6_UNLOCK(); return (ENOBUFS); } mb0 = m_copy(m, 0, M_COPYALL); @@ -1133,7 +1197,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m) mb0 = m_pullup(mb0, sizeof(struct ip6_hdr)); if (mb0 == NULL) { free(rte, M_MRTABLE6); - splx(s); + MFC6_UNLOCK(); return (ENOBUFS); } @@ -1160,7 +1224,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m) if (rt == NULL) { free(rte, M_MRTABLE6); m_freem(mb0); - splx(s); + MFC6_UNLOCK(); return (ENOBUFS); } /* @@ -1173,7 +1237,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m) free(rte, M_MRTABLE6); m_freem(mb0); free(rt, M_MRTABLE6); - splx(s); + MFC6_UNLOCK(); return (ENOBUFS); } @@ -1203,7 +1267,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m) free(rte, M_MRTABLE6); m_freem(mb0); free(rt, M_MRTABLE6); - splx(s); + MFC6_UNLOCK(); return (EINVAL); } @@ -1236,7 +1300,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m) free(rte, M_MRTABLE6); m_freem(mb0); free(rt, M_MRTABLE6); - splx(s); + MFC6_UNLOCK(); return (ENOBUFS); } @@ -1269,7 +1333,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m) mrt6stat.mrt6s_upq_ovflw++; free(rte, M_MRTABLE6); m_freem(mb0); - splx(s); + MFC6_UNLOCK(); return (0); } @@ -1284,7 +1348,7 @@ X_ip6_mforward(struct ip6_hdr *ip6, struct ifnet *ifp, struct mbuf *m) rte->t = tp; #endif /* UPCALL_TIMING */ - splx(s); + MFC6_UNLOCK(); return (0); } @@ -1300,9 +1364,8 @@ expire_upcalls(void *unused) struct rtdetq *rte; struct mf6c *mfc, **nptr; int i; - int s; - s = splnet(); + MFC6_LOCK(); for (i = 0; i < MF6CTBLSIZ; i++) { if (n6expire[i] == 0) continue; @@ -1346,7 +1409,7 @@ expire_upcalls(void *unused) } } } - splx(s); + MFC6_UNLOCK(); callout_reset(&expire_upcalls_ch, EXPIRE_TIMEOUT, expire_upcalls, NULL); } @@ -1540,8 +1603,6 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m) struct mbuf *mb_copy; struct ifnet *ifp = mifp->m6_ifp; int error = 0; - int s = splnet(); /* needs to protect static "ro" below. */ - static struct route_in6 ro; struct in6_multi *in6m; struct sockaddr_in6 *dst6; u_long linkmtu; @@ -1556,7 +1617,6 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m) (M_HASCL(mb_copy) || mb_copy->m_len < sizeof(struct ip6_hdr))) mb_copy = m_pullup(mb_copy, sizeof(struct ip6_hdr)); if (mb_copy == NULL) { - splx(s); return; } /* set MCAST flag to the outgoing packet */ @@ -1576,7 +1636,7 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m) /* XXX: ip6_output will override ip6->ip6_hlim */ im6o.im6o_multicast_hlim = ip6->ip6_hlim; im6o.im6o_multicast_loop = 1; - error = ip6_output(mb_copy, NULL, &ro, + error = ip6_output(mb_copy, NULL, &mifp->m6_route, IPV6_FORWARDING, &im6o, NULL, NULL); #ifdef MRT6DEBUG @@ -1584,7 +1644,6 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m) log(LOG_DEBUG, "phyint_send on mif %d err %d\n", mifp - mif6table, error); #endif - splx(s); return; } @@ -1592,13 +1651,13 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m) * If we belong to the destination multicast group * on the outgoing interface, loop back a copy. */ - dst6 = (struct sockaddr_in6 *)&ro.ro_dst; + dst6 = &mifp->m6_route.ro_dst; IN6_LOOKUP_MULTI(ip6->ip6_dst, ifp, in6m); if (in6m != NULL) { dst6->sin6_len = sizeof(struct sockaddr_in6); dst6->sin6_family = AF_INET6; dst6->sin6_addr = ip6->ip6_dst; - ip6_mloopback(ifp, m, (struct sockaddr_in6 *)&ro.ro_dst); + ip6_mloopback(ifp, m, &mifp->m6_route.ro_dst); } /* * Put the packet into the sending queue of the outgoing interface @@ -1614,7 +1673,7 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m) * we need no ND for a multicast forwarded packet...right? */ error = (*ifp->if_output)(ifp, mb_copy, - (struct sockaddr *)&ro.ro_dst, NULL); + (struct sockaddr *)&mifp->m6_route.ro_dst, NULL); #ifdef MRT6DEBUG if (V_mrt6debug & DEBUG_XMIT) log(LOG_DEBUG, "phyint_send on mif %d err %d\n", @@ -1645,8 +1704,6 @@ phyint_send(struct ip6_hdr *ip6, struct mif6 *mifp, struct mbuf *m) m_freem(mb_copy); /* simply discard the packet */ } } - - splx(s); } static int @@ -1714,6 +1771,24 @@ register_send(struct ip6_hdr *ip6, struct mif6 *mif, struct mbuf *m) return (0); } +/* + * pim6_encapcheck() is called by the encap6_input() path at runtime to + * determine if a packet is for PIM; allowing PIM to be dynamically loaded + * into the kernel. + */ +static int +pim6_encapcheck(const struct mbuf *m, int off, int proto, void *arg) +{ + +#ifdef DIAGNOSTIC + KASSERT(proto == IPPROTO_PIM, ("not for IPPROTO_PIM")); +#endif + if (proto != IPPROTO_PIM) + return 0; /* not for us; reject the datagram. */ + + return 64; /* claim the datagram. */ +} + /* * PIM sparse mode hook * Receives the pim control messages, and passes them up to the listening @@ -1951,3 +2026,66 @@ pim6_input(struct mbuf **mp, int *offp, int proto) rip6_input(&m, offp, proto); return (IPPROTO_DONE); } + +static int +ip6_mroute_modevent(module_t mod, int type, void *unused) +{ + + switch (type) { + case MOD_LOAD: + MROUTER6_LOCK_INIT(); + MFC6_LOCK_INIT(); + MIF6_LOCK_INIT(); + + pim6_encap_cookie = encap_attach_func(AF_INET6, IPPROTO_PIM, + pim6_encapcheck, + (const struct protosw *)&in6_pim_protosw, NULL); + if (pim6_encap_cookie == NULL) { + printf("ip6_mroute: unable to attach pim6 encap\n"); + MIF6_LOCK_DESTROY(); + MFC6_LOCK_DESTROY(); + MROUTER6_LOCK_DESTROY(); + return (EINVAL); + } + + ip6_mforward = X_ip6_mforward; + ip6_mrouter_done = X_ip6_mrouter_done; + ip6_mrouter_get = X_ip6_mrouter_get; + ip6_mrouter_set = X_ip6_mrouter_set; + mrt6_ioctl = X_mrt6_ioctl; + break; + + case MOD_UNLOAD: + if (ip6_mrouter != NULL) + return EINVAL; + + if (pim6_encap_cookie) { + encap_detach(pim6_encap_cookie); + pim6_encap_cookie = NULL; + } + X_ip6_mrouter_done(); + ip6_mforward = NULL; + ip6_mrouter_done = NULL; + ip6_mrouter_get = NULL; + ip6_mrouter_set = NULL; + mrt6_ioctl = NULL; + + MIF6_LOCK_DESTROY(); + MFC6_LOCK_DESTROY(); + MROUTER6_LOCK_DESTROY(); + break; + + default: + return (EOPNOTSUPP); + } + + return (0); +} + +static moduledata_t ip6_mroutemod = { + "ip6_mroute", + ip6_mroute_modevent, + 0 +}; + +DECLARE_MODULE(ip6_mroute, ip6_mroutemod, SI_SUB_PSEUDO, SI_ORDER_ANY); diff --git a/sys/netinet6/ip6_mroute.h b/sys/netinet6/ip6_mroute.h index 0e35e629498e..a0a5e4ad0874 100644 --- a/sys/netinet6/ip6_mroute.h +++ b/sys/netinet6/ip6_mroute.h @@ -212,7 +212,7 @@ struct mif6 { u_quad_t m6_pkt_out; /* # pkts out on interface */ u_quad_t m6_bytes_in; /* # bytes in on interface */ u_quad_t m6_bytes_out; /* # bytes out on interface */ - struct route_in6 m6_route;/* cached route if this is a tunnel */ + struct route_in6 m6_route; /* cached route */ #ifdef notyet u_int m6_rsvp_on; /* RSVP listening on this vif */ struct socket *m6_rsvpd; /* RSVP daemon socket */ diff --git a/usr.bin/netstat/main.c b/usr.bin/netstat/main.c index 6eb313a68699..8b25ff520b5e 100644 --- a/usr.bin/netstat/main.c +++ b/usr.bin/netstat/main.c @@ -82,8 +82,8 @@ static struct nlist nl[] = { { .n_name = "_rt_tables"}, #define N_MRTSTAT 3 { .n_name = "_mrtstat" }, -#define N_MFCTABLE 4 - { .n_name = "_mfctable" }, +#define N_MFCHASHTBL 4 + { .n_name = "_mfchashtbl" }, #define N_VIFTABLE 5 { .n_name = "_viftable" }, #define N_IPX 6 @@ -182,6 +182,8 @@ static struct nlist nl[] = { { .n_name = "_rip6stat" }, #define N_SCTPSTAT 53 { .n_name = "_sctpstat" }, +#define N_MFCTABLESIZE 54 + { .n_name = "_mfctablesize" }, { .n_name = NULL }, }; @@ -550,7 +552,8 @@ main(int argc, char *argv[]) #endif } else { if (af == AF_INET || af == AF_UNSPEC) - mroutepr(nl[N_MFCTABLE].n_value, + mroutepr(nl[N_MFCHASHTBL].n_value, + nl[N_MFCTABLESIZE].n_value, nl[N_VIFTABLE].n_value); #ifdef INET6 if (af == AF_INET6 || af == AF_UNSPEC) diff --git a/usr.bin/netstat/mroute.c b/usr.bin/netstat/mroute.c index dbce3181f966..8009c30c78a5 100644 --- a/usr.bin/netstat/mroute.c +++ b/usr.bin/netstat/mroute.c @@ -67,124 +67,9 @@ __FBSDID("$FreeBSD$"); #include #include "netstat.h" -static void print_bw_meter(struct bw_meter *bw_meter, int *banner_printed); -void -mroutepr(u_long mfcaddr, u_long vifaddr) -{ - struct mfc *mfctable[MFCTBLSIZ]; - struct vif viftable[MAXVIFS]; - struct mfc mfc, *m; - struct vif *v; - vifi_t vifi; - int i; - int banner_printed; - int saved_numeric_addr; - vifi_t maxvif = 0; - size_t len; - - len = sizeof(mfctable); - if (live) { - if (sysctlbyname("net.inet.ip.mfctable", mfctable, &len, NULL, - 0) < 0) { - warn("sysctl: net.inet.ip.mfctable"); - return; - } - } else - kread(mfcaddr, (char *)mfctable, sizeof(mfctable)); - - len = sizeof(viftable); - if (live) { - if (sysctlbyname("net.inet.ip.viftable", viftable, &len, NULL, - 0) < 0) { - warn("sysctl: net.inet.ip.viftable"); - return; - } - } else - kread(vifaddr, (char *)viftable, sizeof(viftable)); - - saved_numeric_addr = numeric_addr; - numeric_addr = 1; - - banner_printed = 0; - for (vifi = 0, v = viftable; vifi < MAXVIFS; ++vifi, ++v) { - if (v->v_lcl_addr.s_addr == 0) - continue; - - maxvif = vifi; - if (!banner_printed) { - printf("\nIPv4 Virtual Interface Table\n" - " Vif Thresh Rate Local-Address " - "Remote-Address Pkts-In Pkts-Out\n"); - banner_printed = 1; - } - - printf(" %2u %6u %4d %-15.15s", - /* opposite math of add_vif() */ - vifi, v->v_threshold, v->v_rate_limit * 1000 / 1024, - routename(v->v_lcl_addr.s_addr)); - printf(" %-15.15s", (v->v_flags & VIFF_TUNNEL) ? - routename(v->v_rmt_addr.s_addr) : ""); - - printf(" %9lu %9lu\n", v->v_pkt_in, v->v_pkt_out); - } - if (!banner_printed) - printf("\nIPv4 Virtual Interface Table is empty\n"); - - banner_printed = 0; - for (i = 0; i < MFCTBLSIZ; ++i) { - m = mfctable[i]; - while(m) { - /* XXX KVM */ - kread((u_long)m, (char *)&mfc, sizeof mfc); - - if (!banner_printed) { - printf("\nIPv4 Multicast Forwarding Table\n" - " Origin Group " - " Packets In-Vif Out-Vifs:Ttls\n"); - banner_printed = 1; - } - - printf(" %-15.15s", routename(mfc.mfc_origin.s_addr)); - printf(" %-15.15s", routename(mfc.mfc_mcastgrp.s_addr)); - printf(" %9lu", mfc.mfc_pkt_cnt); - printf(" %3d ", mfc.mfc_parent); - for (vifi = 0; vifi <= maxvif; vifi++) { - if (mfc.mfc_ttls[vifi] > 0) - printf(" %u:%u", vifi, - mfc.mfc_ttls[vifi]); - } - printf("\n"); - - /* Print the bw meter information */ - { - struct bw_meter bw_meter, *bwm; - int banner_printed2 = 0; - - bwm = mfc.mfc_bw_meter; - while (bwm) { - /* XXX KVM */ - kread((u_long)bwm, (char *)&bw_meter, - sizeof bw_meter); - print_bw_meter(&bw_meter, - &banner_printed2); - bwm = bw_meter.bm_mfc_next; - } -#if 0 /* Don't ever print it? */ - if (! banner_printed2) - printf("\n No Bandwidth Meters\n"); -#endif - } - - m = mfc.mfc_next; - } - } - if (!banner_printed) - printf("\nIPv4 Multicast Forwarding Table is empty\n"); - - printf("\n"); - numeric_addr = saved_numeric_addr; -} +static void print_bw_meter(struct bw_meter *, int *); +static void print_mfc(struct mfc *, int, int *); static void print_bw_meter(struct bw_meter *bw_meter, int *banner_printed) @@ -262,6 +147,193 @@ print_bw_meter(struct bw_meter *bw_meter, int *banner_printed) printf("\n"); } +static void +print_mfc(struct mfc *m, int maxvif, int *banner_printed) +{ + struct bw_meter bw_meter, *bwm; + int bw_banner_printed; + int error; + vifi_t vifi; + + bw_banner_printed = 0; + + if (! *banner_printed) { + printf("\nIPv4 Multicast Forwarding Table\n" + " Origin Group " + " Packets In-Vif Out-Vifs:Ttls\n"); + *banner_printed = 1; + } + + printf(" %-15.15s", routename(m->mfc_origin.s_addr)); + printf(" %-15.15s", routename(m->mfc_mcastgrp.s_addr)); + printf(" %9lu", m->mfc_pkt_cnt); + printf(" %3d ", m->mfc_parent); + for (vifi = 0; vifi <= maxvif; vifi++) { + if (m->mfc_ttls[vifi] > 0) + printf(" %u:%u", vifi, m->mfc_ttls[vifi]); + } + printf("\n"); + + /* + * XXX We break the rules and try to use KVM to read the + * bandwidth meters, they are not retrievable via sysctl yet. + */ + bwm = m->mfc_bw_meter; + while (bwm != NULL) { + error = kread((u_long)bwm, (char *)&bw_meter, + sizeof(bw_meter)); + if (error) + break; + print_bw_meter(&bw_meter, &bw_banner_printed); + bwm = bw_meter.bm_mfc_next; + } +} + +void +mroutepr(u_long pmfchashtbl, u_long pmfctablesize, u_long pviftbl) +{ + struct vif viftable[MAXVIFS]; + struct vif *v; + struct mfc *m; + int banner_printed; + int saved_numeric_addr; + size_t len; + vifi_t vifi, maxvif; + + saved_numeric_addr = numeric_addr; + numeric_addr = 1; + + /* + * TODO: + * The VIF table will move to hanging off the struct if_info for + * each IPv4 configured interface. Currently it is statically + * allocated, and retrieved either using KVM or an opaque SYSCTL. + * + * This can't happen until the API documented in multicast(4) + * is itself refactored. The historical reason why VIFs use + * a separate ifindex space is entirely due to the legacy + * capability of the MROUTING code to create IPIP tunnels on + * the fly to support DVMRP. When gif(4) became available, this + * functionality was deprecated, as PIM does not use it. + */ + maxvif = 0; + + len = sizeof(viftable); + if (live) { + if (sysctlbyname("net.inet.ip.viftable", viftable, &len, NULL, + 0) < 0) { + warn("sysctl: net.inet.ip.viftable"); + return; + } + } else + kread(pviftbl, (char *)viftable, sizeof(viftable)); + + banner_printed = 0; + for (vifi = 0, v = viftable; vifi < MAXVIFS; ++vifi, ++v) { + if (v->v_lcl_addr.s_addr == 0) + continue; + + maxvif = vifi; + if (!banner_printed) { + printf("\nIPv4 Virtual Interface Table\n" + " Vif Thresh Local-Address " + "Remote-Address Pkts-In Pkts-Out\n"); + banner_printed = 1; + } + + printf(" %2u %6u %-15.15s", + /* opposite math of add_vif() */ + vifi, v->v_threshold, + routename(v->v_lcl_addr.s_addr)); + printf(" %-15.15s", (v->v_flags & VIFF_TUNNEL) ? + routename(v->v_rmt_addr.s_addr) : ""); + + printf(" %9lu %9lu\n", v->v_pkt_in, v->v_pkt_out); + } + if (!banner_printed) + printf("\nIPv4 Virtual Interface Table is empty\n"); + + banner_printed = 0; + + /* + * TODO: + * The MFC table will move into the AF_INET radix trie in future. + * In 8.x, it becomes a dynamically allocated structure referenced + * by a hashed LIST, allowing more than 256 entries w/o kernel tuning. + * + * If retrieved via opaque SYSCTL, the kernel will coalesce it into + * a static table for us. + * If retrieved via KVM, the hash list pointers must be followed. + */ + if (live) { + struct mfc *mfctable; + + len = 0; + if (sysctlbyname("net.inet.ip.mfctable", NULL, &len, NULL, + 0) < 0) { + warn("sysctl: net.inet.ip.mfctable"); + return; + } + + mfctable = malloc(len); + if (mfctable == NULL) { + warnx("malloc %lu bytes", (u_long)len); + return; + } + if (sysctlbyname("net.inet.ip.mfctable", mfctable, &len, NULL, + 0) < 0) { + free(mfctable); + warn("sysctl: net.inet.ip.mfctable"); + return; + } + + m = mfctable; + while (len >= sizeof(*m)) { + print_mfc(m++, maxvif, &banner_printed); + len -= sizeof(*m); + } + if (len != 0) + warnx("print_mfc: %d trailing bytes", len); + + free(mfctable); + } else { + LIST_HEAD(, mfc) *mfchashtbl; + u_long i, mfctablesize; + struct mfc mfc; + int error; + + error = kread(pmfctablesize, (char *)&mfctablesize, + sizeof(u_long)); + if (error) { + warn("kread: mfctablesize"); + return; + } + + len = sizeof(*mfchashtbl) * mfctablesize; + mfchashtbl = malloc(len); + if (mfchashtbl == NULL) { + warnx("malloc %lu bytes", (u_long)len); + return; + } + kread(pmfchashtbl, (char *)&mfchashtbl, len); + + for (i = 0; i < mfctablesize; i++) { + LIST_FOREACH(m, &mfchashtbl[i], mfc_hash) { + kread((u_long)m, (char *)&mfc, sizeof(mfc)); + print_mfc(m, maxvif, &banner_printed); + } + } + + free(mfchashtbl); + } + + if (!banner_printed) + printf("\nIPv4 Multicast Forwarding Table is empty\n"); + + printf("\n"); + numeric_addr = saved_numeric_addr; +} + void mrt_stats(u_long mstaddr) { diff --git a/usr.bin/netstat/netstat.h b/usr.bin/netstat/netstat.h index da76ff163b57..483bd6c43ef7 100644 --- a/usr.bin/netstat/netstat.h +++ b/usr.bin/netstat/netstat.h @@ -160,6 +160,6 @@ void tp_protopr(u_long, const char *, int, int); void tp_inproto(u_long); void tp_stats(caddr_t, caddr_t); -void mroutepr(u_long, u_long); +void mroutepr(u_long, u_long, u_long); void mrt_stats(u_long); void bpf_stats(char *);