diff --git a/sys/fs/nfsclient/nfs_clvfsops.c b/sys/fs/nfsclient/nfs_clvfsops.c index d68171e7ad2b..5490155da68c 100644 --- a/sys/fs/nfsclient/nfs_clvfsops.c +++ b/sys/fs/nfsclient/nfs_clvfsops.c @@ -465,18 +465,21 @@ nfs_mountroot(struct mount *mp) if (nd->mygateway.sin_len != 0 && nd->mygateway.sin_addr.s_addr != 0) { struct sockaddr_in mask, sin; + struct epoch_tracker et; bzero((caddr_t)&mask, sizeof(mask)); sin = mask; sin.sin_family = AF_INET; sin.sin_len = sizeof(sin); /* XXX MRT use table 0 for this sort of thing */ + NET_EPOCH_ENTER(et); CURVNET_SET(TD_TO_VNET(td)); error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin, (struct sockaddr *)&nd->mygateway, (struct sockaddr *)&mask, RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB); CURVNET_RESTORE(); + NET_EPOCH_EXIT(et); if (error) panic("nfs_mountroot: RTM_ADD: %d", error); } diff --git a/sys/net/if.c b/sys/net/if.c index 2a80a0bc46e6..bccbba268b56 100644 --- a/sys/net/if.c +++ b/sys/net/if.c @@ -1854,18 +1854,17 @@ ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa, ifp = ifa->ifa_ifp; + NET_EPOCH_ENTER(et); bzero(&info, sizeof(info)); if (cmd != RTM_DELETE) info.rti_ifp = V_loif; if (cmd == RTM_ADD) { /* explicitly specify (loopback) ifa */ if (info.rti_ifp != NULL) { - NET_EPOCH_ENTER(et); rti_ifa = ifaof_ifpforaddr(ifa->ifa_addr, info.rti_ifp); if (rti_ifa != NULL) ifa_ref(rti_ifa); info.rti_ifa = rti_ifa; - NET_EPOCH_EXIT(et); } } info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED; @@ -1874,6 +1873,7 @@ ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa, link_init_sdl(ifp, (struct sockaddr *)&null_sdl, ifp->if_type); error = rtrequest1_fib(cmd, &info, NULL, ifp->if_fib); + NET_EPOCH_EXIT(et); if (rti_ifa != NULL) ifa_free(rti_ifa); diff --git a/sys/net/route.c b/sys/net/route.c index 1b966b691bd5..c21f3de66a57 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -120,9 +120,6 @@ VNET_PCPUSTAT_SYSUNINIT(rtstat); VNET_DEFINE(struct rib_head *, rt_tables); #define V_rt_tables VNET(rt_tables) -VNET_DEFINE(int, rttrash); /* routes not in table but not freed */ -#define V_rttrash VNET(rttrash) - /* * Convert a 'struct radix_node *' to a 'struct rtentry *'. @@ -148,6 +145,7 @@ static int rt_ifdelroute(const struct rtentry *rt, const struct nhop_object *, static struct rtentry *rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, int *perror); static void rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info); +static void destroy_rtentry_epoch(epoch_context_t ctx); #ifdef RADIX_MPATH static struct radix_node *rt_mpath_unlink(struct rib_head *rnh, struct rt_addrinfo *info, struct rtentry *rto, int *perror); @@ -332,6 +330,16 @@ vnet_route_uninit(const void *unused __unused) } } + /* + * dom_rtdetach calls rt_table_destroy(), which + * schedules deletion for all rtentries, nexthops and control + * structures. Wait for the destruction callbacks to fire. + * Note that this should result in freeing all rtentries, but + * nexthops deletions will be scheduled for the next epoch run + * and will be completed after vnet teardown. + */ + epoch_drain_callbacks(net_epoch_preempt); + free(V_rt_tables, M_RTABLE); uma_zdestroy(V_rtzone); } @@ -449,41 +457,54 @@ rtfree(struct rtentry *rt) if ((rt->rt_flags & RTF_UP) == 0) { if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT)) panic("rtfree 2"); - /* - * the rtentry must have been removed from the routing table - * so it is represented in rttrash.. remove that now. - */ - V_rttrash--; #ifdef DIAGNOSTIC if (rt->rt_refcnt < 0) { printf("rtfree: %p not freed (neg refs)\n", rt); goto done; } #endif - - /* Unreference nexthop */ - nhop_free(rt->rt_nhop); + epoch_call(net_epoch_preempt, destroy_rtentry_epoch, + &rt->rt_epoch_ctx); /* - * and the rtentry itself of course + * FALLTHROUGH to RT_UNLOCK() so the reporting functions + * have consistent behaviour of operating on unlocked entry. */ - uma_zfree(V_rtzone, rt); - return; } done: RT_UNLOCK(rt); } -/* - * Temporary RTFREE() function wrapper. - * Intended to use in control plane code to - * avoid exposing internal layout of 'struct rtentry'. - */ -void -rtfree_func(struct rtentry *rt) +static void +destroy_rtentry(struct rtentry *rt) { - RTFREE(rt); + /* + * At this moment rnh, nh_control may be already freed. + * nhop interface may have been migrated to a different vnet. + * Use vnet stored in the nexthop to delete the entry. + */ + CURVNET_SET(nhop_get_vnet(rt->rt_nhop)); + + /* Unreference nexthop */ + nhop_free(rt->rt_nhop); + + uma_zfree(V_rtzone, rt); + + CURVNET_RESTORE(); +} + +/* + * Epoch callback indicating rtentry is safe to destroy + */ +static void +destroy_rtentry_epoch(epoch_context_t ctx) +{ + struct rtentry *rt; + + rt = __containerof(ctx, struct rtentry, rt_epoch_ctx); + + destroy_rtentry(rt); } /* @@ -546,7 +567,7 @@ rib_add_redirect(u_int fibnum, struct sockaddr *dst, struct sockaddr *gateway, RT_LOCK(rt); flags = rt->rt_flags; - RTFREE_LOCKED(rt); + RT_UNLOCK(rt); RTSTAT_INC(rts_dynamic); @@ -1112,13 +1133,6 @@ rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info) ifa = rt->rt_nhop->nh_ifa; if (ifa != NULL && ifa->ifa_rtrequest != NULL) ifa->ifa_rtrequest(RTM_DELETE, rt, rt->rt_nhop, info); - - /* - * One more rtentry floating around that is not - * linked to the routing table. rttrash will be decremented - * when RTFREE(rt) is eventually called. - */ - V_rttrash++; } @@ -1386,6 +1400,7 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt, KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum")); KASSERT((info->rti_flags & RTF_RNH_LOCKED) == 0, ("rtrequest1_fib: locked")); + NET_EPOCH_ASSERT(); dst = info->rti_info[RTAX_DST]; @@ -1580,13 +1595,10 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info, ifa->ifa_rtrequest(RTM_ADD, rt, rt->rt_nhop, info); /* - * actually return a resultant rtentry and - * give the caller a single reference. + * actually return a resultant rtentry */ - if (ret_nrt) { + if (ret_nrt) *ret_nrt = rt; - RT_ADDREF(rt); - } rnh->rnh_gen++; /* Routing table updated */ RT_UNLOCK(rt); @@ -1622,15 +1634,13 @@ del_route(struct rib_head *rnh, struct rt_addrinfo *info, /* * If the caller wants it, then it can have it, - * but it's up to it to free the rtentry as we won't be - * doing it. + * the entry will be deleted after the end of the current epoch. */ - if (ret_nrt) { + if (ret_nrt) *ret_nrt = rt; - RT_UNLOCK(rt); - } else - RTFREE_LOCKED(rt); - + + RTFREE_LOCKED(rt); + return (0); } @@ -1736,10 +1746,8 @@ change_route_one(struct rib_head *rnh, struct rt_addrinfo *info, if ((nh_orig->nh_ifa != nh->nh_ifa) && nh_orig->nh_ifa->ifa_rtrequest) nh_orig->nh_ifa->ifa_rtrequest(RTM_DELETE, rt, nh_orig, info); - if (ret_nrt != NULL) { + if (ret_nrt != NULL) *ret_nrt = rt; - RT_ADDREF(rt); - } RT_UNLOCK(rt); @@ -1757,7 +1765,6 @@ static int change_route(struct rib_head *rnh, struct rt_addrinfo *info, struct rtentry **ret_nrt) { - struct epoch_tracker et; int error; /* Check if updated gateway exists */ @@ -1765,8 +1772,6 @@ change_route(struct rib_head *rnh, struct rt_addrinfo *info, (info->rti_info[RTAX_GATEWAY] == NULL)) return (EINVAL); - NET_EPOCH_ENTER(et); - /* * route change is done in multiple steps, with dropping and * reacquiring lock. In the situations with multiple processes @@ -1779,7 +1784,6 @@ change_route(struct rib_head *rnh, struct rt_addrinfo *info, if (error != EAGAIN) break; } - NET_EPOCH_EXIT(et); return (error); } @@ -1825,6 +1829,7 @@ static inline int rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) { RIB_RLOCK_TRACKER; + struct epoch_tracker et; struct sockaddr *dst; struct sockaddr *netmask; struct rtentry *rt = NULL; @@ -1957,38 +1962,18 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) else info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr; info.rti_info[RTAX_NETMASK] = netmask; + NET_EPOCH_ENTER(et); error = rtrequest1_fib(cmd, &info, &rt, fibnum); if (error == 0 && rt != NULL) { /* * notify any listening routing agents of the change */ - RT_LOCK(rt); /* TODO: interface routes/aliases */ - RT_ADDREF(rt); - RT_UNLOCK(rt); rt_newaddrmsg_fib(cmd, ifa, rt, fibnum); - RT_LOCK(rt); - RT_REMREF(rt); - if (cmd == RTM_DELETE) { - /* - * If we are deleting, and we found an entry, - * then it's been removed from the tree.. - * now throw it away. - */ - RTFREE_LOCKED(rt); - } else { - if (cmd == RTM_ADD) { - /* - * We just wanted to add it.. - * we don't actually need a reference. - */ - RT_REMREF(rt); - } - RT_UNLOCK(rt); - } didwork = 1; } + NET_EPOCH_EXIT(et); if (error) a_failure = error; } diff --git a/sys/net/route.h b/sys/net/route.h index 4ce3310139ab..355a84407f9b 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -332,8 +332,6 @@ struct rt_addrinfo { #define RT_LINK_IS_UP(ifp) (!((ifp)->if_capabilities & IFCAP_LINKSTATE) \ || (ifp)->if_link_state == LINK_STATE_UP) -#define RTFREE_FUNC(_rt) rtfree_func(_rt) - #define RO_NHFREE(_ro) do { \ if ((_ro)->ro_nh) { \ NH_FREE((_ro)->ro_nh); \ diff --git a/sys/net/route/nhop.h b/sys/net/route/nhop.h index d0304f37f6dc..a3ee003cdf7d 100644 --- a/sys/net/route/nhop.h +++ b/sys/net/route/nhop.h @@ -176,6 +176,7 @@ struct rib_head; uint32_t nhop_get_idx(const struct nhop_object *nh); enum nhop_type nhop_get_type(const struct nhop_object *nh); int nhop_get_rtflags(const struct nhop_object *nh); +struct vnet *nhop_get_vnet(const struct nhop_object *nh); #endif /* _KERNEL */ diff --git a/sys/net/route/nhop_ctl.c b/sys/net/route/nhop_ctl.c index 45e7b4cbb1a8..464d4cb05eab 100644 --- a/sys/net/route/nhop_ctl.c +++ b/sys/net/route/nhop_ctl.c @@ -500,6 +500,9 @@ finalize_nhop(struct nh_control *ctl, struct rt_addrinfo *info, return (ENOMEM); } + /* Save vnet to ease destruction */ + nh_priv->nh_vnet = curvnet; + /* Reference external objects and calculate (referenced) ifa */ if_ref(nh->nh_ifp); ifa_ref(nh->nh_ifa); @@ -698,6 +701,13 @@ nhop_set_rtflags(struct nhop_object *nh, int rt_flags) nh->nh_priv->rt_flags = rt_flags; } +struct vnet * +nhop_get_vnet(const struct nhop_object *nh) +{ + + return (nh->nh_priv->nh_vnet); +} + void nhops_update_ifmtu(struct rib_head *rh, struct ifnet *ifp, uint32_t mtu) { diff --git a/sys/net/route/nhop_var.h b/sys/net/route/nhop_var.h index 4bf26ff54269..aac2829e186f 100644 --- a/sys/net/route/nhop_var.h +++ b/sys/net/route/nhop_var.h @@ -78,6 +78,7 @@ struct nhop_priv { struct nhop_object *nh; /* backreference to the dataplane nhop */ struct nh_control *nh_control; /* backreference to the rnh */ struct nhop_priv *nh_next; /* hash table membership */ + struct vnet *nh_vnet; /* vnet nhop belongs to */ struct epoch_context nh_epoch_ctx; /* epoch data for nhop */ }; diff --git a/sys/net/route/route_var.h b/sys/net/route/route_var.h index 3dfe7f152954..288713e2b564 100644 --- a/sys/net/route/route_var.h +++ b/sys/net/route/route_var.h @@ -35,6 +35,7 @@ #ifndef RNF_NORMAL #include #endif +#include #include /* struct sockaddr_in */ #include @@ -148,6 +149,7 @@ struct rtentry { #define rt_endzero rt_mtx struct mtx rt_mtx; /* mutex for routing entry */ struct rtentry *rt_chain; /* pointer to next rtentry to delete */ + struct epoch_context rt_epoch_ctx; /* net epoch tracker */ }; #define RT_LOCK_INIT(_rt) \ diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index e474da739cbb..a1806df834c5 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -742,7 +742,6 @@ handle_rtm_get(struct rt_addrinfo *info, u_int fibnum, } } RT_LOCK(rt); - RT_ADDREF(rt); RIB_RUNLOCK(rnh); *ret_nrt = rt; @@ -930,7 +929,6 @@ route_output(struct mbuf *m, struct socket *so, ...) #endif RT_LOCK(saved_nrt); rtm->rtm_index = saved_nrt->rt_nhop->nh_ifp->if_index; - RT_REMREF(saved_nrt); RT_UNLOCK(saved_nrt); } break; @@ -987,8 +985,7 @@ route_output(struct mbuf *m, struct socket *so, ...) flush: NET_EPOCH_EXIT(et); - if (rt != NULL) - RTFREE(rt); + rt = NULL; #ifdef INET6 if (rtm != NULL) { diff --git a/sys/netinet6/nd6.c b/sys/netinet6/nd6.c index 57009fba5886..1438b1ccd93c 100644 --- a/sys/netinet6/nd6.c +++ b/sys/netinet6/nd6.c @@ -1566,14 +1566,17 @@ nd6_free_redirect(const struct llentry *ln) int fibnum; struct sockaddr_in6 sin6; struct rt_addrinfo info; + struct epoch_tracker et; lltable_fill_sa_entry(ln, (struct sockaddr *)&sin6); memset(&info, 0, sizeof(info)); info.rti_info[RTAX_DST] = (struct sockaddr *)&sin6; info.rti_filter = nd6_isdynrte; + NET_EPOCH_ENTER(et); for (fibnum = 0; fibnum < rt_numfibs; fibnum++) rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum); + NET_EPOCH_EXIT(et); } /* diff --git a/sys/netinet6/nd6_rtr.c b/sys/netinet6/nd6_rtr.c index 123879d29eca..98cc1b220248 100644 --- a/sys/netinet6/nd6_rtr.c +++ b/sys/netinet6/nd6_rtr.c @@ -690,10 +690,8 @@ defrouter_addreq(struct nd_defrouter *new) error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&def, (struct sockaddr *)&gate, (struct sockaddr *)&mask, RTF_GATEWAY, &newrt, fibnum); - if (newrt) { + if (newrt != NULL) rt_routemsg(RTM_ADD, newrt, new->ifp, 0, fibnum); - RTFREE_FUNC(newrt); - } if (error == 0) new->installed = 1; } @@ -708,6 +706,7 @@ defrouter_delreq(struct nd_defrouter *dr) { struct sockaddr_in6 def, mask, gate; struct rtentry *oldrt = NULL; + struct epoch_tracker et; unsigned int fibnum; bzero(&def, sizeof(def)); @@ -720,13 +719,13 @@ defrouter_delreq(struct nd_defrouter *dr) gate.sin6_addr = dr->rtaddr; fibnum = dr->ifp->if_fib; + NET_EPOCH_ENTER(et); in6_rtrequest(RTM_DELETE, (struct sockaddr *)&def, (struct sockaddr *)&gate, (struct sockaddr *)&mask, RTF_GATEWAY, &oldrt, fibnum); - if (oldrt) { + if (oldrt != NULL) rt_routemsg(RTM_DELETE, oldrt, dr->ifp, 0, fibnum); - RTFREE_FUNC(oldrt); - } + NET_EPOCH_EXIT(et); dr->installed = 0; } @@ -1022,6 +1021,7 @@ defrouter_select_fib(int fibnum) } ND6_RUNLOCK(); + NET_EPOCH_ENTER(et); /* * If we selected a router for this FIB and it's different * than the installed one, remove the installed router and @@ -1037,6 +1037,7 @@ defrouter_select_fib(int fibnum) } if (selected_dr != NULL) defrouter_rele(selected_dr); + NET_EPOCH_EXIT(et); } static struct nd_defrouter * @@ -2064,7 +2065,6 @@ nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa) pr->ndpr_stateflags |= NDPRF_ONLINK; rt_routemsg(RTM_ADD, rt, pr->ndpr_ifp, 0, fibnum); - RTFREE_FUNC(rt); } /* Return the last error we got. */ @@ -2132,7 +2132,6 @@ nd6_prefix_onlink(struct nd_prefix *pr) } /* should we care about ia6_flags? */ } - NET_EPOCH_EXIT(et); if (ifa == NULL) { /* * This can still happen, when, for example, we receive an RA @@ -2145,13 +2144,12 @@ nd6_prefix_onlink(struct nd_prefix *pr) "prefix(%s/%d) on %s\n", __func__, ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr), pr->ndpr_plen, if_name(ifp))); - return (0); - } - - error = nd6_prefix_onlink_rtrequest(pr, ifa); - - if (ifa != NULL) + error = 0; + } else { + error = nd6_prefix_onlink_rtrequest(pr, ifa); ifa_free(ifa); + } + NET_EPOCH_EXIT(et); return (error); } @@ -2167,6 +2165,7 @@ nd6_prefix_offlink(struct nd_prefix *pr) char ip6buf[INET6_ADDRSTRLEN]; uint64_t genid; int fibnum, maxfib, a_failure; + struct epoch_tracker et; ND6_ONLINK_LOCK_ASSERT(); ND6_UNLOCK_ASSERT(); @@ -2193,6 +2192,7 @@ nd6_prefix_offlink(struct nd_prefix *pr) } a_failure = 0; + NET_EPOCH_ENTER(et); for (; fibnum < maxfib; fibnum++) { rt = NULL; error = in6_rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL, @@ -2205,8 +2205,8 @@ nd6_prefix_offlink(struct nd_prefix *pr) /* report route deletion to the routing socket. */ rt_routemsg(RTM_DELETE, rt, ifp, 0, fibnum); - RTFREE_FUNC(rt); } + NET_EPOCH_EXIT(et); error = a_failure; a_failure = 1; if (error == 0) { diff --git a/sys/nfs/bootp_subr.c b/sys/nfs/bootp_subr.c index 07b445418327..d64026d0cd90 100644 --- a/sys/nfs/bootp_subr.c +++ b/sys/nfs/bootp_subr.c @@ -1664,14 +1664,17 @@ bootpc_init(void) goto out; if (gctx->gotrootpath != 0) { + struct epoch_tracker et; kern_setenv("boot.netif.name", ifctx->ifp->if_xname); + NET_EPOCH_ENTER(et); bootpc_add_default_route(ifctx); error = md_mount(&nd->root_saddr, nd->root_hostnam, nd->root_fh, &nd->root_fhsize, &nd->root_args, td); bootpc_remove_default_route(ifctx); + NET_EPOCH_EXIT(et); if (error != 0) { if (gctx->any_root_overrides == 0) panic("nfs_boot: mount root, error=%d", error);