Use epoch(9) for rtentries to simplify control plane operations.

Currently the only reason for refcounting rtentries is the need to report
 the rtable operation details immediately after execution.
Delaying rtentry reclamation allows us to stop refcounting and simplify the code.
Additionally, this change allows rib_lookup_info(), which is used by
 some of the customers to get the matching prefix along with nexthops,
 to be reimplemented in a more efficient way.

The change keeps the per-vnet rtzone UMA zone. It adds an nh_vnet field to
 nhop_priv so that curvnet can be set reliably even during vnet teardown.
The rest of the reference counting code will be removed in D24867.

Differential Revision:	https://reviews.freebsd.org/D24866
This commit is contained in:
Alexander V. Chernikov 2020-05-23 10:21:02 +00:00
parent 016fc6ddb3
commit 2bbab0af6d
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=361409
12 changed files with 97 additions and 94 deletions

View File

@ -465,18 +465,21 @@ nfs_mountroot(struct mount *mp)
if (nd->mygateway.sin_len != 0 &&
nd->mygateway.sin_addr.s_addr != 0) {
struct sockaddr_in mask, sin;
struct epoch_tracker et;
bzero((caddr_t)&mask, sizeof(mask));
sin = mask;
sin.sin_family = AF_INET;
sin.sin_len = sizeof(sin);
/* XXX MRT use table 0 for this sort of thing */
NET_EPOCH_ENTER(et);
CURVNET_SET(TD_TO_VNET(td));
error = rtrequest_fib(RTM_ADD, (struct sockaddr *)&sin,
(struct sockaddr *)&nd->mygateway,
(struct sockaddr *)&mask,
RTF_UP | RTF_GATEWAY, NULL, RT_DEFAULT_FIB);
CURVNET_RESTORE();
NET_EPOCH_EXIT(et);
if (error)
panic("nfs_mountroot: RTM_ADD: %d", error);
}

View File

@ -1854,18 +1854,17 @@ ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa,
ifp = ifa->ifa_ifp;
NET_EPOCH_ENTER(et);
bzero(&info, sizeof(info));
if (cmd != RTM_DELETE)
info.rti_ifp = V_loif;
if (cmd == RTM_ADD) {
/* explicitly specify (loopback) ifa */
if (info.rti_ifp != NULL) {
NET_EPOCH_ENTER(et);
rti_ifa = ifaof_ifpforaddr(ifa->ifa_addr, info.rti_ifp);
if (rti_ifa != NULL)
ifa_ref(rti_ifa);
info.rti_ifa = rti_ifa;
NET_EPOCH_EXIT(et);
}
}
info.rti_flags = ifa->ifa_flags | RTF_HOST | RTF_STATIC | RTF_PINNED;
@ -1874,6 +1873,7 @@ ifa_maintain_loopback_route(int cmd, const char *otype, struct ifaddr *ifa,
link_init_sdl(ifp, (struct sockaddr *)&null_sdl, ifp->if_type);
error = rtrequest1_fib(cmd, &info, NULL, ifp->if_fib);
NET_EPOCH_EXIT(et);
if (rti_ifa != NULL)
ifa_free(rti_ifa);

View File

@ -120,9 +120,6 @@ VNET_PCPUSTAT_SYSUNINIT(rtstat);
VNET_DEFINE(struct rib_head *, rt_tables);
#define V_rt_tables VNET(rt_tables)
VNET_DEFINE(int, rttrash); /* routes not in table but not freed */
#define V_rttrash VNET(rttrash)
/*
* Convert a 'struct radix_node *' to a 'struct rtentry *'.
@ -148,6 +145,7 @@ static int rt_ifdelroute(const struct rtentry *rt, const struct nhop_object *,
static struct rtentry *rt_unlinkrte(struct rib_head *rnh,
struct rt_addrinfo *info, int *perror);
static void rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info);
static void destroy_rtentry_epoch(epoch_context_t ctx);
#ifdef RADIX_MPATH
static struct radix_node *rt_mpath_unlink(struct rib_head *rnh,
struct rt_addrinfo *info, struct rtentry *rto, int *perror);
@ -332,6 +330,16 @@ vnet_route_uninit(const void *unused __unused)
}
}
/*
* dom_rtdetach calls rt_table_destroy(), which
* schedules deletion for all rtentries, nexthops and control
* structures. Wait for the destruction callbacks to fire.
* Note that this should result in freeing all rtentries, but
* nexthops deletions will be scheduled for the next epoch run
* and will be completed after vnet teardown.
*/
epoch_drain_callbacks(net_epoch_preempt);
free(V_rt_tables, M_RTABLE);
uma_zdestroy(V_rtzone);
}
@ -449,41 +457,54 @@ rtfree(struct rtentry *rt)
if ((rt->rt_flags & RTF_UP) == 0) {
if (rt->rt_nodes->rn_flags & (RNF_ACTIVE | RNF_ROOT))
panic("rtfree 2");
/*
* the rtentry must have been removed from the routing table
* so it is represented in rttrash.. remove that now.
*/
V_rttrash--;
#ifdef DIAGNOSTIC
if (rt->rt_refcnt < 0) {
printf("rtfree: %p not freed (neg refs)\n", rt);
goto done;
}
#endif
/* Unreference nexthop */
nhop_free(rt->rt_nhop);
epoch_call(net_epoch_preempt, destroy_rtentry_epoch,
&rt->rt_epoch_ctx);
/*
* and the rtentry itself of course
* FALLTHROUGH to RT_UNLOCK() so the reporting functions
* have consistent behaviour of operating on unlocked entry.
*/
uma_zfree(V_rtzone, rt);
return;
}
done:
RT_UNLOCK(rt);
}
/*
* Temporary RTFREE() function wrapper.
* Intended to use in control plane code to
* avoid exposing internal layout of 'struct rtentry'.
*/
void
rtfree_func(struct rtentry *rt)
static void
destroy_rtentry(struct rtentry *rt)
{
RTFREE(rt);
/*
* At this moment rnh, nh_control may be already freed.
* nhop interface may have been migrated to a different vnet.
* Use vnet stored in the nexthop to delete the entry.
*/
CURVNET_SET(nhop_get_vnet(rt->rt_nhop));
/* Unreference nexthop */
nhop_free(rt->rt_nhop);
uma_zfree(V_rtzone, rt);
CURVNET_RESTORE();
}
/*
 * Deferred-destruction callback for a route entry.
 *
 * 'ctx' is the rt_epoch_ctx member embedded in a struct rtentry; recover
 * the enclosing rtentry from it and hand that entry to destroy_rtentry().
 */
static void
destroy_rtentry_epoch(epoch_context_t ctx)
{

	destroy_rtentry(__containerof(ctx, struct rtentry, rt_epoch_ctx));
}
/*
@ -546,7 +567,7 @@ rib_add_redirect(u_int fibnum, struct sockaddr *dst, struct sockaddr *gateway,
RT_LOCK(rt);
flags = rt->rt_flags;
RTFREE_LOCKED(rt);
RT_UNLOCK(rt);
RTSTAT_INC(rts_dynamic);
@ -1112,13 +1133,6 @@ rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info)
ifa = rt->rt_nhop->nh_ifa;
if (ifa != NULL && ifa->ifa_rtrequest != NULL)
ifa->ifa_rtrequest(RTM_DELETE, rt, rt->rt_nhop, info);
/*
* One more rtentry floating around that is not
* linked to the routing table. rttrash will be decremented
* when RTFREE(rt) is eventually called.
*/
V_rttrash++;
}
@ -1386,6 +1400,7 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
KASSERT((fibnum < rt_numfibs), ("rtrequest1_fib: bad fibnum"));
KASSERT((info->rti_flags & RTF_RNH_LOCKED) == 0, ("rtrequest1_fib: locked"));
NET_EPOCH_ASSERT();
dst = info->rti_info[RTAX_DST];
@ -1580,13 +1595,10 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info,
ifa->ifa_rtrequest(RTM_ADD, rt, rt->rt_nhop, info);
/*
* actually return a resultant rtentry and
* give the caller a single reference.
* actually return a resultant rtentry
*/
if (ret_nrt) {
if (ret_nrt)
*ret_nrt = rt;
RT_ADDREF(rt);
}
rnh->rnh_gen++; /* Routing table updated */
RT_UNLOCK(rt);
@ -1622,15 +1634,13 @@ del_route(struct rib_head *rnh, struct rt_addrinfo *info,
/*
* If the caller wants it, then it can have it,
* but it's up to it to free the rtentry as we won't be
* doing it.
* the entry will be deleted after the end of the current epoch.
*/
if (ret_nrt) {
if (ret_nrt)
*ret_nrt = rt;
RT_UNLOCK(rt);
} else
RTFREE_LOCKED(rt);
RTFREE_LOCKED(rt);
return (0);
}
@ -1736,10 +1746,8 @@ change_route_one(struct rib_head *rnh, struct rt_addrinfo *info,
if ((nh_orig->nh_ifa != nh->nh_ifa) && nh_orig->nh_ifa->ifa_rtrequest)
nh_orig->nh_ifa->ifa_rtrequest(RTM_DELETE, rt, nh_orig, info);
if (ret_nrt != NULL) {
if (ret_nrt != NULL)
*ret_nrt = rt;
RT_ADDREF(rt);
}
RT_UNLOCK(rt);
@ -1757,7 +1765,6 @@ static int
change_route(struct rib_head *rnh, struct rt_addrinfo *info,
struct rtentry **ret_nrt)
{
struct epoch_tracker et;
int error;
/* Check if updated gateway exists */
@ -1765,8 +1772,6 @@ change_route(struct rib_head *rnh, struct rt_addrinfo *info,
(info->rti_info[RTAX_GATEWAY] == NULL))
return (EINVAL);
NET_EPOCH_ENTER(et);
/*
* route change is done in multiple steps, with dropping and
* reacquiring lock. In the situations with multiple processes
@ -1779,7 +1784,6 @@ change_route(struct rib_head *rnh, struct rt_addrinfo *info,
if (error != EAGAIN)
break;
}
NET_EPOCH_EXIT(et);
return (error);
}
@ -1825,6 +1829,7 @@ static inline int
rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
{
RIB_RLOCK_TRACKER;
struct epoch_tracker et;
struct sockaddr *dst;
struct sockaddr *netmask;
struct rtentry *rt = NULL;
@ -1957,38 +1962,18 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
else
info.rti_info[RTAX_GATEWAY] = ifa->ifa_addr;
info.rti_info[RTAX_NETMASK] = netmask;
NET_EPOCH_ENTER(et);
error = rtrequest1_fib(cmd, &info, &rt, fibnum);
if (error == 0 && rt != NULL) {
/*
* notify any listening routing agents of the change
*/
RT_LOCK(rt);
/* TODO: interface routes/aliases */
RT_ADDREF(rt);
RT_UNLOCK(rt);
rt_newaddrmsg_fib(cmd, ifa, rt, fibnum);
RT_LOCK(rt);
RT_REMREF(rt);
if (cmd == RTM_DELETE) {
/*
* If we are deleting, and we found an entry,
* then it's been removed from the tree..
* now throw it away.
*/
RTFREE_LOCKED(rt);
} else {
if (cmd == RTM_ADD) {
/*
* We just wanted to add it..
* we don't actually need a reference.
*/
RT_REMREF(rt);
}
RT_UNLOCK(rt);
}
didwork = 1;
}
NET_EPOCH_EXIT(et);
if (error)
a_failure = error;
}

View File

@ -332,8 +332,6 @@ struct rt_addrinfo {
#define RT_LINK_IS_UP(ifp) (!((ifp)->if_capabilities & IFCAP_LINKSTATE) \
|| (ifp)->if_link_state == LINK_STATE_UP)
#define RTFREE_FUNC(_rt) rtfree_func(_rt)
#define RO_NHFREE(_ro) do { \
if ((_ro)->ro_nh) { \
NH_FREE((_ro)->ro_nh); \

View File

@ -176,6 +176,7 @@ struct rib_head;
uint32_t nhop_get_idx(const struct nhop_object *nh);
enum nhop_type nhop_get_type(const struct nhop_object *nh);
int nhop_get_rtflags(const struct nhop_object *nh);
struct vnet *nhop_get_vnet(const struct nhop_object *nh);
#endif /* _KERNEL */

View File

@ -500,6 +500,9 @@ finalize_nhop(struct nh_control *ctl, struct rt_addrinfo *info,
return (ENOMEM);
}
/* Save vnet to ease destruction */
nh_priv->nh_vnet = curvnet;
/* Reference external objects and calculate (referenced) ifa */
if_ref(nh->nh_ifp);
ifa_ref(nh->nh_ifa);
@ -698,6 +701,13 @@ nhop_set_rtflags(struct nhop_object *nh, int rt_flags)
nh->nh_priv->rt_flags = rt_flags;
}
/*
 * Return the vnet recorded in the private part (nh_priv->nh_vnet) of
 * nexthop 'nh'.
 */
struct vnet *
nhop_get_vnet(const struct nhop_object *nh)
{
	struct vnet *vnet;

	vnet = nh->nh_priv->nh_vnet;
	return (vnet);
}
void
nhops_update_ifmtu(struct rib_head *rh, struct ifnet *ifp, uint32_t mtu)
{

View File

@ -78,6 +78,7 @@ struct nhop_priv {
struct nhop_object *nh; /* backreference to the dataplane nhop */
struct nh_control *nh_control; /* backreference to the rnh */
struct nhop_priv *nh_next; /* hash table membership */
struct vnet *nh_vnet; /* vnet nhop belongs to */
struct epoch_context nh_epoch_ctx; /* epoch data for nhop */
};

View File

@ -35,6 +35,7 @@
#ifndef RNF_NORMAL
#include <net/radix.h>
#endif
#include <sys/epoch.h>
#include <netinet/in.h> /* struct sockaddr_in */
#include <sys/counter.h>
@ -148,6 +149,7 @@ struct rtentry {
#define rt_endzero rt_mtx
struct mtx rt_mtx; /* mutex for routing entry */
struct rtentry *rt_chain; /* pointer to next rtentry to delete */
struct epoch_context rt_epoch_ctx; /* net epoch tracker */
};
#define RT_LOCK_INIT(_rt) \

View File

@ -742,7 +742,6 @@ handle_rtm_get(struct rt_addrinfo *info, u_int fibnum,
}
}
RT_LOCK(rt);
RT_ADDREF(rt);
RIB_RUNLOCK(rnh);
*ret_nrt = rt;
@ -930,7 +929,6 @@ route_output(struct mbuf *m, struct socket *so, ...)
#endif
RT_LOCK(saved_nrt);
rtm->rtm_index = saved_nrt->rt_nhop->nh_ifp->if_index;
RT_REMREF(saved_nrt);
RT_UNLOCK(saved_nrt);
}
break;
@ -987,8 +985,7 @@ route_output(struct mbuf *m, struct socket *so, ...)
flush:
NET_EPOCH_EXIT(et);
if (rt != NULL)
RTFREE(rt);
rt = NULL;
#ifdef INET6
if (rtm != NULL) {

View File

@ -1566,14 +1566,17 @@ nd6_free_redirect(const struct llentry *ln)
int fibnum;
struct sockaddr_in6 sin6;
struct rt_addrinfo info;
struct epoch_tracker et;
lltable_fill_sa_entry(ln, (struct sockaddr *)&sin6);
memset(&info, 0, sizeof(info));
info.rti_info[RTAX_DST] = (struct sockaddr *)&sin6;
info.rti_filter = nd6_isdynrte;
NET_EPOCH_ENTER(et);
for (fibnum = 0; fibnum < rt_numfibs; fibnum++)
rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum);
NET_EPOCH_EXIT(et);
}
/*

View File

@ -690,10 +690,8 @@ defrouter_addreq(struct nd_defrouter *new)
error = in6_rtrequest(RTM_ADD, (struct sockaddr *)&def,
(struct sockaddr *)&gate, (struct sockaddr *)&mask,
RTF_GATEWAY, &newrt, fibnum);
if (newrt) {
if (newrt != NULL)
rt_routemsg(RTM_ADD, newrt, new->ifp, 0, fibnum);
RTFREE_FUNC(newrt);
}
if (error == 0)
new->installed = 1;
}
@ -708,6 +706,7 @@ defrouter_delreq(struct nd_defrouter *dr)
{
struct sockaddr_in6 def, mask, gate;
struct rtentry *oldrt = NULL;
struct epoch_tracker et;
unsigned int fibnum;
bzero(&def, sizeof(def));
@ -720,13 +719,13 @@ defrouter_delreq(struct nd_defrouter *dr)
gate.sin6_addr = dr->rtaddr;
fibnum = dr->ifp->if_fib;
NET_EPOCH_ENTER(et);
in6_rtrequest(RTM_DELETE, (struct sockaddr *)&def,
(struct sockaddr *)&gate,
(struct sockaddr *)&mask, RTF_GATEWAY, &oldrt, fibnum);
if (oldrt) {
if (oldrt != NULL)
rt_routemsg(RTM_DELETE, oldrt, dr->ifp, 0, fibnum);
RTFREE_FUNC(oldrt);
}
NET_EPOCH_EXIT(et);
dr->installed = 0;
}
@ -1022,6 +1021,7 @@ defrouter_select_fib(int fibnum)
}
ND6_RUNLOCK();
NET_EPOCH_ENTER(et);
/*
* If we selected a router for this FIB and it's different
* than the installed one, remove the installed router and
@ -1037,6 +1037,7 @@ defrouter_select_fib(int fibnum)
}
if (selected_dr != NULL)
defrouter_rele(selected_dr);
NET_EPOCH_EXIT(et);
}
static struct nd_defrouter *
@ -2064,7 +2065,6 @@ nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa)
pr->ndpr_stateflags |= NDPRF_ONLINK;
rt_routemsg(RTM_ADD, rt, pr->ndpr_ifp, 0, fibnum);
RTFREE_FUNC(rt);
}
/* Return the last error we got. */
@ -2132,7 +2132,6 @@ nd6_prefix_onlink(struct nd_prefix *pr)
}
/* should we care about ia6_flags? */
}
NET_EPOCH_EXIT(et);
if (ifa == NULL) {
/*
* This can still happen, when, for example, we receive an RA
@ -2145,13 +2144,12 @@ nd6_prefix_onlink(struct nd_prefix *pr)
"prefix(%s/%d) on %s\n", __func__,
ip6_sprintf(ip6buf, &pr->ndpr_prefix.sin6_addr),
pr->ndpr_plen, if_name(ifp)));
return (0);
}
error = nd6_prefix_onlink_rtrequest(pr, ifa);
if (ifa != NULL)
error = 0;
} else {
error = nd6_prefix_onlink_rtrequest(pr, ifa);
ifa_free(ifa);
}
NET_EPOCH_EXIT(et);
return (error);
}
@ -2167,6 +2165,7 @@ nd6_prefix_offlink(struct nd_prefix *pr)
char ip6buf[INET6_ADDRSTRLEN];
uint64_t genid;
int fibnum, maxfib, a_failure;
struct epoch_tracker et;
ND6_ONLINK_LOCK_ASSERT();
ND6_UNLOCK_ASSERT();
@ -2193,6 +2192,7 @@ nd6_prefix_offlink(struct nd_prefix *pr)
}
a_failure = 0;
NET_EPOCH_ENTER(et);
for (; fibnum < maxfib; fibnum++) {
rt = NULL;
error = in6_rtrequest(RTM_DELETE, (struct sockaddr *)&sa6, NULL,
@ -2205,8 +2205,8 @@ nd6_prefix_offlink(struct nd_prefix *pr)
/* report route deletion to the routing socket. */
rt_routemsg(RTM_DELETE, rt, ifp, 0, fibnum);
RTFREE_FUNC(rt);
}
NET_EPOCH_EXIT(et);
error = a_failure;
a_failure = 1;
if (error == 0) {

View File

@ -1664,14 +1664,17 @@ bootpc_init(void)
goto out;
if (gctx->gotrootpath != 0) {
struct epoch_tracker et;
kern_setenv("boot.netif.name", ifctx->ifp->if_xname);
NET_EPOCH_ENTER(et);
bootpc_add_default_route(ifctx);
error = md_mount(&nd->root_saddr, nd->root_hostnam,
nd->root_fh, &nd->root_fhsize,
&nd->root_args, td);
bootpc_remove_default_route(ifctx);
NET_EPOCH_EXIT(et);
if (error != 0) {
if (gctx->any_root_overrides == 0)
panic("nfs_boot: mount root, error=%d", error);