Add new rt_foreach_fib_walk_del() function for deleting route entries

by filter function instead of picking into routing table details in
  each consumer.
Remove now-unused rt_expunge() (eliminating last external RTF_RNH_LOCKED
 user).
This simplifies future nexthops/multipath changes and rtrequest1_fib()
  locking refactoring.

Actual changes:
Add "rt_chain" field to permit rte grouping while doing batched delete
  from routing table (thus growing rte 200->208 on amd64).
Add "rti_filter" /  "rti_filterdata" / "rti_spare" fields to rt_addrinfo
  to pass filter function to various routing subsystems in standard way.
Convert all rt_expunge() customers to new rt_addrinfo-based API and eliminate
  rt_expunge().
This commit is contained in:
melifaro 2015-11-30 05:51:14 +00:00
parent 48efdd4bec
commit e198456483
5 changed files with 284 additions and 266 deletions

View File

@ -139,7 +139,14 @@ static VNET_DEFINE(uma_zone_t, rtzone); /* Routing table UMA zone. */
static int rtrequest1_fib_change(struct radix_node_head *, struct rt_addrinfo *,
struct rtentry **, u_int);
static void rt_setmetrics(const struct rt_addrinfo *, struct rtentry *);
static int rt_ifdelroute(struct rtentry *rt, void *arg);
static int rt_ifdelroute(const struct rtentry *rt, void *arg);
static struct rtentry *rt_unlinkrte(struct radix_node_head *rnh,
struct rt_addrinfo *info, int *perror);
static void rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info);
#ifdef RADIX_MPATH
static struct radix_node *rt_mpath_unlink(struct radix_node_head *rnh,
struct rt_addrinfo *info, struct rtentry *rto, int *perror);
#endif
struct if_mtuinfo
{
@ -237,6 +244,7 @@ rtentry_ctor(void *mem, int size, void *arg, int how)
bzero(rt, offsetof(struct rtentry, rt_endzero));
counter_u64_zero(rt->rt_pksent);
rt->rt_chain = NULL;
return (0);
}
@ -867,6 +875,108 @@ rt_foreach_fib_walk(int af, rt_setwarg_t *setwa_f, rt_walktree_f_t *wa_f,
}
}
/*
* Walker state shared between rt_foreach_fib_walk_del() and
* rt_checkdelroute() while one radix tree is being scanned.
*/
struct rt_delinfo
{
struct rt_addrinfo info; /* filter + per-entry addresses passed to rt_unlinkrte() */
struct radix_node_head *rnh; /* tree currently being walked */
struct rtentry *head; /* unlinked entries, chained through rt_chain */
};
/*
 * Tree-walk callback used for batched deletion.
 *
 * @rn is the radix node (rtentry) being visited, @arg is the
 * struct rt_delinfo describing the walk.  The entry's destination,
 * netmask and gateway are loaded into the shared rt_addrinfo and
 * rt_unlinkrte() is asked to remove it; that call applies the
 * pinned-route check and the caller's filter.  An entry that was
 * actually unlinked is pushed onto the rt_delinfo list through its
 * rt_chain pointer.
 *
 * Always returns 0, whether or not the entry was unlinked.
 */
static int
rt_checkdelroute(struct radix_node *rn, void *arg)
{
	struct rt_delinfo *delinfo = (struct rt_delinfo *)arg;
	struct rtentry *candidate = (struct rtentry *)rn;
	struct rtentry *unlinked;
	int unlink_error = 0;

	delinfo->info.rti_info[RTAX_DST] = rt_key(candidate);
	delinfo->info.rti_info[RTAX_NETMASK] = rt_mask(candidate);
	delinfo->info.rti_info[RTAX_GATEWAY] = candidate->rt_gateway;

	unlinked = rt_unlinkrte(delinfo->rnh, &delinfo->info, &unlink_error);
	if (unlinked != NULL) {
		/* Unlinked: prepend to the list of entries awaiting cleanup */
		unlinked->rt_chain = delinfo->head;
		delinfo->head = unlinked;
	}
	/* Otherwise: either not allowed or not matched, nothing to record */

	return (0);
}
/*
 * Iterates over all existing fibs in system.
 * Deletes each element for which @filter_f function returned
 * non-zero value.
 * If @af is not AF_UNSPEC, iterates over fibs in particular
 * address family.
 *
 * @filter_f is called with @arg as its second argument for every
 * candidate entry (via rt_unlinkrte()).
 *
 * Deletion is done in two phases per table: matching entries are
 * unlinked and chained together while the radix head lock is held,
 * then, after the lock is dropped, each chained entry is reported
 * through rt_notifydelete() and released with RTFREE_LOCKED().
 */
void
rt_foreach_fib_walk_del(int af, rt_filter_f_t *filter_f, void *arg)
{
	struct radix_node_head *rnh;
	struct rt_delinfo di;
	struct rtentry *rt;
	uint32_t fibnum;
	int i, start, end;

	bzero(&di, sizeof(di));
	di.info.rti_filter = filter_f;
	di.info.rti_filterdata = arg;

	/*
	 * Do we want some specific family?
	 * The range does not depend on the fib, so compute it once.
	 */
	if (af != AF_UNSPEC) {
		start = af;
		end = af;
	} else {
		start = 1;
		end = AF_MAX;
	}

	for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
		for (i = start; i <= end; i++) {
			rnh = rt_tables_get_rnh(fibnum, i);
			if (rnh == NULL)
				continue;
			di.rnh = rnh;

			RADIX_NODE_HEAD_LOCK(rnh);
			rnh->rnh_walktree(rnh, rt_checkdelroute, &di);
			RADIX_NODE_HEAD_UNLOCK(rnh);

			/* We might have something to reclaim */
			while (di.head != NULL) {
				rt = di.head;
				di.head = rt->rt_chain;
				rt->rt_chain = NULL;

				/*
				 * TODO std rt -> rt_addrinfo export
				 *
				 * Refresh every per-entry address, gateway
				 * included: after the walk the shared info
				 * still carries the gateway of the last node
				 * visited, which may belong to an entry that
				 * has already been released below.
				 */
				di.info.rti_info[RTAX_DST] = rt_key(rt);
				di.info.rti_info[RTAX_NETMASK] = rt_mask(rt);
				di.info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;

				rt_notifydelete(rt, &di.info);
				RTFREE_LOCKED(rt);
			}
		}
	}
}
/*
* Delete Routes for a Network Interface
*
@ -882,10 +992,9 @@ rt_foreach_fib_walk(int af, rt_setwarg_t *setwa_f, rt_walktree_f_t *wa_f,
* errno failed - reason indicated
*/
static int
rt_ifdelroute(struct rtentry *rt, void *arg)
rt_ifdelroute(const struct rtentry *rt, void *arg)
{
struct ifnet *ifp = arg;
int err;
if (rt->rt_ifp != ifp)
return (0);
@ -897,14 +1006,7 @@ rt_ifdelroute(struct rtentry *rt, void *arg)
if ((rt->rt_flags & RTF_UP) == 0)
return (0);
err = rtrequest_fib(RTM_DELETE, rt_key(rt), rt->rt_gateway,
rt_mask(rt),
rt->rt_flags | RTF_RNH_LOCKED | RTF_PINNED,
(struct rtentry **) NULL, rt->rt_fibnum);
if (err != 0)
log(LOG_WARNING, "rt_ifdelroute: error %d\n", err);
return (0);
return (1);
}
/*
@ -917,9 +1019,105 @@ void
rt_flushifroutes(struct ifnet *ifp)
{
rt_foreach_fib_walk(AF_UNSPEC, NULL, rt_ifdelroute, ifp);
rt_foreach_fib_walk_del(AF_UNSPEC, rt_ifdelroute, ifp);
}
/*
* Conditionally unlinks rtentry matching data inside @info from @rnh.
*
* The prefix is taken from @info's RTAX_DST / RTAX_NETMASK slots.
* If @info carries a filter function, it is consulted before the
* entry is removed; on a match RTAX_GATEWAY in @info is overwritten
* with the gateway of the entry that was found.
*
* Returns unlinked, locked and referenced @rtentry on success
* (with @perror set to 0),
* Returns NULL and sets @perror to:
* ESRCH - if prefix was not found,
* EADDRINUSE - if trying to delete PINNED route without appropriate flag.
* ENOENT - if supplied filter function returned 0 (not matched).
*/
static struct rtentry *
rt_unlinkrte(struct radix_node_head *rnh, struct rt_addrinfo *info, int *perror)
{
struct sockaddr *dst, *netmask;
struct rtentry *rt;
struct radix_node *rn;
dst = info->rti_info[RTAX_DST];
netmask = info->rti_info[RTAX_NETMASK];
/* Look the entry up first so it can be inspected before removal */
rt = (struct rtentry *)rnh->rnh_lookup(dst, netmask, rnh);
if (rt == NULL) {
*perror = ESRCH;
return (NULL);
}
/* Pinned routes may only be removed when the request says RTF_PINNED */
if ((info->rti_flags & RTF_PINNED) == 0) {
/* Check if target route can be deleted */
if (rt->rt_flags & RTF_PINNED) {
*perror = EADDRINUSE;
return (NULL);
}
}
if (info->rti_filter != NULL) {
if (info->rti_filter(rt, info->rti_filterdata) == 0) {
/* Not matched */
*perror = ENOENT;
return (NULL);
}
/*
* Filter function requested rte deletion.
* Ease the caller work by filling in remaining info
* from that particular entry.
*/
info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
}
/*
* Remove the item from the tree and return it.
* Complain if it is not there and do no more processing.
*/
/* Default error for the removal step; the multipath helper may rewrite it */
*perror = ESRCH;
#ifdef RADIX_MPATH
if (rn_mpath_capable(rnh))
rn = rt_mpath_unlink(rnh, info, rt, perror);
else
#endif
rn = rnh->rnh_deladdr(dst, netmask, rnh);
if (rn == NULL)
return (NULL);
/* A node handed back by the removal step must not still carry these flags */
if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
panic ("rtrequest delete");
/* Continue with the node that was actually removed, locked and referenced */
rt = RNTORT(rn);
RT_LOCK(rt);
RT_ADDREF(rt);
*perror = 0;
return (rt);
}
/*
 * Post-unlink bookkeeping for route @rt.
 *
 * Clears RTF_UP on the entry, lets the interface address attached to
 * the route (if it has an ifa_rtrequest handler) observe the
 * RTM_DELETE together with @info, and accounts for the entry in the
 * V_rttrash counter.
 */
static void
rt_notifydelete(struct rtentry *rt, struct rt_addrinfo *info)
{
	struct ifaddr *owner = rt->rt_ifa;

	/* The entry is no longer usable for forwarding */
	rt->rt_flags &= ~RTF_UP;

	/* Let the protocol keep its own state in sync with the removal */
	if (owner != NULL && owner->ifa_rtrequest != NULL)
		owner->ifa_rtrequest(RTM_DELETE, rt, info);

	/*
	 * The rtentry now exists outside of the routing table;
	 * rttrash will be decremented when RTFREE(rt) is eventually
	 * called.
	 */
	V_rttrash += 1;
}
/*
* These (questionable) definitions of apparent local variables apply
* to the next two functions. XXXXXX!!!
@ -975,87 +1173,6 @@ rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
return (error);
}
/*
* Expunges references to a route that's about to be reclaimed.
* The route must be locked.
*
* @rnh is the radix head the route lives in (asserted to be locked),
* @rt is the route to remove (asserted to be locked).
*
* Returns 0 on success. Without RADIX_MPATH, returns ESRCH when the
* entry is not found in the tree; with RADIX_MPATH, returns whatever
* error rtrequest1_fib() reported.
*/
int
rt_expunge(struct radix_node_head *rnh, struct rtentry *rt)
{
#if !defined(RADIX_MPATH)
struct radix_node *rn;
#else
struct rt_addrinfo info;
int fib;
struct rtentry *rt0;
#endif
struct ifaddr *ifa;
int error = 0;
RT_LOCK_ASSERT(rt);
RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
#ifdef RADIX_MPATH
/*
* Multipath build: delegate the removal to rtrequest1_fib(),
* telling it the radix head is already locked.
*/
fib = rt->rt_fibnum;
bzero(&info, sizeof(info));
info.rti_ifp = rt->rt_ifp;
info.rti_flags = RTF_RNH_LOCKED;
info.rti_info[RTAX_DST] = rt_key(rt);
info.rti_info[RTAX_GATEWAY] = rt->rt_ifa->ifa_addr;
/* The route lock is dropped across the request and retaken afterwards */
RT_UNLOCK(rt);
error = rtrequest1_fib(RTM_DELETE, &info, &rt0, fib);
if (error == 0 && rt0 != NULL) {
/* Continue with the entry that was actually removed */
rt = rt0;
RT_LOCK(rt);
} else if (error != 0) {
/* Restore the lock state the caller handed us before failing */
RT_LOCK(rt);
return (error);
}
#else
/*
* Remove the item from the tree; it should be there,
* but when callers invoke us blindly it may not (sigh).
*/
rn = rnh->rnh_deladdr(rt_key(rt), rt_mask(rt), rnh);
if (rn == NULL) {
error = ESRCH;
goto bad;
}
KASSERT((rn->rn_flags & (RNF_ACTIVE | RNF_ROOT)) == 0,
("unexpected flags 0x%x", rn->rn_flags));
KASSERT(rt == RNTORT(rn),
("lookup mismatch, rt %p rn %p", rt, rn));
#endif /* RADIX_MPATH */
rt->rt_flags &= ~RTF_UP;
/*
* Give the protocol a chance to keep things in sync.
*/
if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest) {
/* Fresh request description built from the route itself */
struct rt_addrinfo info;
bzero((caddr_t)&info, sizeof(info));
info.rti_flags = rt->rt_flags;
info.rti_info[RTAX_DST] = rt_key(rt);
info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
info.rti_info[RTAX_NETMASK] = rt_mask(rt);
ifa->ifa_rtrequest(RTM_DELETE, rt, &info);
}
/*
* one more rtentry floating around that is not
* linked to the routing table.
*/
V_rttrash++;
/* The label is only jumped to from the non-multipath removal path */
#if !defined(RADIX_MPATH)
bad:
#endif
return (error);
}
static int
if_updatemtu_cb(struct radix_node *rn, void *arg)
{
@ -1172,26 +1289,32 @@ rt_print(char *buf, int buflen, struct rtentry *rt)
#endif
#ifdef RADIX_MPATH
static int
rn_mpath_update(int req, struct rt_addrinfo *info,
struct radix_node_head *rnh, struct rtentry **ret_nrt)
/*
* Deletes key for single-path routes, unlinks rtentry with
* gateway specified in @info from multi-path routes.
*
* Returns unlinked entry. In case of failure, returns NULL
* and sets @perror to ESRCH.
*/
static struct radix_node *
rt_mpath_unlink(struct radix_node_head *rnh, struct rt_addrinfo *info,
struct rtentry *rto, int *perror)
{
/*
* if we got multipath routes, we require users to specify
* a matching RTAX_GATEWAY.
*/
struct rtentry *rt, *rto = NULL;
struct rtentry *rt; // *rto = NULL;
struct radix_node *rn;
int error = 0;
struct sockaddr *gw;
rn = rnh->rnh_lookup(dst, netmask, rnh);
if (rn == NULL)
return (ESRCH);
rto = rt = RNTORT(rn);
gw = info->rti_info[RTAX_GATEWAY];
rt = rt_mpath_matchgate(rto, gw);
if (rt == NULL) {
*perror = ESRCH;
return (NULL);
}
rt = rt_mpath_matchgate(rt, gateway);
if (rt == NULL)
return (ESRCH);
/*
* this is the first entry in the chain
*/
@ -1214,67 +1337,31 @@ rn_mpath_update(int req, struct rt_addrinfo *info,
* check the case when there is only
* one route in the chain.
*/
if (gateway &&
(rt->rt_gateway->sa_len != gateway->sa_len ||
memcmp(rt->rt_gateway, gateway, gateway->sa_len)))
error = ESRCH;
else {
/*
* remove from tree before returning it
* to the caller
*/
rn = rnh->rnh_deladdr(dst, netmask, rnh);
KASSERT(rt == RNTORT(rn), ("radix node disappeared"));
goto gwdelete;
if (gw &&
(rt->rt_gateway->sa_len != gw->sa_len ||
memcmp(rt->rt_gateway, gw, gw->sa_len))) {
*perror = ESRCH;
return (NULL);
}
}
/*
* use the normal delete code to remove
* the first entry
*/
if (req != RTM_DELETE)
goto nondelete;
error = ENOENT;
goto done;
rn = rnh->rnh_deladdr(dst, netmask, rnh);
*perror = 0;
return (rn);
}
/*
* if the entry is 2nd and on up
*/
if ((req == RTM_DELETE) && !rt_mpath_deldup(rto, rt))
if (rt_mpath_deldup(rto, rt) == 0)
panic ("rtrequest1: rt_mpath_deldup");
gwdelete:
RT_LOCK(rt);
RT_ADDREF(rt);
if (req == RTM_DELETE) {
rt->rt_flags &= ~RTF_UP;
/*
* One more rtentry floating around that is not
* linked to the routing table. rttrash will be decremented
* when RTFREE(rt) is eventually called.
*/
V_rttrash++;
}
nondelete:
if (req != RTM_DELETE)
panic("unrecognized request %d", req);
/*
* If the caller wants it, then it can have it,
* but it's up to it to free the rtentry as we won't be
* doing it.
*/
if (ret_nrt) {
*ret_nrt = rt;
RT_UNLOCK(rt);
} else
RTFREE_LOCKED(rt);
done:
return (error);
*perror = 0;
rn = (struct radix_node *)rt;
return (rn);
}
#endif
@ -1330,52 +1417,12 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
rt_maskedcopy(dst, (struct sockaddr *)&mdst, netmask);
dst = (struct sockaddr *)&mdst;
}
#ifdef RADIX_MPATH
if (rn_mpath_capable(rnh)) {
error = rn_mpath_update(req, info, rnh, ret_nrt);
/*
* "bad" holds true for the success case
* as well
*/
if (error != ENOENT)
goto bad;
error = 0;
}
#endif
if ((flags & RTF_PINNED) == 0) {
/* Check if target route can be deleted */
rt = (struct rtentry *)rnh->rnh_lookup(dst,
netmask, rnh);
if ((rt != NULL) && (rt->rt_flags & RTF_PINNED))
senderr(EADDRINUSE);
}
/*
* Remove the item from the tree and return it.
* Complain if it is not there and do no more processing.
*/
rn = rnh->rnh_deladdr(dst, netmask, rnh);
if (rn == NULL)
senderr(ESRCH);
if (rn->rn_flags & (RNF_ACTIVE | RNF_ROOT))
panic ("rtrequest delete");
rt = RNTORT(rn);
RT_LOCK(rt);
RT_ADDREF(rt);
rt->rt_flags &= ~RTF_UP;
rt = rt_unlinkrte(rnh, info, &error);
if (error != 0)
goto bad;
/*
* give the protocol a chance to keep things in sync.
*/
if ((ifa = rt->rt_ifa) && ifa->ifa_rtrequest)
ifa->ifa_rtrequest(RTM_DELETE, rt, info);
/*
* One more rtentry floating around that is not
* linked to the routing table. rttrash will be decremented
* when RTFREE(rt) is eventually called.
*/
V_rttrash++;
rt_notifydelete(rt, info);
/*
* If the caller wants it, then it can have it,

View File

@ -128,6 +128,7 @@ struct rtentry {
#define rt_endzero rt_pksent
counter_u64_t rt_pksent; /* packets sent using this route */
struct mtx rt_mtx; /* mutex for routing entry */
struct rtentry *rt_chain; /* pointer to next rtentry to delete */
};
#endif /* _KERNEL || _WANT_RTENTRY */
@ -259,14 +260,19 @@ struct rt_msghdr {
#define RTAX_BRD 7 /* for NEWADDR, broadcast or p-p dest addr */
#define RTAX_MAX 8 /* size of array to allocate */
typedef int rt_filter_f_t(const struct rtentry *, void *);
struct rt_addrinfo {
int rti_addrs;
struct sockaddr *rti_info[RTAX_MAX];
int rti_flags;
struct ifaddr *rti_ifa;
struct ifnet *rti_ifp;
u_long rti_mflags;
struct rt_metrics *rti_rmx;
int rti_addrs; /* Route RTF_ flags */
int rti_flags; /* Route RTF_ flags */
struct sockaddr *rti_info[RTAX_MAX]; /* Sockaddr data */
struct ifaddr *rti_ifa; /* value of rt_ifa addr */
struct ifnet *rti_ifp; /* route interface */
rt_filter_f_t *rti_filter; /* filter function */
void *rti_filterdata; /* filter parameters */
u_long rti_mflags; /* metrics RTV_ flags */
u_long rti_spare; /* Will be used for fib */
struct rt_metrics *rti_rmx; /* Pointer to route metrics */
};
/*
@ -383,6 +389,7 @@ void rt_updatemtu(struct ifnet *);
typedef int rt_walktree_f_t(struct rtentry *, void *);
typedef void rt_setwarg_t(struct radix_node_head *, uint32_t, int, void *);
void rt_foreach_fib_walk(int af, rt_setwarg_t *, rt_walktree_f_t *, void *);
void rt_foreach_fib_walk_del(int af, rt_filter_f_t *filter_f, void *arg);
void rt_flushifroutes(struct ifnet *ifp);
/* XXX MRT COMPAT VERSIONS THAT SET UNIVERSE to 0 */

View File

@ -150,56 +150,22 @@ in_detachhead(void **head, int off)
* plug back in.
*/
struct in_ifadown_arg {
struct radix_node_head *rnh;
struct ifaddr *ifa;
int del;
};
static int
in_ifadownkill(struct rtentry *rt, void *xap)
in_ifadownkill(const struct rtentry *rt, void *xap)
{
struct in_ifadown_arg *ap = xap;
RT_LOCK(rt);
if (rt->rt_ifa == ap->ifa &&
(ap->del || !(rt->rt_flags & RTF_STATIC))) {
/*
* Acquire a reference so that it can later be freed
* as the refcount would be 0 here in case of at least
* ap->del.
*/
RT_ADDREF(rt);
/*
* Disconnect it from the tree and permit protocols
* to cleanup.
*/
rt_expunge(ap->rnh, rt);
/*
* At this point it is an rttrash node, and in case
* the above is the only reference we must free it.
* If we do not noone will have a pointer and the
* rtentry will be leaked forever.
* In case someone else holds a reference, we are
* fine as we only decrement the refcount. In that
* case if the other entity calls RT_REMREF, we
* will still be leaking but at least we tried.
*/
RTFREE_LOCKED(rt);
if (rt->rt_ifa != ap->ifa)
return (0);
}
RT_UNLOCK(rt);
return 0;
}
static void
in_setifarnh(struct radix_node_head *rnh, uint32_t fibnum, int af,
void *_arg)
{
struct in_ifadown_arg *arg;
if ((rt->rt_flags & RTF_STATIC) != 0 && ap->del == 0)
return (0);
arg = (struct in_ifadown_arg *)_arg;
arg->rnh = rnh;
return (1);
}
void
@ -213,7 +179,7 @@ in_ifadown(struct ifaddr *ifa, int delete)
arg.ifa = ifa;
arg.del = delete;
rt_foreach_fib_walk(AF_INET, in_setifarnh, in_ifadownkill, &arg);
rt_foreach_fib_walk_del(AF_INET, in_ifadownkill, &arg);
ifa->ifa_flags &= ~IFA_ROUTE; /* XXXlocking? */
}

View File

@ -1307,6 +1307,15 @@ nd6_free(struct llentry *ln, int gc)
llentry_free(ln);
}
/*
 * Route filter: reports whether @rt carries exactly the flag set
 * RTF_UP | RTF_HOST | RTF_DYNAMIC (no other flags set).
 * @xap is unused.
 *
 * Returns 1 on an exact match, 0 otherwise.
 */
static int
nd6_isdynrte(const struct rtentry *rt, void *xap)
{

	return (rt->rt_flags == (RTF_UP | RTF_HOST | RTF_DYNAMIC));
}
/*
* Remove the rtentry for the given llentry,
* both of which were installed by a redirect.
@ -1315,26 +1324,16 @@ static void
nd6_free_redirect(const struct llentry *ln)
{
int fibnum;
struct rtentry *rt;
struct radix_node_head *rnh;
struct sockaddr_in6 sin6;
struct rt_addrinfo info;
lltable_fill_sa_entry(ln, (struct sockaddr *)&sin6);
for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
rnh = rt_tables_get_rnh(fibnum, AF_INET6);
if (rnh == NULL)
continue;
memset(&info, 0, sizeof(info));
info.rti_info[RTAX_DST] = (struct sockaddr *)&sin6;
info.rti_filter = nd6_isdynrte;
RADIX_NODE_HEAD_LOCK(rnh);
rt = in6_rtalloc1((struct sockaddr *)&sin6, 0,
RTF_RNH_LOCKED, fibnum);
if (rt) {
if (rt->rt_flags == (RTF_UP | RTF_HOST | RTF_DYNAMIC))
rt_expunge(rnh, rt);
RTFREE_LOCKED(rt);
}
RADIX_NODE_HEAD_UNLOCK(rnh);
}
for (fibnum = 0; fibnum < rt_numfibs; fibnum++)
rtrequest1_fib(RTM_DELETE, &info, NULL, fibnum);
}
/*

View File

@ -88,7 +88,7 @@ static void in6_init_address_ltimes(struct nd_prefix *,
static int nd6_prefix_onlink(struct nd_prefix *);
static int nd6_prefix_offlink(struct nd_prefix *);
static int rt6_deleteroute(struct rtentry *, void *);
static int rt6_deleteroute(const struct rtentry *, void *);
VNET_DECLARE(int, nd6_recalc_reachtm_interval);
#define V_nd6_recalc_reachtm_interval VNET(nd6_recalc_reachtm_interval)
@ -2073,11 +2073,11 @@ rt6_flush(struct in6_addr *gateway, struct ifnet *ifp)
return;
/* XXX Do we really need to walk any but the default FIB? */
rt_foreach_fib_walk(AF_INET6, NULL, rt6_deleteroute, (void *)gateway);
rt_foreach_fib_walk_del(AF_INET6, rt6_deleteroute, (void *)gateway);
}
static int
rt6_deleteroute(struct rtentry *rt, void *arg)
rt6_deleteroute(const struct rtentry *rt, void *arg)
{
#define SIN6(s) ((struct sockaddr_in6 *)s)
struct in6_addr *gate = (struct in6_addr *)arg;
@ -2104,8 +2104,7 @@ rt6_deleteroute(struct rtentry *rt, void *arg)
if ((rt->rt_flags & RTF_HOST) == 0)
return (0);
return (in6_rtrequest(RTM_DELETE, rt_key(rt), rt->rt_gateway,
rt_mask(rt), rt->rt_flags | RTF_RNH_LOCKED, NULL, rt->rt_fibnum));
return (1);
#undef SIN6
}