melifaro b17accbc49 Bring back redirect route expiration.
Redirect (and temporal) route expiration was broken a while ago.
This change brings route expiration back, with unified IPv4/IPv6 handling code.

It introduces the net.inet.icmp.redirtimeout sysctl, which sets
 an expiration time for redirected routes. It defaults to 10 minutes,
 analogous to net.inet6.icmp6.redirtimeout.

The implementation uses a separate file, route_temporal.c, as route.c is
 already bloated with tons of different functions.
Internally, expiration is implemented as a per-rnh callout scheduled when
 a route with a non-zero rt_expire time is added or rt_expire is changed.
 It does not add any overhead when no temporal routes are present.

The callout traverses the entire routing tree under wlock, scheduling expired
 routes for deletion and calculating the next time it needs to be run. The
 rationale for such an implementation is the following: typically, workloads
 requiring a large number of routes have redirects turned off already, while
 systems with a small number of routes will not incur large overhead during
 tree traversal.

This change also fixes netstat -rn display of route expiration time, which
 has been broken since the conversion from kread() to sysctl.

Reviewed by:	bz
MFC after:	3 weeks
Differential Revision:	https://reviews.freebsd.org/D23075
2020-01-22 13:53:18 +00:00

200 lines
5.5 KiB
C

/*-
* Copyright 1994, 1995 Massachusetts Institute of Technology
*
* Permission to use, copy, modify, and distribute this software and
* its documentation for any purpose and without fee is hereby
* granted, provided that both the above copyright notice and this
* permission notice appear in all copies, that both the above
* copyright notice and this permission notice appear in all
* supporting documentation, and that the name of M.I.T. not be used
* in advertising or publicity pertaining to distribution of the
* software without specific, written prior permission. M.I.T. makes
* no representations about the suitability of this software for any
* purpose. It is provided "as is" without express or implied
* warranty.
*
* THIS SOFTWARE IS PROVIDED BY M.I.T. ``AS IS''. M.I.T. DISCLAIMS
* ALL EXPRESS OR IMPLIED WARRANTIES WITH REGARD TO THIS SOFTWARE,
* INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. IN NO EVENT
* SHALL M.I.T. BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF
* USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
* OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/socket.h>
#include <sys/mbuf.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/route.h>
#include <net/route_var.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/ip_icmp.h>
#include <netinet/ip_var.h>
/*
 * Prototypes for the routing-table attach/detach routines defined later
 * in this file. in_detachhead() is only declared (and defined) when the
 * kernel is built with VIMAGE.
 */
extern int in_inithead(void **head, int off, u_int fibnum);
#ifdef VIMAGE
extern int in_detachhead(void **head, int off);
#endif
/*
 * IPv4 hook run when a route is inserted into the radix tree.
 *
 * Adjusts the flags and MTU of the route entry embedded in `treenodes`
 * and then performs the actual insertion through rn_addroute(), whose
 * result is returned unchanged.
 */
static struct radix_node *
in_addroute(void *v_arg, void *n_arg, struct radix_head *head,
    struct radix_node *treenodes)
{
	struct rtentry *rt = (struct rtentry *)treenodes;
	struct sockaddr_in *dst = (struct sockaddr_in *)rt_key(rt);

	/*
	 * Help for IP output and input: tag host routes whose destination
	 * looks like a broadcast address with RTF_BROADCAST, so that
	 * ip_output() can usually skip the expensive in_broadcast() call
	 * (the route it is handed is almost always a host route).
	 *
	 * Host routes whose destination equals the address of the route's
	 * own ifa are tagged RTF_LOCAL, with the thought that this might
	 * one day speed up ip_input().
	 */
	if ((rt->rt_flags & RTF_HOST) != 0) {
		struct epoch_tracker et;
		bool is_bcast;

		/* in_broadcast() is called inside a net epoch section. */
		NET_EPOCH_ENTER(et);
		is_bcast = in_broadcast(dst->sin_addr, rt->rt_ifp);
		NET_EPOCH_EXIT(et);

		if (is_bcast) {
			rt->rt_flags |= RTF_BROADCAST;
		} else if (dst->sin_addr.s_addr ==
		    satosin(rt->rt_ifa->ifa_addr)->sin_addr.s_addr) {
			rt->rt_flags |= RTF_LOCAL;
		}
	}

	/*
	 * Multicast destinations are marked as well: cheap to do and
	 * possibly useful, although the address is easy to inspect anyway.
	 */
	if (IN_MULTICAST(ntohl(dst->sin_addr.s_addr)))
		rt->rt_flags |= RTF_MULTICAST;

	/*
	 * Route MTU: inherit the interface MTU when none was set, and
	 * clamp to the interface MTU when the requested value is larger.
	 */
	if (rt->rt_ifp != NULL &&
	    (rt->rt_mtu == 0 || rt->rt_mtu > rt->rt_ifp->if_mtu))
		rt->rt_mtu = rt->rt_ifp->if_mtu;

	return (rn_addroute(v_arg, n_arg, head, treenodes));
}
/*
 * Latched to 1 by the first successful in_inithead() call. Nothing in the
 * visible code reads it back.
 */
static int _in_rt_was_here;

/*
 * Create the IPv4 routing table for FIB `fibnum` and hand it back
 * through `*head`.
 *
 * Returns 1 on success and 0 if rt_table_init() fails (in which case
 * `*head` is left untouched). The `off` argument is not used here.
 */
int
in_inithead(void **head, int off, u_int fibnum)
{
	/* NOTE(review): 32 is presumably the key offset in bits - confirm. */
	struct rib_head *table = rt_table_init(32, AF_INET, fibnum);

	if (table == NULL)
		return (0);

	/* Route insertions for this table go through in_addroute(). */
	table->rnh_addaddr = in_addroute;
	*head = (void *)table;

	if (!_in_rt_was_here)
		_in_rt_was_here = 1;

	return (1);
}
#ifdef VIMAGE
/*
 * Destroy the routing table referenced by `*head`.
 *
 * Always returns 1; the `off` argument is not used here.
 */
int
in_detachhead(void **head, int off)
{
	struct rib_head *table = (struct rib_head *)*head;

	rt_table_destroy(table);
	return (1);
}
#endif
/*
* This zaps old routes when the interface goes down or interface
* address is deleted. In the latter case, it deletes static routes
* that point to this address. If we don't do this, we may end up
* using the old address in the future. The ones we always want to
* get rid of are things like ARP entries, since the user might down
* the interface, walk over to a completely different network, and
* plug back in.
*/
/*
 * Argument bundle that in_ifadown() passes, via rt_foreach_fib_walk_del(),
 * to the in_ifadownkill() callback.
 */
struct in_ifadown_arg {
/* Interface address to match against each route's rt_ifa. */
struct ifaddr *ifa;
/* When zero, routes flagged RTF_STATIC are not selected. */
int del;
};
static int
in_ifadownkill(const struct rtentry *rt, void *xap)
{
struct in_ifadown_arg *ap = xap;
if (rt->rt_ifa != ap->ifa)
return (0);
if ((rt->rt_flags & RTF_STATIC) != 0 && ap->del == 0)
return (0);
return (1);
}
void
in_ifadown(struct ifaddr *ifa, int delete)
{
struct in_ifadown_arg arg;
KASSERT(ifa->ifa_addr->sa_family == AF_INET,
("%s: wrong family", __func__));
arg.ifa = ifa;
arg.del = delete;
rt_foreach_fib_walk_del(AF_INET, in_ifadownkill, &arg);
ifa->ifa_flags &= ~IFA_ROUTE; /* XXXlocking? */
}
/*
 * inet versions of the rt functions. These carry the fib extension and,
 * for now, simply forward to the _fib variants; eventually this order
 * will be reversed.
 *
 * in_rtalloc_ign() passes ro, ignflags and fibnum unchanged to
 * rtalloc_ign_fib().
 */
void
in_rtalloc_ign(struct route *ro, u_long ignflags, u_int fibnum)
{
rtalloc_ign_fib(ro, ignflags, fibnum);
}