Qing Li 6e6b3f7cbc The main goals of this project are:
1. separating L2 tables (ARP, NDP) from the L3 routing tables
2. removing as much locking dependencies among these layers as
   possible to allow for some parallelism in the search operations
3. simplifying the logic in the routing code.

The most notable end result is the obsolescence of the route
cloning (RTF_CLONING) concept, which translated into code reduction
in both IPv4 ARP and IPv6 NDP related modules, and size reduction in
struct rtentry{}. The change in design obsoletes the semantics of
RTF_CLONING, RTF_WASCLONE and RTF_LLINFO routing flags. The userland
applications such as "arp" and "ndp" have been modified to reflect
those changes. The output from "netstat -r" shows only the routing
entries.

Quite a few developers have contributed to this project in the
past: Glebius Smirnoff, Luigi Rizzo, Alessandro Cerri, and
Andre Oppermann. And most recently:

- Kip Macy revised the locking code completely, thus completing
  the last piece of the puzzle, Kip has also been conducting
  active functional testing
- Sam Leffler has helped me improving/refactoring the code, and
  provided valuable reviews
- Julian Elischer setup the perforce tree for me and has helped
  me maintaining that branch before the svn conversion
2008-12-15 06:10:57 +00:00

2157 lines
51 KiB
C

/*
* Copyright (c) 1983, 1988, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include "defs.h"
#ifdef __NetBSD__
__RCSID("$NetBSD$");
#elif defined(__FreeBSD__)
__RCSID("$FreeBSD$");
#else
__RCSID("$Revision: 2.27 $");
#ident "$Revision: 2.27 $"
#endif
/* Forward declarations of helpers private to this file. */
static struct rt_spare *rts_better(struct rt_entry *);
/* Template value for a spare route slot; its fourth member is
* initialized to HOPCNT_INFINITY and all others to zero.
*/
static struct rt_spare rts_empty = {0,0,0,HOPCNT_INFINITY,0,0,0};
static void set_need_flash(void);
/* The argument type of masktrim() depends on _HAVE_SIN_LEN; the
* definition further down uses the same conditional.
*/
#ifdef _HAVE_SIN_LEN
static void masktrim(struct sockaddr_in *ap);
#else
static void masktrim(struct sockaddr_in_new *ap);
#endif
struct radix_node_head *rhead; /* root of the radix tree */
int need_flash = 1; /* flash update needed;
* starts at 1 to suppress the first one
*/
struct timeval age_timer; /* next check of old routes */
struct timeval need_kern = { /* need to update kernel table */
EPOCH+MIN_WAITTIME-1, 0
};
int stopint;
int total_routes;
/* zap any old routes through this gateway */
naddr age_bad_gate;
/* It is desirable to "aggregate" routes, to combine differing routes of
* the same metric and next hop into a common route with a smaller netmask
* or to suppress redundant routes, routes that add no information to
* routes with smaller netmasks.
*
* A route is redundant if and only if any and all routes with smaller
* but matching netmasks and nets are the same. Since routes are
* kept sorted in the radix tree, redundant routes always come second.
*
* There are two kinds of aggregations. First, two routes of the same bit
* mask and differing only in the least significant bit of the network
* number can be combined into a single route with a coarser mask.
*
* Second, a route can be suppressed in favor of another route with a more
* coarse mask provided no incompatible routes with intermediate masks
* are present. The second kind of aggregation involves suppressing routes.
* A route must not be suppressed if an incompatible route exists with
* an intermediate mask, since the suppressed route would be covered
* by the intermediate.
*
* This code relies on the radix tree walk encountering routes
* sorted first by address, with the smallest address first.
*/
/* Aggregation table storage and its list heads.
* ag_avail heads the list of free slots (linked through ag_fine);
* ag_corsest and ag_finest are the two ends of the list of slots in
* use, linked through ag_fine and ag_cors.
*/
struct ag_info ag_slots[NUM_AG_SLOTS], *ag_avail, *ag_corsest, *ag_finest;
/* #define DEBUG_AG */
/* CHECK_AG() is a consistency check that is only active when DEBUG_AG
* is defined. It walks the free list and the in-use list along ag_fine,
* counts the slots found on both, and calls abort() when the total
* differs from NUM_AG_SLOTS. Without DEBUG_AG it expands to nothing.
*/
#ifdef DEBUG_AG
#define CHECK_AG() {int acnt = 0; struct ag_info *cag; \
for (cag = ag_avail; cag != 0; cag = cag->ag_fine) \
acnt++; \
for (cag = ag_corsest; cag != 0; cag = cag->ag_fine) \
acnt++; \
if (acnt != NUM_AG_SLOTS) { \
(void)fflush(stderr); \
abort(); \
} \
}
#else
#define CHECK_AG()
#endif
/* Emit one aggregation table slot through the supplied callback.
 *
 * The slot itself may be altered here (its mask and destination can be
 * made finer), so the caller must delete the slot immediately after
 * this function returns.
 *
 *	ag	slot to emit
 *	out	callback that receives the slot if it is not skipped
 */
static void
ag_out(struct ag_info *ag,
       void (*out)(struct ag_info *))
{
	struct ag_info *parent;
	naddr half;
	int redun;

	/* Routes held back for split-horizon are never output. */
	if (ag->ag_state & AGS_SPLIT_HZ)
		return;

	/* When both the even and the odd twin get output, the immediate
	 * parent, if present, becomes redundant unless it manages to
	 * aggregate into something coarser.  Each call records on the
	 * parent which of its twins has just been seen.
	 *
	 * The radix tree walk emits the twins before the parent, so the
	 * marks are in place by the time the parent is emitted.
	 */
	parent = ag->ag_cors;
	if (parent != 0
	    && parent->ag_mask == ag->ag_mask<<1
	    && parent->ag_dst_h == (ag->ag_dst_h & parent->ag_mask)) {
		if (parent->ag_dst_h == ag->ag_dst_h)
			parent->ag_state |= AGS_REDUN0;
		else
			parent->ag_state |= AGS_REDUN1;
	}

	/* A fully redundant route is dropped.  A half-redundant one is
	 * made finer so it covers only the half that is still needed.
	 * Changing the slot is fine because it is deleted next.
	 */
	redun = ag->ag_state & (AGS_REDUN0 | AGS_REDUN1);
	if (redun == (AGS_REDUN0 | AGS_REDUN1))
		return;
	if (redun != 0) {
		half = (-ag->ag_mask) >> 1;
		if (redun == AGS_REDUN0)
			ag->ag_dst_h |= half;	/* keep the odd half */
		ag->ag_mask |= half;
	}

	out(ag);
}
/* Unlink a slot from the in-use aggregation list and push it onto the
 * front of the free list headed by ag_avail.
 */
static void
ag_del(struct ag_info *ag)
{
	struct ag_info *coarser = ag->ag_cors;
	struct ag_info *finer = ag->ag_fine;

	CHECK_AG();

	/* Detach from the coarser side, or move the coarse end. */
	if (coarser != 0)
		coarser->ag_fine = finer;
	else
		ag_corsest = finer;

	/* Detach from the finer side, or move the fine end. */
	if (finer != 0)
		finer->ag_cors = coarser;
	else
		ag_finest = coarser;

	/* The free list is chained through ag_fine. */
	ag->ag_fine = ag_avail;
	ag_avail = ag;

	CHECK_AG();
}
/* Flush routes waiting for aggregation.
* This must not suppress a route unless it is known that among all
* routes with coarser masks that match it, the one with the longest
* mask is appropriate. This is ensured by scanning the routes
* in lexical order, and with the most restrictive mask first
* among routes to the same destination.
*
* Walks the in-use list from ag_finest toward coarser slots for as long
* as the slot mask is >= lim_mask. Every slot whose destination matches
* lim_dst_h under lim_mask is either passed to ag_out() or silently
* suppressed by a coarser slot, and is then removed with ag_del().
* Slots that do not match are left in the table.
*
* lim_dst_h destination (compared against ag_dst_h) to flush up to
* lim_mask mask selecting which slots are considered
* out callback handed to ag_out() for routes that are output
*/
void
ag_flush(naddr lim_dst_h, /* flush routes to here */
naddr lim_mask, /* matching this mask */
void (*out)(struct ag_info *))
{
struct ag_info *ag, *ag_cors;
naddr dst_h;
for (ag = ag_finest;
ag != 0 && ag->ag_mask >= lim_mask;
ag = ag_cors) {
/* Remember the next (coarser) slot now; ag is deleted at the
* bottom of the loop body.
*/
ag_cors = ag->ag_cors;
/* work on only the specified routes */
dst_h = ag->ag_dst_h;
if ((dst_h & lim_mask) != lim_dst_h)
continue;
if (!(ag->ag_state & AGS_SUPPRESS))
ag_out(ag, out);
/* Otherwise scan ever coarser slots; this inner loop reuses
* ag_cors as its cursor, which is why it is reset below.
*/
else for ( ; ; ag_cors = ag_cors->ag_cors) {
/* Look for a route that can suppress the
* current route */
if (ag_cors == 0) {
/* failed, so output it and look for
* another route to work on
*/
ag_out(ag, out);
break;
}
if ((dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h) {
/* We found a route with a coarser mask that
* aggregates the current target.
*
* If it has a different next hop, it
* cannot replace the target, so output
* the target.
*/
if (ag->ag_gate != ag_cors->ag_gate
&& !(ag->ag_state & AGS_FINE_GATE)
&& !(ag_cors->ag_state & AGS_CORS_GATE)) {
ag_out(ag, out);
break;
}
/* If the coarse route has a good enough
* metric, it suppresses the target.
* If the suppressed target was redundant,
* then mark the suppressor redundant.
*/
if (ag_cors->ag_pref <= ag->ag_pref) {
if (AG_IS_REDUN(ag->ag_state)
&& ag_cors->ag_mask==ag->ag_mask<<1) {
if (ag_cors->ag_dst_h == dst_h)
ag_cors->ag_state |= AGS_REDUN0;
else
ag_cors->ag_state |= AGS_REDUN1;
}
/* The suppressor keeps its tag and next hop
* only when they agree with the suppressed
* route; otherwise they are cleared.
*/
if (ag->ag_tag != ag_cors->ag_tag)
ag_cors->ag_tag = 0;
if (ag->ag_nhop != ag_cors->ag_nhop)
ag_cors->ag_nhop = 0;
break;
}
}
}
/* That route has either been output or suppressed */
/* Restore the outer loop cursor before ag is unlinked. */
ag_cors = ag->ag_cors;
ag_del(ag);
}
CHECK_AG();
}
/* Try to aggregate a route with previous routes.
*
* The new route is either handed straight to out() (non-contiguous
* mask), merged into an existing slot for the same destination,
* combined with or promoted over its even/odd twin, or finally stored
* in a free slot taken from ag_avail. Older slots that can no longer
* be aggregated are flushed through out() along the way.
*
* dst destination; converted with ntohl() on entry
* mask netmask, used as given (compared directly with ag_mask)
* gate gateway stored in ag_gate
* nhop next hop stored in ag_nhop
* metric metric stored in ag_metric
* pref preference; smaller values win the comparisons below
* new_seqno sequence number; the larger of old and new is kept
* tag route tag stored in ag_tag
* state AGS_* flags for the new route
* out callback that receives routes leaving the table
*/
void
ag_check(naddr dst,
naddr mask,
naddr gate,
naddr nhop,
char metric,
char pref,
u_int new_seqno,
u_short tag,
u_short state,
void (*out)(struct ag_info *)) /* output using this */
{
struct ag_info *ag, *nag, *ag_cors;
naddr xaddr;
int x;
dst = ntohl(dst);
/* Punt non-contiguous subnet masks.
*
* (mask & -mask) isolates the lowest set bit of the mask. For a
* contiguous mask (ones in the high bits, zeros below) adding that
* bit carries out of the top and leaves 0; any other result means
* the mask has a hole. This presumes naddr is an unsigned type
* exactly as wide as the mask -- confirm against its definition.
*
* Such a route bypasses the table and is output from a temporary
* on the stack.
*/
if ((mask & -mask) + mask != 0) {
struct ag_info nc_ag;
nc_ag.ag_dst_h = dst;
nc_ag.ag_mask = mask;
nc_ag.ag_gate = gate;
nc_ag.ag_nhop = nhop;
nc_ag.ag_metric = metric;
nc_ag.ag_pref = pref;
nc_ag.ag_tag = tag;
nc_ag.ag_state = state;
nc_ag.ag_seqno = new_seqno;
out(&nc_ag);
return;
}
/* Search for the right slot in the aggregation table.
* On exit from the loop ag is the first slot whose mask is >= mask
* (or 0) and ag_cors is the slot just coarser than it (or 0).
*/
ag_cors = 0;
ag = ag_corsest;
while (ag != 0) {
if (ag->ag_mask >= mask)
break;
/* Suppress old routes (i.e. combine with compatible routes
* with coarser masks) as we look for the right slot in the
* aggregation table for the new route.
* A route to an address less than the current destination
* will not be affected by the current route or any route
* seen hereafter. That means it is safe to suppress it.
* This check keeps poor routes (e.g. with large hop counts)
* from preventing suppression of finer routes.
*/
if (ag_cors != 0
&& ag->ag_dst_h < dst
&& (ag->ag_state & AGS_SUPPRESS)
&& ag_cors->ag_pref <= ag->ag_pref
&& (ag->ag_dst_h & ag_cors->ag_mask) == ag_cors->ag_dst_h
&& (ag_cors->ag_gate == ag->ag_gate
|| (ag->ag_state & AGS_FINE_GATE)
|| (ag_cors->ag_state & AGS_CORS_GATE))) {
/* If the suppressed target was redundant,
* then mark the suppressor redundant.
*/
if (AG_IS_REDUN(ag->ag_state)
&& ag_cors->ag_mask == ag->ag_mask<<1) {
/* NOTE(review): the matching code in ag_flush()
* compares against the suppressed route's own
* ag_dst_h, while this compares against the new
* route's dst -- confirm which is intended.
*/
if (ag_cors->ag_dst_h == dst)
ag_cors->ag_state |= AGS_REDUN0;
else
ag_cors->ag_state |= AGS_REDUN1;
}
if (ag->ag_tag != ag_cors->ag_tag)
ag_cors->ag_tag = 0;
if (ag->ag_nhop != ag_cors->ag_nhop)
ag_cors->ag_nhop = 0;
ag_del(ag);
CHECK_AG();
} else {
ag_cors = ag;
}
/* ag_cors is non-null here in both branches above. */
ag = ag_cors->ag_fine;
}
/* If we find the even/odd twin of the new route, and if the
* masks and so forth are equal, we can aggregate them.
* We can probably promote one of the pair.
*
* Since the routes are encountered in lexical order,
* the new route must be odd. However, the second or later
* times around this loop, it could be the even twin promoted
* from the even/odd pair of twins of the finer route.
*/
while (ag != 0
&& ag->ag_mask == mask
&& ((ag->ag_dst_h ^ dst) & (mask<<1)) == 0) {
/* Here we know the target route and the route in the current
* slot have the same netmasks and differ by at most the
* last bit. They are either for the same destination, or
* for an even/odd pair of destinations.
*/
if (ag->ag_dst_h == dst) {
/* We have two routes to the same destination.
* Routes are encountered in lexical order, so a
* route is never promoted until the parent route is
* already present. So we know that the new route is
* a promoted (or aggregated) pair and the route
* already in the slot is the explicit route.
*
* Prefer the best route if their metrics differ,
* or the aggregated one if not, following a sort
* of longest-match rule.
*/
if (pref <= ag->ag_pref) {
ag->ag_gate = gate;
ag->ag_nhop = nhop;
ag->ag_tag = tag;
ag->ag_metric = metric;
ag->ag_pref = pref;
if (ag->ag_seqno < new_seqno)
ag->ag_seqno = new_seqno;
/* Swap the states so that `state' now holds
* the flags of the route that lost.
*/
x = ag->ag_state;
ag->ag_state = state;
state = x;
}
/* Some bits are set if they are set on either route,
* except when the route is for an interface.
*/
if (!(ag->ag_state & AGS_IF))
ag->ag_state |= (state & (AGS_AGGREGATE_EITHER
| AGS_REDUN0
| AGS_REDUN1));
return;
}
/* If one of the routes can be promoted and the other can
* be suppressed, it may be possible to combine them or
* worthwhile to promote one.
*
* Any route that can be promoted is always
* marked to be eligible to be suppressed.
*/
if (!((state & AGS_AGGREGATE)
&& (ag->ag_state & AGS_SUPPRESS))
&& !((ag->ag_state & AGS_AGGREGATE)
&& (state & AGS_SUPPRESS)))
break;
/* A pair of even/odd twin routes can be combined
* if either is redundant, or if they are via the
* same gateway and have the same metric.
*/
if (AG_IS_REDUN(ag->ag_state)
|| AG_IS_REDUN(state)
|| (ag->ag_gate == gate
&& ag->ag_pref == pref
&& (state & ag->ag_state & AGS_AGGREGATE) != 0)) {
/* We have both the even and odd pairs.
* Since the routes are encountered in order,
* the route in the slot must be the even twin.
*
* Combine and promote (aggregate) the pair of routes.
*/
if (new_seqno < ag->ag_seqno)
new_seqno = ag->ag_seqno;
if (!AG_IS_REDUN(state))
state &= ~AGS_REDUN1;
if (AG_IS_REDUN(ag->ag_state))
state |= AGS_REDUN0;
else
state &= ~AGS_REDUN0;
state |= (ag->ag_state & AGS_AGGREGATE_EITHER);
if (ag->ag_tag != tag)
tag = 0;
if (ag->ag_nhop != nhop)
nhop = 0;
/* Get rid of the even twin that was already
* in the slot.
*/
ag_del(ag);
} else if (ag->ag_pref >= pref
&& (ag->ag_state & AGS_AGGREGATE)) {
/* If we cannot combine the pair, maybe the route
* with the worse metric can be promoted.
*
* Promote the old, even twin, by giving its slot
* in the table to the new, odd twin.
*
* Every field is exchanged between the slot and the
* local variables, so that after this branch the
* locals describe the old even twin.
*/
ag->ag_dst_h = dst;
xaddr = ag->ag_gate;
ag->ag_gate = gate;
gate = xaddr;
xaddr = ag->ag_nhop;
ag->ag_nhop = nhop;
nhop = xaddr;
x = ag->ag_tag;
ag->ag_tag = tag;
tag = x;
/* The promoted route is even-redundant only if the
* even twin was fully redundant. It is not
* odd-redundant because the odd-twin will still be
* in the table.
*/
x = ag->ag_state;
if (!AG_IS_REDUN(x))
x &= ~AGS_REDUN0;
x &= ~AGS_REDUN1;
ag->ag_state = state;
state = x;
x = ag->ag_metric;
ag->ag_metric = metric;
metric = x;
x = ag->ag_pref;
ag->ag_pref = pref;
pref = x;
/* take the newest sequence number */
if (new_seqno <= ag->ag_seqno)
new_seqno = ag->ag_seqno;
else
ag->ag_seqno = new_seqno;
} else {
if (!(state & AGS_AGGREGATE))
break; /* cannot promote either twin */
/* Promote the new, odd twin by shaving its
* mask and address.
* The promoted route is odd-redundant only if the
* odd twin was fully redundant. It is not
* even-redundant because the even twin is still in
* the table.
*/
if (!AG_IS_REDUN(state))
state &= ~AGS_REDUN1;
state &= ~AGS_REDUN0;
if (new_seqno < ag->ag_seqno)
new_seqno = ag->ag_seqno;
else
ag->ag_seqno = new_seqno;
}
/* The route carried in the locals is now one bit coarser;
* step one slot toward the coarse end and try again.
*/
mask <<= 1;
dst &= mask;
if (ag_cors == 0) {
ag = ag_corsest;
break;
}
ag = ag_cors;
ag_cors = ag->ag_cors;
}
/* When we can no longer promote and combine routes,
* flush the old route in the target slot. Also flush
* any finer routes that we know will never be aggregated by
* the new route.
*
* In case we moved toward coarser masks,
* get back where we belong
*/
if (ag != 0
&& ag->ag_mask < mask) {
ag_cors = ag;
ag = ag->ag_fine;
}
/* Empty the target slot
*/
if (ag != 0 && ag->ag_mask == mask) {
ag_flush(ag->ag_dst_h, ag->ag_mask, out);
/* ag may have been deleted by the flush; recompute it
* from the coarser neighbour.
*/
ag = (ag_cors == 0) ? ag_corsest : ag_cors->ag_fine;
}
#ifdef DEBUG_AG
/* Verify that ag and ag_cors are adjacent list neighbours. */
(void)fflush(stderr);
if (ag == 0 && ag_cors != ag_finest)
abort();
if (ag_cors == 0 && ag != ag_corsest)
abort();
if (ag != 0 && ag->ag_cors != ag_cors)
abort();
if (ag_cors != 0 && ag_cors->ag_fine != ag)
abort();
CHECK_AG();
#endif
/* Save the new route on the end of the table.
*
* NOTE(review): ag_avail is dereferenced without a check for an
* empty free list -- presumably callers guarantee a free slot;
* confirm.
*/
nag = ag_avail;
ag_avail = nag->ag_fine;
nag->ag_dst_h = dst;
nag->ag_mask = mask;
nag->ag_gate = gate;
nag->ag_nhop = nhop;
nag->ag_metric = metric;
nag->ag_pref = pref;
nag->ag_tag = tag;
nag->ag_state = state;
nag->ag_seqno = new_seqno;
/* Link the new slot between ag_cors (coarser) and ag (finer). */
nag->ag_fine = ag;
if (ag != 0)
ag->ag_cors = nag;
else
ag_finest = nag;
nag->ag_cors = ag_cors;
if (ag_cors == 0)
ag_corsest = nag;
else
ag_cors->ag_fine = nag;
CHECK_AG();
}
#define NAME0_LEN 14
/* Return a printable name for a routing message type.
 *
 * Types 1 through the number of table entries map to their symbolic
 * names.  Type 0 and anything past the table are formatted as
 * "RTM type 0x.." into a static buffer, so that result is only valid
 * until the next call that takes the same path.
 */
static const char *
rtm_type_name(u_char type)
{
#define NEW_RTM_PAT "RTM type %#x"
	/* Indexed by (type - 1). */
	static const char *rtm_types[] = {
		"RTM_ADD",
		"RTM_DELETE",
		"RTM_CHANGE",
		"RTM_GET",
		"RTM_LOSING",
		"RTM_REDIRECT",
		"RTM_MISS",
		"RTM_LOCK",
		"RTM_OLDADD",
		"RTM_OLDDEL",
		"RTM_RESOLVE",
		"RTM_NEWADDR",
		"RTM_DELADDR",
#ifdef RTM_OIFINFO
		"RTM_OIFINFO",
#endif
		"RTM_IFINFO",
		"RTM_NEWMADDR",
		"RTM_DELMADDR"
	};
	static char name0[sizeof(NEW_RTM_PAT)+2];

	/* Known type: hand back the table entry. */
	if (type != 0
	    && type <= sizeof(rtm_types)/sizeof(rtm_types[0]))
		return rtm_types[type-1];

	/* Unknown type: describe it numerically. */
	snprintf(name0, sizeof(name0), NEW_RTM_PAT, type);
	return name0;
#undef NEW_RTM_PAT
}
/* Shorten the length field of a sockaddr holding a netmask.
 * An all-zero address gets a length of 0.  Otherwise the length is set
 * so that it ends just after the last non-zero byte of the address.
 */
void
#ifdef _HAVE_SIN_LEN
masktrim(struct sockaddr_in *ap)
#else
masktrim(struct sockaddr_in_new *ap)
#endif
{
	char *base = (char *)ap;
	char *last;

	if (ap->sin_addr.s_addr != 0) {
		/* Start at the final byte of the address and back up
		 * over trailing zero bytes.  The address is non-zero,
		 * so the scan stops inside it.
		 */
		last = (char *)(&ap->sin_addr.s_addr+1) - 1;
		while (*last == 0)
			last--;
		ap->sin_len = last - base + 1;
	} else {
		ap->sin_len = 0;
	}
}
/* Tell the kernel to add, delete or change a route
*
* Builds one routing message (header, destination, gateway and,
* unless the route is a host route, a netmask) and writes it to rt_sock.
*
* action RTM_ADD, RTM_DELETE or RTM_CHANGE; stored in rtm_type
* dst destination, copied unchanged into sin_addr.s_addr
* gate gateway, copied unchanged into sin_addr.s_addr
* mask netmask; passed through htonl() before being stored.
* HOST_MASK selects a host route (RTF_HOST, no netmask)
* metric stored as rmx_hopcount when non-zero or when the
* action is RTM_CHANGE
* flags initial value of rtm_flags
*
* Nothing is returned; failures are reported through msglog() or
* trace_act(). When NO_INSTALL is defined the message is built but
* never written.
*/
static void
rtioctl(int action, /* RTM_DELETE, etc */
naddr dst,
naddr gate,
naddr mask,
int metric,
int flags)
{
/* The message as written: header followed by the sockaddrs in
* the order destination, gateway, netmask.
*/
struct {
struct rt_msghdr w_rtm;
struct sockaddr_in w_dst;
struct sockaddr_in w_gate;
#ifdef _HAVE_SA_LEN
struct sockaddr_in w_mask;
#else
struct sockaddr_in_new w_mask;
#endif
} w;
long cc;
/* Format string tail and matching arguments shared by all of the
* log and trace calls below.
*/
# define PAT " %-10s %s metric=%d flags=%#x"
# define ARGS rtm_type_name(action), rtname(dst,mask,gate), metric, flags
/* Re-entered with action == RTM_ADD when a change fails with ESRCH. */
again:
memset(&w, 0, sizeof(w));
w.w_rtm.rtm_msglen = sizeof(w);
w.w_rtm.rtm_version = RTM_VERSION;
w.w_rtm.rtm_type = action;
w.w_rtm.rtm_flags = flags;
w.w_rtm.rtm_seq = ++rt_sock_seqno;
w.w_rtm.rtm_addrs = RTA_DST|RTA_GATEWAY;
if (metric != 0 || action == RTM_CHANGE) {
w.w_rtm.rtm_rmx.rmx_hopcount = metric;
w.w_rtm.rtm_inits |= RTV_HOPCOUNT;
}
w.w_dst.sin_family = AF_INET;
w.w_dst.sin_addr.s_addr = dst;
w.w_gate.sin_family = AF_INET;
w.w_gate.sin_addr.s_addr = gate;
#ifdef _HAVE_SA_LEN
w.w_dst.sin_len = sizeof(w.w_dst);
w.w_gate.sin_len = sizeof(w.w_gate);
#endif
if (mask == HOST_MASK) {
/* Host route: no netmask is sent, so shorten the message. */
w.w_rtm.rtm_flags |= RTF_HOST;
w.w_rtm.rtm_msglen -= sizeof(w.w_mask);
} else {
w.w_rtm.rtm_addrs |= RTA_NETMASK;
w.w_mask.sin_addr.s_addr = htonl(mask);
#ifdef _HAVE_SA_LEN
/* Trim the netmask sockaddr and shrink the message by the
* bytes trimmed. A zero mask is sent with a length of
* sizeof(long) instead of 0.
*/
masktrim(&w.w_mask);
if (w.w_mask.sin_len == 0)
w.w_mask.sin_len = sizeof(long);
w.w_rtm.rtm_msglen -= (sizeof(w.w_mask) - w.w_mask.sin_len);
#endif
}
#ifndef NO_INSTALL
cc = write(rt_sock, &w, w.w_rtm.rtm_msglen);
if (cc < 0) {
/* ESRCH on a change or delete means the route is already
* gone: a delete is then complete, and a change is retried
* as an add.
*/
if (errno == ESRCH
&& (action == RTM_CHANGE || action == RTM_DELETE)) {
trace_act("route disappeared before" PAT, ARGS);
if (action == RTM_CHANGE) {
action = RTM_ADD;
goto again;
}
return;
}
msglog("write(rt_sock)" PAT ": %s", ARGS, strerror(errno));
return;
} else if (cc != w.w_rtm.rtm_msglen) {
/* short write */
msglog("write(rt_sock) wrote %ld instead of %d for" PAT,
cc, w.w_rtm.rtm_msglen, ARGS);
return;
}
#endif
if (TRACEKERNEL)
trace_misc("write kernel" PAT, ARGS);
#undef PAT
#undef ARGS
}
/* The daemon's image of the kernel routing table: a chained hash table
* keyed by destination and mask.
*/
#define KHASH_SIZE 71 /* should be prime */
/* Select the hash bin (an lvalue) for destination a and mask m. */
#define KHASH(a,m) khash_bins[((a) ^ (m)) % KHASH_SIZE]
static struct khash {
struct khash *k_next; /* next entry in the same hash bin */
naddr k_dst; /* destination; part of the lookup key */
naddr k_mask; /* mask; part of the lookup key */
naddr k_gate; /* gateway */
short k_metric;
u_short k_state; /* KS_* flags below */
#define KS_NEW 0x001 /* set when the entry is created by kern_add() */
#define KS_DELETE 0x002 /* need to delete the route */
#define KS_ADD 0x004 /* add to the kernel */
#define KS_CHANGE 0x008 /* tell kernel to change the route */
#define KS_DEL_ADD 0x010 /* delete & add to change the kernel */
#define KS_STATIC 0x020 /* Static flag in kernel */
#define KS_GATEWAY 0x040 /* G flag in kernel */
#define KS_DYNAMIC 0x080 /* result of redirect */
#define KS_DELETED 0x100 /* already deleted from kernel */
#define KS_CHECK 0x200 /* set by flush_kern(), cleared by rtm_add() */
time_t k_keep; /* time value; set from now.tv_sec on creation */
#define K_KEEP_LIM 30
time_t k_redirect_time; /* when redirected route 1st seen */
} *khash_bins[KHASH_SIZE];
/* Look up the kernel-table image entry for a destination and mask.
 *
 * Returns the entry, or 0 when there is none.  When ppk is not 0 it
 * receives the address of the link that points at the entry, or the
 * address of the terminating null link of the bin when nothing was
 * found, so that a caller can insert there.
 */
static struct khash*
kern_find(naddr dst, naddr mask, struct khash ***ppk)
{
	struct khash **link = &KHASH(dst,mask);
	struct khash *ent;

	while ((ent = *link) != 0) {
		if (ent->k_dst == dst && ent->k_mask == mask)
			break;
		link = &ent->k_next;
	}

	if (ppk != 0)
		*ppk = link;
	return ent;
}
/* Return the kernel-table image entry for a destination and mask,
 * creating it when it does not exist yet.  A freshly created entry is
 * zeroed, marked KS_NEW, stamped with the current time in k_keep and
 * appended to the end of its hash chain.
 */
static struct khash*
kern_add(naddr dst, naddr mask)
{
	struct khash **tail;
	struct khash *ent;

	ent = kern_find(dst, mask, &tail);
	if (ent == 0) {
		ent = (struct khash *)rtmalloc(sizeof(*ent), "kern_add");
		memset(ent, 0, sizeof(*ent));
		ent->k_dst = dst;
		ent->k_mask = mask;
		ent->k_state = KS_NEW;
		ent->k_keep = now.tv_sec;
		*tail = ent;
	}
	return ent;
}
/* If a kernel route has a non-zero metric, check that it is still in the
* daemon table, and not deleted by interfaces coming and going.
*/
static void
kern_check_static(struct khash *k,
struct interface *ifp)
{
struct rt_entry *rt;
struct rt_spare new;
if (k->k_metric == 0)
return;
memset(&new, 0, sizeof(new));
new.rts_ifp = ifp;
new.rts_gate = k->k_gate;
new.rts_router = (ifp != 0) ? ifp->int_addr : loopaddr;
new.rts_metric = k->k_metric;
new.rts_time = now.tv_sec;
rt = rtget(k->k_dst, k->k_mask);
if (rt != 0) {
if (!(rt->rt_state & RS_STATIC))
rtchange(rt, rt->rt_state | RS_STATIC, &new, 0);
} else {
rtadd(k->k_dst, k->k_mask, RS_STATIC, &new);
}
}
/* Apply an action to a kernel-table image entry and pass it on to the
 * kernel, keeping the KS_DELETED bookkeeping in step:
 *  - a delete of an entry already marked deleted is dropped;
 *  - a change of an entry marked deleted is sent as an add.
 *
 *	k	entry to operate on
 *	action	RTM_DELETE, RTM_ADD, RTM_CHANGE, ...
 *	flags	route flags handed to rtioctl()
 */
static void
kern_ioctl(struct khash *k,
	   int action,			/* RTM_DELETE, etc */
	   int flags)
{
	if (action == RTM_DELETE) {
		k->k_state &= ~KS_DYNAMIC;
		if (k->k_state & KS_DELETED)
			return;		/* nothing left to delete */
		k->k_state |= KS_DELETED;
	} else if (action == RTM_ADD) {
		k->k_state &= ~KS_DELETED;
	} else if (action == RTM_CHANGE
		   && (k->k_state & KS_DELETED)) {
		/* cannot change what is gone, so add it back */
		action = RTM_ADD;
		k->k_state &= ~KS_DELETED;
	}

	rtioctl(action, k->k_dst, k->k_gate, k->k_mask, k->k_metric, flags);
}
/* add a route the kernel told us
*
* Records the route in the kernel-table image and classifies it:
* redirected (dynamic) routes are either accepted or marked for
* deletion, non-static routes are marked for deletion at the next
* comparison, and static routes are handed to kern_check_static().
*
* rtm routing message header (type, flags, metrics)
* info addresses extracted from the message
* keep seconds added to now.tv_sec to form k_keep for an entry
* that is new to the image
*/
static void
rtm_add(struct rt_msghdr *rtm,
struct rt_addrinfo *info,
time_t keep)
{
struct khash *k;
struct interface *ifp;
naddr mask;
/* Work out the mask: host routes use HOST_MASK, otherwise the
* message must carry a netmask.
*/
if (rtm->rtm_flags & RTF_HOST) {
mask = HOST_MASK;
} else if (INFO_MASK(info) != 0) {
mask = ntohl(S_ADDR(INFO_MASK(info)));
} else {
msglog("ignore %s without mask", rtm_type_name(rtm->rtm_type));
return;
}
k = kern_add(S_ADDR(INFO_DST(info)), mask);
if (k->k_state & KS_NEW)
k->k_keep = now.tv_sec+keep;
/* A missing or non-AF_INET gateway is recorded with a metric of
* HOPCNT_INFINITY; otherwise the hop count is clamped to the
* range 0..HOPCNT_INFINITY-1.
*/
if (INFO_GATE(info) == 0) {
trace_act("note %s without gateway",
rtm_type_name(rtm->rtm_type));
k->k_metric = HOPCNT_INFINITY;
} else if (INFO_GATE(info)->sa_family != AF_INET) {
trace_act("note %s with gateway AF=%d",
rtm_type_name(rtm->rtm_type),
INFO_GATE(info)->sa_family);
k->k_metric = HOPCNT_INFINITY;
} else {
k->k_gate = S_ADDR(INFO_GATE(info));
k->k_metric = rtm->rtm_rmx.rmx_hopcount;
if (k->k_metric < 0)
k->k_metric = 0;
else if (k->k_metric > HOPCNT_INFINITY-1)
k->k_metric = HOPCNT_INFINITY-1;
}
/* Reset every state bit except KS_DYNAMIC, then rebuild the
* gateway and static bits from the kernel's flags.
*/
k->k_state &= ~(KS_DELETE | KS_ADD | KS_CHANGE | KS_DEL_ADD
| KS_DELETED | KS_GATEWAY | KS_STATIC
| KS_NEW | KS_CHECK);
if (rtm->rtm_flags & RTF_GATEWAY)
k->k_state |= KS_GATEWAY;
if (rtm->rtm_flags & RTF_STATIC)
k->k_state |= KS_STATIC;
/* Routes created or modified by a redirect. */
if (0 != (rtm->rtm_flags & (RTF_DYNAMIC | RTF_MODIFIED))) {
/* Find the interface of the redirect's author, if any. */
if (INFO_AUTHOR(info) != 0
&& INFO_AUTHOR(info)->sa_family == AF_INET)
ifp = iflookup(S_ADDR(INFO_AUTHOR(info)));
else
ifp = 0;
if (supplier
&& (ifp == 0 || !(ifp->int_state & IS_REDIRECT_OK))) {
/* Routers are not supposed to listen to redirects,
* so delete it if it came via an unknown interface
* or the interface does not have special permission.
*/
k->k_state &= ~KS_DYNAMIC;
k->k_state |= KS_DELETE;
LIM_SEC(need_kern, 0);
trace_act("mark for deletion redirected %s --> %s"
" via %s",
addrname(k->k_dst, k->k_mask, 0),
naddr_ntoa(k->k_gate),
ifp ? ifp->int_name : "unknown interface");
} else {
k->k_state |= KS_DYNAMIC;
k->k_redirect_time = now.tv_sec;
trace_act("accept redirected %s --> %s via %s",
addrname(k->k_dst, k->k_mask, 0),
naddr_ntoa(k->k_gate),
ifp ? ifp->int_name : "unknown interface");
}
return;
}
/* If it is not a static route, quit until the next comparison
* between the kernel and daemon tables, when it will be deleted.
*/
if (!(k->k_state & KS_STATIC)) {
k->k_state |= KS_DELETE;
LIM_SEC(need_kern, k->k_keep);
return;
}
/* Put static routes with real metrics into the daemon table so
* they can be advertised.
*
* Find the interface toward the gateway.
*/
ifp = iflookup(k->k_gate);
/* A missing interface is only logged; kern_check_static() is
* still called and handles a null ifp.
*/
if (ifp == 0)
msglog("static route %s --> %s impossibly lacks ifp",
addrname(S_ADDR(INFO_DST(info)), mask, 0),
naddr_ntoa(k->k_gate));
kern_check_static(k, ifp);
}
/* deal with packet loss
*/
static void
rtm_lose(struct rt_msghdr *rtm,
struct rt_addrinfo *info)
{
if (INFO_GATE(info) == 0
|| INFO_GATE(info)->sa_family != AF_INET) {
trace_act("ignore %s without gateway",
rtm_type_name(rtm->rtm_type));
return;
}
if (rdisc_ok)
rdisc_age(S_ADDR(INFO_GATE(info)));
age(S_ADDR(INFO_GATE(info)));
}
/* Make a gateway slot of an info structure point at a usable AF_INET
 * address.
 *
 * An AF_INET gateway is left alone.  An AF_LINK gateway is replaced by
 * the address of the interface it names: the caller's sockaddr_in is
 * filled in and the slot is pointed at it.
 *
 *	sap	address of the gateway slot; may be redirected to rsin
 *	rsin	caller-provided storage for a substituted address
 *
 * Returns 1 when the slot holds an AF_INET address on return, else 0
 * (no gateway, another address family, or an unknown interface).
 */
static int
get_info_gate(struct sockaddr **sap,
	      struct sockaddr_in *rsin)
{
	struct sockaddr_dl *sdl = (struct sockaddr_dl *)*sap;
	struct interface *ifp;

	if (sdl == 0)
		return 0;

	switch (sdl->sdl_family) {
	case AF_INET:
		return 1;		/* already useful */
	case AF_LINK:
		break;			/* translate below */
	default:
		return 0;
	}

	ifp = ifwithindex(sdl->sdl_index, 1);
	if (ifp == 0)
		return 0;

	rsin->sin_family = AF_INET;
#ifdef _HAVE_SA_LEN
	rsin->sin_len = sizeof(*rsin);
#endif
	rsin->sin_addr.s_addr = ifp->int_addr;
	*sap = (struct sockaddr*)rsin;
	return 1;
}
/* Clean the kernel table by copying it to the daemon image.
* Eventually the daemon will delete any extra routes.
*
* Steps:
* 1. mark every entry of the image with KS_CHECK;
* 2. fetch the kernel routing table with sysctl(NET_RT_DUMP);
* 3. feed each usable AF_INET route to rtm_add(), which clears
* KS_CHECK on the entries it touches;
* 4. report entries still marked KS_CHECK as having disappeared from
* the kernel and pass them to del_static().
*/
void
flush_kern(void)
{
/* The dump buffer is kept between calls and only replaced when
* the kernel reports that it is too small.
*/
static char *sysctl_buf;
static size_t sysctl_buf_size = 0;
size_t needed;
int mib[6];
char *next, *lim;
struct rt_msghdr *rtm;
struct sockaddr_in gate_sin;
struct rt_addrinfo info;
int i;
struct khash *k;
for (i = 0; i < KHASH_SIZE; i++) {
for (k = khash_bins[i]; k != 0; k = k->k_next) {
k->k_state |= KS_CHECK;
}
}
mib[0] = CTL_NET;
mib[1] = PF_ROUTE;
mib[2] = 0; /* protocol */
mib[3] = 0; /* wildcard address family */
mib[4] = NET_RT_DUMP;
mib[5] = 0; /* no flags */
/* Try the existing buffer first; on ENOMEM or EFAULT throw it away,
* ask the kernel for a size estimate, allocate a larger buffer and
* try again.
*/
for (;;) {
if ((needed = sysctl_buf_size) != 0) {
if (sysctl(mib, 6, sysctl_buf,&needed, 0, 0) >= 0)
break;
if (errno != ENOMEM && errno != EFAULT)
BADERR(1,"flush_kern: sysctl(RT_DUMP)");
free(sysctl_buf);
needed = 0;
}
if (sysctl(mib, 6, 0, &needed, 0, 0) < 0)
BADERR(1,"flush_kern: sysctl(RT_DUMP) estimate");
/* Kludge around the habit of some systems, such as
* BSD/OS 3.1, to not admit how many routes are in the
* kernel, or at least to be quite wrong.
*/
needed += 50*(sizeof(*rtm)+5*sizeof(struct sockaddr));
sysctl_buf = rtmalloc(sysctl_buf_size = needed,
"flush_kern sysctl(RT_DUMP)");
}
/* `needed' now holds the number of bytes the kernel returned.
* Walk the variable-length messages in the buffer.
*/
lim = sysctl_buf + needed;
for (next = sysctl_buf; next < lim; next += rtm->rtm_msglen) {
rtm = (struct rt_msghdr *)next;
/* A zero length would make this loop spin forever. */
if (rtm->rtm_msglen == 0) {
msglog("zero length kernel route at "
" %#lx in buffer %#lx before %#lx",
(u_long)rtm, (u_long)sysctl_buf, (u_long)lim);
break;
}
rt_xaddrs(&info,
(struct sockaddr *)(rtm+1),
(struct sockaddr *)(next + rtm->rtm_msglen),
rtm->rtm_addrs);
/* only AF_INET destinations are of interest */
if (INFO_DST(&info) == 0
|| INFO_DST(&info)->sa_family != AF_INET)
continue;
#if defined (RTF_LLINFO)
/* ignore ARP table entries on systems with a merged route
* and ARP table.
*/
if (rtm->rtm_flags & RTF_LLINFO)
continue;
#endif
#if defined(RTF_WASCLONED) && defined(__FreeBSD__)
/* ignore cloned routes
*/
if (rtm->rtm_flags & RTF_WASCLONED)
continue;
#endif
/* ignore multicast addresses
*/
if (IN_MULTICAST(ntohl(S_ADDR(INFO_DST(&info)))))
continue;
/* skip routes whose gateway cannot be expressed as AF_INET */
if (!get_info_gate(&INFO_GATE(&info), &gate_sin))
continue;
/* Note static routes and interface routes, and also
* preload the image of the kernel table so that
* we can later clean it, as well as avoid making
* unneeded changes. Keep the old kernel routes for a
* few seconds to allow a RIP or router-discovery
* response to be heard.
*/
rtm_add(rtm,&info,MIN_WAITTIME);
}
/* Entries still marked KS_CHECK were not seen in the dump. */
for (i = 0; i < KHASH_SIZE; i++) {
for (k = khash_bins[i]; k != 0; k = k->k_next) {
if (k->k_state & KS_CHECK) {
msglog("%s --> %s disappeared from kernel",
addrname(k->k_dst, k->k_mask, 0),
naddr_ntoa(k->k_gate));
del_static(k->k_dst, k->k_mask, k->k_gate, 1);
}
}
}
}
/* Listen to announcements from the kernel
*
* Reads messages from rt_sock until read() returns 0 or fails.
* Messages that originate from this process, interface and address
* notifications, and routes that are not plain AF_INET unicast routes
* are filtered out. The remaining add/change/redirect, delete and
* losing messages are dispatched to rtm_add(), del_static() and
* rtm_lose() respectively.
*/
void
read_rt(void)
{
long cc;
struct interface *ifp;
struct sockaddr_in gate_sin;
naddr mask, gate;
/* One buffer viewed either as a route message followed by its
* addresses or as an interface message.
*/
union {
struct {
struct rt_msghdr rtm;
struct sockaddr addrs[RTAX_MAX];
} r;
struct if_msghdr ifm;
} m;
/* Description of the current message, built up piece by piece for
* the trace output; strp points at its current end.
* NOTE(review): str is filled with strcpy()/sprintf() and no bounds
* checks; presumably 100 bytes covers the longest description --
* confirm, or switch to bounded formatting.
*/
char str[100], *strp;
struct rt_addrinfo info;
for (;;) {
cc = read(rt_sock, &m, sizeof(m));
if (cc <= 0) {
/* EWOULDBLOCK is not treated as an error */
if (cc < 0 && errno != EWOULDBLOCK)
LOGERR("read(rt_sock)");
return;
}
if (m.r.rtm.rtm_version != RTM_VERSION) {
msglog("bogus routing message version %d",
m.r.rtm.rtm_version);
continue;
}
/* Ignore our own results.
* The complaint is logged only once.
*/
if (m.r.rtm.rtm_type <= RTM_CHANGE
&& m.r.rtm.rtm_pid == mypid) {
static int complained = 0;
if (!complained) {
msglog("receiving our own change messages");
complained = 1;
}
continue;
}
/* Interface and address notifications carry no route. */
if (m.r.rtm.rtm_type == RTM_IFINFO
|| m.r.rtm.rtm_type == RTM_NEWADDR
|| m.r.rtm.rtm_type == RTM_DELADDR) {
ifp = ifwithindex(m.ifm.ifm_index,
m.r.rtm.rtm_type != RTM_DELADDR);
if (ifp == 0)
trace_act("note %s with flags %#x"
" for unknown interface index #%d",
rtm_type_name(m.r.rtm.rtm_type),
m.ifm.ifm_flags,
m.ifm.ifm_index);
else
trace_act("note %s with flags %#x for %s",
rtm_type_name(m.r.rtm.rtm_type),
m.ifm.ifm_flags,
ifp->int_name);
/* After being informed of a change to an interface,
* check them all now if the check would otherwise
* be a long time from now, if the interface is
* not known, or if the interface has been turned
* off or on.
*/
if (ifinit_timer.tv_sec-now.tv_sec>=CHECK_BAD_INTERVAL
|| ifp == 0
|| ((ifp->int_if_flags ^ m.ifm.ifm_flags)
& IFF_UP) != 0)
ifinit_timer.tv_sec = now.tv_sec;
continue;
}
#ifdef RTM_OIFINFO
if (m.r.rtm.rtm_type == RTM_OIFINFO)
continue; /* ignore compat message */
#endif
/* Start the description with the message type and, for
* add/delete/change, the sending pid.
*/
strcpy(str, rtm_type_name(m.r.rtm.rtm_type));
strp = &str[strlen(str)];
if (m.r.rtm.rtm_type <= RTM_CHANGE)
strp += sprintf(strp," from pid %d",m.r.rtm.rtm_pid);
rt_xaddrs(&info, m.r.addrs, &m.r.addrs[RTAX_MAX],
m.r.rtm.rtm_addrs);
if (INFO_DST(&info) == 0) {
trace_act("ignore %s without dst", str);
continue;
}
if (INFO_DST(&info)->sa_family != AF_INET) {
trace_act("ignore %s for AF %d", str,
INFO_DST(&info)->sa_family);
continue;
}
/* Use the explicit netmask if one is present, else the host
* mask for host routes, else the standard mask for the
* destination.
*/
mask = ((INFO_MASK(&info) != 0)
? ntohl(S_ADDR(INFO_MASK(&info)))
: (m.r.rtm.rtm_flags & RTF_HOST)
? HOST_MASK
: std_mask(S_ADDR(INFO_DST(&info))));
strp += sprintf(strp, ": %s",
addrname(S_ADDR(INFO_DST(&info)), mask, 0));
if (IN_MULTICAST(ntohl(S_ADDR(INFO_DST(&info))))) {
trace_act("ignore multicast %s", str);
continue;
}
#if defined(RTF_LLINFO)
if (m.r.rtm.rtm_flags & RTF_LLINFO) {
trace_act("ignore ARP %s", str);
continue;
}
#endif
#if defined(RTF_WASCLONED) && defined(__FreeBSD__)
if (m.r.rtm.rtm_flags & RTF_WASCLONED) {
trace_act("ignore cloned %s", str);
continue;
}
#endif
/* A gateway that cannot be expressed as AF_INET is treated
* as 0.
*/
if (get_info_gate(&INFO_GATE(&info), &gate_sin)) {
gate = S_ADDR(INFO_GATE(&info));
strp += sprintf(strp, " --> %s", naddr_ntoa(gate));
} else {
gate = 0;
}
if (INFO_AUTHOR(&info) != 0)
strp += sprintf(strp, " by authority of %s",
saddr_ntoa(INFO_AUTHOR(&info)));
switch (m.r.rtm.rtm_type) {
case RTM_ADD:
case RTM_CHANGE:
case RTM_REDIRECT:
if (m.r.rtm.rtm_errno != 0) {
trace_act("ignore %s with \"%s\" error",
str, strerror(m.r.rtm.rtm_errno));
} else {
trace_act("%s", str);
rtm_add(&m.r.rtm,&info,0);
}
break;
case RTM_DELETE:
/* ESRCH is accepted here: the delete is still
* processed.
*/
if (m.r.rtm.rtm_errno != 0
&& m.r.rtm.rtm_errno != ESRCH) {
trace_act("ignore %s with \"%s\" error",
str, strerror(m.r.rtm.rtm_errno));
} else {
trace_act("%s", str);
del_static(S_ADDR(INFO_DST(&info)), mask,
gate, 1);
}
break;
case RTM_LOSING:
trace_act("%s", str);
rtm_lose(&m.r.rtm,&info);
break;
default:
trace_act("ignore %s", str);
break;
}
}
}
/* after aggregating, note routes that belong in the kernel
*
* Called with each aggregated route. Finds or creates the matching
* entry in the kernel-table image and sets the KS_* flags that record
* what has to be done to the kernel (add, change, delete and re-add,
* or delete). No kernel call is made here.
*
* ag aggregated route; ag_dst_h is converted with htonl() for the
* lookup, ag_mask is used as is
*/
static void
kern_out(struct ag_info *ag)
{
struct khash *k;
/* Do not install bad routes if they are not already present.
* This includes routes that had RS_NET_SYN for interfaces that
* recently died.
*/
if (ag->ag_metric == HOPCNT_INFINITY) {
k = kern_find(htonl(ag->ag_dst_h), ag->ag_mask, 0);
if (k == 0)
return;
} else {
k = kern_add(htonl(ag->ag_dst_h), ag->ag_mask);
}
if (k->k_state & KS_NEW) {
/* will need to add new entry to the kernel table */
/* Plain assignment: this also clears KS_NEW. */
k->k_state = KS_ADD;
if (ag->ag_state & AGS_GATEWAY)
k->k_state |= KS_GATEWAY;
k->k_gate = ag->ag_gate;
k->k_metric = ag->ag_metric;
return;
}
/* static kernel routes are left untouched */
if (k->k_state & KS_STATIC)
return;
/* modify existing kernel entry if necessary */
if (k->k_gate != ag->ag_gate
|| k->k_metric != ag->ag_metric) {
/* Must delete bad interface routes etc. to change them. */
if (k->k_metric == HOPCNT_INFINITY)
k->k_state |= KS_DEL_ADD;
k->k_gate = ag->ag_gate;
k->k_metric = ag->ag_metric;
k->k_state |= KS_CHANGE;
}
/* If the daemon thinks the route should exist, forget
* about any redirections.
* If the daemon thinks the route should exist, eventually
* override manual intervention by the operator.
*/
if ((k->k_state & (KS_DYNAMIC | KS_DELETED)) != 0) {
k->k_state &= ~KS_DYNAMIC;
k->k_state |= (KS_ADD | KS_DEL_ADD);
}
/* A difference in the gateway flag between the image and the
* aggregated route requires a delete and re-add.
*/
if ((k->k_state & KS_GATEWAY)
&& !(ag->ag_state & AGS_GATEWAY)) {
k->k_state &= ~KS_GATEWAY;
k->k_state |= (KS_ADD | KS_DEL_ADD);
} else if (!(k->k_state & KS_GATEWAY)
&& (ag->ag_state & AGS_GATEWAY)) {
k->k_state |= KS_GATEWAY;
k->k_state |= (KS_ADD | KS_DEL_ADD);
}
/* Deleting-and-adding is necessary to change aspects of a route.
* Just delete instead of deleting and then adding a bad route.
* Otherwise, we want to keep the route in the kernel.
*/
if (k->k_metric == HOPCNT_INFINITY
&& (k->k_state & KS_DEL_ADD))
k->k_state |= KS_DELETE;
else
k->k_state &= ~KS_DELETE;
/* NOTE(review): RT is not defined anywhere in this function; this
* #undef looks like a leftover and appears to be harmless.
*/
#undef RT
}
/* Radix-tree walker that offers one daemon route to the aggregation code.
 *
 * `rn` is really the struct rt_entry being visited; `argp` is unused.
 * Routes that must not reach the kernel are skipped; every other route is
 * handed to ag_check() with kern_out() as the output callback.
 * Always returns 0.
 */
/* ARGSUSED */
static int
walk_kern(struct radix_node *rn,
	  struct walkarg *argp UNUSED)
{
#define RT ((struct rt_entry *)rn)
	char metric, pref;
	u_int ags = 0;

	/* Do not install synthetic routes */
	if (RT->rt_state & RS_NET_SYN)
		return 0;

	if (!(RT->rt_state & RS_IF)) {
		/* This is an ordinary route, not for an interface.
		 */

		/* aggregate, ordinary good routes without regard to
		 * their metric
		 */
		pref = 1;
		ags |= (AGS_GATEWAY | AGS_SUPPRESS | AGS_AGGREGATE);

		/* Do not install host routes directly to hosts, to avoid
		 * interfering with ARP entries in the kernel table.
		 *
		 * Both addresses are kept in network byte order, so they
		 * are compared without conversion.  Converting only one
		 * side made this test fail on little-endian machines.
		 */
		if (RT_ISHOST(RT)
		    && RT->rt_dst == RT->rt_gate)
			return 0;

	} else {
		/* This is an interface route.
		 * Do not install routes for "external" remote interfaces.
		 */
		if (RT->rt_ifp != 0 && (RT->rt_ifp->int_state & IS_EXTERNAL))
			return 0;

		/* Interfaces should override received routes.
		 */
		pref = 0;
		ags |= (AGS_IF | AGS_CORS_GATE);

		/* If it is not an interface, or an alias for an interface,
		 * it must be a "gateway."
		 *
		 * If it is a "remote" interface, it is also a "gateway" to
		 * the kernel if it is not an alias.
		 */
		if (RT->rt_ifp == 0
		    || (RT->rt_ifp->int_state & IS_REMOTE))
			ags |= (AGS_GATEWAY | AGS_SUPPRESS | AGS_AGGREGATE);
	}

	/* If RIP is off and IRDP is on, let the route to the discovered
	 * router suppress any RIP routes.  Eventually the RIP routes
	 * will time-out and be deleted.  This reaches the steady-state
	 * quicker.
	 */
	if ((RT->rt_state & RS_RDISC) && rip_sock < 0)
		ags |= AGS_CORS_GATE;

	metric = RT->rt_metric;
	if (metric == HOPCNT_INFINITY) {
		/* If the route is dead, try hard to aggregate. */
		pref = HOPCNT_INFINITY;
		ags |= (AGS_FINE_GATE | AGS_SUPPRESS);
		ags &= ~(AGS_IF | AGS_CORS_GATE);
	}

	ag_check(RT->rt_dst, RT->rt_mask, RT->rt_gate, 0,
		 metric,pref, 0, 0, ags, kern_out);
	return 0;
#undef RT
}
/* Bring the kernel table in line with the daemon table.
 *
 * First the daemon table is walked and aggregated so that kern_out()
 * refreshes the image of the kernel table, then every hash bin of that
 * image is scanned and the pending delete/add/change requests are issued
 * through kern_ioctl().
 */
static void
fix_kern(void)
{
	struct khash **link, *ent;
	int bin, rtflags;

	need_kern = age_timer;

	/* Walk daemon table, updating the copy of the kernel table.
	 */
	(void)rn_walktree(rhead, walk_kern, 0);
	ag_flush(0,0,kern_out);

	for (bin = 0; bin < KHASH_SIZE; bin++) {
		/* `link` always points at the pointer that leads to
		 * `ent`, so an entry can be unlinked in place.
		 */
		link = &khash_bins[bin];
		while ((ent = *link) != 0) {
			/* Do not touch static routes */
			if (ent->k_state & KS_STATIC) {
				kern_check_static(ent,0);
				link = &ent->k_next;
				continue;
			}

			/* check hold on routes deleted by the operator */
			if (ent->k_keep > now.tv_sec) {
				/* ensure we check when the hold is over */
				LIM_SEC(need_kern, ent->k_keep);
				/* mark for the next cycle */
				ent->k_state |= KS_DELETE;
				link = &ent->k_next;
				continue;
			}

			/* Entries marked for deletion that are not
			 * redirects are removed from the kernel and
			 * from the image.
			 */
			if ((ent->k_state & KS_DELETE)
			    && !(ent->k_state & KS_DYNAMIC)) {
				kern_ioctl(ent, RTM_DELETE, 0);
				*link = ent->k_next;
				free(ent);
				continue;
			}

			if (ent->k_state & KS_DEL_ADD)
				kern_ioctl(ent, RTM_DELETE, 0);

			rtflags = ((0 != (ent->k_state & (KS_GATEWAY
							  | KS_DYNAMIC)))
				   ? RTF_GATEWAY : 0);
			if (ent->k_state & KS_ADD)
				kern_ioctl(ent, RTM_ADD, rtflags);
			else if (ent->k_state & KS_CHANGE)
				kern_ioctl(ent, RTM_CHANGE, rtflags);

			ent->k_state &= ~(KS_ADD|KS_CHANGE|KS_DEL_ADD);

			/* Mark this route to be deleted in the next cycle.
			 * This deletes routes that disappear from the
			 * daemon table, since the normal aging code
			 * will clear the bit for routes that have not
			 * disappeared from the daemon table.
			 */
			ent->k_state |= KS_DELETE;
			link = &ent->k_next;
		}
	}
}
/* Delete a static route in the image of the kernel table.
 *
 * dst, mask: the route to look up.
 * gate:      gateway the entry must have, or 0 to match any gateway.
 * gone:      non-zero when the route has already been deleted.
 */
void
del_static(naddr dst,
	   naddr mask,
	   naddr gate,
	   int gone)
{
	struct khash *kent;
	struct rt_entry *route;

	/* Just mark it in the table to be deleted next time the kernel
	 * table is updated.
	 * If it has already been deleted, mark it as such, and set its
	 * keep-timer so that it will not be deleted again for a while.
	 * This lets the operator delete a route added by the daemon
	 * and add a replacement.
	 */
	kent = kern_find(dst, mask, 0);
	if (kent != 0 && (gate == 0 || kent->k_gate == gate)) {
		kent->k_state &= ~(KS_STATIC | KS_DYNAMIC | KS_CHECK);
		kent->k_state |= KS_DELETE;
		if (gone) {
			kent->k_keep = now.tv_sec + K_KEEP_LIM;
			kent->k_state |= KS_DELETED;
		}
	}

	/* A matching static route in the daemon table goes bad as well. */
	route = rtget(dst, mask);
	if (route == 0)
		return;
	if (route->rt_state & RS_STATIC)
		rtbad(route);
}
/* Delete all routes generated from ICMP Redirects that use a given gateway,
 * as well as old redirected routes.
 *
 * bad_gate: gateway whose redirected routes are to go.
 * old:      redirects with a k_redirect_time at or before this are dropped.
 *
 * When `supplier` is set, every redirected route is marked regardless of
 * its gateway or age.
 */
void
del_redirects(naddr bad_gate,
	      time_t old)
{
	struct khash *ent;
	int bin;

	for (bin = 0; bin < KHASH_SIZE; bin++) {
		for (ent = khash_bins[bin]; ent != 0; ent = ent->k_next) {
			/* Only dynamic, non-static entries are redirects. */
			if (!(ent->k_state & KS_DYNAMIC))
				continue;
			if (ent->k_state & KS_STATIC)
				continue;

			if (ent->k_gate == bad_gate
			    || ent->k_redirect_time <= old
			    || supplier) {
				ent->k_state |= KS_DELETE;
				ent->k_state &= ~KS_DYNAMIC;
				need_kern.tv_sec = now.tv_sec;
				trace_act("mark redirected %s --> %s for deletion",
					  addrname(ent->k_dst, ent->k_mask, 0),
					  naddr_ntoa(ent->k_gate));
			}
		}
	}
}
/* Start the daemon tables: set up the radix tree that holds the routes
 * and chain the aggregation slots into a free list.
 */
extern int max_keylen;

void
rtinit(void)
{
	int slot;

	/* Initialize the radix trees */
	max_keylen = sizeof(struct sockaddr_in);
	rn_init();
	rn_inithead((void**)&rhead, 32);

	/* mark all of the slots in the table free: each slot but the
	 * last points at its successor through ag_fine
	 */
	ag_avail = ag_slots;
	for (slot = 0; slot + 1 < NUM_AG_SLOTS; slot++)
		ag_slots[slot].ag_fine = &ag_slots[slot + 1];
}
/* Scratch AF_INET socket addresses used as keys for the radix-tree calls
 * in this file.  Callers store the wanted address (and mask) in sin_addr
 * immediately before each lookup, insertion or deletion.
 * The two variants differ only in whether the sockaddr carries a length
 * field (_HAVE_SIN_LEN).
 */
#ifdef _HAVE_SIN_LEN
static struct sockaddr_in dst_sock = {sizeof(dst_sock), AF_INET, 0, {0}, {0}};
static struct sockaddr_in mask_sock = {sizeof(mask_sock), AF_INET, 0, {0}, {0}};
#else
static struct sockaddr_in_new dst_sock = {_SIN_ADDR_SIZE, AF_INET};
static struct sockaddr_in_new mask_sock = {_SIN_ADDR_SIZE, AF_INET};
#endif
/* Ask for a flash update, unless one is already pending.
 */
static void
set_need_flash(void)
{
	if (need_flash)
		return;

	/* Do not send the flash update immediately.  Wait a little
	 * while to hear from other routers.
	 */
	no_flash.tv_sec = now.tv_sec + MIN_WAITTIME;
	need_flash = 1;
}
/* Get a particular routing table entry.
 *
 * Returns the entry whose destination and mask are exactly `dst` and
 * `mask`, or 0 when the lookup finds nothing or finds a different route.
 */
struct rt_entry *
rtget(naddr dst, naddr mask)
{
	struct rt_entry *found;

	dst_sock.sin_addr.s_addr = dst;
	mask_sock.sin_addr.s_addr = htonl(mask);
	masktrim(&mask_sock);

	found = (struct rt_entry *)rhead->rnh_lookup(&dst_sock,&mask_sock,rhead);
	if (found != 0 && found->rt_dst == dst && found->rt_mask == mask)
		return found;
	return 0;
}
/* Find a route to dst as the kernel would, by asking the radix tree for
 * the entry matching the address.
 */
struct rt_entry *
rtfind(naddr dst)
{
	struct rt_entry *match;

	dst_sock.sin_addr.s_addr = dst;
	match = (struct rt_entry *)rhead->rnh_matchaddr(&dst_sock, rhead);
	return match;
}
/* Add a route to the table.
 *
 * dst, mask: destination and mask of the new route.
 * state:     rt_state for the entry; RS_SUBNET is added here when `mask`
 *            is longer than, and contained in, the standard mask of `dst`.
 * new:       copied into the first spare slot, i.e. the route in use.
 *
 * The kernel table and a flash update are scheduled.  If the radix tree
 * refuses the entry, the failure is logged and the entry is released.
 */
void
rtadd(naddr	dst,
      naddr	mask,
      u_int	state,			/* rt_state for the entry */
      struct	rt_spare *new)
{
	struct rt_entry *rt;
	naddr smask;
	int i;
	struct rt_spare *rts;

	rt = (struct rt_entry *)rtmalloc(sizeof (*rt), "rtadd");
	memset(rt, 0, sizeof(*rt));

	/* every spare starts out unreachable */
	for (rts = rt->rt_spares, i = NUM_SPARES; i != 0; i--, rts++)
		rts->rts_metric = HOPCNT_INFINITY;

	rt->rt_nodes->rn_key = (caddr_t)&rt->rt_dst_sock;
	rt->rt_dst = dst;
	rt->rt_dst_sock.sin_family = AF_INET;
#ifdef _HAVE_SIN_LEN
	rt->rt_dst_sock.sin_len = dst_sock.sin_len;
#endif
	if (mask != HOST_MASK) {
		smask = std_mask(dst);
		if ((smask & ~mask) == 0 && mask > smask)
			state |= RS_SUBNET;
	}
	mask_sock.sin_addr.s_addr = htonl(mask);
	masktrim(&mask_sock);
	rt->rt_mask = mask;
	rt->rt_state = state;
	rt->rt_spares[0] = *new;
	rt->rt_time = now.tv_sec;
	rt->rt_poison_metric = HOPCNT_INFINITY;
	rt->rt_seqno = update_seqno;

	if (++total_routes == MAX_ROUTES)
		msglog("have maximum (%d) routes", total_routes);
	if (TRACEACTIONS)
		trace_add_del("Add", rt);

	need_kern.tv_sec = now.tv_sec;
	set_need_flash();

	if (0 == rhead->rnh_addaddr(&rt->rt_dst_sock, &mask_sock,
				    rhead, rt->rt_nodes)) {
		msglog("rnh_addaddr() failed for %s mask=%#lx",
		       naddr_ntoa(dst), (u_long)mask);
		/* The entry never made it into the tree, so it will never
		 * pass through rtdelete(); take it back out of the count
		 * here to keep total_routes accurate.
		 */
		total_routes--;
		free(rt);
	}
}
/* Notice a changed route: install `new` as the route in use for `rt`.
 *
 * state: new state bits; RS_SUBNET is carried over from the old state.
 * new:   replacement for the first spare slot.  Its rts_time may be
 *        adjusted here before it is copied.
 * label: tag for the trace output, or 0 for the default "Chg ".
 */
void
rtchange(struct rt_entry *rt,
	 u_int	state,			/* new state bits */
	 struct rt_spare *new,
	 char	*label)
{
	int metric_changed = (rt->rt_metric != new->rts_metric);
	int gate_changed = (rt->rt_gate != new->rts_gate);

	/* Fix the kernel immediately if it seems the route
	 * has gone bad, since there may be a working route that
	 * aggregates this route.
	 */
	if (metric_changed && new->rts_metric == HOPCNT_INFINITY) {
		need_kern.tv_sec = now.tv_sec;
		if (new->rts_time >= now.tv_sec - EXPIRE_TIME)
			new->rts_time = now.tv_sec - EXPIRE_TIME;
	}

	/* A different gateway always needs the kernel updated. */
	if (gate_changed)
		need_kern.tv_sec = now.tv_sec;

	/* Either kind of change is worth a flash update. */
	if (metric_changed || gate_changed) {
		rt->rt_seqno = update_seqno;
		set_need_flash();
	}

	state |= (rt->rt_state & RS_SUBNET);

	/* Keep various things from deciding ageless routes are stale.
	 */
	if (!AGE_RT(state, new->rts_ifp))
		new->rts_time = now.tv_sec;

	if (TRACEACTIONS)
		trace_change(rt, state, new,
			     label ? label : "Chg ");

	rt->rt_state = state;
	rt->rt_spares[0] = *new;
}
/* Check for a better route among the spares.
 *
 * Only the slots after the first one, which holds the route in use, are
 * considered.  Returns the slot preferred by BETTER_LINK().
 */
static struct rt_spare *
rts_better(struct rt_entry *rt)
{
	struct rt_spare *best, *cand;
	int slot;

	/* start with the first spare and let each later one challenge it */
	best = &rt->rt_spares[1];
	for (slot = 2; slot < NUM_SPARES; slot++) {
		cand = &rt->rt_spares[slot];
		if (BETTER_LINK(rt,cand,best))
			best = cand;
	}
	return best;
}
/* Switch to a backup route.
 *
 * rt:  the entry whose route in use may be replaced.
 * rts: the spare to switch to, or 0 to let rts_better() pick one.
 *
 * Nothing happens for permanent routes or when the chosen spare is not
 * better than the route in use.  After the switch the spare slot receives
 * the old route, or is emptied if the old route was unreachable.
 */
void
rtswitch(struct rt_entry *rt,
	 struct rt_spare *rts)
{
	struct rt_spare swap;
	char label[10];

	/* Do not change permanent routes */
	if (0 != (rt->rt_state & (RS_MHOME | RS_STATIC | RS_RDISC
				  | RS_NET_SYN | RS_IF)))
		return;

	/* find the best alternative among the spares */
	if (rts == 0)
		rts = rts_better(rt);

	/* Do not bother if it is not worthwhile.
	 */
	if (!BETTER_LINK(rt, rts, rt->rt_spares))
		return;

	swap = rt->rt_spares[0];

	/* Bounded formatting: the label buffer is small, so never let an
	 * unexpected slot number write past its end.
	 */
	(void)snprintf(label, sizeof(label), "Use #%d",
		       (int)(rts - rt->rt_spares));
	rtchange(rt, rt->rt_state & ~(RS_NET_SYN | RS_RDISC), rts, label);

	/* The old route takes the spare's place unless it was dead. */
	if (swap.rts_metric == HOPCNT_INFINITY) {
		*rts = rts_empty;
	} else {
		*rts = swap;
	}
}
/* Remove a route from the daemon table.
 *
 * Any matching entry in the image of the kernel table is marked for
 * deletion.  The entry is freed only when the radix tree hands back the
 * same entry; otherwise the failure is logged.
 */
void
rtdelete(struct rt_entry *rt)
{
	struct khash *kent;
	struct rt_entry *removed;

	if (TRACEACTIONS)
		trace_add_del("Del", rt);

	kent = kern_find(rt->rt_dst, rt->rt_mask, 0);
	if (kent != 0) {
		need_kern.tv_sec = now.tv_sec;
		kent->k_state |= KS_DELETE;
	}

	dst_sock.sin_addr.s_addr = rt->rt_dst;
	mask_sock.sin_addr.s_addr = htonl(rt->rt_mask);
	masktrim(&mask_sock);

	removed = (struct rt_entry *)rhead->rnh_deladdr(&dst_sock, &mask_sock,
							rhead);
	if (removed == rt) {
		total_routes--;
		free(rt);
	} else {
		msglog("rnh_deladdr() failed");
	}
}
/* Empty one spare slot of a route.
 *
 * The change is traced first, while the slot still holds its old
 * contents, and then the slot is overwritten with rts_empty.
 */
void
rts_delete(struct rt_entry *rt, struct rt_spare *rts)
{
	trace_upslot(rt, rts, &rts_empty);

	*rts = rts_empty;
}
/* Get rid of a bad route, and try to switch to a replacement.
*/
void
rtbad(struct rt_entry *rt)
{
struct rt_spare new;
/* Poison the route */
new = rt->rt_spares[0];
new.rts_metric = HOPCNT_INFINITY;
rtchange(rt, rt->rt_state & ~(RS_IF | RS_LOCAL | RS_STATIC), &new, 0);
rtswitch(rt, 0);
}
/* Junk a RS_NET_SYN or RS_LOCAL route,
* unless it is needed by another interface.
*/
void
rtbad_sub(struct rt_entry *rt)
{
struct interface *ifp, *ifp1;
struct intnet *intnetp;
u_int state;
ifp1 = 0;
state = 0;
if (rt->rt_state & RS_LOCAL) {
/* Is this the route through loopback for the interface?
* If so, see if it is used by any other interfaces, such
* as a point-to-point interface with the same local address.
*/
for (ifp = ifnet; ifp != 0; ifp = ifp->int_next) {
/* Retain it if another interface needs it.
*/
if (ifp->int_addr == rt->rt_ifp->int_addr) {
state |= RS_LOCAL;
ifp1 = ifp;
break;
}
}
}
if (!(state & RS_LOCAL)) {
/* Retain RIPv1 logical network route if there is another
* interface that justifies it.
*/
if (rt->rt_state & RS_NET_SYN) {
for (ifp = ifnet; ifp != 0; ifp = ifp->int_next) {
if ((ifp->int_state & IS_NEED_NET_SYN)
&& rt->rt_mask == ifp->int_std_mask
&& rt->rt_dst == ifp->int_std_addr) {
state |= RS_NET_SYN;
ifp1 = ifp;
break;
}
}
}
/* or if there is an authority route that needs it. */
for (intnetp = intnets;
intnetp != 0;
intnetp = intnetp->intnet_next) {
if (intnetp->intnet_addr == rt->rt_dst
&& intnetp->intnet_mask == rt->rt_mask) {
state |= (RS_NET_SYN | RS_NET_INT);
break;
}
}
}
if (ifp1 != 0 || (state & RS_NET_SYN)) {
struct rt_spare new = rt->rt_spares[0];
new.rts_ifp = ifp1;
rtchange(rt, ((rt->rt_state & ~(RS_NET_SYN|RS_LOCAL)) | state),
&new, 0);
} else {
rtbad(rt);
}
}
/* Called while walking the table looking for sick interfaces
 * or after a time change.
 *
 * `rn` is really the struct rt_entry being visited; `argp` is unused.
 * Always returns 0.
 */
/* ARGSUSED */
int
walk_bad(struct radix_node *rn,
	 struct walkarg *argp UNUSED)
{
	struct rt_entry *rt = (struct rt_entry *)rn;
	struct rt_spare *rts;
	int slot;

	/* fix any spare routes through the interface: a live spare with
	 * no interface, or with a broken one, is emptied
	 */
	for (slot = 1; slot < NUM_SPARES; slot++) {
		rts = &rt->rt_spares[slot];
		if (rts->rts_metric >= HOPCNT_INFINITY)
			continue;
		if (rts->rts_ifp == 0
		    || (rts->rts_ifp->int_state & IS_BROKE))
			rts_delete(rt, rts);
	}

	/* Deal with the main route
	 */
	/* finished if it has been handled before or if its interface is ok
	 */
	if (rt->rt_ifp == 0 || !(rt->rt_ifp->int_state & IS_BROKE))
		return 0;

	/* Bad routes for other than interfaces are easy; the rest need
	 * the closer look taken by rtbad_sub().
	 */
	if (rt->rt_state & (RS_IF | RS_NET_SYN | RS_LOCAL))
		rtbad_sub(rt);
	else
		rtbad(rt);
	return 0;
}
/* Check the age of an individual route.
 *
 * `rn` is really the struct rt_entry being visited; `argp` is unused.
 * Always returns 0.
 *
 * NOTE(review): RT is not #undef'd at the end of this function; confirm
 * nothing later in the file relies on it before adding one.
 */
/* ARGSUSED */
static int
walk_age(struct radix_node *rn,
	 struct walkarg *argp UNUSED)
{
#define RT ((struct rt_entry *)rn)
	struct interface *ifp;
	struct rt_spare *rts;
	int slot;

	/* age all of the spare routes, including the primary route
	 * currently in use (slot 0)
	 */
	for (slot = 0; slot < NUM_SPARES; slot++) {
		rts = &RT->rt_spares[slot];
		ifp = rts->rts_ifp;

		if (slot == 0) {
			if (!AGE_RT(RT->rt_state, ifp)) {
				/* Keep various things from deciding ageless
				 * routes are stale
				 */
				rts->rts_time = now.tv_sec;
				continue;
			}

			/* forget RIP routes after RIP has been turned off.
			 */
			if (rip_sock < 0) {
				rtdelete(RT);
				return 0;
			}
		}

		/* age failing routes
		 */
		if (rts->rts_gate == age_bad_gate
		    && rts->rts_time >= now_stale)
			rts->rts_time -= SUPPLY_INTERVAL;

		/* trash the spare routes when they go bad */
		if (slot != 0
		    && rts->rts_metric < HOPCNT_INFINITY
		    && now_garbage > rts->rts_time)
			rts_delete(RT, rts);
	}

	/* finished if the active route is still fresh */
	if (now_stale <= RT->rt_time)
		return 0;

	/* try to switch to an alternative */
	rtswitch(RT, 0);

	/* Delete a dead route after it has been publicly mourned. */
	if (now_garbage > RT->rt_time) {
		rtdelete(RT);
		return 0;
	}

	/* Start poisoning a bad route before deleting it. */
	if (now.tv_sec - RT->rt_time > EXPIRE_TIME) {
		struct rt_spare poisoned = RT->rt_spares[0];

		poisoned.rts_metric = HOPCNT_INFINITY;
		rtchange(RT, RT->rt_state, &poisoned, 0);
	}
	return 0;
}
/* Watch for dead routes and interfaces.
 *
 * bad_gate: gateway whose routes should be aged faster; it is made
 *           available to walk_age() through age_bad_gate and is also
 *           passed to del_redirects().
 *
 * In order: re-arms age_timer, checks the IS_REMOTE interfaces, ages the
 * daemon table, drops old redirects, updates the kernel table and finally
 * sends a RIP query if any remote interface is due for one.
 */
void
age(naddr bad_gate)
{
	struct interface *ifp;
	int need_query = 0;

	/* If not listening to RIP, there is no need to age the routes in
	 * the table.
	 */
	age_timer.tv_sec = (now.tv_sec
			    + ((rip_sock < 0) ? NEVER : SUPPLY_INTERVAL));

	/* Check for dead IS_REMOTE interfaces by timing their
	 * transmissions.
	 */
	for (ifp = ifnet; ifp; ifp = ifp->int_next) {
		if (!(ifp->int_state & IS_REMOTE))
			continue;

		/* ignore unreachable remote interfaces */
		if (!check_remote(ifp))
			continue;

		/* Restore remote interface that has become reachable
		 */
		if (ifp->int_state & IS_BROKE)
			if_ok(ifp, "remote ");

		if (ifp->int_act_time != NEVER
		    && now.tv_sec - ifp->int_act_time > EXPIRE_TIME) {
			/* The format uses %ld, so convert the elapsed
			 * time explicitly instead of passing whatever
			 * type the subtraction yields.
			 */
			long idle = (long)(now.tv_sec - ifp->int_act_time);

			msglog("remote interface %s to %s timed out after"
			       " %ld:%ld",
			       ifp->int_name,
			       naddr_ntoa(ifp->int_dstaddr),
			       idle/60,
			       idle%60);
			if_sick(ifp);
		}

		/* If we have not heard from the other router
		 * recently, ask it.
		 */
		if (now.tv_sec >= ifp->int_query_time) {
			ifp->int_query_time = NEVER;
			need_query = 1;
		}
	}

	/* Age routes. */
	age_bad_gate = bad_gate;
	(void)rn_walktree(rhead, walk_age, 0);

	/* delete old redirected routes to keep the kernel table small
	 * and prevent blackholes
	 */
	del_redirects(bad_gate, now.tv_sec-STALE_TIME);

	/* Update the kernel routing table. */
	fix_kern();

	/* poke reticent remote gateways */
	if (need_query)
		rip_query();
}