freebsd-nq/sys/net/radix.c
Qing Li e440aed958 This patch provides the back end support for equal-cost multi-path
(ECMP) for both IPv4 and IPv6. Previously, multipath route insertion
was disallowed. For example,

	route add -net 192.103.54.0/24 10.9.44.1
	route add -net 192.103.54.0/24 10.9.44.2

The second route insertion will trigger an error message of
"add net 192.103.54.0/24: gateway 10.2.5.2: route already in table"

Multiple default routes can also be inserted. Here is the netstat
output:

default		10.2.5.1	UGS	0	3074	bge0 =>
default		10.2.5.2	UGS	0	0	bge0

When multipath routes exist, the "route delete" command requires
a specific gateway to be specified or else an error message would
be displayed. For example,

	route delete default

would fail and trigger the following error message:

"route: writing to routing socket: No such process"
"delete net default: not in table"

On the other hand,

	route delete default 10.2.5.2

would be successful: "delete net default: gateway 10.2.5.2"

One does not have to specify a gateway if there is only a single
route for a particular destination.

I need to perform more testing on address aliases and multiple
interfaces that have the same IP prefixes. This patch as it
stands today is not yet ready for prime time. Therefore, the ECMP
code fragments are fully guarded by the RADIX_MPATH macro.
Include the "options  RADIX_MPATH" in the kernel configuration
to enable this feature.

Reviewed by:	robert, sam, gnn, julian, kmacy
2008-04-13 05:45:14 +00:00

1186 lines
31 KiB
C

/*-
* Copyright (c) 1988, 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)radix.c 8.5 (Berkeley) 5/19/95
* $FreeBSD$
*/
/*
* Routines to build and maintain radix trees for routing lookups.
*/
#ifndef _RADIX_H_
#include <sys/param.h>
#ifdef _KERNEL
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/domain.h>
#else
#include <stdlib.h>
#endif
#include <sys/syslog.h>
#include <net/radix.h>
#endif
#include "opt_mpath.h"
#ifdef RADIX_MPATH
#include <net/radix_mpath.h>
#endif
/*
 * Prototypes for the file-local tree walkers and low-level tree helpers
 * that are defined further down in this file.
 */
static int rn_walktree_from(struct radix_node_head *h, void *a, void *m,
walktree_f_t *f, void *w);
static int rn_walktree(struct radix_node_head *, walktree_f_t *, void *);
static struct radix_node
*rn_insert(void *, struct radix_node_head *, int *,
struct radix_node [2]),
*rn_newpair(void *, int, struct radix_node[2]),
*rn_search(void *, struct radix_node *),
*rn_search_m(void *, struct radix_node *, void *);
/*
 * Largest key length handled; in the kernel rn_init() raises it to the
 * largest dom_maxrtkey found in the domain list.
 */
static int max_keylen;
/* Free list of radix_mask structures, managed by MKGet()/MKFree(). */
static struct radix_mask *rn_mkfreelist;
/* Head of the tree that stores the netmasks themselves; set up by rn_init(). */
static struct radix_node_head *mask_rnhead;
/*
 * Work area -- the following point to 3 buffers of size max_keylen,
 * allocated in this order in a block of memory malloc'ed by rn_init.
 * rn_zeros is all-zero bytes, rn_ones is all-one bytes, and addmask_key
 * is the scratch buffer in which rn_addmask() builds the mask it looks up.
 */
static char *rn_zeros, *rn_ones, *addmask_key;
/*
 * MKGet(m): set m to a radix_mask structure, taken from the head of the
 * rn_mkfreelist free list when that list is non-empty and obtained through
 * R_Malloc() otherwise (m is whatever R_Malloc() stores on that path).
 *
 * MKFree(m): push the radix_mask m onto the head of rn_mkfreelist.
 *
 * Both macros expand to a single statement (do { } while (0)) so that
 * "MKGet(m);" and "MKFree(m);" behave as ordinary statements even as the
 * unbraced body of an if/else.  The argument is evaluated more than once,
 * so it must not have side effects.
 */
#define MKGet(m) do { \
	if (rn_mkfreelist) { \
		(m) = rn_mkfreelist; \
		rn_mkfreelist = (m)->rm_mklist; \
	} else \
		R_Malloc(m, struct radix_mask *, sizeof (struct radix_mask)); \
} while (0)
#define MKFree(m) do { \
	(m)->rm_mklist = rn_mkfreelist; \
	rn_mkfreelist = (m); \
} while (0)
/* Top node of the mask tree. */
#define rn_masktop (mask_rnhead->rnh_treetop)
/* Prototypes for further file-local helpers defined later in this file. */
static int rn_lexobetter(void *m_arg, void *n_arg);
static struct radix_mask *
rn_new_radix_mask(struct radix_node *tt,
struct radix_mask *next);
static int rn_satisfies_leaf(char *trial, struct radix_node *leaf,
int skip);
/*
* The data structure for the keys is a radix tree with one way
* branching removed. The index rn_bit at an internal node n represents a bit
* position to be tested. The tree is arranged so that all descendants
* of a node n have keys whose bits all agree up to position rn_bit - 1.
* (We say the index of n is rn_bit.)
*
* There is at least one descendant which has a one bit at position rn_bit,
* and at least one with a zero there.
*
* A route is determined by a pair of key and mask. We require that the
* bit-wise logical AND of the key and mask be the key.
* We define the index of a route associated with the mask to be
* the first bit number in the mask where 0 occurs (with bit number 0
* representing the highest order bit).
*
* We say a mask is normal if every bit is 0, past the index of the mask.
* If a node n has a descendant (k, m) with index(m) == index(n) == rn_bit,
* and m is a normal mask, then the route applies to every descendant of n.
* If the index(m) < rn_bit, this implies the trailing last few bits of k
* before bit b are all 0, (and hence consequently true of every descendant
* of n), so the route applies to all descendants of the node as well.
*
* Similar logic shows that a non-normal mask m such that
* index(m) <= index(n) could potentially apply to many children of n.
* Thus, for each non-host route, we attach its mask to a list at an internal
* node as high in the tree as we can go.
*
* The present version of the code makes use of normal routes in short-
* circuiting an explicit mask and compare operation when testing whether
* a key satisfies a normal route, and also in remembering the unique leaf
* that governs a subtree.
*/
/*
 * Most of the functions in this code assume that the key/mask arguments
 * are sockaddr-like structures, where the first byte is a u_char
 * indicating the size of the entire structure.
 *
 * To make the assumption more explicit, we use the LEN() macro to access
 * this field. It is safe to pass an expression with side effects
 * to LEN() as the argument is evaluated only once.
 *
 * LEN(x) reads the first byte at x as an unsigned value.
 */
#define LEN(x) (*(const u_char *)(x))
/*
* XXX THIS NEEDS TO BE FIXED
* In the code, pointers to keys and masks are passed as either
* 'void *' (because callers used to pass pointers of various kinds), or
* 'caddr_t' (which is fine for pointer arithmetic, but not very
* clean when you dereference it to access data). Furthermore, caddr_t
* is really 'char *', while the natural type to operate on keys and
* masks would be 'u_char'. This mismatch requires a lot of casts and
* intermediate variables to adapt types, which clutter the code.
*/
/*
 * Descend from 'head' to a leaf, following at every internal node the
 * branch selected by the bit of the key v_arg that the node tests
 * (byte rn_offset, bit mask rn_bmask).  Internal nodes have rn_bit >= 0;
 * the first node reached with a negative rn_bit is returned.  The leaf's
 * key is not compared against v_arg here.
 */
static struct radix_node *
rn_search(void *v_arg, struct radix_node *head)
{
	caddr_t key = v_arg;
	struct radix_node *node = head;

	while (node->rn_bit >= 0)
		node = (node->rn_bmask & key[node->rn_offset]) ?
		    node->rn_right : node->rn_left;
	return (node);
}
/*
* Same as above, but with an additional mask.
* XXX note this function is used only once.
*/
static struct radix_node *
rn_search_m(v_arg, head, m_arg)
struct radix_node *head;
void *v_arg, *m_arg;
{
register struct radix_node *x;
register caddr_t v = v_arg, m = m_arg;
for (x = head; x->rn_bit >= 0;) {
if ((x->rn_bmask & m[x->rn_offset]) &&
(x->rn_bmask & v[x->rn_offset]))
x = x->rn_right;
else
x = x->rn_left;
}
return x;
}
/*
 * Compare two masks, each prefixed by its length byte.
 *
 * Returns 0 as soon as n_arg has a bit set that m_arg does not have
 * (including any non-zero byte of n_arg beyond the end of m_arg).
 * Otherwise returns 1 if the masks differ -- either in the bytes they
 * have in common or because m_arg carries a non-zero byte beyond the end
 * of n_arg -- and 0 if they are equal.
 */
int
rn_refines(void *m_arg, void *n_arg)
{
	caddr_t m = m_arg, n = n_arg;
	caddr_t n_end = n + LEN(n);	/* one past the last byte of n */
	caddr_t common_end = n_end;	/* end of the part m also covers */
	int longer = LEN(n) - (int)LEN(m);
	int masks_are_equal = 1;

	/* Step over the two length bytes. */
	n++;
	m++;
	if (longer > 0)
		common_end -= longer;

	/* Bytes present in both masks. */
	for (; n < common_end; n++, m++) {
		if (*n & ~(*m))
			return 0;
		if (*n != *m)
			masks_are_equal = 0;
	}
	/* Whatever is left of n must be all zero. */
	for (; n < n_end; n++)
		if (*n)
			return 0;
	/* m is longer and equal so far: any non-zero tail makes it finer. */
	if (masks_are_equal && (longer < 0)) {
		caddr_t m_end = m - longer;

		for (; m < m_end; m++)
			if (*m)
				return 1;
	}
	return (!masks_are_equal);
}
struct radix_node *
rn_lookup(v_arg, m_arg, head)
void *v_arg, *m_arg;
struct radix_node_head *head;
{
register struct radix_node *x;
caddr_t netmask = 0;
if (m_arg) {
x = rn_addmask(m_arg, 1, head->rnh_treetop->rn_offset);
if (x == 0)
return (0);
netmask = x->rn_key;
}
x = rn_match(v_arg, head);
if (x && netmask) {
while (x && x->rn_mask != netmask)
x = x->rn_dupedkey;
}
return x;
}
static int
rn_satisfies_leaf(trial, leaf, skip)
char *trial;
register struct radix_node *leaf;
int skip;
{
register char *cp = trial, *cp2 = leaf->rn_key, *cp3 = leaf->rn_mask;
char *cplim;
int length = min(LEN(cp), LEN(cp2));
if (cp3 == 0)
cp3 = rn_ones;
else
length = min(length, *(u_char *)cp3);
cplim = cp + length; cp3 += skip; cp2 += skip;
for (cp += skip; cp < cplim; cp++, cp2++, cp3++)
if ((*cp ^ *cp2) & *cp3)
return 0;
return 1;
}
/*
 * Find the leaf in the tree 'head' that matches the key v_arg.
 *
 * The search proceeds in three stages:
 *  1. descend to a leaf and compare its key byte-for-byte with v_arg
 *     (limited to the mask length if the leaf has a mask);
 *  2. on a mismatch, try every entry of that leaf's duplicate-key chain;
 *  3. walk up through the parents, testing each mask annotation on the
 *     rn_mklist of every internal node, until the top node has been done.
 *
 * Returns the matching leaf, or 0 if nothing matches.  If the exact match
 * in stage 1 is a node flagged RNF_ROOT, its rn_dupedkey is returned
 * instead (which may be 0).
 */
struct radix_node *
rn_match(v_arg, head)
void *v_arg;
struct radix_node_head *head;
{
caddr_t v = v_arg;
register struct radix_node *t = head->rnh_treetop, *x;
register caddr_t cp = v, cp2;
caddr_t cplim;
struct radix_node *saved_t, *top = t;
int off = t->rn_offset, vlen = LEN(cp), matched_off;
register int test, b, rn_bit;
/*
 * Open code rn_search(v, top) to avoid overhead of extra
 * subroutine call.
 */
for (; t->rn_bit >= 0; ) {
if (t->rn_bmask & cp[t->rn_offset])
t = t->rn_right;
else
t = t->rn_left;
}
/*
 * See if we match exactly as a host destination
 * or at least learn how many bits match, for normal mask finesse.
 *
 * It doesn't hurt us to limit how many bytes to check
 * to the length of the mask, since if it matches we had a genuine
 * match and the leaf we have is the most specific one anyway;
 * if it didn't match with a shorter length it would fail
 * with a long one. This wins big for class B&C netmasks which
 * are probably the most common case...
 */
if (t->rn_mask)
vlen = *(u_char *)t->rn_mask;
/* Compare from the top node's byte offset up to vlen bytes. */
cp += off; cp2 = t->rn_key + off; cplim = v + vlen;
for (; cp < cplim; cp++, cp2++)
if (*cp != *cp2)
goto on1;
/*
 * This extra grot is in case we are explicitly asked
 * to look up the default. Ugh!
 *
 * Never return the root node itself, it seems to cause a
 * lot of confusion.
 */
if (t->rn_flags & RNF_ROOT)
t = t->rn_dupedkey;
return t;
on1:
/*
 * cp/cp2 point at the first differing byte.  Compute b, the bit index
 * (counted from the start of v, most significant bit first) of the first
 * differing bit, and rn_bit = -1 - b for comparison with the rn_bit and
 * rm_bit values stored in leaves and mask annotations.
 */
test = (*cp ^ *cp2) & 0xff; /* find first bit that differs */
for (b = 7; (test >>= 1) > 0;)
b--;
matched_off = cp - v;
b += matched_off << 3;
rn_bit = -1 - b;
/*
 * If there is a host route in a duped-key chain, it will be first.
 */
if ((saved_t = t)->rn_mask == 0)
t = t->rn_dupedkey;
for (; t; t = t->rn_dupedkey)
/*
 * Even if we don't match exactly as a host,
 * we may match if the leaf we wound up at is
 * a route to a net.
 */
if (t->rn_flags & RNF_NORMAL) {
/* Normal mask: comparing the indices is sufficient. */
if (rn_bit <= t->rn_bit)
return t;
} else if (rn_satisfies_leaf(v, t, matched_off))
return t;
t = saved_t;
/* start searching up the tree */
do {
register struct radix_mask *m;
t = t->rn_parent;
m = t->rn_mklist;
/*
 * If non-contiguous masks ever become important
 * we can restore the masking and open coding of
 * the search and satisfaction test and put the
 * calculation of "off" back before the "do".
 */
while (m) {
if (m->rm_flags & RNF_NORMAL) {
if (rn_bit <= m->rm_bit)
return (m->rm_leaf);
} else {
/*
 * Non-normal mask: search below t under the mask, pick the
 * duplicate-key entry carrying exactly this mask, and verify it
 * with a masked comparison.
 */
off = min(t->rn_offset, matched_off);
x = rn_search_m(v, t, m->rm_mask);
while (x && x->rn_mask != m->rm_mask)
x = x->rn_dupedkey;
if (x && rn_satisfies_leaf(v, x, off))
return x;
}
m = m->rm_mklist;
}
} while (t != top);
return 0;
}
/* Debugging state, compiled in only when RN_DEBUG is defined. */
#ifdef RN_DEBUG
/* Next sequence number stored in rn_info of newly set up nodes. */
int rn_nodenum;
/* Head of the list of created leaves, linked through rn_ybro. */
struct radix_node *rn_clist;
/* Not referenced by the code in this file. */
int rn_saveinfo;
/* Non-zero enables the before/after tree logging in rn_insert(). */
int rn_debug = 1;
#endif
/*
* Whenever we add a new leaf to the tree, we also add a parent node,
* so we allocate them as an array of two elements: the first one must be
* the leaf (see RNTORT() in route.c), the second one is the parent.
* This routine initializes the relevant fields of the nodes, so that
* the leaf is the left child of the parent node, and both nodes have
* (almost) all fields filled as appropriate.
* (XXX some fields are left unset, see the '#if 0' section).
* The function returns a pointer to the parent node.
*/
/*
 * Initialize the two-element array 'nodes' as a (leaf, internal node)
 * pair: nodes[0] becomes a leaf holding key v, nodes[1] becomes an
 * internal node testing bit b whose left child is that leaf.  Both are
 * flagged RNF_ACTIVE and get an empty mask list.  The internal node's
 * rn_right and rn_parent are left for the caller to fill in.
 * Returns the internal node.
 */
static struct radix_node *
rn_newpair(void *v, int b, struct radix_node nodes[2])
{
	struct radix_node *leaf = &nodes[0];
	struct radix_node *parent = &nodes[1];

	/* Internal node: test bit b, i.e. bit (b & 7) of byte (b >> 3). */
	parent->rn_bit = b;
	parent->rn_bmask = 0x80 >> (b & 7);
	parent->rn_offset = b >> 3;
	parent->rn_left = leaf;
	parent->rn_flags = RNF_ACTIVE;
	parent->rn_mklist = 0;
#if 0  /* XXX perhaps we should fill these fields as well. */
	parent->rn_parent = parent->rn_right = NULL;
	leaf->rn_mask = NULL;
	leaf->rn_dupedkey = NULL;
	leaf->rn_bmask = 0;
#endif
	/* Leaf: negative rn_bit marks it as a leaf. */
	leaf->rn_bit = -1;
	leaf->rn_key = (caddr_t)v;
	leaf->rn_parent = parent;
	leaf->rn_flags = RNF_ACTIVE;
	leaf->rn_mklist = 0;
#ifdef RN_DEBUG
	leaf->rn_info = rn_nodenum++;
	parent->rn_info = rn_nodenum++;
	leaf->rn_twin = parent;
	leaf->rn_ybro = rn_clist;
	rn_clist = leaf;
#endif
	return parent;
}
/*
 * Insert the key v_arg into the tree 'head', using nodes[] as storage.
 *
 * If a leaf with an identical key (compared from the top node's byte
 * offset up to the key length) already exists, *dupentry is set to 1 and
 * that existing leaf is returned; the tree and nodes[] are not touched.
 * Otherwise *dupentry is set to 0, nodes[] is initialized by rn_newpair()
 * as a leaf plus an internal node testing the first differing bit, the
 * internal node is spliced into the tree, and the new leaf is returned.
 */
static struct radix_node *
rn_insert(v_arg, head, dupentry, nodes)
void *v_arg;
struct radix_node_head *head;
int *dupentry;
struct radix_node nodes[2];
{
caddr_t v = v_arg;
struct radix_node *top = head->rnh_treetop;
int head_off = top->rn_offset, vlen = (int)LEN(v);
register struct radix_node *t = rn_search(v_arg, top);
register caddr_t cp = v + head_off;
register int b;
struct radix_node *tt;
/*
 * Find first bit at which v and t->rn_key differ
 */
{
register caddr_t cp2 = t->rn_key + head_off;
register int cmp_res;
caddr_t cplim = v + vlen;
while (cp < cplim)
if (*cp2++ != *cp++)
goto on1;
*dupentry = 1;
return t;
on1:
*dupentry = 0;
/*
 * cp and cp2 are one past the differing byte.  Start b at the bit count
 * through that byte and step back once per significant bit of the XOR,
 * which leaves b at the index of the first differing bit.
 */
cmp_res = (cp[-1] ^ cp2[-1]) & 0xff;
for (b = (cp - v) << 3; cmp_res; b--)
cmp_res >>= 1;
}
{
register struct radix_node *p, *x = top;
cp = v;
/*
 * Descend again from the top until reaching a node x that tests a bit
 * at or beyond b, or a leaf; p trails as its parent.  The unsigned cast
 * makes a leaf's negative rn_bit compare as a very large value, so the
 * loop also stops at leaves.
 */
do {
p = x;
if (cp[x->rn_offset] & x->rn_bmask)
x = x->rn_right;
else
x = x->rn_left;
} while (b > (unsigned) x->rn_bit);
/* x->rn_bit < b && x->rn_bit >= 0 */
#ifdef RN_DEBUG
if (rn_debug)
log(LOG_DEBUG, "rn_insert: Going In:\n"), traverse(p);
#endif
/* t is the new internal node, tt the new leaf (t's left child so far). */
t = rn_newpair(v_arg, b, nodes);
tt = t->rn_left;
/* Put t where x used to hang below p. */
if ((cp[p->rn_offset] & p->rn_bmask) == 0)
p->rn_left = t;
else
p->rn_right = t;
x->rn_parent = t;
t->rn_parent = p; /* frees x, p as temp vars below */
/* Place the new leaf on the side selected by bit b of the key, x on the other. */
if ((cp[t->rn_offset] & t->rn_bmask) == 0) {
t->rn_right = x;
} else {
t->rn_right = tt;
t->rn_left = x;
}
#ifdef RN_DEBUG
if (rn_debug)
log(LOG_DEBUG, "rn_insert: Coming Out:\n"), traverse(p);
#endif
}
return (tt);
}
struct radix_node *
rn_addmask(n_arg, search, skip)
int search, skip;
void *n_arg;
{
caddr_t netmask = (caddr_t)n_arg;
register struct radix_node *x;
register caddr_t cp, cplim;
register int b = 0, mlen, j;
int maskduplicated, m0, isnormal;
struct radix_node *saved_x;
static int last_zeroed = 0;
if ((mlen = LEN(netmask)) > max_keylen)
mlen = max_keylen;
if (skip == 0)
skip = 1;
if (mlen <= skip)
return (mask_rnhead->rnh_nodes);
if (skip > 1)
bcopy(rn_ones + 1, addmask_key + 1, skip - 1);
if ((m0 = mlen) > skip)
bcopy(netmask + skip, addmask_key + skip, mlen - skip);
/*
* Trim trailing zeroes.
*/
for (cp = addmask_key + mlen; (cp > addmask_key) && cp[-1] == 0;)
cp--;
mlen = cp - addmask_key;
if (mlen <= skip) {
if (m0 >= last_zeroed)
last_zeroed = mlen;
return (mask_rnhead->rnh_nodes);
}
if (m0 < last_zeroed)
bzero(addmask_key + m0, last_zeroed - m0);
*addmask_key = last_zeroed = mlen;
x = rn_search(addmask_key, rn_masktop);
if (bcmp(addmask_key, x->rn_key, mlen) != 0)
x = 0;
if (x || search)
return (x);
R_Zalloc(x, struct radix_node *, max_keylen + 2 * sizeof (*x));
if ((saved_x = x) == 0)
return (0);
netmask = cp = (caddr_t)(x + 2);
bcopy(addmask_key, cp, mlen);
x = rn_insert(cp, mask_rnhead, &maskduplicated, x);
if (maskduplicated) {
log(LOG_ERR, "rn_addmask: mask impossibly already in tree");
Free(saved_x);
return (x);
}
/*
* Calculate index of mask, and check for normalcy.
* First find the first byte with a 0 bit, then if there are
* more bits left (remember we already trimmed the trailing 0's),
* the pattern must be one of those in normal_chars[], or we have
* a non-contiguous mask.
*/
cplim = netmask + mlen;
isnormal = 1;
for (cp = netmask + skip; (cp < cplim) && *(u_char *)cp == 0xff;)
cp++;
if (cp != cplim) {
static char normal_chars[] = {
0, 0x80, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe, 0xff};
for (j = 0x80; (j & *cp) != 0; j >>= 1)
b++;
if (*cp != normal_chars[b] || cp != (cplim - 1))
isnormal = 0;
}
b += (cp - netmask) << 3;
x->rn_bit = -1 - b;
if (isnormal)
x->rn_flags |= RNF_NORMAL;
return (x);
}
static int /* XXX: arbitrary ordering for non-contiguous masks */
rn_lexobetter(m_arg, n_arg)
void *m_arg, *n_arg;
{
register u_char *mp = m_arg, *np = n_arg, *lim;
if (LEN(mp) > LEN(np))
return 1; /* not really, but need to check longer one first */
if (LEN(mp) == LEN(np))
for (lim = mp + LEN(mp); mp < lim;)
if (*mp++ > *np++)
return 1;
return 0;
}
static struct radix_mask *
rn_new_radix_mask(tt, next)
register struct radix_node *tt;
register struct radix_mask *next;
{
register struct radix_mask *m;
MKGet(m);
if (m == 0) {
log(LOG_ERR, "Mask for route not entered\n");
return (0);
}
bzero(m, sizeof *m);
m->rm_bit = tt->rn_bit;
m->rm_flags = tt->rn_flags;
if (tt->rn_flags & RNF_NORMAL)
m->rm_leaf = tt;
else
m->rm_mask = tt->rn_mask;
m->rm_mklist = next;
tt->rn_mklist = m;
return m;
}
/*
 * Add the route (key v_arg, mask n_arg) to the tree 'head', using the
 * caller-supplied storage treenodes[2] for the new leaf (and, for a new
 * key, the new internal node).
 *
 * Steps:
 *  - resolve the mask to its unique node in the mask tree (rn_addmask);
 *  - insert the key (rn_insert); if the key already exists, link the new
 *    leaf into the existing duplicate-key chain at its sorted position;
 *  - record mask, index and RNF_NORMAL on the new leaf;
 *  - for a new key, move mask annotations between the sibling subtree and
 *    the new internal node as required;
 *  - attach an annotation for the new route to the mask list of the
 *    highest ancestor it may be lifted to.
 *
 * Returns the new leaf, or 0 if the mask could not be entered or an
 * entry with the same key and mask already exists (and the RADIX_MPATH
 * multipath case does not apply).
 */
struct radix_node *
rn_addroute(v_arg, n_arg, head, treenodes)
void *v_arg, *n_arg;
struct radix_node_head *head;
struct radix_node treenodes[2];
{
caddr_t v = (caddr_t)v_arg, netmask = (caddr_t)n_arg;
register struct radix_node *t, *x = 0, *tt;
struct radix_node *saved_tt, *top = head->rnh_treetop;
short b = 0, b_leaf = 0;
int keyduplicated;
caddr_t mmask;
struct radix_mask *m, **mp;
/*
 * In dealing with non-contiguous masks, there may be
 * many different routes which have the same mask.
 * We will find it useful to have a unique pointer to
 * the mask to speed avoiding duplicate references at
 * nodes and possibly save time in calculating indices.
 */
if (netmask) {
if ((x = rn_addmask(netmask, 0, top->rn_offset)) == 0)
return (0);
/* b_leaf is the mask node's (negative) rn_bit, b the mask index. */
b_leaf = x->rn_bit;
b = -1 - x->rn_bit;
netmask = x->rn_key;
}
/*
 * Deal with duplicated keys: attach node to previous instance
 */
saved_tt = tt = rn_insert(v, head, &keyduplicated, treenodes);
if (keyduplicated) {
/*
 * Scan the chain for the insertion point: t trails tt, and the loop
 * stops with tt at the first entry the new route must precede.
 */
for (t = tt; tt; t = tt, tt = tt->rn_dupedkey) {
#ifdef RADIX_MPATH
/* permit multipath, if enabled for the family */
if (rn_mpath_capable(head) && netmask == tt->rn_mask) {
/*
 * go down to the end of multipaths, so that
 * new entry goes into the end of rn_dupedkey
 * chain.
 */
do {
t = tt;
tt = tt->rn_dupedkey;
} while (tt && t->rn_mask == tt->rn_mask);
break;
}
#endif
/* Same key and same mask: the route is already present. */
if (tt->rn_mask == netmask)
return (0);
if (netmask == 0 ||
(tt->rn_mask &&
((b_leaf < tt->rn_bit) /* index(netmask) > node */
|| rn_refines(netmask, tt->rn_mask)
|| rn_lexobetter(netmask, tt->rn_mask))))
break;
}
/*
 * If the mask is not duplicated, we wouldn't
 * find it among possible duplicate key entries
 * anyway, so the above test doesn't hurt.
 *
 * We sort the masks for a duplicated key the same way as
 * in a masklist -- most specific to least specific.
 * This may require the unfortunate nuisance of relocating
 * the head of the list.
 *
 * We also reverse, or doubly link the list through the
 * parent pointer.
 */
if (tt == saved_tt) {
/* x is borrowed as a temporary below; xx preserves the mask node. */
struct radix_node *xx = x;
/* link in at head of list */
(tt = treenodes)->rn_dupedkey = t;
tt->rn_flags = t->rn_flags;
tt->rn_parent = x = t->rn_parent;
t->rn_parent = tt; /* parent */
if (x->rn_left == t)
x->rn_left = tt;
else
x->rn_right = tt;
saved_tt = tt; x = xx;
} else {
/* Link the new leaf into the chain right after t. */
(tt = treenodes)->rn_dupedkey = t->rn_dupedkey;
t->rn_dupedkey = tt;
tt->rn_parent = t; /* parent */
if (tt->rn_dupedkey) /* parent */
tt->rn_dupedkey->rn_parent = tt; /* parent */
}
#ifdef RN_DEBUG
t=tt+1; tt->rn_info = rn_nodenum++; t->rn_info = rn_nodenum++;
tt->rn_twin = t; tt->rn_ybro = rn_clist; rn_clist = tt;
#endif
tt->rn_key = (caddr_t) v;
tt->rn_bit = -1;
tt->rn_flags = RNF_ACTIVE;
}
/*
 * Put mask in tree.
 */
if (netmask) {
tt->rn_mask = netmask;
tt->rn_bit = x->rn_bit;
tt->rn_flags |= x->rn_flags & RNF_NORMAL;
}
t = saved_tt->rn_parent;
if (keyduplicated)
goto on2;
/*
 * New key: t is the internal node just created by rn_insert() and x
 * becomes the sibling subtree of the new leaf.
 */
b_leaf = -1 - t->rn_bit;
if (t->rn_right == saved_tt)
x = t->rn_left;
else
x = t->rn_right;
/* Promote general routes from below */
if (x->rn_bit < 0) {
for (mp = &t->rn_mklist; x; x = x->rn_dupedkey)
if (x->rn_mask && (x->rn_bit >= b_leaf) && x->rn_mklist == 0) {
*mp = m = rn_new_radix_mask(x, 0);
if (m)
mp = &m->rm_mklist;
}
} else if (x->rn_mklist) {
/*
 * Skip over masks whose index is > that of new node
 */
for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
if (m->rm_bit >= b_leaf)
break;
/* The remainder of x's list moves up to the new internal node. */
t->rn_mklist = m; *mp = 0;
}
on2:
/* Add new route to highest possible ancestor's list */
if ((netmask == 0) || (b > t->rn_bit ))
return tt; /* can't lift at all */
b_leaf = tt->rn_bit;
/* Climb while the parent still tests a bit at or beyond the mask index. */
do {
x = t;
t = t->rn_parent;
} while (b <= t->rn_bit && x != top);
/*
 * Search through routes associated with node to
 * insert new route according to index.
 * Need same criteria as when sorting dupedkeys to avoid
 * double loop on deletion.
 */
for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist) {
if (m->rm_bit < b_leaf)
continue;
if (m->rm_bit > b_leaf)
break;
if (m->rm_flags & RNF_NORMAL) {
mmask = m->rm_leaf->rn_mask;
if (tt->rn_flags & RNF_NORMAL) {
log(LOG_ERR,
"Non-unique normal route, mask not entered\n");
return tt;
}
} else
mmask = m->rm_mask;
/* An annotation for this mask already exists: share it. */
if (mmask == netmask) {
m->rm_refs++;
tt->rn_mklist = m;
return tt;
}
if (rn_refines(netmask, mmask)
|| rn_lexobetter(netmask, mmask))
break;
}
*mp = rn_new_radix_mask(tt, *mp);
return tt;
}
/*
 * Remove the route (key v_arg, mask netmask_arg) from the tree 'head'.
 *
 * The leaf is located by key (and by mask within its duplicate-key
 * chain), its mask annotation is released or dereferenced, and the leaf
 * is unlinked -- either from the duplicate-key chain, or together with
 * its parent internal node from the tree.  Because each route's storage
 * is a (leaf, internal node) pair and the internal node of the pair may
 * still be in use elsewhere in the tree, such a node is relocated into
 * storage that stays in the tree.
 *
 * Returns the removed leaf, with RNF_ACTIVE cleared on it and on the
 * node following it (tt[1]).  Returns 0 if the route is not found, if the
 * annotation of a normal route is inconsistent, or if the leaf is flagged
 * RNF_ROOT.
 */
struct radix_node *
rn_delete(v_arg, netmask_arg, head)
void *v_arg, *netmask_arg;
struct radix_node_head *head;
{
register struct radix_node *t, *p, *x, *tt;
struct radix_mask *m, *saved_m, **mp;
struct radix_node *dupedkey, *saved_tt, *top;
caddr_t v, netmask;
int b, head_off, vlen;
v = v_arg;
netmask = netmask_arg;
x = head->rnh_treetop;
tt = rn_search(v, x);
head_off = x->rn_offset;
vlen = LEN(v);
saved_tt = tt;
top = x;
/* The leaf found must carry exactly the requested key. */
if (tt == 0 ||
bcmp(v + head_off, tt->rn_key + head_off, vlen - head_off))
return (0);
/*
 * Delete our route from mask lists.
 */
if (netmask) {
/* Search-only lookup of the mask; then find the chain entry using it. */
if ((x = rn_addmask(netmask, 1, head_off)) == 0)
return (0);
netmask = x->rn_key;
while (tt->rn_mask != netmask)
if ((tt = tt->rn_dupedkey) == 0)
return (0);
}
/* Nothing to release for a maskless leaf or one without an annotation. */
if (tt->rn_mask == 0 || (saved_m = m = tt->rn_mklist) == 0)
goto on1;
if (tt->rn_flags & RNF_NORMAL) {
if (m->rm_leaf != tt || m->rm_refs > 0) {
log(LOG_ERR, "rn_delete: inconsistent annotation\n");
return 0; /* dangling ref could cause disaster */
}
} else {
if (m->rm_mask != tt->rn_mask) {
log(LOG_ERR, "rn_delete: inconsistent annotation\n");
goto on1;
}
/* Annotation still referenced by other routes: keep it. */
if (--m->rm_refs >= 0)
goto on1;
}
b = -1 - tt->rn_bit;
t = saved_tt->rn_parent;
if (b > t->rn_bit)
goto on1; /* Wasn't lifted at all */
/* Climb to the ancestor holding the annotation (same walk as rn_addroute). */
do {
x = t;
t = t->rn_parent;
} while (b <= t->rn_bit && x != top);
for (mp = &x->rn_mklist; (m = *mp); mp = &m->rm_mklist)
if (m == saved_m) {
*mp = m->rm_mklist;
MKFree(m);
break;
}
if (m == 0) {
log(LOG_ERR, "rn_delete: couldn't find our annotation\n");
if (tt->rn_flags & RNF_NORMAL)
return (0); /* Dangling ref to us */
}
on1:
/*
 * Eliminate us from tree
 */
if (tt->rn_flags & RNF_ROOT)
return (0);
#ifdef RN_DEBUG
/* Get us out of the creation list */
for (t = rn_clist; t && t->rn_ybro != tt; t = t->rn_ybro) {}
if (t) t->rn_ybro = tt->rn_ybro;
#endif
t = tt->rn_parent;
dupedkey = saved_tt->rn_dupedkey;
if (dupedkey) {
/*
 * Here, tt is the deletion target and
 * saved_tt is the head of the dupekey chain.
 */
if (tt == saved_tt) {
/* remove from head of chain */
x = dupedkey; x->rn_parent = t;
if (t->rn_left == tt)
t->rn_left = x;
else
t->rn_right = x;
} else {
/* find node in front of tt on the chain */
for (x = p = saved_tt; p && p->rn_dupedkey != tt;)
p = p->rn_dupedkey;
if (p) {
p->rn_dupedkey = tt->rn_dupedkey;
if (tt->rn_dupedkey) /* parent */
tt->rn_dupedkey->rn_parent = p;
/* parent */
} else log(LOG_ERR, "rn_delete: couldn't find us\n");
}
/*
 * tt's companion node (tt + 1) may be an internal node that is still
 * part of the tree; if so, copy it into the companion slot of x and
 * re-point its parent and children at the copy.
 */
t = tt + 1;
if (t->rn_flags & RNF_ACTIVE) {
#ifndef RN_DEBUG
*++x = *t;
p = t->rn_parent;
#else
b = t->rn_info;
*++x = *t;
t->rn_info = b;
p = t->rn_parent;
#endif
if (p->rn_left == t)
p->rn_left = x;
else
p->rn_right = x;
x->rn_left->rn_parent = x;
x->rn_right->rn_parent = x;
}
goto out;
}
/* Sole entry for its key: splice the parent t out, promoting sibling x. */
if (t->rn_left == tt)
x = t->rn_right;
else
x = t->rn_left;
p = t->rn_parent;
if (p->rn_right == t)
p->rn_right = x;
else
p->rn_left = x;
x->rn_parent = p;
/*
 * Demote routes attached to us.
 */
if (t->rn_mklist) {
if (x->rn_bit >= 0) {
/* Sibling is an internal node: append t's list to the end of its list. */
for (mp = &x->rn_mklist; (m = *mp);)
mp = &m->rm_mklist;
*mp = t->rn_mklist;
} else {
/* If there are any key,mask pairs in a sibling
duped-key chain, some subset will appear sorted
in the same order attached to our mklist */
for (m = t->rn_mklist; m && x; x = x->rn_dupedkey)
if (m == x->rn_mklist) {
struct radix_mask *mm = m->rm_mklist;
x->rn_mklist = 0;
if (--(m->rm_refs) < 0)
MKFree(m);
m = mm;
}
if (m)
log(LOG_ERR,
"rn_delete: Orphaned Mask %p at %p\n",
(void *)m, (void *)x);
}
}
/*
 * We may be holding an active internal node in the tree.
 */
x = tt + 1;
if (t != x) {
/*
 * The internal node stored next to tt is a different node than the one
 * just spliced out; move its contents into t's storage and fix up the
 * pointers of its children and parent.
 */
#ifndef RN_DEBUG
*t = *x;
#else
b = t->rn_info;
*t = *x;
t->rn_info = b;
#endif
t->rn_left->rn_parent = t;
t->rn_right->rn_parent = t;
p = x->rn_parent;
if (p->rn_left == x)
p->rn_left = t;
else
p->rn_right = t;
}
out:
tt->rn_flags &= ~RNF_ACTIVE;
tt[1].rn_flags &= ~RNF_ACTIVE;
return (tt);
}
/*
 * This is the same as rn_walktree() except for the parameters and the
 * exit.
 *
 * The walk is restricted to the subtree selected by the key 'a' under the
 * mask 'm': the tree is descended along 'a' for as long as the tested bit
 * is set in 'm', and the leaves below the last internal node visited are
 * then processed.  For every leaf (and every entry on its duplicate-key
 * chain) not flagged RNF_ROOT, f(leaf, w) is called.
 *
 * Returns the first non-zero value returned by f, or 0 when the walk
 * completes.
 */
static int
rn_walktree_from(h, a, m, f, w)
struct radix_node_head *h;
void *a, *m;
walktree_f_t *f;
void *w;
{
int error;
struct radix_node *base, *next;
u_char *xa = (u_char *)a;
u_char *xm = (u_char *)m;
register struct radix_node *rn, *last = 0 /* shut up gcc */;
int stopping = 0;
int lastb;
/*
 * rn_search_m is sort-of-open-coded here. We cannot use the
 * function because we need to keep track of the last node seen.
 */
/* printf("about to search\n"); */
for (rn = h->rnh_treetop; rn->rn_bit >= 0; ) {
last = rn;
/* printf("rn_bit %d, rn_bmask %x, xm[rn_offset] %x\n",
rn->rn_bit, rn->rn_bmask, xm[rn->rn_offset]); */
/* Stop as soon as the node tests a bit the mask does not cover. */
if (!(rn->rn_bmask & xm[rn->rn_offset])) {
break;
}
if (rn->rn_bmask & xa[rn->rn_offset]) {
rn = rn->rn_right;
} else {
rn = rn->rn_left;
}
}
/* printf("done searching\n"); */
/*
 * Two cases: either we stepped off the end of our mask,
 * in which case last == rn, or we reached a leaf, in which
 * case we want to start from the last node we looked at.
 * Either way, last is the node we want to start from.
 */
rn = last;
/* lastb: bit index of the subtree root; climbing to it or above ends the walk. */
lastb = rn->rn_bit;
/* printf("rn %p, lastb %d\n", rn, lastb);*/
/*
 * This gets complicated because we may delete the node
 * while applying the function f to it, so we need to calculate
 * the successor node in advance.
 */
/* Start at the leftmost leaf of the subtree. */
while (rn->rn_bit >= 0)
rn = rn->rn_left;
while (!stopping) {
/* printf("node %p (%d)\n", rn, rn->rn_bit); */
base = rn;
/* If at right child go back up, otherwise, go right */
while (rn->rn_parent->rn_right == rn
&& !(rn->rn_flags & RNF_ROOT)) {
rn = rn->rn_parent;
/* if went up beyond last, stop */
if (rn->rn_bit <= lastb) {
stopping = 1;
/* printf("up too far\n"); */
/*
 * XXX we should jump to the 'Process leaves'
 * part, because the values of 'rn' and 'next'
 * we compute will not be used. Not a big deal
 * because this loop will terminate, but it is
 * inefficient and hard to understand!
 */
}
}
/*
 * At the top of the tree, no need to traverse the right
 * half, prevent the traversal of the entire tree in the
 * case of default route.
 */
if (rn->rn_parent->rn_flags & RNF_ROOT)
stopping = 1;
/* Find the next *leaf* since next node might vanish, too */
for (rn = rn->rn_parent->rn_right; rn->rn_bit >= 0;)
rn = rn->rn_left;
next = rn;
/* Process leaves */
while ((rn = base) != 0) {
/* Fetch the chain successor before f runs, as f may delete rn. */
base = rn->rn_dupedkey;
/* printf("leaf %p\n", rn); */
if (!(rn->rn_flags & RNF_ROOT)
&& (error = (*f)(rn, w)))
return (error);
}
rn = next;
if (rn->rn_flags & RNF_ROOT) {
/* printf("root, stopping"); */
stopping = 1;
}
}
return 0;
}
/*
 * Visit every leaf of the tree 'h' from left to right and call f(leaf, w)
 * for each leaf, and each entry on its duplicate-key chain, that is not
 * flagged RNF_ROOT.
 *
 * Because f may delete the node it is handed, both the successor leaf and
 * the next duplicate-key entry are determined before f is called.
 *
 * Returns the first non-zero value returned by f, or 0 once the walk
 * arrives at a leaf flagged RNF_ROOT.
 */
static int
rn_walktree(struct radix_node_head *h, walktree_f_t *f, void *w)
{
	struct radix_node *rn, *leaf, *dup, *next;
	int error;

	/* Begin at the leftmost leaf. */
	for (rn = h->rnh_treetop; rn->rn_bit >= 0; rn = rn->rn_left)
		continue;

	do {
		leaf = rn;

		/* Climb for as long as we are a right child. */
		while (rn->rn_parent->rn_right == rn &&
		    !(rn->rn_flags & RNF_ROOT))
			rn = rn->rn_parent;

		/* The successor is the leftmost leaf of the right sibling. */
		next = rn->rn_parent->rn_right;
		while (next->rn_bit >= 0)
			next = next->rn_left;

		/* Hand the current leaf and its duplicates to the callback. */
		for (; leaf != NULL; leaf = dup) {
			dup = leaf->rn_dupedkey;
			if (leaf->rn_flags & RNF_ROOT)
				continue;
			error = (*f)(leaf, w);
			if (error)
				return (error);
		}

		rn = next;
	} while (!(rn->rn_flags & RNF_ROOT));

	return (0);
}
/*
* Allocate and initialize an empty tree. This has 3 nodes, which are
* part of the radix_node_head (in the order <left,root,right>) and are
* marked RNF_ROOT so they cannot be freed.
* The leaves have all-zero and all-one keys, with significant
* bits starting at 'off'.
* Return 1 on success, 0 on error.
*/
int
rn_inithead(head, off)
void **head;
int off;
{
register struct radix_node_head *rnh;
register struct radix_node *t, *tt, *ttt;
if (*head)
return (1);
R_Zalloc(rnh, struct radix_node_head *, sizeof (*rnh));
if (rnh == 0)
return (0);
#ifdef _KERNEL
RADIX_NODE_HEAD_LOCK_INIT(rnh);
#endif
*head = rnh;
t = rn_newpair(rn_zeros, off, rnh->rnh_nodes);
ttt = rnh->rnh_nodes + 2;
t->rn_right = ttt;
t->rn_parent = t;
tt = t->rn_left; /* ... which in turn is rnh->rnh_nodes */
tt->rn_flags = t->rn_flags = RNF_ROOT | RNF_ACTIVE;
tt->rn_bit = -1 - off;
*ttt = *tt;
ttt->rn_key = rn_ones;
rnh->rnh_addaddr = rn_addroute;
rnh->rnh_deladdr = rn_delete;
rnh->rnh_matchaddr = rn_match;
rnh->rnh_lookup = rn_lookup;
rnh->rnh_walktree = rn_walktree;
rnh->rnh_walktree_from = rn_walktree_from;
rnh->rnh_treetop = t;
return (1);
}
/*
 * One-time setup of the radix code.
 *
 * In the kernel, max_keylen is first raised to the largest dom_maxrtkey
 * of all domains.  If max_keylen is still 0 an error is logged and
 * nothing else is done.  Otherwise a single block of 3 * max_keylen bytes
 * is allocated and divided into rn_zeros (all zero), rn_ones (all one
 * bits) and addmask_key (zeroed scratch space), and the mask tree
 * mask_rnhead is created.  Panics if either allocation step fails.
 */
void
rn_init(void)
{
	char *p;
#ifdef _KERNEL
	struct domain *dom;

	for (dom = domains; dom != NULL; dom = dom->dom_next) {
		if (dom->dom_maxrtkey > max_keylen)
			max_keylen = dom->dom_maxrtkey;
	}
#endif
	if (max_keylen == 0) {
		log(LOG_ERR,
		    "rn_init: radix functions require max_keylen be set\n");
		return;
	}

	R_Malloc(rn_zeros, char *, 3 * max_keylen);
	if (rn_zeros == NULL)
		panic("rn_init");
	bzero(rn_zeros, 3 * max_keylen);

	/* Carve the block up: zeros, then ones, then the scratch key. */
	rn_ones = rn_zeros + max_keylen;
	addmask_key = rn_ones + max_keylen;
	for (p = rn_ones; p < addmask_key; p++)
		*p = -1;

	if (rn_inithead((void **)(void *)&mask_rnhead, 0) == 0)
		panic("rn_init 2");
}