freebsd-skq/sys/net/radix.h
Qing Li e440aed958 This patch provides the back end support for equal-cost multi-path
(ECMP) for both IPv4 and IPv6. Previously, multipath route insertion
is disallowed. For example,

	route add -net 192.103.54.0/24 10.9.44.1
	route add -net 192.103.54.0/24 10.9.44.2

The second route insertion will trigger an error message of
"add net 192.103.54.0/24: gateway 10.2.5.2: route already in table"

Multiple default routes can also be inserted. Here is the netstat
output:

default		10.2.5.1	UGS	0	3074	bge0 =>
default		10.2.5.2	UGS	0	0	bge0

When multipath routes exist, the "route delete" command requires
a specific gateway to be specified or else an error message would
be displayed. For example,

	route delete default

would fail and trigger the following error message:

"route: writing to routing socket: No such process"
"delete net default: not in table"

On the other hand,

	route delete default 10.2.5.2

would be successful: "delete net default: gateway 10.2.5.2"

One does not have to specify a gateway if there is only a single
route for a particular destination.

I need to perform more testings on address aliases and multiple
interfaces that have the same IP prefixes. This patch as it
stands today is not yet ready for prime time. Therefore, the ECMP
code fragments are fully guarded by the RADIX_MPATH macro.
Include the "options  RADIX_MPATH" in the kernel configuration
to enable this feature.

Reviewed by:	robert, sam, gnn, julian, kmacy
2008-04-13 05:45:14 +00:00

169 lines
6.3 KiB
C

/*-
* Copyright (c) 1988, 1989, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 4. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* @(#)radix.h 8.2 (Berkeley) 10/31/94
* $FreeBSD$
*/
#ifndef _RADIX_H_
#define _RADIX_H_
#ifdef _KERNEL
#include <sys/_lock.h>
#include <sys/_mutex.h>
#endif
#ifdef MALLOC_DECLARE
MALLOC_DECLARE(M_RTABLE);
#endif
/*
* Radix search tree node layout.
*/
struct radix_node {
struct radix_mask *rn_mklist; /* list of masks contained in subtree */
struct radix_node *rn_parent; /* parent */
short rn_bit; /* bit offset; -1-index(netmask) */
char rn_bmask; /* node: mask for bit test*/
u_char rn_flags; /* enumerated next */
#define RNF_NORMAL 1 /* leaf contains normal route */
#define RNF_ROOT 2 /* leaf is root leaf for tree */
#define RNF_ACTIVE 4 /* This node is alive (for rtfree) */
union {
struct { /* leaf only data: */
caddr_t rn_Key; /* object of search */
caddr_t rn_Mask; /* netmask, if present */
struct radix_node *rn_Dupedkey;
} rn_leaf;
struct { /* node only data: */
int rn_Off; /* where to start compare */
struct radix_node *rn_L;/* progeny */
struct radix_node *rn_R;/* progeny */
} rn_node;
} rn_u;
#ifdef RN_DEBUG
int rn_info;
struct radix_node *rn_twin;
struct radix_node *rn_ybro;
#endif
};
#define rn_dupedkey rn_u.rn_leaf.rn_Dupedkey
#define rn_key rn_u.rn_leaf.rn_Key
#define rn_mask rn_u.rn_leaf.rn_Mask
#define rn_offset rn_u.rn_node.rn_Off
#define rn_left rn_u.rn_node.rn_L
#define rn_right rn_u.rn_node.rn_R
/*
* Annotations to tree concerning potential routes applying to subtrees.
*/
struct radix_mask {
short rm_bit; /* bit offset; -1-index(netmask) */
char rm_unused; /* cf. rn_bmask */
u_char rm_flags; /* cf. rn_flags */
struct radix_mask *rm_mklist; /* more masks to try */
union {
caddr_t rmu_mask; /* the mask */
struct radix_node *rmu_leaf; /* for normal routes */
} rm_rmu;
int rm_refs; /* # of references to this struct */
};
#define rm_mask rm_rmu.rmu_mask
#define rm_leaf rm_rmu.rmu_leaf /* extra field would make 32 bytes */
typedef int walktree_f_t(struct radix_node *, void *);
struct radix_node_head {
struct radix_node *rnh_treetop;
int rnh_addrsize; /* permit, but not require fixed keys */
int rnh_pktsize; /* permit, but not require fixed keys */
struct radix_node *(*rnh_addaddr) /* add based on sockaddr */
(void *v, void *mask,
struct radix_node_head *head, struct radix_node nodes[]);
struct radix_node *(*rnh_addpkt) /* add based on packet hdr */
(void *v, void *mask,
struct radix_node_head *head, struct radix_node nodes[]);
struct radix_node *(*rnh_deladdr) /* remove based on sockaddr */
(void *v, void *mask, struct radix_node_head *head);
struct radix_node *(*rnh_delpkt) /* remove based on packet hdr */
(void *v, void *mask, struct radix_node_head *head);
struct radix_node *(*rnh_matchaddr) /* locate based on sockaddr */
(void *v, struct radix_node_head *head);
struct radix_node *(*rnh_lookup) /* locate based on sockaddr */
(void *v, void *mask, struct radix_node_head *head);
struct radix_node *(*rnh_matchpkt) /* locate based on packet hdr */
(void *v, struct radix_node_head *head);
int (*rnh_walktree) /* traverse tree */
(struct radix_node_head *head, walktree_f_t *f, void *w);
int (*rnh_walktree_from) /* traverse tree below a */
(struct radix_node_head *head, void *a, void *m,
walktree_f_t *f, void *w);
void (*rnh_close) /* do something when the last ref drops */
(struct radix_node *rn, struct radix_node_head *head);
struct radix_node rnh_nodes[3]; /* empty tree for common case */
int rnh_multipath; /* multipath capable ? */
#ifdef _KERNEL
struct mtx rnh_mtx; /* locks entire radix tree */
#endif
};
#ifndef _KERNEL
#define R_Malloc(p, t, n) (p = (t) malloc((unsigned int)(n)))
#define R_Zalloc(p, t, n) (p = (t) calloc(1,(unsigned int)(n)))
#define Free(p) free((char *)p);
#else
#define R_Malloc(p, t, n) (p = (t) malloc((unsigned long)(n), M_RTABLE, M_NOWAIT))
#define R_Zalloc(p, t, n) (p = (t) malloc((unsigned long)(n), M_RTABLE, M_NOWAIT | M_ZERO))
#define Free(p) free((caddr_t)p, M_RTABLE);
#define RADIX_NODE_HEAD_LOCK_INIT(rnh) \
mtx_init(&(rnh)->rnh_mtx, "radix node head", NULL, MTX_DEF | MTX_RECURSE)
#define RADIX_NODE_HEAD_LOCK(rnh) mtx_lock(&(rnh)->rnh_mtx)
#define RADIX_NODE_HEAD_UNLOCK(rnh) mtx_unlock(&(rnh)->rnh_mtx)
#define RADIX_NODE_HEAD_DESTROY(rnh) mtx_destroy(&(rnh)->rnh_mtx)
#define RADIX_NODE_HEAD_LOCK_ASSERT(rnh) mtx_assert(&(rnh)->rnh_mtx, MA_OWNED)
#endif /* _KERNEL */
void rn_init(void);
int rn_inithead(void **, int);
int rn_refines(void *, void *);
struct radix_node
*rn_addmask(void *, int, int),
*rn_addroute (void *, void *, struct radix_node_head *,
struct radix_node [2]),
*rn_delete(void *, void *, struct radix_node_head *),
*rn_lookup (void *v_arg, void *m_arg,
struct radix_node_head *head),
*rn_match(void *, struct radix_node_head *);
#endif /* _RADIX_H_ */