Convert the two-dimensional array to be malloced and introduce
an accessor function to get the correct rnh pointer back.

Update netstat to get the correct pointer using kvm_read()
as well.

This not only fixes the ABI problem depending on the kernel
option but also permits the tunable to override the kernel
option at boot time up to MAXFIBS, enlarging the number of
FIBs without having to recompile. So people could just use
GENERIC now.

Reviewed by:	julian, rwatson, zec
X-MFC:		not possible
This commit is contained in:
Bjoern A. Zeeb 2009-06-01 15:49:42 +00:00
parent b01c90a31a
commit c2c2a7c11e
13 changed files with 123 additions and 69 deletions

View File

@ -22,6 +22,12 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 8.x IS SLOW:
to maximize performance. (To disable malloc debugging, run
ln -s aj /etc/malloc.conf.)
20090601:
The way we are storing and accessing `routeing table' entries
has changed. Programs reading the FIB, like netstat, need to
be re-compiled.
Bump __FreeBSD_version to 800097.
20090530:
Remove the tunable/sysctl debug.mpsafevfs as its initial purpose
is no more valid.

View File

@ -1001,7 +1001,8 @@ if_detach_internal(struct ifnet *ifp, int vmove)
*/
for (i = 1; i <= AF_MAX; i++) {
for (j = 0; j < rt_numfibs; j++) {
if ((rnh = V_rt_tables[j][i]) == NULL)
rnh = rt_tables_get_rnh(j, i);
if (rnh == NULL)
continue;
RADIX_NODE_HEAD_LOCK(rnh);
(void) rnh->rnh_walktree(rnh, if_rtdel, ifp);

View File

@ -91,15 +91,7 @@ TUNABLE_INT("net.add_addr_allfibs", &rt_add_addr_allfibs);
#ifdef VIMAGE_GLOBALS
static struct rtstat rtstat;
/* by default only the first 'row' of tables will be accessed. */
/*
* XXXMRT When we fix netstat, and do this differently,
* we can allocate this dynamically. As long as we are keeping
* things backwards compatible we need to allocate this
* statically.
*/
struct radix_node_head *rt_tables[RT_MAXFIBS][AF_MAX+1];
struct radix_node_head *rt_tables;
static int rttrash; /* routes not in table but not freed */
#endif
@ -158,6 +150,32 @@ sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
SYSCTL_PROC(_net, OID_AUTO, my_fibnum, CTLTYPE_INT|CTLFLAG_RD,
NULL, 0, &sysctl_my_fibnum, "I", "default FIB of caller");
/*
 * Return the address of the radix_node_head pointer slot for the given
 * FIB (table) and address family (fam).
 *
 * V_rt_tables is treated as one flat array of
 * `struct radix_node_head *' entries, with AF_MAX+1 consecutive slots
 * per table; the slot for (table, fam) is therefore at index
 * table * (AF_MAX+1) + fam.
 *
 * Both arguments are range-checked with KASSERT: table against
 * [0, rt_numfibs) and fam against [0, AF_MAX].
 */
static __inline struct radix_node_head **
rt_tables_get_rnh_ptr(int table, int fam)
{
	INIT_VNET_NET(curvnet);
	struct radix_node_head **base;

	KASSERT(table >= 0 && table < rt_numfibs, ("%s: table out of bounds.",
	    __func__));
	KASSERT(fam >= 0 && fam < (AF_MAX+1), ("%s: fam out of bounds.",
	    __func__));

	/* base points at the slot for [fib=0][af=0]. */
	base = (struct radix_node_head **)V_rt_tables;

	/* Step over `table' full rows, then `fam' entries into that row. */
	return (&base[table * (AF_MAX+1) + fam]);
}
/*
 * Public accessor: return the radix_node_head pointer stored for the
 * given FIB (table) and address family (fam).  The returned pointer is
 * whatever is stored in the slot located by rt_tables_get_rnh_ptr();
 * callers in this change check it against NULL before use.
 */
struct radix_node_head *
rt_tables_get_rnh(int table, int fam)
{
	struct radix_node_head **slot;

	slot = rt_tables_get_rnh_ptr(table, fam);
	return (*slot);
}
static void
route_init(void)
{
@ -179,10 +197,14 @@ route_init(void)
static int vnet_route_iattach(const void *unused __unused)
{
INIT_VNET_NET(curvnet);
int table;
struct domain *dom;
struct radix_node_head **rnh;
int table;
int fam;
V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) *
sizeof(struct radix_node_head *), M_RTABLE, M_WAITOK|M_ZERO);
V_rtzone = uma_zcreate("rtentry", sizeof(struct rtentry), NULL, NULL,
NULL, NULL, UMA_ALIGN_PTR, 0);
for (dom = domains; dom; dom = dom->dom_next) {
@ -198,8 +220,10 @@ static int vnet_route_iattach(const void *unused __unused)
* (only for AF_INET and AF_INET6
* which don't need it anyhow)
*/
dom->dom_rtattach(
(void **)&V_rt_tables[table][fam],
rnh = rt_tables_get_rnh_ptr(table, fam);
if (rnh == NULL)
panic("%s: rnh NULL", __func__);
dom->dom_rtattach((void **)rnh,
dom->dom_rtoffset);
} else {
break;
@ -300,7 +324,7 @@ rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags,
KASSERT((fibnum < rt_numfibs), ("rtalloc1_fib: bad fibnum"));
if (dst->sa_family != AF_INET) /* Only INET supports > 1 fib now */
fibnum = 0;
rnh = V_rt_tables[fibnum][dst->sa_family];
rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
newrt = NULL;
/*
* Look up the address in the table for that Address Family
@ -362,7 +386,7 @@ rtfree(struct rtentry *rt)
struct radix_node_head *rnh;
KASSERT(rt != NULL,("%s: NULL rt", __func__));
rnh = V_rt_tables[rt->rt_fibnum][rt_key(rt)->sa_family];
rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family);
KASSERT(rnh != NULL,("%s: NULL rnh", __func__));
RT_LOCK_ASSERT(rt);
@ -463,8 +487,13 @@ rtredirect_fib(struct sockaddr *dst,
short *stat = NULL;
struct rt_addrinfo info;
struct ifaddr *ifa;
struct radix_node_head *rnh =
V_rt_tables[fibnum][dst->sa_family];
struct radix_node_head *rnh;
rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
if (rnh == NULL) {
error = EAFNOSUPPORT;
goto out;
}
/* verify the gateway is directly reachable */
if ((ifa = ifa_ifwithnet(gateway)) == NULL) {
@ -774,7 +803,7 @@ rtexpunge(struct rtentry *rt)
/*
* Find the correct routing tree to use for this Address Family
*/
rnh = V_rt_tables[rt->rt_fibnum][rt_key(rt)->sa_family];
rnh = rt_tables_get_rnh(rt->rt_fibnum, rt_key(rt)->sa_family);
RT_LOCK_ASSERT(rt);
if (rnh == NULL)
return (EAFNOSUPPORT);
@ -942,7 +971,7 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
/*
* Find the correct routing tree to use for this Address Family
*/
rnh = V_rt_tables[fibnum][dst->sa_family];
rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
if (rnh == NULL)
return (EAFNOSUPPORT);
needlock = ((flags & RTF_RNH_LOCKED) == 0);
@ -1134,9 +1163,9 @@ rt_setgate(struct rtentry *rt, struct sockaddr *dst, struct sockaddr *gate)
/* XXX dst may be overwritten, can we move this to below */
int dlen = SA_SIZE(dst), glen = SA_SIZE(gate);
#ifdef INVARIANTS
INIT_VNET_NET(curvnet);
struct radix_node_head *rnh =
V_rt_tables[rt->rt_fibnum][dst->sa_family];
struct radix_node_head *rnh;
rnh = rt_tables_get_rnh(rt->rt_fibnum, dst->sa_family);
#endif
RT_LOCK_ASSERT(rt);
@ -1203,7 +1232,6 @@ rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netma
static inline int
rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
{
INIT_VNET_NET(curvnet);
struct sockaddr *dst;
struct sockaddr *netmask;
struct rtentry *rt = NULL;
@ -1273,7 +1301,8 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
* Look up an rtentry that is in the routing tree and
* contains the correct info.
*/
if ((rnh = V_rt_tables[fibnum][dst->sa_family]) == NULL)
rnh = rt_tables_get_rnh(fibnum, dst->sa_family);
if (rnh == NULL)
/* this table doesn't exist but others might */
continue;
RADIX_NODE_HEAD_LOCK(rnh);

View File

@ -373,7 +373,8 @@ struct rt_addrinfo {
} \
} while (0)
extern struct radix_node_head *rt_tables[][AF_MAX+1];
extern struct radix_node_head *rt_tables;
struct radix_node_head *rt_tables_get_rnh(int, int);
struct ifmultiaddr;

View File

@ -460,7 +460,6 @@ static int
route_output(struct mbuf *m, struct socket *so)
{
#define sa_equal(a1, a2) (bcmp((a1), (a2), (a1)->sa_len) == 0)
INIT_VNET_NET(so->so_vnet);
struct rt_msghdr *rtm = NULL;
struct rtentry *rt = NULL;
struct radix_node_head *rnh;
@ -561,7 +560,8 @@ route_output(struct mbuf *m, struct socket *so)
case RTM_GET:
case RTM_CHANGE:
case RTM_LOCK:
rnh = V_rt_tables[so->so_fibnum][info.rti_info[RTAX_DST]->sa_family];
rnh = rt_tables_get_rnh(so->so_fibnum,
info.rti_info[RTAX_DST]->sa_family);
if (rnh == NULL)
senderr(EAFNOSUPPORT);
RADIX_NODE_HEAD_RLOCK(rnh);
@ -1418,10 +1418,9 @@ sysctl_ifmalist(int af, struct walkarg *w)
static int
sysctl_rtsock(SYSCTL_HANDLER_ARGS)
{
INIT_VNET_NET(curvnet);
int *name = (int *)arg1;
u_int namelen = arg2;
struct radix_node_head *rnh;
struct radix_node_head *rnh = NULL; /* silence compiler. */
int i, lim, error = EINVAL;
u_char af;
struct walkarg w;
@ -1469,7 +1468,8 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
* take care of routing entries
*/
for (error = 0; error == 0 && i <= lim; i++)
if ((rnh = V_rt_tables[req->td->td_proc->p_fibnum][i]) != NULL) {
rnh = rt_tables_get_rnh(req->td->td_proc->p_fibnum, i);
if (rnh != NULL) {
RADIX_NODE_HEAD_LOCK(rnh);
error = rnh->rnh_walktree(rnh,
sysctl_dumpentry, &w);

View File

@ -45,7 +45,7 @@ struct vnet_net {
struct knlist _ifklist;
struct rtstat _rtstat;
struct radix_node_head *_rt_tables[RT_MAXFIBS][AF_MAX+1];
struct radix_node_head *_rt_tables;
int _rttrash;
uma_zone_t _rtzone;

View File

@ -251,14 +251,14 @@ static void
in_rtqtimo(void *rock)
{
CURVNET_SET((struct vnet *) rock);
INIT_VNET_NET(curvnet);
INIT_VNET_INET(curvnet);
int fibnum;
void *newrock;
struct timeval atv;
for (fibnum = 0; fibnum < rt_numfibs; fibnum++) {
if ((newrock = V_rt_tables[fibnum][AF_INET]) != NULL)
newrock = rt_tables_get_rnh(fibnum, AF_INET);
if (newrock != NULL)
in_rtqtimo_one(newrock);
}
atv.tv_usec = 0;
@ -324,10 +324,9 @@ in_rtqdrain(void)
VNET_LIST_RLOCK();
VNET_FOREACH(vnet_iter) {
CURVNET_SET(vnet_iter);
INIT_VNET_NET(vnet_iter);
for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) {
rnh = V_rt_tables[fibnum][AF_INET];
rnh = rt_tables_get_rnh(fibnum, AF_INET);
arg.found = arg.killed = 0;
arg.rnh = rnh;
arg.nextstop = 0;
@ -423,7 +422,6 @@ in_ifadownkill(struct radix_node *rn, void *xap)
int
in_ifadown(struct ifaddr *ifa, int delete)
{
INIT_VNET_NET(curvnet);
struct in_ifadown_arg arg;
struct radix_node_head *rnh;
int fibnum;
@ -432,7 +430,7 @@ in_ifadown(struct ifaddr *ifa, int delete)
return 1;
for ( fibnum = 0; fibnum < rt_numfibs; fibnum++) {
rnh = V_rt_tables[fibnum][AF_INET];
rnh = rt_tables_get_rnh(fibnum, AF_INET);
arg.ifa = ifa;
arg.del = delete;
RADIX_NODE_HEAD_LOCK(rnh);

View File

@ -777,11 +777,11 @@ in6_ifattach(struct ifnet *ifp, struct ifnet *altifp)
void
in6_ifdetach(struct ifnet *ifp)
{
INIT_VNET_NET(ifp->if_vnet);
INIT_VNET_INET(ifp->if_vnet);
INIT_VNET_INET6(ifp->if_vnet);
struct in6_ifaddr *ia, *oia;
struct ifaddr *ifa, *next;
struct radix_node_head *rnh;
struct rtentry *rt;
short rtflags;
struct sockaddr_in6 sin6;
@ -874,15 +874,16 @@ in6_ifdetach(struct ifnet *ifp)
/* XXX: should not fail */
return;
/* XXX grab lock first to avoid LOR */
if (V_rt_tables[0][AF_INET6] != NULL) {
RADIX_NODE_HEAD_LOCK(V_rt_tables[0][AF_INET6]);
rnh = rt_tables_get_rnh(0, AF_INET6);
if (rnh != NULL) {
RADIX_NODE_HEAD_LOCK(rnh);
rt = rtalloc1((struct sockaddr *)&sin6, 0, RTF_RNH_LOCKED);
if (rt) {
if (rt->rt_ifp == ifp)
rtexpunge(rt);
RTFREE_LOCKED(rt);
}
RADIX_NODE_HEAD_UNLOCK(V_rt_tables[0][AF_INET6]);
RADIX_NODE_HEAD_UNLOCK(rnh);
}
}

View File

@ -289,13 +289,17 @@ static void
in6_rtqtimo(void *rock)
{
CURVNET_SET_QUIET((struct vnet *) rock);
INIT_VNET_NET(curvnet);
INIT_VNET_INET6(curvnet);
struct radix_node_head *rnh = V_rt_tables[0][AF_INET6];
struct radix_node_head *rnh;
struct rtqk_arg arg;
struct timeval atv;
static time_t last_adjusted_timeout = 0;
rnh = rt_tables_get_rnh(0, AF_INET6);
if (rnh == NULL) {
CURVNET_RESTORE();
return;
}
arg.found = arg.killed = 0;
arg.rnh = rnh;
arg.nextstop = time_uptime + V_rtq_timeout6;
@ -377,12 +381,16 @@ static void
in6_mtutimo(void *rock)
{
CURVNET_SET_QUIET((struct vnet *) rock);
INIT_VNET_NET(curvnet);
INIT_VNET_INET6(curvnet);
struct radix_node_head *rnh = V_rt_tables[0][AF_INET6];
struct radix_node_head *rnh;
struct mtuex_arg arg;
struct timeval atv;
rnh = rt_tables_get_rnh(0, AF_INET6);
if (rnh == NULL) {
CURVNET_RESTORE();
return;
}
arg.rnh = rnh;
arg.nextstop = time_uptime + MTUTIMO_DEFAULT;
RADIX_NODE_HEAD_LOCK(rnh);
@ -405,9 +413,12 @@ void
in6_rtqdrain(void)
{
INIT_VNET_NET(curvnet);
struct radix_node_head *rnh = V_rt_tables[0][AF_INET6];
struct radix_node_head *rnh;
struct rtqk_arg arg;
rnh = rt_tables_get_rnh(0, AF_INET6);
if (rnh == NULL)
panic("%s: rnh == NULL", __func__);
arg.found = arg.killed = 0;
arg.rnh = rnh;
arg.nextstop = 0;
@ -429,9 +440,6 @@ in6_rtqdrain(void)
int
in6_inithead(void **head, int off)
{
#ifdef INVARIANTS
INIT_VNET_NET(curvnet);
#endif
INIT_VNET_INET6(curvnet);
struct radix_node_head *rnh;
@ -447,7 +455,7 @@ in6_inithead(void **head, int off)
V_rtq_timeout6 = RTQ_TIMEOUT;
rnh = *head;
KASSERT(rnh == V_rt_tables[0][AF_INET6], ("rnh?"));
KASSERT(rnh == rt_tables_get_rnh(0, AF_INET6), ("rnh?"));
rnh->rnh_addaddr = in6_addroute;
rnh->rnh_matchaddr = in6_matroute;
callout_init(&V_rtq_timer6, CALLOUT_MPSAFE);

View File

@ -1549,7 +1549,6 @@ pfxlist_onlink_check()
int
nd6_prefix_onlink(struct nd_prefix *pr)
{
INIT_VNET_NET(curvnet);
INIT_VNET_INET6(curvnet);
struct ifaddr *ifa;
struct ifnet *ifp = pr->ndpr_ifp;
@ -1632,7 +1631,8 @@ nd6_prefix_onlink(struct nd_prefix *pr)
ifa->ifa_addr, (struct sockaddr *)&mask6, rtflags, &rt);
if (error == 0) {
if (rt != NULL) /* this should be non NULL, though */ {
rnh = V_rt_tables[rt->rt_fibnum][AF_INET6];
rnh = rt_tables_get_rnh(rt->rt_fibnum, AF_INET6);
/* XXX what if rnh == NULL? */
RADIX_NODE_HEAD_LOCK(rnh);
RT_LOCK(rt);
if (!rt_setgate(rt, rt_key(rt), (struct sockaddr *)&null_sdl)) {
@ -2058,8 +2058,7 @@ in6_init_address_ltimes(struct nd_prefix *new, struct in6_addrlifetime *lt6)
void
rt6_flush(struct in6_addr *gateway, struct ifnet *ifp)
{
INIT_VNET_NET(curvnet);
struct radix_node_head *rnh = V_rt_tables[0][AF_INET6];
struct radix_node_head *rnh;
int s = splnet();
/* We'll care only link-local addresses */
@ -2068,6 +2067,10 @@ rt6_flush(struct in6_addr *gateway, struct ifnet *ifp)
return;
}
rnh = rt_tables_get_rnh(0, AF_INET6);
if (rnh == NULL)
return;
RADIX_NODE_HEAD_LOCK(rnh);
rnh->rnh_walktree(rnh, rt6_deleteroute, (void *)gateway);
RADIX_NODE_HEAD_UNLOCK(rnh);

View File

@ -361,11 +361,15 @@ void
bootpboot_p_rtlist(void)
{
INIT_VNET_NET(curvnet);
struct radix_node_head *rnh;
printf("Routing table:\n");
RADIX_NODE_HEAD_RLOCK(V_rt_tables[0][AF_INET]); /* could sleep XXX */
bootpboot_p_tree(V_rt_tables[0][AF_INET]->rnh_treetop);
RADIX_NODE_HEAD_RUNLOCK(V_rt_tables[0][AF_INET]);
rnh = rt_tables_get_rnh(0, AF_INET);
if (rnh == NULL)
return;
RADIX_NODE_HEAD_RLOCK(rnh); /* could sleep XXX */
bootpboot_p_tree(rnh->rnh_treetop);
RADIX_NODE_HEAD_RUNLOCK(rnh);
}
void

View File

@ -57,7 +57,7 @@
* is created, otherwise 1.
*/
#undef __FreeBSD_version
#define __FreeBSD_version 800096 /* Master, propagated to newvers */
#define __FreeBSD_version 800097 /* Master, propagated to newvers */
#ifndef LOCORE
#include <sys/types.h>

View File

@ -122,12 +122,7 @@ int do_rtent = 0;
struct rtentry rtentry;
struct radix_node rnode;
struct radix_mask rmask;
struct rtline {
struct radix_node_head *tables[AF_MAX+1]; /*xxx*/
};
struct rtline *rt_tables;
struct radix_node_head *rt_tables_line[1][AF_MAX+1]; /*xxx*/
struct radix_node_head **rt_tables;
int NewTree = 0;
@ -155,7 +150,7 @@ static void domask(char *, in_addr_t, u_long);
void
routepr(u_long rtree)
{
struct radix_node_head *rnh, head;
struct radix_node_head **rnhp, *rnh, head;
size_t intsize;
int i;
int numfibs;
@ -165,7 +160,8 @@ routepr(u_long rtree)
fibnum = 0;
if (sysctlbyname("net.fibs", &numfibs, &intsize, NULL, 0) == -1)
numfibs = 1;
rt_tables = calloc(numfibs, sizeof(struct rtline));
rt_tables = calloc(numfibs * (AF_MAX+1),
sizeof(struct radix_node_head *));
if (rt_tables == NULL)
err(EX_OSERR, "memory allocation failed");
/*
@ -186,8 +182,8 @@ routepr(u_long rtree)
return;
}
if (kread((u_long)(rtree), (char *)(rt_tables),
(numfibs * sizeof(struct rtline))) != 0)
if (kread((u_long)(rtree), (char *)(rt_tables), (numfibs *
(AF_MAX+1) * sizeof(struct radix_node_head *))) != 0)
return;
for (i = 0; i <= AF_MAX; i++) {
int tmpfib;
@ -195,8 +191,15 @@ routepr(u_long rtree)
tmpfib = 0;
else
tmpfib = fibnum;
if ((rnh = rt_tables[tmpfib].tables[i]) == 0)
rnhp = (struct radix_node_head **)*rt_tables;
/* Calculate the in-kernel address. */
rnhp += tmpfib * (AF_MAX+1) + i;
/* Read the in-kernel rnh pointer. */
if (kget(rnhp, rnh) != 0)
continue;
if (rnh == NULL)
continue;
/* Read the rnh data. */
if (kget(rnh, head) != 0)
continue;
if (i == AF_UNSPEC) {