One of the advantages of enabling ECMP (a.k.a. RADIX_MPATH) is to
allow for connection load balancing across interfaces. Currently
the address alias handling method collides with the ECMP code.
For example, when two interfaces are configured on the same prefix,
only one prefix route is installed, so connection load balancing
among the available interfaces is not possible.

The other advantage of ECMP is failover. The issue with the
current code is that the interface link-state is not reflected
in the route entry. For example, if there are two interfaces on
the same prefix and the cable on one interface is unplugged, new and
existing connections should switch over to the other interface.
This is not done today, and packets go into a black hole.

Also, there is a small bug in the kernel where deleting ECMP routes
from userland always returns an error even though the command
is executed successfully.

MFC after:	5 days
This commit is contained in:
Qing Li 2010-03-09 01:11:45 +00:00
parent 13d85d4382
commit c7ea0aa648
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=204902
7 changed files with 51 additions and 11 deletions

View File

@ -472,7 +472,8 @@ flow_stale(struct flowtable *ft, struct flentry *fle)
|| ((fle->f_rt->rt_flags & RTF_HOST) &&
((fle->f_rt->rt_flags & (RTF_UP))
!= (RTF_UP)))
|| (fle->f_rt->rt_ifp == NULL))
|| (fle->f_rt->rt_ifp == NULL)
|| !RT_LINK_IS_UP(fle->f_rt->rt_ifp))
return (1);
idle_time = time_uptime - fle->f_uptime;

View File

@ -761,8 +761,10 @@ rn_addroute(v_arg, n_arg, head, treenodes)
if (m->rm_flags & RNF_NORMAL) {
mmask = m->rm_leaf->rn_mask;
if (tt->rn_flags & RNF_NORMAL) {
#if !defined(RADIX_MPATH)
log(LOG_ERR,
"Non-unique normal route, mask not entered\n");
#endif
return tt;
}
} else

View File

@ -270,7 +270,8 @@ rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum)
* XXX we don't attempt to lookup cached route again; what should
* be done for sendto(3) case?
*/
if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP))
if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)
&& RT_LINK_IS_UP(ro->ro_rt->rt_ifp))
return;
ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, 0, fibnum);

View File

@ -830,7 +830,13 @@ rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum)
int
rtexpunge(struct rtentry *rt)
{
#if !defined(RADIX_MPATH)
struct radix_node *rn;
#else
struct rt_addrinfo info;
int fib;
struct rtentry *rt0;
#endif
struct radix_node_head *rnh;
struct ifaddr *ifa;
int error = 0;
@ -843,14 +849,26 @@ rtexpunge(struct rtentry *rt)
if (rnh == NULL)
return (EAFNOSUPPORT);
RADIX_NODE_HEAD_LOCK_ASSERT(rnh);
#if 0
/*
* We cannot assume anything about the reference count
* because protocols call us in many situations; often
* before unwinding references to the table entry.
*/
KASSERT(rt->rt_refcnt <= 1, ("bogus refcnt %ld", rt->rt_refcnt));
#endif
#ifdef RADIX_MPATH
fib = rt->rt_fibnum;
bzero(&info, sizeof(info));
info.rti_ifp = rt->rt_ifp;
info.rti_flags = RTF_RNH_LOCKED;
info.rti_info[RTAX_DST] = rt_key(rt);
info.rti_info[RTAX_GATEWAY] = rt->rt_ifa->ifa_addr;
RT_UNLOCK(rt);
error = rtrequest1_fib(RTM_DELETE, &info, &rt0, fib);
if (error == 0 && rt0 != NULL) {
rt = rt0;
RT_LOCK(rt);
} else if (error != 0) {
RT_LOCK(rt);
return (error);
}
#else
/*
* Remove the item from the tree; it should be there,
* but when callers invoke us blindly it may not (sigh).
@ -864,6 +882,7 @@ rtexpunge(struct rtentry *rt)
("unexpected flags 0x%x", rn->rn_flags));
KASSERT(rt == RNTORT(rn),
("lookup mismatch, rt %p rn %p", rt, rn));
#endif /* RADIX_MPATH */
rt->rt_flags &= ~RTF_UP;
@ -886,7 +905,9 @@ rtexpunge(struct rtentry *rt)
* linked to the routing table.
*/
V_rttrash++;
#if !defined(RADIX_MPATH)
bad:
#endif
return (error);
}
@ -1044,6 +1065,7 @@ rtrequest1_fib(int req, struct rt_addrinfo *info, struct rtentry **ret_nrt,
*/
if (error != ENOENT)
goto bad;
error = 0;
}
#endif
/*

View File

@ -319,6 +319,8 @@ struct rt_addrinfo {
#ifdef _KERNEL
#define RT_LINK_IS_UP(ifp) ((ifp)->if_link_state == LINK_STATE_UP)
#define RT_LOCK_INIT(_rt) \
mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK)
#define RT_LOCK(_rt) mtx_lock(&(_rt)->rt_mtx)

View File

@ -34,6 +34,7 @@
__FBSDID("$FreeBSD$");
#include "opt_carp.h"
#include "opt_mpath.h"
#include <sys/param.h>
#include <sys/systm.h>
@ -1040,6 +1041,13 @@ in_addprefix(struct in_ifaddr *target, int flags)
* interface address, we are done here.
*/
if (ia->ia_flags & IFA_ROUTE) {
#ifdef RADIX_MPATH
if (ia->ia_addr.sin_addr.s_addr ==
target->ia_addr.sin_addr.s_addr)
return (EEXIST);
else
break;
#endif
if (V_sameprefixcarponly &&
target->ia_ifp->if_type != IFT_CARP &&
ia->ia_ifp->if_type != IFT_CARP) {

View File

@ -199,6 +199,8 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
*/
rte = ro->ro_rt;
if (rte && ((rte->rt_flags & RTF_UP) == 0 ||
rte->rt_ifp == NULL ||
!RT_LINK_IS_UP(rte->rt_ifp) ||
dst->sin_family != AF_INET ||
dst->sin_addr.s_addr != ip->ip_dst.s_addr)) {
if (!nortfree)
@ -270,7 +272,9 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
#endif
rte = ro->ro_rt;
}
if (rte == NULL) {
if (rte == NULL ||
rte->rt_ifp == NULL ||
!RT_LINK_IS_UP(rte->rt_ifp)) {
#ifdef IPSEC
/*
* There is no route for this packet, but it is