Convert route caching to nexthop caching.

This change is built on top of nexthop objects introduced in r359823.

Nexthops are separate data structures, containing all necessary information
 to perform packet forwarding such as gateway interface and mtu. Nexthops
 are shared among the routes, providing more pre-computed cache-efficient
 data while requiring less memory. Splitting the LPM code and the attached
 data solves multiple long-standing problems in the routing layer,
 drastically reduces the coupling with other parts of the stack and allows
 faster lookup algorithms to be introduced transparently.

Route caching was (re)introduced to minimise (slow) routing lookups, allowing
 for notably better performance for large TCP senders. Caching works by
 acquiring rtentry reference, which is protected by per-rtentry mutex.
 If the routing table is changed (checked by comparing the rtable generation id)
 or link goes down, cache record gets withdrawn.

Nexthops have the same reference counting interface, backed by refcount(9).
This change merely replaces rtentry with the actual forwarding nexthop as a
 cached object, which is mostly mechanical. Other moving parts like cache
 cleanup on rtable change remain the same.

Differential Revision:	https://reviews.freebsd.org/D24340
This commit is contained in:
Alexander V. Chernikov 2020-04-25 09:06:11 +00:00
parent e4a458bb1b
commit 983066f05b
25 changed files with 308 additions and 301 deletions

View File

@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
#include <net/if_var.h>
#ifdef RSS
#include <net/netisr.h>
#include <net/nhop.h>
#include <net/rss_config.h>
#endif
#if defined(INET) || defined(INET6)
@ -754,7 +755,7 @@ ktls_alloc_snd_tag(struct inpcb *inp, struct ktls_session *tls, bool force,
{
union if_snd_tag_alloc_params params;
struct ifnet *ifp;
struct rtentry *rt;
struct nhop_object *nh;
struct tcpcb *tp;
int error;
@ -792,12 +793,12 @@ ktls_alloc_snd_tag(struct inpcb *inp, struct ktls_session *tls, bool force,
* enabled after a connection has completed key negotiation in
* userland, the cached route will be present in practice.
*/
rt = inp->inp_route.ro_rt;
if (rt == NULL || rt->rt_ifp == NULL) {
nh = inp->inp_route.ro_nh;
if (nh == NULL) {
INP_RUNLOCK(inp);
return (ENXIO);
}
ifp = rt->rt_ifp;
ifp = nh->nh_ifp;
if_ref(ifp);
params.hdr.type = IF_SND_TAG_TYPE_TLS;

View File

@ -55,6 +55,8 @@ __FBSDID("$FreeBSD$");
#include <net/radix_mpath.h>
#include <sys/rmlock.h>
#include <net/route.h>
#include <net/route/nhop.h>
#include <net/route/shared.h>
#include <net/route_var.h>
#include <net/route/nhop.h>
#include <net/if.h>
@ -257,42 +259,41 @@ rt_mpath_select(struct rtentry *rte, uint32_t hash)
void
rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum)
{
struct rtentry *rt;
struct rtentry *rt, *rt_tmp;
/*
* XXX we don't attempt to lookup cached route again; what should
* be done for sendto(3) case?
*/
if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP)
&& RT_LINK_IS_UP(ro->ro_rt->rt_ifp))
if (ro->ro_nh && RT_LINK_IS_UP(ro->ro_nh->nh_ifp))
return;
ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, 0, fibnum);
ro->ro_nh = NULL;
rt_tmp = rtalloc1_fib(&ro->ro_dst, 1, 0, fibnum);
/* if the route does not exist or it is not multipath, don't care */
if (ro->ro_rt == NULL)
if (rt_tmp == NULL)
return;
if (rn_mpath_next((struct radix_node *)ro->ro_rt) == NULL) {
RT_UNLOCK(ro->ro_rt);
if (rn_mpath_next((struct radix_node *)rt_tmp) == NULL) {
ro->ro_nh = rt_tmp->rt_nhop;
nhop_ref_object(ro->ro_nh);
RT_UNLOCK(rt_tmp);
return;
}
rt = rt_mpath_selectrte(ro->ro_rt, hash);
rt = rt_mpath_selectrte(rt_tmp, hash);
/* XXX try filling rt_gwroute and avoid unreachable gw */
/* gw selection has failed - there must be only zero weight routes */
if (!rt) {
RT_UNLOCK(ro->ro_rt);
ro->ro_rt = NULL;
RT_UNLOCK(rt_tmp);
return;
}
if (ro->ro_rt != rt) {
RTFREE_LOCKED(ro->ro_rt);
ro->ro_rt = rt;
RT_LOCK(ro->ro_rt);
RT_ADDREF(ro->ro_rt);
}
RT_UNLOCK(ro->ro_rt);
if (rt_tmp != rt) {
RTFREE_LOCKED(rt_tmp);
ro->ro_nh = rt->rt_nhop;
nhop_ref_object(ro->ro_nh);
} else
RT_UNLOCK(rt_tmp);
}
void

View File

@ -441,15 +441,18 @@ rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum)
{
struct rtentry *rt;
if ((rt = ro->ro_rt) != NULL) {
if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP)
if (ro->ro_nh != NULL) {
if (NH_IS_VALID(ro->ro_nh))
return;
RTFREE(rt);
ro->ro_rt = NULL;
NH_FREE(ro->ro_nh);
ro->ro_nh = NULL;
}
rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum);
if (rt != NULL) {
ro->ro_nh = rt->rt_nhop;
nhop_ref_object(rt->rt_nhop);
RT_UNLOCK(rt);
}
ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum);
if (ro->ro_rt)
RT_UNLOCK(ro->ro_rt);
}
/*

View File

@ -51,7 +51,7 @@
* with its length.
*/
struct route {
struct rtentry *ro_rt;
struct nhop_object *ro_nh;
struct llentry *ro_lle;
/*
* ro_prepend and ro_plen are only used for bpf to pass in a
@ -227,21 +227,6 @@ struct rtentry {
/* Control plane route request flags */
#define NHR_COPY 0x100 /* Copy rte data */
#ifdef _KERNEL
/* rte<>ro_flags translation */
static inline void
rt_update_ro_flags(struct route *ro)
{
int rt_flags = ro->ro_rt->rt_flags;
ro->ro_flags &= ~ (RT_REJECT|RT_BLACKHOLE|RT_HAS_GW);
ro->ro_flags |= (rt_flags & RTF_REJECT) ? RT_REJECT : 0;
ro->ro_flags |= (rt_flags & RTF_BLACKHOLE) ? RT_BLACKHOLE : 0;
ro->ro_flags |= (rt_flags & RTF_GATEWAY) ? RT_HAS_GW : 0;
}
#endif
/*
* Routing statistics.
*/
@ -431,12 +416,22 @@ struct rt_addrinfo {
RTFREE((_ro)->ro_rt); \
} while (0)
/*
 * Drop the nexthop cached in a route, if there is one: hand it to
 * NH_FREE() and clear ro_nh so the route no longer points at it.
 * Safe to invoke on a route whose ro_nh is already NULL.
 */
#define	RO_NHFREE(_ro) do {					\
	if ((_ro)->ro_nh != NULL) {				\
		NH_FREE((_ro)->ro_nh);				\
		(_ro)->ro_nh = NULL;				\
	}							\
} while (0)
#define RO_INVALIDATE_CACHE(ro) do { \
RO_RTFREE(ro); \
if ((ro)->ro_lle != NULL) { \
LLE_FREE((ro)->ro_lle); \
(ro)->ro_lle = NULL; \
} \
if ((ro)->ro_nh != NULL) { \
NH_FREE((ro)->ro_nh); \
(ro)->ro_nh = NULL; \
} \
} while (0)
/*
@ -444,7 +439,7 @@ struct rt_addrinfo {
* out-of-date cache, simply free it. Update the generation number
* for the new allocation
*/
#define RT_VALIDATE(ro, cookiep, fibnum) do { \
#define NH_VALIDATE(ro, cookiep, fibnum) do { \
rt_gen_t cookie = RT_GEN(fibnum, (ro)->ro_dst.sa_family); \
if (*(cookiep) != cookie) { \
RO_INVALIDATE_CACHE(ro); \

View File

@ -80,7 +80,7 @@ _Static_assert(__offsetof(_s1, _f1) == __offsetof(_s2, _f2), \
CHK_STRUCT_FIELD_GENERIC(struct route, _field, _route_new, _field)
#define CHK_STRUCT_ROUTE_FIELDS(_route_new) \
_CHK_ROUTE_FIELD(_route_new, ro_rt) \
_CHK_ROUTE_FIELD(_route_new, ro_nh) \
_CHK_ROUTE_FIELD(_route_new, ro_lle) \
_CHK_ROUTE_FIELD(_route_new, ro_prepend)\
_CHK_ROUTE_FIELD(_route_new, ro_plen) \

View File

@ -62,6 +62,10 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_fib.h>
#ifdef INET
/* Verify struct route compatibility */
/* Assert 'struct route_in' is compatible with 'struct route' */
CHK_STRUCT_ROUTE_COMPAT(struct route_in, ro_dst4);
static void fib4_rte_to_nh_basic(struct nhop_object *nh, struct in_addr dst,
uint32_t flags, struct nhop4_basic *pnh4);
static void fib4_rte_to_nh_extended(struct nhop_object *nh, struct in_addr dst,

View File

@ -32,6 +32,19 @@
#ifndef _NETINET_IN_FIB_H_
#define _NETINET_IN_FIB_H_
/*
 * IPv4-specific variant of 'struct route'.
 *
 * The leading members repeat the generic structure's members and are
 * followed by a sockaddr_in destination.  The layout is checked at compile
 * time with CHK_STRUCT_ROUTE_COMPAT(struct route_in, ro_dst4), so the common
 * members must not be reordered or resized.
 */
struct route_in {
/* common fields shared among all 'struct route' */
struct nhop_object *ro_nh; /* cached nexthop, NULL when nothing is cached */
struct llentry *ro_lle; /* cached link-layer entry, NULL when absent */
char *ro_prepend; /* NOTE(review): prepend data; see 'struct route' for usage */
uint16_t ro_plen; /* NOTE(review): presumably the length of ro_prepend - confirm */
uint16_t ro_flags; /* RT_* flags, e.g. RT_REJECT, RT_BLACKHOLE, RT_HAS_GW */
uint16_t ro_mtu; /* saved route mtu (comment predates the ro_rt -> ro_nh rename) */
uint16_t spare;
/* custom sockaddr */
struct sockaddr_in ro_dst4; /* IPv4 destination, in place of generic ro_dst */
};
/* Basic nexthop info used for uRPF/mtu checks */
struct nhop4_basic {
struct ifnet *nh_ifp; /* Logical egress interface */

View File

@ -63,6 +63,7 @@ __FBSDID("$FreeBSD$");
#include <net/if_dl.h>
#include <net/pfil.h>
#include <net/route.h>
#include <net/route/nhop.h>
#include <net/netisr.h>
#include <net/rss_config.h>
#include <net/vnet.h>
@ -72,6 +73,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_systm.h>
#include <netinet/in_var.h>
#include <netinet/ip.h>
#include <netinet/in_fib.h>
#include <netinet/in_pcb.h>
#include <netinet/ip_var.h>
#include <netinet/ip_fw.h>
@ -980,10 +982,11 @@ ip_forward(struct mbuf *m, int srcrt)
ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
M_GETFIB(m));
#else
in_rtalloc_ign(&ro, 0, M_GETFIB(m));
ro.ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_REF,
m->m_pkthdr.flowid);
#endif
if (ro.ro_rt != NULL) {
ia = ifatoia(ro.ro_rt->rt_ifa);
if (ro.ro_nh != NULL) {
ia = ifatoia(ro.ro_nh->nh_ifa);
} else
ia = NULL;
/*
@ -1045,19 +1048,18 @@ ip_forward(struct mbuf *m, int srcrt)
dest.s_addr = 0;
if (!srcrt && V_ipsendredirects &&
ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) {
struct rtentry *rt;
struct nhop_object *nh;
rt = ro.ro_rt;
nh = ro.ro_nh;
if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 &&
satosin(rt_key(rt))->sin_addr.s_addr != 0) {
#define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa))
if (nh != NULL && ((nh->nh_flags & (NHF_REDIRECT|NHF_DEFAULT)) == 0)) {
struct in_ifaddr *nh_ia = (struct in_ifaddr *)(nh->nh_ifa);
u_long src = ntohl(ip->ip_src.s_addr);
if (RTA(rt) &&
(src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) {
if (rt->rt_flags & RTF_GATEWAY)
dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr;
if (nh_ia != NULL &&
(src & nh_ia->ia_subnetmask) == nh_ia->ia_subnet) {
if (nh->nh_flags & NHF_GATEWAY)
dest.s_addr = nh->gw4_sa.sin_addr.s_addr;
else
dest.s_addr = ip->ip_dst.s_addr;
/* Router requirements says to only send host redirects */
@ -1069,9 +1071,9 @@ ip_forward(struct mbuf *m, int srcrt)
error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL);
if (error == EMSGSIZE && ro.ro_rt)
mtu = ro.ro_rt->rt_mtu;
RO_RTFREE(&ro);
if (error == EMSGSIZE && ro.ro_nh)
mtu = ro.ro_nh->nh_mtu;
RO_NHFREE(&ro);
if (error)
IPSTAT_INC(ips_cantforward);

View File

@ -67,6 +67,7 @@ __FBSDID("$FreeBSD$");
#include <net/netisr.h>
#include <net/pfil.h>
#include <net/route.h>
#include <net/route/nhop.h>
#ifdef RADIX_MPATH
#include <net/radix_mpath.h>
#endif
@ -78,6 +79,7 @@ __FBSDID("$FreeBSD$");
#include <netinet/in_kdtrace.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_fib.h>
#include <netinet/in_pcb.h>
#include <netinet/in_rss.h>
#include <netinet/in_var.h>
@ -287,6 +289,19 @@ ip_output_send(struct inpcb *inp, struct ifnet *ifp, struct mbuf *m,
return (error);
}
/*
 * Nexthop flags -> ro_flags translation.
 *
 * Refreshes the RT_REJECT, RT_BLACKHOLE and RT_HAS_GW bits of ro->ro_flags
 * from the NHF_REJECT, NHF_BLACKHOLE and NHF_GATEWAY bits of the cached
 * nexthop.  All other ro_flags bits are left as they were.
 *
 * ro->ro_nh is dereferenced unconditionally, so the caller must only pass
 * a route that has a cached nexthop.
 */
static inline void
rt_update_ro_flags(struct route *ro)
{
	int src = ro->ro_nh->nh_flags;
	int translated = 0;

	if (src & NHF_REJECT)
		translated |= RT_REJECT;
	if (src & NHF_BLACKHOLE)
		translated |= RT_BLACKHOLE;
	if (src & NHF_GATEWAY)
		translated |= RT_HAS_GW;

	/* Clear the stale copies of the three bits, then install the new ones. */
	ro->ro_flags = (ro->ro_flags & ~(RT_REJECT | RT_BLACKHOLE | RT_HAS_GW)) |
	    translated;
}
/*
* IP output. The packet in mbuf chain m contains a skeletal IP
* header (with len, off, ttl, proto, tos, src, dst).
@ -368,7 +383,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
dst = (struct sockaddr_in *)&ro->ro_dst;
else
dst = &sin;
if (ro == NULL || ro->ro_rt == NULL) {
if (ro == NULL || ro->ro_nh == NULL) {
bzero(dst, sizeof(*dst));
dst->sin_family = AF_INET;
dst->sin_len = sizeof(*dst);
@ -380,8 +395,8 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
* Validate route against routing table additions;
* a better/more specific route might have been added.
*/
if (inp != NULL && ro != NULL && ro->ro_rt != NULL)
RT_VALIDATE(ro, &inp->inp_rt_cookie, fibnum);
if (inp != NULL && ro != NULL && ro->ro_nh != NULL)
NH_VALIDATE(ro, &inp->inp_rt_cookie, fibnum);
/*
* If there is a cached route,
* check that it is to the same destination
@ -390,9 +405,8 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
* cache with IPv6.
* Also check whether routing cache needs invalidation.
*/
if (ro != NULL && ro->ro_rt != NULL &&
((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
ro->ro_rt->rt_ifp == NULL || !RT_LINK_IS_UP(ro->ro_rt->rt_ifp) ||
if (ro != NULL && ro->ro_nh != NULL &&
((!NH_IS_VALID(ro->ro_nh)) || !RT_LINK_IS_UP(ro->ro_nh->nh_ifp) ||
dst->sin_family != AF_INET ||
dst->sin_addr.s_addr != ip->ip_dst.s_addr))
RO_INVALIDATE_CACHE(ro);
@ -450,7 +464,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
else
src.s_addr = INADDR_ANY;
} else if (ro != NULL) {
if (ro->ro_rt == NULL) {
if (ro->ro_nh == NULL) {
/*
* We want to do any cloning requested by the link
* layer, as this is probably required in all cases
@ -461,12 +475,11 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr),
fibnum);
#else
in_rtalloc_ign(ro, 0, fibnum);
ro->ro_nh = fib4_lookup(fibnum, dst->sin_addr, 0,
NHR_REF, m->m_pkthdr.flowid);
#endif
if (ro->ro_rt == NULL ||
(ro->ro_rt->rt_flags & RTF_UP) == 0 ||
ro->ro_rt->rt_ifp == NULL ||
!RT_LINK_IS_UP(ro->ro_rt->rt_ifp)) {
if (ro->ro_nh == NULL || (!NH_IS_VALID(ro->ro_nh)) ||
!RT_LINK_IS_UP(ro->ro_nh->nh_ifp)) {
#if defined(IPSEC) || defined(IPSEC_SUPPORT)
/*
* There is no route for this packet, but it is
@ -481,20 +494,20 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
goto bad;
}
}
ia = ifatoia(ro->ro_rt->rt_ifa);
ifp = ro->ro_rt->rt_ifp;
counter_u64_add(ro->ro_rt->rt_pksent, 1);
ia = ifatoia(ro->ro_nh->nh_ifa);
ifp = ro->ro_nh->nh_ifp;
counter_u64_add(ro->ro_nh->nh_pksent, 1);
rt_update_ro_flags(ro);
if (ro->ro_rt->rt_flags & RTF_GATEWAY)
gw = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
if (ro->ro_rt->rt_flags & RTF_HOST)
isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST);
if (ro->ro_nh->nh_flags & NHF_GATEWAY)
gw = &ro->ro_nh->gw4_sa;
if (ro->ro_nh->nh_flags & NHF_HOST)
isbroadcast = (ro->ro_nh->nh_flags & NHF_BROADCAST);
else if (ifp->if_flags & IFF_BROADCAST)
isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia);
else
isbroadcast = 0;
if (ro->ro_rt->rt_flags & RTF_HOST)
mtu = ro->ro_rt->rt_mtu;
if (ro->ro_nh->nh_flags & NHF_HOST)
mtu = ro->ro_nh->nh_mtu;
else
mtu = ifp->if_mtu;
src = IA_SIN(ia)->sin_addr;
@ -537,9 +550,9 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
}
/* Catch a possible divide by zero later. */
KASSERT(mtu > 0, ("%s: mtu %d <= 0, ro=%p (rt_flags=0x%08x) ifp=%p",
KASSERT(mtu > 0, ("%s: mtu %d <= 0, ro=%p (nh_flags=0x%08x) ifp=%p",
__func__, mtu, ro,
(ro != NULL && ro->ro_rt != NULL) ? ro->ro_rt->rt_flags : 0, ifp));
(ro != NULL && ro->ro_nh != NULL) ? ro->ro_nh->nh_flags : 0, ifp));
if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) {
m->m_flags |= M_MCAST;
@ -702,7 +715,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
case -1: /* Need to try again */
/* Reset everything for a new round */
if (ro != NULL) {
RO_RTFREE(ro);
RO_NHFREE(ro);
ro->ro_prepend = NULL;
}
gw = dst;

View File

@ -980,8 +980,7 @@ sctp_asconf_nets_cleanup(struct sctp_tcb *stcb, struct sctp_ifn *ifn)
((ifn == NULL) ||
(SCTP_GET_IF_INDEX_FROM_ROUTE(&net->ro) != ifn->ifn_index))) {
/* clear any cached route */
RTFREE(net->ro.ro_rt);
net->ro.ro_rt = NULL;
RO_NHFREE(&net->ro);
}
/* clear any cached source address */
if (net->src_addr_selected) {
@ -1090,10 +1089,7 @@ sctp_path_check_and_react(struct sctp_tcb *stcb, struct sctp_ifa *newifa)
if (addrnum == 1) {
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
/* clear any cached route and source address */
if (net->ro.ro_rt) {
RTFREE(net->ro.ro_rt);
net->ro.ro_rt = NULL;
}
RO_NHFREE(&net->ro);
if (net->src_addr_selected) {
sctp_free_ifa(net->ro._s_addr);
net->ro._s_addr = NULL;
@ -1112,10 +1108,7 @@ sctp_path_check_and_react(struct sctp_tcb *stcb, struct sctp_ifa *newifa)
/* Multiple local addresses exsist in the association. */
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
/* clear any cached route and source address */
if (net->ro.ro_rt) {
RTFREE(net->ro.ro_rt);
net->ro.ro_rt = NULL;
}
RO_NHFREE(&net->ro);
if (net->src_addr_selected) {
sctp_free_ifa(net->ro._s_addr);
net->ro._s_addr = NULL;
@ -1131,7 +1124,7 @@ sctp_path_check_and_react(struct sctp_tcb *stcb, struct sctp_ifa *newifa)
SCTP_RTALLOC((sctp_route_t *)&net->ro,
stcb->sctp_ep->def_vrf_id,
stcb->sctp_ep->fibnum);
if (net->ro.ro_rt == NULL)
if (net->ro.ro_nh == NULL)
continue;
changed = 0;
@ -2214,18 +2207,13 @@ sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb,
struct sctp_nets *net;
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
sctp_rtentry_t *rt;
/* delete this address if cached */
if (net->ro._s_addr == ifa) {
sctp_free_ifa(net->ro._s_addr);
net->ro._s_addr = NULL;
net->src_addr_selected = 0;
rt = net->ro.ro_rt;
if (rt) {
RTFREE(rt);
net->ro.ro_rt = NULL;
}
RO_NHFREE(&net->ro);
/*
* Now we deleted our src address,
* should we not also now reset the

View File

@ -71,11 +71,13 @@ __FBSDID("$FreeBSD$");
#include <net/if_types.h>
#include <net/if_var.h>
#include <net/route.h>
#include <net/route/nhop.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <netinet/ip.h>
#include <netinet/in_fib.h>
#include <netinet/in_pcb.h>
#include <netinet/in_var.h>
#include <netinet/ip_var.h>
@ -85,6 +87,7 @@ __FBSDID("$FreeBSD$");
#ifdef INET6
#include <sys/domain.h>
#include <netinet/ip6.h>
#include <netinet6/in6_fib.h>
#include <netinet6/ip6_var.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/ip6protosw.h>
@ -199,15 +202,15 @@ MALLOC_DECLARE(SCTP_M_MCORE);
#define SCTP_INIT_VRF_TABLEID(vrf)
#define SCTP_IFN_IS_IFT_LOOP(ifn) ((ifn)->ifn_type == IFT_LOOP)
#define SCTP_ROUTE_IS_REAL_LOOP(ro) ((ro)->ro_rt && (ro)->ro_rt->rt_ifa && (ro)->ro_rt->rt_ifa->ifa_ifp && (ro)->ro_rt->rt_ifa->ifa_ifp->if_type == IFT_LOOP)
#define SCTP_ROUTE_IS_REAL_LOOP(ro) ((ro)->ro_nh && (ro)->ro_nh->nh_ifa && (ro)->ro_nh->nh_ifa->ifa_ifp && (ro)->ro_nh->nh_ifa->ifa_ifp->if_type == IFT_LOOP)
/*
* Access to IFN's to help with src-addr-selection
*/
/* This could return VOID if the index works but for BSD we provide both. */
#define SCTP_GET_IFN_VOID_FROM_ROUTE(ro) (void *)ro->ro_rt->rt_ifp
#define SCTP_GET_IF_INDEX_FROM_ROUTE(ro) (ro)->ro_rt->rt_ifp->if_index
#define SCTP_ROUTE_HAS_VALID_IFN(ro) ((ro)->ro_rt && (ro)->ro_rt->rt_ifp)
#define SCTP_GET_IFN_VOID_FROM_ROUTE(ro) (void *)ro->ro_nh->nh_ifp
#define SCTP_GET_IF_INDEX_FROM_ROUTE(ro) (ro)->ro_nh->nh_ifp->if_index
#define SCTP_ROUTE_HAS_VALID_IFN(ro) ((ro)->ro_nh && (ro)->ro_nh->nh_ifp)
/*
* general memory allocation
@ -304,12 +307,10 @@ typedef struct callout sctp_os_timer_t;
/* MTU */
/*************************/
#define SCTP_GATHER_MTU_FROM_IFN_INFO(ifn, ifn_index, af) ((struct ifnet *)ifn)->if_mtu
#define SCTP_GATHER_MTU_FROM_ROUTE(sctp_ifa, sa, rt) ((uint32_t)((rt != NULL) ? rt->rt_mtu : 0))
#define SCTP_GATHER_MTU_FROM_ROUTE(sctp_ifa, sa, nh) ((uint32_t)((nh != NULL) ? nh->nh_mtu : 0))
#define SCTP_GATHER_MTU_FROM_INTFC(sctp_ifn) ((sctp_ifn->ifn_p != NULL) ? ((struct ifnet *)(sctp_ifn->ifn_p))->if_mtu : 0)
#define SCTP_SET_MTU_OF_ROUTE(sa, rt, mtu) do { \
if (rt != NULL) \
rt->rt_mtu = mtu; \
} while(0)
/* XXX: Setting MTU from the protocol in this way is simply incorrect */
#define SCTP_SET_MTU_OF_ROUTE(sa, rt, mtu)
/* (de-)register interface event notifications */
#define SCTP_REGISTER_INTERFACE(ifhandle, af)
@ -365,7 +366,7 @@ typedef struct callout sctp_os_timer_t;
*/
/* get the v6 hop limit */
#define SCTP_GET_HLIM(inp, ro) in6_selecthlim(&inp->ip_inp.inp, (ro ? (ro->ro_rt ? (ro->ro_rt->rt_ifp) : (NULL)) : (NULL)));
#define SCTP_GET_HLIM(inp, ro) in6_selecthlim(&inp->ip_inp.inp, (ro ? (ro->ro_nh ? (ro->ro_nh->nh_ifp) : (NULL)) : (NULL)));
/* is the endpoint v6only? */
#define SCTP_IPV6_V6ONLY(sctp_inpcb) ((sctp_inpcb)->ip_inp.inp.inp_flags & IN6P_IPV6_V6ONLY)
@ -397,10 +398,16 @@ typedef struct callout sctp_os_timer_t;
* routes, output, etc.
*/
typedef struct route sctp_route_t;
typedef struct rtentry sctp_rtentry_t;
/*
 * Fill in the route's cached nexthop if it does not already have one.
 *
 * Nothing is done when (ro)->ro_nh is already set.  Otherwise the
 * destination stored in (ro)->ro_dst is looked up in FIB 'fibnum' using
 * the lookup routine that matches its address family, and the result
 * (possibly NULL) is stored in (ro)->ro_nh.  Destinations of any other
 * address family leave ro_nh untouched.  'vrf_id' is accepted for
 * interface compatibility and is not used.
 *
 * fib4_lookup() takes (fibnum, dst, scopeid, flags, flowid), as in the
 * ip_forward() and ip_output() callers, so NHR_REF must be passed in the
 * flags position with a scope id of 0.  Passing it in the scope id
 * position performs the lookup without NHR_REF, while every cleanup path
 * hands ro_nh to RO_NHFREE().
 * NOTE(review): fib6_lookup() is assumed to use the same argument order;
 * confirm against netinet6/in6_fib.h.  Scope id 0 is passed for IPv6 as
 * well; link-local destinations may need sin6_scope_id here - confirm.
 */
#define SCTP_RTALLOC(ro, vrf_id, fibnum) \
{ \
	if ((ro)->ro_nh == NULL) { \
		if ((ro)->ro_dst.sa_family == AF_INET) \
			(ro)->ro_nh = fib4_lookup(fibnum, ((struct sockaddr_in *)&(ro)->ro_dst)->sin_addr, 0, NHR_REF, 0); \
		if ((ro)->ro_dst.sa_family == AF_INET6) \
			(ro)->ro_nh = fib6_lookup(fibnum, &((struct sockaddr_in6 *)&(ro)->ro_dst)->sin6_addr, 0, NHR_REF, 0); \
	} \
}
/*
* SCTP protocol specific mbuf flags.

View File

@ -3387,13 +3387,13 @@ sctp_source_address_selection(struct sctp_inpcb *inp,
* addresses. If the bound set is NOT assigned to the interface then
* we must use rotation amongst the bound addresses..
*/
if (ro->ro_rt == NULL) {
if (ro->ro_nh == NULL) {
/*
* Need a route to cache.
*/
SCTP_RTALLOC(ro, vrf_id, inp->fibnum);
}
if (ro->ro_rt == NULL) {
if (ro->ro_nh == NULL) {
return (NULL);
}
fam = ro->ro_dst.sa_family;
@ -4131,10 +4131,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
sctp_free_ifa(net->ro._s_addr);
net->ro._s_addr = NULL;
net->src_addr_selected = 0;
if (ro->ro_rt) {
RTFREE(ro->ro_rt);
ro->ro_rt = NULL;
}
RO_NHFREE(ro);
}
if (net->src_addr_selected == 0) {
/* Cache the source address */
@ -4206,7 +4203,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
* catch that somewhere and abort the association
* right away (assuming this is an INIT being sent).
*/
if (ro->ro_rt == NULL) {
if (ro->ro_nh == NULL) {
/*
* src addr selection failed to find a route
* (or valid source addr), so we can't get
@ -4225,7 +4222,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
SCTPDBG(SCTP_DEBUG_OUTPUT3, "Destination is %x\n",
(uint32_t)(ntohl(ip->ip_dst.s_addr)));
SCTPDBG(SCTP_DEBUG_OUTPUT3, "RTP route is %p through\n",
(void *)ro->ro_rt);
(void *)ro->ro_nh);
if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) {
/* failed to prepend data, give up */
@ -4278,13 +4275,13 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
SCTPDBG(SCTP_DEBUG_OUTPUT3, "IP output returns %d\n", ret);
if (net == NULL) {
/* free tempy routes */
RO_RTFREE(ro);
RO_NHFREE(ro);
} else {
if ((ro->ro_rt != NULL) && (net->ro._s_addr) &&
if ((ro->ro_nh != NULL) && (net->ro._s_addr) &&
((net->dest_state & SCTP_ADDR_NO_PMTUD) == 0)) {
uint32_t mtu;
mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_rt);
mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_nh);
if (mtu > 0) {
if (net->port) {
mtu -= sizeof(struct udphdr);
@ -4296,7 +4293,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
net->mtu = mtu;
}
}
} else if (ro->ro_rt == NULL) {
} else if (ro->ro_nh == NULL) {
/* route was freed */
if (net->ro._s_addr &&
net->src_addr_selected) {
@ -4426,10 +4423,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
sctp_free_ifa(net->ro._s_addr);
net->ro._s_addr = NULL;
net->src_addr_selected = 0;
if (ro->ro_rt) {
RTFREE(ro->ro_rt);
ro->ro_rt = NULL;
}
RO_NHFREE(ro);
}
if (net->src_addr_selected == 0) {
sin6 = (struct sockaddr_in6 *)&net->ro._l_addr;
@ -4489,7 +4483,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
}
lsa6->sin6_port = inp->sctp_lport;
if (ro->ro_rt == NULL) {
if (ro->ro_nh == NULL) {
/*
* src addr selection failed to find a route
* (or valid source addr), so we can't get
@ -4625,13 +4619,13 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
}
if (net == NULL) {
/* Now if we had a temp route free it */
RO_RTFREE(ro);
RO_NHFREE(ro);
} else {
/*
* PMTU check versus smallest asoc MTU goes
* here
*/
if (ro->ro_rt == NULL) {
if (ro->ro_nh == NULL) {
/* Route was freed */
if (net->ro._s_addr &&
net->src_addr_selected) {
@ -4640,11 +4634,11 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp,
}
net->src_addr_selected = 0;
}
if ((ro->ro_rt != NULL) && (net->ro._s_addr) &&
if ((ro->ro_nh != NULL) && (net->ro._s_addr) &&
((net->dest_state & SCTP_ADDR_NO_PMTUD) == 0)) {
uint32_t mtu;
mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_rt);
mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_nh);
if (mtu > 0) {
if (net->port) {
mtu -= sizeof(struct udphdr);
@ -13838,7 +13832,7 @@ sctp_v6src_match_nexthop(struct sockaddr_in6 *src6, sctp_route_t *ro)
struct nd_pfxrouter *pfxrtr = NULL;
struct sockaddr_in6 gw6;
if (ro == NULL || ro->ro_rt == NULL || src6->sin6_family != AF_INET6)
if (ro == NULL || ro->ro_nh == NULL || src6->sin6_family != AF_INET6)
return (0);
/* get prefix entry of address */
@ -13871,8 +13865,8 @@ sctp_v6src_match_nexthop(struct sockaddr_in6 *src6, sctp_route_t *ro)
SCTPDBG(SCTP_DEBUG_OUTPUT2, "prefix router is ");
SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)&gw6);
SCTPDBG(SCTP_DEBUG_OUTPUT2, "installed router is ");
SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, ro->ro_rt->rt_gateway);
if (sctp_cmpaddr((struct sockaddr *)&gw6, ro->ro_rt->rt_gateway)) {
SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &ro->ro_nh->gw_sa);
if (sctp_cmpaddr((struct sockaddr *)&gw6, &ro->ro_nh->gw_sa)) {
ND6_RUNLOCK();
SCTPDBG(SCTP_DEBUG_OUTPUT2, "pfxrouter is installed\n");
return (1);
@ -13892,7 +13886,7 @@ sctp_v4src_match_nexthop(struct sctp_ifa *sifa, sctp_route_t *ro)
struct ifaddr *ifa;
struct in_addr srcnetaddr, gwnetaddr;
if (ro == NULL || ro->ro_rt == NULL ||
if (ro == NULL || ro->ro_nh == NULL ||
sifa->address.sa.sa_family != AF_INET) {
return (0);
}
@ -13904,10 +13898,10 @@ sctp_v4src_match_nexthop(struct sctp_ifa *sifa, sctp_route_t *ro)
SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &sifa->address.sa);
SCTPDBG(SCTP_DEBUG_OUTPUT1, "network address is %x\n", srcnetaddr.s_addr);
sin = (struct sockaddr_in *)ro->ro_rt->rt_gateway;
sin = &ro->ro_nh->gw4_sa;
gwnetaddr.s_addr = (sin->sin_addr.s_addr & mask->sin_addr.s_addr);
SCTPDBG(SCTP_DEBUG_OUTPUT1, "match_nexthop4: nexthop is ");
SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, ro->ro_rt->rt_gateway);
SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &ro->ro_nh->gw4_sa);
SCTPDBG(SCTP_DEBUG_OUTPUT1, "network address is %x\n", gwnetaddr.s_addr);
if (srcnetaddr.s_addr == gwnetaddr.s_addr) {
return (1);

View File

@ -3976,17 +3976,9 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr,
} else {
imtu = 0;
}
rmtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, net->ro.ro_rt);
rmtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, net->ro.ro_nh);
hcmtu = sctp_hc_get_mtu(&net->ro._l_addr, stcb->sctp_ep->fibnum);
net->mtu = sctp_min_mtu(hcmtu, rmtu, imtu);
if (rmtu == 0) {
/*
* Start things off to match mtu of
* interface please.
*/
SCTP_SET_MTU_OF_ROUTE(&net->ro._l_addr.sa,
net->ro.ro_rt, net->mtu);
}
}
}
if (net->mtu == 0) {
@ -4067,19 +4059,19 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr,
*netp = net;
}
netfirst = TAILQ_FIRST(&stcb->asoc.nets);
if (net->ro.ro_rt == NULL) {
if (net->ro.ro_nh == NULL) {
/* Since we have no route put it at the back */
TAILQ_INSERT_TAIL(&stcb->asoc.nets, net, sctp_next);
} else if (netfirst == NULL) {
/* We are the first one in the pool. */
TAILQ_INSERT_HEAD(&stcb->asoc.nets, net, sctp_next);
} else if (netfirst->ro.ro_rt == NULL) {
} else if (netfirst->ro.ro_nh == NULL) {
/*
* First one has NO route. Place this one ahead of the first
* one.
*/
TAILQ_INSERT_HEAD(&stcb->asoc.nets, net, sctp_next);
} else if (net->ro.ro_rt->rt_ifp != netfirst->ro.ro_rt->rt_ifp) {
} else if (net->ro.ro_nh->nh_ifp != netfirst->ro.ro_nh->nh_ifp) {
/*
* This one has a different interface than the one at the
* top of the list. Place it ahead.
@ -4100,11 +4092,11 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr,
/* End of the list */
TAILQ_INSERT_TAIL(&stcb->asoc.nets, net, sctp_next);
break;
} else if (netlook->ro.ro_rt == NULL) {
} else if (netlook->ro.ro_nh == NULL) {
/* next one has NO route */
TAILQ_INSERT_BEFORE(netfirst, net, sctp_next);
break;
} else if (netlook->ro.ro_rt->rt_ifp != net->ro.ro_rt->rt_ifp) {
} else if (netlook->ro.ro_nh->nh_ifp != net->ro.ro_nh->nh_ifp) {
TAILQ_INSERT_AFTER(&stcb->asoc.nets, netlook,
net, sctp_next);
break;
@ -4117,8 +4109,8 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr,
/* got to have a primary set */
if (stcb->asoc.primary_destination == 0) {
stcb->asoc.primary_destination = net;
} else if ((stcb->asoc.primary_destination->ro.ro_rt == NULL) &&
(net->ro.ro_rt) &&
} else if ((stcb->asoc.primary_destination->ro.ro_nh == NULL) &&
(net->ro.ro_nh) &&
((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0)) {
/* No route to current primary adopt new primary */
stcb->asoc.primary_destination = net;
@ -5459,14 +5451,9 @@ sctp_del_local_addr_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa)
TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) {
if (net->ro._s_addr == laddr->ifa) {
/* Yep, purge src address selected */
sctp_rtentry_t *rt;
/* delete this address if cached */
rt = net->ro.ro_rt;
if (rt != NULL) {
RTFREE(rt);
net->ro.ro_rt = NULL;
}
RO_NHFREE(&net->ro);
sctp_free_ifa(net->ro._s_addr);
net->ro._s_addr = NULL;
net->src_addr_selected = 0;

View File

@ -189,7 +189,7 @@ struct iterator_control {
#define SCTP_ITERATOR_STOP_CUR_INP 0x00000008
struct sctp_net_route {
sctp_rtentry_t *ro_rt;
struct nhop_object *ro_nh;
struct llentry *ro_lle;
char *ro_prepend;
uint16_t ro_plen;

View File

@ -350,7 +350,7 @@ sctp_find_alternate_net(struct sctp_tcb *stcb,
return (NULL);
}
}
if (alt->ro.ro_rt == NULL) {
if (alt->ro.ro_nh == NULL) {
if (alt->ro._s_addr) {
sctp_free_ifa(alt->ro._s_addr);
alt->ro._s_addr = NULL;
@ -358,7 +358,7 @@ sctp_find_alternate_net(struct sctp_tcb *stcb,
alt->src_addr_selected = 0;
}
if (((alt->dest_state & SCTP_ADDR_REACHABLE) == SCTP_ADDR_REACHABLE) &&
(alt->ro.ro_rt != NULL) &&
(alt->ro.ro_nh != NULL) &&
(!(alt->dest_state & SCTP_ADDR_UNCONFIRMED))) {
/* Found a reachable address */
break;
@ -937,10 +937,7 @@ sctp_t3rxt_timer(struct sctp_inpcb *inp,
net->src_addr_selected = 0;
/* Force a route allocation too */
if (net->ro.ro_rt) {
RTFREE(net->ro.ro_rt);
net->ro.ro_rt = NULL;
}
RO_NHFREE(&net->ro);
/* Was it our primary? */
if ((stcb->asoc.primary_destination == net) && (alt != net)) {
@ -1502,7 +1499,7 @@ sctp_pathmtu_timer(struct sctp_inpcb *inp,
net->src_addr_selected = 1;
}
if (net->ro._s_addr) {
mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._s_addr.sa, net->ro.ro_rt);
mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._s_addr.sa, net->ro.ro_nh);
#if defined(INET) || defined(INET6)
if (net->port) {
mtu -= sizeof(struct udphdr);

View File

@ -187,10 +187,7 @@ extern struct pr_usrreqs sctp_usrreqs;
if ((__net)) { \
if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(&(__net)->ref_count)) { \
(void)SCTP_OS_TIMER_STOP(&(__net)->rxt_timer.timer); \
if ((__net)->ro.ro_rt) { \
RTFREE((__net)->ro.ro_rt); \
(__net)->ro.ro_rt = NULL; \
} \
RO_NHFREE(&(__net)->ro); \
if ((__net)->src_addr_selected) { \
sctp_free_ifa((__net)->ro._s_addr); \
(__net)->ro._s_addr = NULL; \

View File

@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$");
#include <net/if.h>
#include <net/route.h>
#include <net/route/nhop.h>
#include <net/vnet.h>
#include <netinet/in.h>
@ -1411,8 +1412,8 @@ tcp_output(struct tcpcb *tp)
((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0),
NULL, NULL, tp->t_inpcb);
if (error == EMSGSIZE && tp->t_inpcb->inp_route6.ro_rt != NULL)
mtu = tp->t_inpcb->inp_route6.ro_rt->rt_mtu;
if (error == EMSGSIZE && tp->t_inpcb->inp_route6.ro_nh != NULL)
mtu = tp->t_inpcb->inp_route6.ro_nh->nh_mtu;
}
#endif /* INET6 */
#if defined(INET) && defined(INET6)
@ -1454,8 +1455,8 @@ tcp_output(struct tcpcb *tp)
((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), 0,
tp->t_inpcb);
if (error == EMSGSIZE && tp->t_inpcb->inp_route.ro_rt != NULL)
mtu = tp->t_inpcb->inp_route.ro_rt->rt_mtu;
if (error == EMSGSIZE && tp->t_inpcb->inp_route.ro_nh != NULL)
mtu = tp->t_inpcb->inp_route.ro_nh->nh_mtu;
}
#endif /* INET */

View File

@ -5601,8 +5601,8 @@ bbr_update_hardware_pacing_rate(struct tcp_bbr *bbr, uint32_t cts)
if (bbr->r_ctl.crte == NULL)
return;
if ((bbr->rc_inp->inp_route.ro_rt == NULL) ||
(bbr->rc_inp->inp_route.ro_rt->rt_ifp == NULL)) {
if ((bbr->rc_inp->inp_route.ro_nh == NULL) ||
(bbr->rc_inp->inp_route.ro_nh->nh_ifp == NULL)) {
/* Lost our routes? */
/* Clear the way for a re-attempt */
bbr->bbr_attempt_hdwr_pace = 0;
@ -5618,7 +5618,7 @@ bbr_update_hardware_pacing_rate(struct tcp_bbr *bbr, uint32_t cts)
rate = bbr_get_hardware_rate(bbr);
nrte = tcp_chg_pacing_rate(bbr->r_ctl.crte,
bbr->rc_tp,
bbr->rc_inp->inp_route.ro_rt->rt_ifp,
bbr->rc_inp->inp_route.ro_nh->nh_ifp,
rate,
(RS_PACING_GEQ|RS_PACING_SUB_OK),
&error);
@ -13975,8 +13975,8 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
((rsm || sack_rxmit) ? IP_NO_SND_TAG_RL : 0),
NULL, NULL, inp);
if (error == EMSGSIZE && inp->inp_route6.ro_rt != NULL)
mtu = inp->inp_route6.ro_rt->rt_mtu;
if (error == EMSGSIZE && inp->inp_route6.ro_nh != NULL)
mtu = inp->inp_route6.ro_nh->nh_mtu;
}
#endif /* INET6 */
#if defined(INET) && defined(INET6)
@ -14016,8 +14016,8 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
error = ip_output(m, inp->inp_options, &inp->inp_route,
((rsm || sack_rxmit) ? IP_NO_SND_TAG_RL : 0), 0,
inp);
if (error == EMSGSIZE && inp->inp_route.ro_rt != NULL)
mtu = inp->inp_route.ro_rt->rt_mtu;
if (error == EMSGSIZE && inp->inp_route.ro_nh != NULL)
mtu = inp->inp_route.ro_nh->nh_mtu;
}
#endif /* INET */
out:
@ -14302,8 +14302,8 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
(bbr->rc_past_init_win) &&
(bbr->rc_bbr_state != BBR_STATE_STARTUP) &&
(get_filter_value(&bbr->r_ctl.rc_delrate)) &&
(inp->inp_route.ro_rt &&
inp->inp_route.ro_rt->rt_ifp)) {
(inp->inp_route.ro_nh &&
inp->inp_route.ro_nh->nh_ifp)) {
/*
* We are past the initial window and
* have at least one measurement so we
@ -14317,7 +14317,7 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
rate_wanted = bbr_get_hardware_rate(bbr);
bbr->bbr_attempt_hdwr_pace = 1;
bbr->r_ctl.crte = tcp_set_pacing_rate(bbr->rc_tp,
inp->inp_route.ro_rt->rt_ifp,
inp->inp_route.ro_nh->nh_ifp,
rate_wanted,
(RS_PACING_GEQ|RS_PACING_SUB_OK),
&err);
@ -14344,7 +14344,7 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
tcp_bbr_tso_size_check(bbr, cts);
} else {
bbr_type_log_hdwr_pacing(bbr,
inp->inp_route.ro_rt->rt_ifp,
inp->inp_route.ro_nh->nh_ifp,
rate_wanted,
0,
__LINE__, cts, err);
@ -14361,8 +14361,8 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv)
if (inp->inp_snd_tag == NULL) {
/* A change during ip output disabled hw pacing? */
bbr->bbr_hdrw_pacing = 0;
} else if ((inp->inp_route.ro_rt == NULL) ||
(inp->inp_route.ro_rt->rt_ifp != inp->inp_snd_tag->ifp)) {
} else if ((inp->inp_route.ro_nh == NULL) ||
(inp->inp_route.ro_nh->nh_ifp != inp->inp_snd_tag->ifp)) {
/*
* We had an interface or route change,
* detach from the current hdwr pacing

View File

@ -76,6 +76,7 @@ __FBSDID("$FreeBSD$");
#include <vm/uma.h>
#include <net/route.h>
#include <net/route/nhop.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/vnet.h>
@ -2199,9 +2200,9 @@ tcp_notify(struct inpcb *inp, int error)
if (tp->t_state == TCPS_ESTABLISHED &&
(error == EHOSTUNREACH || error == ENETUNREACH ||
error == EHOSTDOWN)) {
if (inp->inp_route.ro_rt) {
RTFREE(inp->inp_route.ro_rt);
inp->inp_route.ro_rt = (struct rtentry *)NULL;
if (inp->inp_route.ro_nh) {
NH_FREE(inp->inp_route.ro_nh);
inp->inp_route.ro_nh = (struct nhop_object *)NULL;
}
return (inp);
} else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 &&

View File

@ -71,6 +71,7 @@ __FBSDID("$FreeBSD$");
#include <net/if.h>
#include <net/if_var.h>
#include <net/route.h>
#include <net/route/nhop.h>
#include <net/rss_config.h>
#include <netinet/in.h>
@ -761,9 +762,9 @@ udp_notify(struct inpcb *inp, int errno)
INP_WLOCK_ASSERT(inp);
if ((errno == EHOSTUNREACH || errno == ENETUNREACH ||
errno == EHOSTDOWN) && inp->inp_route.ro_rt) {
RTFREE(inp->inp_route.ro_rt);
inp->inp_route.ro_rt = (struct rtentry *)NULL;
errno == EHOSTDOWN) && inp->inp_route.ro_nh) {
NH_FREE(inp->inp_route.ro_nh);
inp->inp_route.ro_nh = (struct nhop_object *)NULL;
}
inp->inp_socket->so_error = errno;

View File

@ -375,8 +375,9 @@ extern const struct in6_addr in6addr_linklocal_allv2routers;
* IP6 route structure
*/
#if __BSD_VISIBLE
struct nhop_object;
struct route_in6 {
struct rtentry *ro_rt;
struct nhop_object *ro_nh;
struct llentry *ro_lle;
/*
* ro_prepend and ro_plen are only used for bpf to pass in a

View File

@ -97,6 +97,7 @@ __FBSDID("$FreeBSD$");
#include <net/if_llatbl.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/route/nhop.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
@ -109,6 +110,7 @@ __FBSDID("$FreeBSD$");
#include <netinet6/nd6.h>
#include <netinet/in_pcb.h>
#include <netinet6/in6_pcb.h>
#include <netinet6/in6_fib.h>
#include <netinet6/scope6_var.h>
static struct inpcb *in6_pcblookup_hash_locked(struct inpcbinfo *,

View File

@ -91,6 +91,7 @@ __FBSDID("$FreeBSD$");
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/route/nhop.h>
#include <net/if_llatbl.h>
#ifdef RADIX_MPATH
#include <net/radix_mpath.h>
@ -134,7 +135,7 @@ VNET_DEFINE(int, ip6_prefer_tempaddr) = 0;
static int selectroute(struct sockaddr_in6 *, struct ip6_pktopts *,
struct ip6_moptions *, struct route_in6 *, struct ifnet **,
struct rtentry **, int, u_int);
struct nhop_object **, int, u_int, uint32_t);
static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *,
struct ip6_moptions *, struct ifnet **,
struct ifnet *, u_int);
@ -625,11 +626,12 @@ in6_selectsrc_addr(uint32_t fibnum, const struct in6_addr *dst,
static int
selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
struct ip6_moptions *mopts, struct route_in6 *ro,
struct ifnet **retifp, struct rtentry **retrt, int norouteok, u_int fibnum)
struct ifnet **retifp, struct nhop_object **retnh, int norouteok,
u_int fibnum, uint32_t flowid)
{
int error = 0;
struct ifnet *ifp = NULL;
struct rtentry *rt = NULL;
struct nhop_object *nh = NULL;
struct sockaddr_in6 *sin6_next;
struct in6_pktinfo *pi = NULL;
struct in6_addr *dst = &dstsock->sin6_addr;
@ -654,7 +656,7 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
/* XXX boundary check is assumed to be already done. */
ifp = ifnet_byindex(pi->ipi6_ifindex);
if (ifp != NULL &&
(norouteok || retrt == NULL ||
(norouteok || retnh == NULL ||
IN6_IS_ADDR_MULTICAST(dst))) {
/*
* we do not have to check or get the route for
@ -707,26 +709,31 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
}
ron = &opts->ip6po_nextroute;
/* Use a cached route if it exists and is valid. */
if (ron->ro_rt != NULL && (
(ron->ro_rt->rt_flags & RTF_UP) == 0 ||
if (ron->ro_nh != NULL && (
!NH_IS_VALID(ron->ro_nh) ||
ron->ro_dst.sin6_family != AF_INET6 ||
!IN6_ARE_ADDR_EQUAL(&ron->ro_dst.sin6_addr,
&sin6_next->sin6_addr)))
RO_RTFREE(ron);
if (ron->ro_rt == NULL) {
RO_NHFREE(ron);
if (ron->ro_nh == NULL) {
ron->ro_dst = *sin6_next;
in6_rtalloc(ron, fibnum); /* multi path case? */
/*
* sin6_next is not link-local OR scopeid is 0,
* no need to clear scope
*/
ron->ro_nh = fib6_lookup(fibnum,
&sin6_next->sin6_addr, 0, NHR_REF, flowid);
}
/*
* The node identified by that address must be a
* neighbor of the sending host.
*/
if (ron->ro_rt == NULL ||
(ron->ro_rt->rt_flags & RTF_GATEWAY) != 0)
if (ron->ro_nh == NULL ||
(ron->ro_nh->nh_flags & NHF_GATEWAY) != 0)
error = EHOSTUNREACH;
else {
rt = ron->ro_rt;
ifp = rt->rt_ifp;
nh = ron->ro_nh;
ifp = nh->nh_ifp;
}
goto done;
}
@ -737,15 +744,14 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
* cached destination, in case of sharing the cache with IPv4.
*/
if (ro) {
if (ro->ro_rt &&
(!(ro->ro_rt->rt_flags & RTF_UP) ||
if (ro->ro_nh &&
(!NH_IS_VALID(ro->ro_nh) ||
((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 ||
!IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr,
dst))) {
RTFREE(ro->ro_rt);
ro->ro_rt = (struct rtentry *)NULL;
RO_NHFREE(ro);
}
if (ro->ro_rt == (struct rtentry *)NULL) {
if (ro->ro_nh == (struct nhop_object *)NULL) {
struct sockaddr_in6 *sa6;
/* No route yet, so try to acquire one */
@ -754,15 +760,28 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
*sa6 = *dstsock;
sa6->sin6_scope_id = 0;
/*
* Currently dst has its scope id embedded iff it is link-local.
* The new routing API accepts the scope id as a separate argument,
* so strip it from dst before the lookup and restore it afterwards.
*/
uint32_t scopeid = 0;
if (IN6_IS_SCOPE_LINKLOCAL(&sa6->sin6_addr)) {
/* Unwrap in6_getscope() and in6_clearscope() */
scopeid = ntohs(sa6->sin6_addr.s6_addr16[1]);
sa6->sin6_addr.s6_addr16[1] = 0;
}
#ifdef RADIX_MPATH
rtalloc_mpath_fib((struct route *)ro,
ntohl(sa6->sin6_addr.s6_addr32[3]), fibnum);
#else
ro->ro_rt = in6_rtalloc1((struct sockaddr *)
&ro->ro_dst, 0, 0UL, fibnum);
if (ro->ro_rt)
RT_UNLOCK(ro->ro_rt);
ro->ro_nh = fib6_lookup(fibnum,
&sa6->sin6_addr, scopeid, NHR_REF, flowid);
#endif
if (IN6_IS_SCOPE_LINKLOCAL(&sa6->sin6_addr))
sa6->sin6_addr.s6_addr16[1] = htons(scopeid);
}
/*
@ -772,17 +791,11 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
if (opts && opts->ip6po_nexthop)
goto done;
if (ro->ro_rt) {
ifp = ro->ro_rt->rt_ifp;
if (ifp == NULL) { /* can this really happen? */
RTFREE(ro->ro_rt);
ro->ro_rt = NULL;
}
}
if (ro->ro_rt == NULL)
if (ro->ro_nh)
ifp = ro->ro_nh->nh_ifp;
else
error = EHOSTUNREACH;
rt = ro->ro_rt;
nh = ro->ro_nh;
/*
* Check if the outgoing interface conflicts with
@ -803,7 +816,7 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
}
done:
if (ifp == NULL && rt == NULL) {
if (ifp == NULL && nh == NULL) {
/*
* This can happen if the caller did not pass a cached route
* nor any other hints. We treat this case as an error.
@ -814,26 +827,14 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
IP6STAT_INC(ip6s_noroute);
if (retifp != NULL) {
*retifp = ifp;
/*
* Adjust the "outgoing" interface. If we're going to loop
* the packet back to ourselves, the ifp would be the loopback
* interface. However, we'd rather know the interface associated
* to the destination address (which should probably be one of
* our own addresses.)
*/
if (rt) {
if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) &&
(rt->rt_gateway->sa_family == AF_LINK))
*retifp =
ifnet_byindex(((struct sockaddr_dl *)
rt->rt_gateway)->sdl_index);
}
if (nh != NULL)
*retifp = nh->nh_aifp;
else
*retifp = ifp;
}
if (retrt != NULL)
*retrt = rt; /* rt may be NULL */
if (retnh != NULL)
*retnh = nh; /* nh may be NULL */
return (error);
}
@ -845,20 +846,20 @@ in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
{
int error;
struct route_in6 sro;
struct rtentry *rt = NULL;
int rt_flags;
struct nhop_object *nh = NULL;
uint16_t nh_flags;
KASSERT(retifp != NULL, ("%s: retifp is NULL", __func__));
bzero(&sro, sizeof(sro));
rt_flags = 0;
nh_flags = 0;
error = selectroute(dstsock, opts, mopts, &sro, retifp, &rt, 1, fibnum);
error = selectroute(dstsock, opts, mopts, &sro, retifp, &nh, 1, fibnum, 0);
if (rt)
rt_flags = rt->rt_flags;
if (rt && rt == sro.ro_rt)
RTFREE(rt);
if (nh != NULL)
nh_flags = nh->nh_flags;
if (nh != NULL && nh == sro.ro_nh)
NH_FREE(nh);
if (error != 0) {
/* Help ND. See oifp comment in in6_selectsrc(). */
@ -887,8 +888,8 @@ in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
* We thus reject the case here.
*/
if (rt_flags & (RTF_REJECT | RTF_BLACKHOLE)) {
error = (rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH);
if (nh_flags & (NHF_REJECT | NHF_BLACKHOLE)) {
error = (nh_flags & NHF_HOST ? EHOSTUNREACH : ENETUNREACH);
return (error);
}
@ -899,11 +900,11 @@ in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
int
in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts,
struct ip6_moptions *mopts, struct route_in6 *ro,
struct ifnet **retifp, struct rtentry **retrt, u_int fibnum)
struct ifnet **retifp, struct nhop_object **retnh, u_int fibnum, uint32_t flowid)
{
return (selectroute(dstsock, opts, mopts, ro, retifp,
retrt, 0, fibnum));
retnh, 0, fibnum, flowid));
}
/*

View File

@ -95,6 +95,7 @@ __FBSDID("$FreeBSD$");
#include <net/if_llatbl.h>
#include <net/netisr.h>
#include <net/route.h>
#include <net/route/nhop.h>
#include <net/pfil.h>
#include <net/rss_config.h>
#include <net/vnet.h>
@ -403,9 +404,9 @@ ip6_output_send(struct inpcb *inp, struct ifnet *ifp, struct ifnet *origifp,
* This function may modify ver and hlim only.
* The mbuf chain containing the packet will be freed.
* The mbuf opt, if present, will not be freed.
* If route_in6 ro is present and has ro_rt initialized, route lookup would be
* skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL,
* then result of route lookup is stored in ro->ro_rt.
* If route_in6 ro is present and has ro_nh initialized, route lookup would be
* skipped and ro->ro_nh would be used. If ro is present but ro->ro_nh is NULL,
* then result of route lookup is stored in ro->ro_nh.
*
* Type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int, and nd_ifinfo.linkmtu
* is uint32_t. So we use u_long to hold the largest one, which is rt_mtu.
@ -425,7 +426,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
struct mbuf *m = m0;
struct mbuf *mprev;
struct route_in6 *ro_pmtu;
struct rtentry *rt;
struct nhop_object *nh;
struct sockaddr_in6 *dst, sin6, src_sa, dst_sa;
struct in6_addr odst;
u_char *nexthdrp;
@ -666,7 +667,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
ip6->ip6_hlim = V_ip6_defmcasthlim;
}
if (ro == NULL || ro->ro_rt == NULL) {
if (ro == NULL || ro->ro_nh == NULL) {
bzero(dst, sizeof(*dst));
dst->sin6_family = AF_INET6;
dst->sin6_len = sizeof(*dst);
@ -676,29 +677,26 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
* Validate route against routing table changes.
* Make sure that the address family is set in route.
*/
rt = NULL;
nh = NULL;
ifp = NULL;
mtu = 0;
if (ro != NULL) {
if (ro->ro_rt != NULL && inp != NULL) {
if (ro->ro_nh != NULL && inp != NULL) {
ro->ro_dst.sin6_family = AF_INET6; /* XXX KASSERT? */
RT_VALIDATE((struct route *)ro, &inp->inp_rt_cookie,
NH_VALIDATE((struct route *)ro, &inp->inp_rt_cookie,
fibnum);
}
if (ro->ro_rt != NULL && fwd_tag == NULL &&
((ro->ro_rt->rt_flags & RTF_UP) == 0 ||
ro->ro_rt->rt_ifp == NULL ||
!RT_LINK_IS_UP(ro->ro_rt->rt_ifp) ||
if (ro->ro_nh != NULL && fwd_tag == NULL &&
(!NH_IS_VALID(ro->ro_nh) ||
ro->ro_dst.sin6_family != AF_INET6 ||
!IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst)))
RO_INVALIDATE_CACHE(ro);
if (ro->ro_rt != NULL && fwd_tag == NULL &&
(ro->ro_rt->rt_flags & RTF_UP) &&
if (ro->ro_nh != NULL && fwd_tag == NULL &&
ro->ro_dst.sin6_family == AF_INET6 &&
IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst)) {
rt = ro->ro_rt;
ifp = ro->ro_rt->rt_ifp;
nh = ro->ro_nh;
ifp = nh->nh_ifp;
} else {
if (ro->ro_lle)
LLE_FREE(ro->ro_lle); /* zeros ro_lle */
@ -710,7 +708,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
dst_sa.sin6_addr = ip6->ip6_dst;
}
error = in6_selectroute(&dst_sa, opt, im6o, ro, &ifp,
&rt, fibnum);
&nh, fibnum, m->m_pkthdr.flowid);
if (error != 0) {
IP6STAT_INC(ip6s_noroute);
if (ifp != NULL)
@ -720,17 +718,17 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
if (ifp != NULL)
mtu = ifp->if_mtu;
}
if (rt == NULL) {
if (nh == NULL) {
/*
* If in6_selectroute() does not return a route entry
* If in6_selectroute() does not return a nexthop
* dst may not have been updated.
*/
*dst = dst_sa; /* XXX */
} else {
if (rt->rt_flags & RTF_HOST)
mtu = rt->rt_mtu;
ia = (struct in6_ifaddr *)(rt->rt_ifa);
counter_u64_add(rt->rt_pksent, 1);
if (nh->nh_flags & NHF_HOST)
mtu = nh->nh_mtu;
ia = (struct in6_ifaddr *)(nh->nh_ifa);
counter_u64_add(nh->nh_pksent, 1);
}
} else {
struct nhop6_extended nh6;
@ -781,7 +779,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
;
}
/* Then rt (for unicast) and ifp must be non-NULL valid values. */
/* Then nh (for unicast) and ifp must be non-NULL valid values. */
if ((flags & IPV6_FORWARDING) == 0) {
/* XXX: the FORWARDING flag can be set for mrouting. */
in6_ifstat_inc(ifp, ifs6_out_request);
@ -852,8 +850,8 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
}
/* All scope ID checks are successful. */
if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
if (opt && opt->ip6po_nextroute.ro_rt) {
if (nh && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
if (opt && opt->ip6po_nextroute.ro_nh) {
/*
* The nexthop is explicitly specified by the
* application. We assume the next hop is an IPv6
@ -861,8 +859,8 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt,
*/
dst = (struct sockaddr_in6 *)opt->ip6po_nexthop;
}
else if ((rt->rt_flags & RTF_GATEWAY))
dst = (struct sockaddr_in6 *)rt->rt_gateway;
else if ((nh->nh_flags & NHF_GATEWAY))
dst = &nh->gw6_sa;
}
if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) {
@ -1517,8 +1515,8 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, int do_lookup,
mtu = ro_pmtu->ro_mtu;
}
if (ro_pmtu != NULL && ro_pmtu->ro_rt != NULL)
mtu = ro_pmtu->ro_rt->rt_mtu;
if (ro_pmtu != NULL && ro_pmtu->ro_nh != NULL)
mtu = ro_pmtu->ro_nh->nh_mtu;
return (ip6_calcmtu(ifp, dst, mtu, mtup, alwaysfragp, proto));
}
@ -2651,9 +2649,9 @@ ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
if (optname == -1 || optname == IPV6_TCLASS)
pktopt->ip6po_tclass = -1;
if (optname == -1 || optname == IPV6_NEXTHOP) {
if (pktopt->ip6po_nextroute.ro_rt) {
RTFREE(pktopt->ip6po_nextroute.ro_rt);
pktopt->ip6po_nextroute.ro_rt = NULL;
if (pktopt->ip6po_nextroute.ro_nh) {
NH_FREE(pktopt->ip6po_nextroute.ro_nh);
pktopt->ip6po_nextroute.ro_nh = NULL;
}
if (pktopt->ip6po_nexthop)
free(pktopt->ip6po_nexthop, M_IP6OPT);
@ -2673,9 +2671,9 @@ ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname)
if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr)
free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT);
pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL;
if (pktopt->ip6po_route.ro_rt) {
RTFREE(pktopt->ip6po_route.ro_rt);
pktopt->ip6po_route.ro_rt = NULL;
if (pktopt->ip6po_route.ro_nh) {
NH_FREE(pktopt->ip6po_route.ro_nh);
pktopt->ip6po_route.ro_nh = NULL;
}
}
if (optname == -1 || optname == IPV6_DSTOPTS) {

View File

@ -416,7 +416,7 @@ int in6_selectsrc_addr(uint32_t, const struct in6_addr *,
uint32_t, struct ifnet *, struct in6_addr *, int *);
int in6_selectroute(struct sockaddr_in6 *, struct ip6_pktopts *,
struct ip6_moptions *, struct route_in6 *, struct ifnet **,
struct rtentry **, u_int);
struct nhop_object **, u_int, uint32_t);
u_int32_t ip6_randomid(void);
u_int32_t ip6_randomflowlabel(void);
void in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset);