diff --git a/sys/kern/uipc_ktls.c b/sys/kern/uipc_ktls.c index 1d96c56265f3..85508cc72cc2 100644 --- a/sys/kern/uipc_ktls.c +++ b/sys/kern/uipc_ktls.c @@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$"); #include #ifdef RSS #include +#include #include #endif #if defined(INET) || defined(INET6) @@ -754,7 +755,7 @@ ktls_alloc_snd_tag(struct inpcb *inp, struct ktls_session *tls, bool force, { union if_snd_tag_alloc_params params; struct ifnet *ifp; - struct rtentry *rt; + struct nhop_object *nh; struct tcpcb *tp; int error; @@ -792,12 +793,12 @@ ktls_alloc_snd_tag(struct inpcb *inp, struct ktls_session *tls, bool force, * enabled after a connection has completed key negotiation in * userland, the cached route will be present in practice. */ - rt = inp->inp_route.ro_rt; - if (rt == NULL || rt->rt_ifp == NULL) { + nh = inp->inp_route.ro_nh; + if (nh == NULL) { INP_RUNLOCK(inp); return (ENXIO); } - ifp = rt->rt_ifp; + ifp = nh->nh_ifp; if_ref(ifp); params.hdr.type = IF_SND_TAG_TYPE_TLS; diff --git a/sys/net/radix_mpath.c b/sys/net/radix_mpath.c index b6ad19a4bf4d..6ec485ea8ef8 100644 --- a/sys/net/radix_mpath.c +++ b/sys/net/radix_mpath.c @@ -55,6 +55,8 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include +#include #include #include #include @@ -257,42 +259,41 @@ rt_mpath_select(struct rtentry *rte, uint32_t hash) void rtalloc_mpath_fib(struct route *ro, uint32_t hash, u_int fibnum) { - struct rtentry *rt; + struct rtentry *rt, *rt_tmp; /* * XXX we don't attempt to lookup cached route again; what should * be done for sendto(3) case? 
*/ - if (ro->ro_rt && ro->ro_rt->rt_ifp && (ro->ro_rt->rt_flags & RTF_UP) - && RT_LINK_IS_UP(ro->ro_rt->rt_ifp)) + if (ro->ro_nh && RT_LINK_IS_UP(ro->ro_nh->nh_ifp)) return; - ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, 0, fibnum); + ro->ro_nh = NULL; + rt_tmp = rtalloc1_fib(&ro->ro_dst, 1, 0, fibnum); /* if the route does not exist or it is not multipath, don't care */ - if (ro->ro_rt == NULL) + if (rt_tmp == NULL) return; - if (rn_mpath_next((struct radix_node *)ro->ro_rt) == NULL) { - RT_UNLOCK(ro->ro_rt); + if (rn_mpath_next((struct radix_node *)rt_tmp) == NULL) { + ro->ro_nh = rt_tmp->rt_nhop; + nhop_ref_object(ro->ro_nh); + RT_UNLOCK(rt_tmp); return; } - rt = rt_mpath_selectrte(ro->ro_rt, hash); + rt = rt_mpath_selectrte(rt_tmp, hash); /* XXX try filling rt_gwroute and avoid unreachable gw */ /* gw selection has failed - there must be only zero weight routes */ if (!rt) { - RT_UNLOCK(ro->ro_rt); - ro->ro_rt = NULL; + RT_UNLOCK(rt_tmp); return; } - if (ro->ro_rt != rt) { - RTFREE_LOCKED(ro->ro_rt); - ro->ro_rt = rt; - RT_LOCK(ro->ro_rt); - RT_ADDREF(ro->ro_rt); - - } - RT_UNLOCK(ro->ro_rt); + if (rt_tmp != rt) { + RTFREE_LOCKED(rt_tmp); + ro->ro_nh = rt->rt_nhop; + nhop_ref_object(ro->ro_nh); + } else + RT_UNLOCK(rt_tmp); } void diff --git a/sys/net/route.c b/sys/net/route.c index b68936d2c74f..88af8d9698fa 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -441,15 +441,18 @@ rtalloc_ign_fib(struct route *ro, u_long ignore, u_int fibnum) { struct rtentry *rt; - if ((rt = ro->ro_rt) != NULL) { - if (rt->rt_ifp != NULL && rt->rt_flags & RTF_UP) + if (ro->ro_nh != NULL) { + if (NH_IS_VALID(ro->ro_nh)) return; - RTFREE(rt); - ro->ro_rt = NULL; + NH_FREE(ro->ro_nh); + ro->ro_nh = NULL; + } + rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum); + if (rt != NULL) { + ro->ro_nh = rt->rt_nhop; + nhop_ref_object(rt->rt_nhop); + RT_UNLOCK(rt); } - ro->ro_rt = rtalloc1_fib(&ro->ro_dst, 1, ignore, fibnum); - if (ro->ro_rt) - RT_UNLOCK(ro->ro_rt); } /* diff --git 
a/sys/net/route.h b/sys/net/route.h index 6eb787b5157b..d7a2fe91ee35 100644 --- a/sys/net/route.h +++ b/sys/net/route.h @@ -51,7 +51,7 @@ * with its length. */ struct route { - struct rtentry *ro_rt; + struct nhop_object *ro_nh; struct llentry *ro_lle; /* * ro_prepend and ro_plen are only used for bpf to pass in a @@ -227,21 +227,6 @@ struct rtentry { /* Control plane route request flags */ #define NHR_COPY 0x100 /* Copy rte data */ -#ifdef _KERNEL -/* rte<>ro_flags translation */ -static inline void -rt_update_ro_flags(struct route *ro) -{ - int rt_flags = ro->ro_rt->rt_flags; - - ro->ro_flags &= ~ (RT_REJECT|RT_BLACKHOLE|RT_HAS_GW); - - ro->ro_flags |= (rt_flags & RTF_REJECT) ? RT_REJECT : 0; - ro->ro_flags |= (rt_flags & RTF_BLACKHOLE) ? RT_BLACKHOLE : 0; - ro->ro_flags |= (rt_flags & RTF_GATEWAY) ? RT_HAS_GW : 0; -} -#endif - /* * Routing statistics. */ @@ -431,12 +416,22 @@ struct rt_addrinfo { RTFREE((_ro)->ro_rt); \ } while (0) +#define RO_NHFREE(_ro) do { \ + if ((_ro)->ro_nh) { \ + NH_FREE((_ro)->ro_nh); \ + (_ro)->ro_nh = NULL; \ + } \ +} while (0) + #define RO_INVALIDATE_CACHE(ro) do { \ - RO_RTFREE(ro); \ if ((ro)->ro_lle != NULL) { \ LLE_FREE((ro)->ro_lle); \ (ro)->ro_lle = NULL; \ } \ + if ((ro)->ro_nh != NULL) { \ + NH_FREE((ro)->ro_nh); \ + (ro)->ro_nh = NULL; \ + } \ } while (0) /* @@ -444,7 +439,7 @@ struct rt_addrinfo { * out-of-date cache, simply free it. 
Update the generation number * for the new allocation */ -#define RT_VALIDATE(ro, cookiep, fibnum) do { \ +#define NH_VALIDATE(ro, cookiep, fibnum) do { \ rt_gen_t cookie = RT_GEN(fibnum, (ro)->ro_dst.sa_family); \ if (*(cookiep) != cookie) { \ RO_INVALIDATE_CACHE(ro); \ diff --git a/sys/net/route_var.h b/sys/net/route_var.h index 92b5e433a972..f675609fb2fa 100644 --- a/sys/net/route_var.h +++ b/sys/net/route_var.h @@ -80,7 +80,7 @@ _Static_assert(__offsetof(_s1, _f1) == __offsetof(_s2, _f2), \ CHK_STRUCT_FIELD_GENERIC(struct route, _field, _route_new, _field) #define CHK_STRUCT_ROUTE_FIELDS(_route_new) \ - _CHK_ROUTE_FIELD(_route_new, ro_rt) \ + _CHK_ROUTE_FIELD(_route_new, ro_nh) \ _CHK_ROUTE_FIELD(_route_new, ro_lle) \ _CHK_ROUTE_FIELD(_route_new, ro_prepend)\ _CHK_ROUTE_FIELD(_route_new, ro_plen) \ diff --git a/sys/netinet/in_fib.c b/sys/netinet/in_fib.c index 4456856c426e..fabf5a5f77ac 100644 --- a/sys/netinet/in_fib.c +++ b/sys/netinet/in_fib.c @@ -62,6 +62,10 @@ __FBSDID("$FreeBSD$"); #include #ifdef INET + +/* Verify struct route compatibility */ +/* Assert 'struct route_in' is compatible with 'struct route' */ +CHK_STRUCT_ROUTE_COMPAT(struct route_in, ro_dst4); static void fib4_rte_to_nh_basic(struct nhop_object *nh, struct in_addr dst, uint32_t flags, struct nhop4_basic *pnh4); static void fib4_rte_to_nh_extended(struct nhop_object *nh, struct in_addr dst, diff --git a/sys/netinet/in_fib.h b/sys/netinet/in_fib.h index ff78967061e4..1a3cdd73a5d7 100644 --- a/sys/netinet/in_fib.h +++ b/sys/netinet/in_fib.h @@ -32,6 +32,19 @@ #ifndef _NETINET_IN_FIB_H_ #define _NETINET_IN_FIB_H_ +struct route_in { + /* common fields shared among all 'struct route' */ + struct nhop_object *ro_nh; + struct llentry *ro_lle; + char *ro_prepend; + uint16_t ro_plen; + uint16_t ro_flags; + uint16_t ro_mtu; /* saved ro_nh mtu */ + uint16_t spare; + /* custom sockaddr */ + struct sockaddr_in ro_dst4; +}; + /* Basic nexthop info used for uRPF/mtu checks */ struct nhop4_basic { struct 
ifnet *nh_ifp; /* Logical egress interface */ diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index 9750bd69011d..fa9a4929e98d 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -63,6 +63,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -72,6 +73,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -980,10 +982,11 @@ ip_forward(struct mbuf *m, int srcrt) ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), M_GETFIB(m)); #else - in_rtalloc_ign(&ro, 0, M_GETFIB(m)); + ro.ro_nh = fib4_lookup(M_GETFIB(m), ip->ip_dst, 0, NHR_REF, + m->m_pkthdr.flowid); #endif - if (ro.ro_rt != NULL) { - ia = ifatoia(ro.ro_rt->rt_ifa); + if (ro.ro_nh != NULL) { + ia = ifatoia(ro.ro_nh->nh_ifa); } else ia = NULL; /* @@ -1045,19 +1048,18 @@ ip_forward(struct mbuf *m, int srcrt) dest.s_addr = 0; if (!srcrt && V_ipsendredirects && ia != NULL && ia->ia_ifp == m->m_pkthdr.rcvif) { - struct rtentry *rt; + struct nhop_object *nh; - rt = ro.ro_rt; + nh = ro.ro_nh; - if (rt && (rt->rt_flags & (RTF_DYNAMIC|RTF_MODIFIED)) == 0 && - satosin(rt_key(rt))->sin_addr.s_addr != 0) { -#define RTA(rt) ((struct in_ifaddr *)(rt->rt_ifa)) + if (nh != NULL && ((nh->nh_flags & (NHF_REDIRECT|NHF_DEFAULT)) == 0)) { + struct in_ifaddr *nh_ia = (struct in_ifaddr *)(nh->nh_ifa); u_long src = ntohl(ip->ip_src.s_addr); - if (RTA(rt) && - (src & RTA(rt)->ia_subnetmask) == RTA(rt)->ia_subnet) { - if (rt->rt_flags & RTF_GATEWAY) - dest.s_addr = satosin(rt->rt_gateway)->sin_addr.s_addr; + if (nh_ia != NULL && + (src & nh_ia->ia_subnetmask) == nh_ia->ia_subnet) { + if (nh->nh_flags & NHF_GATEWAY) + dest.s_addr = nh->gw4_sa.sin_addr.s_addr; else dest.s_addr = ip->ip_dst.s_addr; /* Router requirements says to only send host redirects */ @@ -1069,9 +1071,9 @@ ip_forward(struct mbuf *m, int srcrt) error = ip_output(m, NULL, &ro, IP_FORWARDING, NULL, NULL); - if (error == EMSGSIZE && ro.ro_rt) - mtu = ro.ro_rt->rt_mtu; - 
RO_RTFREE(&ro); + if (error == EMSGSIZE && ro.ro_nh) + mtu = ro.ro_nh->nh_mtu; + RO_NHFREE(&ro); if (error) IPSTAT_INC(ips_cantforward); diff --git a/sys/netinet/ip_output.c b/sys/netinet/ip_output.c index f6323c9ce683..ef858a54fab9 100644 --- a/sys/netinet/ip_output.c +++ b/sys/netinet/ip_output.c @@ -67,6 +67,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #ifdef RADIX_MPATH #include #endif @@ -78,6 +79,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -287,6 +289,19 @@ ip_output_send(struct inpcb *inp, struct ifnet *ifp, struct mbuf *m, return (error); } +/* rte<>ro_flags translation */ +static inline void +rt_update_ro_flags(struct route *ro) +{ + int nh_flags = ro->ro_nh->nh_flags; + + ro->ro_flags &= ~ (RT_REJECT|RT_BLACKHOLE|RT_HAS_GW); + + ro->ro_flags |= (nh_flags & NHF_REJECT) ? RT_REJECT : 0; + ro->ro_flags |= (nh_flags & NHF_BLACKHOLE) ? RT_BLACKHOLE : 0; + ro->ro_flags |= (nh_flags & NHF_GATEWAY) ? RT_HAS_GW : 0; +} + /* * IP output. The packet in mbuf chain m contains a skeletal IP * header (with len, off, ttl, proto, tos, src, dst). @@ -368,7 +383,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, dst = (struct sockaddr_in *)&ro->ro_dst; else dst = &sin; - if (ro == NULL || ro->ro_rt == NULL) { + if (ro == NULL || ro->ro_nh == NULL) { bzero(dst, sizeof(*dst)); dst->sin_family = AF_INET; dst->sin_len = sizeof(*dst); @@ -380,8 +395,8 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, * Validate route against routing table additions; * a better/more specific route might have been added. 
*/ - if (inp != NULL && ro != NULL && ro->ro_rt != NULL) - RT_VALIDATE(ro, &inp->inp_rt_cookie, fibnum); + if (inp != NULL && ro != NULL && ro->ro_nh != NULL) + NH_VALIDATE(ro, &inp->inp_rt_cookie, fibnum); /* * If there is a cached route, * check that it is to the same destination @@ -390,9 +405,8 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, * cache with IPv6. * Also check whether routing cache needs invalidation. */ - if (ro != NULL && ro->ro_rt != NULL && - ((ro->ro_rt->rt_flags & RTF_UP) == 0 || - ro->ro_rt->rt_ifp == NULL || !RT_LINK_IS_UP(ro->ro_rt->rt_ifp) || + if (ro != NULL && ro->ro_nh != NULL && + ((!NH_IS_VALID(ro->ro_nh)) || !RT_LINK_IS_UP(ro->ro_nh->nh_ifp) || dst->sin_family != AF_INET || dst->sin_addr.s_addr != ip->ip_dst.s_addr)) RO_INVALIDATE_CACHE(ro); @@ -450,7 +464,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, else src.s_addr = INADDR_ANY; } else if (ro != NULL) { - if (ro->ro_rt == NULL) { + if (ro->ro_nh == NULL) { /* * We want to do any cloning requested by the link * layer, as this is probably required in all cases @@ -461,12 +475,11 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr), fibnum); #else - in_rtalloc_ign(ro, 0, fibnum); + ro->ro_nh = fib4_lookup(fibnum, dst->sin_addr, 0, + NHR_REF, m->m_pkthdr.flowid); #endif - if (ro->ro_rt == NULL || - (ro->ro_rt->rt_flags & RTF_UP) == 0 || - ro->ro_rt->rt_ifp == NULL || - !RT_LINK_IS_UP(ro->ro_rt->rt_ifp)) { + if (ro->ro_nh == NULL || (!NH_IS_VALID(ro->ro_nh)) || + !RT_LINK_IS_UP(ro->ro_nh->nh_ifp)) { #if defined(IPSEC) || defined(IPSEC_SUPPORT) /* * There is no route for this packet, but it is @@ -481,20 +494,20 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, goto bad; } } - ia = ifatoia(ro->ro_rt->rt_ifa); - ifp = ro->ro_rt->rt_ifp; - counter_u64_add(ro->ro_rt->rt_pksent, 1); + ia = ifatoia(ro->ro_nh->nh_ifa); + ifp = 
ro->ro_nh->nh_ifp; + counter_u64_add(ro->ro_nh->nh_pksent, 1); rt_update_ro_flags(ro); - if (ro->ro_rt->rt_flags & RTF_GATEWAY) - gw = (struct sockaddr_in *)ro->ro_rt->rt_gateway; - if (ro->ro_rt->rt_flags & RTF_HOST) - isbroadcast = (ro->ro_rt->rt_flags & RTF_BROADCAST); + if (ro->ro_nh->nh_flags & NHF_GATEWAY) + gw = &ro->ro_nh->gw4_sa; + if (ro->ro_nh->nh_flags & NHF_HOST) + isbroadcast = (ro->ro_nh->nh_flags & NHF_BROADCAST); else if (ifp->if_flags & IFF_BROADCAST) isbroadcast = in_ifaddr_broadcast(gw->sin_addr, ia); else isbroadcast = 0; - if (ro->ro_rt->rt_flags & RTF_HOST) - mtu = ro->ro_rt->rt_mtu; + if (ro->ro_nh->nh_flags & NHF_HOST) + mtu = ro->ro_nh->nh_mtu; else mtu = ifp->if_mtu; src = IA_SIN(ia)->sin_addr; @@ -537,9 +550,9 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, } /* Catch a possible divide by zero later. */ - KASSERT(mtu > 0, ("%s: mtu %d <= 0, ro=%p (rt_flags=0x%08x) ifp=%p", + KASSERT(mtu > 0, ("%s: mtu %d <= 0, ro=%p (nh_flags=0x%08x) ifp=%p", __func__, mtu, ro, - (ro != NULL && ro->ro_rt != NULL) ? ro->ro_rt->rt_flags : 0, ifp)); + (ro != NULL && ro->ro_nh != NULL) ? 
ro->ro_nh->nh_flags : 0, ifp)); if (IN_MULTICAST(ntohl(ip->ip_dst.s_addr))) { m->m_flags |= M_MCAST; @@ -702,7 +715,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags, case -1: /* Need to try again */ /* Reset everything for a new round */ if (ro != NULL) { - RO_RTFREE(ro); + RO_NHFREE(ro); ro->ro_prepend = NULL; } gw = dst; diff --git a/sys/netinet/sctp_asconf.c b/sys/netinet/sctp_asconf.c index 99506668ed7d..4617d57579ce 100644 --- a/sys/netinet/sctp_asconf.c +++ b/sys/netinet/sctp_asconf.c @@ -980,8 +980,7 @@ sctp_asconf_nets_cleanup(struct sctp_tcb *stcb, struct sctp_ifn *ifn) ((ifn == NULL) || (SCTP_GET_IF_INDEX_FROM_ROUTE(&net->ro) != ifn->ifn_index))) { /* clear any cached route */ - RTFREE(net->ro.ro_rt); - net->ro.ro_rt = NULL; + RO_NHFREE(&net->ro); } /* clear any cached source address */ if (net->src_addr_selected) { @@ -1090,10 +1089,7 @@ sctp_path_check_and_react(struct sctp_tcb *stcb, struct sctp_ifa *newifa) if (addrnum == 1) { TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { /* clear any cached route and source address */ - if (net->ro.ro_rt) { - RTFREE(net->ro.ro_rt); - net->ro.ro_rt = NULL; - } + RO_NHFREE(&net->ro); if (net->src_addr_selected) { sctp_free_ifa(net->ro._s_addr); net->ro._s_addr = NULL; @@ -1112,10 +1108,7 @@ sctp_path_check_and_react(struct sctp_tcb *stcb, struct sctp_ifa *newifa) /* Multiple local addresses exsist in the association. 
*/ TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { /* clear any cached route and source address */ - if (net->ro.ro_rt) { - RTFREE(net->ro.ro_rt); - net->ro.ro_rt = NULL; - } + RO_NHFREE(&net->ro); if (net->src_addr_selected) { sctp_free_ifa(net->ro._s_addr); net->ro._s_addr = NULL; @@ -1131,7 +1124,7 @@ sctp_path_check_and_react(struct sctp_tcb *stcb, struct sctp_ifa *newifa) SCTP_RTALLOC((sctp_route_t *)&net->ro, stcb->sctp_ep->def_vrf_id, stcb->sctp_ep->fibnum); - if (net->ro.ro_rt == NULL) + if (net->ro.ro_nh == NULL) continue; changed = 0; @@ -2214,18 +2207,13 @@ sctp_asconf_iterator_stcb(struct sctp_inpcb *inp, struct sctp_tcb *stcb, struct sctp_nets *net; TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { - sctp_rtentry_t *rt; /* delete this address if cached */ if (net->ro._s_addr == ifa) { sctp_free_ifa(net->ro._s_addr); net->ro._s_addr = NULL; net->src_addr_selected = 0; - rt = net->ro.ro_rt; - if (rt) { - RTFREE(rt); - net->ro.ro_rt = NULL; - } + RO_NHFREE(&net->ro); /* * Now we deleted our src address, * should we not also now reset the diff --git a/sys/netinet/sctp_os_bsd.h b/sys/netinet/sctp_os_bsd.h index 4a7097127ec8..d01d9c5c5ab5 100644 --- a/sys/netinet/sctp_os_bsd.h +++ b/sys/netinet/sctp_os_bsd.h @@ -71,11 +71,13 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include #include +#include #include #include #include @@ -85,6 +87,7 @@ __FBSDID("$FreeBSD$"); #ifdef INET6 #include #include +#include #include #include #include @@ -199,15 +202,15 @@ MALLOC_DECLARE(SCTP_M_MCORE); #define SCTP_INIT_VRF_TABLEID(vrf) #define SCTP_IFN_IS_IFT_LOOP(ifn) ((ifn)->ifn_type == IFT_LOOP) -#define SCTP_ROUTE_IS_REAL_LOOP(ro) ((ro)->ro_rt && (ro)->ro_rt->rt_ifa && (ro)->ro_rt->rt_ifa->ifa_ifp && (ro)->ro_rt->rt_ifa->ifa_ifp->if_type == IFT_LOOP) +#define SCTP_ROUTE_IS_REAL_LOOP(ro) ((ro)->ro_nh && (ro)->ro_nh->nh_ifa && (ro)->ro_nh->nh_ifa->ifa_ifp && (ro)->ro_nh->nh_ifa->ifa_ifp->if_type == IFT_LOOP) /* * Access to IFN's to help 
with src-addr-selection */ /* This could return VOID if the index works but for BSD we provide both. */ -#define SCTP_GET_IFN_VOID_FROM_ROUTE(ro) (void *)ro->ro_rt->rt_ifp -#define SCTP_GET_IF_INDEX_FROM_ROUTE(ro) (ro)->ro_rt->rt_ifp->if_index -#define SCTP_ROUTE_HAS_VALID_IFN(ro) ((ro)->ro_rt && (ro)->ro_rt->rt_ifp) +#define SCTP_GET_IFN_VOID_FROM_ROUTE(ro) (void *)ro->ro_nh->nh_ifp +#define SCTP_GET_IF_INDEX_FROM_ROUTE(ro) (ro)->ro_nh->nh_ifp->if_index +#define SCTP_ROUTE_HAS_VALID_IFN(ro) ((ro)->ro_nh && (ro)->ro_nh->nh_ifp) /* * general memory allocation @@ -304,12 +307,10 @@ typedef struct callout sctp_os_timer_t; /* MTU */ /*************************/ #define SCTP_GATHER_MTU_FROM_IFN_INFO(ifn, ifn_index, af) ((struct ifnet *)ifn)->if_mtu -#define SCTP_GATHER_MTU_FROM_ROUTE(sctp_ifa, sa, rt) ((uint32_t)((rt != NULL) ? rt->rt_mtu : 0)) +#define SCTP_GATHER_MTU_FROM_ROUTE(sctp_ifa, sa, nh) ((uint32_t)((nh != NULL) ? nh->nh_mtu : 0)) #define SCTP_GATHER_MTU_FROM_INTFC(sctp_ifn) ((sctp_ifn->ifn_p != NULL) ? ((struct ifnet *)(sctp_ifn->ifn_p))->if_mtu : 0) -#define SCTP_SET_MTU_OF_ROUTE(sa, rt, mtu) do { \ - if (rt != NULL) \ - rt->rt_mtu = mtu; \ - } while(0) +/* XXX: Setting MTU from the protocol in this way is simply incorrect */ +#define SCTP_SET_MTU_OF_ROUTE(sa, rt, mtu) /* (de-)register interface event notifications */ #define SCTP_REGISTER_INTERFACE(ifhandle, af) @@ -365,7 +366,7 @@ typedef struct callout sctp_os_timer_t; */ /* get the v6 hop limit */ -#define SCTP_GET_HLIM(inp, ro) in6_selecthlim(&inp->ip_inp.inp, (ro ? (ro->ro_rt ? (ro->ro_rt->rt_ifp) : (NULL)) : (NULL))); +#define SCTP_GET_HLIM(inp, ro) in6_selecthlim(&inp->ip_inp.inp, (ro ? (ro->ro_nh ? (ro->ro_nh->nh_ifp) : (NULL)) : (NULL))); /* is the endpoint v6only? */ #define SCTP_IPV6_V6ONLY(sctp_inpcb) ((sctp_inpcb)->ip_inp.inp.inp_flags & IN6P_IPV6_V6ONLY) @@ -397,10 +398,16 @@ typedef struct callout sctp_os_timer_t; * routes, output, etc. 
*/ typedef struct route sctp_route_t; -typedef struct rtentry sctp_rtentry_t; #define SCTP_RTALLOC(ro, vrf_id, fibnum) \ - rtalloc_ign_fib((struct route *)ro, 0UL, fibnum) +do { /* fill (ro)->ro_nh with a referenced nexthop if none is cached */ \ + if ((ro)->ro_nh == NULL) { \ + if ((ro)->ro_dst.sa_family == AF_INET) \ + (ro)->ro_nh = fib4_lookup(fibnum, ((struct sockaddr_in *)&(ro)->ro_dst)->sin_addr, 0 /* scopeid */, NHR_REF, 0 /* flowid */); \ + else if ((ro)->ro_dst.sa_family == AF_INET6) \ + (ro)->ro_nh = fib6_lookup(fibnum, &((struct sockaddr_in6 *)&(ro)->ro_dst)->sin6_addr, 0 /* scopeid */, NHR_REF, 0 /* flowid */); \ + } \ +} while (0) /* * SCTP protocol specific mbuf flags. diff --git a/sys/netinet/sctp_output.c b/sys/netinet/sctp_output.c index 59f53ea722fb..40dc2efe6295 100644 --- a/sys/netinet/sctp_output.c +++ b/sys/netinet/sctp_output.c @@ -3387,13 +3387,13 @@ sctp_source_address_selection(struct sctp_inpcb *inp, * addresses. If the bound set is NOT assigned to the interface then * we must use rotation amongst the bound addresses.. */ - if (ro->ro_rt == NULL) { + if (ro->ro_nh == NULL) { /* * Need a route to cache. */ SCTP_RTALLOC(ro, vrf_id, inp->fibnum); } - if (ro->ro_rt == NULL) { + if (ro->ro_nh == NULL) { return (NULL); } fam = ro->ro_dst.sa_family; @@ -4131,10 +4131,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, sctp_free_ifa(net->ro._s_addr); net->ro._s_addr = NULL; net->src_addr_selected = 0; - if (ro->ro_rt) { - RTFREE(ro->ro_rt); - ro->ro_rt = NULL; - } + RO_NHFREE(ro); } if (net->src_addr_selected == 0) { /* Cache the source address */ @@ -4206,7 +4203,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, * catch that somewhere and abort the association * right away (assuming this is an INIT being sent). 
*/ - if (ro->ro_rt == NULL) { + if (ro->ro_nh == NULL) { /* * src addr selection failed to find a route * (or valid source addr), so we can't get @@ -4225,7 +4222,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, SCTPDBG(SCTP_DEBUG_OUTPUT3, "Destination is %x\n", (uint32_t)(ntohl(ip->ip_dst.s_addr))); SCTPDBG(SCTP_DEBUG_OUTPUT3, "RTP route is %p through\n", - (void *)ro->ro_rt); + (void *)ro->ro_nh); if (SCTP_GET_HEADER_FOR_OUTPUT(o_pak)) { /* failed to prepend data, give up */ @@ -4278,13 +4275,13 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, SCTPDBG(SCTP_DEBUG_OUTPUT3, "IP output returns %d\n", ret); if (net == NULL) { /* free tempy routes */ - RO_RTFREE(ro); + RO_NHFREE(ro); } else { - if ((ro->ro_rt != NULL) && (net->ro._s_addr) && + if ((ro->ro_nh != NULL) && (net->ro._s_addr) && ((net->dest_state & SCTP_ADDR_NO_PMTUD) == 0)) { uint32_t mtu; - mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_rt); + mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_nh); if (mtu > 0) { if (net->port) { mtu -= sizeof(struct udphdr); @@ -4296,7 +4293,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, net->mtu = mtu; } } - } else if (ro->ro_rt == NULL) { + } else if (ro->ro_nh == NULL) { /* route was freed */ if (net->ro._s_addr && net->src_addr_selected) { @@ -4426,10 +4423,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, sctp_free_ifa(net->ro._s_addr); net->ro._s_addr = NULL; net->src_addr_selected = 0; - if (ro->ro_rt) { - RTFREE(ro->ro_rt); - ro->ro_rt = NULL; - } + RO_NHFREE(ro); } if (net->src_addr_selected == 0) { sin6 = (struct sockaddr_in6 *)&net->ro._l_addr; @@ -4489,7 +4483,7 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, } lsa6->sin6_port = inp->sctp_lport; - if (ro->ro_rt == NULL) { + if (ro->ro_nh == NULL) { /* * src addr selection failed to find a route * (or valid source addr), so we can't get @@ -4625,13 +4619,13 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, } if (net 
== NULL) { /* Now if we had a temp route free it */ - RO_RTFREE(ro); + RO_NHFREE(ro); } else { /* * PMTU check versus smallest asoc MTU goes * here */ - if (ro->ro_rt == NULL) { + if (ro->ro_nh == NULL) { /* Route was freed */ if (net->ro._s_addr && net->src_addr_selected) { @@ -4640,11 +4634,11 @@ sctp_lowlevel_chunk_output(struct sctp_inpcb *inp, } net->src_addr_selected = 0; } - if ((ro->ro_rt != NULL) && (net->ro._s_addr) && + if ((ro->ro_nh != NULL) && (net->ro._s_addr) && ((net->dest_state & SCTP_ADDR_NO_PMTUD) == 0)) { uint32_t mtu; - mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_rt); + mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, ro->ro_nh); if (mtu > 0) { if (net->port) { mtu -= sizeof(struct udphdr); @@ -13838,7 +13832,7 @@ sctp_v6src_match_nexthop(struct sockaddr_in6 *src6, sctp_route_t *ro) struct nd_pfxrouter *pfxrtr = NULL; struct sockaddr_in6 gw6; - if (ro == NULL || ro->ro_rt == NULL || src6->sin6_family != AF_INET6) + if (ro == NULL || ro->ro_nh == NULL || src6->sin6_family != AF_INET6) return (0); /* get prefix entry of address */ @@ -13871,8 +13865,8 @@ sctp_v6src_match_nexthop(struct sockaddr_in6 *src6, sctp_route_t *ro) SCTPDBG(SCTP_DEBUG_OUTPUT2, "prefix router is "); SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, (struct sockaddr *)&gw6); SCTPDBG(SCTP_DEBUG_OUTPUT2, "installed router is "); - SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, ro->ro_rt->rt_gateway); - if (sctp_cmpaddr((struct sockaddr *)&gw6, ro->ro_rt->rt_gateway)) { + SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &ro->ro_nh->gw_sa); + if (sctp_cmpaddr((struct sockaddr *)&gw6, &ro->ro_nh->gw_sa)) { ND6_RUNLOCK(); SCTPDBG(SCTP_DEBUG_OUTPUT2, "pfxrouter is installed\n"); return (1); @@ -13892,7 +13886,7 @@ sctp_v4src_match_nexthop(struct sctp_ifa *sifa, sctp_route_t *ro) struct ifaddr *ifa; struct in_addr srcnetaddr, gwnetaddr; - if (ro == NULL || ro->ro_rt == NULL || + if (ro == NULL || ro->ro_nh == NULL || sifa->address.sa.sa_family != AF_INET) { return (0); } 
@@ -13904,10 +13898,10 @@ sctp_v4src_match_nexthop(struct sctp_ifa *sifa, sctp_route_t *ro) SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &sifa->address.sa); SCTPDBG(SCTP_DEBUG_OUTPUT1, "network address is %x\n", srcnetaddr.s_addr); - sin = (struct sockaddr_in *)ro->ro_rt->rt_gateway; + sin = &ro->ro_nh->gw4_sa; gwnetaddr.s_addr = (sin->sin_addr.s_addr & mask->sin_addr.s_addr); SCTPDBG(SCTP_DEBUG_OUTPUT1, "match_nexthop4: nexthop is "); - SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, ro->ro_rt->rt_gateway); + SCTPDBG_ADDR(SCTP_DEBUG_OUTPUT2, &ro->ro_nh->gw4_sa); SCTPDBG(SCTP_DEBUG_OUTPUT1, "network address is %x\n", gwnetaddr.s_addr); if (srcnetaddr.s_addr == gwnetaddr.s_addr) { return (1); diff --git a/sys/netinet/sctp_pcb.c b/sys/netinet/sctp_pcb.c index 5688a70196c2..039613c15555 100644 --- a/sys/netinet/sctp_pcb.c +++ b/sys/netinet/sctp_pcb.c @@ -3976,17 +3976,9 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, } else { imtu = 0; } - rmtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, net->ro.ro_rt); + rmtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._l_addr.sa, net->ro.ro_nh); hcmtu = sctp_hc_get_mtu(&net->ro._l_addr, stcb->sctp_ep->fibnum); net->mtu = sctp_min_mtu(hcmtu, rmtu, imtu); - if (rmtu == 0) { - /* - * Start things off to match mtu of - * interface please. - */ - SCTP_SET_MTU_OF_ROUTE(&net->ro._l_addr.sa, - net->ro.ro_rt, net->mtu); - } } } if (net->mtu == 0) { @@ -4067,19 +4059,19 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, *netp = net; } netfirst = TAILQ_FIRST(&stcb->asoc.nets); - if (net->ro.ro_rt == NULL) { + if (net->ro.ro_nh == NULL) { /* Since we have no route put it at the back */ TAILQ_INSERT_TAIL(&stcb->asoc.nets, net, sctp_next); } else if (netfirst == NULL) { /* We are the first one in the pool. */ TAILQ_INSERT_HEAD(&stcb->asoc.nets, net, sctp_next); - } else if (netfirst->ro.ro_rt == NULL) { + } else if (netfirst->ro.ro_nh == NULL) { /* * First one has NO route. 
Place this one ahead of the first * one. */ TAILQ_INSERT_HEAD(&stcb->asoc.nets, net, sctp_next); - } else if (net->ro.ro_rt->rt_ifp != netfirst->ro.ro_rt->rt_ifp) { + } else if (net->ro.ro_nh->nh_ifp != netfirst->ro.ro_nh->nh_ifp) { /* * This one has a different interface than the one at the * top of the list. Place it ahead. @@ -4100,11 +4092,11 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, /* End of the list */ TAILQ_INSERT_TAIL(&stcb->asoc.nets, net, sctp_next); break; - } else if (netlook->ro.ro_rt == NULL) { + } else if (netlook->ro.ro_nh == NULL) { /* next one has NO route */ TAILQ_INSERT_BEFORE(netfirst, net, sctp_next); break; - } else if (netlook->ro.ro_rt->rt_ifp != net->ro.ro_rt->rt_ifp) { + } else if (netlook->ro.ro_nh->nh_ifp != net->ro.ro_nh->nh_ifp) { TAILQ_INSERT_AFTER(&stcb->asoc.nets, netlook, net, sctp_next); break; @@ -4117,8 +4109,8 @@ sctp_add_remote_addr(struct sctp_tcb *stcb, struct sockaddr *newaddr, /* got to have a primary set */ if (stcb->asoc.primary_destination == 0) { stcb->asoc.primary_destination = net; - } else if ((stcb->asoc.primary_destination->ro.ro_rt == NULL) && - (net->ro.ro_rt) && + } else if ((stcb->asoc.primary_destination->ro.ro_nh == NULL) && + (net->ro.ro_nh) && ((net->dest_state & SCTP_ADDR_UNCONFIRMED) == 0)) { /* No route to current primary adopt new primary */ stcb->asoc.primary_destination = net; @@ -5459,14 +5451,9 @@ sctp_del_local_addr_ep(struct sctp_inpcb *inp, struct sctp_ifa *ifa) TAILQ_FOREACH(net, &stcb->asoc.nets, sctp_next) { if (net->ro._s_addr == laddr->ifa) { /* Yep, purge src address selected */ - sctp_rtentry_t *rt; /* delete this address if cached */ - rt = net->ro.ro_rt; - if (rt != NULL) { - RTFREE(rt); - net->ro.ro_rt = NULL; - } + RO_NHFREE(&net->ro); sctp_free_ifa(net->ro._s_addr); net->ro._s_addr = NULL; net->src_addr_selected = 0; diff --git a/sys/netinet/sctp_structs.h b/sys/netinet/sctp_structs.h index 34170eacfc53..126387252304 100644 --- 
a/sys/netinet/sctp_structs.h +++ b/sys/netinet/sctp_structs.h @@ -189,7 +189,7 @@ struct iterator_control { #define SCTP_ITERATOR_STOP_CUR_INP 0x00000008 struct sctp_net_route { - sctp_rtentry_t *ro_rt; + struct nhop_object *ro_nh; struct llentry *ro_lle; char *ro_prepend; uint16_t ro_plen; diff --git a/sys/netinet/sctp_timer.c b/sys/netinet/sctp_timer.c index 02f3df5efd1b..3c27074b7d5a 100644 --- a/sys/netinet/sctp_timer.c +++ b/sys/netinet/sctp_timer.c @@ -350,7 +350,7 @@ sctp_find_alternate_net(struct sctp_tcb *stcb, return (NULL); } } - if (alt->ro.ro_rt == NULL) { + if (alt->ro.ro_nh == NULL) { if (alt->ro._s_addr) { sctp_free_ifa(alt->ro._s_addr); alt->ro._s_addr = NULL; @@ -358,7 +358,7 @@ sctp_find_alternate_net(struct sctp_tcb *stcb, alt->src_addr_selected = 0; } if (((alt->dest_state & SCTP_ADDR_REACHABLE) == SCTP_ADDR_REACHABLE) && - (alt->ro.ro_rt != NULL) && + (alt->ro.ro_nh != NULL) && (!(alt->dest_state & SCTP_ADDR_UNCONFIRMED))) { /* Found a reachable address */ break; @@ -937,10 +937,7 @@ sctp_t3rxt_timer(struct sctp_inpcb *inp, net->src_addr_selected = 0; /* Force a route allocation too */ - if (net->ro.ro_rt) { - RTFREE(net->ro.ro_rt); - net->ro.ro_rt = NULL; - } + RO_NHFREE(&net->ro); /* Was it our primary? 
*/ if ((stcb->asoc.primary_destination == net) && (alt != net)) { @@ -1502,7 +1499,7 @@ sctp_pathmtu_timer(struct sctp_inpcb *inp, net->src_addr_selected = 1; } if (net->ro._s_addr) { - mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._s_addr.sa, net->ro.ro_rt); + mtu = SCTP_GATHER_MTU_FROM_ROUTE(net->ro._s_addr, &net->ro._s_addr.sa, net->ro.ro_nh); #if defined(INET) || defined(INET6) if (net->port) { mtu -= sizeof(struct udphdr); diff --git a/sys/netinet/sctp_var.h b/sys/netinet/sctp_var.h index 986702b53362..d9b20f278577 100644 --- a/sys/netinet/sctp_var.h +++ b/sys/netinet/sctp_var.h @@ -187,10 +187,7 @@ extern struct pr_usrreqs sctp_usrreqs; if ((__net)) { \ if (SCTP_DECREMENT_AND_CHECK_REFCOUNT(&(__net)->ref_count)) { \ (void)SCTP_OS_TIMER_STOP(&(__net)->rxt_timer.timer); \ - if ((__net)->ro.ro_rt) { \ - RTFREE((__net)->ro.ro_rt); \ - (__net)->ro.ro_rt = NULL; \ - } \ + RO_NHFREE(&(__net)->ro); \ if ((__net)->src_addr_selected) { \ sctp_free_ifa((__net)->ro._s_addr); \ (__net)->ro._s_addr = NULL; \ diff --git a/sys/netinet/tcp_output.c b/sys/netinet/tcp_output.c index 923dd7ce9574..5530f88b2fc0 100644 --- a/sys/netinet/tcp_output.c +++ b/sys/netinet/tcp_output.c @@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include @@ -1411,8 +1412,8 @@ tcp_output(struct tcpcb *tp) ((so->so_options & SO_DONTROUTE) ? IP_ROUTETOIF : 0), NULL, NULL, tp->t_inpcb); - if (error == EMSGSIZE && tp->t_inpcb->inp_route6.ro_rt != NULL) - mtu = tp->t_inpcb->inp_route6.ro_rt->rt_mtu; + if (error == EMSGSIZE && tp->t_inpcb->inp_route6.ro_nh != NULL) + mtu = tp->t_inpcb->inp_route6.ro_nh->nh_mtu; } #endif /* INET6 */ #if defined(INET) && defined(INET6) @@ -1454,8 +1455,8 @@ tcp_output(struct tcpcb *tp) ((so->so_options & SO_DONTROUTE) ? 
IP_ROUTETOIF : 0), 0, tp->t_inpcb); - if (error == EMSGSIZE && tp->t_inpcb->inp_route.ro_rt != NULL) - mtu = tp->t_inpcb->inp_route.ro_rt->rt_mtu; + if (error == EMSGSIZE && tp->t_inpcb->inp_route.ro_nh != NULL) + mtu = tp->t_inpcb->inp_route.ro_nh->nh_mtu; } #endif /* INET */ diff --git a/sys/netinet/tcp_stacks/bbr.c b/sys/netinet/tcp_stacks/bbr.c index 2624ba8ea24f..529bf119293e 100644 --- a/sys/netinet/tcp_stacks/bbr.c +++ b/sys/netinet/tcp_stacks/bbr.c @@ -5601,8 +5601,8 @@ bbr_update_hardware_pacing_rate(struct tcp_bbr *bbr, uint32_t cts) if (bbr->r_ctl.crte == NULL) return; - if ((bbr->rc_inp->inp_route.ro_rt == NULL) || - (bbr->rc_inp->inp_route.ro_rt->rt_ifp == NULL)) { + if ((bbr->rc_inp->inp_route.ro_nh == NULL) || + (bbr->rc_inp->inp_route.ro_nh->nh_ifp == NULL)) { /* Lost our routes? */ /* Clear the way for a re-attempt */ bbr->bbr_attempt_hdwr_pace = 0; @@ -5618,7 +5618,7 @@ bbr_update_hardware_pacing_rate(struct tcp_bbr *bbr, uint32_t cts) rate = bbr_get_hardware_rate(bbr); nrte = tcp_chg_pacing_rate(bbr->r_ctl.crte, bbr->rc_tp, - bbr->rc_inp->inp_route.ro_rt->rt_ifp, + bbr->rc_inp->inp_route.ro_nh->nh_ifp, rate, (RS_PACING_GEQ|RS_PACING_SUB_OK), &error); @@ -13975,8 +13975,8 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv) ((rsm || sack_rxmit) ? IP_NO_SND_TAG_RL : 0), NULL, NULL, inp); - if (error == EMSGSIZE && inp->inp_route6.ro_rt != NULL) - mtu = inp->inp_route6.ro_rt->rt_mtu; + if (error == EMSGSIZE && inp->inp_route6.ro_nh != NULL) + mtu = inp->inp_route6.ro_nh->nh_mtu; } #endif /* INET6 */ #if defined(INET) && defined(INET6) @@ -14016,8 +14016,8 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv) error = ip_output(m, inp->inp_options, &inp->inp_route, ((rsm || sack_rxmit) ? 
IP_NO_SND_TAG_RL : 0), 0, inp); - if (error == EMSGSIZE && inp->inp_route.ro_rt != NULL) - mtu = inp->inp_route.ro_rt->rt_mtu; + if (error == EMSGSIZE && inp->inp_route.ro_nh != NULL) + mtu = inp->inp_route.ro_nh->nh_mtu; } #endif /* INET */ out: @@ -14302,8 +14302,8 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv) (bbr->rc_past_init_win) && (bbr->rc_bbr_state != BBR_STATE_STARTUP) && (get_filter_value(&bbr->r_ctl.rc_delrate)) && - (inp->inp_route.ro_rt && - inp->inp_route.ro_rt->rt_ifp)) { + (inp->inp_route.ro_nh && + inp->inp_route.ro_nh->nh_ifp)) { /* * We are past the initial window and * have at least one measurement so we @@ -14317,7 +14317,7 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv) rate_wanted = bbr_get_hardware_rate(bbr); bbr->bbr_attempt_hdwr_pace = 1; bbr->r_ctl.crte = tcp_set_pacing_rate(bbr->rc_tp, - inp->inp_route.ro_rt->rt_ifp, + inp->inp_route.ro_nh->nh_ifp, rate_wanted, (RS_PACING_GEQ|RS_PACING_SUB_OK), &err); @@ -14344,7 +14344,7 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv) tcp_bbr_tso_size_check(bbr, cts); } else { bbr_type_log_hdwr_pacing(bbr, - inp->inp_route.ro_rt->rt_ifp, + inp->inp_route.ro_nh->nh_ifp, rate_wanted, 0, __LINE__, cts, err); @@ -14361,8 +14361,8 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeval *tv) if (inp->inp_snd_tag == NULL) { /* A change during ip output disabled hw pacing? 
*/ bbr->bbr_hdrw_pacing = 0; - } else if ((inp->inp_route.ro_rt == NULL) || - (inp->inp_route.ro_rt->rt_ifp != inp->inp_snd_tag->ifp)) { + } else if ((inp->inp_route.ro_nh == NULL) || + (inp->inp_route.ro_nh->nh_ifp != inp->inp_snd_tag->ifp)) { /* * We had an interface or route change, * detach from the current hdwr pacing diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index bfa6f3a41867..60d306c8d23c 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -76,6 +76,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -2199,9 +2200,9 @@ tcp_notify(struct inpcb *inp, int error) if (tp->t_state == TCPS_ESTABLISHED && (error == EHOSTUNREACH || error == ENETUNREACH || error == EHOSTDOWN)) { - if (inp->inp_route.ro_rt) { - RTFREE(inp->inp_route.ro_rt); - inp->inp_route.ro_rt = (struct rtentry *)NULL; + if (inp->inp_route.ro_nh) { + NH_FREE(inp->inp_route.ro_nh); + inp->inp_route.ro_nh = (struct nhop_object *)NULL; } return (inp); } else if (tp->t_state < TCPS_ESTABLISHED && tp->t_rxtshift > 3 && diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index 089a63230a3e..b1fcbda2a566 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -71,6 +71,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -761,9 +762,9 @@ udp_notify(struct inpcb *inp, int errno) INP_WLOCK_ASSERT(inp); if ((errno == EHOSTUNREACH || errno == ENETUNREACH || - errno == EHOSTDOWN) && inp->inp_route.ro_rt) { - RTFREE(inp->inp_route.ro_rt); - inp->inp_route.ro_rt = (struct rtentry *)NULL; + errno == EHOSTDOWN) && inp->inp_route.ro_nh) { + NH_FREE(inp->inp_route.ro_nh); + inp->inp_route.ro_nh = (struct nhop_object *)NULL; } inp->inp_socket->so_error = errno; diff --git a/sys/netinet6/in6.h b/sys/netinet6/in6.h index 56ab11460af2..1092130919cb 100644 --- a/sys/netinet6/in6.h +++ b/sys/netinet6/in6.h @@ -375,8 +375,9 @@ extern const struct in6_addr in6addr_linklocal_allv2routers; * IP6 
route structure */ #if __BSD_VISIBLE +struct nhop_object; struct route_in6 { - struct rtentry *ro_rt; + struct nhop_object *ro_nh; struct llentry *ro_lle; /* * ro_prepend and ro_plen are only used for bpf to pass in a diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index 89913ea9189d..edde97d6f26c 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -97,6 +97,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -109,6 +110,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include static struct inpcb *in6_pcblookup_hash_locked(struct inpcbinfo *, diff --git a/sys/netinet6/in6_src.c b/sys/netinet6/in6_src.c index b682f57a87df..9ecf2474ba4a 100644 --- a/sys/netinet6/in6_src.c +++ b/sys/netinet6/in6_src.c @@ -91,6 +91,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #ifdef RADIX_MPATH #include @@ -134,7 +135,7 @@ VNET_DEFINE(int, ip6_prefer_tempaddr) = 0; static int selectroute(struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, struct ifnet **, - struct rtentry **, int, u_int); + struct nhop_object **, int, u_int, uint32_t); static int in6_selectif(struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct ifnet **, struct ifnet *, u_int); @@ -625,11 +626,12 @@ in6_selectsrc_addr(uint32_t fibnum, const struct in6_addr *dst, static int selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro, - struct ifnet **retifp, struct rtentry **retrt, int norouteok, u_int fibnum) + struct ifnet **retifp, struct nhop_object **retnh, int norouteok, + u_int fibnum, uint32_t flowid) { int error = 0; struct ifnet *ifp = NULL; - struct rtentry *rt = NULL; + struct nhop_object *nh = NULL; struct sockaddr_in6 *sin6_next; struct in6_pktinfo *pi = NULL; struct in6_addr *dst = &dstsock->sin6_addr; @@ -654,7 +656,7 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts 
*opts, /* XXX boundary check is assumed to be already done. */ ifp = ifnet_byindex(pi->ipi6_ifindex); if (ifp != NULL && - (norouteok || retrt == NULL || + (norouteok || retnh == NULL || IN6_IS_ADDR_MULTICAST(dst))) { /* * we do not have to check or get the route for @@ -707,26 +709,31 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, } ron = &opts->ip6po_nextroute; /* Use a cached route if it exists and is valid. */ - if (ron->ro_rt != NULL && ( - (ron->ro_rt->rt_flags & RTF_UP) == 0 || + if (ron->ro_nh != NULL && ( + !NH_IS_VALID(ron->ro_nh) || ron->ro_dst.sin6_family != AF_INET6 || !IN6_ARE_ADDR_EQUAL(&ron->ro_dst.sin6_addr, &sin6_next->sin6_addr))) - RO_RTFREE(ron); - if (ron->ro_rt == NULL) { + RO_NHFREE(ron); + if (ron->ro_nh == NULL) { ron->ro_dst = *sin6_next; - in6_rtalloc(ron, fibnum); /* multi path case? */ + /* + * sin6_next is not link-local OR scopeid is 0, + * no need to clear scope + */ + ron->ro_nh = fib6_lookup(fibnum, + &sin6_next->sin6_addr, 0, NHR_REF, flowid); } /* * The node identified by that address must be a * neighbor of the sending host. */ - if (ron->ro_rt == NULL || - (ron->ro_rt->rt_flags & RTF_GATEWAY) != 0) + if (ron->ro_nh == NULL || + (ron->ro_nh->nh_flags & NHF_GATEWAY) != 0) error = EHOSTUNREACH; else { - rt = ron->ro_rt; - ifp = rt->rt_ifp; + nh = ron->ro_nh; + ifp = nh->nh_ifp; } goto done; } @@ -737,15 +744,14 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, * cached destination, in case of sharing the cache with IPv4. 
*/ if (ro) { - if (ro->ro_rt && - (!(ro->ro_rt->rt_flags & RTF_UP) || + if (ro->ro_nh && + (!NH_IS_VALID(ro->ro_nh) || ((struct sockaddr *)(&ro->ro_dst))->sa_family != AF_INET6 || !IN6_ARE_ADDR_EQUAL(&satosin6(&ro->ro_dst)->sin6_addr, dst))) { - RTFREE(ro->ro_rt); - ro->ro_rt = (struct rtentry *)NULL; + RO_NHFREE(ro); } - if (ro->ro_rt == (struct rtentry *)NULL) { + if (ro->ro_nh == (struct nhop_object *)NULL) { struct sockaddr_in6 *sa6; /* No route yet, so try to acquire one */ @@ -754,15 +760,28 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, *sa6 = *dstsock; sa6->sin6_scope_id = 0; + /* + * Currently dst has scopeid embedded iff it is LL. + * New routing API accepts scopeid as a separate argument. + * Convert dst before/after doing lookup + */ + uint32_t scopeid = 0; + if (IN6_IS_SCOPE_LINKLOCAL(&sa6->sin6_addr)) { + /* Unwrap in6_getscope() and in6_clearscope() */ + scopeid = ntohs(sa6->sin6_addr.s6_addr16[1]); + sa6->sin6_addr.s6_addr16[1] = 0; + + } + #ifdef RADIX_MPATH rtalloc_mpath_fib((struct route *)ro, ntohl(sa6->sin6_addr.s6_addr32[3]), fibnum); #else - ro->ro_rt = in6_rtalloc1((struct sockaddr *) - &ro->ro_dst, 0, 0UL, fibnum); - if (ro->ro_rt) - RT_UNLOCK(ro->ro_rt); + ro->ro_nh = fib6_lookup(fibnum, + &sa6->sin6_addr, scopeid, NHR_REF, flowid); #endif + if (IN6_IS_SCOPE_LINKLOCAL(&sa6->sin6_addr)) + sa6->sin6_addr.s6_addr16[1] = htons(scopeid); } /* @@ -772,17 +791,11 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, if (opts && opts->ip6po_nexthop) goto done; - if (ro->ro_rt) { - ifp = ro->ro_rt->rt_ifp; - - if (ifp == NULL) { /* can this really happen? 
*/ - RTFREE(ro->ro_rt); - ro->ro_rt = NULL; - } - } - if (ro->ro_rt == NULL) + if (ro->ro_nh) + ifp = ro->ro_nh->nh_ifp; + else error = EHOSTUNREACH; - rt = ro->ro_rt; + nh = ro->ro_nh; /* * Check if the outgoing interface conflicts with @@ -803,7 +816,7 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, } done: - if (ifp == NULL && rt == NULL) { + if (ifp == NULL && nh == NULL) { /* * This can happen if the caller did not pass a cached route * nor any other hints. We treat this case an error. @@ -814,26 +827,14 @@ selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, IP6STAT_INC(ip6s_noroute); if (retifp != NULL) { - *retifp = ifp; - - /* - * Adjust the "outgoing" interface. If we're going to loop - * the packet back to ourselves, the ifp would be the loopback - * interface. However, we'd rather know the interface associated - * to the destination address (which should probably be one of - * our own addresses.) - */ - if (rt) { - if ((rt->rt_ifp->if_flags & IFF_LOOPBACK) && - (rt->rt_gateway->sa_family == AF_LINK)) - *retifp = - ifnet_byindex(((struct sockaddr_dl *) - rt->rt_gateway)->sdl_index); - } + if (nh != NULL) + *retifp = nh->nh_aifp; + else + *retifp = ifp; } - if (retrt != NULL) - *retrt = rt; /* rt may be NULL */ + if (retnh != NULL) + *retnh = nh; /* nh may be NULL */ return (error); } @@ -845,20 +846,20 @@ in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, { int error; struct route_in6 sro; - struct rtentry *rt = NULL; - int rt_flags; + struct nhop_object *nh = NULL; + uint16_t nh_flags; KASSERT(retifp != NULL, ("%s: retifp is NULL", __func__)); bzero(&sro, sizeof(sro)); - rt_flags = 0; + nh_flags = 0; - error = selectroute(dstsock, opts, mopts, &sro, retifp, &rt, 1, fibnum); + error = selectroute(dstsock, opts, mopts, &sro, retifp, &nh, 1, fibnum, 0); - if (rt) - rt_flags = rt->rt_flags; - if (rt && rt == sro.ro_rt) - RTFREE(rt); + if (nh != NULL) + nh_flags = nh->nh_flags; + if (nh != NULL && nh == 
sro.ro_nh) + NH_FREE(nh); if (error != 0) { /* Help ND. See oifp comment in in6_selectsrc(). */ @@ -887,8 +888,8 @@ in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, * We thus reject the case here. */ - if (rt_flags & (RTF_REJECT | RTF_BLACKHOLE)) { - error = (rt_flags & RTF_HOST ? EHOSTUNREACH : ENETUNREACH); + if (nh_flags & (NHF_REJECT | NHF_BLACKHOLE)) { + error = (nh_flags & NHF_HOST ? EHOSTUNREACH : ENETUNREACH); return (error); } @@ -899,11 +900,11 @@ in6_selectif(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, int in6_selectroute(struct sockaddr_in6 *dstsock, struct ip6_pktopts *opts, struct ip6_moptions *mopts, struct route_in6 *ro, - struct ifnet **retifp, struct rtentry **retrt, u_int fibnum) + struct ifnet **retifp, struct nhop_object **retnh, u_int fibnum, uint32_t flowid) { return (selectroute(dstsock, opts, mopts, ro, retifp, - retrt, 0, fibnum)); + retnh, 0, fibnum, flowid)); } /* diff --git a/sys/netinet6/ip6_output.c b/sys/netinet6/ip6_output.c index 53972e3b2316..251f8cf82d5e 100644 --- a/sys/netinet6/ip6_output.c +++ b/sys/netinet6/ip6_output.c @@ -95,6 +95,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -403,9 +404,9 @@ ip6_output_send(struct inpcb *inp, struct ifnet *ifp, struct ifnet *origifp, * This function may modify ver and hlim only. * The mbuf chain containing the packet will be freed. * The mbuf opt, if present, will not be freed. - * If route_in6 ro is present and has ro_rt initialized, route lookup would be - * skipped and ro->ro_rt would be used. If ro is present but ro->ro_rt is NULL, - * then result of route lookup is stored in ro->ro_rt. + * If route_in6 ro is present and has ro_nh initialized, route lookup would be + * skipped and ro->ro_nh would be used. If ro is present but ro->ro_nh is NULL, + * then result of route lookup is stored in ro->ro_nh. * * Type of "mtu": rt_mtu is u_long, ifnet.ifr_mtu is int, and nd_ifinfo.linkmtu * is uint32_t. 
So we use u_long to hold largest one, which is rt_mtu. @@ -425,7 +426,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, struct mbuf *m = m0; struct mbuf *mprev; struct route_in6 *ro_pmtu; - struct rtentry *rt; + struct nhop_object *nh; struct sockaddr_in6 *dst, sin6, src_sa, dst_sa; struct in6_addr odst; u_char *nexthdrp; @@ -666,7 +667,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, ip6->ip6_hlim = V_ip6_defmcasthlim; } - if (ro == NULL || ro->ro_rt == NULL) { + if (ro == NULL || ro->ro_nh == NULL) { bzero(dst, sizeof(*dst)); dst->sin6_family = AF_INET6; dst->sin6_len = sizeof(*dst); @@ -676,29 +677,26 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, * Validate route against routing table changes. * Make sure that the address family is set in route. */ - rt = NULL; + nh = NULL; ifp = NULL; mtu = 0; if (ro != NULL) { - if (ro->ro_rt != NULL && inp != NULL) { + if (ro->ro_nh != NULL && inp != NULL) { ro->ro_dst.sin6_family = AF_INET6; /* XXX KASSERT? */ - RT_VALIDATE((struct route *)ro, &inp->inp_rt_cookie, + NH_VALIDATE((struct route *)ro, &inp->inp_rt_cookie, fibnum); } - if (ro->ro_rt != NULL && fwd_tag == NULL && - ((ro->ro_rt->rt_flags & RTF_UP) == 0 || - ro->ro_rt->rt_ifp == NULL || - !RT_LINK_IS_UP(ro->ro_rt->rt_ifp) || + if (ro->ro_nh != NULL && fwd_tag == NULL && + (!NH_IS_VALID(ro->ro_nh) || ro->ro_dst.sin6_family != AF_INET6 || !IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst))) RO_INVALIDATE_CACHE(ro); - if (ro->ro_rt != NULL && fwd_tag == NULL && - (ro->ro_rt->rt_flags & RTF_UP) && + if (ro->ro_nh != NULL && fwd_tag == NULL && ro->ro_dst.sin6_family == AF_INET6 && IN6_ARE_ADDR_EQUAL(&ro->ro_dst.sin6_addr, &ip6->ip6_dst)) { - rt = ro->ro_rt; - ifp = ro->ro_rt->rt_ifp; + nh = ro->ro_nh; + ifp = nh->nh_ifp; } else { if (ro->ro_lle) LLE_FREE(ro->ro_lle); /* zeros ro_lle */ @@ -710,7 +708,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, dst_sa.sin6_addr = ip6->ip6_dst; } error = in6_selectroute(&dst_sa, opt, im6o, 
ro, &ifp, - &rt, fibnum); + &nh, fibnum, m->m_pkthdr.flowid); if (error != 0) { IP6STAT_INC(ip6s_noroute); if (ifp != NULL) @@ -720,17 +718,17 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, if (ifp != NULL) mtu = ifp->if_mtu; } - if (rt == NULL) { + if (nh == NULL) { /* - * If in6_selectroute() does not return a route entry + * If in6_selectroute() does not return a nexthop * dst may not have been updated. */ *dst = dst_sa; /* XXX */ } else { - if (rt->rt_flags & RTF_HOST) - mtu = rt->rt_mtu; - ia = (struct in6_ifaddr *)(rt->rt_ifa); - counter_u64_add(rt->rt_pksent, 1); + if (nh->nh_flags & NHF_HOST) + mtu = nh->nh_mtu; + ia = (struct in6_ifaddr *)(nh->nh_ifa); + counter_u64_add(nh->nh_pksent, 1); } } else { struct nhop6_extended nh6; @@ -781,7 +779,7 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, ; } - /* Then rt (for unicast) and ifp must be non-NULL valid values. */ + /* Then nh (for unicast) and ifp must be non-NULL valid values. */ if ((flags & IPV6_FORWARDING) == 0) { /* XXX: the FORWARDING flag can be set for mrouting. */ in6_ifstat_inc(ifp, ifs6_out_request); @@ -852,8 +850,8 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, } /* All scope ID checks are successful. */ - if (rt && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { - if (opt && opt->ip6po_nextroute.ro_rt) { + if (nh && !IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { + if (opt && opt->ip6po_nextroute.ro_nh) { /* * The nexthop is explicitly specified by the * application. 
We assume the next hop is an IPv6 @@ -861,8 +859,8 @@ ip6_output(struct mbuf *m0, struct ip6_pktopts *opt, */ dst = (struct sockaddr_in6 *)opt->ip6po_nexthop; } - else if ((rt->rt_flags & RTF_GATEWAY)) - dst = (struct sockaddr_in6 *)rt->rt_gateway; + else if ((nh->nh_flags & NHF_GATEWAY)) + dst = &nh->gw6_sa; } if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst)) { @@ -1517,8 +1515,8 @@ ip6_getpmtu(struct route_in6 *ro_pmtu, int do_lookup, mtu = ro_pmtu->ro_mtu; } - if (ro_pmtu != NULL && ro_pmtu->ro_rt != NULL) - mtu = ro_pmtu->ro_rt->rt_mtu; + if (ro_pmtu != NULL && ro_pmtu->ro_nh != NULL) + mtu = ro_pmtu->ro_nh->nh_mtu; return (ip6_calcmtu(ifp, dst, mtu, mtup, alwaysfragp, proto)); } @@ -2651,9 +2649,9 @@ ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname) if (optname == -1 || optname == IPV6_TCLASS) pktopt->ip6po_tclass = -1; if (optname == -1 || optname == IPV6_NEXTHOP) { - if (pktopt->ip6po_nextroute.ro_rt) { - RTFREE(pktopt->ip6po_nextroute.ro_rt); - pktopt->ip6po_nextroute.ro_rt = NULL; + if (pktopt->ip6po_nextroute.ro_nh) { + NH_FREE(pktopt->ip6po_nextroute.ro_nh); + pktopt->ip6po_nextroute.ro_nh = NULL; } if (pktopt->ip6po_nexthop) free(pktopt->ip6po_nexthop, M_IP6OPT); @@ -2673,9 +2671,9 @@ ip6_clearpktopts(struct ip6_pktopts *pktopt, int optname) if (pktopt->ip6po_rhinfo.ip6po_rhi_rthdr) free(pktopt->ip6po_rhinfo.ip6po_rhi_rthdr, M_IP6OPT); pktopt->ip6po_rhinfo.ip6po_rhi_rthdr = NULL; - if (pktopt->ip6po_route.ro_rt) { - RTFREE(pktopt->ip6po_route.ro_rt); - pktopt->ip6po_route.ro_rt = NULL; + if (pktopt->ip6po_route.ro_nh) { + NH_FREE(pktopt->ip6po_route.ro_nh); + pktopt->ip6po_route.ro_nh = NULL; } } if (optname == -1 || optname == IPV6_DSTOPTS) { diff --git a/sys/netinet6/ip6_var.h b/sys/netinet6/ip6_var.h index fef3760413f8..de7a938a3289 100644 --- a/sys/netinet6/ip6_var.h +++ b/sys/netinet6/ip6_var.h @@ -416,7 +416,7 @@ int in6_selectsrc_addr(uint32_t, const struct in6_addr *, uint32_t, struct ifnet *, struct in6_addr *, int *); int 
in6_selectroute(struct sockaddr_in6 *, struct ip6_pktopts *, struct ip6_moptions *, struct route_in6 *, struct ifnet **, - struct rtentry **, u_int); + struct nhop_object **, u_int, uint32_t); u_int32_t ip6_randomid(void); u_int32_t ip6_randomflowlabel(void); void in6_delayed_cksum(struct mbuf *m, uint32_t plen, u_short offset);