Convert rtentry field accesses into nhop field accesses.

One of the goals of the new routing KPI defined in r359823 is to entirely
 hide `struct rtentry` from the consumers. This will allow improving routing
 subsystem internals and delivering more features much faster.

This commit is a mostly mechanical change to eliminate direct struct rtentry
 field accesses.

The only notable difference is AF_LINK gateway encoding.

AF_LINK gw is used in routing stack for operations with interface routes
 and host loopback routes.
In the former case it indicates _some_ non-NULL gateway, as the interface
 is the same as in rt_ifp in kernel and rtm_ifindex in rtsock reporting.
In the latter case the interface index inside gateway was used by the IPv6
 datapath to verify address scope for link-local interfaces.

Kernel uses struct sockaddr_dl for this type of gateway. This structure
 allows for specifying rich interface data, such as mac address and interface
 name. However, this results in relatively large structure size - 52 bytes.
Routing stack fills in only 2 fields - sdl_index and sdl_type, which reside
 in the first 8 bytes of the structure.

In the new KPI, struct nhop_object tries to be cache-efficient, hence
 embodies gateway address inside the structure. In the AF_LINK case it
 stores a shortened version of the structure - struct sockaddr_dl_short,
 which occupies 16 bytes. After D24340 changes, the data inside AF_LINK
 gateway will not be used in the kernel at all, leaving rtsock as the only
 potential concern.

The difference in rtsock reporting:

(old)
got message of size 240 on Thu Apr 16 03:12:13 2020
RTM_ADD: Add Route: len 240, pid: 0, seq 0, errno 0, flags:<UP,DONE,PINNED>
locks:  inits:
sockaddrs: <DST,GATEWAY,NETMASK>
 10.0.0.0 link#5 255.255.255.0

(new)
got message of size 200 on Sun Apr 19 09:46:32 2020
RTM_ADD: Add Route: len 200, pid: 0, seq 0, errno 0, flags:<UP,DONE,PINNED>
locks:  inits:
sockaddrs: <DST,GATEWAY,NETMASK>
 10.0.0.0 link#5 255.255.255.0

Note the 40-byte difference (52-16 + alignment).
However, gateway is still a valid AF_LINK gateway with proper data filled in.

It is worth noting that these particular messages (interface routes) are mostly
 ignored by routing daemons:
* bird/quagga/frr use RTM_NEWADDR and ignore prefix route addition messages.
* quagga/frr ignore routes without gateway

A more detailed overview of how rtsock messages are used by the
 routing daemons to reconstruct the kernel view can be found in D22974.

Differential Revision:	https://reviews.freebsd.org/D24519
This commit is contained in:
Alexander V. Chernikov 2020-04-23 08:04:20 +00:00
parent 9e88f47c8f
commit aaad3c4fca
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=360218
6 changed files with 67 additions and 60 deletions

View File

@ -56,6 +56,7 @@ __FBSDID("$FreeBSD$");
#include <sys/rmlock.h>
#include <net/route.h>
#include <net/route_var.h>
#include <net/route/nhop.h>
#include <net/if.h>
#include <net/if_var.h>
@ -110,22 +111,24 @@ struct rtentry *
rt_mpath_matchgate(struct rtentry *rt, struct sockaddr *gate)
{
struct radix_node *rn;
struct nhop_object *nh;
if (!gate || !rt->rt_gateway)
return NULL;
if (gate == NULL)
return (NULL);
/* beyond here, we use rn as the master copy */
rn = (struct radix_node *)rt;
do {
rt = (struct rtentry *)rn;
nh = rt->rt_nhop;
/*
* we are removing an address alias that has
* we are removing an address alias that has
* the same prefix as another address
* we need to compare the interface address because
* rt_gateway is a special sockadd_dl structure
* gateway is a special sockaddr_dl structure
*/
if (rt->rt_gateway->sa_family == AF_LINK) {
if (!memcmp(rt->rt_ifa->ifa_addr, gate, gate->sa_len))
if (nh->gw_sa.sa_family == AF_LINK) {
if (!memcmp(nh->nh_ifa->ifa_addr, gate, gate->sa_len))
break;
}
@ -134,8 +137,8 @@ rt_mpath_matchgate(struct rtentry *rt, struct sockaddr *gate)
* 1) Routes with 'real' IPv4/IPv6 gateway
* 2) Loopback host routes (another AF_LINK/sockadd_dl check)
* */
if (rt->rt_gateway->sa_len == gate->sa_len &&
!memcmp(rt->rt_gateway, gate, gate->sa_len))
if (nh->gw_sa.sa_len == gate->sa_len &&
!memcmp(&nh->gw_sa, gate, gate->sa_len))
break;
} while ((rn = rn_mpath_next(rn)) != NULL);

View File

@ -827,6 +827,7 @@ rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, int flags)
{
struct rt_metrics *rmx;
struct sockaddr *src, *dst;
struct nhop_object *nh;
int sa_len;
if (flags & NHR_COPY) {
@ -858,7 +859,7 @@ rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, int flags)
}
/* Copy gateway is set && dst is non-zero */
src = rt->rt_gateway;
src = &rt->rt_nhop->gw_sa;
dst = info->rti_info[RTAX_GATEWAY];
if ((rt->rt_flags & RTF_GATEWAY) && src != NULL && dst != NULL){
if (src->sa_len > dst->sa_len)
@ -874,20 +875,21 @@ rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info, int flags)
info->rti_addrs |= RTA_NETMASK;
}
if (rt->rt_flags & RTF_GATEWAY) {
info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa;
info->rti_addrs |= RTA_GATEWAY;
}
}
nh = rt->rt_nhop;
rmx = info->rti_rmx;
if (rmx != NULL) {
info->rti_mflags |= RTV_MTU;
rmx->rmx_mtu = rt->rt_mtu;
rmx->rmx_mtu = nh->nh_mtu;
}
info->rti_flags = rt->rt_flags;
info->rti_ifp = rt->rt_ifp;
info->rti_ifa = rt->rt_ifa;
info->rti_flags = rt->rt_flags | nhop_get_rtflags(nh);
info->rti_ifp = nh->nh_ifp;
info->rti_ifa = nh->nh_ifa;
if (flags & NHR_REF) {
if_ref(info->rti_ifp);
ifa_ref(info->rti_ifa);
@ -1021,7 +1023,7 @@ rt_checkdelroute(struct radix_node *rn, void *arg)
info->rti_info[RTAX_DST] = rt_key(rt);
info->rti_info[RTAX_NETMASK] = rt_mask(rt);
info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa;
rt = rt_unlinkrte(di->rnh, info, &error);
if (rt == NULL) {
@ -1216,7 +1218,7 @@ rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, int *perror)
* Ease the caller work by filling in remaining info
* from that particular entry.
*/
info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa;
}
/*
@ -1471,7 +1473,7 @@ rt_print(char *buf, int buflen, struct rtentry *rt)
if (rt->rt_flags & RTF_GATEWAY) {
buf[i++] = '>';
i += p_sockaddr(buf + i, buflen - i, rt->rt_gateway);
i += p_sockaddr(buf + i, buflen - i, &rt->rt_nhop->gw_sa);
}
return (i);
@ -1528,8 +1530,8 @@ rt_mpath_unlink(struct rib_head *rnh, struct rt_addrinfo *info,
* one route in the chain.
*/
if (gw &&
(rt->rt_gateway->sa_len != gw->sa_len ||
memcmp(rt->rt_gateway, gw, gw->sa_len))) {
(rt->rt_nhop->gw_sa.sa_len != gw->sa_len ||
memcmp(&rt->rt_nhop->gw_sa, gw, gw->sa_len))) {
*perror = ESRCH;
return (NULL);
}
@ -2078,7 +2080,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
char tempbuf[_SOCKADDR_TMPSIZE];
int didwork = 0;
int a_failure = 0;
struct sockaddr_dl *sdl = NULL;
struct sockaddr_dl_short *sdl = NULL;
struct rib_head *rnh;
if (flags & RTF_HOST) {
@ -2130,10 +2132,10 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
dst = (struct sockaddr *)tempbuf;
}
} else if (cmd == RTM_ADD) {
sdl = (struct sockaddr_dl *)tempbuf;
bzero(sdl, sizeof(struct sockaddr_dl));
sdl = (struct sockaddr_dl_short *)tempbuf;
bzero(sdl, sizeof(struct sockaddr_dl_short));
sdl->sdl_family = AF_LINK;
sdl->sdl_len = sizeof(struct sockaddr_dl);
sdl->sdl_len = sizeof(struct sockaddr_dl_short);
sdl->sdl_type = ifa->ifa_ifp->if_type;
sdl->sdl_index = ifa->ifa_ifp->if_index;
}
@ -2165,8 +2167,7 @@ rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum)
rt = RNTORT(rn);
/*
* for interface route the
* rt->rt_gateway is sockaddr_intf
* for cloning ARP entries, so
* rt->rt_gateway is sockaddr_dl, so
* rt_mpath_matchgate must use the
* interface address
*/

View File

@ -457,15 +457,15 @@ union sockaddr_union {
static int
rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
struct rtentry *rt, union sockaddr_union *saun, struct ucred *cred)
struct nhop_object *nh, union sockaddr_union *saun, struct ucred *cred)
{
#if defined(INET) || defined(INET6)
struct epoch_tracker et;
#endif
/* First, see if the returned address is part of the jail. */
if (prison_if(cred, rt->rt_ifa->ifa_addr) == 0) {
info->rti_info[RTAX_IFA] = rt->rt_ifa->ifa_addr;
if (prison_if(cred, nh->nh_ifa->ifa_addr) == 0) {
info->rti_info[RTAX_IFA] = nh->nh_ifa->ifa_addr;
return (0);
}
@ -499,7 +499,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
/*
* As a last resort return the 'default' jail address.
*/
ia = ((struct sockaddr_in *)rt->rt_ifa->ifa_addr)->
ia = ((struct sockaddr_in *)nh->nh_ifa->ifa_addr)->
sin_addr;
if (prison_get_ip4(cred, &ia) != 0)
return (ESRCH);
@ -542,7 +542,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp,
/*
* As a last resort return the 'default' jail address.
*/
ia6 = ((struct sockaddr_in6 *)rt->rt_ifa->ifa_addr)->
ia6 = ((struct sockaddr_in6 *)nh->nh_ifa->ifa_addr)->
sin6_addr;
if (prison_get_ip6(cred, &ia6) != 0)
return (ESRCH);
@ -635,7 +635,7 @@ fill_addrinfo(struct rt_msghdr *rtm, int len, u_int fibnum, struct rt_addrinfo *
* is not reachable locally. This behavior is fixed as
* part of the new L2/L3 redesign and rewrite work. The
* signature of this interface address route is the
* AF_LINK sa_family type of the rt_gateway, and the
* AF_LINK sa_family type of the gateway, and the
* rt_ifp has the IFF_LOOPBACK flag set.
*/
if (rib_lookup_info(fibnum, gdst, NHR_REF, 0, &ginfo) == 0) {
@ -711,12 +711,15 @@ handle_rtm_get(struct rt_addrinfo *info, u_int fibnum,
* the actual PPP host entry is found, perform
* another search to retrieve the prefix route of
* the local end point of the PPP link.
* TODO: move this logic to userland.
*/
if (rtm->rtm_flags & RTF_ANNOUNCE) {
struct sockaddr laddr;
struct nhop_object *nh;
if (rt->rt_ifp != NULL &&
rt->rt_ifp->if_type == IFT_PROPVIRTUAL) {
nh = rt->rt_nhop;
if (nh->nh_ifp != NULL &&
nh->nh_ifp->if_type == IFT_PROPVIRTUAL) {
struct epoch_tracker et;
struct ifaddr *ifa;
@ -729,9 +732,9 @@ handle_rtm_get(struct rt_addrinfo *info, u_int fibnum,
&laddr,
ifa->ifa_netmask);
} else
rt_maskedcopy(rt->rt_ifa->ifa_addr,
rt_maskedcopy(nh->nh_ifa->ifa_addr,
&laddr,
rt->rt_ifa->ifa_netmask);
nh->nh_ifa->ifa_netmask);
/*
* refactor rt and no lock operation necessary
*/
@ -741,7 +744,7 @@ handle_rtm_get(struct rt_addrinfo *info, u_int fibnum,
RIB_RUNLOCK(rnh);
return (ESRCH);
}
}
}
RT_LOCK(rt);
RT_ADDREF(rt);
RIB_RUNLOCK(rnh);
@ -768,6 +771,7 @@ update_rtm_from_rte(struct rt_addrinfo *info, struct rt_msghdr **prtm,
struct walkarg w;
union sockaddr_union saun;
struct rt_msghdr *rtm, *orig_rtm = NULL;
struct nhop_object *nh;
struct ifnet *ifp;
int error, len;
@ -775,23 +779,24 @@ update_rtm_from_rte(struct rt_addrinfo *info, struct rt_msghdr **prtm,
rtm = *prtm;
nh = rt->rt_nhop;
info->rti_info[RTAX_DST] = rt_key(rt);
info->rti_info[RTAX_GATEWAY] = rt->rt_gateway;
info->rti_info[RTAX_GATEWAY] = &nh->gw_sa;
info->rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt),
rt_mask(rt), &netmask_ss);
info->rti_info[RTAX_GENMASK] = 0;
ifp = rt->rt_ifp;
ifp = nh->nh_ifp;
if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
if (ifp) {
info->rti_info[RTAX_IFP] =
ifp->if_addr->ifa_addr;
error = rtm_get_jailed(info, ifp, rt,
error = rtm_get_jailed(info, ifp, nh,
&saun, curthread->td_ucred);
if (error != 0)
return (error);
if (ifp->if_flags & IFF_POINTOPOINT)
info->rti_info[RTAX_BRD] =
rt->rt_ifa->ifa_dstaddr;
nh->nh_ifa->ifa_dstaddr;
rtm->rtm_index = ifp->if_index;
} else {
info->rti_info[RTAX_IFP] = NULL;
@ -1075,7 +1080,7 @@ rt_getmetrics(const struct rtentry *rt, struct rt_metrics *out)
{
bzero(out, sizeof(*out));
out->rmx_mtu = rt->rt_mtu;
out->rmx_mtu = rt->rt_nhop->nh_mtu;
out->rmx_weight = rt->rt_weight;
out->rmx_pksent = counter_u64_fetch(rt->rt_pksent);
out->rmx_nhidx = nhop_get_idx(rt->rt_nhop);
@ -1496,7 +1501,7 @@ rtsock_routemsg(int cmd, struct rtentry *rt, struct ifnet *ifp, int rti_addrs,
bzero((caddr_t)&info, sizeof(info));
info.rti_info[RTAX_DST] = rt_key(rt);
info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt), rt_mask(rt), &ss);
info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
info.rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa;
info.rti_flags = rt->rt_flags;
info.rti_ifp = ifp;
@ -1725,7 +1730,7 @@ sysctl_dumpentry(struct radix_node *rn, void *vw)
return (0);
bzero((caddr_t)&info, sizeof(info));
info.rti_info[RTAX_DST] = rt_key(rt);
info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
info.rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa;
info.rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt),
rt_mask(rt), &ss);
info.rti_info[RTAX_GENMASK] = 0;
@ -2244,7 +2249,7 @@ rt_dumpentry_ddb(struct radix_node *rn, void *arg __unused)
rt = (void *)rn;
rt_dumpaddr_ddb("dst", rt_key(rt));
rt_dumpaddr_ddb("gateway", rt->rt_gateway);
rt_dumpaddr_ddb("gateway", &rt->rt_nhop->gw_sa);
rt_dumpaddr_ddb("netmask", rtsock_fix_netmask(rt_key(rt), rt_mask(rt),
&ss));
if (rt->rt_ifp != NULL && (rt->rt_ifp->if_flags & IFF_DYING) == 0) {

View File

@ -1565,29 +1565,26 @@ nd6_rtrequest(int req, struct rtentry *rt, struct rt_addrinfo *info)
{
struct sockaddr_in6 *gateway;
struct nd_defrouter *dr;
struct ifnet *ifp;
struct nhop_object *nh;
gateway = (struct sockaddr_in6 *)rt->rt_gateway;
ifp = rt->rt_ifp;
nh = rt->rt_nhop;
gateway = &nh->gw6_sa;
switch (req) {
case RTM_ADD:
break;
case RTM_DELETE:
if (!ifp)
return;
/*
* Only indirect routes are interesting.
*/
if ((rt->rt_flags & RTF_GATEWAY) == 0)
if ((nh->nh_flags & NHF_GATEWAY) == 0)
return;
/*
* check for default route
*/
if (IN6_ARE_ADDR_EQUAL(&in6addr_any,
&SIN6(rt_key(rt))->sin6_addr)) {
dr = defrouter_lookup(&gateway->sin6_addr, ifp);
if (nh->nh_flags & NHF_DEFAULT) {
dr = defrouter_lookup(&gateway->sin6_addr, nh->nh_ifp);
if (dr != NULL) {
dr->installed = 0;
defrouter_rele(dr);

View File

@ -401,8 +401,9 @@ void nd6_dad_stop(struct ifaddr *);
void nd6_rs_input(struct mbuf *, int, int);
void nd6_ra_input(struct mbuf *, int, int);
void nd6_ifnet_link_event(void *, struct ifnet *, int);
struct nd_defrouter *defrouter_lookup(struct in6_addr *, struct ifnet *);
struct nd_defrouter *defrouter_lookup_locked(struct in6_addr *, struct ifnet *);
struct nd_defrouter *defrouter_lookup(const struct in6_addr *, struct ifnet *);
struct nd_defrouter *defrouter_lookup_locked(const struct in6_addr *,
struct ifnet *);
void defrouter_reset(void);
void defrouter_select_fib(int fibnum);
void defrouter_rele(struct nd_defrouter *);

View File

@ -784,7 +784,7 @@ defrouter_del(struct nd_defrouter *dr)
struct nd_defrouter *
defrouter_lookup_locked(struct in6_addr *addr, struct ifnet *ifp)
defrouter_lookup_locked(const struct in6_addr *addr, struct ifnet *ifp)
{
struct nd_defrouter *dr;
@ -798,7 +798,7 @@ defrouter_lookup_locked(struct in6_addr *addr, struct ifnet *ifp)
}
struct nd_defrouter *
defrouter_lookup(struct in6_addr *addr, struct ifnet *ifp)
defrouter_lookup(const struct in6_addr *addr, struct ifnet *ifp)
{
struct nd_defrouter *dr;
@ -2006,7 +2006,7 @@ pfxlist_onlink_check(void)
static int
nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa)
{
struct sockaddr_dl sdl;
struct sockaddr_dl_short sdl;
struct rtentry *rt;
struct sockaddr_in6 mask6;
u_long rtflags;
@ -2021,8 +2021,8 @@ nd6_prefix_onlink_rtrequest(struct nd_prefix *pr, struct ifaddr *ifa)
mask6.sin6_addr = pr->ndpr_mask;
rtflags = (ifa->ifa_flags & ~IFA_RTSELF) | RTF_UP;
bzero(&sdl, sizeof(struct sockaddr_dl));
sdl.sdl_len = sizeof(struct sockaddr_dl);
bzero(&sdl, sizeof(struct sockaddr_dl_short));
sdl.sdl_len = sizeof(struct sockaddr_dl_short);
sdl.sdl_family = AF_LINK;
sdl.sdl_type = ifa->ifa_ifp->if_type;
sdl.sdl_index = ifa->ifa_ifp->if_index;