routing: add nhop(9) kpi.

Differential Revision: https://reviews.freebsd.org/D35985
MFC after:	1 month
This commit is contained in:
Alexander V. Chernikov 2022-07-29 12:32:27 +00:00
parent 29029b06a6
commit 800c68469b
10 changed files with 377 additions and 243 deletions

View File

@ -570,7 +570,7 @@ rt_getifa_family(struct rt_addrinfo *info, uint32_t fibnum)
}
/*
* Look up rt_addrinfo for a specific fib.
* Fills in rti_ifp and rti_ifa for the provided fib.
*
* Assume basic consistency checks are executed by callers:
* RTAX_DST exists, if RTF_GATEWAY is set, RTAX_GATEWAY exists as well.

View File

@ -179,6 +179,13 @@ link_nhgrp(struct nh_control *ctl, struct nhgrp_priv *grp_priv)
NHOPS_WUNLOCK(ctl);
#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
{
char nhgrp_buf[NHOP_PRINT_BUFSIZE];
nhgrp_print_buf(grp_priv->nhg, nhgrp_buf, sizeof(nhgrp_buf));
FIB_RH_LOG(LOG_DEBUG2, ctl->ctl_rh, "linked %s", nhgrp_buf);
}
#endif
consider_resize(ctl, new_num_buckets, new_num_items);
return (1);
@ -207,6 +214,14 @@ unlink_nhgrp(struct nh_control *ctl, struct nhgrp_priv *key)
NHOPS_WUNLOCK(ctl);
#if DEBUG_MAX_LEVEL >= LOG_DEBUG2
{
char nhgrp_buf[NHOP_PRINT_BUFSIZE];
nhgrp_print_buf(nhg_priv_ret->nhg, nhgrp_buf, sizeof(nhgrp_buf));
FIB_RH_LOG(LOG_DEBUG2, ctl->ctl_rh, "unlinked idx#%d %s", idx,
nhgrp_buf);
}
#endif
return (nhg_priv_ret);
}

View File

@ -304,6 +304,7 @@ link_nhop(struct nh_control *ctl, struct nhop_priv *nh_priv)
nh_priv->nh_idx = idx;
nh_priv->nh_control = ctl;
nh_priv->nh_finalized = 1;
CHT_SLIST_INSERT_HEAD(&ctl->nh_head, nhops, nh_priv);

View File

@ -175,6 +175,29 @@ struct sysctl_req;
struct sockaddr_dl;
struct rib_head;
/* flags that can be set using nhop_set_rtflags() */
#define RT_SET_RTFLAGS_MASK (RTF_PROTO1 | RTF_PROTO2 | RTF_PROTO3 | RTF_STATIC)
#define RT_CHANGE_RTFLAGS_MASK RT_SET_RTFLAGS_MASK
struct nhop_object *nhop_alloc(uint32_t fibnum, int family);
void nhop_copy(struct nhop_object *nh, const struct nhop_object *nh_orig);
struct nhop_object *nhop_get_nhop(struct nhop_object *nh, int *perror);
void nhop_set_direct_gw(struct nhop_object *nh, struct ifnet *ifp);
bool nhop_set_gw(struct nhop_object *nh, const struct sockaddr *sa, bool is_gw);
void nhop_set_mtu(struct nhop_object *nh, uint32_t mtu, bool from_user);
void nhop_set_rtflags(struct nhop_object *nh, int rt_flags);
void nhop_set_pxtype_flag(struct nhop_object *nh, int nh_flag);
void nhop_set_broadcast(struct nhop_object *nh, bool is_broadcast);
void nhop_set_blackhole(struct nhop_object *nh, int blackhole_rt_flag);
void nhop_set_pinned(struct nhop_object *nh, bool is_pinned);
void nhop_set_redirect(struct nhop_object *nh, bool is_redirect);
void nhop_set_type(struct nhop_object *nh, enum nhop_type nh_type);
void nhop_set_src(struct nhop_object *nh, struct ifaddr *ifa);
void nhop_set_transmit_ifp(struct nhop_object *nh, struct ifnet *ifp);
uint32_t nhop_get_idx(const struct nhop_object *nh);
enum nhop_type nhop_get_type(const struct nhop_object *nh);
int nhop_get_rtflags(const struct nhop_object *nh);

View File

@ -85,16 +85,13 @@ _DECLARE_DEBUG(LOG_INFO);
static int dump_nhop_entry(struct rib_head *rh, struct nhop_object *nh, struct sysctl_req *w);
static struct nhop_priv *alloc_nhop_structure(void);
static int get_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
struct nhop_priv **pnh_priv);
static int finalize_nhop(struct nh_control *ctl, struct rt_addrinfo *info,
struct nhop_priv *nh_priv);
static int finalize_nhop(struct nh_control *ctl, struct nhop_object *nh);
static struct ifnet *get_aifp(const struct nhop_object *nh);
static void fill_sdl_from_ifp(struct sockaddr_dl_short *sdl, const struct ifnet *ifp);
static void destroy_nhop_epoch(epoch_context_t ctx);
static void destroy_nhop(struct nhop_priv *nh_priv);
static void destroy_nhop(struct nhop_object *nh);
static struct rib_head *nhop_get_rh(const struct nhop_object *nh);
_Static_assert(__offsetof(struct nhop_object, nh_ifp) == 32,
"nhop_object: wrong nh_ifp offset");
@ -172,24 +169,8 @@ cmp_priv(const struct nhop_priv *_one, const struct nhop_priv *_two)
/*
 * Applies the MTU carried by the route request @info to nexthop @nh.
 *
 * Nothing happens unless the request has RTV_MTU set in rti_mflags.
 * Otherwise the value is passed to nhop_set_mtu() marked as coming
 * from the user, which stores it in nh_mtu and maintains RTF_FIXEDMTU:
 * set for a non-zero MTU, cleared when the MTU is explicitly reset to 0.
 */
static void
set_nhop_mtu_from_info(struct nhop_object *nh, const struct rt_addrinfo *info)
{

	if ((info->rti_mflags & RTV_MTU) == 0)
		return;

	/* Single source of truth for the MTU / RTF_FIXEDMTU bookkeeping */
	nhop_set_mtu(nh, info->rti_rmx->rmx_mtu, true);
}
/*
@ -213,9 +194,10 @@ set_nhop_gw_from_info(struct nhop_object *nh, struct rt_addrinfo *info)
struct sockaddr *gw;
gw = info->rti_info[RTAX_GATEWAY];
KASSERT(gw != NULL, ("gw is NULL"));
MPASS(gw != NULL);
bool is_gw = info->rti_flags & RTF_GATEWAY;
if ((gw->sa_family == AF_LINK) && !(info->rti_flags & RTF_GATEWAY)) {
if ((gw->sa_family == AF_LINK) && !is_gw) {
/*
* Interface route with interface specified by the interface
@ -233,7 +215,7 @@ set_nhop_gw_from_info(struct nhop_object *nh, struct rt_addrinfo *info)
sdl->sdl_index);
return (EINVAL);
}
fill_sdl_from_ifp(&nh->gwl_sa, ifp);
nhop_set_direct_gw(nh, ifp);
} else {
/*
@ -247,31 +229,12 @@ set_nhop_gw_from_info(struct nhop_object *nh, struct rt_addrinfo *info)
* In both cases, save the original nexthop to make the callers
* happy.
*/
if (gw->sa_len > sizeof(struct sockaddr_in6)) {
FIB_NH_LOG(LOG_DEBUG, nh, "nhop SA size too big: AF %d len %u",
gw->sa_family, gw->sa_len);
if (!nhop_set_gw(nh, gw, is_gw))
return (EINVAL);
}
memcpy(&nh->gw_sa, gw, gw->sa_len);
}
return (0);
}
/*
 * Translates route flags (RTF_*) into the corresponding nexthop
 * datapath flags (NHF_*).
 *
 * Either of RTF_DYNAMIC / RTF_MODIFIED yields NHF_REDIRECT.  Route
 * flags that are not tested below do not contribute to the result.
 */
static uint16_t
convert_rt_to_nh_flags(int rt_flags)
{
	uint16_t nh_flags = 0;

	if (rt_flags & RTF_REJECT)
		nh_flags |= NHF_REJECT;
	if (rt_flags & RTF_HOST)
		nh_flags |= NHF_HOST;
	if (rt_flags & RTF_BLACKHOLE)
		nh_flags |= NHF_BLACKHOLE;
	if (rt_flags & (RTF_DYNAMIC | RTF_MODIFIED))
		nh_flags |= NHF_REDIRECT;
	if (rt_flags & RTF_BROADCAST)
		nh_flags |= NHF_BROADCAST;
	if (rt_flags & RTF_GATEWAY)
		nh_flags |= NHF_GATEWAY;

	return (nh_flags);
}
static void
set_nhop_expire_from_info(struct nhop_object *nh, const struct rt_addrinfo *info)
{
@ -283,43 +246,6 @@ set_nhop_expire_from_info(struct nhop_object *nh, const struct rt_addrinfo *info
nhop_set_expire(nh, nh_expire);
}
/*
* Populates the nexthop behind @nh_priv from the route request @info:
* route/datapath flags, upper and neighbor families, MTU, gateway,
* expiration and the interface/address pointers.
*
* Returns 0 on success or the error reported by set_nhop_gw_from_info().
*/
static int
fill_nhop_from_info(struct nhop_priv *nh_priv, struct rt_addrinfo *info)
{
int error, rt_flags;
struct nhop_object *nh;
nh = nh_priv->nh;
/* Keep only the request flags selected by NHOP_RT_FLAG_MASK */
rt_flags = info->rti_flags & NHOP_RT_FLAG_MASK;
nh->nh_priv->rt_flags = rt_flags;
/* Upper family is the family of the route destination */
nh_priv->nh_upper_family = info->rti_info[RTAX_DST]->sa_family;
nh_priv->nh_type = 0; // hook responsibility to set nhop type
/* Datapath flags are derived from the filtered route flags */
nh->nh_flags = convert_rt_to_nh_flags(rt_flags);
set_nhop_mtu_from_info(nh, info);
if ((error = set_nhop_gw_from_info(nh, info)) != 0)
return (error);
/* An AF_LINK gateway inherits the upper family as neighbor family */
if (nh->gw_sa.sa_family == AF_LINK)
nh_priv->nh_neigh_family = nh_priv->nh_upper_family;
else
nh_priv->nh_neigh_family = nh->gw_sa.sa_family;
set_nhop_expire_from_info(nh, info);
/* Fall back to the ifa's interface when no ifp was supplied */
nh->nh_ifp = (info->rti_ifp != NULL) ? info->rti_ifp : info->rti_ifa->ifa_ifp;
nh->nh_ifa = info->rti_ifa;
/* depends on the gateway */
nh->nh_aifp = get_aifp(nh);
/*
* Note some of the remaining data is set by the
* per-address-family pre-add hook.
*/
return (0);
}
/*
* Creates a new nexthop based on the information in @info.
*
@ -331,81 +257,94 @@ int
nhop_create_from_info(struct rib_head *rnh, struct rt_addrinfo *info,
struct nhop_object **nh_ret)
{
struct nhop_priv *nh_priv;
int error;
NET_EPOCH_ASSERT();
MPASS(info->rti_ifa != NULL);
MPASS(info->rti_ifp != NULL);
if (info->rti_info[RTAX_GATEWAY] == NULL) {
FIB_RH_LOG(LOG_DEBUG, rnh, "error: empty gateway");
return (EINVAL);
}
nh_priv = alloc_nhop_structure();
struct nhop_object *nh = nhop_alloc(rnh->rib_fibnum, rnh->rib_family);
if (nh == NULL)
return (ENOMEM);
error = fill_nhop_from_info(nh_priv, info);
if (error != 0) {
uma_zfree(nhops_zone, nh_priv->nh);
if ((error = set_nhop_gw_from_info(nh, info)) != 0) {
nhop_free(nh);
return (error);
}
nhop_set_transmit_ifp(nh, info->rti_ifp);
error = get_nhop(rnh, info, &nh_priv);
if (error == 0)
*nh_ret = nh_priv->nh;
nhop_set_blackhole(nh, info->rti_flags & (RTF_BLACKHOLE | RTF_REJECT));
error = rnh->rnh_set_nh_pfxflags(rnh->rib_fibnum, info->rti_info[RTAX_DST],
info->rti_info[RTAX_NETMASK], nh);
nhop_set_redirect(nh, info->rti_flags & RTF_DYNAMIC);
nhop_set_pinned(nh, info->rti_flags & RTF_PINNED);
set_nhop_expire_from_info(nh, info);
nhop_set_rtflags(nh, info->rti_flags);
set_nhop_mtu_from_info(nh, info);
nhop_set_src(nh, info->rti_ifa);
/*
* The remaining fields are either set from nh_preadd hook
* or are computed from the provided data
*/
*nh_ret = nhop_get_nhop(nh, &error);
return (error);
}
/*
* Gets linked nhop using the provided @pnh_priv nexhop data.
* Gets linked nhop using the provided @nh nexthop data.
* If linked nhop is found, returns it, freeing the provided one.
* If there is no such nexthop, attaches the remaining data to the
* provided nexthop and links it.
*
* Returns 0 on success, storing referenced nexthop in @pnh_priv.
* Returns the referenced nexthop on success, with @perror set to 0.
* Otherwise, NULL is returned and the errno is stored in @perror.
*/
static int
get_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
struct nhop_priv **pnh_priv)
struct nhop_object *
nhop_get_nhop(struct nhop_object *nh, int *perror)
{
const struct sockaddr *dst, *netmask;
struct nhop_priv *nh_priv, *tmp_priv;
struct nhop_priv *tmp_priv;
int error;
nh_priv = *pnh_priv;
nh->nh_aifp = get_aifp(nh);
/* Give the protocols chance to augment the request data */
dst = info->rti_info[RTAX_DST];
netmask = info->rti_info[RTAX_NETMASK];
struct rib_head *rnh = nhop_get_rh(nh);
error = rnh->rnh_preadd(rnh->rib_fibnum, dst, netmask, nh_priv->nh);
/* Give the protocols chance to augment nexthop properties */
error = rnh->rnh_augment_nh(rnh->rib_fibnum, nh);
if (error != 0) {
uma_zfree(nhops_zone, nh_priv->nh);
return (error);
nhop_free(nh);
*perror = error;
return (NULL);
}
tmp_priv = find_nhop(rnh->nh_control, nh_priv);
tmp_priv = find_nhop(rnh->nh_control, nh->nh_priv);
if (tmp_priv != NULL) {
uma_zfree(nhops_zone, nh_priv->nh);
*pnh_priv = tmp_priv;
return (0);
nhop_free(nh);
*perror = 0;
return (tmp_priv->nh);
}
/*
* Existing nexthop not found, need to create new one.
* Note: multiple simultaneous get_nhop() requests
* Note: multiple simultaneous requests
* can result in multiple equal nexthops existing in the
* nexthop table. This is not a problem until the
* relative number of such nexthops is significant, which
* is extremely unlikely.
*/
error = finalize_nhop(rnh->nh_control, info, nh_priv);
if (error != 0)
return (error);
return (0);
*perror = finalize_nhop(rnh->nh_control, nh);
return (*perror == 0 ? nh : NULL);
}
/*
@ -413,28 +352,26 @@ get_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
* This is a helper function to support route changes.
*
* It limits the changes that can be done to the route to the following:
* 1) all combination of gateway changes (gw, interface, blackhole/reject)
* 2) route flags (FLAG[123],STATIC,BLACKHOLE,REJECT)
* 1) all combination of gateway changes
* 2) route flags (FLAG[123],STATIC)
* 3) route MTU
*
* Returns:
* 0 on success
* 0 on success, errno otherwise
*/
static int
alter_nhop_from_info(struct nhop_object *nh, struct rt_addrinfo *info)
{
struct nhop_priv *nh_priv = nh->nh_priv;
struct sockaddr *info_gw;
int error;
/* Update MTU if set in the request*/
set_nhop_mtu_from_info(nh, info);
/* XXX: allow only one of BLACKHOLE,REJECT,GATEWAY */
/* Allow some flags (FLAG1,STATIC,BLACKHOLE,REJECT) to be toggled on change. */
nh_priv->rt_flags &= ~RTF_FMASK;
nh_priv->rt_flags |= info->rti_flags & RTF_FMASK;
/* Only RTF_FLAG[123] and RTF_STATIC */
uint32_t rt_flags = nhop_get_rtflags(nh) & ~RT_CHANGE_RTFLAGS_MASK;
rt_flags |= info->rti_flags & RT_CHANGE_RTFLAGS_MASK;
nhop_set_rtflags(nh, rt_flags);
/* Consider gateway change */
info_gw = info->rti_info[RTAX_GATEWAY];
@ -442,22 +379,12 @@ alter_nhop_from_info(struct nhop_object *nh, struct rt_addrinfo *info)
error = set_nhop_gw_from_info(nh, info);
if (error != 0)
return (error);
if (nh->gw_sa.sa_family == AF_LINK)
nh_priv->nh_neigh_family = nh_priv->nh_upper_family;
else
nh_priv->nh_neigh_family = nh->gw_sa.sa_family;
/* Update RTF_GATEWAY flag status */
nh_priv->rt_flags &= ~RTF_GATEWAY;
nh_priv->rt_flags |= (RTF_GATEWAY & info->rti_flags);
}
/* Update datapath flags */
nh->nh_flags = convert_rt_to_nh_flags(nh_priv->rt_flags);
if (info->rti_ifa != NULL)
nh->nh_ifa = info->rti_ifa;
nhop_set_src(nh, info->rti_ifa);
if (info->rti_ifp != NULL)
nh->nh_ifp = info->rti_ifp;
nh->nh_aifp = get_aifp(nh);
nhop_set_transmit_ifp(nh, info->rti_ifp);
return (0);
}
@ -475,64 +402,28 @@ int
nhop_create_from_nhop(struct rib_head *rnh, const struct nhop_object *nh_orig,
struct rt_addrinfo *info, struct nhop_object **pnh)
{
struct nhop_priv *nh_priv;
struct nhop_object *nh;
int error;
NET_EPOCH_ASSERT();
nh_priv = alloc_nhop_structure();
nh = nh_priv->nh;
nh = nhop_alloc(rnh->rib_fibnum, rnh->rib_family);
if (nh == NULL)
return (ENOMEM);
/* Start with copying data from original nexthop */
nh_priv->nh_upper_family = nh_orig->nh_priv->nh_upper_family;
nh_priv->nh_neigh_family = nh_orig->nh_priv->nh_neigh_family;
nh_priv->rt_flags = nh_orig->nh_priv->rt_flags;
nh_priv->nh_type = nh_orig->nh_priv->nh_type;
nh_priv->nh_fibnum = nh_orig->nh_priv->nh_fibnum;
nh->nh_ifp = nh_orig->nh_ifp;
nh->nh_ifa = nh_orig->nh_ifa;
nh->nh_aifp = nh_orig->nh_aifp;
nh->nh_mtu = nh_orig->nh_mtu;
nh->nh_flags = nh_orig->nh_flags;
memcpy(&nh->gw_sa, &nh_orig->gw_sa, nh_orig->gw_sa.sa_len);
nhop_copy(nh, nh_orig);
error = alter_nhop_from_info(nh, info);
if (error != 0) {
uma_zfree(nhops_zone, nh_priv->nh);
nhop_free(nh);
return (error);
}
error = get_nhop(rnh, info, &nh_priv);
if (error == 0)
*pnh = nh_priv->nh;
*pnh = nhop_get_nhop(nh, &error);
return (error);
}
/*
 * Allocates a zeroed public nexthop object together with its private
 * companion and cross-links the two.
 *
 * Returns the nhop_priv pointer, or NULL when the allocation fails.
 */
static struct nhop_priv *
alloc_nhop_structure(void)
{
	struct nhop_object *nh_obj;
	struct nhop_priv *priv;

	nh_obj = uma_zalloc(nhops_zone, M_NOWAIT | M_ZERO);
	if (nh_obj == NULL)
		return (NULL);

	/* The private part starts NHOP_OBJECT_ALIGNED_SIZE bytes into the item */
	priv = (struct nhop_priv *)((char *)nh_obj + NHOP_OBJECT_ALIGNED_SIZE);
	priv->nh = nh_obj;
	nh_obj->nh_priv = priv;

	return (priv);
}
static bool
reference_nhop_deps(struct nhop_object *nh)
{
@ -543,7 +434,8 @@ reference_nhop_deps(struct nhop_object *nh)
ifa_free(nh->nh_ifa);
return (false);
}
FIB_NH_LOG(LOG_DEBUG, nh, "AIFP: %p nh_ifp %p", nh->nh_aifp, nh->nh_ifp);
FIB_NH_LOG(LOG_DEBUG2, nh, "nh_aifp: %s nh_ifp %s",
if_name(nh->nh_aifp), if_name(nh->nh_ifp));
if (!if_try_ref(nh->nh_ifp)) {
ifa_free(nh->nh_ifa);
if_rele(nh->nh_aifp);
@ -560,15 +452,13 @@ reference_nhop_deps(struct nhop_object *nh)
* errno otherwise. @nh is freed in case of error.
*/
static int
finalize_nhop(struct nh_control *ctl, struct rt_addrinfo *info,
struct nhop_priv *nh_priv)
finalize_nhop(struct nh_control *ctl, struct nhop_object *nh)
{
struct nhop_object *nh = nh_priv->nh;
/* Allocate per-cpu packet counter */
nh->nh_pksent = counter_u64_alloc(M_NOWAIT);
if (nh->nh_pksent == NULL) {
uma_zfree(nhops_zone, nh);
nhop_free(nh);
RTSTAT_INC(rts_nh_alloc_failure);
FIB_NH_LOG(LOG_WARNING, nh, "counter_u64_alloc() failed");
return (ENOMEM);
@ -576,23 +466,21 @@ finalize_nhop(struct nh_control *ctl, struct rt_addrinfo *info,
if (!reference_nhop_deps(nh)) {
counter_u64_free(nh->nh_pksent);
uma_zfree(nhops_zone, nh);
nhop_free(nh);
RTSTAT_INC(rts_nh_alloc_failure);
FIB_NH_LOG(LOG_WARNING, nh, "interface reference failed");
return (EAGAIN);
}
/* Save vnet to ease destruction */
nh_priv->nh_vnet = curvnet;
refcount_init(&nh_priv->nh_refcnt, 1);
nh->nh_priv->nh_vnet = curvnet;
/* Please see nhop_free() comments on the initial value */
refcount_init(&nh_priv->nh_linked, 2);
refcount_init(&nh->nh_priv->nh_linked, 2);
nh_priv->nh_fibnum = ctl->ctl_rh->rib_fibnum;
nh->nh_priv->nh_fibnum = ctl->ctl_rh->rib_fibnum;
if (link_nhop(ctl, nh_priv) == 0) {
if (link_nhop(ctl, nh->nh_priv) == 0) {
/*
* Adding nexthop to the datastructures
* failed. Call destructor w/o waiting for
@ -602,7 +490,7 @@ finalize_nhop(struct nh_control *ctl, struct rt_addrinfo *info,
char nhbuf[NHOP_PRINT_BUFSIZE];
FIB_NH_LOG(LOG_WARNING, nh, "failed to link %s",
nhop_print_buf(nh, nhbuf, sizeof(nhbuf)));
destroy_nhop(nh_priv);
destroy_nhop(nh);
return (ENOBUFS);
}
@ -616,12 +504,8 @@ finalize_nhop(struct nh_control *ctl, struct rt_addrinfo *info,
}
static void
destroy_nhop(struct nhop_priv *nh_priv)
destroy_nhop(struct nhop_object *nh)
{
struct nhop_object *nh;
nh = nh_priv->nh;
if_rele(nh->nh_ifp);
if_rele(nh->nh_aifp);
ifa_free(nh->nh_ifa);
@ -640,7 +524,7 @@ destroy_nhop_epoch(epoch_context_t ctx)
nh_priv = __containerof(ctx, struct nhop_priv, nh_epoch_ctx);
destroy_nhop(nh_priv);
destroy_nhop(nh_priv->nh);
}
void
@ -669,6 +553,12 @@ nhop_free(struct nhop_object *nh)
if (!refcount_release(&nh_priv->nh_refcnt))
return;
/* allows to use nhop_free() during nhop init */
if (__predict_false(nh_priv->nh_finalized == 0)) {
uma_zfree(nhops_zone, nh);
return;
}
#if DEBUG_MAX_LEVEL >= LOG_DEBUG
char nhbuf[NHOP_PRINT_BUFSIZE];
FIB_NH_LOG(LOG_DEBUG, nh, "deleting %s", nhop_print_buf(nh, nhbuf, sizeof(nhbuf)));
@ -738,7 +628,144 @@ nhop_free_any(struct nhop_object *nh)
#endif
}
/* Helper functions */
/* Nhop-related methods */
/*
 * Allocates an empty unlinked nhop object for fib @fibnum and
 * address family @family.
 *
 * The object is returned zeroed, with its public and private parts
 * cross-linked, the upper family and fib number recorded, and one
 * reference held so that nhop_free() can be used on it right away.
 *
 * Returns object pointer or NULL on failure.
 */
struct nhop_object *
nhop_alloc(uint32_t fibnum, int family)
{
	struct nhop_object *nh_obj;
	struct nhop_priv *priv;

	nh_obj = uma_zalloc(nhops_zone, M_NOWAIT | M_ZERO);
	if (__predict_false(nh_obj == NULL))
		return (NULL);

	/* The private part starts NHOP_OBJECT_ALIGNED_SIZE bytes into the item */
	priv = (struct nhop_priv *)((char *)nh_obj + NHOP_OBJECT_ALIGNED_SIZE);
	priv->nh = nh_obj;
	nh_obj->nh_priv = priv;

	priv->nh_fibnum = fibnum;
	priv->nh_upper_family = family;

	/* Set up the refcount early to allow nhop_free() to work */
	refcount_init(&priv->nh_refcnt, 1);

	return (nh_obj);
}
void
nhop_copy(struct nhop_object *nh, const struct nhop_object *nh_orig)
{
struct nhop_priv *nh_priv = nh->nh_priv;
nh->nh_flags = nh_orig->nh_flags;
nh->nh_mtu = nh_orig->nh_mtu;
memcpy(&nh->gw_sa, &nh_orig->gw_sa, nh_orig->gw_sa.sa_len);
nh->nh_ifp = nh_orig->nh_ifp;
nh->nh_ifa = nh_orig->nh_ifa;
nh->nh_aifp = nh_orig->nh_aifp;
nh_priv->nh_upper_family = nh_orig->nh_priv->nh_upper_family;
nh_priv->nh_neigh_family = nh_orig->nh_priv->nh_neigh_family;
nh_priv->nh_type = nh_orig->nh_priv->nh_type;
nh_priv->rt_flags = nh_orig->nh_priv->rt_flags;
nh_priv->nh_fibnum = nh_orig->nh_priv->nh_fibnum;
}
void
nhop_set_direct_gw(struct nhop_object *nh, struct ifnet *ifp)
{
nh->nh_flags &= ~NHF_GATEWAY;
nh->nh_priv->rt_flags &= ~RTF_GATEWAY;
nh->nh_priv->nh_neigh_family = nh->nh_priv->nh_upper_family;
fill_sdl_from_ifp(&nh->gwl_sa, ifp);
memset(&nh->gw_buf[nh->gw_sa.sa_len], 0, sizeof(nh->gw_buf) - nh->gw_sa.sa_len);
}
/*
 * Sets gateway for the nexthop.
 *
 * @gw is either a "normal" gateway address, in which case @is_gw is
 * true, or - for the special form of adding an interface route - the
 * local interface address the route refers to, in which case @is_gw
 * is false.
 *
 * With @is_gw set, the gateway bit is raised in both flag sets and the
 * neighbor family becomes the family of @gw.  Otherwise the gateway bit
 * is cleared and the neighbor family follows the upper family.
 *
 * Returns false without modifying @nh when @gw does not fit into the
 * nexthop gateway buffer, true otherwise.
 */
bool
nhop_set_gw(struct nhop_object *nh, const struct sockaddr *gw, bool is_gw)
{
	struct nhop_priv *priv = nh->nh_priv;
	const size_t gw_len = gw->sa_len;

	if (gw_len > sizeof(nh->gw_buf)) {
		FIB_NH_LOG(LOG_DEBUG, nh, "nhop SA size too big: AF %d len %u",
		    gw->sa_family, gw->sa_len);
		return (false);
	}

	/* Store the address and zero the unused tail of the buffer */
	memcpy(&nh->gw_sa, gw, gw_len);
	memset(&nh->gw_buf[gw_len], 0, sizeof(nh->gw_buf) - gw_len);

	if (!is_gw) {
		nh->nh_flags &= ~NHF_GATEWAY;
		priv->rt_flags &= ~RTF_GATEWAY;
		priv->nh_neigh_family = priv->nh_upper_family;
		return (true);
	}

	nh->nh_flags |= NHF_GATEWAY;
	priv->rt_flags |= RTF_GATEWAY;
	priv->nh_neigh_family = gw->sa_family;

	return (true);
}
/*
 * Marks (@is_broadcast true) or unmarks (@is_broadcast false) @nh as
 * a broadcast nexthop, keeping NHF_BROADCAST and RTF_BROADCAST in sync.
 */
void
nhop_set_broadcast(struct nhop_object *nh, bool is_broadcast)
{
	struct nhop_priv *priv = nh->nh_priv;

	if (!is_broadcast) {
		nh->nh_flags &= ~NHF_BROADCAST;
		priv->rt_flags &= ~RTF_BROADCAST;
		return;
	}

	nh->nh_flags |= NHF_BROADCAST;
	priv->rt_flags |= RTF_BROADCAST;
}
void
nhop_set_blackhole(struct nhop_object *nh, int blackhole_rt_flag)
{
nh->nh_flags &= ~(NHF_BLACKHOLE | NHF_REJECT);
nh->nh_priv->rt_flags &= ~(RTF_BLACKHOLE | RTF_REJECT);
switch (blackhole_rt_flag) {
case RTF_BLACKHOLE:
nh->nh_flags |= NHF_BLACKHOLE;
nh->nh_priv->rt_flags |= RTF_BLACKHOLE;
break;
case RTF_REJECT:
nh->nh_flags |= NHF_REJECT;
nh->nh_priv->rt_flags |= RTF_REJECT;
break;
}
}
/*
 * Marks (@is_redirect true) or unmarks (@is_redirect false) @nh as
 * a redirect nexthop: RTF_DYNAMIC in the route flags and NHF_REDIRECT
 * in the datapath flags are updated together.
 */
void
nhop_set_redirect(struct nhop_object *nh, bool is_redirect)
{
	struct nhop_priv *priv = nh->nh_priv;

	if (!is_redirect) {
		priv->rt_flags &= ~RTF_DYNAMIC;
		nh->nh_flags &= ~NHF_REDIRECT;
		return;
	}

	priv->rt_flags |= RTF_DYNAMIC;
	nh->nh_flags |= NHF_REDIRECT;
}
/*
 * Sets (@is_pinned true) or clears (@is_pinned false) RTF_PINNED in
 * the route flags of @nh.  Datapath flags are not touched.
 */
void
nhop_set_pinned(struct nhop_object *nh, bool is_pinned)
{
	struct nhop_priv *priv = nh->nh_priv;

	if (!is_pinned) {
		priv->rt_flags &= ~RTF_PINNED;
		return;
	}

	priv->rt_flags |= RTF_PINNED;
}
uint32_t
nhop_get_idx(const struct nhop_object *nh)
@ -768,13 +795,65 @@ nhop_get_rtflags(const struct nhop_object *nh)
return (nh->nh_priv->rt_flags);
}
/*
* Sets generic rtflags that are not covered by other functions.
*/
void
nhop_set_rtflags(struct nhop_object *nh, int rt_flags)
{
nh->nh_priv->rt_flags = rt_flags;
nh->nh_priv->rt_flags &= ~RT_SET_RTFLAGS_MASK;
nh->nh_priv->rt_flags |= (rt_flags & RT_SET_RTFLAGS_MASK);
}
/*
* Sets flags that are specific to the prefix (NHF_HOST or NHF_DEFAULT).
*/
void
nhop_set_pxtype_flag(struct nhop_object *nh, int nh_flag)
{
if (nh_flag == NHF_HOST) {
nh->nh_flags |= NHF_HOST;
nh->nh_flags &= ~NHF_DEFAULT;
nh->nh_priv->rt_flags |= RTF_HOST;
} else if (nh_flag == NHF_DEFAULT) {
nh->nh_flags |= NHF_DEFAULT;
nh->nh_flags &= ~NHF_HOST;
nh->nh_priv->rt_flags &= ~RTF_HOST;
} else {
nh->nh_flags &= ~(NHF_HOST | NHF_DEFAULT);
nh->nh_priv->rt_flags &= ~RTF_HOST;
}
}
/*
 * Sets nhop MTU.
 *
 * When the value comes from userland (@from_user), RTF_FIXEDMTU is
 * updated too: set for a non-zero @mtu, cleared for @mtu == 0.
 * Otherwise RTF_FIXEDMTU is left as it is.
 */
void
nhop_set_mtu(struct nhop_object *nh, uint32_t mtu, bool from_user)
{

	nh->nh_mtu = mtu;

	if (!from_user)
		return;

	if (mtu == 0)
		nh->nh_priv->rt_flags &= ~RTF_FIXEDMTU;
	else
		nh->nh_priv->rt_flags |= RTF_FIXEDMTU;
}
/*
* Records @ifa as the interface address of @nh (nh_ifa).
* Only the pointer is stored here; this function does nothing else.
*/
void
nhop_set_src(struct nhop_object *nh, struct ifaddr *ifa)
{
nh->nh_ifa = ifa;
}
/*
* Records @ifp as the interface of @nh (nh_ifp).
* Only the pointer is stored here; this function does nothing else.
*/
void
nhop_set_transmit_ifp(struct nhop_object *nh, struct ifnet *ifp)
{
nh->nh_ifp = ifp;
}
struct vnet *
nhop_get_vnet(const struct nhop_object *nh)
{
@ -827,6 +906,15 @@ nhop_set_expire(struct nhop_object *nh, uint32_t expire)
nh->nh_priv->nh_expire = expire;
}
/*
* Returns the rib head that rt_tables_get_rnh() yields for the fib
* number and the neighbor family of @nh.
*
* NOTE(review): the lookup is keyed by the neighbor family rather than
* the upper family - confirm this is intended for nexthops whose
* gateway family differs from the family of the route.
*/
static struct rib_head *
nhop_get_rh(const struct nhop_object *nh)
{
uint32_t fibnum = nhop_get_fibnum(nh);
int family = nhop_get_neigh_family(nh);
return (rt_tables_get_rnh(fibnum, family));
}
void
nhops_update_ifmtu(struct rib_head *rh, struct ifnet *ifp, uint32_t mtu)
{

View File

@ -85,6 +85,7 @@ struct nhop_priv {
void *cb_func; /* function handling additional rewrite caps */
u_int nh_refcnt; /* number of references, refcount(9) */
u_int nh_linked; /* refcount(9), == 2 if linked to the list */
int nh_finalized; /* non-zero if finalized() was called */
struct nhop_object *nh; /* backreference to the dataplane nhop */
struct nh_control *nh_control; /* backreference to the rnh */
struct nhop_priv *nh_next; /* hash table membership */

View File

@ -48,8 +48,11 @@
#endif
struct nh_control;
typedef int rnh_preadd_entry_f_t(u_int fibnum, const struct sockaddr *addr,
/* Sets prefix-specific nexthop flags (NHF_DEFAULT, RTF/NHF_HOST, RTF_BROADCAST,..) */
typedef int rnh_set_nh_pfxflags_f_t(u_int fibnum, const struct sockaddr *addr,
const struct sockaddr *mask, struct nhop_object *nh);
/* Fills in family-specific details that are not yet set up (mtu, nhop type, ..) */
typedef int rnh_augment_nh_f_t(u_int fibnum, struct nhop_object *nh);
struct rib_head {
struct radix_head head;
@ -59,7 +62,7 @@ struct rib_head {
rn_lookup_f_t *rnh_lookup; /* exact match for sockaddr */
rn_walktree_t *rnh_walktree; /* traverse tree */
rn_walktree_from_t *rnh_walktree_from; /* traverse tree below a */
rnh_preadd_entry_f_t *rnh_preadd; /* hook to alter record prior to insertion */
rnh_set_nh_pfxflags_f_t *rnh_set_nh_pfxflags; /* hook to alter record prior to insertion */
rt_gen_t rnh_gen; /* datapath generation counter */
int rnh_multipath; /* multipath capable ? */
struct radix_node rnh_nodes[3]; /* empty tree for common case */
@ -76,6 +79,7 @@ struct rib_head {
uint32_t rib_algo_fixed:1;/* fixed algorithm */
uint32_t rib_algo_init:1;/* algo init done */
struct nh_control *nh_control; /* nexthop subsystem data */
rnh_augment_nh_f_t *rnh_augment_nh;/* hook to alter nexthop prior to insertion */
CK_STAILQ_HEAD(, rib_subscription) rnh_subscribers;/* notification subscribers */
};
@ -204,11 +208,6 @@ struct rtentry {
* RTF_PINNED, RTF_REJECT, RTF_BLACKHOLE, RTF_BROADCAST
*/
/* Nexthop rt flags mask */
#define NHOP_RT_FLAG_MASK (RTF_GATEWAY | RTF_HOST | RTF_REJECT | RTF_DYNAMIC | \
RTF_MODIFIED | RTF_STATIC | RTF_BLACKHOLE | RTF_PROTO1 | RTF_PROTO2 | \
RTF_PROTO3 | RTF_FIXEDMTU | RTF_PINNED | RTF_BROADCAST)
/* rtentry rt flag mask */
#define RTE_RT_FLAG_MASK (RTF_UP | RTF_HOST)
@ -250,8 +249,6 @@ int nhop_try_ref_object(struct nhop_object *nh);
void nhop_ref_any(struct nhop_object *nh);
void nhop_free_any(struct nhop_object *nh);
void nhop_set_type(struct nhop_object *nh, enum nhop_type nh_type);
void nhop_set_rtflags(struct nhop_object *nh, int rt_flags);
int nhop_create_from_info(struct rib_head *rnh, struct rt_addrinfo *info,
struct nhop_object **nh_ret);

View File

@ -52,18 +52,15 @@ __FBSDID("$FreeBSD$");
#include <netinet/ip_var.h>
static int
rib4_preadd(u_int fibnum, const struct sockaddr *addr, const struct sockaddr *mask,
rib4_set_nh_pfxflags(u_int fibnum, const struct sockaddr *addr, const struct sockaddr *mask,
struct nhop_object *nh)
{
const struct sockaddr_in *addr4 = (const struct sockaddr_in *)addr;
uint16_t nh_type;
int rt_flags;
const struct sockaddr_in *mask4 = (const struct sockaddr_in *)mask;
bool is_broadcast = false;
/* XXX: RTF_LOCAL && RTF_MULTICAST */
rt_flags = nhop_get_rtflags(nh);
if (rt_flags & RTF_HOST) {
if (mask == NULL) {
nhop_set_pxtype_flag(nh, NHF_HOST);
/*
* Backward compatibility:
* if the destination is broadcast,
@ -76,13 +73,21 @@ rib4_preadd(u_int fibnum, const struct sockaddr *addr, const struct sockaddr *ma
* add these routes to support some cases with active-active
* load balancing. Given that, retain this support.
*/
if (in_broadcast(addr4->sin_addr, nh->nh_ifp)) {
rt_flags |= RTF_BROADCAST;
nhop_set_rtflags(nh, rt_flags);
nh->nh_flags |= NHF_BROADCAST;
}
}
if (in_broadcast(addr4->sin_addr, nh->nh_ifp))
is_broadcast = true;
} else if (mask4->sin_addr.s_addr == 0)
nhop_set_pxtype_flag(nh, NHF_DEFAULT);
else
nhop_set_pxtype_flag(nh, 0);
nhop_set_broadcast(nh, is_broadcast);
return (0);
}
static int
rib4_augment_nh(u_int fibnum, struct nhop_object *nh)
{
/*
* Check route MTU:
* inherit interface MTU if not set or
@ -93,14 +98,9 @@ rib4_preadd(u_int fibnum, const struct sockaddr *addr, const struct sockaddr *ma
} else if (nh->nh_mtu > nh->nh_ifp->if_mtu)
nh->nh_mtu = nh->nh_ifp->if_mtu;
/* Ensure that default route nhop has special flag */
const struct sockaddr_in *mask4 = (const struct sockaddr_in *)mask;
if ((rt_flags & RTF_HOST) == 0 && mask4 != NULL &&
mask4->sin_addr.s_addr == 0)
nh->nh_flags |= NHF_DEFAULT;
/* Set nhop type to basic per-AF nhop */
if (nhop_get_type(nh) == 0) {
uint16_t nh_type;
if (nh->nh_flags & NHF_GATEWAY)
nh_type = NH_TYPE_IPV4_ETHER_NHOP;
else
@ -124,7 +124,8 @@ in_inithead(uint32_t fibnum)
if (rh == NULL)
return (NULL);
rh->rnh_preadd = rib4_preadd;
rh->rnh_set_nh_pfxflags = rib4_set_nh_pfxflags;
rh->rnh_augment_nh = rib4_augment_nh;
return (rh);
}

View File

@ -94,13 +94,24 @@ __FBSDID("$FreeBSD$");
#include <netinet6/nd6.h>
static int
rib6_preadd(u_int fibnum, const struct sockaddr *addr, const struct sockaddr *mask,
rib6_set_nh_pfxflags(u_int fibnum, const struct sockaddr *addr, const struct sockaddr *mask,
struct nhop_object *nh)
{
uint16_t nh_type;
const struct sockaddr_in6 *mask6 = (const struct sockaddr_in6 *)mask;
/* XXX: RTF_LOCAL */
if (mask6 == NULL)
nhop_set_pxtype_flag(nh, NHF_HOST);
else if (IN6_IS_ADDR_UNSPECIFIED(&mask6->sin6_addr))
nhop_set_pxtype_flag(nh, NHF_DEFAULT);
else
nhop_set_pxtype_flag(nh, 0);
return (0);
}
static int
rib6_augment_nh(u_int fibnum, struct nhop_object *nh)
{
/*
* Check route MTU:
* inherit interface MTU if not set or
@ -111,14 +122,9 @@ rib6_preadd(u_int fibnum, const struct sockaddr *addr, const struct sockaddr *ma
} else if (nh->nh_mtu > IN6_LINKMTU(nh->nh_ifp))
nh->nh_mtu = IN6_LINKMTU(nh->nh_ifp);
/* Ensure that default route nhop has special flag */
const struct sockaddr_in6 *mask6 = (const struct sockaddr_in6 *)mask;
if ((nhop_get_rtflags(nh) & RTF_HOST) == 0 && mask6 != NULL &&
IN6_IS_ADDR_UNSPECIFIED(&mask6->sin6_addr))
nh->nh_flags |= NHF_DEFAULT;
/* Set nexthop type */
if (nhop_get_type(nh) == 0) {
uint16_t nh_type;
if (nh->nh_flags & NHF_GATEWAY)
nh_type = NH_TYPE_IPV6_ETHER_NHOP;
else
@ -145,7 +151,8 @@ in6_inithead(uint32_t fibnum)
if (rh == NULL)
return (NULL);
rh->rnh_preadd = rib6_preadd;
rh->rnh_set_nh_pfxflags = rib6_set_nh_pfxflags;
rh->rnh_augment_nh = rib6_augment_nh;
rs = rib_subscribe_internal(rh, nd6_subscription_cb, NULL,
RIB_NOTIFY_IMMEDIATE, true);

View File

@ -2042,6 +2042,7 @@ nd6_prefix_rtrequest(uint32_t fibnum, int cmd, struct sockaddr_in6 *dst,
struct rt_addrinfo info = {
.rti_ifa = ifa,
.rti_ifp = ifp,
.rti_flags = RTF_PINNED | ((netmask != NULL) ? 0 : RTF_HOST),
.rti_info = {
[RTAX_DST] = (struct sockaddr *)dst,