Introduce scalable route multipath.

This change is based on the nexthop objects landed in D24232.

The change introduces the concept of nexthop groups.
Each group contains the collection of nexthops with their
 relative weights and a dataplane-optimized structure to enable
 efficient nexthop selection.

Similar to the nexthops, nexthop groups are immutable. Dataplane part
 gets compiled during group creation and is basically an array of
 nexthop pointers, compiled w.r.t their weights.

With this change, `rt_nhop` field of `struct rtentry` contains either
 nexthop or nexthop group. They are distinguished by the presence of
 NHF_MULTIPATH flag.
All dataplane lookup functions return a pointer to the nexthop object,
leaving nexthop group details inside the routing subsystem.

User-visible changes:

The change is intended to be backward-compatible: all non-mpath operations
 should work as before with ROUTE_MPATH and net.route.multipath=1.

All routes now come with a weight; the default weight is 1, the maximum is 2^24-1.

Current maximum multipath group width is statically set to 64.
 This will become sysctl-tunable in the followup changes.

Using functionality:
* Recompile kernel with ROUTE_MPATH
* set net.route.multipath to 1

route add -6 2001:db8::/32 2001:db8::2 -weight 10
route add -6 2001:db8::/32 2001:db8::3 -weight 20

netstat -6On

Nexthop groups data

Internet6:
GrpIdx  NhIdx     Weight   Slots                                 Gateway     Netif  Refcnt
1         ------- ------- ------- --------------------------------------- ---------       1
              13      10       1                             2001:db8::2     vlan2
              14      20       2                             2001:db8::3     vlan2

Next steps:
* Land outbound hashing for locally-originated routes ( D26523 ).
* Fix net/bird multipath (net/frr seems to work fine)
* Add ROUTE_MPATH to GENERIC
* Set net.route.multipath=1 by default

Tested by:	olivier
Reviewed by:	glebius
Relnotes:	yes
Differential Revision:	https://reviews.freebsd.org/D26449
This commit is contained in:
Alexander V. Chernikov 2020-10-03 10:47:17 +00:00
parent c97d2c8ae8
commit fedeb08b6a
33 changed files with 2433 additions and 244 deletions

View File

@ -1002,7 +1002,7 @@ device lagg
#
# TCP_HHOOK enables the hhook(9) framework hooks for the TCP stack.
#
# RADIX_MPATH provides support for equal-cost multi-path routing.
# ROUTE_MPATH provides support for multipath routing.
#
options MROUTING # Multicast routing
options IPFIREWALL #firewall
@ -1023,7 +1023,7 @@ options TCPDEBUG
options TCPPCAP
options TCP_BLACKBOX
options TCP_HHOOK
options RADIX_MPATH
options ROUTE_MPATH
# The MBUF_STRESS_TEST option enables options which create
# various random failures / extreme cases related to mbuf

View File

@ -4143,10 +4143,12 @@ net/debugnet.c optional inet debugnet
net/debugnet_inet.c optional inet debugnet
net/pfil.c optional ether | inet
net/radix.c standard
net/radix_mpath.c standard
net/raw_cb.c standard
net/raw_usrreq.c standard
net/route.c standard
net/route/mpath_ctl.c optional route_mpath
net/route/nhgrp.c optional route_mpath
net/route/nhgrp_ctl.c optional route_mpath
net/route/nhop.c standard
net/route/nhop_ctl.c standard
net/route/nhop_utils.c standard

View File

@ -454,6 +454,7 @@ NFSLOCKD
PCBGROUP opt_pcbgroup.h
PF_DEFAULT_TO_DROP opt_pf.h
RADIX_MPATH opt_mpath.h
ROUTE_MPATH opt_route.h
ROUTETABLES opt_route.h
RSS opt_rss.h
SLIP_IFF_OPTS opt_slip.h

View File

@ -44,10 +44,6 @@
#include <sys/malloc.h>
#include <sys/syslog.h>
#include <net/radix.h>
#include "opt_mpath.h"
#ifdef RADIX_MPATH
#include <net/radix_mpath.h>
#endif
#else /* !_KERNEL */
#include <stdio.h>
#include <strings.h>

View File

@ -39,7 +39,6 @@
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_mrouting.h"
#include "opt_mpath.h"
#include "opt_route.h"
#include <sys/param.h>

View File

@ -178,6 +178,7 @@ VNET_DECLARE(u_int, rt_add_addr_allfibs); /* Announce interfaces to all fibs */
*/
/* Consumer-visible nexthop info flags */
#define NHF_MULTIPATH 0x0008 /* Nexhop is a nexthop group */
#define NHF_REJECT 0x0010 /* RTF_REJECT */
#define NHF_BLACKHOLE 0x0020 /* RTF_BLACKHOLE */
#define NHF_REDIRECT 0x0040 /* RTF_DYNAMIC|RTF_MODIFIED */
@ -208,6 +209,10 @@ struct rtstat {
uint64_t rts_wildcard; /* lookups satisfied by a wildcard */
uint64_t rts_nh_idx_alloc_failure; /* nexthop index alloc failure*/
uint64_t rts_nh_alloc_failure; /* nexthop allocation failure*/
uint64_t rts_add_failure; /* # of route addition failures */
uint64_t rts_add_retry; /* # of route addition retries */
uint64_t rts_del_failure; /* # of route deletion failure */
uint64_t rts_del_retry; /* # of route deletion retries */
};
/*

165
sys/net/route/mpath_ctl.c Normal file
View File

@ -0,0 +1,165 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2020 Alexander V. Chernikov
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include "opt_inet.h"
#include "opt_route.h"
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/rmlock.h>
#include <sys/rwlock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/kernel.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/route/route_ctl.h>
#include <net/route/route_var.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_fib.h>
#include <net/route/nhop_utils.h>
#include <net/route/nhop.h>
#include <net/route/nhop_var.h>
/*
* This file contains the supporting functions for adding/deleting/updating
* multipath routes to the routing table.
*/
SYSCTL_DECL(_net_route);
/*
 * Tries to add the nexthop described by @rnd_add to the existing set of
 * nexthops (@rnd_orig) for the prefix specified by @rt.
 *
 * Returns 0 and consumes rt / rnd_add nhop references; @rc gets populated
 * with the operation result.
 * Otherwise errno is returned.
 *
 * It is the caller's responsibility to unlock/free rt and rt->rt_nhop.
 */
int
add_route_mpath(struct rib_head *rnh, struct rt_addrinfo *info,
    struct rtentry *rt, struct route_nhop_data *rnd_add,
    struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc)
{
	RIB_RLOCK_TRACKER;
	struct route_nhop_data rnd_new;
	int attempt, error;

	error = 0;

	/*
	 * Several rtsock speakers may update the same route at the same
	 * time.  Retry the whole cycle a bounded number of times to lower
	 * the chance of failing the request.
	 */
	for (attempt = 0; attempt < RIB_MAX_RETRIES; attempt++) {
		error = nhgrp_get_addition_group(rnh, rnd_orig, rnd_add,
		    &rnd_new);
		if (error == EAGAIN) {
			/*
			 * Group creation failed, most probably because
			 * @rnd_orig data got scheduled for deletion.
			 * Refresh @rnd_orig data and retry.
			 */
			RIB_RLOCK(rnh);
			lookup_prefix(rnh, info, rnd_orig);
			RIB_RUNLOCK(rnh);
			continue;
		}
		if (error != 0)
			break;

		error = change_route_conditional(rnh, rt, info, rnd_orig,
		    &rnd_new, rc);
		if (error != EAGAIN)
			break;

		/* Only a lost conditional update counts as a retry. */
		RTSTAT_INC(rts_add_retry);
	}

	return (error);
}
/*
 * Argument bundle handed to gw_filter_func() through its opaque data pointer.
 */
struct rt_match_info {
struct rt_addrinfo *info; /* request forwarded to check_info_match_nhop() */
struct rtentry *rt; /* route entry forwarded to check_info_match_nhop() */
};
/*
 * Nexthop filter callback: @_data points to a struct rt_match_info.
 * Returns true when check_info_match_nhop() reports 0 for @nh.
 */
static bool
gw_filter_func(const struct nhop_object *nh, void *_data)
{
	struct rt_match_info *match = _data;
	int rc;

	rc = check_info_match_nhop(match->info, match->rt, nh);
	return (rc == 0);
}
/*
* Tries to delete matching paths from @nhg.
* Returns 0 on success and updates operation result in @rc.
*/
int
del_route_mpath(struct rib_head *rh, struct rt_addrinfo *info,
struct rtentry *rt, struct nhgrp_object *nhg,
struct rib_cmd_info *rc)
{
struct route_nhop_data rnd;
struct rt_match_info ri = { .info = info, .rt = rt };
int error;
RIB_WLOCK_ASSERT(rh);
/*
* Require gateway to delete multipath routes, to forbid
* deleting all paths at once.
* If the filter function is provided, skip gateway check to
* allow rib_walk_del() delete routes for any criteria based
* on provided callback.
*/
if ((info->rti_info[RTAX_GATEWAY] == NULL) && (info->rti_filter == NULL))
return (ESRCH);
error = nhgrp_get_filtered_group(rh, nhg, gw_filter_func, (void *)&ri,
&rnd);
if (error == 0)
error = change_route_nhop(rh, rt, info, &rnd, rc);
return (error);
}

344
sys/net/route/nhgrp.c Normal file
View File

@ -0,0 +1,344 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2020 Alexander V. Chernikov
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include "opt_inet.h"
#include "opt_route.h"
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/rmlock.h>
#include <sys/rwlock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/refcount.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/kernel.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/route/route_ctl.h>
#include <net/route/route_var.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_fib.h>
#include <net/route/nhop_utils.h>
#include <net/route/nhop.h>
#include <net/route/nhop_var.h>
#include <net/route/nhgrp_var.h>
/*
* This file contains data structures management logic for the nexthop
* groups ("nhgrp") route subsystem.
*
* Nexthop groups are used to store multiple routes available for the specific
* prefix. Nexthop groups are immutable and can be shared across multiple
* prefixes.
*
* Each group consists of a control plane part and a dataplane part.
* Control plane is basically a collection of nexthop objects with
* weights and refcount.
*
* Datapath consists of an array of nexthop pointers, compiled from control
* plane data to support O(1) nexthop selection.
*
* For example, consider the following group:
* [(nh1, weight=100), (nh2, weight=200)]
* It will compile to the following array:
* [nh1, nh2, nh2]
*
*/
/* Forward declarations of the file-local helpers defined below. */
static void consider_resize(struct nh_control *ctl, uint32_t new_nh_buckets,
uint32_t new_idx_items);
static int cmp_nhgrp(const struct nhgrp_priv *a, const struct nhgrp_priv *b);
static unsigned int hash_nhgrp(const struct nhgrp_priv *obj);
/*
 * Hashes @len bytes starting at @h: for every byte the accumulator is
 * multiplied by 33 and then XOR-ed with the byte.  Returns 0 when @len <= 0.
 */
static unsigned
djb_hash(const unsigned char *h, const int len)
{
	const unsigned char *cur = h;
	unsigned int acc = 0;

	for (int left = len; left > 0; left--)
		acc = (acc * 33) ^ *cur++;

	return (acc);
}
/*
 * Compares the control plane parts of two groups.
 * Returns 1 when both groups hold the same number of entries and their
 * nexthop/weight arrays are bytewise equal, 0 otherwise.
 */
static int
cmp_nhgrp(const struct nhgrp_priv *a, const struct nhgrp_priv *b)
{
	size_t list_len;

	/*
	 * With consistent hashing several groups may share the same
	 * "control plane" list of weighted nexthops while having different
	 * "data plane" nexthop sets.  For now only the control plane list
	 * is taken into account.
	 */
	if (a->nhg_nh_count != b->nhg_nh_count)
		return (0);

	list_len = sizeof(struct weightened_nhop) * a->nhg_nh_count;
	return (memcmp(a->nhg_nh_weights, b->nhg_nh_weights, list_len) == 0);
}
/*
 * Hash callback: hashes the raw bytes of the group's nexthop/weight array.
 */
static unsigned int
hash_nhgrp(const struct nhgrp_priv *obj)
{
	size_t key_len;

	key_len = sizeof(struct weightened_nhop) * obj->nhg_nh_count;
	return (djb_hash((const unsigned char *)obj->nhg_nh_weights, key_len));
}
/*
 * Looks up a group equal to @key in the hash of @ctl under the read lock.
 * Returns the object referenced and unlocked, or NULL when there is no
 * match or the matching group's refcount has already dropped to zero.
 */
struct nhgrp_priv *
find_nhgrp(struct nh_control *ctl, const struct nhgrp_priv *key)
{
	struct nhgrp_priv *found;

	NHOPS_RLOCK(ctl);
	CHT_SLIST_FIND_BYOBJ(&ctl->gr_head, mpath, key, found);
	/* refcount is 0 -> group is being deleted, report a miss */
	if (found != NULL &&
	    !refcount_acquire_if_not_zero(&found->nhg_refcount))
		found = NULL;
	NHOPS_RUNLOCK(ctl);

	return (found);
}
/*
 * Allocates a group index for @grp_priv and inserts the group into the hash
 * of @ctl.  The resize hints are sampled under the write lock and passed to
 * consider_resize() after the lock is dropped, on both outcomes.
 *
 * Returns 1 when the group has been linked, 0 when no index was available.
 */
int
link_nhgrp(struct nh_control *ctl, struct nhgrp_priv *grp_priv)
{
	uint32_t want_buckets, want_items;
	uint16_t idx;
	int linked = 0;

	NHOPS_WLOCK(ctl);
	/* Check if we need to resize hash and index */
	want_buckets = CHT_SLIST_GET_RESIZE_BUCKETS(&ctl->gr_head);
	want_items = bitmask_get_resize_items(&ctl->gr_idx_head);

	if (bitmask_alloc_idx(&ctl->gr_idx_head, &idx) == 0) {
		grp_priv->nhg_idx = idx;
		grp_priv->nh_control = ctl;
		CHT_SLIST_INSERT_HEAD(&ctl->gr_head, mpath, grp_priv);
		linked = 1;
	}
	NHOPS_WUNLOCK(ctl);

	if (!linked)
		DPRINTF("Unable to allocate mpath index");

	consider_resize(ctl, want_buckets, want_items);

	return (linked);
}
/*
 * Removes the group matching @key from the hash of @ctl and returns its
 * index to the group index bitmask, all under the write lock.
 *
 * Returns the removed group with nhg_idx and nh_control cleared, or NULL
 * when no matching group is linked.
 */
struct nhgrp_priv *
unlink_nhgrp(struct nh_control *ctl, struct nhgrp_priv *key)
{
	struct nhgrp_priv *removed;
	int idx;

	NHOPS_WLOCK(ctl);
	CHT_SLIST_REMOVE_BYOBJ(&ctl->gr_head, mpath, key, removed);
	if (removed != NULL) {
		idx = removed->nhg_idx;
		/* The result of the index release is not acted upon. */
		(void)bitmask_free_idx(&ctl->gr_idx_head, idx);
		removed->nhg_idx = 0;
		removed->nh_control = NULL;
	} else
		DPRINTF("Unable to find nhop group!");
	NHOPS_WUNLOCK(ctl);

	return (removed);
}
/*
 * Checks if the group hash and/or the group index bitmask need resizing and
 * performs the resize if necessary.
 *
 * @new_nh_buckets: desired number of hash buckets, 0 if no resize is needed.
 * @new_idx_items: desired number of index items, 0 if no resize is needed.
 *
 * Memory is allocated with M_NOWAIT before the write lock is taken; if an
 * allocation fails the corresponding resize is silently skipped.
 */
__noinline static void
consider_resize(struct nh_control *ctl, uint32_t new_nh_buckets, uint32_t new_idx_items)
{
	void *nh_ptr, *nh_idx_ptr;
	void *old_idx_ptr;
	size_t alloc_size;

	nh_ptr = NULL;
	if (new_nh_buckets != 0) {
		alloc_size = CHT_SLIST_GET_RESIZE_SIZE(new_nh_buckets);
		nh_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
	}

	nh_idx_ptr = NULL;
	if (new_idx_items != 0) {
		alloc_size = bitmask_get_size(new_idx_items);
		nh_idx_ptr = malloc(alloc_size, M_NHOP, M_NOWAIT | M_ZERO);
	}

	if (nh_ptr == NULL && nh_idx_ptr == NULL) {
		/* Either resize is not required or allocations have failed. */
		return;
	}

	DPRINTF("mp: going to resize: nh:[ptr:%p sz:%u] idx:[ptr:%p sz:%u]",
	    nh_ptr, new_nh_buckets, nh_idx_ptr, new_idx_items);

	old_idx_ptr = NULL;

	NHOPS_WLOCK(ctl);
	if (nh_ptr != NULL) {
		/*
		 * Presumably the macro leaves the previous bucket array in
		 * nh_ptr, as nh_ptr is freed unconditionally below.
		 */
		CHT_SLIST_RESIZE(&ctl->gr_head, mpath, nh_ptr, new_nh_buckets);
	}
	if (nh_idx_ptr != NULL) {
		/*
		 * Both the copy and the swap have to operate on the *group*
		 * index (gr_idx_head): swapping into the nexthop index
		 * (nh_idx_head) would replace an unrelated bitmask.
		 *
		 * NOTE(review): the swap is performed when bitmask_copy()
		 * returns non-zero, as in the original code; confirm the
		 * return convention of bitmask_copy().
		 */
		if (bitmask_copy(&ctl->gr_idx_head, nh_idx_ptr, new_idx_items)) {
			bitmask_swap(&ctl->gr_idx_head, nh_idx_ptr,
			    new_idx_items, &old_idx_ptr);
			/* The new buffer is now owned by @ctl. */
			nh_idx_ptr = NULL;
		}
	}
	NHOPS_WUNLOCK(ctl);

	if (nh_ptr != NULL)
		free(nh_ptr, M_NHOP);
	if (old_idx_ptr != NULL)
		free(old_idx_ptr, M_NHOP);
	/* Release the new index buffer if it has not been installed. */
	if (nh_idx_ptr != NULL)
		free(nh_idx_ptr, M_NHOP);
}
/*
 * Allocates the initial group hash (8 buckets) and group index bitmask
 * (128 items) and installs them into @ctl unless the hash already has a
 * non-zero size, in which case the fresh allocations are released.
 *
 * Only the M_NOWAIT / M_WAITOK bits of @malloc_flags are honoured; M_ZERO
 * is always added.
 *
 * Returns false if either allocation failed, true otherwise.
 */
bool
nhgrp_ctl_alloc_default(struct nh_control *ctl, int malloc_flags)
{
	void *cht_ptr, *mask_ptr;
	uint32_t num_buckets = 8;
	uint32_t num_items = 128;
	size_t alloc_size;
	bool installed;

	malloc_flags = (malloc_flags & (M_NOWAIT | M_WAITOK)) | M_ZERO;

	alloc_size = CHT_SLIST_GET_RESIZE_SIZE(num_buckets);
	cht_ptr = malloc(alloc_size, M_NHOP, malloc_flags);
	if (cht_ptr == NULL) {
		DPRINTF("mpath init failed");
		return (false);
	}

	/* Allocate nexthop index bitmask. */
	mask_ptr = malloc(bitmask_get_size(num_items), M_NHOP, malloc_flags);
	if (mask_ptr == NULL) {
		DPRINTF("mpath bitmask init failed");
		free(cht_ptr, M_NHOP);
		return (false);
	}

	NHOPS_WLOCK(ctl);
	installed = (ctl->gr_head.hash_size == 0);
	if (installed) {
		/* Init hash and bitmask */
		CHT_SLIST_INIT(&ctl->gr_head, cht_ptr, num_buckets);
		bitmask_init(&ctl->gr_idx_head, mask_ptr, num_items);
	}
	NHOPS_WUNLOCK(ctl);

	if (!installed) {
		/* Another thread has already initialized hash/bitmask */
		free(cht_ptr, M_NHOP);
		free(mask_ptr, M_NHOP);
	}

	DPRINTF("mpath init done for fib/af %d/%d", ctl->rh->rib_fibnum,
	    ctl->rh->rib_family);

	return (true);
}
/*
 * Puts the group hash and group index bitmask of @ctl into the empty state
 * (NULL storage, zero size).  Always returns 0.
 */
int
nhgrp_ctl_init(struct nh_control *ctl)
{

	/*
	 * Multipath routes are not necessarily going to be used, so no
	 * data structures are allocated at this point.
	 */
	CHT_SLIST_INIT(&ctl->gr_head, NULL, 0);
	bitmask_init(&ctl->gr_idx_head, NULL, 0);

	return (0);
}
/*
 * Releases the group hash storage and the group index bitmask storage of
 * @ctl, if they have been allocated.
 */
void
nhgrp_ctl_free(struct nh_control *ctl)
{

	/* Hash bucket array */
	if (ctl->gr_head.ptr != NULL) {
		free(ctl->gr_head.ptr, M_NHOP);
	}

	/* Index bitmask */
	if (ctl->gr_idx_head.idx != NULL) {
		free(ctl->gr_idx_head.idx, M_NHOP);
	}
}
/*
 * Drops one nhg_linked reference from every group present in the hash of
 * @ctl.  The nexthop write lock must be held by the caller (asserted).
 */
void
nhgrp_ctl_unlink_all(struct nh_control *ctl)
{
	struct nhgrp_priv *grp;

	NHOPS_WLOCK_ASSERT(ctl);

	CHT_SLIST_FOREACH(&ctl->gr_head, mpath, grp) {
		DPRINTF("Marking nhgrp %u unlinked", grp->nhg_idx);
		refcount_release(&grp->nhg_linked);
	} CHT_SLIST_FOREACH_END;
}

788
sys/net/route/nhgrp_ctl.c Normal file
View File

@ -0,0 +1,788 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2020 Alexander V. Chernikov
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#define RTDEBUG
#include "opt_inet.h"
#include "opt_route.h"
#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/rmlock.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/refcount.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/kernel.h>
#include <sys/epoch.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/route.h>
#include <net/route/route_ctl.h>
#include <net/route/route_var.h>
#include <net/vnet.h>
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_fib.h>
#include <net/route/nhop_utils.h>
#include <net/route/nhop.h>
#include <net/route/nhop_var.h>
#include <net/route/nhgrp_var.h>
/*
* This file contains the supporting functions for creating multipath groups
* and compiling their dataplane parts.
*/
/* MPF_MULTIPATH must be the same as NHF_MULTIPATH for nhop selection to work */
_Static_assert(MPF_MULTIPATH == NHF_MULTIPATH,
"MPF_MULTIPATH must be the same as NHF_MULTIPATH");
/* Offset and size of flags field has to be the same for nhop/nhop groups */
CHK_STRUCT_FIELD_GENERIC(struct nhop_object, nh_flags, struct nhgrp_object, nhg_flags);
/* Cap multipath to 64, as the larger values would break rib_cmd_info bmasks */
CTASSERT(RIB_MAX_MPATH_WIDTH <= 64);
/* Forward declarations of the file-local helpers defined below. */
static int wn_cmp(const void *a, const void *b);
static void sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops);
static struct nhgrp_priv *get_nhgrp(struct nh_control *ctl,
struct weightened_nhop *wn, int num_nhops, int *perror);
static void destroy_nhgrp(struct nhgrp_priv *nhg_priv);
static void destroy_nhgrp_epoch(epoch_context_t ctx);
static void free_nhgrp_nhops(struct nhgrp_priv *nhg_priv);
/*
 * qsort() comparator for struct weightened_nhop: orders by weight
 * ascending, ties are broken by the nexthop pointer value.
 */
static int
wn_cmp(const void *a, const void *b)
{
	const struct weightened_nhop *lhs = a;
	const struct weightened_nhop *rhs = b;

	if (lhs->weight != rhs->weight)
		return (lhs->weight > rhs->weight ? 1 : -1);

	/* Same weight: compare nexthops by pointer */
	if (lhs->nh != rhs->nh)
		return (lhs->nh > rhs->nh ? 1 : -1);

	return (0);
}
/*
 * Sorts the @num_nhops entries of @wn in place using wn_cmp().
 *
 * Nexthops/weights have to be ordered deterministically to avoid
 * duplicating nexthop groups.  The ordering matters only for the control
 * plane, so there are no specific external requirements on it.
 *
 * Weight is the primary key, which eases calculation of the slot sizes.
 */
static void
sort_weightened_nhops(struct weightened_nhop *wn, int num_nhops)
{

	qsort(wn, num_nhops, sizeof(*wn), wn_cmp);
}
/*
 * Calculate minimum number of slots required to fit the existing
 * set of weights in the common use case where weights are "easily"
 * comparable.
 * Assumes @wn is sorted by weight ascending and each weight is > 0.
 * Returns number of slots or 0 if precise calculation failed.
 * 0 is also returned for an empty set and when the first weight is 0,
 * instead of reading past the array or dividing by zero.
 *
 * Some examples:
 * note: (i, X) pair means (nhop=i, weight=X):
 * (1, 1) (2, 2) -> 3 slots [1, 2, 2]
 * (1, 100), (2, 200) -> 3 slots [1, 2, 2]
 * (1, 100), (2, 200), (3, 400) -> 7 slots [1, 2, 2, 3, 3, 3, 3]
 */
static uint32_t
calc_min_mpath_slots_fast(const struct weightened_nhop *wn, size_t num_items)
{
	uint32_t i, last, xmin;
	uint64_t total = 0;

	/* Nothing to inspect: wn[0] must not be read. */
	if (num_items == 0)
		return (0);

	/*
	 * xmin ends up as the smallest of the first weight and all non-zero
	 * differences between consecutive weights.
	 */
	last = 0;
	xmin = wn[0].weight;
	for (i = 0; i < num_items; i++) {
		total += wn[i].weight;
		if ((wn[i].weight - last < xmin) && (wn[i].weight != last))
			xmin = wn[i].weight - last;
		last = wn[i].weight;
	}

	/*
	 * xmin is only ever replaced by a non-zero difference, so it is 0
	 * exactly when the first weight is 0.  Bail out before using it as
	 * a divisor.
	 */
	if (xmin == 0)
		return (0);

	/* xmin is the minimum unit of desired capacity */
	if ((total % xmin) != 0)
		return (0);
	for (i = 0; i < num_items; i++) {
		if ((wn[i].weight % xmin) != 0)
			return (0);
	}

	return ((uint32_t)(total / xmin));
}
/*
 * Calculate minimum number of slots required to fit the existing
 * set of weights while maintaining weight coefficients.
 *
 * Assume @wn is sorted by weight ascending and each weight is > 0.
 *
 * The simple precise solution is tried first; if it fails (0) or exceeds
 * RIB_MAX_MPATH_WIDTH, RIB_MAX_MPATH_WIDTH is returned instead.
 */
static uint32_t
calc_min_mpath_slots(const struct weightened_nhop *wn, size_t num_items)
{
	uint32_t slots;

	slots = calc_min_mpath_slots_fast(wn, num_items);
	if (slots != 0 && slots <= RIB_MAX_MPATH_WIDTH)
		return (slots);

	return (RIB_MAX_MPATH_WIDTH);
}
/*
 * Nexthop group data consists of
 * 1) dataplane part, with nhgrp_object as a header followed by an
 *  arbitrary number of nexthop pointers.
 * 2) control plane part, with nhgrp_priv as a header, followed by
 *  an arbitrary number of 'struct weightened_nhop' objects.
 *
 * Given nexthop groups are (mostly) immutable, all data is allocated
 * in one go.  This function returns the number of bytes needed for a
 * group with @nhg_size dataplane slots and @num_nhops control plane entries.
 */
__noinline static size_t
get_nhgrp_alloc_size(uint32_t nhg_size, uint32_t num_nhops)
{
	size_t dataplane_sz, control_sz;

	dataplane_sz = sizeof(struct nhgrp_object) +
	    nhg_size * sizeof(struct nhop_object *);
	control_sz = sizeof(struct nhgrp_priv) +
	    num_nhops * sizeof(struct weightened_nhop);

	return (dataplane_sz + control_sz);
}
/*
 * Compile actual list of nexthops to be used by datapath from
 * the control plane list @x into the dataplane array of @dst_priv->nhg.
 *
 * Each nexthop receives weight * remaining_slots / remaining_weight_sum
 * slots (integer division), evaluated in list order with the remaining
 * values updated after every nexthop.
 *
 * For example, compiling control plane list of 2 nexthops
 * [(200, A), (100, B)] into 3 slots would result in the datapath array
 * [A, A, B]
 */
static void
compile_nhgrp(struct nhgrp_priv *dst_priv, const struct weightened_nhop *x,
    uint32_t num_slots)
{
	struct nhgrp_object *grp = dst_priv->nhg;
	uint64_t weight_left, share;
	int idx, next_slot, slots_left;

	/* Calculate sum of all weights */
	weight_left = 0;
	for (idx = 0; idx < dst_priv->nhg_nh_count; idx++)
		weight_left += x[idx].weight;

	next_slot = 0;
	slots_left = num_slots;
	DPRINTF("O: %u/%u", (uint32_t)weight_left, slots_left);

	for (idx = 0; idx < dst_priv->nhg_nh_count; idx++) {
		/* Calculate number of slots for the current nexthop */
		share = 0;
		if (weight_left > 0)
			share = (uint64_t)x[idx].weight * slots_left /
			    weight_left;

		weight_left -= x[idx].weight;
		slots_left -= share;

		DPRINTF(" OO[%d]: %u/%u curr=%d slot_idx=%d", idx,
		    (uint32_t)weight_left, slots_left,
		    (int)share, next_slot);

		KASSERT((next_slot + share <= num_slots),
		    ("index overflow during nhg compilation"));

		/* Fill the slots assigned to this nexthop */
		for (; share > 0; share--)
			grp->nhops[next_slot++] = x[idx].nh;
	}
}
/*
 * Allocates new nexthop group for the list of weightened nexthops.
 * Assume sorted list.
 * Does NOT reference any nexthops in the group.
 * Returns group with refcount=1 or NULL (no slots or allocation failure).
 */
static struct nhgrp_priv *
alloc_nhgrp(struct weightened_nhop *wn, int num_nhops)
{
	struct nhgrp_object *grp;
	struct nhgrp_priv *grp_priv;
	uint32_t width;
	size_t alloc_sz;

	width = calc_min_mpath_slots(wn, num_nhops);
	if (width == 0) {
		/* Zero weights, abort */
		return (NULL);
	}

	/* Dataplane and control plane parts share a single allocation. */
	alloc_sz = get_nhgrp_alloc_size(width, num_nhops);
	grp = malloc(alloc_sz, M_NHOP, M_NOWAIT | M_ZERO);
	if (grp == NULL)
		return (NULL);

	/* Has to be the first to make NHGRP_PRIV() work */
	grp->nhg_size = width;
	DPRINTF("new mpath group: num_nhops: %u", (uint32_t)width);
	grp->nhg_flags = MPF_MULTIPATH;

	grp_priv = NHGRP_PRIV(grp);
	grp_priv->nhg_nh_count = num_nhops;
	refcount_init(&grp_priv->nhg_refcount, 1);
	/* Please see nhgrp_free() comments on the initial value */
	refcount_init(&grp_priv->nhg_linked, 2);
	grp_priv->nhg = grp;

	/* Store the control plane list, then build the dataplane array. */
	memcpy(&grp_priv->nhg_nh_weights[0], wn,
	    num_nhops * sizeof(struct weightened_nhop));
	compile_nhgrp(grp_priv, wn, grp->nhg_size);

	return (grp_priv);
}
/*
 * Drops one reference from @nhg.  When the last reference goes away the
 * group is unlinked from its nh_control (unless it was already marked
 * unlinked) and its destruction is deferred via epoch_call().
 */
void
nhgrp_free(struct nhgrp_object *nhg)
{
struct nhgrp_priv *nhg_priv;
struct nh_control *ctl;
struct epoch_tracker et;
nhg_priv = NHGRP_PRIV(nhg);
/* Not the last reference: nothing else to do. */
if (!refcount_release(&nhg_priv->nhg_refcount))
return;
/*
* Group objects don't have an explicit lock attached to them.
* As groups are reclaimed based on reference count, it is possible
* that some groups will persist after the vnet destruction callback
* is called. Given that, handle the scenario with nhgrp_free() being
* called either after or simultaneously with nhgrp_ctl_unlink_all()
* by using another reference counter: nhg_linked.
*
* There are only 2 places where nhg_linked can be decreased:
* rib destroy (nhgrp_ctl_unlink_all) and this function.
* nhg_linked can never be increased.
*
* Hence, use initial value of 2 to make use of
* refcount_release_if_not_last().
*
* There can be two scenarios when calling this function:
*
* 1) nhg_linked value is 2. This means that either
* nhgrp_ctl_unlink_all() has not been called OR it is running,
* but we are guaranteed that nh_control won't be freed in
* this epoch. Hence, the group can be safely unlinked.
*
* 2) nhg_linked value is 1. In that case, nhgrp_ctl_unlink_all()
* has been called and nhgrp unlink can be skipped.
*/
NET_EPOCH_ENTER(et);
if (refcount_release_if_not_last(&nhg_priv->nhg_linked)) {
ctl = nhg_priv->nh_control;
if (unlink_nhgrp(ctl, nhg_priv) == NULL) {
/* Do not try to reclaim */
DPRINTF("Failed to unlink nexhop group %p", nhg_priv);
NET_EPOCH_EXIT(et);
return;
}
}
NET_EPOCH_EXIT(et);
/* Defer the actual destruction to the epoch callback. */
epoch_call(net_epoch_preempt, destroy_nhgrp_epoch,
&nhg_priv->nhg_epoch_ctx);
}
/*
 * Releases the memory backing the group @nhg_priv belongs to.
 * Nexthop references held by the group are NOT dropped here.
 */
__noinline static void
destroy_nhgrp_int(struct nhgrp_priv *nhg_priv)
{
	struct nhgrp_object *grp = nhg_priv->nhg;

	free(grp, M_NHOP);
}
/*
 * Final destructor for an unreferenced, unlinked group: drops the nexthop
 * references held by the group and then frees the group memory.
 * Asserts that both nhg_refcount and nhg_idx are zero.
 */
__noinline static void
destroy_nhgrp(struct nhgrp_priv *nhg_priv)
{

	KASSERT((nhg_priv->nhg_refcount == 0), ("nhg_refcount != 0"));

	DPRINTF("DEL MPATH %p", nhg_priv);

	KASSERT((nhg_priv->nhg_idx == 0), ("gr_idx != 0"));

	/* Nexthop references go first, the memory itself goes last. */
	free_nhgrp_nhops(nhg_priv);
	destroy_nhgrp_int(nhg_priv);
}
/*
 * Epoch callback indicating group is safe to destroy: recovers the group
 * from its embedded epoch context and hands it to destroy_nhgrp().
 */
static void
destroy_nhgrp_epoch(epoch_context_t ctx)
{
	struct nhgrp_priv *grp_priv =
	    __containerof(ctx, struct nhgrp_priv, nhg_epoch_ctx);

	destroy_nhgrp(grp_priv);
}
/*
 * Takes a reference on every nexthop listed in the control plane part of
 * @nhg_priv.
 *
 * Returns true when all nexthops have been referenced.  If any reference
 * cannot be obtained, the references taken so far are released and false
 * is returned.
 */
static bool
ref_nhgrp_nhops(struct nhgrp_priv *nhg_priv)
{
	int taken, undo;

	for (taken = 0; taken < nhg_priv->nhg_nh_count; taken++) {
		if (nhop_try_ref_object(nhg_priv->nhg_nh_weights[taken].nh) == 0) {
			/*
			 * Failed to ref the nexthop, b/c it's deleted.
			 * Roll back the references obtained so far.
			 */
			for (undo = 0; undo < taken; undo++)
				nhop_free(nhg_priv->nhg_nh_weights[undo].nh);
			return (false);
		}
	}

	return (true);
}
/*
 * Releases one reference from every nexthop listed in the control plane
 * part of @nhg_priv.
 */
static void
free_nhgrp_nhops(struct nhgrp_priv *nhg_priv)
{
	int idx = 0;

	while (idx < nhg_priv->nhg_nh_count) {
		nhop_free(nhg_priv->nhg_nh_weights[idx].nh);
		idx++;
	}
}
/*
 * Creates or looks up an existing nexthop group based on @wn and @num_nhops.
 * @wn is sorted in place.
 *
 * Returns referenced nhop group or NULL, passing error code in @perror:
 *  E2BIG  - more than RIB_MAX_MPATH_WIDTH nexthops requested
 *  ENOMEM - group data structures or the group itself can't be allocated
 *  EEXIST - duplicate nexthop detected
 *  EAGAIN - a nexthop is being deleted or no group index is available;
 *           the operation can be retried
 * On success @perror is set to 0.
 */
struct nhgrp_priv *
get_nhgrp(struct nh_control *ctl, struct weightened_nhop *wn, int num_nhops,
    int *perror)
{
	struct nhgrp_priv *key, *nhg_priv;
	uint32_t last_id;

	if (num_nhops > RIB_MAX_MPATH_WIDTH) {
		*perror = E2BIG;
		return (NULL);
	}

	if (ctl->gr_head.hash_size == 0) {
		/* First multipath request. Bootstrap mpath datastructures. */
		if (nhgrp_ctl_alloc_default(ctl, M_NOWAIT) == 0) {
			*perror = ENOMEM;
			return (NULL);
		}
	}

	/*
	 * Sort nexthops & check there are no duplicates.
	 * NOTE(review): only neighbouring entries are compared, and the
	 * array is sorted by weight first, so the same nexthop listed with
	 * two different weights is not necessarily detected - confirm
	 * whether callers rely on that.
	 */
	sort_weightened_nhops(wn, num_nhops);
	last_id = 0;
	for (int i = 0; i < num_nhops; i++) {
		if (wn[i].nh->nh_priv->nh_idx == last_id) {
			*perror = EEXIST;
			return (NULL);
		}
		last_id = wn[i].nh->nh_priv->nh_idx;
	}

	if ((key = alloc_nhgrp(wn, num_nhops)) == NULL) {
		*perror = ENOMEM;
		return (NULL);
	}

	nhg_priv = find_nhgrp(ctl, key);
	if (nhg_priv != NULL) {
		/*
		 * Free originally-created group. As it hasn't been linked
		 * and the dependent nexthops haven't been referenced, just
		 * free the group.
		 */
		destroy_nhgrp_int(key);
		*perror = 0;
		return (nhg_priv);
	}

	/* No existing group, try to link the new one */
	if (!ref_nhgrp_nhops(key)) {
		/*
		 * Some of the nexthops have been scheduled for deletion.
		 * As the group hasn't been linked / no nexthops have been
		 * referenced, call the final destructor immediately.
		 */
		destroy_nhgrp_int(key);
		*perror = EAGAIN;
		return (NULL);
	}

	if (link_nhgrp(ctl, key) == 0) {
		/*
		 * Unable to allocate index.  The group has never been
		 * linked but already holds nexthop references and its own
		 * refcount is still 1, so destroy_nhgrp() (which asserts a
		 * zero refcount) cannot be used: drop the nexthop references
		 * and release the memory directly.  The freed group must not
		 * be returned to the caller.
		 */
		free_nhgrp_nhops(key);
		destroy_nhgrp_int(key);
		*perror = EAGAIN;
		return (NULL);
	}

	*perror = 0;
	return (key);
}
/*
 * Appends one or more nexthops denoted by @wn to the nexthop group @gr_orig.
 *
 * Returns referenced nexthop group or NULL. In the latter case, @perror is
 * filled with an error code.
 * Note that the function does NOT check whether the new nexthops already
 * exist in @gr_orig: they are simply appended to the original list before
 * the combined list is handed to get_nhgrp().
 */
static struct nhgrp_priv *
append_nhops(struct nh_control *ctl, const struct nhgrp_object *gr_orig,
    struct weightened_nhop *wn, int num_nhops, int *perror)
{
	char storage[64];
	const struct nhgrp_priv *src_priv;
	struct weightened_nhop *merged;
	struct nhgrp_priv *result;
	size_t sz;
	int orig_count;
	bool on_stack;

	src_priv = NHGRP_PRIV_CONST(gr_orig);
	orig_count = src_priv->nhg_nh_count;

	*perror = 0;

	sz = (src_priv->nhg_nh_count + num_nhops) * (sizeof(struct weightened_nhop));
	/* optimize for <= 4 paths, each path=16 bytes */
	on_stack = (sz <= sizeof(storage));
	if (on_stack) {
		merged = (struct weightened_nhop *)&storage[0];
	} else {
		merged = malloc(sz, M_TEMP, M_NOWAIT);
		if (merged == NULL) {
			*perror = ENOMEM;
			return (NULL);
		}
	}

	/* Nexthops of the original group go first, new ones follow. */
	memcpy(merged, src_priv->nhg_nh_weights,
	    orig_count * sizeof(struct weightened_nhop));
	memcpy(&merged[orig_count], wn,
	    num_nhops * sizeof(struct weightened_nhop));

	result = get_nhgrp(ctl, merged, orig_count + num_nhops, perror);

	if (!on_stack)
		free(merged, M_TEMP);

	return (result);
}
/*
* Creates/finds nexthop group based on @wn and @num_nhops.
* Returns 0 on success with referenced group in @rnd, or
* errno.
*
* If the error is EAGAIN, then the operation can be retried.
*/
int
nhgrp_get_group(struct rib_head *rh, struct weightened_nhop *wn, int num_nhops,
struct route_nhop_data *rnd)
{
struct nh_control *ctl = rh->nh_control;
struct nhgrp_priv *nhg_priv;
int error;
nhg_priv = get_nhgrp(ctl, wn, num_nhops, &error);
if (nhg_priv != NULL)
rnd->rnd_nhgrp = nhg_priv->nhg;
rnd->rnd_weight = 0;
return (error);
}
/*
 * Builds the route nexthop data for the group @src with some of its
 * nexthops removed: every nexthop for which @flt_func(nh, @flt_data)
 * returns non-zero is dropped, the remaining ones are kept with their
 * weights.
 *
 * The result is stored in @rnd:
 *  - no nexthops left:  rnd_nhgrp is NULL and rnd_weight is 0;
 *  - one nexthop left:  rnd_nhop is that nexthop and rnd_weight its weight;
 *    a reference is taken with nhop_try_ref_object();
 *  - several left:      rnd_nhgrp is the group returned by get_nhgrp()
 *    and rnd_weight is 0.
 *
 * Returns 0 on success or errno: ENOMEM if the temporary array cannot be
 * allocated, EAGAIN if the single remaining nexthop cannot be referenced,
 * otherwise the error reported by get_nhgrp().
 */
int
nhgrp_get_filtered_group(struct rib_head *rh, const struct nhgrp_object *src,
    nhgrp_filter_cb_t flt_func, void *flt_data, struct route_nhop_data *rnd)
{
	/*
	 * On-stack scratch space for the common case of <= 4 paths.
	 * Declared with the element type (instead of a char array) so that
	 * it is guaranteed to be suitably aligned for the accesses below.
	 */
	struct weightened_nhop storage[4];
	struct nh_control *ctl = rh->nh_control;
	struct weightened_nhop *pnhops;
	const struct nhgrp_priv *mp_priv, *src_priv;
	size_t sz;
	int error, i, num_nhops;

	src_priv = NHGRP_PRIV_CONST(src);

	/* The filtered list can never be longer than the source list */
	sz = src_priv->nhg_nh_count * sizeof(struct weightened_nhop);
	if (sz <= sizeof(storage))
		pnhops = storage;
	else {
		if ((pnhops = malloc(sz, M_TEMP, M_NOWAIT)) == NULL)
			return (ENOMEM);
	}

	/* Filter nexthops: keep those NOT matched by the callback */
	error = 0;
	num_nhops = 0;
	for (i = 0; i < src_priv->nhg_nh_count; i++) {
		if (flt_func(src_priv->nhg_nh_weights[i].nh, flt_data))
			continue;
		memcpy(&pnhops[num_nhops++], &src_priv->nhg_nh_weights[i],
		    sizeof(struct weightened_nhop));
	}

	if (num_nhops == 0) {
		rnd->rnd_nhgrp = NULL;
		rnd->rnd_weight = 0;
	} else if (num_nhops == 1) {
		/* A single path does not need a group: use the nexthop itself */
		rnd->rnd_nhop = pnhops[0].nh;
		rnd->rnd_weight = pnhops[0].weight;
		if (nhop_try_ref_object(rnd->rnd_nhop) == 0)
			error = EAGAIN;
	} else {
		mp_priv = get_nhgrp(ctl, pnhops, num_nhops, &error);
		if (mp_priv != NULL)
			rnd->rnd_nhgrp = mp_priv->nhg;
		rnd->rnd_weight = 0;
	}

	if (pnhops != storage)
		free(pnhops, M_TEMP);

	return (error);
}
/*
* Creates new multipath group based on existing group/nhop in @rnd_orig and
* to-be-added nhop @wn_add.
* Returns 0 on success and stores result in @rnd_new.
*/
int
nhgrp_get_addition_group(struct rib_head *rh, struct route_nhop_data *rnd_orig,
struct route_nhop_data *rnd_add, struct route_nhop_data *rnd_new)
{
struct nh_control *ctl = rh->nh_control;
struct nhgrp_priv *nhg_priv;
struct weightened_nhop wn[2];
int error;
if (rnd_orig->rnd_nhop == NULL) {
/* No paths to add to, just reference current nhop */
*rnd_new = *rnd_add;
if (nhop_try_ref_object(rnd_new->rnd_nhop) == 0)
return (EAGAIN);
return (0);
}
wn[0].nh = rnd_add->rnd_nhop;
wn[0].weight = rnd_add->rnd_weight;
if (!NH_IS_NHGRP(rnd_orig->rnd_nhop)) {
/* Simple merge of 2 non-multipath nexthops */
wn[1].nh = rnd_orig->rnd_nhop;
wn[1].weight = rnd_orig->rnd_weight;
nhg_priv = get_nhgrp(ctl, wn, 2, &error);
} else {
/* Get new nhop group with @rt->rt_nhop as an additional nhop */
nhg_priv = append_nhops(ctl, rnd_orig->rnd_nhgrp, &wn[0], 1,
&error);
}
if (nhg_priv == NULL)
return (error);
rnd_new->rnd_nhgrp = nhg_priv->nhg;
rnd_new->rnd_weight = 0;
return (0);
}
/*
 * Returns pointer to the control plane array of weighted nexthops of the
 * group @nhg. The number of items in the array is stored into @pnum_nhops.
 *
 * @nhg must be a nexthop group (MPF_MULTIPATH set in its flags); this is
 * asserted.
 */
struct weightened_nhop *
nhgrp_get_nhops(struct nhgrp_object *nhg, uint32_t *pnum_nhops)
{
	struct nhgrp_priv *priv;

	KASSERT(((nhg->nhg_flags & MPF_MULTIPATH) != 0), ("nhop is not mpath"));

	priv = NHGRP_PRIV(nhg);
	*pnum_nhops = priv->nhg_nh_count;

	return (&priv->nhg_nh_weights[0]);
}
__noinline static int
dump_nhgrp_entry(struct rib_head *rh, const struct nhgrp_priv *nhg_priv,
char *buffer, size_t buffer_size, struct sysctl_req *w)
{
struct rt_msghdr *rtm;
struct nhgrp_external *nhge;
struct nhgrp_container *nhgc;
const struct nhgrp_object *nhg;
struct nhgrp_nhop_external *ext;
int error;
size_t sz;
nhg = nhg_priv->nhg;
sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external);
/* controlplane nexthops */
sz += sizeof(struct nhgrp_container);
sz += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count;
/* dataplane nexthops */
sz += sizeof(struct nhgrp_container);
sz += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size;
KASSERT(sz <= buffer_size, ("increase nhgrp buffer size"));
bzero(buffer, sz);
rtm = (struct rt_msghdr *)buffer;
rtm->rtm_msglen = sz;
rtm->rtm_version = RTM_VERSION;
rtm->rtm_type = RTM_GET;
nhge = (struct nhgrp_external *)(rtm + 1);
nhge->nhg_idx = nhg_priv->nhg_idx;
nhge->nhg_refcount = nhg_priv->nhg_refcount;
/* fill in control plane nexthops firs */
nhgc = (struct nhgrp_container *)(nhge + 1);
nhgc->nhgc_type = NHG_C_TYPE_CNHOPS;
nhgc->nhgc_subtype = 0;
nhgc->nhgc_len = sizeof(struct nhgrp_container);
nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg_priv->nhg_nh_count;
nhgc->nhgc_count = nhg_priv->nhg_nh_count;
ext = (struct nhgrp_nhop_external *)(nhgc + 1);
for (int i = 0; i < nhg_priv->nhg_nh_count; i++) {
ext[i].nh_idx = nhg_priv->nhg_nh_weights[i].nh->nh_priv->nh_idx;
ext[i].nh_weight = nhg_priv->nhg_nh_weights[i].weight;
}
/* fill in dataplane nexthops */
nhgc = (struct nhgrp_container *)(&ext[nhg_priv->nhg_nh_count]);
nhgc->nhgc_type = NHG_C_TYPE_DNHOPS;
nhgc->nhgc_subtype = 0;
nhgc->nhgc_len = sizeof(struct nhgrp_container);
nhgc->nhgc_len += sizeof(struct nhgrp_nhop_external) * nhg->nhg_size;
nhgc->nhgc_count = nhg->nhg_size;
ext = (struct nhgrp_nhop_external *)(nhgc + 1);
for (int i = 0; i < nhg->nhg_size; i++) {
ext[i].nh_idx = nhg->nhops[i]->nh_priv->nh_idx;
ext[i].nh_weight = 0;
}
error = SYSCTL_OUT(w, buffer, sz);
return (error);
}
/*
 * Dumps all nexthop groups of @rh to the sysctl request @w, one message per
 * group (see dump_nhgrp_entry()).
 *
 * Returns 0 on success (including the case of no groups at all) or the
 * first error returned while dumping a group.
 */
int
nhgrp_dump_sysctl(struct rib_head *rh, struct sysctl_req *w)
{
	struct nh_control *ctl = rh->nh_control;
	struct epoch_tracker et;
	struct nhgrp_priv *grp;
	char *buffer;
	size_t sz;
	int error;

	/* Nothing to report */
	if (ctl->gr_head.items_count == 0)
		return (0);

	/*
	 * One scratch buffer is shared by all groups: size it for the
	 * largest possible message, i.e. both containers holding
	 * RIB_MAX_MPATH_WIDTH records.
	 */
	sz = sizeof(struct rt_msghdr) + sizeof(struct nhgrp_external) +
	    2 * sizeof(struct nhgrp_container) +
	    2 * sizeof(struct nhgrp_nhop_external) * RIB_MAX_MPATH_WIDTH;
	buffer = malloc(sz, M_TEMP, M_WAITOK);

	error = 0;
	NET_EPOCH_ENTER(et);
	NHOPS_RLOCK(ctl);
	CHT_SLIST_FOREACH(&ctl->gr_head, mpath, grp) {
		error = dump_nhgrp_entry(rh, grp, buffer, sz, w);
		if (error != 0)
			break;
	} CHT_SLIST_FOREACH_END;
	NHOPS_RUNLOCK(ctl);
	NET_EPOCH_EXIT(et);

	free(buffer, M_TEMP);

	return (error);
}

72
sys/net/route/nhgrp_var.h Normal file
View File

@ -0,0 +1,72 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2020 Alexander V. Chernikov
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
/*
* This header file contains private definitions for the nexthop groups.
*
* Header is not intended to be included by the code external to the
* routing subsystem.
*/
#ifndef _NET_ROUTE_NHGRP_VAR_H_
#define _NET_ROUTE_NHGRP_VAR_H_
/* nhgrp hash definition */
/* produce hash value for an object */
#define mpath_hash_obj(_obj) (hash_nhgrp(_obj))
/* compare two objects */
#define mpath_cmp(_one, _two) (cmp_nhgrp(_one, _two))
/* next object accessor */
#define mpath_next(_obj) (_obj)->nhg_priv_next
/*
 * Control plane (private) part of a nexthop group.
 *
 * The structure is located right past the dataplane nhops[] array of the
 * corresponding struct nhgrp_object (see _NHGRP_PRIV() below) and is
 * followed by the variable-length list of nexthops with their weights.
 */
struct nhgrp_priv {
uint32_t nhg_idx; /* nexthop group index */
uint8_t nhg_nh_count; /* number of items in nhg_nh_weights */
uint8_t nhg_spare[3];
u_int nhg_refcount; /* use refcount */
u_int nhg_linked; /* refcount(9), == 2 if linked to the list */
struct nh_control *nh_control; /* parent control structure */
struct nhgrp_priv *nhg_priv_next; /* next object accessor for the nhgrp hash (mpath_next()) */
struct nhgrp_object *nhg; /* dataplane part of the group */
struct epoch_context nhg_epoch_ctx; /* epoch data for nhop */
struct weightened_nhop nhg_nh_weights[0]; /* control plane nexthops with weights */
};
#define _NHGRP_PRIV(_src) (&(_src)->nhops[(_src)->nhg_size])
#define NHGRP_PRIV(_src) ((struct nhgrp_priv *)_NHGRP_PRIV(_src))
#define NHGRP_PRIV_CONST(_src) ((const struct nhgrp_priv *)_NHGRP_PRIV(_src))
/* nhgrp.c */
bool nhgrp_ctl_alloc_default(struct nh_control *ctl, int malloc_flags);
struct nhgrp_priv *find_nhgrp(struct nh_control *ctl, const struct nhgrp_priv *key);
int link_nhgrp(struct nh_control *ctl, struct nhgrp_priv *grp_priv);
struct nhgrp_priv *unlink_nhgrp(struct nh_control *ctl, struct nhgrp_priv *key);
#endif

View File

@ -64,7 +64,7 @@ __FBSDID("$FreeBSD$");
* is backed by the bitmask array.
*/
static MALLOC_DEFINE(M_NHOP, "nhops", "nexthops data");
MALLOC_DEFINE(M_NHOP, "nhops", "nexthops data");
/* Hash management functions */
@ -112,6 +112,9 @@ destroy_ctl(struct nh_control *ctl)
NHOPS_LOCK_DESTROY(ctl);
free(ctl->nh_head.ptr, M_NHOP);
free(ctl->nh_idx_head.idx, M_NHOP);
#ifdef ROUTE_MPATH
nhgrp_ctl_free(ctl);
#endif
free(ctl, M_NHOP);
}
@ -154,6 +157,9 @@ nhops_destroy_rib(struct rib_head *rh)
DPRINTF("Marking nhop %u unlinked", nh_priv->nh_idx);
refcount_release(&nh_priv->nh_linked);
} CHT_SLIST_FOREACH_END;
#ifdef ROUTE_MPATH
nhgrp_ctl_unlink_all(ctl);
#endif
NHOPS_WUNLOCK(ctl);
/*

View File

@ -155,7 +155,7 @@ struct nhop_object {
*/
#define NH_IS_VALID(_nh) RT_LINK_IS_UP((_nh)->nh_ifp)
#define NH_IS_MULTIPATH(_nh) ((_nh)->nh_flags & NHF_MULTIPATH)
#define NH_IS_NHGRP(_nh) ((_nh)->nh_flags & NHF_MULTIPATH)
#define RT_GATEWAY(_rt) ((struct sockaddr *)&(_rt)->rt_nhop->gw4_sa)
#define RT_GATEWAY_CONST(_rt) ((const struct sockaddr *)&(_rt)->rt_nhop->gw4_sa)
@ -166,6 +166,11 @@ struct nhop_object {
_nh = NULL; \
} while (0)
/* Nexthop paired with its relative weight; element of a nexthop group list */
struct weightened_nhop {
struct nhop_object *nh; /* nexthop */
uint32_t weight; /* relative weight of the nexthop */
};
void nhop_free(struct nhop_object *nh);
struct sysctl_req;
@ -209,16 +214,34 @@ struct nhop_addrs {
uint16_t src_sa_off; /* offset of src address SA */
};
struct mpath_nhop_external {
#define NHG_C_TYPE_CNHOPS 0x1 /* Control plane nhops list */
#define NHG_C_TYPE_DNHOPS 0x2 /* Dataplane nhops list */
struct nhgrp_container {
uint32_t nhgc_len; /* container length */
uint16_t nhgc_count; /* number of items */
uint8_t nhgc_type; /* container type */
uint8_t nhgc_subtype; /* container subtype */
};
struct nhgrp_nhop_external {
uint32_t nh_idx;
uint32_t nh_weight;
};
struct mpath_external {
uint32_t mp_idx;
uint32_t mp_refcount;
uint32_t mp_nh_count;
uint32_t mp_group_size;
/*
* Layout:
* - nhgrp_external
* - nhgrp_container (control plane nhops list)
* - nhgrp_nhop_external
* - nhgrp_nhop_external
* ..
* - nhgrp_container (dataplane nhops list)
* - nhgrp_nhop_external
* - nhgrp_nhop_external
*/
struct nhgrp_external {
uint32_t nhg_idx; /* Nexthop group index */
uint32_t nhg_refcount; /* number of references */
};
#endif

View File

@ -695,7 +695,14 @@ void
nhop_free_any(struct nhop_object *nh)
{
#ifdef ROUTE_MPATH
if (!NH_IS_NHGRP(nh))
nhop_free(nh);
else
nhgrp_free((struct nhgrp_object *)nh);
#else
nhop_free(nh);
#endif
}
/* Helper functions */

View File

@ -37,6 +37,8 @@
#ifndef _NET_ROUTE_NHOP_VAR_H_
#define _NET_ROUTE_NHOP_VAR_H_
MALLOC_DECLARE(M_NHOP);
/* define nhop hash table */
struct nhop_priv;
CHT_SLIST_DEFINE(nhops, struct nhop_priv);
@ -47,9 +49,15 @@ CHT_SLIST_DEFINE(nhops, struct nhop_priv);
/* next object accessor */
#define nhops_next(_obj) (_obj)->nh_next
/* define multipath hash table */
struct nhgrp_priv;
CHT_SLIST_DEFINE(nhgroups, struct nhgrp_priv);
struct nh_control {
struct nhops_head nh_head; /* hash table head */
struct bitmask_head nh_idx_head; /* nhop index head */
struct nhgroups_head gr_head; /* nhgrp hash table head */
struct bitmask_head gr_idx_head; /* nhgrp index head */
struct rwlock ctl_lock; /* overall ctl lock */
struct rib_head *ctl_rh; /* pointer back to rnh */
struct epoch_context ctl_epoch_ctx; /* epoch ctl helper */
@ -80,7 +88,8 @@ struct nhop_priv {
struct epoch_context nh_epoch_ctx; /* epoch data for nhop */
};
#define NH_IS_PINNED(_nh) ((_nh)->nh_priv->rt_flags & RTF_PINNED)
#define NH_IS_PINNED(_nh) ((!NH_IS_NHGRP(_nh)) && \
((_nh)->nh_priv->rt_flags & RTF_PINNED))
/* nhop.c */
struct nhop_priv *find_nhop(struct nh_control *ctl,

View File

@ -29,7 +29,7 @@
__FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_mpath.h"
#include "opt_route.h"
#include <sys/param.h>
#include <sys/systm.h>
@ -83,9 +83,6 @@ static int del_route(struct rib_head *rnh, struct rt_addrinfo *info,
struct rib_cmd_info *rc);
static int change_route(struct rib_head *rnh, struct rt_addrinfo *info,
struct route_nhop_data *nhd_orig, struct rib_cmd_info *rc);
static int change_route_nhop(struct rib_head *rnh, struct rtentry *rt,
struct rt_addrinfo *info, struct route_nhop_data *rnd,
struct rib_cmd_info *rc);
static int rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info,
struct rib_cmd_info *rc);
@ -94,6 +91,20 @@ static void rib_notify(struct rib_head *rnh, enum rib_subscription_type type,
struct rib_cmd_info *rc);
static void destroy_subscription_epoch(epoch_context_t ctx);
static bool rib_can_multipath(struct rib_head *rh);
/* Per-vnet multipath routing configuration */
SYSCTL_DECL(_net_route);
#define V_rib_route_multipath VNET(rib_route_multipath)
#ifdef ROUTE_MPATH
#define _MP_FLAGS CTLFLAG_RW
#else
#define _MP_FLAGS CTLFLAG_RD
#endif
VNET_DEFINE(u_int, rib_route_multipath) = 0;
SYSCTL_UINT(_net_route, OID_AUTO, multipath, _MP_FLAGS | CTLFLAG_VNET,
&VNET_NAME(rib_route_multipath), 0, "Enable route multipath");
#undef _MP_FLAGS
/* Routing table UMA zone */
VNET_DEFINE_STATIC(uma_zone_t, rtzone);
@ -128,7 +139,7 @@ destroy_rtentry(struct rtentry *rt)
CURVNET_SET(nhop_get_vnet(rt->rt_nhop));
/* Unreference nexthop */
nhop_free(rt->rt_nhop);
nhop_free_any(rt->rt_nhop);
uma_zfree(V_rtzone, rt);
@ -175,6 +186,41 @@ get_rnh(uint32_t fibnum, const struct rt_addrinfo *info)
return (rnh);
}
#ifdef ROUTE_MPATH
/*
 * Returns true if route multipath is enabled (net.route.multipath is
 * non-zero) in the vnet @rh belongs to.
 */
static bool
rib_can_multipath(struct rib_head *rh)
{
	bool enabled;

	/* The knob is per-vnet: evaluate it in the context of the rib's vnet */
	CURVNET_SET(rh->rib_vnet);
	enabled = (V_rib_route_multipath != 0);
	CURVNET_RESTORE();

	return (enabled);
}
/*
 * Checks if nhop is multipath-eligible.
 * Nexthops flagged as multipath are always eligible; otherwise the nexthop
 * has to have a gateway and must not be a redirect.
 *
 * Returns true for multipath-eligible nexthop,
 * false otherwise.
 */
bool
nhop_can_multipath(const struct nhop_object *nh)
{
	const bool is_mpath = ((nh->nh_flags & NHF_MULTIPATH) != 0);
	const bool has_gw = ((nh->nh_flags & NHF_GATEWAY) != 0);
	const bool is_redirect = ((nh->nh_flags & NHF_REDIRECT) != 0);

	if (is_mpath)
		return (true);

	return (has_gw && !is_redirect);
}
#endif
static int
get_info_weight(const struct rt_addrinfo *info, uint32_t default_weight)
{
@ -206,7 +252,7 @@ rt_set_expire_info(struct rtentry *rt, const struct rt_addrinfo *info)
*
* Returns true if matches, false otherwise.
*/
static bool
bool
match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw)
{
@ -461,7 +507,7 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info,
struct rib_cmd_info *rc)
{
struct nhop_object *nh_orig;
struct route_nhop_data rnd;
struct route_nhop_data rnd_orig, rnd_add;
struct nhop_object *nh;
struct rtentry *rt, *rt_orig;
int error;
@ -470,32 +516,19 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info,
if (error != 0)
return (error);
rnd.rnd_nhop = rt->rt_nhop;
rnd.rnd_weight = rt->rt_weight;
rnd_add.rnd_nhop = rt->rt_nhop;
rnd_add.rnd_weight = rt->rt_weight;
nh = rt->rt_nhop;
RIB_WLOCK(rnh);
#ifdef RADIX_MPATH
struct sockaddr *netmask;
netmask = info->rti_info[RTAX_NETMASK];
/* do not permit exactly the same dst/mask/gw pair */
if (rt_mpath_capable(rnh) &&
rt_mpath_conflict(rnh, rt, netmask)) {
RIB_WUNLOCK(rnh);
nhop_free(nh);
uma_zfree(V_rtzone, rt);
return (EEXIST);
}
#endif
error = add_route_nhop(rnh, rt, info, &rnd, rc);
error = add_route_nhop(rnh, rt, info, &rnd_add, rc);
if (error == 0) {
RIB_WUNLOCK(rnh);
return (0);
}
/* addition failed. Lookup prefix in the rib to determine the cause */
rt_orig = lookup_prefix(rnh, info, &rnd);
rt_orig = lookup_prefix(rnh, info, &rnd_orig);
if (rt_orig == NULL) {
/* No prefix -> rnh_addaddr() failed to allocate memory */
RIB_WUNLOCK(rnh);
@ -505,11 +538,11 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info,
}
/* We have existing route in the RIB. */
nh_orig = rnd.rnd_nhop;
nh_orig = rnd_orig.rnd_nhop;
/* Check if new route has higher preference */
if (can_override_nhop(info, nh_orig) > 0) {
/* Update nexthop to the new route */
change_route_nhop(rnh, rt_orig, info, &rnd, rc);
change_route_nhop(rnh, rt_orig, info, &rnd_add, rc);
RIB_WUNLOCK(rnh);
uma_zfree(V_rtzone, rt);
nhop_free(nh_orig);
@ -518,10 +551,25 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info,
RIB_WUNLOCK(rnh);
#ifdef ROUTE_MPATH
if (rib_can_multipath(rnh) && nhop_can_multipath(rnd_add.rnd_nhop) &&
nhop_can_multipath(rnd_orig.rnd_nhop))
error = add_route_mpath(rnh, info, rt, &rnd_add, &rnd_orig, rc);
else
#endif
/* Unable to add - another route with the same preference exists */
error = EEXIST;
/*
* ROUTE_MPATH disabled: failed to add route, free both nhop and rt.
* ROUTE_MPATH enabled: original nhop reference is unused in any case,
* free rt only if not _adding_ new route to rib (e.g. the case
* when initial lookup returned existing route, but then it got
* deleted prior to multipath group insertion, leading to a simple
* non-multipath add as a result).
*/
nhop_free(nh);
if ((error != 0) || rc->rc_cmd != RTM_ADD)
uma_zfree(V_rtzone, rt);
return (error);
@ -588,7 +636,13 @@ rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, struct rib_cmd_info
return (ESRCH);
nh = rt->rt_nhop;
#ifdef ROUTE_MPATH
if (NH_IS_NHGRP(nh)) {
error = del_route_mpath(rnh, info, rt,
(struct nhgrp_object *)nh, rc);
return (error);
}
#endif
error = check_info_match_nhop(info, rt, nh);
if (error != 0)
return (error);
@ -600,14 +654,6 @@ rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, struct rib_cmd_info
* Remove the item from the tree and return it.
* Complain if it is not there and do no more processing.
*/
#ifdef RADIX_MPATH
info->rti_info[RTAX_GATEWAY] = &nh->gw_sa;
if (rt_mpath_capable(rnh)) {
rn = rt_mpath_unlink(rnh, info, rt, &error);
if (error != 0)
return (error);
} else
#endif
rn = rnh->rnh_deladdr(info->rti_info[RTAX_DST],
info->rti_info[RTAX_NETMASK], &rnh->head);
if (rn == NULL)
@ -648,7 +694,18 @@ del_route(struct rib_head *rnh, struct rt_addrinfo *info,
* If the caller wants it, then it can have it,
* the entry will be deleted after the end of the current epoch.
*/
if (rc->rc_cmd == RTM_DELETE)
rtfree(rc->rc_rt);
#ifdef ROUTE_MPATH
else {
/*
* Deleting 1 path may result in RTM_CHANGE to
* a different mpath group/nhop.
* Free old mpath group.
*/
nhop_free_any(rc->rc_nh_old);
}
#endif
return (0);
}
@ -694,19 +751,6 @@ rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
return (ESRCH);
}
#ifdef RADIX_MPATH
/*
* If we got multipath routes,
* we require users to specify a matching RTAX_GATEWAY.
*/
if (rt_mpath_capable(rnh)) {
rt = rt_mpath_matchgate(rt, info->rti_info[RTAX_GATEWAY]);
if (rt == NULL) {
RIB_RUNLOCK(rnh);
return (ESRCH);
}
}
#endif
rnd_orig.rnd_nhop = rt->rt_nhop;
rnd_orig.rnd_weight = rt->rt_weight;
@ -722,18 +766,11 @@ rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
}
static int
change_route(struct rib_head *rnh, struct rt_addrinfo *info,
struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc)
change_nhop(struct rib_head *rnh, struct rt_addrinfo *info,
struct nhop_object *nh_orig, struct nhop_object **nh_new)
{
int error = 0;
int free_ifa = 0;
struct nhop_object *nh, *nh_orig;
struct route_nhop_data rnd_new;
nh = NULL;
nh_orig = rnd_orig->rnd_nhop;
if (nh_orig == NULL)
return (ESRCH);
int error;
/*
* New gateway could require new ifaddr, ifp;
@ -759,20 +796,97 @@ change_route(struct rib_head *rnh, struct rt_addrinfo *info,
}
}
error = nhop_create_from_nhop(rnh, nh_orig, info, &nh);
error = nhop_create_from_nhop(rnh, nh_orig, info, nh_new);
if (free_ifa) {
ifa_free(info->rti_ifa);
info->rti_ifa = NULL;
}
return (error);
}
#ifdef ROUTE_MPATH
/*
 * Changes a single path of the multipath route described by @rnd_orig.
 *
 * Finds the nexthop of the original group that matches @info, builds its
 * replacement with change_nhop(), creates/finds the group in which the
 * matched nexthop is swapped for the replacement and tries to install that
 * group with change_route_conditional().
 *
 * The original group is never modified: the replacement is applied to a
 * private copy of its nexthop list.
 *
 * Returns 0 on success or errno: ESRCH if no nexthop of the group matches
 * @info, EAGAIN if the temporary nexthop list cannot be allocated,
 * otherwise the error reported by the helpers.
 */
static int
change_mpath_route(struct rib_head *rnh, struct rt_addrinfo *info,
    struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc)
{
	struct route_nhop_data rnd_new;
	struct nhop_object *nh_orig, *nh_new;
	struct weightened_nhop *wn, *wn_new;
	uint32_t num_nhops, match_idx;
	int error;

	wn = nhgrp_get_nhops(rnd_orig->rnd_nhgrp, &num_nhops);

	/*
	 * Locate the path to change.
	 * check_info_match_nhop() returns 0 when @info matches the nexthop.
	 */
	nh_orig = NULL;
	match_idx = 0;
	for (uint32_t i = 0; i < num_nhops; i++) {
		if (check_info_match_nhop(info, NULL, wn[i].nh) == 0) {
			nh_orig = wn[i].nh;
			match_idx = i;
			break;
		}
	}
	if (nh_orig == NULL)
		return (ESRCH);

	error = change_nhop(rnh, info, nh_orig, &nh_new);
	if (error != 0)
		return (error);

	wn_new = mallocarray(num_nhops, sizeof(struct weightened_nhop),
	    M_TEMP, M_NOWAIT | M_ZERO);
	if (wn_new == NULL) {
		nhop_free(nh_new);
		return (EAGAIN);
	}

	/*
	 * Patch the COPY of the nexthop list: @wn belongs to the original
	 * group and has to stay intact.
	 * NOTE(review): the default weight is the route-level weight of
	 * @rnd_orig, not the previous weight of the path - confirm intended.
	 */
	memcpy(wn_new, wn, num_nhops * sizeof(struct weightened_nhop));
	wn_new[match_idx].nh = nh_new;
	wn_new[match_idx].weight = get_info_weight(info, rnd_orig->rnd_weight);

	error = nhgrp_get_group(rnh, wn_new, num_nhops, &rnd_new);

	/* Neither our nexthop reference nor the temporary list is needed now */
	nhop_free(nh_new);
	free(wn_new, M_TEMP);

	if (error != 0)
		return (error);

	return (change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new,
	    rc));
}
#endif
static int
change_route(struct rib_head *rnh, struct rt_addrinfo *info,
struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc)
{
int error = 0;
struct nhop_object *nh, *nh_orig;
struct route_nhop_data rnd_new;
nh = NULL;
nh_orig = rnd_orig->rnd_nhop;
if (nh_orig == NULL)
return (ESRCH);
#ifdef ROUTE_MPATH
if (NH_IS_NHGRP(nh_orig))
return (change_mpath_route(rnh, info, rnd_orig, rc));
#endif
rnd_new.rnd_weight = get_info_weight(info, rnd_orig->rnd_weight);
error = change_nhop(rnh, info, nh_orig, &rnd_new.rnd_nhop);
if (error != 0)
return (error);
error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc);
return (error);
@ -827,7 +941,7 @@ add_route_nhop(struct rib_head *rnh, struct rtentry *rt,
* Conditionally set rt_expire if set in @info.
* Returns 0 on success.
*/
static int
int
change_route_nhop(struct rib_head *rnh, struct rtentry *rt,
struct rt_addrinfo *info, struct route_nhop_data *rnd,
struct rib_cmd_info *rc)
@ -855,6 +969,8 @@ change_route_nhop(struct rib_head *rnh, struct rtentry *rt,
rn = rnh->rnh_deladdr(ndst, netmask, &rnh->head);
if (rn == NULL)
return (ESRCH);
rt = RNTORT(rn);
rt->rte_flags &= ~RTF_UP;
}
/* Finalize notification */
@ -989,7 +1105,6 @@ rt_checkdelroute(struct radix_node *rn, void *arg)
info->rti_info[RTAX_DST] = rt_key(rt);
info->rti_info[RTAX_NETMASK] = rt_mask(rt);
info->rti_info[RTAX_GATEWAY] = &rt->rt_nhop->gw_sa;
error = rt_unlinkrte(di->rnh, info, &di->rc);
@ -1000,7 +1115,7 @@ rt_checkdelroute(struct radix_node *rn, void *arg)
* XXX: Delayed notifications not implemented
* for nexthop updates.
*/
if (error == 0) {
if ((error == 0) && (di->rc.rc_cmd == RTM_DELETE)) {
/* Add to the list and return */
rt->rt_chain = di->head;
di->head = rt;
@ -1024,6 +1139,7 @@ rib_walk_del(u_int fibnum, int family, rt_filter_f_t *filter_f, void *arg, bool
struct rib_head *rnh;
struct rt_delinfo di;
struct rtentry *rt;
struct nhop_object *nh;
struct epoch_tracker et;
rnh = rt_tables_get_rnh(fibnum, family);
@ -1049,18 +1165,31 @@ rib_walk_del(u_int fibnum, int family, rt_filter_f_t *filter_f, void *arg, bool
rt = di.head;
di.head = rt->rt_chain;
rt->rt_chain = NULL;
nh = rt->rt_nhop;
di.rc.rc_rt = rt;
di.rc.rc_nh_old = rt->rt_nhop;
di.rc.rc_nh_old = nh;
rib_notify(rnh, RIB_NOTIFY_DELAYED, &di.rc);
/* TODO std rt -> rt_addrinfo export */
di.info.rti_info[RTAX_DST] = rt_key(rt);
di.info.rti_info[RTAX_NETMASK] = rt_mask(rt);
if (report)
rt_routemsg(RTM_DELETE, rt, rt->rt_nhop->nh_ifp, 0,
fibnum);
if (report) {
#ifdef ROUTE_MPATH
struct nhgrp_object *nhg;
struct weightened_nhop *wn;
uint32_t num_nhops;
if (NH_IS_NHGRP(nh)) {
nhg = (struct nhgrp_object *)nh;
wn = nhgrp_get_nhops(nhg, &num_nhops);
for (int i = 0; i < num_nhops; i++)
rt_routemsg(RTM_DELETE, rt,
wn[i].nh->nh_ifp, 0, fibnum);
} else
#endif
rt_routemsg(RTM_DELETE, rt, nh->nh_ifp, 0, fibnum);
}
rtfree(rt);
}

View File

@ -53,6 +53,10 @@ int rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
int rib_action(uint32_t fibnum, int action, struct rt_addrinfo *info,
struct rib_cmd_info *rc);
typedef void route_notification_t(struct rib_cmd_info *rc, void *);
void rib_decompose_notification(struct rib_cmd_info *rc,
route_notification_t *cb, void *cbdata);
int rib_add_redirect(u_int fibnum, struct sockaddr *dst,
struct sockaddr *gateway, struct sockaddr *author, struct ifnet *ifp,
int flags, int expire_sec);
@ -66,6 +70,20 @@ typedef void rt_setwarg_t(struct rib_head *, uint32_t, int, void *);
void rt_foreach_fib_walk(int af, rt_setwarg_t *, rt_walktree_f_t *, void *);
void rt_foreach_fib_walk_del(int af, rt_filter_f_t *filter_f, void *arg);
struct route_nhop_data;
const struct rtentry *rib_lookup_prefix(uint32_t fibnum, int family,
const struct sockaddr *dst, const struct sockaddr *netmask,
struct route_nhop_data *rnd);
const struct rtentry *rib_lookup_lpm(uint32_t fibnum, int family,
const struct sockaddr *dst, struct route_nhop_data *rnd);
/* Multipath */
struct nhgrp_object;
struct weightened_nhop;
struct weightened_nhop *nhgrp_get_nhops(struct nhgrp_object *nhg,
uint32_t *pnum_nhops);
enum rib_subscription_type {
RIB_NOTIFY_IMMEDIATE,
RIB_NOTIFY_DELAYED

View File

@ -131,3 +131,167 @@ rib_lookup(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags,
return (nh);
}
#ifdef ROUTE_MPATH
/*
 * Decomposes the RTM_CHANGE notification @rc into a sequence of single-path
 * RTM_ADD / RTM_DELETE notifications, calling @cb(notification, @cbdata)
 * for each of them. Every notification refers to the rtentry of @rc.
 *
 * Both the old and the new side may be either a nexthop group or a plain
 * nexthop; a plain nexthop is handled as a one-element list with the weight
 * taken from @rc.
 */
static void
decompose_change_notification(struct rib_cmd_info *rc, route_notification_t *cb,
void *cbdata)
{
uint32_t num_old, num_new;
uint32_t nh_idx_old, nh_idx_new;
struct weightened_nhop *wn_old, *wn_new;
/*
 * One-element list used for a non-group side.
 * NOTE(review): the same storage serves both sides, so this function
 * relies on at most one side being a plain nexthop - confirm with callers.
 */
struct weightened_nhop tmp = { NULL, 0 };
uint32_t idx_old = 0, idx_new = 0;
/* Notification templates; only nexthop and weight change per call */
struct rib_cmd_info rc_del = { .rc_cmd = RTM_DELETE, .rc_rt = rc->rc_rt };
struct rib_cmd_info rc_add = { .rc_cmd = RTM_ADD, .rc_rt = rc->rc_rt };
/* Old side: nexthop list of the group or the single old nexthop */
if (NH_IS_NHGRP(rc->rc_nh_old)) {
wn_old = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_old, &num_old);
} else {
tmp.nh = rc->rc_nh_old;
tmp.weight = rc->rc_nh_weight;
wn_old = &tmp;
num_old = 1;
}
/* New side: nexthop list of the group or the single new nexthop */
if (NH_IS_NHGRP(rc->rc_nh_new)) {
wn_new = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_new, &num_new);
} else {
tmp.nh = rc->rc_nh_new;
tmp.weight = rc->rc_nh_weight;
wn_new = &tmp;
num_new = 1;
}
/* Use the fact that each @wn array is sorted (by nexthop index) */
/*
 * The lists are merged into a set of add (A) and delete (D) operations.
 * What the code below emits, in order (traced by hand, nexthops denoted
 * by their indexes, equal weights assumed for common nexthops):
 * [1] -> [1, 2] = A{2}
 * [2] -> [1, 2] = D{2}, A{1}, A{2}
 * [1, 2, 4] -> [1, 3, 4] = A{3}, D{2}
 * [1, 2, 4] -> [1, 4] = D{2}
 * [1, 2, 4] -> [3, 4] = D{1}, A{3}, D{2}
 * [1, 2] -> [3, 4] = D{1}, A{3}, D{2}, A{4}
 *
 * NOTE(review): the [2] -> [1, 2] case ends in the right state but deletes
 * and re-adds the unchanged nexthop 2 - see the last branch of the loop.
 */
idx_old = 0;
while ((idx_old < num_old) && (idx_new < num_new)) {
nh_idx_old = wn_old[idx_old].nh->nh_priv->nh_idx;
nh_idx_new = wn_new[idx_new].nh->nh_priv->nh_idx;
if (nh_idx_old == nh_idx_new) {
/* Same nexthop on both sides: report only if the weight differs */
if (wn_old[idx_old].weight != wn_new[idx_new].weight) {
/* Update weight by providing del/add notifications */
rc_del.rc_nh_old = wn_old[idx_old].nh;
rc_del.rc_nh_weight = wn_old[idx_old].weight;
cb(&rc_del, cbdata);
rc_add.rc_nh_new = wn_new[idx_new].nh;
rc_add.rc_nh_weight = wn_new[idx_new].weight;
cb(&rc_add, cbdata);
}
idx_old++;
idx_new++;
} else if (nh_idx_old < nh_idx_new) {
/*
 * Current old nexthop is absent from the new list.
 *
 * [1, ~2~, 4], [1, ~3~, 4]
 * [1, ~2~, 5], [1, ~3~, 4]
 * [1, ~2~], [1, ~3~, 4]
 */
if ((idx_old + 1 >= num_old) ||
(wn_old[idx_old + 1].nh->nh_priv->nh_idx > nh_idx_new)) {
/* Add new unless the next old item is still <= new */
rc_add.rc_nh_new = wn_new[idx_new].nh;
rc_add.rc_nh_weight = wn_new[idx_new].weight;
cb(&rc_add, cbdata);
idx_new++;
}
/* In any case, delete current old */
rc_del.rc_nh_old = wn_old[idx_old].nh;
rc_del.rc_nh_weight = wn_old[idx_old].weight;
cb(&rc_del, cbdata);
idx_old++;
} else {
/*
 * nh_idx_old > nh_idx_new
 *
 * [1, ~3~, 4], [1, ~2~, 4]
 * [1, ~3~, 5], [1, ~2~, 4]
 * [1, ~3~, 4], [1, ~2~]
 *
 * NOTE(review): the current old nexthop is deleted here even though
 * it may still appear later in the new list, in which case it gets
 * re-added by a later iteration or by the tail loop below.
 */
if ((idx_new + 1 >= num_new) ||
(wn_new[idx_new + 1].nh->nh_priv->nh_idx > nh_idx_old)) {
/* No next item or next item is > current one */
rc_add.rc_nh_new = wn_new[idx_new].nh;
rc_add.rc_nh_weight = wn_new[idx_new].weight;
cb(&rc_add, cbdata);
idx_new++;
}
/* In any case, delete current old */
rc_del.rc_nh_old = wn_old[idx_old].nh;
rc_del.rc_nh_weight = wn_old[idx_old].weight;
cb(&rc_del, cbdata);
idx_old++;
}
}
/* Old nexthops not consumed by the merge loop are all deleted */
while (idx_old < num_old) {
rc_del.rc_nh_old = wn_old[idx_old].nh;
rc_del.rc_nh_weight = wn_old[idx_old].weight;
cb(&rc_del, cbdata);
idx_old++;
}
/* New nexthops not consumed by the merge loop are all added */
while (idx_new < num_new) {
rc_add.rc_nh_new = wn_new[idx_new].nh;
rc_add.rc_nh_weight = wn_new[idx_new].weight;
cb(&rc_add, cbdata);
idx_new++;
}
}
/*
 * Decompose multipath cmd info @rc into a list of add/del
 * single-path operations, calling @cb callback (with @cbdata as its second
 * argument) for each operation.
 *
 *  - RTM_ADD:    one RTM_ADD per nexthop of the new group;
 *  - RTM_DELETE: one RTM_DELETE per nexthop of the old group;
 *  - RTM_CHANGE: see decompose_change_notification().
 *
 * Assumes at least one of the nexthops in @rc is multipath: if the relevant
 * nexthop(s) are not groups, the function returns without calling @cb.
 */
void
rib_decompose_notification(struct rib_cmd_info *rc, route_notification_t *cb,
    void *cbdata)
{
	struct weightened_nhop *wn;
	uint32_t num_nhops;
	struct rib_cmd_info rc_new;

	/* Per-path notifications inherit everything but nexthop and weight */
	rc_new = *rc;
	DPRINTF("cb=%p cmd=%d nh_old=%p nh_new=%p",
	    cb, rc->rc_cmd, rc->rc_nh_old, rc->rc_nh_new);
	switch (rc->rc_cmd) {
	case RTM_ADD:
		if (!NH_IS_NHGRP(rc->rc_nh_new))
			return;
		wn = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_new,
		    &num_nhops);
		for (uint32_t i = 0; i < num_nhops; i++) {
			rc_new.rc_nh_new = wn[i].nh;
			rc_new.rc_nh_weight = wn[i].weight;
			cb(&rc_new, cbdata);
		}
		break;
	case RTM_DELETE:
		if (!NH_IS_NHGRP(rc->rc_nh_old))
			return;
		wn = nhgrp_get_nhops((struct nhgrp_object *)rc->rc_nh_old,
		    &num_nhops);
		for (uint32_t i = 0; i < num_nhops; i++) {
			rc_new.rc_nh_old = wn[i].nh;
			rc_new.rc_nh_weight = wn[i].weight;
			cb(&rc_new, cbdata);
		}
		break;
	case RTM_CHANGE:
		if (!NH_IS_NHGRP(rc->rc_nh_old) && !NH_IS_NHGRP(rc->rc_nh_new))
			return;
		decompose_change_notification(rc, cb, cbdata);
		break;
	default:
		/* Other commands are not decomposed */
		break;
	}
}
#endif

View File

@ -87,6 +87,7 @@ struct rib_head {
/* Constants */
#define RIB_MAX_RETRIES 3
#define RT_MAXFIBS UINT16_MAX
#define RIB_MAX_MPATH_WIDTH 64
/* Macro for verifying fields in af-specific 'struct route' structures */
#define CHK_STRUCT_FIELD_GENERIC(_s1, _f1, _s2, _f2) \
@ -113,12 +114,7 @@ _Static_assert(__offsetof(struct route, ro_dst) == __offsetof(_ro_new, _dst_new)
"ro_dst and " #_dst_new " are at different offset")
struct rib_head *rt_tables_get_rnh(uint32_t table, sa_family_t family);
void rt_mpath_init_rnh(struct rib_head *rnh);
int rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum);
#ifdef RADIX_MPATH
struct radix_node *rt_mpath_unlink(struct rib_head *rnh,
struct rt_addrinfo *info, struct rtentry *rto, int *perror);
#endif
struct rib_cmd_info;
VNET_PCPUSTAT_DECLARE(struct rtstat, rtstat);
@ -202,14 +198,6 @@ struct rtentry {
/* rtentry rt flag mask */
#define RTE_RT_FLAG_MASK (RTF_UP | RTF_HOST)
/* Nexthop selection */
#define _NH2MP(_nh) ((struct nhgrp_object *)(_nh))
#define _SELECT_NHOP(_nh, _flowid) \
(_NH2MP(_nh))->nhops[(_flowid) % (_NH2MP(_nh))->mp_size]
#define _RT_SELECT_NHOP(_nh, _flowid) \
((!NH_IS_MULTIPATH(_nh)) ? (_nh) : _SELECT_NHOP(_nh, _flowid))
#define RT_SELECT_NHOP(_rt, _flowid) _RT_SELECT_NHOP((_rt)->rt_nhop, _flowid)
/* route_temporal.c */
void tmproutes_update(struct rib_head *rnh, struct rtentry *rt);
void tmproutes_init(struct rib_head *rh);
@ -217,14 +205,24 @@ void tmproutes_destroy(struct rib_head *rh);
/* route_ctl.c */
/*
 * Nexthop data passed around by the route control code: a pointer that is
 * either a single nexthop or a nexthop group (both members share storage),
 * together with a weight.
 */
struct route_nhop_data {
union {
struct nhop_object *rnd_nhop; /* view of the pointer as a nexthop */
struct nhgrp_object *rnd_nhgrp; /* view of the pointer as a nexthop group */
};
uint32_t rnd_weight; /* weight associated with the pointer above */
};
int change_route_nhop(struct rib_head *rnh, struct rtentry *rt,
struct rt_addrinfo *info, struct route_nhop_data *rnd,
struct rib_cmd_info *rc);
int change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
struct route_nhop_data *nhd_new, struct rib_cmd_info *rc);
struct rtentry *lookup_prefix(struct rib_head *rnh,
const struct rt_addrinfo *info, struct route_nhop_data *rnd);
bool nhop_can_multipath(const struct nhop_object *nh);
bool match_nhop_gw(const struct nhop_object *nh, const struct sockaddr *gw);
int check_info_match_nhop(const struct rt_addrinfo *info,
const struct rtentry *rt, const struct nhop_object *nh);
int can_override_nhop(const struct rt_addrinfo *info,
@ -256,5 +254,57 @@ int nhop_create_from_nhop(struct rib_head *rnh, const struct nhop_object *nh_ori
void nhops_update_ifmtu(struct rib_head *rh, struct ifnet *ifp, uint32_t mtu);
int nhops_dump_sysctl(struct rib_head *rh, struct sysctl_req *w);
/* MULTIPATH */
#define MPF_MULTIPATH 0x08 /* need to be consistent with NHF_MULTIPATH */
/*
 * Nexthop group header followed by an array of nexthop pointers.
 * nhop_select() picks nhops[flowid % nhg_size] from it.
 */
struct nhgrp_object {
uint16_t nhg_flags; /* nexthop group flags */
uint8_t nhg_size; /* dataplane group size: number of entries in nhops[] */
uint8_t spare;
struct nhop_object *nhops[0]; /* nhops */
};
/*
 * Returns the nexthop to use for @flowid.
 * A plain nexthop is returned unchanged.  With ROUTE_MPATH, a nexthop
 *  group is resolved to one of its members, indexed by @flowid modulo
 *  the group size.
 */
static inline struct nhop_object *
nhop_select(struct nhop_object *nh, uint32_t flowid)
{
#ifdef ROUTE_MPATH
	struct nhgrp_object *grp;

	if (!NH_IS_NHGRP(nh))
		return (nh);
	grp = (struct nhgrp_object *)nh;
	return (grp->nhops[flowid % grp->nhg_size]);
#else
	return (nh);
#endif
}
struct weightened_nhop;
/* mpath_ctl.c */
int add_route_mpath(struct rib_head *rnh, struct rt_addrinfo *info,
struct rtentry *rt, struct route_nhop_data *rnd_add,
struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc);
int del_route_mpath(struct rib_head *rh, struct rt_addrinfo *info,
struct rtentry *rt, struct nhgrp_object *nhg, struct rib_cmd_info *rc);
/* nhgrp.c */
int nhgrp_ctl_init(struct nh_control *ctl);
void nhgrp_ctl_free(struct nh_control *ctl);
void nhgrp_ctl_unlink_all(struct nh_control *ctl);
/* nhgrp_ctl.c */
int nhgrp_dump_sysctl(struct rib_head *rh, struct sysctl_req *w);
int nhgrp_get_group(struct rib_head *rh, struct weightened_nhop *wn,
int num_nhops, struct route_nhop_data *rnd);
typedef bool nhgrp_filter_cb_t(const struct nhop_object *nh, void *data);
int nhgrp_get_filtered_group(struct rib_head *rh, const struct nhgrp_object *src,
nhgrp_filter_cb_t flt_func, void *flt_data, struct route_nhop_data *rnd);
int nhgrp_get_addition_group(struct rib_head *rnh,
struct route_nhop_data *rnd_orig, struct route_nhop_data *rnd_add,
struct route_nhop_data *rnd_new);
void nhgrp_free(struct nhgrp_object *nhg);
#endif

View File

@ -32,7 +32,7 @@
* $FreeBSD$
*/
#include "opt_ddb.h"
#include "opt_mpath.h"
#include "opt_route.h"
#include "opt_inet.h"
#include "opt_inet6.h"
@ -158,8 +158,7 @@ MTX_SYSINIT(rtsock, &rtsock_mtx, "rtsock route_cb lock", MTX_DEF);
#define RTSOCK_UNLOCK() mtx_unlock(&rtsock_mtx)
#define RTSOCK_LOCK_ASSERT() mtx_assert(&rtsock_mtx, MA_OWNED)
static SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
"");
SYSCTL_NODE(_net, OID_AUTO, route, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, "");
struct walkarg {
int w_tmemsize;
@ -650,6 +649,25 @@ fill_addrinfo(struct rt_msghdr *rtm, int len, u_int fibnum, struct rt_addrinfo *
return (0);
}
/*
 * Picks a single nexthop out of @nh for reporting purposes.
 *
 * If @nh is not a nexthop group it is returned as is.  Otherwise
 *  (ROUTE_MPATH only) the first group member is returned when @gw is
 *  NULL, or the first member matching @gw per match_nhop_gw().
 *
 * Returns NULL if no group member matches @gw, or if @nh is a group and
 *  the kernel is built without ROUTE_MPATH.
 */
static struct nhop_object *
select_nhop(struct nhop_object *nh, const struct sockaddr *gw)
{
	if (!NH_IS_NHGRP(nh))
		return (nh);
#ifdef ROUTE_MPATH
	struct weightened_nhop *wn;
	uint32_t num_nhops;

	wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
	if (gw == NULL)
		return (wn[0].nh);
	/* Unsigned index: num_nhops is uint32_t, avoid a mixed-sign compare. */
	for (uint32_t i = 0; i < num_nhops; i++) {
		if (match_nhop_gw(wn[i].nh, gw))
			return (wn[i].nh);
	}
#endif
	return (NULL);
}
/*
* Handles RTM_GET message from routing socket, returning matching rt.
*
@ -663,6 +681,7 @@ handle_rtm_get(struct rt_addrinfo *info, u_int fibnum,
{
RIB_RLOCK_TRACKER;
struct rib_head *rnh;
struct nhop_object *nh;
sa_family_t saf;
saf = info->rti_info[RTAX_DST]->sa_family;
@ -690,21 +709,12 @@ handle_rtm_get(struct rt_addrinfo *info, u_int fibnum,
RIB_RUNLOCK(rnh);
return (ESRCH);
}
#ifdef RADIX_MPATH
/*
* for RTM_GET, gate is optional even with multipath.
* if gate == NULL the first match is returned.
* (no need to call rt_mpath_matchgate if gate == NULL)
*/
if (rt_mpath_capable(rnh) && info->rti_info[RTAX_GATEWAY]) {
rc->rc_rt = rt_mpath_matchgate(rc->rc_rt,
info->rti_info[RTAX_GATEWAY]);
if (rc->rc_rt == NULL) {
nh = select_nhop(rc->rc_rt->rt_nhop, info->rti_info[RTAX_GATEWAY]);
if (nh == NULL) {
RIB_RUNLOCK(rnh);
return (ESRCH);
}
}
#endif
/*
* If performing proxied L2 entry insertion, and
* the actual PPP host entry is found, perform
@ -740,8 +750,13 @@ handle_rtm_get(struct rt_addrinfo *info, u_int fibnum,
RIB_RUNLOCK(rnh);
return (ESRCH);
}
nh = select_nhop(rc->rc_rt->rt_nhop, info->rti_info[RTAX_GATEWAY]);
if (nh == NULL) {
RIB_RUNLOCK(rnh);
return (ESRCH);
}
rc->rc_nh_new = rc->rc_rt->rt_nhop;
}
rc->rc_nh_new = nh;
rc->rc_nh_weight = rc->rc_rt->rt_weight;
RIB_RUNLOCK(rnh);
@ -832,6 +847,24 @@ update_rtm_from_rc(struct rt_addrinfo *info, struct rt_msghdr **prtm,
return (0);
}
/*
 * Notification callback: copies @rc into the rib_cmd_info pointed to by
 *  @_cbdata when @rc is an RTM_DELETE; other commands are ignored.
 * When invoked several times, the last matching notification wins.
 */
static void
save_del_notification(struct rib_cmd_info *rc, void *_cbdata)
{
	struct rib_cmd_info *saved = _cbdata;

	if (rc->rc_cmd != RTM_DELETE)
		return;
	*saved = *rc;
}
/*
 * Notification callback: copies @rc into the rib_cmd_info pointed to by
 *  @_cbdata when @rc is an RTM_ADD; other commands are ignored.
 * When invoked several times, the last matching notification wins.
 */
static void
save_add_notification(struct rib_cmd_info *rc, void *_cbdata)
{
	struct rib_cmd_info *saved = _cbdata;

	if (rc->rc_cmd != RTM_ADD)
		return;
	*saved = *rc;
}
/*ARGSUSED*/
static int
route_output(struct mbuf *m, struct socket *so, ...)
@ -918,6 +951,15 @@ route_output(struct mbuf *m, struct socket *so, ...)
if (error == 0) {
#ifdef INET6
rti_need_deembed = 1;
#endif
#ifdef ROUTE_MPATH
if (NH_IS_NHGRP(rc.rc_nh_new) ||
(rc.rc_nh_old && NH_IS_NHGRP(rc.rc_nh_old))) {
struct rib_cmd_info rc_simple = {};
rib_decompose_notification(&rc,
save_add_notification, (void *)&rc_simple);
rc = rc_simple;
}
#endif
nh = rc.rc_nh_new;
rtm->rtm_index = nh->nh_ifp->if_index;
@ -927,6 +969,15 @@ route_output(struct mbuf *m, struct socket *so, ...)
case RTM_DELETE:
error = rib_action(fibnum, RTM_DELETE, &info, &rc);
if (error == 0) {
#ifdef ROUTE_MPATH
if (NH_IS_NHGRP(rc.rc_nh_old) ||
(rc.rc_nh_new && NH_IS_NHGRP(rc.rc_nh_new))) {
struct rib_cmd_info rc_simple = {};
rib_decompose_notification(&rc,
save_del_notification, (void *)&rc_simple);
rc = rc_simple;
}
#endif
nh = rc.rc_nh_old;
goto report;
}
@ -1708,6 +1759,18 @@ sysctl_dumpentry(struct radix_node *rn, void *vw)
if (!can_export_rte(w->w_req->td->td_ucred, rt))
return (0);
nh = rt->rt_nhop;
#ifdef ROUTE_MPATH
if (NH_IS_NHGRP(nh)) {
struct weightened_nhop *wn;
uint32_t num_nhops;
wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
for (int i = 0; i < num_nhops; i++) {
error = sysctl_dumpnhop(rt, wn[i].nh, wn[i].weight, w);
if (error != 0)
return (error);
}
} else
#endif
error = sysctl_dumpnhop(rt, nh, rt->rt_weight, w);
return (0);
@ -1748,6 +1811,7 @@ sysctl_dumpnhop(struct rtentry *rt, struct nhop_object *nh, uint32_t weight,
rtm->rtm_flags = rt->rte_flags;
rtm->rtm_flags |= nhop_get_rtflags(nh);
rt_getmetrics(rt, nh, &rtm->rtm_rmx);
rtm->rtm_rmx.rmx_weight = weight;
rtm->rtm_index = nh->nh_ifp->if_index;
rtm->rtm_addrs = info.rti_addrs;
error = SYSCTL_OUT(w->w_req, (caddr_t)rtm, size);
@ -2028,7 +2092,7 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
namelen--;
if (req->newptr)
return (EPERM);
if (name[1] == NET_RT_DUMP || name[1] == NET_RT_NHOP) {
if (name[1] == NET_RT_DUMP || name[1] == NET_RT_NHOP || name[1] == NET_RT_NHGRP) {
if (namelen == 3)
fib = req->td->td_proc->p_fibnum;
else if (namelen == 4)
@ -2096,6 +2160,7 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
}
break;
case NET_RT_NHOP:
case NET_RT_NHGRP:
/* Allow dumping one specific af/fib at a time */
if (namelen < 4) {
error = EINVAL;
@ -2113,6 +2178,12 @@ sysctl_rtsock(SYSCTL_HANDLER_ARGS)
}
if (w.w_op == NET_RT_NHOP)
error = nhops_dump_sysctl(rnh, w.w_req);
else
#ifdef ROUTE_MPATH
error = nhgrp_dump_sysctl(rnh, w.w_req);
#else
error = ENOTSUP;
#endif
break;
case NET_RT_IFLIST:
case NET_RT_IFLISTL:

View File

@ -35,8 +35,6 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_mpath.h"
#include <sys/param.h>
#include <sys/eventhandler.h>
#include <sys/systm.h>
@ -699,14 +697,6 @@ in_addprefix(struct in_ifaddr *target, int flags)
* interface address, we are done here.
*/
if (ia->ia_flags & IFA_ROUTE) {
#ifdef RADIX_MPATH
if (ia->ia_addr.sin_addr.s_addr ==
target->ia_addr.sin_addr.s_addr) {
IN_IFADDR_RUNLOCK(&in_ifa_tracker);
return (EEXIST);
} else
break;
#endif
if (V_nosameprefix) {
IN_IFADDR_RUNLOCK(&in_ifa_tracker);
return (EEXIST);

View File

@ -32,7 +32,6 @@ __FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_route.h"
#include "opt_mpath.h"
#include <sys/param.h>
#include <sys/systm.h>
@ -48,14 +47,11 @@ __FBSDID("$FreeBSD$");
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/route/route_ctl.h>
#include <net/route/route_var.h>
#include <net/route/nhop.h>
#include <net/vnet.h>
#ifdef RADIX_MPATH
#include <net/radix_mpath.h>
#endif
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/in_fib.h>
@ -80,7 +76,6 @@ fib4_lookup(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
RIB_RLOCK_TRACKER;
struct rib_head *rh;
struct radix_node *rn;
struct rtentry *rt;
struct nhop_object *nh;
KASSERT((fibnum < rt_numfibs), ("fib4_lookup: bad fibnum"));
@ -99,12 +94,7 @@ fib4_lookup(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
RIB_RLOCK(rh);
rn = rh->rnh_matchaddr((void *)&sin4, &rh->head);
if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
rt = RNTORT(rn);
#ifdef RADIX_MPATH
if (rt_mpath_next(rt) != NULL)
rt = rt_mpath_selectrte(rt, flowid);
#endif
nh = rt->rt_nhop;
nh = nhop_select((RNTORT(rn))->rt_nhop, flowid);
/* Ensure route & ifp is UP */
if (RT_LINK_IS_UP(nh->nh_ifp)) {
if (flags & NHR_REF)
@ -120,7 +110,7 @@ fib4_lookup(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
}
inline static int
check_urpf(const struct nhop_object *nh, uint32_t flags,
check_urpf_nhop(const struct nhop_object *nh, uint32_t flags,
const struct ifnet *src_if)
{
@ -137,21 +127,24 @@ check_urpf(const struct nhop_object *nh, uint32_t flags,
return (0);
}
#ifdef RADIX_MPATH
inline static int
check_urpf_mpath(struct rtentry *rt, uint32_t flags,
static int
check_urpf(struct nhop_object *nh, uint32_t flags,
const struct ifnet *src_if)
{
while (rt != NULL) {
if (check_urpf(rt->rt_nhop, flags, src_if) != 0)
#ifdef ROUTE_MPATH
if (NH_IS_NHGRP(nh)) {
struct weightened_nhop *wn;
uint32_t num_nhops;
wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
for (int i = 0; i < num_nhops; i++) {
if (check_urpf_nhop(wn[i].nh, flags, src_if) != 0)
return (1);
rt = rt_mpath_next(rt);
}
return (0);
}
} else
#endif
return (check_urpf_nhop(nh, flags, src_if));
}
/*
* Performs reverse path forwarding lookup.
@ -169,7 +162,6 @@ fib4_check_urpf(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
RIB_RLOCK_TRACKER;
struct rib_head *rh;
struct radix_node *rn;
struct rtentry *rt;
int ret;
KASSERT((fibnum < rt_numfibs), ("fib4_check_urpf: bad fibnum"));
@ -186,12 +178,7 @@ fib4_check_urpf(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
RIB_RLOCK(rh);
rn = rh->rnh_matchaddr((void *)&sin4, &rh->head);
if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
rt = RNTORT(rn);
#ifdef RADIX_MPATH
ret = check_urpf_mpath(rt, flags, src_if);
#else
ret = check_urpf(rt->rt_nhop, flags, src_if);
#endif
ret = check_urpf(RNTORT(rn)->rt_nhop, flags, src_if);
RIB_RUNLOCK(rh);
return (ret);
}
@ -206,7 +193,6 @@ fib4_lookup_debugnet(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
{
struct rib_head *rh;
struct radix_node *rn;
struct rtentry *rt;
struct nhop_object *nh;
KASSERT((fibnum < rt_numfibs), ("fib4_lookup_debugnet: bad fibnum"));
@ -225,12 +211,7 @@ fib4_lookup_debugnet(uint32_t fibnum, struct in_addr dst, uint32_t scopeid,
/* unlocked lookup */
rn = rh->rnh_matchaddr((void *)&sin4, &rh->head);
if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
rt = RNTORT(rn);
#ifdef RADIX_MPATH
if (rt_mpath_next(rt) != NULL)
rt = rt_mpath_selectrte(rt, 0);
#endif
nh = rt->rt_nhop;
nh = nhop_select((RNTORT(rn))->rt_nhop, 0);
/* Ensure route & ifp is UP */
if (RT_LINK_IS_UP(nh->nh_ifp)) {
if (flags & NHR_REF)

View File

@ -30,8 +30,6 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_mpath.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
@ -127,9 +125,6 @@ in_inithead(uint32_t fibnum)
return (NULL);
rh->rnh_preadd = rib4_preadd;
#ifdef RADIX_MPATH
rt_mpath_init_rnh(rh);
#endif
return (rh);
}

View File

@ -38,7 +38,6 @@ __FBSDID("$FreeBSD$");
#include "opt_ipsec.h"
#include "opt_kern_tls.h"
#include "opt_mbuf_stress_test.h"
#include "opt_mpath.h"
#include "opt_ratelimit.h"
#include "opt_route.h"
#include "opt_rss.h"
@ -470,11 +469,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct route *ro, int flags,
* for correct operation (as it is for ARP).
*/
uint32_t flowid;
#ifdef RADIX_MPATH
flowid = ntohl(ip->ip_src.s_addr ^ ip->ip_dst.s_addr);
#else
flowid = m->m_pkthdr.flowid;
#endif
ro->ro_nh = fib4_lookup(fibnum, dst->sin_addr, 0,
NHR_REF, flowid);

View File

@ -33,7 +33,6 @@ __FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_route.h"
#include "opt_mpath.h"
#include <sys/param.h>
#include <sys/systm.h>
@ -49,14 +48,11 @@ __FBSDID("$FreeBSD$");
#include <net/if_var.h>
#include <net/if_dl.h>
#include <net/route.h>
#include <net/route/route_ctl.h>
#include <net/route/route_var.h>
#include <net/route/nhop.h>
#include <net/vnet.h>
#ifdef RADIX_MPATH
#include <net/radix_mpath.h>
#endif
#include <netinet/in.h>
#include <netinet/in_var.h>
#include <netinet/ip_mroute.h>
@ -88,7 +84,6 @@ fib6_lookup(uint32_t fibnum, const struct in6_addr *dst6,
RIB_RLOCK_TRACKER;
struct rib_head *rh;
struct radix_node *rn;
struct rtentry *rt;
struct nhop_object *nh;
struct sockaddr_in6 sin6;
@ -111,12 +106,7 @@ fib6_lookup(uint32_t fibnum, const struct in6_addr *dst6,
RIB_RLOCK(rh);
rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
rt = RNTORT(rn);
#ifdef RADIX_MPATH
if (rt_mpath_next(rt) != NULL)
rt = rt_mpath_selectrte(rt, flowid);
#endif
nh = rt->rt_nhop;
nh = nhop_select((RNTORT(rn))->rt_nhop, flowid);
/* Ensure route & ifp is UP */
if (RT_LINK_IS_UP(nh->nh_ifp)) {
if (flags & NHR_REF)
@ -132,7 +122,7 @@ fib6_lookup(uint32_t fibnum, const struct in6_addr *dst6,
}
inline static int
check_urpf(const struct nhop_object *nh, uint32_t flags,
check_urpf_nhop(const struct nhop_object *nh, uint32_t flags,
const struct ifnet *src_if)
{
@ -149,21 +139,24 @@ check_urpf(const struct nhop_object *nh, uint32_t flags,
return (0);
}
#ifdef RADIX_MPATH
inline static int
check_urpf_mpath(struct rtentry *rt, uint32_t flags,
static int
check_urpf(struct nhop_object *nh, uint32_t flags,
const struct ifnet *src_if)
{
while (rt != NULL) {
if (check_urpf(rt->rt_nhop, flags, src_if) != 0)
#ifdef ROUTE_MPATH
if (NH_IS_NHGRP(nh)) {
struct weightened_nhop *wn;
uint32_t num_nhops;
wn = nhgrp_get_nhops((struct nhgrp_object *)nh, &num_nhops);
for (int i = 0; i < num_nhops; i++) {
if (check_urpf_nhop(wn[i].nh, flags, src_if) != 0)
return (1);
rt = rt_mpath_next(rt);
}
return (0);
}
} else
#endif
return (check_urpf_nhop(nh, flags, src_if));
}
/*
* Performs reverse path forwarding lookup.
@ -181,7 +174,6 @@ fib6_check_urpf(uint32_t fibnum, const struct in6_addr *dst6,
RIB_RLOCK_TRACKER;
struct rib_head *rh;
struct radix_node *rn;
struct rtentry *rt;
struct sockaddr_in6 sin6;
int ret;
@ -203,12 +195,7 @@ fib6_check_urpf(uint32_t fibnum, const struct in6_addr *dst6,
RIB_RLOCK(rh);
rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
rt = RNTORT(rn);
#ifdef RADIX_MPATH
ret = check_urpf_mpath(rt, flags, src_if);
#else
ret = check_urpf(rt->rt_nhop, flags, src_if);
#endif
ret = check_urpf(RNTORT(rn)->rt_nhop, flags, src_if);
RIB_RUNLOCK(rh);
return (ret);
}
@ -223,7 +210,6 @@ fib6_lookup_debugnet(uint32_t fibnum, const struct in6_addr *dst6,
{
struct rib_head *rh;
struct radix_node *rn;
struct rtentry *rt;
struct nhop_object *nh;
struct sockaddr_in6 sin6;
@ -245,8 +231,7 @@ fib6_lookup_debugnet(uint32_t fibnum, const struct in6_addr *dst6,
rn = rh->rnh_matchaddr((void *)&sin6, &rh->head);
if (rn != NULL && ((rn->rn_flags & RNF_ROOT) == 0)) {
rt = RNTORT(rn);
nh = rt->rt_nhop;
nh = nhop_select((RNTORT(rn))->rt_nhop, 0);
/* Ensure route & ifp is UP */
if (RT_LINK_IS_UP(nh->nh_ifp)) {
if (flags & NHR_REF)

View File

@ -64,8 +64,6 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_mpath.h"
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
@ -153,9 +151,6 @@ in6_inithead(uint32_t fibnum)
return (NULL);
rh->rnh_preadd = rib6_preadd;
#ifdef RADIX_MPATH
rt_mpath_init_rnh(rh);
#endif
rs = rib_subscribe_internal(rh, nd6_subscription_cb, NULL,
RIB_NOTIFY_IMMEDIATE, true);

View File

@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
#include "opt_inet.h"
#include "opt_inet6.h"
#include "opt_route.h"
#include <sys/param.h>
#include <sys/systm.h>
@ -1591,7 +1592,11 @@ void
nd6_subscription_cb(struct rib_head *rnh, struct rib_cmd_info *rc, void *arg)
{
/*
 * Routing table change callback: forwards the change described by @rc
 * to check_release_defrouter().  @rnh and @arg are not used here.
 */
#ifdef ROUTE_MPATH
/*
 * The change may involve a nexthop group, so it is split into
 * per-nexthop operations, each passed to check_release_defrouter().
 * NOTE(review): rib_decompose_notification() appears to return without
 * invoking the callback when no nexthop group is involved -- confirm
 * that single-path changes are still handled on ROUTE_MPATH kernels.
 */
rib_decompose_notification(rc, check_release_defrouter, NULL);
#else
check_release_defrouter(rc, NULL);
#endif
}
int

View File

@ -417,6 +417,7 @@ struct sockproto {
#define NET_RT_IFLISTL 5 /* Survey interface list, using 'l'en
* versions of msghdr structs. */
#define NET_RT_NHOP 6 /* dump routing nexthops */
#define NET_RT_NHGRP 7 /* dump routing nexthop groups */
#endif /* __BSD_VISIBLE */
/*

View File

@ -5,7 +5,7 @@
PROG= netstat
SRCS= if.c inet.c main.c mbuf.c mroute.c netisr.c nl_symbols.c route.c \
unix.c mroute6.c ipsec.c bpf.c pfkey.c sctp.c common.c nhops.c \
unix.c mroute6.c ipsec.c bpf.c pfkey.c sctp.c common.c nhops.c nhgrp.c \
nl_defs.h
nl_symbols.c: nlist_symbols

View File

@ -54,5 +54,22 @@ struct ifmap_entry {
struct ifmap_entry *prepare_ifmap(size_t *ifmap_size);
struct rt_msghdr;
/* Sortable reference to one message inside a sysctl dump buffer. */
struct nhops_map {
uint32_t idx; /* object index, used as the sort key */
struct rt_msghdr *rtm; /* message header of the object within the dump */
};
/* Result of a nexthop / nexthop group sysctl dump. */
struct nhops_dump {
void *nh_buf; /* raw dump buffer; entries in nh_map point into it */
struct nhops_map *nh_map; /* per-object references, sorted by index */
size_t nh_count; /* number of valid entries in nh_map */
};
void dump_nhops_sysctl(int fibnum, int af, struct nhops_dump *nd);
struct nhop_map;
void nhop_map_update(struct nhop_map *map, uint32_t idx, char *gw, char *ifname);
#endif

View File

@ -215,6 +215,7 @@ int mflag; /* show memory stats */
int noutputs = 0; /* how much outputs before we exit */
int numeric_addr; /* show addresses numerically */
int numeric_port; /* show ports numerically */
int Oflag; /* show nhgrp objects*/
int oflag; /* show nexthop objects*/
int Pflag; /* show TCP log ID */
static int pflag; /* show given protocol */
@ -250,7 +251,7 @@ main(int argc, char *argv[])
if (argc < 0)
exit(EXIT_FAILURE);
while ((ch = getopt(argc, argv, "46AaBbCdF:f:ghI:iLlM:mN:noPp:Qq:RrSTsuWw:xz"))
while ((ch = getopt(argc, argv, "46AaBbCdF:f:ghI:iLlM:mN:nOoPp:Qq:RrSTsuWw:xz"))
!= -1)
switch(ch) {
case '4':
@ -353,6 +354,9 @@ main(int argc, char *argv[])
case 'o':
oflag = 1;
break;
case 'O':
Oflag = 1;
break;
case 'P':
Pflag = 1;
break;
@ -509,6 +513,14 @@ main(int argc, char *argv[])
xo_finish();
exit(0);
}
if (Oflag) {
xo_open_container("statistics");
nhgrp_print(fib, af);
xo_close_container("statistics");
xo_finish();
exit(0);
}
if (gflag) {

View File

@ -163,3 +163,4 @@ void mroutepr(void);
void mrt_stats(void);
void bpf_stats(char *);
void nhops_print(int fibnum, int af);
void nhgrp_print(int fibnum, int af);

355
usr.bin/netstat/nhgrp.c Normal file
View File

@ -0,0 +1,355 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2020 Alexander V. Chernikov
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <net/if.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/route.h>
#include <net/route/nhop.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <libutil.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdbool.h>
#include <string.h>
#include <sysexits.h>
#include <unistd.h>
#include <err.h>
#include <libxo/xo.h>
#include "netstat.h"
#include "common.h"
#define WID_GW_DEFAULT(af) (((af) == AF_INET6) ? 40 : 18)
static int wid_gw;
static int wid_if = 10;
static int wid_nhidx = 8;
static int wid_refcnt = 8;
/*
 * Printable description of a single nexthop.
 * An entry with an empty ifname is treated as absent by nhop_get().
 */
struct nhop_entry {
char gw[64]; /* gateway column text */
char ifname[IFNAMSIZ]; /* interface name */
};
/* Array of nexthop descriptions, indexed by nexthop index. */
struct nhop_map {
struct nhop_entry *ptr; /* array of entries */
size_t size; /* number of entries in ptr; indexes >= size are invalid */
};
static struct nhop_map global_nhop_map;
static struct ifmap_entry *ifmap;
static size_t ifmap_size;
static struct nhop_entry *
nhop_get(struct nhop_map *map, uint32_t idx)
{
if (idx >= map->size)
return (NULL);
if (*map->ptr[idx].ifname == '\0')
return (NULL);
return &map->ptr[idx];
}
/*
 * Emits the column titles of the nexthop group table.
 * The first four columns all use wid_nhidx as their width; the gateway,
 * interface and refcount columns use wid_gw, wid_if and wid_refcnt.
 * The address family argument is unused.
 */
static void
print_nhgroup_header(int af1 __unused)
{
/* Each "%*.*s" consumes (min width, max width, string); "%*s" (width, string). */
xo_emit("{T:/%-*.*s}{T:/%-*.*s}{T:/%*.*s}{T:/%*.*s}{T:/%*.*s}"
"{T:/%*.*s}{T:/%*s}\n",
wid_nhidx, wid_nhidx, "GrpIdx",
wid_nhidx, wid_nhidx, "NhIdx",
wid_nhidx, wid_nhidx, "Weight",
wid_nhidx, wid_nhidx, "Slots",
wid_gw, wid_gw, "Gateway",
wid_if, wid_if, "Netif",
wid_refcnt, "Refcnt");
}
/*
 * Emits a padding field of @len characters: one space followed by
 *  (@len - 1) copies of @sym.
 *
 * The field is assembled as a "{P:...}" libxo format string in a
 *  fixed-size buffer, so @len is clamped to what the buffer can hold
 *  (and to zero from below) instead of writing outside of it.
 * @sym ends up inside a format string and therefore must not be a
 *  character that is special to libxo or printf.
 */
static void
print_padding(char sym, int len)
{
	char buffer[56];
	/* "{P:" prefix (3) + closing '}' (1) + NUL (1) */
	const int max_len = (int)sizeof(buffer) - 5;

	if (len < 0)
		len = 0;
	else if (len > max_len)
		len = max_len;

	memset(buffer, sym, sizeof(buffer));
	buffer[0] = '{';
	buffer[1] = 'P';
	buffer[2] = ':';
	buffer[3] = ' ';
	buffer[len + 3] = '}';
	buffer[len + 4] = '\0';
	xo_emit(buffer);
}
static void
print_nhgroup_entry_sysctl(const char *name, struct rt_msghdr *rtm,
struct nhgrp_external *nhge)
{
char buffer[128];
struct nhop_entry *ne;
struct nhgrp_nhop_external *ext_cp, *ext_dp;
struct nhgrp_container *nhg_cp, *nhg_dp;
nhg_cp = (struct nhgrp_container *)(nhge + 1);
if (nhg_cp->nhgc_type != NHG_C_TYPE_CNHOPS || nhg_cp->nhgc_subtype != 0)
return;
ext_cp = (struct nhgrp_nhop_external *)(nhg_cp + 1);
nhg_dp = (struct nhgrp_container *)((char *)nhg_cp + nhg_cp->nhgc_len);
if (nhg_dp->nhgc_type != NHG_C_TYPE_DNHOPS || nhg_dp->nhgc_subtype != 0)
return;
ext_dp = (struct nhgrp_nhop_external *)(nhg_dp + 1);
xo_open_instance(name);
snprintf(buffer, sizeof(buffer), "{[:-%d}{:nhgrp-index/%%lu}{]:} ", wid_nhidx);
xo_emit(buffer, nhge->nhg_idx);
/* nhidx */
print_padding('-', wid_nhidx);
/* weight */
print_padding('-', wid_nhidx);
/* slots */
print_padding('-', wid_nhidx);
print_padding('-', wid_gw);
print_padding('-', wid_if);
xo_emit("{t:nhg-refcnt/%*lu}", wid_refcnt, nhge->nhg_refcount);
xo_emit("\n");
xo_open_list("nhop-weights");
for (uint32_t i = 0; i < nhg_cp->nhgc_count; i++) {
/* TODO: optimize slots calculations */
uint32_t slots = 0;
for (uint32_t sidx = 0; sidx < nhg_dp->nhgc_count; sidx++) {
if (ext_dp[sidx].nh_idx == ext_cp[i].nh_idx)
slots++;
}
xo_open_instance("nhop-weight");
print_padding(' ', wid_nhidx);
// nh index
xo_emit("{t:nh-index/%*lu}", wid_nhidx, ext_cp[i].nh_idx);
xo_emit("{t:nh-weight/%*lu}", wid_nhidx, ext_cp[i].nh_weight);
xo_emit("{t:nh-slots/%*lu}", wid_nhidx, slots);
ne = nhop_get(&global_nhop_map, ext_cp[i].nh_idx);
if (ne != NULL) {
xo_emit("{t:nh-gw/%*.*s}", wid_gw, wid_gw, ne->gw);
xo_emit("{t:nh-interface/%*.*s}", wid_if, wid_if, ne->ifname);
}
xo_emit("\n");
xo_close_instance("nhop-weight");
}
xo_close_list("nhop-weights");
xo_close_instance(name);
}
/*
 * qsort(3) comparator ordering struct nhops_map entries by ascending idx.
 * Returns -1, 0 or 1.
 */
static int
cmp_nhg_idx(const void *_a, const void *_b)
{
	const struct nhops_map *lhs = _a;
	const struct nhops_map *rhs = _b;

	/* Branch-free three-way compare; avoids unsigned subtraction wrap. */
	return ((lhs->idx > rhs->idx) - (lhs->idx < rhs->idx));
}
/*
 * Fetches the nexthop groups of the given @fibnum / @af via the
 *  NET_RT_NHGRP routing sysctl and returns them in @nd, sorted by
 *  group index.
 *
 * On return, nd->nh_buf holds the raw dump, nd->nh_map the sorted
 *  per-group references into that buffer and nd->nh_count their number.
 *  Both nh_buf and nh_map are heap-allocated.
 * The program is terminated on sysctl or allocation failure.
 */
static void
dump_nhgrp_sysctl(int fibnum, int af, struct nhops_dump *nd)
{
	size_t needed;
	int mib[7];
	char *buf, *next, *lim;
	struct rt_msghdr *rtm;
	struct nhgrp_external *nhg;
	struct nhops_map *nhg_map, *new_map;
	size_t nhg_count, nhg_size;

	mib[0] = CTL_NET;
	mib[1] = PF_ROUTE;
	mib[2] = 0;
	mib[3] = af;
	mib[4] = NET_RT_NHGRP;
	mib[5] = 0;
	mib[6] = fibnum;
	/* The first call only estimates the buffer size, the second fetches. */
	if (sysctl(mib, nitems(mib), NULL, &needed, NULL, 0) < 0)
		err(EX_OSERR, "sysctl: net.route.0.%d.nhgrpdump.%d estimate",
		    af, fibnum);
	if ((buf = malloc(needed)) == NULL)
		errx(2, "malloc(%lu)", (unsigned long)needed);
	if (sysctl(mib, nitems(mib), buf, &needed, NULL, 0) < 0)
		err(1, "sysctl: net.route.0.%d.nhgrpdump.%d", af, fibnum);
	lim = buf + needed;

	/*
	 * Nexthop groups are received unsorted. Collect everything first,
	 * and sort prior to displaying.
	 */
	nhg_count = 0;
	nhg_size = 16;
	nhg_map = calloc(nhg_size, sizeof(struct nhops_map));
	if (nhg_map == NULL)
		errx(2, "calloc(%lu)",
		    (unsigned long)(nhg_size * sizeof(struct nhops_map)));
	for (next = buf; next < lim; next += rtm->rtm_msglen) {
		rtm = (struct rt_msghdr *)next;
		if (rtm->rtm_version != RTM_VERSION)
			continue;

		if (nhg_count >= nhg_size) {
			/*
			 * Grow through a temporary so a failed realloc()
			 * is detected rather than dereferenced.
			 */
			nhg_size *= 2;
			new_map = realloc(nhg_map,
			    nhg_size * sizeof(struct nhops_map));
			if (new_map == NULL)
				errx(2, "realloc(%lu)", (unsigned long)
				    (nhg_size * sizeof(struct nhops_map)));
			nhg_map = new_map;
		}

		nhg = (struct nhgrp_external *)(rtm + 1);
		nhg_map[nhg_count].idx = nhg->nhg_idx;
		nhg_map[nhg_count].rtm = rtm;
		nhg_count++;
	}

	if (nhg_count > 0)
		qsort(nhg_map, nhg_count, sizeof(struct nhops_map), cmp_nhg_idx);
	nd->nh_buf = buf;
	nd->nh_count = nhg_count;
	nd->nh_map = nhg_map;
}
/*
 * Dumps and prints all nexthop groups of the given @fibnum / @af.
 * Nothing but the enclosing libxo containers is emitted when there are
 *  no groups.
 */
static void
print_nhgrp_sysctl(int fibnum, int af)
{
	struct nhops_dump nd;
	struct nhgrp_external *nhg;
	struct rt_msghdr *rtm;

	dump_nhgrp_sysctl(fibnum, af, &nd);

	xo_open_container("nhgrp-table");
	xo_open_list("rt-family");
	if (nd.nh_count > 0) {
		wid_gw = WID_GW_DEFAULT(af);
		xo_open_instance("rt-family");
		pr_family(af);
		xo_open_list("nhgrp-entry");

		print_nhgroup_header(af);

		for (size_t i = 0; i < nd.nh_count; i++) {
			rtm = nd.nh_map[i].rtm;
			nhg = (struct nhgrp_external *)(rtm + 1);
			print_nhgroup_entry_sysctl("nhgrp-entry", rtm, nhg);
		}
		/* Balance the list and instance opened above. */
		xo_close_list("nhgrp-entry");
		xo_close_instance("rt-family");
	}
	xo_close_list("rt-family");
	xo_close_container("nhgrp-table");
	/* Both the index map and the raw buffer are owned by us. */
	free(nd.nh_map);
	free(nd.nh_buf);
}
/*
 * Records the printable gateway and interface name of nexthop @nh in
 *  the global nexthop map, keyed by the nexthop index.
 *
 * The interface name is taken from ifmap ("---" if the slot is empty,
 *  an empty string if the interface index is out of range).  The gateway
 *  is the formatted gateway address for NHF_GATEWAY nexthops and
 *  "<ifname>/resolve" otherwise.
 */
static void
update_global_map(struct nhop_external *nh)
{
	char ifn[128] = { 0 };
	char gwbuf[64];
	struct nhop_addrs *addrs;
	struct sockaddr *gw;

	/* Address block follows the nexthop; the gateway lives inside it. */
	addrs = (struct nhop_addrs *)((char *)nh + nh->nh_len);
	gw = (struct sockaddr *)((char *)addrs + addrs->gw_sa_off);

	if (nh->ifindex < (uint32_t)ifmap_size) {
		strlcpy(ifn, ifmap[nh->ifindex].ifname, sizeof(ifn));
		if (ifn[0] == '\0')
			strlcpy(ifn, "---", sizeof(ifn));
	}

	if ((nh->nh_flags & NHF_GATEWAY) == 0) {
		snprintf(gwbuf, sizeof(gwbuf), "%s/resolve", ifn);
	} else {
		const char *txt = fmt_sockaddr(gw, NULL, RTF_HOST);

		strlcpy(gwbuf, txt, sizeof(gwbuf));
	}

	nhop_map_update(&global_nhop_map, nh->nh_idx, gwbuf, ifn);
}
/*
 * Fills the global nexthop map with the printable data of every nexthop
 *  of the given @fibnum / @af, so that group members can later be shown
 *  with their gateway and interface.
 */
static void
prepare_nh_map(int fibnum, int af)
{
	struct nhops_dump nd;
	struct nhop_external *nh;
	struct rt_msghdr *rtm;

	dump_nhops_sysctl(fibnum, af, &nd);

	for (size_t i = 0; i < nd.nh_count; i++) {
		rtm = nd.nh_map[i].rtm;
		nh = (struct nhop_external *)(rtm + 1);
		update_global_map(nh);
	}

	/*
	 * The dump is no longer needed: release the index map together
	 * with the raw buffer instead of leaking it.
	 */
	free(nd.nh_map);
	free(nd.nh_buf);
}
/*
 * Prints the nexthop groups of @fibnum for address family @af.
 *
 * A @fibnum of -1 selects the fib reported by net.my_fibnum (fib 0 if
 *  that cannot be read).  The fib is validated against net.fibs (a single
 *  fib is assumed if that cannot be read); an invalid fib terminates the
 *  program with EX_USAGE.
 */
void
nhgrp_print(int fibnum, int af)
{
	size_t intsize = sizeof(int);
	int numfibs;

	if (fibnum == -1) {
		if (sysctlbyname("net.my_fibnum", &fibnum, &intsize,
		    NULL, 0) == -1)
			fibnum = 0;
	}
	if (sysctlbyname("net.fibs", &numfibs, &intsize, NULL, 0) == -1)
		numfibs = 1;
	if (fibnum < 0 || fibnum >= numfibs)
		errx(EX_USAGE, "%d: invalid fib", fibnum);

	/* Interface and nexthop data is needed to describe group members. */
	ifmap = prepare_ifmap(&ifmap_size);
	prepare_nh_map(fibnum, af);

	xo_open_container("route-nhgrp-information");
	xo_emit("{T:Nexthop groups data}");
	/* The fib number is shown for non-default fibs only. */
	if (fibnum != 0)
		xo_emit(" ({L:fib}: {:fib/%d})", fibnum);
	xo_emit("\n");
	print_nhgrp_sysctl(fibnum, af);
	xo_close_container("route-nhgrp-information");
}

View File

@ -118,8 +118,6 @@ struct nhop_map {
};
static struct nhop_map global_nhop_map;
static void nhop_map_update(struct nhop_map *map, uint32_t idx,
char *gw, char *ifname);
static struct nhop_entry *nhop_get(struct nhop_map *map, uint32_t idx);
@ -204,7 +202,7 @@ print_nhop_header(int af1 __unused)
}
}
static void
void
nhop_map_update(struct nhop_map *map, uint32_t idx, char *gw, char *ifname)
{
if (idx >= map->size) {
@ -322,11 +320,6 @@ print_nhop_entry_sysctl(const char *name, struct rt_msghdr *rtm, struct nhop_ext
xo_close_instance(name);
}
struct nhops_map {
uint32_t idx;
struct rt_msghdr *rtm;
};
static int
cmp_nh_idx(const void *_a, const void *_b)
{
@ -342,15 +335,14 @@ cmp_nh_idx(const void *_a, const void *_b)
return (0);
}
static void
print_nhops_sysctl(int fibnum, int af)
void
dump_nhops_sysctl(int fibnum, int af, struct nhops_dump *nd)
{
size_t needed;
int mib[7];
char *buf, *next, *lim;
struct rt_msghdr *rtm;
struct nhop_external *nh;
int fam;
struct nhops_map *nh_map;
size_t nh_count, nh_size;
@ -369,8 +361,6 @@ print_nhops_sysctl(int fibnum, int af)
if (sysctl(mib, nitems(mib), buf, &needed, NULL, 0) < 0)
err(1, "sysctl: net.route.0.%d.nhdump.%d", af, fibnum);
lim = buf + needed;
xo_open_container("nhop-table");
xo_open_list("rt-family");
/*
* nexhops are received unsorted. Collect everything first, sort and then display
@ -395,9 +385,27 @@ print_nhops_sysctl(int fibnum, int af)
nh_count++;
}
if (nh_count > 0) {
if (nh_count > 0)
qsort(nh_map, nh_count, sizeof(struct nhops_map), cmp_nh_idx);
nh = (struct nhop_external *)(nh_map[0].rtm + 1);
nd->nh_buf = buf;
nd->nh_count = nh_count;
nd->nh_map = nh_map;
}
static void
print_nhops_sysctl(int fibnum, int af)
{
struct nhops_dump nd;
struct nhop_external *nh;
int fam;
struct rt_msghdr *rtm;
dump_nhops_sysctl(fibnum, af, &nd);
xo_open_container("nhop-table");
xo_open_list("rt-family");
if (nd.nh_count > 0) {
nh = (struct nhop_external *)(nd.nh_map[0].rtm + 1);
fam = nh->nh_family;
wid_dst = WID_GW_DEFAULT(fam);
@ -415,8 +423,8 @@ print_nhops_sysctl(int fibnum, int af)
print_nhop_header(fam);
for (size_t i = 0; i < nh_count; i++) {
rtm = nh_map[i].rtm;
for (size_t i = 0; i < nd.nh_count; i++) {
rtm = nd.nh_map[i].rtm;
nh = (struct nhop_external *)(rtm + 1);
print_nhop_entry_sysctl("nh-entry", rtm, nh);
}
@ -426,7 +434,7 @@ print_nhops_sysctl(int fibnum, int af)
}
xo_close_list("rt-family");
xo_close_container("nhop-table");
free(buf);
free(nd.nh_buf);
}
static void