Make net.fibs growable.

Allow the number of fibs in each vnet to be grown dynamically.

This change alters current behavior. Currently, if one defines
 ROUTETABLES > 1 in the kernel config, each vnet will be created
 with the number of fibs defined in the kernel config.
 After this commit vnets will be created with fibs=1.

Dynamic net.fibs is not compatible with net.add_addr_allfibs.
 The plan is to deprecate the latter and make
 net.add_addr_allfibs=0 default behaviour.

Reviewed by:	glebius
Relnotes:	yes
Differential Revision:	https://reviews.freebsd.org/D26062
This commit is contained in:
Alexander V. Chernikov 2020-08-21 21:34:52 +00:00
parent 19337211f8
commit f5247a232a
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=364465
5 changed files with 334 additions and 181 deletions

View File

@ -4102,6 +4102,7 @@ net/route/nhop_utils.c standard
net/route/route_ctl.c standard
net/route/route_ddb.c optional ddb
net/route/route_helpers.c standard
net/route/route_tables.c standard
net/route/route_temporal.c standard
net/rss_config.c optional inet rss | inet6 rss
net/rtsock.c standard

View File

@ -74,29 +74,6 @@
#include <netinet/in.h>
#include <netinet/ip_mroute.h>
#include <vm/uma.h>
#define RT_MAXFIBS UINT16_MAX
/* Kernel config default option. */
#ifdef ROUTETABLES
#if ROUTETABLES <= 0
#error "ROUTETABLES defined too low"
#endif
#if ROUTETABLES > RT_MAXFIBS
#error "ROUTETABLES defined too big"
#endif
#define RT_NUMFIBS ROUTETABLES
#endif /* ROUTETABLES */
/* Initialize to default if not otherwise set. */
#ifndef RT_NUMFIBS
#define RT_NUMFIBS 1
#endif
/* This is read-only.. */
u_int rt_numfibs = RT_NUMFIBS;
SYSCTL_UINT(_net, OID_AUTO, fibs, CTLFLAG_RDTUN, &rt_numfibs, 0, "");
/*
* By default add routes to all fibs for new interfaces.
* Once this is set to 0 then only allocate routes on interface
@ -118,10 +95,6 @@ VNET_PCPUSTAT_SYSINIT(rtstat);
VNET_PCPUSTAT_SYSUNINIT(rtstat);
#endif
VNET_DEFINE(struct rib_head *, rt_tables);
#define V_rt_tables VNET(rt_tables)
EVENTHANDLER_LIST_DEFINE(rt_addrmsg);
static int rt_ifdelroute(const struct rtentry *rt, const struct nhop_object *,
@ -129,63 +102,6 @@ static int rt_ifdelroute(const struct rtentry *rt, const struct nhop_object *,
static int rt_exportinfo(struct rtentry *rt, struct rt_addrinfo *info,
int flags);
/*
* handler for net.my_fibnum
*/
static int
sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
{
int fibnum;
int error;
fibnum = curthread->td_proc->p_fibnum;
error = sysctl_handle_int(oidp, &fibnum, 0, req);
return (error);
}
SYSCTL_PROC(_net, OID_AUTO, my_fibnum,
CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
&sysctl_my_fibnum, "I",
"default FIB of caller");
static __inline struct rib_head **
rt_tables_get_rnh_ptr(int table, int fam)
{
struct rib_head **rnh;
KASSERT(table >= 0 && table < rt_numfibs,
("%s: table out of bounds (0 <= %d < %d)", __func__, table,
rt_numfibs));
KASSERT(fam >= 0 && fam < (AF_MAX + 1),
("%s: fam out of bounds (0 <= %d < %d)", __func__, fam, AF_MAX+1));
/* rnh is [fib=0][af=0]. */
rnh = (struct rib_head **)V_rt_tables;
/* Get the offset to the requested table and fam. */
rnh += table * (AF_MAX+1) + fam;
return (rnh);
}
struct rib_head *
rt_tables_get_rnh(int table, int fam)
{
return (*rt_tables_get_rnh_ptr(table, fam));
}
u_int
rt_tables_get_gen(int table, int fam)
{
struct rib_head *rnh;
rnh = *rt_tables_get_rnh_ptr(table, fam);
KASSERT(rnh != NULL, ("%s: NULL rib_head pointer table %d fam %d",
__func__, table, fam));
return (rnh->rnh_gen);
}
/*
* route initialization must occur before ip6_init2(), which happens at
* SI_ORDER_MIDDLE.
@ -194,89 +110,10 @@ static void
route_init(void)
{
/* whack the tunable ints into line. */
if (rt_numfibs > RT_MAXFIBS)
rt_numfibs = RT_MAXFIBS;
if (rt_numfibs == 0)
rt_numfibs = 1;
nhops_init();
}
SYSINIT(route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_THIRD, route_init, NULL);
static void
vnet_route_init(const void *unused __unused)
{
struct domain *dom;
struct rib_head **rnh;
int table;
int fam;
V_rt_tables = malloc(rt_numfibs * (AF_MAX+1) *
sizeof(struct rib_head *), M_RTABLE, M_WAITOK|M_ZERO);
vnet_rtzone_init();
for (dom = domains; dom; dom = dom->dom_next) {
if (dom->dom_rtattach == NULL)
continue;
for (table = 0; table < rt_numfibs; table++) {
fam = dom->dom_family;
if (table != 0 && fam != AF_INET6 && fam != AF_INET)
break;
rnh = rt_tables_get_rnh_ptr(table, fam);
if (rnh == NULL)
panic("%s: rnh NULL", __func__);
*rnh = dom->dom_rtattach(table);
}
}
}
VNET_SYSINIT(vnet_route_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH,
vnet_route_init, 0);
#ifdef VIMAGE
static void
vnet_route_uninit(const void *unused __unused)
{
int table;
int fam;
struct domain *dom;
struct rib_head **rnh;
for (dom = domains; dom; dom = dom->dom_next) {
if (dom->dom_rtdetach == NULL)
continue;
for (table = 0; table < rt_numfibs; table++) {
fam = dom->dom_family;
if (table != 0 && fam != AF_INET6 && fam != AF_INET)
break;
rnh = rt_tables_get_rnh_ptr(table, fam);
if (rnh == NULL)
panic("%s: rnh NULL", __func__);
dom->dom_rtdetach(*rnh);
}
}
/*
* dom_rtdetach calls rt_table_destroy(), which
* schedules deletion for all rtentries, nexthops and control
* structures. Wait for the destruction callbacks to fire.
* Note that this should result in freeing all rtentries, but
* nexthops deletions will be scheduled for the next epoch run
* and will be completed after vnet teardown.
*/
epoch_drain_callbacks(net_epoch_preempt);
free(V_rt_tables, M_RTABLE);
vnet_rtzone_destroy();
}
VNET_SYSUNINIT(vnet_route_uninit, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST,
vnet_route_uninit, 0);
#endif
struct rib_head *
rt_table_init(int offset, int family, u_int fibnum)
{
@ -347,21 +184,6 @@ rt_table_destroy(struct rib_head *rh)
free(rh, M_RTABLE);
}
#ifndef _SYS_SYSPROTO_H_
struct setfib_args {
int fibnum;
};
#endif
int
sys_setfib(struct thread *td, struct setfib_args *uap)
{
if (uap->fibnum < 0 || uap->fibnum >= rt_numfibs)
return EINVAL;
td->td_proc->p_fibnum = uap->fibnum;
return (0);
}
/*
* Adds a temporal redirect entry to the routing table.
* @fibnum: fib number

View File

@ -115,7 +115,10 @@ typedef volatile u_int rt_gen_t; /* tree generation (for adds) */
#define RT_DEFAULT_FIB 0 /* Explicitly mark fib=0 restricted cases */
#define RT_ALL_FIBS -1 /* Announce event for every fib */
#ifdef _KERNEL
extern u_int rt_numfibs; /* number of usable routing tables */
VNET_DECLARE(uint32_t, _rt_numfibs); /* number of existing route tables */
#define V_rt_numfibs VNET(_rt_numfibs)
/* temporary compat arg */
#define rt_numfibs V_rt_numfibs
VNET_DECLARE(u_int, rt_add_addr_allfibs); /* Announce interfaces to all fibs */
#define V_rt_add_addr_allfibs VNET(rt_add_addr_allfibs)
#endif
@ -379,7 +382,7 @@ void rt_newmaddrmsg(int, struct ifmultiaddr *);
void rt_maskedcopy(struct sockaddr *, struct sockaddr *, struct sockaddr *);
struct rib_head *rt_table_init(int, int, u_int);
void rt_table_destroy(struct rib_head *);
u_int rt_tables_get_gen(int table, int fam);
u_int rt_tables_get_gen(uint32_t table, sa_family_t family);
int rtsock_addrmsg(int, struct ifaddr *, int);
int rtsock_routemsg(int, struct rtentry *, struct ifnet *ifp, int, int);

View File

@ -0,0 +1,326 @@
/*-
* SPDX-License-Identifier: BSD-3-Clause
*
* Copyright (c) 1980, 1986, 1991, 1993
* The Regents of the University of California. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. Neither the name of the University nor the names of its contributors
* may be used to endorse or promote products derived from this software
* without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
/************************************************************************
* Note: In this file a 'fib' is a "forwarding information base" *
* Which is the new name for an in kernel routing (next hop) table. *
***********************************************************************/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include "opt_route.h"
#include <sys/param.h>
#include <sys/socket.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/jail.h>
#include <sys/sysctl.h>
#include <sys/syslog.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/sx.h>
#include <sys/domain.h>
#include <sys/sysproto.h>
#include <net/vnet.h>
#include <net/route.h>
#include <net/route/route_var.h>
/*
 * Kernel config default option.
 * When the kernel config defines ROUTETABLES it must lie within
 * [1, RT_MAXFIBS]; it then becomes the compile-time default RT_NUMFIBS.
 */
#ifdef ROUTETABLES
#if ROUTETABLES <= 0
#error "ROUTETABLES defined too low"
#endif
#if ROUTETABLES > RT_MAXFIBS
#error "ROUTETABLES defined too big"
#endif
#define RT_NUMFIBS ROUTETABLES
#endif /* ROUTETABLES */
/* Initialize to default if not otherwise set. */
#ifndef RT_NUMFIBS
#define RT_NUMFIBS 1
#endif
/* Forward declaration: sysctl_fibs() calls this ahead of its definition. */
static void grow_rtables(uint32_t num_fibs);
/* Per-vnet sx lock; RTABLES_LOCK() takes it exclusively. */
VNET_DEFINE_STATIC(struct sx, rtables_lock);
#define V_rtables_lock VNET(rtables_lock)
#define RTABLES_LOCK() sx_xlock(&V_rtables_lock)
#define RTABLES_UNLOCK() sx_xunlock(&V_rtables_lock)
#define RTABLES_LOCK_INIT() sx_init(&V_rtables_lock, "rtables lock")
#define RTABLES_LOCK_ASSERT() sx_assert(&V_rtables_lock, SA_LOCKED)
/*
 * Per-vnet index array of rib_head pointers, indexed as
 * [fib * (AF_MAX + 1) + family].
 */
VNET_DEFINE_STATIC(struct rib_head **, rt_tables);
#define V_rt_tables VNET(rt_tables)
/* Per-vnet number of fibs; starts at the compile-time default. */
VNET_DEFINE(uint32_t, _rt_numfibs) = RT_NUMFIBS;
/*
 * Handler for net.my_fibnum.
 * Reports the fib number of the calling thread's process (read-only).
 */
static int
sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
{
	int caller_fib = curthread->td_proc->p_fibnum;

	/* Hand a local copy of the value to the generic int handler. */
	return (sysctl_handle_int(oidp, &caller_fib, 0, req));
}
SYSCTL_PROC(_net, OID_AUTO, my_fibnum,
    CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
    &sysctl_my_fibnum, "I",
    "default FIB of caller");
/*
 * Clamps a requested number of routing tables into [1, RT_MAXFIBS].
 * Zero is raised to 1; anything above RT_MAXFIBS is lowered to RT_MAXFIBS.
 */
static uint32_t
normalize_num_rtables(uint32_t num_rtables)
{
	if (num_rtables == 0)
		return (1);
	if (num_rtables > RT_MAXFIBS)
		return (RT_MAXFIBS);
	return (num_rtables);
}
/*
 * Handler for net.fibs.
 * Reports the number of fibs in the current vnet and lets the caller
 * raise it.  The requested value is normalized first; a value below the
 * current count is rejected with ENOTCAPABLE, since routing tables are
 * never shrunk.
 */
static int
sysctl_fibs(SYSCTL_HANDLER_ARGS)
{
	uint32_t requested;
	int rc;

	RTABLES_LOCK();
	requested = V_rt_numfibs;
	rc = sysctl_handle_32(oidp, &requested, 0, req);
	if (rc != 0)
		goto done;

	requested = normalize_num_rtables(requested);
	if (requested < V_rt_numfibs)
		rc = ENOTCAPABLE;
	else if (requested > V_rt_numfibs)
		grow_rtables(requested);
done:
	RTABLES_UNLOCK();
	return (rc);
}
SYSCTL_PROC(_net, OID_AUTO, fibs,
    CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RWTUN | CTLFLAG_MPSAFE, NULL, 0,
    &sysctl_fibs, "IU",
    "set number of fibs");
/*
 * setfib(2) system call: sets the fib of the calling process.
 * Returns EINVAL when the requested fib number is negative or is not
 * below the number of fibs in the thread's vnet; 0 otherwise.
 */
int
sys_setfib(struct thread *td, struct setfib_args *uap)
{
	int error;

	CURVNET_SET(TD_TO_VNET(td));
	if (uap->fibnum < 0 || uap->fibnum >= V_rt_numfibs) {
		error = EINVAL;
	} else {
		td->td_proc->p_fibnum = uap->fibnum;
		error = 0;
	}
	CURVNET_RESTORE();

	return (error);
}
/*
 * Grows the number of routing tables in the current vnet.
 * Builds a new index array sized for num_tables fibs, carries over the
 * existing rib_head pointers, attaches a table for every still-empty
 * slot of each domain that provides dom_rtattach, and then publishes
 * the new array.
 *
 * Must be called with the rtables lock held.  num_tables must not be
 * smaller than the current number of fibs.
 */
static void
grow_rtables(uint32_t num_tables)
{
	struct domain *dom;
	struct rib_head **prnh;
	struct rib_head **new_rt_tables, **old_rt_tables;
	int family;

	RTABLES_LOCK_ASSERT();

	KASSERT(num_tables >= V_rt_numfibs, ("num_tables(%u) < rt_numfibs(%u)\n",
	    num_tables, V_rt_numfibs));

	/* Element size is tied to the array's own type. */
	new_rt_tables = mallocarray(num_tables * (AF_MAX + 1),
	    sizeof(*new_rt_tables), M_RTABLE, M_WAITOK | M_ZERO);

	/*
	 * Current rt_tables layout:
	 * fib0[af0, af1, af2, .., AF_MAX]fib1[af0, af1, af2, .., AF_MAX]..
	 * this allows to copy existing tables data by using memcpy()
	 */
	if (V_rt_tables != NULL)
		memcpy(new_rt_tables, V_rt_tables,
		    V_rt_numfibs * (AF_MAX + 1) * sizeof(*new_rt_tables));

	/* Populate the remainders */
	for (dom = domains; dom; dom = dom->dom_next) {
		if (dom->dom_rtattach == NULL)
			continue;
		family = dom->dom_family;
		/* Unsigned index: num_tables is uint32_t. */
		for (uint32_t i = 0; i < num_tables; i++) {
			prnh = &new_rt_tables[i * (AF_MAX + 1) + family];
			/* Slot already carried over from the old array. */
			if (*prnh != NULL)
				continue;
			*prnh = dom->dom_rtattach(i);
			/* On failure the slot stays NULL; only log it. */
			if (*prnh == NULL)
				log(LOG_ERR, "unable to create routing tables for domain %d\n",
				    dom->dom_family);
		}
	}

	/*
	 * Update rtables pointer.
	 * Ensure all writes to new_rt_tables has been completed before
	 * switching pointer.
	 */
	atomic_thread_fence_rel();
	old_rt_tables = V_rt_tables;
	V_rt_tables = new_rt_tables;

	/* Wait till all cpus see new pointers */
	atomic_thread_fence_rel();
	epoch_wait_preempt(net_epoch_preempt);

	/*
	 * Finally, set number of fibs to a new value.  This happens only
	 * after the larger array is in place, so the count never exceeds
	 * the size of the published array.
	 */
	V_rt_numfibs = num_tables;

	if (old_rt_tables != NULL)
		free(old_rt_tables, M_RTABLE);
}
/*
 * Per-vnet initialization of routing tables.
 * The default vnet starts with RT_NUMFIBS fibs unless the "net.fibs"
 * tunable overrides it (the value is normalized); every other vnet
 * starts with a single fib.  The initial tables are then allocated
 * under the rtables lock.
 */
static void
vnet_rtables_init(const void *unused __unused)
{
	uint32_t initial_fibs = 1;

	if (IS_DEFAULT_VNET(curvnet)) {
		int tunable_fibs = RT_NUMFIBS;

		TUNABLE_INT_FETCH("net.fibs", &tunable_fibs);
		initial_fibs = normalize_num_rtables(tunable_fibs);
	}
	V_rt_numfibs = initial_fibs;

	vnet_rtzone_init();

	RTABLES_LOCK_INIT();

	RTABLES_LOCK();
	grow_rtables(V_rt_numfibs);
	RTABLES_UNLOCK();
}
VNET_SYSINIT(vnet_rtables_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH,
    vnet_rtables_init, 0);
#ifdef VIMAGE
/*
 * Per-vnet teardown of routing tables.
 * Detaches every existing routing table through its domain's
 * dom_rtdetach hook, waits for the deferred destruction callbacks,
 * then releases the index array, the rt zone and the rtables lock.
 */
static void
rtables_destroy(const void *unused __unused)
{
	struct rib_head *rnh;
	struct domain *dom;
	int family;

	RTABLES_LOCK();
	for (dom = domains; dom; dom = dom->dom_next) {
		if (dom->dom_rtdetach == NULL)
			continue;
		family = dom->dom_family;
		for (uint32_t i = 0; i < V_rt_numfibs; i++) {
			rnh = rt_tables_get_rnh(i, family);
			/*
			 * grow_rtables() leaves a slot NULL when the domain
			 * has no dom_rtattach or when attaching failed;
			 * there is nothing to detach in that case.
			 */
			if (rnh == NULL)
				continue;
			dom->dom_rtdetach(rnh);
		}
	}
	RTABLES_UNLOCK();

	/*
	 * dom_rtdetach calls rt_table_destroy(), which
	 * schedules deletion for all rtentries, nexthops and control
	 * structures. Wait for the destruction callbacks to fire.
	 * Note that this should result in freeing all rtentries, but
	 * nexthops deletions will be scheduled for the next epoch run
	 * and will be completed after vnet teardown.
	 */
	epoch_drain_callbacks(net_epoch_preempt);

	free(V_rt_tables, M_RTABLE);
	vnet_rtzone_destroy();

	/* Pair the sx_init() done in vnet_rtables_init(). */
	sx_destroy(&V_rtables_lock);
}
VNET_SYSUNINIT(rtables_destroy, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST,
    rtables_destroy, 0);
#endif
/*
 * Returns the rib_head stored for the given fib and address family in
 * the current vnet's index array.  The result may be NULL when no table
 * was attached for that slot.
 *
 * Both arguments are checked with KASSERT only; callers must pass
 * table < V_rt_numfibs and family <= AF_MAX.
 */
static inline struct rib_head *
rt_tables_get_rnh_ptr(uint32_t table, sa_family_t family)
{
	struct rib_head **prnh;

	/* table and V_rt_numfibs are uint32_t, hence %u. */
	KASSERT(table < V_rt_numfibs,
	    ("%s: table out of bounds (%u < %u)", __func__, table,
	    V_rt_numfibs));
	KASSERT(family < (AF_MAX + 1),
	    ("%s: fam out of bounds (%d < %d)", __func__, family, AF_MAX + 1));

	/* rnh is [fib=0][af=0]. */
	prnh = V_rt_tables;
	/* Get the offset to the requested table and fam. */
	prnh += table * (AF_MAX + 1) + family;

	return (*prnh);
}
/*
 * Public accessor: returns the rib_head for the given fib and address
 * family in the current vnet, as looked up by rt_tables_get_rnh_ptr().
 */
struct rib_head *
rt_tables_get_rnh(uint32_t table, sa_family_t family)
{
	struct rib_head *rnh;

	rnh = rt_tables_get_rnh_ptr(table, family);
	return (rnh);
}
/*
 * Returns the generation counter (rnh_gen) of the routing table for the
 * given fib and address family.  The table must exist; this is checked
 * with KASSERT only.
 */
u_int
rt_tables_get_gen(uint32_t table, sa_family_t family)
{
	struct rib_head *rnh;

	rnh = rt_tables_get_rnh_ptr(table, family);
	/* table is uint32_t, hence %u. */
	KASSERT(rnh != NULL, ("%s: NULL rib_head pointer table %u family %d",
	    __func__, table, family));
	return (rnh->rnh_gen);
}

View File

@ -79,6 +79,7 @@ struct rib_head {
/* Constants */
#define RIB_MAX_RETRIES 3
#define RT_MAXFIBS UINT16_MAX
/* Macro for verifying fields in af-specific 'struct route' structures */
#define CHK_STRUCT_FIELD_GENERIC(_s1, _f1, _s2, _f2) \
@ -104,7 +105,7 @@ CHK_STRUCT_ROUTE_FIELDS(_ro_new); \
_Static_assert(__offsetof(struct route, ro_dst) == __offsetof(_ro_new, _dst_new),\
"ro_dst and " #_dst_new " are at different offset")
struct rib_head *rt_tables_get_rnh(int fib, int family);
struct rib_head *rt_tables_get_rnh(uint32_t table, sa_family_t family);
void rt_mpath_init_rnh(struct rib_head *rnh);
int rt_getifa_fib(struct rt_addrinfo *info, u_int fibnum);
void rt_setmetrics(const struct rt_addrinfo *info, struct rtentry *rt);