Further split nhop creation and rtable operations.

As nexthops are immutable, some operations such as route attribute changes
 require nexthop fetching, forking, modification and route switching.
These operations are not atomic, so they may need to be retried multiple
 times in the presence of multiple speakers changing the same route.

This change introduces a "synchronisation" primitive, change_route_conditional(),
 simplifying the logic for route changes and upcoming multipath operations.

Differential Revision:	https://reviews.freebsd.org/D26216
This commit is contained in:
Alexander V. Chernikov 2020-08-28 21:59:10 +00:00
parent 404ff76bda
commit b122304f6a
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=364940
2 changed files with 251 additions and 124 deletions

View File

@ -78,9 +78,15 @@ struct rib_subscription {
static int add_route(struct rib_head *rnh, struct rt_addrinfo *info, static int add_route(struct rib_head *rnh, struct rt_addrinfo *info,
struct rib_cmd_info *rc); struct rib_cmd_info *rc);
static int add_route_nhop(struct rib_head *rnh, struct rtentry *rt,
struct rt_addrinfo *info, struct route_nhop_data *rnd,
struct rib_cmd_info *rc);
static int del_route(struct rib_head *rnh, struct rt_addrinfo *info, static int del_route(struct rib_head *rnh, struct rt_addrinfo *info,
struct rib_cmd_info *rc); struct rib_cmd_info *rc);
static int change_route(struct rib_head *, struct rt_addrinfo *, static int change_route(struct rib_head *rnh, struct rt_addrinfo *info,
struct route_nhop_data *nhd_orig, struct rib_cmd_info *rc);
static int change_route_nhop(struct rib_head *rnh, struct rtentry *rt,
struct rt_addrinfo *info, struct route_nhop_data *rnd,
struct rib_cmd_info *rc); struct rib_cmd_info *rc);
static void rib_notify(struct rib_head *rnh, enum rib_subscription_type type, static void rib_notify(struct rib_head *rnh, enum rib_subscription_type type,
struct rib_cmd_info *rc); struct rib_cmd_info *rc);
@ -202,14 +208,18 @@ rib_add_route(uint32_t fibnum, struct rt_addrinfo *info,
return (add_route(rnh, info, rc)); return (add_route(rnh, info, rc));
} }
/*
* Creates rtentry and nexthop based on @info data.
* Returns 0 and stores the new rtentry in @prt on success,
* returns errno otherwise.
*/
static int static int
add_route(struct rib_head *rnh, struct rt_addrinfo *info, create_rtentry(struct rib_head *rnh, struct rt_addrinfo *info,
struct rib_cmd_info *rc) struct rtentry **prt)
{ {
struct sockaddr *dst, *ndst, *gateway, *netmask; struct sockaddr *dst, *ndst, *gateway, *netmask;
struct rtentry *rt, *rt_old; struct rtentry *rt;
struct nhop_object *nh; struct nhop_object *nh;
struct radix_node *rn;
struct ifaddr *ifa; struct ifaddr *ifa;
int error, flags; int error, flags;
@ -276,7 +286,28 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info,
rt->rt_weight = 1; rt->rt_weight = 1;
rt_setmetrics(info, rt); rt_setmetrics(info, rt);
rt_old = NULL;
*prt = rt;
return (0);
}
static int
add_route(struct rib_head *rnh, struct rt_addrinfo *info,
struct rib_cmd_info *rc)
{
struct sockaddr *ndst, *netmask;
struct route_nhop_data rnd;
struct nhop_object *nh;
struct rtentry *rt;
int error;
error = create_rtentry(rnh, info, &rt);
if (error != 0)
return (error);
rnd.rnd_nhop = rt->rt_nhop;
rnd.rnd_weight = rt->rt_weight;
nh = rt->rt_nhop;
RIB_WLOCK(rnh); RIB_WLOCK(rnh);
#ifdef RADIX_MPATH #ifdef RADIX_MPATH
@ -290,76 +321,42 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info,
return (EEXIST); return (EEXIST);
} }
#endif #endif
error = add_route_nhop(rnh, rt, info, &rnd, rc);
if (error == 0) {
rt = NULL;
nh = NULL;
} else if ((error == EEXIST) && ((info->rti_flags & RTF_PINNED) != 0)) {
struct rtentry *rt_orig;
struct nhop_object *nh_orig;
struct radix_node *rn;
rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes); ndst = (struct sockaddr *)rt_key(rt);
netmask = info->rti_info[RTAX_NETMASK];
if (rn != NULL) { rn = rnh->rnh_lookup(ndst, netmask, &rnh->head);
/* Most common usecase */ rt_orig = (struct rtentry *)rn;
if (rt->rt_expire > 0) if (rt_orig != NULL) {
tmproutes_update(rnh, rt); nh_orig = rt_orig->rt_nhop;
if ((nhop_get_rtflags(nh_orig) & RTF_PINNED) == 0) {
/* Finalize notification */ /* Current nexhop is not PINNED, can update */
rnh->rnh_gen++; error = change_route_nhop(rnh, rt_orig,
info, &rnd, rc);
rc->rc_rt = rt; if (error == 0)
rc->rc_nh_new = nh; nh = NULL;
rc->rc_nh_weight = rt->rt_weight;
rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
} else if ((info->rti_flags & RTF_PINNED) != 0) {
/*
* Force removal and re-try addition
* TODO: better multipath&pinned support
*/
struct sockaddr *info_dst = info->rti_info[RTAX_DST];
info->rti_info[RTAX_DST] = ndst;
/* Do not delete existing PINNED(interface) routes */
info->rti_flags &= ~RTF_PINNED;
rt_old = rt_unlinkrte(rnh, info, &error);
info->rti_flags |= RTF_PINNED;
info->rti_info[RTAX_DST] = info_dst;
if (rt_old != NULL) {
rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head,
rt->rt_nodes);
/* Finalize notification */
rnh->rnh_gen++;
if (rn != NULL) {
rc->rc_cmd = RTM_CHANGE;
rc->rc_rt = rt;
rc->rc_nh_old = rt_old->rt_nhop;
rc->rc_nh_new = nh;
rc->rc_nh_weight = rt->rt_weight;
} else {
rc->rc_cmd = RTM_DELETE;
rc->rc_rt = rt_old;
rc->rc_nh_old = rt_old->rt_nhop;
rc->rc_nh_weight = rt_old->rt_weight;
} }
rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); } else
} error = ENOBUFS;
} }
RIB_WUNLOCK(rnh); RIB_WUNLOCK(rnh);
if ((rn != NULL) || (rt_old != NULL)) if (error == 0)
rib_notify(rnh, RIB_NOTIFY_DELAYED, rc); rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
if (rt_old != NULL) if (nh != NULL)
rtfree(rt_old);
/*
* If it still failed to go into the tree,
* then un-make it (this should be a function)
*/
if (rn == NULL) {
nhop_free(nh); nhop_free(nh);
if (rt != NULL)
uma_zfree(V_rtzone, rt); uma_zfree(V_rtzone, rt);
return (EEXIST);
}
return (0); return (error);
} }
@ -508,7 +505,11 @@ int
rib_change_route(uint32_t fibnum, struct rt_addrinfo *info, rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
struct rib_cmd_info *rc) struct rib_cmd_info *rc)
{ {
RIB_RLOCK_TRACKER;
struct route_nhop_data rnd_orig;
struct rib_head *rnh; struct rib_head *rnh;
struct rtentry *rt;
int error;
NET_EPOCH_ASSERT(); NET_EPOCH_ASSERT();
@ -519,18 +520,18 @@ rib_change_route(uint32_t fibnum, struct rt_addrinfo *info,
bzero(rc, sizeof(struct rib_cmd_info)); bzero(rc, sizeof(struct rib_cmd_info));
rc->rc_cmd = RTM_CHANGE; rc->rc_cmd = RTM_CHANGE;
return (change_route(rnh, info, rc)); /* Check if updated gateway exists */
} if ((info->rti_flags & RTF_GATEWAY) &&
(info->rti_info[RTAX_GATEWAY] == NULL))
return (EINVAL);
static int /*
change_route_one(struct rib_head *rnh, struct rt_addrinfo *info, * route change is done in multiple steps, with dropping and
struct rib_cmd_info *rc) * reacquiring lock. In the situations with multiple processes
{ * changes the same route in can lead to the case when route
RIB_RLOCK_TRACKER; * is changed between the steps. Address it by retrying the operation
struct rtentry *rt = NULL; * multiple times before failing.
int error = 0; */
int free_ifa = 0;
struct nhop_object *nh, *nh_orig;
RIB_RLOCK(rnh); RIB_RLOCK(rnh);
rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST], rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
@ -554,12 +555,33 @@ change_route_one(struct rib_head *rnh, struct rt_addrinfo *info,
} }
} }
#endif #endif
nh_orig = rt->rt_nhop; rnd_orig.rnd_nhop = rt->rt_nhop;
rnd_orig.rnd_weight = rt->rt_weight;
RIB_RUNLOCK(rnh); RIB_RUNLOCK(rnh);
rt = NULL; for (int i = 0; i < RIB_MAX_RETRIES; i++) {
error = change_route(rnh, info, &rnd_orig, rc);
if (error != EAGAIN)
break;
}
return (error);
}
static int
change_route(struct rib_head *rnh, struct rt_addrinfo *info,
struct route_nhop_data *rnd_orig, struct rib_cmd_info *rc)
{
int error = 0;
int free_ifa = 0;
struct nhop_object *nh, *nh_orig;
struct route_nhop_data rnd_new;
nh = NULL; nh = NULL;
nh_orig = rnd_orig->rnd_nhop;
if (nh_orig == NULL)
return (ESRCH);
/* /*
* New gateway could require new ifaddr, ifp; * New gateway could require new ifaddr, ifp;
@ -593,71 +615,168 @@ change_route_one(struct rib_head *rnh, struct rt_addrinfo *info,
if (error != 0) if (error != 0)
return (error); return (error);
RIB_WLOCK(rnh); rnd_new.rnd_nhop = nh;
if (info->rti_mflags & RTV_WEIGHT)
rnd_new.rnd_weight = info->rti_rmx->rmx_weight;
else
rnd_new.rnd_weight = rnd_orig->rnd_weight;
/* Lookup rtentry once again and check if nexthop is still the same */ error = change_route_conditional(rnh, NULL, info, rnd_orig, &rnd_new, rc);
rt = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
info->rti_info[RTAX_NETMASK], &rnh->head);
if (rt == NULL) { return (error);
RIB_WUNLOCK(rnh); }
nhop_free(nh);
return (ESRCH); /*
* Insert @rt with nhop data from @rnd into @rnh.
* Returns 0 on success.
*/
static int
add_route_nhop(struct rib_head *rnh, struct rtentry *rt,
struct rt_addrinfo *info, struct route_nhop_data *rnd,
struct rib_cmd_info *rc)
{
struct sockaddr *ndst, *netmask;
struct radix_node *rn;
int error = 0;
RIB_WLOCK_ASSERT(rnh);
ndst = (struct sockaddr *)rt_key(rt);
netmask = info->rti_info[RTAX_NETMASK];
rt->rt_nhop = rnd->rnd_nhop;
rt->rt_weight = rnd->rnd_weight;
rn = rnh->rnh_addaddr(ndst, netmask, &rnh->head, rt->rt_nodes);
if (rn != NULL) {
if (rt->rt_expire > 0)
tmproutes_update(rnh, rt);
/* Finalize notification */
rnh->rnh_gen++;
rc->rc_cmd = RTM_ADD;
rc->rc_rt = rt;
rc->rc_nh_old = NULL;
rc->rc_nh_new = rnd->rnd_nhop;
rc->rc_nh_weight = rnd->rnd_weight;
rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
} else {
/* Existing route or memory allocation failure */
error = EEXIST;
} }
if (rt->rt_nhop != nh_orig) { return (error);
RIB_WUNLOCK(rnh); }
nhop_free(nh);
return (EAGAIN); /*
* Switch @rt nhop/weight to the ones specified in @rnd.
* Conditionally set rt_expire if set in @info.
* Returns 0 on success.
*/
static int
change_route_nhop(struct rib_head *rnh, struct rtentry *rt,
struct rt_addrinfo *info, struct route_nhop_data *rnd,
struct rib_cmd_info *rc)
{
struct nhop_object *nh_orig;
RIB_WLOCK_ASSERT(rnh);
nh_orig = rt->rt_nhop;
if (rnd->rnd_nhop != NULL) {
/* Changing expiration & nexthop & weight to a new one */
rt_setmetrics(info, rt);
rt->rt_nhop = rnd->rnd_nhop;
rt->rt_weight = rnd->rnd_weight;
if (rt->rt_expire > 0)
tmproutes_update(rnh, rt);
} else {
/* Route deletion requested. */
struct sockaddr *ndst, *netmask;
struct radix_node *rn;
ndst = (struct sockaddr *)rt_key(rt);
netmask = info->rti_info[RTAX_NETMASK];
rn = rnh->rnh_deladdr(ndst, netmask, &rnh->head);
if (rn == NULL)
return (ESRCH);
} }
/* Proceed with the update */
/* Provide notification to the protocols.*/
rt->rt_nhop = nh;
rt_setmetrics(info, rt);
/* Finalize notification */ /* Finalize notification */
rnh->rnh_gen++; rnh->rnh_gen++;
rc->rc_cmd = (rnd->rnd_nhop != NULL) ? RTM_CHANGE : RTM_DELETE;
rc->rc_rt = rt; rc->rc_rt = rt;
rc->rc_nh_old = nh_orig; rc->rc_nh_old = nh_orig;
rc->rc_nh_new = rt->rt_nhop; rc->rc_nh_new = rnd->rnd_nhop;
rc->rc_nh_weight = rt->rt_weight; rc->rc_nh_weight = rnd->rnd_weight;
rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
RIB_WUNLOCK(rnh);
rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
nhop_free(nh_orig);
return (0); return (0);
} }
static int /*
change_route(struct rib_head *rnh, struct rt_addrinfo *info, * Conditionally update route nhop/weight IFF data in @nhd_orig is
struct rib_cmd_info *rc) * consistent with the current route data.
* Nexthop in @nhd_new is consumed.
*/
int
change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
struct rt_addrinfo *info, struct route_nhop_data *rnd_orig,
struct route_nhop_data *rnd_new, struct rib_cmd_info *rc)
{ {
int error; struct rtentry *rt_new;
int error = 0;
/* Check if updated gateway exists */ RIB_WLOCK(rnh);
if ((info->rti_flags & RTF_GATEWAY) &&
(info->rti_info[RTAX_GATEWAY] == NULL))
return (EINVAL);
/* rt_new = (struct rtentry *)rnh->rnh_lookup(info->rti_info[RTAX_DST],
* route change is done in multiple steps, with dropping and info->rti_info[RTAX_NETMASK], &rnh->head);
* reacquiring lock. In the situations with multiple processes
* changes the same route in can lead to the case when route if (rt_new == NULL) {
* is changed between the steps. Address it by retrying the operation if (rnd_orig->rnd_nhop == NULL)
* multiple times before failing. error = add_route_nhop(rnh, rt, info, rnd_new, rc);
*/ else {
for (int i = 0; i < RIB_MAX_RETRIES; i++) { /*
error = change_route_one(rnh, info, rc); * Prefix does not exist, which was not our assumption.
if (error != EAGAIN) * Update @rnd_orig with the new data and return
break; */
rnd_orig->rnd_nhop = NULL;
rnd_orig->rnd_weight = 0;
error = EAGAIN;
}
} else {
/* Prefix exists, try to update */
if (rnd_orig->rnd_nhop == rt_new->rt_nhop) {
/*
* Nhop/mpath group hasn't changed. Flip
* to the new precalculated one and return
*/
error = change_route_nhop(rnh, rt_new, info, rnd_new, rc);
} else {
/* Update and retry */
rnd_orig->rnd_nhop = rt_new->rt_nhop;
rnd_orig->rnd_weight = rt_new->rt_weight;
error = EAGAIN;
}
}
RIB_WUNLOCK(rnh);
if (error == 0) {
rib_notify(rnh, RIB_NOTIFY_DELAYED, rc);
if (rnd_orig->rnd_nhop != NULL)
nhop_free_any(rnd_orig->rnd_nhop);
} else {
if (rnd_new->rnd_nhop != NULL)
nhop_free_any(rnd_new->rnd_nhop);
} }
return (error); return (error);

View File

@ -226,6 +226,14 @@ void tmproutes_init(struct rib_head *rh);
void tmproutes_destroy(struct rib_head *rh); void tmproutes_destroy(struct rib_head *rh);
/* route_ctl.c */ /* route_ctl.c */
/*
 * Nexthop/weight pair handed between the route add/change helpers in
 * route_ctl.c. It is used both to describe the state a caller expects a
 * route to be in and to carry the new state to switch the route to.
 */
struct route_nhop_data {
/*
 * Nexthop of the route (copied from/to rt->rt_nhop). NULL is meaningful:
 * as the "new" data it requests route deletion, as the "original" data it
 * means the prefix is expected to be absent.
 */
struct nhop_object *rnd_nhop;
/* Route weight that accompanies rnd_nhop (copied from/to rt->rt_weight). */
uint32_t rnd_weight;
};
int change_route_conditional(struct rib_head *rnh, struct rtentry *rt,
struct rt_addrinfo *info, struct route_nhop_data *nhd_orig,
struct route_nhop_data *nhd_new, struct rib_cmd_info *rc);
void vnet_rtzone_init(void); void vnet_rtzone_init(void);
void vnet_rtzone_destroy(void); void vnet_rtzone_destroy(void);