Remove RT_LOCK mutex from rte.

rtentry lock traditionally served 2 purposed: first was protecting refcounts,
 the second was assuring consistent field access/changes.
Since route nexthop introduction, the need for the former disappeared and
 the need for the latter reduced.
To be more precise, the following rte field are mutable:

rt_nhop (nexthop pointer, updated with RIB_WLOCK, passed in rib_cmd_info)
rte_flags (only RTF_HOST and RTF_UP, where RTF_UP gets changed at rte removal)
rt_weight (relative weight, updated with RIB_WLOCK, passed in rib_cmd_info)
rt_expire (time when rte deletion is scheduled, updated with RIB_WLOCK)
rt_chain (deletion chain pointer, updated with RIB_WLOCK)
All of them are updated under RIB_WLOCK, so the only remaining concern is the reading.

rt_nhop and rt_weight (addressed in this review) are read under rib lock and
 stored in the rib_cmd_info, so the caller has no problem with consitency.
rte_flags is currently read unlocked in rtsock reporting (however the scope
 is only RTF_UP flag, which is pretty static).
rt_expire is currently read unlocked in rtsock reporting.
rt_chain accesses are safe, as this is only used at route deletion.

rt_expire and rte_flags reads will be dealt in a separate reviews soon.

Differential Revision:	https://reviews.freebsd.org/D26162
This commit is contained in:
Alexander V. Chernikov 2020-08-24 20:23:34 +00:00
parent 17c511a422
commit 592d300e34
5 changed files with 28 additions and 57 deletions

View File

@ -531,7 +531,6 @@ static struct witness_order_list_entry order_lists[] = {
*/
{ "so_rcv", &lock_class_mtx_sleep },
{ "radix node head", &lock_class_rm },
{ "rtentry", &lock_class_mtx_sleep },
{ "ifaddr", &lock_class_mtx_sleep },
{ NULL, NULL },
/*

View File

@ -214,18 +214,19 @@ rib_add_redirect(u_int fibnum, struct sockaddr *dst, struct sockaddr *gateway,
/* Verify the allowed flag mask. */
KASSERT(((flags & ~(RTF_GATEWAY)) == 0),
("invalid redirect flags: %x", flags));
flags |= RTF_HOST | RTF_DYNAMIC;
/* Get the best ifa for the given interface and gateway. */
if ((ifa = ifaof_ifpforaddr(gateway, ifp)) == NULL)
return (ENETUNREACH);
ifa_ref(ifa);
bzero(&info, sizeof(info));
info.rti_info[RTAX_DST] = dst;
info.rti_info[RTAX_GATEWAY] = gateway;
info.rti_ifa = ifa;
info.rti_ifp = ifp;
info.rti_flags = flags | RTF_HOST | RTF_DYNAMIC;
info.rti_flags = flags;
/* Setup route metrics to define expire time. */
bzero(&rti_rmx, sizeof(rti_rmx));
@ -242,10 +243,6 @@ rib_add_redirect(u_int fibnum, struct sockaddr *dst, struct sockaddr *gateway,
return (error);
}
RT_LOCK(rc.rc_rt);
flags = rc.rc_rt->rte_flags;
RT_UNLOCK(rc.rc_rt);
RTSTAT_INC(rts_dynamic);
/* Send notification of a route addition to userland. */
@ -253,7 +250,7 @@ rib_add_redirect(u_int fibnum, struct sockaddr *dst, struct sockaddr *gateway,
info.rti_info[RTAX_DST] = dst;
info.rti_info[RTAX_GATEWAY] = gateway;
info.rti_info[RTAX_AUTHOR] = author;
rt_missmsg_fib(RTM_REDIRECT, &info, flags, error, fibnum);
rt_missmsg_fib(RTM_REDIRECT, &info, flags | RTF_UP, error, fibnum);
return (0);
}
@ -811,9 +808,7 @@ rt_mpath_unlink(struct rib_head *rnh, struct rt_addrinfo *info,
*/
if (rn) {
rto = RNTORT(rn);
RT_LOCK(rto);
rto->rte_flags |= RTF_UP;
RT_UNLOCK(rto);
} else if (rt->rte_flags & RTF_GATEWAY) {
/*
* For gateway routes, we need to

View File

@ -149,9 +149,6 @@ rtfree(struct rtentry *rt)
KASSERT(rt != NULL, ("%s: NULL rt", __func__));
RT_LOCK_ASSERT(rt);
RT_UNLOCK(rt);
epoch_call(net_epoch_preempt, destroy_rtentry_epoch,
&rt->rt_epoch_ctx);
}
@ -250,7 +247,6 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info,
nhop_free(nh);
return (ENOBUFS);
}
RT_LOCK_INIT(rt);
rt->rte_flags = RTF_UP | flags;
rt->rt_nhop = nh;
@ -283,7 +279,6 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info,
rt_old = NULL;
RIB_WLOCK(rnh);
RT_LOCK(rt);
#ifdef RADIX_MPATH
/* do not permit exactly the same dst/mask/gw pair */
if (rt_mpath_capable(rnh) &&
@ -291,7 +286,6 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info,
RIB_WUNLOCK(rnh);
nhop_free(nh);
RT_LOCK_DESTROY(rt);
uma_zfree(V_rtzone, rt);
return (EEXIST);
}
@ -307,8 +301,9 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info,
/* Finalize notification */
rnh->rnh_gen++;
rc->rc_rt = RNTORT(rn);
rc->rc_rt = rt;
rc->rc_nh_new = nh;
rc->rc_nh_weight = rt->rt_weight;
rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
} else if ((info->rti_flags & RTF_PINNED) != 0) {
@ -333,14 +328,15 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info,
if (rn != NULL) {
rc->rc_cmd = RTM_CHANGE;
rc->rc_rt = RNTORT(rn);
rc->rc_rt = rt;
rc->rc_nh_old = rt_old->rt_nhop;
rc->rc_nh_new = nh;
rc->rc_nh_weight = rt->rt_weight;
} else {
rc->rc_cmd = RTM_DELETE;
rc->rc_rt = RNTORT(rn);
rc->rc_rt = rt_old;
rc->rc_nh_old = rt_old->rt_nhop;
rc->rc_nh_new = nh;
rc->rc_nh_weight = rt_old->rt_weight;
}
rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
}
@ -359,13 +355,10 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info,
*/
if (rn == NULL) {
nhop_free(nh);
RT_LOCK_DESTROY(rt);
uma_zfree(V_rtzone, rt);
return (EEXIST);
}
RT_UNLOCK(rt);
return (0);
}
@ -461,7 +454,6 @@ rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, int *perror)
panic ("rtrequest delete");
rt = RNTORT(rn);
RT_LOCK(rt);
rt->rte_flags &= ~RTF_UP;
*perror = 0;
@ -620,21 +612,19 @@ change_route_one(struct rib_head *rnh, struct rt_addrinfo *info,
}
/* Proceed with the update */
RT_LOCK(rt);
/* Provide notification to the protocols.*/
rt->rt_nhop = nh;
rt_setmetrics(info, rt);
/* Finalize notification */
rnh->rnh_gen++;
rc->rc_rt = rt;
rc->rc_nh_old = nh_orig;
rc->rc_nh_new = rt->rt_nhop;
rc->rc_nh_weight = rt->rt_weight;
RT_UNLOCK(rt);
/* Update generation id to reflect rtable change */
rnh->rnh_gen++;
rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc);
RIB_WUNLOCK(rnh);

View File

@ -168,23 +168,10 @@ struct rtentry {
int rte_flags; /* up/down?, host/net */
u_long rt_weight; /* absolute weight */
u_long rt_expire; /* lifetime for route, e.g. redirect */
#define rt_endzero rt_mtx
struct mtx rt_mtx; /* mutex for routing entry */
struct rtentry *rt_chain; /* pointer to next rtentry to delete */
struct epoch_context rt_epoch_ctx; /* net epoch tracker */
};
#define RT_LOCK_INIT(_rt) \
mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK | MTX_NEW)
#define RT_LOCK(_rt) mtx_lock(&(_rt)->rt_mtx)
#define RT_UNLOCK(_rt) mtx_unlock(&(_rt)->rt_mtx)
#define RT_LOCK_DESTROY(_rt) mtx_destroy(&(_rt)->rt_mtx)
#define RT_LOCK_ASSERT(_rt) mtx_assert(&(_rt)->rt_mtx, MA_OWNED)
#define RT_UNLOCK_COND(_rt) do { \
if (mtx_owned(&(_rt)->rt_mtx)) \
mtx_unlock(&(_rt)->rt_mtx); \
} while (0)
/*
* With the split between the routing entry and the nexthop,
* rt_flags has to be split between these 2 entries. As rtentry

View File

@ -184,9 +184,9 @@ static void rt_getmetrics(const struct rtentry *rt,
static void rt_dispatch(struct mbuf *, sa_family_t);
static int handle_rtm_get(struct rt_addrinfo *info, u_int fibnum,
struct rt_msghdr *rtm, struct rib_cmd_info *rc);
static int update_rtm_from_rte(struct rt_addrinfo *info,
static int update_rtm_from_rc(struct rt_addrinfo *info,
struct rt_msghdr **prtm, int alloc_len,
struct rtentry *rt, struct nhop_object *nh);
struct rib_cmd_info *rc, struct nhop_object *nh);
static void send_rtm_reply(struct socket *so, struct rt_msghdr *rtm,
struct mbuf *m, sa_family_t saf, u_int fibnum,
int rtm_errno);
@ -747,7 +747,7 @@ handle_rtm_get(struct rt_addrinfo *info, u_int fibnum,
}
/*
* Update sockaddrs, flags, etc in @prtm based on @rt data.
* Update sockaddrs, flags, etc in @prtm based on @rc data.
* rtm can be reallocated.
*
* Returns 0 on success, along with pointer to (potentially reallocated)
@ -755,8 +755,8 @@ handle_rtm_get(struct rt_addrinfo *info, u_int fibnum,
*
*/
static int
update_rtm_from_rte(struct rt_addrinfo *info, struct rt_msghdr **prtm,
int alloc_len, struct rtentry *rt, struct nhop_object *nh)
update_rtm_from_rc(struct rt_addrinfo *info, struct rt_msghdr **prtm,
int alloc_len, struct rib_cmd_info *rc, struct nhop_object *nh)
{
struct sockaddr_storage netmask_ss;
struct walkarg w;
@ -767,10 +767,10 @@ update_rtm_from_rte(struct rt_addrinfo *info, struct rt_msghdr **prtm,
rtm = *prtm;
info->rti_info[RTAX_DST] = rt_key(rt);
info->rti_info[RTAX_DST] = rt_key(rc->rc_rt);
info->rti_info[RTAX_GATEWAY] = &nh->gw_sa;
info->rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt),
rt_mask(rt), &netmask_ss);
info->rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rc->rc_rt),
rt_mask(rc->rc_rt), &netmask_ss);
info->rti_info[RTAX_GENMASK] = 0;
ifp = nh->nh_ifp;
if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) {
@ -815,12 +815,12 @@ update_rtm_from_rte(struct rt_addrinfo *info, struct rt_msghdr **prtm,
w.w_tmemsize = alloc_len;
rtsock_msg_buffer(rtm->rtm_type, info, &w, &len);
if (rt->rte_flags & RTF_GWFLAG_COMPAT)
rtm->rtm_flags = rc->rc_rt->rte_flags | nhop_get_rtflags(nh);
if (rtm->rtm_flags & RTF_GWFLAG_COMPAT)
rtm->rtm_flags = RTF_GATEWAY |
(rt->rte_flags & ~RTF_GWFLAG_COMPAT);
else
rtm->rtm_flags = rt->rte_flags;
rt_getmetrics(rt, nh, &rtm->rtm_rmx);
(rtm->rtm_flags & ~RTF_GWFLAG_COMPAT);
rt_getmetrics(rc->rc_rt, nh, &rtm->rtm_rmx);
rtm->rtm_rmx.rmx_weight = rc->rc_nh_weight;
rtm->rtm_addrs = info->rti_addrs;
if (orig_rtm != NULL)
@ -918,8 +918,8 @@ route_output(struct mbuf *m, struct socket *so, ...)
#ifdef INET6
rti_need_deembed = 1;
#endif
rtm->rtm_index = rc.rc_nh_new->nh_ifp->if_index;
nh = rc.rc_nh_new;
rtm->rtm_index = nh->nh_ifp->if_index;
}
break;
@ -946,7 +946,7 @@ route_output(struct mbuf *m, struct socket *so, ...)
senderr(ESRCH);
}
error = update_rtm_from_rte(&info, &rtm, alloc_len, rc.rc_rt, nh);
error = update_rtm_from_rc(&info, &rtm, alloc_len, &rc, nh);
/*
* Note that some sockaddr pointers may have changed to
* point to memory outsize @rtm. Some may be pointing