From 592d300e346f7c376d7be30b1627d0a2db500570 Mon Sep 17 00:00:00 2001 From: "Alexander V. Chernikov" Date: Mon, 24 Aug 2020 20:23:34 +0000 Subject: [PATCH] Remove RT_LOCK mutex from rte. rtentry lock traditionally served 2 purposed: first was protecting refcounts, the second was assuring consistent field access/changes. Since route nexthop introduction, the need for the former disappeared and the need for the latter reduced. To be more precise, the following rte field are mutable: rt_nhop (nexthop pointer, updated with RIB_WLOCK, passed in rib_cmd_info) rte_flags (only RTF_HOST and RTF_UP, where RTF_UP gets changed at rte removal) rt_weight (relative weight, updated with RIB_WLOCK, passed in rib_cmd_info) rt_expire (time when rte deletion is scheduled, updated with RIB_WLOCK) rt_chain (deletion chain pointer, updated with RIB_WLOCK) All of them are updated under RIB_WLOCK, so the only remaining concern is the reading. rt_nhop and rt_weight (addressed in this review) are read under rib lock and stored in the rib_cmd_info, so the caller has no problem with consitency. rte_flags is currently read unlocked in rtsock reporting (however the scope is only RTF_UP flag, which is pretty static). rt_expire is currently read unlocked in rtsock reporting. rt_chain accesses are safe, as this is only used at route deletion. rt_expire and rte_flags reads will be dealt in a separate reviews soon. Differential Revision: https://reviews.freebsd.org/D26162 --- sys/kern/subr_witness.c | 1 - sys/net/route.c | 13 ++++--------- sys/net/route/route_ctl.c | 28 +++++++++------------------- sys/net/route/route_var.h | 13 ------------- sys/net/rtsock.c | 30 +++++++++++++++--------------- 5 files changed, 28 insertions(+), 57 deletions(-) diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c index 3bf0fd47efb8..ff2fbeb8b75f 100644 --- a/sys/kern/subr_witness.c +++ b/sys/kern/subr_witness.c @@ -531,7 +531,6 @@ static struct witness_order_list_entry order_lists[] = { */ { "so_rcv", &lock_class_mtx_sleep }, { "radix node head", &lock_class_rm }, - { "rtentry", &lock_class_mtx_sleep }, { "ifaddr", &lock_class_mtx_sleep }, { NULL, NULL }, /* diff --git a/sys/net/route.c b/sys/net/route.c index 960da3c2d7cb..d59b1a0e116b 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -214,18 +214,19 @@ rib_add_redirect(u_int fibnum, struct sockaddr *dst, struct sockaddr *gateway, /* Verify the allowed flag mask. */ KASSERT(((flags & ~(RTF_GATEWAY)) == 0), ("invalid redirect flags: %x", flags)); + flags |= RTF_HOST | RTF_DYNAMIC; /* Get the best ifa for the given interface and gateway. */ if ((ifa = ifaof_ifpforaddr(gateway, ifp)) == NULL) return (ENETUNREACH); ifa_ref(ifa); - + bzero(&info, sizeof(info)); info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_ifa = ifa; info.rti_ifp = ifp; - info.rti_flags = flags | RTF_HOST | RTF_DYNAMIC; + info.rti_flags = flags; /* Setup route metrics to define expire time. */ bzero(&rti_rmx, sizeof(rti_rmx)); @@ -242,10 +243,6 @@ rib_add_redirect(u_int fibnum, struct sockaddr *dst, struct sockaddr *gateway, return (error); } - RT_LOCK(rc.rc_rt); - flags = rc.rc_rt->rte_flags; - RT_UNLOCK(rc.rc_rt); - RTSTAT_INC(rts_dynamic); /* Send notification of a route addition to userland. */ @@ -253,7 +250,7 @@ rib_add_redirect(u_int fibnum, struct sockaddr *dst, struct sockaddr *gateway, info.rti_info[RTAX_DST] = dst; info.rti_info[RTAX_GATEWAY] = gateway; info.rti_info[RTAX_AUTHOR] = author; - rt_missmsg_fib(RTM_REDIRECT, &info, flags, error, fibnum); + rt_missmsg_fib(RTM_REDIRECT, &info, flags | RTF_UP, error, fibnum); return (0); } @@ -811,9 +808,7 @@ rt_mpath_unlink(struct rib_head *rnh, struct rt_addrinfo *info, */ if (rn) { rto = RNTORT(rn); - RT_LOCK(rto); rto->rte_flags |= RTF_UP; - RT_UNLOCK(rto); } else if (rt->rte_flags & RTF_GATEWAY) { /* * For gateway routes, we need to diff --git a/sys/net/route/route_ctl.c b/sys/net/route/route_ctl.c index da0bbfe837b9..b2b7fa4842bc 100644 --- a/sys/net/route/route_ctl.c +++ b/sys/net/route/route_ctl.c @@ -149,9 +149,6 @@ rtfree(struct rtentry *rt) KASSERT(rt != NULL, ("%s: NULL rt", __func__)); - RT_LOCK_ASSERT(rt); - - RT_UNLOCK(rt); epoch_call(net_epoch_preempt, destroy_rtentry_epoch, &rt->rt_epoch_ctx); } @@ -250,7 +247,6 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info, nhop_free(nh); return (ENOBUFS); } - RT_LOCK_INIT(rt); rt->rte_flags = RTF_UP | flags; rt->rt_nhop = nh; @@ -283,7 +279,6 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info, rt_old = NULL; RIB_WLOCK(rnh); - RT_LOCK(rt); #ifdef RADIX_MPATH /* do not permit exactly the same dst/mask/gw pair */ if (rt_mpath_capable(rnh) && @@ -291,7 +286,6 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info, RIB_WUNLOCK(rnh); nhop_free(nh); - RT_LOCK_DESTROY(rt); uma_zfree(V_rtzone, rt); return (EEXIST); } @@ -307,8 +301,9 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info, /* Finalize notification */ rnh->rnh_gen++; - rc->rc_rt = RNTORT(rn); + rc->rc_rt = rt; rc->rc_nh_new = nh; + rc->rc_nh_weight = rt->rt_weight; rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); } else if ((info->rti_flags & RTF_PINNED) != 0) { @@ -333,14 +328,15 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info, if (rn != NULL) { rc->rc_cmd = RTM_CHANGE; - rc->rc_rt = RNTORT(rn); + rc->rc_rt = rt; rc->rc_nh_old = rt_old->rt_nhop; rc->rc_nh_new = nh; + rc->rc_nh_weight = rt->rt_weight; } else { rc->rc_cmd = RTM_DELETE; - rc->rc_rt = RNTORT(rn); + rc->rc_rt = rt_old; rc->rc_nh_old = rt_old->rt_nhop; - rc->rc_nh_new = nh; + rc->rc_nh_weight = rt_old->rt_weight; } rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); } @@ -359,13 +355,10 @@ add_route(struct rib_head *rnh, struct rt_addrinfo *info, */ if (rn == NULL) { nhop_free(nh); - RT_LOCK_DESTROY(rt); uma_zfree(V_rtzone, rt); return (EEXIST); } - RT_UNLOCK(rt); - return (0); } @@ -461,7 +454,6 @@ rt_unlinkrte(struct rib_head *rnh, struct rt_addrinfo *info, int *perror) panic ("rtrequest delete"); rt = RNTORT(rn); - RT_LOCK(rt); rt->rte_flags &= ~RTF_UP; *perror = 0; @@ -620,21 +612,19 @@ change_route_one(struct rib_head *rnh, struct rt_addrinfo *info, } /* Proceed with the update */ - RT_LOCK(rt); /* Provide notification to the protocols.*/ rt->rt_nhop = nh; rt_setmetrics(info, rt); /* Finalize notification */ + rnh->rnh_gen++; + rc->rc_rt = rt; rc->rc_nh_old = nh_orig; rc->rc_nh_new = rt->rt_nhop; + rc->rc_nh_weight = rt->rt_weight; - RT_UNLOCK(rt); - - /* Update generation id to reflect rtable change */ - rnh->rnh_gen++; rib_notify(rnh, RIB_NOTIFY_IMMEDIATE, rc); RIB_WUNLOCK(rnh); diff --git a/sys/net/route/route_var.h b/sys/net/route/route_var.h index 7d000ec6c173..89b5dfcda696 100644 --- a/sys/net/route/route_var.h +++ b/sys/net/route/route_var.h @@ -168,23 +168,10 @@ struct rtentry { int rte_flags; /* up/down?, host/net */ u_long rt_weight; /* absolute weight */ u_long rt_expire; /* lifetime for route, e.g. redirect */ -#define rt_endzero rt_mtx - struct mtx rt_mtx; /* mutex for routing entry */ struct rtentry *rt_chain; /* pointer to next rtentry to delete */ struct epoch_context rt_epoch_ctx; /* net epoch tracker */ }; -#define RT_LOCK_INIT(_rt) \ - mtx_init(&(_rt)->rt_mtx, "rtentry", NULL, MTX_DEF | MTX_DUPOK | MTX_NEW) -#define RT_LOCK(_rt) mtx_lock(&(_rt)->rt_mtx) -#define RT_UNLOCK(_rt) mtx_unlock(&(_rt)->rt_mtx) -#define RT_LOCK_DESTROY(_rt) mtx_destroy(&(_rt)->rt_mtx) -#define RT_LOCK_ASSERT(_rt) mtx_assert(&(_rt)->rt_mtx, MA_OWNED) -#define RT_UNLOCK_COND(_rt) do { \ - if (mtx_owned(&(_rt)->rt_mtx)) \ - mtx_unlock(&(_rt)->rt_mtx); \ -} while (0) - /* * With the split between the routing entry and the nexthop, * rt_flags has to be split between these 2 entries. As rtentry diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index 3296aa6c7504..b227a123210f 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -184,9 +184,9 @@ static void rt_getmetrics(const struct rtentry *rt, static void rt_dispatch(struct mbuf *, sa_family_t); static int handle_rtm_get(struct rt_addrinfo *info, u_int fibnum, struct rt_msghdr *rtm, struct rib_cmd_info *rc); -static int update_rtm_from_rte(struct rt_addrinfo *info, +static int update_rtm_from_rc(struct rt_addrinfo *info, struct rt_msghdr **prtm, int alloc_len, - struct rtentry *rt, struct nhop_object *nh); + struct rib_cmd_info *rc, struct nhop_object *nh); static void send_rtm_reply(struct socket *so, struct rt_msghdr *rtm, struct mbuf *m, sa_family_t saf, u_int fibnum, int rtm_errno); @@ -747,7 +747,7 @@ handle_rtm_get(struct rt_addrinfo *info, u_int fibnum, } /* - * Update sockaddrs, flags, etc in @prtm based on @rt data. + * Update sockaddrs, flags, etc in @prtm based on @rc data. * rtm can be reallocated. * * Returns 0 on success, along with pointer to (potentially reallocated) @@ -755,8 +755,8 @@ handle_rtm_get(struct rt_addrinfo *info, u_int fibnum, * */ static int -update_rtm_from_rte(struct rt_addrinfo *info, struct rt_msghdr **prtm, - int alloc_len, struct rtentry *rt, struct nhop_object *nh) +update_rtm_from_rc(struct rt_addrinfo *info, struct rt_msghdr **prtm, + int alloc_len, struct rib_cmd_info *rc, struct nhop_object *nh) { struct sockaddr_storage netmask_ss; struct walkarg w; @@ -767,10 +767,10 @@ update_rtm_from_rte(struct rt_addrinfo *info, struct rt_msghdr **prtm, rtm = *prtm; - info->rti_info[RTAX_DST] = rt_key(rt); + info->rti_info[RTAX_DST] = rt_key(rc->rc_rt); info->rti_info[RTAX_GATEWAY] = &nh->gw_sa; - info->rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rt), - rt_mask(rt), &netmask_ss); + info->rti_info[RTAX_NETMASK] = rtsock_fix_netmask(rt_key(rc->rc_rt), + rt_mask(rc->rc_rt), &netmask_ss); info->rti_info[RTAX_GENMASK] = 0; ifp = nh->nh_ifp; if (rtm->rtm_addrs & (RTA_IFP | RTA_IFA)) { @@ -815,12 +815,12 @@ update_rtm_from_rte(struct rt_addrinfo *info, struct rt_msghdr **prtm, w.w_tmemsize = alloc_len; rtsock_msg_buffer(rtm->rtm_type, info, &w, &len); - if (rt->rte_flags & RTF_GWFLAG_COMPAT) + rtm->rtm_flags = rc->rc_rt->rte_flags | nhop_get_rtflags(nh); + if (rtm->rtm_flags & RTF_GWFLAG_COMPAT) rtm->rtm_flags = RTF_GATEWAY | - (rt->rte_flags & ~RTF_GWFLAG_COMPAT); - else - rtm->rtm_flags = rt->rte_flags; - rt_getmetrics(rt, nh, &rtm->rtm_rmx); + (rtm->rtm_flags & ~RTF_GWFLAG_COMPAT); + rt_getmetrics(rc->rc_rt, nh, &rtm->rtm_rmx); + rtm->rtm_rmx.rmx_weight = rc->rc_nh_weight; rtm->rtm_addrs = info->rti_addrs; if (orig_rtm != NULL) @@ -918,8 +918,8 @@ route_output(struct mbuf *m, struct socket *so, ...) #ifdef INET6 rti_need_deembed = 1; #endif - rtm->rtm_index = rc.rc_nh_new->nh_ifp->if_index; nh = rc.rc_nh_new; + rtm->rtm_index = nh->nh_ifp->if_index; } break; @@ -946,7 +946,7 @@ route_output(struct mbuf *m, struct socket *so, ...) senderr(ESRCH); } - error = update_rtm_from_rte(&info, &rtm, alloc_len, rc.rc_rt, nh); + error = update_rtm_from_rc(&info, &rtm, alloc_len, &rc, nh); /* * Note that some sockaddr pointers may have changed to * point to memory outsize @rtm. Some may be pointing