From 20efcfc602b7a57979da99f0a1f917d9fb4a30e1 Mon Sep 17 00:00:00 2001 From: "Andrey V. Elsukov" Date: Sat, 16 Jun 2018 08:26:23 +0000 Subject: [PATCH] Switch RIB and RADIX_NODE_HEAD lock from rwlock(9) to rmlock(9). Using of rwlock with multiqueue NICs for IP forwarding on high pps produces high lock contention and inefficient. Rmlock fits better for such workloads. Reviewed by: melifaro, olivier Obtained from: Yandex LLC Sponsored by: Yandex LLC Differential Revision: https://reviews.freebsd.org/D15789 --- sys/kern/subr_witness.c | 2 +- sys/kern/vfs_export.c | 3 ++- sys/net/radix.c | 2 +- sys/net/radix.h | 26 +++++++++++++------------- sys/net/radix_mpath.c | 3 +++ sys/net/route.c | 5 +++++ sys/net/route_var.h | 19 ++++++++++--------- sys/net/rtsock.c | 3 +++ sys/netinet/in_fib.c | 4 +++- sys/netinet6/in6_fib.c | 4 +++- sys/netpfil/ipfw/ip_fw_table_algo.c | 1 + sys/nfs/bootp_subr.c | 1 + 12 files changed, 46 insertions(+), 27 deletions(-) diff --git a/sys/kern/subr_witness.c b/sys/kern/subr_witness.c index 9edec343e39a..34bec2b43f68 100644 --- a/sys/kern/subr_witness.c +++ b/sys/kern/subr_witness.c @@ -524,7 +524,7 @@ static struct witness_order_list_entry order_lists[] = { * Routing */ { "so_rcv", &lock_class_mtx_sleep }, - { "radix node head", &lock_class_rw }, + { "radix node head", &lock_class_rm }, { "rtentry", &lock_class_mtx_sleep }, { "ifaddr", &lock_class_mtx_sleep }, { NULL, NULL }, diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c index 3ce2ea4ea0e2..231b77e0ac2e 100644 --- a/sys/kern/vfs_export.c +++ b/sys/kern/vfs_export.c @@ -51,7 +51,7 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include +#include #include #include #include @@ -449,6 +449,7 @@ vfs_setpublicfs(struct mount *mp, struct netexport *nep, static struct netcred * vfs_export_lookup(struct mount *mp, struct sockaddr *nam) { + RADIX_NODE_HEAD_RLOCK_TRACKER; struct netexport *nep; struct netcred *np = NULL; struct radix_node_head *rnh; diff --git a/sys/net/radix.c b/sys/net/radix.c index c434df6cc5e6..f6c8dbe496f4 100644 --- a/sys/net/radix.c +++ b/sys/net/radix.c @@ -39,7 +39,7 @@ #ifdef _KERNEL #include #include -#include +#include #include #include #include diff --git a/sys/net/radix.h b/sys/net/radix.h index 05f0f4900188..9ff51c788d71 100644 --- a/sys/net/radix.h +++ b/sys/net/radix.h @@ -38,7 +38,7 @@ #ifdef _KERNEL #include #include -#include +#include #endif #ifdef MALLOC_DECLARE @@ -138,7 +138,7 @@ struct radix_node_head { rn_close_t *rnh_close; /*do something when the last ref drops*/ struct radix_node rnh_nodes[3]; /* empty tree for common case */ #ifdef _KERNEL - struct rwlock rnh_lock; /* locks entire radix tree */ + struct rmlock rnh_lock; /* locks entire radix tree */ #endif }; @@ -159,18 +159,18 @@ void rn_inithead_internal(struct radix_head *rh, struct radix_node *base_nodes, #define R_Zalloc(p, t, n) (p = (t) malloc((unsigned long)(n), M_RTABLE, M_NOWAIT | M_ZERO)) #define R_Free(p) free((caddr_t)p, M_RTABLE); +#define RADIX_NODE_HEAD_RLOCK_TRACKER struct rm_priotracker _rhn_tracker #define RADIX_NODE_HEAD_LOCK_INIT(rnh) \ - rw_init_flags(&(rnh)->rnh_lock, "radix node head", 0) -#define RADIX_NODE_HEAD_LOCK(rnh) rw_wlock(&(rnh)->rnh_lock) -#define RADIX_NODE_HEAD_UNLOCK(rnh) rw_wunlock(&(rnh)->rnh_lock) -#define RADIX_NODE_HEAD_RLOCK(rnh) rw_rlock(&(rnh)->rnh_lock) -#define RADIX_NODE_HEAD_RUNLOCK(rnh) rw_runlock(&(rnh)->rnh_lock) -#define RADIX_NODE_HEAD_LOCK_TRY_UPGRADE(rnh) rw_try_upgrade(&(rnh)->rnh_lock) - - -#define RADIX_NODE_HEAD_DESTROY(rnh) rw_destroy(&(rnh)->rnh_lock) -#define RADIX_NODE_HEAD_LOCK_ASSERT(rnh) rw_assert(&(rnh)->rnh_lock, RA_LOCKED) -#define RADIX_NODE_HEAD_WLOCK_ASSERT(rnh) rw_assert(&(rnh)->rnh_lock, RA_WLOCKED) + rm_init(&(rnh)->rnh_lock, "radix node head") +#define RADIX_NODE_HEAD_LOCK(rnh) rm_wlock(&(rnh)->rnh_lock) +#define RADIX_NODE_HEAD_UNLOCK(rnh) rm_wunlock(&(rnh)->rnh_lock) +#define RADIX_NODE_HEAD_RLOCK(rnh) rm_rlock(&(rnh)->rnh_lock,\ + &_rhn_tracker) +#define RADIX_NODE_HEAD_RUNLOCK(rnh) rm_runlock(&(rnh)->rnh_lock,\ + &_rhn_tracker) +#define RADIX_NODE_HEAD_DESTROY(rnh) rm_destroy(&(rnh)->rnh_lock) +#define RADIX_NODE_HEAD_LOCK_ASSERT(rnh) rm_assert(&(rnh)->rnh_lock, RA_LOCKED) +#define RADIX_NODE_HEAD_WLOCK_ASSERT(rnh) rm_assert(&(rnh)->rnh_lock, RA_WLOCKED) #endif /* _KERNEL */ int rn_inithead(void **, int); diff --git a/sys/net/radix_mpath.c b/sys/net/radix_mpath.c index e5dc29928981..b7a2ebe96766 100644 --- a/sys/net/radix_mpath.c +++ b/sys/net/radix_mpath.c @@ -43,12 +43,15 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include +#include #include #include #include #include #include +#include #include #include #include diff --git a/sys/net/route.c b/sys/net/route.c index 06eaba334f3a..797f1e4d6eed 100644 --- a/sys/net/route.c +++ b/sys/net/route.c @@ -54,6 +54,8 @@ #include #include #include +#include +#include #include #include @@ -440,6 +442,7 @@ struct rtentry * rtalloc1_fib(struct sockaddr *dst, int report, u_long ignflags, u_int fibnum) { + RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct rtentry *newrt; @@ -923,6 +926,7 @@ int rib_lookup_info(uint32_t fibnum, const struct sockaddr *dst, uint32_t flags, uint32_t flowid, struct rt_addrinfo *info) { + RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct rtentry *rt; @@ -1944,6 +1948,7 @@ rt_maskedcopy(struct sockaddr *src, struct sockaddr *dst, struct sockaddr *netma static inline int rtinit1(struct ifaddr *ifa, int cmd, int flags, int fibnum) { + RIB_RLOCK_TRACKER; struct sockaddr *dst; struct sockaddr *netmask; struct rtentry *rt = NULL; diff --git a/sys/net/route_var.h b/sys/net/route_var.h index f32dbc2137be..9d0d1931c469 100644 --- a/sys/net/route_var.h +++ b/sys/net/route_var.h @@ -44,18 +44,19 @@ struct rib_head { rt_gen_t rnh_gen; /* generation counter */ int rnh_multipath; /* multipath capable ? */ struct radix_node rnh_nodes[3]; /* empty tree for common case */ - struct rwlock rib_lock; /* config/data path lock */ + struct rmlock rib_lock; /* config/data path lock */ struct radix_mask_head rmhead; /* masks radix head */ }; -#define RIB_LOCK_INIT(rh) rw_init(&(rh)->rib_lock, "rib head lock") -#define RIB_LOCK_DESTROY(rh) rw_destroy(&(rh)->rib_lock) -#define RIB_RLOCK(rh) rw_rlock(&(rh)->rib_lock) -#define RIB_RUNLOCK(rh) rw_runlock(&(rh)->rib_lock) -#define RIB_WLOCK(rh) rw_wlock(&(rh)->rib_lock) -#define RIB_WUNLOCK(rh) rw_wunlock(&(rh)->rib_lock) -#define RIB_LOCK_ASSERT(rh) rw_assert(&(rh)->rib_lock, RA_LOCKED) -#define RIB_WLOCK_ASSERT(rh) rw_assert(&(rh)->rib_lock, RA_WLOCKED) +#define RIB_RLOCK_TRACKER struct rm_priotracker _rib_tracker +#define RIB_LOCK_INIT(rh) rm_init(&(rh)->rib_lock, "rib head lock") +#define RIB_LOCK_DESTROY(rh) rm_destroy(&(rh)->rib_lock) +#define RIB_RLOCK(rh) rm_rlock(&(rh)->rib_lock, &_rib_tracker) +#define RIB_RUNLOCK(rh) rm_runlock(&(rh)->rib_lock, &_rib_tracker) +#define RIB_WLOCK(rh) rm_wlock(&(rh)->rib_lock) +#define RIB_WUNLOCK(rh) rm_wunlock(&(rh)->rib_lock) +#define RIB_LOCK_ASSERT(rh) rm_assert(&(rh)->rib_lock, RA_LOCKED) +#define RIB_WLOCK_ASSERT(rh) rm_assert(&(rh)->rib_lock, RA_WLOCKED) struct rib_head *rt_tables_get_rnh(int fib, int family); diff --git a/sys/net/rtsock.c b/sys/net/rtsock.c index 6fb2c7013857..2daa8c337902 100644 --- a/sys/net/rtsock.c +++ b/sys/net/rtsock.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -542,6 +543,7 @@ rtm_get_jailed(struct rt_addrinfo *info, struct ifnet *ifp, static int route_output(struct mbuf *m, struct socket *so, ...) { + RIB_RLOCK_TRACKER; struct rt_msghdr *rtm = NULL; struct rtentry *rt = NULL; struct rib_head *rnh; @@ -1850,6 +1852,7 @@ sysctl_ifmalist(int af, struct walkarg *w) static int sysctl_rtsock(SYSCTL_HANDLER_ARGS) { + RIB_RLOCK_TRACKER; int *name = (int *)arg1; u_int namelen = arg2; struct rib_head *rnh = NULL; /* silence compiler. */ diff --git a/sys/netinet/in_fib.c b/sys/netinet/in_fib.c index 6b941eb1c46a..5d97cc50620b 100644 --- a/sys/netinet/in_fib.c +++ b/sys/netinet/in_fib.c @@ -37,7 +37,7 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include +#include #include #include #include @@ -134,6 +134,7 @@ int fib4_lookup_nh_basic(uint32_t fibnum, struct in_addr dst, uint32_t flags, uint32_t flowid, struct nhop4_basic *pnh4) { + RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct sockaddr_in sin; @@ -182,6 +183,7 @@ int fib4_lookup_nh_ext(uint32_t fibnum, struct in_addr dst, uint32_t flags, uint32_t flowid, struct nhop4_extended *pnh4) { + RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct sockaddr_in sin; diff --git a/sys/netinet6/in6_fib.c b/sys/netinet6/in6_fib.c index 35719310ee89..ecaa6e2b9555 100644 --- a/sys/netinet6/in6_fib.c +++ b/sys/netinet6/in6_fib.c @@ -38,7 +38,7 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include +#include #include #include #include @@ -171,6 +171,7 @@ int fib6_lookup_nh_basic(uint32_t fibnum, const struct in6_addr *dst, uint32_t scopeid, uint32_t flags, uint32_t flowid, struct nhop6_basic *pnh6) { + RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct sockaddr_in6 sin6; @@ -220,6 +221,7 @@ int fib6_lookup_nh_ext(uint32_t fibnum, const struct in6_addr *dst,uint32_t scopeid, uint32_t flags, uint32_t flowid, struct nhop6_extended *pnh6) { + RIB_RLOCK_TRACKER; struct rib_head *rh; struct radix_node *rn; struct sockaddr_in6 sin6; diff --git a/sys/netpfil/ipfw/ip_fw_table_algo.c b/sys/netpfil/ipfw/ip_fw_table_algo.c index 405be0be8285..aacc9431f57d 100644 --- a/sys/netpfil/ipfw/ip_fw_table_algo.c +++ b/sys/netpfil/ipfw/ip_fw_table_algo.c @@ -4047,6 +4047,7 @@ static void ta_foreach_kfib(void *ta_state, struct table_info *ti, ta_foreach_f *f, void *arg) { + RIB_RLOCK_TRACKER; struct rib_head *rh; int error; diff --git a/sys/nfs/bootp_subr.c b/sys/nfs/bootp_subr.c index d7657155f55d..07418aff1f3e 100644 --- a/sys/nfs/bootp_subr.c +++ b/sys/nfs/bootp_subr.c @@ -376,6 +376,7 @@ bootpboot_p_tree(struct radix_node *rn) void bootpboot_p_rtlist(void) { + RIB_RLOCK_TRACKER; struct rib_head *rnh; printf("Routing table:\n");