From e8f0ed264eff0af69a0e51984c1a49babe94b570 Mon Sep 17 00:00:00 2001 From: mjg Date: Wed, 28 Aug 2019 20:34:24 +0000 Subject: [PATCH] vfs: add VOP_NEED_INACTIVE vnode usecount drops to 0 all the time (e.g. for directories during path lookup). When that happens the kernel would always lock the exclusive lock for the vnode in order to call vinactive(). This blocks other threads who want to use the vnode for lookup. vinactive is very rarely needed and can be tested for without the vnode lock held. This patch gives filesystems an opportunity to do it, sample total wait time for tmpfs over 500 minutes of poudriere -j 104: before: 557563641706 (lockmgr:tmpfs) after: 46309603301 (lockmgr:tmpfs) Sponsored by: The FreeBSD Foundation Differential Revision: https://reviews.freebsd.org/D21371 --- sys/fs/nullfs/null_vnops.c | 1 + sys/fs/unionfs/union_vnops.c | 1 + sys/kern/vfs_default.c | 8 +++++++ sys/kern/vfs_subr.c | 46 ++++++++++++++++++++++++++++++++++++ sys/kern/vnode_if.src | 6 +++++ sys/sys/vnode.h | 6 +++++ 6 files changed, 68 insertions(+) diff --git a/sys/fs/nullfs/null_vnops.c b/sys/fs/nullfs/null_vnops.c index a921d7ae084e..a4c5c0609f08 100644 --- a/sys/fs/nullfs/null_vnops.c +++ b/sys/fs/nullfs/null_vnops.c @@ -907,6 +907,7 @@ struct vop_vector null_vnodeops = { .vop_getattr = null_getattr, .vop_getwritemount = null_getwritemount, .vop_inactive = null_inactive, + .vop_need_inactive = vop_stdneed_inactive, .vop_islocked = vop_stdislocked, .vop_lock1 = null_lock, .vop_lookup = null_lookup, diff --git a/sys/fs/unionfs/union_vnops.c b/sys/fs/unionfs/union_vnops.c index 002b3fa6bc38..2cfbbe3a2bb5 100644 --- a/sys/fs/unionfs/union_vnops.c +++ b/sys/fs/unionfs/union_vnops.c @@ -2523,6 +2523,7 @@ struct vop_vector unionfs_vnodeops = { .vop_getextattr = unionfs_getextattr, .vop_getwritemount = unionfs_getwritemount, .vop_inactive = unionfs_inactive, + .vop_need_inactive = vop_stdneed_inactive, .vop_islocked = unionfs_islocked, .vop_ioctl = unionfs_ioctl, .vop_link = 
unionfs_link, diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 6a9d144cf9ec..94ba43ff0645 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -120,6 +120,7 @@ struct vop_vector default_vnodeops = { .vop_getpages_async = vop_stdgetpages_async, .vop_getwritemount = vop_stdgetwritemount, .vop_inactive = VOP_NULL, + .vop_need_inactive = vop_stdneed_inactive, .vop_ioctl = vop_stdioctl, .vop_kqfilter = vop_stdkqfilter, .vop_islocked = vop_stdislocked, @@ -1157,6 +1158,13 @@ vop_stdadd_writecount(struct vop_add_writecount_args *ap) return (error); } +int +vop_stdneed_inactive(struct vop_need_inactive_args *ap) +{ + + return (1); +} + static int vop_stdioctl(struct vop_ioctl_args *ap) { diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index a09c4baefb5b..1af4aa028833 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -2890,6 +2890,21 @@ vputx(struct vnode *vp, int func) CTR2(KTR_VFS, "%s: return vnode %p to the freelist", __func__, vp); + /* + * Check if the fs wants to perform inactive processing. Note we + * may be only holding the interlock, in which case it is possible + * someone else called vgone on the vnode and ->v_data is now NULL. + * Since vgone performs inactive on its own there is nothing to do + * here but to drop our hold count. + */ + if (__predict_false(vp->v_iflag & VI_DOOMED) || + VOP_NEED_INACTIVE(vp) == 0) { + if (func == VPUTX_VPUT) + VOP_UNLOCK(vp, 0); + vdropl(vp); + return; + } + /* * We must call VOP_INACTIVE with the node locked. Mark * as VI_DOINGINACT to avoid recursion. 
@@ -4353,6 +4368,7 @@ static struct vop_vector sync_vnodeops = { .vop_close = sync_close, /* close */ .vop_fsync = sync_fsync, /* fsync */ .vop_inactive = sync_inactive, /* inactive */ + .vop_need_inactive = vop_stdneed_inactive, /* need_inactive */ .vop_reclaim = sync_reclaim, /* reclaim */ .vop_lock1 = vop_stdlock, /* lock */ .vop_unlock = vop_stdunlock, /* unlock */ @@ -4514,6 +4530,20 @@ sync_reclaim(struct vop_reclaim_args *ap) return (0); } +int +vn_need_pageq_flush(struct vnode *vp) +{ + struct vm_object *obj; + int need; + + MPASS(mtx_owned(VI_MTX(vp))); + need = 0; + if ((obj = vp->v_object) != NULL && (vp->v_vflag & VV_NOSYNC) == 0 && + (obj->flags & OBJ_MIGHTBEDIRTY) != 0) + need = 1; + return (need); +} + /* * Check if vnode represents a disk device */ @@ -4894,6 +4924,22 @@ vop_unlock_post(void *ap, int rc) if (a->a_flags & LK_INTERLOCK) ASSERT_VI_UNLOCKED(a->a_vp, "VOP_UNLOCK"); } + +void +vop_need_inactive_pre(void *ap) +{ + struct vop_need_inactive_args *a = ap; + + ASSERT_VI_LOCKED(a->a_vp, "VOP_NEED_INACTIVE"); +} + +void +vop_need_inactive_post(void *ap, int rc) +{ + struct vop_need_inactive_args *a = ap; + + ASSERT_VI_LOCKED(a->a_vp, "VOP_NEED_INACTIVE"); +} #endif void diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src index 576a1487e1c6..c5e7bc75419c 100644 --- a/sys/kern/vnode_if.src +++ b/sys/kern/vnode_if.src @@ -358,6 +358,12 @@ vop_inactive { IN struct thread *td; }; +%! need_inactive pre vop_need_inactive_pre +%! need_inactive post vop_need_inactive_post + +vop_need_inactive { + IN struct vnode *vp; +}; %% reclaim vp E E E %! 
reclaim post vop_reclaim_post diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index f1e0096d8a37..4818df786bd1 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -682,6 +682,7 @@ int vn_generic_copy_file_range(struct vnode *invp, off_t *inoffp, struct vnode *outvp, off_t *outoffp, size_t *lenp, unsigned int flags, struct ucred *incred, struct ucred *outcred, struct thread *fsize_td); +int vn_need_pageq_flush(struct vnode *vp); int vn_isdisk(struct vnode *vp, int *errp); int _vn_lock(struct vnode *vp, int flags, char *file, int line); #define vn_lock(vp, flags) _vn_lock(vp, flags, __FILE__, __LINE__) @@ -753,6 +754,7 @@ int vop_stdfsync(struct vop_fsync_args *); int vop_stdgetwritemount(struct vop_getwritemount_args *); int vop_stdgetpages(struct vop_getpages_args *); int vop_stdinactive(struct vop_inactive_args *); +int vop_stdneed_inactive(struct vop_need_inactive_args *); int vop_stdislocked(struct vop_islocked_args *); int vop_stdkqfilter(struct vop_kqfilter_args *); int vop_stdlock(struct vop_lock1_args *); @@ -813,12 +815,16 @@ void vop_lock_pre(void *a); void vop_lock_post(void *a, int rc); void vop_unlock_pre(void *a); void vop_unlock_post(void *a, int rc); +void vop_need_inactive_pre(void *a); +void vop_need_inactive_post(void *a, int rc); #else #define vop_strategy_pre(x) do { } while (0) #define vop_lock_pre(x) do { } while (0) #define vop_lock_post(x, y) do { } while (0) #define vop_unlock_pre(x) do { } while (0) #define vop_unlock_post(x, y) do { } while (0) +#define vop_need_inactive_pre(x) do { } while (0) +#define vop_need_inactive_post(x, y) do { } while (0) #endif void vop_rename_fail(struct vop_rename_args *ap);