vfs: add the infrastructure for lockless lookup

Reviewed by:    kib
Tested by:      pho (in a patchset)
Differential Revision:	https://reviews.freebsd.org/D25577
commit 07d2145a17
parent 0379ff6ae3
Author: Mateusz Guzik
Date:   2020-07-25 10:32:45 +00:00
Notes:  svn2git 2020-12-20 02:59:44 +00:00
        svn path=/head/; revision=363518
7 changed files with 141 additions and 25 deletions

View File: sys/kern/kern_descrip.c

@@ -102,8 +102,8 @@ MALLOC_DECLARE(M_FADVISE);
 static __read_mostly uma_zone_t file_zone;
 static __read_mostly uma_zone_t filedesc0_zone;
-static __read_mostly uma_zone_t pwd_zone;
-static __read_mostly smr_t pwd_smr;
+__read_mostly uma_zone_t pwd_zone;
+VFS_SMR_DECLARE;

 static int closefp(struct filedesc *fdp, int fd, struct file *fp,
                     struct thread *td, int holdleaders);
@@ -3343,18 +3343,27 @@ pwd_hold(struct thread *td)
         fdp = td->td_proc->p_fd;

-        smr_enter(pwd_smr);
-        pwd = smr_entered_load(&fdp->fd_pwd, pwd_smr);
+        vfs_smr_enter();
+        pwd = vfs_smr_entered_load(&fdp->fd_pwd);
         MPASS(pwd != NULL);
         if (__predict_true(refcount_acquire_if_not_zero(&pwd->pwd_refcount))) {
-                smr_exit(pwd_smr);
+                vfs_smr_exit();
                 return (pwd);
         }
-        smr_exit(pwd_smr);
+        vfs_smr_exit();
         FILEDESC_SLOCK(fdp);
         pwd = pwd_hold_filedesc(fdp);
         MPASS(pwd != NULL);
         FILEDESC_SUNLOCK(fdp);
         return (pwd);
 }

+struct pwd *
+pwd_get_smr(void)
+{
+        struct pwd *pwd;
+
+        pwd = vfs_smr_entered_load(&curproc->p_fd->fd_pwd);
+        MPASS(pwd != NULL);
+        return (pwd);
+}
@@ -4368,7 +4377,11 @@ filelistinit(void *dummy)
             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
         pwd_zone = uma_zcreate("PWD", sizeof(struct pwd), NULL, NULL,
             NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_SMR);
-        pwd_smr = uma_zone_get_smr(pwd_zone);
+        /*
+         * XXXMJG this is a temporary hack due to boot ordering issues against
+         * the vnode zone.
+         */
+        vfs_smr = uma_zone_get_smr(pwd_zone);
         mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
 }
 SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL);
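With pwd_zone and the vnode zone now both freeing through vfs_smr, a pointer loaded inside the section stays safe to dereference until vfs_smr_exit. A minimal consumer sketch; everything except vfs_smr_enter/vfs_smr_exit, pwd_get_smr and vget_prep_smr/vget_abort is hypothetical:

/*
 * Hypothetical consumer: snapshot the current working directory
 * vnode without taking FILEDESC_SLOCK.
 */
static struct vnode *
example_peek_cdir(void)
{
        struct pwd *pwd;
        struct vnode *cdir;
        enum vgetstate vs;

        vfs_smr_enter();
        pwd = pwd_get_smr();
        cdir = pwd->pwd_cdir;
        if (cdir == NULL) {
                vfs_smr_exit();
                return (NULL);
        }
        /* Stabilize the vnode before leaving the SMR section. */
        vs = vget_prep_smr(cdir);
        vfs_smr_exit();
        if (vs == VGET_NONE)
                return (NULL);  /* was being freed; fall back to locked path */
        return (cdir);          /* caller releases with vget_abort(cdir, vs) */
}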

View File: sys/kern/vfs_subr.c

@@ -664,8 +664,8 @@ vntblinit(void *dummy __unused)
         vnode_list_reclaim_marker = vn_alloc_marker(NULL);
         TAILQ_INSERT_HEAD(&vnode_list, vnode_list_reclaim_marker, v_vnodelist);
         vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL,
-            vnode_init, vnode_fini, UMA_ALIGN_PTR, UMA_ZONE_SMR);
-        vfs_smr = uma_zone_get_smr(vnode_zone);
+            vnode_init, vnode_fini, UMA_ALIGN_PTR, 0);
+        uma_zone_set_smr(vnode_zone, vfs_smr);
         vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo),
             NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
/*
@@ -2914,6 +2914,22 @@ vget_prep(struct vnode *vp)
         return (vs);
 }

+void
+vget_abort(struct vnode *vp, enum vgetstate vs)
+{
+
+        switch (vs) {
+        case VGET_USECOUNT:
+                vrele(vp);
+                break;
+        case VGET_HOLDCNT:
+                vdrop(vp);
+                break;
+        default:
+                __assert_unreachable();
+        }
+}
+
 int
 vget(struct vnode *vp, int flags, struct thread *td)
 {
@@ -2925,7 +2941,7 @@ vget(struct vnode *vp, int flags, struct thread *td)
         return (vget_finish(vp, flags, vs));
 }

-static int __noinline
+static void __noinline
 vget_finish_vchr(struct vnode *vp)
 {
@@ -2941,7 +2957,7 @@ vget_finish_vchr(struct vnode *vp)
 #else
                 refcount_release(&vp->v_holdcnt);
 #endif
-                return (0);
+                return;
         }

         VI_LOCK(vp);
@@ -2953,18 +2969,17 @@ vget_finish_vchr(struct vnode *vp)
                 refcount_release(&vp->v_holdcnt);
 #endif
                 VI_UNLOCK(vp);
-                return (0);
+                return;
         }

         v_incr_devcount(vp);
         refcount_acquire(&vp->v_usecount);
         VI_UNLOCK(vp);
-        return (0);
 }

 int
 vget_finish(struct vnode *vp, int flags, enum vgetstate vs)
 {
-        int error, old;
+        int error;

         if ((flags & LK_INTERLOCK) != 0)
                 ASSERT_VI_LOCKED(vp, __func__);
@@ -2976,20 +2991,32 @@ vget_finish(struct vnode *vp, int flags, enum vgetstate vs)
         error = vn_lock(vp, flags);
         if (__predict_false(error != 0)) {
-                if (vs == VGET_USECOUNT)
-                        vrele(vp);
-                else
-                        vdrop(vp);
+                vget_abort(vp, vs);
                 CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__,
                     vp);
                 return (error);
         }

-        if (vs == VGET_USECOUNT)
-                return (0);
+        vget_finish_ref(vp, vs);
+        return (0);
+}

-        if (__predict_false(vp->v_type == VCHR))
-                return (vget_finish_vchr(vp));
+void
+vget_finish_ref(struct vnode *vp, enum vgetstate vs)
+{
+        int old;
+
+        VNPASS(vs == VGET_HOLDCNT || vs == VGET_USECOUNT, vp);
+        VNPASS(vp->v_holdcnt > 0, vp);
+        VNPASS(vs == VGET_HOLDCNT || vp->v_usecount > 0, vp);
+
+        if (vs == VGET_USECOUNT)
+                return;
+
+        if (__predict_false(vp->v_type == VCHR)) {
+                vget_finish_vchr(vp);
+                return;
+        }

         /*
          * We hold the vnode. If the usecount is 0 it will be utilized to keep
@@ -3006,7 +3033,6 @@ vget_finish(struct vnode *vp, int flags, enum vgetstate vs)
                 refcount_release(&vp->v_holdcnt);
 #endif
         }
-
-        return (0);
 }

 /*
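Together with the pre-existing vget_prep_smr, the new vget_abort and vget_finish_ref split vnode acquisition into two phases: secure a hold count while still inside the SMR section, then lock and upgrade outside it, backing out cleanly on failure. A hedged sketch of the intended calling sequence (the function name is made up; LK_SHARED is an arbitrary choice):

static int
example_secure_vnode(struct vnode *vp, struct vnode **vpp)
{
        enum vgetstate vs;
        int error;

        vfs_smr_enter();
        /* ... vp was located by a lockless walk ... */
        vs = vget_prep_smr(vp);         /* try to take a hold count */
        vfs_smr_exit();
        if (vs == VGET_NONE)
                return (EAGAIN);        /* vnode was being freed; use slow path */
        /* Lock and convert the hold into a use reference. */
        error = vget_finish(vp, LK_SHARED, vs);
        if (error != 0)
                return (error);         /* counts already dropped via vget_abort */
        *vpp = vp;
        return (0);
}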
@@ -4424,6 +4450,7 @@ DB_SHOW_COMMAND(mount, db_show_mount)
                 MNT_KERN_FLAG(MNTK_LOOKUP_EXCL_DOTDOT);
                 MNT_KERN_FLAG(MNTK_MARKER);
                 MNT_KERN_FLAG(MNTK_USES_BCACHE);
+                MNT_KERN_FLAG(MNTK_FPLOOKUP);
                 MNT_KERN_FLAG(MNTK_NOASYNC);
                 MNT_KERN_FLAG(MNTK_UNMOUNT);
                 MNT_KERN_FLAG(MNTK_MWAIT);
@@ -5239,6 +5266,38 @@ vn_isdisk(struct vnode *vp, int *errp)
         return (error == 0);
 }

+/*
+ * VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
+ * the comment above cache_fplookup for details.
+ *
+ * We never deny as priv_check_cred calls are not yet supported, see vaccess.
+ */
+int
+vaccess_vexec_smr(mode_t file_mode, uid_t file_uid, gid_t file_gid, struct ucred *cred)
+{
+
+        VFS_SMR_ASSERT_ENTERED();
+
+        /* Check the owner. */
+        if (cred->cr_uid == file_uid) {
+                if (file_mode & S_IXUSR)
+                        return (0);
+                return (EAGAIN);
+        }
+
+        /* Otherwise, check the groups (first match) */
+        if (groupmember(file_gid, cred)) {
+                if (file_mode & S_IXGRP)
+                        return (0);
+                return (EAGAIN);
+        }
+
+        /* Otherwise, check everyone else. */
+        if (file_mode & S_IXOTH)
+                return (0);
+        return (EAGAIN);
+}
+
 /*
  * Common filesystem object access control check routine.  Accepts a
  * vnode's type, "mode", uid and gid, requested access mode, credentials,
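vaccess_vexec_smr deliberately answers only "allowed" or EAGAIN: priv_check_cred can sleep, so any case that would need to consult privileges is punted to the locked vaccess path. A hedged sketch of a filesystem-side VOP_FPLOOKUP_VEXEC built on it and the vn_load_v_data_smr helper added in sys/sys/vnode.h below; exampfs_node and its e_mode/e_uid/e_gid fields are invented stand-ins for a real filesystem's inode:

struct exampfs_node {           /* placeholder inode layout */
        mode_t  e_mode;
        uid_t   e_uid;
        gid_t   e_gid;
};

static int
exampfs_fplookup_vexec(struct vop_fplookup_vexec_args *v)
{
        struct vnode *vp = v->a_vp;
        struct exampfs_node *node;
        mode_t mode;

        /* Runs under vfs_smr; v_data may already be torn down. */
        node = vn_load_v_data_smr(vp);
        if (__predict_false(node == NULL))
                return (EAGAIN);        /* reclaimed; take the slow path */
        mode = atomic_load_short(&node->e_mode);
        return (vaccess_vexec_smr(mode, node->e_uid, node->e_gid, v->a_cred));
}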
@@ -5537,6 +5596,20 @@ vop_rename_pre(void *ap)
 }

 #ifdef DEBUG_VFS_LOCKS
+void
+vop_fplookup_vexec_pre(void *ap __unused)
+{
+
+        VFS_SMR_ASSERT_ENTERED();
+}
+
+void
+vop_fplookup_vexec_post(void *ap __unused, int rc __unused)
+{
+
+        VFS_SMR_ASSERT_ENTERED();
+}
+
 void
 vop_strategy_pre(void *ap)
 {

View File: sys/kern/vnode_if.src

@@ -146,6 +146,17 @@ vop_close {
 };

+%% fplookup_vexec       vp      -       -       -
+%! fplookup_vexec       pre     vop_fplookup_vexec_pre
+%! fplookup_vexec       post    vop_fplookup_vexec_post
+
+vop_fplookup_vexec {
+        IN struct vnode *vp;
+        IN struct ucred *cred;
+        IN struct thread *td;
+};
+
 %% access       vp      L L L

 vop_access {
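The "vp - - -" locking annotation records that the new operation is called with no vnode lock held (it runs under vfs_smr instead), and the pre/post hooks wired above assert exactly that in DEBUG_VFS_LOCKS kernels. A sketch of how a filesystem might advertise an implementation through its vop_vector; the exampfs names are placeholders continuing the earlier fragment:

struct vop_vector exampfs_vnodeops = {
        .vop_default =          &default_vnodeops,
        .vop_fplookup_vexec =   exampfs_fplookup_vexec, /* SMR fast path */
        .vop_access =           exampfs_access,         /* locked fallback stays */
        /* ... remaining operations ... */
};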

View File: sys/security/mac/mac_framework.h

@@ -422,13 +422,14 @@ int mac_vnode_check_listextattr(struct ucred *cred, struct vnode *vp,
 int     mac_vnode_check_lookup_impl(struct ucred *cred, struct vnode *dvp,
             struct componentname *cnp);
 extern bool mac_vnode_check_lookup_fp_flag;
+#define mac_vnode_check_lookup_enabled() __predict_false(mac_vnode_check_lookup_fp_flag)
 static inline int
 mac_vnode_check_lookup(struct ucred *cred, struct vnode *dvp,
     struct componentname *cnp)
 {

         mac_vnode_assert_locked(dvp, "mac_vnode_check_lookup");
-        if (__predict_false(mac_vnode_check_lookup_fp_flag))
+        if (mac_vnode_check_lookup_enabled())
                 return (mac_vnode_check_lookup_impl(cred, dvp, cnp));
         return (0);
 }
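The new macro gives the lockless walk a cheap way to refuse to run whenever a loaded MAC policy hooks vnode lookups, since those hooks may sleep. A sketch of the expected fast-path gate; the surrounding function is hypothetical:

static bool
example_can_fplookup(void)
{

        /* Sleepable MAC lookup hooks disqualify the lockless walk. */
        if (mac_vnode_check_lookup_enabled())
                return (false); /* caller falls back to the locked lookup */
        return (true);
}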

View File: sys/sys/filedesc.h

@@ -311,6 +311,7 @@ pwd_set(struct filedesc *fdp, struct pwd *newpwd)
         smr_serialized_store(&fdp->fd_pwd, newpwd,
             (FILEDESC_XLOCK_ASSERT(fdp), true));
 }

+struct pwd *pwd_get_smr(void);
 #endif /* _KERNEL */

View File: sys/sys/mount.h

@@ -420,6 +420,7 @@ void __mnt_vnode_markerfree_lazy(struct vnode **mvp, struct mount *mp);
 #define MNTK_TEXT_REFS          0x00008000      /* Keep use ref for text */
 #define MNTK_VMSETSIZE_BUG      0x00010000
 #define MNTK_UNIONFS            0x00020000      /* A hack for F_ISUNIONSTACK */
+#define MNTK_FPLOOKUP           0x00040000      /* fast path lookup is supported */
 #define MNTK_NOASYNC            0x00800000      /* disable async */
 #define MNTK_UNMOUNT            0x01000000      /* unmount in progress */
 #define MNTK_MWAIT              0x02000000      /* waiting for unmount to finish */
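MNTK_FPLOOKUP is the per-mount opt-in, presumably consulted by the forthcoming cache_fplookup (referenced by the vaccess_vexec_smr comment above) before attempting the fast path. A filesystem would set it at mount time under the mount interlock, which protects mnt_kern_flag; a sketch with exampfs_mount as a placeholder:

static int
exampfs_mount(struct mount *mp)
{

        /* ... ordinary mount setup ... */
        MNT_ILOCK(mp);
        /* Advertise VOP_FPLOOKUP_VEXEC support for lockless lookup. */
        mp->mnt_kern_flag |= MNTK_FPLOOKUP;
        MNT_IUNLOCK(mp);
        return (0);
}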

View File: sys/sys/vnode.h

@@ -666,6 +666,8 @@ int vn_path_to_global_path(struct thread *td, struct vnode *vp,
 int     vaccess(enum vtype type, mode_t file_mode, uid_t file_uid,
             gid_t file_gid, accmode_t accmode, struct ucred *cred,
             int *privused);
+int     vaccess_vexec_smr(mode_t file_mode, uid_t file_uid, gid_t file_gid,
+            struct ucred *cred);
 int     vaccess_acl_nfs4(enum vtype type, uid_t file_uid, gid_t file_gid,
             struct acl *aclp, accmode_t accmode, struct ucred *cred,
             int *privused);
@@ -682,6 +684,8 @@ int vget(struct vnode *vp, int flags, struct thread *td);
 enum vgetstate  vget_prep_smr(struct vnode *vp);
 enum vgetstate  vget_prep(struct vnode *vp);
 int     vget_finish(struct vnode *vp, int flags, enum vgetstate vs);
+void    vget_finish_ref(struct vnode *vp, enum vgetstate vs);
+void    vget_abort(struct vnode *vp, enum vgetstate vs);
 void    vgone(struct vnode *vp);
 void    vhold(struct vnode *);
 void    vholdl(struct vnode *);
@@ -865,6 +869,8 @@ void vop_symlink_post(void *a, int rc);
 int     vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a);

 #ifdef DEBUG_VFS_LOCKS
+void    vop_fplookup_vexec_pre(void *a);
+void    vop_fplookup_vexec_post(void *a, int rc);
 void    vop_strategy_pre(void *a);
 void    vop_lock_pre(void *a);
 void    vop_lock_post(void *a, int rc);
@@ -872,6 +878,8 @@ void vop_unlock_pre(void *a);
 void    vop_need_inactive_pre(void *a);
 void    vop_need_inactive_post(void *a, int rc);
 #else
+#define vop_fplookup_vexec_pre(x)       do { } while (0)
+#define vop_fplookup_vexec_post(x, y)   do { } while (0)
 #define vop_strategy_pre(x)     do { } while (0)
 #define vop_lock_pre(x)         do { } while (0)
 #define vop_lock_post(x, y)     do { } while (0)
@@ -1025,10 +1033,18 @@ int vn_dir_check_exec(struct vnode *vp, struct componentname *cnp);
 #define VFS_SMR()               vfs_smr
 #define vfs_smr_enter()         smr_enter(VFS_SMR())
 #define vfs_smr_exit()          smr_exit(VFS_SMR())
+#define vfs_smr_entered_load(ptr)       smr_entered_load((ptr), VFS_SMR())
 #define VFS_SMR_ASSERT_ENTERED()        SMR_ASSERT_ENTERED(VFS_SMR())
 #define VFS_SMR_ASSERT_NOT_ENTERED()    SMR_ASSERT_NOT_ENTERED(VFS_SMR())
+#define VFS_SMR_ZONE_SET(zone)  uma_zone_set_smr((zone), VFS_SMR())
+
+#define vn_load_v_data_smr(vp)  ({                      \
+        struct vnode *_vp = (vp);                       \
+                                                        \
+        VFS_SMR_ASSERT_ENTERED();                       \
+        atomic_load_ptr(&(_vp)->v_data);                \
+})

 #endif /* _KERNEL */

 #endif /* !_SYS_VNODE_H_ */
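VFS_SMR_ZONE_SET mirrors the uma_zone_set_smr call in vntblinit above: a filesystem can place its per-node zone in the same SMR domain as vnodes, so the v_data pointer fetched with vn_load_v_data_smr stays type-stable for the whole section. A sketch under the same placeholder names as the earlier exampfs fragments:

static uma_zone_t exampfs_node_zone;

static void
exampfs_zones_init(void *dummy __unused)
{

        exampfs_node_zone = uma_zcreate("exampfs_node",
            sizeof(struct exampfs_node), NULL, NULL, NULL, NULL,
            UMA_ALIGN_PTR, 0);
        /* Free node memory through vfs_smr, like the vnode zone. */
        VFS_SMR_ZONE_SET(exampfs_node_zone);
}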