vfs: add the infrastructure for lockless lookup
Reviewed by: kib
Tested by: pho (in a patchset)
Differential Revision: https://reviews.freebsd.org/D25577
This commit is contained in:
parent
0379ff6ae3
commit
07d2145a17
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=363518
@ -102,8 +102,8 @@ MALLOC_DECLARE(M_FADVISE);
|
||||
|
||||
static __read_mostly uma_zone_t file_zone;
|
||||
static __read_mostly uma_zone_t filedesc0_zone;
|
||||
static __read_mostly uma_zone_t pwd_zone;
|
||||
static __read_mostly smr_t pwd_smr;
|
||||
__read_mostly uma_zone_t pwd_zone;
|
||||
VFS_SMR_DECLARE;
|
||||
|
||||
static int closefp(struct filedesc *fdp, int fd, struct file *fp,
|
||||
struct thread *td, int holdleaders);
|
||||
@ -3343,18 +3343,27 @@ pwd_hold(struct thread *td)
|
||||
|
||||
fdp = td->td_proc->p_fd;
|
||||
|
||||
smr_enter(pwd_smr);
|
||||
pwd = smr_entered_load(&fdp->fd_pwd, pwd_smr);
|
||||
vfs_smr_enter();
|
||||
pwd = vfs_smr_entered_load(&fdp->fd_pwd);
|
||||
MPASS(pwd != NULL);
|
||||
if (__predict_true(refcount_acquire_if_not_zero(&pwd->pwd_refcount))) {
|
||||
smr_exit(pwd_smr);
|
||||
vfs_smr_exit();
|
||||
return (pwd);
|
||||
}
|
||||
smr_exit(pwd_smr);
|
||||
vfs_smr_exit();
|
||||
FILEDESC_SLOCK(fdp);
|
||||
pwd = pwd_hold_filedesc(fdp);
|
||||
MPASS(pwd != NULL);
|
||||
FILEDESC_SUNLOCK(fdp);
|
||||
return (pwd);
|
||||
}
|
||||
|
||||
struct pwd *
|
||||
pwd_get_smr(void)
|
||||
{
|
||||
struct pwd *pwd;
|
||||
|
||||
pwd = vfs_smr_entered_load(&curproc->p_fd->fd_pwd);
|
||||
MPASS(pwd != NULL);
|
||||
return (pwd);
|
||||
}
|
||||
|
||||
@ -4368,7 +4377,11 @@ filelistinit(void *dummy)
|
||||
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
|
||||
pwd_zone = uma_zcreate("PWD", sizeof(struct pwd), NULL, NULL,
|
||||
NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_SMR);
|
||||
pwd_smr = uma_zone_get_smr(pwd_zone);
|
||||
/*
|
||||
* XXXMJG this is a temporary hack due to boot ordering issues against
|
||||
* the vnode zone.
|
||||
*/
|
||||
vfs_smr = uma_zone_get_smr(pwd_zone);
|
||||
mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
|
||||
}
|
||||
SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL);
|
||||
|
@ -664,8 +664,8 @@ vntblinit(void *dummy __unused)
|
||||
vnode_list_reclaim_marker = vn_alloc_marker(NULL);
|
||||
TAILQ_INSERT_HEAD(&vnode_list, vnode_list_reclaim_marker, v_vnodelist);
|
||||
vnode_zone = uma_zcreate("VNODE", sizeof (struct vnode), NULL, NULL,
|
||||
vnode_init, vnode_fini, UMA_ALIGN_PTR, UMA_ZONE_SMR);
|
||||
vfs_smr = uma_zone_get_smr(vnode_zone);
|
||||
vnode_init, vnode_fini, UMA_ALIGN_PTR, 0);
|
||||
uma_zone_set_smr(vnode_zone, vfs_smr);
|
||||
vnodepoll_zone = uma_zcreate("VNODEPOLL", sizeof (struct vpollinfo),
|
||||
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, 0);
|
||||
/*
|
||||
@ -2914,6 +2914,22 @@ vget_prep(struct vnode *vp)
|
||||
return (vs);
|
||||
}
|
||||
|
||||
void
|
||||
vget_abort(struct vnode *vp, enum vgetstate vs)
|
||||
{
|
||||
|
||||
switch (vs) {
|
||||
case VGET_USECOUNT:
|
||||
vrele(vp);
|
||||
break;
|
||||
case VGET_HOLDCNT:
|
||||
vdrop(vp);
|
||||
break;
|
||||
default:
|
||||
__assert_unreachable();
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
vget(struct vnode *vp, int flags, struct thread *td)
|
||||
{
|
||||
@ -2925,7 +2941,7 @@ vget(struct vnode *vp, int flags, struct thread *td)
|
||||
return (vget_finish(vp, flags, vs));
|
||||
}
|
||||
|
||||
static int __noinline
|
||||
static void __noinline
|
||||
vget_finish_vchr(struct vnode *vp)
|
||||
{
|
||||
|
||||
@ -2941,7 +2957,7 @@ vget_finish_vchr(struct vnode *vp)
|
||||
#else
|
||||
refcount_release(&vp->v_holdcnt);
|
||||
#endif
|
||||
return (0);
|
||||
return;
|
||||
}
|
||||
|
||||
VI_LOCK(vp);
|
||||
@ -2953,18 +2969,17 @@ vget_finish_vchr(struct vnode *vp)
|
||||
refcount_release(&vp->v_holdcnt);
|
||||
#endif
|
||||
VI_UNLOCK(vp);
|
||||
return (0);
|
||||
return;
|
||||
}
|
||||
v_incr_devcount(vp);
|
||||
refcount_acquire(&vp->v_usecount);
|
||||
VI_UNLOCK(vp);
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
vget_finish(struct vnode *vp, int flags, enum vgetstate vs)
|
||||
{
|
||||
int error, old;
|
||||
int error;
|
||||
|
||||
if ((flags & LK_INTERLOCK) != 0)
|
||||
ASSERT_VI_LOCKED(vp, __func__);
|
||||
@ -2976,20 +2991,32 @@ vget_finish(struct vnode *vp, int flags, enum vgetstate vs)
|
||||
|
||||
error = vn_lock(vp, flags);
|
||||
if (__predict_false(error != 0)) {
|
||||
if (vs == VGET_USECOUNT)
|
||||
vrele(vp);
|
||||
else
|
||||
vdrop(vp);
|
||||
vget_abort(vp, vs);
|
||||
CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__,
|
||||
vp);
|
||||
return (error);
|
||||
}
|
||||
|
||||
if (vs == VGET_USECOUNT)
|
||||
return (0);
|
||||
vget_finish_ref(vp, vs);
|
||||
return (0);
|
||||
}
|
||||
|
||||
if (__predict_false(vp->v_type == VCHR))
|
||||
return (vget_finish_vchr(vp));
|
||||
void
|
||||
vget_finish_ref(struct vnode *vp, enum vgetstate vs)
|
||||
{
|
||||
int old;
|
||||
|
||||
VNPASS(vs == VGET_HOLDCNT || vs == VGET_USECOUNT, vp);
|
||||
VNPASS(vp->v_holdcnt > 0, vp);
|
||||
VNPASS(vs == VGET_HOLDCNT || vp->v_usecount > 0, vp);
|
||||
|
||||
if (vs == VGET_USECOUNT)
|
||||
return;
|
||||
|
||||
if (__predict_false(vp->v_type == VCHR)) {
|
||||
vget_finish_vchr(vp);
|
||||
return;
|
||||
}
|
||||
|
||||
/*
|
||||
* We hold the vnode. If the usecount is 0 it will be utilized to keep
|
||||
@ -3006,7 +3033,6 @@ vget_finish(struct vnode *vp, int flags, enum vgetstate vs)
|
||||
refcount_release(&vp->v_holdcnt);
|
||||
#endif
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
@ -4424,6 +4450,7 @@ DB_SHOW_COMMAND(mount, db_show_mount)
|
||||
MNT_KERN_FLAG(MNTK_LOOKUP_EXCL_DOTDOT);
|
||||
MNT_KERN_FLAG(MNTK_MARKER);
|
||||
MNT_KERN_FLAG(MNTK_USES_BCACHE);
|
||||
MNT_KERN_FLAG(MNTK_FPLOOKUP);
|
||||
MNT_KERN_FLAG(MNTK_NOASYNC);
|
||||
MNT_KERN_FLAG(MNTK_UNMOUNT);
|
||||
MNT_KERN_FLAG(MNTK_MWAIT);
|
||||
@ -5239,6 +5266,38 @@ vn_isdisk(struct vnode *vp, int *errp)
|
||||
return (error == 0);
|
||||
}
|
||||
|
||||
/*
|
||||
* VOP_FPLOOKUP_VEXEC routines are subject to special circumstances, see
|
||||
* the comment above cache_fplookup for details.
|
||||
*
|
||||
* We never deny as priv_check_cred calls are not yet supported, see vaccess.
|
||||
*/
|
||||
int
|
||||
vaccess_vexec_smr(mode_t file_mode, uid_t file_uid, gid_t file_gid, struct ucred *cred)
|
||||
{
|
||||
|
||||
VFS_SMR_ASSERT_ENTERED();
|
||||
|
||||
/* Check the owner. */
|
||||
if (cred->cr_uid == file_uid) {
|
||||
if (file_mode & S_IXUSR)
|
||||
return (0);
|
||||
return (EAGAIN);
|
||||
}
|
||||
|
||||
/* Otherwise, check the groups (first match) */
|
||||
if (groupmember(file_gid, cred)) {
|
||||
if (file_mode & S_IXGRP)
|
||||
return (0);
|
||||
return (EAGAIN);
|
||||
}
|
||||
|
||||
/* Otherwise, check everyone else. */
|
||||
if (file_mode & S_IXOTH)
|
||||
return (0);
|
||||
return (EAGAIN);
|
||||
}
|
||||
|
||||
/*
|
||||
* Common filesystem object access control check routine. Accepts a
|
||||
* vnode's type, "mode", uid and gid, requested access mode, credentials,
|
||||
@ -5537,6 +5596,20 @@ vop_rename_pre(void *ap)
|
||||
}
|
||||
|
||||
#ifdef DEBUG_VFS_LOCKS
|
||||
void
|
||||
vop_fplookup_vexec_pre(void *ap __unused)
|
||||
{
|
||||
|
||||
VFS_SMR_ASSERT_ENTERED();
|
||||
}
|
||||
|
||||
void
|
||||
vop_fplookup_vexec_post(void *ap __unused, int rc __unused)
|
||||
{
|
||||
|
||||
VFS_SMR_ASSERT_ENTERED();
|
||||
}
|
||||
|
||||
void
|
||||
vop_strategy_pre(void *ap)
|
||||
{
|
||||
|
@ -146,6 +146,17 @@ vop_close {
|
||||
};
|
||||
|
||||
|
||||
%% fplookup_vexec vp - - -
|
||||
%! fplookup_vexec pre vop_fplookup_vexec_pre
|
||||
%! fplookup_vexec post vop_fplookup_vexec_post
|
||||
|
||||
vop_fplookup_vexec {
|
||||
IN struct vnode *vp;
|
||||
IN struct ucred *cred;
|
||||
IN struct thread *td;
|
||||
};
|
||||
|
||||
|
||||
%% access vp L L L
|
||||
|
||||
vop_access {
|
||||
|
@ -422,13 +422,14 @@ int mac_vnode_check_listextattr(struct ucred *cred, struct vnode *vp,
|
||||
int mac_vnode_check_lookup_impl(struct ucred *cred, struct vnode *dvp,
|
||||
struct componentname *cnp);
|
||||
extern bool mac_vnode_check_lookup_fp_flag;
|
||||
#define mac_vnode_check_lookup_enabled() __predict_false(mac_vnode_check_lookup_fp_flag)
|
||||
static inline int
|
||||
mac_vnode_check_lookup(struct ucred *cred, struct vnode *dvp,
|
||||
struct componentname *cnp)
|
||||
{
|
||||
|
||||
mac_vnode_assert_locked(dvp, "mac_vnode_check_lookup");
|
||||
if (__predict_false(mac_vnode_check_lookup_fp_flag))
|
||||
if (mac_vnode_check_lookup_enabled())
|
||||
return (mac_vnode_check_lookup_impl(cred, dvp, cnp));
|
||||
return (0);
|
||||
}
|
||||
|
@ -311,6 +311,7 @@ pwd_set(struct filedesc *fdp, struct pwd *newpwd)
|
||||
smr_serialized_store(&fdp->fd_pwd, newpwd,
|
||||
(FILEDESC_XLOCK_ASSERT(fdp), true));
|
||||
}
|
||||
struct pwd *pwd_get_smr(void);
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
|
@ -420,6 +420,7 @@ void __mnt_vnode_markerfree_lazy(struct vnode **mvp, struct mount *mp);
|
||||
#define MNTK_TEXT_REFS 0x00008000 /* Keep use ref for text */
|
||||
#define MNTK_VMSETSIZE_BUG 0x00010000
|
||||
#define MNTK_UNIONFS 0x00020000 /* A hack for F_ISUNIONSTACK */
|
||||
#define MNTK_FPLOOKUP 0x00040000 /* fast path lookup is supported */
|
||||
#define MNTK_NOASYNC 0x00800000 /* disable async */
|
||||
#define MNTK_UNMOUNT 0x01000000 /* unmount in progress */
|
||||
#define MNTK_MWAIT 0x02000000 /* waiting for unmount to finish */
|
||||
|
@ -666,6 +666,8 @@ int vn_path_to_global_path(struct thread *td, struct vnode *vp,
|
||||
int vaccess(enum vtype type, mode_t file_mode, uid_t file_uid,
|
||||
gid_t file_gid, accmode_t accmode, struct ucred *cred,
|
||||
int *privused);
|
||||
int vaccess_vexec_smr(mode_t file_mode, uid_t file_uid, gid_t file_gid,
|
||||
struct ucred *cred);
|
||||
int vaccess_acl_nfs4(enum vtype type, uid_t file_uid, gid_t file_gid,
|
||||
struct acl *aclp, accmode_t accmode, struct ucred *cred,
|
||||
int *privused);
|
||||
@ -682,6 +684,8 @@ int vget(struct vnode *vp, int flags, struct thread *td);
|
||||
enum vgetstate vget_prep_smr(struct vnode *vp);
|
||||
enum vgetstate vget_prep(struct vnode *vp);
|
||||
int vget_finish(struct vnode *vp, int flags, enum vgetstate vs);
|
||||
void vget_finish_ref(struct vnode *vp, enum vgetstate vs);
|
||||
void vget_abort(struct vnode *vp, enum vgetstate vs);
|
||||
void vgone(struct vnode *vp);
|
||||
void vhold(struct vnode *);
|
||||
void vholdl(struct vnode *);
|
||||
@ -865,6 +869,8 @@ void vop_symlink_post(void *a, int rc);
|
||||
int vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a);
|
||||
|
||||
#ifdef DEBUG_VFS_LOCKS
|
||||
void vop_fplookup_vexec_pre(void *a);
|
||||
void vop_fplookup_vexec_post(void *a, int rc);
|
||||
void vop_strategy_pre(void *a);
|
||||
void vop_lock_pre(void *a);
|
||||
void vop_lock_post(void *a, int rc);
|
||||
@ -872,6 +878,8 @@ void vop_unlock_pre(void *a);
|
||||
void vop_need_inactive_pre(void *a);
|
||||
void vop_need_inactive_post(void *a, int rc);
|
||||
#else
|
||||
#define vop_fplookup_vexec_pre(x) do { } while (0)
|
||||
#define vop_fplookup_vexec_post(x, y) do { } while (0)
|
||||
#define vop_strategy_pre(x) do { } while (0)
|
||||
#define vop_lock_pre(x) do { } while (0)
|
||||
#define vop_lock_post(x, y) do { } while (0)
|
||||
@ -1025,10 +1033,18 @@ int vn_dir_check_exec(struct vnode *vp, struct componentname *cnp);
|
||||
#define VFS_SMR() vfs_smr
|
||||
#define vfs_smr_enter() smr_enter(VFS_SMR())
|
||||
#define vfs_smr_exit() smr_exit(VFS_SMR())
|
||||
#define vfs_smr_entered_load(ptr) smr_entered_load((ptr), VFS_SMR())
|
||||
#define VFS_SMR_ASSERT_ENTERED() SMR_ASSERT_ENTERED(VFS_SMR())
|
||||
#define VFS_SMR_ASSERT_NOT_ENTERED() SMR_ASSERT_NOT_ENTERED(VFS_SMR())
|
||||
#define VFS_SMR_ZONE_SET(zone) uma_zone_set_smr((zone), VFS_SMR())
|
||||
|
||||
/*
 * Atomically load the v_data pointer of a vnode.  Asserts that the caller
 * is inside the VFS SMR section.  The argument is evaluated exactly once;
 * the value of the expression is the loaded pointer.
 */
#define	vn_load_v_data_smr(vp)	({					\
	struct vnode *_vp = (vp);					\
									\
	VFS_SMR_ASSERT_ENTERED();					\
	atomic_load_ptr(&_vp->v_data);					\
})
|
||||
|
||||
#endif /* _KERNEL */
|
||||
|
||||
#endif /* !_SYS_VNODE_H_ */
|
||||
|
Loading…
Reference in New Issue
Block a user