vfs: implement usecount implying holdcnt

vnodes have 2 reference counts: holdcnt, which keeps the vnode itself from
getting freed, and usecount, which denotes that the vnode is actively used.

Previously all operations bumping usecount would also bump holdcnt, which is
not necessary. We can detect if usecount is already > 0 (in which case holdcnt
is also > 0) and use that fact to avoid bumping holdcnt on our own. This saves
on atomic ops.
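
To illustrate the counting scheme, here is a minimal user-space model
(invented names; the real code uses refcount(9) and the vnode interlock,
which this model omits on the slow path):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct vnode_model {
	atomic_int usecount;	/* active users; usecount > 0 implies a hold */
	atomic_int holdcnt;	/* keeps the vnode memory from being freed */
};

/* Bump *cnt only if it is already > 0; returns false on 0. */
static bool
acquire_if_not_zero(atomic_int *cnt)
{
	int old;

	old = atomic_load(cnt);
	while (old > 0) {
		if (atomic_compare_exchange_weak(cnt, &old, old + 1))
			return (true);
	}
	return (false);
}

static void
vref_model(struct vnode_model *vp)
{
	/*
	 * Fast path: usecount > 0 already implies holdcnt > 0, so a
	 * single atomic op suffices. The old scheme unconditionally
	 * bumped both counters, i.e. two atomics per reference.
	 */
	if (acquire_if_not_zero(&vp->usecount))
		return;
	/*
	 * 0->1 transition: take a hold to back the new usecount. The
	 * kernel serializes this path with the vnode interlock, which
	 * this model leaves out.
	 */
	atomic_fetch_add(&vp->holdcnt, 1);
	atomic_fetch_add(&vp->usecount, 1);
}

int
main(void)
{
	struct vnode_model v = { 1, 1 };	/* one active user */

	vref_model(&v);		/* fast path: holdcnt stays untouched */
	printf("usecount %d holdcnt %d\n", atomic_load(&v.usecount),
	    atomic_load(&v.holdcnt));		/* usecount 2 holdcnt 1 */
	return (0);
}

The fast path is where the savings come from: one atomic op on usecount
instead of the two the old scheme performed on every reference.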

Reviewed by:	kib
Tested by:	pho (previous version)
Sponsored by:	The FreeBSD Foundation
Differential Revision:	https://reviews.freebsd.org/D21471
Mateusz Guzik, 2019-09-03 15:42:11 +00:00
parent 4d5475613e, commit e3c3248cc7
8 changed files with 164 additions and 110 deletions


@@ -87,7 +87,6 @@ vn_rele_async(vnode_t *vp, taskq_t *taskq)
 {
 	VERIFY(vp->v_count > 0);
 	if (refcount_release_if_not_last(&vp->v_usecount)) {
-		vdrop(vp);
 		return;
 	}
 	VERIFY(taskq_dispatch((taskq_t *)taskq,
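
The hunk above is the release side of the new invariant: dropping a usecount
that is not the last one can no longer free the vnode, because the surviving
usecount still implies a hold, so the paired vdrop() goes away. A minimal
user-space sketch of such a release primitive (model code with invented
names; the kernel's actual helper is refcount_release_if_not_last() from
refcount(9)):

#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>

/* Drop a reference only if it is not the last one. */
static bool
release_if_not_last(atomic_int *cnt)
{
	int old;

	old = atomic_load(cnt);
	while (old > 1) {
		if (atomic_compare_exchange_weak(cnt, &old, old - 1))
			return (true);	/* count stays > 0: hold still implied */
	}
	return (false);	/* last reference: caller takes the locked slow path */
}

int
main(void)
{
	atomic_int usecount = 2;

	assert(release_if_not_last(&usecount));		/* 2 -> 1, no vdrop() */
	assert(!release_if_not_last(&usecount));	/* last one: slow path */
	return (0);
}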


@@ -1196,6 +1196,7 @@ zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap)
 	vnode_t *vp;
 	sfs_node_t *node;
 	size_t len;
+	enum vgetstate vs;
 	int locked;
 	int error;
 
@@ -1224,7 +1225,7 @@ zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap)
 	 * before we can lock the vnode again.
	 */
 	locked = VOP_ISLOCKED(vp);
-	vhold(vp);
+	vs = vget_prep(vp);
 	vput(vp);
 	/* Look up .zfs/snapshot, our parent. */
@@ -1236,7 +1237,7 @@ zfsctl_snapshot_vptocnp(struct vop_vptocnp_args *ap)
 		bcopy(node->sn_name, ap->a_buf + *ap->a_buflen, len);
 	}
 	vfs_unbusy(mp);
-	vget(vp, locked | LK_VNHELD | LK_RETRY, curthread);
+	vget_finish(vp, locked | LK_RETRY, vs);
 	return (error);
 }
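
This hunk shows the conversion pattern repeated in the files that follow:
take a reference with vget_prep() before the vnode lock is given up, then
relock and consume that reference with vget_finish(). Distilled into a
kernel-style sketch (not compilable on its own; relock_after_unlock() is a
hypothetical name used only for illustration):

/*
 * Assumes vp is locked and the caller owns a usecount, as in the
 * zfsctl code above.
 */
static int
relock_after_unlock(struct vnode *vp, int lkflags)
{
	enum vgetstate vs;

	vs = vget_prep(vp);	/* one atomic op if vp is actively used */
	vput(vp);		/* unlock and drop our use reference */

	/* ... work that must be done with the vnode unlocked ... */

	/*
	 * Relock and turn the reference taken by vget_prep() back into
	 * a usecount; with LK_RETRY even a doomed vnode comes back locked.
	 */
	return (vget_finish(vp, lkflags | LK_RETRY, vs));
}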


@@ -5916,6 +5916,7 @@ zfs_vptocnp(struct vop_vptocnp_args *ap)
 	vnode_t *vp = ap->a_vp;
 	zfsvfs_t *zfsvfs = vp->v_vfsp->vfs_data;
 	znode_t *zp = VTOZ(vp);
+	enum vgetstate vs;
 	int ltype;
 	int error;
 
@@ -5948,10 +5949,10 @@ zfs_vptocnp(struct vop_vptocnp_args *ap)
 	ZFS_EXIT(zfsvfs);
 	covered_vp = vp->v_mount->mnt_vnodecovered;
-	vhold(covered_vp);
+	vs = vget_prep(covered_vp);
 	ltype = VOP_ISLOCKED(vp);
 	VOP_UNLOCK(vp, 0);
-	error = vget(covered_vp, LK_SHARED | LK_VNHELD, curthread);
+	error = vget_finish(covered_vp, LK_SHARED, vs);
 	if (error == 0) {
 		error = VOP_VPTOCNP(covered_vp, ap->a_vpp, ap->a_cred,
 		    ap->a_buf, ap->a_buflen);


@@ -1255,6 +1255,7 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 	struct rwlock *blp;
 	struct mtx *dvlp;
 	uint32_t hash;
+	enum vgetstate vs;
 	int error, ltype;
 
 	if (__predict_false(!doingcache)) {
@@ -1369,9 +1370,9 @@ cache_lookup(struct vnode *dvp, struct vnode **vpp, struct componentname *cnp,
 		ltype = VOP_ISLOCKED(dvp);
 		VOP_UNLOCK(dvp, 0);
 	}
-	vhold(*vpp);
+	vs = vget_prep(*vpp);
 	cache_lookup_unlock(blp, dvlp);
-	error = vget(*vpp, cnp->cn_lkflags | LK_VNHELD, cnp->cn_thread);
+	error = vget_finish(*vpp, cnp->cn_lkflags, vs);
 	if (cnp->cn_flags & ISDOTDOT) {
 		vn_lock(dvp, ltype | LK_RETRY);
 		if (dvp->v_iflag & VI_DOOMED) {
@@ -2430,6 +2431,7 @@ vn_dir_dd_ino(struct vnode *vp)
 	struct namecache *ncp;
 	struct vnode *ddvp;
 	struct mtx *vlp;
+	enum vgetstate vs;
 
 	ASSERT_VOP_LOCKED(vp, "vn_dir_dd_ino");
 	vlp = VP2VNODELOCK(vp);
@@ -2438,9 +2440,9 @@ vn_dir_dd_ino(struct vnode *vp)
 		if ((ncp->nc_flag & NCF_ISDOTDOT) != 0)
 			continue;
 		ddvp = ncp->nc_dvp;
-		vhold(ddvp);
+		vs = vget_prep(ddvp);
 		mtx_unlock(vlp);
-		if (vget(ddvp, LK_SHARED | LK_NOWAIT | LK_VNHELD, curthread))
+		if (vget_finish(ddvp, LK_SHARED | LK_NOWAIT, vs))
 			return (NULL);
 		return (ddvp);
 	}


@@ -76,6 +76,7 @@ vfs_hash_get(const struct mount *mp, u_int hash, int flags, struct thread *td,
     struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg)
 {
 	struct vnode *vp;
+	enum vgetstate vs;
 	int error;
 
 	while (1) {
@@ -87,9 +88,9 @@ vfs_hash_get(const struct mount *mp, u_int hash, int flags, struct thread *td,
 			continue;
 		if (fn != NULL && fn(vp, arg))
 			continue;
-		vhold(vp);
+		vs = vget_prep(vp);
 		rw_runlock(&vfs_hash_lock);
-		error = vget(vp, flags | LK_VNHELD, td);
+		error = vget_finish(vp, flags, vs);
 		if (error == ENOENT && (flags & LK_NOWAIT) == 0)
 			break;
 		if (error)
@@ -149,6 +150,7 @@ vfs_hash_insert(struct vnode *vp, u_int hash, int flags, struct thread *td,
     struct vnode **vpp, vfs_hash_cmp_t *fn, void *arg)
 {
 	struct vnode *vp2;
+	enum vgetstate vs;
 	int error;
 
 	*vpp = NULL;
@@ -162,9 +164,9 @@ vfs_hash_insert(struct vnode *vp, u_int hash, int flags, struct thread *td,
 			continue;
 		if (fn != NULL && fn(vp2, arg))
 			continue;
-		vhold(vp2);
+		vs = vget_prep(vp2);
 		rw_wunlock(&vfs_hash_lock);
-		error = vget(vp2, flags | LK_VNHELD, td);
+		error = vget_finish(vp2, flags, vs);
 		if (error == ENOENT && (flags & LK_NOWAIT) == 0)
 			break;
 		rw_wlock(&vfs_hash_lock);


@@ -107,8 +107,6 @@ static int flushbuflist(struct bufv *bufv, int flags, struct bufobj *bo,
 static void	syncer_shutdown(void *arg, int howto);
 static int	vtryrecycle(struct vnode *vp);
 static void	v_init_counters(struct vnode *);
-static void	v_incr_usecount(struct vnode *);
-static void	v_incr_usecount_locked(struct vnode *);
 static void	v_incr_devcount(struct vnode *);
 static void	v_decr_devcount(struct vnode *);
 static void	vgonel(struct vnode *);
@@ -2632,44 +2630,6 @@ v_init_counters(struct vnode *vp)
 	refcount_init(&vp->v_usecount, 1);
 }
 
-static void
-v_incr_usecount_locked(struct vnode *vp)
-{
-
-	ASSERT_VI_LOCKED(vp, __func__);
-	if ((vp->v_iflag & VI_OWEINACT) != 0) {
-		VNASSERT(vp->v_usecount == 0, vp,
-		    ("vnode with usecount and VI_OWEINACT set"));
-		vp->v_iflag &= ~VI_OWEINACT;
-		VNODE_REFCOUNT_FENCE_REL();
-	}
-	refcount_acquire(&vp->v_usecount);
-	v_incr_devcount(vp);
-}
-
-/*
- * Increment the use count on the vnode, taking care to reference
- * the driver's usecount if this is a chardev.
- */
-static void
-v_incr_usecount(struct vnode *vp)
-{
-
-	ASSERT_VI_UNLOCKED(vp, __func__);
-	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	if (vp->v_type != VCHR &&
-	    refcount_acquire_if_not_zero(&vp->v_usecount)) {
-		VNODE_REFCOUNT_FENCE_ACQ();
-		VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp,
-		    ("vnode with usecount and VI_OWEINACT set"));
-	} else {
-		VI_LOCK(vp);
-		v_incr_usecount_locked(vp);
-		VI_UNLOCK(vp);
-	}
-}
-
 /*
  * Increment si_usecount of the associated device, if any.
  */
@@ -2714,35 +2674,107 @@ v_decr_devcount(struct vnode *vp)
  * with atomic operations. Otherwise the interlock is taken covering
  * both the atomic and additional actions.
  */
+static enum vgetstate
+_vget_prep(struct vnode *vp, bool interlock)
+{
+	enum vgetstate vs;
+
+	if (__predict_true(vp->v_type != VCHR)) {
+		if (refcount_acquire_if_not_zero(&vp->v_usecount)) {
+			vs = VGET_USECOUNT;
+		} else {
+			_vhold(vp, interlock);
+			vs = VGET_HOLDCNT;
+		}
+	} else {
+		if (!interlock)
+			VI_LOCK(vp);
+		if (vp->v_usecount == 0) {
+			vholdl(vp);
+			vs = VGET_HOLDCNT;
+		} else {
+			v_incr_devcount(vp);
+			refcount_acquire(&vp->v_usecount);
+			vs = VGET_USECOUNT;
+		}
+		if (!interlock)
+			VI_UNLOCK(vp);
+	}
+	return (vs);
+}
+
+enum vgetstate
+vget_prep(struct vnode *vp)
+{
+
+	return (_vget_prep(vp, false));
+}
+
 int
 vget(struct vnode *vp, int flags, struct thread *td)
 {
+	enum vgetstate vs;
+
+	MPASS(td == curthread);
+	vs = _vget_prep(vp, (flags & LK_INTERLOCK) != 0);
+	return (vget_finish(vp, flags, vs));
+}
+
+int
+vget_finish(struct vnode *vp, int flags, enum vgetstate vs)
+{
 	int error, oweinact;
 
 	VNASSERT((flags & LK_TYPE_MASK) != 0, vp,
-	    ("vget: invalid lock operation"));
+	    ("%s: invalid lock operation", __func__));
 
 	if ((flags & LK_INTERLOCK) != 0)
 		ASSERT_VI_LOCKED(vp, __func__);
 	else
 		ASSERT_VI_UNLOCKED(vp, __func__);
-	if ((flags & LK_VNHELD) != 0)
-		VNASSERT((vp->v_holdcnt > 0), vp,
-		    ("vget: LK_VNHELD passed but vnode not held"));
-
-	CTR3(KTR_VFS, "%s: vp %p with flags %d", __func__, vp, flags);
-
-	if ((flags & LK_VNHELD) == 0)
-		_vhold(vp, (flags & LK_INTERLOCK) != 0);
+	VNASSERT(vp->v_holdcnt > 0, vp, ("%s: vnode not held", __func__));
+	if (vs == VGET_USECOUNT) {
+		VNASSERT(vp->v_usecount > 0, vp,
+		    ("%s: vnode without usecount when VGET_USECOUNT was passed",
+		    __func__));
+	}
 
 	if ((error = vn_lock(vp, flags)) != 0) {
-		vdrop(vp);
+		if (vs == VGET_USECOUNT)
+			vrele(vp);
+		else
+			vdrop(vp);
 		CTR2(KTR_VFS, "%s: impossible to lock vnode %p", __func__,
 		    vp);
 		return (error);
 	}
+
 	if (vp->v_iflag & VI_DOOMED && (flags & LK_RETRY) == 0)
 		panic("vget: vn_lock failed to return ENOENT\n");
+
+	if (vs == VGET_USECOUNT) {
+		VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp,
+		    ("%s: vnode with usecount and VI_OWEINACT set", __func__));
+		return (0);
+	}
+
+	/*
+	 * We hold the vnode. If the usecount is 0 it will be utilized to keep
+	 * the vnode around. Otherwise someone else lent their hold count and
+	 * we have to drop ours.
+	 */
+	if (vp->v_type != VCHR &&
+	    refcount_acquire_if_not_zero(&vp->v_usecount)) {
+#ifdef INVARIANTS
+		int old = atomic_fetchadd_int(&vp->v_holdcnt, -1) - 1;
+		VNASSERT(old > 0, vp, ("%s: wrong hold count", __func__));
+#else
+		refcount_release(&vp->v_holdcnt);
+#endif
+		VNODE_REFCOUNT_FENCE_ACQ();
+		VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp,
+		    ("%s: vnode with usecount and VI_OWEINACT set", __func__));
+		return (0);
+	}
+
 	/*
 	 * We don't guarantee that any particular close will
 	 * trigger inactive processing so just make a best effort
@@ -2751,23 +2783,22 @@ vget(struct vnode *vp, int flags, struct thread *td)
 	 *
 	 * Upgrade our holdcnt to a usecount.
	 */
-	if (vp->v_type == VCHR ||
-	    !refcount_acquire_if_not_zero(&vp->v_usecount)) {
-		VI_LOCK(vp);
-		if ((vp->v_iflag & VI_OWEINACT) == 0) {
-			oweinact = 0;
-		} else {
-			oweinact = 1;
-			vp->v_iflag &= ~VI_OWEINACT;
-			VNODE_REFCOUNT_FENCE_REL();
-		}
-		refcount_acquire(&vp->v_usecount);
-		v_incr_devcount(vp);
-		if (oweinact && VOP_ISLOCKED(vp) == LK_EXCLUSIVE &&
-		    (flags & LK_NOWAIT) == 0)
-			vinactive(vp, td);
-		VI_UNLOCK(vp);
+	VI_LOCK(vp);
+	if ((vp->v_iflag & VI_OWEINACT) == 0) {
+		oweinact = 0;
+	} else {
+		oweinact = 1;
+		vp->v_iflag &= ~VI_OWEINACT;
+		VNODE_REFCOUNT_FENCE_REL();
 	}
+	if (vp->v_usecount > 0)
+		refcount_release(&vp->v_holdcnt);
+	v_incr_devcount(vp);
+	refcount_acquire(&vp->v_usecount);
+	if (oweinact && VOP_ISLOCKED(vp) == LK_EXCLUSIVE &&
+	    (flags & LK_NOWAIT) == 0)
+		vinactive(vp, curthread);
+	VI_UNLOCK(vp);
 	return (0);
 }
@@ -2781,8 +2812,18 @@ vref(struct vnode *vp)
 	ASSERT_VI_UNLOCKED(vp, __func__);
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	_vhold(vp, false);
-	v_incr_usecount(vp);
+	if (vp->v_type != VCHR &&
+	    refcount_acquire_if_not_zero(&vp->v_usecount)) {
+		VNODE_REFCOUNT_FENCE_ACQ();
+		VNASSERT(vp->v_holdcnt > 0, vp,
+		    ("%s: active vnode not held", __func__));
+		VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp,
+		    ("%s: vnode with usecount and VI_OWEINACT set", __func__));
+		return;
+	}
+	VI_LOCK(vp);
+	vrefl(vp);
+	VI_UNLOCK(vp);
 }
 
 void
@@ -2791,8 +2832,14 @@ vrefl(struct vnode *vp)
 	ASSERT_VI_LOCKED(vp, __func__);
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
-	_vhold(vp, true);
-	v_incr_usecount_locked(vp);
+	if (vp->v_usecount == 0)
+		vholdl(vp);
+	if ((vp->v_iflag & VI_OWEINACT) != 0) {
+		vp->v_iflag &= ~VI_OWEINACT;
+		VNODE_REFCOUNT_FENCE_REL();
+	}
+	v_incr_devcount(vp);
+	refcount_acquire(&vp->v_usecount);
 }
 
 void
@@ -2807,12 +2854,9 @@ vrefact(struct vnode *vp)
 		return;
 	}
 #ifdef INVARIANTS
-	int old = atomic_fetchadd_int(&vp->v_holdcnt, 1);
-	VNASSERT(old > 0, vp, ("%s: wrong hold count", __func__));
-	old = atomic_fetchadd_int(&vp->v_usecount, 1);
+	int old = atomic_fetchadd_int(&vp->v_usecount, 1);
 	VNASSERT(old > 0, vp, ("%s: wrong use count", __func__));
 #else
-	refcount_acquire(&vp->v_holdcnt);
 	refcount_acquire(&vp->v_usecount);
 #endif
 }
@@ -2860,13 +2904,22 @@ vputx(struct vnode *vp, int func)
 	CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
 
 	/*
 	 * It is an invariant that all VOP_* calls operate on a held vnode.
+	 * We may only have an implicit hold stemming from our usecount,
+	 * which we are about to release. If we unlock the vnode afterwards we
+	 * open a time window where someone else dropped the last usecount and
+	 * proceeded to free the vnode before our unlock finished. For this
+	 * reason we unlock the vnode early. This is a little bit wasteful as
+	 * it may be that the vnode is exclusively locked and inactive
+	 * processing is needed, in which case we are adding work.
	 */
+	if (func == VPUTX_VPUT)
+		VOP_UNLOCK(vp, 0);
+
 	if (vp->v_type != VCHR &&
-	    refcount_release_if_not_last(&vp->v_usecount)) {
-		if (func == VPUTX_VPUT)
-			VOP_UNLOCK(vp, 0);
-		vdrop(vp);
+	    refcount_release_if_not_last(&vp->v_usecount))
 		return;
-	}
 
 	VI_LOCK(vp);
@@ -2875,17 +2928,16 @@ vputx(struct vnode *vp, int func)
 	 * prevent vgone() races. We drop the use count here and the
 	 * hold count below when we're done.
	 */
-	if (!refcount_release(&vp->v_usecount) ||
-	    (vp->v_iflag & VI_DOINGINACT)) {
-		if (func == VPUTX_VPUT)
-			VOP_UNLOCK(vp, 0);
-		v_decr_devcount(vp);
+	v_decr_devcount(vp);
+	if (!refcount_release(&vp->v_usecount)) {
 		VI_UNLOCK(vp);
 		return;
 	}
+
+	if (vp->v_iflag & VI_DOINGINACT) {
+		vdropl(vp);
+		return;
+	}
-
-	v_decr_devcount(vp);
 
 	error = 0;
 	if (vp->v_usecount != 0) {
@@ -2904,8 +2956,6 @@ vputx(struct vnode *vp, int func)
	 */
 	if (__predict_false(vp->v_iflag & VI_DOOMED) ||
 	    VOP_NEED_INACTIVE(vp) == 0) {
-		if (func == VPUTX_VPUT)
-			VOP_UNLOCK(vp, 0);
 		vdropl(vp);
 		return;
 	}
@@ -2921,11 +2971,8 @@ vputx(struct vnode *vp, int func)
 		VI_LOCK(vp);
 		break;
 	case VPUTX_VPUT:
-		if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
-			error = VOP_LOCK(vp, LK_UPGRADE | LK_INTERLOCK |
-			    LK_NOWAIT);
-			VI_LOCK(vp);
-		}
+		error = VOP_LOCK(vp, LK_EXCLUSIVE | LK_INTERLOCK | LK_NOWAIT);
+		VI_LOCK(vp);
 		break;
 	case VPUTX_VUNREF:
 		if (VOP_ISLOCKED(vp) != LK_EXCLUSIVE) {
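
The core of vget_finish() above, either keeping the usecount taken at prep
time or upgrading a bare hold, can be modeled in user space as follows
(illustration only; devcount, VI_OWEINACT and interlock handling are
omitted and all names are invented):

#include <stdatomic.h>
#include <stdbool.h>

enum vgetstate_model { M_HOLDCNT, M_USECOUNT };	/* mirrors enum vgetstate */

struct vnode_model {
	atomic_int usecount;
	atomic_int holdcnt;
};

static bool
acquire_if_not_zero(atomic_int *cnt)
{
	int old;

	old = atomic_load(cnt);
	while (old > 0) {
		if (atomic_compare_exchange_weak(cnt, &old, old + 1))
			return (true);
	}
	return (false);
}

/* Model of the post-lock part of vget_finish(). */
static void
finish_model(struct vnode_model *vp, enum vgetstate_model vs)
{
	if (vs == M_USECOUNT)
		return;	/* prep already took a use reference */
	/*
	 * We only hold the vnode. If someone else has a usecount, that
	 * reference already implies a hold, making ours redundant: take
	 * a usecount and give the extra hold back.
	 */
	if (acquire_if_not_zero(&vp->usecount)) {
		atomic_fetch_sub(&vp->holdcnt, 1);
		return;
	}
	/* 0->1: our hold is repurposed to back the new usecount. */
	atomic_fetch_add(&vp->usecount, 1);
}

int
main(void)
{
	struct vnode_model v = { 1, 2 };	/* a user + our prep-time hold */

	finish_model(&v, M_HOLDCNT);	/* ends with usecount 2, holdcnt 1 */
	return (0);
}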


@@ -164,7 +164,6 @@ _lockmgr_args_rw(struct lock *lk, u_int flags, struct rwlock *ilk,
 #define	LK_SLEEPFAIL	0x000800
 #define	LK_TIMELOCK	0x001000
 #define	LK_NODDLKTREAT	0x002000
-#define	LK_VNHELD	0x004000
 
 /*
  * Operations for lockmgr().


@@ -58,6 +58,7 @@
 enum vtype	{ VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD,
 		  VMARKER };
+enum vgetstate	{ VGET_HOLDCNT, VGET_USECOUNT };
 
 /*
  * Each underlying filesystem allocates its own private area and hangs
  * it from v_data.  If non-null, this area is freed in getnewvnode().
@@ -652,7 +653,9 @@ int	vcount(struct vnode *vp);
 #define	vdropl(vp)	_vdrop((vp), 1)
 void	_vdrop(struct vnode *, bool);
 int	vflush(struct mount *mp, int rootrefs, int flags, struct thread *td);
-int	vget(struct vnode *vp, int lockflag, struct thread *td);
+int	vget(struct vnode *vp, int flags, struct thread *td);
+enum vgetstate	vget_prep(struct vnode *vp);
+int	vget_finish(struct vnode *vp, int flags, enum vgetstate vs);
 void	vgone(struct vnode *vp);
 #define	vhold(vp)	_vhold((vp), 0)
 #define	vholdl(vp)	_vhold((vp), 1)