vfs: add the concept of vnode state transitions

To quote from a comment above vput_final:
<quote>
* XXX Some filesystems pass in an exclusively locked vnode and strongly depend
* on the lock being held all the way until VOP_INACTIVE. This in particular
* happens with UFS which adds half-constructed vnodes to the hash, where they
* can be found by other code.
</quote>

As is there is no mechanism which allows filesystems to denote that a
vnode is fully initialized, consequently problems like the above are
only found the hard way(tm).

Add rudimentary support for state transitions, which in particular allow
to assert the vnode is not legally unlocked until its fate is decided
(either construction finishes or vgone is called to abort it).

The new field lands in a 1-byte hole, thus it does not grow the struct.

Bump __FreeBSD_version to 1400077

Reviewed by:	kib (previous version)
Tested by:	pho
Differential Revision:	https://reviews.freebsd.org/D37759
This commit is contained in:
Mateusz Guzik 2022-12-19 13:00:30 +00:00
parent ed1bb25410
commit 829f0bcb5f
23 changed files with 121 additions and 4 deletions

View File

@ -540,6 +540,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
* Acquire vnode lock before making it available to the world.
*/
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
vn_set_state(vp, VSTATE_CONSTRUCTED);
VN_LOCK_AREC(vp);
if (vp->v_type != VFIFO)
VN_LOCK_ASHARE(vp);

View File

@ -704,6 +704,7 @@ autofs_node_vn(struct autofs_node *anp, struct mount *mp, int flags,
sx_xunlock(&anp->an_vnode_lock);
vn_set_state(vp, VSTATE_CONSTRUCTED);
*vpp = vp;
return (0);
}

View File

@ -822,6 +822,7 @@ cd9660_vget_internal(struct mount *mp, cd_ino_t ino, int flags,
* XXX need generation number?
*/
vn_set_state(vp, VSTATE_CONSTRUCTED);
*vpp = vp;
return (0);
}

View File

@ -613,6 +613,7 @@ devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode,
return (error);
}
if (devfs_allocv_drop_refs(0, dmp, de)) {
vgone(vp);
vput(vp);
return (ENOENT);
}
@ -620,6 +621,7 @@ devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode,
mac_devfs_vnode_associate(mp, de, vp);
#endif
sx_xunlock(&dmp->dm_lock);
vn_set_state(vp, VSTATE_CONSTRUCTED);
*vpp = vp;
return (0);
}

View File

@ -480,6 +480,7 @@ ext2_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp)
ip->i_birthtime = ts.tv_sec;
ip->i_birthnsec = ts.tv_nsec;
vn_set_state(vp, VSTATE_CONSTRUCTED);
*vpp = vp;
return (0);

View File

@ -1308,6 +1308,7 @@ ext2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
* Finish inode initialization.
*/
vn_set_state(vp, VSTATE_CONSTRUCTED);
*vpp = vp;
return (0);
}

View File

@ -235,6 +235,7 @@ fdesc_allocvp(fdntype ftype, unsigned fd_fd, int ix, struct mount *mp,
/* If we came here, we can insert it safely. */
LIST_INSERT_HEAD(fc, fd, fd_hash);
mtx_unlock(&fdesc_hashmtx);
vn_set_state(vp, VSTATE_CONSTRUCTED);
*vpp = vp;
return (0);
}

View File

@ -263,6 +263,7 @@ fuse_vnode_alloc(struct mount *mp,
if (data->dataflags & FSESS_ASYNC_READ && vtyp != VFIFO)
VN_LOCK_ASHARE(*vpp);
vn_set_state(*vpp, VSTATE_CONSTRUCTED);
err = vfs_hash_insert(*vpp, fuse_vnode_hash(nodeid), LK_EXCLUSIVE,
td, &vp2, fuse_vnode_cmp, &nodeid);
if (err) {

View File

@ -85,6 +85,7 @@ mntfs_allocvp(struct mount *mp, struct vnode *ovp)
VOP_UNLOCK(ovp);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
vn_set_state(vp, VSTATE_CONSTRUCTED);
return (vp);
}

View File

@ -299,6 +299,7 @@ deget(struct msdosfsmount *pmp, u_long dirclust, u_long diroffset,
}
} else
nvp->v_type = VREG;
vn_set_state(nvp, VSTATE_CONSTRUCTED);
ldep->de_modrev = init_va_filerev();
*depp = ldep;
return (0);

View File

@ -178,6 +178,7 @@ ncl_nget(struct mount *mntp, u_int8_t *fhp, int fhsize, struct nfsnode **npp,
uma_zfree(newnfsnode_zone, np);
return (error);
}
vn_set_state(vp, VSTATE_CONSTRUCTED);
error = vfs_hash_insert(vp, hash, lkflags,
td, &nvp, newnfs_vncmpf, np->n_fhp);
if (error)

View File

@ -286,6 +286,7 @@ nfscl_nget(struct mount *mntp, struct vnode *dvp, struct nfsfh *nfhp,
uma_zfree(newnfsnode_zone, np);
return (error);
}
vn_set_state(vp, VSTATE_CONSTRUCTED);
error = vfs_hash_insert(vp, hash, lkflags,
td, &nvp, newnfs_vncmpf, nfhp);
if (error)

View File

@ -262,6 +262,7 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp)
}
null_hashins(mp, xp);
vn_set_state(vp, VSTATE_CONSTRUCTED);
rw_wunlock(&null_hash_lock);
*vpp = vp;

View File

@ -207,6 +207,7 @@ pfs_vncache_alloc(struct mount *mp, struct vnode **vpp,
*vpp = NULLVP;
return (error);
}
vn_set_state(*vpp, VSTATE_CONSTRUCTED);
retry2:
mtx_lock(&pfs_vncache_mutex);
/*

View File

@ -217,6 +217,7 @@ smbfs_node_alloc(struct mount *mp, struct vnode *dvp, const char *dirnm,
free(np, M_SMBNODE);
return (error);
}
vn_set_state(vp, VSTATE_CONSTRUCTED);
error = vfs_hash_insert(vp, smbfs_hash(name, nmlen), LK_EXCLUSIVE,
td, &vp2, smbfs_vnode_cmp, &sc);
if (error)

View File

@ -1080,6 +1080,8 @@ tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag,
vgone(vp);
vput(vp);
vp = NULL;
} else {
vn_set_state(vp, VSTATE_CONSTRUCTED);
}
unlock:

View File

@ -717,6 +717,7 @@ udf_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp)
if (ino == udf_getid(&udfmp->root_icb))
vp->v_vflag |= VV_ROOT;
vn_set_state(vp, VSTATE_CONSTRUCTED);
*vpp = vp;
return (0);

View File

@ -407,6 +407,8 @@ unionfs_nodeget(struct mount *mp, struct vnode *uppervp,
return (ENOENT);
}
vn_set_state(vp, VSTATE_CONSTRUCTED);
if (dvp != NULLVP && vt == VDIR)
*vpp = unionfs_ins_cached_vnode(unp, dvp);
if (*vpp != NULLVP) {

View File

@ -161,6 +161,7 @@ nameiinit(void *dummy __unused)
UMA_ALIGN_PTR, 0);
vfs_vector_op_register(&crossmp_vnodeops);
getnewvnode("crossmp", NULL, &crossmp_vnodeops, &vp_crossmp);
vp_crossmp->v_state = VSTATE_CONSTRUCTED;
}
SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL);

View File

@ -603,6 +603,8 @@ vnode_init(void *mem, int size, int flags)
vp->v_dbatchcpu = NOCPU;
vp->v_state = VSTATE_DEAD;
/*
* Check vhold_recycle_free for an explanation.
*/
@ -1792,6 +1794,9 @@ getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops,
vp = vn_alloc(mp);
}
counter_u64_add(vnodes_created, 1);
vn_set_state(vp, VSTATE_UNINITIALIZED);
/*
* Locks are given the generic name "vnode" when created.
* Follow the historic practice of using the filesystem
@ -4015,14 +4020,18 @@ vgonel(struct vnode *vp)
/*
* Don't vgonel if we're already doomed.
*/
if (VN_IS_DOOMED(vp))
if (VN_IS_DOOMED(vp)) {
VNPASS(vn_get_state(vp) == VSTATE_DESTROYING || \
vn_get_state(vp) == VSTATE_DEAD, vp);
return;
}
/*
* Paired with freevnode.
*/
vn_seqc_write_begin_locked(vp);
vunlazy_gone(vp);
vn_irflag_set_locked(vp, VIRF_DOOMED);
vn_set_state(vp, VSTATE_DESTROYING);
/*
* Check to see if the vnode is in use. If so, we have to
@ -4140,6 +4149,7 @@ vgonel(struct vnode *vp)
vp->v_vnlock = &vp->v_lock;
vp->v_op = &dead_vnodeops;
vp->v_type = VBAD;
vn_set_state(vp, VSTATE_DEAD);
}
/*
@ -4160,6 +4170,15 @@ static const char *const vtypename[] = {
_Static_assert(nitems(vtypename) == VLASTTYPE + 1,
"vnode type name not added to vtypename");
static const char *const vstatename[] = {
[VSTATE_UNINITIALIZED] = "VSTATE_UNINITIALIZED",
[VSTATE_CONSTRUCTED] = "VSTATE_CONSTRUCTED",
[VSTATE_DESTROYING] = "VSTATE_DESTROYING",
[VSTATE_DEAD] = "VSTATE_DEAD",
};
_Static_assert(nitems(vstatename) == VLASTSTATE + 1,
"vnode state name not added to vstatename");
_Static_assert((VHOLD_ALL_FLAGS & ~VHOLD_NO_SMR) == 0,
"new hold count flag not added to vn_printf");
@ -4176,7 +4195,7 @@ vn_printf(struct vnode *vp, const char *fmt, ...)
vprintf(fmt, ap);
va_end(ap);
printf("%p: ", (void *)vp);
printf("type %s\n", vtypename[vp->v_type]);
printf("type %s state %s\n", vtypename[vp->v_type], vstatename[vp->v_state]);
holdcnt = atomic_load_int(&vp->v_holdcnt);
printf(" usecount %d, writecount %d, refcount %d seqc users %d",
vp->v_usecount, vp->v_writecount, holdcnt & ~VHOLD_ALL_FLAGS,
@ -5074,6 +5093,7 @@ vfs_allocate_syncvnode(struct mount *mp)
if (error != 0)
panic("vfs_allocate_syncvnode: insmntque() failed");
vp->v_vflag &= ~VV_FORCEINSMQ;
vn_set_state(vp, VSTATE_CONSTRUCTED);
VOP_UNLOCK(vp);
/*
* Place the vnode onto the syncer worklist. We attempt to
@ -5720,8 +5740,10 @@ void
vop_unlock_debugpre(void *ap)
{
struct vop_unlock_args *a = ap;
struct vnode *vp = a->a_vp;
ASSERT_VOP_LOCKED(a->a_vp, "VOP_UNLOCK");
VNPASS(vn_get_state(vp) != VSTATE_UNINITIALIZED, vp);
ASSERT_VOP_LOCKED(vp, "VOP_UNLOCK");
}
void
@ -7093,3 +7115,51 @@ vn_irflag_unset(struct vnode *vp, short tounset)
vn_irflag_unset_locked(vp, tounset);
VI_UNLOCK(vp);
}
#ifdef INVARIANTS
void
vn_set_state_validate(struct vnode *vp, enum vstate state)
{
switch (vp->v_state) {
case VSTATE_UNINITIALIZED:
switch (state) {
case VSTATE_CONSTRUCTED:
case VSTATE_DESTROYING:
return;
default:
break;
}
break;
case VSTATE_CONSTRUCTED:
ASSERT_VOP_ELOCKED(vp, __func__);
switch (state) {
case VSTATE_DESTROYING:
return;
default:
break;
}
break;
case VSTATE_DESTROYING:
ASSERT_VOP_ELOCKED(vp, __func__);
switch (state) {
case VSTATE_DEAD:
return;
default:
break;
}
break;
case VSTATE_DEAD:
switch (state) {
case VSTATE_UNINITIALIZED:
return;
default:
break;
}
break;
}
vn_printf(vp, "invalid state transition %d -> %d\n", vp->v_state, state);
panic("invalid state transition %d -> %d\n", vp->v_state, state);
}
#endif

View File

@ -76,7 +76,7 @@
* cannot include sys/param.h and should only be updated here.
*/
#undef __FreeBSD_version
#define __FreeBSD_version 1400076
#define __FreeBSD_version 1400077
/*
* __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,

View File

@ -60,6 +60,10 @@ enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD,
VMARKER };
#define VLASTTYPE VMARKER
enum vstate { VSTATE_UNINITIALIZED, VSTATE_CONSTRUCTED, VSTATE_DESTROYING,
VSTATE_DEAD };
#define VLASTSTATE VSTATE_DEAD
enum vgetstate { VGET_NONE, VGET_HOLDCNT, VGET_USECOUNT };
/*
* Each underlying filesystem allocates its own private area and hangs
@ -107,6 +111,7 @@ struct vnode {
* owned by the filesystem (XXX: and vgone() ?)
*/
enum vtype v_type:8; /* u vnode type */
enum vstate v_state:8; /* u vnode state */
short v_irflag; /* i frequently read flags */
seqc_t v_seqc; /* i modification count */
uint32_t v_nchash; /* u namecache hash */
@ -1136,6 +1141,25 @@ void vn_fsid(struct vnode *vp, struct vattr *va);
int vn_dir_check_exec(struct vnode *vp, struct componentname *cnp);
int vn_lktype_write(struct mount *mp, struct vnode *vp);
#ifdef INVARIANTS
void vn_set_state_validate(struct vnode *vp, enum vstate state);
#endif
static inline void
vn_set_state(struct vnode *vp, enum vstate state)
{
#ifdef INVARIANTS
vn_set_state_validate(vp, state);
#endif
vp->v_state = state;
}
static inline enum vstate
vn_get_state(struct vnode *vp)
{
return (vp->v_state);
}
#define VOP_UNLOCK_FLAGS(vp, flags) ({ \
struct vnode *_vp = (vp); \
int _flags = (flags); \

View File

@ -2026,6 +2026,7 @@ ffs_vgetf(struct mount *mp,
}
#endif
vn_set_state(vp, VSTATE_CONSTRUCTED);
*vpp = vp;
return (0);
}