From 829f0bcb5fe24bb523c5a9e7bd3bb79412e06906 Mon Sep 17 00:00:00 2001 From: Mateusz Guzik Date: Mon, 19 Dec 2022 13:00:30 +0000 Subject: [PATCH] vfs: add the concept of vnode state transitions To quote from a comment above vput_final: * XXX Some filesystems pass in an exclusively locked vnode and strongly depend * on the lock being held all the way until VOP_INACTIVE. This in particular * happens with UFS which adds half-constructed vnodes to the hash, where they * can be found by other code. As is there is no mechanism which allows filesystems to denote that a vnode is fully initialized, consequently problems like the above are only found the hard way(tm). Add rudimentary support for state transitions, which in particular allow to assert the vnode is not legally unlocked until its fate is decided (either construction finishes or vgone is called to abort it). The new field lands in a 1-byte hole, thus it does not grow the struct. Bump __FreeBSD_version to 1400077 Reviewed by: kib (previous version) Tested by: pho Differential Revision: https://reviews.freebsd.org/D37759 --- .../openzfs/module/os/freebsd/zfs/zfs_znode.c | 1 + sys/fs/autofs/autofs_vnops.c | 1 + sys/fs/cd9660/cd9660_vfsops.c | 1 + sys/fs/devfs/devfs_vnops.c | 2 + sys/fs/ext2fs/ext2_alloc.c | 1 + sys/fs/ext2fs/ext2_vfsops.c | 1 + sys/fs/fdescfs/fdesc_vnops.c | 1 + sys/fs/fuse/fuse_node.c | 1 + sys/fs/mntfs/mntfs_vnops.c | 1 + sys/fs/msdosfs/msdosfs_denode.c | 1 + sys/fs/nfsclient/nfs_clnode.c | 1 + sys/fs/nfsclient/nfs_clport.c | 1 + sys/fs/nullfs/null_subr.c | 1 + sys/fs/pseudofs/pseudofs_vncache.c | 1 + sys/fs/smbfs/smbfs_node.c | 1 + sys/fs/tmpfs/tmpfs_subr.c | 2 + sys/fs/udf/udf_vfsops.c | 1 + sys/fs/unionfs/union_subr.c | 2 + sys/kern/vfs_lookup.c | 1 + sys/kern/vfs_subr.c | 76 ++++++++++++++++++- sys/sys/param.h | 2 +- sys/sys/vnode.h | 24 ++++++ sys/ufs/ffs/ffs_vfsops.c | 1 + 23 files changed, 121 insertions(+), 4 deletions(-) diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c index 6c269480cb4b..1064ea5cf01d 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_znode.c @@ -540,6 +540,7 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, * Acquire vnode lock before making it available to the world. */ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + vn_set_state(vp, VSTATE_CONSTRUCTED); VN_LOCK_AREC(vp); if (vp->v_type != VFIFO) VN_LOCK_ASHARE(vp); diff --git a/sys/fs/autofs/autofs_vnops.c b/sys/fs/autofs/autofs_vnops.c index 7f120c4922d4..b17a1a4fd2ef 100644 --- a/sys/fs/autofs/autofs_vnops.c +++ b/sys/fs/autofs/autofs_vnops.c @@ -704,6 +704,7 @@ autofs_node_vn(struct autofs_node *anp, struct mount *mp, int flags, sx_xunlock(&anp->an_vnode_lock); + vn_set_state(vp, VSTATE_CONSTRUCTED); *vpp = vp; return (0); } diff --git a/sys/fs/cd9660/cd9660_vfsops.c b/sys/fs/cd9660/cd9660_vfsops.c index cc0fd0b2f941..c8ac6bb1be2e 100644 --- a/sys/fs/cd9660/cd9660_vfsops.c +++ b/sys/fs/cd9660/cd9660_vfsops.c @@ -822,6 +822,7 @@ cd9660_vget_internal(struct mount *mp, cd_ino_t ino, int flags, * XXX need generation number? */ + vn_set_state(vp, VSTATE_CONSTRUCTED); *vpp = vp; return (0); } diff --git a/sys/fs/devfs/devfs_vnops.c b/sys/fs/devfs/devfs_vnops.c index 511430ccdd97..4c39ccb1ca5b 100644 --- a/sys/fs/devfs/devfs_vnops.c +++ b/sys/fs/devfs/devfs_vnops.c @@ -613,6 +613,7 @@ devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode, return (error); } if (devfs_allocv_drop_refs(0, dmp, de)) { + vgone(vp); vput(vp); return (ENOENT); } @@ -620,6 +621,7 @@ devfs_allocv(struct devfs_dirent *de, struct mount *mp, int lockmode, mac_devfs_vnode_associate(mp, de, vp); #endif sx_xunlock(&dmp->dm_lock); + vn_set_state(vp, VSTATE_CONSTRUCTED); *vpp = vp; return (0); } diff --git a/sys/fs/ext2fs/ext2_alloc.c b/sys/fs/ext2fs/ext2_alloc.c index 79df804aceb8..489b64a1c0c5 100644 --- a/sys/fs/ext2fs/ext2_alloc.c +++ b/sys/fs/ext2fs/ext2_alloc.c @@ -480,6 +480,7 @@ ext2_valloc(struct vnode *pvp, int mode, struct ucred *cred, struct vnode **vpp) ip->i_birthtime = ts.tv_sec; ip->i_birthnsec = ts.tv_nsec; + vn_set_state(vp, VSTATE_CONSTRUCTED); *vpp = vp; return (0); diff --git a/sys/fs/ext2fs/ext2_vfsops.c b/sys/fs/ext2fs/ext2_vfsops.c index 408f3dac6833..2aff8c701af0 100644 --- a/sys/fs/ext2fs/ext2_vfsops.c +++ b/sys/fs/ext2fs/ext2_vfsops.c @@ -1308,6 +1308,7 @@ ext2_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp) * Finish inode initialization. */ + vn_set_state(vp, VSTATE_CONSTRUCTED); *vpp = vp; return (0); } diff --git a/sys/fs/fdescfs/fdesc_vnops.c b/sys/fs/fdescfs/fdesc_vnops.c index 7046bb6bf244..0949dcc7eb29 100644 --- a/sys/fs/fdescfs/fdesc_vnops.c +++ b/sys/fs/fdescfs/fdesc_vnops.c @@ -235,6 +235,7 @@ fdesc_allocvp(fdntype ftype, unsigned fd_fd, int ix, struct mount *mp, /* If we came here, we can insert it safely. */ LIST_INSERT_HEAD(fc, fd, fd_hash); mtx_unlock(&fdesc_hashmtx); + vn_set_state(vp, VSTATE_CONSTRUCTED); *vpp = vp; return (0); } diff --git a/sys/fs/fuse/fuse_node.c b/sys/fs/fuse/fuse_node.c index 4d207f9c1365..1abf6e75cc3e 100644 --- a/sys/fs/fuse/fuse_node.c +++ b/sys/fs/fuse/fuse_node.c @@ -263,6 +263,7 @@ fuse_vnode_alloc(struct mount *mp, if (data->dataflags & FSESS_ASYNC_READ && vtyp != VFIFO) VN_LOCK_ASHARE(*vpp); + vn_set_state(*vpp, VSTATE_CONSTRUCTED); err = vfs_hash_insert(*vpp, fuse_vnode_hash(nodeid), LK_EXCLUSIVE, td, &vp2, fuse_vnode_cmp, &nodeid); if (err) { diff --git a/sys/fs/mntfs/mntfs_vnops.c b/sys/fs/mntfs/mntfs_vnops.c index 0360262ce191..d05a8c8287f3 100644 --- a/sys/fs/mntfs/mntfs_vnops.c +++ b/sys/fs/mntfs/mntfs_vnops.c @@ -85,6 +85,7 @@ mntfs_allocvp(struct mount *mp, struct vnode *ovp) VOP_UNLOCK(ovp); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); + vn_set_state(vp, VSTATE_CONSTRUCTED); return (vp); } diff --git a/sys/fs/msdosfs/msdosfs_denode.c b/sys/fs/msdosfs/msdosfs_denode.c index e5e9a6e27b78..9a9916e39a67 100644 --- a/sys/fs/msdosfs/msdosfs_denode.c +++ b/sys/fs/msdosfs/msdosfs_denode.c @@ -299,6 +299,7 @@ deget(struct msdosfsmount *pmp, u_long dirclust, u_long diroffset, } } else nvp->v_type = VREG; + vn_set_state(nvp, VSTATE_CONSTRUCTED); ldep->de_modrev = init_va_filerev(); *depp = ldep; return (0); diff --git a/sys/fs/nfsclient/nfs_clnode.c b/sys/fs/nfsclient/nfs_clnode.c index 9718c2c36a3c..bc38739ce04c 100644 --- a/sys/fs/nfsclient/nfs_clnode.c +++ b/sys/fs/nfsclient/nfs_clnode.c @@ -178,6 +178,7 @@ ncl_nget(struct mount *mntp, u_int8_t *fhp, int fhsize, struct nfsnode **npp, uma_zfree(newnfsnode_zone, np); return (error); } + vn_set_state(vp, VSTATE_CONSTRUCTED); error = vfs_hash_insert(vp, hash, lkflags, td, &nvp, newnfs_vncmpf, np->n_fhp); if (error) diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c index 53b4c58734c2..fc62783e3965 100644 --- a/sys/fs/nfsclient/nfs_clport.c +++ b/sys/fs/nfsclient/nfs_clport.c @@ -286,6 +286,7 @@ nfscl_nget(struct mount *mntp, struct vnode *dvp, struct nfsfh *nfhp, uma_zfree(newnfsnode_zone, np); return (error); } + vn_set_state(vp, VSTATE_CONSTRUCTED); error = vfs_hash_insert(vp, hash, lkflags, td, &nvp, newnfs_vncmpf, nfhp); if (error) diff --git a/sys/fs/nullfs/null_subr.c b/sys/fs/nullfs/null_subr.c index b2b6e6837b2d..51084e4abe8b 100644 --- a/sys/fs/nullfs/null_subr.c +++ b/sys/fs/nullfs/null_subr.c @@ -262,6 +262,7 @@ null_nodeget(struct mount *mp, struct vnode *lowervp, struct vnode **vpp) } null_hashins(mp, xp); + vn_set_state(vp, VSTATE_CONSTRUCTED); rw_wunlock(&null_hash_lock); *vpp = vp; diff --git a/sys/fs/pseudofs/pseudofs_vncache.c b/sys/fs/pseudofs/pseudofs_vncache.c index 2b7ffae9921d..e637ff6f39e1 100644 --- a/sys/fs/pseudofs/pseudofs_vncache.c +++ b/sys/fs/pseudofs/pseudofs_vncache.c @@ -207,6 +207,7 @@ pfs_vncache_alloc(struct mount *mp, struct vnode **vpp, *vpp = NULLVP; return (error); } + vn_set_state(*vpp, VSTATE_CONSTRUCTED); retry2: mtx_lock(&pfs_vncache_mutex); /* diff --git a/sys/fs/smbfs/smbfs_node.c b/sys/fs/smbfs/smbfs_node.c index 1e9953f5a92a..ef47aa4cd623 100644 --- a/sys/fs/smbfs/smbfs_node.c +++ b/sys/fs/smbfs/smbfs_node.c @@ -217,6 +217,7 @@ smbfs_node_alloc(struct mount *mp, struct vnode *dvp, const char *dirnm, free(np, M_SMBNODE); return (error); } + vn_set_state(vp, VSTATE_CONSTRUCTED); error = vfs_hash_insert(vp, smbfs_hash(name, nmlen), LK_EXCLUSIVE, td, &vp2, smbfs_vnode_cmp, &sc); if (error) diff --git a/sys/fs/tmpfs/tmpfs_subr.c b/sys/fs/tmpfs/tmpfs_subr.c index 7afabf6e4baa..67fb55d2a6a6 100644 --- a/sys/fs/tmpfs/tmpfs_subr.c +++ b/sys/fs/tmpfs/tmpfs_subr.c @@ -1080,6 +1080,8 @@ tmpfs_alloc_vp(struct mount *mp, struct tmpfs_node *node, int lkflag, vgone(vp); vput(vp); vp = NULL; + } else { + vn_set_state(vp, VSTATE_CONSTRUCTED); } unlock: diff --git a/sys/fs/udf/udf_vfsops.c b/sys/fs/udf/udf_vfsops.c index 216e1153b39f..de943229e3a8 100644 --- a/sys/fs/udf/udf_vfsops.c +++ b/sys/fs/udf/udf_vfsops.c @@ -717,6 +717,7 @@ udf_vget(struct mount *mp, ino_t ino, int flags, struct vnode **vpp) if (ino == udf_getid(&udfmp->root_icb)) vp->v_vflag |= VV_ROOT; + vn_set_state(vp, VSTATE_CONSTRUCTED); *vpp = vp; return (0); diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c index b84da505507a..42437f1e3840 100644 --- a/sys/fs/unionfs/union_subr.c +++ b/sys/fs/unionfs/union_subr.c @@ -407,6 +407,8 @@ unionfs_nodeget(struct mount *mp, struct vnode *uppervp, return (ENOENT); } + vn_set_state(vp, VSTATE_CONSTRUCTED); + if (dvp != NULLVP && vt == VDIR) *vpp = unionfs_ins_cached_vnode(unp, dvp); if (*vpp != NULLVP) { diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index d6e0c824a323..e7f1deea0fae 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -161,6 +161,7 @@ nameiinit(void *dummy __unused) UMA_ALIGN_PTR, 0); vfs_vector_op_register(&crossmp_vnodeops); getnewvnode("crossmp", NULL, &crossmp_vnodeops, &vp_crossmp); + vp_crossmp->v_state = VSTATE_CONSTRUCTED; } SYSINIT(vfs, SI_SUB_VFS, SI_ORDER_SECOND, nameiinit, NULL); diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index 3c9db6763c6b..b3f12bb52928 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -603,6 +603,8 @@ vnode_init(void *mem, int size, int flags) vp->v_dbatchcpu = NOCPU; + vp->v_state = VSTATE_DEAD; + /* * Check vhold_recycle_free for an explanation. */ @@ -1792,6 +1794,9 @@ getnewvnode(const char *tag, struct mount *mp, struct vop_vector *vops, vp = vn_alloc(mp); } counter_u64_add(vnodes_created, 1); + + vn_set_state(vp, VSTATE_UNINITIALIZED); + /* * Locks are given the generic name "vnode" when created. * Follow the historic practice of using the filesystem @@ -4015,14 +4020,18 @@ vgonel(struct vnode *vp) /* * Don't vgonel if we're already doomed. */ - if (VN_IS_DOOMED(vp)) + if (VN_IS_DOOMED(vp)) { + VNPASS(vn_get_state(vp) == VSTATE_DESTROYING || \ + vn_get_state(vp) == VSTATE_DEAD, vp); return; + } /* * Paired with freevnode. */ vn_seqc_write_begin_locked(vp); vunlazy_gone(vp); vn_irflag_set_locked(vp, VIRF_DOOMED); + vn_set_state(vp, VSTATE_DESTROYING); /* * Check to see if the vnode is in use. If so, we have to @@ -4140,6 +4149,7 @@ vgonel(struct vnode *vp) vp->v_vnlock = &vp->v_lock; vp->v_op = &dead_vnodeops; vp->v_type = VBAD; + vn_set_state(vp, VSTATE_DEAD); } /* @@ -4160,6 +4170,15 @@ static const char *const vtypename[] = { _Static_assert(nitems(vtypename) == VLASTTYPE + 1, "vnode type name not added to vtypename"); +static const char *const vstatename[] = { + [VSTATE_UNINITIALIZED] = "VSTATE_UNINITIALIZED", + [VSTATE_CONSTRUCTED] = "VSTATE_CONSTRUCTED", + [VSTATE_DESTROYING] = "VSTATE_DESTROYING", + [VSTATE_DEAD] = "VSTATE_DEAD", +}; +_Static_assert(nitems(vstatename) == VLASTSTATE + 1, + "vnode state name not added to vstatename"); + _Static_assert((VHOLD_ALL_FLAGS & ~VHOLD_NO_SMR) == 0, "new hold count flag not added to vn_printf"); @@ -4176,7 +4195,7 @@ vn_printf(struct vnode *vp, const char *fmt, ...) vprintf(fmt, ap); va_end(ap); printf("%p: ", (void *)vp); - printf("type %s\n", vtypename[vp->v_type]); + printf("type %s state %s\n", vtypename[vp->v_type], vstatename[vp->v_state]); holdcnt = atomic_load_int(&vp->v_holdcnt); printf(" usecount %d, writecount %d, refcount %d seqc users %d", vp->v_usecount, vp->v_writecount, holdcnt & ~VHOLD_ALL_FLAGS, @@ -5074,6 +5093,7 @@ vfs_allocate_syncvnode(struct mount *mp) if (error != 0) panic("vfs_allocate_syncvnode: insmntque() failed"); vp->v_vflag &= ~VV_FORCEINSMQ; + vn_set_state(vp, VSTATE_CONSTRUCTED); VOP_UNLOCK(vp); /* * Place the vnode onto the syncer worklist. We attempt to @@ -5720,8 +5740,10 @@ void vop_unlock_debugpre(void *ap) { struct vop_unlock_args *a = ap; + struct vnode *vp = a->a_vp; - ASSERT_VOP_LOCKED(a->a_vp, "VOP_UNLOCK"); + VNPASS(vn_get_state(vp) != VSTATE_UNINITIALIZED, vp); + ASSERT_VOP_LOCKED(vp, "VOP_UNLOCK"); } void @@ -7093,3 +7115,51 @@ vn_irflag_unset(struct vnode *vp, short tounset) vn_irflag_unset_locked(vp, tounset); VI_UNLOCK(vp); } + +#ifdef INVARIANTS +void +vn_set_state_validate(struct vnode *vp, enum vstate state) +{ + + switch (vp->v_state) { + case VSTATE_UNINITIALIZED: + switch (state) { + case VSTATE_CONSTRUCTED: + case VSTATE_DESTROYING: + return; + default: + break; + } + break; + case VSTATE_CONSTRUCTED: + ASSERT_VOP_ELOCKED(vp, __func__); + switch (state) { + case VSTATE_DESTROYING: + return; + default: + break; + } + break; + case VSTATE_DESTROYING: + ASSERT_VOP_ELOCKED(vp, __func__); + switch (state) { + case VSTATE_DEAD: + return; + default: + break; + } + break; + case VSTATE_DEAD: + switch (state) { + case VSTATE_UNINITIALIZED: + return; + default: + break; + } + break; + } + + vn_printf(vp, "invalid state transition %d -> %d\n", vp->v_state, state); + panic("invalid state transition %d -> %d\n", vp->v_state, state); +} +#endif diff --git a/sys/sys/param.h b/sys/sys/param.h index 9b123a38a7a8..f0f35e3a268e 100644 --- a/sys/sys/param.h +++ b/sys/sys/param.h @@ -76,7 +76,7 @@ * cannot include sys/param.h and should only be updated here. */ #undef __FreeBSD_version -#define __FreeBSD_version 1400076 +#define __FreeBSD_version 1400077 /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index 5e609f810b8d..b3e67f6dcf31 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -60,6 +60,10 @@ enum vtype { VNON, VREG, VDIR, VBLK, VCHR, VLNK, VSOCK, VFIFO, VBAD, VMARKER }; #define VLASTTYPE VMARKER +enum vstate { VSTATE_UNINITIALIZED, VSTATE_CONSTRUCTED, VSTATE_DESTROYING, + VSTATE_DEAD }; +#define VLASTSTATE VSTATE_DEAD + enum vgetstate { VGET_NONE, VGET_HOLDCNT, VGET_USECOUNT }; /* * Each underlying filesystem allocates its own private area and hangs @@ -107,6 +111,7 @@ struct vnode { * owned by the filesystem (XXX: and vgone() ?) */ enum vtype v_type:8; /* u vnode type */ + enum vstate v_state:8; /* u vnode state */ short v_irflag; /* i frequently read flags */ seqc_t v_seqc; /* i modification count */ uint32_t v_nchash; /* u namecache hash */ @@ -1136,6 +1141,25 @@ void vn_fsid(struct vnode *vp, struct vattr *va); int vn_dir_check_exec(struct vnode *vp, struct componentname *cnp); int vn_lktype_write(struct mount *mp, struct vnode *vp); +#ifdef INVARIANTS +void vn_set_state_validate(struct vnode *vp, enum vstate state); +#endif + +static inline void +vn_set_state(struct vnode *vp, enum vstate state) +{ +#ifdef INVARIANTS + vn_set_state_validate(vp, state); +#endif + vp->v_state = state; +} + +static inline enum vstate +vn_get_state(struct vnode *vp) +{ + return (vp->v_state); +} + #define VOP_UNLOCK_FLAGS(vp, flags) ({ \ struct vnode *_vp = (vp); \ int _flags = (flags); \ diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c index cec03a3beb0d..ad095874c06d 100644 --- a/sys/ufs/ffs/ffs_vfsops.c +++ b/sys/ufs/ffs/ffs_vfsops.c @@ -2026,6 +2026,7 @@ ffs_vgetf(struct mount *mp, } #endif + vn_set_state(vp, VSTATE_CONSTRUCTED); *vpp = vp; return (0); }