Rework v_object lifecycle for vnodes.

Current implementation of vnode_create_vobject() and
vnode_destroy_vobject() is written so that it prepared to handle the
vm object destruction for live vnode.  Practically, no filesystems use
this, except for some remnants that were present in UFS till today.
One of the consequences of that model is that each filesystem must
call vnode_destroy_vobject() in VOP_RECLAIM() or earlier, as result
all of them get rid of the v_object in reclaim.

Move the call to vnode_destroy_vobject() to vgonel() before
VOP_RECLAIM().  This makes v_object stable: either the object is NULL,
or it is valid vm object till the vnode reclamation.  Remove code from
vnode_create_vobject() to handle races with the parallel destruction.

Reviewed by:	markj
Tested by:	pho
Sponsored by:	The FreeBSD Foundation
Differential revision:	https://reviews.freebsd.org/D21412
This commit is contained in:
Konstantin Belousov 2019-08-29 07:50:25 +00:00
parent 1604022248
commit 6470c8d3db
15 changed files with 45 additions and 103 deletions

View File

@ -5375,9 +5375,6 @@ zfs_freebsd_reclaim(ap)
ASSERT(zp != NULL);
/* Destroy the vm object and flush associated pages. */
vnode_destroy_vobject(vp);
/*
* z_teardown_inactive_lock protects from a race with
* zfs_znode_dmu_fini in zfsvfs_teardown during

View File

@ -91,10 +91,6 @@ cd9660_reclaim(ap)
{
struct vnode *vp = ap->a_vp;
/*
* Destroy the vm object and flush associated pages.
*/
vnode_destroy_vobject(vp);
/*
* Remove the inode from its hash chain.
*/

View File

@ -1428,7 +1428,6 @@ devfs_reclaim(struct vop_reclaim_args *ap)
vp->v_data = NULL;
}
mtx_unlock(&devfs_de_interlock);
vnode_destroy_vobject(vp);
return (0);
}

View File

@ -639,6 +639,5 @@ ext2_reclaim(struct vop_reclaim_args *ap)
vfs_hash_remove(vp);
free(vp->v_data, M_EXT2NODE);
vp->v_data = 0;
vnode_destroy_vobject(vp);
return (0);
}

View File

@ -1537,7 +1537,6 @@ fuse_vnop_reclaim(struct vop_reclaim_args *ap)
fuse_vnode_setparent(vp, NULL);
cache_purge(vp);
vfs_hash_remove(vp);
vnode_destroy_vobject(vp);
fuse_vnode_destroy(vp);
return 0;

View File

@ -551,10 +551,6 @@ msdosfs_reclaim(struct vop_reclaim_args *ap)
dep, dep->de_Name, dep->de_refcnt);
#endif
/*
* Destroy the vm object and flush associated pages.
*/
vnode_destroy_vobject(vp);
/*
* Remove the denode from its hash chain.
*/

View File

@ -296,11 +296,6 @@ ncl_reclaim(struct vop_reclaim_args *ap)
ncl_releasesillyrename(vp, ap->a_td);
mtx_unlock(&np->n_mtx);
/*
* Destroy the vm object and flush associated pages.
*/
vnode_destroy_vobject(vp);
if (NFS_ISV4(vp) && vp->v_type == VREG)
/*
* We can now safely close any remaining NFSv4 Opens for

View File

@ -272,10 +272,6 @@ smbfs_reclaim(ap)
KASSERT((np->n_flag & NOPEN) == 0, ("file not closed before reclaim"));
/*
* Destroy the vm object and flush associated pages.
*/
vnode_destroy_vobject(vp);
dvp = (np->n_parent && (np->n_flag & NREFPARENT)) ?
np->n_parent : NULL;

View File

@ -1317,8 +1317,6 @@ tmpfs_reclaim(struct vop_reclaim_args *v)
if (vp->v_type == VREG)
tmpfs_destroy_vobject(vp, node->tn_reg.tn_aobj);
else
vnode_destroy_vobject(vp);
vp->v_object = NULL;
if (tmpfs_use_nc(vp))
cache_purge(vp);

View File

@ -1266,11 +1266,6 @@ udf_reclaim(struct vop_reclaim_args *a)
vp = a->a_vp;
unode = VTON(vp);
/*
* Destroy the vm object and flush associated pages.
*/
vnode_destroy_vobject(vp);
if (unode != NULL) {
vfs_hash_remove(vp);

View File

@ -3481,9 +3481,9 @@ static void
vgonel(struct vnode *vp)
{
struct thread *td;
int oweinact;
int active;
struct mount *mp;
vm_object_t object;
bool active, oweinact;
ASSERT_VOP_ELOCKED(vp, "vgonel");
ASSERT_VI_LOCKED(vp, "vgonel");
@ -3503,8 +3503,8 @@ vgonel(struct vnode *vp)
* Check to see if the vnode is in use. If so, we have to call
* VOP_CLOSE() and VOP_INACTIVE().
*/
active = vp->v_usecount;
oweinact = (vp->v_iflag & VI_OWEINACT);
active = vp->v_usecount > 0;
oweinact = (vp->v_iflag & VI_OWEINACT) != 0;
VI_UNLOCK(vp);
vfs_notify_upper(vp, VFS_NOTIFY_UPPER_RECLAIM);
@ -3543,13 +3543,25 @@ vgonel(struct vnode *vp)
("vp %p bufobj not invalidated", vp));
/*
* For VMIO bufobj, BO_DEAD is set in vm_object_terminate()
* after the object's page queue is flushed.
* For VMIO bufobj, BO_DEAD is set later, or in
* vm_object_terminate() after the object's page queue is
* flushed.
*/
if (vp->v_bufobj.bo_object == NULL)
object = vp->v_bufobj.bo_object;
if (object == NULL)
vp->v_bufobj.bo_flag |= BO_DEAD;
BO_UNLOCK(&vp->v_bufobj);
/*
* Handle the VM part. Tmpfs handles v_object on its own (the
* OBJT_VNODE check). Nullfs or other bypassing filesystems
* should not touch the object borrowed from the lower vnode
* (the handle check).
*/
if (object != NULL && object->type == OBJT_VNODE &&
object->handle == vp)
vnode_destroy_vobject(vp);
/*
* Reclaim the vnode.
*/

View File

@ -79,7 +79,6 @@ int ufs_inactive(struct vop_inactive_args *);
int ufs_init(struct vfsconf *);
void ufs_itimes(struct vnode *vp);
int ufs_lookup(struct vop_cachedlookup_args *);
void ufs_prepare_reclaim(struct vnode *vp);
int ufs_readdir(struct vop_readdir_args *);
int ufs_reclaim(struct vop_reclaim_args *);
void ffs_snapgone(struct inode *);

View File

@ -179,31 +179,6 @@ out:
return (error);
}
void
ufs_prepare_reclaim(struct vnode *vp)
{
struct inode *ip;
#ifdef QUOTA
int i;
#endif
ip = VTOI(vp);
vnode_destroy_vobject(vp);
#ifdef QUOTA
for (i = 0; i < MAXQUOTAS; i++) {
if (ip->i_dquot[i] != NODQUOT) {
dqrele(vp, ip->i_dquot[i]);
ip->i_dquot[i] = NODQUOT;
}
}
#endif
#ifdef UFS_DIRHASH
if (ip->i_dirhash != NULL)
ufsdirhash_free(ip);
#endif
}
/*
* Reclaim an inode so that it can be used for other purposes.
*/
@ -216,8 +191,20 @@ ufs_reclaim(ap)
{
struct vnode *vp = ap->a_vp;
struct inode *ip = VTOI(vp);
#ifdef QUOTA
int i;
ufs_prepare_reclaim(vp);
for (i = 0; i < MAXQUOTAS; i++) {
if (ip->i_dquot[i] != NODQUOT) {
dqrele(vp, ip->i_dquot[i]);
ip->i_dquot[i] = NODQUOT;
}
}
#endif
#ifdef UFS_DIRHASH
if (ip->i_dirhash != NULL)
ufsdirhash_free(ip);
#endif
if (ip->i_flag & IN_LAZYMOD)
ip->i_flag |= IN_MODIFIED;

View File

@ -189,7 +189,6 @@ struct vm_object {
#define OBJ_TMPFS_DIRTY 0x0400 /* dirty tmpfs obj */
#define OBJ_COLORED 0x1000 /* pg_color is defined */
#define OBJ_ONEMAPPING 0x2000 /* One USE (a single, non-forked) mapping flag */
#define OBJ_DISCONNECTWNT 0x4000 /* disconnect from vnode wanted */
#define OBJ_TMPFS 0x8000 /* has tmpfs vnode allocated */
/*

View File

@ -147,17 +147,9 @@ vnode_create_vobject(struct vnode *vp, off_t isize, struct thread *td)
if (!vn_isdisk(vp, NULL) && vn_canvmio(vp) == FALSE)
return (0);
while ((object = vp->v_object) != NULL) {
VM_OBJECT_WLOCK(object);
if (!(object->flags & OBJ_DEAD)) {
VM_OBJECT_WUNLOCK(object);
return (0);
}
VOP_UNLOCK(vp, 0);
vm_object_set_flag(object, OBJ_DISCONNECTWNT);
VM_OBJECT_SLEEP(object, object, PDROP | PVM, "vodead", 0);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
}
object = vp->v_object;
if (object != NULL)
return (0);
if (size == 0) {
if (vn_isdisk(vp, NULL)) {
@ -190,10 +182,11 @@ vnode_destroy_vobject(struct vnode *vp)
struct vm_object *obj;
obj = vp->v_object;
if (obj == NULL)
if (obj == NULL || obj->handle != vp)
return;
ASSERT_VOP_ELOCKED(vp, "vnode_destroy_vobject");
VM_OBJECT_WLOCK(obj);
MPASS(obj->type == OBJT_VNODE);
umtx_shm_object_terminated(obj);
if (obj->ref_count == 0) {
/*
@ -223,9 +216,6 @@ vnode_destroy_vobject(struct vnode *vp)
* prevented new waiters from referencing the dying
* object.
*/
KASSERT((obj->flags & OBJ_DISCONNECTWNT) == 0,
("OBJ_DISCONNECTWNT set obj %p flags %x",
obj, obj->flags));
vp->v_object = NULL;
VM_OBJECT_WUNLOCK(obj);
}
@ -243,8 +233,6 @@ vnode_destroy_vobject(struct vnode *vp)
/*
* Allocate (or lookup) pager for a vnode.
* Handle is a vnode pointer.
*
* MPSAFE
*/
vm_object_t
vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
@ -259,28 +247,18 @@ vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
if (handle == NULL)
return (NULL);
vp = (struct vnode *) handle;
/*
* If the object is being terminated, wait for it to
* go away.
*/
retry:
while ((object = vp->v_object) != NULL) {
VM_OBJECT_WLOCK(object);
if ((object->flags & OBJ_DEAD) == 0)
break;
vm_object_set_flag(object, OBJ_DISCONNECTWNT);
VM_OBJECT_SLEEP(object, object, PDROP | PVM, "vadead", 0);
}
vp = (struct vnode *)handle;
ASSERT_VOP_LOCKED(vp, "vnode_pager_alloc");
KASSERT(vp->v_usecount != 0, ("vnode_pager_alloc: no vnode reference"));
retry:
object = vp->v_object;
if (object == NULL) {
/*
* Add an object of the appropriate size
*/
object = vm_object_allocate(OBJT_VNODE, OFF_TO_IDX(round_page(size)));
object = vm_object_allocate(OBJT_VNODE,
OFF_TO_IDX(round_page(size)));
object->un_pager.vnp.vnp_size = size;
object->un_pager.vnp.writemappings = 0;
@ -290,7 +268,7 @@ retry:
VI_LOCK(vp);
if (vp->v_object != NULL) {
/*
* Object has been created while we were sleeping
* Object has been created while we were allocating.
*/
VI_UNLOCK(vp);
VM_OBJECT_WLOCK(object);
@ -305,6 +283,7 @@ retry:
vp->v_object = object;
VI_UNLOCK(vp);
} else {
VM_OBJECT_WLOCK(object);
object->ref_count++;
#if VM_NRESERVLEVEL > 0
vm_object_color(object, 0);
@ -334,10 +313,6 @@ vnode_pager_dealloc(vm_object_t object)
object->handle = NULL;
object->type = OBJT_DEAD;
if (object->flags & OBJ_DISCONNECTWNT) {
vm_object_clear_flag(object, OBJ_DISCONNECTWNT);
wakeup(object);
}
ASSERT_VOP_ELOCKED(vp, "vnode_pager_dealloc");
if (object->un_pager.vnp.writemappings > 0) {
object->un_pager.vnp.writemappings = 0;