vfs_msync(), called from syncer vnode fsync VOP, only iterates over

the active vnode list for the given mount point, with the assumption
that vnodes with dirty pages are active.  This is enforced by
vinactive() doing vm_object_page_clean() pass over the vnode pages.

The issue is, if vinactive() cannot be called during vput() due to the
vnode being only shared-locked, we might end up with the dirty pages
for the vnode on the free list.  Such vnode is invisible to syncer,
and pages are only cleaned on the vnode reactivation.  In other words,
the race results in the broken guarantee that user data, written
through the mmap(2), is written to the disk not later than in 30
seconds after the write.

Fix this by keeping the vnode which is freed but still owing
inactivation, on the active list.  When syncer loops find such vnode,
it is deactivated and cleaned by the final vput() call.

Tested by:	pho
Sponsored by:	The FreeBSD Foundation
MFC after:	2 weeks
This commit is contained in:
kib 2015-06-17 04:46:58 +00:00
parent 6f06df98ab
commit becc575eec
2 changed files with 38 additions and 22 deletions

View File

@ -173,6 +173,11 @@ static int reassignbufcalls;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW, &reassignbufcalls, 0,
"Number of calls to reassignbuf");
static u_long free_owe_inact;
SYSCTL_ULONG(_vfs, OID_AUTO, free_owe_inact, CTLFLAG_RD, &free_owe_inact, 0,
"Number of times free vnodes kept on active list due to VFS "
"owing inactivation");
/*
* Cache for the mount type id assigned to NFS. This is used for
* special checks in nfs/nfs_nqlease.c and vm/vnode_pager.c.
@ -2368,11 +2373,8 @@ vholdl(struct vnode *vp)
CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
#ifdef INVARIANTS
/* getnewvnode() calls v_incr_usecount() without holding interlock. */
if (vp->v_type != VNON || vp->v_data != NULL) {
if (vp->v_type != VNON || vp->v_data != NULL)
ASSERT_VI_LOCKED(vp, "vholdl");
VNASSERT(vp->v_holdcnt > 0 || (vp->v_iflag & VI_FREE) != 0,
vp, ("vholdl: free vnode is held"));
}
#endif
vp->v_holdcnt++;
if ((vp->v_iflag & VI_FREE) == 0)
@ -2443,23 +2445,29 @@ vdropl(struct vnode *vp)
VNASSERT(vp->v_holdcnt == 0, vp,
("vdropl: freeing when we shouldn't"));
active = vp->v_iflag & VI_ACTIVE;
vp->v_iflag &= ~VI_ACTIVE;
mp = vp->v_mount;
mtx_lock(&vnode_free_list_mtx);
if (active) {
TAILQ_REMOVE(&mp->mnt_activevnodelist, vp,
v_actfreelist);
mp->mnt_activevnodelistsize--;
}
if (vp->v_iflag & VI_AGE) {
TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_actfreelist);
if ((vp->v_iflag & VI_OWEINACT) == 0) {
vp->v_iflag &= ~VI_ACTIVE;
mp = vp->v_mount;
mtx_lock(&vnode_free_list_mtx);
if (active) {
TAILQ_REMOVE(&mp->mnt_activevnodelist, vp,
v_actfreelist);
mp->mnt_activevnodelistsize--;
}
if (vp->v_iflag & VI_AGE) {
TAILQ_INSERT_HEAD(&vnode_free_list, vp,
v_actfreelist);
} else {
TAILQ_INSERT_TAIL(&vnode_free_list, vp,
v_actfreelist);
}
freevnodes++;
vp->v_iflag &= ~VI_AGE;
vp->v_iflag |= VI_FREE;
mtx_unlock(&vnode_free_list_mtx);
} else {
TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_actfreelist);
atomic_add_long(&free_owe_inact, 1);
}
freevnodes++;
vp->v_iflag &= ~VI_AGE;
vp->v_iflag |= VI_FREE;
mtx_unlock(&vnode_free_list_mtx);
VI_UNLOCK(vp);
return;
}

View File

@ -1410,6 +1410,14 @@ ffs_statfs(mp, sbp)
return (0);
}
static bool
sync_doupdate(struct inode *ip)
{
return ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED |
IN_UPDATE)) != 0);
}
/*
* For a lazy sync, we only care about access times, quotas and the
* superblock. Other filesystem changes are already converted to
@ -1443,15 +1451,15 @@ ffs_sync_lazy(mp)
* Test also all the other timestamp flags too, to pick up
* any other cases that could be missed.
*/
if ((ip->i_flag & (IN_ACCESS | IN_CHANGE | IN_MODIFIED |
IN_UPDATE)) == 0) {
if (!sync_doupdate(ip) && (vp->v_iflag & VI_OWEINACT) == 0) {
VI_UNLOCK(vp);
continue;
}
if ((error = vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK,
td)) != 0)
continue;
error = ffs_update(vp, 0);
if (sync_doupdate(ip))
error = ffs_update(vp, 0);
if (error != 0)
allerror = error;
vput(vp);