vfs: reimplement deferred inactive to use a dedicated flag (VI_DEFINACT)

The previous behavior of leaving VI_OWEINACT vnodes on the active list without
a hold count is eliminated. Hold count is kept and inactive processing gets
explicitly deferred by setting the VI_DEFINACT flag. The syncer is then
responsible for vdrop.

Reviewed by:	kib (previous version)
Tested by:	pho (in a larger patch, previous version)
Differential Revision:	https://reviews.freebsd.org/D23036
This commit is contained in:
Mateusz Guzik 2020-01-07 15:56:24 +00:00
parent b7cc9d1847
commit c8b3463dd0
7 changed files with 164 additions and 48 deletions

View File

@ -666,7 +666,7 @@ linux_syncfs(struct thread *td, struct linux_syncfs_args *args)
if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
save = curthread_pflags_set(TDP_SYNCIO);
vfs_msync(mp, MNT_NOWAIT);
vfs_periodic(mp, MNT_NOWAIT);
VFS_SYNC(mp, MNT_NOWAIT);
curthread_pflags_restore(save);
vn_finished_write(mp);

View File

@ -2874,7 +2874,7 @@ g_journal_do_switch(struct g_class *classp)
save = curthread_pflags_set(TDP_SYNCIO);
GJ_TIMER_START(1, &bt);
vfs_msync(mp, MNT_NOWAIT);
vfs_periodic(mp, MNT_NOWAIT);
GJ_TIMER_STOP(1, &bt, "Msync time of %s", mountpoint);
GJ_TIMER_START(1, &bt);

View File

@ -1692,7 +1692,7 @@ dounmount(struct mount *mp, int flags, struct thread *td)
if (coveredvp != NULL)
vdrop(coveredvp);
vfs_msync(mp, MNT_WAIT);
vfs_periodic(mp, MNT_WAIT);
MNT_ILOCK(mp);
async_flag = mp->mnt_flag & MNT_ASYNC;
mp->mnt_flag &= ~MNT_ASYNC;

View File

@ -217,10 +217,9 @@ static int reassignbufcalls;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufcalls, CTLFLAG_RW | CTLFLAG_STATS,
&reassignbufcalls, 0, "Number of calls to reassignbuf");
static counter_u64_t free_owe_inact;
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, free_owe_inact, CTLFLAG_RD, &free_owe_inact,
"Number of times free vnodes kept on active list due to VFS "
"owing inactivation");
static counter_u64_t deferred_inact;
SYSCTL_COUNTER_U64(_vfs, OID_AUTO, deferred_inact, CTLFLAG_RD, &deferred_inact,
"Number of times inactive processing was deferred");
/* To keep more than one thread at a time from running vfs_getnewfsid */
static struct mtx mntid_mtx;
@ -608,7 +607,7 @@ vntblinit(void *dummy __unused)
vnodes_created = counter_u64_alloc(M_WAITOK);
recycles_count = counter_u64_alloc(M_WAITOK);
recycles_free_count = counter_u64_alloc(M_WAITOK);
free_owe_inact = counter_u64_alloc(M_WAITOK);
deferred_inact = counter_u64_alloc(M_WAITOK);
/*
* Initialize the filesystem syncer.
@ -3012,6 +3011,39 @@ vrefcnt(struct vnode *vp)
return (vp->v_usecount);
}
/*
 * Defer inactive processing (VOP_INACTIVE) of the vnode to the syncer.
 *
 * The vnode interlock must be held and the vnode must have VI_OWEINACT
 * set and not be doomed.  The caller's hold count is retained as the
 * reference which the syncer drops after handling the deferred inactive
 * (see vfs_deferred_inactive()).  If VI_DEFINACT is already set, a
 * previous deferral is pending and its hold reference suffices, so the
 * extra hold is released instead of stacking another one.
 *
 * Returns with the interlock released in all cases.
 */
static void
vdefer_inactive(struct vnode *vp)
{

	ASSERT_VI_LOCKED(vp, __func__);
	VNASSERT(vp->v_iflag & VI_OWEINACT, vp,
	    ("%s: vnode without VI_OWEINACT", __func__));
	VNASSERT(!VN_IS_DOOMED(vp), vp,
	    ("%s: doomed vnode", __func__));
	if (vp->v_iflag & VI_DEFINACT) {
		/*
		 * Already deferred; both the pending deferral and our caller
		 * hold the vnode, hence the count must exceed 1.
		 */
		VNASSERT(vp->v_holdcnt > 1, vp, ("lost hold count"));
		vdropl(vp);
		return;
	}
	vp->v_iflag |= VI_DEFINACT;
	VI_UNLOCK(vp);
	counter_u64_add(deferred_inact, 1);
}
/*
 * Conditionally defer inactive processing of the vnode.
 *
 * Unlike vdefer_inactive(), this takes the interlock itself and rechecks
 * whether deferral is still warranted: if the vnode got doomed or no
 * longer owes an inactive call, the hold reference is simply dropped.
 * Called with the interlock unlocked; returns with it released.
 */
static void
vdefer_inactive_cond(struct vnode *vp)
{

	VI_LOCK(vp);
	VNASSERT(vp->v_holdcnt > 0, vp, ("vnode without hold count"));
	if (VN_IS_DOOMED(vp) ||
	    (vp->v_iflag & VI_OWEINACT) == 0) {
		vdropl(vp);
		return;
	}
	vdefer_inactive(vp);
}
enum vputx_op { VPUTX_VRELE, VPUTX_VPUT, VPUTX_VUNREF };
/*
@ -3101,8 +3133,12 @@ vputx(struct vnode *vp, enum vputx_op func)
vinactive(vp);
if (func != VPUTX_VUNREF)
VOP_UNLOCK(vp);
vdropl(vp);
} else if (vp->v_iflag & VI_OWEINACT) {
vdefer_inactive(vp);
} else {
vdropl(vp);
}
vdropl(vp);
}
/*
@ -3257,28 +3293,27 @@ vdrop_deactivate(struct vnode *vp)
("vdrop: vnode already reclaimed."));
VNASSERT((vp->v_iflag & VI_FREE) == 0, vp,
("vnode already free"));
VNASSERT((vp->v_iflag & VI_OWEINACT) == 0, vp,
("vnode with VI_OWEINACT set"));
VNASSERT((vp->v_iflag & VI_DEFINACT) == 0, vp,
("vnode with VI_DEFINACT set"));
VNASSERT(vp->v_holdcnt == 0, vp,
("vdrop: freeing when we shouldn't"));
if ((vp->v_iflag & VI_OWEINACT) == 0) {
mp = vp->v_mount;
mtx_lock(&mp->mnt_listmtx);
if (vp->v_iflag & VI_ACTIVE) {
vp->v_iflag &= ~VI_ACTIVE;
TAILQ_REMOVE(&mp->mnt_activevnodelist, vp, v_actfreelist);
mp->mnt_activevnodelistsize--;
}
TAILQ_INSERT_TAIL(&mp->mnt_tmpfreevnodelist, vp, v_actfreelist);
mp->mnt_tmpfreevnodelistsize++;
vp->v_iflag |= VI_FREE;
vp->v_mflag |= VMP_TMPMNTFREELIST;
VI_UNLOCK(vp);
if (mp->mnt_tmpfreevnodelistsize >= mnt_free_list_batch)
vnlru_return_batch_locked(mp);
mtx_unlock(&mp->mnt_listmtx);
} else {
VI_UNLOCK(vp);
counter_u64_add(free_owe_inact, 1);
mp = vp->v_mount;
mtx_lock(&mp->mnt_listmtx);
if (vp->v_iflag & VI_ACTIVE) {
vp->v_iflag &= ~VI_ACTIVE;
TAILQ_REMOVE(&mp->mnt_activevnodelist, vp, v_actfreelist);
mp->mnt_activevnodelistsize--;
}
TAILQ_INSERT_TAIL(&mp->mnt_tmpfreevnodelist, vp, v_actfreelist);
mp->mnt_tmpfreevnodelistsize++;
vp->v_iflag |= VI_FREE;
vp->v_mflag |= VMP_TMPMNTFREELIST;
VI_UNLOCK(vp);
if (mp->mnt_tmpfreevnodelistsize >= mnt_free_list_batch)
vnlru_return_batch_locked(mp);
mtx_unlock(&mp->mnt_listmtx);
}
void
@ -3629,7 +3664,17 @@ vgonel(struct vnode *vp)
*/
active = vp->v_usecount > 0;
oweinact = (vp->v_iflag & VI_OWEINACT) != 0;
VI_UNLOCK(vp);
/*
* If we need to do inactive VI_OWEINACT will be set.
*/
if (vp->v_iflag & VI_DEFINACT) {
VNASSERT(vp->v_holdcnt > 1, vp, ("lost hold count"));
vp->v_iflag &= ~VI_DEFINACT;
vdropl(vp);
} else {
VNASSERT(vp->v_holdcnt > 0, vp, ("vnode without hold count"));
VI_UNLOCK(vp);
}
vfs_notify_upper(vp, VFS_NOTIFY_UPPER_RECLAIM);
/*
@ -3823,8 +3868,10 @@ vn_printf(struct vnode *vp, const char *fmt, ...)
strlcat(buf, "|VI_DOINGINACT", sizeof(buf));
if (vp->v_iflag & VI_OWEINACT)
strlcat(buf, "|VI_OWEINACT", sizeof(buf));
if (vp->v_iflag & VI_DEFINACT)
strlcat(buf, "|VI_DEFINACT", sizeof(buf));
flags = vp->v_iflag & ~(VI_TEXT_REF | VI_MOUNT | VI_FREE | VI_ACTIVE |
VI_DOINGINACT | VI_OWEINACT);
VI_DOINGINACT | VI_OWEINACT | VI_DEFINACT);
if (flags != 0) {
snprintf(buf2, sizeof(buf2), "|VI(0x%lx)", flags);
strlcat(buf, buf2, sizeof(buf));
@ -4381,22 +4428,66 @@ vfs_unmountall(void)
unmount_or_warn(rootdevmp);
}
/*
* perform msync on all vnodes under a mount point
* the mount point must be locked.
*/
void
vfs_msync(struct mount *mp, int flags)
/*
 * Handle a vnode whose deferred inactive was flagged with VI_DEFINACT.
 *
 * Called by the periodic syncer paths with the interlock held and
 * VI_DEFINACT already cleared by the caller.  Consumes the hold reference
 * taken when the inactive call was deferred: if VI_OWEINACT is gone the
 * hold is just dropped; otherwise the vnode lock is acquired (lkflags is
 * expected to include LK_INTERLOCK — set by the callers) and VOP_INACTIVE
 * is performed via vinactive() unless another thread is already doing it.
 * If the lock cannot be obtained, deferral is re-attempted through
 * vdefer_inactive_cond().
 */
static void
vfs_deferred_inactive(struct vnode *vp, int lkflags)
{

	ASSERT_VI_LOCKED(vp, __func__);
	VNASSERT((vp->v_iflag & VI_DEFINACT) == 0, vp, ("VI_DEFINACT still set"));
	if ((vp->v_iflag & VI_OWEINACT) == 0) {
		vdropl(vp);
		return;
	}
	if (vn_lock(vp, lkflags) == 0) {
		VI_LOCK(vp);
		/* Skip if someone else is mid-VOP_INACTIVE or it was done. */
		if ((vp->v_iflag & (VI_OWEINACT | VI_DOINGINACT)) == VI_OWEINACT)
			vinactive(vp);
		VOP_UNLOCK(vp);
		vdropl(vp);
		return;
	}
	vdefer_inactive_cond(vp);
}
/*
 * Periodic pass over the active vnode list performing only deferred
 * inactive processing (used for mounts with MNTK_NOMSYNC, where no msync
 * sweep is wanted — see vfs_periodic()).
 *
 * With MNT_WAIT the vnode lock is waited for; otherwise LK_NOWAIT keeps
 * the pass best-effort.  LK_INTERLOCK is always set because
 * vfs_deferred_inactive() is entered with the interlock held.
 */
static void __noinline
vfs_periodic_inactive(struct mount *mp, int flags)
{
	struct vnode *vp, *mvp;
	int lkflags;

	lkflags = LK_EXCLUSIVE | LK_INTERLOCK;
	if (flags != MNT_WAIT)
		lkflags |= LK_NOWAIT;

	MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) {
		if ((vp->v_iflag & VI_DEFINACT) == 0) {
			VI_UNLOCK(vp);
			continue;
		}
		/* Clear the flag; vfs_deferred_inactive() consumes the hold. */
		vp->v_iflag &= ~VI_DEFINACT;
		vfs_deferred_inactive(vp, lkflags);
	}
}
/*
 * Decide whether the periodic sweep should msync this vnode: it must not
 * opt out via VV_NOSYNC and must have a VM object that might hold dirty
 * pages.
 */
static inline bool
vfs_want_msync(struct vnode *vp)
{
	struct vm_object *obj;

	if (vp->v_vflag & VV_NOSYNC)
		return (false);
	obj = vp->v_object;
	return (obj != NULL && vm_object_mightbedirty(obj));
}
static void __noinline
vfs_periodic_msync_inactive(struct mount *mp, int flags)
{
struct vnode *vp, *mvp;
struct vm_object *obj;
struct thread *td;
int lkflags, objflags;
CTR2(KTR_VFS, "%s: mp %p", __func__, mp);
if ((mp->mnt_kern_flag & MNTK_NOMSYNC) != 0)
return;
bool seen_defer;
td = curthread;
@ -4409,9 +4500,16 @@ vfs_msync(struct mount *mp, int flags)
}
MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) {
obj = vp->v_object;
if (obj == NULL || !vm_object_mightbedirty(obj)) {
VI_UNLOCK(vp);
seen_defer = false;
if (vp->v_iflag & VI_DEFINACT) {
vp->v_iflag &= ~VI_DEFINACT;
seen_defer = true;
}
if (!vfs_want_msync(vp)) {
if (seen_defer)
vfs_deferred_inactive(vp, lkflags);
else
VI_UNLOCK(vp);
continue;
}
if (vget(vp, lkflags, td) == 0) {
@ -4422,10 +4520,27 @@ vfs_msync(struct mount *mp, int flags)
VM_OBJECT_WUNLOCK(obj);
}
vput(vp);
if (seen_defer)
vdrop(vp);
} else {
if (seen_defer)
vdefer_inactive_cond(vp);
}
}
}
/*
 * Periodic filesystem maintenance: process deferred inactive vnodes and,
 * unless the mount opted out with MNTK_NOMSYNC, msync dirty mappings as
 * well.  Replaces the former vfs_msync() entry point; called from the
 * syncer, sync(2)-style paths and unmount.
 */
void
vfs_periodic(struct mount *mp, int flags)
{

	CTR2(KTR_VFS, "%s: mp %p", __func__, mp);

	if ((mp->mnt_kern_flag & MNTK_NOMSYNC) != 0)
		vfs_periodic_inactive(mp, flags);
	else
		vfs_periodic_msync_inactive(mp, flags);
}
static void
destroy_vpollinfo_free(struct vpollinfo *vi)
{
@ -4636,7 +4751,7 @@ sync_fsync(struct vop_fsync_args *ap)
* batch. Return them instead of letting them stay there indefinitely.
*/
vnlru_return_batch(mp);
vfs_msync(mp, MNT_NOWAIT);
vfs_periodic(mp, MNT_NOWAIT);
error = VFS_SYNC(mp, MNT_LAZY);
curthread_pflags_restore(save);
vn_finished_write(mp);

View File

@ -129,7 +129,7 @@ kern_sync(struct thread *td)
if ((mp->mnt_flag & MNT_RDONLY) == 0 &&
vn_start_write(NULL, &mp, V_NOWAIT) == 0) {
save = curthread_pflags_set(TDP_SYNCIO);
vfs_msync(mp, MNT_NOWAIT);
vfs_periodic(mp, MNT_NOWAIT);
VFS_SYNC(mp, MNT_NOWAIT);
curthread_pflags_restore(save);
vn_finished_write(mp);

View File

@ -396,7 +396,7 @@ void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *);
#define MNTK_UNMOUNTF 0x00000001 /* forced unmount in progress */
#define MNTK_ASYNC 0x00000002 /* filtered async flag */
#define MNTK_SOFTDEP 0x00000004 /* async disabled by softdep */
#define MNTK_NOMSYNC 0x00000008 /* don't do vfs_msync */
#define MNTK_NOMSYNC 0x00000008 /* don't do msync */
#define MNTK_DRAINING 0x00000010 /* lock draining is happening */
#define MNTK_REFEXPIRE 0x00000020 /* refcount expiring is happening */
#define MNTK_EXTENDED_SHARED 0x00000040 /* Allow shared locking for more ops */
@ -903,7 +903,7 @@ int vfs_setopts(struct vfsoptlist *opts, const char *name,
const char *value);
int vfs_setpublicfs /* set publicly exported fs */
(struct mount *, struct netexport *, struct export_args *);
void vfs_msync(struct mount *, int);
void vfs_periodic(struct mount *, int);
int vfs_busy(struct mount *, int);
int vfs_export /* process mount export info */
(struct mount *, struct export_args *);

View File

@ -242,6 +242,7 @@ struct xvnode {
#define VI_ACTIVE 0x0200 /* This vnode is on the active list */
#define VI_DOINGINACT 0x0800 /* VOP_INACTIVE is in progress */
#define VI_OWEINACT 0x1000 /* Need to call inactive */
#define VI_DEFINACT 0x2000 /* deferred inactive */
#define VV_ROOT 0x0001 /* root of its filesystem */
#define VV_ISTTY 0x0002 /* vnode represents a tty */