vfs: fix vnlru marker handling for filtered/unfiltered cases

The global vnode list has a marker with the invariant that free vnodes
are placed somewhere past it. A caller which performs filtering (like
ZFS) can move said marker all the way to the end of the list, across
free vnodes which don't match the filter. A subsequent caller which
does not filter then fails to find them. This makes vn_alloc_hard
sleep for 1 second instead of reclaiming, resulting in significant
stalls.
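
In sketch form (simplified; not the verbatim loop, and the requeue
details differ in the real code) the old walk behaved like this, with
the skip case moving the shared marker past non-matching free vnodes:

	mvp = vnode_list_free_marker;	/* shared by every caller */
	vp = mvp;
	while ((vp = TAILQ_NEXT(vp, v_vnodelist)) != NULL) {
		if (mnt_op != NULL && vp->v_mount != NULL &&
		    vp->v_mount->mnt_op != mnt_op) {
			/*
			 * vp fails the filter, but the marker is
			 * still requeued past it; a later caller
			 * starting at the marker never sees vp.
			 */
			TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
			TAILQ_INSERT_AFTER(&vnode_list, vp, mvp,
			    v_vnodelist);
			continue;
		}
		/* otherwise try to recycle vp, decrement count, ... */
	}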

Fix the problem by requiring an explicit marker from callers which do
filtering.

As a temporary measure, extend vnlru_free to restart the scan if it
fails to reclaim anything.

Big thanks go to the reporter for testing several iterations of the
patch.

Reported by:	Yamagi <lists yamagi.org>
Tested by:	Yamagi <lists yamagi.org>
Reviewed by:	kib
Differential Revision:	https://reviews.freebsd.org/D29324
commit e9272225e6 (parent 21864048f3)
Author:	Mateusz Guzik
Date:	2021-03-17 22:33:47 +01:00

3 changed files with 80 additions and 9 deletions

sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c

@@ -51,6 +51,9 @@
 #include <sys/vm.h>
 #include <sys/vmmeter.h>
 
+static struct sx arc_vnlru_lock;
+static struct vnode *arc_vnlru_marker;
+
 extern struct vfsops zfs_vfsops;
 
 uint_t zfs_arc_free_target = 0;
@@ -157,7 +160,9 @@ arc_prune_task(void *arg)
 
 	arc_reduce_target_size(ptob(nr_scan));
 	free(arg, M_TEMP);
-	vnlru_free(nr_scan, &zfs_vfsops);
+	sx_xlock(&arc_vnlru_lock);
+	vnlru_free_vfsops(nr_scan, &zfs_vfsops, arc_vnlru_marker);
+	sx_xunlock(&arc_vnlru_lock);
 }
 
 /*
@@ -234,7 +239,8 @@ arc_lowmem_init(void)
 {
 	arc_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, arc_lowmem, NULL,
 	    EVENTHANDLER_PRI_FIRST);
+	arc_vnlru_marker = vnlru_alloc_marker();
+	sx_init(&arc_vnlru_lock, "arc vnlru lock");
 }
 
 void
@@ -242,6 +248,10 @@ arc_lowmem_fini(void)
 {
 	if (arc_event_lowmem != NULL)
 		EVENTHANDLER_DEREGISTER(vm_lowmem, arc_event_lowmem);
+	if (arc_vnlru_marker != NULL) {
+		vnlru_free_marker(arc_vnlru_marker);
+		sx_destroy(&arc_vnlru_lock);
+	}
 }
 
 void
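
The arc_os.c hunks above are also the model for other consumers of the
new interface. Generalized (my_vfsops, my_marker and my_lock are
hypothetical names; the vnlru_* and sx_* calls are real), a filtering
subsystem would do:

	static struct vnode *my_marker;
	static struct sx my_lock;

	/* Initialization: */
	my_marker = vnlru_alloc_marker();
	sx_init(&my_lock, "my vnlru lock");

	/*
	 * Per reclaim request. The sx serializes users of the private
	 * marker; vnlru_free_vfsops() itself only takes vnode_list_mtx.
	 */
	sx_xlock(&my_lock);
	vnlru_free_vfsops(count, &my_vfsops, my_marker);
	sx_xunlock(&my_lock);

	/* Teardown: */
	vnlru_free_marker(my_marker);
	sx_destroy(&my_lock);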

sys/kern/vfs_subr.c

@@ -1216,9 +1216,9 @@ SYSCTL_INT(_debug, OID_AUTO, max_vnlru_free, CTLFLAG_RW, &max_vnlru_free,
 /*
  * Attempt to reduce the free list by the requested amount.
  */
 static int
-vnlru_free_locked(int count, struct vfsops *mnt_op)
+vnlru_free_impl(int count, struct vfsops *mnt_op, struct vnode *mvp)
 {
-	struct vnode *vp, *mvp;
+	struct vnode *vp;
 	struct mount *mp;
 	int ocount;
@@ -1226,7 +1226,6 @@ vnlru_free_locked(int count, struct vfsops *mnt_op)
 	if (count > max_vnlru_free)
 		count = max_vnlru_free;
 	ocount = count;
-	mvp = vnode_list_free_marker;
 	vp = mvp;
 	for (;;) {
 		if (count == 0) {
@@ -1268,15 +1267,74 @@ vnlru_free_locked(int count, struct vfsops *mnt_op)
 	return (ocount - count);
 }
 
+static int
+vnlru_free_locked(int count)
+{
+
+	mtx_assert(&vnode_list_mtx, MA_OWNED);
+	return (vnlru_free_impl(count, NULL, vnode_list_free_marker));
+}
+
+void
+vnlru_free_vfsops(int count, struct vfsops *mnt_op, struct vnode *mvp)
+{
+
+	MPASS(mnt_op != NULL);
+	MPASS(mvp != NULL);
+	VNPASS(mvp->v_type == VMARKER, mvp);
+	mtx_lock(&vnode_list_mtx);
+	vnlru_free_impl(count, mnt_op, mvp);
+	mtx_unlock(&vnode_list_mtx);
+}
+
+/*
+ * Temporary binary compat, don't use. Call vnlru_free_vfsops instead.
+ */
 void
 vnlru_free(int count, struct vfsops *mnt_op)
 {
+	struct vnode *mvp;
 
+	if (count == 0)
+		return;
 	mtx_lock(&vnode_list_mtx);
-	vnlru_free_locked(count, mnt_op);
+	mvp = vnode_list_free_marker;
+	if (vnlru_free_impl(count, mnt_op, mvp) == 0) {
+		/*
+		 * It is possible the marker was moved over eligible vnodes by
+		 * callers which filtered by different ops. If so, start from
+		 * scratch.
+		 */
+		if (vnlru_read_freevnodes() > 0) {
+			TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
+			TAILQ_INSERT_HEAD(&vnode_list, mvp, v_vnodelist);
+		}
+		vnlru_free_impl(count, mnt_op, mvp);
+	}
 	mtx_unlock(&vnode_list_mtx);
 }
 
+struct vnode *
+vnlru_alloc_marker(void)
+{
+	struct vnode *mvp;
+
+	mvp = vn_alloc_marker(NULL);
+	mtx_lock(&vnode_list_mtx);
+	TAILQ_INSERT_BEFORE(vnode_list_free_marker, mvp, v_vnodelist);
+	mtx_unlock(&vnode_list_mtx);
+	return (mvp);
+}
+
+void
+vnlru_free_marker(struct vnode *mvp)
+{
+	mtx_lock(&vnode_list_mtx);
+	TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
+	mtx_unlock(&vnode_list_mtx);
+	vn_free_marker(mvp);
+}
+
 static void
 vnlru_recalc(void)
 {
@@ -1423,7 +1481,7 @@ vnlru_proc(void)
 		 * try to reduce it by discarding from the free list.
 		 */
 		if (rnumvnodes > desiredvnodes) {
-			vnlru_free_locked(rnumvnodes - desiredvnodes, NULL);
+			vnlru_free_locked(rnumvnodes - desiredvnodes);
 			rnumvnodes = atomic_load_long(&numvnodes);
 		}
 		/*
@@ -1615,7 +1673,7 @@ vn_alloc_hard(struct mount *mp)
 	 * should be chosen so that we never wait or even reclaim from
 	 * the free list to below its target minimum.
 	 */
-	if (vnlru_free_locked(1, NULL) > 0)
+	if (vnlru_free_locked(1) > 0)
 		goto alloc;
 	if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPEND) == 0) {
 		/*
@@ -1625,7 +1683,7 @@ vn_alloc_hard(struct mount *mp)
 		msleep(&vnlruproc_sig, &vnode_list_mtx, PVFS, "vlruwk", hz);
 		if (atomic_load_long(&numvnodes) + 1 > desiredvnodes &&
 		    vnlru_read_freevnodes() > 1)
-			vnlru_free_locked(1, NULL);
+			vnlru_free_locked(1);
 	}
 alloc:
 	rnumvnodes = atomic_fetchadd_long(&numvnodes, 1) + 1;
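
Two details of the vfs_subr.c additions are worth spelling out (my
reading of the code above, not stated in the commit message). First,
vnlru_alloc_marker() links the new marker in with
TAILQ_INSERT_BEFORE(vnode_list_free_marker, ...), so a private
filtered walk starts at the boundary where free vnodes begin instead
of at the head of the whole list. Second, the restart in the compat
vnlru_free() relies on the invariant that free vnodes sit past the
marker: after

	TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
	TAILQ_INSERT_HEAD(&vnode_list, mvp, v_vnodelist);

every free vnode is again past the marker, so the second
vnlru_free_impl() pass can reach vnodes stranded by callers which
filtered by different ops.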

sys/sys/vnode.h

@@ -826,7 +826,10 @@ void	vfs_timestamp(struct timespec *);
 void	vfs_write_resume(struct mount *mp, int flags);
 int	vfs_write_suspend(struct mount *mp, int flags);
 int	vfs_write_suspend_umnt(struct mount *mp);
+struct vnode *vnlru_alloc_marker(void);
+void	vnlru_free_marker(struct vnode *);
 void	vnlru_free(int, struct vfsops *);
+void	vnlru_free_vfsops(int, struct vfsops *, struct vnode *);
 int	vop_stdbmap(struct vop_bmap_args *);
 int	vop_stdfdatasync_buf(struct vop_fdatasync_args *);
 int	vop_stdfsync(struct vop_fsync_args *);
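
For reference, the interface this commit adds, with semantics
paraphrased from the vfs_subr.c hunks (the header itself carries no
comments):

	struct vnode *vnlru_alloc_marker(void);
		/* Allocate a private marker and link it into vnode_list. */
	void	vnlru_free_marker(struct vnode *);
		/* Unlink and free a marker from vnlru_alloc_marker(). */
	void	vnlru_free_vfsops(int, struct vfsops *, struct vnode *);
		/* Free up to count vnodes whose mount matches the given
		   vfsops, scanning from the caller's marker; the caller
		   must serialize uses of its marker. */
	void	vnlru_free(int, struct vfsops *);
		/* Temporary binary compat only; do not use in new code. */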