vfs: fix vnlru marker handling for filtered/unfiltered cases
The global vnode list has a marker with the invariant that free vnodes are
placed somewhere past it. A caller which performs filtering (like ZFS) can
move said marker all the way to the end, past free vnodes which don't match
its filter. A caller which does not filter then fails to find them, which
makes vn_alloc_hard sleep for 1 second instead of reclaiming and results in
significant stalls.

Fix the problem by requiring an explicit marker from callers which do
filtering. As a temporary measure, extend vnlru_free to restart if it fails
to reclaim anything.

Big thanks go to the reporter for testing several iterations of the patch.

Reported by:	Yamagi <lists@yamagi.org>
Tested by:	Yamagi <lists@yamagi.org>
Reviewed by:	kib
Differential Revision:	https://reviews.freebsd.org/D29324
commit e9272225e6
parent 21864048f3
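For illustration only, not part of the commit: a minimal sketch of how a filesystem that filters reclamation is expected to use the new marker interface, mirroring the OpenZFS ARC hunks below. The names my_vfsops, my_vnlru_lock, my_vnlru_marker and the my_* functions are placeholders; the vnlru_* and sx(9) calls are the ones used by the patch itself.

#include <sys/param.h>
#include <sys/mount.h>
#include <sys/sx.h>
#include <sys/vnode.h>

extern struct vfsops my_vfsops;		/* placeholder filesystem ops */

static struct sx my_vnlru_lock;		/* serializes use of the marker */
static struct vnode *my_vnlru_marker;	/* private marker on the vnode list */

static void
my_lowmem_init(void)
{
	/* Allocate a per-caller marker instead of reusing the global one. */
	my_vnlru_marker = vnlru_alloc_marker();
	sx_init(&my_vnlru_lock, "my vnlru lock");
}

static void
my_prune(int nr_scan)
{
	/* Reclaim only vnodes backed by my_vfsops, scanning from the marker. */
	sx_xlock(&my_vnlru_lock);
	vnlru_free_vfsops(nr_scan, &my_vfsops, my_vnlru_marker);
	sx_xunlock(&my_vnlru_lock);
}

static void
my_lowmem_fini(void)
{
	/* Unlink the marker from the vnode list and release it. */
	vnlru_free_marker(my_vnlru_marker);
	sx_destroy(&my_vnlru_lock);
}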
diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c b/sys/contrib/openzfs/module/os/freebsd/zfs/arc_os.c
@@ -51,6 +51,9 @@
 #include <sys/vm.h>
 #include <sys/vmmeter.h>
 
+static struct sx arc_vnlru_lock;
+static struct vnode *arc_vnlru_marker;
+
 extern struct vfsops zfs_vfsops;
 
 uint_t zfs_arc_free_target = 0;
@@ -157,7 +160,9 @@ arc_prune_task(void *arg)
 
 	arc_reduce_target_size(ptob(nr_scan));
 	free(arg, M_TEMP);
-	vnlru_free(nr_scan, &zfs_vfsops);
+	sx_xlock(&arc_vnlru_lock);
+	vnlru_free_vfsops(nr_scan, &zfs_vfsops, arc_vnlru_marker);
+	sx_xunlock(&arc_vnlru_lock);
 }
 
 /*
@@ -234,7 +239,8 @@ arc_lowmem_init(void)
 {
 	arc_event_lowmem = EVENTHANDLER_REGISTER(vm_lowmem, arc_lowmem, NULL,
 	    EVENTHANDLER_PRI_FIRST);
-
+	arc_vnlru_marker = vnlru_alloc_marker();
+	sx_init(&arc_vnlru_lock, "arc vnlru lock");
 }
 
 void
@@ -242,6 +248,10 @@ arc_lowmem_fini(void)
 {
 	if (arc_event_lowmem != NULL)
 		EVENTHANDLER_DEREGISTER(vm_lowmem, arc_event_lowmem);
+	if (arc_vnlru_marker != NULL) {
+		vnlru_free_marker(arc_vnlru_marker);
+		sx_destroy(&arc_vnlru_lock);
+	}
 }
 
 void
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
@@ -1216,9 +1216,9 @@ SYSCTL_INT(_debug, OID_AUTO, max_vnlru_free, CTLFLAG_RW, &max_vnlru_free,
  * Attempt to reduce the free list by the requested amount.
  */
 static int
-vnlru_free_locked(int count, struct vfsops *mnt_op)
+vnlru_free_impl(int count, struct vfsops *mnt_op, struct vnode *mvp)
 {
-	struct vnode *vp, *mvp;
+	struct vnode *vp;
 	struct mount *mp;
 	int ocount;
 
@@ -1226,7 +1226,6 @@ vnlru_free_locked(int count, struct vfsops *mnt_op)
 	if (count > max_vnlru_free)
 		count = max_vnlru_free;
 	ocount = count;
-	mvp = vnode_list_free_marker;
 	vp = mvp;
 	for (;;) {
 		if (count == 0) {
@@ -1268,15 +1267,74 @@ vnlru_free_locked(int count, struct vfsops *mnt_op)
 	return (ocount - count);
 }
 
+static int
+vnlru_free_locked(int count)
+{
+
+	mtx_assert(&vnode_list_mtx, MA_OWNED);
+	return (vnlru_free_impl(count, NULL, vnode_list_free_marker));
+}
+
+void
+vnlru_free_vfsops(int count, struct vfsops *mnt_op, struct vnode *mvp)
+{
+
+	MPASS(mnt_op != NULL);
+	MPASS(mvp != NULL);
+	VNPASS(mvp->v_type == VMARKER, mvp);
+	mtx_lock(&vnode_list_mtx);
+	vnlru_free_impl(count, mnt_op, mvp);
+	mtx_unlock(&vnode_list_mtx);
+}
+
+/*
+ * Temporary binary compat, don't use. Call vnlru_free_vfsops instead.
+ */
 void
 vnlru_free(int count, struct vfsops *mnt_op)
 {
+	struct vnode *mvp;
+
 	if (count == 0)
 		return;
 	mtx_lock(&vnode_list_mtx);
-	vnlru_free_locked(count, mnt_op);
+	mvp = vnode_list_free_marker;
+	if (vnlru_free_impl(count, mnt_op, mvp) == 0) {
+		/*
+		 * It is possible the marker was moved over eligible vnodes by
+		 * callers which filtered by different ops. If so, start from
+		 * scratch.
+		 */
+		if (vnlru_read_freevnodes() > 0) {
+			TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
+			TAILQ_INSERT_HEAD(&vnode_list, mvp, v_vnodelist);
+		}
+		vnlru_free_impl(count, mnt_op, mvp);
+	}
 	mtx_unlock(&vnode_list_mtx);
 }
+
+struct vnode *
+vnlru_alloc_marker(void)
+{
+	struct vnode *mvp;
+
+	mvp = vn_alloc_marker(NULL);
+	mtx_lock(&vnode_list_mtx);
+	TAILQ_INSERT_BEFORE(vnode_list_free_marker, mvp, v_vnodelist);
+	mtx_unlock(&vnode_list_mtx);
+	return (mvp);
+}
+
+void
+vnlru_free_marker(struct vnode *mvp)
+{
+	mtx_lock(&vnode_list_mtx);
+	TAILQ_REMOVE(&vnode_list, mvp, v_vnodelist);
+	mtx_unlock(&vnode_list_mtx);
+	vn_free_marker(mvp);
+}
 
 static void
 vnlru_recalc(void)
 {
@@ -1423,7 +1481,7 @@ vnlru_proc(void)
 		 * try to reduce it by discarding from the free list.
 		 */
 		if (rnumvnodes > desiredvnodes) {
-			vnlru_free_locked(rnumvnodes - desiredvnodes, NULL);
+			vnlru_free_locked(rnumvnodes - desiredvnodes);
 			rnumvnodes = atomic_load_long(&numvnodes);
 		}
 		/*
@@ -1615,7 +1673,7 @@ vn_alloc_hard(struct mount *mp)
 	 * should be chosen so that we never wait or even reclaim from
 	 * the free list to below its target minimum.
 	 */
-	if (vnlru_free_locked(1, NULL) > 0)
+	if (vnlru_free_locked(1) > 0)
 		goto alloc;
 	if (mp == NULL || (mp->mnt_kern_flag & MNTK_SUSPEND) == 0) {
 		/*
@@ -1625,7 +1683,7 @@ vn_alloc_hard(struct mount *mp)
 		msleep(&vnlruproc_sig, &vnode_list_mtx, PVFS, "vlruwk", hz);
 		if (atomic_load_long(&numvnodes) + 1 > desiredvnodes &&
 		    vnlru_read_freevnodes() > 1)
-			vnlru_free_locked(1, NULL);
+			vnlru_free_locked(1);
 	}
 alloc:
 	rnumvnodes = atomic_fetchadd_long(&numvnodes, 1) + 1;
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
@@ -826,7 +826,10 @@ void	vfs_timestamp(struct timespec *);
 void	vfs_write_resume(struct mount *mp, int flags);
 int	vfs_write_suspend(struct mount *mp, int flags);
 int	vfs_write_suspend_umnt(struct mount *mp);
+struct vnode *vnlru_alloc_marker(void);
+void	vnlru_free_marker(struct vnode *);
 void	vnlru_free(int, struct vfsops *);
+void	vnlru_free_vfsops(int, struct vfsops *, struct vnode *);
 int	vop_stdbmap(struct vop_bmap_args *);
 int	vop_stdfdatasync_buf(struct vop_fdatasync_args *);
 int	vop_stdfsync(struct vop_fsync_args *);