From 766d50825ef7497b0a6d03c292018b66d7facda9 Mon Sep 17 00:00:00 2001 From: avg Date: Sat, 20 Oct 2012 10:02:18 +0000 Subject: [PATCH] zfs: wait in arc_lowmem only if curproc == pageproc ... otherwise the current thread might be holding ARC locks and thus run into a deadlock. This happens, for example, when a thread does memory allocation in the ARC code and runs into KVA shortage. Also, it really makes the most sense to wait in pageproc, so that the results of ARC reclamation are seen before the page cache is acted upon. In other cases where vm_lowmem is invoked, e.g. on KVA space shortage, the callers perform multiple attempts (up to 8) and wait for rather long intervals between them (up to 4 seconds), so ARC reclaim results should become visible even without explicit waiting on the ARC thread. Note that this is not a critical issue for typical ZFS usages where KVA space should already be large enough. On amd64 systems setting KVA size to twice the physical memory size is known to mitigate KVA fragmentation issues in practice. Side note: perhaps vm_lowmem 'how' parameter should be used to differentiate between causes of the event. Reported by: Nikolay Denev MFC after: 19 days --- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c index 5d90438320a2..8e08927d4b29 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c @@ -3792,8 +3792,16 @@ arc_lowmem(void *arg __unused, int howto __unused) mutex_enter(&arc_reclaim_thr_lock); needfree = 1; cv_signal(&arc_reclaim_thr_cv); - while (needfree) - msleep(&needfree, &arc_reclaim_thr_lock, 0, "zfs:lowmem", 0); + + /* + * It is unsafe to block here in arbitrary threads, because we can come + * here from ARC itself and may hold ARC locks and thus risk a deadlock + * with ARC reclaim thread. + */ + if (curproc == pageproc) { + while (needfree) + msleep(&needfree, &arc_reclaim_thr_lock, 0, "zfs:lowmem", 0); + } mutex_exit(&arc_reclaim_thr_lock); mutex_exit(&arc_lowmem_lock); }