diff --git a/include/sys/zfs_znode.h b/include/sys/zfs_znode.h index b028e2eb2de9..2f9ca743d97b 100644 --- a/include/sys/zfs_znode.h +++ b/include/sys/zfs_znode.h @@ -280,6 +280,8 @@ typedef struct znode { mutex_tryenter(ZFS_OBJ_MUTEX((zsb), (obj_num))) #define ZFS_OBJ_HOLD_EXIT(zsb, obj_num) \ mutex_exit(ZFS_OBJ_MUTEX((zsb), (obj_num))) +#define ZFS_OBJ_HOLD_OWNED(zsb, obj_num) \ + mutex_owned(ZFS_OBJ_MUTEX((zsb), (obj_num))) /* * Macros to encode/decode ZFS stored time values from/to struct timespec diff --git a/module/zfs/zfs_znode.c b/module/zfs/zfs_znode.c index 1017414f7b1c..1fe88499b555 100644 --- a/module/zfs/zfs_znode.c +++ b/module/zfs/zfs_znode.c @@ -980,13 +980,24 @@ zfs_zinactive(znode_t *zp) { zfs_sb_t *zsb = ZTOZSB(zp); uint64_t z_id = zp->z_id; + boolean_t drop_mutex = 0; ASSERT(zp->z_sa_hdl); /* - * Don't allow a zfs_zget() while were trying to release this znode + * Don't allow a zfs_zget() while we're trying to release this znode. + * + * Linux allows direct memory reclaim which means that any KM_SLEEP + * allocation may trigger inode eviction. This can lead to a deadlock + * through the ->shrink_icache_memory()->evict()->zfs_inactive()-> + * zfs_zinactive() call path. To avoid this deadlock the process + * must not reacquire the mutex when it is already holding it. */ - ZFS_OBJ_HOLD_ENTER(zsb, z_id); + if (!ZFS_OBJ_HOLD_OWNED(zsb, z_id)) { + ZFS_OBJ_HOLD_ENTER(zsb, z_id); + drop_mutex = 1; + } + mutex_enter(&zp->z_lock); /* @@ -995,14 +1006,19 @@ zfs_zinactive(znode_t *zp) */ if (zp->z_unlinked) { mutex_exit(&zp->z_lock); - ZFS_OBJ_HOLD_EXIT(zsb, z_id); + + if (drop_mutex) + ZFS_OBJ_HOLD_EXIT(zsb, z_id); + zfs_rmnode(zp); return; } mutex_exit(&zp->z_lock); zfs_znode_dmu_fini(zp); - ZFS_OBJ_HOLD_EXIT(zsb, z_id); + + if (drop_mutex) + ZFS_OBJ_HOLD_EXIT(zsb, z_id); } void