Use KM_PUSHPAGE instead of KM_SLEEP
It used to be the case that all KM_SLEEP allocations were GFP_NOFS. Unfortunately this often resulted in the kernel being unable to reclaim the ARC, inode, and dentry caches in a timely manner. The fix was to make KM_SLEEP a GFP_KERNEL allocation in the SPL. However, this increases the possibility of deadlocking the system on a zfs write thread. If a zfs write thread attempts to perform an allocation it may trigger synchronous reclaim. This reclaim may attempt to flush dirty data/inodes to disk to free memory. Unfortunately, this write cannot finish because the write thread which would handle it is holding the previous transaction open. Deadlock. To avoid this all allocations in the zfs write thread path must use KM_PUSHPAGE which prohibits synchronous reclaim for that thread. In this way forward progress is ensured. The risk with this change is that I missed updating an allocation for the write threads, leaving an increased possibility of deadlock. If any deadlocks remain they will be unlikely but we'll have to make sure they all get fixed.
This commit is contained in:
parent
f47c42e214
commit
691f6ac4c2
@ -2720,7 +2720,7 @@ top:
|
||||
arc_callback_t *acb = NULL;
|
||||
|
||||
acb = kmem_zalloc(sizeof (arc_callback_t),
|
||||
KM_SLEEP);
|
||||
KM_PUSHPAGE);
|
||||
acb->acb_done = done;
|
||||
acb->acb_private = private;
|
||||
if (pio != NULL)
|
||||
@ -2836,7 +2836,7 @@ top:
|
||||
|
||||
ASSERT(!GHOST_STATE(hdr->b_state));
|
||||
|
||||
acb = kmem_zalloc(sizeof (arc_callback_t), KM_SLEEP);
|
||||
acb = kmem_zalloc(sizeof (arc_callback_t), KM_PUSHPAGE);
|
||||
acb->acb_done = done;
|
||||
acb->acb_private = private;
|
||||
|
||||
@ -2885,7 +2885,7 @@ top:
|
||||
ARCSTAT_BUMP(arcstat_l2_hits);
|
||||
|
||||
cb = kmem_zalloc(sizeof (l2arc_read_callback_t),
|
||||
KM_SLEEP);
|
||||
KM_PUSHPAGE);
|
||||
cb->l2rcb_buf = buf;
|
||||
cb->l2rcb_spa = spa;
|
||||
cb->l2rcb_bp = *bp;
|
||||
|
@ -174,7 +174,7 @@ zfs_create_share_dir(zfs_sb_t *zsb, dmu_tx_t *tx)
|
||||
vattr.va_uid = crgetuid(kcred);
|
||||
vattr.va_gid = crgetgid(kcred);
|
||||
|
||||
sharezp = kmem_cache_alloc(znode_cache, KM_SLEEP);
|
||||
sharezp = kmem_cache_alloc(znode_cache, KM_PUSHPAGE);
|
||||
sharezp->z_moved = 0;
|
||||
sharezp->z_unlinked = 0;
|
||||
sharezp->z_atime_dirty = 0;
|
||||
@ -248,7 +248,7 @@ zfs_inode_alloc(struct super_block *sb, struct inode **ip)
|
||||
{
|
||||
znode_t *zp;
|
||||
|
||||
zp = kmem_cache_alloc(znode_cache, KM_SLEEP);
|
||||
zp = kmem_cache_alloc(znode_cache, KM_PUSHPAGE);
|
||||
*ip = ZTOI(zp);
|
||||
|
||||
return (0);
|
||||
|
@ -255,7 +255,7 @@ static void
|
||||
zio_push_transform(zio_t *zio, void *data, uint64_t size, uint64_t bufsize,
|
||||
zio_transform_func_t *transform)
|
||||
{
|
||||
zio_transform_t *zt = kmem_alloc(sizeof (zio_transform_t), KM_SLEEP);
|
||||
zio_transform_t *zt = kmem_alloc(sizeof (zio_transform_t), KM_PUSHPAGE);
|
||||
|
||||
zt->zt_orig_data = zio->io_data;
|
||||
zt->zt_orig_size = zio->io_size;
|
||||
@ -370,7 +370,7 @@ zio_unique_parent(zio_t *cio)
|
||||
void
|
||||
zio_add_child(zio_t *pio, zio_t *cio)
|
||||
{
|
||||
zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP);
|
||||
zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_PUSHPAGE);
|
||||
int w;
|
||||
|
||||
/*
|
||||
@ -491,7 +491,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
|
||||
ASSERT(!bp || !(flags & ZIO_FLAG_CONFIG_WRITER));
|
||||
ASSERT(vd || stage == ZIO_STAGE_OPEN);
|
||||
|
||||
zio = kmem_cache_alloc(zio_cache, KM_SLEEP);
|
||||
zio = kmem_cache_alloc(zio_cache, KM_PUSHPAGE);
|
||||
bzero(zio, sizeof (zio_t));
|
||||
|
||||
mutex_init(&zio->io_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
@ -1503,7 +1503,7 @@ zio_gang_node_alloc(zio_gang_node_t **gnpp)
|
||||
|
||||
ASSERT(*gnpp == NULL);
|
||||
|
||||
gn = kmem_zalloc(sizeof (*gn), KM_SLEEP);
|
||||
gn = kmem_zalloc(sizeof (*gn), KM_PUSHPAGE);
|
||||
gn->gn_gbh = zio_buf_alloc(SPA_GANGBLOCKSIZE);
|
||||
*gnpp = gn;
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user