Use zio buffers in zil_itx_create()

The zil_itx_create() function uses the vmem_alloc() allocator for
its buffers because, when logging a write, that buffer may be as
large as 64K.  This is suboptimal because we may need to allocate
many of these buffers and this interface has the potential to be
slow.  Instead, use zio_data_buf_alloc(), which is specifically
designed to efficiently allocate a wide range of buffer sizes.
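
For illustration, the allocation in zil_itx_create() changes roughly
as follows (a condensed sketch of the first hunk in the diff below):

	/* Before: generic vmem allocation, which can be slow */
	itx = vmem_alloc(offsetof(itx_t, itx_lr) + lrsize, KM_SLEEP);

	/* After: served from the zio data buffer caches, which handle
	 * a wide range of buffer sizes efficiently */
	itx = zio_data_buf_alloc(offsetof(itx_t, itx_lr) + lrsize);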

In addition, do some cleanup and always free an itx structure
through the zil_itx_destroy() function.  This way we're always
sure the matching free function is used.  Notice that the current
code used both kmem_free() and vmem_free(); this happened to work
only because both wrappers map to the same internal SPL function.
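
Every teardown path then follows the same pattern, so the free always
matches the allocator used in zil_itx_create() (a minimal sketch based
on the loops changed in the diff below):

	while ((itx = list_head(list)) != NULL) {
		if (itx->itx_callback != NULL)
			itx->itx_callback(itx->itx_callback_data);
		list_remove(list, itx);
		zil_itx_destroy(itx);	/* frees via zio_data_buf_free() */
	}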

This was identified as a potential problem when a low-end,
memory-constrained system began logging the following warnings.
There was no deadlock here, just repeated allocation failures
resulting in increased latency.

Possible memory allocation deadlock: size=65792 lflags=0x42d0
Pid: 20118, comm: kvm Tainted: P           O 3.2.0-0.bpo.4-amd64
Call Trace:
 [<ffffffffa040b834>] ? spl_kmem_alloc_impl+0x115/0x127 [spl]
 [<ffffffffa040b84f>] ? spl_kmem_alloc_debug+0x9/0x36 [spl]
 [<ffffffffa05d8a0b>] ? zil_itx_create+0x2d/0x59 [zfs]
 [<ffffffffa05c71e6>] ? zfs_log_write+0x13a/0x2f0 [zfs]
 [<ffffffffa05d41bc>] ? zfs_write+0x85b/0x9bb [zfs]
 [<ffffffffa05e37ec>] ? zpl_aio_write+0xca/0x110 [zfs]
 [<ffffffff811088e5>] ? do_sync_readv_writev+0xa3/0xde
 [<ffffffff81108f41>] ? do_readv_writev+0xaf/0x125
 [<ffffffff81109055>] ? sys_pwritev+0x55/0x9a
 [<ffffffff813721d2>] ? system_call_fastpath+0x16/0x1b

Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Richard Yao <ryao@gentoo.org>
Closes #3059

@@ -1192,7 +1192,7 @@ zil_itx_create(uint64_t txtype, size_t lrsize)
 	lrsize = P2ROUNDUP_TYPED(lrsize, sizeof (uint64_t), size_t);
 
-	itx = vmem_alloc(offsetof(itx_t, itx_lr) + lrsize, KM_SLEEP);
+	itx = zio_data_buf_alloc(offsetof(itx_t, itx_lr) + lrsize);
 	itx->itx_lr.lrc_txtype = txtype;
 	itx->itx_lr.lrc_reclen = lrsize;
 	itx->itx_sod = lrsize; /* if write & WR_NEED_COPY will be increased */
@@ -1207,7 +1207,7 @@ zil_itx_create(uint64_t txtype, size_t lrsize)
 void
 zil_itx_destroy(itx_t *itx)
 {
-	vmem_free(itx, offsetof(itx_t, itx_lr) + itx->itx_lr.lrc_reclen);
+	zio_data_buf_free(itx, offsetof(itx_t, itx_lr)+itx->itx_lr.lrc_reclen);
 }
 
 /*
@@ -1228,8 +1228,7 @@ zil_itxg_clean(itxs_t *itxs)
 		if (itx->itx_callback != NULL)
 			itx->itx_callback(itx->itx_callback_data);
 		list_remove(list, itx);
-		kmem_free(itx, offsetof(itx_t, itx_lr) +
-		    itx->itx_lr.lrc_reclen);
+		zil_itx_destroy(itx);
 	}
 
 	cookie = NULL;
@@ -1240,8 +1239,7 @@ zil_itxg_clean(itxs_t *itxs)
 			if (itx->itx_callback != NULL)
 				itx->itx_callback(itx->itx_callback_data);
 			list_remove(list, itx);
-			kmem_free(itx, offsetof(itx_t, itx_lr) +
-			    itx->itx_lr.lrc_reclen);
+			zil_itx_destroy(itx);
 		}
 		list_destroy(list);
 		kmem_free(ian, sizeof (itx_async_node_t));
@@ -1308,8 +1306,7 @@ zil_remove_async(zilog_t *zilog, uint64_t oid)
 		if (itx->itx_callback != NULL)
 			itx->itx_callback(itx->itx_callback_data);
 		list_remove(&clean_list, itx);
-		kmem_free(itx, offsetof(itx_t, itx_lr) +
-		    itx->itx_lr.lrc_reclen);
+		zil_itx_destroy(itx);
 	}
 	list_destroy(&clean_list);
 }
@@ -1589,8 +1586,7 @@ zil_commit_writer(zilog_t *zilog)
 		if (itx->itx_callback != NULL)
 			itx->itx_callback(itx->itx_callback_data);
 		list_remove(&zilog->zl_itx_commit_list, itx);
-		kmem_free(itx, offsetof(itx_t, itx_lr)
-		    + itx->itx_lr.lrc_reclen);
+		zil_itx_destroy(itx);
 	}
 
 	mutex_enter(&zilog->zl_lock);