77831e1738
dataset_remove_clones_key does recursion, so if the recursion goes deep it can overrun the linux kernel stack size of 8KB. I have seen this happen in the actual deployment, and subsequently confirmed it by running a test workload on a custom-built kernel that uses 32KB stack. See the following stack trace as an example of the case where it would have run over the 8KB stack kernel: Depth Size Location (42 entries) ----- ---- -------- 0) 11192 72 __kmalloc+0x2e/0x240 1) 11120 144 kmem_alloc_debug+0x20e/0x500 2) 10976 72 dbuf_hold_impl+0x4a/0xa0 3) 10904 120 dbuf_prefetch+0xd3/0x280 4) 10784 80 dmu_zfetch_dofetch.isra.5+0x10f/0x180 5) 10704 240 dmu_zfetch+0x5f7/0x10e0 6) 10464 168 dbuf_read+0x71e/0x8f0 7) 10296 104 dnode_hold_impl+0x1ee/0x620 8) 10192 16 dnode_hold+0x19/0x20 9) 10176 88 dmu_buf_hold+0x42/0x1b0 10) 10088 144 zap_lockdir+0x48/0x730 11) 9944 128 zap_cursor_retrieve+0x1c4/0x2f0 12) 9816 392 dsl_dataset_remove_clones_key.isra.14+0xab/0x190 13) 9424 392 dsl_dataset_remove_clones_key.isra.14+0x10c/0x190 14) 9032 392 dsl_dataset_remove_clones_key.isra.14+0x10c/0x190 15) 8640 392 dsl_dataset_remove_clones_key.isra.14+0x10c/0x190 16) 8248 392 dsl_dataset_remove_clones_key.isra.14+0x10c/0x190 17) 7856 392 dsl_dataset_remove_clones_key.isra.14+0x10c/0x190 18) 7464 392 dsl_dataset_remove_clones_key.isra.14+0x10c/0x190 19) 7072 392 dsl_dataset_remove_clones_key.isra.14+0x10c/0x190 20) 6680 392 dsl_dataset_remove_clones_key.isra.14+0x10c/0x190 21) 6288 392 dsl_dataset_remove_clones_key.isra.14+0x10c/0x190 22) 5896 392 dsl_dataset_remove_clones_key.isra.14+0x10c/0x190 23) 5504 392 dsl_dataset_remove_clones_key.isra.14+0x10c/0x190 24) 5112 392 dsl_dataset_remove_clones_key.isra.14+0x10c/0x190 25) 4720 392 dsl_dataset_remove_clones_key.isra.14+0x10c/0x190 26) 4328 392 dsl_dataset_remove_clones_key.isra.14+0x10c/0x190 27) 3936 392 dsl_dataset_remove_clones_key.isra.14+0x10c/0x190 28) 3544 392 dsl_dataset_remove_clones_key.isra.14+0x10c/0x190 29) 3152 392 dsl_dataset_remove_clones_key.isra.14+0x10c/0x190 30) 2760 392 dsl_dataset_remove_clones_key.isra.14+0x10c/0x190 31) 2368 392 dsl_dataset_remove_clones_key.isra.14+0x10c/0x190 32) 1976 392 dsl_dataset_remove_clones_key.isra.14+0x10c/0x190 33) 1584 392 dsl_dataset_remove_clones_key.isra.14+0x10c/0x190 34) 1192 232 dsl_dataset_destroy_sync+0x311/0xf60 35) 960 72 dsl_sync_task_group_sync+0x12f/0x230 36) 888 168 dsl_pool_sync+0x48b/0x5c0 37) 720 184 spa_sync+0x417/0xb00 38) 536 184 txg_sync_thread+0x325/0x5b0 39) 352 48 thread_generic_wrapper+0x7a/0x90 40) 304 128 kthread+0xc0/0xd0 41) 176 176 ret_from_fork+0x7c/0xb0 This change reduces the stack usage in dsl_dataset_remove_clones_key by allocating structures in heap, not in stack. This is not a fundamental fix, as one can create an arbitrary large data set that runs over any fixed size stack, but this will make the problem far less likely. Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Kohsuke Kawaguchi <kk@kohsuke.org> Closes #1726 |
||
---|---|---|
.. | ||
arc.c | ||
bplist.c | ||
bpobj.c | ||
bptree.c | ||
dbuf.c | ||
ddt_zap.c | ||
ddt.c | ||
dmu_diff.c | ||
dmu_object.c | ||
dmu_objset.c | ||
dmu_send.c | ||
dmu_traverse.c | ||
dmu_tx.c | ||
dmu_zfetch.c | ||
dmu.c | ||
dnode_sync.c | ||
dnode.c | ||
dsl_dataset.c | ||
dsl_deadlist.c | ||
dsl_deleg.c | ||
dsl_destroy.c | ||
dsl_dir.c | ||
dsl_pool.c | ||
dsl_prop.c | ||
dsl_scan.c | ||
dsl_synctask.c | ||
dsl_userhold.c | ||
fm.c | ||
gzip.c | ||
lz4.c | ||
lzjb.c | ||
Makefile.in | ||
metaslab.c | ||
refcount.c | ||
rrwlock.c | ||
sa.c | ||
sha256.c | ||
spa_boot.c | ||
spa_config.c | ||
spa_errlog.c | ||
spa_history.c | ||
spa_misc.c | ||
spa.c | ||
space_map.c | ||
txg.c | ||
uberblock.c | ||
unique.c | ||
vdev_cache.c | ||
vdev_disk.c | ||
vdev_file.c | ||
vdev_label.c | ||
vdev_mirror.c | ||
vdev_missing.c | ||
vdev_queue.c | ||
vdev_raidz.c | ||
vdev_root.c | ||
vdev.c | ||
zap_leaf.c | ||
zap_micro.c | ||
zap.c | ||
zfeature_common.c | ||
zfeature.c | ||
zfs_acl.c | ||
zfs_byteswap.c | ||
zfs_ctldir.c | ||
zfs_debug.c | ||
zfs_dir.c | ||
zfs_fm.c | ||
zfs_fuid.c | ||
zfs_ioctl.c | ||
zfs_log.c | ||
zfs_onexit.c | ||
zfs_replay.c | ||
zfs_rlock.c | ||
zfs_sa.c | ||
zfs_vfsops.c | ||
zfs_vnops.c | ||
zfs_znode.c | ||
zil.c | ||
zio_checksum.c | ||
zio_compress.c | ||
zio_inject.c | ||
zio.c | ||
zle.c | ||
zpl_ctldir.c | ||
zpl_export.c | ||
zpl_file.c | ||
zpl_inode.c | ||
zpl_super.c | ||
zpl_xattr.c | ||
zrlock.c | ||
zvol.c |