From 9a884731a3c1a93abcd4e34091bd830c587eba95 Mon Sep 17 00:00:00 2001
From: Andriy Gapon
Date: Fri, 14 Apr 2017 18:41:37 +0000
Subject: [PATCH] 6914 kernel virtual memory fragmentation leads to hang

illumos/illumos-gate@af868f46a5b794687741d5424de9e3a2d684a84a
https://github.com/illumos/illumos-gate/commit/af868f46a5b794687741d5424de9e3a2d684a84a

https://www.illumos.org/issues/6914

This change allows the kernel to use more virtual address space. This will
allow us to devote 1.5x physmem for the zio arena, and an additional 1.5x
physmem for the kernel heap.

We saw a hang when unable to find any 128K contiguous memory segments. Looking
at the core file we see many threads in stacks similar to this:

> ffffff68c9c87c00::findstack -v
stack pointer for thread ffffff68c9c87c00: ffffff02cd63d8b0
[ ffffff02cd63d8b0 _resume_from_idle+0xf4() ]
ffffff02cd63d8e0 swtch+0x141()
ffffff02cd63d920 cv_wait+0x70(ffffff6009b1b01e, ffffff6009b1b020)
ffffff02cd63da50 vmem_xalloc+0x640(ffffff6009b1b000, 20000, 1000, 0, 0, 0, 0, ffffff0200000004)
ffffff02cd63dac0 vmem_alloc+0x135(ffffff6009b1b000, 20000, 4)
ffffff02cd63db60 segkmem_xalloc+0x171(ffffff6009b1b000, 0, 20000, 4, 0, fffffffffb885fe0, fffffffffbcefa10)
ffffff02cd63dbc0 segkmem_alloc_vn+0x4a(ffffff6009b1b000, 20000, 4, fffffffffbcefa10)
ffffff02cd63dbf0 segkmem_zio_alloc+0x20(ffffff6009b1b000, 20000, 4)
ffffff02cd63dd20 vmem_xalloc+0x5b1(ffffff6009b1c000, 20000, 1000, 0, 0, 0, 0, 4)
ffffff02cd63dd90 vmem_alloc+0x135(ffffff6009b1c000, 20000, 4)
ffffff02cd63de20 kmem_slab_create+0x8d(ffffff605fd37008, 4)
ffffff02cd63de80 kmem_slab_alloc+0x11e(ffffff605fd37008, 4)
ffffff02cd63dee0 kmem_cache_alloc+0x233(ffffff605fd37008, 4)
ffffff02cd63df10 zio_data_buf_alloc+0x5b(20000)
ffffff02cd63df70 arc_get_data_buf+0x92(ffffff6265a70588, 20000, ffffff901fd796f8)
ffffff02cd63dfb0 arc_buf_alloc_impl+0x9c(ffffff6265a70588, ffffff6d233ab0b8)

Reviewed by: George Wilson
Reviewed by: Adam Leventhal
Reviewed by: John Kennedy
Reviewed by: Igor Kozhukhov
Reviewed by: Josef 'Jeff' Sipek
Approved by: Garrett D'Amore
Author: Matthew Ahrens
---
 uts/common/fs/zfs/arc.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/uts/common/fs/zfs/arc.c b/uts/common/fs/zfs/arc.c
index d0269ef5d869..cb645dc39e96 100644
--- a/uts/common/fs/zfs/arc.c
+++ b/uts/common/fs/zfs/arc.c
@@ -5885,18 +5885,6 @@ arc_init(void)
 	/* Convert seconds to clock ticks */
 	arc_min_prefetch_lifespan = 1 * hz;
 
-	/* Start out with 1/8 of all memory */
-	arc_c = allmem / 8;
-
-#ifdef _KERNEL
-	/*
-	 * On architectures where the physical memory can be larger
-	 * than the addressable space (intel in 32-bit mode), we may
-	 * need to limit the cache to 1/8 of VM size.
-	 */
-	arc_c = MIN(arc_c, vmem_size(heap_arena, VMEM_ALLOC | VMEM_FREE) / 8);
-#endif
-
 	/* set min cache to 1/32 of all memory, or 64MB, whichever is more */
 	arc_c_min = MAX(allmem / 32, 64 << 20);
 	/* set max to 3/4 of all memory, or all but 1GB, whichever is more */
@@ -5934,6 +5922,15 @@ arc_init(void)
 	/* limit meta-data to 1/4 of the arc capacity */
 	arc_meta_limit = arc_c_max / 4;
 
+#ifdef _KERNEL
+	/*
+	 * Metadata is stored in the kernel's heap. Don't let us
+	 * use more than half the heap for the ARC.
+	 */
+	arc_meta_limit = MIN(arc_meta_limit,
+	    vmem_size(heap_arena, VMEM_ALLOC | VMEM_FREE) / 2);
+#endif
+
 	/* Allow the tunable to override if it is reasonable */
 	if (zfs_arc_meta_limit > 0 && zfs_arc_meta_limit <= arc_c_max)
 		arc_meta_limit = zfs_arc_meta_limit;