diff --git a/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c b/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c
index daedc894f06f..10377cd29b7f 100644
--- a/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c
+++ b/sys/cddl/compat/opensolaris/kern/opensolaris_kmem.c
@@ -126,6 +126,46 @@ kmem_size_init(void *unused __unused)
 }
 SYSINIT(kmem_size_init, SI_SUB_KMEM, SI_ORDER_ANY, kmem_size_init, NULL);
 
+/*
+ * The return values from kmem_free_* are only valid once the pagedaemon
+ * has been initialised, before then they return 0.
+ *
+ * To ensure the returns are valid the caller can use a SYSINIT with
+ * subsystem set to SI_SUB_KTHREAD_PAGE and an order of at least
+ * SI_ORDER_SECOND.
+ */
+u_int
+kmem_free_target(void)
+{
+
+	return (vm_cnt.v_free_target);
+}
+
+u_int
+kmem_free_min(void)
+{
+
+	return (vm_cnt.v_free_min);
+}
+
+/*
+ * Pages on the cache queue are counted as free: they can be reused without
+ * I/O, so leaving them out overstates memory pressure to callers.
+ */
+u_int
+kmem_free_count(void)
+{
+
+	return (vm_cnt.v_free_count + vm_cnt.v_cache_count);
+}
+
+u_int
+kmem_page_count(void)
+{
+
+	return (vm_cnt.v_page_count);
+}
+
 uint64_t
 kmem_size(void)
 {
@@ -133,13 +173,6 @@ kmem_size(void)
 	return (kmem_size_val);
 }
 
-uint64_t
-kmem_used(void)
-{
-
-	return (vmem_size(kmem_arena, VMEM_ALLOC));
-}
-
 static int
 kmem_std_constructor(void *mem, int size __unused, void *private, int flags)
 {
diff --git a/sys/cddl/compat/opensolaris/sys/kmem.h b/sys/cddl/compat/opensolaris/sys/kmem.h
index ee6b33f7a982..af6cec52cf91 100644
--- a/sys/cddl/compat/opensolaris/sys/kmem.h
+++ b/sys/cddl/compat/opensolaris/sys/kmem.h
@@ -66,7 +66,16 @@ typedef struct kmem_cache {
 void *zfs_kmem_alloc(size_t size, int kmflags);
 void zfs_kmem_free(void *buf, size_t size);
 uint64_t kmem_size(void);
-uint64_t kmem_used(void);
+u_int kmem_page_count(void);
+
+/*
+ * The return values from kmem_free_* are only valid once the pagedaemon
+ * has been initialised, before then they return 0.
+ */ +u_int kmem_free_count(void); +u_int kmem_free_target(void); +u_int kmem_free_min(void); + kmem_cache_t *kmem_cache_create(char *name, size_t bufsize, size_t align, int (*constructor)(void *, void *, int), void (*destructor)(void *, void *), void (*reclaim)(void *) __unused, void *private, vmem_t *vmp, int cflags); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c index cda427a0dab7..1d97718c70bf 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/arc.c @@ -193,9 +193,6 @@ extern int zfs_prefetch_disable; */ static boolean_t arc_warm; -/* - * These tunables are for performance analysis. - */ uint64_t zfs_arc_max; uint64_t zfs_arc_min; uint64_t zfs_arc_meta_limit = 0; @@ -204,6 +201,20 @@ int zfs_arc_shrink_shift = 0; int zfs_arc_p_min_shift = 0; int zfs_disable_dup_eviction = 0; uint64_t zfs_arc_average_blocksize = 8 * 1024; /* 8KB */ +u_int zfs_arc_free_target = (1 << 19); /* default before pagedaemon init only */ + +static int sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS); + +#ifdef _KERNEL +static void +arc_free_target_init(void *unused __unused) +{ + + zfs_arc_free_target = kmem_free_target(); +} +SYSINIT(arc_free_target_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_ANY, + arc_free_target_init, NULL); +#endif TUNABLE_QUAD("vfs.zfs.arc_meta_limit", &zfs_arc_meta_limit); SYSCTL_DECL(_vfs_zfs); @@ -214,6 +225,35 @@ SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_min, CTLFLAG_RDTUN, &zfs_arc_min, 0, SYSCTL_UQUAD(_vfs_zfs, OID_AUTO, arc_average_blocksize, CTLFLAG_RDTUN, &zfs_arc_average_blocksize, 0, "ARC average blocksize"); +/* + * We don't have a tunable for arc_free_target due to the dependency on + * pagedaemon initialisation. 
+ */ +SYSCTL_PROC(_vfs_zfs, OID_AUTO, arc_free_target, + CTLTYPE_UINT | CTLFLAG_MPSAFE | CTLFLAG_RW, 0, sizeof(u_int), + sysctl_vfs_zfs_arc_free_target, "IU", + "Desired number of free pages below which ARC triggers reclaim"); + +static int +sysctl_vfs_zfs_arc_free_target(SYSCTL_HANDLER_ARGS) +{ + u_int val; + int err; + + val = zfs_arc_free_target; + err = sysctl_handle_int(oidp, &val, 0, req); + if (err != 0 || req->newptr == NULL) + return (err); + + if (val < kmem_free_min()) + return (EINVAL); + if (val > kmem_page_count()) + return (EINVAL); + + zfs_arc_free_target = val; + + return (0); +} /* * Note that buffers can be in one of 6 states: @@ -2418,9 +2458,12 @@ arc_flush(spa_t *spa) void arc_shrink(void) { + if (arc_c > arc_c_min) { uint64_t to_free; + DTRACE_PROBE2(arc__shrink, uint64_t, arc_c, uint64_t, + arc_c_min); #ifdef _KERNEL to_free = arc_c >> arc_shrink_shift; #else @@ -2440,8 +2483,11 @@ arc_shrink(void) ASSERT((int64_t)arc_p >= 0); } - if (arc_size > arc_c) + if (arc_size > arc_c) { + DTRACE_PROBE2(arc__shrink_adjust, uint64_t, arc_size, + uint64_t, arc_c); arc_adjust(); + } } static int needfree = 0; @@ -2452,15 +2498,25 @@ arc_reclaim_needed(void) #ifdef _KERNEL - if (needfree) + if (needfree) { + DTRACE_PROBE(arc__reclaim_needfree); return (1); + } + + if (kmem_free_count() < zfs_arc_free_target) { + DTRACE_PROBE2(arc__reclaim_freetarget, uint64_t, + kmem_free_count(), uint64_t, zfs_arc_free_target); + return (1); + } /* * Cooperate with pagedaemon when it's time for it to scan * and reclaim some pages. 
*/ - if (vm_paging_needed()) + if (vm_paging_needed()) { + DTRACE_PROBE(arc__reclaim_paging); return (1); + } #ifdef sun /* @@ -2504,15 +2560,14 @@ arc_reclaim_needed(void) (btop(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC)) >> 2)) return (1); #endif -#else /* !sun */ - if (kmem_used() > (kmem_size() * 3) / 4) - return (1); #endif /* sun */ #else if (spa_get_random(100) == 0) return (1); #endif + DTRACE_PROBE(arc__reclaim_no); + return (0); } diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index 760865c7fe56..9835d8d60c58 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -115,10 +115,14 @@ __FBSDID("$FreeBSD$"); /* the kernel process "vm_pageout"*/ static void vm_pageout(void); +static void vm_pageout_init(void); static int vm_pageout_clean(vm_page_t); static void vm_pageout_scan(struct vm_domain *vmd, int pass); static void vm_pageout_mightbe_oom(struct vm_domain *vmd, int pass); +SYSINIT(pagedaemon_init, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, vm_pageout_init, + NULL); + struct proc *pageproc; static struct kproc_desc page_kp = { @@ -126,7 +130,7 @@ static struct kproc_desc page_kp = { vm_pageout, &pageproc }; -SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_FIRST, kproc_start, +SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start, &page_kp); #if !defined(NO_SWAPPING) @@ -1640,15 +1644,11 @@ vm_pageout_worker(void *arg) } /* - * vm_pageout is the high level pageout daemon. + * vm_pageout_init initialises basic pageout daemon settings. */ static void -vm_pageout(void) +vm_pageout_init(void) { -#if MAXMEMDOM > 1 - int error, i; -#endif - /* * Initialize some paging parameters. */ @@ -1694,6 +1694,17 @@ vm_pageout(void) /* XXX does not really belong here */ if (vm_page_max_wired == 0) vm_page_max_wired = vm_cnt.v_free_count / 3; +} + +/* + * vm_pageout is the high level pageout daemon. + */ +static void +vm_pageout(void) +{ +#if MAXMEMDOM > 1 + int error, i; +#endif swap_pager_swap_init(); #if MAXMEMDOM > 1