9018 Replace kmem_cache_reap_now() with kmem_cache_reap_soon()

illumos/illumos-gate@36a64e6284

To prevent kmem_cache reaping from blocking other system resources, turn
kmem_cache_reap_now() (which blocks) into kmem_cache_reap_soon(). Callers
of kmem_cache_reap_soon() should use kmem_cache_reap_active(), which
exploits #9017's new taskq_empty().

Reviewed by: Bryan Cantrill <bryan@joyent.com>
Reviewed by: Dan McDonald <danmcd@joyent.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Yuri Pankov <yuripv@yuripv.net>
Author: Tim Kordas <tim.kordas@joyent.com>

FreeBSD does not use a taskqueue for kmem cache reaping, so this change
is less dramatic than it is on illumos, just limiting reaping to once
per second.  It may be improved later, if needed.
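The intended caller pattern is to skip scheduling new reaps while one may
still be active and to rate-limit successive passes, as the arc.c hunk
below does. A minimal sketch of that pattern (reap_caches(), next_reap_time,
and REAP_RETRY_NS are hypothetical names used for illustration, not part
of this change):

#define	REAP_RETRY_NS	MSEC2NSEC(1000)	/* hypothetical pacing interval */

static hrtime_t next_reap_time;		/* earliest time for the next pass */

static void
reap_caches(kmem_cache_t **caches, int ncaches)
{
	int i;

	/* Don't pile on: a reap may already be running system-wide. */
	if (kmem_cache_reap_active())
		return;

	/* Moderate how often new reap-tasks are scheduled. */
	if (gethrtime() < next_reap_time)
		return;

	for (i = 0; i < ncaches; i++)
		kmem_cache_reap_soon(caches[i]);	/* returns immediately */

	next_reap_time = gethrtime() + REAP_RETRY_NS;
}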
Committed by: Alexander Motin
Date: 2018-02-21 23:15:06 +00:00
Commit: b17bfcde3d
Notes (svn2git, 2020-12-20 02:59:44 +00:00): svn path=/head/; revision=329759

4 changed files with 63 additions and 15 deletions


@@ -364,7 +364,8 @@ extern void cv_broadcast(kcondvar_t *cv);
 #define kmem_cache_alloc(_c, _f) umem_cache_alloc(_c, _f)
 #define kmem_cache_free(_c, _b) umem_cache_free(_c, _b)
 #define kmem_debugging() 0
-#define kmem_cache_reap_now(_c) /* nothing */
+#define kmem_cache_reap_active() (B_FALSE)
+#define kmem_cache_reap_soon(_c) /* nothing */
 #define kmem_cache_set_move(_c, _cb) /* nothing */
 #define POINTER_INVALIDATE(_pp) /* nothing */
 #define POINTER_IS_VALID(_p) 0


@@ -212,9 +212,30 @@ kmem_cache_free(kmem_cache_t *cache, void *buf)
 #endif
 }
 
+/*
+ * Allow our caller to determine if there are running reaps.
+ *
+ * This call is very conservative and may return B_TRUE even when
+ * reaping activity isn't active. If it returns B_FALSE, then reaping
+ * activity is definitely inactive.
+ */
+boolean_t
+kmem_cache_reap_active(void)
+{
+
+	return (B_FALSE);
+}
+
+/*
+ * Reap (almost) everything soon.
+ *
+ * Note: this does not wait for the reap-tasks to complete. Caller
+ * should use kmem_cache_reap_active() (above) and/or moderation to
+ * avoid scheduling too many reap-tasks.
+ */
 #ifdef _KERNEL
 void
-kmem_cache_reap_now(kmem_cache_t *cache)
+kmem_cache_reap_soon(kmem_cache_t *cache)
 {
 #ifndef KMEM_DEBUG
 	zone_drain(cache->kc_zone);
@@ -228,7 +249,7 @@ kmem_reap(void)
 }
 
 #else
 void
-kmem_cache_reap_now(kmem_cache_t *cache __unused)
+kmem_cache_reap_soon(kmem_cache_t *cache __unused)
 {
 }


@@ -73,7 +73,8 @@ kmem_cache_t *kmem_cache_create(char *name, size_t bufsize, size_t align,
 void kmem_cache_destroy(kmem_cache_t *cache);
 void *kmem_cache_alloc(kmem_cache_t *cache, int flags);
 void kmem_cache_free(kmem_cache_t *cache, void *buf);
-void kmem_cache_reap_now(kmem_cache_t *cache);
+boolean_t kmem_cache_reap_active(void);
+void kmem_cache_reap_soon(kmem_cache_t *);
 void kmem_reap(void);
 int kmem_debugging(void);
 void *calloc(size_t n, size_t s);


@@ -20,7 +20,7 @@
  */
 /*
  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, Joyent, Inc. All rights reserved.
+ * Copyright (c) 2018, Joyent, Inc.
  * Copyright (c) 2011, 2017 by Delphix. All rights reserved.
  * Copyright (c) 2014 by Saso Kiselkov. All rights reserved.
  * Copyright 2017 Nexenta Systems, Inc. All rights reserved.
@@ -309,6 +309,9 @@ int zfs_arc_evict_batch_limit = 10;
 
 /* number of seconds before growing cache again */
 static int arc_grow_retry = 60;
 
+/* number of milliseconds before attempting a kmem-cache-reap */
+static int arc_kmem_cache_reap_retry_ms = 1000;
+
 /* shift of arc_c for calculating overflow limit in arc_get_data_impl */
 int zfs_arc_overflow_shift = 8;
@@ -4395,21 +4398,31 @@ arc_kmem_reap_now(void)
 #endif
 #endif
 
+	/*
+	 * If a kmem reap is already active, don't schedule more. We must
+	 * check for this because kmem_cache_reap_soon() won't actually
+	 * block on the cache being reaped (this is to prevent callers from
+	 * becoming implicitly blocked by a system-wide kmem reap -- which,
+	 * on a system with many, many full magazines, can take minutes).
+	 */
+	if (kmem_cache_reap_active())
+		return;
+
 	for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) {
 		if (zio_buf_cache[i] != prev_cache) {
 			prev_cache = zio_buf_cache[i];
-			kmem_cache_reap_now(zio_buf_cache[i]);
+			kmem_cache_reap_soon(zio_buf_cache[i]);
 		}
 		if (zio_data_buf_cache[i] != prev_data_cache) {
 			prev_data_cache = zio_data_buf_cache[i];
-			kmem_cache_reap_now(zio_data_buf_cache[i]);
+			kmem_cache_reap_soon(zio_data_buf_cache[i]);
 		}
 	}
-	kmem_cache_reap_now(abd_chunk_cache);
-	kmem_cache_reap_now(buf_cache);
-	kmem_cache_reap_now(hdr_full_cache);
-	kmem_cache_reap_now(hdr_l2only_cache);
-	kmem_cache_reap_now(range_seg_cache);
+	kmem_cache_reap_soon(abd_chunk_cache);
+	kmem_cache_reap_soon(buf_cache);
+	kmem_cache_reap_soon(hdr_full_cache);
+	kmem_cache_reap_soon(hdr_l2only_cache);
+	kmem_cache_reap_soon(range_seg_cache);
 
 #ifdef illumos
 	if (zio_arena != NULL) {
@@ -4444,6 +4457,7 @@ static void
 arc_reclaim_thread(void *unused __unused)
 {
 	hrtime_t		growtime = 0;
+	hrtime_t		kmem_reap_time = 0;
 	callb_cpr_t		cpr;
 
 	CALLB_CPR_INIT(&cpr, &arc_reclaim_lock, callb_generic_cpr, FTAG);
@@ -4477,7 +4491,7 @@ arc_reclaim_thread(void *unused __unused)
 		int64_t free_memory = arc_available_memory();
 
 		if (free_memory < 0) {
-
+			hrtime_t curtime = gethrtime();
 			arc_no_grow = B_TRUE;
 			arc_warm = B_TRUE;
 
@@ -4485,9 +4499,20 @@
 			 * Wait at least zfs_grow_retry (default 60) seconds
 			 * before considering growing.
 			 */
-			growtime = gethrtime() + SEC2NSEC(arc_grow_retry);
+			growtime = curtime + SEC2NSEC(arc_grow_retry);
 
-			arc_kmem_reap_now();
+			/*
+			 * Wait at least arc_kmem_cache_reap_retry_ms
+			 * between arc_kmem_reap_now() calls. Without
+			 * this check it is possible to end up in a
+			 * situation where we spend lots of time
+			 * reaping caches, while we're near arc_c_min.
+			 */
+			if (curtime >= kmem_reap_time) {
+				arc_kmem_reap_now();
+				kmem_reap_time = gethrtime() +
+				    MSEC2NSEC(arc_kmem_cache_reap_retry_ms);
+			}
 
 			/*
 			 * If we are still low on memory, shrink the ARC