5376 arc_kmem_reap_now() should not result in clearing arc_no_grow

Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Steven Hartland <killing@multiplay.co.uk>
Reviewed by: Richard Elling <richard.elling@richardelling.com>
Approved by: Dan McDonald <danmcd@omniti.com>
Author: Matthew Ahrens <mahrens@delphix.com>

illumos/illumos-gate@2ec99e3e98
This commit is contained in:
Xin LI 2015-01-20 20:04:51 +00:00
parent cce363922b
commit cd60f2acd9

View File

@ -149,13 +149,7 @@ static kmutex_t arc_reclaim_thr_lock;
static kcondvar_t arc_reclaim_thr_cv; /* used to signal reclaim thr */
static uint8_t arc_thread_exit;
#define ARC_REDUCE_DNLC_PERCENT 3
uint_t arc_reduce_dnlc_percent = ARC_REDUCE_DNLC_PERCENT;
typedef enum arc_reclaim_strategy {
ARC_RECLAIM_AGGR, /* Aggressive reclaim strategy */
ARC_RECLAIM_CONS /* Conservative reclaim strategy */
} arc_reclaim_strategy_t;
uint_t arc_reduce_dnlc_percent = 3;
/*
* The number of iterations through arc_evict_*() before we
@ -170,7 +164,19 @@ static int arc_grow_retry = 60;
static int arc_p_min_shift = 4;
/* log2(fraction of arc to reclaim) */
static int arc_shrink_shift = 5;
static int arc_shrink_shift = 7;
/*
* log2(fraction of ARC which must be free to allow growing).
* I.e., if there is less than (arc_c >> arc_no_grow_shift) free memory,
* when reading a new block into the ARC, we will evict an equal-sized block
* from the ARC.
*
* This must be less than arc_shrink_shift, so that when we shrink the ARC,
* we will still not allow it to grow.
*/
int arc_no_grow_shift = 5;
/*
* minimum lifespan of a prefetch block in clock ticks
@ -2194,16 +2200,10 @@ arc_flush(spa_t *spa)
}
void
arc_shrink(void)
arc_shrink(int64_t to_free)
{
if (arc_c > arc_c_min) {
uint64_t to_free;
#ifdef _KERNEL
to_free = MAX(arc_c >> arc_shrink_shift, ptob(needfree));
#else
to_free = arc_c >> arc_shrink_shift;
#endif
if (arc_c > arc_c_min + to_free)
atomic_add_64(&arc_c, -to_free);
else
@ -2222,25 +2222,49 @@ arc_shrink(void)
arc_adjust();
}
/*
 * Records which check in arc_available_memory() yielded the lowest
 * (most constraining) amount of available memory.
 */
typedef enum free_memory_reason_t {
	FMR_UNKNOWN = 0,		/* initial value; no check selected */
	FMR_NEEDFREE = 1,		/* needfree was greater than zero */
	FMR_LOTSFREE = 2,		/* freemem vs. lotsfree/needfree/desfree */
	FMR_SWAPFS_MINFREE = 3,		/* availrmem vs. swapfs reserves */
	FMR_PAGES_PP_MAXIMUM = 4,	/* availrmem vs. pages_pp_maximum */
	FMR_HEAP_ARENA = 5,		/* free space in heap_arena (i386) */
	FMR_ZIO_ARENA = 6		/* free space in zio_arena */
} free_memory_reason_t;
int64_t last_free_memory;
free_memory_reason_t last_free_reason;
/*
* Determine if the system is under memory pressure and is asking
* to reclaim memory. A return value of 1 indicates that the system
* is under memory pressure and that the arc should adjust accordingly.
* Additional reserve of pages kept free above pages_pp_maximum.
*/
static int
arc_reclaim_needed(void)
int64_t arc_pages_pp_reserve = 64;
/*
* Additional reserve of pages for swapfs.
*/
int64_t arc_swapfs_reserve = 64;
/*
* Return the amount of memory that can be consumed before reclaim will be
* needed. Positive if there is sufficient free memory, negative indicates
* the amount of memory that needs to be freed up.
*/
static int64_t
arc_available_memory(void)
{
uint64_t extra;
int64_t lowest = INT64_MAX;
int64_t n;
free_memory_reason_t r = FMR_UNKNOWN;
#ifdef _KERNEL
if (needfree)
return (1);
/*
* take 'desfree' extra pages, so we reclaim sooner, rather than later
*/
extra = desfree;
if (needfree > 0) {
n = PAGESIZE * (-needfree);
if (n < lowest) {
lowest = n;
r = FMR_NEEDFREE;
}
}
/*
* check that we're out of range of the pageout scanner. It starts to
@ -2249,8 +2273,11 @@ arc_reclaim_needed(void)
* number of needed free pages. We add extra pages here to make sure
* the scanner doesn't start up while we're freeing memory.
*/
if (freemem < lotsfree + needfree + extra)
return (1);
n = PAGESIZE * (freemem - lotsfree - needfree - desfree);
if (n < lowest) {
lowest = n;
r = FMR_LOTSFREE;
}
/*
* check to make sure that swapfs has enough space so that anon
@ -2259,8 +2286,13 @@ arc_reclaim_needed(void)
* swap pages. We also add a bit of extra here just to prevent
* circumstances from getting really dire.
*/
if (availrmem < swapfs_minfree + swapfs_reserve + extra)
return (1);
n = PAGESIZE * (availrmem - swapfs_minfree - swapfs_reserve -
desfree - arc_swapfs_reserve);
if (n < lowest) {
lowest = n;
r = FMR_SWAPFS_MINFREE;
}
/*
* Check that we have enough availrmem that memory locking (e.g., via
@ -2269,8 +2301,12 @@ arc_reclaim_needed(void)
* drops below pages_pp_maximum, page locking mechanisms such as
* page_pp_lock() will fail.)
*/
if (availrmem <= pages_pp_maximum)
return (1);
n = PAGESIZE * (availrmem - pages_pp_maximum -
arc_pages_pp_reserve);
if (n < lowest) {
lowest = n;
r = FMR_PAGES_PP_MAXIMUM;
}
#if defined(__i386)
/*
@ -2284,9 +2320,12 @@ arc_reclaim_needed(void)
* heap is allocated. (Or, in the calculation, if less than 1/4th is
* free)
*/
if (vmem_size(heap_arena, VMEM_FREE) <
(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2))
return (1);
n = vmem_size(heap_arena, VMEM_FREE) -
(vmem_size(heap_arena, VMEM_FREE | VMEM_ALLOC) >> 2);
if (n < lowest) {
lowest = n;
r = FMR_HEAP_ARENA;
}
#endif
/*
@ -2298,19 +2337,40 @@ arc_reclaim_needed(void)
* to aggressively evict memory from the arc in order to avoid
* memory fragmentation issues.
*/
if (zio_arena != NULL &&
vmem_size(zio_arena, VMEM_FREE) <
(vmem_size(zio_arena, VMEM_ALLOC) >> 4))
return (1);
if (zio_arena != NULL) {
n = vmem_size(zio_arena, VMEM_FREE) -
(vmem_size(zio_arena, VMEM_ALLOC) >> 4);
if (n < lowest) {
lowest = n;
r = FMR_ZIO_ARENA;
}
}
#else
/* Every 100 calls, free a small amount */
if (spa_get_random(100) == 0)
return (1);
lowest = -1024;
#endif
return (0);
last_free_memory = lowest;
last_free_reason = r;
return (lowest);
}
/*
 * Report whether the system is under memory pressure and is asking the
 * ARC to give memory back.  Returns TRUE when arc_available_memory()
 * is negative, i.e. when memory needs to be freed up, in which case the
 * arc should adjust accordingly.
 */
static boolean_t
arc_reclaim_needed(void)
{
	int64_t avail = arc_available_memory();

	return (avail < 0);
}
static void
arc_kmem_reap_now(arc_reclaim_strategy_t strat)
arc_kmem_reap_now(void)
{
size_t i;
kmem_cache_t *prev_cache = NULL;
@ -2335,13 +2395,6 @@ arc_kmem_reap_now(arc_reclaim_strategy_t strat)
#endif
#endif
/*
* An aggressive reclamation will shrink the cache size as well as
* reap free buffers from the arc kmem caches.
*/
if (strat == ARC_RECLAIM_AGGR)
arc_shrink();
for (i = 0; i < SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT; i++) {
if (zio_buf_cache[i] != prev_cache) {
prev_cache = zio_buf_cache[i];
@ -2356,47 +2409,57 @@ arc_kmem_reap_now(arc_reclaim_strategy_t strat)
kmem_cache_reap_now(hdr_cache);
kmem_cache_reap_now(range_seg_cache);
/*
* Ask the vmem areana to reclaim unused memory from its
* quantum caches.
*/
if (zio_arena != NULL && strat == ARC_RECLAIM_AGGR)
if (zio_arena != NULL) {
/*
* Ask the vmem arena to reclaim unused memory from its
* quantum caches.
*/
vmem_qcache_reap(zio_arena);
}
}
static void
arc_reclaim_thread(void)
{
clock_t growtime = 0;
arc_reclaim_strategy_t last_reclaim = ARC_RECLAIM_CONS;
callb_cpr_t cpr;
CALLB_CPR_INIT(&cpr, &arc_reclaim_thr_lock, callb_generic_cpr, FTAG);
mutex_enter(&arc_reclaim_thr_lock);
while (arc_thread_exit == 0) {
if (arc_reclaim_needed()) {
int64_t free_memory = arc_available_memory();
if (free_memory < 0) {
if (arc_no_grow) {
if (last_reclaim == ARC_RECLAIM_CONS) {
last_reclaim = ARC_RECLAIM_AGGR;
} else {
last_reclaim = ARC_RECLAIM_CONS;
}
} else {
arc_no_grow = TRUE;
last_reclaim = ARC_RECLAIM_AGGR;
membar_producer();
}
/* reset the growth delay for every reclaim */
growtime = ddi_get_lbolt() + (arc_grow_retry * hz);
arc_kmem_reap_now(last_reclaim);
arc_no_grow = B_TRUE;
arc_warm = B_TRUE;
} else if (arc_no_grow && ddi_get_lbolt() >= growtime) {
arc_no_grow = FALSE;
/*
* Wait at least arc_grow_retry (default 60) seconds
* before considering growing.
*/
growtime = ddi_get_lbolt() + (arc_grow_retry * hz);
arc_kmem_reap_now();
/*
* If we are still low on memory, shrink the ARC
* so that we have (arc_c >> arc_shrink_shift) free space.
*/
free_memory = arc_available_memory();
int64_t to_free =
(arc_c >> arc_shrink_shift) - free_memory;
if (to_free > 0) {
#ifdef _KERNEL
to_free = MAX(to_free, ptob(needfree));
#endif
arc_shrink(to_free);
}
} else if (free_memory < arc_c >> arc_no_grow_shift) {
arc_no_grow = B_TRUE;
} else if (ddi_get_lbolt() >= growtime) {
arc_no_grow = B_FALSE;
}
arc_adjust();
@ -3739,6 +3802,15 @@ arc_tempreserve_space(uint64_t reserve, uint64_t txg)
void
arc_init(void)
{
/*
* allmem is "all memory that we could possibly use".
*/
#ifdef _KERNEL
uint64_t allmem = ptob(physmem - swapfs_minfree);
#else
uint64_t allmem = (physmem * PAGESIZE) / 2;
#endif
mutex_init(&arc_reclaim_thr_lock, NULL, MUTEX_DEFAULT, NULL);
cv_init(&arc_reclaim_thr_cv, NULL, CV_DEFAULT, NULL);
@ -3746,7 +3818,7 @@ arc_init(void)
arc_min_prefetch_lifespan = 1 * hz;
/* Start out with 1/8 of all memory */
arc_c = physmem * PAGESIZE / 8;
arc_c = allmem / 8;
#ifdef _KERNEL
/*
@ -3758,21 +3830,21 @@ arc_init(void)
#endif
/* set min cache to 1/32 of all memory, or 64MB, whichever is more */
arc_c_min = MAX(arc_c / 4, 64<<20);
arc_c_min = MAX(allmem / 32, 64 << 20);
/* set max to 3/4 of all memory, or all but 1GB, whichever is more */
if (arc_c * 8 >= 1<<30)
arc_c_max = (arc_c * 8) - (1<<30);
if (allmem >= 1 << 30)
arc_c_max = allmem - (1 << 30);
else
arc_c_max = arc_c_min;
arc_c_max = MAX(arc_c * 6, arc_c_max);
arc_c_max = MAX(allmem * 3 / 4, arc_c_max);
/*
* Allow the tunables to override our calculations if they are
* reasonable (ie. over 64MB)
*/
if (zfs_arc_max > 64<<20 && zfs_arc_max < physmem * PAGESIZE)
if (zfs_arc_max > 64 << 20 && zfs_arc_max < allmem)
arc_c_max = zfs_arc_max;
if (zfs_arc_min > 64<<20 && zfs_arc_min <= arc_c_max)
if (zfs_arc_min > 64 << 20 && zfs_arc_min <= arc_c_max)
arc_c_min = zfs_arc_min;
arc_c = arc_c_max;
@ -3800,6 +3872,12 @@ arc_init(void)
if (zfs_arc_shrink_shift > 0)
arc_shrink_shift = zfs_arc_shrink_shift;
/*
* Ensure that arc_no_grow_shift is less than arc_shrink_shift.
*/
if (arc_no_grow_shift >= arc_shrink_shift)
arc_no_grow_shift = arc_shrink_shift - 1;
if (zfs_arc_p_min_shift > 0)
arc_p_min_shift = zfs_arc_p_min_shift;