4754 io issued to near-full luns even after setting noalloc threshold

4755 mg_alloc_failures is no longer needed

illumos/illumos@b6240e830b
Xin LI 2014-04-18 19:43:02 +00:00
parent 5e7884abcf
commit 03a3382389
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/vendor-sys/illumos/dist/; revision=264668
3 changed files with 7 additions and 51 deletions
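
Both fixes hinge on the same mechanism: once zfs_mg_noalloc_threshold gates metaslab groups by free capacity, the per-group mg_alloc_failures counter no longer serves a purpose (4755), and removing it is also what stops I/O from being issued to near-full LUNs after the threshold is set (4754). Below is a minimal, self-contained sketch of the capacity-based gate that remains after this change; it is not the illumos code, the field and tunable names merely mirror those visible in the hunks that follow, and the default threshold value is an assumption.

#include <stdint.h>

/* Hypothetical stand-ins for the struct metaslab_group fields this change keeps. */
typedef struct mg_sketch {
	uint64_t	mg_free_capacity;	/* percentage free */
	int		mg_allocatable;		/* can we allocate? */
} mg_sketch_t;

/* Assumed tunable: groups at or below this percentage of free space stop taking allocations. */
static uint64_t zfs_mg_noalloc_threshold = 5;

/*
 * With mg_alloc_failures gone, eligibility is decided purely by free
 * capacity: a near-full group is marked non-allocatable up front rather
 * than only after some number of failed allocation attempts in a txg.
 */
static void
mg_sketch_alloc_update(mg_sketch_t *mg)
{
	mg->mg_allocatable = (mg->mg_free_capacity > zfs_mg_noalloc_threshold);
}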

metaslab.c

@@ -38,7 +38,7 @@
  * avoid having to load lots of space_maps in a given txg. There are,
  * however, some cases where we want to avoid "fast" ganging and instead
  * we want to do an exhaustive search of all metaslabs on this device.
- * Currently we don't allow any gang, zil, or dump device related allocations
+ * Currently we don't allow any gang, slog, or dump device related allocations
  * to "fast" gang.
  */
 #define CAN_FASTGANG(flags) \
@@ -61,14 +61,6 @@ uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */
  */
 int zfs_condense_pct = 200;
 
-/*
- * This value defines the number of allowed allocation failures per vdev.
- * If a device reaches this threshold in a given txg then we consider skipping
- * allocations on that device. The value of zfs_mg_alloc_failures is computed
- * in zio_init() unless it has been overridden in /etc/system.
- */
-int zfs_mg_alloc_failures = 0;
-
 /*
  * The zfs_mg_noalloc_threshold defines which metaslab groups should
  * be eligible for allocation. The value is defined as a percentage of
@@ -1611,10 +1603,7 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
 void
 metaslab_sync_reassess(metaslab_group_t *mg)
 {
-	int64_t failures = mg->mg_alloc_failures;
-
 	metaslab_group_alloc_update(mg);
-	atomic_add_64(&mg->mg_alloc_failures, -failures);
 
 	/*
 	 * Preload the next potential metaslabs
@@ -1641,7 +1630,7 @@ metaslab_distance(metaslab_t *msp, dva_t *dva)
 
 static uint64_t
 metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
-    uint64_t txg, uint64_t min_distance, dva_t *dva, int d, int flags)
+    uint64_t txg, uint64_t min_distance, dva_t *dva, int d)
 {
 	spa_t *spa = mg->mg_vd->vdev_spa;
 	metaslab_t *msp = NULL;
@@ -1668,10 +1657,9 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
 			spa_dbgmsg(spa, "%s: failed to meet weight "
 			    "requirement: vdev %llu, txg %llu, mg %p, "
 			    "msp %p, psize %llu, asize %llu, "
-			    "failures %llu, weight %llu",
-			    spa_name(spa), mg->mg_vd->vdev_id, txg,
-			    mg, msp, psize, asize,
-			    mg->mg_alloc_failures, msp->ms_weight);
+			    "weight %llu", spa_name(spa),
+			    mg->mg_vd->vdev_id, txg,
+			    mg, msp, psize, asize, msp->ms_weight);
 			mutex_exit(&mg->mg_lock);
 			return (-1ULL);
 		}
@@ -1703,27 +1691,6 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
 
 		mutex_enter(&msp->ms_lock);
 
-		/*
-		 * If we've already reached the allowable number of failed
-		 * allocation attempts on this metaslab group then we
-		 * consider skipping it. We skip it only if we're allowed
-		 * to "fast" gang, the physical size is larger than
-		 * a gang block, and we're attempting to allocate from
-		 * the primary metaslab.
-		 */
-		if (mg->mg_alloc_failures > zfs_mg_alloc_failures &&
-		    CAN_FASTGANG(flags) && psize > SPA_GANGBLOCKSIZE &&
-		    activation_weight == METASLAB_WEIGHT_PRIMARY) {
-			spa_dbgmsg(spa, "%s: skipping metaslab group: "
-			    "vdev %llu, txg %llu, mg %p, msp[%llu] %p, "
-			    "psize %llu, asize %llu, failures %llu",
-			    spa_name(spa), mg->mg_vd->vdev_id, txg, mg,
-			    msp->ms_id, msp, psize, asize,
-			    mg->mg_alloc_failures);
-			mutex_exit(&msp->ms_lock);
-			return (-1ULL);
-		}
-
 		/*
 		 * Ensure that the metaslab we have selected is still
 		 * capable of handling our request. It's possible that
@@ -1763,8 +1730,6 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t psize, uint64_t asize,
 		if ((offset = metaslab_block_alloc(msp, asize)) != -1ULL)
 			break;
 
-		atomic_inc_64(&mg->mg_alloc_failures);
-
 		metaslab_passivate(msp, metaslab_block_maxsize(msp));
 		mutex_exit(&msp->ms_lock);
 	}
@@ -1919,7 +1884,7 @@ metaslab_alloc_dva(spa_t *spa, metaslab_class_t *mc, uint64_t psize,
 		ASSERT(P2PHASE(asize, 1ULL << vd->vdev_ashift) == 0);
 
 		offset = metaslab_group_alloc(mg, psize, asize, txg, distance,
-		    dva, d, flags);
+		    dva, d);
 		if (offset != -1ULL) {
 			/*
 			 * If we've just selected this metaslab group,

sys/metaslab_impl.h

@@ -24,7 +24,7 @@
  */
 
 /*
- * Copyright (c) 2013 by Delphix. All rights reserved.
+ * Copyright (c) 2011, 2014 by Delphix. All rights reserved.
  */
 
 #ifndef _SYS_METASLAB_IMPL_H
@@ -57,7 +57,6 @@ struct metaslab_group {
 	kmutex_t		mg_lock;
 	avl_tree_t		mg_metaslab_tree;
 	uint64_t		mg_aliquot;
-	uint64_t		mg_alloc_failures;
 	boolean_t		mg_allocatable;		/* can we allocate? */
 	uint64_t		mg_free_capacity;	/* percentage free */
 	int64_t			mg_bias;

zio.c

@@ -61,7 +61,6 @@ kmem_cache_t *zio_data_buf_cache[SPA_MAXBLOCKSIZE >> SPA_MINBLOCKSHIFT];
 #ifdef _KERNEL
 extern vmem_t *zio_alloc_arena;
 #endif
-extern int zfs_mg_alloc_failures;
 
 /*
  * The following actions directly effect the spa's sync-to-convergence logic.
@@ -168,13 +167,6 @@ zio_init(void)
 		zio_data_buf_cache[c - 1] = zio_data_buf_cache[c];
 	}
 
-	/*
-	 * The zio write taskqs have 1 thread per cpu, allow 1/2 of the taskqs
-	 * to fail 3 times per txg or 8 failures, whichever is greater.
-	 */
-	if (zfs_mg_alloc_failures == 0)
-		zfs_mg_alloc_failures = MAX((3 * max_ncpus / 2), 8);
-
 	zio_inject_init();
 }
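
For scale, the failure budget deleted from zio_init() above grew with CPU count: taking max_ncpus = 32 as an example, MAX((3 * 32 / 2), 8) works out to 48 allowed allocation failures per vdev per txg before a metaslab group would even be considered for skipping. The zfs_mg_noalloc_threshold check that remains does not depend on failure counts at all.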