7303 dynamic metaslab selection
illumos/illumos-gate@8363e80ae7 https://github.com/illumos/illumos-gate/commit/8363e80ae72609660f6090766ca8c2c18aa53f0 https://www.illumos.org/issues/7303 This change introduces a new weighting algorithm to improve metaslab selection. The new weighting algorithm relies on the SPACEMAP_HISTOGRAM feature. As a result, the metaslab weight now encodes the type of weighting algorithm used (size-based vs segment-based). This also introduces a new allocation tracing facility and two new dcmds to help debug allocation problems. Each zio now contains a zio_alloc_list_t structure that is populated as the zio goes through the allocation stage. Here's an example of how to use the tracing facility: > c5ec000::print zio_t io_alloc_list | ::walk list | ::metaslab_trace MSID DVA ASIZE WEIGHT RESULT VDEV - 0 400 0 NOT_ALLOCATABLE ztest.0a - 0 400 0 NOT_ALLOCATABLE ztest.0a - 0 400 0 ENOSPC ztest.0a - 0 200 0 NOT_ALLOCATABLE ztest.0a - 0 200 0 NOT_ALLOCATABLE ztest.0a - 0 200 0 ENOSPC ztest.0a 1 0 400 1 x 8M 17b1a00 ztest.0a > 1ff2400::print zio_t io_alloc_list | ::walk list | ::metaslab_trace MSID DVA ASIZE WEIGHT RESULT VDEV - 0 200 0 NOT_ALLOCATABLE mirror-2 - 0 200 0 NOT_ALLOCATABLE mirror-0 1 0 200 1 x 4M 112ae00 mirror-1 - 1 200 0 NOT_ALLOCATABLE mirror-2 - 1 200 0 NOT_ALLOCATABLE mirror-0 1 1 200 1 x 4M 112b000 mirror-1 - 2 200 0 NOT_ALLOCATABLE mirror-2 If the metaslab is using segment-based weighting then the WEIGHT column will display the number of segments available in the bucket where the allocation attempt was made. 
Author: George Wilson <george.wilson@delphix.com> Reviewed by: Alex Reece <alex@delphix.com> Reviewed by: Chris Siden <christopher.siden@delphix.com> Reviewed by: Dan Kimmel <dan.kimmel@delphix.com> Reviewed by: Matthew Ahrens <mahrens@delphix.com> Reviewed by: Paul Dagnelie <paul.dagnelie@delphix.com> Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com> Reviewed by: Prakash Surya <prakash.surya@delphix.com> Reviewed by: Don Brady <don.brady@intel.com> Approved by: Richard Lowe <richlowe@richlowe.net>
This commit is contained in:
parent
b060bbc16a
commit
137146f48c
@ -2562,10 +2562,21 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
|
||||
|
||||
if (!dump_opt['L']) {
|
||||
vdev_t *rvd = spa->spa_root_vdev;
|
||||
|
||||
/*
|
||||
* We are going to be changing the meaning of the metaslab's
|
||||
* ms_tree. Ensure that the allocator doesn't try to
|
||||
* use the tree.
|
||||
*/
|
||||
spa->spa_normal_class->mc_ops = &zdb_metaslab_ops;
|
||||
spa->spa_log_class->mc_ops = &zdb_metaslab_ops;
|
||||
|
||||
for (uint64_t c = 0; c < rvd->vdev_children; c++) {
|
||||
vdev_t *vd = rvd->vdev_child[c];
|
||||
metaslab_group_t *mg = vd->vdev_mg;
|
||||
for (uint64_t m = 0; m < vd->vdev_ms_count; m++) {
|
||||
metaslab_t *msp = vd->vdev_ms[m];
|
||||
ASSERT3P(msp->ms_group, ==, mg);
|
||||
mutex_enter(&msp->ms_lock);
|
||||
metaslab_unload(msp);
|
||||
|
||||
@ -2586,8 +2597,6 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
|
||||
(longlong_t)m,
|
||||
(longlong_t)vd->vdev_ms_count);
|
||||
|
||||
msp->ms_ops = &zdb_metaslab_ops;
|
||||
|
||||
/*
|
||||
* We don't want to spend the CPU
|
||||
* manipulating the size-ordered
|
||||
@ -2597,7 +2606,10 @@ zdb_leak_init(spa_t *spa, zdb_cb_t *zcb)
|
||||
msp->ms_tree->rt_ops = NULL;
|
||||
VERIFY0(space_map_load(msp->ms_sm,
|
||||
msp->ms_tree, SM_ALLOC));
|
||||
msp->ms_loaded = B_TRUE;
|
||||
|
||||
if (!msp->ms_loaded) {
|
||||
msp->ms_loaded = B_TRUE;
|
||||
}
|
||||
}
|
||||
mutex_exit(&msp->ms_lock);
|
||||
}
|
||||
@ -2619,8 +2631,10 @@ zdb_leak_fini(spa_t *spa)
|
||||
vdev_t *rvd = spa->spa_root_vdev;
|
||||
for (int c = 0; c < rvd->vdev_children; c++) {
|
||||
vdev_t *vd = rvd->vdev_child[c];
|
||||
metaslab_group_t *mg = vd->vdev_mg;
|
||||
for (int m = 0; m < vd->vdev_ms_count; m++) {
|
||||
metaslab_t *msp = vd->vdev_ms[m];
|
||||
ASSERT3P(mg, ==, msp->ms_group);
|
||||
mutex_enter(&msp->ms_lock);
|
||||
|
||||
/*
|
||||
@ -2634,7 +2648,10 @@ zdb_leak_fini(spa_t *spa)
|
||||
* from the ms_tree.
|
||||
*/
|
||||
range_tree_vacate(msp->ms_tree, zdb_leak, vd);
|
||||
msp->ms_loaded = B_FALSE;
|
||||
|
||||
if (msp->ms_loaded) {
|
||||
msp->ms_loaded = B_FALSE;
|
||||
}
|
||||
|
||||
mutex_exit(&msp->ms_lock);
|
||||
}
|
||||
|
@ -171,7 +171,7 @@ static const ztest_shared_opts_t ztest_opts_defaults = {
|
||||
.zo_mirrors = 2,
|
||||
.zo_raidz = 4,
|
||||
.zo_raidz_parity = 1,
|
||||
.zo_vdev_size = SPA_MINDEVSIZE * 2,
|
||||
.zo_vdev_size = SPA_MINDEVSIZE * 4, /* 256m default size */
|
||||
.zo_datasets = 7,
|
||||
.zo_threads = 23,
|
||||
.zo_passtime = 60, /* 60 seconds */
|
||||
|
@ -92,6 +92,11 @@ kstat_create(const char *module, int instance, const char *name,
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
/*ARGSUSED*/
|
||||
void
|
||||
kstat_named_init(kstat_named_t *knp, const char *name, uchar_t type)
|
||||
{}
|
||||
|
||||
/*ARGSUSED*/
|
||||
void
|
||||
kstat_install(kstat_t *ksp)
|
||||
|
@ -301,6 +301,7 @@ extern void cv_broadcast(kcondvar_t *cv);
|
||||
*/
|
||||
extern kstat_t *kstat_create(const char *, int,
|
||||
const char *, const char *, uchar_t, ulong_t, uchar_t);
|
||||
extern void kstat_named_init(kstat_named_t *, const char *, uchar_t);
|
||||
extern void kstat_install(kstat_t *);
|
||||
extern void kstat_delete(kstat_t *);
|
||||
extern void kstat_waitq_enter(kstat_io_t *);
|
||||
|
Loading…
x
Reference in New Issue
Block a user