MFV r242735:
Illumos 13879:4eac7a87eff2: 3329 spa_sync() spends 10-20% of its time in spa_free_sync_cb() 3330 space_seg_t should have its own kmem_cache 3331 deferred frees should happen after sync_pass 1 3335 make SYNC_PASS_* constants tunable New loader-only tunables: vfs.zfs.sync_pass_deferred_free vfs.zfs.sync_pass_dont_compress vfs.zfs.sync_pass_rewrite References: https://www.illumos.org/issues/3329 https://www.illumos.org/issues/3330 https://www.illumos.org/issues/3331 https://www.illumos.org/issues/3335 MFC after: 2 weeks
This commit is contained in:
commit
2b8d4033cc
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=243503
@ -881,8 +881,9 @@ metaslab_activate(metaslab_t *msp, uint64_t activation_weight)
|
||||
if ((msp->ms_weight & METASLAB_ACTIVE_MASK) == 0) {
|
||||
space_map_load_wait(sm);
|
||||
if (!sm->sm_loaded) {
|
||||
int error = space_map_load(sm, sm_ops, SM_FREE,
|
||||
&msp->ms_smo,
|
||||
space_map_obj_t *smo = &msp->ms_smo;
|
||||
|
||||
int error = space_map_load(sm, sm_ops, SM_FREE, smo,
|
||||
spa_meta_objset(msp->ms_group->mg_vd->vdev_spa));
|
||||
if (error) {
|
||||
metaslab_group_sort(msp->ms_group, msp, 0);
|
||||
|
@ -139,6 +139,7 @@ boolean_t zio_taskq_sysdc = B_TRUE; /* use SDC scheduling class */
|
||||
uint_t zio_taskq_basedc = 80; /* base duty cycle */
|
||||
|
||||
boolean_t spa_create_process = B_TRUE; /* no process ==> no sysdc */
|
||||
extern int zfs_sync_pass_deferred_free;
|
||||
|
||||
/*
|
||||
* This (illegal) pool name is used when temporarily importing a spa_t in order
|
||||
@ -6302,7 +6303,7 @@ spa_sync(spa_t *spa, uint64_t txg)
|
||||
spa_errlog_sync(spa, txg);
|
||||
dsl_pool_sync(dp, txg);
|
||||
|
||||
if (pass <= SYNC_PASS_DEFERRED_FREE) {
|
||||
if (pass < zfs_sync_pass_deferred_free) {
|
||||
zio_t *zio = zio_root(spa, NULL, NULL, 0);
|
||||
bplist_iterate(free_bpl, spa_free_sync_cb,
|
||||
zio, tx);
|
||||
|
@ -1619,6 +1619,7 @@ spa_init(int mode)
|
||||
#endif /* illumos */
|
||||
refcount_sysinit();
|
||||
unique_init();
|
||||
space_map_init();
|
||||
zio_init();
|
||||
dmu_init();
|
||||
zil_init();
|
||||
@ -1641,6 +1642,7 @@ spa_fini(void)
|
||||
zil_fini();
|
||||
dmu_fini();
|
||||
zio_fini();
|
||||
space_map_fini();
|
||||
unique_fini();
|
||||
refcount_fini();
|
||||
|
||||
|
@ -39,6 +39,23 @@ SYSCTL_INT(_vfs_zfs, OID_AUTO, space_map_last_hope, CTLFLAG_RDTUN,
|
||||
&space_map_last_hope, 0,
|
||||
"If kernel panic in space_map code on pool import, import the pool in readonly mode and backup all your data before trying this option.");
|
||||
|
||||
static kmem_cache_t *space_seg_cache;
|
||||
|
||||
void
|
||||
space_map_init(void)
|
||||
{
|
||||
ASSERT(space_seg_cache == NULL);
|
||||
space_seg_cache = kmem_cache_create("space_seg_cache",
|
||||
sizeof (space_seg_t), 0, NULL, NULL, NULL, NULL, NULL, 0);
|
||||
}
|
||||
|
||||
void
|
||||
space_map_fini(void)
|
||||
{
|
||||
kmem_cache_destroy(space_seg_cache);
|
||||
space_seg_cache = NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Space map routines.
|
||||
* NOTE: caller is responsible for all locking.
|
||||
@ -148,7 +165,7 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
|
||||
avl_remove(sm->sm_pp_root, ss_after);
|
||||
}
|
||||
ss_after->ss_start = ss_before->ss_start;
|
||||
kmem_free(ss_before, sizeof (*ss_before));
|
||||
kmem_cache_free(space_seg_cache, ss_before);
|
||||
ss = ss_after;
|
||||
} else if (merge_before) {
|
||||
ss_before->ss_end = end;
|
||||
@ -161,7 +178,7 @@ space_map_add(space_map_t *sm, uint64_t start, uint64_t size)
|
||||
avl_remove(sm->sm_pp_root, ss_after);
|
||||
ss = ss_after;
|
||||
} else {
|
||||
ss = kmem_alloc(sizeof (*ss), KM_SLEEP);
|
||||
ss = kmem_cache_alloc(space_seg_cache, KM_SLEEP);
|
||||
ss->ss_start = start;
|
||||
ss->ss_end = end;
|
||||
avl_insert(&sm->sm_root, ss, where);
|
||||
@ -207,7 +224,7 @@ space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
|
||||
avl_remove(sm->sm_pp_root, ss);
|
||||
|
||||
if (left_over && right_over) {
|
||||
newseg = kmem_alloc(sizeof (*newseg), KM_SLEEP);
|
||||
newseg = kmem_cache_alloc(space_seg_cache, KM_SLEEP);
|
||||
newseg->ss_start = end;
|
||||
newseg->ss_end = ss->ss_end;
|
||||
ss->ss_end = start;
|
||||
@ -220,7 +237,7 @@ space_map_remove(space_map_t *sm, uint64_t start, uint64_t size)
|
||||
ss->ss_start = end;
|
||||
} else {
|
||||
avl_remove(&sm->sm_root, ss);
|
||||
kmem_free(ss, sizeof (*ss));
|
||||
kmem_cache_free(space_seg_cache, ss);
|
||||
ss = NULL;
|
||||
}
|
||||
|
||||
@ -260,7 +277,7 @@ space_map_vacate(space_map_t *sm, space_map_func_t *func, space_map_t *mdest)
|
||||
while ((ss = avl_destroy_nodes(&sm->sm_root, &cookie)) != NULL) {
|
||||
if (func != NULL)
|
||||
func(mdest, ss->ss_start, ss->ss_end - ss->ss_start);
|
||||
kmem_free(ss, sizeof (*ss));
|
||||
kmem_cache_free(space_seg_cache, ss);
|
||||
}
|
||||
sm->sm_space = 0;
|
||||
}
|
||||
@ -432,7 +449,7 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
|
||||
spa_t *spa = dmu_objset_spa(os);
|
||||
void *cookie = NULL;
|
||||
space_seg_t *ss;
|
||||
uint64_t bufsize, start, size, run_len;
|
||||
uint64_t bufsize, start, size, run_len, delta, sm_space;
|
||||
uint64_t *entry, *entry_map, *entry_map_end;
|
||||
|
||||
ASSERT(MUTEX_HELD(sm->sm_lock));
|
||||
@ -461,11 +478,13 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
|
||||
SM_DEBUG_SYNCPASS_ENCODE(spa_sync_pass(spa)) |
|
||||
SM_DEBUG_TXG_ENCODE(dmu_tx_get_txg(tx));
|
||||
|
||||
delta = 0;
|
||||
sm_space = sm->sm_space;
|
||||
while ((ss = avl_destroy_nodes(&sm->sm_root, &cookie)) != NULL) {
|
||||
size = ss->ss_end - ss->ss_start;
|
||||
start = (ss->ss_start - sm->sm_start) >> sm->sm_shift;
|
||||
|
||||
sm->sm_space -= size;
|
||||
delta += size;
|
||||
size >>= sm->sm_shift;
|
||||
|
||||
while (size) {
|
||||
@ -487,7 +506,7 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
|
||||
start += run_len;
|
||||
size -= run_len;
|
||||
}
|
||||
kmem_free(ss, sizeof (*ss));
|
||||
kmem_cache_free(space_seg_cache, ss);
|
||||
}
|
||||
|
||||
if (entry != entry_map) {
|
||||
@ -499,8 +518,15 @@ space_map_sync(space_map_t *sm, uint8_t maptype,
|
||||
smo->smo_objsize += size;
|
||||
}
|
||||
|
||||
/*
|
||||
* Ensure that the space_map's accounting wasn't changed
|
||||
* while we were in the middle of writing it out.
|
||||
*/
|
||||
VERIFY3U(sm->sm_space, ==, sm_space);
|
||||
|
||||
zio_buf_free(entry_map, bufsize);
|
||||
|
||||
sm->sm_space -= delta;
|
||||
VERIFY0(sm->sm_space);
|
||||
}
|
||||
|
||||
|
@ -21,7 +21,10 @@
|
||||
/*
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
* Copyright (c) 2011 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_METASLAB_IMPL_H
|
||||
|
@ -489,14 +489,6 @@ extern int spa_scan_stop(spa_t *spa);
|
||||
extern void spa_sync(spa_t *spa, uint64_t txg); /* only for DMU use */
|
||||
extern void spa_sync_allpools(void);
|
||||
|
||||
/*
|
||||
* DEFERRED_FREE must be large enough that regular blocks are not
|
||||
* deferred. XXX so can't we change it back to 1?
|
||||
*/
|
||||
#define SYNC_PASS_DEFERRED_FREE 2 /* defer frees after this pass */
|
||||
#define SYNC_PASS_DONT_COMPRESS 4 /* don't compress after this pass */
|
||||
#define SYNC_PASS_REWRITE 1 /* rewrite new bps after this pass */
|
||||
|
||||
/* spa namespace global mutex */
|
||||
extern kmutex_t spa_namespace_lock;
|
||||
|
||||
|
@ -23,6 +23,10 @@
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _SYS_SPACE_MAP_H
|
||||
#define _SYS_SPACE_MAP_H
|
||||
|
||||
@ -136,6 +140,8 @@ struct space_map_ops {
|
||||
|
||||
typedef void space_map_func_t(space_map_t *sm, uint64_t start, uint64_t size);
|
||||
|
||||
extern void space_map_init(void);
|
||||
extern void space_map_fini(void);
|
||||
extern void space_map_create(space_map_t *sm, uint64_t start, uint64_t size,
|
||||
uint8_t shift, kmutex_t *lp);
|
||||
extern void space_map_destroy(space_map_t *sm);
|
||||
|
@ -23,6 +23,10 @@
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2012 by Delphix. All rights reserved.
|
||||
*/
|
||||
|
||||
#ifndef _ZIO_IMPL_H
|
||||
#define _ZIO_IMPL_H
|
||||
|
||||
@ -143,6 +147,7 @@ enum zio_stage {
|
||||
#define ZIO_FREE_PIPELINE \
|
||||
(ZIO_INTERLOCK_STAGES | \
|
||||
ZIO_STAGE_FREE_BP_INIT | \
|
||||
ZIO_STAGE_ISSUE_ASYNC | \
|
||||
ZIO_STAGE_DVA_FREE | \
|
||||
ZIO_STAGE_VDEV_IO_START | \
|
||||
ZIO_STAGE_VDEV_IO_ASSESS)
|
||||
|
@ -106,6 +106,31 @@ extern vmem_t *zio_alloc_arena;
|
||||
#endif
|
||||
extern int zfs_mg_alloc_failures;
|
||||
|
||||
/*
|
||||
* The following actions directly effect the spa's sync-to-convergence logic.
|
||||
* The values below define the sync pass when we start performing the action.
|
||||
* Care should be taken when changing these values as they directly impact
|
||||
* spa_sync() performance. Tuning these values may introduce subtle performance
|
||||
* pathologies and should only be done in the context of performance analysis.
|
||||
* These tunables will eventually be removed and replaced with #defines once
|
||||
* enough analysis has been done to determine optimal values.
|
||||
*
|
||||
* The 'zfs_sync_pass_deferred_free' pass must be greater than 1 to ensure that
|
||||
* regular blocks are not deferred.
|
||||
*/
|
||||
int zfs_sync_pass_deferred_free = 2; /* defer frees starting in this pass */
|
||||
TUNABLE_INT("vfs.zfs.sync_pass_deferred_free", &zfs_sync_pass_deferred_free);
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, sync_pass_deferred_free, CTLFLAG_RDTUN,
|
||||
&zfs_sync_pass_deferred_free, 0, "defer frees starting in this pass");
|
||||
int zfs_sync_pass_dont_compress = 5; /* don't compress starting in this pass */
|
||||
TUNABLE_INT("vfs.zfs.sync_pass_dont_compress", &zfs_sync_pass_dont_compress);
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, sync_pass_dont_compress, CTLFLAG_RDTUN,
|
||||
&zfs_sync_pass_dont_compress, 0, "don't compress starting in this pass");
|
||||
int zfs_sync_pass_rewrite = 2; /* rewrite new bps starting in this pass */
|
||||
TUNABLE_INT("vfs.zfs.sync_pass_rewrite", &zfs_sync_pass_rewrite);
|
||||
SYSCTL_INT(_vfs_zfs, OID_AUTO, sync_pass_rewrite, CTLFLAG_RDTUN,
|
||||
&zfs_sync_pass_rewrite, 0, "rewrite new bps starting in this pass");
|
||||
|
||||
/*
|
||||
* An allocating zio is one that either currently has the DVA allocate
|
||||
* stage set or will have it later in its lifetime.
|
||||
@ -742,7 +767,7 @@ zio_free_sync(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
|
||||
|
||||
ASSERT(!BP_IS_HOLE(bp));
|
||||
ASSERT(spa_syncing_txg(spa) == txg);
|
||||
ASSERT(spa_sync_pass(spa) <= SYNC_PASS_DEFERRED_FREE);
|
||||
ASSERT(spa_sync_pass(spa) < zfs_sync_pass_deferred_free);
|
||||
|
||||
zio = zio_create(pio, spa, txg, bp, NULL, size,
|
||||
NULL, NULL, ZIO_TYPE_FREE, ZIO_PRIORITY_FREE, flags,
|
||||
@ -1051,7 +1076,7 @@ zio_write_bp_init(zio_t *zio)
|
||||
ASSERT(zio->io_child_type == ZIO_CHILD_LOGICAL);
|
||||
ASSERT(!BP_GET_DEDUP(bp));
|
||||
|
||||
if (pass > SYNC_PASS_DONT_COMPRESS)
|
||||
if (pass >= zfs_sync_pass_dont_compress)
|
||||
compress = ZIO_COMPRESS_OFF;
|
||||
|
||||
/* Make sure someone doesn't change their mind on overwrites */
|
||||
@ -1080,7 +1105,7 @@ zio_write_bp_init(zio_t *zio)
|
||||
* There should only be a handful of blocks after pass 1 in any case.
|
||||
*/
|
||||
if (bp->blk_birth == zio->io_txg && BP_GET_PSIZE(bp) == psize &&
|
||||
pass > SYNC_PASS_REWRITE) {
|
||||
pass >= zfs_sync_pass_rewrite) {
|
||||
ASSERT(psize != 0);
|
||||
enum zio_stage gang_stages = zio->io_pipeline & ZIO_GANG_STAGES;
|
||||
zio->io_pipeline = ZIO_REWRITE_PIPELINE | gang_stages;
|
||||
|
Loading…
Reference in New Issue
Block a user