Update ZFS metaslab code from OpenSolaris.

This provides a noticeable write speedup, especially on pools with
less than 30% of free space.

Detailed information (OpenSolaris onnv changesets and Bug IDs):

11146:7e58f40bcb1c
6826241	Sync write IOPS drops dramatically during TXG sync
6869229	zfs should switch to shiny new metaslabs more frequently

11728:59fdb3b856f6
6918420	zdb -m has issues printing metaslab statistics

12047:7c1fcc8419ca
6917066	zfs block picking can be improved

Approved by:	delphij (mentor)
Obtained from:	OpenSolaris (Bug ID 6826241, 6869229, 6918420, 6917066)
MFC after:	2 weeks
This commit is contained in:
Martin Matuska 2010-08-28 08:59:55 +00:00
parent c87f1ad43c
commit abe5837f7c
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=211931
12 changed files with 424 additions and 180 deletions

View File

@ -491,35 +491,37 @@ dump_metaslab_stats(metaslab_t *msp)
static void
dump_metaslab(metaslab_t *msp)
{
char freebuf[5];
space_map_obj_t *smo = &msp->ms_smo;
vdev_t *vd = msp->ms_group->mg_vd;
spa_t *spa = vd->vdev_spa;
space_map_t *sm = &msp->ms_map;
space_map_obj_t *smo = &msp->ms_smo;
char freebuf[5];
nicenum(msp->ms_map.sm_size - smo->smo_alloc, freebuf);
nicenum(sm->sm_size - smo->smo_alloc, freebuf);
(void) printf(
"\tvdev %5llu offset %12llx spacemap %6llu free %5s\n",
(u_longlong_t)vd->vdev_id, (u_longlong_t)msp->ms_map.sm_start,
(u_longlong_t)smo->smo_object, freebuf);
(u_longlong_t)(sm->sm_start / sm->sm_size),
(u_longlong_t)sm->sm_start, (u_longlong_t)smo->smo_object, freebuf);
if (dump_opt['m'] > 1) {
mutex_enter(&msp->ms_lock);
VERIFY(space_map_load(&msp->ms_map, zfs_metaslab_ops,
SM_FREE, &msp->ms_smo, spa->spa_meta_objset) == 0);
space_map_load_wait(sm);
if (!sm->sm_loaded)
VERIFY(space_map_load(sm, zfs_metaslab_ops,
SM_FREE, smo, spa->spa_meta_objset) == 0);
dump_metaslab_stats(msp);
space_map_unload(&msp->ms_map);
space_map_unload(sm);
mutex_exit(&msp->ms_lock);
}
if (dump_opt['d'] > 5 || dump_opt['m'] > 2) {
ASSERT(msp->ms_map.sm_size == (1ULL << vd->vdev_ms_shift));
ASSERT(sm->sm_size == (1ULL << vd->vdev_ms_shift));
mutex_enter(&msp->ms_lock);
dump_spacemap(spa->spa_meta_objset, smo, &msp->ms_map);
dump_spacemap(spa->spa_meta_objset, smo, sm);
mutex_exit(&msp->ms_lock);
}
}
static void

View File

@ -19,8 +19,7 @@
* CDDL HEADER END
*/
/*
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
* Use is subject to license terms.
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
*/
#include <sys/zfs_context.h>
@ -37,7 +36,7 @@ uint64_t metaslab_gang_bang = SPA_MAXBLOCKSIZE + 1; /* force gang blocks */
/*
* Minimum size which forces the dynamic allocator to change
* it's allocation strategy. Once the space map cannot satisfy
* it's allocation strategy. Once the space map cannot satisfy
* an allocation of this size then it switches to using more
 * aggressive strategy (i.e. search by size rather than offset).
*/
@ -49,7 +48,23 @@ uint64_t metaslab_df_alloc_threshold = SPA_MAXBLOCKSIZE;
* Once the space_map's free space drops below this level we dynamically
* switch to using best-fit allocations.
*/
int metaslab_df_free_pct = 30;
int metaslab_df_free_pct = 4;
/*
* A metaslab is considered "free" if it contains a contiguous
* segment which is greater than metaslab_min_alloc_size.
*/
uint64_t metaslab_min_alloc_size = DMU_MAX_ACCESS;
/*
* Max number of space_maps to prefetch.
*/
int metaslab_prefetch_limit = SPA_DVAS_PER_BP;
/*
* Percentage bonus multiplier for metaslabs that are in the bonus area.
*/
int metaslab_smo_bonus_pct = 150;
/*
* ==========================================================================
@ -218,6 +233,32 @@ metaslab_group_sort(metaslab_group_t *mg, metaslab_t *msp, uint64_t weight)
mutex_exit(&mg->mg_lock);
}
/*
* ==========================================================================
* Common allocator routines
* ==========================================================================
*/
static int
metaslab_segsize_compare(const void *x1, const void *x2)
{
const space_seg_t *s1 = x1;
const space_seg_t *s2 = x2;
uint64_t ss_size1 = s1->ss_end - s1->ss_start;
uint64_t ss_size2 = s2->ss_end - s2->ss_start;
if (ss_size1 < ss_size2)
return (-1);
if (ss_size1 > ss_size2)
return (1);
if (s1->ss_start < s2->ss_start)
return (-1);
if (s1->ss_start > s2->ss_start)
return (1);
return (0);
}
/*
* This is a helper function that can be used by the allocator to find
* a suitable block to allocate. This will search the specified AVL
@ -258,101 +299,8 @@ metaslab_block_picker(avl_tree_t *t, uint64_t *cursor, uint64_t size,
return (metaslab_block_picker(t, cursor, size, align));
}
/*
* ==========================================================================
* The first-fit block allocator
* ==========================================================================
*/
static void
metaslab_ff_load(space_map_t *sm)
{
ASSERT(sm->sm_ppd == NULL);
sm->sm_ppd = kmem_zalloc(64 * sizeof (uint64_t), KM_SLEEP);
sm->sm_pp_root = NULL;
}
static void
metaslab_ff_unload(space_map_t *sm)
{
kmem_free(sm->sm_ppd, 64 * sizeof (uint64_t));
sm->sm_ppd = NULL;
}
static uint64_t
metaslab_ff_alloc(space_map_t *sm, uint64_t size)
{
avl_tree_t *t = &sm->sm_root;
uint64_t align = size & -size;
uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1;
return (metaslab_block_picker(t, cursor, size, align));
}
/* ARGSUSED */
static void
metaslab_ff_claim(space_map_t *sm, uint64_t start, uint64_t size)
{
/* No need to update cursor */
}
/* ARGSUSED */
static void
metaslab_ff_free(space_map_t *sm, uint64_t start, uint64_t size)
{
/* No need to update cursor */
}
static space_map_ops_t metaslab_ff_ops = {
metaslab_ff_load,
metaslab_ff_unload,
metaslab_ff_alloc,
metaslab_ff_claim,
metaslab_ff_free,
NULL /* maxsize */
};
/*
* Dynamic block allocator -
 * Uses the first fit allocation scheme until space gets low and then
* adjusts to a best fit allocation method. Uses metaslab_df_alloc_threshold
* and metaslab_df_free_pct to determine when to switch the allocation scheme.
*/
uint64_t
metaslab_df_maxsize(space_map_t *sm)
{
avl_tree_t *t = sm->sm_pp_root;
space_seg_t *ss;
if (t == NULL || (ss = avl_last(t)) == NULL)
return (0ULL);
return (ss->ss_end - ss->ss_start);
}
static int
metaslab_df_seg_compare(const void *x1, const void *x2)
{
const space_seg_t *s1 = x1;
const space_seg_t *s2 = x2;
uint64_t ss_size1 = s1->ss_end - s1->ss_start;
uint64_t ss_size2 = s2->ss_end - s2->ss_start;
if (ss_size1 < ss_size2)
return (-1);
if (ss_size1 > ss_size2)
return (1);
if (s1->ss_start < s2->ss_start)
return (-1);
if (s1->ss_start > s2->ss_start)
return (1);
return (0);
}
static void
metaslab_df_load(space_map_t *sm)
metaslab_pp_load(space_map_t *sm)
{
space_seg_t *ss;
@ -360,7 +308,7 @@ metaslab_df_load(space_map_t *sm)
sm->sm_ppd = kmem_zalloc(64 * sizeof (uint64_t), KM_SLEEP);
sm->sm_pp_root = kmem_alloc(sizeof (avl_tree_t), KM_SLEEP);
avl_create(sm->sm_pp_root, metaslab_df_seg_compare,
avl_create(sm->sm_pp_root, metaslab_segsize_compare,
sizeof (space_seg_t), offsetof(struct space_seg, ss_pp_node));
for (ss = avl_first(&sm->sm_root); ss; ss = AVL_NEXT(&sm->sm_root, ss))
@ -368,7 +316,7 @@ metaslab_df_load(space_map_t *sm)
}
static void
metaslab_df_unload(space_map_t *sm)
metaslab_pp_unload(space_map_t *sm)
{
void *cookie = NULL;
@ -384,13 +332,82 @@ metaslab_df_unload(space_map_t *sm)
sm->sm_pp_root = NULL;
}
/* ARGSUSED */
static void
metaslab_pp_claim(space_map_t *sm, uint64_t start, uint64_t size)
{
/* No need to update cursor */
}
/* ARGSUSED */
static void
metaslab_pp_free(space_map_t *sm, uint64_t start, uint64_t size)
{
/* No need to update cursor */
}
/*
* Return the maximum contiguous segment within the metaslab.
*/
uint64_t
metaslab_pp_maxsize(space_map_t *sm)
{
avl_tree_t *t = sm->sm_pp_root;
space_seg_t *ss;
if (t == NULL || (ss = avl_last(t)) == NULL)
return (0ULL);
return (ss->ss_end - ss->ss_start);
}
/*
* ==========================================================================
* The first-fit block allocator
* ==========================================================================
*/
static uint64_t
metaslab_ff_alloc(space_map_t *sm, uint64_t size)
{
avl_tree_t *t = &sm->sm_root;
uint64_t align = size & -size;
uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1;
return (metaslab_block_picker(t, cursor, size, align));
}
/* ARGSUSED */
boolean_t
metaslab_ff_fragmented(space_map_t *sm)
{
return (B_TRUE);
}
static space_map_ops_t metaslab_ff_ops = {
metaslab_pp_load,
metaslab_pp_unload,
metaslab_ff_alloc,
metaslab_pp_claim,
metaslab_pp_free,
metaslab_pp_maxsize,
metaslab_ff_fragmented
};
/*
* ==========================================================================
* Dynamic block allocator -
 * Uses the first fit allocation scheme until space gets low and then
* adjusts to a best fit allocation method. Uses metaslab_df_alloc_threshold
* and metaslab_df_free_pct to determine when to switch the allocation scheme.
* ==========================================================================
*/
static uint64_t
metaslab_df_alloc(space_map_t *sm, uint64_t size)
{
avl_tree_t *t = &sm->sm_root;
uint64_t align = size & -size;
uint64_t *cursor = (uint64_t *)sm->sm_ppd + highbit(align) - 1;
uint64_t max_size = metaslab_df_maxsize(sm);
uint64_t max_size = metaslab_pp_maxsize(sm);
int free_pct = sm->sm_space * 100 / sm->sm_size;
ASSERT(MUTEX_HELD(sm->sm_lock));
@ -412,30 +429,158 @@ metaslab_df_alloc(space_map_t *sm, uint64_t size)
return (metaslab_block_picker(t, cursor, size, 1ULL));
}
/* ARGSUSED */
static void
metaslab_df_claim(space_map_t *sm, uint64_t start, uint64_t size)
static boolean_t
metaslab_df_fragmented(space_map_t *sm)
{
/* No need to update cursor */
}
uint64_t max_size = metaslab_pp_maxsize(sm);
int free_pct = sm->sm_space * 100 / sm->sm_size;
/* ARGSUSED */
static void
metaslab_df_free(space_map_t *sm, uint64_t start, uint64_t size)
{
/* No need to update cursor */
if (max_size >= metaslab_df_alloc_threshold &&
free_pct >= metaslab_df_free_pct)
return (B_FALSE);
return (B_TRUE);
}
static space_map_ops_t metaslab_df_ops = {
metaslab_df_load,
metaslab_df_unload,
metaslab_pp_load,
metaslab_pp_unload,
metaslab_df_alloc,
metaslab_df_claim,
metaslab_df_free,
metaslab_df_maxsize
metaslab_pp_claim,
metaslab_pp_free,
metaslab_pp_maxsize,
metaslab_df_fragmented
};
space_map_ops_t *zfs_metaslab_ops = &metaslab_df_ops;
/*
* ==========================================================================
* Other experimental allocators
* ==========================================================================
*/
static uint64_t
metaslab_cdf_alloc(space_map_t *sm, uint64_t size)
{
avl_tree_t *t = &sm->sm_root;
uint64_t *cursor = (uint64_t *)sm->sm_ppd;
uint64_t *extent_end = (uint64_t *)sm->sm_ppd + 1;
uint64_t max_size = metaslab_pp_maxsize(sm);
uint64_t rsize = size;
uint64_t offset = 0;
ASSERT(MUTEX_HELD(sm->sm_lock));
ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root));
if (max_size < size)
return (-1ULL);
ASSERT3U(*extent_end, >=, *cursor);
/*
* If we're running low on space switch to using the size
* sorted AVL tree (best-fit).
*/
if ((*cursor + size) > *extent_end) {
t = sm->sm_pp_root;
*cursor = *extent_end = 0;
if (max_size > 2 * SPA_MAXBLOCKSIZE)
rsize = MIN(metaslab_min_alloc_size, max_size);
offset = metaslab_block_picker(t, extent_end, rsize, 1ULL);
if (offset != -1)
*cursor = offset + size;
} else {
offset = metaslab_block_picker(t, cursor, rsize, 1ULL);
}
ASSERT3U(*cursor, <=, *extent_end);
return (offset);
}
static boolean_t
metaslab_cdf_fragmented(space_map_t *sm)
{
uint64_t max_size = metaslab_pp_maxsize(sm);
if (max_size > (metaslab_min_alloc_size * 10))
return (B_FALSE);
return (B_TRUE);
}
static space_map_ops_t metaslab_cdf_ops = {
metaslab_pp_load,
metaslab_pp_unload,
metaslab_cdf_alloc,
metaslab_pp_claim,
metaslab_pp_free,
metaslab_pp_maxsize,
metaslab_cdf_fragmented
};
uint64_t metaslab_ndf_clump_shift = 4;
static uint64_t
metaslab_ndf_alloc(space_map_t *sm, uint64_t size)
{
avl_tree_t *t = &sm->sm_root;
avl_index_t where;
space_seg_t *ss, ssearch;
uint64_t hbit = highbit(size);
uint64_t *cursor = (uint64_t *)sm->sm_ppd + hbit - 1;
uint64_t max_size = metaslab_pp_maxsize(sm);
ASSERT(MUTEX_HELD(sm->sm_lock));
ASSERT3U(avl_numnodes(&sm->sm_root), ==, avl_numnodes(sm->sm_pp_root));
if (max_size < size)
return (-1ULL);
ssearch.ss_start = *cursor;
ssearch.ss_end = *cursor + size;
ss = avl_find(t, &ssearch, &where);
if (ss == NULL || (ss->ss_start + size > ss->ss_end)) {
t = sm->sm_pp_root;
ssearch.ss_start = 0;
ssearch.ss_end = MIN(max_size,
1ULL << (hbit + metaslab_ndf_clump_shift));
ss = avl_find(t, &ssearch, &where);
if (ss == NULL)
ss = avl_nearest(t, where, AVL_AFTER);
ASSERT(ss != NULL);
}
if (ss != NULL) {
if (ss->ss_start + size <= ss->ss_end) {
*cursor = ss->ss_start + size;
return (ss->ss_start);
}
}
return (-1ULL);
}
static boolean_t
metaslab_ndf_fragmented(space_map_t *sm)
{
uint64_t max_size = metaslab_pp_maxsize(sm);
if (max_size > (metaslab_min_alloc_size << metaslab_ndf_clump_shift))
return (B_FALSE);
return (B_TRUE);
}
static space_map_ops_t metaslab_ndf_ops = {
metaslab_pp_load,
metaslab_pp_unload,
metaslab_ndf_alloc,
metaslab_pp_claim,
metaslab_pp_free,
metaslab_pp_maxsize,
metaslab_ndf_fragmented
};
space_map_ops_t *zfs_metaslab_ops = &metaslab_ndf_ops;
/*
* ==========================================================================
@ -522,7 +667,6 @@ metaslab_fini(metaslab_t *msp)
#define METASLAB_WEIGHT_SECONDARY (1ULL << 62)
#define METASLAB_ACTIVE_MASK \
(METASLAB_WEIGHT_PRIMARY | METASLAB_WEIGHT_SECONDARY)
#define METASLAB_SMO_BONUS_MULTIPLIER 2
static uint64_t
metaslab_weight(metaslab_t *msp)
@ -555,25 +699,60 @@ metaslab_weight(metaslab_t *msp)
ASSERT(weight >= space && weight <= 2 * space);
/*
* For locality, assign higher weight to metaslabs we've used before.
* For locality, assign higher weight to metaslabs which have
* a lower offset than what we've already activated.
*/
if (smo->smo_object != 0)
weight *= METASLAB_SMO_BONUS_MULTIPLIER;
if (sm->sm_start <= mg->mg_bonus_area)
weight *= (metaslab_smo_bonus_pct / 100);
ASSERT(weight >= space &&
weight <= 2 * METASLAB_SMO_BONUS_MULTIPLIER * space);
weight <= 2 * (metaslab_smo_bonus_pct / 100) * space);
if (sm->sm_loaded && !sm->sm_ops->smop_fragmented(sm)) {
/*
* If this metaslab is one we're actively using, adjust its
* weight to make it preferable to any inactive metaslab so
* we'll polish it off.
*/
weight |= (msp->ms_weight & METASLAB_ACTIVE_MASK);
}
return (weight);
}
static void
metaslab_prefetch(metaslab_group_t *mg)
{
spa_t *spa = mg->mg_vd->vdev_spa;
metaslab_t *msp;
avl_tree_t *t = &mg->mg_metaslab_tree;
int m;
mutex_enter(&mg->mg_lock);
/*
* If this metaslab is one we're actively using, adjust its weight to
* make it preferable to any inactive metaslab so we'll polish it off.
* Prefetch the next potential metaslabs
*/
weight |= (msp->ms_weight & METASLAB_ACTIVE_MASK);
for (msp = avl_first(t), m = 0; msp; msp = AVL_NEXT(t, msp), m++) {
space_map_t *sm = &msp->ms_map;
space_map_obj_t *smo = &msp->ms_smo;
return (weight);
/* If we have reached our prefetch limit then we're done */
if (m >= metaslab_prefetch_limit)
break;
if (!sm->sm_loaded && smo->smo_object != 0) {
mutex_exit(&mg->mg_lock);
dmu_prefetch(spa->spa_meta_objset, smo->smo_object,
0ULL, smo->smo_objsize);
mutex_enter(&mg->mg_lock);
}
}
mutex_exit(&mg->mg_lock);
}
static int
metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size)
{
metaslab_group_t *mg = msp->ms_group;
space_map_t *sm = &msp->ms_map;
space_map_ops_t *sm_ops = msp->ms_group->mg_class->mc_ops;
@ -587,6 +766,15 @@ metaslab_activate(metaslab_t *msp, uint64_t activation_weight, uint64_t size)
return (error);
}
/*
* Track the bonus area as we activate new metaslabs.
*/
if (sm->sm_start > mg->mg_bonus_area) {
mutex_enter(&mg->mg_lock);
mg->mg_bonus_area = sm->sm_start;
mutex_exit(&mg->mg_lock);
}
/*
* If we were able to load the map then make sure
* that this map is still able to satisfy our request.
@ -773,6 +961,32 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
mutex_exit(&msp->ms_lock);
}
void
metaslab_sync_reassess(metaslab_group_t *mg)
{
vdev_t *vd = mg->mg_vd;
/*
* Re-evaluate all metaslabs which have lower offsets than the
* bonus area.
*/
for (int m = 0; m < vd->vdev_ms_count; m++) {
metaslab_t *msp = vd->vdev_ms[m];
if (msp->ms_map.sm_start > mg->mg_bonus_area)
break;
mutex_enter(&msp->ms_lock);
metaslab_group_sort(mg, msp, metaslab_weight(msp));
mutex_exit(&msp->ms_lock);
}
/*
* Prefetch the next potential metaslabs
*/
metaslab_prefetch(mg);
}
static uint64_t
metaslab_distance(metaslab_t *msp, dva_t *dva)
{
@ -868,7 +1082,7 @@ metaslab_group_alloc(metaslab_group_t *mg, uint64_t size, uint64_t txg,
if ((offset = space_map_alloc(&msp->ms_map, size)) != -1ULL)
break;
metaslab_passivate(msp, size - 1);
metaslab_passivate(msp, space_map_maxsize(&msp->ms_map));
mutex_exit(&msp->ms_lock);
}

View File

@ -74,35 +74,38 @@ enum zti_modes {
zti_mode_fixed, /* value is # of threads (min 1) */
zti_mode_online_percent, /* value is % of online CPUs */
zti_mode_tune, /* fill from zio_taskq_tune_* */
zti_mode_null, /* don't create a taskq */
zti_nmodes
};
#define ZTI_THREAD_FIX(n) { zti_mode_fixed, (n) }
#define ZTI_THREAD_PCT(n) { zti_mode_online_percent, (n) }
#define ZTI_THREAD_TUNE { zti_mode_tune, 0 }
#define ZTI_FIX(n) { zti_mode_fixed, (n) }
#define ZTI_PCT(n) { zti_mode_online_percent, (n) }
#define ZTI_TUNE { zti_mode_tune, 0 }
#define ZTI_NULL { zti_mode_null, 0 }
#define ZTI_THREAD_ONE ZTI_THREAD_FIX(1)
#define ZTI_ONE ZTI_FIX(1)
typedef struct zio_taskq_info {
const char *zti_name;
struct {
enum zti_modes zti_mode;
uint_t zti_value;
} zti_nthreads[ZIO_TASKQ_TYPES];
enum zti_modes zti_mode;
uint_t zti_value;
} zio_taskq_info_t;
static const char *const zio_taskq_types[ZIO_TASKQ_TYPES] = {
"issue", "intr"
"issue", "issue_high", "intr", "intr_high"
};
const zio_taskq_info_t zio_taskqs[ZIO_TYPES] = {
/* ISSUE INTR */
{ "spa_zio_null", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } },
{ "spa_zio_read", { ZTI_THREAD_FIX(8), ZTI_THREAD_TUNE } },
{ "spa_zio_write", { ZTI_THREAD_TUNE, ZTI_THREAD_FIX(8) } },
{ "spa_zio_free", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } },
{ "spa_zio_claim", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } },
{ "spa_zio_ioctl", { ZTI_THREAD_ONE, ZTI_THREAD_ONE } },
/*
* Define the taskq threads for the following I/O types:
* NULL, READ, WRITE, FREE, CLAIM, and IOCTL
*/
const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {
/* ISSUE ISSUE_HIGH INTR INTR_HIGH */
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
{ ZTI_FIX(8), ZTI_NULL, ZTI_TUNE, ZTI_NULL },
{ ZTI_TUNE, ZTI_FIX(5), ZTI_FIX(8), ZTI_FIX(5) },
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
{ ZTI_ONE, ZTI_NULL, ZTI_ONE, ZTI_NULL },
};
enum zti_modes zio_taskq_tune_mode = zti_mode_online_percent;
@ -581,14 +584,14 @@ spa_activate(spa_t *spa, int mode)
spa->spa_log_class = metaslab_class_create(zfs_metaslab_ops);
for (int t = 0; t < ZIO_TYPES; t++) {
const zio_taskq_info_t *ztip = &zio_taskqs[t];
for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
enum zti_modes mode = ztip->zti_nthreads[q].zti_mode;
uint_t value = ztip->zti_nthreads[q].zti_value;
const zio_taskq_info_t *ztip = &zio_taskqs[t][q];
enum zti_modes mode = ztip->zti_mode;
uint_t value = ztip->zti_value;
char name[32];
(void) snprintf(name, sizeof (name),
"%s_%s", ztip->zti_name, zio_taskq_types[q]);
"%s_%s", zio_type_name[t], zio_taskq_types[q]);
if (mode == zti_mode_tune) {
mode = zio_taskq_tune_mode;
@ -613,6 +616,10 @@ spa_activate(spa_t *spa, int mode)
TASKQ_PREPOPULATE | TASKQ_THREADS_CPU_PCT);
break;
case zti_mode_null:
spa->spa_zio_taskq[t][q] = NULL;
break;
case zti_mode_tune:
default:
panic("unrecognized mode for "
@ -659,7 +666,8 @@ spa_deactivate(spa_t *spa)
for (int t = 0; t < ZIO_TYPES; t++) {
for (int q = 0; q < ZIO_TASKQ_TYPES; q++) {
taskq_destroy(spa->spa_zio_taskq[t][q]);
if (spa->spa_zio_taskq[t][q] != NULL)
taskq_destroy(spa->spa_zio_taskq[t][q]);
spa->spa_zio_taskq[t][q] = NULL;
}
}

View File

@ -368,10 +368,8 @@ space_map_unload(space_map_t *sm)
uint64_t
space_map_maxsize(space_map_t *sm)
{
if (sm->sm_loaded && sm->sm_ops != NULL)
return (sm->sm_ops->smop_max(sm));
else
return (-1ULL);
ASSERT(sm->sm_ops != NULL);
return (sm->sm_ops->smop_max(sm));
}
uint64_t

View File

@ -46,6 +46,7 @@ extern metaslab_t *metaslab_init(metaslab_group_t *mg, space_map_obj_t *smo,
extern void metaslab_fini(metaslab_t *msp);
extern void metaslab_sync(metaslab_t *msp, uint64_t txg);
extern void metaslab_sync_done(metaslab_t *msp, uint64_t txg);
extern void metaslab_sync_reassess(metaslab_group_t *mg);
#define METASLAB_HINTBP_FAVOR 0x0
#define METASLAB_HINTBP_AVOID 0x1

View File

@ -46,6 +46,7 @@ struct metaslab_group {
kmutex_t mg_lock;
avl_tree_t mg_metaslab_tree;
uint64_t mg_aliquot;
uint64_t mg_bonus_area;
int64_t mg_bias;
metaslab_class_t *mg_class;
vdev_t *mg_vd;

View File

@ -87,7 +87,9 @@ typedef enum spa_log_state {
enum zio_taskq_type {
ZIO_TASKQ_ISSUE = 0,
ZIO_TASKQ_ISSUE_HIGH,
ZIO_TASKQ_INTERRUPT,
ZIO_TASKQ_INTERRUPT_HIGH,
ZIO_TASKQ_TYPES
};

View File

@ -77,6 +77,7 @@ struct space_map_ops {
void (*smop_claim)(space_map_t *sm, uint64_t start, uint64_t size);
void (*smop_free)(space_map_t *sm, uint64_t start, uint64_t size);
uint64_t (*smop_max)(space_map_t *sm);
boolean_t (*smop_fragmented)(space_map_t *sm);
};
/*

View File

@ -107,14 +107,15 @@ enum zio_compress {
#define ZIO_PRIORITY_NOW (zio_priority_table[0])
#define ZIO_PRIORITY_SYNC_READ (zio_priority_table[1])
#define ZIO_PRIORITY_SYNC_WRITE (zio_priority_table[2])
#define ZIO_PRIORITY_ASYNC_READ (zio_priority_table[3])
#define ZIO_PRIORITY_ASYNC_WRITE (zio_priority_table[4])
#define ZIO_PRIORITY_FREE (zio_priority_table[5])
#define ZIO_PRIORITY_CACHE_FILL (zio_priority_table[6])
#define ZIO_PRIORITY_LOG_WRITE (zio_priority_table[7])
#define ZIO_PRIORITY_RESILVER (zio_priority_table[8])
#define ZIO_PRIORITY_SCRUB (zio_priority_table[9])
#define ZIO_PRIORITY_TABLE_SIZE 10
#define ZIO_PRIORITY_LOG_WRITE (zio_priority_table[3])
#define ZIO_PRIORITY_CACHE_FILL (zio_priority_table[4])
#define ZIO_PRIORITY_AGG (zio_priority_table[5])
#define ZIO_PRIORITY_FREE (zio_priority_table[6])
#define ZIO_PRIORITY_ASYNC_WRITE (zio_priority_table[7])
#define ZIO_PRIORITY_ASYNC_READ (zio_priority_table[8])
#define ZIO_PRIORITY_RESILVER (zio_priority_table[9])
#define ZIO_PRIORITY_SCRUB (zio_priority_table[10])
#define ZIO_PRIORITY_TABLE_SIZE 11
#define ZIO_FLAG_MUSTSUCCEED 0x00000
#define ZIO_FLAG_CANFAIL 0x00001

View File

@ -1773,9 +1773,13 @@ void
vdev_sync_done(vdev_t *vd, uint64_t txg)
{
metaslab_t *msp;
boolean_t reassess = !txg_list_empty(&vd->vdev_ms_list, TXG_CLEAN(txg));
while (msp = txg_list_remove(&vd->vdev_ms_list, TXG_CLEAN(txg)))
metaslab_sync_done(msp, txg);
if (reassess)
metaslab_sync_reassess(vd->vdev_mg);
}
void

View File

@ -233,7 +233,7 @@ vdev_queue_io_to_issue(vdev_queue_t *vq, uint64_t pending_limit)
ASSERT(size <= zfs_vdev_aggregation_limit);
aio = zio_vdev_delegated_io(fio->io_vd, fio->io_offset,
zio_buf_alloc(size), size, fio->io_type, ZIO_PRIORITY_NOW,
zio_buf_alloc(size), size, fio->io_type, ZIO_PRIORITY_AGG,
flags | ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_QUEUE,
vdev_queue_agg_io_done, NULL);

View File

@ -49,11 +49,12 @@ uint8_t zio_priority_table[ZIO_PRIORITY_TABLE_SIZE] = {
0, /* ZIO_PRIORITY_NOW */
0, /* ZIO_PRIORITY_SYNC_READ */
0, /* ZIO_PRIORITY_SYNC_WRITE */
6, /* ZIO_PRIORITY_ASYNC_READ */
4, /* ZIO_PRIORITY_ASYNC_WRITE */
4, /* ZIO_PRIORITY_FREE */
0, /* ZIO_PRIORITY_CACHE_FILL */
0, /* ZIO_PRIORITY_LOG_WRITE */
1, /* ZIO_PRIORITY_CACHE_FILL */
1, /* ZIO_PRIORITY_AGG */
4, /* ZIO_PRIORITY_FREE */
4, /* ZIO_PRIORITY_ASYNC_WRITE */
6, /* ZIO_PRIORITY_ASYNC_READ */
10, /* ZIO_PRIORITY_RESILVER */
20, /* ZIO_PRIORITY_SCRUB */
};
@ -64,7 +65,9 @@ uint8_t zio_priority_table[ZIO_PRIORITY_TABLE_SIZE] = {
* ==========================================================================
*/
char *zio_type_name[ZIO_TYPES] = {
"null", "read", "write", "free", "claim", "ioctl" };
"zio_null", "zio_read", "zio_write", "zio_free", "zio_claim",
"zio_ioctl"
};
#define SYNC_PASS_DEFERRED_FREE 1 /* defer frees after this pass */
#define SYNC_PASS_DONT_COMPRESS 4 /* don't compress after this pass */
@ -942,6 +945,7 @@ zio_write_bp_init(zio_t *zio)
static void
zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q)
{
spa_t *spa = zio->io_spa;
zio_type_t t = zio->io_type;
/*
@ -958,7 +962,15 @@ zio_taskq_dispatch(zio_t *zio, enum zio_taskq_type q)
if (t == ZIO_TYPE_WRITE && zio->io_vd && zio->io_vd->vdev_aux)
t = ZIO_TYPE_NULL;
(void) taskq_dispatch_safe(zio->io_spa->spa_zio_taskq[t][q],
/*
* If this is a high priority I/O, then use the high priority taskq.
*/
if (zio->io_priority == ZIO_PRIORITY_NOW &&
spa->spa_zio_taskq[t][q + 1] != NULL)
q++;
ASSERT3U(q, <, ZIO_TASKQ_TYPES);
(void) taskq_dispatch_safe(spa->spa_zio_taskq[t][q],
(task_func_t *)zio_execute, zio, &zio->io_task);
}