commit 671303c6d5

Merge recent vendor changes:

  3086 unnecessarily setting DS_FLAG_INCONSISTENT on async destroyed datasets
  3090 vdev_reopen() during reguid causes vdev to be treated as corrupt
  3102 vdev_uberblock_load() and vdev_validate() may read the wrong label

References:
  https://www.illumos.org/issues/3086
  https://www.illumos.org/issues/3090
  https://www.illumos.org/issues/3102

PR:		kern/170912, kern/170914
Obtained from:	illumos (changeset #13776, #13777)
MFC after:	2 weeks
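The heart of the 3102 fix is a label-selection policy: rather than trusting whichever label unpacks first, vdev_label_read_config() now returns the most up-to-date label whose txg does not exceed the caller-supplied txg. A minimal standalone sketch of that policy follows; the struct and function names here are hypothetical stand-ins, not the vendor code (the real implementation in the vdev_label.c hunks below works on unpacked label nvlists):

/*
 * Hypothetical sketch of the 3102 label-selection policy; 'struct label'
 * stands in for an unpacked label nvlist and is not vendor code.
 */
#include <stddef.h>
#include <stdint.h>

struct label {
	uint64_t txg;	/* 0 when the label carries no txg (spare/cache, */
			/* or a vdev that was never fully initialized)   */
	int	valid;	/* nonzero if the label unpacked cleanly         */
};

static struct label *
pick_label(struct label *labels, size_t n, uint64_t txg)
{
	struct label *best = NULL;
	uint64_t best_txg = 0;

	for (size_t l = 0; l < n; l++) {
		if (!labels[l].valid)
			continue;
		/* aux/uninitialized labels: first valid one wins */
		if (labels[l].txg == 0)
			return (&labels[l]);
		/* otherwise: the newest label not newer than 'txg' */
		if (labels[l].txg <= txg && labels[l].txg > best_txg) {
			best_txg = labels[l].txg;
			best = &labels[l];
		}
	}
	return (best);
}

Per the diff, passing -1ULL for txg (as vdev_validate_aux() and vdev_inuse() now do) degenerates to "newest valid label", while vdev_uberblock_load() passes the best uberblock's ub_txg so the returned config can never come from a future txg.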
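For 3090, spa_change_guid() is restructured around the check/sync split visible in the spa.c hunks below: a check function vetoes the reguid unless the root vdev is healthy, and a sync function rewrites the guid and guid sum in syncing context, so no vdev_reopen() can observe the two while they are briefly inconsistent. A schematic sketch of that shape, with hypothetical stand-in types (the real code dispatches spa_change_guid_check()/spa_change_guid_sync() through dsl_sync_task_do() under spa_namespace_lock):

/*
 * Hypothetical sketch of the check/sync split used by the 3090 fix;
 * these structs stand in for spa_t/vdev_t and are not vendor code.
 */
#include <errno.h>
#include <stdint.h>

struct vdev { uint64_t guid; uint64_t guid_sum; int healthy; };
struct spa  { struct vdev root; };

/* open-context check: veto the change unless the pool is healthy */
static int
change_guid_check(struct spa *spa)
{
	if (!spa->root.healthy)
		return (ENXIO);
	return (0);
}

/* syncing-context apply: adjust the guid and the running guid sum */
static void
change_guid_sync(struct spa *spa, uint64_t newguid)
{
	uint64_t oldguid = spa->root.guid;

	spa->root.guid = newguid;
	spa->root.guid_sum += (newguid - oldguid);
}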
@@ -364,7 +364,7 @@ ztest_info_t ztest_info[] = {
    { ztest_spa_rename, 1, &zopt_rarely },
    { ztest_scrub, 1, &zopt_rarely },
    { ztest_dsl_dataset_promote_busy, 1, &zopt_rarely },
    { ztest_vdev_attach_detach, 1, &zopt_rarely },
    { ztest_vdev_attach_detach, 1, &zopt_rarely },
    { ztest_vdev_LUN_growth, 1, &zopt_rarely },
    { ztest_vdev_add_remove, 1,
        &ztest_opts.zo_vdevtime },

@@ -415,6 +415,13 @@ static spa_t *ztest_spa = NULL;
static ztest_ds_t *ztest_ds;

static mutex_t ztest_vdev_lock;

/*
 * The ztest_name_lock protects the pool and dataset namespace used by
 * the individual tests. To modify the namespace, consumers must grab
 * this lock as writer. Grabbing the lock as reader will ensure that the
 * namespace does not change while the lock is held.
 */
static rwlock_t ztest_name_lock;

static boolean_t ztest_dump_core = B_TRUE;

@@ -2225,6 +2232,7 @@ ztest_zil_remount(ztest_ds_t *zd, uint64_t id)
{
    objset_t *os = zd->zd_os;

    VERIFY(mutex_lock(&zd->zd_dirobj_lock) == 0);
    (void) rw_wrlock(&zd->zd_zilog_lock);

    /* zfsvfs_teardown() */

@@ -2235,6 +2243,7 @@ ztest_zil_remount(ztest_ds_t *zd, uint64_t id)
    zil_replay(os, zd, ztest_replay_vector);

    (void) rw_unlock(&zd->zd_zilog_lock);
    VERIFY(mutex_unlock(&zd->zd_dirobj_lock) == 0);
}

/*

@@ -4860,10 +4869,16 @@ ztest_reguid(ztest_ds_t *zd, uint64_t id)
{
    spa_t *spa = ztest_spa;
    uint64_t orig, load;
    int error;

    orig = spa_guid(spa);
    load = spa_load_guid(spa);
    if (spa_change_guid(spa) != 0)

    (void) rw_wrlock(&ztest_name_lock);
    error = spa_change_guid(spa);
    (void) rw_unlock(&ztest_name_lock);

    if (error != 0)
        return;

    if (ztest_opts.zo_verbose >= 3) {

@@ -5540,8 +5555,15 @@ ztest_freeze(void)
     */
    kernel_init(FREAD | FWRITE);
    VERIFY3U(0, ==, spa_open(ztest_opts.zo_pool, &spa, FTAG));
    ASSERT(spa_freeze_txg(spa) == UINT64_MAX);
    VERIFY3U(0, ==, ztest_dataset_open(0));
    ztest_dataset_close(0);

    spa->spa_debug = B_TRUE;
    ztest_spa = spa;
    txg_wait_synced(spa_get_dsl(spa), 0);
    ztest_reguid(NULL, 0);

    spa_close(spa, FTAG);
    kernel_fini();
}

@@ -21,7 +21,7 @@
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright 2011 Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2011 by Delphix. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

/*

@@ -437,8 +437,8 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
    uint_t i, nspares, nl2cache;
    boolean_t config_seen;
    uint64_t best_txg;
    char *name, *hostname, *comment;
    uint64_t version, guid;
    char *name, *hostname;
    uint64_t guid;
    uint_t children = 0;
    nvlist_t **child = NULL;
    uint_t holes;

@@ -524,61 +524,54 @@ get_configs(libzfs_handle_t *hdl, pool_list_t *pl, boolean_t active_ok)
             * configuration:
             *
             *  version
             *  pool guid
             *  name
             *  pool guid
             *  name
             *  pool txg (if available)
             *  comment (if available)
             *  pool state
             *  pool state
             *  hostid (if available)
             *  hostname (if available)
             */
            uint64_t state;
            uint64_t state, version, pool_txg;
            char *comment = NULL;

            verify(nvlist_lookup_uint64(tmp,
                ZPOOL_CONFIG_VERSION, &version) == 0);
            if (nvlist_add_uint64(config,
                ZPOOL_CONFIG_VERSION, version) != 0)
                goto nomem;
            verify(nvlist_lookup_uint64(tmp,
                ZPOOL_CONFIG_POOL_GUID, &guid) == 0);
            if (nvlist_add_uint64(config,
                ZPOOL_CONFIG_POOL_GUID, guid) != 0)
                goto nomem;
            verify(nvlist_lookup_string(tmp,
                ZPOOL_CONFIG_POOL_NAME, &name) == 0);
            if (nvlist_add_string(config,
                ZPOOL_CONFIG_POOL_NAME, name) != 0)
                goto nomem;
            version = fnvlist_lookup_uint64(tmp,
                ZPOOL_CONFIG_VERSION);
            fnvlist_add_uint64(config,
                ZPOOL_CONFIG_VERSION, version);
            guid = fnvlist_lookup_uint64(tmp,
                ZPOOL_CONFIG_POOL_GUID);
            fnvlist_add_uint64(config,
                ZPOOL_CONFIG_POOL_GUID, guid);
            name = fnvlist_lookup_string(tmp,
                ZPOOL_CONFIG_POOL_NAME);
            fnvlist_add_string(config,
                ZPOOL_CONFIG_POOL_NAME, name);

            if (nvlist_lookup_uint64(tmp,
                ZPOOL_CONFIG_POOL_TXG, &pool_txg) == 0)
                fnvlist_add_uint64(config,
                    ZPOOL_CONFIG_POOL_TXG, pool_txg);

            /*
             * COMMENT is optional, don't bail if it's not
             * there, instead, set it to NULL.
             */
            if (nvlist_lookup_string(tmp,
                ZPOOL_CONFIG_COMMENT, &comment) != 0)
                comment = NULL;
            else if (nvlist_add_string(config,
                ZPOOL_CONFIG_COMMENT, comment) != 0)
                goto nomem;
                ZPOOL_CONFIG_COMMENT, &comment) == 0)
                fnvlist_add_string(config,
                    ZPOOL_CONFIG_COMMENT, comment);

            verify(nvlist_lookup_uint64(tmp,
                ZPOOL_CONFIG_POOL_STATE, &state) == 0);
            if (nvlist_add_uint64(config,
                ZPOOL_CONFIG_POOL_STATE, state) != 0)
                goto nomem;
            state = fnvlist_lookup_uint64(tmp,
                ZPOOL_CONFIG_POOL_STATE);
            fnvlist_add_uint64(config,
                ZPOOL_CONFIG_POOL_STATE, state);

            hostid = 0;
            if (nvlist_lookup_uint64(tmp,
                ZPOOL_CONFIG_HOSTID, &hostid) == 0) {
                if (nvlist_add_uint64(config,
                    ZPOOL_CONFIG_HOSTID, hostid) != 0)
                    goto nomem;
                verify(nvlist_lookup_string(tmp,
                    ZPOOL_CONFIG_HOSTNAME,
                    &hostname) == 0);
                if (nvlist_add_string(config,
                    ZPOOL_CONFIG_HOSTNAME,
                    hostname) != 0)
                    goto nomem;
                fnvlist_add_uint64(config,
                    ZPOOL_CONFIG_HOSTID, hostid);
                hostname = fnvlist_lookup_string(tmp,
                    ZPOOL_CONFIG_HOSTNAME);
                fnvlist_add_string(config,
                    ZPOOL_CONFIG_HOSTNAME, hostname);
            }

            config_seen = B_TRUE;

@@ -1769,15 +1769,15 @@ dmu_init(void)
    dnode_init();
    dbuf_init();
    zfetch_init();
    arc_init();
    l2arc_init();
    arc_init();
}

void
dmu_fini(void)
{
    l2arc_fini();
    arc_fini();
    l2arc_fini();
    zfetch_fini();
    dbuf_fini();
    dnode_fini();

@@ -1649,13 +1649,6 @@ dmu_recv_existing_end(dmu_recv_cookie_t *drc)
    dsl_dataset_t *ds = drc->drc_logical_ds;
    int err, myerr;

    /*
     * XXX hack; seems the ds is still dirty and dsl_pool_zil_clean()
     * expects it to have a ds_user_ptr (and zil), but clone_swap()
     * can close it.
     */
    txg_wait_synced(ds->ds_dir->dd_pool, 0);

    if (dsl_dataset_tryown(ds, FALSE, dmu_recv_tag)) {
        err = dsl_dataset_clone_swap(drc->drc_real_ds, ds,
            drc->drc_force);

@@ -106,14 +106,8 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
    ASSERT(BP_GET_TYPE(bp) != DMU_OT_NONE);
    ASSERT(DMU_OT_IS_VALID(BP_GET_TYPE(bp)));
    if (ds == NULL) {
        /*
         * Account for the meta-objset space in its placeholder
         * dsl_dir.
         */
        ASSERT3U(compressed, ==, uncompressed); /* it's all metadata */
        dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
            used, compressed, uncompressed, tx);
        dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
        dsl_pool_mos_diduse_space(tx->tx_pool,
            used, compressed, uncompressed);
        return;
    }
    dmu_buf_will_dirty(ds->ds_dbuf, tx);

@@ -149,15 +143,9 @@ dsl_dataset_block_kill(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx,

    ASSERT(used > 0);
    if (ds == NULL) {
        /*
         * Account for the meta-objset space in its placeholder
         * dataset.
         */
        dsl_free(tx->tx_pool, tx->tx_txg, bp);

        dsl_dir_diduse_space(tx->tx_pool->dp_mos_dir, DD_USED_HEAD,
            -used, -compressed, -uncompressed, tx);
        dsl_dir_dirty(tx->tx_pool->dp_mos_dir, tx);
        dsl_pool_mos_diduse_space(tx->tx_pool,
            -used, -compressed, -uncompressed);
        return (used);
    }
    ASSERT3P(tx->tx_pool, ==, ds->ds_dir->dd_pool);

@@ -1116,26 +1104,26 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
    dummy_ds.ds_dir = dd;
    dummy_ds.ds_object = ds->ds_object;

    /*
     * Check for errors and mark this ds as inconsistent, in
     * case we crash while freeing the objects.
     */
    err = dsl_sync_task_do(dd->dd_pool, dsl_dataset_destroy_begin_check,
        dsl_dataset_destroy_begin_sync, ds, NULL, 0);
    if (err)
        goto out;

    err = dmu_objset_from_ds(ds, &os);
    if (err)
        goto out;

    /*
     * If async destruction is not enabled try to remove all objects
     * while in the open context so that there is less work to do in
     * the syncing context.
     */
    if (!spa_feature_is_enabled(dsl_dataset_get_spa(ds),
        &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY])) {
        /*
         * Check for errors and mark this ds as inconsistent, in
         * case we crash while freeing the objects.
         */
        err = dsl_sync_task_do(dd->dd_pool,
            dsl_dataset_destroy_begin_check,
            dsl_dataset_destroy_begin_sync, ds, NULL, 0);
        if (err)
            goto out;

        err = dmu_objset_from_ds(ds, &os);
        if (err)
            goto out;

        /*
         * Remove all objects while in the open context so that
         * there is less work to do in the syncing context.
         */
        for (obj = 0; err == 0; err = dmu_object_next(os, &obj, FALSE,
            ds->ds_phys->ds_prev_snap_txg)) {
            /*

@@ -1146,30 +1134,25 @@ dsl_dataset_destroy(dsl_dataset_t *ds, void *tag, boolean_t defer)
        }
        if (err != ESRCH)
            goto out;
    }

    /*
     * Only the ZIL knows how to free log blocks.
     */
    zil_destroy(dmu_objset_zil(os), B_FALSE);
    /*
     * Sync out all in-flight IO.
     */
    txg_wait_synced(dd->dd_pool, 0);

    /*
     * Sync out all in-flight IO.
     */
    txg_wait_synced(dd->dd_pool, 0);
    /*
     * If we managed to free all the objects in open
     * context, the user space accounting should be zero.
     */
    if (ds->ds_phys->ds_bp.blk_fill == 0 &&
        dmu_objset_userused_enabled(os)) {
        uint64_t count;

        /*
         * If we managed to free all the objects in open
         * context, the user space accounting should be zero.
         */
        if (ds->ds_phys->ds_bp.blk_fill == 0 &&
            dmu_objset_userused_enabled(os)) {
            uint64_t count;

            ASSERT(zap_count(os, DMU_USERUSED_OBJECT, &count) != 0 ||
                count == 0);
            ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT, &count) != 0 ||
                count == 0);
            ASSERT(zap_count(os, DMU_USERUSED_OBJECT,
                &count) != 0 || count == 0);
            ASSERT(zap_count(os, DMU_GROUPUSED_OBJECT,
                &count) != 0 || count == 0);
        }
    }

    rw_enter(&dd->dd_pool->dp_config_rwlock, RW_READER);

@@ -1906,6 +1889,7 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
    } else {
        zfeature_info_t *async_destroy =
            &spa_feature_table[SPA_FEATURE_ASYNC_DESTROY];
        objset_t *os;

        /*
         * There's no next snapshot, so this is a head dataset.

@@ -1917,6 +1901,8 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
        dsl_deadlist_free(mos, ds->ds_phys->ds_deadlist_obj, tx);
        ds->ds_phys->ds_deadlist_obj = 0;

        VERIFY3U(0, ==, dmu_objset_from_ds(ds, &os));

        if (!spa_feature_is_enabled(dp->dp_spa, async_destroy)) {
            err = old_synchronous_dataset_destroy(ds, tx);
        } else {

@@ -1926,12 +1912,12 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
             */
            uint64_t used, comp, uncomp;

            ASSERT(err == 0 || err == EBUSY);
            zil_destroy_sync(dmu_objset_zil(os), tx);

            if (!spa_feature_is_active(dp->dp_spa, async_destroy)) {
                spa_feature_incr(dp->dp_spa, async_destroy, tx);
                dp->dp_bptree_obj = bptree_alloc(
                    dp->dp_meta_objset, tx);
                VERIFY(zap_add(dp->dp_meta_objset,
                dp->dp_bptree_obj = bptree_alloc(mos, tx);
                VERIFY(zap_add(mos,
                    DMU_POOL_DIRECTORY_OBJECT,
                    DMU_POOL_BPTREE_OBJ, sizeof (uint64_t), 1,
                    &dp->dp_bptree_obj, tx) == 0);

@@ -1944,7 +1930,7 @@ dsl_dataset_destroy_sync(void *arg1, void *tag, dmu_tx_t *tx)
            ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
                ds->ds_phys->ds_unique_bytes == used);

            bptree_add(dp->dp_meta_objset, dp->dp_bptree_obj,
            bptree_add(mos, dp->dp_bptree_obj,
                &ds->ds_phys->ds_bp, ds->ds_phys->ds_prev_snap_txg,
                used, comp, uncomp, tx);
            dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,

@@ -2233,7 +2219,6 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
    dmu_buf_will_dirty(ds->ds_dbuf, tx);
    ds->ds_phys->ds_fsid_guid = ds->ds_fsid_guid;

    dsl_dir_dirty(ds->ds_dir, tx);
    dmu_objset_sync(ds->ds_objset, zio, tx);
}

@@ -195,7 +195,6 @@ dsl_dir_open_obj(dsl_pool_t *dp, uint64_t ddobj,
        kmem_free(dd, sizeof (dsl_dir_t));
        dmu_buf_rele(dbuf, tag);
        return (err);

    }

void

@@ -229,7 +228,7 @@ dsl_dir_name(dsl_dir_t *dd, char *buf)
        }
    }

/* Calculate name legnth, avoiding all the strcat calls of dsl_dir_name */
/* Calculate name length, avoiding all the strcat calls of dsl_dir_name */
int
dsl_dir_namelen(dsl_dir_t *dd)
{

@@ -593,8 +592,6 @@ dsl_dir_sync(dsl_dir_t *dd, dmu_tx_t *tx)
{
    ASSERT(dmu_tx_is_syncing(tx));

    dmu_buf_will_dirty(dd->dd_dbuf, tx);

    mutex_enter(&dd->dd_lock);
    ASSERT3U(dd->dd_tempreserved[tx->tx_txg&TXG_MASK], ==, 0);
    dprintf_dd(dd, "txg=%llu towrite=%lluK\n", tx->tx_txg,

@@ -951,8 +948,6 @@ dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
    ASSERT(dmu_tx_is_syncing(tx));
    ASSERT(type < DD_USED_NUM);

    dsl_dir_dirty(dd, tx);

    if (needlock)
        mutex_enter(&dd->dd_lock);
    accounted_delta = parent_delta(dd, dd->dd_phys->dd_used_bytes, used);

@@ -961,6 +956,7 @@ dsl_dir_diduse_space(dsl_dir_t *dd, dd_used_t type,
        dd->dd_phys->dd_compressed_bytes >= -compressed);
    ASSERT(uncompressed >= 0 ||
        dd->dd_phys->dd_uncompressed_bytes >= -uncompressed);
    dmu_buf_will_dirty(dd->dd_dbuf, tx);
    dd->dd_phys->dd_used_bytes += used;
    dd->dd_phys->dd_uncompressed_bytes += uncompressed;
    dd->dd_phys->dd_compressed_bytes += compressed;

@@ -1002,13 +998,13 @@ dsl_dir_transfer_space(dsl_dir_t *dd, int64_t delta,
    if (delta == 0 || !(dd->dd_phys->dd_flags & DD_FLAG_USED_BREAKDOWN))
        return;

    dsl_dir_dirty(dd, tx);
    if (needlock)
        mutex_enter(&dd->dd_lock);
    ASSERT(delta > 0 ?
        dd->dd_phys->dd_used_breakdown[oldtype] >= delta :
        dd->dd_phys->dd_used_breakdown[newtype] >= -delta);
    ASSERT(dd->dd_phys->dd_used_bytes >= ABS(delta));
    dmu_buf_will_dirty(dd->dd_dbuf, tx);
    dd->dd_phys->dd_used_breakdown[oldtype] -= delta;
    dd->dd_phys->dd_used_breakdown[newtype] += delta;
    if (needlock)

@@ -42,6 +42,7 @@
#include <sys/dsl_deadlist.h>
#include <sys/bptree.h>
#include <sys/zfeature.h>
#include <sys/zil_impl.h>

int zfs_no_write_throttle = 0;
int zfs_write_limit_shift = 3; /* 1/8th of physical memory */

@@ -111,12 +112,12 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)

    txg_list_create(&dp->dp_dirty_datasets,
        offsetof(dsl_dataset_t, ds_dirty_link));
    txg_list_create(&dp->dp_dirty_zilogs,
        offsetof(zilog_t, zl_dirty_link));
    txg_list_create(&dp->dp_dirty_dirs,
        offsetof(dsl_dir_t, dd_dirty_link));
    txg_list_create(&dp->dp_sync_tasks,
        offsetof(dsl_sync_task_group_t, dstg_node));
    list_create(&dp->dp_synced_datasets, sizeof (dsl_dataset_t),
        offsetof(dsl_dataset_t, ds_synced_link));

    mutex_init(&dp->dp_lock, NULL, MUTEX_DEFAULT, NULL);

@@ -249,9 +250,9 @@ dsl_pool_close(dsl_pool_t *dp)
        dmu_objset_evict(dp->dp_meta_objset);

    txg_list_destroy(&dp->dp_dirty_datasets);
    txg_list_destroy(&dp->dp_dirty_zilogs);
    txg_list_destroy(&dp->dp_sync_tasks);
    txg_list_destroy(&dp->dp_dirty_dirs);
    list_destroy(&dp->dp_synced_datasets);

    arc_flush(dp->dp_spa);
    txg_fini(dp);

@@ -331,6 +332,21 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
    return (dp);
}

/*
 * Account for the meta-objset space in its placeholder dsl_dir.
 */
void
dsl_pool_mos_diduse_space(dsl_pool_t *dp,
    int64_t used, int64_t comp, int64_t uncomp)
{
    ASSERT3U(comp, ==, uncomp); /* it's all metadata */
    mutex_enter(&dp->dp_lock);
    dp->dp_mos_used_delta += used;
    dp->dp_mos_compressed_delta += comp;
    dp->dp_mos_uncompressed_delta += uncomp;
    mutex_exit(&dp->dp_lock);
}

static int
deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{

@@ -349,11 +365,14 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
    dmu_tx_t *tx;
    dsl_dir_t *dd;
    dsl_dataset_t *ds;
    dsl_sync_task_group_t *dstg;
    objset_t *mos = dp->dp_meta_objset;
    hrtime_t start, write_time;
    uint64_t data_written;
    int err;
    list_t synced_datasets;

    list_create(&synced_datasets, sizeof (dsl_dataset_t),
        offsetof(dsl_dataset_t, ds_synced_link));

    /*
     * We need to copy dp_space_towrite() before doing

@@ -376,7 +395,7 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
         * may sync newly-created datasets on pass 2.
         */
        ASSERT(!list_link_active(&ds->ds_synced_link));
        list_insert_tail(&dp->dp_synced_datasets, ds);
        list_insert_tail(&synced_datasets, ds);
        dsl_dataset_sync(ds, zio, tx);
    }
    DTRACE_PROBE(pool_sync__1setup);

@@ -386,15 +405,20 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
    ASSERT(err == 0);
    DTRACE_PROBE(pool_sync__2rootzio);

    for (ds = list_head(&dp->dp_synced_datasets); ds;
        ds = list_next(&dp->dp_synced_datasets, ds))
    /*
     * After the data blocks have been written (ensured by the zio_wait()
     * above), update the user/group space accounting.
     */
    for (ds = list_head(&synced_datasets); ds;
        ds = list_next(&synced_datasets, ds))
        dmu_objset_do_userquota_updates(ds->ds_objset, tx);

    /*
     * Sync the datasets again to push out the changes due to
     * userspace updates. This must be done before we process the
     * sync tasks, because that could cause a snapshot of a dataset
     * whose ds_bp will be rewritten when we do this 2nd sync.
     * sync tasks, so that any snapshots will have the correct
     * user accounting information (and we won't get confused
     * about which blocks are part of the snapshot).
     */
    zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
    while (ds = txg_list_remove(&dp->dp_dirty_datasets, txg)) {

@@ -405,30 +429,42 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
    err = zio_wait(zio);

    /*
     * Move dead blocks from the pending deadlist to the on-disk
     * deadlist.
     * Now that the datasets have been completely synced, we can
     * clean up our in-memory structures accumulated while syncing:
     *
     *  - move dead blocks from the pending deadlist to the on-disk deadlist
     *  - clean up zil records
     *  - release hold from dsl_dataset_dirty()
     */
    for (ds = list_head(&dp->dp_synced_datasets); ds;
        ds = list_next(&dp->dp_synced_datasets, ds)) {
    while (ds = list_remove_head(&synced_datasets)) {
        objset_t *os = ds->ds_objset;
        bplist_iterate(&ds->ds_pending_deadlist,
            deadlist_enqueue_cb, &ds->ds_deadlist, tx);
        ASSERT(!dmu_objset_is_dirty(os, txg));
        dmu_buf_rele(ds->ds_dbuf, ds);
    }

    while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg)) {
        /*
         * No more sync tasks should have been added while we
         * were syncing.
         */
        ASSERT(spa_sync_pass(dp->dp_spa) == 1);
        dsl_sync_task_group_sync(dstg, tx);
    }
    DTRACE_PROBE(pool_sync__3task);

    start = gethrtime();
    while (dd = txg_list_remove(&dp->dp_dirty_dirs, txg))
        dsl_dir_sync(dd, tx);
    write_time += gethrtime() - start;

    /*
     * The MOS's space is accounted for in the pool/$MOS
     * (dp_mos_dir). We can't modify the mos while we're syncing
     * it, so we remember the deltas and apply them here.
     */
    if (dp->dp_mos_used_delta != 0 || dp->dp_mos_compressed_delta != 0 ||
        dp->dp_mos_uncompressed_delta != 0) {
        dsl_dir_diduse_space(dp->dp_mos_dir, DD_USED_HEAD,
            dp->dp_mos_used_delta,
            dp->dp_mos_compressed_delta,
            dp->dp_mos_uncompressed_delta, tx);
        dp->dp_mos_used_delta = 0;
        dp->dp_mos_compressed_delta = 0;
        dp->dp_mos_uncompressed_delta = 0;
    }

    start = gethrtime();
    if (list_head(&mos->os_dirty_dnodes[txg & TXG_MASK]) != NULL ||
        list_head(&mos->os_free_dnodes[txg & TXG_MASK]) != NULL) {

@@ -444,6 +480,27 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
        hrtime_t, dp->dp_read_overhead);
    write_time -= dp->dp_read_overhead;

    /*
     * If we modify a dataset in the same txg that we want to destroy it,
     * its dsl_dir's dd_dbuf will be dirty, and thus have a hold on it.
     * dsl_dir_destroy_check() will fail if there are unexpected holds.
     * Therefore, we want to sync the MOS (thus syncing the dd_dbuf
     * and clearing the hold on it) before we process the sync_tasks.
     * The MOS data dirtied by the sync_tasks will be synced on the next
     * pass.
     */
    DTRACE_PROBE(pool_sync__3task);
    if (!txg_list_empty(&dp->dp_sync_tasks, txg)) {
        dsl_sync_task_group_t *dstg;
        /*
         * No more sync tasks should have been added while we
         * were syncing.
         */
        ASSERT(spa_sync_pass(dp->dp_spa) == 1);
        while (dstg = txg_list_remove(&dp->dp_sync_tasks, txg))
            dsl_sync_task_group_sync(dstg, tx);
    }

    dmu_tx_commit(tx);

    dp->dp_space_towrite[txg & TXG_MASK] = 0;

@@ -492,15 +549,14 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
void
dsl_pool_sync_done(dsl_pool_t *dp, uint64_t txg)
{
    zilog_t *zilog;
    dsl_dataset_t *ds;
    objset_t *os;

    while (ds = list_head(&dp->dp_synced_datasets)) {
        list_remove(&dp->dp_synced_datasets, ds);
        os = ds->ds_objset;
        zil_clean(os->os_zil, txg);
        ASSERT(!dmu_objset_is_dirty(os, txg));
        dmu_buf_rele(ds->ds_dbuf, ds);
    while (zilog = txg_list_remove(&dp->dp_dirty_zilogs, txg)) {
        ds = dmu_objset_ds(zilog->zl_os);
        zil_clean(zilog, txg);
        ASSERT(!dmu_objset_is_dirty(zilog->zl_os, txg));
        dmu_buf_rele(ds->ds_dbuf, zilog);
    }
    ASSERT(!dmu_objset_is_dirty(dp->dp_meta_objset, txg));
}

@@ -120,6 +120,8 @@ const zio_taskq_info_t zio_taskqs[ZIO_TYPES][ZIO_TASKQ_TYPES] = {

static dsl_syncfunc_t spa_sync_version;
static dsl_syncfunc_t spa_sync_props;
static dsl_checkfunc_t spa_change_guid_check;
static dsl_syncfunc_t spa_change_guid_sync;
static boolean_t spa_has_active_shared_spare(spa_t *spa);
static int spa_load_impl(spa_t *spa, uint64_t, nvlist_t *config,
    spa_load_state_t state, spa_import_type_t type, boolean_t mosconfig,

@@ -683,6 +685,56 @@ spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx)
    }
}

/*ARGSUSED*/
static int
spa_change_guid_check(void *arg1, void *arg2, dmu_tx_t *tx)
{
    spa_t *spa = arg1;
    uint64_t *newguid = arg2;
    vdev_t *rvd = spa->spa_root_vdev;
    uint64_t vdev_state;

    spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
    vdev_state = rvd->vdev_state;
    spa_config_exit(spa, SCL_STATE, FTAG);

    if (vdev_state != VDEV_STATE_HEALTHY)
        return (ENXIO);

    ASSERT3U(spa_guid(spa), !=, *newguid);

    return (0);
}

static void
spa_change_guid_sync(void *arg1, void *arg2, dmu_tx_t *tx)
{
    spa_t *spa = arg1;
    uint64_t *newguid = arg2;
    uint64_t oldguid;
    vdev_t *rvd = spa->spa_root_vdev;

    oldguid = spa_guid(spa);

    spa_config_enter(spa, SCL_STATE, FTAG, RW_READER);
    rvd->vdev_guid = *newguid;
    rvd->vdev_guid_sum += (*newguid - oldguid);
    vdev_config_dirty(rvd);
    spa_config_exit(spa, SCL_STATE, FTAG);

#ifdef __FreeBSD__
    /*
     * TODO: until recent illumos logging changes are merged
     * log reguid as pool property change
     */
    spa_history_log_internal(LOG_POOL_PROPSET, spa, tx,
        "guid change old=%llu new=%llu", oldguid, *newguid);
#else
    spa_history_log_internal(spa, "guid change", tx, "old=%lld new=%lld",
        oldguid, *newguid);
#endif
}

/*
 * Change the GUID for the pool. This is done so that we can later
 * re-import a pool built from a clone of our own vdevs. We will modify

@@ -695,29 +747,23 @@ spa_prop_clear_bootfs(spa_t *spa, uint64_t dsobj, dmu_tx_t *tx)
int
spa_change_guid(spa_t *spa)
{
    uint64_t oldguid, newguid;
    uint64_t txg;
    int error;
    uint64_t guid;

    if (!(spa_mode_global & FWRITE))
        return (EROFS);
    mutex_enter(&spa_namespace_lock);
    guid = spa_generate_guid(NULL);

    txg = spa_vdev_enter(spa);
    error = dsl_sync_task_do(spa_get_dsl(spa), spa_change_guid_check,
        spa_change_guid_sync, spa, &guid, 5);

    if (spa->spa_root_vdev->vdev_state != VDEV_STATE_HEALTHY)
        return (spa_vdev_exit(spa, NULL, txg, ENXIO));
    if (error == 0) {
        spa_config_sync(spa, B_FALSE, B_TRUE);
        spa_event_notify(spa, NULL, ESC_ZFS_POOL_REGUID);
    }

    oldguid = spa_guid(spa);
    newguid = spa_generate_guid(NULL);
    ASSERT3U(oldguid, !=, newguid);
    mutex_exit(&spa_namespace_lock);

    spa->spa_root_vdev->vdev_guid = newguid;
    spa->spa_root_vdev->vdev_guid_sum += (newguid - oldguid);

    vdev_config_dirty(spa->spa_root_vdev);

    spa_event_notify(spa, NULL, ESC_ZFS_POOL_REGUID);

    return (spa_vdev_exit(spa, NULL, txg, 0));
    return (error);
}

/*

@@ -6107,6 +6153,9 @@ spa_sync(spa_t *spa, uint64_t txg)
            rvd->vdev_children, txg, B_TRUE);
    }

    if (error == 0)
        spa->spa_last_synced_guid = rvd->vdev_guid;

    spa_config_exit(spa, SCL_STATE, FTAG);

    if (error == 0)

@@ -1352,16 +1352,29 @@ spa_name(spa_t *spa)
uint64_t
spa_guid(spa_t *spa)
{
    dsl_pool_t *dp = spa_get_dsl(spa);
    uint64_t guid;

    /*
     * If we fail to parse the config during spa_load(), we can go through
     * the error path (which posts an ereport) and end up here with no root
     * vdev. We stash the original pool guid in 'spa_config_guid' to handle
     * this case.
     */
    if (spa->spa_root_vdev != NULL)
    if (spa->spa_root_vdev == NULL)
        return (spa->spa_config_guid);

    guid = spa->spa_last_synced_guid != 0 ?
        spa->spa_last_synced_guid : spa->spa_root_vdev->vdev_guid;

    /*
     * Return the most recently synced out guid unless we're
     * in syncing context.
     */
    if (dp && dsl_pool_sync_context(dp))
        return (spa->spa_root_vdev->vdev_guid);
    else
        return (spa->spa_config_guid);
    return (guid);
}

uint64_t

@@ -82,7 +82,6 @@ typedef struct dsl_pool {

    /* No lock needed - sync context only */
    blkptr_t dp_meta_rootbp;
    list_t dp_synced_datasets;
    hrtime_t dp_read_overhead;
    uint64_t dp_throughput; /* bytes per millisec */
    uint64_t dp_write_limit;

@@ -96,10 +95,14 @@ typedef struct dsl_pool {
    kmutex_t dp_lock;
    uint64_t dp_space_towrite[TXG_SIZE];
    uint64_t dp_tempreserved[TXG_SIZE];
    uint64_t dp_mos_used_delta;
    uint64_t dp_mos_compressed_delta;
    uint64_t dp_mos_uncompressed_delta;

    /* Has its own locking */
    tx_state_t dp_tx;
    txg_list_t dp_dirty_datasets;
    txg_list_t dp_dirty_zilogs;
    txg_list_t dp_dirty_dirs;
    txg_list_t dp_sync_tasks;

@@ -139,6 +142,8 @@ int dsl_read_nolock(zio_t *pio, spa_t *spa, const blkptr_t *bpp,
void dsl_pool_create_origin(dsl_pool_t *dp, dmu_tx_t *tx);
void dsl_pool_upgrade_clones(dsl_pool_t *dp, dmu_tx_t *tx);
void dsl_pool_upgrade_dir_clones(dsl_pool_t *dp, dmu_tx_t *tx);
void dsl_pool_mos_diduse_space(dsl_pool_t *dp,
    int64_t used, int64_t comp, int64_t uncomp);

taskq_t *dsl_pool_vnrele_taskq(dsl_pool_t *dp);

@@ -141,6 +141,7 @@ struct spa {
    vdev_t *spa_root_vdev; /* top-level vdev container */
    uint64_t spa_config_guid; /* config pool guid */
    uint64_t spa_load_guid; /* spa_load initialized guid */
    uint64_t spa_last_synced_guid; /* last synced guid */
    list_t spa_config_dirty_list; /* vdevs with dirty config */
    list_t spa_state_dirty_list; /* vdevs with dirty state */
    spa_aux_vdev_t spa_spares; /* hot spares */

@@ -22,6 +22,9 @@
 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */
/*
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

#ifndef _SYS_TXG_H
#define _SYS_TXG_H

@@ -115,7 +118,7 @@ extern boolean_t txg_sync_waiting(struct dsl_pool *dp);

extern void txg_list_create(txg_list_t *tl, size_t offset);
extern void txg_list_destroy(txg_list_t *tl);
extern int txg_list_empty(txg_list_t *tl, uint64_t txg);
extern boolean_t txg_list_empty(txg_list_t *tl, uint64_t txg);
extern int txg_list_add(txg_list_t *tl, void *p, uint64_t txg);
extern int txg_list_add_tail(txg_list_t *tl, void *p, uint64_t txg);
extern void *txg_list_remove(txg_list_t *tl, uint64_t txg);

@@ -142,7 +142,7 @@ extern nvlist_t *vdev_config_generate(spa_t *spa, vdev_t *vd,
struct uberblock;
extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset);
extern int vdev_label_number(uint64_t psise, uint64_t offset);
extern nvlist_t *vdev_label_read_config(vdev_t *vd, int label);
extern nvlist_t *vdev_label_read_config(vdev_t *vd, uint64_t txg);
extern void vdev_uberblock_load(vdev_t *, struct uberblock *, nvlist_t **);

typedef enum {

@@ -20,6 +20,7 @@
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

/* Portions Copyright 2010 Robert Milkowski */

@@ -395,6 +396,7 @@ extern void zil_replay(objset_t *os, void *arg,
    zil_replay_func_t *replay_func[TX_MAX_TYPE]);
extern boolean_t zil_replaying(zilog_t *zilog, dmu_tx_t *tx);
extern void zil_destroy(zilog_t *zilog, boolean_t keep_first);
extern void zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx);
extern void zil_rollback_destroy(zilog_t *zilog, dmu_tx_t *tx);

extern itx_t *zil_itx_create(uint64_t txtype, size_t lrsize);

@@ -20,6 +20,7 @@
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

/* Portions Copyright 2010 Robert Milkowski */

@@ -130,6 +131,7 @@ struct zilog {
    zil_header_t zl_old_header; /* debugging aid */
    uint_t zl_prev_blks[ZIL_PREV_BLKS]; /* size - sector rounded */
    uint_t zl_prev_rotor; /* rotor for zl_prev[] */
    txg_node_t zl_dirty_link; /* protected by dp_dirty_zilogs list */
};

typedef struct zil_bp_node {

@@ -21,6 +21,7 @@
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Portions Copyright 2011 Martin Matuska <mm@FreeBSD.org>
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

#include <sys/zfs_context.h>

@@ -596,7 +597,7 @@ txg_list_destroy(txg_list_t *tl)
    mutex_destroy(&tl->tl_lock);
}

int
boolean_t
txg_list_empty(txg_list_t *tl, uint64_t txg)
{
    return (tl->tl_head[txg & TXG_MASK] == NULL);

@@ -1328,9 +1328,9 @@ vdev_validate(vdev_t *vd, boolean_t strict)
    if (vd->vdev_ops->vdev_op_leaf && vdev_readable(vd)) {
        uint64_t aux_guid = 0;
        nvlist_t *nvl;
        uint64_t txg = strict ? spa->spa_config_txg : -1ULL;

        if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) ==
            NULL) {
        if ((label = vdev_label_read_config(vd, txg)) == NULL) {
            vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
                VDEV_AUX_BAD_LABEL);
            return (0);

@@ -1512,7 +1512,7 @@ vdev_reopen(vdev_t *vd)
        !l2arc_vdev_present(vd))
            l2arc_add_vdev(spa, vd);
    } else {
        (void) vdev_validate(vd, B_TRUE);
        (void) vdev_validate(vd, spa_last_synced_txg(spa));
    }

    /*

@@ -1971,7 +1971,7 @@ vdev_validate_aux(vdev_t *vd)
    if (!vdev_readable(vd))
        return (0);

    if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) == NULL) {
    if ((label = vdev_label_read_config(vd, -1ULL)) == NULL) {
        vdev_set_state(vd, B_TRUE, VDEV_STATE_CANT_OPEN,
            VDEV_AUX_CORRUPT_DATA);
        return (-1);

@@ -433,17 +433,22 @@ vdev_top_config_generate(spa_t *spa, nvlist_t *config)
}

/*
 * Returns the configuration from the label of the given vdev. If 'label' is
 * VDEV_BEST_LABEL, each label of the vdev will be read until a valid
 * configuration is found; otherwise, only the specified label will be read.
 * Returns the configuration from the label of the given vdev. For vdevs
 * which don't have a txg value stored on their label (i.e. spares/cache)
 * or have not been completely initialized (txg = 0) just return
 * the configuration from the first valid label we find. Otherwise,
 * find the most up-to-date label that does not exceed the specified
 * 'txg' value.
 */
nvlist_t *
vdev_label_read_config(vdev_t *vd, int label)
vdev_label_read_config(vdev_t *vd, uint64_t txg)
{
    spa_t *spa = vd->vdev_spa;
    nvlist_t *config = NULL;
    vdev_phys_t *vp;
    zio_t *zio;
    uint64_t best_txg = 0;
    int error = 0;
    int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
        ZIO_FLAG_SPECULATIVE;

@@ -456,8 +461,7 @@ vdev_label_read_config(vdev_t *vd, int label)

retry:
    for (int l = 0; l < VDEV_LABELS; l++) {
        if (label >= 0 && label < VDEV_LABELS && label != l)
            continue;
        nvlist_t *label = NULL;

        zio = zio_root(spa, NULL, NULL, flags);

@@ -467,12 +471,31 @@ vdev_label_read_config(vdev_t *vd, int label)

        if (zio_wait(zio) == 0 &&
            nvlist_unpack(vp->vp_nvlist, sizeof (vp->vp_nvlist),
            &config, 0) == 0)
            break;
            &label, 0) == 0) {
            uint64_t label_txg = 0;

        if (config != NULL) {
            nvlist_free(config);
            config = NULL;
            /*
             * Auxiliary vdevs won't have txg values in their
             * labels and newly added vdevs may not have been
             * completely initialized so just return the
             * configuration from the first valid label we
             * encounter.
             */
            error = nvlist_lookup_uint64(label,
                ZPOOL_CONFIG_POOL_TXG, &label_txg);
            if ((error || label_txg == 0) && !config) {
                config = label;
                break;
            } else if (label_txg <= txg && label_txg > best_txg) {
                best_txg = label_txg;
                nvlist_free(config);
                config = fnvlist_dup(label);
            }
        }

        if (label != NULL) {
            nvlist_free(label);
            label = NULL;
        }
    }

@@ -507,7 +530,7 @@ vdev_inuse(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason,
    /*
     * Read the label, if any, and perform some basic sanity checks.
     */
    if ((label = vdev_label_read_config(vd, VDEV_BEST_LABEL)) == NULL)
    if ((label = vdev_label_read_config(vd, -1ULL)) == NULL)
        return (B_FALSE);

    (void) nvlist_lookup_uint64(label, ZPOOL_CONFIG_CREATE_TXG,

@@ -867,7 +890,6 @@ vdev_uberblock_compare(uberblock_t *ub1, uberblock_t *ub2)
struct ubl_cbdata {
    uberblock_t *ubl_ubbest; /* Best uberblock */
    vdev_t *ubl_vd; /* vdev associated with the above */
    int ubl_label; /* Label associated with the above */
};

static void

@@ -886,15 +908,13 @@ vdev_uberblock_load_done(zio_t *zio)
    if (ub->ub_txg <= spa->spa_load_max_txg &&
        vdev_uberblock_compare(ub, cbp->ubl_ubbest) > 0) {
        /*
         * Keep track of the vdev and label in which this
         * uberblock was found. We will use this information
         * later to obtain the config nvlist associated with
         * Keep track of the vdev in which this uberblock
         * was found. We will use this information later
         * to obtain the config nvlist associated with
         * this uberblock.
         */
        *cbp->ubl_ubbest = *ub;
        cbp->ubl_vd = vd;
        cbp->ubl_label = vdev_label_number(vd->vdev_psize,
            zio->io_offset);
    }
    mutex_exit(&rio->io_lock);
}

@@ -926,12 +946,11 @@ vdev_uberblock_load_impl(zio_t *zio, vdev_t *vd, int flags,
 * Reads the 'best' uberblock from disk along with its associated
 * configuration. First, we read the uberblock array of each label of each
 * vdev, keeping track of the uberblock with the highest txg in each array.
 * Then, we read the configuration from the same label as the best uberblock.
 * Then, we read the configuration from the same vdev as the best uberblock.
 */
void
vdev_uberblock_load(vdev_t *rvd, uberblock_t *ub, nvlist_t **config)
{
    int i;
    zio_t *zio;
    spa_t *spa = rvd->vdev_spa;
    struct ubl_cbdata cb;

@@ -951,13 +970,15 @@ vdev_uberblock_load(vdev_t *rvd, uberblock_t *ub, nvlist_t **config)
    zio = zio_root(spa, NULL, &cb, flags);
    vdev_uberblock_load_impl(zio, rvd, flags, &cb);
    (void) zio_wait(zio);
    if (cb.ubl_vd != NULL) {
        for (i = cb.ubl_label % 2; i < VDEV_LABELS; i += 2) {
            *config = vdev_label_read_config(cb.ubl_vd, i);
            if (*config != NULL)
                break;
        }
    }

    /*
     * It's possible that the best uberblock was discovered on a label
     * that has a configuration which was written in a future txg.
     * Search all labels on this vdev to find the configuration that
     * matches the txg for our uberblock.
     */
    if (cb.ubl_vd != NULL)
        *config = vdev_label_read_config(cb.ubl_vd, ub->ub_txg);
    spa_config_exit(spa, SCL_ALL, FTAG);
}

@@ -20,7 +20,7 @@
 */
/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 * Copyright (c) 2011 by Delphix. All rights reserved.
 * Copyright (c) 2012 by Delphix. All rights reserved.
 */

/* Portions Copyright 2010 Robert Milkowski */

@@ -461,6 +461,37 @@ zil_alloc_lwb(zilog_t *zilog, blkptr_t *bp, uint64_t txg)
    return (lwb);
}

/*
 * Called when we create in-memory log transactions so that we know
 * to cleanup the itxs at the end of spa_sync().
 */
void
zilog_dirty(zilog_t *zilog, uint64_t txg)
{
    dsl_pool_t *dp = zilog->zl_dmu_pool;
    dsl_dataset_t *ds = dmu_objset_ds(zilog->zl_os);

    if (dsl_dataset_is_snapshot(ds))
        panic("dirtying snapshot!");

    if (txg_list_add(&dp->dp_dirty_zilogs, zilog, txg) == 0) {
        /* up the hold count until we can be written out */
        dmu_buf_add_ref(ds->ds_dbuf, zilog);
    }
}

boolean_t
zilog_is_dirty(zilog_t *zilog)
{
    dsl_pool_t *dp = zilog->zl_dmu_pool;

    for (int t = 0; t < TXG_SIZE; t++) {
        if (txg_list_member(&dp->dp_dirty_zilogs, zilog, t))
            return (B_TRUE);
    }
    return (B_FALSE);
}

/*
 * Create an on-disk intent log.
 */

@@ -577,14 +608,21 @@ zil_destroy(zilog_t *zilog, boolean_t keep_first)
            kmem_cache_free(zil_lwb_cache, lwb);
        }
    } else if (!keep_first) {
        (void) zil_parse(zilog, zil_free_log_block,
            zil_free_log_record, tx, zh->zh_claim_txg);
        zil_destroy_sync(zilog, tx);
    }
    mutex_exit(&zilog->zl_lock);

    dmu_tx_commit(tx);
}

void
zil_destroy_sync(zilog_t *zilog, dmu_tx_t *tx)
{
    ASSERT(list_is_empty(&zilog->zl_lwb_list));
    (void) zil_parse(zilog, zil_free_log_block,
        zil_free_log_record, tx, zilog->zl_header->zh_claim_txg);
}

int
zil_claim(const char *osname, void *txarg)
{

@@ -998,6 +1036,8 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
        return (NULL);

    ASSERT(lwb->lwb_buf != NULL);
    ASSERT(zilog_is_dirty(zilog) ||
        spa_freeze_txg(zilog->zl_spa) != UINT64_MAX);

    if (lrc->lrc_txtype == TX_WRITE && itx->itx_wr_state == WR_NEED_COPY)
        dlen = P2ROUNDUP_TYPED(

@@ -1218,7 +1258,7 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx)
    if ((itx->itx_lr.lrc_txtype & ~TX_CI) == TX_RENAME)
        zil_async_to_sync(zilog, itx->itx_oid);

    if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX)
    if (spa_freeze_txg(zilog->zl_spa) != UINT64_MAX)
        txg = ZILTEST_TXG;
    else
        txg = dmu_tx_get_txg(tx);

@@ -1269,6 +1309,7 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx)
    }

    itx->itx_lr.lrc_txg = dmu_tx_get_txg(tx);
    zilog_dirty(zilog, txg);
    mutex_exit(&itxg->itxg_lock);

    /* Release the old itxs now we've dropped the lock */

@@ -1278,7 +1319,10 @@ zil_itx_assign(zilog_t *zilog, itx_t *itx, dmu_tx_t *tx)

/*
 * If there are any in-memory intent log transactions which have now been
 * synced then start up a taskq to free them.
 * synced then start up a taskq to free them. We should only do this after we
 * have written out the uberblocks (i.e. txg has been comitted) so that
 * don't inadvertently clean out in-memory log records that would be required
 * by zil_commit().
 */
void
zil_clean(zilog_t *zilog, uint64_t synced_txg)

@@ -1746,6 +1790,7 @@ zil_close(zilog_t *zilog)
    mutex_exit(&zilog->zl_lock);
    if (txg)
        txg_wait_synced(zilog->zl_dmu_pool, txg);
    ASSERT(!zilog_is_dirty(zilog));

    taskq_destroy(zilog->zl_clean_taskq);
    zilog->zl_clean_taskq = NULL;