MFV r354378,r354379,r354386: 10499 Multi-modifier protection (MMP)
10499 Multi-modifier protection (MMP) illumos/illumos-gate@e0f1c0afa4e0f1c0afa4
https://www.illumos.org/issues/10499 Port the following ZFS commits from ZoL to illumos. 379ca9cf2 Multi-modifier protection (MMP) bbffb59ef Fix multihost stale cache file import 0d398b256 Do not initiate MMP writes while pool is suspended 10701 Correct lock ASSERTs in vdev_label_read/write illumos/illumos-gate@58447f688d58447f688d
https://www.illumos.org/issues/10701 Port of ZoL commit: 0091d66f4e Correct lock ASSERTs in vdev_label_read/write At a minimum, this fixes a blown assert during an MMP test run when running on a DEBUG build. 11770 additional mmp fixes illumos/illumos-gate@4348eb90124348eb9012
https://www.illumos.org/issues/11770 Port a few additional MMP fixes from ZoL that came in after our initial MMP port. 4ca457b065 ZTS: Fix mmp_interval failure ca95f70dff zpool import progress kstat (only minimal changes from above can be pulled in right now) 060f0226e6 MMP interval and fail_intervals in uberblock Note from the committer (me). I do not have any use for this feature and I have not tested it. I only did smoke testing with multihost=off. Please be aware. I merged the code only to make future merges easier. Portions contributed by: Jerry Jelinek <jerry.jelinek@joyent.com> Portions contributed by: Tim Chase <tim@chase2k.com> Portions contributed by: sanjeevbagewadi <sanjeev.bagewadi@gmail.com> Portions contributed by: John L. Hammond <john.hammond@intel.com> Portions contributed by: Giuseppe Di Natale <dinatale2@llnl.gov> Portions contributed by: Prakash Surya <surya1@llnl.gov> Portions contributed by: Brian Behlendorf <behlendorf1@llnl.gov> Author: Olaf Faaland <faaland1@llnl.gov> MFC after: 4 weeks
This commit is contained in:
commit
a8c08e008a
@ -24,6 +24,7 @@
|
|||||||
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
|
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
|
||||||
* Copyright (c) 2014 Integros [integros.com]
|
* Copyright (c) 2014 Integros [integros.com]
|
||||||
* Copyright 2017 Nexenta Systems, Inc.
|
* Copyright 2017 Nexenta Systems, Inc.
|
||||||
|
* Copyright (c) 2017, 2018 Lawrence Livermore National Security, LLC.
|
||||||
* Copyright 2017 RackTop Systems.
|
* Copyright 2017 RackTop Systems.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
@ -2436,6 +2437,26 @@ dump_uberblock(uberblock_t *ub, const char *header, const char *footer)
|
|||||||
(void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
|
(void) printf("\tguid_sum = %llu\n", (u_longlong_t)ub->ub_guid_sum);
|
||||||
(void) printf("\ttimestamp = %llu UTC = %s",
|
(void) printf("\ttimestamp = %llu UTC = %s",
|
||||||
(u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
|
(u_longlong_t)ub->ub_timestamp, asctime(localtime(&timestamp)));
|
||||||
|
|
||||||
|
(void) printf("\tmmp_magic = %016llx\n",
|
||||||
|
(u_longlong_t)ub->ub_mmp_magic);
|
||||||
|
if (MMP_VALID(ub)) {
|
||||||
|
(void) printf("\tmmp_delay = %0llu\n",
|
||||||
|
(u_longlong_t)ub->ub_mmp_delay);
|
||||||
|
if (MMP_SEQ_VALID(ub))
|
||||||
|
(void) printf("\tmmp_seq = %u\n",
|
||||||
|
(unsigned int) MMP_SEQ(ub));
|
||||||
|
if (MMP_FAIL_INT_VALID(ub))
|
||||||
|
(void) printf("\tmmp_fail = %u\n",
|
||||||
|
(unsigned int) MMP_FAIL_INT(ub));
|
||||||
|
if (MMP_INTERVAL_VALID(ub))
|
||||||
|
(void) printf("\tmmp_write = %u\n",
|
||||||
|
(unsigned int) MMP_INTERVAL(ub));
|
||||||
|
/* After MMP_* to make summarize_uberblock_mmp cleaner */
|
||||||
|
(void) printf("\tmmp_valid = %x\n",
|
||||||
|
(unsigned int) ub->ub_mmp_config & 0xFF);
|
||||||
|
}
|
||||||
|
|
||||||
if (dump_opt['u'] >= 3) {
|
if (dump_opt['u'] >= 3) {
|
||||||
char blkbuf[BP_SPRINTF_LEN];
|
char blkbuf[BP_SPRINTF_LEN];
|
||||||
snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp);
|
snprintf_blkptr(blkbuf, sizeof (blkbuf), &ub->ub_rootbp);
|
||||||
@ -2534,6 +2555,12 @@ dump_label_uberblocks(vdev_label_t *lbl, uint64_t ashift)
|
|||||||
|
|
||||||
if (uberblock_verify(ub))
|
if (uberblock_verify(ub))
|
||||||
continue;
|
continue;
|
||||||
|
|
||||||
|
if ((dump_opt['u'] < 4) &&
|
||||||
|
(ub->ub_mmp_magic == MMP_MAGIC) && ub->ub_mmp_delay &&
|
||||||
|
(i >= VDEV_UBERBLOCK_COUNT(&vd) - MMP_BLOCKS_PER_LABEL))
|
||||||
|
continue;
|
||||||
|
|
||||||
(void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE,
|
(void) snprintf(header, ZDB_MAX_UB_HEADER_SIZE,
|
||||||
"Uberblock[%d]\n", i);
|
"Uberblock[%d]\n", i);
|
||||||
dump_uberblock(ub, header, "");
|
dump_uberblock(ub, header, "");
|
||||||
@ -4173,6 +4200,22 @@ verify_device_removal_feature_counts(spa_t *spa)
|
|||||||
return (ret);
|
return (ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
zdb_set_skip_mmp(char *target)
|
||||||
|
{
|
||||||
|
spa_t *spa;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Disable the activity check to allow examination of
|
||||||
|
* active pools.
|
||||||
|
*/
|
||||||
|
mutex_enter(&spa_namespace_lock);
|
||||||
|
if ((spa = spa_lookup(target)) != NULL) {
|
||||||
|
spa->spa_import_flags |= ZFS_IMPORT_SKIP_MMP;
|
||||||
|
}
|
||||||
|
mutex_exit(&spa_namespace_lock);
|
||||||
|
}
|
||||||
|
|
||||||
#define BOGUS_SUFFIX "_CHECKPOINTED_UNIVERSE"
|
#define BOGUS_SUFFIX "_CHECKPOINTED_UNIVERSE"
|
||||||
/*
|
/*
|
||||||
* Import the checkpointed state of the pool specified by the target
|
* Import the checkpointed state of the pool specified by the target
|
||||||
@ -4207,6 +4250,7 @@ import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path)
|
|||||||
}
|
}
|
||||||
|
|
||||||
if (cfg == NULL) {
|
if (cfg == NULL) {
|
||||||
|
zdb_set_skip_mmp(poolname);
|
||||||
error = spa_get_stats(poolname, &cfg, NULL, 0);
|
error = spa_get_stats(poolname, &cfg, NULL, 0);
|
||||||
if (error != 0) {
|
if (error != 0) {
|
||||||
fatal("Tried to read config of pool \"%s\" but "
|
fatal("Tried to read config of pool \"%s\" but "
|
||||||
@ -4219,7 +4263,8 @@ import_checkpointed_state(char *target, nvlist_t *cfg, char **new_path)
|
|||||||
fnvlist_add_string(cfg, ZPOOL_CONFIG_POOL_NAME, bogus_name);
|
fnvlist_add_string(cfg, ZPOOL_CONFIG_POOL_NAME, bogus_name);
|
||||||
|
|
||||||
error = spa_import(bogus_name, cfg, NULL,
|
error = spa_import(bogus_name, cfg, NULL,
|
||||||
ZFS_IMPORT_MISSING_LOG | ZFS_IMPORT_CHECKPOINT);
|
ZFS_IMPORT_MISSING_LOG | ZFS_IMPORT_CHECKPOINT |
|
||||||
|
ZFS_IMPORT_SKIP_MMP);
|
||||||
if (error != 0) {
|
if (error != 0) {
|
||||||
fatal("Tried to import pool \"%s\" but spa_import() failed "
|
fatal("Tried to import pool \"%s\" but spa_import() failed "
|
||||||
"with error %d\n", bogus_name, error);
|
"with error %d\n", bogus_name, error);
|
||||||
@ -5222,90 +5267,6 @@ zdb_embedded_block(char *thing)
|
|||||||
free(buf);
|
free(buf);
|
||||||
}
|
}
|
||||||
|
|
||||||
static boolean_t
|
|
||||||
pool_match(nvlist_t *cfg, char *tgt)
|
|
||||||
{
|
|
||||||
uint64_t v, guid = strtoull(tgt, NULL, 0);
|
|
||||||
char *s;
|
|
||||||
|
|
||||||
if (guid != 0) {
|
|
||||||
if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0)
|
|
||||||
return (v == guid);
|
|
||||||
} else {
|
|
||||||
if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0)
|
|
||||||
return (strcmp(s, tgt) == 0);
|
|
||||||
}
|
|
||||||
return (B_FALSE);
|
|
||||||
}
|
|
||||||
|
|
||||||
static char *
|
|
||||||
find_zpool(char **target, nvlist_t **configp, int dirc, char **dirv)
|
|
||||||
{
|
|
||||||
nvlist_t *pools;
|
|
||||||
nvlist_t *match = NULL;
|
|
||||||
char *name = NULL;
|
|
||||||
char *sepp = NULL;
|
|
||||||
char sep = '\0';
|
|
||||||
int count = 0;
|
|
||||||
importargs_t args;
|
|
||||||
|
|
||||||
bzero(&args, sizeof (args));
|
|
||||||
args.paths = dirc;
|
|
||||||
args.path = dirv;
|
|
||||||
args.can_be_active = B_TRUE;
|
|
||||||
|
|
||||||
if ((sepp = strpbrk(*target, "/@")) != NULL) {
|
|
||||||
sep = *sepp;
|
|
||||||
*sepp = '\0';
|
|
||||||
}
|
|
||||||
|
|
||||||
pools = zpool_search_import(g_zfs, &args);
|
|
||||||
|
|
||||||
if (pools != NULL) {
|
|
||||||
nvpair_t *elem = NULL;
|
|
||||||
while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
|
|
||||||
verify(nvpair_value_nvlist(elem, configp) == 0);
|
|
||||||
if (pool_match(*configp, *target)) {
|
|
||||||
count++;
|
|
||||||
if (match != NULL) {
|
|
||||||
/* print previously found config */
|
|
||||||
if (name != NULL) {
|
|
||||||
(void) printf("%s\n", name);
|
|
||||||
dump_nvlist(match, 8);
|
|
||||||
name = NULL;
|
|
||||||
}
|
|
||||||
(void) printf("%s\n",
|
|
||||||
nvpair_name(elem));
|
|
||||||
dump_nvlist(*configp, 8);
|
|
||||||
} else {
|
|
||||||
match = *configp;
|
|
||||||
name = nvpair_name(elem);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (count > 1)
|
|
||||||
(void) fatal("\tMatched %d pools - use pool GUID "
|
|
||||||
"instead of pool name or \n"
|
|
||||||
"\tpool name part of a dataset name to select pool", count);
|
|
||||||
|
|
||||||
if (sepp)
|
|
||||||
*sepp = sep;
|
|
||||||
/*
|
|
||||||
* If pool GUID was specified for pool id, replace it with pool name
|
|
||||||
*/
|
|
||||||
if (name && (strstr(*target, name) != *target)) {
|
|
||||||
int sz = 1 + strlen(name) + ((sepp) ? strlen(sepp) : 0);
|
|
||||||
|
|
||||||
*target = umem_alloc(sz, UMEM_NOFAIL);
|
|
||||||
(void) snprintf(*target, sz, "%s%s", name, sepp ? sepp : "");
|
|
||||||
}
|
|
||||||
|
|
||||||
*configp = name ? match : NULL;
|
|
||||||
|
|
||||||
return (name);
|
|
||||||
}
|
|
||||||
|
|
||||||
int
|
int
|
||||||
main(int argc, char **argv)
|
main(int argc, char **argv)
|
||||||
{
|
{
|
||||||
@ -5318,7 +5279,7 @@ main(int argc, char **argv)
|
|||||||
int error = 0;
|
int error = 0;
|
||||||
char **searchdirs = NULL;
|
char **searchdirs = NULL;
|
||||||
int nsearch = 0;
|
int nsearch = 0;
|
||||||
char *target;
|
char *target, *target_pool;
|
||||||
nvlist_t *policy = NULL;
|
nvlist_t *policy = NULL;
|
||||||
uint64_t max_txg = UINT64_MAX;
|
uint64_t max_txg = UINT64_MAX;
|
||||||
int flags = ZFS_IMPORT_MISSING_LOG;
|
int flags = ZFS_IMPORT_MISSING_LOG;
|
||||||
@ -5526,22 +5487,48 @@ main(int argc, char **argv)
|
|||||||
error = 0;
|
error = 0;
|
||||||
target = argv[0];
|
target = argv[0];
|
||||||
|
|
||||||
if (dump_opt['e']) {
|
if (strpbrk(target, "/@") != NULL) {
|
||||||
char *name = find_zpool(&target, &cfg, nsearch, searchdirs);
|
size_t targetlen;
|
||||||
|
|
||||||
error = ENOENT;
|
target_pool = strdup(target);
|
||||||
if (name) {
|
*strpbrk(target_pool, "/@") = '\0';
|
||||||
if (dump_opt['C'] > 1) {
|
|
||||||
(void) printf("\nConfiguration for import:\n");
|
target_is_spa = B_FALSE;
|
||||||
dump_nvlist(cfg, 8);
|
targetlen = strlen(target);
|
||||||
|
if (targetlen && target[targetlen - 1] == '/')
|
||||||
|
target[targetlen - 1] = '\0';
|
||||||
|
} else {
|
||||||
|
target_pool = target;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (dump_opt['e']) {
|
||||||
|
importargs_t args = { 0 };
|
||||||
|
|
||||||
|
args.paths = nsearch;
|
||||||
|
args.path = searchdirs;
|
||||||
|
args.can_be_active = B_TRUE;
|
||||||
|
|
||||||
|
error = zpool_tryimport(g_zfs, target_pool, &cfg, &args);
|
||||||
|
|
||||||
|
if (error == 0) {
|
||||||
|
|
||||||
if (nvlist_add_nvlist(cfg,
|
if (nvlist_add_nvlist(cfg,
|
||||||
ZPOOL_LOAD_POLICY, policy) != 0) {
|
ZPOOL_LOAD_POLICY, policy) != 0) {
|
||||||
fatal("can't open '%s': %s",
|
fatal("can't open '%s': %s",
|
||||||
target, strerror(ENOMEM));
|
target, strerror(ENOMEM));
|
||||||
}
|
}
|
||||||
error = spa_import(name, cfg, NULL, flags);
|
|
||||||
|
if (dump_opt['C'] > 1) {
|
||||||
|
(void) printf("\nConfiguration for import:\n");
|
||||||
|
dump_nvlist(cfg, 8);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Disable the activity check to allow examination of
|
||||||
|
* active pools.
|
||||||
|
*/
|
||||||
|
error = spa_import(target_pool, cfg, NULL,
|
||||||
|
flags | ZFS_IMPORT_SKIP_MMP);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -5556,21 +5543,6 @@ main(int argc, char **argv)
|
|||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if (strpbrk(target, "/@") != NULL) {
|
|
||||||
size_t targetlen;
|
|
||||||
|
|
||||||
target_is_spa = B_FALSE;
|
|
||||||
/*
|
|
||||||
* Remove any trailing slash. Later code would get confused
|
|
||||||
* by it, but we want to allow it so that "pool/" can
|
|
||||||
* indicate that we want to dump the topmost filesystem,
|
|
||||||
* rather than the whole pool.
|
|
||||||
*/
|
|
||||||
targetlen = strlen(target);
|
|
||||||
if (targetlen != 0 && target[targetlen - 1] == '/')
|
|
||||||
target[targetlen - 1] = '\0';
|
|
||||||
}
|
|
||||||
|
|
||||||
if (error == 0) {
|
if (error == 0) {
|
||||||
if (dump_opt['k'] && (target_is_spa || dump_opt['R'])) {
|
if (dump_opt['k'] && (target_is_spa || dump_opt['R'])) {
|
||||||
ASSERT(checkpoint_pool != NULL);
|
ASSERT(checkpoint_pool != NULL);
|
||||||
@ -5584,6 +5556,7 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
|
|
||||||
} else if (target_is_spa || dump_opt['R']) {
|
} else if (target_is_spa || dump_opt['R']) {
|
||||||
|
zdb_set_skip_mmp(target);
|
||||||
error = spa_open_rewind(target, &spa, FTAG, policy,
|
error = spa_open_rewind(target, &spa, FTAG, policy,
|
||||||
NULL);
|
NULL);
|
||||||
if (error) {
|
if (error) {
|
||||||
@ -5606,6 +5579,7 @@ main(int argc, char **argv)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
|
zdb_set_skip_mmp(target);
|
||||||
error = open_objset(target, DMU_OST_ANY, FTAG, &os);
|
error = open_objset(target, DMU_OST_ANY, FTAG, &os);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -121,16 +121,11 @@ space_delta_cb(dmu_object_type_t bonustype, void *data,
|
|||||||
* Target is the dataset whose pool we want to open.
|
* Target is the dataset whose pool we want to open.
|
||||||
*/
|
*/
|
||||||
static void
|
static void
|
||||||
import_pool(const char *target, boolean_t readonly)
|
zhack_import(char *target, boolean_t readonly)
|
||||||
{
|
{
|
||||||
nvlist_t *config;
|
nvlist_t *config;
|
||||||
nvlist_t *pools;
|
|
||||||
int error;
|
|
||||||
char *sepp;
|
|
||||||
spa_t *spa;
|
|
||||||
nvpair_t *elem;
|
|
||||||
nvlist_t *props;
|
nvlist_t *props;
|
||||||
const char *name;
|
int error;
|
||||||
|
|
||||||
kernel_init(readonly ? FREAD : (FREAD | FWRITE));
|
kernel_init(readonly ? FREAD : (FREAD | FWRITE));
|
||||||
g_zfs = libzfs_init();
|
g_zfs = libzfs_init();
|
||||||
@ -139,68 +134,40 @@ import_pool(const char *target, boolean_t readonly)
|
|||||||
dmu_objset_register_type(DMU_OST_ZFS, space_delta_cb);
|
dmu_objset_register_type(DMU_OST_ZFS, space_delta_cb);
|
||||||
|
|
||||||
g_readonly = readonly;
|
g_readonly = readonly;
|
||||||
|
|
||||||
/*
|
|
||||||
* If we only want readonly access, it's OK if we find
|
|
||||||
* a potentially-active (ie, imported into the kernel) pool from the
|
|
||||||
* default cachefile.
|
|
||||||
*/
|
|
||||||
if (readonly && spa_open(target, &spa, FTAG) == 0) {
|
|
||||||
spa_close(spa, FTAG);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
g_importargs.unique = B_TRUE;
|
g_importargs.unique = B_TRUE;
|
||||||
g_importargs.can_be_active = readonly;
|
g_importargs.can_be_active = readonly;
|
||||||
g_pool = strdup(target);
|
g_pool = strdup(target);
|
||||||
if ((sepp = strpbrk(g_pool, "/@")) != NULL)
|
|
||||||
*sepp = '\0';
|
|
||||||
g_importargs.poolname = g_pool;
|
|
||||||
pools = zpool_search_import(g_zfs, &g_importargs);
|
|
||||||
|
|
||||||
if (nvlist_empty(pools)) {
|
error = zpool_tryimport(g_zfs, target, &config, &g_importargs);
|
||||||
if (!g_importargs.can_be_active) {
|
if (error)
|
||||||
g_importargs.can_be_active = B_TRUE;
|
fatal(NULL, FTAG, "cannot import '%s': %s", target,
|
||||||
if (zpool_search_import(g_zfs, &g_importargs) != NULL ||
|
libzfs_error_description(g_zfs));
|
||||||
spa_open(target, &spa, FTAG) == 0) {
|
|
||||||
fatal(spa, FTAG, "cannot import '%s': pool is "
|
|
||||||
"active; run " "\"zpool export %s\" "
|
|
||||||
"first\n", g_pool, g_pool);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
fatal(NULL, FTAG, "cannot import '%s': no such pool "
|
|
||||||
"available\n", g_pool);
|
|
||||||
}
|
|
||||||
|
|
||||||
elem = nvlist_next_nvpair(pools, NULL);
|
|
||||||
name = nvpair_name(elem);
|
|
||||||
verify(nvpair_value_nvlist(elem, &config) == 0);
|
|
||||||
|
|
||||||
props = NULL;
|
props = NULL;
|
||||||
if (readonly) {
|
if (readonly) {
|
||||||
verify(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
|
VERIFY(nvlist_alloc(&props, NV_UNIQUE_NAME, 0) == 0);
|
||||||
verify(nvlist_add_uint64(props,
|
VERIFY(nvlist_add_uint64(props,
|
||||||
zpool_prop_to_name(ZPOOL_PROP_READONLY), 1) == 0);
|
zpool_prop_to_name(ZPOOL_PROP_READONLY), 1) == 0);
|
||||||
}
|
}
|
||||||
|
|
||||||
zfeature_checks_disable = B_TRUE;
|
zfeature_checks_disable = B_TRUE;
|
||||||
error = spa_import(name, config, props, ZFS_IMPORT_NORMAL);
|
error = spa_import(target, config, props,
|
||||||
|
(readonly ? ZFS_IMPORT_SKIP_MMP : ZFS_IMPORT_NORMAL));
|
||||||
zfeature_checks_disable = B_FALSE;
|
zfeature_checks_disable = B_FALSE;
|
||||||
if (error == EEXIST)
|
if (error == EEXIST)
|
||||||
error = 0;
|
error = 0;
|
||||||
|
|
||||||
if (error)
|
if (error)
|
||||||
fatal(NULL, FTAG, "can't import '%s': %s", name,
|
fatal(NULL, FTAG, "can't import '%s': %s", target,
|
||||||
strerror(error));
|
strerror(error));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
static void
|
||||||
zhack_spa_open(const char *target, boolean_t readonly, void *tag, spa_t **spa)
|
zhack_spa_open(char *target, boolean_t readonly, void *tag, spa_t **spa)
|
||||||
{
|
{
|
||||||
int err;
|
int err;
|
||||||
|
|
||||||
import_pool(target, readonly);
|
zhack_import(target, readonly);
|
||||||
|
|
||||||
zfeature_checks_disable = B_TRUE;
|
zfeature_checks_disable = B_TRUE;
|
||||||
err = spa_open(target, spa, tag);
|
err = spa_open(target, spa, tag);
|
||||||
|
@ -481,6 +481,11 @@ If a pool has a shared spare that is currently being used, the pool can not be
|
|||||||
exported since other pools may use this shared spare, which may lead to
|
exported since other pools may use this shared spare, which may lead to
|
||||||
potential data corruption.
|
potential data corruption.
|
||||||
.Pp
|
.Pp
|
||||||
|
Shared spares add some risk.
|
||||||
|
If the pools are imported on different hosts, and both pools suffer a device
|
||||||
|
failure at the same time, both could attempt to use the spare at the same time.
|
||||||
|
This may not be detected, resulting in data corruption.
|
||||||
|
.Pp
|
||||||
An in-progress spare replacement can be cancelled by detaching the hot spare.
|
An in-progress spare replacement can be cancelled by detaching the hot spare.
|
||||||
If the original faulted device is detached, then the hot spare assumes its
|
If the original faulted device is detached, then the hot spare assumes its
|
||||||
place in the configuration, and is removed from the spare list of all active
|
place in the configuration, and is removed from the spare list of all active
|
||||||
@ -806,7 +811,7 @@ to the enabled state.
|
|||||||
See
|
See
|
||||||
.Xr zpool-features 7
|
.Xr zpool-features 7
|
||||||
for details on feature states.
|
for details on feature states.
|
||||||
.It Sy listsnaps Ns = Ns Cm on No | Cm off
|
.It Sy listsnapshots Ns = Ns Cm on No | Cm off
|
||||||
Controls whether information about snapshots associated with this pool is
|
Controls whether information about snapshots associated with this pool is
|
||||||
output when
|
output when
|
||||||
.Qq Nm zfs Cm list
|
.Qq Nm zfs Cm list
|
||||||
@ -814,6 +819,31 @@ is run without the
|
|||||||
.Fl t
|
.Fl t
|
||||||
option. The default value is
|
option. The default value is
|
||||||
.Cm off .
|
.Cm off .
|
||||||
|
This property can also be referred to by its shortened name,
|
||||||
|
.Sy listsnaps .
|
||||||
|
.It Sy multihost Ns = Ns Sy on No | Sy off
|
||||||
|
Controls whether a pool activity check should be performed during
|
||||||
|
.Nm zpool Cm import .
|
||||||
|
When a pool is determined to be active it cannot be imported, even with the
|
||||||
|
.Fl f
|
||||||
|
option.
|
||||||
|
This property is intended to be used in failover configurations
|
||||||
|
where multiple hosts have access to a pool on shared storage.
|
||||||
|
.sp
|
||||||
|
Multihost provides protection on import only.
|
||||||
|
It does not protect against an
|
||||||
|
individual device being used in multiple pools, regardless of the type of vdev.
|
||||||
|
See the discussion under
|
||||||
|
.Sy zpool create .
|
||||||
|
.sp
|
||||||
|
When this property is on, periodic writes to storage occur to show the pool is
|
||||||
|
in use.
|
||||||
|
See
|
||||||
|
.Sy vfs.zfs.multihost_interval
|
||||||
|
sysctl.
|
||||||
|
In order to enable this property each host must set a unique hostid.
|
||||||
|
The default value is
|
||||||
|
.Sy off .
|
||||||
.It Sy version Ns = Ns Ar version
|
.It Sy version Ns = Ns Ar version
|
||||||
The current on-disk version of the pool. This can be increased, but never
|
The current on-disk version of the pool. This can be increased, but never
|
||||||
decreased. The preferred method of updating pools is with the
|
decreased. The preferred method of updating pools is with the
|
||||||
@ -958,8 +988,22 @@ discarded transactions is irretrievably lost.
|
|||||||
Used in combination with the
|
Used in combination with the
|
||||||
.Fl F
|
.Fl F
|
||||||
flag. Check whether discarding transactions would make the pool openable, but
|
flag. Check whether discarding transactions would make the pool openable, but
|
||||||
|
<<<<<<<
|
||||||
do not actually discard any transactions.
|
do not actually discard any transactions.
|
||||||
.El
|
.El
|
||||||
|
|||||||
|
||||||
|
If no arguments are specified, all device errors within the pool are cleared.
|
||||||
|
If one or more devices is specified, only those errors associated with the
|
||||||
|
specified device or devices are cleared.
|
||||||
|
=======
|
||||||
|
If no arguments are specified, all device errors within the pool are cleared.
|
||||||
|
If one or more devices is specified, only those errors associated with the
|
||||||
|
specified device or devices are cleared.
|
||||||
|
If multihost is enabled, and the pool has been suspended, this will not
|
||||||
|
resume I/O.
|
||||||
|
While the pool was suspended, it may have been imported on
|
||||||
|
another host, and resuming I/O could result in pool damage.
|
||||||
|
>>>>>>>
|
||||||
.It Xo
|
.It Xo
|
||||||
.Nm
|
.Nm
|
||||||
.Cm create
|
.Cm create
|
||||||
@ -984,7 +1028,37 @@ specification is described in the
|
|||||||
.Qq Sx Virtual Devices
|
.Qq Sx Virtual Devices
|
||||||
section.
|
section.
|
||||||
.Pp
|
.Pp
|
||||||
|
<<<<<<<
|
||||||
The command verifies that each device specified is accessible and not currently
|
The command verifies that each device specified is accessible and not currently
|
||||||
|
|||||||
|
||||||
|
The command verifies that each device specified is accessible and not currently
|
||||||
|
in use by another subsystem.
|
||||||
|
There are some uses, such as being currently mounted, or specified as the
|
||||||
|
dedicated dump device, that prevents a device from ever being used by ZFS.
|
||||||
|
Other uses, such as having a preexisting UFS file system, can be overridden with
|
||||||
|
=======
|
||||||
|
The command attempts to verify that each device specified is accessible and not
|
||||||
|
currently in use by another subsystem.
|
||||||
|
However this check is not robust enough
|
||||||
|
to detect simultaneous attempts to use a new device in different pools, even if
|
||||||
|
.Sy multihost
|
||||||
|
is
|
||||||
|
.Sy enabled .
|
||||||
|
The
|
||||||
|
administrator must ensure that simultaneous invocations of any combination of
|
||||||
|
.Sy zpool replace ,
|
||||||
|
.Sy zpool create ,
|
||||||
|
.Sy zpool add ,
|
||||||
|
or
|
||||||
|
.Sy zpool labelclear ,
|
||||||
|
do not refer to the same device.
|
||||||
|
Using the same device in two pools will
|
||||||
|
result in pool corruption.
|
||||||
|
.sp
|
||||||
|
There are some uses, such as being currently mounted, or specified as the
|
||||||
|
dedicated dump device, that prevents a device from ever being used by ZFS.
|
||||||
|
Other uses, such as having a preexisting UFS file system, can be overridden with
|
||||||
|
>>>>>>>
|
||||||
in use by another subsystem. There are some uses, such as being currently
|
in use by another subsystem. There are some uses, such as being currently
|
||||||
mounted, or specified as the dedicated dump device, that prevents a device from
|
mounted, or specified as the dedicated dump device, that prevents a device from
|
||||||
ever being used by
|
ever being used by
|
||||||
|
@ -53,6 +53,7 @@
|
|||||||
#include <zfs_prop.h>
|
#include <zfs_prop.h>
|
||||||
#include <sys/fs/zfs.h>
|
#include <sys/fs/zfs.h>
|
||||||
#include <sys/stat.h>
|
#include <sys/stat.h>
|
||||||
|
#include <sys/debug.h>
|
||||||
|
|
||||||
#include <libzfs.h>
|
#include <libzfs.h>
|
||||||
|
|
||||||
@ -1635,6 +1636,10 @@ print_status_config(zpool_handle_t *zhp, const char *name, nvlist_t *nv,
|
|||||||
(void) printf(gettext("split into new pool"));
|
(void) printf(gettext("split into new pool"));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case VDEV_AUX_ACTIVE:
|
||||||
|
(void) printf(gettext("currently in use"));
|
||||||
|
break;
|
||||||
|
|
||||||
case VDEV_AUX_CHILDREN_OFFLINE:
|
case VDEV_AUX_CHILDREN_OFFLINE:
|
||||||
(void) printf(gettext("all children offline"));
|
(void) printf(gettext("all children offline"));
|
||||||
break;
|
break;
|
||||||
@ -1769,6 +1774,10 @@ print_import_config(const char *name, nvlist_t *nv, int namewidth, int depth)
|
|||||||
(void) printf(gettext("too many errors"));
|
(void) printf(gettext("too many errors"));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case VDEV_AUX_ACTIVE:
|
||||||
|
(void) printf(gettext("currently in use"));
|
||||||
|
break;
|
||||||
|
|
||||||
case VDEV_AUX_CHILDREN_OFFLINE:
|
case VDEV_AUX_CHILDREN_OFFLINE:
|
||||||
(void) printf(gettext("all children offline"));
|
(void) printf(gettext("all children offline"));
|
||||||
break;
|
break;
|
||||||
@ -1866,8 +1875,10 @@ show_import(nvlist_t *config)
|
|||||||
vdev_stat_t *vs;
|
vdev_stat_t *vs;
|
||||||
char *name;
|
char *name;
|
||||||
uint64_t guid;
|
uint64_t guid;
|
||||||
|
uint64_t hostid = 0;
|
||||||
char *msgid;
|
char *msgid;
|
||||||
nvlist_t *nvroot;
|
char *hostname = "unknown";
|
||||||
|
nvlist_t *nvroot, *nvinfo;
|
||||||
int reason;
|
int reason;
|
||||||
const char *health;
|
const char *health;
|
||||||
uint_t vsc;
|
uint_t vsc;
|
||||||
@ -1954,6 +1965,17 @@ show_import(nvlist_t *config)
|
|||||||
zpool_print_unsup_feat(config);
|
zpool_print_unsup_feat(config);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case ZPOOL_STATUS_HOSTID_ACTIVE:
|
||||||
|
(void) printf(gettext(" status: The pool is currently "
|
||||||
|
"imported by another system.\n"));
|
||||||
|
break;
|
||||||
|
|
||||||
|
case ZPOOL_STATUS_HOSTID_REQUIRED:
|
||||||
|
(void) printf(gettext(" status: The pool has the "
|
||||||
|
"multihost property on. It cannot\n\tbe safely imported "
|
||||||
|
"when the system hostid is not set.\n"));
|
||||||
|
break;
|
||||||
|
|
||||||
case ZPOOL_STATUS_HOSTID_MISMATCH:
|
case ZPOOL_STATUS_HOSTID_MISMATCH:
|
||||||
(void) printf(gettext(" status: The pool was last accessed by "
|
(void) printf(gettext(" status: The pool was last accessed by "
|
||||||
"another system.\n"));
|
"another system.\n"));
|
||||||
@ -2040,6 +2062,27 @@ show_import(nvlist_t *config)
|
|||||||
"imported. Attach the missing\n\tdevices and try "
|
"imported. Attach the missing\n\tdevices and try "
|
||||||
"again.\n"));
|
"again.\n"));
|
||||||
break;
|
break;
|
||||||
|
case ZPOOL_STATUS_HOSTID_ACTIVE:
|
||||||
|
VERIFY0(nvlist_lookup_nvlist(config,
|
||||||
|
ZPOOL_CONFIG_LOAD_INFO, &nvinfo));
|
||||||
|
|
||||||
|
if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_HOSTNAME))
|
||||||
|
hostname = fnvlist_lookup_string(nvinfo,
|
||||||
|
ZPOOL_CONFIG_MMP_HOSTNAME);
|
||||||
|
|
||||||
|
if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_HOSTID))
|
||||||
|
hostid = fnvlist_lookup_uint64(nvinfo,
|
||||||
|
ZPOOL_CONFIG_MMP_HOSTID);
|
||||||
|
|
||||||
|
(void) printf(gettext(" action: The pool must be "
|
||||||
|
"exported from %s (hostid=%lx)\n\tbefore it "
|
||||||
|
"can be safely imported.\n"), hostname,
|
||||||
|
(unsigned long) hostid);
|
||||||
|
break;
|
||||||
|
case ZPOOL_STATUS_HOSTID_REQUIRED:
|
||||||
|
(void) printf(gettext(" action: Check the SMF "
|
||||||
|
"svc:/system/hostid service.\n"));
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
(void) printf(gettext(" action: The pool cannot be "
|
(void) printf(gettext(" action: The pool cannot be "
|
||||||
"imported due to damaged devices or data.\n"));
|
"imported due to damaged devices or data.\n"));
|
||||||
@ -2087,6 +2130,31 @@ show_import(nvlist_t *config)
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static boolean_t
|
||||||
|
zfs_force_import_required(nvlist_t *config)
|
||||||
|
{
|
||||||
|
uint64_t state;
|
||||||
|
uint64_t hostid = 0;
|
||||||
|
nvlist_t *nvinfo;
|
||||||
|
|
||||||
|
state = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE);
|
||||||
|
(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
|
||||||
|
|
||||||
|
if (state != POOL_STATE_EXPORTED && hostid != get_system_hostid())
|
||||||
|
return (B_TRUE);
|
||||||
|
|
||||||
|
nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO);
|
||||||
|
if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_STATE)) {
|
||||||
|
mmp_state_t mmp_state = fnvlist_lookup_uint64(nvinfo,
|
||||||
|
ZPOOL_CONFIG_MMP_STATE);
|
||||||
|
|
||||||
|
if (mmp_state != MMP_STATE_INACTIVE)
|
||||||
|
return (B_TRUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (B_FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Perform the import for the given configuration. This passes the heavy
|
* Perform the import for the given configuration. This passes the heavy
|
||||||
* lifting off to zpool_import_props(), and then mounts the datasets contained
|
* lifting off to zpool_import_props(), and then mounts the datasets contained
|
||||||
@ -2098,53 +2166,73 @@ do_import(nvlist_t *config, const char *newname, const char *mntopts,
|
|||||||
{
|
{
|
||||||
zpool_handle_t *zhp;
|
zpool_handle_t *zhp;
|
||||||
char *name;
|
char *name;
|
||||||
uint64_t state;
|
|
||||||
uint64_t version;
|
uint64_t version;
|
||||||
|
|
||||||
verify(nvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME,
|
name = fnvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME);
|
||||||
&name) == 0);
|
version = fnvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION);
|
||||||
|
|
||||||
verify(nvlist_lookup_uint64(config,
|
|
||||||
ZPOOL_CONFIG_POOL_STATE, &state) == 0);
|
|
||||||
verify(nvlist_lookup_uint64(config,
|
|
||||||
ZPOOL_CONFIG_VERSION, &version) == 0);
|
|
||||||
if (!SPA_VERSION_IS_SUPPORTED(version)) {
|
if (!SPA_VERSION_IS_SUPPORTED(version)) {
|
||||||
(void) fprintf(stderr, gettext("cannot import '%s': pool "
|
(void) fprintf(stderr, gettext("cannot import '%s': pool "
|
||||||
"is formatted using an unsupported ZFS version\n"), name);
|
"is formatted using an unsupported ZFS version\n"), name);
|
||||||
return (1);
|
return (1);
|
||||||
} else if (state != POOL_STATE_EXPORTED &&
|
} else if (zfs_force_import_required(config) &&
|
||||||
!(flags & ZFS_IMPORT_ANY_HOST)) {
|
!(flags & ZFS_IMPORT_ANY_HOST)) {
|
||||||
uint64_t hostid;
|
mmp_state_t mmp_state = MMP_STATE_INACTIVE;
|
||||||
|
nvlist_t *nvinfo;
|
||||||
|
|
||||||
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID,
|
nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO);
|
||||||
&hostid) == 0) {
|
if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_STATE))
|
||||||
if ((unsigned long)hostid != gethostid()) {
|
mmp_state = fnvlist_lookup_uint64(nvinfo,
|
||||||
char *hostname;
|
ZPOOL_CONFIG_MMP_STATE);
|
||||||
uint64_t timestamp;
|
|
||||||
time_t t;
|
if (mmp_state == MMP_STATE_ACTIVE) {
|
||||||
|
char *hostname = "<unknown>";
|
||||||
|
uint64_t hostid = 0;
|
||||||
|
|
||||||
|
if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_HOSTNAME))
|
||||||
|
hostname = fnvlist_lookup_string(nvinfo,
|
||||||
|
ZPOOL_CONFIG_MMP_HOSTNAME);
|
||||||
|
|
||||||
|
if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_HOSTID))
|
||||||
|
hostid = fnvlist_lookup_uint64(nvinfo,
|
||||||
|
ZPOOL_CONFIG_MMP_HOSTID);
|
||||||
|
|
||||||
verify(nvlist_lookup_string(config,
|
|
||||||
ZPOOL_CONFIG_HOSTNAME, &hostname) == 0);
|
|
||||||
verify(nvlist_lookup_uint64(config,
|
|
||||||
ZPOOL_CONFIG_TIMESTAMP, &timestamp) == 0);
|
|
||||||
t = timestamp;
|
|
||||||
(void) fprintf(stderr, gettext("cannot import "
|
|
||||||
"'%s': pool may be in use from other "
|
|
||||||
"system, it was last accessed by %s "
|
|
||||||
"(hostid: 0x%lx) on %s"), name, hostname,
|
|
||||||
(unsigned long)hostid,
|
|
||||||
asctime(localtime(&t)));
|
|
||||||
(void) fprintf(stderr, gettext("use '-f' to "
|
|
||||||
"import anyway\n"));
|
|
||||||
return (1);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
(void) fprintf(stderr, gettext("cannot import '%s': "
|
(void) fprintf(stderr, gettext("cannot import '%s': "
|
||||||
"pool may be in use from other system\n"), name);
|
"pool is imported on %s (hostid: "
|
||||||
(void) fprintf(stderr, gettext("use '-f' to import "
|
"0x%lx)\nExport the pool on the other system, "
|
||||||
"anyway\n"));
|
"then run 'zpool import'.\n"),
|
||||||
return (1);
|
name, hostname, (unsigned long) hostid);
|
||||||
|
} else if (mmp_state == MMP_STATE_NO_HOSTID) {
|
||||||
|
(void) fprintf(stderr, gettext("Cannot import '%s': "
|
||||||
|
"pool has the multihost property on and the\n"
|
||||||
|
"system's hostid is not set.\n"), name);
|
||||||
|
} else {
|
||||||
|
char *hostname = "<unknown>";
|
||||||
|
uint64_t timestamp = 0;
|
||||||
|
uint64_t hostid = 0;
|
||||||
|
|
||||||
|
if (nvlist_exists(config, ZPOOL_CONFIG_HOSTNAME))
|
||||||
|
hostname = fnvlist_lookup_string(config,
|
||||||
|
ZPOOL_CONFIG_HOSTNAME);
|
||||||
|
|
||||||
|
if (nvlist_exists(config, ZPOOL_CONFIG_TIMESTAMP))
|
||||||
|
timestamp = fnvlist_lookup_uint64(config,
|
||||||
|
ZPOOL_CONFIG_TIMESTAMP);
|
||||||
|
|
||||||
|
if (nvlist_exists(config, ZPOOL_CONFIG_HOSTID))
|
||||||
|
hostid = fnvlist_lookup_uint64(config,
|
||||||
|
ZPOOL_CONFIG_HOSTID);
|
||||||
|
|
||||||
|
(void) fprintf(stderr, gettext("cannot import '%s': "
|
||||||
|
"pool was previously in use from another system.\n"
|
||||||
|
"Last accessed by %s (hostid=%lx) at %s"
|
||||||
|
"The pool can be imported, use 'zpool import -f' "
|
||||||
|
"to import the pool.\n"), name, hostname,
|
||||||
|
(unsigned long)hostid, ctime((time_t *)&timestamp));
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return (1);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (zpool_import_props(g_zfs, config, newname, props, flags) != 0)
|
if (zpool_import_props(g_zfs, config, newname, props, flags) != 0)
|
||||||
@ -5108,6 +5196,15 @@ status_callback(zpool_handle_t *zhp, void *data)
|
|||||||
"to be recovered.\n"));
|
"to be recovered.\n"));
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case ZPOOL_STATUS_IO_FAILURE_MMP:
|
||||||
|
(void) printf(gettext("status: The pool is suspended because "
|
||||||
|
"multihost writes failed or were delayed;\n\tanother "
|
||||||
|
"system could import the pool undetected.\n"));
|
||||||
|
(void) printf(gettext("action: Make sure the pool's devices "
|
||||||
|
"are connected, then reboot your system and\n\timport the "
|
||||||
|
"pool.\n"));
|
||||||
|
break;
|
||||||
|
|
||||||
case ZPOOL_STATUS_IO_FAILURE_WAIT:
|
case ZPOOL_STATUS_IO_FAILURE_WAIT:
|
||||||
case ZPOOL_STATUS_IO_FAILURE_CONTINUE:
|
case ZPOOL_STATUS_IO_FAILURE_CONTINUE:
|
||||||
(void) printf(gettext("status: One or more devices are "
|
(void) printf(gettext("status: One or more devices are "
|
||||||
|
@ -128,6 +128,7 @@
|
|||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <sys/fs/zfs.h>
|
#include <sys/fs/zfs.h>
|
||||||
#include <libnvpair.h>
|
#include <libnvpair.h>
|
||||||
|
#include <libzfs.h>
|
||||||
#include <libcmdutils.h>
|
#include <libcmdutils.h>
|
||||||
|
|
||||||
static int ztest_fd_data = -1;
|
static int ztest_fd_data = -1;
|
||||||
@ -166,6 +167,7 @@ typedef struct ztest_shared_opts {
|
|||||||
uint64_t zo_time;
|
uint64_t zo_time;
|
||||||
uint64_t zo_maxloops;
|
uint64_t zo_maxloops;
|
||||||
uint64_t zo_metaslab_force_ganging;
|
uint64_t zo_metaslab_force_ganging;
|
||||||
|
int zo_mmp_test;
|
||||||
} ztest_shared_opts_t;
|
} ztest_shared_opts_t;
|
||||||
|
|
||||||
static const ztest_shared_opts_t ztest_opts_defaults = {
|
static const ztest_shared_opts_t ztest_opts_defaults = {
|
||||||
@ -184,6 +186,7 @@ static const ztest_shared_opts_t ztest_opts_defaults = {
|
|||||||
.zo_passtime = 60, /* 60 seconds */
|
.zo_passtime = 60, /* 60 seconds */
|
||||||
.zo_killrate = 70, /* 70% kill rate */
|
.zo_killrate = 70, /* 70% kill rate */
|
||||||
.zo_verbose = 0,
|
.zo_verbose = 0,
|
||||||
|
.zo_mmp_test = 0,
|
||||||
.zo_init = 1,
|
.zo_init = 1,
|
||||||
.zo_time = 300, /* 5 minutes */
|
.zo_time = 300, /* 5 minutes */
|
||||||
.zo_maxloops = 50, /* max loops during spa_freeze() */
|
.zo_maxloops = 50, /* max loops during spa_freeze() */
|
||||||
@ -343,6 +346,7 @@ ztest_func_t ztest_spa_create_destroy;
|
|||||||
ztest_func_t ztest_fault_inject;
|
ztest_func_t ztest_fault_inject;
|
||||||
ztest_func_t ztest_ddt_repair;
|
ztest_func_t ztest_ddt_repair;
|
||||||
ztest_func_t ztest_dmu_snapshot_hold;
|
ztest_func_t ztest_dmu_snapshot_hold;
|
||||||
|
ztest_func_t ztest_mmp_enable_disable;
|
||||||
ztest_func_t ztest_scrub;
|
ztest_func_t ztest_scrub;
|
||||||
ztest_func_t ztest_dsl_dataset_promote_busy;
|
ztest_func_t ztest_dsl_dataset_promote_busy;
|
||||||
ztest_func_t ztest_vdev_attach_detach;
|
ztest_func_t ztest_vdev_attach_detach;
|
||||||
@ -388,6 +392,7 @@ ztest_info_t ztest_info[] = {
|
|||||||
{ ztest_fault_inject, 1, &zopt_incessant },
|
{ ztest_fault_inject, 1, &zopt_incessant },
|
||||||
{ ztest_ddt_repair, 1, &zopt_sometimes },
|
{ ztest_ddt_repair, 1, &zopt_sometimes },
|
||||||
{ ztest_dmu_snapshot_hold, 1, &zopt_sometimes },
|
{ ztest_dmu_snapshot_hold, 1, &zopt_sometimes },
|
||||||
|
{ ztest_mmp_enable_disable, 1, &zopt_sometimes },
|
||||||
{ ztest_reguid, 1, &zopt_rarely },
|
{ ztest_reguid, 1, &zopt_rarely },
|
||||||
{ ztest_scrub, 1, &zopt_often },
|
{ ztest_scrub, 1, &zopt_often },
|
||||||
{ ztest_spa_upgrade, 1, &zopt_rarely },
|
{ ztest_spa_upgrade, 1, &zopt_rarely },
|
||||||
@ -601,6 +606,7 @@ usage(boolean_t requested)
|
|||||||
"\t[-k kill_percentage (default: %llu%%)]\n"
|
"\t[-k kill_percentage (default: %llu%%)]\n"
|
||||||
"\t[-p pool_name (default: %s)]\n"
|
"\t[-p pool_name (default: %s)]\n"
|
||||||
"\t[-f dir (default: %s)] file directory for vdev files\n"
|
"\t[-f dir (default: %s)] file directory for vdev files\n"
|
||||||
|
"\t[-M] Multi-host simulate pool imported on remote host\n"
|
||||||
"\t[-V] verbose (use multiple times for ever more blather)\n"
|
"\t[-V] verbose (use multiple times for ever more blather)\n"
|
||||||
"\t[-E] use existing pool instead of creating new one\n"
|
"\t[-E] use existing pool instead of creating new one\n"
|
||||||
"\t[-T time (default: %llu sec)] total run time\n"
|
"\t[-T time (default: %llu sec)] total run time\n"
|
||||||
@ -644,7 +650,7 @@ process_options(int argc, char **argv)
|
|||||||
bcopy(&ztest_opts_defaults, zo, sizeof (*zo));
|
bcopy(&ztest_opts_defaults, zo, sizeof (*zo));
|
||||||
|
|
||||||
while ((opt = getopt(argc, argv,
|
while ((opt = getopt(argc, argv,
|
||||||
"v:s:a:m:r:R:d:t:g:i:k:p:f:VET:P:hF:B:o:")) != EOF) {
|
"v:s:a:m:r:R:d:t:g:i:k:p:f:MVET:P:hF:B:o:")) != EOF) {
|
||||||
value = 0;
|
value = 0;
|
||||||
switch (opt) {
|
switch (opt) {
|
||||||
case 'v':
|
case 'v':
|
||||||
@ -713,6 +719,9 @@ process_options(int argc, char **argv)
|
|||||||
sizeof (zo->zo_dir));
|
sizeof (zo->zo_dir));
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case 'M':
|
||||||
|
zo->zo_mmp_test = 1;
|
||||||
|
break;
|
||||||
case 'V':
|
case 'V':
|
||||||
zo->zo_verbose++;
|
zo->zo_verbose++;
|
||||||
break;
|
break;
|
||||||
@ -2480,6 +2489,9 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
|
|||||||
spa_t *spa;
|
spa_t *spa;
|
||||||
nvlist_t *nvroot;
|
nvlist_t *nvroot;
|
||||||
|
|
||||||
|
if (zo->zo_mmp_test)
|
||||||
|
return;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Attempt to create using a bad file.
|
* Attempt to create using a bad file.
|
||||||
*/
|
*/
|
||||||
@ -2511,6 +2523,56 @@ ztest_spa_create_destroy(ztest_ds_t *zd, uint64_t id)
|
|||||||
rw_exit(&ztest_name_lock);
|
rw_exit(&ztest_name_lock);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Start and then stop the MMP threads to ensure the startup and shutdown code
|
||||||
|
* works properly. Actual protection and property-related code tested via ZTS.
|
||||||
|
*/
|
||||||
|
/* ARGSUSED */
|
||||||
|
void
|
||||||
|
ztest_mmp_enable_disable(ztest_ds_t *zd, uint64_t id)
|
||||||
|
{
|
||||||
|
ztest_shared_opts_t *zo = &ztest_opts;
|
||||||
|
spa_t *spa = ztest_spa;
|
||||||
|
|
||||||
|
if (zo->zo_mmp_test)
|
||||||
|
return;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Since enabling MMP involves setting a property, it could not be done
|
||||||
|
* while the pool is suspended.
|
||||||
|
*/
|
||||||
|
if (spa_suspended(spa))
|
||||||
|
return;
|
||||||
|
|
||||||
|
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
|
||||||
|
mutex_enter(&spa->spa_props_lock);
|
||||||
|
|
||||||
|
zfs_multihost_fail_intervals = 0;
|
||||||
|
|
||||||
|
if (!spa_multihost(spa)) {
|
||||||
|
spa->spa_multihost = B_TRUE;
|
||||||
|
mmp_thread_start(spa);
|
||||||
|
}
|
||||||
|
|
||||||
|
mutex_exit(&spa->spa_props_lock);
|
||||||
|
spa_config_exit(spa, SCL_CONFIG, FTAG);
|
||||||
|
|
||||||
|
txg_wait_synced(spa_get_dsl(spa), 0);
|
||||||
|
mmp_signal_all_threads();
|
||||||
|
txg_wait_synced(spa_get_dsl(spa), 0);
|
||||||
|
|
||||||
|
spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
|
||||||
|
mutex_enter(&spa->spa_props_lock);
|
||||||
|
|
||||||
|
if (spa_multihost(spa)) {
|
||||||
|
mmp_thread_stop(spa);
|
||||||
|
spa->spa_multihost = B_FALSE;
|
||||||
|
}
|
||||||
|
|
||||||
|
mutex_exit(&spa->spa_props_lock);
|
||||||
|
spa_config_exit(spa, SCL_CONFIG, FTAG);
|
||||||
|
}
|
||||||
|
|
||||||
/* ARGSUSED */
|
/* ARGSUSED */
|
||||||
void
|
void
|
||||||
ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)
|
ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)
|
||||||
@ -2521,6 +2583,9 @@ ztest_spa_upgrade(ztest_ds_t *zd, uint64_t id)
|
|||||||
nvlist_t *nvroot, *props;
|
nvlist_t *nvroot, *props;
|
||||||
char *name;
|
char *name;
|
||||||
|
|
||||||
|
if (ztest_opts.zo_mmp_test)
|
||||||
|
return;
|
||||||
|
|
||||||
mutex_enter(&ztest_vdev_lock);
|
mutex_enter(&ztest_vdev_lock);
|
||||||
name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool);
|
name = kmem_asprintf("%s_upgrade", ztest_opts.zo_pool);
|
||||||
|
|
||||||
@ -2689,6 +2754,9 @@ ztest_vdev_add_remove(ztest_ds_t *zd, uint64_t id)
|
|||||||
nvlist_t *nvroot;
|
nvlist_t *nvroot;
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
|
if (ztest_opts.zo_mmp_test)
|
||||||
|
return;
|
||||||
|
|
||||||
mutex_enter(&ztest_vdev_lock);
|
mutex_enter(&ztest_vdev_lock);
|
||||||
leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz;
|
leaves = MAX(zs->zs_mirrors + zs->zs_splits, 1) * ztest_opts.zo_raidz;
|
||||||
|
|
||||||
@ -2771,6 +2839,9 @@ ztest_vdev_aux_add_remove(ztest_ds_t *zd, uint64_t id)
|
|||||||
uint64_t guid = 0;
|
uint64_t guid = 0;
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
|
if (ztest_opts.zo_mmp_test)
|
||||||
|
return;
|
||||||
|
|
||||||
if (ztest_random(2) == 0) {
|
if (ztest_random(2) == 0) {
|
||||||
sav = &spa->spa_spares;
|
sav = &spa->spa_spares;
|
||||||
aux = ZPOOL_CONFIG_SPARES;
|
aux = ZPOOL_CONFIG_SPARES;
|
||||||
@ -2866,6 +2937,9 @@ ztest_split_pool(ztest_ds_t *zd, uint64_t id)
|
|||||||
uint_t c, children, schildren = 0, lastlogid = 0;
|
uint_t c, children, schildren = 0, lastlogid = 0;
|
||||||
int error = 0;
|
int error = 0;
|
||||||
|
|
||||||
|
if (ztest_opts.zo_mmp_test)
|
||||||
|
return;
|
||||||
|
|
||||||
mutex_enter(&ztest_vdev_lock);
|
mutex_enter(&ztest_vdev_lock);
|
||||||
|
|
||||||
/* ensure we have a useable config; mirrors of raidz aren't supported */
|
/* ensure we have a useable config; mirrors of raidz aren't supported */
|
||||||
@ -2972,6 +3046,9 @@ ztest_vdev_attach_detach(ztest_ds_t *zd, uint64_t id)
|
|||||||
int oldvd_is_log;
|
int oldvd_is_log;
|
||||||
int error, expected_error;
|
int error, expected_error;
|
||||||
|
|
||||||
|
if (ztest_opts.zo_mmp_test)
|
||||||
|
return;
|
||||||
|
|
||||||
mutex_enter(&ztest_vdev_lock);
|
mutex_enter(&ztest_vdev_lock);
|
||||||
leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz;
|
leaves = MAX(zs->zs_mirrors, 1) * ztest_opts.zo_raidz;
|
||||||
|
|
||||||
@ -5564,6 +5641,9 @@ ztest_reguid(ztest_ds_t *zd, uint64_t id)
|
|||||||
uint64_t orig, load;
|
uint64_t orig, load;
|
||||||
int error;
|
int error;
|
||||||
|
|
||||||
|
if (ztest_opts.zo_mmp_test)
|
||||||
|
return;
|
||||||
|
|
||||||
orig = spa_guid(spa);
|
orig = spa_guid(spa);
|
||||||
load = spa_load_guid(spa);
|
load = spa_load_guid(spa);
|
||||||
|
|
||||||
@ -6249,7 +6329,7 @@ ztest_run(ztest_shared_t *zs)
|
|||||||
* Verify that we can export the pool and reimport it under a
|
* Verify that we can export the pool and reimport it under a
|
||||||
* different name.
|
* different name.
|
||||||
*/
|
*/
|
||||||
if (ztest_random(2) == 0) {
|
if ((ztest_random(2) == 0) && !ztest_opts.zo_mmp_test) {
|
||||||
char name[ZFS_MAX_DATASET_NAME_LEN];
|
char name[ZFS_MAX_DATASET_NAME_LEN];
|
||||||
(void) snprintf(name, sizeof (name), "%s_import",
|
(void) snprintf(name, sizeof (name), "%s_import",
|
||||||
ztest_opts.zo_pool);
|
ztest_opts.zo_pool);
|
||||||
@ -6397,6 +6477,56 @@ make_random_props()
|
|||||||
return (props);
|
return (props);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Import a storage pool with the given name.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
ztest_import(ztest_shared_t *zs)
|
||||||
|
{
|
||||||
|
libzfs_handle_t *hdl;
|
||||||
|
importargs_t args = { 0 };
|
||||||
|
spa_t *spa;
|
||||||
|
nvlist_t *cfg = NULL;
|
||||||
|
int nsearch = 1;
|
||||||
|
char *searchdirs[nsearch];
|
||||||
|
char *name = ztest_opts.zo_pool;
|
||||||
|
int flags = ZFS_IMPORT_MISSING_LOG;
|
||||||
|
int error;
|
||||||
|
|
||||||
|
mutex_init(&ztest_vdev_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||||
|
rw_init(&ztest_name_lock, NULL, USYNC_THREAD, NULL);
|
||||||
|
|
||||||
|
kernel_init(FREAD | FWRITE);
|
||||||
|
hdl = libzfs_init();
|
||||||
|
|
||||||
|
searchdirs[0] = ztest_opts.zo_dir;
|
||||||
|
args.paths = nsearch;
|
||||||
|
args.path = searchdirs;
|
||||||
|
args.can_be_active = B_FALSE;
|
||||||
|
|
||||||
|
error = zpool_tryimport(hdl, name, &cfg, &args);
|
||||||
|
if (error)
|
||||||
|
(void) fatal(0, "No pools found\n");
|
||||||
|
|
||||||
|
VERIFY0(spa_import(name, cfg, NULL, flags));
|
||||||
|
VERIFY0(spa_open(name, &spa, FTAG));
|
||||||
|
zs->zs_metaslab_sz =
|
||||||
|
1ULL << spa->spa_root_vdev->vdev_child[0]->vdev_ms_shift;
|
||||||
|
spa_close(spa, FTAG);
|
||||||
|
|
||||||
|
libzfs_fini(hdl);
|
||||||
|
kernel_fini();
|
||||||
|
|
||||||
|
if (!ztest_opts.zo_mmp_test) {
|
||||||
|
ztest_run_zdb(ztest_opts.zo_pool);
|
||||||
|
ztest_freeze();
|
||||||
|
ztest_run_zdb(ztest_opts.zo_pool);
|
||||||
|
}
|
||||||
|
|
||||||
|
rw_destroy(&ztest_name_lock);
|
||||||
|
mutex_destroy(&ztest_vdev_lock);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Create a storage pool with the given name and initial vdev size.
|
* Create a storage pool with the given name and initial vdev size.
|
||||||
* Then test spa_freeze() functionality.
|
* Then test spa_freeze() functionality.
|
||||||
@ -6441,11 +6571,11 @@ ztest_init(ztest_shared_t *zs)
|
|||||||
|
|
||||||
kernel_fini();
|
kernel_fini();
|
||||||
|
|
||||||
|
if (!ztest_opts.zo_mmp_test) {
|
||||||
ztest_run_zdb(ztest_opts.zo_pool);
|
ztest_run_zdb(ztest_opts.zo_pool);
|
||||||
|
|
||||||
ztest_freeze();
|
ztest_freeze();
|
||||||
|
|
||||||
ztest_run_zdb(ztest_opts.zo_pool);
|
ztest_run_zdb(ztest_opts.zo_pool);
|
||||||
|
}
|
||||||
|
|
||||||
rw_destroy(&ztest_name_lock);
|
rw_destroy(&ztest_name_lock);
|
||||||
mutex_destroy(&ztest_vdev_lock);
|
mutex_destroy(&ztest_vdev_lock);
|
||||||
@ -6610,13 +6740,19 @@ ztest_run_init(void)
|
|||||||
{
|
{
|
||||||
ztest_shared_t *zs = ztest_shared;
|
ztest_shared_t *zs = ztest_shared;
|
||||||
|
|
||||||
ASSERT(ztest_opts.zo_init != 0);
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Blow away any existing copy of zpool.cache
|
* Blow away any existing copy of zpool.cache
|
||||||
*/
|
*/
|
||||||
(void) remove(spa_config_path);
|
(void) remove(spa_config_path);
|
||||||
|
|
||||||
|
if (ztest_opts.zo_init == 0) {
|
||||||
|
if (ztest_opts.zo_verbose >= 1)
|
||||||
|
(void) printf("Importing pool %s\n",
|
||||||
|
ztest_opts.zo_pool);
|
||||||
|
ztest_import(zs);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Create and initialize our storage pool.
|
* Create and initialize our storage pool.
|
||||||
*/
|
*/
|
||||||
@ -6823,6 +6959,7 @@ main(int argc, char **argv)
|
|||||||
(void) printf("\n");
|
(void) printf("\n");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!ztest_opts.zo_mmp_test)
|
||||||
ztest_run_zdb(ztest_opts.zo_pool);
|
ztest_run_zdb(ztest_opts.zo_pool);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -23,7 +23,7 @@
|
|||||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2011 Pawel Jakub Dawidek. All rights reserved.
|
* Copyright (c) 2011 Pawel Jakub Dawidek. All rights reserved.
|
||||||
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
|
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
|
||||||
* Copyright (c) 2012, Joyent, Inc. All rights reserved.
|
* Copyright 2019 Joyent, Inc.
|
||||||
* Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
|
* Copyright (c) 2012 Martin Matuska <mm@FreeBSD.org>. All rights reserved.
|
||||||
* Copyright (c) 2013 Steven Hartland. All rights reserved.
|
* Copyright (c) 2013 Steven Hartland. All rights reserved.
|
||||||
* Copyright (c) 2014 Integros [integros.com]
|
* Copyright (c) 2014 Integros [integros.com]
|
||||||
@ -131,6 +131,7 @@ typedef enum zfs_error {
|
|||||||
EZFS_DIFFDATA, /* bad zfs diff data */
|
EZFS_DIFFDATA, /* bad zfs diff data */
|
||||||
EZFS_POOLREADONLY, /* pool is in read-only mode */
|
EZFS_POOLREADONLY, /* pool is in read-only mode */
|
||||||
EZFS_SCRUB_PAUSED, /* scrub currently paused */
|
EZFS_SCRUB_PAUSED, /* scrub currently paused */
|
||||||
|
EZFS_ACTIVE_POOL, /* pool is imported on a different system */
|
||||||
EZFS_NO_PENDING, /* cannot cancel, no operation is pending */
|
EZFS_NO_PENDING, /* cannot cancel, no operation is pending */
|
||||||
EZFS_CHECKPOINT_EXISTS, /* checkpoint exists */
|
EZFS_CHECKPOINT_EXISTS, /* checkpoint exists */
|
||||||
EZFS_DISCARDING_CHECKPOINT, /* currently discarding a checkpoint */
|
EZFS_DISCARDING_CHECKPOINT, /* currently discarding a checkpoint */
|
||||||
@ -315,6 +316,8 @@ typedef enum {
|
|||||||
/*
|
/*
|
||||||
* The following correspond to faults as defined in the (fault.fs.zfs.*)
|
* The following correspond to faults as defined in the (fault.fs.zfs.*)
|
||||||
* event namespace. Each is associated with a corresponding message ID.
|
* event namespace. Each is associated with a corresponding message ID.
|
||||||
|
* This must be kept in sync with the zfs_msgid_table in
|
||||||
|
* lib/libzfs/libzfs_status.c.
|
||||||
*/
|
*/
|
||||||
ZPOOL_STATUS_CORRUPT_CACHE, /* corrupt /kernel/drv/zpool.cache */
|
ZPOOL_STATUS_CORRUPT_CACHE, /* corrupt /kernel/drv/zpool.cache */
|
||||||
ZPOOL_STATUS_MISSING_DEV_R, /* missing device with replicas */
|
ZPOOL_STATUS_MISSING_DEV_R, /* missing device with replicas */
|
||||||
@ -327,8 +330,11 @@ typedef enum {
|
|||||||
ZPOOL_STATUS_FAILING_DEV, /* device experiencing errors */
|
ZPOOL_STATUS_FAILING_DEV, /* device experiencing errors */
|
||||||
ZPOOL_STATUS_VERSION_NEWER, /* newer on-disk version */
|
ZPOOL_STATUS_VERSION_NEWER, /* newer on-disk version */
|
||||||
ZPOOL_STATUS_HOSTID_MISMATCH, /* last accessed by another system */
|
ZPOOL_STATUS_HOSTID_MISMATCH, /* last accessed by another system */
|
||||||
|
ZPOOL_STATUS_HOSTID_ACTIVE, /* currently active on another system */
|
||||||
|
ZPOOL_STATUS_HOSTID_REQUIRED, /* multihost=on and hostid=0 */
|
||||||
ZPOOL_STATUS_IO_FAILURE_WAIT, /* failed I/O, failmode 'wait' */
|
ZPOOL_STATUS_IO_FAILURE_WAIT, /* failed I/O, failmode 'wait' */
|
||||||
ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */
|
ZPOOL_STATUS_IO_FAILURE_CONTINUE, /* failed I/O, failmode 'continue' */
|
||||||
|
ZPOOL_STATUS_IO_FAILURE_MMP, /* failed MMP, failmode not 'panic' */
|
||||||
ZPOOL_STATUS_BAD_LOG, /* cannot read log chain(s) */
|
ZPOOL_STATUS_BAD_LOG, /* cannot read log chain(s) */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -407,6 +413,8 @@ typedef struct importargs {
|
|||||||
} importargs_t;
|
} importargs_t;
|
||||||
|
|
||||||
extern nvlist_t *zpool_search_import(libzfs_handle_t *, importargs_t *);
|
extern nvlist_t *zpool_search_import(libzfs_handle_t *, importargs_t *);
|
||||||
|
extern int zpool_tryimport(libzfs_handle_t *hdl, char *target,
|
||||||
|
nvlist_t **configp, importargs_t *args);
|
||||||
|
|
||||||
/* legacy pool search routines */
|
/* legacy pool search routines */
|
||||||
extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **);
|
extern nvlist_t *zpool_find_import(libzfs_handle_t *, int, char **);
|
||||||
@ -745,6 +753,7 @@ extern boolean_t zfs_dataset_exists(libzfs_handle_t *, const char *,
|
|||||||
zfs_type_t);
|
zfs_type_t);
|
||||||
extern int zfs_spa_version(zfs_handle_t *, int *);
|
extern int zfs_spa_version(zfs_handle_t *, int *);
|
||||||
extern boolean_t zfs_bookmark_exists(const char *path);
|
extern boolean_t zfs_bookmark_exists(const char *path);
|
||||||
|
extern ulong_t get_system_hostid(void);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Mount support functions.
|
* Mount support functions.
|
||||||
|
@ -440,6 +440,8 @@ make_dataset_handle_common(zfs_handle_t *zhp, zfs_cmd_t *zc)
|
|||||||
zhp->zfs_head_type = ZFS_TYPE_VOLUME;
|
zhp->zfs_head_type = ZFS_TYPE_VOLUME;
|
||||||
else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS)
|
else if (zhp->zfs_dmustats.dds_type == DMU_OST_ZFS)
|
||||||
zhp->zfs_head_type = ZFS_TYPE_FILESYSTEM;
|
zhp->zfs_head_type = ZFS_TYPE_FILESYSTEM;
|
||||||
|
else if (zhp->zfs_dmustats.dds_type == DMU_OST_OTHER)
|
||||||
|
return (-1);
|
||||||
else
|
else
|
||||||
abort();
|
abort();
|
||||||
|
|
||||||
|
@ -137,7 +137,7 @@ typedef enum {
|
|||||||
SHARED_SMB = 0x4
|
SHARED_SMB = 0x4
|
||||||
} zfs_share_type_t;
|
} zfs_share_type_t;
|
||||||
|
|
||||||
#define CONFIG_BUF_MINSIZE 65536
|
#define CONFIG_BUF_MINSIZE 262144
|
||||||
|
|
||||||
int zfs_error(libzfs_handle_t *, int, const char *);
|
int zfs_error(libzfs_handle_t *, int, const char *);
|
||||||
int zfs_error_fmt(libzfs_handle_t *, int, const char *, ...);
|
int zfs_error_fmt(libzfs_handle_t *, int, const char *, ...);
|
||||||
|
@ -1599,16 +1599,87 @@ name_or_guid_exists(zpool_handle_t *zhp, void *data)
|
|||||||
nvlist_t *
|
nvlist_t *
|
||||||
zpool_search_import(libzfs_handle_t *hdl, importargs_t *import)
|
zpool_search_import(libzfs_handle_t *hdl, importargs_t *import)
|
||||||
{
|
{
|
||||||
|
nvlist_t *pools = NULL;
|
||||||
|
|
||||||
verify(import->poolname == NULL || import->guid == 0);
|
verify(import->poolname == NULL || import->guid == 0);
|
||||||
|
|
||||||
if (import->unique)
|
if (import->unique)
|
||||||
import->exists = zpool_iter(hdl, name_or_guid_exists, import);
|
import->exists = zpool_iter(hdl, name_or_guid_exists, import);
|
||||||
|
|
||||||
if (import->cachefile != NULL)
|
if (import->cachefile != NULL)
|
||||||
return (zpool_find_import_cached(hdl, import->cachefile,
|
pools = zpool_find_import_cached(hdl, import->cachefile,
|
||||||
import->poolname, import->guid));
|
import->poolname, import->guid);
|
||||||
|
else
|
||||||
|
pools = zpool_find_import_impl(hdl, import);
|
||||||
|
|
||||||
return (zpool_find_import_impl(hdl, import));
|
return (pools);
|
||||||
|
}
|
||||||
|
|
||||||
|
static boolean_t
|
||||||
|
pool_match(nvlist_t *cfg, char *tgt)
|
||||||
|
{
|
||||||
|
uint64_t v, guid = strtoull(tgt, NULL, 0);
|
||||||
|
char *s;
|
||||||
|
|
||||||
|
if (guid != 0) {
|
||||||
|
if (nvlist_lookup_uint64(cfg, ZPOOL_CONFIG_POOL_GUID, &v) == 0)
|
||||||
|
return (v == guid);
|
||||||
|
} else {
|
||||||
|
if (nvlist_lookup_string(cfg, ZPOOL_CONFIG_POOL_NAME, &s) == 0)
|
||||||
|
return (strcmp(s, tgt) == 0);
|
||||||
|
}
|
||||||
|
return (B_FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
int
|
||||||
|
zpool_tryimport(libzfs_handle_t *hdl, char *target, nvlist_t **configp,
|
||||||
|
importargs_t *args)
|
||||||
|
{
|
||||||
|
nvlist_t *pools;
|
||||||
|
nvlist_t *match = NULL;
|
||||||
|
nvlist_t *config = NULL;
|
||||||
|
char *sepp = NULL;
|
||||||
|
int count = 0;
|
||||||
|
char *targetdup = strdup(target);
|
||||||
|
|
||||||
|
*configp = NULL;
|
||||||
|
|
||||||
|
if ((sepp = strpbrk(targetdup, "/@")) != NULL) {
|
||||||
|
*sepp = '\0';
|
||||||
|
}
|
||||||
|
|
||||||
|
pools = zpool_search_import(hdl, args);
|
||||||
|
|
||||||
|
if (pools != NULL) {
|
||||||
|
nvpair_t *elem = NULL;
|
||||||
|
while ((elem = nvlist_next_nvpair(pools, elem)) != NULL) {
|
||||||
|
VERIFY0(nvpair_value_nvlist(elem, &config));
|
||||||
|
if (pool_match(config, targetdup)) {
|
||||||
|
count++;
|
||||||
|
if (match != NULL) {
|
||||||
|
/* multiple matches found */
|
||||||
|
continue;
|
||||||
|
} else {
|
||||||
|
match = config;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (count == 0) {
|
||||||
|
free(targetdup);
|
||||||
|
return (ENOENT);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (count > 1) {
|
||||||
|
free(targetdup);
|
||||||
|
return (EINVAL);
|
||||||
|
}
|
||||||
|
|
||||||
|
*configp = match;
|
||||||
|
free(targetdup);
|
||||||
|
|
||||||
|
return (0);
|
||||||
}
|
}
|
||||||
|
|
||||||
boolean_t
|
boolean_t
|
||||||
|
@ -665,6 +665,15 @@ zpool_valid_proplist(libzfs_handle_t *hdl, const char *poolname,
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case ZPOOL_PROP_MULTIHOST:
|
||||||
|
if (get_system_hostid() == 0) {
|
||||||
|
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||||
|
"requires a non-zero system hostid"));
|
||||||
|
(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||||
"property '%s'(%d) not defined"), propname, prop);
|
"property '%s'(%d) not defined"), propname, prop);
|
||||||
@ -1802,6 +1811,7 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
|
|||||||
|
|
||||||
if (error) {
|
if (error) {
|
||||||
char desc[1024];
|
char desc[1024];
|
||||||
|
char aux[256];
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Dry-run failed, but we print out what success
|
* Dry-run failed, but we print out what success
|
||||||
@ -1847,6 +1857,46 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
|
|||||||
(void) zfs_error(hdl, EZFS_BADVERSION, desc);
|
(void) zfs_error(hdl, EZFS_BADVERSION, desc);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case EREMOTEIO:
|
||||||
|
if (nv != NULL && nvlist_lookup_nvlist(nv,
|
||||||
|
ZPOOL_CONFIG_LOAD_INFO, &nvinfo) == 0) {
|
||||||
|
char *hostname = "<unknown>";
|
||||||
|
uint64_t hostid = 0;
|
||||||
|
mmp_state_t mmp_state;
|
||||||
|
|
||||||
|
mmp_state = fnvlist_lookup_uint64(nvinfo,
|
||||||
|
ZPOOL_CONFIG_MMP_STATE);
|
||||||
|
|
||||||
|
if (nvlist_exists(nvinfo,
|
||||||
|
ZPOOL_CONFIG_MMP_HOSTNAME))
|
||||||
|
hostname = fnvlist_lookup_string(nvinfo,
|
||||||
|
ZPOOL_CONFIG_MMP_HOSTNAME);
|
||||||
|
|
||||||
|
if (nvlist_exists(nvinfo,
|
||||||
|
ZPOOL_CONFIG_MMP_HOSTID))
|
||||||
|
hostid = fnvlist_lookup_uint64(nvinfo,
|
||||||
|
ZPOOL_CONFIG_MMP_HOSTID);
|
||||||
|
|
||||||
|
if (mmp_state == MMP_STATE_ACTIVE) {
|
||||||
|
(void) snprintf(aux, sizeof (aux),
|
||||||
|
dgettext(TEXT_DOMAIN, "pool is imp"
|
||||||
|
"orted on host '%s' (hostid=%lx).\n"
|
||||||
|
"Export the pool on the other "
|
||||||
|
"system, then run 'zpool import'."),
|
||||||
|
hostname, (unsigned long) hostid);
|
||||||
|
} else if (mmp_state == MMP_STATE_NO_HOSTID) {
|
||||||
|
(void) snprintf(aux, sizeof (aux),
|
||||||
|
dgettext(TEXT_DOMAIN, "pool has "
|
||||||
|
"the multihost property on and "
|
||||||
|
"the\nsystem's hostid is not "
|
||||||
|
"set.\n"));
|
||||||
|
}
|
||||||
|
|
||||||
|
(void) zfs_error_aux(hdl, aux);
|
||||||
|
}
|
||||||
|
(void) zfs_error(hdl, EZFS_ACTIVE_POOL, desc);
|
||||||
|
break;
|
||||||
|
|
||||||
case EINVAL:
|
case EINVAL:
|
||||||
(void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
|
(void) zfs_error(hdl, EZFS_INVALCONFIG, desc);
|
||||||
break;
|
break;
|
||||||
@ -2392,7 +2442,7 @@ zpool_find_vdev(zpool_handle_t *zhp, const char *path, boolean_t *avail_spare,
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
vdev_online(nvlist_t *nv)
|
vdev_is_online(nvlist_t *nv)
|
||||||
{
|
{
|
||||||
uint64_t ival;
|
uint64_t ival;
|
||||||
|
|
||||||
@ -2460,7 +2510,7 @@ vdev_get_physpaths(nvlist_t *nv, char *physpath, size_t phypath_size,
|
|||||||
return (EZFS_INVALCONFIG);
|
return (EZFS_INVALCONFIG);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (vdev_online(nv)) {
|
if (vdev_is_online(nv)) {
|
||||||
if ((ret = vdev_get_one_physpath(nv, physpath,
|
if ((ret = vdev_get_one_physpath(nv, physpath,
|
||||||
phypath_size, rsz)) != 0)
|
phypath_size, rsz)) != 0)
|
||||||
return (ret);
|
return (ret);
|
||||||
|
@ -53,20 +53,36 @@
|
|||||||
* of this table, and hence have no associated message ID.
|
* of this table, and hence have no associated message ID.
|
||||||
*/
|
*/
|
||||||
static char *zfs_msgid_table[] = {
|
static char *zfs_msgid_table[] = {
|
||||||
"ZFS-8000-14",
|
"ZFS-8000-14", /* ZPOOL_STATUS_CORRUPT_CACHE */
|
||||||
"ZFS-8000-2Q",
|
"ZFS-8000-2Q", /* ZPOOL_STATUS_MISSING_DEV_R */
|
||||||
"ZFS-8000-3C",
|
"ZFS-8000-3C", /* ZPOOL_STATUS_MISSING_DEV_NR */
|
||||||
"ZFS-8000-4J",
|
"ZFS-8000-4J", /* ZPOOL_STATUS_CORRUPT_LABEL_R */
|
||||||
"ZFS-8000-5E",
|
"ZFS-8000-5E", /* ZPOOL_STATUS_CORRUPT_LABEL_NR */
|
||||||
"ZFS-8000-6X",
|
"ZFS-8000-6X", /* ZPOOL_STATUS_BAD_GUID_SUM */
|
||||||
"ZFS-8000-72",
|
"ZFS-8000-72", /* ZPOOL_STATUS_CORRUPT_POOL */
|
||||||
"ZFS-8000-8A",
|
"ZFS-8000-8A", /* ZPOOL_STATUS_CORRUPT_DATA */
|
||||||
"ZFS-8000-9P",
|
"ZFS-8000-9P", /* ZPOOL_STATUS_FAILING_DEV */
|
||||||
"ZFS-8000-A5",
|
"ZFS-8000-A5", /* ZPOOL_STATUS_VERSION_NEWER */
|
||||||
"ZFS-8000-EY",
|
"ZFS-8000-EY", /* ZPOOL_STATUS_HOSTID_MISMATCH */
|
||||||
"ZFS-8000-HC",
|
"ZFS-8000-EY", /* ZPOOL_STATUS_HOSTID_ACTIVE */
|
||||||
"ZFS-8000-JQ",
|
"ZFS-8000-EY", /* ZPOOL_STATUS_HOSTID_REQUIRED */
|
||||||
"ZFS-8000-K4",
|
"ZFS-8000-HC", /* ZPOOL_STATUS_IO_FAILURE_WAIT */
|
||||||
|
"ZFS-8000-JQ", /* ZPOOL_STATUS_IO_FAILURE_CONTINUE */
|
||||||
|
"ZFS-8000-MM", /* ZPOOL_STATUS_IO_FAILURE_MMP */
|
||||||
|
"ZFS-8000-K4", /* ZPOOL_STATUS_BAD_LOG */
|
||||||
|
/*
|
||||||
|
* The following results have no message ID.
|
||||||
|
* ZPOOL_STATUS_UNSUP_FEAT_READ
|
||||||
|
* ZPOOL_STATUS_UNSUP_FEAT_WRITE
|
||||||
|
* ZPOOL_STATUS_FAULTED_DEV_R
|
||||||
|
* ZPOOL_STATUS_FAULTED_DEV_NR
|
||||||
|
* ZPOOL_STATUS_VERSION_OLDER
|
||||||
|
* ZPOOL_STATUS_FEAT_DISABLED
|
||||||
|
* ZPOOL_STATUS_RESILVERING
|
||||||
|
* ZPOOL_STATUS_OFFLINE_DEV
|
||||||
|
* ZPOOL_STATUS_REMOVED_DEV
|
||||||
|
* ZPOOL_STATUS_OK
|
||||||
|
*/
|
||||||
};
|
};
|
||||||
|
|
||||||
#define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0]))
|
#define NMSGID (sizeof (zfs_msgid_table) / sizeof (zfs_msgid_table[0]))
|
||||||
@ -204,6 +220,7 @@ check_status(nvlist_t *config, boolean_t isimport)
|
|||||||
uint64_t stateval;
|
uint64_t stateval;
|
||||||
uint64_t suspended;
|
uint64_t suspended;
|
||||||
uint64_t hostid = 0;
|
uint64_t hostid = 0;
|
||||||
|
unsigned long system_hostid = get_system_hostid();
|
||||||
|
|
||||||
verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
|
verify(nvlist_lookup_uint64(config, ZPOOL_CONFIG_VERSION,
|
||||||
&version) == 0);
|
&version) == 0);
|
||||||
@ -223,11 +240,31 @@ check_status(nvlist_t *config, boolean_t isimport)
|
|||||||
ps->pss_state == DSS_SCANNING)
|
ps->pss_state == DSS_SCANNING)
|
||||||
return (ZPOOL_STATUS_RESILVERING);
|
return (ZPOOL_STATUS_RESILVERING);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* The multihost property is set and the pool may be active.
|
||||||
|
*/
|
||||||
|
if (vs->vs_state == VDEV_STATE_CANT_OPEN &&
|
||||||
|
vs->vs_aux == VDEV_AUX_ACTIVE) {
|
||||||
|
mmp_state_t mmp_state;
|
||||||
|
nvlist_t *nvinfo;
|
||||||
|
|
||||||
|
nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO);
|
||||||
|
mmp_state = fnvlist_lookup_uint64(nvinfo,
|
||||||
|
ZPOOL_CONFIG_MMP_STATE);
|
||||||
|
|
||||||
|
if (mmp_state == MMP_STATE_ACTIVE)
|
||||||
|
return (ZPOOL_STATUS_HOSTID_ACTIVE);
|
||||||
|
else if (mmp_state == MMP_STATE_NO_HOSTID)
|
||||||
|
return (ZPOOL_STATUS_HOSTID_REQUIRED);
|
||||||
|
else
|
||||||
|
return (ZPOOL_STATUS_HOSTID_MISMATCH);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Pool last accessed by another system.
|
* Pool last accessed by another system.
|
||||||
*/
|
*/
|
||||||
(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
|
(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_HOSTID, &hostid);
|
||||||
if (hostid != 0 && (unsigned long)hostid != gethostid() &&
|
if (hostid != 0 && (unsigned long)hostid != system_hostid &&
|
||||||
stateval == POOL_STATE_ACTIVE)
|
stateval == POOL_STATE_ACTIVE)
|
||||||
return (ZPOOL_STATUS_HOSTID_MISMATCH);
|
return (ZPOOL_STATUS_HOSTID_MISMATCH);
|
||||||
|
|
||||||
@ -260,10 +297,16 @@ check_status(nvlist_t *config, boolean_t isimport)
|
|||||||
return (ZPOOL_STATUS_BAD_GUID_SUM);
|
return (ZPOOL_STATUS_BAD_GUID_SUM);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Check whether the pool has suspended due to failed I/O.
|
* Check whether the pool has suspended.
|
||||||
*/
|
*/
|
||||||
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED,
|
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED,
|
||||||
&suspended) == 0) {
|
&suspended) == 0) {
|
||||||
|
uint64_t reason;
|
||||||
|
|
||||||
|
if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_SUSPENDED_REASON,
|
||||||
|
&reason) == 0 && reason == ZIO_SUSPEND_MMP)
|
||||||
|
return (ZPOOL_STATUS_IO_FAILURE_MMP);
|
||||||
|
|
||||||
if (suspended == ZIO_FAILURE_MODE_CONTINUE)
|
if (suspended == ZIO_FAILURE_MODE_CONTINUE)
|
||||||
return (ZPOOL_STATUS_IO_FAILURE_CONTINUE);
|
return (ZPOOL_STATUS_IO_FAILURE_CONTINUE);
|
||||||
return (ZPOOL_STATUS_IO_FAILURE_WAIT);
|
return (ZPOOL_STATUS_IO_FAILURE_WAIT);
|
||||||
@ -358,6 +401,7 @@ check_status(nvlist_t *config, boolean_t isimport)
|
|||||||
if (isimport) {
|
if (isimport) {
|
||||||
feat = fnvlist_lookup_nvlist(config,
|
feat = fnvlist_lookup_nvlist(config,
|
||||||
ZPOOL_CONFIG_LOAD_INFO);
|
ZPOOL_CONFIG_LOAD_INFO);
|
||||||
|
if (nvlist_exists(feat, ZPOOL_CONFIG_ENABLED_FEAT))
|
||||||
feat = fnvlist_lookup_nvlist(feat,
|
feat = fnvlist_lookup_nvlist(feat,
|
||||||
ZPOOL_CONFIG_ENABLED_FEAT);
|
ZPOOL_CONFIG_ENABLED_FEAT);
|
||||||
} else {
|
} else {
|
||||||
|
@ -21,7 +21,7 @@
|
|||||||
|
|
||||||
/*
|
/*
|
||||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2018 Joyent, Inc.
|
* Copyright 2019 Joyent, Inc.
|
||||||
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
|
* Copyright (c) 2011, 2017 by Delphix. All rights reserved.
|
||||||
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
|
* Copyright 2016 Igor Kozhukhov <ikozhukhov@gmail.com>
|
||||||
* Copyright (c) 2017 Datto Inc.
|
* Copyright (c) 2017 Datto Inc.
|
||||||
@ -56,6 +56,7 @@
|
|||||||
|
|
||||||
#include "libzfs_impl.h"
|
#include "libzfs_impl.h"
|
||||||
#include "zfs_prop.h"
|
#include "zfs_prop.h"
|
||||||
|
#include "zfs_comutil.h"
|
||||||
#include "zfeature_common.h"
|
#include "zfeature_common.h"
|
||||||
|
|
||||||
|
|
||||||
@ -254,6 +255,9 @@ libzfs_error_description(libzfs_handle_t *hdl)
|
|||||||
return (dgettext(TEXT_DOMAIN, "device removal in progress"));
|
return (dgettext(TEXT_DOMAIN, "device removal in progress"));
|
||||||
case EZFS_VDEV_TOO_BIG:
|
case EZFS_VDEV_TOO_BIG:
|
||||||
return (dgettext(TEXT_DOMAIN, "device exceeds supported size"));
|
return (dgettext(TEXT_DOMAIN, "device exceeds supported size"));
|
||||||
|
case EZFS_ACTIVE_POOL:
|
||||||
|
return (dgettext(TEXT_DOMAIN, "pool is imported on a "
|
||||||
|
"different host"));
|
||||||
case EZFS_TOOMANY:
|
case EZFS_TOOMANY:
|
||||||
return (dgettext(TEXT_DOMAIN, "argument list too long"));
|
return (dgettext(TEXT_DOMAIN, "argument list too long"));
|
||||||
case EZFS_INITIALIZING:
|
case EZFS_INITIALIZING:
|
||||||
@ -424,6 +428,9 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
|
|||||||
"pool I/O is currently suspended"));
|
"pool I/O is currently suspended"));
|
||||||
zfs_verror(hdl, EZFS_POOLUNAVAIL, fmt, ap);
|
zfs_verror(hdl, EZFS_POOLUNAVAIL, fmt, ap);
|
||||||
break;
|
break;
|
||||||
|
case EREMOTEIO:
|
||||||
|
zfs_verror(hdl, EZFS_ACTIVE_POOL, fmt, ap);
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
zfs_error_aux(hdl, strerror(error));
|
zfs_error_aux(hdl, strerror(error));
|
||||||
zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
|
zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
|
||||||
@ -512,6 +519,9 @@ zpool_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
|
|||||||
case ESRCH:
|
case ESRCH:
|
||||||
zfs_verror(hdl, EZFS_NO_PENDING, fmt, ap);
|
zfs_verror(hdl, EZFS_NO_PENDING, fmt, ap);
|
||||||
break;
|
break;
|
||||||
|
case EREMOTEIO:
|
||||||
|
zfs_verror(hdl, EZFS_ACTIVE_POOL, fmt, ap);
|
||||||
|
break;
|
||||||
case ZFS_ERR_CHECKPOINT_EXISTS:
|
case ZFS_ERR_CHECKPOINT_EXISTS:
|
||||||
zfs_verror(hdl, EZFS_CHECKPOINT_EXISTS, fmt, ap);
|
zfs_verror(hdl, EZFS_CHECKPOINT_EXISTS, fmt, ap);
|
||||||
break;
|
break;
|
||||||
@ -1592,3 +1602,20 @@ zprop_iter(zprop_func func, void *cb, boolean_t show_all, boolean_t ordered,
|
|||||||
{
|
{
|
||||||
return (zprop_iter_common(func, cb, show_all, ordered, type));
|
return (zprop_iter_common(func, cb, show_all, ordered, type));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
ulong_t
|
||||||
|
get_system_hostid(void)
|
||||||
|
{
|
||||||
|
char *env;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allow the hostid to be subverted for testing.
|
||||||
|
*/
|
||||||
|
env = getenv("ZFS_HOSTID");
|
||||||
|
if (env) {
|
||||||
|
ulong_t hostid = strtoull(env, NULL, 16);
|
||||||
|
return (hostid & 0xFFFFFFFF);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (gethostid());
|
||||||
|
}
|
||||||
|
@ -41,6 +41,7 @@
|
|||||||
#include <sys/zmod.h>
|
#include <sys/zmod.h>
|
||||||
#include <sys/utsname.h>
|
#include <sys/utsname.h>
|
||||||
#include <sys/systeminfo.h>
|
#include <sys/systeminfo.h>
|
||||||
|
#include <libzfs.h>
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Emulation of kernel services in userland.
|
* Emulation of kernel services in userland.
|
||||||
@ -989,8 +990,8 @@ kernel_init(int mode)
|
|||||||
dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
|
dprintf("physmem = %llu pages (%.2f GB)\n", physmem,
|
||||||
(double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
|
(double)physmem * sysconf(_SC_PAGE_SIZE) / (1ULL << 30));
|
||||||
|
|
||||||
(void) snprintf(hw_serial, sizeof (hw_serial), "%lu",
|
(void) snprintf(hw_serial, sizeof (hw_serial), "%ld",
|
||||||
(mode & FWRITE) ? (unsigned long)gethostid() : 0);
|
(mode & FWRITE) ? get_system_hostid() : 0);
|
||||||
|
|
||||||
VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
|
VERIFY((random_fd = open("/dev/random", O_RDONLY)) != -1);
|
||||||
VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);
|
VERIFY((urandom_fd = open("/dev/urandom", O_RDONLY)) != -1);
|
||||||
|
@ -332,8 +332,11 @@ extern void cv_destroy(kcondvar_t *cv);
|
|||||||
extern void cv_wait(kcondvar_t *cv, kmutex_t *mp);
|
extern void cv_wait(kcondvar_t *cv, kmutex_t *mp);
|
||||||
extern int cv_wait_sig(kcondvar_t *cv, kmutex_t *mp);
|
extern int cv_wait_sig(kcondvar_t *cv, kmutex_t *mp);
|
||||||
extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime);
|
extern clock_t cv_timedwait(kcondvar_t *cv, kmutex_t *mp, clock_t abstime);
|
||||||
|
#define cv_timedwait_sig(cvp, mp, t) cv_timedwait(cvp, mp, t)
|
||||||
extern clock_t cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
|
extern clock_t cv_timedwait_hires(kcondvar_t *cvp, kmutex_t *mp, hrtime_t tim,
|
||||||
hrtime_t res, int flag);
|
hrtime_t res, int flag);
|
||||||
|
#define cv_timedwait_sig_hires(cvp, mp, t, r, f) \
|
||||||
|
cv_timedwait_hires(cvp, mp, t, r, f)
|
||||||
extern void cv_signal(kcondvar_t *cv);
|
extern void cv_signal(kcondvar_t *cv);
|
||||||
extern void cv_broadcast(kcondvar_t *cv);
|
extern void cv_broadcast(kcondvar_t *cv);
|
||||||
|
|
||||||
|
@ -49,6 +49,7 @@ WARNS?= 0
|
|||||||
CFLAGS+= -I${SRCTOP}/sys/cddl/compat/opensolaris
|
CFLAGS+= -I${SRCTOP}/sys/cddl/compat/opensolaris
|
||||||
CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/include
|
CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/include
|
||||||
CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/lib/libumem
|
CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/lib/libumem
|
||||||
|
CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzfs/common
|
||||||
CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzpool/common
|
CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzpool/common
|
||||||
CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common/fs/zfs
|
CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common/fs/zfs
|
||||||
CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/lua
|
CFLAGS+= -I${SRCTOP}/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/lua
|
||||||
|
@ -9,6 +9,7 @@ WARNS?= 0
|
|||||||
CFLAGS+= -I${SRCTOP}/sys/cddl/compat/opensolaris
|
CFLAGS+= -I${SRCTOP}/sys/cddl/compat/opensolaris
|
||||||
CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/include
|
CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/include
|
||||||
CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/lib/libumem
|
CFLAGS+= -I${SRCTOP}/cddl/compat/opensolaris/lib/libumem
|
||||||
|
CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzfs/common
|
||||||
CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzpool/common
|
CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libzpool/common
|
||||||
CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libnvpair
|
CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libnvpair
|
||||||
CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libcmdutils
|
CFLAGS+= -I${SRCTOP}/cddl/contrib/opensolaris/lib/libcmdutils
|
||||||
|
@ -21,6 +21,7 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
|
* Copyright (c) 2012, 2017 by Delphix. All rights reserved.
|
||||||
|
* Copyright 2019 Joyent, Inc.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
#ifndef _ZFS_COMUTIL_H
|
#ifndef _ZFS_COMUTIL_H
|
||||||
@ -33,6 +34,9 @@
|
|||||||
extern "C" {
|
extern "C" {
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* Needed for ZoL errno usage in MMP kernel and user code */
|
||||||
|
#define EREMOTEIO EREMOTE
|
||||||
|
|
||||||
extern boolean_t zfs_allocatable_devs(nvlist_t *);
|
extern boolean_t zfs_allocatable_devs(nvlist_t *);
|
||||||
extern void zpool_get_load_policy(nvlist_t *, zpool_load_policy_t *);
|
extern void zpool_get_load_policy(nvlist_t *, zpool_load_policy_t *);
|
||||||
|
|
||||||
|
@ -125,6 +125,9 @@ zpool_prop_init(void)
|
|||||||
PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "EXPAND", boolean_table);
|
PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "EXPAND", boolean_table);
|
||||||
zprop_register_index(ZPOOL_PROP_READONLY, "readonly", 0,
|
zprop_register_index(ZPOOL_PROP_READONLY, "readonly", 0,
|
||||||
PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "RDONLY", boolean_table);
|
PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "RDONLY", boolean_table);
|
||||||
|
zprop_register_index(ZPOOL_PROP_MULTIHOST, "multihost", 0,
|
||||||
|
PROP_DEFAULT, ZFS_TYPE_POOL, "on | off", "MULTIHOST",
|
||||||
|
boolean_table);
|
||||||
|
|
||||||
/* default index properties */
|
/* default index properties */
|
||||||
zprop_register_index(ZPOOL_PROP_FAILUREMODE, "failmode",
|
zprop_register_index(ZPOOL_PROP_FAILUREMODE, "failmode",
|
||||||
|
@ -100,6 +100,7 @@ ZFS_COMMON_OBJS += \
|
|||||||
gzip.o \
|
gzip.o \
|
||||||
lzjb.o \
|
lzjb.o \
|
||||||
metaslab.o \
|
metaslab.o \
|
||||||
|
mmp.o \
|
||||||
multilist.o \
|
multilist.o \
|
||||||
range_tree.o \
|
range_tree.o \
|
||||||
refcount.o \
|
refcount.o \
|
||||||
|
@ -50,6 +50,7 @@
|
|||||||
#include <sys/zfeature.h>
|
#include <sys/zfeature.h>
|
||||||
#include <sys/zil_impl.h>
|
#include <sys/zil_impl.h>
|
||||||
#include <sys/dsl_userhold.h>
|
#include <sys/dsl_userhold.h>
|
||||||
|
#include <sys/mmp.h>
|
||||||
|
|
||||||
#if defined(__FreeBSD__) && defined(_KERNEL)
|
#if defined(__FreeBSD__) && defined(_KERNEL)
|
||||||
#include <sys/types.h>
|
#include <sys/types.h>
|
||||||
@ -292,6 +293,7 @@ dsl_pool_open_impl(spa_t *spa, uint64_t txg)
|
|||||||
dp->dp_meta_rootbp = *bp;
|
dp->dp_meta_rootbp = *bp;
|
||||||
rrw_init(&dp->dp_config_rwlock, B_TRUE);
|
rrw_init(&dp->dp_config_rwlock, B_TRUE);
|
||||||
txg_init(dp, txg);
|
txg_init(dp, txg);
|
||||||
|
mmp_init(spa);
|
||||||
|
|
||||||
txg_list_create(&dp->dp_dirty_datasets, spa,
|
txg_list_create(&dp->dp_dirty_datasets, spa,
|
||||||
offsetof(dsl_dataset_t, ds_dirty_link));
|
offsetof(dsl_dataset_t, ds_dirty_link));
|
||||||
@ -493,6 +495,7 @@ dsl_pool_close(dsl_pool_t *dp)
|
|||||||
*/
|
*/
|
||||||
arc_flush(dp->dp_spa, FALSE);
|
arc_flush(dp->dp_spa, FALSE);
|
||||||
|
|
||||||
|
mmp_fini(dp->dp_spa);
|
||||||
txg_fini(dp);
|
txg_fini(dp);
|
||||||
dsl_scan_fini(dp);
|
dsl_scan_fini(dp);
|
||||||
dmu_buf_user_evict_wait();
|
dmu_buf_user_evict_wait();
|
||||||
|
750
sys/cddl/contrib/opensolaris/uts/common/fs/zfs/mmp.c
Normal file
750
sys/cddl/contrib/opensolaris/uts/common/fs/zfs/mmp.c
Normal file
@ -0,0 +1,750 @@
|
|||||||
|
/*
|
||||||
|
* CDDL HEADER START
|
||||||
|
*
|
||||||
|
* The contents of this file are subject to the terms of the
|
||||||
|
* Common Development and Distribution License (the "License").
|
||||||
|
* You may not use this file except in compliance with the License.
|
||||||
|
*
|
||||||
|
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||||
|
* or http://www.opensolaris.org/os/licensing.
|
||||||
|
* See the License for the specific language governing permissions
|
||||||
|
* and limitations under the License.
|
||||||
|
*
|
||||||
|
* When distributing Covered Code, include this CDDL HEADER in each
|
||||||
|
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||||
|
* If applicable, add the following below this CDDL HEADER, with the
|
||||||
|
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||||
|
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||||
|
*
|
||||||
|
* CDDL HEADER END
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
* Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
|
||||||
|
* Copyright 2019 Joyent, Inc.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#include <sys/abd.h>
|
||||||
|
#include <sys/mmp.h>
|
||||||
|
#include <sys/spa.h>
|
||||||
|
#include <sys/spa_impl.h>
|
||||||
|
#include <sys/time.h>
|
||||||
|
#include <sys/vdev.h>
|
||||||
|
#include <sys/vdev_impl.h>
|
||||||
|
#include <sys/zfs_context.h>
|
||||||
|
#include <sys/callb.h>
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Multi-Modifier Protection (MMP) attempts to prevent a user from importing
|
||||||
|
* or opening a pool on more than one host at a time. In particular, it
|
||||||
|
* prevents "zpool import -f" on a host from succeeding while the pool is
|
||||||
|
* already imported on another host. There are many other ways in which a
|
||||||
|
* device could be used by two hosts for different purposes at the same time
|
||||||
|
* resulting in pool damage. This implementation does not attempt to detect
|
||||||
|
* those cases.
|
||||||
|
*
|
||||||
|
* MMP operates by ensuring there are frequent visible changes on disk (a
|
||||||
|
* "heartbeat") at all times. And by altering the import process to check
|
||||||
|
* for these changes and failing the import when they are detected. This
|
||||||
|
* functionality is enabled by setting the 'multihost' pool property to on.
|
||||||
|
*
|
||||||
|
* Uberblocks written by the txg_sync thread always go into the first
|
||||||
|
* (N-MMP_BLOCKS_PER_LABEL) slots, the remaining slots are reserved for MMP.
|
||||||
|
* They are used to hold uberblocks which are exactly the same as the last
|
||||||
|
* synced uberblock except that the ub_timestamp and mmp_config are frequently
|
||||||
|
* updated. Like all other uberblocks, the slot is written with an embedded
|
||||||
|
* checksum, and slots with invalid checksums are ignored. This provides the
|
||||||
|
* "heartbeat", with no risk of overwriting good uberblocks that must be
|
||||||
|
* preserved, e.g. previous txgs and associated block pointers.
|
||||||
|
*
|
||||||
|
* Three optional fields are added to the uberblock structure: ub_mmp_magic,
|
||||||
|
* ub_mmp_config, and ub_mmp_delay. The ub_mmp_magic value allows zfs to tell
|
||||||
|
* whether the other ub_mmp_* fields are valid. The ub_mmp_config field tells
|
||||||
|
* the importing host the settings of zfs_multihost_interval and
|
||||||
|
* zfs_multihost_fail_intervals on the host which last had (or currently has)
|
||||||
|
* the pool imported. These determine how long a host must wait to detect
|
||||||
|
* activity in the pool, before concluding the pool is not in use. The
|
||||||
|
* mmp_delay field is a decaying average of the amount of time between
|
||||||
|
* completion of successive MMP writes, in nanoseconds. It indicates whether
|
||||||
|
* MMP is enabled.
|
||||||
|
*
|
||||||
|
* During import an activity test may now be performed to determine if
|
||||||
|
* the pool is in use. The activity test is typically required if the
|
||||||
|
* ZPOOL_CONFIG_HOSTID does not match the system hostid, the pool state is
|
||||||
|
* POOL_STATE_ACTIVE, and the pool is not a root pool.
|
||||||
|
*
|
||||||
|
* The activity test finds the "best" uberblock (highest txg, timestamp, and, if
|
||||||
|
* ub_mmp_magic is valid, sequence number from ub_mmp_config). It then waits
|
||||||
|
* some time, and finds the "best" uberblock again. If any of the mentioned
|
||||||
|
* fields have different values in the newly read uberblock, the pool is in use
|
||||||
|
* by another host and the import fails. In order to assure the accuracy of the
|
||||||
|
* activity test, the default values result in an activity test duration of 20x
|
||||||
|
* the mmp write interval.
|
||||||
|
*
|
||||||
|
* The duration of the "zpool import" activity test depends on the information
|
||||||
|
* available in the "best" uberblock:
|
||||||
|
*
|
||||||
|
* 1) If uberblock was written by zfs-0.8 or newer and fail_intervals > 0:
|
||||||
|
* ub_mmp_config.fail_intervals * ub_mmp_config.multihost_interval * 2
|
||||||
|
*
|
||||||
|
* In this case, a weak guarantee is provided. Since the host which last had
|
||||||
|
* the pool imported will suspend the pool if no mmp writes land within
|
||||||
|
* fail_intervals * multihost_interval ms, the absence of writes during that
|
||||||
|
* time means either the pool is not imported, or it is imported but the pool
|
||||||
|
* is suspended and no further writes will occur.
|
||||||
|
*
|
||||||
|
* Note that resuming the suspended pool on the remote host would invalidate
|
||||||
|
* this guarantee, and so it is not allowed.
|
||||||
|
*
|
||||||
|
* The factor of 2 provides a conservative safety factor and derives from
|
||||||
|
* MMP_IMPORT_SAFETY_FACTOR;
|
||||||
|
*
|
||||||
|
* 2) If uberblock was written by zfs-0.8 or newer and fail_intervals == 0:
|
||||||
|
* (ub_mmp_config.multihost_interval + ub_mmp_delay) *
|
||||||
|
* zfs_multihost_import_intervals
|
||||||
|
*
|
||||||
|
* In this case no guarantee can be provided. However, as long as some devices
|
||||||
|
* are healthy and connected, it is likely that at least one write will land
|
||||||
|
* within (multihost_interval + mmp_delay) because multihost_interval is
|
||||||
|
* enough time for a write to be attempted to each leaf vdev, and mmp_delay
|
||||||
|
* is enough for one to land, based on past delays. Multiplying by
|
||||||
|
* zfs_multihost_import_intervals provides a conservative safety factor.
|
||||||
|
*
|
||||||
|
* 3) If uberblock was written by zfs-0.7:
|
||||||
|
* (zfs_multihost_interval + ub_mmp_delay) * zfs_multihost_import_intervals
|
||||||
|
*
|
||||||
|
* The same logic as case #2 applies, but we do not know remote tunables.
|
||||||
|
*
|
||||||
|
* We use the local value for zfs_multihost_interval because the original MMP
|
||||||
|
* did not record this value in the uberblock.
|
||||||
|
*
|
||||||
|
* ub_mmp_delay >= (zfs_multihost_interval / leaves), so if the other host
|
||||||
|
* has a much larger zfs_multihost_interval set, ub_mmp_delay will reflect
|
||||||
|
* that. We will have waited enough time for zfs_multihost_import_intervals
|
||||||
|
* writes to be issued and all but one to land.
|
||||||
|
*
|
||||||
|
* single device pool example delays
|
||||||
|
*
|
||||||
|
* import_delay = (1 + 1) * 20 = 40s #defaults, no I/O delay
|
||||||
|
* import_delay = (1 + 10) * 20 = 220s #defaults, 10s I/O delay
|
||||||
|
* import_delay = (10 + 10) * 20 = 400s #10s multihost_interval,
|
||||||
|
* no I/O delay
|
||||||
|
* 100 device pool example delays
|
||||||
|
*
|
||||||
|
* import_delay = (1 + .01) * 20 = 20s #defaults, no I/O delay
|
||||||
|
* import_delay = (1 + 10) * 20 = 220s #defaults, 10s I/O delay
|
||||||
|
* import_delay = (10 + .1) * 20 = 202s #10s multihost_interval,
|
||||||
|
* no I/O delay
|
||||||
|
*
|
||||||
|
* 4) Otherwise, this uberblock was written by a pre-MMP zfs:
|
||||||
|
* zfs_multihost_import_intervals * zfs_multihost_interval
|
||||||
|
*
|
||||||
|
* In this case local tunables are used. By default this product = 10s, long
|
||||||
|
* enough for a pool with any activity at all to write at least one
|
||||||
|
* uberblock. No guarantee can be provided.
|
||||||
|
*
|
||||||
|
* Additionally, the duration is then extended by a random 25% to attempt to
|
||||||
|
* detect simultaneous imports. For example, if both partner hosts are rebooted
|
||||||
|
* at the same time and automatically attempt to import the pool.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Used to control the frequency of mmp writes which are performed when the
|
||||||
|
* 'multihost' pool property is on. This is one factor used to determine the
|
||||||
|
* length of the activity check during import.
|
||||||
|
*
|
||||||
|
* On average an mmp write will be issued for each leaf vdev every
|
||||||
|
* zfs_multihost_interval milliseconds. In practice, the observed period can
|
||||||
|
* vary with the I/O load and this observed value is the ub_mmp_delay which is
|
||||||
|
* stored in the uberblock. The minimum allowed value is 100 ms.
|
||||||
|
*/
|
||||||
|
/* Interval between MMP writes, in milliseconds (see the sysctl text below). */
ulong_t zfs_multihost_interval = MMP_DEFAULT_INTERVAL;
|
||||||
|
/* FreeBSD only: publish the variable under the _vfs_zfs sysctl node. */
#ifdef __FreeBSD__
|
||||||
|
SYSCTL_DECL(_vfs_zfs);
|
||||||
|
SYSCTL_ULONG(_vfs_zfs, OID_AUTO, multihost_interval, CTLFLAG_RWTUN,
|
||||||
|
&zfs_multihost_interval, 0, "Interval between MMP writes, milliseconds");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Used to control the duration of the activity test on import. Smaller values
|
||||||
|
* of zfs_multihost_import_intervals will reduce the import time but increase
|
||||||
|
* the risk of failing to detect an active pool. The total activity check time
|
||||||
|
* is never allowed to drop below one second. A value of 0 is ignored and
|
||||||
|
* treated as if it was set to 1.
|
||||||
|
*/
|
||||||
|
/* Import activity-check length, counted in multihost_interval units. */
uint_t zfs_multihost_import_intervals = MMP_DEFAULT_IMPORT_INTERVALS;
|
||||||
|
/* FreeBSD only: publish the variable under the _vfs_zfs sysctl node. */
#ifdef __FreeBSD__
|
||||||
|
SYSCTL_UINT(_vfs_zfs, OID_AUTO, multihost_import_intervals, CTLFLAG_RWTUN,
|
||||||
|
&zfs_multihost_import_intervals, 0,
|
||||||
|
"MMP activity check period for pool import, "
|
||||||
|
"in units of multihost_interval");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Controls the behavior of the pool when mmp write failures or delays are
|
||||||
|
* detected.
|
||||||
|
*
|
||||||
|
* When zfs_multihost_fail_intervals = 0, mmp write failures or delays are
|
||||||
|
* ignored. The failures will still be reported to the ZED which depending on
|
||||||
|
* its configuration may take action such as suspending the pool or taking a
|
||||||
|
* device offline.
|
||||||
|
*
|
||||||
|
* When zfs_multihost_fail_intervals > 0, the pool will be suspended if
|
||||||
|
* zfs_multihost_fail_intervals * zfs_multihost_interval milliseconds pass
|
||||||
|
* without a successful mmp write. This guarantees the activity test will see
|
||||||
|
* mmp writes if the pool is imported. A value of 1 is ignored and treated as
|
||||||
|
* if it was set to 2, because a single leaf vdev pool will issue a write once
|
||||||
|
* per multihost_interval and thus any variation in latency would cause the
|
||||||
|
* pool to be suspended.
|
||||||
|
*/
|
||||||
|
/* Tolerated span of MMP write failures, counted in multihost_interval units. */
uint_t zfs_multihost_fail_intervals = MMP_DEFAULT_FAIL_INTERVALS;
|
||||||
|
/* FreeBSD only: publish the variable under the _vfs_zfs sysctl node. */
#ifdef __FreeBSD__
|
||||||
|
SYSCTL_UINT(_vfs_zfs, OID_AUTO, multihost_fail_intervals, CTLFLAG_RWTUN,
|
||||||
|
&zfs_multihost_fail_intervals, 0,
|
||||||
|
"How long to tolerate MMP write failures before suspending a pool, "
|
||||||
|
"in units of multihost_interval");
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/* Tag handed to spa_config_exit() for SCL_STATE when an MMP write completes. */
char *mmp_tag = "mmp_write_uberblock";
|
||||||
|
/* Thread entry point passed to thread_create() by mmp_thread_start(). */
static void mmp_thread(void *arg);
|
||||||
|
|
||||||
|
void
|
||||||
|
mmp_init(spa_t *spa)
|
||||||
|
{
|
||||||
|
mmp_thread_t *mmp = &spa->spa_mmp;
|
||||||
|
|
||||||
|
mutex_init(&mmp->mmp_thread_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||||
|
cv_init(&mmp->mmp_thread_cv, NULL, CV_DEFAULT, NULL);
|
||||||
|
mutex_init(&mmp->mmp_io_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||||
|
mmp->mmp_kstat_id = 1;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* mmp_write_done() calculates mmp_delay based on prior mmp_delay and
|
||||||
|
* the elapsed time since the last write. For the first mmp write,
|
||||||
|
* there is no "last write", so we start with fake non-zero values.
|
||||||
|
*/
|
||||||
|
mmp->mmp_last_write = gethrtime();
|
||||||
|
mmp->mmp_delay = MSEC2NSEC(MMP_INTERVAL_OK(zfs_multihost_interval));
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
mmp_fini(spa_t *spa)
|
||||||
|
{
|
||||||
|
mmp_thread_t *mmp = &spa->spa_mmp;
|
||||||
|
|
||||||
|
mutex_destroy(&mmp->mmp_thread_lock);
|
||||||
|
cv_destroy(&mmp->mmp_thread_cv);
|
||||||
|
mutex_destroy(&mmp->mmp_io_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Set up the CPR callback state in *cpr, associated with mmp_thread_lock,
 * and then acquire mmp_thread_lock.  The lock is still held on return.
 */
static void
|
||||||
|
mmp_thread_enter(mmp_thread_t *mmp, callb_cpr_t *cpr)
|
||||||
|
{
|
||||||
|
CALLB_CPR_INIT(cpr, &mmp->mmp_thread_lock, callb_generic_cpr, FTAG);
|
||||||
|
mutex_enter(&mmp->mmp_thread_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Final steps of a thread that is shutting down: clear the thread pointer
 * that *mpp refers to, wake everyone waiting on mmp_thread_cv, tear down the
 * CPR state and terminate the calling thread.
 *
 * NOTE(review): CALLB_CPR_EXIT drops mmp_thread_lock (see the inline
 * comment), so the caller presumably enters with that lock held - confirm
 * against the call site.
 */
static void
|
||||||
|
mmp_thread_exit(mmp_thread_t *mmp, kthread_t **mpp, callb_cpr_t *cpr)
|
||||||
|
{
|
||||||
|
ASSERT(*mpp != NULL);
|
||||||
|
/* Clearing the pointer is what mmp_thread_stop() waits for. */
*mpp = NULL;
|
||||||
|
cv_broadcast(&mmp->mmp_thread_cv);
|
||||||
|
CALLB_CPR_EXIT(cpr); /* drops &mmp->mmp_thread_lock */
|
||||||
|
thread_exit();
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
mmp_thread_start(spa_t *spa)
|
||||||
|
{
|
||||||
|
mmp_thread_t *mmp = &spa->spa_mmp;
|
||||||
|
|
||||||
|
if (spa_writeable(spa)) {
|
||||||
|
mutex_enter(&mmp->mmp_thread_lock);
|
||||||
|
if (!mmp->mmp_thread) {
|
||||||
|
mmp->mmp_thread = thread_create(NULL, 0, mmp_thread,
|
||||||
|
spa, 0, &p0, TS_RUN, minclsyspri);
|
||||||
|
zfs_dbgmsg("MMP thread started pool '%s' "
|
||||||
|
"gethrtime %llu", spa_name(spa), gethrtime());
|
||||||
|
}
|
||||||
|
mutex_exit(&mmp->mmp_thread_lock);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
mmp_thread_stop(spa_t *spa)
|
||||||
|
{
|
||||||
|
mmp_thread_t *mmp = &spa->spa_mmp;
|
||||||
|
|
||||||
|
mutex_enter(&mmp->mmp_thread_lock);
|
||||||
|
mmp->mmp_thread_exiting = 1;
|
||||||
|
cv_broadcast(&mmp->mmp_thread_cv);
|
||||||
|
|
||||||
|
while (mmp->mmp_thread) {
|
||||||
|
cv_wait(&mmp->mmp_thread_cv, &mmp->mmp_thread_lock);
|
||||||
|
}
|
||||||
|
mutex_exit(&mmp->mmp_thread_lock);
|
||||||
|
zfs_dbgmsg("MMP thread stopped pool '%s' gethrtime %llu",
|
||||||
|
spa_name(spa), gethrtime());
|
||||||
|
|
||||||
|
ASSERT(mmp->mmp_thread == NULL);
|
||||||
|
mmp->mmp_thread_exiting = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
 * Bit flags combined into the non-zero result of mmp_next_leaf() when no
 * usable leaf vdev was found.
 */
typedef enum mmp_vdev_state_flag {
|
||||||
|
/* One or more leaf vdevs are not writeable. */
MMP_FAIL_NOT_WRITABLE = (1 << 0),
|
||||||
|
/* One or more leaf vdevs are writeable but have an MMP write outstanding. */
MMP_FAIL_WRITE_PENDING = (1 << 1),
|
||||||
|
} mmp_vdev_state_flag_t;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Find a leaf vdev to write an MMP block to. It must not have an outstanding
|
||||||
|
* mmp write (if so a new write will also likely block). If there is no usable
|
||||||
|
* leaf, a nonzero error value is returned. The error value returned is a bit
|
||||||
|
* field.
|
||||||
|
*
|
||||||
|
* MMP_FAIL_WRITE_PENDING One or more leaf vdevs are writeable, but have an
|
||||||
|
* outstanding MMP write.
|
||||||
|
* MMP_FAIL_NOT_WRITABLE One or more leaf vdevs are not writeable.
|
||||||
|
*/
|
||||||
|
|
||||||
|
static int
|
||||||
|
mmp_next_leaf(spa_t *spa)
|
||||||
|
{
|
||||||
|
vdev_t *leaf;
|
||||||
|
vdev_t *starting_leaf;
|
||||||
|
int fail_mask = 0;
|
||||||
|
|
||||||
|
ASSERT(MUTEX_HELD(&spa->spa_mmp.mmp_io_lock));
|
||||||
|
ASSERT(spa_config_held(spa, SCL_STATE, RW_READER));
|
||||||
|
ASSERT(list_link_active(&spa->spa_leaf_list.list_head) == B_TRUE);
|
||||||
|
ASSERT(!list_is_empty(&spa->spa_leaf_list));
|
||||||
|
|
||||||
|
if (spa->spa_mmp.mmp_leaf_last_gen != spa->spa_leaf_list_gen) {
|
||||||
|
spa->spa_mmp.mmp_last_leaf = list_head(&spa->spa_leaf_list);
|
||||||
|
spa->spa_mmp.mmp_leaf_last_gen = spa->spa_leaf_list_gen;
|
||||||
|
}
|
||||||
|
|
||||||
|
leaf = spa->spa_mmp.mmp_last_leaf;
|
||||||
|
if (leaf == NULL)
|
||||||
|
leaf = list_head(&spa->spa_leaf_list);
|
||||||
|
starting_leaf = leaf;
|
||||||
|
|
||||||
|
do {
|
||||||
|
leaf = list_next(&spa->spa_leaf_list, leaf);
|
||||||
|
if (leaf == NULL)
|
||||||
|
leaf = list_head(&spa->spa_leaf_list);
|
||||||
|
|
||||||
|
if (!vdev_writeable(leaf)) {
|
||||||
|
fail_mask |= MMP_FAIL_NOT_WRITABLE;
|
||||||
|
} else if (leaf->vdev_mmp_pending != 0) {
|
||||||
|
fail_mask |= MMP_FAIL_WRITE_PENDING;
|
||||||
|
} else {
|
||||||
|
spa->spa_mmp.mmp_last_leaf = leaf;
|
||||||
|
return (0);
|
||||||
|
}
|
||||||
|
} while (leaf != starting_leaf);
|
||||||
|
|
||||||
|
ASSERT(fail_mask);
|
||||||
|
|
||||||
|
return (fail_mask);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* MMP writes are issued on a fixed schedule, but may complete at variable,
|
||||||
|
* much longer, intervals. The mmp_delay captures long periods between
|
||||||
|
* successful writes for any reason, including disk latency, scheduling delays,
|
||||||
|
* etc.
|
||||||
|
*
|
||||||
|
* The mmp_delay is usually calculated as a decaying average, but if the latest
|
||||||
|
* delay is higher we do not average it, so that we do not hide sudden spikes
|
||||||
|
* which the importing host must wait for.
|
||||||
|
*
|
||||||
|
* If writes are occurring frequently, such as due to a high rate of txg syncs,
|
||||||
|
* the mmp_delay could become very small. Since those short delays depend on
|
||||||
|
* activity we cannot count on, we never allow mmp_delay to get lower than the rate
|
||||||
|
* expected if only mmp_thread writes occur.
|
||||||
|
*
|
||||||
|
* If an mmp write was skipped or fails, and we have already waited longer than
|
||||||
|
* mmp_delay, we need to update it so the next write reflects the longer delay.
|
||||||
|
*
|
||||||
|
* Do not set mmp_delay if the multihost property is not on, so as not to
|
||||||
|
* trigger an activity check on import.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
mmp_delay_update(spa_t *spa, boolean_t write_completed)
|
||||||
|
{
|
||||||
|
mmp_thread_t *mts = &spa->spa_mmp;
|
||||||
|
hrtime_t delay = gethrtime() - mts->mmp_last_write;
|
||||||
|
|
||||||
|
ASSERT(MUTEX_HELD(&mts->mmp_io_lock));
|
||||||
|
|
||||||
|
if (spa_multihost(spa) == B_FALSE) {
|
||||||
|
mts->mmp_delay = 0;
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (delay > mts->mmp_delay)
|
||||||
|
mts->mmp_delay = delay;
|
||||||
|
|
||||||
|
if (write_completed == B_FALSE)
|
||||||
|
return;
|
||||||
|
|
||||||
|
mts->mmp_last_write = gethrtime();
|
||||||
|
|
||||||
|
/*
|
||||||
|
* strictly less than, in case delay was changed above.
|
||||||
|
*/
|
||||||
|
if (delay < mts->mmp_delay) {
|
||||||
|
hrtime_t min_delay =
|
||||||
|
MSEC2NSEC(MMP_INTERVAL_OK(zfs_multihost_interval)) /
|
||||||
|
MAX(1, vdev_count_leaves(spa));
|
||||||
|
mts->mmp_delay = MAX(((delay + mts->mmp_delay * 127) / 128),
|
||||||
|
min_delay);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
mmp_write_done(zio_t *zio)
|
||||||
|
{
|
||||||
|
spa_t *spa = zio->io_spa;
|
||||||
|
vdev_t *vd = zio->io_vd;
|
||||||
|
mmp_thread_t *mts = zio->io_private;
|
||||||
|
|
||||||
|
mutex_enter(&mts->mmp_io_lock);
|
||||||
|
uint64_t mmp_kstat_id = vd->vdev_mmp_kstat_id;
|
||||||
|
hrtime_t mmp_write_duration = gethrtime() - vd->vdev_mmp_pending;
|
||||||
|
|
||||||
|
mmp_delay_update(spa, (zio->io_error == 0));
|
||||||
|
|
||||||
|
vd->vdev_mmp_pending = 0;
|
||||||
|
vd->vdev_mmp_kstat_id = 0;
|
||||||
|
|
||||||
|
mutex_exit(&mts->mmp_io_lock);
|
||||||
|
spa_config_exit(spa, SCL_STATE, mmp_tag);
|
||||||
|
|
||||||
|
abd_free(zio->io_abd);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* When the uberblock on-disk is updated by a spa_sync,
|
||||||
|
* creating a new "best" uberblock, update the one stored
|
||||||
|
* in the mmp thread state, used for mmp writes.
|
||||||
|
*/
|
||||||
|
void
|
||||||
|
mmp_update_uberblock(spa_t *spa, uberblock_t *ub)
|
||||||
|
{
|
||||||
|
mmp_thread_t *mmp = &spa->spa_mmp;
|
||||||
|
|
||||||
|
mutex_enter(&mmp->mmp_io_lock);
|
||||||
|
mmp->mmp_ub = *ub;
|
||||||
|
mmp->mmp_seq = 1;
|
||||||
|
mmp->mmp_ub.ub_timestamp = gethrestime_sec();
|
||||||
|
mmp_delay_update(spa, B_TRUE);
|
||||||
|
mutex_exit(&mmp->mmp_io_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Choose the next eligible leaf vdev, plus a random label and MMP block, and write over it
|
||||||
|
* with a copy of the last-synced uberblock, whose timestamp
|
||||||
|
* has been updated to reflect that the pool is in use.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
mmp_write_uberblock(spa_t *spa)
|
||||||
|
{
|
||||||
|
int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
|
||||||
|
mmp_thread_t *mmp = &spa->spa_mmp;
|
||||||
|
uberblock_t *ub;
|
||||||
|
vdev_t *vd = NULL;
|
||||||
|
int label, error;
|
||||||
|
uint64_t offset;
|
||||||
|
|
||||||
|
hrtime_t lock_acquire_time = gethrtime();
|
||||||
|
spa_config_enter(spa, SCL_STATE, mmp_tag, RW_READER);
|
||||||
|
lock_acquire_time = gethrtime() - lock_acquire_time;
|
||||||
|
if (lock_acquire_time > (MSEC2NSEC(MMP_MIN_INTERVAL) / 10))
|
||||||
|
zfs_dbgmsg("MMP SCL_STATE acquisition pool '%s' took %llu ns "
|
||||||
|
"gethrtime %llu", spa_name(spa), lock_acquire_time,
|
||||||
|
gethrtime());
|
||||||
|
|
||||||
|
mutex_enter(&mmp->mmp_io_lock);
|
||||||
|
|
||||||
|
error = mmp_next_leaf(spa);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* spa_mmp_history has two types of entries:
|
||||||
|
* Issued MMP write: records time issued, error status, etc.
|
||||||
|
* Skipped MMP write: an MMP write could not be issued because no
|
||||||
|
* suitable leaf vdev was available. See comment above struct
|
||||||
|
* spa_mmp_history for details.
|
||||||
|
*/
|
||||||
|
|
||||||
|
if (error) {
|
||||||
|
mmp_delay_update(spa, B_FALSE);
|
||||||
|
if (mmp->mmp_skip_error == error) {
|
||||||
|
/*
|
||||||
|
* ZoL porting note: the following is TBD
|
||||||
|
* spa_mmp_history_set_skip(spa, mmp->mmp_kstat_id - 1);
|
||||||
|
*/
|
||||||
|
} else {
|
||||||
|
mmp->mmp_skip_error = error;
|
||||||
|
/*
|
||||||
|
* ZoL porting note: the following is TBD
|
||||||
|
* spa_mmp_history_add(spa, mmp->mmp_ub.ub_txg,
|
||||||
|
* gethrestime_sec(), mmp->mmp_delay, NULL, 0,
|
||||||
|
* mmp->mmp_kstat_id++, error);
|
||||||
|
*/
|
||||||
|
zfs_dbgmsg("MMP error choosing leaf pool '%s' "
|
||||||
|
"gethrtime %llu fail_mask %#x", spa_name(spa),
|
||||||
|
gethrtime(), error);
|
||||||
|
}
|
||||||
|
mutex_exit(&mmp->mmp_io_lock);
|
||||||
|
spa_config_exit(spa, SCL_STATE, mmp_tag);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
vd = spa->spa_mmp.mmp_last_leaf;
|
||||||
|
if (mmp->mmp_skip_error != 0) {
|
||||||
|
mmp->mmp_skip_error = 0;
|
||||||
|
zfs_dbgmsg("MMP write after skipping due to unavailable "
|
||||||
|
"leaves, pool '%s' gethrtime %llu leaf %#llu",
|
||||||
|
spa_name(spa), gethrtime(), vd->vdev_guid);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mmp->mmp_zio_root == NULL)
|
||||||
|
mmp->mmp_zio_root = zio_root(spa, NULL, NULL,
|
||||||
|
flags | ZIO_FLAG_GODFATHER);
|
||||||
|
|
||||||
|
if (mmp->mmp_ub.ub_timestamp != gethrestime_sec()) {
|
||||||
|
/*
|
||||||
|
* Want to reset mmp_seq when timestamp advances because after
|
||||||
|
* an mmp_seq wrap new values will not be chosen by
|
||||||
|
* uberblock_compare() as the "best".
|
||||||
|
*/
|
||||||
|
mmp->mmp_ub.ub_timestamp = gethrestime_sec();
|
||||||
|
mmp->mmp_seq = 1;
|
||||||
|
}
|
||||||
|
|
||||||
|
ub = &mmp->mmp_ub;
|
||||||
|
ub->ub_mmp_magic = MMP_MAGIC;
|
||||||
|
ub->ub_mmp_delay = mmp->mmp_delay;
|
||||||
|
ub->ub_mmp_config = MMP_SEQ_SET(mmp->mmp_seq) |
|
||||||
|
MMP_INTERVAL_SET(MMP_INTERVAL_OK(zfs_multihost_interval)) |
|
||||||
|
MMP_FAIL_INT_SET(MMP_FAIL_INTVS_OK(
|
||||||
|
zfs_multihost_fail_intervals));
|
||||||
|
vd->vdev_mmp_pending = gethrtime();
|
||||||
|
vd->vdev_mmp_kstat_id = mmp->mmp_kstat_id;
|
||||||
|
|
||||||
|
zio_t *zio = zio_null(mmp->mmp_zio_root, spa, NULL, NULL, NULL, flags);
|
||||||
|
abd_t *ub_abd = abd_alloc_for_io(VDEV_UBERBLOCK_SIZE(vd), B_TRUE);
|
||||||
|
abd_zero(ub_abd, VDEV_UBERBLOCK_SIZE(vd));
|
||||||
|
abd_copy_from_buf(ub_abd, ub, sizeof (uberblock_t));
|
||||||
|
|
||||||
|
mmp->mmp_seq++;
|
||||||
|
mmp->mmp_kstat_id++;
|
||||||
|
mutex_exit(&mmp->mmp_io_lock);
|
||||||
|
|
||||||
|
offset = VDEV_UBERBLOCK_OFFSET(vd, VDEV_UBERBLOCK_COUNT(vd) -
|
||||||
|
MMP_BLOCKS_PER_LABEL + spa_get_random(MMP_BLOCKS_PER_LABEL));
|
||||||
|
|
||||||
|
label = spa_get_random(VDEV_LABELS);
|
||||||
|
vdev_label_write(zio, vd, label, ub_abd, offset,
|
||||||
|
VDEV_UBERBLOCK_SIZE(vd), mmp_write_done, mmp,
|
||||||
|
flags | ZIO_FLAG_DONT_PROPAGATE);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* ZoL porting note: the following is TBD
|
||||||
|
* (void) spa_mmp_history_add(spa, ub->ub_txg, ub->ub_timestamp,
|
||||||
|
* ub->ub_mmp_delay, vd, label, vd->vdev_mmp_kstat_id, 0);
|
||||||
|
*/
|
||||||
|
|
||||||
|
zio_nowait(zio);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
mmp_thread(void *arg)
|
||||||
|
{
|
||||||
|
spa_t *spa = (spa_t *)arg;
|
||||||
|
mmp_thread_t *mmp = &spa->spa_mmp;
|
||||||
|
boolean_t suspended = spa_suspended(spa);
|
||||||
|
boolean_t multihost = spa_multihost(spa);
|
||||||
|
uint64_t mmp_interval = MSEC2NSEC(MMP_INTERVAL_OK(
|
||||||
|
zfs_multihost_interval));
|
||||||
|
uint32_t mmp_fail_intervals = MMP_FAIL_INTVS_OK(
|
||||||
|
zfs_multihost_fail_intervals);
|
||||||
|
hrtime_t mmp_fail_ns = mmp_fail_intervals * mmp_interval;
|
||||||
|
boolean_t last_spa_suspended = suspended;
|
||||||
|
boolean_t last_spa_multihost = multihost;
|
||||||
|
uint64_t last_mmp_interval = mmp_interval;
|
||||||
|
uint32_t last_mmp_fail_intervals = mmp_fail_intervals;
|
||||||
|
hrtime_t last_mmp_fail_ns = mmp_fail_ns;
|
||||||
|
callb_cpr_t cpr;
|
||||||
|
int skip_wait = 0;
|
||||||
|
|
||||||
|
mmp_thread_enter(mmp, &cpr);
|
||||||
|
|
||||||
|
while (!mmp->mmp_thread_exiting) {
|
||||||
|
hrtime_t next_time = gethrtime() +
|
||||||
|
MSEC2NSEC(MMP_DEFAULT_INTERVAL);
|
||||||
|
int leaves = MAX(vdev_count_leaves(spa), 1);
|
||||||
|
|
||||||
|
/* Detect changes in tunables or state */
|
||||||
|
|
||||||
|
last_spa_suspended = suspended;
|
||||||
|
last_spa_multihost = multihost;
|
||||||
|
suspended = spa_suspended(spa);
|
||||||
|
multihost = spa_multihost(spa);
|
||||||
|
|
||||||
|
last_mmp_interval = mmp_interval;
|
||||||
|
last_mmp_fail_intervals = mmp_fail_intervals;
|
||||||
|
last_mmp_fail_ns = mmp_fail_ns;
|
||||||
|
mmp_interval = MSEC2NSEC(MMP_INTERVAL_OK(
|
||||||
|
zfs_multihost_interval));
|
||||||
|
mmp_fail_intervals = MMP_FAIL_INTVS_OK(
|
||||||
|
zfs_multihost_fail_intervals);
|
||||||
|
|
||||||
|
/* Smooth so pool is not suspended when reducing tunables */
|
||||||
|
if (mmp_fail_intervals * mmp_interval < mmp_fail_ns) {
|
||||||
|
mmp_fail_ns = (mmp_fail_ns * 31 +
|
||||||
|
mmp_fail_intervals * mmp_interval) / 32;
|
||||||
|
} else {
|
||||||
|
mmp_fail_ns = mmp_fail_intervals *
|
||||||
|
mmp_interval;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mmp_interval != last_mmp_interval ||
|
||||||
|
mmp_fail_intervals != last_mmp_fail_intervals) {
|
||||||
|
/*
|
||||||
|
* We want other hosts to see new tunables as quickly as
|
||||||
|
* possible. Write out at higher frequency than usual.
|
||||||
|
*/
|
||||||
|
skip_wait += leaves;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (multihost)
|
||||||
|
next_time = gethrtime() + mmp_interval / leaves;
|
||||||
|
|
||||||
|
if (mmp_fail_ns != last_mmp_fail_ns) {
|
||||||
|
zfs_dbgmsg("MMP interval change pool '%s' "
|
||||||
|
"gethrtime %llu last_mmp_interval %llu "
|
||||||
|
"mmp_interval %llu last_mmp_fail_intervals %u "
|
||||||
|
"mmp_fail_intervals %u mmp_fail_ns %llu "
|
||||||
|
"skip_wait %d leaves %d next_time %llu",
|
||||||
|
spa_name(spa), gethrtime(), last_mmp_interval,
|
||||||
|
mmp_interval, last_mmp_fail_intervals,
|
||||||
|
mmp_fail_intervals, mmp_fail_ns, skip_wait, leaves,
|
||||||
|
next_time);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* MMP off => on, or suspended => !suspended:
|
||||||
|
* No writes occurred recently. Update mmp_last_write to give
|
||||||
|
* us some time to try.
|
||||||
|
*/
|
||||||
|
if ((!last_spa_multihost && multihost) ||
|
||||||
|
(last_spa_suspended && !suspended)) {
|
||||||
|
zfs_dbgmsg("MMP state change pool '%s': gethrtime %llu "
|
||||||
|
"last_spa_multihost %u multihost %u "
|
||||||
|
"last_spa_suspended %u suspended %u",
|
||||||
|
spa_name(spa), last_spa_multihost, multihost,
|
||||||
|
last_spa_suspended, suspended);
|
||||||
|
mutex_enter(&mmp->mmp_io_lock);
|
||||||
|
mmp->mmp_last_write = gethrtime();
|
||||||
|
mmp->mmp_delay = mmp_interval;
|
||||||
|
mutex_exit(&mmp->mmp_io_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* MMP on => off:
|
||||||
|
* mmp_delay == 0 tells importing node to skip activity check.
|
||||||
|
*/
|
||||||
|
if (last_spa_multihost && !multihost) {
|
||||||
|
mutex_enter(&mmp->mmp_io_lock);
|
||||||
|
mmp->mmp_delay = 0;
|
||||||
|
mutex_exit(&mmp->mmp_io_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Suspend the pool if no MMP write has succeeded in over
|
||||||
|
* mmp_interval * mmp_fail_intervals nanoseconds.
|
||||||
|
*/
|
||||||
|
if (multihost && !suspended && mmp_fail_intervals &&
|
||||||
|
(gethrtime() - mmp->mmp_last_write) > mmp_fail_ns) {
|
||||||
|
zfs_dbgmsg("MMP suspending pool '%s': gethrtime %llu "
|
||||||
|
"mmp_last_write %llu mmp_interval %llu "
|
||||||
|
"mmp_fail_intervals %llu mmp_fail_ns %llu",
|
||||||
|
spa_name(spa), (u_longlong_t)gethrtime(),
|
||||||
|
(u_longlong_t)mmp->mmp_last_write,
|
||||||
|
(u_longlong_t)mmp_interval,
|
||||||
|
(u_longlong_t)mmp_fail_intervals,
|
||||||
|
(u_longlong_t)mmp_fail_ns);
|
||||||
|
cmn_err(CE_WARN, "MMP writes to pool '%s' have not "
|
||||||
|
"succeeded in over %llu ms; suspending pool. "
|
||||||
|
"Hrtime %llu",
|
||||||
|
spa_name(spa),
|
||||||
|
NSEC2MSEC(gethrtime() - mmp->mmp_last_write),
|
||||||
|
gethrtime());
|
||||||
|
zio_suspend(spa, NULL, ZIO_SUSPEND_MMP);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (multihost && !suspended)
|
||||||
|
mmp_write_uberblock(spa);
|
||||||
|
|
||||||
|
if (skip_wait > 0) {
|
||||||
|
next_time = gethrtime() + MSEC2NSEC(MMP_MIN_INTERVAL) /
|
||||||
|
leaves;
|
||||||
|
skip_wait--;
|
||||||
|
}
|
||||||
|
|
||||||
|
CALLB_CPR_SAFE_BEGIN(&cpr);
|
||||||
|
#if defined(illumos)
|
||||||
|
(void) cv_timedwait_sig_hrtime(&mmp->mmp_thread_cv,
|
||||||
|
&mmp->mmp_thread_lock, next_time);
|
||||||
|
#elif defined(_KERNEL)
|
||||||
|
(void) cv_timedwait_sig_sbt(&mmp->mmp_thread_cv,
|
||||||
|
&mmp->mmp_thread_lock, nstosbt(next_time),
|
||||||
|
100 * SBT_1US, C_ABSOLUTE);
|
||||||
|
#else
|
||||||
|
(void) cv_timedwait_sig_hires(&mmp->mmp_thread_cv,
|
||||||
|
&mmp->mmp_thread_lock, next_time, USEC2NSEC(100),
|
||||||
|
CALLOUT_FLAG_ABSOLUTE);
|
||||||
|
#endif
|
||||||
|
CALLB_CPR_SAFE_END(&cpr, &mmp->mmp_thread_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Outstanding writes are allowed to complete. */
|
||||||
|
if (mmp->mmp_zio_root)
|
||||||
|
zio_wait(mmp->mmp_zio_root);
|
||||||
|
|
||||||
|
mmp->mmp_zio_root = NULL;
|
||||||
|
mmp_thread_exit(mmp, &mmp->mmp_thread, &cpr);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Signal the MMP thread to wake it, when it is sleeping on
|
||||||
|
* its cv. Used when some module parameter has changed and
|
||||||
|
* we want the thread to know about it.
|
||||||
|
* Only signal if the pool is active and mmp thread is
|
||||||
|
* running, otherwise there is no thread to wake.
|
||||||
|
*/
|
||||||
|
static void
|
||||||
|
mmp_signal_thread(spa_t *spa)
|
||||||
|
{
|
||||||
|
mmp_thread_t *mmp = &spa->spa_mmp;
|
||||||
|
|
||||||
|
mutex_enter(&mmp->mmp_thread_lock);
|
||||||
|
if (mmp->mmp_thread)
|
||||||
|
cv_broadcast(&mmp->mmp_thread_cv);
|
||||||
|
mutex_exit(&mmp->mmp_thread_lock);
|
||||||
|
}
|
||||||
|
|
||||||
|
void
|
||||||
|
mmp_signal_all_threads(void)
|
||||||
|
{
|
||||||
|
spa_t *spa = NULL;
|
||||||
|
|
||||||
|
mutex_enter(&spa_namespace_lock);
|
||||||
|
while ((spa = spa_next(spa))) {
|
||||||
|
if (spa->spa_state == POOL_STATE_ACTIVE)
|
||||||
|
mmp_signal_thread(spa);
|
||||||
|
}
|
||||||
|
mutex_exit(&spa_namespace_lock);
|
||||||
|
}
|
@ -58,6 +58,7 @@
|
|||||||
#include <sys/vdev_initialize.h>
|
#include <sys/vdev_initialize.h>
|
||||||
#include <sys/metaslab.h>
|
#include <sys/metaslab.h>
|
||||||
#include <sys/metaslab_impl.h>
|
#include <sys/metaslab_impl.h>
|
||||||
|
#include <sys/mmp.h>
|
||||||
#include <sys/uberblock_impl.h>
|
#include <sys/uberblock_impl.h>
|
||||||
#include <sys/txg.h>
|
#include <sys/txg.h>
|
||||||
#include <sys/avl.h>
|
#include <sys/avl.h>
|
||||||
@ -584,6 +585,16 @@ spa_prop_validate(spa_t *spa, nvlist_t *props)
|
|||||||
error = SET_ERROR(EINVAL);
|
error = SET_ERROR(EINVAL);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case ZPOOL_PROP_MULTIHOST:
|
||||||
|
error = nvpair_value_uint64(elem, &intval);
|
||||||
|
if (!error && intval > 1)
|
||||||
|
error = SET_ERROR(EINVAL);
|
||||||
|
|
||||||
|
if (!error && !spa_get_hostid())
|
||||||
|
error = SET_ERROR(ENOTSUP);
|
||||||
|
|
||||||
|
break;
|
||||||
|
|
||||||
case ZPOOL_PROP_BOOTFS:
|
case ZPOOL_PROP_BOOTFS:
|
||||||
/*
|
/*
|
||||||
* If the pool version is less than SPA_VERSION_BOOTFS,
|
* If the pool version is less than SPA_VERSION_BOOTFS,
|
||||||
@ -1463,6 +1474,9 @@ spa_unload(spa_t *spa)
|
|||||||
spa_config_exit(spa, SCL_ALL, spa);
|
spa_config_exit(spa, SCL_ALL, spa);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (spa->spa_mmp.mmp_thread)
|
||||||
|
mmp_thread_stop(spa);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Wait for any outstanding async I/O to complete.
|
* Wait for any outstanding async I/O to complete.
|
||||||
*/
|
*/
|
||||||
@ -2428,6 +2442,278 @@ vdev_count_verify_zaps(vdev_t *vd)
|
|||||||
return (total);
|
return (total);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Determine whether the activity check is required.
|
||||||
|
*/
|
||||||
|
static boolean_t
|
||||||
|
spa_activity_check_required(spa_t *spa, uberblock_t *ub, nvlist_t *label,
|
||||||
|
nvlist_t *config)
|
||||||
|
{
|
||||||
|
uint64_t state = 0;
|
||||||
|
uint64_t hostid = 0;
|
||||||
|
uint64_t tryconfig_txg = 0;
|
||||||
|
uint64_t tryconfig_timestamp = 0;
|
||||||
|
uint16_t tryconfig_mmp_seq = 0;
|
||||||
|
nvlist_t *nvinfo;
|
||||||
|
|
||||||
|
if (nvlist_exists(config, ZPOOL_CONFIG_LOAD_INFO)) {
|
||||||
|
nvinfo = fnvlist_lookup_nvlist(config, ZPOOL_CONFIG_LOAD_INFO);
|
||||||
|
(void) nvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_MMP_TXG,
|
||||||
|
&tryconfig_txg);
|
||||||
|
(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_TIMESTAMP,
|
||||||
|
&tryconfig_timestamp);
|
||||||
|
(void) nvlist_lookup_uint16(nvinfo, ZPOOL_CONFIG_MMP_SEQ,
|
||||||
|
&tryconfig_mmp_seq);
|
||||||
|
}
|
||||||
|
|
||||||
|
(void) nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_STATE, &state);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Disable the MMP activity check - This is used by zdb which
|
||||||
|
* is intended to be used on potentially active pools.
|
||||||
|
*/
|
||||||
|
if (spa->spa_import_flags & ZFS_IMPORT_SKIP_MMP)
|
||||||
|
return (B_FALSE);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Skip the activity check when the MMP feature is disabled.
|
||||||
|
*/
|
||||||
|
if (ub->ub_mmp_magic == MMP_MAGIC && ub->ub_mmp_delay == 0)
|
||||||
|
return (B_FALSE);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the tryconfig_ values are nonzero, they are the results of an
|
||||||
|
* earlier tryimport. If they all match the uberblock we just found,
|
||||||
|
* then the pool has not changed and we return false so we do not test
|
||||||
|
* a second time.
|
||||||
|
*/
|
||||||
|
if (tryconfig_txg && tryconfig_txg == ub->ub_txg &&
|
||||||
|
tryconfig_timestamp && tryconfig_timestamp == ub->ub_timestamp &&
|
||||||
|
tryconfig_mmp_seq && tryconfig_mmp_seq ==
|
||||||
|
(MMP_SEQ_VALID(ub) ? MMP_SEQ(ub) : 0))
|
||||||
|
return (B_FALSE);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Allow the activity check to be skipped when importing the pool
|
||||||
|
* on the same host which last imported it. Since the hostid from
|
||||||
|
* configuration may be stale use the one read from the label.
|
||||||
|
*/
|
||||||
|
if (nvlist_exists(label, ZPOOL_CONFIG_HOSTID))
|
||||||
|
hostid = fnvlist_lookup_uint64(label, ZPOOL_CONFIG_HOSTID);
|
||||||
|
|
||||||
|
if (hostid == spa_get_hostid())
|
||||||
|
return (B_FALSE);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Skip the activity test when the pool was cleanly exported.
|
||||||
|
*/
|
||||||
|
if (state != POOL_STATE_ACTIVE)
|
||||||
|
return (B_FALSE);
|
||||||
|
|
||||||
|
return (B_TRUE);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Nanoseconds the activity check must watch for changes on-disk.
|
||||||
|
*/
|
||||||
|
static uint64_t
|
||||||
|
spa_activity_check_duration(spa_t *spa, uberblock_t *ub)
|
||||||
|
{
|
||||||
|
uint64_t import_intervals = MAX(zfs_multihost_import_intervals, 1);
|
||||||
|
uint64_t multihost_interval = MSEC2NSEC(
|
||||||
|
MMP_INTERVAL_OK(zfs_multihost_interval));
|
||||||
|
uint64_t import_delay = MAX(NANOSEC, import_intervals *
|
||||||
|
multihost_interval);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Local tunables determine a minimum duration except for the case
|
||||||
|
* where we know when the remote host will suspend the pool if MMP
|
||||||
|
* writes do not land.
|
||||||
|
*
|
||||||
|
* See Big Theory comment at the top of mmp.c for the reasoning behind
|
||||||
|
* these cases and times.
|
||||||
|
*/
|
||||||
|
|
||||||
|
ASSERT(MMP_IMPORT_SAFETY_FACTOR >= 100);
|
||||||
|
|
||||||
|
if (MMP_INTERVAL_VALID(ub) && MMP_FAIL_INT_VALID(ub) &&
|
||||||
|
MMP_FAIL_INT(ub) > 0) {
|
||||||
|
|
||||||
|
/* MMP on remote host will suspend pool after failed writes */
|
||||||
|
import_delay = MMP_FAIL_INT(ub) * MSEC2NSEC(MMP_INTERVAL(ub)) *
|
||||||
|
MMP_IMPORT_SAFETY_FACTOR / 100;
|
||||||
|
|
||||||
|
zfs_dbgmsg("fail_intvals>0 import_delay=%llu ub_mmp "
|
||||||
|
"mmp_fails=%llu ub_mmp mmp_interval=%llu "
|
||||||
|
"import_intervals=%u", import_delay, MMP_FAIL_INT(ub),
|
||||||
|
MMP_INTERVAL(ub), import_intervals);
|
||||||
|
|
||||||
|
} else if (MMP_INTERVAL_VALID(ub) && MMP_FAIL_INT_VALID(ub) &&
|
||||||
|
MMP_FAIL_INT(ub) == 0) {
|
||||||
|
|
||||||
|
/* MMP on remote host will never suspend pool */
|
||||||
|
import_delay = MAX(import_delay, (MSEC2NSEC(MMP_INTERVAL(ub)) +
|
||||||
|
ub->ub_mmp_delay) * import_intervals);
|
||||||
|
|
||||||
|
zfs_dbgmsg("fail_intvals=0 import_delay=%llu ub_mmp "
|
||||||
|
"mmp_interval=%llu ub_mmp_delay=%llu "
|
||||||
|
"import_intervals=%u", import_delay, MMP_INTERVAL(ub),
|
||||||
|
ub->ub_mmp_delay, import_intervals);
|
||||||
|
|
||||||
|
} else if (MMP_VALID(ub)) {
|
||||||
|
/*
|
||||||
|
* zfs-0.7 compatability case
|
||||||
|
*/
|
||||||
|
|
||||||
|
import_delay = MAX(import_delay, (multihost_interval +
|
||||||
|
ub->ub_mmp_delay) * import_intervals);
|
||||||
|
|
||||||
|
zfs_dbgmsg("import_delay=%llu ub_mmp_delay=%llu "
|
||||||
|
"import_intervals=%u leaves=%u", import_delay,
|
||||||
|
ub->ub_mmp_delay, import_intervals,
|
||||||
|
vdev_count_leaves(spa));
|
||||||
|
} else {
|
||||||
|
/* Using local tunings is the only reasonable option */
|
||||||
|
zfs_dbgmsg("pool last imported on non-MMP aware "
|
||||||
|
"host using import_delay=%llu multihost_interval=%llu "
|
||||||
|
"import_intervals=%u", import_delay, multihost_interval,
|
||||||
|
import_intervals);
|
||||||
|
}
|
||||||
|
|
||||||
|
return (import_delay);
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* Perform the import activity check. If the user canceled the import or
|
||||||
|
* we detected activity then fail.
|
||||||
|
*/
|
||||||
|
static int
|
||||||
|
spa_activity_check(spa_t *spa, uberblock_t *ub, nvlist_t *config)
|
||||||
|
{
|
||||||
|
uint64_t txg = ub->ub_txg;
|
||||||
|
uint64_t timestamp = ub->ub_timestamp;
|
||||||
|
uint64_t mmp_config = ub->ub_mmp_config;
|
||||||
|
uint16_t mmp_seq = MMP_SEQ_VALID(ub) ? MMP_SEQ(ub) : 0;
|
||||||
|
uint64_t import_delay;
|
||||||
|
hrtime_t import_expire;
|
||||||
|
nvlist_t *mmp_label = NULL;
|
||||||
|
vdev_t *rvd = spa->spa_root_vdev;
|
||||||
|
kcondvar_t cv;
|
||||||
|
kmutex_t mtx;
|
||||||
|
int error = 0;
|
||||||
|
|
||||||
|
cv_init(&cv, NULL, CV_DEFAULT, NULL);
|
||||||
|
mutex_init(&mtx, NULL, MUTEX_DEFAULT, NULL);
|
||||||
|
mutex_enter(&mtx);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If ZPOOL_CONFIG_MMP_TXG is present an activity check was performed
|
||||||
|
* during the earlier tryimport. If the txg recorded there is 0 then
|
||||||
|
* the pool is known to be active on another host.
|
||||||
|
*
|
||||||
|
* Otherwise, the pool might be in use on another host. Check for
|
||||||
|
* changes in the uberblocks on disk if necessary.
|
||||||
|
*/
|
||||||
|
if (nvlist_exists(config, ZPOOL_CONFIG_LOAD_INFO)) {
|
||||||
|
nvlist_t *nvinfo = fnvlist_lookup_nvlist(config,
|
||||||
|
ZPOOL_CONFIG_LOAD_INFO);
|
||||||
|
|
||||||
|
if (nvlist_exists(nvinfo, ZPOOL_CONFIG_MMP_TXG) &&
|
||||||
|
fnvlist_lookup_uint64(nvinfo, ZPOOL_CONFIG_MMP_TXG) == 0) {
|
||||||
|
vdev_uberblock_load(rvd, ub, &mmp_label);
|
||||||
|
error = SET_ERROR(EREMOTEIO);
|
||||||
|
goto out;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
import_delay = spa_activity_check_duration(spa, ub);
|
||||||
|
|
||||||
|
/* Add a small random factor in case of simultaneous imports (0-25%) */
|
||||||
|
import_delay += import_delay * spa_get_random(250) / 1000;
|
||||||
|
|
||||||
|
import_expire = gethrtime() + import_delay;
|
||||||
|
|
||||||
|
while (gethrtime() < import_expire) {
|
||||||
|
vdev_uberblock_load(rvd, ub, &mmp_label);
|
||||||
|
|
||||||
|
if (txg != ub->ub_txg || timestamp != ub->ub_timestamp ||
|
||||||
|
mmp_seq != (MMP_SEQ_VALID(ub) ? MMP_SEQ(ub) : 0)) {
|
||||||
|
zfs_dbgmsg("multihost activity detected "
|
||||||
|
"txg %llu ub_txg %llu "
|
||||||
|
"timestamp %llu ub_timestamp %llu "
|
||||||
|
"mmp_config %#llx ub_mmp_config %#llx",
|
||||||
|
txg, ub->ub_txg, timestamp, ub->ub_timestamp,
|
||||||
|
mmp_config, ub->ub_mmp_config);
|
||||||
|
|
||||||
|
error = SET_ERROR(EREMOTEIO);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mmp_label) {
|
||||||
|
nvlist_free(mmp_label);
|
||||||
|
mmp_label = NULL;
|
||||||
|
}
|
||||||
|
error = cv_timedwait_sig(&cv, &mtx, hz);
|
||||||
|
#if defined(illumos) || !defined(_KERNEL)
|
||||||
|
if (error != -1) {
|
||||||
|
#else
|
||||||
|
if (error != EWOULDBLOCK) {
|
||||||
|
#endif
|
||||||
|
error = SET_ERROR(EINTR);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
error = 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
out:
|
||||||
|
mutex_exit(&mtx);
|
||||||
|
mutex_destroy(&mtx);
|
||||||
|
cv_destroy(&cv);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the pool is determined to be active store the status in the
|
||||||
|
* spa->spa_load_info nvlist. If the remote hostname or hostid are
|
||||||
|
* available from configuration read from disk store them as well.
|
||||||
|
* This allows 'zpool import' to generate a more useful message.
|
||||||
|
*
|
||||||
|
* ZPOOL_CONFIG_MMP_STATE - observed pool status (mandatory)
|
||||||
|
* ZPOOL_CONFIG_MMP_HOSTNAME - hostname from the active pool
|
||||||
|
* ZPOOL_CONFIG_MMP_HOSTID - hostid from the active pool
|
||||||
|
*/
|
||||||
|
if (error == EREMOTEIO) {
|
||||||
|
char *hostname = "<unknown>";
|
||||||
|
uint64_t hostid = 0;
|
||||||
|
|
||||||
|
if (mmp_label) {
|
||||||
|
if (nvlist_exists(mmp_label, ZPOOL_CONFIG_HOSTNAME)) {
|
||||||
|
hostname = fnvlist_lookup_string(mmp_label,
|
||||||
|
ZPOOL_CONFIG_HOSTNAME);
|
||||||
|
fnvlist_add_string(spa->spa_load_info,
|
||||||
|
ZPOOL_CONFIG_MMP_HOSTNAME, hostname);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (nvlist_exists(mmp_label, ZPOOL_CONFIG_HOSTID)) {
|
||||||
|
hostid = fnvlist_lookup_uint64(mmp_label,
|
||||||
|
ZPOOL_CONFIG_HOSTID);
|
||||||
|
fnvlist_add_uint64(spa->spa_load_info,
|
||||||
|
ZPOOL_CONFIG_MMP_HOSTID, hostid);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
fnvlist_add_uint64(spa->spa_load_info,
|
||||||
|
ZPOOL_CONFIG_MMP_STATE, MMP_STATE_ACTIVE);
|
||||||
|
fnvlist_add_uint64(spa->spa_load_info,
|
||||||
|
ZPOOL_CONFIG_MMP_TXG, 0);
|
||||||
|
|
||||||
|
error = spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (mmp_label)
|
||||||
|
nvlist_free(mmp_label);
|
||||||
|
|
||||||
|
return (error);
|
||||||
|
}
|
||||||
|
|
||||||
static int
|
static int
|
||||||
spa_verify_host(spa_t *spa, nvlist_t *mos_config)
|
spa_verify_host(spa_t *spa, nvlist_t *mos_config)
|
||||||
{
|
{
|
||||||
@ -2678,6 +2964,7 @@ spa_ld_select_uberblock(spa_t *spa, spa_import_type_t type)
|
|||||||
vdev_t *rvd = spa->spa_root_vdev;
|
vdev_t *rvd = spa->spa_root_vdev;
|
||||||
nvlist_t *label;
|
nvlist_t *label;
|
||||||
uberblock_t *ub = &spa->spa_uberblock;
|
uberblock_t *ub = &spa->spa_uberblock;
|
||||||
|
boolean_t activity_check = B_FALSE;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If we are opening the checkpointed state of the pool by
|
* If we are opening the checkpointed state of the pool by
|
||||||
@ -2719,6 +3006,37 @@ spa_ld_select_uberblock(spa_t *spa, spa_import_type_t type)
|
|||||||
spa_load_note(spa, "using uberblock with txg=%llu",
|
spa_load_note(spa, "using uberblock with txg=%llu",
|
||||||
(u_longlong_t)ub->ub_txg);
|
(u_longlong_t)ub->ub_txg);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* For pools which have the multihost property on determine if the
|
||||||
|
* pool is truly inactive and can be safely imported. Prevent
|
||||||
|
* hosts which don't have a hostid set from importing the pool.
|
||||||
|
*/
|
||||||
|
activity_check = spa_activity_check_required(spa, ub, label,
|
||||||
|
spa->spa_config);
|
||||||
|
if (activity_check) {
|
||||||
|
if (ub->ub_mmp_magic == MMP_MAGIC && ub->ub_mmp_delay &&
|
||||||
|
spa_get_hostid() == 0) {
|
||||||
|
nvlist_free(label);
|
||||||
|
fnvlist_add_uint64(spa->spa_load_info,
|
||||||
|
ZPOOL_CONFIG_MMP_STATE, MMP_STATE_NO_HOSTID);
|
||||||
|
return (spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO));
|
||||||
|
}
|
||||||
|
|
||||||
|
int error = spa_activity_check(spa, ub, spa->spa_config);
|
||||||
|
if (error) {
|
||||||
|
nvlist_free(label);
|
||||||
|
return (error);
|
||||||
|
}
|
||||||
|
|
||||||
|
fnvlist_add_uint64(spa->spa_load_info,
|
||||||
|
ZPOOL_CONFIG_MMP_STATE, MMP_STATE_INACTIVE);
|
||||||
|
fnvlist_add_uint64(spa->spa_load_info,
|
||||||
|
ZPOOL_CONFIG_MMP_TXG, ub->ub_txg);
|
||||||
|
fnvlist_add_uint16(spa->spa_load_info,
|
||||||
|
ZPOOL_CONFIG_MMP_SEQ,
|
||||||
|
(MMP_SEQ_VALID(ub) ? MMP_SEQ(ub) : 0));
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the pool has an unsupported version we can't open it.
|
* If the pool has an unsupported version we can't open it.
|
||||||
*/
|
*/
|
||||||
@ -3274,6 +3592,7 @@ spa_ld_get_props(spa_t *spa)
|
|||||||
spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation);
|
spa_prop_find(spa, ZPOOL_PROP_DELEGATION, &spa->spa_delegation);
|
||||||
spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode);
|
spa_prop_find(spa, ZPOOL_PROP_FAILUREMODE, &spa->spa_failmode);
|
||||||
spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand);
|
spa_prop_find(spa, ZPOOL_PROP_AUTOEXPAND, &spa->spa_autoexpand);
|
||||||
|
spa_prop_find(spa, ZPOOL_PROP_MULTIHOST, &spa->spa_multihost);
|
||||||
spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO,
|
spa_prop_find(spa, ZPOOL_PROP_DEDUPDITTO,
|
||||||
&spa->spa_dedup_ditto);
|
&spa->spa_dedup_ditto);
|
||||||
|
|
||||||
@ -3361,6 +3680,18 @@ spa_ld_load_vdev_metadata(spa_t *spa)
|
|||||||
int error = 0;
|
int error = 0;
|
||||||
vdev_t *rvd = spa->spa_root_vdev;
|
vdev_t *rvd = spa->spa_root_vdev;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If the 'multihost' property is set, then never allow a pool to
|
||||||
|
* be imported when the system hostid is zero. The exception to
|
||||||
|
* this rule is zdb which is always allowed to access pools.
|
||||||
|
*/
|
||||||
|
if (spa_multihost(spa) && spa_get_hostid() == 0 &&
|
||||||
|
(spa->spa_import_flags & ZFS_IMPORT_SKIP_MMP) == 0) {
|
||||||
|
fnvlist_add_uint64(spa->spa_load_info,
|
||||||
|
ZPOOL_CONFIG_MMP_STATE, MMP_STATE_NO_HOSTID);
|
||||||
|
return (spa_vdev_err(rvd, VDEV_AUX_ACTIVE, EREMOTEIO));
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* If the 'autoreplace' property is set, then post a resource notifying
|
* If the 'autoreplace' property is set, then post a resource notifying
|
||||||
* the ZFS DE that it should not issue any faults for unopenable
|
* the ZFS DE that it should not issue any faults for unopenable
|
||||||
@ -3961,6 +4292,7 @@ spa_load_impl(spa_t *spa, spa_import_type_t type, char **ereport)
|
|||||||
*/
|
*/
|
||||||
spa->spa_sync_on = B_TRUE;
|
spa->spa_sync_on = B_TRUE;
|
||||||
txg_sync_start(spa->spa_dsl_pool);
|
txg_sync_start(spa->spa_dsl_pool);
|
||||||
|
mmp_thread_start(spa);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Wait for all claims to sync. We sync up to the highest
|
* Wait for all claims to sync. We sync up to the highest
|
||||||
@ -4540,10 +4872,14 @@ spa_get_stats(const char *name, nvlist_t **config,
|
|||||||
ZPOOL_CONFIG_ERRCOUNT,
|
ZPOOL_CONFIG_ERRCOUNT,
|
||||||
spa_get_errlog_size(spa)) == 0);
|
spa_get_errlog_size(spa)) == 0);
|
||||||
|
|
||||||
if (spa_suspended(spa))
|
if (spa_suspended(spa)) {
|
||||||
VERIFY(nvlist_add_uint64(*config,
|
VERIFY(nvlist_add_uint64(*config,
|
||||||
ZPOOL_CONFIG_SUSPENDED,
|
ZPOOL_CONFIG_SUSPENDED,
|
||||||
spa->spa_failmode) == 0);
|
spa->spa_failmode) == 0);
|
||||||
|
VERIFY(nvlist_add_uint64(*config,
|
||||||
|
ZPOOL_CONFIG_SUSPENDED_REASON,
|
||||||
|
spa->spa_suspended) == 0);
|
||||||
|
}
|
||||||
|
|
||||||
spa_add_spares(spa, *config);
|
spa_add_spares(spa, *config);
|
||||||
spa_add_l2cache(spa, *config);
|
spa_add_l2cache(spa, *config);
|
||||||
@ -4630,18 +4966,6 @@ spa_validate_aux_devs(spa_t *spa, nvlist_t *nvroot, uint64_t crtxg, int mode,
|
|||||||
goto out;
|
goto out;
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
* The L2ARC currently only supports disk devices in
|
|
||||||
* kernel context. For user-level testing, we allow it.
|
|
||||||
*/
|
|
||||||
#ifdef _KERNEL
|
|
||||||
if ((strcmp(config, ZPOOL_CONFIG_L2CACHE) == 0) &&
|
|
||||||
strcmp(vd->vdev_ops->vdev_op_type, VDEV_TYPE_DISK) != 0) {
|
|
||||||
error = SET_ERROR(ENOTBLK);
|
|
||||||
vdev_free(vd);
|
|
||||||
goto out;
|
|
||||||
}
|
|
||||||
#endif
|
|
||||||
vd->vdev_top = vd;
|
vd->vdev_top = vd;
|
||||||
|
|
||||||
if ((error = vdev_open(vd)) == 0 &&
|
if ((error = vdev_open(vd)) == 0 &&
|
||||||
@ -4988,6 +5312,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
|
|||||||
spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
|
spa->spa_delegation = zpool_prop_default_numeric(ZPOOL_PROP_DELEGATION);
|
||||||
spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE);
|
spa->spa_failmode = zpool_prop_default_numeric(ZPOOL_PROP_FAILUREMODE);
|
||||||
spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND);
|
spa->spa_autoexpand = zpool_prop_default_numeric(ZPOOL_PROP_AUTOEXPAND);
|
||||||
|
spa->spa_multihost = zpool_prop_default_numeric(ZPOOL_PROP_MULTIHOST);
|
||||||
|
|
||||||
if (props != NULL) {
|
if (props != NULL) {
|
||||||
spa_configfile_set(spa, props, B_FALSE);
|
spa_configfile_set(spa, props, B_FALSE);
|
||||||
@ -4998,6 +5323,7 @@ spa_create(const char *pool, nvlist_t *nvroot, nvlist_t *props,
|
|||||||
|
|
||||||
spa->spa_sync_on = B_TRUE;
|
spa->spa_sync_on = B_TRUE;
|
||||||
txg_sync_start(spa->spa_dsl_pool);
|
txg_sync_start(spa->spa_dsl_pool);
|
||||||
|
mmp_thread_start(spa);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* We explicitly wait for the first transaction to complete so that our
|
* We explicitly wait for the first transaction to complete so that our
|
||||||
@ -7844,6 +8170,9 @@ spa_sync_props(void *arg, dmu_tx_t *tx)
|
|||||||
spa_async_request(spa,
|
spa_async_request(spa,
|
||||||
SPA_ASYNC_AUTOEXPAND);
|
SPA_ASYNC_AUTOEXPAND);
|
||||||
break;
|
break;
|
||||||
|
case ZPOOL_PROP_MULTIHOST:
|
||||||
|
spa->spa_multihost = intval;
|
||||||
|
break;
|
||||||
case ZPOOL_PROP_DEDUPDITTO:
|
case ZPOOL_PROP_DEDUPDITTO:
|
||||||
spa->spa_dedup_ditto = intval;
|
spa->spa_dedup_ditto = intval;
|
||||||
break;
|
break;
|
||||||
@ -8252,7 +8581,7 @@ spa_sync(spa_t *spa, uint64_t txg)
|
|||||||
|
|
||||||
if (error == 0)
|
if (error == 0)
|
||||||
break;
|
break;
|
||||||
zio_suspend(spa, NULL);
|
zio_suspend(spa, NULL, ZIO_SUSPEND_IOERR);
|
||||||
zio_resume_wait(spa);
|
zio_resume_wait(spa);
|
||||||
}
|
}
|
||||||
dmu_tx_commit(tx);
|
dmu_tx_commit(tx);
|
||||||
|
@ -436,8 +436,7 @@ spa_config_generate(spa_t *spa, vdev_t *vd, uint64_t txg, int getstats)
|
|||||||
spa->spa_comment);
|
spa->spa_comment);
|
||||||
}
|
}
|
||||||
|
|
||||||
hostid = zone_get_hostid(NULL);
|
hostid = spa_get_hostid();
|
||||||
|
|
||||||
if (hostid != 0) {
|
if (hostid != 0) {
|
||||||
fnvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, hostid);
|
fnvlist_add_uint64(config, ZPOOL_CONFIG_HOSTID, hostid);
|
||||||
}
|
}
|
||||||
|
@ -837,6 +837,9 @@ spa_add(const char *name, nvlist_t *config, const char *altroot)
|
|||||||
spa->spa_feat_refcount_cache[i] = SPA_FEATURE_DISABLED;
|
spa->spa_feat_refcount_cache[i] = SPA_FEATURE_DISABLED;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
list_create(&spa->spa_leaf_list, sizeof (vdev_t),
|
||||||
|
offsetof(vdev_t, vdev_leaf_node));
|
||||||
|
|
||||||
return (spa);
|
return (spa);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -881,6 +884,7 @@ spa_remove(spa_t *spa)
|
|||||||
sizeof (avl_tree_t));
|
sizeof (avl_tree_t));
|
||||||
|
|
||||||
list_destroy(&spa->spa_config_list);
|
list_destroy(&spa->spa_config_list);
|
||||||
|
list_destroy(&spa->spa_leaf_list);
|
||||||
|
|
||||||
nvlist_free(spa->spa_label_features);
|
nvlist_free(spa->spa_label_features);
|
||||||
nvlist_free(spa->spa_load_info);
|
nvlist_free(spa->spa_load_info);
|
||||||
@ -1526,6 +1530,9 @@ spa_get_random(uint64_t range)
|
|||||||
|
|
||||||
ASSERT(range != 0);
|
ASSERT(range != 0);
|
||||||
|
|
||||||
|
if (range == 1)
|
||||||
|
return (0);
|
||||||
|
|
||||||
(void) random_get_pseudo_bytes((void *)&r, sizeof (uint64_t));
|
(void) random_get_pseudo_bytes((void *)&r, sizeof (uint64_t));
|
||||||
|
|
||||||
return (r % range);
|
return (r % range);
|
||||||
@ -1855,7 +1862,7 @@ spa_get_failmode(spa_t *spa)
|
|||||||
boolean_t
|
boolean_t
|
||||||
spa_suspended(spa_t *spa)
|
spa_suspended(spa_t *spa)
|
||||||
{
|
{
|
||||||
return (spa->spa_suspended);
|
return (spa->spa_suspended != ZIO_SUSPEND_NONE);
|
||||||
}
|
}
|
||||||
|
|
||||||
uint64_t
|
uint64_t
|
||||||
@ -2263,6 +2270,30 @@ spa_maxdnodesize(spa_t *spa)
|
|||||||
return (DNODE_MIN_SIZE);
|
return (DNODE_MIN_SIZE);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
boolean_t
|
||||||
|
spa_multihost(spa_t *spa)
|
||||||
|
{
|
||||||
|
return (spa->spa_multihost ? B_TRUE : B_FALSE);
|
||||||
|
}
|
||||||
|
|
||||||
|
unsigned long
|
||||||
|
spa_get_hostid(void)
|
||||||
|
{
|
||||||
|
unsigned long myhostid;
|
||||||
|
|
||||||
|
#ifdef _KERNEL
|
||||||
|
myhostid = zone_get_hostid(NULL);
|
||||||
|
#else /* _KERNEL */
|
||||||
|
/*
|
||||||
|
* We're emulating the system's hostid in userland, so
|
||||||
|
* we can't use zone_get_hostid().
|
||||||
|
*/
|
||||||
|
(void) ddi_strtoul(hw_serial, NULL, 10, &myhostid);
|
||||||
|
#endif /* _KERNEL */
|
||||||
|
|
||||||
|
return (myhostid);
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Returns the txg that the last device removal completed. No indirect mappings
|
* Returns the txg that the last device removal completed. No indirect mappings
|
||||||
* have been added since this txg.
|
* have been added since this txg.
|
||||||
|
@ -39,6 +39,7 @@
|
|||||||
#include <sys/bptree.h>
|
#include <sys/bptree.h>
|
||||||
#include <sys/rrwlock.h>
|
#include <sys/rrwlock.h>
|
||||||
#include <sys/dsl_synctask.h>
|
#include <sys/dsl_synctask.h>
|
||||||
|
#include <sys/mmp.h>
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
extern "C" {
|
extern "C" {
|
||||||
|
74
sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/mmp.h
Normal file
74
sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/mmp.h
Normal file
@ -0,0 +1,74 @@
|
|||||||
|
/*
|
||||||
|
* CDDL HEADER START
|
||||||
|
*
|
||||||
|
* This file and its contents are supplied under the terms of the
|
||||||
|
* Common Development and Distribution License ("CDDL"), version 1.0.
|
||||||
|
* You may only use this file in accordance with the terms of version
|
||||||
|
* 1.0 of the CDDL.
|
||||||
|
*
|
||||||
|
* A full copy of the text of the CDDL should have accompanied this
|
||||||
|
* source. A copy of the CDDL is also available via the Internet at
|
||||||
|
* http://www.illumos.org/license/CDDL.
|
||||||
|
*
|
||||||
|
* CDDL HEADER END
|
||||||
|
*/
|
||||||
|
/*
|
||||||
|
* Copyright (C) 2017 by Lawrence Livermore National Security, LLC.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef _SYS_MMP_H
|
||||||
|
#define _SYS_MMP_H
|
||||||
|
|
||||||
|
#include <sys/spa.h>
|
||||||
|
#include <sys/zfs_context.h>
|
||||||
|
#include <sys/uberblock_impl.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*
 * MMP tunable defaults and lower bounds.
 */
#define	MMP_MIN_INTERVAL		100	/* ms */
#define	MMP_DEFAULT_INTERVAL		1000	/* ms */
#define	MMP_DEFAULT_IMPORT_INTERVALS	20
#define	MMP_DEFAULT_FAIL_INTERVALS	10
#define	MMP_MIN_FAIL_INTERVALS		2	/* min if != 0 */
#define	MMP_IMPORT_SAFETY_FACTOR	200	/* pct */

/*
 * Clamp a requested write interval (ms) to no less than MMP_MIN_INTERVAL.
 * The argument may be evaluated more than once; do not pass expressions
 * with side effects.
 */
#define	MMP_INTERVAL_OK(interval)	(MAX((interval), MMP_MIN_INTERVAL))

/*
 * Clamp a requested fail-interval count: 0 is passed through unchanged,
 * any other value is raised to at least MMP_MIN_FAIL_INTERVALS.
 * The argument is parenthesized so that compound expressions
 * (e.g. "a | b") are compared and clamped as a whole.  It may be
 * evaluated more than once.
 */
#define	MMP_FAIL_INTVS_OK(fails)	((fails) == 0 ? 0 : \
	MAX((fails), MMP_MIN_FAIL_INTERVALS))
|
||||||
|
|
||||||
|
/*
 * Per-pool multi-modifier protection (MMP) state; embedded in struct spa
 * as spa_mmp.  The first group of fields manages the MMP thread itself,
 * the second group (under mmp_io_lock) tracks the MMP writes it issues.
 */
typedef struct mmp_thread {
|
||||||
|
kmutex_t mmp_thread_lock; /* protect thread mgmt fields */
|
||||||
|
kcondvar_t mmp_thread_cv; /* NOTE(review): presumably used with mmp_thread_lock -- confirm */
|
||||||
|
kthread_t *mmp_thread; /* NOTE(review): presumably the running MMP thread -- confirm */
|
||||||
|
uint8_t mmp_thread_exiting; /* NOTE(review): presumably a stop request flag -- confirm */
|
||||||
|
kmutex_t mmp_io_lock; /* protect below */
|
||||||
|
hrtime_t mmp_last_write; /* last successful MMP write */
|
||||||
|
uint64_t mmp_delay; /* decaying avg ns between MMP writes; passed to uberblock_update() */
|
||||||
|
uberblock_t mmp_ub; /* last ub written by sync */
|
||||||
|
zio_t *mmp_zio_root; /* root of mmp write zios */
|
||||||
|
uint64_t mmp_kstat_id; /* unique id for next MMP write kstat */
|
||||||
|
int mmp_skip_error; /* reason for last skipped write */
|
||||||
|
vdev_t *mmp_last_leaf; /* last mmp write sent here */
|
||||||
|
uint64_t mmp_leaf_last_gen; /* NOTE(review): presumably the spa_leaf_list_gen seen when mmp_last_leaf was chosen -- confirm */
|
||||||
|
uint32_t mmp_seq; /* intra-second update counter */
|
||||||
|
} mmp_thread_t;
|
||||||
|
|
||||||
|
|
||||||
|
extern void mmp_init(struct spa *spa);
|
||||||
|
extern void mmp_fini(struct spa *spa);
|
||||||
|
extern void mmp_thread_start(struct spa *spa);
|
||||||
|
extern void mmp_thread_stop(struct spa *spa);
|
||||||
|
extern void mmp_update_uberblock(struct spa *spa, struct uberblock *ub);
|
||||||
|
extern void mmp_signal_all_threads(void);
|
||||||
|
|
||||||
|
/* Global tuning */
|
||||||
|
extern ulong_t zfs_multihost_interval;
|
||||||
|
extern uint_t zfs_multihost_fail_intervals;
|
||||||
|
extern uint_t zfs_multihost_import_intervals;
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif /* _SYS_MMP_H */
|
@ -865,6 +865,8 @@ extern boolean_t spa_writeable(spa_t *spa);
|
|||||||
extern boolean_t spa_has_pending_synctask(spa_t *spa);
|
extern boolean_t spa_has_pending_synctask(spa_t *spa);
|
||||||
extern int spa_maxblocksize(spa_t *spa);
|
extern int spa_maxblocksize(spa_t *spa);
|
||||||
extern int spa_maxdnodesize(spa_t *spa);
|
extern int spa_maxdnodesize(spa_t *spa);
|
||||||
|
extern boolean_t spa_multihost(spa_t *spa);
|
||||||
|
extern unsigned long spa_get_hostid(void);
|
||||||
extern boolean_t spa_has_checkpoint(spa_t *spa);
|
extern boolean_t spa_has_checkpoint(spa_t *spa);
|
||||||
extern boolean_t spa_importing_readonly_checkpoint(spa_t *spa);
|
extern boolean_t spa_importing_readonly_checkpoint(spa_t *spa);
|
||||||
extern boolean_t spa_suspend_async_destroy(spa_t *spa);
|
extern boolean_t spa_suspend_async_destroy(spa_t *spa);
|
||||||
|
@ -330,7 +330,7 @@ struct spa {
|
|||||||
zio_t *spa_txg_zio[TXG_SIZE]; /* spa_sync() waits for this */
|
zio_t *spa_txg_zio[TXG_SIZE]; /* spa_sync() waits for this */
|
||||||
kmutex_t spa_suspend_lock; /* protects suspend_zio_root */
|
kmutex_t spa_suspend_lock; /* protects suspend_zio_root */
|
||||||
kcondvar_t spa_suspend_cv; /* notification of resume */
|
kcondvar_t spa_suspend_cv; /* notification of resume */
|
||||||
uint8_t spa_suspended; /* pool is suspended */
|
zio_suspend_reason_t spa_suspended; /* pool is suspended */
|
||||||
uint8_t spa_claiming; /* pool is doing zil_claim() */
|
uint8_t spa_claiming; /* pool is doing zil_claim() */
|
||||||
boolean_t spa_is_root; /* pool is root */
|
boolean_t spa_is_root; /* pool is root */
|
||||||
int spa_minref; /* num refs when first opened */
|
int spa_minref; /* num refs when first opened */
|
||||||
@ -396,6 +396,11 @@ struct spa {
|
|||||||
|
|
||||||
hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */
|
hrtime_t spa_ccw_fail_time; /* Conf cache write fail time */
|
||||||
|
|
||||||
|
uint64_t spa_multihost; /* multihost aware (mmp) */
|
||||||
|
mmp_thread_t spa_mmp; /* multihost mmp thread */
|
||||||
|
list_t spa_leaf_list; /* list of leaf vdevs */
|
||||||
|
uint64_t spa_leaf_list_gen; /* track leaf_list changes */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* spa_refcount & spa_config_lock must be the last elements
|
* spa_refcount & spa_config_lock must be the last elements
|
||||||
* because refcount_t changes size based on compilation options.
|
* because refcount_t changes size based on compilation options.
|
||||||
|
@ -40,7 +40,8 @@ extern "C" {
|
|||||||
typedef struct uberblock uberblock_t;
|
typedef struct uberblock uberblock_t;
|
||||||
|
|
||||||
extern int uberblock_verify(uberblock_t *);
|
extern int uberblock_verify(uberblock_t *);
|
||||||
extern boolean_t uberblock_update(uberblock_t *, vdev_t *, uint64_t);
|
extern boolean_t uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg,
|
||||||
|
uint64_t mmp_delay);
|
||||||
|
|
||||||
#ifdef __cplusplus
|
#ifdef __cplusplus
|
||||||
}
|
}
|
||||||
|
@ -44,6 +44,36 @@ extern "C" {
|
|||||||
*/
|
*/
|
||||||
#define UBERBLOCK_MAGIC 0x00bab10c /* oo-ba-bloc! */
|
#define UBERBLOCK_MAGIC 0x00bab10c /* oo-ba-bloc! */
|
||||||
#define UBERBLOCK_SHIFT 10 /* up to 1K */
|
#define UBERBLOCK_SHIFT 10 /* up to 1K */
|
||||||
|
#define	MMP_MAGIC		0xa11cea11	/* all-see-all */

/* Bits in the low byte of ub_mmp_config saying which fields are present. */
#define	MMP_INTERVAL_VALID_BIT	0x01
#define	MMP_SEQ_VALID_BIT	0x02
#define	MMP_FAIL_INT_VALID_BIT	0x04

/*
 * Validity tests.  'ubp' is a pointer to an uberblock; it is parenthesized
 * so that any pointer expression (e.g. "&ub") may be passed.  An uberblock
 * carries MMP data only if both magic numbers match; each optional field
 * additionally requires its VALID bit in ub_mmp_config.
 */
#define	MMP_VALID(ubp)		((ubp)->ub_magic == UBERBLOCK_MAGIC && \
				    (ubp)->ub_mmp_magic == MMP_MAGIC)
#define	MMP_INTERVAL_VALID(ubp)	(MMP_VALID(ubp) && \
				    ((ubp)->ub_mmp_config & \
				    MMP_INTERVAL_VALID_BIT))
#define	MMP_SEQ_VALID(ubp)	(MMP_VALID(ubp) && \
				    ((ubp)->ub_mmp_config & \
				    MMP_SEQ_VALID_BIT))
#define	MMP_FAIL_INT_VALID(ubp)	(MMP_VALID(ubp) && \
				    ((ubp)->ub_mmp_config & \
				    MMP_FAIL_INT_VALID_BIT))

/*
 * Field extractors for ub_mmp_config:
 *   bits  8..31  write interval (ms)
 *   bits 32..47  sequence number
 *   bits 48..63  fail intervals
 */
#define	MMP_INTERVAL(ubp)	(((ubp)->ub_mmp_config & \
				    0x00000000FFFFFF00) >> 8)
#define	MMP_SEQ(ubp)		(((ubp)->ub_mmp_config & \
				    0x0000FFFF00000000) >> 32)
#define	MMP_FAIL_INT(ubp)	(((ubp)->ub_mmp_config & \
				    0xFFFF000000000000) >> 48)

/*
 * Field encoders.  Each masks its argument to the field width, shifts it
 * into place and ORs in the matching VALID bit.  The argument is
 * parenthesized so the mask applies to the whole expression; otherwise
 * "a | b" would only mask 'b' and could spill into neighbouring fields.
 */
#define	MMP_INTERVAL_SET(write) \
	(((uint64_t)((write) & 0xFFFFFF) << 8) | MMP_INTERVAL_VALID_BIT)

#define	MMP_SEQ_SET(seq) \
	(((uint64_t)((seq) & 0xFFFF) << 32) | MMP_SEQ_VALID_BIT)

#define	MMP_FAIL_INT_SET(fail) \
	(((uint64_t)((fail) & 0xFFFF) << 48) | MMP_FAIL_INT_VALID_BIT)
|
||||||
|
|
||||||
struct uberblock {
|
struct uberblock {
|
||||||
uint64_t ub_magic; /* UBERBLOCK_MAGIC */
|
uint64_t ub_magic; /* UBERBLOCK_MAGIC */
|
||||||
@ -56,10 +86,33 @@ struct uberblock {
|
|||||||
/* highest SPA_VERSION supported by software that wrote this txg */
|
/* highest SPA_VERSION supported by software that wrote this txg */
|
||||||
uint64_t ub_software_version;
|
uint64_t ub_software_version;
|
||||||
|
|
||||||
/* These fields are reserved for features that are under development: */
|
/* Maybe missing in uberblocks we read, but always written */
|
||||||
uint64_t ub_mmp_magic;
|
uint64_t ub_mmp_magic;
|
||||||
|
/*
|
||||||
|
* If ub_mmp_delay == 0 and ub_mmp_magic is valid, MMP is off.
|
||||||
|
* Otherwise, nanosec since last MMP write.
|
||||||
|
*/
|
||||||
uint64_t ub_mmp_delay;
|
uint64_t ub_mmp_delay;
|
||||||
uint64_t ub_mmp_seq;
|
|
||||||
|
/*
|
||||||
|
* The ub_mmp_config contains the multihost write interval, multihost
|
||||||
|
* fail intervals, sequence number for sub-second granularity, and
|
||||||
|
* valid bit mask. This layout is as follows:
|
||||||
|
*
|
||||||
|
* 64 56 48 40 32 24 16 8 0
|
||||||
|
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||||
|
* 0 | Fail Intervals| Seq | Write Interval (ms) | VALID |
|
||||||
|
* +-------+-------+-------+-------+-------+-------+-------+-------+
|
||||||
|
*
|
||||||
|
* This allows a write_interval of (2^24/1000)s, over 4.5 hours
|
||||||
|
*
|
||||||
|
* VALID Bits:
|
||||||
|
* - 0x01 - Write Interval (ms)
|
||||||
|
* - 0x02 - Sequence number exists
|
||||||
|
* - 0x04 - Fail Intervals
|
||||||
|
* - 0xf8 - Reserved
|
||||||
|
*/
|
||||||
|
uint64_t ub_mmp_config;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* ub_checkpoint_txg indicates two things about the current uberblock:
|
* ub_checkpoint_txg indicates two things about the current uberblock:
|
||||||
|
@ -168,6 +168,8 @@ extern uint64_t vdev_label_offset(uint64_t psize, int l, uint64_t offset);
|
|||||||
extern int vdev_label_number(uint64_t psise, uint64_t offset);
|
extern int vdev_label_number(uint64_t psise, uint64_t offset);
|
||||||
extern nvlist_t *vdev_label_read_config(vdev_t *vd, uint64_t txg);
|
extern nvlist_t *vdev_label_read_config(vdev_t *vd, uint64_t txg);
|
||||||
extern void vdev_uberblock_load(vdev_t *, struct uberblock *, nvlist_t **);
|
extern void vdev_uberblock_load(vdev_t *, struct uberblock *, nvlist_t **);
|
||||||
|
extern void vdev_label_write(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t
|
||||||
|
offset, uint64_t size, zio_done_func_t *done, void *priv, int flags);
|
||||||
|
|
||||||
typedef enum {
|
typedef enum {
|
||||||
VDEV_LABEL_CREATE, /* create/add a new device */
|
VDEV_LABEL_CREATE, /* create/add a new device */
|
||||||
|
@ -374,6 +374,9 @@ struct vdev {
|
|||||||
vdev_aux_t vdev_label_aux; /* on-disk aux state */
|
vdev_aux_t vdev_label_aux; /* on-disk aux state */
|
||||||
struct trim_map *vdev_trimmap; /* map on outstanding trims */
|
struct trim_map *vdev_trimmap; /* map on outstanding trims */
|
||||||
uint64_t vdev_leaf_zap;
|
uint64_t vdev_leaf_zap;
|
||||||
|
hrtime_t vdev_mmp_pending; /* 0 if write finished */
|
||||||
|
uint64_t vdev_mmp_kstat_id; /* to find kstat entry */
|
||||||
|
list_node_t vdev_leaf_node; /* leaf vdev list */
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* For DTrace to work in userland (libzpool) context, these fields must
|
* For DTrace to work in userland (libzpool) context, these fields must
|
||||||
@ -395,6 +398,12 @@ struct vdev {
|
|||||||
#define VDEV_PHYS_SIZE (112 << 10)
|
#define VDEV_PHYS_SIZE (112 << 10)
|
||||||
#define VDEV_UBERBLOCK_RING (128 << 10)
|
#define VDEV_UBERBLOCK_RING (128 << 10)
|
||||||
|
|
||||||
|
/*
|
||||||
|
* MMP blocks occupy the last MMP_BLOCKS_PER_LABEL slots in the uberblock
|
||||||
|
* ring when MMP is enabled.
|
||||||
|
*/
|
||||||
|
#define MMP_BLOCKS_PER_LABEL 1
|
||||||
|
|
||||||
/* The largest uberblock we support is 8k. */
|
/* The largest uberblock we support is 8k. */
|
||||||
#define MAX_UBERBLOCK_SHIFT (13)
|
#define MAX_UBERBLOCK_SHIFT (13)
|
||||||
#define VDEV_UBERBLOCK_SHIFT(vd) \
|
#define VDEV_UBERBLOCK_SHIFT(vd) \
|
||||||
|
@ -131,6 +131,12 @@ enum zio_checksum {
|
|||||||
#define ZIO_FAILURE_MODE_CONTINUE 1
|
#define ZIO_FAILURE_MODE_CONTINUE 1
|
||||||
#define ZIO_FAILURE_MODE_PANIC 2
|
#define ZIO_FAILURE_MODE_PANIC 2
|
||||||
|
|
||||||
|
/*
 * Why a pool is suspended.  Stored in spa->spa_suspended by zio_suspend()
 * and reset to ZIO_SUSPEND_NONE by zio_resume(); spa_suspended() reports
 * "suspended" for any value other than ZIO_SUSPEND_NONE.
 */
typedef enum zio_suspend_reason {
|
||||||
|
ZIO_SUSPEND_NONE = 0, /* pool is not suspended */
|
||||||
|
ZIO_SUSPEND_IOERR, /* passed by spa_sync() and zio_done() on I/O failure */
|
||||||
|
ZIO_SUSPEND_MMP, /* NOTE(review): presumably set when MMP writes fail -- confirm in mmp.c */
|
||||||
|
} zio_suspend_reason_t;
|
||||||
|
|
||||||
enum zio_flag {
|
enum zio_flag {
|
||||||
/*
|
/*
|
||||||
* Flags inherited by gang, ddt, and vdev children,
|
* Flags inherited by gang, ddt, and vdev children,
|
||||||
@ -606,7 +612,7 @@ extern enum zio_checksum zio_checksum_dedup_select(spa_t *spa,
|
|||||||
extern enum zio_compress zio_compress_select(spa_t *spa,
|
extern enum zio_compress zio_compress_select(spa_t *spa,
|
||||||
enum zio_compress child, enum zio_compress parent);
|
enum zio_compress child, enum zio_compress parent);
|
||||||
|
|
||||||
extern void zio_suspend(spa_t *spa, zio_t *zio);
|
extern void zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t);
|
||||||
extern int zio_resume(spa_t *spa);
|
extern int zio_resume(spa_t *spa);
|
||||||
extern void zio_resume_wait(spa_t *spa);
|
extern void zio_resume_wait(spa_t *spa);
|
||||||
|
|
||||||
|
@ -26,6 +26,7 @@
|
|||||||
#include <sys/zfs_context.h>
|
#include <sys/zfs_context.h>
|
||||||
#include <sys/uberblock_impl.h>
|
#include <sys/uberblock_impl.h>
|
||||||
#include <sys/vdev_impl.h>
|
#include <sys/vdev_impl.h>
|
||||||
|
#include <sys/mmp.h>
|
||||||
|
|
||||||
int
|
int
|
||||||
uberblock_verify(uberblock_t *ub)
|
uberblock_verify(uberblock_t *ub)
|
||||||
@ -44,7 +45,7 @@ uberblock_verify(uberblock_t *ub)
|
|||||||
* transaction group.
|
* transaction group.
|
||||||
*/
|
*/
|
||||||
boolean_t
|
boolean_t
|
||||||
uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg)
|
uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg, uint64_t mmp_delay)
|
||||||
{
|
{
|
||||||
ASSERT(ub->ub_txg < txg);
|
ASSERT(ub->ub_txg < txg);
|
||||||
|
|
||||||
@ -57,6 +58,16 @@ uberblock_update(uberblock_t *ub, vdev_t *rvd, uint64_t txg)
|
|||||||
ub->ub_guid_sum = rvd->vdev_guid_sum;
|
ub->ub_guid_sum = rvd->vdev_guid_sum;
|
||||||
ub->ub_timestamp = gethrestime_sec();
|
ub->ub_timestamp = gethrestime_sec();
|
||||||
ub->ub_software_version = SPA_VERSION;
|
ub->ub_software_version = SPA_VERSION;
|
||||||
|
ub->ub_mmp_magic = MMP_MAGIC;
|
||||||
|
if (spa_multihost(rvd->vdev_spa)) {
|
||||||
|
ub->ub_mmp_delay = mmp_delay;
|
||||||
|
ub->ub_mmp_config = MMP_SEQ_SET(0) |
|
||||||
|
MMP_INTERVAL_SET(zfs_multihost_interval) |
|
||||||
|
MMP_FAIL_INT_SET(zfs_multihost_fail_intervals);
|
||||||
|
} else {
|
||||||
|
ub->ub_mmp_delay = 0;
|
||||||
|
ub->ub_mmp_config = 0;
|
||||||
|
}
|
||||||
ub->ub_checkpoint_txg = 0;
|
ub->ub_checkpoint_txg = 0;
|
||||||
|
|
||||||
return (ub->ub_rootbp.blk_birth == txg);
|
return (ub->ub_rootbp.blk_birth == txg);
|
||||||
|
@ -475,6 +475,11 @@ vdev_add_child(vdev_t *pvd, vdev_t *cvd)
|
|||||||
*/
|
*/
|
||||||
for (; pvd != NULL; pvd = pvd->vdev_parent)
|
for (; pvd != NULL; pvd = pvd->vdev_parent)
|
||||||
pvd->vdev_guid_sum += cvd->vdev_guid_sum;
|
pvd->vdev_guid_sum += cvd->vdev_guid_sum;
|
||||||
|
|
||||||
|
if (cvd->vdev_ops->vdev_op_leaf) {
|
||||||
|
list_insert_head(&cvd->vdev_spa->spa_leaf_list, cvd);
|
||||||
|
cvd->vdev_spa->spa_leaf_list_gen++;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
@ -504,6 +509,12 @@ vdev_remove_child(vdev_t *pvd, vdev_t *cvd)
|
|||||||
pvd->vdev_children = 0;
|
pvd->vdev_children = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (cvd->vdev_ops->vdev_op_leaf) {
|
||||||
|
spa_t *spa = cvd->vdev_spa;
|
||||||
|
list_remove(&spa->spa_leaf_list, cvd);
|
||||||
|
spa->spa_leaf_list_gen++;
|
||||||
|
}
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Walk up all ancestors to update guid sum.
|
* Walk up all ancestors to update guid sum.
|
||||||
*/
|
*/
|
||||||
@ -595,6 +606,7 @@ vdev_alloc_common(spa_t *spa, uint_t id, uint64_t guid, vdev_ops_t *ops)
|
|||||||
mutex_init(&vd->vdev_obsolete_lock, NULL, MUTEX_DEFAULT, NULL);
|
mutex_init(&vd->vdev_obsolete_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||||
vd->vdev_obsolete_segments = range_tree_create(NULL, NULL);
|
vd->vdev_obsolete_segments = range_tree_create(NULL, NULL);
|
||||||
|
|
||||||
|
list_link_init(&vd->vdev_leaf_node);
|
||||||
mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_DEFAULT, NULL);
|
mutex_init(&vd->vdev_dtl_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||||
mutex_init(&vd->vdev_stat_lock, NULL, MUTEX_DEFAULT, NULL);
|
mutex_init(&vd->vdev_stat_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||||
mutex_init(&vd->vdev_probe_lock, NULL, MUTEX_DEFAULT, NULL);
|
mutex_init(&vd->vdev_probe_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||||
@ -928,6 +940,7 @@ vdev_free(vdev_t *vd)
|
|||||||
vdev_remove_child(vd->vdev_parent, vd);
|
vdev_remove_child(vd->vdev_parent, vd);
|
||||||
|
|
||||||
ASSERT(vd->vdev_parent == NULL);
|
ASSERT(vd->vdev_parent == NULL);
|
||||||
|
ASSERT(!list_link_active(&vd->vdev_leaf_node));
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Clean up vdev structure.
|
* Clean up vdev structure.
|
||||||
|
@ -22,6 +22,7 @@
|
|||||||
/*
|
/*
|
||||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||||
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
|
* Copyright (c) 2012, 2018 by Delphix. All rights reserved.
|
||||||
|
* Copyright 2019 Joyent, Inc.
|
||||||
*/
|
*/
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -189,8 +190,9 @@ static void
|
|||||||
vdev_label_read(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t offset,
|
vdev_label_read(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t offset,
|
||||||
uint64_t size, zio_done_func_t *done, void *private, int flags)
|
uint64_t size, zio_done_func_t *done, void *private, int flags)
|
||||||
{
|
{
|
||||||
ASSERT(spa_config_held(zio->io_spa, SCL_STATE_ALL, RW_WRITER) ==
|
ASSERT(
|
||||||
SCL_STATE_ALL);
|
spa_config_held(zio->io_spa, SCL_STATE, RW_READER) == SCL_STATE ||
|
||||||
|
spa_config_held(zio->io_spa, SCL_STATE, RW_WRITER) == SCL_STATE);
|
||||||
ASSERT(flags & ZIO_FLAG_CONFIG_WRITER);
|
ASSERT(flags & ZIO_FLAG_CONFIG_WRITER);
|
||||||
|
|
||||||
zio_nowait(zio_read_phys(zio, vd,
|
zio_nowait(zio_read_phys(zio, vd,
|
||||||
@ -199,14 +201,13 @@ vdev_label_read(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t offset,
|
|||||||
ZIO_PRIORITY_SYNC_READ, flags, B_TRUE));
|
ZIO_PRIORITY_SYNC_READ, flags, B_TRUE));
|
||||||
}
|
}
|
||||||
|
|
||||||
static void
|
void
|
||||||
vdev_label_write(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t offset,
|
vdev_label_write(zio_t *zio, vdev_t *vd, int l, abd_t *buf, uint64_t offset,
|
||||||
uint64_t size, zio_done_func_t *done, void *private, int flags)
|
uint64_t size, zio_done_func_t *done, void *private, int flags)
|
||||||
{
|
{
|
||||||
ASSERT(spa_config_held(zio->io_spa, SCL_ALL, RW_WRITER) == SCL_ALL ||
|
ASSERT(
|
||||||
(spa_config_held(zio->io_spa, SCL_CONFIG | SCL_STATE, RW_READER) ==
|
spa_config_held(zio->io_spa, SCL_STATE, RW_READER) == SCL_STATE ||
|
||||||
(SCL_CONFIG | SCL_STATE) &&
|
spa_config_held(zio->io_spa, SCL_STATE, RW_WRITER) == SCL_STATE);
|
||||||
dsl_pool_sync_context(spa_get_dsl(zio->io_spa))));
|
|
||||||
ASSERT(flags & ZIO_FLAG_CONFIG_WRITER);
|
ASSERT(flags & ZIO_FLAG_CONFIG_WRITER);
|
||||||
|
|
||||||
zio_nowait(zio_write_phys(zio, vd,
|
zio_nowait(zio_write_phys(zio, vd,
|
||||||
@ -1050,10 +1051,35 @@ static int
|
|||||||
vdev_uberblock_compare(const uberblock_t *ub1, const uberblock_t *ub2)
|
vdev_uberblock_compare(const uberblock_t *ub1, const uberblock_t *ub2)
|
||||||
{
|
{
|
||||||
int cmp = AVL_CMP(ub1->ub_txg, ub2->ub_txg);
|
int cmp = AVL_CMP(ub1->ub_txg, ub2->ub_txg);
|
||||||
|
|
||||||
if (likely(cmp))
|
if (likely(cmp))
|
||||||
return (cmp);
|
return (cmp);
|
||||||
|
|
||||||
return (AVL_CMP(ub1->ub_timestamp, ub2->ub_timestamp));
|
cmp = AVL_CMP(ub1->ub_timestamp, ub2->ub_timestamp);
|
||||||
|
if (likely(cmp))
|
||||||
|
return (cmp);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If MMP_VALID(ub) && MMP_SEQ_VALID(ub) then the host has an MMP-aware
|
||||||
|
* ZFS, e.g. zfsonlinux >= 0.7.
|
||||||
|
*
|
||||||
|
* If one ub has MMP and the other does not, they were written by
|
||||||
|
* different hosts, which matters for MMP. So we treat no MMP/no SEQ as
|
||||||
|
* a 0 value.
|
||||||
|
*
|
||||||
|
* Since timestamp and txg are the same if we get this far, either is
|
||||||
|
* acceptable for importing the pool.
|
||||||
|
*/
|
||||||
|
unsigned int seq1 = 0;
|
||||||
|
unsigned int seq2 = 0;
|
||||||
|
|
||||||
|
if (MMP_VALID(ub1) && MMP_SEQ_VALID(ub1))
|
||||||
|
seq1 = MMP_SEQ(ub1);
|
||||||
|
|
||||||
|
if (MMP_VALID(ub2) && MMP_SEQ_VALID(ub2))
|
||||||
|
seq2 = MMP_SEQ(ub2);
|
||||||
|
|
||||||
|
return (AVL_CMP(seq1, seq2));
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ubl_cbdata {
|
struct ubl_cbdata {
|
||||||
@ -1194,7 +1220,8 @@ vdev_uberblock_sync(zio_t *zio, uint64_t *good_writes,
|
|||||||
if (!vdev_writeable(vd))
|
if (!vdev_writeable(vd))
|
||||||
return;
|
return;
|
||||||
|
|
||||||
int n = ub->ub_txg & (VDEV_UBERBLOCK_COUNT(vd) - 1);
|
int m = spa_multihost(vd->vdev_spa) ? MMP_BLOCKS_PER_LABEL : 0;
|
||||||
|
int n = ub->ub_txg % (VDEV_UBERBLOCK_COUNT(vd) - m);
|
||||||
|
|
||||||
/* Copy the uberblock_t into the ABD */
|
/* Copy the uberblock_t into the ABD */
|
||||||
abd_t *ub_abd = abd_alloc_for_io(VDEV_UBERBLOCK_SIZE(vd), B_TRUE);
|
abd_t *ub_abd = abd_alloc_for_io(VDEV_UBERBLOCK_SIZE(vd), B_TRUE);
|
||||||
@ -1412,10 +1439,13 @@ retry:
|
|||||||
* and the vdev configuration hasn't changed,
|
* and the vdev configuration hasn't changed,
|
||||||
* then there's nothing to do.
|
* then there's nothing to do.
|
||||||
*/
|
*/
|
||||||
if (ub->ub_txg < txg &&
|
if (ub->ub_txg < txg) {
|
||||||
uberblock_update(ub, spa->spa_root_vdev, txg) == B_FALSE &&
|
boolean_t changed = uberblock_update(ub, spa->spa_root_vdev,
|
||||||
list_is_empty(&spa->spa_config_dirty_list))
|
txg, spa->spa_mmp.mmp_delay);
|
||||||
|
|
||||||
|
if (!changed && list_is_empty(&spa->spa_config_dirty_list))
|
||||||
return (0);
|
return (0);
|
||||||
|
}
|
||||||
|
|
||||||
if (txg > spa_freeze_txg(spa))
|
if (txg > spa_freeze_txg(spa))
|
||||||
return (0);
|
return (0);
|
||||||
@ -1478,6 +1508,9 @@ retry:
|
|||||||
goto retry;
|
goto retry;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (spa_multihost(spa))
|
||||||
|
mmp_update_uberblock(spa, ub);
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Sync out odd labels for every dirty vdev. If the system dies
|
* Sync out odd labels for every dirty vdev. If the system dies
|
||||||
* in the middle of this process, the even labels and the new
|
* in the middle of this process, the even labels and the new
|
||||||
|
@ -5134,6 +5134,13 @@ zfs_ioc_clear(zfs_cmd_t *zc)
|
|||||||
if (error != 0)
|
if (error != 0)
|
||||||
return (error);
|
return (error);
|
||||||
|
|
||||||
|
/*
|
||||||
|
* If multihost is enabled, resuming I/O is unsafe as another
|
||||||
|
* host may have imported the pool.
|
||||||
|
*/
|
||||||
|
if (spa_multihost(spa) && spa_suspended(spa))
|
||||||
|
return (SET_ERROR(EINVAL));
|
||||||
|
|
||||||
spa_vdev_state_enter(spa, SCL_NONE);
|
spa_vdev_state_enter(spa, SCL_NONE);
|
||||||
|
|
||||||
if (zc->zc_guid == 0) {
|
if (zc->zc_guid == 0) {
|
||||||
|
@ -1904,7 +1904,7 @@ zio_reexecute(zio_t *pio)
|
|||||||
}
|
}
|
||||||
|
|
||||||
void
|
void
|
||||||
zio_suspend(spa_t *spa, zio_t *zio)
|
zio_suspend(spa_t *spa, zio_t *zio, zio_suspend_reason_t reason)
|
||||||
{
|
{
|
||||||
if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_PANIC)
|
if (spa_get_failmode(spa) == ZIO_FAILURE_MODE_PANIC)
|
||||||
fm_panic("Pool '%s' has encountered an uncorrectable I/O "
|
fm_panic("Pool '%s' has encountered an uncorrectable I/O "
|
||||||
@ -1920,7 +1920,7 @@ zio_suspend(spa_t *spa, zio_t *zio)
|
|||||||
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
|
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE |
|
||||||
ZIO_FLAG_GODFATHER);
|
ZIO_FLAG_GODFATHER);
|
||||||
|
|
||||||
spa->spa_suspended = B_TRUE;
|
spa->spa_suspended = reason;
|
||||||
|
|
||||||
if (zio != NULL) {
|
if (zio != NULL) {
|
||||||
ASSERT(!(zio->io_flags & ZIO_FLAG_GODFATHER));
|
ASSERT(!(zio->io_flags & ZIO_FLAG_GODFATHER));
|
||||||
@ -1943,7 +1943,7 @@ zio_resume(spa_t *spa)
|
|||||||
* Reexecute all previously suspended i/o.
|
* Reexecute all previously suspended i/o.
|
||||||
*/
|
*/
|
||||||
mutex_enter(&spa->spa_suspend_lock);
|
mutex_enter(&spa->spa_suspend_lock);
|
||||||
spa->spa_suspended = B_FALSE;
|
spa->spa_suspended = ZIO_SUSPEND_NONE;
|
||||||
cv_broadcast(&spa->spa_suspend_cv);
|
cv_broadcast(&spa->spa_suspend_cv);
|
||||||
pio = spa->spa_suspend_zio_root;
|
pio = spa->spa_suspend_zio_root;
|
||||||
spa->spa_suspend_zio_root = NULL;
|
spa->spa_suspend_zio_root = NULL;
|
||||||
@ -4084,7 +4084,7 @@ zio_done(zio_t *zio)
|
|||||||
* We'd fail again if we reexecuted now, so suspend
|
* We'd fail again if we reexecuted now, so suspend
|
||||||
* until conditions improve (e.g. device comes online).
|
* until conditions improve (e.g. device comes online).
|
||||||
*/
|
*/
|
||||||
zio_suspend(spa, zio);
|
zio_suspend(zio->io_spa, zio, ZIO_SUSPEND_IOERR);
|
||||||
} else {
|
} else {
|
||||||
/*
|
/*
|
||||||
* Reexecution is potentially a huge amount of work.
|
* Reexecution is potentially a huge amount of work.
|
||||||
|
@ -217,6 +217,7 @@ typedef enum {
|
|||||||
ZPOOL_PROP_CHECKPOINT,
|
ZPOOL_PROP_CHECKPOINT,
|
||||||
ZPOOL_PROP_TNAME,
|
ZPOOL_PROP_TNAME,
|
||||||
ZPOOL_PROP_MAXDNODESIZE,
|
ZPOOL_PROP_MAXDNODESIZE,
|
||||||
|
ZPOOL_PROP_MULTIHOST,
|
||||||
ZPOOL_NUM_PROPS
|
ZPOOL_NUM_PROPS
|
||||||
} zpool_prop_t;
|
} zpool_prop_t;
|
||||||
|
|
||||||
@ -590,6 +591,7 @@ typedef struct zpool_load_policy {
|
|||||||
#define ZPOOL_CONFIG_RESILVER_TXG "resilver_txg"
|
#define ZPOOL_CONFIG_RESILVER_TXG "resilver_txg"
|
||||||
#define ZPOOL_CONFIG_COMMENT "comment"
|
#define ZPOOL_CONFIG_COMMENT "comment"
|
||||||
#define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */
|
#define ZPOOL_CONFIG_SUSPENDED "suspended" /* not stored on disk */
|
||||||
|
#define ZPOOL_CONFIG_SUSPENDED_REASON "suspended_reason" /* not stored */
|
||||||
#define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */
|
#define ZPOOL_CONFIG_TIMESTAMP "timestamp" /* not stored on disk */
|
||||||
#define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */
|
#define ZPOOL_CONFIG_BOOTFS "bootfs" /* not stored on disk */
|
||||||
#define ZPOOL_CONFIG_MISSING_DEVICES "missing_vdevs" /* not stored on disk */
|
#define ZPOOL_CONFIG_MISSING_DEVICES "missing_vdevs" /* not stored on disk */
|
||||||
@ -604,6 +606,11 @@ typedef struct zpool_load_policy {
|
|||||||
#define ZPOOL_CONFIG_VDEV_LEAF_ZAP "com.delphix:vdev_zap_leaf"
|
#define ZPOOL_CONFIG_VDEV_LEAF_ZAP "com.delphix:vdev_zap_leaf"
|
||||||
#define ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS "com.delphix:has_per_vdev_zaps"
|
#define ZPOOL_CONFIG_HAS_PER_VDEV_ZAPS "com.delphix:has_per_vdev_zaps"
|
||||||
#define ZPOOL_CONFIG_CACHEFILE "cachefile" /* not stored on disk */
|
#define ZPOOL_CONFIG_CACHEFILE "cachefile" /* not stored on disk */
|
||||||
|
#define ZPOOL_CONFIG_MMP_STATE "mmp_state" /* not stored on disk */
|
||||||
|
#define ZPOOL_CONFIG_MMP_TXG "mmp_txg" /* not stored on disk */
|
||||||
|
#define ZPOOL_CONFIG_MMP_SEQ "mmp_seq" /* not stored on disk */
|
||||||
|
#define ZPOOL_CONFIG_MMP_HOSTNAME "mmp_hostname" /* not stored on disk */
|
||||||
|
#define ZPOOL_CONFIG_MMP_HOSTID "mmp_hostid" /* not stored on disk */
|
||||||
/*
|
/*
|
||||||
* The persistent vdev state is stored as separate values rather than a single
|
* The persistent vdev state is stored as separate values rather than a single
|
||||||
* 'vdev_state' entry. This is because a device can be in multiple states, such
|
* 'vdev_state' entry. This is because a device can be in multiple states, such
|
||||||
@ -715,7 +722,8 @@ typedef enum vdev_aux {
|
|||||||
VDEV_AUX_EXTERNAL, /* external diagnosis */
|
VDEV_AUX_EXTERNAL, /* external diagnosis */
|
||||||
VDEV_AUX_SPLIT_POOL, /* vdev was split off into another pool */
|
VDEV_AUX_SPLIT_POOL, /* vdev was split off into another pool */
|
||||||
VDEV_AUX_ASHIFT_TOO_BIG, /* vdev's min block size is too large */
|
VDEV_AUX_ASHIFT_TOO_BIG, /* vdev's min block size is too large */
|
||||||
VDEV_AUX_CHILDREN_OFFLINE /* all children are offline */
|
VDEV_AUX_CHILDREN_OFFLINE, /* all children are offline */
|
||||||
|
VDEV_AUX_ACTIVE /* vdev active on a different host */
|
||||||
} vdev_aux_t;
|
} vdev_aux_t;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
@ -735,6 +743,16 @@ typedef enum pool_state {
|
|||||||
POOL_STATE_POTENTIALLY_ACTIVE /* Internal libzfs state */
|
POOL_STATE_POTENTIALLY_ACTIVE /* Internal libzfs state */
|
||||||
} pool_state_t;
|
} pool_state_t;
|
||||||
|
|
||||||
|
/*
|
||||||
|
* mmp state. The following states provide additional detail describing
|
||||||
|
* why a pool couldn't be safely imported.
|
||||||
|
*/
|
||||||
|
typedef enum mmp_state {
|
||||||
|
MMP_STATE_ACTIVE = 0, /* In active use */
|
||||||
|
MMP_STATE_INACTIVE, /* Inactive and safe to import */
|
||||||
|
MMP_STATE_NO_HOSTID /* System hostid is not set */
|
||||||
|
} mmp_state_t;
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Scan Functions.
|
* Scan Functions.
|
||||||
*/
|
*/
|
||||||
@ -1104,6 +1122,7 @@ typedef enum {
|
|||||||
#define ZFS_IMPORT_ONLY 0x8
|
#define ZFS_IMPORT_ONLY 0x8
|
||||||
#define ZFS_IMPORT_CHECKPOINT 0x10
|
#define ZFS_IMPORT_CHECKPOINT 0x10
|
||||||
#define ZFS_IMPORT_TEMP_NAME 0x20
|
#define ZFS_IMPORT_TEMP_NAME 0x20
|
||||||
|
#define ZFS_IMPORT_SKIP_MMP 0x40
|
||||||
|
|
||||||
/*
|
/*
|
||||||
* Channel program argument/return nvlist keys and defaults.
|
* Channel program argument/return nvlist keys and defaults.
|
||||||
|
Loading…
x
Reference in New Issue
Block a user