Illumos 4970-4974 - extreme rewind enhancements

4970 need controls on i/o issued by zpool import -XF
4971 zpool import -T should accept hex values
4972 zpool import -T implies extreme rewind, and thus a scrub
4973 spa_load_retry retries the same txg
4974 spa_load_verify() reads all data twice
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Dan McDonald <danmcd@omniti.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Approved by: Robert Mustacchi <rm@joyent.com>

References:
  https://www.illumos.org/issues/4970
  https://www.illumos.org/issues/4971
  https://www.illumos.org/issues/4972
  https://www.illumos.org/issues/4973
  https://www.illumos.org/issues/4974
  https://github.com/illumos/illumos-gate/commit/e42d205

Notes:
    This set of patches adds a set of tunable parameters for the
    "extreme rewind" mode of pool import which allows control over
    the traversal performed during such an import.

Ported by: Tim Chase <tim@chase2k.com>
Signed-off-by: Brian Behlendorf <behlendorf1@llnl.gov>
Closes #2598
This commit is contained in:
Matthew Ahrens 2014-07-15 10:58:41 -08:00 committed by Brian Behlendorf
parent 49ddb31506
commit dea377c0d9
3 changed files with 114 additions and 15 deletions

View File

@ -22,7 +22,7 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright 2011 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2013 by Delphix. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2012 by Frederik Wessels. All rights reserved.
* Copyright (c) 2012 by Cyril Plisko. All rights reserved.
*/
@ -2082,7 +2082,7 @@ zpool_do_import(int argc, char **argv)
case 'T':
errno = 0;
txg = strtoull(optarg, &endptr, 10);
txg = strtoull(optarg, &endptr, 0);
if (errno != 0 || *endptr != '\0') {
(void) fprintf(stderr,
gettext("invalid txg value\n"));

View File

@ -230,6 +230,52 @@ they operate close to quota or capacity limits.
Default value: 24
.RE
.sp
.ne 2
.na
\fBspa_load_verify_data\fR (int)
.ad
.RS 12n
Whether to traverse data blocks during an "extreme rewind" (\fB-X\fR)
import. Use 0 to disable and 1 to enable.
An extreme rewind import normally performs a full traversal of all
blocks in the pool for verification. If this parameter is set to 0,
the traversal skips non-metadata blocks. It can be toggled once the
import has started to stop or start the traversal of non-metadata blocks.
.sp
Default value: 1
.RE
.sp
.ne 2
.na
\fBspa_load_verify_metadata\fR (int)
.ad
.RS 12n
Whether to traverse blocks during an "extreme rewind" (\fB-X\fR)
pool import. Use 0 to disable and 1 to enable.
An extreme rewind import normally performs a full traversal of all
blocks in the pool for verification. If this parameter is set to 1,
the traversal is not performed. It can be toggled once the import has
started to stop or start the traversal.
.sp
Default value: 1
.RE
.sp
.ne 2
.na
\fBspa_load_verify_maxinflight\fR (int)
.ad
.RS 12n
Maximum concurrent I/Os during the traversal performed during an "extreme
rewind" (\fB-X\fR) pool import.
.sp
Default value: 10000
.RE
.sp
.ne 2
.na

View File

@ -1855,6 +1855,7 @@ spa_load_verify_done(zio_t *zio)
spa_load_error_t *sle = zio->io_private;
dmu_object_type_t type = BP_GET_TYPE(bp);
int error = zio->io_error;
spa_t *spa = zio->io_spa;
if (error) {
if ((BP_GET_LEVEL(bp) != 0 || DMU_OT_IS_METADATA(type)) &&
@ -1864,23 +1865,56 @@ spa_load_verify_done(zio_t *zio)
atomic_add_64(&sle->sle_data_count, 1);
}
zio_data_buf_free(zio->io_data, zio->io_size);
mutex_enter(&spa->spa_scrub_lock);
spa->spa_scrub_inflight--;
cv_broadcast(&spa->spa_scrub_io_cv);
mutex_exit(&spa->spa_scrub_lock);
}
/*
* Maximum number of concurrent scrub i/os to create while verifying
* a pool while importing it.
*/
int spa_load_verify_maxinflight = 10000;
int spa_load_verify_metadata = B_TRUE;
int spa_load_verify_data = B_TRUE;
/*ARGSUSED*/
static int
spa_load_verify_cb(spa_t *spa, zilog_t *zilog, const blkptr_t *bp,
const zbookmark_phys_t *zb, const dnode_phys_t *dnp, void *arg)
{
if (!BP_IS_HOLE(bp) && !BP_IS_EMBEDDED(bp)) {
zio_t *rio = arg;
size_t size = BP_GET_PSIZE(bp);
void *data = zio_data_buf_alloc(size);
zio_t *rio;
size_t size;
void *data;
zio_nowait(zio_read(rio, spa, bp, data, size,
spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB,
ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL |
ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb));
}
if (BP_IS_HOLE(bp) || BP_IS_EMBEDDED(bp))
return (0);
/*
* Note: normally this routine will not be called if
* spa_load_verify_metadata is not set. However, it may be useful
* to manually set the flag after the traversal has begun.
*/
if (!spa_load_verify_metadata)
return (0);
if (BP_GET_BUFC_TYPE(bp) == ARC_BUFC_DATA && !spa_load_verify_data)
return (0);
rio = arg;
size = BP_GET_PSIZE(bp);
data = zio_data_buf_alloc(size);
mutex_enter(&spa->spa_scrub_lock);
while (spa->spa_scrub_inflight >= spa_load_verify_maxinflight)
cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
spa->spa_scrub_inflight++;
mutex_exit(&spa->spa_scrub_lock);
zio_nowait(zio_read(rio, spa, bp, data, size,
spa_load_verify_done, rio->io_private, ZIO_PRIORITY_SCRUB,
ZIO_FLAG_SPECULATIVE | ZIO_FLAG_CANFAIL |
ZIO_FLAG_SCRUB | ZIO_FLAG_RAW, zb));
return (0);
}
@ -1891,7 +1925,7 @@ spa_load_verify(spa_t *spa)
spa_load_error_t sle = { 0 };
zpool_rewind_policy_t policy;
boolean_t verify_ok = B_FALSE;
int error;
int error = 0;
zpool_get_rewind_policy(spa->spa_config, &policy);
@ -1901,8 +1935,11 @@ spa_load_verify(spa_t *spa)
rio = zio_root(spa, NULL, &sle,
ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE);
error = traverse_pool(spa, spa->spa_verify_min_txg,
TRAVERSE_PRE | TRAVERSE_PREFETCH, spa_load_verify_cb, rio);
if (spa_load_verify_metadata) {
error = traverse_pool(spa, spa->spa_verify_min_txg,
TRAVERSE_PRE | TRAVERSE_PREFETCH_METADATA,
spa_load_verify_cb, rio);
}
(void) zio_wait(rio);
@ -2781,7 +2818,7 @@ spa_load_retry(spa_t *spa, spa_load_state_t state, int mosconfig)
spa_unload(spa);
spa_deactivate(spa);
spa->spa_load_max_txg--;
spa->spa_load_max_txg = spa->spa_uberblock.ub_txg - 1;
spa_activate(spa, mode);
spa_async_suspend(spa);
@ -2811,6 +2848,8 @@ spa_load_best(spa_t *spa, spa_load_state_t state, int mosconfig,
spa_set_log_state(spa, SPA_LOG_CLEAR);
} else {
spa->spa_load_max_txg = max_request;
if (max_request != UINT64_MAX)
spa->spa_extreme_rewind = B_TRUE;
}
load_error = rewind_error = spa_load(spa, state, SPA_IMPORT_EXISTING,
@ -6603,3 +6642,17 @@ EXPORT_SYMBOL(spa_prop_clear_bootfs);
/* asynchronous event notification */
EXPORT_SYMBOL(spa_event_notify);
#endif
#if defined(_KERNEL) && defined(HAVE_SPL)
module_param(spa_load_verify_maxinflight, int, 0644);
MODULE_PARM_DESC(spa_load_verify_maxinflight,
"Max concurrent traversal I/Os while verifying pool during import -X");
module_param(spa_load_verify_metadata, int, 0644);
MODULE_PARM_DESC(spa_load_verify_metadata,
"Set to traverse metadata on pool import");
module_param(spa_load_verify_data, int, 0644);
MODULE_PARM_DESC(spa_load_verify_data,
"Set to traverse data on pool import");
#endif