From 38fc65e02783418319af282674409119a60ec6c5 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Thu, 22 Feb 2018 00:09:15 +0000 Subject: [PATCH] 8962 zdb should work on non-idle pools illumos/illumos-gate@e144c4e6c90e7d4dccaad6db660ee42b6e7ba04f Currently `zdb` consistently fails to examine non-idle pools as it fails during the `spa_load()` process. The main problem seems to be that `spa_load_verify()` fails as can be seen below: $ sudo zdb -d -G dcenter zdb: can't open 'dcenter': I/O error ZFS_DBGMSG(zdb): spa_open_common: opening dcenter spa_load(dcenter): LOADING disk vdev '/dev/dsk/c4t11d0s0': best uberblock found for spa dcenter. txg 40824950 spa_load(dcenter): using uberblock with txg=40824950 spa_load(dcenter): UNLOADING spa_load(dcenter): RELOADING spa_load(dcenter): LOADING disk vdev '/dev/dsk/c3t10d0s0': best uberblock found for spa dcenter. txg 40824952 spa_load(dcenter): using uberblock with txg=40824952 spa_load(dcenter): FAILED: spa_load_verify failed [error=5] spa_load(dcenter): UNLOADING This change makes `spa_load_verify()` a dryrun when ran from `zdb`. This is done by creating a global flag in zfs and then setting it in `zdb`. Reviewed by: George Wilson Reviewed by: Matthew Ahrens Reviewed by: Andy Stormont Approved by: Dan McDonald Author: Pavel Zakharov --- cmd/zdb/zdb.c | 8 ++++++++ uts/common/fs/zfs/spa.c | 20 ++++++++++++++++++-- 2 files changed, 26 insertions(+), 2 deletions(-) diff --git a/cmd/zdb/zdb.c b/cmd/zdb/zdb.c index 41809d70fdcb..319825ca9abe 100644 --- a/cmd/zdb/zdb.c +++ b/cmd/zdb/zdb.c @@ -89,12 +89,14 @@ extern boolean_t zfs_recover; extern uint64_t zfs_arc_max, zfs_arc_meta_limit; extern int zfs_vdev_async_read_max_active; extern int aok; +extern boolean_t spa_load_verify_dryrun; #else int reference_tracking_enable; boolean_t zfs_recover; uint64_t zfs_arc_max, zfs_arc_meta_limit; int zfs_vdev_async_read_max_active; int aok; +boolean_t spa_load_verify_dryrun; #endif static const char cmdname[] = "zdb"; @@ -4543,6 +4545,12 @@ main(int argc, char **argv) */ reference_tracking_enable = B_FALSE; + /* + * Do not fail spa_load when spa_load_verify fails. This is needed + * to load non-idle pools. + */ + spa_load_verify_dryrun = B_TRUE; + kernel_init(FREAD); g_zfs = libzfs_init(); ASSERT(g_zfs != NULL); diff --git a/uts/common/fs/zfs/spa.c b/uts/common/fs/zfs/spa.c index a425f8c91284..56d316a16d76 100644 --- a/uts/common/fs/zfs/spa.c +++ b/uts/common/fs/zfs/spa.c @@ -164,6 +164,12 @@ uint_t zio_taskq_basedc = 80; /* base duty cycle */ boolean_t spa_create_process = B_TRUE; /* no process ==> no sysdc */ extern int zfs_sync_pass_deferred_free; +/* + * Report any spa_load_verify errors found, but do not fail spa_load. + * This is used by zdb to analyze non-idle pools. + */ +boolean_t spa_load_verify_dryrun = B_FALSE; + /* * This (illegal) pool name is used when temporarily importing a spa_t in order * to get the vdev stats associated with the imported devices. @@ -2014,8 +2020,15 @@ spa_load_verify(spa_t *spa) spa->spa_load_meta_errors = sle.sle_meta_count; spa->spa_load_data_errors = sle.sle_data_count; - if (!error && sle.sle_meta_count <= policy.zrp_maxmeta && - sle.sle_data_count <= policy.zrp_maxdata) { + if (sle.sle_meta_count != 0 || sle.sle_data_count != 0) { + spa_load_note(spa, "spa_load_verify found %llu metadata errors " + "and %llu data errors", (u_longlong_t)sle.sle_meta_count, + (u_longlong_t)sle.sle_data_count); + } + + if (spa_load_verify_dryrun || + (!error && sle.sle_meta_count <= policy.zrp_maxmeta && + sle.sle_data_count <= policy.zrp_maxdata)) { int64_t loss = 0; verify_ok = B_TRUE; @@ -2033,6 +2046,9 @@ spa_load_verify(spa_t *spa) spa->spa_load_max_txg = spa->spa_uberblock.ub_txg; } + if (spa_load_verify_dryrun) + return (0); + if (error) { if (error != ENXIO && error != EIO) error = SET_ERROR(EIO);