4391 panic system rather than corrupting pool if we hit bug 4390

llumos/illumos-gate@8b36997aa2
This commit is contained in:
Xin LI 2014-01-01 01:18:37 +00:00
parent cbdc759424
commit 5ed46bbf4d
5 changed files with 26 additions and 9 deletions

View File

@ -180,6 +180,7 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
err = 0;
for (i = ba.ba_phys->bt_begin; i < ba.ba_phys->bt_end; i++) {
bptree_entry_phys_t bte;
int flags = TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST;
ASSERT(!free || i == ba.ba_phys->bt_begin);
@ -188,13 +189,13 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
if (err != 0)
break;
if (zfs_recover)
flags |= TRAVERSE_HARD;
err = traverse_dataset_destroyed(os->os_spa, &bte.be_bp,
bte.be_birth_txg, &bte.be_zb,
TRAVERSE_PREFETCH_METADATA | TRAVERSE_POST,
bte.be_birth_txg, &bte.be_zb, flags,
bptree_visit_cb, &ba);
if (free) {
ASSERT(err == 0 || err == ERESTART);
if (err != 0) {
if (err == ERESTART) {
/* save bookmark for future resume */
ASSERT3U(bte.be_zb.zb_objset, ==,
ZB_DESTROYED_OBJSET);
@ -202,11 +203,21 @@ bptree_iterate(objset_t *os, uint64_t obj, boolean_t free, bptree_itor_t func,
dmu_write(os, obj, i * sizeof (bte),
sizeof (bte), &bte, tx);
break;
} else {
ba.ba_phys->bt_begin++;
(void) dmu_free_range(os, obj,
i * sizeof (bte), sizeof (bte), tx);
}
if (err != 0) {
/*
* We can not properly handle an i/o
* error, because the traversal code
* does not know how to resume from an
* arbitrary bookmark.
*/
zfs_panic_recover("error %u from "
"traverse_dataset_destroyed()", err);
}
ba.ba_phys->bt_begin++;
(void) dmu_free_range(os, obj,
i * sizeof (bte), sizeof (bte), tx);
}
}

View File

@ -383,7 +383,7 @@ traverse_visitbp(traverse_data_t *td, const dnode_phys_t *dnp,
(void) arc_buf_remove_ref(buf, &buf);
post:
if (err == 0 && lasterr == 0 && (td->td_flags & TRAVERSE_POST)) {
if (err == 0 && (td->td_flags & TRAVERSE_POST)) {
err = td->td_func(td->td_spa, NULL, bp, zb, dnp, td->td_arg);
if (err == ERESTART)
pause = B_TRUE;

View File

@ -1317,6 +1317,9 @@ dsl_scan_free_should_pause(dsl_scan_t *scn)
{
uint64_t elapsed_nanosecs;
if (zfs_recover)
return (B_FALSE);
elapsed_nanosecs = gethrtime() - scn->scn_sync_start_time;
return (elapsed_nanosecs / NANOSEC > zfs_txg_timeout ||
(NSEC2MSEC(elapsed_nanosecs) > zfs_free_min_time_ms &&

View File

@ -247,6 +247,8 @@ int zfs_flags = 0;
* zfs_recover can be set to nonzero to attempt to recover from
* otherwise-fatal errors, typically caused by on-disk corruption. When
* set, calls to zfs_panic_recover() will turn into warning messages.
* This should only be used as a last resort, as it typically results
* in leaked space, or worse.
*/
int zfs_recover = 0;

View File

@ -47,6 +47,7 @@ extern "C" {
#endif
extern int zfs_flags;
extern int zfs_recover;
#define ZFS_DEBUG_DPRINTF (1<<0)
#define ZFS_DEBUG_DBUF_VERIFY (1<<1)