7199, 7200 dsl_dataset_rollback_sync may try to free already free blocks

7199 dsl_dataset_rollback_sync may try to free already free blocks
7200 no blocks must be born in a txg after a snaphot is created

illumos/illumos-gate@bfaed0b91e
bfaed0b91e

https://www.illumos.org/issues/7199
  dsl_dataset_rollback_sync may try to free already freed blocks when it calls
  dsl_destroy_head_sync_impl to destroy a temporary clone.
  That happens if a snapshot to which we are rolling back and from which the
  clone is created has some ZIL records.

https://www.illumos.org/issues/7200
  No new blocks must be born in a dataset in the same TXG after a snapshot of the
  dataset is taken.
  Those blocks would have the same blk_birth as the dataset's ds_prev_snap_txg
  and as such they would be presumed to belong o the snapshot while in fact they
  do not.
  All the datasets must be clean before sync tasks are run, so the described
  scenario may happen only if one of the sync tasks dirties the dataset and
  another sync task takes its snapshot.
  Then, there will be another sync pass because of the dirty data and the new
  blocks will be born in the same TXG when the data is written out.
  It seems that almost all of the existing sync tasks modify only MOS and do not
  dirty any objsets.
  The only exception that I've been able to identify so far is the rollback which
  can modify an objset when it zeroes out the objset's ZIL.

Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Brad Lewis <brad.lewis@delphix.com>
Approved by: Gordon Ross <gordon.w.ross@gmail.com>
Author: Andriy Gapon <andriy.gapon@clusterhq.com>
This commit is contained in:
Andriy Gapon 2016-11-22 11:49:55 +00:00
parent d414bfe23b
commit 7f74889d89
3 changed files with 59 additions and 18 deletions

View File

@ -81,6 +81,8 @@ extern inline dsl_dataset_phys_t *dsl_dataset_phys(dsl_dataset_t *ds);
extern int spa_asize_inflation;
static zil_header_t zero_zil;
/*
* Figure out how much of this delta should be propogated to the dsl_dir
* layer. If there's a refreservation, that space has already been
@ -125,6 +127,7 @@ dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp, dmu_tx_t *tx)
return;
}
ASSERT3U(bp->blk_birth, >, dsl_dataset_phys(ds)->ds_prev_snap_txg);
dmu_buf_will_dirty(ds->ds_dbuf, tx);
mutex_enter(&ds->ds_lock);
delta = parent_delta(ds, used);
@ -889,8 +892,20 @@ dsl_dataset_zero_zil(dsl_dataset_t *ds, dmu_tx_t *tx)
objset_t *os;
VERIFY0(dmu_objset_from_ds(ds, &os));
bzero(&os->os_zil_header, sizeof (os->os_zil_header));
dsl_dataset_dirty(ds, tx);
if (bcmp(&os->os_zil_header, &zero_zil, sizeof (zero_zil)) != 0) {
dsl_pool_t *dp = ds->ds_dir->dd_pool;
zio_t *zio;
bzero(&os->os_zil_header, sizeof (os->os_zil_header));
zio = zio_root(dp->dp_spa, NULL, NULL, ZIO_FLAG_MUSTSUCCEED);
dsl_dataset_sync(ds, zio, tx);
VERIFY0(zio_wait(zio));
/* dsl_dataset_sync_done will drop this reference. */
dmu_buf_add_ref(ds->ds_dbuf, ds);
dsl_dataset_sync_done(ds, tx);
}
}
uint64_t
@ -1030,8 +1045,10 @@ dsl_dataset_dirty(dsl_dataset_t *ds, dmu_tx_t *tx)
if (dsl_dataset_phys(ds)->ds_next_snap_obj != 0)
panic("dirtying snapshot!");
dp = ds->ds_dir->dd_pool;
/* Must not dirty a dataset in the same txg where it got snapshotted. */
ASSERT3U(tx->tx_txg, >, dsl_dataset_phys(ds)->ds_prev_snap_txg);
dp = ds->ds_dir->dd_pool;
if (txg_list_add(&dp->dp_dirty_datasets, ds, tx->tx_txg)) {
/* up the hold count until we can be written out */
dmu_buf_add_ref(ds->ds_dbuf, ds);
@ -1286,8 +1303,6 @@ void
dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
dmu_tx_t *tx)
{
static zil_header_t zero_zil;
dsl_pool_t *dp = ds->ds_dir->dd_pool;
dmu_buf_t *dbuf;
dsl_dataset_phys_t *dsphys;
@ -1306,6 +1321,10 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
bcmp(&os->os_phys->os_zil_header, &zero_zil,
sizeof (zero_zil)) == 0);
/* Should not snapshot a dirty dataset. */
ASSERT(!txg_list_member(&ds->ds_dir->dd_pool->dp_dirty_datasets,
ds, tx->tx_txg));
dsl_fs_ss_count_adjust(ds->ds_dir, 1, DD_FIELD_SNAPSHOT_COUNT, tx);
/*
@ -1652,6 +1671,27 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
}
}
static int
deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
dsl_deadlist_t *dl = arg;
dsl_deadlist_insert(dl, bp, tx);
return (0);
}
void
dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx)
{
objset_t *os = ds->ds_objset;
bplist_iterate(&ds->ds_pending_deadlist,
deadlist_enqueue_cb, &ds->ds_deadlist, tx);
ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx)));
dmu_buf_rele(ds->ds_dbuf, ds);
}
static void
get_clones_stat(dsl_dataset_t *ds, nvlist_t *nv)
{
@ -2151,6 +2191,18 @@ dsl_dataset_rollback_check(void *arg, dmu_tx_t *tx)
return (SET_ERROR(EINVAL));
}
/*
* No rollback to a snapshot created in the current txg, because
* the rollback may dirty the dataset and create blocks that are
* not reachable from the rootbp while having a birth txg that
* falls into the snapshot's range.
*/
if (dmu_tx_is_syncing(tx) &&
dsl_dataset_phys(ds)->ds_prev_snap_txg >= tx->tx_txg) {
dsl_dataset_rele(ds, FTAG);
return (SET_ERROR(EAGAIN));
}
/* must not have any bookmarks after the most recent snapshot */
nvlist_t *proprequest = fnvlist_alloc();
fnvlist_add_boolean(proprequest, zfs_prop_to_name(ZFS_PROP_CREATETXG));

View File

@ -424,14 +424,6 @@ dsl_pool_mos_diduse_space(dsl_pool_t *dp,
mutex_exit(&dp->dp_lock);
}
static int
deadlist_enqueue_cb(void *arg, const blkptr_t *bp, dmu_tx_t *tx)
{
dsl_deadlist_t *dl = arg;
dsl_deadlist_insert(dl, bp, tx);
return (0);
}
static void
dsl_pool_sync_mos(dsl_pool_t *dp, dmu_tx_t *tx)
{
@ -532,11 +524,7 @@ dsl_pool_sync(dsl_pool_t *dp, uint64_t txg)
* - release hold from dsl_dataset_dirty()
*/
while ((ds = list_remove_head(&synced_datasets)) != NULL) {
objset_t *os = ds->ds_objset;
bplist_iterate(&ds->ds_pending_deadlist,
deadlist_enqueue_cb, &ds->ds_deadlist, tx);
ASSERT(!dmu_objset_is_dirty(os, txg));
dmu_buf_rele(ds->ds_dbuf, ds);
dsl_dataset_sync_done(ds, tx);
}
while ((dd = txg_list_remove(&dp->dp_dirty_dirs, txg)) != NULL) {
dsl_dir_sync(dd, tx);

View File

@ -274,6 +274,7 @@ boolean_t dsl_dataset_modified_since_snap(dsl_dataset_t *ds,
dsl_dataset_t *snap);
void dsl_dataset_sync(dsl_dataset_t *os, zio_t *zio, dmu_tx_t *tx);
void dsl_dataset_sync_done(dsl_dataset_t *os, dmu_tx_t *tx);
void dsl_dataset_block_born(dsl_dataset_t *ds, const blkptr_t *bp,
dmu_tx_t *tx);