MFV r306422: 7254 ztest failed assertion in ztest_dataset_dirobj_verify: dirobjs + 1 == usedobjs

dsl_dataset_space is looking at the ds_bp's fill count while
dmu_objset_write_ready() is concurrently modifying it. This fix adds an
rrwlock to protect the ds_bp.

Closes #180

Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Steve Gonczi <steve.gonczi@delphix.com>
Author: Paul Dagnelie <pcd@delphix.com>
This commit is contained in:
Alexander Motin 2016-09-28 23:54:47 +00:00
commit bb97118138
9 changed files with 85 additions and 13 deletions

View File

@ -1489,10 +1489,18 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
* objects may be dirtied in syncing context, but only if they
* were already pre-dirtied in open context.
*/
#ifdef DEBUG
if (dn->dn_objset->os_dsl_dataset != NULL) {
rrw_enter(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock,
RW_READER, FTAG);
}
ASSERT(!dmu_tx_is_syncing(tx) ||
BP_IS_HOLE(dn->dn_objset->os_rootbp) ||
DMU_OBJECT_IS_SPECIAL(dn->dn_object) ||
dn->dn_objset->os_dsl_dataset == NULL);
if (dn->dn_objset->os_dsl_dataset != NULL)
rrw_exit(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock, FTAG);
#endif
/*
* We make this assert for private objects as well, but after we
* check if we're already dirty. They are allowed to re-dirty
@ -1517,12 +1525,21 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
* Don't set dirtyctx to SYNC if we're just modifying this as we
* initialize the objset.
*/
if (dn->dn_dirtyctx == DN_UNDIRTIED &&
!BP_IS_HOLE(dn->dn_objset->os_rootbp)) {
dn->dn_dirtyctx =
(dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN);
ASSERT(dn->dn_dirtyctx_firstset == NULL);
dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP);
if (dn->dn_dirtyctx == DN_UNDIRTIED) {
if (dn->dn_objset->os_dsl_dataset != NULL) {
rrw_enter(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock,
RW_READER, FTAG);
}
if (!BP_IS_HOLE(dn->dn_objset->os_rootbp)) {
dn->dn_dirtyctx = (dmu_tx_is_syncing(tx) ?
DN_DIRTY_SYNC : DN_DIRTY_OPEN);
ASSERT(dn->dn_dirtyctx_firstset == NULL);
dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP);
}
if (dn->dn_objset->os_dsl_dataset != NULL) {
rrw_exit(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock,
FTAG);
}
}
mutex_exit(&dn->dn_mtx);
@ -1567,8 +1584,14 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
* this assertion only if we're not already dirty.
*/
os = dn->dn_objset;
#ifdef DEBUG
if (dn->dn_objset->os_dsl_dataset != NULL)
rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_READER, FTAG);
ASSERT(!dmu_tx_is_syncing(tx) || DMU_OBJECT_IS_SPECIAL(dn->dn_object) ||
os->os_dsl_dataset == NULL || BP_IS_HOLE(os->os_rootbp));
if (dn->dn_objset->os_dsl_dataset != NULL)
rrw_exit(&os->os_dsl_dataset->ds_bp_rwlock, FTAG);
#endif
ASSERT(db->db.db_size != 0);
dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);

View File

@ -489,8 +489,10 @@ dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
mutex_enter(&ds->ds_opening_lock);
if (ds->ds_objset == NULL) {
objset_t *os;
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
ds, dsl_dataset_get_blkptr(ds), &os);
rrw_exit(&ds->ds_bp_rwlock, FTAG);
if (err == 0) {
mutex_enter(&ds->ds_lock);
@ -876,9 +878,11 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
doca->doca_cred, tx);
VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
bp = dsl_dataset_get_blkptr(ds);
os = dmu_objset_create_impl(pdd->dd_pool->dp_spa,
ds, bp, doca->doca_type, tx);
rrw_exit(&ds->ds_bp_rwlock, FTAG);
if (doca->doca_userfunc != NULL) {
doca->doca_userfunc(os, doca->doca_userarg,
@ -1051,7 +1055,6 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;
ASSERT(!BP_IS_EMBEDDED(bp));
ASSERT3P(bp, ==, os->os_rootbp);
ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
ASSERT0(BP_GET_LEVEL(bp));
@ -1064,6 +1067,11 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
bp->blk_fill = 0;
for (int i = 0; i < dnp->dn_nblkptr; i++)
bp->blk_fill += BP_GET_FILL(&dnp->dn_blkptr[i]);
if (os->os_dsl_dataset != NULL)
rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_WRITER, FTAG);
*os->os_rootbp = *bp;
if (os->os_dsl_dataset != NULL)
rrw_exit(&os->os_dsl_dataset->ds_bp_rwlock, FTAG);
}
/* ARGSUSED */
@ -1083,6 +1091,7 @@ dmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg)
(void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE);
dsl_dataset_block_born(ds, bp, tx);
}
kmem_free(bp, sizeof (*bp));
}
/* called from dsl */
@ -1096,6 +1105,8 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
list_t *list;
list_t *newlist = NULL;
dbuf_dirty_record_t *dr;
blkptr_t *blkptr_copy = kmem_alloc(sizeof (*os->os_rootbp), KM_SLEEP);
*blkptr_copy = *os->os_rootbp;
dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);
@ -1123,7 +1134,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
dmu_write_policy(os, NULL, 0, 0, &zp);
zio = arc_write(pio, os->os_spa, tx->tx_txg,
os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
&zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);

View File

@ -1510,10 +1510,12 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
* If we actually created a non-clone, we need to create the
* objset in our new dataset.
*/
rrw_enter(&newds->ds_bp_rwlock, RW_READER, FTAG);
if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds))) {
(void) dmu_objset_create_impl(dp->dp_spa,
newds, dsl_dataset_get_blkptr(newds), drrb->drr_type, tx);
}
rrw_exit(&newds->ds_bp_rwlock, FTAG);
drba->drba_cookie->drc_ds = newds;
@ -1656,7 +1658,9 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx)
dmu_buf_will_dirty(ds->ds_dbuf, tx);
dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT;
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
ASSERT(!BP_IS_HOLE(dsl_dataset_get_blkptr(ds)));
rrw_exit(&ds->ds_bp_rwlock, FTAG);
drba->drba_cookie->drc_ds = ds;

View File

@ -312,6 +312,7 @@ dsl_dataset_evict(void *dbu)
mutex_destroy(&ds->ds_opening_lock);
mutex_destroy(&ds->ds_sendstream_lock);
refcount_destroy(&ds->ds_longholds);
rrw_destroy(&ds->ds_bp_rwlock);
kmem_free(ds, sizeof (dsl_dataset_t));
}
@ -441,6 +442,7 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);
rrw_init(&ds->ds_bp_rwlock, B_FALSE);
refcount_create(&ds->ds_longholds);
bplist_create(&ds->ds_pending_deadlist);
@ -831,7 +833,9 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
dsl_dataset_phys(origin)->ds_compressed_bytes;
dsphys->ds_uncompressed_bytes =
dsl_dataset_phys(origin)->ds_uncompressed_bytes;
rrw_enter(&origin->ds_bp_rwlock, RW_READER, FTAG);
dsphys->ds_bp = dsl_dataset_phys(origin)->ds_bp;
rrw_exit(&origin->ds_bp_rwlock, FTAG);
/*
* Inherit flags that describe the dataset's contents
@ -1389,7 +1393,9 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
dsphys->ds_uncompressed_bytes =
dsl_dataset_phys(ds)->ds_uncompressed_bytes;
dsphys->ds_flags = dsl_dataset_phys(ds)->ds_flags;
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
dsphys->ds_bp = dsl_dataset_phys(ds)->ds_bp;
rrw_exit(&ds->ds_bp_rwlock, FTAG);
dmu_buf_rele(dbuf, FTAG);
for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
@ -1981,7 +1987,9 @@ dsl_dataset_space(dsl_dataset_t *ds,
else
*availbytesp = 0;
}
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
*usedobjsp = BP_GET_FILL(&dsl_dataset_phys(ds)->ds_bp);
rrw_exit(&ds->ds_bp_rwlock, FTAG);
*availobjsp = DN_MAX_OBJECT - *usedobjsp;
}
@ -1989,12 +1997,15 @@ boolean_t
dsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap)
{
dsl_pool_t *dp = ds->ds_dir->dd_pool;
uint64_t birth;
ASSERT(dsl_pool_config_held(dp));
if (snap == NULL)
return (B_FALSE);
if (dsl_dataset_phys(ds)->ds_bp.blk_birth >
dsl_dataset_phys(snap)->ds_creation_txg) {
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
birth = dsl_dataset_get_blkptr(ds)->blk_birth;
rrw_exit(&ds->ds_bp_rwlock, FTAG);
if (birth > dsl_dataset_phys(snap)->ds_creation_txg) {
objset_t *os, *os_snap;
/*
* It may be that only the ZIL differs, because it was
@ -3058,11 +3069,15 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
/* swap blkptrs */
{
rrw_enter(&clone->ds_bp_rwlock, RW_WRITER, FTAG);
rrw_enter(&origin_head->ds_bp_rwlock, RW_WRITER, FTAG);
blkptr_t tmp;
tmp = dsl_dataset_phys(origin_head)->ds_bp;
dsl_dataset_phys(origin_head)->ds_bp =
dsl_dataset_phys(clone)->ds_bp;
dsl_dataset_phys(clone)->ds_bp = tmp;
rrw_exit(&origin_head->ds_bp_rwlock, FTAG);
rrw_exit(&clone->ds_bp_rwlock, FTAG);
}
/* set dd_*_bytes */

View File

@ -246,7 +246,9 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
uint64_t obj;
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg);
rrw_exit(&ds->ds_bp_rwlock, FTAG);
ASSERT(refcount_is_zero(&ds->ds_longholds));
if (defer &&
@ -720,7 +722,9 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
ASSERT(ds->ds_prev == NULL ||
dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj != ds->ds_object);
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg);
rrw_exit(&ds->ds_bp_rwlock, FTAG);
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
/* We need to log before removing it from the namespace. */
@ -812,10 +816,12 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
dsl_dataset_phys(ds)->ds_unique_bytes == used);
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
bptree_add(mos, dp->dp_bptree_obj,
&dsl_dataset_phys(ds)->ds_bp,
dsl_dataset_phys(ds)->ds_prev_snap_txg,
used, comp, uncomp, tx);
rrw_exit(&ds->ds_bp_rwlock, FTAG);
dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
-used, -comp, -uncomp, tx);
dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 Steven Hartland. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
@ -493,8 +493,10 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
/* create the root objset */
VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
os = dmu_objset_create_impl(dp->dp_spa, ds,
dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx);
rrw_exit(&ds->ds_bp_rwlock, FTAG);
#ifdef _KERNEL
zfs_create_fs(os, kcred, zplprops, tx);
#endif
@ -807,7 +809,9 @@ upgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
* The $ORIGIN can't have any data, or the accounting
* will be wrong.
*/
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
ASSERT0(dsl_dataset_phys(prev)->ds_bp.blk_birth);
rrw_exit(&ds->ds_bp_rwlock, FTAG);
/* The origin doesn't get attached to itself */
if (ds->ds_object == prev->ds_object) {

View File

@ -1113,7 +1113,9 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx)
* Iterate over the bps in this ds.
*/
dmu_buf_will_dirty(ds->ds_dbuf, tx);
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
dsl_scan_visit_rootbp(scn, ds, &dsl_dataset_phys(ds)->ds_bp, tx);
rrw_exit(&ds->ds_bp_rwlock, FTAG);
char *dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
dsl_dataset_name(ds, dsname);

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
* Copyright (c) 2014 Integros [integros.com]
@ -101,9 +101,14 @@ struct objset {
zfs_redundant_metadata_type_t os_redundant_metadata;
int os_recordsize;
/*
* Pointer is constant; the blkptr it points to is protected by
* os_dsl_dataset->ds_bp_rwlock
*/
blkptr_t *os_rootbp;
/* no lock needed: */
struct dmu_tx *os_synctx; /* XXX sketchy */
blkptr_t *os_rootbp;
zil_header_t os_zil_header;
list_t os_synced_dnodes;
uint64_t os_flags;

View File

@ -39,6 +39,7 @@
#include <sys/zfs_context.h>
#include <sys/dsl_deadlist.h>
#include <sys/refcount.h>
#include <sys/rrwlock.h>
#include <zfeature_common.h>
#ifdef __cplusplus
@ -141,6 +142,7 @@ typedef struct dsl_dataset_phys {
typedef struct dsl_dataset {
dmu_buf_user_t ds_dbu;
rrwlock_t ds_bp_rwlock; /* Protects ds_phys->ds_bp */
/* Immutable: */
struct dsl_dir *ds_dir;