MFV r306422: 7254 ztest failed assertion in ztest_dataset_dirobj_verify: dirobjs + 1 == usedobjs
dsl_dataset_space() is looking at the ds_bp's fill count while dmu_objset_write_ready() is concurrently modifying it. This fix adds an rrwlock to protect the ds_bp.

Closes #180

Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Prakash Surya <prakash.surya@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: Steve Gonczi <steve.gonczi@delphix.com>
Author: Paul Dagnelie <pcd@delphix.com>
This commit is contained in:
parent
27891556e1
commit
daf458fb50
@ -1489,10 +1489,18 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
||||
* objects may be dirtied in syncing context, but only if they
|
||||
* were already pre-dirtied in open context.
|
||||
*/
|
||||
#ifdef DEBUG
|
||||
if (dn->dn_objset->os_dsl_dataset != NULL) {
|
||||
rrw_enter(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock,
|
||||
RW_READER, FTAG);
|
||||
}
|
||||
ASSERT(!dmu_tx_is_syncing(tx) ||
|
||||
BP_IS_HOLE(dn->dn_objset->os_rootbp) ||
|
||||
DMU_OBJECT_IS_SPECIAL(dn->dn_object) ||
|
||||
dn->dn_objset->os_dsl_dataset == NULL);
|
||||
if (dn->dn_objset->os_dsl_dataset != NULL)
|
||||
rrw_exit(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock, FTAG);
|
||||
#endif
|
||||
/*
|
||||
* We make this assert for private objects as well, but after we
|
||||
* check if we're already dirty. They are allowed to re-dirty
|
||||
@ -1517,12 +1525,21 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
||||
* Don't set dirtyctx to SYNC if we're just modifying this as we
|
||||
* initialize the objset.
|
||||
*/
|
||||
if (dn->dn_dirtyctx == DN_UNDIRTIED &&
|
||||
!BP_IS_HOLE(dn->dn_objset->os_rootbp)) {
|
||||
dn->dn_dirtyctx =
|
||||
(dmu_tx_is_syncing(tx) ? DN_DIRTY_SYNC : DN_DIRTY_OPEN);
|
||||
ASSERT(dn->dn_dirtyctx_firstset == NULL);
|
||||
dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP);
|
||||
if (dn->dn_dirtyctx == DN_UNDIRTIED) {
|
||||
if (dn->dn_objset->os_dsl_dataset != NULL) {
|
||||
rrw_enter(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock,
|
||||
RW_READER, FTAG);
|
||||
}
|
||||
if (!BP_IS_HOLE(dn->dn_objset->os_rootbp)) {
|
||||
dn->dn_dirtyctx = (dmu_tx_is_syncing(tx) ?
|
||||
DN_DIRTY_SYNC : DN_DIRTY_OPEN);
|
||||
ASSERT(dn->dn_dirtyctx_firstset == NULL);
|
||||
dn->dn_dirtyctx_firstset = kmem_alloc(1, KM_SLEEP);
|
||||
}
|
||||
if (dn->dn_objset->os_dsl_dataset != NULL) {
|
||||
rrw_exit(&dn->dn_objset->os_dsl_dataset->ds_bp_rwlock,
|
||||
FTAG);
|
||||
}
|
||||
}
|
||||
mutex_exit(&dn->dn_mtx);
|
||||
|
||||
@ -1567,8 +1584,14 @@ dbuf_dirty(dmu_buf_impl_t *db, dmu_tx_t *tx)
|
||||
* this assertion only if we're not already dirty.
|
||||
*/
|
||||
os = dn->dn_objset;
|
||||
#ifdef DEBUG
|
||||
if (dn->dn_objset->os_dsl_dataset != NULL)
|
||||
rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_READER, FTAG);
|
||||
ASSERT(!dmu_tx_is_syncing(tx) || DMU_OBJECT_IS_SPECIAL(dn->dn_object) ||
|
||||
os->os_dsl_dataset == NULL || BP_IS_HOLE(os->os_rootbp));
|
||||
if (dn->dn_objset->os_dsl_dataset != NULL)
|
||||
rrw_exit(&os->os_dsl_dataset->ds_bp_rwlock, FTAG);
|
||||
#endif
|
||||
ASSERT(db->db.db_size != 0);
|
||||
|
||||
dprintf_dbuf(db, "size=%llx\n", (u_longlong_t)db->db.db_size);
|
||||
|
@ -489,8 +489,10 @@ dmu_objset_from_ds(dsl_dataset_t *ds, objset_t **osp)
|
||||
mutex_enter(&ds->ds_opening_lock);
|
||||
if (ds->ds_objset == NULL) {
|
||||
objset_t *os;
|
||||
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
|
||||
err = dmu_objset_open_impl(dsl_dataset_get_spa(ds),
|
||||
ds, dsl_dataset_get_blkptr(ds), &os);
|
||||
rrw_exit(&ds->ds_bp_rwlock, FTAG);
|
||||
|
||||
if (err == 0) {
|
||||
mutex_enter(&ds->ds_lock);
|
||||
@ -876,9 +878,11 @@ dmu_objset_create_sync(void *arg, dmu_tx_t *tx)
|
||||
doca->doca_cred, tx);
|
||||
|
||||
VERIFY0(dsl_dataset_hold_obj(pdd->dd_pool, obj, FTAG, &ds));
|
||||
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
|
||||
bp = dsl_dataset_get_blkptr(ds);
|
||||
os = dmu_objset_create_impl(pdd->dd_pool->dp_spa,
|
||||
ds, bp, doca->doca_type, tx);
|
||||
rrw_exit(&ds->ds_bp_rwlock, FTAG);
|
||||
|
||||
if (doca->doca_userfunc != NULL) {
|
||||
doca->doca_userfunc(os, doca->doca_userarg,
|
||||
@ -1051,7 +1055,6 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
|
||||
dnode_phys_t *dnp = &os->os_phys->os_meta_dnode;
|
||||
|
||||
ASSERT(!BP_IS_EMBEDDED(bp));
|
||||
ASSERT3P(bp, ==, os->os_rootbp);
|
||||
ASSERT3U(BP_GET_TYPE(bp), ==, DMU_OT_OBJSET);
|
||||
ASSERT0(BP_GET_LEVEL(bp));
|
||||
|
||||
@ -1064,6 +1067,11 @@ dmu_objset_write_ready(zio_t *zio, arc_buf_t *abuf, void *arg)
|
||||
bp->blk_fill = 0;
|
||||
for (int i = 0; i < dnp->dn_nblkptr; i++)
|
||||
bp->blk_fill += BP_GET_FILL(&dnp->dn_blkptr[i]);
|
||||
if (os->os_dsl_dataset != NULL)
|
||||
rrw_enter(&os->os_dsl_dataset->ds_bp_rwlock, RW_WRITER, FTAG);
|
||||
*os->os_rootbp = *bp;
|
||||
if (os->os_dsl_dataset != NULL)
|
||||
rrw_exit(&os->os_dsl_dataset->ds_bp_rwlock, FTAG);
|
||||
}
|
||||
|
||||
/* ARGSUSED */
|
||||
@ -1083,6 +1091,7 @@ dmu_objset_write_done(zio_t *zio, arc_buf_t *abuf, void *arg)
|
||||
(void) dsl_dataset_block_kill(ds, bp_orig, tx, B_TRUE);
|
||||
dsl_dataset_block_born(ds, bp, tx);
|
||||
}
|
||||
kmem_free(bp, sizeof (*bp));
|
||||
}
|
||||
|
||||
/* called from dsl */
|
||||
@ -1096,6 +1105,8 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
|
||||
list_t *list;
|
||||
list_t *newlist = NULL;
|
||||
dbuf_dirty_record_t *dr;
|
||||
blkptr_t *blkptr_copy = kmem_alloc(sizeof (*os->os_rootbp), KM_SLEEP);
|
||||
*blkptr_copy = *os->os_rootbp;
|
||||
|
||||
dprintf_ds(os->os_dsl_dataset, "txg=%llu\n", tx->tx_txg);
|
||||
|
||||
@ -1123,7 +1134,7 @@ dmu_objset_sync(objset_t *os, zio_t *pio, dmu_tx_t *tx)
|
||||
dmu_write_policy(os, NULL, 0, 0, &zp);
|
||||
|
||||
zio = arc_write(pio, os->os_spa, tx->tx_txg,
|
||||
os->os_rootbp, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
|
||||
blkptr_copy, os->os_phys_buf, DMU_OS_IS_L2CACHEABLE(os),
|
||||
&zp, dmu_objset_write_ready, NULL, NULL, dmu_objset_write_done,
|
||||
os, ZIO_PRIORITY_ASYNC_WRITE, ZIO_FLAG_MUSTSUCCEED, &zb);
|
||||
|
||||
|
@ -1510,10 +1510,12 @@ dmu_recv_begin_sync(void *arg, dmu_tx_t *tx)
|
||||
* If we actually created a non-clone, we need to create the
|
||||
* objset in our new dataset.
|
||||
*/
|
||||
rrw_enter(&newds->ds_bp_rwlock, RW_READER, FTAG);
|
||||
if (BP_IS_HOLE(dsl_dataset_get_blkptr(newds))) {
|
||||
(void) dmu_objset_create_impl(dp->dp_spa,
|
||||
newds, dsl_dataset_get_blkptr(newds), drrb->drr_type, tx);
|
||||
}
|
||||
rrw_exit(&newds->ds_bp_rwlock, FTAG);
|
||||
|
||||
drba->drba_cookie->drc_ds = newds;
|
||||
|
||||
@ -1656,7 +1658,9 @@ dmu_recv_resume_begin_sync(void *arg, dmu_tx_t *tx)
|
||||
dmu_buf_will_dirty(ds->ds_dbuf, tx);
|
||||
dsl_dataset_phys(ds)->ds_flags |= DS_FLAG_INCONSISTENT;
|
||||
|
||||
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
|
||||
ASSERT(!BP_IS_HOLE(dsl_dataset_get_blkptr(ds)));
|
||||
rrw_exit(&ds->ds_bp_rwlock, FTAG);
|
||||
|
||||
drba->drba_cookie->drc_ds = ds;
|
||||
|
||||
|
@ -312,6 +312,7 @@ dsl_dataset_evict(void *dbu)
|
||||
mutex_destroy(&ds->ds_opening_lock);
|
||||
mutex_destroy(&ds->ds_sendstream_lock);
|
||||
refcount_destroy(&ds->ds_longholds);
|
||||
rrw_destroy(&ds->ds_bp_rwlock);
|
||||
|
||||
kmem_free(ds, sizeof (dsl_dataset_t));
|
||||
}
|
||||
@ -441,6 +442,7 @@ dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag,
|
||||
mutex_init(&ds->ds_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&ds->ds_opening_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
mutex_init(&ds->ds_sendstream_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
rrw_init(&ds->ds_bp_rwlock, B_FALSE);
|
||||
refcount_create(&ds->ds_longholds);
|
||||
|
||||
bplist_create(&ds->ds_pending_deadlist);
|
||||
@ -831,7 +833,9 @@ dsl_dataset_create_sync_dd(dsl_dir_t *dd, dsl_dataset_t *origin,
|
||||
dsl_dataset_phys(origin)->ds_compressed_bytes;
|
||||
dsphys->ds_uncompressed_bytes =
|
||||
dsl_dataset_phys(origin)->ds_uncompressed_bytes;
|
||||
rrw_enter(&origin->ds_bp_rwlock, RW_READER, FTAG);
|
||||
dsphys->ds_bp = dsl_dataset_phys(origin)->ds_bp;
|
||||
rrw_exit(&origin->ds_bp_rwlock, FTAG);
|
||||
|
||||
/*
|
||||
* Inherit flags that describe the dataset's contents
|
||||
@ -1389,7 +1393,9 @@ dsl_dataset_snapshot_sync_impl(dsl_dataset_t *ds, const char *snapname,
|
||||
dsphys->ds_uncompressed_bytes =
|
||||
dsl_dataset_phys(ds)->ds_uncompressed_bytes;
|
||||
dsphys->ds_flags = dsl_dataset_phys(ds)->ds_flags;
|
||||
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
|
||||
dsphys->ds_bp = dsl_dataset_phys(ds)->ds_bp;
|
||||
rrw_exit(&ds->ds_bp_rwlock, FTAG);
|
||||
dmu_buf_rele(dbuf, FTAG);
|
||||
|
||||
for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
|
||||
@ -1981,7 +1987,9 @@ dsl_dataset_space(dsl_dataset_t *ds,
|
||||
else
|
||||
*availbytesp = 0;
|
||||
}
|
||||
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
|
||||
*usedobjsp = BP_GET_FILL(&dsl_dataset_phys(ds)->ds_bp);
|
||||
rrw_exit(&ds->ds_bp_rwlock, FTAG);
|
||||
*availobjsp = DN_MAX_OBJECT - *usedobjsp;
|
||||
}
|
||||
|
||||
@ -1989,12 +1997,15 @@ boolean_t
|
||||
dsl_dataset_modified_since_snap(dsl_dataset_t *ds, dsl_dataset_t *snap)
|
||||
{
|
||||
dsl_pool_t *dp = ds->ds_dir->dd_pool;
|
||||
uint64_t birth;
|
||||
|
||||
ASSERT(dsl_pool_config_held(dp));
|
||||
if (snap == NULL)
|
||||
return (B_FALSE);
|
||||
if (dsl_dataset_phys(ds)->ds_bp.blk_birth >
|
||||
dsl_dataset_phys(snap)->ds_creation_txg) {
|
||||
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
|
||||
birth = dsl_dataset_get_blkptr(ds)->blk_birth;
|
||||
rrw_exit(&ds->ds_bp_rwlock, FTAG);
|
||||
if (birth > dsl_dataset_phys(snap)->ds_creation_txg) {
|
||||
objset_t *os, *os_snap;
|
||||
/*
|
||||
* It may be that only the ZIL differs, because it was
|
||||
@ -3058,11 +3069,15 @@ dsl_dataset_clone_swap_sync_impl(dsl_dataset_t *clone,
|
||||
|
||||
/* swap blkptrs */
|
||||
{
|
||||
rrw_enter(&clone->ds_bp_rwlock, RW_WRITER, FTAG);
|
||||
rrw_enter(&origin_head->ds_bp_rwlock, RW_WRITER, FTAG);
|
||||
blkptr_t tmp;
|
||||
tmp = dsl_dataset_phys(origin_head)->ds_bp;
|
||||
dsl_dataset_phys(origin_head)->ds_bp =
|
||||
dsl_dataset_phys(clone)->ds_bp;
|
||||
dsl_dataset_phys(clone)->ds_bp = tmp;
|
||||
rrw_exit(&origin_head->ds_bp_rwlock, FTAG);
|
||||
rrw_exit(&clone->ds_bp_rwlock, FTAG);
|
||||
}
|
||||
|
||||
/* set dd_*_bytes */
|
||||
|
@ -246,7 +246,9 @@ dsl_destroy_snapshot_sync_impl(dsl_dataset_t *ds, boolean_t defer, dmu_tx_t *tx)
|
||||
uint64_t obj;
|
||||
|
||||
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
|
||||
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
|
||||
ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg);
|
||||
rrw_exit(&ds->ds_bp_rwlock, FTAG);
|
||||
ASSERT(refcount_is_zero(&ds->ds_longholds));
|
||||
|
||||
if (defer &&
|
||||
@ -720,7 +722,9 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
|
||||
ASSERT3U(dsl_dataset_phys(ds)->ds_num_children, <=, 1);
|
||||
ASSERT(ds->ds_prev == NULL ||
|
||||
dsl_dataset_phys(ds->ds_prev)->ds_next_snap_obj != ds->ds_object);
|
||||
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
|
||||
ASSERT3U(dsl_dataset_phys(ds)->ds_bp.blk_birth, <=, tx->tx_txg);
|
||||
rrw_exit(&ds->ds_bp_rwlock, FTAG);
|
||||
ASSERT(RRW_WRITE_HELD(&dp->dp_config_rwlock));
|
||||
|
||||
/* We need to log before removing it from the namespace. */
|
||||
@ -812,10 +816,12 @@ dsl_destroy_head_sync_impl(dsl_dataset_t *ds, dmu_tx_t *tx)
|
||||
ASSERT(!DS_UNIQUE_IS_ACCURATE(ds) ||
|
||||
dsl_dataset_phys(ds)->ds_unique_bytes == used);
|
||||
|
||||
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
|
||||
bptree_add(mos, dp->dp_bptree_obj,
|
||||
&dsl_dataset_phys(ds)->ds_bp,
|
||||
dsl_dataset_phys(ds)->ds_prev_snap_txg,
|
||||
used, comp, uncomp, tx);
|
||||
rrw_exit(&ds->ds_bp_rwlock, FTAG);
|
||||
dsl_dir_diduse_space(ds->ds_dir, DD_USED_HEAD,
|
||||
-used, -comp, -uncomp, tx);
|
||||
dsl_dir_diduse_space(dp->dp_free_dir, DD_USED_HEAD,
|
||||
|
@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2011, 2015 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 Steven Hartland. All rights reserved.
|
||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||
* Copyright (c) 2014 Integros [integros.com]
|
||||
@ -493,8 +493,10 @@ dsl_pool_create(spa_t *spa, nvlist_t *zplprops, uint64_t txg)
|
||||
|
||||
/* create the root objset */
|
||||
VERIFY0(dsl_dataset_hold_obj(dp, obj, FTAG, &ds));
|
||||
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
|
||||
os = dmu_objset_create_impl(dp->dp_spa, ds,
|
||||
dsl_dataset_get_blkptr(ds), DMU_OST_ZFS, tx);
|
||||
rrw_exit(&ds->ds_bp_rwlock, FTAG);
|
||||
#ifdef _KERNEL
|
||||
zfs_create_fs(os, kcred, zplprops, tx);
|
||||
#endif
|
||||
@ -807,7 +809,9 @@ upgrade_clones_cb(dsl_pool_t *dp, dsl_dataset_t *hds, void *arg)
|
||||
* The $ORIGIN can't have any data, or the accounting
|
||||
* will be wrong.
|
||||
*/
|
||||
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
|
||||
ASSERT0(dsl_dataset_phys(prev)->ds_bp.blk_birth);
|
||||
rrw_exit(&ds->ds_bp_rwlock, FTAG);
|
||||
|
||||
/* The origin doesn't get attached to itself */
|
||||
if (ds->ds_object == prev->ds_object) {
|
||||
|
@ -1113,7 +1113,9 @@ dsl_scan_visitds(dsl_scan_t *scn, uint64_t dsobj, dmu_tx_t *tx)
|
||||
* Iterate over the bps in this ds.
|
||||
*/
|
||||
dmu_buf_will_dirty(ds->ds_dbuf, tx);
|
||||
rrw_enter(&ds->ds_bp_rwlock, RW_READER, FTAG);
|
||||
dsl_scan_visit_rootbp(scn, ds, &dsl_dataset_phys(ds)->ds_bp, tx);
|
||||
rrw_exit(&ds->ds_bp_rwlock, FTAG);
|
||||
|
||||
char *dsname = kmem_alloc(ZFS_MAX_DATASET_NAME_LEN, KM_SLEEP);
|
||||
dsl_dataset_name(ds, dsname);
|
||||
|
@ -20,7 +20,7 @@
|
||||
*/
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2012, 2014 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
|
||||
* Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
||||
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
|
||||
* Copyright (c) 2014 Integros [integros.com]
|
||||
@ -101,9 +101,14 @@ struct objset {
|
||||
zfs_redundant_metadata_type_t os_redundant_metadata;
|
||||
int os_recordsize;
|
||||
|
||||
/*
|
||||
* Pointer is constant; the blkptr it points to is protected by
|
||||
* os_dsl_dataset->ds_bp_rwlock
|
||||
*/
|
||||
blkptr_t *os_rootbp;
|
||||
|
||||
/* no lock needed: */
|
||||
struct dmu_tx *os_synctx; /* XXX sketchy */
|
||||
blkptr_t *os_rootbp;
|
||||
zil_header_t os_zil_header;
|
||||
list_t os_synced_dnodes;
|
||||
uint64_t os_flags;
|
||||
|
@ -39,6 +39,7 @@
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/dsl_deadlist.h>
|
||||
#include <sys/refcount.h>
|
||||
#include <sys/rrwlock.h>
|
||||
#include <zfeature_common.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
@ -141,6 +142,7 @@ typedef struct dsl_dataset_phys {
|
||||
|
||||
typedef struct dsl_dataset {
|
||||
dmu_buf_user_t ds_dbu;
|
||||
rrwlock_t ds_bp_rwlock; /* Protects ds_phys->ds_bp */
|
||||
|
||||
/* Immutable: */
|
||||
struct dsl_dir *ds_dir;
|
||||
|
Loading…
Reference in New Issue
Block a user