panic in bpobj_space(): null pointer dereference

This is a race condition in the deadlist code.

A thread executing an administrative command that uses
dsl_deadlist_space_range() holds the lock of the whole deadlist_t to
protect the access of all its entries that the deadlist contains in an
avl tree.

Sync threads trying to insert a new entry in the deadlist (through
dsl_deadlist_insert() -> dle_enqueue()) do not hold the deadlist lock at
that moment.  If the dle_bpobj is the empty bpobj (our sentinel value),
we close and reopen it.  Between these two operations, it is possible
for the dsl_deadlist_space_range() thread to dereference that bpobj
which is NULL during that window.

Threads should hold the a deadlist's dl_lock when they manipulate its
internal data so scenarios like the one above are avoided.

Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Dan Kimmel <dan.kimmel@delphix.com>
Reviewed-by: George Melikov <mail@gmelikov.ru>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Matthew Ahrens <mahrens@delphix.com>
Closes #5762
This commit is contained in:
Matthew Ahrens 2017-02-09 10:19:12 -08:00 committed by Brian Behlendorf
parent b0eac56a4d
commit df7eeccc75
2 changed files with 25 additions and 11 deletions

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2011, 2014 by Delphix. All rights reserved.
* Copyright (c) 2011, 2016 by Delphix. All rights reserved.
*/
#include <sys/bpobj.h>
@ -395,6 +395,7 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
return;
}
mutex_enter(&bpo->bpo_lock);
dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
if (bpo->bpo_phys->bpo_subobjs == 0) {
bpo->bpo_phys->bpo_subobjs = dmu_object_alloc(bpo->bpo_os,
@ -405,7 +406,6 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
ASSERT0(dmu_object_info(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, &doi));
ASSERT3U(doi.doi_type, ==, DMU_OT_BPOBJ_SUBOBJ);
mutex_enter(&bpo->bpo_lock);
dmu_write(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs,
bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj),
sizeof (subobj), &subobj, tx);

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012 by Delphix. All rights reserved.
* Copyright (c) 2012, 2016 by Delphix. All rights reserved.
* Copyright (c) 2014 Spectra Logic Corporation, All rights reserved.
*/
@ -66,6 +66,8 @@ dsl_deadlist_load_tree(dsl_deadlist_t *dl)
zap_cursor_t zc;
zap_attribute_t za;
ASSERT(MUTEX_HELD(&dl->dl_lock));
ASSERT(!dl->dl_oldfmt);
if (dl->dl_havetree)
return;
@ -178,6 +180,7 @@ static void
dle_enqueue(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
const blkptr_t *bp, dmu_tx_t *tx)
{
ASSERT(MUTEX_HELD(&dl->dl_lock));
if (dle->dle_bpobj.bpo_object ==
dmu_objset_pool(dl->dl_os)->dp_empty_bpobj) {
uint64_t obj = bpobj_alloc(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
@ -194,6 +197,7 @@ static void
dle_enqueue_subobj(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
uint64_t obj, dmu_tx_t *tx)
{
ASSERT(MUTEX_HELD(&dl->dl_lock));
if (dle->dle_bpobj.bpo_object !=
dmu_objset_pool(dl->dl_os)->dp_empty_bpobj) {
bpobj_enqueue_subobj(&dle->dle_bpobj, obj, tx);
@ -218,15 +222,14 @@ dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx)
return;
}
mutex_enter(&dl->dl_lock);
dsl_deadlist_load_tree(dl);
dmu_buf_will_dirty(dl->dl_dbuf, tx);
mutex_enter(&dl->dl_lock);
dl->dl_phys->dl_used +=
bp_get_dsize_sync(dmu_objset_spa(dl->dl_os), bp);
dl->dl_phys->dl_comp += BP_GET_PSIZE(bp);
dl->dl_phys->dl_uncomp += BP_GET_UCSIZE(bp);
mutex_exit(&dl->dl_lock);
dle_tofind.dle_mintxg = bp->blk_birth;
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
@ -243,6 +246,7 @@ dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, dmu_tx_t *tx)
ASSERT3P(dle, !=, NULL);
dle_enqueue(dl, dle, bp, tx);
mutex_exit(&dl->dl_lock);
}
/*
@ -258,16 +262,19 @@ dsl_deadlist_add_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
if (dl->dl_oldfmt)
return;
dsl_deadlist_load_tree(dl);
dle = kmem_alloc(sizeof (*dle), KM_SLEEP);
dle->dle_mintxg = mintxg;
mutex_enter(&dl->dl_lock);
dsl_deadlist_load_tree(dl);
obj = bpobj_alloc_empty(dl->dl_os, SPA_OLD_MAXBLOCKSIZE, tx);
VERIFY3U(0, ==, bpobj_open(&dle->dle_bpobj, dl->dl_os, obj));
avl_add(&dl->dl_tree, dle);
VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, dl->dl_object,
mintxg, obj, tx));
mutex_exit(&dl->dl_lock);
}
/*
@ -282,6 +289,7 @@ dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
if (dl->dl_oldfmt)
return;
mutex_enter(&dl->dl_lock);
dsl_deadlist_load_tree(dl);
dle_tofind.dle_mintxg = mintxg;
@ -295,6 +303,7 @@ dsl_deadlist_remove_key(dsl_deadlist_t *dl, uint64_t mintxg, dmu_tx_t *tx)
kmem_free(dle, sizeof (*dle));
VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object, mintxg, tx));
mutex_exit(&dl->dl_lock);
}
/*
@ -338,6 +347,7 @@ dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg,
return (newobj);
}
mutex_enter(&dl->dl_lock);
dsl_deadlist_load_tree(dl);
for (dle = avl_first(&dl->dl_tree); dle;
@ -351,6 +361,7 @@ dsl_deadlist_clone(dsl_deadlist_t *dl, uint64_t maxtxg,
VERIFY3U(0, ==, zap_add_int_key(dl->dl_os, newobj,
dle->dle_mintxg, obj, tx));
}
mutex_exit(&dl->dl_lock);
return (newobj);
}
@ -428,6 +439,8 @@ dsl_deadlist_insert_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth,
uint64_t used, comp, uncomp;
bpobj_t bpo;
ASSERT(MUTEX_HELD(&dl->dl_lock));
VERIFY3U(0, ==, bpobj_open(&bpo, dl->dl_os, obj));
VERIFY3U(0, ==, bpobj_space(&bpo, &used, &comp, &uncomp));
bpobj_close(&bpo);
@ -435,11 +448,9 @@ dsl_deadlist_insert_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth,
dsl_deadlist_load_tree(dl);
dmu_buf_will_dirty(dl->dl_dbuf, tx);
mutex_enter(&dl->dl_lock);
dl->dl_phys->dl_used += used;
dl->dl_phys->dl_comp += comp;
dl->dl_phys->dl_uncomp += uncomp;
mutex_exit(&dl->dl_lock);
dle_tofind.dle_mintxg = birth;
dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
@ -479,6 +490,7 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
return;
}
mutex_enter(&dl->dl_lock);
for (zap_cursor_init(&zc, dl->dl_os, obj);
zap_cursor_retrieve(&zc, &za) == 0;
zap_cursor_advance(&zc)) {
@ -493,6 +505,7 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
dmu_buf_will_dirty(bonus, tx);
bzero(dlp, sizeof (*dlp));
dmu_buf_rele(bonus, FTAG);
mutex_exit(&dl->dl_lock);
}
/*
@ -507,6 +520,8 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
avl_index_t where;
ASSERT(!dl->dl_oldfmt);
mutex_enter(&dl->dl_lock);
dmu_buf_will_dirty(dl->dl_dbuf, tx);
dsl_deadlist_load_tree(dl);
@ -522,14 +537,12 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
VERIFY3U(0, ==, bpobj_space(&dle->dle_bpobj,
&used, &comp, &uncomp));
mutex_enter(&dl->dl_lock);
ASSERT3U(dl->dl_phys->dl_used, >=, used);
ASSERT3U(dl->dl_phys->dl_comp, >=, comp);
ASSERT3U(dl->dl_phys->dl_uncomp, >=, uncomp);
dl->dl_phys->dl_used -= used;
dl->dl_phys->dl_comp -= comp;
dl->dl_phys->dl_uncomp -= uncomp;
mutex_exit(&dl->dl_lock);
VERIFY3U(0, ==, zap_remove_int(dl->dl_os, dl->dl_object,
dle->dle_mintxg, tx));
@ -540,4 +553,5 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
kmem_free(dle, sizeof (*dle));
dle = dle_next;
}
mutex_exit(&dl->dl_lock);
}