From 6ebebaceb1091142b81430291c610d79b6a3073e Mon Sep 17 00:00:00 2001 From: "Justin T. Gibbs" Date: Thu, 2 Apr 2015 22:59:15 +1100 Subject: [PATCH] Illumos 5531 - NULL pointer dereference in dsl_prop_get_ds() 5531 NULL pointer dereference in dsl_prop_get_ds() Author: Justin T. Gibbs Reviewed by: Matthew Ahrens Reviewed by: Dan McDonald Reviewed by: George Wilson Reviewed by: Bayard Bell Approved by: Robert Mustacchi References: https://www.illumos.org/issues/5531 https://github.com/illumos/illumos-gate/commit/e57a022 Ported-by: Chris Dunlop Signed-off-by: Brian Behlendorf --- include/sys/dbuf.h | 5 +++- include/sys/dmu.h | 16 ++++++++++++ include/sys/dsl_dataset.h | 2 ++ module/zfs/dbuf.c | 52 ++++++++++++++++++++++++++++++++++----- module/zfs/dnode_sync.c | 3 ++- module/zfs/dsl_dataset.c | 7 ++++++ module/zfs/dsl_prop.c | 47 +++++++++++++++++++++++++++++------ 7 files changed, 116 insertions(+), 16 deletions(-) diff --git a/include/sys/dbuf.h b/include/sys/dbuf.h index c2f4f8bd0efc..dd45261a32de 100644 --- a/include/sys/dbuf.h +++ b/include/sys/dbuf.h @@ -262,12 +262,15 @@ int dbuf_hold_impl(struct dnode *dn, uint8_t level, uint64_t blkid, int create, void dbuf_prefetch(struct dnode *dn, uint64_t blkid, zio_priority_t prio); void dbuf_add_ref(dmu_buf_impl_t *db, void *tag); +boolean_t dbuf_try_add_ref(dmu_buf_t *db, objset_t *os, uint64_t obj, + uint64_t blkid, void *tag); uint64_t dbuf_refcount(dmu_buf_impl_t *db); void dbuf_rele(dmu_buf_impl_t *db, void *tag); void dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag); -dmu_buf_impl_t *dbuf_find(struct dnode *dn, uint8_t level, uint64_t blkid); +dmu_buf_impl_t *dbuf_find(struct objset *os, uint64_t object, uint8_t level, + uint64_t blkid); int dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags); void dmu_buf_will_not_fill(dmu_buf_t *db, dmu_tx_t *tx); diff --git a/include/sys/dmu.h b/include/sys/dmu.h index b2f1efae011a..aa3e89d6070c 100644 --- a/include/sys/dmu.h +++ b/include/sys/dmu.h @@ -454,7 +454,23 @@ int dmu_spill_hold_existing(dmu_buf_t *bonus, void *tag, dmu_buf_t **dbp); */ int dmu_buf_hold(objset_t *os, uint64_t object, uint64_t offset, void *tag, dmu_buf_t **, int flags); + +/* + * Add a reference to a dmu buffer that has already been held via + * dmu_buf_hold() in the current context. + */ void dmu_buf_add_ref(dmu_buf_t *db, void* tag); + +/* + * Attempt to add a reference to a dmu buffer that is in an unknown state, + * using a pointer that may have been invalidated by eviction processing. + * The request will succeed if the passed in dbuf still represents the + * same os/object/blkid, is ineligible for eviction, and has at least + * one hold by a user other than the syncer. + */ +boolean_t dmu_buf_try_add_ref(dmu_buf_t *, objset_t *os, uint64_t object, + uint64_t blkid, void *tag); + void dmu_buf_rele(dmu_buf_t *db, void *tag); uint64_t dmu_buf_refcount(dmu_buf_t *db); diff --git a/include/sys/dsl_dataset.h b/include/sys/dsl_dataset.h index 7e5c0a7cba7b..edfc5509efb1 100644 --- a/include/sys/dsl_dataset.h +++ b/include/sys/dsl_dataset.h @@ -197,6 +197,8 @@ dsl_dataset_phys(dsl_dataset_t *ds) int dsl_dataset_hold(struct dsl_pool *dp, const char *name, void *tag, dsl_dataset_t **dsp); +boolean_t dsl_dataset_try_add_ref(struct dsl_pool *dp, dsl_dataset_t *ds, + void *tag); int dsl_dataset_hold_obj(struct dsl_pool *dp, uint64_t dsobj, void *tag, dsl_dataset_t **); void dsl_dataset_rele(dsl_dataset_t *ds, void *tag); diff --git a/module/zfs/dbuf.c b/module/zfs/dbuf.c index ddfd2e9814e2..cd74ce3e8614 100644 --- a/module/zfs/dbuf.c +++ b/module/zfs/dbuf.c @@ -149,16 +149,13 @@ dbuf_hash(void *os, uint64_t obj, uint8_t lvl, uint64_t blkid) (dbuf)->db_blkid == (blkid)) dmu_buf_impl_t * -dbuf_find(dnode_t *dn, uint8_t level, uint64_t blkid) +dbuf_find(objset_t *os, uint64_t obj, uint8_t level, uint64_t blkid) { dbuf_hash_table_t *h = &dbuf_hash_table; - objset_t *os = dn->dn_objset; - uint64_t obj; uint64_t hv; uint64_t idx; dmu_buf_impl_t *db; - obj = dn->dn_object; hv = DBUF_HASH(os, obj, level, blkid); idx = hv & h->hash_table_mask; @@ -177,6 +174,24 @@ dbuf_find(dnode_t *dn, uint8_t level, uint64_t blkid) return (NULL); } +static dmu_buf_impl_t * +dbuf_find_bonus(objset_t *os, uint64_t object) +{ + dnode_t *dn; + dmu_buf_impl_t *db = NULL; + + if (dnode_hold(os, object, FTAG, &dn) == 0) { + rw_enter(&dn->dn_struct_rwlock, RW_READER); + if (dn->dn_bonus != NULL) { + db = dn->dn_bonus; + mutex_enter(&db->db_mtx); + } + rw_exit(&dn->dn_struct_rwlock); + dnode_rele(dn, FTAG); + } + return (db); +} + /* * Insert an entry into the hash table. If there is already an element * equal to elem in the hash table, then the already existing element @@ -2000,7 +2015,7 @@ dbuf_prefetch(dnode_t *dn, uint64_t blkid, zio_priority_t prio) return; /* dbuf_find() returns with db_mtx held */ - if ((db = dbuf_find(dn, 0, blkid))) { + if ((db = dbuf_find(dn->dn_objset, dn->dn_object, 0, blkid))) { /* * This dbuf is already in the cache. We assume that * it is already CACHED, or else about to be either @@ -2048,7 +2063,8 @@ __dbuf_hold_impl(struct dbuf_hold_impl_data *dh) *(dh->dh_dbp) = NULL; top: /* dbuf_find() returns with db_mtx held */ - dh->dh_db = dbuf_find(dh->dh_dn, dh->dh_level, dh->dh_blkid); + dh->dh_db = dbuf_find(dh->dh_dn->dn_objset, dh->dh_dn->dn_object, + dh->dh_level, dh->dh_blkid); if (dh->dh_db == NULL) { dh->dh_bp = NULL; @@ -2228,6 +2244,30 @@ dbuf_add_ref(dmu_buf_impl_t *db, void *tag) VERIFY(refcount_add(&db->db_holds, tag) > 1); } +#pragma weak dmu_buf_try_add_ref = dbuf_try_add_ref +boolean_t +dbuf_try_add_ref(dmu_buf_t *db_fake, objset_t *os, uint64_t obj, uint64_t blkid, + void *tag) +{ + dmu_buf_impl_t *db = (dmu_buf_impl_t *)db_fake; + dmu_buf_impl_t *found_db; + boolean_t result = B_FALSE; + + if (db->db_blkid == DMU_BONUS_BLKID) + found_db = dbuf_find_bonus(os, obj); + else + found_db = dbuf_find(os, obj, 0, blkid); + + if (found_db != NULL) { + if (db == found_db && dbuf_refcount(db) > db->db_dirtycnt) { + (void) refcount_add(&db->db_holds, tag); + result = B_TRUE; + } + mutex_exit(&db->db_mtx); + } + return (result); +} + /* * If you call dbuf_rele() you had better not be referencing the dnode handle * unless you have some other direct or indirect hold on the dnode. (An indirect diff --git a/module/zfs/dnode_sync.c b/module/zfs/dnode_sync.c index cad83a0e097b..5c80a531d2d9 100644 --- a/module/zfs/dnode_sync.c +++ b/module/zfs/dnode_sync.c @@ -77,7 +77,8 @@ dnode_increase_indirection(dnode_t *dn, dmu_tx_t *tx) /* set dbuf's parent pointers to new indirect buf */ for (i = 0; i < nblkptr; i++) { - dmu_buf_impl_t *child = dbuf_find(dn, old_toplvl, i); + dmu_buf_impl_t *child = + dbuf_find(dn->dn_objset, dn->dn_object, old_toplvl, i); if (child == NULL) continue; diff --git a/module/zfs/dsl_dataset.c b/module/zfs/dsl_dataset.c index 73b202ceb125..a5310edcef3d 100644 --- a/module/zfs/dsl_dataset.c +++ b/module/zfs/dsl_dataset.c @@ -351,6 +351,13 @@ dsl_dataset_snap_remove(dsl_dataset_t *ds, const char *name, dmu_tx_t *tx, return (err); } +boolean_t +dsl_dataset_try_add_ref(dsl_pool_t *dp, dsl_dataset_t *ds, void *tag) +{ + return (dmu_buf_try_add_ref(ds->ds_dbuf, dp->dp_meta_objset, + ds->ds_object, DMU_BONUS_BLKID, tag)); +} + int dsl_dataset_hold_obj(dsl_pool_t *dp, uint64_t dsobj, void *tag, dsl_dataset_t **dsp) diff --git a/module/zfs/dsl_prop.c b/module/zfs/dsl_prop.c index 4c0e579907c8..69bcd190f2e0 100644 --- a/module/zfs/dsl_prop.c +++ b/module/zfs/dsl_prop.c @@ -441,9 +441,31 @@ dsl_prop_notify_all_cb(dsl_pool_t *dp, dsl_dataset_t *ds, void *arg) cbr = list_next(&dd->dd_prop_cbs, cbr)) { uint64_t value; + /* + * Callback entries do not have holds on their datasets + * so that datasets with registered callbacks are still + * eligible for eviction. Unlike operations on callbacks + * for a single dataset, we are performing a recursive + * descent of related datasets and the calling context + * for this iteration only has a dataset hold on the root. + * Without a hold, the callback's pointer to the dataset + * could be invalidated by eviction at any time. + * + * Use dsl_dataset_try_add_ref() to verify that the + * dataset has not begun eviction processing and to + * prevent eviction from occurring for the duration + * of the callback. If the hold attempt fails, this + * object is already being evicted and the callback can + * be safely ignored. + */ + if (!dsl_dataset_try_add_ref(dp, cbr->cbr_ds, FTAG)) + continue; + if (dsl_prop_get_ds(cbr->cbr_ds, cbr->cbr_propname, sizeof (value), 1, &value, NULL) == 0) cbr->cbr_func(cbr->cbr_arg, value); + + dsl_dataset_rele(cbr->cbr_ds, FTAG); } mutex_exit(&dd->dd_lock); @@ -496,19 +518,28 @@ dsl_prop_changed_notify(dsl_pool_t *dp, uint64_t ddobj, mutex_enter(&dd->dd_lock); for (cbr = list_head(&dd->dd_prop_cbs); cbr; cbr = list_next(&dd->dd_prop_cbs, cbr)) { - uint64_t propobj = dsl_dataset_phys(cbr->cbr_ds)->ds_props_obj; - - if (strcmp(cbr->cbr_propname, propname) != 0) - continue; + uint64_t propobj; /* - * If the property is set on this ds, then it is not - * inherited here; don't call the callback. + * cbr->cbf_ds may be invalidated due to eviction, + * requiring the use of dsl_dataset_try_add_ref(). + * See comment block in dsl_prop_notify_all_cb() + * for details. */ - if (propobj && 0 == zap_contains(mos, propobj, propname)) + if (strcmp(cbr->cbr_propname, propname) != 0 || + !dsl_dataset_try_add_ref(dp, cbr->cbr_ds, FTAG)) continue; - cbr->cbr_func(cbr->cbr_arg, value); + propobj = dsl_dataset_phys(cbr->cbr_ds)->ds_props_obj; + + /* + * If the property is not set on this ds, then it is + * inherited here; call the callback. + */ + if (propobj == 0 || zap_contains(mos, propobj, propname) != 0) + cbr->cbr_func(cbr->cbr_arg, value); + + dsl_dataset_rele(cbr->cbr_ds, FTAG); } mutex_exit(&dd->dd_lock);