diff --git a/uts/common/fs/zfs/arc.c b/uts/common/fs/zfs/arc.c index 752cacdb126c..dd5b3accc445 100644 --- a/uts/common/fs/zfs/arc.c +++ b/uts/common/fs/zfs/arc.c @@ -105,7 +105,7 @@ * with the buffer may be evicted prior to the callback. The callback * must be made with *no locks held* (to prevent deadlock). Additionally, * the users of callbacks must ensure that their private data is - * protected from simultaneous callbacks from arc_buf_evict() + * protected from simultaneous callbacks from arc_clear_callback() * and arc_do_user_evicts(). * * Note that the majority of the performance stats are manipulated @@ -1495,8 +1495,12 @@ arc_buf_data_free(arc_buf_t *buf, void (*free_func)(void *, size_t)) } } +/* + * Free up buf->b_data and if 'remove' is set, then pull the + * arc_buf_t off of the the arc_buf_hdr_t's list and free it. + */ static void -arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all) +arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t remove) { arc_buf_t **bufp; @@ -1547,7 +1551,7 @@ arc_buf_destroy(arc_buf_t *buf, boolean_t recycle, boolean_t all) } /* only remove the buf if requested */ - if (!all) + if (!remove) return; /* remove the buf from the hdr list */ @@ -2121,7 +2125,7 @@ arc_do_user_evicts(void) mutex_exit(&arc_eviction_mtx); if (buf->b_efunc != NULL) - VERIFY(buf->b_efunc(buf) == 0); + VERIFY0(buf->b_efunc(buf->b_private)); buf->b_efunc = NULL; buf->b_private = NULL; @@ -3240,16 +3244,25 @@ arc_freed(spa_t *spa, const blkptr_t *bp) } /* - * This is used by the DMU to let the ARC know that a buffer is - * being evicted, so the ARC should clean up. If this arc buf - * is not yet in the evicted state, it will be put there. + * Clear the user eviction callback set by arc_set_callback(), first calling + * it if it exists. Because the presence of a callback keeps an arc_buf cached + * clearing the callback may result in the arc_buf being destroyed. However, + * it will not result in the *last* arc_buf being destroyed, hence the data + * will remain cached in the ARC. We make a copy of the arc buffer here so + * that we can process the callback without holding any locks. + * + * It's possible that the callback is already in the process of being cleared + * by another thread. In this case we can not clear the callback. + * + * Returns B_TRUE if the callback was successfully called and cleared. */ -int -arc_buf_evict(arc_buf_t *buf) +boolean_t +arc_clear_callback(arc_buf_t *buf) { arc_buf_hdr_t *hdr; kmutex_t *hash_lock; - arc_buf_t **bufp; + arc_evict_func_t *efunc = buf->b_efunc; + void *private = buf->b_private; mutex_enter(&buf->b_evict_lock); hdr = buf->b_hdr; @@ -3259,17 +3272,16 @@ arc_buf_evict(arc_buf_t *buf) */ ASSERT(buf->b_data == NULL); mutex_exit(&buf->b_evict_lock); - return (0); + return (B_FALSE); } else if (buf->b_data == NULL) { - arc_buf_t copy = *buf; /* structure assignment */ /* * We are on the eviction list; process this buffer now * but let arc_do_user_evicts() do the reaping. */ buf->b_efunc = NULL; mutex_exit(&buf->b_evict_lock); - VERIFY(copy.b_efunc(©) == 0); - return (1); + VERIFY0(efunc(private)); + return (B_TRUE); } hash_lock = HDR_LOCK(hdr); mutex_enter(hash_lock); @@ -3279,48 +3291,21 @@ arc_buf_evict(arc_buf_t *buf) ASSERT3U(refcount_count(&hdr->b_refcnt), <, hdr->b_datacnt); ASSERT(hdr->b_state == arc_mru || hdr->b_state == arc_mfu); - /* - * Pull this buffer off of the hdr - */ - bufp = &hdr->b_buf; - while (*bufp != buf) - bufp = &(*bufp)->b_next; - *bufp = buf->b_next; - - ASSERT(buf->b_data != NULL); - arc_buf_destroy(buf, FALSE, FALSE); - - if (hdr->b_datacnt == 0) { - arc_state_t *old_state = hdr->b_state; - arc_state_t *evicted_state; - - ASSERT(hdr->b_buf == NULL); - ASSERT(refcount_is_zero(&hdr->b_refcnt)); - - evicted_state = - (old_state == arc_mru) ? arc_mru_ghost : arc_mfu_ghost; - - mutex_enter(&old_state->arcs_mtx); - mutex_enter(&evicted_state->arcs_mtx); - - arc_change_state(evicted_state, hdr, hash_lock); - ASSERT(HDR_IN_HASH_TABLE(hdr)); - hdr->b_flags |= ARC_IN_HASH_TABLE; - hdr->b_flags &= ~ARC_BUF_AVAILABLE; - - mutex_exit(&evicted_state->arcs_mtx); - mutex_exit(&old_state->arcs_mtx); - } - mutex_exit(hash_lock); - mutex_exit(&buf->b_evict_lock); - - VERIFY(buf->b_efunc(buf) == 0); buf->b_efunc = NULL; buf->b_private = NULL; - buf->b_hdr = NULL; - buf->b_next = NULL; - kmem_cache_free(buf_cache, buf); - return (1); + + if (hdr->b_datacnt > 1) { + mutex_exit(&buf->b_evict_lock); + arc_buf_destroy(buf, FALSE, TRUE); + } else { + ASSERT(buf == hdr->b_buf); + hdr->b_flags |= ARC_BUF_AVAILABLE; + mutex_exit(&buf->b_evict_lock); + } + + mutex_exit(hash_lock); + VERIFY0(efunc(private)); + return (B_TRUE); } /* @@ -3466,17 +3451,6 @@ arc_released(arc_buf_t *buf) return (released); } -int -arc_has_callback(arc_buf_t *buf) -{ - int callback; - - mutex_enter(&buf->b_evict_lock); - callback = (buf->b_efunc != NULL); - mutex_exit(&buf->b_evict_lock); - return (callback); -} - #ifdef ZFS_DEBUG int arc_referenced(arc_buf_t *buf) diff --git a/uts/common/fs/zfs/dbuf.c b/uts/common/fs/zfs/dbuf.c index a635f245131f..759cfcfb8e3a 100644 --- a/uts/common/fs/zfs/dbuf.c +++ b/uts/common/fs/zfs/dbuf.c @@ -181,8 +181,7 @@ dbuf_hash_insert(dmu_buf_impl_t *db) } /* - * Remove an entry from the hash table. This operation will - * fail if there are any existing holds on the db. + * Remove an entry from the hash table. It must be in the EVICTING state. */ static void dbuf_hash_remove(dmu_buf_impl_t *db) @@ -194,7 +193,7 @@ dbuf_hash_remove(dmu_buf_impl_t *db) dmu_buf_impl_t *dbf, **dbp; /* - * We musn't hold db_mtx to maintin lock ordering: + * We musn't hold db_mtx to maintain lock ordering: * DBUF_HASH_MUTEX > db_mtx. */ ASSERT(refcount_is_zero(&db->db_holds)); @@ -431,7 +430,6 @@ static void dbuf_set_data(dmu_buf_impl_t *db, arc_buf_t *buf) { ASSERT(MUTEX_HELD(&db->db_mtx)); - ASSERT(db->db_buf == NULL || !arc_has_callback(db->db_buf)); db->db_buf = buf; if (buf != NULL) { ASSERT(buf->b_data != NULL); @@ -1544,12 +1542,15 @@ dbuf_assign_arcbuf(dmu_buf_impl_t *db, arc_buf_t *buf, dmu_tx_t *tx) * when we are not holding the dn_dbufs_mtx, we can't clear the * entry in the dn_dbufs list. We have to wait until dbuf_destroy() * in this case. For callers from the DMU we will usually see: - * dbuf_clear()->arc_buf_evict()->dbuf_do_evict()->dbuf_destroy() + * dbuf_clear()->arc_clear_callback()->dbuf_do_evict()->dbuf_destroy() * For the arc callback, we will usually see: * dbuf_do_evict()->dbuf_clear();dbuf_destroy() * Sometimes, though, we will get a mix of these two: - * DMU: dbuf_clear()->arc_buf_evict() + * DMU: dbuf_clear()->arc_clear_callback() * ARC: dbuf_do_evict()->dbuf_destroy() + * + * This routine will dissociate the dbuf from the arc, by calling + * arc_clear_callback(), but will not evict the data from the ARC. */ void dbuf_clear(dmu_buf_impl_t *db) @@ -1557,7 +1558,7 @@ dbuf_clear(dmu_buf_impl_t *db) dnode_t *dn; dmu_buf_impl_t *parent = db->db_parent; dmu_buf_impl_t *dndb; - int dbuf_gone = FALSE; + boolean_t dbuf_gone = B_FALSE; ASSERT(MUTEX_HELD(&db->db_mtx)); ASSERT(refcount_is_zero(&db->db_holds)); @@ -1603,7 +1604,7 @@ dbuf_clear(dmu_buf_impl_t *db) } if (db->db_buf) - dbuf_gone = arc_buf_evict(db->db_buf); + dbuf_gone = arc_clear_callback(db->db_buf); if (!dbuf_gone) mutex_exit(&db->db_mtx); @@ -1771,8 +1772,7 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid, static int dbuf_do_evict(void *private) { - arc_buf_t *buf = private; - dmu_buf_impl_t *db = buf->b_private; + dmu_buf_impl_t *db = private; if (!MUTEX_HELD(&db->db_mtx)) mutex_enter(&db->db_mtx); @@ -2135,11 +2135,23 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag) * block on-disk. If so, then we simply evict * ourselves. */ - if (!DBUF_IS_CACHEABLE(db) || - arc_buf_eviction_needed(db->db_buf)) + if (!DBUF_IS_CACHEABLE(db)) { + if (db->db_blkptr != NULL && + !BP_IS_HOLE(db->db_blkptr) && + !BP_IS_EMBEDDED(db->db_blkptr)) { + spa_t *spa = + dmu_objset_spa(db->db_objset); + blkptr_t bp = *db->db_blkptr; + dbuf_clear(db); + arc_freed(spa, &bp); + } else { + dbuf_clear(db); + } + } else if (arc_buf_eviction_needed(db->db_buf)) { dbuf_clear(db); - else + } else { mutex_exit(&db->db_mtx); + } } } else { mutex_exit(&db->db_mtx); diff --git a/uts/common/fs/zfs/sys/arc.h b/uts/common/fs/zfs/sys/arc.h index 15ef0f58d02b..04a20c13eeef 100644 --- a/uts/common/fs/zfs/sys/arc.h +++ b/uts/common/fs/zfs/sys/arc.h @@ -95,7 +95,6 @@ boolean_t arc_buf_remove_ref(arc_buf_t *buf, void *tag); int arc_buf_size(arc_buf_t *buf); void arc_release(arc_buf_t *buf, void *tag); int arc_released(arc_buf_t *buf); -int arc_has_callback(arc_buf_t *buf); void arc_buf_freeze(arc_buf_t *buf); void arc_buf_thaw(arc_buf_t *buf); boolean_t arc_buf_eviction_needed(arc_buf_t *buf); @@ -114,7 +113,7 @@ zio_t *arc_write(zio_t *pio, spa_t *spa, uint64_t txg, void arc_freed(spa_t *spa, const blkptr_t *bp); void arc_set_callback(arc_buf_t *buf, arc_evict_func_t *func, void *private); -int arc_buf_evict(arc_buf_t *buf); +boolean_t arc_clear_callback(arc_buf_t *buf); void arc_flush(spa_t *spa); void arc_tempreserve_clear(uint64_t reserve);