From ea642c5c38ef535e02e34c3e8db078427de14efc Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Tue, 4 Feb 2020 15:53:51 +0000 Subject: [PATCH] Few microoptimizations to dbuf layer. Move db_link into the same cache line as db_blkid and db_level. It allows significantly reduce avl_add() time in dbuf_create() on systems with large RAM and huge number of dbufs per dnode. Avoid few accesses to dbuf_caches[].size, which is highly congested under high IOPS and never stays in cache for a long time. Use local value we are receiving from zfs_refcount_add_many() any way. Remove cache_size_bytes_max bump from dbuf_evict_one(). I don't see a point to do it on dbuf eviction after we done it on insertion in dbuf_rele_and_unlock(). Reviewed by: mahrens, Brian Behlendorf MFC after: 2 weeks Sponsored by: iXsystems, Inc. --- .../opensolaris/uts/common/fs/zfs/dbuf.c | 31 ++++++------------- .../opensolaris/uts/common/fs/zfs/sys/dbuf.h | 13 ++++---- 2 files changed, 16 insertions(+), 28 deletions(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c index b7214bc2ed25..90f6ccfb5097 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/dbuf.c @@ -670,13 +670,6 @@ dbuf_cache_lowater_bytes(void) (dbuf_cache_target * dbuf_cache_lowater_pct) / 100); } -static inline boolean_t -dbuf_cache_above_hiwater(void) -{ - return (zfs_refcount_count(&dbuf_caches[DB_DBUF_CACHE].size) > - dbuf_cache_hiwater_bytes()); -} - static inline boolean_t dbuf_cache_above_lowater(void) { @@ -717,8 +710,6 @@ dbuf_evict_one(void) ASSERT3U(db->db_caching_status, ==, DB_DBUF_CACHE); db->db_caching_status = DB_NO_CACHE; dbuf_destroy(db); - DBUF_STAT_MAX(cache_size_bytes_max, - zfs_refcount_count(&dbuf_caches[DB_DBUF_CACHE].size)); DBUF_STAT_BUMP(cache_total_evicts); } else { multilist_sublist_unlock(mls); @@ -778,16 +769,15 @@ dbuf_evict_thread(void *unused __unused) * dbuf cache using the callers context. */ static void -dbuf_evict_notify(void) +dbuf_evict_notify(uint64_t size) { /* * We check if we should evict without holding the dbuf_evict_lock, * because it's OK to occasionally make the wrong decision here, * and grabbing the lock results in massive lock contention. */ - if (zfs_refcount_count(&dbuf_caches[DB_DBUF_CACHE].size) > - dbuf_cache_max_bytes) { - if (dbuf_cache_above_hiwater()) + if (size > dbuf_cache_max_bytes) { + if (size > dbuf_cache_hiwater_bytes()) dbuf_evict_one(); cv_signal(&dbuf_evict_cv); } @@ -3186,6 +3176,7 @@ void dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting) { int64_t holds; + uint64_t size; ASSERT(MUTEX_HELD(&db->db_mtx)); DBUF_VERIFY(db); @@ -3282,15 +3273,14 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting) db->db_caching_status = dcs; multilist_insert(dbuf_caches[dcs].cache, db); - (void) zfs_refcount_add_many( + size = zfs_refcount_add_many( &dbuf_caches[dcs].size, db->db.db_size, db); if (dcs == DB_DBUF_METADATA_CACHE) { DBUF_STAT_BUMP(metadata_cache_count); DBUF_STAT_MAX( metadata_cache_size_bytes_max, - zfs_refcount_count( - &dbuf_caches[dcs].size)); + size); } else { DBUF_STAT_BUMP( cache_levels[db->db_level]); @@ -3299,15 +3289,12 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting) cache_levels_bytes[db->db_level], db->db.db_size); DBUF_STAT_MAX(cache_size_bytes_max, - zfs_refcount_count( - &dbuf_caches[dcs].size)); + size); } mutex_exit(&db->db_mtx); - if (db->db_caching_status == DB_DBUF_CACHE && - !evicting) { - dbuf_evict_notify(); - } + if (dcs == DB_DBUF_CACHE && !evicting) + dbuf_evict_notify(size); } if (do_arc_evict) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h index 40d3878ee384..4b1a9e11b165 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/dbuf.h @@ -189,6 +189,13 @@ typedef struct dmu_buf_impl { */ struct dmu_buf_impl *db_hash_next; + /* + * Our link on the owner dnodes's dn_dbufs list. + * Protected by its dn_dbufs_mtx. Should be on the same cache line + * as db_level and db_blkid for the best avl_add() performance. + */ + avl_node_t db_link; + /* our block number */ uint64_t db_blkid; @@ -230,12 +237,6 @@ typedef struct dmu_buf_impl { /* pointer to most recent dirty record for this buffer */ dbuf_dirty_record_t *db_last_dirty; - /* - * Our link on the owner dnodes's dn_dbufs list. - * Protected by its dn_dbufs_mtx. - */ - avl_node_t db_link; - /* Link in dbuf_cache or dbuf_metadata_cache */ multilist_node_t db_cache_link;