Few microoptimizations to dbuf layer

Move db_link into the same cache line as db_blkid and db_level.
It significantly reduces avl_add() time in dbuf_create() on
systems with large RAM and a huge number of dbufs per dnode.

Avoid a few accesses to dbuf_caches[].size, which is highly congested
under high IOPS and never stays in cache for long.  Instead, use the
local value we receive from zfs_refcount_add_many() anyway.

Remove the cache_size_bytes_max bump from dbuf_evict_one().  There is
no point in doing it on dbuf eviction after we have already done it on
insertion in dbuf_rele_and_unlock().

Reviewed-by: Matt Ahrens <matt@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored-By: iXsystems, Inc.
Closes #9931
This commit is contained in:
Alexander Motin 2020-02-05 14:08:44 -05:00 committed by GitHub
parent cccbed9f98
commit cbd8f5b759
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 16 additions and 28 deletions

View File

@ -206,6 +206,13 @@ typedef struct dmu_buf_impl {
*/ */
struct dmu_buf_impl *db_hash_next; struct dmu_buf_impl *db_hash_next;
/*
* Our link on the owner dnode's dn_dbufs list.
* Protected by its dn_dbufs_mtx. Should be on the same cache line
* as db_level and db_blkid for the best avl_add() performance.
*/
avl_node_t db_link;
/* our block number */ /* our block number */
uint64_t db_blkid; uint64_t db_blkid;
@ -260,12 +267,6 @@ typedef struct dmu_buf_impl {
/* List of dirty records for the buffer sorted newest to oldest. */ /* List of dirty records for the buffer sorted newest to oldest. */
list_t db_dirty_records; list_t db_dirty_records;
/*
* Our link on the owner dnode's dn_dbufs list.
* Protected by its dn_dbufs_mtx.
*/
avl_node_t db_link;
/* Link in dbuf_cache or dbuf_metadata_cache */ /* Link in dbuf_cache or dbuf_metadata_cache */
multilist_node_t db_cache_link; multilist_node_t db_cache_link;

View File

@ -627,13 +627,6 @@ dbuf_cache_lowater_bytes(void)
(dbuf_cache_target * dbuf_cache_lowater_pct) / 100); (dbuf_cache_target * dbuf_cache_lowater_pct) / 100);
} }
static inline boolean_t
dbuf_cache_above_hiwater(void)
{
return (zfs_refcount_count(&dbuf_caches[DB_DBUF_CACHE].size) >
dbuf_cache_hiwater_bytes());
}
static inline boolean_t static inline boolean_t
dbuf_cache_above_lowater(void) dbuf_cache_above_lowater(void)
{ {
@ -673,8 +666,6 @@ dbuf_evict_one(void)
ASSERT3U(db->db_caching_status, ==, DB_DBUF_CACHE); ASSERT3U(db->db_caching_status, ==, DB_DBUF_CACHE);
db->db_caching_status = DB_NO_CACHE; db->db_caching_status = DB_NO_CACHE;
dbuf_destroy(db); dbuf_destroy(db);
DBUF_STAT_MAX(cache_size_bytes_max,
zfs_refcount_count(&dbuf_caches[DB_DBUF_CACHE].size));
DBUF_STAT_BUMP(cache_total_evicts); DBUF_STAT_BUMP(cache_total_evicts);
} else { } else {
multilist_sublist_unlock(mls); multilist_sublist_unlock(mls);
@ -730,16 +721,15 @@ dbuf_evict_thread(void *unused)
* dbuf cache using the callers context. * dbuf cache using the callers context.
*/ */
static void static void
dbuf_evict_notify(void) dbuf_evict_notify(uint64_t size)
{ {
/* /*
* We check if we should evict without holding the dbuf_evict_lock, * We check if we should evict without holding the dbuf_evict_lock,
* because it's OK to occasionally make the wrong decision here, * because it's OK to occasionally make the wrong decision here,
* and grabbing the lock results in massive lock contention. * and grabbing the lock results in massive lock contention.
*/ */
if (zfs_refcount_count(&dbuf_caches[DB_DBUF_CACHE].size) > if (size > dbuf_cache_target_bytes()) {
dbuf_cache_target_bytes()) { if (size > dbuf_cache_hiwater_bytes())
if (dbuf_cache_above_hiwater())
dbuf_evict_one(); dbuf_evict_one();
cv_signal(&dbuf_evict_cv); cv_signal(&dbuf_evict_cv);
} }
@ -3471,6 +3461,7 @@ void
dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting) dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting)
{ {
int64_t holds; int64_t holds;
uint64_t size;
ASSERT(MUTEX_HELD(&db->db_mtx)); ASSERT(MUTEX_HELD(&db->db_mtx));
DBUF_VERIFY(db); DBUF_VERIFY(db);
@ -3567,7 +3558,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting)
db->db_caching_status = dcs; db->db_caching_status = dcs;
multilist_insert(dbuf_caches[dcs].cache, db); multilist_insert(dbuf_caches[dcs].cache, db);
(void) zfs_refcount_add_many( size = zfs_refcount_add_many(
&dbuf_caches[dcs].size, &dbuf_caches[dcs].size,
db->db.db_size, db); db->db.db_size, db);
@ -3575,8 +3566,7 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting)
DBUF_STAT_BUMP(metadata_cache_count); DBUF_STAT_BUMP(metadata_cache_count);
DBUF_STAT_MAX( DBUF_STAT_MAX(
metadata_cache_size_bytes_max, metadata_cache_size_bytes_max,
zfs_refcount_count( size);
&dbuf_caches[dcs].size));
} else { } else {
DBUF_STAT_BUMP( DBUF_STAT_BUMP(
cache_levels[db->db_level]); cache_levels[db->db_level]);
@ -3585,15 +3575,12 @@ dbuf_rele_and_unlock(dmu_buf_impl_t *db, void *tag, boolean_t evicting)
cache_levels_bytes[db->db_level], cache_levels_bytes[db->db_level],
db->db.db_size); db->db.db_size);
DBUF_STAT_MAX(cache_size_bytes_max, DBUF_STAT_MAX(cache_size_bytes_max,
zfs_refcount_count( size);
&dbuf_caches[dcs].size));
} }
mutex_exit(&db->db_mtx); mutex_exit(&db->db_mtx);
if (db->db_caching_status == DB_DBUF_CACHE && if (dcs == DB_DBUF_CACHE && !evicting)
!evicting) { dbuf_evict_notify(size);
dbuf_evict_notify();
}
} }
if (do_arc_evict) if (do_arc_evict)