Compact dbuf/buf hashes and lock arrays

With the default dbuf cache size of 1/32 of the ARC, it makes no sense
to have a dbuf hash table of the same size as the ARC's (or even bigger
on Linux).  Reduce it to 1/8 of the ARC's, still leaving some slack,
assuming a higher I/O rate via the dbuf cache than via the ARC.

Remove padding from the ARC hash locks array.  The idea behind padding
is to avoid false sharing between locks.  That would make sense if
there were a limited number of very busy locks.  But since we have no
limit on the number, by using the same memory for more locks we can
achieve even lower lock contention with the same false sharing, or we
can use less memory for the same contention level.

Reduce the number of hash locks from 8192 to 2048.  The number is still
big enough to not cause contention, but the reduced memory size improves
the cache hit rate for mutex_tryenter() in the ARC eviction thread,
saving about 1% of the thread time.

Reviewed-by: Matthew Ahrens <mahrens@delphix.com>
Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Signed-off-by: Alexander Motin <mav@FreeBSD.org>
Sponsored-By: iXsystems, Inc.
Closes #12289
This commit is contained in:
Alexander Motin 2021-07-01 11:30:31 -04:00 committed by GitHub
parent c6d1112bf4
commit 490c845efe
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 11 additions and 24 deletions

View File

@ -322,12 +322,12 @@ typedef struct dmu_buf_impl {
} dmu_buf_impl_t; } dmu_buf_impl_t;
/* Note: the dbuf hash table is exposed only for the mdb module */ /* Note: the dbuf hash table is exposed only for the mdb module */
#define DBUF_MUTEXES 8192 #define DBUF_MUTEXES 2048
#define DBUF_HASH_MUTEX(h, idx) (&(h)->hash_mutexes[(idx) & (DBUF_MUTEXES-1)]) #define DBUF_HASH_MUTEX(h, idx) (&(h)->hash_mutexes[(idx) & (DBUF_MUTEXES-1)])
typedef struct dbuf_hash_table { typedef struct dbuf_hash_table {
uint64_t hash_table_mask; uint64_t hash_table_mask;
dmu_buf_impl_t **hash_table; dmu_buf_impl_t **hash_table;
kmutex_t hash_mutexes[DBUF_MUTEXES]; kmutex_t hash_mutexes[DBUF_MUTEXES] ____cacheline_aligned;
} dbuf_hash_table_t; } dbuf_hash_table_t;
typedef void (*dbuf_prefetch_fn)(void *, boolean_t); typedef void (*dbuf_prefetch_fn)(void *, boolean_t);

View File

@ -740,29 +740,18 @@ taskq_t *arc_prune_taskq;
* Hash table routines * Hash table routines
*/ */
#define HT_LOCK_ALIGN 64 #define BUF_LOCKS 2048
#define HT_LOCK_PAD (P2NPHASE(sizeof (kmutex_t), (HT_LOCK_ALIGN)))
struct ht_lock {
kmutex_t ht_lock;
#ifdef _KERNEL
unsigned char pad[HT_LOCK_PAD];
#endif
};
#define BUF_LOCKS 8192
typedef struct buf_hash_table { typedef struct buf_hash_table {
uint64_t ht_mask; uint64_t ht_mask;
arc_buf_hdr_t **ht_table; arc_buf_hdr_t **ht_table;
struct ht_lock ht_locks[BUF_LOCKS]; kmutex_t ht_locks[BUF_LOCKS] ____cacheline_aligned;
} buf_hash_table_t; } buf_hash_table_t;
static buf_hash_table_t buf_hash_table; static buf_hash_table_t buf_hash_table;
#define BUF_HASH_INDEX(spa, dva, birth) \ #define BUF_HASH_INDEX(spa, dva, birth) \
(buf_hash(spa, dva, birth) & buf_hash_table.ht_mask) (buf_hash(spa, dva, birth) & buf_hash_table.ht_mask)
#define BUF_HASH_LOCK_NTRY(idx) (buf_hash_table.ht_locks[idx & (BUF_LOCKS-1)]) #define BUF_HASH_LOCK(idx) (&buf_hash_table.ht_locks[idx & (BUF_LOCKS-1)])
#define BUF_HASH_LOCK(idx) (&(BUF_HASH_LOCK_NTRY(idx).ht_lock))
#define HDR_LOCK(hdr) \ #define HDR_LOCK(hdr) \
(BUF_HASH_LOCK(BUF_HASH_INDEX(hdr->b_spa, &hdr->b_dva, hdr->b_birth))) (BUF_HASH_LOCK(BUF_HASH_INDEX(hdr->b_spa, &hdr->b_dva, hdr->b_birth)))
@ -1111,7 +1100,7 @@ buf_fini(void)
(buf_hash_table.ht_mask + 1) * sizeof (void *)); (buf_hash_table.ht_mask + 1) * sizeof (void *));
#endif #endif
for (i = 0; i < BUF_LOCKS; i++) for (i = 0; i < BUF_LOCKS; i++)
mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock); mutex_destroy(BUF_HASH_LOCK(i));
kmem_cache_destroy(hdr_full_cache); kmem_cache_destroy(hdr_full_cache);
kmem_cache_destroy(hdr_full_crypt_cache); kmem_cache_destroy(hdr_full_crypt_cache);
kmem_cache_destroy(hdr_l2only_cache); kmem_cache_destroy(hdr_l2only_cache);
@ -1276,10 +1265,8 @@ buf_init(void)
for (ct = zfs_crc64_table + i, *ct = i, j = 8; j > 0; j--) for (ct = zfs_crc64_table + i, *ct = i, j = 8; j > 0; j--)
*ct = (*ct >> 1) ^ (-(*ct & 1) & ZFS_CRC64_POLY); *ct = (*ct >> 1) ^ (-(*ct & 1) & ZFS_CRC64_POLY);
for (i = 0; i < BUF_LOCKS; i++) { for (i = 0; i < BUF_LOCKS; i++)
mutex_init(&buf_hash_table.ht_locks[i].ht_lock, mutex_init(BUF_HASH_LOCK(i), NULL, MUTEX_DEFAULT, NULL);
NULL, MUTEX_DEFAULT, NULL);
}
} }
#define ARC_MINTIME (hz>>4) /* 62 ms */ #define ARC_MINTIME (hz>>4) /* 62 ms */

View File

@ -826,12 +826,12 @@ dbuf_init(void)
int i; int i;
/* /*
* The hash table is big enough to fill all of physical memory * The hash table is big enough to fill one eighth of physical memory
* with an average block size of zfs_arc_average_blocksize (default 8K). * with an average block size of zfs_arc_average_blocksize (default 8K).
* By default, the table will take up * By default, the table will take up
* totalmem * sizeof(void*) / 8K (1MB per GB with 8-byte pointers). * totalmem * sizeof(void*) / 8K (1MB per GB with 8-byte pointers).
*/ */
while (hsize * zfs_arc_average_blocksize < physmem * PAGESIZE) while (hsize * zfs_arc_average_blocksize < arc_all_memory() / 8)
hsize <<= 1; hsize <<= 1;
retry: retry:
@ -3055,8 +3055,8 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
db->db_state = DB_EVICTING; /* not worth logging this state change */ db->db_state = DB_EVICTING; /* not worth logging this state change */
if ((odb = dbuf_hash_insert(db)) != NULL) { if ((odb = dbuf_hash_insert(db)) != NULL) {
/* someone else inserted it first */ /* someone else inserted it first */
kmem_cache_free(dbuf_kmem_cache, db);
mutex_exit(&dn->dn_dbufs_mtx); mutex_exit(&dn->dn_dbufs_mtx);
kmem_cache_free(dbuf_kmem_cache, db);
DBUF_STAT_BUMP(hash_insert_race); DBUF_STAT_BUMP(hash_insert_race);
return (odb); return (odb);
} }