Compact dbuf/buf hashes and lock arrays
With the default dbuf cache size of 1/32 of ARC, it makes no sense to have a hash table of the same size (or even bigger on Linux). Reduce it to 1/8 of the size of ARC's hash table, still leaving some slack, assuming a higher I/O rate via the dbuf cache than via ARC. Remove padding from the ARC hash locks array. The idea behind padding is to avoid false sharing between locks. It would make sense if there were a limited number of very busy locks. But since we have no limit on the number, by using the same memory for more locks we can achieve even lower lock contention with the same false sharing, or we can use less memory for the same contention level. Reduce the number of hash locks from 8192 to 2048. The number is still big enough to not cause contention, but the reduced memory size improves the cache hit rate for mutex_tryenter() in the ARC eviction thread, saving about 1% of the thread time. Reviewed-by: Matthew Ahrens <mahrens@delphix.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Signed-off-by: Alexander Motin <mav@FreeBSD.org> Sponsored-By: iXsystems, Inc. Closes #12289
This commit is contained in:
parent
c6d1112bf4
commit
490c845efe
@ -322,12 +322,12 @@ typedef struct dmu_buf_impl {
|
||||
} dmu_buf_impl_t;
|
||||
|
||||
/* Note: the dbuf hash table is exposed only for the mdb module */
|
||||
#define DBUF_MUTEXES 8192
|
||||
#define DBUF_MUTEXES 2048
|
||||
#define DBUF_HASH_MUTEX(h, idx) (&(h)->hash_mutexes[(idx) & (DBUF_MUTEXES-1)])
|
||||
typedef struct dbuf_hash_table {
|
||||
uint64_t hash_table_mask;
|
||||
dmu_buf_impl_t **hash_table;
|
||||
kmutex_t hash_mutexes[DBUF_MUTEXES];
|
||||
kmutex_t hash_mutexes[DBUF_MUTEXES] ____cacheline_aligned;
|
||||
} dbuf_hash_table_t;
|
||||
|
||||
typedef void (*dbuf_prefetch_fn)(void *, boolean_t);
|
||||
|
@ -740,29 +740,18 @@ taskq_t *arc_prune_taskq;
|
||||
* Hash table routines
|
||||
*/
|
||||
|
||||
#define HT_LOCK_ALIGN 64
|
||||
#define HT_LOCK_PAD (P2NPHASE(sizeof (kmutex_t), (HT_LOCK_ALIGN)))
|
||||
|
||||
struct ht_lock {
|
||||
kmutex_t ht_lock;
|
||||
#ifdef _KERNEL
|
||||
unsigned char pad[HT_LOCK_PAD];
|
||||
#endif
|
||||
};
|
||||
|
||||
#define BUF_LOCKS 8192
|
||||
#define BUF_LOCKS 2048
|
||||
typedef struct buf_hash_table {
|
||||
uint64_t ht_mask;
|
||||
arc_buf_hdr_t **ht_table;
|
||||
struct ht_lock ht_locks[BUF_LOCKS];
|
||||
kmutex_t ht_locks[BUF_LOCKS] ____cacheline_aligned;
|
||||
} buf_hash_table_t;
|
||||
|
||||
static buf_hash_table_t buf_hash_table;
|
||||
|
||||
#define BUF_HASH_INDEX(spa, dva, birth) \
|
||||
(buf_hash(spa, dva, birth) & buf_hash_table.ht_mask)
|
||||
#define BUF_HASH_LOCK_NTRY(idx) (buf_hash_table.ht_locks[idx & (BUF_LOCKS-1)])
|
||||
#define BUF_HASH_LOCK(idx) (&(BUF_HASH_LOCK_NTRY(idx).ht_lock))
|
||||
#define BUF_HASH_LOCK(idx) (&buf_hash_table.ht_locks[idx & (BUF_LOCKS-1)])
|
||||
#define HDR_LOCK(hdr) \
|
||||
(BUF_HASH_LOCK(BUF_HASH_INDEX(hdr->b_spa, &hdr->b_dva, hdr->b_birth)))
|
||||
|
||||
@ -1111,7 +1100,7 @@ buf_fini(void)
|
||||
(buf_hash_table.ht_mask + 1) * sizeof (void *));
|
||||
#endif
|
||||
for (i = 0; i < BUF_LOCKS; i++)
|
||||
mutex_destroy(&buf_hash_table.ht_locks[i].ht_lock);
|
||||
mutex_destroy(BUF_HASH_LOCK(i));
|
||||
kmem_cache_destroy(hdr_full_cache);
|
||||
kmem_cache_destroy(hdr_full_crypt_cache);
|
||||
kmem_cache_destroy(hdr_l2only_cache);
|
||||
@ -1276,10 +1265,8 @@ buf_init(void)
|
||||
for (ct = zfs_crc64_table + i, *ct = i, j = 8; j > 0; j--)
|
||||
*ct = (*ct >> 1) ^ (-(*ct & 1) & ZFS_CRC64_POLY);
|
||||
|
||||
for (i = 0; i < BUF_LOCKS; i++) {
|
||||
mutex_init(&buf_hash_table.ht_locks[i].ht_lock,
|
||||
NULL, MUTEX_DEFAULT, NULL);
|
||||
}
|
||||
for (i = 0; i < BUF_LOCKS; i++)
|
||||
mutex_init(BUF_HASH_LOCK(i), NULL, MUTEX_DEFAULT, NULL);
|
||||
}
|
||||
|
||||
#define ARC_MINTIME (hz>>4) /* 62 ms */
|
||||
|
@ -826,12 +826,12 @@ dbuf_init(void)
|
||||
int i;
|
||||
|
||||
/*
|
||||
* The hash table is big enough to fill all of physical memory
|
||||
* The hash table is big enough to fill one eighth of physical memory
|
||||
* with an average block size of zfs_arc_average_blocksize (default 8K).
|
||||
* By default, the table will take up
|
||||
* totalmem * sizeof(void*) / 64K (128KB per GB with 8-byte pointers).
|
||||
*/
|
||||
while (hsize * zfs_arc_average_blocksize < physmem * PAGESIZE)
|
||||
while (hsize * zfs_arc_average_blocksize < arc_all_memory() / 8)
|
||||
hsize <<= 1;
|
||||
|
||||
retry:
|
||||
@ -3055,8 +3055,8 @@ dbuf_create(dnode_t *dn, uint8_t level, uint64_t blkid,
|
||||
db->db_state = DB_EVICTING; /* not worth logging this state change */
|
||||
if ((odb = dbuf_hash_insert(db)) != NULL) {
|
||||
/* someone else inserted it first */
|
||||
kmem_cache_free(dbuf_kmem_cache, db);
|
||||
mutex_exit(&dn->dn_dbufs_mtx);
|
||||
kmem_cache_free(dbuf_kmem_cache, db);
|
||||
DBUF_STAT_BUMP(hash_insert_race);
|
||||
return (odb);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user