OpenZFS 8484 - Implement aggregate sum and use for arc counters

In pursuit of improving performance on multi-core systems, we should implement fanned-out counters and use them to improve the performance of some of the arc statistics. These stats are updated extremely frequently, and can consume a significant amount of CPU time. Authored by: Paul Dagnelie <pcd@delphix.com> Reviewed by: Pavel Zakharov <pavel.zakharov@delphix.com> Reviewed by: Matthew Ahrens <mahrens@delphix.com> Approved by: Dan McDonald <danmcd@joyent.com> Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Ported-by: Paul Dagnelie <pcd@delphix.com> OpenZFS-issue: https://www.illumos.org/issues/8484 OpenZFS-commit: https://github.com/openzfs/openzfs/commit/7028a8b92b7 Issue #3752 Closes #7462
2017-05-25 11:32:40 -07:00 · 2017-05-25 11:32:40 -07:00 · 37fb3e4318
commit 37fb3e4318
parent f0ed6c7448
12 changed files with 589 additions and 85 deletions
--- a/include/sys/Makefile.am
+++ b/include/sys/Makefile.am
@ -2,6 +2,7 @@ SUBDIRS = fm fs crypto lua sysevent
 COMMON_H = \
 	$(top_srcdir)/include/sys/abd.h \
 	$(top_srcdir)/include/sys/aggsum.h \
 	$(top_srcdir)/include/sys/arc.h \
 	$(top_srcdir)/include/sys/arc_impl.h \
 	$(top_srcdir)/include/sys/avl.h \
@ -11,6 +12,7 @@ COMMON_H = \
 	$(top_srcdir)/include/sys/bpobj.h \
 	$(top_srcdir)/include/sys/bptree.h \
 	$(top_srcdir)/include/sys/bqueue.h \
 	$(top_srcdir)/include/sys/cityhash.h \
 	$(top_srcdir)/include/sys/dbuf.h \
 	$(top_srcdir)/include/sys/ddt.h \
 	$(top_srcdir)/include/sys/dmu.h \
--- a/include/sys/aggsum.h
+++ b/include/sys/aggsum.h
@ -0,0 +1,59 @@
 /*
 * CDDL HEADER START
 *
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * CDDL HEADER END
 */
 /*
 * Copyright (c) 2017 by Delphix. All rights reserved.
 */
 #ifndef	_SYS_AGGSUM_H
 #define	_SYS_AGGSUM_H
 #include <sys/zfs_context.h>
 #ifdef	__cplusplus
 extern "C" {
 #endif
 /*
 * One bucket of a fanned-out counter. Writers pick a bucket and adjust its
 * delta under the bucket's own lock instead of touching the shared core.
 */
 typedef struct aggsum_bucket aggsum_bucket_t;
 struct aggsum_bucket {
 	/* Protects asc_delta and asc_borrowed. */
 	kmutex_t asc_lock;
 	/* Net change accumulated in this bucket since it was last flushed. */
 	int64_t asc_delta;
 	/*
 	 * Capacity borrowed from the core counter; asc_delta may range over
 	 * [-asc_borrowed, asc_borrowed] before more must be borrowed.
 	 */
 	uint64_t asc_borrowed;
 } ____cacheline_aligned;
 /*
 * Core of an aggregate-sum counter. Updates are fanned out over
 * as_numbuckets buckets; the core only tracks bounds on the true value.
 */
 typedef struct aggsum {
 	/* Serializes borrowing from, and flushing buckets into, the core. */
 	kmutex_t as_lock;
 	/* The true value is never below this; read without as_lock. */
 	int64_t as_lower_bound;
 	/* The true value is never above this; read without as_lock. */
 	int64_t as_upper_bound;
 	/* Number of entries in as_buckets. */
 	uint64_t as_numbuckets;
 	/* Array of as_numbuckets buckets. */
 	aggsum_bucket_t *as_buckets;
 } aggsum_t;
 /* Initialize the counter to the given value and allocate its buckets. */
 void aggsum_init(aggsum_t *, uint64_t);
 /* Destroy the locks and free the buckets set up by aggsum_init(). */
 void aggsum_fini(aggsum_t *);
 /* Return the current lower bound on the counter; takes no locks. */
 int64_t aggsum_lower_bound(aggsum_t *);
 /* Return the current upper bound on the counter; takes no locks. */
 int64_t aggsum_upper_bound(aggsum_t *);
 /*
 * Return -1, 0 or 1 as the counter is less than, equal to or greater than
 * the given value, flushing buckets only as far as needed to decide.
 */
 int aggsum_compare(aggsum_t *, uint64_t);
 /* Return the precise value of the counter, flushing buckets if needed. */
 uint64_t aggsum_value(aggsum_t *);
 /* Add a (possibly negative) delta to the counter. */
 void aggsum_add(aggsum_t *, int64_t);
 #ifdef	__cplusplus
 }
 #endif
 #endif /* _SYS_AGGSUM_H */
--- a/include/sys/cityhash.h
+++ b/include/sys/cityhash.h
@ -0,0 +1,41 @@
 // Copyright (c) 2011 Google, Inc.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files (the "Software"), to deal
 // in the Software without restriction, including without limitation the rights
 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 // copies of the Software, and to permit persons to whom the Software is
 // furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in
 // all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 // THE SOFTWARE.
 /*
 * Copyright (c) 2017 by Delphix. All rights reserved.
 */
 #ifndef	_SYS_CITYHASH_H
 #define	_SYS_CITYHASH_H
 #include <sys/zfs_context.h>
 #ifdef	__cplusplus
 extern "C" {
 #endif
 /*
 * Hash four 64-bit words down to a single 64-bit value.
 */
 uint64_t cityhash4(uint64_t, uint64_t, uint64_t, uint64_t);
 #ifdef	__cplusplus
 }
 #endif
 #endif	/* _SYS_CITYHASH_H */
--- a/include/sys/zfs_context.h
+++ b/include/sys/zfs_context.h
@ -750,5 +750,7 @@ extern fstrans_cookie_t spl_fstrans_mark(void);
 extern void spl_fstrans_unmark(fstrans_cookie_t);
 extern int __spl_pf_fstrans_check(void);
 #define	____cacheline_aligned
 #endif /* _KERNEL */
 #endif	/* _SYS_ZFS_CONTEXT_H */
--- a/lib/libzpool/Makefile.am
+++ b/lib/libzpool/Makefile.am
@ -40,12 +40,14 @@ KERNEL_C = \
 	zpool_prop.c \
 	zprop_common.c \
 	abd.c \
 	aggsum.c \
 	arc.c \
 	blkptr.c \
 	bplist.c \
 	bpobj.c \
 	bptree.c \
 	bqueue.c \
 	cityhash.c \
 	dbuf.c \
 	dbuf_stats.c \
 	ddt.c \
--- a/module/zfs/Makefile.in
+++ b/module/zfs/Makefile.in
@ -17,10 +17,12 @@ endif
 ccflags-y += $(NO_UNUSED_BUT_SET_VARIABLE)
 $(MODULE)-objs += abd.o
 $(MODULE)-objs += aggsum.o
 $(MODULE)-objs += arc.o
 $(MODULE)-objs += blkptr.o
 $(MODULE)-objs += bplist.o
 $(MODULE)-objs += bpobj.o
 $(MODULE)-objs += cityhash.o
 $(MODULE)-objs += dbuf.o
 $(MODULE)-objs += dbuf_stats.o
 $(MODULE)-objs += bptree.o
--- a/module/zfs/THIRDPARTYLICENSE.cityhash
+++ b/module/zfs/THIRDPARTYLICENSE.cityhash
@ -0,0 +1,19 @@
 Copyright (c) 2011 Google, Inc.
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
 in the Software without restriction, including without limitation the rights
 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 copies of the Software, and to permit persons to whom the Software is
 furnished to do so, subject to the following conditions:
 The above copyright notice and this permission notice shall be included in
 all copies or substantial portions of the Software.
 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 THE SOFTWARE.
--- a/module/zfs/THIRDPARTYLICENSE.cityhash.descrip
+++ b/module/zfs/THIRDPARTYLICENSE.cityhash.descrip
@ -0,0 +1 @@
 CITYHASH CHECKSUM FUNCTIONALITY IN ZFS
--- a/module/zfs/aggsum.c
+++ b/module/zfs/aggsum.c
@ -0,0 +1,233 @@
 /*
 * CDDL HEADER START
 *
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * CDDL HEADER END
 */
 /*
 * Copyright (c) 2017 by Delphix. All rights reserved.
 */
 #include <sys/zfs_context.h>
 #include <sys/aggsum.h>
 /*
 * Aggregate-sum counters are a form of fanned-out counter, used when atomic
 * instructions on a single field cause enough CPU cache line contention to
 * slow system performance. Due to their increased overhead and the expense
 * involved with precisely reading from them, they should only be used in cases
 * where the write rate (increment/decrement) is much higher than the read rate
 * (get value).
 *
 * Aggregate sum counters are comprised of two basic parts, the core and the
 * buckets. The core counter contains a lock for the entire counter, as well
 * as the current upper and lower bounds on the value of the counter. The
 * aggsum_bucket structure contains a per-bucket lock to protect the contents of
 * the bucket, the current amount that this bucket has changed from the global
 * counter (called the delta), and the amount of increment and decrement we have
 * "borrowed" from the core counter.
 *
 * The basic operation of an aggsum is simple. Threads that wish to modify the
 * counter will modify one bucket's counter (determined by their current CPU, to
 * help minimize lock and cache contention). If the bucket already has
 * sufficient capacity borrowed from the core structure to handle their request,
 * they simply modify the delta and return.  If the bucket does not, we clear
 * the bucket's current state (to prevent the borrowed amounts from getting too
 * large), and borrow more from the core counter. Borrowing is done by adding to
 * the upper bound (or subtracting from the lower bound) of the core counter,
 * and setting the borrow value for the bucket to the amount added (or
 * subtracted).  Clearing the bucket is the opposite; we add the current delta
 * to both the lower and upper bounds of the core counter, subtract the borrowed
 * increment from the upper bound, and add the borrowed decrement to the
 * lower bound.  Note that only borrowing and clearing require access to the
 * core counter; since all other operations access CPU-local resources,
 * performance can be much higher than a traditional counter.
 *
 * Threads that wish to read from the counter have a slightly more challenging
 * task. It is fast to determine the upper and lower bounds of the aggsum; this
 * does not require grabbing any locks. This suffices for cases where an
 * approximation of the aggsum's value is acceptable. However, if one needs to
 * know whether some specific value is above or below the current value in the
 * aggsum, they invoke aggsum_compare(). This function operates by repeatedly
 * comparing the target value to the upper and lower bounds of the aggsum, and
 * then clearing a bucket. This proceeds until the target is outside of the
 * upper and lower bounds and we return a response, or the last bucket has been
 * cleared and we know that the target is equal to the aggsum's value. Finally,
 * the most expensive operation is determining the precise value of the aggsum.
 * To do this, we clear every bucket and then return the upper bound (which must
 * be equal to the lower bound). What makes aggsum_compare() and aggsum_value()
 * expensive is clearing buckets. This involves grabbing the global lock
 * (serializing against themselves and borrow operations), grabbing a bucket's
 * lock (preventing threads on those CPUs from modifying their delta), and
 * zeroing out the borrowed value (forcing that thread to borrow on its next
 * request, which will also be expensive).  This is what makes aggsums well
 * suited for write-many read-rarely operations.
 */
 /*
 * We will borrow aggsum_borrow_multiplier times the current request, so we will
 * have to get the as_lock approximately every aggsum_borrow_multiplier calls to
 * aggsum_add().
 */
 static uint_t aggsum_borrow_multiplier = 10;
 /*
 * Prepare an aggsum for use: both bounds start at the given value, and one
 * bucket (zeroed, with its own lock) is allocated per boot_ncpus.
 */
 void
 aggsum_init(aggsum_t *as, uint64_t value)
 {
 	uint64_t idx;
 	bzero(as, sizeof (*as));
 	/* Nothing is borrowed yet, so the bounds coincide. */
 	as->as_upper_bound = value;
 	as->as_lower_bound = as->as_upper_bound;
 	mutex_init(&as->as_lock, NULL, MUTEX_DEFAULT, NULL);
 	as->as_numbuckets = boot_ncpus;
 	as->as_buckets = kmem_zalloc(boot_ncpus * sizeof (aggsum_bucket_t),
 	    KM_SLEEP);
 	idx = 0;
 	while (idx < as->as_numbuckets) {
 		mutex_init(&as->as_buckets[idx].asc_lock,
 		    NULL, MUTEX_DEFAULT, NULL);
 		idx++;
 	}
 }
 /*
 * Tear down an aggsum: destroy every bucket lock, release the bucket array,
 * then destroy the core lock.
 */
 void
 aggsum_fini(aggsum_t *as)
 {
 	uint64_t idx = 0;
 	while (idx < as->as_numbuckets) {
 		mutex_destroy(&as->as_buckets[idx].asc_lock);
 		idx++;
 	}
 	kmem_free(as->as_buckets, as->as_numbuckets * sizeof (aggsum_bucket_t));
 	mutex_destroy(&as->as_lock);
 }
 /*
 * Report the current lower bound on the counter's value. No lock is taken,
 * so the result is only a snapshot.
 */
 int64_t
 aggsum_lower_bound(aggsum_t *as)
 {
 	int64_t lower = as->as_lower_bound;
 	return (lower);
 }
 /*
 * Report the current upper bound on the counter's value. No lock is taken,
 * so the result is only a snapshot.
 */
 int64_t
 aggsum_upper_bound(aggsum_t *as)
 {
 	int64_t upper = as->as_upper_bound;
 	return (upper);
 }
 /*
 * Fold one bucket's state back into the core counter: apply the bucket's
 * delta to both bounds and hand back whatever the bucket had borrowed,
 * leaving the bucket with a zero delta and nothing borrowed. The caller must
 * hold both as_lock and the bucket's asc_lock.
 */
 static void
 aggsum_flush_bucket(aggsum_t *as, struct aggsum_bucket *asb)
 {
 	ASSERT(MUTEX_HELD(&as->as_lock));
 	ASSERT(MUTEX_HELD(&asb->asc_lock));
 	/*
 	 * We use atomic instructions for this because we read the upper and
 	 * lower bounds without the lock, so we need stores to be atomic.
 	 */
 	atomic_add_64((volatile uint64_t *)&as->as_lower_bound, asb->asc_delta);
 	atomic_add_64((volatile uint64_t *)&as->as_upper_bound, asb->asc_delta);
 	asb->asc_delta = 0;
 	/*
 	 * Give the borrowed range back: the upper bound shrinks and the lower
 	 * bound grows by asc_borrowed. asc_borrowed is unsigned, so its
 	 * negation wraps; presumably atomic_add_64() adds modulo 2^64, which
 	 * makes the first call a subtraction.
 	 */
 	atomic_add_64((volatile uint64_t *)&as->as_upper_bound,
 	    -asb->asc_borrowed);
 	atomic_add_64((volatile uint64_t *)&as->as_lower_bound,
 	    asb->asc_borrowed);
 	asb->asc_borrowed = 0;
 }
 /*
 * Return the exact value of the counter. If the bounds already agree no
 * bucket holds anything and the bound is the answer; otherwise every bucket
 * is flushed into the core under as_lock, after which the bounds must meet.
 */
 uint64_t
 aggsum_value(aggsum_t *as)
 {
 	int64_t rv;
 	mutex_enter(&as->as_lock);
 	if (as->as_lower_bound != as->as_upper_bound) {
 		/* Slow path: drain each bucket in turn. */
 		for (int i = 0; i < as->as_numbuckets; i++) {
 			struct aggsum_bucket *bucket = &as->as_buckets[i];
 			mutex_enter(&bucket->asc_lock);
 			aggsum_flush_bucket(as, bucket);
 			mutex_exit(&bucket->asc_lock);
 		}
 		VERIFY3U(as->as_lower_bound, ==, as->as_upper_bound);
 	} else {
 		/* Fast path: equal bounds imply every bucket is empty. */
 		for (int i = 0; i < as->as_numbuckets; i++) {
 			ASSERT0(as->as_buckets[i].asc_delta);
 			ASSERT0(as->as_buckets[i].asc_borrowed);
 		}
 	}
 	rv = as->as_lower_bound;
 	mutex_exit(&as->as_lock);
 	return (rv);
 }
 /*
 * Refill a bucket's borrowed capacity. The bucket is flushed first, then the
 * core bounds are widened by |delta| in each direction and the bucket records
 * that amount as borrowed. Takes as_lock and then the bucket's asc_lock.
 */
 static void
 aggsum_borrow(aggsum_t *as, int64_t delta, struct aggsum_bucket *asb)
 {
 	int64_t magnitude = delta;
 	if (magnitude < 0)
 		magnitude = -magnitude;
 	mutex_enter(&as->as_lock);
 	mutex_enter(&asb->asc_lock);
 	aggsum_flush_bucket(as, asb);
 	atomic_add_64((volatile uint64_t *)&as->as_upper_bound, magnitude);
 	atomic_add_64((volatile uint64_t *)&as->as_lower_bound, -magnitude);
 	asb->asc_borrowed = magnitude;
 	mutex_exit(&asb->asc_lock);
 	mutex_exit(&as->as_lock);
 }
 /*
 * Add delta (which may be negative) to the counter. The bucket is picked once
 * from CPU_SEQID; the update is applied to that bucket if the result stays
 * within what the bucket has borrowed, otherwise more is borrowed from the
 * core (aggsum_borrow_multiplier times the request) and the update is retried.
 */
 void
 aggsum_add(aggsum_t *as, int64_t delta)
 {
 	struct aggsum_bucket *bucket =
 	    &as->as_buckets[CPU_SEQID % as->as_numbuckets];
 	for (;;) {
 		int64_t limit, next;
 		mutex_enter(&bucket->asc_lock);
 		limit = (int64_t)bucket->asc_borrowed;
 		next = bucket->asc_delta + delta;
 		if (next >= -limit && next <= limit) {
 			/* Fits in the borrowed range; no core access needed. */
 			bucket->asc_delta = next;
 			mutex_exit(&bucket->asc_lock);
 			break;
 		}
 		mutex_exit(&bucket->asc_lock);
 		aggsum_borrow(as, delta * aggsum_borrow_multiplier, bucket);
 	}
 }
 /*
 * Compare the aggsum value to target efficiently. Returns -1 if the value
 * represented by the aggsum is less than target, 1 if it's greater, and 0 if
 * they are equal.
 */
 int
 aggsum_compare(aggsum_t *as, uint64_t target)
 {
 	if (as->as_upper_bound < target)
 		return (-1);
 	if (as->as_lower_bound > target)
 		return (1);
 	mutex_enter(&as->as_lock);
 	for (int i = 0; i < as->as_numbuckets; i++) {
 		struct aggsum_bucket *asb = &as->as_buckets[i];
 		mutex_enter(&asb->asc_lock);
 		aggsum_flush_bucket(as, asb);
 		mutex_exit(&asb->asc_lock);
 		if (as->as_upper_bound < target) {
 			mutex_exit(&as->as_lock);
 			return (-1);
 		}
 		if (as->as_lower_bound > target) {
 			mutex_exit(&as->as_lock);
 			return (1);
 		}
 	}
 	VERIFY3U(as->as_lower_bound, ==, as->as_upper_bound);
 	ASSERT3U(as->as_lower_bound, ==, target);
 	mutex_exit(&as->as_lock);
 	return (0);
 }
--- a/module/zfs/arc.c
+++ b/module/zfs/arc.c
@ -303,6 +303,8 @@
 #include <zfs_fletcher.h>
 #include <sys/arc_impl.h>
 #include <sys/trace_arc.h>
 #include <sys/aggsum.h>
 #include <sys/cityhash.h>
 #ifndef _KERNEL
 /* set with ZFS_DEBUG=watch, to enable watchpoints on frozen buffers */
@ -475,6 +477,7 @@ typedef struct arc_stats {
 	kstat_named_t arcstat_c;
 	kstat_named_t arcstat_c_min;
 	kstat_named_t arcstat_c_max;
 	/* Not updated directly; only synced in arc_kstat_update. */
 	kstat_named_t arcstat_size;
 	/*
 	 * Number of compressed bytes stored in the arc_buf_hdr_t's b_pabd.
@ -503,12 +506,14 @@ typedef struct arc_stats {
 	 * (allocated via arc_buf_hdr_t_full and arc_buf_hdr_t_l2only
 	 * caches), and arc_buf_t structures (allocated via arc_buf_t
 	 * cache).
 	 * Not updated directly; only synced in arc_kstat_update.
 	 */
 	kstat_named_t arcstat_hdr_size;
 	/*
 	 * Number of bytes consumed by ARC buffers of type equal to
 	 * ARC_BUFC_DATA. This is generally consumed by buffers backing
 	 * on disk user data (e.g. plain file contents).
 	 * Not updated directly; only synced in arc_kstat_update.
 	 */
 	kstat_named_t arcstat_data_size;
 	/*
@ -516,18 +521,22 @@ typedef struct arc_stats {
 	 * ARC_BUFC_METADATA. This is generally consumed by buffers
 	 * backing on disk data that is used for internal ZFS
 	 * structures (e.g. ZAP, dnode, indirect blocks, etc).
 	 * Not updated directly; only synced in arc_kstat_update.
 	 */
 	kstat_named_t arcstat_metadata_size;
 	/*
 	 * Number of bytes consumed by dmu_buf_impl_t objects.
 	 * Not updated directly; only synced in arc_kstat_update.
 	 */
 	kstat_named_t arcstat_dbuf_size;
 	/*
 	 * Number of bytes consumed by dnode_t objects.
 	 * Not updated directly; only synced in arc_kstat_update.
 	 */
 	kstat_named_t arcstat_dnode_size;
 	/*
 	 * Number of bytes consumed by bonus buffers.
 	 * Not updated directly; only synced in arc_kstat_update.
 	 */
 	kstat_named_t arcstat_bonus_size;
 	/*
@ -535,6 +544,7 @@ typedef struct arc_stats {
 	 * arc_anon state. This includes *all* buffers in the arc_anon
 	 * state; e.g. data, metadata, evictable, and unevictable buffers
 	 * are all included in this value.
 	 * Not updated directly; only synced in arc_kstat_update.
 	 */
 	kstat_named_t arcstat_anon_size;
 	/*
@ -542,6 +552,7 @@ typedef struct arc_stats {
 	 * following criteria: backing buffers of type ARC_BUFC_DATA,
 	 * residing in the arc_anon state, and are eligible for eviction
 	 * (e.g. have no outstanding holds on the buffer).
 	 * Not updated directly; only synced in arc_kstat_update.
 	 */
 	kstat_named_t arcstat_anon_evictable_data;
 	/*
@ -549,6 +560,7 @@ typedef struct arc_stats {
 	 * following criteria: backing buffers of type ARC_BUFC_METADATA,
 	 * residing in the arc_anon state, and are eligible for eviction
 	 * (e.g. have no outstanding holds on the buffer).
 	 * Not updated directly; only synced in arc_kstat_update.
 	 */
 	kstat_named_t arcstat_anon_evictable_metadata;
 	/*
@ -556,6 +568,7 @@ typedef struct arc_stats {
 	 * arc_mru state. This includes *all* buffers in the arc_mru
 	 * state; e.g. data, metadata, evictable, and unevictable buffers
 	 * are all included in this value.
 	 * Not updated directly; only synced in arc_kstat_update.
 	 */
 	kstat_named_t arcstat_mru_size;
 	/*
@ -563,6 +576,7 @@ typedef struct arc_stats {
 	 * following criteria: backing buffers of type ARC_BUFC_DATA,
 	 * residing in the arc_mru state, and are eligible for eviction
 	 * (e.g. have no outstanding holds on the buffer).
 	 * Not updated directly; only synced in arc_kstat_update.
 	 */
 	kstat_named_t arcstat_mru_evictable_data;
 	/*
@ -570,6 +584,7 @@ typedef struct arc_stats {
 	 * following criteria: backing buffers of type ARC_BUFC_METADATA,
 	 * residing in the arc_mru state, and are eligible for eviction
 	 * (e.g. have no outstanding holds on the buffer).
 	 * Not updated directly; only synced in arc_kstat_update.
 	 */
 	kstat_named_t arcstat_mru_evictable_metadata;
 	/*
@ -580,18 +595,21 @@ typedef struct arc_stats {
 	 * don't actually have ARC buffers linked off of these headers.
 	 * Thus, *if* the headers had associated ARC buffers, these
 	 * buffers *would have* consumed this number of bytes.
 	 * Not updated directly; only synced in arc_kstat_update.
 	 */
 	kstat_named_t arcstat_mru_ghost_size;
 	/*
 	 * Number of bytes that *would have been* consumed by ARC
 	 * buffers that are eligible for eviction, of type
 	 * ARC_BUFC_DATA, and linked off the arc_mru_ghost state.
 	 * Not updated directly; only synced in arc_kstat_update.
 	 */
 	kstat_named_t arcstat_mru_ghost_evictable_data;
 	/*
 	 * Number of bytes that *would have been* consumed by ARC
 	 * buffers that are eligible for eviction, of type
 	 * ARC_BUFC_METADATA, and linked off the arc_mru_ghost state.
 	 * Not updated directly; only synced in arc_kstat_update.
 	 */
 	kstat_named_t arcstat_mru_ghost_evictable_metadata;
 	/*
@ -599,36 +617,42 @@ typedef struct arc_stats {
 	 * arc_mfu state. This includes *all* buffers in the arc_mfu
 	 * state; e.g. data, metadata, evictable, and unevictable buffers
 	 * are all included in this value.
 	 * Not updated directly; only synced in arc_kstat_update.
 	 */
 	kstat_named_t arcstat_mfu_size;
 	/*
 	 * Number of bytes consumed by ARC buffers that are eligible for
 	 * eviction, of type ARC_BUFC_DATA, and reside in the arc_mfu
 	 * state.
 	 * Not updated directly; only synced in arc_kstat_update.
 	 */
 	kstat_named_t arcstat_mfu_evictable_data;
 	/*
 	 * Number of bytes consumed by ARC buffers that are eligible for
 	 * eviction, of type ARC_BUFC_METADATA, and reside in the
 	 * arc_mfu state.
 	 * Not updated directly; only synced in arc_kstat_update.
 	 */
 	kstat_named_t arcstat_mfu_evictable_metadata;
 	/*
 	 * Total number of bytes that *would have been* consumed by ARC
 	 * buffers in the arc_mfu_ghost state. See the comment above
 	 * arcstat_mru_ghost_size for more details.
 	 * Not updated directly; only synced in arc_kstat_update.
 	 */
 	kstat_named_t arcstat_mfu_ghost_size;
 	/*
 	 * Number of bytes that *would have been* consumed by ARC
 	 * buffers that are eligible for eviction, of type
 	 * ARC_BUFC_DATA, and linked off the arc_mfu_ghost state.
 	 * Not updated directly; only synced in arc_kstat_update.
 	 */
 	kstat_named_t arcstat_mfu_ghost_evictable_data;
 	/*
 	 * Number of bytes that *would have been* consumed by ARC
 	 * buffers that are eligible for eviction, of type
 	 * ARC_BUFC_METADATA, and linked off the arc_mfu_ghost state.
 	 * Not updated directly; only synced in arc_kstat_update.
 	 */
 	kstat_named_t arcstat_mfu_ghost_evictable_metadata;
 	kstat_named_t arcstat_l2_hits;
@ -650,6 +674,7 @@ typedef struct arc_stats {
 	kstat_named_t arcstat_l2_io_error;
 	kstat_named_t arcstat_l2_lsize;
 	kstat_named_t arcstat_l2_psize;
 	/* Not updated directly; only synced in arc_kstat_update. */
 	kstat_named_t arcstat_l2_hdr_size;
 	kstat_named_t arcstat_memory_throttle_count;
 	kstat_named_t arcstat_memory_direct_count;
@ -661,6 +686,7 @@ typedef struct arc_stats {
 	kstat_named_t arcstat_tempreserve;
 	kstat_named_t arcstat_loaned_bytes;
 	kstat_named_t arcstat_prune;
 	/* Not updated directly; only synced in arc_kstat_update. */
 	kstat_named_t arcstat_meta_used;
 	kstat_named_t arcstat_meta_limit;
 	kstat_named_t arcstat_dnode_limit;
@ -829,7 +855,6 @@ static arc_state_t	*arc_l2c_only;
 * the possibility of inconsistency by having shadow copies of the variables,
 * while still allowing the code to be readable.
 */
 #define	arc_size	ARCSTAT(arcstat_size)	/* actual total arc size */
 #define	arc_p		ARCSTAT(arcstat_p)	/* target size of MRU */
 #define	arc_c		ARCSTAT(arcstat_c)	/* target size of cache */
 #define	arc_c_min	ARCSTAT(arcstat_c_min)	/* min target cache size */
@ -840,11 +865,7 @@ static arc_state_t	*arc_l2c_only;
 #define	arc_meta_limit	ARCSTAT(arcstat_meta_limit) /* max size for metadata */
 #define	arc_dnode_limit	ARCSTAT(arcstat_dnode_limit) /* max size for dnodes */
 #define	arc_meta_min	ARCSTAT(arcstat_meta_min) /* min size for metadata */
 #define	arc_meta_used	ARCSTAT(arcstat_meta_used) /* size of metadata */
 #define	arc_meta_max	ARCSTAT(arcstat_meta_max) /* max size of metadata */
 #define	arc_dbuf_size	ARCSTAT(arcstat_dbuf_size) /* dbuf metadata */
 #define	arc_dnode_size	ARCSTAT(arcstat_dnode_size) /* dnode metadata */
 #define	arc_bonus_size	ARCSTAT(arcstat_bonus_size) /* bonus buffer metadata */
 #define	arc_need_free	ARCSTAT(arcstat_need_free) /* bytes to be freed */
 #define	arc_sys_free	ARCSTAT(arcstat_sys_free) /* target system free bytes */
@ -857,6 +878,24 @@ static arc_state_t	*arc_l2c_only;
 /* number of bytes in the arc from arc_buf_t's */
 #define	arc_overhead_size	ARCSTAT(arcstat_overhead_size)
 /*
 * There are also some ARC variables that we want to export, but that are
 * updated so often that having the canonical representation be the statistic
 * variable causes a performance bottleneck. We want to use aggsum_t's for these
 * instead, but still be able to export the kstat in the same way as before.
 * The solution is to always use the aggsum version, except in the kstat update
 * callback.
 */
 aggsum_t arc_size;
 aggsum_t arc_meta_used;
 aggsum_t astat_data_size;
 aggsum_t astat_metadata_size;
 aggsum_t astat_dbuf_size;
 aggsum_t astat_dnode_size;
 aggsum_t astat_bonus_size;
 aggsum_t astat_hdr_size;
 aggsum_t astat_l2_hdr_size;
 static list_t arc_prune_list;
 static kmutex_t arc_prune_mtx;
 static taskq_t *arc_prune_taskq;
@ -1050,21 +1089,15 @@ static inline void arc_hdr_clear_flags(arc_buf_hdr_t *hdr, arc_flags_t flags);
 static boolean_t l2arc_write_eligible(uint64_t, arc_buf_hdr_t *);
 static void l2arc_read_done(zio_t *);
 /*
 * We use Cityhash for this. It's fast, and has good hash properties without
 * requiring any large static buffers.
 */
 static uint64_t
 buf_hash(uint64_t spa, const dva_t *dva, uint64_t birth)
 {
-	uint8_t *vdva = (uint8_t *)dva;
+	return (cityhash4(spa, dva->dva_word[0], dva->dva_word[1], birth));
 	uint64_t crc = -1ULL;
 	int i;
 	ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
 	for (i = 0; i < sizeof (dva_t); i++)
 		crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ vdva[i]) & 0xFF];
 	crc ^= (spa>>8) ^ birth;
 	return (crc);
 }
 #define	HDR_EMPTY(hdr)						\
@ -2676,32 +2709,32 @@ arc_space_consume(uint64_t space, arc_space_type_t type)
 	default:
 		break;
 	case ARC_SPACE_DATA:
-		ARCSTAT_INCR(arcstat_data_size, space);
+		aggsum_add(&astat_data_size, space);
 		break;
 	case ARC_SPACE_META:
-		ARCSTAT_INCR(arcstat_metadata_size, space);
+		aggsum_add(&astat_metadata_size, space);
 		break;
 	case ARC_SPACE_BONUS:
-		ARCSTAT_INCR(arcstat_bonus_size, space);
+		aggsum_add(&astat_bonus_size, space);
 		break;
 	case ARC_SPACE_DNODE:
-		ARCSTAT_INCR(arcstat_dnode_size, space);
+		aggsum_add(&astat_dnode_size, space);
 		break;
 	case ARC_SPACE_DBUF:
-		ARCSTAT_INCR(arcstat_dbuf_size, space);
+		aggsum_add(&astat_dbuf_size, space);
 		break;
 	case ARC_SPACE_HDRS:
-		ARCSTAT_INCR(arcstat_hdr_size, space);
+		aggsum_add(&astat_hdr_size, space);
 		break;
 	case ARC_SPACE_L2HDRS:
-		ARCSTAT_INCR(arcstat_l2_hdr_size, space);
+		aggsum_add(&astat_l2_hdr_size, space);
 		break;
 	}
 	if (type != ARC_SPACE_DATA)
-		ARCSTAT_INCR(arcstat_meta_used, space);
+		aggsum_add(&arc_meta_used, space);
-	atomic_add_64(&arc_size, space);
+	aggsum_add(&arc_size, space);
 }
 void
@ -2713,37 +2746,42 @@ arc_space_return(uint64_t space, arc_space_type_t type)
 	default:
 		break;
 	case ARC_SPACE_DATA:
-		ARCSTAT_INCR(arcstat_data_size, -space);
+		aggsum_add(&astat_data_size, -space);
 		break;
 	case ARC_SPACE_META:
-		ARCSTAT_INCR(arcstat_metadata_size, -space);
+		aggsum_add(&astat_metadata_size, -space);
 		break;
 	case ARC_SPACE_BONUS:
-		ARCSTAT_INCR(arcstat_bonus_size, -space);
+		aggsum_add(&astat_bonus_size, -space);
 		break;
 	case ARC_SPACE_DNODE:
-		ARCSTAT_INCR(arcstat_dnode_size, -space);
+		aggsum_add(&astat_dnode_size, -space);
 		break;
 	case ARC_SPACE_DBUF:
-		ARCSTAT_INCR(arcstat_dbuf_size, -space);
+		aggsum_add(&astat_dbuf_size, -space);
 		break;
 	case ARC_SPACE_HDRS:
-		ARCSTAT_INCR(arcstat_hdr_size, -space);
+		aggsum_add(&astat_hdr_size, -space);
 		break;
 	case ARC_SPACE_L2HDRS:
-		ARCSTAT_INCR(arcstat_l2_hdr_size, -space);
+		aggsum_add(&astat_l2_hdr_size, -space);
 		break;
 	}
 	if (type != ARC_SPACE_DATA) {
-		ASSERT(arc_meta_used >= space);
+		ASSERT(aggsum_compare(&arc_meta_used, space) >= 0);
-		if (arc_meta_max < arc_meta_used)
+		/*
-			arc_meta_max = arc_meta_used;
+		 * We use the upper bound here rather than the precise value
-		ARCSTAT_INCR(arcstat_meta_used, -space);
+		 * because the arc_meta_max value doesn't need to be
 		 * precise. It's only consumed by humans via arcstats.
 		 */
 		if (arc_meta_max < aggsum_upper_bound(&arc_meta_used))
 			arc_meta_max = aggsum_upper_bound(&arc_meta_used);
 		aggsum_add(&arc_meta_used, -space);
 	}
-	ASSERT(arc_size >= space);
+	ASSERT(aggsum_compare(&arc_size, space) >= 0);
-	atomic_add_64(&arc_size, -space);
+	aggsum_add(&arc_size, -space);
 }
 /*
@ -4073,9 +4111,12 @@ arc_evict_state(arc_state_t *state, uint64_t spa, int64_t bytes,
 		 * Request that 10% of the LRUs be scanned by the superblock
 		 * shrinker.
 		 */
-		if (type == ARC_BUFC_DATA && arc_dnode_size > arc_dnode_limit)
+		if (type == ARC_BUFC_DATA && aggsum_compare(&astat_dnode_size,
-			arc_prune_async((arc_dnode_size - arc_dnode_limit) /
+		    arc_dnode_limit) > 0) {
-			    sizeof (dnode_t) / zfs_arc_dnode_reduce_percent);
+			arc_prune_async((aggsum_upper_bound(&astat_dnode_size) -
 			    arc_dnode_limit) / sizeof (dnode_t) /
 			    zfs_arc_dnode_reduce_percent);
 		}
 		/*
 		 * Start eviction using a randomly selected sublist,
@ -4257,14 +4298,14 @@ arc_adjust_impl(arc_state_t *state, uint64_t spa, int64_t bytes,
 *
 * Therefore, this function has been updated to make alternating passes
 * over the ARC releasing data buffers and then newly unheld meta data
- * buffers.  This ensures forward progress is maintained and arc_meta_used
+ * buffers.  This ensures forward progress is maintained and meta_used
 * will decrease.  Normally this is sufficient, but if required the ARC
 * will call the registered prune callbacks causing dentry and inodes to
 * be dropped from the VFS cache.  This will make dnode meta data buffers
 * available for reclaim.
 */
 static uint64_t
-arc_adjust_meta_balanced(void)
+arc_adjust_meta_balanced(uint64_t meta_used)
 {
 	int64_t delta, prune = 0, adjustmnt;
 	uint64_t total_evicted = 0;
@ -4280,7 +4321,7 @@ arc_adjust_meta_balanced(void)
 	 * metadata from the MFU. I think we probably need to implement a
 	 * "metadata arc_p" value to do this properly.
 	 */
-	adjustmnt = arc_meta_used - arc_meta_limit;
+	adjustmnt = meta_used - arc_meta_limit;
 	if (adjustmnt > 0 && refcount_count(&arc_mru->arcs_esize[type]) > 0) {
 		delta = MIN(refcount_count(&arc_mru->arcs_esize[type]),
@ -4305,7 +4346,7 @@ arc_adjust_meta_balanced(void)
 		total_evicted += arc_adjust_impl(arc_mfu, 0, delta, type);
 	}
-	adjustmnt = arc_meta_used - arc_meta_limit;
+	adjustmnt = meta_used - arc_meta_limit;
 	if (adjustmnt > 0 &&
 	    refcount_count(&arc_mru_ghost->arcs_esize[type]) > 0) {
@ -4329,7 +4370,7 @@ arc_adjust_meta_balanced(void)
 	 * meta buffers.  Requests to the upper layers will be made with
 	 * increasingly large scan sizes until the ARC is below the limit.
 	 */
-	if (arc_meta_used > arc_meta_limit) {
+	if (meta_used > arc_meta_limit) {
 		if (type == ARC_BUFC_DATA) {
 			type = ARC_BUFC_METADATA;
 		} else {
@ -4354,7 +4395,7 @@ arc_adjust_meta_balanced(void)
 * capped by the arc_meta_limit tunable.
 */
 static uint64_t
-arc_adjust_meta_only(void)
+arc_adjust_meta_only(uint64_t meta_used)
 {
 	uint64_t total_evicted = 0;
 	int64_t target;
@ -4366,7 +4407,7 @@ arc_adjust_meta_only(void)
 	 * we're over the meta limit more than we're over arc_p, we
 	 * evict some from the MRU here, and some from the MFU below.
 	 */
-	target = MIN((int64_t)(arc_meta_used - arc_meta_limit),
+	target = MIN((int64_t)(meta_used - arc_meta_limit),
 	    (int64_t)(refcount_count(&arc_anon->arcs_size) +
 	    refcount_count(&arc_mru->arcs_size) - arc_p));
@ -4377,8 +4418,9 @@ arc_adjust_meta_only(void)
 	 * below the meta limit, but not so much as to drop us below the
 	 * space allotted to the MFU (which is defined as arc_c - arc_p).
 	 */
-	target = MIN((int64_t)(arc_meta_used - arc_meta_limit),
+	target = MIN((int64_t)(meta_used - arc_meta_limit),
-	    (int64_t)(refcount_count(&arc_mfu->arcs_size) - (arc_c - arc_p)));
+	    (int64_t)(refcount_count(&arc_mfu->arcs_size) -
 	    (arc_c - arc_p)));
 	total_evicted += arc_adjust_impl(arc_mfu, 0, target, ARC_BUFC_METADATA);
@ -4386,12 +4428,12 @@ arc_adjust_meta_only(void)
 }
 static uint64_t
-arc_adjust_meta(void)
+arc_adjust_meta(uint64_t meta_used)
 {
 	if (zfs_arc_meta_strategy == ARC_STRATEGY_META_ONLY)
-		return (arc_adjust_meta_only());
+		return (arc_adjust_meta_only(meta_used));
 	else
-		return (arc_adjust_meta_balanced());
+		return (arc_adjust_meta_balanced(meta_used));
 }
 /*
@ -4478,12 +4520,14 @@ arc_adjust(void)
 	uint64_t total_evicted = 0;
 	uint64_t bytes;
 	int64_t target;
 	uint64_t asize = aggsum_value(&arc_size);
 	uint64_t ameta = aggsum_value(&arc_meta_used);
 	/*
 	 * If we're over arc_meta_limit, we want to correct that before
 	 * potentially evicting data buffers below.
 	 */
-	total_evicted += arc_adjust_meta();
+	total_evicted += arc_adjust_meta(ameta);
 	/*
 	 * Adjust MRU size
@ -4495,9 +4539,9 @@ arc_adjust(void)
 	 * the MRU is over arc_p, we'll evict enough to get back to
 	 * arc_p here, and then evict more from the MFU below.
 	 */
-	target = MIN((int64_t)(arc_size - arc_c),
+	target = MIN((int64_t)(asize - arc_c),
 	    (int64_t)(refcount_count(&arc_anon->arcs_size) +
-	    refcount_count(&arc_mru->arcs_size) + arc_meta_used - arc_p));
+	    refcount_count(&arc_mru->arcs_size) + ameta - arc_p));
 	/*
 	 * If we're below arc_meta_min, always prefer to evict data.
@ -4508,7 +4552,7 @@ arc_adjust(void)
 	 * type, spill over into the next type.
 	 */
 	if (arc_adjust_type(arc_mru) == ARC_BUFC_METADATA &&
-	    arc_meta_used > arc_meta_min) {
+	    ameta > arc_meta_min) {
 		bytes = arc_adjust_impl(arc_mru, 0, target, ARC_BUFC_METADATA);
 		total_evicted += bytes;
@ -4541,10 +4585,10 @@ arc_adjust(void)
 	 * size back to arc_p, if we're still above the target cache
 	 * size, we evict the rest from the MFU.
 	 */
-	target = arc_size - arc_c;
+	target = asize - arc_c;
 	if (arc_adjust_type(arc_mfu) == ARC_BUFC_METADATA &&
-	    arc_meta_used > arc_meta_min) {
+	    ameta > arc_meta_min) {
 		bytes = arc_adjust_impl(arc_mfu, 0, target, ARC_BUFC_METADATA);
 		total_evicted += bytes;
@ -4645,13 +4689,14 @@ arc_flush(spa_t *spa, boolean_t retry)
 void
 arc_shrink(int64_t to_free)
 {
 	uint64_t asize = aggsum_value(&arc_size);
 	uint64_t c = arc_c;
 	if (c > to_free && c - to_free > arc_c_min) {
 		arc_c = c - to_free;
 		atomic_add_64(&arc_p, -(arc_p >> arc_shrink_shift));
-		if (arc_c > arc_size)
+		if (asize < arc_c)
-			arc_c = MAX(arc_size, arc_c_min);
+			arc_c = MAX(asize, arc_c_min);
 		if (arc_p > arc_c)
 			arc_p = (arc_c >> 1);
 		ASSERT(arc_c >= arc_c_min);
@ -4660,7 +4705,7 @@ arc_shrink(int64_t to_free)
 		arc_c = arc_c_min;
 	}
-	if (arc_size > arc_c)
+	if (asize > arc_c)
 		(void) arc_adjust();
 }
@ -4877,7 +4922,8 @@ arc_kmem_reap_now(void)
 	extern kmem_cache_t	*range_seg_cache;
 #ifdef _KERNEL
-	if ((arc_meta_used >= arc_meta_limit) && zfs_arc_meta_prune) {
+	if ((aggsum_compare(&arc_meta_used, arc_meta_limit) >= 0) &&
 	    zfs_arc_meta_prune) {
 		/*
 		 * We are exceeding our meta-data cache limit.
 		 * Prune some entries to release holds on meta-data.
@ -5022,7 +5068,7 @@ arc_reclaim_thread(void *unused)
 		 * be helpful and could potentially cause us to enter an
 		 * infinite loop.
 		 */
-		if (arc_size <= arc_c || evicted == 0) {
+		if (aggsum_compare(&arc_size, arc_c) <= 0|| evicted == 0) {
 			/*
 			 * We're either no longer overflowing, or we
 			 * can't evict anything more, so we should wake
@ -5101,12 +5147,13 @@ arc_reclaim_thread(void *unused)
 static uint64_t
 arc_evictable_memory(void)
 {
 	int64_t asize = aggsum_value(&arc_size);
 	uint64_t arc_clean =
 	    refcount_count(&arc_mru->arcs_esize[ARC_BUFC_DATA]) +
 	    refcount_count(&arc_mru->arcs_esize[ARC_BUFC_METADATA]) +
 	    refcount_count(&arc_mfu->arcs_esize[ARC_BUFC_DATA]) +
 	    refcount_count(&arc_mfu->arcs_esize[ARC_BUFC_METADATA]);
-	uint64_t arc_dirty = MAX((int64_t)arc_size - (int64_t)arc_clean, 0);
+	uint64_t arc_dirty = MAX((int64_t)asize - (int64_t)arc_clean, 0);
 	/*
 	 * Scale reported evictable memory in proportion to page cache, cap
@ -5118,7 +5165,7 @@ arc_evictable_memory(void)
 	if (arc_dirty >= min)
 		return (arc_clean);
-	return (MAX((int64_t)arc_size - (int64_t)min, 0));
+	return (MAX((int64_t)asize - (int64_t)min, 0));
 }
 /*
@ -5261,7 +5308,8 @@ arc_adapt(int bytes, arc_state_t *state)
 	 * cache size, increment the target cache size
 	 */
 	ASSERT3U(arc_c, >=, 2ULL << SPA_MAXBLOCKSHIFT);
-	if (arc_size >= arc_c - (2ULL << SPA_MAXBLOCKSHIFT)) {
+	if (aggsum_compare(&arc_size, arc_c - (2ULL << SPA_MAXBLOCKSHIFT)) >=
 	    0) {
 		atomic_add_64(&arc_c, (int64_t)bytes);
 		if (arc_c > arc_c_max)
 			arc_c = arc_c_max;
@ -5284,7 +5332,16 @@ arc_is_overflowing(void)
 	uint64_t overflow = MAX(SPA_MAXBLOCKSIZE,
 	    arc_c >> zfs_arc_overflow_shift);
-	return (arc_size >= arc_c + overflow);
+	/*
 	 * We just compare the lower bound here for performance reasons. Our
 	 * primary goals are to make sure that the arc never grows without
 	 * bound, and that it can reach its maximum size. This check
 	 * accomplishes both goals. The maximum amount we could run over by is
 	 * 2 * aggsum_borrow_multiplier * NUM_CPUS * the average size of a block
 	 * in the ARC. In practice, that's in the tens of MB, which is low
 	 * enough to be safe.
 	 */
 	return (aggsum_lower_bound(&arc_size) >= arc_c + overflow);
 }
 static abd_t *
@ -5399,7 +5456,8 @@ arc_get_data_impl(arc_buf_hdr_t *hdr, uint64_t size, void *tag)
 		 * If we are growing the cache, and we are adding anonymous
 		 * data, and we have outgrown arc_p, update arc_p
 		 */
-		if (arc_size < arc_c && hdr->b_l1hdr.b_state == arc_anon &&
+		if (aggsum_compare(&arc_size, arc_c) < 0 &&
 		    hdr->b_l1hdr.b_state == arc_anon &&
 		    (refcount_count(&arc_anon->arcs_size) +
 		    refcount_count(&arc_mru->arcs_size) > arc_p))
 			arc_p = MIN(arc_c, arc_p + size);
@ -7213,6 +7271,17 @@ arc_kstat_update(kstat_t *ksp, int rw)
 		    &as->arcstat_mfu_ghost_evictable_data,
 		    &as->arcstat_mfu_ghost_evictable_metadata);
 		ARCSTAT(arcstat_size) = aggsum_value(&arc_size);
 		ARCSTAT(arcstat_meta_used) = aggsum_value(&arc_meta_used);
 		ARCSTAT(arcstat_data_size) = aggsum_value(&astat_data_size);
 		ARCSTAT(arcstat_metadata_size) =
 		    aggsum_value(&astat_metadata_size);
 		ARCSTAT(arcstat_hdr_size) = aggsum_value(&astat_hdr_size);
 		ARCSTAT(arcstat_l2_hdr_size) = aggsum_value(&astat_l2_hdr_size);
 		ARCSTAT(arcstat_dbuf_size) = aggsum_value(&astat_dbuf_size);
 		ARCSTAT(arcstat_dnode_size) = aggsum_value(&astat_dnode_size);
 		ARCSTAT(arcstat_bonus_size) = aggsum_value(&astat_bonus_size);
 		as->arcstat_memory_all_bytes.value.ui64 =
 		    arc_all_memory();
 		as->arcstat_memory_free_bytes.value.ui64 =
@ -7424,6 +7493,16 @@ arc_state_init(void)
 	refcount_create(&arc_mfu_ghost->arcs_size);
 	refcount_create(&arc_l2c_only->arcs_size);
 	aggsum_init(&arc_meta_used, 0);
 	aggsum_init(&arc_size, 0);
 	aggsum_init(&astat_data_size, 0);
 	aggsum_init(&astat_metadata_size, 0);
 	aggsum_init(&astat_hdr_size, 0);
 	aggsum_init(&astat_l2_hdr_size, 0);
 	aggsum_init(&astat_bonus_size, 0);
 	aggsum_init(&astat_dnode_size, 0);
 	aggsum_init(&astat_dbuf_size, 0);
 	arc_anon->arcs_state = ARC_STATE_ANON;
 	arc_mru->arcs_state = ARC_STATE_MRU;
 	arc_mru_ghost->arcs_state = ARC_STATE_MRU_GHOST;
@ -7465,6 +7544,16 @@ arc_state_fini(void)
 	multilist_destroy(arc_mfu_ghost->arcs_list[ARC_BUFC_DATA]);
 	multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_METADATA]);
 	multilist_destroy(arc_l2c_only->arcs_list[ARC_BUFC_DATA]);
 	aggsum_fini(&arc_meta_used);
 	aggsum_fini(&arc_size);
 	aggsum_fini(&astat_data_size);
 	aggsum_fini(&astat_metadata_size);
 	aggsum_fini(&astat_hdr_size);
 	aggsum_fini(&astat_l2_hdr_size);
 	aggsum_fini(&astat_bonus_size);
 	aggsum_fini(&astat_dnode_size);
 	aggsum_fini(&astat_dbuf_size);
 }
 uint64_t
@ -7516,7 +7605,6 @@ arc_init(void)
 	arc_c = arc_c_max;
 	arc_p = (arc_c >> 1);
 	arc_size = 0;
 	/* Set min to 1/2 of arc_c_min */
 	arc_meta_min = 1ULL << SPA_MAXBLOCKSHIFT;
--- a/module/zfs/cityhash.c
+++ b/module/zfs/cityhash.c
@ -0,0 +1,63 @@
 // Copyright (c) 2011 Google, Inc.
 //
 // Permission is hereby granted, free of charge, to any person obtaining a copy
 // of this software and associated documentation files (the "Software"), to deal
 // in the Software without restriction, including without limitation the rights
 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 // copies of the Software, and to permit persons to whom the Software is
 // furnished to do so, subject to the following conditions:
 //
 // The above copyright notice and this permission notice shall be included in
 // all copies or substantial portions of the Software.
 //
 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 // THE SOFTWARE.
 /*
 * Copyright (c) 2017 by Delphix. All rights reserved.
 */
 #include <sys/cityhash.h>
 #define	HASH_K1 0xb492b66fbe98f273ULL
 #define	HASH_K2 0x9ae16a3b2f90404fULL
 /*
 * Bitwise right rotate of a 64-bit value.
 *
 * The rotation count is reduced modulo 64 before use, so every value of
 * `shift` (including 0, 64 and negative counts) produces a defined result.
 * Both shift amounts below are therefore always in the range [0, 63];
 * shifting a 64-bit value by 64 or more would be undefined.  Normally this
 * will compile to a single instruction.
 */
 static inline uint64_t
 rotate(uint64_t val, int shift)
 {
 	/* Conversion to unsigned is modulo 2^N, so the mask is well defined. */
 	const unsigned int n = (unsigned int)shift & 63U;
 	/* When n == 0 the left shift count is also masked down to 0. */
 	return ((val >> n) | (val << ((64U - n) & 63U)));
 }
 /*
 * Mixing step: combine u and v under the multiplier mul using two
 * rounds of xor / multiply / xor-shift-by-47, then one final multiply.
 * All arithmetic is unsigned 64-bit and wraps on overflow.
 */
 static inline uint64_t
 cityhash_helper(uint64_t u, uint64_t v, uint64_t mul)
 {
 	uint64_t first, second;
 	/* Round one: fold the two inputs together. */
 	first = (u ^ v) * mul;
 	first = first ^ (first >> 47);
 	/* Round two: fold v back in with the result of round one. */
 	second = (v ^ first) * mul;
 	second = second ^ (second >> 47);
 	return (second * mul);
 }
 /*
 * Hash four 64-bit words into a single 64-bit value.
 *
 * w1, w3 and w4 are first scaled by the HASH_K1 / HASH_K2 derived
 * constants; the scaled words and w2 are then combined through rotate()
 * into the two inputs handed to cityhash_helper().
 */
 uint64_t
 cityhash4(uint64_t w1, uint64_t w2, uint64_t w3, uint64_t w4)
 {
 	const uint64_t mul = HASH_K2 + 64;
 	const uint64_t s1 = w1 * HASH_K1;
 	const uint64_t s3 = w3 * HASH_K2;
 	const uint64_t s4 = w4 * mul;
 	/* The two lanes fed to the final mixing step. */
 	const uint64_t lane_u = rotate(s1 + w2, 43) + rotate(s4, 30) + s3;
 	const uint64_t lane_v = s1 + rotate(w2 + HASH_K2, 18) + s4;
 	return (cityhash_helper(lane_u, lane_v, mul));
 }
--- a/module/zfs/dbuf.c
+++ b/module/zfs/dbuf.c
@ -48,6 +48,7 @@
 #include <sys/callb.h>
 #include <sys/abd.h>
 #include <sys/vdev.h>
 #include <sys/cityhash.h>
 kstat_t *dbuf_ksp;
@ -270,23 +271,14 @@ static dbuf_hash_table_t dbuf_hash_table;
 static uint64_t dbuf_hash_count;
 /*
 * We use Cityhash for this. It's fast, and has good hash properties without
 * requiring any large static buffers.
 */
 static uint64_t
 dbuf_hash(void *os, uint64_t obj, uint8_t lvl, uint64_t blkid)
 {
-	uintptr_t osv = (uintptr_t)os;
+	return (cityhash4((uintptr_t)os, obj, (uint64_t)lvl, blkid));
 	uint64_t crc = -1ULL;
 	ASSERT(zfs_crc64_table[128] == ZFS_CRC64_POLY);
 	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (lvl)) & 0xFF];
 	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (osv >> 6)) & 0xFF];
 	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 0)) & 0xFF];
 	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (obj >> 8)) & 0xFF];
 	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (blkid >> 0)) & 0xFF];
 	crc = (crc >> 8) ^ zfs_crc64_table[(crc ^ (blkid >> 8)) & 0xFF];
 	crc ^= (osv>>14) ^ (obj>>16) ^ (blkid>>16);
 	return (crc);
 }
 #define	DBUF_EQUAL(dbuf, os, obj, level, blkid)		\