Notable upstream pull request merges:
  #12918 Introduce BLAKE3 checksums as an OpenZFS feature
  #13553 Reduce ZIO io_lock contention on sorted scrub
  #13537 Improve sorted scan memory accounting
  #13540 AVL: Remove obsolete branching optimizations
  #13563 FreeBSD: Improve crypto_dispatch() handling

Obtained from:	OpenZFS
OpenZFS commit:	deb1213098
Martin Matuska 2022-06-23 17:47:42 +02:00
commit 1f1e2261e3
129 changed files with 25943 additions and 521 deletions
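
With this feature merged, BLAKE3 may be selected as a dataset checksum once the
pool feature is enabled; a hedged example (pool and dataset names are
placeholders): "zpool set feature@blake3=enabled tank", then
"zfs set checksum=blake3 tank/fs".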

View File

@ -16,9 +16,23 @@ ASM_SOURCES_AS = \
asm-x86_64/modes/aesni-gcm-x86_64.S \
asm-x86_64/modes/ghash-x86_64.S \
asm-x86_64/sha2/sha256_impl.S \
asm-x86_64/sha2/sha512_impl.S
asm-x86_64/sha2/sha512_impl.S \
asm-x86_64/blake3/blake3_avx2.S \
asm-x86_64/blake3/blake3_avx512.S \
asm-x86_64/blake3/blake3_sse2.S \
asm-x86_64/blake3/blake3_sse41.S
CFLAGS+= -D__amd64 -D_SYS_STACK_H -UHAVE_AES
.elif ${MACHINE_ARCH} == "aarch64"
ASM_SOURCES_C =
ASM_SOURCES_AS = \
asm-aarch64/blake3/b3_aarch64_sse2.S \
asm-aarch64/blake3/b3_aarch64_sse41.S
.elif ${MACHINE_ARCH} == "powerpc64"
ASM_SOURCES_C =
ASM_SOURCES_AS = \
asm-ppc64/blake3/b3_ppc64le_sse2.S \
asm-ppc64/blake3/b3_ppc64le_sse41.S
.else
ASM_SOURCES_C =
ASM_SOURCES_AS =
@ -35,6 +49,10 @@ KERNEL_C = \
algs/aes/aes_impl_x86-64.c \
algs/aes/aes_impl.c \
algs/aes/aes_modes.c \
algs/blake3/blake3.c \
algs/blake3/blake3_generic.c \
algs/blake3/blake3_impl.c \
algs/blake3/blake3_x86-64.c \
algs/edonr/edonr.c \
algs/modes/modes.c \
algs/modes/cbc.c \
@ -88,5 +106,14 @@ CFLAGS.aesni-gcm-x86_64.S+= -DLOCORE
CFLAGS.ghash-x86_64.S+= -DLOCORE
CFLAGS.sha256_impl.S+= -DLOCORE
CFLAGS.sha512_impl.S+= -DLOCORE
CFLAGS.blake3_avx2.S = -DLOCORE
CFLAGS.blake3_avx512.S = -DLOCORE
CFLAGS.blake3_sse2.S = -DLOCORE
CFLAGS.blake3_sse41.S = -DLOCORE
CFLAGS.b3_aarch64_sse2.S = -DLOCORE
CFLAGS.b3_aarch64_sse41.S = -DLOCORE
CFLAGS.b3_ppc64le_sse2.S = -DLOCORE
CFLAGS.b3_ppc64le_sse41.S = -DLOCORE
.include <bsd.lib.mk>

View File

@ -15,9 +15,23 @@ ASM_SOURCES_AS = \
asm-x86_64/modes/gcm_pclmulqdq.S \
asm-x86_64/modes/aesni-gcm-x86_64.S \
asm-x86_64/sha2/sha256_impl.S \
asm-x86_64/sha2/sha512_impl.S
asm-x86_64/sha2/sha512_impl.S \
asm-x86_64/blake3/blake3_avx2.S \
asm-x86_64/blake3/blake3_avx512.S \
asm-x86_64/blake3/blake3_sse2.S \
asm-x86_64/blake3/blake3_sse41.S
CFLAGS+= -D__amd64 -D_SYS_STACK_H
.elif ${MACHINE_ARCH} == "aarch64"
ASM_SOURCES_C =
ASM_SOURCES_AS = \
asm-aarch64/blake3/b3_aarch64_sse2.S \
asm-aarch64/blake3/b3_aarch64_sse41.S
.elif ${MACHINE_ARCH} == "powerpc64"
ASM_SOURCES_C =
ASM_SOURCES_AS = \
asm-ppc64/blake3/b3_ppc64le_sse2.S \
asm-ppc64/blake3/b3_ppc64le_sse41.S
.else
ASM_SOURCES_C =
ASM_SOURCES_AS =
@ -34,6 +48,10 @@ KERNEL_C = \
algs/aes/aes_impl_x86-64.c \
algs/aes/aes_impl.c \
algs/aes/aes_modes.c \
algs/blake3/blake3.c \
algs/blake3/blake3_generic.c \
algs/blake3/blake3_impl.c \
algs/blake3/blake3_x86-64.c \
algs/edonr/edonr.c \
algs/modes/modes.c \
algs/modes/cbc.c \
@ -81,5 +99,14 @@ CFLAGS.ghash-x86_64.S+= -DLOCORE
CFLAGS.sha256_impl.S+= -DLOCORE
CFLAGS.sha512_impl.S+= -DLOCORE
CFLAGS.gcm.c+= -UCAN_USE_GCM_ASM
CFLAGS.blake3_avx2.S = -DLOCORE
CFLAGS.blake3_avx512.S = -DLOCORE
CFLAGS.blake3_sse2.S = -DLOCORE
CFLAGS.blake3_sse41.S = -DLOCORE
CFLAGS.b3_aarch64_sse2.S = -DLOCORE
CFLAGS.b3_aarch64_sse41.S = -DLOCORE
CFLAGS.b3_ppc64le_sse2.S = -DLOCORE
CFLAGS.b3_ppc64le_sse41.S = -DLOCORE
.include <bsd.lib.mk>

View File

@ -56,6 +56,7 @@ KERNEL_C = \
aggsum.c \
arc.c \
arc_os.c \
blake3_zfs.c \
blkptr.c \
bplist.c \
bpobj.c \
@ -169,6 +170,7 @@ KERNEL_C = \
zcp_synctask.c \
zfeature.c \
zfs_byteswap.c \
zfs_chksum.c \
zfs_debug.c \
zfs_fm.c \
zfs_fuid.c \

View File

@ -285,6 +285,7 @@ CONTRIBUTORS:
Tim Connors <tconnors@rather.puzzling.org>
Tim Crawford <tcrawford@datto.com>
Tim Haley <Tim.Haley@Sun.COM>
Tino Reichardt <milky-zfs@mcmilk.de>
Tobin Harding <me@tobin.cc>
Tom Caputi <tcaputi@datto.com>
Tom Matthews <tom@axiom-partners.com>

View File

@ -174,7 +174,7 @@ zfs_add_sort_column(zfs_sort_column_t **sc, const char *name,
zfs_sort_column_t *col;
zfs_prop_t prop;
if ((prop = zfs_name_to_prop(name)) == ZPROP_INVAL &&
if ((prop = zfs_name_to_prop(name)) == ZPROP_USERPROP &&
!zfs_prop_user(name))
return (-1);
@ -182,7 +182,7 @@ zfs_add_sort_column(zfs_sort_column_t **sc, const char *name,
col->sc_prop = prop;
col->sc_reverse = reverse;
if (prop == ZPROP_INVAL) {
if (prop == ZPROP_USERPROP) {
col->sc_user_prop = safe_malloc(strlen(name) + 1);
(void) strcpy(col->sc_user_prop, name);
}
@ -311,7 +311,7 @@ zfs_sort(const void *larg, const void *rarg, void *data)
* Otherwise, we compare 'lnum' and 'rnum'.
*/
lstr = rstr = NULL;
if (psc->sc_prop == ZPROP_INVAL) {
if (psc->sc_prop == ZPROP_USERPROP) {
nvlist_t *luser, *ruser;
nvlist_t *lval, *rval;

View File

@ -127,6 +127,11 @@ static int zfs_do_jail(int argc, char **argv);
static int zfs_do_unjail(int argc, char **argv);
#endif
#ifdef __linux__
static int zfs_do_zone(int argc, char **argv);
static int zfs_do_unzone(int argc, char **argv);
#endif
/*
* Enable a reasonable set of defaults for libumem debugging on DEBUG builds.
*/
@ -184,6 +189,8 @@ typedef enum {
HELP_JAIL,
HELP_UNJAIL,
HELP_WAIT,
HELP_ZONE,
HELP_UNZONE,
} zfs_help_t;
typedef struct zfs_command {
@ -254,6 +261,11 @@ static zfs_command_t command_table[] = {
{ "jail", zfs_do_jail, HELP_JAIL },
{ "unjail", zfs_do_unjail, HELP_UNJAIL },
#endif
#ifdef __linux__
{ "zone", zfs_do_zone, HELP_ZONE },
{ "unzone", zfs_do_unzone, HELP_UNZONE },
#endif
};
#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0]))
@ -415,6 +427,10 @@ get_usage(zfs_help_t idx)
return (gettext("\tunjail <jailid|jailname> <filesystem>\n"));
case HELP_WAIT:
return (gettext("\twait [-t <activity>] <filesystem>\n"));
case HELP_ZONE:
return (gettext("\tzone <nsfile> <filesystem>\n"));
case HELP_UNZONE:
return (gettext("\tunzone <nsfile> <filesystem>\n"));
default:
__builtin_unreachable();
}
@ -1901,7 +1917,7 @@ get_callback(zfs_handle_t *zhp, void *data)
pl == cbp->cb_proplist)
continue;
if (pl->pl_prop != ZPROP_INVAL) {
if (pl->pl_prop != ZPROP_USERPROP) {
if (zfs_prop_get(zhp, pl->pl_prop, buf,
sizeof (buf), &sourcetype, source,
sizeof (source),
@ -2291,7 +2307,7 @@ zfs_do_inherit(int argc, char **argv)
argc--;
argv++;
if ((prop = zfs_name_to_prop(propname)) != ZPROP_INVAL) {
if ((prop = zfs_name_to_prop(propname)) != ZPROP_USERPROP) {
if (zfs_prop_readonly(prop)) {
(void) fprintf(stderr, gettext(
"%s property is read-only\n"),
@ -3427,7 +3443,7 @@ print_header(list_cbdata_t *cb)
}
right_justify = B_FALSE;
if (pl->pl_prop != ZPROP_INVAL) {
if (pl->pl_prop != ZPROP_USERPROP) {
header = zfs_prop_column_name(pl->pl_prop);
right_justify = zfs_prop_align_right(pl->pl_prop);
} else {
@ -3478,7 +3494,7 @@ print_dataset(zfs_handle_t *zhp, list_cbdata_t *cb)
sizeof (property));
propstr = property;
right_justify = zfs_prop_align_right(pl->pl_prop);
} else if (pl->pl_prop != ZPROP_INVAL) {
} else if (pl->pl_prop != ZPROP_USERPROP) {
if (zfs_prop_get(zhp, pl->pl_prop, property,
sizeof (property), NULL, NULL, 0,
cb->cb_literal) != 0)
@ -8692,6 +8708,50 @@ main(int argc, char **argv)
return (ret);
}
/*
* zfs zone nsfile filesystem
*
* Attach the given dataset to, or detach it from, the user namespace given by nsfile.
*/
#ifdef __linux__
static int
zfs_do_zone_impl(int argc, char **argv, boolean_t attach)
{
zfs_handle_t *zhp;
int ret;
if (argc < 3) {
(void) fprintf(stderr, gettext("missing argument(s)\n"));
usage(B_FALSE);
}
if (argc > 3) {
(void) fprintf(stderr, gettext("too many arguments\n"));
usage(B_FALSE);
}
zhp = zfs_open(g_zfs, argv[2], ZFS_TYPE_FILESYSTEM);
if (zhp == NULL)
return (1);
ret = (zfs_userns(zhp, argv[1], attach) != 0);
zfs_close(zhp);
return (ret);
}
static int
zfs_do_zone(int argc, char **argv)
{
return (zfs_do_zone_impl(argc, argv, B_TRUE));
}
static int
zfs_do_unzone(int argc, char **argv)
{
return (zfs_do_zone_impl(argc, argv, B_FALSE));
}
#endif
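
The nsfile argument names a user-namespace file, typically /proc/<pid>/ns/user
of the containerized target process (the exact path is illustrative); zfs zone
attaches the dataset to that namespace and zfs unzone detaches it.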
#ifdef __FreeBSD__
#include <sys/jail.h>
#include <jail.h>

View File

@ -5946,7 +5946,7 @@ print_header(list_cbdata_t *cb)
first = B_FALSE;
right_justify = B_FALSE;
if (pl->pl_prop != ZPROP_INVAL) {
if (pl->pl_prop != ZPROP_USERPROP) {
header = zpool_prop_column_name(pl->pl_prop);
right_justify = zpool_prop_align_right(pl->pl_prop);
} else {
@ -6004,7 +6004,7 @@ print_pool(zpool_handle_t *zhp, list_cbdata_t *cb)
}
right_justify = B_FALSE;
if (pl->pl_prop != ZPROP_INVAL) {
if (pl->pl_prop != ZPROP_USERPROP) {
if (zpool_get_prop(zhp, pl->pl_prop, property,
sizeof (property), NULL, cb->cb_literal) != 0)
propstr = "-";

View File

@ -121,6 +121,7 @@
#include <sys/zfeature.h>
#include <sys/dsl_userhold.h>
#include <sys/abd.h>
#include <sys/blake3.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@ -417,6 +418,7 @@ ztest_func_t ztest_device_removal;
ztest_func_t ztest_spa_checkpoint_create_discard;
ztest_func_t ztest_initialize;
ztest_func_t ztest_trim;
ztest_func_t ztest_blake3;
ztest_func_t ztest_fletcher;
ztest_func_t ztest_fletcher_incr;
ztest_func_t ztest_verify_dnode_bt;
@ -470,6 +472,7 @@ ztest_info_t ztest_info[] = {
ZTI_INIT(ztest_spa_checkpoint_create_discard, 1, &zopt_rarely),
ZTI_INIT(ztest_initialize, 1, &zopt_sometimes),
ZTI_INIT(ztest_trim, 1, &zopt_sometimes),
ZTI_INIT(ztest_blake3, 1, &zopt_rarely),
ZTI_INIT(ztest_fletcher, 1, &zopt_rarely),
ZTI_INIT(ztest_fletcher_incr, 1, &zopt_rarely),
ZTI_INIT(ztest_verify_dnode_bt, 1, &zopt_sometimes),
@ -6373,6 +6376,92 @@ ztest_reguid(ztest_ds_t *zd, uint64_t id)
VERIFY3U(load, ==, spa_load_guid(spa));
}
void
ztest_blake3(ztest_ds_t *zd, uint64_t id)
{
(void) zd, (void) id;
hrtime_t end = gethrtime() + NANOSEC;
zio_cksum_salt_t salt;
void *salt_ptr = &salt.zcs_bytes;
struct abd *abd_data, *abd_meta;
void *buf, *templ;
int i, *ptr;
uint32_t size;
BLAKE3_CTX ctx;
size = ztest_random_blocksize();
buf = umem_alloc(size, UMEM_NOFAIL);
abd_data = abd_alloc(size, B_FALSE);
abd_meta = abd_alloc(size, B_TRUE);
for (i = 0, ptr = buf; i < size / sizeof (*ptr); i++, ptr++)
*ptr = ztest_random(UINT_MAX);
memset(salt_ptr, 'A', 32);
abd_copy_from_buf_off(abd_data, buf, 0, size);
abd_copy_from_buf_off(abd_meta, buf, 0, size);
while (gethrtime() <= end) {
int run_count = 100;
zio_cksum_t zc_ref1, zc_ref2;
zio_cksum_t zc_res1, zc_res2;
void *ref1 = &zc_ref1;
void *ref2 = &zc_ref2;
void *res1 = &zc_res1;
void *res2 = &zc_res2;
/* BLAKE3_KEY_LEN = 32 */
VERIFY0(blake3_set_impl_name("generic"));
templ = abd_checksum_blake3_tmpl_init(&salt);
Blake3_InitKeyed(&ctx, salt_ptr);
Blake3_Update(&ctx, buf, size);
Blake3_Final(&ctx, ref1);
zc_ref2 = zc_ref1;
ZIO_CHECKSUM_BSWAP(&zc_ref2);
abd_checksum_blake3_tmpl_free(templ);
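/* "cycle" is assumed to rotate to the next compiled-in implementation on each use */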
VERIFY0(blake3_set_impl_name("cycle"));
while (run_count-- > 0) {
/* Test current implementation */
Blake3_InitKeyed(&ctx, salt_ptr);
Blake3_Update(&ctx, buf, size);
Blake3_Final(&ctx, res1);
zc_res2 = zc_res1;
ZIO_CHECKSUM_BSWAP(&zc_res2);
VERIFY0(memcmp(ref1, res1, 32));
VERIFY0(memcmp(ref2, res2, 32));
/* Test ABD - data */
templ = abd_checksum_blake3_tmpl_init(&salt);
abd_checksum_blake3_native(abd_data, size,
templ, &zc_res1);
abd_checksum_blake3_byteswap(abd_data, size,
templ, &zc_res2);
VERIFY0(memcmp(ref1, res1, 32));
VERIFY0(memcmp(ref2, res2, 32));
/* Test ABD - metadata */
abd_checksum_blake3_native(abd_meta, size,
templ, &zc_res1);
abd_checksum_blake3_byteswap(abd_meta, size,
templ, &zc_res2);
abd_checksum_blake3_tmpl_free(templ);
VERIFY0(memcmp(ref1, res1, 32));
VERIFY0(memcmp(ref2, res2, 32));
}
}
abd_free(abd_data);
abd_free(abd_meta);
umem_free(buf, size);
}
void
ztest_fletcher(ztest_ds_t *zd, uint64_t id)
{

View File

@ -30,6 +30,8 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_ARCH], [
;;
esac
AM_CONDITIONAL([TARGET_CPU_AARCH64], test $TARGET_CPU = aarch64)
AM_CONDITIONAL([TARGET_CPU_X86_64], test $TARGET_CPU = x86_64)
AM_CONDITIONAL([TARGET_CPU_POWERPC], test $TARGET_CPU = powerpc)
AM_CONDITIONAL([TARGET_CPU_SPARC64], test $TARGET_CPU = sparc64)
])

View File

@ -7,8 +7,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_ADD_DISK], [
#include <linux/blkdev.h>
], [
struct gendisk *disk = NULL;
int err = add_disk(disk);
err = err;
int error __attribute__ ((unused)) = add_disk(disk);
])
])

View File

@ -359,6 +359,36 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS], [
])
])
dnl #
dnl # See if kernel supports block multi-queue and blk_status_t.
dnl # blk_status_t represents the new status codes introduced in the 4.13
dnl # kernel patch:
dnl #
dnl # block: introduce new block status code type
dnl #
dnl # We do not currently support the "old" block multi-queue interfaces from
dnl # prior kernels.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_MQ], [
ZFS_LINUX_TEST_SRC([blk_mq], [
#include <linux/blk-mq.h>
], [
struct blk_mq_tag_set tag_set __attribute__ ((unused)) = {0};
(void) blk_mq_alloc_tag_set(&tag_set);
return BLK_STS_OK;
], [])
])
AC_DEFUN([ZFS_AC_KERNEL_BLK_MQ], [
AC_MSG_CHECKING([whether block multiqueue with blk_status_t is available])
ZFS_LINUX_TEST_RESULT([blk_mq], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_BLK_MQ, 1, [block multiqueue is available])
], [
AC_MSG_RESULT(no)
])
])
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE], [
ZFS_AC_KERNEL_SRC_BLK_QUEUE_PLUG
ZFS_AC_KERNEL_SRC_BLK_QUEUE_BDI
@ -370,6 +400,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE], [
ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLUSH
ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_HW_SECTORS
ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_SEGMENTS
ZFS_AC_KERNEL_SRC_BLK_MQ
])
AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE], [
@ -383,4 +414,5 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE], [
ZFS_AC_KERNEL_BLK_QUEUE_FLUSH
ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS
ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS
ZFS_AC_KERNEL_BLK_MQ
])

View File

@ -0,0 +1,23 @@
dnl #
dnl # 3.18 API change
dnl # struct user_namespace inum moved from .proc_inum to .ns.inum.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM], [
ZFS_LINUX_TEST_SRC([user_ns_common_inum], [
#include <linux/user_namespace.h>
], [
struct user_namespace uns;
uns.ns.inum = 0;
])
])
AC_DEFUN([ZFS_AC_KERNEL_USER_NS_COMMON_INUM], [
AC_MSG_CHECKING([whether user_namespace->ns.inum exists])
ZFS_LINUX_TEST_RESULT([user_ns_common_inum], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_USER_NS_COMMON_INUM, 1,
[user_namespace->ns.inum exists])
],[
AC_MSG_RESULT(no)
])
])

View File

@ -145,6 +145,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_KTHREAD
ZFS_AC_KERNEL_SRC_ZERO_PAGE
ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC
ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM
AC_MSG_CHECKING([for available kernel interfaces])
ZFS_LINUX_TEST_COMPILE_ALL([kabi])
@ -263,6 +264,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_KTHREAD
ZFS_AC_KERNEL_ZERO_PAGE
ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC
ZFS_AC_KERNEL_USER_NS_COMMON_INUM
])
dnl #

View File

@ -83,8 +83,7 @@ install() {
for _service in \
"zfs-import-scan.service" \
"zfs-import-cache.service" \
"zfs-load-module.service"; do
"zfs-import-cache.service"; do
inst_simple "${systemdsystemunitdir}/${_service}"
systemctl -q --root "${initdir}" add-wants zfs-import.target "${_service}"
done

View File

@ -100,6 +100,7 @@ def enum(*sequential, **named):
'ZFS_ERR_REBUILD_IN_PROGRESS',
'ZFS_ERR_BADPROP',
'ZFS_ERR_VDEV_NOTSUP',
'ZFS_ERR_NOT_USER_NAMESPACE',
],
{}
)

View File

@ -59,6 +59,9 @@ systemdunit_DATA = \
%D%/systemd/system/zfs-scrub-monthly@.timer \
%D%/systemd/system/zfs-scrub-weekly@.timer \
%D%/systemd/system/zfs-scrub@.service \
%D%/systemd/system/zfs-trim-monthly@.timer \
%D%/systemd/system/zfs-trim-weekly@.timer \
%D%/systemd/system/zfs-trim@.service \
%D%/systemd/system/zfs-share.service \
%D%/systemd/system/zfs-volume-wait.service \
%D%/systemd/system/zfs-volumes.target \

View File

@ -0,0 +1,12 @@
[Unit]
Description=Monthly zpool trim timer for %i
Documentation=man:zpool-trim(8)
[Timer]
OnCalendar=monthly
Persistent=true
RandomizedDelaySec=1h
Unit=zfs-trim@%i.service
[Install]
WantedBy=timers.target

View File

@ -0,0 +1,12 @@
[Unit]
Description=Weekly zpool trim timer for %i
Documentation=man:zpool-trim(8)
[Timer]
OnCalendar=weekly
Persistent=true
RandomizedDelaySec=1h
Unit=zfs-trim@%i.service
[Install]
WantedBy=timers.target

View File

@ -0,0 +1,15 @@
[Unit]
Description=zpool trim on %i
Documentation=man:zpool-trim(8)
Requires=zfs.target
After=zfs.target
ConditionACPower=true
ConditionPathIsDirectory=/sys/module/zfs
[Service]
EnvironmentFile=-@initconfdir@/zfs
ExecStart=/bin/sh -c '\
if @sbindir@/zpool status %i | grep -q "(trimming)"; then\
exec @sbindir@/zpool wait -t trim %i;\
else exec @sbindir@/zpool trim -w %i; fi'
ExecStop=-/bin/sh -c '@sbindir@/zpool trim -s %i 2>/dev/null || true'
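
The service either waits for an in-progress trim to finish or starts a new one;
the timers instantiate it per pool, e.g.
"systemctl enable --now zfs-trim-weekly@tank.timer" (the pool name tank is a
placeholder).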

View File

@ -23,6 +23,7 @@ COMMON_H = \
sys/avl.h \
sys/avl_impl.h \
sys/bitops.h \
sys/blake3.h \
sys/blkptr.h \
sys/bplist.h \
sys/bpobj.h \
@ -117,6 +118,7 @@ COMMON_H = \
sys/zfeature.h \
sys/zfs_acl.h \
sys/zfs_bootenv.h \
sys/zfs_chksum.h \
sys/zfs_context.h \
sys/zfs_debug.h \
sys/zfs_delay.h \

View File

@ -150,6 +150,7 @@ typedef enum zfs_error {
EZFS_EXPORT_IN_PROGRESS, /* currently exporting the pool */
EZFS_REBUILDING, /* resilvering (sequential reconstruction) */
EZFS_VDEV_NOTSUP, /* ops not supported for this type of vdev */
EZFS_NOT_USER_NAMESPACE, /* a file is not a user namespace */
EZFS_UNKNOWN
} zfs_error_t;
@ -979,6 +980,15 @@ _LIBZFS_H int zpool_nextboot(libzfs_handle_t *, uint64_t, uint64_t,
#endif /* __FreeBSD__ */
#ifdef __linux__
/*
* Attach the given filesystem to, or detach it from, the given user namespace.
*/
_LIBZFS_H int zfs_userns(zfs_handle_t *zhp, const char *nspath, int attach);
#endif
#ifdef __cplusplus
}
#endif

View File

@ -74,10 +74,12 @@ extern "C" {
#ifndef LOCORE
#ifndef HAVE_RPC_TYPES
#ifndef _KERNEL
typedef int bool_t;
typedef int enum_t;
#endif
#endif
#endif
#ifndef __cplusplus
#define __init

View File

@ -34,6 +34,11 @@
#include <linux/hdreg.h>
#include <linux/major.h>
#include <linux/msdos_fs.h> /* for SECTOR_* */
#include <linux/bio.h>
#ifdef HAVE_BLK_MQ
#include <linux/blk-mq.h>
#endif
#ifndef HAVE_BLK_QUEUE_FLAG_SET
static inline void
@ -608,4 +613,110 @@ blk_generic_alloc_queue(make_request_fn make_request, int node_id)
}
#endif /* !HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */
/*
* All the io_*() helper functions below can operate on a bio, or a rq, but
* not both. The older submit_bio() codepath will pass a bio, and the
* newer blk-mq codepath will pass a rq.
*/
static inline int
io_data_dir(struct bio *bio, struct request *rq)
{
#ifdef HAVE_BLK_MQ
if (rq != NULL) {
if (op_is_write(req_op(rq))) {
return (WRITE);
} else {
return (READ);
}
}
#else
ASSERT3P(rq, ==, NULL);
#endif
return (bio_data_dir(bio));
}
static inline int
io_is_flush(struct bio *bio, struct request *rq)
{
#ifdef HAVE_BLK_MQ
if (rq != NULL)
return (req_op(rq) == REQ_OP_FLUSH);
#else
ASSERT3P(rq, ==, NULL);
#endif
return (bio_is_flush(bio));
}
static inline int
io_is_discard(struct bio *bio, struct request *rq)
{
#ifdef HAVE_BLK_MQ
if (rq != NULL)
return (req_op(rq) == REQ_OP_DISCARD);
#else
ASSERT3P(rq, ==, NULL);
#endif
return (bio_is_discard(bio));
}
static inline int
io_is_secure_erase(struct bio *bio, struct request *rq)
{
#ifdef HAVE_BLK_MQ
if (rq != NULL)
return (req_op(rq) == REQ_OP_SECURE_ERASE);
#else
ASSERT3P(rq, ==, NULL);
#endif
return (bio_is_secure_erase(bio));
}
static inline int
io_is_fua(struct bio *bio, struct request *rq)
{
#ifdef HAVE_BLK_MQ
if (rq != NULL)
return (rq->cmd_flags & REQ_FUA);
#else
ASSERT3P(rq, ==, NULL);
#endif
return (bio_is_fua(bio));
}
static inline uint64_t
io_offset(struct bio *bio, struct request *rq)
{
#ifdef HAVE_BLK_MQ
if (rq != NULL)
return (blk_rq_pos(rq) << 9);
#else
ASSERT3P(rq, ==, NULL);
#endif
return (BIO_BI_SECTOR(bio) << 9);
}
static inline uint64_t
io_size(struct bio *bio, struct request *rq)
{
#ifdef HAVE_BLK_MQ
if (rq != NULL)
return (blk_rq_bytes(rq));
#else
ASSERT3P(rq, ==, NULL);
#endif
return (BIO_BI_SIZE(bio));
}
static inline int
io_has_data(struct bio *bio, struct request *rq)
{
#ifdef HAVE_BLK_MQ
if (rq != NULL)
return (bio_has_data(rq->bio));
#else
ASSERT3P(rq, ==, NULL);
#endif
return (bio_has_data(bio));
}
#endif /* _ZFS_BLKDEV_H */
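
Taken together, these helpers let a single I/O path serve both the legacy
submit_bio() entry point (bio set, rq NULL) and the blk-mq entry point (rq set,
bio NULL). A minimal caller sketch, assuming HAVE_BLK_MQ and this header; the
function name and the elided handling are hypothetical:

static void
my_dispatch(struct bio *bio, struct request *rq)
{
	uint64_t off = io_offset(bio, rq);	/* byte offset (sectors << 9) */
	uint64_t len = io_size(bio, rq);	/* length of the I/O in bytes */

	if (io_is_flush(bio, rq) && !io_has_data(bio, rq)) {
		/* cache flush with no data payload */
		return;
	}
	if (io_is_discard(bio, rq) || io_is_secure_erase(bio, rq)) {
		/* deallocate the range [off, off + len) */
		return;
	}
	if (io_data_dir(bio, rq) == WRITE) {
		/* write path; io_is_fua(bio, rq) requests forced unit access */
	} else {
		/* read path */
	}
}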

View File

@ -57,25 +57,45 @@
#include <sys/types.h>
#include <linux/version.h>
#define kfpu_allowed() 1
#define kfpu_begin() \
{ \
preempt_disable(); \
enable_kernel_altivec(); \
}
#define kfpu_allowed() 1
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)
#define kfpu_end() \
{ \
disable_kernel_vsx(); \
disable_kernel_altivec(); \
preempt_enable(); \
}
#define kfpu_begin() \
{ \
preempt_disable(); \
enable_kernel_altivec(); \
enable_kernel_vsx(); \
}
#else
/* seems that before 4.5 no-one bothered disabling ... */
/* seems that before 4.5 no-one bothered */
#define kfpu_begin()
#define kfpu_end() preempt_enable()
#endif
#define kfpu_init() 0
#define kfpu_fini() ((void) 0)
static inline boolean_t
zfs_vsx_available(void)
{
boolean_t res;
#if defined(__powerpc64__)
u64 msr;
#else
u32 msr;
#endif
kfpu_begin();
__asm volatile("mfmsr %0" : "=r"(msr));
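/* 0x800000 is MSR bit 23, the VSX-available bit in Linux's MSR definitions */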
res = (msr & 0x800000) != 0;
kfpu_end();
return (res);
}
/*
* Check if AltiVec instruction set is available
*/

View File

@ -296,11 +296,7 @@ static inline struct dentry *file_dentry(const struct file *f)
static inline uid_t zfs_uid_read_impl(struct inode *ip)
{
#ifdef HAVE_SUPER_USER_NS
return (from_kuid(ip->i_sb->s_user_ns, ip->i_uid));
#else
return (from_kuid(kcred->user_ns, ip->i_uid));
#endif
}
static inline uid_t zfs_uid_read(struct inode *ip)
@ -310,11 +306,7 @@ static inline uid_t zfs_uid_read(struct inode *ip)
static inline gid_t zfs_gid_read_impl(struct inode *ip)
{
#ifdef HAVE_SUPER_USER_NS
return (from_kgid(ip->i_sb->s_user_ns, ip->i_gid));
#else
return (from_kgid(kcred->user_ns, ip->i_gid));
#endif
}
static inline gid_t zfs_gid_read(struct inode *ip)
@ -324,20 +316,12 @@ static inline gid_t zfs_gid_read(struct inode *ip)
static inline void zfs_uid_write(struct inode *ip, uid_t uid)
{
#ifdef HAVE_SUPER_USER_NS
ip->i_uid = make_kuid(ip->i_sb->s_user_ns, uid);
#else
ip->i_uid = make_kuid(kcred->user_ns, uid);
#endif
}
static inline void zfs_gid_write(struct inode *ip, gid_t gid)
{
#ifdef HAVE_SUPER_USER_NS
ip->i_gid = make_kgid(ip->i_sb->s_user_ns, gid);
#else
ip->i_gid = make_kgid(kcred->user_ns, gid);
#endif
}
/*

View File

@ -69,9 +69,20 @@ typedef struct zfs_uio {
uint16_t uio_fmode;
uint16_t uio_extflg;
ssize_t uio_resid;
size_t uio_skip;
struct request *rq;
/*
* Used for saving rq_for_each_segment() state between calls
* to zfs_uiomove_bvec_rq().
*/
struct req_iterator iter;
struct bio_vec bv;
} zfs_uio_t;
#define zfs_uio_segflg(u) (u)->uio_segflg
#define zfs_uio_offset(u) (u)->uio_loffset
#define zfs_uio_resid(u) (u)->uio_resid
@ -116,17 +127,33 @@ zfs_uio_iovec_init(zfs_uio_t *uio, const struct iovec *iov,
}
static inline void
zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio)
zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio, struct request *rq)
{
uio->uio_bvec = &bio->bi_io_vec[BIO_BI_IDX(bio)];
uio->uio_iovcnt = bio->bi_vcnt - BIO_BI_IDX(bio);
uio->uio_loffset = BIO_BI_SECTOR(bio) << 9;
/* Either bio or rq will be set, but not both */
ASSERT3P(bio, !=, rq);
if (bio) {
uio->uio_iovcnt = bio->bi_vcnt - BIO_BI_IDX(bio);
uio->uio_bvec = &bio->bi_io_vec[BIO_BI_IDX(bio)];
} else {
uio->uio_bvec = NULL;
uio->uio_iovcnt = 0;
memset(&uio->iter, 0, sizeof (uio->iter));
}
uio->uio_loffset = io_offset(bio, rq);
uio->uio_segflg = UIO_BVEC;
uio->uio_fault_disable = B_FALSE;
uio->uio_fmode = 0;
uio->uio_extflg = 0;
uio->uio_resid = BIO_BI_SIZE(bio);
uio->uio_skip = BIO_BI_SKIP(bio);
uio->uio_resid = io_size(bio, rq);
if (bio) {
uio->uio_skip = BIO_BI_SKIP(bio);
} else {
uio->uio_skip = 0;
}
uio->rq = rq;
}
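
Legacy-path callers are expected to pass (bio, NULL) and blk-mq callers
(NULL, rq); the offset and residual then come from io_offset()/io_size() on
whichever request object is set.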
#if defined(HAVE_VFS_IOV_ITER)

View File

@ -25,11 +25,34 @@
#define _SPL_ZONE_H
#include <sys/byteorder.h>
#include <sys/cred.h>
#define GLOBAL_ZONEID 0
#include <linux/cred.h>
#include <linux/user_namespace.h>
#define zone_dataset_visible(x, y) (1)
#define crgetzoneid(x) (GLOBAL_ZONEID)
#define INGLOBALZONE(z) (1)
/*
* Attach the given dataset to the given user namespace.
*/
extern int zone_dataset_attach(cred_t *, const char *, int);
/*
* Detach the given dataset from the given user namespace.
*/
extern int zone_dataset_detach(cred_t *, const char *, int);
/*
* Returns true if the named pool/dataset is visible in the current zone.
*/
extern int zone_dataset_visible(const char *dataset, int *write);
int spl_zone_init(void);
void spl_zone_fini(void);
extern unsigned int crgetzoneid(const cred_t *);
extern unsigned int global_zoneid(void);
extern boolean_t inglobalzone(proc_t *);
#define INGLOBALZONE(x) inglobalzone(x)
#define GLOBAL_ZONEID global_zoneid()
#endif /* SPL_ZONE_H */

View File

@ -32,4 +32,9 @@
#define HAVE_LARGE_STACKS 1
#endif
#if defined(CONFIG_UML)
#undef setjmp
#undef longjmp
#endif
#endif

View File

@ -0,0 +1,125 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
* Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor
* Copyright (c) 2021 Tino Reichardt <milky-zfs@mcmilk.de>
*/
#ifndef BLAKE3_H
#define BLAKE3_H
#ifdef _KERNEL
#include <sys/types.h>
#else
#include <stdint.h>
#include <stdlib.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
#define BLAKE3_KEY_LEN 32
#define BLAKE3_OUT_LEN 32
#define BLAKE3_MAX_DEPTH 54
#define BLAKE3_BLOCK_LEN 64
#define BLAKE3_CHUNK_LEN 1024
/*
* This struct is a private implementation detail.
* It has to be here because it's part of BLAKE3_CTX below.
*/
typedef struct {
uint32_t cv[8];
uint64_t chunk_counter;
uint8_t buf[BLAKE3_BLOCK_LEN];
uint8_t buf_len;
uint8_t blocks_compressed;
uint8_t flags;
} blake3_chunk_state_t;
typedef struct {
uint32_t key[8];
blake3_chunk_state_t chunk;
uint8_t cv_stack_len;
/*
* The stack size is MAX_DEPTH + 1 because we do lazy merging. For
* example, with 7 chunks, we have 3 entries in the stack. Adding an
* 8th chunk requires a 4th entry, rather than merging everything down
* to 1, because we don't know whether more input is coming. This is
* different from how the reference implementation does things.
*/
uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];
/* const blake3_impl_ops_t *ops */
const void *ops;
} BLAKE3_CTX;
/* init the context for hash operation */
void Blake3_Init(BLAKE3_CTX *ctx);
/* init the context for a MAC and/or tree hash operation */
void Blake3_InitKeyed(BLAKE3_CTX *ctx, const uint8_t key[BLAKE3_KEY_LEN]);
/* process the input bytes */
void Blake3_Update(BLAKE3_CTX *ctx, const void *input, size_t input_len);
/* finalize the hash computation and output the result */
void Blake3_Final(const BLAKE3_CTX *ctx, uint8_t *out);
/* finalize the hash computation and output the result */
void Blake3_FinalSeek(const BLAKE3_CTX *ctx, uint64_t seek, uint8_t *out,
size_t out_len);
/* these are pre-allocated contexts */
extern void **blake3_per_cpu_ctx;
extern void blake3_per_cpu_ctx_init(void);
extern void blake3_per_cpu_ctx_fini(void);
/* return number of supported implementations */
extern int blake3_get_impl_count(void);
/* return id of selected implementation */
extern int blake3_get_impl_id(void);
/* return name of selected implementation */
extern const char *blake3_get_impl_name(void);
/* set the implementation with the given id as the fastest */
extern void blake3_set_impl_fastest(uint32_t id);
/* set implementation by id */
extern void blake3_set_impl_id(uint32_t id);
/* set implementation by name */
extern int blake3_set_impl_name(const char *name);
/* set startup implementation */
extern void blake3_setup_impl(void);
#ifdef __cplusplus
}
#endif
#endif /* BLAKE3_H */
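
A minimal usage sketch of the context API declared above (plain hashing shown;
Blake3_InitKeyed() covers the keyed/MAC mode):

#include <sys/blake3.h>

static void
blake3_of_buf(const void *buf, size_t len, uint8_t out[BLAKE3_OUT_LEN])
{
	BLAKE3_CTX ctx;

	Blake3_Init(&ctx);		/* or Blake3_InitKeyed(&ctx, key) */
	Blake3_Update(&ctx, buf, len);	/* may be called repeatedly to stream input */
	Blake3_Final(&ctx, out);	/* writes BLAKE3_OUT_LEN (32) bytes */
}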

View File

@ -93,6 +93,7 @@ typedef enum dmu_objset_type {
typedef enum {
ZPROP_CONT = -2,
ZPROP_INVAL = -1,
ZPROP_USERPROP = ZPROP_INVAL,
ZFS_PROP_TYPE = 0,
ZFS_PROP_CREATION,
ZFS_PROP_USED,
@ -310,7 +311,7 @@ typedef int (*zprop_func)(int, void *);
*/
typedef enum {
VDEV_PROP_INVAL = -1,
#define VDEV_PROP_USER VDEV_PROP_INVAL
VDEV_PROP_USERPROP = VDEV_PROP_INVAL,
VDEV_PROP_NAME,
VDEV_PROP_CAPACITY,
VDEV_PROP_STATE,
@ -1450,7 +1451,9 @@ typedef enum zfs_ioc {
ZFS_IOC_EVENTS_SEEK, /* 0x83 (Linux) */
ZFS_IOC_NEXTBOOT, /* 0x84 (FreeBSD) */
ZFS_IOC_JAIL, /* 0x85 (FreeBSD) */
ZFS_IOC_USERNS_ATTACH = ZFS_IOC_JAIL, /* 0x85 (Linux) */
ZFS_IOC_UNJAIL, /* 0x86 (FreeBSD) */
ZFS_IOC_USERNS_DETACH = ZFS_IOC_UNJAIL, /* 0x86 (Linux) */
ZFS_IOC_SET_BOOTENV, /* 0x87 */
ZFS_IOC_GET_BOOTENV, /* 0x88 */
ZFS_IOC_LAST
@ -1531,6 +1534,7 @@ typedef enum {
ZFS_ERR_REBUILD_IN_PROGRESS,
ZFS_ERR_BADPROP,
ZFS_ERR_VDEV_NOTSUP,
ZFS_ERR_NOT_USER_NAMESPACE,
} zfs_errno_t;
/*

View File

@ -0,0 +1,48 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2021 Tino Reichardt <milky-zfs@mcmilk.de>
*/
#ifndef _ZFS_CHKSUM_H
#define _ZFS_CHKSUM_H
#ifdef _KERNEL
#include <sys/types.h>
#else
#include <stdint.h>
#include <stdlib.h>
#endif
#ifdef __cplusplus
extern "C" {
#endif
/* Benchmark the checksums of ZFS when the module is loading */
void chksum_init(void);
void chksum_fini(void);
#ifdef __cplusplus
}
#endif
#endif /* _ZFS_CHKSUM_H */

View File

@ -124,6 +124,7 @@ typedef enum drr_headertype {
* default use of "zfs send" won't encounter the bug mentioned above.
*/
#define DMU_BACKUP_FEATURE_SWITCH_TO_LARGE_BLOCKS (1 << 27)
#define DMU_BACKUP_FEATURE_BLAKE3 (1 << 28)
/*
* Mask of all supported backup features
@ -134,7 +135,7 @@ typedef enum drr_headertype {
DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE | \
DMU_BACKUP_FEATURE_RAW | DMU_BACKUP_FEATURE_HOLDS | \
DMU_BACKUP_FEATURE_REDACTED | DMU_BACKUP_FEATURE_SWITCH_TO_LARGE_BLOCKS | \
DMU_BACKUP_FEATURE_ZSTD)
DMU_BACKUP_FEATURE_ZSTD | DMU_BACKUP_FEATURE_BLAKE3)
/* Are all features in the given flag word currently supported? */
#define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK))

View File

@ -89,6 +89,7 @@ enum zio_checksum {
ZIO_CHECKSUM_SHA512,
ZIO_CHECKSUM_SKEIN,
ZIO_CHECKSUM_EDONR,
ZIO_CHECKSUM_BLAKE3,
ZIO_CHECKSUM_FUNCTIONS
};

View File

@ -21,7 +21,8 @@
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2014, 2016 by Delphix. All rights reserved.
* Copyright Saso Kiselkov 2013, All rights reserved.
* Copyright (c) 2013 Saso Kiselkov, All rights reserved.
* Copyright (c) 2021 Tino Reichardt <milky-zfs@mcmilk.de>
*/
#ifndef _SYS_ZIO_CHECKSUM_H
@ -107,6 +108,8 @@ _SYS_ZIO_CHECKSUM_H zio_checksum_info_t
/*
* Checksum routines.
*/
/* SHA2 */
extern zio_checksum_t abd_checksum_SHA256;
extern zio_checksum_t abd_checksum_SHA512_native;
extern zio_checksum_t abd_checksum_SHA512_byteswap;
@ -123,6 +126,13 @@ extern zio_checksum_t abd_checksum_edonr_byteswap;
extern zio_checksum_tmpl_init_t abd_checksum_edonr_tmpl_init;
extern zio_checksum_tmpl_free_t abd_checksum_edonr_tmpl_free;
/* BLAKE3 */
extern zio_checksum_t abd_checksum_blake3_native;
extern zio_checksum_t abd_checksum_blake3_byteswap;
extern zio_checksum_tmpl_init_t abd_checksum_blake3_tmpl_init;
extern zio_checksum_tmpl_free_t abd_checksum_blake3_tmpl_free;
/* Fletcher 4 */
_SYS_ZIO_CHECKSUM_H zio_abd_checksum_func_t fletcher_4_abd_ops;
extern zio_checksum_t abd_fletcher_4_native;
extern zio_checksum_t abd_fletcher_4_byteswap;

View File

@ -77,6 +77,7 @@ typedef enum spa_feature {
SPA_FEATURE_DRAID,
SPA_FEATURE_ZILSAXATTR,
SPA_FEATURE_HEAD_ERRLOG,
SPA_FEATURE_BLAKE3,
SPA_FEATURES
} spa_feature_t;

View File

@ -13,6 +13,10 @@ nodist_libicp_la_SOURCES = \
module/icp/algs/aes/aes_impl_x86-64.c \
module/icp/algs/aes/aes_impl.c \
module/icp/algs/aes/aes_modes.c \
module/icp/algs/blake3/blake3.c \
module/icp/algs/blake3/blake3_generic.c \
module/icp/algs/blake3/blake3_impl.c \
module/icp/algs/blake3/blake3_x86-64.c \
module/icp/algs/edonr/edonr.c \
module/icp/algs/modes/modes.c \
module/icp/algs/modes/cbc.c \
@ -36,15 +40,30 @@ nodist_libicp_la_SOURCES = \
module/icp/core/kcf_mech_tabs.c \
module/icp/core/kcf_prov_tabs.c
if TARGET_CPU_AARCH64
nodist_libicp_la_SOURCES += \
module/icp/asm-aarch64/blake3/b3_aarch64_sse2.S \
module/icp/asm-aarch64/blake3/b3_aarch64_sse41.S
endif
if TARGET_CPU_POWERPC
nodist_libicp_la_SOURCES += \
module/icp/asm-ppc64/blake3/b3_ppc64le_sse2.S \
module/icp/asm-ppc64/blake3/b3_ppc64le_sse41.S
endif
if TARGET_CPU_X86_64
nodist_libicp_la_SOURCES += \
module/icp/asm-x86_64/aes/aeskey.c
nodist_libicp_la_SOURCES += \
module/icp/asm-x86_64/aes/aeskey.c \
module/icp/asm-x86_64/aes/aes_amd64.S \
module/icp/asm-x86_64/aes/aes_aesni.S \
module/icp/asm-x86_64/modes/gcm_pclmulqdq.S \
module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S \
module/icp/asm-x86_64/modes/ghash-x86_64.S \
module/icp/asm-x86_64/sha2/sha256_impl.S \
module/icp/asm-x86_64/sha2/sha512_impl.S
module/icp/asm-x86_64/sha2/sha512_impl.S \
module/icp/asm-x86_64/blake3/blake3_avx2.S \
module/icp/asm-x86_64/blake3/blake3_avx512.S \
module/icp/asm-x86_64/blake3/blake3_sse2.S \
module/icp/asm-x86_64/blake3/blake3_sse41.S
endif

View File

@ -491,6 +491,24 @@ zfs_altivec_available(void)
#endif
return (has_altivec);
}
static inline boolean_t
zfs_vsx_available(void)
{
boolean_t has_vsx = B_FALSE;
#if defined(__ALTIVEC__) && !defined(__FreeBSD__)
sighandler_t savesig;
savesig = signal(SIGILL, sigillhandler);
if (setjmp(env)) {
signal(SIGILL, savesig);
has_vsx = B_FALSE;
} else {
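/* xssubsp is a VSX instruction; without VSX it raises SIGILL and the handler longjmps back */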
__asm__ __volatile__("xssubsp 0,0,0\n");
signal(SIGILL, savesig);
has_vsx = B_TRUE;
}
#endif
return (has_vsx);
}
#else
#define kfpu_allowed() 0

View File

@ -44,7 +44,7 @@
#include <inttypes.h>
#endif /* HAVE_INTTYPES */
typedef int zoneid_t;
typedef uint_t zoneid_t;
typedef int projid_t;
/*

View File

@ -33,7 +33,17 @@
extern "C" {
#endif
#define GLOBAL_ZONEID 0
#ifdef __FreeBSD__
#define GLOBAL_ZONEID 0
#else
/*
* Hardcoded inum of the kernel's root user namespace. A "better" way to get
* this would be by using ioctl_ns(2), but this would need to be performed
* recursively on NS_GET_PARENT and then NS_GET_USERNS. Also, that's only
* supported since Linux 4.9.
*/
#define GLOBAL_ZONEID 4026531837U
#endif
extern zoneid_t getzoneid(void);

View File

@ -23,10 +23,40 @@
* Use is subject to license terms.
*/
#include <unistd.h>
#include <stdio.h>
#include <errno.h>
#include <stdlib.h>
#include <limits.h>
#include <string.h>
#include <zone.h>
zoneid_t
getzoneid(void)
{
return (GLOBAL_ZONEID);
char path[PATH_MAX];
char buf[128] = { '\0' };
char *cp;
int c = snprintf(path, sizeof (path), "/proc/self/ns/user");
/* This API doesn't have any error checking... */
if (c < 0)
return (0);
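/* the link text has the form "user:[4026531837]"; the inum in brackets is the zone id */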
ssize_t r = readlink(path, buf, sizeof (buf) - 1);
if (r < 0)
return (0);
cp = strchr(buf, '[');
if (cp == NULL)
return (0);
cp++;
unsigned long n = strtoul(cp, NULL, 10);
if (n == ULONG_MAX && errno == ERANGE)
return (0);
zoneid_t z = (zoneid_t)n;
return (z);
}

View File

@ -1081,7 +1081,7 @@
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='os/linux/zone.c' language='LANG_C99'>
<typedef-decl name='zoneid_t' type-id='95e97e5e' id='4da03624'/>
<typedef-decl name='zoneid_t' type-id='3502e3ff' id='4da03624'/>
<function-decl name='getzoneid' mangled-name='getzoneid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getzoneid'>
<return type-id='4da03624'/>
</function-decl>

View File

@ -433,6 +433,7 @@
<elf-symbol name='zfs_unmountall' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_unshare' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_unshareall' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_userns' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_userspace' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_valid_proplist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_version_kernel' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@ -583,7 +584,7 @@
<elf-symbol name='fletcher_4_superscalar_ops' size='64' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='libzfs_config_ops' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='sa_protocol_names' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='spa_feature_table' size='2016' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='spa_feature_table' size='2072' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfeature_checks_disable' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_deleg_perm_tab' size='512' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
<elf-symbol name='zfs_history_event_names' size='328' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
@ -1537,7 +1538,7 @@
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='lib/libspl/os/linux/zone.c' language='LANG_C99'>
<typedef-decl name='zoneid_t' type-id='95e97e5e' id='4da03624'/>
<typedef-decl name='zoneid_t' type-id='3502e3ff' id='4da03624'/>
<function-decl name='getzoneid' mangled-name='getzoneid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getzoneid'>
<return type-id='4da03624'/>
</function-decl>
@ -4414,6 +4415,12 @@
<function-decl name='zfs_version_kernel' mangled-name='zfs_version_kernel' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_version_kernel'>
<return type-id='26a90f95'/>
</function-decl>
<function-decl name='zfs_userns' mangled-name='zfs_userns' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_userns'>
<parameter type-id='9200a744' name='zhp'/>
<parameter type-id='80f4b756' name='nspath'/>
<parameter type-id='95e97e5e' name='attach'/>
<return type-id='95e97e5e'/>
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='lib/libzutil/os/linux/zutil_device_path_os.c' language='LANG_C99'>
<function-decl name='zfs_append_partition' mangled-name='zfs_append_partition' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_append_partition'>
@ -4770,8 +4777,8 @@
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='module/zcommon/zfeature_common.c' language='LANG_C99'>
<array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='16128' id='9d5e9e2e'>
<subrange length='36' type-id='7359adad' id='ae666bde'/>
<array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='16576' id='9d5e9e2e'>
<subrange length='37' type-id='7359adad' id='ae666bde'/>
</array-type-def>
<enum-decl name='spa_feature' id='33ecb627'>
<underlying-type type-id='9cac1fee'/>
@ -4812,7 +4819,8 @@
<enumerator name='SPA_FEATURE_DRAID' value='33'/>
<enumerator name='SPA_FEATURE_ZILSAXATTR' value='34'/>
<enumerator name='SPA_FEATURE_HEAD_ERRLOG' value='35'/>
<enumerator name='SPA_FEATURES' value='36'/>
<enumerator name='SPA_FEATURE_BLAKE3' value='36'/>
<enumerator name='SPA_FEATURES' value='37'/>
</enum-decl>
<typedef-decl name='spa_feature_t' type-id='33ecb627' id='d6618c78'/>
<enum-decl name='zfeature_flags' id='6db816a4'>

View File

@ -1003,7 +1003,7 @@ zfs_crypto_create(libzfs_handle_t *hdl, char *parent_name, nvlist_t *props,
uint_t *wkeylen_out)
{
int ret;
char errbuf[1024];
char errbuf[ERRBUFLEN];
uint64_t crypt = ZIO_CRYPT_INHERIT, pcrypt = ZIO_CRYPT_INHERIT;
uint64_t keyformat = ZFS_KEYFORMAT_NONE;
char *keylocation = NULL;
@ -1174,7 +1174,7 @@ zfs_crypto_clone_check(libzfs_handle_t *hdl, zfs_handle_t *origin_zhp,
char *parent_name, nvlist_t *props)
{
(void) origin_zhp, (void) parent_name;
char errbuf[1024];
char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "Encryption clone error"));
@ -1276,7 +1276,7 @@ zfs_crypto_load_key(zfs_handle_t *zhp, boolean_t noop,
const char *alt_keylocation)
{
int ret, attempts = 0;
char errbuf[1024];
char errbuf[ERRBUFLEN];
uint64_t keystatus, iters = 0, salt = 0;
uint64_t keyformat = ZFS_KEYFORMAT_NONE;
char prop_keylocation[MAXNAMELEN];
@ -1444,7 +1444,7 @@ int
zfs_crypto_unload_key(zfs_handle_t *zhp)
{
int ret;
char errbuf[1024];
char errbuf[ERRBUFLEN];
char prop_encroot[MAXNAMELEN];
uint64_t keystatus, keyformat;
boolean_t is_encroot;
@ -1580,7 +1580,7 @@ int
zfs_crypto_rewrap(zfs_handle_t *zhp, nvlist_t *raw_props, boolean_t inheritkey)
{
int ret;
char errbuf[1024];
char errbuf[ERRBUFLEN];
boolean_t is_encroot;
nvlist_t *props = NULL;
uint8_t *wkeydata = NULL;

View File

@ -678,7 +678,7 @@ zfs_handle_t *
zfs_open(libzfs_handle_t *hdl, const char *path, int types)
{
zfs_handle_t *zhp;
char errbuf[1024];
char errbuf[ERRBUFLEN];
char *bookp;
(void) snprintf(errbuf, sizeof (errbuf),
@ -1022,7 +1022,7 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
const char *propname = nvpair_name(elem);
prop = zfs_name_to_prop(propname);
if (prop == ZPROP_INVAL && zfs_prop_user(propname)) {
if (prop == ZPROP_USERPROP && zfs_prop_user(propname)) {
/*
* This is a user property: make sure it's a
* string, and that it's less than ZAP_MAXNAMELEN.
@ -1061,7 +1061,7 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
goto error;
}
if (prop == ZPROP_INVAL && zfs_prop_userquota(propname)) {
if (prop == ZPROP_USERPROP && zfs_prop_userquota(propname)) {
zfs_userquota_prop_t uqtype;
char *newpropname = NULL;
char domain[128];
@ -1143,7 +1143,8 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
}
free(newpropname);
continue;
} else if (prop == ZPROP_INVAL && zfs_prop_written(propname)) {
} else if (prop == ZPROP_USERPROP &&
zfs_prop_written(propname)) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"'%s' is readonly"),
propname);
@ -1716,7 +1717,7 @@ int
zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval)
{
int ret = -1;
char errbuf[1024];
char errbuf[ERRBUFLEN];
libzfs_handle_t *hdl = zhp->zfs_hdl;
nvlist_t *nvl = NULL;
@ -1750,7 +1751,7 @@ zfs_prop_set_list(zfs_handle_t *zhp, nvlist_t *props)
int ret = -1;
prop_changelist_t **cls = NULL;
int cl_idx;
char errbuf[1024];
char errbuf[ERRBUFLEN];
libzfs_handle_t *hdl = zhp->zfs_hdl;
nvlist_t *nvl;
int nvl_len = 0;
@ -1930,14 +1931,14 @@ zfs_prop_inherit(zfs_handle_t *zhp, const char *propname, boolean_t received)
int ret;
prop_changelist_t *cl;
libzfs_handle_t *hdl = zhp->zfs_hdl;
char errbuf[1024];
char errbuf[ERRBUFLEN];
zfs_prop_t prop;
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot inherit %s for '%s'"), propname, zhp->zfs_name);
zc.zc_cookie = received;
if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL) {
if ((prop = zfs_name_to_prop(propname)) == ZPROP_USERPROP) {
/*
* For user properties, the amount of work we have to do is very
* small, so just do it here.
@ -2356,7 +2357,7 @@ zfs_prop_get_recvd(zfs_handle_t *zhp, const char *propname, char *propbuf,
prop = zfs_name_to_prop(propname);
if (prop != ZPROP_INVAL) {
if (prop != ZPROP_USERPROP) {
uint64_t cookie;
if (!nvlist_exists(zhp->zfs_recvd_props, propname))
return (-1);
@ -3402,7 +3403,7 @@ check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned,
char parent[ZFS_MAX_DATASET_NAME_LEN];
char *slash;
zfs_handle_t *zhp;
char errbuf[1024];
char errbuf[ERRBUFLEN];
uint64_t is_zoned;
(void) snprintf(errbuf, sizeof (errbuf),
@ -3580,7 +3581,7 @@ zfs_create_ancestors(libzfs_handle_t *hdl, const char *path)
{
int prefix;
char *path_copy;
char errbuf[1024];
char errbuf[ERRBUFLEN];
int rc = 0;
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
@ -3624,7 +3625,7 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
zpool_handle_t *zpool_handle;
uint8_t *wkeydata = NULL;
uint_t wkeylen = 0;
char errbuf[1024];
char errbuf[ERRBUFLEN];
char parent[ZFS_MAX_DATASET_NAME_LEN];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
@ -3897,7 +3898,7 @@ zfs_destroy_snaps_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, boolean_t defer)
}
if (nvlist_empty(errlist)) {
char errbuf[1024];
char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot destroy snapshots"));
@ -3905,7 +3906,7 @@ zfs_destroy_snaps_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, boolean_t defer)
}
for (pair = nvlist_next_nvpair(errlist, NULL);
pair != NULL; pair = nvlist_next_nvpair(errlist, pair)) {
char errbuf[1024];
char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot destroy snapshot %s"),
nvpair_name(pair));
@ -3934,7 +3935,7 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props)
{
char parent[ZFS_MAX_DATASET_NAME_LEN];
int ret;
char errbuf[1024];
char errbuf[ERRBUFLEN];
libzfs_handle_t *hdl = zhp->zfs_hdl;
uint64_t zoned;
@ -4018,7 +4019,7 @@ zfs_promote(zfs_handle_t *zhp)
libzfs_handle_t *hdl = zhp->zfs_hdl;
char snapname[ZFS_MAX_DATASET_NAME_LEN];
int ret;
char errbuf[1024];
char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot promote '%s'"), zhp->zfs_name);
@ -4100,7 +4101,7 @@ int
zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props)
{
int ret;
char errbuf[1024];
char errbuf[ERRBUFLEN];
nvpair_t *elem;
nvlist_t *errors;
zpool_handle_t *zpool_hdl;
@ -4185,7 +4186,7 @@ zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive,
char fsname[ZFS_MAX_DATASET_NAME_LEN];
char *cp;
zfs_handle_t *zhp;
char errbuf[1024];
char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot snapshot %s"), path);
@ -4328,7 +4329,7 @@ zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force)
*/
err = lzc_rollback_to(zhp->zfs_name, snap->zfs_name);
if (err != 0) {
char errbuf[1024];
char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot rollback '%s'"),
@ -4387,7 +4388,7 @@ zfs_rename(zfs_handle_t *zhp, const char *target, renameflags_t flags)
char parent[ZFS_MAX_DATASET_NAME_LEN];
char property[ZFS_MAXPROPLEN];
libzfs_handle_t *hdl = zhp->zfs_hdl;
char errbuf[1024];
char errbuf[ERRBUFLEN];
/* if we have the same exact name, just return success */
if (strcmp(zhp->zfs_name, target) == 0)
@ -4635,7 +4636,7 @@ zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received,
*/
start = plp;
while (*start != NULL) {
if ((*start)->pl_prop == ZPROP_INVAL)
if ((*start)->pl_prop == ZPROP_USERPROP)
break;
start = &(*start)->pl_next;
}
@ -4656,7 +4657,7 @@ zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received,
entry = zfs_alloc(hdl, sizeof (zprop_list_t));
entry->pl_user_prop =
zfs_strdup(hdl, nvpair_name(elem));
entry->pl_prop = ZPROP_INVAL;
entry->pl_prop = ZPROP_USERPROP;
entry->pl_width = strlen(nvpair_name(elem));
entry->pl_all = B_TRUE;
*last = entry;
@ -4671,7 +4672,7 @@ zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received,
if (entry->pl_fixed && !literal)
continue;
if (entry->pl_prop != ZPROP_INVAL) {
if (entry->pl_prop != ZPROP_USERPROP) {
if (zfs_prop_get(zhp, entry->pl_prop,
buf, sizeof (buf), NULL, NULL, 0, literal) == 0) {
if (strlen(buf) > entry->pl_width)
@ -4720,13 +4721,14 @@ zfs_prune_proplist(zfs_handle_t *zhp, uint8_t *props)
next = nvlist_next_nvpair(zhp->zfs_props, curr);
/*
* User properties will result in ZPROP_INVAL, and since we
* User properties will result in ZPROP_USERPROP (an alias
* for ZPROP_INVAL), and since we
* only know how to prune standard ZFS properties, we always
* leave these in the list. This can also happen if we
* encounter an unknown DSL property (when running older
* software, for example).
*/
if (zfs_prop != ZPROP_INVAL && props[zfs_prop] == B_FALSE)
if (zfs_prop != ZPROP_USERPROP && props[zfs_prop] == B_FALSE)
(void) nvlist_remove(zhp->zfs_props,
nvpair_name(curr), nvpair_type(curr));
curr = next;
@ -4902,7 +4904,7 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
(void) zfs_hold_one(zfs_handle_dup(zhp), &ha);
if (nvlist_empty(ha.nvl)) {
char errbuf[1024];
char errbuf[ERRBUFLEN];
fnvlist_free(ha.nvl);
ret = ENOENT;
@ -4926,7 +4928,7 @@ zfs_hold_nvl(zfs_handle_t *zhp, int cleanup_fd, nvlist_t *holds)
int ret;
nvlist_t *errors;
libzfs_handle_t *hdl = zhp->zfs_hdl;
char errbuf[1024];
char errbuf[ERRBUFLEN];
nvpair_t *elem;
errors = NULL;
@ -5028,7 +5030,7 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
nvlist_t *errors = NULL;
nvpair_t *elem;
libzfs_handle_t *hdl = zhp->zfs_hdl;
char errbuf[1024];
char errbuf[ERRBUFLEN];
ha.nvl = fnvlist_alloc();
ha.snapname = snapname;
@ -5108,7 +5110,7 @@ zfs_get_fsacl(zfs_handle_t *zhp, nvlist_t **nvl)
int nvsz = 2048;
void *nvbuf;
int err = 0;
char errbuf[1024];
char errbuf[ERRBUFLEN];
assert(zhp->zfs_type == ZFS_TYPE_VOLUME ||
zhp->zfs_type == ZFS_TYPE_FILESYSTEM);
@ -5172,7 +5174,7 @@ zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl)
zfs_cmd_t zc = {"\0"};
libzfs_handle_t *hdl = zhp->zfs_hdl;
char *nvbuf;
char errbuf[1024];
char errbuf[ERRBUFLEN];
size_t nvsz;
int err;
@ -5224,7 +5226,7 @@ int
zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl)
{
int err;
char errbuf[1024];
char errbuf[ERRBUFLEN];
err = lzc_get_holds(zhp->zfs_name, nvl);

View File

@ -709,7 +709,7 @@ zfs_show_diffs(zfs_handle_t *zhp, int outfd, const char *fromsnap,
const char *tosnap, int flags)
{
zfs_cmd_t zc = {"\0"};
char errbuf[1024];
char errbuf[ERRBUFLEN];
differ_info_t di = { 0 };
pthread_t tid;
int pipefd[2];

View File

@ -44,6 +44,8 @@
extern "C" {
#endif
#define ERRBUFLEN 1024
struct libzfs_handle {
int libzfs_error;
int libzfs_fd;
@ -208,7 +210,7 @@ typedef struct differ_info {
char *ds;
char *dsmnt;
char *tmpsnap;
char errbuf[1024];
char errbuf[ERRBUFLEN];
boolean_t isclone;
boolean_t scripted;
boolean_t classify;

View File

@ -776,7 +776,7 @@ zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
{
zfs_cmd_t zc = {"\0"};
int ret = -1;
char errbuf[1024];
char errbuf[ERRBUFLEN];
nvlist_t *nvl = NULL;
nvlist_t *realprops;
uint64_t version;
@ -854,7 +854,7 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp,
for (i = 0; i < SPA_FEATURES; i++) {
zprop_list_t *entry = zfs_alloc(hdl,
sizeof (zprop_list_t));
entry->pl_prop = ZPROP_INVAL;
entry->pl_prop = ZPROP_USERPROP;
entry->pl_user_prop = zfs_asprintf(hdl, "feature@%s",
spa_feature_table[i].fi_uname);
entry->pl_width = strlen(entry->pl_user_prop);
@ -898,7 +898,7 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp,
}
entry = zfs_alloc(hdl, sizeof (zprop_list_t));
entry->pl_prop = ZPROP_INVAL;
entry->pl_prop = ZPROP_USERPROP;
entry->pl_user_prop = propname;
entry->pl_width = strlen(entry->pl_user_prop);
entry->pl_all = B_TRUE;
@ -911,7 +911,7 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp,
if (entry->pl_fixed && !literal)
continue;
if (entry->pl_prop != ZPROP_INVAL &&
if (entry->pl_prop != ZPROP_USERPROP &&
zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
NULL, literal) == 0) {
if (strlen(buf) > entry->pl_width)
@ -967,7 +967,7 @@ vdev_expand_proplist(zpool_handle_t *zhp, const char *vdevname,
/* Skip properties that are not user defined */
if ((prop = vdev_name_to_prop(propname)) !=
VDEV_PROP_USER)
VDEV_PROP_USERPROP)
continue;
if (nvpair_value_nvlist(elem, &propval) != 0)
@ -1368,14 +1368,14 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
nvlist_t *hidden_args = NULL;
uint8_t *wkeydata = NULL;
uint_t wkeylen = 0;
char msg[1024];
char errbuf[ERRBUFLEN];
int ret = -1;
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot create '%s'"), pool);
if (!zpool_name_valid(hdl, B_FALSE, pool))
return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
zcmd_write_conf_nvlist(hdl, &zc, nvroot);
@ -1383,7 +1383,7 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };
if ((zc_props = zpool_valid_proplist(hdl, pool, props,
SPA_VERSION_1, flags, msg)) == NULL) {
SPA_VERSION_1, flags, errbuf)) == NULL) {
goto create_failed;
}
}
@ -1397,7 +1397,7 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
strcmp(zonestr, "on") == 0);
if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM,
fsprops, zoned, NULL, NULL, B_TRUE, msg)) == NULL) {
fsprops, zoned, NULL, NULL, B_TRUE, errbuf)) == NULL) {
goto create_failed;
}
@ -1407,7 +1407,7 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"%s property requires a special vdev"),
zfs_prop_to_name(ZFS_PROP_SPECIAL_SMALL_BLOCKS));
(void) zfs_error(hdl, EZFS_BADPROP, msg);
(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
goto create_failed;
}
@ -1417,7 +1417,7 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
}
if (zfs_crypto_create(hdl, NULL, zc_fsprops, props, B_TRUE,
&wkeydata, &wkeylen) != 0) {
zfs_error(hdl, EZFS_CRYPTOFAILED, msg);
zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf);
goto create_failed;
}
if (nvlist_add_nvlist(zc_props,
@ -1465,7 +1465,7 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
"one or more vdevs refer to the same device, or "
"one of\nthe devices is part of an active md or "
"lvm device"));
return (zfs_error(hdl, EZFS_BADDEV, msg));
return (zfs_error(hdl, EZFS_BADDEV, errbuf));
case ERANGE:
/*
@ -1480,7 +1480,7 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
*/
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"record size invalid"));
return (zfs_error(hdl, EZFS_BADPROP, msg));
return (zfs_error(hdl, EZFS_BADPROP, errbuf));
case EOVERFLOW:
/*
@ -1499,12 +1499,12 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
"one or more devices is less than the "
"minimum size (%s)"), buf);
}
return (zfs_error(hdl, EZFS_BADDEV, msg));
return (zfs_error(hdl, EZFS_BADDEV, errbuf));
case ENOSPC:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"one or more devices is out of space"));
return (zfs_error(hdl, EZFS_BADDEV, msg));
return (zfs_error(hdl, EZFS_BADDEV, errbuf));
case EINVAL:
if (zpool_has_draid_vdev(nvroot) &&
@ -1512,13 +1512,14 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"dRAID vdevs are unsupported by the "
"kernel"));
return (zfs_error(hdl, EZFS_BADDEV, msg));
return (zfs_error(hdl, EZFS_BADDEV, errbuf));
} else {
return (zpool_standard_error(hdl, errno, msg));
return (zpool_standard_error(hdl, errno,
errbuf));
}
default:
return (zpool_standard_error(hdl, errno, msg));
return (zpool_standard_error(hdl, errno, errbuf));
}
}
@ -1542,7 +1543,7 @@ zpool_destroy(zpool_handle_t *zhp, const char *log_str)
zfs_cmd_t zc = {"\0"};
zfs_handle_t *zfp = NULL;
libzfs_handle_t *hdl = zhp->zpool_hdl;
char msg[1024];
char errbuf[ERRBUFLEN];
if (zhp->zpool_state == POOL_STATE_ACTIVE &&
(zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
@ -1552,15 +1553,15 @@ zpool_destroy(zpool_handle_t *zhp, const char *log_str)
zc.zc_history = (uint64_t)(uintptr_t)log_str;
if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot destroy '%s'"), zhp->zpool_name);
if (errno == EROFS) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"one or more devices is read only"));
(void) zfs_error(hdl, EZFS_BADDEV, msg);
(void) zfs_error(hdl, EZFS_BADDEV, errbuf);
} else {
(void) zpool_standard_error(hdl, errno, msg);
(void) zpool_standard_error(hdl, errno, errbuf);
}
if (zfp)
@ -1583,14 +1584,14 @@ int
zpool_checkpoint(zpool_handle_t *zhp)
{
libzfs_handle_t *hdl = zhp->zpool_hdl;
char msg[1024];
char errbuf[ERRBUFLEN];
int error;
error = lzc_pool_checkpoint(zhp->zpool_name);
if (error != 0) {
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot checkpoint '%s'"), zhp->zpool_name);
(void) zpool_standard_error(hdl, error, msg);
(void) zpool_standard_error(hdl, error, errbuf);
return (-1);
}
@ -1604,14 +1605,14 @@ int
zpool_discard_checkpoint(zpool_handle_t *zhp)
{
libzfs_handle_t *hdl = zhp->zpool_hdl;
char msg[1024];
char errbuf[ERRBUFLEN];
int error;
error = lzc_pool_checkpoint_discard(zhp->zpool_name);
if (error != 0) {
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot discard checkpoint in '%s'"), zhp->zpool_name);
(void) zpool_standard_error(hdl, error, msg);
(void) zpool_standard_error(hdl, error, errbuf);
return (-1);
}
@ -1628,11 +1629,11 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
zfs_cmd_t zc = {"\0"};
int ret;
libzfs_handle_t *hdl = zhp->zpool_hdl;
char msg[1024];
char errbuf[ERRBUFLEN];
nvlist_t **spares, **l2cache;
uint_t nspares, nl2cache;
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot add to '%s'"), zhp->zpool_name);
if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
@ -1641,7 +1642,7 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
&spares, &nspares) == 0) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
"upgraded to add hot spares"));
return (zfs_error(hdl, EZFS_BADVERSION, msg));
return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
}
if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
@ -1650,7 +1651,7 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
&l2cache, &nl2cache) == 0) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
"upgraded to add cache devices"));
return (zfs_error(hdl, EZFS_BADVERSION, msg));
return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
}
zcmd_write_conf_nvlist(hdl, &zc, nvroot);
@ -1667,7 +1668,7 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
*/
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"one or more vdevs refer to the same device"));
(void) zfs_error(hdl, EZFS_BADDEV, msg);
(void) zfs_error(hdl, EZFS_BADDEV, errbuf);
break;
case EINVAL:
@ -1684,7 +1685,7 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
"raidz or dRAID vdevs"));
}
(void) zfs_error(hdl, EZFS_BADDEV, msg);
(void) zfs_error(hdl, EZFS_BADDEV, errbuf);
break;
case EOVERFLOW:
@ -1704,17 +1705,17 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
"device is less than the minimum "
"size (%s)"), buf);
}
(void) zfs_error(hdl, EZFS_BADDEV, msg);
(void) zfs_error(hdl, EZFS_BADDEV, errbuf);
break;
case ENOTSUP:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"pool must be upgraded to add these vdevs"));
(void) zfs_error(hdl, EZFS_BADVERSION, msg);
(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
break;
default:
(void) zpool_standard_error(hdl, errno, msg);
(void) zpool_standard_error(hdl, errno, errbuf);
}
ret = -1;
@ -2009,7 +2010,7 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
char *origname;
int ret;
int error = 0;
char errbuf[1024];
char errbuf[ERRBUFLEN];
origname = fnvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME);
@ -2516,11 +2517,11 @@ zpool_trim(zpool_handle_t *zhp, pool_trim_func_t cmd_type, nvlist_t *vds,
goto out;
}
} else {
char msg[1024];
char errbuf[ERRBUFLEN];
(void) snprintf(msg, sizeof (msg),
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "operation failed"));
zpool_standard_error(zhp->zpool_hdl, err, msg);
zpool_standard_error(zhp->zpool_hdl, err, errbuf);
retval = -1;
goto out;
}
@ -2545,7 +2546,7 @@ int
zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
{
zfs_cmd_t zc = {"\0"};
char msg[1024];
char errbuf[ERRBUFLEN];
int err;
libzfs_handle_t *hdl = zhp->zpool_hdl;
@ -2568,21 +2569,22 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
if (func == POOL_SCAN_SCRUB) {
if (cmd == POOL_SCRUB_PAUSE) {
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
"cannot pause scrubbing %s"), zc.zc_name);
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot pause scrubbing %s"),
zc.zc_name);
} else {
assert(cmd == POOL_SCRUB_NORMAL);
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
"cannot scrub %s"), zc.zc_name);
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot scrub %s"),
zc.zc_name);
}
} else if (func == POOL_SCAN_RESILVER) {
assert(cmd == POOL_SCRUB_NORMAL);
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot restart resilver on %s"), zc.zc_name);
} else if (func == POOL_SCAN_NONE) {
(void) snprintf(msg, sizeof (msg),
dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
zc.zc_name);
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot cancel scrubbing %s"), zc.zc_name);
} else {
assert(!"unexpected result");
}
@ -2599,18 +2601,19 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
if (ps && ps->pss_func == POOL_SCAN_SCRUB &&
ps->pss_state == DSS_SCANNING) {
if (cmd == POOL_SCRUB_PAUSE)
return (zfs_error(hdl, EZFS_SCRUB_PAUSED, msg));
return (zfs_error(hdl, EZFS_SCRUB_PAUSED,
errbuf));
else
return (zfs_error(hdl, EZFS_SCRUBBING, msg));
return (zfs_error(hdl, EZFS_SCRUBBING, errbuf));
} else {
return (zfs_error(hdl, EZFS_RESILVERING, msg));
return (zfs_error(hdl, EZFS_RESILVERING, errbuf));
}
} else if (err == ENOENT) {
return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
return (zfs_error(hdl, EZFS_NO_SCRUB, errbuf));
} else if (err == ENOTSUP && func == POOL_SCAN_RESILVER) {
return (zfs_error(hdl, EZFS_NO_RESILVER_DEFER, msg));
return (zfs_error(hdl, EZFS_NO_RESILVER_DEFER, errbuf));
} else {
return (zpool_standard_error(hdl, err, msg));
return (zpool_standard_error(hdl, err, errbuf));
}
}
@ -3087,28 +3090,28 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
vdev_state_t *newstate)
{
zfs_cmd_t zc = {"\0"};
char msg[1024];
char errbuf[ERRBUFLEN];
nvlist_t *tgt;
boolean_t avail_spare, l2cache, islog;
libzfs_handle_t *hdl = zhp->zpool_hdl;
if (flags & ZFS_ONLINE_EXPAND) {
(void) snprintf(msg, sizeof (msg),
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
} else {
(void) snprintf(msg, sizeof (msg),
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot online %s"), path);
}
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
&islog)) == NULL)
return (zfs_error(hdl, EZFS_NODEVICE, msg));
return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
if (avail_spare)
return (zfs_error(hdl, EZFS_ISSPARE, msg));
return (zfs_error(hdl, EZFS_ISSPARE, errbuf));
#ifndef __FreeBSD__
char *pathname;
@ -3126,7 +3129,7 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
if (l2cache) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"cannot expand cache devices"));
return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
return (zfs_error(hdl, EZFS_VDEVNOTSUP, errbuf));
}
if (wholedisk) {
@ -3139,12 +3142,12 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
sizeof (buf));
if (error != 0)
return (zfs_error(hdl, EZFS_NODEVICE,
msg));
errbuf));
fullpath = buf;
}
error = zpool_relabel_disk(hdl, fullpath, msg);
error = zpool_relabel_disk(hdl, fullpath, errbuf);
if (error != 0)
return (error);
}
@ -3159,9 +3162,9 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
"from this pool into a new one. Use '%s' "
"instead"), "zpool detach");
return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, errbuf));
}
return (zpool_standard_error(hdl, errno, msg));
return (zpool_standard_error(hdl, errno, errbuf));
}
*newstate = zc.zc_cookie;
@ -3175,23 +3178,23 @@ int
zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
{
zfs_cmd_t zc = {"\0"};
char msg[1024];
char errbuf[ERRBUFLEN];
nvlist_t *tgt;
boolean_t avail_spare, l2cache;
libzfs_handle_t *hdl = zhp->zpool_hdl;
(void) snprintf(msg, sizeof (msg),
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
NULL)) == NULL)
return (zfs_error(hdl, EZFS_NODEVICE, msg));
return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
if (avail_spare)
return (zfs_error(hdl, EZFS_ISSPARE, msg));
return (zfs_error(hdl, EZFS_ISSPARE, errbuf));
zc.zc_cookie = VDEV_STATE_OFFLINE;
zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
@ -3205,16 +3208,16 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
/*
* There are no other replicas of this device.
*/
return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
return (zfs_error(hdl, EZFS_NOREPLICAS, errbuf));
case EEXIST:
/*
* The log device has unplayed logs
*/
return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, errbuf));
default:
return (zpool_standard_error(hdl, errno, msg));
return (zpool_standard_error(hdl, errno, errbuf));
}
}
@ -3225,10 +3228,10 @@ int
zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
{
zfs_cmd_t zc = {"\0"};
char msg[1024];
char errbuf[ERRBUFLEN];
libzfs_handle_t *hdl = zhp->zpool_hdl;
(void) snprintf(msg, sizeof (msg),
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
@ -3245,10 +3248,10 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
/*
* There are no other replicas of this device.
*/
return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
return (zfs_error(hdl, EZFS_NOREPLICAS, errbuf));
default:
return (zpool_standard_error(hdl, errno, msg));
return (zpool_standard_error(hdl, errno, errbuf));
}
}
@ -3260,10 +3263,10 @@ int
zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
{
zfs_cmd_t zc = {"\0"};
char msg[1024];
char errbuf[ERRBUFLEN];
libzfs_handle_t *hdl = zhp->zpool_hdl;
(void) snprintf(msg, sizeof (msg),
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
@ -3274,7 +3277,7 @@ zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
return (0);
return (zpool_standard_error(hdl, errno, msg));
return (zpool_standard_error(hdl, errno, errbuf));
}
/*
@ -3312,7 +3315,7 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
const char *new_disk, nvlist_t *nvroot, int replacing, boolean_t rebuild)
{
zfs_cmd_t zc = {"\0"};
char msg[1024];
char errbuf[ERRBUFLEN];
int ret;
nvlist_t *tgt;
boolean_t avail_spare, l2cache, islog;
@ -3324,22 +3327,22 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
libzfs_handle_t *hdl = zhp->zpool_hdl;
if (replacing)
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot replace %s with %s"), old_disk, new_disk);
else
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot attach %s to %s"), new_disk, old_disk);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
&islog)) == NULL)
return (zfs_error(hdl, EZFS_NODEVICE, msg));
return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
if (avail_spare)
return (zfs_error(hdl, EZFS_ISSPARE, msg));
return (zfs_error(hdl, EZFS_ISSPARE, errbuf));
if (l2cache)
return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
return (zfs_error(hdl, EZFS_ISL2CACHE, errbuf));
zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
zc.zc_cookie = replacing;
@ -3349,14 +3352,14 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
zfeature_lookup_guid("org.openzfs:device_rebuild", NULL) != 0) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"the loaded zfs module doesn't support device rebuilds"));
return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
return (zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf));
}
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
&child, &children) != 0 || children != 1) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"new device must be a single disk"));
return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
return (zfs_error(hdl, EZFS_INVALCONFIG, errbuf));
}
config_root = fnvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
@ -3377,7 +3380,7 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"can only be replaced by another hot spare"));
free(newname);
return (zfs_error(hdl, EZFS_BADTARGET, msg));
return (zfs_error(hdl, EZFS_BADTARGET, errbuf));
}
free(newname);
@ -3435,7 +3438,7 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
"disks"));
}
}
(void) zfs_error(hdl, EZFS_BADTARGET, msg);
(void) zfs_error(hdl, EZFS_BADTARGET, errbuf);
break;
case EINVAL:
@ -3444,14 +3447,14 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
*/
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"new device must be a single disk"));
(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
(void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
break;
case EBUSY:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy, "
"or device removal is in progress"),
new_disk);
(void) zfs_error(hdl, EZFS_BADDEV, msg);
(void) zfs_error(hdl, EZFS_BADDEV, errbuf);
break;
case EOVERFLOW:
@ -3460,7 +3463,7 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
*/
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"device is too small"));
(void) zfs_error(hdl, EZFS_BADDEV, msg);
(void) zfs_error(hdl, EZFS_BADDEV, errbuf);
break;
case EDOM:
@ -3470,18 +3473,18 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"new device has a different optimal sector size; use the "
"option '-o ashift=N' to override the optimal size"));
(void) zfs_error(hdl, EZFS_BADDEV, msg);
(void) zfs_error(hdl, EZFS_BADDEV, errbuf);
break;
case ENAMETOOLONG:
/*
* The resulting top-level vdev spec won't fit in the label.
*/
(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
(void) zfs_error(hdl, EZFS_DEVOVERFLOW, errbuf);
break;
default:
(void) zpool_standard_error(hdl, errno, msg);
(void) zpool_standard_error(hdl, errno, errbuf);
}
return (-1);
@ -3494,24 +3497,24 @@ int
zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
{
zfs_cmd_t zc = {"\0"};
char msg[1024];
char errbuf[ERRBUFLEN];
nvlist_t *tgt;
boolean_t avail_spare, l2cache;
libzfs_handle_t *hdl = zhp->zpool_hdl;
(void) snprintf(msg, sizeof (msg),
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
NULL)) == NULL)
return (zfs_error(hdl, EZFS_NODEVICE, msg));
return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
if (avail_spare)
return (zfs_error(hdl, EZFS_ISSPARE, msg));
return (zfs_error(hdl, EZFS_ISSPARE, errbuf));
if (l2cache)
return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
return (zfs_error(hdl, EZFS_ISL2CACHE, errbuf));
zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
@ -3526,18 +3529,18 @@ zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
*/
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
"applicable to mirror and replacing vdevs"));
(void) zfs_error(hdl, EZFS_BADTARGET, msg);
(void) zfs_error(hdl, EZFS_BADTARGET, errbuf);
break;
case EBUSY:
/*
* There are no other replicas of this device.
*/
(void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
(void) zfs_error(hdl, EZFS_NOREPLICAS, errbuf);
break;
default:
(void) zpool_standard_error(hdl, errno, msg);
(void) zpool_standard_error(hdl, errno, errbuf);
}
return (-1);
@ -3592,7 +3595,7 @@ zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
nvlist_t *props, splitflags_t flags)
{
zfs_cmd_t zc = {"\0"};
char msg[1024], *bias;
char errbuf[ERRBUFLEN], *bias;
nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
nvlist_t **varray = NULL, *zc_props = NULL;
uint_t c, children, newchildren, lastlog = 0, vcount, found = 0;
@ -3601,11 +3604,11 @@ zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
boolean_t freelist = B_FALSE, memory_err = B_TRUE;
int retval = 0;
(void) snprintf(msg, sizeof (msg),
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);
if (!zpool_name_valid(hdl, B_FALSE, newname))
return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
if ((config = zpool_get_config(zhp, NULL)) == NULL) {
(void) fprintf(stderr, gettext("Internal error: unable to "
@ -3619,7 +3622,7 @@ zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
if (props) {
prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
props, vers, flags, msg)) == NULL)
props, vers, flags, errbuf)) == NULL)
return (-1);
(void) nvlist_lookup_uint64(zc_props,
zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly);
@ -3691,7 +3694,7 @@ zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
} else if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"Source pool must be composed only of mirrors\n"));
retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
retval = zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
goto out;
}
@ -3739,7 +3742,7 @@ zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
if (found != newchildren) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
"include at most one disk from each mirror"));
retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
retval = zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
goto out;
}
@ -3793,7 +3796,7 @@ zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
zcmd_write_src_nvlist(hdl, &zc, zc_props);
if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
retval = zpool_standard_error(hdl, errno, msg);
retval = zpool_standard_error(hdl, errno, errbuf);
goto out;
}
@ -3832,31 +3835,31 @@ int
zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
{
zfs_cmd_t zc = {"\0"};
char msg[1024];
char errbuf[ERRBUFLEN];
nvlist_t *tgt;
boolean_t avail_spare, l2cache, islog;
libzfs_handle_t *hdl = zhp->zpool_hdl;
uint64_t version;
(void) snprintf(msg, sizeof (msg),
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
if (zpool_is_draid_spare(path)) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"dRAID spares cannot be removed"));
return (zfs_error(hdl, EZFS_NODEVICE, msg));
return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
}
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
&islog)) == NULL)
return (zfs_error(hdl, EZFS_NODEVICE, msg));
return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
if (islog && version < SPA_VERSION_HOLES) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"pool must be upgraded to support log removal"));
return (zfs_error(hdl, EZFS_BADVERSION, msg));
return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
}
zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
@ -3870,7 +3873,7 @@ zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"invalid config; all top-level vdevs must "
"have the same sector size and not be raidz."));
(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
(void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
break;
case EBUSY:
@ -3881,21 +3884,21 @@ zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"Pool busy; removal may already be in progress"));
}
(void) zfs_error(hdl, EZFS_BUSY, msg);
(void) zfs_error(hdl, EZFS_BUSY, errbuf);
break;
case EACCES:
if (islog) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"Mount encrypted datasets to replay logs."));
(void) zfs_error(hdl, EZFS_BUSY, msg);
(void) zfs_error(hdl, EZFS_BUSY, errbuf);
} else {
(void) zpool_standard_error(hdl, errno, msg);
(void) zpool_standard_error(hdl, errno, errbuf);
}
break;
default:
(void) zpool_standard_error(hdl, errno, msg);
(void) zpool_standard_error(hdl, errno, errbuf);
}
return (-1);
}
@ -3904,10 +3907,10 @@ int
zpool_vdev_remove_cancel(zpool_handle_t *zhp)
{
zfs_cmd_t zc = {{0}};
char msg[1024];
char errbuf[ERRBUFLEN];
libzfs_handle_t *hdl = zhp->zpool_hdl;
(void) snprintf(msg, sizeof (msg),
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot cancel removal"));
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
@ -3916,25 +3919,25 @@ zpool_vdev_remove_cancel(zpool_handle_t *zhp)
if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
return (0);
return (zpool_standard_error(hdl, errno, msg));
return (zpool_standard_error(hdl, errno, errbuf));
}
int
zpool_vdev_indirect_size(zpool_handle_t *zhp, const char *path,
uint64_t *sizep)
{
char msg[1024];
char errbuf[ERRBUFLEN];
nvlist_t *tgt;
boolean_t avail_spare, l2cache, islog;
libzfs_handle_t *hdl = zhp->zpool_hdl;
(void) snprintf(msg, sizeof (msg),
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot determine indirect size of %s"),
path);
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
&islog)) == NULL)
return (zfs_error(hdl, EZFS_NODEVICE, msg));
return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
if (avail_spare || l2cache || islog) {
*sizep = 0;
@ -3944,7 +3947,7 @@ zpool_vdev_indirect_size(zpool_handle_t *zhp, const char *path,
if (nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_INDIRECT_SIZE, sizep) != 0) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"indirect size not available"));
return (zfs_error(hdl, EINVAL, msg));
return (zfs_error(hdl, EINVAL, errbuf));
}
return (0);
}
@ -3956,7 +3959,7 @@ int
zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
{
zfs_cmd_t zc = {"\0"};
char msg[1024];
char errbuf[ERRBUFLEN];
nvlist_t *tgt;
zpool_load_policy_t policy;
boolean_t avail_spare, l2cache;
@ -3965,11 +3968,11 @@ zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
int error;
if (path)
(void) snprintf(msg, sizeof (msg),
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
path);
else
(void) snprintf(msg, sizeof (msg),
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
zhp->zpool_name);
@ -3977,14 +3980,14 @@ zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
if (path) {
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
&l2cache, NULL)) == NULL)
return (zfs_error(hdl, EZFS_NODEVICE, msg));
return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
/*
* Don't allow error clearing for hot spares. Do allow
* error clearing for l2cache devices.
*/
if (avail_spare)
return (zfs_error(hdl, EZFS_ISSPARE, msg));
return (zfs_error(hdl, EZFS_ISSPARE, errbuf));
zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
}
@ -4014,7 +4017,7 @@ zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
}
zcmd_free_nvlists(&zc);
return (zpool_standard_error(hdl, errno, msg));
return (zpool_standard_error(hdl, errno, errbuf));
}
/*
@ -4024,10 +4027,10 @@ int
zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
{
zfs_cmd_t zc = {"\0"};
char msg[1024];
char errbuf[ERRBUFLEN];
libzfs_handle_t *hdl = zhp->zpool_hdl;
(void) snprintf(msg, sizeof (msg),
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
(u_longlong_t)guid);
@ -4038,7 +4041,7 @@ zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
if (zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc) == 0)
return (0);
return (zpool_standard_error(hdl, errno, msg));
return (zpool_standard_error(hdl, errno, errbuf));
}
/*
@ -4047,18 +4050,18 @@ zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
int
zpool_reguid(zpool_handle_t *zhp)
{
char msg[1024];
char errbuf[ERRBUFLEN];
libzfs_handle_t *hdl = zhp->zpool_hdl;
zfs_cmd_t zc = {"\0"};
(void) snprintf(msg, sizeof (msg),
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name);
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
if (zfs_ioctl(hdl, ZFS_IOC_POOL_REGUID, &zc) == 0)
return (0);
return (zpool_standard_error(hdl, errno, msg));
return (zpool_standard_error(hdl, errno, errbuf));
}
/*
@ -4998,7 +5001,7 @@ zpool_vdev_guid(zpool_handle_t *zhp, const char *vdevname, uint64_t *vdev_guid)
verify(zhp != NULL);
if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
char errbuf[1024];
char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "pool is in an unavailable state"));
return (zfs_error(zhp->zpool_hdl, EZFS_POOLUNAVAIL, errbuf));
@ -5006,7 +5009,7 @@ zpool_vdev_guid(zpool_handle_t *zhp, const char *vdevname, uint64_t *vdev_guid)
if ((tgt = zpool_find_vdev(zhp, vdevname, &avail_spare, &l2cache,
NULL)) == NULL) {
char errbuf[1024];
char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "can not find %s in %s"),
vdevname, zhp->zpool_name);
@ -5030,7 +5033,7 @@ zpool_get_vdev_prop_value(nvlist_t *nvprop, vdev_prop_t prop, char *prop_name,
uint64_t intval;
zprop_source_t src = ZPROP_SRC_NONE;
if (prop == VDEV_PROP_USER) {
if (prop == VDEV_PROP_USERPROP) {
/* user property, prop_name must contain the property name */
assert(prop_name != NULL);
if (nvlist_lookup_nvlist(nvprop, prop_name, &nv) == 0) {
@ -5192,7 +5195,7 @@ zpool_get_vdev_prop(zpool_handle_t *zhp, const char *vdevname, vdev_prop_t prop,
fnvlist_add_uint64(reqnvl, ZPOOL_VDEV_PROPS_GET_VDEV, vdev_guid);
if (prop != VDEV_PROP_USER) {
if (prop != VDEV_PROP_USERPROP) {
/* prop_name overrides prop value */
if (prop_name != NULL)
prop = vdev_name_to_prop(prop_name);
@ -5216,7 +5219,7 @@ zpool_get_vdev_prop(zpool_handle_t *zhp, const char *vdevname, vdev_prop_t prop,
ret = zpool_get_vdev_prop_value(retprops, prop, prop_name, buf,
len, srctype, literal);
} else {
char errbuf[1024];
char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot get vdev property %s from"
" %s in %s"), prop_name, vdevname, zhp->zpool_name);
@ -5254,7 +5257,7 @@ zpool_get_all_vdev_props(zpool_handle_t *zhp, const char *vdevname,
nvlist_free(nvl);
if (ret) {
char errbuf[1024];
char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot get vdev properties for"
" %s in %s"), vdevname, zhp->zpool_name);
@ -5295,7 +5298,7 @@ zpool_set_vdev_prop(zpool_handle_t *zhp, const char *vdevname,
return (no_memory(zhp->zpool_hdl));
}
char errbuf[1024];
char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot set property %s for %s on %s"),
propname, vdevname, zhp->zpool_name);

View File

@ -734,7 +734,7 @@ zfs_send_space(zfs_handle_t *zhp, const char *snapname, const char *from,
if (error == 0)
return (0);
char errbuf[1024];
char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"warning: cannot estimate space for '%s'"), snapname);
@ -804,7 +804,7 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
}
if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
char errbuf[1024];
char errbuf[ERRBUFLEN];
int error = errno;
(void) snprintf(errbuf, sizeof (errbuf), "%s '%s'",
@ -1615,7 +1615,7 @@ find_redact_book(libzfs_handle_t *hdl, const char *path,
const uint64_t *redact_snap_guids, int num_redact_snaps,
char **bookname)
{
char errbuf[1024];
char errbuf[ERRBUFLEN];
nvlist_t *bmarks;
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
@ -1679,7 +1679,7 @@ static int
zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags,
int outfd, nvlist_t *resume_nvl)
{
char errbuf[1024];
char errbuf[ERRBUFLEN];
char *toname;
char *fromname = NULL;
uint64_t resumeobj, resumeoff, toguid, fromguid, bytes;
@ -1827,7 +1827,7 @@ zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags,
if (flags->progress && send_progress_thread_exit(hdl, tid))
return (-1);
char errbuf[1024];
char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"warning: cannot send '%s'"), zhp->zfs_name);
@ -1907,7 +1907,7 @@ zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
const char *resume_token)
{
int ret;
char errbuf[1024];
char errbuf[ERRBUFLEN];
nvlist_t *resume_nvl;
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
@ -1938,7 +1938,7 @@ zfs_send_saved(zfs_handle_t *zhp, sendflags_t *flags, int outfd,
uint64_t saved_guid = 0, resume_guid = 0;
uint64_t obj = 0, off = 0, bytes = 0;
char token_buf[ZFS_MAXPROPLEN];
char errbuf[1024];
char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"saved send failed"));
@ -2062,7 +2062,7 @@ send_prelim_records(zfs_handle_t *zhp, const char *from, int fd,
/* short name of snap we are sending */
char *tosnap = "";
char errbuf[1024];
char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"warning: cannot send '%s'"), zhp->zfs_name);
if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM && zfs_prop_get_int(zhp,
@ -2187,7 +2187,7 @@ zfs_send_cb_impl(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
void *cb_arg, nvlist_t **debugnvp)
{
char errbuf[1024];
char errbuf[ERRBUFLEN];
send_dump_data_t sdd = { 0 };
int err = 0;
nvlist_t *fss = NULL;
@ -2366,9 +2366,9 @@ zfs_send_cb_impl(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
* there was some error, because it might not be totally
* failed.
*/
err = send_conclusion_record(outfd, NULL);
if (err != 0)
return (zfs_standard_error(zhp->zfs_hdl, err, errbuf));
int err2 = send_conclusion_record(outfd, NULL);
if (err2 != 0)
return (zfs_standard_error(zhp->zfs_hdl, err2, errbuf));
}
return (err || sdd.err);
@ -2510,7 +2510,7 @@ zfs_send_one_cb_impl(zfs_handle_t *zhp, const char *from, int fd,
pthread_t ptid;
progress_arg_t pa = { 0 };
char errbuf[1024];
char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"warning: cannot send '%s'"), name);
@ -3654,7 +3654,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
char *cp;
char tofs[ZFS_MAX_DATASET_NAME_LEN];
char sendfs[ZFS_MAX_DATASET_NAME_LEN];
char errbuf[1024];
char errbuf[ERRBUFLEN];
dmu_replay_record_t drre;
int error;
boolean_t anyerr = B_FALSE;
@ -3871,7 +3871,7 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
dmu_replay_record_t *drr;
void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
uint64_t payload_size;
char errbuf[1024];
char errbuf[ERRBUFLEN];
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
"cannot receive"));
@ -4239,7 +4239,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
int ioctl_err, ioctl_errno, err;
char *cp;
struct drr_begin *drrb = &drr->drr_u.drr_begin;
char errbuf[1024];
char errbuf[ERRBUFLEN];
const char *chopprefix;
boolean_t newfs = B_FALSE;
boolean_t stream_wantsnewfs, stream_resumingnewfs;
@ -5107,7 +5107,7 @@ zfs_receive_checkprops(libzfs_handle_t *hdl, nvlist_t *props,
name = nvpair_name(nvp);
prop = zfs_name_to_prop(name);
if (prop == ZPROP_INVAL) {
if (prop == ZPROP_USERPROP) {
if (!zfs_prop_user(name)) {
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"%s: invalid property '%s'"), errbuf, name);
@ -5151,7 +5151,7 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap,
int err;
dmu_replay_record_t drr, drr_noswap;
struct drr_begin *drrb = &drr.drr_u.drr_begin;
char errbuf[1024];
char errbuf[ERRBUFLEN];
zio_cksum_t zcksum = { { 0 } };
uint64_t featureflags;
int hdrtype;

View File

@ -299,6 +299,9 @@ libzfs_error_description(libzfs_handle_t *hdl)
case EZFS_VDEV_NOTSUP:
return (dgettext(TEXT_DOMAIN, "operation not supported "
"on this type of vdev"));
case EZFS_NOT_USER_NAMESPACE:
return (dgettext(TEXT_DOMAIN, "the provided file "
"was not a user namespace file"));
case EZFS_UNKNOWN:
return (dgettext(TEXT_DOMAIN, "unknown error"));
default:
@ -485,6 +488,9 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
case ZFS_ERR_BADPROP:
zfs_verror(hdl, EZFS_BADPROP, fmt, ap);
break;
case ZFS_ERR_NOT_USER_NAMESPACE:
zfs_verror(hdl, EZFS_NOT_USER_NAMESPACE, fmt, ap);
break;
default:
zfs_error_aux(hdl, "%s", strerror(error));
zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
@ -1276,7 +1282,7 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
/*
* 'PROPERTY' column
*/
if (pl->pl_prop != ZPROP_INVAL) {
if (pl->pl_prop != ZPROP_USERPROP) {
const char *propname = (type == ZFS_TYPE_POOL) ?
zpool_prop_to_name(pl->pl_prop) :
((type == ZFS_TYPE_VDEV) ?
@ -1749,7 +1755,7 @@ addlist(libzfs_handle_t *hdl, const char *propname, zprop_list_t **listp,
* Return failure if no property table entry was found and this isn't
* a user-defined property.
*/
if (prop == ZPROP_INVAL && ((type == ZFS_TYPE_POOL &&
if (prop == ZPROP_USERPROP && ((type == ZFS_TYPE_POOL &&
!zpool_prop_feature(propname) &&
!zpool_prop_unsupported(propname)) ||
((type == ZFS_TYPE_DATASET) && !zfs_prop_user(propname) &&
@ -1764,7 +1770,7 @@ addlist(libzfs_handle_t *hdl, const char *propname, zprop_list_t **listp,
zprop_list_t *entry = zfs_alloc(hdl, sizeof (*entry));
entry->pl_prop = prop;
if (prop == ZPROP_INVAL) {
if (prop == ZPROP_USERPROP) {
entry->pl_user_prop = zfs_strdup(hdl, propname);
entry->pl_width = strlen(propname);
} else {

View File

@ -193,8 +193,6 @@ execvpe(const char *name, char * const argv[], char * const envp[])
return (execvPe(name, path, argv, envp));
}
#define ERRBUFLEN 1024
static __thread char errbuf[ERRBUFLEN];
const char *

View File

@ -216,7 +216,7 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name)
size_t resv = EFI_MIN_RESV_SIZE;
uint64_t slice_size;
diskaddr_t start_block;
char errbuf[1024];
char errbuf[ERRBUFLEN];
/* prepare an error message just in case */
(void) snprintf(errbuf, sizeof (errbuf),

View File

@ -19,6 +19,9 @@
* CDDL HEADER END
*/
/*
* Copyright (c) 2021 Klara, Inc.
*/
#include <alloca.h>
#include <errno.h>
@ -207,3 +210,71 @@ zfs_version_kernel(void)
ret[read - 1] = '\0';
return (ret);
}
/*
* Add or delete the given filesystem to/from the given user namespace.
*/
int
zfs_userns(zfs_handle_t *zhp, const char *nspath, int attach)
{
libzfs_handle_t *hdl = zhp->zfs_hdl;
zfs_cmd_t zc = {"\0"};
char errbuf[1024];
unsigned long cmd;
int ret;
if (attach) {
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot add '%s' to namespace"),
zhp->zfs_name);
} else {
(void) snprintf(errbuf, sizeof (errbuf),
dgettext(TEXT_DOMAIN, "cannot remove '%s' from namespace"),
zhp->zfs_name);
}
switch (zhp->zfs_type) {
case ZFS_TYPE_VOLUME:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"volumes can not be namespaced"));
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
case ZFS_TYPE_SNAPSHOT:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"snapshots can not be namespaced"));
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
case ZFS_TYPE_BOOKMARK:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"bookmarks can not be namespaced"));
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
case ZFS_TYPE_VDEV:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"vdevs can not be namespaced"));
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
case ZFS_TYPE_INVALID:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"invalid zfs_type_t: ZFS_TYPE_INVALID"));
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
case ZFS_TYPE_POOL:
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
"pools can not be namespaced"));
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
case ZFS_TYPE_FILESYSTEM:
zfs_fallthrough;
}
assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM);
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
zc.zc_objset_type = DMU_OST_ZFS;
zc.zc_cleanup_fd = open(nspath, O_RDONLY);
if (zc.zc_cleanup_fd < 0) {
return (zfs_error(hdl, EZFS_NOT_USER_NAMESPACE, errbuf));
}
cmd = attach ? ZFS_IOC_USERNS_ATTACH : ZFS_IOC_USERNS_DETACH;
if ((ret = zfs_ioctl(hdl, cmd, &zc)) != 0)
zfs_standard_error(hdl, errno, errbuf);
(void) close(zc.zc_cleanup_fd);
return (ret);
}
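
As a usage illustration only (not part of the commit): a caller would obtain a
zfs_handle_t through the existing libzfs API and pass a namespace file such as
/proc/<pid>/ns/user. A hedged sketch; the dataset name and pid are invented:

/*
 * Sketch: attach tank/users to the user namespace of pid 1234
 * using the zfs_userns() function added above.
 */
#include <libzfs.h>

int
attach_to_userns_example(void)
{
	libzfs_handle_t *hdl = libzfs_init();
	zfs_handle_t *zhp;
	int ret = -1;

	if (hdl == NULL)
		return (-1);
	zhp = zfs_open(hdl, "tank/users", ZFS_TYPE_FILESYSTEM);
	if (zhp != NULL) {
		/* attach != 0 adds the fs to the namespace, 0 removes it */
		ret = zfs_userns(zhp, "/proc/1234/ns/user", 1);
		zfs_close(zhp);
	}
	libzfs_fini(hdl);
	return (ret);
}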

View File

@ -939,7 +939,7 @@
</function-decl>
</abi-instr>
<abi-instr address-size='64' path='os/linux/zone.c' language='LANG_C99'>
<typedef-decl name='zoneid_t' type-id='95e97e5e' id='4da03624'/>
<typedef-decl name='zoneid_t' type-id='3502e3ff' id='4da03624'/>
<function-decl name='getzoneid' mangled-name='getzoneid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getzoneid'>
<return type-id='4da03624'/>
</function-decl>

View File

@ -67,6 +67,7 @@ nodist_libzpool_la_SOURCES = \
module/zfs/abd.c \
module/zfs/aggsum.c \
module/zfs/arc.c \
module/zfs/blake3_zfs.c \
module/zfs/blkptr.c \
module/zfs/bplist.c \
module/zfs/bpobj.c \
@ -171,6 +172,7 @@ nodist_libzpool_la_SOURCES = \
module/zfs/zcp_synctask.c \
module/zfs/zfeature.c \
module/zfs/zfs_byteswap.c \
module/zfs/zfs_chksum.c \
module/zfs/zfs_fm.c \
module/zfs/zfs_fuid.c \
module/zfs/zfs_ratelimit.c \

View File

@ -59,9 +59,11 @@ dist_man_MANS = \
%D%/man8/zfs-unjail.8 \
%D%/man8/zfs-unload-key.8 \
%D%/man8/zfs-unmount.8 \
%D%/man8/zfs-unzone.8 \
%D%/man8/zfs-upgrade.8 \
%D%/man8/zfs-userspace.8 \
%D%/man8/zfs-wait.8 \
%D%/man8/zfs-zone.8 \
%D%/man8/zfs_ids_to_path.8 \
%D%/man8/zgenhostid.8 \
%D%/man8/zinject.8 \

View File

@ -2248,9 +2248,74 @@ for each I/O submitter.
When unset, requests are handled asynchronously by a thread pool.
The number of requests which can be handled concurrently is controlled by
.Sy zvol_threads .
.Sy zvol_request_sync
is ignored when running on a kernel that supports block multiqueue
.Pq Li blk-mq .
.
.It Sy zvol_threads Ns = Ns Sy 32 Pq uint
Max number of threads which can handle zvol I/O requests concurrently.
.It Sy zvol_threads Ns = Ns Sy 0 Pq uint
The number of system-wide threads to use for processing zvol block IOs.
If
.Sy 0
(the default), then
.Sy zvol_threads
is set internally to the number of CPUs present or 32, whichever is greater.
.
.It Sy zvol_blk_mq_threads Ns = Ns Sy 0 Pq uint
The number of threads per zvol to use for queuing IO requests.
This parameter will only appear if your kernel supports
.Li blk-mq
and is only read and assigned to a zvol at zvol load time.
If
.Sy 0
(the default), then
.Sy zvol_blk_mq_threads
is set internally to the number of CPUs present.
.
.It Sy zvol_use_blk_mq Ns = Ns Sy 0 Ns | Ns 1 Pq uint
Set to
.Sy 1
to use the
.Li blk-mq
API for zvols.
Set to
.Sy 0
(the default) to use the legacy zvol APIs.
This setting can give better or worse zvol performance depending on
the workload.
This parameter will only appear if your kernel supports
.Li blk-mq
and is only read and assigned to a zvol at zvol load time.
.
.It Sy zvol_blk_mq_blocks_per_thread Ns = Ns Sy 8 Pq uint
If
.Sy zvol_use_blk_mq
is enabled, then process this number of
.Sy volblocksize Ns -sized blocks per zvol thread.
This tunable can be used to favor better performance for zvol reads (lower
values) or writes (higher values).
If set to
.Sy 0 ,
then the zvol layer will process the maximum number of blocks
per thread that it can.
This parameter will only appear if your kernel supports
.Li blk-mq
and is only applied at each zvol's load time.
.
.It Sy zvol_blk_mq_queue_depth Ns = Ns Sy 0 Pq uint
The queue_depth value for the zvol
.Li blk-mq
interface.
This parameter will only appear if your kernel supports
.Li blk-mq
and is only applied at each zvol's load time.
If
.Sy 0
(the default) then use the kernel's default queue depth.
Values are clamped to the kernel's
.Dv BLKDEV_MIN_RQ
and
.Dv BLKDEV_MAX_RQ Ns / Ns Dv BLKDEV_DEFAULT_RQ
limits.
.
.It Sy zvol_volmode Ns = Ns Sy 1 Pq uint
Defines zvol block devices behaviour when

View File

@ -743,7 +743,7 @@ This property is not inherited.
.It Xo
.Sy checksum Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy fletcher2 Ns | Ns
.Sy fletcher4 Ns | Ns Sy sha256 Ns | Ns Sy noparity Ns | Ns
.Sy sha512 Ns | Ns Sy skein Ns | Ns Sy edonr
.Sy sha512 Ns | Ns Sy skein Ns | Ns Sy edonr Ns | Ns Sy blake3
.Xc
Controls the checksum used to verify data integrity.
The default value is
@ -768,8 +768,9 @@ a recommended practice.
The
.Sy sha512 ,
.Sy skein ,
.Sy edonr ,
and
.Sy edonr
.Sy blake3
checksum algorithms require enabling the appropriate features on the pool.
.Pp
Please see
@ -984,7 +985,7 @@ mount options.
.It Xo
.Sy dedup Ns = Ns Sy off Ns | Ns Sy on Ns | Ns Sy verify Ns | Ns
.Sy sha256 Ns Oo , Ns Sy verify Oc Ns | Ns Sy sha512 Ns Oo , Ns Sy verify Oc Ns | Ns Sy skein Ns Oo , Ns Sy verify Oc Ns | Ns
.Sy edonr , Ns Sy verify
.Sy edonr , Ns Sy verify Ns | Ns Sy blake3 Ns Oo , Ns Sy verify Oc Ns
.Xc
Configures deduplication for a dataset.
The default value is
@ -1884,8 +1885,7 @@ feature and are not relevant on other platforms.
The default value is
.Sy off .
.It Sy zoned Ns = Ns Sy on Ns | Ns Sy off
Controls whether the dataset is managed from a non-global zone.
Zones are a Solaris feature and are not relevant on other platforms.
Controls whether the dataset is managed from a non-global zone or namespace.
The default value is
.Sy off .
.El

View File

@ -326,6 +326,12 @@ while
.Sy freeing
is non-zero.
.
.feature org.openzfs blake3 no extensible_dataset
This feature enables the use of the BLAKE3 hash algorithm for checksum and dedup.
BLAKE3 is a secure hash algorithm focused on high performance.
.Pp
.checksum-spiel blake3
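.Pp
For orientation only (not part of this man page): once the pool feature is
enabled, the algorithm is selected through the ordinary property interface.
A hedged libzfs sketch, with pool and dataset names invented for the example:

#include <libzfs.h>

/* Enable the blake3 feature on "tank" and use it for checksums. */
static int
enable_blake3(libzfs_handle_t *hdl)
{
	zpool_handle_t *pool = zpool_open(hdl, "tank");
	int err;

	if (pool == NULL)
		return (-1);
	/* pool features are enabled via the feature@<name> property */
	err = zpool_set_prop(pool, "feature@blake3", "enabled");
	zpool_close(pool);
	if (err != 0)
		return (err);

	zfs_handle_t *ds = zfs_open(hdl, "tank/data", ZFS_TYPE_FILESYSTEM);
	if (ds == NULL)
		return (-1);
	err = zfs_prop_set(ds, "checksum", "blake3");
	zfs_close(ds);
	return (err);
}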
.
.feature com.delphix bookmarks yes extensible_dataset
This feature enables use of the
.Nm zfs Cm bookmark
@ -436,6 +442,8 @@ in ZFS, which means that the checksum is pre-seeded with a secret
to be checksummed.
Thus the produced checksums are unique to a given pool,
preventing hash collision attacks on systems with dedup.
.Pp
.checksum-spiel edonr
.
.feature com.delphix embedded_data no
This feature improves the performance and compression ratio of

View File

@ -0,0 +1 @@
zfs-zone.8

View File

@ -0,0 +1,116 @@
.\"
.\" CDDL HEADER START
.\"
.\" The contents of this file are subject to the terms of the
.\" Common Development and Distribution License (the "License").
.\" You may not use this file except in compliance with the License.
.\"
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
.\" or http://www.opensolaris.org/os/licensing.
.\" See the License for the specific language governing permissions
.\" and limitations under the License.
.\"
.\" When distributing Covered Code, include this CDDL HEADER in each
.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
.\" If applicable, add the following below this CDDL HEADER, with the
.\" fields enclosed by brackets "[]" replaced with your own identifying
.\" information: Portions Copyright [yyyy] [name of copyright owner]
.\"
.\" CDDL HEADER END
.\"
.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
.\" Copyright (c) 2011, Pawel Jakub Dawidek <pjd@FreeBSD.org>
.\" Copyright (c) 2012, Glen Barber <gjb@FreeBSD.org>
.\" Copyright (c) 2012, Bryan Drewery <bdrewery@FreeBSD.org>
.\" Copyright (c) 2013, Steven Hartland <smh@FreeBSD.org>
.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
.\" Copyright (c) 2014 Integros [integros.com]
.\" Copyright (c) 2014, Xin LI <delphij@FreeBSD.org>
.\" Copyright (c) 2014-2015, The FreeBSD Foundation, All Rights Reserved.
.\" Copyright (c) 2016 Nexenta Systems, Inc. All Rights Reserved.
.\" Copyright 2019 Richard Laager. All rights reserved.
.\" Copyright 2018 Nexenta Systems, Inc.
.\" Copyright 2019 Joyent, Inc.
.\" Copyright 2021 Klara, Inc.
.\"
.Dd June 3, 2022
.Dt ZFS-ZONE 8
.Os
.
.Sh NAME
.Nm zfs-zone ,
.Nm zfs-unzone
.Nd attach and detach ZFS filesystems to user namespaces
.Sh SYNOPSIS
.Nm zfs Cm zone
.Ar nsfile
.Ar filesystem
.Nm zfs Cm unzone
.Ar nsfile
.Ar filesystem
.
.Sh DESCRIPTION
.Bl -tag -width ""
.It Xo
.Nm zfs
.Cm zone
.Ar nsfile
.Ar filesystem
.Xc
Attach the specified
.Ar filesystem
to the user namespace identified by
.Ar nsfile .
Once attached, this file system tree can be managed from within the user
namespace if the
.Sy zoned
property has been set.
.Pp
You cannot attach a zoned dataset's children to another user namespace.
Nor can you attach the root file system of the user namespace,
or any dataset which needs to be mounted before the zfs service
runs inside the user namespace,
since it would remain unmounted until the service
inside the user namespace mounts it.
.Pp
To allow management of the dataset from within a user namespace, the
.Sy zoned
property has to be set and the user namespace needs access to the
.Pa /dev/zfs
device.
The
.Sy quota
property cannot be changed from within a user namespace.
.Pp
After a dataset is attached to a user namespace and the
.Sy zoned
property is set,
a zoned file system cannot be mounted outside the user namespace,
since the user namespace administrator might have set the mount point
to an unacceptable value.
.It Xo
.Nm zfs
.Cm unzone
.Ar nsfile
.Ar filesystem
.Xc
Detach the specified
.Ar filesystem
from the user namespace identified by
.Ar nsfile .
.El
.Sh EXAMPLES
.Ss Example 1 : No Delegating a Dataset to a User Namespace
The following example delegates the
.Ar tank/users
dataset to a user namespace identified by user namespace file
.Pa /proc/1234/ns/user .
.Dl # Nm zfs Cm zone Ar /proc/1234/ns/user Ar tank/users
.
.Sh SEE ALSO
.Xr zfsprops 7

View File

@ -84,8 +84,29 @@ with no flags on the relevant target devices.
.It Fl w , -wait
Wait until the devices are done being trimmed before returning.
.El
.Sh PERIODIC TRIM
On machines using systemd, trim timers can be enabled on a per-pool basis.
.Nm weekly
and
.Nm monthly
timer units are provided.
.Bl -tag -width Ds
.It Xo
.Xc
.Nm systemctl
.Cm enable
.Cm zfs-trim-\fIweekly\fB@\fIrpool\fB.timer
.Cm --now
.It Xo
.Xc
.Nm systemctl
.Cm enable
.Cm zfs-trim-\fImonthly\fB@\fIotherpool\fB.timer
.Cm --now
.El
.
.Sh SEE ALSO
.Xr systemd.timer 5 ,
.Xr zpoolprops 7 ,
.Xr zpool-initialize 8 ,
.Xr zpool-wait 8

View File

@ -65,7 +65,8 @@ SPL_OBJS := \
spl-tsd.o \
spl-vmem.o \
spl-xdr.o \
spl-zlib.o
spl-zlib.o \
spl-zone.o
spl-objs += $(addprefix os/linux/spl/,$(SPL_OBJS))
@ -75,6 +76,10 @@ ICP_OBJS := \
algs/aes/aes_impl.o \
algs/aes/aes_impl_generic.o \
algs/aes/aes_modes.o \
algs/blake3/blake3.o \
algs/blake3/blake3_generic.o \
algs/blake3/blake3_impl.o \
algs/blake3/blake3_x86-64.o \
algs/edonr/edonr.o \
algs/modes/cbc.o \
algs/modes/ccm.o \
@ -105,23 +110,45 @@ ICP_OBJS_X86_64 := \
asm-x86_64/aes/aes_aesni.o \
asm-x86_64/aes/aes_amd64.o \
asm-x86_64/aes/aeskey.o \
asm-x86_64/blake3/blake3_avx2.o \
asm-x86_64/blake3/blake3_avx512.o \
asm-x86_64/blake3/blake3_sse2.o \
asm-x86_64/blake3/blake3_sse41.o \
asm-x86_64/modes/aesni-gcm-x86_64.o \
asm-x86_64/modes/gcm_pclmulqdq.o \
asm-x86_64/modes/ghash-x86_64.o \
asm-x86_64/sha2/sha256_impl.o \
asm-x86_64/sha2/sha512_impl.o
ICP_OBJS_X86 := \
algs/aes/aes_impl_aesni.o \
algs/aes/aes_impl_x86-64.o \
algs/modes/gcm_pclmulqdq.o
ICP_OBJS_ARM64 := \
asm-aarch64/blake3/b3_aarch64_sse2.o \
asm-aarch64/blake3/b3_aarch64_sse41.o
ICP_OBJS_PPC_PPC64 := \
asm-ppc64/blake3/b3_ppc64le_sse2.o \
asm-ppc64/blake3/b3_ppc64le_sse41.o
zfs-objs += $(addprefix icp/,$(ICP_OBJS))
zfs-$(CONFIG_X86) += $(addprefix icp/,$(ICP_OBJS_X86))
zfs-$(CONFIG_UML_X86)+= $(addprefix icp/,$(ICP_OBJS_X86))
zfs-$(CONFIG_X86_64) += $(addprefix icp/,$(ICP_OBJS_X86_64))
zfs-$(CONFIG_ARM64) += $(addprefix icp/,$(ICP_OBJS_ARM64))
zfs-$(CONFIG_PPC) += $(addprefix icp/,$(ICP_OBJS_PPC_PPC64))
zfs-$(CONFIG_PPC64) += $(addprefix icp/,$(ICP_OBJS_PPC_PPC64))
$(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64)) : asflags-y += -I$(icp_include)
$(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64)) : ccflags-y += -I$(icp_include)
$(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64) \
$(ICP_OBJS_ARM64) $(ICP_OBJS_PPC_PPC64)) : asflags-y += -I$(icp_include)
$(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64) \
$(ICP_OBJS_ARM64) $(ICP_OBJS_PPC_PPC64)) : ccflags-y += -I$(icp_include)
# Suppress objtool "can't find jump dest instruction at" warnings. They
# are caused by the constants which are defined in the text section of the
@ -129,6 +156,7 @@ $(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64)) : ccflag
# utility tries to interpret them as opcodes and obviously fails doing so.
OBJECT_FILES_NON_STANDARD_aesni-gcm-x86_64.o := y
OBJECT_FILES_NON_STANDARD_ghash-x86_64.o := y
# Suppress objtool "unsupported stack pointer realignment" warnings. We are
# not using a DRAP register while aligning the stack to a 64 byte boundary.
# See #6950 for the reasoning.
@ -205,6 +233,7 @@ ZCOMMON_OBJS_ARM64 := \
zfs-objs += $(addprefix zcommon/,$(ZCOMMON_OBJS))
zfs-$(CONFIG_X86) += $(addprefix zcommon/,$(ZCOMMON_OBJS_X86))
zfs-$(CONFIG_UML_X86)+= $(addprefix zcommon/,$(ZCOMMON_OBJS_X86))
zfs-$(CONFIG_ARM64) += $(addprefix zcommon/,$(ZCOMMON_OBJS_ARM64))
@ -261,6 +290,7 @@ ZFS_OBJS := \
abd.o \
aggsum.o \
arc.o \
blake3_zfs.o \
blkptr.o \
bplist.o \
bpobj.o \
@ -358,6 +388,7 @@ ZFS_OBJS := \
zcp_synctask.o \
zfeature.o \
zfs_byteswap.o \
zfs_chksum.o \
zfs_fm.o \
zfs_fuid.o \
zfs_ioctl.o \
@ -428,6 +459,7 @@ ZFS_OBJS_PPC_PPC64 := \
zfs-objs += $(addprefix zfs/,$(ZFS_OBJS)) $(addprefix os/linux/zfs/,$(ZFS_OBJS_OS))
zfs-$(CONFIG_X86) += $(addprefix zfs/,$(ZFS_OBJS_X86))
zfs-$(CONFIG_UML_X86)+= $(addprefix zfs/,$(ZFS_OBJS_X86))
zfs-$(CONFIG_ARM64) += $(addprefix zfs/,$(ZFS_OBJS_ARM64))
zfs-$(CONFIG_PPC) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
zfs-$(CONFIG_PPC64) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))

View File

@ -10,6 +10,10 @@ INCDIR=${.CURDIR:H}/include
KMOD= openzfs
.PATH: ${SRCDIR}/avl \
${SRCDIR}/icp/algs/blake3 \
${SRCDIR}/icp/asm-aarch64/blake3 \
${SRCDIR}/icp/asm-ppc64/blake3 \
${SRCDIR}/icp/asm-x86_64/blake3 \
${SRCDIR}/lua \
${SRCDIR}/nvpair \
${SRCDIR}/icp/algs/edonr \
@ -31,6 +35,7 @@ CFLAGS+= -I${INCDIR}/os/freebsd
CFLAGS+= -I${INCDIR}/os/freebsd/spl
CFLAGS+= -I${INCDIR}/os/freebsd/zfs
CFLAGS+= -I${SRCDIR}/zstd/include
CFLAGS+= -I${SRCDIR}/icp/include
CFLAGS+= -include ${INCDIR}/os/freebsd/spl/sys/ccompile.h
CFLAGS+= -D__KERNEL__ -DFREEBSD_NAMECACHE -DBUILDING_ZFS -D__BSD_VISIBLE=1 \
@ -38,7 +43,8 @@ CFLAGS+= -D__KERNEL__ -DFREEBSD_NAMECACHE -DBUILDING_ZFS -D__BSD_VISIBLE=1 \
-D_SYS_VMEM_H_ -DKDTRACE_HOOKS -DSMP -DCOMPAT_FREEBSD11
.if ${MACHINE_ARCH} == "amd64"
CFLAGS+= -DHAVE_AVX2 -DHAVE_AVX -D__x86_64 -DHAVE_SSE2 -DHAVE_AVX512F -DHAVE_SSSE3
CFLAGS+= -D__x86_64 -DHAVE_SSE2 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 \
-DHAVE_AVX -DHAVE_AVX2 -DHAVE_AVX512F -DHAVE_AVX512VL
.endif
.if defined(WITH_DEBUG) && ${WITH_DEBUG} == "true"
@ -73,12 +79,32 @@ CFLAGS+= -DBITS_PER_LONG=64
SRCS= vnode_if.h device_if.h bus_if.h
# avl
#avl
SRCS+= avl.c
# icp
SRCS+= edonr.c
#icp/algs/blake3
SRCS+= blake3.c \
blake3_generic.c \
blake3_impl.c \
blake3_x86-64.c
#icp/asm-aarch64/blake3
SRCS+= b3_aarch64_sse2.S \
b3_aarch64_sse41.S
#icp/asm-ppc64/blake3
SRCS+= b3_ppc64le_sse2.S \
b3_ppc64le_sse41.S
#icp/asm-x86_64/blake3
SRCS+= blake3_avx2.S \
blake3_avx512.S \
blake3_sse2.S \
blake3_sse41.S
#lua
SRCS+= lapi.c \
lauxlib.c \
@ -189,6 +215,7 @@ SRCS+= zfeature_common.c \
SRCS+= abd.c \
aggsum.c \
arc.c \
blake3_zfs.c \
blkptr.c \
bplist.c \
bpobj.c \
@ -291,6 +318,7 @@ SRCS+= abd.c \
zcp_synctask.c \
zfeature.c \
zfs_byteswap.c \
zfs_chksum.c \
zfs_file_os.c \
zfs_fm.c \
zfs_fuid.c \
@ -337,8 +365,6 @@ SRCS+= zfs_zstd.c \
zstd_decompress.c \
zstd_decompress_block.c
beforeinstall:
.if ${MK_DEBUG_FILES} != "no"
mtree -eu \

View File

@ -108,21 +108,6 @@
#include <sys/cmn_err.h>
#include <sys/mod.h>
/*
* Small arrays to translate between balance (or diff) values and child indices.
*
* Code that deals with binary tree data structures will randomly use
* left and right children when examining a tree. C "if()" statements
* which evaluate randomly suffer from very poor hardware branch prediction.
* In this code we avoid some of the branch mispredictions by using the
* following translation arrays. They replace random branches with an
* additional memory reference. Since the translation arrays are both very
* small the data should remain efficiently in cache.
*/
static const int avl_child2balance[] = {-1, 1};
static const int avl_balance2child[] = {0, 0, 1};
/*
* Walk from one node to the previous valued node (ie. an infix walk
* towards the left). At any given node we do one of 2 things:
@ -278,8 +263,7 @@ avl_find(avl_tree_t *tree, const void *value, avl_index_t *where)
#endif
return (AVL_NODE2DATA(node, off));
}
child = avl_balance2child[1 + diff];
child = (diff > 0);
}
if (where != NULL)
@ -527,7 +511,7 @@ avl_insert(avl_tree_t *tree, void *new_data, avl_index_t where)
* Compute the new balance
*/
old_balance = AVL_XBALANCE(node);
new_balance = old_balance + avl_child2balance[which_child];
new_balance = old_balance + (which_child ? 1 : -1);
/*
* If we introduced equal balance, then we are done immediately
@ -693,7 +677,7 @@ avl_remove(avl_tree_t *tree, void *data)
* choose node to swap from whichever side is taller
*/
old_balance = AVL_XBALANCE(delete);
left = avl_balance2child[old_balance + 1];
left = (old_balance > 0);
right = 1 - left;
/*
@ -777,7 +761,7 @@ avl_remove(avl_tree_t *tree, void *data)
*/
node = parent;
old_balance = AVL_XBALANCE(node);
new_balance = old_balance - avl_child2balance[which_child];
new_balance = old_balance - (which_child ? 1 : -1);
parent = AVL_XPARENT(node);
which_child = AVL_XCHILD(node);

View File

@ -0,0 +1,732 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
* Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor
* Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
*/
#include <sys/zfs_context.h>
#include <sys/blake3.h>
#include "blake3_impl.h"
/*
* We need a 1056-byte stack for blake3_compress_subtree_wide();
* the pragma below silences gcc's frame-size warning.
*/
#if defined(__GNUC__)
#pragma GCC diagnostic ignored "-Wframe-larger-than="
#endif
/* internally used */
typedef struct {
uint32_t input_cv[8];
uint64_t counter;
uint8_t block[BLAKE3_BLOCK_LEN];
uint8_t block_len;
uint8_t flags;
} output_t;
/* internal flags */
enum blake3_flags {
CHUNK_START = 1 << 0,
CHUNK_END = 1 << 1,
PARENT = 1 << 2,
ROOT = 1 << 3,
KEYED_HASH = 1 << 4,
DERIVE_KEY_CONTEXT = 1 << 5,
DERIVE_KEY_MATERIAL = 1 << 6,
};
/* internal start */
static void chunk_state_init(blake3_chunk_state_t *ctx,
const uint32_t key[8], uint8_t flags)
{
memcpy(ctx->cv, key, BLAKE3_KEY_LEN);
ctx->chunk_counter = 0;
memset(ctx->buf, 0, BLAKE3_BLOCK_LEN);
ctx->buf_len = 0;
ctx->blocks_compressed = 0;
ctx->flags = flags;
}
static void chunk_state_reset(blake3_chunk_state_t *ctx,
const uint32_t key[8], uint64_t chunk_counter)
{
memcpy(ctx->cv, key, BLAKE3_KEY_LEN);
ctx->chunk_counter = chunk_counter;
ctx->blocks_compressed = 0;
memset(ctx->buf, 0, BLAKE3_BLOCK_LEN);
ctx->buf_len = 0;
}
static size_t chunk_state_len(const blake3_chunk_state_t *ctx)
{
return (BLAKE3_BLOCK_LEN * (size_t)ctx->blocks_compressed) +
((size_t)ctx->buf_len);
}
static size_t chunk_state_fill_buf(blake3_chunk_state_t *ctx,
const uint8_t *input, size_t input_len)
{
size_t take = BLAKE3_BLOCK_LEN - ((size_t)ctx->buf_len);
if (take > input_len) {
take = input_len;
}
uint8_t *dest = ctx->buf + ((size_t)ctx->buf_len);
memcpy(dest, input, take);
ctx->buf_len += (uint8_t)take;
return (take);
}
static uint8_t chunk_state_maybe_start_flag(const blake3_chunk_state_t *ctx)
{
if (ctx->blocks_compressed == 0) {
return (CHUNK_START);
} else {
return (0);
}
}
static output_t make_output(const uint32_t input_cv[8],
const uint8_t *block, uint8_t block_len,
uint64_t counter, uint8_t flags)
{
output_t ret;
memcpy(ret.input_cv, input_cv, 32);
memcpy(ret.block, block, BLAKE3_BLOCK_LEN);
ret.block_len = block_len;
ret.counter = counter;
ret.flags = flags;
return (ret);
}
/*
* Chaining values within a given chunk (specifically the compress_in_place
* interface) are represented as words. This avoids unnecessary bytes<->words
* conversion overhead in the portable implementation. However, the hash_many
* interface handles both user input and parent node blocks, so it accepts
* bytes. For that reason, chaining values in the CV stack are represented as
* bytes.
*/
static void output_chaining_value(const blake3_impl_ops_t *ops,
const output_t *ctx, uint8_t cv[32])
{
uint32_t cv_words[8];
memcpy(cv_words, ctx->input_cv, 32);
ops->compress_in_place(cv_words, ctx->block, ctx->block_len,
ctx->counter, ctx->flags);
store_cv_words(cv, cv_words);
}
static void output_root_bytes(const blake3_impl_ops_t *ops, const output_t *ctx,
uint64_t seek, uint8_t *out, size_t out_len)
{
uint64_t output_block_counter = seek / 64;
size_t offset_within_block = seek % 64;
uint8_t wide_buf[64];
while (out_len > 0) {
ops->compress_xof(ctx->input_cv, ctx->block, ctx->block_len,
output_block_counter, ctx->flags | ROOT, wide_buf);
size_t available_bytes = 64 - offset_within_block;
size_t memcpy_len;
if (out_len > available_bytes) {
memcpy_len = available_bytes;
} else {
memcpy_len = out_len;
}
memcpy(out, wide_buf + offset_within_block, memcpy_len);
out += memcpy_len;
out_len -= memcpy_len;
output_block_counter += 1;
offset_within_block = 0;
}
}
static void chunk_state_update(const blake3_impl_ops_t *ops,
blake3_chunk_state_t *ctx, const uint8_t *input, size_t input_len)
{
if (ctx->buf_len > 0) {
size_t take = chunk_state_fill_buf(ctx, input, input_len);
input += take;
input_len -= take;
if (input_len > 0) {
ops->compress_in_place(ctx->cv, ctx->buf,
BLAKE3_BLOCK_LEN, ctx->chunk_counter,
ctx->flags|chunk_state_maybe_start_flag(ctx));
ctx->blocks_compressed += 1;
ctx->buf_len = 0;
memset(ctx->buf, 0, BLAKE3_BLOCK_LEN);
}
}
while (input_len > BLAKE3_BLOCK_LEN) {
ops->compress_in_place(ctx->cv, input, BLAKE3_BLOCK_LEN,
ctx->chunk_counter,
ctx->flags|chunk_state_maybe_start_flag(ctx));
ctx->blocks_compressed += 1;
input += BLAKE3_BLOCK_LEN;
input_len -= BLAKE3_BLOCK_LEN;
}
size_t take = chunk_state_fill_buf(ctx, input, input_len);
input += take;
input_len -= take;
}
static output_t chunk_state_output(const blake3_chunk_state_t *ctx)
{
uint8_t block_flags =
ctx->flags | chunk_state_maybe_start_flag(ctx) | CHUNK_END;
return (make_output(ctx->cv, ctx->buf, ctx->buf_len, ctx->chunk_counter,
block_flags));
}
static output_t parent_output(const uint8_t block[BLAKE3_BLOCK_LEN],
const uint32_t key[8], uint8_t flags)
{
return (make_output(key, block, BLAKE3_BLOCK_LEN, 0, flags | PARENT));
}
/*
* Given some input larger than one chunk, return the number of bytes that
* should go in the left subtree. This is the largest power-of-2 number of
* chunks that leaves at least 1 byte for the right subtree.
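* For example, with content_len = 5 * BLAKE3_CHUNK_LEN + 1, full_chunks
* below is 5, and the left subtree gets round_down_to_power_of_2(5) = 4
* chunks.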
*/
static size_t left_len(size_t content_len)
{
/*
* Subtract 1 to reserve at least one byte for the right side.
* content_len should always be greater than BLAKE3_CHUNK_LEN.
*/
size_t full_chunks = (content_len - 1) / BLAKE3_CHUNK_LEN;
return (round_down_to_power_of_2(full_chunks) * BLAKE3_CHUNK_LEN);
}
/*
* Use SIMD parallelism to hash up to MAX_SIMD_DEGREE chunks at the same time
* on a single thread. Write out the chunk chaining values and return the
* number of chunks hashed. These chunks are never the root and never empty;
* those cases use a different codepath.
*/
static size_t compress_chunks_parallel(const blake3_impl_ops_t *ops,
const uint8_t *input, size_t input_len, const uint32_t key[8],
uint64_t chunk_counter, uint8_t flags, uint8_t *out)
{
const uint8_t *chunks_array[MAX_SIMD_DEGREE];
size_t input_position = 0;
size_t chunks_array_len = 0;
while (input_len - input_position >= BLAKE3_CHUNK_LEN) {
chunks_array[chunks_array_len] = &input[input_position];
input_position += BLAKE3_CHUNK_LEN;
chunks_array_len += 1;
}
ops->hash_many(chunks_array, chunks_array_len, BLAKE3_CHUNK_LEN /
BLAKE3_BLOCK_LEN, key, chunk_counter, B_TRUE, flags, CHUNK_START,
CHUNK_END, out);
/*
* Hash the remaining partial chunk, if there is one. Note that the
* empty chunk (meaning the empty message) is a different codepath.
*/
if (input_len > input_position) {
uint64_t counter = chunk_counter + (uint64_t)chunks_array_len;
blake3_chunk_state_t chunk_state;
chunk_state_init(&chunk_state, key, flags);
chunk_state.chunk_counter = counter;
chunk_state_update(ops, &chunk_state, &input[input_position],
input_len - input_position);
output_t output = chunk_state_output(&chunk_state);
output_chaining_value(ops, &output, &out[chunks_array_len *
BLAKE3_OUT_LEN]);
return (chunks_array_len + 1);
} else {
return (chunks_array_len);
}
}
/*
* Use SIMD parallelism to hash up to MAX_SIMD_DEGREE parents at the same time
* on a single thread. Write out the parent chaining values and return the
* number of parents hashed. (If there's an odd input chaining value left over,
* return it as an additional output.) These parents are never the root and
* never empty; those cases use a different codepath.
*/
static size_t compress_parents_parallel(const blake3_impl_ops_t *ops,
const uint8_t *child_chaining_values, size_t num_chaining_values,
const uint32_t key[8], uint8_t flags, uint8_t *out)
{
const uint8_t *parents_array[MAX_SIMD_DEGREE_OR_2];
size_t parents_array_len = 0;
while (num_chaining_values - (2 * parents_array_len) >= 2) {
parents_array[parents_array_len] = &child_chaining_values[2 *
parents_array_len * BLAKE3_OUT_LEN];
parents_array_len += 1;
}
ops->hash_many(parents_array, parents_array_len, 1, key, 0, B_FALSE,
flags | PARENT, 0, 0, out);
/* If there's an odd child left over, it becomes an output. */
if (num_chaining_values > 2 * parents_array_len) {
memcpy(&out[parents_array_len * BLAKE3_OUT_LEN],
&child_chaining_values[2 * parents_array_len *
BLAKE3_OUT_LEN], BLAKE3_OUT_LEN);
return (parents_array_len + 1);
} else {
return (parents_array_len);
}
}
/*
* The wide helper function returns (writes out) an array of chaining values
* and returns the length of that array. The number of chaining values returned
* is the dynamically detected SIMD degree, at most MAX_SIMD_DEGREE, or fewer
* if the input is shorter than that many chunks. The reason for maintaining a
* wide array of chaining values going back up the tree is to allow the
* implementation to hash as many parents in parallel as possible.
*
* As a special case when the SIMD degree is 1, this function will still return
* at least 2 outputs. This guarantees that this function doesn't perform the
* root compression. (If it did, it would use the wrong flags, and also we
* wouldn't be able to implement extendable output.) Note that this function is
* not used when the whole input is only 1 chunk long; that's a different
* codepath.
*
* Why not just have the caller split the input on the first update(), instead
* of implementing this special rule? Because we don't want to limit SIMD or
* multi-threading parallelism for that update().
*/
static size_t blake3_compress_subtree_wide(const blake3_impl_ops_t *ops,
const uint8_t *input, size_t input_len, const uint32_t key[8],
uint64_t chunk_counter, uint8_t flags, uint8_t *out)
{
/*
* Note that the single chunk case does *not* bump the SIMD degree up
* to 2 when it is 1. If this implementation adds multi-threading in
* the future, this gives us the option of multi-threading even the
* 2-chunk case, which can help performance on smaller platforms.
*/
if (input_len <= (size_t)(ops->degree * BLAKE3_CHUNK_LEN)) {
return (compress_chunks_parallel(ops, input, input_len, key,
chunk_counter, flags, out));
}
/*
* With more than simd_degree chunks, we need to recurse. Start by
* dividing the input into left and right subtrees. (Note that this is
* only optimal as long as the SIMD degree is a power of 2. If we ever
* get a SIMD degree of 3 or something, we'll need a more complicated
* strategy.)
*/
size_t left_input_len = left_len(input_len);
size_t right_input_len = input_len - left_input_len;
const uint8_t *right_input = &input[left_input_len];
uint64_t right_chunk_counter = chunk_counter +
(uint64_t)(left_input_len / BLAKE3_CHUNK_LEN);
/*
* Make space for the child outputs. Here we use MAX_SIMD_DEGREE_OR_2
* to account for the special case of returning 2 outputs when the
* SIMD degree is 1.
*/
uint8_t cv_array[2 * MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
size_t degree = ops->degree;
if (left_input_len > BLAKE3_CHUNK_LEN && degree == 1) {
/*
* The special case: We always use a degree of at least two,
* to make sure there are two outputs. Except, as noted above,
* at the chunk level, where we allow degree=1. (Note that the
* 1-chunk-input case is a different codepath.)
*/
degree = 2;
}
uint8_t *right_cvs = &cv_array[degree * BLAKE3_OUT_LEN];
/*
* Recurse! If this implementation adds multi-threading support in the
* future, this is where it will go.
*/
size_t left_n = blake3_compress_subtree_wide(ops, input, left_input_len,
key, chunk_counter, flags, cv_array);
size_t right_n = blake3_compress_subtree_wide(ops, right_input,
right_input_len, key, right_chunk_counter, flags, right_cvs);
/*
* The special case again. If simd_degree=1, then we'll have left_n=1
* and right_n=1. Rather than compressing them into a single output,
* return them directly, to make sure we always have at least two
* outputs.
*/
if (left_n == 1) {
memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN);
return (2);
}
/* Otherwise, do one layer of parent node compression. */
size_t num_chaining_values = left_n + right_n;
return (compress_parents_parallel(ops, cv_array,
num_chaining_values, key, flags, out));
}
/*
* Hash a subtree with compress_subtree_wide(), and then condense the resulting
* list of chaining values down to a single parent node. Don't compress that
* last parent node, however. Instead, return its message bytes (the
* concatenated chaining values of its children). This is necessary when the
* first call to update() supplies a complete subtree, because the topmost
* parent node of that subtree could end up being the root. It's also necessary
* for extended output in the general case.
*
* As with compress_subtree_wide(), this function is not used on inputs of 1
* chunk or less. That's a different codepath.
*/
static void compress_subtree_to_parent_node(const blake3_impl_ops_t *ops,
const uint8_t *input, size_t input_len, const uint32_t key[8],
uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN])
{
uint8_t cv_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
size_t num_cvs = blake3_compress_subtree_wide(ops, input, input_len,
key, chunk_counter, flags, cv_array);
/*
* If MAX_SIMD_DEGREE is greater than 2 and there's enough input,
* compress_subtree_wide() returns more than 2 chaining values. Condense
* them into 2 by forming parent nodes repeatedly.
*/
uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2];
while (num_cvs > 2) {
num_cvs = compress_parents_parallel(ops, cv_array, num_cvs, key,
flags, out_array);
memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN);
}
memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN);
}
static void hasher_init_base(BLAKE3_CTX *ctx, const uint32_t key[8],
uint8_t flags)
{
memcpy(ctx->key, key, BLAKE3_KEY_LEN);
chunk_state_init(&ctx->chunk, key, flags);
ctx->cv_stack_len = 0;
ctx->ops = blake3_impl_get_ops();
}
/*
* As described in hasher_push_cv() below, we do "lazy merging", delaying
* merges until right before the next CV is about to be added. This is
* different from the reference implementation. Another difference is that we
* aren't always merging 1 chunk at a time. Instead, each CV might represent
* any power-of-two number of chunks, as long as the smaller-above-larger
* stack order is maintained. Instead of the "count the trailing 0-bits"
* algorithm described in the spec, we use a "count the total number of
* 1-bits" variant that doesn't require us to retain the subtree size of the
* CV on top of the stack. The principle is the same: each CV that should
* remain in the stack is represented by a 1-bit in the total number of chunks
* (or bytes) so far.
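* For example, after 6 chunks (binary 110) two CVs remain on the stack:
* one covering a 4-chunk subtree and one covering a 2-chunk subtree.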
*/
static void hasher_merge_cv_stack(BLAKE3_CTX *ctx, uint64_t total_len)
{
size_t post_merge_stack_len = (size_t)popcnt(total_len);
while (ctx->cv_stack_len > post_merge_stack_len) {
uint8_t *parent_node =
&ctx->cv_stack[(ctx->cv_stack_len - 2) * BLAKE3_OUT_LEN];
output_t output =
parent_output(parent_node, ctx->key, ctx->chunk.flags);
output_chaining_value(ctx->ops, &output, parent_node);
ctx->cv_stack_len -= 1;
}
}
/*
* In reference_impl.rs, we merge the new CV with existing CVs from the stack
* before pushing it. We can do that because we know more input is coming, so
* we know none of the merges are root.
*
* This setting is different. We want to feed as much input as possible to
* compress_subtree_wide(), without setting aside anything for the chunk_state.
* If the user gives us 64 KiB, we want to parallelize over all 64 KiB at once
* as a single subtree, if at all possible.
*
* This leads to two problems:
* 1) This 64 KiB input might be the only call that ever gets made to update.
* In this case, the root node of the 64 KiB subtree would be the root node
* of the whole tree, and it would need to be ROOT finalized. We can't
* compress it until we know.
* 2) This 64 KiB input might complete a larger tree, whose root node is
* similarly going to be the root of the whole tree. For example, maybe
* we have 192 KiB (that is, 128 + 64) hashed so far. We can't compress the
* node at the root of the 256 KiB subtree until we know how to finalize it.
*
* The second problem is solved with "lazy merging". That is, when we're about
* to add a CV to the stack, we don't merge it with anything first, as the
* reference impl does. Instead we do merges using the *previous* CV that was
* added, which is sitting on top of the stack, and we put the new CV
* (unmerged) on top of the stack afterwards. This guarantees that we never
* merge the root node until finalize().
*
* Solving the first problem requires an additional tool,
* compress_subtree_to_parent_node(). That function always returns the top
* *two* chaining values of the subtree it's compressing. We then do lazy
* merging with each of them separately, so that the second CV will always
* remain unmerged. (That also helps us support extendable output when we're
* hashing an input all-at-once.)
*/
static void hasher_push_cv(BLAKE3_CTX *ctx, uint8_t new_cv[BLAKE3_OUT_LEN],
uint64_t chunk_counter)
{
hasher_merge_cv_stack(ctx, chunk_counter);
memcpy(&ctx->cv_stack[ctx->cv_stack_len * BLAKE3_OUT_LEN], new_cv,
BLAKE3_OUT_LEN);
ctx->cv_stack_len += 1;
}
void
Blake3_Init(BLAKE3_CTX *ctx)
{
hasher_init_base(ctx, BLAKE3_IV, 0);
}
void
Blake3_InitKeyed(BLAKE3_CTX *ctx, const uint8_t key[BLAKE3_KEY_LEN])
{
uint32_t key_words[8];
load_key_words(key, key_words);
hasher_init_base(ctx, key_words, KEYED_HASH);
}
static void
Blake3_Update2(BLAKE3_CTX *ctx, const void *input, size_t input_len)
{
/*
* Explicitly checking for zero avoids causing UB by passing a null
* pointer to memcpy. This comes up in practice with things like:
* std::vector<uint8_t> v;
* blake3_hasher_update(&hasher, v.data(), v.size());
*/
if (input_len == 0) {
return;
}
const uint8_t *input_bytes = (const uint8_t *)input;
/*
* If we have some partial chunk bytes in the internal chunk_state, we
* need to finish that chunk first.
*/
if (chunk_state_len(&ctx->chunk) > 0) {
size_t take = BLAKE3_CHUNK_LEN - chunk_state_len(&ctx->chunk);
if (take > input_len) {
take = input_len;
}
chunk_state_update(ctx->ops, &ctx->chunk, input_bytes, take);
input_bytes += take;
input_len -= take;
/*
* If we've filled the current chunk and there's more coming,
* finalize this chunk and proceed. In this case we know it's
* not the root.
*/
if (input_len > 0) {
output_t output = chunk_state_output(&ctx->chunk);
uint8_t chunk_cv[32];
output_chaining_value(ctx->ops, &output, chunk_cv);
hasher_push_cv(ctx, chunk_cv, ctx->chunk.chunk_counter);
chunk_state_reset(&ctx->chunk, ctx->key,
ctx->chunk.chunk_counter + 1);
} else {
return;
}
}
/*
* Now the chunk_state is clear, and we have more input. If there's
* more than a single chunk (so, definitely not the root chunk), hash
* the largest whole subtree we can, with the full benefits of SIMD
* (and maybe in the future, multi-threading) parallelism. Two
* restrictions:
* - The subtree has to be a power-of-2 number of chunks. Only
* subtrees along the right edge can be incomplete, and we don't know
* where the right edge is going to be until we get to finalize().
* - The subtree must evenly divide the total number of chunks up
* until this point (if total is not 0). If the current incomplete
* subtree is only waiting for 1 more chunk, we can't hash a subtree
* of 4 chunks. We have to complete the current subtree first.
* Because we might need to break up the input to form powers of 2, or
* to evenly divide what we already have, this part runs in a loop.
*/
while (input_len > BLAKE3_CHUNK_LEN) {
size_t subtree_len = round_down_to_power_of_2(input_len);
uint64_t count_so_far =
ctx->chunk.chunk_counter * BLAKE3_CHUNK_LEN;
/*
* Shrink the subtree_len until it evenly divides the count so
* far. We know that subtree_len itself is a power of 2, so we
* can use a bitmasking trick instead of an actual remainder
* operation. (Note that if the caller consistently passes
* power-of-2 inputs of the same size, as is hopefully
* typical, this loop condition will always fail, and
* subtree_len will always be the full length of the input.)
*
* An aside: We don't have to shrink subtree_len quite this
* much. For example, if count_so_far is 1, we could pass 2
* chunks to compress_subtree_to_parent_node. Since we'll get
* 2 CVs back, we'll still get the right answer in the end,
* and we might get to use 2-way SIMD parallelism. The problem
* with this optimization is that it gets us stuck always
* hashing 2 chunks. The total number of chunks will remain
* odd, and we'll never graduate to higher degrees of
* parallelism. See
* https://github.com/BLAKE3-team/BLAKE3/issues/69.
*/
while ((((uint64_t)(subtree_len - 1)) & count_so_far) != 0) {
subtree_len /= 2;
}
/*
* The shrunken subtree_len might now be 1 chunk long. If so,
* hash that one chunk by itself. Otherwise, compress the
* subtree into a pair of CVs.
*/
uint64_t subtree_chunks = subtree_len / BLAKE3_CHUNK_LEN;
if (subtree_len <= BLAKE3_CHUNK_LEN) {
blake3_chunk_state_t chunk_state;
chunk_state_init(&chunk_state, ctx->key,
ctx->chunk.flags);
chunk_state.chunk_counter = ctx->chunk.chunk_counter;
chunk_state_update(ctx->ops, &chunk_state, input_bytes,
subtree_len);
output_t output = chunk_state_output(&chunk_state);
uint8_t cv[BLAKE3_OUT_LEN];
output_chaining_value(ctx->ops, &output, cv);
hasher_push_cv(ctx, cv, chunk_state.chunk_counter);
} else {
/*
* This is the high-performance happy path, though
* getting here depends on the caller giving us a long
* enough input.
*/
uint8_t cv_pair[2 * BLAKE3_OUT_LEN];
compress_subtree_to_parent_node(ctx->ops, input_bytes,
subtree_len, ctx->key, ctx->chunk.chunk_counter,
ctx->chunk.flags, cv_pair);
hasher_push_cv(ctx, cv_pair, ctx->chunk.chunk_counter);
hasher_push_cv(ctx, &cv_pair[BLAKE3_OUT_LEN],
ctx->chunk.chunk_counter + (subtree_chunks / 2));
}
ctx->chunk.chunk_counter += subtree_chunks;
input_bytes += subtree_len;
input_len -= subtree_len;
}
/*
* If there's any remaining input less than a full chunk, add it to
* the chunk state. In that case, also do a final merge loop to make
* sure the subtree stack doesn't contain any unmerged pairs. The
* remaining input means we know these merges are non-root. This merge
* loop isn't strictly necessary here, because hasher_push_cv()
* already does its own merge loop, but it simplifies
* Blake3_FinalSeek() below.
*/
if (input_len > 0) {
chunk_state_update(ctx->ops, &ctx->chunk, input_bytes,
input_len);
hasher_merge_cv_stack(ctx, ctx->chunk.chunk_counter);
}
}
void
Blake3_Update(BLAKE3_CTX *ctx, const void *input, size_t todo)
{
size_t done = 0;
const uint8_t *data = input;
const size_t block_max = 1024 * 64;
/* feed at most 64 KiB per call to keep the stack frame small */
while (todo != 0) {
size_t block = (todo >= block_max) ? block_max : todo;
Blake3_Update2(ctx, data + done, block);
done += block;
todo -= block;
}
}
void
Blake3_Final(const BLAKE3_CTX *ctx, uint8_t *out)
{
Blake3_FinalSeek(ctx, 0, out, BLAKE3_OUT_LEN);
}
void
Blake3_FinalSeek(const BLAKE3_CTX *ctx, uint64_t seek, uint8_t *out,
size_t out_len)
{
/*
* Explicitly checking for zero avoids causing UB by passing a null
* pointer to memcpy. This comes up in practice with things like:
* std::vector<uint8_t> v;
* blake3_hasher_finalize(&hasher, v.data(), v.size());
*/
if (out_len == 0) {
return;
}
/* If the subtree stack is empty, then the current chunk is the root. */
if (ctx->cv_stack_len == 0) {
output_t output = chunk_state_output(&ctx->chunk);
output_root_bytes(ctx->ops, &output, seek, out, out_len);
return;
}
/*
* If there are any bytes in the chunk state, finalize that chunk and
* do a roll-up merge between that chunk hash and every subtree in the
* stack. In this case, the extra merge loop at the end of
* Blake3_Update2() guarantees that none of the subtrees in the
* stack need to be merged with each other first. Otherwise, if there
* are no bytes in the chunk state, then the top of the stack is a
* chunk hash, and we start the merge from that.
*/
output_t output;
size_t cvs_remaining;
if (chunk_state_len(&ctx->chunk) > 0) {
cvs_remaining = ctx->cv_stack_len;
output = chunk_state_output(&ctx->chunk);
} else {
/* There are always at least 2 CVs in the stack in this case. */
cvs_remaining = ctx->cv_stack_len - 2;
output = parent_output(&ctx->cv_stack[cvs_remaining * 32],
ctx->key, ctx->chunk.flags);
}
while (cvs_remaining > 0) {
cvs_remaining -= 1;
uint8_t parent_block[BLAKE3_BLOCK_LEN];
memcpy(parent_block, &ctx->cv_stack[cvs_remaining * 32], 32);
output_chaining_value(ctx->ops, &output, &parent_block[32]);
output = parent_output(parent_block, ctx->key,
ctx->chunk.flags);
}
output_root_bytes(ctx->ops, &output, seek, out, out_len);
}
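A minimal usage sketch of the init/update/final interface defined above (illustrative only; "buf" and "buflen" stand in for caller-supplied data, and the default 32-byte output length is used):

	BLAKE3_CTX ctx;
	uint8_t digest[BLAKE3_OUT_LEN];

	Blake3_Init(&ctx);
	Blake3_Update(&ctx, buf, buflen);	/* may be called repeatedly */
	Blake3_Final(&ctx, digest);		/* writes BLAKE3_OUT_LEN bytes */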

View File

@ -0,0 +1,202 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
* Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor
* Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
*/
#include <sys/zfs_context.h>
#include "blake3_impl.h"
#define rotr32(x, n) (((x) >> (n)) | ((x) << (32 - (n))))
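/*
 * g() below is the BLAKE3 quarter-round: it mixes two message words into
 * four state words, using the same 16/12/8/7 rotation constants as BLAKE2s.
 */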
static inline void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d,
uint32_t x, uint32_t y)
{
state[a] = state[a] + state[b] + x;
state[d] = rotr32(state[d] ^ state[a], 16);
state[c] = state[c] + state[d];
state[b] = rotr32(state[b] ^ state[c], 12);
state[a] = state[a] + state[b] + y;
state[d] = rotr32(state[d] ^ state[a], 8);
state[c] = state[c] + state[d];
state[b] = rotr32(state[b] ^ state[c], 7);
}
static inline void round_fn(uint32_t state[16], const uint32_t *msg,
size_t round)
{
/* Select the message schedule based on the round. */
const uint8_t *schedule = BLAKE3_MSG_SCHEDULE[round];
/* Mix the columns. */
g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]);
g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]);
g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]);
g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]);
/* Mix the rows. */
g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]);
g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]);
g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]);
g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]);
}
static inline void compress_pre(uint32_t state[16], const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags)
{
uint32_t block_words[16];
block_words[0] = load32(block + 4 * 0);
block_words[1] = load32(block + 4 * 1);
block_words[2] = load32(block + 4 * 2);
block_words[3] = load32(block + 4 * 3);
block_words[4] = load32(block + 4 * 4);
block_words[5] = load32(block + 4 * 5);
block_words[6] = load32(block + 4 * 6);
block_words[7] = load32(block + 4 * 7);
block_words[8] = load32(block + 4 * 8);
block_words[9] = load32(block + 4 * 9);
block_words[10] = load32(block + 4 * 10);
block_words[11] = load32(block + 4 * 11);
block_words[12] = load32(block + 4 * 12);
block_words[13] = load32(block + 4 * 13);
block_words[14] = load32(block + 4 * 14);
block_words[15] = load32(block + 4 * 15);
state[0] = cv[0];
state[1] = cv[1];
state[2] = cv[2];
state[3] = cv[3];
state[4] = cv[4];
state[5] = cv[5];
state[6] = cv[6];
state[7] = cv[7];
state[8] = BLAKE3_IV[0];
state[9] = BLAKE3_IV[1];
state[10] = BLAKE3_IV[2];
state[11] = BLAKE3_IV[3];
state[12] = counter_low(counter);
state[13] = counter_high(counter);
state[14] = (uint32_t)block_len;
state[15] = (uint32_t)flags;
round_fn(state, &block_words[0], 0);
round_fn(state, &block_words[0], 1);
round_fn(state, &block_words[0], 2);
round_fn(state, &block_words[0], 3);
round_fn(state, &block_words[0], 4);
round_fn(state, &block_words[0], 5);
round_fn(state, &block_words[0], 6);
}
static inline void blake3_compress_in_place_generic(uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags)
{
uint32_t state[16];
compress_pre(state, cv, block, block_len, counter, flags);
cv[0] = state[0] ^ state[8];
cv[1] = state[1] ^ state[9];
cv[2] = state[2] ^ state[10];
cv[3] = state[3] ^ state[11];
cv[4] = state[4] ^ state[12];
cv[5] = state[5] ^ state[13];
cv[6] = state[6] ^ state[14];
cv[7] = state[7] ^ state[15];
}
static inline void hash_one_generic(const uint8_t *input, size_t blocks,
const uint32_t key[8], uint64_t counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN])
{
uint32_t cv[8];
memcpy(cv, key, BLAKE3_KEY_LEN);
uint8_t block_flags = flags | flags_start;
while (blocks > 0) {
if (blocks == 1) {
block_flags |= flags_end;
}
blake3_compress_in_place_generic(cv, input, BLAKE3_BLOCK_LEN,
counter, block_flags);
input = &input[BLAKE3_BLOCK_LEN];
blocks -= 1;
block_flags = flags;
}
store_cv_words(out, cv);
}
static inline void blake3_compress_xof_generic(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags, uint8_t out[64])
{
uint32_t state[16];
compress_pre(state, cv, block, block_len, counter, flags);
store32(&out[0 * 4], state[0] ^ state[8]);
store32(&out[1 * 4], state[1] ^ state[9]);
store32(&out[2 * 4], state[2] ^ state[10]);
store32(&out[3 * 4], state[3] ^ state[11]);
store32(&out[4 * 4], state[4] ^ state[12]);
store32(&out[5 * 4], state[5] ^ state[13]);
store32(&out[6 * 4], state[6] ^ state[14]);
store32(&out[7 * 4], state[7] ^ state[15]);
store32(&out[8 * 4], state[8] ^ cv[0]);
store32(&out[9 * 4], state[9] ^ cv[1]);
store32(&out[10 * 4], state[10] ^ cv[2]);
store32(&out[11 * 4], state[11] ^ cv[3]);
store32(&out[12 * 4], state[12] ^ cv[4]);
store32(&out[13 * 4], state[13] ^ cv[5]);
store32(&out[14 * 4], state[14] ^ cv[6]);
store32(&out[15 * 4], state[15] ^ cv[7]);
}
static inline void blake3_hash_many_generic(const uint8_t * const *inputs,
size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter,
boolean_t increment_counter, uint8_t flags, uint8_t flags_start,
uint8_t flags_end, uint8_t *out)
{
while (num_inputs > 0) {
hash_one_generic(inputs[0], blocks, key, counter, flags,
flags_start, flags_end, out);
if (increment_counter) {
counter += 1;
}
inputs += 1;
num_inputs -= 1;
out = &out[BLAKE3_OUT_LEN];
}
}
static inline boolean_t blake3_is_generic_supported(void)
{
return (B_TRUE);
}
const blake3_impl_ops_t blake3_generic_impl = {
.compress_in_place = blake3_compress_in_place_generic,
.compress_xof = blake3_compress_xof_generic,
.hash_many = blake3_hash_many_generic,
.is_supported = blake3_is_generic_supported,
.degree = 4,
.name = "generic"
};

View File

@ -0,0 +1,284 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
*/
#include <sys/zfs_context.h>
#include <sys/zio_checksum.h>
#include "blake3_impl.h"
static const blake3_impl_ops_t *const blake3_impls[] = {
&blake3_generic_impl,
#if defined(__aarch64__) || \
(defined(__x86_64) && defined(HAVE_SSE2)) || \
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
&blake3_sse2_impl,
#endif
#if defined(__aarch64__) || \
(defined(__x86_64) && defined(HAVE_SSE4_1)) || \
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
&blake3_sse41_impl,
#endif
#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
&blake3_avx2_impl,
#endif
#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
&blake3_avx512_impl,
#endif
};
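/*
 * Entries are ordered from most portable (generic) to most specialized
 * (avx512); runtime selection consults is_supported(), so only usable
 * entries are ever chosen.
 */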
/* ops of the currently selected implementation */
static const blake3_impl_ops_t *blake3_selected_impl = &blake3_generic_impl;
/* special implementation selections */
#define IMPL_FASTEST (UINT32_MAX)
#define IMPL_CYCLE (UINT32_MAX-1)
#define IMPL_USER (UINT32_MAX-2)
#define IMPL_PARAM (UINT32_MAX-3)
#define IMPL_READ(i) (*(volatile uint32_t *) &(i))
static uint32_t icp_blake3_impl = IMPL_FASTEST;
#define BLAKE3_IMPL_NAME_MAX 16
/* id of fastest implementation */
static uint32_t blake3_fastest_id = 0;
/* currently used id */
static uint32_t blake3_current_id = 0;
/* id of module parameter (-1 == unused) */
static int blake3_param_id = -1;
/* return number of supported implementations */
int
blake3_get_impl_count(void)
{
static int impls = 0;
int i;
if (impls)
return (impls);
for (i = 0; i < ARRAY_SIZE(blake3_impls); i++) {
if (!blake3_impls[i]->is_supported()) continue;
impls++;
}
return (impls);
}
/* return id of selected implementation */
int
blake3_get_impl_id(void)
{
return (blake3_current_id);
}
/* return name of selected implementation */
const char *
blake3_get_impl_name(void)
{
return (blake3_selected_impl->name);
}
/* setup id as fastest implementation */
void
blake3_set_impl_fastest(uint32_t id)
{
blake3_fastest_id = id;
}
/* set implementation by id */
void
blake3_set_impl_id(uint32_t id)
{
int i, cid;
/* select fastest */
if (id == IMPL_FASTEST)
id = blake3_fastest_id;
/* select next or first */
if (id == IMPL_CYCLE)
id = (++blake3_current_id) % blake3_get_impl_count();
/* 0..N for the real impl */
for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) {
if (!blake3_impls[i]->is_supported()) continue;
if (cid == id) {
blake3_current_id = cid;
blake3_selected_impl = blake3_impls[i];
return;
}
cid++;
}
}
/* set implementation by name */
int
blake3_set_impl_name(const char *name)
{
int i, cid;
if (strcmp(name, "fastest") == 0) {
atomic_swap_32(&icp_blake3_impl, IMPL_FASTEST);
blake3_set_impl_id(IMPL_FASTEST);
return (0);
} else if (strcmp(name, "cycle") == 0) {
atomic_swap_32(&icp_blake3_impl, IMPL_CYCLE);
blake3_set_impl_id(IMPL_CYCLE);
return (0);
}
for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) {
if (!blake3_impls[i]->is_supported()) continue;
if (strcmp(name, blake3_impls[i]->name) == 0) {
if (icp_blake3_impl == IMPL_PARAM) {
blake3_param_id = cid;
return (0);
}
blake3_selected_impl = blake3_impls[i];
blake3_current_id = cid;
return (0);
}
cid++;
}
return (-EINVAL);
}
/* setup implementation */
void
blake3_setup_impl(void)
{
switch (IMPL_READ(icp_blake3_impl)) {
case IMPL_PARAM:
blake3_set_impl_id(blake3_param_id);
atomic_swap_32(&icp_blake3_impl, IMPL_USER);
break;
case IMPL_FASTEST:
blake3_set_impl_id(IMPL_FASTEST);
break;
case IMPL_CYCLE:
blake3_set_impl_id(IMPL_CYCLE);
break;
default:
blake3_set_impl_id(blake3_current_id);
break;
}
}
/* return selected implementation */
const blake3_impl_ops_t *
blake3_impl_get_ops(void)
{
/* in cycle mode, each call advances to the next implementation */
if (icp_blake3_impl == IMPL_CYCLE)
blake3_set_impl_id(IMPL_CYCLE);
return (blake3_selected_impl);
}
#if defined(_KERNEL)
void **blake3_per_cpu_ctx;
void
blake3_per_cpu_ctx_init(void)
{
/*
* Create "The Godfather" ptr to hold all blake3 ctx
*/
blake3_per_cpu_ctx = kmem_alloc(max_ncpus * sizeof (void *), KM_SLEEP);
for (int i = 0; i < max_ncpus; i++) {
blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX),
KM_SLEEP);
}
}
void
blake3_per_cpu_ctx_fini(void)
{
for (int i = 0; i < max_ncpus; i++) {
memset(blake3_per_cpu_ctx[i], 0, sizeof (BLAKE3_CTX));
kmem_free(blake3_per_cpu_ctx[i], sizeof (BLAKE3_CTX));
}
memset(blake3_per_cpu_ctx, 0, max_ncpus * sizeof (void *));
kmem_free(blake3_per_cpu_ctx, max_ncpus * sizeof (void *));
}
#endif
#if defined(_KERNEL) && defined(__linux__)
static int
icp_blake3_impl_set(const char *name, zfs_kernel_param_t *kp)
{
char req_name[BLAKE3_IMPL_NAME_MAX];
size_t i;
/* sanitize input */
i = strnlen(name, BLAKE3_IMPL_NAME_MAX);
if (i == 0 || i >= BLAKE3_IMPL_NAME_MAX)
return (-EINVAL);
strlcpy(req_name, name, BLAKE3_IMPL_NAME_MAX);
while (i > 0 && isspace(req_name[i-1]))
i--;
req_name[i] = '\0';
atomic_swap_32(&icp_blake3_impl, IMPL_PARAM);
return (blake3_set_impl_name(req_name));
}
static int
icp_blake3_impl_get(char *buffer, zfs_kernel_param_t *kp)
{
int i, cid, cnt = 0;
char *fmt;
/* cycling */
fmt = (icp_blake3_impl == IMPL_CYCLE) ? "[cycle] " : "cycle ";
cnt += sprintf(buffer + cnt, fmt);
/* fastest one */
fmt = (icp_blake3_impl == IMPL_FASTEST) ? "[fastest] " : "fastest ";
cnt += sprintf(buffer + cnt, fmt);
/* user selected */
for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) {
if (!blake3_impls[i]->is_supported()) continue;
fmt = (icp_blake3_impl == IMPL_USER &&
cid == blake3_current_id) ? "[%s] " : "%s ";
cnt += sprintf(buffer + cnt, fmt, blake3_impls[i]->name);
cid++;
}
buffer[cnt] = 0;
return (cnt);
}
module_param_call(icp_blake3_impl, icp_blake3_impl_set, icp_blake3_impl_get,
NULL, 0644);
MODULE_PARM_DESC(icp_blake3_impl, "Select BLAKE3 implementation.");
#endif
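A rough sketch of driving the selection interface above (a sketch only; error handling is elided, and the fastest id is assumed to have been recorded earlier via blake3_set_impl_fastest() during benchmarking):

	/* Request the benchmark winner, then report what was selected. */
	if (blake3_set_impl_name("fastest") == 0) {
		const char *cur = blake3_get_impl_name();	/* e.g. "sse41" */
		(void) cur;
	}

On Linux kernel builds the same choice is also exposed through the icp_blake3_impl module parameter handled by icp_blake3_impl_set()/icp_blake3_impl_get() above.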

View File

@ -0,0 +1,213 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
* Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor
* Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
*/
#ifndef BLAKE3_IMPL_H
#define BLAKE3_IMPL_H
#ifdef __cplusplus
extern "C" {
#endif
#include <sys/types.h>
#include <sys/blake3.h>
#include <sys/simd.h>
/*
* Methods used to define BLAKE3 assembler implementations
*/
typedef void (*blake3_compress_in_place_f)(uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN],
uint8_t block_len, uint64_t counter,
uint8_t flags);
typedef void (*blake3_compress_xof_f)(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags, uint8_t out[64]);
typedef void (*blake3_hash_many_f)(const uint8_t * const *inputs,
size_t num_inputs, size_t blocks, const uint32_t key[8],
uint64_t counter, boolean_t increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
typedef boolean_t (*blake3_is_supported_f)(void);
typedef struct blake3_impl_ops {
blake3_compress_in_place_f compress_in_place;
blake3_compress_xof_f compress_xof;
blake3_hash_many_f hash_many;
blake3_is_supported_f is_supported;
int degree;
const char *name;
} blake3_impl_ops_t;
/* Return selected BLAKE3 implementation ops */
extern const blake3_impl_ops_t *blake3_impl_get_ops(void);
extern const blake3_impl_ops_t blake3_generic_impl;
#if defined(__aarch64__) || \
(defined(__x86_64) && defined(HAVE_SSE2)) || \
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
extern const blake3_impl_ops_t blake3_sse2_impl;
#endif
#if defined(__aarch64__) || \
(defined(__x86_64) && defined(HAVE_SSE4_1)) || \
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
extern const blake3_impl_ops_t blake3_sse41_impl;
#endif
#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
extern const blake3_impl_ops_t blake3_avx2_impl;
#endif
#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
extern const blake3_impl_ops_t blake3_avx512_impl;
#endif
#if defined(__x86_64)
#define MAX_SIMD_DEGREE 16
#else
#define MAX_SIMD_DEGREE 4
#endif
#define MAX_SIMD_DEGREE_OR_2 (MAX_SIMD_DEGREE > 2 ? MAX_SIMD_DEGREE : 2)
static const uint32_t BLAKE3_IV[8] = {
0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL};
static const uint8_t BLAKE3_MSG_SCHEDULE[7][16] = {
{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15},
{2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8},
{3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1},
{10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6},
{12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4},
{9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7},
{11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 7, 13},
};
/* Find index of the highest set bit */
static inline unsigned int highest_one(uint64_t x) {
#if defined(__GNUC__) || defined(__clang__)
return (63 ^ __builtin_clzll(x));
#elif defined(_MSC_VER) && defined(IS_X86_64)
unsigned long index;
_BitScanReverse64(&index, x);
return (index);
#elif defined(_MSC_VER) && defined(IS_X86_32)
if (x >> 32) {
unsigned long index;
_BitScanReverse(&index, x >> 32);
return (32 + index);
} else {
unsigned long index;
_BitScanReverse(&index, x);
return (index);
}
#else
unsigned int c = 0;
if (x & 0xffffffff00000000ULL) { x >>= 32; c += 32; }
if (x & 0x00000000ffff0000ULL) { x >>= 16; c += 16; }
if (x & 0x000000000000ff00ULL) { x >>= 8; c += 8; }
if (x & 0x00000000000000f0ULL) { x >>= 4; c += 4; }
if (x & 0x000000000000000cULL) { x >>= 2; c += 2; }
if (x & 0x0000000000000002ULL) { c += 1; }
return (c);
#endif
}
/* Count the number of 1 bits. */
static inline unsigned int popcnt(uint64_t x) {
unsigned int count = 0;
while (x != 0) {
count += 1;
x &= x - 1;
}
return (count);
}
/*
* Largest power of two less than or equal to x.
* As a special case, returns 1 when x is 0.
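* Examples: 5 -> 4, 8 -> 8, 0 -> 1.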
*/
static inline uint64_t round_down_to_power_of_2(uint64_t x) {
return (1ULL << highest_one(x | 1));
}
static inline uint32_t counter_low(uint64_t counter) {
return ((uint32_t)counter);
}
static inline uint32_t counter_high(uint64_t counter) {
return ((uint32_t)(counter >> 32));
}
static inline uint32_t load32(const void *src) {
const uint8_t *p = (const uint8_t *)src;
return ((uint32_t)(p[0]) << 0) | ((uint32_t)(p[1]) << 8) |
((uint32_t)(p[2]) << 16) | ((uint32_t)(p[3]) << 24);
}
static inline void load_key_words(const uint8_t key[BLAKE3_KEY_LEN],
uint32_t key_words[8]) {
key_words[0] = load32(&key[0 * 4]);
key_words[1] = load32(&key[1 * 4]);
key_words[2] = load32(&key[2 * 4]);
key_words[3] = load32(&key[3 * 4]);
key_words[4] = load32(&key[4 * 4]);
key_words[5] = load32(&key[5 * 4]);
key_words[6] = load32(&key[6 * 4]);
key_words[7] = load32(&key[7 * 4]);
}
static inline void store32(void *dst, uint32_t w) {
uint8_t *p = (uint8_t *)dst;
p[0] = (uint8_t)(w >> 0);
p[1] = (uint8_t)(w >> 8);
p[2] = (uint8_t)(w >> 16);
p[3] = (uint8_t)(w >> 24);
}
static inline void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8]) {
store32(&bytes_out[0 * 4], cv_words[0]);
store32(&bytes_out[1 * 4], cv_words[1]);
store32(&bytes_out[2 * 4], cv_words[2]);
store32(&bytes_out[3 * 4], cv_words[3]);
store32(&bytes_out[4 * 4], cv_words[4]);
store32(&bytes_out[5 * 4], cv_words[5]);
store32(&bytes_out[6 * 4], cv_words[6]);
store32(&bytes_out[7 * 4], cv_words[7]);
}
#ifdef __cplusplus
}
#endif
#endif /* BLAKE3_IMPL_H */

View File

@ -0,0 +1,248 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
*/
#include "blake3_impl.h"
#if defined(__aarch64__) || \
(defined(__x86_64) && defined(HAVE_SSE2)) || \
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
extern void zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags);
extern void zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags, uint8_t out[64]);
extern void zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
size_t num_inputs, size_t blocks, const uint32_t key[8],
uint64_t counter, boolean_t increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
static void blake3_compress_in_place_sse2(uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags) {
kfpu_begin();
zfs_blake3_compress_in_place_sse2(cv, block, block_len, counter,
flags);
kfpu_end();
}
static void blake3_compress_xof_sse2(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags, uint8_t out[64]) {
kfpu_begin();
zfs_blake3_compress_xof_sse2(cv, block, block_len, counter, flags,
out);
kfpu_end();
}
static void blake3_hash_many_sse2(const uint8_t * const *inputs,
size_t num_inputs, size_t blocks, const uint32_t key[8],
uint64_t counter, boolean_t increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
kfpu_begin();
zfs_blake3_hash_many_sse2(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end, out);
kfpu_end();
}
static boolean_t blake3_is_sse2_supported(void)
{
#if defined(__x86_64)
return (kfpu_allowed() && zfs_sse2_available());
#elif defined(__PPC64__)
return (kfpu_allowed() && zfs_vsx_available());
#else
return (kfpu_allowed());
#endif
}
const blake3_impl_ops_t blake3_sse2_impl = {
.compress_in_place = blake3_compress_in_place_sse2,
.compress_xof = blake3_compress_xof_sse2,
.hash_many = blake3_hash_many_sse2,
.is_supported = blake3_is_sse2_supported,
.degree = 4,
.name = "sse2"
};
#endif
#if defined(__aarch64__) || \
(defined(__x86_64) && defined(HAVE_SSE4_1)) || \
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
extern void zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags);
extern void zfs_blake3_compress_xof_sse41(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags, uint8_t out[64]);
extern void zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,
size_t num_inputs, size_t blocks, const uint32_t key[8],
uint64_t counter, boolean_t increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
static void blake3_compress_in_place_sse41(uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags) {
kfpu_begin();
zfs_blake3_compress_in_place_sse41(cv, block, block_len, counter,
flags);
kfpu_end();
}
static void blake3_compress_xof_sse41(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags, uint8_t out[64]) {
kfpu_begin();
zfs_blake3_compress_xof_sse41(cv, block, block_len, counter, flags,
out);
kfpu_end();
}
static void blake3_hash_many_sse41(const uint8_t * const *inputs,
size_t num_inputs, size_t blocks, const uint32_t key[8],
uint64_t counter, boolean_t increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
kfpu_begin();
zfs_blake3_hash_many_sse41(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end, out);
kfpu_end();
}
static boolean_t blake3_is_sse41_supported(void)
{
#if defined(__x86_64)
return (kfpu_allowed() && zfs_sse4_1_available());
#elif defined(__PPC64__)
return (kfpu_allowed() && zfs_vsx_available());
#else
return (kfpu_allowed());
#endif
}
const blake3_impl_ops_t blake3_sse41_impl = {
.compress_in_place = blake3_compress_in_place_sse41,
.compress_xof = blake3_compress_xof_sse41,
.hash_many = blake3_hash_many_sse41,
.is_supported = blake3_is_sse41_supported,
.degree = 4,
.name = "sse41"
};
#endif
#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
extern void zfs_blake3_hash_many_avx2(const uint8_t * const *inputs,
size_t num_inputs, size_t blocks, const uint32_t key[8],
uint64_t counter, boolean_t increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
static void blake3_hash_many_avx2(const uint8_t * const *inputs,
size_t num_inputs, size_t blocks, const uint32_t key[8],
uint64_t counter, boolean_t increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
kfpu_begin();
zfs_blake3_hash_many_avx2(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end, out);
kfpu_end();
}
static boolean_t blake3_is_avx2_supported(void)
{
return (kfpu_allowed() && zfs_sse4_1_available() &&
zfs_avx2_available());
}
const blake3_impl_ops_t blake3_avx2_impl = {
.compress_in_place = blake3_compress_in_place_sse41,
.compress_xof = blake3_compress_xof_sse41,
.hash_many = blake3_hash_many_avx2,
.is_supported = blake3_is_avx2_supported,
.degree = 8,
.name = "avx2"
};
#endif
#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
extern void zfs_blake3_compress_in_place_avx512(uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags);
extern void zfs_blake3_compress_xof_avx512(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags, uint8_t out[64]);
extern void zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,
size_t num_inputs, size_t blocks, const uint32_t key[8],
uint64_t counter, boolean_t increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
static void blake3_compress_in_place_avx512(uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags) {
kfpu_begin();
zfs_blake3_compress_in_place_avx512(cv, block, block_len, counter,
flags);
kfpu_end();
}
static void blake3_compress_xof_avx512(const uint32_t cv[8],
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
uint64_t counter, uint8_t flags, uint8_t out[64]) {
kfpu_begin();
zfs_blake3_compress_xof_avx512(cv, block, block_len, counter, flags,
out);
kfpu_end();
}
static void blake3_hash_many_avx512(const uint8_t * const *inputs,
size_t num_inputs, size_t blocks, const uint32_t key[8],
uint64_t counter, boolean_t increment_counter, uint8_t flags,
uint8_t flags_start, uint8_t flags_end, uint8_t *out) {
kfpu_begin();
zfs_blake3_hash_many_avx512(inputs, num_inputs, blocks, key, counter,
increment_counter, flags, flags_start, flags_end, out);
kfpu_end();
}
static boolean_t blake3_is_avx512_supported(void)
{
return (kfpu_allowed() && zfs_avx512f_available() &&
zfs_avx512vl_available());
}
const blake3_impl_ops_t blake3_avx512_impl = {
.compress_in_place = blake3_compress_in_place_avx512,
.compress_xof = blake3_compress_xof_avx512,
.hash_many = blake3_hash_many_avx512,
.is_supported = blake3_is_avx512_supported,
.degree = 16,
.name = "avx512"
};
#endif

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

File diff suppressed because it is too large

View File

@ -149,6 +149,13 @@ freebsd_zfs_crypt_done(struct cryptop *crp)
return (0);
}
static int
freebsd_zfs_crypt_done_sync(struct cryptop *crp)
{
return (0);
}
void
freebsd_crypt_freesession(freebsd_crypt_session_t *sess)
{
@ -158,26 +165,36 @@ freebsd_crypt_freesession(freebsd_crypt_session_t *sess)
}
static int
zfs_crypto_dispatch(freebsd_crypt_session_t *session, struct cryptop *crp)
zfs_crypto_dispatch(freebsd_crypt_session_t *session, struct cryptop *crp)
{
int error;
crp->crp_opaque = session;
crp->crp_callback = freebsd_zfs_crypt_done;
for (;;) {
#if __FreeBSD_version < 1400004
boolean_t async = ((crypto_ses2caps(crp->crp_session) &
CRYPTOCAP_F_SYNC) == 0);
#else
boolean_t async = !CRYPTO_SESS_SYNC(crp->crp_session);
#endif
crp->crp_callback = async ? freebsd_zfs_crypt_done :
freebsd_zfs_crypt_done_sync;
error = crypto_dispatch(crp);
if (error)
break;
mtx_lock(&session->fs_lock);
while (session->fs_done == false)
msleep(crp, &session->fs_lock, 0,
"zfs_crypto", 0);
mtx_unlock(&session->fs_lock);
if (crp->crp_etype == ENOMEM) {
pause("zcrnomem", 1);
} else if (crp->crp_etype != EAGAIN) {
if (error == 0) {
if (async) {
mtx_lock(&session->fs_lock);
while (session->fs_done == false) {
msleep(crp, &session->fs_lock, 0,
"zfs_crypto", 0);
}
mtx_unlock(&session->fs_lock);
}
error = crp->crp_etype;
}
if (error == ENOMEM) {
pause("zcrnomem", 1);
} else if (error != EAGAIN) {
break;
}
crp->crp_etype = 0;

View File

@ -780,8 +780,13 @@ spl_init(void)
if ((rc = spl_zlib_init()))
goto out7;
if ((rc = spl_zone_init()))
goto out8;
return (rc);
out8:
spl_zlib_fini();
out7:
spl_kstat_fini();
out6:
@ -801,6 +806,7 @@ spl_init(void)
static void __exit
spl_fini(void)
{
spl_zone_fini();
spl_zlib_fini();
spl_kstat_fini();
spl_proc_fini();

View File

@ -0,0 +1,424 @@
/*
* Copyright (c) 2021 Klara Systems, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/types.h>
#include <sys/mutex.h>
#include <sys/sysmacros.h>
#include <sys/kmem.h>
#include <linux/file.h>
#include <linux/magic.h>
#include <sys/zone.h>
#if defined(CONFIG_USER_NS)
#include <linux/statfs.h>
#include <linux/proc_ns.h>
#endif
static kmutex_t zone_datasets_lock;
static struct list_head zone_datasets;
typedef struct zone_datasets {
struct list_head zds_list; /* zone_datasets linkage */
struct user_namespace *zds_userns; /* namespace reference */
struct list_head zds_datasets; /* datasets for the namespace */
} zone_datasets_t;
typedef struct zone_dataset {
struct list_head zd_list; /* zone_dataset linkage */
size_t zd_dsnamelen; /* length of name */
char zd_dsname[0]; /* name of the member dataset */
} zone_dataset_t;
#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
/*
* Returns:
* - 0 on success
* - EBADF if it cannot open the provided file descriptor
* - ENOTTY if the file itself is a not a user namespace file. We want to
* intercept this error in the ZFS layer. We cannot just return one of the
* ZFS_ERR_* errors here as we want to preserve the separation of the ZFS
* and the SPL layers.
*/
static int
user_ns_get(int fd, struct user_namespace **userns)
{
struct kstatfs st;
struct file *nsfile;
struct ns_common *ns;
int error;
if ((nsfile = fget(fd)) == NULL)
return (EBADF);
if (vfs_statfs(&nsfile->f_path, &st) != 0) {
error = ENOTTY;
goto done;
}
if (st.f_type != NSFS_MAGIC) {
error = ENOTTY;
goto done;
}
ns = get_proc_ns(file_inode(nsfile));
if (ns->ops->type != CLONE_NEWUSER) {
error = ENOTTY;
goto done;
}
*userns = container_of(ns, struct user_namespace, ns);
error = 0;
done:
fput(nsfile);
return (error);
}
#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
static unsigned int
user_ns_zoneid(struct user_namespace *user_ns)
{
unsigned int r;
#if defined(HAVE_USER_NS_COMMON_INUM)
r = user_ns->ns.inum;
#else
r = user_ns->proc_inum;
#endif
return (r);
}
static struct zone_datasets *
zone_datasets_lookup(unsigned int nsinum)
{
zone_datasets_t *zds;
list_for_each_entry(zds, &zone_datasets, zds_list) {
if (user_ns_zoneid(zds->zds_userns) == nsinum)
return (zds);
}
return (NULL);
}
#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
static struct zone_dataset *
zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen)
{
zone_dataset_t *zd;
list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
if (zd->zd_dsnamelen != dsnamelen)
continue;
if (strncmp(zd->zd_dsname, dataset, dsnamelen) == 0)
return (zd);
}
return (NULL);
}
static int
zone_dataset_cred_check(cred_t *cred)
{
if (!uid_eq(cred->uid, GLOBAL_ROOT_UID))
return (EPERM);
return (0);
}
#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
static int
zone_dataset_name_check(const char *dataset, size_t *dsnamelen)
{
if (dataset[0] == '\0' || dataset[0] == '/')
return (ENOENT);
*dsnamelen = strlen(dataset);
/* Ignore trailing slash, if supplied. */
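/* e.g. "tank/zone1/" is treated as "tank/zone1" (dsnamelen == 10) */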
if (dataset[*dsnamelen - 1] == '/')
(*dsnamelen)--;
return (0);
}
int
zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd)
{
#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
struct user_namespace *userns;
zone_datasets_t *zds;
zone_dataset_t *zd;
int error;
size_t dsnamelen;
if ((error = zone_dataset_cred_check(cred)) != 0)
return (error);
if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
return (error);
if ((error = user_ns_get(userns_fd, &userns)) != 0)
return (error);
mutex_enter(&zone_datasets_lock);
zds = zone_datasets_lookup(user_ns_zoneid(userns));
if (zds == NULL) {
zds = kmem_alloc(sizeof (zone_datasets_t), KM_SLEEP);
INIT_LIST_HEAD(&zds->zds_list);
INIT_LIST_HEAD(&zds->zds_datasets);
zds->zds_userns = userns;
/*
* Lock the namespace by increasing its refcount to prevent
* the namespace ID from being reused.
*/
get_user_ns(userns);
list_add_tail(&zds->zds_list, &zone_datasets);
} else {
zd = zone_dataset_lookup(zds, dataset, dsnamelen);
if (zd != NULL) {
mutex_exit(&zone_datasets_lock);
return (EEXIST);
}
}
zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP);
zd->zd_dsnamelen = dsnamelen;
strncpy(zd->zd_dsname, dataset, dsnamelen);
zd->zd_dsname[dsnamelen] = '\0';
INIT_LIST_HEAD(&zd->zd_list);
list_add_tail(&zd->zd_list, &zds->zds_datasets);
mutex_exit(&zone_datasets_lock);
return (0);
#else
return (ENXIO);
#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
}
EXPORT_SYMBOL(zone_dataset_attach);
int
zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd)
{
#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
struct user_namespace *userns;
zone_datasets_t *zds;
zone_dataset_t *zd;
int error;
size_t dsnamelen;
if ((error = zone_dataset_cred_check(cred)) != 0)
return (error);
if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
return (error);
if ((error = user_ns_get(userns_fd, &userns)) != 0)
return (error);
mutex_enter(&zone_datasets_lock);
zds = zone_datasets_lookup(user_ns_zoneid(userns));
if (zds != NULL)
zd = zone_dataset_lookup(zds, dataset, dsnamelen);
if (zds == NULL || zd == NULL) {
mutex_exit(&zone_datasets_lock);
return (ENOENT);
}
list_del(&zd->zd_list);
kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
/* Prune the namespace entry if it has no more delegations. */
if (list_empty(&zds->zds_datasets)) {
/*
* Decrease the refcount now that the namespace is no longer
* used. It is no longer necessary to prevent the namespace ID
* from being reused.
*/
put_user_ns(userns);
list_del(&zds->zds_list);
kmem_free(zds, sizeof (*zds));
}
mutex_exit(&zone_datasets_lock);
return (0);
#else
return (ENXIO);
#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
}
EXPORT_SYMBOL(zone_dataset_detach);
/*
* A dataset is visible if:
* - It is a parent of a namespace entry.
* - It is one of the namespace entries.
* - It is a child of a namespace entry.
*
* A dataset is writable if:
* - It is one of the namespace entries.
* - It is a child of a namespace entry.
*
* The parent datasets of namespace entries are visible and
* read-only to provide a path back to the root of the pool.
*/
int
zone_dataset_visible(const char *dataset, int *write)
{
zone_datasets_t *zds;
zone_dataset_t *zd;
size_t dsnamelen, zd_len;
int visible;
/* Default to read-only, in case visible is returned. */
if (write != NULL)
*write = 0;
if (zone_dataset_name_check(dataset, &dsnamelen) != 0)
return (0);
if (INGLOBALZONE(curproc)) {
if (write != NULL)
*write = 1;
return (1);
}
mutex_enter(&zone_datasets_lock);
zds = zone_datasets_lookup(crgetzoneid(curproc->cred));
if (zds == NULL) {
mutex_exit(&zone_datasets_lock);
return (0);
}
visible = 0;
list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
zd_len = strlen(zd->zd_dsname);
if (zd_len > dsnamelen) {
/*
* The name of the namespace entry is longer than that
* of the dataset, so it could be that the dataset is a
* parent of the namespace entry.
*/
visible = memcmp(zd->zd_dsname, dataset,
dsnamelen) == 0 &&
zd->zd_dsname[dsnamelen] == '/';
if (visible)
break;
} else if (zd_len == dsnamelen) {
/*
* The name of the namespace entry is as long as that
* of the dataset, so perhaps the dataset itself is the
* namespace entry.
*/
visible = memcmp(zd->zd_dsname, dataset, zd_len) == 0;
if (visible) {
if (write != NULL)
*write = 1;
break;
}
} else {
/*
* The name of the namespace entry is shorter than that
* of the dataset, so perhaps the dataset is a child of
* the namespace entry.
*/
visible = memcmp(zd->zd_dsname, dataset,
zd_len) == 0 && dataset[zd_len] == '/';
if (visible) {
if (write != NULL)
*write = 1;
break;
}
}
}
mutex_exit(&zone_datasets_lock);
return (visible);
}
EXPORT_SYMBOL(zone_dataset_visible);
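/*
* Illustrative sketch of the visibility rules above (hypothetical
* dataset names, not part of this change): with a single delegation
* of "tank/zone1", zone_dataset_visible() evaluates as follows:
*
*	zone_dataset_visible("tank", &w)            -> 1, w == 0 (parent)
*	zone_dataset_visible("tank/zone1", &w)      -> 1, w == 1 (entry)
*	zone_dataset_visible("tank/zone1/home", &w) -> 1, w == 1 (child)
*	zone_dataset_visible("tank/other", &w)      -> 0          (hidden)
*/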
unsigned int
global_zoneid(void)
{
unsigned int z = 0;
#if defined(CONFIG_USER_NS)
z = user_ns_zoneid(&init_user_ns);
#endif
return (z);
}
EXPORT_SYMBOL(global_zoneid);
unsigned int
crgetzoneid(const cred_t *cr)
{
unsigned int r = 0;
#if defined(CONFIG_USER_NS)
r = user_ns_zoneid(cr->user_ns);
#endif
return (r);
}
EXPORT_SYMBOL(crgetzoneid);
boolean_t
inglobalzone(proc_t *proc)
{
#if defined(CONFIG_USER_NS)
return (proc->cred->user_ns == &init_user_ns);
#else
return (B_TRUE);
#endif
}
EXPORT_SYMBOL(inglobalzone);
int
spl_zone_init(void)
{
mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL);
INIT_LIST_HEAD(&zone_datasets);
return (0);
}
void
spl_zone_fini(void)
{
zone_datasets_t *zds;
zone_dataset_t *zd;
/*
* It would be better to assert an empty zone_datasets, but since
* there's no automatic mechanism for cleaning them up if the user
* namespace is destroyed, just do it here, since spl is about to go
* out of context.
*/
while (!list_empty(&zone_datasets)) {
zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list);
while (!list_empty(&zds->zds_datasets)) {
zd = list_entry(zds->zds_datasets.next,
zone_dataset_t, zd_list);
list_del(&zd->zd_list);
kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
put_user_ns(zds->zds_userns);
}
list_del(&zds->zds_list);
kmem_free(zds, sizeof (*zds));
}
mutex_destroy(&zone_datasets_lock);
}

View File

@ -61,7 +61,7 @@ priv_policy_ns(const cred_t *cr, int capability, int err,
static int
priv_policy(const cred_t *cr, int capability, int err)
{
return (priv_policy_ns(cr, capability, err, NULL));
return (priv_policy_ns(cr, capability, err, cr->user_ns));
}
static int

View File

@ -37,6 +37,7 @@
* Copyright 2017 RackTop Systems.
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
* Copyright (c) 2019 Datto Inc.
* Copyright (c) 2021 Klara, Inc.
*/
#include <sys/types.h>
@ -150,6 +151,48 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
}
static int
zfs_ioc_userns_attach(zfs_cmd_t *zc)
{
int error;
if (zc == NULL)
return (SET_ERROR(EINVAL));
error = zone_dataset_attach(CRED(), zc->zc_name, zc->zc_cleanup_fd);
/*
* Translate ENOTTY to ZFS_ERR_NOT_USER_NAMESPACE as we just arrived
* back from the SPL layer, which does not know about ZFS_ERR_* errors.
* See the comment at the user_ns_get() function in spl-zone.c for
* details.
*/
if (error == ENOTTY)
error = ZFS_ERR_NOT_USER_NAMESPACE;
return (error);
}
static int
zfs_ioc_userns_detach(zfs_cmd_t *zc)
{
int error;
if (zc == NULL)
return (SET_ERROR(EINVAL));
error = zone_dataset_detach(CRED(), zc->zc_name, zc->zc_cleanup_fd);
/*
* See the comment in zfs_ioc_userns_attach() for details on what is
* going on here.
*/
if (error == ENOTTY)
error = ZFS_ERR_NOT_USER_NAMESPACE;
return (error);
}
uint64_t
zfs_max_nvlist_src_size_os(void)
{
@ -168,6 +211,10 @@ zfs_ioctl_update_mount_cache(const char *dsname)
void
zfs_ioctl_init_os(void)
{
zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERNS_ATTACH,
zfs_ioc_userns_attach, zfs_secpolicy_config, POOL_CHECK_NONE);
zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERNS_DETACH,
zfs_ioc_userns_detach, zfs_secpolicy_config, POOL_CHECK_NONE);
}
#ifdef CONFIG_COMPAT

View File

@ -126,7 +126,7 @@ zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
}
static int
zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
zfs_uiomove_bvec_impl(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
const struct bio_vec *bv = uio->uio_bvec;
size_t skip = uio->uio_skip;
@ -137,10 +137,13 @@ zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
cnt = MIN(bv->bv_len - skip, n);
paddr = zfs_kmap_atomic(bv->bv_page);
if (rw == UIO_READ)
if (rw == UIO_READ) {
/* Copy from buffer 'p' to the bvec data */
memcpy(paddr + bv->bv_offset + skip, p, cnt);
else
} else {
/* Copy from bvec data to buffer 'p' */
memcpy(p, paddr + bv->bv_offset + skip, cnt);
}
zfs_kunmap_atomic(paddr);
skip += cnt;
@ -158,6 +161,141 @@ zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
return (0);
}
#ifdef HAVE_BLK_MQ
static void
zfs_copy_bvec(void *p, size_t skip, size_t cnt, zfs_uio_rw_t rw,
struct bio_vec *bv)
{
void *paddr;
paddr = zfs_kmap_atomic(bv->bv_page);
if (rw == UIO_READ) {
/* Copy from buffer 'p' to the bvec data */
memcpy(paddr + bv->bv_offset + skip, p, cnt);
} else {
/* Copy from bvec data to buffer 'p' */
memcpy(p, paddr + bv->bv_offset + skip, cnt);
}
zfs_kunmap_atomic(paddr);
}
/*
* Copy 'n' bytes of data between the buffer p[] and the data represented
* by the request in the uio.
*/
static int
zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
struct request *rq = uio->rq;
struct bio_vec bv;
struct req_iterator iter;
size_t this_seg_start; /* logical offset */
size_t this_seg_end; /* logical offset */
size_t skip_in_seg;
size_t copy_from_seg;
size_t orig_loffset;
int copied = 0;
/*
* Get the original logical offset of this entire request (because
* uio->uio_loffset will be modified over time).
*/
orig_loffset = io_offset(NULL, rq);
this_seg_start = orig_loffset;
rq_for_each_segment(bv, rq, iter) {
if (uio->iter.bio) {
/*
* If uio->iter.bio is present, then we know we've saved
* uio->iter from a previous call to this function, and
* we can skip ahead in this rq_for_each_segment() loop
* to where we last left off. That way, we don't need
* to iterate over tons of segments we've already
* processed - we can just restore the "saved state".
*/
iter = uio->iter;
bv = uio->bv;
this_seg_start = uio->uio_loffset;
memset(&uio->iter, 0, sizeof (uio->iter));
continue;
}
/*
* Lookup what the logical offset of the last byte of this
* segment is.
*/
this_seg_end = this_seg_start + bv.bv_len - 1;
/*
* We only need to operate on segments that have data we're
* copying.
*/
if (uio->uio_loffset >= this_seg_start &&
uio->uio_loffset <= this_seg_end) {
/*
* Some, or all, of the data in this segment needs to be
* copied.
*/
/*
* We may not be copying from the first byte in the
* segment. Figure out how many bytes to skip copying
* from the beginning of this segment.
*/
skip_in_seg = uio->uio_loffset - this_seg_start;
/*
* Calculate the total number of bytes from this
* segment that we will be copying.
*/
copy_from_seg = MIN(bv.bv_len - skip_in_seg, n);
/* Copy the bytes */
zfs_copy_bvec(p, skip_in_seg, copy_from_seg, rw, &bv);
p = ((char *)p) + copy_from_seg;
n -= copy_from_seg;
uio->uio_resid -= copy_from_seg;
uio->uio_loffset += copy_from_seg;
copied = 1; /* We copied some data */
}
if (n == 0) {
/*
* All done copying. Save our 'iter' value to the uio.
* This allows us to "save our state" and skip ahead in
* the rq_for_each_segment() loop the next time we
* call zfs_uiomove_bvec_rq() on this uio (which we
* will be doing for any remaining data in the uio).
*/
uio->iter = iter; /* make a copy of the struct data */
uio->bv = bv;
return (0);
}
this_seg_start = this_seg_end + 1;
}
if (!copied) {
/* Didn't copy anything */
uio->uio_resid = 0;
}
return (0);
}
#endif
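/*
* Worked example of the save/restore logic above (hypothetical
* numbers): suppose a request has two 4k bvecs covering logical
* offsets [0, 4095] and [4096, 8191]. A call with uio_loffset == 1024
* and n == 2048 copies bytes 1024-3071 of the first segment
* (skip_in_seg == 1024, copy_from_seg == 2048) and saves 'iter' in the
* uio, so the next call resumes at that segment instead of rescanning
* the request from the start.
*/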
static int
zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
{
#ifdef HAVE_BLK_MQ
if (uio->rq != NULL)
return (zfs_uiomove_bvec_rq(p, n, rw, uio));
#else
ASSERT3P(uio->rq, ==, NULL);
#endif
return (zfs_uiomove_bvec_impl(p, n, rw, uio));
}
#if defined(HAVE_VFS_IOV_ITER)
static int
zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio,
@ -300,8 +438,14 @@ zfs_uioskip(zfs_uio_t *uio, size_t n)
{
if (n > uio->uio_resid)
return;
if (uio->uio_segflg == UIO_BVEC) {
/*
* When using a uio with a struct request, we simply
* use uio_loffset as a pointer to the next logical byte to
* copy in the request. We don't have to do any fancy
* accounting with uio_bvec/uio_iovcnt since we don't use
* them.
*/
if (uio->uio_segflg == UIO_BVEC && uio->rq == NULL) {
uio->uio_skip += n;
while (uio->uio_iovcnt &&
uio->uio_skip >= uio->uio_bvec->bv_len) {

View File

@ -1453,14 +1453,34 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
int error = 0;
zfsvfs_t *zfsvfs = NULL;
vfs_t *vfs = NULL;
int canwrite;
int dataset_visible_zone;
ASSERT(zm);
ASSERT(osname);
dataset_visible_zone = zone_dataset_visible(osname, &canwrite);
/*
* Refuse to mount a filesystem if we are in a namespace and the
* dataset is not visible or writable in that namespace.
*/
if (!INGLOBALZONE(curproc) &&
(!dataset_visible_zone || !canwrite)) {
return (SET_ERROR(EPERM));
}
error = zfsvfs_parse_options(zm->mnt_data, &vfs);
if (error)
return (error);
/*
* If a non-writable filesystem is being mounted without the
* read-only flag, pretend it was set, as done for snapshots.
*/
if (!canwrite)
vfs->vfs_readonly = true;
error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs);
if (error) {
zfsvfs_vfs_free(vfs);

View File

@ -32,6 +32,9 @@
#include <sys/zfs_vnops.h>
#include <sys/zfs_ctldir.h>
#include <sys/zpl.h>
#include <sys/dmu.h>
#include <sys/dsl_dataset.h>
#include <sys/zap.h>
/*
* Common open routine. Disallow any write access.
@ -411,6 +414,20 @@ zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat,
#endif
stat->nlink = stat->size = 2;
dsl_dataset_t *ds = dmu_objset_ds(zfsvfs->z_os);
if (dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0) {
uint64_t snap_count;
int err = zap_count(
dmu_objset_pool(ds->ds_objset)->dp_meta_objset,
dsl_dataset_phys(ds)->ds_snapnames_zapobj, &snap_count);
if (err != 0) {
ZPL_EXIT(zfsvfs);
return (-err);
}
stat->nlink += snap_count;
}
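/*
* For example (illustrative): a filesystem with three snapshots now
* reports nlink == 5 for .zfs/snapshot ("." and ".." plus one link
* per snapshot).
*/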
stat->ctime = stat->mtime = dmu_objset_snap_cmtime(zfsvfs->z_os);
stat->atime = current_time(ip);
ZPL_EXIT(zfsvfs);

View File

@ -360,6 +360,7 @@ const struct super_operations zpl_super_operations = {
struct file_system_type zpl_fs_type = {
.owner = THIS_MODULE,
.name = ZFS_DRIVER,
.fs_flags = FS_USERNS_MOUNT,
.mount = zpl_mount,
.kill_sb = zpl_kill_sb,
};

View File

@ -83,6 +83,7 @@
#include <sys/zap.h>
#include <sys/vfs.h>
#include <sys/zpl.h>
#include <linux/vfs_compat.h>
enum xattr_permission {
XAPERM_DENY,
@ -1495,7 +1496,9 @@ zpl_xattr_permission(xattr_filldir_t *xf, const char *name, int name_len)
return (perm);
}
#if !defined(HAVE_POSIX_ACL_RELEASE) || defined(HAVE_POSIX_ACL_RELEASE_GPL_ONLY)
#if defined(CONFIG_FS_POSIX_ACL) && \
(!defined(HAVE_POSIX_ACL_RELEASE) || \
defined(HAVE_POSIX_ACL_RELEASE_GPL_ONLY))
struct acl_rel_struct {
struct acl_rel_struct *next;
struct posix_acl *acl;

View File

@ -41,20 +41,77 @@
#include <linux/blkdev_compat.h>
#include <linux/task_io_accounting_ops.h>
#ifdef HAVE_BLK_MQ
#include <linux/blk-mq.h>
#endif
static void zvol_request_impl(zvol_state_t *zv, struct bio *bio,
struct request *rq, boolean_t force_sync);
static unsigned int zvol_major = ZVOL_MAJOR;
static unsigned int zvol_request_sync = 0;
static unsigned int zvol_prefetch_bytes = (128 * 1024);
static unsigned long zvol_max_discard_blocks = 16384;
static unsigned int zvol_threads = 32;
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
static const unsigned int zvol_open_timeout_ms = 1000;
#endif
static unsigned int zvol_threads = 0;
#ifdef HAVE_BLK_MQ
static unsigned int zvol_blk_mq_threads = 0;
static unsigned int zvol_blk_mq_actual_threads;
static boolean_t zvol_use_blk_mq = B_FALSE;
/*
* The maximum number of volblocksize blocks to process per thread. Typically,
* write heavy workloads perform better with higher values here, and read
* heavy workloads perform better with lower values, but that's not a hard
* and fast rule. It's basically a knob to tune between "less overhead with
* less parallelism" and "more overhead, but more parallelism".
*
* '8' was chosen as a reasonable, balanced default based on sequential
* read and write tests to a zvol in an NVMe pool (with 16 CPUs).
*/
static unsigned int zvol_blk_mq_blocks_per_thread = 8;
#endif
#ifndef BLKDEV_DEFAULT_RQ
/* BLKDEV_MAX_RQ was renamed to BLKDEV_DEFAULT_RQ in the 5.16 kernel */
#define BLKDEV_DEFAULT_RQ BLKDEV_MAX_RQ
#endif
/*
* Finalize our BIO or request.
*/
#ifdef HAVE_BLK_MQ
#define END_IO(zv, bio, rq, error) do { \
if (bio) { \
BIO_END_IO(bio, error); \
} else { \
blk_mq_end_request(rq, errno_to_bi_status(error)); \
} \
} while (0)
#else
#define END_IO(zv, bio, rq, error) BIO_END_IO(bio, error)
#endif
#ifdef HAVE_BLK_MQ
static unsigned int zvol_blk_mq_queue_depth = BLKDEV_DEFAULT_RQ;
static unsigned int zvol_actual_blk_mq_queue_depth;
#endif
struct zvol_state_os {
struct gendisk *zvo_disk; /* generic disk */
struct request_queue *zvo_queue; /* request queue */
dev_t zvo_dev; /* device id */
#ifdef HAVE_BLK_MQ
struct blk_mq_tag_set tag_set;
#endif
/* Set from the global 'zvol_use_blk_mq' at zvol load */
boolean_t use_blk_mq;
};
taskq_t *zvol_taskq;
@ -63,8 +120,14 @@ static struct ida zvol_ida;
typedef struct zv_request_stack {
zvol_state_t *zv;
struct bio *bio;
struct request *rq;
} zv_request_t;
typedef struct zv_work {
struct request *rq;
struct work_struct work;
} zv_work_t;
typedef struct zv_request_task {
zv_request_t zvr;
taskq_ent_t ent;
@ -86,6 +149,62 @@ zv_request_task_free(zv_request_task_t *task)
kmem_free(task, sizeof (*task));
}
#ifdef HAVE_BLK_MQ
/*
* This is called when a new block multiqueue request comes in. A request
* contains one or more BIOs.
*/
static blk_status_t zvol_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
const struct blk_mq_queue_data *bd)
{
struct request *rq = bd->rq;
zvol_state_t *zv = rq->q->queuedata;
/* Tell the kernel that we are starting to process this request */
blk_mq_start_request(rq);
if (blk_rq_is_passthrough(rq)) {
/* Skip non-filesystem requests */
blk_mq_end_request(rq, BLK_STS_IOERR);
return (BLK_STS_IOERR);
}
zvol_request_impl(zv, NULL, rq, 0);
/* Acknowledge to the kernel that we got this request */
return (BLK_STS_OK);
}
static struct blk_mq_ops zvol_blk_mq_queue_ops = {
.queue_rq = zvol_mq_queue_rq,
};
/* Initialize our blk-mq struct */
static int zvol_blk_mq_alloc_tag_set(zvol_state_t *zv)
{
struct zvol_state_os *zso = zv->zv_zso;
memset(&zso->tag_set, 0, sizeof (zso->tag_set));
/* Initialize tag set. */
zso->tag_set.ops = &zvol_blk_mq_queue_ops;
zso->tag_set.nr_hw_queues = zvol_blk_mq_actual_threads;
zso->tag_set.queue_depth = zvol_actual_blk_mq_queue_depth;
zso->tag_set.numa_node = NUMA_NO_NODE;
zso->tag_set.cmd_size = 0;
/*
* We need BLK_MQ_F_BLOCKING here since we do blocking calls in
* zvol_request_impl()
*/
zso->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
zso->tag_set.driver_data = zv;
return (blk_mq_alloc_tag_set(&zso->tag_set));
}
#endif /* HAVE_BLK_MQ */
/*
* Given a path, return TRUE if path is a ZVOL.
*/
@ -107,38 +226,51 @@ static void
zvol_write(zv_request_t *zvr)
{
struct bio *bio = zvr->bio;
struct request *rq = zvr->rq;
int error = 0;
zfs_uio_t uio;
zfs_uio_bvec_init(&uio, bio);
zvol_state_t *zv = zvr->zv;
struct request_queue *q;
struct gendisk *disk;
unsigned long start_time = 0;
boolean_t acct = B_FALSE;
ASSERT3P(zv, !=, NULL);
ASSERT3U(zv->zv_open_count, >, 0);
ASSERT3P(zv->zv_zilog, !=, NULL);
q = zv->zv_zso->zvo_queue;
disk = zv->zv_zso->zvo_disk;
/* A bio or request marked as FLUSH needs to flush before the write */
if (bio_is_flush(bio))
if (io_is_flush(bio, rq))
zil_commit(zv->zv_zilog, ZVOL_OBJ);
/* Some requests are just for flush and nothing else. */
if (uio.uio_resid == 0) {
if (io_size(bio, rq) == 0) {
rw_exit(&zv->zv_suspend_lock);
BIO_END_IO(bio, 0);
END_IO(zv, bio, rq, 0);
return;
}
struct request_queue *q = zv->zv_zso->zvo_queue;
struct gendisk *disk = zv->zv_zso->zvo_disk;
ssize_t start_resid = uio.uio_resid;
unsigned long start_time;
zfs_uio_bvec_init(&uio, bio, rq);
boolean_t acct = blk_queue_io_stat(q);
if (acct)
start_time = blk_generic_start_io_acct(q, disk, WRITE, bio);
ssize_t start_resid = uio.uio_resid;
/*
* With use_blk_mq, accounting is done by blk_mq_start_request()
* and blk_mq_end_request(), so we can skip it here.
*/
if (bio) {
acct = blk_queue_io_stat(q);
if (acct) {
start_time = blk_generic_start_io_acct(q, disk, WRITE,
bio);
}
}
boolean_t sync =
bio_is_fua(bio) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
io_is_fua(bio, rq) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
zfs_locked_range_t *lr = zfs_rangelock_enter(&zv->zv_rangelock,
uio.uio_loffset, uio.uio_resid, RL_WRITER);
@ -180,10 +312,11 @@ zvol_write(zv_request_t *zvr)
rw_exit(&zv->zv_suspend_lock);
if (acct)
if (bio && acct) {
blk_generic_end_io_acct(q, disk, WRITE, bio, start_time);
}
BIO_END_IO(bio, -error);
END_IO(zv, bio, rq, -error);
}
static void
@ -198,27 +331,33 @@ static void
zvol_discard(zv_request_t *zvr)
{
struct bio *bio = zvr->bio;
struct request *rq = zvr->rq;
zvol_state_t *zv = zvr->zv;
uint64_t start = BIO_BI_SECTOR(bio) << 9;
uint64_t size = BIO_BI_SIZE(bio);
uint64_t start = io_offset(bio, rq);
uint64_t size = io_size(bio, rq);
uint64_t end = start + size;
boolean_t sync;
int error = 0;
dmu_tx_t *tx;
struct request_queue *q = zv->zv_zso->zvo_queue;
struct gendisk *disk = zv->zv_zso->zvo_disk;
unsigned long start_time = 0;
boolean_t acct = blk_queue_io_stat(q);
ASSERT3P(zv, !=, NULL);
ASSERT3U(zv->zv_open_count, >, 0);
ASSERT3P(zv->zv_zilog, !=, NULL);
struct request_queue *q = zv->zv_zso->zvo_queue;
struct gendisk *disk = zv->zv_zso->zvo_disk;
unsigned long start_time;
if (bio) {
acct = blk_queue_io_stat(q);
if (acct) {
start_time = blk_generic_start_io_acct(q, disk, WRITE,
bio);
}
}
boolean_t acct = blk_queue_io_stat(q);
if (acct)
start_time = blk_generic_start_io_acct(q, disk, WRITE, bio);
sync = bio_is_fua(bio) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
sync = io_is_fua(bio, rq) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
if (end > zv->zv_volsize) {
error = SET_ERROR(EIO);
@ -231,7 +370,7 @@ zvol_discard(zv_request_t *zvr)
* the unaligned parts which is slow (read-modify-write) and useless
* since we are not freeing any space by doing so.
*/
if (!bio_is_secure_erase(bio)) {
if (!io_is_secure_erase(bio, rq)) {
start = P2ROUNDUP(start, zv->zv_volblocksize);
end = P2ALIGN(end, zv->zv_volblocksize);
size = end - start;
@ -262,10 +401,12 @@ zvol_discard(zv_request_t *zvr)
unlock:
rw_exit(&zv->zv_suspend_lock);
if (acct)
blk_generic_end_io_acct(q, disk, WRITE, bio, start_time);
if (bio && acct) {
blk_generic_end_io_acct(q, disk, WRITE, bio,
start_time);
}
BIO_END_IO(bio, -error);
END_IO(zv, bio, rq, -error);
}
static void
@ -280,28 +421,41 @@ static void
zvol_read(zv_request_t *zvr)
{
struct bio *bio = zvr->bio;
struct request *rq = zvr->rq;
int error = 0;
zfs_uio_t uio;
zfs_uio_bvec_init(&uio, bio);
boolean_t acct = B_FALSE;
zvol_state_t *zv = zvr->zv;
struct request_queue *q;
struct gendisk *disk;
unsigned long start_time = 0;
ASSERT3P(zv, !=, NULL);
ASSERT3U(zv->zv_open_count, >, 0);
struct request_queue *q = zv->zv_zso->zvo_queue;
struct gendisk *disk = zv->zv_zso->zvo_disk;
ssize_t start_resid = uio.uio_resid;
unsigned long start_time;
zfs_uio_bvec_init(&uio, bio, rq);
boolean_t acct = blk_queue_io_stat(q);
if (acct)
start_time = blk_generic_start_io_acct(q, disk, READ, bio);
q = zv->zv_zso->zvo_queue;
disk = zv->zv_zso->zvo_disk;
ssize_t start_resid = uio.uio_resid;
/*
* When blk-mq is being used, accounting is done by
* blk_mq_start_request() and blk_mq_end_request().
*/
if (bio) {
acct = blk_queue_io_stat(q);
if (acct)
start_time = blk_generic_start_io_acct(q, disk, READ,
bio);
}
zfs_locked_range_t *lr = zfs_rangelock_enter(&zv->zv_rangelock,
uio.uio_loffset, uio.uio_resid, RL_READER);
uint64_t volsize = zv->zv_volsize;
while (uio.uio_resid > 0 && uio.uio_loffset < volsize) {
uint64_t bytes = MIN(uio.uio_resid, DMU_MAX_ACCESS >> 1);
@ -325,10 +479,11 @@ zvol_read(zv_request_t *zvr)
rw_exit(&zv->zv_suspend_lock);
if (acct)
if (bio && acct) {
blk_generic_end_io_acct(q, disk, READ, bio, start_time);
}
BIO_END_IO(bio, -error);
END_IO(zv, bio, rq, -error);
}
static void
@ -339,52 +494,49 @@ zvol_read_task(void *arg)
zv_request_task_free(task);
}
#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
#ifdef HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID
/*
* Process a BIO or request
*
* Either 'bio' or 'rq' should be set depending on if we are processing a
* bio or a request (both should not be set).
*
* force_sync: Set to 0 to defer processing to a background taskq
* Set to 1 to process data synchronously
*/
static void
zvol_submit_bio(struct bio *bio)
#else
static blk_qc_t
zvol_submit_bio(struct bio *bio)
#endif
#else
static MAKE_REQUEST_FN_RET
zvol_request(struct request_queue *q, struct bio *bio)
#endif
zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
boolean_t force_sync)
{
#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
#if defined(HAVE_BIO_BDEV_DISK)
struct request_queue *q = bio->bi_bdev->bd_disk->queue;
#else
struct request_queue *q = bio->bi_disk->queue;
#endif
#endif
zvol_state_t *zv = q->queuedata;
fstrans_cookie_t cookie = spl_fstrans_mark();
uint64_t offset = BIO_BI_SECTOR(bio) << 9;
uint64_t size = BIO_BI_SIZE(bio);
int rw = bio_data_dir(bio);
uint64_t offset = io_offset(bio, rq);
uint64_t size = io_size(bio, rq);
int rw = io_data_dir(bio, rq);
if (bio_has_data(bio) && offset + size > zv->zv_volsize) {
printk(KERN_INFO
"%s: bad access: offset=%llu, size=%lu\n",
zv->zv_zso->zvo_disk->disk_name,
(long long unsigned)offset,
(long unsigned)size);
BIO_END_IO(bio, -SET_ERROR(EIO));
goto out;
}
if (zvol_request_sync)
force_sync = 1;
zv_request_t zvr = {
.zv = zv,
.bio = bio,
.rq = rq,
};
if (io_has_data(bio, rq) && offset + size > zv->zv_volsize) {
printk(KERN_INFO "%s: bad access: offset=%llu, size=%lu\n",
zv->zv_zso->zvo_disk->disk_name,
(long long unsigned)offset,
(long unsigned)size);
END_IO(zv, bio, rq, -SET_ERROR(EIO));
goto out;
}
zv_request_task_t *task;
if (rw == WRITE) {
if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
BIO_END_IO(bio, -SET_ERROR(EROFS));
END_IO(zv, bio, rq, -SET_ERROR(EROFS));
goto out;
}
@ -421,7 +573,7 @@ zvol_request(struct request_queue *q, struct bio *bio)
* i/o may be a ZIL write (via zil_commit()), or a read of an
* indirect block, or a read of a data block (if this is a
* partial-block write). We will indicate that the i/o is
* complete by calling BIO_END_IO() from the taskq callback.
* complete by calling END_IO() from the taskq callback.
*
* This design allows the calling thread to continue and
* initiate more concurrent operations by calling
@ -441,12 +593,12 @@ zvol_request(struct request_queue *q, struct bio *bio)
* of one i/o at a time per zvol. However, an even better
* design would be for zvol_request() to initiate the zio
* directly, and then be notified by the zio_done callback,
* which would call BIO_END_IO(). Unfortunately, the DMU/ZIL
* which would call END_IO(). Unfortunately, the DMU/ZIL
* interfaces lack this functionality (they block waiting for
* the i/o to complete).
*/
if (bio_is_discard(bio) || bio_is_secure_erase(bio)) {
if (zvol_request_sync) {
if (io_is_discard(bio, rq) || io_is_secure_erase(bio, rq)) {
if (force_sync) {
zvol_discard(&zvr);
} else {
task = zv_request_task_create(zvr);
@ -454,7 +606,7 @@ zvol_request(struct request_queue *q, struct bio *bio)
zvol_discard_task, task, 0, &task->ent);
}
} else {
if (zvol_request_sync) {
if (force_sync) {
zvol_write(&zvr);
} else {
task = zv_request_task_create(zvr);
@ -469,14 +621,14 @@ zvol_request(struct request_queue *q, struct bio *bio)
* data and require no additional handling.
*/
if (size == 0) {
BIO_END_IO(bio, 0);
END_IO(zv, bio, rq, 0);
goto out;
}
rw_enter(&zv->zv_suspend_lock, RW_READER);
/* See comment in WRITE case above. */
if (zvol_request_sync) {
if (force_sync) {
zvol_read(&zvr);
} else {
task = zv_request_task_create(zvr);
@ -487,8 +639,33 @@ zvol_request(struct request_queue *q, struct bio *bio)
out:
spl_fstrans_unmark(cookie);
#if (defined(HAVE_MAKE_REQUEST_FN_RET_QC) || \
defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS)) && \
}
#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
#ifdef HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID
static void
zvol_submit_bio(struct bio *bio)
#else
static blk_qc_t
zvol_submit_bio(struct bio *bio)
#endif
#else
static MAKE_REQUEST_FN_RET
zvol_request(struct request_queue *q, struct bio *bio)
#endif
{
#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
#if defined(HAVE_BIO_BDEV_DISK)
struct request_queue *q = bio->bi_bdev->bd_disk->queue;
#else
struct request_queue *q = bio->bi_disk->queue;
#endif
#endif
zvol_state_t *zv = q->queuedata;
zvol_request_impl(zv, bio, NULL, 0);
#if defined(HAVE_MAKE_REQUEST_FN_RET_QC) || \
defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS) && \
!defined(HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID)
return (BLK_QC_T_NONE);
#endif
@ -805,6 +982,27 @@ zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo)
return (0);
}
/*
* Why have two separate block_device_operations structs?
*
* Normally we'd just have one, and assign 'submit_bio' as needed. However,
* it's possible the user's kernel is built with CONSTIFY_PLUGIN, meaning we
* can't just change submit_bio dynamically at runtime. So just create two
* separate structs to get around this.
*/
static const struct block_device_operations zvol_ops_blk_mq = {
.open = zvol_open,
.release = zvol_release,
.ioctl = zvol_ioctl,
.compat_ioctl = zvol_compat_ioctl,
.check_events = zvol_check_events,
#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK
.revalidate_disk = zvol_revalidate_disk,
#endif
.getgeo = zvol_getgeo,
.owner = THIS_MODULE,
};
static const struct block_device_operations zvol_ops = {
.open = zvol_open,
.release = zvol_release,
@ -821,6 +1019,87 @@ static const struct block_device_operations zvol_ops = {
#endif
};
static int
zvol_alloc_non_blk_mq(struct zvol_state_os *zso)
{
#if defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS)
#if defined(HAVE_BLK_ALLOC_DISK)
zso->zvo_disk = blk_alloc_disk(NUMA_NO_NODE);
if (zso->zvo_disk == NULL)
return (1);
zso->zvo_disk->minors = ZVOL_MINORS;
zso->zvo_queue = zso->zvo_disk->queue;
#else
zso->zvo_queue = blk_alloc_queue(NUMA_NO_NODE);
if (zso->zvo_queue == NULL)
return (1);
zso->zvo_disk = alloc_disk(ZVOL_MINORS);
if (zso->zvo_disk == NULL) {
blk_cleanup_queue(zso->zvo_queue);
return (1);
}
zso->zvo_disk->queue = zso->zvo_queue;
#endif /* HAVE_BLK_ALLOC_DISK */
#else
zso->zvo_queue = blk_generic_alloc_queue(zvol_request, NUMA_NO_NODE);
if (zso->zvo_queue == NULL)
return (1);
zso->zvo_disk = alloc_disk(ZVOL_MINORS);
if (zso->zvo_disk == NULL) {
blk_cleanup_queue(zso->zvo_queue);
return (1);
}
zso->zvo_disk->queue = zso->zvo_queue;
#endif /* HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */
return (0);
}
static int
zvol_alloc_blk_mq(zvol_state_t *zv)
{
#ifdef HAVE_BLK_MQ
struct zvol_state_os *zso = zv->zv_zso;
/* Allocate our blk-mq tag_set */
if (zvol_blk_mq_alloc_tag_set(zv) != 0)
return (1);
#if defined(HAVE_BLK_ALLOC_DISK)
zso->zvo_disk = blk_mq_alloc_disk(&zso->tag_set, zv);
if (zso->zvo_disk == NULL) {
blk_mq_free_tag_set(&zso->tag_set);
return (1);
}
zso->zvo_queue = zso->zvo_disk->queue;
zso->zvo_disk->minors = ZVOL_MINORS;
#else
zso->zvo_disk = alloc_disk(ZVOL_MINORS);
if (zso->zvo_disk == NULL) {
blk_cleanup_queue(zso->zvo_queue);
blk_mq_free_tag_set(&zso->tag_set);
return (1);
}
/* Allocate queue */
zso->zvo_queue = blk_mq_init_queue(&zso->tag_set);
if (IS_ERR(zso->zvo_queue)) {
blk_mq_free_tag_set(&zso->tag_set);
return (1);
}
/* Our queue is now created, assign it to our disk */
zso->zvo_disk->queue = zso->zvo_queue;
#endif
#endif
return (0);
}
/*
* Allocate memory for a new zvol_state_t and setup the required
* request queue and generic disk structures for the block device.
@ -831,6 +1110,7 @@ zvol_alloc(dev_t dev, const char *name)
zvol_state_t *zv;
struct zvol_state_os *zso;
uint64_t volmode;
int ret;
if (dsl_prop_get_integer(name, "volmode", &volmode, NULL) != 0)
return (NULL);
@ -849,48 +1129,44 @@ zvol_alloc(dev_t dev, const char *name)
list_link_init(&zv->zv_next);
mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL);
#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
#ifdef HAVE_BLK_ALLOC_DISK
zso->zvo_disk = blk_alloc_disk(NUMA_NO_NODE);
if (zso->zvo_disk == NULL)
goto out_kmem;
#ifdef HAVE_BLK_MQ
zv->zv_zso->use_blk_mq = zvol_use_blk_mq;
#endif
zso->zvo_disk->minors = ZVOL_MINORS;
zso->zvo_queue = zso->zvo_disk->queue;
#else
zso->zvo_queue = blk_alloc_queue(NUMA_NO_NODE);
if (zso->zvo_queue == NULL)
goto out_kmem;
zso->zvo_disk = alloc_disk(ZVOL_MINORS);
if (zso->zvo_disk == NULL) {
blk_cleanup_queue(zso->zvo_queue);
goto out_kmem;
/*
* The block layer has 3 interfaces for getting BIOs:
*
* 1. blk-mq request queues (new)
* 2. submit_bio() (oldest)
* 3. regular request queues (old).
*
* Each of those interfaces has two permutations:
*
* a) We have blk_alloc_disk()/blk_mq_alloc_disk(), which allocates
* both the disk and its queue (5.14 kernel or newer)
*
* b) We don't have blk_*alloc_disk(), and have to allocate the
* disk and the queue separately. (5.13 kernel or older)
*/
if (zv->zv_zso->use_blk_mq) {
ret = zvol_alloc_blk_mq(zv);
zso->zvo_disk->fops = &zvol_ops_blk_mq;
} else {
ret = zvol_alloc_non_blk_mq(zso);
zso->zvo_disk->fops = &zvol_ops;
}
zso->zvo_disk->queue = zso->zvo_queue;
#endif /* HAVE_BLK_ALLOC_DISK */
#else
zso->zvo_queue = blk_generic_alloc_queue(zvol_request, NUMA_NO_NODE);
if (zso->zvo_queue == NULL)
if (ret != 0)
goto out_kmem;
zso->zvo_disk = alloc_disk(ZVOL_MINORS);
if (zso->zvo_disk == NULL) {
blk_cleanup_queue(zso->zvo_queue);
goto out_kmem;
}
zso->zvo_disk->queue = zso->zvo_queue;
#endif /* HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */
blk_queue_set_write_cache(zso->zvo_queue, B_TRUE, B_TRUE);
/* Limit read-ahead to a single page to prevent over-prefetching. */
blk_queue_set_read_ahead(zso->zvo_queue, 1);
/* Disable write merging in favor of the ZIO pipeline. */
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, zso->zvo_queue);
if (!zv->zv_zso->use_blk_mq) {
/* Disable write merging in favor of the ZIO pipeline. */
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, zso->zvo_queue);
}
/* Enable /proc/diskstats */
blk_queue_flag_set(QUEUE_FLAG_IO_STAT, zso->zvo_queue);
@ -918,7 +1194,6 @@ zvol_alloc(dev_t dev, const char *name)
}
zso->zvo_disk->first_minor = (dev & MINORMASK);
zso->zvo_disk->fops = &zvol_ops;
zso->zvo_disk->private_data = zv;
snprintf(zso->zvo_disk->disk_name, DISK_NAME_LEN, "%s%d",
ZVOL_DEV_NAME, (dev & MINORMASK));
@ -963,6 +1238,11 @@ zvol_os_free(zvol_state_t *zv)
put_disk(zv->zv_zso->zvo_disk);
#endif
#ifdef HAVE_BLK_MQ
if (zv->zv_zso->use_blk_mq)
blk_mq_free_tag_set(&zv->zv_zso->tag_set);
#endif
ida_simple_remove(&zvol_ida,
MINOR(zv->zv_zso->zvo_dev) >> ZVOL_MINOR_BITS);
@ -1044,8 +1324,69 @@ zvol_os_create_minor(const char *name)
blk_queue_max_hw_sectors(zv->zv_zso->zvo_queue,
(DMU_MAX_ACCESS / 4) >> 9);
blk_queue_max_segments(zv->zv_zso->zvo_queue, UINT16_MAX);
blk_queue_max_segment_size(zv->zv_zso->zvo_queue, UINT_MAX);
if (zv->zv_zso->use_blk_mq) {
/*
* IO requests can be really big (1MB). When an IO request
* comes in, it is passed off to zvol_read() or zvol_write()
* in a new thread, where it is chunked up into 'volblocksize'
* sized pieces and processed. So for example, if the request
* is a 1MB write and your volblocksize is 128k, one zvol_write
* thread will take that request and sequentially do eight 128k
* IOs. This is due to the fact that the thread needs to lock
* each volblocksize sized block. So you might be wondering:
* "instead of passing the whole 1MB request to one thread,
* why not pass eight individual 128k chunks to eight threads and
* process the whole write in parallel?" The short answer is
* that there's a sweet spot number of chunks that balances
* the greater parallelism with the added overhead of more
* threads. The sweet spot can be different depending on whether you
* have a read or write heavy workload. Writes typically want
* high chunk counts while reads typically want lower ones. On
* a test pool with 6 NVMe drives in a 3x 2-disk mirror
* configuration, with volblocksize=8k, the sweet spot for good
* sequential reads and writes was at 8 chunks.
*/
/*
* Below we tell the kernel how big we want our requests
* to be. You would think that blk_queue_io_opt() would be
* used to do this since it is used to "set optimal request
* size for the queue", but that doesn't seem to do
* anything - the kernel still gives you huge requests
* with tons of little PAGE_SIZE segments contained within it.
*
* Knowing that the kernel will just give you PAGE_SIZE segments
* no matter what, you can say "ok, I want PAGE_SIZE byte
* segments, and I want 'N' of them per request", where N is
* the correct number of segments for the volblocksize and
* number of chunks you want.
*/
#ifdef HAVE_BLK_MQ
if (zvol_blk_mq_blocks_per_thread != 0) {
unsigned int chunks;
chunks = MIN(zvol_blk_mq_blocks_per_thread, UINT16_MAX);
blk_queue_max_segment_size(zv->zv_zso->zvo_queue,
PAGE_SIZE);
blk_queue_max_segments(zv->zv_zso->zvo_queue,
(zv->zv_volblocksize * chunks) / PAGE_SIZE);
} else {
/*
* Special case: zvol_blk_mq_blocks_per_thread = 0
* Max everything out.
*/
blk_queue_max_segments(zv->zv_zso->zvo_queue,
UINT16_MAX);
blk_queue_max_segment_size(zv->zv_zso->zvo_queue,
UINT_MAX);
}
#endif
} else {
blk_queue_max_segments(zv->zv_zso->zvo_queue, UINT16_MAX);
blk_queue_max_segment_size(zv->zv_zso->zvo_queue, UINT_MAX);
}
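/*
* Worked example of the segment sizing above (illustrative, assuming
* PAGE_SIZE == 4096): with volblocksize == 128k and
* zvol_blk_mq_blocks_per_thread == 8, requests are capped at
* (131072 * 8) / 4096 == 256 PAGE_SIZE segments, i.e. at most 1MB
* per request.
*/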
blk_queue_physical_block_size(zv->zv_zso->zvo_queue,
zv->zv_volblocksize);
blk_queue_io_opt(zv->zv_zso->zvo_queue, zv->zv_volblocksize);
@ -1167,19 +1508,54 @@ int
zvol_init(void)
{
int error;
int threads = MIN(MAX(zvol_threads, 1), 1024);
/*
* zvol_threads is the module param the user passes in.
*
* zvol_actual_threads is what we use internally, since the user can
* pass zvol_threads = 0 to mean "use all the CPUs" (the default).
*/
static unsigned int zvol_actual_threads;
if (zvol_threads == 0) {
/*
* See dde9380a1 for why 32 was chosen here. This should
* probably be refined to be some multiple of the number
* of CPUs.
*/
zvol_actual_threads = MAX(num_online_cpus(), 32);
} else {
zvol_actual_threads = MIN(MAX(zvol_threads, 1), 1024);
}
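/*
* For example (illustrative): on a 16-CPU system, zvol_threads == 0
* yields zvol_actual_threads = MAX(16, 32) = 32, while an oversized
* value such as zvol_threads == 2000 is clamped to 1024.
*/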
error = register_blkdev(zvol_major, ZVOL_DRIVER);
if (error) {
printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error);
return (error);
}
zvol_taskq = taskq_create(ZVOL_DRIVER, threads, maxclsyspri,
threads * 2, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
#ifdef HAVE_BLK_MQ
if (zvol_blk_mq_queue_depth == 0) {
zvol_actual_blk_mq_queue_depth = BLKDEV_DEFAULT_RQ;
} else {
zvol_actual_blk_mq_queue_depth =
MAX(zvol_blk_mq_queue_depth, BLKDEV_MIN_RQ);
}
if (zvol_blk_mq_threads == 0) {
zvol_blk_mq_actual_threads = num_online_cpus();
} else {
zvol_blk_mq_actual_threads = MIN(MAX(zvol_blk_mq_threads, 1),
1024);
}
#endif
zvol_taskq = taskq_create(ZVOL_DRIVER, zvol_actual_threads, maxclsyspri,
zvol_actual_threads, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
if (zvol_taskq == NULL) {
unregister_blkdev(zvol_major, ZVOL_DRIVER);
return (-ENOMEM);
}
zvol_init_impl();
ida_init(&zvol_ida);
return (0);
@ -1202,7 +1578,8 @@ module_param(zvol_major, uint, 0444);
MODULE_PARM_DESC(zvol_major, "Major number for zvol device");
module_param(zvol_threads, uint, 0444);
MODULE_PARM_DESC(zvol_threads, "Max number of threads to handle I/O requests");
MODULE_PARM_DESC(zvol_threads, "Number of threads to handle I/O requests. Set"
"to 0 to use all active CPUs");
module_param(zvol_request_sync, uint, 0644);
MODULE_PARM_DESC(zvol_request_sync, "Synchronously handle bio requests");
@ -1215,4 +1592,17 @@ MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end");
module_param(zvol_volmode, uint, 0644);
MODULE_PARM_DESC(zvol_volmode, "Default volmode property value");
#ifdef HAVE_BLK_MQ
module_param(zvol_blk_mq_queue_depth, uint, 0644);
MODULE_PARM_DESC(zvol_blk_mq_queue_depth, "Default blk-mq queue depth");
module_param(zvol_use_blk_mq, uint, 0644);
MODULE_PARM_DESC(zvol_use_blk_mq, "Use the blk-mq API for zvols");
module_param(zvol_blk_mq_blocks_per_thread, uint, 0644);
MODULE_PARM_DESC(zvol_blk_mq_blocks_per_thread,
"Process volblocksize blocks per thread");
#endif
/* END CSTYLED */

View File

@ -696,16 +696,15 @@ zpool_feature_init(void)
ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL, sfeatures);
{
static const spa_feature_t zilsaxattr_deps[] = {
SPA_FEATURE_EXTENSIBLE_DATASET,
SPA_FEATURE_NONE
};
zfeature_register(SPA_FEATURE_ZILSAXATTR,
"org.openzfs:zilsaxattr", "zilsaxattr",
"Support for xattr=sa extended attribute logging in ZIL.",
ZFEATURE_FLAG_PER_DATASET | ZFEATURE_FLAG_READONLY_COMPAT,
ZFEATURE_TYPE_BOOLEAN, zilsaxattr_deps, sfeatures);
static const spa_feature_t zilsaxattr_deps[] = {
SPA_FEATURE_EXTENSIBLE_DATASET,
SPA_FEATURE_NONE
};
zfeature_register(SPA_FEATURE_ZILSAXATTR,
"org.openzfs:zilsaxattr", "zilsaxattr",
"Support for xattr=sa extended attribute logging in ZIL.",
ZFEATURE_FLAG_PER_DATASET | ZFEATURE_FLAG_READONLY_COMPAT,
ZFEATURE_TYPE_BOOLEAN, zilsaxattr_deps, sfeatures);
}
zfeature_register(SPA_FEATURE_HEAD_ERRLOG,
@ -714,6 +713,18 @@ zpool_feature_init(void)
ZFEATURE_FLAG_ACTIVATE_ON_ENABLE, ZFEATURE_TYPE_BOOLEAN, NULL,
sfeatures);
{
static const spa_feature_t blake3_deps[] = {
SPA_FEATURE_EXTENSIBLE_DATASET,
SPA_FEATURE_NONE
};
zfeature_register(SPA_FEATURE_BLAKE3,
"org.openzfs:blake3", "blake3",
"BLAKE3 hash algorithm.",
ZFEATURE_FLAG_PER_DATASET, ZFEATURE_TYPE_BOOLEAN,
blake3_deps, sfeatures);
}
zfs_mod_list_supported_free(sfeatures);
}

View File

@ -84,6 +84,7 @@ zfs_prop_init(void)
{ "sha512", ZIO_CHECKSUM_SHA512 },
{ "skein", ZIO_CHECKSUM_SKEIN },
{ "edonr", ZIO_CHECKSUM_EDONR },
{ "blake3", ZIO_CHECKSUM_BLAKE3 },
{ NULL }
};
@ -102,6 +103,9 @@ zfs_prop_init(void)
ZIO_CHECKSUM_SKEIN | ZIO_CHECKSUM_VERIFY },
{ "edonr,verify",
ZIO_CHECKSUM_EDONR | ZIO_CHECKSUM_VERIFY },
{ "blake3", ZIO_CHECKSUM_BLAKE3 },
{ "blake3,verify",
ZIO_CHECKSUM_BLAKE3 | ZIO_CHECKSUM_VERIFY },
{ NULL }
};
@ -394,12 +398,12 @@ zfs_prop_init(void)
ZIO_CHECKSUM_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM |
ZFS_TYPE_VOLUME,
"on | off | fletcher2 | fletcher4 | sha256 | sha512 | skein"
" | edonr",
" | edonr | blake3",
"CHECKSUM", checksum_table, sfeatures);
zprop_register_index(ZFS_PROP_DEDUP, "dedup", ZIO_CHECKSUM_OFF,
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
"on | off | verify | sha256[,verify] | sha512[,verify] | "
"skein[,verify] | edonr,verify",
"skein[,verify] | edonr,verify | blake3[,verify]",
"DEDUP", dedup_table, sfeatures);
zprop_register_index(ZFS_PROP_COMPRESSION, "compression",
ZIO_COMPRESS_DEFAULT, PROP_INHERIT,

View File

@ -0,0 +1,117 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://opensource.org/licenses/CDDL-1.0.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright 2022 Tino Reichardt <milky-zfs@mcmilk.de>
*/
#include <sys/zfs_context.h>
#include <sys/zio_checksum.h>
#include <sys/blake3.h>
#include <sys/abd.h>
static int
blake3_incremental(void *buf, size_t size, void *arg)
{
BLAKE3_CTX *ctx = arg;
Blake3_Update(ctx, buf, size);
return (0);
}
/*
* Computes a native 256-bit BLAKE3 MAC checksum. Please note that this
* function requires the presence of a ctx_template that should be allocated
* using abd_checksum_blake3_tmpl_init.
*/
void
abd_checksum_blake3_native(abd_t *abd, uint64_t size, const void *ctx_template,
zio_cksum_t *zcp)
{
ASSERT(ctx_template != 0);
#if defined(_KERNEL)
BLAKE3_CTX *ctx = blake3_per_cpu_ctx[CPU_SEQID_UNSTABLE];
#else
BLAKE3_CTX *ctx = kmem_alloc(sizeof (*ctx), KM_SLEEP);
#endif
memcpy(ctx, ctx_template, sizeof (*ctx));
(void) abd_iterate_func(abd, 0, size, blake3_incremental, ctx);
Blake3_Final(ctx, (uint8_t *)zcp);
#if !defined(_KERNEL)
memset(ctx, 0, sizeof (*ctx));
kmem_free(ctx, sizeof (*ctx));
#endif
}
/*
* Byteswapped version of abd_checksum_blake3_native. This just invokes
* the native checksum function and byteswaps the resulting checksum (since
* BLAKE3 is internally endian-insensitive).
*/
void
abd_checksum_blake3_byteswap(abd_t *abd, uint64_t size,
const void *ctx_template, zio_cksum_t *zcp)
{
zio_cksum_t tmp;
ASSERT(ctx_template != 0);
abd_checksum_blake3_native(abd, size, ctx_template, &tmp);
zcp->zc_word[0] = BSWAP_64(tmp.zc_word[0]);
zcp->zc_word[1] = BSWAP_64(tmp.zc_word[1]);
zcp->zc_word[2] = BSWAP_64(tmp.zc_word[2]);
zcp->zc_word[3] = BSWAP_64(tmp.zc_word[3]);
}
/*
* Allocates a BLAKE3 MAC template suitable for using in BLAKE3 MAC checksum
* computations and returns a pointer to it.
*/
void *
abd_checksum_blake3_tmpl_init(const zio_cksum_salt_t *salt)
{
BLAKE3_CTX *ctx;
ASSERT(sizeof (salt->zcs_bytes) == 32);
/* init reference object */
ctx = kmem_zalloc(sizeof (*ctx), KM_SLEEP);
Blake3_InitKeyed(ctx, salt->zcs_bytes);
return (ctx);
}
/*
* Frees a BLAKE3 context template previously allocated using
* abd_checksum_blake3_tmpl_init.
*/
void
abd_checksum_blake3_tmpl_free(void *ctx_template)
{
BLAKE3_CTX *ctx = ctx_template;
memset(ctx, 0, sizeof (*ctx));
kmem_free(ctx, sizeof (*ctx));
}
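/*
* Usage sketch for the template interface above (illustrative; 'abd'
* and 'size' are assumed to be supplied by the caller):
*
*	zio_cksum_salt_t salt = { .zcs_bytes = { 0 } };
*	zio_cksum_t cksum;
*	void *tmpl = abd_checksum_blake3_tmpl_init(&salt);
*
*	abd_checksum_blake3_native(abd, size, tmpl, &cksum);
*	abd_checksum_blake3_tmpl_free(tmpl);
*/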

View File

@ -88,7 +88,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
setpoint[0] = '\0';
prop = zfs_name_to_prop(propname);
inheritable = (prop == ZPROP_INVAL || zfs_prop_inheritable(prop));
inheritable = (prop == ZPROP_USERPROP || zfs_prop_inheritable(prop));
inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX);
recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX);
@ -168,7 +168,7 @@ dsl_prop_get_ds(dsl_dataset_t *ds, const char *propname,
uint64_t zapobj;
ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
inheritable = (prop == ZPROP_INVAL || zfs_prop_inheritable(prop));
inheritable = (prop == ZPROP_USERPROP || zfs_prop_inheritable(prop));
zapobj = dsl_dataset_phys(ds)->ds_props_obj;
if (zapobj != 0) {
@ -1055,12 +1055,12 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj,
prop = zfs_name_to_prop(propname);
/* Skip non-inheritable properties. */
if ((flags & DSL_PROP_GET_INHERITING) && prop != ZPROP_INVAL &&
!zfs_prop_inheritable(prop))
if ((flags & DSL_PROP_GET_INHERITING) &&
prop != ZPROP_USERPROP && !zfs_prop_inheritable(prop))
continue;
/* Skip properties not valid for this type. */
if ((flags & DSL_PROP_GET_SNAPSHOT) && prop != ZPROP_INVAL &&
if ((flags & DSL_PROP_GET_SNAPSHOT) && prop != ZPROP_USERPROP &&
!zfs_prop_valid_for_type(prop, ZFS_TYPE_SNAPSHOT, B_FALSE))
continue;

View File

@ -280,6 +280,7 @@ typedef struct scan_io {
struct dsl_scan_io_queue {
dsl_scan_t *q_scn; /* associated dsl_scan_t */
vdev_t *q_vd; /* top-level vdev that this queue represents */
zio_t *q_zio; /* scn_zio_root child for waiting on IO */
/* trees used for sorting I/Os and extents of I/Os */
range_tree_t *q_exts_by_addr;
@ -1276,9 +1277,12 @@ dsl_scan_should_clear(dsl_scan_t *scn)
mutex_enter(&tvd->vdev_scan_io_queue_lock);
queue = tvd->vdev_scan_io_queue;
if (queue != NULL) {
/* # extents in exts_by_size = # in exts_by_addr */
/*
* # of extents in exts_by_size = # in exts_by_addr.
* B-tree efficiency is ~75%, but can be as low as 50%.
*/
mused += zfs_btree_numnodes(&queue->q_exts_by_size) *
sizeof (range_seg_gap_t) + queue->q_sio_memused;
3 * sizeof (range_seg_gap_t) + queue->q_sio_memused;
}
mutex_exit(&tvd->vdev_scan_io_queue_lock);
}
@ -3033,15 +3037,19 @@ scan_io_queues_run_one(void *arg)
dsl_scan_io_queue_t *queue = arg;
kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock;
boolean_t suspended = B_FALSE;
range_seg_t *rs = NULL;
scan_io_t *sio = NULL;
range_seg_t *rs;
scan_io_t *sio;
zio_t *zio;
list_t sio_list;
ASSERT(queue->q_scn->scn_is_sorted);
list_create(&sio_list, sizeof (scan_io_t),
offsetof(scan_io_t, sio_nodes.sio_list_node));
zio = zio_null(queue->q_scn->scn_zio_root, queue->q_scn->scn_dp->dp_spa,
NULL, NULL, NULL, ZIO_FLAG_CANFAIL);
mutex_enter(q_lock);
queue->q_zio = zio;
/* Calculate maximum in-flight bytes for this vdev. */
queue->q_maxinflight_bytes = MAX(1, zfs_scan_vdev_limit *
@ -3108,7 +3116,9 @@ scan_io_queues_run_one(void *arg)
scan_io_queue_insert_impl(queue, sio);
}
queue->q_zio = NULL;
mutex_exit(q_lock);
zio_nowait(zio);
list_destroy(&sio_list);
}
@ -4073,6 +4083,7 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
dsl_scan_t *scn = dp->dp_scan;
size_t size = BP_GET_PSIZE(bp);
abd_t *data = abd_alloc_for_io(size, B_FALSE);
zio_t *pio;
if (queue == NULL) {
ASSERT3U(scn->scn_maxinflight_bytes, >, 0);
@ -4081,6 +4092,7 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
spa->spa_scrub_inflight += BP_GET_PSIZE(bp);
mutex_exit(&spa->spa_scrub_lock);
pio = scn->scn_zio_root;
} else {
kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock;
@ -4089,12 +4101,14 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
while (queue->q_inflight_bytes >= queue->q_maxinflight_bytes)
cv_wait(&queue->q_zio_cv, q_lock);
queue->q_inflight_bytes += BP_GET_PSIZE(bp);
pio = queue->q_zio;
mutex_exit(q_lock);
}
ASSERT(pio != NULL);
count_block(scn, dp->dp_blkstats, bp);
zio_nowait(zio_read(scn->scn_zio_root, spa, bp, data, size,
dsl_scan_scrub_done, queue, ZIO_PRIORITY_SCRUB, zio_flags, zb));
zio_nowait(zio_read(pio, spa, bp, data, size, dsl_scan_scrub_done,
queue, ZIO_PRIORITY_SCRUB, zio_flags, zb));
}
/*

View File

@ -30,6 +30,7 @@
*/
#include <sys/zfs_context.h>
#include <sys/zfs_chksum.h>
#include <sys/spa_impl.h>
#include <sys/zio.h>
#include <sys/zio_checksum.h>
@ -2417,6 +2418,7 @@ spa_init(spa_mode_t mode)
vdev_raidz_math_init();
vdev_file_init();
zfs_prop_init();
chksum_init();
zpool_prop_init();
zpool_feature_init();
spa_config_load();
@ -2438,6 +2440,7 @@ spa_fini(void)
vdev_cache_stat_fini();
vdev_mirror_stat_fini();
vdev_raidz_math_fini();
chksum_fini();
zil_fini();
dmu_fini();
zio_fini();

View File

@ -5496,7 +5496,7 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
}
switch (prop = vdev_name_to_prop(propname)) {
case VDEV_PROP_USER:
case VDEV_PROP_USERPROP:
if (vdev_prop_user(propname)) {
strval = fnvpair_value_string(elem);
if (strlen(strval) == 0) {
@ -5580,7 +5580,7 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
uint64_t intval = 0;
char *strval = NULL;
if (prop == VDEV_PROP_USER && !vdev_prop_user(propname)) {
if (prop == VDEV_PROP_USERPROP && !vdev_prop_user(propname)) {
error = EINVAL;
goto end;
}
@ -5937,7 +5937,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
case VDEV_PROP_COMMENT:
/* Exists in the ZAP below */
/* FALLTHRU */
case VDEV_PROP_USER:
case VDEV_PROP_USERPROP:
/* User Properties */
src = ZPROP_SRC_LOCAL;

View File

@ -325,7 +325,7 @@ zcp_synctask_inherit_prop_check(void *arg, dmu_tx_t *tx)
zcp_inherit_prop_arg_t *args = arg;
zfs_prop_t prop = zfs_name_to_prop(args->zipa_prop);
if (prop == ZPROP_INVAL) {
if (prop == ZPROP_USERPROP) {
if (zfs_prop_user(args->zipa_prop))
return (0);

View File

@ -0,0 +1,323 @@
/*
* CDDL HEADER START
*
* The contents of this file are subject to the terms of the
* Common Development and Distribution License (the "License").
* You may not use this file except in compliance with the License.
*
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
* or http://www.opensolaris.org/os/licensing.
* See the License for the specific language governing permissions
* and limitations under the License.
*
* When distributing Covered Code, include this CDDL HEADER in each
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
* If applicable, add the following below this CDDL HEADER, with the
* fields enclosed by brackets "[]" replaced with your own identifying
* information: Portions Copyright [yyyy] [name of copyright owner]
*
* CDDL HEADER END
*/
/*
* Copyright (c) 2021 Tino Reichardt <milky-zfs@mcmilk.de>
*/
#include <sys/types.h>
#include <sys/spa.h>
#include <sys/zio_checksum.h>
#include <sys/zfs_context.h>
#include <sys/zfs_chksum.h>
#include <sys/blake3.h>
static kstat_t *chksum_kstat = NULL;
typedef struct {
const char *name;
const char *impl;
uint64_t bs1k;
uint64_t bs4k;
uint64_t bs16k;
uint64_t bs64k;
uint64_t bs256k;
uint64_t bs1m;
uint64_t bs4m;
zio_cksum_salt_t salt;
zio_checksum_t *(func);
zio_checksum_tmpl_init_t *(init);
zio_checksum_tmpl_free_t *(free);
} chksum_stat_t;
static int chksum_stat_cnt = 0;
static chksum_stat_t *chksum_stat_data = 0;
/*
* i3-1005G1 test output:
*
* implementation 1k 4k 16k 64k 256k 1m 4m
* fletcher-4 5421 15001 26468 32555 34720 32801 18847
* edonr-generic 1196 1602 1761 1749 1762 1759 1751
* skein-generic 546 591 608 615 619 612 616
* sha256-generic 246 270 274 274 277 275 276
* sha256-avx 262 296 304 307 307 307 306
* sha256-sha-ni 769 1072 1172 1220 1219 1232 1228
* sha256-openssl 240 300 316 314 304 285 276
* sha512-generic 333 374 385 392 391 393 392
* sha512-openssl 353 441 467 476 472 467 426
* sha512-avx 362 444 473 475 479 476 478
* sha512-avx2 394 500 530 538 543 545 542
* blake3-generic 308 313 313 313 312 313 312
* blake3-sse2 402 1289 1423 1446 1432 1458 1413
* blake3-sse41 427 1470 1625 1704 1679 1607 1629
* blake3-avx2 428 1920 3095 3343 3356 3318 3204
* blake3-avx512 473 2687 4905 5836 5844 5643 5374
*/
static int
chksum_stat_kstat_headers(char *buf, size_t size)
{
ssize_t off = 0;
off += snprintf(buf + off, size, "%-23s", "implementation");
off += snprintf(buf + off, size - off, "%8s", "1k");
off += snprintf(buf + off, size - off, "%8s", "4k");
off += snprintf(buf + off, size - off, "%8s", "16k");
off += snprintf(buf + off, size - off, "%8s", "64k");
off += snprintf(buf + off, size - off, "%8s", "256k");
off += snprintf(buf + off, size - off, "%8s", "1m");
(void) snprintf(buf + off, size - off, "%8s\n", "4m");
return (0);
}
static int
chksum_stat_kstat_data(char *buf, size_t size, void *data)
{
chksum_stat_t *cs;
ssize_t off = 0;
char b[24];
cs = (chksum_stat_t *)data;
snprintf(b, 23, "%s-%s", cs->name, cs->impl);
off += snprintf(buf + off, size - off, "%-23s", b);
off += snprintf(buf + off, size - off, "%8llu",
(u_longlong_t)cs->bs1k);
off += snprintf(buf + off, size - off, "%8llu",
(u_longlong_t)cs->bs4k);
off += snprintf(buf + off, size - off, "%8llu",
(u_longlong_t)cs->bs16k);
off += snprintf(buf + off, size - off, "%8llu",
(u_longlong_t)cs->bs64k);
off += snprintf(buf + off, size - off, "%8llu",
(u_longlong_t)cs->bs256k);
off += snprintf(buf + off, size - off, "%8llu",
(u_longlong_t)cs->bs1m);
(void) snprintf(buf + off, size - off, "%8llu\n",
(u_longlong_t)cs->bs4m);
return (0);
}
static void *
chksum_stat_kstat_addr(kstat_t *ksp, loff_t n)
{
if (n < chksum_stat_cnt)
ksp->ks_private = (void *)(chksum_stat_data + n);
else
ksp->ks_private = NULL;
return (ksp->ks_private);
}
static void
chksum_run(chksum_stat_t *cs, abd_t *abd, void *ctx, int round,
uint64_t *result)
{
hrtime_t start;
uint64_t run_bw, run_time_ns, run_count = 0, size = 0;
uint32_t l, loops = 0;
zio_cksum_t zcp;
switch (round) {
case 1: /* 1k */
size = 1<<10; loops = 128; break;
case 2: /* 4k */
size = 1<<12; loops = 64; break;
case 3: /* 16k */
size = 1<<14; loops = 32; break;
case 4: /* 64k */
size = 1<<16; loops = 16; break;
case 5: /* 256k */
size = 1<<18; loops = 8; break;
case 6: /* 1m */
size = 1<<20; loops = 4; break;
case 7: /* 4m */
size = 1<<22; loops = 1; break;
}
kpreempt_disable();
start = gethrtime();
do {
for (l = 0; l < loops; l++, run_count++)
cs->func(abd, size, ctx, &zcp);
run_time_ns = gethrtime() - start;
} while (run_time_ns < MSEC2NSEC(1));
kpreempt_enable();
run_bw = size * run_count * NANOSEC;
run_bw /= run_time_ns; /* B/s */
*result = run_bw/1024/1024; /* MiB/s */
}
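/*
* Example of the computation above (illustrative numbers): a 64k round
* (size == 65536) completing run_count == 320 calls in
* run_time_ns == 1000000 (1 ms) gives
* run_bw = 65536 * 320 * NANOSEC / 1000000 = 20971520000 B/s,
* which is reported as *result == 20000 MiB/s.
*/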
static void
chksum_benchit(chksum_stat_t *cs)
{
abd_t *abd;
void *ctx = 0;
void *salt = &cs->salt.zcs_bytes;
/* allocate test memory via default abd interface */
abd = abd_alloc_linear(1<<22, B_FALSE);
memset(salt, 0, sizeof (cs->salt.zcs_bytes));
if (cs->init) {
ctx = cs->init(&cs->salt);
}
chksum_run(cs, abd, ctx, 1, &cs->bs1k);
chksum_run(cs, abd, ctx, 2, &cs->bs4k);
chksum_run(cs, abd, ctx, 3, &cs->bs16k);
chksum_run(cs, abd, ctx, 4, &cs->bs64k);
chksum_run(cs, abd, ctx, 5, &cs->bs256k);
chksum_run(cs, abd, ctx, 6, &cs->bs1m);
chksum_run(cs, abd, ctx, 7, &cs->bs4m);
/* free up temp memory */
if (cs->free) {
cs->free(ctx);
}
abd_free(abd);
}
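/*
 * Note that the salt is zeroed before the template init, so salted
 * algorithms (edonr, skein, blake3) are benchmarked with a fixed all-zero
 * salt and the numbers reflect pure hashing throughput.
 */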
/*
* Initialize and benchmark all supported implementations.
*/
static void
chksum_benchmark(void)
{
#ifndef _KERNEL
/* we need the benchmark only for the kernel module */
return;
#endif
chksum_stat_t *cs;
int cbid = 0, id;
uint64_t max = 0;
/* space for the benchmark times */
chksum_stat_cnt = 4;
chksum_stat_cnt += blake3_get_impl_count();
chksum_stat_data = (chksum_stat_t *)kmem_zalloc(
sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP);
/* edonr */
cs = &chksum_stat_data[cbid++];
cs->init = abd_checksum_edonr_tmpl_init;
cs->func = abd_checksum_edonr_native;
cs->free = abd_checksum_edonr_tmpl_free;
cs->name = "edonr";
cs->impl = "generic";
chksum_benchit(cs);
/* skein */
cs = &chksum_stat_data[cbid++];
cs->init = abd_checksum_skein_tmpl_init;
cs->func = abd_checksum_skein_native;
cs->free = abd_checksum_skein_tmpl_free;
cs->name = "skein";
cs->impl = "generic";
chksum_benchit(cs);
/* sha256 */
cs = &chksum_stat_data[cbid++];
cs->init = 0;
cs->func = abd_checksum_SHA256;
cs->free = 0;
cs->name = "sha256";
cs->impl = "generic";
chksum_benchit(cs);
/* sha512 */
cs = &chksum_stat_data[cbid++];
cs->init = 0;
cs->func = abd_checksum_SHA512_native;
cs->free = 0;
cs->name = "sha512";
cs->impl = "generic";
chksum_benchit(cs);
/* blake3 */
for (id = 0; id < blake3_get_impl_count(); id++) {
blake3_set_impl_id(id);
cs = &chksum_stat_data[cbid++];
cs->init = abd_checksum_blake3_tmpl_init;
cs->func = abd_checksum_blake3_native;
cs->free = abd_checksum_blake3_tmpl_free;
cs->name = "blake3";
cs->impl = blake3_get_impl_name();
chksum_benchit(cs);
if (cs->bs256k > max) {
max = cs->bs256k;
blake3_set_impl_fastest(id);
}
}
}
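/*
 * The fastest BLAKE3 implementation is selected by its 256 KiB result and
 * pinned via blake3_set_impl_fastest(); the auto-selection can still be
 * overridden at runtime through the blake3_impl tunable (assumed names:
 * the vfs.zfs.blake3_impl sysctl on FreeBSD, the zfs_blake3_impl module
 * parameter on Linux).
 */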
void
chksum_init(void)
{
#ifdef _KERNEL
blake3_per_cpu_ctx_init();
#endif
/* Benchmark supported implementations */
chksum_benchmark();
/* Install kstats for all implementations */
chksum_kstat = kstat_create("zfs", 0, "chksum_bench", "misc",
KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
if (chksum_kstat != NULL) {
chksum_kstat->ks_data = NULL;
chksum_kstat->ks_ndata = UINT32_MAX;
kstat_set_raw_ops(chksum_kstat,
chksum_stat_kstat_headers,
chksum_stat_kstat_data,
chksum_stat_kstat_addr);
kstat_install(chksum_kstat);
}
/* setup implementations */
blake3_setup_impl();
}
void
chksum_fini(void)
{
if (chksum_kstat != NULL) {
kstat_delete(chksum_kstat);
chksum_kstat = NULL;
}
if (chksum_stat_cnt) {
kmem_free(chksum_stat_data,
sizeof (chksum_stat_t) * chksum_stat_cnt);
chksum_stat_cnt = 0;
chksum_stat_data = 0;
}
#ifdef _KERNEL
blake3_per_cpu_ctx_fini();
#endif
}

View File

@@ -1104,7 +1104,7 @@ zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
 	(void) innvl;
 	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
 
-	if (prop == ZPROP_INVAL) {
+	if (prop == ZPROP_USERPROP) {
 		if (!zfs_prop_user(zc->zc_value))
 			return (SET_ERROR(EINVAL));
 		return (zfs_secpolicy_write_perms(zc->zc_name,
@@ -2406,7 +2406,7 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
 	const char *strval = NULL;
 	int err = -1;
 
-	if (prop == ZPROP_INVAL) {
+	if (prop == ZPROP_USERPROP) {
 		if (zfs_prop_userquota(propname))
 			return (zfs_prop_set_userquota(dsname, pair));
 		return (-1);
@@ -2577,7 +2577,7 @@ zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
 			/* inherited properties are expected to be booleans */
 			if (nvpair_type(propval) != DATA_TYPE_BOOLEAN)
 				err = SET_ERROR(EINVAL);
-		} else if (err == 0 && prop == ZPROP_INVAL) {
+		} else if (err == 0 && prop == ZPROP_USERPROP) {
 			if (zfs_prop_user(propname)) {
 				if (nvpair_type(propval) != DATA_TYPE_STRING)
 					err = SET_ERROR(EINVAL);
@@ -2853,11 +2853,11 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc)
 		 * and reservation to the received or default values even though
 		 * they are not considered inheritable.
 		 */
-		if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
+		if (prop != ZPROP_USERPROP && !zfs_prop_inheritable(prop))
 			return (SET_ERROR(EINVAL));
 	}
 
-	if (prop == ZPROP_INVAL) {
+	if (prop == ZPROP_USERPROP) {
 		if (!zfs_prop_user(propname))
 			return (SET_ERROR(EINVAL));
@@ -4488,7 +4488,7 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
 	uint64_t intval, compval;
 	int err;
 
-	if (prop == ZPROP_INVAL) {
+	if (prop == ZPROP_USERPROP) {
 		if (zfs_prop_user(propname)) {
 			if ((err = zfs_secpolicy_write_perms(dsname,
 			    ZFS_DELEG_PERM_USERPROP, cr)))
@@ -5034,7 +5034,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
 			/* -x property */
 			const char *name = nvpair_name(nvp);
 			zfs_prop_t prop = zfs_name_to_prop(name);
-			if (prop != ZPROP_INVAL) {
+			if (prop != ZPROP_USERPROP) {
 				if (!zfs_prop_inheritable(prop))
 					continue;
 			} else if (!zfs_prop_user(name))
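The ZPROP_INVAL -> ZPROP_USERPROP substitutions above are mechanical:
zfs_name_to_prop() returns that value for any name which is not a native
property, so each of these call sites is really asking "is this a user
property?". A minimal sketch of the pattern, using a hypothetical property
name:

	zfs_prop_t prop = zfs_name_to_prop("com.example:backup");
	if (prop == ZPROP_USERPROP && zfs_prop_user("com.example:backup")) {
		/* valid user property: must contain ':' and pass name checks */
	}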

View File

@@ -195,6 +195,10 @@ zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
 	    abd_checksum_edonr_tmpl_init, abd_checksum_edonr_tmpl_free,
 	    ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_SALTED |
 	    ZCHECKSUM_FLAG_NOPWRITE, "edonr"},
+	{{abd_checksum_blake3_native, abd_checksum_blake3_byteswap},
+	    abd_checksum_blake3_tmpl_init, abd_checksum_blake3_tmpl_free,
+	    ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP |
+	    ZCHECKSUM_FLAG_SALTED | ZCHECKSUM_FLAG_NOPWRITE, "blake3"},
 };
 
 /*
@@ -207,6 +211,8 @@ zio_checksum_to_feature(enum zio_checksum cksum)
 	VERIFY((cksum & ~ZIO_CHECKSUM_MASK) == 0);
 
 	switch (cksum) {
+	case ZIO_CHECKSUM_BLAKE3:
+		return (SPA_FEATURE_BLAKE3);
 	case ZIO_CHECKSUM_SHA512:
 		return (SPA_FEATURE_SHA512);
 	case ZIO_CHECKSUM_SKEIN:
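With the checksum table entry and the feature mapping above, BLAKE3 is gated
by the new pool feature; a plausible way to exercise it (pool and dataset
names assumed):

	# zpool set feature@blake3=enabled tank
	# zfs set checksum=blake3 tank/fs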

View File

@@ -113,8 +113,8 @@ tests = ['tst.destroy_fs', 'tst.destroy_snap', 'tst.get_count_and_limit',
 tags = ['functional', 'channel_program', 'synctask_core']
 
 [tests/functional/checksum]
-tests = ['run_edonr_test', 'run_sha2_test', 'run_skein_test', 'filetest_001_pos',
-    'filetest_002_pos']
+tests = ['run_edonr_test', 'run_sha2_test', 'run_skein_test', 'run_blake3_test',
+    'filetest_001_pos', 'filetest_002_pos']
 tags = ['functional', 'checksum']
 
 [tests/functional/clean_mirror]
@@ -937,9 +937,13 @@ tags = ['functional', 'zvol', 'zvol_cli']
 [tests/functional/zvol/zvol_misc]
 tests = ['zvol_misc_002_pos', 'zvol_misc_hierarchy', 'zvol_misc_rename_inuse',
-    'zvol_misc_snapdev', 'zvol_misc_volmode', 'zvol_misc_zil']
+    'zvol_misc_snapdev', 'zvol_misc_trim', 'zvol_misc_volmode', 'zvol_misc_zil']
 tags = ['functional', 'zvol', 'zvol_misc']
 
+[tests/functional/zvol/zvol_stress]
+tests = ['zvol_stress']
+tags = ['functional', 'zvol', 'zvol_stress']
+
 [tests/functional/zvol/zvol_swap]
 tests = ['zvol_swap_001_pos', 'zvol_swap_002_pos', 'zvol_swap_004_pos']
 tags = ['functional', 'zvol', 'zvol_swap']
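The new run_blake3_test entry is picked up with the rest of the checksum
group; assuming the standard test-runner wrapper, an invocation along the
lines of scripts/zfs-tests.sh -T checksum should run it alongside the
existing edonr/sha2/skein tests.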

Some files were not shown because too many files have changed in this diff