zfs: merge OpenZFS master-891568c99

Notable upstream pull request merges:
  #11652 Split dmu_zfetch() speculation and execution parts
  #11682 Fix zfs_get_data access to files with wrong generation
  #11735 Clean up RAIDZ/DRAID ereport code
  #11737 Initialize metaslab range trees in metaslab_init
  #11739 FreeBSD: make seqc asserts conditional on replay
  #11763 Allow setting bootfs property on pools with indirect vdevs
  #11767 FreeBSD: Fix memory leaks in kstats

Obtained from:	OpenZFS
MFC after:	2 weeks
2021-03-21 01:46:08 +01:00 · 2021-03-21 01:46:08 +01:00 · f9693bef8d
commit f9693bef8d
parent 815209920f 48a1c304e8
77 changed files with 1561 additions and 883 deletions
--- a/sys/contrib/openzfs/README.md
+++ b/sys/contrib/openzfs/README.md
@ -32,4 +32,4 @@ For more details see the NOTICE, LICENSE and COPYRIGHT files; `UCRL-CODE-235197`

 # Supported Kernels
  * The `META` file contains the officially recognized supported Linux kernel versions.
-  * Supported FreeBSD versions are 12-STABLE and 13-CURRENT.
+  * Supported FreeBSD versions are any supported branches and releases starting from 12.2-RELEASE.
--- a/sys/contrib/openzfs/cmd/raidz_test/raidz_test.c
+++ b/sys/contrib/openzfs/cmd/raidz_test/raidz_test.c
@ -448,7 +448,6 @@ vdev_raidz_map_alloc_expanded(abd_t *abd, uint64_t size, uint64_t offset,
 		rr->rr_missingdata = 0;
 		rr->rr_missingparity = 0;
 		rr->rr_firstdatacol = nparity;
-		rr->rr_abd_copy = NULL;
 		rr->rr_abd_empty = NULL;
 		rr->rr_nempty = 0;

@ -459,7 +458,6 @@ vdev_raidz_map_alloc_expanded(abd_t *abd, uint64_t size, uint64_t offset,
 			}
 			rr->rr_col[c].rc_devidx = child_id;
 			rr->rr_col[c].rc_offset = child_offset;
-			rr->rr_col[c].rc_gdata = NULL;
 			rr->rr_col[c].rc_orig_data = NULL;
 			rr->rr_col[c].rc_error = 0;
 			rr->rr_col[c].rc_tried = 0;
--- a/sys/contrib/openzfs/cmd/ztest/ztest.c
+++ b/sys/contrib/openzfs/cmd/ztest/ztest.c
@ -2287,8 +2287,8 @@ ztest_get_done(zgd_t *zgd, int error)
 }

 static int
-ztest_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb,
-    zio_t *zio)
+ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
+    struct lwb *lwb, zio_t *zio)
 {
 	ztest_ds_t *zd = arg;
 	objset_t *os = zd->zd_os;
--- a/sys/contrib/openzfs/config/kernel-bio_max_segs.m4
+++ b/sys/contrib/openzfs/config/kernel-bio_max_segs.m4
@ -0,0 +1,23 @@
+dnl #
+dnl # 5.12 API change removes BIO_MAX_PAGES in favor of bio_max_segs(),
+dnl # which handles the logic of setting the upper bound to
+dnl # BIO_MAX_PAGES internally.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_MAX_SEGS], [
+	ZFS_LINUX_TEST_SRC([bio_max_segs], [
+		#include <linux/bio.h>
+	],[
+		bio_max_segs(1);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_BIO_MAX_SEGS], [
+	AC_MSG_CHECKING([whether bio_max_segs() exists])
+	ZFS_LINUX_TEST_RESULT([bio_max_segs], [
+		AC_MSG_RESULT(yes)
+
+		AC_DEFINE([HAVE_BIO_MAX_SEGS], 1, [bio_max_segs() is implemented])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
--- a/sys/contrib/openzfs/config/kernel-generic_fillattr.m4
+++ b/sys/contrib/openzfs/config/kernel-generic_fillattr.m4
@ -0,0 +1,28 @@
+dnl #
+dnl # 5.12 API
+dnl #
+dnl # generic_fillattr in linux/fs.h now requires a struct user_namespace*
+dnl # as the first arg, to support idmapped mounts.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR_USERNS], [
+	ZFS_LINUX_TEST_SRC([generic_fillattr_userns], [
+		#include <linux/fs.h>
+	],[
+		struct user_namespace *userns = NULL;
+		struct inode *in = NULL;
+		struct kstat *k = NULL;
+		generic_fillattr(userns, in, k);
+	])
+])
+
+dnl # Define HAVE_GENERIC_FILLATTR_USERNS if the userns test above passes.
+AC_DEFUN([ZFS_AC_KERNEL_GENERIC_FILLATTR_USERNS], [
+	AC_MSG_CHECKING([whether generic_fillattr requires struct user_namespace*])
+	ZFS_LINUX_TEST_RESULT([generic_fillattr_userns], [
+		AC_MSG_RESULT([yes])
+		AC_DEFINE(HAVE_GENERIC_FILLATTR_USERNS, 1,
+		    [generic_fillattr requires struct user_namespace*])
+	],[
+		AC_MSG_RESULT([no])
+	])
+])
--- a/sys/contrib/openzfs/config/kernel-inode-create.m4
+++ b/sys/contrib/openzfs/config/kernel-inode-create.m4
@ -1,7 +1,25 @@
-dnl #
-dnl # 3.6 API change
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE_FLAGS], [
+AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE], [
+	dnl #
+	dnl # 5.12 API change that added the struct user_namespace* arg
+	dnl # to the front of this function type's arg list.
+	dnl #
+	ZFS_LINUX_TEST_SRC([create_userns], [
+		#include <linux/fs.h>
+		#include <linux/sched.h>
+
+		int inode_create(struct user_namespace *userns,
+		    struct inode *inode ,struct dentry *dentry,
+		    umode_t umode, bool flag) { return 0; }
+
+		static const struct inode_operations
+			iops __attribute__ ((unused)) = {
+			.create		= inode_create,
+		};
+	],[])
+
+	dnl #
+	dnl # 3.6 API change
+	dnl #
 	ZFS_LINUX_TEST_SRC([create_flags], [
 		#include <linux/fs.h>
 		#include <linux/sched.h>
@ -16,11 +34,20 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE_FLAGS], [
 	],[])
 ])

-AC_DEFUN([ZFS_AC_KERNEL_CREATE_FLAGS], [
-	AC_MSG_CHECKING([whether iops->create() passes flags])
-	ZFS_LINUX_TEST_RESULT([create_flags], [
+AC_DEFUN([ZFS_AC_KERNEL_CREATE], [
+	AC_MSG_CHECKING([whether iops->create() takes struct user_namespace*])
+	ZFS_LINUX_TEST_RESULT([create_userns], [
 		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_IOPS_CREATE_USERNS, 1,
+		   [iops->create() takes struct user_namespace*])
 	],[
-		ZFS_LINUX_TEST_ERROR([iops->create()])
+		AC_MSG_RESULT(no)
+
+		AC_MSG_CHECKING([whether iops->create() passes flags])
+		ZFS_LINUX_TEST_RESULT([create_flags], [
+			AC_MSG_RESULT(yes)
+		],[
+			ZFS_LINUX_TEST_ERROR([iops->create()])
+		])
 	])
 ])
--- a/sys/contrib/openzfs/config/kernel-inode-getattr.m4
+++ b/sys/contrib/openzfs/config/kernel-inode-getattr.m4
@ -1,8 +1,29 @@
-dnl #
-dnl # Linux 4.11 API
-dnl # See torvalds/linux@a528d35
-dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [
+	dnl #
+	dnl # Linux 5.12 API
+	dnl # The getattr I/O operations handler type was extended to require
+	dnl # a struct user_namespace* as its first arg, to support idmapped
+	dnl # mounts.
+	dnl #
+	ZFS_LINUX_TEST_SRC([inode_operations_getattr_userns], [
+		#include <linux/fs.h>
+
+		int test_getattr(
+			struct user_namespace *userns,
+		    const struct path *p, struct kstat *k,
+		    u32 request_mask, unsigned int query_flags)
+		    { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.getattr = test_getattr,
+		};
+	],[])
+
+	dnl #
+	dnl # Linux 4.11 API
+	dnl # See torvalds/linux@a528d35
+	dnl #
 	ZFS_LINUX_TEST_SRC([inode_operations_getattr_path], [
 		#include <linux/fs.h>

@ -33,21 +54,39 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [
 ])

 AC_DEFUN([ZFS_AC_KERNEL_INODE_GETATTR], [
-	AC_MSG_CHECKING([whether iops->getattr() takes a path])
-	ZFS_LINUX_TEST_RESULT([inode_operations_getattr_path], [
+	dnl #
+	dnl # Kernel 5.12 test
+	dnl #
+	AC_MSG_CHECKING([whether iops->getattr() takes user_namespace])
+	ZFS_LINUX_TEST_RESULT([inode_operations_getattr_userns], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_PATH_IOPS_GETATTR, 1,
-		    [iops->getattr() takes a path])
+		AC_DEFINE(HAVE_USERNS_IOPS_GETATTR, 1,
+		    [iops->getattr() takes struct user_namespace*])
 	],[
 		AC_MSG_RESULT(no)

-		AC_MSG_CHECKING([whether iops->getattr() takes a vfsmount])
-		ZFS_LINUX_TEST_RESULT([inode_operations_getattr_vfsmount], [
+		dnl #
+		dnl # Kernel 4.11 test
+		dnl #
+		AC_MSG_CHECKING([whether iops->getattr() takes a path])
+		ZFS_LINUX_TEST_RESULT([inode_operations_getattr_path], [
 			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_VFSMOUNT_IOPS_GETATTR, 1,
-			    [iops->getattr() takes a vfsmount])
+			AC_DEFINE(HAVE_PATH_IOPS_GETATTR, 1,
+				[iops->getattr() takes a path])
 		],[
 			AC_MSG_RESULT(no)
+
+			dnl #
+			dnl # Kernel < 4.11 test
+			dnl #
+			AC_MSG_CHECKING([whether iops->getattr() takes a vfsmount])
+			ZFS_LINUX_TEST_RESULT([inode_operations_getattr_vfsmount], [
+				AC_MSG_RESULT(yes)
+				AC_DEFINE(HAVE_VFSMOUNT_IOPS_GETATTR, 1,
+					[iops->getattr() takes a vfsmount])
+			],[
+				AC_MSG_RESULT(no)
+			])
 		])
 	])
 ])
--- a/sys/contrib/openzfs/config/kernel-is_owner_or_cap.m4
+++ b/sys/contrib/openzfs/config/kernel-is_owner_or_cap.m4
@ -11,13 +11,32 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OWNER_OR_CAPABLE], [
 		struct inode *ip = NULL;
 		(void) inode_owner_or_capable(ip);
 	])
+
+	ZFS_LINUX_TEST_SRC([inode_owner_or_capable_idmapped], [
+		#include <linux/fs.h>
+	],[
+		struct inode *ip = NULL;
+		(void) inode_owner_or_capable(&init_user_ns, ip);
+	])
 ])

 AC_DEFUN([ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE], [
 	AC_MSG_CHECKING([whether inode_owner_or_capable() exists])
 	ZFS_LINUX_TEST_RESULT([inode_owner_or_capable], [
 		AC_MSG_RESULT(yes)
-	],[
-		ZFS_LINUX_TEST_ERROR([capability])
+		AC_DEFINE(HAVE_INODE_OWNER_OR_CAPABLE, 1,
+		    [inode_owner_or_capable() exists])
+	], [
+		AC_MSG_RESULT(no)
+
+		AC_MSG_CHECKING(
+		    [whether inode_owner_or_capable() takes user_ns])
+		ZFS_LINUX_TEST_RESULT([inode_owner_or_capable_idmapped], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_INODE_OWNER_OR_CAPABLE_IDMAPPED, 1,
+			    [inode_owner_or_capable() takes user_ns])
+		],[
+			ZFS_LINUX_TEST_ERROR([capability])
+		])
 	])
 ])
--- a/sys/contrib/openzfs/config/kernel-mkdir-umode-t.m4
+++ b/sys/contrib/openzfs/config/kernel-mkdir-umode-t.m4
@ -1,32 +0,0 @@
-dnl #
-dnl # 3.3 API change
-dnl # The VFS .create, .mkdir and .mknod callbacks were updated to take a
-dnl # umode_t type rather than an int.  The expectation is that any backport
-dnl # would also change all three prototypes.  However, if it turns out that
-dnl # some distribution doesn't backport the whole thing this could be
-dnl # broken apart into three separate checks.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_MKDIR_UMODE_T], [
-	ZFS_LINUX_TEST_SRC([inode_operations_mkdir], [
-		#include <linux/fs.h>
-
-		int mkdir(struct inode *inode, struct dentry *dentry,
-		    umode_t umode) { return 0; }
-
-		static const struct inode_operations
-		    iops __attribute__ ((unused)) = {
-			.mkdir = mkdir,
-		};
-	],[])
-])
-
-AC_DEFUN([ZFS_AC_KERNEL_MKDIR_UMODE_T], [
-	AC_MSG_CHECKING([whether iops->create()/mkdir()/mknod() take umode_t])
-	ZFS_LINUX_TEST_RESULT([inode_operations_mkdir], [
-		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_MKDIR_UMODE_T, 1,
-		    [iops->create()/mkdir()/mknod() take umode_t])
-	],[
-		ZFS_LINUX_TEST_ERROR([mkdir()])
-	])
-])
--- a/sys/contrib/openzfs/config/kernel-mkdir.m4
+++ b/sys/contrib/openzfs/config/kernel-mkdir.m4
@ -0,0 +1,65 @@
+dnl #
+dnl # Supported mkdir() interfaces checked newest to oldest.
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_MKDIR], [
+	dnl #
+	dnl # 5.12 API change
+	dnl # The struct user_namespace arg was added as the first argument to
+	dnl # mkdir()
+	dnl #
+	ZFS_LINUX_TEST_SRC([mkdir_user_namespace], [
+		#include <linux/fs.h>
+
+		int mkdir(struct user_namespace *userns,
+			struct inode *inode, struct dentry *dentry,
+		    umode_t umode) { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.mkdir = mkdir,
+		};
+	],[])
+
+	dnl #
+	dnl # 3.3 API change
+	dnl # The VFS .create, .mkdir and .mknod callbacks were updated to take a
+	dnl # umode_t type rather than an int.  The expectation is that any backport
+	dnl # would also change all three prototypes.  However, if it turns out that
+	dnl # some distribution doesn't backport the whole thing this could be
+	dnl # broken apart into three separate checks.
+	dnl #
+	ZFS_LINUX_TEST_SRC([inode_operations_mkdir], [
+		#include <linux/fs.h>
+
+		int mkdir(struct inode *inode, struct dentry *dentry,
+		    umode_t umode) { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.mkdir = mkdir,
+		};
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_MKDIR], [
+	dnl #
+	dnl # 5.12 API change: struct user_namespace* became the first argument
+	dnl # of iops->mkdir().  Otherwise fall back to the 3.3 umode_t check.
+	dnl #
+	AC_MSG_CHECKING([whether iops->mkdir() takes struct user_namespace*])
+	ZFS_LINUX_TEST_RESULT([mkdir_user_namespace], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_IOPS_MKDIR_USERNS, 1,
+		    [iops->mkdir() takes struct user_namespace*])
+	],[
+		AC_MSG_RESULT(no)
+		AC_MSG_CHECKING([whether iops->mkdir() takes umode_t])
+		ZFS_LINUX_TEST_RESULT([inode_operations_mkdir], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_MKDIR_UMODE_T, 1,
+			    [iops->mkdir() takes umode_t])
+		],[
+			ZFS_LINUX_TEST_ERROR([mkdir()])
+		])
+	])
+])
--- a/sys/contrib/openzfs/config/kernel-mknod.m4
+++ b/sys/contrib/openzfs/config/kernel-mknod.m4
@ -0,0 +1,30 @@
+AC_DEFUN([ZFS_AC_KERNEL_SRC_MKNOD], [
+	dnl #
+	dnl # 5.12 API change that added the struct user_namespace* arg
+	dnl # to the front of this function type's arg list.
+	dnl #
+	ZFS_LINUX_TEST_SRC([mknod_userns], [
+		#include <linux/fs.h>
+		#include <linux/sched.h>
+
+		int tmp_mknod(struct user_namespace *userns,
+		    struct inode *inode ,struct dentry *dentry,
+		    umode_t u, dev_t d) { return 0; }
+
+		static const struct inode_operations
+			iops __attribute__ ((unused)) = {
+			.mknod		= tmp_mknod,
+		};
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_MKNOD], [
+	AC_MSG_CHECKING([whether iops->mknod() takes struct user_namespace*])
+	ZFS_LINUX_TEST_RESULT([mknod_userns], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_IOPS_MKNOD_USERNS, 1,
+		    [iops->mknod() takes struct user_namespace*])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
--- a/sys/contrib/openzfs/config/kernel-rename.m4
+++ b/sys/contrib/openzfs/config/kernel-rename.m4
@ -1,10 +1,10 @@
-dnl #
-dnl # 4.9 API change,
-dnl # iops->rename2() merged into iops->rename(), and iops->rename() now wants
-dnl # flags.
-dnl #
-AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME_WANTS_FLAGS], [
-	ZFS_LINUX_TEST_SRC([inode_operations_rename], [
+AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [
+	dnl #
+	dnl # 4.9 API change,
+	dnl # iops->rename2() merged into iops->rename(), and iops->rename() now wants
+	dnl # flags.
+	dnl #
+	ZFS_LINUX_TEST_SRC([inode_operations_rename_flags], [
 		#include <linux/fs.h>
 		int rename_fn(struct inode *sip, struct dentry *sdp,
 			struct inode *tip, struct dentry *tdp,
@ -15,15 +15,41 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME_WANTS_FLAGS], [
 			.rename = rename_fn,
 		};
 	],[])
+
+	dnl #
+	dnl # 5.12 API change,
+	dnl #
+	dnl # Linux 5.12 introduced passing struct user_namespace* as the first argument
+	dnl # of the rename() and other inode_operations members.
+	dnl #
+	ZFS_LINUX_TEST_SRC([inode_operations_rename_userns], [
+		#include <linux/fs.h>
+		int rename_fn(struct user_namespace *user_ns, struct inode *sip,
+			struct dentry *sdp, struct inode *tip, struct dentry *tdp,
+			unsigned int flags) { return 0; }
+
+		static const struct inode_operations
+		    iops __attribute__ ((unused)) = {
+			.rename = rename_fn,
+		};
+	],[])
 ])

-AC_DEFUN([ZFS_AC_KERNEL_RENAME_WANTS_FLAGS], [
-	AC_MSG_CHECKING([whether iops->rename() wants flags])
-	ZFS_LINUX_TEST_RESULT([inode_operations_rename], [
+AC_DEFUN([ZFS_AC_KERNEL_RENAME], [
+	AC_MSG_CHECKING([whether iops->rename() takes struct user_namespace*])
+	ZFS_LINUX_TEST_RESULT([inode_operations_rename_userns], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1,
-		    [iops->rename() wants flags])
+		AC_DEFINE(HAVE_IOPS_RENAME_USERNS, 1,
+		    [iops->rename() takes struct user_namespace*])
 	],[
 		AC_MSG_RESULT(no)
+		AC_MSG_CHECKING([whether iops->rename() wants flags])
+		ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1,
+				[iops->rename() wants flags])
+		],[
+			AC_MSG_RESULT(no)
+		])
 	])
 ])
--- a/sys/contrib/openzfs/config/kernel-setattr-prepare.m4
+++ b/sys/contrib/openzfs/config/kernel-setattr-prepare.m4
@ -1,27 +1,52 @@
-dnl #
-dnl # 4.9 API change
-dnl # The inode_change_ok() function has been renamed setattr_prepare()
-dnl # and updated to take a dentry rather than an inode.
-dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_SETATTR_PREPARE], [
+	dnl #
+	dnl # 4.9 API change
+	dnl # The inode_change_ok() function has been renamed setattr_prepare()
+	dnl # and updated to take a dentry rather than an inode.
+	dnl #
 	ZFS_LINUX_TEST_SRC([setattr_prepare], [
 		#include <linux/fs.h>
 	], [
 		struct dentry *dentry = NULL;
 		struct iattr *attr = NULL;
 		int error __attribute__ ((unused)) =
-		    setattr_prepare(dentry, attr);
+			setattr_prepare(dentry, attr);
+	])
+
+	dnl #
+	dnl # 5.12 API change
+	dnl # The setattr_prepare() function has been changed to accept a new argument
+	dnl # for struct user_namespace*
+	dnl #
+	ZFS_LINUX_TEST_SRC([setattr_prepare_userns], [
+		#include <linux/fs.h>
+	], [
+		struct dentry *dentry = NULL;
+		struct iattr *attr = NULL;
+		struct user_namespace *userns = NULL;
+		int error __attribute__ ((unused)) =
+			setattr_prepare(userns, dentry, attr);
 	])
 ])

 AC_DEFUN([ZFS_AC_KERNEL_SETATTR_PREPARE], [
-	AC_MSG_CHECKING([whether setattr_prepare() is available])
-	ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare],
+	AC_MSG_CHECKING([whether setattr_prepare() is available and accepts struct user_namespace*])
+	ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare_userns],
 	    [setattr_prepare], [fs/attr.c], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_SETATTR_PREPARE, 1,
-		    [setattr_prepare() is available])
+		AC_DEFINE(HAVE_SETATTR_PREPARE_USERNS, 1,
+		    [setattr_prepare() accepts user_namespace])
 	], [
 		AC_MSG_RESULT(no)
+
+		AC_MSG_CHECKING([whether setattr_prepare() is available, doesn't accept user_namespace])
+		ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare],
+			[setattr_prepare], [fs/attr.c], [
+			AC_MSG_RESULT(yes)
+			AC_DEFINE(HAVE_SETATTR_PREPARE_NO_USERNS, 1,
+				[setattr_prepare() is available, doesn't accept user_namespace])
+		], [
+			AC_MSG_RESULT(no)
+		])
 	])
 ])
--- a/sys/contrib/openzfs/config/kernel-symlink.m4
+++ b/sys/contrib/openzfs/config/kernel-symlink.m4
@ -0,0 +1,30 @@
+AC_DEFUN([ZFS_AC_KERNEL_SRC_SYMLINK], [
+	dnl #
+	dnl # 5.12 API change that added the struct user_namespace* arg
+	dnl # to the front of this function type's arg list.
+	dnl #
+	ZFS_LINUX_TEST_SRC([symlink_userns], [
+		#include <linux/fs.h>
+		#include <linux/sched.h>
+
+		int tmp_symlink(struct user_namespace *userns,
+		    struct inode *inode ,struct dentry *dentry,
+		    const char *path) { return 0; }
+
+		static const struct inode_operations
+			iops __attribute__ ((unused)) = {
+			.symlink		= tmp_symlink,
+		};
+	],[])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_SYMLINK], [
+	AC_MSG_CHECKING([whether iops->symlink() takes struct user_namespace*])
+	ZFS_LINUX_TEST_RESULT([symlink_userns], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_IOPS_SYMLINK_USERNS, 1,
+		    [iops->symlink() takes struct user_namespace*])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])
--- a/sys/contrib/openzfs/config/kernel-xattr-handler.m4
+++ b/sys/contrib/openzfs/config/kernel-xattr-handler.m4
@ -152,6 +152,21 @@ dnl #
 dnl # Supported xattr handler set() interfaces checked newest to oldest.
 dnl #
 AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET], [
+	ZFS_LINUX_TEST_SRC([xattr_handler_set_userns], [
+		#include <linux/xattr.h>
+
+		int set(const struct xattr_handler *handler,
+			struct user_namespace *mnt_userns,
+			struct dentry *dentry, struct inode *inode,
+			const char *name, const void *buffer,
+			size_t size, int flags)
+			{ return 0; }
+		static const struct xattr_handler
+			xops __attribute__ ((unused)) = {
+			.set = set,
+		};
+	],[])
+
 	ZFS_LINUX_TEST_SRC([xattr_handler_set_dentry_inode], [
 		#include <linux/xattr.h>

@ -194,45 +209,58 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET], [

 AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [
 	dnl #
-	dnl # 4.7 API change,
-	dnl # The xattr_handler->set() callback was changed to take both
-	dnl # dentry and inode.
+	dnl # 5.12 API change,
+	dnl # The xattr_handler->set() callback was changed to 8 arguments, and
+	dnl # struct user_namespace* was inserted as arg #2
 	dnl #
-	AC_MSG_CHECKING([whether xattr_handler->set() wants dentry and inode])
-	ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry_inode], [
+	AC_MSG_CHECKING([whether xattr_handler->set() wants dentry, inode, and user_namespace])
+	ZFS_LINUX_TEST_RESULT([xattr_handler_set_userns], [
 		AC_MSG_RESULT(yes)
-		AC_DEFINE(HAVE_XATTR_SET_DENTRY_INODE, 1,
-		    [xattr_handler->set() wants both dentry and inode])
+		AC_DEFINE(HAVE_XATTR_SET_USERNS, 1,
+		    [xattr_handler->set() takes user_namespace])
 	],[
 		dnl #
-		dnl # 4.4 API change,
-		dnl # The xattr_handler->set() callback was changed to take a
-		dnl # xattr_handler, and handler_flags argument was removed and
-		dnl # should be accessed by handler->flags.
+		dnl # 4.7 API change,
+		dnl # The xattr_handler->set() callback was changed to take both
+		dnl # dentry and inode.
 		dnl #
 		AC_MSG_RESULT(no)
-		AC_MSG_CHECKING(
-		    [whether xattr_handler->set() wants xattr_handler])
-		ZFS_LINUX_TEST_RESULT([xattr_handler_set_xattr_handler], [
+		AC_MSG_CHECKING([whether xattr_handler->set() wants dentry and inode])
+		ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry_inode], [
 			AC_MSG_RESULT(yes)
-			AC_DEFINE(HAVE_XATTR_SET_HANDLER, 1,
-			    [xattr_handler->set() wants xattr_handler])
+			AC_DEFINE(HAVE_XATTR_SET_DENTRY_INODE, 1,
+			    [xattr_handler->set() wants both dentry and inode])
 		],[
 			dnl #
-			dnl # 2.6.33 API change,
-			dnl # The xattr_handler->set() callback was changed
-			dnl # to take a dentry instead of an inode, and a
-			dnl # handler_flags argument was added.
+			dnl # 4.4 API change,
+			dnl # The xattr_handler->set() callback was changed to take a
+			dnl # xattr_handler, and handler_flags argument was removed and
+			dnl # should be accessed by handler->flags.
 			dnl #
 			AC_MSG_RESULT(no)
 			AC_MSG_CHECKING(
-			    [whether xattr_handler->set() wants dentry])
-			ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry], [
+			    [whether xattr_handler->set() wants xattr_handler])
+			ZFS_LINUX_TEST_RESULT([xattr_handler_set_xattr_handler], [
 				AC_MSG_RESULT(yes)
-				AC_DEFINE(HAVE_XATTR_SET_DENTRY, 1,
-				    [xattr_handler->set() wants dentry])
+				AC_DEFINE(HAVE_XATTR_SET_HANDLER, 1,
+				    [xattr_handler->set() wants xattr_handler])
 			],[
-				ZFS_LINUX_TEST_ERROR([xattr set()])
+				dnl #
+				dnl # 2.6.33 API change,
+				dnl # The xattr_handler->set() callback was changed
+				dnl # to take a dentry instead of an inode, and a
+				dnl # handler_flags argument was added.
+				dnl #
+				AC_MSG_RESULT(no)
+				AC_MSG_CHECKING(
+				    [whether xattr_handler->set() wants dentry])
+				ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry], [
+					AC_MSG_RESULT(yes)
+					AC_DEFINE(HAVE_XATTR_SET_DENTRY, 1,
+					    [xattr_handler->set() wants dentry])
+				],[
+					ZFS_LINUX_TEST_ERROR([xattr set()])
+				])
 			])
 		])
 	])
--- a/sys/contrib/openzfs/config/kernel.m4
+++ b/sys/contrib/openzfs/config/kernel.m4
@ -79,9 +79,9 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_EVICT_INODE
 	ZFS_AC_KERNEL_SRC_DIRTY_INODE
 	ZFS_AC_KERNEL_SRC_SHRINKER
-	ZFS_AC_KERNEL_SRC_MKDIR_UMODE_T
+	ZFS_AC_KERNEL_SRC_MKDIR
 	ZFS_AC_KERNEL_SRC_LOOKUP_FLAGS
-	ZFS_AC_KERNEL_SRC_CREATE_FLAGS
+	ZFS_AC_KERNEL_SRC_CREATE
 	ZFS_AC_KERNEL_SRC_GET_LINK
 	ZFS_AC_KERNEL_SRC_PUT_LINK
 	ZFS_AC_KERNEL_SRC_TMPFILE
@ -115,7 +115,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_KUIDGID_T
 	ZFS_AC_KERNEL_SRC_KUID_HELPERS
 	ZFS_AC_KERNEL_SRC_MODULE_PARAM_CALL_CONST
-	ZFS_AC_KERNEL_SRC_RENAME_WANTS_FLAGS
+	ZFS_AC_KERNEL_SRC_RENAME
 	ZFS_AC_KERNEL_SRC_CURRENT_TIME
 	ZFS_AC_KERNEL_SRC_USERNS_CAPABILITIES
 	ZFS_AC_KERNEL_SRC_IN_COMPAT_SYSCALL
@ -125,6 +125,10 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_KSTRTOUL
 	ZFS_AC_KERNEL_SRC_PERCPU
 	ZFS_AC_KERNEL_SRC_CPU_HOTPLUG
+	ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR_USERNS
+	ZFS_AC_KERNEL_SRC_MKNOD
+	ZFS_AC_KERNEL_SRC_SYMLINK
+	ZFS_AC_KERNEL_SRC_BIO_MAX_SEGS

 	AC_MSG_CHECKING([for available kernel interfaces])
 	ZFS_LINUX_TEST_COMPILE_ALL([kabi])
@ -177,9 +181,9 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_EVICT_INODE
 	ZFS_AC_KERNEL_DIRTY_INODE
 	ZFS_AC_KERNEL_SHRINKER
-	ZFS_AC_KERNEL_MKDIR_UMODE_T
+	ZFS_AC_KERNEL_MKDIR
 	ZFS_AC_KERNEL_LOOKUP_FLAGS
-	ZFS_AC_KERNEL_CREATE_FLAGS
+	ZFS_AC_KERNEL_CREATE
 	ZFS_AC_KERNEL_GET_LINK
 	ZFS_AC_KERNEL_PUT_LINK
 	ZFS_AC_KERNEL_TMPFILE
@ -213,7 +217,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_KUIDGID_T
 	ZFS_AC_KERNEL_KUID_HELPERS
 	ZFS_AC_KERNEL_MODULE_PARAM_CALL_CONST
-	ZFS_AC_KERNEL_RENAME_WANTS_FLAGS
+	ZFS_AC_KERNEL_RENAME
 	ZFS_AC_KERNEL_CURRENT_TIME
 	ZFS_AC_KERNEL_USERNS_CAPABILITIES
 	ZFS_AC_KERNEL_IN_COMPAT_SYSCALL
@ -223,6 +227,10 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_KSTRTOUL
 	ZFS_AC_KERNEL_PERCPU
 	ZFS_AC_KERNEL_CPU_HOTPLUG
+	ZFS_AC_KERNEL_GENERIC_FILLATTR_USERNS
+	ZFS_AC_KERNEL_MKNOD
+	ZFS_AC_KERNEL_SYMLINK
+	ZFS_AC_KERNEL_BIO_MAX_SEGS
 ])

 dnl #
--- a/sys/contrib/openzfs/configure.ac
+++ b/sys/contrib/openzfs/configure.ac
@ -240,6 +240,7 @@ AC_CONFIG_FILES([
 	tests/zfs-tests/tests/Makefile
 	tests/zfs-tests/tests/functional/Makefile
 	tests/zfs-tests/tests/functional/acl/Makefile
+	tests/zfs-tests/tests/functional/acl/off/Makefile
 	tests/zfs-tests/tests/functional/acl/posix/Makefile
 	tests/zfs-tests/tests/functional/acl/posix-sa/Makefile
 	tests/zfs-tests/tests/functional/alloc_class/Makefile
--- a/sys/contrib/openzfs/include/os/linux/kernel/linux/kmap_compat.h
+++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/kmap_compat.h
@ -30,8 +30,8 @@
 #include <linux/uaccess.h>

 /* 2.6.37 API change */
-#define	zfs_kmap_atomic(page, km_type)		kmap_atomic(page)
-#define	zfs_kunmap_atomic(addr, km_type)	kunmap_atomic(addr)
+#define	zfs_kmap_atomic(page)	kmap_atomic(page)
+#define	zfs_kunmap_atomic(addr)	kunmap_atomic(addr)

 /* 5.0 API change - no more 'type' argument for access_ok() */
 #ifdef HAVE_ACCESS_OK_TYPE
--- a/sys/contrib/openzfs/include/os/linux/kernel/linux/vfs_compat.h
+++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/vfs_compat.h
@ -343,7 +343,8 @@ static inline void zfs_gid_write(struct inode *ip, gid_t gid)
 /*
 * 4.9 API change
 */
-#ifndef HAVE_SETATTR_PREPARE
+#if !(defined(HAVE_SETATTR_PREPARE_NO_USERNS) || \
+    defined(HAVE_SETATTR_PREPARE_USERNS))
 static inline int
 setattr_prepare(struct dentry *dentry, struct iattr *ia)
 {
@ -389,6 +390,15 @@ func(const struct path *path, struct kstat *stat, u32 request_mask,	\
 {									\
 	return (func##_impl(path, stat, request_mask, query_flags));	\
 }
+#elif defined(HAVE_USERNS_IOPS_GETATTR)
+#define	ZPL_GETATTR_WRAPPER(func)					\
+static int								\
+func(struct user_namespace *user_ns, const struct path *path,	\
+    struct kstat *stat, u32 request_mask, unsigned int query_flags)	\
+{									\
+	return (func##_impl(user_ns, path, stat, request_mask, \
+	    query_flags));	\
+}
 #else
 #error
 #endif
@ -436,4 +446,16 @@ zpl_is_32bit_api(void)
 #endif
 }

+/*
+ * 5.12 API change
+ * To support id-mapped mounts, generic_fillattr() was modified to
+ * accept a new struct user_namespace* as its first arg.
+ */
+#ifdef HAVE_GENERIC_FILLATTR_USERNS
+#define	zpl_generic_fillattr(user_ns, ip, sp)	\
+    generic_fillattr(user_ns, ip, sp)
+#else
+#define	zpl_generic_fillattr(user_ns, ip, sp)	generic_fillattr(ip, sp)
+#endif
+
 #endif /* _ZFS_VFS_H */
--- a/sys/contrib/openzfs/include/os/linux/kernel/linux/xattr_compat.h
+++ b/sys/contrib/openzfs/include/os/linux/kernel/linux/xattr_compat.h
@ -119,12 +119,27 @@ fn(struct dentry *dentry, const char *name, void *buffer, size_t size,	\
 #error "Unsupported kernel"
 #endif

+/*
+ * 5.12 API change,
+ * The xattr_handler->set() callback was changed to take the
+ * struct user_namespace* as the second arg, to support idmapped
+ * mounts.
+ */
+#if defined(HAVE_XATTR_SET_USERNS)
+#define	ZPL_XATTR_SET_WRAPPER(fn)					\
+static int								\
+fn(const struct xattr_handler *handler, struct user_namespace *user_ns, \
+    struct dentry *dentry, struct inode *inode, const char *name,	\
+    const void *buffer, size_t size, int flags)	\
+{									\
+	return (__ ## fn(inode, name, buffer, size, flags));		\
+}
 /*
 * 4.7 API change,
 * The xattr_handler->set() callback was changed to take a both dentry and
 * inode, because the dentry might not be attached to an inode yet.
 */
-#if defined(HAVE_XATTR_SET_DENTRY_INODE)
+#elif defined(HAVE_XATTR_SET_DENTRY_INODE)
 #define	ZPL_XATTR_SET_WRAPPER(fn)					\
 static int								\
 fn(const struct xattr_handler *handler, struct dentry *dentry,		\
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vnops_os.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_vnops_os.h
@ -54,7 +54,8 @@ extern int zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap,
 extern int zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd,
    cred_t *cr, int flags);
 extern int zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr);
-extern int zfs_getattr_fast(struct inode *ip, struct kstat *sp);
+extern int zfs_getattr_fast(struct user_namespace *, struct inode *ip,
+	struct kstat *sp);
 extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr);
 extern int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp,
    char *tnm, cred_t *cr, int flags);
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h
@ -73,7 +73,13 @@ extern "C" {
 #define	zn_has_cached_data(zp)		((zp)->z_is_mapped)
 #define	zn_rlimit_fsize(zp, uio)	(0)

-#define	zhold(zp)	igrab(ZTOI((zp)))
+/*
+ * zhold() wraps igrab() on Linux, and igrab() may fail when the
+ * inode is in the process of being deleted.  As zhold() must only be
+ * called when a ref already exists - so the inode cannot be
+ * mid-deletion - we VERIFY() this.
+ */
+#define	zhold(zp)	VERIFY3P(igrab(ZTOI((zp))), !=, NULL)
 #define	zrele(zp)	iput(ZTOI((zp)))

 /* Called on entry to each ZFS inode and vfs operation. */
--- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h
+++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zpl.h
@ -171,4 +171,22 @@ zpl_dir_emit_dots(struct file *file, zpl_dir_context_t *ctx)
 	timespec_trunc(ts, (ip)->i_sb->s_time_gran)
 #endif

+#if defined(HAVE_INODE_OWNER_OR_CAPABLE)
+#define	zpl_inode_owner_or_capable(ns, ip)	inode_owner_or_capable(ip)
+#elif defined(HAVE_INODE_OWNER_OR_CAPABLE_IDMAPPED)
+#define	zpl_inode_owner_or_capable(ns, ip)	inode_owner_or_capable(ns, ip)
+#else
+#error "Unsupported kernel"
+#endif
+
+#ifdef HAVE_SETATTR_PREPARE_USERNS
+#define	zpl_setattr_prepare(ns, dentry, ia)	setattr_prepare(ns, dentry, ia)
+#else
+/*
+ * Use kernel-provided version, or our own from
+ * linux/vfs_compat.h
+ */
+#define	zpl_setattr_prepare(ns, dentry, ia)	setattr_prepare(dentry, ia)
+#endif
+
 #endif	/* _SYS_ZPL_H */
--- a/sys/contrib/openzfs/include/sys/dmu_zfetch.h
+++ b/sys/contrib/openzfs/include/sys/dmu_zfetch.h
@ -49,20 +49,26 @@ typedef struct zfetch {

 typedef struct zstream {
 	uint64_t	zs_blkid;	/* expect next access at this blkid */
-	uint64_t	zs_pf_blkid;	/* next block to prefetch */
+	uint64_t	zs_pf_blkid1;	/* first block to prefetch */
+	uint64_t	zs_pf_blkid;	/* block to prefetch up to */

 	/*
 	 * We will next prefetch the L1 indirect block of this level-0
 	 * block id.
 	 */
-	uint64_t	zs_ipf_blkid;
+	uint64_t	zs_ipf_blkid1;	/* first block to prefetch */
+	uint64_t	zs_ipf_blkid;	/* block to prefetch up to */

-	kmutex_t	zs_lock;	/* protects stream */
-	hrtime_t	zs_atime;	/* time last prefetch issued */
-	hrtime_t	zs_start_time;	/* start of last prefetch */
 	list_node_t	zs_node;	/* link for zf_stream */
+	hrtime_t	zs_atime;	/* time last prefetch issued */
 	zfetch_t	*zs_fetch;	/* parent fetch */
-	zfs_refcount_t	zs_blocks; /* number of pending blocks in the stream */
+	boolean_t	zs_missed;	/* stream saw cache misses */
+	zfs_refcount_t	zs_callers;	/* number of pending callers */
+	/*
+	 * Number of stream references: dnode, callers and pending blocks.
+	 * The stream memory is freed when the number returns to zero.
+	 */
+	zfs_refcount_t	zs_refs;
 } zstream_t;

 void		zfetch_init(void);
@ -70,7 +76,10 @@ void		zfetch_fini(void);

 void		dmu_zfetch_init(zfetch_t *, struct dnode *);
 void		dmu_zfetch_fini(zfetch_t *);
-void		dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t,
+zstream_t	*dmu_zfetch_prepare(zfetch_t *, uint64_t, uint64_t, boolean_t,
+    boolean_t);
+void		dmu_zfetch_run(zstream_t *, boolean_t, boolean_t);
+void		dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t, boolean_t,
    boolean_t);


--- a/sys/contrib/openzfs/include/sys/vdev_raidz.h
+++ b/sys/contrib/openzfs/include/sys/vdev_raidz.h
@ -50,6 +50,8 @@ void vdev_raidz_reconstruct(struct raidz_map *, const int *, int);
 void vdev_raidz_child_done(zio_t *);
 void vdev_raidz_io_done(zio_t *);

+extern const zio_vsd_ops_t vdev_raidz_vsd_ops;
+
 /*
 * vdev_raidz_math interface
 */
--- a/sys/contrib/openzfs/include/sys/vdev_raidz_impl.h
+++ b/sys/contrib/openzfs/include/sys/vdev_raidz_impl.h
@ -108,8 +108,7 @@ typedef struct raidz_col {
 	uint64_t rc_size;		/* I/O size */
 	abd_t rc_abdstruct;		/* rc_abd probably points here */
 	abd_t *rc_abd;			/* I/O data */
-	void *rc_orig_data;		/* pre-reconstruction */
-	abd_t *rc_gdata;		/* used to store the "good" version */
+	abd_t *rc_orig_data;		/* pre-reconstruction */
 	int rc_error;			/* I/O error for this device */
 	uint8_t rc_tried;		/* Did we attempt this I/O column? */
 	uint8_t rc_skipped;		/* Did we skip this I/O column? */
@ -124,10 +123,8 @@ typedef struct raidz_row {
 	uint64_t rr_missingdata;	/* Count of missing data devices */
 	uint64_t rr_missingparity;	/* Count of missing parity devices */
 	uint64_t rr_firstdatacol;	/* First data column/parity count */
-	abd_t *rr_abd_copy;		/* rm_asize-buffer of copied data */
 	abd_t *rr_abd_empty;		/* dRAID empty sector buffer */
 	int rr_nempty;			/* empty sectors included in parity */
-	int rr_code;			/* reconstruction code (unused) */
 #ifdef ZFS_DEBUG
 	uint64_t rr_offset;		/* Logical offset for *_io_verify() */
 	uint64_t rr_size;		/* Physical size for *_io_verify() */
@ -136,8 +133,6 @@ typedef struct raidz_row {
 } raidz_row_t;

 typedef struct raidz_map {
-	uintptr_t rm_reports;		/* # of referencing checksum reports */
-	boolean_t rm_freed;		/* map no longer has referencing ZIO */
 	boolean_t rm_ecksuminjected;	/* checksum error was injected */
 	int rm_nrows;			/* Regular row count */
 	int rm_nskip;			/* RAIDZ sectors skipped for padding */
--- a/sys/contrib/openzfs/include/sys/zil.h
+++ b/sys/contrib/openzfs/include/sys/zil.h
@ -399,6 +399,7 @@ typedef struct itx {
 	void		*itx_callback_data; /* User data for the callback */
 	size_t		itx_size;	/* allocated itx structure size */
 	uint64_t	itx_oid;	/* object id */
+	uint64_t	itx_gen;	/* gen number for zfs_get_data */
 	lr_t		itx_lr;		/* common part of log record */
 	/* followed by type-specific part of lr_xx_t and its immediate data */
 } itx_t;
@ -467,7 +468,7 @@ typedef int zil_parse_blk_func_t(zilog_t *zilog, const blkptr_t *bp, void *arg,
 typedef int zil_parse_lr_func_t(zilog_t *zilog, const lr_t *lr, void *arg,
    uint64_t txg);
 typedef int zil_replay_func_t(void *arg1, void *arg2, boolean_t byteswap);
-typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf,
+typedef int zil_get_data_t(void *arg, uint64_t arg2, lr_write_t *lr, char *dbuf,
    struct lwb *lwb, zio_t *zio);

 extern int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,
--- a/sys/contrib/openzfs/include/sys/zio.h
+++ b/sys/contrib/openzfs/include/sys/zio.h
@ -382,14 +382,8 @@ struct zio_cksum_report {
 	struct zio_bad_cksum	*zcr_ckinfo;	/* information from failure */
 };

-typedef void zio_vsd_cksum_report_f(zio_t *zio, zio_cksum_report_t *zcr,
-    void *arg);
-
-zio_vsd_cksum_report_f	zio_vsd_default_cksum_report;
-
 typedef struct zio_vsd_ops {
 	zio_done_func_t		*vsd_free;
-	zio_vsd_cksum_report_f	*vsd_cksum_report;
 } zio_vsd_ops_t;

 typedef struct zio_gang_node {
@ -683,7 +677,7 @@ extern hrtime_t zio_handle_io_delay(zio_t *zio);
 */
 extern int zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
    const zbookmark_phys_t *zb, struct zio *zio, uint64_t offset,
-    uint64_t length, void *arg, struct zio_bad_cksum *info);
+    uint64_t length, struct zio_bad_cksum *info);
 extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report,
    const abd_t *good_data, const abd_t *bad_data, boolean_t drop_if_identical);

@ -695,6 +689,8 @@ extern int zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
    uint64_t length, const abd_t *good_data, const abd_t *bad_data,
    struct zio_bad_cksum *info);

+void zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr);
+
 /* Called from spa_sync(), but primarily an injection handler */
 extern void spa_handle_ignored_writes(spa_t *spa);

--- a/sys/contrib/openzfs/include/sys/zvol_impl.h
+++ b/sys/contrib/openzfs/include/sys/zvol_impl.h
@ -85,8 +85,8 @@ void zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off,
    uint64_t len, boolean_t sync);
 void zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
    uint64_t size, int sync);
-int zvol_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb,
-    zio_t *zio);
+int zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
+    struct lwb *lwb, zio_t *zio);
 int zvol_init_impl(void);
 void zvol_fini_impl(void);
 void zvol_wait_close(zvol_state_t *zv);
--- a/sys/contrib/openzfs/man/man5/zfs-module-parameters.5
+++ b/sys/contrib/openzfs/man/man5/zfs-module-parameters.5
@ -1,6 +1,6 @@
 '\" te
 .\" Copyright (c) 2013 by Turbo Fredriksson <turbo@bayour.com>. All rights reserved.
-.\" Copyright (c) 2019, 2020 by Delphix. All rights reserved.
+.\" Copyright (c) 2019, 2021 by Delphix. All rights reserved.
 .\" Copyright (c) 2019 Datto Inc.
 .\" The contents of this file are subject to the terms of the Common Development
 .\" and Distribution License (the "License").  You may not use this file except
@ -691,6 +691,29 @@ will occur.
 Default value: \fB600000\fR (ten minutes).
 .RE

+.sp
+.ne 2
+.na
+\fBreference_history\fR (int)
+.ad
+.RS 12n
+Maximum number of reference holders tracked when reference_tracking_enable
+is active.
+.sp
+Default value: \fB3\fR.
+.RE
+
+.sp
+.ne 2
+.na
+\fBreference_tracking_enable\fR (int)
+.ad
+.RS 12n
+Track reference holders to zfs_refcount_t objects (debug builds only).
+.sp
+Use \fB1\fR for yes and \fB0\fR for no (default).
+.RE
+
 .sp
 .ne 2
 .na
--- a/sys/contrib/openzfs/man/man8/zfs-allow.8
+++ b/sys/contrib/openzfs/man/man8/zfs-allow.8
@ -205,6 +205,7 @@ diff             subcommand     Allows lookup of paths within a dataset
                                given an object number, and the ability
                                to create snapshots necessary to
                                'zfs diff'.
+hold             subcommand     Allows adding a user hold to a snapshot
 load-key         subcommand     Allows loading and unloading of encryption key
                                (see 'zfs load-key' and 'zfs unload-key').
 change-key       subcommand     Allows changing an encryption key via
@ -214,6 +215,8 @@ promote          subcommand     Must also have the 'mount' and 'promote'
                                ability in the origin file system
 receive          subcommand     Must also have the 'mount' and 'create'
                                ability
+release          subcommand     Allows releasing a user hold which might
+                                destroy the snapshot
 rename           subcommand     Must also have the 'mount' and 'create'
                                ability in the new parent
 rollback         subcommand     Must also have the 'mount' ability
--- a/sys/contrib/openzfs/man/man8/zgenhostid.8
+++ b/sys/contrib/openzfs/man/man8/zgenhostid.8
@ -21,7 +21,7 @@
 .\"
 .\" Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
 .\"
-.Dd September 13, 2020
+.Dd March 18, 2021
 .Dt ZGENHOSTID 8 SMM
 .Os
 .Sh NAME
@ -50,7 +50,7 @@ Force file overwrite.
 Write to
 .Pa filename
 instead of default
-.Pa /etc/hostd
+.Pa /etc/hostid
 .It Ar hostid
 Specifies the value to be placed in
 .Pa /etc/hostid .
--- a/sys/contrib/openzfs/man/man8/zpoolconcepts.8
+++ b/sys/contrib/openzfs/man/man8/zpoolconcepts.8
@ -295,6 +295,23 @@ identified by a unique identifier instead of its path since the path was never
 correct in the first place.
 .El
 .Pp
+Checksum errors represent events where a disk returned data that was expected
+to be correct, but was not.
+In other words, these are instances of silent data corruption.
+The checksum errors are reported in
+.Nm zpool Cm status
+and
+.Nm zpool Cm events .
+When a block is stored redundantly, a damaged block may be reconstructed
+(e.g. from RAIDZ parity or a mirrored copy).
+In this case, ZFS reports the checksum error against the disks that contained
+damaged data.
+If a block is unable to be reconstructed (e.g. due to 3 disks being damaged
+in a RAIDZ2 group), it is not possible to determine which disks were silently
+corrupted.
+In this case, checksum errors are reported for all disks on which the block
+is stored.
+.Pp
 If a device is removed and later re-attached to the system, ZFS attempts
 to put the device online automatically.
 Device attach detection is hardware-dependent and might not be supported on all
--- a/sys/contrib/openzfs/module/os/freebsd/spl/spl_kstat.c
+++ b/sys/contrib/openzfs/module/os/freebsd/spl/spl_kstat.c
@ -299,15 +299,10 @@ __kstat_create(const char *module, int instance, const char *name,
 		panic("Undefined kstat type %d\n", ksp->ks_type);
 	}

-	if (ksp->ks_flags & KSTAT_FLAG_VIRTUAL) {
+	if (ksp->ks_flags & KSTAT_FLAG_VIRTUAL)
 		ksp->ks_data = NULL;
-	} else {
+	else
 		ksp->ks_data = kmem_zalloc(ksp->ks_data_size, KM_SLEEP);
-		if (ksp->ks_data == NULL) {
-			kmem_free(ksp, sizeof (*ksp));
-			ksp = NULL;
-		}
-	}

 	/*
 	 * Some kstats use a module name like "zfs/poolname" to distinguish a
@ -509,6 +504,8 @@ kstat_delete(kstat_t *ksp)
 	sysctl_ctx_free(&ksp->ks_sysctl_ctx);
 	ksp->ks_lock = NULL;
 	mutex_destroy(&ksp->ks_private_lock);
+	if (!(ksp->ks_flags & KSTAT_FLAG_VIRTUAL))
+		kmem_free(ksp->ks_data, ksp->ks_data_size);
 	free(ksp, M_KSTAT);
 }

--- a/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
+++ b/sys/contrib/openzfs/module/os/freebsd/zfs/sysctl_os.c
@ -407,12 +407,6 @@ SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, preload_limit, CTLFLAG_RWTUN,
    &metaslab_preload_limit, 0,
    "Max number of metaslabs per group to preload");

-/* refcount.c */
-extern int reference_tracking_enable;
-SYSCTL_INT(_vfs_zfs, OID_AUTO, reference_tracking_enable, CTLFLAG_RDTUN,
-    &reference_tracking_enable, 0,
-    "Track reference holders to refcount_t objects, used mostly by ZFS");
-
 /* spa.c */
 extern int zfs_ccw_retry_interval;
 SYSCTL_INT(_vfs_zfs, OID_AUTO, ccw_retry_interval, CTLFLAG_RWTUN,
--- a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c
@ -490,8 +490,8 @@ abd_alloc_zero_scatter(void)
 #define	PAGE_SHIFT (highbit64(PAGESIZE)-1)
 #endif

-#define	zfs_kmap_atomic(chunk, km)	((void *)chunk)
-#define	zfs_kunmap_atomic(addr, km)	do { (void)(addr); } while (0)
+#define	zfs_kmap_atomic(chunk)		((void *)chunk)
+#define	zfs_kunmap_atomic(addr)		do { (void)(addr); } while (0)
 #define	local_irq_save(flags)		do { (void)(flags); } while (0)
 #define	local_irq_restore(flags)	do { (void)(flags); } while (0)
 #define	nth_page(pg, i) \
@ -879,8 +879,7 @@ abd_iter_map(struct abd_iter *aiter)
 		aiter->iter_mapsize = MIN(aiter->iter_sg->length - offset,
 		    aiter->iter_abd->abd_size - aiter->iter_pos);

-		paddr = zfs_kmap_atomic(sg_page(aiter->iter_sg),
-		    km_table[aiter->iter_km]);
+		paddr = zfs_kmap_atomic(sg_page(aiter->iter_sg));
 	}

 	aiter->iter_mapaddr = (char *)paddr + offset;
@ -899,8 +898,7 @@ abd_iter_unmap(struct abd_iter *aiter)

 	if (!abd_is_linear(aiter->iter_abd)) {
 		/* LINTED E_FUNC_SET_NOT_USED */
-		zfs_kunmap_atomic(aiter->iter_mapaddr - aiter->iter_offset,
-		    km_table[aiter->iter_km]);
+		zfs_kunmap_atomic(aiter->iter_mapaddr - aiter->iter_offset);
 	}

 	ASSERT3P(aiter->iter_mapaddr, !=, NULL);
--- a/sys/contrib/openzfs/module/os/linux/zfs/policy.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/policy.c
@ -124,7 +124,7 @@ secpolicy_vnode_any_access(const cred_t *cr, struct inode *ip, uid_t owner)
 	if (crgetfsuid(cr) == owner)
 		return (0);

-	if (inode_owner_or_capable(ip))
+	if (zpl_inode_owner_or_capable(kcred->user_ns, ip))
 		return (0);

 #if defined(CONFIG_USER_NS)
--- a/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/vdev_disk.c
@ -589,9 +589,14 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio,
 		}

 		/* bio_alloc() with __GFP_WAIT never returns NULL */
+#ifdef HAVE_BIO_MAX_SEGS
+		dr->dr_bio[i] = bio_alloc(GFP_NOIO, bio_max_segs(
+		    abd_nr_pages_off(zio->io_abd, bio_size, abd_offset)));
+#else
 		dr->dr_bio[i] = bio_alloc(GFP_NOIO,
 		    MIN(abd_nr_pages_off(zio->io_abd, bio_size, abd_offset),
 		    BIO_MAX_PAGES));
+#endif
 		if (unlikely(dr->dr_bio[i] == NULL)) {
 			vdev_disk_dio_free(dr);
 			return (SET_ERROR(ENOMEM));
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c
@ -590,7 +590,8 @@ struct inode *
 zfsctl_root(znode_t *zp)
 {
 	ASSERT(zfs_has_ctldir(zp));
-	igrab(ZTOZSB(zp)->z_ctldir);
+	/* Must have an existing ref, so igrab() cannot return NULL */
+	VERIFY3P(igrab(ZTOZSB(zp)->z_ctldir), !=, NULL);
 	return (ZTOZSB(zp)->z_ctldir);
 }

--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_uio.c
@ -136,12 +136,12 @@ zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
 		void *paddr;
 		cnt = MIN(bv->bv_len - skip, n);

-		paddr = zfs_kmap_atomic(bv->bv_page, KM_USER1);
+		paddr = zfs_kmap_atomic(bv->bv_page);
 		if (rw == UIO_READ)
 			bcopy(p, paddr + bv->bv_offset + skip, cnt);
 		else
 			bcopy(paddr + bv->bv_offset + skip, p, cnt);
-		zfs_kunmap_atomic(paddr, KM_USER1);
+		zfs_kunmap_atomic(paddr);

 		skip += cnt;
 		if (skip == bv->bv_len) {
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c
@ -1734,7 +1734,11 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
 			VERIFY(zfsctl_root_lookup(*ipp, "snapshot", ipp,
 			    0, kcred, NULL, NULL) == 0);
 		} else {
-			igrab(*ipp);
+			/*
+			 * Must have an existing ref, so igrab()
+			 * cannot return NULL
+			 */
+			VERIFY3P(igrab(*ipp), !=, NULL);
 		}
 		ZFS_EXIT(zfsvfs);
 		return (0);
--- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c
@ -1656,7 +1656,8 @@ zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr)
 */
 /* ARGSUSED */
 int
-zfs_getattr_fast(struct inode *ip, struct kstat *sp)
+zfs_getattr_fast(struct user_namespace *user_ns, struct inode *ip,
+    struct kstat *sp)
 {
 	znode_t *zp = ITOZ(ip);
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
@ -1668,7 +1669,7 @@ zfs_getattr_fast(struct inode *ip, struct kstat *sp)

 	mutex_enter(&zp->z_lock);

-	generic_fillattr(ip, sp);
+	zpl_generic_fillattr(user_ns, ip, sp);
 	/*
 	 * +1 link count for root inode with visible '.zfs' directory.
 	 */
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_ctldir.c
@ -101,12 +101,22 @@ zpl_root_readdir(struct file *filp, void *dirent, filldir_t filldir)
 */
 /* ARGSUSED */
 static int
+#ifdef HAVE_USERNS_IOPS_GETATTR
+zpl_root_getattr_impl(struct user_namespace *user_ns,
+    const struct path *path, struct kstat *stat, u32 request_mask,
+    unsigned int query_flags)
+#else
 zpl_root_getattr_impl(const struct path *path, struct kstat *stat,
    u32 request_mask, unsigned int query_flags)
+#endif
 {
 	struct inode *ip = path->dentry->d_inode;

+#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR)
+	generic_fillattr(user_ns, ip, stat);
+#else
 	generic_fillattr(ip, stat);
+#endif
 	stat->atime = current_time(ip);

 	return (0);
@ -290,8 +300,14 @@ zpl_snapdir_readdir(struct file *filp, void *dirent, filldir_t filldir)
 #endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */

 static int
+#ifdef HAVE_IOPS_RENAME_USERNS
+zpl_snapdir_rename2(struct user_namespace *user_ns, struct inode *sdip,
+    struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
+    unsigned int flags)
+#else
 zpl_snapdir_rename2(struct inode *sdip, struct dentry *sdentry,
    struct inode *tdip, struct dentry *tdentry, unsigned int flags)
+#endif
 {
 	cred_t *cr = CRED();
 	int error;
@ -309,7 +325,7 @@ zpl_snapdir_rename2(struct inode *sdip, struct dentry *sdentry,
 	return (error);
 }

-#ifndef HAVE_RENAME_WANTS_FLAGS
+#if !defined(HAVE_RENAME_WANTS_FLAGS) && !defined(HAVE_IOPS_RENAME_USERNS)
 static int
 zpl_snapdir_rename(struct inode *sdip, struct dentry *sdentry,
    struct inode *tdip, struct dentry *tdentry)
@ -333,7 +349,12 @@ zpl_snapdir_rmdir(struct inode *dip, struct dentry *dentry)
 }

 static int
+#ifdef HAVE_IOPS_MKDIR_USERNS
+zpl_snapdir_mkdir(struct user_namespace *user_ns, struct inode *dip,
+    struct dentry *dentry, umode_t mode)
+#else
 zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
+#endif
 {
 	cred_t *cr = CRED();
 	vattr_t *vap;
@ -363,14 +384,24 @@ zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
 */
 /* ARGSUSED */
 static int
+#ifdef HAVE_USERNS_IOPS_GETATTR
+zpl_snapdir_getattr_impl(struct user_namespace *user_ns,
+    const struct path *path, struct kstat *stat, u32 request_mask,
+    unsigned int query_flags)
+#else
 zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat,
    u32 request_mask, unsigned int query_flags)
+#endif
 {
 	struct inode *ip = path->dentry->d_inode;
 	zfsvfs_t *zfsvfs = ITOZSB(ip);

 	ZPL_ENTER(zfsvfs);
+#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR)
+	generic_fillattr(user_ns, ip, stat);
+#else
 	generic_fillattr(ip, stat);
+#endif

 	stat->nlink = stat->size = 2;
 	stat->ctime = stat->mtime = dmu_objset_snap_cmtime(zfsvfs->z_os);
@ -408,7 +439,7 @@ const struct file_operations zpl_fops_snapdir = {
 const struct inode_operations zpl_ops_snapdir = {
 	.lookup		= zpl_snapdir_lookup,
 	.getattr	= zpl_snapdir_getattr,
-#ifdef HAVE_RENAME_WANTS_FLAGS
+#if defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS)
 	.rename		= zpl_snapdir_rename2,
 #else
 	.rename		= zpl_snapdir_rename,
@ -495,8 +526,14 @@ zpl_shares_readdir(struct file *filp, void *dirent, filldir_t filldir)

 /* ARGSUSED */
 static int
+#ifdef HAVE_USERNS_IOPS_GETATTR
+zpl_shares_getattr_impl(struct user_namespace *user_ns,
+    const struct path *path, struct kstat *stat, u32 request_mask,
+    unsigned int query_flags)
+#else
 zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
    u32 request_mask, unsigned int query_flags)
+#endif
 {
 	struct inode *ip = path->dentry->d_inode;
 	zfsvfs_t *zfsvfs = ITOZSB(ip);
@ -506,7 +543,11 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
 	ZPL_ENTER(zfsvfs);

 	if (zfsvfs->z_shares_dir == 0) {
+#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR)
+		generic_fillattr(user_ns, path->dentry->d_inode, stat);
+#else
 		generic_fillattr(path->dentry->d_inode, stat);
+#endif
 		stat->nlink = stat->size = 2;
 		stat->atime = current_time(ip);
 		ZPL_EXIT(zfsvfs);
@ -515,7 +556,11 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,

 	error = -zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp);
 	if (error == 0) {
-		error = -zfs_getattr_fast(ZTOI(dzp), stat);
+#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR)
+		error = -zfs_getattr_fast(user_ns, ZTOI(dzp), stat);
+#else
+		error = -zfs_getattr_fast(kcred->user_ns, ZTOI(dzp), stat);
+#endif
 		iput(ZTOI(dzp));
 	}

--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c
@ -869,7 +869,7 @@ __zpl_ioctl_setflags(struct inode *ip, uint32_t ioctl_flags, xvattr_t *xva)
 	    !capable(CAP_LINUX_IMMUTABLE))
 		return (-EACCES);

-	if (!inode_owner_or_capable(ip))
+	if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
 		return (-EACCES);

 	xva_init(xva);
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_inode.c
@ -128,7 +128,12 @@ zpl_vap_init(vattr_t *vap, struct inode *dir, umode_t mode, cred_t *cr)
 }

 static int
+#ifdef HAVE_IOPS_CREATE_USERNS
+zpl_create(struct user_namespace *user_ns, struct inode *dir,
+    struct dentry *dentry, umode_t mode, bool flag)
+#else
 zpl_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool flag)
+#endif
 {
 	cred_t *cr = CRED();
 	znode_t *zp;
@ -163,7 +168,12 @@ zpl_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool flag)
 }

 static int
+#ifdef HAVE_IOPS_MKNOD_USERNS
+zpl_mknod(struct user_namespace *user_ns, struct inode *dir,
+    struct dentry *dentry, umode_t mode,
+#else
 zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
+#endif
    dev_t rdev)
 {
 	cred_t *cr = CRED();
@ -278,7 +288,12 @@ zpl_unlink(struct inode *dir, struct dentry *dentry)
 }

 static int
+#ifdef HAVE_IOPS_MKDIR_USERNS
+zpl_mkdir(struct user_namespace *user_ns, struct inode *dir,
+    struct dentry *dentry, umode_t mode)
+#else
 zpl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+#endif
 {
 	cred_t *cr = CRED();
 	vattr_t *vap;
@ -338,8 +353,14 @@ zpl_rmdir(struct inode *dir, struct dentry *dentry)
 }

 static int
+#ifdef HAVE_USERNS_IOPS_GETATTR
+zpl_getattr_impl(struct user_namespace *user_ns,
+    const struct path *path, struct kstat *stat, u32 request_mask,
+    unsigned int query_flags)
+#else
 zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
    unsigned int query_flags)
+#endif
 {
 	int error;
 	fstrans_cookie_t cookie;
@ -350,7 +371,11 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
 	 * XXX request_mask and query_flags currently ignored.
 	 */

-	error = -zfs_getattr_fast(path->dentry->d_inode, stat);
+#ifdef HAVE_USERNS_IOPS_GETATTR
+	error = -zfs_getattr_fast(user_ns, path->dentry->d_inode, stat);
+#else
+	error = -zfs_getattr_fast(kcred->user_ns, path->dentry->d_inode, stat);
+#endif
 	spl_fstrans_unmark(cookie);
 	ASSERT3S(error, <=, 0);

@ -359,7 +384,12 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
 ZPL_GETATTR_WRAPPER(zpl_getattr);

 static int
+#ifdef HAVE_SETATTR_PREPARE_USERNS
+zpl_setattr(struct user_namespace *user_ns, struct dentry *dentry,
+    struct iattr *ia)
+#else
 zpl_setattr(struct dentry *dentry, struct iattr *ia)
+#endif
 {
 	struct inode *ip = dentry->d_inode;
 	cred_t *cr = CRED();
@ -367,7 +397,7 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia)
 	int error;
 	fstrans_cookie_t cookie;

-	error = setattr_prepare(dentry, ia);
+	error = zpl_setattr_prepare(kcred->user_ns, dentry, ia);
 	if (error)
 		return (error);

@ -399,8 +429,14 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia)
 }

 static int
+#ifdef HAVE_IOPS_RENAME_USERNS
+zpl_rename2(struct user_namespace *user_ns, struct inode *sdip,
+    struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
+    unsigned int flags)
+#else
 zpl_rename2(struct inode *sdip, struct dentry *sdentry,
    struct inode *tdip, struct dentry *tdentry, unsigned int flags)
+#endif
 {
 	cred_t *cr = CRED();
 	int error;
@ -421,7 +457,7 @@ zpl_rename2(struct inode *sdip, struct dentry *sdentry,
 	return (error);
 }

-#ifndef HAVE_RENAME_WANTS_FLAGS
+#if !defined(HAVE_RENAME_WANTS_FLAGS) && !defined(HAVE_IOPS_RENAME_USERNS)
 static int
 zpl_rename(struct inode *sdip, struct dentry *sdentry,
    struct inode *tdip, struct dentry *tdentry)
@ -431,7 +467,12 @@ zpl_rename(struct inode *sdip, struct dentry *sdentry,
 #endif

 static int
+#ifdef HAVE_IOPS_SYMLINK_USERNS
+zpl_symlink(struct user_namespace *user_ns, struct inode *dir,
+    struct dentry *dentry, const char *name)
+#else
 zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name)
+#endif
 {
 	cred_t *cr = CRED();
 	vattr_t *vap;
@ -593,7 +634,8 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)

 	crhold(cr);
 	ip->i_ctime = current_time(ip);
-	igrab(ip); /* Use ihold() if available */
+	/* Must have an existing ref, so igrab() cannot return NULL */
+	VERIFY3P(igrab(ip), !=, NULL);

 	cookie = spl_fstrans_mark();
 	error = -zfs_link(ITOZ(dir), ITOZ(ip), dname(dentry), cr, 0);
@ -677,7 +719,7 @@ const struct inode_operations zpl_dir_inode_operations = {
 	.mkdir		= zpl_mkdir,
 	.rmdir		= zpl_rmdir,
 	.mknod		= zpl_mknod,
-#ifdef HAVE_RENAME_WANTS_FLAGS
+#if defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS)
 	.rename		= zpl_rename2,
 #else
 	.rename		= zpl_rename,
--- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
+++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_xattr.c
@ -1233,7 +1233,7 @@ __zpl_xattr_acl_set_access(struct inode *ip, const char *name,
 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
 		return (-EOPNOTSUPP);

-	if (!inode_owner_or_capable(ip))
+	if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
 		return (-EPERM);

 	if (value) {
@ -1273,7 +1273,7 @@ __zpl_xattr_acl_set_default(struct inode *ip, const char *name,
 	if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
 		return (-EOPNOTSUPP);

-	if (!inode_owner_or_capable(ip))
+	if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
 		return (-EPERM);

 	if (value) {
--- a/sys/contrib/openzfs/module/zfs/dbuf.c
+++ b/sys/contrib/openzfs/module/zfs/dbuf.c
@ -1640,7 +1640,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
 		mutex_exit(&db->db_mtx);
 		if (err == 0 && prefetch) {
 			dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE,
-			    flags & DB_RF_HAVESTRUCT);
+			    B_FALSE, flags & DB_RF_HAVESTRUCT);
 		}
 		DB_DNODE_EXIT(db);
 		DBUF_STAT_BUMP(hash_hits);
@ -1662,6 +1662,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
 		 */
 		if (!err && prefetch) {
 			dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE,
+			    db->db_state != DB_CACHED,
 			    flags & DB_RF_HAVESTRUCT);
 		}

@ -1691,7 +1692,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
 		mutex_exit(&db->db_mtx);
 		if (prefetch) {
 			dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE,
-			    flags & DB_RF_HAVESTRUCT);
+			    B_TRUE, flags & DB_RF_HAVESTRUCT);
 		}
 		DB_DNODE_EXIT(db);
 		DBUF_STAT_BUMP(hash_misses);
--- a/sys/contrib/openzfs/module/zfs/dmu.c
+++ b/sys/contrib/openzfs/module/zfs/dmu.c
@ -497,10 +497,12 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
    boolean_t read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
 {
 	dmu_buf_t **dbp;
+	zstream_t *zs = NULL;
 	uint64_t blkid, nblks, i;
 	uint32_t dbuf_flags;
 	int err;
 	zio_t *zio = NULL;
+	boolean_t missed = B_FALSE;

 	ASSERT(length <= DMU_MAX_ACCESS);

@ -536,9 +538,21 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
 		zio = zio_root(dn->dn_objset->os_spa, NULL, NULL,
 		    ZIO_FLAG_CANFAIL);
 	blkid = dbuf_whichblock(dn, 0, offset);
+	if ((flags & DMU_READ_NO_PREFETCH) == 0 &&
+	    DNODE_META_IS_CACHEABLE(dn) && length <= zfetch_array_rd_sz) {
+		/*
+		 * Prepare the zfetch before initiating the demand reads, so
+		 * that if multiple threads block on same indirect block, we
+		 * base predictions on the original less racy request order.
+		 */
+		zs = dmu_zfetch_prepare(&dn->dn_zfetch, blkid, nblks,
+		    read && DNODE_IS_CACHEABLE(dn), B_TRUE);
+	}
 	for (i = 0; i < nblks; i++) {
 		dmu_buf_impl_t *db = dbuf_hold(dn, blkid + i, tag);
 		if (db == NULL) {
+			if (zs)
+				dmu_zfetch_run(zs, missed, B_TRUE);
 			rw_exit(&dn->dn_struct_rwlock);
 			dmu_buf_rele_array(dbp, nblks, tag);
 			if (read)
@ -546,20 +560,27 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
 			return (SET_ERROR(EIO));
 		}

-		/* initiate async i/o */
-		if (read)
+		/*
+		 * Initiate async demand data read.
+		 * We check the db_state after calling dbuf_read() because
+		 * (1) dbuf_read() may change the state to CACHED due to a
+		 * hit in the ARC, and (2) on a cache miss, a child will
+		 * have been added to "zio" but not yet completed, so the
+		 * state will not yet be CACHED.
+		 */
+		if (read) {
 			(void) dbuf_read(db, zio, dbuf_flags);
+			if (db->db_state != DB_CACHED)
+				missed = B_TRUE;
+		}
 		dbp[i] = &db->db;
 	}

 	if (!read)
 		zfs_racct_write(length, nblks);

-	if ((flags & DMU_READ_NO_PREFETCH) == 0 &&
-	    DNODE_META_IS_CACHEABLE(dn) && length <= zfetch_array_rd_sz) {
-		dmu_zfetch(&dn->dn_zfetch, blkid, nblks,
-		    read && DNODE_IS_CACHEABLE(dn), B_TRUE);
-	}
+	if (zs)
+		dmu_zfetch_run(zs, missed, B_TRUE);
 	rw_exit(&dn->dn_struct_rwlock);

 	if (read) {
--- a/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
+++ b/sys/contrib/openzfs/module/zfs/dmu_zfetch.c
@ -59,8 +59,6 @@ typedef struct zfetch_stats {
 	kstat_named_t zfetchstat_hits;
 	kstat_named_t zfetchstat_misses;
 	kstat_named_t zfetchstat_max_streams;
-	kstat_named_t zfetchstat_max_completion_us;
-	kstat_named_t zfetchstat_last_completion_us;
 	kstat_named_t zfetchstat_io_issued;
 } zfetch_stats_t;

@ -68,8 +66,6 @@ static zfetch_stats_t zfetch_stats = {
 	{ "hits",			KSTAT_DATA_UINT64 },
 	{ "misses",			KSTAT_DATA_UINT64 },
 	{ "max_streams",		KSTAT_DATA_UINT64 },
-	{ "max_completion_us",		KSTAT_DATA_UINT64 },
-	{ "last_completion_us",		KSTAT_DATA_UINT64 },
 	{ "io_issued",		KSTAT_DATA_UINT64 },
 };

@ -129,7 +125,7 @@ dmu_zfetch_init(zfetch_t *zf, dnode_t *dno)
 static void
 dmu_zfetch_stream_fini(zstream_t *zs)
 {
-	mutex_destroy(&zs->zs_lock);
+	ASSERT(!list_link_active(&zs->zs_node));
 	kmem_free(zs, sizeof (*zs));
 }

@ -138,17 +134,10 @@ dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs)
 {
 	ASSERT(MUTEX_HELD(&zf->zf_lock));
 	list_remove(&zf->zf_stream, zs);
-	dmu_zfetch_stream_fini(zs);
-	zf->zf_numstreams--;
-}
-
-static void
-dmu_zfetch_stream_orphan(zfetch_t *zf, zstream_t *zs)
-{
-	ASSERT(MUTEX_HELD(&zf->zf_lock));
-	list_remove(&zf->zf_stream, zs);
-	zs->zs_fetch = NULL;
 	zf->zf_numstreams--;
+	membar_producer();
+	if (zfs_refcount_remove(&zs->zs_refs, NULL) == 0)
+		dmu_zfetch_stream_fini(zs);
 }

 /*
@ -161,12 +150,8 @@ dmu_zfetch_fini(zfetch_t *zf)
 	zstream_t *zs;

 	mutex_enter(&zf->zf_lock);
-	while ((zs = list_head(&zf->zf_stream)) != NULL) {
-		if (zfs_refcount_count(&zs->zs_blocks) != 0)
-			dmu_zfetch_stream_orphan(zf, zs);
-		else
-			dmu_zfetch_stream_remove(zf, zs);
-	}
+	while ((zs = list_head(&zf->zf_stream)) != NULL)
+		dmu_zfetch_stream_remove(zf, zs);
 	mutex_exit(&zf->zf_lock);
 	list_destroy(&zf->zf_stream);
 	mutex_destroy(&zf->zf_lock);
@ -195,9 +180,9 @@ dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
 	    zs != NULL; zs = zs_next) {
 		zs_next = list_next(&zf->zf_stream, zs);
 		/*
-		 * Skip gethrtime() call if there are still references
+		 * Skip if still active.  1 -- zf_stream reference.
 		 */
-		if (zfs_refcount_count(&zs->zs_blocks) != 0)
+		if (zfs_refcount_count(&zs->zs_refs) != 1)
 			continue;
 		if (((now - zs->zs_atime) / NANOSEC) >
 		    zfetch_min_sec_reap)
@ -222,12 +207,17 @@ dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)

 	zstream_t *zs = kmem_zalloc(sizeof (*zs), KM_SLEEP);
 	zs->zs_blkid = blkid;
+	zs->zs_pf_blkid1 = blkid;
 	zs->zs_pf_blkid = blkid;
+	zs->zs_ipf_blkid1 = blkid;
 	zs->zs_ipf_blkid = blkid;
 	zs->zs_atime = now;
 	zs->zs_fetch = zf;
-	zfs_refcount_create(&zs->zs_blocks);
-	mutex_init(&zs->zs_lock, NULL, MUTEX_DEFAULT, NULL);
+	zs->zs_missed = B_FALSE;
+	zfs_refcount_create(&zs->zs_callers);
+	zfs_refcount_create(&zs->zs_refs);
+	/* One reference for zf_stream. */
+	zfs_refcount_add(&zs->zs_refs, NULL);
 	zf->zf_numstreams++;
 	list_insert_head(&zf->zf_stream, zs);
 }
@ -237,48 +227,36 @@ dmu_zfetch_stream_done(void *arg, boolean_t io_issued)
 {
 	zstream_t *zs = arg;

-	if (zs->zs_start_time && io_issued) {
-		hrtime_t now = gethrtime();
-		hrtime_t delta = NSEC2USEC(now - zs->zs_start_time);
-
-		zs->zs_start_time = 0;
-		ZFETCHSTAT_SET(zfetchstat_last_completion_us, delta);
-		if (delta > ZFETCHSTAT_GET(zfetchstat_max_completion_us))
-			ZFETCHSTAT_SET(zfetchstat_max_completion_us, delta);
-	}
-
-	if (zfs_refcount_remove(&zs->zs_blocks, NULL) != 0)
-		return;
-
-	/*
-	 * The parent fetch structure has gone away
-	 */
-	if (zs->zs_fetch == NULL)
+	if (zfs_refcount_remove(&zs->zs_refs, NULL) == 0)
 		dmu_zfetch_stream_fini(zs);
 }

 /*
- * This is the predictive prefetch entry point.  It associates dnode access
- * specified with blkid and nblks arguments with prefetch stream, predicts
- * further accesses based on that stats and initiates speculative prefetch.
+ * This is the predictive prefetch entry point.  dmu_zfetch_prepare()
+ * associates the dnode access specified by the blkid and nblks arguments
+ * with a prefetch stream, predicts further accesses based on those stats and
+ * returns the stream pointer on success.  That pointer must later be passed
+ * to dmu_zfetch_run() to initiate the speculative prefetch for the stream and
+ * release it.  dmu_zfetch() is a wrapper for simple cases when a window
+ * between prediction and prefetch initiation is not needed.
 * fetch_data argument specifies whether actual data blocks should be fetched:
 *   FALSE -- prefetch only indirect blocks for predicted data blocks;
 *   TRUE -- prefetch predicted data blocks plus following indirect blocks.
 */
-void
-dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
-    boolean_t have_lock)
+zstream_t *
+dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
+    boolean_t fetch_data, boolean_t have_lock)
 {
 	zstream_t *zs;
-	int64_t pf_start, ipf_start, ipf_istart, ipf_iend;
+	int64_t pf_start, ipf_start;
 	int64_t pf_ahead_blks, max_blks;
-	int epbs, max_dist_blks, pf_nblks, ipf_nblks, issued;
-	uint64_t end_of_access_blkid;
+	int max_dist_blks, pf_nblks, ipf_nblks;
+	uint64_t end_of_access_blkid, maxblkid;
 	end_of_access_blkid = blkid + nblks;
 	spa_t *spa = zf->zf_dnode->dn_objset->os_spa;

 	if (zfs_prefetch_disable)
-		return;
+		return (NULL);
 	/*
 	 * If we haven't yet loaded the indirect vdevs' mappings, we
 	 * can only read from blocks that we carefully ensure are on
@ -287,14 +265,14 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
 	 * blocks (e.g. of the MOS's dnode object).
 	 */
 	if (!spa_indirect_vdevs_loaded(spa))
-		return;
+		return (NULL);

 	/*
 	 * As a fast path for small (single-block) files, ignore access
 	 * to the first block.
 	 */
 	if (!have_lock && blkid == 0)
-		return;
+		return (NULL);

 	if (!have_lock)
 		rw_enter(&zf->zf_dnode->dn_struct_rwlock, RW_READER);
@ -303,10 +281,11 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
 	 * A fast path for small files for which no prefetch will
 	 * happen.
 	 */
-	if (zf->zf_dnode->dn_maxblkid < 2) {
+	maxblkid = zf->zf_dnode->dn_maxblkid;
+	if (maxblkid < 2) {
 		if (!have_lock)
 			rw_exit(&zf->zf_dnode->dn_struct_rwlock);
-		return;
+		return (NULL);
 	}
 	mutex_enter(&zf->zf_lock);

@ -317,45 +296,47 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
 	 */
 	for (zs = list_head(&zf->zf_stream); zs != NULL;
 	    zs = list_next(&zf->zf_stream, zs)) {
-		if (blkid == zs->zs_blkid || blkid + 1 == zs->zs_blkid) {
-			mutex_enter(&zs->zs_lock);
-			/*
-			 * zs_blkid could have changed before we
-			 * acquired zs_lock; re-check them here.
-			 */
-			if (blkid == zs->zs_blkid) {
-				break;
-			} else if (blkid + 1 == zs->zs_blkid) {
-				blkid++;
-				nblks--;
-				if (nblks == 0) {
-					/* Already prefetched this before. */
-					mutex_exit(&zs->zs_lock);
-					mutex_exit(&zf->zf_lock);
-					if (!have_lock) {
-						rw_exit(&zf->zf_dnode->
-						    dn_struct_rwlock);
-					}
-					return;
-				}
-				break;
-			}
-			mutex_exit(&zs->zs_lock);
+		if (blkid == zs->zs_blkid) {
+			break;
+		} else if (blkid + 1 == zs->zs_blkid) {
+			blkid++;
+			nblks--;
+			break;
 		}
 	}

+	/*
+	 * If the file is ending, remove the matching stream if found.
+	 * If not found then it is too late to create a new one now.
+	 */
+	if (end_of_access_blkid >= maxblkid) {
+		if (zs != NULL)
+			dmu_zfetch_stream_remove(zf, zs);
+		mutex_exit(&zf->zf_lock);
+		if (!have_lock)
+			rw_exit(&zf->zf_dnode->dn_struct_rwlock);
+		return (NULL);
+	}
+
+	/* Exit if we already prefetched this block before. */
+	if (nblks == 0) {
+		mutex_exit(&zf->zf_lock);
+		if (!have_lock)
+			rw_exit(&zf->zf_dnode->dn_struct_rwlock);
+		return (NULL);
+	}
+
 	if (zs == NULL) {
 		/*
 		 * This access is not part of any existing stream.  Create
 		 * a new stream for it.
 		 */
-		ZFETCHSTAT_BUMP(zfetchstat_misses);
-
 		dmu_zfetch_stream_create(zf, end_of_access_blkid);
 		mutex_exit(&zf->zf_lock);
 		if (!have_lock)
 			rw_exit(&zf->zf_dnode->dn_struct_rwlock);
-		return;
+		ZFETCHSTAT_BUMP(zfetchstat_misses);
+		return (NULL);
 	}

 	/*
@ -369,6 +350,10 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
 	 * start just after the block we just accessed.
 	 */
 	pf_start = MAX(zs->zs_pf_blkid, end_of_access_blkid);
+	if (zs->zs_pf_blkid1 < end_of_access_blkid)
+		zs->zs_pf_blkid1 = end_of_access_blkid;
+	if (zs->zs_ipf_blkid1 < end_of_access_blkid)
+		zs->zs_ipf_blkid1 = end_of_access_blkid;

 	/*
 	 * Double our amount of prefetched data, but don't let the
@ -407,49 +392,108 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
 	 * (i.e. the amount read now + the amount of data prefetched now).
 	 */
 	pf_ahead_blks = zs->zs_ipf_blkid - blkid + nblks + pf_nblks;
-	max_blks = max_dist_blks - (ipf_start - end_of_access_blkid);
+	max_blks = max_dist_blks - (ipf_start - zs->zs_pf_blkid);
 	ipf_nblks = MIN(pf_ahead_blks, max_blks);
 	zs->zs_ipf_blkid = ipf_start + ipf_nblks;

-	epbs = zf->zf_dnode->dn_indblkshift - SPA_BLKPTRSHIFT;
-	ipf_istart = P2ROUNDUP(ipf_start, 1 << epbs) >> epbs;
-	ipf_iend = P2ROUNDUP(zs->zs_ipf_blkid, 1 << epbs) >> epbs;
-
-	zs->zs_atime = gethrtime();
-	/* no prior reads in progress */
-	if (zfs_refcount_count(&zs->zs_blocks) == 0)
-		zs->zs_start_time = zs->zs_atime;
 	zs->zs_blkid = end_of_access_blkid;
-	zfs_refcount_add_many(&zs->zs_blocks, pf_nblks + ipf_iend - ipf_istart,
-	    NULL);
-	mutex_exit(&zs->zs_lock);
+	/* Protect the stream from reclamation. */
+	zs->zs_atime = gethrtime();
+	zfs_refcount_add(&zs->zs_refs, NULL);
+	/* Count concurrent callers. */
+	zfs_refcount_add(&zs->zs_callers, NULL);
 	mutex_exit(&zf->zf_lock);
-	issued = 0;
+
+	if (!have_lock)
+		rw_exit(&zf->zf_dnode->dn_struct_rwlock);
+
+	ZFETCHSTAT_BUMP(zfetchstat_hits);
+	return (zs);
+}
+
+void
+dmu_zfetch_run(zstream_t *zs, boolean_t missed, boolean_t have_lock)
+{
+	zfetch_t *zf = zs->zs_fetch;
+	int64_t pf_start, pf_end, ipf_start, ipf_end;
+	int epbs, issued;
+
+	if (missed)
+		zs->zs_missed = missed;

 	/*
-	 * dbuf_prefetch() is asynchronous (even when it needs to read
-	 * indirect blocks), but we still prefer to drop our locks before
-	 * calling it to reduce the time we hold them.
+	 * Postpone the prefetch if there are more concurrent callers.
+	 * It happens when multiple requests are waiting for the same
+	 * indirect block.  The last one will run the prefetch for all.
 	 */
+	if (zfs_refcount_remove(&zs->zs_callers, NULL) != 0) {
+		/* Drop reference taken in dmu_zfetch_prepare(). */
+		if (zfs_refcount_remove(&zs->zs_refs, NULL) == 0)
+			dmu_zfetch_stream_fini(zs);
+		return;
+	}

-	for (int i = 0; i < pf_nblks; i++) {
-		issued += dbuf_prefetch_impl(zf->zf_dnode, 0, pf_start + i,
+	mutex_enter(&zf->zf_lock);
+	if (zs->zs_missed) {
+		pf_start = zs->zs_pf_blkid1;
+		pf_end = zs->zs_pf_blkid1 = zs->zs_pf_blkid;
+	} else {
+		pf_start = pf_end = 0;
+	}
+	ipf_start = MAX(zs->zs_pf_blkid1, zs->zs_ipf_blkid1);
+	ipf_end = zs->zs_ipf_blkid1 = zs->zs_ipf_blkid;
+	mutex_exit(&zf->zf_lock);
+	ASSERT3S(pf_start, <=, pf_end);
+	ASSERT3S(ipf_start, <=, ipf_end);
+
+	epbs = zf->zf_dnode->dn_indblkshift - SPA_BLKPTRSHIFT;
+	ipf_start = P2ROUNDUP(ipf_start, 1 << epbs) >> epbs;
+	ipf_end = P2ROUNDUP(ipf_end, 1 << epbs) >> epbs;
+	ASSERT3S(ipf_start, <=, ipf_end);
+	issued = pf_end - pf_start + ipf_end - ipf_start;
+	if (issued > 1) {
+		/* Extra refs on top of the one from dmu_zfetch_prepare(). */
+		zfs_refcount_add_many(&zs->zs_refs, issued - 1, NULL);
+	} else if (issued == 0) {
+		/* Some other thread has done our work, so drop the ref. */
+		if (zfs_refcount_remove(&zs->zs_refs, NULL) == 0)
+			dmu_zfetch_stream_fini(zs);
+		return;
+	}
+
+	if (!have_lock)
+		rw_enter(&zf->zf_dnode->dn_struct_rwlock, RW_READER);
+
+	issued = 0;
+	for (int64_t blk = pf_start; blk < pf_end; blk++) {
+		issued += dbuf_prefetch_impl(zf->zf_dnode, 0, blk,
 		    ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH,
 		    dmu_zfetch_stream_done, zs);
 	}
-	for (int64_t iblk = ipf_istart; iblk < ipf_iend; iblk++) {
+	for (int64_t iblk = ipf_start; iblk < ipf_end; iblk++) {
 		issued += dbuf_prefetch_impl(zf->zf_dnode, 1, iblk,
 		    ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH,
 		    dmu_zfetch_stream_done, zs);
 	}
+
 	if (!have_lock)
 		rw_exit(&zf->zf_dnode->dn_struct_rwlock);
-	ZFETCHSTAT_BUMP(zfetchstat_hits);

 	if (issued)
 		ZFETCHSTAT_ADD(zfetchstat_io_issued, issued);
 }

+void
+dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
+    boolean_t missed, boolean_t have_lock)
+{
+	zstream_t *zs;
+
+	zs = dmu_zfetch_prepare(zf, blkid, nblks, fetch_data, have_lock);
+	if (zs)
+		dmu_zfetch_run(zs, missed, have_lock);
+}
+
 /* BEGIN CSTYLED */
 ZFS_MODULE_PARAM(zfs_prefetch, zfs_prefetch_, disable, INT, ZMOD_RW,
 	"Disable all ZFS prefetching");
--- a/sys/contrib/openzfs/module/zfs/metaslab.c
+++ b/sys/contrib/openzfs/module/zfs/metaslab.c
@ -2316,18 +2316,13 @@ metaslab_load_impl(metaslab_t *msp)
 		range_tree_add(msp->ms_allocatable,
 		    msp->ms_start, msp->ms_size);

-		if (msp->ms_freed != NULL) {
+		if (msp->ms_new) {
 			/*
 			 * If the ms_sm doesn't exist, this means that this
 			 * metaslab hasn't gone through metaslab_sync() and
 			 * thus has never been dirtied. So we shouldn't
 			 * expect any unflushed allocs or frees from previous
 			 * TXGs.
-			 *
-			 * Note: ms_freed and all the other trees except for
-			 * the ms_allocatable, can be NULL at this point only
-			 * if this is a new metaslab of a vdev that just got
-			 * expanded.
 			 */
 			ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs));
 			ASSERT(range_tree_is_empty(msp->ms_unflushed_frees));
@ -2365,8 +2360,6 @@ metaslab_load_impl(metaslab_t *msp)
 	range_tree_walk(msp->ms_unflushed_frees,
 	    range_tree_add, msp->ms_allocatable);

-	msp->ms_loaded = B_TRUE;
-
 	ASSERT3P(msp->ms_group, !=, NULL);
 	spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
 	if (spa_syncing_log_sm(spa) != NULL) {
@ -2680,19 +2673,31 @@ metaslab_init(metaslab_group_t *mg, uint64_t id, uint64_t object,
 		ms->ms_allocated_space = space_map_allocated(ms->ms_sm);
 	}

-	range_seg_type_t type;
 	uint64_t shift, start;
-	type = metaslab_calculate_range_tree_type(vd, ms, &start, &shift);
+	range_seg_type_t type =
+	    metaslab_calculate_range_tree_type(vd, ms, &start, &shift);

-	/*
-	 * We create the ms_allocatable here, but we don't create the
-	 * other range trees until metaslab_sync_done().  This serves
-	 * two purposes: it allows metaslab_sync_done() to detect the
-	 * addition of new space; and for debugging, it ensures that
-	 * we'd data fault on any attempt to use this metaslab before
-	 * it's ready.
-	 */
 	ms->ms_allocatable = range_tree_create(NULL, type, NULL, start, shift);
+	for (int t = 0; t < TXG_SIZE; t++) {
+		ms->ms_allocating[t] = range_tree_create(NULL, type,
+		    NULL, start, shift);
+	}
+	ms->ms_freeing = range_tree_create(NULL, type, NULL, start, shift);
+	ms->ms_freed = range_tree_create(NULL, type, NULL, start, shift);
+	for (int t = 0; t < TXG_DEFER_SIZE; t++) {
+		ms->ms_defer[t] = range_tree_create(NULL, type, NULL,
+		    start, shift);
+	}
+	ms->ms_checkpointing =
+	    range_tree_create(NULL, type, NULL, start, shift);
+	ms->ms_unflushed_allocs =
+	    range_tree_create(NULL, type, NULL, start, shift);
+
+	metaslab_rt_arg_t *mrap = kmem_zalloc(sizeof (*mrap), KM_SLEEP);
+	mrap->mra_bt = &ms->ms_unflushed_frees_by_size;
+	mrap->mra_floor_shift = metaslab_by_size_min_shift;
+	ms->ms_unflushed_frees = range_tree_create(&metaslab_rt_ops,
+	    type, mrap, start, shift);

 	ms->ms_trim = range_tree_create(NULL, type, NULL, start, shift);

@ -2765,13 +2770,13 @@ metaslab_fini(metaslab_t *msp)

 	mutex_enter(&msp->ms_lock);
 	VERIFY(msp->ms_group == NULL);
+
 	/*
-	 * If the range trees haven't been allocated, this metaslab hasn't
-	 * been through metaslab_sync_done() for the first time yet, so its
+	 * If this metaslab hasn't been through metaslab_sync_done() yet, its
 	 * space hasn't been accounted for in its vdev and doesn't need to be
 	 * subtracted.
 	 */
-	if (msp->ms_freed != NULL) {
+	if (!msp->ms_new) {
 		metaslab_space_update(vd, mg->mg_class,
 		    -metaslab_allocated_space(msp), 0, -msp->ms_size);

@ -2782,27 +2787,24 @@ metaslab_fini(metaslab_t *msp)
 	metaslab_unload(msp);

 	range_tree_destroy(msp->ms_allocatable);
+	range_tree_destroy(msp->ms_freeing);
+	range_tree_destroy(msp->ms_freed);

-	if (msp->ms_freed != NULL) {
-		range_tree_destroy(msp->ms_freeing);
-		range_tree_destroy(msp->ms_freed);
+	ASSERT3U(spa->spa_unflushed_stats.sus_memused, >=,
+	    metaslab_unflushed_changes_memused(msp));
+	spa->spa_unflushed_stats.sus_memused -=
+	    metaslab_unflushed_changes_memused(msp);
+	range_tree_vacate(msp->ms_unflushed_allocs, NULL, NULL);
+	range_tree_destroy(msp->ms_unflushed_allocs);
+	range_tree_destroy(msp->ms_checkpointing);
+	range_tree_vacate(msp->ms_unflushed_frees, NULL, NULL);
+	range_tree_destroy(msp->ms_unflushed_frees);

-		ASSERT3U(spa->spa_unflushed_stats.sus_memused, >=,
-		    metaslab_unflushed_changes_memused(msp));
-		spa->spa_unflushed_stats.sus_memused -=
-		    metaslab_unflushed_changes_memused(msp);
-		range_tree_vacate(msp->ms_unflushed_allocs, NULL, NULL);
-		range_tree_destroy(msp->ms_unflushed_allocs);
-		range_tree_destroy(msp->ms_checkpointing);
-		range_tree_vacate(msp->ms_unflushed_frees, NULL, NULL);
-		range_tree_destroy(msp->ms_unflushed_frees);
-
-		for (int t = 0; t < TXG_SIZE; t++) {
-			range_tree_destroy(msp->ms_allocating[t]);
-		}
-		for (int t = 0; t < TXG_DEFER_SIZE; t++) {
-			range_tree_destroy(msp->ms_defer[t]);
-		}
+	for (int t = 0; t < TXG_SIZE; t++) {
+		range_tree_destroy(msp->ms_allocating[t]);
+	}
+	for (int t = 0; t < TXG_DEFER_SIZE; t++) {
+		range_tree_destroy(msp->ms_defer[t]);
 	}
 	ASSERT0(msp->ms_deferspace);

@ -3926,17 +3928,15 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
 	/*
 	 * This metaslab has just been added so there's no work to do now.
 	 */
-	if (msp->ms_freeing == NULL) {
-		ASSERT3P(alloctree, ==, NULL);
+	if (msp->ms_new) {
+		ASSERT0(range_tree_space(alloctree));
+		ASSERT0(range_tree_space(msp->ms_freeing));
+		ASSERT0(range_tree_space(msp->ms_freed));
+		ASSERT0(range_tree_space(msp->ms_checkpointing));
+		ASSERT0(range_tree_space(msp->ms_trim));
 		return;
 	}

-	ASSERT3P(alloctree, !=, NULL);
-	ASSERT3P(msp->ms_freeing, !=, NULL);
-	ASSERT3P(msp->ms_freed, !=, NULL);
-	ASSERT3P(msp->ms_checkpointing, !=, NULL);
-	ASSERT3P(msp->ms_trim, !=, NULL);
-
 	/*
 	 * Normally, we don't want to process a metaslab if there are no
 	 * allocations or frees to perform. However, if the metaslab is being
@ -4240,54 +4240,15 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)

 	mutex_enter(&msp->ms_lock);

-	/*
-	 * If this metaslab is just becoming available, initialize its
-	 * range trees and add its capacity to the vdev.
-	 */
-	if (msp->ms_freed == NULL) {
-		range_seg_type_t type;
-		uint64_t shift, start;
-		type = metaslab_calculate_range_tree_type(vd, msp, &start,
-		    &shift);
-
-		for (int t = 0; t < TXG_SIZE; t++) {
-			ASSERT(msp->ms_allocating[t] == NULL);
-
-			msp->ms_allocating[t] = range_tree_create(NULL, type,
-			    NULL, start, shift);
-		}
-
-		ASSERT3P(msp->ms_freeing, ==, NULL);
-		msp->ms_freeing = range_tree_create(NULL, type, NULL, start,
-		    shift);
-
-		ASSERT3P(msp->ms_freed, ==, NULL);
-		msp->ms_freed = range_tree_create(NULL, type, NULL, start,
-		    shift);
-
-		for (int t = 0; t < TXG_DEFER_SIZE; t++) {
-			ASSERT3P(msp->ms_defer[t], ==, NULL);
-			msp->ms_defer[t] = range_tree_create(NULL, type, NULL,
-			    start, shift);
-		}
-
-		ASSERT3P(msp->ms_checkpointing, ==, NULL);
-		msp->ms_checkpointing = range_tree_create(NULL, type, NULL,
-		    start, shift);
-
-		ASSERT3P(msp->ms_unflushed_allocs, ==, NULL);
-		msp->ms_unflushed_allocs = range_tree_create(NULL, type, NULL,
-		    start, shift);
-
-		metaslab_rt_arg_t *mrap = kmem_zalloc(sizeof (*mrap), KM_SLEEP);
-		mrap->mra_bt = &msp->ms_unflushed_frees_by_size;
-		mrap->mra_floor_shift = metaslab_by_size_min_shift;
-		ASSERT3P(msp->ms_unflushed_frees, ==, NULL);
-		msp->ms_unflushed_frees = range_tree_create(&metaslab_rt_ops,
-		    type, mrap, start, shift);
-
+	if (msp->ms_new) {
+		/* this is a new metaslab, add its capacity to the vdev */
 		metaslab_space_update(vd, mg->mg_class, 0, 0, msp->ms_size);
+
+		/* there should be no allocations nor frees at this point */
+		VERIFY0(msp->ms_allocated_this_txg);
+		VERIFY0(range_tree_space(msp->ms_freed));
 	}
+
 	ASSERT0(range_tree_space(msp->ms_freeing));
 	ASSERT0(range_tree_space(msp->ms_checkpointing));

--- a/sys/contrib/openzfs/module/zfs/refcount.c
+++ b/sys/contrib/openzfs/module/zfs/refcount.c
@ -20,7 +20,7 @@
 */
 /*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
- * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
+ * Copyright (c) 2012, 2021 by Delphix. All rights reserved.
 */

 #include <sys/zfs_context.h>
@ -324,4 +324,12 @@ zfs_refcount_not_held(zfs_refcount_t *rc, const void *holder)
 	mutex_exit(&rc->rc_mtx);
 	return (B_TRUE);
 }
+
+/* BEGIN CSTYLED */
+ZFS_MODULE_PARAM(zfs, ,reference_tracking_enable, INT, ZMOD_RW,
+	"Track reference holders to refcount_t objects");
+
+ZFS_MODULE_PARAM(zfs, ,reference_history, INT, ZMOD_RW,
+	"Maximum reference holders being tracked");
+/* END CSTYLED */
 #endif	/* ZFS_DEBUG */
--- a/sys/contrib/openzfs/module/zfs/vdev.c
+++ b/sys/contrib/openzfs/module/zfs/vdev.c
@ -5105,10 +5105,8 @@ vdev_is_bootable(vdev_t *vd)
 	if (!vd->vdev_ops->vdev_op_leaf) {
 		const char *vdev_type = vd->vdev_ops->vdev_op_type;

-		if (strcmp(vdev_type, VDEV_TYPE_MISSING) == 0 ||
-		    strcmp(vdev_type, VDEV_TYPE_INDIRECT) == 0) {
+		if (strcmp(vdev_type, VDEV_TYPE_MISSING) == 0)
 			return (B_FALSE);
-		}
 	}

 	for (int c = 0; c < vd->vdev_children; c++) {
--- a/sys/contrib/openzfs/module/zfs/vdev_draid.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_draid.c
@ -632,236 +632,6 @@ vdev_draid_group_to_offset(vdev_t *vd, uint64_t group)
 	return (group * vdc->vdc_groupsz);
 }

-
-static void
-vdev_draid_map_free_vsd(zio_t *zio)
-{
-	raidz_map_t *rm = zio->io_vsd;
-
-	ASSERT0(rm->rm_freed);
-	rm->rm_freed = B_TRUE;
-
-	if (rm->rm_reports == 0) {
-		vdev_raidz_map_free(rm);
-	}
-}
-
-/*ARGSUSED*/
-static void
-vdev_draid_cksum_free(void *arg, size_t ignored)
-{
-	raidz_map_t *rm = arg;
-
-	ASSERT3U(rm->rm_reports, >, 0);
-
-	if (--rm->rm_reports == 0 && rm->rm_freed)
-		vdev_raidz_map_free(rm);
-}
-
-static void
-vdev_draid_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data)
-{
-	raidz_map_t *rm = zcr->zcr_cbdata;
-	const size_t c = zcr->zcr_cbinfo;
-	uint64_t skip_size = zcr->zcr_sector;
-	uint64_t parity_size;
-	size_t x, offset, size;
-
-	if (good_data == NULL) {
-		zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE);
-		return;
-	}
-
-	/*
-	 * Detailed cksum reporting is currently only supported for single
-	 * row draid mappings, this covers the vast majority of zios. Only
-	 * a dRAID zio which spans groups will have multiple rows.
-	 */
-	if (rm->rm_nrows != 1) {
-		zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE);
-		return;
-	}
-
-	raidz_row_t *rr = rm->rm_row[0];
-	const abd_t *good = NULL;
-	const abd_t *bad = rr->rr_col[c].rc_abd;
-
-	if (c < rr->rr_firstdatacol) {
-		/*
-		 * The first time through, calculate the parity blocks for
-		 * the good data (this relies on the fact that the good
-		 * data never changes for a given logical zio)
-		 */
-		if (rr->rr_col[0].rc_gdata == NULL) {
-			abd_t *bad_parity[VDEV_DRAID_MAXPARITY];
-
-			/*
-			 * Set up the rr_col[]s to generate the parity for
-			 * good_data, first saving the parity bufs and
-			 * replacing them with buffers to hold the result.
-			 */
-			for (x = 0; x < rr->rr_firstdatacol; x++) {
-				bad_parity[x] = rr->rr_col[x].rc_abd;
-				rr->rr_col[x].rc_abd = rr->rr_col[x].rc_gdata =
-				    abd_alloc_sametype(rr->rr_col[x].rc_abd,
-				    rr->rr_col[x].rc_size);
-			}
-
-			/*
-			 * Fill in the data columns from good_data being
-			 * careful to pad short columns and empty columns
-			 * with a skip sector.
-			 */
-			uint64_t good_size = abd_get_size((abd_t *)good_data);
-
-			offset = 0;
-			for (; x < rr->rr_cols; x++) {
-				abd_free(rr->rr_col[x].rc_abd);
-
-				if (offset == good_size) {
-					/* empty data column (small write) */
-					rr->rr_col[x].rc_abd =
-					    abd_get_zeros(skip_size);
-				} else if (x < rr->rr_bigcols) {
-					/* this is a "big column" */
-					size = rr->rr_col[x].rc_size;
-					rr->rr_col[x].rc_abd =
-					    abd_get_offset_size(
-					    (abd_t *)good_data, offset, size);
-					offset += size;
-				} else {
-					/* short data column, add skip sector */
-					size = rr->rr_col[x].rc_size -skip_size;
-					rr->rr_col[x].rc_abd = abd_alloc(
-					    rr->rr_col[x].rc_size, B_TRUE);
-					abd_copy_off(rr->rr_col[x].rc_abd,
-					    (abd_t *)good_data, 0, offset,
-					    size);
-					abd_zero_off(rr->rr_col[x].rc_abd,
-					    size, skip_size);
-					offset += size;
-				}
-			}
-
-			/*
-			 * Construct the parity from the good data.
-			 */
-			vdev_raidz_generate_parity_row(rm, rr);
-
-			/* restore everything back to its original state */
-			for (x = 0; x < rr->rr_firstdatacol; x++)
-				rr->rr_col[x].rc_abd = bad_parity[x];
-
-			offset = 0;
-			for (x = rr->rr_firstdatacol; x < rr->rr_cols; x++) {
-				abd_free(rr->rr_col[x].rc_abd);
-				rr->rr_col[x].rc_abd = abd_get_offset_size(
-				    rr->rr_abd_copy, offset,
-				    rr->rr_col[x].rc_size);
-				offset += rr->rr_col[x].rc_size;
-			}
-		}
-
-		ASSERT3P(rr->rr_col[c].rc_gdata, !=, NULL);
-		good = abd_get_offset_size(rr->rr_col[c].rc_gdata, 0,
-		    rr->rr_col[c].rc_size);
-	} else {
-		/* adjust good_data to point at the start of our column */
-		parity_size = size = rr->rr_col[0].rc_size;
-		if (c >= rr->rr_bigcols) {
-			size -= skip_size;
-			zcr->zcr_length = size;
-		}
-
-		/* empty column */
-		if (size == 0) {
-			zfs_ereport_finish_checksum(zcr, NULL, NULL, B_TRUE);
-			return;
-		}
-
-		offset = 0;
-		for (x = rr->rr_firstdatacol; x < c; x++) {
-			if (x < rr->rr_bigcols) {
-				offset += parity_size;
-			} else {
-				offset += parity_size - skip_size;
-			}
-		}
-
-		good = abd_get_offset_size((abd_t *)good_data, offset, size);
-	}
-
-	/* we drop the ereport if it ends up that the data was good */
-	zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE);
-	abd_free((abd_t *)good);
-}
-
-/*
- * Invoked indirectly by zfs_ereport_start_checksum(), called
- * below when our read operation fails completely.  The main point
- * is to keep a copy of everything we read from disk, so that at
- * vdev_draid_cksum_finish() time we can compare it with the good data.
- */
-static void
-vdev_draid_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg)
-{
-	size_t c = (size_t)(uintptr_t)arg;
-	raidz_map_t *rm = zio->io_vsd;
-
-	/* set up the report and bump the refcount  */
-	zcr->zcr_cbdata = rm;
-	zcr->zcr_cbinfo = c;
-	zcr->zcr_finish = vdev_draid_cksum_finish;
-	zcr->zcr_free = vdev_draid_cksum_free;
-
-	rm->rm_reports++;
-	ASSERT3U(rm->rm_reports, >, 0);
-
-	if (rm->rm_row[0]->rr_abd_copy != NULL)
-		return;
-
-	/*
-	 * It's the first time we're called for this raidz_map_t, so we need
-	 * to copy the data aside; there's no guarantee that our zio's buffer
-	 * won't be re-used for something else.
-	 *
-	 * Our parity data is already in separate buffers, so there's no need
-	 * to copy them.  Furthermore, all columns should have been expanded
-	 * by vdev_draid_map_alloc_empty() when attempting reconstruction.
-	 */
-	for (int i = 0; i < rm->rm_nrows; i++) {
-		raidz_row_t *rr = rm->rm_row[i];
-		size_t offset = 0;
-		size_t size = 0;
-
-		for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++) {
-			ASSERT3U(rr->rr_col[c].rc_size, ==,
-			    rr->rr_col[0].rc_size);
-			size += rr->rr_col[c].rc_size;
-		}
-
-		rr->rr_abd_copy = abd_alloc_for_io(size, B_FALSE);
-
-		for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++) {
-			raidz_col_t *col = &rr->rr_col[c];
-			abd_t *tmp = abd_get_offset_size(rr->rr_abd_copy,
-			    offset, col->rc_size);
-
-			abd_copy(tmp, col->rc_abd, col->rc_size);
-			abd_free(col->rc_abd);
-
-			col->rc_abd = tmp;
-			offset += col->rc_size;
-		}
-		ASSERT3U(offset, ==, size);
-	}
-}
-
-const zio_vsd_ops_t vdev_draid_vsd_ops = {
-	.vsd_free = vdev_draid_map_free_vsd,
-	.vsd_cksum_report = vdev_draid_cksum_report
-};
-
 /*
 * Full stripe writes.  When writing, all columns (D+P) are required.  Parity
 * is calculated over all the columns, including empty zero filled sectors,
@ -1208,7 +978,6 @@ vdev_draid_map_alloc_row(zio_t *zio, raidz_row_t **rrp, uint64_t io_offset,
 	rr->rr_missingdata = 0;
 	rr->rr_missingparity = 0;
 	rr->rr_firstdatacol = vdc->vdc_nparity;
-	rr->rr_abd_copy = NULL;
 	rr->rr_abd_empty = NULL;
 #ifdef ZFS_DEBUG
 	rr->rr_offset = io_offset;
@ -1230,7 +999,6 @@ vdev_draid_map_alloc_row(zio_t *zio, raidz_row_t **rrp, uint64_t io_offset,
 		rc->rc_devidx = vdev_draid_permute_id(vdc, base, iter, c);
 		rc->rc_offset = physical_offset;
 		rc->rc_abd = NULL;
-		rc->rc_gdata = NULL;
 		rc->rc_orig_data = NULL;
 		rc->rc_error = 0;
 		rc->rc_tried = 0;
@ -1328,9 +1096,6 @@ vdev_draid_map_alloc(zio_t *zio)
 	if (nrows == 2)
 		rm->rm_row[1] = rr[1];

-	zio->io_vsd = rm;
-	zio->io_vsd_ops = &vdev_draid_vsd_ops;
-
 	return (rm);
 }

@ -2183,12 +1948,13 @@ static void
 vdev_draid_io_start(zio_t *zio)
 {
 	vdev_t *vd __maybe_unused = zio->io_vd;
-	raidz_map_t *rm;

 	ASSERT3P(vd->vdev_ops, ==, &vdev_draid_ops);
 	ASSERT3U(zio->io_offset, ==, vdev_draid_get_astart(vd, zio->io_offset));

-	rm = vdev_draid_map_alloc(zio);
+	raidz_map_t *rm = vdev_draid_map_alloc(zio);
+	zio->io_vsd = rm;
+	zio->io_vsd_ops = &vdev_raidz_vsd_ops;

 	if (zio->io_type == ZIO_TYPE_WRITE) {
 		for (int i = 0; i < rm->rm_nrows; i++) {
--- a/sys/contrib/openzfs/module/zfs/vdev_indirect.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_indirect.c
@ -315,7 +315,6 @@ vdev_indirect_map_free(zio_t *zio)

 static const zio_vsd_ops_t vdev_indirect_vsd_ops = {
 	.vsd_free = vdev_indirect_map_free,
-	.vsd_cksum_report = zio_vsd_default_cksum_report
 };

 /*
--- a/sys/contrib/openzfs/module/zfs/vdev_mirror.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_mirror.c
@ -174,7 +174,6 @@ vdev_mirror_map_free(zio_t *zio)

 static const zio_vsd_ops_t vdev_mirror_vsd_ops = {
 	.vsd_free = vdev_mirror_map_free,
-	.vsd_cksum_report = zio_vsd_default_cksum_report
 };

 static int
@ -379,8 +378,6 @@ vdev_mirror_map_init(zio_t *zio)
 		}
 	}

-	zio->io_vsd = mm;
-	zio->io_vsd_ops = &vdev_mirror_vsd_ops;
 	return (mm);
 }

@ -629,6 +626,8 @@ vdev_mirror_io_start(zio_t *zio)
 	int c, children;

 	mm = vdev_mirror_map_init(zio);
+	zio->io_vsd = mm;
+	zio->io_vsd_ops = &vdev_mirror_vsd_ops;

 	if (mm == NULL) {
 		ASSERT(!spa_trust_config(zio->io_spa));
--- a/sys/contrib/openzfs/module/zfs/vdev_raidz.c
+++ b/sys/contrib/openzfs/module/zfs/vdev_raidz.c
@ -143,15 +143,10 @@ vdev_raidz_row_free(raidz_row_t *rr)

 		if (rc->rc_size != 0)
 			abd_free(rc->rc_abd);
-		if (rc->rc_gdata != NULL)
-			abd_free(rc->rc_gdata);
 		if (rc->rc_orig_data != NULL)
-			zio_buf_free(rc->rc_orig_data, rc->rc_size);
+			abd_free(rc->rc_orig_data);
 	}

-	if (rr->rr_abd_copy != NULL)
-		abd_free(rr->rr_abd_copy);
-
 	if (rr->rr_abd_empty != NULL)
 		abd_free(rr->rr_abd_empty);

@ -172,175 +167,11 @@ vdev_raidz_map_free_vsd(zio_t *zio)
 {
 	raidz_map_t *rm = zio->io_vsd;

-	ASSERT0(rm->rm_freed);
-	rm->rm_freed = B_TRUE;
-
-	if (rm->rm_reports == 0) {
-		vdev_raidz_map_free(rm);
-	}
+	vdev_raidz_map_free(rm);
 }

-/*ARGSUSED*/
-static void
-vdev_raidz_cksum_free(void *arg, size_t ignored)
-{
-	raidz_map_t *rm = arg;
-
-	ASSERT3U(rm->rm_reports, >, 0);
-
-	if (--rm->rm_reports == 0 && rm->rm_freed)
-		vdev_raidz_map_free(rm);
-}
-
-static void
-vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data)
-{
-	raidz_map_t *rm = zcr->zcr_cbdata;
-	const size_t c = zcr->zcr_cbinfo;
-	size_t x, offset;
-
-	if (good_data == NULL) {
-		zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE);
-		return;
-	}
-
-	ASSERT3U(rm->rm_nrows, ==, 1);
-	raidz_row_t *rr = rm->rm_row[0];
-
-	const abd_t *good = NULL;
-	const abd_t *bad = rr->rr_col[c].rc_abd;
-
-	if (c < rr->rr_firstdatacol) {
-		/*
-		 * The first time through, calculate the parity blocks for
-		 * the good data (this relies on the fact that the good
-		 * data never changes for a given logical ZIO)
-		 */
-		if (rr->rr_col[0].rc_gdata == NULL) {
-			abd_t *bad_parity[VDEV_RAIDZ_MAXPARITY];
-
-			/*
-			 * Set up the rr_col[]s to generate the parity for
-			 * good_data, first saving the parity bufs and
-			 * replacing them with buffers to hold the result.
-			 */
-			for (x = 0; x < rr->rr_firstdatacol; x++) {
-				bad_parity[x] = rr->rr_col[x].rc_abd;
-				rr->rr_col[x].rc_abd = rr->rr_col[x].rc_gdata =
-				    abd_alloc_sametype(rr->rr_col[x].rc_abd,
-				    rr->rr_col[x].rc_size);
-			}
-
-			/* fill in the data columns from good_data */
-			offset = 0;
-			for (; x < rr->rr_cols; x++) {
-				abd_free(rr->rr_col[x].rc_abd);
-
-				rr->rr_col[x].rc_abd =
-				    abd_get_offset_size((abd_t *)good_data,
-				    offset, rr->rr_col[x].rc_size);
-				offset += rr->rr_col[x].rc_size;
-			}
-
-			/*
-			 * Construct the parity from the good data.
-			 */
-			vdev_raidz_generate_parity_row(rm, rr);
-
-			/* restore everything back to its original state */
-			for (x = 0; x < rr->rr_firstdatacol; x++)
-				rr->rr_col[x].rc_abd = bad_parity[x];
-
-			offset = 0;
-			for (x = rr->rr_firstdatacol; x < rr->rr_cols; x++) {
-				abd_free(rr->rr_col[x].rc_abd);
-				rr->rr_col[x].rc_abd = abd_get_offset_size(
-				    rr->rr_abd_copy, offset,
-				    rr->rr_col[x].rc_size);
-				offset += rr->rr_col[x].rc_size;
-			}
-		}
-
-		ASSERT3P(rr->rr_col[c].rc_gdata, !=, NULL);
-		good = abd_get_offset_size(rr->rr_col[c].rc_gdata, 0,
-		    rr->rr_col[c].rc_size);
-	} else {
-		/* adjust good_data to point at the start of our column */
-		offset = 0;
-		for (x = rr->rr_firstdatacol; x < c; x++)
-			offset += rr->rr_col[x].rc_size;
-
-		good = abd_get_offset_size((abd_t *)good_data, offset,
-		    rr->rr_col[c].rc_size);
-	}
-
-	/* we drop the ereport if it ends up that the data was good */
-	zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE);
-	abd_free((abd_t *)good);
-}
-
-/*
- * Invoked indirectly by zfs_ereport_start_checksum(), called
- * below when our read operation fails completely.  The main point
- * is to keep a copy of everything we read from disk, so that at
- * vdev_raidz_cksum_finish() time we can compare it with the good data.
- */
-static void
-vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg)
-{
-	size_t c = (size_t)(uintptr_t)arg;
-	raidz_map_t *rm = zio->io_vsd;
-
-	/* set up the report and bump the refcount  */
-	zcr->zcr_cbdata = rm;
-	zcr->zcr_cbinfo = c;
-	zcr->zcr_finish = vdev_raidz_cksum_finish;
-	zcr->zcr_free = vdev_raidz_cksum_free;
-
-	rm->rm_reports++;
-	ASSERT3U(rm->rm_reports, >, 0);
-	ASSERT3U(rm->rm_nrows, ==, 1);
-
-	if (rm->rm_row[0]->rr_abd_copy != NULL)
-		return;
-
-	/*
-	 * It's the first time we're called for this raidz_map_t, so we need
-	 * to copy the data aside; there's no guarantee that our zio's buffer
-	 * won't be re-used for something else.
-	 *
-	 * Our parity data is already in separate buffers, so there's no need
-	 * to copy them.
-	 */
-	for (int i = 0; i < rm->rm_nrows; i++) {
-		raidz_row_t *rr = rm->rm_row[i];
-		size_t offset = 0;
-		size_t size = 0;
-
-		for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++)
-			size += rr->rr_col[c].rc_size;
-
-		rr->rr_abd_copy = abd_alloc_for_io(size, B_FALSE);
-
-		for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++) {
-			raidz_col_t *col = &rr->rr_col[c];
-			abd_t *tmp = abd_get_offset_size(rr->rr_abd_copy,
-			    offset, col->rc_size);
-
-			abd_copy(tmp, col->rc_abd, col->rc_size);
-
-			abd_free(col->rc_abd);
-			col->rc_abd = tmp;
-
-			offset += col->rc_size;
-		}
-		ASSERT3U(offset, ==, size);
-	}
-}
-
-static const zio_vsd_ops_t vdev_raidz_vsd_ops = {
+const zio_vsd_ops_t vdev_raidz_vsd_ops = {
 	.vsd_free = vdev_raidz_map_free_vsd,
-	.vsd_cksum_report = vdev_raidz_cksum_report
 };

 /*
@ -414,7 +245,6 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols,
 	rr->rr_missingdata = 0;
 	rr->rr_missingparity = 0;
 	rr->rr_firstdatacol = nparity;
-	rr->rr_abd_copy = NULL;
 	rr->rr_abd_empty = NULL;
 	rr->rr_nempty = 0;
 #ifdef ZFS_DEBUG
@ -435,7 +265,6 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols,
 		rc->rc_devidx = col;
 		rc->rc_offset = coff;
 		rc->rc_abd = NULL;
-		rc->rc_gdata = NULL;
 		rc->rc_orig_data = NULL;
 		rc->rc_error = 0;
 		rc->rc_tried = 0;
@ -831,7 +660,7 @@ vdev_raidz_reconst_pq_tail_func(void *xbuf, size_t size, void *private)
 	return (0);
 }

-static int
+static void
 vdev_raidz_reconstruct_p(raidz_row_t *rr, int *tgts, int ntgts)
 {
 	int x = tgts[0];
@ -860,11 +689,9 @@ vdev_raidz_reconstruct_p(raidz_row_t *rr, int *tgts, int ntgts)
 		(void) abd_iterate_func2(dst, src, 0, 0, size,
 		    vdev_raidz_reconst_p_func, NULL);
 	}
-
-	return (1 << VDEV_RAIDZ_P);
 }

-static int
+static void
 vdev_raidz_reconstruct_q(raidz_row_t *rr, int *tgts, int ntgts)
 {
 	int x = tgts[0];
@ -905,11 +732,9 @@ vdev_raidz_reconstruct_q(raidz_row_t *rr, int *tgts, int ntgts)
 	struct reconst_q_struct rq = { abd_to_buf(src), exp };
 	(void) abd_iterate_func(dst, 0, rr->rr_col[x].rc_size,
 	    vdev_raidz_reconst_q_post_func, &rq);
-
-	return (1 << VDEV_RAIDZ_Q);
 }

-static int
+static void
 vdev_raidz_reconstruct_pq(raidz_row_t *rr, int *tgts, int ntgts)
 {
 	uint8_t *p, *q, *pxy, *qxy, tmp, a, b, aexp, bexp;
@ -995,8 +820,6 @@ vdev_raidz_reconstruct_pq(raidz_row_t *rr, int *tgts, int ntgts)
 	 */
 	rr->rr_col[VDEV_RAIDZ_P].rc_abd = pdata;
 	rr->rr_col[VDEV_RAIDZ_Q].rc_abd = qdata;
-
-	return ((1 << VDEV_RAIDZ_P) | (1 << VDEV_RAIDZ_Q));
 }

 /* BEGIN CSTYLED */
@ -1355,7 +1178,7 @@ vdev_raidz_matrix_reconstruct(raidz_row_t *rr, int n, int nmissing,
 	kmem_free(p, psize);
 }

-static int
+static void
 vdev_raidz_reconstruct_general(raidz_row_t *rr, int *tgts, int ntgts)
 {
 	int n, i, c, t, tt;
@ -1370,8 +1193,6 @@ vdev_raidz_reconstruct_general(raidz_row_t *rr, int *tgts, int ntgts)

 	abd_t **bufs = NULL;

-	int code = 0;
-
 	/*
 	 * Matrix reconstruction can't use scatter ABDs yet, so we allocate
 	 * temporary linear ABDs if any non-linear ABDs are found.
@ -1426,15 +1247,10 @@ vdev_raidz_reconstruct_general(raidz_row_t *rr, int *tgts, int ntgts)
 			continue;
 		}

-		code |= 1 << c;
-
 		parity_map[i] = c;
 		i++;
 	}

-	ASSERT(code != 0);
-	ASSERT3U(code, <, 1 << VDEV_RAIDZ_MAXPARITY);
-
 	psize = (sizeof (rows[0][0]) + sizeof (invrows[0][0])) *
 	    nmissing_rows * n + sizeof (used[0]) * n;
 	p = kmem_alloc(psize, KM_SLEEP);
@ -1497,18 +1313,15 @@ vdev_raidz_reconstruct_general(raidz_row_t *rr, int *tgts, int ntgts)
 		}
 		kmem_free(bufs, rr->rr_cols * sizeof (abd_t *));
 	}
-
-	return (code);
 }

-static int
+static void
 vdev_raidz_reconstruct_row(raidz_map_t *rm, raidz_row_t *rr,
    const int *t, int nt)
 {
 	int tgts[VDEV_RAIDZ_MAXPARITY], *dt;
 	int ntgts;
 	int i, c, ret;
-	int code;
 	int nbadparity, nbaddata;
 	int parity_valid[VDEV_RAIDZ_MAXPARITY];

@ -1541,20 +1354,24 @@ vdev_raidz_reconstruct_row(raidz_map_t *rm, raidz_row_t *rr,
 	/* Reconstruct using the new math implementation */
 	ret = vdev_raidz_math_reconstruct(rm, rr, parity_valid, dt, nbaddata);
 	if (ret != RAIDZ_ORIGINAL_IMPL)
-		return (ret);
+		return;

 	/*
 	 * See if we can use any of our optimized reconstruction routines.
 	 */
 	switch (nbaddata) {
 	case 1:
-		if (parity_valid[VDEV_RAIDZ_P])
-			return (vdev_raidz_reconstruct_p(rr, dt, 1));
+		if (parity_valid[VDEV_RAIDZ_P]) {
+			vdev_raidz_reconstruct_p(rr, dt, 1);
+			return;
+		}

 		ASSERT(rr->rr_firstdatacol > 1);

-		if (parity_valid[VDEV_RAIDZ_Q])
-			return (vdev_raidz_reconstruct_q(rr, dt, 1));
+		if (parity_valid[VDEV_RAIDZ_Q]) {
+			vdev_raidz_reconstruct_q(rr, dt, 1);
+			return;
+		}

 		ASSERT(rr->rr_firstdatacol > 2);
 		break;
@ -1563,18 +1380,17 @@ vdev_raidz_reconstruct_row(raidz_map_t *rm, raidz_row_t *rr,
 		ASSERT(rr->rr_firstdatacol > 1);

 		if (parity_valid[VDEV_RAIDZ_P] &&
-		    parity_valid[VDEV_RAIDZ_Q])
-			return (vdev_raidz_reconstruct_pq(rr, dt, 2));
+		    parity_valid[VDEV_RAIDZ_Q]) {
+			vdev_raidz_reconstruct_pq(rr, dt, 2);
+			return;
+		}

 		ASSERT(rr->rr_firstdatacol > 2);

 		break;
 	}

-	code = vdev_raidz_reconstruct_general(rr, tgts, ntgts);
-	ASSERT(code < (1 << VDEV_RAIDZ_MAXPARITY));
-	ASSERT(code > 0);
-	return (code);
+	vdev_raidz_reconstruct_general(rr, tgts, ntgts);
 }

 static int
@ -1811,10 +1627,11 @@ vdev_raidz_io_start(zio_t *zio)
 	vdev_t *vd = zio->io_vd;
 	vdev_t *tvd = vd->vdev_top;
 	vdev_raidz_t *vdrz = vd->vdev_tsd;
-	raidz_map_t *rm;

-	rm = vdev_raidz_map_alloc(zio, tvd->vdev_ashift,
+	raidz_map_t *rm = vdev_raidz_map_alloc(zio, tvd->vdev_ashift,
 	    vdrz->vd_logical_width, vdrz->vd_nparity);
+	zio->io_vsd = rm;
+	zio->io_vsd_ops = &vdev_raidz_vsd_ops;

 	/*
 	 * Until raidz expansion is implemented all maps for a raidz vdev
@ -1823,9 +1640,6 @@ vdev_raidz_io_start(zio_t *zio)
 	ASSERT3U(rm->rm_nrows, ==, 1);
 	raidz_row_t *rr = rm->rm_row[0];

-	zio->io_vsd = rm;
-	zio->io_vsd_ops = &vdev_raidz_vsd_ops;
-
 	if (zio->io_type == ZIO_TYPE_WRITE) {
 		vdev_raidz_io_start_write(zio, rr, tvd->vdev_ashift);
 	} else {
@ -2021,7 +1835,7 @@ raidz_restore_orig_data(raidz_map_t *rm)
 		for (int c = 0; c < rr->rr_cols; c++) {
 			raidz_col_t *rc = &rr->rr_col[c];
 			if (rc->rc_need_orig_restore) {
-				abd_copy_from_buf(rc->rc_abd,
+				abd_copy(rc->rc_abd,
 				    rc->rc_orig_data, rc->rc_size);
 				rc->rc_need_orig_restore = B_FALSE;
 			}
@ -2062,9 +1876,9 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity)
 				if (rc->rc_devidx == ltgts[lt]) {
 					if (rc->rc_orig_data == NULL) {
 						rc->rc_orig_data =
-						    zio_buf_alloc(rc->rc_size);
-						abd_copy_to_buf(
-						    rc->rc_orig_data,
+						    abd_alloc_linear(
+						    rc->rc_size, B_TRUE);
+						abd_copy(rc->rc_orig_data,
 						    rc->rc_abd, rc->rc_size);
 					}
 					rc->rc_need_orig_restore = B_TRUE;
@ -2082,10 +1896,8 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity)
 			raidz_restore_orig_data(rm);
 			return (EINVAL);
 		}
-		rr->rr_code = 0;
 		if (dead_data > 0)
-			rr->rr_code = vdev_raidz_reconstruct_row(rm, rr,
-			    my_tgts, t);
+			vdev_raidz_reconstruct_row(rm, rr, my_tgts, t);
 	}

 	/* Check for success */
@ -2111,7 +1923,7 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity)
 					if (rc->rc_error == 0 &&
 					    c >= rr->rr_firstdatacol) {
 						raidz_checksum_error(zio,
-						    rc, rc->rc_gdata);
+						    rc, rc->rc_orig_data);
 						rc->rc_error =
 						    SET_ERROR(ECKSUM);
 					}
@ -2318,11 +2130,7 @@ vdev_raidz_io_done_write_impl(zio_t *zio, raidz_row_t *rr)
 	}
 }

-/*
- * return 0 if no reconstruction occurred, otherwise the "code" from
- * vdev_raidz_reconstruct().
- */
-static int
+static void
 vdev_raidz_io_done_reconstruct_known_missing(zio_t *zio, raidz_map_t *rm,
    raidz_row_t *rr)
 {
@ -2330,7 +2138,6 @@ vdev_raidz_io_done_reconstruct_known_missing(zio_t *zio, raidz_map_t *rm,
 	int parity_untried = 0;
 	int data_errors = 0;
 	int total_errors = 0;
-	int code = 0;

 	ASSERT3U(rr->rr_missingparity, <=, rr->rr_firstdatacol);
 	ASSERT3U(rr->rr_missingdata, <=, rr->rr_cols - rr->rr_firstdatacol);
@ -2385,10 +2192,8 @@ vdev_raidz_io_done_reconstruct_known_missing(zio_t *zio, raidz_map_t *rm,

 		ASSERT(rr->rr_firstdatacol >= n);

-		code = vdev_raidz_reconstruct_row(rm, rr, tgts, n);
+		vdev_raidz_reconstruct_row(rm, rr, tgts, n);
 	}
-
-	return (code);
 }

 /*
@ -2453,7 +2258,7 @@ vdev_raidz_io_done_unrecoverable(zio_t *zio)

 			(void) zfs_ereport_start_checksum(zio->io_spa,
 			    cvd, &zio->io_bookmark, zio, rc->rc_offset,
-			    rc->rc_size, (void *)(uintptr_t)c, &zbc);
+			    rc->rc_size, &zbc);
 			mutex_enter(&cvd->vdev_stat_lock);
 			cvd->vdev_stat.vs_checksum_errors++;
 			mutex_exit(&cvd->vdev_stat_lock);
@ -2473,8 +2278,7 @@ vdev_raidz_io_done(zio_t *zio)
 	} else {
 		for (int i = 0; i < rm->rm_nrows; i++) {
 			raidz_row_t *rr = rm->rm_row[i];
-			rr->rr_code =
-			    vdev_raidz_io_done_reconstruct_known_missing(zio,
+			vdev_raidz_io_done_reconstruct_known_missing(zio,
 			    rm, rr);
 		}

--- a/sys/contrib/openzfs/module/zfs/zfs_fm.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_fm.c
@ -1125,8 +1125,7 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd,
 */
 int
 zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb,
-    struct zio *zio, uint64_t offset, uint64_t length, void *arg,
-    zio_bad_cksum_t *info)
+    struct zio *zio, uint64_t offset, uint64_t length, zio_bad_cksum_t *info)
 {
 	zio_cksum_report_t *report;

@ -1144,10 +1143,7 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb,

 	report = kmem_zalloc(sizeof (*report), KM_SLEEP);

-	if (zio->io_vsd != NULL)
-		zio->io_vsd_ops->vsd_cksum_report(zio, report, arg);
-	else
-		zio_vsd_default_cksum_report(zio, report, arg);
+	zio_vsd_default_cksum_report(zio, report);

 	/* copy the checksum failure information if it was provided */
 	if (info != NULL) {
--- a/sys/contrib/openzfs/module/zfs/zfs_fuid.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_fuid.c
@ -728,7 +728,6 @@ zfs_fuid_info_free(zfs_fuid_info_t *fuidp)
 boolean_t
 zfs_groupmember(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr)
 {
-#ifdef HAVE_KSID
 	uid_t		gid;

 #ifdef illumos
@ -773,9 +772,6 @@ zfs_groupmember(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr)
 	 */
 	gid = zfs_fuid_map_id(zfsvfs, id, cr, ZFS_GROUP);
 	return (groupmember(gid, cr));
-#else
-	return (B_TRUE);
-#endif
 }

 void
--- a/sys/contrib/openzfs/module/zfs/zfs_log.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_log.c
@ -540,6 +540,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
 	uint32_t blocksize = zp->z_blksz;
 	itx_wr_state_t write_state;
 	uintptr_t fsync_cnt;
+	uint64_t gen = 0;

 	if (zil_replaying(zilog, tx) || zp->z_unlinked ||
 	    zfs_xattr_owner_unlinked(zp)) {
@ -562,6 +563,9 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
 		(void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1));
 	}

+	(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &gen,
+	    sizeof (gen));
+
 	while (resid) {
 		itx_t *itx;
 		lr_write_t *lr;
@ -609,6 +613,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
 		BP_ZERO(&lr->lr_blkptr);

 		itx->itx_private = ZTOZSB(zp);
+		itx->itx_gen = gen;

 		if (!(ioflag & (O_SYNC | O_DSYNC)) && (zp->z_sync_cnt == 0) &&
 		    (fsync_cnt == 0))
--- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c
+++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c
@ -740,7 +740,8 @@ static void zfs_get_done(zgd_t *zgd, int error);
 * Get data to generate a TX_WRITE intent log record.
 */
 int
-zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
+zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf,
+    struct lwb *lwb, zio_t *zio)
 {
 	zfsvfs_t *zfsvfs = arg;
 	objset_t *os = zfsvfs->z_os;
@ -751,6 +752,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
 	dmu_buf_t *db;
 	zgd_t *zgd;
 	int error = 0;
+	uint64_t zp_gen;

 	ASSERT3P(lwb, !=, NULL);
 	ASSERT3P(zio, !=, NULL);
@ -769,6 +771,16 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
 		zfs_zrele_async(zp);
 		return (SET_ERROR(ENOENT));
 	}
+	/* check if generation number matches */
+	if (sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
+	    sizeof (zp_gen)) != 0) {
+		zfs_zrele_async(zp);
+		return (SET_ERROR(EIO));
+	}
+	if (zp_gen != gen) {
+		zfs_zrele_async(zp);
+		return (SET_ERROR(ENOENT));
+	}

 	zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
 	zgd->zgd_lwb = lwb;
--- a/sys/contrib/openzfs/module/zfs/zil.c
+++ b/sys/contrib/openzfs/module/zfs/zil.c
@ -1744,7 +1744,8 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
 			 * completed after "lwb_write_zio" completed.
 			 */
 			error = zilog->zl_get_data(itx->itx_private,
-			    lrwb, dbuf, lwb, lwb->lwb_write_zio);
+			    itx->itx_gen, lrwb, dbuf, lwb,
+			    lwb->lwb_write_zio);

 			if (error == EIO) {
 				txg_wait_synced(zilog->zl_dmu_pool, txg);
--- a/sys/contrib/openzfs/module/zfs/zio.c
+++ b/sys/contrib/openzfs/module/zfs/zio.c
@ -3950,7 +3950,7 @@ zio_vsd_default_cksum_finish(zio_cksum_report_t *zcr,

 /*ARGSUSED*/
 void
-zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *ignored)
+zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr)
 {
 	void *abd = abd_alloc_sametype(zio->io_abd, zio->io_size);

@ -4288,7 +4288,7 @@ zio_checksum_verify(zio_t *zio)
 		    !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
 			(void) zfs_ereport_start_checksum(zio->io_spa,
 			    zio->io_vd, &zio->io_bookmark, zio,
-			    zio->io_offset, zio->io_size, NULL, &info);
+			    zio->io_offset, zio->io_size, &info);
 			mutex_enter(&zio->io_vd->vdev_stat_lock);
 			zio->io_vd->vdev_stat.vs_checksum_errors++;
 			mutex_exit(&zio->io_vd->vdev_stat_lock);
--- a/sys/contrib/openzfs/module/zfs/zvol.c
+++ b/sys/contrib/openzfs/module/zfs/zvol.c
@ -673,7 +673,8 @@ zvol_get_done(zgd_t *zgd, int error)
 * Get data to generate a TX_WRITE intent log record.
 */
 int
-zvol_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
+zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
+    struct lwb *lwb, zio_t *zio)
 {
 	zvol_state_t *zv = arg;
 	uint64_t offset = lr->lr_offset;
--- a/sys/contrib/openzfs/tests/runfiles/common.run
+++ b/sys/contrib/openzfs/tests/runfiles/common.run
@ -28,6 +28,10 @@ failsafe = callbacks/zfs_failsafe
 outputdir = /var/tmp/test_results
 tags = ['functional']

+[tests/functional/acl/off]
+tests = ['posixmode']
+tags = ['functional', 'acl']
+
 [tests/functional/alloc_class]
 tests = ['alloc_class_001_pos', 'alloc_class_002_neg', 'alloc_class_003_pos',
    'alloc_class_004_pos', 'alloc_class_005_pos', 'alloc_class_006_pos',
@ -722,8 +726,8 @@ tests = ['redacted_compressed', 'redacted_contents', 'redacted_deleted',
    'redacted_disabled_feature', 'redacted_embedded', 'redacted_holes',
    'redacted_incrementals', 'redacted_largeblocks', 'redacted_many_clones',
    'redacted_mixed_recsize', 'redacted_mounts', 'redacted_negative',
-    'redacted_origin', 'redacted_props', 'redacted_resume', 'redacted_size',
-    'redacted_volume']
+    'redacted_origin', 'redacted_panic', 'redacted_props', 'redacted_resume',
+    'redacted_size', 'redacted_volume']
 tags = ['functional', 'redacted_send']

 [tests/functional/raidz]
--- a/sys/contrib/openzfs/tests/runfiles/freebsd.run
+++ b/sys/contrib/openzfs/tests/runfiles/freebsd.run
@ -22,6 +22,10 @@ failsafe = callbacks/zfs_failsafe
 outputdir = /var/tmp/test_results
 tags = ['functional']

+[tests/functional/acl/off:FreeBSD]
+tests = ['dosmode']
+tags = ['functional', 'acl']
+
 [tests/functional/cli_root/zfs_jail:FreeBSD]
 tests = ['zfs_jail_001_pos']
 tags = ['functional', 'cli_root', 'zfs_jail']
--- a/sys/contrib/openzfs/tests/runfiles/sanity.run
+++ b/sys/contrib/openzfs/tests/runfiles/sanity.run
@ -30,6 +30,10 @@ failsafe = callbacks/zfs_failsafe
 outputdir = /var/tmp/test_results
 tags = ['functional']

+[tests/functional/acl/off]
+tests = ['posixmode']
+tags = ['functional', 'acl']
+
 [tests/functional/alloc_class]
 tests = ['alloc_class_003_pos', 'alloc_class_004_pos', 'alloc_class_005_pos',
    'alloc_class_006_pos', 'alloc_class_008_pos', 'alloc_class_010_pos',
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/acl/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/acl/Makefile.am
@ -3,4 +3,4 @@ dist_pkgdata_DATA = \
 	acl.cfg \
 	acl_common.kshlib

-SUBDIRS = posix posix-sa
+SUBDIRS = off posix posix-sa
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/acl/off/.gitignore
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/acl/off/.gitignore
@ -0,0 +1 @@
+/dosmode_readonly_write
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/acl/off/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/acl/off/Makefile.am
@ -0,0 +1,16 @@
+include $(top_srcdir)/config/Rules.am
+
+pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/acl/off
+
+dist_pkgdata_SCRIPTS = \
+	dosmode.ksh \
+	posixmode.ksh \
+	cleanup.ksh \
+	setup.ksh
+
+pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/acl/off
+
+if BUILD_FREEBSD
+pkgexec_PROGRAMS = dosmode_readonly_write
+dosmode_readonly_write_SOURCES = dosmode_readonly_write.c
+endif
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/acl/off/cleanup.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/acl/off/cleanup.ksh
@ -0,0 +1,33 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
+# Use is subject to license terms.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/acl/acl_common.kshlib
+
+cleanup_user_group
+
+default_cleanup
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/acl/off/dosmode.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/acl/off/dosmode.ksh
@ -0,0 +1,199 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Portions Copyright 2021 iXsystems, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/acl/acl_common.kshlib
+
+#
+# DESCRIPTION:
+#	Verify that DOS mode flags function correctly.
+#
+#	These flags are not currently exposed on Linux, so the test is
+#	only useful on FreeBSD.
+#
+# STRATEGY:
+#	1. ARCHIVE
+#	2. HIDDEN
+#	3. OFFLINE
+#	4. READONLY
+#	5. REPARSE
+#	6. SPARSE
+#	7. SYSTEM
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+	rm -f $testfile
+}
+
+function hasflag # flag path
+{
+	typeset flag=$1		# exact flag name; callers pass "-" for "no flags"
+	typeset path=$2
+	# Split field 5 of `ls -lo` (file flags on FreeBSD) on commas; match whole entry.
+	ls -lo $path | awk '{ gsub(",", "\n", $5); print $5 }' | grep -qxF $flag
+}
+
+log_assert "Verify DOS mode flags function correctly"
+log_onexit cleanup
+
+tests_base=$STF_SUITE/tests/functional/acl/off
+testfile=$TESTDIR/testfile
+owner=$ZFS_ACL_STAFF1
+other=$ZFS_ACL_STAFF2
+
+#
+# ARCHIVE
+#
+# This flag is set by ZFS when a file has been updated to indicate that
+# the file needs to be archived.
+#
+log_must touch $testfile
+log_must hasflag uarch $testfile
+log_must chflags nouarch $testfile
+log_must hasflag - $testfile
+log_must touch $testfile
+log_must hasflag uarch $testfile
+log_must rm $testfile
+log_must user_run $owner touch $testfile
+log_must hasflag uarch $testfile
+log_must user_run $owner chflags nouarch $testfile
+log_mustnot user_run $other chflags uarch $testfile
+log_must hasflag - $testfile
+log_must user_run $owner touch $testfile
+log_mustnot user_run $other chflags nouarch $testfile
+log_must hasflag uarch $testfile
+log_must user_run $owner rm $testfile
+
+#
+# HIDDEN
+#
+log_must touch $testfile
+log_must chflags hidden $testfile
+log_must hasflag hidden $testfile
+log_must chflags 0 $testfile
+log_must hasflag - $testfile
+log_must rm $testfile
+log_must user_run $owner touch $testfile
+log_must user_run $owner chflags hidden $testfile
+log_mustnot user_run $other chflags nohidden $testfile
+log_must hasflag hidden $testfile
+log_must user_run $owner chflags 0 $testfile
+log_mustnot user_run $other chflags hidden $testfile
+log_must hasflag - $testfile
+log_must user_run $owner rm $testfile
+
+
+#
+# OFFLINE
+#
+log_must touch $testfile
+log_must chflags offline $testfile
+log_must hasflag offline $testfile
+log_must chflags 0 $testfile
+log_must hasflag - $testfile
+log_must rm $testfile
+log_must user_run $owner touch $testfile
+log_must user_run $owner chflags offline $testfile
+log_mustnot user_run $other chflags nooffline $testfile
+log_must hasflag offline $testfile
+log_must user_run $owner chflags 0 $testfile
+log_mustnot user_run $other chflags offline $testfile
+log_must hasflag - $testfile
+log_must user_run $owner rm $testfile
+
+#
+# READONLY
+#
+# This flag prevents users from writing or appending to the file,
+# but root is always allowed the operation.
+#
+log_must touch $testfile
+log_must chflags rdonly $testfile
+log_must hasflag rdonly $testfile
+log_must eval "echo 'root write allowed' >> $testfile"
+log_must cat $testfile
+log_must chflags 0 $testfile
+log_must hasflag - $testfile
+log_must rm $testfile
+# A fd opened RW before READONLY is set must remain writable.  The helper
+# dosmode_readonly_write opens the file, sets READONLY, then writes to the fd.
+log_must user_run $owner touch $testfile
+log_mustnot user_run $other chflags rdonly $testfile
+log_must user_run $owner $tests_base/dosmode_readonly_write $testfile
+log_mustnot user_run $other chflags nordonly $testfile
+log_must hasflag rdonly $testfile
+log_mustnot user_run $owner "echo 'user write forbidden' >> $testfile"
+log_must eval "echo 'root write allowed' >> $testfile"
+# We are still allowed to read and remove the file when READONLY is set.
+log_must user_run $owner cat $testfile
+log_must user_run $owner rm $testfile
+
+#
+# REPARSE
+#
+# FIXME: does not work, not sure if broken or testing wrong
+#
+
+#
+# SPARSE
+#
+log_must truncate -s 1m $testfile
+log_must chflags sparse $testfile
+log_must hasflag sparse $testfile
+log_must chflags 0 $testfile
+log_must hasflag - $testfile
+log_must rm $testfile
+log_must user_run $owner truncate -s 1m $testfile
+log_must user_run $owner chflags sparse $testfile
+log_mustnot user_run $other chflags nosparse $testfile
+log_must hasflag sparse $testfile
+log_must user_run $owner chflags 0 $testfile
+log_mustnot user_run $other chflags sparse $testfile
+log_must hasflag - $testfile
+log_must user_run $owner rm $testfile
+
+#
+# SYSTEM
+#
+log_must touch $testfile
+log_must chflags system $testfile
+log_must hasflag system $testfile
+log_must chflags 0 $testfile
+log_must hasflag - $testfile
+log_must rm $testfile
+log_must user_run $owner touch $testfile
+log_must user_run $owner chflags system $testfile
+log_mustnot user_run $other chflags nosystem $testfile
+log_must hasflag system $testfile
+log_must user_run $owner chflags 0 $testfile
+log_mustnot user_run $other chflags system $testfile
+log_must hasflag - $testfile
+log_must user_run $owner rm $testfile
+
+log_pass "DOS mode flags function correctly"
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/acl/off/dosmode_readonly_write.c
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/acl/off/dosmode_readonly_write.c
@ -0,0 +1,75 @@
+/*
+ * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
+ *
+ * Copyright (c) 2021 iXsystems, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * Test for correct behavior of DOS mode READONLY flag on a file.
+ * We should be able to open a file RW, set READONLY, and still write to the fd.
+ */
+
+#include <sys/stat.h>
+#include <err.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+/*
+ * Open PATH read-write (creating it if needed), set UF_READONLY on it, then
+ * write through the already-open descriptor.  Exits with EXIT_SUCCESS only
+ * if every step succeeds and the whole message is written; otherwise prints
+ * a diagnostic and exits with EXIT_FAILURE.
+ */
+int
+main(int argc, const char *argv[])
+{
+	const char *buf = "We should be allowed to write this to the fd.\n";
+	const char *path;
+	size_t len = strlen(buf);
+	ssize_t nwritten;
+	int fd;
+
+	if (argc != 2) {
+		fprintf(stderr, "usage: %s PATH\n", argv[0]);
+		return (EXIT_FAILURE);
+	}
+	path = argv[1];
+	fd = open(path, O_CREAT|O_RDWR, 0777);
+	if (fd == -1)
+		err(EXIT_FAILURE, "%s: open failed", path);
+	/* Set the flag only after the descriptor is open for writing. */
+	if (chflags(path, UF_READONLY) == -1)
+		err(EXIT_FAILURE, "%s: chflags failed", path);
+	nwritten = write(fd, buf, len);
+	if (nwritten == -1)
+		err(EXIT_FAILURE, "%s: write failed", path);
+	/* A short write does not set errno, so report it with errx(). */
+	if ((size_t)nwritten != len)
+		errx(EXIT_FAILURE, "%s: short write", path);
+	if (close(fd) == -1)
+		err(EXIT_FAILURE, "%s: close failed", path);
+	return (EXIT_SUCCESS);
+}
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/acl/off/posixmode.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/acl/off/posixmode.ksh
@ -0,0 +1,145 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Portions Copyright 2021 iXsystems, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/acl/acl_common.kshlib
+
+#
+# DESCRIPTION:
+#	Verify that POSIX mode bits function correctly.
+#
+#	These tests are incomplete and will be added to over time.
+#
+#	NOTE: Creating directory entries behaves differently between platforms.
+#	The parent directory's group is used on FreeBSD, while the effective
+#	group is used on Linux.  We chown to the effective group when creating
+#	directories and files in these tests to achieve consistency across all
+#	platforms.
+#
+# STRATEGY:
+#	1. Sanity check the POSIX mode test on tmpfs
+#	2. Test POSIX mode bits on ZFS
+#
+
+verify_runnable "both"
+
+function cleanup
+{
+	umount -f $tmpdir
+	rm -rf $tmpdir $TESTDIR/dir
+}
+
+log_assert "Verify POSIX mode bits function correctly"
+log_onexit cleanup
+
+owner=$ZFS_ACL_STAFF1
+other=$ZFS_ACL_STAFF2
+group=$ZFS_ACL_STAFF_GROUP
+if is_linux; then
+	wheel=root
+else
+	wheel=wheel
+fi
+
+function test_posix_mode # base
+{
+	typeset base=$1
+	typeset dir=$base/dir
+	typeset file=$dir/file
+
+	# dir owned by root
+	log_must mkdir $dir
+	log_must chown :$wheel $dir
+	log_must chmod 007 $dir
+
+	# file owned by root
+	log_must touch $file
+	log_must chown :$wheel $file
+	log_must ls -la $dir
+	log_must rm $file
+
+	log_must touch $file
+	log_must chown :$wheel $file
+	log_must user_run $other rm $file
+
+	# file owned by user
+	log_must user_run $owner touch $file
+	log_must chown :$group $file
+	log_must ls -la $dir
+	log_must user_run $owner rm $file
+
+	log_must user_run $owner touch $file
+	log_must chown :$group $file
+	log_must user_run $other rm $file
+
+	log_must user_run $owner touch $file
+	log_must chown :$group $file
+	log_must rm $file
+
+	log_must rm -rf $dir
+
+	# dir owned by user
+	log_must user_run $owner mkdir $dir
+	log_must chown :$group $dir
+	log_must user_run $owner chmod 007 $dir
+
+	# file owned by root
+	log_must touch $file
+	log_must chown :$wheel $file
+	log_must ls -la $dir
+	log_must rm $file
+
+	log_must touch $file
+	log_must chown :$wheel $file
+	log_mustnot user_run $other rm $file
+	log_must rm $file
+
+	# file owned by user
+	log_mustnot user_run $owner touch $file
+	log_must touch $file
+	log_must chown $owner:$group $file
+	log_must ls -la $dir
+	log_mustnot user_run $owner rm $file
+	log_mustnot user_run $other rm $file
+	log_must rm $file
+
+	log_must rm -rf $dir
+}
+
+# Sanity check on tmpfs first
+tmpdir=$(TMPDIR=$TEST_BASE_DIR mktemp -d)
+log_must mount -t tmpfs tmp $tmpdir
+log_must chmod 777 $tmpdir
+
+test_posix_mode $tmpdir
+
+log_must umount $tmpdir
+log_must rmdir $tmpdir
+
+# Verify ZFS
+test_posix_mode $TESTDIR
+
+log_pass "POSIX mode bits function correctly"
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/acl/off/setup.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/acl/off/setup.ksh
@ -0,0 +1,44 @@
+#!/bin/ksh -p
+#
+# CDDL HEADER START
+#
+# The contents of this file are subject to the terms of the
+# Common Development and Distribution License (the "License").
+# You may not use this file except in compliance with the License.
+#
+# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+# or http://www.opensolaris.org/os/licensing.
+# See the License for the specific language governing permissions
+# and limitations under the License.
+#
+# When distributing Covered Code, include this CDDL HEADER in each
+# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+# If applicable, add the following below this CDDL HEADER, with the
+# fields enclosed by brackets "[]" replaced with your own identifying
+# information: Portions Copyright [yyyy] [name of copyright owner]
+#
+# CDDL HEADER END
+#
+
+#
+# Portions Copyright (c) 2021 iXsystems, Inc.
+#
+
+. $STF_SUITE/include/libtest.shlib
+. $STF_SUITE/tests/functional/acl/acl_common.kshlib
+
+DISK=${DISKS%% *}
+
+cleanup_user_group
+
+# Create staff group and add users to it
+log_must add_group $ZFS_ACL_STAFF_GROUP
+log_must add_user $ZFS_ACL_STAFF_GROUP $ZFS_ACL_STAFF1
+log_must add_user $ZFS_ACL_STAFF_GROUP $ZFS_ACL_STAFF2
+
+default_setup_noexit $DISK
+
+log_must zfs set acltype=off $TESTPOOL/$TESTFS
+log_must chmod 0777 $TESTDIR
+
+log_pass
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redacted_send/Makefile.am
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redacted_send/Makefile.am
@ -15,6 +15,7 @@ dist_pkgdata_SCRIPTS = \
 	redacted_mounts.ksh \
 	redacted_negative.ksh \
 	redacted_origin.ksh \
+	redacted_panic.ksh \
 	redacted_props.ksh \
 	redacted_resume.ksh \
 	redacted_size.ksh \
--- a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redacted_send/redacted_panic.ksh
+++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/redacted_send/redacted_panic.ksh
@ -0,0 +1,44 @@
+#!/bin/ksh
+
+#
+# This file and its contents are supplied under the terms of the
+# Common Development and Distribution License ("CDDL"), version 1.0.
+# You may only use this file in accordance with the terms of version
+# 1.0 of the CDDL.
+#
+# A full copy of the text of the CDDL should have accompanied this
+# source.  A copy of the CDDL is also available via the Internet at
+# http://www.illumos.org/license/CDDL.
+#
+
+#
+# Copyright (c) 2021 by Delphix. All rights reserved.
+#
+
+. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
+
+#
+# Description:
+# Verify edge case when midbufid is equal to minbufid for the bug fixed by
+# https://github.com/openzfs/zfs/pull/11297 (Fix kernel panic induced by
+# redacted send)
+#
+
+typeset ds_name="panic"
+typeset sendfs="$POOL/$ds_name"
+typeset recvfs="$POOL2/$ds_name"
+typeset clone="$POOL/${ds_name}_clone"
+typeset stream=$(mktemp $tmpdir/stream.XXXX)	# holds the send stream
+
+log_onexit redacted_cleanup $sendfs $recvfs
+
+log_must zfs create -o recsize=8k $sendfs
+log_must dd if=/dev/urandom of=/$sendfs/file bs=1024k count=2048 # 2 GiB
+log_must zfs snapshot $sendfs@init
+log_must zfs clone $sendfs@init $clone
+log_must stride_dd -i /dev/urandom -o /$clone/file -b 8192 -s 2 -c 7226
+log_must zfs snapshot $clone@init
+log_must zfs redact $sendfs@init book_init $clone@init # makes #book_init
+log_must eval "zfs send --redact $sendfs#book_init $sendfs@init >$stream"
+log_must eval "zfs recv $recvfs <$stream"
+log_pass
--- a/sys/modules/zfs/zfs_config.h
+++ b/sys/modules/zfs/zfs_config.h
@ -734,7 +734,7 @@
 /* #undef ZFS_IS_GPL_COMPATIBLE */

 /* Define the project alias string. */
-#define ZFS_META_ALIAS "zfs-2.0.0-FreeBSD_g9305ff2ed"
+#define ZFS_META_ALIAS "zfs-2.0.0-FreeBSD_g891568c99"

 /* Define the project author. */
 #define ZFS_META_AUTHOR "OpenZFS"
@ -764,7 +764,7 @@
 #define ZFS_META_NAME "zfs"

 /* Define the project release. */
-#define ZFS_META_RELEASE "FreeBSD_g9305ff2ed"
+#define ZFS_META_RELEASE "FreeBSD_g891568c99"

 /* Define the project version. */
 #define ZFS_META_VERSION "2.0.0"