zfs: merge OpenZFS master-891568c99

Notable upstream pull request merges:
  #11652 Split dmu_zfetch() speculation and execution parts
  #11682 Fix zfs_get_data access to files with wrong generation
  #11735 Clean up RAIDZ/DRAID ereport code
  #11737 Initialize metaslab range trees in metaslab_init
  #11739 FreeBSD: make seqc asserts conditional on replay
  #11763 Allow setting bootfs property on pools with indirect vdevs
  #11767 FreeBSD: Fix memory leaks in kstats

Obtained from:	OpenZFS
MFC after:	2 weeks
This commit is contained in:
Martin Matuska 2021-03-21 01:46:08 +01:00
commit f9693bef8d
77 changed files with 1561 additions and 883 deletions

View File

@ -32,4 +32,4 @@ For more details see the NOTICE, LICENSE and COPYRIGHT files; `UCRL-CODE-235197`
# Supported Kernels
* The `META` file contains the officially recognized supported Linux kernel versions.
* Supported FreeBSD versions are 12-STABLE and 13-CURRENT.
* Supported FreeBSD versions are any supported branches and releases starting from 12.2-RELEASE.

View File

@ -448,7 +448,6 @@ vdev_raidz_map_alloc_expanded(abd_t *abd, uint64_t size, uint64_t offset,
rr->rr_missingdata = 0;
rr->rr_missingparity = 0;
rr->rr_firstdatacol = nparity;
rr->rr_abd_copy = NULL;
rr->rr_abd_empty = NULL;
rr->rr_nempty = 0;
@ -459,7 +458,6 @@ vdev_raidz_map_alloc_expanded(abd_t *abd, uint64_t size, uint64_t offset,
}
rr->rr_col[c].rc_devidx = child_id;
rr->rr_col[c].rc_offset = child_offset;
rr->rr_col[c].rc_gdata = NULL;
rr->rr_col[c].rc_orig_data = NULL;
rr->rr_col[c].rc_error = 0;
rr->rr_col[c].rc_tried = 0;

View File

@ -2287,8 +2287,8 @@ ztest_get_done(zgd_t *zgd, int error)
}
static int
ztest_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb,
zio_t *zio)
ztest_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
struct lwb *lwb, zio_t *zio)
{
ztest_ds_t *zd = arg;
objset_t *os = zd->zd_os;

View File

@ -0,0 +1,23 @@
dnl #
dnl # 5.12 API change
dnl # BIO_MAX_PAGES was removed in favor of bio_max_segs(), which applies
dnl # the upper bound internally.
dnl #
dnl # Test source: a bare call to bio_max_segs(1) with linux/bio.h included.
dnl # The result is consumed under the test name bio_max_segs.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_BIO_MAX_SEGS], [
ZFS_LINUX_TEST_SRC([bio_max_segs], [
#include <linux/bio.h>
],[
bio_max_segs(1);
])
])
dnl #
dnl # Report the outcome of the bio_max_segs test and define
dnl # HAVE_BIO_MAX_SEGS when it succeeded.  A failed test is not an
dnl # error; nothing is defined in that case.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_BIO_MAX_SEGS], [
	AC_MSG_CHECKING([whether bio_max_segs() exists])
	ZFS_LINUX_TEST_RESULT([bio_max_segs], [
		AC_MSG_RESULT([yes])
		AC_DEFINE([HAVE_BIO_MAX_SEGS], [1],
		    [bio_max_segs() is implemented])
	], [
		AC_MSG_RESULT([no])
	])
])

View File

@ -0,0 +1,28 @@
dnl #
dnl # 5.12 API
dnl #
dnl # generic_fillattr in linux/fs.h now requires a struct user_namespace*
dnl # as the first arg, to support idmapped mounts.
dnl #
dnl # Test source: calls generic_fillattr() with three arguments, namely a
dnl # struct user_namespace pointer, an inode pointer and a kstat pointer,
dnl # all initialized to NULL.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR_USERNS], [
ZFS_LINUX_TEST_SRC([generic_fillattr_userns], [
#include <linux/fs.h>
],[
struct user_namespace *userns = NULL;
struct inode *in = NULL;
struct kstat *k = NULL;
generic_fillattr(userns, in, k);
])
])
dnl #
dnl # Report the outcome of the generic_fillattr_userns test and define
dnl # HAVE_GENERIC_FILLATTR_USERNS when it succeeded.  A failed test is
dnl # not an error; nothing is defined in that case.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_GENERIC_FILLATTR_USERNS], [
	AC_MSG_CHECKING([whether generic_fillattr requires struct user_namespace*])
	ZFS_LINUX_TEST_RESULT([generic_fillattr_userns], [
		AC_MSG_RESULT([yes])
		AC_DEFINE(HAVE_GENERIC_FILLATTR_USERNS, 1,
		    [generic_fillattr requires struct user_namespace*])
	],[
		AC_MSG_RESULT([no])
	])
])

View File

@ -1,7 +1,25 @@
dnl #
dnl # 3.6 API change
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE_FLAGS], [
AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE], [
dnl #
dnl # 5.12 API change that added the struct user_namespace* arg
dnl # to the front of this function type's arg list.
dnl #
ZFS_LINUX_TEST_SRC([create_userns], [
#include <linux/fs.h>
#include <linux/sched.h>
int inode_create(struct user_namespace *userns,
struct inode *inode ,struct dentry *dentry,
umode_t umode, bool flag) { return 0; }
static const struct inode_operations
iops __attribute__ ((unused)) = {
.create = inode_create,
};
],[])
dnl #
dnl # 3.6 API change
dnl #
ZFS_LINUX_TEST_SRC([create_flags], [
#include <linux/fs.h>
#include <linux/sched.h>
@ -16,11 +34,20 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_CREATE_FLAGS], [
],[])
])
AC_DEFUN([ZFS_AC_KERNEL_CREATE_FLAGS], [
AC_MSG_CHECKING([whether iops->create() passes flags])
ZFS_LINUX_TEST_RESULT([create_flags], [
AC_DEFUN([ZFS_AC_KERNEL_CREATE], [
AC_MSG_CHECKING([whether iops->create() takes struct user_namespace*])
ZFS_LINUX_TEST_RESULT([create_userns], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_IOPS_CREATE_USERNS, 1,
[iops->create() takes struct user_namespace*])
],[
ZFS_LINUX_TEST_ERROR([iops->create()])
AC_MSG_RESULT(no)
AC_MSG_CHECKING([whether iops->create() passes flags])
ZFS_LINUX_TEST_RESULT([create_flags], [
AC_MSG_RESULT(yes)
],[
ZFS_LINUX_TEST_ERROR([iops->create()])
])
])
])

View File

@ -1,8 +1,29 @@
dnl #
dnl # Linux 4.11 API
dnl # See torvalds/linux@a528d35
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [
dnl #
dnl # Linux 5.12 API
dnl # The getattr I/O operations handler type was extended to require
dnl # a struct user_namespace* as its first arg, to support idmapped
dnl # mounts.
dnl #
ZFS_LINUX_TEST_SRC([inode_operations_getattr_userns], [
#include <linux/fs.h>
int test_getattr(
struct user_namespace *userns,
const struct path *p, struct kstat *k,
u32 request_mask, unsigned int query_flags)
{ return 0; }
static const struct inode_operations
iops __attribute__ ((unused)) = {
.getattr = test_getattr,
};
],[])
dnl #
dnl # Linux 4.11 API
dnl # See torvalds/linux@a528d35
dnl #
ZFS_LINUX_TEST_SRC([inode_operations_getattr_path], [
#include <linux/fs.h>
@ -33,21 +54,39 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_GETATTR], [
])
AC_DEFUN([ZFS_AC_KERNEL_INODE_GETATTR], [
AC_MSG_CHECKING([whether iops->getattr() takes a path])
ZFS_LINUX_TEST_RESULT([inode_operations_getattr_path], [
dnl #
dnl # Kernel 5.12 test
dnl #
AC_MSG_CHECKING([whether iops->getattr() takes user_namespace])
ZFS_LINUX_TEST_RESULT([inode_operations_getattr_userns], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_PATH_IOPS_GETATTR, 1,
[iops->getattr() takes a path])
AC_DEFINE(HAVE_USERNS_IOPS_GETATTR, 1,
[iops->getattr() takes struct user_namespace*])
],[
AC_MSG_RESULT(no)
AC_MSG_CHECKING([whether iops->getattr() takes a vfsmount])
ZFS_LINUX_TEST_RESULT([inode_operations_getattr_vfsmount], [
dnl #
dnl # Kernel 4.11 test
dnl #
AC_MSG_CHECKING([whether iops->getattr() takes a path])
ZFS_LINUX_TEST_RESULT([inode_operations_getattr_path], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_VFSMOUNT_IOPS_GETATTR, 1,
[iops->getattr() takes a vfsmount])
AC_DEFINE(HAVE_PATH_IOPS_GETATTR, 1,
[iops->getattr() takes a path])
],[
AC_MSG_RESULT(no)
dnl #
dnl # Kernel < 4.11 test
dnl #
AC_MSG_CHECKING([whether iops->getattr() takes a vfsmount])
ZFS_LINUX_TEST_RESULT([inode_operations_getattr_vfsmount], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_VFSMOUNT_IOPS_GETATTR, 1,
[iops->getattr() takes a vfsmount])
],[
AC_MSG_RESULT(no)
])
])
])
])

View File

@ -11,13 +11,32 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_INODE_OWNER_OR_CAPABLE], [
struct inode *ip = NULL;
(void) inode_owner_or_capable(ip);
])
ZFS_LINUX_TEST_SRC([inode_owner_or_capable_idmapped], [
#include <linux/fs.h>
],[
struct inode *ip = NULL;
(void) inode_owner_or_capable(&init_user_ns, ip);
])
])
AC_DEFUN([ZFS_AC_KERNEL_INODE_OWNER_OR_CAPABLE], [
AC_MSG_CHECKING([whether inode_owner_or_capable() exists])
ZFS_LINUX_TEST_RESULT([inode_owner_or_capable], [
AC_MSG_RESULT(yes)
],[
ZFS_LINUX_TEST_ERROR([capability])
AC_DEFINE(HAVE_INODE_OWNER_OR_CAPABLE, 1,
[inode_owner_or_capable() exists])
], [
AC_MSG_RESULT(no)
AC_MSG_CHECKING(
[whether inode_owner_or_capable() takes user_ns])
ZFS_LINUX_TEST_RESULT([inode_owner_or_capable_idmapped], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_INODE_OWNER_OR_CAPABLE_IDMAPPED, 1,
[inode_owner_or_capable() takes user_ns])
],[
ZFS_LINUX_TEST_ERROR([capability])
])
])
])

View File

@ -1,32 +0,0 @@
dnl #
dnl # 3.3 API change
dnl # The VFS .create, .mkdir and .mknod callbacks were updated to take a
dnl # umode_t type rather than an int. The expectation is that any backport
dnl # would also change all three prototypes. However, if it turns out that
dnl # some distribution doesn't backport the whole thing this could be
dnl # broken apart into three separate checks.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_MKDIR_UMODE_T], [
ZFS_LINUX_TEST_SRC([inode_operations_mkdir], [
#include <linux/fs.h>
int mkdir(struct inode *inode, struct dentry *dentry,
umode_t umode) { return 0; }
static const struct inode_operations
iops __attribute__ ((unused)) = {
.mkdir = mkdir,
};
],[])
])
AC_DEFUN([ZFS_AC_KERNEL_MKDIR_UMODE_T], [
AC_MSG_CHECKING([whether iops->create()/mkdir()/mknod() take umode_t])
ZFS_LINUX_TEST_RESULT([inode_operations_mkdir], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_MKDIR_UMODE_T, 1,
[iops->create()/mkdir()/mknod() take umode_t])
],[
ZFS_LINUX_TEST_ERROR([mkdir()])
])
])

View File

@ -0,0 +1,65 @@
dnl #
dnl # Supported mkdir() interfaces checked newest to oldest.
dnl #
dnl # Two test sources are declared here, mkdir_user_namespace and
dnl # inode_operations_mkdir.  Each assigns a local mkdir function with the
dnl # candidate prototype to the .mkdir member of a struct inode_operations.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_MKDIR], [
dnl #
dnl # 5.12 API change
dnl # The struct user_namespace arg was added as the first argument to
dnl # the iops mkdir() callback.
dnl #
ZFS_LINUX_TEST_SRC([mkdir_user_namespace], [
#include <linux/fs.h>
int mkdir(struct user_namespace *userns,
struct inode *inode, struct dentry *dentry,
umode_t umode) { return 0; }
static const struct inode_operations
iops __attribute__ ((unused)) = {
.mkdir = mkdir,
};
],[])
dnl #
dnl # 3.3 API change
dnl # The VFS .create, .mkdir and .mknod callbacks were updated to take a
dnl # umode_t type rather than an int. The expectation is that any backport
dnl # would also change all three prototypes. However, if it turns out that
dnl # some distribution doesn't backport the whole thing this could be
dnl # broken apart into three separate checks.
dnl #
ZFS_LINUX_TEST_SRC([inode_operations_mkdir], [
#include <linux/fs.h>
int mkdir(struct inode *inode, struct dentry *dentry,
umode_t umode) { return 0; }
static const struct inode_operations
iops __attribute__ ((unused)) = {
.mkdir = mkdir,
};
],[])
])
dnl #
dnl # Evaluate the mkdir() test results, newest interface first.
dnl # Defines HAVE_IOPS_MKDIR_USERNS or HAVE_MKDIR_UMODE_T; configure
dnl # fails if neither prototype is accepted.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_MKDIR], [
	dnl #
	dnl # 5.12 API change
	dnl # The struct user_namespace arg was added as the first argument to
	dnl # mkdir() of the iops structure.
	dnl #
	AC_MSG_CHECKING([whether iops->mkdir() takes struct user_namespace*])
	ZFS_LINUX_TEST_RESULT([mkdir_user_namespace], [
		AC_MSG_RESULT(yes)
		AC_DEFINE(HAVE_IOPS_MKDIR_USERNS, 1,
		    [iops->mkdir() takes struct user_namespace*])
	],[
		dnl #
		dnl # Close the pending "checking" line before starting the
		dnl # next one, otherwise the two messages run together in
		dnl # the configure output.
		dnl #
		AC_MSG_RESULT(no)

		AC_MSG_CHECKING([whether iops->mkdir() takes umode_t])
		ZFS_LINUX_TEST_RESULT([inode_operations_mkdir], [
			AC_MSG_RESULT(yes)
			AC_DEFINE(HAVE_MKDIR_UMODE_T, 1,
			    [iops->mkdir() takes umode_t])
		],[
			ZFS_LINUX_TEST_ERROR([mkdir()])
		])
	])
])

View File

@ -0,0 +1,30 @@
dnl #
dnl # Test source for the iops mknod() callback prototype.  The program
dnl # assigns tmp_mknod, which takes a leading struct user_namespace
dnl # pointer, to the .mknod member of a struct inode_operations.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_MKNOD], [
dnl #
dnl # 5.12 API change that added the struct user_namespace* arg
dnl # to the front of this function type's arg list.
dnl #
ZFS_LINUX_TEST_SRC([mknod_userns], [
#include <linux/fs.h>
#include <linux/sched.h>
int tmp_mknod(struct user_namespace *userns,
struct inode *inode ,struct dentry *dentry,
umode_t u, dev_t d) { return 0; }
static const struct inode_operations
iops __attribute__ ((unused)) = {
.mknod = tmp_mknod,
};
],[])
])
dnl #
dnl # Report the outcome of the mknod_userns test and define
dnl # HAVE_IOPS_MKNOD_USERNS when it succeeded.  A failed test is not an
dnl # error; nothing is defined in that case.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_MKNOD], [
	AC_MSG_CHECKING([whether iops->mknod() takes struct user_namespace*])
	ZFS_LINUX_TEST_RESULT([mknod_userns], [
		AC_MSG_RESULT([yes])
		AC_DEFINE([HAVE_IOPS_MKNOD_USERNS], [1],
		    [iops->mknod() takes struct user_namespace*])
	], [
		AC_MSG_RESULT([no])
	])
])

View File

@ -1,10 +1,10 @@
dnl #
dnl # 4.9 API change,
dnl # iops->rename2() merged into iops->rename(), and iops->rename() now wants
dnl # flags.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME_WANTS_FLAGS], [
ZFS_LINUX_TEST_SRC([inode_operations_rename], [
AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME], [
dnl #
dnl # 4.9 API change,
dnl # iops->rename2() merged into iops->rename(), and iops->rename() now wants
dnl # flags.
dnl #
ZFS_LINUX_TEST_SRC([inode_operations_rename_flags], [
#include <linux/fs.h>
int rename_fn(struct inode *sip, struct dentry *sdp,
struct inode *tip, struct dentry *tdp,
@ -15,15 +15,41 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_RENAME_WANTS_FLAGS], [
.rename = rename_fn,
};
],[])
dnl #
dnl # 5.12 API change,
dnl #
dnl # Linux 5.12 introduced passing struct user_namespace* as the first argument
dnl # of the rename() and other inode_operations members.
dnl #
ZFS_LINUX_TEST_SRC([inode_operations_rename_userns], [
#include <linux/fs.h>
int rename_fn(struct user_namespace *user_ns, struct inode *sip,
struct dentry *sdp, struct inode *tip, struct dentry *tdp,
unsigned int flags) { return 0; }
static const struct inode_operations
iops __attribute__ ((unused)) = {
.rename = rename_fn,
};
],[])
])
AC_DEFUN([ZFS_AC_KERNEL_RENAME_WANTS_FLAGS], [
AC_MSG_CHECKING([whether iops->rename() wants flags])
ZFS_LINUX_TEST_RESULT([inode_operations_rename], [
AC_DEFUN([ZFS_AC_KERNEL_RENAME], [
AC_MSG_CHECKING([whether iops->rename() takes struct user_namespace*])
ZFS_LINUX_TEST_RESULT([inode_operations_rename_userns], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1,
[iops->rename() wants flags])
AC_DEFINE(HAVE_IOPS_RENAME_USERNS, 1,
[iops->rename() takes struct user_namespace*])
],[
AC_MSG_RESULT(no)
ZFS_LINUX_TEST_RESULT([inode_operations_rename_flags], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_RENAME_WANTS_FLAGS, 1,
[iops->rename() wants flags])
],[
AC_MSG_RESULT(no)
])
])
])

View File

@ -1,27 +1,52 @@
dnl #
dnl # 4.9 API change
dnl # The inode_change_ok() function has been renamed setattr_prepare()
dnl # and updated to take a dentry rather than an inode.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_SETATTR_PREPARE], [
dnl #
dnl # 4.9 API change
dnl # The inode_change_ok() function has been renamed setattr_prepare()
dnl # and updated to take a dentry rather than an inode.
dnl #
ZFS_LINUX_TEST_SRC([setattr_prepare], [
#include <linux/fs.h>
], [
struct dentry *dentry = NULL;
struct iattr *attr = NULL;
int error __attribute__ ((unused)) =
setattr_prepare(dentry, attr);
setattr_prepare(dentry, attr);
])
dnl #
dnl # 5.12 API change
dnl # The setattr_prepare() function has been changed to accept a new argument
dnl # for struct user_namespace*
dnl #
ZFS_LINUX_TEST_SRC([setattr_prepare_userns], [
#include <linux/fs.h>
], [
struct dentry *dentry = NULL;
struct iattr *attr = NULL;
struct user_namespace *userns = NULL;
int error __attribute__ ((unused)) =
setattr_prepare(userns, dentry, attr);
])
])
AC_DEFUN([ZFS_AC_KERNEL_SETATTR_PREPARE], [
AC_MSG_CHECKING([whether setattr_prepare() is available])
ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare],
AC_MSG_CHECKING([whether setattr_prepare() is available and accepts struct user_namespace*])
ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare_userns],
[setattr_prepare], [fs/attr.c], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_SETATTR_PREPARE, 1,
[setattr_prepare() is available])
AC_DEFINE(HAVE_SETATTR_PREPARE_USERNS, 1,
[setattr_prepare() accepts user_namespace])
], [
AC_MSG_RESULT(no)
AC_MSG_CHECKING([whether setattr_prepare() is available, doesn't accept user_namespace])
ZFS_LINUX_TEST_RESULT_SYMBOL([setattr_prepare],
[setattr_prepare], [fs/attr.c], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_SETATTR_PREPARE_NO_USERNS, 1,
[setattr_prepare() is available, doesn't accept user_namespace])
], [
AC_MSG_RESULT(no)
])
])
])

View File

@ -0,0 +1,30 @@
dnl #
dnl # Test source for the iops symlink() callback prototype.  The program
dnl # assigns tmp_symlink, which takes a leading struct user_namespace
dnl # pointer, to the .symlink member of a struct inode_operations.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_SYMLINK], [
dnl #
dnl # 5.12 API change that added the struct user_namespace* arg
dnl # to the front of this function type's arg list.
dnl #
ZFS_LINUX_TEST_SRC([symlink_userns], [
#include <linux/fs.h>
#include <linux/sched.h>
int tmp_symlink(struct user_namespace *userns,
struct inode *inode ,struct dentry *dentry,
const char *path) { return 0; }
static const struct inode_operations
iops __attribute__ ((unused)) = {
.symlink = tmp_symlink,
};
],[])
])
dnl #
dnl # Report the outcome of the symlink_userns test and define
dnl # HAVE_IOPS_SYMLINK_USERNS when it succeeded.  A failed test is not an
dnl # error; nothing is defined in that case.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SYMLINK], [
	AC_MSG_CHECKING([whether iops->symlink() takes struct user_namespace*])
	ZFS_LINUX_TEST_RESULT([symlink_userns], [
		AC_MSG_RESULT([yes])
		AC_DEFINE([HAVE_IOPS_SYMLINK_USERNS], [1],
		    [iops->symlink() takes struct user_namespace*])
	], [
		AC_MSG_RESULT([no])
	])
])

View File

@ -152,6 +152,21 @@ dnl #
dnl # Supported xattr handler set() interfaces checked newest to oldest.
dnl #
AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET], [
ZFS_LINUX_TEST_SRC([xattr_handler_set_userns], [
#include <linux/xattr.h>
int set(const struct xattr_handler *handler,
struct user_namespace *mnt_userns,
struct dentry *dentry, struct inode *inode,
const char *name, const void *buffer,
size_t size, int flags)
{ return 0; }
static const struct xattr_handler
xops __attribute__ ((unused)) = {
.set = set,
};
],[])
ZFS_LINUX_TEST_SRC([xattr_handler_set_dentry_inode], [
#include <linux/xattr.h>
@ -194,45 +209,58 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_XATTR_HANDLER_SET], [
AC_DEFUN([ZFS_AC_KERNEL_XATTR_HANDLER_SET], [
dnl #
dnl # 4.7 API change,
dnl # The xattr_handler->set() callback was changed to take both
dnl # dentry and inode.
dnl # 5.12 API change,
dnl # The xattr_handler->set() callback was changed to 8 arguments, and
dnl # struct user_namespace* was inserted as arg #2
dnl #
AC_MSG_CHECKING([whether xattr_handler->set() wants dentry and inode])
ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry_inode], [
AC_MSG_CHECKING([whether xattr_handler->set() wants dentry, inode, and user_namespace])
ZFS_LINUX_TEST_RESULT([xattr_handler_set_userns], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_XATTR_SET_DENTRY_INODE, 1,
[xattr_handler->set() wants both dentry and inode])
AC_DEFINE(HAVE_XATTR_SET_USERNS, 1,
[xattr_handler->set() takes user_namespace])
],[
dnl #
dnl # 4.4 API change,
dnl # The xattr_handler->set() callback was changed to take a
dnl # xattr_handler, and handler_flags argument was removed and
dnl # should be accessed by handler->flags.
dnl # 4.7 API change,
dnl # The xattr_handler->set() callback was changed to take both
dnl # dentry and inode.
dnl #
AC_MSG_RESULT(no)
AC_MSG_CHECKING(
[whether xattr_handler->set() wants xattr_handler])
ZFS_LINUX_TEST_RESULT([xattr_handler_set_xattr_handler], [
AC_MSG_CHECKING([whether xattr_handler->set() wants dentry and inode])
ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry_inode], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_XATTR_SET_HANDLER, 1,
[xattr_handler->set() wants xattr_handler])
AC_DEFINE(HAVE_XATTR_SET_DENTRY_INODE, 1,
[xattr_handler->set() wants both dentry and inode])
],[
dnl #
dnl # 2.6.33 API change,
dnl # The xattr_handler->set() callback was changed
dnl # to take a dentry instead of an inode, and a
dnl # handler_flags argument was added.
dnl # 4.4 API change,
dnl # The xattr_handler->set() callback was changed to take a
dnl # xattr_handler, and handler_flags argument was removed and
dnl # should be accessed by handler->flags.
dnl #
AC_MSG_RESULT(no)
AC_MSG_CHECKING(
[whether xattr_handler->set() wants dentry])
ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry], [
[whether xattr_handler->set() wants xattr_handler])
ZFS_LINUX_TEST_RESULT([xattr_handler_set_xattr_handler], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_XATTR_SET_DENTRY, 1,
[xattr_handler->set() wants dentry])
AC_DEFINE(HAVE_XATTR_SET_HANDLER, 1,
[xattr_handler->set() wants xattr_handler])
],[
ZFS_LINUX_TEST_ERROR([xattr set()])
dnl #
dnl # 2.6.33 API change,
dnl # The xattr_handler->set() callback was changed
dnl # to take a dentry instead of an inode, and a
dnl # handler_flags argument was added.
dnl #
AC_MSG_RESULT(no)
AC_MSG_CHECKING(
[whether xattr_handler->set() wants dentry])
ZFS_LINUX_TEST_RESULT([xattr_handler_set_dentry], [
AC_MSG_RESULT(yes)
AC_DEFINE(HAVE_XATTR_SET_DENTRY, 1,
[xattr_handler->set() wants dentry])
],[
ZFS_LINUX_TEST_ERROR([xattr set()])
])
])
])
])

View File

@ -79,9 +79,9 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_EVICT_INODE
ZFS_AC_KERNEL_SRC_DIRTY_INODE
ZFS_AC_KERNEL_SRC_SHRINKER
ZFS_AC_KERNEL_SRC_MKDIR_UMODE_T
ZFS_AC_KERNEL_SRC_MKDIR
ZFS_AC_KERNEL_SRC_LOOKUP_FLAGS
ZFS_AC_KERNEL_SRC_CREATE_FLAGS
ZFS_AC_KERNEL_SRC_CREATE
ZFS_AC_KERNEL_SRC_GET_LINK
ZFS_AC_KERNEL_SRC_PUT_LINK
ZFS_AC_KERNEL_SRC_TMPFILE
@ -115,7 +115,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_KUIDGID_T
ZFS_AC_KERNEL_SRC_KUID_HELPERS
ZFS_AC_KERNEL_SRC_MODULE_PARAM_CALL_CONST
ZFS_AC_KERNEL_SRC_RENAME_WANTS_FLAGS
ZFS_AC_KERNEL_SRC_RENAME
ZFS_AC_KERNEL_SRC_CURRENT_TIME
ZFS_AC_KERNEL_SRC_USERNS_CAPABILITIES
ZFS_AC_KERNEL_SRC_IN_COMPAT_SYSCALL
@ -125,6 +125,10 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
ZFS_AC_KERNEL_SRC_KSTRTOUL
ZFS_AC_KERNEL_SRC_PERCPU
ZFS_AC_KERNEL_SRC_CPU_HOTPLUG
ZFS_AC_KERNEL_SRC_GENERIC_FILLATTR_USERNS
ZFS_AC_KERNEL_SRC_MKNOD
ZFS_AC_KERNEL_SRC_SYMLINK
ZFS_AC_KERNEL_SRC_BIO_MAX_SEGS
AC_MSG_CHECKING([for available kernel interfaces])
ZFS_LINUX_TEST_COMPILE_ALL([kabi])
@ -177,9 +181,9 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_EVICT_INODE
ZFS_AC_KERNEL_DIRTY_INODE
ZFS_AC_KERNEL_SHRINKER
ZFS_AC_KERNEL_MKDIR_UMODE_T
ZFS_AC_KERNEL_MKDIR
ZFS_AC_KERNEL_LOOKUP_FLAGS
ZFS_AC_KERNEL_CREATE_FLAGS
ZFS_AC_KERNEL_CREATE
ZFS_AC_KERNEL_GET_LINK
ZFS_AC_KERNEL_PUT_LINK
ZFS_AC_KERNEL_TMPFILE
@ -213,7 +217,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_KUIDGID_T
ZFS_AC_KERNEL_KUID_HELPERS
ZFS_AC_KERNEL_MODULE_PARAM_CALL_CONST
ZFS_AC_KERNEL_RENAME_WANTS_FLAGS
ZFS_AC_KERNEL_RENAME
ZFS_AC_KERNEL_CURRENT_TIME
ZFS_AC_KERNEL_USERNS_CAPABILITIES
ZFS_AC_KERNEL_IN_COMPAT_SYSCALL
@ -223,6 +227,10 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
ZFS_AC_KERNEL_KSTRTOUL
ZFS_AC_KERNEL_PERCPU
ZFS_AC_KERNEL_CPU_HOTPLUG
ZFS_AC_KERNEL_GENERIC_FILLATTR_USERNS
ZFS_AC_KERNEL_MKNOD
ZFS_AC_KERNEL_SYMLINK
ZFS_AC_KERNEL_BIO_MAX_SEGS
])
dnl #

View File

@ -240,6 +240,7 @@ AC_CONFIG_FILES([
tests/zfs-tests/tests/Makefile
tests/zfs-tests/tests/functional/Makefile
tests/zfs-tests/tests/functional/acl/Makefile
tests/zfs-tests/tests/functional/acl/off/Makefile
tests/zfs-tests/tests/functional/acl/posix/Makefile
tests/zfs-tests/tests/functional/acl/posix-sa/Makefile
tests/zfs-tests/tests/functional/alloc_class/Makefile

View File

@ -30,8 +30,8 @@
#include <linux/uaccess.h>
/* 2.6.37 API change */
#define zfs_kmap_atomic(page, km_type) kmap_atomic(page)
#define zfs_kunmap_atomic(addr, km_type) kunmap_atomic(addr)
#define zfs_kmap_atomic(page) kmap_atomic(page)
#define zfs_kunmap_atomic(addr) kunmap_atomic(addr)
/* 5.0 API change - no more 'type' argument for access_ok() */
#ifdef HAVE_ACCESS_OK_TYPE

View File

@ -343,7 +343,8 @@ static inline void zfs_gid_write(struct inode *ip, gid_t gid)
/*
* 4.9 API change
*/
#ifndef HAVE_SETATTR_PREPARE
#if !(defined(HAVE_SETATTR_PREPARE_NO_USERNS) || \
defined(HAVE_SETATTR_PREPARE_USERNS))
static inline int
setattr_prepare(struct dentry *dentry, struct iattr *ia)
{
@ -389,6 +390,15 @@ func(const struct path *path, struct kstat *stat, u32 request_mask, \
{ \
return (func##_impl(path, stat, request_mask, query_flags)); \
}
#elif defined(HAVE_USERNS_IOPS_GETATTR)
#define ZPL_GETATTR_WRAPPER(func) \
static int \
func(struct user_namespace *user_ns, const struct path *path, \
struct kstat *stat, u32 request_mask, unsigned int query_flags) \
{ \
return (func##_impl(user_ns, path, stat, request_mask, \
query_flags)); \
}
#else
#error
#endif
@ -436,4 +446,16 @@ zpl_is_32bit_api(void)
#endif
}
/*
 * 5.12 API change
 * generic_fillattr() gained a leading struct user_namespace* argument
 * to support id-mapped mounts.  zpl_generic_fillattr() always takes
 * the three-argument form; when HAVE_GENERIC_FILLATTR_USERNS is not
 * defined the user_ns argument is dropped and never evaluated.
 */
#if !defined(HAVE_GENERIC_FILLATTR_USERNS)
#define	zpl_generic_fillattr(user_ns, ip, sp)	generic_fillattr(ip, sp)
#else
#define	zpl_generic_fillattr(user_ns, ip, sp)	\
	generic_fillattr(user_ns, ip, sp)
#endif
#endif /* _ZFS_VFS_H */

View File

@ -119,12 +119,27 @@ fn(struct dentry *dentry, const char *name, void *buffer, size_t size, \
#error "Unsupported kernel"
#endif
/*
* 5.12 API change,
* The xattr_handler->set() callback was changed to take a
* struct user_namespace* as its second arg (after the handler), to
* support idmapped mounts.
*/
#if defined(HAVE_XATTR_SET_USERNS)
#define ZPL_XATTR_SET_WRAPPER(fn) \
static int \
fn(const struct xattr_handler *handler, struct user_namespace *user_ns, \
struct dentry *dentry, struct inode *inode, const char *name, \
const void *buffer, size_t size, int flags) \
{ \
return (__ ## fn(inode, name, buffer, size, flags)); \
}
/*
* 4.7 API change,
* The xattr_handler->set() callback was changed to take both a dentry and
* an inode, because the dentry might not be attached to an inode yet.
*/
#if defined(HAVE_XATTR_SET_DENTRY_INODE)
#elif defined(HAVE_XATTR_SET_DENTRY_INODE)
#define ZPL_XATTR_SET_WRAPPER(fn) \
static int \
fn(const struct xattr_handler *handler, struct dentry *dentry, \

View File

@ -54,7 +54,8 @@ extern int zfs_mkdir(znode_t *dzp, char *dirname, vattr_t *vap,
extern int zfs_rmdir(znode_t *dzp, char *name, znode_t *cwd,
cred_t *cr, int flags);
extern int zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr);
extern int zfs_getattr_fast(struct inode *ip, struct kstat *sp);
extern int zfs_getattr_fast(struct user_namespace *, struct inode *ip,
struct kstat *sp);
extern int zfs_setattr(znode_t *zp, vattr_t *vap, int flag, cred_t *cr);
extern int zfs_rename(znode_t *sdzp, char *snm, znode_t *tdzp,
char *tnm, cred_t *cr, int flags);

View File

@ -73,7 +73,13 @@ extern "C" {
#define zn_has_cached_data(zp) ((zp)->z_is_mapped)
#define zn_rlimit_fsize(zp, uio) (0)
#define zhold(zp) igrab(ZTOI((zp)))
/*
* zhold() wraps igrab() on Linux, and igrab() may fail when the
* inode is in the process of being deleted. As zhold() must only be
* called when a ref already exists - so the inode cannot be
* mid-deletion - we VERIFY() this.
*/
#define zhold(zp) VERIFY3P(igrab(ZTOI((zp))), !=, NULL)
#define zrele(zp) iput(ZTOI((zp)))
/* Called on entry to each ZFS inode and vfs operation. */

View File

@ -171,4 +171,22 @@ zpl_dir_emit_dots(struct file *file, zpl_dir_context_t *ctx)
timespec_trunc(ts, (ip)->i_sb->s_time_gran)
#endif
/*
 * zpl_inode_owner_or_capable() always takes (ns, ip).  With
 * HAVE_INODE_OWNER_OR_CAPABLE the ns argument is dropped; with
 * HAVE_INODE_OWNER_OR_CAPABLE_IDMAPPED it is passed through.  If
 * neither is defined the build is rejected.
 */
#ifdef HAVE_INODE_OWNER_OR_CAPABLE
#define	zpl_inode_owner_or_capable(ns, ip)	inode_owner_or_capable(ip)
#else
#ifdef HAVE_INODE_OWNER_OR_CAPABLE_IDMAPPED
#define	zpl_inode_owner_or_capable(ns, ip)	inode_owner_or_capable(ns, ip)
#else
#error "Unsupported kernel"
#endif
#endif
/*
 * zpl_setattr_prepare() always takes (ns, dentry, ia).  Unless
 * HAVE_SETATTR_PREPARE_USERNS is defined, the ns argument is dropped
 * and the two-argument setattr_prepare() is used: either the
 * kernel-provided version, or our own from linux/vfs_compat.h.
 */
#if !defined(HAVE_SETATTR_PREPARE_USERNS)
#define	zpl_setattr_prepare(ns, dentry, ia)	setattr_prepare(dentry, ia)
#else
#define	zpl_setattr_prepare(ns, dentry, ia)	\
	setattr_prepare(ns, dentry, ia)
#endif
#endif /* _SYS_ZPL_H */

View File

@ -49,20 +49,26 @@ typedef struct zfetch {
typedef struct zstream {
uint64_t zs_blkid; /* expect next access at this blkid */
uint64_t zs_pf_blkid; /* next block to prefetch */
uint64_t zs_pf_blkid1; /* first block to prefetch */
uint64_t zs_pf_blkid; /* block to prefetch up to */
/*
* We will next prefetch the L1 indirect block of this level-0
* block id.
*/
uint64_t zs_ipf_blkid;
uint64_t zs_ipf_blkid1; /* first block to prefetch */
uint64_t zs_ipf_blkid; /* block to prefetch up to */
kmutex_t zs_lock; /* protects stream */
hrtime_t zs_atime; /* time last prefetch issued */
hrtime_t zs_start_time; /* start of last prefetch */
list_node_t zs_node; /* link for zf_stream */
hrtime_t zs_atime; /* time last prefetch issued */
zfetch_t *zs_fetch; /* parent fetch */
zfs_refcount_t zs_blocks; /* number of pending blocks in the stream */
boolean_t zs_missed; /* stream saw cache misses */
zfs_refcount_t zs_callers; /* number of pending callers */
/*
* Number of stream references: dnode, callers and pending blocks.
* The stream memory is freed when the number returns to zero.
*/
zfs_refcount_t zs_refs;
} zstream_t;
void zfetch_init(void);
@ -70,7 +76,10 @@ void zfetch_fini(void);
void dmu_zfetch_init(zfetch_t *, struct dnode *);
void dmu_zfetch_fini(zfetch_t *);
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t,
zstream_t *dmu_zfetch_prepare(zfetch_t *, uint64_t, uint64_t, boolean_t,
boolean_t);
void dmu_zfetch_run(zstream_t *, boolean_t, boolean_t);
void dmu_zfetch(zfetch_t *, uint64_t, uint64_t, boolean_t, boolean_t,
boolean_t);

View File

@ -50,6 +50,8 @@ void vdev_raidz_reconstruct(struct raidz_map *, const int *, int);
void vdev_raidz_child_done(zio_t *);
void vdev_raidz_io_done(zio_t *);
extern const zio_vsd_ops_t vdev_raidz_vsd_ops;
/*
* vdev_raidz_math interface
*/

View File

@ -108,8 +108,7 @@ typedef struct raidz_col {
uint64_t rc_size; /* I/O size */
abd_t rc_abdstruct; /* rc_abd probably points here */
abd_t *rc_abd; /* I/O data */
void *rc_orig_data; /* pre-reconstruction */
abd_t *rc_gdata; /* used to store the "good" version */
abd_t *rc_orig_data; /* pre-reconstruction */
int rc_error; /* I/O error for this device */
uint8_t rc_tried; /* Did we attempt this I/O column? */
uint8_t rc_skipped; /* Did we skip this I/O column? */
@ -124,10 +123,8 @@ typedef struct raidz_row {
uint64_t rr_missingdata; /* Count of missing data devices */
uint64_t rr_missingparity; /* Count of missing parity devices */
uint64_t rr_firstdatacol; /* First data column/parity count */
abd_t *rr_abd_copy; /* rm_asize-buffer of copied data */
abd_t *rr_abd_empty; /* dRAID empty sector buffer */
int rr_nempty; /* empty sectors included in parity */
int rr_code; /* reconstruction code (unused) */
#ifdef ZFS_DEBUG
uint64_t rr_offset; /* Logical offset for *_io_verify() */
uint64_t rr_size; /* Physical size for *_io_verify() */
@ -136,8 +133,6 @@ typedef struct raidz_row {
} raidz_row_t;
typedef struct raidz_map {
uintptr_t rm_reports; /* # of referencing checksum reports */
boolean_t rm_freed; /* map no longer has referencing ZIO */
boolean_t rm_ecksuminjected; /* checksum error was injected */
int rm_nrows; /* Regular row count */
int rm_nskip; /* RAIDZ sectors skipped for padding */

View File

@ -399,6 +399,7 @@ typedef struct itx {
void *itx_callback_data; /* User data for the callback */
size_t itx_size; /* allocated itx structure size */
uint64_t itx_oid; /* object id */
uint64_t itx_gen; /* gen number for zfs_get_data */
lr_t itx_lr; /* common part of log record */
/* followed by type-specific part of lr_xx_t and its immediate data */
} itx_t;
@ -467,7 +468,7 @@ typedef int zil_parse_blk_func_t(zilog_t *zilog, const blkptr_t *bp, void *arg,
typedef int zil_parse_lr_func_t(zilog_t *zilog, const lr_t *lr, void *arg,
uint64_t txg);
typedef int zil_replay_func_t(void *arg1, void *arg2, boolean_t byteswap);
typedef int zil_get_data_t(void *arg, lr_write_t *lr, char *dbuf,
typedef int zil_get_data_t(void *arg, uint64_t arg2, lr_write_t *lr, char *dbuf,
struct lwb *lwb, zio_t *zio);
extern int zil_parse(zilog_t *zilog, zil_parse_blk_func_t *parse_blk_func,

View File

@ -382,14 +382,8 @@ struct zio_cksum_report {
struct zio_bad_cksum *zcr_ckinfo; /* information from failure */
};
typedef void zio_vsd_cksum_report_f(zio_t *zio, zio_cksum_report_t *zcr,
void *arg);
zio_vsd_cksum_report_f zio_vsd_default_cksum_report;
typedef struct zio_vsd_ops {
zio_done_func_t *vsd_free;
zio_vsd_cksum_report_f *vsd_cksum_report;
} zio_vsd_ops_t;
typedef struct zio_gang_node {
@ -683,7 +677,7 @@ extern hrtime_t zio_handle_io_delay(zio_t *zio);
*/
extern int zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd,
const zbookmark_phys_t *zb, struct zio *zio, uint64_t offset,
uint64_t length, void *arg, struct zio_bad_cksum *info);
uint64_t length, struct zio_bad_cksum *info);
extern void zfs_ereport_finish_checksum(zio_cksum_report_t *report,
const abd_t *good_data, const abd_t *bad_data, boolean_t drop_if_identical);
@ -695,6 +689,8 @@ extern int zfs_ereport_post_checksum(spa_t *spa, vdev_t *vd,
uint64_t length, const abd_t *good_data, const abd_t *bad_data,
struct zio_bad_cksum *info);
void zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr);
/* Called from spa_sync(), but primarily an injection handler */
extern void spa_handle_ignored_writes(spa_t *spa);

View File

@ -85,8 +85,8 @@ void zvol_log_truncate(zvol_state_t *zv, dmu_tx_t *tx, uint64_t off,
uint64_t len, boolean_t sync);
void zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, uint64_t offset,
uint64_t size, int sync);
int zvol_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb,
zio_t *zio);
int zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
struct lwb *lwb, zio_t *zio);
int zvol_init_impl(void);
void zvol_fini_impl(void);
void zvol_wait_close(zvol_state_t *zv);

View File

@ -1,6 +1,6 @@
'\" te
.\" Copyright (c) 2013 by Turbo Fredriksson <turbo@bayour.com>. All rights reserved.
.\" Copyright (c) 2019, 2020 by Delphix. All rights reserved.
.\" Copyright (c) 2019, 2021 by Delphix. All rights reserved.
.\" Copyright (c) 2019 Datto Inc.
.\" The contents of this file are subject to the terms of the Common Development
.\" and Distribution License (the "License"). You may not use this file except
@ -691,6 +691,29 @@ will occur.
Default value: \fB600000\fR (ten minutes).
.RE
.sp
.ne 2
.na
\fBreference_history\fR (int)
.ad
.RS 12n
Maximum number of reference holders tracked when reference_tracking_enable
is active.
.sp
Default value: \fB3\fR.
.RE
.sp
.ne 2
.na
\fBreference_tracking_enable\fR (int)
.ad
.RS 12n
Track reference holders to refcount_t objects (debug builds only).
.sp
Use \fB1\fR for yes and \fB0\fR for no (default).
.RE
.sp
.ne 2
.na

View File

@ -205,6 +205,7 @@ diff subcommand Allows lookup of paths within a dataset
given an object number, and the ability
to create snapshots necessary to
'zfs diff'.
hold subcommand Allows adding a user hold to a snapshot
load-key subcommand Allows loading and unloading of encryption key
(see 'zfs load-key' and 'zfs unload-key').
change-key subcommand Allows changing an encryption key via
@ -214,6 +215,8 @@ promote subcommand Must also have the 'mount' and 'promote'
ability in the origin file system
receive subcommand Must also have the 'mount' and 'create'
ability
release subcommand Allows releasing a user hold which might
destroy the snapshot
rename subcommand Must also have the 'mount' and 'create'
ability in the new parent
rollback subcommand Must also have the 'mount' ability

View File

@ -21,7 +21,7 @@
.\"
.\" Copyright (c) 2017 by Lawrence Livermore National Security, LLC.
.\"
.Dd September 13, 2020
.Dd March 18, 2021
.Dt ZGENHOSTID 8 SMM
.Os
.Sh NAME
@ -50,7 +50,7 @@ Force file overwrite.
Write to
.Pa filename
instead of default
.Pa /etc/hostd
.Pa /etc/hostid
.It Ar hostid
Specifies the value to be placed in
.Pa /etc/hostid .

View File

@ -295,6 +295,23 @@ identified by a unique identifier instead of its path since the path was never
correct in the first place.
.El
.Pp
Checksum errors represent events where a disk returned data that was expected
to be correct, but was not.
In other words, these are instances of silent data corruption.
The checksum errors are reported in
.Nm zpool Cm status
and
.Nm zpool Cm events .
When a block is stored redundantly, a damaged block may be reconstructed
(e.g. from RAIDZ parity or a mirrored copy).
In this case, ZFS reports the checksum error against the disks that contained
damaged data.
If a block cannot be reconstructed (e.g. due to 3 disks being damaged
in a RAIDZ2 group), it is not possible to determine which disks were silently
corrupted.
In this case, checksum errors are reported for all disks on which the block
is stored.
.Pp
If a device is removed and later re-attached to the system, ZFS attempts
to put the device online automatically.
Device attach detection is hardware-dependent and might not be supported on all

View File

@ -299,15 +299,10 @@ __kstat_create(const char *module, int instance, const char *name,
panic("Undefined kstat type %d\n", ksp->ks_type);
}
if (ksp->ks_flags & KSTAT_FLAG_VIRTUAL) {
if (ksp->ks_flags & KSTAT_FLAG_VIRTUAL)
ksp->ks_data = NULL;
} else {
else
ksp->ks_data = kmem_zalloc(ksp->ks_data_size, KM_SLEEP);
if (ksp->ks_data == NULL) {
kmem_free(ksp, sizeof (*ksp));
ksp = NULL;
}
}
/*
* Some kstats use a module name like "zfs/poolname" to distinguish a
@ -509,6 +504,8 @@ kstat_delete(kstat_t *ksp)
sysctl_ctx_free(&ksp->ks_sysctl_ctx);
ksp->ks_lock = NULL;
mutex_destroy(&ksp->ks_private_lock);
if (!(ksp->ks_flags & KSTAT_FLAG_VIRTUAL))
kmem_free(ksp->ks_data, ksp->ks_data_size);
free(ksp, M_KSTAT);
}

View File

@ -407,12 +407,6 @@ SYSCTL_INT(_vfs_zfs_metaslab, OID_AUTO, preload_limit, CTLFLAG_RWTUN,
&metaslab_preload_limit, 0,
"Max number of metaslabs per group to preload");
/* refcount.c */
extern int reference_tracking_enable;
SYSCTL_INT(_vfs_zfs, OID_AUTO, reference_tracking_enable, CTLFLAG_RDTUN,
&reference_tracking_enable, 0,
"Track reference holders to refcount_t objects, used mostly by ZFS");
/* spa.c */
extern int zfs_ccw_retry_interval;
SYSCTL_INT(_vfs_zfs, OID_AUTO, ccw_retry_interval, CTLFLAG_RWTUN,

View File

@ -490,8 +490,8 @@ abd_alloc_zero_scatter(void)
#define PAGE_SHIFT (highbit64(PAGESIZE)-1)
#endif
#define zfs_kmap_atomic(chunk, km) ((void *)chunk)
#define zfs_kunmap_atomic(addr, km) do { (void)(addr); } while (0)
#define zfs_kmap_atomic(chunk) ((void *)chunk)
#define zfs_kunmap_atomic(addr) do { (void)(addr); } while (0)
#define local_irq_save(flags) do { (void)(flags); } while (0)
#define local_irq_restore(flags) do { (void)(flags); } while (0)
#define nth_page(pg, i) \
@ -879,8 +879,7 @@ abd_iter_map(struct abd_iter *aiter)
aiter->iter_mapsize = MIN(aiter->iter_sg->length - offset,
aiter->iter_abd->abd_size - aiter->iter_pos);
paddr = zfs_kmap_atomic(sg_page(aiter->iter_sg),
km_table[aiter->iter_km]);
paddr = zfs_kmap_atomic(sg_page(aiter->iter_sg));
}
aiter->iter_mapaddr = (char *)paddr + offset;
@ -899,8 +898,7 @@ abd_iter_unmap(struct abd_iter *aiter)
if (!abd_is_linear(aiter->iter_abd)) {
/* LINTED E_FUNC_SET_NOT_USED */
zfs_kunmap_atomic(aiter->iter_mapaddr - aiter->iter_offset,
km_table[aiter->iter_km]);
zfs_kunmap_atomic(aiter->iter_mapaddr - aiter->iter_offset);
}
ASSERT3P(aiter->iter_mapaddr, !=, NULL);

View File

@ -124,7 +124,7 @@ secpolicy_vnode_any_access(const cred_t *cr, struct inode *ip, uid_t owner)
if (crgetfsuid(cr) == owner)
return (0);
if (inode_owner_or_capable(ip))
if (zpl_inode_owner_or_capable(kcred->user_ns, ip))
return (0);
#if defined(CONFIG_USER_NS)

View File

@ -589,9 +589,14 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio,
}
/* bio_alloc() with __GFP_WAIT never returns NULL */
#ifdef HAVE_BIO_MAX_SEGS
dr->dr_bio[i] = bio_alloc(GFP_NOIO, bio_max_segs(
abd_nr_pages_off(zio->io_abd, bio_size, abd_offset)));
#else
dr->dr_bio[i] = bio_alloc(GFP_NOIO,
MIN(abd_nr_pages_off(zio->io_abd, bio_size, abd_offset),
BIO_MAX_PAGES));
#endif
if (unlikely(dr->dr_bio[i] == NULL)) {
vdev_disk_dio_free(dr);
return (SET_ERROR(ENOMEM));

View File

@ -590,7 +590,8 @@ struct inode *
zfsctl_root(znode_t *zp)
{
ASSERT(zfs_has_ctldir(zp));
igrab(ZTOZSB(zp)->z_ctldir);
/* Must have an existing ref, so igrab() cannot return NULL */
VERIFY3P(igrab(ZTOZSB(zp)->z_ctldir), !=, NULL);
return (ZTOZSB(zp)->z_ctldir);
}

View File

@ -136,12 +136,12 @@ zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
void *paddr;
cnt = MIN(bv->bv_len - skip, n);
paddr = zfs_kmap_atomic(bv->bv_page, KM_USER1);
paddr = zfs_kmap_atomic(bv->bv_page);
if (rw == UIO_READ)
bcopy(p, paddr + bv->bv_offset + skip, cnt);
else
bcopy(paddr + bv->bv_offset + skip, p, cnt);
zfs_kunmap_atomic(paddr, KM_USER1);
zfs_kunmap_atomic(paddr);
skip += cnt;
if (skip == bv->bv_len) {

View File

@ -1734,7 +1734,11 @@ zfs_vget(struct super_block *sb, struct inode **ipp, fid_t *fidp)
VERIFY(zfsctl_root_lookup(*ipp, "snapshot", ipp,
0, kcred, NULL, NULL) == 0);
} else {
igrab(*ipp);
/*
* Must have an existing ref, so igrab()
* cannot return NULL
*/
VERIFY3P(igrab(*ipp), !=, NULL);
}
ZFS_EXIT(zfsvfs);
return (0);

View File

@ -1656,7 +1656,8 @@ zfs_readdir(struct inode *ip, zpl_dir_context_t *ctx, cred_t *cr)
*/
/* ARGSUSED */
int
zfs_getattr_fast(struct inode *ip, struct kstat *sp)
zfs_getattr_fast(struct user_namespace *user_ns, struct inode *ip,
struct kstat *sp)
{
znode_t *zp = ITOZ(ip);
zfsvfs_t *zfsvfs = ITOZSB(ip);
@ -1668,7 +1669,7 @@ zfs_getattr_fast(struct inode *ip, struct kstat *sp)
mutex_enter(&zp->z_lock);
generic_fillattr(ip, sp);
zpl_generic_fillattr(user_ns, ip, sp);
/*
* +1 link count for root inode with visible '.zfs' directory.
*/

View File

@ -101,12 +101,22 @@ zpl_root_readdir(struct file *filp, void *dirent, filldir_t filldir)
*/
/* ARGSUSED */
static int
#ifdef HAVE_USERNS_IOPS_GETATTR
zpl_root_getattr_impl(struct user_namespace *user_ns,
const struct path *path, struct kstat *stat, u32 request_mask,
unsigned int query_flags)
#else
zpl_root_getattr_impl(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
#endif
{
struct inode *ip = path->dentry->d_inode;
#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR)
generic_fillattr(user_ns, ip, stat);
#else
generic_fillattr(ip, stat);
#endif
stat->atime = current_time(ip);
return (0);
@ -290,8 +300,14 @@ zpl_snapdir_readdir(struct file *filp, void *dirent, filldir_t filldir)
#endif /* !HAVE_VFS_ITERATE && !HAVE_VFS_ITERATE_SHARED */
static int
#ifdef HAVE_IOPS_RENAME_USERNS
zpl_snapdir_rename2(struct user_namespace *user_ns, struct inode *sdip,
struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
unsigned int flags)
#else
zpl_snapdir_rename2(struct inode *sdip, struct dentry *sdentry,
struct inode *tdip, struct dentry *tdentry, unsigned int flags)
#endif
{
cred_t *cr = CRED();
int error;
@ -309,7 +325,7 @@ zpl_snapdir_rename2(struct inode *sdip, struct dentry *sdentry,
return (error);
}
#ifndef HAVE_RENAME_WANTS_FLAGS
#if !defined(HAVE_RENAME_WANTS_FLAGS) && !defined(HAVE_IOPS_RENAME_USERNS)
static int
zpl_snapdir_rename(struct inode *sdip, struct dentry *sdentry,
struct inode *tdip, struct dentry *tdentry)
@ -333,7 +349,12 @@ zpl_snapdir_rmdir(struct inode *dip, struct dentry *dentry)
}
static int
#ifdef HAVE_IOPS_MKDIR_USERNS
zpl_snapdir_mkdir(struct user_namespace *user_ns, struct inode *dip,
struct dentry *dentry, umode_t mode)
#else
zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
#endif
{
cred_t *cr = CRED();
vattr_t *vap;
@ -363,14 +384,24 @@ zpl_snapdir_mkdir(struct inode *dip, struct dentry *dentry, umode_t mode)
*/
/* ARGSUSED */
static int
#ifdef HAVE_USERNS_IOPS_GETATTR
zpl_snapdir_getattr_impl(struct user_namespace *user_ns,
const struct path *path, struct kstat *stat, u32 request_mask,
unsigned int query_flags)
#else
zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
#endif
{
struct inode *ip = path->dentry->d_inode;
zfsvfs_t *zfsvfs = ITOZSB(ip);
ZPL_ENTER(zfsvfs);
#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR)
generic_fillattr(user_ns, ip, stat);
#else
generic_fillattr(ip, stat);
#endif
stat->nlink = stat->size = 2;
stat->ctime = stat->mtime = dmu_objset_snap_cmtime(zfsvfs->z_os);
@ -408,7 +439,7 @@ const struct file_operations zpl_fops_snapdir = {
const struct inode_operations zpl_ops_snapdir = {
.lookup = zpl_snapdir_lookup,
.getattr = zpl_snapdir_getattr,
#ifdef HAVE_RENAME_WANTS_FLAGS
#if defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS)
.rename = zpl_snapdir_rename2,
#else
.rename = zpl_snapdir_rename,
@ -495,8 +526,14 @@ zpl_shares_readdir(struct file *filp, void *dirent, filldir_t filldir)
/* ARGSUSED */
static int
#ifdef HAVE_USERNS_IOPS_GETATTR
zpl_shares_getattr_impl(struct user_namespace *user_ns,
const struct path *path, struct kstat *stat, u32 request_mask,
unsigned int query_flags)
#else
zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
u32 request_mask, unsigned int query_flags)
#endif
{
struct inode *ip = path->dentry->d_inode;
zfsvfs_t *zfsvfs = ITOZSB(ip);
@ -506,7 +543,11 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
ZPL_ENTER(zfsvfs);
if (zfsvfs->z_shares_dir == 0) {
#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR)
generic_fillattr(user_ns, path->dentry->d_inode, stat);
#else
generic_fillattr(path->dentry->d_inode, stat);
#endif
stat->nlink = stat->size = 2;
stat->atime = current_time(ip);
ZPL_EXIT(zfsvfs);
@ -515,7 +556,11 @@ zpl_shares_getattr_impl(const struct path *path, struct kstat *stat,
error = -zfs_zget(zfsvfs, zfsvfs->z_shares_dir, &dzp);
if (error == 0) {
error = -zfs_getattr_fast(ZTOI(dzp), stat);
#if defined(HAVE_GENERIC_FILLATTR_USERNS) && defined(HAVE_USERNS_IOPS_GETATTR)
error = -zfs_getattr_fast(user_ns, ZTOI(dzp), stat);
#else
error = -zfs_getattr_fast(kcred->user_ns, ZTOI(dzp), stat);
#endif
iput(ZTOI(dzp));
}

View File

@ -869,7 +869,7 @@ __zpl_ioctl_setflags(struct inode *ip, uint32_t ioctl_flags, xvattr_t *xva)
!capable(CAP_LINUX_IMMUTABLE))
return (-EACCES);
if (!inode_owner_or_capable(ip))
if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
return (-EACCES);
xva_init(xva);

View File

@ -128,7 +128,12 @@ zpl_vap_init(vattr_t *vap, struct inode *dir, umode_t mode, cred_t *cr)
}
static int
#ifdef HAVE_IOPS_CREATE_USERNS
zpl_create(struct user_namespace *user_ns, struct inode *dir,
struct dentry *dentry, umode_t mode, bool flag)
#else
zpl_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool flag)
#endif
{
cred_t *cr = CRED();
znode_t *zp;
@ -163,7 +168,12 @@ zpl_create(struct inode *dir, struct dentry *dentry, umode_t mode, bool flag)
}
static int
#ifdef HAVE_IOPS_MKNOD_USERNS
zpl_mknod(struct user_namespace *user_ns, struct inode *dir,
struct dentry *dentry, umode_t mode,
#else
zpl_mknod(struct inode *dir, struct dentry *dentry, umode_t mode,
#endif
dev_t rdev)
{
cred_t *cr = CRED();
@ -278,7 +288,12 @@ zpl_unlink(struct inode *dir, struct dentry *dentry)
}
static int
#ifdef HAVE_IOPS_MKDIR_USERNS
zpl_mkdir(struct user_namespace *user_ns, struct inode *dir,
struct dentry *dentry, umode_t mode)
#else
zpl_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
#endif
{
cred_t *cr = CRED();
vattr_t *vap;
@ -338,8 +353,14 @@ zpl_rmdir(struct inode *dir, struct dentry *dentry)
}
static int
#ifdef HAVE_USERNS_IOPS_GETATTR
zpl_getattr_impl(struct user_namespace *user_ns,
const struct path *path, struct kstat *stat, u32 request_mask,
unsigned int query_flags)
#else
zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
unsigned int query_flags)
#endif
{
int error;
fstrans_cookie_t cookie;
@ -350,7 +371,11 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
* XXX request_mask and query_flags currently ignored.
*/
error = -zfs_getattr_fast(path->dentry->d_inode, stat);
#ifdef HAVE_USERNS_IOPS_GETATTR
error = -zfs_getattr_fast(user_ns, path->dentry->d_inode, stat);
#else
error = -zfs_getattr_fast(kcred->user_ns, path->dentry->d_inode, stat);
#endif
spl_fstrans_unmark(cookie);
ASSERT3S(error, <=, 0);
@ -359,7 +384,12 @@ zpl_getattr_impl(const struct path *path, struct kstat *stat, u32 request_mask,
ZPL_GETATTR_WRAPPER(zpl_getattr);
static int
#ifdef HAVE_SETATTR_PREPARE_USERNS
zpl_setattr(struct user_namespace *user_ns, struct dentry *dentry,
struct iattr *ia)
#else
zpl_setattr(struct dentry *dentry, struct iattr *ia)
#endif
{
struct inode *ip = dentry->d_inode;
cred_t *cr = CRED();
@ -367,7 +397,7 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia)
int error;
fstrans_cookie_t cookie;
error = setattr_prepare(dentry, ia);
error = zpl_setattr_prepare(kcred->user_ns, dentry, ia);
if (error)
return (error);
@ -399,8 +429,14 @@ zpl_setattr(struct dentry *dentry, struct iattr *ia)
}
static int
#ifdef HAVE_IOPS_RENAME_USERNS
zpl_rename2(struct user_namespace *user_ns, struct inode *sdip,
struct dentry *sdentry, struct inode *tdip, struct dentry *tdentry,
unsigned int flags)
#else
zpl_rename2(struct inode *sdip, struct dentry *sdentry,
struct inode *tdip, struct dentry *tdentry, unsigned int flags)
#endif
{
cred_t *cr = CRED();
int error;
@ -421,7 +457,7 @@ zpl_rename2(struct inode *sdip, struct dentry *sdentry,
return (error);
}
#ifndef HAVE_RENAME_WANTS_FLAGS
#if !defined(HAVE_RENAME_WANTS_FLAGS) && !defined(HAVE_IOPS_RENAME_USERNS)
static int
zpl_rename(struct inode *sdip, struct dentry *sdentry,
struct inode *tdip, struct dentry *tdentry)
@ -431,7 +467,12 @@ zpl_rename(struct inode *sdip, struct dentry *sdentry,
#endif
static int
#ifdef HAVE_IOPS_SYMLINK_USERNS
zpl_symlink(struct user_namespace *user_ns, struct inode *dir,
struct dentry *dentry, const char *name)
#else
zpl_symlink(struct inode *dir, struct dentry *dentry, const char *name)
#endif
{
cred_t *cr = CRED();
vattr_t *vap;
@ -593,7 +634,8 @@ zpl_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
crhold(cr);
ip->i_ctime = current_time(ip);
igrab(ip); /* Use ihold() if available */
/* Must have an existing ref, so igrab() cannot return NULL */
VERIFY3P(igrab(ip), !=, NULL);
cookie = spl_fstrans_mark();
error = -zfs_link(ITOZ(dir), ITOZ(ip), dname(dentry), cr, 0);
@ -677,7 +719,7 @@ const struct inode_operations zpl_dir_inode_operations = {
.mkdir = zpl_mkdir,
.rmdir = zpl_rmdir,
.mknod = zpl_mknod,
#ifdef HAVE_RENAME_WANTS_FLAGS
#if defined(HAVE_RENAME_WANTS_FLAGS) || defined(HAVE_IOPS_RENAME_USERNS)
.rename = zpl_rename2,
#else
.rename = zpl_rename,

View File

@ -1233,7 +1233,7 @@ __zpl_xattr_acl_set_access(struct inode *ip, const char *name,
if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
return (-EOPNOTSUPP);
if (!inode_owner_or_capable(ip))
if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
return (-EPERM);
if (value) {
@ -1273,7 +1273,7 @@ __zpl_xattr_acl_set_default(struct inode *ip, const char *name,
if (ITOZSB(ip)->z_acl_type != ZFS_ACLTYPE_POSIX)
return (-EOPNOTSUPP);
if (!inode_owner_or_capable(ip))
if (!zpl_inode_owner_or_capable(kcred->user_ns, ip))
return (-EPERM);
if (value) {

View File

@ -1640,7 +1640,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
mutex_exit(&db->db_mtx);
if (err == 0 && prefetch) {
dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE,
flags & DB_RF_HAVESTRUCT);
B_FALSE, flags & DB_RF_HAVESTRUCT);
}
DB_DNODE_EXIT(db);
DBUF_STAT_BUMP(hash_hits);
@ -1662,6 +1662,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
*/
if (!err && prefetch) {
dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE,
db->db_state != DB_CACHED,
flags & DB_RF_HAVESTRUCT);
}
@ -1691,7 +1692,7 @@ dbuf_read(dmu_buf_impl_t *db, zio_t *zio, uint32_t flags)
mutex_exit(&db->db_mtx);
if (prefetch) {
dmu_zfetch(&dn->dn_zfetch, db->db_blkid, 1, B_TRUE,
flags & DB_RF_HAVESTRUCT);
B_TRUE, flags & DB_RF_HAVESTRUCT);
}
DB_DNODE_EXIT(db);
DBUF_STAT_BUMP(hash_misses);

View File

@ -497,10 +497,12 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
boolean_t read, void *tag, int *numbufsp, dmu_buf_t ***dbpp, uint32_t flags)
{
dmu_buf_t **dbp;
zstream_t *zs = NULL;
uint64_t blkid, nblks, i;
uint32_t dbuf_flags;
int err;
zio_t *zio = NULL;
boolean_t missed = B_FALSE;
ASSERT(length <= DMU_MAX_ACCESS);
@ -536,9 +538,21 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
zio = zio_root(dn->dn_objset->os_spa, NULL, NULL,
ZIO_FLAG_CANFAIL);
blkid = dbuf_whichblock(dn, 0, offset);
if ((flags & DMU_READ_NO_PREFETCH) == 0 &&
DNODE_META_IS_CACHEABLE(dn) && length <= zfetch_array_rd_sz) {
/*
* Prepare the zfetch before initiating the demand reads, so
* that if multiple threads block on the same indirect block, we
* base predictions on the original, less racy request order.
*/
zs = dmu_zfetch_prepare(&dn->dn_zfetch, blkid, nblks,
read && DNODE_IS_CACHEABLE(dn), B_TRUE);
}
for (i = 0; i < nblks; i++) {
dmu_buf_impl_t *db = dbuf_hold(dn, blkid + i, tag);
if (db == NULL) {
if (zs)
dmu_zfetch_run(zs, missed, B_TRUE);
rw_exit(&dn->dn_struct_rwlock);
dmu_buf_rele_array(dbp, nblks, tag);
if (read)
@ -546,20 +560,27 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length,
return (SET_ERROR(EIO));
}
/* initiate async i/o */
if (read)
/*
* Initiate async demand data read.
* We check the db_state after calling dbuf_read() because
* (1) dbuf_read() may change the state to CACHED due to a
* hit in the ARC, and (2) on a cache miss, a child will
* have been added to "zio" but not yet completed, so the
* state will not yet be CACHED.
*/
if (read) {
(void) dbuf_read(db, zio, dbuf_flags);
if (db->db_state != DB_CACHED)
missed = B_TRUE;
}
dbp[i] = &db->db;
}
if (!read)
zfs_racct_write(length, nblks);
if ((flags & DMU_READ_NO_PREFETCH) == 0 &&
DNODE_META_IS_CACHEABLE(dn) && length <= zfetch_array_rd_sz) {
dmu_zfetch(&dn->dn_zfetch, blkid, nblks,
read && DNODE_IS_CACHEABLE(dn), B_TRUE);
}
if (zs)
dmu_zfetch_run(zs, missed, B_TRUE);
rw_exit(&dn->dn_struct_rwlock);
if (read) {

View File

@ -59,8 +59,6 @@ typedef struct zfetch_stats {
kstat_named_t zfetchstat_hits;
kstat_named_t zfetchstat_misses;
kstat_named_t zfetchstat_max_streams;
kstat_named_t zfetchstat_max_completion_us;
kstat_named_t zfetchstat_last_completion_us;
kstat_named_t zfetchstat_io_issued;
} zfetch_stats_t;
@ -68,8 +66,6 @@ static zfetch_stats_t zfetch_stats = {
{ "hits", KSTAT_DATA_UINT64 },
{ "misses", KSTAT_DATA_UINT64 },
{ "max_streams", KSTAT_DATA_UINT64 },
{ "max_completion_us", KSTAT_DATA_UINT64 },
{ "last_completion_us", KSTAT_DATA_UINT64 },
{ "io_issued", KSTAT_DATA_UINT64 },
};
@ -129,7 +125,7 @@ dmu_zfetch_init(zfetch_t *zf, dnode_t *dno)
static void
dmu_zfetch_stream_fini(zstream_t *zs)
{
mutex_destroy(&zs->zs_lock);
ASSERT(!list_link_active(&zs->zs_node));
kmem_free(zs, sizeof (*zs));
}
@ -138,17 +134,10 @@ dmu_zfetch_stream_remove(zfetch_t *zf, zstream_t *zs)
{
ASSERT(MUTEX_HELD(&zf->zf_lock));
list_remove(&zf->zf_stream, zs);
dmu_zfetch_stream_fini(zs);
zf->zf_numstreams--;
}
static void
dmu_zfetch_stream_orphan(zfetch_t *zf, zstream_t *zs)
{
ASSERT(MUTEX_HELD(&zf->zf_lock));
list_remove(&zf->zf_stream, zs);
zs->zs_fetch = NULL;
zf->zf_numstreams--;
membar_producer();
if (zfs_refcount_remove(&zs->zs_refs, NULL) == 0)
dmu_zfetch_stream_fini(zs);
}
/*
@ -161,12 +150,8 @@ dmu_zfetch_fini(zfetch_t *zf)
zstream_t *zs;
mutex_enter(&zf->zf_lock);
while ((zs = list_head(&zf->zf_stream)) != NULL) {
if (zfs_refcount_count(&zs->zs_blocks) != 0)
dmu_zfetch_stream_orphan(zf, zs);
else
dmu_zfetch_stream_remove(zf, zs);
}
while ((zs = list_head(&zf->zf_stream)) != NULL)
dmu_zfetch_stream_remove(zf, zs);
mutex_exit(&zf->zf_lock);
list_destroy(&zf->zf_stream);
mutex_destroy(&zf->zf_lock);
@ -195,9 +180,9 @@ dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
zs != NULL; zs = zs_next) {
zs_next = list_next(&zf->zf_stream, zs);
/*
* Skip gethrtime() call if there are still references
* Skip if still active. 1 -- zf_stream reference.
*/
if (zfs_refcount_count(&zs->zs_blocks) != 0)
if (zfs_refcount_count(&zs->zs_refs) != 1)
continue;
if (((now - zs->zs_atime) / NANOSEC) >
zfetch_min_sec_reap)
@ -222,12 +207,17 @@ dmu_zfetch_stream_create(zfetch_t *zf, uint64_t blkid)
zstream_t *zs = kmem_zalloc(sizeof (*zs), KM_SLEEP);
zs->zs_blkid = blkid;
zs->zs_pf_blkid1 = blkid;
zs->zs_pf_blkid = blkid;
zs->zs_ipf_blkid1 = blkid;
zs->zs_ipf_blkid = blkid;
zs->zs_atime = now;
zs->zs_fetch = zf;
zfs_refcount_create(&zs->zs_blocks);
mutex_init(&zs->zs_lock, NULL, MUTEX_DEFAULT, NULL);
zs->zs_missed = B_FALSE;
zfs_refcount_create(&zs->zs_callers);
zfs_refcount_create(&zs->zs_refs);
/* One reference for zf_stream. */
zfs_refcount_add(&zs->zs_refs, NULL);
zf->zf_numstreams++;
list_insert_head(&zf->zf_stream, zs);
}
@ -237,48 +227,36 @@ dmu_zfetch_stream_done(void *arg, boolean_t io_issued)
{
zstream_t *zs = arg;
if (zs->zs_start_time && io_issued) {
hrtime_t now = gethrtime();
hrtime_t delta = NSEC2USEC(now - zs->zs_start_time);
zs->zs_start_time = 0;
ZFETCHSTAT_SET(zfetchstat_last_completion_us, delta);
if (delta > ZFETCHSTAT_GET(zfetchstat_max_completion_us))
ZFETCHSTAT_SET(zfetchstat_max_completion_us, delta);
}
if (zfs_refcount_remove(&zs->zs_blocks, NULL) != 0)
return;
/*
* The parent fetch structure has gone away
*/
if (zs->zs_fetch == NULL)
if (zfs_refcount_remove(&zs->zs_refs, NULL) == 0)
dmu_zfetch_stream_fini(zs);
}
/*
* This is the predictive prefetch entry point. It associates dnode access
* specified with blkid and nblks arguments with prefetch stream, predicts
* further accesses based on that stats and initiates speculative prefetch.
* This is the predictive prefetch entry point. dmu_zfetch_prepare()
* associates the dnode access specified by the blkid and nblks arguments
* with a prefetch stream, predicts further accesses based on those stats and
* returns the stream pointer on success. That pointer must later be passed
* to dmu_zfetch_run() to initiate the speculative prefetch for the stream
* and release it. dmu_zfetch() is a wrapper for simple cases where no window
* between prediction and prefetch initiation is needed.
* fetch_data argument specifies whether actual data blocks should be fetched:
* FALSE -- prefetch only indirect blocks for predicted data blocks;
* TRUE -- prefetch predicted data blocks plus following indirect blocks.
*/
void
dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
boolean_t have_lock)
zstream_t *
dmu_zfetch_prepare(zfetch_t *zf, uint64_t blkid, uint64_t nblks,
boolean_t fetch_data, boolean_t have_lock)
{
zstream_t *zs;
int64_t pf_start, ipf_start, ipf_istart, ipf_iend;
int64_t pf_start, ipf_start;
int64_t pf_ahead_blks, max_blks;
int epbs, max_dist_blks, pf_nblks, ipf_nblks, issued;
uint64_t end_of_access_blkid;
int max_dist_blks, pf_nblks, ipf_nblks;
uint64_t end_of_access_blkid, maxblkid;
end_of_access_blkid = blkid + nblks;
spa_t *spa = zf->zf_dnode->dn_objset->os_spa;
if (zfs_prefetch_disable)
return;
return (NULL);
/*
* If we haven't yet loaded the indirect vdevs' mappings, we
* can only read from blocks that we carefully ensure are on
@ -287,14 +265,14 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
* blocks (e.g. of the MOS's dnode object).
*/
if (!spa_indirect_vdevs_loaded(spa))
return;
return (NULL);
/*
* As a fast path for small (single-block) files, ignore access
* to the first block.
*/
if (!have_lock && blkid == 0)
return;
return (NULL);
if (!have_lock)
rw_enter(&zf->zf_dnode->dn_struct_rwlock, RW_READER);
@ -303,10 +281,11 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
* A fast path for small files for which no prefetch will
* happen.
*/
if (zf->zf_dnode->dn_maxblkid < 2) {
maxblkid = zf->zf_dnode->dn_maxblkid;
if (maxblkid < 2) {
if (!have_lock)
rw_exit(&zf->zf_dnode->dn_struct_rwlock);
return;
return (NULL);
}
mutex_enter(&zf->zf_lock);
@ -317,45 +296,47 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
*/
for (zs = list_head(&zf->zf_stream); zs != NULL;
zs = list_next(&zf->zf_stream, zs)) {
if (blkid == zs->zs_blkid || blkid + 1 == zs->zs_blkid) {
mutex_enter(&zs->zs_lock);
/*
* zs_blkid could have changed before we
* acquired zs_lock; re-check them here.
*/
if (blkid == zs->zs_blkid) {
break;
} else if (blkid + 1 == zs->zs_blkid) {
blkid++;
nblks--;
if (nblks == 0) {
/* Already prefetched this before. */
mutex_exit(&zs->zs_lock);
mutex_exit(&zf->zf_lock);
if (!have_lock) {
rw_exit(&zf->zf_dnode->
dn_struct_rwlock);
}
return;
}
break;
}
mutex_exit(&zs->zs_lock);
if (blkid == zs->zs_blkid) {
break;
} else if (blkid + 1 == zs->zs_blkid) {
blkid++;
nblks--;
break;
}
}
/*
* If the file is ending, remove the matching stream if found.
* If not found then it is too late to create a new one now.
*/
if (end_of_access_blkid >= maxblkid) {
if (zs != NULL)
dmu_zfetch_stream_remove(zf, zs);
mutex_exit(&zf->zf_lock);
if (!have_lock)
rw_exit(&zf->zf_dnode->dn_struct_rwlock);
return (NULL);
}
/* Exit if we already prefetched this block before. */
if (nblks == 0) {
mutex_exit(&zf->zf_lock);
if (!have_lock)
rw_exit(&zf->zf_dnode->dn_struct_rwlock);
return (NULL);
}
if (zs == NULL) {
/*
* This access is not part of any existing stream. Create
* a new stream for it.
*/
ZFETCHSTAT_BUMP(zfetchstat_misses);
dmu_zfetch_stream_create(zf, end_of_access_blkid);
mutex_exit(&zf->zf_lock);
if (!have_lock)
rw_exit(&zf->zf_dnode->dn_struct_rwlock);
return;
ZFETCHSTAT_BUMP(zfetchstat_misses);
return (NULL);
}
/*
@ -369,6 +350,10 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
* start just after the block we just accessed.
*/
pf_start = MAX(zs->zs_pf_blkid, end_of_access_blkid);
if (zs->zs_pf_blkid1 < end_of_access_blkid)
zs->zs_pf_blkid1 = end_of_access_blkid;
if (zs->zs_ipf_blkid1 < end_of_access_blkid)
zs->zs_ipf_blkid1 = end_of_access_blkid;
/*
* Double our amount of prefetched data, but don't let the
@ -407,49 +392,108 @@ dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
* (i.e. the amount read now + the amount of data prefetched now).
*/
pf_ahead_blks = zs->zs_ipf_blkid - blkid + nblks + pf_nblks;
max_blks = max_dist_blks - (ipf_start - end_of_access_blkid);
max_blks = max_dist_blks - (ipf_start - zs->zs_pf_blkid);
ipf_nblks = MIN(pf_ahead_blks, max_blks);
zs->zs_ipf_blkid = ipf_start + ipf_nblks;
epbs = zf->zf_dnode->dn_indblkshift - SPA_BLKPTRSHIFT;
ipf_istart = P2ROUNDUP(ipf_start, 1 << epbs) >> epbs;
ipf_iend = P2ROUNDUP(zs->zs_ipf_blkid, 1 << epbs) >> epbs;
zs->zs_atime = gethrtime();
/* no prior reads in progress */
if (zfs_refcount_count(&zs->zs_blocks) == 0)
zs->zs_start_time = zs->zs_atime;
zs->zs_blkid = end_of_access_blkid;
zfs_refcount_add_many(&zs->zs_blocks, pf_nblks + ipf_iend - ipf_istart,
NULL);
mutex_exit(&zs->zs_lock);
/* Protect the stream from reclamation. */
zs->zs_atime = gethrtime();
zfs_refcount_add(&zs->zs_refs, NULL);
/* Count concurrent callers. */
zfs_refcount_add(&zs->zs_callers, NULL);
mutex_exit(&zf->zf_lock);
issued = 0;
if (!have_lock)
rw_exit(&zf->zf_dnode->dn_struct_rwlock);
ZFETCHSTAT_BUMP(zfetchstat_hits);
return (zs);
}
void
dmu_zfetch_run(zstream_t *zs, boolean_t missed, boolean_t have_lock)
{
zfetch_t *zf = zs->zs_fetch;
int64_t pf_start, pf_end, ipf_start, ipf_end;
int epbs, issued;
if (missed)
zs->zs_missed = missed;
/*
* dbuf_prefetch() is asynchronous (even when it needs to read
* indirect blocks), but we still prefer to drop our locks before
* calling it to reduce the time we hold them.
* Postpone the prefetch if there are more concurrent callers.
* It happens when multiple requests are waiting for the same
* indirect block. The last one will run the prefetch for all.
*/
if (zfs_refcount_remove(&zs->zs_callers, NULL) != 0) {
/* Drop reference taken in dmu_zfetch_prepare(). */
if (zfs_refcount_remove(&zs->zs_refs, NULL) == 0)
dmu_zfetch_stream_fini(zs);
return;
}
for (int i = 0; i < pf_nblks; i++) {
issued += dbuf_prefetch_impl(zf->zf_dnode, 0, pf_start + i,
mutex_enter(&zf->zf_lock);
if (zs->zs_missed) {
pf_start = zs->zs_pf_blkid1;
pf_end = zs->zs_pf_blkid1 = zs->zs_pf_blkid;
} else {
pf_start = pf_end = 0;
}
ipf_start = MAX(zs->zs_pf_blkid1, zs->zs_ipf_blkid1);
ipf_end = zs->zs_ipf_blkid1 = zs->zs_ipf_blkid;
mutex_exit(&zf->zf_lock);
ASSERT3S(pf_start, <=, pf_end);
ASSERT3S(ipf_start, <=, ipf_end);
epbs = zf->zf_dnode->dn_indblkshift - SPA_BLKPTRSHIFT;
ipf_start = P2ROUNDUP(ipf_start, 1 << epbs) >> epbs;
ipf_end = P2ROUNDUP(ipf_end, 1 << epbs) >> epbs;
ASSERT3S(ipf_start, <=, ipf_end);
issued = pf_end - pf_start + ipf_end - ipf_start;
if (issued > 1) {
/* Add references beyond the one from dmu_zfetch_prepare(). */
zfs_refcount_add_many(&zs->zs_refs, issued - 1, NULL);
} else if (issued == 0) {
/* Some other thread has done our work, so drop the ref. */
if (zfs_refcount_remove(&zs->zs_refs, NULL) == 0)
dmu_zfetch_stream_fini(zs);
return;
}
if (!have_lock)
rw_enter(&zf->zf_dnode->dn_struct_rwlock, RW_READER);
issued = 0;
for (int64_t blk = pf_start; blk < pf_end; blk++) {
issued += dbuf_prefetch_impl(zf->zf_dnode, 0, blk,
ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH,
dmu_zfetch_stream_done, zs);
}
for (int64_t iblk = ipf_istart; iblk < ipf_iend; iblk++) {
for (int64_t iblk = ipf_start; iblk < ipf_end; iblk++) {
issued += dbuf_prefetch_impl(zf->zf_dnode, 1, iblk,
ZIO_PRIORITY_ASYNC_READ, ARC_FLAG_PREDICTIVE_PREFETCH,
dmu_zfetch_stream_done, zs);
}
if (!have_lock)
rw_exit(&zf->zf_dnode->dn_struct_rwlock);
ZFETCHSTAT_BUMP(zfetchstat_hits);
if (issued)
ZFETCHSTAT_ADD(zfetchstat_io_issued, issued);
}
/*
 * Single-call form of the prefetch entry point: calls dmu_zfetch_prepare()
 * with the given access description and, if it returns a stream, passes
 * that stream straight to dmu_zfetch_run() together with the caller's
 * 'missed' flag. When dmu_zfetch_prepare() returns NULL, nothing further
 * is done.
 */
void
dmu_zfetch(zfetch_t *zf, uint64_t blkid, uint64_t nblks, boolean_t fetch_data,
boolean_t missed, boolean_t have_lock)
{
zstream_t *zs;
zs = dmu_zfetch_prepare(zf, blkid, nblks, fetch_data, have_lock);
/* A NULL stream means there is nothing to run for this access. */
if (zs)
dmu_zfetch_run(zs, missed, have_lock);
}
/* BEGIN CSTYLED */
ZFS_MODULE_PARAM(zfs_prefetch, zfs_prefetch_, disable, INT, ZMOD_RW,
"Disable all ZFS prefetching");

View File

@ -2316,18 +2316,13 @@ metaslab_load_impl(metaslab_t *msp)
range_tree_add(msp->ms_allocatable,
msp->ms_start, msp->ms_size);
if (msp->ms_freed != NULL) {
if (msp->ms_new) {
/*
* If the ms_sm doesn't exist, this means that this
* metaslab hasn't gone through metaslab_sync() and
* thus has never been dirtied. So we shouldn't
* expect any unflushed allocs or frees from previous
* TXGs.
*
* Note: ms_freed and all the other trees except for
* the ms_allocatable, can be NULL at this point only
* if this is a new metaslab of a vdev that just got
* expanded.
*/
ASSERT(range_tree_is_empty(msp->ms_unflushed_allocs));
ASSERT(range_tree_is_empty(msp->ms_unflushed_frees));
@ -2365,8 +2360,6 @@ metaslab_load_impl(metaslab_t *msp)
range_tree_walk(msp->ms_unflushed_frees,
range_tree_add, msp->ms_allocatable);
msp->ms_loaded = B_TRUE;
ASSERT3P(msp->ms_group, !=, NULL);
spa_t *spa = msp->ms_group->mg_vd->vdev_spa;
if (spa_syncing_log_sm(spa) != NULL) {
@ -2680,19 +2673,31 @@ metaslab_init(metaslab_group_t *mg, uint64_t id, uint64_t object,
ms->ms_allocated_space = space_map_allocated(ms->ms_sm);
}
range_seg_type_t type;
uint64_t shift, start;
type = metaslab_calculate_range_tree_type(vd, ms, &start, &shift);
range_seg_type_t type =
metaslab_calculate_range_tree_type(vd, ms, &start, &shift);
/*
* We create the ms_allocatable here, but we don't create the
* other range trees until metaslab_sync_done(). This serves
* two purposes: it allows metaslab_sync_done() to detect the
* addition of new space; and for debugging, it ensures that
* we'd data fault on any attempt to use this metaslab before
* it's ready.
*/
ms->ms_allocatable = range_tree_create(NULL, type, NULL, start, shift);
for (int t = 0; t < TXG_SIZE; t++) {
ms->ms_allocating[t] = range_tree_create(NULL, type,
NULL, start, shift);
}
ms->ms_freeing = range_tree_create(NULL, type, NULL, start, shift);
ms->ms_freed = range_tree_create(NULL, type, NULL, start, shift);
for (int t = 0; t < TXG_DEFER_SIZE; t++) {
ms->ms_defer[t] = range_tree_create(NULL, type, NULL,
start, shift);
}
ms->ms_checkpointing =
range_tree_create(NULL, type, NULL, start, shift);
ms->ms_unflushed_allocs =
range_tree_create(NULL, type, NULL, start, shift);
metaslab_rt_arg_t *mrap = kmem_zalloc(sizeof (*mrap), KM_SLEEP);
mrap->mra_bt = &ms->ms_unflushed_frees_by_size;
mrap->mra_floor_shift = metaslab_by_size_min_shift;
ms->ms_unflushed_frees = range_tree_create(&metaslab_rt_ops,
type, mrap, start, shift);
ms->ms_trim = range_tree_create(NULL, type, NULL, start, shift);
@ -2765,13 +2770,13 @@ metaslab_fini(metaslab_t *msp)
mutex_enter(&msp->ms_lock);
VERIFY(msp->ms_group == NULL);
/*
* If the range trees haven't been allocated, this metaslab hasn't
* been through metaslab_sync_done() for the first time yet, so its
* If this metaslab hasn't been through metaslab_sync_done() yet its
* space hasn't been accounted for in its vdev and doesn't need to be
* subtracted.
*/
if (msp->ms_freed != NULL) {
if (!msp->ms_new) {
metaslab_space_update(vd, mg->mg_class,
-metaslab_allocated_space(msp), 0, -msp->ms_size);
@ -2782,27 +2787,24 @@ metaslab_fini(metaslab_t *msp)
metaslab_unload(msp);
range_tree_destroy(msp->ms_allocatable);
range_tree_destroy(msp->ms_freeing);
range_tree_destroy(msp->ms_freed);
if (msp->ms_freed != NULL) {
range_tree_destroy(msp->ms_freeing);
range_tree_destroy(msp->ms_freed);
ASSERT3U(spa->spa_unflushed_stats.sus_memused, >=,
metaslab_unflushed_changes_memused(msp));
spa->spa_unflushed_stats.sus_memused -=
metaslab_unflushed_changes_memused(msp);
range_tree_vacate(msp->ms_unflushed_allocs, NULL, NULL);
range_tree_destroy(msp->ms_unflushed_allocs);
range_tree_destroy(msp->ms_checkpointing);
range_tree_vacate(msp->ms_unflushed_frees, NULL, NULL);
range_tree_destroy(msp->ms_unflushed_frees);
ASSERT3U(spa->spa_unflushed_stats.sus_memused, >=,
metaslab_unflushed_changes_memused(msp));
spa->spa_unflushed_stats.sus_memused -=
metaslab_unflushed_changes_memused(msp);
range_tree_vacate(msp->ms_unflushed_allocs, NULL, NULL);
range_tree_destroy(msp->ms_unflushed_allocs);
range_tree_destroy(msp->ms_checkpointing);
range_tree_vacate(msp->ms_unflushed_frees, NULL, NULL);
range_tree_destroy(msp->ms_unflushed_frees);
for (int t = 0; t < TXG_SIZE; t++) {
range_tree_destroy(msp->ms_allocating[t]);
}
for (int t = 0; t < TXG_DEFER_SIZE; t++) {
range_tree_destroy(msp->ms_defer[t]);
}
for (int t = 0; t < TXG_SIZE; t++) {
range_tree_destroy(msp->ms_allocating[t]);
}
for (int t = 0; t < TXG_DEFER_SIZE; t++) {
range_tree_destroy(msp->ms_defer[t]);
}
ASSERT0(msp->ms_deferspace);
@ -3926,17 +3928,15 @@ metaslab_sync(metaslab_t *msp, uint64_t txg)
/*
* This metaslab has just been added so there's no work to do now.
*/
if (msp->ms_freeing == NULL) {
ASSERT3P(alloctree, ==, NULL);
if (msp->ms_new) {
ASSERT0(range_tree_space(alloctree));
ASSERT0(range_tree_space(msp->ms_freeing));
ASSERT0(range_tree_space(msp->ms_freed));
ASSERT0(range_tree_space(msp->ms_checkpointing));
ASSERT0(range_tree_space(msp->ms_trim));
return;
}
ASSERT3P(alloctree, !=, NULL);
ASSERT3P(msp->ms_freeing, !=, NULL);
ASSERT3P(msp->ms_freed, !=, NULL);
ASSERT3P(msp->ms_checkpointing, !=, NULL);
ASSERT3P(msp->ms_trim, !=, NULL);
/*
* Normally, we don't want to process a metaslab if there are no
* allocations or frees to perform. However, if the metaslab is being
@ -4240,54 +4240,15 @@ metaslab_sync_done(metaslab_t *msp, uint64_t txg)
mutex_enter(&msp->ms_lock);
/*
* If this metaslab is just becoming available, initialize its
* range trees and add its capacity to the vdev.
*/
if (msp->ms_freed == NULL) {
range_seg_type_t type;
uint64_t shift, start;
type = metaslab_calculate_range_tree_type(vd, msp, &start,
&shift);
for (int t = 0; t < TXG_SIZE; t++) {
ASSERT(msp->ms_allocating[t] == NULL);
msp->ms_allocating[t] = range_tree_create(NULL, type,
NULL, start, shift);
}
ASSERT3P(msp->ms_freeing, ==, NULL);
msp->ms_freeing = range_tree_create(NULL, type, NULL, start,
shift);
ASSERT3P(msp->ms_freed, ==, NULL);
msp->ms_freed = range_tree_create(NULL, type, NULL, start,
shift);
for (int t = 0; t < TXG_DEFER_SIZE; t++) {
ASSERT3P(msp->ms_defer[t], ==, NULL);
msp->ms_defer[t] = range_tree_create(NULL, type, NULL,
start, shift);
}
ASSERT3P(msp->ms_checkpointing, ==, NULL);
msp->ms_checkpointing = range_tree_create(NULL, type, NULL,
start, shift);
ASSERT3P(msp->ms_unflushed_allocs, ==, NULL);
msp->ms_unflushed_allocs = range_tree_create(NULL, type, NULL,
start, shift);
metaslab_rt_arg_t *mrap = kmem_zalloc(sizeof (*mrap), KM_SLEEP);
mrap->mra_bt = &msp->ms_unflushed_frees_by_size;
mrap->mra_floor_shift = metaslab_by_size_min_shift;
ASSERT3P(msp->ms_unflushed_frees, ==, NULL);
msp->ms_unflushed_frees = range_tree_create(&metaslab_rt_ops,
type, mrap, start, shift);
if (msp->ms_new) {
/* this is a new metaslab, add its capacity to the vdev */
metaslab_space_update(vd, mg->mg_class, 0, 0, msp->ms_size);
/* there should be no allocations nor frees at this point */
VERIFY0(msp->ms_allocated_this_txg);
VERIFY0(range_tree_space(msp->ms_freed));
}
ASSERT0(range_tree_space(msp->ms_freeing));
ASSERT0(range_tree_space(msp->ms_checkpointing));

View File

@ -20,7 +20,7 @@
*/
/*
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
* Copyright (c) 2012, 2015 by Delphix. All rights reserved.
* Copyright (c) 2012, 2021 by Delphix. All rights reserved.
*/
#include <sys/zfs_context.h>
@ -324,4 +324,12 @@ zfs_refcount_not_held(zfs_refcount_t *rc, const void *holder)
mutex_exit(&rc->rc_mtx);
return (B_TRUE);
}
/* BEGIN CSTYLED */
ZFS_MODULE_PARAM(zfs, ,reference_tracking_enable, INT, ZMOD_RW,
"Track reference holders to refcount_t objects");
ZFS_MODULE_PARAM(zfs, ,reference_history, INT, ZMOD_RW,
"Maximum reference holders being tracked");
/* END CSTYLED */
#endif /* ZFS_DEBUG */

View File

@ -5105,10 +5105,8 @@ vdev_is_bootable(vdev_t *vd)
if (!vd->vdev_ops->vdev_op_leaf) {
const char *vdev_type = vd->vdev_ops->vdev_op_type;
if (strcmp(vdev_type, VDEV_TYPE_MISSING) == 0 ||
strcmp(vdev_type, VDEV_TYPE_INDIRECT) == 0) {
if (strcmp(vdev_type, VDEV_TYPE_MISSING) == 0)
return (B_FALSE);
}
}
for (int c = 0; c < vd->vdev_children; c++) {

View File

@ -632,236 +632,6 @@ vdev_draid_group_to_offset(vdev_t *vd, uint64_t group)
return (group * vdc->vdc_groupsz);
}
/*
 * vsd_free hook for dRAID zios.  Marks the map stored in zio->io_vsd as
 * freed and releases it right away unless checksum reports still hold
 * references (rm_reports != 0); in that case the release is left to
 * vdev_draid_cksum_free().
 */
static void
vdev_draid_map_free_vsd(zio_t *zio)
{
	raidz_map_t *rm = zio->io_vsd;

	ASSERT0(rm->rm_freed);
	rm->rm_freed = B_TRUE;

	/* Outstanding reports keep the map alive. */
	if (rm->rm_reports != 0)
		return;

	vdev_raidz_map_free(rm);
}
/*
 * zcr_free hook for dRAID checksum reports.  Drops one report reference
 * on the map passed as arg; the map itself is released only once the
 * last reference is gone and vdev_draid_map_free_vsd() has already
 * marked it freed.
 */
/*ARGSUSED*/
static void
vdev_draid_cksum_free(void *arg, size_t ignored)
{
	raidz_map_t *rm = arg;

	ASSERT3U(rm->rm_reports, >, 0);

	rm->rm_reports--;
	if (rm->rm_reports != 0)
		return;

	if (rm->rm_freed)
		vdev_raidz_map_free(rm);
}
/*
 * Checksum-report completion callback for dRAID; installed as zcr_finish
 * by vdev_draid_cksum_report().
 *
 * zcr->zcr_cbdata is the raidz map and zcr->zcr_cbinfo the index of the
 * column the report is about.  good_data is the known-good logical data,
 * or NULL when none is available.
 *
 * The function picks out the part of good_data that corresponds to the
 * reported column (regenerating parity from good_data when the column is
 * a parity column) and hands it, together with the column's current
 * buffer, to zfs_ereport_finish_checksum().  When good_data is NULL or
 * the map has more than one row, the report is finished without buffers.
 */
static void
vdev_draid_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data)
{
raidz_map_t *rm = zcr->zcr_cbdata;
const size_t c = zcr->zcr_cbinfo;
/* zcr_sector is used below as the size of one skip sector */
uint64_t skip_size = zcr->zcr_sector;
uint64_t parity_size;
size_t x, offset, size;
if (good_data == NULL) {
zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE);
return;
}
/*
* Detailed cksum reporting is currently only supported for single
* row draid mappings, this covers the vast majority of zios. Only
* a dRAID zio which spans groups will have multiple rows.
*/
if (rm->rm_nrows != 1) {
zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE);
return;
}
raidz_row_t *rr = rm->rm_row[0];
const abd_t *good = NULL;
const abd_t *bad = rr->rr_col[c].rc_abd;
/* columns below rr_firstdatacol are parity, the rest are data */
if (c < rr->rr_firstdatacol) {
/*
* The first time through, calculate the parity blocks for
* the good data (this relies on the fact that the good
* data never changes for a given logical zio)
*/
if (rr->rr_col[0].rc_gdata == NULL) {
abd_t *bad_parity[VDEV_DRAID_MAXPARITY];
/*
* Set up the rr_col[]s to generate the parity for
* good_data, first saving the parity bufs and
* replacing them with buffers to hold the result.
*/
for (x = 0; x < rr->rr_firstdatacol; x++) {
bad_parity[x] = rr->rr_col[x].rc_abd;
rr->rr_col[x].rc_abd = rr->rr_col[x].rc_gdata =
abd_alloc_sametype(rr->rr_col[x].rc_abd,
rr->rr_col[x].rc_size);
}
/*
* Fill in the data columns from good_data being
* careful to pad short columns and empty columns
* with a skip sector.
*/
uint64_t good_size = abd_get_size((abd_t *)good_data);
offset = 0;
/* x continues from rr_firstdatacol, where the loop above stopped */
for (; x < rr->rr_cols; x++) {
abd_free(rr->rr_col[x].rc_abd);
if (offset == good_size) {
/* empty data column (small write) */
rr->rr_col[x].rc_abd =
abd_get_zeros(skip_size);
} else if (x < rr->rr_bigcols) {
/* this is a "big column" */
size = rr->rr_col[x].rc_size;
rr->rr_col[x].rc_abd =
abd_get_offset_size(
(abd_t *)good_data, offset, size);
offset += size;
} else {
/* short data column, add skip sector */
size = rr->rr_col[x].rc_size -skip_size;
rr->rr_col[x].rc_abd = abd_alloc(
rr->rr_col[x].rc_size, B_TRUE);
abd_copy_off(rr->rr_col[x].rc_abd,
(abd_t *)good_data, 0, offset,
size);
abd_zero_off(rr->rr_col[x].rc_abd,
size, skip_size);
offset += size;
}
}
/*
* Construct the parity from the good data.
*/
vdev_raidz_generate_parity_row(rm, rr);
/* restore everything back to its original state */
for (x = 0; x < rr->rr_firstdatacol; x++)
rr->rr_col[x].rc_abd = bad_parity[x];
/*
* Data columns are re-pointed at consecutive slices of
* rr_abd_copy, the copy set up by vdev_draid_cksum_report().
*/
offset = 0;
for (x = rr->rr_firstdatacol; x < rr->rr_cols; x++) {
abd_free(rr->rr_col[x].rc_abd);
rr->rr_col[x].rc_abd = abd_get_offset_size(
rr->rr_abd_copy, offset,
rr->rr_col[x].rc_size);
offset += rr->rr_col[x].rc_size;
}
}
/* rc_gdata holds the parity generated from good_data above */
ASSERT3P(rr->rr_col[c].rc_gdata, !=, NULL);
good = abd_get_offset_size(rr->rr_col[c].rc_gdata, 0,
rr->rr_col[c].rc_size);
} else {
/* adjust good_data to point at the start of our column */
/* column 0's size serves as the full ("big") column size */
parity_size = size = rr->rr_col[0].rc_size;
if (c >= rr->rr_bigcols) {
size -= skip_size;
zcr->zcr_length = size;
}
/* empty column */
if (size == 0) {
zfs_ereport_finish_checksum(zcr, NULL, NULL, B_TRUE);
return;
}
/*
* Sum the good_data bytes taken by the data columns before c:
* a full column for big columns, one skip sector less otherwise.
*/
offset = 0;
for (x = rr->rr_firstdatacol; x < c; x++) {
if (x < rr->rr_bigcols) {
offset += parity_size;
} else {
offset += parity_size - skip_size;
}
}
good = abd_get_offset_size((abd_t *)good_data, offset, size);
}
/* we drop the ereport if it ends up that the data was good */
zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE);
/* good came from abd_get_offset_size() in either branch above */
abd_free((abd_t *)good);
}
/*
 * vsd_cksum_report hook for dRAID, reached indirectly through
 * zfs_ereport_start_checksum() when a read fails completely.
 *
 * Wires the report to this map (callback data, column index taken from
 * arg, finish/free callbacks) and takes a report reference.  On the first
 * report for a map it also preserves everything that was read from disk,
 * so that vdev_draid_cksum_finish() can compare it with the good data.
 */
static void
vdev_draid_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg)
{
	raidz_map_t *rm = zio->io_vsd;
	size_t c = (size_t)(uintptr_t)arg;

	/* Attach the report to the map and count it. */
	zcr->zcr_cbdata = rm;
	zcr->zcr_cbinfo = c;
	zcr->zcr_finish = vdev_draid_cksum_finish;
	zcr->zcr_free = vdev_draid_cksum_free;

	rm->rm_reports++;
	ASSERT3U(rm->rm_reports, >, 0);

	/*
	 * Only the first report for this raidz_map_t has to copy the data
	 * aside (there is no guarantee the zio's buffer won't be re-used
	 * for something else).  Parity already lives in separate buffers
	 * and needs no copy.  All columns should have been expanded by
	 * vdev_draid_map_alloc_empty() when attempting reconstruction.
	 */
	if (rm->rm_row[0]->rr_abd_copy == NULL) {
		for (int row = 0; row < rm->rm_nrows; row++) {
			raidz_row_t *rr = rm->rm_row[row];
			size_t size = 0;
			size_t offset = 0;

			/* Total bytes held by this row's data columns. */
			for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++) {
				ASSERT3U(rr->rr_col[c].rc_size, ==,
				    rr->rr_col[0].rc_size);
				size += rr->rr_col[c].rc_size;
			}

			rr->rr_abd_copy = abd_alloc_for_io(size, B_FALSE);

			/*
			 * Move each data column into its slice of the copy
			 * and make the column point at that slice.
			 */
			for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++) {
				raidz_col_t *rc = &rr->rr_col[c];
				abd_t *slice = abd_get_offset_size(
				    rr->rr_abd_copy, offset, rc->rc_size);

				abd_copy(slice, rc->rc_abd, rc->rc_size);
				abd_free(rc->rc_abd);
				rc->rc_abd = slice;
				offset += rc->rc_size;
			}

			ASSERT3U(offset, ==, size);
		}
	}
}
/*
 * vsd callbacks for dRAID zios: how to free the map hung off io_vsd and
 * how to set up a checksum report for it.
 */
const zio_vsd_ops_t vdev_draid_vsd_ops = {
	.vsd_free = vdev_draid_map_free_vsd,
	.vsd_cksum_report = vdev_draid_cksum_report,
};
/*
* Full stripe writes. When writing, all columns (D+P) are required. Parity
* is calculated over all the columns, including empty zero filled sectors,
@ -1208,7 +978,6 @@ vdev_draid_map_alloc_row(zio_t *zio, raidz_row_t **rrp, uint64_t io_offset,
rr->rr_missingdata = 0;
rr->rr_missingparity = 0;
rr->rr_firstdatacol = vdc->vdc_nparity;
rr->rr_abd_copy = NULL;
rr->rr_abd_empty = NULL;
#ifdef ZFS_DEBUG
rr->rr_offset = io_offset;
@ -1230,7 +999,6 @@ vdev_draid_map_alloc_row(zio_t *zio, raidz_row_t **rrp, uint64_t io_offset,
rc->rc_devidx = vdev_draid_permute_id(vdc, base, iter, c);
rc->rc_offset = physical_offset;
rc->rc_abd = NULL;
rc->rc_gdata = NULL;
rc->rc_orig_data = NULL;
rc->rc_error = 0;
rc->rc_tried = 0;
@ -1328,9 +1096,6 @@ vdev_draid_map_alloc(zio_t *zio)
if (nrows == 2)
rm->rm_row[1] = rr[1];
zio->io_vsd = rm;
zio->io_vsd_ops = &vdev_draid_vsd_ops;
return (rm);
}
@ -2183,12 +1948,13 @@ static void
vdev_draid_io_start(zio_t *zio)
{
vdev_t *vd __maybe_unused = zio->io_vd;
raidz_map_t *rm;
ASSERT3P(vd->vdev_ops, ==, &vdev_draid_ops);
ASSERT3U(zio->io_offset, ==, vdev_draid_get_astart(vd, zio->io_offset));
rm = vdev_draid_map_alloc(zio);
raidz_map_t *rm = vdev_draid_map_alloc(zio);
zio->io_vsd = rm;
zio->io_vsd_ops = &vdev_raidz_vsd_ops;
if (zio->io_type == ZIO_TYPE_WRITE) {
for (int i = 0; i < rm->rm_nrows; i++) {

View File

@ -315,7 +315,6 @@ vdev_indirect_map_free(zio_t *zio)
static const zio_vsd_ops_t vdev_indirect_vsd_ops = {
.vsd_free = vdev_indirect_map_free,
.vsd_cksum_report = zio_vsd_default_cksum_report
};
/*

View File

@ -174,7 +174,6 @@ vdev_mirror_map_free(zio_t *zio)
static const zio_vsd_ops_t vdev_mirror_vsd_ops = {
.vsd_free = vdev_mirror_map_free,
.vsd_cksum_report = zio_vsd_default_cksum_report
};
static int
@ -379,8 +378,6 @@ vdev_mirror_map_init(zio_t *zio)
}
}
zio->io_vsd = mm;
zio->io_vsd_ops = &vdev_mirror_vsd_ops;
return (mm);
}
@ -629,6 +626,8 @@ vdev_mirror_io_start(zio_t *zio)
int c, children;
mm = vdev_mirror_map_init(zio);
zio->io_vsd = mm;
zio->io_vsd_ops = &vdev_mirror_vsd_ops;
if (mm == NULL) {
ASSERT(!spa_trust_config(zio->io_spa));

View File

@ -143,15 +143,10 @@ vdev_raidz_row_free(raidz_row_t *rr)
if (rc->rc_size != 0)
abd_free(rc->rc_abd);
if (rc->rc_gdata != NULL)
abd_free(rc->rc_gdata);
if (rc->rc_orig_data != NULL)
zio_buf_free(rc->rc_orig_data, rc->rc_size);
abd_free(rc->rc_orig_data);
}
if (rr->rr_abd_copy != NULL)
abd_free(rr->rr_abd_copy);
if (rr->rr_abd_empty != NULL)
abd_free(rr->rr_abd_empty);
@ -172,175 +167,11 @@ vdev_raidz_map_free_vsd(zio_t *zio)
{
raidz_map_t *rm = zio->io_vsd;
ASSERT0(rm->rm_freed);
rm->rm_freed = B_TRUE;
if (rm->rm_reports == 0) {
vdev_raidz_map_free(rm);
}
vdev_raidz_map_free(rm);
}
/*
 * zcr_free hook for RAID-Z checksum reports.  Drops one report reference
 * on the map passed as arg and releases the map once no references
 * remain and the map has already been marked freed (rm_freed).
 */
/*ARGSUSED*/
static void
vdev_raidz_cksum_free(void *arg, size_t ignored)
{
	raidz_map_t *rm = arg;

	ASSERT3U(rm->rm_reports, >, 0);

	rm->rm_reports--;
	if (rm->rm_reports != 0)
		return;

	if (rm->rm_freed)
		vdev_raidz_map_free(rm);
}
/*
 * Checksum-report completion callback for RAID-Z; installed as zcr_finish
 * by vdev_raidz_cksum_report().
 *
 * zcr->zcr_cbdata is the raidz map and zcr->zcr_cbinfo the index of the
 * column the report is about.  good_data is the known-good logical data,
 * or NULL when none is available, in which case the report is finished
 * without buffers.
 *
 * Otherwise the part of good_data matching the reported column is
 * located (parity is regenerated from good_data for a parity column) and
 * passed, together with the column's current buffer, to
 * zfs_ereport_finish_checksum().
 */
static void
vdev_raidz_cksum_finish(zio_cksum_report_t *zcr, const abd_t *good_data)
{
raidz_map_t *rm = zcr->zcr_cbdata;
const size_t c = zcr->zcr_cbinfo;
size_t x, offset;
if (good_data == NULL) {
zfs_ereport_finish_checksum(zcr, NULL, NULL, B_FALSE);
return;
}
/* only row 0 is examined below */
ASSERT3U(rm->rm_nrows, ==, 1);
raidz_row_t *rr = rm->rm_row[0];
const abd_t *good = NULL;
const abd_t *bad = rr->rr_col[c].rc_abd;
/* columns below rr_firstdatacol are parity, the rest are data */
if (c < rr->rr_firstdatacol) {
/*
* The first time through, calculate the parity blocks for
* the good data (this relies on the fact that the good
* data never changes for a given logical ZIO)
*/
if (rr->rr_col[0].rc_gdata == NULL) {
abd_t *bad_parity[VDEV_RAIDZ_MAXPARITY];
/*
* Set up the rr_col[]s to generate the parity for
* good_data, first saving the parity bufs and
* replacing them with buffers to hold the result.
*/
for (x = 0; x < rr->rr_firstdatacol; x++) {
bad_parity[x] = rr->rr_col[x].rc_abd;
rr->rr_col[x].rc_abd = rr->rr_col[x].rc_gdata =
abd_alloc_sametype(rr->rr_col[x].rc_abd,
rr->rr_col[x].rc_size);
}
/* fill in the data columns from good_data */
offset = 0;
/* x continues from rr_firstdatacol, where the loop above stopped */
for (; x < rr->rr_cols; x++) {
abd_free(rr->rr_col[x].rc_abd);
rr->rr_col[x].rc_abd =
abd_get_offset_size((abd_t *)good_data,
offset, rr->rr_col[x].rc_size);
offset += rr->rr_col[x].rc_size;
}
/*
* Construct the parity from the good data.
*/
vdev_raidz_generate_parity_row(rm, rr);
/* restore everything back to its original state */
for (x = 0; x < rr->rr_firstdatacol; x++)
rr->rr_col[x].rc_abd = bad_parity[x];
/*
* Data columns are re-pointed at consecutive slices of
* rr_abd_copy, the copy set up by vdev_raidz_cksum_report().
*/
offset = 0;
for (x = rr->rr_firstdatacol; x < rr->rr_cols; x++) {
abd_free(rr->rr_col[x].rc_abd);
rr->rr_col[x].rc_abd = abd_get_offset_size(
rr->rr_abd_copy, offset,
rr->rr_col[x].rc_size);
offset += rr->rr_col[x].rc_size;
}
}
/* rc_gdata holds the parity generated from good_data above */
ASSERT3P(rr->rr_col[c].rc_gdata, !=, NULL);
good = abd_get_offset_size(rr->rr_col[c].rc_gdata, 0,
rr->rr_col[c].rc_size);
} else {
/* adjust good_data to point at the start of our column */
offset = 0;
for (x = rr->rr_firstdatacol; x < c; x++)
offset += rr->rr_col[x].rc_size;
good = abd_get_offset_size((abd_t *)good_data, offset,
rr->rr_col[c].rc_size);
}
/* we drop the ereport if it ends up that the data was good */
zfs_ereport_finish_checksum(zcr, good, bad, B_TRUE);
/* good came from abd_get_offset_size() in either branch above */
abd_free((abd_t *)good);
}
/*
 * vsd_cksum_report hook for RAID-Z, reached indirectly through
 * zfs_ereport_start_checksum() when a read fails completely.
 *
 * Wires the report to this map (callback data, column index taken from
 * arg, finish/free callbacks) and takes a report reference.  On the first
 * report for a map it also preserves everything that was read from disk,
 * so that vdev_raidz_cksum_finish() can compare it with the good data.
 */
static void
vdev_raidz_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *arg)
{
	raidz_map_t *rm = zio->io_vsd;
	size_t c = (size_t)(uintptr_t)arg;

	/* Attach the report to the map and count it. */
	zcr->zcr_cbdata = rm;
	zcr->zcr_cbinfo = c;
	zcr->zcr_finish = vdev_raidz_cksum_finish;
	zcr->zcr_free = vdev_raidz_cksum_free;

	rm->rm_reports++;
	ASSERT3U(rm->rm_reports, >, 0);
	ASSERT3U(rm->rm_nrows, ==, 1);

	/*
	 * Only the first report for this raidz_map_t has to copy the data
	 * aside (there is no guarantee the zio's buffer won't be re-used
	 * for something else).  Parity already lives in separate buffers
	 * and needs no copy.
	 */
	if (rm->rm_row[0]->rr_abd_copy == NULL) {
		for (int row = 0; row < rm->rm_nrows; row++) {
			raidz_row_t *rr = rm->rm_row[row];
			size_t size = 0;
			size_t offset = 0;

			/* Total bytes held by this row's data columns. */
			for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++)
				size += rr->rr_col[c].rc_size;

			rr->rr_abd_copy = abd_alloc_for_io(size, B_FALSE);

			/*
			 * Move each data column into its slice of the copy
			 * and make the column point at that slice.
			 */
			for (c = rr->rr_firstdatacol; c < rr->rr_cols; c++) {
				raidz_col_t *rc = &rr->rr_col[c];
				abd_t *slice = abd_get_offset_size(
				    rr->rr_abd_copy, offset, rc->rc_size);

				abd_copy(slice, rc->rc_abd, rc->rc_size);
				abd_free(rc->rc_abd);
				rc->rc_abd = slice;
				offset += rc->rc_size;
			}

			ASSERT3U(offset, ==, size);
		}
	}
}
static const zio_vsd_ops_t vdev_raidz_vsd_ops = {
const zio_vsd_ops_t vdev_raidz_vsd_ops = {
.vsd_free = vdev_raidz_map_free_vsd,
.vsd_cksum_report = vdev_raidz_cksum_report
};
/*
@ -414,7 +245,6 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols,
rr->rr_missingdata = 0;
rr->rr_missingparity = 0;
rr->rr_firstdatacol = nparity;
rr->rr_abd_copy = NULL;
rr->rr_abd_empty = NULL;
rr->rr_nempty = 0;
#ifdef ZFS_DEBUG
@ -435,7 +265,6 @@ vdev_raidz_map_alloc(zio_t *zio, uint64_t ashift, uint64_t dcols,
rc->rc_devidx = col;
rc->rc_offset = coff;
rc->rc_abd = NULL;
rc->rc_gdata = NULL;
rc->rc_orig_data = NULL;
rc->rc_error = 0;
rc->rc_tried = 0;
@ -831,7 +660,7 @@ vdev_raidz_reconst_pq_tail_func(void *xbuf, size_t size, void *private)
return (0);
}
static int
static void
vdev_raidz_reconstruct_p(raidz_row_t *rr, int *tgts, int ntgts)
{
int x = tgts[0];
@ -860,11 +689,9 @@ vdev_raidz_reconstruct_p(raidz_row_t *rr, int *tgts, int ntgts)
(void) abd_iterate_func2(dst, src, 0, 0, size,
vdev_raidz_reconst_p_func, NULL);
}
return (1 << VDEV_RAIDZ_P);
}
static int
static void
vdev_raidz_reconstruct_q(raidz_row_t *rr, int *tgts, int ntgts)
{
int x = tgts[0];
@ -905,11 +732,9 @@ vdev_raidz_reconstruct_q(raidz_row_t *rr, int *tgts, int ntgts)
struct reconst_q_struct rq = { abd_to_buf(src), exp };
(void) abd_iterate_func(dst, 0, rr->rr_col[x].rc_size,
vdev_raidz_reconst_q_post_func, &rq);
return (1 << VDEV_RAIDZ_Q);
}
static int
static void
vdev_raidz_reconstruct_pq(raidz_row_t *rr, int *tgts, int ntgts)
{
uint8_t *p, *q, *pxy, *qxy, tmp, a, b, aexp, bexp;
@ -995,8 +820,6 @@ vdev_raidz_reconstruct_pq(raidz_row_t *rr, int *tgts, int ntgts)
*/
rr->rr_col[VDEV_RAIDZ_P].rc_abd = pdata;
rr->rr_col[VDEV_RAIDZ_Q].rc_abd = qdata;
return ((1 << VDEV_RAIDZ_P) | (1 << VDEV_RAIDZ_Q));
}
/* BEGIN CSTYLED */
@ -1355,7 +1178,7 @@ vdev_raidz_matrix_reconstruct(raidz_row_t *rr, int n, int nmissing,
kmem_free(p, psize);
}
static int
static void
vdev_raidz_reconstruct_general(raidz_row_t *rr, int *tgts, int ntgts)
{
int n, i, c, t, tt;
@ -1370,8 +1193,6 @@ vdev_raidz_reconstruct_general(raidz_row_t *rr, int *tgts, int ntgts)
abd_t **bufs = NULL;
int code = 0;
/*
* Matrix reconstruction can't use scatter ABDs yet, so we allocate
* temporary linear ABDs if any non-linear ABDs are found.
@ -1426,15 +1247,10 @@ vdev_raidz_reconstruct_general(raidz_row_t *rr, int *tgts, int ntgts)
continue;
}
code |= 1 << c;
parity_map[i] = c;
i++;
}
ASSERT(code != 0);
ASSERT3U(code, <, 1 << VDEV_RAIDZ_MAXPARITY);
psize = (sizeof (rows[0][0]) + sizeof (invrows[0][0])) *
nmissing_rows * n + sizeof (used[0]) * n;
p = kmem_alloc(psize, KM_SLEEP);
@ -1497,18 +1313,15 @@ vdev_raidz_reconstruct_general(raidz_row_t *rr, int *tgts, int ntgts)
}
kmem_free(bufs, rr->rr_cols * sizeof (abd_t *));
}
return (code);
}
static int
static void
vdev_raidz_reconstruct_row(raidz_map_t *rm, raidz_row_t *rr,
const int *t, int nt)
{
int tgts[VDEV_RAIDZ_MAXPARITY], *dt;
int ntgts;
int i, c, ret;
int code;
int nbadparity, nbaddata;
int parity_valid[VDEV_RAIDZ_MAXPARITY];
@ -1541,20 +1354,24 @@ vdev_raidz_reconstruct_row(raidz_map_t *rm, raidz_row_t *rr,
/* Reconstruct using the new math implementation */
ret = vdev_raidz_math_reconstruct(rm, rr, parity_valid, dt, nbaddata);
if (ret != RAIDZ_ORIGINAL_IMPL)
return (ret);
return;
/*
* See if we can use any of our optimized reconstruction routines.
*/
switch (nbaddata) {
case 1:
if (parity_valid[VDEV_RAIDZ_P])
return (vdev_raidz_reconstruct_p(rr, dt, 1));
if (parity_valid[VDEV_RAIDZ_P]) {
vdev_raidz_reconstruct_p(rr, dt, 1);
return;
}
ASSERT(rr->rr_firstdatacol > 1);
if (parity_valid[VDEV_RAIDZ_Q])
return (vdev_raidz_reconstruct_q(rr, dt, 1));
if (parity_valid[VDEV_RAIDZ_Q]) {
vdev_raidz_reconstruct_q(rr, dt, 1);
return;
}
ASSERT(rr->rr_firstdatacol > 2);
break;
@ -1563,18 +1380,17 @@ vdev_raidz_reconstruct_row(raidz_map_t *rm, raidz_row_t *rr,
ASSERT(rr->rr_firstdatacol > 1);
if (parity_valid[VDEV_RAIDZ_P] &&
parity_valid[VDEV_RAIDZ_Q])
return (vdev_raidz_reconstruct_pq(rr, dt, 2));
parity_valid[VDEV_RAIDZ_Q]) {
vdev_raidz_reconstruct_pq(rr, dt, 2);
return;
}
ASSERT(rr->rr_firstdatacol > 2);
break;
}
code = vdev_raidz_reconstruct_general(rr, tgts, ntgts);
ASSERT(code < (1 << VDEV_RAIDZ_MAXPARITY));
ASSERT(code > 0);
return (code);
vdev_raidz_reconstruct_general(rr, tgts, ntgts);
}
static int
@ -1811,10 +1627,11 @@ vdev_raidz_io_start(zio_t *zio)
vdev_t *vd = zio->io_vd;
vdev_t *tvd = vd->vdev_top;
vdev_raidz_t *vdrz = vd->vdev_tsd;
raidz_map_t *rm;
rm = vdev_raidz_map_alloc(zio, tvd->vdev_ashift,
raidz_map_t *rm = vdev_raidz_map_alloc(zio, tvd->vdev_ashift,
vdrz->vd_logical_width, vdrz->vd_nparity);
zio->io_vsd = rm;
zio->io_vsd_ops = &vdev_raidz_vsd_ops;
/*
* Until raidz expansion is implemented all maps for a raidz vdev
@ -1823,9 +1640,6 @@ vdev_raidz_io_start(zio_t *zio)
ASSERT3U(rm->rm_nrows, ==, 1);
raidz_row_t *rr = rm->rm_row[0];
zio->io_vsd = rm;
zio->io_vsd_ops = &vdev_raidz_vsd_ops;
if (zio->io_type == ZIO_TYPE_WRITE) {
vdev_raidz_io_start_write(zio, rr, tvd->vdev_ashift);
} else {
@ -2021,7 +1835,7 @@ raidz_restore_orig_data(raidz_map_t *rm)
for (int c = 0; c < rr->rr_cols; c++) {
raidz_col_t *rc = &rr->rr_col[c];
if (rc->rc_need_orig_restore) {
abd_copy_from_buf(rc->rc_abd,
abd_copy(rc->rc_abd,
rc->rc_orig_data, rc->rc_size);
rc->rc_need_orig_restore = B_FALSE;
}
@ -2062,9 +1876,9 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity)
if (rc->rc_devidx == ltgts[lt]) {
if (rc->rc_orig_data == NULL) {
rc->rc_orig_data =
zio_buf_alloc(rc->rc_size);
abd_copy_to_buf(
rc->rc_orig_data,
abd_alloc_linear(
rc->rc_size, B_TRUE);
abd_copy(rc->rc_orig_data,
rc->rc_abd, rc->rc_size);
}
rc->rc_need_orig_restore = B_TRUE;
@ -2082,10 +1896,8 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity)
raidz_restore_orig_data(rm);
return (EINVAL);
}
rr->rr_code = 0;
if (dead_data > 0)
rr->rr_code = vdev_raidz_reconstruct_row(rm, rr,
my_tgts, t);
vdev_raidz_reconstruct_row(rm, rr, my_tgts, t);
}
/* Check for success */
@ -2111,7 +1923,7 @@ raidz_reconstruct(zio_t *zio, int *ltgts, int ntgts, int nparity)
if (rc->rc_error == 0 &&
c >= rr->rr_firstdatacol) {
raidz_checksum_error(zio,
rc, rc->rc_gdata);
rc, rc->rc_orig_data);
rc->rc_error =
SET_ERROR(ECKSUM);
}
@ -2318,11 +2130,7 @@ vdev_raidz_io_done_write_impl(zio_t *zio, raidz_row_t *rr)
}
}
/*
* return 0 if no reconstruction occurred, otherwise the "code" from
* vdev_raidz_reconstruct().
*/
static int
static void
vdev_raidz_io_done_reconstruct_known_missing(zio_t *zio, raidz_map_t *rm,
raidz_row_t *rr)
{
@ -2330,7 +2138,6 @@ vdev_raidz_io_done_reconstruct_known_missing(zio_t *zio, raidz_map_t *rm,
int parity_untried = 0;
int data_errors = 0;
int total_errors = 0;
int code = 0;
ASSERT3U(rr->rr_missingparity, <=, rr->rr_firstdatacol);
ASSERT3U(rr->rr_missingdata, <=, rr->rr_cols - rr->rr_firstdatacol);
@ -2385,10 +2192,8 @@ vdev_raidz_io_done_reconstruct_known_missing(zio_t *zio, raidz_map_t *rm,
ASSERT(rr->rr_firstdatacol >= n);
code = vdev_raidz_reconstruct_row(rm, rr, tgts, n);
vdev_raidz_reconstruct_row(rm, rr, tgts, n);
}
return (code);
}
/*
@ -2453,7 +2258,7 @@ vdev_raidz_io_done_unrecoverable(zio_t *zio)
(void) zfs_ereport_start_checksum(zio->io_spa,
cvd, &zio->io_bookmark, zio, rc->rc_offset,
rc->rc_size, (void *)(uintptr_t)c, &zbc);
rc->rc_size, &zbc);
mutex_enter(&cvd->vdev_stat_lock);
cvd->vdev_stat.vs_checksum_errors++;
mutex_exit(&cvd->vdev_stat_lock);
@ -2473,8 +2278,7 @@ vdev_raidz_io_done(zio_t *zio)
} else {
for (int i = 0; i < rm->rm_nrows; i++) {
raidz_row_t *rr = rm->rm_row[i];
rr->rr_code =
vdev_raidz_io_done_reconstruct_known_missing(zio,
vdev_raidz_io_done_reconstruct_known_missing(zio,
rm, rr);
}

View File

@ -1125,8 +1125,7 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd,
*/
int
zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb,
struct zio *zio, uint64_t offset, uint64_t length, void *arg,
zio_bad_cksum_t *info)
struct zio *zio, uint64_t offset, uint64_t length, zio_bad_cksum_t *info)
{
zio_cksum_report_t *report;
@ -1144,10 +1143,7 @@ zfs_ereport_start_checksum(spa_t *spa, vdev_t *vd, const zbookmark_phys_t *zb,
report = kmem_zalloc(sizeof (*report), KM_SLEEP);
if (zio->io_vsd != NULL)
zio->io_vsd_ops->vsd_cksum_report(zio, report, arg);
else
zio_vsd_default_cksum_report(zio, report, arg);
zio_vsd_default_cksum_report(zio, report);
/* copy the checksum failure information if it was provided */
if (info != NULL) {

View File

@ -728,7 +728,6 @@ zfs_fuid_info_free(zfs_fuid_info_t *fuidp)
boolean_t
zfs_groupmember(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr)
{
#ifdef HAVE_KSID
uid_t gid;
#ifdef illumos
@ -773,9 +772,6 @@ zfs_groupmember(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr)
*/
gid = zfs_fuid_map_id(zfsvfs, id, cr, ZFS_GROUP);
return (groupmember(gid, cr));
#else
return (B_TRUE);
#endif
}
void

View File

@ -540,6 +540,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
uint32_t blocksize = zp->z_blksz;
itx_wr_state_t write_state;
uintptr_t fsync_cnt;
uint64_t gen = 0;
if (zil_replaying(zilog, tx) || zp->z_unlinked ||
zfs_xattr_owner_unlinked(zp)) {
@ -562,6 +563,9 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
(void) tsd_set(zfs_fsyncer_key, (void *)(fsync_cnt - 1));
}
(void) sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(ZTOZSB(zp)), &gen,
sizeof (gen));
while (resid) {
itx_t *itx;
lr_write_t *lr;
@ -609,6 +613,7 @@ zfs_log_write(zilog_t *zilog, dmu_tx_t *tx, int txtype,
BP_ZERO(&lr->lr_blkptr);
itx->itx_private = ZTOZSB(zp);
itx->itx_gen = gen;
if (!(ioflag & (O_SYNC | O_DSYNC)) && (zp->z_sync_cnt == 0) &&
(fsync_cnt == 0))

View File

@ -740,7 +740,8 @@ static void zfs_get_done(zgd_t *zgd, int error);
* Get data to generate a TX_WRITE intent log record.
*/
int
zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
zfs_get_data(void *arg, uint64_t gen, lr_write_t *lr, char *buf,
struct lwb *lwb, zio_t *zio)
{
zfsvfs_t *zfsvfs = arg;
objset_t *os = zfsvfs->z_os;
@ -751,6 +752,7 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
dmu_buf_t *db;
zgd_t *zgd;
int error = 0;
uint64_t zp_gen;
ASSERT3P(lwb, !=, NULL);
ASSERT3P(zio, !=, NULL);
@ -769,6 +771,16 @@ zfs_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
zfs_zrele_async(zp);
return (SET_ERROR(ENOENT));
}
/* check if generation number matches */
if (sa_lookup(zp->z_sa_hdl, SA_ZPL_GEN(zfsvfs), &zp_gen,
sizeof (zp_gen)) != 0) {
zfs_zrele_async(zp);
return (SET_ERROR(EIO));
}
if (zp_gen != gen) {
zfs_zrele_async(zp);
return (SET_ERROR(ENOENT));
}
zgd = (zgd_t *)kmem_zalloc(sizeof (zgd_t), KM_SLEEP);
zgd->zgd_lwb = lwb;

View File

@ -1744,7 +1744,8 @@ zil_lwb_commit(zilog_t *zilog, itx_t *itx, lwb_t *lwb)
* completed after "lwb_write_zio" completed.
*/
error = zilog->zl_get_data(itx->itx_private,
lrwb, dbuf, lwb, lwb->lwb_write_zio);
itx->itx_gen, lrwb, dbuf, lwb,
lwb->lwb_write_zio);
if (error == EIO) {
txg_wait_synced(zilog->zl_dmu_pool, txg);

View File

@ -3950,7 +3950,7 @@ zio_vsd_default_cksum_finish(zio_cksum_report_t *zcr,
/*ARGSUSED*/
void
zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr, void *ignored)
zio_vsd_default_cksum_report(zio_t *zio, zio_cksum_report_t *zcr)
{
void *abd = abd_alloc_sametype(zio->io_abd, zio->io_size);
@ -4288,7 +4288,7 @@ zio_checksum_verify(zio_t *zio)
!(zio->io_flags & ZIO_FLAG_SPECULATIVE)) {
(void) zfs_ereport_start_checksum(zio->io_spa,
zio->io_vd, &zio->io_bookmark, zio,
zio->io_offset, zio->io_size, NULL, &info);
zio->io_offset, zio->io_size, &info);
mutex_enter(&zio->io_vd->vdev_stat_lock);
zio->io_vd->vdev_stat.vs_checksum_errors++;
mutex_exit(&zio->io_vd->vdev_stat_lock);

View File

@ -673,7 +673,8 @@ zvol_get_done(zgd_t *zgd, int error)
* Get data to generate a TX_WRITE intent log record.
*/
int
zvol_get_data(void *arg, lr_write_t *lr, char *buf, struct lwb *lwb, zio_t *zio)
zvol_get_data(void *arg, uint64_t arg2, lr_write_t *lr, char *buf,
struct lwb *lwb, zio_t *zio)
{
zvol_state_t *zv = arg;
uint64_t offset = lr->lr_offset;

View File

@ -28,6 +28,10 @@ failsafe = callbacks/zfs_failsafe
outputdir = /var/tmp/test_results
tags = ['functional']
[tests/functional/acl/off]
tests = ['posixmode']
tags = ['functional', 'acl']
[tests/functional/alloc_class]
tests = ['alloc_class_001_pos', 'alloc_class_002_neg', 'alloc_class_003_pos',
'alloc_class_004_pos', 'alloc_class_005_pos', 'alloc_class_006_pos',
@ -722,8 +726,8 @@ tests = ['redacted_compressed', 'redacted_contents', 'redacted_deleted',
'redacted_disabled_feature', 'redacted_embedded', 'redacted_holes',
'redacted_incrementals', 'redacted_largeblocks', 'redacted_many_clones',
'redacted_mixed_recsize', 'redacted_mounts', 'redacted_negative',
'redacted_origin', 'redacted_props', 'redacted_resume', 'redacted_size',
'redacted_volume']
'redacted_origin', 'redacted_panic', 'redacted_props', 'redacted_resume',
'redacted_size', 'redacted_volume']
tags = ['functional', 'redacted_send']
[tests/functional/raidz]

View File

@ -22,6 +22,10 @@ failsafe = callbacks/zfs_failsafe
outputdir = /var/tmp/test_results
tags = ['functional']
[tests/functional/acl/off:FreeBSD]
tests = ['dosmode']
tags = ['functional', 'acl']
[tests/functional/cli_root/zfs_jail:FreeBSD]
tests = ['zfs_jail_001_pos']
tags = ['functional', 'cli_root', 'zfs_jail']

View File

@ -30,6 +30,10 @@ failsafe = callbacks/zfs_failsafe
outputdir = /var/tmp/test_results
tags = ['functional']
[tests/functional/acl/off]
tests = ['posixmode']
tags = ['functional', 'acl']
[tests/functional/alloc_class]
tests = ['alloc_class_003_pos', 'alloc_class_004_pos', 'alloc_class_005_pos',
'alloc_class_006_pos', 'alloc_class_008_pos', 'alloc_class_010_pos',

View File

@ -3,4 +3,4 @@ dist_pkgdata_DATA = \
acl.cfg \
acl_common.kshlib
SUBDIRS = posix posix-sa
SUBDIRS = off posix posix-sa

View File

@ -0,0 +1 @@
/dosmode_readonly_write

View File

@ -0,0 +1,16 @@
# Build rules for the acl/off functional tests.
include $(top_srcdir)/config/Rules.am
# Scripts and the helper program are installed into the same test directory.
pkgdatadir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/acl/off
dist_pkgdata_SCRIPTS = \
dosmode.ksh \
posixmode.ksh \
cleanup.ksh \
setup.ksh
pkgexecdir = $(datadir)/@PACKAGE@/zfs-tests/tests/functional/acl/off
# The DOS mode helper is only built when BUILD_FREEBSD is set.
if BUILD_FREEBSD
pkgexec_PROGRAMS = dosmode_readonly_write
dosmode_readonly_write_SOURCES = dosmode_readonly_write.c
endif

View File

@ -0,0 +1,33 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Copyright 2007 Sun Microsystems, Inc. All rights reserved.
# Use is subject to license terms.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/acl/acl_common.kshlib
# Tear down in two steps: first the test users/group (helper from
# acl_common.kshlib; presumably removes what setup.ksh created), then the
# suite's default pool/filesystem cleanup.
cleanup_user_group
default_cleanup

View File

@ -0,0 +1,199 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Portions Copyright 2021 iXsystems, Inc.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/acl/acl_common.kshlib
#
# DESCRIPTION:
# Verify that DOS mode flags function correctly.
#
# These flags are not currently exposed on Linux, so the test is
# only useful on FreeBSD.
#
# STRATEGY:
# 1. ARCHIVE
# 2. HIDDEN
# 3. OFFLINE
# 4. READONLY
# 5. REPARSE
# 6. SPARSE
# 7. SYSTEM
#
verify_runnable "both"
# Registered with log_onexit: remove the test file if a step left it behind.
# -f keeps this quiet when the file has already been removed.
function cleanup
{
rm -f $testfile
}
#
# hasflag <flag> <path>
#
# Succeeds when <flag> is one of the comma-separated file flags that
# `ls -lo` prints for <path> (fifth column). Pass "-" as <flag> to check
# that the listing shows no flags at all.
#
function hasflag
{
	typeset want=$1
	typeset target=$2

	# Put each flag on its own line so grep -x can match it exactly.
	ls -lo $target |
	    awk '{ gsub(",", "\n", $5); print $5 }' |
	    grep -qxF $want
}
log_assert "Verify DOS mode flags function correctly"
log_onexit cleanup
# Directory holding the dosmode_readonly_write helper used in the READONLY
# section below.
tests_base=$STF_SUITE/tests/functional/acl/off
testfile=$TESTDIR/testfile
# Two unprivileged users: one creates/owns the test file, the other is used
# to check that a non-owner cannot change its flags.
owner=$ZFS_ACL_STAFF1
other=$ZFS_ACL_STAFF2
#
# ARCHIVE
#
# This flag is set by ZFS when a file has been updated to indicate that
# the file needs to be archived.
#
# As root: a freshly touched file carries uarch, the flag can be cleared,
# and touching the file again brings it back.
log_must touch $testfile
log_must hasflag uarch $testfile
log_must chflags nouarch $testfile
log_must hasflag - $testfile
log_must touch $testfile
log_must hasflag uarch $testfile
log_must rm $testfile
# As unprivileged users: the owner may clear the flag, another user may
# neither set nor clear it, and the owner's touch sets it again.
log_must user_run $owner touch $testfile
log_must hasflag uarch $testfile
log_must user_run $owner chflags nouarch $testfile
log_mustnot user_run $other chflags uarch $testfile
log_must hasflag - $testfile
log_must user_run $owner touch $testfile
log_mustnot user_run $other chflags nouarch $testfile
log_must hasflag uarch $testfile
log_must user_run $owner rm $testfile
#
# HIDDEN
#
# As root: set the flag, confirm it is listed, then reset all flags with
# `chflags 0` and confirm the listing shows none ("-").
log_must touch $testfile
log_must chflags hidden $testfile
log_must hasflag hidden $testfile
log_must chflags 0 $testfile
log_must hasflag - $testfile
log_must rm $testfile
# As unprivileged users: the owner may set and reset the flag; another
# user's attempts to clear or set it must fail and leave the flags as-is.
log_must user_run $owner touch $testfile
log_must user_run $owner chflags hidden $testfile
log_mustnot user_run $other chflags nohidden $testfile
log_must hasflag hidden $testfile
log_must user_run $owner chflags 0 $testfile
log_mustnot user_run $other chflags hidden $testfile
log_must hasflag - $testfile
log_must user_run $owner rm $testfile
#
# OFFLINE
#
# As root: set the flag, confirm it is listed, then reset all flags with
# `chflags 0` and confirm the listing shows none ("-").
log_must touch $testfile
log_must chflags offline $testfile
log_must hasflag offline $testfile
log_must chflags 0 $testfile
log_must hasflag - $testfile
log_must rm $testfile
# As unprivileged users: the owner may set and reset the flag; another
# user's attempts to clear or set it must fail and leave the flags as-is.
log_must user_run $owner touch $testfile
log_must user_run $owner chflags offline $testfile
log_mustnot user_run $other chflags nooffline $testfile
log_must hasflag offline $testfile
log_must user_run $owner chflags 0 $testfile
log_mustnot user_run $other chflags offline $testfile
log_must hasflag - $testfile
log_must user_run $owner rm $testfile
#
# READONLY
#
# This flag prevents users from writing or appending to the file,
# but root is always allowed the operation.
#
# As root: set the flag, check that root can still append to and read the
# file, then reset the flags and confirm the listing shows none ("-").
log_must touch $testfile
log_must chflags rdonly $testfile
log_must hasflag rdonly $testfile
log_must eval "echo 'root write allowed' >> $testfile"
log_must cat $testfile
log_must chflags 0 $testfile
log_must hasflag - $testfile
log_must rm $testfile
# It is required to still be able to write to an fd that was opened RW before
# READONLY is set. We have a special test program for that.
log_must user_run $owner touch $testfile
log_mustnot user_run $other chflags rdonly $testfile
# The helper opens the file RW as the owner, sets READONLY and writes through
# the already-open fd; the flag stays set on the file afterwards.
log_must user_run $owner $tests_base/dosmode_readonly_write $testfile
log_mustnot user_run $other chflags nordonly $testfile
log_must hasflag rdonly $testfile
# A fresh open for writing by the owner must now fail; root is still allowed.
log_mustnot user_run $owner "echo 'user write forbidden' >> $testfile"
log_must eval "echo 'root write allowed' >> $testfile"
# We are still allowed to read and remove the file when READONLY is set.
log_must user_run $owner cat $testfile
log_must user_run $owner rm $testfile
#
# REPARSE
#
# FIXME: testing REPARSE does not work yet; it is unclear whether the flag
# itself is broken or the test approach is wrong.
#
#
# SPARSE
#
# As root: create a 1m file with truncate, set the flag, confirm it is
# listed, then reset all flags and confirm the listing shows none ("-").
log_must truncate -s 1m $testfile
log_must chflags sparse $testfile
log_must hasflag sparse $testfile
log_must chflags 0 $testfile
log_must hasflag - $testfile
log_must rm $testfile
# As unprivileged users: the owner may set and reset the flag; another
# user's attempts to clear or set it must fail and leave the flags as-is.
log_must user_run $owner truncate -s 1m $testfile
log_must user_run $owner chflags sparse $testfile
log_mustnot user_run $other chflags nosparse $testfile
log_must hasflag sparse $testfile
log_must user_run $owner chflags 0 $testfile
log_mustnot user_run $other chflags sparse $testfile
log_must hasflag - $testfile
log_must user_run $owner rm $testfile
#
# SYSTEM
#
# As root: set the flag, confirm it is listed, then reset all flags with
# `chflags 0` and confirm the listing shows none ("-").
log_must touch $testfile
log_must chflags system $testfile
log_must hasflag system $testfile
log_must chflags 0 $testfile
log_must hasflag - $testfile
log_must rm $testfile
# As unprivileged users: the owner may set and reset the flag; another
# user's attempts to clear or set it must fail and leave the flags as-is.
log_must user_run $owner touch $testfile
log_must user_run $owner chflags system $testfile
log_mustnot user_run $other chflags nosystem $testfile
log_must hasflag system $testfile
log_must user_run $owner chflags 0 $testfile
log_mustnot user_run $other chflags system $testfile
log_must hasflag - $testfile
log_must user_run $owner rm $testfile
# Every section above passed.
log_pass "DOS mode flags function correctly"

View File

@ -0,0 +1,61 @@
/*
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2021 iXsystems, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* Test for correct behavior of DOS mode READONLY flag on a file.
* We should be able to open a file RW, set READONLY, and still write to the fd.
*/
#include <sys/stat.h>

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
/*
 * usage: dosmode_readonly_write PATH
 *
 * Opens PATH read-write (creating it if needed), sets UF_READONLY on it and
 * then writes through the already-open descriptor.  Returns EXIT_SUCCESS
 * when every step succeeds; otherwise prints a diagnostic and exits with
 * EXIT_FAILURE.
 */
int
main(int argc, const char *argv[])
{
	const char *buf = "We should be allowed to write this to the fd.\n";
	const size_t len = strlen(buf);
	const char *path;
	ssize_t written;
	int fd;

	if (argc != 2) {
		fprintf(stderr, "usage: %s PATH\n", argv[0]);
		return (EXIT_FAILURE);
	}
	path = argv[1];

	/* The descriptor has to exist before the flag is applied. */
	fd = open(path, O_CREAT|O_RDWR, 0777);
	if (fd == -1)
		err(EXIT_FAILURE, "%s: open failed", path);
	if (chflags(path, UF_READONLY) == -1)
		err(EXIT_FAILURE, "%s: chflags failed", path);

	written = write(fd, buf, len);
	if (written == -1)
		err(EXIT_FAILURE, "%s: write failed", path);
	/* A short write sets no errno, so report it without strerror text. */
	if ((size_t)written != len)
		errx(EXIT_FAILURE, "%s: short write (%zd of %zu bytes)", path,
		    written, len);

	if (close(fd) == -1)
		err(EXIT_FAILURE, "%s: close failed", path);
	return (EXIT_SUCCESS);
}

View File

@ -0,0 +1,145 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Portions Copyright 2021 iXsystems, Inc.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/acl/acl_common.kshlib
#
# DESCRIPTION:
# Verify that POSIX mode bits function correctly.
#
# These tests are incomplete and will be added to over time.
#
# NOTE: Creating directory entries behaves differently between platforms.
# The parent directory's group is used on FreeBSD, while the effective
# group is used on Linux. We chown to the effective group when creating
# directories and files in these tests to achieve consistency across all
# platforms.
#
# STRATEGY:
# 1. Sanity check the POSIX mode test on tmpfs
# 2. Test POSIX mode bits on ZFS
#
verify_runnable "both"
# Registered with log_onexit: force-unmount the scratch tmpfs and remove both
# the scratch directory and any leftover test directory on the ZFS dataset.
# Neither command is wrapped in log_must, so a failure here (for example when
# the tmpfs is already gone) does not fail the test.
function cleanup
{
umount -f $tmpdir
rm -rf $tmpdir $TESTDIR/dir
}
log_assert "Verify POSIX mode bits function correctly"
log_onexit cleanup

# Unprivileged users and their shared group, as provided by the ACL test
# configuration.
group=$ZFS_ACL_STAFF_GROUP
owner=$ZFS_ACL_STAFF1
other=$ZFS_ACL_STAFF2

# Group applied to the entries root creates: "root" on Linux, "wheel"
# everywhere else.
if ! is_linux; then
	wheel=wheel
else
	wheel=root
fi
#
# test_posix_mode <base>
#
# Creates <base>/dir with mode 007 (no owner or group bits, full access for
# everyone else) and checks who may create and remove <base>/dir/file: first
# with a directory created by root, then with one created by $owner. Every
# new entry is chowned to an explicit group (see the NOTE at the top of this
# file).
#
function test_posix_mode # base
{
typeset base=$1
typeset dir=$base/dir
typeset file=$dir/file
# dir owned by root
log_must mkdir $dir
log_must chown :$wheel $dir
log_must chmod 007 $dir
# file owned by root
# Both root and $other must be able to remove it.
log_must touch $file
log_must chown :$wheel $file
log_must ls -la $dir
log_must rm $file
log_must touch $file
log_must chown :$wheel $file
log_must user_run $other rm $file
# file owned by user
# $owner must be able to create it; $owner, $other and root must each be
# able to remove it.
log_must user_run $owner touch $file
log_must chown :$group $file
log_must ls -la $dir
log_must user_run $owner rm $file
log_must user_run $owner touch $file
log_must chown :$group $file
log_must user_run $other rm $file
log_must user_run $owner touch $file
log_must chown :$group $file
log_must rm $file
log_must rm -rf $dir
# dir owned by user
log_must user_run $owner mkdir $dir
log_must chown :$group $dir
log_must user_run $owner chmod 007 $dir
# file owned by root
# Root can still create and remove entries, but $other must be refused
# (presumably because $other is a member of $group; confirm against setup).
log_must touch $file
log_must chown :$wheel $file
log_must ls -la $dir
log_must rm $file
log_must touch $file
log_must chown :$wheel $file
log_mustnot user_run $other rm $file
log_must rm $file
# file owned by user
# $owner must not be able to create the file in its own directory, so root
# creates it and hands it over; neither user may then remove it.
log_mustnot user_run $owner touch $file
log_must touch $file
log_must chown $owner:$group $file
log_must ls -la $dir
log_mustnot user_run $owner rm $file
log_mustnot user_run $other rm $file
log_must rm $file
log_must rm -rf $dir
}
# Sanity check on tmpfs first
# Running the same checks on tmpfs first validates the test itself against
# another filesystem. The scratch mount point is created under
# $TEST_BASE_DIR and opened up to all users.
tmpdir=$(TMPDIR=$TEST_BASE_DIR mktemp -d)
log_must mount -t tmpfs tmp $tmpdir
log_must chmod 777 $tmpdir
test_posix_mode $tmpdir
log_must umount $tmpdir
log_must rmdir $tmpdir
# Verify ZFS
test_posix_mode $TESTDIR
log_pass "POSIX mode bits function correctly"

View File

@ -0,0 +1,44 @@
#!/bin/ksh -p
#
# CDDL HEADER START
#
# The contents of this file are subject to the terms of the
# Common Development and Distribution License (the "License").
# You may not use this file except in compliance with the License.
#
# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
# or http://www.opensolaris.org/os/licensing.
# See the License for the specific language governing permissions
# and limitations under the License.
#
# When distributing Covered Code, include this CDDL HEADER in each
# file and include the License file at usr/src/OPENSOLARIS.LICENSE.
# If applicable, add the following below this CDDL HEADER, with the
# fields enclosed by brackets "[]" replaced with your own identifying
# information: Portions Copyright [yyyy] [name of copyright owner]
#
# CDDL HEADER END
#
#
# Portions Copyright (c) 2021 iXsystems, Inc.
#
. $STF_SUITE/include/libtest.shlib
. $STF_SUITE/tests/functional/acl/acl_common.kshlib
# Use only the first space-separated entry of $DISKS.
DISK=${DISKS%% *}
# Run the user/group cleanup helper before creating the accounts
# (presumably to clear leftovers from an earlier run).
cleanup_user_group
# Create staff group and add users to it
log_must add_group $ZFS_ACL_STAFF_GROUP
log_must add_user $ZFS_ACL_STAFF_GROUP $ZFS_ACL_STAFF1
log_must add_user $ZFS_ACL_STAFF_GROUP $ZFS_ACL_STAFF2
default_setup_noexit $DISK
# These tests run with acltype=off on the test filesystem, and the test
# directory is opened up so the unprivileged users can work in it.
log_must zfs set acltype=off $TESTPOOL/$TESTFS
log_must chmod 0777 $TESTDIR
log_pass

View File

@ -15,6 +15,7 @@ dist_pkgdata_SCRIPTS = \
redacted_mounts.ksh \
redacted_negative.ksh \
redacted_origin.ksh \
redacted_panic.ksh \
redacted_props.ksh \
redacted_resume.ksh \
redacted_size.ksh \

View File

@ -0,0 +1,44 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
#
# Copyright (c) 2021 by Delphix. All rights reserved.
#
. $STF_SUITE/tests/functional/redacted_send/redacted.kshlib
#
# Description:
# Verify edge case when midbufid is equal to minbufid for the bug fixed by
# https://github.com/openzfs/zfs/pull/11297 (Fix kernel panic induced by
# redacted send)
#
# $POOL, $POOL2 and $tmpdir are not set in this file; presumably they come
# from redacted.kshlib.
typeset ds_name="panic"
typeset sendfs="$POOL/$ds_name"
typeset recvfs="$POOL2/$ds_name"
typeset clone="$POOL/${ds_name}_clone"
typeset stream=$(mktemp $tmpdir/stream.XXXX)
log_onexit redacted_cleanup $sendfs $recvfs
# Source dataset: 8k records holding one file of 2048 x 1024k random data.
log_must zfs create -o recsize=8k $sendfs
log_must dd if=/dev/urandom of=/$sendfs/file bs=1024k count=2048
log_must zfs snapshot $sendfs@init
# Clone the snapshot and overwrite part of the file in 8192-byte blocks.
# NOTE(review): -s 2 -c 7226 presumably mean "stride 2, 7226 blocks"; confirm
# against stride_dd's usage.
log_must zfs clone $sendfs@init $clone
log_must stride_dd -i /dev/urandom -o /$clone/file -b 8192 -s 2 -c 7226
log_must zfs snapshot $clone@init
# Redact the source snapshot with respect to the clone, then send and receive
# the redacted stream; the test passes if all of these commands succeed.
log_must zfs redact $sendfs@init book_init $clone@init
log_must eval "zfs send --redact $sendfs#book_init $sendfs@init >$stream"
log_must eval "zfs recv $recvfs <$stream"
log_pass

View File

@ -734,7 +734,7 @@
/* #undef ZFS_IS_GPL_COMPATIBLE */
/* Define the project alias string. */
#define ZFS_META_ALIAS "zfs-2.0.0-FreeBSD_g9305ff2ed"
#define ZFS_META_ALIAS "zfs-2.0.0-FreeBSD_g891568c99"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@ -764,7 +764,7 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
#define ZFS_META_RELEASE "FreeBSD_g9305ff2ed"
#define ZFS_META_RELEASE "FreeBSD_g891568c99"
/* Define the project version. */
#define ZFS_META_VERSION "2.0.0"