zfs: merge openzfs/zfs@deb121309
Notable upstream pull request merges: #12918 Introduce BLAKE3 checksums as an OpenZFS feature #13553 Reduce ZIO io_lock contention on sorted scrub #13537 Improve sorted scan memory accounting #13540 AVL: Remove obsolete branching optimizations #13563 FreeBSD: Improve crypto_dispatch() handling Obtained from: OpenZFS OpenZFS commit: deb1213098e2dc10e6eee5e5c57bb40584e096a6
This commit is contained in:
commit
1f1e2261e3
@ -16,9 +16,23 @@ ASM_SOURCES_AS = \
|
||||
asm-x86_64/modes/aesni-gcm-x86_64.S \
|
||||
asm-x86_64/modes/ghash-x86_64.S \
|
||||
asm-x86_64/sha2/sha256_impl.S \
|
||||
asm-x86_64/sha2/sha512_impl.S
|
||||
asm-x86_64/sha2/sha512_impl.S \
|
||||
asm-x86_64/blake3/blake3_avx2.S \
|
||||
asm-x86_64/blake3/blake3_avx512.S \
|
||||
asm-x86_64/blake3/blake3_sse2.S \
|
||||
asm-x86_64/blake3/blake3_sse41.S
|
||||
|
||||
CFLAGS+= -D__amd64 -D_SYS_STACK_H -UHAVE_AES
|
||||
.elif ${MACHINE_ARCH} == "aarch64"
|
||||
ASM_SOURCES_C =
|
||||
ASM_SOURCES_AS = \
|
||||
asm-aarch64/blake3/b3_aarch64_sse2.S \
|
||||
asm-aarch64/blake3/b3_aarch64_sse41.S
|
||||
.elif ${MACHINE_ARCH} == "powerpc64"
|
||||
ASM_SOURCES_C =
|
||||
ASM_SOURCES_AS = \
|
||||
asm-ppc64/blake3/b3_ppc64le_sse2.S \
|
||||
asm-ppc64/blake3/b3_ppc64le_sse41.S
|
||||
.else
|
||||
ASM_SOURCES_C =
|
||||
ASM_SOURCES_AS =
|
||||
@ -35,6 +49,10 @@ KERNEL_C = \
|
||||
algs/aes/aes_impl_x86-64.c \
|
||||
algs/aes/aes_impl.c \
|
||||
algs/aes/aes_modes.c \
|
||||
algs/blake3/blake3.c \
|
||||
algs/blake3/blake3_generic.c \
|
||||
algs/blake3/blake3_impl.c \
|
||||
algs/blake3/blake3_x86-64.c \
|
||||
algs/edonr/edonr.c \
|
||||
algs/modes/modes.c \
|
||||
algs/modes/cbc.c \
|
||||
@ -88,5 +106,14 @@ CFLAGS.aesni-gcm-x86_64.S+= -DLOCORE
|
||||
CFLAGS.ghash-x86_64.S+= -DLOCORE
|
||||
CFLAGS.sha256_impl.S+= -DLOCORE
|
||||
CFLAGS.sha512_impl.S+= -DLOCORE
|
||||
CFLAGS.blake3_avx2.S = -DLOCORE
|
||||
CFLAGS.blake3_avx512.S = -DLOCORE
|
||||
CFLAGS.blake3_sse2.S = -DLOCORE
|
||||
CFLAGS.blake3_sse41.S = -DLOCORE
|
||||
CFLAGS.b3_aarch64_sse2.S = -DLOCORE
|
||||
CFLAGS.b3_aarch64_sse41.S = -DLOCORE
|
||||
CFLAGS.b3_ppc64le_sse2.S = -DLOCORE
|
||||
CFLAGS.b3_ppc64le_sse41.S = -DLOCORE
|
||||
|
||||
|
||||
.include <bsd.lib.mk>
|
||||
|
@ -15,9 +15,23 @@ ASM_SOURCES_AS = \
|
||||
asm-x86_64/modes/gcm_pclmulqdq.S \
|
||||
asm-x86_64/modes/aesni-gcm-x86_64.S \
|
||||
asm-x86_64/sha2/sha256_impl.S \
|
||||
asm-x86_64/sha2/sha512_impl.S
|
||||
asm-x86_64/sha2/sha512_impl.S \
|
||||
asm-x86_64/blake3/blake3_avx2.S \
|
||||
asm-x86_64/blake3/blake3_avx512.S \
|
||||
asm-x86_64/blake3/blake3_sse2.S \
|
||||
asm-x86_64/blake3/blake3_sse41.S
|
||||
|
||||
CFLAGS+= -D__amd64 -D_SYS_STACK_H
|
||||
.elif ${MACHINE_ARCH} == "aarch64"
|
||||
ASM_SOURCES_C =
|
||||
ASM_SOURCES_AS = \
|
||||
asm-aarch64/blake3/b3_aarch64_sse2.S \
|
||||
asm-aarch64/blake3/b3_aarch64_sse41.S
|
||||
.elif ${MACHINE_ARCH} == "powerpc64"
|
||||
ASM_SOURCES_C =
|
||||
ASM_SOURCES_AS = \
|
||||
asm-ppc64/blake3/b3_ppc64le_sse2.S \
|
||||
asm-ppc64/blake3/b3_ppc64le_sse41.S
|
||||
.else
|
||||
ASM_SOURCES_C =
|
||||
ASM_SOURCES_AS =
|
||||
@ -34,6 +48,10 @@ KERNEL_C = \
|
||||
algs/aes/aes_impl_x86-64.c \
|
||||
algs/aes/aes_impl.c \
|
||||
algs/aes/aes_modes.c \
|
||||
algs/blake3/blake3.c \
|
||||
algs/blake3/blake3_generic.c \
|
||||
algs/blake3/blake3_impl.c \
|
||||
algs/blake3/blake3_x86-64.c \
|
||||
algs/edonr/edonr.c \
|
||||
algs/modes/modes.c \
|
||||
algs/modes/cbc.c \
|
||||
@ -81,5 +99,14 @@ CFLAGS.ghash-x86_64.S+= -DLOCORE
|
||||
CFLAGS.sha256_impl.S+= -DLOCORE
|
||||
CFLAGS.sha512_impl.S+= -DLOCORE
|
||||
CFLAGS.gcm.c+= -UCAN_USE_GCM_ASM
|
||||
CFLAGS.blake3_avx2.S = -DLOCORE
|
||||
CFLAGS.blake3_avx512.S = -DLOCORE
|
||||
CFLAGS.blake3_sse2.S = -DLOCORE
|
||||
CFLAGS.blake3_sse41.S = -DLOCORE
|
||||
CFLAGS.b3_aarch64_sse2.S = -DLOCORE
|
||||
CFLAGS.b3_aarch64_sse41.S = -DLOCORE
|
||||
CFLAGS.b3_ppc64le_sse2.S = -DLOCORE
|
||||
CFLAGS.b3_ppc64le_sse41.S = -DLOCORE
|
||||
|
||||
|
||||
.include <bsd.lib.mk>
|
||||
|
@ -56,6 +56,7 @@ KERNEL_C = \
|
||||
aggsum.c \
|
||||
arc.c \
|
||||
arc_os.c \
|
||||
blake3_zfs.c \
|
||||
blkptr.c \
|
||||
bplist.c \
|
||||
bpobj.c \
|
||||
@ -169,6 +170,7 @@ KERNEL_C = \
|
||||
zcp_synctask.c \
|
||||
zfeature.c \
|
||||
zfs_byteswap.c \
|
||||
zfs_chksum.c \
|
||||
zfs_debug.c \
|
||||
zfs_fm.c \
|
||||
zfs_fuid.c \
|
||||
|
@ -285,6 +285,7 @@ CONTRIBUTORS:
|
||||
Tim Connors <tconnors@rather.puzzling.org>
|
||||
Tim Crawford <tcrawford@datto.com>
|
||||
Tim Haley <Tim.Haley@Sun.COM>
|
||||
Tino Reichardt <milky-zfs@mcmilk.de>
|
||||
Tobin Harding <me@tobin.cc>
|
||||
Tom Caputi <tcaputi@datto.com>
|
||||
Tom Matthews <tom@axiom-partners.com>
|
||||
|
@ -174,7 +174,7 @@ zfs_add_sort_column(zfs_sort_column_t **sc, const char *name,
|
||||
zfs_sort_column_t *col;
|
||||
zfs_prop_t prop;
|
||||
|
||||
if ((prop = zfs_name_to_prop(name)) == ZPROP_INVAL &&
|
||||
if ((prop = zfs_name_to_prop(name)) == ZPROP_USERPROP &&
|
||||
!zfs_prop_user(name))
|
||||
return (-1);
|
||||
|
||||
@ -182,7 +182,7 @@ zfs_add_sort_column(zfs_sort_column_t **sc, const char *name,
|
||||
|
||||
col->sc_prop = prop;
|
||||
col->sc_reverse = reverse;
|
||||
if (prop == ZPROP_INVAL) {
|
||||
if (prop == ZPROP_USERPROP) {
|
||||
col->sc_user_prop = safe_malloc(strlen(name) + 1);
|
||||
(void) strcpy(col->sc_user_prop, name);
|
||||
}
|
||||
@ -311,7 +311,7 @@ zfs_sort(const void *larg, const void *rarg, void *data)
|
||||
* Otherwise, we compare 'lnum' and 'rnum'.
|
||||
*/
|
||||
lstr = rstr = NULL;
|
||||
if (psc->sc_prop == ZPROP_INVAL) {
|
||||
if (psc->sc_prop == ZPROP_USERPROP) {
|
||||
nvlist_t *luser, *ruser;
|
||||
nvlist_t *lval, *rval;
|
||||
|
||||
|
@ -127,6 +127,11 @@ static int zfs_do_jail(int argc, char **argv);
|
||||
static int zfs_do_unjail(int argc, char **argv);
|
||||
#endif
|
||||
|
||||
#ifdef __linux__
|
||||
static int zfs_do_zone(int argc, char **argv);
|
||||
static int zfs_do_unzone(int argc, char **argv);
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Enable a reasonable set of defaults for libumem debugging on DEBUG builds.
|
||||
*/
|
||||
@ -184,6 +189,8 @@ typedef enum {
|
||||
HELP_JAIL,
|
||||
HELP_UNJAIL,
|
||||
HELP_WAIT,
|
||||
HELP_ZONE,
|
||||
HELP_UNZONE,
|
||||
} zfs_help_t;
|
||||
|
||||
typedef struct zfs_command {
|
||||
@ -254,6 +261,11 @@ static zfs_command_t command_table[] = {
|
||||
{ "jail", zfs_do_jail, HELP_JAIL },
|
||||
{ "unjail", zfs_do_unjail, HELP_UNJAIL },
|
||||
#endif
|
||||
|
||||
#ifdef __linux__
|
||||
{ "zone", zfs_do_zone, HELP_ZONE },
|
||||
{ "unzone", zfs_do_unzone, HELP_UNZONE },
|
||||
#endif
|
||||
};
|
||||
|
||||
#define NCOMMAND (sizeof (command_table) / sizeof (command_table[0]))
|
||||
@ -415,6 +427,10 @@ get_usage(zfs_help_t idx)
|
||||
return (gettext("\tunjail <jailid|jailname> <filesystem>\n"));
|
||||
case HELP_WAIT:
|
||||
return (gettext("\twait [-t <activity>] <filesystem>\n"));
|
||||
case HELP_ZONE:
|
||||
return (gettext("\tzone <nsfile> <filesystem>\n"));
|
||||
case HELP_UNZONE:
|
||||
return (gettext("\tunzone <nsfile> <filesystem>\n"));
|
||||
default:
|
||||
__builtin_unreachable();
|
||||
}
|
||||
@ -1901,7 +1917,7 @@ get_callback(zfs_handle_t *zhp, void *data)
|
||||
pl == cbp->cb_proplist)
|
||||
continue;
|
||||
|
||||
if (pl->pl_prop != ZPROP_INVAL) {
|
||||
if (pl->pl_prop != ZPROP_USERPROP) {
|
||||
if (zfs_prop_get(zhp, pl->pl_prop, buf,
|
||||
sizeof (buf), &sourcetype, source,
|
||||
sizeof (source),
|
||||
@ -2291,7 +2307,7 @@ zfs_do_inherit(int argc, char **argv)
|
||||
argc--;
|
||||
argv++;
|
||||
|
||||
if ((prop = zfs_name_to_prop(propname)) != ZPROP_INVAL) {
|
||||
if ((prop = zfs_name_to_prop(propname)) != ZPROP_USERPROP) {
|
||||
if (zfs_prop_readonly(prop)) {
|
||||
(void) fprintf(stderr, gettext(
|
||||
"%s property is read-only\n"),
|
||||
@ -3427,7 +3443,7 @@ print_header(list_cbdata_t *cb)
|
||||
}
|
||||
|
||||
right_justify = B_FALSE;
|
||||
if (pl->pl_prop != ZPROP_INVAL) {
|
||||
if (pl->pl_prop != ZPROP_USERPROP) {
|
||||
header = zfs_prop_column_name(pl->pl_prop);
|
||||
right_justify = zfs_prop_align_right(pl->pl_prop);
|
||||
} else {
|
||||
@ -3478,7 +3494,7 @@ print_dataset(zfs_handle_t *zhp, list_cbdata_t *cb)
|
||||
sizeof (property));
|
||||
propstr = property;
|
||||
right_justify = zfs_prop_align_right(pl->pl_prop);
|
||||
} else if (pl->pl_prop != ZPROP_INVAL) {
|
||||
} else if (pl->pl_prop != ZPROP_USERPROP) {
|
||||
if (zfs_prop_get(zhp, pl->pl_prop, property,
|
||||
sizeof (property), NULL, NULL, 0,
|
||||
cb->cb_literal) != 0)
|
||||
@ -8692,6 +8708,50 @@ main(int argc, char **argv)
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* zfs zone nsfile filesystem
|
||||
*
|
||||
* Add or delete the given dataset to/from the namespace.
|
||||
*/
|
||||
#ifdef __linux__
|
||||
static int
|
||||
zfs_do_zone_impl(int argc, char **argv, boolean_t attach)
|
||||
{
|
||||
zfs_handle_t *zhp;
|
||||
int ret;
|
||||
|
||||
if (argc < 3) {
|
||||
(void) fprintf(stderr, gettext("missing argument(s)\n"));
|
||||
usage(B_FALSE);
|
||||
}
|
||||
if (argc > 3) {
|
||||
(void) fprintf(stderr, gettext("too many arguments\n"));
|
||||
usage(B_FALSE);
|
||||
}
|
||||
|
||||
zhp = zfs_open(g_zfs, argv[2], ZFS_TYPE_FILESYSTEM);
|
||||
if (zhp == NULL)
|
||||
return (1);
|
||||
|
||||
ret = (zfs_userns(zhp, argv[1], attach) != 0);
|
||||
|
||||
zfs_close(zhp);
|
||||
return (ret);
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_do_zone(int argc, char **argv)
|
||||
{
|
||||
return (zfs_do_zone_impl(argc, argv, B_TRUE));
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_do_unzone(int argc, char **argv)
|
||||
{
|
||||
return (zfs_do_zone_impl(argc, argv, B_FALSE));
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef __FreeBSD__
|
||||
#include <sys/jail.h>
|
||||
#include <jail.h>
|
||||
|
@ -5946,7 +5946,7 @@ print_header(list_cbdata_t *cb)
|
||||
first = B_FALSE;
|
||||
|
||||
right_justify = B_FALSE;
|
||||
if (pl->pl_prop != ZPROP_INVAL) {
|
||||
if (pl->pl_prop != ZPROP_USERPROP) {
|
||||
header = zpool_prop_column_name(pl->pl_prop);
|
||||
right_justify = zpool_prop_align_right(pl->pl_prop);
|
||||
} else {
|
||||
@ -6004,7 +6004,7 @@ print_pool(zpool_handle_t *zhp, list_cbdata_t *cb)
|
||||
}
|
||||
|
||||
right_justify = B_FALSE;
|
||||
if (pl->pl_prop != ZPROP_INVAL) {
|
||||
if (pl->pl_prop != ZPROP_USERPROP) {
|
||||
if (zpool_get_prop(zhp, pl->pl_prop, property,
|
||||
sizeof (property), NULL, cb->cb_literal) != 0)
|
||||
propstr = "-";
|
||||
|
@ -121,6 +121,7 @@
|
||||
#include <sys/zfeature.h>
|
||||
#include <sys/dsl_userhold.h>
|
||||
#include <sys/abd.h>
|
||||
#include <sys/blake3.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <unistd.h>
|
||||
@ -417,6 +418,7 @@ ztest_func_t ztest_device_removal;
|
||||
ztest_func_t ztest_spa_checkpoint_create_discard;
|
||||
ztest_func_t ztest_initialize;
|
||||
ztest_func_t ztest_trim;
|
||||
ztest_func_t ztest_blake3;
|
||||
ztest_func_t ztest_fletcher;
|
||||
ztest_func_t ztest_fletcher_incr;
|
||||
ztest_func_t ztest_verify_dnode_bt;
|
||||
@ -470,6 +472,7 @@ ztest_info_t ztest_info[] = {
|
||||
ZTI_INIT(ztest_spa_checkpoint_create_discard, 1, &zopt_rarely),
|
||||
ZTI_INIT(ztest_initialize, 1, &zopt_sometimes),
|
||||
ZTI_INIT(ztest_trim, 1, &zopt_sometimes),
|
||||
ZTI_INIT(ztest_blake3, 1, &zopt_rarely),
|
||||
ZTI_INIT(ztest_fletcher, 1, &zopt_rarely),
|
||||
ZTI_INIT(ztest_fletcher_incr, 1, &zopt_rarely),
|
||||
ZTI_INIT(ztest_verify_dnode_bt, 1, &zopt_sometimes),
|
||||
@ -6373,6 +6376,92 @@ ztest_reguid(ztest_ds_t *zd, uint64_t id)
|
||||
VERIFY3U(load, ==, spa_load_guid(spa));
|
||||
}
|
||||
|
||||
void
|
||||
ztest_blake3(ztest_ds_t *zd, uint64_t id)
|
||||
{
|
||||
(void) zd, (void) id;
|
||||
hrtime_t end = gethrtime() + NANOSEC;
|
||||
zio_cksum_salt_t salt;
|
||||
void *salt_ptr = &salt.zcs_bytes;
|
||||
struct abd *abd_data, *abd_meta;
|
||||
void *buf, *templ;
|
||||
int i, *ptr;
|
||||
uint32_t size;
|
||||
BLAKE3_CTX ctx;
|
||||
|
||||
size = ztest_random_blocksize();
|
||||
buf = umem_alloc(size, UMEM_NOFAIL);
|
||||
abd_data = abd_alloc(size, B_FALSE);
|
||||
abd_meta = abd_alloc(size, B_TRUE);
|
||||
|
||||
for (i = 0, ptr = buf; i < size / sizeof (*ptr); i++, ptr++)
|
||||
*ptr = ztest_random(UINT_MAX);
|
||||
memset(salt_ptr, 'A', 32);
|
||||
|
||||
abd_copy_from_buf_off(abd_data, buf, 0, size);
|
||||
abd_copy_from_buf_off(abd_meta, buf, 0, size);
|
||||
|
||||
while (gethrtime() <= end) {
|
||||
int run_count = 100;
|
||||
zio_cksum_t zc_ref1, zc_ref2;
|
||||
zio_cksum_t zc_res1, zc_res2;
|
||||
|
||||
void *ref1 = &zc_ref1;
|
||||
void *ref2 = &zc_ref2;
|
||||
void *res1 = &zc_res1;
|
||||
void *res2 = &zc_res2;
|
||||
|
||||
/* BLAKE3_KEY_LEN = 32 */
|
||||
VERIFY0(blake3_set_impl_name("generic"));
|
||||
templ = abd_checksum_blake3_tmpl_init(&salt);
|
||||
Blake3_InitKeyed(&ctx, salt_ptr);
|
||||
Blake3_Update(&ctx, buf, size);
|
||||
Blake3_Final(&ctx, ref1);
|
||||
zc_ref2 = zc_ref1;
|
||||
ZIO_CHECKSUM_BSWAP(&zc_ref2);
|
||||
abd_checksum_blake3_tmpl_free(templ);
|
||||
|
||||
VERIFY0(blake3_set_impl_name("cycle"));
|
||||
while (run_count-- > 0) {
|
||||
|
||||
/* Test current implementation */
|
||||
Blake3_InitKeyed(&ctx, salt_ptr);
|
||||
Blake3_Update(&ctx, buf, size);
|
||||
Blake3_Final(&ctx, res1);
|
||||
zc_res2 = zc_res1;
|
||||
ZIO_CHECKSUM_BSWAP(&zc_res2);
|
||||
|
||||
VERIFY0(memcmp(ref1, res1, 32));
|
||||
VERIFY0(memcmp(ref2, res2, 32));
|
||||
|
||||
/* Test ABD - data */
|
||||
templ = abd_checksum_blake3_tmpl_init(&salt);
|
||||
abd_checksum_blake3_native(abd_data, size,
|
||||
templ, &zc_res1);
|
||||
abd_checksum_blake3_byteswap(abd_data, size,
|
||||
templ, &zc_res2);
|
||||
|
||||
VERIFY0(memcmp(ref1, res1, 32));
|
||||
VERIFY0(memcmp(ref2, res2, 32));
|
||||
|
||||
/* Test ABD - metadata */
|
||||
abd_checksum_blake3_native(abd_meta, size,
|
||||
templ, &zc_res1);
|
||||
abd_checksum_blake3_byteswap(abd_meta, size,
|
||||
templ, &zc_res2);
|
||||
abd_checksum_blake3_tmpl_free(templ);
|
||||
|
||||
VERIFY0(memcmp(ref1, res1, 32));
|
||||
VERIFY0(memcmp(ref2, res2, 32));
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
abd_free(abd_data);
|
||||
abd_free(abd_meta);
|
||||
umem_free(buf, size);
|
||||
}
|
||||
|
||||
void
|
||||
ztest_fletcher(ztest_ds_t *zd, uint64_t id)
|
||||
{
|
||||
|
@ -30,6 +30,8 @@ AC_DEFUN([ZFS_AC_CONFIG_ALWAYS_ARCH], [
|
||||
;;
|
||||
esac
|
||||
|
||||
AM_CONDITIONAL([TARGET_CPU_AARCH64], test $TARGET_CPU = aarch64)
|
||||
AM_CONDITIONAL([TARGET_CPU_X86_64], test $TARGET_CPU = x86_64)
|
||||
AM_CONDITIONAL([TARGET_CPU_POWERPC], test $TARGET_CPU = powerpc)
|
||||
AM_CONDITIONAL([TARGET_CPU_SPARC64], test $TARGET_CPU = sparc64)
|
||||
])
|
||||
|
@ -7,8 +7,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_ADD_DISK], [
|
||||
#include <linux/blkdev.h>
|
||||
], [
|
||||
struct gendisk *disk = NULL;
|
||||
int err = add_disk(disk);
|
||||
err = err;
|
||||
int error __attribute__ ((unused)) = add_disk(disk);
|
||||
])
|
||||
])
|
||||
|
||||
|
@ -359,6 +359,36 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS], [
|
||||
])
|
||||
])
|
||||
|
||||
dnl #
|
||||
dnl # See if kernel supports block multi-queue and blk_status_t.
|
||||
dnl # blk_status_t represents the new status codes introduced in the 4.13
|
||||
dnl # kernel patch:
|
||||
dnl #
|
||||
dnl # block: introduce new block status code type
|
||||
dnl #
|
||||
dnl # We do not currently support the "old" block multi-queue interfaces from
|
||||
dnl # prior kernels.
|
||||
dnl #
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_MQ], [
|
||||
ZFS_LINUX_TEST_SRC([blk_mq], [
|
||||
#include <linux/blk-mq.h>
|
||||
], [
|
||||
struct blk_mq_tag_set tag_set __attribute__ ((unused)) = {0};
|
||||
(void) blk_mq_alloc_tag_set(&tag_set);
|
||||
return BLK_STS_OK;
|
||||
], [])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_BLK_MQ], [
|
||||
AC_MSG_CHECKING([whether block multiqueue with blk_status_t is available])
|
||||
ZFS_LINUX_TEST_RESULT([blk_mq], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_BLK_MQ, 1, [block multiqueue is available])
|
||||
], [
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE], [
|
||||
ZFS_AC_KERNEL_SRC_BLK_QUEUE_PLUG
|
||||
ZFS_AC_KERNEL_SRC_BLK_QUEUE_BDI
|
||||
@ -370,6 +400,7 @@ AC_DEFUN([ZFS_AC_KERNEL_SRC_BLK_QUEUE], [
|
||||
ZFS_AC_KERNEL_SRC_BLK_QUEUE_FLUSH
|
||||
ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_HW_SECTORS
|
||||
ZFS_AC_KERNEL_SRC_BLK_QUEUE_MAX_SEGMENTS
|
||||
ZFS_AC_KERNEL_SRC_BLK_MQ
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE], [
|
||||
@ -383,4 +414,5 @@ AC_DEFUN([ZFS_AC_KERNEL_BLK_QUEUE], [
|
||||
ZFS_AC_KERNEL_BLK_QUEUE_FLUSH
|
||||
ZFS_AC_KERNEL_BLK_QUEUE_MAX_HW_SECTORS
|
||||
ZFS_AC_KERNEL_BLK_QUEUE_MAX_SEGMENTS
|
||||
ZFS_AC_KERNEL_BLK_MQ
|
||||
])
|
||||
|
23
sys/contrib/openzfs/config/kernel-user-ns-inum.m4
Normal file
23
sys/contrib/openzfs/config/kernel-user-ns-inum.m4
Normal file
@ -0,0 +1,23 @@
|
||||
dnl #
|
||||
dnl # 3.18 API change
|
||||
dnl # struct user_namespace inum moved from .proc_inum to .ns.inum.
|
||||
dnl #
|
||||
AC_DEFUN([ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM], [
|
||||
ZFS_LINUX_TEST_SRC([user_ns_common_inum], [
|
||||
#include <linux/user_namespace.h>
|
||||
], [
|
||||
struct user_namespace uns;
|
||||
uns.ns.inum = 0;
|
||||
])
|
||||
])
|
||||
|
||||
AC_DEFUN([ZFS_AC_KERNEL_USER_NS_COMMON_INUM], [
|
||||
AC_MSG_CHECKING([whether user_namespace->ns.inum exists])
|
||||
ZFS_LINUX_TEST_RESULT([user_ns_common_inum], [
|
||||
AC_MSG_RESULT(yes)
|
||||
AC_DEFINE(HAVE_USER_NS_COMMON_INUM, 1,
|
||||
[user_namespace->ns.inum exists])
|
||||
],[
|
||||
AC_MSG_RESULT(no)
|
||||
])
|
||||
])
|
@ -145,6 +145,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
|
||||
ZFS_AC_KERNEL_SRC_KTHREAD
|
||||
ZFS_AC_KERNEL_SRC_ZERO_PAGE
|
||||
ZFS_AC_KERNEL_SRC___COPY_FROM_USER_INATOMIC
|
||||
ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM
|
||||
|
||||
AC_MSG_CHECKING([for available kernel interfaces])
|
||||
ZFS_LINUX_TEST_COMPILE_ALL([kabi])
|
||||
@ -263,6 +264,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
|
||||
ZFS_AC_KERNEL_KTHREAD
|
||||
ZFS_AC_KERNEL_ZERO_PAGE
|
||||
ZFS_AC_KERNEL___COPY_FROM_USER_INATOMIC
|
||||
ZFS_AC_KERNEL_USER_NS_COMMON_INUM
|
||||
])
|
||||
|
||||
dnl #
|
||||
|
@ -83,8 +83,7 @@ install() {
|
||||
|
||||
for _service in \
|
||||
"zfs-import-scan.service" \
|
||||
"zfs-import-cache.service" \
|
||||
"zfs-load-module.service"; do
|
||||
"zfs-import-cache.service"; do
|
||||
inst_simple "${systemdsystemunitdir}/${_service}"
|
||||
systemctl -q --root "${initdir}" add-wants zfs-import.target "${_service}"
|
||||
done
|
||||
|
@ -100,6 +100,7 @@ zfs_errno = enum_with_offset(1024, [
|
||||
'ZFS_ERR_REBUILD_IN_PROGRESS',
|
||||
'ZFS_ERR_BADPROP',
|
||||
'ZFS_ERR_VDEV_NOTSUP',
|
||||
'ZFS_ERR_NOT_USER_NAMESPACE',
|
||||
],
|
||||
{}
|
||||
)
|
||||
|
@ -59,6 +59,9 @@ systemdunit_DATA = \
|
||||
%D%/systemd/system/zfs-scrub-monthly@.timer \
|
||||
%D%/systemd/system/zfs-scrub-weekly@.timer \
|
||||
%D%/systemd/system/zfs-scrub@.service \
|
||||
%D%/systemd/system/zfs-trim-monthly@.timer \
|
||||
%D%/systemd/system/zfs-trim-weekly@.timer \
|
||||
%D%/systemd/system/zfs-trim@.service \
|
||||
%D%/systemd/system/zfs-share.service \
|
||||
%D%/systemd/system/zfs-volume-wait.service \
|
||||
%D%/systemd/system/zfs-volumes.target \
|
||||
|
@ -0,0 +1,12 @@
|
||||
[Unit]
|
||||
Description=Monthly zpool trim timer for %i
|
||||
Documentation=man:zpool-trim(8)
|
||||
|
||||
[Timer]
|
||||
OnCalendar=monthly
|
||||
Persistent=true
|
||||
RandomizedDelaySec=1h
|
||||
Unit=zfs-trim@%i.service
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
@ -0,0 +1,12 @@
|
||||
[Unit]
|
||||
Description=Weekly zpool trim timer for %i
|
||||
Documentation=man:zpool-trim(8)
|
||||
|
||||
[Timer]
|
||||
OnCalendar=weekly
|
||||
Persistent=true
|
||||
RandomizedDelaySec=1h
|
||||
Unit=zfs-trim@%i.service
|
||||
|
||||
[Install]
|
||||
WantedBy=timers.target
|
15
sys/contrib/openzfs/etc/systemd/system/zfs-trim@.service.in
Normal file
15
sys/contrib/openzfs/etc/systemd/system/zfs-trim@.service.in
Normal file
@ -0,0 +1,15 @@
|
||||
[Unit]
|
||||
Description=zpool trim on %i
|
||||
Documentation=man:zpool-trim(8)
|
||||
Requires=zfs.target
|
||||
After=zfs.target
|
||||
ConditionACPower=true
|
||||
ConditionPathIsDirectory=/sys/module/zfs
|
||||
|
||||
[Service]
|
||||
EnvironmentFile=-@initconfdir@/zfs
|
||||
ExecStart=/bin/sh -c '\
|
||||
if @sbindir@/zpool status %i | grep -q "(trimming)"; then\
|
||||
exec @sbindir@/zpool wait -t trim %i;\
|
||||
else exec @sbindir@/zpool trim -w %i; fi'
|
||||
ExecStop=-/bin/sh -c '@sbindir@/zpool trim -s %i 2>/dev/null || true'
|
@ -23,6 +23,7 @@ COMMON_H = \
|
||||
sys/avl.h \
|
||||
sys/avl_impl.h \
|
||||
sys/bitops.h \
|
||||
sys/blake3.h \
|
||||
sys/blkptr.h \
|
||||
sys/bplist.h \
|
||||
sys/bpobj.h \
|
||||
@ -117,6 +118,7 @@ COMMON_H = \
|
||||
sys/zfeature.h \
|
||||
sys/zfs_acl.h \
|
||||
sys/zfs_bootenv.h \
|
||||
sys/zfs_chksum.h \
|
||||
sys/zfs_context.h \
|
||||
sys/zfs_debug.h \
|
||||
sys/zfs_delay.h \
|
||||
|
@ -150,6 +150,7 @@ typedef enum zfs_error {
|
||||
EZFS_EXPORT_IN_PROGRESS, /* currently exporting the pool */
|
||||
EZFS_REBUILDING, /* resilvering (sequential reconstruction) */
|
||||
EZFS_VDEV_NOTSUP, /* ops not supported for this type of vdev */
|
||||
EZFS_NOT_USER_NAMESPACE, /* a file is not a user namespace */
|
||||
EZFS_UNKNOWN
|
||||
} zfs_error_t;
|
||||
|
||||
@ -979,6 +980,15 @@ _LIBZFS_H int zpool_nextboot(libzfs_handle_t *, uint64_t, uint64_t,
|
||||
|
||||
#endif /* __FreeBSD__ */
|
||||
|
||||
#ifdef __linux__
|
||||
|
||||
/*
|
||||
* Add or delete the given filesystem to/from the given user namespace.
|
||||
*/
|
||||
_LIBZFS_H int zfs_userns(zfs_handle_t *zhp, const char *nspath, int attach);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
@ -74,10 +74,12 @@ extern "C" {
|
||||
|
||||
#ifndef LOCORE
|
||||
#ifndef HAVE_RPC_TYPES
|
||||
#ifndef _KERNEL
|
||||
typedef int bool_t;
|
||||
typedef int enum_t;
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#ifndef __cplusplus
|
||||
#define __init
|
||||
|
@ -34,6 +34,11 @@
|
||||
#include <linux/hdreg.h>
|
||||
#include <linux/major.h>
|
||||
#include <linux/msdos_fs.h> /* for SECTOR_* */
|
||||
#include <linux/bio.h>
|
||||
|
||||
#ifdef HAVE_BLK_MQ
|
||||
#include <linux/blk-mq.h>
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_BLK_QUEUE_FLAG_SET
|
||||
static inline void
|
||||
@ -608,4 +613,110 @@ blk_generic_alloc_queue(make_request_fn make_request, int node_id)
|
||||
}
|
||||
#endif /* !HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */
|
||||
|
||||
/*
|
||||
* All the io_*() helper functions below can operate on a bio, or a rq, but
|
||||
* not both. The older submit_bio() codepath will pass a bio, and the
|
||||
* newer blk-mq codepath will pass a rq.
|
||||
*/
|
||||
static inline int
|
||||
io_data_dir(struct bio *bio, struct request *rq)
|
||||
{
|
||||
#ifdef HAVE_BLK_MQ
|
||||
if (rq != NULL) {
|
||||
if (op_is_write(req_op(rq))) {
|
||||
return (WRITE);
|
||||
} else {
|
||||
return (READ);
|
||||
}
|
||||
}
|
||||
#else
|
||||
ASSERT3P(rq, ==, NULL);
|
||||
#endif
|
||||
return (bio_data_dir(bio));
|
||||
}
|
||||
|
||||
static inline int
|
||||
io_is_flush(struct bio *bio, struct request *rq)
|
||||
{
|
||||
#ifdef HAVE_BLK_MQ
|
||||
if (rq != NULL)
|
||||
return (req_op(rq) == REQ_OP_FLUSH);
|
||||
#else
|
||||
ASSERT3P(rq, ==, NULL);
|
||||
#endif
|
||||
return (bio_is_flush(bio));
|
||||
}
|
||||
|
||||
static inline int
|
||||
io_is_discard(struct bio *bio, struct request *rq)
|
||||
{
|
||||
#ifdef HAVE_BLK_MQ
|
||||
if (rq != NULL)
|
||||
return (req_op(rq) == REQ_OP_DISCARD);
|
||||
#else
|
||||
ASSERT3P(rq, ==, NULL);
|
||||
#endif
|
||||
return (bio_is_discard(bio));
|
||||
}
|
||||
|
||||
static inline int
|
||||
io_is_secure_erase(struct bio *bio, struct request *rq)
|
||||
{
|
||||
#ifdef HAVE_BLK_MQ
|
||||
if (rq != NULL)
|
||||
return (req_op(rq) == REQ_OP_SECURE_ERASE);
|
||||
#else
|
||||
ASSERT3P(rq, ==, NULL);
|
||||
#endif
|
||||
return (bio_is_secure_erase(bio));
|
||||
}
|
||||
|
||||
static inline int
|
||||
io_is_fua(struct bio *bio, struct request *rq)
|
||||
{
|
||||
#ifdef HAVE_BLK_MQ
|
||||
if (rq != NULL)
|
||||
return (rq->cmd_flags & REQ_FUA);
|
||||
#else
|
||||
ASSERT3P(rq, ==, NULL);
|
||||
#endif
|
||||
return (bio_is_fua(bio));
|
||||
}
|
||||
|
||||
|
||||
static inline uint64_t
|
||||
io_offset(struct bio *bio, struct request *rq)
|
||||
{
|
||||
#ifdef HAVE_BLK_MQ
|
||||
if (rq != NULL)
|
||||
return (blk_rq_pos(rq) << 9);
|
||||
#else
|
||||
ASSERT3P(rq, ==, NULL);
|
||||
#endif
|
||||
return (BIO_BI_SECTOR(bio) << 9);
|
||||
}
|
||||
|
||||
static inline uint64_t
|
||||
io_size(struct bio *bio, struct request *rq)
|
||||
{
|
||||
#ifdef HAVE_BLK_MQ
|
||||
if (rq != NULL)
|
||||
return (blk_rq_bytes(rq));
|
||||
#else
|
||||
ASSERT3P(rq, ==, NULL);
|
||||
#endif
|
||||
return (BIO_BI_SIZE(bio));
|
||||
}
|
||||
|
||||
static inline int
|
||||
io_has_data(struct bio *bio, struct request *rq)
|
||||
{
|
||||
#ifdef HAVE_BLK_MQ
|
||||
if (rq != NULL)
|
||||
return (bio_has_data(rq->bio));
|
||||
#else
|
||||
ASSERT3P(rq, ==, NULL);
|
||||
#endif
|
||||
return (bio_has_data(bio));
|
||||
}
|
||||
#endif /* _ZFS_BLKDEV_H */
|
||||
|
@ -57,25 +57,45 @@
|
||||
#include <sys/types.h>
|
||||
#include <linux/version.h>
|
||||
|
||||
#define kfpu_allowed() 1
|
||||
#define kfpu_begin() \
|
||||
{ \
|
||||
preempt_disable(); \
|
||||
enable_kernel_altivec(); \
|
||||
}
|
||||
#define kfpu_allowed() 1
|
||||
|
||||
#if LINUX_VERSION_CODE >= KERNEL_VERSION(4, 5, 0)
|
||||
#define kfpu_end() \
|
||||
{ \
|
||||
disable_kernel_vsx(); \
|
||||
disable_kernel_altivec(); \
|
||||
preempt_enable(); \
|
||||
}
|
||||
#define kfpu_begin() \
|
||||
{ \
|
||||
preempt_disable(); \
|
||||
enable_kernel_altivec(); \
|
||||
enable_kernel_vsx(); \
|
||||
}
|
||||
#else
|
||||
/* seems that before 4.5 no-one bothered disabling ... */
|
||||
/* seems that before 4.5 no-one bothered */
|
||||
#define kfpu_begin()
|
||||
#define kfpu_end() preempt_enable()
|
||||
#endif
|
||||
#define kfpu_init() 0
|
||||
#define kfpu_fini() ((void) 0)
|
||||
|
||||
static inline boolean_t
|
||||
zfs_vsx_available(void)
|
||||
{
|
||||
boolean_t res;
|
||||
#if defined(__powerpc64__)
|
||||
u64 msr;
|
||||
#else
|
||||
u32 msr;
|
||||
#endif
|
||||
kfpu_begin();
|
||||
__asm volatile("mfmsr %0" : "=r"(msr));
|
||||
res = (msr & 0x800000) != 0;
|
||||
kfpu_end();
|
||||
return (res);
|
||||
}
|
||||
|
||||
/*
|
||||
* Check if AltiVec instruction set is available
|
||||
*/
|
||||
|
@ -296,11 +296,7 @@ static inline struct dentry *file_dentry(const struct file *f)
|
||||
|
||||
static inline uid_t zfs_uid_read_impl(struct inode *ip)
|
||||
{
|
||||
#ifdef HAVE_SUPER_USER_NS
|
||||
return (from_kuid(ip->i_sb->s_user_ns, ip->i_uid));
|
||||
#else
|
||||
return (from_kuid(kcred->user_ns, ip->i_uid));
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline uid_t zfs_uid_read(struct inode *ip)
|
||||
@ -310,11 +306,7 @@ static inline uid_t zfs_uid_read(struct inode *ip)
|
||||
|
||||
static inline gid_t zfs_gid_read_impl(struct inode *ip)
|
||||
{
|
||||
#ifdef HAVE_SUPER_USER_NS
|
||||
return (from_kgid(ip->i_sb->s_user_ns, ip->i_gid));
|
||||
#else
|
||||
return (from_kgid(kcred->user_ns, ip->i_gid));
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline gid_t zfs_gid_read(struct inode *ip)
|
||||
@ -324,20 +316,12 @@ static inline gid_t zfs_gid_read(struct inode *ip)
|
||||
|
||||
static inline void zfs_uid_write(struct inode *ip, uid_t uid)
|
||||
{
|
||||
#ifdef HAVE_SUPER_USER_NS
|
||||
ip->i_uid = make_kuid(ip->i_sb->s_user_ns, uid);
|
||||
#else
|
||||
ip->i_uid = make_kuid(kcred->user_ns, uid);
|
||||
#endif
|
||||
}
|
||||
|
||||
static inline void zfs_gid_write(struct inode *ip, gid_t gid)
|
||||
{
|
||||
#ifdef HAVE_SUPER_USER_NS
|
||||
ip->i_gid = make_kgid(ip->i_sb->s_user_ns, gid);
|
||||
#else
|
||||
ip->i_gid = make_kgid(kcred->user_ns, gid);
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -69,9 +69,20 @@ typedef struct zfs_uio {
|
||||
uint16_t uio_fmode;
|
||||
uint16_t uio_extflg;
|
||||
ssize_t uio_resid;
|
||||
|
||||
size_t uio_skip;
|
||||
|
||||
struct request *rq;
|
||||
|
||||
/*
|
||||
* Used for saving rq_for_each_segment() state between calls
|
||||
* to zfs_uiomove_bvec_rq().
|
||||
*/
|
||||
struct req_iterator iter;
|
||||
struct bio_vec bv;
|
||||
} zfs_uio_t;
|
||||
|
||||
|
||||
#define zfs_uio_segflg(u) (u)->uio_segflg
|
||||
#define zfs_uio_offset(u) (u)->uio_loffset
|
||||
#define zfs_uio_resid(u) (u)->uio_resid
|
||||
@ -116,17 +127,33 @@ zfs_uio_iovec_init(zfs_uio_t *uio, const struct iovec *iov,
|
||||
}
|
||||
|
||||
static inline void
|
||||
zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio)
|
||||
zfs_uio_bvec_init(zfs_uio_t *uio, struct bio *bio, struct request *rq)
|
||||
{
|
||||
uio->uio_bvec = &bio->bi_io_vec[BIO_BI_IDX(bio)];
|
||||
uio->uio_iovcnt = bio->bi_vcnt - BIO_BI_IDX(bio);
|
||||
uio->uio_loffset = BIO_BI_SECTOR(bio) << 9;
|
||||
/* Either bio or rq will be set, but not both */
|
||||
ASSERT3P(uio, !=, bio);
|
||||
|
||||
if (bio) {
|
||||
uio->uio_iovcnt = bio->bi_vcnt - BIO_BI_IDX(bio);
|
||||
uio->uio_bvec = &bio->bi_io_vec[BIO_BI_IDX(bio)];
|
||||
} else {
|
||||
uio->uio_bvec = NULL;
|
||||
uio->uio_iovcnt = 0;
|
||||
memset(&uio->iter, 0, sizeof (uio->iter));
|
||||
}
|
||||
|
||||
uio->uio_loffset = io_offset(bio, rq);
|
||||
uio->uio_segflg = UIO_BVEC;
|
||||
uio->uio_fault_disable = B_FALSE;
|
||||
uio->uio_fmode = 0;
|
||||
uio->uio_extflg = 0;
|
||||
uio->uio_resid = BIO_BI_SIZE(bio);
|
||||
uio->uio_skip = BIO_BI_SKIP(bio);
|
||||
uio->uio_resid = io_size(bio, rq);
|
||||
if (bio) {
|
||||
uio->uio_skip = BIO_BI_SKIP(bio);
|
||||
} else {
|
||||
uio->uio_skip = 0;
|
||||
}
|
||||
|
||||
uio->rq = rq;
|
||||
}
|
||||
|
||||
#if defined(HAVE_VFS_IOV_ITER)
|
||||
|
@ -25,11 +25,34 @@
|
||||
#define _SPL_ZONE_H
|
||||
|
||||
#include <sys/byteorder.h>
|
||||
#include <sys/cred.h>
|
||||
|
||||
#define GLOBAL_ZONEID 0
|
||||
#include <linux/cred.h>
|
||||
#include <linux/user_namespace.h>
|
||||
|
||||
#define zone_dataset_visible(x, y) (1)
|
||||
#define crgetzoneid(x) (GLOBAL_ZONEID)
|
||||
#define INGLOBALZONE(z) (1)
|
||||
/*
|
||||
* Attach the given dataset to the given user namespace.
|
||||
*/
|
||||
extern int zone_dataset_attach(cred_t *, const char *, int);
|
||||
|
||||
/*
|
||||
* Detach the given dataset from the given user namespace.
|
||||
*/
|
||||
extern int zone_dataset_detach(cred_t *, const char *, int);
|
||||
|
||||
/*
|
||||
* Returns true if the named pool/dataset is visible in the current zone.
|
||||
*/
|
||||
extern int zone_dataset_visible(const char *dataset, int *write);
|
||||
|
||||
int spl_zone_init(void);
|
||||
void spl_zone_fini(void);
|
||||
|
||||
extern unsigned int crgetzoneid(const cred_t *);
|
||||
extern unsigned int global_zoneid(void);
|
||||
extern boolean_t inglobalzone(proc_t *);
|
||||
|
||||
#define INGLOBALZONE(x) inglobalzone(x)
|
||||
#define GLOBAL_ZONEID global_zoneid()
|
||||
|
||||
#endif /* _SPL_ZONE_H */
|
||||
|
@ -32,4 +32,9 @@
|
||||
#define HAVE_LARGE_STACKS 1
|
||||
#endif
|
||||
|
||||
#if defined(CONFIG_UML)
|
||||
#undef setjmp
|
||||
#undef longjmp
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
125
sys/contrib/openzfs/include/sys/blake3.h
Normal file
125
sys/contrib/openzfs/include/sys/blake3.h
Normal file
@ -0,0 +1,125 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
|
||||
* Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor
|
||||
* Copyright (c) 2021 Tino Reichardt <milky-zfs@mcmilk.de>
|
||||
*/
|
||||
|
||||
#ifndef BLAKE3_H
|
||||
#define BLAKE3_H
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/types.h>
|
||||
#else
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define BLAKE3_KEY_LEN 32
|
||||
#define BLAKE3_OUT_LEN 32
|
||||
#define BLAKE3_MAX_DEPTH 54
|
||||
#define BLAKE3_BLOCK_LEN 64
|
||||
#define BLAKE3_CHUNK_LEN 1024
|
||||
|
||||
/*
|
||||
* This struct is a private implementation detail.
|
||||
* It has to be here because it's part of BLAKE3_CTX below.
|
||||
*/
|
||||
typedef struct {
|
||||
uint32_t cv[8];	/* 8 x 32-bit words = 32 bytes; presumably the chunk's chaining value -- confirm in blake3.c */
|
||||
uint64_t chunk_counter;	/* presumably the index of the chunk being hashed -- confirm in blake3.c */
|
||||
uint8_t buf[BLAKE3_BLOCK_LEN];	/* byte buffer sized for exactly one block (BLAKE3_BLOCK_LEN = 64) */
|
||||
uint8_t buf_len;	/* presumably the number of bytes currently held in buf; 64 fits in a uint8_t */
|
||||
uint8_t blocks_compressed;	/* presumably blocks already compressed in this chunk (a chunk is CHUNK_LEN / BLOCK_LEN = 16 blocks) */
|
||||
uint8_t flags;	/* flag bits; their meaning is not defined in this header */
|
||||
} blake3_chunk_state_t;
|
||||
|
||||
/* Hashing context passed to the Blake3_* functions declared in this header. */
typedef struct {
|
||||
uint32_t key[8];	/* 8 x 32-bit words = 32 bytes, the same size as BLAKE3_KEY_LEN */
|
||||
blake3_chunk_state_t chunk;	/* embedded per-chunk state (struct defined in this header) */
|
||||
uint8_t cv_stack_len;	/* presumably the number of entries currently on cv_stack -- confirm in blake3.c */
|
||||
|
||||
/*
|
||||
* The stack size is MAX_DEPTH + 1 because we do lazy merging. For
|
||||
* example, with 7 chunks, we have 3 entries in the stack. Adding an
|
||||
* 8th chunk requires a 4th entry, rather than merging everything down
|
||||
* to 1, because we don't know whether more input is coming. This is
|
||||
* different from how the reference implementation does things.
|
||||
*/
|
||||
uint8_t cv_stack[(BLAKE3_MAX_DEPTH + 1) * BLAKE3_OUT_LEN];	/* (54 + 1) entries of 32 bytes each = 1760 bytes */
|
||||
|
||||
/* const blake3_impl_ops_t *ops */
|
||||
const void *ops;	/* declared as void * here, so this header does not need the ops type */
|
||||
} BLAKE3_CTX;
|
||||
|
||||
/* init the context for hash operation */
|
||||
void Blake3_Init(BLAKE3_CTX *ctx);
|
||||
|
||||
/* init the context for a MAC and/or tree hash operation */
|
||||
void Blake3_InitKeyed(BLAKE3_CTX *ctx, const uint8_t key[BLAKE3_KEY_LEN]);
|
||||
|
||||
/* process the input bytes */
|
||||
void Blake3_Update(BLAKE3_CTX *ctx, const void *input, size_t input_len);
|
||||
|
||||
/* finalize the hash computation and output the result */
|
||||
void Blake3_Final(const BLAKE3_CTX *ctx, uint8_t *out);
|
||||
|
||||
/* finalize and write out_len bytes of output, starting at output offset seek */
|
||||
void Blake3_FinalSeek(const BLAKE3_CTX *ctx, uint64_t seek, uint8_t *out,
|
||||
size_t out_len);
|
||||
|
||||
/* these are pre-allocated contexts */
|
||||
extern void **blake3_per_cpu_ctx;
|
||||
extern void blake3_per_cpu_ctx_init(void);
|
||||
extern void blake3_per_cpu_ctx_fini(void);
|
||||
|
||||
/* return number of supported implementations */
|
||||
extern int blake3_get_impl_count(void);
|
||||
|
||||
/* return id of selected implementation */
|
||||
extern int blake3_get_impl_id(void);
|
||||
|
||||
/* return name of selected implementation */
|
||||
extern const char *blake3_get_impl_name(void);
|
||||
|
||||
/* setup id as fastest implementation */
|
||||
extern void blake3_set_impl_fastest(uint32_t id);
|
||||
|
||||
/* set implementation by id */
|
||||
extern void blake3_set_impl_id(uint32_t id);
|
||||
|
||||
/* set implementation by name */
|
||||
extern int blake3_set_impl_name(const char *name);
|
||||
|
||||
/* set startup implementation */
|
||||
extern void blake3_setup_impl(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* BLAKE3_H */
|
@ -93,6 +93,7 @@ typedef enum dmu_objset_type {
|
||||
typedef enum {
|
||||
ZPROP_CONT = -2,
|
||||
ZPROP_INVAL = -1,
|
||||
ZPROP_USERPROP = ZPROP_INVAL,
|
||||
ZFS_PROP_TYPE = 0,
|
||||
ZFS_PROP_CREATION,
|
||||
ZFS_PROP_USED,
|
||||
@ -310,7 +311,7 @@ typedef int (*zprop_func)(int, void *);
|
||||
*/
|
||||
typedef enum {
|
||||
VDEV_PROP_INVAL = -1,
|
||||
#define VDEV_PROP_USER VDEV_PROP_INVAL
|
||||
VDEV_PROP_USERPROP = VDEV_PROP_INVAL,
|
||||
VDEV_PROP_NAME,
|
||||
VDEV_PROP_CAPACITY,
|
||||
VDEV_PROP_STATE,
|
||||
@ -1450,7 +1451,9 @@ typedef enum zfs_ioc {
|
||||
ZFS_IOC_EVENTS_SEEK, /* 0x83 (Linux) */
|
||||
ZFS_IOC_NEXTBOOT, /* 0x84 (FreeBSD) */
|
||||
ZFS_IOC_JAIL, /* 0x85 (FreeBSD) */
|
||||
ZFS_IOC_USERNS_ATTACH = ZFS_IOC_JAIL, /* 0x85 (Linux) */
|
||||
ZFS_IOC_UNJAIL, /* 0x86 (FreeBSD) */
|
||||
ZFS_IOC_USERNS_DETACH = ZFS_IOC_UNJAIL, /* 0x86 (Linux) */
|
||||
ZFS_IOC_SET_BOOTENV, /* 0x87 */
|
||||
ZFS_IOC_GET_BOOTENV, /* 0x88 */
|
||||
ZFS_IOC_LAST
|
||||
@ -1531,6 +1534,7 @@ typedef enum {
|
||||
ZFS_ERR_REBUILD_IN_PROGRESS,
|
||||
ZFS_ERR_BADPROP,
|
||||
ZFS_ERR_VDEV_NOTSUP,
|
||||
ZFS_ERR_NOT_USER_NAMESPACE,
|
||||
} zfs_errno_t;
|
||||
|
||||
/*
|
||||
|
48
sys/contrib/openzfs/include/sys/zfs_chksum.h
Normal file
48
sys/contrib/openzfs/include/sys/zfs_chksum.h
Normal file
@ -0,0 +1,48 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2021 Tino Reichardt <milky-zfs@mcmilk.de>
|
||||
*/
|
||||
|
||||
#ifndef _ZFS_CHKSUM_H
|
||||
#define _ZFS_CHKSUM_H
|
||||
|
||||
#ifdef _KERNEL
|
||||
#include <sys/types.h>
|
||||
#else
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/* Benchmark the chksums of ZFS when the module is loading */
|
||||
void chksum_init(void);
|
||||
void chksum_fini(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* _ZFS_CHKSUM_H */
|
@ -124,6 +124,7 @@ typedef enum drr_headertype {
|
||||
* default use of "zfs send" won't encounter the bug mentioned above.
|
||||
*/
|
||||
#define DMU_BACKUP_FEATURE_SWITCH_TO_LARGE_BLOCKS (1 << 27)
|
||||
#define DMU_BACKUP_FEATURE_BLAKE3 (1 << 28)
|
||||
|
||||
/*
|
||||
* Mask of all supported backup features
|
||||
@ -134,7 +135,7 @@ typedef enum drr_headertype {
|
||||
DMU_BACKUP_FEATURE_COMPRESSED | DMU_BACKUP_FEATURE_LARGE_DNODE | \
|
||||
DMU_BACKUP_FEATURE_RAW | DMU_BACKUP_FEATURE_HOLDS | \
|
||||
DMU_BACKUP_FEATURE_REDACTED | DMU_BACKUP_FEATURE_SWITCH_TO_LARGE_BLOCKS | \
|
||||
DMU_BACKUP_FEATURE_ZSTD)
|
||||
DMU_BACKUP_FEATURE_ZSTD | DMU_BACKUP_FEATURE_BLAKE3)
|
||||
|
||||
/* Are all features in the given flag word currently supported? */
|
||||
#define DMU_STREAM_SUPPORTED(x) (!((x) & ~DMU_BACKUP_FEATURE_MASK))
|
||||
|
@ -89,6 +89,7 @@ enum zio_checksum {
|
||||
ZIO_CHECKSUM_SHA512,
|
||||
ZIO_CHECKSUM_SKEIN,
|
||||
ZIO_CHECKSUM_EDONR,
|
||||
ZIO_CHECKSUM_BLAKE3,
|
||||
ZIO_CHECKSUM_FUNCTIONS
|
||||
};
|
||||
|
||||
|
@ -21,7 +21,8 @@
|
||||
/*
|
||||
* Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
|
||||
* Copyright (c) 2014, 2016 by Delphix. All rights reserved.
|
||||
* Copyright Saso Kiselkov 2013, All rights reserved.
|
||||
* Copyright (c) 2013 Saso Kiselkov, All rights reserved.
|
||||
* Copyright (c) 2021 Tino Reichardt <milky-zfs@mcmilk.de>
|
||||
*/
|
||||
|
||||
#ifndef _SYS_ZIO_CHECKSUM_H
|
||||
@ -107,6 +108,8 @@ _SYS_ZIO_CHECKSUM_H zio_checksum_info_t
|
||||
/*
|
||||
* Checksum routines.
|
||||
*/
|
||||
|
||||
/* SHA2 */
|
||||
extern zio_checksum_t abd_checksum_SHA256;
|
||||
extern zio_checksum_t abd_checksum_SHA512_native;
|
||||
extern zio_checksum_t abd_checksum_SHA512_byteswap;
|
||||
@ -123,6 +126,13 @@ extern zio_checksum_t abd_checksum_edonr_byteswap;
|
||||
extern zio_checksum_tmpl_init_t abd_checksum_edonr_tmpl_init;
|
||||
extern zio_checksum_tmpl_free_t abd_checksum_edonr_tmpl_free;
|
||||
|
||||
/* BLAKE3 */
|
||||
extern zio_checksum_t abd_checksum_blake3_native;
|
||||
extern zio_checksum_t abd_checksum_blake3_byteswap;
|
||||
extern zio_checksum_tmpl_init_t abd_checksum_blake3_tmpl_init;
|
||||
extern zio_checksum_tmpl_free_t abd_checksum_blake3_tmpl_free;
|
||||
|
||||
/* Fletcher 4 */
|
||||
_SYS_ZIO_CHECKSUM_H zio_abd_checksum_func_t fletcher_4_abd_ops;
|
||||
extern zio_checksum_t abd_fletcher_4_native;
|
||||
extern zio_checksum_t abd_fletcher_4_byteswap;
|
||||
|
@ -77,6 +77,7 @@ typedef enum spa_feature {
|
||||
SPA_FEATURE_DRAID,
|
||||
SPA_FEATURE_ZILSAXATTR,
|
||||
SPA_FEATURE_HEAD_ERRLOG,
|
||||
SPA_FEATURE_BLAKE3,
|
||||
SPA_FEATURES
|
||||
} spa_feature_t;
|
||||
|
||||
|
@ -13,6 +13,10 @@ nodist_libicp_la_SOURCES = \
|
||||
module/icp/algs/aes/aes_impl_x86-64.c \
|
||||
module/icp/algs/aes/aes_impl.c \
|
||||
module/icp/algs/aes/aes_modes.c \
|
||||
module/icp/algs/blake3/blake3.c \
|
||||
module/icp/algs/blake3/blake3_generic.c \
|
||||
module/icp/algs/blake3/blake3_impl.c \
|
||||
module/icp/algs/blake3/blake3_x86-64.c \
|
||||
module/icp/algs/edonr/edonr.c \
|
||||
module/icp/algs/modes/modes.c \
|
||||
module/icp/algs/modes/cbc.c \
|
||||
@ -36,15 +40,30 @@ nodist_libicp_la_SOURCES = \
|
||||
module/icp/core/kcf_mech_tabs.c \
|
||||
module/icp/core/kcf_prov_tabs.c
|
||||
|
||||
if TARGET_CPU_AARCH64
|
||||
nodist_libicp_la_SOURCES += \
|
||||
module/icp/asm-aarch64/blake3/b3_aarch64_sse2.S \
|
||||
module/icp/asm-aarch64/blake3/b3_aarch64_sse41.S
|
||||
endif
|
||||
|
||||
if TARGET_CPU_POWERPC
|
||||
nodist_libicp_la_SOURCES += \
|
||||
module/icp/asm-ppc64/blake3/b3_ppc64le_sse2.S \
|
||||
module/icp/asm-ppc64/blake3/b3_ppc64le_sse41.S
|
||||
endif
|
||||
|
||||
if TARGET_CPU_X86_64
|
||||
nodist_libicp_la_SOURCES += \
|
||||
module/icp/asm-x86_64/aes/aeskey.c
|
||||
nodist_libicp_la_SOURCES += \
|
||||
module/icp/asm-x86_64/aes/aeskey.c \
|
||||
module/icp/asm-x86_64/aes/aes_amd64.S \
|
||||
module/icp/asm-x86_64/aes/aes_aesni.S \
|
||||
module/icp/asm-x86_64/modes/gcm_pclmulqdq.S \
|
||||
module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S \
|
||||
module/icp/asm-x86_64/modes/ghash-x86_64.S \
|
||||
module/icp/asm-x86_64/sha2/sha256_impl.S \
|
||||
module/icp/asm-x86_64/sha2/sha512_impl.S
|
||||
module/icp/asm-x86_64/sha2/sha512_impl.S \
|
||||
module/icp/asm-x86_64/blake3/blake3_avx2.S \
|
||||
module/icp/asm-x86_64/blake3/blake3_avx512.S \
|
||||
module/icp/asm-x86_64/blake3/blake3_sse2.S \
|
||||
module/icp/asm-x86_64/blake3/blake3_sse41.S
|
||||
endif
|
||||
|
@ -491,6 +491,24 @@ zfs_altivec_available(void)
|
||||
#endif
|
||||
return (has_altivec);
|
||||
}
|
||||
static inline boolean_t
|
||||
zfs_vsx_available(void)
|
||||
{
|
||||
boolean_t has_vsx = B_FALSE;
|
||||
#if defined(__ALTIVEC__) && !defined(__FreeBSD__)
|
||||
sighandler_t savesig;
|
||||
savesig = signal(SIGILL, sigillhandler);
|
||||
if (setjmp(env)) {
|
||||
signal(SIGILL, savesig);
|
||||
has_vsx = B_FALSE;
|
||||
} else {
|
||||
__asm__ __volatile__("xssubsp 0,0,0\n");
|
||||
signal(SIGILL, savesig);
|
||||
has_vsx = B_TRUE;
|
||||
}
|
||||
#endif
|
||||
return (has_vsx);
|
||||
}
|
||||
#else
|
||||
|
||||
#define kfpu_allowed() 0
|
||||
|
@ -44,7 +44,7 @@
|
||||
#include <inttypes.h>
|
||||
#endif /* HAVE_INTTYPES */
|
||||
|
||||
typedef int zoneid_t;
|
||||
typedef uint_t zoneid_t;
|
||||
typedef int projid_t;
|
||||
|
||||
/*
|
||||
|
@ -33,7 +33,17 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define GLOBAL_ZONEID 0
|
||||
#ifdef __FreeBSD__
|
||||
#define GLOBAL_ZONEID 0
|
||||
#else
|
||||
/*
|
||||
* Hardcoded in the kernel's root user namespace. A "better" way to get
|
||||
* this would be by using ioctl_ns(2), but this would need to be performed
|
||||
* recursively on NS_GET_PARENT and then NS_GET_USERNS. Also, that's only
|
||||
* supported since Linux 4.9.
|
||||
*/
|
||||
#define GLOBAL_ZONEID 4026531837U
|
||||
#endif
|
||||
|
||||
extern zoneid_t getzoneid(void);
|
||||
|
||||
|
@ -23,10 +23,40 @@
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#include <unistd.h>
|
||||
#include <stdio.h>
|
||||
#include <errno.h>
|
||||
#include <stdlib.h>
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
|
||||
#include <zone.h>
|
||||
|
||||
zoneid_t
|
||||
getzoneid(void)
|
||||
{
|
||||
return (GLOBAL_ZONEID);
|
||||
char path[PATH_MAX];
|
||||
char buf[128] = { '\0' };
|
||||
char *cp;
|
||||
|
||||
int c = snprintf(path, sizeof (path), "/proc/self/ns/user");
|
||||
/* This API doesn't have any error checking... */
|
||||
if (c < 0)
|
||||
return (0);
|
||||
|
||||
ssize_t r = readlink(path, buf, sizeof (buf) - 1);
|
||||
if (r < 0)
|
||||
return (0);
|
||||
|
||||
cp = strchr(buf, '[');
|
||||
if (cp == NULL)
|
||||
return (0);
|
||||
cp++;
|
||||
|
||||
unsigned long n = strtoul(cp, NULL, 10);
|
||||
if (n == ULONG_MAX && errno == ERANGE)
|
||||
return (0);
|
||||
zoneid_t z = (zoneid_t)n;
|
||||
|
||||
return (z);
|
||||
}
|
||||
|
@ -1081,7 +1081,7 @@
|
||||
</function-decl>
|
||||
</abi-instr>
|
||||
<abi-instr address-size='64' path='os/linux/zone.c' language='LANG_C99'>
|
||||
<typedef-decl name='zoneid_t' type-id='95e97e5e' id='4da03624'/>
|
||||
<typedef-decl name='zoneid_t' type-id='3502e3ff' id='4da03624'/>
|
||||
<function-decl name='getzoneid' mangled-name='getzoneid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getzoneid'>
|
||||
<return type-id='4da03624'/>
|
||||
</function-decl>
|
||||
|
@ -433,6 +433,7 @@
|
||||
<elf-symbol name='zfs_unmountall' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_unshare' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_unshareall' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_userns' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_userspace' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_valid_proplist' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_version_kernel' type='func-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
@ -583,7 +584,7 @@
|
||||
<elf-symbol name='fletcher_4_superscalar_ops' size='64' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='libzfs_config_ops' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='sa_protocol_names' size='16' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='spa_feature_table' size='2016' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='spa_feature_table' size='2072' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfeature_checks_disable' size='4' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_deleg_perm_tab' size='512' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
<elf-symbol name='zfs_history_event_names' size='328' type='object-type' binding='global-binding' visibility='default-visibility' is-defined='yes'/>
|
||||
@ -1537,7 +1538,7 @@
|
||||
</function-decl>
|
||||
</abi-instr>
|
||||
<abi-instr address-size='64' path='lib/libspl/os/linux/zone.c' language='LANG_C99'>
|
||||
<typedef-decl name='zoneid_t' type-id='95e97e5e' id='4da03624'/>
|
||||
<typedef-decl name='zoneid_t' type-id='3502e3ff' id='4da03624'/>
|
||||
<function-decl name='getzoneid' mangled-name='getzoneid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getzoneid'>
|
||||
<return type-id='4da03624'/>
|
||||
</function-decl>
|
||||
@ -4414,6 +4415,12 @@
|
||||
<function-decl name='zfs_version_kernel' mangled-name='zfs_version_kernel' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_version_kernel'>
|
||||
<return type-id='26a90f95'/>
|
||||
</function-decl>
|
||||
<function-decl name='zfs_userns' mangled-name='zfs_userns' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_userns'>
|
||||
<parameter type-id='9200a744' name='zhp'/>
|
||||
<parameter type-id='80f4b756' name='nspath'/>
|
||||
<parameter type-id='95e97e5e' name='attach'/>
|
||||
<return type-id='95e97e5e'/>
|
||||
</function-decl>
|
||||
</abi-instr>
|
||||
<abi-instr address-size='64' path='lib/libzutil/os/linux/zutil_device_path_os.c' language='LANG_C99'>
|
||||
<function-decl name='zfs_append_partition' mangled-name='zfs_append_partition' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='zfs_append_partition'>
|
||||
@ -4770,8 +4777,8 @@
|
||||
</function-decl>
|
||||
</abi-instr>
|
||||
<abi-instr address-size='64' path='module/zcommon/zfeature_common.c' language='LANG_C99'>
|
||||
<array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='16128' id='9d5e9e2e'>
|
||||
<subrange length='36' type-id='7359adad' id='ae666bde'/>
|
||||
<array-type-def dimensions='1' type-id='83f29ca2' size-in-bits='16576' id='9d5e9e2e'>
|
||||
<subrange length='37' type-id='7359adad' id='ae666bde'/>
|
||||
</array-type-def>
|
||||
<enum-decl name='spa_feature' id='33ecb627'>
|
||||
<underlying-type type-id='9cac1fee'/>
|
||||
@ -4812,7 +4819,8 @@
|
||||
<enumerator name='SPA_FEATURE_DRAID' value='33'/>
|
||||
<enumerator name='SPA_FEATURE_ZILSAXATTR' value='34'/>
|
||||
<enumerator name='SPA_FEATURE_HEAD_ERRLOG' value='35'/>
|
||||
<enumerator name='SPA_FEATURES' value='36'/>
|
||||
<enumerator name='SPA_FEATURE_BLAKE3' value='36'/>
|
||||
<enumerator name='SPA_FEATURES' value='37'/>
|
||||
</enum-decl>
|
||||
<typedef-decl name='spa_feature_t' type-id='33ecb627' id='d6618c78'/>
|
||||
<enum-decl name='zfeature_flags' id='6db816a4'>
|
||||
|
@ -1003,7 +1003,7 @@ zfs_crypto_create(libzfs_handle_t *hdl, char *parent_name, nvlist_t *props,
|
||||
uint_t *wkeylen_out)
|
||||
{
|
||||
int ret;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
uint64_t crypt = ZIO_CRYPT_INHERIT, pcrypt = ZIO_CRYPT_INHERIT;
|
||||
uint64_t keyformat = ZFS_KEYFORMAT_NONE;
|
||||
char *keylocation = NULL;
|
||||
@ -1174,7 +1174,7 @@ zfs_crypto_clone_check(libzfs_handle_t *hdl, zfs_handle_t *origin_zhp,
|
||||
char *parent_name, nvlist_t *props)
|
||||
{
|
||||
(void) origin_zhp, (void) parent_name;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "Encryption clone error"));
|
||||
@ -1276,7 +1276,7 @@ zfs_crypto_load_key(zfs_handle_t *zhp, boolean_t noop,
|
||||
const char *alt_keylocation)
|
||||
{
|
||||
int ret, attempts = 0;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
uint64_t keystatus, iters = 0, salt = 0;
|
||||
uint64_t keyformat = ZFS_KEYFORMAT_NONE;
|
||||
char prop_keylocation[MAXNAMELEN];
|
||||
@ -1444,7 +1444,7 @@ int
|
||||
zfs_crypto_unload_key(zfs_handle_t *zhp)
|
||||
{
|
||||
int ret;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
char prop_encroot[MAXNAMELEN];
|
||||
uint64_t keystatus, keyformat;
|
||||
boolean_t is_encroot;
|
||||
@ -1580,7 +1580,7 @@ int
|
||||
zfs_crypto_rewrap(zfs_handle_t *zhp, nvlist_t *raw_props, boolean_t inheritkey)
|
||||
{
|
||||
int ret;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
boolean_t is_encroot;
|
||||
nvlist_t *props = NULL;
|
||||
uint8_t *wkeydata = NULL;
|
||||
|
@ -678,7 +678,7 @@ zfs_handle_t *
|
||||
zfs_open(libzfs_handle_t *hdl, const char *path, int types)
|
||||
{
|
||||
zfs_handle_t *zhp;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
char *bookp;
|
||||
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
@ -1022,7 +1022,7 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
|
||||
const char *propname = nvpair_name(elem);
|
||||
|
||||
prop = zfs_name_to_prop(propname);
|
||||
if (prop == ZPROP_INVAL && zfs_prop_user(propname)) {
|
||||
if (prop == ZPROP_USERPROP && zfs_prop_user(propname)) {
|
||||
/*
|
||||
* This is a user property: make sure it's a
|
||||
* string, and that it's less than ZAP_MAXNAMELEN.
|
||||
@ -1061,7 +1061,7 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
|
||||
goto error;
|
||||
}
|
||||
|
||||
if (prop == ZPROP_INVAL && zfs_prop_userquota(propname)) {
|
||||
if (prop == ZPROP_USERPROP && zfs_prop_userquota(propname)) {
|
||||
zfs_userquota_prop_t uqtype;
|
||||
char *newpropname = NULL;
|
||||
char domain[128];
|
||||
@ -1143,7 +1143,8 @@ zfs_valid_proplist(libzfs_handle_t *hdl, zfs_type_t type, nvlist_t *nvl,
|
||||
}
|
||||
free(newpropname);
|
||||
continue;
|
||||
} else if (prop == ZPROP_INVAL && zfs_prop_written(propname)) {
|
||||
} else if (prop == ZPROP_USERPROP &&
|
||||
zfs_prop_written(propname)) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"'%s' is readonly"),
|
||||
propname);
|
||||
@ -1716,7 +1717,7 @@ int
|
||||
zfs_prop_set(zfs_handle_t *zhp, const char *propname, const char *propval)
|
||||
{
|
||||
int ret = -1;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
libzfs_handle_t *hdl = zhp->zfs_hdl;
|
||||
nvlist_t *nvl = NULL;
|
||||
|
||||
@ -1750,7 +1751,7 @@ zfs_prop_set_list(zfs_handle_t *zhp, nvlist_t *props)
|
||||
int ret = -1;
|
||||
prop_changelist_t **cls = NULL;
|
||||
int cl_idx;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
libzfs_handle_t *hdl = zhp->zfs_hdl;
|
||||
nvlist_t *nvl;
|
||||
int nvl_len = 0;
|
||||
@ -1930,14 +1931,14 @@ zfs_prop_inherit(zfs_handle_t *zhp, const char *propname, boolean_t received)
|
||||
int ret;
|
||||
prop_changelist_t *cl;
|
||||
libzfs_handle_t *hdl = zhp->zfs_hdl;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
zfs_prop_t prop;
|
||||
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
"cannot inherit %s for '%s'"), propname, zhp->zfs_name);
|
||||
|
||||
zc.zc_cookie = received;
|
||||
if ((prop = zfs_name_to_prop(propname)) == ZPROP_INVAL) {
|
||||
if ((prop = zfs_name_to_prop(propname)) == ZPROP_USERPROP) {
|
||||
/*
|
||||
* For user properties, the amount of work we have to do is very
|
||||
* small, so just do it here.
|
||||
@ -2356,7 +2357,7 @@ zfs_prop_get_recvd(zfs_handle_t *zhp, const char *propname, char *propbuf,
|
||||
|
||||
prop = zfs_name_to_prop(propname);
|
||||
|
||||
if (prop != ZPROP_INVAL) {
|
||||
if (prop != ZPROP_USERPROP) {
|
||||
uint64_t cookie;
|
||||
if (!nvlist_exists(zhp->zfs_recvd_props, propname))
|
||||
return (-1);
|
||||
@ -3402,7 +3403,7 @@ check_parents(libzfs_handle_t *hdl, const char *path, uint64_t *zoned,
|
||||
char parent[ZFS_MAX_DATASET_NAME_LEN];
|
||||
char *slash;
|
||||
zfs_handle_t *zhp;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
uint64_t is_zoned;
|
||||
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
@ -3580,7 +3581,7 @@ zfs_create_ancestors(libzfs_handle_t *hdl, const char *path)
|
||||
{
|
||||
int prefix;
|
||||
char *path_copy;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
int rc = 0;
|
||||
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
@ -3624,7 +3625,7 @@ zfs_create(libzfs_handle_t *hdl, const char *path, zfs_type_t type,
|
||||
zpool_handle_t *zpool_handle;
|
||||
uint8_t *wkeydata = NULL;
|
||||
uint_t wkeylen = 0;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
char parent[ZFS_MAX_DATASET_NAME_LEN];
|
||||
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
@ -3897,7 +3898,7 @@ zfs_destroy_snaps_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, boolean_t defer)
|
||||
}
|
||||
|
||||
if (nvlist_empty(errlist)) {
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot destroy snapshots"));
|
||||
|
||||
@ -3905,7 +3906,7 @@ zfs_destroy_snaps_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, boolean_t defer)
|
||||
}
|
||||
for (pair = nvlist_next_nvpair(errlist, NULL);
|
||||
pair != NULL; pair = nvlist_next_nvpair(errlist, pair)) {
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot destroy snapshot %s"),
|
||||
nvpair_name(pair));
|
||||
@ -3934,7 +3935,7 @@ zfs_clone(zfs_handle_t *zhp, const char *target, nvlist_t *props)
|
||||
{
|
||||
char parent[ZFS_MAX_DATASET_NAME_LEN];
|
||||
int ret;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
libzfs_handle_t *hdl = zhp->zfs_hdl;
|
||||
uint64_t zoned;
|
||||
|
||||
@ -4018,7 +4019,7 @@ zfs_promote(zfs_handle_t *zhp)
|
||||
libzfs_handle_t *hdl = zhp->zfs_hdl;
|
||||
char snapname[ZFS_MAX_DATASET_NAME_LEN];
|
||||
int ret;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
"cannot promote '%s'"), zhp->zfs_name);
|
||||
@ -4100,7 +4101,7 @@ int
|
||||
zfs_snapshot_nvl(libzfs_handle_t *hdl, nvlist_t *snaps, nvlist_t *props)
|
||||
{
|
||||
int ret;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
nvpair_t *elem;
|
||||
nvlist_t *errors;
|
||||
zpool_handle_t *zpool_hdl;
|
||||
@ -4185,7 +4186,7 @@ zfs_snapshot(libzfs_handle_t *hdl, const char *path, boolean_t recursive,
|
||||
char fsname[ZFS_MAX_DATASET_NAME_LEN];
|
||||
char *cp;
|
||||
zfs_handle_t *zhp;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
"cannot snapshot %s"), path);
|
||||
@ -4328,7 +4329,7 @@ zfs_rollback(zfs_handle_t *zhp, zfs_handle_t *snap, boolean_t force)
|
||||
*/
|
||||
err = lzc_rollback_to(zhp->zfs_name, snap->zfs_name);
|
||||
if (err != 0) {
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot rollback '%s'"),
|
||||
@ -4387,7 +4388,7 @@ zfs_rename(zfs_handle_t *zhp, const char *target, renameflags_t flags)
|
||||
char parent[ZFS_MAX_DATASET_NAME_LEN];
|
||||
char property[ZFS_MAXPROPLEN];
|
||||
libzfs_handle_t *hdl = zhp->zfs_hdl;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
|
||||
/* if we have the same exact name, just return success */
|
||||
if (strcmp(zhp->zfs_name, target) == 0)
|
||||
@ -4635,7 +4636,7 @@ zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received,
|
||||
*/
|
||||
start = plp;
|
||||
while (*start != NULL) {
|
||||
if ((*start)->pl_prop == ZPROP_INVAL)
|
||||
if ((*start)->pl_prop == ZPROP_USERPROP)
|
||||
break;
|
||||
start = &(*start)->pl_next;
|
||||
}
|
||||
@ -4656,7 +4657,7 @@ zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received,
|
||||
entry = zfs_alloc(hdl, sizeof (zprop_list_t));
|
||||
entry->pl_user_prop =
|
||||
zfs_strdup(hdl, nvpair_name(elem));
|
||||
entry->pl_prop = ZPROP_INVAL;
|
||||
entry->pl_prop = ZPROP_USERPROP;
|
||||
entry->pl_width = strlen(nvpair_name(elem));
|
||||
entry->pl_all = B_TRUE;
|
||||
*last = entry;
|
||||
@ -4671,7 +4672,7 @@ zfs_expand_proplist(zfs_handle_t *zhp, zprop_list_t **plp, boolean_t received,
|
||||
if (entry->pl_fixed && !literal)
|
||||
continue;
|
||||
|
||||
if (entry->pl_prop != ZPROP_INVAL) {
|
||||
if (entry->pl_prop != ZPROP_USERPROP) {
|
||||
if (zfs_prop_get(zhp, entry->pl_prop,
|
||||
buf, sizeof (buf), NULL, NULL, 0, literal) == 0) {
|
||||
if (strlen(buf) > entry->pl_width)
|
||||
@ -4720,13 +4721,14 @@ zfs_prune_proplist(zfs_handle_t *zhp, uint8_t *props)
|
||||
next = nvlist_next_nvpair(zhp->zfs_props, curr);
|
||||
|
||||
/*
|
||||
* User properties will result in ZPROP_INVAL, and since we
|
||||
* User properties will result in ZPROP_USERPROP (an alias
|
||||
* for ZPROP_INVAL), and since we
|
||||
* only know how to prune standard ZFS properties, we always
|
||||
* leave these in the list. This can also happen if we
|
||||
* encounter an unknown DSL property (when running older
|
||||
* software, for example).
|
||||
*/
|
||||
if (zfs_prop != ZPROP_INVAL && props[zfs_prop] == B_FALSE)
|
||||
if (zfs_prop != ZPROP_USERPROP && props[zfs_prop] == B_FALSE)
|
||||
(void) nvlist_remove(zhp->zfs_props,
|
||||
nvpair_name(curr), nvpair_type(curr));
|
||||
curr = next;
|
||||
@ -4902,7 +4904,7 @@ zfs_hold(zfs_handle_t *zhp, const char *snapname, const char *tag,
|
||||
(void) zfs_hold_one(zfs_handle_dup(zhp), &ha);
|
||||
|
||||
if (nvlist_empty(ha.nvl)) {
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
|
||||
fnvlist_free(ha.nvl);
|
||||
ret = ENOENT;
|
||||
@ -4926,7 +4928,7 @@ zfs_hold_nvl(zfs_handle_t *zhp, int cleanup_fd, nvlist_t *holds)
|
||||
int ret;
|
||||
nvlist_t *errors;
|
||||
libzfs_handle_t *hdl = zhp->zfs_hdl;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
nvpair_t *elem;
|
||||
|
||||
errors = NULL;
|
||||
@ -5028,7 +5030,7 @@ zfs_release(zfs_handle_t *zhp, const char *snapname, const char *tag,
|
||||
nvlist_t *errors = NULL;
|
||||
nvpair_t *elem;
|
||||
libzfs_handle_t *hdl = zhp->zfs_hdl;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
|
||||
ha.nvl = fnvlist_alloc();
|
||||
ha.snapname = snapname;
|
||||
@ -5108,7 +5110,7 @@ zfs_get_fsacl(zfs_handle_t *zhp, nvlist_t **nvl)
|
||||
int nvsz = 2048;
|
||||
void *nvbuf;
|
||||
int err = 0;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
|
||||
assert(zhp->zfs_type == ZFS_TYPE_VOLUME ||
|
||||
zhp->zfs_type == ZFS_TYPE_FILESYSTEM);
|
||||
@ -5172,7 +5174,7 @@ zfs_set_fsacl(zfs_handle_t *zhp, boolean_t un, nvlist_t *nvl)
|
||||
zfs_cmd_t zc = {"\0"};
|
||||
libzfs_handle_t *hdl = zhp->zfs_hdl;
|
||||
char *nvbuf;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
size_t nvsz;
|
||||
int err;
|
||||
|
||||
@ -5224,7 +5226,7 @@ int
|
||||
zfs_get_holds(zfs_handle_t *zhp, nvlist_t **nvl)
|
||||
{
|
||||
int err;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
|
||||
err = lzc_get_holds(zhp->zfs_name, nvl);
|
||||
|
||||
|
@ -709,7 +709,7 @@ zfs_show_diffs(zfs_handle_t *zhp, int outfd, const char *fromsnap,
|
||||
const char *tosnap, int flags)
|
||||
{
|
||||
zfs_cmd_t zc = {"\0"};
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
differ_info_t di = { 0 };
|
||||
pthread_t tid;
|
||||
int pipefd[2];
|
||||
|
@ -44,6 +44,8 @@
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#define ERRBUFLEN 1024
|
||||
|
||||
struct libzfs_handle {
|
||||
int libzfs_error;
|
||||
int libzfs_fd;
|
||||
@ -208,7 +210,7 @@ typedef struct differ_info {
|
||||
char *ds;
|
||||
char *dsmnt;
|
||||
char *tmpsnap;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
boolean_t isclone;
|
||||
boolean_t scripted;
|
||||
boolean_t classify;
|
||||
|
@ -776,7 +776,7 @@ zpool_set_prop(zpool_handle_t *zhp, const char *propname, const char *propval)
|
||||
{
|
||||
zfs_cmd_t zc = {"\0"};
|
||||
int ret = -1;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
nvlist_t *nvl = NULL;
|
||||
nvlist_t *realprops;
|
||||
uint64_t version;
|
||||
@ -854,7 +854,7 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp,
|
||||
for (i = 0; i < SPA_FEATURES; i++) {
|
||||
zprop_list_t *entry = zfs_alloc(hdl,
|
||||
sizeof (zprop_list_t));
|
||||
entry->pl_prop = ZPROP_INVAL;
|
||||
entry->pl_prop = ZPROP_USERPROP;
|
||||
entry->pl_user_prop = zfs_asprintf(hdl, "feature@%s",
|
||||
spa_feature_table[i].fi_uname);
|
||||
entry->pl_width = strlen(entry->pl_user_prop);
|
||||
@ -898,7 +898,7 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp,
|
||||
}
|
||||
|
||||
entry = zfs_alloc(hdl, sizeof (zprop_list_t));
|
||||
entry->pl_prop = ZPROP_INVAL;
|
||||
entry->pl_prop = ZPROP_USERPROP;
|
||||
entry->pl_user_prop = propname;
|
||||
entry->pl_width = strlen(entry->pl_user_prop);
|
||||
entry->pl_all = B_TRUE;
|
||||
@ -911,7 +911,7 @@ zpool_expand_proplist(zpool_handle_t *zhp, zprop_list_t **plp,
|
||||
if (entry->pl_fixed && !literal)
|
||||
continue;
|
||||
|
||||
if (entry->pl_prop != ZPROP_INVAL &&
|
||||
if (entry->pl_prop != ZPROP_USERPROP &&
|
||||
zpool_get_prop(zhp, entry->pl_prop, buf, sizeof (buf),
|
||||
NULL, literal) == 0) {
|
||||
if (strlen(buf) > entry->pl_width)
|
||||
@ -967,7 +967,7 @@ vdev_expand_proplist(zpool_handle_t *zhp, const char *vdevname,
|
||||
|
||||
/* Skip properties that are not user defined */
|
||||
if ((prop = vdev_name_to_prop(propname)) !=
|
||||
VDEV_PROP_USER)
|
||||
VDEV_PROP_USERPROP)
|
||||
continue;
|
||||
|
||||
if (nvpair_value_nvlist(elem, &propval) != 0)
|
||||
@ -1368,14 +1368,14 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
|
||||
nvlist_t *hidden_args = NULL;
|
||||
uint8_t *wkeydata = NULL;
|
||||
uint_t wkeylen = 0;
|
||||
char msg[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
int ret = -1;
|
||||
|
||||
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
"cannot create '%s'"), pool);
|
||||
|
||||
if (!zpool_name_valid(hdl, B_FALSE, pool))
|
||||
return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
|
||||
return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
|
||||
|
||||
zcmd_write_conf_nvlist(hdl, &zc, nvroot);
|
||||
|
||||
@ -1383,7 +1383,7 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
|
||||
prop_flags_t flags = { .create = B_TRUE, .import = B_FALSE };
|
||||
|
||||
if ((zc_props = zpool_valid_proplist(hdl, pool, props,
|
||||
SPA_VERSION_1, flags, msg)) == NULL) {
|
||||
SPA_VERSION_1, flags, errbuf)) == NULL) {
|
||||
goto create_failed;
|
||||
}
|
||||
}
|
||||
@ -1397,7 +1397,7 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
|
||||
strcmp(zonestr, "on") == 0);
|
||||
|
||||
if ((zc_fsprops = zfs_valid_proplist(hdl, ZFS_TYPE_FILESYSTEM,
|
||||
fsprops, zoned, NULL, NULL, B_TRUE, msg)) == NULL) {
|
||||
fsprops, zoned, NULL, NULL, B_TRUE, errbuf)) == NULL) {
|
||||
goto create_failed;
|
||||
}
|
||||
|
||||
@ -1407,7 +1407,7 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"%s property requires a special vdev"),
|
||||
zfs_prop_to_name(ZFS_PROP_SPECIAL_SMALL_BLOCKS));
|
||||
(void) zfs_error(hdl, EZFS_BADPROP, msg);
|
||||
(void) zfs_error(hdl, EZFS_BADPROP, errbuf);
|
||||
goto create_failed;
|
||||
}
|
||||
|
||||
@ -1417,7 +1417,7 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
|
||||
}
|
||||
if (zfs_crypto_create(hdl, NULL, zc_fsprops, props, B_TRUE,
|
||||
&wkeydata, &wkeylen) != 0) {
|
||||
zfs_error(hdl, EZFS_CRYPTOFAILED, msg);
|
||||
zfs_error(hdl, EZFS_CRYPTOFAILED, errbuf);
|
||||
goto create_failed;
|
||||
}
|
||||
if (nvlist_add_nvlist(zc_props,
|
||||
@ -1465,7 +1465,7 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
|
||||
"one or more vdevs refer to the same device, or "
|
||||
"one of\nthe devices is part of an active md or "
|
||||
"lvm device"));
|
||||
return (zfs_error(hdl, EZFS_BADDEV, msg));
|
||||
return (zfs_error(hdl, EZFS_BADDEV, errbuf));
|
||||
|
||||
case ERANGE:
|
||||
/*
|
||||
@ -1480,7 +1480,7 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
|
||||
*/
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"record size invalid"));
|
||||
return (zfs_error(hdl, EZFS_BADPROP, msg));
|
||||
return (zfs_error(hdl, EZFS_BADPROP, errbuf));
|
||||
|
||||
case EOVERFLOW:
|
||||
/*
|
||||
@ -1499,12 +1499,12 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
|
||||
"one or more devices is less than the "
|
||||
"minimum size (%s)"), buf);
|
||||
}
|
||||
return (zfs_error(hdl, EZFS_BADDEV, msg));
|
||||
return (zfs_error(hdl, EZFS_BADDEV, errbuf));
|
||||
|
||||
case ENOSPC:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"one or more devices is out of space"));
|
||||
return (zfs_error(hdl, EZFS_BADDEV, msg));
|
||||
return (zfs_error(hdl, EZFS_BADDEV, errbuf));
|
||||
|
||||
case EINVAL:
|
||||
if (zpool_has_draid_vdev(nvroot) &&
|
||||
@ -1512,13 +1512,14 @@ zpool_create(libzfs_handle_t *hdl, const char *pool, nvlist_t *nvroot,
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"dRAID vdevs are unsupported by the "
|
||||
"kernel"));
|
||||
return (zfs_error(hdl, EZFS_BADDEV, msg));
|
||||
return (zfs_error(hdl, EZFS_BADDEV, errbuf));
|
||||
} else {
|
||||
return (zpool_standard_error(hdl, errno, msg));
|
||||
return (zpool_standard_error(hdl, errno,
|
||||
errbuf));
|
||||
}
|
||||
|
||||
default:
|
||||
return (zpool_standard_error(hdl, errno, msg));
|
||||
return (zpool_standard_error(hdl, errno, errbuf));
|
||||
}
|
||||
}
|
||||
|
||||
@ -1542,7 +1543,7 @@ zpool_destroy(zpool_handle_t *zhp, const char *log_str)
|
||||
zfs_cmd_t zc = {"\0"};
|
||||
zfs_handle_t *zfp = NULL;
|
||||
libzfs_handle_t *hdl = zhp->zpool_hdl;
|
||||
char msg[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
|
||||
if (zhp->zpool_state == POOL_STATE_ACTIVE &&
|
||||
(zfp = zfs_open(hdl, zhp->zpool_name, ZFS_TYPE_FILESYSTEM)) == NULL)
|
||||
@ -1552,15 +1553,15 @@ zpool_destroy(zpool_handle_t *zhp, const char *log_str)
|
||||
zc.zc_history = (uint64_t)(uintptr_t)log_str;
|
||||
|
||||
if (zfs_ioctl(hdl, ZFS_IOC_POOL_DESTROY, &zc) != 0) {
|
||||
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
"cannot destroy '%s'"), zhp->zpool_name);
|
||||
|
||||
if (errno == EROFS) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"one or more devices is read only"));
|
||||
(void) zfs_error(hdl, EZFS_BADDEV, msg);
|
||||
(void) zfs_error(hdl, EZFS_BADDEV, errbuf);
|
||||
} else {
|
||||
(void) zpool_standard_error(hdl, errno, msg);
|
||||
(void) zpool_standard_error(hdl, errno, errbuf);
|
||||
}
|
||||
|
||||
if (zfp)
|
||||
@ -1583,14 +1584,14 @@ int
|
||||
zpool_checkpoint(zpool_handle_t *zhp)
|
||||
{
|
||||
libzfs_handle_t *hdl = zhp->zpool_hdl;
|
||||
char msg[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
int error;
|
||||
|
||||
error = lzc_pool_checkpoint(zhp->zpool_name);
|
||||
if (error != 0) {
|
||||
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
"cannot checkpoint '%s'"), zhp->zpool_name);
|
||||
(void) zpool_standard_error(hdl, error, msg);
|
||||
(void) zpool_standard_error(hdl, error, errbuf);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
@ -1604,14 +1605,14 @@ int
|
||||
zpool_discard_checkpoint(zpool_handle_t *zhp)
|
||||
{
|
||||
libzfs_handle_t *hdl = zhp->zpool_hdl;
|
||||
char msg[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
int error;
|
||||
|
||||
error = lzc_pool_checkpoint_discard(zhp->zpool_name);
|
||||
if (error != 0) {
|
||||
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
"cannot discard checkpoint in '%s'"), zhp->zpool_name);
|
||||
(void) zpool_standard_error(hdl, error, msg);
|
||||
(void) zpool_standard_error(hdl, error, errbuf);
|
||||
return (-1);
|
||||
}
|
||||
|
||||
@ -1628,11 +1629,11 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
|
||||
zfs_cmd_t zc = {"\0"};
|
||||
int ret;
|
||||
libzfs_handle_t *hdl = zhp->zpool_hdl;
|
||||
char msg[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
nvlist_t **spares, **l2cache;
|
||||
uint_t nspares, nl2cache;
|
||||
|
||||
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
"cannot add to '%s'"), zhp->zpool_name);
|
||||
|
||||
if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
|
||||
@ -1641,7 +1642,7 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
|
||||
&spares, &nspares) == 0) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
|
||||
"upgraded to add hot spares"));
|
||||
return (zfs_error(hdl, EZFS_BADVERSION, msg));
|
||||
return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
|
||||
}
|
||||
|
||||
if (zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL) <
|
||||
@ -1650,7 +1651,7 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
|
||||
&l2cache, &nl2cache) == 0) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "pool must be "
|
||||
"upgraded to add cache devices"));
|
||||
return (zfs_error(hdl, EZFS_BADVERSION, msg));
|
||||
return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
|
||||
}
|
||||
|
||||
zcmd_write_conf_nvlist(hdl, &zc, nvroot);
|
||||
@ -1667,7 +1668,7 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
|
||||
*/
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"one or more vdevs refer to the same device"));
|
||||
(void) zfs_error(hdl, EZFS_BADDEV, msg);
|
||||
(void) zfs_error(hdl, EZFS_BADDEV, errbuf);
|
||||
break;
|
||||
|
||||
case EINVAL:
|
||||
@ -1684,7 +1685,7 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
|
||||
"raidz or dRAID vdevs"));
|
||||
}
|
||||
|
||||
(void) zfs_error(hdl, EZFS_BADDEV, msg);
|
||||
(void) zfs_error(hdl, EZFS_BADDEV, errbuf);
|
||||
break;
|
||||
|
||||
case EOVERFLOW:
|
||||
@ -1704,17 +1705,17 @@ zpool_add(zpool_handle_t *zhp, nvlist_t *nvroot)
|
||||
"device is less than the minimum "
|
||||
"size (%s)"), buf);
|
||||
}
|
||||
(void) zfs_error(hdl, EZFS_BADDEV, msg);
|
||||
(void) zfs_error(hdl, EZFS_BADDEV, errbuf);
|
||||
break;
|
||||
|
||||
case ENOTSUP:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"pool must be upgraded to add these vdevs"));
|
||||
(void) zfs_error(hdl, EZFS_BADVERSION, msg);
|
||||
(void) zfs_error(hdl, EZFS_BADVERSION, errbuf);
|
||||
break;
|
||||
|
||||
default:
|
||||
(void) zpool_standard_error(hdl, errno, msg);
|
||||
(void) zpool_standard_error(hdl, errno, errbuf);
|
||||
}
|
||||
|
||||
ret = -1;
|
||||
@ -2009,7 +2010,7 @@ zpool_import_props(libzfs_handle_t *hdl, nvlist_t *config, const char *newname,
|
||||
char *origname;
|
||||
int ret;
|
||||
int error = 0;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
|
||||
origname = fnvlist_lookup_string(config, ZPOOL_CONFIG_POOL_NAME);
|
||||
|
||||
@ -2516,11 +2517,11 @@ zpool_trim(zpool_handle_t *zhp, pool_trim_func_t cmd_type, nvlist_t *vds,
|
||||
goto out;
|
||||
}
|
||||
} else {
|
||||
char msg[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
|
||||
(void) snprintf(msg, sizeof (msg),
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "operation failed"));
|
||||
zpool_standard_error(zhp->zpool_hdl, err, msg);
|
||||
zpool_standard_error(zhp->zpool_hdl, err, errbuf);
|
||||
retval = -1;
|
||||
goto out;
|
||||
}
|
||||
@ -2545,7 +2546,7 @@ int
|
||||
zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
|
||||
{
|
||||
zfs_cmd_t zc = {"\0"};
|
||||
char msg[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
int err;
|
||||
libzfs_handle_t *hdl = zhp->zpool_hdl;
|
||||
|
||||
@ -2568,21 +2569,22 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
|
||||
|
||||
if (func == POOL_SCAN_SCRUB) {
|
||||
if (cmd == POOL_SCRUB_PAUSE) {
|
||||
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
|
||||
"cannot pause scrubbing %s"), zc.zc_name);
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot pause scrubbing %s"),
|
||||
zc.zc_name);
|
||||
} else {
|
||||
assert(cmd == POOL_SCRUB_NORMAL);
|
||||
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
|
||||
"cannot scrub %s"), zc.zc_name);
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot scrub %s"),
|
||||
zc.zc_name);
|
||||
}
|
||||
} else if (func == POOL_SCAN_RESILVER) {
|
||||
assert(cmd == POOL_SCRUB_NORMAL);
|
||||
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
"cannot restart resilver on %s"), zc.zc_name);
|
||||
} else if (func == POOL_SCAN_NONE) {
|
||||
(void) snprintf(msg, sizeof (msg),
|
||||
dgettext(TEXT_DOMAIN, "cannot cancel scrubbing %s"),
|
||||
zc.zc_name);
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
"cannot cancel scrubbing %s"), zc.zc_name);
|
||||
} else {
|
||||
assert(!"unexpected result");
|
||||
}
|
||||
@ -2599,18 +2601,19 @@ zpool_scan(zpool_handle_t *zhp, pool_scan_func_t func, pool_scrub_cmd_t cmd)
|
||||
if (ps && ps->pss_func == POOL_SCAN_SCRUB &&
|
||||
ps->pss_state == DSS_SCANNING) {
|
||||
if (cmd == POOL_SCRUB_PAUSE)
|
||||
return (zfs_error(hdl, EZFS_SCRUB_PAUSED, msg));
|
||||
return (zfs_error(hdl, EZFS_SCRUB_PAUSED,
|
||||
errbuf));
|
||||
else
|
||||
return (zfs_error(hdl, EZFS_SCRUBBING, msg));
|
||||
return (zfs_error(hdl, EZFS_SCRUBBING, errbuf));
|
||||
} else {
|
||||
return (zfs_error(hdl, EZFS_RESILVERING, msg));
|
||||
return (zfs_error(hdl, EZFS_RESILVERING, errbuf));
|
||||
}
|
||||
} else if (err == ENOENT) {
|
||||
return (zfs_error(hdl, EZFS_NO_SCRUB, msg));
|
||||
return (zfs_error(hdl, EZFS_NO_SCRUB, errbuf));
|
||||
} else if (err == ENOTSUP && func == POOL_SCAN_RESILVER) {
|
||||
return (zfs_error(hdl, EZFS_NO_RESILVER_DEFER, msg));
|
||||
return (zfs_error(hdl, EZFS_NO_RESILVER_DEFER, errbuf));
|
||||
} else {
|
||||
return (zpool_standard_error(hdl, err, msg));
|
||||
return (zpool_standard_error(hdl, err, errbuf));
|
||||
}
|
||||
}
|
||||
|
||||
@ -3087,28 +3090,28 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
|
||||
vdev_state_t *newstate)
|
||||
{
|
||||
zfs_cmd_t zc = {"\0"};
|
||||
char msg[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
nvlist_t *tgt;
|
||||
boolean_t avail_spare, l2cache, islog;
|
||||
libzfs_handle_t *hdl = zhp->zpool_hdl;
|
||||
|
||||
if (flags & ZFS_ONLINE_EXPAND) {
|
||||
(void) snprintf(msg, sizeof (msg),
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot expand %s"), path);
|
||||
} else {
|
||||
(void) snprintf(msg, sizeof (msg),
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot online %s"), path);
|
||||
}
|
||||
|
||||
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
|
||||
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
|
||||
&islog)) == NULL)
|
||||
return (zfs_error(hdl, EZFS_NODEVICE, msg));
|
||||
return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
|
||||
|
||||
zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
|
||||
|
||||
if (avail_spare)
|
||||
return (zfs_error(hdl, EZFS_ISSPARE, msg));
|
||||
return (zfs_error(hdl, EZFS_ISSPARE, errbuf));
|
||||
|
||||
#ifndef __FreeBSD__
|
||||
char *pathname;
|
||||
@ -3126,7 +3129,7 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
|
||||
if (l2cache) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"cannot expand cache devices"));
|
||||
return (zfs_error(hdl, EZFS_VDEVNOTSUP, msg));
|
||||
return (zfs_error(hdl, EZFS_VDEVNOTSUP, errbuf));
|
||||
}
|
||||
|
||||
if (wholedisk) {
|
||||
@ -3139,12 +3142,12 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
|
||||
sizeof (buf));
|
||||
if (error != 0)
|
||||
return (zfs_error(hdl, EZFS_NODEVICE,
|
||||
msg));
|
||||
errbuf));
|
||||
|
||||
fullpath = buf;
|
||||
}
|
||||
|
||||
error = zpool_relabel_disk(hdl, fullpath, msg);
|
||||
error = zpool_relabel_disk(hdl, fullpath, errbuf);
|
||||
if (error != 0)
|
||||
return (error);
|
||||
}
|
||||
@ -3159,9 +3162,9 @@ zpool_vdev_online(zpool_handle_t *zhp, const char *path, int flags,
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "was split "
|
||||
"from this pool into a new one. Use '%s' "
|
||||
"instead"), "zpool detach");
|
||||
return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, msg));
|
||||
return (zfs_error(hdl, EZFS_POSTSPLIT_ONLINE, errbuf));
|
||||
}
|
||||
return (zpool_standard_error(hdl, errno, msg));
|
||||
return (zpool_standard_error(hdl, errno, errbuf));
|
||||
}
|
||||
|
||||
*newstate = zc.zc_cookie;
|
||||
@ -3175,23 +3178,23 @@ int
|
||||
zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
|
||||
{
|
||||
zfs_cmd_t zc = {"\0"};
|
||||
char msg[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
nvlist_t *tgt;
|
||||
boolean_t avail_spare, l2cache;
|
||||
libzfs_handle_t *hdl = zhp->zpool_hdl;
|
||||
|
||||
(void) snprintf(msg, sizeof (msg),
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot offline %s"), path);
|
||||
|
||||
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
|
||||
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
|
||||
NULL)) == NULL)
|
||||
return (zfs_error(hdl, EZFS_NODEVICE, msg));
|
||||
return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
|
||||
|
||||
zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
|
||||
|
||||
if (avail_spare)
|
||||
return (zfs_error(hdl, EZFS_ISSPARE, msg));
|
||||
return (zfs_error(hdl, EZFS_ISSPARE, errbuf));
|
||||
|
||||
zc.zc_cookie = VDEV_STATE_OFFLINE;
|
||||
zc.zc_obj = istmp ? ZFS_OFFLINE_TEMPORARY : 0;
|
||||
@ -3205,16 +3208,16 @@ zpool_vdev_offline(zpool_handle_t *zhp, const char *path, boolean_t istmp)
|
||||
/*
|
||||
* There are no other replicas of this device.
|
||||
*/
|
||||
return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
|
||||
return (zfs_error(hdl, EZFS_NOREPLICAS, errbuf));
|
||||
|
||||
case EEXIST:
|
||||
/*
|
||||
* The log device has unplayed logs
|
||||
*/
|
||||
return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, msg));
|
||||
return (zfs_error(hdl, EZFS_UNPLAYED_LOGS, errbuf));
|
||||
|
||||
default:
|
||||
return (zpool_standard_error(hdl, errno, msg));
|
||||
return (zpool_standard_error(hdl, errno, errbuf));
|
||||
}
|
||||
}
|
||||
|
||||
@ -3225,10 +3228,10 @@ int
|
||||
zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
|
||||
{
|
||||
zfs_cmd_t zc = {"\0"};
|
||||
char msg[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
libzfs_handle_t *hdl = zhp->zpool_hdl;
|
||||
|
||||
(void) snprintf(msg, sizeof (msg),
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot fault %llu"), (u_longlong_t)guid);
|
||||
|
||||
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
|
||||
@ -3245,10 +3248,10 @@ zpool_vdev_fault(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
|
||||
/*
|
||||
* There are no other replicas of this device.
|
||||
*/
|
||||
return (zfs_error(hdl, EZFS_NOREPLICAS, msg));
|
||||
return (zfs_error(hdl, EZFS_NOREPLICAS, errbuf));
|
||||
|
||||
default:
|
||||
return (zpool_standard_error(hdl, errno, msg));
|
||||
return (zpool_standard_error(hdl, errno, errbuf));
|
||||
}
|
||||
|
||||
}
|
||||
@ -3260,10 +3263,10 @@ int
|
||||
zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
|
||||
{
|
||||
zfs_cmd_t zc = {"\0"};
|
||||
char msg[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
libzfs_handle_t *hdl = zhp->zpool_hdl;
|
||||
|
||||
(void) snprintf(msg, sizeof (msg),
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot degrade %llu"), (u_longlong_t)guid);
|
||||
|
||||
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
|
||||
@ -3274,7 +3277,7 @@ zpool_vdev_degrade(zpool_handle_t *zhp, uint64_t guid, vdev_aux_t aux)
|
||||
if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SET_STATE, &zc) == 0)
|
||||
return (0);
|
||||
|
||||
return (zpool_standard_error(hdl, errno, msg));
|
||||
return (zpool_standard_error(hdl, errno, errbuf));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -3312,7 +3315,7 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
|
||||
const char *new_disk, nvlist_t *nvroot, int replacing, boolean_t rebuild)
|
||||
{
|
||||
zfs_cmd_t zc = {"\0"};
|
||||
char msg[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
int ret;
|
||||
nvlist_t *tgt;
|
||||
boolean_t avail_spare, l2cache, islog;
|
||||
@ -3324,22 +3327,22 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
|
||||
libzfs_handle_t *hdl = zhp->zpool_hdl;
|
||||
|
||||
if (replacing)
|
||||
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
"cannot replace %s with %s"), old_disk, new_disk);
|
||||
else
|
||||
(void) snprintf(msg, sizeof (msg), dgettext(TEXT_DOMAIN,
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
"cannot attach %s to %s"), new_disk, old_disk);
|
||||
|
||||
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
|
||||
if ((tgt = zpool_find_vdev(zhp, old_disk, &avail_spare, &l2cache,
|
||||
&islog)) == NULL)
|
||||
return (zfs_error(hdl, EZFS_NODEVICE, msg));
|
||||
return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
|
||||
|
||||
if (avail_spare)
|
||||
return (zfs_error(hdl, EZFS_ISSPARE, msg));
|
||||
return (zfs_error(hdl, EZFS_ISSPARE, errbuf));
|
||||
|
||||
if (l2cache)
|
||||
return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
|
||||
return (zfs_error(hdl, EZFS_ISL2CACHE, errbuf));
|
||||
|
||||
zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
|
||||
zc.zc_cookie = replacing;
|
||||
@ -3349,14 +3352,14 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
|
||||
zfeature_lookup_guid("org.openzfs:device_rebuild", NULL) != 0) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"the loaded zfs module doesn't support device rebuilds"));
|
||||
return (zfs_error(hdl, EZFS_POOL_NOTSUP, msg));
|
||||
return (zfs_error(hdl, EZFS_POOL_NOTSUP, errbuf));
|
||||
}
|
||||
|
||||
if (nvlist_lookup_nvlist_array(nvroot, ZPOOL_CONFIG_CHILDREN,
|
||||
&child, &children) != 0 || children != 1) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"new device must be a single disk"));
|
||||
return (zfs_error(hdl, EZFS_INVALCONFIG, msg));
|
||||
return (zfs_error(hdl, EZFS_INVALCONFIG, errbuf));
|
||||
}
|
||||
|
||||
config_root = fnvlist_lookup_nvlist(zpool_get_config(zhp, NULL),
|
||||
@ -3377,7 +3380,7 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"can only be replaced by another hot spare"));
|
||||
free(newname);
|
||||
return (zfs_error(hdl, EZFS_BADTARGET, msg));
|
||||
return (zfs_error(hdl, EZFS_BADTARGET, errbuf));
|
||||
}
|
||||
|
||||
free(newname);
|
||||
@ -3435,7 +3438,7 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
|
||||
"disks"));
|
||||
}
|
||||
}
|
||||
(void) zfs_error(hdl, EZFS_BADTARGET, msg);
|
||||
(void) zfs_error(hdl, EZFS_BADTARGET, errbuf);
|
||||
break;
|
||||
|
||||
case EINVAL:
|
||||
@ -3444,14 +3447,14 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
|
||||
*/
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"new device must be a single disk"));
|
||||
(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
|
||||
(void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
|
||||
break;
|
||||
|
||||
case EBUSY:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "%s is busy, "
|
||||
"or device removal is in progress"),
|
||||
new_disk);
|
||||
(void) zfs_error(hdl, EZFS_BADDEV, msg);
|
||||
(void) zfs_error(hdl, EZFS_BADDEV, errbuf);
|
||||
break;
|
||||
|
||||
case EOVERFLOW:
|
||||
@ -3460,7 +3463,7 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
|
||||
*/
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"device is too small"));
|
||||
(void) zfs_error(hdl, EZFS_BADDEV, msg);
|
||||
(void) zfs_error(hdl, EZFS_BADDEV, errbuf);
|
||||
break;
|
||||
|
||||
case EDOM:
|
||||
@ -3470,18 +3473,18 @@ zpool_vdev_attach(zpool_handle_t *zhp, const char *old_disk,
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"new device has a different optimal sector size; use the "
|
||||
"option '-o ashift=N' to override the optimal size"));
|
||||
(void) zfs_error(hdl, EZFS_BADDEV, msg);
|
||||
(void) zfs_error(hdl, EZFS_BADDEV, errbuf);
|
||||
break;
|
||||
|
||||
case ENAMETOOLONG:
|
||||
/*
|
||||
* The resulting top-level vdev spec won't fit in the label.
|
||||
*/
|
||||
(void) zfs_error(hdl, EZFS_DEVOVERFLOW, msg);
|
||||
(void) zfs_error(hdl, EZFS_DEVOVERFLOW, errbuf);
|
||||
break;
|
||||
|
||||
default:
|
||||
(void) zpool_standard_error(hdl, errno, msg);
|
||||
(void) zpool_standard_error(hdl, errno, errbuf);
|
||||
}
|
||||
|
||||
return (-1);
|
||||
@ -3494,24 +3497,24 @@ int
|
||||
zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
|
||||
{
|
||||
zfs_cmd_t zc = {"\0"};
|
||||
char msg[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
nvlist_t *tgt;
|
||||
boolean_t avail_spare, l2cache;
|
||||
libzfs_handle_t *hdl = zhp->zpool_hdl;
|
||||
|
||||
(void) snprintf(msg, sizeof (msg),
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot detach %s"), path);
|
||||
|
||||
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
|
||||
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
|
||||
NULL)) == NULL)
|
||||
return (zfs_error(hdl, EZFS_NODEVICE, msg));
|
||||
return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
|
||||
|
||||
if (avail_spare)
|
||||
return (zfs_error(hdl, EZFS_ISSPARE, msg));
|
||||
return (zfs_error(hdl, EZFS_ISSPARE, errbuf));
|
||||
|
||||
if (l2cache)
|
||||
return (zfs_error(hdl, EZFS_ISL2CACHE, msg));
|
||||
return (zfs_error(hdl, EZFS_ISL2CACHE, errbuf));
|
||||
|
||||
zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
|
||||
|
||||
@ -3526,18 +3529,18 @@ zpool_vdev_detach(zpool_handle_t *zhp, const char *path)
|
||||
*/
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "only "
|
||||
"applicable to mirror and replacing vdevs"));
|
||||
(void) zfs_error(hdl, EZFS_BADTARGET, msg);
|
||||
(void) zfs_error(hdl, EZFS_BADTARGET, errbuf);
|
||||
break;
|
||||
|
||||
case EBUSY:
|
||||
/*
|
||||
* There are no other replicas of this device.
|
||||
*/
|
||||
(void) zfs_error(hdl, EZFS_NOREPLICAS, msg);
|
||||
(void) zfs_error(hdl, EZFS_NOREPLICAS, errbuf);
|
||||
break;
|
||||
|
||||
default:
|
||||
(void) zpool_standard_error(hdl, errno, msg);
|
||||
(void) zpool_standard_error(hdl, errno, errbuf);
|
||||
}
|
||||
|
||||
return (-1);
|
||||
@ -3592,7 +3595,7 @@ zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
|
||||
nvlist_t *props, splitflags_t flags)
|
||||
{
|
||||
zfs_cmd_t zc = {"\0"};
|
||||
char msg[1024], *bias;
|
||||
char errbuf[ERRBUFLEN], *bias;
|
||||
nvlist_t *tree, *config, **child, **newchild, *newconfig = NULL;
|
||||
nvlist_t **varray = NULL, *zc_props = NULL;
|
||||
uint_t c, children, newchildren, lastlog = 0, vcount, found = 0;
|
||||
@ -3601,11 +3604,11 @@ zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
|
||||
boolean_t freelist = B_FALSE, memory_err = B_TRUE;
|
||||
int retval = 0;
|
||||
|
||||
(void) snprintf(msg, sizeof (msg),
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "Unable to split %s"), zhp->zpool_name);
|
||||
|
||||
if (!zpool_name_valid(hdl, B_FALSE, newname))
|
||||
return (zfs_error(hdl, EZFS_INVALIDNAME, msg));
|
||||
return (zfs_error(hdl, EZFS_INVALIDNAME, errbuf));
|
||||
|
||||
if ((config = zpool_get_config(zhp, NULL)) == NULL) {
|
||||
(void) fprintf(stderr, gettext("Internal error: unable to "
|
||||
@ -3619,7 +3622,7 @@ zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
|
||||
if (props) {
|
||||
prop_flags_t flags = { .create = B_FALSE, .import = B_TRUE };
|
||||
if ((zc_props = zpool_valid_proplist(hdl, zhp->zpool_name,
|
||||
props, vers, flags, msg)) == NULL)
|
||||
props, vers, flags, errbuf)) == NULL)
|
||||
return (-1);
|
||||
(void) nvlist_lookup_uint64(zc_props,
|
||||
zpool_prop_to_name(ZPOOL_PROP_READONLY), &readonly);
|
||||
@ -3691,7 +3694,7 @@ zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
|
||||
} else if (strcmp(type, VDEV_TYPE_MIRROR) != 0) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"Source pool must be composed only of mirrors\n"));
|
||||
retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
|
||||
retval = zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -3739,7 +3742,7 @@ zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
|
||||
if (found != newchildren) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN, "Device list must "
|
||||
"include at most one disk from each mirror"));
|
||||
retval = zfs_error(hdl, EZFS_INVALCONFIG, msg);
|
||||
retval = zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -3793,7 +3796,7 @@ zpool_vdev_split(zpool_handle_t *zhp, char *newname, nvlist_t **newroot,
|
||||
zcmd_write_src_nvlist(hdl, &zc, zc_props);
|
||||
|
||||
if (zfs_ioctl(hdl, ZFS_IOC_VDEV_SPLIT, &zc) != 0) {
|
||||
retval = zpool_standard_error(hdl, errno, msg);
|
||||
retval = zpool_standard_error(hdl, errno, errbuf);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -3832,31 +3835,31 @@ int
|
||||
zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
|
||||
{
|
||||
zfs_cmd_t zc = {"\0"};
|
||||
char msg[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
nvlist_t *tgt;
|
||||
boolean_t avail_spare, l2cache, islog;
|
||||
libzfs_handle_t *hdl = zhp->zpool_hdl;
|
||||
uint64_t version;
|
||||
|
||||
(void) snprintf(msg, sizeof (msg),
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot remove %s"), path);
|
||||
|
||||
if (zpool_is_draid_spare(path)) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"dRAID spares cannot be removed"));
|
||||
return (zfs_error(hdl, EZFS_NODEVICE, msg));
|
||||
return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
|
||||
}
|
||||
|
||||
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
|
||||
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
|
||||
&islog)) == NULL)
|
||||
return (zfs_error(hdl, EZFS_NODEVICE, msg));
|
||||
return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
|
||||
|
||||
version = zpool_get_prop_int(zhp, ZPOOL_PROP_VERSION, NULL);
|
||||
if (islog && version < SPA_VERSION_HOLES) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"pool must be upgraded to support log removal"));
|
||||
return (zfs_error(hdl, EZFS_BADVERSION, msg));
|
||||
return (zfs_error(hdl, EZFS_BADVERSION, errbuf));
|
||||
}
|
||||
|
||||
zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
|
||||
@ -3870,7 +3873,7 @@ zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"invalid config; all top-level vdevs must "
|
||||
"have the same sector size and not be raidz."));
|
||||
(void) zfs_error(hdl, EZFS_INVALCONFIG, msg);
|
||||
(void) zfs_error(hdl, EZFS_INVALCONFIG, errbuf);
|
||||
break;
|
||||
|
||||
case EBUSY:
|
||||
@ -3881,21 +3884,21 @@ zpool_vdev_remove(zpool_handle_t *zhp, const char *path)
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"Pool busy; removal may already be in progress"));
|
||||
}
|
||||
(void) zfs_error(hdl, EZFS_BUSY, msg);
|
||||
(void) zfs_error(hdl, EZFS_BUSY, errbuf);
|
||||
break;
|
||||
|
||||
case EACCES:
|
||||
if (islog) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"Mount encrypted datasets to replay logs."));
|
||||
(void) zfs_error(hdl, EZFS_BUSY, msg);
|
||||
(void) zfs_error(hdl, EZFS_BUSY, errbuf);
|
||||
} else {
|
||||
(void) zpool_standard_error(hdl, errno, msg);
|
||||
(void) zpool_standard_error(hdl, errno, errbuf);
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
(void) zpool_standard_error(hdl, errno, msg);
|
||||
(void) zpool_standard_error(hdl, errno, errbuf);
|
||||
}
|
||||
return (-1);
|
||||
}
|
||||
@ -3904,10 +3907,10 @@ int
|
||||
zpool_vdev_remove_cancel(zpool_handle_t *zhp)
|
||||
{
|
||||
zfs_cmd_t zc = {{0}};
|
||||
char msg[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
libzfs_handle_t *hdl = zhp->zpool_hdl;
|
||||
|
||||
(void) snprintf(msg, sizeof (msg),
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot cancel removal"));
|
||||
|
||||
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
|
||||
@ -3916,25 +3919,25 @@ zpool_vdev_remove_cancel(zpool_handle_t *zhp)
|
||||
if (zfs_ioctl(hdl, ZFS_IOC_VDEV_REMOVE, &zc) == 0)
|
||||
return (0);
|
||||
|
||||
return (zpool_standard_error(hdl, errno, msg));
|
||||
return (zpool_standard_error(hdl, errno, errbuf));
|
||||
}
|
||||
|
||||
int
|
||||
zpool_vdev_indirect_size(zpool_handle_t *zhp, const char *path,
|
||||
uint64_t *sizep)
|
||||
{
|
||||
char msg[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
nvlist_t *tgt;
|
||||
boolean_t avail_spare, l2cache, islog;
|
||||
libzfs_handle_t *hdl = zhp->zpool_hdl;
|
||||
|
||||
(void) snprintf(msg, sizeof (msg),
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot determine indirect size of %s"),
|
||||
path);
|
||||
|
||||
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare, &l2cache,
|
||||
&islog)) == NULL)
|
||||
return (zfs_error(hdl, EZFS_NODEVICE, msg));
|
||||
return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
|
||||
|
||||
if (avail_spare || l2cache || islog) {
|
||||
*sizep = 0;
|
||||
@ -3944,7 +3947,7 @@ zpool_vdev_indirect_size(zpool_handle_t *zhp, const char *path,
|
||||
if (nvlist_lookup_uint64(tgt, ZPOOL_CONFIG_INDIRECT_SIZE, sizep) != 0) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"indirect size not available"));
|
||||
return (zfs_error(hdl, EINVAL, msg));
|
||||
return (zfs_error(hdl, EINVAL, errbuf));
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
@ -3956,7 +3959,7 @@ int
|
||||
zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
|
||||
{
|
||||
zfs_cmd_t zc = {"\0"};
|
||||
char msg[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
nvlist_t *tgt;
|
||||
zpool_load_policy_t policy;
|
||||
boolean_t avail_spare, l2cache;
|
||||
@ -3965,11 +3968,11 @@ zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
|
||||
int error;
|
||||
|
||||
if (path)
|
||||
(void) snprintf(msg, sizeof (msg),
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
|
||||
path);
|
||||
else
|
||||
(void) snprintf(msg, sizeof (msg),
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot clear errors for %s"),
|
||||
zhp->zpool_name);
|
||||
|
||||
@ -3977,14 +3980,14 @@ zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
|
||||
if (path) {
|
||||
if ((tgt = zpool_find_vdev(zhp, path, &avail_spare,
|
||||
&l2cache, NULL)) == NULL)
|
||||
return (zfs_error(hdl, EZFS_NODEVICE, msg));
|
||||
return (zfs_error(hdl, EZFS_NODEVICE, errbuf));
|
||||
|
||||
/*
|
||||
* Don't allow error clearing for hot spares. Do allow
|
||||
* error clearing for l2cache devices.
|
||||
*/
|
||||
if (avail_spare)
|
||||
return (zfs_error(hdl, EZFS_ISSPARE, msg));
|
||||
return (zfs_error(hdl, EZFS_ISSPARE, errbuf));
|
||||
|
||||
zc.zc_guid = fnvlist_lookup_uint64(tgt, ZPOOL_CONFIG_GUID);
|
||||
}
|
||||
@ -4014,7 +4017,7 @@ zpool_clear(zpool_handle_t *zhp, const char *path, nvlist_t *rewindnvl)
|
||||
}
|
||||
|
||||
zcmd_free_nvlists(&zc);
|
||||
return (zpool_standard_error(hdl, errno, msg));
|
||||
return (zpool_standard_error(hdl, errno, errbuf));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -4024,10 +4027,10 @@ int
|
||||
zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
|
||||
{
|
||||
zfs_cmd_t zc = {"\0"};
|
||||
char msg[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
libzfs_handle_t *hdl = zhp->zpool_hdl;
|
||||
|
||||
(void) snprintf(msg, sizeof (msg),
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot clear errors for %llx"),
|
||||
(u_longlong_t)guid);
|
||||
|
||||
@ -4038,7 +4041,7 @@ zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
|
||||
if (zfs_ioctl(hdl, ZFS_IOC_CLEAR, &zc) == 0)
|
||||
return (0);
|
||||
|
||||
return (zpool_standard_error(hdl, errno, msg));
|
||||
return (zpool_standard_error(hdl, errno, errbuf));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -4047,18 +4050,18 @@ zpool_vdev_clear(zpool_handle_t *zhp, uint64_t guid)
|
||||
int
|
||||
zpool_reguid(zpool_handle_t *zhp)
|
||||
{
|
||||
char msg[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
libzfs_handle_t *hdl = zhp->zpool_hdl;
|
||||
zfs_cmd_t zc = {"\0"};
|
||||
|
||||
(void) snprintf(msg, sizeof (msg),
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot reguid '%s'"), zhp->zpool_name);
|
||||
|
||||
(void) strlcpy(zc.zc_name, zhp->zpool_name, sizeof (zc.zc_name));
|
||||
if (zfs_ioctl(hdl, ZFS_IOC_POOL_REGUID, &zc) == 0)
|
||||
return (0);
|
||||
|
||||
return (zpool_standard_error(hdl, errno, msg));
|
||||
return (zpool_standard_error(hdl, errno, errbuf));
|
||||
}
|
||||
|
||||
/*
|
||||
@ -4998,7 +5001,7 @@ zpool_vdev_guid(zpool_handle_t *zhp, const char *vdevname, uint64_t *vdev_guid)
|
||||
|
||||
verify(zhp != NULL);
|
||||
if (zpool_get_state(zhp) == POOL_STATE_UNAVAIL) {
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "pool is in an unavailable state"));
|
||||
return (zfs_error(zhp->zpool_hdl, EZFS_POOLUNAVAIL, errbuf));
|
||||
@ -5006,7 +5009,7 @@ zpool_vdev_guid(zpool_handle_t *zhp, const char *vdevname, uint64_t *vdev_guid)
|
||||
|
||||
if ((tgt = zpool_find_vdev(zhp, vdevname, &avail_spare, &l2cache,
|
||||
NULL)) == NULL) {
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "can not find %s in %s"),
|
||||
vdevname, zhp->zpool_name);
|
||||
@ -5030,7 +5033,7 @@ zpool_get_vdev_prop_value(nvlist_t *nvprop, vdev_prop_t prop, char *prop_name,
|
||||
uint64_t intval;
|
||||
zprop_source_t src = ZPROP_SRC_NONE;
|
||||
|
||||
if (prop == VDEV_PROP_USER) {
|
||||
if (prop == VDEV_PROP_USERPROP) {
|
||||
/* user property, prop_name must contain the property name */
|
||||
assert(prop_name != NULL);
|
||||
if (nvlist_lookup_nvlist(nvprop, prop_name, &nv) == 0) {
|
||||
@ -5192,7 +5195,7 @@ zpool_get_vdev_prop(zpool_handle_t *zhp, const char *vdevname, vdev_prop_t prop,
|
||||
|
||||
fnvlist_add_uint64(reqnvl, ZPOOL_VDEV_PROPS_GET_VDEV, vdev_guid);
|
||||
|
||||
if (prop != VDEV_PROP_USER) {
|
||||
if (prop != VDEV_PROP_USERPROP) {
|
||||
/* prop_name overrides prop value */
|
||||
if (prop_name != NULL)
|
||||
prop = vdev_name_to_prop(prop_name);
|
||||
@ -5216,7 +5219,7 @@ zpool_get_vdev_prop(zpool_handle_t *zhp, const char *vdevname, vdev_prop_t prop,
|
||||
ret = zpool_get_vdev_prop_value(retprops, prop, prop_name, buf,
|
||||
len, srctype, literal);
|
||||
} else {
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot get vdev property %s from"
|
||||
" %s in %s"), prop_name, vdevname, zhp->zpool_name);
|
||||
@ -5254,7 +5257,7 @@ zpool_get_all_vdev_props(zpool_handle_t *zhp, const char *vdevname,
|
||||
nvlist_free(nvl);
|
||||
|
||||
if (ret) {
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot get vdev properties for"
|
||||
" %s in %s"), vdevname, zhp->zpool_name);
|
||||
@ -5295,7 +5298,7 @@ zpool_set_vdev_prop(zpool_handle_t *zhp, const char *vdevname,
|
||||
return (no_memory(zhp->zpool_hdl));
|
||||
}
|
||||
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot set property %s for %s on %s"),
|
||||
propname, vdevname, zhp->zpool_name);
|
||||
|
@ -734,7 +734,7 @@ zfs_send_space(zfs_handle_t *zhp, const char *snapname, const char *from,
|
||||
if (error == 0)
|
||||
return (0);
|
||||
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
"warning: cannot estimate space for '%s'"), snapname);
|
||||
|
||||
@ -804,7 +804,7 @@ dump_ioctl(zfs_handle_t *zhp, const char *fromsnap, uint64_t fromsnap_obj,
|
||||
}
|
||||
|
||||
if (zfs_ioctl(zhp->zfs_hdl, ZFS_IOC_SEND, &zc) != 0) {
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
int error = errno;
|
||||
|
||||
(void) snprintf(errbuf, sizeof (errbuf), "%s '%s'",
|
||||
@ -1615,7 +1615,7 @@ find_redact_book(libzfs_handle_t *hdl, const char *path,
|
||||
const uint64_t *redact_snap_guids, int num_redact_snaps,
|
||||
char **bookname)
|
||||
{
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
nvlist_t *bmarks;
|
||||
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
@ -1679,7 +1679,7 @@ static int
|
||||
zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags,
|
||||
int outfd, nvlist_t *resume_nvl)
|
||||
{
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
char *toname;
|
||||
char *fromname = NULL;
|
||||
uint64_t resumeobj, resumeoff, toguid, fromguid, bytes;
|
||||
@ -1827,7 +1827,7 @@ zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags,
|
||||
if (flags->progress && send_progress_thread_exit(hdl, tid))
|
||||
return (-1);
|
||||
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
"warning: cannot send '%s'"), zhp->zfs_name);
|
||||
|
||||
@ -1907,7 +1907,7 @@ zfs_send_resume(libzfs_handle_t *hdl, sendflags_t *flags, int outfd,
|
||||
const char *resume_token)
|
||||
{
|
||||
int ret;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
nvlist_t *resume_nvl;
|
||||
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
@ -1938,7 +1938,7 @@ zfs_send_saved(zfs_handle_t *zhp, sendflags_t *flags, int outfd,
|
||||
uint64_t saved_guid = 0, resume_guid = 0;
|
||||
uint64_t obj = 0, off = 0, bytes = 0;
|
||||
char token_buf[ZFS_MAXPROPLEN];
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
"saved send failed"));
|
||||
@ -2062,7 +2062,7 @@ send_prelim_records(zfs_handle_t *zhp, const char *from, int fd,
|
||||
/* short name of snap we are sending */
|
||||
char *tosnap = "";
|
||||
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
"warning: cannot send '%s'"), zhp->zfs_name);
|
||||
if (zhp->zfs_type == ZFS_TYPE_FILESYSTEM && zfs_prop_get_int(zhp,
|
||||
@ -2187,7 +2187,7 @@ zfs_send_cb_impl(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
|
||||
sendflags_t *flags, int outfd, snapfilter_cb_t filter_func,
|
||||
void *cb_arg, nvlist_t **debugnvp)
|
||||
{
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
send_dump_data_t sdd = { 0 };
|
||||
int err = 0;
|
||||
nvlist_t *fss = NULL;
|
||||
@ -2366,9 +2366,9 @@ zfs_send_cb_impl(zfs_handle_t *zhp, const char *fromsnap, const char *tosnap,
|
||||
* there was some error, because it might not be totally
|
||||
* failed.
|
||||
*/
|
||||
err = send_conclusion_record(outfd, NULL);
|
||||
if (err != 0)
|
||||
return (zfs_standard_error(zhp->zfs_hdl, err, errbuf));
|
||||
int err2 = send_conclusion_record(outfd, NULL);
|
||||
if (err2 != 0)
|
||||
return (zfs_standard_error(zhp->zfs_hdl, err2, errbuf));
|
||||
}
|
||||
|
||||
return (err || sdd.err);
|
||||
@ -2510,7 +2510,7 @@ zfs_send_one_cb_impl(zfs_handle_t *zhp, const char *from, int fd,
|
||||
pthread_t ptid;
|
||||
progress_arg_t pa = { 0 };
|
||||
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
"warning: cannot send '%s'"), name);
|
||||
|
||||
@ -3654,7 +3654,7 @@ zfs_receive_package(libzfs_handle_t *hdl, int fd, const char *destname,
|
||||
char *cp;
|
||||
char tofs[ZFS_MAX_DATASET_NAME_LEN];
|
||||
char sendfs[ZFS_MAX_DATASET_NAME_LEN];
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
dmu_replay_record_t drre;
|
||||
int error;
|
||||
boolean_t anyerr = B_FALSE;
|
||||
@ -3871,7 +3871,7 @@ recv_skip(libzfs_handle_t *hdl, int fd, boolean_t byteswap)
|
||||
dmu_replay_record_t *drr;
|
||||
void *buf = zfs_alloc(hdl, SPA_MAXBLOCKSIZE);
|
||||
uint64_t payload_size;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
|
||||
(void) snprintf(errbuf, sizeof (errbuf), dgettext(TEXT_DOMAIN,
|
||||
"cannot receive"));
|
||||
@ -4239,7 +4239,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
|
||||
int ioctl_err, ioctl_errno, err;
|
||||
char *cp;
|
||||
struct drr_begin *drrb = &drr->drr_u.drr_begin;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
const char *chopprefix;
|
||||
boolean_t newfs = B_FALSE;
|
||||
boolean_t stream_wantsnewfs, stream_resumingnewfs;
|
||||
@ -5107,7 +5107,7 @@ zfs_receive_checkprops(libzfs_handle_t *hdl, nvlist_t *props,
|
||||
name = nvpair_name(nvp);
|
||||
prop = zfs_name_to_prop(name);
|
||||
|
||||
if (prop == ZPROP_INVAL) {
|
||||
if (prop == ZPROP_USERPROP) {
|
||||
if (!zfs_prop_user(name)) {
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"%s: invalid property '%s'"), errbuf, name);
|
||||
@ -5151,7 +5151,7 @@ zfs_receive_impl(libzfs_handle_t *hdl, const char *tosnap,
|
||||
int err;
|
||||
dmu_replay_record_t drr, drr_noswap;
|
||||
struct drr_begin *drrb = &drr.drr_u.drr_begin;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
zio_cksum_t zcksum = { { 0 } };
|
||||
uint64_t featureflags;
|
||||
int hdrtype;
|
||||
|
@ -299,6 +299,9 @@ libzfs_error_description(libzfs_handle_t *hdl)
|
||||
case EZFS_VDEV_NOTSUP:
|
||||
return (dgettext(TEXT_DOMAIN, "operation not supported "
|
||||
"on this type of vdev"));
|
||||
case EZFS_NOT_USER_NAMESPACE:
|
||||
return (dgettext(TEXT_DOMAIN, "the provided file "
|
||||
"was not a user namespace file"));
|
||||
case EZFS_UNKNOWN:
|
||||
return (dgettext(TEXT_DOMAIN, "unknown error"));
|
||||
default:
|
||||
@ -485,6 +488,9 @@ zfs_standard_error_fmt(libzfs_handle_t *hdl, int error, const char *fmt, ...)
|
||||
case ZFS_ERR_BADPROP:
|
||||
zfs_verror(hdl, EZFS_BADPROP, fmt, ap);
|
||||
break;
|
||||
case ZFS_ERR_NOT_USER_NAMESPACE:
|
||||
zfs_verror(hdl, EZFS_NOT_USER_NAMESPACE, fmt, ap);
|
||||
break;
|
||||
default:
|
||||
zfs_error_aux(hdl, "%s", strerror(error));
|
||||
zfs_verror(hdl, EZFS_UNKNOWN, fmt, ap);
|
||||
@ -1276,7 +1282,7 @@ zprop_print_headers(zprop_get_cbdata_t *cbp, zfs_type_t type)
|
||||
/*
|
||||
* 'PROPERTY' column
|
||||
*/
|
||||
if (pl->pl_prop != ZPROP_INVAL) {
|
||||
if (pl->pl_prop != ZPROP_USERPROP) {
|
||||
const char *propname = (type == ZFS_TYPE_POOL) ?
|
||||
zpool_prop_to_name(pl->pl_prop) :
|
||||
((type == ZFS_TYPE_VDEV) ?
|
||||
@ -1749,7 +1755,7 @@ addlist(libzfs_handle_t *hdl, const char *propname, zprop_list_t **listp,
|
||||
* Return failure if no property table entry was found and this isn't
|
||||
* a user-defined property.
|
||||
*/
|
||||
if (prop == ZPROP_INVAL && ((type == ZFS_TYPE_POOL &&
|
||||
if (prop == ZPROP_USERPROP && ((type == ZFS_TYPE_POOL &&
|
||||
!zpool_prop_feature(propname) &&
|
||||
!zpool_prop_unsupported(propname)) ||
|
||||
((type == ZFS_TYPE_DATASET) && !zfs_prop_user(propname) &&
|
||||
@ -1764,7 +1770,7 @@ addlist(libzfs_handle_t *hdl, const char *propname, zprop_list_t **listp,
|
||||
zprop_list_t *entry = zfs_alloc(hdl, sizeof (*entry));
|
||||
|
||||
entry->pl_prop = prop;
|
||||
if (prop == ZPROP_INVAL) {
|
||||
if (prop == ZPROP_USERPROP) {
|
||||
entry->pl_user_prop = zfs_strdup(hdl, propname);
|
||||
entry->pl_width = strlen(propname);
|
||||
} else {
|
||||
|
@ -193,8 +193,6 @@ execvpe(const char *name, char * const argv[], char * const envp[])
|
||||
return (execvPe(name, path, argv, envp));
|
||||
}
|
||||
|
||||
#define ERRBUFLEN 1024
|
||||
|
||||
static __thread char errbuf[ERRBUFLEN];
|
||||
|
||||
const char *
|
||||
|
@ -216,7 +216,7 @@ zpool_label_disk(libzfs_handle_t *hdl, zpool_handle_t *zhp, const char *name)
|
||||
size_t resv = EFI_MIN_RESV_SIZE;
|
||||
uint64_t slice_size;
|
||||
diskaddr_t start_block;
|
||||
char errbuf[1024];
|
||||
char errbuf[ERRBUFLEN];
|
||||
|
||||
/* prepare an error message just in case */
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
|
@ -19,6 +19,9 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2021 Klara, Inc.
|
||||
*/
|
||||
|
||||
#include <alloca.h>
|
||||
#include <errno.h>
|
||||
@ -207,3 +210,71 @@ zfs_version_kernel(void)
|
||||
ret[read - 1] = '\0';
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* Add or delete the given filesystem to/from the given user namespace.
*
* zhp:    dataset handle; only ZFS_TYPE_FILESYSTEM gets past the type switch,
*         every other type is rejected with EZFS_BADTYPE.
* nspath: path that is opened read-only here; the descriptor is stored in
*         zc.zc_cleanup_fd and passed to the ioctl.  If open() fails the
*         error reported is EZFS_NOT_USER_NAMESPACE.
* attach: non-zero selects ZFS_IOC_USERNS_ATTACH, zero selects
*         ZFS_IOC_USERNS_DETACH (and picks the matching message prefix).
*
* Returns the value of zfs_ioctl() (zero is treated as success), or the
* value of zfs_error() for the early rejections.
|
||||
*/
|
||||
int
|
||||
zfs_userns(zfs_handle_t *zhp, const char *nspath, int attach)
|
||||
{
|
||||
libzfs_handle_t *hdl = zhp->zfs_hdl;
|
||||
zfs_cmd_t zc = {"\0"};
|
||||
char errbuf[1024];
|
||||
unsigned long cmd;
|
||||
int ret;
|
||||
|
||||
/* Format the message prefix first; every failure path below passes it on. */
if (attach) {
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot add '%s' to namespace"),
|
||||
zhp->zfs_name);
|
||||
} else {
|
||||
(void) snprintf(errbuf, sizeof (errbuf),
|
||||
dgettext(TEXT_DOMAIN, "cannot remove '%s' from namespace"),
|
||||
zhp->zfs_name);
|
||||
}
|
||||
|
||||
/*
 * Each non-filesystem type sets its own auxiliary message and returns
 * EZFS_BADTYPE; only ZFS_TYPE_FILESYSTEM leaves the switch.
 */
switch (zhp->zfs_type) {
|
||||
case ZFS_TYPE_VOLUME:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"volumes can not be namespaced"));
|
||||
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
|
||||
case ZFS_TYPE_SNAPSHOT:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"snapshots can not be namespaced"));
|
||||
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
|
||||
case ZFS_TYPE_BOOKMARK:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"bookmarks can not be namespaced"));
|
||||
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
|
||||
case ZFS_TYPE_VDEV:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"vdevs can not be namespaced"));
|
||||
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
|
||||
case ZFS_TYPE_INVALID:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"invalid zfs_type_t: ZFS_TYPE_INVALID"));
|
||||
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
|
||||
case ZFS_TYPE_POOL:
|
||||
zfs_error_aux(hdl, dgettext(TEXT_DOMAIN,
|
||||
"pools can not be namespaced"));
|
||||
return (zfs_error(hdl, EZFS_BADTYPE, errbuf));
|
||||
case ZFS_TYPE_FILESYSTEM:
|
||||
zfs_fallthrough;
|
||||
}
|
||||
assert(zhp->zfs_type == ZFS_TYPE_FILESYSTEM);
|
||||
|
||||
(void) strlcpy(zc.zc_name, zhp->zfs_name, sizeof (zc.zc_name));
|
||||
zc.zc_objset_type = DMU_OST_ZFS;
|
||||
/* The descriptor for nspath travels to the ioctl in zc_cleanup_fd. */
zc.zc_cleanup_fd = open(nspath, O_RDONLY);
|
||||
if (zc.zc_cleanup_fd < 0) {
|
||||
/* Any open() failure is reported as "not a user namespace file". */
return (zfs_error(hdl, EZFS_NOT_USER_NAMESPACE, errbuf));
|
||||
}
|
||||
|
||||
cmd = attach ? ZFS_IOC_USERNS_ATTACH : ZFS_IOC_USERNS_DETACH;
|
||||
/*
 * On failure the error is recorded through zfs_standard_error(); its
 * return value is dropped and the raw zfs_ioctl() result is returned.
 */
if ((ret = zfs_ioctl(hdl, cmd, &zc)) != 0)
|
||||
zfs_standard_error(hdl, errno, errbuf);
|
||||
|
||||
/* The descriptor is closed whether or not the ioctl succeeded. */
(void) close(zc.zc_cleanup_fd);
|
||||
|
||||
return (ret);
|
||||
}
|
||||
|
@ -939,7 +939,7 @@
|
||||
</function-decl>
|
||||
</abi-instr>
|
||||
<abi-instr address-size='64' path='os/linux/zone.c' language='LANG_C99'>
|
||||
<typedef-decl name='zoneid_t' type-id='95e97e5e' id='4da03624'/>
|
||||
<typedef-decl name='zoneid_t' type-id='3502e3ff' id='4da03624'/>
|
||||
<function-decl name='getzoneid' mangled-name='getzoneid' visibility='default' binding='global' size-in-bits='64' elf-symbol-id='getzoneid'>
|
||||
<return type-id='4da03624'/>
|
||||
</function-decl>
|
||||
|
@ -67,6 +67,7 @@ nodist_libzpool_la_SOURCES = \
|
||||
module/zfs/abd.c \
|
||||
module/zfs/aggsum.c \
|
||||
module/zfs/arc.c \
|
||||
module/zfs/blake3_zfs.c \
|
||||
module/zfs/blkptr.c \
|
||||
module/zfs/bplist.c \
|
||||
module/zfs/bpobj.c \
|
||||
@ -171,6 +172,7 @@ nodist_libzpool_la_SOURCES = \
|
||||
module/zfs/zcp_synctask.c \
|
||||
module/zfs/zfeature.c \
|
||||
module/zfs/zfs_byteswap.c \
|
||||
module/zfs/zfs_chksum.c \
|
||||
module/zfs/zfs_fm.c \
|
||||
module/zfs/zfs_fuid.c \
|
||||
module/zfs/zfs_ratelimit.c \
|
||||
|
@ -59,9 +59,11 @@ dist_man_MANS = \
|
||||
%D%/man8/zfs-unjail.8 \
|
||||
%D%/man8/zfs-unload-key.8 \
|
||||
%D%/man8/zfs-unmount.8 \
|
||||
%D%/man8/zfs-unzone.8 \
|
||||
%D%/man8/zfs-upgrade.8 \
|
||||
%D%/man8/zfs-userspace.8 \
|
||||
%D%/man8/zfs-wait.8 \
|
||||
%D%/man8/zfs-zone.8 \
|
||||
%D%/man8/zfs_ids_to_path.8 \
|
||||
%D%/man8/zgenhostid.8 \
|
||||
%D%/man8/zinject.8 \
|
||||
|
@ -2248,9 +2248,74 @@ for each I/O submitter.
|
||||
When unset, requests are handled asynchronously by a thread pool.
|
||||
The number of requests which can be handled concurrently is controlled by
|
||||
.Sy zvol_threads .
|
||||
.Sy zvol_request_sync
|
||||
is ignored when running on a kernel that supports block multiqueue
|
||||
.Pq Li blk-mq .
|
||||
.
|
||||
.It Sy zvol_threads Ns = Ns Sy 32 Pq uint
|
||||
Max number of threads which can handle zvol I/O requests concurrently.
|
||||
.It Sy zvol_threads Ns = Ns Sy 0 Pq uint
|
||||
The number of system wide threads to use for processing zvol block IOs.
|
||||
If
|
||||
.Sy 0
|
||||
(the default) then internally set
|
||||
.Sy zvol_threads
|
||||
to the number of CPUs present or 32 (whichever is greater).
|
||||
.
|
||||
.It Sy zvol_blk_mq_threads Ns = Ns Sy 0 Pq uint
|
||||
The number of threads per zvol to use for queuing IO requests.
|
||||
This parameter will only appear if your kernel supports
|
||||
.Li blk-mq
|
||||
and is only read and assigned to a zvol at zvol load time.
|
||||
If
|
||||
.Sy 0
|
||||
(the default) then internally set
|
||||
.Sy zvol_blk_mq_threads
|
||||
to the number of CPUs present.
|
||||
.
|
||||
.It Sy zvol_use_blk_mq Ns = Ns Sy 0 Ns | Ns 1 Pq uint
|
||||
Set to
|
||||
.Sy 1
|
||||
to use the
|
||||
.Li blk-mq
|
||||
API for zvols.
|
||||
Set to
|
||||
.Sy 0
|
||||
(the default) to use the legacy zvol APIs.
|
||||
This setting can give better or worse zvol performance depending on
|
||||
the workload.
|
||||
This parameter will only appear if your kernel supports
|
||||
.Li blk-mq
|
||||
and is only read and assigned to a zvol at zvol load time.
|
||||
.
|
||||
.It Sy zvol_blk_mq_blocks_per_thread Ns = Ns Sy 8 Pq uint
|
||||
If
|
||||
.Sy zvol_use_blk_mq
|
||||
is enabled, then process this number of
|
||||
.Sy volblocksize Ns -sized blocks per zvol thread.
|
||||
This tunable can be used to favor better performance for zvol reads (lower
|
||||
values) or writes (higher values).
|
||||
If set to
|
||||
.Sy 0 ,
|
||||
then the zvol layer will process the maximum number of blocks
|
||||
per thread that it can.
|
||||
This parameter will only appear if your kernel supports
|
||||
.Li blk-mq
|
||||
and is only applied at each zvol's load time.
|
||||
.
|
||||
.It Sy zvol_blk_mq_queue_depth Ns = Ns Sy 0 Pq uint
|
||||
The queue_depth value for the zvol
|
||||
.Li blk-mq
|
||||
interface.
|
||||
This parameter will only appear if your kernel supports
|
||||
.Li blk-mq
|
||||
and is only applied at each zvol's load time.
|
||||
If
|
||||
.Sy 0
|
||||
(the default) then use the kernel's default queue depth.
|
||||
Values are clamped to the kernel's
|
||||
.Dv BLKDEV_MIN_RQ
|
||||
and
|
||||
.Dv BLKDEV_MAX_RQ Ns / Ns Dv BLKDEV_DEFAULT_RQ
|
||||
limits.
|
||||
.
|
||||
.It Sy zvol_volmode Ns = Ns Sy 1 Pq uint
|
||||
Defines zvol block devices behaviour when
|
||||
|
@ -743,7 +743,7 @@ This property is not inherited.
|
||||
.It Xo
|
||||
.Sy checksum Ns = Ns Sy on Ns | Ns Sy off Ns | Ns Sy fletcher2 Ns | Ns
|
||||
.Sy fletcher4 Ns | Ns Sy sha256 Ns | Ns Sy noparity Ns | Ns
|
||||
.Sy sha512 Ns | Ns Sy skein Ns | Ns Sy edonr
|
||||
.Sy sha512 Ns | Ns Sy skein Ns | Ns Sy edonr Ns | Ns Sy blake3
|
||||
.Xc
|
||||
Controls the checksum used to verify data integrity.
|
||||
The default value is
|
||||
@ -768,8 +768,9 @@ a recommended practice.
|
||||
The
|
||||
.Sy sha512 ,
|
||||
.Sy skein ,
|
||||
.Sy edonr ,
|
||||
and
|
||||
.Sy edonr
|
||||
.Sy blake3
|
||||
checksum algorithms require enabling the appropriate features on the pool.
|
||||
.Pp
|
||||
Please see
|
||||
@ -984,7 +985,7 @@ mount options.
|
||||
.It Xo
|
||||
.Sy dedup Ns = Ns Sy off Ns | Ns Sy on Ns | Ns Sy verify Ns | Ns
|
||||
.Sy sha256 Ns Oo , Ns Sy verify Oc Ns | Ns Sy sha512 Ns Oo , Ns Sy verify Oc Ns | Ns Sy skein Ns Oo , Ns Sy verify Oc Ns | Ns
|
||||
.Sy edonr , Ns Sy verify
|
||||
.Sy edonr , Ns Sy verify Ns | Ns Sy blake3 Ns Oo , Ns Sy verify Oc Ns
|
||||
.Xc
|
||||
Configures deduplication for a dataset.
|
||||
The default value is
|
||||
@ -1884,8 +1885,7 @@ feature and are not relevant on other platforms.
|
||||
The default value is
|
||||
.Sy off .
|
||||
.It Sy zoned Ns = Ns Sy on Ns | Ns Sy off
|
||||
Controls whether the dataset is managed from a non-global zone.
|
||||
Zones are a Solaris feature and are not relevant on other platforms.
|
||||
Controls whether the dataset is managed from a non-global zone or namespace.
|
||||
The default value is
|
||||
.Sy off .
|
||||
.El
|
||||
|
@ -326,6 +326,12 @@ while
|
||||
.Sy freeing
|
||||
is non-zero.
|
||||
.
|
||||
.feature org.openzfs blake3 no extensible_dataset
|
||||
This feature enables the use of the BLAKE3 hash algorithm for checksum and dedup.
|
||||
BLAKE3 is a secure hash algorithm focused on high performance.
|
||||
.Pp
|
||||
.checksum-spiel blake3
|
||||
.
|
||||
.feature com.delphix bookmarks yes extensible_dataset
|
||||
This feature enables use of the
|
||||
.Nm zfs Cm bookmark
|
||||
@ -436,6 +442,8 @@ in ZFS, which means that the checksum is pre-seeded with a secret
|
||||
to be checksummed.
|
||||
Thus the produced checksums are unique to a given pool,
|
||||
preventing hash collision attacks on systems with dedup.
|
||||
.Pp
|
||||
.checksum-spiel edonr
|
||||
.
|
||||
.feature com.delphix embedded_data no
|
||||
This feature improves the performance and compression ratio of
|
||||
|
1
sys/contrib/openzfs/man/man8/zfs-unzone.8
Symbolic link
1
sys/contrib/openzfs/man/man8/zfs-unzone.8
Symbolic link
@ -0,0 +1 @@
|
||||
zfs-zone.8
|
116
sys/contrib/openzfs/man/man8/zfs-zone.8
Normal file
116
sys/contrib/openzfs/man/man8/zfs-zone.8
Normal file
@ -0,0 +1,116 @@
|
||||
.\"
|
||||
.\" CDDL HEADER START
|
||||
.\"
|
||||
.\" The contents of this file are subject to the terms of the
|
||||
.\" Common Development and Distribution License (the "License").
|
||||
.\" You may not use this file except in compliance with the License.
|
||||
.\"
|
||||
.\" You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
.\" or http://www.opensolaris.org/os/licensing.
|
||||
.\" See the License for the specific language governing permissions
|
||||
.\" and limitations under the License.
|
||||
.\"
|
||||
.\" When distributing Covered Code, include this CDDL HEADER in each
|
||||
.\" file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
.\" If applicable, add the following below this CDDL HEADER, with the
|
||||
.\" fields enclosed by brackets "[]" replaced with your own identifying
|
||||
.\" information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
.\"
|
||||
.\" CDDL HEADER END
|
||||
.\"
|
||||
.\" Copyright (c) 2009 Sun Microsystems, Inc. All Rights Reserved.
|
||||
.\" Copyright 2011 Joshua M. Clulow <josh@sysmgr.org>
|
||||
.\" Copyright (c) 2011, 2019 by Delphix. All rights reserved.
|
||||
.\" Copyright (c) 2011, Pawel Jakub Dawidek <pjd@FreeBSD.org>
|
||||
.\" Copyright (c) 2012, Glen Barber <gjb@FreeBSD.org>
|
||||
.\" Copyright (c) 2012, Bryan Drewery <bdrewery@FreeBSD.org>
|
||||
.\" Copyright (c) 2013, Steven Hartland <smh@FreeBSD.org>
|
||||
.\" Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
|
||||
.\" Copyright (c) 2014, Joyent, Inc. All rights reserved.
|
||||
.\" Copyright (c) 2014 by Adam Stevko. All rights reserved.
|
||||
.\" Copyright (c) 2014 Integros [integros.com]
|
||||
.\" Copyright (c) 2014, Xin LI <delphij@FreeBSD.org>
|
||||
.\" Copyright (c) 2014-2015, The FreeBSD Foundation, All Rights Reserved.
|
||||
.\" Copyright (c) 2016 Nexenta Systems, Inc. All Rights Reserved.
|
||||
.\" Copyright 2019 Richard Laager. All rights reserved.
|
||||
.\" Copyright 2018 Nexenta Systems, Inc.
|
||||
.\" Copyright 2019 Joyent, Inc.
|
||||
.\" Copyright 2021 Klara, Inc.
|
||||
.\"
|
||||
.Dd June 3, 2022
|
||||
.Dt ZFS-ZONE 8
|
||||
.Os
|
||||
.
|
||||
.Sh NAME
|
||||
.Nm zfs-zone ,
|
||||
.Nm zfs-unzone
|
||||
.Nd attach and detach ZFS filesystems to user namespaces
|
||||
.Sh SYNOPSIS
|
||||
.Nm zfs Cm zone
|
||||
.Ar nsfile
|
||||
.Ar filesystem
|
||||
.Nm zfs Cm unzone
|
||||
.Ar nsfile
|
||||
.Ar filesystem
|
||||
.
|
||||
.Sh DESCRIPTION
|
||||
.Bl -tag -width ""
|
||||
.It Xo
|
||||
.Nm zfs
|
||||
.Cm zone
|
||||
.Ar nsfile
|
||||
.Ar filesystem
|
||||
.Xc
|
||||
Attach the specified
|
||||
.Ar filesystem
|
||||
to the user namespace identified by
|
||||
.Ar nsfile .
|
||||
From now on this file system tree can be managed from within a user namespace
|
||||
if the
|
||||
.Sy zoned
|
||||
property has been set.
|
||||
.Pp
|
||||
You cannot attach a zoned dataset's children to another user namespace.
|
||||
You also cannot attach the root file system
|
||||
of the user namespace or any dataset
|
||||
which needs to be mounted before the zfs service
|
||||
is run inside the user namespace,
|
||||
as it would be attached unmounted until it is
|
||||
mounted from the service inside the user namespace.
|
||||
.Pp
|
||||
To allow management of the dataset from within a user namespace, the
|
||||
.Sy zoned
|
||||
property has to be set and the user namespace needs access to the
|
||||
.Pa /dev/zfs
|
||||
device.
|
||||
The
|
||||
.Sy quota
|
||||
property cannot be changed from within a user namespace.
|
||||
.Pp
|
||||
After a dataset is attached to a user namespace and the
|
||||
.Sy zoned
|
||||
property is set,
|
||||
a zoned file system cannot be mounted outside the user namespace,
|
||||
since the user namespace administrator might have set the mount point
|
||||
to an unacceptable value.
|
||||
.It Xo
|
||||
.Nm zfs
|
||||
.Cm unzone
|
||||
.Ar nsfile
|
||||
.Ar filesystem
|
||||
.Xc
|
||||
Detach the specified
|
||||
.Ar filesystem
|
||||
from the user namespace identified by
|
||||
.Ar nsfile .
|
||||
.El
|
||||
.Sh EXAMPLES
|
||||
.Ss Example 1 : No Delegating a Dataset to a User Namespace
|
||||
The following example delegates the
|
||||
.Ar tank/users
|
||||
dataset to a user namespace identified by user namespace file
|
||||
.Pa /proc/1234/ns/user .
|
||||
.Dl # Nm zfs Cm zone Ar /proc/1234/ns/user Ar tank/users
|
||||
.
|
||||
.Sh SEE ALSO
|
||||
.Xr zfsprops 7
|
@ -84,8 +84,29 @@ with no flags on the relevant target devices.
|
||||
.It Fl w , -wait
|
||||
Wait until the devices are done being trimmed before returning.
|
||||
.El
|
||||
.Sh PERIODIC TRIM
|
||||
On machines using systemd, trim timers can be enabled on a per-pool basis.
|
||||
.Nm weekly
|
||||
and
|
||||
.Nm monthly
|
||||
timer units are provided.
|
||||
.Bl -tag -width Ds
|
||||
.It Xo
|
||||
.Xc
|
||||
.Nm systemctl
|
||||
.Cm enable
|
||||
.Cm zfs-trim-\fIweekly\fB@\fIrpool\fB.timer
|
||||
.Cm --now
|
||||
.It Xo
|
||||
.Xc
|
||||
.Nm systemctl
|
||||
.Cm enable
|
||||
.Cm zfs-trim-\fImonthly\fB@\fIotherpool\fB.timer
|
||||
.Cm --now
|
||||
.El
|
||||
.
|
||||
.Sh SEE ALSO
|
||||
.Xr systemd.timer 5 ,
|
||||
.Xr zpoolprops 7 ,
|
||||
.Xr zpool-initialize 8 ,
|
||||
.Xr zpool-wait 8
|
||||
|
@ -65,7 +65,8 @@ SPL_OBJS := \
|
||||
spl-tsd.o \
|
||||
spl-vmem.o \
|
||||
spl-xdr.o \
|
||||
spl-zlib.o
|
||||
spl-zlib.o \
|
||||
spl-zone.o
|
||||
|
||||
spl-objs += $(addprefix os/linux/spl/,$(SPL_OBJS))
|
||||
|
||||
@ -75,6 +76,10 @@ ICP_OBJS := \
|
||||
algs/aes/aes_impl.o \
|
||||
algs/aes/aes_impl_generic.o \
|
||||
algs/aes/aes_modes.o \
|
||||
algs/blake3/blake3.o \
|
||||
algs/blake3/blake3_generic.o \
|
||||
algs/blake3/blake3_impl.o \
|
||||
algs/blake3/blake3_x86-64.o \
|
||||
algs/edonr/edonr.o \
|
||||
algs/modes/cbc.o \
|
||||
algs/modes/ccm.o \
|
||||
@ -105,23 +110,45 @@ ICP_OBJS_X86_64 := \
|
||||
asm-x86_64/aes/aes_aesni.o \
|
||||
asm-x86_64/aes/aes_amd64.o \
|
||||
asm-x86_64/aes/aeskey.o \
|
||||
asm-x86_64/blake3/blake3_avx2.o \
|
||||
asm-x86_64/blake3/blake3_avx512.o \
|
||||
asm-x86_64/blake3/blake3_sse2.o \
|
||||
asm-x86_64/blake3/blake3_sse41.o \
|
||||
asm-x86_64/modes/aesni-gcm-x86_64.o \
|
||||
asm-x86_64/modes/gcm_pclmulqdq.o \
|
||||
asm-x86_64/modes/ghash-x86_64.o \
|
||||
asm-x86_64/sha2/sha256_impl.o \
|
||||
asm-x86_64/sha2/sha512_impl.o
|
||||
|
||||
|
||||
ICP_OBJS_X86 := \
|
||||
algs/aes/aes_impl_aesni.o \
|
||||
algs/aes/aes_impl_x86-64.o \
|
||||
algs/modes/gcm_pclmulqdq.o
|
||||
|
||||
|
||||
ICP_OBJS_ARM64 := \
|
||||
asm-aarch64/blake3/b3_aarch64_sse2.o \
|
||||
asm-aarch64/blake3/b3_aarch64_sse41.o
|
||||
|
||||
|
||||
ICP_OBJS_PPC_PPC64 := \
|
||||
asm-ppc64/blake3/b3_ppc64le_sse2.o \
|
||||
asm-ppc64/blake3/b3_ppc64le_sse41.o
|
||||
|
||||
zfs-objs += $(addprefix icp/,$(ICP_OBJS))
|
||||
zfs-$(CONFIG_X86) += $(addprefix icp/,$(ICP_OBJS_X86))
|
||||
zfs-$(CONFIG_UML_X86)+= $(addprefix icp/,$(ICP_OBJS_X86))
|
||||
zfs-$(CONFIG_X86_64) += $(addprefix icp/,$(ICP_OBJS_X86_64))
|
||||
zfs-$(CONFIG_ARM64) += $(addprefix icp/,$(ICP_OBJS_ARM64))
|
||||
zfs-$(CONFIG_PPC) += $(addprefix icp/,$(ICP_OBJS_PPC_PPC64))
|
||||
zfs-$(CONFIG_PPC64) += $(addprefix icp/,$(ICP_OBJS_PPC_PPC64))
|
||||
|
||||
$(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64)) : asflags-y += -I$(icp_include)
|
||||
$(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64)) : ccflags-y += -I$(icp_include)
|
||||
$(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64) \
|
||||
$(ICP_OBJS_ARM64) $(ICP_OBJS_PPC_PPC64)) : asflags-y += -I$(icp_include)
|
||||
|
||||
$(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64) \
|
||||
$(ICP_OBJS_ARM64) $(ICP_OBJS_PPC_PPC64)) : ccflags-y += -I$(icp_include)
|
||||
|
||||
# Suppress objtool "can't find jump dest instruction at" warnings. They
|
||||
# are caused by the constants which are defined in the text section of the
|
||||
@ -129,6 +156,7 @@ $(addprefix $(obj)/icp/,$(ICP_OBJS) $(ICP_OBJS_X86) $(ICP_OBJS_X86_64)) : ccflag
|
||||
# utility tries to interpret them as opcodes and obviously fails doing so.
|
||||
OBJECT_FILES_NON_STANDARD_aesni-gcm-x86_64.o := y
|
||||
OBJECT_FILES_NON_STANDARD_ghash-x86_64.o := y
|
||||
|
||||
# Suppress objtool "unsupported stack pointer realignment" warnings. We are
|
||||
# not using a DRAP register while aligning the stack to a 64 byte boundary.
|
||||
# See #6950 for the reasoning.
|
||||
@ -205,6 +233,7 @@ ZCOMMON_OBJS_ARM64 := \
|
||||
|
||||
zfs-objs += $(addprefix zcommon/,$(ZCOMMON_OBJS))
|
||||
zfs-$(CONFIG_X86) += $(addprefix zcommon/,$(ZCOMMON_OBJS_X86))
|
||||
zfs-$(CONFIG_UML_X86)+= $(addprefix zcommon/,$(ZCOMMON_OBJS_X86))
|
||||
zfs-$(CONFIG_ARM64) += $(addprefix zcommon/,$(ZCOMMON_OBJS_ARM64))
|
||||
|
||||
|
||||
@ -261,6 +290,7 @@ ZFS_OBJS := \
|
||||
abd.o \
|
||||
aggsum.o \
|
||||
arc.o \
|
||||
blake3_zfs.o \
|
||||
blkptr.o \
|
||||
bplist.o \
|
||||
bpobj.o \
|
||||
@ -358,6 +388,7 @@ ZFS_OBJS := \
|
||||
zcp_synctask.o \
|
||||
zfeature.o \
|
||||
zfs_byteswap.o \
|
||||
zfs_chksum.o \
|
||||
zfs_fm.o \
|
||||
zfs_fuid.o \
|
||||
zfs_ioctl.o \
|
||||
@ -428,6 +459,7 @@ ZFS_OBJS_PPC_PPC64 := \
|
||||
|
||||
zfs-objs += $(addprefix zfs/,$(ZFS_OBJS)) $(addprefix os/linux/zfs/,$(ZFS_OBJS_OS))
|
||||
zfs-$(CONFIG_X86) += $(addprefix zfs/,$(ZFS_OBJS_X86))
|
||||
zfs-$(CONFIG_UML_X86)+= $(addprefix zfs/,$(ZFS_OBJS_X86))
|
||||
zfs-$(CONFIG_ARM64) += $(addprefix zfs/,$(ZFS_OBJS_ARM64))
|
||||
zfs-$(CONFIG_PPC) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
|
||||
zfs-$(CONFIG_PPC64) += $(addprefix zfs/,$(ZFS_OBJS_PPC_PPC64))
|
||||
|
@ -10,6 +10,10 @@ INCDIR=${.CURDIR:H}/include
|
||||
KMOD= openzfs
|
||||
|
||||
.PATH: ${SRCDIR}/avl \
|
||||
${SRCDIR}/icp/algs/blake3 \
|
||||
${SRCDIR}/icp/asm-aarch64/blake3 \
|
||||
${SRCDIR}/icp/asm-ppc64/blake3 \
|
||||
${SRCDIR}/icp/asm-x86_64/blake3 \
|
||||
${SRCDIR}/lua \
|
||||
${SRCDIR}/nvpair \
|
||||
${SRCDIR}/icp/algs/edonr \
|
||||
@ -31,6 +35,7 @@ CFLAGS+= -I${INCDIR}/os/freebsd
|
||||
CFLAGS+= -I${INCDIR}/os/freebsd/spl
|
||||
CFLAGS+= -I${INCDIR}/os/freebsd/zfs
|
||||
CFLAGS+= -I${SRCDIR}/zstd/include
|
||||
CFLAGS+= -I${SRCDIR}/icp/include
|
||||
CFLAGS+= -include ${INCDIR}/os/freebsd/spl/sys/ccompile.h
|
||||
|
||||
CFLAGS+= -D__KERNEL__ -DFREEBSD_NAMECACHE -DBUILDING_ZFS -D__BSD_VISIBLE=1 \
|
||||
@ -38,7 +43,8 @@ CFLAGS+= -D__KERNEL__ -DFREEBSD_NAMECACHE -DBUILDING_ZFS -D__BSD_VISIBLE=1 \
|
||||
-D_SYS_VMEM_H_ -DKDTRACE_HOOKS -DSMP -DCOMPAT_FREEBSD11
|
||||
|
||||
.if ${MACHINE_ARCH} == "amd64"
|
||||
CFLAGS+= -DHAVE_AVX2 -DHAVE_AVX -D__x86_64 -DHAVE_SSE2 -DHAVE_AVX512F -DHAVE_SSSE3
|
||||
CFLAGS+= -D__x86_64 -DHAVE_SSE2 -DHAVE_SSSE3 -DHAVE_SSE4_1 -DHAVE_SSE4_2 \
|
||||
-DHAVE_AVX -DHAVE_AVX2 -DHAVE_AVX512F -DHAVE_AVX512VL
|
||||
.endif
|
||||
|
||||
.if defined(WITH_DEBUG) && ${WITH_DEBUG} == "true"
|
||||
@ -73,12 +79,32 @@ CFLAGS+= -DBITS_PER_LONG=64
|
||||
|
||||
SRCS= vnode_if.h device_if.h bus_if.h
|
||||
|
||||
# avl
|
||||
#avl
|
||||
SRCS+= avl.c
|
||||
|
||||
# icp
|
||||
SRCS+= edonr.c
|
||||
|
||||
#icp/algs/blake3
|
||||
SRCS+= blake3.c \
|
||||
blake3_generic.c \
|
||||
blake3_impl.c \
|
||||
blake3_x86-64.c
|
||||
|
||||
#icp/asm-aarch64/blake3
|
||||
SRCS+= b3_aarch64_sse2.S \
|
||||
b3_aarch64_sse41.S
|
||||
|
||||
#icp/asm-ppc64/blake3
|
||||
SRCS+= b3_ppc64le_sse2.S \
|
||||
b3_ppc64le_sse41.S
|
||||
|
||||
#icp/asm-x86_64/blake3
|
||||
SRCS+= blake3_avx2.S \
|
||||
blake3_avx512.S \
|
||||
blake3_sse2.S \
|
||||
blake3_sse41.S
|
||||
|
||||
#lua
|
||||
SRCS+= lapi.c \
|
||||
lauxlib.c \
|
||||
@ -189,6 +215,7 @@ SRCS+= zfeature_common.c \
|
||||
SRCS+= abd.c \
|
||||
aggsum.c \
|
||||
arc.c \
|
||||
blake3_zfs.c \
|
||||
blkptr.c \
|
||||
bplist.c \
|
||||
bpobj.c \
|
||||
@ -291,6 +318,7 @@ SRCS+= abd.c \
|
||||
zcp_synctask.c \
|
||||
zfeature.c \
|
||||
zfs_byteswap.c \
|
||||
zfs_chksum.c \
|
||||
zfs_file_os.c \
|
||||
zfs_fm.c \
|
||||
zfs_fuid.c \
|
||||
@ -337,8 +365,6 @@ SRCS+= zfs_zstd.c \
|
||||
zstd_decompress.c \
|
||||
zstd_decompress_block.c
|
||||
|
||||
|
||||
|
||||
beforeinstall:
|
||||
.if ${MK_DEBUG_FILES} != "no"
|
||||
mtree -eu \
|
||||
|
@ -108,21 +108,6 @@
|
||||
#include <sys/cmn_err.h>
|
||||
#include <sys/mod.h>
|
||||
|
||||
/*
|
||||
* Small arrays to translate between balance (or diff) values and child indices.
|
||||
*
|
||||
* Code that deals with binary tree data structures will randomly use
|
||||
* left and right children when examining a tree. C "if()" statements
|
||||
* which evaluate randomly suffer from very poor hardware branch prediction.
|
||||
* In this code we avoid some of the branch mispredictions by using the
|
||||
* following translation arrays. They replace random branches with an
|
||||
* additional memory reference. Since the translation arrays are both very
|
||||
* small the data should remain efficiently in cache.
|
||||
*/
|
||||
static const int avl_child2balance[] = {-1, 1};
|
||||
static const int avl_balance2child[] = {0, 0, 1};
|
||||
|
||||
|
||||
/*
|
||||
* Walk from one node to the previous valued node (ie. an infix walk
|
||||
* towards the left). At any given node we do one of 2 things:
|
||||
@ -278,8 +263,7 @@ avl_find(avl_tree_t *tree, const void *value, avl_index_t *where)
|
||||
#endif
|
||||
return (AVL_NODE2DATA(node, off));
|
||||
}
|
||||
child = avl_balance2child[1 + diff];
|
||||
|
||||
child = (diff > 0);
|
||||
}
|
||||
|
||||
if (where != NULL)
|
||||
@ -527,7 +511,7 @@ avl_insert(avl_tree_t *tree, void *new_data, avl_index_t where)
|
||||
* Compute the new balance
|
||||
*/
|
||||
old_balance = AVL_XBALANCE(node);
|
||||
new_balance = old_balance + avl_child2balance[which_child];
|
||||
new_balance = old_balance + (which_child ? 1 : -1);
|
||||
|
||||
/*
|
||||
* If we introduced equal balance, then we are done immediately
|
||||
@ -693,7 +677,7 @@ avl_remove(avl_tree_t *tree, void *data)
|
||||
* choose node to swap from whichever side is taller
|
||||
*/
|
||||
old_balance = AVL_XBALANCE(delete);
|
||||
left = avl_balance2child[old_balance + 1];
|
||||
left = (old_balance > 0);
|
||||
right = 1 - left;
|
||||
|
||||
/*
|
||||
@ -777,7 +761,7 @@ avl_remove(avl_tree_t *tree, void *data)
|
||||
*/
|
||||
node = parent;
|
||||
old_balance = AVL_XBALANCE(node);
|
||||
new_balance = old_balance - avl_child2balance[which_child];
|
||||
new_balance = old_balance - (which_child ? 1 : -1);
|
||||
parent = AVL_XPARENT(node);
|
||||
which_child = AVL_XCHILD(node);
|
||||
|
||||
|
732
sys/contrib/openzfs/module/icp/algs/blake3/blake3.c
Normal file
732
sys/contrib/openzfs/module/icp/algs/blake3/blake3.c
Normal file
@ -0,0 +1,732 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
|
||||
* Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor
|
||||
* Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/blake3.h>
|
||||
|
||||
#include "blake3_impl.h"
|
||||
|
||||
/*
|
||||
* We need 1056 byte stack for blake3_compress_subtree_wide()
|
||||
* - we define this pragma to make gcc happy
|
||||
*/
|
||||
#if defined(__GNUC__)
|
||||
#pragma GCC diagnostic ignored "-Wframe-larger-than="
|
||||
#endif
|
||||
|
||||
/* internal used */
|
||||
typedef struct {
|
||||
uint32_t input_cv[8];
|
||||
uint64_t counter;
|
||||
uint8_t block[BLAKE3_BLOCK_LEN];
|
||||
uint8_t block_len;
|
||||
uint8_t flags;
|
||||
} output_t;
|
||||
|
||||
/*
 * Internal flag bits. They are OR-ed together and handed to the compression
 * routines through their flags argument.
 */
enum blake3_flags {
	CHUNK_START		= 1 << 0, /* no block of the chunk compressed yet */
	CHUNK_END		= 1 << 1, /* added by chunk_state_output() */
	PARENT			= 1 << 2, /* added for parent node blocks */
	ROOT			= 1 << 3, /* added by output_root_bytes() */
	KEYED_HASH		= 1 << 4, /* set by Blake3_InitKeyed() */
	DERIVE_KEY_CONTEXT	= 1 << 5, /* NOTE(review): no user in view */
	DERIVE_KEY_MATERIAL	= 1 << 6, /* NOTE(review): no user in view */
};
|
||||
|
||||
/* internal start */
|
||||
/*
 * Set up a chunk state for chunk number 0: the chaining value is loaded from
 * key, the block buffer is zeroed and emptied, and flags is remembered for
 * every later compression of this chunk state.
 */
static void chunk_state_init(blake3_chunk_state_t *ctx,
    const uint32_t key[8], uint8_t flags)
{
	/* Read the key before anything in ctx is overwritten. */
	memcpy(ctx->cv, key, BLAKE3_KEY_LEN);

	memset(ctx->buf, 0, BLAKE3_BLOCK_LEN);
	ctx->buf_len = 0;

	ctx->blocks_compressed = 0;
	ctx->chunk_counter = 0;
	ctx->flags = flags;
}
|
||||
|
||||
/*
 * Re-arm an existing chunk state for the chunk numbered chunk_counter.
 * The chaining value is reloaded from key and the block buffer is cleared;
 * ctx->flags is deliberately left untouched.
 */
static void chunk_state_reset(blake3_chunk_state_t *ctx,
    const uint32_t key[8], uint64_t chunk_counter)
{
	memcpy(ctx->cv, key, BLAKE3_KEY_LEN);

	memset(ctx->buf, 0, BLAKE3_BLOCK_LEN);
	ctx->buf_len = 0;

	ctx->blocks_compressed = 0;
	ctx->chunk_counter = chunk_counter;
}
|
||||
|
||||
static size_t chunk_state_len(const blake3_chunk_state_t *ctx)
|
||||
{
|
||||
return (BLAKE3_BLOCK_LEN * (size_t)ctx->blocks_compressed) +
|
||||
((size_t)ctx->buf_len);
|
||||
}
|
||||
|
||||
/*
 * Append as much of input as fits into the free tail of the block buffer.
 * Returns the number of bytes consumed, which is at most input_len.
 */
static size_t chunk_state_fill_buf(blake3_chunk_state_t *ctx,
    const uint8_t *input, size_t input_len)
{
	size_t used = (size_t)ctx->buf_len;
	size_t room = BLAKE3_BLOCK_LEN - used;
	size_t take = (room > input_len) ? input_len : room;

	memcpy(&ctx->buf[used], input, take);
	ctx->buf_len += (uint8_t)take;

	return (take);
}
|
||||
|
||||
static uint8_t chunk_state_maybe_start_flag(const blake3_chunk_state_t *ctx)
|
||||
{
|
||||
if (ctx->blocks_compressed == 0) {
|
||||
return (CHUNK_START);
|
||||
} else {
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
|
||||
static output_t make_output(const uint32_t input_cv[8],
|
||||
const uint8_t *block, uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags)
|
||||
{
|
||||
output_t ret;
|
||||
memcpy(ret.input_cv, input_cv, 32);
|
||||
memcpy(ret.block, block, BLAKE3_BLOCK_LEN);
|
||||
ret.block_len = block_len;
|
||||
ret.counter = counter;
|
||||
ret.flags = flags;
|
||||
return (ret);
|
||||
}
|
||||
|
||||
/*
|
||||
* Chaining values within a given chunk (specifically the compress_in_place
|
||||
* interface) are represented as words. This avoids unnecessary bytes<->words
|
||||
* conversion overhead in the portable implementation. However, the hash_many
|
||||
* interface handles both user input and parent node blocks, so it accepts
|
||||
* bytes. For that reason, chaining values in the CV stack are represented as
|
||||
* bytes.
|
||||
*/
|
||||
static void output_chaining_value(const blake3_impl_ops_t *ops,
|
||||
const output_t *ctx, uint8_t cv[32])
|
||||
{
|
||||
uint32_t cv_words[8];
|
||||
memcpy(cv_words, ctx->input_cv, 32);
|
||||
ops->compress_in_place(cv_words, ctx->block, ctx->block_len,
|
||||
ctx->counter, ctx->flags);
|
||||
store_cv_words(cv, cv_words);
|
||||
}
|
||||
|
||||
static void output_root_bytes(const blake3_impl_ops_t *ops, const output_t *ctx,
|
||||
uint64_t seek, uint8_t *out, size_t out_len)
|
||||
{
|
||||
uint64_t output_block_counter = seek / 64;
|
||||
size_t offset_within_block = seek % 64;
|
||||
uint8_t wide_buf[64];
|
||||
while (out_len > 0) {
|
||||
ops->compress_xof(ctx->input_cv, ctx->block, ctx->block_len,
|
||||
output_block_counter, ctx->flags | ROOT, wide_buf);
|
||||
size_t available_bytes = 64 - offset_within_block;
|
||||
size_t memcpy_len;
|
||||
if (out_len > available_bytes) {
|
||||
memcpy_len = available_bytes;
|
||||
} else {
|
||||
memcpy_len = out_len;
|
||||
}
|
||||
memcpy(out, wide_buf + offset_within_block, memcpy_len);
|
||||
out += memcpy_len;
|
||||
out_len -= memcpy_len;
|
||||
output_block_counter += 1;
|
||||
offset_within_block = 0;
|
||||
}
|
||||
}
|
||||
|
||||
static void chunk_state_update(const blake3_impl_ops_t *ops,
|
||||
blake3_chunk_state_t *ctx, const uint8_t *input, size_t input_len)
|
||||
{
|
||||
if (ctx->buf_len > 0) {
|
||||
size_t take = chunk_state_fill_buf(ctx, input, input_len);
|
||||
input += take;
|
||||
input_len -= take;
|
||||
if (input_len > 0) {
|
||||
ops->compress_in_place(ctx->cv, ctx->buf,
|
||||
BLAKE3_BLOCK_LEN, ctx->chunk_counter,
|
||||
ctx->flags|chunk_state_maybe_start_flag(ctx));
|
||||
ctx->blocks_compressed += 1;
|
||||
ctx->buf_len = 0;
|
||||
memset(ctx->buf, 0, BLAKE3_BLOCK_LEN);
|
||||
}
|
||||
}
|
||||
|
||||
while (input_len > BLAKE3_BLOCK_LEN) {
|
||||
ops->compress_in_place(ctx->cv, input, BLAKE3_BLOCK_LEN,
|
||||
ctx->chunk_counter,
|
||||
ctx->flags|chunk_state_maybe_start_flag(ctx));
|
||||
ctx->blocks_compressed += 1;
|
||||
input += BLAKE3_BLOCK_LEN;
|
||||
input_len -= BLAKE3_BLOCK_LEN;
|
||||
}
|
||||
|
||||
size_t take = chunk_state_fill_buf(ctx, input, input_len);
|
||||
input += take;
|
||||
input_len -= take;
|
||||
}
|
||||
|
||||
static output_t chunk_state_output(const blake3_chunk_state_t *ctx)
|
||||
{
|
||||
uint8_t block_flags =
|
||||
ctx->flags | chunk_state_maybe_start_flag(ctx) | CHUNK_END;
|
||||
return (make_output(ctx->cv, ctx->buf, ctx->buf_len, ctx->chunk_counter,
|
||||
block_flags));
|
||||
}
|
||||
|
||||
static output_t parent_output(const uint8_t block[BLAKE3_BLOCK_LEN],
|
||||
const uint32_t key[8], uint8_t flags)
|
||||
{
|
||||
return (make_output(key, block, BLAKE3_BLOCK_LEN, 0, flags | PARENT));
|
||||
}
|
||||
|
||||
/*
|
||||
* Given some input larger than one chunk, return the number of bytes that
|
||||
* should go in the left subtree. This is the largest power-of-2 number of
|
||||
* chunks that leaves at least 1 byte for the right subtree.
|
||||
*/
|
||||
static size_t left_len(size_t content_len)
|
||||
{
|
||||
/*
|
||||
* Subtract 1 to reserve at least one byte for the right side.
|
||||
* content_len
|
||||
* should always be greater than BLAKE3_CHUNK_LEN.
|
||||
*/
|
||||
size_t full_chunks = (content_len - 1) / BLAKE3_CHUNK_LEN;
|
||||
return (round_down_to_power_of_2(full_chunks) * BLAKE3_CHUNK_LEN);
|
||||
}
|
||||
|
||||
/*
|
||||
* Use SIMD parallelism to hash up to MAX_SIMD_DEGREE chunks at the same time
|
||||
* on a single thread. Write out the chunk chaining values and return the
|
||||
* number of chunks hashed. These chunks are never the root and never empty;
|
||||
* those cases use a different codepath.
|
||||
*/
|
||||
static size_t compress_chunks_parallel(const blake3_impl_ops_t *ops,
|
||||
const uint8_t *input, size_t input_len, const uint32_t key[8],
|
||||
uint64_t chunk_counter, uint8_t flags, uint8_t *out)
|
||||
{
|
||||
const uint8_t *chunks_array[MAX_SIMD_DEGREE];
|
||||
size_t input_position = 0;
|
||||
size_t chunks_array_len = 0;
|
||||
while (input_len - input_position >= BLAKE3_CHUNK_LEN) {
|
||||
chunks_array[chunks_array_len] = &input[input_position];
|
||||
input_position += BLAKE3_CHUNK_LEN;
|
||||
chunks_array_len += 1;
|
||||
}
|
||||
|
||||
ops->hash_many(chunks_array, chunks_array_len, BLAKE3_CHUNK_LEN /
|
||||
BLAKE3_BLOCK_LEN, key, chunk_counter, B_TRUE, flags, CHUNK_START,
|
||||
CHUNK_END, out);
|
||||
|
||||
/*
|
||||
* Hash the remaining partial chunk, if there is one. Note that the
|
||||
* empty chunk (meaning the empty message) is a different codepath.
|
||||
*/
|
||||
if (input_len > input_position) {
|
||||
uint64_t counter = chunk_counter + (uint64_t)chunks_array_len;
|
||||
blake3_chunk_state_t chunk_state;
|
||||
chunk_state_init(&chunk_state, key, flags);
|
||||
chunk_state.chunk_counter = counter;
|
||||
chunk_state_update(ops, &chunk_state, &input[input_position],
|
||||
input_len - input_position);
|
||||
output_t output = chunk_state_output(&chunk_state);
|
||||
output_chaining_value(ops, &output, &out[chunks_array_len *
|
||||
BLAKE3_OUT_LEN]);
|
||||
return (chunks_array_len + 1);
|
||||
} else {
|
||||
return (chunks_array_len);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Use SIMD parallelism to hash up to MAX_SIMD_DEGREE parents at the same time
|
||||
* on a single thread. Write out the parent chaining values and return the
|
||||
* number of parents hashed. (If there's an odd input chaining value left over,
|
||||
* return it as an additional output.) These parents are never the root and
|
||||
* never empty; those cases use a different codepath.
|
||||
*/
|
||||
static size_t compress_parents_parallel(const blake3_impl_ops_t *ops,
|
||||
const uint8_t *child_chaining_values, size_t num_chaining_values,
|
||||
const uint32_t key[8], uint8_t flags, uint8_t *out)
|
||||
{
|
||||
const uint8_t *parents_array[MAX_SIMD_DEGREE_OR_2];
|
||||
size_t parents_array_len = 0;
|
||||
|
||||
while (num_chaining_values - (2 * parents_array_len) >= 2) {
|
||||
parents_array[parents_array_len] = &child_chaining_values[2 *
|
||||
parents_array_len * BLAKE3_OUT_LEN];
|
||||
parents_array_len += 1;
|
||||
}
|
||||
|
||||
ops->hash_many(parents_array, parents_array_len, 1, key, 0, B_FALSE,
|
||||
flags | PARENT, 0, 0, out);
|
||||
|
||||
/* If there's an odd child left over, it becomes an output. */
|
||||
if (num_chaining_values > 2 * parents_array_len) {
|
||||
memcpy(&out[parents_array_len * BLAKE3_OUT_LEN],
|
||||
&child_chaining_values[2 * parents_array_len *
|
||||
BLAKE3_OUT_LEN], BLAKE3_OUT_LEN);
|
||||
return (parents_array_len + 1);
|
||||
} else {
|
||||
return (parents_array_len);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* The wide helper function returns (writes out) an array of chaining values
|
||||
* and returns the length of that array. The number of chaining values returned
|
||||
* is the dynamically detected SIMD degree, at most MAX_SIMD_DEGREE. Or fewer,
|
||||
* if the input is shorter than that many chunks. The reason for maintaining a
|
||||
* wide array of chaining values going back up the tree, is to allow the
|
||||
* implementation to hash as many parents in parallel as possible.
|
||||
*
|
||||
* As a special case when the SIMD degree is 1, this function will still return
|
||||
* at least 2 outputs. This guarantees that this function doesn't perform the
|
||||
* root compression. (If it did, it would use the wrong flags, and also we
|
||||
* wouldn't be able to implement extendable output.) Note that this function is
|
||||
* not used when the whole input is only 1 chunk long; that's a different
|
||||
* codepath.
|
||||
*
|
||||
* Why not just have the caller split the input on the first update(), instead
|
||||
* of implementing this special rule? Because we don't want to limit SIMD or
|
||||
* multi-threading parallelism for that update().
|
||||
*/
|
||||
static size_t blake3_compress_subtree_wide(const blake3_impl_ops_t *ops,
|
||||
const uint8_t *input, size_t input_len, const uint32_t key[8],
|
||||
uint64_t chunk_counter, uint8_t flags, uint8_t *out)
|
||||
{
|
||||
/*
|
||||
* Note that the single chunk case does *not* bump the SIMD degree up
|
||||
* to 2 when it is 1. If this implementation adds multi-threading in
|
||||
* the future, this gives us the option of multi-threading even the
|
||||
* 2-chunk case, which can help performance on smaller platforms.
|
||||
*/
|
||||
if (input_len <= (size_t)(ops->degree * BLAKE3_CHUNK_LEN)) {
|
||||
return (compress_chunks_parallel(ops, input, input_len, key,
|
||||
chunk_counter, flags, out));
|
||||
}
|
||||
|
||||
|
||||
/*
|
||||
* With more than simd_degree chunks, we need to recurse. Start by
|
||||
* dividing the input into left and right subtrees. (Note that this is
|
||||
* only optimal as long as the SIMD degree is a power of 2. If we ever
|
||||
* get a SIMD degree of 3 or something, we'll need a more complicated
|
||||
* strategy.)
|
||||
*/
|
||||
size_t left_input_len = left_len(input_len);
|
||||
size_t right_input_len = input_len - left_input_len;
|
||||
const uint8_t *right_input = &input[left_input_len];
|
||||
uint64_t right_chunk_counter = chunk_counter +
|
||||
(uint64_t)(left_input_len / BLAKE3_CHUNK_LEN);
|
||||
|
||||
/*
|
||||
* Make space for the child outputs. Here we use MAX_SIMD_DEGREE_OR_2
|
||||
* to account for the special case of returning 2 outputs when the
|
||||
* SIMD degree is 1.
|
||||
*/
|
||||
uint8_t cv_array[2 * MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
|
||||
size_t degree = ops->degree;
|
||||
if (left_input_len > BLAKE3_CHUNK_LEN && degree == 1) {
|
||||
|
||||
/*
|
||||
* The special case: We always use a degree of at least two,
|
||||
* to make sure there are two outputs. Except, as noted above,
|
||||
* at the chunk level, where we allow degree=1. (Note that the
|
||||
* 1-chunk-input case is a different codepath.)
|
||||
*/
|
||||
degree = 2;
|
||||
}
|
||||
uint8_t *right_cvs = &cv_array[degree * BLAKE3_OUT_LEN];
|
||||
|
||||
/*
|
||||
* Recurse! If this implementation adds multi-threading support in the
|
||||
* future, this is where it will go.
|
||||
*/
|
||||
size_t left_n = blake3_compress_subtree_wide(ops, input, left_input_len,
|
||||
key, chunk_counter, flags, cv_array);
|
||||
size_t right_n = blake3_compress_subtree_wide(ops, right_input,
|
||||
right_input_len, key, right_chunk_counter, flags, right_cvs);
|
||||
|
||||
/*
|
||||
* The special case again. If simd_degree=1, then we'll have left_n=1
|
||||
* and right_n=1. Rather than compressing them into a single output,
|
||||
* return them directly, to make sure we always have at least two
|
||||
* outputs.
|
||||
*/
|
||||
if (left_n == 1) {
|
||||
memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN);
|
||||
return (2);
|
||||
}
|
||||
|
||||
/* Otherwise, do one layer of parent node compression. */
|
||||
size_t num_chaining_values = left_n + right_n;
|
||||
return compress_parents_parallel(ops, cv_array,
|
||||
num_chaining_values, key, flags, out);
|
||||
}
|
||||
|
||||
/*
|
||||
* Hash a subtree with compress_subtree_wide(), and then condense the resulting
|
||||
* list of chaining values down to a single parent node. Don't compress that
|
||||
* last parent node, however. Instead, return its message bytes (the
|
||||
* concatenated chaining values of its children). This is necessary when the
|
||||
* first call to update() supplies a complete subtree, because the topmost
|
||||
* parent node of that subtree could end up being the root. It's also necessary
|
||||
* for extended output in the general case.
|
||||
*
|
||||
* As with compress_subtree_wide(), this function is not used on inputs of 1
|
||||
* chunk or less. That's a different codepath.
|
||||
*/
|
||||
static void compress_subtree_to_parent_node(const blake3_impl_ops_t *ops,
|
||||
const uint8_t *input, size_t input_len, const uint32_t key[8],
|
||||
uint64_t chunk_counter, uint8_t flags, uint8_t out[2 * BLAKE3_OUT_LEN])
|
||||
{
|
||||
uint8_t cv_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN];
|
||||
size_t num_cvs = blake3_compress_subtree_wide(ops, input, input_len,
|
||||
key, chunk_counter, flags, cv_array);
|
||||
|
||||
/*
|
||||
* If MAX_SIMD_DEGREE is greater than 2 and there's enough input,
|
||||
* compress_subtree_wide() returns more than 2 chaining values. Condense
|
||||
* them into 2 by forming parent nodes repeatedly.
|
||||
*/
|
||||
uint8_t out_array[MAX_SIMD_DEGREE_OR_2 * BLAKE3_OUT_LEN / 2];
|
||||
while (num_cvs > 2) {
|
||||
num_cvs = compress_parents_parallel(ops, cv_array, num_cvs, key,
|
||||
flags, out_array);
|
||||
memcpy(cv_array, out_array, num_cvs * BLAKE3_OUT_LEN);
|
||||
}
|
||||
memcpy(out, cv_array, 2 * BLAKE3_OUT_LEN);
|
||||
}
|
||||
|
||||
/*
 * Common part of all init variants: remember the key words, start the first
 * chunk with them, empty the CV stack and bind the implementation returned
 * by blake3_impl_get_ops().
 */
static void hasher_init_base(BLAKE3_CTX *ctx, const uint32_t key[8],
    uint8_t flags)
{
	memcpy(ctx->key, key, BLAKE3_KEY_LEN);
	chunk_state_init(&ctx->chunk, key, flags);

	ctx->cv_stack_len = 0;
	ctx->ops = blake3_impl_get_ops();
}
|
||||
|
||||
/*
|
||||
* As described in hasher_push_cv() below, we do "lazy merging", delaying
|
||||
* merges until right before the next CV is about to be added. This is
|
||||
* different from the reference implementation. Another difference is that we
|
||||
* aren't always merging 1 chunk at a time. Instead, each CV might represent
|
||||
* any power-of-two number of chunks, as long as the smaller-above-larger
|
||||
* stack order is maintained. Instead of the "count the trailing 0-bits"
|
||||
* algorithm described in the spec, we use a "count the total number of
|
||||
* 1-bits" variant that doesn't require us to retain the subtree size of the
|
||||
* CV on top of the stack. The principle is the same: each CV that should
|
||||
* remain in the stack is represented by a 1-bit in the total number of chunks
|
||||
* (or bytes) so far.
|
||||
*/
|
||||
static void hasher_merge_cv_stack(BLAKE3_CTX *ctx, uint64_t total_len)
{
	/* Stack depth to merge down to, derived from total_len by popcnt(). */
	size_t target_len = (size_t)popcnt(total_len);

	while (ctx->cv_stack_len > target_len) {
		/*
		 * The two topmost chaining values are adjacent in memory and
		 * are read as one parent block; the resulting chaining value
		 * overwrites the lower of the two.
		 */
		uint8_t *top_pair =
		    &ctx->cv_stack[(ctx->cv_stack_len - 2) * BLAKE3_OUT_LEN];
		output_t parent =
		    parent_output(top_pair, ctx->key, ctx->chunk.flags);

		output_chaining_value(ctx->ops, &parent, top_pair);
		ctx->cv_stack_len -= 1;
	}
}
|
||||
|
||||
/*
|
||||
* In reference_impl.rs, we merge the new CV with existing CVs from the stack
|
||||
* before pushing it. We can do that because we know more input is coming, so
|
||||
* we know none of the merges are root.
|
||||
*
|
||||
* This setting is different. We want to feed as much input as possible to
|
||||
* compress_subtree_wide(), without setting aside anything for the chunk_state.
|
||||
* If the user gives us 64 KiB, we want to parallelize over all 64 KiB at once
|
||||
* as a single subtree, if at all possible.
|
||||
*
|
||||
* This leads to two problems:
|
||||
* 1) This 64 KiB input might be the only call that ever gets made to update.
|
||||
* In this case, the root node of the 64 KiB subtree would be the root node
|
||||
* of the whole tree, and it would need to be ROOT finalized. We can't
|
||||
* compress it until we know.
|
||||
* 2) This 64 KiB input might complete a larger tree, whose root node is
|
||||
* similarly going to be the root of the whole tree. For example, maybe
|
||||
* we have 192 KiB (that is, 128 + 64) hashed so far. We can't compress the
|
||||
* node at the root of the 256 KiB subtree until we know how to finalize it.
|
||||
*
|
||||
* The second problem is solved with "lazy merging". That is, when we're about
|
||||
* to add a CV to the stack, we don't merge it with anything first, as the
|
||||
* reference impl does. Instead we do merges using the *previous* CV that was
|
||||
* added, which is sitting on top of the stack, and we put the new CV
|
||||
* (unmerged) on top of the stack afterwards. This guarantees that we never
|
||||
* merge the root node until finalize().
|
||||
*
|
||||
* Solving the first problem requires an additional tool,
|
||||
* compress_subtree_to_parent_node(). That function always returns the top
|
||||
* *two* chaining values of the subtree it's compressing. We then do lazy
|
||||
* merging with each of them separately, so that the second CV will always
|
||||
* remain unmerged. (That also helps us support extendable output when we're
|
||||
* hashing an input all-at-once.)
|
||||
*/
|
||||
static void hasher_push_cv(BLAKE3_CTX *ctx, uint8_t new_cv[BLAKE3_OUT_LEN],
    uint64_t chunk_counter)
{
	/* Lazy merging: settle the older entries first ... */
	hasher_merge_cv_stack(ctx, chunk_counter);

	/* ... then put the new chaining value on top, unmerged. */
	uint8_t *slot = &ctx->cv_stack[ctx->cv_stack_len * BLAKE3_OUT_LEN];
	memcpy(slot, new_cv, BLAKE3_OUT_LEN);
	ctx->cv_stack_len += 1;
}
|
||||
|
||||
void
|
||||
Blake3_Init(BLAKE3_CTX *ctx)
|
||||
{
|
||||
hasher_init_base(ctx, BLAKE3_IV, 0);
|
||||
}
|
||||
|
||||
void
|
||||
Blake3_InitKeyed(BLAKE3_CTX *ctx, const uint8_t key[BLAKE3_KEY_LEN])
|
||||
{
|
||||
uint32_t key_words[8];
|
||||
load_key_words(key, key_words);
|
||||
hasher_init_base(ctx, key_words, KEYED_HASH);
|
||||
}
|
||||
|
||||
static void
|
||||
Blake3_Update2(BLAKE3_CTX *ctx, const void *input, size_t input_len)
|
||||
{
|
||||
/*
|
||||
* Explicitly checking for zero avoids causing UB by passing a null
|
||||
* pointer to memcpy. This comes up in practice with things like:
|
||||
* std::vector<uint8_t> v;
|
||||
* blake3_hasher_update(&hasher, v.data(), v.size());
|
||||
*/
|
||||
if (input_len == 0) {
|
||||
return;
|
||||
}
|
||||
|
||||
const uint8_t *input_bytes = (const uint8_t *)input;
|
||||
|
||||
/*
|
||||
* If we have some partial chunk bytes in the internal chunk_state, we
|
||||
* need to finish that chunk first.
|
||||
*/
|
||||
if (chunk_state_len(&ctx->chunk) > 0) {
|
||||
size_t take = BLAKE3_CHUNK_LEN - chunk_state_len(&ctx->chunk);
|
||||
if (take > input_len) {
|
||||
take = input_len;
|
||||
}
|
||||
chunk_state_update(ctx->ops, &ctx->chunk, input_bytes, take);
|
||||
input_bytes += take;
|
||||
input_len -= take;
|
||||
/*
|
||||
* If we've filled the current chunk and there's more coming,
|
||||
* finalize this chunk and proceed. In this case we know it's
|
||||
* not the root.
|
||||
*/
|
||||
if (input_len > 0) {
|
||||
output_t output = chunk_state_output(&ctx->chunk);
|
||||
uint8_t chunk_cv[32];
|
||||
output_chaining_value(ctx->ops, &output, chunk_cv);
|
||||
hasher_push_cv(ctx, chunk_cv, ctx->chunk.chunk_counter);
|
||||
chunk_state_reset(&ctx->chunk, ctx->key,
|
||||
ctx->chunk.chunk_counter + 1);
|
||||
} else {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Now the chunk_state is clear, and we have more input. If there's
|
||||
* more than a single chunk (so, definitely not the root chunk), hash
|
||||
* the largest whole subtree we can, with the full benefits of SIMD
|
||||
* (and maybe in the future, multi-threading) parallelism. Two
|
||||
* restrictions:
|
||||
* - The subtree has to be a power-of-2 number of chunks. Only
|
||||
* subtrees along the right edge can be incomplete, and we don't know
|
||||
* where the right edge is going to be until we get to finalize().
|
||||
* - The subtree must evenly divide the total number of chunks up
|
||||
* until this point (if total is not 0). If the current incomplete
|
||||
* subtree is only waiting for 1 more chunk, we can't hash a subtree
|
||||
* of 4 chunks. We have to complete the current subtree first.
|
||||
* Because we might need to break up the input to form powers of 2, or
|
||||
* to evenly divide what we already have, this part runs in a loop.
|
||||
*/
|
||||
while (input_len > BLAKE3_CHUNK_LEN) {
|
||||
size_t subtree_len = round_down_to_power_of_2(input_len);
|
||||
uint64_t count_so_far =
|
||||
ctx->chunk.chunk_counter * BLAKE3_CHUNK_LEN;
|
||||
/*
|
||||
* Shrink the subtree_len until it evenly divides the count so
|
||||
* far. We know that subtree_len itself is a power of 2, so we
|
||||
* can use a bitmasking trick instead of an actual remainder
|
||||
* operation. (Note that if the caller consistently passes
|
||||
* power-of-2 inputs of the same size, as is hopefully
|
||||
* typical, this loop condition will always fail, and
|
||||
* subtree_len will always be the full length of the input.)
|
||||
*
|
||||
* An aside: We don't have to shrink subtree_len quite this
|
||||
* much. For example, if count_so_far is 1, we could pass 2
|
||||
* chunks to compress_subtree_to_parent_node. Since we'll get
|
||||
* 2 CVs back, we'll still get the right answer in the end,
|
||||
* and we might get to use 2-way SIMD parallelism. The problem
|
||||
* with this optimization, is that it gets us stuck always
|
||||
* hashing 2 chunks. The total number of chunks will remain
|
||||
* odd, and we'll never graduate to higher degrees of
|
||||
* parallelism. See
|
||||
* https://github.com/BLAKE3-team/BLAKE3/issues/69.
|
||||
*/
|
||||
while ((((uint64_t)(subtree_len - 1)) & count_so_far) != 0) {
|
||||
subtree_len /= 2;
|
||||
}
|
||||
/*
|
||||
* The shrunken subtree_len might now be 1 chunk long. If so,
|
||||
* hash that one chunk by itself. Otherwise, compress the
|
||||
* subtree into a pair of CVs.
|
||||
*/
|
||||
uint64_t subtree_chunks = subtree_len / BLAKE3_CHUNK_LEN;
|
||||
if (subtree_len <= BLAKE3_CHUNK_LEN) {
|
||||
blake3_chunk_state_t chunk_state;
|
||||
chunk_state_init(&chunk_state, ctx->key,
|
||||
ctx->chunk.flags);
|
||||
chunk_state.chunk_counter = ctx->chunk.chunk_counter;
|
||||
chunk_state_update(ctx->ops, &chunk_state, input_bytes,
|
||||
subtree_len);
|
||||
output_t output = chunk_state_output(&chunk_state);
|
||||
uint8_t cv[BLAKE3_OUT_LEN];
|
||||
output_chaining_value(ctx->ops, &output, cv);
|
||||
hasher_push_cv(ctx, cv, chunk_state.chunk_counter);
|
||||
} else {
|
||||
/*
|
||||
* This is the high-performance happy path, though
|
||||
* getting here depends on the caller giving us a long
|
||||
* enough input.
|
||||
*/
|
||||
uint8_t cv_pair[2 * BLAKE3_OUT_LEN];
|
||||
compress_subtree_to_parent_node(ctx->ops, input_bytes,
|
||||
subtree_len, ctx->key, ctx-> chunk.chunk_counter,
|
||||
ctx->chunk.flags, cv_pair);
|
||||
hasher_push_cv(ctx, cv_pair, ctx->chunk.chunk_counter);
|
||||
hasher_push_cv(ctx, &cv_pair[BLAKE3_OUT_LEN],
|
||||
ctx->chunk.chunk_counter + (subtree_chunks / 2));
|
||||
}
|
||||
ctx->chunk.chunk_counter += subtree_chunks;
|
||||
input_bytes += subtree_len;
|
||||
input_len -= subtree_len;
|
||||
}
|
||||
|
||||
/*
|
||||
* If there's any remaining input less than a full chunk, add it to
|
||||
* the chunk state. In that case, also do a final merge loop to make
|
||||
* sure the subtree stack doesn't contain any unmerged pairs. The
|
||||
* remaining input means we know these merges are non-root. This merge
|
||||
* loop isn't strictly necessary here, because hasher_push_chunk_cv
|
||||
* already does its own merge loop, but it simplifies
|
||||
* blake3_hasher_finalize below.
|
||||
*/
|
||||
if (input_len > 0) {
|
||||
chunk_state_update(ctx->ops, &ctx->chunk, input_bytes,
|
||||
input_len);
|
||||
hasher_merge_cv_stack(ctx, ctx->chunk.chunk_counter);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Blake3_Update(BLAKE3_CTX *ctx, const void *input, size_t todo)
|
||||
{
|
||||
size_t done = 0;
|
||||
const uint8_t *data = input;
|
||||
const size_t block_max = 1024 * 64;
|
||||
|
||||
/* max feed buffer to leave the stack size small */
|
||||
while (todo != 0) {
|
||||
size_t block = (todo >= block_max) ? block_max : todo;
|
||||
Blake3_Update2(ctx, data + done, block);
|
||||
done += block;
|
||||
todo -= block;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
Blake3_Final(const BLAKE3_CTX *ctx, uint8_t *out)
|
||||
{
|
||||
Blake3_FinalSeek(ctx, 0, out, BLAKE3_OUT_LEN);
|
||||
}
|
||||
|
||||
void
|
||||
Blake3_FinalSeek(const BLAKE3_CTX *ctx, uint64_t seek, uint8_t *out,
|
||||
size_t out_len)
|
||||
{
|
||||
/*
|
||||
* Explicitly checking for zero avoids causing UB by passing a null
|
||||
* pointer to memcpy. This comes up in practice with things like:
|
||||
* std::vector<uint8_t> v;
|
||||
* blake3_hasher_finalize(&hasher, v.data(), v.size());
|
||||
*/
|
||||
if (out_len == 0) {
|
||||
return;
|
||||
}
|
||||
/* If the subtree stack is empty, then the current chunk is the root. */
|
||||
if (ctx->cv_stack_len == 0) {
|
||||
output_t output = chunk_state_output(&ctx->chunk);
|
||||
output_root_bytes(ctx->ops, &output, seek, out, out_len);
|
||||
return;
|
||||
}
|
||||
/*
|
||||
* If there are any bytes in the chunk state, finalize that chunk and
|
||||
* do a roll-up merge between that chunk hash and every subtree in the
|
||||
* stack. In this case, the extra merge loop at the end of
|
||||
* blake3_hasher_update guarantees that none of the subtrees in the
|
||||
* stack need to be merged with each other first. Otherwise, if there
|
||||
* are no bytes in the chunk state, then the top of the stack is a
|
||||
* chunk hash, and we start the merge from that.
|
||||
*/
|
||||
output_t output;
|
||||
size_t cvs_remaining;
|
||||
if (chunk_state_len(&ctx->chunk) > 0) {
|
||||
cvs_remaining = ctx->cv_stack_len;
|
||||
output = chunk_state_output(&ctx->chunk);
|
||||
} else {
|
||||
/* There are always at least 2 CVs in the stack in this case. */
|
||||
cvs_remaining = ctx->cv_stack_len - 2;
|
||||
output = parent_output(&ctx->cv_stack[cvs_remaining * 32],
|
||||
ctx->key, ctx->chunk.flags);
|
||||
}
|
||||
while (cvs_remaining > 0) {
|
||||
cvs_remaining -= 1;
|
||||
uint8_t parent_block[BLAKE3_BLOCK_LEN];
|
||||
memcpy(parent_block, &ctx->cv_stack[cvs_remaining * 32], 32);
|
||||
output_chaining_value(ctx->ops, &output, &parent_block[32]);
|
||||
output = parent_output(parent_block, ctx->key,
|
||||
ctx->chunk.flags);
|
||||
}
|
||||
output_root_bytes(ctx->ops, &output, seek, out, out_len);
|
||||
}
|
202
sys/contrib/openzfs/module/icp/algs/blake3/blake3_generic.c
Normal file
202
sys/contrib/openzfs/module/icp/algs/blake3/blake3_generic.c
Normal file
@ -0,0 +1,202 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
|
||||
* Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor
|
||||
* Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include "blake3_impl.h"
|
||||
|
||||
/* Rotate the 32-bit value x right by n bits; n must be in 1..31. */
#define	rotr32(x, n)	(((x) >> (n)) | ((x) << (32 - (n))))

/*
 * Quarter-round mixing function: mixes the message words x and y into the
 * four state words selected by the indices a, b, c and d. Rotation amounts
 * are 16, 12, 8 and 7, in that order.
 */
static inline void
g(uint32_t *state, size_t a, size_t b, size_t c, size_t d,
    uint32_t x, uint32_t y)
{
	/* first half: inject x */
	state[a] += state[b] + x;
	state[d] = rotr32(state[d] ^ state[a], 16);
	state[c] += state[d];
	state[b] = rotr32(state[b] ^ state[c], 12);

	/* second half: inject y */
	state[a] += state[b] + y;
	state[d] = rotr32(state[d] ^ state[a], 8);
	state[c] += state[d];
	state[b] = rotr32(state[b] ^ state[c], 7);
}
|
||||
|
||||
static inline void round_fn(uint32_t state[16], const uint32_t *msg,
|
||||
size_t round)
|
||||
{
|
||||
/* Select the message schedule based on the round. */
|
||||
const uint8_t *schedule = BLAKE3_MSG_SCHEDULE[round];
|
||||
|
||||
/* Mix the columns. */
|
||||
g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]);
|
||||
g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]);
|
||||
g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]);
|
||||
g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]);
|
||||
|
||||
/* Mix the rows. */
|
||||
g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]);
|
||||
g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]);
|
||||
g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]);
|
||||
g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]);
|
||||
}
|
||||
|
||||
static inline void compress_pre(uint32_t state[16], const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags)
|
||||
{
|
||||
uint32_t block_words[16];
|
||||
block_words[0] = load32(block + 4 * 0);
|
||||
block_words[1] = load32(block + 4 * 1);
|
||||
block_words[2] = load32(block + 4 * 2);
|
||||
block_words[3] = load32(block + 4 * 3);
|
||||
block_words[4] = load32(block + 4 * 4);
|
||||
block_words[5] = load32(block + 4 * 5);
|
||||
block_words[6] = load32(block + 4 * 6);
|
||||
block_words[7] = load32(block + 4 * 7);
|
||||
block_words[8] = load32(block + 4 * 8);
|
||||
block_words[9] = load32(block + 4 * 9);
|
||||
block_words[10] = load32(block + 4 * 10);
|
||||
block_words[11] = load32(block + 4 * 11);
|
||||
block_words[12] = load32(block + 4 * 12);
|
||||
block_words[13] = load32(block + 4 * 13);
|
||||
block_words[14] = load32(block + 4 * 14);
|
||||
block_words[15] = load32(block + 4 * 15);
|
||||
|
||||
state[0] = cv[0];
|
||||
state[1] = cv[1];
|
||||
state[2] = cv[2];
|
||||
state[3] = cv[3];
|
||||
state[4] = cv[4];
|
||||
state[5] = cv[5];
|
||||
state[6] = cv[6];
|
||||
state[7] = cv[7];
|
||||
state[8] = BLAKE3_IV[0];
|
||||
state[9] = BLAKE3_IV[1];
|
||||
state[10] = BLAKE3_IV[2];
|
||||
state[11] = BLAKE3_IV[3];
|
||||
state[12] = counter_low(counter);
|
||||
state[13] = counter_high(counter);
|
||||
state[14] = (uint32_t)block_len;
|
||||
state[15] = (uint32_t)flags;
|
||||
|
||||
round_fn(state, &block_words[0], 0);
|
||||
round_fn(state, &block_words[0], 1);
|
||||
round_fn(state, &block_words[0], 2);
|
||||
round_fn(state, &block_words[0], 3);
|
||||
round_fn(state, &block_words[0], 4);
|
||||
round_fn(state, &block_words[0], 5);
|
||||
round_fn(state, &block_words[0], 6);
|
||||
}
|
||||
|
||||
static inline void blake3_compress_in_place_generic(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags)
|
||||
{
|
||||
uint32_t state[16];
|
||||
compress_pre(state, cv, block, block_len, counter, flags);
|
||||
cv[0] = state[0] ^ state[8];
|
||||
cv[1] = state[1] ^ state[9];
|
||||
cv[2] = state[2] ^ state[10];
|
||||
cv[3] = state[3] ^ state[11];
|
||||
cv[4] = state[4] ^ state[12];
|
||||
cv[5] = state[5] ^ state[13];
|
||||
cv[6] = state[6] ^ state[14];
|
||||
cv[7] = state[7] ^ state[15];
|
||||
}
|
||||
|
||||
static inline void hash_one_generic(const uint8_t *input, size_t blocks,
|
||||
const uint32_t key[8], uint64_t counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN])
|
||||
{
|
||||
uint32_t cv[8];
|
||||
memcpy(cv, key, BLAKE3_KEY_LEN);
|
||||
uint8_t block_flags = flags | flags_start;
|
||||
while (blocks > 0) {
|
||||
if (blocks == 1) {
|
||||
block_flags |= flags_end;
|
||||
}
|
||||
blake3_compress_in_place_generic(cv, input, BLAKE3_BLOCK_LEN,
|
||||
counter, block_flags);
|
||||
input = &input[BLAKE3_BLOCK_LEN];
|
||||
blocks -= 1;
|
||||
block_flags = flags;
|
||||
}
|
||||
store_cv_words(out, cv);
|
||||
}
|
||||
|
||||
static inline void blake3_compress_xof_generic(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags, uint8_t out[64])
|
||||
{
|
||||
uint32_t state[16];
|
||||
compress_pre(state, cv, block, block_len, counter, flags);
|
||||
|
||||
store32(&out[0 * 4], state[0] ^ state[8]);
|
||||
store32(&out[1 * 4], state[1] ^ state[9]);
|
||||
store32(&out[2 * 4], state[2] ^ state[10]);
|
||||
store32(&out[3 * 4], state[3] ^ state[11]);
|
||||
store32(&out[4 * 4], state[4] ^ state[12]);
|
||||
store32(&out[5 * 4], state[5] ^ state[13]);
|
||||
store32(&out[6 * 4], state[6] ^ state[14]);
|
||||
store32(&out[7 * 4], state[7] ^ state[15]);
|
||||
store32(&out[8 * 4], state[8] ^ cv[0]);
|
||||
store32(&out[9 * 4], state[9] ^ cv[1]);
|
||||
store32(&out[10 * 4], state[10] ^ cv[2]);
|
||||
store32(&out[11 * 4], state[11] ^ cv[3]);
|
||||
store32(&out[12 * 4], state[12] ^ cv[4]);
|
||||
store32(&out[13 * 4], state[13] ^ cv[5]);
|
||||
store32(&out[14 * 4], state[14] ^ cv[6]);
|
||||
store32(&out[15 * 4], state[15] ^ cv[7]);
|
||||
}
|
||||
|
||||
static inline void blake3_hash_many_generic(const uint8_t * const *inputs,
|
||||
size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter,
|
||||
boolean_t increment_counter, uint8_t flags, uint8_t flags_start,
|
||||
uint8_t flags_end, uint8_t *out)
|
||||
{
|
||||
while (num_inputs > 0) {
|
||||
hash_one_generic(inputs[0], blocks, key, counter, flags,
|
||||
flags_start, flags_end, out);
|
||||
if (increment_counter) {
|
||||
counter += 1;
|
||||
}
|
||||
inputs += 1;
|
||||
num_inputs -= 1;
|
||||
out = &out[BLAKE3_OUT_LEN];
|
||||
}
|
||||
}
|
||||
|
||||
static inline boolean_t blake3_is_generic_supported(void)
|
||||
{
|
||||
return (B_TRUE);
|
||||
}
|
||||
|
||||
const blake3_impl_ops_t blake3_generic_impl = {
|
||||
.compress_in_place = blake3_compress_in_place_generic,
|
||||
.compress_xof = blake3_compress_xof_generic,
|
||||
.hash_many = blake3_hash_many_generic,
|
||||
.is_supported = blake3_is_generic_supported,
|
||||
.degree = 4,
|
||||
.name = "generic"
|
||||
};
|
284
sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c
Normal file
284
sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.c
Normal file
@ -0,0 +1,284 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
|
||||
#include "blake3_impl.h"
|
||||
|
||||
static const blake3_impl_ops_t *const blake3_impls[] = {
|
||||
&blake3_generic_impl,
|
||||
#if defined(__aarch64__) || \
|
||||
(defined(__x86_64) && defined(HAVE_SSE2)) || \
|
||||
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
|
||||
&blake3_sse2_impl,
|
||||
#endif
|
||||
#if defined(__aarch64__) || \
|
||||
(defined(__x86_64) && defined(HAVE_SSE4_1)) || \
|
||||
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
|
||||
&blake3_sse41_impl,
|
||||
#endif
|
||||
#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
|
||||
&blake3_avx2_impl,
|
||||
#endif
|
||||
#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
|
||||
&blake3_avx512_impl,
|
||||
#endif
|
||||
};
|
||||
|
||||
/* this pointer holds current ops for implementation */
|
||||
static const blake3_impl_ops_t *blake3_selected_impl = &blake3_generic_impl;
|
||||
|
||||
/* special implementation selections */
|
||||
#define IMPL_FASTEST (UINT32_MAX)
|
||||
#define IMPL_CYCLE (UINT32_MAX-1)
|
||||
#define IMPL_USER (UINT32_MAX-2)
|
||||
#define IMPL_PARAM (UINT32_MAX-3)
|
||||
|
||||
#define IMPL_READ(i) (*(volatile uint32_t *) &(i))
|
||||
static uint32_t icp_blake3_impl = IMPL_FASTEST;
|
||||
|
||||
#define BLAKE3_IMPL_NAME_MAX 16
|
||||
|
||||
/* id of fastest implementation */
|
||||
static uint32_t blake3_fastest_id = 0;
|
||||
|
||||
/* currently used id */
|
||||
static uint32_t blake3_current_id = 0;
|
||||
|
||||
/* id of module parameter (-1 == unused) */
|
||||
static int blake3_param_id = -1;
|
||||
|
||||
/* return number of supported implementations */
|
||||
int
|
||||
blake3_get_impl_count(void)
|
||||
{
|
||||
static int impls = 0;
|
||||
int i;
|
||||
|
||||
if (impls)
|
||||
return (impls);
|
||||
|
||||
for (i = 0; i < ARRAY_SIZE(blake3_impls); i++) {
|
||||
if (!blake3_impls[i]->is_supported()) continue;
|
||||
impls++;
|
||||
}
|
||||
|
||||
return (impls);
|
||||
}
|
||||
|
||||
/* return id of selected implementation */
|
||||
int
|
||||
blake3_get_impl_id(void)
|
||||
{
|
||||
return (blake3_current_id);
|
||||
}
|
||||
|
||||
/* return name of selected implementation */
|
||||
const char *
|
||||
blake3_get_impl_name(void)
|
||||
{
|
||||
return (blake3_selected_impl->name);
|
||||
}
|
||||
|
||||
/* setup id as fastest implementation */
|
||||
void
|
||||
blake3_set_impl_fastest(uint32_t id)
|
||||
{
|
||||
blake3_fastest_id = id;
|
||||
}
|
||||
|
||||
/* set implementation by id */
|
||||
void
|
||||
blake3_set_impl_id(uint32_t id)
|
||||
{
|
||||
int i, cid;
|
||||
|
||||
/* select fastest */
|
||||
if (id == IMPL_FASTEST)
|
||||
id = blake3_fastest_id;
|
||||
|
||||
/* select next or first */
|
||||
if (id == IMPL_CYCLE)
|
||||
id = (++blake3_current_id) % blake3_get_impl_count();
|
||||
|
||||
/* 0..N for the real impl */
|
||||
for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) {
|
||||
if (!blake3_impls[i]->is_supported()) continue;
|
||||
if (cid == id) {
|
||||
blake3_current_id = cid;
|
||||
blake3_selected_impl = blake3_impls[i];
|
||||
return;
|
||||
}
|
||||
cid++;
|
||||
}
|
||||
}
|
||||
|
||||
/* set implementation by name */
|
||||
int
|
||||
blake3_set_impl_name(const char *name)
|
||||
{
|
||||
int i, cid;
|
||||
|
||||
if (strcmp(name, "fastest") == 0) {
|
||||
atomic_swap_32(&icp_blake3_impl, IMPL_FASTEST);
|
||||
blake3_set_impl_id(IMPL_FASTEST);
|
||||
return (0);
|
||||
} else if (strcmp(name, "cycle") == 0) {
|
||||
atomic_swap_32(&icp_blake3_impl, IMPL_CYCLE);
|
||||
blake3_set_impl_id(IMPL_CYCLE);
|
||||
return (0);
|
||||
}
|
||||
|
||||
for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) {
|
||||
if (!blake3_impls[i]->is_supported()) continue;
|
||||
if (strcmp(name, blake3_impls[i]->name) == 0) {
|
||||
if (icp_blake3_impl == IMPL_PARAM) {
|
||||
blake3_param_id = cid;
|
||||
return (0);
|
||||
}
|
||||
blake3_selected_impl = blake3_impls[i];
|
||||
blake3_current_id = cid;
|
||||
return (0);
|
||||
}
|
||||
cid++;
|
||||
}
|
||||
|
||||
return (-EINVAL);
|
||||
}
|
||||
|
||||
/* setup implementation */
|
||||
void
|
||||
blake3_setup_impl(void)
|
||||
{
|
||||
switch (IMPL_READ(icp_blake3_impl)) {
|
||||
case IMPL_PARAM:
|
||||
blake3_set_impl_id(blake3_param_id);
|
||||
atomic_swap_32(&icp_blake3_impl, IMPL_USER);
|
||||
break;
|
||||
case IMPL_FASTEST:
|
||||
blake3_set_impl_id(IMPL_FASTEST);
|
||||
break;
|
||||
case IMPL_CYCLE:
|
||||
blake3_set_impl_id(IMPL_CYCLE);
|
||||
break;
|
||||
default:
|
||||
blake3_set_impl_id(blake3_current_id);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
/* return selected implementation */
|
||||
const blake3_impl_ops_t *
|
||||
blake3_impl_get_ops(void)
|
||||
{
|
||||
/* each call to ops will cycle */
|
||||
if (icp_blake3_impl == IMPL_CYCLE)
|
||||
blake3_set_impl_id(IMPL_CYCLE);
|
||||
|
||||
return (blake3_selected_impl);
|
||||
}
|
||||
|
||||
#if defined(_KERNEL)
|
||||
void **blake3_per_cpu_ctx;
|
||||
|
||||
void
|
||||
blake3_per_cpu_ctx_init(void)
|
||||
{
|
||||
/*
|
||||
* Create "The Godfather" ptr to hold all blake3 ctx
|
||||
*/
|
||||
blake3_per_cpu_ctx = kmem_alloc(max_ncpus * sizeof (void *), KM_SLEEP);
|
||||
for (int i = 0; i < max_ncpus; i++) {
|
||||
blake3_per_cpu_ctx[i] = kmem_alloc(sizeof (BLAKE3_CTX),
|
||||
KM_SLEEP);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
blake3_per_cpu_ctx_fini(void)
|
||||
{
|
||||
for (int i = 0; i < max_ncpus; i++) {
|
||||
memset(blake3_per_cpu_ctx[i], 0, sizeof (BLAKE3_CTX));
|
||||
kmem_free(blake3_per_cpu_ctx[i], sizeof (BLAKE3_CTX));
|
||||
}
|
||||
memset(blake3_per_cpu_ctx, 0, max_ncpus * sizeof (void *));
|
||||
kmem_free(blake3_per_cpu_ctx, max_ncpus * sizeof (void *));
|
||||
}
|
||||
#endif
|
||||
|
||||
#if defined(_KERNEL) && defined(__linux__)
|
||||
static int
|
||||
icp_blake3_impl_set(const char *name, zfs_kernel_param_t *kp)
|
||||
{
|
||||
char req_name[BLAKE3_IMPL_NAME_MAX];
|
||||
size_t i;
|
||||
|
||||
/* sanitize input */
|
||||
i = strnlen(name, BLAKE3_IMPL_NAME_MAX);
|
||||
if (i == 0 || i >= BLAKE3_IMPL_NAME_MAX)
|
||||
return (-EINVAL);
|
||||
|
||||
strlcpy(req_name, name, BLAKE3_IMPL_NAME_MAX);
|
||||
while (i > 0 && isspace(req_name[i-1]))
|
||||
i--;
|
||||
req_name[i] = '\0';
|
||||
|
||||
atomic_swap_32(&icp_blake3_impl, IMPL_PARAM);
|
||||
return (blake3_set_impl_name(req_name));
|
||||
}
|
||||
|
||||
static int
|
||||
icp_blake3_impl_get(char *buffer, zfs_kernel_param_t *kp)
|
||||
{
|
||||
int i, cid, cnt = 0;
|
||||
char *fmt;
|
||||
|
||||
/* cycling */
|
||||
fmt = (icp_blake3_impl == IMPL_CYCLE) ? "[cycle] " : "cycle ";
|
||||
cnt += sprintf(buffer + cnt, fmt);
|
||||
|
||||
/* fastest one */
|
||||
fmt = (icp_blake3_impl == IMPL_FASTEST) ? "[fastest] " : "fastest ";
|
||||
cnt += sprintf(buffer + cnt, fmt);
|
||||
|
||||
/* user selected */
|
||||
for (i = 0, cid = 0; i < ARRAY_SIZE(blake3_impls); i++) {
|
||||
if (!blake3_impls[i]->is_supported()) continue;
|
||||
fmt = (icp_blake3_impl == IMPL_USER &&
|
||||
cid == blake3_current_id) ? "[%s] " : "%s ";
|
||||
cnt += sprintf(buffer + cnt, fmt, blake3_impls[i]->name);
|
||||
cid++;
|
||||
}
|
||||
|
||||
buffer[cnt] = 0;
|
||||
|
||||
return (cnt);
|
||||
}
|
||||
|
||||
module_param_call(icp_blake3_impl, icp_blake3_impl_set, icp_blake3_impl_get,
|
||||
NULL, 0644);
|
||||
MODULE_PARM_DESC(icp_blake3_impl, "Select BLAKE3 implementation.");
|
||||
#endif
|
213
sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.h
Normal file
213
sys/contrib/openzfs/module/icp/algs/blake3/blake3_impl.h
Normal file
@ -0,0 +1,213 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3
|
||||
* Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor
|
||||
* Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
|
||||
*/
|
||||
|
||||
#ifndef BLAKE3_IMPL_H
|
||||
#define BLAKE3_IMPL_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/blake3.h>
|
||||
#include <sys/simd.h>
|
||||
|
||||
/*
|
||||
* Methods used to define BLAKE3 assembler implementations
|
||||
*/
|
||||
typedef void (*blake3_compress_in_place_f)(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN],
|
||||
uint8_t block_len, uint64_t counter,
|
||||
uint8_t flags);
|
||||
|
||||
typedef void (*blake3_compress_xof_f)(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags, uint8_t out[64]);
|
||||
|
||||
typedef void (*blake3_hash_many_f)(const uint8_t * const *inputs,
|
||||
size_t num_inputs, size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, boolean_t increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
|
||||
|
||||
typedef boolean_t (*blake3_is_supported_f)(void);
|
||||
|
||||
typedef struct blake3_impl_ops {
|
||||
blake3_compress_in_place_f compress_in_place;
|
||||
blake3_compress_xof_f compress_xof;
|
||||
blake3_hash_many_f hash_many;
|
||||
blake3_is_supported_f is_supported;
|
||||
int degree;
|
||||
const char *name;
|
||||
} blake3_impl_ops_t;
|
||||
|
||||
/* Return selected BLAKE3 implementation ops */
|
||||
extern const blake3_impl_ops_t *blake3_impl_get_ops(void);
|
||||
|
||||
extern const blake3_impl_ops_t blake3_generic_impl;
|
||||
|
||||
#if defined(__aarch64__) || \
|
||||
(defined(__x86_64) && defined(HAVE_SSE2)) || \
|
||||
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
|
||||
extern const blake3_impl_ops_t blake3_sse2_impl;
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__) || \
|
||||
(defined(__x86_64) && defined(HAVE_SSE4_1)) || \
|
||||
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
|
||||
extern const blake3_impl_ops_t blake3_sse41_impl;
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
|
||||
extern const blake3_impl_ops_t blake3_avx2_impl;
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
|
||||
extern const blake3_impl_ops_t blake3_avx512_impl;
|
||||
#endif
|
||||
|
||||
/* Upper bound for the degree value: 16 on x86_64, 4 everywhere else. */
#if defined(__x86_64)
#define	MAX_SIMD_DEGREE 16
#else
#define	MAX_SIMD_DEGREE 4
#endif

/* MAX_SIMD_DEGREE, but never less than 2 */
#define	MAX_SIMD_DEGREE_OR_2	(MAX_SIMD_DEGREE > 2 ? MAX_SIMD_DEGREE : 2)
|
||||
|
||||
/*
 * Initialization vector. Blake3_Init() passes all eight words as the key
 * words; compress_pre() loads the first four into state[8..11].
 */
static const uint32_t BLAKE3_IV[8] = {
	0x6A09E667UL, 0xBB67AE85UL, 0x3C6EF372UL, 0xA54FF53AUL,
	0x510E527FUL, 0x9B05688CUL, 0x1F83D9ABUL, 0x5BE0CD19UL
};
|
||||
|
||||
/*
 * Message word order, one row per round (7 rounds). Row r lists the
 * indices of the sixteen message words in the order round_fn() feeds them
 * to g() during round r.
 */
static const uint8_t BLAKE3_MSG_SCHEDULE[7][16] = {
	{ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15},
	{ 2,  6,  3, 10,  7,  0,  4, 13,  1, 11, 12,  5,  9, 14, 15,  8},
	{ 3,  4, 10, 12, 13,  2,  7, 14,  6,  5,  9,  0, 11, 15,  8,  1},
	{10,  7, 12,  9, 14,  3, 13, 15,  4,  0, 11,  2,  5,  8,  1,  6},
	{12, 13,  9, 11, 15, 10, 14,  8,  7,  2,  5,  3,  0,  1,  6,  4},
	{ 9, 14, 11,  5,  8, 12, 15,  1, 13,  3,  0, 10,  2,  6,  4,  7},
	{11, 15,  5,  0,  1,  9,  8,  6, 14, 10,  2, 12,  3,  4,  7, 13},
};
|
||||
|
||||
/*
 * Return the bit index (0..63) of the most significant set bit of x.
 * x must not be 0 when a compiler builtin is used; the portable fallback
 * returns 0 for x == 0.
 */
static inline unsigned int
highest_one(uint64_t x)
{
#if defined(__GNUC__) || defined(__clang__)
	/* the number of leading zeros counts down from bit 63 */
	return (63 - __builtin_clzll(x));
#elif defined(_MSC_VER) && defined(IS_X86_64)
	unsigned long index;

	_BitScanReverse64(&index, x);
	return (index);
#elif defined(_MSC_VER) && defined(IS_X86_32)
	unsigned long index;

	/* scan the upper 32 bits first, then the lower 32 bits */
	if (x >> 32) {
		_BitScanReverse(&index, x >> 32);
		return (32 + index);
	}
	_BitScanReverse(&index, x);
	return (index);
#else
	/* binary search: halve the window from 32 bits down to 1 bit */
	unsigned int bit = 0;
	unsigned int width;

	for (width = 32; width != 0; width >>= 1) {
		if ((x >> width) != 0) {
			x >>= width;
			bit += width;
		}
	}
	return (bit);
#endif
}
|
||||
|
||||
/* Return the number of bits that are set in x. */
static inline unsigned int
popcnt(uint64_t x)
{
	unsigned int ones;

	/* x &= x - 1 clears the lowest set bit: one pass per set bit */
	for (ones = 0; x != 0; ones++)
		x &= x - 1;

	return (ones);
}
|
||||
|
||||
/*
 * Return the largest power of two that is less than or equal to x.
 * For x == 0 the result is 1: bit 0 is forced on before the search, so
 * highest_one() is never called with 0.
 */
static inline uint64_t
round_down_to_power_of_2(uint64_t x)
{
	const unsigned int top = highest_one(x | 1);

	return (1ULL << top);
}
|
||||
|
||||
/* Return the low 32 bits of counter. */
static inline uint32_t
counter_low(uint64_t counter)
{
	return ((uint32_t)(counter & 0xFFFFFFFFULL));
}
|
||||
|
||||
/* Return the high 32 bits of counter. */
static inline uint32_t
counter_high(uint64_t counter)
{
	const uint64_t upper = counter >> 32;

	return ((uint32_t)upper);
}
|
||||
|
||||
/*
 * Read a 32-bit little-endian value from the four bytes at src. Works for
 * any alignment and on any host byte order.
 */
static inline uint32_t
load32(const void *src)
{
	const uint8_t *bytes = (const uint8_t *)src;
	uint32_t word = 0;

	/* start with the most significant byte (index 3) and shift it up */
	for (int i = 3; i >= 0; i--)
		word = (word << 8) | bytes[i];

	return (word);
}
|
||||
|
||||
/*
 * Fill key_words[0..7] by decoding eight consecutive 4-byte groups of
 * key with load32().
 */
static inline void load_key_words(const uint8_t key[BLAKE3_KEY_LEN],
    uint32_t key_words[8]) {
	unsigned int i;

	for (i = 0; i < 8; i++)
		key_words[i] = load32(&key[i * 4]);
}
|
||||
|
||||
/*
 * Write the 32-bit word w to the four bytes at dst, least significant
 * byte first. Writes byte-wise, so dst needs no alignment.
 */
static inline void store32(void *dst, uint32_t w) {
	uint8_t *out = (uint8_t *)dst;
	unsigned int i;

	for (i = 0; i < 4; i++)
		out[i] = (uint8_t)(w >> (8 * i));
}
|
||||
|
||||
/*
 * Serialize the eight words of cv_words into bytes_out, four bytes per
 * word, using store32().
 */
static inline void store_cv_words(uint8_t bytes_out[32], uint32_t cv_words[8]) {
	unsigned int i;

	for (i = 0; i < 8; i++)
		store32(&bytes_out[i * 4], cv_words[i]);
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif /* BLAKE3_IMPL_H */
|
248
sys/contrib/openzfs/module/icp/algs/blake3/blake3_x86-64.c
Normal file
248
sys/contrib/openzfs/module/icp/algs/blake3/blake3_x86-64.c
Normal file
@ -0,0 +1,248 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de>
|
||||
*/
|
||||
|
||||
#include "blake3_impl.h"
|
||||
|
||||
#if defined(__aarch64__) || \
|
||||
(defined(__x86_64) && defined(HAVE_SSE2)) || \
|
||||
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
|
||||
|
||||
extern void zfs_blake3_compress_in_place_sse2(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags);
|
||||
|
||||
extern void zfs_blake3_compress_xof_sse2(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags, uint8_t out[64]);
|
||||
|
||||
extern void zfs_blake3_hash_many_sse2(const uint8_t * const *inputs,
|
||||
size_t num_inputs, size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, boolean_t increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
|
||||
|
||||
static void blake3_compress_in_place_sse2(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_compress_in_place_sse2(cv, block, block_len, counter,
|
||||
flags);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
static void blake3_compress_xof_sse2(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags, uint8_t out[64]) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_compress_xof_sse2(cv, block, block_len, counter, flags,
|
||||
out);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
/*
 * Call the external zfs_blake3_hash_many_sse2() routine between a
 * kfpu_begin()/kfpu_end() pair, forwarding all arguments unchanged.
 */
static void
blake3_hash_many_sse2(const uint8_t * const *inputs,
    size_t num_inputs, size_t blocks, const uint32_t key[8],
    uint64_t counter, boolean_t increment_counter, uint8_t flags,
    uint8_t flags_start, uint8_t flags_end, uint8_t *out)
{
	kfpu_begin();
	zfs_blake3_hash_many_sse2(inputs, num_inputs, blocks, key,
	    counter, increment_counter, flags, flags_start, flags_end,
	    out);
	kfpu_end();
}
|
||||
|
||||
/*
 * Report whether the "sse2" implementation may be used: kfpu_allowed()
 * must hold, plus zfs_sse2_available() on x86_64 or zfs_vsx_available()
 * on PPC64. Other architectures only check kfpu_allowed().
 */
static boolean_t
blake3_is_sse2_supported(void)
{
	boolean_t usable = kfpu_allowed();

#if defined(__x86_64)
	usable = (usable && zfs_sse2_available());
#elif defined(__PPC64__)
	usable = (usable && zfs_vsx_available());
#endif

	return (usable);
}
|
||||
|
||||
const blake3_impl_ops_t blake3_sse2_impl = {
|
||||
.compress_in_place = blake3_compress_in_place_sse2,
|
||||
.compress_xof = blake3_compress_xof_sse2,
|
||||
.hash_many = blake3_hash_many_sse2,
|
||||
.is_supported = blake3_is_sse2_supported,
|
||||
.degree = 4,
|
||||
.name = "sse2"
|
||||
};
|
||||
#endif
|
||||
|
||||
#if defined(__aarch64__) || \
|
||||
(defined(__x86_64) && defined(HAVE_SSE2)) || \
|
||||
(defined(__PPC64__) && defined(__LITTLE_ENDIAN__))
|
||||
|
||||
extern void zfs_blake3_compress_in_place_sse41(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags);
|
||||
|
||||
extern void zfs_blake3_compress_xof_sse41(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags, uint8_t out[64]);
|
||||
|
||||
extern void zfs_blake3_hash_many_sse41(const uint8_t * const *inputs,
|
||||
size_t num_inputs, size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, boolean_t increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
|
||||
|
||||
static void blake3_compress_in_place_sse41(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_compress_in_place_sse41(cv, block, block_len, counter,
|
||||
flags);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
static void blake3_compress_xof_sse41(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags, uint8_t out[64]) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_compress_xof_sse41(cv, block, block_len, counter, flags,
|
||||
out);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
/*
 * Call the external zfs_blake3_hash_many_sse41() routine between a
 * kfpu_begin()/kfpu_end() pair, forwarding all arguments unchanged.
 */
static void
blake3_hash_many_sse41(const uint8_t * const *inputs,
    size_t num_inputs, size_t blocks, const uint32_t key[8],
    uint64_t counter, boolean_t increment_counter, uint8_t flags,
    uint8_t flags_start, uint8_t flags_end, uint8_t *out)
{
	kfpu_begin();
	zfs_blake3_hash_many_sse41(inputs, num_inputs, blocks, key,
	    counter, increment_counter, flags, flags_start, flags_end,
	    out);
	kfpu_end();
}
|
||||
|
||||
/*
 * Report whether the "sse41" implementation may be used: kfpu_allowed()
 * must hold, plus zfs_sse4_1_available() on x86_64 or zfs_vsx_available()
 * on PPC64. Other architectures only check kfpu_allowed().
 */
static boolean_t
blake3_is_sse41_supported(void)
{
	boolean_t usable = kfpu_allowed();

#if defined(__x86_64)
	usable = (usable && zfs_sse4_1_available());
#elif defined(__PPC64__)
	usable = (usable && zfs_vsx_available());
#endif

	return (usable);
}
|
||||
|
||||
const blake3_impl_ops_t blake3_sse41_impl = {
|
||||
.compress_in_place = blake3_compress_in_place_sse41,
|
||||
.compress_xof = blake3_compress_xof_sse41,
|
||||
.hash_many = blake3_hash_many_sse41,
|
||||
.is_supported = blake3_is_sse41_supported,
|
||||
.degree = 4,
|
||||
.name = "sse41"
|
||||
};
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64) && defined(HAVE_SSE4_1) && defined(HAVE_AVX2)
|
||||
extern void zfs_blake3_hash_many_avx2(const uint8_t * const *inputs,
|
||||
size_t num_inputs, size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, boolean_t increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
|
||||
|
||||
/*
 * Call the external zfs_blake3_hash_many_avx2() routine between a
 * kfpu_begin()/kfpu_end() pair, forwarding all arguments unchanged.
 */
static void
blake3_hash_many_avx2(const uint8_t * const *inputs,
    size_t num_inputs, size_t blocks, const uint32_t key[8],
    uint64_t counter, boolean_t increment_counter, uint8_t flags,
    uint8_t flags_start, uint8_t flags_end, uint8_t *out)
{
	kfpu_begin();
	zfs_blake3_hash_many_avx2(inputs, num_inputs, blocks, key,
	    counter, increment_counter, flags, flags_start, flags_end,
	    out);
	kfpu_end();
}
|
||||
|
||||
/*
 * Report whether the "avx2" implementation may be used. SSE4.1 is also
 * required because the avx2 ops table borrows the sse41 compress wrappers.
 */
static boolean_t
blake3_is_avx2_supported(void)
{
	boolean_t usable = (kfpu_allowed() && zfs_sse4_1_available());

	return (usable && zfs_avx2_available());
}
|
||||
|
||||
const blake3_impl_ops_t blake3_avx2_impl = {
|
||||
.compress_in_place = blake3_compress_in_place_sse41,
|
||||
.compress_xof = blake3_compress_xof_sse41,
|
||||
.hash_many = blake3_hash_many_avx2,
|
||||
.is_supported = blake3_is_avx2_supported,
|
||||
.degree = 8,
|
||||
.name = "avx2"
|
||||
};
|
||||
#endif
|
||||
|
||||
#if defined(__x86_64) && defined(HAVE_AVX512F) && defined(HAVE_AVX512VL)
|
||||
extern void zfs_blake3_compress_in_place_avx512(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags);
|
||||
|
||||
extern void zfs_blake3_compress_xof_avx512(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags, uint8_t out[64]);
|
||||
|
||||
extern void zfs_blake3_hash_many_avx512(const uint8_t * const *inputs,
|
||||
size_t num_inputs, size_t blocks, const uint32_t key[8],
|
||||
uint64_t counter, boolean_t increment_counter, uint8_t flags,
|
||||
uint8_t flags_start, uint8_t flags_end, uint8_t *out);
|
||||
|
||||
static void blake3_compress_in_place_avx512(uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_compress_in_place_avx512(cv, block, block_len, counter,
|
||||
flags);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
static void blake3_compress_xof_avx512(const uint32_t cv[8],
|
||||
const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len,
|
||||
uint64_t counter, uint8_t flags, uint8_t out[64]) {
|
||||
kfpu_begin();
|
||||
zfs_blake3_compress_xof_avx512(cv, block, block_len, counter, flags,
|
||||
out);
|
||||
kfpu_end();
|
||||
}
|
||||
|
||||
/*
 * Call the external zfs_blake3_hash_many_avx512() routine between a
 * kfpu_begin()/kfpu_end() pair, forwarding all arguments unchanged.
 */
static void
blake3_hash_many_avx512(const uint8_t * const *inputs,
    size_t num_inputs, size_t blocks, const uint32_t key[8],
    uint64_t counter, boolean_t increment_counter, uint8_t flags,
    uint8_t flags_start, uint8_t flags_end, uint8_t *out)
{
	kfpu_begin();
	zfs_blake3_hash_many_avx512(inputs, num_inputs, blocks, key,
	    counter, increment_counter, flags, flags_start, flags_end,
	    out);
	kfpu_end();
}
|
||||
|
||||
/*
 * Report whether the "avx512" implementation may be used: kfpu_allowed(),
 * zfs_avx512f_available() and zfs_avx512vl_available() must all hold.
 */
static boolean_t
blake3_is_avx512_supported(void)
{
	boolean_t usable = (kfpu_allowed() && zfs_avx512f_available());

	return (usable && zfs_avx512vl_available());
}
|
||||
|
||||
const blake3_impl_ops_t blake3_avx512_impl = {
|
||||
.compress_in_place = blake3_compress_in_place_avx512,
|
||||
.compress_xof = blake3_compress_xof_avx512,
|
||||
.hash_many = blake3_hash_many_avx512,
|
||||
.is_supported = blake3_is_avx512_supported,
|
||||
.degree = 16,
|
||||
.name = "avx512"
|
||||
};
|
||||
#endif
|
2450
sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse2.S
Normal file
2450
sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse2.S
Normal file
File diff suppressed because it is too large
Load Diff
2463
sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse41.S
Normal file
2463
sys/contrib/openzfs/module/icp/asm-aarch64/blake3/b3_aarch64_sse41.S
Normal file
File diff suppressed because it is too large
Load Diff
2823
sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse2.S
Normal file
2823
sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse2.S
Normal file
File diff suppressed because it is too large
Load Diff
3064
sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse41.S
Normal file
3064
sys/contrib/openzfs/module/icp/asm-ppc64/blake3/b3_ppc64le_sse41.S
Normal file
File diff suppressed because it is too large
Load Diff
1845
sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S
Normal file
1845
sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S
Normal file
File diff suppressed because it is too large
Load Diff
2618
sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S
Normal file
2618
sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx512.S
Normal file
File diff suppressed because it is too large
Load Diff
2323
sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S
Normal file
2323
sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse2.S
Normal file
File diff suppressed because it is too large
Load Diff
2058
sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S
Normal file
2058
sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_sse41.S
Normal file
File diff suppressed because it is too large
Load Diff
@ -149,6 +149,13 @@ freebsd_zfs_crypt_done(struct cryptop *crp)
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
 * Completion callback that does nothing and reports success.
 * zfs_crypto_dispatch() installs it when the session is not asynchronous.
 */
static int
freebsd_zfs_crypt_done_sync(struct cryptop *crp)
{
	(void) crp;

	return (0);
}
|
||||
|
||||
void
|
||||
freebsd_crypt_freesession(freebsd_crypt_session_t *sess)
|
||||
{
|
||||
@ -158,26 +165,36 @@ freebsd_crypt_freesession(freebsd_crypt_session_t *sess)
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_crypto_dispatch(freebsd_crypt_session_t *session, struct cryptop *crp)
|
||||
zfs_crypto_dispatch(freebsd_crypt_session_t *session, struct cryptop *crp)
|
||||
{
|
||||
int error;
|
||||
|
||||
crp->crp_opaque = session;
|
||||
crp->crp_callback = freebsd_zfs_crypt_done;
|
||||
for (;;) {
|
||||
#if __FreeBSD_version < 1400004
|
||||
boolean_t async = ((crypto_ses2caps(crp->crp_session) &
|
||||
CRYPTOCAP_F_SYNC) == 0);
|
||||
#else
|
||||
boolean_t async = !CRYPTO_SESS_SYNC(crp->crp_session);
|
||||
#endif
|
||||
crp->crp_callback = async ? freebsd_zfs_crypt_done :
|
||||
freebsd_zfs_crypt_done_sync;
|
||||
error = crypto_dispatch(crp);
|
||||
if (error)
|
||||
break;
|
||||
mtx_lock(&session->fs_lock);
|
||||
while (session->fs_done == false)
|
||||
msleep(crp, &session->fs_lock, 0,
|
||||
"zfs_crypto", 0);
|
||||
mtx_unlock(&session->fs_lock);
|
||||
|
||||
if (crp->crp_etype == ENOMEM) {
|
||||
pause("zcrnomem", 1);
|
||||
} else if (crp->crp_etype != EAGAIN) {
|
||||
if (error == 0) {
|
||||
if (async) {
|
||||
mtx_lock(&session->fs_lock);
|
||||
while (session->fs_done == false) {
|
||||
msleep(crp, &session->fs_lock, 0,
|
||||
"zfs_crypto", 0);
|
||||
}
|
||||
mtx_unlock(&session->fs_lock);
|
||||
}
|
||||
error = crp->crp_etype;
|
||||
}
|
||||
|
||||
if (error == ENOMEM) {
|
||||
pause("zcrnomem", 1);
|
||||
} else if (error != EAGAIN) {
|
||||
break;
|
||||
}
|
||||
crp->crp_etype = 0;
|
||||
|
@ -780,8 +780,13 @@ spl_init(void)
|
||||
if ((rc = spl_zlib_init()))
|
||||
goto out7;
|
||||
|
||||
if ((rc = spl_zone_init()))
|
||||
goto out8;
|
||||
|
||||
return (rc);
|
||||
|
||||
out8:
|
||||
spl_zlib_fini();
|
||||
out7:
|
||||
spl_kstat_fini();
|
||||
out6:
|
||||
@ -801,6 +806,7 @@ out1:
|
||||
static void __exit
|
||||
spl_fini(void)
|
||||
{
|
||||
spl_zone_fini();
|
||||
spl_zlib_fini();
|
||||
spl_kstat_fini();
|
||||
spl_proc_fini();
|
||||
|
424
sys/contrib/openzfs/module/os/linux/spl/spl-zone.c
Normal file
424
sys/contrib/openzfs/module/os/linux/spl/spl-zone.c
Normal file
@ -0,0 +1,424 @@
|
||||
/*
|
||||
* Copyright (c) 2021 Klara Systems, Inc.
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/mutex.h>
|
||||
#include <sys/sysmacros.h>
|
||||
#include <sys/kmem.h>
|
||||
#include <linux/file.h>
|
||||
#include <linux/magic.h>
|
||||
#include <sys/zone.h>
|
||||
|
||||
#if defined(CONFIG_USER_NS)
|
||||
#include <linux/statfs.h>
|
||||
#include <linux/proc_ns.h>
|
||||
#endif
|
||||
|
||||
static kmutex_t zone_datasets_lock;
|
||||
static struct list_head zone_datasets;
|
||||
|
||||
typedef struct zone_datasets {
|
||||
struct list_head zds_list; /* zone_datasets linkage */
|
||||
struct user_namespace *zds_userns; /* namespace reference */
|
||||
struct list_head zds_datasets; /* datasets for the namespace */
|
||||
} zone_datasets_t;
|
||||
|
||||
typedef struct zone_dataset {
|
||||
struct list_head zd_list; /* zone_dataset linkage */
|
||||
size_t zd_dsnamelen; /* length of name */
|
||||
char zd_dsname[0]; /* name of the member dataset */
|
||||
} zone_dataset_t;
|
||||
|
||||
#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
|
||||
/*
|
||||
* Returns:
|
||||
* - 0 on success
|
||||
* - EBADF if it cannot open the provided file descriptor
|
||||
* - ENOTTY if the file itself is not a user namespace file. We want to
|
||||
* intercept this error in the ZFS layer. We cannot just return one of the
|
||||
* ZFS_ERR_* errors here as we want to preserve the separation of the ZFS
|
||||
* and the SPL layers.
|
||||
*/
|
||||
static int
|
||||
user_ns_get(int fd, struct user_namespace **userns)
|
||||
{
|
||||
struct kstatfs st;
|
||||
struct file *nsfile;
|
||||
struct ns_common *ns;
|
||||
int error;
|
||||
|
||||
if ((nsfile = fget(fd)) == NULL)
|
||||
return (EBADF);
|
||||
if (vfs_statfs(&nsfile->f_path, &st) != 0) {
|
||||
error = ENOTTY;
|
||||
goto done;
|
||||
}
|
||||
if (st.f_type != NSFS_MAGIC) {
|
||||
error = ENOTTY;
|
||||
goto done;
|
||||
}
|
||||
ns = get_proc_ns(file_inode(nsfile));
|
||||
if (ns->ops->type != CLONE_NEWUSER) {
|
||||
error = ENOTTY;
|
||||
goto done;
|
||||
}
|
||||
*userns = container_of(ns, struct user_namespace, ns);
|
||||
|
||||
error = 0;
|
||||
done:
|
||||
fput(nsfile);
|
||||
|
||||
return (error);
|
||||
}
|
||||
#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
|
||||
|
||||
static unsigned int
|
||||
user_ns_zoneid(struct user_namespace *user_ns)
|
||||
{
|
||||
unsigned int r;
|
||||
|
||||
#if defined(HAVE_USER_NS_COMMON_INUM)
|
||||
r = user_ns->ns.inum;
|
||||
#else
|
||||
r = user_ns->proc_inum;
|
||||
#endif
|
||||
|
||||
return (r);
|
||||
}
|
||||
|
||||
static struct zone_datasets *
|
||||
zone_datasets_lookup(unsigned int nsinum)
|
||||
{
|
||||
zone_datasets_t *zds;
|
||||
|
||||
list_for_each_entry(zds, &zone_datasets, zds_list) {
|
||||
if (user_ns_zoneid(zds->zds_userns) == nsinum)
|
||||
return (zds);
|
||||
}
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
|
||||
static struct zone_dataset *
|
||||
zone_dataset_lookup(zone_datasets_t *zds, const char *dataset, size_t dsnamelen)
|
||||
{
|
||||
zone_dataset_t *zd;
|
||||
|
||||
list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
|
||||
if (zd->zd_dsnamelen != dsnamelen)
|
||||
continue;
|
||||
if (strncmp(zd->zd_dsname, dataset, dsnamelen) == 0)
|
||||
return (zd);
|
||||
}
|
||||
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
static int
|
||||
zone_dataset_cred_check(cred_t *cred)
|
||||
{
|
||||
|
||||
if (!uid_eq(cred->uid, GLOBAL_ROOT_UID))
|
||||
return (EPERM);
|
||||
|
||||
return (0);
|
||||
}
|
||||
#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
|
||||
|
||||
/*
 * Validate a dataset name and report its length.
 *
 * Returns ENOENT for an empty name or one starting with '/', leaving
 * *dsnamelen untouched. Otherwise returns 0 and stores the name length
 * in *dsnamelen, not counting a single trailing '/' if present.
 */
static int
zone_dataset_name_check(const char *dataset, size_t *dsnamelen)
{
	size_t len;

	switch (dataset[0]) {
	case '\0':
	case '/':
		return (ENOENT);
	default:
		break;
	}

	/* The name is non-empty here, so len - 1 is a valid index. */
	len = strlen(dataset);
	if (dataset[len - 1] == '/')
		len--;
	*dsnamelen = len;

	return (0);
}
|
||||
|
||||
int
|
||||
zone_dataset_attach(cred_t *cred, const char *dataset, int userns_fd)
|
||||
{
|
||||
#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
|
||||
struct user_namespace *userns;
|
||||
zone_datasets_t *zds;
|
||||
zone_dataset_t *zd;
|
||||
int error;
|
||||
size_t dsnamelen;
|
||||
|
||||
if ((error = zone_dataset_cred_check(cred)) != 0)
|
||||
return (error);
|
||||
if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
|
||||
return (error);
|
||||
if ((error = user_ns_get(userns_fd, &userns)) != 0)
|
||||
return (error);
|
||||
|
||||
mutex_enter(&zone_datasets_lock);
|
||||
zds = zone_datasets_lookup(user_ns_zoneid(userns));
|
||||
if (zds == NULL) {
|
||||
zds = kmem_alloc(sizeof (zone_datasets_t), KM_SLEEP);
|
||||
INIT_LIST_HEAD(&zds->zds_list);
|
||||
INIT_LIST_HEAD(&zds->zds_datasets);
|
||||
zds->zds_userns = userns;
|
||||
/*
|
||||
* Lock the namespace by incresing its refcount to prevent
|
||||
* the namespace ID from being reused.
|
||||
*/
|
||||
get_user_ns(userns);
|
||||
list_add_tail(&zds->zds_list, &zone_datasets);
|
||||
} else {
|
||||
zd = zone_dataset_lookup(zds, dataset, dsnamelen);
|
||||
if (zd != NULL) {
|
||||
mutex_exit(&zone_datasets_lock);
|
||||
return (EEXIST);
|
||||
}
|
||||
}
|
||||
|
||||
zd = kmem_alloc(sizeof (zone_dataset_t) + dsnamelen + 1, KM_SLEEP);
|
||||
zd->zd_dsnamelen = dsnamelen;
|
||||
strncpy(zd->zd_dsname, dataset, dsnamelen);
|
||||
zd->zd_dsname[dsnamelen] = '\0';
|
||||
INIT_LIST_HEAD(&zd->zd_list);
|
||||
list_add_tail(&zd->zd_list, &zds->zds_datasets);
|
||||
|
||||
mutex_exit(&zone_datasets_lock);
|
||||
return (0);
|
||||
#else
|
||||
return (ENXIO);
|
||||
#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
|
||||
}
|
||||
EXPORT_SYMBOL(zone_dataset_attach);
|
||||
|
||||
int
|
||||
zone_dataset_detach(cred_t *cred, const char *dataset, int userns_fd)
|
||||
{
|
||||
#if defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM)
|
||||
struct user_namespace *userns;
|
||||
zone_datasets_t *zds;
|
||||
zone_dataset_t *zd;
|
||||
int error;
|
||||
size_t dsnamelen;
|
||||
|
||||
if ((error = zone_dataset_cred_check(cred)) != 0)
|
||||
return (error);
|
||||
if ((error = zone_dataset_name_check(dataset, &dsnamelen)) != 0)
|
||||
return (error);
|
||||
if ((error = user_ns_get(userns_fd, &userns)) != 0)
|
||||
return (error);
|
||||
|
||||
mutex_enter(&zone_datasets_lock);
|
||||
zds = zone_datasets_lookup(user_ns_zoneid(userns));
|
||||
if (zds != NULL)
|
||||
zd = zone_dataset_lookup(zds, dataset, dsnamelen);
|
||||
if (zds == NULL || zd == NULL) {
|
||||
mutex_exit(&zone_datasets_lock);
|
||||
return (ENOENT);
|
||||
}
|
||||
|
||||
list_del(&zd->zd_list);
|
||||
kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
|
||||
|
||||
/* Prune the namespace entry if it has no more delegations. */
|
||||
if (list_empty(&zds->zds_datasets)) {
|
||||
/*
|
||||
* Decrease the refcount now that the namespace is no longer
|
||||
* used. It is no longer necessary to prevent the namespace ID
|
||||
* from being reused.
|
||||
*/
|
||||
put_user_ns(userns);
|
||||
list_del(&zds->zds_list);
|
||||
kmem_free(zds, sizeof (*zds));
|
||||
}
|
||||
|
||||
mutex_exit(&zone_datasets_lock);
|
||||
return (0);
|
||||
#else
|
||||
return (ENXIO);
|
||||
#endif /* defined(CONFIG_USER_NS) && defined(HAVE_USER_NS_COMMON_INUM) */
|
||||
}
|
||||
EXPORT_SYMBOL(zone_dataset_detach);
|
||||
|
||||
/*
|
||||
* A dataset is visible if:
|
||||
* - It is a parent of a namespace entry.
|
||||
* - It is one of the namespace entries.
|
||||
* - It is a child of a namespace entry.
|
||||
*
|
||||
* A dataset is writable if:
|
||||
* - It is one of the namespace entries.
|
||||
* - It is a child of a namespace entry.
|
||||
*
|
||||
* The parent datasets of namespace entries are visible and
|
||||
* read-only to provide a path back to the root of the pool.
|
||||
*/
|
||||
int
|
||||
zone_dataset_visible(const char *dataset, int *write)
|
||||
{
|
||||
zone_datasets_t *zds;
|
||||
zone_dataset_t *zd;
|
||||
size_t dsnamelen, zd_len;
|
||||
int visible;
|
||||
|
||||
/* Default to read-only, in case visible is returned. */
|
||||
if (write != NULL)
|
||||
*write = 0;
|
||||
if (zone_dataset_name_check(dataset, &dsnamelen) != 0)
|
||||
return (0);
|
||||
if (INGLOBALZONE(curproc)) {
|
||||
if (write != NULL)
|
||||
*write = 1;
|
||||
return (1);
|
||||
}
|
||||
|
||||
mutex_enter(&zone_datasets_lock);
|
||||
zds = zone_datasets_lookup(crgetzoneid(curproc->cred));
|
||||
if (zds == NULL) {
|
||||
mutex_exit(&zone_datasets_lock);
|
||||
return (0);
|
||||
}
|
||||
|
||||
visible = 0;
|
||||
list_for_each_entry(zd, &zds->zds_datasets, zd_list) {
|
||||
zd_len = strlen(zd->zd_dsname);
|
||||
if (zd_len > dsnamelen) {
|
||||
/*
|
||||
* The name of the namespace entry is longer than that
|
||||
* of the dataset, so it could be that the dataset is a
|
||||
* parent of the namespace entry.
|
||||
*/
|
||||
visible = memcmp(zd->zd_dsname, dataset,
|
||||
dsnamelen) == 0 &&
|
||||
zd->zd_dsname[dsnamelen] == '/';
|
||||
if (visible)
|
||||
break;
|
||||
} else if (zd_len == dsnamelen) {
|
||||
/*
|
||||
* The name of the namespace entry is as long as that
|
||||
* of the dataset, so perhaps the dataset itself is the
|
||||
* namespace entry.
|
||||
*/
|
||||
visible = memcmp(zd->zd_dsname, dataset, zd_len) == 0;
|
||||
if (visible) {
|
||||
if (write != NULL)
|
||||
*write = 1;
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
/*
|
||||
* The name of the namespace entry is shorter than that
|
||||
* of the dataset, so perhaps the dataset is a child of
|
||||
* the namespace entry.
|
||||
*/
|
||||
visible = memcmp(zd->zd_dsname, dataset,
|
||||
zd_len) == 0 && dataset[zd_len] == '/';
|
||||
if (visible) {
|
||||
if (write != NULL)
|
||||
*write = 1;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
mutex_exit(&zone_datasets_lock);
|
||||
return (visible);
|
||||
}
|
||||
EXPORT_SYMBOL(zone_dataset_visible);
|
||||
|
||||
/*
 * Return the zone id of init_user_ns, or 0 when the kernel is built
 * without CONFIG_USER_NS.
 */
unsigned int
global_zoneid(void)
{
#if defined(CONFIG_USER_NS)
	return (user_ns_zoneid(&init_user_ns));
#else
	return (0);
#endif
}
|
||||
EXPORT_SYMBOL(global_zoneid);
|
||||
|
||||
unsigned int
|
||||
crgetzoneid(const cred_t *cr)
|
||||
{
|
||||
unsigned int r = 0;
|
||||
|
||||
#if defined(CONFIG_USER_NS)
|
||||
r = user_ns_zoneid(cr->user_ns);
|
||||
#endif
|
||||
|
||||
return (r);
|
||||
}
|
||||
EXPORT_SYMBOL(crgetzoneid);
|
||||
|
||||
boolean_t
|
||||
inglobalzone(proc_t *proc)
|
||||
{
|
||||
#if defined(CONFIG_USER_NS)
|
||||
return (proc->cred->user_ns == &init_user_ns);
|
||||
#else
|
||||
return (B_TRUE);
|
||||
#endif
|
||||
}
|
||||
EXPORT_SYMBOL(inglobalzone);
|
||||
|
||||
int
|
||||
spl_zone_init(void)
|
||||
{
|
||||
mutex_init(&zone_datasets_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
INIT_LIST_HEAD(&zone_datasets);
|
||||
return (0);
|
||||
}
|
||||
|
||||
void
|
||||
spl_zone_fini(void)
|
||||
{
|
||||
zone_datasets_t *zds;
|
||||
zone_dataset_t *zd;
|
||||
|
||||
/*
|
||||
* It would be better to assert an empty zone_datasets, but since
|
||||
* there's no automatic mechanism for cleaning them up if the user
|
||||
* namespace is destroyed, just do it here, since spl is about to go
|
||||
* out of context.
|
||||
*/
|
||||
while (!list_empty(&zone_datasets)) {
|
||||
zds = list_entry(zone_datasets.next, zone_datasets_t, zds_list);
|
||||
while (!list_empty(&zds->zds_datasets)) {
|
||||
zd = list_entry(zds->zds_datasets.next,
|
||||
zone_dataset_t, zd_list);
|
||||
list_del(&zd->zd_list);
|
||||
kmem_free(zd, sizeof (*zd) + zd->zd_dsnamelen + 1);
|
||||
put_user_ns(zds->zds_userns);
|
||||
}
|
||||
list_del(&zds->zds_list);
|
||||
kmem_free(zds, sizeof (*zds));
|
||||
}
|
||||
mutex_destroy(&zone_datasets_lock);
|
||||
}
|
@ -61,7 +61,7 @@ priv_policy_ns(const cred_t *cr, int capability, int err,
|
||||
static int
|
||||
priv_policy(const cred_t *cr, int capability, int err)
|
||||
{
|
||||
return (priv_policy_ns(cr, capability, err, NULL));
|
||||
return (priv_policy_ns(cr, capability, err, cr->user_ns));
|
||||
}
|
||||
|
||||
static int
|
||||
|
@ -37,6 +37,7 @@
|
||||
* Copyright 2017 RackTop Systems.
|
||||
* Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
|
||||
* Copyright (c) 2019 Datto Inc.
|
||||
* Copyright (c) 2021 Klara, Inc.
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
@ -150,6 +151,48 @@ out:
|
||||
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_ioc_userns_attach(zfs_cmd_t *zc)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (zc == NULL)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
error = zone_dataset_attach(CRED(), zc->zc_name, zc->zc_cleanup_fd);
|
||||
|
||||
/*
|
||||
* Translate ENOTTY to ZFS_ERR_NOT_USER_NAMESPACE as we just arrived
|
||||
* back from the SPL layer, which does not know about ZFS_ERR_* errors.
|
||||
* See the comment at the user_ns_get() function in spl-zone.c for
|
||||
* details.
|
||||
*/
|
||||
if (error == ENOTTY)
|
||||
error = ZFS_ERR_NOT_USER_NAMESPACE;
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_ioc_userns_detach(zfs_cmd_t *zc)
|
||||
{
|
||||
int error;
|
||||
|
||||
if (zc == NULL)
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
error = zone_dataset_detach(CRED(), zc->zc_name, zc->zc_cleanup_fd);
|
||||
|
||||
/*
|
||||
* See the comment in zfs_ioc_userns_attach() for details on what is
|
||||
* going on here.
|
||||
*/
|
||||
if (error == ENOTTY)
|
||||
error = ZFS_ERR_NOT_USER_NAMESPACE;
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
uint64_t
|
||||
zfs_max_nvlist_src_size_os(void)
|
||||
{
|
||||
@ -168,6 +211,10 @@ zfs_ioctl_update_mount_cache(const char *dsname)
|
||||
void
|
||||
zfs_ioctl_init_os(void)
|
||||
{
|
||||
zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERNS_ATTACH,
|
||||
zfs_ioc_userns_attach, zfs_secpolicy_config, POOL_CHECK_NONE);
|
||||
zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERNS_DETACH,
|
||||
zfs_ioc_userns_detach, zfs_secpolicy_config, POOL_CHECK_NONE);
|
||||
}
|
||||
|
||||
#ifdef CONFIG_COMPAT
|
||||
|
@ -126,7 +126,7 @@ zfs_uiomove_iov(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
|
||||
}
|
||||
|
||||
static int
|
||||
zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
|
||||
zfs_uiomove_bvec_impl(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
|
||||
{
|
||||
const struct bio_vec *bv = uio->uio_bvec;
|
||||
size_t skip = uio->uio_skip;
|
||||
@ -137,10 +137,13 @@ zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
|
||||
cnt = MIN(bv->bv_len - skip, n);
|
||||
|
||||
paddr = zfs_kmap_atomic(bv->bv_page);
|
||||
if (rw == UIO_READ)
|
||||
if (rw == UIO_READ) {
|
||||
/* Copy from buffer 'p' to the bvec data */
|
||||
memcpy(paddr + bv->bv_offset + skip, p, cnt);
|
||||
else
|
||||
} else {
|
||||
/* Copy from bvec data to buffer 'p' */
|
||||
memcpy(p, paddr + bv->bv_offset + skip, cnt);
|
||||
}
|
||||
zfs_kunmap_atomic(paddr);
|
||||
|
||||
skip += cnt;
|
||||
@ -158,6 +161,141 @@ zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
|
||||
return (0);
|
||||
}
|
||||
|
||||
#ifdef HAVE_BLK_MQ
|
||||
static void
|
||||
zfs_copy_bvec(void *p, size_t skip, size_t cnt, zfs_uio_rw_t rw,
|
||||
struct bio_vec *bv)
|
||||
{
|
||||
void *paddr;
|
||||
|
||||
paddr = zfs_kmap_atomic(bv->bv_page);
|
||||
if (rw == UIO_READ) {
|
||||
/* Copy from buffer 'p' to the bvec data */
|
||||
memcpy(paddr + bv->bv_offset + skip, p, cnt);
|
||||
} else {
|
||||
/* Copy from bvec data to buffer 'p' */
|
||||
memcpy(p, paddr + bv->bv_offset + skip, cnt);
|
||||
}
|
||||
zfs_kunmap_atomic(paddr);
|
||||
}
|
||||
|
||||
/*
|
||||
* Copy 'n' bytes of data between the buffer p[] and the data represented
|
||||
* by the request in the uio.
|
||||
*/
|
||||
static int
|
||||
zfs_uiomove_bvec_rq(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
|
||||
{
|
||||
struct request *rq = uio->rq;
|
||||
struct bio_vec bv;
|
||||
struct req_iterator iter;
|
||||
size_t this_seg_start; /* logical offset */
|
||||
size_t this_seg_end; /* logical offset */
|
||||
size_t skip_in_seg;
|
||||
size_t copy_from_seg;
|
||||
size_t orig_loffset;
|
||||
int copied = 0;
|
||||
|
||||
/*
|
||||
* Get the original logical offset of this entire request (because
|
||||
* uio->uio_loffset will be modified over time).
|
||||
*/
|
||||
orig_loffset = io_offset(NULL, rq);
|
||||
this_seg_start = orig_loffset;
|
||||
|
||||
rq_for_each_segment(bv, rq, iter) {
|
||||
if (uio->iter.bio) {
|
||||
/*
|
||||
* If uio->iter.bio is present, then we know we've saved
|
||||
* uio->iter from a previous call to this function, and
|
||||
* we can skip ahead in this rq_for_each_segment() loop
|
||||
* to where we last left off. That way, we don't need
|
||||
* to iterate over tons of segments we've already
|
||||
* processed - we can just restore the "saved state".
|
||||
*/
|
||||
iter = uio->iter;
|
||||
bv = uio->bv;
|
||||
this_seg_start = uio->uio_loffset;
|
||||
memset(&uio->iter, 0, sizeof (uio->iter));
|
||||
continue;
|
||||
}
|
||||
|
||||
/*
|
||||
* Lookup what the logical offset of the last byte of this
|
||||
* segment is.
|
||||
*/
|
||||
this_seg_end = this_seg_start + bv.bv_len - 1;
|
||||
|
||||
/*
|
||||
* We only need to operate on segments that have data we're
|
||||
* copying.
|
||||
*/
|
||||
if (uio->uio_loffset >= this_seg_start &&
|
||||
uio->uio_loffset <= this_seg_end) {
|
||||
/*
|
||||
* Some, or all, of the data in this segment needs to be
|
||||
* copied.
|
||||
*/
|
||||
|
||||
/*
|
||||
* We may be not be copying from the first byte in the
|
||||
* segment. Figure out how many bytes to skip copying
|
||||
* from the beginning of this segment.
|
||||
*/
|
||||
skip_in_seg = uio->uio_loffset - this_seg_start;
|
||||
|
||||
/*
|
||||
* Calculate the total number of bytes from this
|
||||
* segment that we will be copying.
|
||||
*/
|
||||
copy_from_seg = MIN(bv.bv_len - skip_in_seg, n);
|
||||
|
||||
/* Copy the bytes */
|
||||
zfs_copy_bvec(p, skip_in_seg, copy_from_seg, rw, &bv);
|
||||
p = ((char *)p) + copy_from_seg;
|
||||
|
||||
n -= copy_from_seg;
|
||||
uio->uio_resid -= copy_from_seg;
|
||||
uio->uio_loffset += copy_from_seg;
|
||||
copied = 1; /* We copied some data */
|
||||
}
|
||||
|
||||
if (n == 0) {
|
||||
/*
|
||||
* All done copying. Save our 'iter' value to the uio.
|
||||
* This allows us to "save our state" and skip ahead in
|
||||
* the rq_for_each_segment() loop the next time we call
|
||||
* call zfs_uiomove_bvec_rq() on this uio (which we
|
||||
* will be doing for any remaining data in the uio).
|
||||
*/
|
||||
uio->iter = iter; /* make a copy of the struct data */
|
||||
uio->bv = bv;
|
||||
return (0);
|
||||
}
|
||||
|
||||
this_seg_start = this_seg_end + 1;
|
||||
}
|
||||
|
||||
if (!copied) {
|
||||
/* Didn't copy anything */
|
||||
uio->uio_resid = 0;
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
#endif
|
||||
|
||||
static int
|
||||
zfs_uiomove_bvec(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio)
|
||||
{
|
||||
#ifdef HAVE_BLK_MQ
|
||||
if (uio->rq != NULL)
|
||||
return (zfs_uiomove_bvec_rq(p, n, rw, uio));
|
||||
#else
|
||||
ASSERT3P(uio->rq, ==, NULL);
|
||||
#endif
|
||||
return (zfs_uiomove_bvec_impl(p, n, rw, uio));
|
||||
}
|
||||
|
||||
#if defined(HAVE_VFS_IOV_ITER)
|
||||
static int
|
||||
zfs_uiomove_iter(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio,
|
||||
@ -300,8 +438,14 @@ zfs_uioskip(zfs_uio_t *uio, size_t n)
|
||||
{
|
||||
if (n > uio->uio_resid)
|
||||
return;
|
||||
|
||||
if (uio->uio_segflg == UIO_BVEC) {
|
||||
/*
|
||||
* When using a uio with a struct request, we simply
|
||||
* use uio_loffset as a pointer to the next logical byte to
|
||||
* copy in the request. We don't have to do any fancy
|
||||
* accounting with uio_bvec/uio_iovcnt since we don't use
|
||||
* them.
|
||||
*/
|
||||
if (uio->uio_segflg == UIO_BVEC && uio->rq == NULL) {
|
||||
uio->uio_skip += n;
|
||||
while (uio->uio_iovcnt &&
|
||||
uio->uio_skip >= uio->uio_bvec->bv_len) {
|
||||
|
@ -1453,14 +1453,34 @@ zfs_domount(struct super_block *sb, zfs_mnt_t *zm, int silent)
|
||||
int error = 0;
|
||||
zfsvfs_t *zfsvfs = NULL;
|
||||
vfs_t *vfs = NULL;
|
||||
int canwrite;
|
||||
int dataset_visible_zone;
|
||||
|
||||
ASSERT(zm);
|
||||
ASSERT(osname);
|
||||
|
||||
dataset_visible_zone = zone_dataset_visible(osname, &canwrite);
|
||||
|
||||
/*
|
||||
* Refuse to mount a filesystem if we are in a namespace and the
|
||||
* dataset is not visible or writable in that namespace.
|
||||
*/
|
||||
if (!INGLOBALZONE(curproc) &&
|
||||
(!dataset_visible_zone || !canwrite)) {
|
||||
return (SET_ERROR(EPERM));
|
||||
}
|
||||
|
||||
error = zfsvfs_parse_options(zm->mnt_data, &vfs);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
/*
|
||||
* If a non-writable filesystem is being mounted without the
|
||||
* read-only flag, pretend it was set, as done for snapshots.
|
||||
*/
|
||||
if (!canwrite)
|
||||
vfs->vfs_readonly = true;
|
||||
|
||||
error = zfsvfs_create(osname, vfs->vfs_readonly, &zfsvfs);
|
||||
if (error) {
|
||||
zfsvfs_vfs_free(vfs);
|
||||
|
@ -32,6 +32,9 @@
|
||||
#include <sys/zfs_vnops.h>
|
||||
#include <sys/zfs_ctldir.h>
|
||||
#include <sys/zpl.h>
|
||||
#include <sys/dmu.h>
|
||||
#include <sys/dsl_dataset.h>
|
||||
#include <sys/zap.h>
|
||||
|
||||
/*
|
||||
* Common open routine. Disallow any write access.
|
||||
@ -411,6 +414,20 @@ zpl_snapdir_getattr_impl(const struct path *path, struct kstat *stat,
|
||||
#endif
|
||||
|
||||
stat->nlink = stat->size = 2;
|
||||
|
||||
dsl_dataset_t *ds = dmu_objset_ds(zfsvfs->z_os);
|
||||
if (dsl_dataset_phys(ds)->ds_snapnames_zapobj != 0) {
|
||||
uint64_t snap_count;
|
||||
int err = zap_count(
|
||||
dmu_objset_pool(ds->ds_objset)->dp_meta_objset,
|
||||
dsl_dataset_phys(ds)->ds_snapnames_zapobj, &snap_count);
|
||||
if (err != 0) {
|
||||
ZPL_EXIT(zfsvfs);
|
||||
return (-err);
|
||||
}
|
||||
stat->nlink += snap_count;
|
||||
}
|
||||
|
||||
stat->ctime = stat->mtime = dmu_objset_snap_cmtime(zfsvfs->z_os);
|
||||
stat->atime = current_time(ip);
|
||||
ZPL_EXIT(zfsvfs);
|
||||
|
@ -360,6 +360,7 @@ const struct super_operations zpl_super_operations = {
|
||||
struct file_system_type zpl_fs_type = {
|
||||
.owner = THIS_MODULE,
|
||||
.name = ZFS_DRIVER,
|
||||
.fs_flags = FS_USERNS_MOUNT,
|
||||
.mount = zpl_mount,
|
||||
.kill_sb = zpl_kill_sb,
|
||||
};
|
||||
|
@ -83,6 +83,7 @@
|
||||
#include <sys/zap.h>
|
||||
#include <sys/vfs.h>
|
||||
#include <sys/zpl.h>
|
||||
#include <linux/vfs_compat.h>
|
||||
|
||||
enum xattr_permission {
|
||||
XAPERM_DENY,
|
||||
@ -1495,7 +1496,9 @@ zpl_xattr_permission(xattr_filldir_t *xf, const char *name, int name_len)
|
||||
return (perm);
|
||||
}
|
||||
|
||||
#if !defined(HAVE_POSIX_ACL_RELEASE) || defined(HAVE_POSIX_ACL_RELEASE_GPL_ONLY)
|
||||
#if defined(CONFIG_FS_POSIX_ACL) && \
|
||||
(!defined(HAVE_POSIX_ACL_RELEASE) || \
|
||||
defined(HAVE_POSIX_ACL_RELEASE_GPL_ONLY))
|
||||
struct acl_rel_struct {
|
||||
struct acl_rel_struct *next;
|
||||
struct posix_acl *acl;
|
||||
|
@ -41,20 +41,77 @@
|
||||
#include <linux/blkdev_compat.h>
|
||||
#include <linux/task_io_accounting_ops.h>
|
||||
|
||||
#ifdef HAVE_BLK_MQ
|
||||
#include <linux/blk-mq.h>
|
||||
#endif
|
||||
|
||||
static void zvol_request_impl(zvol_state_t *zv, struct bio *bio,
|
||||
struct request *rq, boolean_t force_sync);
|
||||
|
||||
static unsigned int zvol_major = ZVOL_MAJOR;
|
||||
static unsigned int zvol_request_sync = 0;
|
||||
static unsigned int zvol_prefetch_bytes = (128 * 1024);
|
||||
static unsigned long zvol_max_discard_blocks = 16384;
|
||||
static unsigned int zvol_threads = 32;
|
||||
|
||||
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
|
||||
static const unsigned int zvol_open_timeout_ms = 1000;
|
||||
#endif
|
||||
|
||||
static unsigned int zvol_threads = 0;
|
||||
#ifdef HAVE_BLK_MQ
|
||||
static unsigned int zvol_blk_mq_threads = 0;
|
||||
static unsigned int zvol_blk_mq_actual_threads;
|
||||
static boolean_t zvol_use_blk_mq = B_FALSE;
|
||||
|
||||
/*
|
||||
* The maximum number of volblocksize blocks to process per thread. Typically,
|
||||
* write heavy workloads perform better with higher values here, and read
|
||||
* heavy workloads perform better with lower values, but that's not a hard
|
||||
* and fast rule. It's basically a knob to tune between "less overhead with
|
||||
* less parallelism" and "more overhead, but more parallelism".
|
||||
*
|
||||
* '8' was chosen as a reasonable, balanced, default based off of sequential
|
||||
* read and write tests to a zvol in an NVMe pool (with 16 CPUs).
|
||||
*/
|
||||
static unsigned int zvol_blk_mq_blocks_per_thread = 8;
|
||||
#endif
|
||||
|
||||
#ifndef BLKDEV_DEFAULT_RQ
|
||||
/* BLKDEV_MAX_RQ was renamed to BLKDEV_DEFAULT_RQ in the 5.16 kernel */
|
||||
#define BLKDEV_DEFAULT_RQ BLKDEV_MAX_RQ
|
||||
#endif
|
||||
|
||||
/*
 * Finalize our BIO or request.
 *
 * With blk-mq support compiled in, a non-NULL 'bio' is completed with
 * BIO_END_IO(); otherwise 'rq' is completed with blk_mq_end_request() after
 * converting 'error' via errno_to_bi_status().  Without blk-mq only the BIO
 * form exists.  The 'zv' argument is not used by either expansion.
 */
#ifdef HAVE_BLK_MQ
#define	END_IO(zv, bio, rq, error)					\
do {									\
	if (bio) {							\
		BIO_END_IO(bio, error);					\
	} else {							\
		blk_mq_end_request(rq, errno_to_bi_status(error));	\
	}								\
} while (0)
#else
#define	END_IO(zv, bio, rq, error)	BIO_END_IO(bio, error)
#endif
|
||||
|
||||
#ifdef HAVE_BLK_MQ
|
||||
static unsigned int zvol_blk_mq_queue_depth = BLKDEV_DEFAULT_RQ;
|
||||
static unsigned int zvol_actual_blk_mq_queue_depth;
|
||||
#endif
|
||||
|
||||
struct zvol_state_os {
|
||||
struct gendisk *zvo_disk; /* generic disk */
|
||||
struct request_queue *zvo_queue; /* request queue */
|
||||
dev_t zvo_dev; /* device id */
|
||||
|
||||
#ifdef HAVE_BLK_MQ
|
||||
struct blk_mq_tag_set tag_set;
|
||||
#endif
|
||||
|
||||
/* Set from the global 'zvol_use_blk_mq' at zvol load */
|
||||
boolean_t use_blk_mq;
|
||||
};
|
||||
|
||||
taskq_t *zvol_taskq;
|
||||
@ -63,8 +120,14 @@ static struct ida zvol_ida;
|
||||
typedef struct zv_request_stack {
|
||||
zvol_state_t *zv;
|
||||
struct bio *bio;
|
||||
struct request *rq;
|
||||
} zv_request_t;
|
||||
|
||||
typedef struct zv_work {
|
||||
struct request *rq;
|
||||
struct work_struct work;
|
||||
} zv_work_t;
|
||||
|
||||
typedef struct zv_request_task {
|
||||
zv_request_t zvr;
|
||||
taskq_ent_t ent;
|
||||
@ -86,6 +149,62 @@ zv_request_task_free(zv_request_task_t *task)
|
||||
kmem_free(task, sizeof (*task));
|
||||
}
|
||||
|
||||
#ifdef HAVE_BLK_MQ
|
||||
|
||||
/*
|
||||
* This is called when a new block multiqueue request comes in. A request
|
||||
* contains one or more BIOs.
|
||||
*/
|
||||
static blk_status_t zvol_mq_queue_rq(struct blk_mq_hw_ctx *hctx,
|
||||
const struct blk_mq_queue_data *bd)
|
||||
{
|
||||
struct request *rq = bd->rq;
|
||||
zvol_state_t *zv = rq->q->queuedata;
|
||||
|
||||
/* Tell the kernel that we are starting to process this request */
|
||||
blk_mq_start_request(rq);
|
||||
|
||||
if (blk_rq_is_passthrough(rq)) {
|
||||
/* Skip non filesystem request */
|
||||
blk_mq_end_request(rq, BLK_STS_IOERR);
|
||||
return (BLK_STS_IOERR);
|
||||
}
|
||||
|
||||
zvol_request_impl(zv, NULL, rq, 0);
|
||||
|
||||
/* Acknowledge to the kernel that we got this request */
|
||||
return (BLK_STS_OK);
|
||||
}
|
||||
|
||||
static struct blk_mq_ops zvol_blk_mq_queue_ops = {
|
||||
.queue_rq = zvol_mq_queue_rq,
|
||||
};
|
||||
|
||||
/* Initialize our blk-mq struct */
|
||||
static int zvol_blk_mq_alloc_tag_set(zvol_state_t *zv)
|
||||
{
|
||||
struct zvol_state_os *zso = zv->zv_zso;
|
||||
|
||||
memset(&zso->tag_set, 0, sizeof (zso->tag_set));
|
||||
|
||||
/* Initialize tag set. */
|
||||
zso->tag_set.ops = &zvol_blk_mq_queue_ops;
|
||||
zso->tag_set.nr_hw_queues = zvol_blk_mq_actual_threads;
|
||||
zso->tag_set.queue_depth = zvol_actual_blk_mq_queue_depth;
|
||||
zso->tag_set.numa_node = NUMA_NO_NODE;
|
||||
zso->tag_set.cmd_size = 0;
|
||||
|
||||
/*
|
||||
* We need BLK_MQ_F_BLOCKING here since we do blocking calls in
|
||||
* zvol_request_impl()
|
||||
*/
|
||||
zso->tag_set.flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_BLOCKING;
|
||||
zso->tag_set.driver_data = zv;
|
||||
|
||||
return (blk_mq_alloc_tag_set(&zso->tag_set));
|
||||
}
|
||||
#endif /* HAVE_BLK_MQ */
|
||||
|
||||
/*
|
||||
* Given a path, return TRUE if path is a ZVOL.
|
||||
*/
|
||||
@ -107,38 +226,51 @@ static void
|
||||
zvol_write(zv_request_t *zvr)
|
||||
{
|
||||
struct bio *bio = zvr->bio;
|
||||
struct request *rq = zvr->rq;
|
||||
int error = 0;
|
||||
zfs_uio_t uio;
|
||||
|
||||
zfs_uio_bvec_init(&uio, bio);
|
||||
|
||||
zvol_state_t *zv = zvr->zv;
|
||||
struct request_queue *q;
|
||||
struct gendisk *disk;
|
||||
unsigned long start_time = 0;
|
||||
boolean_t acct = B_FALSE;
|
||||
|
||||
ASSERT3P(zv, !=, NULL);
|
||||
ASSERT3U(zv->zv_open_count, >, 0);
|
||||
ASSERT3P(zv->zv_zilog, !=, NULL);
|
||||
|
||||
q = zv->zv_zso->zvo_queue;
|
||||
disk = zv->zv_zso->zvo_disk;
|
||||
|
||||
/* bio marked as FLUSH need to flush before write */
|
||||
if (bio_is_flush(bio))
|
||||
if (io_is_flush(bio, rq))
|
||||
zil_commit(zv->zv_zilog, ZVOL_OBJ);
|
||||
|
||||
/* Some requests are just for flush and nothing else. */
|
||||
if (uio.uio_resid == 0) {
|
||||
if (io_size(bio, rq) == 0) {
|
||||
rw_exit(&zv->zv_suspend_lock);
|
||||
BIO_END_IO(bio, 0);
|
||||
END_IO(zv, bio, rq, 0);
|
||||
return;
|
||||
}
|
||||
|
||||
struct request_queue *q = zv->zv_zso->zvo_queue;
|
||||
struct gendisk *disk = zv->zv_zso->zvo_disk;
|
||||
ssize_t start_resid = uio.uio_resid;
|
||||
unsigned long start_time;
|
||||
zfs_uio_bvec_init(&uio, bio, rq);
|
||||
|
||||
boolean_t acct = blk_queue_io_stat(q);
|
||||
if (acct)
|
||||
start_time = blk_generic_start_io_acct(q, disk, WRITE, bio);
|
||||
ssize_t start_resid = uio.uio_resid;
|
||||
|
||||
/*
|
||||
* With use_blk_mq, accounting is done by blk_mq_start_request()
|
||||
* and blk_mq_end_request(), so we can skip it here.
|
||||
*/
|
||||
if (bio) {
|
||||
acct = blk_queue_io_stat(q);
|
||||
if (acct) {
|
||||
start_time = blk_generic_start_io_acct(q, disk, WRITE,
|
||||
bio);
|
||||
}
|
||||
}
|
||||
|
||||
boolean_t sync =
|
||||
bio_is_fua(bio) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
|
||||
io_is_fua(bio, rq) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
|
||||
|
||||
zfs_locked_range_t *lr = zfs_rangelock_enter(&zv->zv_rangelock,
|
||||
uio.uio_loffset, uio.uio_resid, RL_WRITER);
|
||||
@ -180,10 +312,11 @@ zvol_write(zv_request_t *zvr)
|
||||
|
||||
rw_exit(&zv->zv_suspend_lock);
|
||||
|
||||
if (acct)
|
||||
if (bio && acct) {
|
||||
blk_generic_end_io_acct(q, disk, WRITE, bio, start_time);
|
||||
}
|
||||
|
||||
BIO_END_IO(bio, -error);
|
||||
END_IO(zv, bio, rq, -error);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -198,27 +331,33 @@ static void
|
||||
zvol_discard(zv_request_t *zvr)
|
||||
{
|
||||
struct bio *bio = zvr->bio;
|
||||
struct request *rq = zvr->rq;
|
||||
zvol_state_t *zv = zvr->zv;
|
||||
uint64_t start = BIO_BI_SECTOR(bio) << 9;
|
||||
uint64_t size = BIO_BI_SIZE(bio);
|
||||
uint64_t start = io_offset(bio, rq);
|
||||
uint64_t size = io_size(bio, rq);
|
||||
uint64_t end = start + size;
|
||||
boolean_t sync;
|
||||
int error = 0;
|
||||
dmu_tx_t *tx;
|
||||
struct request_queue *q = zv->zv_zso->zvo_queue;
|
||||
struct gendisk *disk = zv->zv_zso->zvo_disk;
|
||||
unsigned long start_time = 0;
|
||||
|
||||
boolean_t acct = blk_queue_io_stat(q);
|
||||
|
||||
ASSERT3P(zv, !=, NULL);
|
||||
ASSERT3U(zv->zv_open_count, >, 0);
|
||||
ASSERT3P(zv->zv_zilog, !=, NULL);
|
||||
|
||||
struct request_queue *q = zv->zv_zso->zvo_queue;
|
||||
struct gendisk *disk = zv->zv_zso->zvo_disk;
|
||||
unsigned long start_time;
|
||||
if (bio) {
|
||||
acct = blk_queue_io_stat(q);
|
||||
if (acct) {
|
||||
start_time = blk_generic_start_io_acct(q, disk, WRITE,
|
||||
bio);
|
||||
}
|
||||
}
|
||||
|
||||
boolean_t acct = blk_queue_io_stat(q);
|
||||
if (acct)
|
||||
start_time = blk_generic_start_io_acct(q, disk, WRITE, bio);
|
||||
|
||||
sync = bio_is_fua(bio) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
|
||||
sync = io_is_fua(bio, rq) || zv->zv_objset->os_sync == ZFS_SYNC_ALWAYS;
|
||||
|
||||
if (end > zv->zv_volsize) {
|
||||
error = SET_ERROR(EIO);
|
||||
@ -231,7 +370,7 @@ zvol_discard(zv_request_t *zvr)
|
||||
* the unaligned parts which is slow (read-modify-write) and useless
|
||||
* since we are not freeing any space by doing so.
|
||||
*/
|
||||
if (!bio_is_secure_erase(bio)) {
|
||||
if (!io_is_secure_erase(bio, rq)) {
|
||||
start = P2ROUNDUP(start, zv->zv_volblocksize);
|
||||
end = P2ALIGN(end, zv->zv_volblocksize);
|
||||
size = end - start;
|
||||
@ -262,10 +401,12 @@ zvol_discard(zv_request_t *zvr)
|
||||
unlock:
|
||||
rw_exit(&zv->zv_suspend_lock);
|
||||
|
||||
if (acct)
|
||||
blk_generic_end_io_acct(q, disk, WRITE, bio, start_time);
|
||||
if (bio && acct) {
|
||||
blk_generic_end_io_acct(q, disk, WRITE, bio,
|
||||
start_time);
|
||||
}
|
||||
|
||||
BIO_END_IO(bio, -error);
|
||||
END_IO(zv, bio, rq, -error);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -280,28 +421,41 @@ static void
|
||||
zvol_read(zv_request_t *zvr)
|
||||
{
|
||||
struct bio *bio = zvr->bio;
|
||||
struct request *rq = zvr->rq;
|
||||
int error = 0;
|
||||
zfs_uio_t uio;
|
||||
|
||||
zfs_uio_bvec_init(&uio, bio);
|
||||
|
||||
boolean_t acct = B_FALSE;
|
||||
zvol_state_t *zv = zvr->zv;
|
||||
struct request_queue *q;
|
||||
struct gendisk *disk;
|
||||
unsigned long start_time = 0;
|
||||
|
||||
ASSERT3P(zv, !=, NULL);
|
||||
ASSERT3U(zv->zv_open_count, >, 0);
|
||||
|
||||
struct request_queue *q = zv->zv_zso->zvo_queue;
|
||||
struct gendisk *disk = zv->zv_zso->zvo_disk;
|
||||
ssize_t start_resid = uio.uio_resid;
|
||||
unsigned long start_time;
|
||||
zfs_uio_bvec_init(&uio, bio, rq);
|
||||
|
||||
boolean_t acct = blk_queue_io_stat(q);
|
||||
if (acct)
|
||||
start_time = blk_generic_start_io_acct(q, disk, READ, bio);
|
||||
q = zv->zv_zso->zvo_queue;
|
||||
disk = zv->zv_zso->zvo_disk;
|
||||
|
||||
ssize_t start_resid = uio.uio_resid;
|
||||
|
||||
/*
|
||||
* When blk-mq is being used, accounting is done by
|
||||
* blk_mq_start_request() and blk_mq_end_request().
|
||||
*/
|
||||
if (bio) {
|
||||
acct = blk_queue_io_stat(q);
|
||||
if (acct)
|
||||
start_time = blk_generic_start_io_acct(q, disk, READ,
|
||||
bio);
|
||||
}
|
||||
|
||||
zfs_locked_range_t *lr = zfs_rangelock_enter(&zv->zv_rangelock,
|
||||
uio.uio_loffset, uio.uio_resid, RL_READER);
|
||||
|
||||
uint64_t volsize = zv->zv_volsize;
|
||||
|
||||
while (uio.uio_resid > 0 && uio.uio_loffset < volsize) {
|
||||
uint64_t bytes = MIN(uio.uio_resid, DMU_MAX_ACCESS >> 1);
|
||||
|
||||
@ -325,10 +479,11 @@ zvol_read(zv_request_t *zvr)
|
||||
|
||||
rw_exit(&zv->zv_suspend_lock);
|
||||
|
||||
if (acct)
|
||||
if (bio && acct) {
|
||||
blk_generic_end_io_acct(q, disk, READ, bio, start_time);
|
||||
}
|
||||
|
||||
BIO_END_IO(bio, -error);
|
||||
END_IO(zv, bio, rq, -error);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -339,52 +494,49 @@ zvol_read_task(void *arg)
|
||||
zv_request_task_free(task);
|
||||
}
|
||||
|
||||
#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
|
||||
#ifdef HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID
|
||||
|
||||
/*
|
||||
* Process a BIO or request
|
||||
*
|
||||
* Either 'bio' or 'rq' should be set depending on if we are processing a
|
||||
* bio or a request (both should not be set).
|
||||
*
|
||||
* force_sync: Set to 0 to defer processing to a background taskq
|
||||
* Set to 1 to process data synchronously
|
||||
*/
|
||||
static void
|
||||
zvol_submit_bio(struct bio *bio)
|
||||
#else
|
||||
static blk_qc_t
|
||||
zvol_submit_bio(struct bio *bio)
|
||||
#endif
|
||||
#else
|
||||
static MAKE_REQUEST_FN_RET
|
||||
zvol_request(struct request_queue *q, struct bio *bio)
|
||||
#endif
|
||||
zvol_request_impl(zvol_state_t *zv, struct bio *bio, struct request *rq,
|
||||
boolean_t force_sync)
|
||||
{
|
||||
#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
|
||||
#if defined(HAVE_BIO_BDEV_DISK)
|
||||
struct request_queue *q = bio->bi_bdev->bd_disk->queue;
|
||||
#else
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
#endif
|
||||
#endif
|
||||
zvol_state_t *zv = q->queuedata;
|
||||
fstrans_cookie_t cookie = spl_fstrans_mark();
|
||||
uint64_t offset = BIO_BI_SECTOR(bio) << 9;
|
||||
uint64_t size = BIO_BI_SIZE(bio);
|
||||
int rw = bio_data_dir(bio);
|
||||
uint64_t offset = io_offset(bio, rq);
|
||||
uint64_t size = io_size(bio, rq);
|
||||
int rw = io_data_dir(bio, rq);
|
||||
|
||||
if (bio_has_data(bio) && offset + size > zv->zv_volsize) {
|
||||
printk(KERN_INFO
|
||||
"%s: bad access: offset=%llu, size=%lu\n",
|
||||
zv->zv_zso->zvo_disk->disk_name,
|
||||
(long long unsigned)offset,
|
||||
(long unsigned)size);
|
||||
|
||||
BIO_END_IO(bio, -SET_ERROR(EIO));
|
||||
goto out;
|
||||
}
|
||||
if (zvol_request_sync)
|
||||
force_sync = 1;
|
||||
|
||||
zv_request_t zvr = {
|
||||
.zv = zv,
|
||||
.bio = bio,
|
||||
.rq = rq,
|
||||
};
|
||||
|
||||
if (io_has_data(bio, rq) && offset + size > zv->zv_volsize) {
|
||||
printk(KERN_INFO "%s: bad access: offset=%llu, size=%lu\n",
|
||||
zv->zv_zso->zvo_disk->disk_name,
|
||||
(long long unsigned)offset,
|
||||
(long unsigned)size);
|
||||
|
||||
END_IO(zv, bio, rq, -SET_ERROR(EIO));
|
||||
goto out;
|
||||
}
|
||||
|
||||
zv_request_task_t *task;
|
||||
|
||||
if (rw == WRITE) {
|
||||
if (unlikely(zv->zv_flags & ZVOL_RDONLY)) {
|
||||
BIO_END_IO(bio, -SET_ERROR(EROFS));
|
||||
END_IO(zv, bio, rq, -SET_ERROR(EROFS));
|
||||
goto out;
|
||||
}
|
||||
|
||||
@ -421,7 +573,7 @@ zvol_request(struct request_queue *q, struct bio *bio)
|
||||
* i/o may be a ZIL write (via zil_commit()), or a read of an
|
||||
* indirect block, or a read of a data block (if this is a
|
||||
* partial-block write). We will indicate that the i/o is
|
||||
* complete by calling BIO_END_IO() from the taskq callback.
|
||||
* complete by calling END_IO() from the taskq callback.
|
||||
*
|
||||
* This design allows the calling thread to continue and
|
||||
* initiate more concurrent operations by calling
|
||||
@ -441,12 +593,12 @@ zvol_request(struct request_queue *q, struct bio *bio)
|
||||
* of one i/o at a time per zvol. However, an even better
|
||||
* design would be for zvol_request() to initiate the zio
|
||||
* directly, and then be notified by the zio_done callback,
|
||||
* which would call BIO_END_IO(). Unfortunately, the DMU/ZIL
|
||||
* which would call END_IO(). Unfortunately, the DMU/ZIL
|
||||
* interfaces lack this functionality (they block waiting for
|
||||
* the i/o to complete).
|
||||
*/
|
||||
if (bio_is_discard(bio) || bio_is_secure_erase(bio)) {
|
||||
if (zvol_request_sync) {
|
||||
if (io_is_discard(bio, rq) || io_is_secure_erase(bio, rq)) {
|
||||
if (force_sync) {
|
||||
zvol_discard(&zvr);
|
||||
} else {
|
||||
task = zv_request_task_create(zvr);
|
||||
@ -454,7 +606,7 @@ zvol_request(struct request_queue *q, struct bio *bio)
|
||||
zvol_discard_task, task, 0, &task->ent);
|
||||
}
|
||||
} else {
|
||||
if (zvol_request_sync) {
|
||||
if (force_sync) {
|
||||
zvol_write(&zvr);
|
||||
} else {
|
||||
task = zv_request_task_create(zvr);
|
||||
@ -469,14 +621,14 @@ zvol_request(struct request_queue *q, struct bio *bio)
|
||||
* data and require no additional handling.
|
||||
*/
|
||||
if (size == 0) {
|
||||
BIO_END_IO(bio, 0);
|
||||
END_IO(zv, bio, rq, 0);
|
||||
goto out;
|
||||
}
|
||||
|
||||
rw_enter(&zv->zv_suspend_lock, RW_READER);
|
||||
|
||||
/* See comment in WRITE case above. */
|
||||
if (zvol_request_sync) {
|
||||
if (force_sync) {
|
||||
zvol_read(&zvr);
|
||||
} else {
|
||||
task = zv_request_task_create(zvr);
|
||||
@ -487,8 +639,33 @@ zvol_request(struct request_queue *q, struct bio *bio)
|
||||
|
||||
out:
|
||||
spl_fstrans_unmark(cookie);
|
||||
#if (defined(HAVE_MAKE_REQUEST_FN_RET_QC) || \
|
||||
defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS)) && \
|
||||
}
|
||||
|
||||
#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
|
||||
#ifdef HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID
|
||||
static void
|
||||
zvol_submit_bio(struct bio *bio)
|
||||
#else
|
||||
static blk_qc_t
|
||||
zvol_submit_bio(struct bio *bio)
|
||||
#endif
|
||||
#else
|
||||
static MAKE_REQUEST_FN_RET
|
||||
zvol_request(struct request_queue *q, struct bio *bio)
|
||||
#endif
|
||||
{
|
||||
#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
|
||||
#if defined(HAVE_BIO_BDEV_DISK)
|
||||
struct request_queue *q = bio->bi_bdev->bd_disk->queue;
|
||||
#else
|
||||
struct request_queue *q = bio->bi_disk->queue;
|
||||
#endif
|
||||
#endif
|
||||
zvol_state_t *zv = q->queuedata;
|
||||
|
||||
zvol_request_impl(zv, bio, NULL, 0);
|
||||
#if defined(HAVE_MAKE_REQUEST_FN_RET_QC) || \
|
||||
defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS) && \
|
||||
!defined(HAVE_BDEV_SUBMIT_BIO_RETURNS_VOID)
|
||||
return (BLK_QC_T_NONE);
|
||||
#endif
|
||||
@ -805,6 +982,27 @@ zvol_getgeo(struct block_device *bdev, struct hd_geometry *geo)
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Why have two separate block_device_operations structs?
|
||||
*
|
||||
* Normally we'd just have one, and assign 'submit_bio' as needed. However,
|
||||
* it's possible the user's kernel is built with CONSTIFY_PLUGIN, meaning we
|
||||
* can't just change submit_bio dynamically at runtime. So just create two
|
||||
* separate structs to get around this.
|
||||
*/
|
||||
static const struct block_device_operations zvol_ops_blk_mq = {
|
||||
.open = zvol_open,
|
||||
.release = zvol_release,
|
||||
.ioctl = zvol_ioctl,
|
||||
.compat_ioctl = zvol_compat_ioctl,
|
||||
.check_events = zvol_check_events,
|
||||
#ifdef HAVE_BLOCK_DEVICE_OPERATIONS_REVALIDATE_DISK
|
||||
.revalidate_disk = zvol_revalidate_disk,
|
||||
#endif
|
||||
.getgeo = zvol_getgeo,
|
||||
.owner = THIS_MODULE,
|
||||
};
|
||||
|
||||
static const struct block_device_operations zvol_ops = {
|
||||
.open = zvol_open,
|
||||
.release = zvol_release,
|
||||
@ -821,6 +1019,87 @@ static const struct block_device_operations zvol_ops = {
|
||||
#endif
|
||||
};
|
||||
|
||||
static int
|
||||
zvol_alloc_non_blk_mq(struct zvol_state_os *zso)
|
||||
{
|
||||
#if defined(HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS)
|
||||
#if defined(HAVE_BLK_ALLOC_DISK)
|
||||
zso->zvo_disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (zso->zvo_disk == NULL)
|
||||
return (1);
|
||||
|
||||
zso->zvo_disk->minors = ZVOL_MINORS;
|
||||
zso->zvo_queue = zso->zvo_disk->queue;
|
||||
#else
|
||||
zso->zvo_queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (zso->zvo_queue == NULL)
|
||||
return (1);
|
||||
|
||||
zso->zvo_disk = alloc_disk(ZVOL_MINORS);
|
||||
if (zso->zvo_disk == NULL) {
|
||||
blk_cleanup_queue(zso->zvo_queue);
|
||||
return (1);
|
||||
}
|
||||
|
||||
zso->zvo_disk->queue = zso->zvo_queue;
|
||||
#endif /* HAVE_BLK_ALLOC_DISK */
|
||||
#else
|
||||
zso->zvo_queue = blk_generic_alloc_queue(zvol_request, NUMA_NO_NODE);
|
||||
if (zso->zvo_queue == NULL)
|
||||
return (1);
|
||||
|
||||
zso->zvo_disk = alloc_disk(ZVOL_MINORS);
|
||||
if (zso->zvo_disk == NULL) {
|
||||
blk_cleanup_queue(zso->zvo_queue);
|
||||
return (1);
|
||||
}
|
||||
|
||||
zso->zvo_disk->queue = zso->zvo_queue;
|
||||
#endif /* HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */
|
||||
return (0);
|
||||
|
||||
}
|
||||
|
||||
static int
|
||||
zvol_alloc_blk_mq(zvol_state_t *zv)
|
||||
{
|
||||
#ifdef HAVE_BLK_MQ
|
||||
struct zvol_state_os *zso = zv->zv_zso;
|
||||
|
||||
/* Allocate our blk-mq tag_set */
|
||||
if (zvol_blk_mq_alloc_tag_set(zv) != 0)
|
||||
return (1);
|
||||
|
||||
#if defined(HAVE_BLK_ALLOC_DISK)
|
||||
zso->zvo_disk = blk_mq_alloc_disk(&zso->tag_set, zv);
|
||||
if (zso->zvo_disk == NULL) {
|
||||
blk_mq_free_tag_set(&zso->tag_set);
|
||||
return (1);
|
||||
}
|
||||
zso->zvo_queue = zso->zvo_disk->queue;
|
||||
zso->zvo_disk->minors = ZVOL_MINORS;
|
||||
#else
|
||||
zso->zvo_disk = alloc_disk(ZVOL_MINORS);
|
||||
if (zso->zvo_disk == NULL) {
|
||||
blk_cleanup_queue(zso->zvo_queue);
|
||||
blk_mq_free_tag_set(&zso->tag_set);
|
||||
return (1);
|
||||
}
|
||||
/* Allocate queue */
|
||||
zso->zvo_queue = blk_mq_init_queue(&zso->tag_set);
|
||||
if (IS_ERR(zso->zvo_queue)) {
|
||||
blk_mq_free_tag_set(&zso->tag_set);
|
||||
return (1);
|
||||
}
|
||||
|
||||
/* Our queue is now created, assign it to our disk */
|
||||
zso->zvo_disk->queue = zso->zvo_queue;
|
||||
|
||||
#endif
|
||||
#endif
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocate memory for a new zvol_state_t and setup the required
|
||||
* request queue and generic disk structures for the block device.
|
||||
@ -831,6 +1110,7 @@ zvol_alloc(dev_t dev, const char *name)
|
||||
zvol_state_t *zv;
|
||||
struct zvol_state_os *zso;
|
||||
uint64_t volmode;
|
||||
int ret;
|
||||
|
||||
if (dsl_prop_get_integer(name, "volmode", &volmode, NULL) != 0)
|
||||
return (NULL);
|
||||
@ -849,48 +1129,44 @@ zvol_alloc(dev_t dev, const char *name)
|
||||
list_link_init(&zv->zv_next);
|
||||
mutex_init(&zv->zv_state_lock, NULL, MUTEX_DEFAULT, NULL);
|
||||
|
||||
#ifdef HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS
|
||||
#ifdef HAVE_BLK_ALLOC_DISK
|
||||
zso->zvo_disk = blk_alloc_disk(NUMA_NO_NODE);
|
||||
if (zso->zvo_disk == NULL)
|
||||
goto out_kmem;
|
||||
#ifdef HAVE_BLK_MQ
|
||||
zv->zv_zso->use_blk_mq = zvol_use_blk_mq;
|
||||
#endif
|
||||
|
||||
zso->zvo_disk->minors = ZVOL_MINORS;
|
||||
zso->zvo_queue = zso->zvo_disk->queue;
|
||||
#else
|
||||
zso->zvo_queue = blk_alloc_queue(NUMA_NO_NODE);
|
||||
if (zso->zvo_queue == NULL)
|
||||
goto out_kmem;
|
||||
|
||||
zso->zvo_disk = alloc_disk(ZVOL_MINORS);
|
||||
if (zso->zvo_disk == NULL) {
|
||||
blk_cleanup_queue(zso->zvo_queue);
|
||||
goto out_kmem;
|
||||
/*
|
||||
* The block layer has 3 interfaces for getting BIOs:
|
||||
*
|
||||
* 1. blk-mq request queues (new)
|
||||
* 2. submit_bio() (oldest)
|
||||
* 3. regular request queues (old).
|
||||
*
|
||||
* Each of those interfaces has two permutations:
|
||||
*
|
||||
* a) We have blk_alloc_disk()/blk_mq_alloc_disk(), which allocates
|
||||
* both the disk and its queue (5.14 kernel or newer)
|
||||
*
|
||||
* b) We don't have blk_*alloc_disk(), and have to allocate the
|
||||
* disk and the queue separately. (5.13 kernel or older)
|
||||
*/
|
||||
if (zv->zv_zso->use_blk_mq) {
|
||||
ret = zvol_alloc_blk_mq(zv);
|
||||
zso->zvo_disk->fops = &zvol_ops_blk_mq;
|
||||
} else {
|
||||
ret = zvol_alloc_non_blk_mq(zso);
|
||||
zso->zvo_disk->fops = &zvol_ops;
|
||||
}
|
||||
|
||||
zso->zvo_disk->queue = zso->zvo_queue;
|
||||
#endif /* HAVE_BLK_ALLOC_DISK */
|
||||
#else
|
||||
zso->zvo_queue = blk_generic_alloc_queue(zvol_request, NUMA_NO_NODE);
|
||||
if (zso->zvo_queue == NULL)
|
||||
if (ret != 0)
|
||||
goto out_kmem;
|
||||
|
||||
zso->zvo_disk = alloc_disk(ZVOL_MINORS);
|
||||
if (zso->zvo_disk == NULL) {
|
||||
blk_cleanup_queue(zso->zvo_queue);
|
||||
goto out_kmem;
|
||||
}
|
||||
|
||||
zso->zvo_disk->queue = zso->zvo_queue;
|
||||
#endif /* HAVE_SUBMIT_BIO_IN_BLOCK_DEVICE_OPERATIONS */
|
||||
|
||||
blk_queue_set_write_cache(zso->zvo_queue, B_TRUE, B_TRUE);
|
||||
|
||||
/* Limit read-ahead to a single page to prevent over-prefetching. */
|
||||
blk_queue_set_read_ahead(zso->zvo_queue, 1);
|
||||
|
||||
/* Disable write merging in favor of the ZIO pipeline. */
|
||||
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, zso->zvo_queue);
|
||||
if (!zv->zv_zso->use_blk_mq) {
|
||||
/* Disable write merging in favor of the ZIO pipeline. */
|
||||
blk_queue_flag_set(QUEUE_FLAG_NOMERGES, zso->zvo_queue);
|
||||
}
|
||||
|
||||
/* Enable /proc/diskstats */
|
||||
blk_queue_flag_set(QUEUE_FLAG_IO_STAT, zso->zvo_queue);
|
||||
@ -918,7 +1194,6 @@ zvol_alloc(dev_t dev, const char *name)
|
||||
}
|
||||
|
||||
zso->zvo_disk->first_minor = (dev & MINORMASK);
|
||||
zso->zvo_disk->fops = &zvol_ops;
|
||||
zso->zvo_disk->private_data = zv;
|
||||
snprintf(zso->zvo_disk->disk_name, DISK_NAME_LEN, "%s%d",
|
||||
ZVOL_DEV_NAME, (dev & MINORMASK));
|
||||
@ -963,6 +1238,11 @@ zvol_os_free(zvol_state_t *zv)
|
||||
put_disk(zv->zv_zso->zvo_disk);
|
||||
#endif
|
||||
|
||||
#ifdef HAVE_BLK_MQ
|
||||
if (zv->zv_zso->use_blk_mq)
|
||||
blk_mq_free_tag_set(&zv->zv_zso->tag_set);
|
||||
#endif
|
||||
|
||||
ida_simple_remove(&zvol_ida,
|
||||
MINOR(zv->zv_zso->zvo_dev) >> ZVOL_MINOR_BITS);
|
||||
|
||||
@ -1044,8 +1324,69 @@ zvol_os_create_minor(const char *name)
|
||||
|
||||
blk_queue_max_hw_sectors(zv->zv_zso->zvo_queue,
|
||||
(DMU_MAX_ACCESS / 4) >> 9);
|
||||
blk_queue_max_segments(zv->zv_zso->zvo_queue, UINT16_MAX);
|
||||
blk_queue_max_segment_size(zv->zv_zso->zvo_queue, UINT_MAX);
|
||||
|
||||
if (zv->zv_zso->use_blk_mq) {
|
||||
/*
|
||||
* IO requests can be really big (1MB). When an IO request
|
||||
* comes in, it is passed off to zvol_read() or zvol_write()
|
||||
* in a new thread, where it is chunked up into 'volblocksize'
|
||||
* sized pieces and processed. So for example, if the request
|
||||
* is a 1MB write and your volblocksize is 128k, one zvol_write
|
||||
* thread will take that request and sequentially do ten 128k
|
||||
* IOs. This is due to the fact that the thread needs to lock
|
||||
* each volblocksize sized block. So you might be wondering:
|
||||
* "instead of passing the whole 1MB request to one thread,
|
||||
* why not pass ten individual 128k chunks to ten threads and
|
||||
* process the whole write in parallel?" The short answer is
|
||||
* that there's a sweet spot number of chunks that balances
|
||||
* the greater parallelism with the added overhead of more
|
||||
* threads. The sweet spot can be different depending on if you
|
||||
* have a read or write heavy workload. Writes typically want
|
||||
* high chunk counts while reads typically want lower ones. On
|
||||
* a test pool with 6 NVMe drives in a 3x 2-disk mirror
|
||||
* configuration, with volblocksize=8k, the sweet spot for good
|
||||
* sequential reads and writes was at 8 chunks.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Below we tell the kernel how big we want our requests
|
||||
* to be. You would think that blk_queue_io_opt() would be
|
||||
* used to do this since it is used to "set optimal request
|
||||
* size for the queue", but that doesn't seem to do
|
||||
* anything - the kernel still gives you huge requests
|
||||
* with tons of little PAGE_SIZE segments contained within it.
|
||||
*
|
||||
* Knowing that the kernel will just give you PAGE_SIZE segments
|
||||
* no matter what, you can say "ok, I want PAGE_SIZE byte
|
||||
* segments, and I want 'N' of them per request", where N is
|
||||
* the correct number of segments for the volblocksize and
|
||||
* number of chunks you want.
|
||||
*/
|
||||
#ifdef HAVE_BLK_MQ
|
||||
if (zvol_blk_mq_blocks_per_thread != 0) {
|
||||
unsigned int chunks;
|
||||
chunks = MIN(zvol_blk_mq_blocks_per_thread, UINT16_MAX);
|
||||
|
||||
blk_queue_max_segment_size(zv->zv_zso->zvo_queue,
|
||||
PAGE_SIZE);
|
||||
blk_queue_max_segments(zv->zv_zso->zvo_queue,
|
||||
(zv->zv_volblocksize * chunks) / PAGE_SIZE);
|
||||
} else {
|
||||
/*
|
||||
* Special case: zvol_blk_mq_blocks_per_thread = 0
|
||||
* Max everything out.
|
||||
*/
|
||||
blk_queue_max_segments(zv->zv_zso->zvo_queue,
|
||||
UINT16_MAX);
|
||||
blk_queue_max_segment_size(zv->zv_zso->zvo_queue,
|
||||
UINT_MAX);
|
||||
}
|
||||
#endif
|
||||
} else {
|
||||
blk_queue_max_segments(zv->zv_zso->zvo_queue, UINT16_MAX);
|
||||
blk_queue_max_segment_size(zv->zv_zso->zvo_queue, UINT_MAX);
|
||||
}
|
||||
|
||||
blk_queue_physical_block_size(zv->zv_zso->zvo_queue,
|
||||
zv->zv_volblocksize);
|
||||
blk_queue_io_opt(zv->zv_zso->zvo_queue, zv->zv_volblocksize);
|
||||
@ -1167,19 +1508,54 @@ int
|
||||
zvol_init(void)
|
||||
{
|
||||
int error;
|
||||
int threads = MIN(MAX(zvol_threads, 1), 1024);
|
||||
|
||||
/*
|
||||
* zvol_threads is the module param the user passes in.
|
||||
*
|
||||
* zvol_actual_threads is what we use internally, since the user can
|
||||
* pass zvol_threads = 0 to mean "use all the CPUs" (the default).
|
||||
*/
|
||||
static unsigned int zvol_actual_threads;
|
||||
|
||||
if (zvol_threads == 0) {
|
||||
/*
|
||||
* See dde9380a1 for why 32 was chosen here. This should
|
||||
* probably be refined to be some multiple of the number
|
||||
* of CPUs.
|
||||
*/
|
||||
zvol_actual_threads = MAX(num_online_cpus(), 32);
|
||||
} else {
|
||||
zvol_actual_threads = MIN(MAX(zvol_threads, 1), 1024);
|
||||
}
|
||||
|
||||
error = register_blkdev(zvol_major, ZVOL_DRIVER);
|
||||
if (error) {
|
||||
printk(KERN_INFO "ZFS: register_blkdev() failed %d\n", error);
|
||||
return (error);
|
||||
}
|
||||
zvol_taskq = taskq_create(ZVOL_DRIVER, threads, maxclsyspri,
|
||||
threads * 2, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
|
||||
|
||||
#ifdef HAVE_BLK_MQ
|
||||
if (zvol_blk_mq_queue_depth == 0) {
|
||||
zvol_actual_blk_mq_queue_depth = BLKDEV_DEFAULT_RQ;
|
||||
} else {
|
||||
zvol_actual_blk_mq_queue_depth =
|
||||
MAX(zvol_blk_mq_queue_depth, BLKDEV_MIN_RQ);
|
||||
}
|
||||
|
||||
if (zvol_blk_mq_threads == 0) {
|
||||
zvol_blk_mq_actual_threads = num_online_cpus();
|
||||
} else {
|
||||
zvol_blk_mq_actual_threads = MIN(MAX(zvol_blk_mq_threads, 1),
|
||||
1024);
|
||||
}
|
||||
#endif
|
||||
zvol_taskq = taskq_create(ZVOL_DRIVER, zvol_actual_threads, maxclsyspri,
|
||||
zvol_actual_threads, INT_MAX, TASKQ_PREPOPULATE | TASKQ_DYNAMIC);
|
||||
if (zvol_taskq == NULL) {
|
||||
unregister_blkdev(zvol_major, ZVOL_DRIVER);
|
||||
return (-ENOMEM);
|
||||
}
|
||||
|
||||
zvol_init_impl();
|
||||
ida_init(&zvol_ida);
|
||||
return (0);
|
||||
@ -1202,7 +1578,8 @@ module_param(zvol_major, uint, 0444);
|
||||
MODULE_PARM_DESC(zvol_major, "Major number for zvol device");
|
||||
|
||||
module_param(zvol_threads, uint, 0444);
|
||||
MODULE_PARM_DESC(zvol_threads, "Max number of threads to handle I/O requests");
|
||||
/*
 * The description is built from adjacent string literals, which are joined
 * with nothing in between, so the word break has to be part of a literal.
 */
MODULE_PARM_DESC(zvol_threads, "Number of threads to handle I/O requests. Set "
	"to 0 to use all active CPUs");
|
||||
|
||||
module_param(zvol_request_sync, uint, 0644);
|
||||
MODULE_PARM_DESC(zvol_request_sync, "Synchronously handle bio requests");
|
||||
@ -1215,4 +1592,17 @@ MODULE_PARM_DESC(zvol_prefetch_bytes, "Prefetch N bytes at zvol start+end");
|
||||
|
||||
module_param(zvol_volmode, uint, 0644);
|
||||
MODULE_PARM_DESC(zvol_volmode, "Default volmode property value");
|
||||
|
||||
#ifdef HAVE_BLK_MQ
|
||||
module_param(zvol_blk_mq_queue_depth, uint, 0644);
|
||||
MODULE_PARM_DESC(zvol_blk_mq_queue_depth, "Default blk-mq queue depth");
|
||||
|
||||
module_param(zvol_use_blk_mq, uint, 0644);
|
||||
MODULE_PARM_DESC(zvol_use_blk_mq, "Use the blk-mq API for zvols");
|
||||
|
||||
module_param(zvol_blk_mq_blocks_per_thread, uint, 0644);
|
||||
MODULE_PARM_DESC(zvol_blk_mq_blocks_per_thread,
|
||||
"Process volblocksize blocks per thread");
|
||||
#endif
|
||||
|
||||
/* END CSTYLED */
|
||||
|
@ -696,16 +696,15 @@ zpool_feature_init(void)
|
||||
ZFEATURE_FLAG_MOS, ZFEATURE_TYPE_BOOLEAN, NULL, sfeatures);
|
||||
|
||||
{
|
||||
|
||||
static const spa_feature_t zilsaxattr_deps[] = {
|
||||
SPA_FEATURE_EXTENSIBLE_DATASET,
|
||||
SPA_FEATURE_NONE
|
||||
};
|
||||
zfeature_register(SPA_FEATURE_ZILSAXATTR,
|
||||
"org.openzfs:zilsaxattr", "zilsaxattr",
|
||||
"Support for xattr=sa extended attribute logging in ZIL.",
|
||||
ZFEATURE_FLAG_PER_DATASET | ZFEATURE_FLAG_READONLY_COMPAT,
|
||||
ZFEATURE_TYPE_BOOLEAN, zilsaxattr_deps, sfeatures);
|
||||
static const spa_feature_t zilsaxattr_deps[] = {
|
||||
SPA_FEATURE_EXTENSIBLE_DATASET,
|
||||
SPA_FEATURE_NONE
|
||||
};
|
||||
zfeature_register(SPA_FEATURE_ZILSAXATTR,
|
||||
"org.openzfs:zilsaxattr", "zilsaxattr",
|
||||
"Support for xattr=sa extended attribute logging in ZIL.",
|
||||
ZFEATURE_FLAG_PER_DATASET | ZFEATURE_FLAG_READONLY_COMPAT,
|
||||
ZFEATURE_TYPE_BOOLEAN, zilsaxattr_deps, sfeatures);
|
||||
}
|
||||
|
||||
zfeature_register(SPA_FEATURE_HEAD_ERRLOG,
|
||||
@ -714,6 +713,18 @@ zpool_feature_init(void)
|
||||
ZFEATURE_FLAG_ACTIVATE_ON_ENABLE, ZFEATURE_TYPE_BOOLEAN, NULL,
|
||||
sfeatures);
|
||||
|
||||
{
|
||||
static const spa_feature_t blake3_deps[] = {
|
||||
SPA_FEATURE_EXTENSIBLE_DATASET,
|
||||
SPA_FEATURE_NONE
|
||||
};
|
||||
zfeature_register(SPA_FEATURE_BLAKE3,
|
||||
"org.openzfs:blake3", "blake3",
|
||||
"BLAKE3 hash algorithm.",
|
||||
ZFEATURE_FLAG_PER_DATASET, ZFEATURE_TYPE_BOOLEAN,
|
||||
blake3_deps, sfeatures);
|
||||
}
|
||||
|
||||
zfs_mod_list_supported_free(sfeatures);
|
||||
}
|
||||
|
||||
|
@ -84,6 +84,7 @@ zfs_prop_init(void)
|
||||
{ "sha512", ZIO_CHECKSUM_SHA512 },
|
||||
{ "skein", ZIO_CHECKSUM_SKEIN },
|
||||
{ "edonr", ZIO_CHECKSUM_EDONR },
|
||||
{ "blake3", ZIO_CHECKSUM_BLAKE3 },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
@ -102,6 +103,9 @@ zfs_prop_init(void)
|
||||
ZIO_CHECKSUM_SKEIN | ZIO_CHECKSUM_VERIFY },
|
||||
{ "edonr,verify",
|
||||
ZIO_CHECKSUM_EDONR | ZIO_CHECKSUM_VERIFY },
|
||||
{ "blake3", ZIO_CHECKSUM_BLAKE3 },
|
||||
{ "blake3,verify",
|
||||
ZIO_CHECKSUM_BLAKE3 | ZIO_CHECKSUM_VERIFY },
|
||||
{ NULL }
|
||||
};
|
||||
|
||||
@ -394,12 +398,12 @@ zfs_prop_init(void)
|
||||
ZIO_CHECKSUM_DEFAULT, PROP_INHERIT, ZFS_TYPE_FILESYSTEM |
|
||||
ZFS_TYPE_VOLUME,
|
||||
"on | off | fletcher2 | fletcher4 | sha256 | sha512 | skein"
|
||||
" | edonr",
|
||||
" | edonr | blake3",
|
||||
"CHECKSUM", checksum_table, sfeatures);
|
||||
zprop_register_index(ZFS_PROP_DEDUP, "dedup", ZIO_CHECKSUM_OFF,
|
||||
PROP_INHERIT, ZFS_TYPE_FILESYSTEM | ZFS_TYPE_VOLUME,
|
||||
"on | off | verify | sha256[,verify] | sha512[,verify] | "
|
||||
"skein[,verify] | edonr,verify",
|
||||
"skein[,verify] | edonr,verify | blake3[,verify]",
|
||||
"DEDUP", dedup_table, sfeatures);
|
||||
zprop_register_index(ZFS_PROP_COMPRESSION, "compression",
|
||||
ZIO_COMPRESS_DEFAULT, PROP_INHERIT,
|
||||
|
117
sys/contrib/openzfs/module/zfs/blake3_zfs.c
Normal file
117
sys/contrib/openzfs/module/zfs/blake3_zfs.c
Normal file
@ -0,0 +1,117 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://opensource.org/licenses/CDDL-1.0.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright 2022 Tino Reichardt <milky-zfs@mcmilk.de>
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
#include <sys/blake3.h>
|
||||
#include <sys/abd.h>
|
||||
|
||||
static int
|
||||
blake3_incremental(void *buf, size_t size, void *arg)
|
||||
{
|
||||
BLAKE3_CTX *ctx = arg;
|
||||
|
||||
Blake3_Update(ctx, buf, size);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
/*
|
||||
* Computes a native 256-bit BLAKE3 MAC checksum. Please note that this
|
||||
* function requires the presence of a ctx_template that should be allocated
|
||||
* using abd_checksum_blake3_tmpl_init.
|
||||
*/
|
||||
void
|
||||
abd_checksum_blake3_native(abd_t *abd, uint64_t size, const void *ctx_template,
|
||||
zio_cksum_t *zcp)
|
||||
{
|
||||
ASSERT(ctx_template != 0);
|
||||
|
||||
#if defined(_KERNEL)
|
||||
BLAKE3_CTX *ctx = blake3_per_cpu_ctx[CPU_SEQID_UNSTABLE];
|
||||
#else
|
||||
BLAKE3_CTX *ctx = kmem_alloc(sizeof (*ctx), KM_SLEEP);
|
||||
#endif
|
||||
|
||||
memcpy(ctx, ctx_template, sizeof (*ctx));
|
||||
(void) abd_iterate_func(abd, 0, size, blake3_incremental, ctx);
|
||||
Blake3_Final(ctx, (uint8_t *)zcp);
|
||||
|
||||
#if !defined(_KERNEL)
|
||||
memset(ctx, 0, sizeof (*ctx));
|
||||
kmem_free(ctx, sizeof (*ctx));
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
|
||||
* Byteswapped version of abd_checksum_blake3_native. This just invokes
|
||||
* the native checksum function and byteswaps the resulting checksum (since
|
||||
* BLAKE3 is internally endian-insensitive).
|
||||
*/
|
||||
void
|
||||
abd_checksum_blake3_byteswap(abd_t *abd, uint64_t size,
|
||||
const void *ctx_template, zio_cksum_t *zcp)
|
||||
{
|
||||
zio_cksum_t tmp;
|
||||
|
||||
ASSERT(ctx_template != 0);
|
||||
|
||||
abd_checksum_blake3_native(abd, size, ctx_template, &tmp);
|
||||
zcp->zc_word[0] = BSWAP_64(tmp.zc_word[0]);
|
||||
zcp->zc_word[1] = BSWAP_64(tmp.zc_word[1]);
|
||||
zcp->zc_word[2] = BSWAP_64(tmp.zc_word[2]);
|
||||
zcp->zc_word[3] = BSWAP_64(tmp.zc_word[3]);
|
||||
}
|
||||
|
||||
/*
|
||||
* Allocates a BLAKE3 MAC template suitable for using in BLAKE3 MAC checksum
|
||||
* computations and returns a pointer to it.
|
||||
*/
|
||||
void *
|
||||
abd_checksum_blake3_tmpl_init(const zio_cksum_salt_t *salt)
|
||||
{
|
||||
BLAKE3_CTX *ctx;
|
||||
|
||||
ASSERT(sizeof (salt->zcs_bytes) == 32);
|
||||
|
||||
/* init reference object */
|
||||
ctx = kmem_zalloc(sizeof (*ctx), KM_SLEEP);
|
||||
Blake3_InitKeyed(ctx, salt->zcs_bytes);
|
||||
|
||||
return (ctx);
|
||||
}
|
||||
|
||||
/*
|
||||
* Frees a BLAKE3 context template previously allocated using
|
||||
* zio_checksum_blake3_tmpl_init.
|
||||
*/
|
||||
void
|
||||
abd_checksum_blake3_tmpl_free(void *ctx_template)
|
||||
{
|
||||
BLAKE3_CTX *ctx = ctx_template;
|
||||
|
||||
memset(ctx, 0, sizeof (*ctx));
|
||||
kmem_free(ctx, sizeof (*ctx));
|
||||
}
|
@ -88,7 +88,7 @@ dsl_prop_get_dd(dsl_dir_t *dd, const char *propname,
|
||||
setpoint[0] = '\0';
|
||||
|
||||
prop = zfs_name_to_prop(propname);
|
||||
inheritable = (prop == ZPROP_INVAL || zfs_prop_inheritable(prop));
|
||||
inheritable = (prop == ZPROP_USERPROP || zfs_prop_inheritable(prop));
|
||||
inheritstr = kmem_asprintf("%s%s", propname, ZPROP_INHERIT_SUFFIX);
|
||||
recvdstr = kmem_asprintf("%s%s", propname, ZPROP_RECVD_SUFFIX);
|
||||
|
||||
@ -168,7 +168,7 @@ dsl_prop_get_ds(dsl_dataset_t *ds, const char *propname,
|
||||
uint64_t zapobj;
|
||||
|
||||
ASSERT(dsl_pool_config_held(ds->ds_dir->dd_pool));
|
||||
inheritable = (prop == ZPROP_INVAL || zfs_prop_inheritable(prop));
|
||||
inheritable = (prop == ZPROP_USERPROP || zfs_prop_inheritable(prop));
|
||||
zapobj = dsl_dataset_phys(ds)->ds_props_obj;
|
||||
|
||||
if (zapobj != 0) {
|
||||
@ -1055,12 +1055,12 @@ dsl_prop_get_all_impl(objset_t *mos, uint64_t propobj,
|
||||
prop = zfs_name_to_prop(propname);
|
||||
|
||||
/* Skip non-inheritable properties. */
|
||||
if ((flags & DSL_PROP_GET_INHERITING) && prop != ZPROP_INVAL &&
|
||||
!zfs_prop_inheritable(prop))
|
||||
if ((flags & DSL_PROP_GET_INHERITING) &&
|
||||
prop != ZPROP_USERPROP && !zfs_prop_inheritable(prop))
|
||||
continue;
|
||||
|
||||
/* Skip properties not valid for this type. */
|
||||
if ((flags & DSL_PROP_GET_SNAPSHOT) && prop != ZPROP_INVAL &&
|
||||
if ((flags & DSL_PROP_GET_SNAPSHOT) && prop != ZPROP_USERPROP &&
|
||||
!zfs_prop_valid_for_type(prop, ZFS_TYPE_SNAPSHOT, B_FALSE))
|
||||
continue;
|
||||
|
||||
|
@ -280,6 +280,7 @@ typedef struct scan_io {
|
||||
struct dsl_scan_io_queue {
|
||||
dsl_scan_t *q_scn; /* associated dsl_scan_t */
|
||||
vdev_t *q_vd; /* top-level vdev that this queue represents */
|
||||
zio_t *q_zio; /* scn_zio_root child for waiting on IO */
|
||||
|
||||
/* trees used for sorting I/Os and extents of I/Os */
|
||||
range_tree_t *q_exts_by_addr;
|
||||
@ -1276,9 +1277,12 @@ dsl_scan_should_clear(dsl_scan_t *scn)
|
||||
mutex_enter(&tvd->vdev_scan_io_queue_lock);
|
||||
queue = tvd->vdev_scan_io_queue;
|
||||
if (queue != NULL) {
|
||||
/* # extents in exts_by_size = # in exts_by_addr */
|
||||
/*
|
||||
* # of extents in exts_by_size = # in exts_by_addr.
|
||||
* B-tree efficiency is ~75%, but can be as low as 50%.
|
||||
*/
|
||||
mused += zfs_btree_numnodes(&queue->q_exts_by_size) *
|
||||
sizeof (range_seg_gap_t) + queue->q_sio_memused;
|
||||
3 * sizeof (range_seg_gap_t) + queue->q_sio_memused;
|
||||
}
|
||||
mutex_exit(&tvd->vdev_scan_io_queue_lock);
|
||||
}
|
||||
@ -3033,15 +3037,19 @@ scan_io_queues_run_one(void *arg)
|
||||
dsl_scan_io_queue_t *queue = arg;
|
||||
kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock;
|
||||
boolean_t suspended = B_FALSE;
|
||||
range_seg_t *rs = NULL;
|
||||
scan_io_t *sio = NULL;
|
||||
range_seg_t *rs;
|
||||
scan_io_t *sio;
|
||||
zio_t *zio;
|
||||
list_t sio_list;
|
||||
|
||||
ASSERT(queue->q_scn->scn_is_sorted);
|
||||
|
||||
list_create(&sio_list, sizeof (scan_io_t),
|
||||
offsetof(scan_io_t, sio_nodes.sio_list_node));
|
||||
zio = zio_null(queue->q_scn->scn_zio_root, queue->q_scn->scn_dp->dp_spa,
|
||||
NULL, NULL, NULL, ZIO_FLAG_CANFAIL);
|
||||
mutex_enter(q_lock);
|
||||
queue->q_zio = zio;
|
||||
|
||||
/* Calculate maximum in-flight bytes for this vdev. */
|
||||
queue->q_maxinflight_bytes = MAX(1, zfs_scan_vdev_limit *
|
||||
@ -3108,7 +3116,9 @@ scan_io_queues_run_one(void *arg)
|
||||
scan_io_queue_insert_impl(queue, sio);
|
||||
}
|
||||
|
||||
queue->q_zio = NULL;
|
||||
mutex_exit(q_lock);
|
||||
zio_nowait(zio);
|
||||
list_destroy(&sio_list);
|
||||
}
|
||||
|
||||
@ -4073,6 +4083,7 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
|
||||
dsl_scan_t *scn = dp->dp_scan;
|
||||
size_t size = BP_GET_PSIZE(bp);
|
||||
abd_t *data = abd_alloc_for_io(size, B_FALSE);
|
||||
zio_t *pio;
|
||||
|
||||
if (queue == NULL) {
|
||||
ASSERT3U(scn->scn_maxinflight_bytes, >, 0);
|
||||
@ -4081,6 +4092,7 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
|
||||
cv_wait(&spa->spa_scrub_io_cv, &spa->spa_scrub_lock);
|
||||
spa->spa_scrub_inflight += BP_GET_PSIZE(bp);
|
||||
mutex_exit(&spa->spa_scrub_lock);
|
||||
pio = scn->scn_zio_root;
|
||||
} else {
|
||||
kmutex_t *q_lock = &queue->q_vd->vdev_scan_io_queue_lock;
|
||||
|
||||
@ -4089,12 +4101,14 @@ scan_exec_io(dsl_pool_t *dp, const blkptr_t *bp, int zio_flags,
|
||||
while (queue->q_inflight_bytes >= queue->q_maxinflight_bytes)
|
||||
cv_wait(&queue->q_zio_cv, q_lock);
|
||||
queue->q_inflight_bytes += BP_GET_PSIZE(bp);
|
||||
pio = queue->q_zio;
|
||||
mutex_exit(q_lock);
|
||||
}
|
||||
|
||||
ASSERT(pio != NULL);
|
||||
count_block(scn, dp->dp_blkstats, bp);
|
||||
zio_nowait(zio_read(scn->scn_zio_root, spa, bp, data, size,
|
||||
dsl_scan_scrub_done, queue, ZIO_PRIORITY_SCRUB, zio_flags, zb));
|
||||
zio_nowait(zio_read(pio, spa, bp, data, size, dsl_scan_scrub_done,
|
||||
queue, ZIO_PRIORITY_SCRUB, zio_flags, zb));
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -30,6 +30,7 @@
|
||||
*/
|
||||
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zfs_chksum.h>
|
||||
#include <sys/spa_impl.h>
|
||||
#include <sys/zio.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
@ -2417,6 +2418,7 @@ spa_init(spa_mode_t mode)
|
||||
vdev_raidz_math_init();
|
||||
vdev_file_init();
|
||||
zfs_prop_init();
|
||||
chksum_init();
|
||||
zpool_prop_init();
|
||||
zpool_feature_init();
|
||||
spa_config_load();
|
||||
@ -2438,6 +2440,7 @@ spa_fini(void)
|
||||
vdev_cache_stat_fini();
|
||||
vdev_mirror_stat_fini();
|
||||
vdev_raidz_math_fini();
|
||||
chksum_fini();
|
||||
zil_fini();
|
||||
dmu_fini();
|
||||
zio_fini();
|
||||
|
@ -5496,7 +5496,7 @@ vdev_props_set_sync(void *arg, dmu_tx_t *tx)
|
||||
}
|
||||
|
||||
switch (prop = vdev_name_to_prop(propname)) {
|
||||
case VDEV_PROP_USER:
|
||||
case VDEV_PROP_USERPROP:
|
||||
if (vdev_prop_user(propname)) {
|
||||
strval = fnvpair_value_string(elem);
|
||||
if (strlen(strval) == 0) {
|
||||
@ -5580,7 +5580,7 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
uint64_t intval = 0;
|
||||
char *strval = NULL;
|
||||
|
||||
if (prop == VDEV_PROP_USER && !vdev_prop_user(propname)) {
|
||||
if (prop == VDEV_PROP_USERPROP && !vdev_prop_user(propname)) {
|
||||
error = EINVAL;
|
||||
goto end;
|
||||
}
|
||||
@ -5937,7 +5937,7 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
case VDEV_PROP_COMMENT:
|
||||
/* Exists in the ZAP below */
|
||||
/* FALLTHRU */
|
||||
case VDEV_PROP_USER:
|
||||
case VDEV_PROP_USERPROP:
|
||||
/* User Properties */
|
||||
src = ZPROP_SRC_LOCAL;
|
||||
|
||||
|
@ -325,7 +325,7 @@ zcp_synctask_inherit_prop_check(void *arg, dmu_tx_t *tx)
|
||||
zcp_inherit_prop_arg_t *args = arg;
|
||||
zfs_prop_t prop = zfs_name_to_prop(args->zipa_prop);
|
||||
|
||||
if (prop == ZPROP_INVAL) {
|
||||
if (prop == ZPROP_USERPROP) {
|
||||
if (zfs_prop_user(args->zipa_prop))
|
||||
return (0);
|
||||
|
||||
|
323
sys/contrib/openzfs/module/zfs/zfs_chksum.c
Normal file
323
sys/contrib/openzfs/module/zfs/zfs_chksum.c
Normal file
@ -0,0 +1,323 @@
|
||||
/*
|
||||
* CDDL HEADER START
|
||||
*
|
||||
* The contents of this file are subject to the terms of the
|
||||
* Common Development and Distribution License (the "License").
|
||||
* You may not use this file except in compliance with the License.
|
||||
*
|
||||
* You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
|
||||
* or http://www.opensolaris.org/os/licensing.
|
||||
* See the License for the specific language governing permissions
|
||||
* and limitations under the License.
|
||||
*
|
||||
* When distributing Covered Code, include this CDDL HEADER in each
|
||||
* file and include the License file at usr/src/OPENSOLARIS.LICENSE.
|
||||
* If applicable, add the following below this CDDL HEADER, with the
|
||||
* fields enclosed by brackets "[]" replaced with your own identifying
|
||||
* information: Portions Copyright [yyyy] [name of copyright owner]
|
||||
*
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
|
||||
/*
|
||||
* Copyright (c) 2021 Tino Reichardt <milky-zfs@mcmilk.de>
|
||||
*/
|
||||
|
||||
#include <sys/types.h>
|
||||
#include <sys/spa.h>
|
||||
#include <sys/zio_checksum.h>
|
||||
#include <sys/zfs_context.h>
|
||||
#include <sys/zfs_chksum.h>
|
||||
|
||||
#include <sys/blake3.h>
|
||||
|
||||
static kstat_t *chksum_kstat = NULL;
|
||||
|
||||
typedef struct {
|
||||
const char *name;
|
||||
const char *impl;
|
||||
uint64_t bs1k;
|
||||
uint64_t bs4k;
|
||||
uint64_t bs16k;
|
||||
uint64_t bs64k;
|
||||
uint64_t bs256k;
|
||||
uint64_t bs1m;
|
||||
uint64_t bs4m;
|
||||
zio_cksum_salt_t salt;
|
||||
zio_checksum_t *(func);
|
||||
zio_checksum_tmpl_init_t *(init);
|
||||
zio_checksum_tmpl_free_t *(free);
|
||||
} chksum_stat_t;
|
||||
|
||||
static int chksum_stat_cnt = 0;
|
||||
static chksum_stat_t *chksum_stat_data = 0;
|
||||
|
||||
/*
|
||||
* i3-1005G1 test output:
|
||||
*
|
||||
* implementation 1k 4k 16k 64k 256k 1m 4m
|
||||
* fletcher-4 5421 15001 26468 32555 34720 32801 18847
|
||||
* edonr-generic 1196 1602 1761 1749 1762 1759 1751
|
||||
* skein-generic 546 591 608 615 619 612 616
|
||||
* sha256-generic 246 270 274 274 277 275 276
|
||||
* sha256-avx 262 296 304 307 307 307 306
|
||||
* sha256-sha-ni 769 1072 1172 1220 1219 1232 1228
|
||||
* sha256-openssl 240 300 316 314 304 285 276
|
||||
* sha512-generic 333 374 385 392 391 393 392
|
||||
* sha512-openssl 353 441 467 476 472 467 426
|
||||
* sha512-avx 362 444 473 475 479 476 478
|
||||
* sha512-avx2 394 500 530 538 543 545 542
|
||||
* blake3-generic 308 313 313 313 312 313 312
|
||||
* blake3-sse2 402 1289 1423 1446 1432 1458 1413
|
||||
* blake3-sse41 427 1470 1625 1704 1679 1607 1629
|
||||
* blake3-avx2 428 1920 3095 3343 3356 3318 3204
|
||||
* blake3-avx512 473 2687 4905 5836 5844 5643 5374
|
||||
*/
|
||||
/*
 * kstat raw-ops callback that prints the column headings of the benchmark
 * table into buf. At most size bytes are written, including the terminating
 * NUL. Always returns 0.
 *
 * The line is produced by a single snprintf() call. Chaining several calls
 * through a running offset is unsafe: snprintf() returns the length it
 * wanted to write, so after a truncation the offset would pass size and the
 * remaining-space argument "size - off" would wrap around.
 */
static int
chksum_stat_kstat_headers(char *buf, size_t size)
{
	(void) snprintf(buf, size, "%-23s%8s%8s%8s%8s%8s%8s%8s\n",
	    "implementation", "1k", "4k", "16k", "64k", "256k", "1m", "4m");

	return (0);
}
|
||||
|
||||
static int
|
||||
chksum_stat_kstat_data(char *buf, size_t size, void *data)
|
||||
{
|
||||
chksum_stat_t *cs;
|
||||
ssize_t off = 0;
|
||||
char b[24];
|
||||
|
||||
cs = (chksum_stat_t *)data;
|
||||
snprintf(b, 23, "%s-%s", cs->name, cs->impl);
|
||||
off += snprintf(buf + off, size - off, "%-23s", b);
|
||||
off += snprintf(buf + off, size - off, "%8llu",
|
||||
(u_longlong_t)cs->bs1k);
|
||||
off += snprintf(buf + off, size - off, "%8llu",
|
||||
(u_longlong_t)cs->bs4k);
|
||||
off += snprintf(buf + off, size - off, "%8llu",
|
||||
(u_longlong_t)cs->bs16k);
|
||||
off += snprintf(buf + off, size - off, "%8llu",
|
||||
(u_longlong_t)cs->bs64k);
|
||||
off += snprintf(buf + off, size - off, "%8llu",
|
||||
(u_longlong_t)cs->bs256k);
|
||||
off += snprintf(buf + off, size - off, "%8llu",
|
||||
(u_longlong_t)cs->bs1m);
|
||||
(void) snprintf(buf + off, size - off, "%8llu\n",
|
||||
(u_longlong_t)cs->bs4m);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void *
|
||||
chksum_stat_kstat_addr(kstat_t *ksp, loff_t n)
|
||||
{
|
||||
if (n < chksum_stat_cnt)
|
||||
ksp->ks_private = (void *)(chksum_stat_data + n);
|
||||
else
|
||||
ksp->ks_private = NULL;
|
||||
|
||||
return (ksp->ks_private);
|
||||
}
|
||||
|
||||
static void
|
||||
chksum_run(chksum_stat_t *cs, abd_t *abd, void *ctx, int round,
|
||||
uint64_t *result)
|
||||
{
|
||||
hrtime_t start;
|
||||
uint64_t run_bw, run_time_ns, run_count = 0, size = 0;
|
||||
uint32_t l, loops = 0;
|
||||
zio_cksum_t zcp;
|
||||
|
||||
switch (round) {
|
||||
case 1: /* 1k */
|
||||
size = 1<<10; loops = 128; break;
|
||||
case 2: /* 2k */
|
||||
size = 1<<12; loops = 64; break;
|
||||
case 3: /* 4k */
|
||||
size = 1<<14; loops = 32; break;
|
||||
case 4: /* 16k */
|
||||
size = 1<<16; loops = 16; break;
|
||||
case 5: /* 256k */
|
||||
size = 1<<18; loops = 8; break;
|
||||
case 6: /* 1m */
|
||||
size = 1<<20; loops = 4; break;
|
||||
case 7: /* 4m */
|
||||
size = 1<<22; loops = 1; break;
|
||||
}
|
||||
|
||||
kpreempt_disable();
|
||||
start = gethrtime();
|
||||
do {
|
||||
for (l = 0; l < loops; l++, run_count++)
|
||||
cs->func(abd, size, ctx, &zcp);
|
||||
|
||||
run_time_ns = gethrtime() - start;
|
||||
} while (run_time_ns < MSEC2NSEC(1));
|
||||
kpreempt_enable();
|
||||
|
||||
run_bw = size * run_count * NANOSEC;
|
||||
run_bw /= run_time_ns; /* B/s */
|
||||
*result = run_bw/1024/1024; /* MiB/s */
|
||||
}
|
||||
|
||||
static void
|
||||
chksum_benchit(chksum_stat_t *cs)
|
||||
{
|
||||
abd_t *abd;
|
||||
void *ctx = 0;
|
||||
void *salt = &cs->salt.zcs_bytes;
|
||||
|
||||
/* allocate test memory via default abd interface */
|
||||
abd = abd_alloc_linear(1<<22, B_FALSE);
|
||||
memset(salt, 0, sizeof (cs->salt.zcs_bytes));
|
||||
if (cs->init) {
|
||||
ctx = cs->init(&cs->salt);
|
||||
}
|
||||
|
||||
chksum_run(cs, abd, ctx, 1, &cs->bs1k);
|
||||
chksum_run(cs, abd, ctx, 2, &cs->bs4k);
|
||||
chksum_run(cs, abd, ctx, 3, &cs->bs16k);
|
||||
chksum_run(cs, abd, ctx, 4, &cs->bs64k);
|
||||
chksum_run(cs, abd, ctx, 5, &cs->bs256k);
|
||||
chksum_run(cs, abd, ctx, 6, &cs->bs1m);
|
||||
chksum_run(cs, abd, ctx, 7, &cs->bs4m);
|
||||
|
||||
/* free up temp memory */
|
||||
if (cs->free) {
|
||||
cs->free(ctx);
|
||||
}
|
||||
abd_free(abd);
|
||||
}
|
||||
|
||||
/*
|
||||
* Initialize and benchmark all supported implementations.
|
||||
*/
|
||||
static void
|
||||
chksum_benchmark(void)
|
||||
{
|
||||
|
||||
#ifndef _KERNEL
|
||||
/* we need the benchmark only for the kernel module */
|
||||
return;
|
||||
#endif
|
||||
|
||||
chksum_stat_t *cs;
|
||||
int cbid = 0, id;
|
||||
uint64_t max = 0;
|
||||
|
||||
/* space for the benchmark times */
|
||||
chksum_stat_cnt = 4;
|
||||
chksum_stat_cnt += blake3_get_impl_count();
|
||||
chksum_stat_data = (chksum_stat_t *)kmem_zalloc(
|
||||
sizeof (chksum_stat_t) * chksum_stat_cnt, KM_SLEEP);
|
||||
|
||||
/* edonr */
|
||||
cs = &chksum_stat_data[cbid++];
|
||||
cs->init = abd_checksum_edonr_tmpl_init;
|
||||
cs->func = abd_checksum_edonr_native;
|
||||
cs->free = abd_checksum_edonr_tmpl_free;
|
||||
cs->name = "edonr";
|
||||
cs->impl = "generic";
|
||||
chksum_benchit(cs);
|
||||
|
||||
/* skein */
|
||||
cs = &chksum_stat_data[cbid++];
|
||||
cs->init = abd_checksum_skein_tmpl_init;
|
||||
cs->func = abd_checksum_skein_native;
|
||||
cs->free = abd_checksum_skein_tmpl_free;
|
||||
cs->name = "skein";
|
||||
cs->impl = "generic";
|
||||
chksum_benchit(cs);
|
||||
|
||||
/* sha256 */
|
||||
cs = &chksum_stat_data[cbid++];
|
||||
cs->init = 0;
|
||||
cs->func = abd_checksum_SHA256;
|
||||
cs->free = 0;
|
||||
cs->name = "sha256";
|
||||
cs->impl = "generic";
|
||||
chksum_benchit(cs);
|
||||
|
||||
/* sha512 */
|
||||
cs = &chksum_stat_data[cbid++];
|
||||
cs->init = 0;
|
||||
cs->func = abd_checksum_SHA512_native;
|
||||
cs->free = 0;
|
||||
cs->name = "sha512";
|
||||
cs->impl = "generic";
|
||||
chksum_benchit(cs);
|
||||
|
||||
/* blake3 */
|
||||
for (id = 0; id < blake3_get_impl_count(); id++) {
|
||||
blake3_set_impl_id(id);
|
||||
cs = &chksum_stat_data[cbid++];
|
||||
cs->init = abd_checksum_blake3_tmpl_init;
|
||||
cs->func = abd_checksum_blake3_native;
|
||||
cs->free = abd_checksum_blake3_tmpl_free;
|
||||
cs->name = "blake3";
|
||||
cs->impl = blake3_get_impl_name();
|
||||
chksum_benchit(cs);
|
||||
if (cs->bs256k > max) {
|
||||
max = cs->bs256k;
|
||||
blake3_set_impl_fastest(id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
chksum_init(void)
|
||||
{
|
||||
#ifdef _KERNEL
|
||||
blake3_per_cpu_ctx_init();
|
||||
#endif
|
||||
|
||||
/* Benchmark supported implementations */
|
||||
chksum_benchmark();
|
||||
|
||||
/* Install kstats for all implementations */
|
||||
chksum_kstat = kstat_create("zfs", 0, "chksum_bench", "misc",
|
||||
KSTAT_TYPE_RAW, 0, KSTAT_FLAG_VIRTUAL);
|
||||
|
||||
if (chksum_kstat != NULL) {
|
||||
chksum_kstat->ks_data = NULL;
|
||||
chksum_kstat->ks_ndata = UINT32_MAX;
|
||||
kstat_set_raw_ops(chksum_kstat,
|
||||
chksum_stat_kstat_headers,
|
||||
chksum_stat_kstat_data,
|
||||
chksum_stat_kstat_addr);
|
||||
kstat_install(chksum_kstat);
|
||||
}
|
||||
|
||||
/* setup implementations */
|
||||
blake3_setup_impl();
|
||||
}
|
||||
|
||||
void
|
||||
chksum_fini(void)
|
||||
{
|
||||
if (chksum_kstat != NULL) {
|
||||
kstat_delete(chksum_kstat);
|
||||
chksum_kstat = NULL;
|
||||
}
|
||||
|
||||
if (chksum_stat_cnt) {
|
||||
kmem_free(chksum_stat_data,
|
||||
sizeof (chksum_stat_t) * chksum_stat_cnt);
|
||||
chksum_stat_cnt = 0;
|
||||
chksum_stat_data = 0;
|
||||
}
|
||||
|
||||
#ifdef _KERNEL
|
||||
blake3_per_cpu_ctx_fini();
|
||||
#endif
|
||||
}
|
@ -1104,7 +1104,7 @@ zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
|
||||
(void) innvl;
|
||||
zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
|
||||
|
||||
if (prop == ZPROP_INVAL) {
|
||||
if (prop == ZPROP_USERPROP) {
|
||||
if (!zfs_prop_user(zc->zc_value))
|
||||
return (SET_ERROR(EINVAL));
|
||||
return (zfs_secpolicy_write_perms(zc->zc_name,
|
||||
@ -2406,7 +2406,7 @@ zfs_prop_set_special(const char *dsname, zprop_source_t source,
|
||||
const char *strval = NULL;
|
||||
int err = -1;
|
||||
|
||||
if (prop == ZPROP_INVAL) {
|
||||
if (prop == ZPROP_USERPROP) {
|
||||
if (zfs_prop_userquota(propname))
|
||||
return (zfs_prop_set_userquota(dsname, pair));
|
||||
return (-1);
|
||||
@ -2577,7 +2577,7 @@ retry:
|
||||
/* inherited properties are expected to be booleans */
|
||||
if (nvpair_type(propval) != DATA_TYPE_BOOLEAN)
|
||||
err = SET_ERROR(EINVAL);
|
||||
} else if (err == 0 && prop == ZPROP_INVAL) {
|
||||
} else if (err == 0 && prop == ZPROP_USERPROP) {
|
||||
if (zfs_prop_user(propname)) {
|
||||
if (nvpair_type(propval) != DATA_TYPE_STRING)
|
||||
err = SET_ERROR(EINVAL);
|
||||
@ -2853,11 +2853,11 @@ zfs_ioc_inherit_prop(zfs_cmd_t *zc)
|
||||
* and reservation to the received or default values even though
|
||||
* they are not considered inheritable.
|
||||
*/
|
||||
if (prop != ZPROP_INVAL && !zfs_prop_inheritable(prop))
|
||||
if (prop != ZPROP_USERPROP && !zfs_prop_inheritable(prop))
|
||||
return (SET_ERROR(EINVAL));
|
||||
}
|
||||
|
||||
if (prop == ZPROP_INVAL) {
|
||||
if (prop == ZPROP_USERPROP) {
|
||||
if (!zfs_prop_user(propname))
|
||||
return (SET_ERROR(EINVAL));
|
||||
|
||||
@ -4488,7 +4488,7 @@ zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
|
||||
uint64_t intval, compval;
|
||||
int err;
|
||||
|
||||
if (prop == ZPROP_INVAL) {
|
||||
if (prop == ZPROP_USERPROP) {
|
||||
if (zfs_prop_user(propname)) {
|
||||
if ((err = zfs_secpolicy_write_perms(dsname,
|
||||
ZFS_DELEG_PERM_USERPROP, cr)))
|
||||
@ -5034,7 +5034,7 @@ zfs_ioc_recv_impl(char *tofs, char *tosnap, char *origin, nvlist_t *recvprops,
|
||||
/* -x property */
|
||||
const char *name = nvpair_name(nvp);
|
||||
zfs_prop_t prop = zfs_name_to_prop(name);
|
||||
if (prop != ZPROP_INVAL) {
|
||||
if (prop != ZPROP_USERPROP) {
|
||||
if (!zfs_prop_inheritable(prop))
|
||||
continue;
|
||||
} else if (!zfs_prop_user(name))
|
||||
|
@ -195,6 +195,10 @@ zio_checksum_info_t zio_checksum_table[ZIO_CHECKSUM_FUNCTIONS] = {
|
||||
abd_checksum_edonr_tmpl_init, abd_checksum_edonr_tmpl_free,
|
||||
ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_SALTED |
|
||||
ZCHECKSUM_FLAG_NOPWRITE, "edonr"},
|
||||
{{abd_checksum_blake3_native, abd_checksum_blake3_byteswap},
|
||||
abd_checksum_blake3_tmpl_init, abd_checksum_blake3_tmpl_free,
|
||||
ZCHECKSUM_FLAG_METADATA | ZCHECKSUM_FLAG_DEDUP |
|
||||
ZCHECKSUM_FLAG_SALTED | ZCHECKSUM_FLAG_NOPWRITE, "blake3"},
|
||||
};
|
||||
|
||||
/*
|
||||
@ -207,6 +211,8 @@ zio_checksum_to_feature(enum zio_checksum cksum)
|
||||
VERIFY((cksum & ~ZIO_CHECKSUM_MASK) == 0);
|
||||
|
||||
switch (cksum) {
|
||||
case ZIO_CHECKSUM_BLAKE3:
|
||||
return (SPA_FEATURE_BLAKE3);
|
||||
case ZIO_CHECKSUM_SHA512:
|
||||
return (SPA_FEATURE_SHA512);
|
||||
case ZIO_CHECKSUM_SKEIN:
|
||||
|
@ -113,8 +113,8 @@ tests = ['tst.destroy_fs', 'tst.destroy_snap', 'tst.get_count_and_limit',
|
||||
tags = ['functional', 'channel_program', 'synctask_core']
|
||||
|
||||
[tests/functional/checksum]
|
||||
tests = ['run_edonr_test', 'run_sha2_test', 'run_skein_test', 'filetest_001_pos',
|
||||
'filetest_002_pos']
|
||||
tests = ['run_edonr_test', 'run_sha2_test', 'run_skein_test', 'run_blake3_test',
|
||||
'filetest_001_pos', 'filetest_002_pos']
|
||||
tags = ['functional', 'checksum']
|
||||
|
||||
[tests/functional/clean_mirror]
|
||||
@ -937,9 +937,13 @@ tags = ['functional', 'zvol', 'zvol_cli']
|
||||
|
||||
[tests/functional/zvol/zvol_misc]
|
||||
tests = ['zvol_misc_002_pos', 'zvol_misc_hierarchy', 'zvol_misc_rename_inuse',
|
||||
'zvol_misc_snapdev', 'zvol_misc_volmode', 'zvol_misc_zil']
|
||||
'zvol_misc_snapdev', 'zvol_misc_trim', 'zvol_misc_volmode', 'zvol_misc_zil']
|
||||
tags = ['functional', 'zvol', 'zvol_misc']
|
||||
|
||||
[tests/functional/zvol/zvol_stress]
|
||||
tests = ['zvol_stress']
|
||||
tags = ['functional', 'zvol', 'zvol_stress']
|
||||
|
||||
[tests/functional/zvol/zvol_swap]
|
||||
tests = ['zvol_swap_001_pos', 'zvol_swap_002_pos', 'zvol_swap_004_pos']
|
||||
tags = ['functional', 'zvol', 'zvol_swap']
|
||||
|
Some files were not shown because too many files have changed in this diff Show More
Loading…
x
Reference in New Issue
Block a user