diff --git a/sys/contrib/openzfs/cmd/zdb/zdb.c b/sys/contrib/openzfs/cmd/zdb/zdb.c index b04b220c768e..d239da67613c 100644 --- a/sys/contrib/openzfs/cmd/zdb/zdb.c +++ b/sys/contrib/openzfs/cmd/zdb/zdb.c @@ -2377,7 +2377,8 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp, (void) snprintf(blkbuf + strlen(blkbuf), buflen - strlen(blkbuf), " %s", "FREE"); (void) snprintf(blkbuf + strlen(blkbuf), - buflen - strlen(blkbuf), " cksum=%llx:%llx:%llx:%llx", + buflen - strlen(blkbuf), + " cksum=%016llx:%016llx:%016llx:%016llx", (u_longlong_t)bp->blk_cksum.zc_word[0], (u_longlong_t)bp->blk_cksum.zc_word[1], (u_longlong_t)bp->blk_cksum.zc_word[2], @@ -7509,6 +7510,19 @@ mos_leak_log_spacemaps(spa_t *spa) mos_obj_refd(sls->sls_sm_obj); } +static void +errorlog_count_refd(objset_t *mos, uint64_t errlog) +{ + zap_cursor_t zc; + zap_attribute_t za; + for (zap_cursor_init(&zc, mos, errlog); + zap_cursor_retrieve(&zc, &za) == 0; + zap_cursor_advance(&zc)) { + mos_obj_refd(za.za_first_integer); + } + zap_cursor_fini(&zc); +} + static int dump_mos_leaks(spa_t *spa) { @@ -7529,6 +7543,12 @@ dump_mos_leaks(spa_t *spa) mos_obj_refd(spa->spa_history); mos_obj_refd(spa->spa_errlog_last); mos_obj_refd(spa->spa_errlog_scrub); + + if (!spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) { + errorlog_count_refd(mos, spa->spa_errlog_last); + errorlog_count_refd(mos, spa->spa_errlog_scrub); + } + mos_obj_refd(spa->spa_all_vdev_zaps); mos_obj_refd(spa->spa_dsl_pool->dp_bptree_obj); mos_obj_refd(spa->spa_dsl_pool->dp_tmp_userrefs_obj); @@ -8364,7 +8384,9 @@ zdb_read_block(char *thing, spa_t *spa) DVA_GET_OFFSET(&bp->blk_dva[0]); ck_zio->io_bp = bp; zio_checksum_compute(ck_zio, ck, pabd, lsize); - printf("%12s\tcksum=%llx:%llx:%llx:%llx\n", + printf( + "%12s\t" + "cksum=%016llx:%016llx:%016llx:%016llx\n", zio_checksum_table[ck].ci_name, (u_longlong_t)bp->blk_cksum.zc_word[0], (u_longlong_t)bp->blk_cksum.zc_word[1], diff --git a/sys/contrib/openzfs/cmd/zfs/zfs_main.c b/sys/contrib/openzfs/cmd/zfs/zfs_main.c index 5880343a92f3..b1b00738569b 100644 --- a/sys/contrib/openzfs/cmd/zfs/zfs_main.c +++ b/sys/contrib/openzfs/cmd/zfs/zfs_main.c @@ -4532,7 +4532,7 @@ zfs_do_send(int argc, char **argv) } } - if (flags.parsable && flags.verbosity == 0) + if ((flags.parsable || flags.progressastitle) && flags.verbosity == 0) flags.verbosity = 1; if (excludes.count > 0 && !flags.replicate) { @@ -8672,7 +8672,6 @@ main(int argc, char **argv) int i = 0; const char *cmdname; char **newargv; - extern char **environ; (void) setlocale(LC_ALL, ""); (void) setlocale(LC_NUMERIC, "C"); diff --git a/sys/contrib/openzfs/cmd/zpool/zpool_main.c b/sys/contrib/openzfs/cmd/zpool/zpool_main.c index 93d6a18981cb..efb2d10e591b 100644 --- a/sys/contrib/openzfs/cmd/zpool/zpool_main.c +++ b/sys/contrib/openzfs/cmd/zpool/zpool_main.c @@ -7524,19 +7524,20 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps) zfs_nicebytes(ps->pss_processed, processed_buf, sizeof (processed_buf)); - assert(ps->pss_func == POOL_SCAN_SCRUB || - ps->pss_func == POOL_SCAN_RESILVER); + int is_resilver = ps->pss_func == POOL_SCAN_RESILVER; + int is_scrub = ps->pss_func == POOL_SCAN_SCRUB; + assert(is_resilver || is_scrub); /* Scan is finished or canceled. 
*/ if (ps->pss_state == DSS_FINISHED) { secs_to_dhms(end - start, time_buf); - if (ps->pss_func == POOL_SCAN_SCRUB) { + if (is_scrub) { (void) printf(gettext("scrub repaired %s " "in %s with %llu errors on %s"), processed_buf, time_buf, (u_longlong_t)ps->pss_errors, ctime(&end)); - } else if (ps->pss_func == POOL_SCAN_RESILVER) { + } else if (is_resilver) { (void) printf(gettext("resilvered %s " "in %s with %llu errors on %s"), processed_buf, time_buf, (u_longlong_t)ps->pss_errors, @@ -7544,10 +7545,10 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps) } return; } else if (ps->pss_state == DSS_CANCELED) { - if (ps->pss_func == POOL_SCAN_SCRUB) { + if (is_scrub) { (void) printf(gettext("scrub canceled on %s"), ctime(&end)); - } else if (ps->pss_func == POOL_SCAN_RESILVER) { + } else if (is_resilver) { (void) printf(gettext("resilver canceled on %s"), ctime(&end)); } @@ -7557,7 +7558,7 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps) assert(ps->pss_state == DSS_SCANNING); /* Scan is in progress. Resilvers can't be paused. */ - if (ps->pss_func == POOL_SCAN_SCRUB) { + if (is_scrub) { if (pause == 0) { (void) printf(gettext("scrub in progress since %s"), ctime(&start)); @@ -7567,7 +7568,7 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps) (void) printf(gettext("\tscrub started on %s"), ctime(&start)); } - } else if (ps->pss_func == POOL_SCAN_RESILVER) { + } else if (is_resilver) { (void) printf(gettext("resilver in progress since %s"), ctime(&start)); } @@ -7609,17 +7610,27 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps) scanned_buf, issued_buf, total_buf); } - if (ps->pss_func == POOL_SCAN_RESILVER) { + if (is_resilver) { (void) printf(gettext("\t%s resilvered, %.2f%% done"), processed_buf, 100 * fraction_done); - } else if (ps->pss_func == POOL_SCAN_SCRUB) { + } else if (is_scrub) { (void) printf(gettext("\t%s repaired, %.2f%% done"), processed_buf, 100 * fraction_done); } if (pause == 0) { + /* + * Only provide an estimate iff: + * 1) the time remaining is valid, and + * 2) the issue rate exceeds 10 MB/s, and + * 3) it's either: + * a) a resilver which has started repairs, or + * b) a scrub which has entered the issue phase. 
+ */ if (total_secs_left != UINT64_MAX && - issue_rate >= 10 * 1024 * 1024) { + issue_rate >= 10 * 1024 * 1024 && + ((is_resilver && ps->pss_processed > 0) || + (is_scrub && issued > 0))) { (void) printf(gettext(", %s to go\n"), time_buf); } else { (void) printf(gettext(", no estimated " diff --git a/sys/contrib/openzfs/config/kernel-filemap.m4 b/sys/contrib/openzfs/config/kernel-filemap.m4 new file mode 100644 index 000000000000..745928168f92 --- /dev/null +++ b/sys/contrib/openzfs/config/kernel-filemap.m4 @@ -0,0 +1,26 @@ +dnl # +dnl # filemap_range_has_page was not available till 4.13 +dnl # +AC_DEFUN([ZFS_AC_KERNEL_SRC_FILEMAP], [ + ZFS_LINUX_TEST_SRC([filemap_range_has_page], [ + #include + ],[ + struct address_space *mapping = NULL; + loff_t lstart = 0; + loff_t lend = 0; + bool ret __attribute__ ((unused)); + + ret = filemap_range_has_page(mapping, lstart, lend); + ]) +]) + +AC_DEFUN([ZFS_AC_KERNEL_FILEMAP], [ + AC_MSG_CHECKING([whether filemap_range_has_page() is available]) + ZFS_LINUX_TEST_RESULT([filemap_range_has_page], [ + AC_MSG_RESULT(yes) + AC_DEFINE(HAVE_FILEMAP_RANGE_HAS_PAGE, 1, + [filemap_range_has_page() is available]) + ],[ + AC_MSG_RESULT(no) + ]) +]) diff --git a/sys/contrib/openzfs/config/kernel.m4 b/sys/contrib/openzfs/config/kernel.m4 index 353988e9c867..121d73ef641a 100644 --- a/sys/contrib/openzfs/config/kernel.m4 +++ b/sys/contrib/openzfs/config/kernel.m4 @@ -150,6 +150,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [ ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM ZFS_AC_KERNEL_SRC_IDMAP_MNT_API ZFS_AC_KERNEL_SRC_IATTR_VFSID + ZFS_AC_KERNEL_SRC_FILEMAP AC_MSG_CHECKING([for available kernel interfaces]) ZFS_LINUX_TEST_COMPILE_ALL([kabi]) @@ -273,6 +274,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [ ZFS_AC_KERNEL_USER_NS_COMMON_INUM ZFS_AC_KERNEL_IDMAP_MNT_API ZFS_AC_KERNEL_IATTR_VFSID + ZFS_AC_KERNEL_FILEMAP ]) dnl # diff --git a/sys/contrib/openzfs/contrib/initramfs/scripts/zfs b/sys/contrib/openzfs/contrib/initramfs/scripts/zfs index c724f0c2cf57..d1a99e4a87ba 100644 --- a/sys/contrib/openzfs/contrib/initramfs/scripts/zfs +++ b/sys/contrib/openzfs/contrib/initramfs/scripts/zfs @@ -272,30 +272,46 @@ import_pool() # with more logging etc. load_module_initrd() { - [ -n "$ROOTDELAY" ] && ZFS_INITRD_PRE_MOUNTROOT_SLEEP="$ROOTDELAY" + ZFS_INITRD_PRE_MOUNTROOT_SLEEP=${ROOTDELAY:-0} - if [ "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP" -gt 0 ] 2>/dev/null - then - if [ "$quiet" != "y" ]; then - zfs_log_begin_msg "Sleeping for" \ - "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP seconds..." + if [ "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP" -gt 0 ]; then + [ "$quiet" != "y" ] && zfs_log_begin_msg "Delaying for up to '${ZFS_INITRD_PRE_MOUNTROOT_SLEEP}' seconds." + fi + + START=$(/bin/date -u +%s) + END=$((START+ZFS_INITRD_PRE_MOUNTROOT_SLEEP)) + while true; do + + # Wait for all of the /dev/{hd,sd}[a-z] device nodes to appear. + if command -v wait_for_udev > /dev/null 2>&1 ; then + wait_for_udev 10 + elif command -v wait_for_dev > /dev/null 2>&1 ; then + wait_for_dev fi - sleep "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP" + + # + # zpool import refuse to import without a valid + # /proc/self/mounts + # + [ ! -f /proc/self/mounts ] && mount proc /proc + + # Load the module + if load_module "zfs"; then + ret=0 + break + else + ret=1 + fi + + [ "$(/bin/date -u +%s)" -gt "$END" ] && break + sleep 1 + + done + if [ "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP" -gt 0 ]; then [ "$quiet" != "y" ] && zfs_log_end_msg fi - # Wait for all of the /dev/{hd,sd}[a-z] device nodes to appear. 
- if command -v wait_for_udev > /dev/null 2>&1 ; then - wait_for_udev 10 - elif command -v wait_for_dev > /dev/null 2>&1 ; then - wait_for_dev - fi - - # zpool import refuse to import without a valid /proc/self/mounts - [ ! -f /proc/self/mounts ] && mount proc /proc - - # Load the module - load_module "zfs" || return 1 + [ "$ret" -ne 0 ] && return 1 if [ "$ZFS_INITRD_POST_MODPROBE_SLEEP" -gt 0 ] 2>/dev/null then @@ -343,9 +359,11 @@ mount_fs() # isn't the root fs. return 0 fi - ZFS_CMD="mount.zfs" # Last hail-mary: Hope 'rootmnt' is set! mountpoint="" + if [ "$mountpoint" = "legacy" ]; then + ZFS_CMD="mount.zfs" + fi else mountpoint="$mountpoint1" fi diff --git a/sys/contrib/openzfs/include/libzutil.h b/sys/contrib/openzfs/include/libzutil.h index 4d4bddaad5f3..948ac08cd772 100644 --- a/sys/contrib/openzfs/include/libzutil.h +++ b/sys/contrib/openzfs/include/libzutil.h @@ -183,6 +183,7 @@ _LIBZUTIL_H int printf_color(const char *color, const char *format, ...); _LIBZUTIL_H const char *zfs_basename(const char *path); _LIBZUTIL_H ssize_t zfs_dirnamelen(const char *path); #ifdef __linux__ +extern char **environ; _LIBZUTIL_H void zfs_setproctitle_init(int argc, char *argv[], char *envp[]); _LIBZUTIL_H void zfs_setproctitle(const char *fmt, ...); #else diff --git a/sys/contrib/openzfs/include/os/freebsd/spl/sys/ia32/asm_linkage.h b/sys/contrib/openzfs/include/os/freebsd/spl/sys/ia32/asm_linkage.h index 058d600007af..1ebfd8350661 100644 --- a/sys/contrib/openzfs/include/os/freebsd/spl/sys/ia32/asm_linkage.h +++ b/sys/contrib/openzfs/include/os/freebsd/spl/sys/ia32/asm_linkage.h @@ -36,7 +36,7 @@ #define ENDBR #define SECTION_TEXT .text -#define SECTION_STATIC .data +#define SECTION_STATIC .rodata #ifdef __cplusplus extern "C" { diff --git a/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h b/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h index 41a5bb218c12..8cde33dbcbbb 100644 --- a/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h +++ b/sys/contrib/openzfs/include/os/freebsd/zfs/sys/zfs_znode_impl.h @@ -116,7 +116,8 @@ typedef struct zfs_soft_state { #define Z_ISLNK(type) ((type) == VLNK) #define Z_ISDIR(type) ((type) == VDIR) -#define zn_has_cached_data(zp) vn_has_cached_data(ZTOV(zp)) +#define zn_has_cached_data(zp, start, end) \ + vn_has_cached_data(ZTOV(zp)) #define zn_flush_cached_data(zp, sync) vn_flush_cached_data(ZTOV(zp), sync) #define zn_rlimit_fsize(zp, uio) \ vn_rlimit_fsize(ZTOV(zp), GET_UIO_STRUCT(uio), zfs_uio_td(uio)) diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_acl.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_acl.h index 2c734322267a..15dc77edafac 100644 --- a/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_acl.h +++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/trace_acl.h @@ -62,7 +62,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class, __field(uint32_t, z_async_writes_cnt) __field(mode_t, z_mode) __field(boolean_t, z_is_sa) - __field(boolean_t, z_is_mapped) __field(boolean_t, z_is_ctldir) __field(uint32_t, i_uid) @@ -96,7 +95,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class, __entry->z_async_writes_cnt = zn->z_async_writes_cnt; __entry->z_mode = zn->z_mode; __entry->z_is_sa = zn->z_is_sa; - __entry->z_is_mapped = zn->z_is_mapped; __entry->z_is_ctldir = zn->z_is_ctldir; __entry->i_uid = KUID_TO_SUID(ZTOI(zn)->i_uid); @@ -119,7 +117,7 @@ DECLARE_EVENT_CLASS(zfs_ace_class, "zn_prefetch %u blksz %u seq %u " "mapcnt %llu size %llu pflags %llu " "sync_cnt %u sync_writes_cnt %u async_writes_cnt %u " - "mode 0x%x is_sa %d 
is_mapped %d is_ctldir %d " + "mode 0x%x is_sa %d is_ctldir %d " "inode { uid %u gid %u ino %lu nlink %u size %lli " "blkbits %u bytes %u mode 0x%x generation %x } } " "ace { type %u flags %u access_mask %u } mask_matched %u", @@ -128,9 +126,8 @@ DECLARE_EVENT_CLASS(zfs_ace_class, __entry->z_seq, __entry->z_mapcnt, __entry->z_size, __entry->z_pflags, __entry->z_sync_cnt, __entry->z_sync_writes_cnt, __entry->z_async_writes_cnt, - __entry->z_mode, __entry->z_is_sa, __entry->z_is_mapped, - __entry->z_is_ctldir, __entry->i_uid, - __entry->i_gid, __entry->i_ino, __entry->i_nlink, + __entry->z_mode, __entry->z_is_sa, __entry->z_is_ctldir, + __entry->i_uid, __entry->i_gid, __entry->i_ino, __entry->i_nlink, __entry->i_size, __entry->i_blkbits, __entry->i_bytes, __entry->i_mode, __entry->i_generation, __entry->z_type, __entry->z_flags, __entry->z_access_mask, diff --git a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h index 52568781011f..81607ef2a25e 100644 --- a/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h +++ b/sys/contrib/openzfs/include/os/linux/zfs/sys/zfs_znode_impl.h @@ -47,9 +47,16 @@ extern "C" { #endif +#if defined(HAVE_FILEMAP_RANGE_HAS_PAGE) #define ZNODE_OS_FIELDS \ inode_timespec_t z_btime; /* creation/birth time (cached) */ \ struct inode z_inode; +#else +#define ZNODE_OS_FIELDS \ + inode_timespec_t z_btime; /* creation/birth time (cached) */ \ + struct inode z_inode; \ + boolean_t z_is_mapped; /* we are mmap'ed */ +#endif /* * Convert between znode pointers and inode pointers @@ -70,7 +77,14 @@ extern "C" { #define Z_ISDEV(type) (S_ISCHR(type) || S_ISBLK(type) || S_ISFIFO(type)) #define Z_ISDIR(type) S_ISDIR(type) -#define zn_has_cached_data(zp) ((zp)->z_is_mapped) +#if defined(HAVE_FILEMAP_RANGE_HAS_PAGE) +#define zn_has_cached_data(zp, start, end) \ + filemap_range_has_page(ZTOI(zp)->i_mapping, start, end) +#else +#define zn_has_cached_data(zp, start, end) \ + ((zp)->z_is_mapped) +#endif + #define zn_flush_cached_data(zp, sync) write_inode_now(ZTOI(zp), sync) #define zn_rlimit_fsize(zp, uio) (0) diff --git a/sys/contrib/openzfs/include/sys/bpobj.h b/sys/contrib/openzfs/include/sys/bpobj.h index 84f0ee76c44e..f3384f526454 100644 --- a/sys/contrib/openzfs/include/sys/bpobj.h +++ b/sys/contrib/openzfs/include/sys/bpobj.h @@ -87,6 +87,7 @@ int livelist_bpobj_iterate_from_nofree(bpobj_t *bpo, bpobj_itor_t func, void *arg, int64_t start); void bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx); +void bpobj_prefetch_subobj(bpobj_t *bpo, uint64_t subobj); void bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx); diff --git a/sys/contrib/openzfs/include/sys/dsl_dataset.h b/sys/contrib/openzfs/include/sys/dsl_dataset.h index 3450527af7e0..b0d8c7994c07 100644 --- a/sys/contrib/openzfs/include/sys/dsl_dataset.h +++ b/sys/contrib/openzfs/include/sys/dsl_dataset.h @@ -372,6 +372,7 @@ int dsl_dataset_rename_snapshot(const char *fsname, const char *oldsnapname, const char *newsnapname, boolean_t recursive); int dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname, minor_t cleanup_minor, const char *htag); +boolean_t zfeature_active(spa_feature_t f, void *arg); blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds); diff --git a/sys/contrib/openzfs/include/sys/spa.h b/sys/contrib/openzfs/include/sys/spa.h index 500eb3491a99..c9d03bf645a9 100644 --- a/sys/contrib/openzfs/include/sys/spa.h +++ b/sys/contrib/openzfs/include/sys/spa.h @@ -678,7 +678,7 
@@ typedef struct blkptr { len += func(buf + len, size - len, \ "[L%llu %s] %s %s %s %s %s %s %s%c" \ "size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c" \ - "cksum=%llx:%llx:%llx:%llx", \ + "cksum=%016llx:%016llx:%016llx:%016llx", \ (u_longlong_t)BP_GET_LEVEL(bp), \ type, \ checksum, \ diff --git a/sys/contrib/openzfs/include/sys/zfs_znode.h b/sys/contrib/openzfs/include/sys/zfs_znode.h index de38f56dc32d..fcee55b0199d 100644 --- a/sys/contrib/openzfs/include/sys/zfs_znode.h +++ b/sys/contrib/openzfs/include/sys/zfs_znode.h @@ -188,7 +188,6 @@ typedef struct znode { boolean_t z_atime_dirty; /* atime needs to be synced */ boolean_t z_zn_prefetch; /* Prefetch znodes? */ boolean_t z_is_sa; /* are we native sa? */ - boolean_t z_is_mapped; /* are we mmap'ed */ boolean_t z_is_ctldir; /* are we .zfs entry */ boolean_t z_suspended; /* extra ref from a suspend? */ uint_t z_blksz; /* block size in bytes */ diff --git a/sys/contrib/openzfs/lib/libshare/nfs.c b/sys/contrib/openzfs/lib/libshare/nfs.c index 118ad7ef2209..3962c87453d4 100644 --- a/sys/contrib/openzfs/lib/libshare/nfs.c +++ b/sys/contrib/openzfs/lib/libshare/nfs.c @@ -97,7 +97,7 @@ nfs_init_tmpfile(const char *prefix, const char *mdir, struct tmpfile *tmpf) } strlcpy(tmpf->name, prefix, sizeof (tmpf->name)); - strlcat(tmpf->name, ".XXXXXXXX", sizeof (tmpf->name) - strlen(prefix)); + strlcat(tmpf->name, ".XXXXXXXX", sizeof (tmpf->name)); int fd = mkostemp(tmpf->name, O_CLOEXEC); if (fd == -1) { diff --git a/sys/contrib/openzfs/lib/libspl/include/os/freebsd/sys/ia32/asm_linkage.h b/sys/contrib/openzfs/lib/libspl/include/os/freebsd/sys/ia32/asm_linkage.h index 9964f183cc68..08c73037990f 100644 --- a/sys/contrib/openzfs/lib/libspl/include/os/freebsd/sys/ia32/asm_linkage.h +++ b/sys/contrib/openzfs/lib/libspl/include/os/freebsd/sys/ia32/asm_linkage.h @@ -40,7 +40,7 @@ #define ENDBR #define SECTION_TEXT .text -#define SECTION_STATIC .data +#define SECTION_STATIC .rodata #ifdef __cplusplus extern "C" { diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c b/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c index 57737bc6c01a..8612e082ba34 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_mount.c @@ -1422,10 +1422,10 @@ zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force) * Walk through and first unshare everything. */ for (i = 0; i < used; i++) { - for (enum sa_protocol i = 0; i < SA_PROTOCOL_COUNT; ++i) { - if (sa_is_shared(sets[i].mountpoint, i) && + for (enum sa_protocol p = 0; p < SA_PROTOCOL_COUNT; ++p) { + if (sa_is_shared(sets[i].mountpoint, p) && unshare_one(hdl, sets[i].mountpoint, - sets[i].mountpoint, i) != 0) + sets[i].mountpoint, p) != 0) goto out; } } diff --git a/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c b/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c index 038613a1fcfa..66a22e333663 100644 --- a/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c +++ b/sys/contrib/openzfs/lib/libzfs/libzfs_sendrecv.c @@ -84,6 +84,7 @@ typedef struct progress_arg { boolean_t pa_estimate; int pa_verbosity; boolean_t pa_astitle; + boolean_t pa_progress; uint64_t pa_size; } progress_arg_t; @@ -940,7 +941,7 @@ send_progress_thread(void *arg) struct tm tm; int err; - if (!pa->pa_parsable && pa->pa_verbosity != 0) { + if (!pa->pa_parsable && pa->pa_progress) { (void) fprintf(stderr, "TIME %s %sSNAPSHOT %s\n", pa->pa_estimate ? 
"BYTES" : " SENT", @@ -990,7 +991,7 @@ send_progress_thread(void *arg) (void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n", tm.tm_hour, tm.tm_min, tm.tm_sec, (u_longlong_t)bytes, zhp->zfs_name); - } else if (pa->pa_verbosity != 0) { + } else if (pa->pa_progress) { zfs_nicebytes(bytes, buf, sizeof (buf)); (void) fprintf(stderr, "%02d:%02d:%02d %5s %s\n", tm.tm_hour, tm.tm_min, tm.tm_sec, @@ -1206,6 +1207,7 @@ dump_snapshot(zfs_handle_t *zhp, void *arg) pa.pa_verbosity = sdd->verbosity; pa.pa_size = sdd->size; pa.pa_astitle = sdd->progressastitle; + pa.pa_progress = sdd->progress; if ((err = pthread_create(&tid, NULL, send_progress_thread, &pa)) != 0) { @@ -1886,6 +1888,7 @@ zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags, pa.pa_verbosity = flags->verbosity; pa.pa_size = size; pa.pa_astitle = flags->progressastitle; + pa.pa_progress = flags->progress; error = pthread_create(&tid, NULL, send_progress_thread, &pa); @@ -2696,6 +2699,7 @@ zfs_send_one_cb_impl(zfs_handle_t *zhp, const char *from, int fd, pa.pa_verbosity = flags->verbosity; pa.pa_size = size; pa.pa_astitle = flags->progressastitle; + pa.pa_progress = flags->progress; err = pthread_create(&ptid, NULL, send_progress_thread, &pa); @@ -4586,7 +4590,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, B_FALSE, destsnap) == 0) { *strchr(destsnap, '@') = '\0'; (void) strlcat(destsnap, suffix, - sizeof (destsnap) - strlen(destsnap)); + sizeof (destsnap)); } } } else { @@ -4622,7 +4626,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap, B_FALSE, destsnap) == 0) { *strchr(destsnap, '@') = '\0'; (void) strlcat(destsnap, snap, - sizeof (destsnap) - strlen(destsnap)); + sizeof (destsnap)); } } } diff --git a/sys/contrib/openzfs/man/man4/zfs.4 b/sys/contrib/openzfs/man/man4/zfs.4 index e20d601340c6..88a044f63f28 100644 --- a/sys/contrib/openzfs/man/man4/zfs.4 +++ b/sys/contrib/openzfs/man/man4/zfs.4 @@ -1769,7 +1769,7 @@ completes in order to verify the checksums of all blocks which have been resilvered. This is enabled by default and strongly recommended. . -.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 33554432 Ns B Po 32 MiB Pc Pq u64 +.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq u64 Maximum amount of I/O that can be concurrently issued for a sequential resilver per leaf device, given in bytes. . @@ -1890,6 +1890,13 @@ I/O. In this case (unless the metadata scan is done) we stop issuing verification I/O and start scanning metadata again until we get to the hard limit. . +.It Sy zfs_scan_report_txgs Ns = Ns Sy 0 Ns | Ns 1 Pq uint +When reporting resilver throughput and estimated completion time use the +performance observed over roughly the last +.Sy zfs_scan_report_txgs +TXGs. +When set to zero performance is calculated over the time between checkpoints. +. .It Sy zfs_scan_strict_mem_lim Ns = Ns Sy 0 Ns | Ns 1 Pq int Enforce tight memory limits on pool scans when a sequential scan is in progress. When disabled, the memory limit may be exceeded by fast disks. @@ -1898,7 +1905,7 @@ When disabled, the memory limit may be exceeded by fast disks. Freezes a scrub/resilver in progress without actually pausing it. Intended for testing/debugging. . -.It Sy zfs_scan_vdev_limit Ns = Ns Sy 4194304 Ns B Po 4 MiB Pc Pq int +.It Sy zfs_scan_vdev_limit Ns = Ns Sy 16777216 Ns B Po 16 MiB Pc Pq int Maximum amount of data that can be concurrently issued at once for scrubs and resilvers per leaf device, given in bytes. . 
diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S index 8f9e766486f1..0ebec5c1095e 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/blake3/blake3_avx2.S @@ -1791,7 +1791,6 @@ ENTRY_ALIGN(zfs_blake3_hash_many_avx2, 64) SET_SIZE(zfs_blake3_hash_many_avx2) SECTION_STATIC -.section .rodata .p2align 6 ADD0: diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S index 165492a0ed76..909b2147dff9 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/aesni-gcm-x86_64.S @@ -53,6 +53,11 @@ /* Windows userland links with OpenSSL */ #if !defined (_WIN32) || defined (_KERNEL) +/* Apple needs _ */ +#if defined (__APPLE__) +#define gcm_avx_can_use_movbe _gcm_avx_can_use_movbe +#endif + .extern gcm_avx_can_use_movbe .text diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S index e40b3df32753..dec782fda33e 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/modes/gcm_pclmulqdq.S @@ -101,7 +101,7 @@ gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res) { // static uint8_t byte_swap16_mask[] = { // 15, 14, 13, 12, 11, 10, 9, 8, 7, 6 ,5, 4, 3, 2, 1, 0 }; -.section .rodata +SECTION_STATIC .balign XMM_ALIGN .Lbyte_swap16_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S index f3d701528459..f1fde51c1d69 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha256_impl.S @@ -2063,7 +2063,7 @@ ENTRY_NP(SHA256TransformBlocks) .cfi_endproc SET_SIZE(SHA256TransformBlocks) -.section .rodata +SECTION_STATIC .balign 64 SET_OBJ(K256) K256: diff --git a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S index 520f5b6dab24..b2f7d4863d8a 100644 --- a/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S +++ b/sys/contrib/openzfs/module/icp/asm-x86_64/sha2/sha512_impl.S @@ -2064,7 +2064,7 @@ ENTRY_NP(SHA512TransformBlocks) .cfi_endproc SET_SIZE(SHA512TransformBlocks) -.section .rodata +SECTION_STATIC .balign 64 SET_OBJ(K512) K512: @@ -2113,4 +2113,3 @@ K512: #if defined(__ELF__) .section .note.GNU-stack,"",%progbits #endif - diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c b/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c index e4c6cf7d097d..9a268573528c 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/kmod_core.c @@ -142,7 +142,7 @@ zfsdev_ioctl(struct cdev *dev, ulong_t zcmd, caddr_t arg, int flag, return (EINVAL); uaddr = (void *)(uintptr_t)zp->zfs_cmd; - zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); + zc = vmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); #ifdef ZFS_LEGACY_SUPPORT /* * Remap ioctl code for legacy user binaries @@ -150,10 +150,10 @@ zfsdev_ioctl(struct cdev *dev, ulong_t zcmd, caddr_t arg, int flag, if (zp->zfs_ioctl_version == ZFS_IOCVER_LEGACY) { vecnum = zfs_ioctl_legacy_to_ozfs(vecnum); if (vecnum < 0) { - 
kmem_free(zc, sizeof (zfs_cmd_t)); + vmem_free(zc, sizeof (zfs_cmd_t)); return (ENOTSUP); } - zcl = kmem_zalloc(sizeof (zfs_cmd_legacy_t), KM_SLEEP); + zcl = vmem_zalloc(sizeof (zfs_cmd_legacy_t), KM_SLEEP); if (copyin(uaddr, zcl, sizeof (zfs_cmd_legacy_t))) { error = SET_ERROR(EFAULT); goto out; @@ -180,9 +180,9 @@ zfsdev_ioctl(struct cdev *dev, ulong_t zcmd, caddr_t arg, int flag, out: #ifdef ZFS_LEGACY_SUPPORT if (zcl) - kmem_free(zcl, sizeof (zfs_cmd_legacy_t)); + vmem_free(zcl, sizeof (zfs_cmd_legacy_t)); #endif - kmem_free(zc, sizeof (zfs_cmd_t)); + vmem_free(zc, sizeof (zfs_cmd_t)); MPASS(tsd_get(rrw_tsd_key) == NULL); return (error); } diff --git a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c index 2820f10b5de8..a1e0595bda34 100644 --- a/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c +++ b/sys/contrib/openzfs/module/os/freebsd/zfs/zfs_vfsops.c @@ -230,7 +230,8 @@ zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val, vfs_unbusy(vfsp); if (tmp != *val) { - (void) strcpy(setpoint, "temporary"); + if (setpoint) + (void) strcpy(setpoint, "temporary"); *val = tmp; } return (0); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c index 16530d82693e..13150adbe0cf 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/abd_os.c @@ -272,18 +272,20 @@ abd_alloc_chunks(abd_t *abd, size_t size) struct page *page, *tmp_page = NULL; gfp_t gfp = __GFP_NOWARN | GFP_NOIO; gfp_t gfp_comp = (gfp | __GFP_NORETRY | __GFP_COMP) & ~__GFP_RECLAIM; - int max_order = MIN(zfs_abd_scatter_max_order, MAX_ORDER - 1); - int nr_pages = abd_chunkcnt_for_bytes(size); - int chunks = 0, zones = 0; + unsigned int max_order = MIN(zfs_abd_scatter_max_order, MAX_ORDER - 1); + unsigned int nr_pages = abd_chunkcnt_for_bytes(size); + unsigned int chunks = 0, zones = 0; size_t remaining_size; int nid = NUMA_NO_NODE; - int alloc_pages = 0; + unsigned int alloc_pages = 0; INIT_LIST_HEAD(&pages); + ASSERT3U(alloc_pages, <, nr_pages); + while (alloc_pages < nr_pages) { - unsigned chunk_pages; - int order; + unsigned int chunk_pages; + unsigned int order; order = MIN(highbit64(nr_pages - alloc_pages) - 1, max_order); chunk_pages = (1U << order); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c index 519f13212fac..dca48e1e4010 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ctldir.c @@ -392,7 +392,20 @@ zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay) zfsctl_snapshot_hold(se); rw_enter(&se->se_taskqid_lock, RW_WRITER); - ASSERT3S(se->se_taskqid, ==, TASKQID_INVALID); + /* + * If this condition happens, we managed to: + * - dispatch once + * - want to dispatch _again_ before it returned + * + * So let's just return - if that task fails at unmounting, + * we'll eventually dispatch again, and if it succeeds, + * no problem. 
+ */ + if (se->se_taskqid != TASKQID_INVALID) { + rw_exit(&se->se_taskqid_lock); + zfsctl_snapshot_rele(se); + return; + } se->se_taskqid = taskq_dispatch_delay(system_delay_taskq, snapentry_expire, se, TQ_SLEEP, ddi_get_lbolt() + delay * HZ); rw_exit(&se->se_taskqid_lock); @@ -485,7 +498,9 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id, zp->z_atime_dirty = B_FALSE; zp->z_zn_prefetch = B_FALSE; zp->z_is_sa = B_FALSE; +#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE) zp->z_is_mapped = B_FALSE; +#endif zp->z_is_ctldir = B_TRUE; zp->z_sa_hdl = NULL; zp->z_blksz = 0; diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c index f8d1777b07a6..f068f544f0ec 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_ioctl_os.c @@ -135,7 +135,7 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg) vecnum = cmd - ZFS_IOC_FIRST; - zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); + zc = vmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP); if (ddi_copyin((void *)(uintptr_t)arg, zc, sizeof (zfs_cmd_t), 0)) { error = -SET_ERROR(EFAULT); @@ -146,7 +146,7 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg) if (error == 0 && rc != 0) error = -SET_ERROR(EFAULT); out: - kmem_free(zc, sizeof (zfs_cmd_t)); + vmem_free(zc, sizeof (zfs_cmd_t)); return (error); } diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c index c921e587c75c..2d9b27a90884 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vfsops.c @@ -608,7 +608,8 @@ zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val, } if (tmp != *val) { - (void) strcpy(setpoint, "temporary"); + if (setpoint) + (void) strcpy(setpoint, "temporary"); *val = tmp; } return (0); diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c index 47f132a38abe..302a88c2d353 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_vnops_os.c @@ -987,7 +987,7 @@ zfs_remove(znode_t *dzp, char *name, cred_t *cr, int flags) mutex_enter(&zp->z_lock); may_delete_now = atomic_read(&ZTOI(zp)->i_count) == 1 && - !(zp->z_is_mapped); + !zn_has_cached_data(zp, 0, LLONG_MAX); mutex_exit(&zp->z_lock); /* @@ -1075,7 +1075,8 @@ zfs_remove(znode_t *dzp, char *name, cred_t *cr, int flags) &xattr_obj_unlinked, sizeof (xattr_obj_unlinked)); delete_now = may_delete_now && !toobig && atomic_read(&ZTOI(zp)->i_count) == 1 && - !(zp->z_is_mapped) && xattr_obj == xattr_obj_unlinked && + !zn_has_cached_data(zp, 0, LLONG_MAX) && + xattr_obj == xattr_obj_unlinked && zfs_external_acl(zp) == acl_obj; } diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c index 1faf25d93cc7..7b802a9bace0 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zfs_znode.c @@ -551,7 +551,9 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz, ASSERT3P(zp->z_xattr_cached, ==, NULL); zp->z_unlinked = B_FALSE; zp->z_atime_dirty = B_FALSE; +#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE) zp->z_is_mapped = B_FALSE; +#endif zp->z_is_ctldir = B_FALSE; zp->z_suspended = B_FALSE; zp->z_sa_hdl = NULL; @@ -1641,7 +1643,7 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len) * Zero partial page 
cache entries. This must be done under a * range lock in order to keep the ARC and page cache in sync. */ - if (zp->z_is_mapped) { + if (zn_has_cached_data(zp, off, off + len - 1)) { loff_t first_page, last_page, page_len; loff_t first_page_offset, last_page_offset; diff --git a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c index c56e3691e70a..e42b15042a3c 100644 --- a/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c +++ b/sys/contrib/openzfs/module/os/linux/zfs/zpl_file.c @@ -625,7 +625,6 @@ static int zpl_mmap(struct file *filp, struct vm_area_struct *vma) { struct inode *ip = filp->f_mapping->host; - znode_t *zp = ITOZ(ip); int error; fstrans_cookie_t cookie; @@ -640,9 +639,12 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma) if (error) return (error); +#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE) + znode_t *zp = ITOZ(ip); mutex_enter(&zp->z_lock); zp->z_is_mapped = B_TRUE; mutex_exit(&zp->z_lock); +#endif return (error); } @@ -937,7 +939,7 @@ zpl_fadvise(struct file *filp, loff_t offset, loff_t len, int advice) case POSIX_FADV_SEQUENTIAL: case POSIX_FADV_WILLNEED: #ifdef HAVE_GENERIC_FADVISE - if (zn_has_cached_data(zp)) + if (zn_has_cached_data(zp, offset, offset + len - 1)) error = generic_fadvise(filp, offset, len, advice); #endif /* diff --git a/sys/contrib/openzfs/module/zfs/arc.c b/sys/contrib/openzfs/module/zfs/arc.c index 2a52d0d24572..aa806706de29 100644 --- a/sys/contrib/openzfs/module/zfs/arc.c +++ b/sys/contrib/openzfs/module/zfs/arc.c @@ -5958,6 +5958,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, (zio_flags & ZIO_FLAG_RAW_ENCRYPT) != 0; boolean_t embedded_bp = !!BP_IS_EMBEDDED(bp); boolean_t no_buf = *arc_flags & ARC_FLAG_NO_BUF; + arc_buf_t *buf = NULL; int rc = 0; ASSERT(!embedded_bp || @@ -5987,7 +5988,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, if (!zfs_blkptr_verify(spa, bp, zio_flags & ZIO_FLAG_CONFIG_WRITER, BLK_VERIFY_LOG)) { rc = SET_ERROR(ECKSUM); - goto out; + goto done; } if (!embedded_bp) { @@ -6008,14 +6009,13 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, if (hdr != NULL && HDR_HAS_L1HDR(hdr) && (HDR_HAS_RABD(hdr) || (hdr->b_l1hdr.b_pabd != NULL && !encrypted_read))) { boolean_t is_data = !HDR_ISTYPE_METADATA(hdr); - arc_buf_t *buf = NULL; if (HDR_IO_IN_PROGRESS(hdr)) { if (*arc_flags & ARC_FLAG_CACHED_ONLY) { mutex_exit(hash_lock); ARCSTAT_BUMP(arcstat_cached_only_in_progress); rc = SET_ERROR(ENOENT); - goto out; + goto done; } zio_t *head_zio = hdr->b_l1hdr.b_acb->acb_zio_head; @@ -6144,9 +6144,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, ARCSTAT_CONDSTAT(!(*arc_flags & ARC_FLAG_PREFETCH), demand, prefetch, is_data, data, metadata, hits); *arc_flags |= ARC_FLAG_CACHED; - - if (done) - done(NULL, zb, bp, buf, private); + goto done; } else { uint64_t lsize = BP_GET_LSIZE(bp); uint64_t psize = BP_GET_PSIZE(bp); @@ -6159,10 +6157,10 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, int alloc_flags = encrypted_read ? 
ARC_HDR_ALLOC_RDATA : 0; if (*arc_flags & ARC_FLAG_CACHED_ONLY) { - rc = SET_ERROR(ENOENT); if (hash_lock != NULL) mutex_exit(hash_lock); - goto out; + rc = SET_ERROR(ENOENT); + goto done; } if (hdr == NULL) { @@ -6482,6 +6480,16 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, spa_read_history_add(spa, zb, *arc_flags); spl_fstrans_unmark(cookie); return (rc); + +done: + if (done) + done(NULL, zb, bp, buf, private); + if (pio && rc != 0) { + zio_t *zio = zio_null(pio, spa, NULL, NULL, NULL, zio_flags); + zio->io_error = rc; + zio_nowait(zio); + } + goto out; } arc_prune_t * diff --git a/sys/contrib/openzfs/module/zfs/bpobj.c b/sys/contrib/openzfs/module/zfs/bpobj.c index f7fded56518b..fa99f5141d4e 100644 --- a/sys/contrib/openzfs/module/zfs/bpobj.c +++ b/sys/contrib/openzfs/module/zfs/bpobj.c @@ -663,14 +663,13 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx) } VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj)); - VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp)); - if (bpobj_is_empty(&subbpo)) { /* No point in having an empty subobj. */ bpobj_close(&subbpo); bpobj_free(bpo->bpo_os, subobj, tx); return; } + VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp)); mutex_enter(&bpo->bpo_lock); dmu_buf_will_dirty(bpo->bpo_dbuf, tx); @@ -780,6 +779,68 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx) } +/* + * Prefetch metadata required for bpobj_enqueue_subobj(). + */ +void +bpobj_prefetch_subobj(bpobj_t *bpo, uint64_t subobj) +{ + dmu_object_info_t doi; + bpobj_t subbpo; + uint64_t subsubobjs; + boolean_t copy_subsub = B_TRUE; + boolean_t copy_bps = B_TRUE; + + ASSERT(bpobj_is_open(bpo)); + ASSERT(subobj != 0); + + if (subobj == dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj) + return; + + if (bpobj_open(&subbpo, bpo->bpo_os, subobj) != 0) + return; + if (bpobj_is_empty(&subbpo)) { + bpobj_close(&subbpo); + return; + } + subsubobjs = subbpo.bpo_phys->bpo_subobjs; + bpobj_close(&subbpo); + + if (subsubobjs != 0) { + if (dmu_object_info(bpo->bpo_os, subsubobjs, &doi) != 0) + return; + if (doi.doi_max_offset > doi.doi_data_block_size) + copy_subsub = B_FALSE; + } + + if (dmu_object_info(bpo->bpo_os, subobj, &doi) != 0) + return; + if (doi.doi_max_offset > doi.doi_data_block_size || !copy_subsub) + copy_bps = B_FALSE; + + if (copy_subsub && subsubobjs != 0) { + if (bpo->bpo_phys->bpo_subobjs) { + dmu_prefetch(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, 0, + bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), 1, + ZIO_PRIORITY_ASYNC_READ); + } + dmu_prefetch(bpo->bpo_os, subsubobjs, 0, 0, 1, + ZIO_PRIORITY_ASYNC_READ); + } + + if (copy_bps) { + dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0, + bpo->bpo_phys->bpo_num_blkptrs * sizeof (blkptr_t), 1, + ZIO_PRIORITY_ASYNC_READ); + dmu_prefetch(bpo->bpo_os, subobj, 0, 0, 1, + ZIO_PRIORITY_ASYNC_READ); + } else if (bpo->bpo_phys->bpo_subobjs) { + dmu_prefetch(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, 0, + bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), 1, + ZIO_PRIORITY_ASYNC_READ); + } +} + void bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx) diff --git a/sys/contrib/openzfs/module/zfs/dmu_send.c b/sys/contrib/openzfs/module/zfs/dmu_send.c index 7f8de23f0e29..f86a0a5b1c57 100644 --- a/sys/contrib/openzfs/module/zfs/dmu_send.c +++ b/sys/contrib/openzfs/module/zfs/dmu_send.c @@ -493,6 +493,7 @@ dmu_dump_write(dmu_send_cookie_t *dscp, dmu_object_type_t type, uint64_t object, (bp != NULL ? 
BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF && io_compressed : lsize != psize); if (raw || compressed) { + ASSERT(bp != NULL); ASSERT(raw || dscp->dsc_featureflags & DMU_BACKUP_FEATURE_COMPRESSED); ASSERT(!BP_IS_EMBEDDED(bp)); @@ -3028,8 +3029,7 @@ dmu_send_estimate_fast(dsl_dataset_t *origds, dsl_dataset_t *fromds, dsl_dataset_name(origds, dsname); (void) strcat(dsname, "/"); - (void) strlcat(dsname, recv_clone_name, - sizeof (dsname) - strlen(dsname)); + (void) strlcat(dsname, recv_clone_name, sizeof (dsname)); err = dsl_dataset_hold(origds->ds_dir->dd_pool, dsname, FTAG, &ds); diff --git a/sys/contrib/openzfs/module/zfs/dsl_dataset.c b/sys/contrib/openzfs/module/zfs/dsl_dataset.c index 57a58f88cec5..0584a356f9ac 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_dataset.c +++ b/sys/contrib/openzfs/module/zfs/dsl_dataset.c @@ -1039,7 +1039,7 @@ dsl_dataset_has_owner(dsl_dataset_t *ds) return (rv); } -static boolean_t +boolean_t zfeature_active(spa_feature_t f, void *arg) { switch (spa_feature_table[f].fi_type) { @@ -2121,16 +2121,6 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx) } dmu_objset_sync(ds->ds_objset, zio, tx); - - for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { - if (zfeature_active(f, ds->ds_feature_activation[f])) { - if (zfeature_active(f, ds->ds_feature[f])) - continue; - dsl_dataset_activate_feature(ds->ds_object, f, - ds->ds_feature_activation[f], tx); - ds->ds_feature[f] = ds->ds_feature_activation[f]; - } - } } /* @@ -2303,6 +2293,17 @@ dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx) ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx))); dmu_buf_rele(ds->ds_dbuf, ds); + + for (spa_feature_t f = 0; f < SPA_FEATURES; f++) { + if (zfeature_active(f, + ds->ds_feature_activation[f])) { + if (zfeature_active(f, ds->ds_feature[f])) + continue; + dsl_dataset_activate_feature(ds->ds_object, f, + ds->ds_feature_activation[f], tx); + ds->ds_feature[f] = ds->ds_feature_activation[f]; + } + } } int diff --git a/sys/contrib/openzfs/module/zfs/dsl_deadlist.c b/sys/contrib/openzfs/module/zfs/dsl_deadlist.c index 2b33446e66af..d58820701f60 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_deadlist.c +++ b/sys/contrib/openzfs/module/zfs/dsl_deadlist.c @@ -438,6 +438,18 @@ dle_enqueue_subobj(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle, } } +/* + * Prefetch metadata required for dle_enqueue_subobj(). + */ +static void +dle_prefetch_subobj(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle, + uint64_t obj) +{ + if (dle->dle_bpobj.bpo_object != + dmu_objset_pool(dl->dl_os)->dp_empty_bpobj) + bpobj_prefetch_subobj(&dle->dle_bpobj, obj); +} + void dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx) @@ -810,6 +822,27 @@ dsl_deadlist_insert_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth, dle_enqueue_subobj(dl, dle, obj, tx); } +/* + * Prefetch metadata required for dsl_deadlist_insert_bpobj(). 
+ */ +static void +dsl_deadlist_prefetch_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth) +{ + dsl_deadlist_entry_t dle_tofind; + dsl_deadlist_entry_t *dle; + avl_index_t where; + + ASSERT(MUTEX_HELD(&dl->dl_lock)); + + dsl_deadlist_load_tree(dl); + + dle_tofind.dle_mintxg = birth; + dle = avl_find(&dl->dl_tree, &dle_tofind, &where); + if (dle == NULL) + dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE); + dle_prefetch_subobj(dl, dle, obj); +} + static int dsl_deadlist_insert_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, dmu_tx_t *tx) @@ -826,12 +859,12 @@ dsl_deadlist_insert_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed, void dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx) { - zap_cursor_t zc; - zap_attribute_t za; + zap_cursor_t zc, pzc; + zap_attribute_t za, pza; dmu_buf_t *bonus; dsl_deadlist_phys_t *dlp; dmu_object_info_t doi; - int error; + int error, perror, i; VERIFY0(dmu_object_info(dl->dl_os, obj, &doi)); if (doi.doi_type == DMU_OT_BPOBJ) { @@ -843,15 +876,32 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx) } mutex_enter(&dl->dl_lock); + /* + * Prefetch up to 128 deadlists first and then more as we progress. + * The limit is a balance between ARC use and diminishing returns. + */ + for (zap_cursor_init(&pzc, dl->dl_os, obj), i = 0; + (perror = zap_cursor_retrieve(&pzc, &pza)) == 0 && i < 128; + zap_cursor_advance(&pzc), i++) { + dsl_deadlist_prefetch_bpobj(dl, pza.za_first_integer, + zfs_strtonum(pza.za_name, NULL)); + } for (zap_cursor_init(&zc, dl->dl_os, obj); (error = zap_cursor_retrieve(&zc, &za)) == 0; zap_cursor_advance(&zc)) { uint64_t mintxg = zfs_strtonum(za.za_name, NULL); dsl_deadlist_insert_bpobj(dl, za.za_first_integer, mintxg, tx); VERIFY0(zap_remove_int(dl->dl_os, obj, mintxg, tx)); + if (perror == 0) { + dsl_deadlist_prefetch_bpobj(dl, pza.za_first_integer, + zfs_strtonum(pza.za_name, NULL)); + zap_cursor_advance(&pzc); + perror = zap_cursor_retrieve(&pzc, &pza); + } } VERIFY3U(error, ==, ENOENT); zap_cursor_fini(&zc); + zap_cursor_fini(&pzc); VERIFY0(dmu_bonus_hold(dl->dl_os, obj, FTAG, &bonus)); dlp = bonus->db_data; @@ -869,8 +919,9 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg, dmu_tx_t *tx) { dsl_deadlist_entry_t dle_tofind; - dsl_deadlist_entry_t *dle; + dsl_deadlist_entry_t *dle, *pdle; avl_index_t where; + int i; ASSERT(!dl->dl_oldfmt); @@ -882,11 +933,23 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg, dle = avl_find(&dl->dl_tree, &dle_tofind, &where); if (dle == NULL) dle = avl_nearest(&dl->dl_tree, where, AVL_AFTER); + /* + * Prefetch up to 128 deadlists first and then more as we progress. + * The limit is a balance between ARC use and diminishing returns. 
+ */ + for (pdle = dle, i = 0; pdle && i < 128; ) { + bpobj_prefetch_subobj(bpo, pdle->dle_bpobj.bpo_object); + pdle = AVL_NEXT(&dl->dl_tree, pdle); + } while (dle) { uint64_t used, comp, uncomp; dsl_deadlist_entry_t *dle_next; bpobj_enqueue_subobj(bpo, dle->dle_bpobj.bpo_object, tx); + if (pdle) { + bpobj_prefetch_subobj(bpo, pdle->dle_bpobj.bpo_object); + pdle = AVL_NEXT(&dl->dl_tree, pdle); + } VERIFY0(bpobj_space(&dle->dle_bpobj, &used, &comp, &uncomp)); diff --git a/sys/contrib/openzfs/module/zfs/dsl_scan.c b/sys/contrib/openzfs/module/zfs/dsl_scan.c index f9e437f0c947..f971aa211e0c 100644 --- a/sys/contrib/openzfs/module/zfs/dsl_scan.c +++ b/sys/contrib/openzfs/module/zfs/dsl_scan.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include @@ -126,11 +127,20 @@ static boolean_t scan_ds_queue_contains(dsl_scan_t *scn, uint64_t dsobj, static void scan_ds_queue_insert(dsl_scan_t *scn, uint64_t dsobj, uint64_t txg); static void scan_ds_queue_remove(dsl_scan_t *scn, uint64_t dsobj); static void scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx); -static uint64_t dsl_scan_count_data_disks(vdev_t *vd); +static uint64_t dsl_scan_count_data_disks(spa_t *spa); extern uint_t zfs_vdev_async_write_active_min_dirty_percent; static int zfs_scan_blkstats = 0; +/* + * 'zpool status' uses bytes processed per pass to report throughput and + * estimate time remaining. We define a pass to start when the scanning + * phase completes for a sequential resilver. Optionally, this value + * may be used to reset the pass statistics every N txgs to provide an + * estimated completion time based on currently observed performance. + */ +static uint_t zfs_scan_report_txgs = 0; + /* * By default zfs will check to ensure it is not over the hard memory * limit before each txg. If finer-grained control of this is needed @@ -147,7 +157,7 @@ static int zfs_scan_strict_mem_lim = B_FALSE; * overload the drives with I/O, since that is protected by * zfs_vdev_scrub_max_active. */ -static uint64_t zfs_scan_vdev_limit = 4 << 20; +static uint64_t zfs_scan_vdev_limit = 16 << 20; static uint_t zfs_scan_issue_strategy = 0; @@ -466,11 +476,12 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg) /* * Calculate the max number of in-flight bytes for pool-wide - * scanning operations (minimum 1MB). Limits for the issuing - * phase are done per top-level vdev and are handled separately. + * scanning operations (minimum 1MB, maximum 1/4 of arc_c_max). + * Limits for the issuing phase are done per top-level vdev and + * are handled separately. */ - scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit * - dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20); + scn->scn_maxinflight_bytes = MIN(arc_c_max / 4, MAX(1ULL << 20, + zfs_scan_vdev_limit * dsl_scan_count_data_disks(spa))); avl_create(&scn->scn_queue, scan_ds_queue_compare, sizeof (scan_ds_t), offsetof(scan_ds_t, sds_node)); @@ -604,6 +615,8 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg) } spa_scan_stat_init(spa); + vdev_scan_stat_init(spa->spa_root_vdev); + return (0); } @@ -763,6 +776,7 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx) scn->scn_last_checkpoint = 0; scn->scn_checkpointing = B_FALSE; spa_scan_stat_init(spa); + vdev_scan_stat_init(spa->spa_root_vdev); if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) { scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max; @@ -2024,6 +2038,26 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb, return; } + /* + * Check if this block contradicts any filesystem flags. 
+ */ + spa_feature_t f = SPA_FEATURE_LARGE_BLOCKS; + if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE) + ASSERT(dsl_dataset_feature_is_active(ds, f)); + + f = zio_checksum_to_feature(BP_GET_CHECKSUM(bp)); + if (f != SPA_FEATURE_NONE) + ASSERT(dsl_dataset_feature_is_active(ds, f)); + + f = zio_compress_to_feature(BP_GET_COMPRESS(bp)); + if (f != SPA_FEATURE_NONE) + ASSERT(dsl_dataset_feature_is_active(ds, f)); + + if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) { + scn->scn_lt_min_this_txg++; + return; + } + if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) { scn->scn_lt_min_this_txg++; return; @@ -2811,8 +2845,9 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx) } static uint64_t -dsl_scan_count_data_disks(vdev_t *rvd) +dsl_scan_count_data_disks(spa_t *spa) { + vdev_t *rvd = spa->spa_root_vdev; uint64_t i, leaves = 0; for (i = 0; i < rvd->vdev_children; i++) { @@ -3652,6 +3687,16 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) return; } + /* + * Disabled by default, set zfs_scan_report_txgs to report + * average performance over the last zfs_scan_report_txgs TXGs. + */ + if (!dsl_scan_is_paused_scrub(scn) && zfs_scan_report_txgs != 0 && + tx->tx_txg % zfs_scan_report_txgs == 0) { + scn->scn_issued_before_pass += spa->spa_scan_pass_issued; + spa_scan_stat_init(spa); + } + /* * It is possible to switch from unsorted to sorted at any time, * but afterwards the scan will remain sorted unless reloaded from @@ -3711,12 +3756,13 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) taskqid_t prefetch_tqid; /* - * Recalculate the max number of in-flight bytes for pool-wide - * scanning operations (minimum 1MB). Limits for the issuing - * phase are done per top-level vdev and are handled separately. + * Calculate the max number of in-flight bytes for pool-wide + * scanning operations (minimum 1MB, maximum 1/4 of arc_c_max). + * Limits for the issuing phase are done per top-level vdev and + * are handled separately. 
*/ - scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit * - dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20); + scn->scn_maxinflight_bytes = MIN(arc_c_max / 4, MAX(1ULL << 20, + zfs_scan_vdev_limit * dsl_scan_count_data_disks(spa))); if (scnp->scn_ddt_bookmark.ddb_class <= scnp->scn_ddt_class_max) { @@ -3780,6 +3826,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx) if (scn->scn_is_sorted) { scn->scn_checkpointing = B_TRUE; scn->scn_clearing = B_TRUE; + scn->scn_issued_before_pass += + spa->spa_scan_pass_issued; + spa_scan_stat_init(spa); } zfs_dbgmsg("scan complete for %s txg %llu", spa->spa_name, @@ -4507,5 +4556,8 @@ ZFS_MODULE_PARAM(zfs, zfs_, scan_strict_mem_lim, INT, ZMOD_RW, ZFS_MODULE_PARAM(zfs, zfs_, scan_fill_weight, UINT, ZMOD_RW, "Tunable to adjust bias towards more filled segments during scans"); +ZFS_MODULE_PARAM(zfs, zfs_, scan_report_txgs, UINT, ZMOD_RW, + "Tunable to report resilver performance over the last N txgs"); + ZFS_MODULE_PARAM(zfs, zfs_, resilver_disable_defer, INT, ZMOD_RW, "Process all resilvers immediately"); diff --git a/sys/contrib/openzfs/module/zfs/spa.c b/sys/contrib/openzfs/module/zfs/spa.c index 67b3a03a951a..bbb83fc610b1 100644 --- a/sys/contrib/openzfs/module/zfs/spa.c +++ b/sys/contrib/openzfs/module/zfs/spa.c @@ -1714,9 +1714,9 @@ spa_unload(spa_t *spa) */ spa_l2cache_drop(spa); - for (int i = 0; i < spa->spa_spares.sav_count; i++) - vdev_free(spa->spa_spares.sav_vdevs[i]); if (spa->spa_spares.sav_vdevs) { + for (int i = 0; i < spa->spa_spares.sav_count; i++) + vdev_free(spa->spa_spares.sav_vdevs[i]); kmem_free(spa->spa_spares.sav_vdevs, spa->spa_spares.sav_count * sizeof (void *)); spa->spa_spares.sav_vdevs = NULL; @@ -1727,11 +1727,11 @@ spa_unload(spa_t *spa) } spa->spa_spares.sav_count = 0; - for (int i = 0; i < spa->spa_l2cache.sav_count; i++) { - vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]); - vdev_free(spa->spa_l2cache.sav_vdevs[i]); - } if (spa->spa_l2cache.sav_vdevs) { + for (int i = 0; i < spa->spa_l2cache.sav_count; i++) { + vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]); + vdev_free(spa->spa_l2cache.sav_vdevs[i]); + } kmem_free(spa->spa_l2cache.sav_vdevs, spa->spa_l2cache.sav_count * sizeof (void *)); spa->spa_l2cache.sav_vdevs = NULL; @@ -1789,20 +1789,21 @@ spa_load_spares(spa_t *spa) /* * First, close and free any existing spare vdevs. 
*/ - for (i = 0; i < spa->spa_spares.sav_count; i++) { - vd = spa->spa_spares.sav_vdevs[i]; + if (spa->spa_spares.sav_vdevs) { + for (i = 0; i < spa->spa_spares.sav_count; i++) { + vd = spa->spa_spares.sav_vdevs[i]; - /* Undo the call to spa_activate() below */ - if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, - B_FALSE)) != NULL && tvd->vdev_isspare) - spa_spare_remove(tvd); - vdev_close(vd); - vdev_free(vd); - } + /* Undo the call to spa_activate() below */ + if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid, + B_FALSE)) != NULL && tvd->vdev_isspare) + spa_spare_remove(tvd); + vdev_close(vd); + vdev_free(vd); + } - if (spa->spa_spares.sav_vdevs) kmem_free(spa->spa_spares.sav_vdevs, spa->spa_spares.sav_count * sizeof (void *)); + } if (spa->spa_spares.sav_config == NULL) nspares = 0; @@ -2013,23 +2014,24 @@ spa_load_l2cache(spa_t *spa) /* * Purge vdevs that were dropped */ - for (i = 0; i < oldnvdevs; i++) { - uint64_t pool; + if (oldvdevs) { + for (i = 0; i < oldnvdevs; i++) { + uint64_t pool; - vd = oldvdevs[i]; - if (vd != NULL) { - ASSERT(vd->vdev_isl2cache); + vd = oldvdevs[i]; + if (vd != NULL) { + ASSERT(vd->vdev_isl2cache); - if (spa_l2cache_exists(vd->vdev_guid, &pool) && - pool != 0ULL && l2arc_vdev_present(vd)) - l2arc_remove_vdev(vd); - vdev_clear_stats(vd); - vdev_free(vd); + if (spa_l2cache_exists(vd->vdev_guid, &pool) && + pool != 0ULL && l2arc_vdev_present(vd)) + l2arc_remove_vdev(vd); + vdev_clear_stats(vd); + vdev_free(vd); + } } - } - if (oldvdevs) kmem_free(oldvdevs, oldnvdevs * sizeof (void *)); + } for (i = 0; i < sav->sav_count; i++) nvlist_free(l2cache[i]); diff --git a/sys/contrib/openzfs/module/zfs/spa_misc.c b/sys/contrib/openzfs/module/zfs/spa_misc.c index 822fd0ee89b6..53763e915ca8 100644 --- a/sys/contrib/openzfs/module/zfs/spa_misc.c +++ b/sys/contrib/openzfs/module/zfs/spa_misc.c @@ -2556,7 +2556,6 @@ spa_scan_stat_init(spa_t *spa) spa->spa_scan_pass_scrub_spent_paused = 0; spa->spa_scan_pass_exam = 0; spa->spa_scan_pass_issued = 0; - vdev_scan_stat_init(spa->spa_root_vdev); } /* diff --git a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c index 1f56275c853b..62aa61b3b9e7 100644 --- a/sys/contrib/openzfs/module/zfs/vdev_rebuild.c +++ b/sys/contrib/openzfs/module/zfs/vdev_rebuild.c @@ -34,6 +34,7 @@ #include #include #include +#include #include /* @@ -116,13 +117,12 @@ static uint64_t zfs_rebuild_max_segment = 1024 * 1024; * segment size is also large (zfs_rebuild_max_segment=1M). This helps keep * the queue depth short. * - * 32MB was selected as the default value to achieve good performance with - * a large 90-drive dRAID HDD configuration (draid2:8d:90c:2s). A sequential - * rebuild was unable to saturate all of the drives using smaller values. - * With a value of 32MB the sequential resilver write rate was measured at - * 800MB/s sustained while rebuilding to a distributed spare. + * 64MB was observed to deliver the best performance and set as the default. + * Testing was performed with a 106-drive dRAID HDD pool (draid2:11d:106c) + * and a rebuild rate of 1.2GB/s was measured to the distribute spare. + * Smaller values were unable to fully saturate the available pool I/O. 
*/ -static uint64_t zfs_rebuild_vdev_limit = 32 << 20; +static uint64_t zfs_rebuild_vdev_limit = 64 << 20; /* * Automatically start a pool scrub when the last active sequential resilver @@ -754,6 +754,7 @@ vdev_rebuild_thread(void *arg) { vdev_t *vd = arg; spa_t *spa = vd->vdev_spa; + vdev_t *rvd = spa->spa_root_vdev; int error = 0; /* @@ -786,9 +787,6 @@ vdev_rebuild_thread(void *arg) vr->vr_pass_bytes_scanned = 0; vr->vr_pass_bytes_issued = 0; - vr->vr_bytes_inflight_max = MAX(1ULL << 20, - zfs_rebuild_vdev_limit * vd->vdev_children); - uint64_t update_est_time = gethrtime(); vdev_rebuild_update_bytes_est(vd, 0); @@ -804,6 +802,17 @@ vdev_rebuild_thread(void *arg) metaslab_t *msp = vd->vdev_ms[i]; vr->vr_scan_msp = msp; + /* + * Calculate the max number of in-flight bytes for top-level + * vdev scanning operations (minimum 1MB, maximum 1/4 of + * arc_c_max shared by all top-level vdevs). Limits for the + * issuing phase are done per top-level vdev and are handled + * separately. + */ + uint64_t limit = (arc_c_max / 4) / MAX(rvd->vdev_children, 1); + vr->vr_bytes_inflight_max = MIN(limit, MAX(1ULL << 20, + zfs_rebuild_vdev_limit * vd->vdev_children)); + /* * Removal of vdevs from the vdev tree may eliminate the need * for the rebuild, in which case it should be canceled. The diff --git a/sys/contrib/openzfs/module/zfs/zfs_replay.c b/sys/contrib/openzfs/module/zfs/zfs_replay.c index 0293e46d5858..32be27a8ba6e 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_replay.c +++ b/sys/contrib/openzfs/module/zfs/zfs_replay.c @@ -512,9 +512,9 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap) * * The _ATTR versions will grab the fuid info in their subcases. */ - if ((int)lr->lr_common.lrc_txtype != TX_SYMLINK && - (int)lr->lr_common.lrc_txtype != TX_MKDIR_ATTR && - (int)lr->lr_common.lrc_txtype != TX_CREATE_ATTR) { + if (txtype != TX_SYMLINK && + txtype != TX_MKDIR_ATTR && + txtype != TX_CREATE_ATTR) { start = (lr + 1); zfsvfs->z_fuid_replay = zfs_replay_fuid_domain(start, &start, diff --git a/sys/contrib/openzfs/module/zfs/zfs_vnops.c b/sys/contrib/openzfs/module/zfs/zfs_vnops.c index 0c392b9da0fb..10677d8d9947 100644 --- a/sys/contrib/openzfs/module/zfs/zfs_vnops.c +++ b/sys/contrib/openzfs/module/zfs/zfs_vnops.c @@ -106,7 +106,7 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off) hole = B_FALSE; /* Flush any mmap()'d data to disk */ - if (zn_has_cached_data(zp)) + if (zn_has_cached_data(zp, 0, file_sz - 1)) zn_flush_cached_data(zp, B_FALSE); lr = zfs_rangelock_enter(&zp->z_rangelock, 0, file_sz, RL_READER); @@ -288,7 +288,8 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) error = mappedread_sf(zp, nbytes, uio); else #endif - if (zn_has_cached_data(zp) && !(ioflag & O_DIRECT)) { + if (zn_has_cached_data(zp, zfs_uio_offset(uio), + zfs_uio_offset(uio) + nbytes - 1) && !(ioflag & O_DIRECT)) { error = mappedread(zp, nbytes, uio); } else { error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl), @@ -696,7 +697,8 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr) zfs_uioskip(uio, nbytes); tx_bytes = nbytes; } - if (tx_bytes && zn_has_cached_data(zp) && + if (tx_bytes && + zn_has_cached_data(zp, woff, woff + tx_bytes - 1) && !(ioflag & O_DIRECT)) { update_pages(zp, woff, tx_bytes, zfsvfs->z_os); } diff --git a/sys/contrib/openzfs/module/zfs/zio.c b/sys/contrib/openzfs/module/zfs/zio.c index 5d7ed6d582a2..d888a584a93c 100644 --- a/sys/contrib/openzfs/module/zfs/zio.c +++ b/sys/contrib/openzfs/module/zfs/zio.c @@ -2778,7 +2778,7 @@ 
zio_write_gang_member_ready(zio_t *zio) ASSERT3U(zio->io_prop.zp_copies, ==, gio->io_prop.zp_copies); ASSERT3U(zio->io_prop.zp_copies, <=, BP_GET_NDVAS(zio->io_bp)); ASSERT3U(pio->io_prop.zp_copies, <=, BP_GET_NDVAS(pio->io_bp)); - ASSERT3U(BP_GET_NDVAS(zio->io_bp), <=, BP_GET_NDVAS(pio->io_bp)); + VERIFY3U(BP_GET_NDVAS(zio->io_bp), <=, BP_GET_NDVAS(pio->io_bp)); mutex_enter(&pio->io_lock); for (int d = 0; d < BP_GET_NDVAS(zio->io_bp); d++) { @@ -2816,18 +2816,20 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc) uint64_t resid = pio->io_size; uint64_t lsize; int copies = gio->io_prop.zp_copies; - int gbh_copies; zio_prop_t zp; int error; boolean_t has_data = !(pio->io_flags & ZIO_FLAG_NODATA); /* - * encrypted blocks need DVA[2] free so encrypted gang headers can't - * have a third copy. + * If one copy was requested, store 2 copies of the GBH, so that we + * can still traverse all the data (e.g. to free or scrub) even if a + * block is damaged. Note that we can't store 3 copies of the GBH in + * all cases, e.g. with encryption, which uses DVA[2] for the IV+salt. */ - gbh_copies = MIN(copies + 1, spa_max_replication(spa)); - if (BP_IS_ENCRYPTED(bp) && gbh_copies >= SPA_DVAS_PER_BP) - gbh_copies = SPA_DVAS_PER_BP - 1; + int gbh_copies = copies; + if (gbh_copies == 1) { + gbh_copies = MIN(2, spa_max_replication(spa)); + } int flags = METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER; if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) { diff --git a/sys/contrib/openzfs/module/zstd/lib/compress/zstd_double_fast.c b/sys/contrib/openzfs/module/zstd/lib/compress/zstd_double_fast.c index 27eed66cfedd..4a95c01a090d 100644 --- a/sys/contrib/openzfs/module/zstd/lib/compress/zstd_double_fast.c +++ b/sys/contrib/openzfs/module/zstd/lib/compress/zstd_double_fast.c @@ -409,7 +409,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( hashSmall[hSmall] = hashLong[hLong] = current; /* update hash table */ if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */ - & (repIndex > dictStartIndex)) + & (offset_1 < current+1 - dictStartIndex)) /* note: we are searching at current+1 */ && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4; @@ -477,7 +477,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic( U32 const repIndex2 = current2 - offset_2; const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */ - & (repIndex2 > dictStartIndex)) + & (offset_2 < current2 - dictStartIndex)) && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? 
dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; diff --git a/sys/contrib/openzfs/module/zstd/lib/compress/zstd_fast.c b/sys/contrib/openzfs/module/zstd/lib/compress/zstd_fast.c index 85a3a7a91e49..17894b85472f 100644 --- a/sys/contrib/openzfs/module/zstd/lib/compress/zstd_fast.c +++ b/sys/contrib/openzfs/module/zstd/lib/compress/zstd_fast.c @@ -416,9 +416,9 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( const BYTE* const repMatch = repBase + repIndex; hashTable[h] = current; /* update hash table */ DEBUGLOG(7, "offset_1 = %u , current = %u", offset_1, current); - assert(offset_1 <= current +1); /* check repIndex */ - if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex)) + if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ + & (offset_1 < current+1 - dictStartIndex) ) /* note: we are searching at current+1 */ && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) { const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend; size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4; @@ -453,7 +453,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic( U32 const current2 = (U32)(ip-base); U32 const repIndex2 = current2 - offset_2; const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2; - if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */ + if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 < current - dictStartIndex)) /* intentional overflow */ && (MEM_read32(repMatch2) == MEM_read32(ip)) ) { const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend; size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4; diff --git a/sys/contrib/openzfs/module/zstd/lib/compress/zstd_lazy.c b/sys/contrib/openzfs/module/zstd/lib/compress/zstd_lazy.c index 4cf5c88b5325..22d80597ec62 100644 --- a/sys/contrib/openzfs/module/zstd/lib/compress/zstd_lazy.c +++ b/sys/contrib/openzfs/module/zstd/lib/compress/zstd_lazy.c @@ -975,7 +975,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const U32 repIndex = (U32)(current+1 - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */ + & (offset_1 < current+1 - windowLow) ) /* note: we are searching at current+1 */ if (MEM_read32(ip+1) == MEM_read32(repMatch)) { /* repcode detected we should take it */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; @@ -1006,7 +1007,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const U32 repIndex = (U32)(current - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + & (offset_1 < current - windowLow) ) /* equivalent to `current > repIndex >= windowLow` */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ const BYTE* const repEnd = repIndex < dictLimit ? 
dictEnd : iend; @@ -1037,7 +1039,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const U32 repIndex = (U32)(current - offset_1); const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + & (offset_1 < current - windowLow) ) /* equivalent to `current > repIndex >= windowLow` */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; @@ -1083,7 +1086,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic( const U32 repIndex = repCurrent - offset_2; const BYTE* const repBase = repIndex < dictLimit ? dictBase : base; const BYTE* const repMatch = repBase + repIndex; - if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */ + if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */ + & (offset_2 < repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */ if (MEM_read32(ip) == MEM_read32(repMatch)) { /* repcode detected we should take it */ const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend; diff --git a/sys/contrib/openzfs/rpm/generic/zfs.spec.in b/sys/contrib/openzfs/rpm/generic/zfs.spec.in index 251470a67fba..8c538a00d203 100644 --- a/sys/contrib/openzfs/rpm/generic/zfs.spec.in +++ b/sys/contrib/openzfs/rpm/generic/zfs.spec.in @@ -3,7 +3,7 @@ # Set the default udev directory based on distribution. %if %{undefined _udevdir} -%if 0%{?fedora}%{?rhel}%{?centos}%{?openEuler} +%if 0%{?rhel}%{?fedora}%{?centos}%{?suse_version}%{?openEuler} %global _udevdir %{_prefix}/lib/udev %else %global _udevdir /lib/udev @@ -12,7 +12,7 @@ # Set the default udevrule directory based on distribution. %if %{undefined _udevruledir} -%if 0%{?fedora}%{?rhel}%{?centos}%{?openEuler} +%if 0%{?rhel}%{?fedora}%{?centos}%{?suse_version}%{?openEuler} %global _udevruledir %{_prefix}/lib/udev/rules.d %else %global _udevruledir /lib/udev/rules.d @@ -21,7 +21,7 @@ # Set the default dracut directory based on distribution. 
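The zstd hunks above replace checks of the form `repIndex > windowLow` with `offset < current - windowLow`. Because `repIndex = current - offset` is computed in unsigned 32-bit arithmetic and can wrap, the rewritten form is the overflow-safe way to require that repIndex lie strictly between windowLow and current, without the false positive a wrapped repIndex could produce in the old form. A small self-contained check of that equivalence (variable names mirror the zstd code, the values are arbitrary):

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	const uint32_t current = 100, windowLow = 40;

	for (uint32_t offset = 1; offset < 300; offset++) {
		uint32_t repIndex = current - offset;	/* may wrap */

		/* Intended range check, written without wrap-around. */
		int intended = (offset <= current) &&
		    (repIndex > windowLow) && (repIndex < current);

		/* Overflow-safe form used by the patched code. */
		int rewritten = offset < current - windowLow;

		assert(intended == rewritten);
	}
	printf("offset-based check matches the intended range check\n");
	return (0);
}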
%if %{undefined _dracutdir} -%if 0%{?fedora}%{?rhel}%{?centos}%{?openEuler} +%if 0%{?rhel}%{?fedora}%{?centos}%{?suse_version}%{?openEuler} %global _dracutdir %{_prefix}/lib/dracut %else %global _dracutdir %{_prefix}/share/dracut @@ -110,7 +110,7 @@ BuildRequires: libblkid-devel BuildRequires: libudev-devel BuildRequires: libattr-devel BuildRequires: openssl-devel -%if 0%{?fedora}%{?openEuler} || 0%{?rhel} >= 8 || 0%{?centos} >= 8 +%if 0%{?fedora}%{?suse_version}%{?openEuler} || 0%{?rhel} >= 8 || 0%{?centos} >= 8 BuildRequires: libtirpc-devel %endif diff --git a/sys/contrib/openzfs/tests/runfiles/common.run b/sys/contrib/openzfs/tests/runfiles/common.run index 005c539fc89d..7a7cf927c77e 100644 --- a/sys/contrib/openzfs/tests/runfiles/common.run +++ b/sys/contrib/openzfs/tests/runfiles/common.run @@ -704,7 +704,7 @@ tags = ['functional', 'nestedfs'] [tests/functional/no_space] tests = ['enospc_001_pos', 'enospc_002_pos', 'enospc_003_pos', - 'enospc_df', 'enospc_rm'] + 'enospc_df', 'enospc_ganging', 'enospc_rm'] tags = ['functional', 'no_space'] [tests/functional/nopwrite] diff --git a/sys/contrib/openzfs/tests/test-runner/bin/test-runner.py.in b/sys/contrib/openzfs/tests/test-runner/bin/test-runner.py.in index cb453b266f3c..28276ebc47e3 100755 --- a/sys/contrib/openzfs/tests/test-runner/bin/test-runner.py.in +++ b/sys/contrib/openzfs/tests/test-runner/bin/test-runner.py.in @@ -297,7 +297,7 @@ User: %s proc = Popen(privcmd, stdout=PIPE, stderr=PIPE) # Allow a special timeout value of 0 to mean infinity if int(self.timeout) == 0: - self.timeout = sys.maxsize + self.timeout = sys.maxsize / (10 ** 9) t = Timer(int(self.timeout), self.kill_cmd, [proc]) try: diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am index bbe94f9177ae..ad2ec4670556 100644 --- a/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/Makefile.am @@ -1539,6 +1539,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \ functional/no_space/enospc_002_pos.ksh \ functional/no_space/enospc_003_pos.ksh \ functional/no_space/enospc_df.ksh \ + functional/no_space/enospc_ganging.ksh \ functional/no_space/enospc_rm.ksh \ functional/no_space/setup.ksh \ functional/online_offline/cleanup.ksh \ diff --git a/sys/contrib/openzfs/tests/zfs-tests/tests/functional/no_space/enospc_ganging.ksh b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/no_space/enospc_ganging.ksh new file mode 100755 index 000000000000..1d35fba5dbfa --- /dev/null +++ b/sys/contrib/openzfs/tests/zfs-tests/tests/functional/no_space/enospc_ganging.ksh @@ -0,0 +1,86 @@ +#!/bin/ksh + +# +# This file and its contents are supplied under the terms of the +# Common Development and Distribution License ("CDDL"), version 1.0. +# You may only use this file in accordance with the terms of version +# 1.0 of the CDDL. +# +# A full copy of the text of the CDDL should have accompanied this +# source. A copy of the CDDL is also available via the Internet at +# http://www.illumos.org/license/CDDL. +# + +. $STF_SUITE/include/libtest.shlib + +# +# DESCRIPTION: +# Exercise gang block IO paths for non-encrypted and encrypted datasets. +# + +verify_runnable "both" +log_assert "Verify IO when file system is full and ganging." 
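On the test-runner change a few hunks above: a timeout of `sys.maxsize` seconds is too large to survive conversion into a signed 64-bit nanosecond count, which is presumably why the "infinite" value is now divided by 10^9 first. A minimal C illustration of that bound (the Python-internals rationale is an assumption here, but the arithmetic is the same):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	/* sys.maxsize on a 64-bit platform. */
	uint64_t maxsize = INT64_MAX;

	/* Largest whole-second timeout that still fits in int64 ns. */
	uint64_t max_sec = maxsize / 1000000000ULL;

	printf("largest ns-safe timeout: %" PRIu64 " s (~%" PRIu64 " years)\n",
	    max_sec, max_sec / (365ULL * 24 * 3600));
	return (0);
}

Roughly 9.2 billion seconds, about 292 years, which is still effectively infinite for a test run.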
+ +function cleanup +{ + log_must set_tunable64 METASLAB_FORCE_GANGING $metaslab_force_ganging + default_cleanup_noexit +} + +log_onexit cleanup + +default_setup_noexit $DISKS + +typeset metaslab_force_ganging=$(get_tunable METASLAB_FORCE_GANGING) +shift=$(random_int_between 15 17) +log_must set_tunable64 METASLAB_FORCE_GANGING $((2**$shift)) + +keyfile=/$TESTPOOL/keyencfods +log_must eval "echo 'password' > $keyfile" +bs=1024k +count=512 + +log_must dd if=/dev/urandom of=$TESTDIR/data bs=$bs count=$count +data_checksum=$(sha256digest $TESTDIR/data) + +# Test common large block configuration. +log_must zfs create -o recordsize=1m -o primarycache=metadata $TESTPOOL/gang +mntpnt=$(get_prop mountpoint $TESTPOOL/gang) + +log_must dd if=$TESTDIR/data of=$mntpnt/file bs=$bs count=$count +sync_pool $TESTPOOL +log_must dd if=$mntpnt/file of=$TESTDIR/out bs=$bs count=$count +out_checksum=$(sha256digest $TESTDIR/out) + +if [[ "$data_checksum" != "$out_checksum" ]]; then + log_fail "checksum mismatch ($data_checksum != $out_checksum)" +fi + +log_must rm -f $TESTDIR/out +log_must zfs destroy $TESTPOOL/gang + +# Test common large block configuration with encryption. +log_must zfs create \ + -o recordsize=1m \ + -o primarycache=metadata \ + -o compression=off \ + -o encryption=on \ + -o keyformat=passphrase \ + -o keylocation=file://$keyfile \ + -o copies=2 \ + $TESTPOOL/gang +mntpnt=$(get_prop mountpoint $TESTPOOL/gang) + +log_must dd if=$TESTDIR/data of=$mntpnt/file bs=$bs count=$count +sync_pool $TESTPOOL +log_must dd if=$mntpnt/file of=$TESTDIR/out bs=$bs count=$count +out_checksum=$(sha256digest $TESTDIR/out) + +if [[ "$data_checksum" != "$out_checksum" ]]; then + log_fail "checksum mismatch ($data_checksum != $out_checksum)" +fi + +log_must rm -f $TESTDIR/out +log_must zfs destroy $TESTPOOL/gang + +log_pass "Verified IO when file system is full and ganging." diff --git a/sys/modules/zfs/zfs_config.h b/sys/modules/zfs/zfs_config.h index 5c30abf047f4..0d4fea6ea856 100644 --- a/sys/modules/zfs/zfs_config.h +++ b/sys/modules/zfs/zfs_config.h @@ -296,6 +296,9 @@ /* fault_in_iov_iter_readable() is available */ /* #undef HAVE_FAULT_IN_IOV_ITER_READABLE */ +/* filemap_range_has_page() is available */ +/* #undef HAVE_FILEMAP_RANGE_HAS_PAGE */ + /* fops->aio_fsync() exists */ /* #undef HAVE_FILE_AIO_FSYNC */ @@ -985,7 +988,7 @@ /* #undef ZFS_IS_GPL_COMPATIBLE */ /* Define the project alias string. */ -#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_g9cd71c860" +#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_g57cfae4a2" /* Define the project author. */ #define ZFS_META_AUTHOR "OpenZFS" @@ -1015,7 +1018,7 @@ #define ZFS_META_NAME "zfs" /* Define the project release. */ -#define ZFS_META_RELEASE "FreeBSD_g9cd71c860" +#define ZFS_META_RELEASE "FreeBSD_g57cfae4a2" /* Define the project version. */ #define ZFS_META_VERSION "2.1.99" diff --git a/sys/modules/zfs/zfs_gitrev.h b/sys/modules/zfs/zfs_gitrev.h index 334a458f9647..0d4020f71888 100644 --- a/sys/modules/zfs/zfs_gitrev.h +++ b/sys/modules/zfs/zfs_gitrev.h @@ -1 +1 @@ -#define ZFS_META_GITREV "zfs-2.1.99-1706-g9cd71c860" +#define ZFS_META_GITREV "zfs-2.1.99-1734-g57cfae4a2"
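To make the gang-header replication change in the zio_write_gang_block() hunk earlier in this patch easier to follow, here is a minimal userspace restatement of the selection logic. spa_max_replication() is replaced by a plain parameter and MIN is a stand-in for the kernel macro:

#include <stdio.h>

#define	MIN(a, b)	((a) < (b) ? (a) : (b))

/*
 * Single-copy blocks get a second copy of the gang block header so the
 * block tree stays traversable if one copy is damaged. Blocks that
 * already request 2 or 3 copies keep the same count for the header,
 * which also avoids touching DVA[2] (used for the IV+salt on
 * encrypted blocks).
 */
static int
choose_gbh_copies(int copies, int max_replication)
{
	int gbh_copies = copies;

	if (gbh_copies == 1)
		gbh_copies = MIN(2, max_replication);

	return (gbh_copies);
}

int
main(void)
{
	for (int copies = 1; copies <= 3; copies++)
		printf("data copies=%d -> gbh copies=%d\n",
		    copies, choose_gbh_copies(copies, 3));
	return (0);
}

With the usual maximum replication of 3, a single-copy block gets a 2-copy gang header, while 2- and 3-copy blocks keep matching header copies.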