Notable upstream pull request merges:
  #13816 Fix a race condition in dsl_dataset_sync() when
         activating features
  #14402 Prefetch on deadlists merge
  #14410 Improve resilver ETAs
  #14428 Resilver performance tuning
  #14439 Resolve WS-2021-0184 vulnerability in zstd
  #14440 EIO caused by encryption + recursive gang
  #14448 Fix console progress reporting for recursive send
  #14454 Improve arc_read() error reporting
  #14460 Restore FreeBSD to use .rodata
  #14474 Reduce need for contiguous memory for ioctls

Obtained from:	OpenZFS
OpenZFS commit:	57cfae4a2f
Author:		Martin Matuska
Date:		2023-02-16 22:38:51 +01:00
commit c9539b8901
56 changed files with 614 additions and 191 deletions


@@ -2377,7 +2377,8 @@ snprintf_blkptr_compact(char *blkbuf, size_t buflen, const blkptr_t *bp,
 		(void) snprintf(blkbuf + strlen(blkbuf),
 		    buflen - strlen(blkbuf), " %s", "FREE");
 	(void) snprintf(blkbuf + strlen(blkbuf),
-	    buflen - strlen(blkbuf), " cksum=%llx:%llx:%llx:%llx",
+	    buflen - strlen(blkbuf),
+	    " cksum=%016llx:%016llx:%016llx:%016llx",
 	    (u_longlong_t)bp->blk_cksum.zc_word[0],
 	    (u_longlong_t)bp->blk_cksum.zc_word[1],
 	    (u_longlong_t)bp->blk_cksum.zc_word[2],
@@ -7509,6 +7510,19 @@ mos_leak_log_spacemaps(spa_t *spa)
 		mos_obj_refd(sls->sls_sm_obj);
 }
 
+static void
+errorlog_count_refd(objset_t *mos, uint64_t errlog)
+{
+	zap_cursor_t zc;
+	zap_attribute_t za;
+	for (zap_cursor_init(&zc, mos, errlog);
+	    zap_cursor_retrieve(&zc, &za) == 0;
+	    zap_cursor_advance(&zc)) {
+		mos_obj_refd(za.za_first_integer);
+	}
+	zap_cursor_fini(&zc);
+}
+
 static int
 dump_mos_leaks(spa_t *spa)
 {
@@ -7529,6 +7543,12 @@ dump_mos_leaks(spa_t *spa)
 	mos_obj_refd(spa->spa_history);
 	mos_obj_refd(spa->spa_errlog_last);
 	mos_obj_refd(spa->spa_errlog_scrub);
+
+	if (!spa_feature_is_enabled(spa, SPA_FEATURE_HEAD_ERRLOG)) {
+		errorlog_count_refd(mos, spa->spa_errlog_last);
+		errorlog_count_refd(mos, spa->spa_errlog_scrub);
+	}
+
 	mos_obj_refd(spa->spa_all_vdev_zaps);
 	mos_obj_refd(spa->spa_dsl_pool->dp_bptree_obj);
 	mos_obj_refd(spa->spa_dsl_pool->dp_tmp_userrefs_obj);
@@ -8364,7 +8384,9 @@ zdb_read_block(char *thing, spa_t *spa)
 		    DVA_GET_OFFSET(&bp->blk_dva[0]);
 		ck_zio->io_bp = bp;
 		zio_checksum_compute(ck_zio, ck, pabd, lsize);
-		printf("%12s\tcksum=%llx:%llx:%llx:%llx\n",
+		printf(
+		    "%12s\t"
+		    "cksum=%016llx:%016llx:%016llx:%016llx\n",
 		    zio_checksum_table[ck].ci_name,
 		    (u_longlong_t)bp->blk_cksum.zc_word[0],
 		    (u_longlong_t)bp->blk_cksum.zc_word[1],


@@ -4532,7 +4532,7 @@ zfs_do_send(int argc, char **argv)
 		}
 	}
 
-	if (flags.parsable && flags.verbosity == 0)
+	if ((flags.parsable || flags.progressastitle) && flags.verbosity == 0)
 		flags.verbosity = 1;
 
 	if (excludes.count > 0 && !flags.replicate) {
@@ -8672,7 +8672,6 @@ main(int argc, char **argv)
 	int i = 0;
 	const char *cmdname;
 	char **newargv;
-	extern char **environ;
 
 	(void) setlocale(LC_ALL, "");
 	(void) setlocale(LC_NUMERIC, "C");


@@ -7524,19 +7524,20 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
 	zfs_nicebytes(ps->pss_processed, processed_buf, sizeof (processed_buf));
 
-	assert(ps->pss_func == POOL_SCAN_SCRUB ||
-	    ps->pss_func == POOL_SCAN_RESILVER);
+	int is_resilver = ps->pss_func == POOL_SCAN_RESILVER;
+	int is_scrub = ps->pss_func == POOL_SCAN_SCRUB;
+	assert(is_resilver || is_scrub);
 
 	/* Scan is finished or canceled. */
 	if (ps->pss_state == DSS_FINISHED) {
 		secs_to_dhms(end - start, time_buf);
 
-		if (ps->pss_func == POOL_SCAN_SCRUB) {
+		if (is_scrub) {
 			(void) printf(gettext("scrub repaired %s "
 			    "in %s with %llu errors on %s"), processed_buf,
 			    time_buf, (u_longlong_t)ps->pss_errors,
 			    ctime(&end));
-		} else if (ps->pss_func == POOL_SCAN_RESILVER) {
+		} else if (is_resilver) {
 			(void) printf(gettext("resilvered %s "
 			    "in %s with %llu errors on %s"), processed_buf,
 			    time_buf, (u_longlong_t)ps->pss_errors,
@@ -7544,10 +7545,10 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
 		}
 		return;
 	} else if (ps->pss_state == DSS_CANCELED) {
-		if (ps->pss_func == POOL_SCAN_SCRUB) {
+		if (is_scrub) {
 			(void) printf(gettext("scrub canceled on %s"),
 			    ctime(&end));
-		} else if (ps->pss_func == POOL_SCAN_RESILVER) {
+		} else if (is_resilver) {
 			(void) printf(gettext("resilver canceled on %s"),
 			    ctime(&end));
 		}
@@ -7557,7 +7558,7 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
 	assert(ps->pss_state == DSS_SCANNING);
 
 	/* Scan is in progress. Resilvers can't be paused. */
-	if (ps->pss_func == POOL_SCAN_SCRUB) {
+	if (is_scrub) {
 		if (pause == 0) {
 			(void) printf(gettext("scrub in progress since %s"),
 			    ctime(&start));
@@ -7567,7 +7568,7 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
 			(void) printf(gettext("\tscrub started on %s"),
 			    ctime(&start));
 		}
-	} else if (ps->pss_func == POOL_SCAN_RESILVER) {
+	} else if (is_resilver) {
 		(void) printf(gettext("resilver in progress since %s"),
 		    ctime(&start));
 	}
@@ -7609,17 +7610,27 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
 		    scanned_buf, issued_buf, total_buf);
 	}
 
-	if (ps->pss_func == POOL_SCAN_RESILVER) {
+	if (is_resilver) {
 		(void) printf(gettext("\t%s resilvered, %.2f%% done"),
 		    processed_buf, 100 * fraction_done);
-	} else if (ps->pss_func == POOL_SCAN_SCRUB) {
+	} else if (is_scrub) {
 		(void) printf(gettext("\t%s repaired, %.2f%% done"),
 		    processed_buf, 100 * fraction_done);
 	}
 
 	if (pause == 0) {
+		/*
+		 * Only provide an estimate iff:
+		 * 1) the time remaining is valid, and
+		 * 2) the issue rate exceeds 10 MB/s, and
+		 * 3) it's either:
+		 *   a) a resilver which has started repairs, or
+		 *   b) a scrub which has entered the issue phase.
+		 */
 		if (total_secs_left != UINT64_MAX &&
-		    issue_rate >= 10 * 1024 * 1024) {
+		    issue_rate >= 10 * 1024 * 1024 &&
+		    ((is_resilver && ps->pss_processed > 0) ||
+		    (is_scrub && issued > 0))) {
 			(void) printf(gettext(", %s to go\n"), time_buf);
 		} else {
 			(void) printf(gettext(", no estimated "


@@ -0,0 +1,26 @@
+dnl #
+dnl # filemap_range_has_page was not available till 4.13
+dnl #
+AC_DEFUN([ZFS_AC_KERNEL_SRC_FILEMAP], [
+	ZFS_LINUX_TEST_SRC([filemap_range_has_page], [
+		#include <linux/fs.h>
+	],[
+		struct address_space *mapping = NULL;
+		loff_t lstart = 0;
+		loff_t lend = 0;
+		bool ret __attribute__ ((unused));
+
+		ret = filemap_range_has_page(mapping, lstart, lend);
+	])
+])
+
+AC_DEFUN([ZFS_AC_KERNEL_FILEMAP], [
+	AC_MSG_CHECKING([whether filemap_range_has_page() is available])
+	ZFS_LINUX_TEST_RESULT([filemap_range_has_page], [
+		AC_MSG_RESULT(yes)
+		AC_DEFINE(HAVE_FILEMAP_RANGE_HAS_PAGE, 1,
+			[filemap_range_has_page() is available])
+	],[
+		AC_MSG_RESULT(no)
+	])
+])


@@ -150,6 +150,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_SRC], [
 	ZFS_AC_KERNEL_SRC_USER_NS_COMMON_INUM
 	ZFS_AC_KERNEL_SRC_IDMAP_MNT_API
 	ZFS_AC_KERNEL_SRC_IATTR_VFSID
+	ZFS_AC_KERNEL_SRC_FILEMAP
 
 	AC_MSG_CHECKING([for available kernel interfaces])
 	ZFS_LINUX_TEST_COMPILE_ALL([kabi])
@@ -273,6 +274,7 @@ AC_DEFUN([ZFS_AC_KERNEL_TEST_RESULT], [
 	ZFS_AC_KERNEL_USER_NS_COMMON_INUM
 	ZFS_AC_KERNEL_IDMAP_MNT_API
 	ZFS_AC_KERNEL_IATTR_VFSID
+	ZFS_AC_KERNEL_FILEMAP
 ])
 
 dnl #


@@ -272,30 +272,46 @@ import_pool()
 # with more logging etc.
 load_module_initrd()
 {
-	[ -n "$ROOTDELAY" ] && ZFS_INITRD_PRE_MOUNTROOT_SLEEP="$ROOTDELAY"
+	ZFS_INITRD_PRE_MOUNTROOT_SLEEP=${ROOTDELAY:-0}
 
-	if [ "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP" -gt 0 ] 2>/dev/null
-	then
-		if [ "$quiet" != "y" ]; then
-			zfs_log_begin_msg "Sleeping for" \
-			    "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP seconds..."
+	if [ "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP" -gt 0 ]; then
+		[ "$quiet" != "y" ] && zfs_log_begin_msg "Delaying for up to '${ZFS_INITRD_PRE_MOUNTROOT_SLEEP}' seconds."
+	fi
+
+	START=$(/bin/date -u +%s)
+	END=$((START+ZFS_INITRD_PRE_MOUNTROOT_SLEEP))
+	while true; do
+		# Wait for all of the /dev/{hd,sd}[a-z] device nodes to appear.
+		if command -v wait_for_udev > /dev/null 2>&1 ; then
+			wait_for_udev 10
+		elif command -v wait_for_dev > /dev/null 2>&1 ; then
+			wait_for_dev
 		fi
-		sleep "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP"
+
+		#
+		# zpool import refuse to import without a valid
+		# /proc/self/mounts
+		#
+		[ ! -f /proc/self/mounts ] && mount proc /proc
+
+		# Load the module
+		if load_module "zfs"; then
+			ret=0
+			break
+		else
+			ret=1
+		fi
+
+		[ "$(/bin/date -u +%s)" -gt "$END" ] && break
+		sleep 1
+	done
+	if [ "$ZFS_INITRD_PRE_MOUNTROOT_SLEEP" -gt 0 ]; then
 		[ "$quiet" != "y" ] && zfs_log_end_msg
 	fi
 
-	# Wait for all of the /dev/{hd,sd}[a-z] device nodes to appear.
-	if command -v wait_for_udev > /dev/null 2>&1 ; then
-		wait_for_udev 10
-	elif command -v wait_for_dev > /dev/null 2>&1 ; then
-		wait_for_dev
-	fi
-
-	# zpool import refuse to import without a valid /proc/self/mounts
-	[ ! -f /proc/self/mounts ] && mount proc /proc
-
-	# Load the module
-	load_module "zfs" || return 1
+	[ "$ret" -ne 0 ] && return 1
 
 	if [ "$ZFS_INITRD_POST_MODPROBE_SLEEP" -gt 0 ] 2>/dev/null
 	then
@@ -343,9 +359,11 @@ mount_fs()
 			# isn't the root fs.
 			return 0
 		fi
-		ZFS_CMD="mount.zfs"
 		# Last hail-mary: Hope 'rootmnt' is set!
 		mountpoint=""
+		if [ "$mountpoint" = "legacy" ]; then
+			ZFS_CMD="mount.zfs"
+		fi
 	else
 		mountpoint="$mountpoint1"
 	fi
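
Note: the rewrite above turns the fixed pre-mountroot sleep into a retry loop; device settling, /proc mounting, and the zfs module load are retried once per second until the load succeeds or the deadline passes. A minimal sketch of exercising it, assuming a Debian-style initramfs where /etc/default/zfs is the conventional place to set these variables (that file is not part of this diff):

  # Retry module load / device settle for up to 30 seconds at boot.
  echo "ZFS_INITRD_PRE_MOUNTROOT_SLEEP='30'" >> /etc/default/zfs
  update-initramfs -u    # rebuild the initrd so the setting takes effect

Passing rootdelay=30 on the kernel command line has the same effect, since ROOTDELAY now seeds the deadline.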


@@ -183,6 +183,7 @@ _LIBZUTIL_H int printf_color(const char *color, const char *format, ...);
 _LIBZUTIL_H const char *zfs_basename(const char *path);
 _LIBZUTIL_H ssize_t zfs_dirnamelen(const char *path);
 #ifdef __linux__
+extern char **environ;
 _LIBZUTIL_H void zfs_setproctitle_init(int argc, char *argv[], char *envp[]);
 _LIBZUTIL_H void zfs_setproctitle(const char *fmt, ...);
 #else


@@ -36,7 +36,7 @@
 #define	ENDBR
 
 #define	SECTION_TEXT	.text
-#define	SECTION_STATIC	.data
+#define	SECTION_STATIC	.rodata
 
 #ifdef __cplusplus
 extern "C" {


@@ -116,7 +116,8 @@ typedef struct zfs_soft_state {
 #define	Z_ISLNK(type) ((type) == VLNK)
 #define	Z_ISDIR(type) ((type) == VDIR)
 
-#define	zn_has_cached_data(zp) vn_has_cached_data(ZTOV(zp))
+#define	zn_has_cached_data(zp, start, end) \
+	vn_has_cached_data(ZTOV(zp))
 #define	zn_flush_cached_data(zp, sync) vn_flush_cached_data(ZTOV(zp), sync)
 #define	zn_rlimit_fsize(zp, uio) \
 	vn_rlimit_fsize(ZTOV(zp), GET_UIO_STRUCT(uio), zfs_uio_td(uio))


@@ -62,7 +62,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
 	    __field(uint32_t,	z_async_writes_cnt)
 	    __field(mode_t,	z_mode)
 	    __field(boolean_t,	z_is_sa)
-	    __field(boolean_t,	z_is_mapped)
 	    __field(boolean_t,	z_is_ctldir)
 
 	    __field(uint32_t,	i_uid)
@@ -96,7 +95,6 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
 	    __entry->z_async_writes_cnt = zn->z_async_writes_cnt;
 	    __entry->z_mode		= zn->z_mode;
 	    __entry->z_is_sa		= zn->z_is_sa;
-	    __entry->z_is_mapped	= zn->z_is_mapped;
 	    __entry->z_is_ctldir	= zn->z_is_ctldir;
 
 	    __entry->i_uid		= KUID_TO_SUID(ZTOI(zn)->i_uid);
@@ -119,7 +117,7 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
 	    "zn_prefetch %u blksz %u seq %u "
 	    "mapcnt %llu size %llu pflags %llu "
 	    "sync_cnt %u sync_writes_cnt %u async_writes_cnt %u "
-	    "mode 0x%x is_sa %d is_mapped %d is_ctldir %d "
+	    "mode 0x%x is_sa %d is_ctldir %d "
 	    "inode { uid %u gid %u ino %lu nlink %u size %lli "
 	    "blkbits %u bytes %u mode 0x%x generation %x } } "
 	    "ace { type %u flags %u access_mask %u } mask_matched %u",
@@ -128,9 +126,8 @@ DECLARE_EVENT_CLASS(zfs_ace_class,
 	    __entry->z_seq, __entry->z_mapcnt, __entry->z_size,
 	    __entry->z_pflags, __entry->z_sync_cnt,
 	    __entry->z_sync_writes_cnt, __entry->z_async_writes_cnt,
-	    __entry->z_mode, __entry->z_is_sa, __entry->z_is_mapped,
-	    __entry->z_is_ctldir, __entry->i_uid,
-	    __entry->i_gid, __entry->i_ino, __entry->i_nlink,
+	    __entry->z_mode, __entry->z_is_sa, __entry->z_is_ctldir,
+	    __entry->i_uid, __entry->i_gid, __entry->i_ino, __entry->i_nlink,
 	    __entry->i_size, __entry->i_blkbits,
 	    __entry->i_bytes, __entry->i_mode, __entry->i_generation,
 	    __entry->z_type, __entry->z_flags, __entry->z_access_mask,


@@ -47,9 +47,16 @@
 extern "C" {
 #endif
 
+#if defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
 #define	ZNODE_OS_FIELDS			\
 	inode_timespec_t z_btime; /* creation/birth time (cached) */ \
 	struct inode	z_inode;
+#else
+#define	ZNODE_OS_FIELDS			\
+	inode_timespec_t z_btime; /* creation/birth time (cached) */ \
+	struct inode	z_inode;	\
+	boolean_t	z_is_mapped;	/* we are mmap'ed */
+#endif
 
 /*
  * Convert between znode pointers and inode pointers
@@ -70,7 +77,14 @@ extern "C" {
 #define	Z_ISDEV(type)	(S_ISCHR(type) || S_ISBLK(type) || S_ISFIFO(type))
 #define	Z_ISDIR(type)	S_ISDIR(type)
 
-#define	zn_has_cached_data(zp)		((zp)->z_is_mapped)
+#if defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
+#define	zn_has_cached_data(zp, start, end) \
+	filemap_range_has_page(ZTOI(zp)->i_mapping, start, end)
+#else
+#define	zn_has_cached_data(zp, start, end) \
+	((zp)->z_is_mapped)
+#endif
+
 #define	zn_flush_cached_data(zp, sync)	write_inode_now(ZTOI(zp), sync)
 #define	zn_rlimit_fsize(zp, uio)	(0)


@@ -87,6 +87,7 @@ int livelist_bpobj_iterate_from_nofree(bpobj_t *bpo, bpobj_itor_t func,
     void *arg, int64_t start);
 void bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx);
+void bpobj_prefetch_subobj(bpobj_t *bpo, uint64_t subobj);
 void bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, boolean_t bp_freed,
     dmu_tx_t *tx);


@@ -372,6 +372,7 @@ int dsl_dataset_rename_snapshot(const char *fsname,
     const char *oldsnapname, const char *newsnapname, boolean_t recursive);
 int dsl_dataset_snapshot_tmp(const char *fsname, const char *snapname,
     minor_t cleanup_minor, const char *htag);
+boolean_t zfeature_active(spa_feature_t f, void *arg);
 
 blkptr_t *dsl_dataset_get_blkptr(dsl_dataset_t *ds);


@@ -678,7 +678,7 @@ typedef struct blkptr {
 	len += func(buf + len, size - len,				\
 	    "[L%llu %s] %s %s %s %s %s %s %s%c"				\
 	    "size=%llxL/%llxP birth=%lluL/%lluP fill=%llu%c"		\
-	    "cksum=%llx:%llx:%llx:%llx",				\
+	    "cksum=%016llx:%016llx:%016llx:%016llx",			\
 	    (u_longlong_t)BP_GET_LEVEL(bp),				\
 	    type,							\
 	    checksum,							\


@@ -188,7 +188,6 @@ typedef struct znode {
 	boolean_t	z_atime_dirty;	/* atime needs to be synced */
 	boolean_t	z_zn_prefetch;	/* Prefetch znodes? */
 	boolean_t	z_is_sa;	/* are we native sa? */
-	boolean_t	z_is_mapped;	/* are we mmap'ed */
 	boolean_t	z_is_ctldir;	/* are we .zfs entry */
 	boolean_t	z_suspended;	/* extra ref from a suspend? */
 	uint_t		z_blksz;	/* block size in bytes */


@@ -97,7 +97,7 @@ nfs_init_tmpfile(const char *prefix, const char *mdir, struct tmpfile *tmpf)
 	}
 
 	strlcpy(tmpf->name, prefix, sizeof (tmpf->name));
-	strlcat(tmpf->name, ".XXXXXXXX", sizeof (tmpf->name) - strlen(prefix));
+	strlcat(tmpf->name, ".XXXXXXXX", sizeof (tmpf->name));
 
 	int fd = mkostemp(tmpf->name, O_CLOEXEC);
 	if (fd == -1) {


@@ -40,7 +40,7 @@
 #define	ENDBR
 
 #define	SECTION_TEXT	.text
-#define	SECTION_STATIC	.data
+#define	SECTION_STATIC	.rodata
 
 #ifdef __cplusplus
 extern "C" {


@@ -1422,10 +1422,10 @@ zpool_disable_datasets(zpool_handle_t *zhp, boolean_t force)
 	 * Walk through and first unshare everything.
 	 */
 	for (i = 0; i < used; i++) {
-		for (enum sa_protocol i = 0; i < SA_PROTOCOL_COUNT; ++i) {
-			if (sa_is_shared(sets[i].mountpoint, i) &&
+		for (enum sa_protocol p = 0; p < SA_PROTOCOL_COUNT; ++p) {
+			if (sa_is_shared(sets[i].mountpoint, p) &&
 			    unshare_one(hdl, sets[i].mountpoint,
-			    sets[i].mountpoint, i) != 0)
+			    sets[i].mountpoint, p) != 0)
 				goto out;
 		}
 	}


@@ -84,6 +84,7 @@ typedef struct progress_arg {
 	boolean_t pa_estimate;
 	int pa_verbosity;
 	boolean_t pa_astitle;
+	boolean_t pa_progress;
 	uint64_t pa_size;
 } progress_arg_t;
 
@@ -940,7 +941,7 @@ send_progress_thread(void *arg)
 	struct tm tm;
 	int err;
 
-	if (!pa->pa_parsable && pa->pa_verbosity != 0) {
+	if (!pa->pa_parsable && pa->pa_progress) {
 		(void) fprintf(stderr,
 		    "TIME %s %sSNAPSHOT %s\n",
 		    pa->pa_estimate ? "BYTES" : " SENT",
@@ -990,7 +991,7 @@ send_progress_thread(void *arg)
 			(void) fprintf(stderr, "%02d:%02d:%02d\t%llu\t%s\n",
 			    tm.tm_hour, tm.tm_min, tm.tm_sec,
 			    (u_longlong_t)bytes, zhp->zfs_name);
-		} else if (pa->pa_verbosity != 0) {
+		} else if (pa->pa_progress) {
 			zfs_nicebytes(bytes, buf, sizeof (buf));
 			(void) fprintf(stderr, "%02d:%02d:%02d %5s %s\n",
 			    tm.tm_hour, tm.tm_min, tm.tm_sec,
@@ -1206,6 +1207,7 @@ dump_snapshot(zfs_handle_t *zhp, void *arg)
 			pa.pa_verbosity = sdd->verbosity;
 			pa.pa_size = sdd->size;
 			pa.pa_astitle = sdd->progressastitle;
+			pa.pa_progress = sdd->progress;
 
 			if ((err = pthread_create(&tid, NULL,
 			    send_progress_thread, &pa)) != 0) {
@@ -1886,6 +1888,7 @@ zfs_send_resume_impl_cb_impl(libzfs_handle_t *hdl, sendflags_t *flags,
 		pa.pa_verbosity = flags->verbosity;
 		pa.pa_size = size;
 		pa.pa_astitle = flags->progressastitle;
+		pa.pa_progress = flags->progress;
 
 		error = pthread_create(&tid, NULL,
 		    send_progress_thread, &pa);
@@ -2696,6 +2699,7 @@ zfs_send_one_cb_impl(zfs_handle_t *zhp, const char *from, int fd,
 		pa.pa_verbosity = flags->verbosity;
 		pa.pa_size = size;
 		pa.pa_astitle = flags->progressastitle;
+		pa.pa_progress = flags->progress;
 
 		err = pthread_create(&ptid, NULL,
 		    send_progress_thread, &pa);
@@ -4586,7 +4590,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
 			    B_FALSE, destsnap) == 0) {
 				*strchr(destsnap, '@') = '\0';
 				(void) strlcat(destsnap, suffix,
-				    sizeof (destsnap) - strlen(destsnap));
+				    sizeof (destsnap));
 			}
 		}
 	} else {
@@ -4622,7 +4626,7 @@ zfs_receive_one(libzfs_handle_t *hdl, int infd, const char *tosnap,
 			    B_FALSE, destsnap) == 0) {
 				*strchr(destsnap, '@') = '\0';
 				(void) strlcat(destsnap, snap,
-				    sizeof (destsnap) - strlen(destsnap));
+				    sizeof (destsnap));
 			}
 		}
 	}
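
Note: the new pa_progress flag keys the periodic status lines from send_progress_thread to an explicit progress request instead of inferring it from the verbosity level, which is what fixes console progress reporting for recursive sends (#14448). A usage sketch; 'tank/data' is a placeholder dataset:

  # With -v, each snapshot in the recursive stream reports progress to
  # stderr once per second while the stream itself goes to stdout.
  zfs snapshot -r tank/data@backup
  zfs send -R -v tank/data@backup > /tmp/stream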


@@ -1769,7 +1769,7 @@ completes in order to verify the checksums of all blocks which have been
 resilvered.
 This is enabled by default and strongly recommended.
 .
-.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 33554432 Ns B Po 32 MiB Pc Pq u64
+.It Sy zfs_rebuild_vdev_limit Ns = Ns Sy 67108864 Ns B Po 64 MiB Pc Pq u64
 Maximum amount of I/O that can be concurrently issued for a sequential
 resilver per leaf device, given in bytes.
 .
@@ -1890,6 +1890,13 @@ I/O.
 In this case (unless the metadata scan is done) we stop issuing verification I/O
 and start scanning metadata again until we get to the hard limit.
 .
+.It Sy zfs_scan_report_txgs Ns = Ns Sy 0 Ns | Ns 1 Pq uint
+When reporting resilver throughput and estimated completion time use the
+performance observed over roughly the last
+.Sy zfs_scan_report_txgs
+TXGs.
+When set to zero performance is calculated over the time between checkpoints.
+.
 .It Sy zfs_scan_strict_mem_lim Ns = Ns Sy 0 Ns | Ns 1 Pq int
 Enforce tight memory limits on pool scans when a sequential scan is in progress.
 When disabled, the memory limit may be exceeded by fast disks.
@@ -1898,7 +1905,7 @@ When disabled, the memory limit may be exceeded by fast disks.
 Freezes a scrub/resilver in progress without actually pausing it.
 Intended for testing/debugging.
 .
-.It Sy zfs_scan_vdev_limit Ns = Ns Sy 4194304 Ns B Po 4 MiB Pc Pq int
+.It Sy zfs_scan_vdev_limit Ns = Ns Sy 16777216 Ns B Po 16 MiB Pc Pq int
 Maximum amount of data that can be concurrently issued at once for scrubs and
 resilvers per leaf device, given in bytes.
 .
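
Note: the retuned defaults above can be inspected or overridden at runtime through the usual module-parameter interface; a sketch for Linux (FreeBSD exposes the same knobs as vfs.zfs.* sysctls):

  # Per-leaf scan limit, 16 MiB after this change:
  cat /sys/module/zfs/parameters/zfs_scan_vdev_limit
  # Double it for a fast all-flash pool:
  echo 33554432 > /sys/module/zfs/parameters/zfs_scan_vdev_limit
  # Estimate over the last 32 TXGs instead of the whole pass
  # (0, the default, keeps the old per-pass reporting):
  echo 32 > /sys/module/zfs/parameters/zfs_scan_report_txgs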


@@ -1791,7 +1791,6 @@ ENTRY_ALIGN(zfs_blake3_hash_many_avx2, 64)
 SET_SIZE(zfs_blake3_hash_many_avx2)
 
 SECTION_STATIC
-.section .rodata
 
 .p2align 6
 ADD0:


@@ -53,6 +53,11 @@
 /* Windows userland links with OpenSSL */
 #if !defined (_WIN32) || defined (_KERNEL)
 
+/* Apple needs _ */
+#if defined (__APPLE__)
+#define	gcm_avx_can_use_movbe _gcm_avx_can_use_movbe
+#endif
+
 .extern gcm_avx_can_use_movbe
 
 .text


@@ -101,7 +101,7 @@ gcm_mul_pclmulqdq(uint64_t *x_in, uint64_t *y, uint64_t *res) {
 // static uint8_t byte_swap16_mask[] = {
 //	15, 14, 13, 12, 11, 10, 9, 8, 7, 6 ,5, 4, 3, 2, 1, 0 };
 
-.section .rodata
+SECTION_STATIC
 .balign XMM_ALIGN
 .Lbyte_swap16_mask:
 	.byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0


@@ -2063,7 +2063,7 @@ ENTRY_NP(SHA256TransformBlocks)
 .cfi_endproc
 SET_SIZE(SHA256TransformBlocks)
 
-.section .rodata
+SECTION_STATIC
 .balign 64
 SET_OBJ(K256)
 K256:


@@ -2064,7 +2064,7 @@ ENTRY_NP(SHA512TransformBlocks)
 .cfi_endproc
 SET_SIZE(SHA512TransformBlocks)
 
-.section .rodata
+SECTION_STATIC
 .balign 64
 SET_OBJ(K512)
 K512:
@@ -2113,4 +2113,3 @@ K512:
 #if defined(__ELF__)
 .section .note.GNU-stack,"",%progbits
 #endif
-


@@ -142,7 +142,7 @@ zfsdev_ioctl(struct cdev *dev, ulong_t zcmd, caddr_t arg, int flag,
 		return (EINVAL);
 
 	uaddr = (void *)(uintptr_t)zp->zfs_cmd;
-	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
+	zc = vmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
 #ifdef ZFS_LEGACY_SUPPORT
 	/*
 	 * Remap ioctl code for legacy user binaries
@@ -150,10 +150,10 @@ zfsdev_ioctl(struct cdev *dev, ulong_t zcmd, caddr_t arg, int flag,
 	if (zp->zfs_ioctl_version == ZFS_IOCVER_LEGACY) {
 		vecnum = zfs_ioctl_legacy_to_ozfs(vecnum);
 		if (vecnum < 0) {
-			kmem_free(zc, sizeof (zfs_cmd_t));
+			vmem_free(zc, sizeof (zfs_cmd_t));
 			return (ENOTSUP);
 		}
-		zcl = kmem_zalloc(sizeof (zfs_cmd_legacy_t), KM_SLEEP);
+		zcl = vmem_zalloc(sizeof (zfs_cmd_legacy_t), KM_SLEEP);
 		if (copyin(uaddr, zcl, sizeof (zfs_cmd_legacy_t))) {
 			error = SET_ERROR(EFAULT);
 			goto out;
@@ -180,9 +180,9 @@ zfsdev_ioctl(struct cdev *dev, ulong_t zcmd, caddr_t arg, int flag,
 out:
 #ifdef ZFS_LEGACY_SUPPORT
 	if (zcl)
-		kmem_free(zcl, sizeof (zfs_cmd_legacy_t));
+		vmem_free(zcl, sizeof (zfs_cmd_legacy_t));
 #endif
-	kmem_free(zc, sizeof (zfs_cmd_t));
+	vmem_free(zc, sizeof (zfs_cmd_t));
 	MPASS(tsd_get(rrw_tsd_key) == NULL);
 	return (error);
 }


@@ -230,7 +230,8 @@ zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val,
 	vfs_unbusy(vfsp);
 	if (tmp != *val) {
-		(void) strcpy(setpoint, "temporary");
+		if (setpoint)
+			(void) strcpy(setpoint, "temporary");
 		*val = tmp;
 	}
 	return (0);


@@ -272,18 +272,20 @@ abd_alloc_chunks(abd_t *abd, size_t size)
 	struct page *page, *tmp_page = NULL;
 	gfp_t gfp = __GFP_NOWARN | GFP_NOIO;
 	gfp_t gfp_comp = (gfp | __GFP_NORETRY | __GFP_COMP) & ~__GFP_RECLAIM;
-	int max_order = MIN(zfs_abd_scatter_max_order, MAX_ORDER - 1);
-	int nr_pages = abd_chunkcnt_for_bytes(size);
-	int chunks = 0, zones = 0;
+	unsigned int max_order = MIN(zfs_abd_scatter_max_order, MAX_ORDER - 1);
+	unsigned int nr_pages = abd_chunkcnt_for_bytes(size);
+	unsigned int chunks = 0, zones = 0;
 	size_t remaining_size;
 	int nid = NUMA_NO_NODE;
-	int alloc_pages = 0;
+	unsigned int alloc_pages = 0;
 
 	INIT_LIST_HEAD(&pages);
 
+	ASSERT3U(alloc_pages, <, nr_pages);
+
 	while (alloc_pages < nr_pages) {
-		unsigned chunk_pages;
-		int order;
+		unsigned int chunk_pages;
+		unsigned int order;
 
 		order = MIN(highbit64(nr_pages - alloc_pages) - 1, max_order);
 		chunk_pages = (1U << order);


@@ -392,7 +392,20 @@ zfsctl_snapshot_unmount_delay_impl(zfs_snapentry_t *se, int delay)
 	zfsctl_snapshot_hold(se);
 	rw_enter(&se->se_taskqid_lock, RW_WRITER);
-	ASSERT3S(se->se_taskqid, ==, TASKQID_INVALID);
+	/*
+	 * If this condition happens, we managed to:
+	 * - dispatch once
+	 * - want to dispatch _again_ before it returned
+	 *
+	 * So let's just return - if that task fails at unmounting,
+	 * we'll eventually dispatch again, and if it succeeds,
+	 * no problem.
+	 */
+	if (se->se_taskqid != TASKQID_INVALID) {
+		rw_exit(&se->se_taskqid_lock);
+		zfsctl_snapshot_rele(se);
+		return;
+	}
 	se->se_taskqid = taskq_dispatch_delay(system_delay_taskq,
 	    snapentry_expire, se, TQ_SLEEP, ddi_get_lbolt() + delay * HZ);
 	rw_exit(&se->se_taskqid_lock);
@@ -485,7 +498,9 @@ zfsctl_inode_alloc(zfsvfs_t *zfsvfs, uint64_t id,
 	zp->z_atime_dirty = B_FALSE;
 	zp->z_zn_prefetch = B_FALSE;
 	zp->z_is_sa = B_FALSE;
+#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
 	zp->z_is_mapped = B_FALSE;
+#endif
 	zp->z_is_ctldir = B_TRUE;
 	zp->z_sa_hdl = NULL;
 	zp->z_blksz = 0;


@@ -135,7 +135,7 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
 	vecnum = cmd - ZFS_IOC_FIRST;
 
-	zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
+	zc = vmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
 
 	if (ddi_copyin((void *)(uintptr_t)arg, zc, sizeof (zfs_cmd_t), 0)) {
 		error = -SET_ERROR(EFAULT);
@@ -146,7 +146,7 @@ zfsdev_ioctl(struct file *filp, unsigned cmd, unsigned long arg)
 	if (error == 0 && rc != 0)
 		error = -SET_ERROR(EFAULT);
 out:
-	kmem_free(zc, sizeof (zfs_cmd_t));
+	vmem_free(zc, sizeof (zfs_cmd_t));
 	return (error);
 }


@@ -608,7 +608,8 @@ zfs_get_temporary_prop(dsl_dataset_t *ds, zfs_prop_t zfs_prop, uint64_t *val,
 	}
 
 	if (tmp != *val) {
-		(void) strcpy(setpoint, "temporary");
+		if (setpoint)
+			(void) strcpy(setpoint, "temporary");
 		*val = tmp;
 	}
 	return (0);


@@ -987,7 +987,7 @@ zfs_remove(znode_t *dzp, char *name, cred_t *cr, int flags)
 	mutex_enter(&zp->z_lock);
 	may_delete_now = atomic_read(&ZTOI(zp)->i_count) == 1 &&
-	    !(zp->z_is_mapped);
+	    !zn_has_cached_data(zp, 0, LLONG_MAX);
 	mutex_exit(&zp->z_lock);
 
 	/*
@@ -1075,7 +1075,8 @@ zfs_remove(znode_t *dzp, char *name, cred_t *cr, int flags)
 		    &xattr_obj_unlinked, sizeof (xattr_obj_unlinked));
 		delete_now = may_delete_now && !toobig &&
 		    atomic_read(&ZTOI(zp)->i_count) == 1 &&
-		    !(zp->z_is_mapped) && xattr_obj == xattr_obj_unlinked &&
+		    !zn_has_cached_data(zp, 0, LLONG_MAX) &&
+		    xattr_obj == xattr_obj_unlinked &&
 		    zfs_external_acl(zp) == acl_obj;
 	}


@@ -551,7 +551,9 @@ zfs_znode_alloc(zfsvfs_t *zfsvfs, dmu_buf_t *db, int blksz,
 	ASSERT3P(zp->z_xattr_cached, ==, NULL);
 	zp->z_unlinked = B_FALSE;
 	zp->z_atime_dirty = B_FALSE;
+#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
 	zp->z_is_mapped = B_FALSE;
+#endif
 	zp->z_is_ctldir = B_FALSE;
 	zp->z_suspended = B_FALSE;
 	zp->z_sa_hdl = NULL;
@@ -1641,7 +1643,7 @@ zfs_free_range(znode_t *zp, uint64_t off, uint64_t len)
 	 * Zero partial page cache entries.  This must be done under a
 	 * range lock in order to keep the ARC and page cache in sync.
 	 */
-	if (zp->z_is_mapped) {
+	if (zn_has_cached_data(zp, off, off + len - 1)) {
 		loff_t first_page, last_page, page_len;
 		loff_t first_page_offset, last_page_offset;


@@ -625,7 +625,6 @@ static int
 zpl_mmap(struct file *filp, struct vm_area_struct *vma)
 {
 	struct inode *ip = filp->f_mapping->host;
-	znode_t *zp = ITOZ(ip);
 	int error;
 	fstrans_cookie_t cookie;
@@ -640,9 +639,12 @@ zpl_mmap(struct file *filp, struct vm_area_struct *vma)
 	if (error)
 		return (error);
 
+#if !defined(HAVE_FILEMAP_RANGE_HAS_PAGE)
+	znode_t *zp = ITOZ(ip);
 	mutex_enter(&zp->z_lock);
 	zp->z_is_mapped = B_TRUE;
 	mutex_exit(&zp->z_lock);
+#endif
 
 	return (error);
 }
@@ -937,7 +939,7 @@ zpl_fadvise(struct file *filp, loff_t offset, loff_t len, int advice)
 	case POSIX_FADV_SEQUENTIAL:
 	case POSIX_FADV_WILLNEED:
 #ifdef HAVE_GENERIC_FADVISE
-		if (zn_has_cached_data(zp))
+		if (zn_has_cached_data(zp, offset, offset + len - 1))
 			error = generic_fadvise(filp, offset, len, advice);
 #endif
 		/*


@@ -5958,6 +5958,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
 	    (zio_flags & ZIO_FLAG_RAW_ENCRYPT) != 0;
 	boolean_t embedded_bp = !!BP_IS_EMBEDDED(bp);
 	boolean_t no_buf = *arc_flags & ARC_FLAG_NO_BUF;
+	arc_buf_t *buf = NULL;
 	int rc = 0;
 
 	ASSERT(!embedded_bp ||
@@ -5987,7 +5988,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
 	if (!zfs_blkptr_verify(spa, bp, zio_flags & ZIO_FLAG_CONFIG_WRITER,
 	    BLK_VERIFY_LOG)) {
 		rc = SET_ERROR(ECKSUM);
-		goto out;
+		goto done;
 	}
 
 	if (!embedded_bp) {
@@ -6008,14 +6009,13 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
 	if (hdr != NULL && HDR_HAS_L1HDR(hdr) && (HDR_HAS_RABD(hdr) ||
 	    (hdr->b_l1hdr.b_pabd != NULL && !encrypted_read))) {
 		boolean_t is_data = !HDR_ISTYPE_METADATA(hdr);
-		arc_buf_t *buf = NULL;
 
 		if (HDR_IO_IN_PROGRESS(hdr)) {
 			if (*arc_flags & ARC_FLAG_CACHED_ONLY) {
 				mutex_exit(hash_lock);
 				ARCSTAT_BUMP(arcstat_cached_only_in_progress);
 				rc = SET_ERROR(ENOENT);
-				goto out;
+				goto done;
 			}
 
 			zio_t *head_zio = hdr->b_l1hdr.b_acb->acb_zio_head;
@@ -6144,9 +6144,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
 		ARCSTAT_CONDSTAT(!(*arc_flags & ARC_FLAG_PREFETCH),
 		    demand, prefetch, is_data, data, metadata, hits);
 		*arc_flags |= ARC_FLAG_CACHED;
-
-		if (done)
-			done(NULL, zb, bp, buf, private);
+		goto done;
 	} else {
 		uint64_t lsize = BP_GET_LSIZE(bp);
 		uint64_t psize = BP_GET_PSIZE(bp);
@@ -6159,10 +6157,10 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
 		int alloc_flags = encrypted_read ? ARC_HDR_ALLOC_RDATA : 0;
 
 		if (*arc_flags & ARC_FLAG_CACHED_ONLY) {
-			rc = SET_ERROR(ENOENT);
 			if (hash_lock != NULL)
 				mutex_exit(hash_lock);
-			goto out;
+			rc = SET_ERROR(ENOENT);
+			goto done;
 		}
 
 		if (hdr == NULL) {
@@ -6482,6 +6480,16 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp,
 	spa_read_history_add(spa, zb, *arc_flags);
 	spl_fstrans_unmark(cookie);
 	return (rc);
+
+done:
+	if (done)
+		done(NULL, zb, bp, buf, private);
+	if (pio && rc != 0) {
+		zio_t *zio = zio_null(pio, spa, NULL, NULL, NULL, zio_flags);
+		zio->io_error = rc;
+		zio_nowait(zio);
+	}
+	goto out;
 }
 
 arc_prune_t *


@@ -663,14 +663,13 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
 	}
 
 	VERIFY3U(0, ==, bpobj_open(&subbpo, bpo->bpo_os, subobj));
-	VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp));
 
 	if (bpobj_is_empty(&subbpo)) {
 		/* No point in having an empty subobj. */
 		bpobj_close(&subbpo);
 		bpobj_free(bpo->bpo_os, subobj, tx);
 		return;
 	}
+	VERIFY3U(0, ==, bpobj_space(&subbpo, &used, &comp, &uncomp));
 
 	mutex_enter(&bpo->bpo_lock);
 	dmu_buf_will_dirty(bpo->bpo_dbuf, tx);
@@ -780,6 +779,68 @@ bpobj_enqueue_subobj(bpobj_t *bpo, uint64_t subobj, dmu_tx_t *tx)
 }
 
+/*
+ * Prefetch metadata required for bpobj_enqueue_subobj().
+ */
+void
+bpobj_prefetch_subobj(bpobj_t *bpo, uint64_t subobj)
+{
+	dmu_object_info_t doi;
+	bpobj_t subbpo;
+	uint64_t subsubobjs;
+	boolean_t copy_subsub = B_TRUE;
+	boolean_t copy_bps = B_TRUE;
+
+	ASSERT(bpobj_is_open(bpo));
+	ASSERT(subobj != 0);
+
+	if (subobj == dmu_objset_pool(bpo->bpo_os)->dp_empty_bpobj)
+		return;
+
+	if (bpobj_open(&subbpo, bpo->bpo_os, subobj) != 0)
+		return;
+	if (bpobj_is_empty(&subbpo)) {
+		bpobj_close(&subbpo);
+		return;
+	}
+	subsubobjs = subbpo.bpo_phys->bpo_subobjs;
+	bpobj_close(&subbpo);
+
+	if (subsubobjs != 0) {
+		if (dmu_object_info(bpo->bpo_os, subsubobjs, &doi) != 0)
+			return;
+		if (doi.doi_max_offset > doi.doi_data_block_size)
+			copy_subsub = B_FALSE;
+	}
+
+	if (dmu_object_info(bpo->bpo_os, subobj, &doi) != 0)
+		return;
+	if (doi.doi_max_offset > doi.doi_data_block_size || !copy_subsub)
+		copy_bps = B_FALSE;
+
+	if (copy_subsub && subsubobjs != 0) {
+		if (bpo->bpo_phys->bpo_subobjs) {
+			dmu_prefetch(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, 0,
+			    bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), 1,
+			    ZIO_PRIORITY_ASYNC_READ);
+		}
+		dmu_prefetch(bpo->bpo_os, subsubobjs, 0, 0, 1,
+		    ZIO_PRIORITY_ASYNC_READ);
+	}
+
+	if (copy_bps) {
+		dmu_prefetch(bpo->bpo_os, bpo->bpo_object, 0,
+		    bpo->bpo_phys->bpo_num_blkptrs * sizeof (blkptr_t), 1,
+		    ZIO_PRIORITY_ASYNC_READ);
+		dmu_prefetch(bpo->bpo_os, subobj, 0, 0, 1,
+		    ZIO_PRIORITY_ASYNC_READ);
+	} else if (bpo->bpo_phys->bpo_subobjs) {
+		dmu_prefetch(bpo->bpo_os, bpo->bpo_phys->bpo_subobjs, 0,
+		    bpo->bpo_phys->bpo_num_subobjs * sizeof (subobj), 1,
+		    ZIO_PRIORITY_ASYNC_READ);
+	}
+}
+
 void
 bpobj_enqueue(bpobj_t *bpo, const blkptr_t *bp, boolean_t bp_freed,
     dmu_tx_t *tx)


@@ -493,6 +493,7 @@ dmu_dump_write(dmu_send_cookie_t *dscp, dmu_object_type_t type, uint64_t object,
 	    (bp != NULL ? BP_GET_COMPRESS(bp) != ZIO_COMPRESS_OFF &&
 	    io_compressed : lsize != psize);
 	if (raw || compressed) {
+		ASSERT(bp != NULL);
 		ASSERT(raw || dscp->dsc_featureflags &
 		    DMU_BACKUP_FEATURE_COMPRESSED);
 		ASSERT(!BP_IS_EMBEDDED(bp));
@@ -3028,8 +3029,7 @@ dmu_send_estimate_fast(dsl_dataset_t *origds, dsl_dataset_t *fromds,
 		dsl_dataset_name(origds, dsname);
 		(void) strcat(dsname, "/");
-		(void) strlcat(dsname, recv_clone_name,
-		    sizeof (dsname) - strlen(dsname));
+		(void) strlcat(dsname, recv_clone_name, sizeof (dsname));
 
 		err = dsl_dataset_hold(origds->ds_dir->dd_pool,
 		    dsname, FTAG, &ds);


@@ -1039,7 +1039,7 @@ dsl_dataset_has_owner(dsl_dataset_t *ds)
 	return (rv);
 }
 
-static boolean_t
+boolean_t
 zfeature_active(spa_feature_t f, void *arg)
 {
 	switch (spa_feature_table[f].fi_type) {
@@ -2121,16 +2121,6 @@ dsl_dataset_sync(dsl_dataset_t *ds, zio_t *zio, dmu_tx_t *tx)
 	}
 
 	dmu_objset_sync(ds->ds_objset, zio, tx);
-
-	for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
-		if (zfeature_active(f, ds->ds_feature_activation[f])) {
-			if (zfeature_active(f, ds->ds_feature[f]))
-				continue;
-			dsl_dataset_activate_feature(ds->ds_object, f,
-			    ds->ds_feature_activation[f], tx);
-			ds->ds_feature[f] = ds->ds_feature_activation[f];
-		}
-	}
 }
 
 /*
@@ -2303,6 +2293,17 @@ dsl_dataset_sync_done(dsl_dataset_t *ds, dmu_tx_t *tx)
 	ASSERT(!dmu_objset_is_dirty(os, dmu_tx_get_txg(tx)));
 
 	dmu_buf_rele(ds->ds_dbuf, ds);
+
+	for (spa_feature_t f = 0; f < SPA_FEATURES; f++) {
+		if (zfeature_active(f,
+		    ds->ds_feature_activation[f])) {
+			if (zfeature_active(f, ds->ds_feature[f]))
+				continue;
+			dsl_dataset_activate_feature(ds->ds_object, f,
+			    ds->ds_feature_activation[f], tx);
+			ds->ds_feature[f] = ds->ds_feature_activation[f];
+		}
+	}
 }
 
 int


@@ -438,6 +438,18 @@ dle_enqueue_subobj(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
 	}
 }
 
+/*
+ * Prefetch metadata required for dle_enqueue_subobj().
+ */
+static void
+dle_prefetch_subobj(dsl_deadlist_t *dl, dsl_deadlist_entry_t *dle,
+    uint64_t obj)
+{
+	if (dle->dle_bpobj.bpo_object !=
+	    dmu_objset_pool(dl->dl_os)->dp_empty_bpobj)
+		bpobj_prefetch_subobj(&dle->dle_bpobj, obj);
+}
+
 void
 dsl_deadlist_insert(dsl_deadlist_t *dl, const blkptr_t *bp, boolean_t bp_freed,
     dmu_tx_t *tx)
@@ -810,6 +822,27 @@ dsl_deadlist_insert_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth,
 	dle_enqueue_subobj(dl, dle, obj, tx);
 }
 
+/*
+ * Prefetch metadata required for dsl_deadlist_insert_bpobj().
+ */
+static void
+dsl_deadlist_prefetch_bpobj(dsl_deadlist_t *dl, uint64_t obj, uint64_t birth)
+{
+	dsl_deadlist_entry_t dle_tofind;
+	dsl_deadlist_entry_t *dle;
+	avl_index_t where;
+
+	ASSERT(MUTEX_HELD(&dl->dl_lock));
+
+	dsl_deadlist_load_tree(dl);
+
+	dle_tofind.dle_mintxg = birth;
+	dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
+	if (dle == NULL)
+		dle = avl_nearest(&dl->dl_tree, where, AVL_BEFORE);
+	dle_prefetch_subobj(dl, dle, obj);
+}
+
 static int
 dsl_deadlist_insert_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
     dmu_tx_t *tx)
@@ -826,12 +859,12 @@ dsl_deadlist_insert_cb(void *arg, const blkptr_t *bp, boolean_t bp_freed,
 void
 dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
 {
-	zap_cursor_t zc;
-	zap_attribute_t za;
+	zap_cursor_t zc, pzc;
+	zap_attribute_t za, pza;
 	dmu_buf_t *bonus;
 	dsl_deadlist_phys_t *dlp;
 	dmu_object_info_t doi;
-	int error;
+	int error, perror, i;
 
 	VERIFY0(dmu_object_info(dl->dl_os, obj, &doi));
 	if (doi.doi_type == DMU_OT_BPOBJ) {
@@ -843,15 +876,32 @@ dsl_deadlist_merge(dsl_deadlist_t *dl, uint64_t obj, dmu_tx_t *tx)
 	}
 
 	mutex_enter(&dl->dl_lock);
+	/*
+	 * Prefetch up to 128 deadlists first and then more as we progress.
+	 * The limit is a balance between ARC use and diminishing returns.
+	 */
+	for (zap_cursor_init(&pzc, dl->dl_os, obj), i = 0;
+	    (perror = zap_cursor_retrieve(&pzc, &pza)) == 0 && i < 128;
+	    zap_cursor_advance(&pzc), i++) {
+		dsl_deadlist_prefetch_bpobj(dl, pza.za_first_integer,
+		    zfs_strtonum(pza.za_name, NULL));
+	}
 	for (zap_cursor_init(&zc, dl->dl_os, obj);
 	    (error = zap_cursor_retrieve(&zc, &za)) == 0;
 	    zap_cursor_advance(&zc)) {
 		uint64_t mintxg = zfs_strtonum(za.za_name, NULL);
 		dsl_deadlist_insert_bpobj(dl, za.za_first_integer, mintxg, tx);
 		VERIFY0(zap_remove_int(dl->dl_os, obj, mintxg, tx));
+		if (perror == 0) {
+			dsl_deadlist_prefetch_bpobj(dl, pza.za_first_integer,
+			    zfs_strtonum(pza.za_name, NULL));
+			zap_cursor_advance(&pzc);
+			perror = zap_cursor_retrieve(&pzc, &pza);
+		}
 	}
 	VERIFY3U(error, ==, ENOENT);
 	zap_cursor_fini(&zc);
+	zap_cursor_fini(&pzc);
 
 	VERIFY0(dmu_bonus_hold(dl->dl_os, obj, FTAG, &bonus));
 	dlp = bonus->db_data;
@@ -869,8 +919,9 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
     dmu_tx_t *tx)
 {
 	dsl_deadlist_entry_t dle_tofind;
-	dsl_deadlist_entry_t *dle;
+	dsl_deadlist_entry_t *dle, *pdle;
 	avl_index_t where;
+	int i;
 
 	ASSERT(!dl->dl_oldfmt);
@@ -882,11 +933,23 @@ dsl_deadlist_move_bpobj(dsl_deadlist_t *dl, bpobj_t *bpo, uint64_t mintxg,
 	dle = avl_find(&dl->dl_tree, &dle_tofind, &where);
 	if (dle == NULL)
 		dle = avl_nearest(&dl->dl_tree, where, AVL_AFTER);
+	/*
+	 * Prefetch up to 128 deadlists first and then more as we progress.
+	 * The limit is a balance between ARC use and diminishing returns.
+	 */
+	for (pdle = dle, i = 0; pdle && i < 128; i++) {
+		bpobj_prefetch_subobj(bpo, pdle->dle_bpobj.bpo_object);
+		pdle = AVL_NEXT(&dl->dl_tree, pdle);
+	}
 	while (dle) {
 		uint64_t used, comp, uncomp;
 		dsl_deadlist_entry_t *dle_next;
 
 		bpobj_enqueue_subobj(bpo, dle->dle_bpobj.bpo_object, tx);
+		if (pdle) {
+			bpobj_prefetch_subobj(bpo, pdle->dle_bpobj.bpo_object);
+			pdle = AVL_NEXT(&dl->dl_tree, pdle);
+		}
 
 		VERIFY0(bpobj_space(&dle->dle_bpobj,
 		    &used, &comp, &uncomp));

@@ -37,6 +37,7 @@
 #include <sys/dmu_tx.h>
 #include <sys/dmu_objset.h>
 #include <sys/arc.h>
+#include <sys/arc_impl.h>
 #include <sys/zap.h>
 #include <sys/zio.h>
 #include <sys/zfs_context.h>
@@ -126,11 +127,20 @@ static boolean_t scan_ds_queue_contains(dsl_scan_t *scn, uint64_t dsobj,
 static void scan_ds_queue_insert(dsl_scan_t *scn, uint64_t dsobj, uint64_t txg);
 static void scan_ds_queue_remove(dsl_scan_t *scn, uint64_t dsobj);
 static void scan_ds_queue_sync(dsl_scan_t *scn, dmu_tx_t *tx);
-static uint64_t dsl_scan_count_data_disks(vdev_t *vd);
+static uint64_t dsl_scan_count_data_disks(spa_t *spa);
 
 extern uint_t zfs_vdev_async_write_active_min_dirty_percent;
 static int zfs_scan_blkstats = 0;
 
+/*
+ * 'zpool status' uses bytes processed per pass to report throughput and
+ * estimate time remaining.  We define a pass to start when the scanning
+ * phase completes for a sequential resilver.  Optionally, this value
+ * may be used to reset the pass statistics every N txgs to provide an
+ * estimated completion time based on currently observed performance.
+ */
+static uint_t zfs_scan_report_txgs = 0;
+
 /*
  * By default zfs will check to ensure it is not over the hard memory
  * limit before each txg. If finer-grained control of this is needed
@@ -147,7 +157,7 @@ static int zfs_scan_strict_mem_lim = B_FALSE;
 * overload the drives with I/O, since that is protected by
 * zfs_vdev_scrub_max_active.
 */
-static uint64_t zfs_scan_vdev_limit = 4 << 20;
+static uint64_t zfs_scan_vdev_limit = 16 << 20;
 
 static uint_t zfs_scan_issue_strategy = 0;
@@ -466,11 +476,12 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
 	/*
 	 * Calculate the max number of in-flight bytes for pool-wide
-	 * scanning operations (minimum 1MB). Limits for the issuing
-	 * phase are done per top-level vdev and are handled separately.
+	 * scanning operations (minimum 1MB, maximum 1/4 of arc_c_max).
+	 * Limits for the issuing phase are done per top-level vdev and
+	 * are handled separately.
 	 */
-	scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit *
-	    dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20);
+	scn->scn_maxinflight_bytes = MIN(arc_c_max / 4, MAX(1ULL << 20,
+	    zfs_scan_vdev_limit * dsl_scan_count_data_disks(spa)));
 
 	avl_create(&scn->scn_queue, scan_ds_queue_compare, sizeof (scan_ds_t),
 	    offsetof(scan_ds_t, sds_node));
@@ -604,6 +615,8 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
 	}
 
 	spa_scan_stat_init(spa);
+	vdev_scan_stat_init(spa->spa_root_vdev);
+
 	return (0);
 }
@@ -763,6 +776,7 @@ dsl_scan_setup_sync(void *arg, dmu_tx_t *tx)
 	scn->scn_last_checkpoint = 0;
 	scn->scn_checkpointing = B_FALSE;
 	spa_scan_stat_init(spa);
+	vdev_scan_stat_init(spa->spa_root_vdev);
 
 	if (DSL_SCAN_IS_SCRUB_RESILVER(scn)) {
 		scn->scn_phys.scn_ddt_class_max = zfs_scrub_ddt_class_max;
@@ -2024,6 +2038,26 @@ dsl_scan_visitbp(blkptr_t *bp, const zbookmark_phys_t *zb,
 		return;
 	}
 
+	/*
+	 * Check if this block contradicts any filesystem flags.
+	 */
+	spa_feature_t f = SPA_FEATURE_LARGE_BLOCKS;
+	if (BP_GET_LSIZE(bp) > SPA_OLD_MAXBLOCKSIZE)
+		ASSERT(dsl_dataset_feature_is_active(ds, f));
+
+	f = zio_checksum_to_feature(BP_GET_CHECKSUM(bp));
+	if (f != SPA_FEATURE_NONE)
+		ASSERT(dsl_dataset_feature_is_active(ds, f));
+
+	f = zio_compress_to_feature(BP_GET_COMPRESS(bp));
+	if (f != SPA_FEATURE_NONE)
+		ASSERT(dsl_dataset_feature_is_active(ds, f));
+
+	if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) {
+		scn->scn_lt_min_this_txg++;
+		return;
+	}
+
 	if (bp->blk_birth <= scn->scn_phys.scn_cur_min_txg) {
 		scn->scn_lt_min_this_txg++;
 		return;
@@ -2811,8 +2845,9 @@ dsl_scan_visit(dsl_scan_t *scn, dmu_tx_t *tx)
 }
 
 static uint64_t
-dsl_scan_count_data_disks(vdev_t *rvd)
+dsl_scan_count_data_disks(spa_t *spa)
 {
+	vdev_t *rvd = spa->spa_root_vdev;
 	uint64_t i, leaves = 0;
 
 	for (i = 0; i < rvd->vdev_children; i++) {
@@ -3652,6 +3687,16 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
 		return;
 	}
 
+	/*
+	 * Disabled by default, set zfs_scan_report_txgs to report
+	 * average performance over the last zfs_scan_report_txgs TXGs.
+	 */
+	if (!dsl_scan_is_paused_scrub(scn) && zfs_scan_report_txgs != 0 &&
+	    tx->tx_txg % zfs_scan_report_txgs == 0) {
+		scn->scn_issued_before_pass += spa->spa_scan_pass_issued;
+		spa_scan_stat_init(spa);
+	}
+
 	/*
 	 * It is possible to switch from unsorted to sorted at any time,
 	 * but afterwards the scan will remain sorted unless reloaded from
@@ -3711,12 +3756,13 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
 		taskqid_t prefetch_tqid;
 
 		/*
-		 * Recalculate the max number of in-flight bytes for pool-wide
-		 * scanning operations (minimum 1MB). Limits for the issuing
-		 * phase are done per top-level vdev and are handled separately.
+		 * Calculate the max number of in-flight bytes for pool-wide
+		 * scanning operations (minimum 1MB, maximum 1/4 of arc_c_max).
+		 * Limits for the issuing phase are done per top-level vdev and
+		 * are handled separately.
 		 */
-		scn->scn_maxinflight_bytes = MAX(zfs_scan_vdev_limit *
-		    dsl_scan_count_data_disks(spa->spa_root_vdev), 1ULL << 20);
+		scn->scn_maxinflight_bytes = MIN(arc_c_max / 4, MAX(1ULL << 20,
+		    zfs_scan_vdev_limit * dsl_scan_count_data_disks(spa)));
 
 		if (scnp->scn_ddt_bookmark.ddb_class <=
 		    scnp->scn_ddt_class_max) {
@@ -3780,6 +3826,9 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
 		if (scn->scn_is_sorted) {
 			scn->scn_checkpointing = B_TRUE;
 			scn->scn_clearing = B_TRUE;
+			scn->scn_issued_before_pass +=
+			    spa->spa_scan_pass_issued;
+			spa_scan_stat_init(spa);
 		}
 		zfs_dbgmsg("scan complete for %s txg %llu",
 		    spa->spa_name,
@@ -4507,5 +4556,8 @@ ZFS_MODULE_PARAM(zfs, zfs_, scan_strict_mem_lim, INT, ZMOD_RW,
 ZFS_MODULE_PARAM(zfs, zfs_, scan_fill_weight, UINT, ZMOD_RW,
 	"Tunable to adjust bias towards more filled segments during scans");
 
+ZFS_MODULE_PARAM(zfs, zfs_, scan_report_txgs, UINT, ZMOD_RW,
+	"Tunable to report resilver performance over the last N txgs");
+
 ZFS_MODULE_PARAM(zfs, zfs_, resilver_disable_defer, INT, ZMOD_RW,
 	"Process all resilvers immediately");

View File

@@ -1714,9 +1714,9 @@ spa_unload(spa_t *spa)
 	 */
 	spa_l2cache_drop(spa);
-	for (int i = 0; i < spa->spa_spares.sav_count; i++)
-		vdev_free(spa->spa_spares.sav_vdevs[i]);
 	if (spa->spa_spares.sav_vdevs) {
+		for (int i = 0; i < spa->spa_spares.sav_count; i++)
+			vdev_free(spa->spa_spares.sav_vdevs[i]);
 		kmem_free(spa->spa_spares.sav_vdevs,
 		    spa->spa_spares.sav_count * sizeof (void *));
 		spa->spa_spares.sav_vdevs = NULL;
@@ -1727,11 +1727,11 @@ spa_unload(spa_t *spa)
 	}
 	spa->spa_spares.sav_count = 0;
-	for (int i = 0; i < spa->spa_l2cache.sav_count; i++) {
-		vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]);
-		vdev_free(spa->spa_l2cache.sav_vdevs[i]);
-	}
 	if (spa->spa_l2cache.sav_vdevs) {
+		for (int i = 0; i < spa->spa_l2cache.sav_count; i++) {
+			vdev_clear_stats(spa->spa_l2cache.sav_vdevs[i]);
+			vdev_free(spa->spa_l2cache.sav_vdevs[i]);
+		}
 		kmem_free(spa->spa_l2cache.sav_vdevs,
 		    spa->spa_l2cache.sav_count * sizeof (void *));
 		spa->spa_l2cache.sav_vdevs = NULL;
@@ -1789,20 +1789,21 @@ spa_load_spares(spa_t *spa)
 	/*
 	 * First, close and free any existing spare vdevs.
 	 */
-	for (i = 0; i < spa->spa_spares.sav_count; i++) {
-		vd = spa->spa_spares.sav_vdevs[i];
+	if (spa->spa_spares.sav_vdevs) {
+		for (i = 0; i < spa->spa_spares.sav_count; i++) {
+			vd = spa->spa_spares.sav_vdevs[i];
 			/* Undo the call to spa_activate() below */
 			if ((tvd = spa_lookup_by_guid(spa, vd->vdev_guid,
 			    B_FALSE)) != NULL && tvd->vdev_isspare)
 				spa_spare_remove(tvd);
 			vdev_close(vd);
 			vdev_free(vd);
 		}
-	if (spa->spa_spares.sav_vdevs)
 		kmem_free(spa->spa_spares.sav_vdevs,
 		    spa->spa_spares.sav_count * sizeof (void *));
+	}
 	if (spa->spa_spares.sav_config == NULL)
 		nspares = 0;
@@ -2013,23 +2014,24 @@ spa_load_l2cache(spa_t *spa)
 	/*
 	 * Purge vdevs that were dropped
 	 */
-	for (i = 0; i < oldnvdevs; i++) {
-		uint64_t pool;
+	if (oldvdevs) {
+		for (i = 0; i < oldnvdevs; i++) {
+			uint64_t pool;
 			vd = oldvdevs[i];
 			if (vd != NULL) {
 				ASSERT(vd->vdev_isl2cache);
 				if (spa_l2cache_exists(vd->vdev_guid, &pool) &&
 				    pool != 0ULL && l2arc_vdev_present(vd))
 					l2arc_remove_vdev(vd);
 				vdev_clear_stats(vd);
 				vdev_free(vd);
+			}
 		}
-	}
-	if (oldvdevs)
 		kmem_free(oldvdevs, oldnvdevs * sizeof (void *));
+	}
 	for (i = 0; i < sav->sav_count; i++)
 		nvlist_free(l2cache[i]);
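
Every spa.c hunk above applies one pattern: an aux-vdev array (sav_vdevs or oldvdevs) can legitimately be NULL when its count is zero, so the iteration and the kmem_free() are hoisted under a single NULL check instead of guarding only the free. A simplified sketch of the pattern; the struct and free() here stand in for the kernel interfaces:

#include <stdlib.h>

struct sav {
	void **sav_vdevs;	/* may be NULL when sav_count == 0 */
	int sav_count;
};

static void
sav_teardown(struct sav *sav)
{
	if (sav->sav_vdevs != NULL) {
		for (int i = 0; i < sav->sav_count; i++)
			free(sav->sav_vdevs[i]);
		free(sav->sav_vdevs);
		sav->sav_vdevs = NULL;
	}
	sav->sav_count = 0;
}

int main(void)
{
	struct sav s = { NULL, 0 };
	sav_teardown(&s);	/* safe: the NULL array is never touched */
	return (0);
}
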

View File

@@ -2556,7 +2556,6 @@ spa_scan_stat_init(spa_t *spa)
 	spa->spa_scan_pass_scrub_spent_paused = 0;
 	spa->spa_scan_pass_exam = 0;
 	spa->spa_scan_pass_issued = 0;
-	vdev_scan_stat_init(spa->spa_root_vdev);
 }
 /*

View File

@@ -34,6 +34,7 @@
 #include <sys/zio.h>
 #include <sys/dmu_tx.h>
 #include <sys/arc.h>
+#include <sys/arc_impl.h>
 #include <sys/zap.h>
 /*
@@ -116,13 +117,12 @@ static uint64_t zfs_rebuild_max_segment = 1024 * 1024;
  * segment size is also large (zfs_rebuild_max_segment=1M). This helps keep
  * the queue depth short.
  *
- * 32MB was selected as the default value to achieve good performance with
- * a large 90-drive dRAID HDD configuration (draid2:8d:90c:2s). A sequential
- * rebuild was unable to saturate all of the drives using smaller values.
- * With a value of 32MB the sequential resilver write rate was measured at
- * 800MB/s sustained while rebuilding to a distributed spare.
+ * 64MB was observed to deliver the best performance and set as the default.
+ * Testing was performed with a 106-drive dRAID HDD pool (draid2:11d:106c)
+ * and a rebuild rate of 1.2GB/s was measured to the distributed spare.
+ * Smaller values were unable to fully saturate the available pool I/O.
  */
-static uint64_t zfs_rebuild_vdev_limit = 32 << 20;
+static uint64_t zfs_rebuild_vdev_limit = 64 << 20;
 /*
  * Automatically start a pool scrub when the last active sequential resilver
@@ -754,6 +754,7 @@ vdev_rebuild_thread(void *arg)
 {
 	vdev_t *vd = arg;
 	spa_t *spa = vd->vdev_spa;
+	vdev_t *rvd = spa->spa_root_vdev;
 	int error = 0;
 	/*
@@ -786,9 +787,6 @@ vdev_rebuild_thread(void *arg)
 	vr->vr_pass_bytes_scanned = 0;
 	vr->vr_pass_bytes_issued = 0;
-	vr->vr_bytes_inflight_max = MAX(1ULL << 20,
-	    zfs_rebuild_vdev_limit * vd->vdev_children);
 	uint64_t update_est_time = gethrtime();
 	vdev_rebuild_update_bytes_est(vd, 0);
@@ -804,6 +802,17 @@ vdev_rebuild_thread(void *arg)
 		metaslab_t *msp = vd->vdev_ms[i];
 		vr->vr_scan_msp = msp;
+		/*
+		 * Calculate the max number of in-flight bytes for top-level
+		 * vdev scanning operations (minimum 1MB, maximum 1/4 of
+		 * arc_c_max shared by all top-level vdevs). Limits for the
+		 * issuing phase are done per top-level vdev and are handled
+		 * separately.
+		 */
+		uint64_t limit = (arc_c_max / 4) / MAX(rvd->vdev_children, 1);
+		vr->vr_bytes_inflight_max = MIN(limit, MAX(1ULL << 20,
+		    zfs_rebuild_vdev_limit * vd->vdev_children));
 		/*
 		 * Removal of vdevs from the vdev tree may eliminate the need
 		 * for the rebuild, in which case it should be canceled. The
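
Unlike the pool-wide scrub budget, the rebuild cap is divided evenly among top-level vdevs, so concurrent rebuilds cannot together exceed arc_c_max/4. A standalone sketch of that arithmetic with hypothetical numbers; none of them come from the commit:

#include <stdint.h>
#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define MAX(a, b) ((a) > (b) ? (a) : (b))

int main(void)
{
	uint64_t arc_c_max = 16ULL << 30;	/* assume a 16 GiB ARC cap */
	uint64_t top_level_vdevs = 4;		/* like rvd->vdev_children */
	uint64_t rebuild_vdev_limit = 64ULL << 20; /* new 64 MiB default */
	uint64_t children = 10;			/* disks in this vdev */

	uint64_t share = (arc_c_max / 4) / MAX(top_level_vdevs, 1);
	uint64_t inflight_max = MIN(share, MAX(1ULL << 20,
	    rebuild_vdev_limit * children));

	/* 64 MiB * 10 = 640 MiB, under this vdev's 1 GiB share. */
	printf("%llu MiB\n", (unsigned long long)(inflight_max >> 20));
	return (0);
}
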

View File

@@ -512,9 +512,9 @@ zfs_replay_create(void *arg1, void *arg2, boolean_t byteswap)
 	 *
 	 * The _ATTR versions will grab the fuid info in their subcases.
 	 */
-	if ((int)lr->lr_common.lrc_txtype != TX_SYMLINK &&
-	    (int)lr->lr_common.lrc_txtype != TX_MKDIR_ATTR &&
-	    (int)lr->lr_common.lrc_txtype != TX_CREATE_ATTR) {
+	if (txtype != TX_SYMLINK &&
+	    txtype != TX_MKDIR_ATTR &&
+	    txtype != TX_CREATE_ATTR) {
 		start = (lr + 1);
 		zfsvfs->z_fuid_replay =
 		    zfs_replay_fuid_domain(start, &start,

View File

@@ -106,7 +106,7 @@ zfs_holey_common(znode_t *zp, ulong_t cmd, loff_t *off)
 		hole = B_FALSE;
 	/* Flush any mmap()'d data to disk */
-	if (zn_has_cached_data(zp))
+	if (zn_has_cached_data(zp, 0, file_sz - 1))
 		zn_flush_cached_data(zp, B_FALSE);
 	lr = zfs_rangelock_enter(&zp->z_rangelock, 0, file_sz, RL_READER);
@@ -288,7 +288,8 @@ zfs_read(struct znode *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
 			error = mappedread_sf(zp, nbytes, uio);
 		else
 #endif
-		if (zn_has_cached_data(zp) && !(ioflag & O_DIRECT)) {
+		if (zn_has_cached_data(zp, zfs_uio_offset(uio),
+		    zfs_uio_offset(uio) + nbytes - 1) && !(ioflag & O_DIRECT)) {
 			error = mappedread(zp, nbytes, uio);
 		} else {
 			error = dmu_read_uio_dbuf(sa_get_db(zp->z_sa_hdl),
@@ -696,7 +697,8 @@ zfs_write(znode_t *zp, zfs_uio_t *uio, int ioflag, cred_t *cr)
 			zfs_uioskip(uio, nbytes);
 			tx_bytes = nbytes;
 		}
-		if (tx_bytes && zn_has_cached_data(zp) &&
+		if (tx_bytes &&
+		    zn_has_cached_data(zp, woff, woff + tx_bytes - 1) &&
 		    !(ioflag & O_DIRECT)) {
 			update_pages(zp, woff, tx_bytes, zfsvfs->z_os);
 		}

View File

@@ -2778,7 +2778,7 @@ zio_write_gang_member_ready(zio_t *zio)
 	ASSERT3U(zio->io_prop.zp_copies, ==, gio->io_prop.zp_copies);
 	ASSERT3U(zio->io_prop.zp_copies, <=, BP_GET_NDVAS(zio->io_bp));
 	ASSERT3U(pio->io_prop.zp_copies, <=, BP_GET_NDVAS(pio->io_bp));
-	ASSERT3U(BP_GET_NDVAS(zio->io_bp), <=, BP_GET_NDVAS(pio->io_bp));
+	VERIFY3U(BP_GET_NDVAS(zio->io_bp), <=, BP_GET_NDVAS(pio->io_bp));
 	mutex_enter(&pio->io_lock);
 	for (int d = 0; d < BP_GET_NDVAS(zio->io_bp); d++) {
@@ -2816,18 +2816,20 @@ zio_write_gang_block(zio_t *pio, metaslab_class_t *mc)
 	uint64_t resid = pio->io_size;
 	uint64_t lsize;
 	int copies = gio->io_prop.zp_copies;
-	int gbh_copies;
 	zio_prop_t zp;
 	int error;
 	boolean_t has_data = !(pio->io_flags & ZIO_FLAG_NODATA);
 	/*
-	 * encrypted blocks need DVA[2] free so encrypted gang headers can't
-	 * have a third copy.
+	 * If one copy was requested, store 2 copies of the GBH, so that we
+	 * can still traverse all the data (e.g. to free or scrub) even if a
+	 * block is damaged. Note that we can't store 3 copies of the GBH in
+	 * all cases, e.g. with encryption, which uses DVA[2] for the IV+salt.
 	 */
-	gbh_copies = MIN(copies + 1, spa_max_replication(spa));
-	if (BP_IS_ENCRYPTED(bp) && gbh_copies >= SPA_DVAS_PER_BP)
-		gbh_copies = SPA_DVAS_PER_BP - 1;
+	int gbh_copies = copies;
+	if (gbh_copies == 1) {
+		gbh_copies = MIN(2, spa_max_replication(spa));
+	}
 	int flags = METASLAB_HINTBP_FAVOR | METASLAB_GANG_HEADER;
 	if (pio->io_flags & ZIO_FLAG_IO_ALLOCATING) {
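
The rewritten policy keeps the gang block header (GBH) at the caller's requested copies, widening only a 1-copy request to 2 so the block pointer tree stays traversable if one DVA is lost; the old copies+1 heuristic collided with encryption's use of DVA[2] for the IV and salt. A simplified side-by-side of the two policies; max_repl stands in for spa_max_replication(), and the macros for the kernel's:

#include <stdio.h>

#define MIN(a, b) ((a) < (b) ? (a) : (b))
#define SPA_DVAS_PER_BP 3

static int
gbh_copies_old(int copies, int max_repl, int encrypted)
{
	int gbh = MIN(copies + 1, max_repl);
	if (encrypted && gbh >= SPA_DVAS_PER_BP)
		gbh = SPA_DVAS_PER_BP - 1;	/* DVA[2] holds IV+salt */
	return (gbh);
}

static int
gbh_copies_new(int copies, int max_repl)
{
	/* Only widen a single-copy request, and only up to 2. */
	return (copies == 1 ? MIN(2, max_repl) : copies);
}

int main(void)
{
	/* copies=2, unencrypted: old asked for 3 GBH copies, new keeps 2. */
	printf("old=%d new=%d\n", gbh_copies_old(2, 3, 0),
	    gbh_copies_new(2, 3));
	return (0);
}
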

View File

@@ -409,7 +409,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
         hashSmall[hSmall] = hashLong[hLong] = current;   /* update hash table */
         if ((((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow : ensure repIndex doesn't overlap dict + prefix */
-            & (repIndex > dictStartIndex))
+            & (offset_1 < current+1 - dictStartIndex)) /* note: we are searching at current+1 */
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
             const BYTE* repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             mLength = ZSTD_count_2segments(ip+1+4, repMatch+4, iend, repMatchEnd, prefixStart) + 4;
@@ -477,7 +477,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
             U32 const repIndex2 = current2 - offset_2;
             const BYTE* repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
             if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) /* intentional overflow : ensure repIndex2 doesn't overlap dict + prefix */
-                & (repIndex2 > dictStartIndex))
+                & (offset_2 < current2 - dictStartIndex))
               && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                 const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;

View File

@@ -416,9 +416,9 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
         const BYTE* const repMatch = repBase + repIndex;
         hashTable[h] = current;   /* update hash table */
         DEBUGLOG(7, "offset_1 = %u , current = %u", offset_1, current);
+        assert(offset_1 <= current +1);   /* check repIndex */
-        if ( (((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */ & (repIndex > dictStartIndex))
+        if ( ( ((U32)((prefixStartIndex-1) - repIndex) >= 3) /* intentional underflow */
+             & (offset_1 < current+1 - dictStartIndex) ) /* note: we are searching at current+1 */
           && (MEM_read32(repMatch) == MEM_read32(ip+1)) ) {
             const BYTE* const repMatchEnd = repIndex < prefixStartIndex ? dictEnd : iend;
             size_t const rLength = ZSTD_count_2segments(ip+1 +4, repMatch +4, iend, repMatchEnd, prefixStart) + 4;
@@ -453,7 +453,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
             U32 const current2 = (U32)(ip-base);
             U32 const repIndex2 = current2 - offset_2;
             const BYTE* const repMatch2 = repIndex2 < prefixStartIndex ? dictBase + repIndex2 : base + repIndex2;
-            if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (repIndex2 > dictStartIndex)) /* intentional overflow */
+            if ( (((U32)((prefixStartIndex-1) - repIndex2) >= 3) & (offset_2 < current - dictStartIndex)) /* intentional overflow */
              && (MEM_read32(repMatch2) == MEM_read32(ip)) ) {
                 const BYTE* const repEnd2 = repIndex2 < prefixStartIndex ? dictEnd : iend;
                 size_t const repLength2 = ZSTD_count_2segments(ip+4, repMatch2+4, iend, repEnd2, prefixStart) + 4;
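
Each zstd hunk swaps the `repIndex > dictStartIndex` validity test for an offset comparison. When offset_1 exceeds current+1, repIndex underflows as a U32 and wraps to a huge value that still passes the old `>` test; comparing the offset against the window size cannot wrap. A standalone demonstration of the failure mode, with toy values rather than zstd code:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t current = 5, offset_1 = 9, dictStartIndex = 2;
	uint32_t repIndex = current + 1 - offset_1;	/* wraps: 0xFFFFFFFD */

	/* Old check: the wrapped index spuriously passes. */
	printf("old: %d\n", repIndex > dictStartIndex);			/* 1 */
	/* New check: rejects the out-of-window offset, no wrap involved. */
	printf("new: %d\n", offset_1 < current + 1 - dictStartIndex);	/* 0 */
	return (0);
}
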

View File

@@ -975,7 +975,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
         const U32 repIndex = (U32)(current+1 - offset_1);
         const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
         const BYTE* const repMatch = repBase + repIndex;
-        if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
+        if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow */
+           & (offset_1 < current+1 - windowLow) ) /* note: we are searching at current+1 */
         if (MEM_read32(ip+1) == MEM_read32(repMatch)) {
             /* repcode detected we should take it */
             const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1006,7 +1007,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
             const U32 repIndex = (U32)(current - offset_1);
             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
             const BYTE* const repMatch = repBase + repIndex;
-            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
+            if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
+               & (offset_1 < current - windowLow) ) /* equivalent to `current > repIndex >= windowLow` */
             if (MEM_read32(ip) == MEM_read32(repMatch)) {
                 /* repcode detected */
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1037,7 +1039,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
             const U32 repIndex = (U32)(current - offset_1);
             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
             const BYTE* const repMatch = repBase + repIndex;
-            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
+            if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
+               & (offset_1 < current - windowLow) ) /* equivalent to `current > repIndex >= windowLow` */
             if (MEM_read32(ip) == MEM_read32(repMatch)) {
                 /* repcode detected */
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;
@@ -1083,7 +1086,8 @@ size_t ZSTD_compressBlock_lazy_extDict_generic(
             const U32 repIndex = repCurrent - offset_2;
             const BYTE* const repBase = repIndex < dictLimit ? dictBase : base;
             const BYTE* const repMatch = repBase + repIndex;
-            if (((U32)((dictLimit-1) - repIndex) >= 3) & (repIndex > windowLow)) /* intentional overflow */
+            if ( ((U32)((dictLimit-1) - repIndex) >= 3) /* intentional overflow : do not test positions overlapping 2 memory segments */
+               & (offset_2 < repCurrent - windowLow) ) /* equivalent to `curr > repIndex >= windowLow` */
             if (MEM_read32(ip) == MEM_read32(repMatch)) {
                 /* repcode detected we should take it */
                 const BYTE* const repEnd = repIndex < dictLimit ? dictEnd : iend;

View File

@@ -3,7 +3,7 @@
 # Set the default udev directory based on distribution.
 %if %{undefined _udevdir}
-%if 0%{?fedora}%{?rhel}%{?centos}%{?openEuler}
+%if 0%{?rhel}%{?fedora}%{?centos}%{?suse_version}%{?openEuler}
 %global _udevdir %{_prefix}/lib/udev
 %else
 %global _udevdir /lib/udev
@@ -12,7 +12,7 @@
 # Set the default udevrule directory based on distribution.
 %if %{undefined _udevruledir}
-%if 0%{?fedora}%{?rhel}%{?centos}%{?openEuler}
+%if 0%{?rhel}%{?fedora}%{?centos}%{?suse_version}%{?openEuler}
 %global _udevruledir %{_prefix}/lib/udev/rules.d
 %else
 %global _udevruledir /lib/udev/rules.d
@@ -21,7 +21,7 @@
 # Set the default dracut directory based on distribution.
 %if %{undefined _dracutdir}
-%if 0%{?fedora}%{?rhel}%{?centos}%{?openEuler}
+%if 0%{?rhel}%{?fedora}%{?centos}%{?suse_version}%{?openEuler}
 %global _dracutdir %{_prefix}/lib/dracut
 %else
 %global _dracutdir %{_prefix}/share/dracut
@@ -110,7 +110,7 @@ BuildRequires: libblkid-devel
 BuildRequires: libudev-devel
 BuildRequires: libattr-devel
 BuildRequires: openssl-devel
-%if 0%{?fedora}%{?openEuler} || 0%{?rhel} >= 8 || 0%{?centos} >= 8
+%if 0%{?fedora}%{?suse_version}%{?openEuler} || 0%{?rhel} >= 8 || 0%{?centos} >= 8
 BuildRequires: libtirpc-devel
 %endif

View File

@@ -704,7 +704,7 @@ tags = ['functional', 'nestedfs']
 [tests/functional/no_space]
 tests = ['enospc_001_pos', 'enospc_002_pos', 'enospc_003_pos',
-    'enospc_df', 'enospc_rm']
+    'enospc_df', 'enospc_ganging', 'enospc_rm']
 tags = ['functional', 'no_space']
 [tests/functional/nopwrite]

View File

@@ -297,7 +297,7 @@ User: %s
         proc = Popen(privcmd, stdout=PIPE, stderr=PIPE)
         # Allow a special timeout value of 0 to mean infinity
         if int(self.timeout) == 0:
-            self.timeout = sys.maxsize
+            self.timeout = sys.maxsize / (10 ** 9)
         t = Timer(int(self.timeout), self.kill_cmd, [proc])
         try:

View File

@@ -1539,6 +1539,7 @@ nobase_dist_datadir_zfs_tests_tests_SCRIPTS += \
 	functional/no_space/enospc_002_pos.ksh \
 	functional/no_space/enospc_003_pos.ksh \
 	functional/no_space/enospc_df.ksh \
+	functional/no_space/enospc_ganging.ksh \
 	functional/no_space/enospc_rm.ksh \
 	functional/no_space/setup.ksh \
 	functional/online_offline/cleanup.ksh \

View File

@@ -0,0 +1,86 @@
#!/bin/ksh
#
# This file and its contents are supplied under the terms of the
# Common Development and Distribution License ("CDDL"), version 1.0.
# You may only use this file in accordance with the terms of version
# 1.0 of the CDDL.
#
# A full copy of the text of the CDDL should have accompanied this
# source. A copy of the CDDL is also available via the Internet at
# http://www.illumos.org/license/CDDL.
#
. $STF_SUITE/include/libtest.shlib
#
# DESCRIPTION:
# Exercise gang block IO paths for non-encrypted and encrypted datasets.
#
verify_runnable "both"
log_assert "Verify IO when file system is full and ganging."
function cleanup
{
log_must set_tunable64 METASLAB_FORCE_GANGING $metaslab_force_ganging
default_cleanup_noexit
}
log_onexit cleanup
default_setup_noexit $DISKS
typeset metaslab_force_ganging=$(get_tunable METASLAB_FORCE_GANGING)
shift=$(random_int_between 15 17)
log_must set_tunable64 METASLAB_FORCE_GANGING $((2**$shift))
keyfile=/$TESTPOOL/keyencfods
log_must eval "echo 'password' > $keyfile"
bs=1024k
count=512
log_must dd if=/dev/urandom of=$TESTDIR/data bs=$bs count=$count
data_checksum=$(sha256digest $TESTDIR/data)
# Test common large block configuration.
log_must zfs create -o recordsize=1m -o primarycache=metadata $TESTPOOL/gang
mntpnt=$(get_prop mountpoint $TESTPOOL/gang)
log_must dd if=$TESTDIR/data of=$mntpnt/file bs=$bs count=$count
sync_pool $TESTPOOL
log_must dd if=$mntpnt/file of=$TESTDIR/out bs=$bs count=$count
out_checksum=$(sha256digest $TESTDIR/out)
if [[ "$data_checksum" != "$out_checksum" ]]; then
log_fail "checksum mismatch ($data_checksum != $out_checksum)"
fi
log_must rm -f $TESTDIR/out
log_must zfs destroy $TESTPOOL/gang
# Test common large block configuration with encryption.
log_must zfs create \
-o recordsize=1m \
-o primarycache=metadata \
-o compression=off \
-o encryption=on \
-o keyformat=passphrase \
-o keylocation=file://$keyfile \
-o copies=2 \
$TESTPOOL/gang
mntpnt=$(get_prop mountpoint $TESTPOOL/gang)
log_must dd if=$TESTDIR/data of=$mntpnt/file bs=$bs count=$count
sync_pool $TESTPOOL
log_must dd if=$mntpnt/file of=$TESTDIR/out bs=$bs count=$count
out_checksum=$(sha256digest $TESTDIR/out)
if [[ "$data_checksum" != "$out_checksum" ]]; then
log_fail "checksum mismatch ($data_checksum != $out_checksum)"
fi
log_must rm -f $TESTDIR/out
log_must zfs destroy $TESTPOOL/gang
log_pass "Verified IO when file system is full and ganging."

View File

@@ -296,6 +296,9 @@
 /* fault_in_iov_iter_readable() is available */
 /* #undef HAVE_FAULT_IN_IOV_ITER_READABLE */
+/* filemap_range_has_page() is available */
+/* #undef HAVE_FILEMAP_RANGE_HAS_PAGE */
 /* fops->aio_fsync() exists */
 /* #undef HAVE_FILE_AIO_FSYNC */
@@ -985,7 +988,7 @@
 /* #undef ZFS_IS_GPL_COMPATIBLE */
 /* Define the project alias string. */
-#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_g9cd71c860"
+#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_g57cfae4a2"
 /* Define the project author. */
 #define ZFS_META_AUTHOR "OpenZFS"
@@ -1015,7 +1018,7 @@
 #define ZFS_META_NAME "zfs"
 /* Define the project release. */
-#define ZFS_META_RELEASE "FreeBSD_g9cd71c860"
+#define ZFS_META_RELEASE "FreeBSD_g57cfae4a2"
 /* Define the project version. */
 #define ZFS_META_VERSION "2.1.99"

View File

@@ -1 +1 @@
-#define ZFS_META_GITREV "zfs-2.1.99-1706-g9cd71c860"
+#define ZFS_META_GITREV "zfs-2.1.99-1734-g57cfae4a2"