Notable upstream pull request merges:
  #14654 Pack our DDT ZAPs a bit denser
  #14979 Again fix race between zil_commit() and zil_suspend()
  #14985 Some ZIO micro-optimizations
  #15000 Fix remount when setting multiple properties
  #15004 ddt_addref: remove unnecessary phys fill when refcount is 0
  #15007 Do not report bytes skipped by scan as issued
  #15023 Enable tuning of ZVOL open timeout value

Obtained from:	OpenZFS
OpenZFS commit:	009d3288de
OpenZFS tag:	zfs-2.2.0-rc1
This commit is contained in:
Martin Matuska 2023-07-01 12:32:38 +02:00
commit 0a97523d46
24 changed files with 218 additions and 96 deletions

View File

@ -1,8 +1,8 @@
Meta: 1
Name: zfs
Branch: 1.0
Version: 2.1.99
Release: 1
Version: 2.2.0
Release: rc1
Release-Tags: relext
License: CDDL
Author: OpenZFS

View File

@ -794,7 +794,7 @@ usage(void)
"\t\t[-o <var>=<value>]... [-t <txg>] [-U <cache>] [-x <dumpdir>]\n"
"\t\t[-K <key>] <poolname>/<objset id> [<backupflags>]\n"
"\t%s [-v] <bookmark>\n"
"\t%s -C [-A] [-U <cache>]\n"
"\t%s -C [-A] [-U <cache>] [<poolname>]\n"
"\t%s -l [-Aqu] <device>\n"
"\t%s -m [-AFLPX] [-e [-V] [-p <path> ...]] [-t <txg>] "
"[-U <cache>]\n\t\t<poolname> [<vdev> [<metaslab> ...]]\n"

View File

@ -6057,8 +6057,8 @@ construct_fsacl_list(boolean_t un, struct allow_opts *opts, nvlist_t **nvlp)
if (p != NULL)
rid = p->pw_uid;
else if (*endch != '\0') {
(void) snprintf(errbuf, 256, gettext(
"invalid user %s\n"), curr);
(void) snprintf(errbuf, sizeof (errbuf),
gettext("invalid user %s\n"), curr);
allow_usage(un, B_TRUE, errbuf);
}
} else if (opts->group) {
@ -6071,8 +6071,9 @@ construct_fsacl_list(boolean_t un, struct allow_opts *opts, nvlist_t **nvlp)
if (g != NULL)
rid = g->gr_gid;
else if (*endch != '\0') {
(void) snprintf(errbuf, 256, gettext(
"invalid group %s\n"), curr);
(void) snprintf(errbuf, sizeof (errbuf),
gettext("invalid group %s\n"),
curr);
allow_usage(un, B_TRUE, errbuf);
}
} else {
@ -6097,8 +6098,9 @@ construct_fsacl_list(boolean_t un, struct allow_opts *opts, nvlist_t **nvlp)
who_type = ZFS_DELEG_GROUP;
rid = g->gr_gid;
} else {
(void) snprintf(errbuf, 256, gettext(
"invalid user/group %s\n"), curr);
(void) snprintf(errbuf, sizeof (errbuf),
gettext("invalid user/group %s\n"),
curr);
allow_usage(un, B_TRUE, errbuf);
}
}

View File

@ -7662,11 +7662,11 @@ static void
print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
{
time_t start, end, pause;
uint64_t pass_scanned, scanned, pass_issued, issued, total;
uint64_t pass_scanned, scanned, pass_issued, issued, total_s, total_i;
uint64_t elapsed, scan_rate, issue_rate;
double fraction_done;
char processed_buf[7], scanned_buf[7], issued_buf[7], total_buf[7];
char srate_buf[7], irate_buf[7], time_buf[32];
char processed_buf[7], scanned_buf[7], issued_buf[7], total_s_buf[7];
char total_i_buf[7], srate_buf[7], irate_buf[7], time_buf[32];
printf(" ");
printf_color(ANSI_BOLD, gettext("scan:"));
@ -7738,10 +7738,11 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
pass_scanned = ps->pss_pass_exam;
issued = ps->pss_issued;
pass_issued = ps->pss_pass_issued;
total = ps->pss_to_examine;
total_s = ps->pss_to_examine;
total_i = ps->pss_to_examine - ps->pss_skipped;
/* we are only done with a block once we have issued the IO for it */
fraction_done = (double)issued / total;
fraction_done = (double)issued / total_i;
/* elapsed time for this pass, rounding up to 1 if it's 0 */
elapsed = time(NULL) - ps->pss_pass_start;
@ -7750,26 +7751,25 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
scan_rate = pass_scanned / elapsed;
issue_rate = pass_issued / elapsed;
uint64_t total_secs_left = (issue_rate != 0 && total >= issued) ?
((total - issued) / issue_rate) : UINT64_MAX;
secs_to_dhms(total_secs_left, time_buf);
/* format all of the numbers we will be reporting */
zfs_nicebytes(scanned, scanned_buf, sizeof (scanned_buf));
zfs_nicebytes(issued, issued_buf, sizeof (issued_buf));
zfs_nicebytes(total, total_buf, sizeof (total_buf));
zfs_nicebytes(scan_rate, srate_buf, sizeof (srate_buf));
zfs_nicebytes(issue_rate, irate_buf, sizeof (irate_buf));
zfs_nicebytes(total_s, total_s_buf, sizeof (total_s_buf));
zfs_nicebytes(total_i, total_i_buf, sizeof (total_i_buf));
/* do not print estimated time if we have a paused scrub */
if (pause == 0) {
(void) printf(gettext("\t%s scanned at %s/s, "
"%s issued at %s/s, %s total\n"),
scanned_buf, srate_buf, issued_buf, irate_buf, total_buf);
} else {
(void) printf(gettext("\t%s scanned, %s issued, %s total\n"),
scanned_buf, issued_buf, total_buf);
(void) printf(gettext("\t%s / %s scanned"), scanned_buf, total_s_buf);
if (pause == 0 && scan_rate > 0) {
zfs_nicebytes(scan_rate, srate_buf, sizeof (srate_buf));
(void) printf(gettext(" at %s/s"), srate_buf);
}
(void) printf(gettext(", %s / %s issued"), issued_buf, total_i_buf);
if (pause == 0 && issue_rate > 0) {
zfs_nicebytes(issue_rate, irate_buf, sizeof (irate_buf));
(void) printf(gettext(" at %s/s"), irate_buf);
}
(void) printf(gettext("\n"));
if (is_resilver) {
(void) printf(gettext("\t%s resilvered, %.2f%% done"),
@ -7782,16 +7782,16 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
if (pause == 0) {
/*
* Only provide an estimate iff:
* 1) the time remaining is valid, and
* 1) we haven't yet issued all we expected, and
* 2) the issue rate exceeds 10 MB/s, and
* 3) it's either:
* a) a resilver which has started repairs, or
* b) a scrub which has entered the issue phase.
*/
if (total_secs_left != UINT64_MAX &&
issue_rate >= 10 * 1024 * 1024 &&
if (total_i >= issued && issue_rate >= 10 * 1024 * 1024 &&
((is_resilver && ps->pss_processed > 0) ||
(is_scrub && issued > 0))) {
secs_to_dhms((total_i - issued) / issue_rate, time_buf);
(void) printf(gettext(", %s to go\n"), time_buf);
} else {
(void) printf(gettext(", no estimated "
@ -7803,7 +7803,7 @@ print_scan_scrub_resilver_status(pool_scan_stat_t *ps)
}
static void
print_rebuild_status_impl(vdev_rebuild_stat_t *vrs, char *vdev_name)
print_rebuild_status_impl(vdev_rebuild_stat_t *vrs, uint_t c, char *vdev_name)
{
if (vrs == NULL || vrs->vrs_state == VDEV_REBUILD_NONE)
return;
@ -7815,17 +7815,20 @@ print_rebuild_status_impl(vdev_rebuild_stat_t *vrs, char *vdev_name)
uint64_t bytes_scanned = vrs->vrs_bytes_scanned;
uint64_t bytes_issued = vrs->vrs_bytes_issued;
uint64_t bytes_rebuilt = vrs->vrs_bytes_rebuilt;
uint64_t bytes_est = vrs->vrs_bytes_est;
uint64_t bytes_est_s = vrs->vrs_bytes_est;
uint64_t bytes_est_i = vrs->vrs_bytes_est;
if (c > offsetof(vdev_rebuild_stat_t, vrs_pass_bytes_skipped) / 8)
bytes_est_i -= vrs->vrs_pass_bytes_skipped;
uint64_t scan_rate = (vrs->vrs_pass_bytes_scanned /
(vrs->vrs_pass_time_ms + 1)) * 1000;
uint64_t issue_rate = (vrs->vrs_pass_bytes_issued /
(vrs->vrs_pass_time_ms + 1)) * 1000;
double scan_pct = MIN((double)bytes_scanned * 100 /
(bytes_est + 1), 100);
(bytes_est_s + 1), 100);
/* Format all of the numbers we will be reporting */
char bytes_scanned_buf[7], bytes_issued_buf[7];
char bytes_rebuilt_buf[7], bytes_est_buf[7];
char bytes_rebuilt_buf[7], bytes_est_s_buf[7], bytes_est_i_buf[7];
char scan_rate_buf[7], issue_rate_buf[7], time_buf[32];
zfs_nicebytes(bytes_scanned, bytes_scanned_buf,
sizeof (bytes_scanned_buf));
@ -7833,9 +7836,8 @@ print_rebuild_status_impl(vdev_rebuild_stat_t *vrs, char *vdev_name)
sizeof (bytes_issued_buf));
zfs_nicebytes(bytes_rebuilt, bytes_rebuilt_buf,
sizeof (bytes_rebuilt_buf));
zfs_nicebytes(bytes_est, bytes_est_buf, sizeof (bytes_est_buf));
zfs_nicebytes(scan_rate, scan_rate_buf, sizeof (scan_rate_buf));
zfs_nicebytes(issue_rate, issue_rate_buf, sizeof (issue_rate_buf));
zfs_nicebytes(bytes_est_s, bytes_est_s_buf, sizeof (bytes_est_s_buf));
zfs_nicebytes(bytes_est_i, bytes_est_i_buf, sizeof (bytes_est_i_buf));
time_t start = vrs->vrs_start_time;
time_t end = vrs->vrs_end_time;
@ -7858,17 +7860,29 @@ print_rebuild_status_impl(vdev_rebuild_stat_t *vrs, char *vdev_name)
assert(vrs->vrs_state == VDEV_REBUILD_ACTIVE);
secs_to_dhms(MAX((int64_t)bytes_est - (int64_t)bytes_scanned, 0) /
MAX(scan_rate, 1), time_buf);
(void) printf(gettext("\t%s / %s scanned"), bytes_scanned_buf,
bytes_est_s_buf);
if (scan_rate > 0) {
zfs_nicebytes(scan_rate, scan_rate_buf, sizeof (scan_rate_buf));
(void) printf(gettext(" at %s/s"), scan_rate_buf);
}
(void) printf(gettext(", %s / %s issued"), bytes_issued_buf,
bytes_est_i_buf);
if (issue_rate > 0) {
zfs_nicebytes(issue_rate, issue_rate_buf,
sizeof (issue_rate_buf));
(void) printf(gettext(" at %s/s"), issue_rate_buf);
}
(void) printf(gettext("\n"));
(void) printf(gettext("\t%s scanned at %s/s, %s issued %s/s, "
"%s total\n"), bytes_scanned_buf, scan_rate_buf,
bytes_issued_buf, issue_rate_buf, bytes_est_buf);
(void) printf(gettext("\t%s resilvered, %.2f%% done"),
bytes_rebuilt_buf, scan_pct);
if (vrs->vrs_state == VDEV_REBUILD_ACTIVE) {
if (scan_rate >= 10 * 1024 * 1024) {
if (bytes_est_s >= bytes_scanned &&
scan_rate >= 10 * 1024 * 1024) {
secs_to_dhms((bytes_est_s - bytes_scanned) / scan_rate,
time_buf);
(void) printf(gettext(", %s to go\n"), time_buf);
} else {
(void) printf(gettext(", no estimated "
@ -7900,7 +7914,7 @@ print_rebuild_status(zpool_handle_t *zhp, nvlist_t *nvroot)
ZPOOL_CONFIG_REBUILD_STATS, (uint64_t **)&vrs, &i) == 0) {
char *name = zpool_vdev_name(g_zfs, zhp,
child[c], VDEV_NAME_TYPE_ID);
print_rebuild_status_impl(vrs, name);
print_rebuild_status_impl(vrs, i, name);
free(name);
}
}
@ -8005,13 +8019,15 @@ print_scan_status(zpool_handle_t *zhp, nvlist_t *nvroot)
active_resilver = (ps->pss_state == DSS_SCANNING);
}
have_resilver = (ps->pss_func == POOL_SCAN_RESILVER);
have_scrub = (ps->pss_func == POOL_SCAN_SCRUB);
scrub_start = ps->pss_start_time;
have_errorscrub = (ps->pss_error_scrub_func ==
POOL_SCAN_ERRORSCRUB);
errorscrub_start = ps->pss_error_scrub_start;
if (c > offsetof(pool_scan_stat_t,
pss_pass_error_scrub_pause) / 8) {
have_errorscrub = (ps->pss_error_scrub_func ==
POOL_SCAN_ERRORSCRUB);
errorscrub_start = ps->pss_error_scrub_start;
}
}
boolean_t active_rebuild = check_rebuilding(nvroot, &rebuild_end_time);

View File

@ -238,6 +238,7 @@ print_scan_status(nvlist_t *nvroot, const char *pool_name)
print_kv("end_ts", ps->pss_end_time);
print_kv(",errors", ps->pss_errors);
print_kv(",examined", examined);
print_kv(",skipped", ps->pss_skipped);
print_kv(",issued", ps->pss_issued);
print_kv(",pass_examined", pass_exam);
print_kv(",pass_issued", ps->pss_pass_issued);
@ -249,7 +250,6 @@ print_scan_status(nvlist_t *nvroot, const char *pool_name)
print_kv(",remaining_t", remaining_time);
print_kv(",start_ts", ps->pss_start_time);
print_kv(",to_examine", ps->pss_to_examine);
print_kv(",to_process", ps->pss_to_process);
printf(" %llu\n", (u_longlong_t)timestamp);
return (0);
}

View File

@ -36,7 +36,7 @@ install() {
{ dfatal "Failed to install essential binaries"; exit 1; }
# Adapted from https://github.com/zbm-dev/zfsbootmenu
if ! ldd "$(command -v zpool)" | grep -qF 'libgcc_s.so'; then
if ! ldd "$(command -v zpool)" | grep -qF 'libgcc_s.so' && ldconfig -p 2> /dev/null | grep -qF 'libc.so.6' ; then
# On systems with gcc-config (Gentoo, Funtoo, etc.), use it to find libgcc_s
if command -v gcc-config >/dev/null; then
inst_simple "/usr/lib/gcc/$(s=$(gcc-config -c); echo "${s%-*}/${s##*-}")/libgcc_s.so.1" ||

View File

@ -61,7 +61,7 @@ typedef struct dsl_scan_phys {
uint64_t scn_end_time;
uint64_t scn_to_examine; /* total bytes to be scanned */
uint64_t scn_examined; /* bytes scanned so far */
uint64_t scn_to_process;
uint64_t scn_skipped; /* bytes skipped by scanner */
uint64_t scn_processed;
uint64_t scn_errors; /* scan I/O error count */
uint64_t scn_ddt_class_max;

View File

@ -1088,7 +1088,7 @@ typedef struct pool_scan_stat {
uint64_t pss_end_time; /* scan end time */
uint64_t pss_to_examine; /* total bytes to scan */
uint64_t pss_examined; /* total bytes located by scanner */
uint64_t pss_to_process; /* total bytes to process */
uint64_t pss_skipped; /* total bytes skipped by scanner */
uint64_t pss_processed; /* total processed bytes */
uint64_t pss_errors; /* scan errors */
@ -1152,6 +1152,7 @@ typedef struct vdev_rebuild_stat {
uint64_t vrs_pass_time_ms; /* pass run time (millisecs) */
uint64_t vrs_pass_bytes_scanned; /* bytes scanned since start/resume */
uint64_t vrs_pass_bytes_issued; /* bytes rebuilt since start/resume */
uint64_t vrs_pass_bytes_skipped; /* bytes skipped since start/resume */
} vdev_rebuild_stat_t;
/*

View File

@ -79,6 +79,7 @@ typedef struct vdev_rebuild {
uint64_t vr_pass_start_time;
uint64_t vr_pass_bytes_scanned;
uint64_t vr_pass_bytes_issued;
uint64_t vr_pass_bytes_skipped;
/* On-disk state updated by vdev_rebuild_zap_update_sync() */
vdev_rebuild_phys_t vr_rebuild_phys;

View File

@ -341,9 +341,9 @@ typedef struct zio_prop {
enum zio_checksum zp_checksum;
enum zio_compress zp_compress;
uint8_t zp_complevel;
dmu_object_type_t zp_type;
uint8_t zp_level;
uint8_t zp_copies;
dmu_object_type_t zp_type;
boolean_t zp_dedup;
boolean_t zp_dedup_verify;
boolean_t zp_nopwrite;
@ -611,6 +611,7 @@ extern zio_t *zio_walk_parents(zio_t *cio, zio_link_t **);
extern zio_t *zio_walk_children(zio_t *pio, zio_link_t **);
extern zio_t *zio_unique_parent(zio_t *cio);
extern void zio_add_child(zio_t *pio, zio_t *cio);
extern void zio_add_child_first(zio_t *pio, zio_t *cio);
extern void *zio_buf_alloc(size_t size);
extern void zio_buf_free(void *buf, size_t size);

View File

@ -1789,7 +1789,8 @@ zfs_prop_set_list(zfs_handle_t *zhp, nvlist_t *props)
nvlist_t *nvl;
int nvl_len = 0;
int added_resv = 0;
zfs_prop_t prop = 0;
zfs_prop_t prop;
boolean_t nsprop = B_FALSE;
nvpair_t *elem;
(void) snprintf(errbuf, sizeof (errbuf),
@ -1836,6 +1837,7 @@ zfs_prop_set_list(zfs_handle_t *zhp, nvlist_t *props)
elem = nvlist_next_nvpair(nvl, elem)) {
prop = zfs_name_to_prop(nvpair_name(elem));
nsprop |= zfs_is_namespace_prop(prop);
assert(cl_idx < nvl_len);
/*
@ -1934,8 +1936,7 @@ zfs_prop_set_list(zfs_handle_t *zhp, nvlist_t *props)
* if one of the options handled by the generic
* Linux namespace layer has been modified.
*/
if (zfs_is_namespace_prop(prop) &&
zfs_is_mounted(zhp, NULL))
if (nsprop && zfs_is_mounted(zhp, NULL))
ret = zfs_mount(zhp, MNTOPT_REMOUNT, 0);
}
}

View File

@ -239,6 +239,16 @@ relative to the pool.
Make some blocks above a certain size be gang blocks.
This option is used by the test suite to facilitate testing.
.
.It Sy zfs_ddt_zap_default_bs Ns = Ns Sy 15 Po 32 KiB Pc Pq int
Default DDT ZAP data block size as a power of 2. Note that changing this after
creating a DDT on the pool will not affect existing DDTs, only newly created
ones.
.
.It Sy zfs_ddt_zap_default_ibs Ns = Ns Sy 15 Po 32 KiB Pc Pq int
Default DDT ZAP indirect block size as a power of 2. Note that changing this
after creating a DDT on the pool will not affect existing DDTs, only newly
created ones.
.
.It Sy zfs_default_bs Ns = Ns Sy 9 Po 512 B Pc Pq int
Default dnode block size as a power of 2.
.

View File

@ -14,7 +14,7 @@
.\" Copyright (c) 2017 Lawrence Livermore National Security, LLC.
.\" Copyright (c) 2017 Intel Corporation.
.\"
.Dd June 4, 2023
.Dd June 27, 2023
.Dt ZDB 8
.Os
.
@ -51,6 +51,7 @@
.Fl C
.Op Fl A
.Op Fl U Ar cache
.Op Ar poolname
.Nm
.Fl E
.Op Fl A

View File

@ -26,7 +26,7 @@
.\" Copyright 2017 Nexenta Systems, Inc.
.\" Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
.\"
.Dd July 25, 2021
.Dd June 22, 2023
.Dt ZPOOL-SCRUB 8
.Os
.
@ -123,7 +123,7 @@ Status of pool with ongoing scrub:
.No # Nm zpool Cm status
...
scan: scrub in progress since Sun Jul 25 16:07:49 2021
403M scanned at 100M/s, 68.4M issued at 10.0M/s, 405M total
403M / 405M scanned at 100M/s, 68.4M / 405M issued at 10.0M/s
0B repaired, 16.91% done, 00:00:04 to go
...
.Ed

View File

@ -54,7 +54,7 @@ static unsigned int zvol_prefetch_bytes = (128 * 1024);
static unsigned long zvol_max_discard_blocks = 16384;
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
static const unsigned int zvol_open_timeout_ms = 1000;
static unsigned int zvol_open_timeout_ms = 1000;
#endif
static unsigned int zvol_threads = 0;
@ -1612,4 +1612,9 @@ MODULE_PARM_DESC(zvol_blk_mq_blocks_per_thread,
"Process volblocksize blocks per thread");
#endif
#ifndef HAVE_BLKDEV_GET_ERESTARTSYS
module_param(zvol_open_timeout_ms, uint, 0644);
MODULE_PARM_DESC(zvol_open_timeout_ms, "Timeout for ZVOL open retries");
#endif
/* END CSTYLED */

View File

@ -1209,10 +1209,19 @@ ddt_addref(spa_t *spa, const blkptr_t *bp)
ASSERT3S(dde->dde_class, <, DDT_CLASSES);
ddp = &dde->dde_phys[BP_GET_NDVAS(bp)];
if (ddp->ddp_refcnt == 0) {
/* This should never happen? */
ddt_phys_fill(ddp, bp);
}
/*
* This entry already existed (dde_type is real), so it must
* have refcnt >0 at the start of this txg. We are called from
* brt_pending_apply(), before frees are issued, so the refcnt
* can't be lowered yet. Therefore, it must be >0. We assert
* this because if the order of BRT and DDT interactions were
* ever to change and the refcnt was ever zero here, then
* likely further action is required to fill out the DDT entry,
* and this is a place that is likely to be missed in testing.
*/
ASSERT3U(ddp->ddp_refcnt, >, 0);
ddt_phys_addref(ddp);
result = B_TRUE;
} else {

View File

@ -31,8 +31,8 @@
#include <sys/zap.h>
#include <sys/dmu_tx.h>
static const int ddt_zap_leaf_blockshift = 12;
static const int ddt_zap_indirect_blockshift = 12;
static unsigned int ddt_zap_default_bs = 15;
static unsigned int ddt_zap_default_ibs = 15;
static int
ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash)
@ -43,7 +43,7 @@ ddt_zap_create(objset_t *os, uint64_t *objectp, dmu_tx_t *tx, boolean_t prehash)
flags |= ZAP_FLAG_PRE_HASHED_KEY;
*objectp = zap_create_flags(os, 0, flags, DMU_OT_DDT_ZAP,
ddt_zap_leaf_blockshift, ddt_zap_indirect_blockshift,
ddt_zap_default_bs, ddt_zap_default_ibs,
DMU_OT_NONE, 0, tx);
return (*objectp == 0 ? SET_ERROR(ENOTSUP) : 0);
@ -166,3 +166,10 @@ const ddt_ops_t ddt_zap_ops = {
ddt_zap_walk,
ddt_zap_count,
};
/* BEGIN CSTYLED */
ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_bs, UINT, ZMOD_RW,
"DDT ZAP leaf blockshift");
ZFS_MODULE_PARAM(zfs_dedup, , ddt_zap_default_ibs, UINT, ZMOD_RW,
"DDT ZAP indirect blockshift");
/* END CSTYLED */

View File

@ -573,7 +573,8 @@ dsl_scan_init(dsl_pool_t *dp, uint64_t txg)
* counter to how far we've scanned. We know we're consistent
* up to here.
*/
scn->scn_issued_before_pass = scn->scn_phys.scn_examined;
scn->scn_issued_before_pass = scn->scn_phys.scn_examined -
scn->scn_phys.scn_skipped;
if (dsl_scan_is_running(scn) &&
spa_prev_software_version(dp->dp_spa) < SPA_VERSION_SCAN) {
@ -4362,7 +4363,7 @@ dsl_scan_sync(dsl_pool_t *dp, dmu_tx_t *tx)
* Disabled by default, set zfs_scan_report_txgs to report
* average performance over the last zfs_scan_report_txgs TXGs.
*/
if (!dsl_scan_is_paused_scrub(scn) && zfs_scan_report_txgs != 0 &&
if (zfs_scan_report_txgs != 0 &&
tx->tx_txg % zfs_scan_report_txgs == 0) {
scn->scn_issued_before_pass += spa->spa_scan_pass_issued;
spa_scan_stat_init(spa);
@ -4564,6 +4565,15 @@ count_block_issued(spa_t *spa, const blkptr_t *bp, boolean_t all)
all ? BP_GET_ASIZE(bp) : DVA_GET_ASIZE(&bp->blk_dva[0]));
}
static void
count_block_skipped(dsl_scan_t *scn, const blkptr_t *bp, boolean_t all)
{
if (BP_IS_EMBEDDED(bp))
return;
atomic_add_64(&scn->scn_phys.scn_skipped,
all ? BP_GET_ASIZE(bp) : DVA_GET_ASIZE(&bp->blk_dva[0]));
}
static void
count_block(zfs_all_blkstats_t *zab, const blkptr_t *bp)
{
@ -4709,7 +4719,7 @@ dsl_scan_scrub_cb(dsl_pool_t *dp,
count_block(dp->dp_blkstats, bp);
if (phys_birth <= scn->scn_phys.scn_min_txg ||
phys_birth >= scn->scn_phys.scn_max_txg) {
count_block_issued(spa, bp, B_TRUE);
count_block_skipped(scn, bp, B_TRUE);
return (0);
}
@ -4750,7 +4760,7 @@ dsl_scan_scrub_cb(dsl_pool_t *dp,
if (needs_io && !zfs_no_scrub_io) {
dsl_scan_enqueue(dp, bp, zio_flags, zb);
} else {
count_block_issued(spa, bp, B_TRUE);
count_block_skipped(scn, bp, B_TRUE);
}
/* do not relocate this block */
@ -5119,9 +5129,9 @@ dsl_scan_freed_dva(spa_t *spa, const blkptr_t *bp, int dva_i)
ASSERT(range_tree_contains(queue->q_exts_by_addr, start, size));
range_tree_remove_fill(queue->q_exts_by_addr, start, size);
/* count the block as though we issued it */
/* count the block as though we skipped it */
sio2bp(sio, &tmpbp);
count_block_issued(spa, &tmpbp, B_FALSE);
count_block_skipped(scn, &tmpbp, B_FALSE);
sio_free(sio);
}

View File

@ -2611,7 +2611,7 @@ spa_scan_get_stats(spa_t *spa, pool_scan_stat_t *ps)
ps->pss_end_time = scn->scn_phys.scn_end_time;
ps->pss_to_examine = scn->scn_phys.scn_to_examine;
ps->pss_examined = scn->scn_phys.scn_examined;
ps->pss_to_process = scn->scn_phys.scn_to_process;
ps->pss_skipped = scn->scn_phys.scn_skipped;
ps->pss_processed = scn->scn_phys.scn_processed;
ps->pss_errors = scn->scn_phys.scn_errors;

View File

@ -571,8 +571,10 @@ vdev_rebuild_range(vdev_rebuild_t *vr, uint64_t start, uint64_t size)
vdev_rebuild_blkptr_init(&blk, vd, start, size);
uint64_t psize = BP_GET_PSIZE(&blk);
if (!vdev_dtl_need_resilver(vd, &blk.blk_dva[0], psize, TXG_UNKNOWN))
if (!vdev_dtl_need_resilver(vd, &blk.blk_dva[0], psize, TXG_UNKNOWN)) {
vr->vr_pass_bytes_skipped += size;
return (0);
}
mutex_enter(&vr->vr_io_lock);
@ -786,6 +788,7 @@ vdev_rebuild_thread(void *arg)
vr->vr_pass_start_time = gethrtime();
vr->vr_pass_bytes_scanned = 0;
vr->vr_pass_bytes_issued = 0;
vr->vr_pass_bytes_skipped = 0;
uint64_t update_est_time = gethrtime();
vdev_rebuild_update_bytes_est(vd, 0);
@ -1153,6 +1156,7 @@ vdev_rebuild_get_stats(vdev_t *tvd, vdev_rebuild_stat_t *vrs)
vr->vr_pass_start_time);
vrs->vrs_pass_bytes_scanned = vr->vr_pass_bytes_scanned;
vrs->vrs_pass_bytes_issued = vr->vr_pass_bytes_issued;
vrs->vrs_pass_bytes_skipped = vr->vr_pass_bytes_skipped;
mutex_exit(&tvd->vdev_rebuild_lock);
}

View File

@ -798,8 +798,8 @@ zil_free_lwb(zilog_t *zilog, lwb_t *lwb)
{
ASSERT(MUTEX_HELD(&zilog->zl_lock));
ASSERT(!MUTEX_HELD(&lwb->lwb_vdev_lock));
ASSERT(list_is_empty(&lwb->lwb_waiters));
ASSERT(list_is_empty(&lwb->lwb_itxs));
VERIFY(list_is_empty(&lwb->lwb_waiters));
VERIFY(list_is_empty(&lwb->lwb_itxs));
ASSERT(avl_is_empty(&lwb->lwb_vdev_tree));
ASSERT3P(lwb->lwb_write_zio, ==, NULL);
ASSERT3P(lwb->lwb_root_zio, ==, NULL);
@ -2525,10 +2525,10 @@ zil_clean(zilog_t *zilog, uint64_t synced_txg)
* This function will traverse the queue of itxs that need to be
* committed, and move them onto the ZIL's zl_itx_commit_list.
*/
static void
static uint64_t
zil_get_commit_list(zilog_t *zilog)
{
uint64_t otxg, txg;
uint64_t otxg, txg, wtxg = 0;
list_t *commit_list = &zilog->zl_itx_commit_list;
ASSERT(MUTEX_HELD(&zilog->zl_issuer_lock));
@ -2562,10 +2562,22 @@ zil_get_commit_list(zilog_t *zilog)
*/
ASSERT(zilog_is_dirty_in_txg(zilog, txg) ||
spa_freeze_txg(zilog->zl_spa) != UINT64_MAX);
list_move_tail(commit_list, &itxg->itxg_itxs->i_sync_list);
list_t *sync_list = &itxg->itxg_itxs->i_sync_list;
if (unlikely(zilog->zl_suspend > 0)) {
/*
* ZIL was just suspended, but we lost the race.
* Allow all earlier itxs to be committed, but ask
* caller to do txg_wait_synced(txg) for any new.
*/
if (!list_is_empty(sync_list))
wtxg = MAX(wtxg, txg);
} else {
list_move_tail(commit_list, sync_list);
}
mutex_exit(&itxg->itxg_lock);
}
return (wtxg);
}
/*
@ -2953,11 +2965,12 @@ zil_process_commit_list(zilog_t *zilog, zil_commit_waiter_t *zcw, list_t *ilwbs)
* not issued, we rely on future calls to zil_commit_writer() to issue
* the lwb, or the timeout mechanism found in zil_commit_waiter().
*/
static void
static uint64_t
zil_commit_writer(zilog_t *zilog, zil_commit_waiter_t *zcw)
{
list_t ilwbs;
lwb_t *lwb;
uint64_t wtxg = 0;
ASSERT(!MUTEX_HELD(&zilog->zl_lock));
ASSERT(spa_writeable(zilog->zl_spa));
@ -2987,7 +3000,7 @@ zil_commit_writer(zilog_t *zilog, zil_commit_waiter_t *zcw)
ZIL_STAT_BUMP(zilog, zil_commit_writer_count);
zil_get_commit_list(zilog);
wtxg = zil_get_commit_list(zilog);
zil_prune_commit_list(zilog);
zil_process_commit_list(zilog, zcw, &ilwbs);
@ -2996,6 +3009,7 @@ zil_commit_writer(zilog_t *zilog, zil_commit_waiter_t *zcw)
while ((lwb = list_remove_head(&ilwbs)) != NULL)
zil_lwb_write_issue(zilog, lwb);
list_destroy(&ilwbs);
return (wtxg);
}
static void
@ -3511,7 +3525,7 @@ zil_commit_impl(zilog_t *zilog, uint64_t foid)
zil_commit_waiter_t *zcw = zil_alloc_commit_waiter();
zil_commit_itx_assign(zilog, zcw);
zil_commit_writer(zilog, zcw);
uint64_t wtxg = zil_commit_writer(zilog, zcw);
zil_commit_waiter(zilog, zcw);
if (zcw->zcw_zio_error != 0) {
@ -3526,6 +3540,8 @@ zil_commit_impl(zilog_t *zilog, uint64_t foid)
DTRACE_PROBE2(zil__commit__io__error,
zilog_t *, zilog, zil_commit_waiter_t *, zcw);
txg_wait_synced(zilog->zl_dmu_pool, 0);
} else if (wtxg != 0) {
txg_wait_synced(zilog->zl_dmu_pool, wtxg);
}
zil_free_commit_waiter(zcw);
@ -3905,11 +3921,13 @@ zil_suspend(const char *osname, void **cookiep)
return (error);
zilog = dmu_objset_zil(os);
mutex_enter(&zilog->zl_issuer_lock);
mutex_enter(&zilog->zl_lock);
zh = zilog->zl_header;
if (zh->zh_flags & ZIL_REPLAY_NEEDED) { /* unplayed log */
mutex_exit(&zilog->zl_lock);
mutex_exit(&zilog->zl_issuer_lock);
dmu_objset_rele(os, suspend_tag);
return (SET_ERROR(EBUSY));
}
@ -3923,6 +3941,7 @@ zil_suspend(const char *osname, void **cookiep)
if (cookiep == NULL && !zilog->zl_suspending &&
(zilog->zl_suspend > 0 || BP_IS_HOLE(&zh->zh_log))) {
mutex_exit(&zilog->zl_lock);
mutex_exit(&zilog->zl_issuer_lock);
dmu_objset_rele(os, suspend_tag);
return (0);
}
@ -3931,6 +3950,7 @@ zil_suspend(const char *osname, void **cookiep)
dsl_pool_rele(dmu_objset_pool(os), suspend_tag);
zilog->zl_suspend++;
mutex_exit(&zilog->zl_issuer_lock);
if (zilog->zl_suspend > 1) {
/*

View File

@ -626,8 +626,6 @@ zio_unique_parent(zio_t *cio)
void
zio_add_child(zio_t *pio, zio_t *cio)
{
zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP);
/*
* Logical I/Os can have logical, gang, or vdev children.
* Gang I/Os can have gang or vdev children.
@ -636,6 +634,7 @@ zio_add_child(zio_t *pio, zio_t *cio)
*/
ASSERT3S(cio->io_child_type, <=, pio->io_child_type);
zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP);
zl->zl_parent = pio;
zl->zl_child = cio;
@ -644,8 +643,9 @@ zio_add_child(zio_t *pio, zio_t *cio)
ASSERT(pio->io_state[ZIO_WAIT_DONE] == 0);
uint64_t *countp = pio->io_children[cio->io_child_type];
for (int w = 0; w < ZIO_WAIT_TYPES; w++)
pio->io_children[cio->io_child_type][w] += !cio->io_state[w];
countp[w] += !cio->io_state[w];
list_insert_head(&pio->io_child_list, zl);
list_insert_head(&cio->io_parent_list, zl);
@ -654,6 +654,37 @@ zio_add_child(zio_t *pio, zio_t *cio)
mutex_exit(&pio->io_lock);
}
void
zio_add_child_first(zio_t *pio, zio_t *cio)
{
/*
* Logical I/Os can have logical, gang, or vdev children.
* Gang I/Os can have gang or vdev children.
* Vdev I/Os can only have vdev children.
* The following ASSERT captures all of these constraints.
*/
ASSERT3S(cio->io_child_type, <=, pio->io_child_type);
zio_link_t *zl = kmem_cache_alloc(zio_link_cache, KM_SLEEP);
zl->zl_parent = pio;
zl->zl_child = cio;
ASSERT(list_is_empty(&cio->io_parent_list));
list_insert_head(&cio->io_parent_list, zl);
mutex_enter(&pio->io_lock);
ASSERT(pio->io_state[ZIO_WAIT_DONE] == 0);
uint64_t *countp = pio->io_children[cio->io_child_type];
for (int w = 0; w < ZIO_WAIT_TYPES; w++)
countp[w] += !cio->io_state[w];
list_insert_head(&pio->io_child_list, zl);
mutex_exit(&pio->io_lock);
}
static void
zio_remove_child(zio_t *pio, zio_t *cio, zio_link_t *zl)
{
@ -840,12 +871,14 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
zio->io_child_type = ZIO_CHILD_LOGICAL;
if (bp != NULL) {
zio->io_bp = (blkptr_t *)bp;
zio->io_bp_copy = *bp;
zio->io_bp_orig = *bp;
if (type != ZIO_TYPE_WRITE ||
zio->io_child_type == ZIO_CHILD_DDT)
zio->io_child_type == ZIO_CHILD_DDT) {
zio->io_bp_copy = *bp;
zio->io_bp = &zio->io_bp_copy; /* so caller can free */
} else {
zio->io_bp = (blkptr_t *)bp;
}
zio->io_bp_orig = *bp;
if (zio->io_child_type == ZIO_CHILD_LOGICAL)
zio->io_logical = zio;
if (zio->io_child_type > ZIO_CHILD_GANG && BP_IS_GANG(bp))
@ -880,7 +913,7 @@ zio_create(zio_t *pio, spa_t *spa, uint64_t txg, const blkptr_t *bp,
zio->io_logical = pio->io_logical;
if (zio->io_child_type == ZIO_CHILD_GANG)
zio->io_gang_leader = pio->io_gang_leader;
zio_add_child(pio, zio);
zio_add_child_first(pio, zio);
}
taskq_init_ent(&zio->io_tqent);
@ -1601,7 +1634,6 @@ zio_read_bp_init(zio_t *zio)
abd_return_buf_copy(zio->io_abd, data, psize);
} else {
ASSERT(!BP_IS_EMBEDDED(bp));
ASSERT3P(zio->io_bp, ==, &zio->io_bp_copy);
}
if (BP_GET_DEDUP(bp) && zio->io_child_type == ZIO_CHILD_LOGICAL)
@ -4442,8 +4474,10 @@ zio_ready(zio_t *zio)
zio->io_ready(zio);
}
#ifdef ZFS_DEBUG
if (bp != NULL && bp != &zio->io_bp_copy)
zio->io_bp_copy = *bp;
#endif
if (zio->io_error != 0) {
zio->io_pipeline = ZIO_INTERLOCK_PIPELINE;

View File

@ -1051,7 +1051,7 @@
/* #undef ZFS_IS_GPL_COMPATIBLE */
/* Define the project alias string. */
#define ZFS_META_ALIAS "zfs-2.1.99-FreeBSD_ga9d6b0690"
#define ZFS_META_ALIAS "zfs-2.2.0-FreeBSD_g009d3288"
/* Define the project author. */
#define ZFS_META_AUTHOR "OpenZFS"
@ -1081,10 +1081,10 @@
#define ZFS_META_NAME "zfs"
/* Define the project release. */
#define ZFS_META_RELEASE "FreeBSD_ga9d6b0690"
#define ZFS_META_RELEASE "FreeBSD_g009d3288"
/* Define the project version. */
#define ZFS_META_VERSION "2.1.99"
#define ZFS_META_VERSION "2.2.0"
/* count is located in percpu_ref.data */
/* #undef ZFS_PERCPU_REF_COUNT_IN_DATA */

View File

@ -1 +1 @@
#define ZFS_META_GITREV "zfs-2.1.99-2004-ga9d6b0690"
#define ZFS_META_GITREV "zfs-2.2.0-rc1-0-g009d3288d"