From 0626917d07863899cfee3820aa57c6b7ed4304f4 Mon Sep 17 00:00:00 2001 From: Martin Matuska Date: Sun, 21 Feb 2021 21:22:07 +0100 Subject: [PATCH] Update vendor/openzfs to master-9312e0fd1 Notable changes: - fix reporting of mount progress (778869fa1) - disable use of hardware crypto offload drivers on FreeBSD (e7adccf7f) - fix checksum errors not being counted on repeated repair (03e02e5b5) - restore FreeBSD resource usage accounting (64e0fe14f) - fix panic if scrubbing after removing a slog device (11f2e9a49) --- cmd/zfs/zfs_main.c | 3 - cmd/ztest/ztest.c | 113 +++++++- include/os/freebsd/spl/sys/uio.h | 43 +-- include/os/linux/spl/sys/uio.h | 22 +- include/os/linux/zfs/sys/zfs_context_os.h | 1 - include/sys/Makefile.am | 1 + include/sys/spa.h | 3 +- include/sys/uio_impl.h | 21 +- include/sys/zfs_context.h | 3 +- include/sys/zfs_racct.h | 37 +++ lib/libzpool/Makefile.am | 1 + lib/libzpool/util.c | 74 +++-- lib/libzutil/os/linux/zutil_device_path_os.c | 252 ++++++++++-------- module/Makefile.bsd | 1 + module/os/freebsd/spl/spl_uio.c | 9 +- module/os/freebsd/zfs/crypto_os.c | 15 +- module/os/freebsd/zfs/zfs_racct.c | 55 ++++ module/os/linux/zfs/Makefile.in | 1 + module/os/linux/zfs/zfs_racct.c | 36 +++ module/zfs/arc.c | 2 + module/zfs/dmu.c | 5 + module/zfs/dsl_scan.c | 6 +- module/zfs/spa_config.c | 2 +- module/zfs/vdev.c | 9 +- module/zfs/vdev_indirect.c | 10 +- module/zfs/vdev_raidz.c | 20 +- module/zfs/vdev_rebuild.c | 3 + module/zfs/zfs_fm.c | 46 +++- module/zfs/zfs_ioctl.c | 2 +- module/zfs/zfs_vnops.c | 2 +- module/zfs/zio.c | 11 +- scripts/zfs.sh | 2 +- tests/runfiles/common.run | 3 +- .../cli_root/zpool_events/Makefile.am | 3 +- .../zpool_events_clear_retained.ksh | 135 ++++++++++ .../zpool_events/zpool_events_duplicates.ksh | 11 - 36 files changed, 716 insertions(+), 247 deletions(-) create mode 100644 include/sys/zfs_racct.h create mode 100644 module/os/freebsd/zfs/zfs_racct.c create mode 100644 module/os/linux/zfs/zfs_racct.c create mode 100755 tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear_retained.ksh diff --git a/cmd/zfs/zfs_main.c b/cmd/zfs/zfs_main.c index ab2b006ae460..9a59fddbf06d 100644 --- a/cmd/zfs/zfs_main.c +++ b/cmd/zfs/zfs_main.c @@ -6912,9 +6912,6 @@ report_mount_progress(int current, int total) time_t now = time(NULL); char info[32]; - /* report 1..n instead of 0..n-1 */ - ++current; - /* display header if we're here for the first time */ if (current == 1) { set_progress_header(gettext("Mounting ZFS filesystems")); diff --git a/cmd/ztest/ztest.c b/cmd/ztest/ztest.c index d2bf0101993e..cfa1290d78d1 100644 --- a/cmd/ztest/ztest.c +++ b/cmd/ztest/ztest.c @@ -158,6 +158,9 @@ enum ztest_class_state { ZTEST_VDEV_CLASS_RND }; +#define ZO_GVARS_MAX_ARGLEN ((size_t)64) +#define ZO_GVARS_MAX_COUNT ((size_t)10) + typedef struct ztest_shared_opts { char zo_pool[ZFS_MAX_DATASET_NAME_LEN]; char zo_dir[ZFS_MAX_DATASET_NAME_LEN]; @@ -185,6 +188,8 @@ typedef struct ztest_shared_opts { int zo_mmp_test; int zo_special_vdevs; int zo_dump_dbgmsg; + int zo_gvars_count; + char zo_gvars[ZO_GVARS_MAX_COUNT][ZO_GVARS_MAX_ARGLEN]; } ztest_shared_opts_t; static const ztest_shared_opts_t ztest_opts_defaults = { @@ -212,6 +217,7 @@ static const ztest_shared_opts_t ztest_opts_defaults = { .zo_maxloops = 50, /* max loops during spa_freeze() */ .zo_metaslab_force_ganging = 64 << 10, .zo_special_vdevs = ZTEST_VDEV_CLASS_RND, + .zo_gvars_count = 0, }; extern uint64_t metaslab_force_ganging; @@ -918,8 +924,21 @@ process_options(int argc, char **argv) ztest_parse_name_value(optarg, zo); break; case 'o': - if (set_global_var(optarg) != 0) + if (zo->zo_gvars_count >= ZO_GVARS_MAX_COUNT) { + (void) fprintf(stderr, + "max global var count (%zu) exceeded\n", + ZO_GVARS_MAX_COUNT); usage(B_FALSE); + } + char *v = zo->zo_gvars[zo->zo_gvars_count]; + if (strlcpy(v, optarg, ZO_GVARS_MAX_ARGLEN) >= + ZO_GVARS_MAX_ARGLEN) { + (void) fprintf(stderr, + "global var option '%s' is too long\n", + optarg); + usage(B_FALSE); + } + zo->zo_gvars_count++; break; case 'G': zo->zo_dump_dbgmsg = 1; @@ -6373,6 +6392,75 @@ ztest_fletcher_incr(ztest_ds_t *zd, uint64_t id) } } +static int +ztest_set_global_vars(void) +{ + for (size_t i = 0; i < ztest_opts.zo_gvars_count; i++) { + char *kv = ztest_opts.zo_gvars[i]; + VERIFY3U(strlen(kv), <=, ZO_GVARS_MAX_ARGLEN); + VERIFY3U(strlen(kv), >, 0); + int err = set_global_var(kv); + if (ztest_opts.zo_verbose > 0) { + (void) printf("setting global var %s ... %s\n", kv, + err ? "failed" : "ok"); + } + if (err != 0) { + (void) fprintf(stderr, + "failed to set global var '%s'\n", kv); + return (err); + } + } + return (0); +} + +static char ** +ztest_global_vars_to_zdb_args(void) +{ + char **args = calloc(2*ztest_opts.zo_gvars_count + 1, sizeof (char *)); + char **cur = args; + for (size_t i = 0; i < ztest_opts.zo_gvars_count; i++) { + char *kv = ztest_opts.zo_gvars[i]; + *cur = "-o"; + cur++; + *cur = strdup(kv); + cur++; + } + ASSERT3P(cur, ==, &args[2*ztest_opts.zo_gvars_count]); + *cur = NULL; + return (args); +} + +/* The end of strings is indicated by a NULL element */ +static char * +join_strings(char **strings, const char *sep) +{ + size_t totallen = 0; + for (char **sp = strings; *sp != NULL; sp++) { + totallen += strlen(*sp); + totallen += strlen(sep); + } + if (totallen > 0) { + ASSERT(totallen >= strlen(sep)); + totallen -= strlen(sep); + } + + size_t buflen = totallen + 1; + char *o = malloc(buflen); /* trailing 0 byte */ + o[0] = '\0'; + for (char **sp = strings; *sp != NULL; sp++) { + size_t would; + would = strlcat(o, *sp, buflen); + VERIFY3U(would, <, buflen); + if (*(sp+1) == NULL) { + break; + } + would = strlcat(o, sep, buflen); + VERIFY3U(would, <, buflen); + } + ASSERT3S(strlen(o), ==, totallen); + return (o); +} + static int ztest_check_path(char *path) { @@ -6601,13 +6689,21 @@ ztest_run_zdb(char *pool) ztest_get_zdb_bin(bin, len); - (void) sprintf(zdb, - "%s -bcc%s%s -G -d -Y -e -y -p %s %s", + char **set_gvars_args = ztest_global_vars_to_zdb_args(); + char *set_gvars_args_joined = join_strings(set_gvars_args, " "); + free(set_gvars_args); + + size_t would = snprintf(zdb, len, + "%s -bcc%s%s -G -d -Y -e -y %s -p %s %s", bin, ztest_opts.zo_verbose >= 3 ? "s" : "", ztest_opts.zo_verbose >= 4 ? "v" : "", + set_gvars_args_joined, ztest_opts.zo_dir, pool); + ASSERT3U(would, <, len); + + free(set_gvars_args_joined); if (ztest_opts.zo_verbose >= 5) (void) printf("Executing %s\n", strstr(zdb, "zdb ")); @@ -7727,7 +7823,7 @@ main(int argc, char **argv) char numbuf[NN_NUMBUF_SZ]; char *cmd; boolean_t hasalt; - int f; + int f, err; char *fd_data_str = getenv("ZTEST_FD_DATA"); struct sigaction action; @@ -7794,6 +7890,15 @@ main(int argc, char **argv) } ASSERT3U(ztest_opts.zo_datasets, ==, ztest_shared_hdr->zh_ds_count); + err = ztest_set_global_vars(); + if (err != 0 && !fd_data_str) { + /* error message done by ztest_set_global_vars */ + exit(EXIT_FAILURE); + } else { + /* children should not be spawned if setting gvars fails */ + VERIFY3S(err, ==, 0); + } + /* Override location of zpool.cache */ VERIFY3S(asprintf((char **)&spa_config_path, "%s/zpool.cache", ztest_opts.zo_dir), !=, -1); diff --git a/include/os/freebsd/spl/sys/uio.h b/include/os/freebsd/spl/sys/uio.h index f1d30195f048..b71f2f2e5625 100644 --- a/include/os/freebsd/spl/sys/uio.h +++ b/include/os/freebsd/spl/sys/uio.h @@ -55,38 +55,12 @@ typedef struct zfs_uio { #define zfs_uio_fault_disable(u, set) #define zfs_uio_prefaultpages(size, u) (0) - -static __inline void -zfs_uio_init(zfs_uio_t *uio, struct uio *uio_s) -{ - GET_UIO_STRUCT(uio) = uio_s; -} - -static __inline void +static inline void zfs_uio_setoffset(zfs_uio_t *uio, offset_t off) { zfs_uio_offset(uio) = off; } -static __inline int -zfs_uiomove(void *cp, size_t n, zfs_uio_rw_t dir, zfs_uio_t *uio) -{ - ASSERT(zfs_uio_rw(uio) == dir); - return (uiomove(cp, (int)n, GET_UIO_STRUCT(uio))); -} - -int zfs_uiocopy(void *p, size_t n, zfs_uio_rw_t rw, zfs_uio_t *uio, - size_t *cbytes); -void zfs_uioskip(zfs_uio_t *uiop, size_t n); -int zfs_uio_fault_move(void *p, size_t n, zfs_uio_rw_t dir, zfs_uio_t *uio); - -static inline void -zfs_uio_iov_at_index(zfs_uio_t *uio, uint_t idx, void **base, uint64_t *len) -{ - *base = zfs_uio_iovbase(uio, idx); - *len = zfs_uio_iovlen(uio, idx); -} - static inline void zfs_uio_advance(zfs_uio_t *uio, size_t size) { @@ -94,19 +68,14 @@ zfs_uio_advance(zfs_uio_t *uio, size_t size) zfs_uio_offset(uio) += size; } -static inline offset_t -zfs_uio_index_at_offset(zfs_uio_t *uio, offset_t off, uint_t *vec_idx) +static __inline void +zfs_uio_init(zfs_uio_t *uio, struct uio *uio_s) { - *vec_idx = 0; - while (*vec_idx < zfs_uio_iovcnt(uio) && - off >= zfs_uio_iovlen(uio, *vec_idx)) { - off -= zfs_uio_iovlen(uio, *vec_idx); - (*vec_idx)++; - } - - return (off); + GET_UIO_STRUCT(uio) = uio_s; } +int zfs_uio_fault_move(void *p, size_t n, zfs_uio_rw_t dir, zfs_uio_t *uio); + #endif /* !_STANDALONE */ #endif /* !_OPENSOLARIS_SYS_UIO_H_ */ diff --git a/include/os/linux/spl/sys/uio.h b/include/os/linux/spl/sys/uio.h index 0deed3c5736d..66af2b0b534c 100644 --- a/include/os/linux/spl/sys/uio.h +++ b/include/os/linux/spl/sys/uio.h @@ -78,19 +78,14 @@ typedef struct zfs_uio { #define zfs_uio_rlimit_fsize(z, u) (0) #define zfs_uio_fault_move(p, n, rw, u) zfs_uiomove((p), (n), (rw), (u)) +extern int zfs_uio_prefaultpages(ssize_t, zfs_uio_t *); + static inline void zfs_uio_setoffset(zfs_uio_t *uio, offset_t off) { uio->uio_loffset = off; } -static inline void -zfs_uio_iov_at_index(zfs_uio_t *uio, uint_t idx, void **base, uint64_t *len) -{ - *base = zfs_uio_iovbase(uio, idx); - *len = zfs_uio_iovlen(uio, idx); -} - static inline void zfs_uio_advance(zfs_uio_t *uio, size_t size) { @@ -98,19 +93,6 @@ zfs_uio_advance(zfs_uio_t *uio, size_t size) uio->uio_loffset += size; } -static inline offset_t -zfs_uio_index_at_offset(zfs_uio_t *uio, offset_t off, uint_t *vec_idx) -{ - *vec_idx = 0; - while (*vec_idx < zfs_uio_iovcnt(uio) && - off >= zfs_uio_iovlen(uio, *vec_idx)) { - off -= zfs_uio_iovlen(uio, *vec_idx); - (*vec_idx)++; - } - - return (off); -} - static inline void zfs_uio_iovec_init(zfs_uio_t *uio, const struct iovec *iov, unsigned long nr_segs, offset_t offset, zfs_uio_seg_t seg, ssize_t resid, diff --git a/include/os/linux/zfs/sys/zfs_context_os.h b/include/os/linux/zfs/sys/zfs_context_os.h index 9e5fdd79f019..de7015b929b6 100644 --- a/include/os/linux/zfs/sys/zfs_context_os.h +++ b/include/os/linux/zfs/sys/zfs_context_os.h @@ -23,7 +23,6 @@ #ifndef ZFS_CONTEXT_OS_H #define ZFS_CONTEXT_OS_H -#include #include #include diff --git a/include/sys/Makefile.am b/include/sys/Makefile.am index c3ebf17b5288..385c82c926ae 100644 --- a/include/sys/Makefile.am +++ b/include/sys/Makefile.am @@ -111,6 +111,7 @@ COMMON_H = \ zfs_fuid.h \ zfs_project.h \ zfs_quota.h \ + zfs_racct.h \ zfs_ratelimit.h \ zfs_refcount.h \ zfs_rlock.h \ diff --git a/include/sys/spa.h b/include/sys/spa.h index 0762ae8a3e13..8391be8328b6 100644 --- a/include/sys/spa.h +++ b/include/sys/spa.h @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2020 by Delphix. All rights reserved. + * Copyright (c) 2011, 2021 by Delphix. All rights reserved. * Copyright 2011 Nexenta Systems, Inc. All rights reserved. * Copyright (c) 2014 Spectra Logic Corporation, All rights reserved. * Copyright 2013 Saso Kiselkov. All rights reserved. @@ -1150,6 +1150,7 @@ extern int zfs_ereport_post(const char *clazz, spa_t *spa, vdev_t *vd, extern boolean_t zfs_ereport_is_valid(const char *clazz, spa_t *spa, vdev_t *vd, zio_t *zio); extern void zfs_ereport_taskq_fini(void); +extern void zfs_ereport_clear(spa_t *spa, vdev_t *vd); extern nvlist_t *zfs_event_create(spa_t *spa, vdev_t *vd, const char *type, const char *name, nvlist_t *aux); extern void zfs_post_remove(spa_t *spa, vdev_t *vd); diff --git a/include/sys/uio_impl.h b/include/sys/uio_impl.h index be70cea54818..cde3ef40485b 100644 --- a/include/sys/uio_impl.h +++ b/include/sys/uio_impl.h @@ -42,8 +42,27 @@ #include extern int zfs_uiomove(void *, size_t, zfs_uio_rw_t, zfs_uio_t *); -extern int zfs_uio_prefaultpages(ssize_t, zfs_uio_t *); extern int zfs_uiocopy(void *, size_t, zfs_uio_rw_t, zfs_uio_t *, size_t *); extern void zfs_uioskip(zfs_uio_t *, size_t); +static inline void +zfs_uio_iov_at_index(zfs_uio_t *uio, uint_t idx, void **base, uint64_t *len) +{ + *base = zfs_uio_iovbase(uio, idx); + *len = zfs_uio_iovlen(uio, idx); +} + +static inline offset_t +zfs_uio_index_at_offset(zfs_uio_t *uio, offset_t off, uint_t *vec_idx) +{ + *vec_idx = 0; + while (*vec_idx < zfs_uio_iovcnt(uio) && + off >= zfs_uio_iovlen(uio, *vec_idx)) { + off -= zfs_uio_iovlen(uio, *vec_idx); + (*vec_idx)++; + } + + return (off); +} + #endif /* _SYS_UIO_IMPL_H */ diff --git a/include/sys/zfs_context.h b/include/sys/zfs_context.h index ee3216d6763a..6cdcc6d30966 100644 --- a/include/sys/zfs_context.h +++ b/include/sys/zfs_context.h @@ -72,6 +72,7 @@ extern "C" { #include #include #include +#include #include #else /* _KERNEL || _STANDALONE */ @@ -652,7 +653,7 @@ extern void random_fini(void); struct spa; extern void show_pool_stats(struct spa *); -extern int set_global_var(char *arg); +extern int set_global_var(char const *arg); typedef struct callb_cpr { kmutex_t *cc_lockp; diff --git a/include/sys/zfs_racct.h b/include/sys/zfs_racct.h new file mode 100644 index 000000000000..cfcdd336ea42 --- /dev/null +++ b/include/sys/zfs_racct.h @@ -0,0 +1,37 @@ +/* + * CDDL HEADER START + * + * The contents of this file are subject to the terms of the + * Common Development and Distribution License (the "License"). + * You may not use this file except in compliance with the License. + * + * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE + * or http://www.opensolaris.org/os/licensing. + * See the License for the specific language governing permissions + * and limitations under the License. + * + * When distributing Covered Code, include this CDDL HEADER in each + * file and include the License file at usr/src/OPENSOLARIS.LICENSE. + * If applicable, add the following below this CDDL HEADER, with the + * fields enclosed by brackets "[]" replaced with your own identifying + * information: Portions Copyright [yyyy] [name of copyright owner] + * + * CDDL HEADER END + */ + +/* + * Portions Copyright 2021 iXsystems, Inc. + */ + +#ifndef _SYS_ZFS_RACCT_H +#define _SYS_ZFS_RACCT_H + +#include + +/* + * Platform-dependent resource accounting hooks + */ +void zfs_racct_read(uint64_t size, uint64_t iops); +void zfs_racct_write(uint64_t size, uint64_t iops); + +#endif /* _SYS_ZFS_RACCT_H */ diff --git a/lib/libzpool/Makefile.am b/lib/libzpool/Makefile.am index 04ef34ebfa1b..929f5bed8b2f 100644 --- a/lib/libzpool/Makefile.am +++ b/lib/libzpool/Makefile.am @@ -163,6 +163,7 @@ KERNEL_C = \ zfs_debug.c \ zfs_fm.c \ zfs_fuid.c \ + zfs_racct.c \ zfs_sa.c \ zfs_znode.c \ zfs_ratelimit.c \ diff --git a/lib/libzpool/util.c b/lib/libzpool/util.c index ebfaa9b41a2a..2da2375a1d2d 100644 --- a/lib/libzpool/util.c +++ b/lib/libzpool/util.c @@ -148,18 +148,54 @@ show_pool_stats(spa_t *spa) nvlist_free(config); } +/* *k_out must be freed by the caller */ +static int +set_global_var_parse_kv(const char *arg, char **k_out, u_longlong_t *v_out) +{ + int err; + VERIFY(arg); + char *d = strdup(arg); + + char *save = NULL; + char *k = strtok_r(d, "=", &save); + char *v_str = strtok_r(NULL, "=", &save); + char *follow = strtok_r(NULL, "=", &save); + if (k == NULL || v_str == NULL || follow != NULL) { + err = EINVAL; + goto err_free; + } + + u_longlong_t val = strtoull(v_str, NULL, 0); + if (val > UINT32_MAX) { + fprintf(stderr, "Value for global variable '%s' must " + "be a 32-bit unsigned integer, got '%s'\n", k, v_str); + err = EOVERFLOW; + goto err_free; + } + + *k_out = k; + *v_out = val; + return (0); + +err_free: + free(k); + + return (err); +} + /* * Sets given global variable in libzpool to given unsigned 32-bit value. * arg: "=" */ int -set_global_var(char *arg) +set_global_var(char const *arg) { void *zpoolhdl; - char *varname = arg, *varval; + char *varname; u_longlong_t val; + int ret; -#ifndef _LITTLE_ENDIAN +#ifndef _ZFS_LITTLE_ENDIAN /* * On big endian systems changing a 64-bit variable would set the high * 32 bits instead of the low 32 bits, which could cause unexpected @@ -167,19 +203,12 @@ set_global_var(char *arg) */ fprintf(stderr, "Setting global variables is only supported on " "little-endian systems\n"); - return (ENOTSUP); + ret = ENOTSUP; + goto out_ret; #endif - if (arg != NULL && (varval = strchr(arg, '=')) != NULL) { - *varval = '\0'; - varval++; - val = strtoull(varval, NULL, 0); - if (val > UINT32_MAX) { - fprintf(stderr, "Value for global variable '%s' must " - "be a 32-bit unsigned integer\n", varname); - return (EOVERFLOW); - } - } else { - return (EINVAL); + + if ((ret = set_global_var_parse_kv(arg, &varname, &val)) != 0) { + goto out_ret; } zpoolhdl = dlopen("libzpool.so", RTLD_LAZY); @@ -189,18 +218,25 @@ set_global_var(char *arg) if (var == NULL) { fprintf(stderr, "Global variable '%s' does not exist " "in libzpool.so\n", varname); - return (EINVAL); + ret = EINVAL; + goto out_dlclose; } *var = (uint32_t)val; - dlclose(zpoolhdl); } else { fprintf(stderr, "Failed to open libzpool.so to set global " "variable\n"); - return (EIO); + ret = EIO; + goto out_dlclose; } - return (0); + ret = 0; + +out_dlclose: + dlclose(zpoolhdl); + free(varname); +out_ret: + return (ret); } static nvlist_t * diff --git a/lib/libzutil/os/linux/zutil_device_path_os.c b/lib/libzutil/os/linux/zutil_device_path_os.c index 36331fd72bf7..1f767bb7a6e7 100644 --- a/lib/libzutil/os/linux/zutil_device_path_os.c +++ b/lib/libzutil/os/linux/zutil_device_path_os.c @@ -154,15 +154,124 @@ zfs_strip_path(char *path) return (strrchr(path, '/') + 1); } +/* + * Given a dev name like "sda", return the full enclosure sysfs path to + * the disk. You can also pass in the name with "/dev" prepended + * to it (like /dev/sda). + * + * For example, disk "sda" in enclosure slot 1: + * dev: "sda" + * returns: "/sys/class/enclosure/1:0:3:0/Slot 1" + * + * 'dev' must be a non-devicemapper device. + * + * Returned string must be freed. + */ +char * +zfs_get_enclosure_sysfs_path(const char *dev_name) +{ + DIR *dp = NULL; + struct dirent *ep; + char buf[MAXPATHLEN]; + char *tmp1 = NULL; + char *tmp2 = NULL; + char *tmp3 = NULL; + char *path = NULL; + size_t size; + int tmpsize; + + if (dev_name == NULL) + return (NULL); + + /* If they preface 'dev' with a path (like "/dev") then strip it off */ + tmp1 = strrchr(dev_name, '/'); + if (tmp1 != NULL) + dev_name = tmp1 + 1; /* +1 since we want the chr after '/' */ + + tmpsize = asprintf(&tmp1, "/sys/block/%s/device", dev_name); + if (tmpsize == -1 || tmp1 == NULL) { + tmp1 = NULL; + goto end; + } + + dp = opendir(tmp1); + if (dp == NULL) { + tmp1 = NULL; /* To make free() at the end a NOP */ + goto end; + } + + /* + * Look though all sysfs entries in /sys/block//device for + * the enclosure symlink. + */ + while ((ep = readdir(dp))) { + /* Ignore everything that's not our enclosure_device link */ + if (strstr(ep->d_name, "enclosure_device") == NULL) + continue; + + if (asprintf(&tmp2, "%s/%s", tmp1, ep->d_name) == -1 || + tmp2 == NULL) + break; + + size = readlink(tmp2, buf, sizeof (buf)); + + /* Did readlink fail or crop the link name? */ + if (size == -1 || size >= sizeof (buf)) { + free(tmp2); + tmp2 = NULL; /* To make free() at the end a NOP */ + break; + } + + /* + * We got a valid link. readlink() doesn't terminate strings + * so we have to do it. + */ + buf[size] = '\0'; + + /* + * Our link will look like: + * + * "../../../../port-11:1:2/..STUFF../enclosure/1:0:3:0/SLOT 1" + * + * We want to grab the "enclosure/1:0:3:0/SLOT 1" part + */ + tmp3 = strstr(buf, "enclosure"); + if (tmp3 == NULL) + break; + + if (asprintf(&path, "/sys/class/%s", tmp3) == -1) { + /* If asprintf() fails, 'path' is undefined */ + path = NULL; + break; + } + + if (path == NULL) + break; + } + +end: + free(tmp2); + free(tmp1); + + if (dp != NULL) + closedir(dp); + + return (path); +} + /* * Allocate and return the underlying device name for a device mapper device. - * If a device mapper device maps to multiple devices, return the first device. * * For example, dm_name = "/dev/dm-0" could return "/dev/sda". Symlinks to a * DM device (like /dev/disk/by-vdev/A0) are also allowed. * - * Returns device name, or NULL on error or no match. If dm_name is not a DM - * device then return NULL. + * If the DM device has multiple underlying devices (like with multipath + * DM devices), then favor underlying devices that have a symlink back to their + * back to their enclosure device in sysfs. This will be useful for the + * zedlet scripts that toggle the fault LED. + * + * Returns an underlying device name, or NULL on error or no match. If dm_name + * is not a DM device then return NULL. * * NOTE: The returned name string must be *freed*. */ @@ -176,6 +285,8 @@ dm_get_underlying_path(const char *dm_name) char *path = NULL; char *dev_str; int size; + char *first_path = NULL; + char *enclosure_path; if (dm_name == NULL) return (NULL); @@ -204,13 +315,27 @@ dm_get_underlying_path(const char *dm_name) goto end; /* - * Return first entry (that isn't itself a directory) in the - * directory containing device-mapper dependent (underlying) - * devices. + * A device-mapper device can have multiple paths to it (multipath). + * Favor paths that have a symlink back to their enclosure device. + * We have to do this since some enclosures may only provide a symlink + * back for one underlying path to a disk and not the other. + * + * If no paths have links back to their enclosure, then just return the + * first path. */ while ((ep = readdir(dp))) { if (ep->d_type != DT_DIR) { /* skip "." and ".." dirs */ + if (!first_path) + first_path = strdup(ep->d_name); + + enclosure_path = + zfs_get_enclosure_sysfs_path(ep->d_name); + + if (!enclosure_path) + continue; + size = asprintf(&path, "/dev/%s", ep->d_name); + free(enclosure_path); break; } } @@ -220,6 +345,17 @@ dm_get_underlying_path(const char *dm_name) closedir(dp); free(tmp); free(realp); + + if (!path) { + /* + * None of the underlying paths had a link back to their + * enclosure devices. Throw up out hands and return the first + * underlying path. + */ + size = asprintf(&path, "/dev/%s", first_path); + } + + free(first_path); return (path); } @@ -331,110 +467,6 @@ zfs_get_underlying_path(const char *dev_name) return (name); } -/* - * Given a dev name like "sda", return the full enclosure sysfs path to - * the disk. You can also pass in the name with "/dev" prepended - * to it (like /dev/sda). - * - * For example, disk "sda" in enclosure slot 1: - * dev: "sda" - * returns: "/sys/class/enclosure/1:0:3:0/Slot 1" - * - * 'dev' must be a non-devicemapper device. - * - * Returned string must be freed. - */ -char * -zfs_get_enclosure_sysfs_path(const char *dev_name) -{ - DIR *dp = NULL; - struct dirent *ep; - char buf[MAXPATHLEN]; - char *tmp1 = NULL; - char *tmp2 = NULL; - char *tmp3 = NULL; - char *path = NULL; - size_t size; - int tmpsize; - - if (dev_name == NULL) - return (NULL); - - /* If they preface 'dev' with a path (like "/dev") then strip it off */ - tmp1 = strrchr(dev_name, '/'); - if (tmp1 != NULL) - dev_name = tmp1 + 1; /* +1 since we want the chr after '/' */ - - tmpsize = asprintf(&tmp1, "/sys/block/%s/device", dev_name); - if (tmpsize == -1 || tmp1 == NULL) { - tmp1 = NULL; - goto end; - } - - dp = opendir(tmp1); - if (dp == NULL) { - tmp1 = NULL; /* To make free() at the end a NOP */ - goto end; - } - - /* - * Look though all sysfs entries in /sys/block//device for - * the enclosure symlink. - */ - while ((ep = readdir(dp))) { - /* Ignore everything that's not our enclosure_device link */ - if (strstr(ep->d_name, "enclosure_device") == NULL) - continue; - - if (asprintf(&tmp2, "%s/%s", tmp1, ep->d_name) == -1 || - tmp2 == NULL) - break; - - size = readlink(tmp2, buf, sizeof (buf)); - - /* Did readlink fail or crop the link name? */ - if (size == -1 || size >= sizeof (buf)) { - free(tmp2); - tmp2 = NULL; /* To make free() at the end a NOP */ - break; - } - - /* - * We got a valid link. readlink() doesn't terminate strings - * so we have to do it. - */ - buf[size] = '\0'; - - /* - * Our link will look like: - * - * "../../../../port-11:1:2/..STUFF../enclosure/1:0:3:0/SLOT 1" - * - * We want to grab the "enclosure/1:0:3:0/SLOT 1" part - */ - tmp3 = strstr(buf, "enclosure"); - if (tmp3 == NULL) - break; - - if (asprintf(&path, "/sys/class/%s", tmp3) == -1) { - /* If asprintf() fails, 'path' is undefined */ - path = NULL; - break; - } - - if (path == NULL) - break; - } - -end: - free(tmp2); - free(tmp1); - - if (dp != NULL) - closedir(dp); - - return (path); -} #ifdef HAVE_LIBUDEV diff --git a/module/Makefile.bsd b/module/Makefile.bsd index e7cddcc5bb5e..8aa4ed22275e 100644 --- a/module/Makefile.bsd +++ b/module/Makefile.bsd @@ -153,6 +153,7 @@ SRCS+= abd_os.c \ zfs_dir.c \ zfs_ioctl_compat.c \ zfs_ioctl_os.c \ + zfs_racct.c \ zfs_vfsops.c \ zfs_vnops_os.c \ zfs_znode.c \ diff --git a/module/os/freebsd/spl/spl_uio.c b/module/os/freebsd/spl/spl_uio.c index f5f3524f7b9d..59a781ee1b64 100644 --- a/module/os/freebsd/spl/spl_uio.c +++ b/module/os/freebsd/spl/spl_uio.c @@ -41,10 +41,17 @@ */ #include -#include +#include #include #include +int +zfs_uiomove(void *cp, size_t n, zfs_uio_rw_t dir, zfs_uio_t *uio) +{ + ASSERT(zfs_uio_rw(uio) == dir); + return (uiomove(cp, (int)n, GET_UIO_STRUCT(uio))); +} + /* * same as zfs_uiomove() but doesn't modify uio structure. * return in cbytes how many bytes were copied. diff --git a/module/os/freebsd/zfs/crypto_os.c b/module/os/freebsd/zfs/crypto_os.c index fbf998416234..03d14ed7cf5c 100644 --- a/module/os/freebsd/zfs/crypto_os.c +++ b/module/os/freebsd/zfs/crypto_os.c @@ -293,8 +293,19 @@ freebsd_crypt_newsession(freebsd_crypt_session_t *sessp, error = ENOTSUP; goto bad; } - error = crypto_newsession(&sessp->fs_sid, &csp, - CRYPTOCAP_F_HARDWARE | CRYPTOCAP_F_SOFTWARE); + + /* + * Disable the use of hardware drivers on FreeBSD 13 and later since + * common crypto offload drivers impose constraints on AES-GCM AAD + * lengths that make them unusable for ZFS, and we currently do not have + * a mechanism to fall back to a software driver for requests not + * handled by a hardware driver. + * + * On 12 we continue to permit the use of hardware drivers since + * CPU-accelerated drivers such as aesni(4) register themselves as + * hardware drivers. + */ + error = crypto_newsession(&sessp->fs_sid, &csp, CRYPTOCAP_F_SOFTWARE); mtx_init(&sessp->fs_lock, "FreeBSD Cryptographic Session Lock", NULL, MTX_DEF); crypt_sessions++; diff --git a/module/os/freebsd/zfs/zfs_racct.c b/module/os/freebsd/zfs/zfs_racct.c new file mode 100644 index 000000000000..b46cc046268e --- /dev/null +++ b/module/os/freebsd/zfs/zfs_racct.c @@ -0,0 +1,55 @@ +/* + * Copyright (c) 2021 iXsystems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include +#include + +void +zfs_racct_read(uint64_t size, uint64_t iops) +{ + curthread->td_ru.ru_inblock += iops; +#ifdef RACCT + if (racct_enable) { + PROC_LOCK(curproc); + racct_add_force(curproc, RACCT_READBPS, size); + racct_add_force(curproc, RACCT_READIOPS, iops); + PROC_UNLOCK(curproc); + } +#endif /* RACCT */ +} + +void +zfs_racct_write(uint64_t size, uint64_t iops) +{ + curthread->td_ru.ru_oublock += iops; +#ifdef RACCT + if (racct_enable) { + PROC_LOCK(curproc); + racct_add_force(curproc, RACCT_WRITEBPS, size); + racct_add_force(curproc, RACCT_WRITEIOPS, iops); + PROC_UNLOCK(curproc); + } +#endif /* RACCT */ +} diff --git a/module/os/linux/zfs/Makefile.in b/module/os/linux/zfs/Makefile.in index 75bec52c94e2..fa990776db83 100644 --- a/module/os/linux/zfs/Makefile.in +++ b/module/os/linux/zfs/Makefile.in @@ -22,6 +22,7 @@ $(MODULE)-objs += ../os/linux/zfs/zfs_debug.o $(MODULE)-objs += ../os/linux/zfs/zfs_dir.o $(MODULE)-objs += ../os/linux/zfs/zfs_file_os.o $(MODULE)-objs += ../os/linux/zfs/zfs_ioctl_os.o +$(MODULE)-objs += ../os/linux/zfs/zfs_racct.o $(MODULE)-objs += ../os/linux/zfs/zfs_sysfs.o $(MODULE)-objs += ../os/linux/zfs/zfs_uio.o $(MODULE)-objs += ../os/linux/zfs/zfs_vfsops.o diff --git a/module/os/linux/zfs/zfs_racct.c b/module/os/linux/zfs/zfs_racct.c new file mode 100644 index 000000000000..7897e0f9edc1 --- /dev/null +++ b/module/os/linux/zfs/zfs_racct.c @@ -0,0 +1,36 @@ +/* + * Copyright (c) 2021 iXsystems, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include + +void +zfs_racct_read(uint64_t size, uint64_t iops) +{ +} + +void +zfs_racct_write(uint64_t size, uint64_t iops) +{ +} diff --git a/module/zfs/arc.c b/module/zfs/arc.c index b4f0c8a85b64..9be0a4e8a4a9 100644 --- a/module/zfs/arc.c +++ b/module/zfs/arc.c @@ -308,6 +308,7 @@ #include #include #include +#include #include #ifndef _KERNEL @@ -6302,6 +6303,7 @@ arc_read(zio_t *pio, spa_t *spa, const blkptr_t *bp, ARCSTAT_CONDSTAT(!HDR_PREFETCH(hdr), demand, prefetch, !HDR_ISTYPE_METADATA(hdr), data, metadata, misses); + zfs_racct_read(size, 1); } /* Check if the spa even has l2 configured */ diff --git a/module/zfs/dmu.c b/module/zfs/dmu.c index ed345f0b6ec3..b46bf60d1a29 100644 --- a/module/zfs/dmu.c +++ b/module/zfs/dmu.c @@ -52,6 +52,7 @@ #include #include #include +#include #include #ifdef _KERNEL #include @@ -551,6 +552,9 @@ dmu_buf_hold_array_by_dnode(dnode_t *dn, uint64_t offset, uint64_t length, dbp[i] = &db->db; } + if (!read) + zfs_racct_write(length, nblks); + if ((flags & DMU_READ_NO_PREFETCH) == 0 && DNODE_META_IS_CACHEABLE(dn) && length <= zfetch_array_rd_sz) { dmu_zfetch(&dn->dn_zfetch, blkid, nblks, @@ -1440,6 +1444,7 @@ dmu_assign_arcbuf_by_dnode(dnode_t *dn, uint64_t offset, arc_buf_t *buf, * same size as the dbuf. */ if (offset == db->db.db_offset && blksz == db->db.db_size) { + zfs_racct_write(blksz, 1); dbuf_assign_arcbuf(db, buf, tx); dbuf_rele(db, FTAG); } else { diff --git a/module/zfs/dsl_scan.c b/module/zfs/dsl_scan.c index 40adfbcee4e1..a54cd6ca800e 100644 --- a/module/zfs/dsl_scan.c +++ b/module/zfs/dsl_scan.c @@ -20,7 +20,7 @@ */ /* * Copyright (c) 2008, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2018 by Delphix. All rights reserved. + * Copyright (c) 2011, 2021 by Delphix. All rights reserved. * Copyright 2016 Gary Mills * Copyright (c) 2017, 2019, Datto Inc. All rights reserved. * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved. @@ -987,6 +987,10 @@ dsl_scan_done(dsl_scan_t *scn, boolean_t complete, dmu_tx_t *tx) (u_longlong_t)spa_get_errlog_size(spa)); spa_async_request(spa, SPA_ASYNC_RESILVER); } + + /* Clear recent error events (i.e. duplicate events tracking) */ + if (complete) + zfs_ereport_clear(spa, NULL); } scn->scn_phys.scn_end_time = gethrestime_sec(); diff --git a/module/zfs/spa_config.c b/module/zfs/spa_config.c index 4a3144313267..ad82932ce567 100644 --- a/module/zfs/spa_config.c +++ b/module/zfs/spa_config.c @@ -32,7 +32,6 @@ #include #include #include -#include #include #include #include @@ -40,6 +39,7 @@ #include #include #include +#include #ifdef _KERNEL #include #endif diff --git a/module/zfs/vdev.c b/module/zfs/vdev.c index 36001e0a6626..ad4f3efb87b1 100644 --- a/module/zfs/vdev.c +++ b/module/zfs/vdev.c @@ -21,7 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. - * Copyright (c) 2011, 2020 by Delphix. All rights reserved. + * Copyright (c) 2011, 2021 by Delphix. All rights reserved. * Copyright 2017 Nexenta Systems, Inc. * Copyright (c) 2014 Integros [integros.com] * Copyright 2016 Toomas Soome @@ -2527,7 +2527,7 @@ vdev_hold(vdev_t *vd) for (int c = 0; c < vd->vdev_children; c++) vdev_hold(vd->vdev_child[c]); - if (vd->vdev_ops->vdev_op_leaf) + if (vd->vdev_ops->vdev_op_leaf && vd->vdev_ops->vdev_op_hold != NULL) vd->vdev_ops->vdev_op_hold(vd); } @@ -2538,7 +2538,7 @@ vdev_rele(vdev_t *vd) for (int c = 0; c < vd->vdev_children; c++) vdev_rele(vd->vdev_child[c]); - if (vd->vdev_ops->vdev_op_leaf) + if (vd->vdev_ops->vdev_op_leaf && vd->vdev_ops->vdev_op_rele != NULL) vd->vdev_ops->vdev_op_rele(vd); } @@ -4170,6 +4170,9 @@ vdev_clear(spa_t *spa, vdev_t *vd) vd->vdev_parent->vdev_ops == &vdev_spare_ops && vd->vdev_parent->vdev_child[0] == vd) vd->vdev_unspare = B_TRUE; + + /* Clear recent error events cache (i.e. duplicate events tracking) */ + zfs_ereport_clear(spa, vd); } boolean_t diff --git a/module/zfs/vdev_indirect.c b/module/zfs/vdev_indirect.c index b26d0993711a..416f4c54d8e8 100644 --- a/module/zfs/vdev_indirect.c +++ b/module/zfs/vdev_indirect.c @@ -1485,14 +1485,12 @@ vdev_indirect_all_checksum_errors(zio_t *zio) vdev_t *vd = ic->ic_vdev; - int ret = zfs_ereport_post_checksum(zio->io_spa, vd, + (void) zfs_ereport_post_checksum(zio->io_spa, vd, NULL, zio, is->is_target_offset, is->is_size, NULL, NULL, NULL); - if (ret != EALREADY) { - mutex_enter(&vd->vdev_stat_lock); - vd->vdev_stat.vs_checksum_errors++; - mutex_exit(&vd->vdev_stat_lock); - } + mutex_enter(&vd->vdev_stat_lock); + vd->vdev_stat.vs_checksum_errors++; + mutex_exit(&vd->vdev_stat_lock); } } } diff --git a/module/zfs/vdev_raidz.c b/module/zfs/vdev_raidz.c index f4812e61252c..57a594c80ce3 100644 --- a/module/zfs/vdev_raidz.c +++ b/module/zfs/vdev_raidz.c @@ -1852,14 +1852,12 @@ raidz_checksum_error(zio_t *zio, raidz_col_t *rc, abd_t *bad_data) zbc.zbc_has_cksum = 0; zbc.zbc_injected = rm->rm_ecksuminjected; - int ret = zfs_ereport_post_checksum(zio->io_spa, vd, + (void) zfs_ereport_post_checksum(zio->io_spa, vd, &zio->io_bookmark, zio, rc->rc_offset, rc->rc_size, rc->rc_abd, bad_data, &zbc); - if (ret != EALREADY) { - mutex_enter(&vd->vdev_stat_lock); - vd->vdev_stat.vs_checksum_errors++; - mutex_exit(&vd->vdev_stat_lock); - } + mutex_enter(&vd->vdev_stat_lock); + vd->vdev_stat.vs_checksum_errors++; + mutex_exit(&vd->vdev_stat_lock); } } @@ -2453,14 +2451,12 @@ vdev_raidz_io_done_unrecoverable(zio_t *zio) zbc.zbc_has_cksum = 0; zbc.zbc_injected = rm->rm_ecksuminjected; - int ret = zfs_ereport_start_checksum(zio->io_spa, + (void) zfs_ereport_start_checksum(zio->io_spa, cvd, &zio->io_bookmark, zio, rc->rc_offset, rc->rc_size, (void *)(uintptr_t)c, &zbc); - if (ret != EALREADY) { - mutex_enter(&cvd->vdev_stat_lock); - cvd->vdev_stat.vs_checksum_errors++; - mutex_exit(&cvd->vdev_stat_lock); - } + mutex_enter(&cvd->vdev_stat_lock); + cvd->vdev_stat.vs_checksum_errors++; + mutex_exit(&cvd->vdev_stat_lock); } } } diff --git a/module/zfs/vdev_rebuild.c b/module/zfs/vdev_rebuild.c index 784d1af15a81..037abc01f7a7 100644 --- a/module/zfs/vdev_rebuild.c +++ b/module/zfs/vdev_rebuild.c @@ -338,6 +338,9 @@ vdev_rebuild_complete_sync(void *arg, dmu_tx_t *tx) } cv_broadcast(&vd->vdev_rebuild_cv); + + /* Clear recent error events (i.e. duplicate events tracking) */ + zfs_ereport_clear(spa, NULL); } /* diff --git a/module/zfs/zfs_fm.c b/module/zfs/zfs_fm.c index ea71ef325c89..9e9f4a80ba1d 100644 --- a/module/zfs/zfs_fm.c +++ b/module/zfs/zfs_fm.c @@ -24,7 +24,7 @@ */ /* - * Copyright (c) 2012,2020 by Delphix. All rights reserved. + * Copyright (c) 2012,2021 by Delphix. All rights reserved. */ #include @@ -247,6 +247,44 @@ zfs_ereport_schedule_cleaner(void) ddi_get_lbolt() + NSEC_TO_TICK(timeout)); } +/* + * Clear entries for a given vdev or all vdevs in a pool when vdev == NULL + */ +void +zfs_ereport_clear(spa_t *spa, vdev_t *vd) +{ + uint64_t vdev_guid, pool_guid; + int cnt = 0; + + ASSERT(vd != NULL || spa != NULL); + if (vd == NULL) { + vdev_guid = 0; + pool_guid = spa_guid(spa); + } else { + vdev_guid = vd->vdev_guid; + pool_guid = 0; + } + + mutex_enter(&recent_events_lock); + + recent_events_node_t *next = list_head(&recent_events_list); + while (next != NULL) { + recent_events_node_t *entry = next; + + next = list_next(&recent_events_list, next); + + if (entry->re_vdev_guid == vdev_guid || + entry->re_pool_guid == pool_guid) { + avl_remove(&recent_events_tree, entry); + list_remove(&recent_events_list, entry); + kmem_free(entry, sizeof (*entry)); + cnt++; + } + } + + mutex_exit(&recent_events_lock); +} + /* * Check if an ereport would be a duplicate of one recently posted. * @@ -951,6 +989,12 @@ annotate_ecksum(nvlist_t *ereport, zio_bad_cksum_t *info, } return (eip); } +#else +/*ARGSUSED*/ +void +zfs_ereport_clear(spa_t *spa, vdev_t *vd) +{ +} #endif /* diff --git a/module/zfs/zfs_ioctl.c b/module/zfs/zfs_ioctl.c index c64330e289a2..4e64563388f1 100644 --- a/module/zfs/zfs_ioctl.c +++ b/module/zfs/zfs_ioctl.c @@ -159,7 +159,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/module/zfs/zfs_vnops.c b/module/zfs/zfs_vnops.c index 61d5f06c6455..a35c17f86f93 100644 --- a/module/zfs/zfs_vnops.c +++ b/module/zfs/zfs_vnops.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/module/zfs/zio.c b/module/zfs/zio.c index 538a2a2cdd9b..74d1595a85d3 100644 --- a/module/zfs/zio.c +++ b/module/zfs/zio.c @@ -4255,15 +4255,12 @@ zio_checksum_verify(zio_t *zio) zio->io_error = error; if (error == ECKSUM && !(zio->io_flags & ZIO_FLAG_SPECULATIVE)) { - int ret = zfs_ereport_start_checksum(zio->io_spa, + (void) zfs_ereport_start_checksum(zio->io_spa, zio->io_vd, &zio->io_bookmark, zio, zio->io_offset, zio->io_size, NULL, &info); - - if (ret != EALREADY) { - mutex_enter(&zio->io_vd->vdev_stat_lock); - zio->io_vd->vdev_stat.vs_checksum_errors++; - mutex_exit(&zio->io_vd->vdev_stat_lock); - } + mutex_enter(&zio->io_vd->vdev_stat_lock); + zio->io_vd->vdev_stat.vs_checksum_errors++; + mutex_exit(&zio->io_vd->vdev_stat_lock); } } diff --git a/scripts/zfs.sh b/scripts/zfs.sh index c131429c919c..2f5f3f8fdb54 100755 --- a/scripts/zfs.sh +++ b/scripts/zfs.sh @@ -198,7 +198,7 @@ unload_modules_linux() { if [ "$USE_COUNT" = "0" ] ; then unload_module_linux "$KMOD" || return 1 - else + elif [ "$USE_COUNT" != "" ] ; then echo "Module ${NAME} is still in use!" return 1 fi diff --git a/tests/runfiles/common.run b/tests/runfiles/common.run index 9468ee06de44..e656785d95d8 100644 --- a/tests/runfiles/common.run +++ b/tests/runfiles/common.run @@ -362,7 +362,8 @@ tags = ['functional', 'cli_root', 'zpool_detach'] [tests/functional/cli_root/zpool_events] tests = ['zpool_events_clear', 'zpool_events_cliargs', 'zpool_events_follow', - 'zpool_events_poolname', 'zpool_events_errors', 'zpool_events_duplicates'] + 'zpool_events_poolname', 'zpool_events_errors', 'zpool_events_duplicates', + 'zpool_events_clear_retained'] tags = ['functional', 'cli_root', 'zpool_events'] [tests/functional/cli_root/zpool_export] diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am b/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am index 99c46f0143c2..765df102229d 100644 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_events/Makefile.am @@ -11,7 +11,8 @@ dist_pkgdata_SCRIPTS = \ zpool_events_follow.ksh \ zpool_events_poolname.ksh \ zpool_events_errors.ksh \ - zpool_events_duplicates.ksh + zpool_events_duplicates.ksh \ + zpool_events_clear_retained.ksh dist_pkgdata_DATA = \ zpool_events.cfg \ diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear_retained.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear_retained.ksh new file mode 100755 index 000000000000..fdf56b2cf9a6 --- /dev/null +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_clear_retained.ksh @@ -0,0 +1,135 @@ +#!/bin/ksh -p +# CDDL HEADER START +# +# The contents of this file are subject to the terms of the +# Common Development and Distribution License (the "License"). +# You may not use this file except in compliance with the License. +# +# You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE +# or http://www.opensolaris.org/os/licensing. +# See the License for the specific language governing permissions +# and limitations under the License. +# +# When distributing Covered Code, include this CDDL HEADER in each +# file and include the License file at usr/src/OPENSOLARIS.LICENSE. +# If applicable, add the following below this CDDL HEADER, with the +# fields enclosed by brackets "[]" replaced with your own identifying +# information: Portions Copyright [yyyy] [name of copyright owner] +# +# CDDL HEADER END +# + +# +# Copyright (c) 2018 by Lawrence Livermore National Security, LLC. +# Copyright (c) 2021 by Delphix. All rights reserved. +# + +# DESCRIPTION: +# Verify that new errors after a pool scrub are considered a duplicate +# +# STRATEGY: +# 1. Create a raidz pool with a file +# 2. Inject garbage into one of the vdevs +# 3. Scrub the pool +# 4. Observe the checksum error counts +# 5. Repeat inject and pool scrub +# 6. Verify that second pass also produces similar errors (i.e. not +# treated as a duplicate) +# + +. $STF_SUITE/include/libtest.shlib + +verify_runnable "both" + +MOUNTDIR=$TEST_BASE_DIR/mount +FILEPATH=$MOUNTDIR/target +VDEV1=$TEST_BASE_DIR/vfile1 +VDEV2=$TEST_BASE_DIR/vfile2 +VDEV3=$TEST_BASE_DIR/vfile3 +SUPPLY=$TEST_BASE_DIR/supply +POOL=test_pool +FILESIZE="15M" +DAMAGEBLKS=10 + +OLD_LEN_MAX=$(get_tunable ZEVENT_LEN_MAX) +RETAIN_MAX=$(get_tunable ZEVENT_RETAIN_MAX) +OLD_CHECKSUMS=$(get_tunable CHECKSUM_EVENTS_PER_SECOND) + +EREPORTS="$STF_SUITE/tests/functional/cli_root/zpool_events/ereports" + +function cleanup +{ + log_must set_tunable64 CHECKSUM_EVENTS_PER_SECOND $OLD_CHECKSUMS + log_must set_tunable64 ZEVENT_LEN_MAX $OLD_LEN_MAX + + zpool events -c + if poolexists $POOL ; then + zpool export $POOL + fi + log_must rm -f $VDEV1 $VDEV2 $VDEV3 +} + +function damage_and_repair +{ + log_must zpool clear $POOL $VDEV1 + log_must zpool events -c + + log_note injecting damage to $VDEV1 + log_must dd conv=notrunc if=$SUPPLY of=$VDEV1 bs=1M seek=4 count=$DAMAGEBLKS + log_must zpool scrub $POOL + log_must zpool wait -t scrub $POOL + log_note "pass $1 observed $($EREPORTS | grep -c checksum) checksum ereports" + + repaired=$(zpool status $POOL | grep "scan: scrub repaired" | awk '{print $4}') + if [ "$repaired" == "0B" ]; then + log_fail "INVALID TEST -- expected scrub to repair some blocks" + else + log_note "$repaired repaired during scrub" + fi +} + +function checksum_error_count +{ + zpool status -p $POOL | grep $VDEV1 | awk '{print $5}' +} + +assertion="Damage to recently repaired blocks should be reported/counted" +log_assert "$assertion" +log_note "zevent retain max setting: $RETAIN_MAX" + +log_onexit cleanup + +# Set our threshold high to avoid dropping events. +set_tunable64 ZEVENT_LEN_MAX 20000 +set_tunable64 CHECKSUM_EVENTS_PER_SECOND 20000 + +# Initialize resources for the test +log_must truncate -s $MINVDEVSIZE $VDEV1 $VDEV2 $VDEV3 +log_must dd if=/dev/urandom of=$SUPPLY bs=1M count=$DAMAGEBLKS +log_must mkdir -p $MOUNTDIR +log_must zpool create -f -m $MOUNTDIR -o failmode=continue $POOL raidz $VDEV1 $VDEV2 $VDEV3 +log_must zfs set compression=off recordsize=16k $POOL +# create a file full of zeros +log_must mkfile -v $FILESIZE $FILEPATH +log_must zpool sync $POOL + +# run once and observe the checksum errors +damage_and_repair 1 +errcnt=$(checksum_error_count) +log_note "$errcnt errors observed" +# set expectaton of at least 75% of what we observed in first pass +(( expected = (errcnt * 75) / 100 )) + +# run again and we should observe new checksum errors +damage_and_repair 2 +errcnt=$(checksum_error_count) + +log_must zpool destroy $POOL + +if (( errcnt < expected )); then + log_fail "FAILED -- expecting at least $expected checksum errors but only observed $errcnt" +else + log_note observed $errcnt new checksum errors after a scrub + log_pass "$assertion" +fi + diff --git a/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh b/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh index 1ba7b1b34496..d4194a5b8f5b 100755 --- a/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh +++ b/tests/zfs-tests/tests/functional/cli_root/zpool_events/zpool_events_duplicates.ksh @@ -114,21 +114,10 @@ function do_dup_test if [ "$RW" == "write" ] ; then log_must mkfile $FILESIZE $FILEPATH log_must zpool sync $POOL - else - # scrub twice to generate some duplicates - log_must zpool scrub $POOL - log_must zpool wait -t scrub $POOL - log_must zpool scrub $POOL - log_must zpool wait -t scrub $POOL fi log_must zinject -c all - # Wait for the pool to settle down and finish resilvering (if - # necessary). We want the errors to stop incrementing before we - # check for duplicates. - zpool wait -t resilver $POOL - ereports="$($EREPORTS | sort)" actual=$(echo "$ereports" | wc -l) unique=$(echo "$ereports" | uniq | wc -l)