From 0241e491a08ffa471a08ceaa0b0943999d775cbe Mon Sep 17 00:00:00 2001 From: Don Brady Date: Fri, 16 Jun 2017 18:21:11 -0600 Subject: [PATCH] Inject zinject(8) a percentage amount of dev errs In the original form of device error injection, it was an all or nothing situation. To help simulate intermittent error conditions, you can now specify a real number percentage value. This is also very useful for our ZFS fault diagnosis testing and for injecting intermittent errors during load testing. Reviewed-by: Brian Behlendorf Signed-off-by: Don Brady Closes #6227 --- cmd/zinject/zinject.c | 48 ++++++++++++++++++++++++++++++++--------- include/sys/zfs_ioctl.h | 5 +++++ man/man8/zinject.8 | 6 +++--- module/zfs/zio_inject.c | 37 +++++++++++++++++++++++++------ 4 files changed, 76 insertions(+), 20 deletions(-) diff --git a/cmd/zinject/zinject.c b/cmd/zinject/zinject.c index 604554e71337..ccd3534d0579 100644 --- a/cmd/zinject/zinject.c +++ b/cmd/zinject/zinject.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2015 by Delphix. All rights reserved. + * Copyright (c) 2017, Intel Corporation. */ /* @@ -124,7 +125,7 @@ * cache. * * The '-f' flag controls the frequency of errors injected, expressed as a - * integer percentage between 1 and 100. The default is 100. + * real number percentage between 0.0001 and 100. The default is 100. * * The this form is responsible for actually injecting the handler into the * framework. It takes the arguments described above, translates them to the @@ -230,11 +231,13 @@ usage(void) "\t\tspa_vdev_exit() will trigger a panic.\n" "\n" "\tzinject -d device [-e errno] [-L ] [-F]\n" - "\t [-T pool\n" + "\t [-T ] [-f frequency] pool\n" "\t\tInject a fault into a particular device or the device's\n" "\t\tlabel. 
Label injection can either be 'nvlist', 'uber',\n " "\t\t'pad1', or 'pad2'.\n" "\t\t'errno' can be 'nxio' (the default), 'io', or 'dtl'.\n" + "\t\t'frequency' is a value between 0.0001 and 100.0 that limits\n" + "\t\tdevice error injection to a percentage of the IOs.\n" "\n" "\tzinject -d device -A -D pool\n" "\t\tPerform a specific action on a particular device.\n" @@ -305,7 +308,7 @@ usage(void) "\t\t-u\tUnload the associated pool. Can be specified with only\n" "\t\t\ta pool object.\n" "\t\t-f\tOnly inject errors a fraction of the time. Expressed as\n" - "\t\t\ta percentage between 1 and 100.\n" + "\t\t\ta percentage between 0.0001 and 100.\n" "\n" "\t-t data\t\tInject an error into the plain file contents of a\n" "\t\t\tfile. The object must be specified as a complete path\n" @@ -645,6 +648,27 @@ parse_delay(char *str, uint64_t *delay, uint64_t *nlanes) return (0); } +static int +parse_frequency(const char *str, uint32_t *percent) +{ + double val; + char *post; + + val = strtod(str, &post); + if (post == NULL || *post != '\0') + return (EINVAL); + + /* valid range is [0.0001, 100.0]; checked below as a fraction */ + val /= 100.0f; + if (val < 0.000001f || val > 1.0f) + return (ERANGE); + + /* scale by ZI_PERCENTAGE_MAX into an integer for use by kernel */ + *percent = ((uint32_t)(val * ZI_PERCENTAGE_MAX)); + + return (0); +} + int main(int argc, char **argv) { @@ -760,10 +784,12 @@ main(int argc, char **argv) } break; case 'f': - record.zi_freq = atoi(optarg); - if (record.zi_freq < 1 || record.zi_freq > 100) { - (void) fprintf(stderr, "frequency range must " - "be in the range (0, 100]\n"); + ret = parse_frequency(optarg, &record.zi_freq); + if (ret != 0) { + (void) fprintf(stderr, "%sfrequency value must " + "be in the range [0.0001, 100.0]\n", + ret == EINVAL ? "invalid value: " : + ret == ERANGE ? "out of range: " : ""); libzfs_fini(g_zfs); return (1); } @@ -898,7 +924,8 @@ main(int argc, char **argv) * '-c' is invalid with any other options. 
*/ if (raw != NULL || range != NULL || type != TYPE_INVAL || - level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED) { + level != 0 || record.zi_cmd != ZINJECT_UNINITIALIZED || + record.zi_freq > 0) { (void) fprintf(stderr, "cancel (-c) incompatible with " "any other options\n"); usage(); @@ -972,7 +999,8 @@ main(int argc, char **argv) } else if (raw != NULL) { if (range != NULL || type != TYPE_INVAL || level != 0 || - record.zi_cmd != ZINJECT_UNINITIALIZED) { + record.zi_cmd != ZINJECT_UNINITIALIZED || + record.zi_freq > 0) { (void) fprintf(stderr, "raw (-b) format with " "any other options\n"); usage(); @@ -1007,7 +1035,7 @@ main(int argc, char **argv) error = EIO; } else if (record.zi_cmd == ZINJECT_PANIC) { if (raw != NULL || range != NULL || type != TYPE_INVAL || - level != 0 || device != NULL) { + level != 0 || device != NULL || record.zi_freq > 0) { (void) fprintf(stderr, "panic (-p) incompatible with " "other options\n"); usage(); diff --git a/include/sys/zfs_ioctl.h b/include/sys/zfs_ioctl.h index 495cdea3a832..c68b8770b556 100644 --- a/include/sys/zfs_ioctl.h +++ b/include/sys/zfs_ioctl.h @@ -22,6 +22,7 @@ * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2015 by Delphix. All rights reserved. * Copyright 2016 RackTop Systems. + * Copyright (c) 2017, Intel Corporation. */ #ifndef _SYS_ZFS_IOCTL_H @@ -338,6 +339,10 @@ typedef struct zinject_record { #define ZEVENT_SEEK_START 0 #define ZEVENT_SEEK_END UINT64_MAX +/* scaled frequency ranges */ +#define ZI_PERCENTAGE_MIN 4294UL +#define ZI_PERCENTAGE_MAX UINT32_MAX + typedef enum zinject_type { ZINJECT_UNINITIALIZED, ZINJECT_DATA_FAULT, diff --git a/man/man8/zinject.8 b/man/man8/zinject.8 index ab22d4a5bb15..50fecfb64364 100644 --- a/man/man8/zinject.8 +++ b/man/man8/zinject.8 @@ -76,7 +76,7 @@ create 3 lanes on the device; one lane with a latency of 10 ms and two lanes with a 25 ms latency. 
.TP -.B "zinject \-d \fIvdev\fB [\-e \fIdevice_error\fB] [\-L \fIlabel_error\fB] [\-T \fIfailure\fB] [\-F] \fIpool\fB" +.B "zinject \-d \fIvdev\fB [\-e \fIdevice_error\fB] [\-L \fIlabel_error\fB] [\-T \fIfailure\fB] [\-f \fIfrequency\fB] [\-F] \fIpool\fB" Force a vdev error. .TP .B "zinject \-I [\-s \fIseconds\fB | \-g \fItxgs\fB] \fIpool\fB" @@ -113,8 +113,8 @@ Specify .BR "nxio" " for an ENXIO error where reopening the device will fail." .TP .BI "\-f" " frequency" -Only inject errors a fraction of the time. Expressed as an integer -percentage between 1 and 100. +Only inject errors a fraction of the time. Expressed as a real number +percentage between 0.0001 and 100. .TP .BI "\-F" Fail faster. Do fewer checks. diff --git a/module/zfs/zio_inject.c b/module/zfs/zio_inject.c index 0e8e9d932107..4a4d431e33bc 100644 --- a/module/zfs/zio_inject.c +++ b/module/zfs/zio_inject.c @@ -21,6 +21,7 @@ /* * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved. * Copyright (c) 2012, 2015 by Delphix. All rights reserved. + * Copyright (c) 2017, Intel Corporation. */ /* @@ -98,6 +99,26 @@ static kmutex_t inject_delay_mtx; */ static int inject_next_id = 1; +/* + * Test if the requested frequency was triggered + */ +static boolean_t +freq_triggered(uint32_t frequency) +{ + /* + * zero implies always (100%) + */ + if (frequency == 0) + return (B_TRUE); + + /* + * Note: we still handle legacy (unscaled) frequency values + */ + uint32_t maximum = (frequency <= 100) ? 100 : ZI_PERCENTAGE_MAX; + + return (spa_get_random(maximum) < frequency); +} + /* * Returns true if the given record matches the I/O in progress. 
*/ @@ -113,8 +134,7 @@ zio_match_handler(zbookmark_phys_t *zb, uint64_t type, record->zi_object == DMU_META_DNODE_OBJECT) { if (record->zi_type == DMU_OT_NONE || type == record->zi_type) - return (record->zi_freq == 0 || - spa_get_random(100) < record->zi_freq); + return (freq_triggered(record->zi_freq)); else return (B_FALSE); } @@ -128,8 +148,7 @@ zio_match_handler(zbookmark_phys_t *zb, uint64_t type, zb->zb_blkid >= record->zi_start && zb->zb_blkid <= record->zi_end && error == record->zi_error) - return (record->zi_freq == 0 || - spa_get_random(100) < record->zi_freq); + return (freq_triggered(record->zi_freq)); return (B_FALSE); } @@ -293,6 +312,12 @@ zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error) continue; if (handler->zi_record.zi_error == error) { + /* + * limit error injection if requested + */ + if (!freq_triggered(handler->zi_record.zi_freq)) + continue; + /* * For a failed open, pretend like the device * has gone away. @@ -466,10 +491,8 @@ zio_handle_io_delay(zio_t *zio) if (handler->zi_record.zi_cmd != ZINJECT_DELAY_IO) continue; - if (handler->zi_record.zi_freq != 0 && - spa_get_random(100) >= handler->zi_record.zi_freq) { + if (!freq_triggered(handler->zi_record.zi_freq)) continue; - } if (vd->vdev_guid != handler->zi_record.zi_guid) continue;