From d38677d23c2545b2d98c93ef13a6c9af9c2d28cc Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Wed, 14 Mar 2018 17:53:37 +0000 Subject: [PATCH] Create a sysctl kern.cam.{,a,n}da.X.invalidate kern.cam.{,a,n}da.X.invalidate=1 forces *daX to detach by calling cam_periph_invalidate on the underlying periph. This is for testing purposes only. Include only with options CAM_TEST_FAILURE and rename the former [AN]DA_TEST_FAILURE, and fix nda to compile with it set. We're using it at work to harden geom and the buffer cache to be resilient in the face of drive failure. Today, it far too often results in a panic. While much work was done on SIM initiated removal for the USB thumb drive removal work, little has been done for periph initiated removal. This simulates what *daerror() does for some errors nicely: we get the same panics with it that we do with failing drives. Sponsored by: Netflix Differential Revision: https://reviews.freebsd.org/D14581 --- sys/cam/ata/ata_da.c | 10 +++++++--- sys/cam/cam_periph.c | 22 ++++++++++++++++++++++ sys/cam/cam_periph.h | 2 ++ sys/cam/nvme/nvme_da.c | 15 ++++++++++++++- sys/cam/scsi/scsi_da.c | 7 +++++++ sys/conf/NOTES | 1 + sys/conf/options | 2 +- 7 files changed, 54 insertions(+), 5 deletions(-) diff --git a/sys/cam/ata/ata_da.c b/sys/cam/ata/ata_da.c index 1a55ff41df80..44ee7c46de02 100644 --- a/sys/cam/ata/ata_da.c +++ b/sys/cam/ata/ata_da.c @@ -238,7 +238,7 @@ struct ada_softc { int write_cache; int unmappedio; int rotating; -#ifdef ADA_TEST_FAILURE +#ifdef CAM_TEST_FAILURE int force_read_error; int force_write_error; int periodic_read_error; @@ -1475,7 +1475,7 @@ adasysctlinit(void *context, int pending) "max_seq_zones", CTLFLAG_RD, &softc->max_seq_zones, "Maximum Number of Open Sequential Write Required Zones"); -#ifdef ADA_TEST_FAILURE +#ifdef CAM_TEST_FAILURE /* * Add a 'door bell' sysctl which allows one to set it from userland * and cause something bad to happen. 
For the moment, we only allow @@ -1493,6 +1493,10 @@ adasysctlinit(void *context, int pending) OID_AUTO, "periodic_read_error", CTLFLAG_RW | CTLFLAG_MPSAFE, &softc->periodic_read_error, 0, "Force a read error every N reads (don't set too low)."); + SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), + OID_AUTO, "invalidate", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, + periph, 0, cam_periph_invalidate_sysctl, "I", + "Write 1 to invalidate the drive immediately"); #endif #ifdef CAM_IO_STATS @@ -2293,7 +2297,7 @@ adastart(struct cam_periph *periph, union ccb *start_ccb) data_ptr = bp; } -#ifdef ADA_TEST_FAILURE +#ifdef CAM_TEST_FAILURE int fail = 0; /* diff --git a/sys/cam/cam_periph.c b/sys/cam/cam_periph.c index 1c8365655fcd..d7ea119f4d54 100644 --- a/sys/cam/cam_periph.c +++ b/sys/cam/cam_periph.c @@ -2066,3 +2066,25 @@ cam_periph_devctl_notify(union ccb *ccb) free(sbmsg, M_CAMPERIPH); } +/* + * Sysctl to force an invalidation of the drive right now. Can be + * called with CTLFLAG_MPSAFE since we take periph lock. 
+ */ +int +cam_periph_invalidate_sysctl(SYSCTL_HANDLER_ARGS) +{ + struct cam_periph *periph; + int error, value; + + periph = arg1; + value = 0; + error = sysctl_handle_int(oidp, &value, 0, req); + if (error != 0 || req->newptr == NULL || value != 1) + return (error); + + cam_periph_lock(periph); + cam_periph_invalidate(periph); + cam_periph_unlock(periph); + + return (0); +} diff --git a/sys/cam/cam_periph.h b/sys/cam/cam_periph.h index 4694327be2fd..6eb0084a11ce 100644 --- a/sys/cam/cam_periph.h +++ b/sys/cam/cam_periph.h @@ -37,6 +37,7 @@ #include #ifdef _KERNEL +#include #include #include @@ -198,6 +199,7 @@ void cam_periph_freeze_after_event(struct cam_periph *periph, u_int duration_ms); int cam_periph_error(union ccb *ccb, cam_flags camflags, u_int32_t sense_flags); +int cam_periph_invalidate_sysctl(SYSCTL_HANDLER_ARGS); static __inline struct mtx * cam_periph_mtx(struct cam_periph *periph) diff --git a/sys/cam/nvme/nvme_da.c b/sys/cam/nvme/nvme_da.c index a041e9fe948a..d83f505f5fb1 100644 --- a/sys/cam/nvme/nvme_da.c +++ b/sys/cam/nvme/nvme_da.c @@ -112,6 +112,12 @@ struct nda_softc { struct task sysctl_task; struct sysctl_ctx_list sysctl_ctx; struct sysctl_oid *sysctl_tree; +#ifdef CAM_TEST_FAILURE + int force_read_error; + int force_write_error; + int periodic_read_error; + int periodic_read_count; +#endif #ifdef CAM_IO_STATS struct sysctl_ctx_list sysctl_stats_ctx; struct sysctl_oid *sysctl_stats_tree; @@ -666,6 +672,13 @@ ndasysctlinit(void *context, int pending) "Device pack invalidations."); #endif +#ifdef CAM_TEST_FAILURE + SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), + OID_AUTO, "invalidate", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, + periph, 0, cam_periph_invalidate_sysctl, "I", + "Write 1 to invalidate the drive immediately"); +#endif + cam_iosched_sysctl_init(softc->cam_iosched, &softc->sysctl_ctx, softc->sysctl_tree); @@ -876,7 +889,7 @@ ndastart(struct cam_periph *periph, union ccb *start_ccb) /* FALLTHROUGH */ case 
BIO_READ: { -#ifdef NDA_TEST_FAILURE +#ifdef CAM_TEST_FAILURE int fail = 0; /* diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c index f5be34f96e09..c6941990a8df 100644 --- a/sys/cam/scsi/scsi_da.c +++ b/sys/cam/scsi/scsi_da.c @@ -2203,6 +2203,13 @@ dasysctlinit(void *context, int pending) 0, "Rotating media"); +#ifdef CAM_TEST_FAILURE + SYSCTL_ADD_PROC(&softc->sysctl_ctx, SYSCTL_CHILDREN(softc->sysctl_tree), + OID_AUTO, "invalidate", CTLTYPE_U64 | CTLFLAG_RW | CTLFLAG_MPSAFE, + periph, 0, cam_periph_invalidate_sysctl, "I", + "Write 1 to invalidate the drive immediately"); +#endif + /* * Add some addressing info. */ diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 60905b952618..4f889024492d 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -1356,6 +1356,7 @@ options SCSI_NO_SENSE_STRINGS options SCSI_NO_OP_STRINGS options SCSI_DELAY=5000 # Be pessimistic about Joe SCSI device options CAM_IOSCHED_DYNAMIC +options CAM_TEST_FAILURE # Options for the CAM CDROM driver: # CHANGER_MIN_BUSY_SECONDS: Guaranteed minimum time quantum for a changer LUN diff --git a/sys/conf/options b/sys/conf/options index e9cfa2fac5a6..15199168897d 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -335,12 +335,12 @@ CAM_DEBUG_LUN opt_cam.h CAM_DEBUG_FLAGS opt_cam.h CAM_BOOT_DELAY opt_cam.h CAM_IOSCHED_DYNAMIC opt_cam.h +CAM_TEST_FAILURE opt_cam.h SCSI_DELAY opt_scsi.h SCSI_NO_SENSE_STRINGS opt_scsi.h SCSI_NO_OP_STRINGS opt_scsi.h # Options used only in cam/ata/ata_da.c -ADA_TEST_FAILURE opt_ada.h ATA_STATIC_ID opt_ada.h # Options used only in cam/scsi/scsi_cd.c