Properly handle IO with B_FAILFAST
Retry IO once with ZIO_FLAG_TRYHARD before declaring a pool faulted.

OpenSolaris revision and Bug IDs:

9725:0bf7402e8022
6843014 ZFS B_FAILFAST handling is broken

Approved by: delphij (mentor)
Obtained from: OpenSolaris (Bug ID 6843014)
MFC after: 3 weeks
This commit is contained in:
parent
96a1a6a568
commit
aa007a9f0e
@ -19,12 +19,10 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
#pragma ident "%Z%%M% %I% %E% SMI"
|
||||
|
||||
/*
|
||||
* ZFS Fault Injector
|
||||
*
|
||||
@ -227,7 +225,7 @@ usage(void)
|
||||
"\t\tClear the particular record (if given a numeric ID), or\n"
|
||||
"\t\tall records if 'all' is specificed.\n"
|
||||
"\n"
|
||||
"\tzinject -d device [-e errno] [-L <nvlist|uber>] pool\n"
|
||||
"\tzinject -d device [-e errno] [-L <nvlist|uber>] [-F] pool\n"
|
||||
"\t\tInject a fault into a particular device or the device's\n"
|
||||
"\t\tlabel. Label injection can either be 'nvlist' or 'uber'.\n"
|
||||
"\t\t'errno' can either be 'nxio' (the default) or 'io'.\n"
|
||||
@ -519,7 +517,7 @@ main(int argc, char **argv)
|
||||
return (0);
|
||||
}
|
||||
|
||||
while ((c = getopt(argc, argv, ":ab:d:f:qhc:t:l:mr:e:uL:")) != -1) {
|
||||
while ((c = getopt(argc, argv, ":ab:d:f:Fqhc:t:l:mr:e:uL:")) != -1) {
|
||||
switch (c) {
|
||||
case 'a':
|
||||
flags |= ZINJECT_FLUSH_ARC;
|
||||
@ -556,6 +554,9 @@ main(int argc, char **argv)
|
||||
return (1);
|
||||
}
|
||||
break;
|
||||
case 'F':
|
||||
record.zi_failfast = B_TRUE;
|
||||
break;
|
||||
case 'h':
|
||||
usage();
|
||||
return (0);
|
||||
|
@ -4252,10 +4252,16 @@ spa_sync(spa_t *spa, uint64_t txg)
|
||||
if (svdcount == SPA_DVAS_PER_BP)
|
||||
break;
|
||||
}
|
||||
error = vdev_config_sync(svd, svdcount, txg);
|
||||
error = vdev_config_sync(svd, svdcount, txg, B_FALSE);
|
||||
if (error != 0)
|
||||
error = vdev_config_sync(svd, svdcount, txg,
|
||||
B_TRUE);
|
||||
} else {
|
||||
error = vdev_config_sync(rvd->vdev_child,
|
||||
rvd->vdev_children, txg);
|
||||
rvd->vdev_children, txg, B_FALSE);
|
||||
if (error != 0)
|
||||
error = vdev_config_sync(rvd->vdev_child,
|
||||
rvd->vdev_children, txg, B_TRUE);
|
||||
}
|
||||
|
||||
spa_config_exit(spa, SCL_STATE, FTAG);
|
||||
|
@ -19,7 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
@ -113,7 +113,8 @@ extern void vdev_queue_io_done(zio_t *zio);
|
||||
|
||||
extern void vdev_config_dirty(vdev_t *vd);
|
||||
extern void vdev_config_clean(vdev_t *vd);
|
||||
extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg);
|
||||
extern int vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg,
|
||||
boolean_t);
|
||||
|
||||
extern void vdev_state_dirty(vdev_t *vd);
|
||||
extern void vdev_state_clean(vdev_t *vd);
|
||||
|
@ -118,7 +118,7 @@ typedef struct zinject_record {
|
||||
uint32_t zi_error;
|
||||
uint64_t zi_type;
|
||||
uint32_t zi_freq;
|
||||
uint32_t zi_pad; /* pad out to 64 bit alignment */
|
||||
uint32_t zi_failfast;
|
||||
} zinject_record_t;
|
||||
|
||||
#define ZINJECT_NULL 0x1
|
||||
|
@ -117,31 +117,33 @@ enum zio_compress {
|
||||
#define ZIO_PRIORITY_SCRUB (zio_priority_table[10])
|
||||
#define ZIO_PRIORITY_TABLE_SIZE 11
|
||||
|
||||
#define ZIO_FLAG_MUSTSUCCEED 0x00000
|
||||
#define ZIO_FLAG_CANFAIL 0x00001
|
||||
#define ZIO_FLAG_SPECULATIVE 0x00002
|
||||
#define ZIO_FLAG_CONFIG_WRITER 0x00004
|
||||
#define ZIO_FLAG_DONT_RETRY 0x00008
|
||||
#define ZIO_FLAG_MUSTSUCCEED 0x000000
|
||||
#define ZIO_FLAG_CANFAIL 0x000001
|
||||
#define ZIO_FLAG_SPECULATIVE 0x000002
|
||||
#define ZIO_FLAG_CONFIG_WRITER 0x000004
|
||||
#define ZIO_FLAG_DONT_RETRY 0x000008
|
||||
|
||||
#define ZIO_FLAG_DONT_CACHE 0x00010
|
||||
#define ZIO_FLAG_DONT_QUEUE 0x00020
|
||||
#define ZIO_FLAG_DONT_AGGREGATE 0x00040
|
||||
#define ZIO_FLAG_DONT_PROPAGATE 0x00080
|
||||
#define ZIO_FLAG_DONT_CACHE 0x000010
|
||||
#define ZIO_FLAG_DONT_QUEUE 0x000020
|
||||
#define ZIO_FLAG_DONT_AGGREGATE 0x000040
|
||||
#define ZIO_FLAG_DONT_PROPAGATE 0x000080
|
||||
|
||||
#define ZIO_FLAG_IO_BYPASS 0x00100
|
||||
#define ZIO_FLAG_IO_REPAIR 0x00200
|
||||
#define ZIO_FLAG_IO_RETRY 0x00400
|
||||
#define ZIO_FLAG_IO_REWRITE 0x00800
|
||||
#define ZIO_FLAG_IO_BYPASS 0x000100
|
||||
#define ZIO_FLAG_IO_REPAIR 0x000200
|
||||
#define ZIO_FLAG_IO_RETRY 0x000400
|
||||
#define ZIO_FLAG_IO_REWRITE 0x000800
|
||||
|
||||
#define ZIO_FLAG_SELF_HEAL 0x01000
|
||||
#define ZIO_FLAG_RESILVER 0x02000
|
||||
#define ZIO_FLAG_SCRUB 0x04000
|
||||
#define ZIO_FLAG_SCRUB_THREAD 0x08000
|
||||
#define ZIO_FLAG_SELF_HEAL 0x001000
|
||||
#define ZIO_FLAG_RESILVER 0x002000
|
||||
#define ZIO_FLAG_SCRUB 0x004000
|
||||
#define ZIO_FLAG_SCRUB_THREAD 0x008000
|
||||
|
||||
#define ZIO_FLAG_PROBE 0x10000
|
||||
#define ZIO_FLAG_GANG_CHILD 0x20000
|
||||
#define ZIO_FLAG_RAW 0x40000
|
||||
#define ZIO_FLAG_GODFATHER 0x80000
|
||||
#define ZIO_FLAG_PROBE 0x010000
|
||||
#define ZIO_FLAG_GANG_CHILD 0x020000
|
||||
#define ZIO_FLAG_RAW 0x040000
|
||||
#define ZIO_FLAG_GODFATHER 0x080000
|
||||
|
||||
#define ZIO_FLAG_TRYHARD 0x100000
|
||||
|
||||
#define ZIO_FLAG_GANG_INHERIT \
|
||||
(ZIO_FLAG_CANFAIL | \
|
||||
@ -159,7 +161,8 @@ enum zio_compress {
|
||||
(ZIO_FLAG_GANG_INHERIT | \
|
||||
ZIO_FLAG_IO_REPAIR | \
|
||||
ZIO_FLAG_IO_RETRY | \
|
||||
ZIO_FLAG_PROBE)
|
||||
ZIO_FLAG_PROBE | \
|
||||
ZIO_FLAG_TRYHARD)
|
||||
|
||||
#define ZIO_FLAG_AGG_INHERIT \
|
||||
(ZIO_FLAG_DONT_AGGREGATE | \
|
||||
@ -440,7 +443,7 @@ extern int zio_inject_list_next(int *id, char *name, size_t buflen,
|
||||
struct zinject_record *record);
|
||||
extern int zio_clear_fault(int id);
|
||||
extern int zio_handle_fault_injection(zio_t *zio, int error);
|
||||
extern int zio_handle_device_injection(vdev_t *vd, int error);
|
||||
extern int zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error);
|
||||
extern int zio_handle_label_injection(zio_t *zio, int error);
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
@ -928,7 +928,7 @@ vdev_probe(vdev_t *vd, zio_t *zio)
|
||||
|
||||
vps->vps_flags = ZIO_FLAG_CANFAIL | ZIO_FLAG_PROBE |
|
||||
ZIO_FLAG_DONT_CACHE | ZIO_FLAG_DONT_AGGREGATE |
|
||||
ZIO_FLAG_DONT_RETRY;
|
||||
ZIO_FLAG_TRYHARD;
|
||||
|
||||
if (spa_config_held(spa, SCL_ZIO, RW_WRITER)) {
|
||||
/*
|
||||
@ -1025,7 +1025,7 @@ vdev_open(vdev_t *vd)
|
||||
error = vd->vdev_ops->vdev_op_open(vd, &osize, &ashift);
|
||||
|
||||
if (zio_injection_enabled && error == 0)
|
||||
error = zio_handle_device_injection(vd, ENXIO);
|
||||
error = zio_handle_device_injection(vd, NULL, ENXIO);
|
||||
|
||||
if (error) {
|
||||
if (vd->vdev_removed &&
|
||||
@ -2207,6 +2207,16 @@ vdev_stat_update(zio_t *zio, uint64_t psize)
|
||||
if (flags & ZIO_FLAG_SPECULATIVE)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If this is an I/O error that is going to be retried, then ignore the
|
||||
* error. Otherwise, the user may interpret B_FAILFAST I/O errors as
|
||||
* hard errors, when in reality they can happen for any number of
|
||||
* innocuous reasons (bus resets, MPxIO link failure, etc).
|
||||
*/
|
||||
if (zio->io_error == EIO &&
|
||||
!(zio->io_flags & ZIO_FLAG_IO_RETRY))
|
||||
return;
|
||||
|
||||
mutex_enter(&vd->vdev_stat_lock);
|
||||
if (type == ZIO_TYPE_READ && !vdev_is_dead(vd)) {
|
||||
if (zio->io_error == ECKSUM)
|
||||
|
@ -401,8 +401,9 @@ vdev_disk_io_start(zio_t *zio)
|
||||
|
||||
bioinit(bp);
|
||||
bp->b_flags = B_BUSY | B_NOCACHE |
|
||||
(zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE) |
|
||||
((zio->io_flags & ZIO_FLAG_IO_RETRY) ? 0 : B_FAILFAST);
|
||||
(zio->io_type == ZIO_TYPE_READ ? B_READ : B_WRITE);
|
||||
if (!(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
|
||||
bp->b_flags |= B_FAILFAST;
|
||||
bp->b_bcount = zio->io_size;
|
||||
bp->b_un.b_addr = zio->io_data;
|
||||
bp->b_lblkno = lbtodb(zio->io_offset);
|
||||
|
@ -339,8 +339,8 @@ vdev_label_read_config(vdev_t *vd)
|
||||
nvlist_t *config = NULL;
|
||||
vdev_phys_t *vp;
|
||||
zio_t *zio;
|
||||
int flags =
|
||||
ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
|
||||
int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
|
||||
ZIO_FLAG_SPECULATIVE;
|
||||
|
||||
ASSERT(spa_config_held(spa, SCL_STATE_ALL, RW_WRITER) == SCL_STATE_ALL);
|
||||
|
||||
@ -349,6 +349,7 @@ vdev_label_read_config(vdev_t *vd)
|
||||
|
||||
vp = zio_buf_alloc(sizeof (vdev_phys_t));
|
||||
|
||||
retry:
|
||||
for (int l = 0; l < VDEV_LABELS; l++) {
|
||||
|
||||
zio = zio_root(spa, NULL, NULL, flags);
|
||||
@ -368,6 +369,11 @@ vdev_label_read_config(vdev_t *vd)
|
||||
}
|
||||
}
|
||||
|
||||
if (config == NULL && !(flags & ZIO_FLAG_TRYHARD)) {
|
||||
flags |= ZIO_FLAG_TRYHARD;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
zio_buf_free(vp, sizeof (vdev_phys_t));
|
||||
|
||||
return (config);
|
||||
@ -648,6 +654,7 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
|
||||
/*
|
||||
* Write everything in parallel.
|
||||
*/
|
||||
retry:
|
||||
zio = zio_root(spa, NULL, NULL, flags);
|
||||
|
||||
for (int l = 0; l < VDEV_LABELS; l++) {
|
||||
@ -674,6 +681,11 @@ vdev_label_init(vdev_t *vd, uint64_t crtxg, vdev_labeltype_t reason)
|
||||
|
||||
error = zio_wait(zio);
|
||||
|
||||
if (error != 0 && !(flags & ZIO_FLAG_TRYHARD)) {
|
||||
flags |= ZIO_FLAG_TRYHARD;
|
||||
goto retry;
|
||||
}
|
||||
|
||||
nvlist_free(label);
|
||||
zio_buf_free(pad2, VDEV_PAD_SIZE);
|
||||
zio_buf_free(ub, VDEV_UBERBLOCK_SIZE(vd));
|
||||
@ -760,8 +772,8 @@ vdev_uberblock_load(zio_t *zio, vdev_t *vd, uberblock_t *ubbest)
|
||||
{
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
vdev_t *rvd = spa->spa_root_vdev;
|
||||
int flags =
|
||||
ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL | ZIO_FLAG_SPECULATIVE;
|
||||
int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL |
|
||||
ZIO_FLAG_SPECULATIVE | ZIO_FLAG_TRYHARD;
|
||||
|
||||
if (vd == rvd) {
|
||||
ASSERT(zio == NULL);
|
||||
@ -999,7 +1011,7 @@ vdev_label_sync_list(spa_t *spa, int l, uint64_t txg, int flags)
|
||||
* at any time, you can just call it again, and it will resume its work.
|
||||
*/
|
||||
int
|
||||
vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg)
|
||||
vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg, boolean_t tryhard)
|
||||
{
|
||||
spa_t *spa = svd[0]->vdev_spa;
|
||||
uberblock_t *ub = &spa->spa_uberblock;
|
||||
@ -1008,6 +1020,16 @@ vdev_config_sync(vdev_t **svd, int svdcount, uint64_t txg)
|
||||
int error;
|
||||
int flags = ZIO_FLAG_CONFIG_WRITER | ZIO_FLAG_CANFAIL;
|
||||
|
||||
/*
|
||||
* Normally, we don't want to try too hard to write every label and
|
||||
* uberblock. If there is a flaky disk, we don't want the rest of the
|
||||
* sync process to block while we retry. But if we can't write a
|
||||
* single label out, we should retry with ZIO_FLAG_TRYHARD before
|
||||
* bailing out and declaring the pool faulted.
|
||||
*/
|
||||
if (tryhard)
|
||||
flags |= ZIO_FLAG_TRYHARD;
|
||||
|
||||
ASSERT(ub->ub_txg <= txg);
|
||||
|
||||
/*
|
||||
|
@ -134,6 +134,15 @@ zfs_ereport_post(const char *subclass, spa_t *spa, vdev_t *vd, zio_t *zio,
|
||||
if (zio->io_flags & ZIO_FLAG_SPECULATIVE)
|
||||
return;
|
||||
|
||||
/*
|
||||
* If this I/O is not a retry I/O, don't post an ereport.
|
||||
* Otherwise, we risk making bad diagnoses based on B_FAILFAST
|
||||
* I/Os.
|
||||
*/
|
||||
if (zio->io_error == EIO &&
|
||||
!(zio->io_flags & ZIO_FLAG_IO_RETRY))
|
||||
return;
|
||||
|
||||
if (vd != NULL) {
|
||||
/*
|
||||
* If the vdev has already been marked as failing due
|
||||
|
@ -1870,7 +1870,8 @@ zio_vdev_io_done(zio_t *zio)
|
||||
vdev_cache_write(zio);
|
||||
|
||||
if (zio_injection_enabled && zio->io_error == 0)
|
||||
zio->io_error = zio_handle_device_injection(vd, EIO);
|
||||
zio->io_error = zio_handle_device_injection(vd,
|
||||
zio, EIO);
|
||||
|
||||
if (zio_injection_enabled && zio->io_error == 0)
|
||||
zio->io_error = zio_handle_label_injection(zio, EIO);
|
||||
|
@ -19,7 +19,7 @@
|
||||
* CDDL HEADER END
|
||||
*/
|
||||
/*
|
||||
* Copyright 2008 Sun Microsystems, Inc. All rights reserved.
|
||||
* Copyright 2009 Sun Microsystems, Inc. All rights reserved.
|
||||
* Use is subject to license terms.
|
||||
*/
|
||||
|
||||
@ -195,7 +195,7 @@ zio_handle_label_injection(zio_t *zio, int error)
|
||||
|
||||
|
||||
int
|
||||
zio_handle_device_injection(vdev_t *vd, int error)
|
||||
zio_handle_device_injection(vdev_t *vd, zio_t *zio, int error)
|
||||
{
|
||||
inject_handler_t *handler;
|
||||
int ret = 0;
|
||||
@ -210,6 +210,12 @@ zio_handle_device_injection(vdev_t *vd, int error)
|
||||
continue;
|
||||
|
||||
if (vd->vdev_guid == handler->zi_record.zi_guid) {
|
||||
if (handler->zi_record.zi_failfast &&
|
||||
(zio == NULL || (zio->io_flags &
|
||||
(ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (handler->zi_record.zi_error == error) {
|
||||
/*
|
||||
* For a failed open, pretend like the device
|
||||
|
Loading…
x
Reference in New Issue
Block a user