Allow to control failfast
Linux defaults to setting "failfast" on BIOs, so that the OS will not retry IOs that fail, and instead report the error to ZFS. In some cases, such as errors reported by the HBA driver, not the device itself, we would wish to retry rather than generating vdev errors in ZFS. This new property allows that. This introduces a per vdev option to disable the failfast option. This also introduces a global module parameter to define the failfast mask value. Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov> Co-authored-by: Allan Jude <allan@klarasystems.com> Signed-off-by: Allan Jude <allan@klarasystems.com> Signed-off-by: Mariusz Zaborski <mariusz.zaborski@klarasystems.com> Sponsored-by: Seagate Technology LLC Submitted-by: Klara, Inc. Closes #14056
This commit is contained in:
parent
945b407486
commit
16f0fdaddd
@ -126,7 +126,8 @@ typedef int bvec_iterator_t;
|
||||
#endif
|
||||
|
||||
static inline void
|
||||
bio_set_flags_failfast(struct block_device *bdev, int *flags)
|
||||
bio_set_flags_failfast(struct block_device *bdev, int *flags, bool dev,
|
||||
bool transport, bool driver)
|
||||
{
|
||||
#ifdef CONFIG_BUG
|
||||
/*
|
||||
@ -148,7 +149,12 @@ bio_set_flags_failfast(struct block_device *bdev, int *flags)
|
||||
#endif /* BLOCK_EXT_MAJOR */
|
||||
#endif /* CONFIG_BUG */
|
||||
|
||||
*flags |= REQ_FAILFAST_MASK;
|
||||
if (dev)
|
||||
*flags |= REQ_FAILFAST_DEV;
|
||||
if (transport)
|
||||
*flags |= REQ_FAILFAST_TRANSPORT;
|
||||
if (driver)
|
||||
*flags |= REQ_FAILFAST_DRIVER;
|
||||
}
|
||||
|
||||
/*
|
||||
|
@ -355,6 +355,7 @@ typedef enum {
|
||||
VDEV_PROP_BYTES_TRIM,
|
||||
VDEV_PROP_REMOVING,
|
||||
VDEV_PROP_ALLOCATING,
|
||||
VDEV_PROP_FAILFAST,
|
||||
VDEV_NUM_PROPS
|
||||
} vdev_prop_t;
|
||||
|
||||
|
@ -299,6 +299,7 @@ struct vdev {
|
||||
uint64_t vdev_islog; /* is an intent log device */
|
||||
uint64_t vdev_noalloc; /* device is passivated? */
|
||||
uint64_t vdev_removing; /* device is being removed? */
|
||||
uint64_t vdev_failfast; /* device failfast setting */
|
||||
boolean_t vdev_ishole; /* is a hole in the namespace */
|
||||
uint64_t vdev_top_zap;
|
||||
vdev_alloc_bias_t vdev_alloc_bias; /* metaslab allocation bias */
|
||||
|
@ -3214,7 +3214,8 @@
|
||||
<enumerator name='VDEV_PROP_BYTES_TRIM' value='38'/>
|
||||
<enumerator name='VDEV_PROP_REMOVING' value='39'/>
|
||||
<enumerator name='VDEV_PROP_ALLOCATING' value='40'/>
|
||||
<enumerator name='VDEV_NUM_PROPS' value='41'/>
|
||||
<enumerator name='VDEV_PROP_FAILFAST' value='41'/>
|
||||
<enumerator name='VDEV_NUM_PROPS' value='42'/>
|
||||
</enum-decl>
|
||||
<typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
|
||||
<enum-decl name='vdev_state' id='21566197'>
|
||||
|
@ -15,7 +15,7 @@
|
||||
.\" own identifying information:
|
||||
.\" Portions Copyright [yyyy] [name of copyright owner]
|
||||
.\"
|
||||
.Dd November 7, 2022
|
||||
.Dd November 9, 2022
|
||||
.Dt ZFS 4
|
||||
.Os
|
||||
.
|
||||
@ -1345,6 +1345,19 @@ as fuller devices will tend to be slower than empty devices.
|
||||
Also see
|
||||
.Sy zio_dva_throttle_enabled .
|
||||
.
|
||||
.It Sy zfs_vdev_failfast_mask Ns = Ns Sy 1 Pq uint
|
||||
Defines if the driver should retry on a given error type.
|
||||
The following options may be bitwise-ored together:
|
||||
.TS
|
||||
box;
|
||||
lbz r l l .
|
||||
Value Name Description
|
||||
_
|
||||
1 Device No driver retries on device errors
|
||||
2 Transport No driver retries on transport errors.
|
||||
4 Driver No driver retries on driver errors.
|
||||
.TE
|
||||
.
|
||||
.It Sy zfs_expire_snapshot Ns = Ns Sy 300 Ns s Pq int
|
||||
Time before expiring
|
||||
.Pa .zfs/snapshot .
|
||||
@ -1364,7 +1377,7 @@ The following flags may be bitwise-ored together:
|
||||
.TS
|
||||
box;
|
||||
lbz r l l .
|
||||
Value Symbolic Name Description
|
||||
Value Name Description
|
||||
_
|
||||
1 ZFS_DEBUG_DPRINTF Enable dprintf entries in the debug log.
|
||||
* 2 ZFS_DEBUG_DBUF_VERIFY Enable extra dbuf verifications.
|
||||
|
@ -20,7 +20,7 @@
|
||||
.\"
|
||||
.\" Copyright (c) 2021 Klara, Inc.
|
||||
.\"
|
||||
.Dd November 27, 2021
|
||||
.Dd October 30, 2022
|
||||
.Dt VDEVPROPS 7
|
||||
.Os
|
||||
.
|
||||
@ -121,6 +121,9 @@ dataset.
|
||||
A text comment up to 8192 characters long
|
||||
.It Sy bootsize
|
||||
The amount of space to reserve for the EFI system partition
|
||||
.It Sy failfast
|
||||
If this device should propagate BIO errors back to ZFS, used to disable
|
||||
failfast.
|
||||
.It Sy path
|
||||
The path to the device for this vdev
|
||||
.It Sy allocating
|
||||
|
@ -74,6 +74,12 @@ typedef struct dio_request {
|
||||
struct bio *dr_bio[0]; /* Attached bio's */
|
||||
} dio_request_t;
|
||||
|
||||
/*
|
||||
* BIO request failfast mask.
|
||||
*/
|
||||
|
||||
static unsigned int zfs_vdev_failfast_mask = 1;
|
||||
|
||||
static fmode_t
|
||||
vdev_bdev_mode(spa_mode_t spa_mode)
|
||||
{
|
||||
@ -659,8 +665,11 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio,
|
||||
retry:
|
||||
dr = vdev_disk_dio_alloc(bio_count);
|
||||
|
||||
if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
|
||||
bio_set_flags_failfast(bdev, &flags);
|
||||
if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)) &&
|
||||
zio->io_vd->vdev_failfast == B_TRUE) {
|
||||
bio_set_flags_failfast(bdev, &flags, zfs_vdev_failfast_mask & 1,
|
||||
zfs_vdev_failfast_mask & 2, zfs_vdev_failfast_mask & 4);
|
||||
}
|
||||
|
||||
dr->dr_zio = zio;
|
||||
|
||||
@ -1045,3 +1054,6 @@ param_set_max_auto_ashift(const char *buf, zfs_kernel_param_t *kp)
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, open_timeout_ms, UINT, ZMOD_RW,
|
||||
"Timeout before determining that a device is missing");
|
||||
|
||||
ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, failfast_mask, UINT, ZMOD_RW,
|
||||
"Defines failfast mask: 1 - device, 2 - transport, 4 - driver");
|
||||
|
@ -420,6 +420,9 @@ vdev_prop_init(void)
|
||||
boolean_na_table, sfeatures);
|
||||
|
||||
/* default index properties */
|
||||
zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,
|
||||
PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "FAILFAST", boolean_table,
|
||||
sfeatures);
|
||||
|
||||
/* hidden properties */
|
||||
zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING,
|
||||
|
@ -3563,6 +3563,26 @@ vdev_load(vdev_t *vd)
|
||||
}
|
||||
}
|
||||
|
||||
if (vd == vd->vdev_top && vd->vdev_top_zap != 0) {
|
||||
spa_t *spa = vd->vdev_spa;
|
||||
uint64_t failfast;
|
||||
|
||||
error = zap_lookup(spa->spa_meta_objset, vd->vdev_top_zap,
|
||||
vdev_prop_to_name(VDEV_PROP_FAILFAST), sizeof (failfast),
|
||||
1, &failfast);
|
||||
if (error == 0) {
|
||||
vd->vdev_failfast = failfast & 1;
|
||||
} else if (error == ENOENT) {
|
||||
vd->vdev_failfast = vdev_prop_default_numeric(
|
||||
VDEV_PROP_FAILFAST);
|
||||
} else {
|
||||
vdev_dbgmsg(vd,
|
||||
"vdev_load: zap_lookup(top_zap=%llu) "
|
||||
"failed [error=%d]",
|
||||
(u_longlong_t)vd->vdev_top_zap, error);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Load any rebuild state from the top-level vdev zap.
|
||||
*/
|
||||
@ -5709,6 +5729,13 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
else
|
||||
error = spa_vdev_alloc(spa, vdev_guid);
|
||||
break;
|
||||
case VDEV_PROP_FAILFAST:
|
||||
if (nvpair_value_uint64(elem, &intval) != 0) {
|
||||
error = EINVAL;
|
||||
break;
|
||||
}
|
||||
vd->vdev_failfast = intval & 1;
|
||||
break;
|
||||
default:
|
||||
/* Most processing is done in vdev_props_set_sync */
|
||||
break;
|
||||
@ -6019,6 +6046,25 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
|
||||
intval = ZPROP_BOOLEAN_NA;
|
||||
}
|
||||
|
||||
vdev_prop_add_list(outnvl, propname, strval,
|
||||
intval, src);
|
||||
break;
|
||||
case VDEV_PROP_FAILFAST:
|
||||
src = ZPROP_SRC_LOCAL;
|
||||
strval = NULL;
|
||||
|
||||
err = zap_lookup(mos, objid, nvpair_name(elem),
|
||||
sizeof (uint64_t), 1, &intval);
|
||||
if (err == ENOENT) {
|
||||
intval = vdev_prop_default_numeric(
|
||||
prop);
|
||||
err = 0;
|
||||
} else if (err) {
|
||||
break;
|
||||
}
|
||||
if (intval == vdev_prop_default_numeric(prop))
|
||||
src = ZPROP_SRC_DEFAULT;
|
||||
|
||||
vdev_prop_add_list(outnvl, propname, strval,
|
||||
intval, src);
|
||||
break;
|
||||
|
Loading…
Reference in New Issue
Block a user