Allow to control failfast

Linux defaults to setting "failfast" on BIOs, so that the OS will not
retry IOs that fail, and instead report the error to ZFS.

In some cases, such as errors reported by the HBA driver, not
the device itself, we would wish to retry rather than generating
vdev errors in ZFS. This new property allows that.

This introduces a per vdev option to disable the failfast option.
This also introduces a global module parameter to define the failfast
mask value.

Reviewed-by: Brian Behlendorf <behlendorf1@llnl.gov>
Co-authored-by: Allan Jude <allan@klarasystems.com>
Signed-off-by: Allan Jude <allan@klarasystems.com>
Signed-off-by: Mariusz Zaborski <mariusz.zaborski@klarasystems.com>
Sponsored-by: Seagate Technology LLC
Submitted-by: Klara, Inc.
Closes #14056
This commit is contained in:
Mariusz Zaborski 2022-11-10 22:37:12 +01:00 committed by GitHub
parent 945b407486
commit 16f0fdaddd
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
9 changed files with 94 additions and 8 deletions

View File

@ -126,7 +126,8 @@ typedef int bvec_iterator_t;
#endif
static inline void
bio_set_flags_failfast(struct block_device *bdev, int *flags)
bio_set_flags_failfast(struct block_device *bdev, int *flags, bool dev,
bool transport, bool driver)
{
#ifdef CONFIG_BUG
/*
@ -148,7 +149,12 @@ bio_set_flags_failfast(struct block_device *bdev, int *flags)
#endif /* BLOCK_EXT_MAJOR */
#endif /* CONFIG_BUG */
*flags |= REQ_FAILFAST_MASK;
if (dev)
*flags |= REQ_FAILFAST_DEV;
if (transport)
*flags |= REQ_FAILFAST_TRANSPORT;
if (driver)
*flags |= REQ_FAILFAST_DRIVER;
}
/*

View File

@ -355,6 +355,7 @@ typedef enum {
VDEV_PROP_BYTES_TRIM,
VDEV_PROP_REMOVING,
VDEV_PROP_ALLOCATING,
VDEV_PROP_FAILFAST,
VDEV_NUM_PROPS
} vdev_prop_t;

View File

@ -299,6 +299,7 @@ struct vdev {
uint64_t vdev_islog; /* is an intent log device */
uint64_t vdev_noalloc; /* device is passivated? */
uint64_t vdev_removing; /* device is being removed? */
uint64_t vdev_failfast; /* device failfast setting */
boolean_t vdev_ishole; /* is a hole in the namespace */
uint64_t vdev_top_zap;
vdev_alloc_bias_t vdev_alloc_bias; /* metaslab allocation bias */

View File

@ -3214,7 +3214,8 @@
<enumerator name='VDEV_PROP_BYTES_TRIM' value='38'/>
<enumerator name='VDEV_PROP_REMOVING' value='39'/>
<enumerator name='VDEV_PROP_ALLOCATING' value='40'/>
<enumerator name='VDEV_NUM_PROPS' value='41'/>
<enumerator name='VDEV_PROP_FAILFAST' value='41'/>
<enumerator name='VDEV_NUM_PROPS' value='42'/>
</enum-decl>
<typedef-decl name='vdev_prop_t' type-id='1573bec8' id='5aa5c90c'/>
<enum-decl name='vdev_state' id='21566197'>

View File

@ -15,7 +15,7 @@
.\" own identifying information:
.\" Portions Copyright [yyyy] [name of copyright owner]
.\"
.Dd November 7, 2022
.Dd November 9, 2022
.Dt ZFS 4
.Os
.
@ -1345,6 +1345,19 @@ as fuller devices will tend to be slower than empty devices.
Also see
.Sy zio_dva_throttle_enabled .
.
.It Sy zfs_vdev_failfast_mask Ns = Ns Sy 1 Pq uint
Defines if the driver should retire on a given error type.
The following options may be bitwise-ored together:
.TS
box;
lbz r l l .
Value Name Description
_
1 Device No driver retries on device errors
2 Transport No driver retries on transport errors.
4 Driver No driver retries on driver errors.
.TE
.
.It Sy zfs_expire_snapshot Ns = Ns Sy 300 Ns s Pq int
Time before expiring
.Pa .zfs/snapshot .
@ -1364,7 +1377,7 @@ The following flags may be bitwise-ored together:
.TS
box;
lbz r l l .
Value Symbolic Name Description
Value Name Description
_
1 ZFS_DEBUG_DPRINTF Enable dprintf entries in the debug log.
* 2 ZFS_DEBUG_DBUF_VERIFY Enable extra dbuf verifications.

View File

@ -20,7 +20,7 @@
.\"
.\" Copyright (c) 2021 Klara, Inc.
.\"
.Dd November 27, 2021
.Dd October 30, 2022
.Dt VDEVPROPS 7
.Os
.
@ -121,6 +121,9 @@ dataset.
A text comment up to 8192 characters long
.It Sy bootsize
The amount of space to reserve for the EFI system partition
.It Sy failfast
If this device should propage BIO errors back to ZFS, used to disable
failfast.
.It Sy path
The path to the device for this vdev
.It Sy allocating

View File

@ -74,6 +74,12 @@ typedef struct dio_request {
struct bio *dr_bio[0]; /* Attached bio's */
} dio_request_t;
/*
* BIO request failfast mask.
*/
static unsigned int zfs_vdev_failfast_mask = 1;
static fmode_t
vdev_bdev_mode(spa_mode_t spa_mode)
{
@ -659,8 +665,11 @@ __vdev_disk_physio(struct block_device *bdev, zio_t *zio,
retry:
dr = vdev_disk_dio_alloc(bio_count);
if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)))
bio_set_flags_failfast(bdev, &flags);
if (zio && !(zio->io_flags & (ZIO_FLAG_IO_RETRY | ZIO_FLAG_TRYHARD)) &&
zio->io_vd->vdev_failfast == B_TRUE) {
bio_set_flags_failfast(bdev, &flags, zfs_vdev_failfast_mask & 1,
zfs_vdev_failfast_mask & 2, zfs_vdev_failfast_mask & 4);
}
dr->dr_zio = zio;
@ -1045,3 +1054,6 @@ param_set_max_auto_ashift(const char *buf, zfs_kernel_param_t *kp)
ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, open_timeout_ms, UINT, ZMOD_RW,
"Timeout before determining that a device is missing");
ZFS_MODULE_PARAM(zfs_vdev, zfs_vdev_, failfast_mask, UINT, ZMOD_RW,
"Defines failfast mask: 1 - device, 2 - transport, 4 - driver");

View File

@ -420,6 +420,9 @@ vdev_prop_init(void)
boolean_na_table, sfeatures);
/* default index properties */
zprop_register_index(VDEV_PROP_FAILFAST, "failfast", B_TRUE,
PROP_DEFAULT, ZFS_TYPE_VDEV, "on | off", "FAILFAST", boolean_table,
sfeatures);
/* hidden properties */
zprop_register_hidden(VDEV_PROP_NAME, "name", PROP_TYPE_STRING,

View File

@ -3563,6 +3563,26 @@ vdev_load(vdev_t *vd)
}
}
if (vd == vd->vdev_top && vd->vdev_top_zap != 0) {
spa_t *spa = vd->vdev_spa;
uint64_t failfast;
error = zap_lookup(spa->spa_meta_objset, vd->vdev_top_zap,
vdev_prop_to_name(VDEV_PROP_FAILFAST), sizeof (failfast),
1, &failfast);
if (error == 0) {
vd->vdev_failfast = failfast & 1;
} else if (error == ENOENT) {
vd->vdev_failfast = vdev_prop_default_numeric(
VDEV_PROP_FAILFAST);
} else {
vdev_dbgmsg(vd,
"vdev_load: zap_lookup(top_zap=%llu) "
"failed [error=%d]",
(u_longlong_t)vd->vdev_top_zap, error);
}
}
/*
* Load any rebuild state from the top-level vdev zap.
*/
@ -5709,6 +5729,13 @@ vdev_prop_set(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
else
error = spa_vdev_alloc(spa, vdev_guid);
break;
case VDEV_PROP_FAILFAST:
if (nvpair_value_uint64(elem, &intval) != 0) {
error = EINVAL;
break;
}
vd->vdev_failfast = intval & 1;
break;
default:
/* Most processing is done in vdev_props_set_sync */
break;
@ -6019,6 +6046,25 @@ vdev_prop_get(vdev_t *vd, nvlist_t *innvl, nvlist_t *outnvl)
intval = ZPROP_BOOLEAN_NA;
}
vdev_prop_add_list(outnvl, propname, strval,
intval, src);
break;
case VDEV_PROP_FAILFAST:
src = ZPROP_SRC_LOCAL;
strval = NULL;
err = zap_lookup(mos, objid, nvpair_name(elem),
sizeof (uint64_t), 1, &intval);
if (err == ENOENT) {
intval = vdev_prop_default_numeric(
prop);
err = 0;
} else if (err) {
break;
}
if (intval == vdev_prop_default_numeric(prop))
src = ZPROP_SRC_DEFAULT;
vdev_prop_add_list(outnvl, propname, strval,
intval, src);
break;