From 5ca679e3c447f6d03c2457e6e072b10eb6ffdb48 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Fri, 8 Mar 2019 21:13:45 +0000 Subject: [PATCH] MFV/ZoL: Disable LBA weighting on files and SSDs The LBA weighting makes sense on rotational media where the outer tracks have twice the bandwidth of the inner tracks. However, it is detrimental on nonrotational media such as solid state disks, where the only effect is to ensure that metaslabs enter the best-fit allocation behavior sooner, which is detrimental to performance. It also makes no sense on files where the underlying filesystem can arrange things however it wants. Author: Richard Yao Signed-off-by: Richard Yao Signed-off-by: Brian Behlendorf Closes #3712 zfsonlinux/zfs@fb40095f5f0853946f8150481ca22602d1334dfe To reduce code divergence this merge replaces equivalent but different FreeBSD code detecting non-rotating medium vdevs. MFC after: 1 month --- .../contrib/opensolaris/uts/common/fs/zfs/metaslab.c | 2 +- .../opensolaris/uts/common/fs/zfs/sys/vdev_impl.h | 4 +--- sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c | 9 ++++++++- .../contrib/opensolaris/uts/common/fs/zfs/vdev_file.c | 3 +++ .../contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c | 6 +++--- .../contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c | 2 +- .../contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c | 2 +- 7 files changed, 18 insertions(+), 10 deletions(-) diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c index 7e98ad65046e..fcb1f3487b31 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/metaslab.c @@ -1830,7 +1830,7 @@ metaslab_space_weight(metaslab_t *msp) * In effect, this means that we'll select the metaslab with the most * free bandwidth rather than simply the one with the most free space. */ - if (metaslab_lba_weighting_enabled) { + if (!vd->vdev_nonrot && metaslab_lba_weighting_enabled) { weight = 2 * weight - (msp->ms_id * weight) / vd->vdev_ms_count; ASSERT(weight >= space && weight <= 2 * space); } diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h index 4a3af854d465..476b2b18b472 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/sys/vdev_impl.h @@ -235,6 +235,7 @@ struct vdev { vdev_stat_t vdev_stat; /* virtual device statistics */ boolean_t vdev_expanding; /* expand the vdev? */ boolean_t vdev_reopening; /* reopen in progress? */ + boolean_t vdev_nonrot; /* true if solid state */ int vdev_open_error; /* error on last open */ kthread_t *vdev_open_thread; /* thread opening children */ uint64_t vdev_crtxg; /* txg when top-level was added */ @@ -372,9 +373,6 @@ struct vdev { zio_t *vdev_probe_zio; /* root of current probe */ vdev_aux_t vdev_label_aux; /* on-disk aux state */ struct trim_map *vdev_trimmap; /* map on outstanding trims */ - uint16_t vdev_rotation_rate; /* rotational rate of the media */ -#define VDEV_RATE_UNKNOWN 0 -#define VDEV_RATE_NON_ROTATING 1 uint64_t vdev_leaf_zap; /* diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c index 67f26e0cb027..2f210cebaedc 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev.c @@ -1476,15 +1476,19 @@ vdev_open_children(vdev_t *vd) taskq_t *tq; int children = vd->vdev_children; + vd->vdev_nonrot = B_TRUE; + /* * in order to handle pools on top of zvols, do the opens * in a single thread so that the same thread holds the * spa_namespace_lock */ if (B_TRUE || vdev_uses_zvols(vd)) { - for (int c = 0; c < children; c++) + for (int c = 0; c < children; c++) { vd->vdev_child[c]->vdev_open_error = vdev_open(vd->vdev_child[c]); + vd->vdev_nonrot &= vd->vdev_child[c]->vdev_nonrot; + } return; } tq = taskq_create("vdev_open", children, minclsyspri, @@ -1495,6 +1499,9 @@ vdev_open_children(vdev_t *vd) TQ_SLEEP) != 0); taskq_destroy(tq); + + for (int c = 0; c < children; c++) + vd->vdev_nonrot &= vd->vdev_child[c]->vdev_nonrot; } /* diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c index c198d77e21d4..6cc5343b5e69 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_file.c @@ -72,6 +72,9 @@ vdev_file_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize, vattr_t vattr; int error; + /* Rotational optimizations only make sense on block devices */ + vd->vdev_nonrot = B_TRUE; + /* * We must have a pathname, and it must be absolute. */ diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c index fddb50081d80..034ee00c1203 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c @@ -88,10 +88,10 @@ vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp) uint16_t rate; error = g_getattr("GEOM::rotation_rate", cp, &rate); - if (error == 0) - vd->vdev_rotation_rate = rate; + if (error == 0 && rate == 1) + vd->vdev_nonrot = B_TRUE; else - vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN; + vd->vdev_nonrot = B_FALSE; } static void diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c index 26be35fc3501..4691da726ec2 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_mirror.c @@ -169,7 +169,7 @@ vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset) load = vdev_queue_length(vd); lastoffset = vdev_queue_lastoffset(vd); - if (vd->vdev_rotation_rate == VDEV_RATE_NON_ROTATING) { + if (vd->vdev_nonrot) { /* Non-rotating media. */ if (lastoffset == zio_offset) return (load + non_rotating_inc); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c index a19dbdb27285..556afda1c70e 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_queue.c @@ -686,7 +686,7 @@ vdev_queue_aggregate(vdev_queue_t *vq, zio_t *zio) ASSERT(MUTEX_HELD(&vq->vq_lock)); maxblocksize = spa_maxblocksize(vq->vq_vdev->vdev_spa); - if (vq->vq_vdev->vdev_rotation_rate == VDEV_RATE_NON_ROTATING) + if (vq->vq_vdev->vdev_nonrot) limit = zfs_vdev_aggregation_limit_non_rotating; else limit = zfs_vdev_aggregation_limit;