MFC r256956:
Improve ZFS N-way mirror read performance by using load and locality
information.

MFC r260713:
Fix ZFS mirror code for handling multiple DVA's.

Also make the addition of the d_rotation_rate binary compatible. This
allows storage drivers compiled for 10.0 to work by preserving the ABI
for disks.

Approved by:	re (gjb)
Sponsored by:	Multiplay
commit a7cb473513
parent 73701b0292
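The core of r256956 is a per-child load score: each readable mirror child is charged its pending queue depth (vdev_queue_length()) plus a seek increment that depends on whether the media rotates and on how far the new I/O lands from the last I/O queued to that child (vdev_queue_lastoffset()); the read goes to the lowest-scoring child, with ties broken pseudo-randomly. The standalone sketch below models that scoring outside the kernel; child_t, child_load and select_child are illustrative names, not identifiers from this commit, and the increments mirror the tunable defaults in the diff.

	/* Minimal userland model of the load heuristic; illustrative only. */
	#include <limits.h>
	#include <stdint.h>
	#include <stdio.h>
	#include <stdlib.h>

	#define ROTATING_INC		0	/* vfs.zfs.vdev.mirror.rotating_inc */
	#define ROTATING_SEEK_INC	5	/* ...rotating_seek_inc */
	#define ROTATING_SEEK_OFFSET	(1024 * 1024)	/* ...rotating_seek_offset */
	#define NON_ROTATING_INC	0	/* ...non_rotating_inc */
	#define NON_ROTATING_SEEK_INC	1	/* ...non_rotating_seek_inc */

	typedef struct {
		int		queue_len;	/* pending I/Os, as vdev_queue_length() */
		uint64_t	last_offset;	/* end of last queued I/O */
		int		non_rotating;	/* SSD-style media? */
	} child_t;

	/* Same shape as vdev_mirror_load(): queue depth plus a seek penalty. */
	static int
	child_load(const child_t *c, uint64_t offset)
	{
		int load = c->queue_len;

		if (c->non_rotating) {
			if (c->last_offset == offset)
				return (load + NON_ROTATING_INC);
			/* Non-sequential I/O still costs command overhead on SSDs. */
			return (load + NON_ROTATING_SEEK_INC);
		}
		if (c->last_offset == offset)
			return (load + ROTATING_INC);
		/* Short seeks are charged half the increment. */
		if (llabs((int64_t)(c->last_offset - offset)) < ROTATING_SEEK_OFFSET)
			return (load + ROTATING_SEEK_INC / 2);
		return (load + ROTATING_SEEK_INC);
	}

	static int
	select_child(const child_t *mc, int children, uint64_t offset)
	{
		int c, best = -1, lowest = INT_MAX;

		for (c = 0; c < children; c++) {
			int load = child_load(&mc[c], offset);

			if (load < lowest) {
				lowest = load;
				best = c;
			}
		}
		return (best);
	}

	int
	main(void)
	{
		child_t mirror[2] = {
			{ .queue_len = 4, .last_offset = 0,    .non_rotating = 1 },
			{ .queue_len = 1, .last_offset = 8192, .non_rotating = 0 },
		};

		/* The idle HDD doing a sequential read beats the busy SSD. */
		printf("read from child %d\n", select_child(mirror, 2, 8192));
		return (0);
	}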
@@ -1229,12 +1229,13 @@ adaregister(struct cam_periph *periph, void *arg)
	    "kern.cam.ada.%d.write_cache", periph->unit_number);
	TUNABLE_INT_FETCH(announce_buf, &softc->write_cache);
	/* Disable queue sorting for non-rotational media by default. */
-	if (cgd->ident_data.media_rotation_rate == 1)
+	if (cgd->ident_data.media_rotation_rate == ATA_RATE_NON_ROTATING)
		softc->sort_io_queue = 0;
	else
		softc->sort_io_queue = -1;
	adagetparams(periph, cgd);
	softc->disk = disk_alloc();
+	softc->disk->d_rotation_rate = cgd->ident_data.media_rotation_rate;
	softc->disk->d_devstat = devstat_new_entry(periph->periph_name,
			  periph->unit_number, softc->params.secsize,
			  DEVSTAT_ALL_SUPPORTED,
@@ -2302,7 +2302,7 @@ struct scsi_vpd_block_characteristics
	u_int8_t page_length[2];
	u_int8_t medium_rotation_rate[2];
#define	SVPD_BDC_RATE_NOT_REPORTED	0x00
-#define	SVPD_BDC_RATE_NONE_ROTATING	0x01
+#define	SVPD_BDC_RATE_NON_ROTATING	0x01
	u_int8_t reserved1;
	u_int8_t nominal_form_factor;
#define	SVPD_BDC_FORM_NOT_REPORTED	0x00
@@ -3390,9 +3390,18 @@ dadone(struct cam_periph *periph, union ccb *done_ccb)
			 * Disable queue sorting for non-rotational media
			 * by default.
			 */
-			if (scsi_2btoul(bdc->medium_rotation_rate) ==
-			    SVPD_BDC_RATE_NONE_ROTATING)
+			u_int old_rate = softc->disk->d_rotation_rate;
+
+			softc->disk->d_rotation_rate =
+			    scsi_2btoul(bdc->medium_rotation_rate);
+			if (softc->disk->d_rotation_rate ==
+			    SVPD_BDC_RATE_NON_ROTATING) {
				softc->sort_io_queue = 0;
+			}
+			if (softc->disk->d_rotation_rate != old_rate) {
+				disk_attr_changed(softc->disk,
+				    "GEOM::rotation_rate", M_NOWAIT);
+			}
		} else {
			int error;
			error = daerror(done_ccb, CAM_RETRY_SELTO,
@@ -3427,6 +3436,8 @@ dadone(struct cam_periph *periph, union ccb *done_ccb)
		ptr = (uint16_t *)ata_params;

		if ((csio->ccb_h.status & CAM_STATUS_MASK) == CAM_REQ_CMP) {
+			uint16_t old_rate;
+
			for (i = 0; i < sizeof(*ata_params) / 2; i++)
				ptr[i] = le16toh(ptr[i]);
			if (ata_params->support_dsm & ATA_SUPPORT_DSM_TRIM &&
@@ -3442,8 +3453,18 @@ dadone(struct cam_periph *periph, union ccb *done_ccb)
			 * Disable queue sorting for non-rotational media
			 * by default.
			 */
-			if (ata_params->media_rotation_rate == 1)
+			old_rate = softc->disk->d_rotation_rate;
+			softc->disk->d_rotation_rate =
+			    ata_params->media_rotation_rate;
+			if (softc->disk->d_rotation_rate ==
+			    ATA_RATE_NON_ROTATING) {
				softc->sort_io_queue = 0;
+			}
+
+			if (softc->disk->d_rotation_rate != old_rate) {
+				disk_attr_changed(softc->disk,
+				    "GEOM::rotation_rate", M_NOWAIT);
+			}
		} else {
			int error;
			error = daerror(done_ccb, CAM_RETRY_SELTO,
@@ -120,6 +120,9 @@ extern void vdev_queue_init(vdev_t *vd);
extern void vdev_queue_fini(vdev_t *vd);
extern zio_t *vdev_queue_io(zio_t *zio);
extern void vdev_queue_io_done(zio_t *zio);
+extern int vdev_queue_length(vdev_t *vd);
+extern uint64_t vdev_queue_lastoffset(vdev_t *vd);
+extern void vdev_queue_register_lastoffset(vdev_t *vd, zio_t *zio);

extern void vdev_config_dirty(vdev_t *vd);
extern void vdev_config_clean(vdev_t *vd);
@@ -116,6 +116,7 @@ struct vdev_queue {
	uint64_t	vq_last_offset;
	hrtime_t	vq_io_complete_ts; /* time last i/o completed */
	kmutex_t	vq_lock;
+	uint64_t	vq_lastoffset;
};

/*
@@ -227,7 +228,10 @@ struct vdev {
	spa_aux_vdev_t	*vdev_aux;	/* for l2cache vdevs */
	zio_t		*vdev_probe_zio; /* root of current probe */
	vdev_aux_t	vdev_label_aux;	/* on-disk aux state */
-	struct trim_map	*vdev_trimmap;
+	struct trim_map	*vdev_trimmap;	/* map on outstanding trims */
+	uint16_t	vdev_rotation_rate; /* rotational rate of the media */
+#define	VDEV_RATE_UNKNOWN	0
+#define	VDEV_RATE_NON_ROTATING	1

/*
 * For DTrace to work in userland (libzpool) context, these fields must
@@ -42,9 +42,11 @@
 * Virtual device vector for GEOM.
 */

+static g_attrchanged_t vdev_geom_attrchanged;
struct g_class zfs_vdev_class = {
	.name = "ZFS::VDEV",
	.version = G_VERSION,
+	.attrchanged = vdev_geom_attrchanged,
};

DECLARE_GEOM_CLASS(zfs_vdev_class, zfs_vdev);
@@ -61,6 +63,34 @@ TUNABLE_INT("vfs.zfs.vdev.bio_delete_disable", &vdev_geom_bio_delete_disable);
SYSCTL_INT(_vfs_zfs_vdev, OID_AUTO, bio_delete_disable, CTLFLAG_RW,
    &vdev_geom_bio_delete_disable, 0, "Disable BIO_DELETE");

+static void
+vdev_geom_set_rotation_rate(vdev_t *vd, struct g_consumer *cp)
+{
+	int error;
+	uint16_t rate;
+
+	error = g_getattr("GEOM::rotation_rate", cp, &rate);
+	if (error == 0)
+		vd->vdev_rotation_rate = rate;
+	else
+		vd->vdev_rotation_rate = VDEV_RATE_UNKNOWN;
+}
+
+static void
+vdev_geom_attrchanged(struct g_consumer *cp, const char *attr)
+{
+	vdev_t *vd;
+
+	vd = cp->private;
+	if (vd == NULL)
+		return;
+
+	if (strcmp(attr, "GEOM::rotation_rate") == 0) {
+		vdev_geom_set_rotation_rate(vd, cp);
+		return;
+	}
+}
+
static void
vdev_geom_orphan(struct g_consumer *cp)
{
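The attribute plumbing above closes the loop between the disk drivers and ZFS: when da(4)/ada(4) calls disk_attr_changed() for "GEOM::rotation_rate" (see the dadone() hunks earlier), GEOM notifies each consumer attached to the disk's provider via its class's attrchanged method, which for zfs_vdev_class is vdev_geom_attrchanged() and ends in a fresh g_getattr() of the attribute. A condensed, partly paraphrased view of the chain; the intermediate GEOM event dispatch is not part of this diff and is sketched here only for orientation:

	/* driver (da/ada), on probe completion: */
	disk_attr_changed(softc->disk, "GEOM::rotation_rate", M_NOWAIT);
	/* GEOM then calls, for each attached consumer cp (paraphrased): */
	cp->geom->class->attrchanged(cp, "GEOM::rotation_rate");
	/* which is vdev_geom_attrchanged() and re-reads the rate: */
	vdev_geom_set_rotation_rate(vd, cp);	/* g_getattr() */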
@@ -689,6 +719,11 @@ vdev_geom_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
	vd->vdev_physpath = kmem_alloc(bufsize, KM_SLEEP);
	snprintf(vd->vdev_physpath, bufsize, "/dev/%s", pp->name);

+	/*
+	 * Determine the device's rotation rate.
+	 */
+	vdev_geom_set_rotation_rate(vd, cp);
+
	return (0);
}

@@ -41,27 +41,97 @@ typedef struct mirror_child {
	vdev_t		*mc_vd;
	uint64_t	mc_offset;
	int		mc_error;
+	int		mc_load;
	uint8_t		mc_tried;
	uint8_t		mc_skipped;
	uint8_t		mc_speculative;
} mirror_child_t;

typedef struct mirror_map {
+	int		*mm_preferred;
+	int		mm_preferred_cnt;
	int		mm_children;
-	int		mm_replacing;
-	int		mm_preferred;
-	int		mm_root;
-	mirror_child_t	mm_child[1];
+	boolean_t	mm_replacing;
+	boolean_t	mm_root;
+	mirror_child_t	mm_child[];
} mirror_map_t;

-int vdev_mirror_shift = 21;
+static int vdev_mirror_shift = 21;
+
+SYSCTL_DECL(_vfs_zfs_vdev);
+static SYSCTL_NODE(_vfs_zfs_vdev, OID_AUTO, mirror, CTLFLAG_RD, 0,
+    "ZFS VDEV Mirror");
+
+/*
+ * The load configuration settings below are tuned by default for
+ * the case where all devices are of the same rotational type.
+ *
+ * If there is a mixture of rotating and non-rotating media, setting
+ * non_rotating_seek_inc to 0 may well provide better results as it
+ * will direct more reads to the non-rotating vdevs which are more
+ * likely to have a higher performance.
+ */
+
+/* Rotating media load calculation configuration. */
+static int rotating_inc = 0;
+TUNABLE_INT("vfs.zfs.vdev.mirror.rotating_inc", &rotating_inc);
+SYSCTL_INT(_vfs_zfs_vdev_mirror, OID_AUTO, rotating_inc, CTLFLAG_RW,
+    &rotating_inc, 0, "Rotating media load increment for non-seeking I/O's");
+
+static int rotating_seek_inc = 5;
+TUNABLE_INT("vfs.zfs.vdev.mirror.rotating_seek_inc", &rotating_seek_inc);
+SYSCTL_INT(_vfs_zfs_vdev_mirror, OID_AUTO, rotating_seek_inc, CTLFLAG_RW,
+    &rotating_seek_inc, 0, "Rotating media load increment for seeking I/O's");
+
+static int rotating_seek_offset = 1 * 1024 * 1024;
+TUNABLE_INT("vfs.zfs.vdev.mirror.rotating_seek_offset", &rotating_seek_offset);
+SYSCTL_INT(_vfs_zfs_vdev_mirror, OID_AUTO, rotating_seek_offset, CTLFLAG_RW,
+    &rotating_seek_offset, 0, "Offset in bytes from the last I/O which "
+    "triggers a reduced rotating media seek increment");
+
+/* Non-rotating media load calculation configuration. */
+static int non_rotating_inc = 0;
+TUNABLE_INT("vfs.zfs.vdev.mirror.non_rotating_inc", &non_rotating_inc);
+SYSCTL_INT(_vfs_zfs_vdev_mirror, OID_AUTO, non_rotating_inc, CTLFLAG_RW,
+    &non_rotating_inc, 0,
+    "Non-rotating media load increment for non-seeking I/O's");
+
+static int non_rotating_seek_inc = 1;
+TUNABLE_INT("vfs.zfs.vdev.mirror.non_rotating_seek_inc",
+    &non_rotating_seek_inc);
+SYSCTL_INT(_vfs_zfs_vdev_mirror, OID_AUTO, non_rotating_seek_inc, CTLFLAG_RW,
+    &non_rotating_seek_inc, 0,
+    "Non-rotating media load increment for seeking I/O's");
+
+
+static inline size_t
+vdev_mirror_map_size(int children)
+{
+	return (offsetof(mirror_map_t, mm_child[children]) +
+	    sizeof(int) * children);
+}
+
+static inline mirror_map_t *
+vdev_mirror_map_alloc(int children, boolean_t replacing, boolean_t root)
+{
+	mirror_map_t *mm;
+
+	mm = kmem_zalloc(vdev_mirror_map_size(children), KM_SLEEP);
+	mm->mm_children = children;
+	mm->mm_replacing = replacing;
+	mm->mm_root = root;
+	mm->mm_preferred = (int *)((uintptr_t)mm +
+	    offsetof(mirror_map_t, mm_child[children]));
+
+	return mm;
+}

static void
vdev_mirror_map_free(zio_t *zio)
{
	mirror_map_t *mm = zio->io_vsd;

-	kmem_free(mm, offsetof(mirror_map_t, mm_child[mm->mm_children]));
+	kmem_free(mm, vdev_mirror_map_size(mm->mm_children));
}

static const zio_vsd_ops_t vdev_mirror_vsd_ops = {
@@ -69,55 +139,80 @@ static const zio_vsd_ops_t vdev_mirror_vsd_ops = {
	zio_vsd_default_cksum_report
};

+static int
+vdev_mirror_load(mirror_map_t *mm, vdev_t *vd, uint64_t zio_offset)
+{
+	uint64_t lastoffset;
+	int load;
+
+	/* All DVAs have equal weight at the root. */
+	if (mm->mm_root)
+		return (INT_MAX);
+
+	/*
+	 * We don't return INT_MAX if the device is resilvering i.e.
+	 * vdev_resilver_txg != 0 as when tested performance was slightly
+	 * worse overall when resilvering with compared to without.
+	 */
+
+	/* Standard load based on pending queue length. */
+	load = vdev_queue_length(vd);
+	lastoffset = vdev_queue_lastoffset(vd);
+
+	if (vd->vdev_rotation_rate == VDEV_RATE_NON_ROTATING) {
+		/* Non-rotating media. */
+		if (lastoffset == zio_offset)
+			return (load + non_rotating_inc);
+
+		/*
+		 * Apply a seek penalty even for non-rotating devices as
+		 * sequential I/O's can be aggregated into fewer operations
+		 * on the device, thus avoiding unnecessary per-command
+		 * overhead and boosting performance.
+		 */
+		return (load + non_rotating_seek_inc);
+	}
+
+	/* Rotating media I/O's which directly follow the last I/O. */
+	if (lastoffset == zio_offset)
+		return (load + rotating_inc);
+
+	/*
+	 * Apply half the seek increment to I/O's within seek offset
+	 * of the last I/O queued to this vdev as they should incur less
+	 * of a seek increment.
+	 */
+	if (ABS(lastoffset - zio_offset) < rotating_seek_offset)
+		return (load + (rotating_seek_inc / 2));
+
+	/* Apply the full seek increment to all other I/O's. */
+	return (load + rotating_seek_inc);
+}
+
+
static mirror_map_t *
-vdev_mirror_map_alloc(zio_t *zio)
+vdev_mirror_map_init(zio_t *zio)
{
	mirror_map_t *mm = NULL;
	mirror_child_t *mc;
	vdev_t *vd = zio->io_vd;
-	int c, d;
+	int c;

	if (vd == NULL) {
		dva_t *dva = zio->io_bp->blk_dva;
		spa_t *spa = zio->io_spa;

-		c = BP_GET_NDVAS(zio->io_bp);
-
-		mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]), KM_SLEEP);
-		mm->mm_children = c;
-		mm->mm_replacing = B_FALSE;
-		mm->mm_preferred = spa_get_random(c);
-		mm->mm_root = B_TRUE;
-
-		/*
-		 * Check the other, lower-index DVAs to see if they're on
-		 * the same vdev as the child we picked.  If they are, use
-		 * them since they are likely to have been allocated from
-		 * the primary metaslab in use at the time, and hence are
-		 * more likely to have locality with single-copy data.
-		 */
-		for (c = mm->mm_preferred, d = c - 1; d >= 0; d--) {
-			if (DVA_GET_VDEV(&dva[d]) == DVA_GET_VDEV(&dva[c]))
-				mm->mm_preferred = d;
-		}
-
+		mm = vdev_mirror_map_alloc(BP_GET_NDVAS(zio->io_bp), B_FALSE,
+		    B_TRUE);
		for (c = 0; c < mm->mm_children; c++) {
			mc = &mm->mm_child[c];

			mc->mc_vd = vdev_lookup_top(spa, DVA_GET_VDEV(&dva[c]));
			mc->mc_offset = DVA_GET_OFFSET(&dva[c]);
		}
	} else {
-		c = vd->vdev_children;
-
-		mm = kmem_zalloc(offsetof(mirror_map_t, mm_child[c]), KM_SLEEP);
-		mm->mm_children = c;
-		mm->mm_replacing = (vd->vdev_ops == &vdev_replacing_ops ||
-		    vd->vdev_ops == &vdev_spare_ops);
-		mm->mm_preferred = mm->mm_replacing ? 0 :
-		    (zio->io_offset >> vdev_mirror_shift) % c;
-		mm->mm_root = B_FALSE;
-
+		mm = vdev_mirror_map_alloc(vd->vdev_children,
+		    (vd->vdev_ops == &vdev_replacing_ops ||
+		    vd->vdev_ops == &vdev_spare_ops), B_FALSE);
		for (c = 0; c < mm->mm_children; c++) {
			mc = &mm->mm_child[c];
			mc->mc_vd = vd->vdev_child[c];
@@ -211,50 +306,124 @@ vdev_mirror_scrub_done(zio_t *zio)
}

/*
- * Try to find a child whose DTL doesn't contain the block we want to read.
+ * Check the other, lower-index DVAs to see if they're on the same
+ * vdev as the child we picked.  If they are, use them since they
+ * are likely to have been allocated from the primary metaslab in
+ * use at the time, and hence are more likely to have locality with
+ * single-copy data.
+ */
+static int
+vdev_mirror_dva_select(zio_t *zio, int p)
+{
+	dva_t *dva = zio->io_bp->blk_dva;
+	mirror_map_t *mm = zio->io_vsd;
+	int preferred;
+	int c;
+
+	preferred = mm->mm_preferred[p];
+	for (p--; p >= 0; p--) {
+		c = mm->mm_preferred[p];
+		if (DVA_GET_VDEV(&dva[c]) == DVA_GET_VDEV(&dva[preferred]))
+			preferred = c;
+	}
+	return (preferred);
+}
+
+static int
+vdev_mirror_preferred_child_randomize(zio_t *zio)
+{
+	mirror_map_t *mm = zio->io_vsd;
+	int p;
+
+	if (mm->mm_root) {
+		p = spa_get_random(mm->mm_preferred_cnt);
+		return (vdev_mirror_dva_select(zio, p));
+	}
+
+	/*
+	 * To ensure we don't always favour the first matching vdev,
+	 * which could lead to wear leveling issues on SSD's, we
+	 * use the I/O offset as a pseudo random seed into the vdevs
+	 * which have the lowest load.
+	 */
+	p = (zio->io_offset >> vdev_mirror_shift) % mm->mm_preferred_cnt;
+	return (mm->mm_preferred[p]);
+}
+
+/*
+ * Try to find a vdev whose DTL doesn't contain the block we want to read,
+ * preferring vdevs based on determined load.
+ *
 * If we can't, try the read on any vdev we haven't already tried.
 */
static int
vdev_mirror_child_select(zio_t *zio)
{
	mirror_map_t *mm = zio->io_vsd;
-	mirror_child_t *mc;
	uint64_t txg = zio->io_txg;
-	int i, c;
+	int c, lowest_load;

	ASSERT(zio->io_bp == NULL || BP_PHYSICAL_BIRTH(zio->io_bp) == txg);

	/*
	 * Try to find a child whose DTL doesn't contain the block to read.
	 * If a child is known to be completely inaccessible (indicated by
	 * vdev_readable() returning B_FALSE), don't even try.
	 */
-	for (i = 0, c = mm->mm_preferred; i < mm->mm_children; i++, c++) {
-		if (c >= mm->mm_children)
-			c = 0;
+	lowest_load = INT_MAX;
+	mm->mm_preferred_cnt = 0;
+	for (c = 0; c < mm->mm_children; c++) {
+		mirror_child_t *mc;
+
		mc = &mm->mm_child[c];
		if (mc->mc_tried || mc->mc_skipped)
			continue;
+
		if (!vdev_readable(mc->mc_vd)) {
			mc->mc_error = SET_ERROR(ENXIO);
			mc->mc_tried = 1;	/* don't even try */
			mc->mc_skipped = 1;
			continue;
		}
-		if (!vdev_dtl_contains(mc->mc_vd, DTL_MISSING, txg, 1))
-			return (c);
-		mc->mc_error = SET_ERROR(ESTALE);
-		mc->mc_skipped = 1;
-		mc->mc_speculative = 1;
+
+		if (vdev_dtl_contains(mc->mc_vd, DTL_MISSING, txg, 1)) {
+			mc->mc_error = SET_ERROR(ESTALE);
+			mc->mc_skipped = 1;
+			mc->mc_speculative = 1;
+			continue;
+		}
+
+		mc->mc_load = vdev_mirror_load(mm, mc->mc_vd, mc->mc_offset);
+		if (mc->mc_load > lowest_load)
+			continue;
+
+		if (mc->mc_load < lowest_load) {
+			lowest_load = mc->mc_load;
+			mm->mm_preferred_cnt = 0;
+		}
+		mm->mm_preferred[mm->mm_preferred_cnt] = c;
+		mm->mm_preferred_cnt++;
	}

+	if (mm->mm_preferred_cnt == 1) {
+		vdev_queue_register_lastoffset(
+		    mm->mm_child[mm->mm_preferred[0]].mc_vd, zio);
+		return (mm->mm_preferred[0]);
+	}
+
+	if (mm->mm_preferred_cnt > 1) {
+		int c = vdev_mirror_preferred_child_randomize(zio);
+
+		vdev_queue_register_lastoffset(mm->mm_child[c].mc_vd, zio);
+		return (c);
+	}
+
	/*
	 * Every device is either missing or has this txg in its DTL.
	 * Look for any child we haven't already tried before giving up.
	 */
-	for (c = 0; c < mm->mm_children; c++)
-		if (!mm->mm_child[c].mc_tried)
+	for (c = 0; c < mm->mm_children; c++) {
+		if (!mm->mm_child[c].mc_tried) {
+			vdev_queue_register_lastoffset(mm->mm_child[c].mc_vd,
+			    zio);
			return (c);
+		}
+	}

	/*
	 * Every child failed.  There's no place left to look.
@@ -269,7 +438,7 @@ vdev_mirror_io_start(zio_t *zio)
	mirror_child_t *mc;
	int c, children;

-	mm = vdev_mirror_map_alloc(zio);
+	mm = vdev_mirror_map_init(zio);

	if (zio->io_type == ZIO_TYPE_READ) {
		if ((zio->io_flags & ZIO_FLAG_SCRUB) && !mm->mm_replacing) {
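As the comment block added at the top of vdev_mirror.c suggests, a pool mixing rotating and non-rotating mirror halves may read better with the non-rotating seek increment zeroed, which biases reads toward the SSDs. Using the tunables this change introduces (knob names taken verbatim from the TUNABLE_INT/SYSCTL_INT declarations above), that would be, for example:

	# /boot/loader.conf
	vfs.zfs.vdev.mirror.non_rotating_seek_inc=0

or at runtime:

	# sysctl vfs.zfs.vdev.mirror.non_rotating_seek_inc=0

The effect is simply to stop charging non-rotating children a seek penalty, so their load scores stay lower than those of the rotating children.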
@@ -289,6 +289,8 @@ vdev_queue_init(vdev_t *vd)
		    vdev_queue_offset_compare,
		    sizeof (zio_t), offsetof(struct zio, io_queue_node));
	}
+
+	vq->vq_lastoffset = 0;
}

void
@@ -815,3 +817,26 @@ vdev_queue_io_done(zio_t *zio)

	mutex_exit(&vq->vq_lock);
}
+
+/*
+ * As these three methods are only used for load calculations we're not concerned
+ * if we get an incorrect value on 32bit platforms due to lack of vq_lock mutex
+ * use here, instead we prefer to keep it lock free for performance.
+ */
+int
+vdev_queue_length(vdev_t *vd)
+{
+	return (avl_numnodes(&vd->vdev_queue.vq_active_tree));
+}
+
+uint64_t
+vdev_queue_lastoffset(vdev_t *vd)
+{
+	return (vd->vdev_queue.vq_lastoffset);
+}
+
+void
+vdev_queue_register_lastoffset(vdev_t *vd, zio_t *zio)
+{
+	vd->vdev_queue.vq_lastoffset = zio->io_offset + zio->io_size;
+}
@@ -274,6 +274,7 @@ int g_handleattr(struct bio *bp, const char *attribute, const void *val,
    int len);
int g_handleattr_int(struct bio *bp, const char *attribute, int val);
int g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val);
+int g_handleattr_uint16_t(struct bio *bp, const char *attribute, uint16_t val);
int g_handleattr_str(struct bio *bp, const char *attribute, const char *str);
struct g_consumer * g_new_consumer(struct g_geom *gp);
struct g_geom * g_new_geomf(struct g_class *mp, const char *fmt, ...)
@@ -412,23 +412,32 @@ g_disk_start(struct bio *bp)
			break;
		else if (g_handleattr_str(bp, "GEOM::ident", dp->d_ident))
			break;
-		else if (g_handleattr(bp, "GEOM::hba_vendor",
-		    &dp->d_hba_vendor, 2))
+		else if (g_handleattr_uint16_t(bp, "GEOM::hba_vendor",
+		    dp->d_hba_vendor))
			break;
-		else if (g_handleattr(bp, "GEOM::hba_device",
-		    &dp->d_hba_device, 2))
+		else if (g_handleattr_uint16_t(bp, "GEOM::hba_device",
+		    dp->d_hba_device))
			break;
-		else if (g_handleattr(bp, "GEOM::hba_subvendor",
-		    &dp->d_hba_subvendor, 2))
+		else if (g_handleattr_uint16_t(bp, "GEOM::hba_subvendor",
+		    dp->d_hba_subvendor))
			break;
-		else if (g_handleattr(bp, "GEOM::hba_subdevice",
-		    &dp->d_hba_subdevice, 2))
+		else if (g_handleattr_uint16_t(bp, "GEOM::hba_subdevice",
+		    dp->d_hba_subdevice))
			break;
		else if (!strcmp(bp->bio_attribute, "GEOM::kerneldump"))
			g_disk_kerneldump(bp, dp);
		else if (!strcmp(bp->bio_attribute, "GEOM::setstate"))
			g_disk_setstate(bp, sc);
-		else
+		else if (!strcmp(bp->bio_attribute, "GEOM::rotation_rate")) {
+			uint64_t v;
+
+			if ((dp->d_flags & DISKFLAG_LACKS_ROTRATE) == 0)
+				v = dp->d_rotation_rate;
+			else
+				v = 0;	/* rate unknown */
+			g_handleattr_uint16_t(bp, "GEOM::rotation_rate", v);
+			break;
+		} else
			error = ENOIOCTL;
		break;
	case BIO_FLUSH:
@@ -694,6 +703,8 @@ disk_create(struct disk *dp, int version)
		    dp->d_name, dp->d_unit);
		return;
	}
+	if (version < DISK_VERSION_04)
+		dp->d_flags |= DISKFLAG_LACKS_ROTRATE;
	KASSERT(dp->d_strategy != NULL, ("disk_create need d_strategy"));
	KASSERT(dp->d_name != NULL, ("disk_create need d_name"));
	KASSERT(*dp->d_name != 0, ("disk_create need d_name"));
@@ -100,6 +100,9 @@ struct disk {

	/* Fields private to the driver */
	void		*d_drv1;
+
+	/* New field - don't use if DISKFLAG_LACKS_ROTRATE is set */
+	uint16_t	d_rotation_rate;
};

#define	DISKFLAG_NEEDSGIANT	0x1
@@ -108,6 +111,7 @@ struct disk {
#define	DISKFLAG_CANFLUSHCACHE	0x8
#define	DISKFLAG_UNMAPPED_BIO	0x10
#define	DISKFLAG_DIRECT_COMPLETION	0x20
+#define	DISKFLAG_LACKS_ROTRATE	0x40

struct disk *disk_alloc(void);
void disk_create(struct disk *disk, int version);
@@ -122,7 +126,8 @@ int disk_resize(struct disk *dp, int flag);
#define	DISK_VERSION_01		0x5856105a
#define	DISK_VERSION_02		0x5856105b
#define	DISK_VERSION_03		0x5856105c
-#define	DISK_VERSION		DISK_VERSION_03
+#define	DISK_VERSION_04		0x5856105d
+#define	DISK_VERSION		DISK_VERSION_04

#endif /* _KERNEL */
#endif /* _GEOM_GEOM_DISK_H_ */
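Taken together, the geom_disk changes keep the 10.0 driver ABI intact by versioning rather than by reshuffling struct disk: d_rotation_rate is appended to the structure, disk_create(9) flags any disk registered with a pre-DISK_VERSION_04 version as DISKFLAG_LACKS_ROTRATE, and g_disk_start() then answers GEOM::rotation_rate with 0 (unknown) instead of reading a field the old binary never initialised. A hypothetical attach-path sketch of a driver updated for the new field; mydisk_softc and mydisk_strategy are illustrative names, not identifiers from this commit:

	static void
	mydisk_attach(struct mydisk_softc *sc)
	{
		struct disk *dp;

		dp = disk_alloc();
		dp->d_strategy = mydisk_strategy;
		dp->d_name = "mydisk";
		/* Non-rotating media, matching ATA_RATE_NON_ROTATING. */
		dp->d_rotation_rate = 1;
		/*
		 * DISK_VERSION now expands to DISK_VERSION_04, telling
		 * GEOM that d_rotation_rate is valid.  A binary built
		 * against the old headers still passes DISK_VERSION_03
		 * and is given DISKFLAG_LACKS_ROTRATE, so its rate
		 * reads as unknown.
		 */
		disk_create(dp, DISK_VERSION);
	}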
@@ -951,6 +951,13 @@ g_handleattr_int(struct bio *bp, const char *attribute, int val)
	return (g_handleattr(bp, attribute, &val, sizeof val));
}

+int
+g_handleattr_uint16_t(struct bio *bp, const char *attribute, uint16_t val)
+{
+
+	return (g_handleattr(bp, attribute, &val, sizeof val));
+}
+
int
g_handleattr_off_t(struct bio *bp, const char *attribute, off_t val)
{
@@ -262,6 +262,8 @@ struct ata_params {
/*215*/	u_int16_t	nv_cache_size_1;
	u_int16_t	nv_cache_size_2;
/*217*/	u_int16_t	media_rotation_rate;
+#define	ATA_RATE_NOT_REPORTED	0x0000
+#define	ATA_RATE_NON_ROTATING	0x0001
	u_int16_t	reserved218;
/*219*/	u_int16_t	nv_cache_opt;
/*220*/	u_int16_t	wrv_mode;