From 40ea77a036cd80d17111bd00f470fac57c77f07c Mon Sep 17 00:00:00 2001
From: Alexander Motin
Date: Tue, 22 Oct 2013 08:22:19 +0000
Subject: [PATCH] Merge GEOM direct dispatch changes from the projects/camlock
 branch.

When the safety requirements are met, this allows I/O requests to bypass
the GEOM g_up/g_down threads and execute directly in the caller's context.
That avoids CPU bottlenecks in the g_up/g_down threads, plus several
context switches per I/O.

The safety requirements currently defined are:
 - the caller must not hold any locks and must be reentrant;
 - the callee must not depend on GEOM's dual-threaded concurrency semantics;
 - on the way down, if the request is unmapped but the callee does not
   support unmapped I/O, the context must be sleepable;
 - kernel thread stack usage must be below 50%.

To keep compatibility with GEOM classes that do not meet the above
requirements, new provider and consumer flags are added:
 - G_CF_DIRECT_SEND -- consumer code meets the caller requirements (request);
 - G_CF_DIRECT_RECEIVE -- consumer code meets the callee requirements (done);
 - G_PF_DIRECT_SEND -- provider code meets the caller requirements (done);
 - G_PF_DIRECT_RECEIVE -- provider code meets the callee requirements
   (request).
A capable GEOM class can set them, allowing direct dispatch where it is
safe.  If any requirement is not met, the request is queued to the g_up or
g_down thread as before.

The following GEOM classes were reviewed and updated to support direct
dispatch: CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID,
STRIPE, VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, and all classes based on the
g_slice KPI (LABEL, MAP, FLASHMAP, etc.).

To declare direct completion capability, the disk(9) KPI gets a new flag
equivalent to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION.  The da(4)
and ada(4) disk drivers now set it, thanks to the earlier CAM locking work.

This change more than doubles peak block storage performance on systems
with many CPUs; together with the earlier CAM locking changes it reaches
more than 1 million IOPS (512-byte raw reads from 16 SATA SSDs on 4 HBAs
to 256 user-level threads).

Sponsored by:	iXsystems, Inc.
MFC after:	2 months
---
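As an illustration of the opt-in model, a GEOM class declares direct
dispatch by setting the new flags on the consumers and providers it
creates.  The fragment below is a hypothetical sketch, not part of the
patch: the "example" class name and its start/orphan methods are invented,
but the flag assignments mirror the ones this patch makes in NOP, CONCAT,
DEV and the other converted classes.

/*
 * Hypothetical taste routine for an invented "example" class, showing
 * how a class opts into direct dispatch.  Only the flag assignments are
 * taken from this patch; everything else is illustrative boilerplate.
 */
static g_start_t g_example_start;	/* class methods, not shown */
static g_orphan_t g_example_orphan;

static struct g_geom *
g_example_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
	struct g_geom *gp;
	struct g_consumer *cp;
	struct g_provider *newpp;

	gp = g_new_geomf(mp, "example/%s", pp->name);
	gp->start = g_example_start;
	gp->orphan = g_example_orphan;
	gp->access = g_std_access;

	/*
	 * Consumer: we issue requests from safe, reentrant contexts
	 * (DIRECT_SEND) and our done handler tolerates running directly
	 * in the completing thread's context (DIRECT_RECEIVE).
	 */
	cp = g_new_consumer(gp);
	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
	if (g_attach(cp, pp) != 0) {
		g_destroy_consumer(cp);
		g_destroy_geom(gp);
		return (NULL);
	}

	/*
	 * Provider: our completions may be delivered directly upward
	 * (DIRECT_SEND) and our start routine may be called directly in
	 * the requester's context (DIRECT_RECEIVE).
	 */
	newpp = g_new_providerf(gp, "example/%s", pp->name);
	newpp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
	newpp->mediasize = pp->mediasize;
	newpp->sectorsize = pp->sectorsize;
	g_error_provider(newpp, 0);
	return (gp);
}

A class that cannot meet some requirement (for example, a done handler
that relies on running single-threaded in g_up) simply leaves the
corresponding flag clear and keeps the old queued behavior.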
 sys/cam/ata/ata_da.c                          |   2 +-
 sys/cam/scsi/scsi_da.c                        |   2 +-
 .../opensolaris/uts/common/fs/zfs/vdev_geom.c |   1 +
 .../opensolaris/uts/common/fs/zfs/zvol.c      |  31 ++-
 sys/dev/md/md.c                               |  10 +-
 sys/geom/concat/g_concat.c                    |  86 +++++--
 sys/geom/concat/g_concat.h                    |   1 +
 sys/geom/gate/g_gate.c                        |  31 ++-
 sys/geom/geom.h                               |   6 +
 sys/geom/geom_dev.c                           |   1 +
 sys/geom/geom_disk.c                          |  64 +++--
 sys/geom/geom_disk.h                          |   1 +
 sys/geom/geom_int.h                           |   3 +
 sys/geom/geom_io.c                            | 220 +++++++++++-------
 sys/geom/geom_kern.c                          |   7 +
 sys/geom/geom_slice.c                         |  13 +-
 sys/geom/geom_vfs.c                           |  33 ++-
 sys/geom/mirror/g_mirror.c                    |  58 +++--
 sys/geom/mirror/g_mirror.h                    |   2 +
 sys/geom/multipath/g_multipath.c              |   2 +
 sys/geom/nop/g_nop.c                          |  13 +-
 sys/geom/nop/g_nop.h                          |   1 +
 sys/geom/part/g_part.c                        |   3 +
 sys/geom/raid/g_raid.c                        |   3 +
 sys/geom/raid/md_ddf.c                        |   1 +
 sys/geom/raid/md_intel.c                      |   1 +
 sys/geom/raid/md_jmicron.c                    |   1 +
 sys/geom/raid/md_nvidia.c                     |   1 +
 sys/geom/raid/md_promise.c                    |   1 +
 sys/geom/raid/md_sii.c                        |   1 +
 sys/geom/stripe/g_stripe.c                    |  72 ++++--
 sys/geom/stripe/g_stripe.h                    |   1 +
 sys/geom/zero/g_zero.c                        |   1 +
 sys/kern/subr_devstat.c                       |   3 +-
 sys/sys/proc.h                                |   2 +
 35 files changed, 465 insertions(+), 214 deletions(-)

diff --git a/sys/cam/ata/ata_da.c b/sys/cam/ata/ata_da.c
index 139f8e689744..c8d8cdcd933d 100644
--- a/sys/cam/ata/ata_da.c
+++ b/sys/cam/ata/ata_da.c
@@ -1253,7 +1253,7 @@ adaregister(struct cam_periph *periph, void *arg)
 	maxio = min(maxio, 256 * softc->params.secsize);
 	softc->disk->d_maxsize = maxio;
 	softc->disk->d_unit = periph->unit_number;
-	softc->disk->d_flags = 0;
+	softc->disk->d_flags = DISKFLAG_DIRECT_COMPLETION;
 	if (softc->flags & ADA_FLAG_CAN_FLUSHCACHE)
 		softc->disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
 	if (softc->flags & ADA_FLAG_CAN_TRIM) {
diff --git a/sys/cam/scsi/scsi_da.c b/sys/cam/scsi/scsi_da.c
index ea91ca8e7460..7a806a565f50 100644
--- a/sys/cam/scsi/scsi_da.c
+++ b/sys/cam/scsi/scsi_da.c
@@ -2125,7 +2125,7 @@ daregister(struct cam_periph *periph, void *arg)
 	else
 		softc->disk->d_maxsize = cpi.maxio;
 	softc->disk->d_unit = periph->unit_number;
-	softc->disk->d_flags = 0;
+	softc->disk->d_flags = DISKFLAG_DIRECT_COMPLETION;
 	if ((softc->quirks & DA_Q_NO_SYNC_CACHE) == 0)
 		softc->disk->d_flags |= DISKFLAG_CANFLUSHCACHE;
 	if ((cpi.hba_misc & PIM_UNMAPPED) != 0)
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
index 15685a5bcb3d..2a6910a59c24 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/vdev_geom.c
@@ -147,6 +147,7 @@ vdev_geom_attach(struct g_provider *pp)
 			ZFS_LOG(1, "Used existing consumer for %s.", pp->name);
 		}
 	}
+	cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
 	return (cp);
 }
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c
index 72d45025a88d..753927da6b6f 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c
@@ -2153,6 +2153,7 @@ zvol_geom_create(const char *name)
 	gp->start = zvol_geom_start;
 	gp->access = zvol_geom_access;
 	pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, name);
+	pp->flags |= G_PF_DIRECT_RECEIVE | G_PF_DIRECT_SEND;
 	pp->sectorsize = DEV_BSIZE;

 	zv = kmem_zalloc(sizeof(*zv), KM_SLEEP);
@@ -2256,18 +2257,20 @@ zvol_geom_start(struct bio *bp)
 	zvol_state_t *zv;
 	boolean_t first;

+	zv = bp->bio_to->private;
+	ASSERT(zv != NULL);
 	switch (bp->bio_cmd) {
+	case BIO_FLUSH:
+		if
(!THREAD_CAN_SLEEP()) + goto enqueue; + zil_commit(zv->zv_zilog, ZVOL_OBJ); + g_io_deliver(bp, 0); + break; case BIO_READ: case BIO_WRITE: - case BIO_FLUSH: - zv = bp->bio_to->private; - ASSERT(zv != NULL); - mtx_lock(&zv->zv_queue_mtx); - first = (bioq_first(&zv->zv_queue) == NULL); - bioq_insert_tail(&zv->zv_queue, bp); - mtx_unlock(&zv->zv_queue_mtx); - if (first) - wakeup_one(&zv->zv_queue); + if (!THREAD_CAN_SLEEP()) + goto enqueue; + zvol_strategy(bp); break; case BIO_GETATTR: case BIO_DELETE: @@ -2275,6 +2278,15 @@ zvol_geom_start(struct bio *bp) g_io_deliver(bp, EOPNOTSUPP); break; } + return; + +enqueue: + mtx_lock(&zv->zv_queue_mtx); + first = (bioq_first(&zv->zv_queue) == NULL); + bioq_insert_tail(&zv->zv_queue, bp); + mtx_unlock(&zv->zv_queue_mtx); + if (first) + wakeup_one(&zv->zv_queue); } static void @@ -2449,6 +2461,7 @@ zvol_rename_minor(struct g_geom *gp, const char *newname) g_wither_provider(pp, ENXIO); pp = g_new_providerf(gp, "%s/%s", ZVOL_DRIVER, newname); + pp->flags |= G_PF_DIRECT_RECEIVE | G_PF_DIRECT_SEND; pp->sectorsize = DEV_BSIZE; pp->mediasize = zv->zv_volsize; pp->private = zv; diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c index f0d1aec0d917..8ae51d134c17 100644 --- a/sys/dev/md/md.c +++ b/sys/dev/md/md.c @@ -189,6 +189,7 @@ struct md_s { LIST_ENTRY(md_s) list; struct bio_queue_head bio_queue; struct mtx queue_mtx; + struct mtx stat_mtx; struct cdev *dev; enum md_types type; off_t mediasize; @@ -415,8 +416,11 @@ g_md_start(struct bio *bp) struct md_s *sc; sc = bp->bio_to->geom->softc; - if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE)) + if ((bp->bio_cmd == BIO_READ) || (bp->bio_cmd == BIO_WRITE)) { + mtx_lock(&sc->stat_mtx); devstat_start_transaction_bio(sc->devstat, bp); + mtx_unlock(&sc->stat_mtx); + } mtx_lock(&sc->queue_mtx); bioq_disksort(&sc->bio_queue, bp); mtx_unlock(&sc->queue_mtx); @@ -987,6 +991,7 @@ mdnew(int unit, int *errp, enum md_types type) sc->type = type; bioq_init(&sc->bio_queue); mtx_init(&sc->queue_mtx, "md bio queue", NULL, MTX_DEF); + mtx_init(&sc->stat_mtx, "md stat", NULL, MTX_DEF); sc->unit = unit; sprintf(sc->name, "md%d", unit); LIST_INSERT_HEAD(&md_softc_list, sc, list); @@ -994,6 +999,7 @@ mdnew(int unit, int *errp, enum md_types type) if (error == 0) return (sc); LIST_REMOVE(sc, list); + mtx_destroy(&sc->stat_mtx); mtx_destroy(&sc->queue_mtx); free_unr(md_uh, sc->unit); free(sc, M_MD); @@ -1011,6 +1017,7 @@ mdinit(struct md_s *sc) gp = g_new_geomf(&g_md_class, "md%d", sc->unit); gp->softc = sc; pp = g_new_providerf(gp, "md%d", sc->unit); + pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; pp->mediasize = sc->mediasize; pp->sectorsize = sc->sectorsize; switch (sc->type) { @@ -1206,6 +1213,7 @@ mddestroy(struct md_s *sc, struct thread *td) while (!(sc->flags & MD_EXITING)) msleep(sc->procp, &sc->queue_mtx, PRIBIO, "mddestroy", hz / 10); mtx_unlock(&sc->queue_mtx); + mtx_destroy(&sc->stat_mtx); mtx_destroy(&sc->queue_mtx); if (sc->vnode != NULL) { vn_lock(sc->vnode, LK_EXCLUSIVE | LK_RETRY); diff --git a/sys/geom/concat/g_concat.c b/sys/geom/concat/g_concat.c index 91431662e634..2efc1b53fd61 100644 --- a/sys/geom/concat/g_concat.c +++ b/sys/geom/concat/g_concat.c @@ -238,6 +238,27 @@ g_concat_kernel_dump(struct bio *bp) disk->d_consumer->provider->name); } +static void +g_concat_done(struct bio *bp) +{ + struct g_concat_softc *sc; + struct bio *pbp; + + pbp = bp->bio_parent; + sc = pbp->bio_to->geom->softc; + mtx_lock(&sc->sc_lock); + if (pbp->bio_error == 0) + pbp->bio_error = bp->bio_error; + pbp->bio_completed 
+= bp->bio_completed; + pbp->bio_inbed++; + if (pbp->bio_children == pbp->bio_inbed) { + mtx_unlock(&sc->sc_lock); + g_io_deliver(pbp, pbp->bio_error); + } else + mtx_unlock(&sc->sc_lock); + g_destroy_bio(bp); +} + static void g_concat_flush(struct g_concat_softc *sc, struct bio *bp) { @@ -250,23 +271,19 @@ g_concat_flush(struct g_concat_softc *sc, struct bio *bp) for (no = 0; no < sc->sc_ndisks; no++) { cbp = g_clone_bio(bp); if (cbp == NULL) { - for (cbp = bioq_first(&queue); cbp != NULL; - cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) g_destroy_bio(cbp); - } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } bioq_insert_tail(&queue, cbp); - cbp->bio_done = g_std_done; + cbp->bio_done = g_concat_done; cbp->bio_caller1 = sc->sc_disks[no].d_consumer; cbp->bio_to = sc->sc_disks[no].d_consumer->provider; } - for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) { G_CONCAT_LOGREQ(cbp, "Sending request."); cp = cbp->bio_caller1; cbp->bio_caller1 = NULL; @@ -320,7 +337,10 @@ g_concat_start(struct bio *bp) offset = bp->bio_offset; length = bp->bio_length; - addr = bp->bio_data; + if ((bp->bio_flags & BIO_UNMAPPED) != 0) + addr = NULL; + else + addr = bp->bio_data; end = offset + length; bioq_init(&queue); @@ -338,11 +358,8 @@ g_concat_start(struct bio *bp) cbp = g_clone_bio(bp); if (cbp == NULL) { - for (cbp = bioq_first(&queue); cbp != NULL; - cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) g_destroy_bio(cbp); - } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); @@ -352,11 +369,21 @@ g_concat_start(struct bio *bp) /* * Fill in the component buf structure. 
*/ - cbp->bio_done = g_std_done; + if (len == bp->bio_length) + cbp->bio_done = g_std_done; + else + cbp->bio_done = g_concat_done; cbp->bio_offset = off; - cbp->bio_data = addr; - addr += len; cbp->bio_length = len; + if ((bp->bio_flags & BIO_UNMAPPED) != 0) { + cbp->bio_ma_offset += (uintptr_t)addr; + cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; + cbp->bio_ma_offset %= PAGE_SIZE; + cbp->bio_ma_n = round_page(cbp->bio_ma_offset + + cbp->bio_length) / PAGE_SIZE; + } else + cbp->bio_data = addr; + addr += len; cbp->bio_to = disk->d_consumer->provider; cbp->bio_caller1 = disk; @@ -366,8 +393,7 @@ g_concat_start(struct bio *bp) KASSERT(length == 0, ("Length is still greater than 0 (class=%s, name=%s).", bp->bio_to->geom->class->name, bp->bio_to->geom->name)); - for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) { G_CONCAT_LOGREQ(cbp, "Sending request."); disk = cbp->bio_caller1; cbp->bio_caller1 = NULL; @@ -379,7 +405,7 @@ static void g_concat_check_and_run(struct g_concat_softc *sc) { struct g_concat_disk *disk; - struct g_provider *pp; + struct g_provider *dp, *pp; u_int no, sectorsize = 0; off_t start; @@ -388,20 +414,27 @@ g_concat_check_and_run(struct g_concat_softc *sc) return; pp = g_new_providerf(sc->sc_geom, "concat/%s", sc->sc_name); + pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE | + G_PF_ACCEPT_UNMAPPED; start = 0; for (no = 0; no < sc->sc_ndisks; no++) { disk = &sc->sc_disks[no]; + dp = disk->d_consumer->provider; disk->d_start = start; - disk->d_end = disk->d_start + - disk->d_consumer->provider->mediasize; + disk->d_end = disk->d_start + dp->mediasize; if (sc->sc_type == G_CONCAT_TYPE_AUTOMATIC) - disk->d_end -= disk->d_consumer->provider->sectorsize; + disk->d_end -= dp->sectorsize; start = disk->d_end; if (no == 0) - sectorsize = disk->d_consumer->provider->sectorsize; - else { - sectorsize = lcm(sectorsize, - disk->d_consumer->provider->sectorsize); + sectorsize = dp->sectorsize; + else + sectorsize = lcm(sectorsize, dp->sectorsize); + + /* A provider underneath us doesn't support unmapped */ + if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) { + G_CONCAT_DEBUG(1, "Cancelling unmapped " + "because of %s.", dp->name); + pp->flags &= ~G_PF_ACCEPT_UNMAPPED; } } pp->sectorsize = sectorsize; @@ -468,6 +501,7 @@ g_concat_add_disk(struct g_concat_softc *sc, struct g_provider *pp, u_int no) fcp = LIST_FIRST(&gp->consumer); cp = g_new_consumer(gp); + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error != 0) { g_destroy_consumer(cp); @@ -557,6 +591,7 @@ g_concat_create(struct g_class *mp, const struct g_concat_metadata *md, for (no = 0; no < sc->sc_ndisks; no++) sc->sc_disks[no].d_consumer = NULL; sc->sc_type = type; + mtx_init(&sc->sc_lock, "gconcat lock", NULL, MTX_DEF); gp->softc = sc; sc->sc_geom = gp; @@ -605,6 +640,7 @@ g_concat_destroy(struct g_concat_softc *sc, boolean_t force) KASSERT(sc->sc_provider == NULL, ("Provider still exists? 
(device=%s)", gp->name)); free(sc->sc_disks, M_CONCAT); + mtx_destroy(&sc->sc_lock); free(sc, M_CONCAT); G_CONCAT_DEBUG(0, "Device %s destroyed.", gp->name); diff --git a/sys/geom/concat/g_concat.h b/sys/geom/concat/g_concat.h index 1c1e6f584d4a..c2ea3664772f 100644 --- a/sys/geom/concat/g_concat.h +++ b/sys/geom/concat/g_concat.h @@ -83,6 +83,7 @@ struct g_concat_softc { struct g_concat_disk *sc_disks; uint16_t sc_ndisks; + struct mtx sc_lock; }; #define sc_name sc_geom->name #endif /* _KERNEL */ diff --git a/sys/geom/gate/g_gate.c b/sys/geom/gate/g_gate.c index 0727ecd68f96..eed4abbc54a2 100644 --- a/sys/geom/gate/g_gate.c +++ b/sys/geom/gate/g_gate.c @@ -91,6 +91,7 @@ static struct mtx g_gate_units_lock; static int g_gate_destroy(struct g_gate_softc *sc, boolean_t force) { + struct bio_queue_head queue; struct g_provider *pp; struct g_consumer *cp; struct g_geom *gp; @@ -113,21 +114,22 @@ g_gate_destroy(struct g_gate_softc *sc, boolean_t force) pp->flags |= G_PF_WITHER; g_orphan_provider(pp, ENXIO); callout_drain(&sc->sc_callout); + bioq_init(&queue); mtx_lock(&sc->sc_queue_mtx); - while ((bp = bioq_first(&sc->sc_inqueue)) != NULL) { - bioq_remove(&sc->sc_inqueue, bp); + while ((bp = bioq_takefirst(&sc->sc_inqueue)) != NULL) { sc->sc_queue_count--; - G_GATE_LOGREQ(1, bp, "Request canceled."); - g_io_deliver(bp, ENXIO); + bioq_insert_tail(&queue, bp); } - while ((bp = bioq_first(&sc->sc_outqueue)) != NULL) { - bioq_remove(&sc->sc_outqueue, bp); + while ((bp = bioq_takefirst(&sc->sc_outqueue)) != NULL) { sc->sc_queue_count--; - G_GATE_LOGREQ(1, bp, "Request canceled."); - g_io_deliver(bp, ENXIO); + bioq_insert_tail(&queue, bp); } mtx_unlock(&sc->sc_queue_mtx); g_topology_unlock(); + while ((bp = bioq_takefirst(&queue)) != NULL) { + G_GATE_LOGREQ(1, bp, "Request canceled."); + g_io_deliver(bp, ENXIO); + } mtx_lock(&g_gate_units_lock); /* One reference is ours. 
*/ sc->sc_ref--; @@ -334,6 +336,7 @@ g_gate_getunit(int unit, int *errorp) static void g_gate_guard(void *arg) { + struct bio_queue_head queue; struct g_gate_softc *sc; struct bintime curtime; struct bio *bp, *bp2; @@ -341,24 +344,27 @@ g_gate_guard(void *arg) sc = arg; binuptime(&curtime); g_gate_hold(sc->sc_unit, NULL); + bioq_init(&queue); mtx_lock(&sc->sc_queue_mtx); TAILQ_FOREACH_SAFE(bp, &sc->sc_inqueue.queue, bio_queue, bp2) { if (curtime.sec - bp->bio_t0.sec < 5) continue; bioq_remove(&sc->sc_inqueue, bp); sc->sc_queue_count--; - G_GATE_LOGREQ(1, bp, "Request timeout."); - g_io_deliver(bp, EIO); + bioq_insert_tail(&queue, bp); } TAILQ_FOREACH_SAFE(bp, &sc->sc_outqueue.queue, bio_queue, bp2) { if (curtime.sec - bp->bio_t0.sec < 5) continue; bioq_remove(&sc->sc_outqueue, bp); sc->sc_queue_count--; + bioq_insert_tail(&queue, bp); + } + mtx_unlock(&sc->sc_queue_mtx); + while ((bp = bioq_takefirst(&queue)) != NULL) { G_GATE_LOGREQ(1, bp, "Request timeout."); g_io_deliver(bp, EIO); } - mtx_unlock(&sc->sc_queue_mtx); if ((sc->sc_flags & G_GATE_FLAG_DESTROY) == 0) { callout_reset(&sc->sc_callout, sc->sc_timeout * hz, g_gate_guard, sc); @@ -542,6 +548,7 @@ g_gate_create(struct g_gate_ctl_create *ggio) if (ropp != NULL) { cp = g_new_consumer(gp); + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, ropp); if (error != 0) { G_GATE_DEBUG(1, "Unable to attach to %s.", ropp->name); @@ -560,6 +567,7 @@ g_gate_create(struct g_gate_ctl_create *ggio) ggio->gctl_unit = sc->sc_unit; pp = g_new_providerf(gp, "%s", name); + pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; pp->mediasize = ggio->gctl_mediasize; pp->sectorsize = ggio->gctl_sectorsize; sc->sc_provider = pp; @@ -636,6 +644,7 @@ g_gate_modify(struct g_gate_softc *sc, struct g_gate_ctl_modify *ggio) return (EINVAL); } cp = g_new_consumer(sc->sc_provider->geom); + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error != 0) { G_GATE_DEBUG(1, "Unable to attach to %s.", diff --git a/sys/geom/geom.h b/sys/geom/geom.h index 660bf6e73804..1c1fdb03392b 100644 --- a/sys/geom/geom.h +++ b/sys/geom/geom.h @@ -177,6 +177,8 @@ struct g_consumer { int flags; #define G_CF_SPOILED 0x1 #define G_CF_ORPHAN 0x4 +#define G_CF_DIRECT_SEND 0x10 +#define G_CF_DIRECT_RECEIVE 0x20 struct devstat *stat; u_int nstart, nend; @@ -206,6 +208,8 @@ struct g_provider { #define G_PF_WITHER 0x2 #define G_PF_ORPHAN 0x4 #define G_PF_ACCEPT_UNMAPPED 0x8 +#define G_PF_DIRECT_SEND 0x10 +#define G_PF_DIRECT_RECEIVE 0x20 /* Two fields for the implementing class to use */ void *private; @@ -393,6 +397,8 @@ g_free(void *ptr) }; \ DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_FIRST); +int g_is_geom_thread(struct thread *td); + #endif /* _KERNEL */ /* geom_ctl.c */ diff --git a/sys/geom/geom_dev.c b/sys/geom/geom_dev.c index 0b4ffbf8017f..6ec77cca25ae 100644 --- a/sys/geom/geom_dev.c +++ b/sys/geom/geom_dev.c @@ -222,6 +222,7 @@ g_dev_taste(struct g_class *mp, struct g_provider *pp, int insist __unused) mtx_init(&sc->sc_mtx, "g_dev", NULL, MTX_DEF); cp = g_new_consumer(gp); cp->private = sc; + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); KASSERT(error == 0, ("g_dev_taste(%s) failed to g_attach, err=%d", pp->name, error)); diff --git a/sys/geom/geom_disk.c b/sys/geom/geom_disk.c index aed4901456bc..e0b9776d3e03 100644 --- a/sys/geom/geom_disk.c +++ b/sys/geom/geom_disk.c @@ -66,6 +66,7 @@ struct g_disk_softc { struct sysctl_oid *sysctl_tree; char led[64]; uint32_t state; + struct mtx 
start_mtx; }; static g_access_t g_disk_access; @@ -255,6 +256,25 @@ g_disk_done(struct bio *bp) g_destroy_bio(bp); } +static void +g_disk_done_single(struct bio *bp) +{ + struct bintime now; + struct g_disk_softc *sc; + + bp->bio_completed = bp->bio_length - bp->bio_resid; + bp->bio_done = (void *)bp->bio_to; + bp->bio_to = LIST_FIRST(&bp->bio_disk->d_geom->provider); + if ((bp->bio_cmd & (BIO_READ|BIO_WRITE|BIO_DELETE)) != 0) { + binuptime(&now); + sc = bp->bio_to->private; + mtx_lock(&sc->done_mtx); + devstat_end_transaction_bio_bt(sc->dp->d_devstat, bp, &now); + mtx_unlock(&sc->done_mtx); + } + g_io_deliver(bp, bp->bio_error); +} + static int g_disk_ioctl(struct g_provider *pp, u_long cmd, void * data, int fflag, struct thread *td) { @@ -280,7 +300,7 @@ g_disk_start(struct bio *bp) struct disk *dp; struct g_disk_softc *sc; int error; - off_t off; + off_t d_maxsize, off; sc = bp->bio_to->private; if (sc == NULL || (dp = sc->dp) == NULL || dp->d_destroyed) { @@ -297,6 +317,22 @@ g_disk_start(struct bio *bp) /* fall-through */ case BIO_READ: case BIO_WRITE: + d_maxsize = (bp->bio_cmd == BIO_DELETE) ? + dp->d_delmaxsize : dp->d_maxsize; + if (bp->bio_length <= d_maxsize) { + bp->bio_disk = dp; + bp->bio_to = (void *)bp->bio_done; + bp->bio_done = g_disk_done_single; + bp->bio_pblkno = bp->bio_offset / dp->d_sectorsize; + bp->bio_bcount = bp->bio_length; + mtx_lock(&sc->start_mtx); + devstat_start_transaction_bio(dp->d_devstat, bp); + mtx_unlock(&sc->start_mtx); + g_disk_lock_giant(dp); + dp->d_strategy(bp); + g_disk_unlock_giant(dp); + break; + } off = 0; bp3 = NULL; bp2 = g_clone_bio(bp); @@ -305,10 +341,6 @@ g_disk_start(struct bio *bp) break; } do { - off_t d_maxsize; - - d_maxsize = (bp->bio_cmd == BIO_DELETE) ? - dp->d_delmaxsize : dp->d_maxsize; bp2->bio_offset += off; bp2->bio_length -= off; if ((bp->bio_flags & BIO_UNMAPPED) == 0) { @@ -349,7 +381,9 @@ g_disk_start(struct bio *bp) bp2->bio_pblkno = bp2->bio_offset / dp->d_sectorsize; bp2->bio_bcount = bp2->bio_length; bp2->bio_disk = dp; + mtx_lock(&sc->start_mtx); devstat_start_transaction_bio(dp->d_devstat, bp2); + mtx_unlock(&sc->start_mtx); g_disk_lock_giant(dp); dp->d_strategy(bp2); g_disk_unlock_giant(dp); @@ -405,15 +439,11 @@ g_disk_start(struct bio *bp) error = EOPNOTSUPP; break; } - bp2 = g_clone_bio(bp); - if (bp2 == NULL) { - g_io_deliver(bp, ENOMEM); - return; - } - bp2->bio_done = g_disk_done; - bp2->bio_disk = dp; + bp->bio_disk = dp; + bp->bio_to = (void *)bp->bio_done; + bp->bio_done = g_disk_done_single; g_disk_lock_giant(dp); - dp->d_strategy(bp2); + dp->d_strategy(bp); g_disk_unlock_giant(dp); break; default: @@ -518,17 +548,24 @@ g_disk_create(void *arg, int flag) g_topology_assert(); dp = arg; sc = g_malloc(sizeof(*sc), M_WAITOK | M_ZERO); + mtx_init(&sc->start_mtx, "g_disk_start", NULL, MTX_DEF); mtx_init(&sc->done_mtx, "g_disk_done", NULL, MTX_DEF); sc->dp = dp; gp = g_new_geomf(&g_disk_class, "%s%d", dp->d_name, dp->d_unit); gp->softc = sc; pp = g_new_providerf(gp, "%s", gp->name); + devstat_remove_entry(pp->stat); + pp->stat = NULL; + dp->d_devstat->id = pp; pp->mediasize = dp->d_mediasize; pp->sectorsize = dp->d_sectorsize; pp->stripeoffset = dp->d_stripeoffset; pp->stripesize = dp->d_stripesize; if ((dp->d_flags & DISKFLAG_UNMAPPED_BIO) != 0) pp->flags |= G_PF_ACCEPT_UNMAPPED; + if ((dp->d_flags & DISKFLAG_DIRECT_COMPLETION) != 0) + pp->flags |= G_PF_DIRECT_SEND; + pp->flags |= G_PF_DIRECT_RECEIVE; if (bootverbose) printf("GEOM: new disk %s\n", gp->name); sysctl_ctx_init(&sc->sysctl_ctx); @@ -577,6 +614,7 
@@ g_disk_providergone(struct g_provider *pp) pp->private = NULL; pp->geom->softc = NULL; mtx_destroy(&sc->done_mtx); + mtx_destroy(&sc->start_mtx); g_free(sc); } diff --git a/sys/geom/geom_disk.h b/sys/geom/geom_disk.h index 852047bae43a..b092146e7fba 100644 --- a/sys/geom/geom_disk.h +++ b/sys/geom/geom_disk.h @@ -107,6 +107,7 @@ struct disk { #define DISKFLAG_CANDELETE 0x4 #define DISKFLAG_CANFLUSHCACHE 0x8 #define DISKFLAG_UNMAPPED_BIO 0x10 +#define DISKFLAG_DIRECT_COMPLETION 0x20 struct disk *disk_alloc(void); void disk_create(struct disk *disk, int version); diff --git a/sys/geom/geom_int.h b/sys/geom/geom_int.h index 149a283c6561..22f42e2b2b31 100644 --- a/sys/geom/geom_int.h +++ b/sys/geom/geom_int.h @@ -39,6 +39,9 @@ LIST_HEAD(class_list_head, g_class); TAILQ_HEAD(g_tailq_head, g_geom); extern int g_collectstats; +#define G_STATS_PROVIDERS 1 /* Collect I/O stats for providers */ +#define G_STATS_CONSUMERS 2 /* Collect I/O stats for consumers */ + extern int g_debugflags; /* * 1 G_T_TOPOLOGY diff --git a/sys/geom/geom_io.c b/sys/geom/geom_io.c index d6f02ac1353c..0b8e118a6944 100644 --- a/sys/geom/geom_io.c +++ b/sys/geom/geom_io.c @@ -65,6 +65,8 @@ __FBSDID("$FreeBSD$"); #include #include +static int g_io_transient_map_bio(struct bio *bp); + static struct g_bioq g_bio_run_down; static struct g_bioq g_bio_run_up; static struct g_bioq g_bio_run_task; @@ -310,6 +312,8 @@ g_io_check(struct bio *bp) { struct g_consumer *cp; struct g_provider *pp; + off_t excess; + int error; cp = bp->bio_from; pp = bp->bio_to; @@ -354,11 +358,44 @@ g_io_check(struct bio *bp) return (EIO); if (bp->bio_offset > pp->mediasize) return (EIO); + + /* Truncate requests to the end of providers media. */ + excess = bp->bio_offset + bp->bio_length; + if (excess > bp->bio_to->mediasize) { + KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 || + round_page(bp->bio_ma_offset + + bp->bio_length) / PAGE_SIZE == bp->bio_ma_n, + ("excess bio %p too short", bp)); + excess -= bp->bio_to->mediasize; + bp->bio_length -= excess; + if ((bp->bio_flags & BIO_UNMAPPED) != 0) { + bp->bio_ma_n = round_page(bp->bio_ma_offset + + bp->bio_length) / PAGE_SIZE; + } + if (excess > 0) + CTR3(KTR_GEOM, "g_down truncated bio " + "%p provider %s by %d", bp, + bp->bio_to->name, excess); + } + + /* Deliver zero length transfers right here. 
*/ + if (bp->bio_length == 0) { + CTR2(KTR_GEOM, "g_down terminated 0-length " + "bp %p provider %s", bp, bp->bio_to->name); + return (0); + } + + if ((bp->bio_flags & BIO_UNMAPPED) != 0 && + (bp->bio_to->flags & G_PF_ACCEPT_UNMAPPED) == 0 && + (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) { + if ((error = g_io_transient_map_bio(bp)) >= 0) + return (error); + } break; default: break; } - return (0); + return (EJUSTRETURN); } /* @@ -422,7 +459,8 @@ void g_io_request(struct bio *bp, struct g_consumer *cp) { struct g_provider *pp; - int first; + struct mtx *mtxp; + int direct, error, first; KASSERT(cp != NULL, ("NULL cp in g_io_request")); KASSERT(bp != NULL, ("NULL bp in g_io_request")); @@ -472,40 +510,71 @@ g_io_request(struct bio *bp, struct g_consumer *cp) KASSERT(!(bp->bio_flags & BIO_ONQUEUE), ("Bio already on queue bp=%p", bp)); - bp->bio_flags |= BIO_ONQUEUE; - - if (g_collectstats) + if ((g_collectstats & G_STATS_CONSUMERS) != 0 || + ((g_collectstats & G_STATS_PROVIDERS) != 0 && pp->stat != NULL)) binuptime(&bp->bio_t0); else getbinuptime(&bp->bio_t0); +#ifdef GET_STACK_USAGE + direct = (cp->flags & G_CF_DIRECT_SEND) && + (pp->flags & G_PF_DIRECT_RECEIVE) && + !g_is_geom_thread(curthread) && + (((pp->flags & G_PF_ACCEPT_UNMAPPED) == 0 && + (bp->bio_flags & BIO_UNMAPPED) != 0) || THREAD_CAN_SLEEP()); + if (direct) { + /* Block direct execution if less then half of stack left. */ + size_t st, su; + GET_STACK_USAGE(st, su); + if (su * 2 > st) + direct = 0; + } +#else + direct = 0; +#endif + + if (!TAILQ_EMPTY(&g_classifier_tailq) && !bp->bio_classifier1) { + g_bioq_lock(&g_bio_run_down); + g_run_classifiers(bp); + g_bioq_unlock(&g_bio_run_down); + } + /* * The statistics collection is lockless, as such, but we * can not update one instance of the statistics from more * than one thread at a time, so grab the lock first. - * - * We also use the lock to protect the list of classifiers. */ - g_bioq_lock(&g_bio_run_down); - - if (!TAILQ_EMPTY(&g_classifier_tailq) && !bp->bio_classifier1) - g_run_classifiers(bp); - - if (g_collectstats & 1) + mtxp = mtx_pool_find(mtxpool_sleep, pp); + mtx_lock(mtxp); + if (g_collectstats & G_STATS_PROVIDERS) devstat_start_transaction(pp->stat, &bp->bio_t0); - if (g_collectstats & 2) + if (g_collectstats & G_STATS_CONSUMERS) devstat_start_transaction(cp->stat, &bp->bio_t0); - pp->nstart++; cp->nstart++; - first = TAILQ_EMPTY(&g_bio_run_down.bio_queue); - TAILQ_INSERT_TAIL(&g_bio_run_down.bio_queue, bp, bio_queue); - g_bio_run_down.bio_queue_length++; - g_bioq_unlock(&g_bio_run_down); + mtx_unlock(mtxp); - /* Pass it on down. */ - if (first) - wakeup(&g_wait_down); + if (direct) { + error = g_io_check(bp); + if (error >= 0) { + CTR3(KTR_GEOM, "g_io_request g_io_check on bp %p " + "provider %s returned %d", bp, bp->bio_to->name, + error); + g_io_deliver(bp, error); + return; + } + bp->bio_to->geom->start(bp); + } else { + g_bioq_lock(&g_bio_run_down); + first = TAILQ_EMPTY(&g_bio_run_down.bio_queue); + TAILQ_INSERT_TAIL(&g_bio_run_down.bio_queue, bp, bio_queue); + bp->bio_flags |= BIO_ONQUEUE; + g_bio_run_down.bio_queue_length++; + g_bioq_unlock(&g_bio_run_down); + /* Pass it on down. 
*/ + if (first) + wakeup(&g_wait_down); + } } void @@ -514,7 +583,8 @@ g_io_deliver(struct bio *bp, int error) struct bintime now; struct g_consumer *cp; struct g_provider *pp; - int first; + struct mtx *mtxp; + int direct, first; KASSERT(bp != NULL, ("NULL bp in g_io_deliver")); pp = bp->bio_to; @@ -560,33 +630,55 @@ g_io_deliver(struct bio *bp, int error) bp->bio_bcount = bp->bio_length; bp->bio_resid = bp->bio_bcount - bp->bio_completed; +#ifdef GET_STACK_USAGE + direct = (pp->flags & G_PF_DIRECT_SEND) && + (cp->flags & G_CF_DIRECT_RECEIVE) && + !g_is_geom_thread(curthread); + if (direct) { + /* Block direct execution if less then half of stack left. */ + size_t st, su; + GET_STACK_USAGE(st, su); + if (su * 2 > st) + direct = 0; + } +#else + direct = 0; +#endif + /* * The statistics collection is lockless, as such, but we * can not update one instance of the statistics from more * than one thread at a time, so grab the lock first. */ - if (g_collectstats) + if ((g_collectstats & G_STATS_CONSUMERS) != 0 || + ((g_collectstats & G_STATS_PROVIDERS) != 0 && pp->stat != NULL)) binuptime(&now); - g_bioq_lock(&g_bio_run_up); - if (g_collectstats & 1) + mtxp = mtx_pool_find(mtxpool_sleep, cp); + mtx_lock(mtxp); + if (g_collectstats & G_STATS_PROVIDERS) devstat_end_transaction_bio_bt(pp->stat, bp, &now); - if (g_collectstats & 2) + if (g_collectstats & G_STATS_CONSUMERS) devstat_end_transaction_bio_bt(cp->stat, bp, &now); - cp->nend++; pp->nend++; + mtx_unlock(mtxp); + if (error != ENOMEM) { bp->bio_error = error; - first = TAILQ_EMPTY(&g_bio_run_up.bio_queue); - TAILQ_INSERT_TAIL(&g_bio_run_up.bio_queue, bp, bio_queue); - bp->bio_flags |= BIO_ONQUEUE; - g_bio_run_up.bio_queue_length++; - g_bioq_unlock(&g_bio_run_up); - if (first) - wakeup(&g_wait_up); + if (direct) { + biodone(bp); + } else { + g_bioq_lock(&g_bio_run_up); + first = TAILQ_EMPTY(&g_bio_run_up.bio_queue); + TAILQ_INSERT_TAIL(&g_bio_run_up.bio_queue, bp, bio_queue); + bp->bio_flags |= BIO_ONQUEUE; + g_bio_run_up.bio_queue_length++; + g_bioq_unlock(&g_bio_run_up); + if (first) + wakeup(&g_wait_up); + } return; } - g_bioq_unlock(&g_bio_run_up); if (bootverbose) printf("ENOMEM %p on %p(%s)\n", bp, pp, pp->name); @@ -642,11 +734,10 @@ g_io_transient_map_bio(struct bio *bp) if (vmem_alloc(transient_arena, size, M_BESTFIT | M_NOWAIT, &addr)) { if (transient_map_retries != 0 && retried >= transient_map_retries) { - g_io_deliver(bp, EDEADLK/* XXXKIB */); CTR2(KTR_GEOM, "g_down cannot map bp %p provider %s", bp, bp->bio_to->name); atomic_add_int(&transient_map_hard_failures, 1); - return (1); + return (EDEADLK/* XXXKIB */); } else { /* * Naive attempt to quisce the I/O to get more @@ -666,14 +757,13 @@ g_io_transient_map_bio(struct bio *bp) bp->bio_data = (caddr_t)addr + bp->bio_ma_offset; bp->bio_flags |= BIO_TRANSIENT_MAPPING; bp->bio_flags &= ~BIO_UNMAPPED; - return (0); + return (EJUSTRETURN); } void g_io_schedule_down(struct thread *tp __unused) { struct bio *bp; - off_t excess; int error; for(;;) { @@ -692,59 +782,15 @@ g_io_schedule_down(struct thread *tp __unused) pause("g_down", hz/10); pace--; } + CTR2(KTR_GEOM, "g_down processing bp %p provider %s", bp, + bp->bio_to->name); error = g_io_check(bp); - if (error) { + if (error >= 0) { CTR3(KTR_GEOM, "g_down g_io_check on bp %p provider " "%s returned %d", bp, bp->bio_to->name, error); g_io_deliver(bp, error); continue; } - CTR2(KTR_GEOM, "g_down processing bp %p provider %s", bp, - bp->bio_to->name); - switch (bp->bio_cmd) { - case BIO_READ: - case BIO_WRITE: - case BIO_DELETE: - /* 
Truncate requests to the end of providers media. */ - /* - * XXX: What if we truncate because of offset being - * bad, not length? - */ - excess = bp->bio_offset + bp->bio_length; - if (excess > bp->bio_to->mediasize) { - KASSERT((bp->bio_flags & BIO_UNMAPPED) == 0 || - round_page(bp->bio_ma_offset + - bp->bio_length) / PAGE_SIZE == bp->bio_ma_n, - ("excess bio %p too short", bp)); - excess -= bp->bio_to->mediasize; - bp->bio_length -= excess; - if ((bp->bio_flags & BIO_UNMAPPED) != 0) { - bp->bio_ma_n = round_page( - bp->bio_ma_offset + - bp->bio_length) / PAGE_SIZE; - } - if (excess > 0) - CTR3(KTR_GEOM, "g_down truncated bio " - "%p provider %s by %d", bp, - bp->bio_to->name, excess); - } - /* Deliver zero length transfers right here. */ - if (bp->bio_length == 0) { - g_io_deliver(bp, 0); - CTR2(KTR_GEOM, "g_down terminated 0-length " - "bp %p provider %s", bp, bp->bio_to->name); - continue; - } - break; - default: - break; - } - if ((bp->bio_flags & BIO_UNMAPPED) != 0 && - (bp->bio_to->flags & G_PF_ACCEPT_UNMAPPED) == 0 && - (bp->bio_cmd == BIO_READ || bp->bio_cmd == BIO_WRITE)) { - if (g_io_transient_map_bio(bp)) - continue; - } THREAD_NO_SLEEPING(); CTR4(KTR_GEOM, "g_down starting bp %p provider %s off %ld " "len %ld", bp, bp->bio_to->name, bp->bio_offset, diff --git a/sys/geom/geom_kern.c b/sys/geom/geom_kern.c index 2e65bfbcd9b9..79afb14abe9e 100644 --- a/sys/geom/geom_kern.c +++ b/sys/geom/geom_kern.c @@ -124,6 +124,13 @@ g_event_procbody(void *arg) /* NOTREACHED */ } +int +g_is_geom_thread(struct thread *td) +{ + + return (td == g_up_td || td == g_down_td || td == g_event_td); +} + static void geom_shutdown(void *foo __unused) { diff --git a/sys/geom/geom_slice.c b/sys/geom/geom_slice.c index 976d84091e87..f40a4de129e3 100644 --- a/sys/geom/geom_slice.c +++ b/sys/geom/geom_slice.c @@ -396,8 +396,10 @@ g_slice_config(struct g_geom *gp, u_int idx, int how, off_t offset, off_t length pp->stripeoffset = pp2->stripeoffset + offset; if (pp->stripesize > 0) pp->stripeoffset %= pp->stripesize; - if (gsp->nhotspot == 0) + if (gsp->nhotspot == 0) { pp->flags |= pp2->flags & G_PF_ACCEPT_UNMAPPED; + pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; + } if (0 && bootverbose) printf("GEOM: Configure %s, start %jd length %jd end %jd\n", pp->name, (intmax_t)offset, (intmax_t)length, @@ -430,16 +432,20 @@ g_slice_conf_hot(struct g_geom *gp, u_int idx, off_t offset, off_t length, int r { struct g_slicer *gsp; struct g_slice_hot *gsl, *gsl2; + struct g_consumer *cp; struct g_provider *pp; g_trace(G_T_TOPOLOGY, "g_slice_conf_hot(%s, idx: %d, off: %jd, len: %jd)", gp->name, idx, (intmax_t)offset, (intmax_t)length); g_topology_assert(); gsp = gp->softc; - /* Deny unmapped I/O if hotspots are used. */ + /* Deny unmapped I/O and direct dispatch if hotspots are used. 
*/ if (gsp->nhotspot == 0) { LIST_FOREACH(pp, &gp->provider, provider) - pp->flags &= ~G_PF_ACCEPT_UNMAPPED; + pp->flags &= ~(G_PF_ACCEPT_UNMAPPED | + G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE); + LIST_FOREACH(cp, &gp->consumer, consumer) + cp->flags &= ~(G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE); } gsl = gsp->hotspot; if(idx >= gsp->nhotspot) { @@ -511,6 +517,7 @@ g_slice_new(struct g_class *mp, u_int slices, struct g_provider *pp, struct g_co if (gp->class->destroy_geom == NULL) gp->class->destroy_geom = g_slice_destroy_geom; cp = g_new_consumer(gp); + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error == 0) error = g_access(cp, 1, 0, 0); diff --git a/sys/geom/geom_vfs.c b/sys/geom/geom_vfs.c index 92f1ad2f509b..eda4b75b0891 100644 --- a/sys/geom/geom_vfs.c +++ b/sys/geom/geom_vfs.c @@ -102,14 +102,10 @@ g_vfs_done(struct bio *bip) /* * Collect statistics on synchronous and asynchronous read * and write counts for disks that have associated filesystems. - * Since this run by the g_up thread it is single threaded and - * we do not need to use atomic increments on the counters. */ bp = bip->bio_caller2; vp = bp->b_vp; - if (vp == NULL) { - mp = NULL; - } else { + if (vp != NULL) { /* * If not a disk vnode, use its associated mount point * otherwise use the mountpoint associated with the disk. @@ -122,20 +118,20 @@ g_vfs_done(struct bio *bip) mp = vp->v_mount; else mp = cdevp->si_mountpt; - VI_UNLOCK(vp); - } - if (mp != NULL) { - if (bp->b_iocmd == BIO_WRITE) { - if (LK_HOLDER(bp->b_lock.lk_lock) == LK_KERNPROC) - mp->mnt_stat.f_asyncwrites++; - else - mp->mnt_stat.f_syncwrites++; - } else { - if (LK_HOLDER(bp->b_lock.lk_lock) == LK_KERNPROC) - mp->mnt_stat.f_asyncreads++; - else - mp->mnt_stat.f_syncreads++; + if (mp != NULL) { + if (bp->b_iocmd == BIO_READ) { + if (LK_HOLDER(bp->b_lock.lk_lock) == LK_KERNPROC) + mp->mnt_stat.f_asyncreads++; + else + mp->mnt_stat.f_syncreads++; + } else if (bp->b_iocmd == BIO_WRITE) { + if (LK_HOLDER(bp->b_lock.lk_lock) == LK_KERNPROC) + mp->mnt_stat.f_asyncwrites++; + else + mp->mnt_stat.f_syncwrites++; + } } + VI_UNLOCK(vp); } cp = bip->bio_from; @@ -260,6 +256,7 @@ g_vfs_open(struct vnode *vp, struct g_consumer **cpp, const char *fsname, int wr vnode_create_vobject(vp, pp->mediasize, curthread); *cpp = cp; cp->private = vp; + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; bo->bo_ops = g_vfs_bufops; bo->bo_private = cp; bo->bo_bsize = pp->sectorsize; diff --git a/sys/geom/mirror/g_mirror.c b/sys/geom/mirror/g_mirror.c index 04233fec78ec..b4be912a2732 100644 --- a/sys/geom/mirror/g_mirror.c +++ b/sys/geom/mirror/g_mirror.c @@ -394,6 +394,7 @@ g_mirror_connect_disk(struct g_mirror_disk *disk, struct g_provider *pp) g_topology_lock(); cp = g_new_consumer(disk->d_softc->sc_geom); + cp->flags |= G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error != 0) { g_destroy_consumer(cp); @@ -554,6 +555,7 @@ g_mirror_destroy_device(struct g_mirror_softc *sc) g_topology_unlock(); mtx_destroy(&sc->sc_queue_mtx); mtx_destroy(&sc->sc_events_mtx); + mtx_destroy(&sc->sc_done_mtx); sx_xunlock(&sc->sc_lock); sx_destroy(&sc->sc_lock); } @@ -851,6 +853,27 @@ g_mirror_unidle(struct g_mirror_softc *sc) } } +static void +g_mirror_flush_done(struct bio *bp) +{ + struct g_mirror_softc *sc; + struct bio *pbp; + + pbp = bp->bio_parent; + sc = pbp->bio_to->geom->softc; + mtx_lock(&sc->sc_done_mtx); + if (pbp->bio_error == 0) + pbp->bio_error = bp->bio_error; + pbp->bio_completed += bp->bio_completed; + pbp->bio_inbed++; + if (pbp->bio_children 
== pbp->bio_inbed) { + mtx_unlock(&sc->sc_done_mtx); + g_io_deliver(pbp, pbp->bio_error); + } else + mtx_unlock(&sc->sc_done_mtx); + g_destroy_bio(bp); +} + static void g_mirror_done(struct bio *bp) { @@ -1037,23 +1060,19 @@ g_mirror_flush(struct g_mirror_softc *sc, struct bio *bp) continue; cbp = g_clone_bio(bp); if (cbp == NULL) { - for (cbp = bioq_first(&queue); cbp != NULL; - cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) g_destroy_bio(cbp); - } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); return; } bioq_insert_tail(&queue, cbp); - cbp->bio_done = g_std_done; + cbp->bio_done = g_mirror_flush_done; cbp->bio_caller1 = disk; cbp->bio_to = disk->d_consumer->provider; } - for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) { G_MIRROR_LOGREQ(3, cbp, "Sending request."); disk = cbp->bio_caller1; cbp->bio_caller1 = NULL; @@ -1538,11 +1557,8 @@ g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp) continue; cbp = g_clone_bio(bp); if (cbp == NULL) { - for (cbp = bioq_first(&queue); cbp != NULL; - cbp = bioq_first(&queue)) { + while ((cbp = bioq_takefirst(&queue)) != NULL) bioq_remove(&queue, cbp); - g_destroy_bio(cbp); - } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); @@ -1561,8 +1577,7 @@ g_mirror_request_split(struct g_mirror_softc *sc, struct bio *bp) offset += cbp->bio_length; data += cbp->bio_length; } - for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) { G_MIRROR_LOGREQ(3, cbp, "Sending request."); disk = cbp->bio_caller1; cbp->bio_caller1 = NULL; @@ -1643,11 +1658,8 @@ g_mirror_register_request(struct bio *bp) continue; cbp = g_clone_bio(bp); if (cbp == NULL) { - for (cbp = bioq_first(&queue); cbp != NULL; - cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) g_destroy_bio(cbp); - } if (bp->bio_error == 0) bp->bio_error = ENOMEM; g_io_deliver(bp, bp->bio_error); @@ -1662,9 +1674,7 @@ g_mirror_register_request(struct bio *bp) ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, cp->acr, cp->acw, cp->ace)); } - for (cbp = bioq_first(&queue); cbp != NULL; - cbp = bioq_first(&queue)) { - bioq_remove(&queue, cbp); + while ((cbp = bioq_takefirst(&queue)) != NULL) { G_MIRROR_LOGREQ(3, cbp, "Sending request."); cp = cbp->bio_caller1; cbp->bio_caller1 = NULL; @@ -1920,6 +1930,7 @@ g_mirror_sync_start(struct g_mirror_disk *disk) sx_xunlock(&sc->sc_lock); g_topology_lock(); cp = g_new_consumer(sc->sc_sync.ds_geom); + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, sc->sc_provider); KASSERT(error == 0, ("Cannot attach to %s (error=%d).", sc->sc_name, error)); @@ -2034,6 +2045,7 @@ g_mirror_launch_provider(struct g_mirror_softc *sc) g_topology_lock(); pp = g_new_providerf(sc->sc_geom, "mirror/%s", sc->sc_name); + pp->flags |= G_PF_DIRECT_RECEIVE; pp->mediasize = sc->sc_mediasize; pp->sectorsize = sc->sc_sectorsize; pp->stripesize = 0; @@ -2082,10 +2094,8 @@ g_mirror_destroy_provider(struct g_mirror_softc *sc) g_topology_lock(); g_error_provider(sc->sc_provider, ENXIO); mtx_lock(&sc->sc_queue_mtx); - while ((bp = bioq_first(&sc->sc_queue)) != NULL) { - bioq_remove(&sc->sc_queue, bp); + while ((bp = bioq_takefirst(&sc->sc_queue)) != NULL) g_io_deliver(bp, ENXIO); - } 
mtx_unlock(&sc->sc_queue_mtx); G_MIRROR_DEBUG(0, "Device %s: provider %s destroyed.", sc->sc_name, sc->sc_provider->name); @@ -2896,6 +2906,7 @@ g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md) TAILQ_INIT(&sc->sc_events); mtx_init(&sc->sc_events_mtx, "gmirror:events", NULL, MTX_DEF); callout_init(&sc->sc_callout, CALLOUT_MPSAFE); + mtx_init(&sc->sc_done_mtx, "gmirror:done", NULL, MTX_DEF); sc->sc_state = G_MIRROR_DEVICE_STATE_STARTING; gp->softc = sc; sc->sc_geom = gp; @@ -2914,6 +2925,7 @@ g_mirror_create(struct g_class *mp, const struct g_mirror_metadata *md) G_MIRROR_DEBUG(1, "Cannot create kernel thread for %s.", sc->sc_name); g_destroy_geom(sc->sc_sync.ds_geom); + mtx_destroy(&sc->sc_done_mtx); mtx_destroy(&sc->sc_events_mtx); mtx_destroy(&sc->sc_queue_mtx); sx_destroy(&sc->sc_lock); diff --git a/sys/geom/mirror/g_mirror.h b/sys/geom/mirror/g_mirror.h index 44ea18adbf17..96270c8bf2b7 100644 --- a/sys/geom/mirror/g_mirror.h +++ b/sys/geom/mirror/g_mirror.h @@ -212,6 +212,8 @@ struct g_mirror_softc { struct callout sc_callout; struct root_hold_token *sc_rootmount; + + struct mtx sc_done_mtx; }; #define sc_name sc_geom->name diff --git a/sys/geom/multipath/g_multipath.c b/sys/geom/multipath/g_multipath.c index 72cd2c5bb674..6bc1d6e72c7e 100644 --- a/sys/geom/multipath/g_multipath.c +++ b/sys/geom/multipath/g_multipath.c @@ -442,6 +442,7 @@ g_multipath_create(struct g_class *mp, struct g_multipath_metadata *md) gp->dumpconf = g_multipath_dumpconf; pp = g_new_providerf(gp, "multipath/%s", md->md_name); + pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; if (md->md_size != 0) { pp->mediasize = md->md_size - ((md->md_uuid[0] != 0) ? md->md_sectorsize : 0); @@ -479,6 +480,7 @@ g_multipath_add_disk(struct g_geom *gp, struct g_provider *pp) } nxtcp = LIST_FIRST(&gp->consumer); cp = g_new_consumer(gp); + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; cp->private = NULL; cp->index = MP_NEW; error = g_attach(cp, pp); diff --git a/sys/geom/nop/g_nop.c b/sys/geom/nop/g_nop.c index 0cb03746ea0b..e6b44bb1d218 100644 --- a/sys/geom/nop/g_nop.c +++ b/sys/geom/nop/g_nop.c @@ -107,6 +107,7 @@ g_nop_start(struct bio *bp) gp = bp->bio_to->geom; sc = gp->softc; G_NOP_LOGREQ(bp, "Request received."); + mtx_lock(&sc->sc_lock); switch (bp->bio_cmd) { case BIO_READ: sc->sc_reads++; @@ -119,6 +120,7 @@ g_nop_start(struct bio *bp) failprob = sc->sc_wfailprob; break; } + mtx_unlock(&sc->sc_lock); if (failprob > 0) { u_int rval; @@ -224,6 +226,7 @@ g_nop_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp, sc->sc_writes = 0; sc->sc_readbytes = 0; sc->sc_wrotebytes = 0; + mtx_init(&sc->sc_lock, "gnop lock", NULL, MTX_DEF); gp->softc = sc; gp->start = g_nop_start; gp->orphan = g_nop_orphan; @@ -232,10 +235,12 @@ g_nop_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp, gp->dumpconf = g_nop_dumpconf; newpp = g_new_providerf(gp, "%s", gp->name); + newpp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; newpp->mediasize = size; newpp->sectorsize = secsize; cp = g_new_consumer(gp); + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error != 0) { gctl_error(req, "Cannot attach to provider %s.", pp->name); @@ -251,6 +256,7 @@ g_nop_create(struct gctl_req *req, struct g_class *mp, struct g_provider *pp, g_detach(cp); g_destroy_consumer(cp); g_destroy_provider(newpp); + mtx_destroy(&sc->sc_lock); g_free(gp->softc); g_destroy_geom(gp); return (error); @@ -259,10 +265,12 @@ g_nop_create(struct gctl_req *req, struct g_class 
*mp, struct g_provider *pp, static int g_nop_destroy(struct g_geom *gp, boolean_t force) { + struct g_nop_softc *sc; struct g_provider *pp; g_topology_assert(); - if (gp->softc == NULL) + sc = gp->softc; + if (sc == NULL) return (ENXIO); pp = LIST_FIRST(&gp->provider); if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) { @@ -277,8 +285,9 @@ g_nop_destroy(struct g_geom *gp, boolean_t force) } else { G_NOP_DEBUG(0, "Device %s removed.", gp->name); } - g_free(gp->softc); gp->softc = NULL; + mtx_destroy(&sc->sc_lock); + g_free(sc); g_wither_geom(gp, ENXIO); return (0); diff --git a/sys/geom/nop/g_nop.h b/sys/geom/nop/g_nop.h index da555ecb1002..3e37c05d93ed 100644 --- a/sys/geom/nop/g_nop.h +++ b/sys/geom/nop/g_nop.h @@ -65,6 +65,7 @@ struct g_nop_softc { uintmax_t sc_writes; uintmax_t sc_readbytes; uintmax_t sc_wrotebytes; + struct mtx sc_lock; }; #endif /* _KERNEL */ diff --git a/sys/geom/part/g_part.c b/sys/geom/part/g_part.c index 993b9a2093e2..08c612d57808 100644 --- a/sys/geom/part/g_part.c +++ b/sys/geom/part/g_part.c @@ -418,6 +418,7 @@ g_part_new_provider(struct g_geom *gp, struct g_part_table *table, sbuf_finish(sb); entry->gpe_pp = g_new_providerf(gp, "%s", sbuf_data(sb)); sbuf_delete(sb); + entry->gpe_pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; entry->gpe_pp->private = entry; /* Close the circle. */ } entry->gpe_pp->index = entry->gpe_index - 1; /* index is 1-based. */ @@ -930,6 +931,7 @@ g_part_ctl_create(struct gctl_req *req, struct g_part_parms *gpp) LIST_INIT(&table->gpt_entry); if (null == NULL) { cp = g_new_consumer(gp); + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error == 0) error = g_access(cp, 1, 1, 1); @@ -1886,6 +1888,7 @@ g_part_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) */ gp = g_new_geomf(mp, "%s", pp->name); cp = g_new_consumer(gp); + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; error = g_attach(cp, pp); if (error == 0) error = g_access(cp, 1, 0, 0); diff --git a/sys/geom/raid/g_raid.c b/sys/geom/raid/g_raid.c index 9933cf1f8d7b..a161f8a1004a 100644 --- a/sys/geom/raid/g_raid.c +++ b/sys/geom/raid/g_raid.c @@ -792,6 +792,7 @@ g_raid_open_consumer(struct g_raid_softc *sc, const char *name) if (pp == NULL) return (NULL); cp = g_new_consumer(sc->sc_geom); + cp->flags |= G_CF_DIRECT_RECEIVE; if (g_attach(cp, pp) != 0) { g_destroy_consumer(cp); return (NULL); @@ -1670,6 +1671,7 @@ g_raid_launch_provider(struct g_raid_volume *vol) } pp = g_new_providerf(sc->sc_geom, "%s", name); + pp->flags |= G_PF_DIRECT_RECEIVE; if (vol->v_tr->tro_class->trc_accept_unmapped) { pp->flags |= G_PF_ACCEPT_UNMAPPED; for (i = 0; i < vol->v_disks_count; i++) { @@ -2255,6 +2257,7 @@ g_raid_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) */ gp->orphan = g_raid_taste_orphan; cp = g_new_consumer(gp); + cp->flags |= G_CF_DIRECT_RECEIVE; g_attach(cp, pp); geom = NULL; diff --git a/sys/geom/raid/md_ddf.c b/sys/geom/raid/md_ddf.c index 5a173017078f..4e1545b6217f 100644 --- a/sys/geom/raid/md_ddf.c +++ b/sys/geom/raid/md_ddf.c @@ -2143,6 +2143,7 @@ g_raid_md_taste_ddf(struct g_raid_md_object *md, struct g_class *mp, } rcp = g_new_consumer(geom); + rcp->flags |= G_CF_DIRECT_RECEIVE; g_attach(rcp, pp); if (g_access(rcp, 1, 1, 1) != 0) ; //goto fail1; diff --git a/sys/geom/raid/md_intel.c b/sys/geom/raid/md_intel.c index eeb42d5f9d9a..11917f578e37 100644 --- a/sys/geom/raid/md_intel.c +++ b/sys/geom/raid/md_intel.c @@ -1477,6 +1477,7 @@ g_raid_md_taste_intel(struct g_raid_md_object *md, 
struct g_class *mp, } rcp = g_new_consumer(geom); + rcp->flags |= G_CF_DIRECT_RECEIVE; g_attach(rcp, pp); if (g_access(rcp, 1, 1, 1) != 0) ; //goto fail1; diff --git a/sys/geom/raid/md_jmicron.c b/sys/geom/raid/md_jmicron.c index a06221595ec3..2da4a33a799e 100644 --- a/sys/geom/raid/md_jmicron.c +++ b/sys/geom/raid/md_jmicron.c @@ -923,6 +923,7 @@ g_raid_md_taste_jmicron(struct g_raid_md_object *md, struct g_class *mp, } rcp = g_new_consumer(geom); + rcp->flags |= G_CF_DIRECT_RECEIVE; g_attach(rcp, pp); if (g_access(rcp, 1, 1, 1) != 0) ; //goto fail1; diff --git a/sys/geom/raid/md_nvidia.c b/sys/geom/raid/md_nvidia.c index 92d9f710ab94..25cc2cc099db 100644 --- a/sys/geom/raid/md_nvidia.c +++ b/sys/geom/raid/md_nvidia.c @@ -919,6 +919,7 @@ g_raid_md_taste_nvidia(struct g_raid_md_object *md, struct g_class *mp, } rcp = g_new_consumer(geom); + rcp->flags |= G_CF_DIRECT_RECEIVE; g_attach(rcp, pp); if (g_access(rcp, 1, 1, 1) != 0) ; //goto fail1; diff --git a/sys/geom/raid/md_promise.c b/sys/geom/raid/md_promise.c index 0007b20f778b..b1e442702815 100644 --- a/sys/geom/raid/md_promise.c +++ b/sys/geom/raid/md_promise.c @@ -1176,6 +1176,7 @@ g_raid_md_taste_promise(struct g_raid_md_object *md, struct g_class *mp, } rcp = g_new_consumer(geom); + rcp->flags |= G_CF_DIRECT_RECEIVE; g_attach(rcp, pp); if (g_access(rcp, 1, 1, 1) != 0) ; //goto fail1; diff --git a/sys/geom/raid/md_sii.c b/sys/geom/raid/md_sii.c index 03bb03b23a81..149b3369c6bd 100644 --- a/sys/geom/raid/md_sii.c +++ b/sys/geom/raid/md_sii.c @@ -1012,6 +1012,7 @@ g_raid_md_taste_sii(struct g_raid_md_object *md, struct g_class *mp, } rcp = g_new_consumer(geom); + rcp->flags |= G_CF_DIRECT_RECEIVE; g_attach(rcp, pp); if (g_access(rcp, 1, 1, 1) != 0) ; //goto fail1; diff --git a/sys/geom/stripe/g_stripe.c b/sys/geom/stripe/g_stripe.c index 575ec5f37745..b5d77c5303cf 100644 --- a/sys/geom/stripe/g_stripe.c +++ b/sys/geom/stripe/g_stripe.c @@ -284,22 +284,25 @@ g_stripe_done(struct bio *bp) pbp = bp->bio_parent; sc = pbp->bio_to->geom->softc; - if (pbp->bio_error == 0) - pbp->bio_error = bp->bio_error; - pbp->bio_completed += bp->bio_completed; if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) { g_stripe_copy(sc, bp->bio_data, bp->bio_caller1, bp->bio_offset, bp->bio_length, 1); bp->bio_data = bp->bio_caller1; bp->bio_caller1 = NULL; } - g_destroy_bio(bp); + mtx_lock(&sc->sc_lock); + if (pbp->bio_error == 0) + pbp->bio_error = bp->bio_error; + pbp->bio_completed += bp->bio_completed; pbp->bio_inbed++; if (pbp->bio_children == pbp->bio_inbed) { + mtx_unlock(&sc->sc_lock); if (pbp->bio_driver1 != NULL) uma_zfree(g_stripe_zone, pbp->bio_driver1); g_io_deliver(pbp, pbp->bio_error); - } + } else + mtx_unlock(&sc->sc_lock); + g_destroy_bio(bp); } static int @@ -442,7 +445,6 @@ g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length) sc = bp->bio_to->geom->softc; - addr = bp->bio_data; stripesize = sc->sc_stripesize; cbp = g_clone_bio(bp); @@ -454,10 +456,18 @@ g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length) /* * Fill in the component buf structure. */ - cbp->bio_done = g_std_done; + if (bp->bio_length == length) + cbp->bio_done = g_std_done; /* Optimized lockless case. 
*/ + else + cbp->bio_done = g_stripe_done; cbp->bio_offset = offset; - cbp->bio_data = addr; cbp->bio_length = length; + if ((bp->bio_flags & BIO_UNMAPPED) != 0) { + bp->bio_ma_n = round_page(bp->bio_ma_offset + + bp->bio_length) / PAGE_SIZE; + addr = NULL; + } else + addr = bp->bio_data; cbp->bio_caller2 = sc->sc_disks[no]; /* offset -= offset % stripesize; */ @@ -479,14 +489,21 @@ g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length) /* * Fill in the component buf structure. */ - cbp->bio_done = g_std_done; + cbp->bio_done = g_stripe_done; cbp->bio_offset = offset; - cbp->bio_data = addr; /* * MIN() is in case when * (bp->bio_length % sc->sc_stripesize) != 0. */ cbp->bio_length = MIN(stripesize, length); + if ((bp->bio_flags & BIO_UNMAPPED) != 0) { + cbp->bio_ma_offset += (uintptr_t)addr; + cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE; + cbp->bio_ma_offset %= PAGE_SIZE; + cbp->bio_ma_n = round_page(cbp->bio_ma_offset + + cbp->bio_length) / PAGE_SIZE; + } else + cbp->bio_data = addr; cbp->bio_caller2 = sc->sc_disks[no]; } @@ -536,15 +553,15 @@ g_stripe_flush(struct g_stripe_softc *sc, struct bio *bp) return; } bioq_insert_tail(&queue, cbp); - cbp->bio_done = g_std_done; - cbp->bio_caller1 = sc->sc_disks[no]; + cbp->bio_done = g_stripe_done; + cbp->bio_caller2 = sc->sc_disks[no]; cbp->bio_to = sc->sc_disks[no]->provider; } for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) { bioq_remove(&queue, cbp); G_STRIPE_LOGREQ(cbp, "Sending request."); - cp = cbp->bio_caller1; - cbp->bio_caller1 = NULL; + cp = cbp->bio_caller2; + cbp->bio_caller2 = NULL; g_io_request(cbp, cp); } } @@ -613,9 +630,12 @@ g_stripe_start(struct bio *bp) * 3. Request size is bigger than stripesize * ndisks. If it isn't, * there will be no need to send more than one I/O request to * a provider, so there is nothing to optmize. + * and + * 4. Request is not unmapped. */ if (g_stripe_fast && bp->bio_length <= MAXPHYS && - bp->bio_length >= stripesize * sc->sc_ndisks) { + bp->bio_length >= stripesize * sc->sc_ndisks && + (bp->bio_flags & BIO_UNMAPPED) == 0) { fast = 1; } error = 0; @@ -642,6 +662,7 @@ g_stripe_start(struct bio *bp) static void g_stripe_check_and_run(struct g_stripe_softc *sc) { + struct g_provider *dp; off_t mediasize, ms; u_int no, sectorsize = 0; @@ -651,6 +672,9 @@ g_stripe_check_and_run(struct g_stripe_softc *sc) sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s", sc->sc_name); + sc->sc_provider->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; + if (g_stripe_fast == 0) + sc->sc_provider->flags |= G_PF_ACCEPT_UNMAPPED; /* * Find the smallest disk. 
*/ @@ -660,14 +684,21 @@ g_stripe_check_and_run(struct g_stripe_softc *sc) mediasize -= mediasize % sc->sc_stripesize; sectorsize = sc->sc_disks[0]->provider->sectorsize; for (no = 1; no < sc->sc_ndisks; no++) { - ms = sc->sc_disks[no]->provider->mediasize; + dp = sc->sc_disks[no]->provider; + ms = dp->mediasize; if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) - ms -= sc->sc_disks[no]->provider->sectorsize; + ms -= dp->sectorsize; ms -= ms % sc->sc_stripesize; if (ms < mediasize) mediasize = ms; - sectorsize = lcm(sectorsize, - sc->sc_disks[no]->provider->sectorsize); + sectorsize = lcm(sectorsize, dp->sectorsize); + + /* A provider underneath us doesn't support unmapped */ + if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) { + G_STRIPE_DEBUG(1, "Cancelling unmapped " + "because of %s.", dp->name); + sc->sc_provider->flags &= ~G_PF_ACCEPT_UNMAPPED; + } } sc->sc_provider->sectorsize = sectorsize; sc->sc_provider->mediasize = mediasize * sc->sc_ndisks; @@ -729,6 +760,7 @@ g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no) fcp = LIST_FIRST(&gp->consumer); cp = g_new_consumer(gp); + cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE; cp->private = NULL; cp->index = no; error = g_attach(cp, pp); @@ -830,6 +862,7 @@ g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md, for (no = 0; no < sc->sc_ndisks; no++) sc->sc_disks[no] = NULL; sc->sc_type = type; + mtx_init(&sc->sc_lock, "gstripe lock", NULL, MTX_DEF); gp->softc = sc; sc->sc_geom = gp; @@ -878,6 +911,7 @@ g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force) KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)", gp->name)); free(sc->sc_disks, M_STRIPE); + mtx_destroy(&sc->sc_lock); free(sc, M_STRIPE); G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name); g_wither_geom(gp, ENXIO); diff --git a/sys/geom/stripe/g_stripe.h b/sys/geom/stripe/g_stripe.h index 2720c6f77958..fe4452b15fcc 100644 --- a/sys/geom/stripe/g_stripe.h +++ b/sys/geom/stripe/g_stripe.h @@ -76,6 +76,7 @@ struct g_stripe_softc { uint16_t sc_ndisks; uint32_t sc_stripesize; uint32_t sc_stripebits; + struct mtx sc_lock; }; #define sc_name sc_geom->name #endif /* _KERNEL */ diff --git a/sys/geom/zero/g_zero.c b/sys/geom/zero/g_zero.c index 311db54402b9..8cdfd904d42a 100644 --- a/sys/geom/zero/g_zero.c +++ b/sys/geom/zero/g_zero.c @@ -106,6 +106,7 @@ g_zero_init(struct g_class *mp) gp->start = g_zero_start; gp->access = g_std_access; gpp = pp = g_new_providerf(gp, "%s", gp->name); + pp->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE; if (!g_zero_clear) pp->flags |= G_PF_ACCEPT_UNMAPPED; pp->mediasize = 1152921504606846976LLU; diff --git a/sys/kern/subr_devstat.c b/sys/kern/subr_devstat.c index c753ac9e852f..6800ce3aeb18 100644 --- a/sys/kern/subr_devstat.c +++ b/sys/kern/subr_devstat.c @@ -131,6 +131,7 @@ devstat_new_entry(const void *dev_name, ds = devstat_alloc(); mtx_lock(&devstat_mutex); if (unit_number == -1) { + ds->unit_number = unit_number; ds->id = dev_name; binuptime(&ds->creation_time); devstat_generation++; @@ -242,7 +243,7 @@ devstat_remove_entry(struct devstat *ds) /* Remove this entry from the devstat queue */ atomic_add_acq_int(&ds->sequence1, 1); - if (ds->id == NULL) { + if (ds->unit_number != -1) { devstat_num_devs--; STAILQ_REMOVE(devstat_head, ds, devstat, dev_links); } diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 5443b61773b8..fce1f8aa7692 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -793,6 +793,8 @@ extern pid_t pid_max; #define THREAD_SLEEPING_OK() ((curthread)->td_no_sleeping--) 
+#define THREAD_CAN_SLEEP() ((curthread)->td_no_sleeping == 0) + #define PIDHASH(pid) (&pidhashtbl[(pid) & pidhash]) extern LIST_HEAD(pidhashhead, proc) *pidhashtbl; extern u_long pidhash;
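
For reference, the dispatch decision that g_io_request() makes on the way
down can be restated compactly.  The helper below is a condensed,
illustrative sketch of the checks described in the commit message
(capability flags on both endpoints, not a GEOM thread, a sleepable
context when a transient mapping may be needed, and at least half of the
kernel stack free); it is not the verbatim code from the patch.

/*
 * Condensed sketch of the direct-dispatch test in g_io_request().
 * Illustrative restatement of the patch's checks, not verbatim code.
 */
static int
g_direct_dispatch_ok(struct bio *bp, struct g_consumer *cp,
    struct g_provider *pp)
{
#ifdef GET_STACK_USAGE
	size_t st, su;

	/* Both endpoints must have declared the capability. */
	if ((cp->flags & G_CF_DIRECT_SEND) == 0 ||
	    (pp->flags & G_PF_DIRECT_RECEIVE) == 0)
		return (0);
	/* Never dispatch directly from g_up/g_down/g_event themselves. */
	if (g_is_geom_thread(curthread))
		return (0);
	/*
	 * If the bio is unmapped and the provider cannot take unmapped
	 * I/O, a transient mapping may be needed, which can sleep.
	 */
	if ((bp->bio_flags & BIO_UNMAPPED) != 0 &&
	    (pp->flags & G_PF_ACCEPT_UNMAPPED) == 0 &&
	    !THREAD_CAN_SLEEP())
		return (0);
	/* Block direct execution if less than half of the stack is left. */
	GET_STACK_USAGE(st, su);
	return (su * 2 <= st);
#else
	return (0);
#endif
}

g_io_deliver() applies the same pattern on the way up using
G_PF_DIRECT_SEND and G_CF_DIRECT_RECEIVE, calling biodone() directly when
the checks pass and queueing the bio to g_up otherwise.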