2004-05-20 10:20:49 +00:00
|
|
|
/*-
|
2005-02-27 23:07:47 +00:00
|
|
|
* Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
|
2004-05-20 10:20:49 +00:00
|
|
|
* All rights reserved.
|
|
|
|
*
|
|
|
|
* Redistribution and use in source and binary forms, with or without
|
|
|
|
* modification, are permitted provided that the following conditions
|
|
|
|
* are met:
|
|
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer.
|
|
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
|
|
* documentation and/or other materials provided with the distribution.
|
2006-02-01 12:06:01 +00:00
|
|
|
*
|
2004-05-20 10:20:49 +00:00
|
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
|
|
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
|
|
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
|
|
* SUCH DAMAGE.
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <sys/cdefs.h>
|
|
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
|
|
|
|
#include <sys/param.h>
|
|
|
|
#include <sys/systm.h>
|
|
|
|
#include <sys/kernel.h>
|
|
|
|
#include <sys/module.h>
|
|
|
|
#include <sys/lock.h>
|
|
|
|
#include <sys/mutex.h>
|
|
|
|
#include <sys/bio.h>
|
2011-07-11 05:22:31 +00:00
|
|
|
#include <sys/sbuf.h>
|
2004-05-20 10:20:49 +00:00
|
|
|
#include <sys/sysctl.h>
|
|
|
|
#include <sys/malloc.h>
|
2004-07-09 14:30:09 +00:00
|
|
|
#include <vm/uma.h>
|
2004-05-20 10:20:49 +00:00
|
|
|
#include <geom/geom.h>
|
|
|
|
#include <geom/stripe/g_stripe.h>
|
|
|
|
|
2011-02-25 10:24:35 +00:00
|
|
|
FEATURE(geom_stripe, "GEOM striping support");
|
2004-05-20 10:20:49 +00:00
|
|
|
|
2005-10-31 15:41:29 +00:00
|
|
|
static MALLOC_DEFINE(M_STRIPE, "stripe_data", "GEOM_STRIPE Data");
|
2004-05-20 10:20:49 +00:00
|
|
|
|
2004-07-09 14:30:09 +00:00
|
|
|
static uma_zone_t g_stripe_zone;
|
2004-05-20 10:20:49 +00:00
|
|
|
|
|
|
|
static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force);
|
|
|
|
static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp,
|
|
|
|
struct g_geom *gp);
|
|
|
|
|
|
|
|
static g_taste_t g_stripe_taste;
|
|
|
|
static g_ctl_req_t g_stripe_config;
|
|
|
|
static g_dumpconf_t g_stripe_dumpconf;
|
2004-07-09 14:30:09 +00:00
|
|
|
static g_init_t g_stripe_init;
|
|
|
|
static g_fini_t g_stripe_fini;
|
2004-05-20 10:20:49 +00:00
|
|
|
|
|
|
|
struct g_class g_stripe_class = {
|
|
|
|
.name = G_STRIPE_CLASS_NAME,
|
2004-08-08 07:57:53 +00:00
|
|
|
.version = G_VERSION,
|
2004-05-20 10:20:49 +00:00
|
|
|
.ctlreq = g_stripe_config,
|
|
|
|
.taste = g_stripe_taste,
|
2004-07-09 14:30:09 +00:00
|
|
|
.destroy_geom = g_stripe_destroy_geom,
|
|
|
|
.init = g_stripe_init,
|
|
|
|
.fini = g_stripe_fini
|
2004-05-20 10:20:49 +00:00
|
|
|
};
|
|
|
|
|
2004-07-09 14:30:09 +00:00
|
|
|
SYSCTL_DECL(_kern_geom);
|
2011-11-07 15:43:11 +00:00
|
|
|
static SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW, 0,
|
|
|
|
"GEOM_STRIPE stuff");
|
2004-07-09 14:30:09 +00:00
|
|
|
static u_int g_stripe_debug = 0;
|
2014-06-28 03:56:17 +00:00
|
|
|
SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RWTUN, &g_stripe_debug, 0,
|
2004-07-09 14:30:09 +00:00
|
|
|
"Debug level");
|
2004-12-09 12:26:47 +00:00
|
|
|
static int g_stripe_fast = 0;
|
2004-07-09 14:30:09 +00:00
|
|
|
static int
|
|
|
|
g_sysctl_stripe_fast(SYSCTL_HANDLER_ARGS)
|
|
|
|
{
|
|
|
|
int error, fast;
|
|
|
|
|
|
|
|
fast = g_stripe_fast;
|
2007-06-04 18:25:08 +00:00
|
|
|
error = sysctl_handle_int(oidp, &fast, 0, req);
|
2004-07-09 14:30:09 +00:00
|
|
|
if (error == 0 && req->newptr != NULL)
|
|
|
|
g_stripe_fast = fast;
|
|
|
|
return (error);
|
|
|
|
}
|
2014-06-28 03:56:17 +00:00
|
|
|
SYSCTL_PROC(_kern_geom_stripe, OID_AUTO, fast, CTLTYPE_INT | CTLFLAG_RWTUN,
|
2004-07-13 11:23:31 +00:00
|
|
|
NULL, 0, g_sysctl_stripe_fast, "I", "Fast, but memory-consuming, mode");
|
2009-09-04 19:20:46 +00:00
|
|
|
static u_int g_stripe_maxmem = MAXPHYS * 100;
|
2014-06-28 03:56:17 +00:00
|
|
|
SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, maxmem, CTLFLAG_RDTUN, &g_stripe_maxmem,
|
2004-07-13 11:23:31 +00:00
|
|
|
0, "Maximum memory that can be allocated in \"fast\" mode (in bytes)");
|
2004-08-06 10:19:34 +00:00
|
|
|
static u_int g_stripe_fast_failed = 0;
|
|
|
|
SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, fast_failed, CTLFLAG_RD,
|
|
|
|
&g_stripe_fast_failed, 0, "How many times \"fast\" mode failed");
|
2004-05-20 10:20:49 +00:00
|
|
|
|
|
|
|
/*
 * Greatest Common Divisor (Euclid's algorithm).
 *
 * Returns the largest value dividing both a and b.  When one argument
 * is zero the other one is returned, hence gcd(0, 0) == 0.
 */
static u_int
gcd(u_int a, u_int b)
{
	u_int c;

	while (b != 0) {
		c = a;
		a = b;
		b = (c % b);
	}
	return (a);
}

/*
 * Least Common Multiple.
 *
 * Divides by the GCD before multiplying, so the intermediate value
 * never exceeds the final result; the naive (a * b) / gcd(a, b) wraps
 * around in u_int even when the LCM itself fits (e.g. 65536 and 65536).
 * Returns 0 when both arguments are 0 instead of dividing by zero.
 */
static u_int
lcm(u_int a, u_int b)
{
	u_int g;

	g = gcd(a, b);
	if (g == 0)
		return (0);
	return ((a / g) * b);
}
|
|
|
|
|
2004-07-09 14:30:09 +00:00
|
|
|
static void
|
|
|
|
g_stripe_init(struct g_class *mp __unused)
|
|
|
|
{
|
|
|
|
|
2009-09-04 19:20:46 +00:00
|
|
|
g_stripe_zone = uma_zcreate("g_stripe_zone", MAXPHYS, NULL, NULL,
|
2004-07-09 14:30:09 +00:00
|
|
|
NULL, NULL, 0, 0);
|
2009-09-04 19:20:46 +00:00
|
|
|
g_stripe_maxmem -= g_stripe_maxmem % MAXPHYS;
|
|
|
|
uma_zone_set_max(g_stripe_zone, g_stripe_maxmem / MAXPHYS);
|
2004-07-09 14:30:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
g_stripe_fini(struct g_class *mp __unused)
|
|
|
|
{
|
|
|
|
|
|
|
|
uma_zdestroy(g_stripe_zone);
|
|
|
|
}
|
|
|
|
|
2004-05-20 10:20:49 +00:00
|
|
|
/*
|
|
|
|
* Return the number of valid disks.
|
|
|
|
*/
|
|
|
|
static u_int
|
|
|
|
g_stripe_nvalid(struct g_stripe_softc *sc)
|
|
|
|
{
|
|
|
|
u_int i, no;
|
|
|
|
|
|
|
|
no = 0;
|
|
|
|
for (i = 0; i < sc->sc_ndisks; i++) {
|
|
|
|
if (sc->sc_disks[i] != NULL)
|
|
|
|
no++;
|
|
|
|
}
|
|
|
|
|
|
|
|
return (no);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
g_stripe_remove_disk(struct g_consumer *cp)
|
|
|
|
{
|
|
|
|
struct g_stripe_softc *sc;
|
|
|
|
|
2011-11-01 17:04:42 +00:00
|
|
|
g_topology_assert();
|
2004-05-20 10:20:49 +00:00
|
|
|
KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__));
|
2011-11-01 17:04:42 +00:00
|
|
|
sc = (struct g_stripe_softc *)cp->geom->softc;
|
2004-05-20 10:20:49 +00:00
|
|
|
KASSERT(sc != NULL, ("NULL sc in %s.", __func__));
|
|
|
|
|
2011-11-01 17:04:42 +00:00
|
|
|
if (cp->private == NULL) {
|
|
|
|
G_STRIPE_DEBUG(0, "Disk %s removed from %s.",
|
|
|
|
cp->provider->name, sc->sc_name);
|
|
|
|
cp->private = (void *)(uintptr_t)-1;
|
|
|
|
}
|
2004-05-20 10:20:49 +00:00
|
|
|
|
|
|
|
if (sc->sc_provider != NULL) {
|
2005-07-17 13:15:02 +00:00
|
|
|
sc->sc_provider->flags |= G_PF_WITHER;
|
2011-11-01 17:04:42 +00:00
|
|
|
G_STRIPE_DEBUG(0, "Device %s deactivated.",
|
|
|
|
sc->sc_provider->name);
|
2004-05-20 10:20:49 +00:00
|
|
|
g_orphan_provider(sc->sc_provider, ENXIO);
|
|
|
|
sc->sc_provider = NULL;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
|
2011-11-01 17:04:42 +00:00
|
|
|
return;
|
|
|
|
sc->sc_disks[cp->index] = NULL;
|
|
|
|
cp->index = 0;
|
2004-05-20 10:20:49 +00:00
|
|
|
g_detach(cp);
|
|
|
|
g_destroy_consumer(cp);
|
2011-11-01 17:04:42 +00:00
|
|
|
/* If there are no valid disks anymore, remove device. */
|
|
|
|
if (LIST_EMPTY(&sc->sc_geom->consumer))
|
|
|
|
g_stripe_destroy(sc, 1);
|
2004-05-20 10:20:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
g_stripe_orphan(struct g_consumer *cp)
|
|
|
|
{
|
|
|
|
struct g_stripe_softc *sc;
|
|
|
|
struct g_geom *gp;
|
|
|
|
|
|
|
|
g_topology_assert();
|
|
|
|
gp = cp->geom;
|
|
|
|
sc = gp->softc;
|
|
|
|
if (sc == NULL)
|
|
|
|
return;
|
|
|
|
|
|
|
|
g_stripe_remove_disk(cp);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
g_stripe_access(struct g_provider *pp, int dr, int dw, int de)
|
|
|
|
{
|
2011-11-01 17:04:42 +00:00
|
|
|
struct g_consumer *cp1, *cp2, *tmp;
|
2004-05-20 10:20:49 +00:00
|
|
|
struct g_stripe_softc *sc;
|
|
|
|
struct g_geom *gp;
|
|
|
|
int error;
|
|
|
|
|
2011-11-01 17:04:42 +00:00
|
|
|
g_topology_assert();
|
2004-05-20 10:20:49 +00:00
|
|
|
gp = pp->geom;
|
|
|
|
sc = gp->softc;
|
2011-11-01 17:04:42 +00:00
|
|
|
KASSERT(sc != NULL, ("NULL sc in %s.", __func__));
|
2004-05-20 10:20:49 +00:00
|
|
|
|
|
|
|
/* On first open, grab an extra "exclusive" bit */
|
|
|
|
if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
|
|
|
|
de++;
|
|
|
|
/* ... and let go of it on last close */
|
|
|
|
if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0)
|
|
|
|
de--;
|
|
|
|
|
2011-11-01 17:04:42 +00:00
|
|
|
LIST_FOREACH_SAFE(cp1, &gp->consumer, consumer, tmp) {
|
2004-05-20 10:20:49 +00:00
|
|
|
error = g_access(cp1, dr, dw, de);
|
2011-11-01 17:04:42 +00:00
|
|
|
if (error != 0)
|
|
|
|
goto fail;
|
|
|
|
if (cp1->acr == 0 && cp1->acw == 0 && cp1->ace == 0 &&
|
|
|
|
cp1->private != NULL) {
|
|
|
|
g_stripe_remove_disk(cp1); /* May destroy geom. */
|
2004-05-20 10:20:49 +00:00
|
|
|
}
|
|
|
|
}
|
2011-11-01 17:04:42 +00:00
|
|
|
return (0);
|
2004-05-20 10:20:49 +00:00
|
|
|
|
2011-11-01 17:04:42 +00:00
|
|
|
fail:
|
|
|
|
LIST_FOREACH(cp2, &gp->consumer, consumer) {
|
|
|
|
if (cp1 == cp2)
|
|
|
|
break;
|
|
|
|
g_access(cp2, -dr, -dw, -de);
|
|
|
|
}
|
2004-05-20 10:20:49 +00:00
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2004-07-09 14:30:09 +00:00
|
|
|
g_stripe_copy(struct g_stripe_softc *sc, char *src, char *dst, off_t offset,
|
|
|
|
off_t length, int mode)
|
|
|
|
{
|
|
|
|
u_int stripesize;
|
|
|
|
size_t len;
|
|
|
|
|
|
|
|
stripesize = sc->sc_stripesize;
|
|
|
|
len = (size_t)(stripesize - (offset & (stripesize - 1)));
|
|
|
|
do {
|
|
|
|
bcopy(src, dst, len);
|
|
|
|
if (mode) {
|
|
|
|
dst += len + stripesize * (sc->sc_ndisks - 1);
|
|
|
|
src += len;
|
|
|
|
} else {
|
|
|
|
dst += len;
|
|
|
|
src += len + stripesize * (sc->sc_ndisks - 1);
|
|
|
|
}
|
|
|
|
length -= len;
|
|
|
|
KASSERT(length >= 0,
|
|
|
|
("Length < 0 (stripesize=%zu, offset=%jd, length=%jd).",
|
|
|
|
(size_t)stripesize, (intmax_t)offset, (intmax_t)length));
|
|
|
|
if (length > stripesize)
|
|
|
|
len = stripesize;
|
|
|
|
else
|
|
|
|
len = length;
|
|
|
|
} while (length > 0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
g_stripe_done(struct bio *bp)
|
|
|
|
{
|
|
|
|
struct g_stripe_softc *sc;
|
|
|
|
struct bio *pbp;
|
|
|
|
|
|
|
|
pbp = bp->bio_parent;
|
|
|
|
sc = pbp->bio_to->geom->softc;
|
2004-08-06 10:07:03 +00:00
|
|
|
if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) {
|
|
|
|
g_stripe_copy(sc, bp->bio_data, bp->bio_caller1, bp->bio_offset,
|
2004-07-09 14:30:09 +00:00
|
|
|
bp->bio_length, 1);
|
2004-08-06 10:07:03 +00:00
|
|
|
bp->bio_data = bp->bio_caller1;
|
|
|
|
bp->bio_caller1 = NULL;
|
2004-07-09 14:30:09 +00:00
|
|
|
}
|
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, it allows to avoid passing I/O requests
to GEOM g_up/g_down thread, executing them directly in the caller context.
That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid
several context switches per I/O.
The defined now safety requirements are:
- caller should not hold any locks and should be reenterable;
- callee should not depend on GEOM dual-threaded concurency semantics;
- on the way down, if request is unmapped while callee doesn't support it,
the context should be sleepable;
- kernel thread stack usage should be below 50%.
To keep compatibility with GEOM classes not meeting above requirements
new provider and consumer flags added:
- G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
- G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
- G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
- G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
Capable GEOM class can set them, allowing direct dispatch in cases where
it is safe. If any of requirements are not met, request is queued to
g_up or g_down thread same as before.
Such GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).
To declare direct completion capability disk(9) KPI got new flag equivalent
to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION. da(4) and ada(4) disk
drivers got it set now thanks to earlier CAM locking work.
This change more then twice increases peak block storage performance on
systems with manu CPUs, together with earlier CAM locking changes reaching
more then 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-22 08:22:19 +00:00
|
|
|
mtx_lock(&sc->sc_lock);
|
|
|
|
if (pbp->bio_error == 0)
|
|
|
|
pbp->bio_error = bp->bio_error;
|
|
|
|
pbp->bio_completed += bp->bio_completed;
|
2004-07-09 14:30:09 +00:00
|
|
|
pbp->bio_inbed++;
|
|
|
|
if (pbp->bio_children == pbp->bio_inbed) {
|
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, it allows to avoid passing I/O requests
to GEOM g_up/g_down thread, executing them directly in the caller context.
That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid
several context switches per I/O.
The defined now safety requirements are:
- caller should not hold any locks and should be reenterable;
- callee should not depend on GEOM dual-threaded concurency semantics;
- on the way down, if request is unmapped while callee doesn't support it,
the context should be sleepable;
- kernel thread stack usage should be below 50%.
To keep compatibility with GEOM classes not meeting above requirements
new provider and consumer flags added:
- G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
- G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
- G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
- G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
Capable GEOM class can set them, allowing direct dispatch in cases where
it is safe. If any of requirements are not met, request is queued to
g_up or g_down thread same as before.
Such GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).
To declare direct completion capability disk(9) KPI got new flag equivalent
to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION. da(4) and ada(4) disk
drivers got it set now thanks to earlier CAM locking work.
This change more then twice increases peak block storage performance on
systems with manu CPUs, together with earlier CAM locking changes reaching
more then 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-22 08:22:19 +00:00
|
|
|
mtx_unlock(&sc->sc_lock);
|
2004-08-06 10:07:03 +00:00
|
|
|
if (pbp->bio_driver1 != NULL)
|
|
|
|
uma_zfree(g_stripe_zone, pbp->bio_driver1);
|
2004-07-09 14:30:09 +00:00
|
|
|
g_io_deliver(pbp, pbp->bio_error);
|
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, it allows to avoid passing I/O requests
to GEOM g_up/g_down thread, executing them directly in the caller context.
That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid
several context switches per I/O.
The defined now safety requirements are:
- caller should not hold any locks and should be reenterable;
- callee should not depend on GEOM dual-threaded concurency semantics;
- on the way down, if request is unmapped while callee doesn't support it,
the context should be sleepable;
- kernel thread stack usage should be below 50%.
To keep compatibility with GEOM classes not meeting above requirements
new provider and consumer flags added:
- G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
- G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
- G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
- G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
Capable GEOM class can set them, allowing direct dispatch in cases where
it is safe. If any of requirements are not met, request is queued to
g_up or g_down thread same as before.
Such GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).
To declare direct completion capability disk(9) KPI got new flag equivalent
to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION. da(4) and ada(4) disk
drivers got it set now thanks to earlier CAM locking work.
This change more then twice increases peak block storage performance on
systems with manu CPUs, together with earlier CAM locking changes reaching
more then 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-22 08:22:19 +00:00
|
|
|
} else
|
|
|
|
mtx_unlock(&sc->sc_lock);
|
|
|
|
g_destroy_bio(bp);
|
2004-07-09 14:30:09 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
g_stripe_start_fast(struct bio *bp, u_int no, off_t offset, off_t length)
|
2004-05-20 10:20:49 +00:00
|
|
|
{
|
2004-07-09 14:30:09 +00:00
|
|
|
TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
|
|
|
|
u_int nparts = 0, stripesize;
|
2004-05-20 10:20:49 +00:00
|
|
|
struct g_stripe_softc *sc;
|
2004-07-09 14:30:09 +00:00
|
|
|
char *addr, *data = NULL;
|
2004-05-20 10:20:49 +00:00
|
|
|
struct bio *cbp;
|
2004-07-09 14:30:09 +00:00
|
|
|
int error;
|
|
|
|
|
|
|
|
sc = bp->bio_to->geom->softc;
|
|
|
|
|
|
|
|
addr = bp->bio_data;
|
|
|
|
stripesize = sc->sc_stripesize;
|
|
|
|
|
|
|
|
cbp = g_clone_bio(bp);
|
|
|
|
if (cbp == NULL) {
|
|
|
|
error = ENOMEM;
|
|
|
|
goto failure;
|
|
|
|
}
|
|
|
|
TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
|
|
|
|
nparts++;
|
|
|
|
/*
|
|
|
|
* Fill in the component buf structure.
|
|
|
|
*/
|
|
|
|
cbp->bio_done = g_stripe_done;
|
|
|
|
cbp->bio_offset = offset;
|
|
|
|
cbp->bio_data = addr;
|
2004-08-06 10:07:03 +00:00
|
|
|
cbp->bio_caller1 = NULL;
|
2004-07-09 14:30:09 +00:00
|
|
|
cbp->bio_length = length;
|
2004-08-06 10:07:03 +00:00
|
|
|
cbp->bio_caller2 = sc->sc_disks[no];
|
2004-07-09 14:30:09 +00:00
|
|
|
|
|
|
|
/* offset -= offset % stripesize; */
|
|
|
|
offset -= offset & (stripesize - 1);
|
|
|
|
addr += length;
|
|
|
|
length = bp->bio_length - length;
|
|
|
|
for (no++; length > 0; no++, length -= stripesize, addr += stripesize) {
|
|
|
|
if (no > sc->sc_ndisks - 1) {
|
|
|
|
no = 0;
|
|
|
|
offset += stripesize;
|
|
|
|
}
|
|
|
|
if (nparts >= sc->sc_ndisks) {
|
|
|
|
cbp = TAILQ_NEXT(cbp, bio_queue);
|
|
|
|
if (cbp == NULL)
|
|
|
|
cbp = TAILQ_FIRST(&queue);
|
|
|
|
nparts++;
|
|
|
|
/*
|
|
|
|
* Update bio structure.
|
|
|
|
*/
|
|
|
|
/*
|
|
|
|
* MIN() is in case when
|
|
|
|
* (bp->bio_length % sc->sc_stripesize) != 0.
|
|
|
|
*/
|
|
|
|
cbp->bio_length += MIN(stripesize, length);
|
2004-08-06 10:07:03 +00:00
|
|
|
if (cbp->bio_caller1 == NULL) {
|
|
|
|
cbp->bio_caller1 = cbp->bio_data;
|
2004-07-09 14:30:09 +00:00
|
|
|
cbp->bio_data = NULL;
|
|
|
|
if (data == NULL) {
|
|
|
|
data = uma_zalloc(g_stripe_zone,
|
|
|
|
M_NOWAIT);
|
|
|
|
if (data == NULL) {
|
|
|
|
error = ENOMEM;
|
|
|
|
goto failure;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
cbp = g_clone_bio(bp);
|
|
|
|
if (cbp == NULL) {
|
|
|
|
error = ENOMEM;
|
|
|
|
goto failure;
|
|
|
|
}
|
|
|
|
TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
|
|
|
|
nparts++;
|
|
|
|
/*
|
|
|
|
* Fill in the component buf structure.
|
|
|
|
*/
|
|
|
|
cbp->bio_done = g_stripe_done;
|
|
|
|
cbp->bio_offset = offset;
|
|
|
|
cbp->bio_data = addr;
|
2004-08-06 10:07:03 +00:00
|
|
|
cbp->bio_caller1 = NULL;
|
2004-07-09 14:30:09 +00:00
|
|
|
/*
|
|
|
|
* MIN() is in case when
|
|
|
|
* (bp->bio_length % sc->sc_stripesize) != 0.
|
|
|
|
*/
|
|
|
|
cbp->bio_length = MIN(stripesize, length);
|
2004-08-06 10:07:03 +00:00
|
|
|
cbp->bio_caller2 = sc->sc_disks[no];
|
2004-07-09 14:30:09 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
if (data != NULL)
|
2004-08-10 19:07:55 +00:00
|
|
|
bp->bio_driver1 = data;
|
2004-07-09 14:30:09 +00:00
|
|
|
/*
|
|
|
|
* Fire off all allocated requests!
|
|
|
|
*/
|
|
|
|
while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
|
|
|
|
struct g_consumer *cp;
|
|
|
|
|
|
|
|
TAILQ_REMOVE(&queue, cbp, bio_queue);
|
2004-08-06 10:07:03 +00:00
|
|
|
cp = cbp->bio_caller2;
|
|
|
|
cbp->bio_caller2 = NULL;
|
2004-07-09 14:30:09 +00:00
|
|
|
cbp->bio_to = cp->provider;
|
2004-08-06 10:07:03 +00:00
|
|
|
if (cbp->bio_caller1 != NULL) {
|
2004-07-09 14:30:09 +00:00
|
|
|
cbp->bio_data = data;
|
|
|
|
if (bp->bio_cmd == BIO_WRITE) {
|
2004-08-06 10:07:03 +00:00
|
|
|
g_stripe_copy(sc, cbp->bio_caller1, data,
|
2004-07-09 14:30:09 +00:00
|
|
|
cbp->bio_offset, cbp->bio_length, 0);
|
|
|
|
}
|
|
|
|
data += cbp->bio_length;
|
|
|
|
}
|
|
|
|
G_STRIPE_LOGREQ(cbp, "Sending request.");
|
|
|
|
g_io_request(cbp, cp);
|
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
failure:
|
|
|
|
if (data != NULL)
|
|
|
|
uma_zfree(g_stripe_zone, data);
|
|
|
|
while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
|
|
|
|
TAILQ_REMOVE(&queue, cbp, bio_queue);
|
2004-08-06 10:07:03 +00:00
|
|
|
if (cbp->bio_caller1 != NULL) {
|
|
|
|
cbp->bio_data = cbp->bio_caller1;
|
|
|
|
cbp->bio_caller1 = NULL;
|
2004-07-09 14:30:09 +00:00
|
|
|
}
|
2004-08-06 09:55:40 +00:00
|
|
|
bp->bio_children--;
|
2004-07-09 14:30:09 +00:00
|
|
|
g_destroy_bio(cbp);
|
|
|
|
}
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length)
|
|
|
|
{
|
|
|
|
TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
|
|
|
|
struct g_stripe_softc *sc;
|
2004-05-20 10:20:49 +00:00
|
|
|
uint32_t stripesize;
|
2004-07-09 14:30:09 +00:00
|
|
|
struct bio *cbp;
|
2004-05-20 10:20:49 +00:00
|
|
|
char *addr;
|
2004-07-09 14:30:09 +00:00
|
|
|
int error;
|
|
|
|
|
|
|
|
sc = bp->bio_to->geom->softc;
|
2004-05-20 10:20:49 +00:00
|
|
|
|
2004-07-09 14:30:09 +00:00
|
|
|
stripesize = sc->sc_stripesize;
|
|
|
|
|
|
|
|
cbp = g_clone_bio(bp);
|
|
|
|
if (cbp == NULL) {
|
|
|
|
error = ENOMEM;
|
|
|
|
goto failure;
|
|
|
|
}
|
|
|
|
TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
|
|
|
|
/*
|
|
|
|
* Fill in the component buf structure.
|
|
|
|
*/
|
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, it allows to avoid passing I/O requests
to GEOM g_up/g_down thread, executing them directly in the caller context.
That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid
several context switches per I/O.
The defined now safety requirements are:
- caller should not hold any locks and should be reenterable;
- callee should not depend on GEOM dual-threaded concurency semantics;
- on the way down, if request is unmapped while callee doesn't support it,
the context should be sleepable;
- kernel thread stack usage should be below 50%.
To keep compatibility with GEOM classes not meeting above requirements
new provider and consumer flags added:
- G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
- G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
- G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
- G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
Capable GEOM class can set them, allowing direct dispatch in cases where
it is safe. If any of requirements are not met, request is queued to
g_up or g_down thread same as before.
Such GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).
To declare direct completion capability disk(9) KPI got new flag equivalent
to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION. da(4) and ada(4) disk
drivers got it set now thanks to earlier CAM locking work.
This change more then twice increases peak block storage performance on
systems with manu CPUs, together with earlier CAM locking changes reaching
more then 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-22 08:22:19 +00:00
|
|
|
if (bp->bio_length == length)
|
|
|
|
cbp->bio_done = g_std_done; /* Optimized lockless case. */
|
|
|
|
else
|
|
|
|
cbp->bio_done = g_stripe_done;
|
2004-07-09 14:30:09 +00:00
|
|
|
cbp->bio_offset = offset;
|
|
|
|
cbp->bio_length = length;
|
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, it allows to avoid passing I/O requests
to GEOM g_up/g_down thread, executing them directly in the caller context.
That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid
several context switches per I/O.
The defined now safety requirements are:
- caller should not hold any locks and should be reenterable;
- callee should not depend on GEOM dual-threaded concurency semantics;
- on the way down, if request is unmapped while callee doesn't support it,
the context should be sleepable;
- kernel thread stack usage should be below 50%.
To keep compatibility with GEOM classes not meeting above requirements
new provider and consumer flags added:
- G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
- G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
- G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
- G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
Capable GEOM class can set them, allowing direct dispatch in cases where
it is safe. If any of requirements are not met, request is queued to
g_up or g_down thread same as before.
Such GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).
To declare direct completion capability disk(9) KPI got new flag equivalent
to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION. da(4) and ada(4) disk
drivers got it set now thanks to earlier CAM locking work.
This change more then twice increases peak block storage performance on
systems with manu CPUs, together with earlier CAM locking changes reaching
more then 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-22 08:22:19 +00:00
|
|
|
if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
|
|
|
|
bp->bio_ma_n = round_page(bp->bio_ma_offset +
|
|
|
|
bp->bio_length) / PAGE_SIZE;
|
|
|
|
addr = NULL;
|
|
|
|
} else
|
|
|
|
addr = bp->bio_data;
|
2004-08-06 10:07:03 +00:00
|
|
|
cbp->bio_caller2 = sc->sc_disks[no];
|
2004-07-09 14:30:09 +00:00
|
|
|
|
|
|
|
/* offset -= offset % stripesize; */
|
|
|
|
offset -= offset & (stripesize - 1);
|
2014-04-10 10:12:56 +00:00
|
|
|
if (bp->bio_cmd != BIO_DELETE)
|
|
|
|
addr += length;
|
2004-07-09 14:30:09 +00:00
|
|
|
length = bp->bio_length - length;
|
2014-04-10 10:12:56 +00:00
|
|
|
for (no++; length > 0; no++, length -= stripesize) {
|
2004-07-09 14:30:09 +00:00
|
|
|
if (no > sc->sc_ndisks - 1) {
|
|
|
|
no = 0;
|
|
|
|
offset += stripesize;
|
|
|
|
}
|
|
|
|
cbp = g_clone_bio(bp);
|
|
|
|
if (cbp == NULL) {
|
|
|
|
error = ENOMEM;
|
|
|
|
goto failure;
|
|
|
|
}
|
|
|
|
TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Fill in the component buf structure.
|
|
|
|
*/
|
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, it allows to avoid passing I/O requests
to GEOM g_up/g_down thread, executing them directly in the caller context.
That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid
several context switches per I/O.
The defined now safety requirements are:
- caller should not hold any locks and should be reenterable;
- callee should not depend on GEOM dual-threaded concurency semantics;
- on the way down, if request is unmapped while callee doesn't support it,
the context should be sleepable;
- kernel thread stack usage should be below 50%.
To keep compatibility with GEOM classes not meeting above requirements
new provider and consumer flags added:
- G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
- G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
- G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
- G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
Capable GEOM class can set them, allowing direct dispatch in cases where
it is safe. If any of requirements are not met, request is queued to
g_up or g_down thread same as before.
Such GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).
To declare direct completion capability disk(9) KPI got new flag equivalent
to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION. da(4) and ada(4) disk
drivers got it set now thanks to earlier CAM locking work.
This change more then twice increases peak block storage performance on
systems with manu CPUs, together with earlier CAM locking changes reaching
more then 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-22 08:22:19 +00:00
|
|
|
cbp->bio_done = g_stripe_done;
|
2004-07-09 14:30:09 +00:00
|
|
|
cbp->bio_offset = offset;
|
|
|
|
/*
|
|
|
|
* MIN() is in case when
|
|
|
|
* (bp->bio_length % sc->sc_stripesize) != 0.
|
|
|
|
*/
|
|
|
|
cbp->bio_length = MIN(stripesize, length);
|
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, it allows to avoid passing I/O requests
to GEOM g_up/g_down thread, executing them directly in the caller context.
That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid
several context switches per I/O.
The defined now safety requirements are:
- caller should not hold any locks and should be reenterable;
- callee should not depend on GEOM dual-threaded concurency semantics;
- on the way down, if request is unmapped while callee doesn't support it,
the context should be sleepable;
- kernel thread stack usage should be below 50%.
To keep compatibility with GEOM classes not meeting above requirements
new provider and consumer flags added:
- G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
- G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
- G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
- G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
Capable GEOM class can set them, allowing direct dispatch in cases where
it is safe. If any of requirements are not met, request is queued to
g_up or g_down thread same as before.
Such GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).
To declare direct completion capability disk(9) KPI got new flag equivalent
to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION. da(4) and ada(4) disk
drivers got it set now thanks to earlier CAM locking work.
This change more then twice increases peak block storage performance on
systems with manu CPUs, together with earlier CAM locking changes reaching
more then 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-22 08:22:19 +00:00
|
|
|
if ((bp->bio_flags & BIO_UNMAPPED) != 0) {
|
|
|
|
cbp->bio_ma_offset += (uintptr_t)addr;
|
|
|
|
cbp->bio_ma += cbp->bio_ma_offset / PAGE_SIZE;
|
|
|
|
cbp->bio_ma_offset %= PAGE_SIZE;
|
|
|
|
cbp->bio_ma_n = round_page(cbp->bio_ma_offset +
|
|
|
|
cbp->bio_length) / PAGE_SIZE;
|
|
|
|
} else
|
|
|
|
cbp->bio_data = addr;
|
2004-07-09 14:30:09 +00:00
|
|
|
|
2004-08-06 10:07:03 +00:00
|
|
|
cbp->bio_caller2 = sc->sc_disks[no];
|
2014-04-10 10:12:56 +00:00
|
|
|
|
|
|
|
if (bp->bio_cmd != BIO_DELETE)
|
|
|
|
addr += stripesize;
|
2004-07-09 14:30:09 +00:00
|
|
|
}
|
|
|
|
/*
|
|
|
|
* Fire off all allocated requests!
|
|
|
|
*/
|
|
|
|
while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
|
|
|
|
struct g_consumer *cp;
|
|
|
|
|
|
|
|
TAILQ_REMOVE(&queue, cbp, bio_queue);
|
2004-08-06 10:07:03 +00:00
|
|
|
cp = cbp->bio_caller2;
|
|
|
|
cbp->bio_caller2 = NULL;
|
2004-07-09 14:30:09 +00:00
|
|
|
cbp->bio_to = cp->provider;
|
|
|
|
G_STRIPE_LOGREQ(cbp, "Sending request.");
|
|
|
|
g_io_request(cbp, cp);
|
|
|
|
}
|
|
|
|
return (0);
|
|
|
|
failure:
|
|
|
|
while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
|
|
|
|
TAILQ_REMOVE(&queue, cbp, bio_queue);
|
2004-08-06 09:55:40 +00:00
|
|
|
bp->bio_children--;
|
2004-07-09 14:30:09 +00:00
|
|
|
g_destroy_bio(cbp);
|
|
|
|
}
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
2006-10-31 21:23:51 +00:00
|
|
|
static void
|
|
|
|
g_stripe_flush(struct g_stripe_softc *sc, struct bio *bp)
|
|
|
|
{
|
|
|
|
struct bio_queue_head queue;
|
|
|
|
struct g_consumer *cp;
|
|
|
|
struct bio *cbp;
|
|
|
|
u_int no;
|
|
|
|
|
|
|
|
bioq_init(&queue);
|
|
|
|
for (no = 0; no < sc->sc_ndisks; no++) {
|
|
|
|
cbp = g_clone_bio(bp);
|
|
|
|
if (cbp == NULL) {
|
|
|
|
for (cbp = bioq_first(&queue); cbp != NULL;
|
|
|
|
cbp = bioq_first(&queue)) {
|
|
|
|
bioq_remove(&queue, cbp);
|
|
|
|
g_destroy_bio(cbp);
|
|
|
|
}
|
|
|
|
if (bp->bio_error == 0)
|
|
|
|
bp->bio_error = ENOMEM;
|
|
|
|
g_io_deliver(bp, bp->bio_error);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
bioq_insert_tail(&queue, cbp);
|
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, it allows to avoid passing I/O requests
to GEOM g_up/g_down thread, executing them directly in the caller context.
That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid
several context switches per I/O.
The defined now safety requirements are:
- caller should not hold any locks and should be reenterable;
- callee should not depend on GEOM dual-threaded concurency semantics;
- on the way down, if request is unmapped while callee doesn't support it,
the context should be sleepable;
- kernel thread stack usage should be below 50%.
To keep compatibility with GEOM classes not meeting above requirements
new provider and consumer flags added:
- G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
- G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
- G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
- G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
Capable GEOM class can set them, allowing direct dispatch in cases where
it is safe. If any of requirements are not met, request is queued to
g_up or g_down thread same as before.
Such GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).
To declare direct completion capability disk(9) KPI got new flag equivalent
to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION. da(4) and ada(4) disk
drivers got it set now thanks to earlier CAM locking work.
This change more then twice increases peak block storage performance on
systems with manu CPUs, together with earlier CAM locking changes reaching
more then 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-22 08:22:19 +00:00
|
|
|
cbp->bio_done = g_stripe_done;
|
|
|
|
cbp->bio_caller2 = sc->sc_disks[no];
|
2006-10-31 21:23:51 +00:00
|
|
|
cbp->bio_to = sc->sc_disks[no]->provider;
|
|
|
|
}
|
|
|
|
for (cbp = bioq_first(&queue); cbp != NULL; cbp = bioq_first(&queue)) {
|
|
|
|
bioq_remove(&queue, cbp);
|
|
|
|
G_STRIPE_LOGREQ(cbp, "Sending request.");
|
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, it allows to avoid passing I/O requests
to GEOM g_up/g_down thread, executing them directly in the caller context.
That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid
several context switches per I/O.
The defined now safety requirements are:
- caller should not hold any locks and should be reenterable;
- callee should not depend on GEOM dual-threaded concurency semantics;
- on the way down, if request is unmapped while callee doesn't support it,
the context should be sleepable;
- kernel thread stack usage should be below 50%.
To keep compatibility with GEOM classes not meeting above requirements
new provider and consumer flags added:
- G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
- G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
- G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
- G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
Capable GEOM class can set them, allowing direct dispatch in cases where
it is safe. If any of requirements are not met, request is queued to
g_up or g_down thread same as before.
Such GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).
To declare direct completion capability disk(9) KPI got new flag equivalent
to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION. da(4) and ada(4) disk
drivers got it set now thanks to earlier CAM locking work.
This change more then twice increases peak block storage performance on
systems with manu CPUs, together with earlier CAM locking changes reaching
more then 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-22 08:22:19 +00:00
|
|
|
cp = cbp->bio_caller2;
|
|
|
|
cbp->bio_caller2 = NULL;
|
2006-10-31 21:23:51 +00:00
|
|
|
g_io_request(cbp, cp);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2004-07-09 14:30:09 +00:00
|
|
|
static void
|
|
|
|
g_stripe_start(struct bio *bp)
|
|
|
|
{
|
|
|
|
off_t offset, start, length, nstripe;
|
|
|
|
struct g_stripe_softc *sc;
|
|
|
|
u_int no, stripesize;
|
|
|
|
int error, fast = 0;
|
|
|
|
|
|
|
|
sc = bp->bio_to->geom->softc;
|
2004-05-20 10:20:49 +00:00
|
|
|
/*
|
|
|
|
* If sc == NULL, provider's error should be set and g_stripe_start()
|
|
|
|
* should not be called at all.
|
|
|
|
*/
|
|
|
|
KASSERT(sc != NULL,
|
|
|
|
("Provider's error should be set (error=%d)(device=%s).",
|
|
|
|
bp->bio_to->error, bp->bio_to->name));
|
|
|
|
|
|
|
|
G_STRIPE_LOGREQ(bp, "Request received.");
|
|
|
|
|
|
|
|
switch (bp->bio_cmd) {
|
|
|
|
case BIO_READ:
|
|
|
|
case BIO_WRITE:
|
|
|
|
case BIO_DELETE:
|
|
|
|
break;
|
2006-11-01 22:16:53 +00:00
|
|
|
case BIO_FLUSH:
|
|
|
|
g_stripe_flush(sc, bp);
|
|
|
|
return;
|
2004-05-20 10:20:49 +00:00
|
|
|
case BIO_GETATTR:
|
|
|
|
/* To which provider it should be delivered? */
|
|
|
|
default:
|
|
|
|
g_io_deliver(bp, EOPNOTSUPP);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
stripesize = sc->sc_stripesize;
|
|
|
|
|
|
|
|
/*
|
2004-07-09 14:30:09 +00:00
|
|
|
* Calculations are quite messy, but fast I hope.
|
2004-05-20 10:20:49 +00:00
|
|
|
*/
|
|
|
|
|
|
|
|
/* Stripe number. */
|
|
|
|
/* nstripe = bp->bio_offset / stripesize; */
|
|
|
|
nstripe = bp->bio_offset >> (off_t)sc->sc_stripebits;
|
|
|
|
/* Disk number. */
|
|
|
|
no = nstripe % sc->sc_ndisks;
|
|
|
|
/* Start position in stripe. */
|
|
|
|
/* start = bp->bio_offset % stripesize; */
|
|
|
|
start = bp->bio_offset & (stripesize - 1);
|
|
|
|
/* Start position in disk. */
|
2004-07-09 14:30:09 +00:00
|
|
|
/* offset = (nstripe / sc->sc_ndisks) * stripesize + start; */
|
|
|
|
offset = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start;
|
2004-05-20 10:20:49 +00:00
|
|
|
/* Length of data to operate. */
|
|
|
|
length = MIN(bp->bio_length, stripesize - start);
|
|
|
|
|
2004-07-09 14:30:09 +00:00
|
|
|
/*
|
|
|
|
* Do use "fast" mode when:
|
|
|
|
* 1. "Fast" mode is ON.
|
|
|
|
* and
|
2009-09-04 19:20:46 +00:00
|
|
|
* 2. Request size is less than or equal to MAXPHYS,
|
2004-07-09 14:30:09 +00:00
|
|
|
* which should always be true.
|
|
|
|
* and
|
|
|
|
* 3. Request size is bigger than stripesize * ndisks. If it isn't,
|
|
|
|
* there will be no need to send more than one I/O request to
|
|
|
|
* a provider, so there is nothing to optmize.
|
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, it allows to avoid passing I/O requests
to GEOM g_up/g_down thread, executing them directly in the caller context.
That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid
several context switches per I/O.
The defined now safety requirements are:
- caller should not hold any locks and should be reenterable;
- callee should not depend on GEOM dual-threaded concurency semantics;
- on the way down, if request is unmapped while callee doesn't support it,
the context should be sleepable;
- kernel thread stack usage should be below 50%.
To keep compatibility with GEOM classes not meeting above requirements
new provider and consumer flags added:
- G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
- G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
- G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
- G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
Capable GEOM class can set them, allowing direct dispatch in cases where
it is safe. If any of requirements are not met, request is queued to
g_up or g_down thread same as before.
Such GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).
To declare direct completion capability disk(9) KPI got new flag equivalent
to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION. da(4) and ada(4) disk
drivers got it set now thanks to earlier CAM locking work.
This change more then twice increases peak block storage performance on
systems with manu CPUs, together with earlier CAM locking changes reaching
more then 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-22 08:22:19 +00:00
|
|
|
* and
|
|
|
|
* 4. Request is not unmapped.
|
2014-04-10 10:12:56 +00:00
|
|
|
* and
|
|
|
|
* 5. It is not a BIO_DELETE.
|
2004-07-09 14:30:09 +00:00
|
|
|
*/
|
2009-09-04 19:20:46 +00:00
|
|
|
if (g_stripe_fast && bp->bio_length <= MAXPHYS &&
|
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, it allows to avoid passing I/O requests
to GEOM g_up/g_down thread, executing them directly in the caller context.
That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid
several context switches per I/O.
The defined now safety requirements are:
- caller should not hold any locks and should be reenterable;
- callee should not depend on GEOM dual-threaded concurency semantics;
- on the way down, if request is unmapped while callee doesn't support it,
the context should be sleepable;
- kernel thread stack usage should be below 50%.
To keep compatibility with GEOM classes not meeting above requirements
new provider and consumer flags added:
- G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
- G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
- G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
- G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
Capable GEOM class can set them, allowing direct dispatch in cases where
it is safe. If any of requirements are not met, request is queued to
g_up or g_down thread same as before.
Such GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).
To declare direct completion capability disk(9) KPI got new flag equivalent
to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION. da(4) and ada(4) disk
drivers got it set now thanks to earlier CAM locking work.
This change more then twice increases peak block storage performance on
systems with manu CPUs, together with earlier CAM locking changes reaching
more then 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-22 08:22:19 +00:00
|
|
|
bp->bio_length >= stripesize * sc->sc_ndisks &&
|
2014-04-10 10:12:56 +00:00
|
|
|
(bp->bio_flags & BIO_UNMAPPED) == 0 &&
|
|
|
|
bp->bio_cmd != BIO_DELETE) {
|
2004-07-09 14:30:09 +00:00
|
|
|
fast = 1;
|
2004-05-20 10:20:49 +00:00
|
|
|
}
|
2004-07-09 14:30:09 +00:00
|
|
|
error = 0;
|
2004-08-06 10:19:34 +00:00
|
|
|
if (fast) {
|
2004-07-09 14:30:09 +00:00
|
|
|
error = g_stripe_start_fast(bp, no, offset, length);
|
2004-08-06 10:19:34 +00:00
|
|
|
if (error != 0)
|
|
|
|
g_stripe_fast_failed++;
|
|
|
|
}
|
2004-05-20 10:20:49 +00:00
|
|
|
/*
|
2004-07-09 14:30:09 +00:00
|
|
|
* Do use "economic" when:
|
|
|
|
* 1. "Economic" mode is ON.
|
|
|
|
* or
|
2010-02-18 22:28:12 +00:00
|
|
|
* 2. "Fast" mode failed. It can only fail if there is no memory.
|
2004-05-20 10:20:49 +00:00
|
|
|
*/
|
2004-07-09 14:30:09 +00:00
|
|
|
if (!fast || error != 0)
|
|
|
|
error = g_stripe_start_economic(bp, no, offset, length);
|
|
|
|
if (error != 0) {
|
|
|
|
if (bp->bio_error == 0)
|
|
|
|
bp->bio_error = error;
|
|
|
|
g_io_deliver(bp, bp->bio_error);
|
2004-05-20 10:20:49 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
g_stripe_check_and_run(struct g_stripe_softc *sc)
|
|
|
|
{
|
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, it allows to avoid passing I/O requests
to GEOM g_up/g_down thread, executing them directly in the caller context.
That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid
several context switches per I/O.
The defined now safety requirements are:
- caller should not hold any locks and should be reenterable;
- callee should not depend on GEOM dual-threaded concurency semantics;
- on the way down, if request is unmapped while callee doesn't support it,
the context should be sleepable;
- kernel thread stack usage should be below 50%.
To keep compatibility with GEOM classes not meeting above requirements
new provider and consumer flags added:
- G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
- G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
- G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
- G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
Capable GEOM class can set them, allowing direct dispatch in cases where
it is safe. If any of requirements are not met, request is queued to
g_up or g_down thread same as before.
Such GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).
To declare direct completion capability disk(9) KPI got new flag equivalent
to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION. da(4) and ada(4) disk
drivers got it set now thanks to earlier CAM locking work.
This change more then twice increases peak block storage performance on
systems with manu CPUs, together with earlier CAM locking changes reaching
more then 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-22 08:22:19 +00:00
|
|
|
struct g_provider *dp;
|
2004-05-20 10:20:49 +00:00
|
|
|
off_t mediasize, ms;
|
|
|
|
u_int no, sectorsize = 0;
|
|
|
|
|
2011-11-01 17:04:42 +00:00
|
|
|
g_topology_assert();
|
2004-05-20 10:20:49 +00:00
|
|
|
if (g_stripe_nvalid(sc) != sc->sc_ndisks)
|
|
|
|
return;
|
|
|
|
|
2004-07-26 16:10:27 +00:00
|
|
|
sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s",
|
|
|
|
sc->sc_name);
|
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, it allows to avoid passing I/O requests
to GEOM g_up/g_down thread, executing them directly in the caller context.
That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid
several context switches per I/O.
The defined now safety requirements are:
- caller should not hold any locks and should be reenterable;
- callee should not depend on GEOM dual-threaded concurency semantics;
- on the way down, if request is unmapped while callee doesn't support it,
the context should be sleepable;
- kernel thread stack usage should be below 50%.
To keep compatibility with GEOM classes not meeting above requirements
new provider and consumer flags added:
- G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
- G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
- G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
- G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
Capable GEOM class can set them, allowing direct dispatch in cases where
it is safe. If any of requirements are not met, request is queued to
g_up or g_down thread same as before.
Such GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).
To declare direct completion capability disk(9) KPI got new flag equivalent
to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION. da(4) and ada(4) disk
drivers got it set now thanks to earlier CAM locking work.
This change more then twice increases peak block storage performance on
systems with manu CPUs, together with earlier CAM locking changes reaching
more then 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-22 08:22:19 +00:00
|
|
|
sc->sc_provider->flags |= G_PF_DIRECT_SEND | G_PF_DIRECT_RECEIVE;
|
|
|
|
if (g_stripe_fast == 0)
|
|
|
|
sc->sc_provider->flags |= G_PF_ACCEPT_UNMAPPED;
|
2004-05-20 10:20:49 +00:00
|
|
|
/*
|
|
|
|
* Find the smallest disk.
|
|
|
|
*/
|
|
|
|
mediasize = sc->sc_disks[0]->provider->mediasize;
|
|
|
|
if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC)
|
|
|
|
mediasize -= sc->sc_disks[0]->provider->sectorsize;
|
|
|
|
mediasize -= mediasize % sc->sc_stripesize;
|
|
|
|
sectorsize = sc->sc_disks[0]->provider->sectorsize;
|
|
|
|
for (no = 1; no < sc->sc_ndisks; no++) {
|
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, it allows to avoid passing I/O requests
to GEOM g_up/g_down thread, executing them directly in the caller context.
That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid
several context switches per I/O.
The defined now safety requirements are:
- caller should not hold any locks and should be reenterable;
- callee should not depend on GEOM dual-threaded concurency semantics;
- on the way down, if request is unmapped while callee doesn't support it,
the context should be sleepable;
- kernel thread stack usage should be below 50%.
To keep compatibility with GEOM classes not meeting above requirements
new provider and consumer flags added:
- G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
- G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
- G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
- G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
Capable GEOM class can set them, allowing direct dispatch in cases where
it is safe. If any of requirements are not met, request is queued to
g_up or g_down thread same as before.
Such GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).
To declare direct completion capability disk(9) KPI got new flag equivalent
to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION. da(4) and ada(4) disk
drivers got it set now thanks to earlier CAM locking work.
This change more then twice increases peak block storage performance on
systems with manu CPUs, together with earlier CAM locking changes reaching
more then 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-22 08:22:19 +00:00
|
|
|
dp = sc->sc_disks[no]->provider;
|
|
|
|
ms = dp->mediasize;
|
2004-05-20 10:20:49 +00:00
|
|
|
if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC)
|
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, it allows to avoid passing I/O requests
to GEOM g_up/g_down thread, executing them directly in the caller context.
That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid
several context switches per I/O.
The defined now safety requirements are:
- caller should not hold any locks and should be reenterable;
- callee should not depend on GEOM dual-threaded concurency semantics;
- on the way down, if request is unmapped while callee doesn't support it,
the context should be sleepable;
- kernel thread stack usage should be below 50%.
To keep compatibility with GEOM classes not meeting above requirements
new provider and consumer flags added:
- G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
- G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
- G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
- G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
Capable GEOM class can set them, allowing direct dispatch in cases where
it is safe. If any of requirements are not met, request is queued to
g_up or g_down thread same as before.
Such GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).
To declare direct completion capability disk(9) KPI got new flag equivalent
to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION. da(4) and ada(4) disk
drivers got it set now thanks to earlier CAM locking work.
This change more then twice increases peak block storage performance on
systems with manu CPUs, together with earlier CAM locking changes reaching
more then 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-22 08:22:19 +00:00
|
|
|
ms -= dp->sectorsize;
|
2004-05-20 10:20:49 +00:00
|
|
|
ms -= ms % sc->sc_stripesize;
|
|
|
|
if (ms < mediasize)
|
|
|
|
mediasize = ms;
|
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, it allows to avoid passing I/O requests
to GEOM g_up/g_down thread, executing them directly in the caller context.
That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid
several context switches per I/O.
The defined now safety requirements are:
- caller should not hold any locks and should be reenterable;
- callee should not depend on GEOM dual-threaded concurency semantics;
- on the way down, if request is unmapped while callee doesn't support it,
the context should be sleepable;
- kernel thread stack usage should be below 50%.
To keep compatibility with GEOM classes not meeting above requirements
new provider and consumer flags added:
- G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
- G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
- G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
- G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
Capable GEOM class can set them, allowing direct dispatch in cases where
it is safe. If any of requirements are not met, request is queued to
g_up or g_down thread same as before.
Such GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).
To declare direct completion capability disk(9) KPI got new flag equivalent
to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION. da(4) and ada(4) disk
drivers got it set now thanks to earlier CAM locking work.
This change more then twice increases peak block storage performance on
systems with manu CPUs, together with earlier CAM locking changes reaching
more then 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-22 08:22:19 +00:00
|
|
|
sectorsize = lcm(sectorsize, dp->sectorsize);
|
|
|
|
|
|
|
|
/* A provider underneath us doesn't support unmapped */
|
|
|
|
if ((dp->flags & G_PF_ACCEPT_UNMAPPED) == 0) {
|
|
|
|
G_STRIPE_DEBUG(1, "Cancelling unmapped "
|
|
|
|
"because of %s.", dp->name);
|
|
|
|
sc->sc_provider->flags &= ~G_PF_ACCEPT_UNMAPPED;
|
|
|
|
}
|
2004-05-20 10:20:49 +00:00
|
|
|
}
|
|
|
|
sc->sc_provider->sectorsize = sectorsize;
|
|
|
|
sc->sc_provider->mediasize = mediasize * sc->sc_ndisks;
|
2009-12-24 10:43:44 +00:00
|
|
|
sc->sc_provider->stripesize = sc->sc_stripesize;
|
|
|
|
sc->sc_provider->stripeoffset = 0;
|
2004-05-20 10:20:49 +00:00
|
|
|
g_error_provider(sc->sc_provider, 0);
|
|
|
|
|
2011-11-01 17:04:42 +00:00
|
|
|
G_STRIPE_DEBUG(0, "Device %s activated.", sc->sc_provider->name);
|
2004-05-20 10:20:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
g_stripe_read_metadata(struct g_consumer *cp, struct g_stripe_metadata *md)
|
|
|
|
{
|
|
|
|
struct g_provider *pp;
|
|
|
|
u_char *buf;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
g_topology_assert();
|
|
|
|
|
|
|
|
error = g_access(cp, 1, 0, 0);
|
|
|
|
if (error != 0)
|
|
|
|
return (error);
|
|
|
|
pp = cp->provider;
|
|
|
|
g_topology_unlock();
|
|
|
|
buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
|
|
|
|
&error);
|
|
|
|
g_topology_lock();
|
|
|
|
g_access(cp, -1, 0, 0);
|
|
|
|
if (buf == NULL)
|
|
|
|
return (error);
|
|
|
|
|
|
|
|
/* Decode metadata. */
|
|
|
|
stripe_metadata_decode(buf, md);
|
|
|
|
g_free(buf);
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* Add disk to given device.
|
|
|
|
*/
|
|
|
|
static int
|
|
|
|
g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no)
|
|
|
|
{
|
|
|
|
struct g_consumer *cp, *fcp;
|
|
|
|
struct g_geom *gp;
|
|
|
|
int error;
|
|
|
|
|
2011-11-01 17:04:42 +00:00
|
|
|
g_topology_assert();
|
2004-05-20 10:20:49 +00:00
|
|
|
/* Metadata corrupted? */
|
|
|
|
if (no >= sc->sc_ndisks)
|
|
|
|
return (EINVAL);
|
|
|
|
|
|
|
|
/* Check if disk is not already attached. */
|
|
|
|
if (sc->sc_disks[no] != NULL)
|
|
|
|
return (EEXIST);
|
|
|
|
|
|
|
|
gp = sc->sc_geom;
|
|
|
|
fcp = LIST_FIRST(&gp->consumer);
|
|
|
|
|
|
|
|
cp = g_new_consumer(gp);
|
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, it allows to avoid passing I/O requests
to GEOM g_up/g_down thread, executing them directly in the caller context.
That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid
several context switches per I/O.
The defined now safety requirements are:
- caller should not hold any locks and should be reenterable;
- callee should not depend on GEOM dual-threaded concurency semantics;
- on the way down, if request is unmapped while callee doesn't support it,
the context should be sleepable;
- kernel thread stack usage should be below 50%.
To keep compatibility with GEOM classes not meeting above requirements
new provider and consumer flags added:
- G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
- G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
- G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
- G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
Capable GEOM class can set them, allowing direct dispatch in cases where
it is safe. If any of requirements are not met, request is queued to
g_up or g_down thread same as before.
Such GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).
To declare direct completion capability disk(9) KPI got new flag equivalent
to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION. da(4) and ada(4) disk
drivers got it set now thanks to earlier CAM locking work.
This change more then twice increases peak block storage performance on
systems with manu CPUs, together with earlier CAM locking changes reaching
more then 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-22 08:22:19 +00:00
|
|
|
cp->flags |= G_CF_DIRECT_SEND | G_CF_DIRECT_RECEIVE;
|
2011-11-01 17:04:42 +00:00
|
|
|
cp->private = NULL;
|
|
|
|
cp->index = no;
|
2004-05-20 10:20:49 +00:00
|
|
|
error = g_attach(cp, pp);
|
|
|
|
if (error != 0) {
|
|
|
|
g_destroy_consumer(cp);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) {
|
|
|
|
error = g_access(cp, fcp->acr, fcp->acw, fcp->ace);
|
|
|
|
if (error != 0) {
|
|
|
|
g_detach(cp);
|
|
|
|
g_destroy_consumer(cp);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) {
|
|
|
|
struct g_stripe_metadata md;
|
|
|
|
|
|
|
|
/* Reread metadata. */
|
|
|
|
error = g_stripe_read_metadata(cp, &md);
|
|
|
|
if (error != 0)
|
|
|
|
goto fail;
|
|
|
|
|
|
|
|
if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0 ||
|
|
|
|
strcmp(md.md_name, sc->sc_name) != 0 ||
|
|
|
|
md.md_id != sc->sc_id) {
|
|
|
|
G_STRIPE_DEBUG(0, "Metadata on %s changed.", pp->name);
|
|
|
|
goto fail;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
sc->sc_disks[no] = cp;
|
2004-07-26 16:10:27 +00:00
|
|
|
G_STRIPE_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name);
|
2004-05-20 10:20:49 +00:00
|
|
|
g_stripe_check_and_run(sc);
|
|
|
|
|
|
|
|
return (0);
|
|
|
|
fail:
|
|
|
|
if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0))
|
|
|
|
g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace);
|
|
|
|
g_detach(cp);
|
|
|
|
g_destroy_consumer(cp);
|
|
|
|
return (error);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct g_geom *
|
|
|
|
g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md,
|
|
|
|
u_int type)
|
|
|
|
{
|
|
|
|
struct g_stripe_softc *sc;
|
|
|
|
struct g_geom *gp;
|
|
|
|
u_int no;
|
|
|
|
|
2011-11-01 17:04:42 +00:00
|
|
|
g_topology_assert();
|
2004-07-26 16:10:27 +00:00
|
|
|
G_STRIPE_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
|
2004-05-20 10:20:49 +00:00
|
|
|
md->md_id);
|
|
|
|
|
|
|
|
/* Two disks is minimum. */
|
2004-07-26 16:10:27 +00:00
|
|
|
if (md->md_all < 2) {
|
|
|
|
G_STRIPE_DEBUG(0, "Too few disks defined for %s.", md->md_name);
|
2004-05-20 10:20:49 +00:00
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
#if 0
|
|
|
|
/* Stripe size have to be grater than or equal to sector size. */
|
|
|
|
if (md->md_stripesize < sectorsize) {
|
2004-07-26 16:10:27 +00:00
|
|
|
G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name);
|
2004-05-20 10:20:49 +00:00
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
#endif
|
|
|
|
/* Stripe size have to be power of 2. */
|
|
|
|
if (!powerof2(md->md_stripesize)) {
|
2004-07-26 16:10:27 +00:00
|
|
|
G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name);
|
2004-05-20 10:20:49 +00:00
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Check for duplicate unit */
|
|
|
|
LIST_FOREACH(gp, &mp->geom, geom) {
|
|
|
|
sc = gp->softc;
|
|
|
|
if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) {
|
|
|
|
G_STRIPE_DEBUG(0, "Device %s already configured.",
|
2004-07-26 16:10:27 +00:00
|
|
|
sc->sc_name);
|
2004-05-20 10:20:49 +00:00
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
}
|
2004-07-26 16:10:27 +00:00
|
|
|
gp = g_new_geomf(mp, "%s", md->md_name);
|
2004-07-26 15:41:28 +00:00
|
|
|
sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO);
|
2004-05-20 10:20:49 +00:00
|
|
|
gp->start = g_stripe_start;
|
|
|
|
gp->spoiled = g_stripe_orphan;
|
|
|
|
gp->orphan = g_stripe_orphan;
|
|
|
|
gp->access = g_stripe_access;
|
|
|
|
gp->dumpconf = g_stripe_dumpconf;
|
|
|
|
|
|
|
|
sc->sc_id = md->md_id;
|
|
|
|
sc->sc_stripesize = md->md_stripesize;
|
2005-08-19 22:10:19 +00:00
|
|
|
sc->sc_stripebits = bitcount32(sc->sc_stripesize - 1);
|
2004-05-20 10:20:49 +00:00
|
|
|
sc->sc_ndisks = md->md_all;
|
|
|
|
sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks,
|
|
|
|
M_STRIPE, M_WAITOK | M_ZERO);
|
|
|
|
for (no = 0; no < sc->sc_ndisks; no++)
|
|
|
|
sc->sc_disks[no] = NULL;
|
|
|
|
sc->sc_type = type;
|
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, it allows to avoid passing I/O requests
to GEOM g_up/g_down thread, executing them directly in the caller context.
That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid
several context switches per I/O.
The defined now safety requirements are:
- caller should not hold any locks and should be reenterable;
- callee should not depend on GEOM dual-threaded concurency semantics;
- on the way down, if request is unmapped while callee doesn't support it,
the context should be sleepable;
- kernel thread stack usage should be below 50%.
To keep compatibility with GEOM classes not meeting above requirements
new provider and consumer flags added:
- G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
- G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
- G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
- G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
Capable GEOM class can set them, allowing direct dispatch in cases where
it is safe. If any of requirements are not met, request is queued to
g_up or g_down thread same as before.
Such GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).
To declare direct completion capability disk(9) KPI got new flag equivalent
to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION. da(4) and ada(4) disk
drivers got it set now thanks to earlier CAM locking work.
This change more then twice increases peak block storage performance on
systems with manu CPUs, together with earlier CAM locking changes reaching
more then 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-22 08:22:19 +00:00
|
|
|
mtx_init(&sc->sc_lock, "gstripe lock", NULL, MTX_DEF);
|
2004-05-20 10:20:49 +00:00
|
|
|
|
|
|
|
gp->softc = sc;
|
|
|
|
sc->sc_geom = gp;
|
|
|
|
sc->sc_provider = NULL;
|
|
|
|
|
2004-07-26 16:10:27 +00:00
|
|
|
G_STRIPE_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);
|
2004-05-20 10:20:49 +00:00
|
|
|
|
|
|
|
return (gp);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force)
|
|
|
|
{
|
|
|
|
struct g_provider *pp;
|
2011-11-01 17:04:42 +00:00
|
|
|
struct g_consumer *cp, *cp1;
|
2004-05-20 10:20:49 +00:00
|
|
|
struct g_geom *gp;
|
|
|
|
|
|
|
|
g_topology_assert();
|
|
|
|
|
|
|
|
if (sc == NULL)
|
|
|
|
return (ENXIO);
|
|
|
|
|
|
|
|
pp = sc->sc_provider;
|
|
|
|
if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
|
|
|
|
if (force) {
|
|
|
|
G_STRIPE_DEBUG(0, "Device %s is still open, so it "
|
|
|
|
"can't be definitely removed.", pp->name);
|
|
|
|
} else {
|
|
|
|
G_STRIPE_DEBUG(1,
|
|
|
|
"Device %s is still open (r%dw%de%d).", pp->name,
|
|
|
|
pp->acr, pp->acw, pp->ace);
|
|
|
|
return (EBUSY);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-11-01 17:04:42 +00:00
|
|
|
gp = sc->sc_geom;
|
|
|
|
LIST_FOREACH_SAFE(cp, &gp->consumer, consumer, cp1) {
|
|
|
|
g_stripe_remove_disk(cp);
|
|
|
|
if (cp1 == NULL)
|
|
|
|
return (0); /* Recursion happened. */
|
2004-05-20 10:20:49 +00:00
|
|
|
}
|
2011-11-01 17:04:42 +00:00
|
|
|
if (!LIST_EMPTY(&gp->consumer))
|
|
|
|
return (EINPROGRESS);
|
2004-05-20 10:20:49 +00:00
|
|
|
|
|
|
|
gp->softc = NULL;
|
|
|
|
KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)",
|
|
|
|
gp->name));
|
|
|
|
free(sc->sc_disks, M_STRIPE);
|
Merge GEOM direct dispatch changes from the projects/camlock branch.
When safety requirements are met, it allows to avoid passing I/O requests
to GEOM g_up/g_down thread, executing them directly in the caller context.
That allows to avoid CPU bottlenecks in g_up/g_down threads, plus avoid
several context switches per I/O.
The defined now safety requirements are:
- caller should not hold any locks and should be reenterable;
- callee should not depend on GEOM dual-threaded concurency semantics;
- on the way down, if request is unmapped while callee doesn't support it,
the context should be sleepable;
- kernel thread stack usage should be below 50%.
To keep compatibility with GEOM classes not meeting above requirements
new provider and consumer flags added:
- G_CF_DIRECT_SEND -- consumer code meets caller requirements (request);
- G_CF_DIRECT_RECEIVE -- consumer code meets callee requirements (done);
- G_PF_DIRECT_SEND -- provider code meets caller requirements (done);
- G_PF_DIRECT_RECEIVE -- provider code meets callee requirements (request).
Capable GEOM class can set them, allowing direct dispatch in cases where
it is safe. If any of requirements are not met, request is queued to
g_up or g_down thread same as before.
Such GEOM classes were reviewed and updated to support direct dispatch:
CONCAT, DEV, DISK, GATE, MD, MIRROR, MULTIPATH, NOP, PART, RAID, STRIPE,
VFS, ZERO, ZFS::VDEV, ZFS::ZVOL, all classes based on g_slice KPI (LABEL,
MAP, FLASHMAP, etc).
To declare direct completion capability disk(9) KPI got new flag equivalent
to G_PF_DIRECT_SEND -- DISKFLAG_DIRECT_COMPLETION. da(4) and ada(4) disk
drivers got it set now thanks to earlier CAM locking work.
This change more then twice increases peak block storage performance on
systems with manu CPUs, together with earlier CAM locking changes reaching
more then 1 million IOPS (512 byte raw reads from 16 SATA SSDs on 4 HBAs to
256 user-level threads).
Sponsored by: iXsystems, Inc.
MFC after: 2 months
2013-10-22 08:22:19 +00:00
|
|
|
mtx_destroy(&sc->sc_lock);
|
2004-05-20 10:20:49 +00:00
|
|
|
free(sc, M_STRIPE);
|
2011-11-01 17:04:42 +00:00
|
|
|
G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name);
|
2004-05-20 10:20:49 +00:00
|
|
|
g_wither_geom(gp, ENXIO);
|
|
|
|
return (0);
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
g_stripe_destroy_geom(struct gctl_req *req __unused,
|
|
|
|
struct g_class *mp __unused, struct g_geom *gp)
|
|
|
|
{
|
|
|
|
struct g_stripe_softc *sc;
|
|
|
|
|
|
|
|
sc = gp->softc;
|
|
|
|
return (g_stripe_destroy(sc, 0));
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct g_geom *
|
|
|
|
g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
|
|
|
|
{
|
|
|
|
struct g_stripe_metadata md;
|
|
|
|
struct g_stripe_softc *sc;
|
|
|
|
struct g_consumer *cp;
|
|
|
|
struct g_geom *gp;
|
|
|
|
int error;
|
|
|
|
|
|
|
|
g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
|
|
|
|
g_topology_assert();
|
|
|
|
|
2009-10-09 09:42:22 +00:00
|
|
|
/* Skip providers that are already open for writing. */
|
|
|
|
if (pp->acw > 0)
|
|
|
|
return (NULL);
|
|
|
|
|
2004-05-20 10:20:49 +00:00
|
|
|
G_STRIPE_DEBUG(3, "Tasting %s.", pp->name);
|
|
|
|
|
|
|
|
gp = g_new_geomf(mp, "stripe:taste");
|
|
|
|
gp->start = g_stripe_start;
|
|
|
|
gp->access = g_stripe_access;
|
|
|
|
gp->orphan = g_stripe_orphan;
|
|
|
|
cp = g_new_consumer(gp);
|
|
|
|
g_attach(cp, pp);
|
|
|
|
error = g_stripe_read_metadata(cp, &md);
|
2004-08-09 11:14:25 +00:00
|
|
|
g_detach(cp);
|
|
|
|
g_destroy_consumer(cp);
|
|
|
|
g_destroy_geom(gp);
|
2004-05-20 10:20:49 +00:00
|
|
|
if (error != 0)
|
|
|
|
return (NULL);
|
|
|
|
gp = NULL;
|
|
|
|
|
|
|
|
if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0)
|
|
|
|
return (NULL);
|
|
|
|
if (md.md_version > G_STRIPE_VERSION) {
|
|
|
|
printf("geom_stripe.ko module is too old to handle %s.\n",
|
|
|
|
pp->name);
|
|
|
|
return (NULL);
|
|
|
|
}
|
2004-08-09 11:29:42 +00:00
|
|
|
/*
|
|
|
|
* Backward compatibility:
|
|
|
|
*/
|
2005-02-27 23:07:47 +00:00
|
|
|
/* There was no md_provider field in earlier versions of metadata. */
|
2004-08-09 11:29:42 +00:00
|
|
|
if (md.md_version < 2)
|
|
|
|
bzero(md.md_provider, sizeof(md.md_provider));
|
2005-02-27 23:07:47 +00:00
|
|
|
/* There was no md_provsize field in earlier versions of metadata. */
|
|
|
|
if (md.md_version < 3)
|
|
|
|
md.md_provsize = pp->mediasize;
|
2004-08-09 11:29:42 +00:00
|
|
|
|
2011-04-27 00:10:26 +00:00
|
|
|
if (md.md_provider[0] != '\0' &&
|
|
|
|
!g_compare_names(md.md_provider, pp->name))
|
2004-08-09 11:29:42 +00:00
|
|
|
return (NULL);
|
2005-02-27 23:07:47 +00:00
|
|
|
if (md.md_provsize != pp->mediasize)
|
|
|
|
return (NULL);
|
2004-05-20 10:20:49 +00:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Let's check if device already exists.
|
|
|
|
*/
|
|
|
|
sc = NULL;
|
|
|
|
LIST_FOREACH(gp, &mp->geom, geom) {
|
|
|
|
sc = gp->softc;
|
|
|
|
if (sc == NULL)
|
|
|
|
continue;
|
|
|
|
if (sc->sc_type != G_STRIPE_TYPE_AUTOMATIC)
|
|
|
|
continue;
|
|
|
|
if (strcmp(md.md_name, sc->sc_name) != 0)
|
|
|
|
continue;
|
|
|
|
if (md.md_id != sc->sc_id)
|
|
|
|
continue;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
if (gp != NULL) {
|
|
|
|
G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
|
|
|
|
error = g_stripe_add_disk(sc, pp, md.md_no);
|
|
|
|
if (error != 0) {
|
|
|
|
G_STRIPE_DEBUG(0,
|
|
|
|
"Cannot add disk %s to %s (error=%d).", pp->name,
|
|
|
|
gp->name, error);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_AUTOMATIC);
|
|
|
|
if (gp == NULL) {
|
2004-07-26 16:10:27 +00:00
|
|
|
G_STRIPE_DEBUG(0, "Cannot create device %s.",
|
2004-05-20 10:20:49 +00:00
|
|
|
md.md_name);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
sc = gp->softc;
|
|
|
|
G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
|
|
|
|
error = g_stripe_add_disk(sc, pp, md.md_no);
|
|
|
|
if (error != 0) {
|
|
|
|
G_STRIPE_DEBUG(0,
|
|
|
|
"Cannot add disk %s to %s (error=%d).", pp->name,
|
|
|
|
gp->name, error);
|
|
|
|
g_stripe_destroy(sc, 1);
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return (gp);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
g_stripe_ctl_create(struct gctl_req *req, struct g_class *mp)
|
|
|
|
{
|
|
|
|
u_int attached, no;
|
|
|
|
struct g_stripe_metadata md;
|
|
|
|
struct g_provider *pp;
|
|
|
|
struct g_stripe_softc *sc;
|
|
|
|
struct g_geom *gp;
|
|
|
|
struct sbuf *sb;
|
|
|
|
intmax_t *stripesize;
|
|
|
|
const char *name;
|
|
|
|
char param[16];
|
|
|
|
int *nargs;
|
|
|
|
|
|
|
|
g_topology_assert();
|
|
|
|
nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
|
|
|
|
if (nargs == NULL) {
|
|
|
|
gctl_error(req, "No '%s' argument.", "nargs");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (*nargs <= 2) {
|
|
|
|
gctl_error(req, "Too few arguments.");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic));
|
|
|
|
md.md_version = G_STRIPE_VERSION;
|
|
|
|
name = gctl_get_asciiparam(req, "arg0");
|
|
|
|
if (name == NULL) {
|
|
|
|
gctl_error(req, "No 'arg%u' argument.", 0);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
strlcpy(md.md_name, name, sizeof(md.md_name));
|
|
|
|
md.md_id = arc4random();
|
|
|
|
md.md_no = 0;
|
|
|
|
md.md_all = *nargs - 1;
|
|
|
|
stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize));
|
|
|
|
if (stripesize == NULL) {
|
|
|
|
gctl_error(req, "No '%s' argument.", "stripesize");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
md.md_stripesize = *stripesize;
|
2004-08-09 11:29:42 +00:00
|
|
|
bzero(md.md_provider, sizeof(md.md_provider));
|
2005-02-27 23:07:47 +00:00
|
|
|
/* This field is not important here. */
|
|
|
|
md.md_provsize = 0;
|
2004-05-20 10:20:49 +00:00
|
|
|
|
|
|
|
/* Check all providers are valid */
|
|
|
|
for (no = 1; no < *nargs; no++) {
|
|
|
|
snprintf(param, sizeof(param), "arg%u", no);
|
|
|
|
name = gctl_get_asciiparam(req, param);
|
|
|
|
if (name == NULL) {
|
|
|
|
gctl_error(req, "No 'arg%u' argument.", no);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (strncmp(name, "/dev/", strlen("/dev/")) == 0)
|
|
|
|
name += strlen("/dev/");
|
|
|
|
pp = g_provider_by_name(name);
|
|
|
|
if (pp == NULL) {
|
|
|
|
G_STRIPE_DEBUG(1, "Disk %s is invalid.", name);
|
|
|
|
gctl_error(req, "Disk %s is invalid.", name);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_MANUAL);
|
|
|
|
if (gp == NULL) {
|
2004-07-26 16:10:27 +00:00
|
|
|
gctl_error(req, "Can't configure %s.", md.md_name);
|
2004-05-20 10:20:49 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
sc = gp->softc;
|
2008-08-09 11:14:05 +00:00
|
|
|
sb = sbuf_new_auto();
|
2004-05-20 10:20:49 +00:00
|
|
|
sbuf_printf(sb, "Can't attach disk(s) to %s:", gp->name);
|
|
|
|
for (attached = 0, no = 1; no < *nargs; no++) {
|
|
|
|
snprintf(param, sizeof(param), "arg%u", no);
|
|
|
|
name = gctl_get_asciiparam(req, param);
|
2005-05-11 18:07:39 +00:00
|
|
|
if (name == NULL) {
|
|
|
|
gctl_error(req, "No 'arg%u' argument.", no);
|
|
|
|
continue;
|
|
|
|
}
|
2004-05-20 10:20:49 +00:00
|
|
|
if (strncmp(name, "/dev/", strlen("/dev/")) == 0)
|
|
|
|
name += strlen("/dev/");
|
|
|
|
pp = g_provider_by_name(name);
|
|
|
|
KASSERT(pp != NULL, ("Provider %s disappear?!", name));
|
|
|
|
if (g_stripe_add_disk(sc, pp, no - 1) != 0) {
|
|
|
|
G_STRIPE_DEBUG(1, "Disk %u (%s) not attached to %s.",
|
|
|
|
no, pp->name, gp->name);
|
|
|
|
sbuf_printf(sb, " %s", pp->name);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
attached++;
|
|
|
|
}
|
|
|
|
sbuf_finish(sb);
|
|
|
|
if (md.md_all != attached) {
|
|
|
|
g_stripe_destroy(gp->softc, 1);
|
|
|
|
gctl_error(req, "%s", sbuf_data(sb));
|
|
|
|
}
|
|
|
|
sbuf_delete(sb);
|
|
|
|
}
|
|
|
|
|
|
|
|
static struct g_stripe_softc *
|
|
|
|
g_stripe_find_device(struct g_class *mp, const char *name)
|
|
|
|
{
|
|
|
|
struct g_stripe_softc *sc;
|
|
|
|
struct g_geom *gp;
|
|
|
|
|
|
|
|
LIST_FOREACH(gp, &mp->geom, geom) {
|
|
|
|
sc = gp->softc;
|
|
|
|
if (sc == NULL)
|
|
|
|
continue;
|
2004-07-26 16:10:27 +00:00
|
|
|
if (strcmp(sc->sc_name, name) == 0)
|
2004-05-20 10:20:49 +00:00
|
|
|
return (sc);
|
|
|
|
}
|
|
|
|
return (NULL);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
g_stripe_ctl_destroy(struct gctl_req *req, struct g_class *mp)
|
|
|
|
{
|
|
|
|
struct g_stripe_softc *sc;
|
|
|
|
int *force, *nargs, error;
|
|
|
|
const char *name;
|
|
|
|
char param[16];
|
|
|
|
u_int i;
|
|
|
|
|
|
|
|
g_topology_assert();
|
|
|
|
|
|
|
|
nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
|
|
|
|
if (nargs == NULL) {
|
|
|
|
gctl_error(req, "No '%s' argument.", "nargs");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (*nargs <= 0) {
|
|
|
|
gctl_error(req, "Missing device(s).");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
force = gctl_get_paraml(req, "force", sizeof(*force));
|
|
|
|
if (force == NULL) {
|
|
|
|
gctl_error(req, "No '%s' argument.", "force");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < (u_int)*nargs; i++) {
|
|
|
|
snprintf(param, sizeof(param), "arg%u", i);
|
|
|
|
name = gctl_get_asciiparam(req, param);
|
|
|
|
if (name == NULL) {
|
|
|
|
gctl_error(req, "No 'arg%u' argument.", i);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
sc = g_stripe_find_device(mp, name);
|
|
|
|
if (sc == NULL) {
|
|
|
|
gctl_error(req, "No such device: %s.", name);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
error = g_stripe_destroy(sc, *force);
|
|
|
|
if (error != 0) {
|
|
|
|
gctl_error(req, "Cannot destroy device %s (error=%d).",
|
2004-07-26 16:10:27 +00:00
|
|
|
sc->sc_name, error);
|
2004-05-20 10:20:49 +00:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
g_stripe_config(struct gctl_req *req, struct g_class *mp, const char *verb)
|
|
|
|
{
|
|
|
|
uint32_t *version;
|
|
|
|
|
|
|
|
g_topology_assert();
|
|
|
|
|
|
|
|
version = gctl_get_paraml(req, "version", sizeof(*version));
|
|
|
|
if (version == NULL) {
|
|
|
|
gctl_error(req, "No '%s' argument.", "version");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
if (*version != G_STRIPE_VERSION) {
|
|
|
|
gctl_error(req, "Userland and kernel parts are out of sync.");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (strcmp(verb, "create") == 0) {
|
|
|
|
g_stripe_ctl_create(req, mp);
|
|
|
|
return;
|
2004-07-05 21:16:37 +00:00
|
|
|
} else if (strcmp(verb, "destroy") == 0 ||
|
|
|
|
strcmp(verb, "stop") == 0) {
|
2004-05-20 10:20:49 +00:00
|
|
|
g_stripe_ctl_destroy(req, mp);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
gctl_error(req, "Unknown verb.");
|
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
g_stripe_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
|
|
|
|
struct g_consumer *cp, struct g_provider *pp)
|
|
|
|
{
|
|
|
|
struct g_stripe_softc *sc;
|
|
|
|
|
|
|
|
sc = gp->softc;
|
2004-07-26 17:14:47 +00:00
|
|
|
if (sc == NULL)
|
2004-05-20 10:20:49 +00:00
|
|
|
return;
|
2004-07-26 17:14:47 +00:00
|
|
|
if (pp != NULL) {
|
|
|
|
/* Nothing here. */
|
|
|
|
} else if (cp != NULL) {
|
2004-08-25 12:14:44 +00:00
|
|
|
sbuf_printf(sb, "%s<Number>%u</Number>\n", indent,
|
|
|
|
(u_int)cp->index);
|
2004-07-26 17:14:47 +00:00
|
|
|
} else {
|
|
|
|
sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
|
|
|
|
sbuf_printf(sb, "%s<Stripesize>%u</Stripesize>\n", indent,
|
|
|
|
(u_int)sc->sc_stripesize);
|
|
|
|
sbuf_printf(sb, "%s<Type>", indent);
|
|
|
|
switch (sc->sc_type) {
|
|
|
|
case G_STRIPE_TYPE_AUTOMATIC:
|
|
|
|
sbuf_printf(sb, "AUTOMATIC");
|
|
|
|
break;
|
|
|
|
case G_STRIPE_TYPE_MANUAL:
|
|
|
|
sbuf_printf(sb, "MANUAL");
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
sbuf_printf(sb, "UNKNOWN");
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
sbuf_printf(sb, "</Type>\n");
|
|
|
|
sbuf_printf(sb, "%s<Status>Total=%u, Online=%u</Status>\n",
|
|
|
|
indent, sc->sc_ndisks, g_stripe_nvalid(sc));
|
|
|
|
sbuf_printf(sb, "%s<State>", indent);
|
|
|
|
if (sc->sc_provider != NULL && sc->sc_provider->error == 0)
|
|
|
|
sbuf_printf(sb, "UP");
|
|
|
|
else
|
|
|
|
sbuf_printf(sb, "DOWN");
|
|
|
|
sbuf_printf(sb, "</State>\n");
|
2004-05-26 11:36:27 +00:00
|
|
|
}
|
2004-05-20 10:20:49 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
 * Declare g_stripe_class as a GEOM class.
 * NOTE(review): the second argument is presumably the module name used
 * for registration -- confirm against the DECLARE_GEOM_CLASS definition.
 */
DECLARE_GEOM_CLASS(g_stripe_class, g_stripe);
|