freebsd-nq/sys/geom/stripe/g_stripe.c
Pawel Jakub Dawidek e68909854c - Add md_provsize field to metadata, which will help with
shared-last-sector problem.
  After this change, even if there is more than one provider with the same
  last sector, the proper one will be chosen based on its size.
  It still doesn't fix the 'c' partition problem (when da0s1 can be confused
  with da0s1c) and situation when 'a' partition starts at offset 0
  (then da0s1a can be confused with da0s1 and da0s1c). One can use '-h'
  option there, when creating device or avoid sharing last sector.
  Actually, when providers share the same last sector and their size is equal,
  they provide exactly the same data, so the name (da0s1, da0s1a, da0s1c)
  isn't important at all.
- Provide backward compatibility.
- Update copyright's year.

MFC after:	1 week
2005-02-27 23:07:47 +00:00

1195 lines
29 KiB
C

/*-
* Copyright (c) 2004-2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/module.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bio.h>
#include <sys/sysctl.h>
#include <sys/malloc.h>
#include <vm/uma.h>
#include <geom/geom.h>
#include <geom/stripe/g_stripe.h>
/*
 * Size in bytes of a single item in g_stripe_zone (see g_stripe_init()) and
 * the largest request g_stripe_start() will route through "fast" mode.
 */
#define MAX_IO_SIZE (DFLTPHYS * 2)
/* malloc(9) type used for the softc and its array of consumer pointers. */
static MALLOC_DEFINE(M_STRIPE, "stripe data", "GEOM_STRIPE Data");
/* UMA zone that supplies the temporary buffers used by "fast" mode. */
static uma_zone_t g_stripe_zone;
/* Forward declarations for functions referenced before their definition. */
static int g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force);
static int g_stripe_destroy_geom(struct gctl_req *req, struct g_class *mp,
struct g_geom *gp);
static g_taste_t g_stripe_taste;
static g_ctl_req_t g_stripe_config;
static g_dumpconf_t g_stripe_dumpconf;
static g_init_t g_stripe_init;
static g_fini_t g_stripe_fini;
/* Method table describing the STRIPE class to GEOM. */
struct g_class g_stripe_class = {
.name = G_STRIPE_CLASS_NAME,
.version = G_VERSION,
.ctlreq = g_stripe_config,
.taste = g_stripe_taste,
.destroy_geom = g_stripe_destroy_geom,
.init = g_stripe_init,
.fini = g_stripe_fini
};
/* Everything below hangs off the kern.geom.stripe sysctl node. */
SYSCTL_DECL(_kern_geom);
SYSCTL_NODE(_kern_geom, OID_AUTO, stripe, CTLFLAG_RW, 0, "GEOM_STRIPE stuff");
/*
 * Debug level; writable at run time and also registered as a tunable.
 * NOTE(review): presumably consulted by the G_STRIPE_DEBUG() macro from
 * g_stripe.h -- confirm there.
 */
static u_int g_stripe_debug = 0;
TUNABLE_INT("kern.geom.stripe.debug", &g_stripe_debug);
SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, debug, CTLFLAG_RW, &g_stripe_debug, 0,
"Debug level");
/*
 * Non-zero enables "fast" mode in g_stripe_start().  The sysctl for it is
 * declared after its handler, g_sysctl_stripe_fast().
 */
static int g_stripe_fast = 0;
TUNABLE_INT("kern.geom.stripe.fast", &g_stripe_fast);
static int
g_sysctl_stripe_fast(SYSCTL_HANDLER_ARGS)
{
int error, fast;
fast = g_stripe_fast;
error = sysctl_handle_int(oidp, &fast, sizeof(fast), req);
if (error == 0 && req->newptr != NULL)
g_stripe_fast = fast;
return (error);
}
/* kern.geom.stripe.fast: read/write integer served by g_sysctl_stripe_fast(). */
SYSCTL_PROC(_kern_geom_stripe, OID_AUTO, fast, CTLTYPE_INT | CTLFLAG_RW,
NULL, 0, g_sysctl_stripe_fast, "I", "Fast, but memory-consuming, mode");
/*
 * Memory budget for "fast" mode, in bytes.  Exported read-only through
 * sysctl and registered as a tunable; g_stripe_init() rounds it down to a
 * multiple of MAX_IO_SIZE and uses it to cap g_stripe_zone.
 */
static u_int g_stripe_maxmem = MAX_IO_SIZE * 100;
TUNABLE_INT("kern.geom.stripe.maxmem", &g_stripe_maxmem);
SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, maxmem, CTLFLAG_RD, &g_stripe_maxmem,
0, "Maximum memory that can be allocated in \"fast\" mode (in bytes)");
/*
 * Incremented by g_stripe_start() every time g_stripe_start_fast() returns
 * an error and the request falls back to "economic" mode.
 */
static u_int g_stripe_fast_failed = 0;
SYSCTL_UINT(_kern_geom_stripe, OID_AUTO, fast_failed, CTLFLAG_RD,
&g_stripe_fast_failed, 0, "How many times \"fast\" mode failed");
/*
 * Greatest Common Divisor (Euclid's algorithm).
 *
 * gcd(a, 0) == a, hence gcd(0, 0) == 0.
 */
static u_int
gcd(u_int a, u_int b)
{
	u_int r;

	while (b != 0) {
		r = a % b;
		a = b;
		b = r;
	}
	return (a);
}

/*
 * Least Common Multiple.
 *
 * The division is done before the multiplication: a is always a multiple
 * of gcd(a, b), so (a / g) * b is exact and never larger than the result
 * itself, whereas a * b can wrap around u_int even when the LCM fits.
 * Returns 0 when both arguments are 0 instead of dividing by zero.
 */
static u_int
lcm(u_int a, u_int b)
{
	u_int g;

	g = gcd(a, b);
	if (g == 0)
		return (0);
	return ((a / g) * b);
}
/*
 * Class init method.
 *
 * Creates the UMA zone whose items are MAX_IO_SIZE bytes each, trims
 * g_stripe_maxmem down to a whole number of such items and caps the zone
 * at that many items.
 */
static void
g_stripe_init(struct g_class *mp __unused)
{
	u_int maxitems;

	g_stripe_zone = uma_zcreate("g_stripe_zone", MAX_IO_SIZE, NULL, NULL,
	    NULL, NULL, 0, 0);

	/* Number of whole buffers that fit into the configured budget. */
	maxitems = g_stripe_maxmem / MAX_IO_SIZE;
	g_stripe_maxmem = maxitems * MAX_IO_SIZE;
	uma_zone_set_max(g_stripe_zone, maxitems);
}
/*
 * Class fini method: destroy the UMA zone created by g_stripe_init().
 */
static void
g_stripe_fini(struct g_class *mp __unused)
{
uma_zdestroy(g_stripe_zone);
}
/*
 * Count the slots of sc->sc_disks[] that currently hold a consumer.
 */
static u_int
g_stripe_nvalid(struct g_stripe_softc *sc)
{
	u_int count, idx;

	count = 0;
	for (idx = 0; idx < sc->sc_ndisks; idx++) {
		if (sc->sc_disks[idx] == NULL)
			continue;
		count++;
	}
	return (count);
}
/*
 * Detach one component from its stripe device.
 *
 * The consumer's slot in sc_disks[] is cleared, the stripe provider (if it
 * exists) is orphaned with ENXIO because the device cannot work without
 * all of its disks, any access counts still held on the consumer are
 * dropped, and finally the consumer is detached and destroyed.
 */
static void
g_stripe_remove_disk(struct g_consumer *cp)
{
	struct g_stripe_softc *sc;
	struct g_provider *stripe_pp;
	u_int slot;

	KASSERT(cp != NULL, ("Non-valid disk in %s.", __func__));
	sc = cp->private;
	KASSERT(sc != NULL, ("NULL sc in %s.", __func__));
	slot = cp->index;

	G_STRIPE_DEBUG(0, "Disk %s removed from %s.", cp->provider->name,
	    sc->sc_name);

	sc->sc_disks[slot] = NULL;

	stripe_pp = sc->sc_provider;
	if (stripe_pp != NULL) {
		g_orphan_provider(stripe_pp, ENXIO);
		sc->sc_provider = NULL;
		G_STRIPE_DEBUG(0, "Device %s removed.", sc->sc_name);
	}

	/* Release whatever access counts this consumer still holds. */
	if (cp->acr > 0 || cp->acw > 0 || cp->ace > 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);

	g_detach(cp);
	g_destroy_consumer(cp);
}
/*
 * Orphan (and spoil) method: one of our components went away.
 *
 * Does nothing if the geom has already lost its softc.  Otherwise the
 * disk is removed and, once no valid disk is left, the whole device is
 * destroyed with the force flag set.
 */
static void
g_stripe_orphan(struct g_consumer *cp)
{
	struct g_stripe_softc *sc;

	g_topology_assert();

	sc = cp->geom->softc;
	if (sc == NULL)
		return;

	g_stripe_remove_disk(cp);
	if (g_stripe_nvalid(sc) != 0)
		return;
	/* That was the last valid disk. */
	g_stripe_destroy(sc, 1);
}
/*
 * Access method: propagate an access-count change on the stripe provider
 * to every component consumer.
 *
 * When the geom has no softc only negative (closing) requests are
 * expected and 0 is returned.  Otherwise one extra exclusive count is
 * taken on first open and released on last close.  If any consumer
 * refuses the change, the consumers already updated are rolled back and
 * the error is returned.  With no consumers at all ENXIO is returned.
 */
static int
g_stripe_access(struct g_provider *pp, int dr, int dw, int de)
{
	struct g_consumer *cur, *undo;
	struct g_stripe_softc *sc;
	struct g_geom *gp;
	int error;

	gp = pp->geom;
	sc = gp->softc;

	if (sc == NULL) {
		/*
		 * It looks like geom is being withered.
		 * In that case we allow only negative requests.
		 */
		KASSERT(dr <= 0 && dw <= 0 && de <= 0,
		    ("Positive access request (device=%s).", pp->name));
		if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 &&
		    (pp->ace + de) == 0) {
			G_STRIPE_DEBUG(0, "Device %s definitely destroyed.",
			    gp->name);
		}
		return (0);
	}

	/* First open grabs an extra "exclusive" bit ... */
	if (pp->acr == 0 && pp->acw == 0 && pp->ace == 0)
		de++;
	/* ... and the last close gives it back. */
	if ((pp->acr + dr) == 0 && (pp->acw + dw) == 0 && (pp->ace + de) == 0)
		de--;

	error = ENXIO;
	LIST_FOREACH(cur, &gp->consumer, consumer) {
		error = g_access(cur, dr, dw, de);
		if (error != 0) {
			/* Undo every consumer that precedes the failed one. */
			for (undo = LIST_FIRST(&gp->consumer); undo != cur;
			    undo = LIST_NEXT(undo, consumer))
				g_access(undo, -dr, -dw, -de);
			return (error);
		}
	}
	return (error);
}
/*
 * Copy length bytes between a contiguous buffer and a strided one.
 *
 * The first chunk runs from offset to the end of its stripe; every later
 * chunk is at most one stripe.  After each chunk the strided side skips
 * stripesize * (ndisks - 1) extra bytes.
 *
 * mode != 0: src is contiguous, dst is strided (g_stripe_done() uses this
 *            for completed reads).
 * mode == 0: src is strided, dst is contiguous (g_stripe_start_fast() uses
 *            this for writes).
 */
static void
g_stripe_copy(struct g_stripe_softc *sc, char *src, char *dst, off_t offset,
    off_t length, int mode)
{
	u_int stripesize;
	size_t chunk, stride;

	stripesize = sc->sc_stripesize;
	stride = stripesize * (sc->sc_ndisks - 1);
	chunk = (size_t)(stripesize - (offset & (stripesize - 1)));

	for (;;) {
		bcopy(src, dst, chunk);
		src += chunk;
		dst += chunk;
		if (mode)
			dst += stride;
		else
			src += stride;
		length -= chunk;
		KASSERT(length >= 0,
		    ("Length < 0 (stripesize=%zu, offset=%jd, length=%jd).",
		    (size_t)stripesize, (intmax_t)offset, (intmax_t)length));
		if (length <= 0)
			break;
		chunk = (length > stripesize) ? stripesize : length;
	}
}
/*
 * Completion handler for child bios issued by g_stripe_start_fast().
 *
 * Folds the child's error (first one wins) and completed byte count into
 * the parent.  For a read that went through the temporary buffer
 * (bio_caller1 holds the caller's address) the data is scattered back
 * into place.  When the last child arrives, the temporary buffer hanging
 * off the parent's bio_driver1 is returned to the zone and the parent is
 * delivered.
 */
static void
g_stripe_done(struct bio *bp)
{
	struct g_stripe_softc *sc;
	struct bio *parent;

	parent = bp->bio_parent;
	sc = parent->bio_to->geom->softc;

	if (parent->bio_error == 0)
		parent->bio_error = bp->bio_error;
	parent->bio_completed += bp->bio_completed;

	if (bp->bio_cmd == BIO_READ && bp->bio_caller1 != NULL) {
		g_stripe_copy(sc, bp->bio_data, bp->bio_caller1, bp->bio_offset,
		    bp->bio_length, 1);
		bp->bio_data = bp->bio_caller1;
		bp->bio_caller1 = NULL;
	}
	g_destroy_bio(bp);

	if (++parent->bio_inbed != parent->bio_children)
		return;

	/* Last child: release the shared buffer and finish the parent. */
	if (parent->bio_driver1 != NULL)
		uma_zfree(g_stripe_zone, parent->bio_driver1);
	g_io_deliver(parent, parent->bio_error);
}
/*
 * "Fast" mode: issue at most one child bio per disk.
 *
 * bp     - the request received by the stripe provider
 * no     - index of the disk holding the first piece
 * offset - offset of the first piece on that disk
 * length - length of the first piece (up to the end of its stripe)
 *
 * The request is walked stripe by stripe.  The first sc_ndisks pieces each
 * get their own clone of bp; every further piece only extends the length
 * of the clone already queued for that disk.  The pieces belonging to one
 * disk are not adjacent in the caller's buffer, so an extended clone is
 * redirected to a slice of one buffer taken from g_stripe_zone, with the
 * caller's address remembered in bio_caller1.  Writes are gathered into
 * that buffer here; reads are scattered back by g_stripe_done().
 *
 * Returns 0 once all children have been sent.  On allocation failure
 * ENOMEM is returned with every clone destroyed and the buffer freed, so
 * the caller can retry with g_stripe_start_economic().
 */
static int
g_stripe_start_fast(struct bio *bp, u_int no, off_t offset, off_t length)
{
TAILQ_HEAD(, bio) queue = TAILQ_HEAD_INITIALIZER(queue);
u_int nparts = 0, stripesize;
struct g_stripe_softc *sc;
char *addr, *data = NULL;
struct bio *cbp;
int error;
sc = bp->bio_to->geom->softc;
addr = bp->bio_data;
stripesize = sc->sc_stripesize;
/* Clone for the first, possibly partial, piece. */
cbp = g_clone_bio(bp);
if (cbp == NULL) {
error = ENOMEM;
goto failure;
}
TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
nparts++;
/*
 * Fill in the component buf structure.
 * bio_caller2 carries the target consumer until the bio is sent.
 */
cbp->bio_done = g_stripe_done;
cbp->bio_offset = offset;
cbp->bio_data = addr;
cbp->bio_caller1 = NULL;
cbp->bio_length = length;
cbp->bio_caller2 = sc->sc_disks[no];
/* Align the per-disk offset down to the start of its stripe. */
/* offset -= offset % stripesize; */
offset -= offset & (stripesize - 1);
addr += length;
length = bp->bio_length - length;
/* Walk the rest of the request one stripe at a time, round-robin. */
for (no++; length > 0; no++, length -= stripesize, addr += stripesize) {
if (no > sc->sc_ndisks - 1) {
/* Wrapped past the last disk: next row of stripes. */
no = 0;
offset += stripesize;
}
if (nparts >= sc->sc_ndisks) {
/*
 * Every disk already has a clone; take the next one in queue
 * order (wrapping to the first) and grow it.
 */
cbp = TAILQ_NEXT(cbp, bio_queue);
if (cbp == NULL)
cbp = TAILQ_FIRST(&queue);
nparts++;
/*
 * Update bio structure.
 */
/*
 * MIN() is in case when
 * (bp->bio_length % sc->sc_stripesize) != 0.
 */
cbp->bio_length += MIN(stripesize, length);
if (cbp->bio_caller1 == NULL) {
/*
 * First extension of this clone: remember the caller's
 * address in bio_caller1; bio_data is pointed at the
 * temporary buffer when the bio is sent.
 */
cbp->bio_caller1 = cbp->bio_data;
cbp->bio_data = NULL;
if (data == NULL) {
/* One buffer is shared by all extended clones. */
data = uma_zalloc(g_stripe_zone,
M_NOWAIT);
if (data == NULL) {
error = ENOMEM;
goto failure;
}
}
}
} else {
/* This disk has no clone yet: create one. */
cbp = g_clone_bio(bp);
if (cbp == NULL) {
error = ENOMEM;
goto failure;
}
TAILQ_INSERT_TAIL(&queue, cbp, bio_queue);
nparts++;
/*
 * Fill in the component buf structure.
 */
cbp->bio_done = g_stripe_done;
cbp->bio_offset = offset;
cbp->bio_data = addr;
cbp->bio_caller1 = NULL;
/*
 * MIN() is in case when
 * (bp->bio_length % sc->sc_stripesize) != 0.
 */
cbp->bio_length = MIN(stripesize, length);
cbp->bio_caller2 = sc->sc_disks[no];
}
}
/* Let g_stripe_done() free the buffer when the last child completes. */
if (data != NULL)
bp->bio_driver1 = data;
/*
 * Fire off all allocated requests!
 */
while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
struct g_consumer *cp;
TAILQ_REMOVE(&queue, cbp, bio_queue);
cp = cbp->bio_caller2;
cbp->bio_caller2 = NULL;
cbp->bio_to = cp->provider;
if (cbp->bio_caller1 != NULL) {
/* Extended clone: give it the next slice of the buffer. */
cbp->bio_data = data;
if (bp->bio_cmd == BIO_WRITE) {
/* Gather the scattered pieces before writing. */
g_stripe_copy(sc, cbp->bio_caller1, data,
cbp->bio_offset, cbp->bio_length, 0);
}
data += cbp->bio_length;
}
G_STRIPE_LOGREQ(cbp, "Sending request.");
g_io_request(cbp, cp);
}
return (0);
failure:
/* Nothing has been sent yet: undo all allocations made so far. */
if (data != NULL)
uma_zfree(g_stripe_zone, data);
while ((cbp = TAILQ_FIRST(&queue)) != NULL) {
TAILQ_REMOVE(&queue, cbp, bio_queue);
if (cbp->bio_caller1 != NULL) {
cbp->bio_data = cbp->bio_caller1;
cbp->bio_caller1 = NULL;
}
/* Take the destroyed clone back out of the parent's child count. */
bp->bio_children--;
g_destroy_bio(cbp);
}
return (error);
}
/*
 * "Economic" mode: one child bio per stripe-sized piece, no extra memory.
 *
 * bp     - the request received by the stripe provider
 * no     - index of the disk holding the first piece
 * offset - offset of the first piece on that disk
 * length - length of the first piece (up to the end of its stripe)
 *
 * Every piece points straight into the caller's buffer and completes via
 * g_std_done().  All clones are queued first and only sent once every
 * allocation has succeeded.  Returns 0 on success, or ENOMEM with all
 * clones destroyed.
 */
static int
g_stripe_start_economic(struct bio *bp, u_int no, off_t offset, off_t length)
{
	TAILQ_HEAD(, bio) pending = TAILQ_HEAD_INITIALIZER(pending);
	struct g_stripe_softc *sc;
	struct g_consumer *cp;
	struct bio *child;
	uint32_t stripesize;
	char *data;
	int error;

	sc = bp->bio_to->geom->softc;
	data = bp->bio_data;
	stripesize = sc->sc_stripesize;

	/* First, possibly partial, piece. */
	child = g_clone_bio(bp);
	if (child == NULL) {
		error = ENOMEM;
		goto failure;
	}
	TAILQ_INSERT_TAIL(&pending, child, bio_queue);
	child->bio_done = g_std_done;
	child->bio_offset = offset;
	child->bio_data = data;
	child->bio_length = length;
	/* bio_caller2 carries the target consumer until the bio is sent. */
	child->bio_caller2 = sc->sc_disks[no];

	/* Align the per-disk offset down to the start of its stripe. */
	offset -= offset & (stripesize - 1);
	data += length;
	length = bp->bio_length - length;

	/* Remaining pieces, one stripe each, round-robin over the disks. */
	no++;
	while (length > 0) {
		if (no > sc->sc_ndisks - 1) {
			/* Wrapped past the last disk: next row of stripes. */
			no = 0;
			offset += stripesize;
		}
		child = g_clone_bio(bp);
		if (child == NULL) {
			error = ENOMEM;
			goto failure;
		}
		TAILQ_INSERT_TAIL(&pending, child, bio_queue);
		child->bio_done = g_std_done;
		child->bio_offset = offset;
		child->bio_data = data;
		/*
		 * The last piece is shorter when the request does not end
		 * on a stripe boundary.
		 */
		child->bio_length = MIN(stripesize, length);
		child->bio_caller2 = sc->sc_disks[no];

		no++;
		length -= stripesize;
		data += stripesize;
	}

	/* Everything is allocated: send the children in queue order. */
	while (!TAILQ_EMPTY(&pending)) {
		child = TAILQ_FIRST(&pending);
		TAILQ_REMOVE(&pending, child, bio_queue);
		cp = child->bio_caller2;
		child->bio_caller2 = NULL;
		child->bio_to = cp->provider;
		G_STRIPE_LOGREQ(child, "Sending request.");
		g_io_request(child, cp);
	}
	return (0);

failure:
	/* Nothing has been sent yet: throw the clones away. */
	while (!TAILQ_EMPTY(&pending)) {
		child = TAILQ_FIRST(&pending);
		TAILQ_REMOVE(&pending, child, bio_queue);
		bp->bio_children--;
		g_destroy_bio(child);
	}
	return (error);
}
static void
g_stripe_start(struct bio *bp)
{
off_t offset, start, length, nstripe;
struct g_stripe_softc *sc;
u_int no, stripesize;
int error, fast = 0;
sc = bp->bio_to->geom->softc;
/*
* If sc == NULL, provider's error should be set and g_stripe_start()
* should not be called at all.
*/
KASSERT(sc != NULL,
("Provider's error should be set (error=%d)(device=%s).",
bp->bio_to->error, bp->bio_to->name));
G_STRIPE_LOGREQ(bp, "Request received.");
switch (bp->bio_cmd) {
case BIO_READ:
case BIO_WRITE:
case BIO_DELETE:
/*
* Only those requests are supported.
*/
break;
case BIO_GETATTR:
/* To which provider it should be delivered? */
default:
g_io_deliver(bp, EOPNOTSUPP);
return;
}
stripesize = sc->sc_stripesize;
/*
* Calculations are quite messy, but fast I hope.
*/
/* Stripe number. */
/* nstripe = bp->bio_offset / stripesize; */
nstripe = bp->bio_offset >> (off_t)sc->sc_stripebits;
/* Disk number. */
no = nstripe % sc->sc_ndisks;
/* Start position in stripe. */
/* start = bp->bio_offset % stripesize; */
start = bp->bio_offset & (stripesize - 1);
/* Start position in disk. */
/* offset = (nstripe / sc->sc_ndisks) * stripesize + start; */
offset = ((nstripe / sc->sc_ndisks) << sc->sc_stripebits) + start;
/* Length of data to operate. */
length = MIN(bp->bio_length, stripesize - start);
/*
* Do use "fast" mode when:
* 1. "Fast" mode is ON.
* and
* 2. Request size is less than or equal to MAX_IO_SIZE (128kB),
* which should always be true.
* and
* 3. Request size is bigger than stripesize * ndisks. If it isn't,
* there will be no need to send more than one I/O request to
* a provider, so there is nothing to optmize.
*/
if (g_stripe_fast && bp->bio_length <= MAX_IO_SIZE &&
bp->bio_length >= stripesize * sc->sc_ndisks) {
fast = 1;
}
error = 0;
if (fast) {
error = g_stripe_start_fast(bp, no, offset, length);
if (error != 0)
g_stripe_fast_failed++;
}
/*
* Do use "economic" when:
* 1. "Economic" mode is ON.
* or
* 2. "Fast" mode failed. It can only failed if there is no memory.
*/
if (!fast || error != 0)
error = g_stripe_start_economic(bp, no, offset, length);
if (error != 0) {
if (bp->bio_error == 0)
bp->bio_error = error;
g_io_deliver(bp, bp->bio_error);
}
}
static void
g_stripe_check_and_run(struct g_stripe_softc *sc)
{
off_t mediasize, ms;
u_int no, sectorsize = 0;
if (g_stripe_nvalid(sc) != sc->sc_ndisks)
return;
sc->sc_provider = g_new_providerf(sc->sc_geom, "stripe/%s",
sc->sc_name);
/*
* Find the smallest disk.
*/
mediasize = sc->sc_disks[0]->provider->mediasize;
if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC)
mediasize -= sc->sc_disks[0]->provider->sectorsize;
mediasize -= mediasize % sc->sc_stripesize;
sectorsize = sc->sc_disks[0]->provider->sectorsize;
for (no = 1; no < sc->sc_ndisks; no++) {
ms = sc->sc_disks[no]->provider->mediasize;
if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC)
ms -= sc->sc_disks[no]->provider->sectorsize;
ms -= ms % sc->sc_stripesize;
if (ms < mediasize)
mediasize = ms;
sectorsize = lcm(sectorsize,
sc->sc_disks[no]->provider->sectorsize);
}
sc->sc_provider->sectorsize = sectorsize;
sc->sc_provider->mediasize = mediasize * sc->sc_ndisks;
g_error_provider(sc->sc_provider, 0);
G_STRIPE_DEBUG(0, "Device %s activated.", sc->sc_name);
}
static int
g_stripe_read_metadata(struct g_consumer *cp, struct g_stripe_metadata *md)
{
struct g_provider *pp;
u_char *buf;
int error;
g_topology_assert();
error = g_access(cp, 1, 0, 0);
if (error != 0)
return (error);
pp = cp->provider;
g_topology_unlock();
buf = g_read_data(cp, pp->mediasize - pp->sectorsize, pp->sectorsize,
&error);
g_topology_lock();
g_access(cp, -1, 0, 0);
if (buf == NULL)
return (error);
/* Decode metadata. */
stripe_metadata_decode(buf, md);
g_free(buf);
return (0);
}
/*
 * Add disk to given device.
 *
 * sc - device to extend
 * pp - provider to attach as a component
 * no - slot in sc->sc_disks[] the provider should occupy
 *
 * A new consumer is attached to pp and brought to the same access counts
 * as the consumers already on the geom.  For AUTOMATIC devices the
 * on-disk metadata is read again and must still name this device.  On
 * success the consumer is stored in its slot and g_stripe_check_and_run()
 * gets a chance to activate the device.
 *
 * Returns 0 on success, EINVAL for an out-of-range slot or for metadata
 * that no longer matches, EEXIST if the slot is taken, or the error from
 * g_attach() / g_access() / g_stripe_read_metadata().  On any error the
 * consumer has been destroyed.
 */
static int
g_stripe_add_disk(struct g_stripe_softc *sc, struct g_provider *pp, u_int no)
{
	struct g_consumer *cp, *fcp;
	struct g_geom *gp;
	int error;

	/* Metadata corrupted? */
	if (no >= sc->sc_ndisks)
		return (EINVAL);

	/* Check if disk is not already attached. */
	if (sc->sc_disks[no] != NULL)
		return (EEXIST);

	gp = sc->sc_geom;
	/* Sample an existing consumer before the new one joins the list. */
	fcp = LIST_FIRST(&gp->consumer);

	cp = g_new_consumer(gp);
	error = g_attach(cp, pp);
	if (error != 0) {
		g_destroy_consumer(cp);
		return (error);
	}

	/* Match the access counts already held by the other components. */
	if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0)) {
		error = g_access(cp, fcp->acr, fcp->acw, fcp->ace);
		if (error != 0) {
			g_detach(cp);
			g_destroy_consumer(cp);
			return (error);
		}
	}
	if (sc->sc_type == G_STRIPE_TYPE_AUTOMATIC) {
		struct g_stripe_metadata md;

		/* Reread metadata. */
		error = g_stripe_read_metadata(cp, &md);
		if (error != 0)
			goto fail;

		if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0 ||
		    strcmp(md.md_name, sc->sc_name) != 0 ||
		    md.md_id != sc->sc_id) {
			G_STRIPE_DEBUG(0, "Metadata on %s changed.", pp->name);
			/*
			 * The read itself succeeded, so error is still 0
			 * here; set a real error code, otherwise the caller
			 * would believe the disk had been added.
			 */
			error = EINVAL;
			goto fail;
		}
	}

	cp->private = sc;
	cp->index = no;
	sc->sc_disks[no] = cp;

	G_STRIPE_DEBUG(0, "Disk %s attached to %s.", pp->name, sc->sc_name);

	g_stripe_check_and_run(sc);

	return (0);
fail:
	if (fcp != NULL && (fcp->acr > 0 || fcp->acw > 0 || fcp->ace > 0))
		g_access(cp, -fcp->acr, -fcp->acw, -fcp->ace);
	g_detach(cp);
	g_destroy_consumer(cp);
	return (error);
}
/*
 * Create a new stripe device with all of its disk slots still empty.
 *
 * mp   - the STRIPE class
 * md   - metadata supplying name, id, stripe size and number of disks
 * type - G_STRIPE_TYPE_AUTOMATIC or G_STRIPE_TYPE_MANUAL
 *
 * Returns the new geom, or NULL if fewer than two disks are requested,
 * the stripe size is zero or not a power of two, or a device with the
 * same name already exists.
 */
static struct g_geom *
g_stripe_create(struct g_class *mp, const struct g_stripe_metadata *md,
    u_int type)
{
	struct g_stripe_softc *sc;
	struct g_geom *gp;

	G_STRIPE_DEBUG(1, "Creating device %s (id=%u).", md->md_name,
	    md->md_id);

	/* Two disks is minimum. */
	if (md->md_all < 2) {
		G_STRIPE_DEBUG(0, "Too few disks defined for %s.", md->md_name);
		return (NULL);
	}
#if 0
	/* Stripe size has to be greater than or equal to sector size. */
	if (md->md_stripesize < sectorsize) {
		G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name);
		return (NULL);
	}
#endif
	/*
	 * Stripe size has to be a non-zero power of 2.  Zero is tested
	 * separately because powerof2(0) evaluates to true, and a zero
	 * stripe size would later be used as a divisor when the device is
	 * activated.
	 */
	if (md->md_stripesize == 0 || !powerof2(md->md_stripesize)) {
		G_STRIPE_DEBUG(0, "Invalid stripe size for %s.", md->md_name);
		return (NULL);
	}

	/* Check for duplicate unit */
	LIST_FOREACH(gp, &mp->geom, geom) {
		sc = gp->softc;
		if (sc != NULL && strcmp(sc->sc_name, md->md_name) == 0) {
			G_STRIPE_DEBUG(0, "Device %s already configured.",
			    sc->sc_name);
			return (NULL);
		}
	}
	gp = g_new_geomf(mp, "%s", md->md_name);
	gp->softc = NULL;	/* for a moment */

	sc = malloc(sizeof(*sc), M_STRIPE, M_WAITOK | M_ZERO);
	gp->start = g_stripe_start;
	gp->spoiled = g_stripe_orphan;
	gp->orphan = g_stripe_orphan;
	gp->access = g_stripe_access;
	gp->dumpconf = g_stripe_dumpconf;

	sc->sc_id = md->md_id;
	sc->sc_stripesize = md->md_stripesize;
	/* For a power of two, the set bits of (size - 1) count log2(size). */
	sc->sc_stripebits = BITCOUNT(sc->sc_stripesize - 1);
	sc->sc_ndisks = md->md_all;
	/* M_ZERO leaves every slot NULL, i.e. "no disk attached yet". */
	sc->sc_disks = malloc(sizeof(struct g_consumer *) * sc->sc_ndisks,
	    M_STRIPE, M_WAITOK | M_ZERO);
	sc->sc_type = type;

	gp->softc = sc;
	sc->sc_geom = gp;
	sc->sc_provider = NULL;

	G_STRIPE_DEBUG(0, "Device %s created (id=%u).", sc->sc_name, sc->sc_id);

	return (gp);
}
/*
 * Tear down a stripe device.
 *
 * Returns ENXIO when sc is NULL and EBUSY when the provider is still open
 * and force is not set.  Otherwise every attached disk is removed (which
 * also orphans the provider), the softc is freed and the geom is
 * withered; 0 is returned.
 */
static int
g_stripe_destroy(struct g_stripe_softc *sc, boolean_t force)
{
	struct g_provider *pp;
	struct g_geom *gp;
	u_int idx;

	g_topology_assert();

	if (sc == NULL)
		return (ENXIO);

	pp = sc->sc_provider;
	if (pp != NULL && (pp->acr != 0 || pp->acw != 0 || pp->ace != 0)) {
		if (!force) {
			G_STRIPE_DEBUG(1,
			    "Device %s is still open (r%dw%de%d).", pp->name,
			    pp->acr, pp->acw, pp->ace);
			return (EBUSY);
		}
		G_STRIPE_DEBUG(0, "Device %s is still open, so it "
		    "can't be definitely removed.", pp->name);
	}

	for (idx = 0; idx < sc->sc_ndisks; idx++) {
		if (sc->sc_disks[idx] == NULL)
			continue;
		g_stripe_remove_disk(sc->sc_disks[idx]);
	}

	gp = sc->sc_geom;
	gp->softc = NULL;
	KASSERT(sc->sc_provider == NULL, ("Provider still exists? (device=%s)",
	    gp->name));
	free(sc->sc_disks, M_STRIPE);
	free(sc, M_STRIPE);

	/* Only claim "destroyed" when nobody holds the provider open. */
	pp = LIST_FIRST(&gp->provider);
	if (pp == NULL || (pp->acr == 0 && pp->acw == 0 && pp->ace == 0))
		G_STRIPE_DEBUG(0, "Device %s destroyed.", gp->name);
	g_wither_geom(gp, ENXIO);
	return (0);
}
/*
 * Class destroy_geom method: destroy the device behind gp without forcing,
 * so g_stripe_destroy() returns EBUSY if the provider is still open and
 * ENXIO if the geom has no softc.
 */
static int
g_stripe_destroy_geom(struct gctl_req *req __unused,
struct g_class *mp __unused, struct g_geom *gp)
{
struct g_stripe_softc *sc;
sc = gp->softc;
return (g_stripe_destroy(sc, 0));
}
/*
 * Taste method: check whether pp carries GEOM_STRIPE metadata and, if so,
 * attach it to the matching AUTOMATIC device, creating that device first
 * when it does not exist yet.
 *
 * Returns the geom the provider was added to, or NULL if the provider is
 * not ours, its metadata is newer than this module understands, the
 * metadata belongs to a different provider name or size, or the disk
 * could not be added.
 */
static struct g_geom *
g_stripe_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
	struct g_stripe_metadata md;
	struct g_stripe_softc *sc;
	struct g_consumer *cp;
	struct g_geom *gp;
	int created, error;

	g_trace(G_T_TOPOLOGY, "%s(%s, %s)", __func__, mp->name, pp->name);
	g_topology_assert();

	G_STRIPE_DEBUG(3, "Tasting %s.", pp->name);

	/* Throw-away geom and consumer, used only to read the metadata. */
	gp = g_new_geomf(mp, "stripe:taste");
	gp->start = g_stripe_start;
	gp->access = g_stripe_access;
	gp->orphan = g_stripe_orphan;
	cp = g_new_consumer(gp);
	/*
	 * Only touch the provider if the attach worked; reading through,
	 * or detaching, a consumer that was never attached is not valid.
	 */
	error = g_attach(cp, pp);
	if (error == 0) {
		error = g_stripe_read_metadata(cp, &md);
		g_detach(cp);
	}
	g_destroy_consumer(cp);
	g_destroy_geom(gp);
	if (error != 0)
		return (NULL);
	gp = NULL;

	if (strcmp(md.md_magic, G_STRIPE_MAGIC) != 0)
		return (NULL);
	if (md.md_version > G_STRIPE_VERSION) {
		printf("geom_stripe.ko module is too old to handle %s.\n",
		    pp->name);
		return (NULL);
	}
	/*
	 * Backward compatibility:
	 */
	/* There was no md_provider field in earlier versions of metadata. */
	if (md.md_version < 2)
		bzero(md.md_provider, sizeof(md.md_provider));
	/* There was no md_provsize field in earlier versions of metadata. */
	if (md.md_version < 3)
		md.md_provsize = pp->mediasize;

	/* A hardcoded provider name, if present, must match. */
	if (md.md_provider[0] != '\0' && strcmp(md.md_provider, pp->name) != 0)
		return (NULL);
	/* Tell apart providers that merely share their last sector. */
	if (md.md_provsize != pp->mediasize)
		return (NULL);

	/*
	 * Let's check if device already exists.
	 * gp is left NULL when the loop runs to completion.
	 */
	sc = NULL;
	LIST_FOREACH(gp, &mp->geom, geom) {
		sc = gp->softc;
		if (sc == NULL)
			continue;
		if (sc->sc_type != G_STRIPE_TYPE_AUTOMATIC)
			continue;
		if (strcmp(md.md_name, sc->sc_name) != 0)
			continue;
		if (md.md_id != sc->sc_id)
			continue;
		break;
	}

	created = 0;
	if (gp == NULL) {
		gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_AUTOMATIC);
		if (gp == NULL) {
			G_STRIPE_DEBUG(0, "Cannot create device %s.",
			    md.md_name);
			return (NULL);
		}
		sc = gp->softc;
		created = 1;
	}

	G_STRIPE_DEBUG(1, "Adding disk %s to %s.", pp->name, gp->name);
	error = g_stripe_add_disk(sc, pp, md.md_no);
	if (error != 0) {
		G_STRIPE_DEBUG(0,
		    "Cannot add disk %s to %s (error=%d).", pp->name,
		    gp->name, error);
		/* Do not leave behind a device created just for this disk. */
		if (created)
			g_stripe_destroy(sc, 1);
		return (NULL);
	}

	return (gp);
}
/*
 * Skip a leading "/dev/" in a provider name given by userland.
 */
static const char *
g_stripe_skip_devdir(const char *name)
{

	if (strncmp(name, "/dev/", strlen("/dev/")) == 0)
		name += strlen("/dev/");
	return (name);
}

/*
 * Handle the "create" verb: build a MANUAL stripe device.
 *
 * Request parameters: "nargs" (at least 3), "arg0" (device name),
 * "arg1".."argN" (component providers, optionally prefixed with "/dev/")
 * and "stripesize".  All providers are looked up before anything is
 * created.  If any of them then fails to attach, the device is destroyed
 * again and the request error lists the providers that failed.
 */
static void
g_stripe_ctl_create(struct gctl_req *req, struct g_class *mp)
{
	struct g_stripe_metadata md;
	struct g_stripe_softc *sc;
	struct g_provider *pp;
	struct g_geom *gp;
	struct sbuf *failed;
	intmax_t *stripesize;
	const char *name;
	char argname[16];
	u_int attached, no;
	int *nargs;

	g_topology_assert();

	nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
	if (nargs == NULL) {
		gctl_error(req, "No '%s' argument.", "nargs");
		return;
	}
	if (*nargs <= 2) {
		gctl_error(req, "Too few arguments.");
		return;
	}

	/* Build in-memory metadata describing the new device. */
	strlcpy(md.md_magic, G_STRIPE_MAGIC, sizeof(md.md_magic));
	md.md_version = G_STRIPE_VERSION;
	name = gctl_get_asciiparam(req, "arg0");
	if (name == NULL) {
		gctl_error(req, "No 'arg%u' argument.", 0);
		return;
	}
	strlcpy(md.md_name, name, sizeof(md.md_name));
	md.md_id = arc4random();
	md.md_no = 0;
	md.md_all = *nargs - 1;
	stripesize = gctl_get_paraml(req, "stripesize", sizeof(*stripesize));
	if (stripesize == NULL) {
		gctl_error(req, "No '%s' argument.", "stripesize");
		return;
	}
	md.md_stripesize = *stripesize;
	bzero(md.md_provider, sizeof(md.md_provider));
	/* This field is not important here. */
	md.md_provsize = 0;

	/* First pass: make sure every named provider exists. */
	for (no = 1; no < *nargs; no++) {
		snprintf(argname, sizeof(argname), "arg%u", no);
		name = gctl_get_asciiparam(req, argname);
		if (name == NULL) {
			gctl_error(req, "No 'arg%u' argument.", no);
			return;
		}
		name = g_stripe_skip_devdir(name);
		pp = g_provider_by_name(name);
		if (pp == NULL) {
			G_STRIPE_DEBUG(1, "Disk %s is invalid.", name);
			gctl_error(req, "Disk %s is invalid.", name);
			return;
		}
	}

	gp = g_stripe_create(mp, &md, G_STRIPE_TYPE_MANUAL);
	if (gp == NULL) {
		gctl_error(req, "Can't configure %s.", md.md_name);
		return;
	}
	sc = gp->softc;

	/* Second pass: attach, collecting the names of any failures. */
	failed = sbuf_new(NULL, NULL, 0, SBUF_AUTOEXTEND);
	sbuf_printf(failed, "Can't attach disk(s) to %s:", gp->name);
	attached = 0;
	for (no = 1; no < *nargs; no++) {
		snprintf(argname, sizeof(argname), "arg%u", no);
		name = gctl_get_asciiparam(req, argname);
		name = g_stripe_skip_devdir(name);
		pp = g_provider_by_name(name);
		KASSERT(pp != NULL, ("Provider %s disappear?!", name));
		if (g_stripe_add_disk(sc, pp, no - 1) == 0) {
			attached++;
			continue;
		}
		G_STRIPE_DEBUG(1, "Disk %u (%s) not attached to %s.",
		    no, pp->name, gp->name);
		sbuf_printf(failed, " %s", pp->name);
	}
	sbuf_finish(failed);
	if (md.md_all != attached) {
		g_stripe_destroy(sc, 1);
		gctl_error(req, "%s", sbuf_data(failed));
	}
	sbuf_delete(failed);
}
/*
 * Look up a live stripe device of class mp by name.
 *
 * Geoms without a softc are skipped.  Returns the softc of the first
 * match, or NULL if there is none.
 */
static struct g_stripe_softc *
g_stripe_find_device(struct g_class *mp, const char *name)
{
	struct g_stripe_softc *sc;
	struct g_geom *gp;

	LIST_FOREACH(gp, &mp->geom, geom) {
		sc = gp->softc;
		if (sc != NULL && strcmp(sc->sc_name, name) == 0)
			return (sc);
	}
	return (NULL);
}
static void
g_stripe_ctl_destroy(struct gctl_req *req, struct g_class *mp)
{
struct g_stripe_softc *sc;
int *force, *nargs, error;
const char *name;
char param[16];
u_int i;
g_topology_assert();
nargs = gctl_get_paraml(req, "nargs", sizeof(*nargs));
if (nargs == NULL) {
gctl_error(req, "No '%s' argument.", "nargs");
return;
}
if (*nargs <= 0) {
gctl_error(req, "Missing device(s).");
return;
}
force = gctl_get_paraml(req, "force", sizeof(*force));
if (force == NULL) {
gctl_error(req, "No '%s' argument.", "force");
return;
}
for (i = 0; i < (u_int)*nargs; i++) {
snprintf(param, sizeof(param), "arg%u", i);
name = gctl_get_asciiparam(req, param);
if (name == NULL) {
gctl_error(req, "No 'arg%u' argument.", i);
return;
}
sc = g_stripe_find_device(mp, name);
if (sc == NULL) {
gctl_error(req, "No such device: %s.", name);
return;
}
error = g_stripe_destroy(sc, *force);
if (error != 0) {
gctl_error(req, "Cannot destroy device %s (error=%d).",
sc->sc_name, error);
return;
}
}
}
/*
 * Class ctlreq method: dispatch a control request by verb.
 *
 * The request must carry a "version" parameter equal to G_STRIPE_VERSION.
 * "create" goes to g_stripe_ctl_create(); "destroy" and "stop" both go to
 * g_stripe_ctl_destroy(); any other verb is an error.
 */
static void
g_stripe_config(struct gctl_req *req, struct g_class *mp, const char *verb)
{
	uint32_t *version;

	g_topology_assert();

	version = gctl_get_paraml(req, "version", sizeof(*version));
	if (version == NULL) {
		gctl_error(req, "No '%s' argument.", "version");
		return;
	}
	if (*version != G_STRIPE_VERSION) {
		gctl_error(req, "Userland and kernel parts are out of sync.");
		return;
	}

	if (strcmp(verb, "create") == 0)
		g_stripe_ctl_create(req, mp);
	else if (strcmp(verb, "destroy") == 0 || strcmp(verb, "stop") == 0)
		g_stripe_ctl_destroy(req, mp);
	else
		gctl_error(req, "Unknown verb.");
}
static void
g_stripe_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
struct g_consumer *cp, struct g_provider *pp)
{
struct g_stripe_softc *sc;
sc = gp->softc;
if (sc == NULL)
return;
if (pp != NULL) {
/* Nothing here. */
} else if (cp != NULL) {
sbuf_printf(sb, "%s<Number>%u</Number>\n", indent,
(u_int)cp->index);
} else {
sbuf_printf(sb, "%s<ID>%u</ID>\n", indent, (u_int)sc->sc_id);
sbuf_printf(sb, "%s<Stripesize>%u</Stripesize>\n", indent,
(u_int)sc->sc_stripesize);
sbuf_printf(sb, "%s<Type>", indent);
switch (sc->sc_type) {
case G_STRIPE_TYPE_AUTOMATIC:
sbuf_printf(sb, "AUTOMATIC");
break;
case G_STRIPE_TYPE_MANUAL:
sbuf_printf(sb, "MANUAL");
break;
default:
sbuf_printf(sb, "UNKNOWN");
break;
}
sbuf_printf(sb, "</Type>\n");
sbuf_printf(sb, "%s<Status>Total=%u, Online=%u</Status>\n",
indent, sc->sc_ndisks, g_stripe_nvalid(sc));
sbuf_printf(sb, "%s<State>", indent);
if (sc->sc_provider != NULL && sc->sc_provider->error == 0)
sbuf_printf(sb, "UP");
else
sbuf_printf(sb, "DOWN");
sbuf_printf(sb, "</State>\n");
}
}
/* Hand g_stripe_class to GEOM; the second argument is the module name. */
DECLARE_GEOM_CLASS(g_stripe_class, g_stripe);