/*-
 * Copyright (c) 2004 Lukas Ertl
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/bio.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/libkern.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/systm.h>

#include <geom/geom.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum_raid5.h>
#include <geom/vinum/geom_vinum.h>

static void gv_plex_completed_request(struct gv_plex *, struct bio *);
static void gv_plex_normal_request(struct gv_plex *, struct bio *);
static void gv_plex_worker(void *);
static int gv_check_parity(struct gv_plex *, struct bio *,
    struct gv_raid5_packet *);
static int gv_normal_parity(struct gv_plex *, struct bio *,
    struct gv_raid5_packet *);

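/*
 * Orphan callback: an underlying subdisk provider has gone away.  Drop our
 * consumer; once the last consumer is gone, stop the worker thread and
 * wither the plex geom.
 */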
/* XXX: is this the place to catch dying subdisks? */
static void
gv_plex_orphan(struct g_consumer *cp)
{
	struct g_geom *gp;
	struct gv_plex *p;
	int error;

	g_topology_assert();
	gp = cp->geom;
	g_trace(G_T_TOPOLOGY, "gv_plex_orphan(%s)", gp->name);

	if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
		g_access(cp, -cp->acr, -cp->acw, -cp->ace);
	error = cp->provider->error;
	if (error == 0)
		error = ENXIO;
	g_detach(cp);
	g_destroy_consumer(cp);
	if (!LIST_EMPTY(&gp->consumer))
		return;

	p = gp->softc;
	if (p != NULL) {
		gv_kill_plex_thread(p);
		p->geom = NULL;
		p->provider = NULL;
		p->consumer = NULL;
	}
	gp->softc = NULL;
	g_wither_geom(gp, error);
}

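/*
 * Completion handler used for synchronization sub-requests: mark the BIO as
 * done, put it back on the plex's queue and wake up the worker thread.
 */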
void
gv_plex_done(struct bio *bp)
{
	struct gv_plex *p;

	p = bp->bio_from->geom->softc;
	bp->bio_cflags |= GV_BIO_DONE;
	mtx_lock(&p->bqueue_mtx);
	bioq_insert_tail(p->bqueue, bp);
	wakeup(p);
	mtx_unlock(&p->bqueue_mtx);
}

/* Find the correct subdisk to send the bio to and build a bio to send. */
static int
gv_plexbuffer(struct gv_plex *p, struct bio *bp, caddr_t addr, off_t boff, off_t bcount)
{
	struct g_geom *gp;
	struct gv_sd *s;
	struct bio *cbp, *pbp;
	int i, sdno;
	off_t len_left, real_len, real_off;
	off_t stripeend, stripeno, stripestart;

	if (p == NULL || LIST_EMPTY(&p->subdisks))
		return (ENXIO);

	s = NULL;
	gp = bp->bio_to->geom;

	/*
	 * We only handle concatenated and striped plexes here.  RAID5 plexes
	 * are handled in gv_build_raid5_req().
	 */
	switch (p->org) {
	case GV_PLEX_CONCAT:
		/*
		 * Find the subdisk where this request starts.  The subdisks in
		 * this list must be ordered by plex_offset.
		 */
		LIST_FOREACH(s, &p->subdisks, in_plex) {
			if (s->plex_offset <= boff &&
			    s->plex_offset + s->size > boff)
				break;
		}
		/* Subdisk not found. */
		if (s == NULL)
			return (ENXIO);

		/* Calculate corresponding offsets on disk. */
		real_off = boff - s->plex_offset;
		len_left = s->size - real_off;
		real_len = (bcount > len_left) ? len_left : bcount;
		break;

	case GV_PLEX_STRIPED:
		/* The number of the stripe where the request starts. */
		stripeno = boff / p->stripesize;

		/* The number of the subdisk where the stripe resides. */
		sdno = stripeno % p->sdcount;

		/* Find the right subdisk. */
		i = 0;
		LIST_FOREACH(s, &p->subdisks, in_plex) {
			if (i == sdno)
				break;
			i++;
		}

		/* Subdisk not found. */
		if (s == NULL)
			return (ENXIO);

		/* The offset of the stripe from the start of the subdisk. */
		stripestart = (stripeno / p->sdcount) *
		    p->stripesize;

		/* The offset at the end of the stripe. */
		stripeend = stripestart + p->stripesize;

		/* The offset of the request on this subdisk. */
		real_off = boff - (stripeno * p->stripesize) +
		    stripestart;

		/* The length left in this stripe. */
		len_left = stripeend - real_off;

		real_len = (bcount <= len_left) ? bcount : len_left;
		break;

	default:
		return (EINVAL);
	}

	/* Now check if we can handle the request on this subdisk. */
	switch (s->state) {
	case GV_SD_UP:
		/* If the subdisk is up, just continue. */
		break;

	case GV_SD_STALE:
		if (!(bp->bio_cflags & GV_BIO_SYNCREQ))
			return (ENXIO);

		G_VINUM_DEBUG(1, "sd %s is initializing", s->name);
		gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
		break;

	case GV_SD_INITIALIZING:
		if (bp->bio_cmd == BIO_READ)
			return (ENXIO);
		break;

	default:
		/* All other subdisk states mean it's not accessible. */
		return (ENXIO);
	}

	/* Clone the bio and adjust the offsets and sizes. */
	cbp = g_clone_bio(bp);
	if (cbp == NULL)
		return (ENOMEM);
	cbp->bio_offset = real_off;
	cbp->bio_length = real_len;
	cbp->bio_data = addr;
	cbp->bio_done = g_std_done;
	cbp->bio_caller2 = s->consumer;
	if ((bp->bio_cflags & GV_BIO_SYNCREQ)) {
		cbp->bio_cflags |= GV_BIO_SYNCREQ;
		cbp->bio_done = gv_plex_done;
	}

	if (bp->bio_driver1 == NULL) {
		bp->bio_driver1 = cbp;
	} else {
		pbp = bp->bio_driver1;
		while (pbp->bio_caller1 != NULL)
			pbp = pbp->bio_caller1;
		pbp->bio_caller1 = cbp;
	}

	return (0);
}

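/*
 * Start method of the plex geom: weed out unsupported commands and requests
 * the plex cannot serve in its current state, then queue the BIO for the
 * worker thread.
 */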
static void
gv_plex_start(struct bio *bp)
{
	struct gv_plex *p;

	switch (bp->bio_cmd) {
	case BIO_READ:
	case BIO_WRITE:
	case BIO_DELETE:
		break;
	case BIO_GETATTR:
	default:
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}

	/*
	 * We cannot handle this request if too many of our subdisks are
	 * inaccessible.
	 */
	p = bp->bio_to->geom->softc;
	if ((p->state < GV_PLEX_DEGRADED) &&
	    !(bp->bio_cflags & GV_BIO_SYNCREQ)) {
		g_io_deliver(bp, ENXIO);
		return;
	}

	mtx_lock(&p->bqueue_mtx);
	bioq_disksort(p->bqueue, bp);
	wakeup(p);
	mtx_unlock(&p->bqueue_mtx);
}

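/*
 * Per-plex worker thread: pick BIOs off the queue and dispatch them as
 * completed, held-back or normal requests until we are told to exit.
 */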
static void
gv_plex_worker(void *arg)
{
	struct bio *bp;
	struct gv_plex *p;
	struct gv_sd *s;

	p = arg;
	KASSERT(p != NULL, ("NULL p"));

	mtx_lock(&p->bqueue_mtx);
	for (;;) {
		/* We were signaled to exit. */
		if (p->flags & GV_PLEX_THREAD_DIE)
			break;

		/* Take the first BIO from our queue. */
		bp = bioq_takefirst(p->bqueue);
		if (bp == NULL) {
			msleep(p, &p->bqueue_mtx, PRIBIO, "-", hz/10);
			continue;
		}
		mtx_unlock(&p->bqueue_mtx);

		/* A completed request. */
		if (bp->bio_cflags & GV_BIO_DONE) {
			if (bp->bio_cflags & GV_BIO_SYNCREQ ||
			    bp->bio_cflags & GV_BIO_REBUILD) {
				s = bp->bio_to->private;
				if (bp->bio_error == 0)
					s->initialized += bp->bio_length;
				if (s->initialized >= s->size) {
					g_topology_lock();
					gv_set_sd_state(s, GV_SD_UP,
					    GV_SETSTATE_CONFIG);
					g_topology_unlock();
					s->initialized = 0;
				}
			}

			if (bp->bio_cflags & GV_BIO_SYNCREQ)
				g_std_done(bp);
			else
				gv_plex_completed_request(p, bp);
		/*
		 * A sub-request that was held back because it interfered with
		 * another sub-request.
		 */
		} else if (bp->bio_cflags & GV_BIO_ONHOLD) {
			/* Is it still locked out? */
			if (gv_stripe_active(p, bp)) {
				/* Park the bio on the waiting queue. */
				mtx_lock(&p->bqueue_mtx);
				bioq_disksort(p->wqueue, bp);
				mtx_unlock(&p->bqueue_mtx);
			} else {
				bp->bio_cflags &= ~GV_BIO_ONHOLD;
				g_io_request(bp, bp->bio_caller2);
			}

		/* A normal request to this plex. */
		} else
			gv_plex_normal_request(p, bp);

		mtx_lock(&p->bqueue_mtx);
	}
	mtx_unlock(&p->bqueue_mtx);
	p->flags |= GV_PLEX_THREAD_DEAD;
	wakeup(p);

	kproc_exit(ENXIO);
}

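/*
 * Handle the parity part of a normal RAID5 write: first XOR the held-back
 * data request into the parity buffer and issue that data request; on the
 * next pass issue the parity write itself.  Returns 1 once no sub-requests
 * remain outstanding.
 */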
static int
gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
	struct bio *cbp, *pbp;
	int finished, i;

	finished = 1;

	if (wp->waiting != NULL) {
		pbp = wp->waiting;
		wp->waiting = NULL;
		cbp = wp->parity;
		for (i = 0; i < wp->length; i++)
			cbp->bio_data[i] ^= pbp->bio_data[i];
		g_io_request(pbp, pbp->bio_caller2);
		finished = 0;

	} else if (wp->parity != NULL) {
		cbp = wp->parity;
		wp->parity = NULL;
		g_io_request(cbp, cbp->bio_caller2);
		finished = 0;
	}

	return (finished);
}

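/*
 * Handle the parity part of a parity check (and optional rebuild): compare
 * the computed parity against the parity read from disk, flag mismatches
 * with EAGAIN and rewrite the parity block if we were asked to.  Returns 1
 * once the packet is finished.
 */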
static int
gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
	struct bio *pbp;
	int err, finished, i;

	err = 0;
	finished = 1;

	if (wp->waiting != NULL) {
		pbp = wp->waiting;
		wp->waiting = NULL;
		g_io_request(pbp, pbp->bio_caller2);
		finished = 0;

	} else if (wp->parity != NULL) {
		pbp = wp->parity;
		wp->parity = NULL;

		/* Check if the parity is correct. */
		for (i = 0; i < wp->length; i++) {
			if (bp->bio_data[i] != pbp->bio_data[i]) {
				err = 1;
				break;
			}
		}

		/* The parity is not correct... */
		if (err) {
			bp->bio_parent->bio_error = EAGAIN;

			/* ... but we rebuild it. */
			if (bp->bio_parent->bio_cflags & GV_BIO_PARITY) {
				g_io_request(pbp, pbp->bio_caller2);
				finished = 0;
			}
		}

		/*
		 * Clean up the BIO we would have used for rebuilding the
		 * parity.
		 */
		if (finished) {
			bp->bio_parent->bio_inbed++;
			g_destroy_bio(pbp);
		}
	}

	return (finished);
}

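/*
 * Handle a completed sub-request: for RAID5 packets fold the returned data
 * into the packet (or its parity), requeue BIOs that were waiting for the
 * stripe, and deliver the original request once all of its children are in.
 */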
void
gv_plex_completed_request(struct gv_plex *p, struct bio *bp)
{
	struct bio *cbp, *pbp;
	struct gv_bioq *bq, *bq2;
	struct gv_raid5_packet *wp;
	int i;

	wp = bp->bio_driver1;

	switch (bp->bio_parent->bio_cmd) {
	case BIO_READ:
		if (wp == NULL)
			break;

		TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
			if (bq->bp == bp) {
				TAILQ_REMOVE(&wp->bits, bq, queue);
				g_free(bq);
				for (i = 0; i < wp->length; i++)
					wp->data[i] ^= bp->bio_data[i];
				break;
			}
		}
		if (TAILQ_EMPTY(&wp->bits)) {
			bp->bio_parent->bio_completed += wp->length;
			if (wp->lockbase != -1) {
				TAILQ_REMOVE(&p->packets, wp, list);
				/* Bring the waiting bios back into the game. */
				mtx_lock(&p->bqueue_mtx);
				pbp = bioq_takefirst(p->wqueue);
				while (pbp != NULL) {
					bioq_disksort(p->bqueue, pbp);
					pbp = bioq_takefirst(p->wqueue);
				}
				mtx_unlock(&p->bqueue_mtx);
			}
			g_free(wp);
		}

		break;

	case BIO_WRITE:
		if (wp == NULL)
			break;

		/* Check if we need to handle parity data. */
		TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
			if (bq->bp == bp) {
				TAILQ_REMOVE(&wp->bits, bq, queue);
				g_free(bq);
				cbp = wp->parity;
				if (cbp != NULL) {
					for (i = 0; i < wp->length; i++)
						cbp->bio_data[i] ^=
						    bp->bio_data[i];
				}
				break;
			}
		}

		/* Handle parity data. */
		if (TAILQ_EMPTY(&wp->bits)) {
			if (bp->bio_parent->bio_cflags & GV_BIO_CHECK)
				i = gv_check_parity(p, bp, wp);
			else
				i = gv_normal_parity(p, bp, wp);

			/* All of our sub-requests have finished. */
			if (i) {
				bp->bio_parent->bio_completed += wp->length;
				TAILQ_REMOVE(&p->packets, wp, list);
				/* Bring the waiting bios back into the game. */
				mtx_lock(&p->bqueue_mtx);
				pbp = bioq_takefirst(p->wqueue);
				while (pbp != NULL) {
					bioq_disksort(p->bqueue, pbp);
					pbp = bioq_takefirst(p->wqueue);
				}
				mtx_unlock(&p->bqueue_mtx);
				g_free(wp);
			}
		}

		break;
	}

	pbp = bp->bio_parent;
	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;

	/* When the original request is finished, we deliver it. */
	pbp->bio_inbed++;
	if (pbp->bio_inbed == pbp->bio_children)
		g_io_deliver(pbp, pbp->bio_error);

	/* Clean up what we allocated. */
	if (bp->bio_cflags & GV_BIO_MALLOC)
		g_free(bp->bio_data);
	g_destroy_bio(bp);
}

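/*
 * Break an incoming request up into sub-requests for the individual subdisks
 * (RAID5 requests get a packet of read/write/parity sub-requests), then fire
 * them off, parking sub-requests that collide with a stripe that is already
 * active.
 */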
void
gv_plex_normal_request(struct gv_plex *p, struct bio *bp)
{
	struct bio *cbp, *pbp;
	struct gv_bioq *bq, *bq2;
	struct gv_raid5_packet *wp, *wp2;
	caddr_t addr;
	off_t bcount, boff;
	int err;

	bcount = bp->bio_length;
	addr = bp->bio_data;
	boff = bp->bio_offset;

	/* Walk over the whole length of the request, we might split it up. */
	while (bcount > 0) {
		wp = NULL;

		/*
		 * RAID5 plexes need special treatment, as a single write
		 * request involves several read/write sub-requests.
		 */
		if (p->org == GV_PLEX_RAID5) {
			wp = g_malloc(sizeof(*wp), M_WAITOK | M_ZERO);
			wp->bio = bp;
			TAILQ_INIT(&wp->bits);

			if (bp->bio_cflags & GV_BIO_REBUILD)
				err = gv_rebuild_raid5(p, wp, bp, addr,
				    boff, bcount);
			else if (bp->bio_cflags & GV_BIO_CHECK)
				err = gv_check_raid5(p, wp, bp, addr,
				    boff, bcount);
			else
				err = gv_build_raid5_req(p, wp, bp, addr,
				    boff, bcount);

			/*
			 * Building the sub-request failed, we probably need to
			 * clean up a lot.
			 */
			if (err) {
				G_VINUM_LOGREQ(0, bp, "plex request failed.");
				TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
					TAILQ_REMOVE(&wp->bits, bq, queue);
					g_free(bq);
				}
				if (wp->waiting != NULL) {
					if (wp->waiting->bio_cflags &
					    GV_BIO_MALLOC)
						g_free(wp->waiting->bio_data);
					g_destroy_bio(wp->waiting);
				}
				if (wp->parity != NULL) {
					if (wp->parity->bio_cflags &
					    GV_BIO_MALLOC)
						g_free(wp->parity->bio_data);
					g_destroy_bio(wp->parity);
				}
				g_free(wp);

				TAILQ_FOREACH_SAFE(wp, &p->packets, list, wp2) {
					if (wp->bio == bp) {
						TAILQ_REMOVE(&p->packets, wp,
						    list);
						TAILQ_FOREACH_SAFE(bq,
						    &wp->bits, queue, bq2) {
							TAILQ_REMOVE(&wp->bits,
							    bq, queue);
							g_free(bq);
						}
						g_free(wp);
					}
				}

				cbp = bp->bio_driver1;
				while (cbp != NULL) {
					pbp = cbp->bio_caller1;
					if (cbp->bio_cflags & GV_BIO_MALLOC)
						g_free(cbp->bio_data);
					g_destroy_bio(cbp);
					cbp = pbp;
				}

				g_io_deliver(bp, err);
				return;
			}

			if (TAILQ_EMPTY(&wp->bits))
				g_free(wp);
			else if (wp->lockbase != -1)
				TAILQ_INSERT_TAIL(&p->packets, wp, list);

		/*
		 * Requests to concatenated and striped plexes go straight
		 * through.
		 */
		} else {
			err = gv_plexbuffer(p, bp, addr, boff, bcount);

			/* Building the sub-request failed. */
			if (err) {
				G_VINUM_LOGREQ(0, bp, "plex request failed.");
				cbp = bp->bio_driver1;
				while (cbp != NULL) {
					pbp = cbp->bio_caller1;
					g_destroy_bio(cbp);
					cbp = pbp;
				}
				g_io_deliver(bp, err);
				return;
			}
		}

		/* Abuse bio_caller1 as linked list. */
		pbp = bp->bio_driver1;
		while (pbp->bio_caller1 != NULL)
			pbp = pbp->bio_caller1;
		bcount -= pbp->bio_length;
		addr += pbp->bio_length;
		boff += pbp->bio_length;
	}

	/* Fire off all sub-requests. */
	pbp = bp->bio_driver1;
	while (pbp != NULL) {
		/*
		 * RAID5 sub-requests need to come in correct order, otherwise
		 * we trip over the parity, as it might be overwritten by
		 * another sub-request.
		 */
		if (pbp->bio_driver1 != NULL &&
		    gv_stripe_active(p, pbp)) {
			/* Park the bio on the waiting queue. */
			pbp->bio_cflags |= GV_BIO_ONHOLD;
			mtx_lock(&p->bqueue_mtx);
			bioq_disksort(p->wqueue, pbp);
			mtx_unlock(&p->bqueue_mtx);
		} else
			g_io_request(pbp, pbp->bio_caller2);
		pbp = pbp->bio_caller1;
	}
}

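/*
 * Access method: propagate access requests to all consumers.  For RAID5
 * plexes a write also needs read access, since writing may require parity
 * data to be read from the other subdisks; partial failures are rolled back.
 */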
static int
gv_plex_access(struct g_provider *pp, int dr, int dw, int de)
{
	struct gv_plex *p;
	struct g_geom *gp;
	struct g_consumer *cp, *cp2;
	int error;

	gp = pp->geom;
	p = gp->softc;
	KASSERT(p != NULL, ("NULL p"));

	if (p->org == GV_PLEX_RAID5) {
		if (dw > 0 && dr == 0)
			dr = 1;
		else if (dw < 0 && dr == 0)
			dr = -1;
	}

	LIST_FOREACH(cp, &gp->consumer, consumer) {
		error = g_access(cp, dr, dw, de);
		if (error) {
			LIST_FOREACH(cp2, &gp->consumer, consumer) {
				if (cp == cp2)
					break;
				g_access(cp2, -dr, -dw, -de);
			}
			return (error);
		}
	}
	return (0);
}

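/*
 * Taste method: attach to subdisk providers offered by VINUMDRIVE geoms,
 * hook the subdisk into its plex and either extend an existing plex geom or
 * create a new one along with its worker thread and provider.
 */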
static struct g_geom *
gv_plex_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
	struct g_geom *gp;
	struct g_consumer *cp, *cp2;
	struct g_provider *pp2;
	struct gv_plex *p;
	struct gv_sd *s;
	struct gv_softc *sc;
	int error;

	g_trace(G_T_TOPOLOGY, "gv_plex_taste(%s, %s)", mp->name, pp->name);
	g_topology_assert();

	/* We only want to attach to subdisks. */
	if (strcmp(pp->geom->class->name, "VINUMDRIVE"))
		return (NULL);

	/* Find the VINUM class and its associated geom. */
	gp = find_vinum_geom();
	if (gp == NULL)
		return (NULL);
	sc = gp->softc;
	KASSERT(sc != NULL, ("gv_plex_taste: NULL sc"));

	/* Find out which subdisk the offered provider corresponds to. */
	s = pp->private;
	KASSERT(s != NULL, ("gv_plex_taste: NULL s"));

	/* Now find the correct plex where this subdisk belongs to. */
	p = gv_find_plex(sc, s->plex);
	if (p == NULL) {
		G_VINUM_DEBUG(0, "%s: NULL p for '%s'", __func__, s->name);
		return (NULL);
	}

	/*
	 * Add this subdisk to this plex.  Since we trust the on-disk
	 * configuration, we don't check the given value (should we?).
	 * XXX: shouldn't be done here
	 */
	gv_sd_to_plex(p, s, 0);

	/* Now check if there's already a geom for this plex. */
	gp = p->geom;

	/* Yes, there is already a geom, so we just add the consumer. */
	if (gp != NULL) {
		cp2 = LIST_FIRST(&gp->consumer);
		/* Need to attach a new consumer to this subdisk. */
		cp = g_new_consumer(gp);
		error = g_attach(cp, pp);
		if (error) {
			G_VINUM_DEBUG(0, "unable to attach consumer to %s",
			    pp->name);
			g_destroy_consumer(cp);
			return (NULL);
		}
		/* Adjust the access counts of the new consumer. */
		if ((cp2 != NULL) && (cp2->acr || cp2->acw || cp2->ace)) {
			error = g_access(cp, cp2->acr, cp2->acw, cp2->ace);
			if (error) {
				G_VINUM_DEBUG(0, "unable to set access counts"
				    " for consumer on %s", pp->name);
				g_detach(cp);
				g_destroy_consumer(cp);
				return (NULL);
			}
		}
		s->consumer = cp;

		/* Adjust the size of the providers this plex has. */
		LIST_FOREACH(pp2, &gp->provider, provider)
			pp2->mediasize = p->size;

		/* Update the size of the volume this plex is attached to. */
		if (p->vol_sc != NULL)
			gv_update_vol_size(p->vol_sc, p->size);

		/*
		 * If necessary, create bio queues, queue mutex and a worker
		 * thread.
		 */
		if (p->bqueue == NULL) {
			p->bqueue = g_malloc(sizeof(struct bio_queue_head),
			    M_WAITOK | M_ZERO);
			bioq_init(p->bqueue);
		}
		if (p->wqueue == NULL) {
			p->wqueue = g_malloc(sizeof(struct bio_queue_head),
			    M_WAITOK | M_ZERO);
			bioq_init(p->wqueue);
		}
		if (mtx_initialized(&p->bqueue_mtx) == 0)
			mtx_init(&p->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
		if (!(p->flags & GV_PLEX_THREAD_ACTIVE)) {
			kproc_create(gv_plex_worker, p, NULL, 0, 0, "gv_p %s",
			    p->name);
			p->flags |= GV_PLEX_THREAD_ACTIVE;
		}

		return (NULL);

	/* We need to create a new geom. */
	} else {
		gp = g_new_geomf(mp, "%s", p->name);
		gp->start = gv_plex_start;
		gp->orphan = gv_plex_orphan;
		gp->access = gv_plex_access;
		gp->softc = p;
		p->geom = gp;

		TAILQ_INIT(&p->packets);
		p->bqueue = g_malloc(sizeof(struct bio_queue_head),
		    M_WAITOK | M_ZERO);
		bioq_init(p->bqueue);
		p->wqueue = g_malloc(sizeof(struct bio_queue_head),
		    M_WAITOK | M_ZERO);
		bioq_init(p->wqueue);
		mtx_init(&p->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
		kproc_create(gv_plex_worker, p, NULL, 0, 0, "gv_p %s",
		    p->name);
		p->flags |= GV_PLEX_THREAD_ACTIVE;

		/* Attach a consumer to this provider. */
		cp = g_new_consumer(gp);
		g_attach(cp, pp);
		s->consumer = cp;

		/* Create a provider for the outside world. */
		pp2 = g_new_providerf(gp, "gvinum/plex/%s", p->name);
		pp2->mediasize = p->size;
		pp2->sectorsize = pp->sectorsize;
		p->provider = pp2;
		g_error_provider(pp2, 0);
		return (gp);
	}
}

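/*
 * Destroy method: stop the plex worker thread and wither the plex geom.
 */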
static int
gv_plex_destroy_geom(struct gctl_req *req, struct g_class *mp,
    struct g_geom *gp)
{
	struct gv_plex *p;

	g_trace(G_T_TOPOLOGY, "gv_plex_destroy_geom: %s", gp->name);
	g_topology_assert();

	p = gp->softc;

	KASSERT(p != NULL, ("gv_plex_destroy_geom: null p of '%s'", gp->name));

	/*
	 * If this is a RAID5 plex, check if its worker thread is still active
	 * and signal it to self destruct.
	 */
	gv_kill_plex_thread(p);
	/* g_free(sc); */
	g_wither_geom(gp, ENXIO);
	return (0);
}

#define VINUMPLEX_CLASS_NAME "VINUMPLEX"

static struct g_class g_vinum_plex_class = {
	.name = VINUMPLEX_CLASS_NAME,
	.version = G_VERSION,
	.taste = gv_plex_taste,
	.destroy_geom = gv_plex_destroy_geom,
};

DECLARE_GEOM_CLASS(g_vinum_plex_class, g_vinum_plex);