freebsd-skq/sys/geom/vinum/geom_vinum_volume.c
julian 51d643caa6 Rename the kthread_xxx (e.g. kthread_create()) calls
to kproc_xxx as they actually make whole processes.
This makes way for us to add REAL kthread_create() and friends
that actually make threads. It turns out that most of these
calls actually end up being moved back to the thread version
when it's added, but we need to make this cosmetic change first.

I'd LOVE to do this rename in 7.0  so that we can eventually MFC the
new kthread_xxx() calls.
2007-10-20 23:23:23 +00:00

445 lines
10 KiB
C

/*-
* Copyright (c) 2004 Lukas Ertl
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bio.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/libkern.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/systm.h>
#include <geom/geom.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum.h>
/*
 * Forward declarations of the two request handlers that gv_vol_worker()
 * dispatches to; both are defined further down in this file.
 */
static void gv_vol_completed_request(struct gv_volume *, struct bio *);
static void gv_vol_normal_request(struct gv_volume *, struct bio *);
static void
gv_volume_orphan(struct g_consumer *cp)
{
struct g_geom *gp;
struct gv_volume *v;
int error;
g_topology_assert();
gp = cp->geom;
g_trace(G_T_TOPOLOGY, "gv_volume_orphan(%s)", gp->name);
if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
g_access(cp, -cp->acr, -cp->acw, -cp->ace);
error = cp->provider->error;
if (error == 0)
error = ENXIO;
g_detach(cp);
g_destroy_consumer(cp);
if (!LIST_EMPTY(&gp->consumer))
return;
v = gp->softc;
if (v != NULL) {
gv_kill_vol_thread(v);
v->geom = NULL;
}
gp->softc = NULL;
g_wither_geom(gp, error);
}
/*
 * Completion callback installed on the clones sent to the plexes.
 *
 * The finished bio is tagged GV_BIO_DONE and appended to the volume's queue;
 * gv_vol_worker() uses that flag to route it to gv_vol_completed_request().
 */
static void
gv_volume_done(struct bio *bp)
{
	struct gv_volume *vol = bp->bio_from->geom->softc;

	bp->bio_cflags |= GV_BIO_DONE;

	mtx_lock(&vol->bqueue_mtx);
	bioq_insert_tail(vol->bqueue, bp);
	wakeup(vol);
	mtx_unlock(&vol->bqueue_mtx);
}
/*
 * Start method of the volume geom.
 *
 * Only BIO_READ, BIO_WRITE and BIO_DELETE are accepted; every other command
 * (BIO_GETATTR included) is completed with EOPNOTSUPP.  Requests for a volume
 * that is not GV_VOL_UP are completed with ENXIO.  Accepted requests are
 * sorted into the volume's bio queue and the sleeper on the volume is woken.
 */
static void
gv_volume_start(struct bio *bp)
{
	struct gv_volume *vol;

	if (bp->bio_cmd != BIO_READ && bp->bio_cmd != BIO_WRITE &&
	    bp->bio_cmd != BIO_DELETE) {
		g_io_deliver(bp, EOPNOTSUPP);
		return;
	}

	vol = bp->bio_to->geom->softc;
	if (vol->state != GV_VOL_UP) {
		g_io_deliver(bp, ENXIO);
		return;
	}

	mtx_lock(&vol->bqueue_mtx);
	bioq_disksort(vol->bqueue, bp);
	wakeup(vol);
	mtx_unlock(&vol->bqueue_mtx);
}
/*
 * Main loop of the per-volume worker; arg is the struct gv_volume.
 *
 * Bios are taken off the volume's queue one at a time and dispatched with
 * the queue mutex released: bios flagged GV_BIO_DONE go to
 * gv_vol_completed_request(), all others to gv_vol_normal_request().
 * The loop ends once GV_VOL_THREAD_DIE is set in the volume flags; the
 * worker then sets GV_VOL_THREAD_DEAD, wakes sleepers on the volume and
 * exits.
 */
static void
gv_vol_worker(void *arg)
{
	struct gv_volume *vol;
	struct bio *next;

	vol = arg;
	KASSERT(vol != NULL, ("NULL v"));

	mtx_lock(&vol->bqueue_mtx);
	/* Run until we are signaled to exit. */
	while ((vol->flags & GV_VOL_THREAD_DIE) == 0) {
		next = bioq_takefirst(vol->bqueue);
		if (next == NULL) {
			/* Queue is empty: sleep until woken or hz/10 ticks pass. */
			msleep(vol, &vol->bqueue_mtx, PRIBIO, "-", hz/10);
			continue;
		}

		/* Handle the bio without holding the queue mutex. */
		mtx_unlock(&vol->bqueue_mtx);
		if ((next->bio_cflags & GV_BIO_DONE) != 0)
			gv_vol_completed_request(vol, next);
		else
			gv_vol_normal_request(vol, next);
		mtx_lock(&vol->bqueue_mtx);
	}
	mtx_unlock(&vol->bqueue_mtx);

	vol->flags |= GV_VOL_THREAD_DEAD;
	wakeup(vol);
	kproc_exit(ENXIO);
}
/*
 * Process a sub-request that has come back from a plex.
 *
 * v  - volume the request belongs to.
 * bp - the finished clone; bp->bio_parent is the request that was handed to
 *      the volume.
 *
 * A failed read is retried while the consumer list still has entries after
 * the one that failed: the clone is dropped and the parent is put back on
 * the volume queue.  GV_BIO_RETRY is set on the parent when only one further
 * consumer follows, which prevents any retry after that one.  For writes and
 * deletes GV_BIO_SUCCEED records that at least one sub-request succeeded.
 * When all outstanding children are in, the parent is delivered.
 */
static void
gv_vol_completed_request(struct gv_volume *v, struct bio *bp)
{
	struct bio *pbp;
	struct g_consumer *cp, *cp2;

	pbp = bp->bio_parent;

	/* The parent keeps the first error reported by any child. */
	if (pbp->bio_error == 0)
		pbp->bio_error = bp->bio_error;

	switch (pbp->bio_cmd) {
	case BIO_READ:
		if (bp->bio_error == 0)
			break;

		if (pbp->bio_cflags & GV_BIO_RETRY)
			break;

		/* Check if we have another plex left. */
		cp = bp->bio_from;
		cp2 = LIST_NEXT(cp, consumer);
		if (cp2 == NULL)
			break;

		if (LIST_NEXT(cp2, consumer) == NULL)
			pbp->bio_cflags |= GV_BIO_RETRY;

		/*
		 * Forget the error of the failed attempt.  Otherwise it would
		 * stick to the parent and be delivered even when the retry
		 * succeeds, because nothing else clears bio_error for reads.
		 */
		pbp->bio_error = 0;

		g_destroy_bio(bp);
		pbp->bio_children--;
		mtx_lock(&v->bqueue_mtx);
		bioq_disksort(v->bqueue, pbp);
		mtx_unlock(&v->bqueue_mtx);
		return;

	case BIO_WRITE:
	case BIO_DELETE:
		/* Remember if this write request succeeded. */
		if (bp->bio_error == 0)
			pbp->bio_cflags |= GV_BIO_SUCCEED;
		break;

	default:
		break;
	}

	/* When the original request is finished, we deliver it. */
	pbp->bio_inbed++;
	if (pbp->bio_inbed == pbp->bio_children) {
		/* One successful write/delete is enough to report success. */
		if (pbp->bio_cflags & GV_BIO_SUCCEED)
			pbp->bio_error = 0;
		pbp->bio_completed = bp->bio_length;
		g_io_deliver(pbp, pbp->bio_error);
	}

	g_destroy_bio(bp);
}
/*
 * Tell whether plex p may serve a read: it either has to be in a state above
 * GV_PLEX_DEGRADED, or be a RAID5 plex that is at least GV_PLEX_DEGRADED.
 */
static int
gv_vol_plex_readable(struct gv_plex *p)
{

	if (p->state > GV_PLEX_DEGRADED)
		return (1);
	return (p->state >= GV_PLEX_DEGRADED && p->org == GV_PLEX_RAID5);
}

/*
 * Turn a fresh request for volume v into sub-requests for its plexes.
 *
 * v  - volume the request was issued to.
 * bp - the request taken from the volume queue.
 *
 * A read is cloned once and sent to the first readable plex found after
 * v->last_read_plex, wrapping around the plex list.  A write or delete is
 * cloned for every plex whose state is at least GV_PLEX_DEGRADED.  The
 * request is completed with ENXIO when no plex qualifies and with ENOMEM
 * when a clone cannot be allocated.
 */
static void
gv_vol_normal_request(struct gv_volume *v, struct bio *bp)
{
	struct bio_queue_head queue;
	struct gv_plex *p, *lp;
	struct bio *cbp;

	switch (bp->bio_cmd) {
	case BIO_READ:
		cbp = g_clone_bio(bp);
		if (cbp == NULL) {
			g_io_deliver(bp, ENOMEM);
			return;
		}
		cbp->bio_done = gv_volume_done;

		/*
		 * Try to find a good plex where we can send the request to.
		 * Walk the list once, starting behind the plex used for the
		 * previous read and ending with that plex itself.  The walk
		 * is bounded: it stops with p == NULL after a full circle
		 * (also for a volume with a single, unreadable plex) and is
		 * skipped entirely when the plex list is empty.
		 */
		lp = v->last_read_plex;
		if (lp == NULL)
			lp = LIST_FIRST(&v->plexes);
		p = lp;
		while (p != NULL) {
			p = LIST_NEXT(p, in_volume);
			if (p == NULL)
				p = LIST_FIRST(&v->plexes);
			if (p == NULL || gv_vol_plex_readable(p))
				break;
			if (p == lp)
				p = NULL;
		}

		if (p == NULL) {
			g_destroy_bio(cbp);
			bp->bio_children--;
			g_io_deliver(bp, ENXIO);
			return;
		}
		g_io_request(cbp, p->consumer);
		v->last_read_plex = p;
		break;

	case BIO_WRITE:
	case BIO_DELETE:
		/*
		 * Collect all clones first so that nothing has been issued
		 * yet if one of the allocations fails.
		 */
		bioq_init(&queue);
		LIST_FOREACH(p, &v->plexes, in_volume) {
			if (p->state < GV_PLEX_DEGRADED)
				continue;

			cbp = g_clone_bio(bp);
			if (cbp == NULL) {
				while ((cbp = bioq_takefirst(&queue)) != NULL)
					g_destroy_bio(cbp);
				if (bp->bio_error == 0)
					bp->bio_error = ENOMEM;
				g_io_deliver(bp, bp->bio_error);
				return;
			}
			bioq_insert_tail(&queue, cbp);
			cbp->bio_done = gv_volume_done;
			/* Remember the target consumer until the clone is sent. */
			cbp->bio_caller1 = p->consumer;
		}

		/*
		 * Without a usable plex no clone exists, so no completion
		 * would ever arrive for this request; fail it right away.
		 */
		if (bioq_first(&queue) == NULL) {
			g_io_deliver(bp, ENXIO);
			return;
		}

		/* Fire off all sub-requests. */
		while ((cbp = bioq_takefirst(&queue)) != NULL)
			g_io_request(cbp, cbp->bio_caller1);
		break;

	default:
		break;
	}
}
static int
gv_volume_access(struct g_provider *pp, int dr, int dw, int de)
{
struct g_geom *gp;
struct g_consumer *cp, *cp2;
int error;
gp = pp->geom;
error = ENXIO;
LIST_FOREACH(cp, &gp->consumer, consumer) {
error = g_access(cp, dr, dw, de);
if (error) {
LIST_FOREACH(cp2, &gp->consumer, consumer) {
if (cp == cp2)
break;
g_access(cp2, -dr, -dw, -de);
}
return (error);
}
}
return (error);
}
/*
 * Taste method of the VINUMVOLUME class.
 *
 * Only providers whose geom belongs to the "VINUMPLEX" class are considered.
 * The plex's softc names the volume it wants (p->volume); if that volume is
 * known, a consumer is attached to the plex provider and the plex is linked
 * into the volume.  The volume geom, its bio queue, queue mutex and worker
 * are created on demand.
 *
 * Returns the volume geom when it was newly created by this call, and NULL
 * in every other case (provider not of interest, volume unknown, g_access()
 * failure, or plex added to an already existing volume geom).
 */
static struct g_geom *
gv_volume_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
{
struct g_geom *gp;
struct g_provider *pp2;
struct g_consumer *cp, *ocp;
struct gv_softc *sc;
struct gv_volume *v;
struct gv_plex *p;
int error, first;
g_trace(G_T_TOPOLOGY, "gv_volume_taste(%s, %s)", mp->name, pp->name);
g_topology_assert();
/* First, find the VINUM class and its associated geom. */
gp = find_vinum_geom();
if (gp == NULL)
return (NULL);
sc = gp->softc;
KASSERT(sc != NULL, ("gv_volume_taste: NULL sc"));
/* From here on gp refers to the geom of the tasted provider. */
gp = pp->geom;
/* We only want to attach to plexes. */
if (strcmp(gp->class->name, "VINUMPLEX"))
return (NULL);
/* 'first' becomes non-zero when this call creates the volume geom. */
first = 0;
/* NOTE(review): p is used below without a NULL check - confirm plex geoms always carry a softc. */
p = gp->softc;
/* Let's see if the volume this plex wants is already configured. */
v = gv_find_vol(sc, p->volume);
if (v == NULL)
return (NULL);
if (v->geom == NULL) {
gp = g_new_geomf(mp, "%s", p->volume);
gp->start = gv_volume_start;
gp->orphan = gv_volume_orphan;
gp->access = gv_volume_access;
gp->softc = v;
first++;
} else
gp = v->geom;
/* Create bio queue, queue mutex, and worker thread, if necessary. */
if (v->bqueue == NULL) {
v->bqueue = g_malloc(sizeof(struct bio_queue_head),
M_WAITOK | M_ZERO);
bioq_init(v->bqueue);
}
/* NOTE(review): the mutex is named "gv_plex" although it protects a volume queue - possibly a copy/paste leftover. */
if (mtx_initialized(&v->bqueue_mtx) == 0)
mtx_init(&v->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
if (!(v->flags & GV_VOL_THREAD_ACTIVE)) {
/* NOTE(review): the result of kproc_create() is ignored; the ACTIVE flag is set even if creation failed. */
kproc_create(gv_vol_worker, v, NULL, 0, 0, "gv_v %s",
v->name);
v->flags |= GV_VOL_THREAD_ACTIVE;
}
/*
* Create a new consumer and attach it to the plex geom. Since this
* volume might already have a plex attached, we need to adjust the
* access counts of the new consumer.
*/
/* Sample an existing consumer before the new one is added to the list. */
ocp = LIST_FIRST(&gp->consumer);
cp = g_new_consumer(gp);
g_attach(cp, pp);
if ((ocp != NULL) && (ocp->acr > 0 || ocp->acw > 0 || ocp->ace > 0)) {
error = g_access(cp, ocp->acr, ocp->acw, ocp->ace);
if (error) {
printf("GEOM_VINUM: failed g_access %s -> %s; "
"errno %d\n", v->name, p->name, error);
/* Undo the consumer, and the geom too if it was created above. */
g_detach(cp);
g_destroy_consumer(cp);
if (first)
g_destroy_geom(gp);
return (NULL);
}
}
p->consumer = cp;
/* Link the plex into the volume only once. */
if (p->vol_sc != v) {
p->vol_sc = v;
v->plexcount++;
LIST_INSERT_HEAD(&v->plexes, p, in_volume);
}
/* We need to setup a new VINUMVOLUME geom. */
if (first) {
/* The volume provider takes its media and sector size from this plex provider. */
pp2 = g_new_providerf(gp, "gvinum/%s", v->name);
pp2->mediasize = pp->mediasize;
pp2->sectorsize = pp->sectorsize;
g_error_provider(pp2, 0);
v->size = pp2->mediasize;
v->geom = gp;
return (gp);
}
return (NULL);
}
static int
gv_volume_destroy_geom(struct gctl_req *req, struct g_class *mp,
struct g_geom *gp)
{
struct gv_volume *v;
g_trace(G_T_TOPOLOGY, "gv_volume_destroy_geom: %s", gp->name);
g_topology_assert();
v = gp->softc;
gv_kill_vol_thread(v);
g_wither_geom(gp, ENXIO);
return (0);
}
/* Name under which the volume class is declared to GEOM. */
#define VINUMVOLUME_CLASS_NAME "VINUMVOLUME"
/*
 * Class descriptor for vinum volumes: gv_volume_taste() is its taste method
 * and gv_volume_destroy_geom() its destroy_geom method.  The per-geom
 * start/orphan/access methods are assigned in gv_volume_taste().
 */
static struct g_class g_vinum_volume_class = {
.name = VINUMVOLUME_CLASS_NAME,
.version = G_VERSION,
.taste = gv_volume_taste,
.destroy_geom = gv_volume_destroy_geom,
};
DECLARE_GEOM_CLASS(g_vinum_volume_class, g_vinum_volume);