freebsd-skq/sys/geom/vinum/geom_vinum_init.c
le 3998514c39 Only allow RAID5 plexes to be parity checked.
PR:           kern/80427
Submitted by: Stijn Hoop <stijn@win.tue.nl>
2005-04-28 13:09:00 +00:00

662 lines
14 KiB
C

/*-
* Copyright (c) 2004 Lukas Ertl
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bio.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <geom/geom.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum.h>
#include <geom/vinum/geom_vinum_share.h>
/*
 * Prototypes for the plex/subdisk start, sync, rebuild and initialization
 * routines defined further down; the *_td functions are the entry points
 * handed to kthread_create().
 */
int gv_init_plex(struct gv_plex *);
int gv_init_sd(struct gv_sd *);
void gv_init_td(void *);
void gv_rebuild_plex(struct gv_plex *);
void gv_rebuild_td(void *);
void gv_start_plex(struct gv_plex *);
void gv_start_vol(struct gv_volume *);
void gv_sync(struct gv_volume *);
void gv_sync_td(void *);
/*
 * Argument block passed to gv_sync_td() and gv_rebuild_td().  It is allocated
 * zero-filled by gv_sync() or gv_rebuild_plex(); the receiving thread calls
 * g_free() on it when it is done.
 */
struct gv_sync_args {
/* Volume being synced; filled in by gv_sync() only. */
struct gv_volume *v;
/* Plex to read from (the one that is up); filled in by gv_sync() only. */
struct gv_plex *from;
/* Plex that is written to, i.e. the one being synced or rebuilt. */
struct gv_plex *to;
/* Size in bytes of one transfer/buffer; set to GV_DFLT_SYNCSIZE. */
off_t syncsize;
};
/*
 * Handle a parity check request for one stripe of a RAID5 plex.
 *
 * Request parameters read:
 *   "plex"     name of the plex to check,
 *   "flags"    int; GV_FLAG_F restarts the check at offset 0,
 *   "rebuild"  int; non-zero additionally tags the bio with GV_BIO_PARITY.
 *
 * Request parameters written:
 *   "offset"   the plex check pointer (p->synced) of the stripe just checked,
 *   "rv"       -1  request rejected, or the I/O failed with an error other
 *                  than EAGAIN,
 *               0  parity of the stripe was correct,
 *               1  parity of the stripe was incorrect (EAGAIN from below),
 *              -2  the check pointer reached the end of the plex and was
 *                  reset to 0.
 *
 * The topology lock is dropped while the bio is in flight and taken again
 * before the consumer access counts are released.
 */
void
gv_parityop(struct g_geom *gp, struct gctl_req *req)
{
	struct gv_softc *sc;
	struct gv_plex *p;
	struct g_consumer *cp;
	struct bio *bp;
	char *plex;
	int *flags, *rebuild;
	int error, rv;

	/* Every early exit reports -1 unless something better is known. */
	rv = -1;

	plex = gctl_get_param(req, "plex", NULL);
	if (plex == NULL) {
		gctl_error(req, "no plex given");
		goto out;
	}

	flags = gctl_get_paraml(req, "flags", sizeof(*flags));
	if (flags == NULL) {
		gctl_error(req, "no flags given");
		goto out;
	}

	rebuild = gctl_get_paraml(req, "rebuild", sizeof(*rebuild));
	if (rebuild == NULL) {
		gctl_error(req, "no rebuild op given");
		goto out;
	}

	/* Volumes, subdisks, drives and unknown names are all refused. */
	sc = gp->softc;
	if (gv_object_type(sc, plex) != GV_TYPE_PLEX) {
		gctl_error(req, "'%s' is not a plex", plex);
		goto out;
	}

	p = gv_find_plex(sc, plex);
	if (p->state != GV_PLEX_UP) {
		gctl_error(req, "plex %s is not completely accessible",
		    p->name);
		goto out;
	}
	if (p->org != GV_PLEX_RAID5) {
		gctl_error(req, "plex %s is not a RAID5 plex", p->name);
		goto out;
	}

	cp = p->consumer;
	error = g_access(cp, 1, 1, 0);
	if (error != 0) {
		gctl_error(req, "cannot access consumer");
		goto out;
	}
	g_topology_unlock();

	/* With -f the check starts over from the beginning of the plex. */
	if ((*flags & GV_FLAG_F) != 0)
		p->synced = 0;

	bp = g_new_bio();
	if (bp == NULL) {
		gctl_error(req, "cannot create BIO - out of memory");
		goto release;
	}

	/* One stripe-sized request at the current check pointer. */
	bp->bio_cmd = BIO_WRITE;
	bp->bio_done = NULL;
	bp->bio_offset = p->synced;
	bp->bio_length = p->stripesize;
	bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO);
	bp->bio_cflags |= GV_BIO_CHECK;
	if (*rebuild != 0)
		bp->bio_cflags |= GV_BIO_PARITY;

	/* Send the request down and sleep until it completes. */
	g_io_request(bp, cp);
	error = biowait(bp, "gwrite");

	g_free(bp->bio_data);
	g_destroy_bio(bp);

	if (error == 0) {
		/* Correct parity. */
		rv = 0;
	} else if (error == EAGAIN) {
		/* Incorrect parity. */
		rv = 1;
	} else {
		/* Some other error happened. */
		gctl_error(req, "Parity check failed at offset 0x%jx, "
		    "errno %d", (intmax_t)p->synced, error);
	}

	/* Report the offset that was checked before moving the pointer. */
	gctl_set_param(req, "offset", &p->synced, sizeof(p->synced));

	/* Only a clean stripe advances the check pointer. */
	if (rv == 0)
		p->synced += p->stripesize;

	/* End of plex; reset the check pointer and signal it to the caller. */
	if (p->synced >= p->size) {
		p->synced = 0;
		rv = -2;
	}

release:
	g_topology_lock();
	error = g_access(cp, -1, -1, 0);

out:
	gctl_set_param(req, "rv", &rv, sizeof(rv));
}
/*
 * Start every object named in a "start" request.
 *
 * The request carries an int "argc" and string parameters "argv0" ...
 * "argv<argc-1>".  Volumes are handed to gv_start_vol() and plexes to
 * gv_start_plex().  A missing argvN is skipped; the first subdisk, drive or
 * unknown name sets an error on the request and ends processing.
 *
 * "initsize" is looked up as well, but its value is not used here.
 */
void
gv_start_obj(struct g_geom *gp, struct gctl_req *req)
{
	struct gv_softc *sc;
	int *argc, *initsize;
	char *name, key[20];
	int n, type;

	argc = gctl_get_paraml(req, "argc", sizeof(*argc));
	initsize = gctl_get_paraml(req, "initsize", sizeof(*initsize));

	if (argc == NULL || *argc == 0) {
		gctl_error(req, "no arguments given");
		return;
	}

	sc = gp->softc;

	for (n = 0; n < *argc; n++) {
		snprintf(key, sizeof(key), "argv%d", n);
		name = gctl_get_param(req, key, NULL);
		if (name == NULL)
			continue;

		type = gv_object_type(sc, name);
		if (type == GV_TYPE_VOL) {
			gv_start_vol(gv_find_vol(sc, name));
		} else if (type == GV_TYPE_PLEX) {
			gv_start_plex(gv_find_plex(sc, name));
		} else if (type == GV_TYPE_SD || type == GV_TYPE_DRIVE) {
			/* XXX not yet */
			gctl_error(req, "cannot start '%s'", name);
			return;
		} else {
			gctl_error(req, "unknown object '%s'", name);
			return;
		}
	}
}
/*
 * Start a single plex.
 *
 * Nothing happens if the plex is already up.  If the plex belongs to a
 * volume with more than one plex, the whole volume is synced.  Otherwise a
 * RAID5 plex is rebuilt when it is degraded and initialized in any other
 * state; plexes of other organizations are left alone.
 */
void
gv_start_plex(struct gv_plex *p)
{
	struct gv_volume *vol;

	KASSERT(p != NULL, ("gv_start_plex: NULL p"));

	if (p->state == GV_PLEX_UP)
		return;

	vol = p->vol_sc;
	if (vol != NULL && vol->plexcount > 1) {
		gv_sync(vol);
		return;
	}

	if (p->org != GV_PLEX_RAID5)
		return;

	if (p->state == GV_PLEX_DEGRADED)
		gv_rebuild_plex(p);
	else
		gv_init_plex(p);
}
void
gv_start_vol(struct gv_volume *v)
{
struct gv_plex *p;
struct gv_sd *s;
KASSERT(v != NULL, ("gv_start_vol: NULL v"));
if (v->plexcount == 0)
return;
else if (v->plexcount == 1) {
p = LIST_FIRST(&v->plexes);
KASSERT(p != NULL, ("gv_start_vol: NULL p on %s", v->name));
if (p->org == GV_PLEX_RAID5) {
switch (p->state) {
case GV_PLEX_DOWN:
gv_init_plex(p);
break;
case GV_PLEX_DEGRADED:
gv_rebuild_plex(p);
break;
default:
return;
}
} else {
LIST_FOREACH(s, &p->subdisks, in_plex) {
gv_set_sd_state(s, GV_SD_UP,
GV_SETSTATE_CONFIG);
}
}
} else
gv_sync(v);
}
/*
 * Sync every plex of volume v that is not up from the first plex of the
 * volume that is up.
 *
 * One gv_sync_td kernel thread is created per plex to be synced.  Each
 * thread gets its own gv_sync_args and frees it when it exits; if the
 * thread cannot be created the arguments are freed here instead.  Nothing
 * is done when the volume has no plex that is up.
 */
void
gv_sync(struct gv_volume *v)
{
	struct gv_softc *sc;
	struct gv_plex *p, *up;
	struct gv_sync_args *sync;
	int error;

	KASSERT(v != NULL, ("gv_sync: NULL v"));
	sc = v->vinumconf;
	KASSERT(sc != NULL, ("gv_sync: NULL sc on %s", v->name));

	/* Find the plex that's up; up ends as NULL if the list runs out. */
	LIST_FOREACH(up, &v->plexes, in_volume) {
		if (up->state == GV_PLEX_UP)
			break;
	}

	/* Didn't find a good plex. */
	if (up == NULL)
		return;

	LIST_FOREACH(p, &v->plexes, in_volume) {
		if ((p == up) || (p->state == GV_PLEX_UP))
			continue;

		sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO);
		sync->v = v;
		sync->from = up;
		sync->to = p;
		sync->syncsize = GV_DFLT_SYNCSIZE;

		error = kthread_create(gv_sync_td, sync, NULL, 0, 0,
		    "gv_sync '%s'", p->name);
		if (error) {
			/* The thread never ran, so nobody else frees this. */
			printf("GEOM_VINUM: cannot start sync of plex '%s': "
			    "%d\n", p->name, error);
			g_free(sync);
		}
	}
}
void
gv_rebuild_plex(struct gv_plex *p)
{
struct gv_sync_args *sync;
if ((p->flags & GV_PLEX_SYNCING) || gv_is_open(p->geom))
return;
sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO);
sync->to = p;
sync->syncsize = GV_DFLT_SYNCSIZE;
kthread_create(gv_rebuild_td, sync, NULL, 0, 0, "gv_rebuild %s",
p->name);
}
/*
 * Initialize a plex by starting initialization of each of its subdisks.
 *
 * Returns 0 if gv_init_sd() succeeded for every subdisk; otherwise the walk
 * stops at the first failing subdisk and its error value is returned.
 */
int
gv_init_plex(struct gv_plex *p)
{
	struct gv_sd *sd;
	int rc;

	KASSERT(p != NULL, ("gv_init_plex: NULL p"));

	rc = 0;
	LIST_FOREACH(sd, &p->subdisks, in_plex) {
		rc = gv_init_sd(sd);
		if (rc != 0)
			break;
	}

	return (rc);
}
/*
 * Put subdisk s into the initializing state and start a gv_init_td thread
 * for it.
 *
 * Returns -1 if the forced state change is refused, 0 otherwise.
 * NOTE(review): the result of kthread_create() is not checked, so 0 is
 * returned even if no thread could be started.
 */
int
gv_init_sd(struct gv_sd *s)
{
	int refused;

	KASSERT(s != NULL, ("gv_init_sd: NULL s"));

	refused = gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
	if (refused != 0)
		return (-1);

	/* Clear any cancel request and set the per-write chunk size. */
	s->flags &= ~GV_SD_INITCANCEL;
	s->init_size = GV_DFLT_SYNCSIZE;

	/* The actual work happens in its own kernel thread. */
	kthread_create(gv_init_td, s, NULL, 0, 0, "gv_init %s", s->name);

	return (0);
}
/* This thread is responsible for rebuilding a degraded RAID5 plex. */
void
gv_rebuild_td(void *arg)
{
struct bio *bp;
struct gv_plex *p;
struct g_consumer *cp;
struct gv_sync_args *sync;
u_char *buf;
off_t i;
int error;
buf = NULL;
bp = NULL;
sync = arg;
p = sync->to;
p->synced = 0;
p->flags |= GV_PLEX_SYNCING;
cp = p->consumer;
g_topology_lock();
error = g_access(cp, 1, 1, 0);
if (error) {
g_topology_unlock();
printf("GEOM_VINUM: rebuild of %s failed to access consumer: "
"%d\n", p->name, error);
kthread_exit(error);
}
g_topology_unlock();
buf = g_malloc(sync->syncsize, M_WAITOK);
printf("GEOM_VINUM: rebuild of %s started\n", p->name);
i = 0;
for (i = 0; i < p->size; i += (p->stripesize * (p->sdcount - 1))) {
/*
if (i + sync->syncsize > p->size)
sync->syncsize = p->size - i;
*/
bp = g_new_bio();
if (bp == NULL) {
printf("GEOM_VINUM: rebuild of %s failed creating bio: "
"out of memory\n", p->name);
break;
}
bp->bio_cmd = BIO_WRITE;
bp->bio_done = NULL;
bp->bio_data = buf;
bp->bio_cflags |= GV_BIO_REBUILD;
bp->bio_offset = i;
bp->bio_length = p->stripesize;
/* Schedule it down ... */
g_io_request(bp, cp);
/* ... and wait for the result. */
error = biowait(bp, "gwrite");
if (error) {
printf("GEOM_VINUM: rebuild of %s failed at offset %jd "
"errno: %d\n", p->name, i, error);
break;
}
g_destroy_bio(bp);
bp = NULL;
}
if (bp != NULL)
g_destroy_bio(bp);
if (buf != NULL)
g_free(buf);
g_topology_lock();
g_access(cp, -1, -1, 0);
gv_save_config_all(p->vinumconf);
g_topology_unlock();
p->flags &= ~GV_PLEX_SYNCING;
p->synced = 0;
/* Successful initialization. */
if (!error)
printf("GEOM_VINUM: rebuild of %s finished\n", p->name);
g_free(sync);
kthread_exit(error);
}
void
gv_sync_td(void *arg)
{
struct bio *bp;
struct gv_plex *p;
struct g_consumer *from, *to;
struct gv_sync_args *sync;
u_char *buf;
off_t i;
int error;
sync = arg;
from = sync->from->consumer;
to = sync->to->consumer;
p = sync->to;
if (p->flags & GV_PLEX_SYNCING) {
printf("GEOM_VINUM: plex '%s' is already syncing.\n", p->name);
g_free(sync);
kthread_exit(0);
}
p->synced = 0;
p->flags |= GV_PLEX_SYNCING;
error = 0;
g_topology_lock();
error = g_access(from, 1, 0, 0);
if (error) {
g_topology_unlock();
printf("GEOM_VINUM: sync from '%s' failed to access "
"consumer: %d\n", sync->from->name, error);
g_free(sync);
kthread_exit(error);
}
error = g_access(to, 0, 1, 0);
if (error) {
g_access(from, -1, 0, 0);
g_topology_unlock();
printf("GEOM_VINUM: sync to '%s' failed to access "
"consumer: %d\n", p->name, error);
g_free(sync);
kthread_exit(error);
}
g_topology_unlock();
printf("GEOM_VINUM: plex sync %s -> %s started\n", sync->from->name,
sync->to->name);
for (i = 0; i < p->size; i+= sync->syncsize) {
/* Read some bits from the good plex. */
buf = g_read_data(from, i, sync->syncsize, &error);
if (buf == NULL) {
printf("GEOM_VINUM: sync read from '%s' failed at "
"offset %jd; errno: %d\n", sync->from->name, i,
error);
break;
}
/*
* Create a bio and schedule it down on the 'bad' plex. We
* cannot simply use g_write_data() because we have to let the
* lower parts know that we are an initialization process and
* not a 'normal' request.
*/
bp = g_new_bio();
if (bp == NULL) {
printf("GEOM_VINUM: sync write to '%s' failed at "
"offset %jd; out of memory\n", p->name, i);
g_free(buf);
break;
}
bp->bio_cmd = BIO_WRITE;
bp->bio_offset = i;
bp->bio_length = sync->syncsize;
bp->bio_data = buf;
bp->bio_done = NULL;
/*
* This hack declare this bio as part of an initialization
* process, so that the lower levels allow it to get through.
*/
bp->bio_cflags |= GV_BIO_SYNCREQ;
/* Schedule it down ... */
g_io_request(bp, to);
/* ... and wait for the result. */
error = biowait(bp, "gwrite");
g_destroy_bio(bp);
g_free(buf);
if (error) {
printf("GEOM_VINUM: sync write to '%s' failed at "
"offset %jd; errno: %d\n", p->name, i, error);
break;
}
/* Note that we have synced a little bit more. */
p->synced += sync->syncsize;
}
g_topology_lock();
g_access(from, -1, 0, 0);
g_access(to, 0, -1, 0);
gv_save_config_all(p->vinumconf);
g_topology_unlock();
/* Successful initialization. */
if (!error)
printf("GEOM_VINUM: plex sync %s -> %s finished\n",
sync->from->name, sync->to->name);
p->flags &= ~GV_PLEX_SYNCING;
p->synced = 0;
g_free(sync);
kthread_exit(error);
}
/*
 * Kernel thread that initializes a subdisk by writing zeroes over it.
 *
 * arg is the struct gv_sd to initialize.  Writing goes through the first
 * consumer of the geom of the subdisk's drive, in chunks of s->init_size
 * bytes, starting at s->drive_offset + s->initialized and stopping once
 * s->drive_offset + s->size is reached.  s->initialized is advanced after
 * every successful write.
 *
 * The loop is abandoned with EAGAIN when GV_SD_INITCANCEL is set in
 * s->flags, or with the write error when a write fails.  On error the
 * subdisk is forced to GV_SD_STALE and the error is stored in
 * s->init_error; on success the subdisk is set to GV_SD_UP and
 * s->initialized is reset to 0.  The thread exits with the error value.
 *
 * NOTE(review): every write is init_size bytes long, so the last one ends
 * past drive_offset + size unless the remaining length is a multiple of
 * init_size -- confirm that subdisk sizes are always aligned accordingly.
 */
void
gv_init_td(void *arg)
{
	struct gv_sd *s;
	struct gv_drive *d;
	struct g_geom *gp;
	struct g_consumer *cp;
	int error;
	off_t i, init_size, start, offset, length;
	u_char *buf;

	s = arg;
	KASSERT(s != NULL, ("gv_init_td: NULL s"));
	d = s->drive_sc;
	KASSERT(d != NULL, ("gv_init_td: NULL d"));
	gp = d->geom;
	KASSERT(gp != NULL, ("gv_init_td: NULL gp"));

	cp = LIST_FIRST(&gp->consumer);
	KASSERT(cp != NULL, ("gv_init_td: NULL cp"));

	s->init_error = 0;
	init_size = s->init_size;
	start = s->drive_offset + s->initialized;
	offset = s->drive_offset;
	length = s->size;

	/* A zero-filled chunk that is written out over and over again. */
	buf = g_malloc(s->init_size, M_WAITOK | M_ZERO);

	g_topology_lock();
	error = g_access(cp, 0, 1, 0);
	if (error) {
		s->init_error = error;
		g_topology_unlock();
		printf("geom_vinum: init '%s' failed to access consumer: %d\n",
		    s->name, error);
		/* The buffer was allocated above; do not leak it. */
		g_free(buf);
		kthread_exit(error);
	}
	g_topology_unlock();

	for (i = start; i < offset + length; i += init_size) {
		if (s->flags & GV_SD_INITCANCEL) {
			printf("geom_vinum: subdisk '%s' init: cancelled at"
			    " offset %jd (drive offset %jd)\n", s->name,
			    (intmax_t)s->initialized, (intmax_t)i);
			error = EAGAIN;
			break;
		}
		error = g_write_data(cp, i, buf, init_size);
		if (error) {
			printf("geom_vinum: subdisk '%s' init: write failed"
			    " at offset %jd (drive offset %jd)\n", s->name,
			    (intmax_t)s->initialized, (intmax_t)i);
			break;
		}
		s->initialized += init_size;
	}

	g_free(buf);

	g_topology_lock();
	g_access(cp, 0, -1, 0);
	g_topology_unlock();

	if (error) {
		s->init_error = error;
		g_topology_lock();
		gv_set_sd_state(s, GV_SD_STALE,
		    GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
		g_topology_unlock();
	} else {
		g_topology_lock();
		gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG);
		g_topology_unlock();
		s->initialized = 0;
		printf("geom_vinum: init '%s' finished\n", s->name);
	}
	kthread_exit(error);
}