/*- * Copyright (c) 2003 Poul-Henning Kamp * All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * 3. The names of the authors may not be used to endorse or promote * products derived from this software without specific prior written * permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF * SUCH DAMAGE. * * $FreeBSD$ */ /* This is a GEOM module for handling path selection for multi-path * storage devices. It is named "fox" because it, like they, prefer * to have multiple exits to choose from. * */ #include <sys/param.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/conf.h> #include <sys/bio.h> #include <sys/malloc.h> #include <sys/lock.h> #include <sys/mutex.h> #include <sys/libkern.h> #include <sys/endian.h> #include <sys/md5.h> #include <sys/errno.h> #include <geom/geom.h> #define FOX_CLASS_NAME "FOX" #define FOX_MAGIC "GEOM::FOX" struct g_fox_softc { off_t mediasize; u_int sectorsize; TAILQ_HEAD(, bio) queue; struct mtx lock; u_char magic[16]; struct g_consumer *path; struct g_consumer *opath; int waiting; int cr, cw, ce; }; /* * This function is called whenever we need to select a new path. */ static void g_fox_select_path(void *arg, int flag) { struct g_geom *gp; struct g_fox_softc *sc; struct g_consumer *cp1; struct bio *bp; int error; g_topology_assert(); if (flag == EV_CANCEL) return; gp = arg; sc = gp->softc; if (sc->opath != NULL) { /* * First, close the old path entirely. */ printf("Closing old path (%s) on fox (%s)\n", sc->opath->provider->name, gp->name); cp1 = LIST_NEXT(sc->opath, consumer); g_access(sc->opath, -sc->cr, -sc->cw, -(sc->ce + 1)); /* * The attempt to reopen it with a exclusive count */ error = g_access(sc->opath, 0, 0, 1); if (error) { /* * Ok, ditch this consumer, we can't use it. */ printf("Drop old path (%s) on fox (%s)\n", sc->opath->provider->name, gp->name); g_detach(sc->opath); g_destroy_consumer(sc->opath); if (LIST_EMPTY(&gp->consumer)) { /* No consumers left */ g_wither_geom(gp, ENXIO); for (;;) { bp = TAILQ_FIRST(&sc->queue); if (bp == NULL) break; TAILQ_REMOVE(&sc->queue, bp, bio_queue); bp->bio_error = ENXIO; g_std_done(bp); } return; } } else { printf("Got e-bit on old path (%s) on fox (%s)\n", sc->opath->provider->name, gp->name); } sc->opath = NULL; } else { cp1 = LIST_FIRST(&gp->consumer); } if (cp1 == NULL) cp1 = LIST_FIRST(&gp->consumer); printf("Open new path (%s) on fox (%s)\n", cp1->provider->name, gp->name); error = g_access(cp1, sc->cr, sc->cw, sc->ce); if (error) { /* * If we failed, we take another trip through here */ printf("Open new path (%s) on fox (%s) failed, reselect.\n", cp1->provider->name, gp->name); sc->opath = cp1; g_post_event(g_fox_select_path, gp, M_WAITOK, gp, NULL); } else { printf("Open new path (%s) on fox (%s) succeeded\n", cp1->provider->name, gp->name); mtx_lock(&sc->lock); sc->path = cp1; sc->waiting = 0; for (;;) { bp = TAILQ_FIRST(&sc->queue); if (bp == NULL) break; TAILQ_REMOVE(&sc->queue, bp, bio_queue); g_io_request(bp, sc->path); } mtx_unlock(&sc->lock); } } static void g_fox_orphan(struct g_consumer *cp) { struct g_geom *gp; struct g_fox_softc *sc; int error, mark; g_topology_assert(); gp = cp->geom; sc = gp->softc; printf("Removing path (%s) from fox (%s)\n", cp->provider->name, gp->name); mtx_lock(&sc->lock); if (cp == sc->path) { sc->opath = NULL; sc->path = NULL; sc->waiting = 1; mark = 1; } else { mark = 0; } mtx_unlock(&sc->lock); g_access(cp, -cp->acr, -cp->acw, -cp->ace); error = cp->provider->error; g_detach(cp); g_destroy_consumer(cp); if (!LIST_EMPTY(&gp->consumer)) { if (mark) g_post_event(g_fox_select_path, gp, M_WAITOK, gp, NULL); return; } mtx_destroy(&sc->lock); g_free(gp->softc); gp->softc = NULL; g_wither_geom(gp, ENXIO); } static void g_fox_done(struct bio *bp) { struct g_geom *gp; struct g_fox_softc *sc; int error; if (bp->bio_error == 0) { g_std_done(bp); return; } gp = bp->bio_from->geom; sc = gp->softc; if (bp->bio_from != sc->path) { g_io_request(bp, sc->path); return; } mtx_lock(&sc->lock); sc->opath = sc->path; sc->path = NULL; error = g_post_event(g_fox_select_path, gp, M_NOWAIT, gp, NULL); if (error) { bp->bio_error = ENOMEM; g_std_done(bp); } else { sc->waiting = 1; TAILQ_INSERT_TAIL(&sc->queue, bp, bio_queue); } mtx_unlock(&sc->lock); } static void g_fox_start(struct bio *bp) { struct g_geom *gp; struct bio *bp2; struct g_fox_softc *sc; int error; gp = bp->bio_to->geom; sc = gp->softc; if (sc == NULL) { g_io_deliver(bp, ENXIO); return; } switch(bp->bio_cmd) { case BIO_READ: case BIO_WRITE: case BIO_DELETE: bp2 = g_clone_bio(bp); if (bp2 == NULL) { g_io_deliver(bp, ENOMEM); break; } bp2->bio_offset += sc->sectorsize; bp2->bio_done = g_fox_done; mtx_lock(&sc->lock); if (sc->path == NULL || !TAILQ_EMPTY(&sc->queue)) { if (sc->waiting == 0) { error = g_post_event(g_fox_select_path, gp, M_NOWAIT, gp, NULL); if (error) { g_destroy_bio(bp2); bp2 = NULL; g_io_deliver(bp, error); } else { sc->waiting = 1; } } if (bp2 != NULL) TAILQ_INSERT_TAIL(&sc->queue, bp2, bio_queue); } else { g_io_request(bp2, sc->path); } mtx_unlock(&sc->lock); break; default: g_io_deliver(bp, EOPNOTSUPP); break; } return; } static int g_fox_access(struct g_provider *pp, int dr, int dw, int de) { struct g_geom *gp; struct g_fox_softc *sc; struct g_consumer *cp1; int error; g_topology_assert(); gp = pp->geom; sc = gp->softc; if (sc == NULL) { if (dr <= 0 && dw <= 0 && de <= 0) return (0); else return (ENXIO); } if (sc->cr == 0 && sc->cw == 0 && sc->ce == 0) { /* * First open, open all consumers with an exclusive bit */ error = 0; LIST_FOREACH(cp1, &gp->consumer, consumer) { error = g_access(cp1, 0, 0, 1); if (error) { printf("FOX: access(%s,0,0,1) = %d\n", cp1->provider->name, error); break; } } if (error) { LIST_FOREACH(cp1, &gp->consumer, consumer) { if (cp1->ace) g_access(cp1, 0, 0, -1); } return (error); } } if (sc->path == NULL) g_fox_select_path(gp, 0); if (sc->path == NULL) error = ENXIO; else error = g_access(sc->path, dr, dw, de); if (error == 0) { sc->cr += dr; sc->cw += dw; sc->ce += de; if (sc->cr == 0 && sc->cw == 0 && sc->ce == 0) { /* * Last close, remove e-bit on all consumers */ LIST_FOREACH(cp1, &gp->consumer, consumer) g_access(cp1, 0, 0, -1); } } return (error); } static struct g_geom * g_fox_taste(struct g_class *mp, struct g_provider *pp, int flags __unused) { struct g_geom *gp, *gp2; struct g_provider *pp2; struct g_consumer *cp, *cp2; struct g_fox_softc *sc, *sc2; int error; u_int sectorsize; u_char *buf; g_trace(G_T_TOPOLOGY, "fox_taste(%s, %s)", mp->name, pp->name); g_topology_assert(); if (!strcmp(pp->geom->class->name, mp->name)) return (NULL); gp = g_new_geomf(mp, "%s.fox", pp->name); gp->softc = g_malloc(sizeof(struct g_fox_softc), M_WAITOK | M_ZERO); sc = gp->softc; cp = g_new_consumer(gp); g_attach(cp, pp); error = g_access(cp, 1, 0, 0); if (error) { g_free(sc); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); return(NULL); } do { sectorsize = cp->provider->sectorsize; g_topology_unlock(); buf = g_read_data(cp, 0, sectorsize, NULL); g_topology_lock(); if (buf == NULL) break; if (memcmp(buf, FOX_MAGIC, strlen(FOX_MAGIC))) break; /* * First we need to see if this a new path for an existing fox. */ LIST_FOREACH(gp2, &mp->geom, geom) { sc2 = gp2->softc; if (sc2 == NULL) continue; if (memcmp(buf + 16, sc2->magic, sizeof sc2->magic)) continue; break; } if (gp2 != NULL) { /* * It was. Create a new consumer for that fox, * attach it, and if the fox is open, open this * path with an exclusive count of one. */ printf("Adding path (%s) to fox (%s)\n", pp->name, gp2->name); cp2 = g_new_consumer(gp2); g_attach(cp2, pp); pp2 = LIST_FIRST(&gp2->provider); if (pp2->acr > 0 || pp2->acw > 0 || pp2->ace > 0) { error = g_access(cp2, 0, 0, 1); if (error) { /* * This is bad, or more likely, * the user is doing something stupid */ printf( "WARNING: New path (%s) to fox(%s) not added: %s\n%s", cp2->provider->name, gp2->name, "Could not get exclusive bit.", "WARNING: This indicates a risk of data inconsistency." ); g_detach(cp2); g_destroy_consumer(cp2); } } break; } printf("Creating new fox (%s)\n", pp->name); sc->path = cp; memcpy(sc->magic, buf + 16, sizeof sc->magic); pp2 = g_new_providerf(gp, "%s", gp->name); pp2->mediasize = sc->mediasize = pp->mediasize - pp->sectorsize; pp2->sectorsize = sc->sectorsize = pp->sectorsize; printf("fox %s lock %p\n", gp->name, &sc->lock); mtx_init(&sc->lock, "fox queue", NULL, MTX_DEF); TAILQ_INIT(&sc->queue); g_error_provider(pp2, 0); } while (0); if (buf != NULL) g_free(buf); g_access(cp, -1, 0, 0); if (!LIST_EMPTY(&gp->provider)) return (gp); g_free(gp->softc); g_detach(cp); g_destroy_consumer(cp); g_destroy_geom(gp); return (NULL); } static int g_fox_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp) { struct g_fox_softc *sc; g_topology_assert(); sc = gp->softc; mtx_destroy(&sc->lock); g_free(gp->softc); gp->softc = NULL; g_wither_geom(gp, ENXIO); return (0); } static struct g_class g_fox_class = { .name = FOX_CLASS_NAME, .version = G_VERSION, .taste = g_fox_taste, .destroy_geom = g_fox_destroy_geom, .start = g_fox_start, .spoiled = g_fox_orphan, .orphan = g_fox_orphan, .access= g_fox_access, }; DECLARE_GEOM_CLASS(g_fox_class, g_fox);