- Split up the BIO queue into a queue for new and one for completed requests.

This is necessary for two reasons:
  1) In order to avoid collisions with the use of a BIOs flags set by a consumer
     or a provider
  2) Because GV_BIO_DONE was used to mark a BIO as done, not enough flags was
     available, so the consumer flags of a BIO had to be misused in order to
     support enough flags. The new queue makes it possible to recycle the
     GV_BIO_DONE flag into GV_BIO_GROW.
  As a consequence, gvinum will now work with any other GEOM class under it or
  on top of it.

- Use bio_pflags for storing internal flags on downgoing BIOs, as the requests
  appear to come from a consumer of a gvinum volume. Use bio_cflags only for
  cloned BIOs.
- Move gv_post_bio to be used internally for maintenance requests.
- Remove some cases where flags where set without need.

PR:		kern/133604
This commit is contained in:
Ulf Lilleengen 2009-05-06 19:34:32 +00:00
parent 41944888fe
commit d8d015cddc
5 changed files with 98 additions and 86 deletions

View File

@ -80,18 +80,6 @@ gv_orphan(struct g_consumer *cp)
gv_post_event(sc, GV_EVENT_DRIVE_LOST, d, NULL, 0, 0);
}
void
gv_post_bio(struct gv_softc *sc, struct bio *bp)
{
KASSERT(sc != NULL, ("NULL sc"));
KASSERT(bp != NULL, ("NULL bp"));
mtx_lock(&sc->bqueue_mtx);
bioq_disksort(sc->bqueue, bp);
wakeup(sc);
mtx_unlock(&sc->bqueue_mtx);
}
void
gv_start(struct bio *bp)
{
@ -111,8 +99,10 @@ gv_start(struct bio *bp)
g_io_deliver(bp, EOPNOTSUPP);
return;
}
gv_post_bio(sc, bp);
mtx_lock(&sc->bqueue_mtx);
bioq_disksort(sc->bqueue_down, bp);
wakeup(sc);
mtx_unlock(&sc->bqueue_mtx);
}
void
@ -125,9 +115,11 @@ gv_done(struct bio *bp)
gp = bp->bio_from->geom;
sc = gp->softc;
bp->bio_cflags |= GV_BIO_DONE;
gv_post_bio(sc, bp);
mtx_lock(&sc->bqueue_mtx);
bioq_disksort(sc->bqueue_up, bp);
wakeup(sc);
mtx_unlock(&sc->bqueue_mtx);
}
int
@ -179,8 +171,12 @@ gv_init(struct g_class *mp)
gp->softc = g_malloc(sizeof(struct gv_softc), M_WAITOK | M_ZERO);
sc = gp->softc;
sc->geom = gp;
sc->bqueue = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO);
bioq_init(sc->bqueue);
sc->bqueue_down = g_malloc(sizeof(struct bio_queue_head),
M_WAITOK | M_ZERO);
sc->bqueue_up = g_malloc(sizeof(struct bio_queue_head),
M_WAITOK | M_ZERO);
bioq_init(sc->bqueue_down);
bioq_init(sc->bqueue_up);
LIST_INIT(&sc->drives);
LIST_INIT(&sc->subdisks);
LIST_INIT(&sc->plexes);
@ -969,7 +965,8 @@ gv_worker(void *arg)
gv_cleanup(sc);
mtx_destroy(&sc->bqueue_mtx);
mtx_destroy(&sc->equeue_mtx);
g_free(sc->bqueue);
g_free(sc->bqueue_down);
g_free(sc->bqueue_up);
g_free(sc);
kproc_exit(ENXIO);
break; /* not reached */
@ -984,38 +981,40 @@ gv_worker(void *arg)
/* ... then do I/O processing. */
mtx_lock(&sc->bqueue_mtx);
bp = bioq_takefirst(sc->bqueue);
/* First do new requests. */
bp = bioq_takefirst(sc->bqueue_down);
if (bp != NULL) {
mtx_unlock(&sc->bqueue_mtx);
/* A bio that interfered with another bio. */
if (bp->bio_pflags & GV_BIO_ONHOLD) {
s = bp->bio_caller1;
p = s->plex_sc;
/* Is it still locked out? */
if (gv_stripe_active(p, bp)) {
/* Park the bio on the waiting queue. */
bioq_disksort(p->wqueue, bp);
} else {
bp->bio_pflags &= ~GV_BIO_ONHOLD;
g_io_request(bp, s->drive_sc->consumer);
}
/* A special request requireing special handling. */
} else if (bp->bio_pflags & GV_BIO_INTERNAL) {
p = bp->bio_caller1;
gv_plex_start(p, bp);
} else {
gv_volume_start(sc, bp);
}
mtx_lock(&sc->bqueue_mtx);
}
/* Then do completed requests. */
bp = bioq_takefirst(sc->bqueue_up);
if (bp == NULL) {
msleep(sc, &sc->bqueue_mtx, PRIBIO, "-", hz/10);
mtx_unlock(&sc->bqueue_mtx);
continue;
}
mtx_unlock(&sc->bqueue_mtx);
/* A bio that is coming up from an underlying device. */
if (bp->bio_cflags & GV_BIO_DONE) {
gv_bio_done(sc, bp);
/* A bio that interfered with another bio. */
} else if (bp->bio_cflags & GV_BIO_ONHOLD) {
s = bp->bio_caller1;
p = s->plex_sc;
/* Is it still locked out? */
if (gv_stripe_active(p, bp)) {
/* Park the bio on the waiting queue. */
bioq_disksort(p->wqueue, bp);
} else {
bp->bio_cflags &= ~GV_BIO_ONHOLD;
g_io_request(bp, s->drive_sc->consumer);
}
/* A special request requireing special handling. */
} else if (bp->bio_cflags & GV_BIO_INTERNAL ||
bp->bio_pflags & GV_BIO_INTERNAL) {
p = bp->bio_caller1;
gv_plex_start(p, bp);
/* A fresh bio, scheduled it down. */
} else {
gv_volume_start(sc, bp);
}
gv_bio_done(sc, bp);
}
}

View File

@ -127,7 +127,6 @@ void gv_remove_event(struct gv_softc *, struct gv_event *);
void gv_drive_tasted(struct gv_softc *, struct g_provider *);
void gv_drive_lost(struct gv_softc *, struct gv_drive *);
void gv_setup_objects(struct gv_softc *);
void gv_post_bio(struct gv_softc *, struct bio *);
void gv_start(struct bio *);
int gv_access(struct g_provider *, int, int, int);
void gv_cleanup(struct gv_softc *);

View File

@ -48,6 +48,8 @@ static int gv_plex_offset(struct gv_plex *, off_t, off_t, off_t *, off_t *,
int *, int);
static int gv_plex_normal_request(struct gv_plex *, struct bio *, off_t,
off_t, caddr_t);
static void gv_post_bio(struct gv_softc *, struct bio *);
void
gv_plex_start(struct gv_plex *p, struct bio *bp)
{
@ -111,7 +113,7 @@ gv_plex_start(struct gv_plex *p, struct bio *bp)
*/
if (cbp->bio_caller2 != NULL && gv_stripe_active(p, cbp)) {
/* Park the bio on the waiting queue. */
cbp->bio_cflags |= GV_BIO_ONHOLD;
cbp->bio_pflags |= GV_BIO_ONHOLD;
bioq_disksort(p->wqueue, cbp);
} else {
s = cbp->bio_caller1;
@ -209,7 +211,7 @@ gv_plex_normal_request(struct gv_plex *p, struct bio *bp, off_t boff,
goto bad;
err = gv_plex_offset(p, boff, bcount, &real_off,
&real_len, &sdno, (bp->bio_pflags & GV_BIO_SYNCREQ));
&real_len, &sdno, (bp->bio_pflags & GV_BIO_GROW));
/* If the request was blocked, put it into wait. */
if (err == GV_ERR_ISBUSY) {
bioq_disksort(p->rqueue, bp);
@ -239,12 +241,12 @@ gv_plex_normal_request(struct gv_plex *p, struct bio *bp, off_t boff,
/* If the subdisk is up, just continue. */
break;
case GV_SD_DOWN:
if (bp->bio_cflags & GV_BIO_INTERNAL)
if (bp->bio_pflags & GV_BIO_INTERNAL)
G_VINUM_DEBUG(0, "subdisk must be in the stale state in"
" order to perform administrative requests");
goto bad;
case GV_SD_STALE:
if (!(bp->bio_cflags & GV_BIO_SYNCREQ)) {
if (!(bp->bio_pflags & GV_BIO_SYNCREQ)) {
G_VINUM_DEBUG(0, "subdisk stale, unable to perform "
"regular requests");
goto bad;
@ -273,8 +275,6 @@ gv_plex_normal_request(struct gv_plex *p, struct bio *bp, off_t boff,
cbp->bio_data = addr;
cbp->bio_done = gv_done;
cbp->bio_caller1 = s;
if ((bp->bio_cflags & GV_BIO_SYNCREQ))
cbp->bio_cflags |= GV_BIO_SYNCREQ;
/* Store the sub-requests now and let others issue them. */
bioq_insert_tail(p->bqueue, cbp);
@ -282,8 +282,8 @@ gv_plex_normal_request(struct gv_plex *p, struct bio *bp, off_t boff,
bad:
G_VINUM_LOGREQ(0, bp, "plex request failed.");
/* Building the sub-request failed. If internal BIO, do not deliver. */
if (bp->bio_cflags & GV_BIO_INTERNAL) {
if (bp->bio_cflags & GV_BIO_MALLOC)
if (bp->bio_pflags & GV_BIO_INTERNAL) {
if (bp->bio_pflags & GV_BIO_MALLOC)
g_free(bp->bio_data);
g_destroy_bio(bp);
p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING |
@ -311,9 +311,9 @@ gv_plex_normal_done(struct gv_plex *p, struct bio *bp)
/* Just set it to length since multiple plexes will
* screw things up. */
pbp->bio_completed = pbp->bio_length;
if (pbp->bio_cflags & GV_BIO_SYNCREQ)
if (pbp->bio_pflags & GV_BIO_SYNCREQ)
gv_sync_complete(p, pbp);
else if (pbp->bio_pflags & GV_BIO_SYNCREQ)
else if (pbp->bio_pflags & GV_BIO_GROW)
gv_grow_complete(p, pbp);
else
g_io_deliver(pbp, pbp->bio_error);
@ -392,7 +392,7 @@ gv_plex_raid5_done(struct gv_plex *p, struct bio *bp)
/* Handle parity data. */
if (TAILQ_EMPTY(&wp->bits)) {
if (bp->bio_parent->bio_cflags & GV_BIO_CHECK)
if (bp->bio_parent->bio_pflags & GV_BIO_CHECK)
i = gv_check_parity(p, bp, wp);
else
i = gv_normal_parity(p, bp, wp);
@ -424,16 +424,16 @@ gv_plex_raid5_done(struct gv_plex *p, struct bio *bp)
if (pbp->bio_inbed == pbp->bio_children) {
/* Hand it over for checking or delivery. */
if (pbp->bio_cmd == BIO_WRITE &&
(pbp->bio_cflags & GV_BIO_CHECK)) {
(pbp->bio_pflags & GV_BIO_CHECK)) {
gv_parity_complete(p, pbp);
} else if (pbp->bio_cmd == BIO_WRITE &&
(pbp->bio_cflags & GV_BIO_REBUILD)) {
(pbp->bio_pflags & GV_BIO_REBUILD)) {
gv_rebuild_complete(p, pbp);
} else if (pbp->bio_cflags & GV_BIO_INIT) {
} else if (pbp->bio_pflags & GV_BIO_INIT) {
gv_init_complete(p, pbp);
} else if (pbp->bio_cflags & GV_BIO_SYNCREQ) {
gv_sync_complete(p, pbp);
} else if (pbp->bio_pflags & GV_BIO_SYNCREQ) {
gv_sync_complete(p, pbp);
} else if (pbp->bio_pflags & GV_BIO_GROW) {
gv_grow_complete(p, pbp);
} else {
g_io_deliver(pbp, pbp->bio_error);
@ -480,7 +480,7 @@ gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
bp->bio_parent->bio_error = EAGAIN;
/* ... but we rebuild it. */
if (bp->bio_parent->bio_cflags & GV_BIO_PARITY) {
if (bp->bio_parent->bio_pflags & GV_BIO_PARITY) {
s = pbp->bio_caller1;
g_io_request(pbp, s->drive_sc->consumer);
finished = 0;
@ -546,6 +546,18 @@ gv_plex_flush(struct gv_plex *p)
}
}
static void
gv_post_bio(struct gv_softc *sc, struct bio *bp)
{
KASSERT(sc != NULL, ("NULL sc"));
KASSERT(bp != NULL, ("NULL bp"));
mtx_lock(&sc->bqueue_mtx);
bioq_disksort(sc->bqueue_down, bp);
wakeup(sc);
mtx_unlock(&sc->bqueue_mtx);
}
int
gv_sync_request(struct gv_plex *from, struct gv_plex *to, off_t offset,
off_t length, int type, caddr_t data)
@ -566,14 +578,14 @@ gv_sync_request(struct gv_plex *from, struct gv_plex *to, off_t offset,
}
bp->bio_length = length;
bp->bio_done = gv_done;
bp->bio_cflags |= GV_BIO_SYNCREQ;
bp->bio_pflags |= GV_BIO_SYNCREQ;
bp->bio_offset = offset;
bp->bio_caller1 = from;
bp->bio_caller2 = to;
bp->bio_cmd = type;
if (data == NULL)
data = g_malloc(length, M_WAITOK);
bp->bio_cflags |= GV_BIO_MALLOC; /* Free on the next run. */
bp->bio_pflags |= GV_BIO_MALLOC; /* Free on the next run. */
bp->bio_data = data;
/* Send down next. */
@ -613,7 +625,7 @@ gv_sync_complete(struct gv_plex *to, struct bio *bp)
BIO_WRITE, bp->bio_data);
/* If it was a write, read the next one. */
} else if (bp->bio_cmd == BIO_WRITE) {
if (bp->bio_cflags & GV_BIO_MALLOC)
if (bp->bio_pflags & GV_BIO_MALLOC)
g_free(bp->bio_data);
to->synced += bp->bio_length;
/* If we're finished, clean up. */
@ -684,10 +696,10 @@ gv_grow_request(struct gv_plex *p, off_t offset, off_t length, int type,
bp->bio_caller1 = p;
bp->bio_offset = offset;
bp->bio_length = length;
bp->bio_pflags |= GV_BIO_SYNCREQ; /* XXX: misuse of pflags AND syncreq.*/
bp->bio_pflags |= GV_BIO_GROW;
if (data == NULL)
data = g_malloc(length, M_WAITOK);
bp->bio_cflags |= GV_BIO_MALLOC;
bp->bio_pflags |= GV_BIO_MALLOC;
bp->bio_data = data;
gv_post_bio(sc, bp);
@ -720,7 +732,7 @@ gv_grow_complete(struct gv_plex *p, struct bio *bp)
BIO_WRITE, bp->bio_data);
/* If it was a write, read next. */
} else if (bp->bio_cmd == BIO_WRITE) {
if (bp->bio_cflags & GV_BIO_MALLOC)
if (bp->bio_pflags & GV_BIO_MALLOC)
g_free(bp->bio_data);
/* Find the real size of the plex. */
@ -790,7 +802,7 @@ gv_init_request(struct gv_sd *s, off_t start, caddr_t data, off_t length)
bp->bio_done = gv_done;
bp->bio_error = 0;
bp->bio_length = length;
bp->bio_cflags |= GV_BIO_INIT;
bp->bio_pflags |= GV_BIO_INIT;
bp->bio_offset = start;
bp->bio_caller1 = s;
@ -908,8 +920,8 @@ gv_parity_request(struct gv_plex *p, int flags, off_t offset)
return;
}
bp->bio_cflags = flags;
bp->bio_cflags |= GV_BIO_MALLOC;
bp->bio_pflags = flags;
bp->bio_pflags |= GV_BIO_MALLOC;
/* We still have more parity to build. */
bp->bio_offset = offset;
@ -927,14 +939,14 @@ gv_parity_complete(struct gv_plex *p, struct bio *bp)
int error, flags;
error = bp->bio_error;
flags = bp->bio_cflags;
flags = bp->bio_pflags;
flags &= ~GV_BIO_MALLOC;
sc = p->vinumconf;
KASSERT(sc != NULL, ("gv_parity_complete: NULL sc"));
/* Clean up what we allocated. */
if (bp->bio_cflags & GV_BIO_MALLOC)
if (bp->bio_pflags & GV_BIO_MALLOC)
g_free(bp->bio_data);
g_destroy_bio(bp);
@ -986,14 +998,14 @@ gv_rebuild_complete(struct gv_plex *p, struct bio *bp)
off_t offset;
error = bp->bio_error;
flags = bp->bio_cflags;
flags = bp->bio_pflags;
offset = bp->bio_offset;
flags &= ~GV_BIO_MALLOC;
sc = p->vinumconf;
KASSERT(sc != NULL, ("gv_rebuild_complete: NULL sc"));
/* Clean up what we allocated. */
if (bp->bio_cflags & GV_BIO_MALLOC)
if (bp->bio_pflags & GV_BIO_MALLOC)
g_free(bp->bio_data);
g_destroy_bio(bp);

View File

@ -65,9 +65,9 @@ gv_raid5_start(struct gv_plex *p, struct bio *bp, caddr_t addr, off_t boff,
wp->parity = NULL;
TAILQ_INIT(&wp->bits);
if (bp->bio_cflags & GV_BIO_REBUILD)
if (bp->bio_pflags & GV_BIO_REBUILD)
err = gv_raid5_rebuild(p, wp, bp, addr, boff, bcount);
else if (bp->bio_cflags & GV_BIO_CHECK)
else if (bp->bio_pflags & GV_BIO_CHECK)
err = gv_raid5_check(p, wp, bp, addr, boff, bcount);
else
err = gv_raid5_request(p, wp, bp, addr, boff, bcount, &delay);
@ -120,8 +120,8 @@ gv_raid5_start(struct gv_plex *p, struct bio *bp, caddr_t addr, off_t boff,
}
/* If internal, stop and reset state. */
if (bp->bio_cflags & GV_BIO_INTERNAL) {
if (bp->bio_cflags & GV_BIO_MALLOC)
if (bp->bio_pflags & GV_BIO_INTERNAL) {
if (bp->bio_pflags & GV_BIO_MALLOC)
g_free(bp->bio_data);
g_destroy_bio(bp);
/* Reset flags. */
@ -277,7 +277,7 @@ gv_raid5_rebuild(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp,
return (EINVAL);
case GV_SD_STALE:
if (!(bp->bio_cflags & GV_BIO_REBUILD))
if (!(bp->bio_pflags & GV_BIO_REBUILD))
return (ENXIO);
G_VINUM_DEBUG(1, "sd %s is reviving", broken->name);
@ -326,7 +326,6 @@ gv_raid5_rebuild(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp,
cbp = gv_raid5_clone_bio(bp, broken, wp, NULL, 1);
if (cbp == NULL)
return (ENOMEM);
cbp->bio_cflags |= GV_BIO_REBUILD;
wp->parity = cbp;
p->synced = boff;
@ -400,7 +399,7 @@ gv_raid5_request(struct gv_plex *p, struct gv_raid5_packet *wp,
/* If synchronizing request, just write it if disks are stale. */
if (original->state == GV_SD_STALE && parity->state == GV_SD_STALE &&
bp->bio_cflags & GV_BIO_SYNCREQ && bp->bio_cmd == BIO_WRITE) {
bp->bio_pflags & GV_BIO_SYNCREQ && bp->bio_cmd == BIO_WRITE) {
type = REQ_TYPE_NORMAL;
/* Our parity stripe is missing. */
} else if (parity->state != GV_SD_UP) {

View File

@ -108,7 +108,7 @@
#define GV_DFLT_SYNCSIZE 65536
/* Flags for BIOs, as they are processed within vinum. */
#define GV_BIO_DONE 0x01
#define GV_BIO_GROW 0x01
#define GV_BIO_MALLOC 0x02
#define GV_BIO_ONHOLD 0x04
#define GV_BIO_SYNCREQ 0x08
@ -117,7 +117,7 @@
#define GV_BIO_CHECK 0x40
#define GV_BIO_PARITY 0x80
#define GV_BIO_INTERNAL \
(GV_BIO_SYNCREQ | GV_BIO_INIT | GV_BIO_REBUILD |GV_BIO_CHECK)
(GV_BIO_SYNCREQ | GV_BIO_INIT | GV_BIO_REBUILD | GV_BIO_CHECK | GV_BIO_GROW)
/* Error codes to be used within gvinum. */
#define GV_ERR_SETSTATE (-1) /* Error setting state. */
@ -233,7 +233,10 @@ struct gv_softc {
struct mtx equeue_mtx; /* Event queue lock. */
struct mtx bqueue_mtx; /* BIO queue lock. */
struct mtx config_mtx; /* Configuration lock. */
struct bio_queue_head *bqueue; /* BIO queue. */
struct bio_queue_head *bqueue_down; /* BIO queue incoming
requests. */
struct bio_queue_head *bqueue_up; /* BIO queue for completed
requests. */
struct g_geom *geom; /* Pointer to our VINUM geom. */
};
#endif