5afa461402
Replace ARC4 with SHA2-512. Change lock-structure encoding to use random ordering rather for obscurity. Encrypt lock-structure with AES/256 instead of AES/128. Change kkey derivation to be MD5 hash based. Watch for malloc(M_NOWAIT) failures and ditch our cache when they happen. Remove clause 3 of the license with NAI Labs consent. Many thanks to "Lucky Green" <shamrock@cypherpunks.to> and "David Wagner" <daw@cs.berkeley.edu>, for code reading, inputs and suggestions. This code has still not been stared at for 10 years by a gang of hard-core cryptographers. Discretion advised. NB: These changes result in the on-disk format changing: dump/restore needed. Sponsored by: DARPA & NAI Labs.
773 lines
20 KiB
C
773 lines
20 KiB
C
/*-
|
|
* Copyright (c) 2002 Poul-Henning Kamp
|
|
* Copyright (c) 2002 Networks Associates Technology, Inc.
|
|
* All rights reserved.
|
|
*
|
|
* This software was developed for the FreeBSD Project by Poul-Henning Kamp
|
|
* and NAI Labs, the Security Research Division of Network Associates, Inc.
|
|
* under DARPA/SPAWAR contract N66001-01-C-8035 ("CBOSS"), as part of the
|
|
* DARPA CHATS research program.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* $FreeBSD$
|
|
*
|
|
* This source file contains the state-engine which makes things happen in the
|
|
* right order.
|
|
*
|
|
* Outline:
|
|
* 1) g_bde_start1()
|
|
* Break the struct bio into multiple work packets one per zone.
|
|
* 2) g_bde_start2()
|
|
* Setup the necessary sector buffers and start those read operations
|
|
* which we can start at this time and put the item on the work-list.
|
|
* 3) g_bde_worker()
|
|
* Scan the work-list for items which are ready for crypto processing
|
|
* and call the matching crypto function in g_bde_crypt.c and schedule
|
|
* any writes needed. Read operations finish here by releasing the
|
|
* sector buffers and delivering the original bio request.
|
|
* 4) g_bde_write_done()
|
|
* Release sector buffers and deliver the original bio request.
|
|
*
|
|
* Because of the C-scope rules, the functions are almost perfectly in the
|
|
* opposite order in this source file.
|
|
*
|
|
* XXX: A switch to the hardware assisted crypto in src/sys/opencrypto will add
|
|
* XXX: additional states to this state-engine. Since no hardware available
|
|
* XXX: at this time has AES support, implementing this has been postponed
|
|
* XXX: until such time as it would result in a benefit.
|
|
*/
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/stdint.h>
|
|
#include <sys/bio.h>
|
|
#include <sys/lock.h>
|
|
#include <sys/mutex.h>
|
|
#include <sys/queue.h>
|
|
#include <sys/malloc.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/sysctl.h>
|
|
#include <sys/time.h>
|
|
#include <sys/proc.h>
|
|
#include <sys/kthread.h>
|
|
|
|
#include <crypto/rijndael/rijndael.h>
|
|
#include <crypto/sha2/sha2.h>
|
|
#include <geom/geom.h>
|
|
#include <geom/bde/g_bde.h>
|
|
|
|
static void g_bde_delete_sector(struct g_bde_softc *wp, struct g_bde_sector *sp);
|
|
static struct g_bde_sector * g_bde_new_sector(struct g_bde_work *wp, u_int len);
|
|
static void g_bde_release_sector(struct g_bde_work *wp, struct g_bde_sector *sp);
|
|
static struct g_bde_sector *g_bde_get_sector(struct g_bde_work *wp, off_t offset);
|
|
static int g_bde_start_read(struct g_bde_sector *sp);
|
|
static void g_bde_purge_sector(struct g_bde_softc *sc, int fraction);
|
|
|
|
/*
|
|
* Work item allocation.
|
|
*
|
|
* C++ would call these constructors and destructors.
|
|
*/
|
|
static u_int g_bde_nwork;
|
|
SYSCTL_UINT(_debug, OID_AUTO, gbde_nwork, CTLFLAG_RD, &g_bde_nwork, 0, "");
|
|
|
|
static struct g_bde_work *
|
|
g_bde_new_work(struct g_bde_softc *sc)
|
|
{
|
|
struct g_bde_work *wp;
|
|
|
|
wp = g_malloc(sizeof *wp, M_NOWAIT | M_ZERO);
|
|
if (wp == NULL)
|
|
return (wp);
|
|
wp->state = SETUP;
|
|
wp->softc = sc;
|
|
g_bde_nwork++;
|
|
sc->nwork++;
|
|
TAILQ_INSERT_TAIL(&sc->worklist, wp, list);
|
|
return (wp);
|
|
}
|
|
|
|
static void
|
|
g_bde_delete_work(struct g_bde_work *wp)
|
|
{
|
|
struct g_bde_softc *sc;
|
|
|
|
sc = wp->softc;
|
|
g_bde_nwork--;
|
|
sc->nwork--;
|
|
TAILQ_REMOVE(&sc->worklist, wp, list);
|
|
g_free(wp);
|
|
}
|
|
|
|
/*
|
|
* Sector buffer allocation
|
|
*
|
|
* These two functions allocate and free back variable sized sector buffers
|
|
*/
|
|
|
|
static u_int g_bde_nsect;
|
|
SYSCTL_UINT(_debug, OID_AUTO, gbde_nsect, CTLFLAG_RD, &g_bde_nsect, 0, "");
|
|
|
|
static void
|
|
g_bde_delete_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
|
|
{
|
|
|
|
g_bde_nsect--;
|
|
sc->nsect--;
|
|
if (sp->malloc)
|
|
g_free(sp->data);
|
|
g_free(sp);
|
|
}
|
|
|
|
static struct g_bde_sector *
|
|
g_bde_new_sector(struct g_bde_work *wp, u_int len)
|
|
{
|
|
struct g_bde_sector *sp;
|
|
|
|
sp = g_malloc(sizeof *sp, M_NOWAIT | M_ZERO);
|
|
if (sp == NULL)
|
|
return (sp);
|
|
if (len > 0) {
|
|
sp->data = g_malloc(len, M_NOWAIT | M_ZERO);
|
|
if (sp->data == NULL) {
|
|
g_free(sp);
|
|
return (NULL);
|
|
}
|
|
sp->malloc = 1;
|
|
}
|
|
g_bde_nsect++;
|
|
wp->softc->nsect++;
|
|
sp->size = len;
|
|
sp->softc = wp->softc;
|
|
sp->ref = 1;
|
|
sp->owner = wp;
|
|
sp->offset = wp->so;
|
|
sp->state = JUNK;
|
|
return (sp);
|
|
}
|
|
|
|
/*
|
|
* Skey sector cache.
|
|
*
|
|
* Nothing prevents two separate I/O requests from addressing the same zone
|
|
* and thereby needing the same skey sector. We therefore need to sequence
|
|
* I/O operations to the skey sectors. A certain amount of caching is also
|
|
* desirable, although the extent of benefit from this is not at this point
|
|
* determined.
|
|
*
|
|
* XXX: GEOM may be able to grow a generic caching facility at some point
|
|
* XXX: to support such needs.
|
|
*/
|
|
|
|
static u_int g_bde_ncache;
|
|
SYSCTL_UINT(_debug, OID_AUTO, gbde_ncache, CTLFLAG_RD, &g_bde_ncache, 0, "");
|
|
|
|
static void
|
|
g_bde_purge_one_sector(struct g_bde_softc *sc, struct g_bde_sector *sp)
|
|
{
|
|
|
|
g_trace(G_T_TOPOLOGY, "g_bde_purge_one_sector(%p, %p)", sc, sp);
|
|
if (sp->ref != 0)
|
|
return;
|
|
TAILQ_REMOVE(&sc->freelist, sp, list);
|
|
g_bde_ncache--;
|
|
sc->ncache--;
|
|
bzero(sp->data, sp->size);
|
|
g_bde_delete_sector(sc, sp);
|
|
}
|
|
|
|
static struct g_bde_sector *
|
|
g_bde_get_sector(struct g_bde_work *wp, off_t offset)
|
|
{
|
|
struct g_bde_sector *sp;
|
|
struct g_bde_softc *sc;
|
|
|
|
g_trace(G_T_TOPOLOGY, "g_bde_get_sector(%p, %jd)", wp, (intmax_t)offset);
|
|
sc = wp->softc;
|
|
|
|
if (malloc_last_fail() < g_bde_ncache)
|
|
g_bde_purge_sector(sc, -1);
|
|
|
|
sp = TAILQ_FIRST(&sc->freelist);
|
|
if (sp != NULL && sp->ref == 0 && sp->used + 300 < time_uptime)
|
|
g_bde_purge_one_sector(sc, sp);
|
|
|
|
TAILQ_FOREACH(sp, &sc->freelist, list) {
|
|
if (sp->offset == offset)
|
|
break;
|
|
}
|
|
if (sp != NULL) {
|
|
sp->ref++;
|
|
KASSERT(sp->offset == offset, ("wrong offset"));
|
|
KASSERT(sp->softc == wp->softc, ("wrong softc"));
|
|
if (sp->ref == 1)
|
|
sp->owner = wp;
|
|
} else {
|
|
if (malloc_last_fail() < g_bde_ncache) {
|
|
TAILQ_FOREACH(sp, &sc->freelist, list)
|
|
if (sp->ref == 0)
|
|
break;
|
|
}
|
|
if (sp == NULL && !TAILQ_EMPTY(&sc->freelist))
|
|
sp = TAILQ_FIRST(&sc->freelist);
|
|
if (sp != NULL && sp->ref > 0)
|
|
sp = NULL;
|
|
if (sp == NULL) {
|
|
g_bde_ncache++;
|
|
sc->ncache++;
|
|
sp = g_bde_new_sector(wp, sc->sectorsize);
|
|
if (sp != NULL) {
|
|
TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
|
|
sp->malloc = 2;
|
|
}
|
|
}
|
|
if (sp != NULL) {
|
|
sp->offset = offset;
|
|
sp->softc = wp->softc;
|
|
sp->ref = 1;
|
|
sp->owner = wp;
|
|
sp->state = JUNK;
|
|
sp->error = 0;
|
|
}
|
|
}
|
|
if (sp != NULL) {
|
|
TAILQ_REMOVE(&sc->freelist, sp, list);
|
|
TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
|
|
}
|
|
wp->ksp = sp;
|
|
if (sp == NULL) {
|
|
g_bde_purge_sector(sc, -1);
|
|
sp = g_bde_get_sector(wp, offset);
|
|
}
|
|
if (sp != NULL)
|
|
sp->used = time_uptime;
|
|
KASSERT(sp != NULL, ("get_sector failed"));
|
|
return(sp);
|
|
}
|
|
|
|
static void
|
|
g_bde_release_sector(struct g_bde_work *wp, struct g_bde_sector *sp)
|
|
{
|
|
struct g_bde_softc *sc;
|
|
struct g_bde_work *wp2;
|
|
|
|
g_trace(G_T_TOPOLOGY, "g_bde_release_sector(%p)", sp);
|
|
KASSERT(sp->malloc == 2, ("Wrong sector released"));
|
|
sc = sp->softc;
|
|
KASSERT(sc != NULL, ("NULL sp->softc"));
|
|
KASSERT(wp == sp->owner, ("Releasing, not owner"));
|
|
sp->owner = NULL;
|
|
wp->ksp = NULL;
|
|
sp->ref--;
|
|
if (sp->ref > 0) {
|
|
TAILQ_REMOVE(&sc->freelist, sp, list);
|
|
TAILQ_INSERT_TAIL(&sc->freelist, sp, list);
|
|
TAILQ_FOREACH(wp2, &sc->worklist, list) {
|
|
if (wp2->ksp == sp) {
|
|
KASSERT(wp2 != wp, ("Self-reowning"));
|
|
sp->owner = wp2;
|
|
wakeup(sp->softc);
|
|
break;
|
|
}
|
|
}
|
|
KASSERT(wp2 != NULL, ("Failed to pick up owner for %p\n", sp));
|
|
} else if (sp->error != 0) {
|
|
sp->offset = ~0;
|
|
sp->error = 0;
|
|
sp->state = JUNK;
|
|
}
|
|
TAILQ_REMOVE(&sc->freelist, sp, list);
|
|
TAILQ_INSERT_HEAD(&sc->freelist, sp, list);
|
|
}
|
|
|
|
static void
|
|
g_bde_purge_sector(struct g_bde_softc *sc, int fraction)
|
|
{
|
|
struct g_bde_sector *sp;
|
|
int n;
|
|
|
|
g_trace(G_T_TOPOLOGY, "g_bde_purge_sector(%p)", sc);
|
|
if (fraction > 0)
|
|
n = sc->ncache / fraction + 1;
|
|
else
|
|
n = g_bde_ncache - malloc_last_fail();
|
|
if (n < 0)
|
|
return;
|
|
if (n > sc->ncache)
|
|
n = sc->ncache;
|
|
while(n--) {
|
|
TAILQ_FOREACH(sp, &sc->freelist, list) {
|
|
if (sp->ref != 0)
|
|
continue;
|
|
TAILQ_REMOVE(&sc->freelist, sp, list);
|
|
g_bde_ncache--;
|
|
sc->ncache--;
|
|
bzero(sp->data, sp->size);
|
|
g_bde_delete_sector(sc, sp);
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
|
|
static struct g_bde_sector *
|
|
g_bde_read_sector(struct g_bde_softc *sc, struct g_bde_work *wp, off_t offset)
|
|
{
|
|
struct g_bde_sector *sp;
|
|
|
|
g_trace(G_T_TOPOLOGY, "g_bde_read_sector(%p)", wp);
|
|
sp = g_bde_get_sector(wp, offset);
|
|
if (sp == NULL)
|
|
return (sp);
|
|
if (sp->owner != wp)
|
|
return (sp);
|
|
if (sp->state == VALID)
|
|
return (sp);
|
|
if (g_bde_start_read(sp) == 0)
|
|
return (sp);
|
|
g_bde_release_sector(wp, sp);
|
|
return (NULL);
|
|
}
|
|
|
|
/*
|
|
* Contribute to the completion of the original bio request.
|
|
*
|
|
* We have no simple way to tell how many bits the original bio request has
|
|
* been segmented into, so the easiest way to determine when we can deliver
|
|
* it is to keep track of the number of bytes we have completed. We keep
|
|
* track of any errors underway and latch onto the first one.
|
|
*
|
|
* We always report "nothing done" in case of error, because random bits here
|
|
* and there may be completed and returning a number of completed bytes does
|
|
* not convey any useful information about which bytes they were. If some
|
|
* piece of broken code somewhere interprets this to mean that nothing has
|
|
* changed on the underlying media they deserve the lossage headed for them.
|
|
*
|
|
* A single mutex per g_bde instance is used to prevent contention.
|
|
*/
|
|
|
|
static void
|
|
g_bde_contribute(struct bio *bp, off_t bytes, int error)
|
|
{
|
|
struct g_bde_softc *sc;
|
|
|
|
g_trace(G_T_TOPOLOGY, "g_bde_contribute bp %p bytes %jd error %d",
|
|
bp, (intmax_t)bytes, error);
|
|
sc = bp->bio_driver1;
|
|
if (bp->bio_error == 0)
|
|
bp->bio_error = error;
|
|
bp->bio_completed += bytes;
|
|
KASSERT(bp->bio_completed <= bp->bio_length, ("Too large contribution"));
|
|
if (bp->bio_completed == bp->bio_length) {
|
|
if (bp->bio_error != 0)
|
|
bp->bio_completed = 0;
|
|
g_io_deliver(bp, bp->bio_error);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* A write operation has finished. When we have all expected cows in the
|
|
* barn close the door and call it a day.
|
|
*/
|
|
|
|
static void
|
|
g_bde_write_done(struct bio *bp)
|
|
{
|
|
struct g_bde_sector *sp;
|
|
struct g_bde_work *wp;
|
|
struct g_bde_softc *sc;
|
|
|
|
sp = bp->bio_caller1;
|
|
sc = bp->bio_caller2;
|
|
mtx_lock(&sc->worklist_mutex);
|
|
KASSERT(sp != NULL, ("NULL sp"));
|
|
KASSERT(sc != NULL, ("NULL sc"));
|
|
KASSERT(sp->owner != NULL, ("NULL sp->owner"));
|
|
g_trace(G_T_TOPOLOGY, "g_bde_write_done(%p)", sp);
|
|
sp->error = bp->bio_error;
|
|
g_destroy_bio(bp);
|
|
wp = sp->owner;
|
|
if (wp->error == 0)
|
|
wp->error = sp->error;
|
|
|
|
if (wp->bp->bio_cmd == BIO_DELETE) {
|
|
KASSERT(sp == wp->sp, ("trashed delete op"));
|
|
g_bde_contribute(wp->bp, wp->length, wp->error);
|
|
g_bde_delete_sector(sc, sp);
|
|
g_bde_delete_work(wp);
|
|
mtx_unlock(&sc->worklist_mutex);
|
|
return;
|
|
}
|
|
|
|
KASSERT(wp->bp->bio_cmd == BIO_WRITE, ("Confused in g_bde_write_done()"));
|
|
KASSERT(sp == wp->sp || sp == wp->ksp, ("trashed write op"));
|
|
if (wp->sp == sp) {
|
|
g_bde_delete_sector(sc, wp->sp);
|
|
wp->sp = NULL;
|
|
} else {
|
|
sp->state = VALID;
|
|
}
|
|
if (wp->sp == NULL && wp->ksp != NULL && wp->ksp->state == VALID) {
|
|
g_bde_contribute(wp->bp, wp->length, wp->error);
|
|
g_bde_release_sector(wp, wp->ksp);
|
|
g_bde_delete_work(wp);
|
|
}
|
|
mtx_unlock(&sc->worklist_mutex);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* Send a write request for the given sector down the pipeline.
|
|
*/
|
|
|
|
static int
|
|
g_bde_start_write(struct g_bde_sector *sp)
|
|
{
|
|
struct bio *bp;
|
|
struct g_bde_softc *sc;
|
|
|
|
g_trace(G_T_TOPOLOGY, "g_bde_start_write(%p)", sp);
|
|
sc = sp->softc;
|
|
KASSERT(sc != NULL, ("NULL sc in g_bde_start_write"));
|
|
KASSERT(sp->owner != NULL, ("NULL sp->owner in g_bde_start_write"));
|
|
bp = g_new_bio();
|
|
if (bp == NULL)
|
|
return (ENOMEM);
|
|
bp->bio_cmd = BIO_WRITE;
|
|
bp->bio_offset = sp->offset;
|
|
bp->bio_data = sp->data;
|
|
bp->bio_length = sp->size;
|
|
bp->bio_done = g_bde_write_done;
|
|
bp->bio_caller1 = sp;
|
|
bp->bio_caller2 = sc;
|
|
sp->state = IO;
|
|
g_io_request(bp, sc->consumer);
|
|
return(0);
|
|
}
|
|
|
|
/*
|
|
* A read operation has finished. Mark the sector no longer iobusy and
|
|
* wake up the worker thread and let it do its thing.
|
|
*/
|
|
|
|
static void
|
|
g_bde_read_done(struct bio *bp)
|
|
{
|
|
struct g_bde_sector *sp;
|
|
struct g_bde_softc *sc;
|
|
|
|
sp = bp->bio_caller1;
|
|
g_trace(G_T_TOPOLOGY, "g_bde_read_done(%p)", sp);
|
|
sc = bp->bio_caller2;
|
|
mtx_lock(&sc->worklist_mutex);
|
|
sp->error = bp->bio_error;
|
|
sp->state = VALID;
|
|
wakeup(sc);
|
|
g_destroy_bio(bp);
|
|
mtx_unlock(&sc->worklist_mutex);
|
|
}
|
|
|
|
/*
|
|
* Send a read request for the given sector down the pipeline.
|
|
*/
|
|
|
|
static int
|
|
g_bde_start_read(struct g_bde_sector *sp)
|
|
{
|
|
struct bio *bp;
|
|
struct g_bde_softc *sc;
|
|
|
|
g_trace(G_T_TOPOLOGY, "g_bde_start_read(%p)", sp);
|
|
sc = sp->softc;
|
|
KASSERT(sc != NULL, ("Null softc in sp %p", sp));
|
|
bp = g_new_bio();
|
|
if (bp == NULL)
|
|
return (ENOMEM);
|
|
bp->bio_cmd = BIO_READ;
|
|
bp->bio_offset = sp->offset;
|
|
bp->bio_data = sp->data;
|
|
bp->bio_length = sp->size;
|
|
bp->bio_done = g_bde_read_done;
|
|
bp->bio_caller1 = sp;
|
|
bp->bio_caller2 = sc;
|
|
sp->state = IO;
|
|
g_io_request(bp, sc->consumer);
|
|
return(0);
|
|
}
|
|
|
|
/*
|
|
* The worker thread.
|
|
*
|
|
* The up/down path of GEOM is not allowed to sleep or do any major work
|
|
* so we use this thread to do the actual crypto operations and to push
|
|
* the state engine onwards.
|
|
*
|
|
* XXX: if we switch to the src/sys/opencrypt hardware assisted encryption
|
|
* XXX: using a thread here is probably not needed.
|
|
*/
|
|
|
|
void
|
|
g_bde_worker(void *arg)
|
|
{
|
|
struct g_bde_softc *sc;
|
|
struct g_bde_work *wp;
|
|
struct g_geom *gp;
|
|
int busy, error;
|
|
|
|
gp = arg;
|
|
sc = gp->softc;
|
|
|
|
mtx_lock(&sc->worklist_mutex);
|
|
for (;;) {
|
|
busy = 0;
|
|
g_trace(G_T_TOPOLOGY, "g_bde_worker scan");
|
|
TAILQ_FOREACH(wp, &sc->worklist, list) {
|
|
KASSERT(wp != NULL, ("NULL wp"));
|
|
KASSERT(wp->softc != NULL, ("NULL wp->softc"));
|
|
if (wp->state != WAIT)
|
|
continue; /* Not interesting here */
|
|
|
|
KASSERT(wp->bp != NULL, ("NULL wp->bp"));
|
|
KASSERT(wp->sp != NULL, ("NULL wp->sp"));
|
|
|
|
if (wp->ksp != NULL) {
|
|
if (wp->ksp->owner != wp)
|
|
continue;
|
|
if (wp->ksp->state == IO)
|
|
continue;
|
|
KASSERT(wp->ksp->state == VALID,
|
|
("Illegal sector state (JUNK ?)"));
|
|
}
|
|
|
|
if (wp->bp->bio_cmd == BIO_READ && wp->sp->state != VALID)
|
|
continue;
|
|
|
|
if (wp->ksp != NULL && wp->ksp->error != 0) {
|
|
g_bde_contribute(wp->bp, wp->length,
|
|
wp->ksp->error);
|
|
g_bde_delete_sector(sc, wp->sp);
|
|
g_bde_release_sector(wp, wp->ksp);
|
|
g_bde_delete_work(wp);
|
|
busy++;
|
|
break;
|
|
}
|
|
switch(wp->bp->bio_cmd) {
|
|
case BIO_READ:
|
|
if (wp->ksp != NULL && wp->sp->error == 0) {
|
|
mtx_unlock(&sc->worklist_mutex);
|
|
g_bde_crypt_read(wp);
|
|
mtx_lock(&sc->worklist_mutex);
|
|
}
|
|
g_bde_contribute(wp->bp, wp->length,
|
|
wp->sp->error);
|
|
g_bde_delete_sector(sc, wp->sp);
|
|
if (wp->ksp != NULL)
|
|
g_bde_release_sector(wp, wp->ksp);
|
|
g_bde_delete_work(wp);
|
|
break;
|
|
case BIO_WRITE:
|
|
wp->state = FINISH;
|
|
KASSERT(wp->sp->owner == wp, ("Write not owner sp"));
|
|
KASSERT(wp->ksp->owner == wp, ("Write not owner ksp"));
|
|
mtx_unlock(&sc->worklist_mutex);
|
|
g_bde_crypt_write(wp);
|
|
mtx_lock(&sc->worklist_mutex);
|
|
g_bde_start_write(wp->sp);
|
|
g_bde_start_write(wp->ksp);
|
|
break;
|
|
case BIO_DELETE:
|
|
wp->state = FINISH;
|
|
mtx_unlock(&sc->worklist_mutex);
|
|
g_bde_crypt_delete(wp);
|
|
mtx_lock(&sc->worklist_mutex);
|
|
g_bde_start_write(wp->sp);
|
|
break;
|
|
}
|
|
busy++;
|
|
break;
|
|
}
|
|
if (!busy) {
|
|
/*
|
|
* We don't look for our death-warrant until we are
|
|
* idle. Shouldn't make a difference in practice.
|
|
*/
|
|
if (sc->dead)
|
|
break;
|
|
g_trace(G_T_TOPOLOGY, "g_bde_worker sleep");
|
|
error = msleep(sc, &sc->worklist_mutex,
|
|
PRIBIO, "g_bde", hz);
|
|
if (error == EWOULDBLOCK) {
|
|
/*
|
|
* Loose our skey cache in an orderly fashion.
|
|
* The exact rate can be tuned to be less
|
|
* aggressive if this is desirable. 10% per
|
|
* second means that the cache is gone in a
|
|
* few minutes.
|
|
*/
|
|
g_bde_purge_sector(sc, 10);
|
|
}
|
|
}
|
|
}
|
|
g_trace(G_T_TOPOLOGY, "g_bde_worker die");
|
|
g_bde_purge_sector(sc, 1);
|
|
KASSERT(sc->nwork == 0, ("Dead but %d work remaining", sc->nwork));
|
|
KASSERT(sc->ncache == 0, ("Dead but %d cache remaining", sc->ncache));
|
|
KASSERT(sc->nsect == 0, ("Dead but %d sect remaining", sc->nsect));
|
|
mtx_unlock(&sc->worklist_mutex);
|
|
sc->dead = 2;
|
|
wakeup(sc);
|
|
mtx_lock(&Giant);
|
|
kthread_exit(0);
|
|
}
|
|
|
|
/*
|
|
* g_bde_start1 has chopped the incoming request up so all the requests
|
|
* we see here are inside a single zone. Map the data and key locations
|
|
* grab the buffers we need and fire off the first volley of read requests.
|
|
*/
|
|
|
|
static void
|
|
g_bde_start2(struct g_bde_work *wp)
|
|
{
|
|
struct g_bde_softc *sc;
|
|
|
|
KASSERT(wp != NULL, ("NULL wp in g_bde_start2"));
|
|
g_trace(G_T_TOPOLOGY, "g_bde_start2(%p)", wp);
|
|
sc = wp->softc;
|
|
KASSERT(wp->softc != NULL, ("NULL wp->softc"));
|
|
g_bde_map_sector(&sc->key, wp->offset, &wp->so, &wp->kso, &wp->ko);
|
|
if (wp->bp->bio_cmd == BIO_READ) {
|
|
wp->sp = g_bde_new_sector(wp, 0);
|
|
if (wp->sp == NULL) {
|
|
g_bde_contribute(wp->bp, wp->length, ENOMEM);
|
|
g_bde_delete_work(wp);
|
|
return;
|
|
}
|
|
wp->sp->size = wp->length;
|
|
wp->sp->data = wp->data;
|
|
if (g_bde_start_read(wp->sp) != 0) {
|
|
g_bde_contribute(wp->bp, wp->length, ENOMEM);
|
|
g_bde_delete_sector(sc, wp->sp);
|
|
g_bde_delete_work(wp);
|
|
return;
|
|
}
|
|
g_bde_read_sector(sc, wp, wp->kso);
|
|
if (wp->ksp == NULL)
|
|
wp->error = ENOMEM;
|
|
} else if (wp->bp->bio_cmd == BIO_DELETE) {
|
|
wp->sp = g_bde_new_sector(wp, wp->length);
|
|
if (wp->sp == NULL) {
|
|
g_bde_contribute(wp->bp, wp->length, ENOMEM);
|
|
g_bde_delete_work(wp);
|
|
return;
|
|
}
|
|
} else if (wp->bp->bio_cmd == BIO_WRITE) {
|
|
wp->sp = g_bde_new_sector(wp, wp->length);
|
|
if (wp->sp == NULL) {
|
|
g_bde_contribute(wp->bp, wp->length, ENOMEM);
|
|
g_bde_delete_work(wp);
|
|
return;
|
|
}
|
|
g_bde_read_sector(sc, wp, wp->kso);
|
|
if (wp->ksp == NULL) {
|
|
g_bde_contribute(wp->bp, wp->length, ENOMEM);
|
|
g_bde_delete_sector(sc, wp->sp);
|
|
g_bde_delete_work(wp);
|
|
return;
|
|
}
|
|
} else {
|
|
KASSERT(0 == 1,
|
|
("Wrong bio_cmd %d in g_bde_start2", wp->bp->bio_cmd));
|
|
}
|
|
|
|
wp->state = WAIT;
|
|
wakeup(sc);
|
|
}
|
|
|
|
/*
|
|
* Split the incoming bio on zone boundaries and submit the resulting
|
|
* work structures to g_bde_start2().
|
|
*/
|
|
|
|
void
|
|
g_bde_start1(struct bio *bp)
|
|
{
|
|
struct g_bde_softc *sc;
|
|
struct g_bde_work *wp;
|
|
off_t zone_start, left;
|
|
caddr_t p;
|
|
|
|
sc = bp->bio_to->geom->softc;
|
|
bp->bio_driver1 = sc;
|
|
|
|
mtx_lock(&sc->worklist_mutex);
|
|
zone_start = bp->bio_offset - bp->bio_offset % sc->zone_cont;
|
|
wp = g_bde_new_work(sc);
|
|
if (wp == NULL) {
|
|
g_io_deliver(bp, ENOMEM);
|
|
mtx_unlock(&sc->worklist_mutex);
|
|
return;
|
|
}
|
|
left = bp->bio_length;
|
|
p = bp->bio_data;
|
|
|
|
/* Do the first and possible only fragment */
|
|
wp->bp = bp;
|
|
wp->offset = bp->bio_offset;
|
|
wp->data = p;
|
|
wp->length = zone_start + sc->zone_cont - wp->offset;
|
|
if (wp->length >= left) {
|
|
/* Only this one fragment needed */
|
|
wp->length = left;
|
|
g_bde_start2(wp);
|
|
mtx_unlock(&sc->worklist_mutex);
|
|
return;
|
|
}
|
|
|
|
/* Submit the first fragment */
|
|
g_bde_start2(wp);
|
|
left -= wp->length;
|
|
p += wp->length;
|
|
|
|
/* Do the subsequent fragments */
|
|
for(;left > 0;) {
|
|
wp = g_bde_new_work(sc);
|
|
if (wp == NULL) {
|
|
g_bde_contribute(bp, left, ENOMEM);
|
|
mtx_unlock(&sc->worklist_mutex);
|
|
return;
|
|
}
|
|
zone_start += sc->zone_cont;
|
|
wp->bp = bp;
|
|
wp->offset = zone_start;
|
|
wp->data = p;
|
|
if (left > sc->zone_cont)
|
|
wp->length = sc->zone_cont;
|
|
else
|
|
wp->length = left;
|
|
left -= wp->length;
|
|
p += wp->length;
|
|
g_bde_start2(wp);
|
|
}
|
|
mtx_unlock(&sc->worklist_mutex);
|
|
}
|