Work around problems with bhyve virtual disks operating on top of GEOM providers.

GEOM does not support scatter/gather lists in its I/Os.  Such requests
are cut into pieces by physio(), which may be problematic if those pieces
are not multiples of the provider's sector size.  If such a case is detected,
move the data through a temporary sequential buffer.

MFC after:	2 weeks
This commit is contained in:
Alexander Motin 2015-04-18 20:10:19 +00:00
parent 2ef26470c5
commit bb1524af0c
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=281700
4 changed files with 100 additions and 18 deletions

View File

@ -85,6 +85,7 @@ struct blockif_ctxt {
int bc_magic;
int bc_fd;
int bc_ischr;
int bc_isgeom;
int bc_candelete;
int bc_rdonly;
off_t bc_size;
@ -198,27 +199,93 @@ blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be)
}
static void
blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be)
blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be, uint8_t *buf)
{
struct blockif_req *br;
off_t arg[2];
int err;
ssize_t clen, len, off, boff, voff;
int i, err;
br = be->be_req;
if (br->br_iovcnt <= 1)
buf = NULL;
err = 0;
switch (be->be_op) {
case BOP_READ:
if (preadv(bc->bc_fd, br->br_iov, br->br_iovcnt,
br->br_offset) < 0)
err = errno;
if (buf == NULL) {
if ((len = preadv(bc->bc_fd, br->br_iov, br->br_iovcnt,
br->br_offset)) < 0)
err = errno;
else
br->br_resid -= len;
break;
}
i = 0;
off = voff = 0;
while (br->br_resid > 0) {
len = MIN(br->br_resid, MAXPHYS);
if (pread(bc->bc_fd, buf, len, br->br_offset +
off) < 0) {
err = errno;
break;
}
boff = 0;
do {
clen = MIN(len - boff, br->br_iov[i].iov_len -
voff);
memcpy(br->br_iov[i].iov_base + voff,
buf + boff, clen);
if (clen < br->br_iov[i].iov_len - voff)
voff += clen;
else {
i++;
voff = 0;
}
boff += clen;
} while (boff < len);
off += len;
br->br_resid -= len;
}
break;
case BOP_WRITE:
if (bc->bc_rdonly)
if (bc->bc_rdonly) {
err = EROFS;
else if (pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt,
br->br_offset) < 0)
err = errno;
break;
}
if (buf == NULL) {
if ((len = pwritev(bc->bc_fd, br->br_iov, br->br_iovcnt,
br->br_offset)) < 0)
err = errno;
else
br->br_resid -= len;
break;
}
i = 0;
off = voff = 0;
while (br->br_resid > 0) {
len = MIN(br->br_resid, MAXPHYS);
boff = 0;
do {
clen = MIN(len - boff, br->br_iov[i].iov_len -
voff);
memcpy(buf + boff,
br->br_iov[i].iov_base + voff, clen);
if (clen < br->br_iov[i].iov_len - voff)
voff += clen;
else {
i++;
voff = 0;
}
boff += clen;
} while (boff < len);
if (pwrite(bc->bc_fd, buf, len, br->br_offset +
off) < 0) {
err = errno;
break;
}
off += len;
br->br_resid -= len;
}
break;
case BOP_FLUSH:
if (bc->bc_ischr) {
@ -234,9 +301,11 @@ blockif_proc(struct blockif_ctxt *bc, struct blockif_elem *be)
err = EROFS;
else if (bc->bc_ischr) {
arg[0] = br->br_offset;
arg[1] = br->br_iov[0].iov_len;
arg[1] = br->br_resid;
if (ioctl(bc->bc_fd, DIOCGDELETE, arg))
err = errno;
else
br->br_resid = 0;
} else
err = EOPNOTSUPP;
break;
@ -256,15 +325,20 @@ blockif_thr(void *arg)
struct blockif_ctxt *bc;
struct blockif_elem *be;
pthread_t t;
uint8_t *buf;
bc = arg;
if (bc->bc_isgeom)
buf = malloc(MAXPHYS);
else
buf = NULL;
t = pthread_self();
pthread_mutex_lock(&bc->bc_mtx);
for (;;) {
while (blockif_dequeue(bc, t, &be)) {
pthread_mutex_unlock(&bc->bc_mtx);
blockif_proc(bc, be);
blockif_proc(bc, be, buf);
pthread_mutex_lock(&bc->bc_mtx);
blockif_complete(bc, be);
}
@ -275,6 +349,8 @@ blockif_thr(void *arg)
}
pthread_mutex_unlock(&bc->bc_mtx);
if (buf)
free(buf);
pthread_exit(NULL);
return (NULL);
}
@ -315,13 +391,14 @@ struct blockif_ctxt *
blockif_open(const char *optstr, const char *ident)
{
char tname[MAXCOMLEN + 1];
char name[MAXPATHLEN];
char *nopt, *xopts;
struct blockif_ctxt *bc;
struct stat sbuf;
struct diocgattr_arg arg;
off_t size, psectsz, psectoff;
int extra, fd, i, sectsz;
int nocache, sync, ro, candelete;
int nocache, sync, ro, candelete, geom;
pthread_once(&blockif_once, blockif_init);
@ -375,7 +452,7 @@ blockif_open(const char *optstr, const char *ident)
size = sbuf.st_size;
sectsz = DEV_BSIZE;
psectsz = psectoff = 0;
candelete = 0;
candelete = geom = 0;
if (S_ISCHR(sbuf.st_mode)) {
if (ioctl(fd, DIOCGMEDIASIZE, &size) < 0 ||
ioctl(fd, DIOCGSECTORSIZE, &sectsz)) {
@ -391,6 +468,8 @@ blockif_open(const char *optstr, const char *ident)
arg.len = sizeof(arg.value.i);
if (ioctl(fd, DIOCGATTR, &arg) == 0)
candelete = arg.value.i;
if (ioctl(fd, DIOCGPROVIDERNAME, name) == 0)
geom = 1;
} else
psectsz = sbuf.st_blksize;
@ -403,6 +482,7 @@ blockif_open(const char *optstr, const char *ident)
bc->bc_magic = BLOCKIF_SIG;
bc->bc_fd = fd;
bc->bc_ischr = S_ISCHR(sbuf.st_mode);
bc->bc_isgeom = geom;
bc->bc_candelete = candelete;
bc->bc_rdonly = ro;
bc->bc_size = size;

View File

@ -45,6 +45,7 @@ struct blockif_req {
struct iovec br_iov[BLOCKIF_IOV_MAX];
int br_iovcnt;
off_t br_offset;
ssize_t br_resid;
void (*br_callback)(struct blockif_req *req, int err);
void *br_param;
};

View File

@ -571,6 +571,7 @@ ahci_build_iov(struct ahci_port *p, struct ahci_ioreq *aior,
}
breq->br_iovcnt = j;
breq->br_resid = todo;
aior->done += todo;
aior->more = (aior->done < aior->len && i < prdtl);
}
@ -776,8 +777,7 @@ ahci_handle_dsm_trim(struct ahci_port *p, int slot, uint8_t *cfis, uint32_t done
breq = &aior->io_req;
breq->br_offset = elba * blockif_sectsz(p->bctx);
breq->br_iovcnt = 1;
breq->br_iov[0].iov_len = elen * blockif_sectsz(p->bctx);
breq->br_resid = elen * blockif_sectsz(p->bctx);
/*
* Mark this command in-flight.

View File

@ -198,7 +198,7 @@ pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
struct pci_vtblk_ioreq *io;
int i, n;
int err;
int iolen;
ssize_t iolen;
int writeop, type;
off_t offset;
struct iovec iov[BLOCKIF_IOV_MAX + 2];
@ -246,8 +246,9 @@ pci_vtblk_proc(struct pci_vtblk_softc *sc, struct vqueue_info *vq)
assert(((flags[i] & VRING_DESC_F_WRITE) == 0) == writeop);
iolen += iov[i].iov_len;
}
io->io_req.br_resid = iolen;
DPRINTF(("virtio-block: %s op, %d bytes, %d segs, offset %ld\n\r",
DPRINTF(("virtio-block: %s op, %zd bytes, %d segs, offset %ld\n\r",
writeop ? "write" : "read/ident", iolen, i - 1, offset));
switch (type) {