bhyve: refactor NVMe IO command handling
This refactors the NVMe I/O command processing function to make adding new commands easier. The main change is to move command-specific processing (e.g. Read/Write) to separate functions for each NVMe I/O command and leave the common per-command processing in the existing pci_nvme_handle_io_cmd() function. While here, add checks for some common errors (invalid Namespace ID, invalid opcode, LBA out of range). Add myself to the Copyright holders.

Reviewed by: imp
Tested by: Jason Tubnor
MFC after: 2 weeks
Differential Revision: https://reviews.freebsd.org/D24879
This commit is contained in:
parent
0220a2aeed
commit
a43ab8d253
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=362745
@ -3,6 +3,7 @@
|
||||
*
|
||||
* Copyright (c) 2017 Shunsuke Mie
|
||||
* Copyright (c) 2018 Leon Dang
|
||||
* Copyright (c) 2020 Chuck Tuffli
|
||||
*
|
||||
* Function crc16 Copyright (c) 2017, Fedor Uporov
|
||||
* Obtained from function ext2_crc16() in sys/fs/ext2fs/ext2_csum.c
|
||||
@ -1386,6 +1387,122 @@ pci_nvme_io_partial(struct blockif_req *br, int err)
|
||||
pthread_cond_signal(&req->cv);
|
||||
}
|
||||
|
||||
/*
 * Handle an NVMe Read or Write I/O command.
 *
 * Validates the requested LBA range against the backing store size, then
 * either copies the data directly (RAM-backed namespace) or builds an iov
 * list from the command's PRP entries and queues an asynchronous blockif
 * request.
 *
 * Returns true if the request was queued and completion is pending (the
 * blockif callback will post the completion); false if the command finished
 * synchronously, in which case *status holds the NVMe completion status.
 */
static bool
nvme_opc_write_read(struct pci_nvme_softc *sc,
    struct nvme_command *cmd,
    struct pci_nvme_blockstore *nvstore,
    struct pci_nvme_ioreq *req,
    uint16_t *status)
{
	uint64_t lba, nblocks, bytes;
	size_t offset;
	bool is_write = cmd->opc == NVME_OPC_WRITE;
	bool pending = false;

	/* Starting LBA is CDW11:CDW10; NLB (CDW12 bits 15:0) is 0-based. */
	lba = ((uint64_t)cmd->cdw11 << 32) | cmd->cdw10;
	nblocks = (cmd->cdw12 & 0xFFFF) + 1;

	offset = lba * nvstore->sectsz;
	bytes = nblocks * nvstore->sectsz;

	/*
	 * Reject transfers extending past the end of the backing store.
	 * NOTE(review): offset/bytes are derived from guest-controlled
	 * values; an overflow check before the multiply/add may be
	 * warranted — confirm upper bounds on lba and sectsz.
	 */
	if ((offset + bytes) > nvstore->size) {
		WPRINTF("%s command would exceed LBA range", __func__);
		pci_nvme_status_genc(status, NVME_SC_LBA_OUT_OF_RANGE);
		goto out;
	}

	/*
	 * NOTE(review): br_offset is assigned the raw LBA here while the
	 * iov-building path below passes byte offsets to
	 * pci_nvme_append_iov_req() — verify which unit blockif expects
	 * and whether append_iov_req overwrites this field.
	 */
	req->io_req.br_offset = lba;

	/* PRP bits 1:0 must be zero */
	cmd->prp1 &= ~0x3UL;
	cmd->prp2 &= ~0x3UL;

	if (nvstore->type == NVME_STOR_RAM) {
		/*
		 * RAM-backed namespace: satisfy the request with a direct
		 * copy between guest PRP pages and the backing buffer and
		 * complete synchronously.
		 */
		uint8_t *buf = nvstore->ctx;
		enum nvme_copy_dir dir;

		if (is_write)
			dir = NVME_COPY_TO_PRP;
		else
			dir = NVME_COPY_FROM_PRP;

		if (nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, cmd->prp1, cmd->prp2,
		    buf + offset, bytes, dir))
			pci_nvme_status_genc(status,
			    NVME_SC_DATA_TRANSFER_ERROR);
		else
			pci_nvme_status_genc(status, NVME_SC_SUCCESS);
	} else {
		uint64_t size;
		int err;

		/*
		 * PRP1 covers from its address to the end of that page
		 * (or the whole transfer, if smaller).
		 */
		size = MIN(PAGE_SIZE - (cmd->prp1 % PAGE_SIZE), bytes);
		if (pci_nvme_append_iov_req(sc, req, cmd->prp1,
		    size, is_write, offset)) {
			pci_nvme_status_genc(status,
			    NVME_SC_DATA_TRANSFER_ERROR);
			goto out;
		}

		offset += size;
		bytes -= size;

		if (bytes == 0) {
			/* Transfer fit entirely in PRP1; nothing more. */
			;
		} else if (bytes <= PAGE_SIZE) {
			/* At most one more page: PRP2 is a second data
			 * pointer, not a list. */
			size = bytes;
			if (pci_nvme_append_iov_req(sc, req, cmd->prp2,
			    size, is_write, offset)) {
				pci_nvme_status_genc(status,
				    NVME_SC_DATA_TRANSFER_ERROR);
				goto out;
			}
		} else {
			void *vmctx = sc->nsc_pi->pi_vmctx;
			uint64_t *prp_list = &cmd->prp2;
			uint64_t *last = prp_list;

			/* PRP2 is pointer to a physical region page list */
			while (bytes) {
				/* Last entry in list points to the next list */
				if (prp_list == last) {
					uint64_t prp = *prp_list;

					prp_list = paddr_guest2host(vmctx, prp,
					    PAGE_SIZE - (prp % PAGE_SIZE));
					last = prp_list + (NVME_PRP2_ITEMS - 1);
				}

				size = MIN(bytes, PAGE_SIZE);

				if (pci_nvme_append_iov_req(sc, req, *prp_list,
				    size, is_write, offset)) {
					pci_nvme_status_genc(status,
					    NVME_SC_DATA_TRANSFER_ERROR);
					goto out;
				}

				offset += size;
				bytes -= size;

				prp_list++;
			}
		}
		/* Queue the asynchronous blockif request; completion is
		 * posted from pci_nvme_io_done(). */
		req->io_req.br_callback = pci_nvme_io_done;
		if (is_write)
			err = blockif_write(nvstore->ctx, &req->io_req);
		else
			err = blockif_read(nvstore->ctx, &req->io_req);

		if (err)
			pci_nvme_status_genc(status, NVME_SC_DATA_TRANSFER_ERROR);
		else
			pending = true;
	}
out:
	return (pending);
}
|
||||
|
||||
static void
|
||||
pci_nvme_dealloc_sm(struct blockif_req *br, int err)
|
||||
{
|
||||
@ -1421,14 +1538,15 @@ pci_nvme_dealloc_sm(struct blockif_req *br, int err)
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
static bool
|
||||
nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc,
|
||||
struct nvme_command *cmd,
|
||||
struct pci_nvme_blockstore *nvstore,
|
||||
struct pci_nvme_ioreq *req,
|
||||
uint16_t *status)
|
||||
{
|
||||
int err = -1;
|
||||
int err;
|
||||
bool pending = false;
|
||||
|
||||
if ((sc->ctrldata.oncs & NVME_ONCS_DSM) == 0) {
|
||||
pci_nvme_status_genc(status, NVME_SC_INVALID_OPCODE);
|
||||
@ -1463,9 +1581,6 @@ nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc,
|
||||
nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, cmd->prp1, cmd->prp2,
|
||||
(uint8_t *)range, NVME_MAX_DSM_TRIM, NVME_COPY_FROM_PRP);
|
||||
|
||||
req->opc = cmd->opc;
|
||||
req->cid = cmd->cid;
|
||||
req->nsid = cmd->nsid;
|
||||
/*
|
||||
* If the request is for more than a single range, store
|
||||
* the ranges in the br_iov. Optimize for the common case
|
||||
@ -1501,11 +1616,13 @@ nvme_opc_dataset_mgmt(struct pci_nvme_softc *sc,
|
||||
err = blockif_delete(nvstore->ctx, &req->io_req);
|
||||
if (err)
|
||||
pci_nvme_status_genc(status, NVME_SC_INTERNAL_DEVICE_ERROR);
|
||||
else
|
||||
pending = true;
|
||||
|
||||
free(range);
|
||||
}
|
||||
out:
|
||||
return (err);
|
||||
return (pending);
|
||||
}
|
||||
|
||||
static void
|
||||
@ -1514,7 +1631,6 @@ pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint16_t idx)
|
||||
struct nvme_submission_queue *sq;
|
||||
uint16_t status;
|
||||
uint16_t sqhead;
|
||||
int err;
|
||||
|
||||
/* handle all submissions up to sq->tail index */
|
||||
sq = &sc->submit_queues[idx];
|
||||
@ -1531,189 +1647,69 @@ pci_nvme_handle_io_cmd(struct pci_nvme_softc* sc, uint16_t idx)
|
||||
|
||||
while (sqhead != atomic_load_acq_short(&sq->tail)) {
|
||||
struct nvme_command *cmd;
|
||||
struct pci_nvme_ioreq *req = NULL;
|
||||
uint64_t lba;
|
||||
uint64_t nblocks, bytes, size, cpsz;
|
||||
struct pci_nvme_ioreq *req;
|
||||
uint32_t nsid;
|
||||
bool pending;
|
||||
|
||||
/* TODO: support scatter gather list handling */
|
||||
pending = false;
|
||||
req = NULL;
|
||||
status = 0;
|
||||
|
||||
cmd = &sq->qbase[sqhead];
|
||||
sqhead = (sqhead + 1) % sq->size;
|
||||
|
||||
lba = ((uint64_t)cmd->cdw11 << 32) | cmd->cdw10;
|
||||
|
||||
if (cmd->opc == NVME_OPC_FLUSH) {
|
||||
pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
|
||||
pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
|
||||
status, 1);
|
||||
|
||||
continue;
|
||||
} else if (cmd->opc == 0x08) {
|
||||
/* TODO: write zeroes */
|
||||
WPRINTF("%s write zeroes lba 0x%lx blocks %u",
|
||||
__func__, lba, cmd->cdw12 & 0xFFFF);
|
||||
pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
|
||||
pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
|
||||
status, 1);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
if (sc->nvstore.type == NVME_STOR_BLOCKIF) {
|
||||
req = pci_nvme_get_ioreq(sc);
|
||||
req->nvme_sq = sq;
|
||||
req->sqid = idx;
|
||||
}
|
||||
|
||||
if (cmd->opc == NVME_OPC_DATASET_MANAGEMENT) {
|
||||
if (nvme_opc_dataset_mgmt(sc, cmd, &sc->nvstore, req,
|
||||
&status)) {
|
||||
pci_nvme_set_completion(sc, sq, idx, cmd->cid,
|
||||
0, status, 1);
|
||||
if (req)
|
||||
pci_nvme_release_ioreq(sc, req);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
nblocks = (cmd->cdw12 & 0xFFFF) + 1;
|
||||
|
||||
bytes = nblocks * sc->nvstore.sectsz;
|
||||
|
||||
/*
|
||||
* If data starts mid-page and flows into the next page, then
|
||||
* increase page count
|
||||
*/
|
||||
|
||||
DPRINTF("[h%u:t%u:n%u] %s starting LBA 0x%lx blocks %lu "
|
||||
"(%lu-bytes)",
|
||||
sqhead==0 ? sq->size-1 : sqhead-1, sq->tail, sq->size,
|
||||
cmd->opc == NVME_OPC_WRITE ?
|
||||
"WRITE" : "READ",
|
||||
lba, nblocks, bytes);
|
||||
|
||||
cmd->prp1 &= ~(0x03UL);
|
||||
cmd->prp2 &= ~(0x03UL);
|
||||
|
||||
DPRINTF(" prp1 0x%lx prp2 0x%lx", cmd->prp1, cmd->prp2);
|
||||
|
||||
size = bytes;
|
||||
lba *= sc->nvstore.sectsz;
|
||||
|
||||
cpsz = PAGE_SIZE - (cmd->prp1 % PAGE_SIZE);
|
||||
|
||||
if (cpsz > bytes)
|
||||
cpsz = bytes;
|
||||
|
||||
if (req != NULL) {
|
||||
req->io_req.br_offset = ((uint64_t)cmd->cdw11 << 32) |
|
||||
cmd->cdw10;
|
||||
req->opc = cmd->opc;
|
||||
req->cid = cmd->cid;
|
||||
req->nsid = cmd->nsid;
|
||||
}
|
||||
|
||||
err = pci_nvme_append_iov_req(sc, req, cmd->prp1, cpsz,
|
||||
cmd->opc == NVME_OPC_WRITE, lba);
|
||||
lba += cpsz;
|
||||
size -= cpsz;
|
||||
|
||||
if (size == 0)
|
||||
goto iodone;
|
||||
|
||||
if (size <= PAGE_SIZE) {
|
||||
/* prp2 is second (and final) page in transfer */
|
||||
|
||||
err = pci_nvme_append_iov_req(sc, req, cmd->prp2,
|
||||
size,
|
||||
cmd->opc == NVME_OPC_WRITE,
|
||||
lba);
|
||||
} else {
|
||||
uint64_t *prp_list;
|
||||
int i;
|
||||
|
||||
/* prp2 is pointer to a physical region page list */
|
||||
prp_list = paddr_guest2host(sc->nsc_pi->pi_vmctx,
|
||||
cmd->prp2, PAGE_SIZE);
|
||||
|
||||
i = 0;
|
||||
while (size != 0) {
|
||||
cpsz = MIN(size, PAGE_SIZE);
|
||||
|
||||
/*
|
||||
* Move to linked physical region page list
|
||||
* in last item.
|
||||
*/
|
||||
if (i == (NVME_PRP2_ITEMS-1) &&
|
||||
size > PAGE_SIZE) {
|
||||
assert((prp_list[i] & (PAGE_SIZE-1)) == 0);
|
||||
prp_list = paddr_guest2host(
|
||||
sc->nsc_pi->pi_vmctx,
|
||||
prp_list[i], PAGE_SIZE);
|
||||
i = 0;
|
||||
}
|
||||
if (prp_list[i] == 0) {
|
||||
WPRINTF("PRP2[%d] = 0 !!!", i);
|
||||
err = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
err = pci_nvme_append_iov_req(sc, req,
|
||||
prp_list[i], cpsz,
|
||||
cmd->opc == NVME_OPC_WRITE, lba);
|
||||
if (err)
|
||||
break;
|
||||
|
||||
lba += cpsz;
|
||||
size -= cpsz;
|
||||
i++;
|
||||
}
|
||||
}
|
||||
|
||||
iodone:
|
||||
if (sc->nvstore.type == NVME_STOR_RAM) {
|
||||
uint16_t code, status;
|
||||
|
||||
code = err ? NVME_SC_LBA_OUT_OF_RANGE :
|
||||
NVME_SC_SUCCESS;
|
||||
pci_nvme_status_genc(&status, code);
|
||||
|
||||
pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
|
||||
status, 1);
|
||||
|
||||
continue;
|
||||
}
|
||||
|
||||
|
||||
if (err)
|
||||
goto do_error;
|
||||
|
||||
req->io_req.br_callback = pci_nvme_io_done;
|
||||
|
||||
err = 0;
|
||||
switch (cmd->opc) {
|
||||
case NVME_OPC_READ:
|
||||
err = blockif_read(sc->nvstore.ctx, &req->io_req);
|
||||
break;
|
||||
case NVME_OPC_WRITE:
|
||||
err = blockif_write(sc->nvstore.ctx, &req->io_req);
|
||||
break;
|
||||
default:
|
||||
WPRINTF("%s unhandled io command 0x%x",
|
||||
__func__, cmd->opc);
|
||||
err = 1;
|
||||
}
|
||||
|
||||
do_error:
|
||||
if (err) {
|
||||
uint16_t status;
|
||||
|
||||
nsid = le32toh(cmd->nsid);
|
||||
if ((nsid == 0) || (nsid > sc->ctrldata.nn)) {
|
||||
pci_nvme_status_genc(&status,
|
||||
NVME_SC_DATA_TRANSFER_ERROR);
|
||||
NVME_SC_INVALID_NAMESPACE_OR_FORMAT);
|
||||
status |=
|
||||
NVME_STATUS_DNR_MASK << NVME_STATUS_DNR_SHIFT;
|
||||
goto complete;
|
||||
}
|
||||
|
||||
req = pci_nvme_get_ioreq(sc);
|
||||
if (req == NULL) {
|
||||
pci_nvme_status_genc(&status,
|
||||
NVME_SC_INTERNAL_DEVICE_ERROR);
|
||||
WPRINTF("%s: unable to allocate IO req", __func__);
|
||||
goto complete;
|
||||
}
|
||||
req->nvme_sq = sq;
|
||||
req->sqid = idx;
|
||||
req->opc = cmd->opc;
|
||||
req->cid = cmd->cid;
|
||||
req->nsid = cmd->nsid;
|
||||
|
||||
switch (cmd->opc) {
|
||||
case NVME_OPC_FLUSH:
|
||||
pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
|
||||
break;
|
||||
case NVME_OPC_WRITE:
|
||||
case NVME_OPC_READ:
|
||||
pending = nvme_opc_write_read(sc, cmd, &sc->nvstore,
|
||||
req, &status);
|
||||
break;
|
||||
case NVME_OPC_WRITE_ZEROES:
|
||||
/* TODO: write zeroes
|
||||
WPRINTF("%s write zeroes lba 0x%lx blocks %u",
|
||||
__func__, lba, cmd->cdw12 & 0xFFFF); */
|
||||
pci_nvme_status_genc(&status, NVME_SC_SUCCESS);
|
||||
break;
|
||||
case NVME_OPC_DATASET_MANAGEMENT:
|
||||
pending = nvme_opc_dataset_mgmt(sc, cmd, &sc->nvstore,
|
||||
req, &status);
|
||||
break;
|
||||
default:
|
||||
WPRINTF("%s unhandled io command 0x%x",
|
||||
__func__, cmd->opc);
|
||||
pci_nvme_status_genc(&status, NVME_SC_INVALID_OPCODE);
|
||||
}
|
||||
complete:
|
||||
if (!pending) {
|
||||
pci_nvme_set_completion(sc, sq, idx, cmd->cid, 0,
|
||||
status, 1);
|
||||
pci_nvme_release_ioreq(sc, req);
|
||||
status, 1);
|
||||
if (req != NULL)
|
||||
pci_nvme_release_ioreq(sc, req);
|
||||
}
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user