bhyve: Fix NVMe data structure copy to guest

bhyve's NVMe emulation was transferring Identify data back to the guest
incorrectly, causing memory corruption. This corruption resulted in
core dumps and other system-level errors in the guest.

In their simplest form, NVMe Physical Region Page (PRP) values in
commands indicate which physical pages to use for data transfer. The
first PRP value is not required to be page aligned but does not cross a
page boundary. The second PRP value must be page aligned, does not cross
a page boundary, and need not be contiguous with PRP1.

The code was copying Identify data past the end of PRP1. This happens to
work if PRP1 and PRP2 are physically contiguous but will corrupt guest
memory in unpredictable ways if they are not.

The fix is to copy the Identify data back to the guest piecewise (i.e.,
once for each PRP entry). Also fix a similar problem when copying back
Log page data.

Reviewed by:	imp (mentor), araujo, jhb, rgrimes, bhyve
Approved by:	imp (mentor), bhyve (jhb)
MFC after:	2 weeks
Differential Revision: https://reviews.freebsd.org/D19695
This commit is contained in:
Chuck Tuffli 2019-04-05 16:54:20 +00:00
parent fe1b713e2c
commit a1daa3ae5e
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=345957

View File

@ -202,6 +202,9 @@ struct pci_nvme_softc {
struct nvme_namespace_data nsdata;
struct nvme_controller_data ctrldata;
struct nvme_error_information_entry err_log;
struct nvme_health_information_page health_log;
struct nvme_firmware_page fw_log;
struct pci_nvme_blockstore nvstore;
@ -368,6 +371,15 @@ pci_nvme_init_nsdata(struct pci_nvme_softc *sc)
nd->flbas = 0;
}
/*
 * Clear the log page buffers kept in the softc so that each one starts out
 * as all zero bytes.
 */
static void
pci_nvme_init_logpages(struct pci_nvme_softc *sc)
{
	/* Error information log */
	memset(&sc->err_log, 0, sizeof(sc->err_log));

	/* SMART/Health information log */
	memset(&sc->health_log, 0, sizeof(sc->health_log));

	/* Firmware slot information log */
	memset(&sc->fw_log, 0, sizeof(sc->fw_log));
}
static void
pci_nvme_reset_locked(struct pci_nvme_softc *sc)
{
@ -457,6 +469,47 @@ pci_nvme_init_controller(struct vmctx *ctx, struct pci_nvme_softc *sc)
__func__, sc->regs.acq, sc->compl_queues[0].qbase));
}
/*
 * Copy len bytes from src into the guest memory described by a PRP1/PRP2
 * pair.
 *
 * The data is written in at most two pieces. The first piece starts at prp1
 * and runs to the end of the physical page containing prp1 (or stops sooner
 * if len is smaller). Whatever remains is written starting at prp2, limited
 * to PAGE_SIZE bytes. Each piece is mapped with its own vm_map_gpa() call,
 * so the two guest addresses are never treated as one contiguous region.
 *
 * Returns 0 on success, or -1 if len is larger than 8 KiB or if either
 * guest address cannot be mapped.
 *
 * NOTE(review): if more than PAGE_SIZE bytes remain after the first piece,
 * the excess is silently dropped and 0 is still returned -- confirm that no
 * caller depends on the whole buffer being copied in that case.
 */
static int
nvme_prp_memcpy(struct vmctx *ctx, uint64_t prp1, uint64_t prp2, uint8_t *src,
    size_t len)
{
	size_t head, tail;
	uint8_t *gpa;

	if (len > (8 * 1024))
		return (-1);

	/* First piece: from prp1 to the end of its page, capped at len. */
	head = PAGE_SIZE - (prp1 & PAGE_MASK);
	if (head > len)
		head = len;

	gpa = vm_map_gpa(ctx, prp1, head);
	if (gpa == NULL)
		return (-1);
	memcpy(gpa, src, head);

	tail = len - head;
	if (tail == 0)
		return (0);

	/* Second piece: the remainder goes to prp2, at most one page. */
	if (tail > PAGE_SIZE)
		tail = PAGE_SIZE;

	gpa = vm_map_gpa(ctx, prp2, tail);
	if (gpa == NULL)
		return (-1);
	memcpy(gpa, src + head, tail);

	return (0);
}
static int
nvme_opc_delete_io_sq(struct pci_nvme_softc* sc, struct nvme_command* command,
struct nvme_completion* compl)
@ -590,26 +643,24 @@ nvme_opc_get_log_page(struct pci_nvme_softc* sc, struct nvme_command* command,
{
uint32_t logsize = (1 + ((command->cdw10 >> 16) & 0xFFF)) * 2;
uint8_t logpage = command->cdw10 & 0xFF;
void *data;
DPRINTF(("%s log page %u len %u\r\n", __func__, logpage, logsize));
if (logpage >= 1 && logpage <= 3)
data = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1,
PAGE_SIZE);
pci_nvme_status_genc(&compl->status, NVME_SC_SUCCESS);
switch (logpage) {
case 0x01: /* Error information */
memset(data, 0, logsize > PAGE_SIZE ? PAGE_SIZE : logsize);
case NVME_LOG_ERROR:
nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1,
command->prp2, (uint8_t *)&sc->err_log, logsize);
break;
case 0x02: /* SMART/Health information */
case NVME_LOG_HEALTH_INFORMATION:
/* TODO: present some smart info */
memset(data, 0, logsize > PAGE_SIZE ? PAGE_SIZE : logsize);
nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1,
command->prp2, (uint8_t *)&sc->health_log, logsize);
break;
case 0x03: /* Firmware slot information */
memset(data, 0, logsize > PAGE_SIZE ? PAGE_SIZE : logsize);
case NVME_LOG_FIRMWARE_SLOT:
nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1,
command->prp2, (uint8_t *)&sc->fw_log, logsize);
break;
default:
WPRINTF(("%s get log page %x command not supported\r\n",
@ -633,14 +684,13 @@ nvme_opc_identify(struct pci_nvme_softc* sc, struct nvme_command* command,
switch (command->cdw10 & 0xFF) {
case 0x00: /* return Identify Namespace data structure */
dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1,
sizeof(sc->nsdata));
memcpy(dest, &sc->nsdata, sizeof(sc->nsdata));
nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1,
command->prp2, (uint8_t *)&sc->nsdata, sizeof(sc->nsdata));
break;
case 0x01: /* return Identify Controller data structure */
dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1,
sizeof(sc->ctrldata));
memcpy(dest, &sc->ctrldata, sizeof(sc->ctrldata));
nvme_prp_memcpy(sc->nsc_pi->pi_vmctx, command->prp1,
command->prp2, (uint8_t *)&sc->ctrldata,
sizeof(sc->ctrldata));
break;
case 0x02: /* list of 1024 active NSIDs > CDW1.NSID */
dest = vm_map_gpa(sc->nsc_pi->pi_vmctx, command->prp1,
@ -1881,6 +1931,7 @@ pci_nvme_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
pci_nvme_reset(sc);
pci_nvme_init_ctrldata(sc);
pci_nvme_init_nsdata(sc);
pci_nvme_init_logpages(sc);
pci_lintr_request(pi);