Improve PCI BAR emulation:

- Respect the MEMEN and PORTEN bits in the command register
- Allow the guest to reprogram the address decoded by the BAR

Submitted by:	Gopakumar T
Obtained from:	NetApp
This commit is contained in:
Neel Natu 2013-04-10 02:12:39 +00:00
parent d58a96538f
commit 028d9311cd
7 changed files with 320 additions and 60 deletions

View File

@ -128,6 +128,7 @@ console_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
static struct inout_port consport = {
"bvmcons",
BVM_CONSOLE_PORT,
1,
IOPORT_F_INOUT,
console_handler
};

View File

@ -105,6 +105,7 @@ dbg_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
static struct inout_port dbgport = {
"bvmdbg",
BVM_DBG_PORT,
1,
IOPORT_F_INOUT,
dbg_handler
};

View File

@ -33,6 +33,7 @@ __FBSDID("$FreeBSD$");
#include <sys/linker_set.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include "inout.h"
@ -41,6 +42,9 @@ SET_DECLARE(inout_port_set, struct inout_port);
#define MAX_IOPORTS (1 << 16)
#define VERIFY_IOPORT(port, size) \
assert((port) >= 0 && (size) > 0 && ((port) + (size)) <= MAX_IOPORTS)
static struct {
const char *name;
int flags;
@ -69,6 +73,23 @@ default_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
return (0);
}
static void
register_default_iohandler(int start, int size)
{
struct inout_port iop;
VERIFY_IOPORT(start, size);
bzero(&iop, sizeof(iop));
iop.name = "default";
iop.port = start;
iop.size = size;
iop.flags = IOPORT_F_INOUT;
iop.handler = default_inout;
register_inout(&iop);
}
int
emulate_inout(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
uint32_t *eax, int strict)
@ -113,17 +134,11 @@ void
init_inout(void)
{
struct inout_port **iopp, *iop;
int i;
/*
* Set up the default handler for all ports
*/
for (i = 0; i < MAX_IOPORTS; i++) {
inout_handlers[i].name = "default";
inout_handlers[i].flags = IOPORT_F_IN | IOPORT_F_OUT;
inout_handlers[i].handler = default_inout;
inout_handlers[i].arg = NULL;
}
register_default_iohandler(0, MAX_IOPORTS);
/*
* Overwrite with specified handlers
@ -141,11 +156,28 @@ init_inout(void)
int
register_inout(struct inout_port *iop)
{
assert(iop->port < MAX_IOPORTS);
inout_handlers[iop->port].name = iop->name;
inout_handlers[iop->port].flags = iop->flags;
inout_handlers[iop->port].handler = iop->handler;
inout_handlers[iop->port].arg = iop->arg;
int i;
VERIFY_IOPORT(iop->port, iop->size);
for (i = iop->port; i < iop->port + iop->size; i++) {
inout_handlers[i].name = iop->name;
inout_handlers[i].flags = iop->flags;
inout_handlers[i].handler = iop->handler;
inout_handlers[i].arg = iop->arg;
}
return (0);
}
int
unregister_inout(struct inout_port *iop)
{
VERIFY_IOPORT(iop->port, iop->size);
assert(inout_handlers[iop->port].name == iop->name);
register_default_iohandler(iop->port, iop->size);
return (0);
}

View File

@ -39,6 +39,7 @@ typedef int (*inout_func_t)(struct vmctx *ctx, int vcpu, int in, int port,
struct inout_port {
const char *name;
int port;
int size;
int flags;
inout_func_t handler;
void *arg;
@ -51,6 +52,7 @@ struct inout_port {
static struct inout_port __CONCAT(__inout_port, __LINE__) = { \
#name, \
(port), \
1, \
(flags), \
(handler), \
0 \
@ -61,7 +63,7 @@ void init_inout(void);
int emulate_inout(struct vmctx *, int vcpu, int in, int port, int bytes,
uint32_t *eax, int strict);
int register_inout(struct inout_port *iop);
int unregister_inout(struct inout_port *iop);
void init_bvmcons(void);
#endif /* _INOUT_H_ */

View File

@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <pthread.h>
#include "mem.h"
@ -71,6 +72,8 @@ RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rb_root, mmio_rb_fallback;
*/
static struct mmio_rb_range *mmio_hint[VM_MAXCPU];
static pthread_rwlock_t rwlock;
static int
mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b)
{
@ -125,10 +128,12 @@ mmio_rb_dump(struct mmio_rb_tree *rbt)
{
struct mmio_rb_range *np;
pthread_rwlock_rdlock(&rwlock);
RB_FOREACH(np, mmio_rb_tree, rbt) {
printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end,
np->mr_param.name);
}
pthread_rwlock_unlock(&rwlock);
}
#endif
@ -161,7 +166,8 @@ emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie)
{
struct mmio_rb_range *entry;
int err;
pthread_rwlock_rdlock(&rwlock);
/*
* First check the per-vCPU cache
*/
@ -173,10 +179,11 @@ emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie)
entry = NULL;
if (entry == NULL) {
if (!mmio_rb_lookup(&mmio_rb_root, paddr, &entry)) {
if (mmio_rb_lookup(&mmio_rb_root, paddr, &entry) == 0) {
/* Update the per-vCPU cache */
mmio_hint[vcpu] = entry;
} else if (mmio_rb_lookup(&mmio_rb_fallback, paddr, &entry)) {
pthread_rwlock_unlock(&rwlock);
return (ESRCH);
}
}
@ -184,25 +191,29 @@ emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, struct vie *vie)
assert(entry != NULL);
err = vmm_emulate_instruction(ctx, vcpu, paddr, vie,
mem_read, mem_write, &entry->mr_param);
pthread_rwlock_unlock(&rwlock);
return (err);
}
static int
register_mem_int(struct mmio_rb_tree *rbt, struct mem_range *memp)
{
struct mmio_rb_range *mrp;
struct mmio_rb_range *entry, *mrp;
int err;
err = 0;
mrp = malloc(sizeof(struct mmio_rb_range));
if (mrp != NULL) {
mrp->mr_param = *memp;
mrp->mr_base = memp->base;
mrp->mr_end = memp->base + memp->size - 1;
err = mmio_rb_add(rbt, mrp);
pthread_rwlock_wrlock(&rwlock);
if (mmio_rb_lookup(rbt, memp->base, &entry) != 0)
err = mmio_rb_add(rbt, mrp);
pthread_rwlock_unlock(&rwlock);
if (err)
free(mrp);
} else
@ -225,10 +236,40 @@ register_mem_fallback(struct mem_range *memp)
return (register_mem_int(&mmio_rb_fallback, memp));
}
int
unregister_mem(struct mem_range *memp)
{
struct mem_range *mr;
struct mmio_rb_range *entry = NULL;
int err, i;
pthread_rwlock_wrlock(&rwlock);
err = mmio_rb_lookup(&mmio_rb_root, memp->base, &entry);
if (err == 0) {
mr = &entry->mr_param;
assert(mr->name == memp->name);
assert(mr->base == memp->base && mr->size == memp->size);
RB_REMOVE(mmio_rb_tree, &mmio_rb_root, entry);
/* flush Per-vCPU cache */
for (i=0; i < VM_MAXCPU; i++) {
if (mmio_hint[i] == entry)
mmio_hint[i] = NULL;
}
}
pthread_rwlock_unlock(&rwlock);
if (entry)
free(entry);
return (err);
}
void
init_mem(void)
{
RB_INIT(&mmio_rb_root);
RB_INIT(&mmio_rb_fallback);
pthread_rwlock_init(&rwlock, NULL);
}

View File

@ -54,5 +54,6 @@ int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, struct vie *vie);
int register_mem(struct mem_range *memp);
int register_mem_fallback(struct mem_range *memp);
int unregister_mem(struct mem_range *memp);
#endif /* _MEM_H_ */

View File

@ -31,6 +31,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/errno.h>
#include <ctype.h>
#include <stdio.h>
@ -38,6 +39,7 @@ __FBSDID("$FreeBSD$");
#include <string.h>
#include <strings.h>
#include <assert.h>
#include <stdbool.h>
#include <machine/vmm.h>
#include <vmmapi.h>
@ -353,20 +355,150 @@ pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
return (pci_emul_alloc_pbar(pdi, idx, 0, type, size));
}
/*
* Register (or unregister) the MMIO or I/O region associated with the BAR
* register 'idx' of an emulated pci device.
*/
static void
modify_bar_registration(struct pci_devinst *pi, int idx, int registration)
{
int error;
struct inout_port iop;
struct mem_range mr;
switch (pi->pi_bar[idx].type) {
case PCIBAR_IO:
bzero(&iop, sizeof(struct inout_port));
iop.name = pi->pi_name;
iop.port = pi->pi_bar[idx].addr;
iop.size = pi->pi_bar[idx].size;
if (registration) {
iop.flags = IOPORT_F_INOUT;
iop.handler = pci_emul_io_handler;
iop.arg = pi;
error = register_inout(&iop);
} else
error = unregister_inout(&iop);
break;
case PCIBAR_MEM32:
case PCIBAR_MEM64:
bzero(&mr, sizeof(struct mem_range));
mr.name = pi->pi_name;
mr.base = pi->pi_bar[idx].addr;
mr.size = pi->pi_bar[idx].size;
if (registration) {
mr.flags = MEM_F_RW;
mr.handler = pci_emul_mem_handler;
mr.arg1 = pi;
mr.arg2 = idx;
error = register_mem(&mr);
} else
error = unregister_mem(&mr);
break;
default:
error = EINVAL;
break;
}
assert(error == 0);
}
static void
unregister_bar(struct pci_devinst *pi, int idx)
{
modify_bar_registration(pi, idx, 0);
}
static void
register_bar(struct pci_devinst *pi, int idx)
{
modify_bar_registration(pi, idx, 1);
}
/* Are we decoding i/o port accesses for the emulated pci device? */
static int
porten(struct pci_devinst *pi)
{
uint16_t cmd;
cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
return (cmd & PCIM_CMD_PORTEN);
}
/* Are we decoding memory accesses for the emulated pci device? */
static int
memen(struct pci_devinst *pi)
{
uint16_t cmd;
cmd = pci_get_cfgdata16(pi, PCIR_COMMAND);
return (cmd & PCIM_CMD_MEMEN);
}
/*
* Update the MMIO or I/O address that is decoded by the BAR register.
*
* If the pci device has enabled the address space decoding then intercept
* the address range decoded by the BAR register.
*/
static void
update_bar_address(struct pci_devinst *pi, uint64_t addr, int idx, int type)
{
int decode;
if (pi->pi_bar[idx].type == PCIBAR_IO)
decode = porten(pi);
else
decode = memen(pi);
if (decode)
unregister_bar(pi, idx);
switch (type) {
case PCIBAR_IO:
case PCIBAR_MEM32:
pi->pi_bar[idx].addr = addr;
break;
case PCIBAR_MEM64:
pi->pi_bar[idx].addr &= ~0xffffffffUL;
pi->pi_bar[idx].addr |= addr;
break;
case PCIBAR_MEMHI64:
pi->pi_bar[idx].addr &= 0xffffffff;
pi->pi_bar[idx].addr |= addr;
break;
default:
assert(0);
}
if (decode)
register_bar(pi, idx);
}
int
pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase,
enum pcibar_type type, uint64_t size)
{
int i, error;
int error;
uint64_t *baseptr, limit, addr, mask, lobits, bar;
struct inout_port iop;
struct mem_range memp;
assert(idx >= 0 && idx <= PCI_BARMAX);
if ((size & (size - 1)) != 0)
size = 1UL << flsl(size); /* round up to a power of 2 */
/* Enforce minimum BAR sizes required by the PCI standard */
if (type == PCIBAR_IO) {
if (size < 4)
size = 4;
} else {
if (size < 16)
size = 16;
}
switch (type) {
case PCIBAR_NONE:
baseptr = NULL;
@ -443,30 +575,7 @@ pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase,
pci_set_cfgdata32(pdi, PCIR_BAR(idx + 1), bar >> 32);
}
/* add a handler to intercept accesses to the I/O bar */
if (type == PCIBAR_IO) {
iop.name = pdi->pi_name;
iop.flags = IOPORT_F_INOUT;
iop.handler = pci_emul_io_handler;
iop.arg = pdi;
for (i = 0; i < size; i++) {
iop.port = addr + i;
register_inout(&iop);
}
} else if (type == PCIBAR_MEM32 || type == PCIBAR_MEM64) {
/* add memory bar intercept handler */
memp.name = pdi->pi_name;
memp.flags = MEM_F_RW;
memp.base = addr;
memp.size = size;
memp.handler = pci_emul_mem_handler;
memp.arg1 = pdi;
memp.arg2 = idx;
error = register_mem(&memp);
assert(error == 0);
}
register_bar(pdi, idx);
return (0);
}
@ -1101,6 +1210,62 @@ pci_emul_cfgaddr(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
}
INOUT_PORT(pci_cfgaddr, CONF1_ADDR_PORT, IOPORT_F_OUT, pci_emul_cfgaddr);
static uint32_t
bits_changed(uint32_t old, uint32_t new, uint32_t mask)
{
return ((old ^ new) & mask);
}
static void
pci_emul_cmdwrite(struct pci_devinst *pi, uint32_t new, int bytes)
{
int i;
uint16_t old;
/*
* The command register is at an offset of 4 bytes and thus the
* guest could write 1, 2 or 4 bytes starting at this offset.
*/
old = pci_get_cfgdata16(pi, PCIR_COMMAND); /* stash old value */
CFGWRITE(pi, PCIR_COMMAND, new, bytes); /* update config */
new = pci_get_cfgdata16(pi, PCIR_COMMAND); /* get updated value */
/*
* If the MMIO or I/O address space decoding has changed then
* register/unregister all BARs that decode that address space.
*/
for (i = 0; i < PCI_BARMAX; i++) {
switch (pi->pi_bar[i].type) {
case PCIBAR_NONE:
case PCIBAR_MEMHI64:
break;
case PCIBAR_IO:
/* I/O address space decoding changed? */
if (bits_changed(old, new, PCIM_CMD_PORTEN)) {
if (porten(pi))
register_bar(pi, i);
else
unregister_bar(pi, i);
}
break;
case PCIBAR_MEM32:
case PCIBAR_MEM64:
/* MMIO address space decoding changed? */
if (bits_changed(old, new, PCIM_CMD_MEMEN)) {
if (memen(pi))
register_bar(pi, i);
else
unregister_bar(pi, i);
}
break;
default:
assert(0);
}
}
}
static int
pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
uint32_t *eax, void *arg)
@ -1108,7 +1273,7 @@ pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
struct pci_devinst *pi;
struct pci_devemu *pe;
int coff, idx, needcfg;
uint64_t mask, bar;
uint64_t addr, bar, mask;
assert(bytes == 1 || bytes == 2 || bytes == 4);
@ -1175,33 +1340,48 @@ pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
if (bytes != 4 || (coff & 0x3) != 0)
return (0);
idx = (coff - PCIR_BAR(0)) / 4;
mask = ~(pi->pi_bar[idx].size - 1);
switch (pi->pi_bar[idx].type) {
case PCIBAR_NONE:
bar = 0;
pi->pi_bar[idx].addr = bar = 0;
break;
case PCIBAR_IO:
mask = ~(pi->pi_bar[idx].size - 1);
mask &= PCIM_BAR_IO_BASE;
bar = (*eax & mask) | PCIM_BAR_IO_SPACE;
addr = *eax & mask;
addr &= 0xffff;
bar = addr | PCIM_BAR_IO_SPACE;
/*
* Register the new BAR value for interception
*/
if (addr != pi->pi_bar[idx].addr) {
update_bar_address(pi, addr, idx,
PCIBAR_IO);
}
break;
case PCIBAR_MEM32:
mask = ~(pi->pi_bar[idx].size - 1);
mask &= PCIM_BAR_MEM_BASE;
bar = *eax & mask;
addr = bar = *eax & mask;
bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_32;
if (addr != pi->pi_bar[idx].addr) {
update_bar_address(pi, addr, idx,
PCIBAR_MEM32);
}
break;
case PCIBAR_MEM64:
mask = ~(pi->pi_bar[idx].size - 1);
mask &= PCIM_BAR_MEM_BASE;
bar = *eax & mask;
addr = bar = *eax & mask;
bar |= PCIM_BAR_MEM_SPACE | PCIM_BAR_MEM_64 |
PCIM_BAR_MEM_PREFETCH;
if (addr != (uint32_t)pi->pi_bar[idx].addr) {
update_bar_address(pi, addr, idx,
PCIBAR_MEM64);
}
break;
case PCIBAR_MEMHI64:
mask = ~(pi->pi_bar[idx - 1].size - 1);
mask &= PCIM_BAR_MEM_BASE;
bar = ((uint64_t)*eax << 32) & mask;
bar = bar >> 32;
addr = ((uint64_t)*eax << 32) & mask;
bar = addr >> 32;
if (bar != pi->pi_bar[idx - 1].addr >> 32) {
update_bar_address(pi, addr, idx - 1,
PCIBAR_MEMHI64);
}
break;
default:
assert(0);
@ -1210,6 +1390,8 @@ pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
} else if (pci_emul_iscap(pi, coff)) {
pci_emul_capwrite(pi, coff, bytes, *eax);
} else if (coff == PCIR_COMMAND) {
pci_emul_cmdwrite(pi, *eax, bytes);
} else {
CFGWRITE(pi, coff, *eax, bytes);
}