Rework how guest MMIO regions are dealt with.

- New memory region interface. An RB tree holds the regions,
with a last-found per-vCPU cache to deal with the common case
of repeated guest accesses to MMIO registers in the same page
(a standalone sketch of this lookup scheme follows the
changed-files summary below).

- Support memory-mapped BARs in PCI emulation.

 mem.c/h - memory region interface

 instruction_emul.c/h - remove old region interface.
 Use gpa from EPT exit to avoid a tablewalk to
 determine operand address. Determine operand size
 and use when calling through to region handler.

 fbsdrun.c - call into region interface on paging
  exit. Distinguish between instruction emul error
  and region not found

 pci_emul.c/h - implement new BAR callback api.
 Split BAR alloc routine into routines that
 require/don't require the BAR phys address.

 ioapic.c
 pci_passthru.c
 pci_virtio_block.c
 pci_virtio_net.c
 pci_uart.c  - update to new BAR callback i/f

Reviewed by:	neel
Obtained from:	NetApp
Peter Grehan 2012-10-19 18:11:17 +00:00
parent 13ec93719a
commit 4d1e669cad
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/projects/bhyve/; revision=241744
13 changed files with 852 additions and 376 deletions
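
A small standalone sketch of the lookup scheme the commit message describes: an RB tree of
inclusive [base, end] MMIO ranges plus a per-vCPU "last found" hint. All names here
(add_range, lookup, SKETCH_MAXCPU) are illustrative, not the bhyve interface; the real code
is register_mem()/emulate_mem() in the new mem.c/mem.h further down.

/*
 * Standalone illustration only -- compile on FreeBSD with:
 *   cc -o range_sketch range_sketch.c
 */
#include <sys/tree.h>

#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

#define SKETCH_MAXCPU	4		/* stand-in for VM_MAXCPU */

struct range {
	RB_ENTRY(range)	link;
	uint64_t	base, end;	/* inclusive limits */
	const char	*name;
};

static int
range_cmp(struct range *a, struct range *b)
{
	if (a->end < b->base)
		return (-1);
	if (a->base > b->end)
		return (1);
	return (0);			/* overlapping ranges compare equal */
}

static RB_HEAD(range_tree, range) root = RB_INITIALIZER(&root);
RB_GENERATE_STATIC(range_tree, range, link, range_cmp);

static struct range *hint[SKETCH_MAXCPU];	/* per-vCPU last-found cache */

static int
add_range(uint64_t base, uint64_t size, const char *name)
{
	struct range *r;

	if ((r = calloc(1, sizeof(*r))) == NULL)
		return (ENOMEM);
	r->base = base;
	r->end = base + size - 1;
	r->name = name;
	if (RB_INSERT(range_tree, &root, r) != NULL) {	/* overlap detected */
		free(r);
		return (EEXIST);
	}
	return (0);
}

static struct range *
lookup(int vcpu, uint64_t gpa)
{
	struct range key = { .base = gpa, .end = gpa };
	struct range *r = hint[vcpu];

	if (r != NULL && gpa >= r->base && gpa <= r->end)
		return (r);		/* fast path: repeated access to same range */
	r = RB_FIND(range_tree, &root, &key);
	if (r != NULL)
		hint[vcpu] = r;		/* refresh the hint on a tree hit */
	return (r);
}

int
main(void)
{
	struct range *r;

	add_range(0xfec00000, 4096, "ioapic");
	add_range(0xc0000000, 4096, "dummy-bar1");

	r = lookup(0, 0xfec00010);	/* tree walk, fills the vCPU 0 hint */
	printf("0xfec00010 -> %s\n", r != NULL ? r->name : "unhandled");
	r = lookup(0, 0xfec00044);	/* same range: served from the hint */
	printf("0xfec00044 -> %s\n", r != NULL ? r->name : "unhandled");
	r = lookup(0, 0xd0000000);	/* miss: hint untouched, caller sees NULL */
	printf("0xd0000000 -> %s\n", r != NULL ? r->name : "unhandled");
	return (0);
}

Keeping the hint per vCPU also explains why no locking is needed: as the comment in mem.c
notes, all ranges are registered in single-threaded setup before the vCPUs start running.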

usr.sbin/bhyve/Makefile

@ -5,7 +5,7 @@
PROG= bhyve
SRCS= atpic.c consport.c dbgport.c elcr.c fbsdrun.c inout.c
SRCS+= instruction_emul.c ioapic.c mevent.c
SRCS+= instruction_emul.c ioapic.c mem.c mevent.c
SRCS+= pci_emul.c pci_hostbridge.c pci_passthru.c pci_virtio_block.c
SRCS+= pci_virtio_net.c pci_uart.c pit_8254.c post.c rtc.c uart.c xmsr.c
SRCS+= spinup_ap.c

usr.sbin/bhyve/fbsdrun.c

@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
#include "fbsdrun.h"
#include "inout.h"
#include "dbgport.h"
#include "mem.h"
#include "mevent.h"
#include "pci_emul.h"
#include "xmsr.h"
@ -446,11 +447,21 @@ vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
static int
vmexit_paging(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
int err;
stats.vmexit_paging++;
if (emulate_instruction(ctx, *pvcpu, vmexit->rip, vmexit->u.paging.cr3) != 0) {
printf("Failed to emulate instruction at 0x%lx\n", vmexit->rip);
err = emulate_mem(ctx, *pvcpu, vmexit->u.paging.gpa, vmexit->rip,
vmexit->u.paging.cr3, vmexit->u.paging.rwx);
if (err) {
if (err == EINVAL) {
printf("Failed to emulate instruction at 0x%lx\n",
vmexit->rip);
} else if (err == ESRCH) {
printf("Unhandled memory access to 0x%lx\n",
vmexit->u.paging.gpa);
}
return (VMEXIT_ABORT);
}

usr.sbin/bhyve/instruction_emul.c

@ -28,10 +28,12 @@
#include <strings.h>
#include <unistd.h>
#include <assert.h>
#include <machine/vmm.h>
#include <vmmapi.h>
#include "fbsdrun.h"
#include "mem.h"
#include "instruction_emul.h"
#define PREFIX_LOCK 0xF0
@ -46,6 +48,7 @@
#define PREFIX_BRANCH_NOT_TAKEN 0x2E
#define PREFIX_BRANCH_TAKEN 0x3E
#define PREFIX_OPSIZE 0x66
#define is_opsz_prefix(x) ((x) == PREFIX_OPSIZE)
#define PREFIX_ADDRSIZE 0x67
#define OPCODE_2BYTE_ESCAPE 0x0F
@ -95,6 +98,11 @@
#define FROM_REG (1<<2)
#define TO_RM (1<<3)
#define TO_REG (1<<4)
#define ZEXT (1<<5)
#define FROM_8 (1<<6)
#define FROM_16 (1<<7)
#define TO_8 (1<<8)
#define TO_16 (1<<9)
#define REX_MASK 0xF0
#define REX_PREFIX 0x40
@ -118,16 +126,7 @@
#define PML4E_OFFSET_MASK 0x0000FF8000000000
#define PML4E_SHIFT 39
#define MAX_EMULATED_REGIONS 8
int registered_regions = 0;
struct memory_region
{
uintptr_t start;
uintptr_t end;
emulated_read_func_t memread;
emulated_write_func_t memwrite;
void *arg;
} emulated_regions[MAX_EMULATED_REGIONS];
#define INSTR_VERIFY
struct decoded_instruction
{
@ -138,11 +137,12 @@ struct decoded_instruction
uint8_t *displacement;
uint8_t *immediate;
uint8_t opcode_flags;
uint16_t opcode_flags;
uint8_t addressing_mode;
uint8_t rm;
uint8_t reg;
uint8_t opsz;
uint8_t rex_r;
uint8_t rex_w;
uint8_t rex_b;
@ -170,11 +170,17 @@ static enum vm_reg_name vm_reg_name_mappings[] = {
[REG_R15] = VM_REG_GUEST_R15
};
uint8_t one_byte_opcodes[256] = {
[0x89] = HAS_MODRM | FROM_REG | TO_RM,
uint16_t one_byte_opcodes[256] = {
[0x88] = HAS_MODRM | FROM_REG | TO_RM | TO_8 | FROM_8,
[0x89] = HAS_MODRM | FROM_REG | TO_RM,
[0x8B] = HAS_MODRM | FROM_RM | TO_REG,
};
uint16_t two_byte_opcodes[256] = {
[0xB6] = HAS_MODRM | FROM_RM | TO_REG | ZEXT | FROM_8,
[0xB7] = HAS_MODRM | FROM_RM | TO_REG | ZEXT | FROM_16,
};
static uintptr_t
gla2gpa(uint64_t gla, uint64_t guest_cr3)
{
@ -211,7 +217,8 @@ gla2hla(uint64_t gla, uint64_t guest_cr3)
uintptr_t gpa;
gpa = gla2gpa(gla, guest_cr3);
return paddr_guest2host(gpa);
return (paddr_guest2host(gpa));
}
/*
@ -232,6 +239,9 @@ decode_prefixes(struct decoded_instruction *decoded)
decoded->rex_x = *current_prefix & REX_X_MASK;
decoded->rex_b = *current_prefix & REX_B_MASK;
current_prefix++;
} else if (is_opsz_prefix(*current_prefix)) {
decoded->opsz = 1;
current_prefix++;
} else if (is_prefix(*current_prefix)) {
return (-1);
}
@ -248,16 +258,26 @@ decode_prefixes(struct decoded_instruction *decoded)
static int
decode_opcode(struct decoded_instruction *decoded)
{
uint8_t opcode, flags;
uint8_t opcode;
uint16_t flags;
int extra;
opcode = *decoded->opcode;
flags = one_byte_opcodes[opcode];
extra = 0;
if (opcode != 0xf)
flags = one_byte_opcodes[opcode];
else {
opcode = *(decoded->opcode + 1);
flags = two_byte_opcodes[opcode];
extra = 1;
}
if (!flags)
return (-1);
if (flags & HAS_MODRM) {
decoded->modrm = decoded->opcode + 1;
decoded->modrm = decoded->opcode + 1 + extra;
}
decoded->opcode_flags = flags;
@ -381,37 +401,70 @@ decode_instruction(void *instr, struct decoded_instruction *decoded)
return (0);
}
static struct memory_region *
find_region(uintptr_t addr)
{
int i;
for (i = 0; i < registered_regions; ++i) {
if (emulated_regions[i].start <= addr &&
emulated_regions[i].end >= addr) {
return &emulated_regions[i];
}
}
return (0);
}
static enum vm_reg_name
get_vm_reg_name(uint8_t reg)
{
return vm_reg_name_mappings[reg];
return (vm_reg_name_mappings[reg]);
}
static uint64_t
adjust_operand(const struct decoded_instruction *instruction, uint64_t val,
int size)
{
uint64_t ret;
if (instruction->opcode_flags & ZEXT) {
switch (size) {
case 1:
ret = val & 0xff;
break;
case 2:
ret = val & 0xffff;
break;
case 4:
ret = val & 0xffffffff;
break;
case 8:
ret = val;
break;
default:
break;
}
} else {
/*
* Extend the sign
*/
switch (size) {
case 1:
ret = (int8_t)(val & 0xff);
break;
case 2:
ret = (int16_t)(val & 0xffff);
break;
case 4:
ret = (int32_t)(val & 0xffffffff);
break;
case 8:
ret = val;
break;
default:
break;
}
}
return (ret);
}
static int
get_operand(struct vmctx *vm, int vcpu, uint64_t guest_cr3,
const struct decoded_instruction *instruction, uint64_t *operand)
get_operand(struct vmctx *vm, int vcpu, uint64_t gpa, uint64_t guest_cr3,
const struct decoded_instruction *instruction, uint64_t *operand,
struct mem_range *mr)
{
enum vm_reg_name regname;
uint64_t reg;
uintptr_t target;
int error;
uint8_t rm, addressing_mode;
struct memory_region *emulated_memory;
uint8_t rm, addressing_mode, size;
if (instruction->opcode_flags & FROM_RM) {
rm = instruction->rm;
@ -422,6 +475,17 @@ get_operand(struct vmctx *vm, int vcpu, uint64_t guest_cr3,
} else
return (-1);
/*
* Determine size of operand
*/
size = 4;
if (instruction->opcode_flags & FROM_8) {
size = 1;
} else if (instruction->opcode_flags & FROM_16 ||
instruction->opsz) {
size = 2;
}
regname = get_vm_reg_name(rm);
error = vm_get_register(vm, vcpu, regname, &reg);
if (error)
@ -430,33 +494,67 @@ get_operand(struct vmctx *vm, int vcpu, uint64_t guest_cr3,
switch (addressing_mode) {
case MOD_DIRECT:
*operand = reg;
return (0);
error = 0;
break;
case MOD_INDIRECT:
case MOD_INDIRECT_DISP8:
case MOD_INDIRECT_DISP32:
#ifdef INSTR_VERIFY
{
uintptr_t target;
target = gla2gpa(reg, guest_cr3);
target += instruction->disp;
emulated_memory = find_region(target);
if (emulated_memory) {
return emulated_memory->memread(vm, vcpu, target,
4, operand,
emulated_memory->arg);
}
return (-1);
assert(gpa == target);
}
#endif
error = (*mr->handler)(vm, vcpu, MEM_F_READ, gpa, size,
operand, mr->arg1, mr->arg2);
break;
default:
return (-1);
}
if (!error)
*operand = adjust_operand(instruction, *operand, size);
return (error);
}
static uint64_t
adjust_write(uint64_t reg, uint64_t operand, int size)
{
uint64_t val;
switch (size) {
case 1:
val = (reg & ~0xff) | (operand & 0xff);
break;
case 2:
val = (reg & ~0xffff) | (operand & 0xffff);
break;
case 4:
val = (reg & ~0xffffffff) | (operand & 0xffffffff);
break;
case 8:
val = operand;
default:
break;
}
return (val);
}
static int
perform_write(struct vmctx *vm, int vcpu, uint64_t guest_cr3,
const struct decoded_instruction *instruction, uint64_t operand)
perform_write(struct vmctx *vm, int vcpu, uint64_t gpa, uint64_t guest_cr3,
const struct decoded_instruction *instruction, uint64_t operand,
struct mem_range *mr)
{
enum vm_reg_name regname;
uintptr_t target;
int error;
int size;
uint64_t reg;
struct memory_region *emulated_memory;
uint8_t addressing_mode;
if (instruction->opcode_flags & TO_RM) {
@ -467,83 +565,77 @@ perform_write(struct vmctx *vm, int vcpu, uint64_t guest_cr3,
addressing_mode = MOD_DIRECT;
} else
return (-1);
regname = get_vm_reg_name(reg);
error = vm_get_register(vm, vcpu, regname, &reg);
if (error)
return (error);
/*
* Determine the operand size. rex.w has priority
*/
size = 4;
if (instruction->rex_w) {
size = 8;
} else if (instruction->opcode_flags & TO_8) {
size = 1;
} else if (instruction->opsz) {
size = 2;
};
switch(addressing_mode) {
case MOD_DIRECT:
return vm_set_register(vm, vcpu, regname, operand);
regname = get_vm_reg_name(reg);
error = vm_get_register(vm, vcpu, regname, &reg);
if (error)
return (error);
operand = adjust_write(reg, operand, size);
return (vm_set_register(vm, vcpu, regname, operand));
case MOD_INDIRECT:
case MOD_INDIRECT_DISP8:
case MOD_INDIRECT_DISP32:
#ifdef INSTR_VERIFY
regname = get_vm_reg_name(reg);
error = vm_get_register(vm, vcpu, regname, &reg);
assert(!error);
target = gla2gpa(reg, guest_cr3);
target += instruction->disp;
emulated_memory = find_region(target);
if (emulated_memory) {
return emulated_memory->memwrite(vm, vcpu, target,
4, operand,
emulated_memory->arg);
}
return (-1);
assert(gpa == target);
#endif
error = (*mr->handler)(vm, vcpu, MEM_F_WRITE, gpa, size,
&operand, mr->arg1, mr->arg2);
return (error);
default:
return (-1);
}
}
static int
emulate_decoded_instruction(struct vmctx *vm, int vcpu, uint64_t cr3,
const struct decoded_instruction *instruction)
emulate_decoded_instruction(struct vmctx *vm, int vcpu, uint64_t gpa,
uint64_t cr3,
const struct decoded_instruction *instruction,
struct mem_range *mr)
{
uint64_t operand;
int error;
error = get_operand(vm, vcpu, cr3, instruction, &operand);
error = get_operand(vm, vcpu, gpa, cr3, instruction, &operand, mr);
if (error)
return (error);
return perform_write(vm, vcpu, cr3, instruction, operand);
return perform_write(vm, vcpu, gpa, cr3, instruction, operand, mr);
}
int
emulate_instruction(struct vmctx *vm, int vcpu, uint64_t rip, uint64_t cr3)
int
emulate_instruction(struct vmctx *vm, int vcpu, uint64_t rip, uint64_t cr3,
uint64_t gpa, int flags, struct mem_range *mr)
{
struct decoded_instruction instr;
int error;
void *instruction = gla2hla(rip, cr3);
void *instruction;
if ((error = decode_instruction(instruction, &instr)) != 0)
return (error);
instruction = gla2hla(rip, cr3);
return emulate_decoded_instruction(vm, vcpu, cr3, &instr);
}
struct memory_region *
register_emulated_memory(uintptr_t start, size_t len, emulated_read_func_t memread,
emulated_write_func_t memwrite, void *arg)
{
if (registered_regions >= MAX_EMULATED_REGIONS)
return (NULL);
struct memory_region *region = &emulated_regions[registered_regions];
region->start = start;
region->end = start + len;
region->memread = memread;
region->memwrite = memwrite;
region->arg = arg;
registered_regions++;
return (region);
}
void
move_memory_region(struct memory_region *region, uintptr_t start)
{
size_t len;
len = region->end - region->start;
region->start = start;
region->end = start + len;
error = decode_instruction(instruction, &instr);
if (!error)
error = emulate_decoded_instruction(vm, vcpu, gpa, cr3,
&instr, mr);
return (error);
}

usr.sbin/bhyve/instruction_emul.h

@ -29,19 +29,8 @@
#ifndef _INSTRUCTION_EMUL_H_
#define _INSTRUCTION_EMUL_H_
struct memory_region;
typedef int (*emulated_read_func_t)(struct vmctx *vm, int vcpu, uintptr_t addr,
int size, uint64_t *data, void *arg);
typedef int (*emulated_write_func_t)(struct vmctx *vm, int vcpu, uintptr_t addr,
int size, uint64_t data, void *arg);
int emulate_instruction(struct vmctx *vm, int vcpu, uint64_t rip,
uint64_t cr3);
struct memory_region *register_emulated_memory(uintptr_t start, size_t len,
emulated_read_func_t memread,
emulated_write_func_t memwrite,
void *arg);
void move_memory_region(struct memory_region *memory_region, uintptr_t start);
uint64_t cr3, uint64_t gpa, int flags,
struct mem_range *mr);
#endif

usr.sbin/bhyve/ioapic.c

@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
#include <vmmapi.h>
#include "inout.h"
#include "mem.h"
#include "instruction_emul.h"
#include "fbsdrun.h"
@ -67,10 +68,13 @@ struct ioapic {
static struct ioapic ioapics[1]; /* only a single ioapic for now */
static int ioapic_region_read(struct vmctx *vm, int vcpu, uintptr_t paddr,
int size, uint64_t *data, void *arg);
static int ioapic_region_write(struct vmctx *vm, int vcpu, uintptr_t paddr,
int size, uint64_t data, void *arg);
static int ioapic_region_read(struct ioapic *ioapic, uintptr_t paddr,
int size, uint64_t *data);
static int ioapic_region_write(struct ioapic *ioapic, uintptr_t paddr,
int size, uint64_t data);
static int ioapic_region_handler(struct vmctx *vm, int vcpu, int dir,
uintptr_t paddr, int size, uint64_t *val,
void *arg1, long arg2);
static void
ioapic_set_pinstate(struct vmctx *ctx, int pin, bool newstate)
@ -139,8 +143,10 @@ ioapic_assert_pin(struct vmctx *ctx, int pin)
void
ioapic_init(int which)
{
int i;
struct mem_range memp;
struct ioapic *ioapic;
int error;
int i;
assert(which == 0);
@ -153,14 +159,19 @@ ioapic_init(int which)
for (i = 0; i < REDIR_ENTRIES; i++)
ioapic->redtbl[i] = 0x0001000000010000UL;
/* Register emulated memory region */
ioapic->paddr = IOAPIC_PADDR;
ioapic->region = register_emulated_memory(ioapic->paddr,
sizeof(struct IOAPIC),
ioapic_region_read,
ioapic_region_write,
(void *)(uintptr_t)which);
assert(ioapic->region != NULL);
/* Register emulated memory region */
memp.name = "ioapic";
memp.flags = MEM_F_RW;
memp.handler = ioapic_region_handler;
memp.arg1 = ioapic;
memp.arg2 = which;
memp.base = ioapic->paddr;
memp.size = sizeof(struct IOAPIC);
error = register_mem(&memp);
assert (error == 0);
ioapic->inited = 1;
}
@ -237,15 +248,11 @@ ioapic_write(struct ioapic *ioapic, uint32_t addr, uint32_t data)
}
static int
ioapic_region_read(struct vmctx *vm, int vcpu, uintptr_t paddr, int size,
uint64_t *data, void *arg)
ioapic_region_read(struct ioapic *ioapic, uintptr_t paddr, int size,
uint64_t *data)
{
int which, offset;
struct ioapic *ioapic;
int offset;
which = (uintptr_t)arg;
ioapic = &ioapics[which];
offset = paddr - ioapic->paddr;
/*
@ -255,7 +262,7 @@ ioapic_region_read(struct vmctx *vm, int vcpu, uintptr_t paddr, int size,
if (size != 4 || (offset != IOREGSEL && offset != IOWIN)) {
#if 1
printf("invalid access to ioapic%d: size %d, offset %d\n",
which, size, offset);
(int)(ioapic - ioapics), size, offset);
#endif
*data = 0;
return (0);
@ -270,15 +277,11 @@ ioapic_region_read(struct vmctx *vm, int vcpu, uintptr_t paddr, int size,
}
static int
ioapic_region_write(struct vmctx *vm, int vcpu, uintptr_t paddr, int size,
uint64_t data, void *arg)
ioapic_region_write(struct ioapic *ioapic, uintptr_t paddr, int size,
uint64_t data)
{
int which, offset;
struct ioapic *ioapic;
int offset;
which = (uintptr_t)arg;
ioapic = &ioapics[which];
offset = paddr - ioapic->paddr;
/*
@ -288,7 +291,7 @@ ioapic_region_write(struct vmctx *vm, int vcpu, uintptr_t paddr, int size,
if (size != 4 || (offset != IOREGSEL && offset != IOWIN)) {
#if 1
printf("invalid access to ioapic%d: size %d, offset %d\n",
which, size, offset);
(int)(ioapic - ioapics), size, offset);
#endif
return (0);
}
@ -300,3 +303,23 @@ ioapic_region_write(struct vmctx *vm, int vcpu, uintptr_t paddr, int size,
return (0);
}
static int
ioapic_region_handler(struct vmctx *vm, int vcpu, int dir, uintptr_t paddr,
int size, uint64_t *val, void *arg1, long arg2)
{
struct ioapic *ioapic;
int which;
ioapic = arg1;
which = arg2;
assert(ioapic == &ioapics[which]);
if (dir == MEM_F_READ)
ioapic_region_read(ioapic, paddr, size, val);
else
ioapic_region_write(ioapic, paddr, size, *val);
return (0);
}

usr.sbin/bhyve/mem.c Normal file

@ -0,0 +1,196 @@
/*-
* Copyright (c) 2012 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
/*
* Memory ranges are represented with an RB tree. On insertion, the range
* is checked for overlaps. On lookup, the key has the same base and limit
* so it can be searched within the range.
*
* It is assumed that all setup of ranges takes place in single-threaded
* mode before vCPUs have been started. As such, no locks are used on the
* RB tree. If this is no longer the case, then a r/w lock could be used,
* with readers on the lookup and a writer if the tree needs to be changed
* (and per vCPU caches flushed)
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/tree.h>
#include <sys/errno.h>
#include <machine/vmm.h>
#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include "mem.h"
#include "instruction_emul.h"
struct mmio_rb_range {
RB_ENTRY(mmio_rb_range) mr_link; /* RB tree links */
struct mem_range mr_param;
uint64_t mr_base;
uint64_t mr_end;
};
struct mmio_rb_tree;
RB_PROTOTYPE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);
RB_HEAD(mmio_rb_tree, mmio_rb_range) mmio_rbroot;
/*
* Per-vCPU cache. Since most accesses from a vCPU will be to
* consecutive addresses in a range, it makes sense to cache the
* result of a lookup.
*/
static struct mmio_rb_range *mmio_hint[VM_MAXCPU];
static int
mmio_rb_range_compare(struct mmio_rb_range *a, struct mmio_rb_range *b)
{
if (a->mr_end < b->mr_base)
return (-1);
else if (a->mr_base > b->mr_end)
return (1);
return (0);
}
static int
mmio_rb_lookup(uint64_t addr, struct mmio_rb_range **entry)
{
struct mmio_rb_range find, *res;
find.mr_base = find.mr_end = addr;
res = RB_FIND(mmio_rb_tree, &mmio_rbroot, &find);
if (res != NULL) {
*entry = res;
return (0);
}
return (ENOENT);
}
static int
mmio_rb_add(struct mmio_rb_range *new)
{
struct mmio_rb_range *overlap;
overlap = RB_INSERT(mmio_rb_tree, &mmio_rbroot, new);
if (overlap != NULL) {
#ifdef RB_DEBUG
printf("overlap detected: new %lx:%lx, tree %lx:%lx\n",
new->mr_base, new->mr_end,
overlap->mr_base, overlap->mr_end);
#endif
return (EEXIST);
}
return (0);
}
#if 0
static void
mmio_rb_dump(void)
{
struct mmio_rb_range *np;
RB_FOREACH(np, mmio_rb_tree, &mmio_rbroot) {
printf(" %lx:%lx, %s\n", np->mr_base, np->mr_end,
np->mr_param.name);
}
}
#endif
RB_GENERATE(mmio_rb_tree, mmio_rb_range, mr_link, mmio_rb_range_compare);
int
emulate_mem(struct vmctx *ctx, int vcpu, uint64_t paddr, uint64_t rip,
uint64_t cr3, int mode)
{
struct mmio_rb_range *entry;
int err;
err = 0;
/*
* First check the per-vCPU cache
*/
if (mmio_hint[vcpu] &&
paddr >= mmio_hint[vcpu]->mr_base &&
paddr <= mmio_hint[vcpu]->mr_end) {
err = emulate_instruction(ctx, vcpu, rip, cr3, paddr, mode,
&mmio_hint[vcpu]->mr_param);
} else {
if (mmio_rb_lookup(paddr, &entry)) {
err = ENOENT;
} else {
mmio_hint[vcpu] = entry;
err = emulate_instruction(ctx, vcpu, rip, cr3, paddr,
mode, &entry->mr_param);
}
}
return (err);
}
int
register_mem(struct mem_range *memp)
{
struct mmio_rb_range *mrp;
int err;
err = 0;
mrp = malloc(sizeof(struct mmio_rb_range));
if (mrp != NULL) {
mrp->mr_param = *memp;
mrp->mr_base = memp->base;
mrp->mr_end = memp->base + memp->size - 1;
err = mmio_rb_add(mrp);
if (err)
free(mrp);
} else
err = ENOMEM;
return (err);
}
void
init_mem(void)
{
RB_INIT(&mmio_rbroot);
}

usr.sbin/bhyve/mem.h Normal file

@ -0,0 +1,58 @@
/*-
* Copyright (c) 2012 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _MEM_H_
#define _MEM_H_
#include <sys/linker_set.h>
struct vmctx;
typedef int (*mem_func_t)(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
int size, uint64_t *val, void *arg1, long arg2);
struct mem_range {
const char *name;
int flags;
mem_func_t handler;
void *arg1;
long arg2;
uint64_t base;
uint64_t size;
};
#define MEM_F_READ 0x1
#define MEM_F_WRITE 0x2
#define MEM_F_RW 0x3
void init_mem(void);
int emulate_mem(struct vmctx *, int vcpu, uint64_t paddr, uint64_t rip,
uint64_t cr3, int mode);
int register_mem(struct mem_range *memp);
#endif /* _MEM_H_ */

usr.sbin/bhyve/pci_emul.c

@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include "fbsdrun.h"
#include "inout.h"
#include "mem.h"
#include "pci_emul.h"
#include "ioapic.h"
@ -364,28 +365,58 @@ pci_finish_mptable_names(void)
}
static int
pci_emul_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
uint32_t *eax, void *arg)
pci_emul_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
uint32_t *eax, void *arg)
{
struct pci_devinst *pdi = arg;
struct pci_devemu *pe = pdi->pi_d;
int offset, i;
uint64_t offset;
int i;
for (i = 0; i <= PCI_BARMAX; i++) {
if (pdi->pi_bar[i].type == PCIBAR_IO &&
port >= pdi->pi_bar[i].addr &&
port + bytes <= pdi->pi_bar[i].addr + pdi->pi_bar[i].size) {
port + bytes <=
pdi->pi_bar[i].addr + pdi->pi_bar[i].size) {
offset = port - pdi->pi_bar[i].addr;
if (in)
*eax = (*pe->pe_ior)(pdi, i, offset, bytes);
*eax = (*pe->pe_barread)(ctx, vcpu, pdi, i,
offset, bytes);
else
(*pe->pe_iow)(pdi, i, offset, bytes, *eax);
(*pe->pe_barwrite)(ctx, vcpu, pdi, i, offset,
bytes, *eax);
return (0);
}
}
return (-1);
}
static int
pci_emul_mem_handler(struct vmctx *ctx, int vcpu, int dir, uint64_t addr,
int size, uint64_t *val, void *arg1, long arg2)
{
struct pci_devinst *pdi = arg1;
struct pci_devemu *pe = pdi->pi_d;
uint64_t offset;
int bidx = (int) arg2;
assert(bidx <= PCI_BARMAX);
assert(pdi->pi_bar[bidx].type == PCIBAR_MEM32 ||
pdi->pi_bar[bidx].type == PCIBAR_MEM64);
assert(addr >= pdi->pi_bar[bidx].addr &&
addr + size <= pdi->pi_bar[bidx].addr + pdi->pi_bar[bidx].size);
offset = addr - pdi->pi_bar[bidx].addr;
if (dir == MEM_F_WRITE)
(*pe->pe_barwrite)(ctx, vcpu, pdi, bidx, offset, size, *val);
else
*val = (*pe->pe_barread)(ctx, vcpu, pdi, bidx, offset, size);
return (0);
}
static int
pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size,
uint64_t *addr)
@ -405,12 +436,21 @@ pci_emul_alloc_resource(uint64_t *baseptr, uint64_t limit, uint64_t size,
}
int
pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, uint64_t hostbase,
enum pcibar_type type, uint64_t size)
pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, enum pcibar_type type,
uint64_t size)
{
return (pci_emul_alloc_pbar(pdi, idx, 0, type, size));
}
int
pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx, uint64_t hostbase,
enum pcibar_type type, uint64_t size)
{
int i, error;
uint64_t *baseptr, limit, addr, mask, lobits, bar;
struct inout_port iop;
struct mem_range memp;
assert(idx >= 0 && idx <= PCI_BARMAX);
@ -497,13 +537,25 @@ pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, uint64_t hostbase,
if (type == PCIBAR_IO) {
iop.name = pdi->pi_name;
iop.flags = IOPORT_F_INOUT;
iop.handler = pci_emul_handler;
iop.handler = pci_emul_io_handler;
iop.arg = pdi;
for (i = 0; i < size; i++) {
iop.port = addr + i;
register_inout(&iop);
}
} else if (type == PCIBAR_MEM32 || type == PCIBAR_MEM64) {
/* add memory bar intercept handler */
memp.name = pdi->pi_name;
memp.flags = MEM_F_RW;
memp.base = addr;
memp.size = size;
memp.handler = pci_emul_mem_handler;
memp.arg1 = pdi;
memp.arg2 = idx;
error = register_mem(&memp);
assert(error == 0);
}
return (0);
@ -1061,10 +1113,6 @@ pci_emul_cfgdata(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
}
pci_set_cfgdata32(pi, coff, bar);
if (pi->pi_bar[idx].handler) {
pi->pi_bar[idx].handler(pi, idx, bar);
}
} else if (pci_emul_iscap(pi, coff)) {
pci_emul_capwrite(pi, coff, bytes, *eax);
} else {
@ -1098,12 +1146,15 @@ INOUT_PORT(pci_irq, 0xC01, IOPORT_F_OUT, pci_irq_port_handler);
/*
* Define a dummy test device
*/
#define DREGSZ 20
#define DIOSZ 20
#define DMEMSZ 4096
struct pci_emul_dsoftc {
uint8_t regs[DREGSZ];
uint8_t ioregs[DIOSZ];
uint8_t memregs[DMEMSZ];
};
#define PCI_EMUL_MSGS 4
#define PCI_EMUL_MSI_MSGS 4
#define PCI_EMUL_MSIX_MSGS 16
static int
pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
@ -1120,64 +1171,132 @@ pci_emul_dinit(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD);
pci_set_cfgdata8(pi, PCIR_CLASS, 0x02);
error = pci_emul_alloc_bar(pi, 0, 0, PCIBAR_IO, DREGSZ);
error = pci_emul_add_msicap(pi, PCI_EMUL_MSI_MSGS);
assert(error == 0);
error = pci_emul_add_msicap(pi, PCI_EMUL_MSGS);
error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, DIOSZ);
assert(error == 0);
error = pci_emul_alloc_bar(pi, 1, PCIBAR_MEM32, DMEMSZ);
assert(error == 0);
return (0);
}
static void
pci_emul_diow(struct pci_devinst *pi, int baridx, int offset, int size,
uint32_t value)
pci_emul_diow(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
uint64_t offset, int size, uint64_t value)
{
int i;
struct pci_emul_dsoftc *sc = pi->pi_arg;
if (offset + size > DREGSZ) {
printf("diow: too large, offset %d size %d\n", offset, size);
return;
if (baridx == 0) {
if (offset + size > DIOSZ) {
printf("diow: iow too large, offset %ld size %d\n",
offset, size);
return;
}
if (size == 1) {
sc->ioregs[offset] = value & 0xff;
} else if (size == 2) {
*(uint16_t *)&sc->ioregs[offset] = value & 0xffff;
} else if (size == 4) {
*(uint32_t *)&sc->ioregs[offset] = value;
} else {
printf("diow: iow unknown size %d\n", size);
}
/*
* Special magic value to generate an interrupt
*/
if (offset == 4 && size == 4 && pci_msi_enabled(pi))
pci_generate_msi(pi, value % pci_msi_msgnum(pi));
if (value == 0xabcdef) {
for (i = 0; i < pci_msi_msgnum(pi); i++)
pci_generate_msi(pi, i);
}
}
if (size == 1) {
sc->regs[offset] = value & 0xff;
} else if (size == 2) {
*(uint16_t *)&sc->regs[offset] = value & 0xffff;
} else {
*(uint32_t *)&sc->regs[offset] = value;
if (baridx == 1) {
if (offset + size > DMEMSZ) {
printf("diow: memw too large, offset %ld size %d\n",
offset, size);
return;
}
if (size == 1) {
sc->memregs[offset] = value;
} else if (size == 2) {
*(uint16_t *)&sc->memregs[offset] = value;
} else if (size == 4) {
*(uint32_t *)&sc->memregs[offset] = value;
} else if (size == 8) {
*(uint64_t *)&sc->memregs[offset] = value;
} else {
printf("diow: memw unknown size %d\n", size);
}
/*
* magic interrupt ??
*/
}
/*
* Special magic value to generate an interrupt
*/
if (offset == 4 && size == 4 && pci_msi_enabled(pi))
pci_generate_msi(pi, value % pci_msi_msgnum(pi));
if (value == 0xabcdef) {
for (i = 0; i < pci_msi_msgnum(pi); i++)
pci_generate_msi(pi, i);
if (baridx > 1) {
printf("diow: unknown bar idx %d\n", baridx);
}
}
static uint32_t
pci_emul_dior(struct pci_devinst *pi, int baridx, int offset, int size)
static uint64_t
pci_emul_dior(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
uint64_t offset, int size)
{
struct pci_emul_dsoftc *sc = pi->pi_arg;
uint32_t value;
if (offset + size > DREGSZ) {
printf("dior: too large, offset %d size %d\n", offset, size);
return (0);
if (baridx == 0) {
if (offset + size > DIOSZ) {
printf("dior: ior too large, offset %ld size %d\n",
offset, size);
return (0);
}
if (size == 1) {
value = sc->ioregs[offset];
} else if (size == 2) {
value = *(uint16_t *) &sc->ioregs[offset];
} else if (size == 4) {
value = *(uint32_t *) &sc->ioregs[offset];
} else {
printf("dior: ior unknown size %d\n", size);
}
}
if (size == 1) {
value = sc->regs[offset];
} else if (size == 2) {
value = *(uint16_t *) &sc->regs[offset];
} else {
value = *(uint32_t *) &sc->regs[offset];
if (baridx == 1) {
if (offset + size > DMEMSZ) {
printf("dior: memr too large, offset %ld size %d\n",
offset, size);
return (0);
}
if (size == 1) {
value = sc->memregs[offset];
} else if (size == 2) {
value = *(uint16_t *) &sc->memregs[offset];
} else if (size == 4) {
value = *(uint32_t *) &sc->memregs[offset];
} else if (size == 8) {
value = *(uint64_t *) &sc->memregs[offset];
} else {
printf("dior: ior unknown size %d\n", size);
}
}
if (baridx > 1) {
printf("dior: unknown bar idx %d\n", baridx);
return (0);
}
return (value);
@ -1186,8 +1305,8 @@ pci_emul_dior(struct pci_devinst *pi, int baridx, int offset, int size)
struct pci_devemu pci_dummy = {
.pe_emu = "dummy",
.pe_init = pci_emul_dinit,
.pe_iow = pci_emul_diow,
.pe_ior = pci_emul_dior
.pe_barwrite = pci_emul_diow,
.pe_barread = pci_emul_dior
};
PCI_EMUL_SET(pci_dummy);

usr.sbin/bhyve/pci_emul.h

@ -48,7 +48,8 @@ struct pci_devemu {
char *pe_emu; /* Name of device emulation */
/* instance creation */
int (*pe_init)(struct vmctx *, struct pci_devinst *, char *opts);
int (*pe_init)(struct vmctx *, struct pci_devinst *,
char *opts);
/* config space read/write callbacks */
int (*pe_cfgwrite)(struct vmctx *ctx, int vcpu,
@ -58,11 +59,13 @@ struct pci_devemu {
struct pci_devinst *pi, int offset,
int bytes, uint32_t *retval);
/* I/O space read/write callbacks */
void (*pe_iow)(struct pci_devinst *pi, int baridx,
int offset, int size, uint32_t value);
uint32_t (*pe_ior)(struct pci_devinst *pi, int baridx,
int offset, int size);
/* BAR read/write callbacks */
void (*pe_barwrite)(struct vmctx *ctx, int vcpu,
struct pci_devinst *pi, int baridx,
uint64_t offset, int size, uint64_t value);
uint64_t (*pe_barread)(struct vmctx *ctx, int vcpu,
struct pci_devinst *pi, int baridx,
uint64_t offset, int size);
};
#define PCI_EMUL_SET(x) DATA_SET(pci_devemu_set, x);
@ -74,13 +77,10 @@ enum pcibar_type {
PCIBAR_MEMHI64
};
typedef int (*bar_write_func_t)(struct pci_devinst *pdi, int idx, uint64_t bar);
struct pcibar {
enum pcibar_type type; /* io or memory */
uint64_t size;
uint64_t addr;
bar_write_func_t handler;
};
#define PI_NAMESZ 40
@ -119,11 +119,9 @@ struct pci_devinst {
int table_bar;
int pba_bar;
size_t table_offset;
uintptr_t table_gpa;
size_t table_size;
int table_count;
size_t pba_offset;
struct memory_region *table_bar_region;
struct msix_table_entry table[MAX_MSIX_TABLE_SIZE];
} pi_msix;
@ -156,15 +154,19 @@ void msicap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
void msixcap_cfgwrite(struct pci_devinst *pi, int capoff, int offset,
int bytes, uint32_t val);
void pci_callback(void);
int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx, uint64_t hostbase,
int pci_emul_alloc_bar(struct pci_devinst *pdi, int idx,
enum pcibar_type type, uint64_t size);
int pci_emul_alloc_pbar(struct pci_devinst *pdi, int idx,
uint64_t hostbase, enum pcibar_type type, uint64_t size);
int pci_emul_add_msicap(struct pci_devinst *pi, int msgnum);
int pci_is_legacy(struct pci_devinst *pi);
void pci_generate_msi(struct pci_devinst *pi, int msgnum);
void pci_generate_msix(struct pci_devinst *pi, int msgnum);
void pci_lintr_assert(struct pci_devinst *pi);
void pci_lintr_deassert(struct pci_devinst *pi);
int pci_lintr_request(struct pci_devinst *pi, int ivec);
int pci_msi_enabled(struct pci_devinst *pi);
int pci_msix_enabled(struct pci_devinst *pi);
int pci_msi_msgnum(struct pci_devinst *pi);
void pci_parse_name(char *opt);
void pci_parse_slot(char *opt, int legacy);
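
For orientation, a minimal sketch of a device emulation written against the BAR callback
interface declared above. The "scratch" device, its flat 64-byte register file, and the
vendor/class values are hypothetical; the sketch assumes it is built inside the bhyve tree
next to the other pci_*.c files (the dummy device in pci_emul.c above is the authoritative
in-tree example).

#include <stdint.h>
#include <string.h>

#include <dev/pci/pcireg.h>
#include <machine/vmm.h>
#include <vmmapi.h>

#include "pci_emul.h"

#define SCRATCH_BARSZ	64

static uint8_t scratch_regs[SCRATCH_BARSZ];	/* backing store for BAR 0 */

static int
pci_scratch_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	pci_set_cfgdata16(pi, PCIR_VENDOR, 0x10DD);	/* same dummy IDs as pci_emul.c */
	pci_set_cfgdata8(pi, PCIR_CLASS, 0x02);

	/*
	 * Memory BAR: pci_emul_alloc_bar() picks the guest address and
	 * registers the range with register_mem(), so guest MMIO arrives
	 * through the pe_barwrite/pe_barread callbacks below.
	 */
	return (pci_emul_alloc_bar(pi, 0, PCIBAR_MEM32, SCRATCH_BARSZ));
}

static void
pci_scratch_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
    int baridx, uint64_t offset, int size, uint64_t value)
{
	if (baridx != 0 || offset + size > SCRATCH_BARSZ)
		return;
	/* Little-endian guest assumed; copy only the accessed width */
	memcpy(&scratch_regs[offset], &value, size);
}

static uint64_t
pci_scratch_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
    int baridx, uint64_t offset, int size)
{
	uint64_t value = 0;

	if (baridx != 0 || offset + size > SCRATCH_BARSZ)
		return (0);
	memcpy(&value, &scratch_regs[offset], size);
	return (value);
}

struct pci_devemu pci_de_scratch = {
	.pe_emu = "scratch",
	.pe_init = pci_scratch_init,
	.pe_barwrite = pci_scratch_write,
	.pe_barread = pci_scratch_read
};
PCI_EMUL_SET(pci_de_scratch);

I/O BARs go through the same pair of callbacks: pci_emul_io_handler() forwards port accesses
to pe_barread/pe_barwrite, so a device implements one read/write pair regardless of BAR type.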

usr.sbin/bhyve/pci_passthru.c

@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$");
#include <machine/vmm.h>
#include <vmmapi.h>
#include "pci_emul.h"
#include "mem.h"
#include "instruction_emul.h"
#ifndef _PATH_DEVPCI
@ -218,15 +219,17 @@ cfginitmsi(struct passthru_softc *sc)
}
}
if (sc->psc_msix.capoff == 0)
return (-1);
pi->pi_msix.pba_bar = msixcap.pba_offset & MSIX_TABLE_BIR_MASK;
pi->pi_msix.pba_offset = msixcap.pba_offset & MSIX_TABLE_OFFSET_MASK;
pi->pi_msix.table_bar = msixcap.table_offset & MSIX_TABLE_BIR_MASK;
pi->pi_msix.table_offset = msixcap.table_offset & MSIX_TABLE_OFFSET_MASK;
pi->pi_msix.table_count = MSIX_TABLE_COUNT(msixcap.msgctrl);
if (sc->psc_msix.capoff != 0) {
pi->pi_msix.pba_bar =
msixcap.pba_offset & MSIX_TABLE_BIR_MASK;
pi->pi_msix.pba_offset =
msixcap.pba_offset & MSIX_TABLE_OFFSET_MASK;
pi->pi_msix.table_bar =
msixcap.table_offset & MSIX_TABLE_BIR_MASK;
pi->pi_msix.table_offset =
msixcap.table_offset & MSIX_TABLE_OFFSET_MASK;
pi->pi_msix.table_count = MSIX_TABLE_COUNT(msixcap.msgctrl);
}
#ifdef LEGACY_SUPPORT
/*
@ -252,106 +255,84 @@ cfginitmsi(struct passthru_softc *sc)
return (0);
}
static int
msix_table_read(struct vmctx *vm, int vcpu, uintptr_t addr,
int size, uint64_t *data, void *arg)
static uint64_t
msix_table_read(struct passthru_softc *sc, uint64_t offset, int size)
{
struct passthru_softc *sc;
struct pci_devinst *pi;
int index;
size_t offset, entry_offset;
struct msix_table_entry *entry;
uint8_t *src8;
uint16_t *src16;
uint32_t *src32;
uint64_t *src64;
struct msix_table_entry *entry;
uint64_t data;
size_t entry_offset;
int index;
sc = arg;
pi = sc->psc_pi;
offset = addr - pi->pi_msix.table_gpa;
entry_offset = addr % MSIX_TABLE_ENTRY_SIZE;
entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
index = offset / MSIX_TABLE_ENTRY_SIZE;
entry = &pi->pi_msix.table[index];
switch(size) {
case 1:
src8 = (uint8_t*)((void*)entry + entry_offset);
*data = *src8;
src8 = (uint8_t *)((void *)entry + entry_offset);
data = *src8;
break;
case 2:
src16 = (uint16_t*)((void*)entry + entry_offset);
*data = *src16;
src16 = (uint16_t *)((void *)entry + entry_offset);
data = *src16;
break;
case 4:
src32 = (uint32_t*)((void*)entry + entry_offset);
*data = *src32;
src32 = (uint32_t *)((void *)entry + entry_offset);
data = *src32;
break;
case 8:
src64 = (uint64_t*)((void*)entry + entry_offset);
*data = *src64;
src64 = (uint64_t *)((void *)entry + entry_offset);
data = *src64;
break;
default:
return (-1);
}
return (0);
return (data);
}
static int
msix_table_write(struct vmctx *vm, int vcpu, uintptr_t addr,
int size, uint64_t data, void *arg)
static void
msix_table_write(struct vmctx *ctx, int vcpu, struct passthru_softc *sc,
uint64_t offset, int size, uint64_t data)
{
struct passthru_softc *sc;
struct pci_devinst *pi;
int error, index;
size_t offset, entry_offset;
uint32_t *dest;
struct msix_table_entry *entry;
uint32_t *dest;
size_t entry_offset;
uint32_t vector_control;
int error, index;
sc = arg;
pi = sc->psc_pi;
offset = addr - pi->pi_msix.table_gpa;
entry_offset = addr % MSIX_TABLE_ENTRY_SIZE;
entry_offset = offset % MSIX_TABLE_ENTRY_SIZE;
index = offset / MSIX_TABLE_ENTRY_SIZE;
entry = &pi->pi_msix.table[index];
/* Only 4 byte naturally-aligned writes are supported */
if (size == 4 && entry_offset % 4 == 0) {
vector_control = entry->vector_control;
dest = (uint32_t*)((void*)entry + entry_offset);
*dest = data;
/* If MSI-X hasn't been enabled, do nothing */
if (pi->pi_msix.enabled) {
/* If the entry is masked, don't set it up */
if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 ||
(vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
error = vm_setup_msix(vm, vcpu, sc->psc_sel.pc_bus,
sc->psc_sel.pc_dev,
sc->psc_sel.pc_func,
index, entry->msg_data,
entry->vector_control,
entry->addr);
if (error)
return (-1);
}
assert(size == 4);
assert(entry_offset % 4 == 0);
vector_control = entry->vector_control;
dest = (uint32_t *)((void *)entry + entry_offset);
*dest = data;
/* If MSI-X hasn't been enabled, do nothing */
if (pi->pi_msix.enabled) {
/* If the entry is masked, don't set it up */
if ((entry->vector_control & PCIM_MSIX_VCTRL_MASK) == 0 ||
(vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
error = vm_setup_msix(ctx, vcpu, sc->psc_sel.pc_bus,
sc->psc_sel.pc_dev,
sc->psc_sel.pc_func,
index, entry->msg_data,
entry->vector_control,
entry->addr);
}
} else {
printf("Unsupported unaligned or non-4-byte write to MSI-X table\n");
return (-1);
}
return (0);
}
static int
msix_bar_handler(struct pci_devinst *pdi, int idx, uint64_t bar)
{
uintptr_t start;
start = (bar & PCIM_BAR_MEM_BASE) + pdi->pi_msix.table_offset;
move_memory_region(pdi->pi_msix.table_bar_region, start);
pdi->pi_msix.table_gpa = start;
return (0);
}
static int
@ -375,6 +356,7 @@ init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base)
printf("Unsupported MSI-X table and PBA in same page\n");
return (-1);
}
/*
* May need to split the BAR into 3 regions:
* Before the MSI-X table, the MSI-X table, and after it
@ -395,30 +377,9 @@ init_msix_table(struct vmctx *ctx, struct passthru_softc *sc, uint64_t base)
start = pi->pi_bar[idx].addr;
len = pi->pi_msix.table_offset;
}
return vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
start, len, base + table_size);
}
static int
cfginitmsix(struct passthru_softc *sc)
{
int table_bar;
struct pci_devinst *pi;
pi = sc->psc_pi;
table_bar = pi->pi_msix.table_bar;
pi->pi_msix.table_gpa = sc->psc_bar[table_bar].addr + pi->pi_msix.table_offset;
pi->pi_msix.table_bar_region = register_emulated_memory(pi->pi_msix.table_gpa,
pi->pi_msix.table_size,
msix_table_read,
msix_table_write, sc);
if (!pi->pi_msix.table_bar_region)
return (-1);
pi->pi_bar[table_bar].handler = msix_bar_handler;
return (0);
return (vm_map_pptdev_mmio(ctx, sc->psc_sel.pc_bus,
sc->psc_sel.pc_dev, sc->psc_sel.pc_func,
start, len, base + table_size));
}
static int
@ -464,8 +425,8 @@ cfginitbar(struct vmctx *ctx, struct passthru_softc *sc)
sc->psc_bar[i].addr = base;
/* Allocate the BAR in the guest I/O or MMIO space */
error = pci_emul_alloc_bar(pi, i, base, bartype,
bar.pbi_length);
error = pci_emul_alloc_pbar(pi, i, base, bartype,
bar.pbi_length);
if (error)
return (-1);
@ -515,9 +476,6 @@ cfginit(struct vmctx *ctx, struct pci_devinst *pi, int bus, int slot, int func)
if (cfginitbar(ctx, sc) != 0)
goto done;
if (cfginitmsix(sc) != 0)
goto done;
error = 0; /* success */
done:
return (error);
@ -544,7 +502,8 @@ passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
goto done;
}
if (opts == NULL || sscanf(opts, "%d/%d/%d", &bus, &slot, &func) != 3)
if (opts == NULL ||
sscanf(opts, "%d/%d/%d", &bus, &slot, &func) != 3)
goto done;
if (vm_assign_pptdev(ctx, bus, slot, func) != 0)
@ -557,7 +516,7 @@ passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
sc->psc_pi = pi;
/* initialize config space */
if (cfginit(ctx, pi, bus, slot, func) != 0)
if ((error = cfginit(ctx, pi, bus, slot, func)) != 0)
goto done;
error = 0; /* success */
@ -605,8 +564,8 @@ msixcap_access(struct passthru_softc *sc, int coff)
}
static int
passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff,
int bytes, uint32_t *rv)
passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
int coff, int bytes, uint32_t *rv)
{
struct passthru_softc *sc;
@ -636,8 +595,8 @@ passthru_cfgread(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff,
}
static int
passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff,
int bytes, uint32_t val)
passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
int coff, int bytes, uint32_t val)
{
int error, msix_table_entries, i;
struct passthru_softc *sc;
@ -705,40 +664,54 @@ passthru_cfgwrite(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int coff,
}
static void
passthru_iow(struct pci_devinst *pi, int baridx, int offset, int size,
uint32_t value)
passthru_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
uint64_t offset, int size, uint64_t value)
{
struct passthru_softc *sc;
struct iodev_pio_req pio;
sc = pi->pi_arg;
bzero(&pio, sizeof(struct iodev_pio_req));
pio.access = IODEV_PIO_WRITE;
pio.port = sc->psc_bar[baridx].addr + offset;
pio.width = size;
pio.val = value;
(void)ioctl(iofd, IODEV_PIO, &pio);
if (pi->pi_msix.enabled && pi->pi_msix.table_bar == baridx) {
msix_table_write(ctx, vcpu, sc, offset, size, value);
} else {
assert(pi->pi_bar[baridx].type == PCIBAR_IO);
bzero(&pio, sizeof(struct iodev_pio_req));
pio.access = IODEV_PIO_WRITE;
pio.port = sc->psc_bar[baridx].addr + offset;
pio.width = size;
pio.val = value;
(void)ioctl(iofd, IODEV_PIO, &pio);
}
}
static uint32_t
passthru_ior(struct pci_devinst *pi, int baridx, int offset, int size)
static uint64_t
passthru_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, int baridx,
uint64_t offset, int size)
{
struct passthru_softc *sc;
struct iodev_pio_req pio;
uint64_t val;
sc = pi->pi_arg;
bzero(&pio, sizeof(struct iodev_pio_req));
pio.access = IODEV_PIO_READ;
pio.port = sc->psc_bar[baridx].addr + offset;
pio.width = size;
pio.val = 0;
if (pi->pi_msix.enabled && pi->pi_msix.table_bar == baridx) {
val = msix_table_read(sc, offset, size);
} else {
assert(pi->pi_bar[baridx].type == PCIBAR_IO);
bzero(&pio, sizeof(struct iodev_pio_req));
pio.access = IODEV_PIO_READ;
pio.port = sc->psc_bar[baridx].addr + offset;
pio.width = size;
pio.val = 0;
(void)ioctl(iofd, IODEV_PIO, &pio);
(void)ioctl(iofd, IODEV_PIO, &pio);
return (pio.val);
val = pio.val;
}
return (val);
}
struct pci_devemu passthru = {
@ -746,7 +719,7 @@ struct pci_devemu passthru = {
.pe_init = passthru_init,
.pe_cfgwrite = passthru_cfgwrite,
.pe_cfgread = passthru_cfgread,
.pe_iow = passthru_iow,
.pe_ior = passthru_ior,
.pe_barwrite = passthru_write,
.pe_barread = passthru_read,
};
PCI_EMUL_SET(passthru);

usr.sbin/bhyve/pci_uart.c

@ -320,8 +320,8 @@ pci_uart_drain(int fd, enum ev_type ev, void *arg)
}
static void
pci_uart_write(struct pci_devinst *pi, int baridx, int offset, int size,
uint32_t value)
pci_uart_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
int baridx, uint64_t offset, int size, uint64_t value)
{
struct pci_uart_softc *sc;
int fifosz;
@ -329,6 +329,7 @@ pci_uart_write(struct pci_devinst *pi, int baridx, int offset, int size,
sc = pi->pi_arg;
assert(baridx == 0);
assert(size == 1);
/* Open terminal */
@ -459,15 +460,17 @@ pci_uart_write(struct pci_devinst *pi, int baridx, int offset, int size,
pci_uart_toggle_intr(sc);
}
uint32_t
pci_uart_read(struct pci_devinst *pi, int baridx, int offset, int size)
uint64_t
pci_uart_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
int baridx, uint64_t offset, int size)
{
struct pci_uart_softc *sc;
uint8_t iir, intr_reason;
uint32_t reg;
uint64_t reg;
sc = pi->pi_arg;
assert(baridx == 0);
assert(size == 1);
/* Open terminal */
@ -573,11 +576,11 @@ pci_uart_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_SIMPLECOMM);
if (pci_is_legacy(pi)) {
pci_uart_legacy_res(&bar, &ivec);
pci_emul_alloc_pbar(pi, 0, bar, PCIBAR_IO, 8);
} else {
bar = 0;
ivec = -1;
pci_emul_alloc_bar(pi, 0, PCIBAR_IO, 8);
}
pci_emul_alloc_bar(pi, 0, bar, PCIBAR_IO, 8);
pci_lintr_request(pi, ivec);
if (opts != NULL && !strcmp("stdio", opts) && !pci_uart_stdio) {
@ -591,9 +594,9 @@ pci_uart_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
}
struct pci_devemu pci_de_com = {
.pe_emu = "uart",
.pe_init = pci_uart_init,
.pe_iow = pci_uart_write,
.pe_ior = pci_uart_read,
.pe_emu = "uart",
.pe_init = pci_uart_init,
.pe_barwrite = pci_uart_write,
.pe_barread = pci_uart_read
};
PCI_EMUL_SET(pci_de_com);

usr.sbin/bhyve/pci_virtio_block.c

@ -382,20 +382,22 @@ pci_vtblk_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_STORAGE);
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_BLOCK);
pci_emul_alloc_bar(pi, 0, 0, PCIBAR_IO, VTBLK_REGSZ);
pci_emul_add_msicap(pi, 1);
pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VTBLK_REGSZ);
return (0);
}
static void
pci_vtblk_write(struct pci_devinst *pi, int baridx, int offset, int size,
uint32_t value)
pci_vtblk_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
int baridx, uint64_t offset, int size, uint64_t value)
{
struct pci_vtblk_softc *sc = pi->pi_arg;
assert(baridx == 0);
if (offset + size > VTBLK_REGSZ) {
DPRINTF(("vtblk_write: 2big, offset %d size %d\n",
DPRINTF(("vtblk_write: 2big, offset %ld size %d\n",
offset, size));
return;
}
@ -426,24 +428,27 @@ pci_vtblk_write(struct pci_devinst *pi, int baridx, int offset, int size,
case VTCFG_R_QNUM:
case VTCFG_R_ISR:
case VTBLK_R_CFG ... VTBLK_R_CFG_END:
DPRINTF(("vtblk: write to readonly reg %d\n\r", offset));
DPRINTF(("vtblk: write to readonly reg %ld\n\r", offset));
break;
default:
DPRINTF(("vtblk: unknown i/o write offset %d\n\r", offset));
DPRINTF(("vtblk: unknown i/o write offset %ld\n\r", offset));
value = 0;
break;
}
}
uint32_t
pci_vtblk_read(struct pci_devinst *pi, int baridx, int offset, int size)
uint64_t
pci_vtblk_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
int baridx, uint64_t offset, int size)
{
struct pci_vtblk_softc *sc = pi->pi_arg;
void *ptr;
uint32_t value;
assert(baridx == 0);
if (offset + size > VTBLK_REGSZ) {
DPRINTF(("vtblk_read: 2big, offset %d size %d\n",
DPRINTF(("vtblk_read: 2big, offset %ld size %d\n",
offset, size));
return (0);
}
@ -493,7 +498,7 @@ pci_vtblk_read(struct pci_devinst *pi, int baridx, int offset, int size)
}
break;
default:
DPRINTF(("vtblk: unknown i/o read offset %d\n\r", offset));
DPRINTF(("vtblk: unknown i/o read offset %ld\n\r", offset));
value = 0;
break;
}
@ -502,9 +507,9 @@ pci_vtblk_read(struct pci_devinst *pi, int baridx, int offset, int size)
}
struct pci_devemu pci_de_vblk = {
.pe_emu = "virtio-blk",
.pe_init = pci_vtblk_init,
.pe_iow = pci_vtblk_write,
.pe_ior = pci_vtblk_read,
.pe_emu = "virtio-blk",
.pe_init = pci_vtblk_init,
.pe_barwrite = pci_vtblk_write,
.pe_barread = pci_vtblk_read
};
PCI_EMUL_SET(pci_de_vblk);

usr.sbin/bhyve/pci_virtio_net.c

@ -574,8 +574,8 @@ pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
pci_emul_alloc_bar(pi, 0, 0, PCIBAR_IO, VTNET_REGSZ);
pci_emul_add_msicap(pi, 1);
pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VTNET_REGSZ);
return (0);
}
@ -590,14 +590,16 @@ static void (*pci_vtnet_qnotify[VTNET_MAXQ])(struct pci_vtnet_softc *) = {
};
static void
pci_vtnet_write(struct pci_devinst *pi, int baridx, int offset, int size,
uint32_t value)
pci_vtnet_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
int baridx, uint64_t offset, int size, uint64_t value)
{
struct pci_vtnet_softc *sc = pi->pi_arg;
void *ptr;
assert(baridx == 0);
if (offset + size > VTNET_REGSZ) {
DPRINTF(("vtnet_write: 2big, offset %d size %d\n",
DPRINTF(("vtnet_write: 2big, offset %ld size %d\n",
offset, size));
return;
}
@ -652,10 +654,10 @@ pci_vtnet_write(struct pci_devinst *pi, int baridx, int offset, int size,
case VTCFG_R_ISR:
case VTNET_R_CFG6:
case VTNET_R_CFG7:
DPRINTF(("vtnet: write to readonly reg %d\n\r", offset));
DPRINTF(("vtnet: write to readonly reg %ld\n\r", offset));
break;
default:
DPRINTF(("vtnet: unknown i/o write offset %d\n\r", offset));
DPRINTF(("vtnet: unknown i/o write offset %ld\n\r", offset));
value = 0;
break;
}
@ -663,15 +665,18 @@ pci_vtnet_write(struct pci_devinst *pi, int baridx, int offset, int size,
pthread_mutex_unlock(&sc->vsc_mtx);
}
uint32_t
pci_vtnet_read(struct pci_devinst *pi, int baridx, int offset, int size)
uint64_t
pci_vtnet_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
int baridx, uint64_t offset, int size)
{
struct pci_vtnet_softc *sc = pi->pi_arg;
void *ptr;
uint32_t value;
uint64_t value;
assert(baridx == 0);
if (offset + size > VTNET_REGSZ) {
DPRINTF(("vtnet_read: 2big, offset %d size %d\n",
DPRINTF(("vtnet_read: 2big, offset %ld size %d\n",
offset, size));
return (0);
}
@ -737,7 +742,7 @@ pci_vtnet_read(struct pci_devinst *pi, int baridx, int offset, int size)
value = 0; /* XXX link status in LSB */
break;
default:
DPRINTF(("vtnet: unknown i/o read offset %d\n\r", offset));
DPRINTF(("vtnet: unknown i/o read offset %ld\n\r", offset));
value = 0;
break;
}
@ -748,9 +753,9 @@ pci_vtnet_read(struct pci_devinst *pi, int baridx, int offset, int size)
}
struct pci_devemu pci_de_vnet = {
.pe_emu = "virtio-net",
.pe_init = pci_vtnet_init,
.pe_iow = pci_vtnet_write,
.pe_ior = pci_vtnet_read,
.pe_emu = "virtio-net",
.pe_init = pci_vtnet_init,
.pe_barwrite = pci_vtnet_write,
.pe_barread = pci_vtnet_read
};
PCI_EMUL_SET(pci_de_vnet);