Restructure memory allocation in bhyve to support "devmem".

devmem is used to represent MMIO devices like the boot ROM or a VESA framebuffer
where doing a trap-and-emulate for every access is impractical. devmem is a
hybrid of system memory (sysmem) and emulated device models.

devmem is mapped in the guest address space via nested page tables similar
to sysmem. However the address range where devmem is mapped may be changed
by the guest at runtime (e.g. by reprogramming a PCI BAR). Also devmem is
usually mapped RO or RW as compared to RWX mappings for sysmem.

Each devmem segment is named (e.g. "bootrom") and this name is used to
create a device node for the devmem segment (e.g. /dev/vmm/testvm.bootrom).
The device node supports mmap(2) and this decouples the host mapping of
devmem from its mapping in the guest address space (which can change).

Reviewed by:	tychon
Discussed with:	grehan
Differential Revision:	https://reviews.freebsd.org/D2762
MFC after:	4 weeks
This commit is contained in:
Neel Natu 2015-06-18 06:00:17 +00:00
parent e2dd197458
commit 9b1aa8d622
25 changed files with 1418 additions and 432 deletions

View File

@ -58,15 +58,23 @@ __FBSDID("$FreeBSD$");
#define MB (1024 * 1024UL)
#define GB (1024 * 1024 * 1024UL)
/*
* Size of the guard region before and after the virtual address space
* mapping the guest physical memory. This must be a multiple of the
* superpage size for performance reasons.
*/
#define VM_MMAP_GUARD_SIZE (4 * MB)
#define PROT_RW (PROT_READ | PROT_WRITE)
#define PROT_ALL (PROT_READ | PROT_WRITE | PROT_EXEC)
struct vmctx {
int fd;
uint32_t lowmem_limit;
enum vm_mmap_style vms;
int memflags;
size_t lowmem;
char *lowmem_addr;
size_t highmem;
char *highmem_addr;
char *baseaddr;
char *name;
};
@ -157,22 +165,6 @@ vm_parse_memsize(const char *optarg, size_t *ret_memsize)
return (error);
}
int
vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len,
int *wired)
{
int error;
struct vm_memory_segment seg;
bzero(&seg, sizeof(seg));
seg.gpa = gpa;
error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg);
*ret_len = seg.len;
if (wired != NULL)
*wired = seg.wired;
return (error);
}
uint32_t
vm_get_lowmem_limit(struct vmctx *ctx)
{
@ -194,39 +186,184 @@ vm_set_memflags(struct vmctx *ctx, int flags)
ctx->memflags = flags;
}
static int
setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **addr)
int
vm_get_memflags(struct vmctx *ctx)
{
int error, mmap_flags;
struct vm_memory_segment seg;
return (ctx->memflags);
}
/*
* Map segment 'segid' starting at 'off' into guest address range [gpa,gpa+len).
*/
int
vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid, vm_ooffset_t off,
size_t len, int prot)
{
struct vm_memmap memmap;
int error, flags;
memmap.gpa = gpa;
memmap.segid = segid;
memmap.segoff = off;
memmap.len = len;
memmap.prot = prot;
memmap.flags = 0;
if (ctx->memflags & VM_MEM_F_WIRED)
memmap.flags |= VM_MEMMAP_F_WIRED;
/*
* Create and optionally map 'len' bytes of memory at guest
* physical address 'gpa'
* If this mapping already exists then don't create it again. This
* is the common case for SYSMEM mappings created by bhyveload(8).
*/
bzero(&seg, sizeof(seg));
seg.gpa = gpa;
seg.len = len;
error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg);
if (error == 0 && addr != NULL) {
mmap_flags = MAP_SHARED;
if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
mmap_flags |= MAP_NOCORE;
*addr = mmap(NULL, len, PROT_READ | PROT_WRITE, mmap_flags,
ctx->fd, gpa);
error = vm_mmap_getnext(ctx, &gpa, &segid, &off, &len, &prot, &flags);
if (error == 0 && gpa == memmap.gpa) {
if (segid != memmap.segid || off != memmap.segoff ||
prot != memmap.prot || flags != memmap.flags) {
errno = EEXIST;
return (-1);
} else {
return (0);
}
}
error = ioctl(ctx->fd, VM_MMAP_MEMSEG, &memmap);
return (error);
}
int
vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
{
struct vm_memmap memmap;
int error;
bzero(&memmap, sizeof(struct vm_memmap));
memmap.gpa = *gpa;
error = ioctl(ctx->fd, VM_MMAP_GETNEXT, &memmap);
if (error == 0) {
*gpa = memmap.gpa;
*segid = memmap.segid;
*segoff = memmap.segoff;
*len = memmap.len;
*prot = memmap.prot;
*flags = memmap.flags;
}
return (error);
}
/*
* Return 0 if the segments are identical and non-zero otherwise.
*
* This is slightly complicated by the fact that only device memory segments
* are named.
*/
static int
cmpseg(size_t len, const char *str, size_t len2, const char *str2)
{
if (len == len2) {
if ((!str && !str2) || (str && str2 && !strcmp(str, str2)))
return (0);
}
return (-1);
}
static int
vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name)
{
struct vm_memseg memseg;
size_t n;
int error;
/*
* If the memory segment has already been created then just return.
* This is the usual case for the SYSMEM segment created by userspace
* loaders like bhyveload(8).
*/
error = vm_get_memseg(ctx, segid, &memseg.len, memseg.name,
sizeof(memseg.name));
if (error)
return (error);
if (memseg.len != 0) {
if (cmpseg(len, name, memseg.len, VM_MEMSEG_NAME(&memseg))) {
errno = EINVAL;
return (-1);
} else {
return (0);
}
}
bzero(&memseg, sizeof(struct vm_memseg));
memseg.segid = segid;
memseg.len = len;
if (name != NULL) {
n = strlcpy(memseg.name, name, sizeof(memseg.name));
if (n >= sizeof(memseg.name)) {
errno = ENAMETOOLONG;
return (-1);
}
}
error = ioctl(ctx->fd, VM_ALLOC_MEMSEG, &memseg);
return (error);
}
int
vm_get_memseg(struct vmctx *ctx, int segid, size_t *lenp, char *namebuf,
size_t bufsize)
{
struct vm_memseg memseg;
size_t n;
int error;
memseg.segid = segid;
error = ioctl(ctx->fd, VM_GET_MEMSEG, &memseg);
if (error == 0) {
*lenp = memseg.len;
n = strlcpy(namebuf, memseg.name, bufsize);
if (n >= bufsize) {
errno = ENAMETOOLONG;
error = -1;
}
}
return (error);
}
static int
setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base)
{
char *ptr;
int error, flags;
/* Map 'len' bytes starting at 'gpa' in the guest address space */
error = vm_mmap_memseg(ctx, gpa, VM_SYSMEM, gpa, len, PROT_ALL);
if (error)
return (error);
flags = MAP_SHARED | MAP_FIXED;
if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
flags |= MAP_NOCORE;
/* mmap into the process address space on the host */
ptr = mmap(base + gpa, len, PROT_RW, flags, ctx->fd, gpa);
if (ptr == MAP_FAILED)
return (-1);
return (0);
}
int
vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
{
char **addr;
int error;
size_t objsize, len;
vm_paddr_t gpa;
char *baseaddr, *ptr;
int error, flags;
/* XXX VM_MMAP_SPARSE not implemented yet */
assert(vms == VM_MMAP_NONE || vms == VM_MMAP_ALL);
ctx->vms = vms;
assert(vms == VM_MMAP_ALL);
/*
* If 'memsize' cannot fit entirely in the 'lowmem' segment then
@ -234,25 +371,46 @@ vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
*/
if (memsize > ctx->lowmem_limit) {
ctx->lowmem = ctx->lowmem_limit;
ctx->highmem = memsize - ctx->lowmem;
ctx->highmem = memsize - ctx->lowmem_limit;
objsize = 4*GB + ctx->highmem;
} else {
ctx->lowmem = memsize;
ctx->highmem = 0;
objsize = ctx->lowmem;
}
error = vm_alloc_memseg(ctx, VM_SYSMEM, objsize, NULL);
if (error)
return (error);
/*
* Stake out a contiguous region covering the guest physical memory
* and the adjoining guard regions.
*/
len = VM_MMAP_GUARD_SIZE + objsize + VM_MMAP_GUARD_SIZE;
flags = MAP_PRIVATE | MAP_ANON | MAP_NOCORE | MAP_ALIGNED_SUPER;
ptr = mmap(NULL, len, PROT_NONE, flags, -1, 0);
if (ptr == MAP_FAILED)
return (-1);
baseaddr = ptr + VM_MMAP_GUARD_SIZE;
if (ctx->highmem > 0) {
gpa = 4*GB;
len = ctx->highmem;
error = setup_memory_segment(ctx, gpa, len, baseaddr);
if (error)
return (error);
}
if (ctx->lowmem > 0) {
addr = (vms == VM_MMAP_ALL) ? &ctx->lowmem_addr : NULL;
error = setup_memory_segment(ctx, 0, ctx->lowmem, addr);
gpa = 0;
len = ctx->lowmem;
error = setup_memory_segment(ctx, gpa, len, baseaddr);
if (error)
return (error);
}
if (ctx->highmem > 0) {
addr = (vms == VM_MMAP_ALL) ? &ctx->highmem_addr : NULL;
error = setup_memory_segment(ctx, 4*GB, ctx->highmem, addr);
if (error)
return (error);
}
ctx->baseaddr = baseaddr;
return (0);
}
@ -260,20 +418,16 @@ vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms)
void *
vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len)
{
vm_paddr_t start, end, mapend;
/* XXX VM_MMAP_SPARSE not implemented yet */
assert(ctx->vms == VM_MMAP_ALL);
start = gaddr;
end = gaddr + len;
mapend = ctx->highmem ? 4*GB + ctx->highmem : ctx->lowmem;
if (gaddr < ctx->lowmem && gaddr + len <= ctx->lowmem)
return ((void *)(ctx->lowmem_addr + gaddr));
if (gaddr >= 4*GB) {
gaddr -= 4*GB;
if (gaddr < ctx->highmem && gaddr + len <= ctx->highmem)
return ((void *)(ctx->highmem_addr + gaddr));
}
return (NULL);
if (start <= end && end <= mapend)
return (ctx->baseaddr + start);
else
return (NULL);
}
size_t
@ -290,6 +444,56 @@ vm_get_highmem_size(struct vmctx *ctx)
return (ctx->highmem);
}
void *
vm_create_devmem(struct vmctx *ctx, int segid, const char *name, size_t len)
{
char pathname[MAXPATHLEN];
size_t len2;
char *base, *ptr;
int fd, error, flags;
fd = -1;
ptr = MAP_FAILED;
if (name == NULL || strlen(name) == 0) {
errno = EINVAL;
goto done;
}
error = vm_alloc_memseg(ctx, segid, len, name);
if (error)
goto done;
strlcpy(pathname, "/dev/vmm/", sizeof(pathname));
strlcat(pathname, ctx->name, sizeof(pathname));
strlcat(pathname, ".", sizeof(pathname));
strlcat(pathname, name, sizeof(pathname));
fd = open(pathname, O_RDWR);
if (fd < 0)
goto done;
/*
* Stake out a contiguous region covering the device memory and the
* adjoining guard regions.
*/
len2 = VM_MMAP_GUARD_SIZE + len + VM_MMAP_GUARD_SIZE;
flags = MAP_PRIVATE | MAP_ANON | MAP_NOCORE | MAP_ALIGNED_SUPER;
base = mmap(NULL, len2, PROT_NONE, flags, -1, 0);
if (base == MAP_FAILED)
goto done;
flags = MAP_SHARED | MAP_FIXED;
if ((ctx->memflags & VM_MEM_F_INCORE) == 0)
flags |= MAP_NOCORE;
/* mmap the devmem region in the host address space */
ptr = mmap(base + VM_MMAP_GUARD_SIZE, len, PROT_RW, flags, fd, 0);
done:
if (fd >= 0)
close(fd);
return (ptr);
}
int
vm_set_desc(struct vmctx *ctx, int vcpu, int reg,
uint64_t base, uint32_t limit, uint32_t access)

View File

@ -36,7 +36,7 @@
* API version for out-of-tree consumers like grub-bhyve for making compile
* time decisions.
*/
#define VMMAPI_VERSION 0101 /* 2 digit major followed by 2 digit minor */
#define VMMAPI_VERSION 0102 /* 2 digit major followed by 2 digit minor */
struct iovec;
struct vmctx;
@ -52,14 +52,59 @@ enum vm_mmap_style {
VM_MMAP_SPARSE, /* mappings created on-demand */
};
/*
* 'flags' value passed to 'vm_set_memflags()'.
*/
#define VM_MEM_F_INCORE 0x01 /* include guest memory in core file */
#define VM_MEM_F_WIRED 0x02 /* guest memory is wired */
/*
* Identifiers for memory segments:
* - vm_setup_memory() uses VM_SYSMEM for the system memory segment.
* - the remaining identifiers can be used to create devmem segments.
*/
enum {
VM_SYSMEM,
VM_BOOTROM,
VM_FRAMEBUFFER,
};
/*
* Get the length and name of the memory segment identified by 'segid'.
* Note that system memory segments are identified with a nul name.
*
* Returns 0 on success and non-zero otherwise.
*/
int vm_get_memseg(struct vmctx *ctx, int ident, size_t *lenp, char *name,
size_t namesiz);
/*
* Iterate over the guest address space. This function finds an address range
* that starts at an address >= *gpa.
*
* Returns 0 if the next address range was found and non-zero otherwise.
*/
int vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid,
vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
/*
* Create a device memory segment identified by 'segid'.
*
* Returns a pointer to the memory segment on success and MAP_FAILED otherwise.
*/
void *vm_create_devmem(struct vmctx *ctx, int segid, const char *name,
size_t len);
/*
* Map the memory segment identified by 'segid' into the guest address space
* at [gpa,gpa+len) with protection 'prot'.
*/
int vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid,
vm_ooffset_t segoff, size_t len, int prot);
int vm_create(const char *name);
struct vmctx *vm_open(const char *name);
void vm_destroy(struct vmctx *ctx);
int vm_parse_memsize(const char *optarg, size_t *memsize);
int vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len,
int *wired);
int vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s);
void *vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len);
int vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num);
@ -68,6 +113,7 @@ int vm_gla2gpa(struct vmctx *, int vcpuid, struct vm_guest_paging *paging,
uint32_t vm_get_lowmem_limit(struct vmctx *ctx);
void vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit);
void vm_set_memflags(struct vmctx *ctx, int flags);
int vm_get_memflags(struct vmctx *ctx);
size_t vm_get_lowmem_size(struct vmctx *ctx);
size_t vm_get_highmem_size(struct vmctx *ctx);
int vm_set_desc(struct vmctx *ctx, int vcpu, int reg,

View File

@ -87,15 +87,15 @@ console=${DEFAULT_CONSOLE}
cpus=${DEFAULT_CPUS}
tap_total=0
disk_total=0
apic_opt=""
gdbport=0
loader_opt=""
bhyverun_opt="-H -A -P"
pass_total=0
while getopts ac:C:d:e:g:hH:iI:m:p:t: c ; do
case $c in
a)
apic_opt="-a"
bhyverun_opt="${bhyverun_opt} -a"
;;
c)
cpus=${OPTARG}
@ -163,6 +163,12 @@ if [ -n "${host_base}" ]; then
loader_opt="${loader_opt} -h ${host_base}"
fi
# If PCI passthru devices are configured then guest memory must be wired
if [ ${pass_total} -gt 0 ]; then
loader_opt="${loader_opt} -S"
bhyverun_opt="${bhyverun_opt} -S"
fi
make_and_check_diskdev()
{
local virtio_diskdev="$1"
@ -263,7 +269,7 @@ while [ 1 ]; do
i=$(($i + 1))
done
${FBSDRUN} -c ${cpus} -m ${memsize} ${apic_opt} -A -H -P \
${FBSDRUN} -c ${cpus} -m ${memsize} ${bhyverun_opt} \
-g ${gdbport} \
-s 0:0,hostbridge \
-s 1:0,lpc \

View File

@ -108,7 +108,6 @@ enum x2apic_state {
struct vm;
struct vm_exception;
struct vm_memory_segment;
struct seg_desc;
struct vm_exit;
struct vm_run;
@ -175,17 +174,33 @@ int vm_create(const char *name, struct vm **retvm);
void vm_destroy(struct vm *vm);
int vm_reinit(struct vm *vm);
const char *vm_name(struct vm *vm);
int vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len);
/*
* APIs that modify the guest memory map require all vcpus to be frozen.
*/
int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off,
size_t len, int prot, int flags);
int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem);
void vm_free_memseg(struct vm *vm, int ident);
int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa);
int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len);
void *vm_gpa_hold(struct vm *, vm_paddr_t gpa, size_t len, int prot,
void **cookie);
int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
/*
* APIs that inspect the guest memory map require only a *single* vcpu to
* be frozen. This acts like a read lock on the guest memory map since any
* modification requires *all* vcpus to be frozen.
*/
int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
vm_ooffset_t *segoff, size_t *len, int *prot, int *flags);
int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
struct vm_object **objptr);
void *vm_gpa_hold(struct vm *, int vcpuid, vm_paddr_t gpa, size_t len,
int prot, void **cookie);
void vm_gpa_release(void *cookie);
int vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
struct vm_memory_segment *seg);
int vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
vm_offset_t *offset, struct vm_object **object);
boolean_t vm_mem_allocated(struct vm *vm, vm_paddr_t gpa);
bool vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa);
int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval);
int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val);
int vm_get_seg_desc(struct vm *vm, int vcpu, int reg,
@ -302,8 +317,6 @@ vcpu_should_yield(struct vm *vm, int vcpu)
void *vcpu_stats(struct vm *vm, int vcpu);
void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr);
struct vmspace *vm_get_vmspace(struct vm *vm);
int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func);
int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
struct vatpic *vm_atpic(struct vm *vm);
struct vatpit *vm_atpit(struct vm *vm);
struct vpmtmr *vm_pmtmr(struct vm *vm);

View File

@ -34,10 +34,22 @@ void vmmdev_init(void);
int vmmdev_cleanup(void);
#endif
struct vm_memory_segment {
vm_paddr_t gpa; /* in */
struct vm_memmap {
vm_paddr_t gpa;
int segid; /* memory segment */
vm_ooffset_t segoff; /* offset into memory segment */
size_t len; /* mmap length */
int prot; /* RWX */
int flags;
};
#define VM_MEMMAP_F_WIRED 0x01
#define VM_MEMMAP_F_IOMMU 0x02
#define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? (m)->name : NULL)
struct vm_memseg {
int segid;
size_t len;
int wired;
char name[SPECNAMELEN + 1];
};
struct vm_register {
@ -214,10 +226,14 @@ enum {
IOCNUM_REINIT = 5,
/* memory apis */
IOCNUM_MAP_MEMORY = 10,
IOCNUM_GET_MEMORY_SEG = 11,
IOCNUM_MAP_MEMORY = 10, /* deprecated */
IOCNUM_GET_MEMORY_SEG = 11, /* deprecated */
IOCNUM_GET_GPA_PMAP = 12,
IOCNUM_GLA2GPA = 13,
IOCNUM_ALLOC_MEMSEG = 14,
IOCNUM_GET_MEMSEG = 15,
IOCNUM_MMAP_MEMSEG = 16,
IOCNUM_MMAP_GETNEXT = 17,
/* register/state accessors */
IOCNUM_SET_REGISTER = 20,
@ -278,10 +294,14 @@ enum {
_IOW('v', IOCNUM_SUSPEND, struct vm_suspend)
#define VM_REINIT \
_IO('v', IOCNUM_REINIT)
#define VM_MAP_MEMORY \
_IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment)
#define VM_GET_MEMORY_SEG \
_IOWR('v', IOCNUM_GET_MEMORY_SEG, struct vm_memory_segment)
#define VM_ALLOC_MEMSEG \
_IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg)
#define VM_GET_MEMSEG \
_IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg)
#define VM_MMAP_MEMSEG \
_IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap)
#define VM_MMAP_GETNEXT \
_IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap)
#define VM_SET_REGISTER \
_IOW('v', IOCNUM_SET_REGISTER, struct vm_register)
#define VM_GET_REGISTER \

View File

@ -1477,7 +1477,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
VCPU_CTR2(svm_sc->vm, vcpu, "nested page fault with "
"reserved bits set: info1(%#lx) info2(%#lx)",
info1, info2);
} else if (vm_mem_allocated(svm_sc->vm, info2)) {
} else if (vm_mem_allocated(svm_sc->vm, vcpu, info2)) {
vmexit->exitcode = VM_EXITCODE_PAGING;
vmexit->u.paging.gpa = info2;
vmexit->u.paging.fault_type = npf_fault_type(info1);

View File

@ -2425,7 +2425,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
* this must be an instruction that accesses MMIO space.
*/
gpa = vmcs_gpa();
if (vm_mem_allocated(vmx->vm, gpa) ||
if (vm_mem_allocated(vmx->vm, vcpu, gpa) ||
apic_access_fault(vmx, vcpu, gpa)) {
vmexit->exitcode = VM_EXITCODE_PAGING;
vmexit->inst_length = 0;

View File

@ -76,11 +76,17 @@ struct pptintr_arg { /* pptintr(pptintr_arg) */
uint64_t msg_data;
};
struct pptseg {
vm_paddr_t gpa;
size_t len;
int wired;
};
struct pptdev {
device_t dev;
struct vm *vm; /* owner of this device */
TAILQ_ENTRY(pptdev) next;
struct vm_memory_segment mmio[MAX_MMIOSEGS];
struct pptseg mmio[MAX_MMIOSEGS];
struct {
int num_msgs; /* guest state */
@ -207,14 +213,14 @@ static void
ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt)
{
int i;
struct vm_memory_segment *seg;
struct pptseg *seg;
for (i = 0; i < MAX_MMIOSEGS; i++) {
seg = &ppt->mmio[i];
if (seg->len == 0)
continue;
(void)vm_unmap_mmio(vm, seg->gpa, seg->len);
bzero(seg, sizeof(struct vm_memory_segment));
bzero(seg, sizeof(struct pptseg));
}
}
@ -324,7 +330,7 @@ ppt_is_mmio(struct vm *vm, vm_paddr_t gpa)
{
int i;
struct pptdev *ppt;
struct vm_memory_segment *seg;
struct pptseg *seg;
TAILQ_FOREACH(ppt, &pptdev_list, next) {
if (ppt->vm != vm)
@ -410,7 +416,7 @@ ppt_map_mmio(struct vm *vm, int bus, int slot, int func,
vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
{
int i, error;
struct vm_memory_segment *seg;
struct pptseg *seg;
struct pptdev *ppt;
ppt = ppt_find(bus, slot, func);

View File

@ -119,12 +119,21 @@ struct vcpu {
#define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED)
struct mem_seg {
size_t len;
bool sysmem;
struct vm_object *object;
};
#define VM_MAX_MEMSEGS 2
struct mem_map {
vm_paddr_t gpa;
size_t len;
boolean_t wired;
vm_object_t object;
vm_ooffset_t segoff;
int segid;
int prot;
int flags;
};
#define VM_MAX_MEMORY_SEGMENTS 2
#define VM_MAX_MEMMAPS 4
/*
* Initialization:
@ -150,8 +159,8 @@ struct vm {
void *rendezvous_arg; /* (x) rendezvous func/arg */
vm_rendezvous_func_t rendezvous_func;
struct mtx rendezvous_mtx; /* (o) rendezvous lock */
int num_mem_segs; /* (o) guest memory segments */
struct mem_seg mem_segs[VM_MAX_MEMORY_SEGMENTS];
struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
struct vmspace *vmspace; /* (o) guest's address space */
char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */
struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */
@ -222,6 +231,8 @@ TUNABLE_INT("hw.vmm.force_iommu", &vmm_force_iommu);
SYSCTL_INT(_hw_vmm, OID_AUTO, force_iommu, CTLFLAG_RDTUN, &vmm_force_iommu, 0,
"Force use of I/O MMU even if no passthrough devices were found.");
static void vm_free_memmap(struct vm *vm, int ident);
static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr);
#ifdef KTR
@ -442,7 +453,6 @@ vm_create(const char *name, struct vm **retvm)
vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO);
strcpy(vm->name, name);
vm->num_mem_segs = 0;
vm->vmspace = vmspace;
mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF);
@ -452,19 +462,10 @@ vm_create(const char *name, struct vm **retvm)
return (0);
}
static void
vm_free_mem_seg(struct vm *vm, struct mem_seg *seg)
{
if (seg->object != NULL)
vmm_mem_free(vm->vmspace, seg->gpa, seg->len);
bzero(seg, sizeof(*seg));
}
static void
vm_cleanup(struct vm *vm, bool destroy)
{
struct mem_map *mm;
int i;
ppt_unassign_all(vm);
@ -487,11 +488,23 @@ vm_cleanup(struct vm *vm, bool destroy)
VMCLEANUP(vm->cookie);
if (destroy) {
for (i = 0; i < vm->num_mem_segs; i++)
vm_free_mem_seg(vm, &vm->mem_segs[i]);
/*
* System memory is removed from the guest address space only when
* the VM is destroyed. This is because the mapping remains the same
* across VM reset.
*
* Device memory can be relocated by the guest (e.g. using PCI BARs)
* so those mappings are removed on a VM reset.
*/
for (i = 0; i < VM_MAX_MEMMAPS; i++) {
mm = &vm->mem_maps[i];
if (destroy || !sysmem_mapping(vm, mm))
vm_free_memmap(vm, i);
}
vm->num_mem_segs = 0;
if (destroy) {
for (i = 0; i < VM_MAX_MEMSEGS; i++)
vm_free_memseg(vm, i);
VMSPACE_FREE(vm->vmspace);
vm->vmspace = NULL;
@ -549,146 +562,243 @@ vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
return (0);
}
boolean_t
vm_mem_allocated(struct vm *vm, vm_paddr_t gpa)
/*
* Return 'true' if 'gpa' is allocated in the guest address space.
*
* This function is called in the context of a running vcpu which acts as
* an implicit lock on 'vm->mem_maps[]'.
*/
bool
vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa)
{
struct mem_map *mm;
int i;
vm_paddr_t gpabase, gpalimit;
for (i = 0; i < vm->num_mem_segs; i++) {
gpabase = vm->mem_segs[i].gpa;
gpalimit = gpabase + vm->mem_segs[i].len;
if (gpa >= gpabase && gpa < gpalimit)
return (TRUE); /* 'gpa' is regular memory */
#ifdef INVARIANTS
int hostcpu, state;
state = vcpu_get_state(vm, vcpuid, &hostcpu);
KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
#endif
for (i = 0; i < VM_MAX_MEMMAPS; i++) {
mm = &vm->mem_maps[i];
if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
return (true); /* 'gpa' is sysmem or devmem */
}
if (ppt_is_mmio(vm, gpa))
return (TRUE); /* 'gpa' is pci passthru mmio */
return (true); /* 'gpa' is pci passthru mmio */
return (FALSE);
return (false);
}
int
vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len)
vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
{
int available, allocated;
struct mem_seg *seg;
vm_object_t object;
vm_paddr_t g;
vm_object_t obj;
if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0)
if (ident < 0 || ident >= VM_MAX_MEMSEGS)
return (EINVAL);
available = allocated = 0;
g = gpa;
while (g < gpa + len) {
if (vm_mem_allocated(vm, g))
allocated++;
else
available++;
g += PAGE_SIZE;
if (len == 0 || (len & PAGE_MASK))
return (EINVAL);
seg = &vm->mem_segs[ident];
if (seg->object != NULL) {
if (seg->len == len && seg->sysmem == sysmem)
return (EEXIST);
else
return (EINVAL);
}
/*
* If there are some allocated and some available pages in the address
* range then it is an error.
*/
if (allocated && available)
return (EINVAL);
/*
* If the entire address range being requested has already been
* allocated then there isn't anything more to do.
*/
if (allocated && available == 0)
return (0);
if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS)
return (E2BIG);
seg = &vm->mem_segs[vm->num_mem_segs];
if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL)
obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
if (obj == NULL)
return (ENOMEM);
seg->gpa = gpa;
seg->len = len;
seg->object = object;
seg->wired = FALSE;
vm->num_mem_segs++;
seg->object = obj;
seg->sysmem = sysmem;
return (0);
}
static vm_paddr_t
vm_maxmem(struct vm *vm)
int
vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
vm_object_t *objptr)
{
int i;
vm_paddr_t gpa, maxmem;
struct mem_seg *seg;
maxmem = 0;
for (i = 0; i < vm->num_mem_segs; i++) {
gpa = vm->mem_segs[i].gpa + vm->mem_segs[i].len;
if (gpa > maxmem)
maxmem = gpa;
if (ident < 0 || ident >= VM_MAX_MEMSEGS)
return (EINVAL);
seg = &vm->mem_segs[ident];
if (len)
*len = seg->len;
if (sysmem)
*sysmem = seg->sysmem;
if (objptr)
*objptr = seg->object;
return (0);
}
void
vm_free_memseg(struct vm *vm, int ident)
{
struct mem_seg *seg;
KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
("%s: invalid memseg ident %d", __func__, ident));
seg = &vm->mem_segs[ident];
if (seg->object != NULL) {
vm_object_deallocate(seg->object);
bzero(seg, sizeof(struct mem_seg));
}
}
int
vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
size_t len, int prot, int flags)
{
struct mem_seg *seg;
struct mem_map *m, *map;
vm_ooffset_t last;
int i, error;
if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
return (EINVAL);
if (flags & ~VM_MEMMAP_F_WIRED)
return (EINVAL);
if (segid < 0 || segid >= VM_MAX_MEMSEGS)
return (EINVAL);
seg = &vm->mem_segs[segid];
if (seg->object == NULL)
return (EINVAL);
last = first + len;
if (first < 0 || first >= last || last > seg->len)
return (EINVAL);
if ((gpa | first | last) & PAGE_MASK)
return (EINVAL);
map = NULL;
for (i = 0; i < VM_MAX_MEMMAPS; i++) {
m = &vm->mem_maps[i];
if (m->len == 0) {
map = m;
break;
}
}
if (map == NULL)
return (ENOSPC);
error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
len, 0, VMFS_NO_SPACE, prot, prot, 0);
if (error != KERN_SUCCESS)
return (EFAULT);
vm_object_reference(seg->object);
if (flags & VM_MEMMAP_F_WIRED) {
error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
if (error != KERN_SUCCESS) {
vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
return (EFAULT);
}
}
map->gpa = gpa;
map->len = len;
map->segoff = first;
map->segid = segid;
map->prot = prot;
map->flags = flags;
return (0);
}
int
vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
{
struct mem_map *mm, *mmnext;
int i;
mmnext = NULL;
for (i = 0; i < VM_MAX_MEMMAPS; i++) {
mm = &vm->mem_maps[i];
if (mm->len == 0 || mm->gpa < *gpa)
continue;
if (mmnext == NULL || mm->gpa < mmnext->gpa)
mmnext = mm;
}
if (mmnext != NULL) {
*gpa = mmnext->gpa;
if (segid)
*segid = mmnext->segid;
if (segoff)
*segoff = mmnext->segoff;
if (len)
*len = mmnext->len;
if (prot)
*prot = mmnext->prot;
if (flags)
*flags = mmnext->flags;
return (0);
} else {
return (ENOENT);
}
return (maxmem);
}
static void
vm_gpa_unwire(struct vm *vm)
vm_free_memmap(struct vm *vm, int ident)
{
int i, rv;
struct mem_seg *seg;
struct mem_map *mm;
int error;
for (i = 0; i < vm->num_mem_segs; i++) {
seg = &vm->mem_segs[i];
if (!seg->wired)
continue;
rv = vm_map_unwire(&vm->vmspace->vm_map,
seg->gpa, seg->gpa + seg->len,
VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment "
"%#lx/%ld could not be unwired: %d",
vm_name(vm), seg->gpa, seg->len, rv));
seg->wired = FALSE;
mm = &vm->mem_maps[ident];
if (mm->len) {
error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa,
mm->gpa + mm->len);
KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
__func__, error));
bzero(mm, sizeof(struct mem_map));
}
}
static int
vm_gpa_wire(struct vm *vm)
static __inline bool
sysmem_mapping(struct vm *vm, struct mem_map *mm)
{
int i, rv;
struct mem_seg *seg;
for (i = 0; i < vm->num_mem_segs; i++) {
seg = &vm->mem_segs[i];
if (seg->wired)
continue;
if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem)
return (true);
else
return (false);
}
/* XXX rlimits? */
rv = vm_map_wire(&vm->vmspace->vm_map,
seg->gpa, seg->gpa + seg->len,
VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
if (rv != KERN_SUCCESS)
break;
static vm_paddr_t
sysmem_maxaddr(struct vm *vm)
{
struct mem_map *mm;
vm_paddr_t maxaddr;
int i;
seg->wired = TRUE;
maxaddr = 0;
for (i = 0; i < VM_MAX_MEMMAPS; i++) {
mm = &vm->mem_maps[i];
if (sysmem_mapping(vm, mm)) {
if (maxaddr < mm->gpa + mm->len)
maxaddr = mm->gpa + mm->len;
}
}
if (i < vm->num_mem_segs) {
/*
* Undo the wiring before returning an error.
*/
vm_gpa_unwire(vm);
return (EAGAIN);
}
return (0);
return (maxaddr);
}
static void
@ -696,20 +806,36 @@ vm_iommu_modify(struct vm *vm, boolean_t map)
{
int i, sz;
vm_paddr_t gpa, hpa;
struct mem_seg *seg;
struct mem_map *mm;
void *vp, *cookie, *host_domain;
sz = PAGE_SIZE;
host_domain = iommu_host_domain();
for (i = 0; i < vm->num_mem_segs; i++) {
seg = &vm->mem_segs[i];
KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired",
vm_name(vm), seg->gpa, seg->len));
for (i = 0; i < VM_MAX_MEMMAPS; i++) {
mm = &vm->mem_maps[i];
if (!sysmem_mapping(vm, mm))
continue;
gpa = seg->gpa;
while (gpa < seg->gpa + seg->len) {
vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE,
if (map) {
KASSERT((mm->flags & VM_MEMMAP_F_IOMMU) == 0,
("iommu map found invalid memmap %#lx/%#lx/%#x",
mm->gpa, mm->len, mm->flags));
if ((mm->flags & VM_MEMMAP_F_WIRED) == 0)
continue;
mm->flags |= VM_MEMMAP_F_IOMMU;
} else {
if ((mm->flags & VM_MEMMAP_F_IOMMU) == 0)
continue;
mm->flags &= ~VM_MEMMAP_F_IOMMU;
KASSERT((mm->flags & VM_MEMMAP_F_WIRED) != 0,
("iommu unmap found invalid memmap %#lx/%#lx/%#x",
mm->gpa, mm->len, mm->flags));
}
gpa = mm->gpa;
while (gpa < mm->gpa + mm->len) {
vp = vm_gpa_hold(vm, -1, gpa, PAGE_SIZE, VM_PROT_WRITE,
&cookie);
KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx",
vm_name(vm), gpa));
@ -751,10 +877,9 @@ vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func)
if (error)
return (error);
if (ppt_assigned_devices(vm) == 0) {
if (ppt_assigned_devices(vm) == 0)
vm_iommu_unmap(vm);
vm_gpa_unwire(vm);
}
return (0);
}
@ -764,23 +889,12 @@ vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
int error;
vm_paddr_t maxaddr;
/*
* Virtual machines with pci passthru devices get special treatment:
* - the guest physical memory is wired
* - the iommu is programmed to do the 'gpa' to 'hpa' translation
*
* We need to do this before the first pci passthru device is attached.
*/
/* Set up the IOMMU to do the 'gpa' to 'hpa' translation */
if (ppt_assigned_devices(vm) == 0) {
KASSERT(vm->iommu == NULL,
("vm_assign_pptdev: iommu must be NULL"));
maxaddr = vm_maxmem(vm);
maxaddr = sysmem_maxaddr(vm);
vm->iommu = iommu_create_domain(maxaddr);
error = vm_gpa_wire(vm);
if (error)
return (error);
vm_iommu_map(vm);
}
@ -789,18 +903,43 @@ vm_assign_pptdev(struct vm *vm, int bus, int slot, int func)
}
void *
vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot,
vm_gpa_hold(struct vm *vm, int vcpuid, vm_paddr_t gpa, size_t len, int reqprot,
void **cookie)
{
int count, pageoff;
int i, count, pageoff;
struct mem_map *mm;
vm_page_t m;
#ifdef INVARIANTS
/*
* All vcpus are frozen by ioctls that modify the memory map
* (e.g. VM_MMAP_MEMSEG). Therefore 'vm->memmap[]' stability is
* guaranteed if at least one vcpu is in the VCPU_FROZEN state.
*/
int state;
KASSERT(vcpuid >= -1 || vcpuid < VM_MAXCPU, ("%s: invalid vcpuid %d",
__func__, vcpuid));
for (i = 0; i < VM_MAXCPU; i++) {
if (vcpuid != -1 && vcpuid != i)
continue;
state = vcpu_get_state(vm, i, NULL);
KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
__func__, state));
}
#endif
pageoff = gpa & PAGE_MASK;
if (len > PAGE_SIZE - pageoff)
panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
count = 0;
for (i = 0; i < VM_MAX_MEMMAPS; i++) {
mm = &vm->mem_maps[i];
if (sysmem_mapping(vm, mm) && gpa >= mm->gpa &&
gpa < mm->gpa + mm->len) {
count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
break;
}
}
if (count == 1) {
*cookie = m;
@ -821,50 +960,6 @@ vm_gpa_release(void *cookie)
vm_page_unlock(m);
}
int
vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase,
struct vm_memory_segment *seg)
{
int i;
for (i = 0; i < vm->num_mem_segs; i++) {
if (gpabase == vm->mem_segs[i].gpa) {
seg->gpa = vm->mem_segs[i].gpa;
seg->len = vm->mem_segs[i].len;
seg->wired = vm->mem_segs[i].wired;
return (0);
}
}
return (-1);
}
int
vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len,
vm_offset_t *offset, struct vm_object **object)
{
int i;
size_t seg_len;
vm_paddr_t seg_gpa;
vm_object_t seg_obj;
for (i = 0; i < vm->num_mem_segs; i++) {
if ((seg_obj = vm->mem_segs[i].object) == NULL)
continue;
seg_gpa = vm->mem_segs[i].gpa;
seg_len = vm->mem_segs[i].len;
if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) {
*offset = gpa - seg_gpa;
*object = seg_obj;
vm_object_reference(seg_obj);
return (0);
}
}
return (EINVAL);
}
int
vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
{
@ -2423,8 +2518,8 @@ vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
}
for (idx = 0; idx < nused; idx++) {
hva = vm_gpa_hold(vm, copyinfo[idx].gpa, copyinfo[idx].len,
prot, &cookie);
hva = vm_gpa_hold(vm, vcpuid, copyinfo[idx].gpa,
copyinfo[idx].len, prot, &cookie);
if (hva == NULL)
break;
copyinfo[idx].hva = hva;

View File

@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_object.h>
#include <machine/vmparam.h>
#include <machine/vmm.h>
@ -60,10 +61,19 @@ __FBSDID("$FreeBSD$");
#include "io/vhpet.h"
#include "io/vrtc.h"
struct devmem_softc {
int segid;
char *name;
struct cdev *cdev;
struct vmmdev_softc *sc;
SLIST_ENTRY(devmem_softc) link;
};
struct vmmdev_softc {
struct vm *vm; /* vm instance cookie */
struct cdev *cdev;
SLIST_ENTRY(vmmdev_softc) link;
SLIST_HEAD(, devmem_softc) devmem;
int flags;
};
#define VSC_LINKED 0x01
@ -76,6 +86,63 @@ static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev");
SYSCTL_DECL(_hw_vmm);
static int devmem_create_cdev(const char *vmname, int id, char *devmem);
static void devmem_destroy(void *arg);
static int
vcpu_lock_one(struct vmmdev_softc *sc, int vcpu)
{
int error;
if (vcpu < 0 || vcpu >= VM_MAXCPU)
return (EINVAL);
error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
return (error);
}
static void
vcpu_unlock_one(struct vmmdev_softc *sc, int vcpu)
{
enum vcpu_state state;
state = vcpu_get_state(sc->vm, vcpu, NULL);
if (state != VCPU_FROZEN) {
panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm),
vcpu, state);
}
vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
}
static int
vcpu_lock_all(struct vmmdev_softc *sc)
{
int error, vcpu;
for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
error = vcpu_lock_one(sc, vcpu);
if (error)
break;
}
if (error) {
while (--vcpu >= 0)
vcpu_unlock_one(sc, vcpu);
}
return (error);
}
static void
vcpu_unlock_all(struct vmmdev_softc *sc)
{
int vcpu;
for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
vcpu_unlock_one(sc, vcpu);
}
static struct vmmdev_softc *
vmmdev_lookup(const char *name)
{
@ -108,12 +175,16 @@ vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
void *hpa, *cookie;
struct vmmdev_softc *sc;
static char zerobuf[PAGE_SIZE];
error = 0;
sc = vmmdev_lookup2(cdev);
if (sc == NULL)
error = ENXIO;
return (ENXIO);
/*
* Get a read lock on the guest memory map by freezing any vcpu.
*/
error = vcpu_lock_one(sc, VM_MAXCPU - 1);
if (error)
return (error);
prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ);
while (uio->uio_resid > 0 && error == 0) {
@ -129,10 +200,11 @@ vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
* Since this device does not support lseek(2), dd(1) will
* read(2) blocks of data to simulate the lseek(2).
*/
hpa = vm_gpa_hold(sc->vm, gpa, c, prot, &cookie);
hpa = vm_gpa_hold(sc->vm, VM_MAXCPU - 1, gpa, c, prot, &cookie);
if (hpa == NULL) {
if (uio->uio_rw == UIO_READ)
error = uiomove(zerobuf, c, uio);
error = uiomove(__DECONST(void *, zero_region),
c, uio);
else
error = EFAULT;
} else {
@ -140,6 +212,70 @@ vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags)
vm_gpa_release(cookie);
}
}
vcpu_unlock_one(sc, VM_MAXCPU - 1);
return (error);
}
CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= SPECNAMELEN + 1);
static int
get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
{
struct devmem_softc *dsc;
int error;
bool sysmem;
error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL);
if (error || mseg->len == 0)
return (error);
if (!sysmem) {
SLIST_FOREACH(dsc, &sc->devmem, link) {
if (dsc->segid == mseg->segid)
break;
}
KASSERT(dsc != NULL, ("%s: devmem segment %d not found",
__func__, mseg->segid));
error = copystr(dsc->name, mseg->name, SPECNAMELEN + 1, NULL);
} else {
bzero(mseg->name, sizeof(mseg->name));
}
return (error);
}
static int
alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg)
{
char *name;
int error;
bool sysmem;
error = 0;
name = NULL;
sysmem = true;
if (VM_MEMSEG_NAME(mseg)) {
sysmem = false;
name = malloc(SPECNAMELEN + 1, M_VMMDEV, M_WAITOK);
error = copystr(VM_MEMSEG_NAME(mseg), name, SPECNAMELEN + 1, 0);
if (error)
goto done;
}
error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem);
if (error)
goto done;
if (VM_MEMSEG_NAME(mseg)) {
error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name);
if (error)
vm_free_memseg(sc->vm, mseg->segid);
else
name = NULL; /* freed when 'cdev' is destroyed */
}
done:
free(name, M_VMMDEV);
return (error);
}
@ -150,7 +286,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
int error, vcpu, state_changed, size;
cpuset_t *cpuset;
struct vmmdev_softc *sc;
struct vm_memory_segment *seg;
struct vm_register *vmreg;
struct vm_seg_desc *vmsegdesc;
struct vm_run *vmrun;
@ -177,6 +312,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct vm_intinfo *vmii;
struct vm_rtc_time *rtctime;
struct vm_rtc_data *rtcdata;
struct vm_memmap *mm;
sc = vmmdev_lookup2(cdev);
if (sc == NULL)
@ -211,43 +347,41 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
* Assumes that the first field of the ioctl data is the vcpu.
*/
vcpu = *(int *)data;
if (vcpu < 0 || vcpu >= VM_MAXCPU) {
error = EINVAL;
goto done;
}
error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
error = vcpu_lock_one(sc, vcpu);
if (error)
goto done;
state_changed = 1;
break;
case VM_MAP_PPTDEV_MMIO:
case VM_BIND_PPTDEV:
case VM_UNBIND_PPTDEV:
case VM_MAP_MEMORY:
case VM_ALLOC_MEMSEG:
case VM_MMAP_MEMSEG:
case VM_REINIT:
/*
* ioctls that operate on the entire virtual machine must
* prevent all vcpus from running.
*/
error = 0;
for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) {
error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true);
if (error)
break;
}
if (error) {
while (--vcpu >= 0)
vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
error = vcpu_lock_all(sc);
if (error)
goto done;
}
state_changed = 2;
break;
case VM_GET_MEMSEG:
case VM_MMAP_GETNEXT:
/*
* Lock a vcpu to make sure that the memory map cannot be
* modified while it is being inspected.
*/
vcpu = VM_MAXCPU - 1;
error = vcpu_lock_one(sc, vcpu);
if (error)
goto done;
state_changed = 1;
break;
default:
break;
}
@ -372,15 +506,21 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
error = vatpic_set_irq_trigger(sc->vm,
isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger);
break;
case VM_MAP_MEMORY:
seg = (struct vm_memory_segment *)data;
error = vm_malloc(sc->vm, seg->gpa, seg->len);
case VM_MMAP_GETNEXT:
mm = (struct vm_memmap *)data;
error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid,
&mm->segoff, &mm->len, &mm->prot, &mm->flags);
break;
case VM_GET_MEMORY_SEG:
seg = (struct vm_memory_segment *)data;
seg->len = 0;
(void)vm_gpabase2memseg(sc->vm, seg->gpa, seg);
error = 0;
case VM_MMAP_MEMSEG:
mm = (struct vm_memmap *)data;
error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff,
mm->len, mm->prot, mm->flags);
break;
case VM_ALLOC_MEMSEG:
error = alloc_memseg(sc, (struct vm_memseg *)data);
break;
case VM_GET_MEMSEG:
error = get_memseg(sc, (struct vm_memseg *)data);
break;
case VM_GET_REGISTER:
vmreg = (struct vm_register *)data;
@ -505,12 +645,10 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
break;
}
if (state_changed == 1) {
vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
} else if (state_changed == 2) {
for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++)
vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false);
}
if (state_changed == 1)
vcpu_unlock_one(sc, vcpu);
else if (state_changed == 2)
vcpu_unlock_all(sc);
done:
/* Make sure that no handler returns a bogus value like ERESTART */
@ -519,26 +657,79 @@ done:
}
static int
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset,
vm_size_t size, struct vm_object **object, int nprot)
vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize,
struct vm_object **objp, int nprot)
{
int error;
struct vmmdev_softc *sc;
vm_paddr_t gpa;
size_t len;
vm_ooffset_t segoff, first, last;
int error, found, segid;
bool sysmem;
first = *offset;
last = first + mapsize;
if ((nprot & PROT_EXEC) || first < 0 || first >= last)
return (EINVAL);
sc = vmmdev_lookup2(cdev);
if (sc != NULL && (nprot & PROT_EXEC) == 0)
error = vm_get_memobj(sc->vm, *offset, size, offset, object);
else
error = EINVAL;
if (sc == NULL) {
/* virtual machine is in the process of being created */
return (EINVAL);
}
/*
* Get a read lock on the guest memory map by freezing any vcpu.
*/
error = vcpu_lock_one(sc, VM_MAXCPU - 1);
if (error)
return (error);
gpa = 0;
found = 0;
while (!found) {
error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len,
NULL, NULL);
if (error)
break;
if (first >= gpa && last <= gpa + len)
found = 1;
else
gpa += len;
}
if (found) {
error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp);
KASSERT(error == 0 && *objp != NULL,
("%s: invalid memory segment %d", __func__, segid));
if (sysmem) {
vm_object_reference(*objp);
*offset = segoff + (first - gpa);
} else {
error = EINVAL;
}
}
vcpu_unlock_one(sc, VM_MAXCPU - 1);
return (error);
}
static void
vmmdev_destroy(void *arg)
{
struct vmmdev_softc *sc = arg;
struct devmem_softc *dsc;
int error;
error = vcpu_lock_all(sc);
KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error));
while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) {
KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__));
SLIST_REMOVE_HEAD(&sc->devmem, link);
free(dsc->name, M_VMMDEV);
free(dsc, M_VMMDEV);
}
if (sc->cdev != NULL)
destroy_dev(sc->cdev);
@ -560,6 +751,7 @@ sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
{
int error;
char buf[VM_MAX_NAMELEN];
struct devmem_softc *dsc;
struct vmmdev_softc *sc;
struct cdev *cdev;
@ -578,22 +770,30 @@ sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS)
/*
* The 'cdev' will be destroyed asynchronously when 'si_threadcount'
* goes down to 0 so we should not do it again in the callback.
*
* Setting 'sc->cdev' to NULL is also used to indicate that the VM
* is scheduled for destruction.
*/
cdev = sc->cdev;
sc->cdev = NULL;
mtx_unlock(&vmmdev_mtx);
/*
* Schedule the 'cdev' to be destroyed:
* Schedule all cdevs to be destroyed:
*
* - any new operations on this 'cdev' will return an error (ENXIO).
* - any new operations on the 'cdev' will return an error (ENXIO).
*
* - when the 'si_threadcount' dwindles down to zero the 'cdev' will
* be destroyed and the callback will be invoked in a taskqueue
* context.
*
* - the 'devmem' cdevs are destroyed before the virtual machine 'cdev'
*/
SLIST_FOREACH(dsc, &sc->devmem, link) {
KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed"));
destroy_dev_sched_cb(dsc->cdev, devmem_destroy, dsc);
}
destroy_dev_sched_cb(cdev, vmmdev_destroy, sc);
return (0);
}
SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW,
@ -634,6 +834,7 @@ sysctl_vmm_create(SYSCTL_HANDLER_ARGS)
sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO);
sc->vm = vm;
SLIST_INIT(&sc->devmem);
/*
* Lookup the name again just in case somebody sneaked in when we
@ -687,3 +888,96 @@ vmmdev_cleanup(void)
return (error);
}
static int
devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len,
struct vm_object **objp, int nprot)
{
struct devmem_softc *dsc;
vm_ooffset_t first, last;
size_t seglen;
int error;
bool sysmem;
dsc = cdev->si_drv1;
if (dsc == NULL) {
/* 'cdev' has been created but is not ready for use */
return (ENXIO);
}
first = *offset;
last = *offset + len;
if ((nprot & PROT_EXEC) || first < 0 || first >= last)
return (EINVAL);
error = vcpu_lock_one(dsc->sc, VM_MAXCPU - 1);
if (error)
return (error);
error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp);
KASSERT(error == 0 && !sysmem && *objp != NULL,
("%s: invalid devmem segment %d", __func__, dsc->segid));
vcpu_unlock_one(dsc->sc, VM_MAXCPU - 1);
if (seglen >= last) {
vm_object_reference(*objp);
return (0);
} else {
return (EINVAL);
}
}
static struct cdevsw devmemsw = {
.d_name = "devmem",
.d_version = D_VERSION,
.d_mmap_single = devmem_mmap_single,
};
static int
devmem_create_cdev(const char *vmname, int segid, char *devname)
{
struct devmem_softc *dsc;
struct vmmdev_softc *sc;
struct cdev *cdev;
int error;
error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL,
UID_ROOT, GID_WHEEL, 0600, "vmm/%s.%s", vmname, devname);
if (error)
return (error);
dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO);
mtx_lock(&vmmdev_mtx);
sc = vmmdev_lookup(vmname);
KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname));
if (sc->cdev == NULL) {
/* virtual machine is being created or destroyed */
mtx_unlock(&vmmdev_mtx);
free(dsc, M_VMMDEV);
destroy_dev_sched_cb(cdev, NULL, 0);
return (ENODEV);
}
dsc->segid = segid;
dsc->name = devname;
dsc->cdev = cdev;
dsc->sc = sc;
SLIST_INSERT_HEAD(&sc->devmem, dsc, link);
mtx_unlock(&vmmdev_mtx);
/* The 'cdev' is ready for use after 'si_drv1' is initialized */
cdev->si_drv1 = dsc;
return (0);
}
static void
devmem_destroy(void *arg)
{
struct devmem_softc *dsc = arg;
KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__));
dsc->cdev = NULL;
dsc->sc = NULL;
}

View File

@ -1677,12 +1677,12 @@ ptp_release(void **cookie)
}
static void *
ptp_hold(struct vm *vm, vm_paddr_t ptpphys, size_t len, void **cookie)
ptp_hold(struct vm *vm, int vcpu, vm_paddr_t ptpphys, size_t len, void **cookie)
{
void *ptr;
ptp_release(cookie);
ptr = vm_gpa_hold(vm, ptpphys, len, VM_PROT_RW, cookie);
ptr = vm_gpa_hold(vm, vcpu, ptpphys, len, VM_PROT_RW, cookie);
return (ptr);
}
@ -1729,7 +1729,8 @@ restart:
/* Zero out the lower 12 bits. */
ptpphys &= ~0xfff;
ptpbase32 = ptp_hold(vm, ptpphys, PAGE_SIZE, &cookie);
ptpbase32 = ptp_hold(vm, vcpuid, ptpphys, PAGE_SIZE,
&cookie);
if (ptpbase32 == NULL)
goto error;
@ -1788,7 +1789,8 @@ restart:
/* Zero out the lower 5 bits and the upper 32 bits */
ptpphys &= 0xffffffe0UL;
ptpbase = ptp_hold(vm, ptpphys, sizeof(*ptpbase) * 4, &cookie);
ptpbase = ptp_hold(vm, vcpuid, ptpphys, sizeof(*ptpbase) * 4,
&cookie);
if (ptpbase == NULL)
goto error;
@ -1811,7 +1813,7 @@ restart:
/* Zero out the lower 12 bits and the upper 12 bits */
ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12;
ptpbase = ptp_hold(vm, ptpphys, PAGE_SIZE, &cookie);
ptpbase = ptp_hold(vm, vcpuid, ptpphys, PAGE_SIZE, &cookie);
if (ptpbase == NULL)
goto error;

View File

@ -114,38 +114,6 @@ vmm_mmio_free(struct vmspace *vmspace, vm_paddr_t gpa, size_t len)
vm_map_remove(&vmspace->vm_map, gpa, gpa + len);
}
vm_object_t
vmm_mem_alloc(struct vmspace *vmspace, vm_paddr_t gpa, size_t len)
{
int error;
vm_object_t obj;
if (gpa & PAGE_MASK)
panic("vmm_mem_alloc: invalid gpa %#lx", gpa);
if (len == 0 || (len & PAGE_MASK) != 0)
panic("vmm_mem_alloc: invalid allocation size %lu", len);
obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
if (obj != NULL) {
error = vm_map_find(&vmspace->vm_map, obj, 0, &gpa, len, 0,
VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0);
if (error != KERN_SUCCESS) {
vm_object_deallocate(obj);
obj = NULL;
}
}
return (obj);
}
void
vmm_mem_free(struct vmspace *vmspace, vm_paddr_t gpa, size_t len)
{
vm_map_remove(&vmspace->vm_map, gpa, gpa + len);
}
vm_paddr_t
vmm_mem_maxaddr(void)
{

View File

@ -33,10 +33,8 @@ struct vmspace;
struct vm_object;
int vmm_mem_init(void);
struct vm_object *vmm_mem_alloc(struct vmspace *, vm_paddr_t gpa, size_t size);
struct vm_object *vmm_mmio_alloc(struct vmspace *, vm_paddr_t gpa, size_t len,
vm_paddr_t hpa);
void vmm_mem_free(struct vmspace *, vm_paddr_t gpa, size_t size);
void vmm_mmio_free(struct vmspace *, vm_paddr_t gpa, size_t size);
vm_paddr_t vmm_mem_maxaddr(void);

View File

@ -13,6 +13,7 @@ SRCS= \
acpi.c \
bhyverun.c \
block_if.c \
bootrom.c \
consport.c \
dbgport.c \
inout.c \

View File

@ -32,7 +32,7 @@
.Nd "run a guest operating system inside a virtual machine"
.Sh SYNOPSIS
.Nm
.Op Fl abehuwxACHPWY
.Op Fl abehuwxACHPSWY
.Op Fl c Ar numcpus
.Op Fl g Ar gdbport
.Op Fl l Ar lpcdev Ns Op , Ns Ar conf
@ -99,10 +99,12 @@ Yield the virtual CPU thread when a HLT instruction is detected.
If this option is not specified, virtual CPUs will use 100% of a host CPU.
.It Fl l Ar lpcdev Ns Op , Ns Ar conf
Allow devices behind the LPC PCI-ISA bridge to be configured.
The only supported devices are the TTY-class devices,
.Li com1
The only supported devices are the TTY-class devices
.Ar com1
and
.Li com2 .
.Ar com2
and the boot ROM device
.Ar bootrom .
.It Fl m Ar size Ns Op Ar K|k|M|m|G|g|T|t
Guest physical memory size in bytes.
This must be the same size that was given to
@ -165,8 +167,8 @@ AHCI controller attached to a SATA hard-drive.
.It Li uart
PCI 16550 serial device.
.It Li lpc
LPC PCI-ISA bridge with COM1 and COM2 16550 serial ports. The LPC bridge
emulation can only be configured on bus 0.
LPC PCI-ISA bridge with COM1 and COM2 16550 serial ports and a boot ROM.
The LPC bridge emulation can only be configured on bus 0.
.El
.It Op Ar conf
This optional parameter describes the backend for device emulations.
@ -224,6 +226,14 @@ the bhyve process.
Use the host TTY device for serial port I/O.
.El
.Pp
Boot ROM device:
.Bl -tag -width 10n
.It Pa romfile
Map
.Ar romfile
in the guest address space reserved for boot firmware.
.El
.Pp
Pass-through devices:
.Bl -tag -width 10n
.It Ns Ar slot Ns / Ns Ar bus Ns / Ns Ar function
@ -235,11 +245,17 @@ and
numbers.
.El
.Pp
Guest memory must be wired using the
.Fl S
option when a pass-through device is configured.
.Pp
The host device must have been reserved at boot-time using the
.Va pptdev
loader variable as described in
.Xr vmm 4 .
.El
.It Fl S
Wire guest memory.
.It Fl u
RTC keeps UTC time.
.It Fl U Ar uuid

View File

@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$");
#include <pthread.h>
#include <pthread_np.h>
#include <sysexits.h>
#include <stdbool.h>
#include <machine/vmm.h>
#include <vmmapi.h>
@ -122,7 +123,7 @@ usage(int code)
{
fprintf(stderr,
"Usage: %s [-abehuwxACHPWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n"
"Usage: %s [-abehuwxACHPSWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n"
" %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n"
" -a: local apic is in xAPIC mode (deprecated)\n"
" -A: create ACPI tables\n"
@ -137,6 +138,7 @@ usage(int code)
" -p: pin 'vcpu' to 'hostcpu'\n"
" -P: vmexit from the guest on pause\n"
" -s: <slot,driver,configinfo> PCI slot config\n"
" -S: guest memory cannot be swapped\n"
" -u: RTC keeps UTC time\n"
" -U: uuid\n"
" -w: ignore unimplemented MSRs\n"
@ -700,26 +702,82 @@ fbsdrun_set_capabilities(struct vmctx *ctx, int cpu)
vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1);
}
static struct vmctx *
do_open(const char *vmname)
{
struct vmctx *ctx;
int error;
bool reinit, romboot;
reinit = romboot = false;
if (lpc_bootrom())
romboot = true;
error = vm_create(vmname);
if (error) {
if (errno == EEXIST) {
if (romboot) {
reinit = true;
} else {
/*
* The virtual machine has been setup by the
* userspace bootloader.
*/
}
} else {
perror("vm_create");
exit(1);
}
} else {
if (!romboot) {
/*
* If the virtual machine was just created then a
* bootrom must be configured to boot it.
*/
fprintf(stderr, "virtual machine cannot be booted\n");
exit(1);
}
}
ctx = vm_open(vmname);
if (ctx == NULL) {
perror("vm_open");
exit(1);
}
if (reinit) {
error = vm_reinit(ctx);
if (error) {
perror("vm_reinit");
exit(1);
}
}
return (ctx);
}
int
main(int argc, char *argv[])
{
int c, error, gdb_port, err, bvmcons;
int dump_guest_memory, max_vcpus, mptgen;
int max_vcpus, mptgen, memflags;
int rtc_localtime;
struct vmctx *ctx;
uint64_t rip;
size_t memsize;
char *optstr;
bvmcons = 0;
dump_guest_memory = 0;
progname = basename(argv[0]);
gdb_port = 0;
guest_ncpus = 1;
memsize = 256 * MB;
mptgen = 1;
rtc_localtime = 1;
memflags = 0;
while ((c = getopt(argc, argv, "abehuwxACHIPWYp:g:c:s:m:l:U:")) != -1) {
optstr = "abehuwxACHIPSWYp:g:c:s:m:l:U:";
while ((c = getopt(argc, argv, optstr)) != -1) {
switch (c) {
case 'a':
x2apic_mode = 0;
@ -740,7 +798,7 @@ main(int argc, char *argv[])
guest_ncpus = atoi(optarg);
break;
case 'C':
dump_guest_memory = 1;
memflags |= VM_MEM_F_INCORE;
break;
case 'g':
gdb_port = atoi(optarg);
@ -756,6 +814,9 @@ main(int argc, char *argv[])
exit(1);
else
break;
case 'S':
memflags |= VM_MEM_F_WIRED;
break;
case 'm':
error = vm_parse_memsize(optarg, &memsize);
if (error)
@ -810,12 +871,7 @@ main(int argc, char *argv[])
usage(1);
vmname = argv[0];
ctx = vm_open(vmname);
if (ctx == NULL) {
perror("vm_open");
exit(1);
}
ctx = do_open(vmname);
if (guest_ncpus < 1) {
fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus);
@ -831,11 +887,10 @@ main(int argc, char *argv[])
fbsdrun_set_capabilities(ctx, BSP);
if (dump_guest_memory)
vm_set_memflags(ctx, VM_MEM_F_INCORE);
vm_set_memflags(ctx, memflags);
err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
if (err) {
fprintf(stderr, "Unable to setup memory (%d)\n", err);
fprintf(stderr, "Unable to setup memory (%d)\n", errno);
exit(1);
}
@ -865,6 +920,16 @@ main(int argc, char *argv[])
if (bvmcons)
init_bvmcons();
if (lpc_bootrom()) {
if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) {
fprintf(stderr, "ROM boot failed: unrestricted guest "
"capability not available\n");
exit(1);
}
error = vcpu_reset(ctx, BSP);
assert(error == 0);
}
error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip);
assert(error == 0);

111
usr.sbin/bhyve/bootrom.c Normal file
View File

@ -0,0 +1,111 @@
/*-
* Copyright (c) 2015 Neel Natu <neel@freebsd.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/param.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <machine/vmm.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdbool.h>
#include <vmmapi.h>
#include "bhyverun.h"
#include "bootrom.h"
#define MAX_BOOTROM_SIZE (16 * 1024 * 1024) /* 16 MB */
int
bootrom_init(struct vmctx *ctx, const char *romfile)
{
struct stat sbuf;
vm_paddr_t gpa;
ssize_t rlen;
char *ptr;
int fd, i, rv, prot;
rv = -1;
fd = open(romfile, O_RDONLY);
if (fd < 0) {
fprintf(stderr, "Error opening bootrom \"%s\": %s\n",
romfile, strerror(errno));
goto done;
}
if (fstat(fd, &sbuf) < 0) {
fprintf(stderr, "Could not fstat bootrom file \"%s\": %s\n",
romfile, strerror(errno));
goto done;
}
/*
* Limit bootrom size to 16MB so it doesn't encroach into reserved
* MMIO space (e.g. APIC, HPET, MSI).
*/
if (sbuf.st_size > MAX_BOOTROM_SIZE || sbuf.st_size < PAGE_SIZE) {
fprintf(stderr, "Invalid bootrom size %ld\n", sbuf.st_size);
goto done;
}
if (sbuf.st_size & PAGE_MASK) {
fprintf(stderr, "Bootrom size %ld is not a multiple of the "
"page size\n", sbuf.st_size);
goto done;
}
ptr = vm_create_devmem(ctx, VM_BOOTROM, "bootrom", sbuf.st_size);
if (ptr == MAP_FAILED)
goto done;
/* Map the bootrom into the guest address space */
prot = PROT_READ | PROT_EXEC;
gpa = (1ULL << 32) - sbuf.st_size;
if (vm_mmap_memseg(ctx, gpa, VM_BOOTROM, 0, sbuf.st_size, prot) != 0)
goto done;
/* Read 'romfile' into the guest address space */
for (i = 0; i < sbuf.st_size / PAGE_SIZE; i++) {
rlen = read(fd, ptr + i * PAGE_SIZE, PAGE_SIZE);
if (rlen != PAGE_SIZE) {
fprintf(stderr, "Incomplete read of page %d of bootrom "
"file %s: %ld bytes\n", i, romfile, rlen);
goto done;
}
}
rv = 0;
done:
if (fd >= 0)
close(fd);
return (rv);
}

38
usr.sbin/bhyve/bootrom.h Normal file
View File

@ -0,0 +1,38 @@
/*-
* Copyright (c) 2015 Neel Natu <neel@freebsd.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _BOOTROM_H_
#define _BOOTROM_H_
#include <stdbool.h>
struct vmctx;
int bootrom_init(struct vmctx *ctx, const char *romfile);
#endif

View File

@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
#include <vmmapi.h>
#include "acpi.h"
#include "bootrom.h"
#include "inout.h"
#include "pci_emul.h"
#include "pci_irq.h"
@ -62,6 +63,8 @@ SYSRES_IO(NMISC_PORT, 1);
static struct pci_devinst *lpc_bridge;
static const char *romfile;
#define LPC_UART_NUM 2
static struct lpc_uart_softc {
struct uart_softc *uart_softc;
@ -76,7 +79,7 @@ static const char *lpc_uart_names[LPC_UART_NUM] = { "COM1", "COM2" };
/*
* LPC device configuration is in the following form:
* <lpc_device_name>[,<options>]
* For e.g. "com1,stdio"
* For e.g. "com1,stdio" or "bootrom,/var/romfile"
*/
int
lpc_device_parse(const char *opts)
@ -88,6 +91,11 @@ lpc_device_parse(const char *opts)
str = cpy = strdup(opts);
lpcdev = strsep(&str, ",");
if (lpcdev != NULL) {
if (strcasecmp(lpcdev, "bootrom") == 0) {
romfile = str;
error = 0;
goto done;
}
for (unit = 0; unit < LPC_UART_NUM; unit++) {
if (strcasecmp(lpcdev, lpc_uart_names[unit]) == 0) {
lpc_uart_softc[unit].opts = str;
@ -104,6 +112,13 @@ done:
return (error);
}
const char *
lpc_bootrom(void)
{
return (romfile);
}
static void
lpc_uart_intr_assert(void *arg)
{
@ -156,13 +171,19 @@ lpc_uart_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
}
static int
lpc_init(void)
lpc_init(struct vmctx *ctx)
{
struct lpc_uart_softc *sc;
struct inout_port iop;
const char *name;
int unit, error;
if (romfile != NULL) {
error = bootrom_init(ctx, romfile);
if (error)
return (error);
}
/* COM1 and COM2 */
for (unit = 0; unit < LPC_UART_NUM; unit++) {
sc = &lpc_uart_softc[unit];
@ -379,7 +400,7 @@ pci_lpc_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
return (-1);
}
if (lpc_init() != 0)
if (lpc_init(ctx) != 0)
return (-1);
/* initialize config space */

View File

@ -68,5 +68,6 @@ struct lpc_sysres {
int lpc_device_parse(const char *opt);
char *lpc_pirq_name(int pin);
void lpc_pirq_routed(void);
const char *lpc_bootrom(void);
#endif

View File

@ -548,12 +548,18 @@ done:
static int
passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
int bus, slot, func, error;
int bus, slot, func, error, memflags;
struct passthru_softc *sc;
sc = NULL;
error = 1;
memflags = vm_get_memflags(ctx);
if (!(memflags & VM_MEM_F_WIRED)) {
fprintf(stderr, "passthru requires guest memory to be wired\n");
goto done;
}
if (pcifd < 0) {
pcifd = open(_PATH_DEVPCI, O_RDWR, 0);
if (pcifd < 0)

View File

@ -7,7 +7,7 @@ SRCS= bhyvectl.c
MAN=
LIBADD= vmmapi
LIBADD= vmmapi util
WARNS?= 3

View File

@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/errno.h>
#include <sys/mman.h>
#include <sys/cpuset.h>
#include <stdio.h>
#include <stdlib.h>
@ -47,10 +48,12 @@ __FBSDID("$FreeBSD$");
#include <getopt.h>
#include <time.h>
#include <assert.h>
#include <libutil.h>
#include <machine/cpufunc.h>
#include <machine/vmm.h>
#include <machine/specialreg.h>
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <vmmapi.h>
#include "amd/vmcb.h"
@ -236,7 +239,7 @@ static int get_stats, getcap, setcap, capval, get_gpa_pmap;
static int inject_nmi, assert_lapic_lvt;
static int force_reset, force_poweroff;
static const char *capname;
static int create, destroy, get_lowmem, get_highmem;
static int create, destroy, get_memmap, get_memseg;
static int get_intinfo;
static int get_active_cpus, get_suspended_cpus;
static uint64_t memsize;
@ -1320,8 +1323,8 @@ setup_options(bool cpu_intel)
{ "get-desc-gdtr", NO_ARG, &get_desc_gdtr, 1 },
{ "set-desc-idtr", NO_ARG, &set_desc_idtr, 1 },
{ "get-desc-idtr", NO_ARG, &get_desc_idtr, 1 },
{ "get-lowmem", NO_ARG, &get_lowmem, 1 },
{ "get-highmem",NO_ARG, &get_highmem, 1 },
{ "get-memmap", NO_ARG, &get_memmap, 1 },
{ "get-memseg", NO_ARG, &get_memseg, 1 },
{ "get-efer", NO_ARG, &get_efer, 1 },
{ "get-cr0", NO_ARG, &get_cr0, 1 },
{ "get-cr3", NO_ARG, &get_cr3, 1 },
@ -1520,18 +1523,92 @@ mon_str(int idx)
return ("UNK");
}
static int
show_memmap(struct vmctx *ctx)
{
char name[SPECNAMELEN + 1], numbuf[8];
vm_ooffset_t segoff;
vm_paddr_t gpa;
size_t maplen, seglen;
int error, flags, prot, segid, delim;
printf("Address Length Segment Offset ");
printf("Prot Flags\n");
gpa = 0;
while (1) {
error = vm_mmap_getnext(ctx, &gpa, &segid, &segoff, &maplen,
&prot, &flags);
if (error)
return (errno == ENOENT ? 0 : error);
error = vm_get_memseg(ctx, segid, &seglen, name, sizeof(name));
if (error)
return (error);
printf("%-12lX", gpa);
humanize_number(numbuf, sizeof(numbuf), maplen, "B",
HN_AUTOSCALE, HN_NOSPACE);
printf("%-12s", numbuf);
printf("%-12s", name[0] ? name : "sysmem");
printf("%-12lX", segoff);
printf("%c%c%c ", prot & PROT_READ ? 'R' : '-',
prot & PROT_WRITE ? 'W' : '-',
prot & PROT_EXEC ? 'X' : '-');
delim = '\0';
if (flags & VM_MEMMAP_F_WIRED) {
printf("%cwired", delim);
delim = '/';
}
if (flags & VM_MEMMAP_F_IOMMU) {
printf("%ciommu", delim);
delim = '/';
}
printf("\n");
gpa += maplen;
}
}
static int
show_memseg(struct vmctx *ctx)
{
char name[SPECNAMELEN + 1], numbuf[8];
size_t seglen;
int error, segid;
printf("ID Length Name\n");
segid = 0;
while (1) {
error = vm_get_memseg(ctx, segid, &seglen, name, sizeof(name));
if (error)
return (errno == EINVAL ? 0 : error);
if (seglen) {
printf("%-4d", segid);
humanize_number(numbuf, sizeof(numbuf), seglen, "B",
HN_AUTOSCALE, HN_NOSPACE);
printf("%-12s", numbuf);
printf("%s", name[0] ? name : "sysmem");
printf("\n");
}
segid++;
}
}
int
main(int argc, char *argv[])
{
char *vmname;
int error, ch, vcpu, ptenum;
vm_paddr_t gpa, gpa_pmap;
size_t len;
vm_paddr_t gpa_pmap;
struct vm_exit vmexit;
uint64_t rax, cr0, cr3, cr4, dr7, rsp, rip, rflags, efer, pat;
uint64_t eptp, bm, addr, u64, pteval[4], *pte, info[2];
struct vmctx *ctx;
int wired;
cpuset_t cpus;
bool cpu_intel;
uint64_t cs, ds, es, fs, gs, ss, tr, ldtr;
@ -1703,7 +1780,7 @@ main(int argc, char *argv[])
}
if (!error && memsize)
error = vm_setup_memory(ctx, memsize, VM_MMAP_NONE);
error = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
if (!error && set_efer)
error = vm_set_register(ctx, vcpu, VM_REG_GUEST_EFER, efer);
@ -1838,21 +1915,11 @@ main(int argc, char *argv[])
error = vm_lapic_local_irq(ctx, vcpu, assert_lapic_lvt);
}
if (!error && (get_lowmem || get_all)) {
gpa = 0;
error = vm_get_memory_seg(ctx, gpa, &len, &wired);
if (error == 0)
printf("lowmem\t\t0x%016lx/%ld%s\n", gpa, len,
wired ? " wired" : "");
}
if (!error && (get_memseg || get_all))
error = show_memseg(ctx);
if (!error && (get_highmem || get_all)) {
gpa = 4 * GB;
error = vm_get_memory_seg(ctx, gpa, &len, &wired);
if (error == 0)
printf("highmem\t\t0x%016lx/%ld%s\n", gpa, len,
wired ? " wired" : "");
}
if (!error && (get_memmap || get_all))
error = show_memmap(ctx);
if (!error)
error = get_all_registers(ctx, vcpu);

View File

@ -35,6 +35,7 @@
guest inside a bhyve virtual machine
.Sh SYNOPSIS
.Nm
.Op Fl S
.Op Fl c Ar cons-dev
.Op Fl d Ar disk-path
.Op Fl e Ar name=value
@ -111,8 +112,10 @@ respectively.
The default value of
.Ar mem-size
is 256M.
.El
.It Fl S
Wire guest memory.
.Sh EXAMPLES
.El
To create a virtual machine named
.Ar freebsd-vm
that boots off the ISO image

View File

@ -629,7 +629,7 @@ usage(void)
{
fprintf(stderr,
"usage: %s [-c <console-device>] [-d <disk-path>] [-e <name=value>]\n"
"usage: %s [-S][-c <console-device>] [-d <disk-path>] [-e <name=value>]\n"
" %*s [-h <host-path>] [-m mem-size] <vmname>\n",
progname,
(int)strlen(progname), "");
@ -642,16 +642,17 @@ main(int argc, char** argv)
void *h;
void (*func)(struct loader_callbacks *, void *, int, int);
uint64_t mem_size;
int opt, error, need_reinit;
int opt, error, need_reinit, memflags;
progname = basename(argv[0]);
memflags = 0;
mem_size = 256 * MB;
consin_fd = STDIN_FILENO;
consout_fd = STDOUT_FILENO;
while ((opt = getopt(argc, argv, "c:d:e:h:m:")) != -1) {
while ((opt = getopt(argc, argv, "Sc:d:e:h:m:")) != -1) {
switch (opt) {
case 'c':
error = altcons_open(optarg);
@ -678,6 +679,9 @@ main(int argc, char** argv)
if (error != 0)
errx(EX_USAGE, "Invalid memsize '%s'", optarg);
break;
case 'S':
memflags |= VM_MEM_F_WIRED;
break;
case '?':
usage();
}
@ -715,6 +719,7 @@ main(int argc, char** argv)
}
}
vm_set_memflags(ctx, memflags);
error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL);
if (error) {
perror("vm_setup_memory");