From 9b1aa8d622e84cde39a5c4e1542410956e38548c Mon Sep 17 00:00:00 2001 From: Neel Natu Date: Thu, 18 Jun 2015 06:00:17 +0000 Subject: [PATCH] Restructure memory allocation in bhyve to support "devmem". devmem is used to represent MMIO devices like the boot ROM or a VESA framebuffer where doing a trap-and-emulate for every access is impractical. devmem is a hybrid of system memory (sysmem) and emulated device models. devmem is mapped in the guest address space via nested page tables similar to sysmem. However the address range where devmem is mapped may be changed by the guest at runtime (e.g. by reprogramming a PCI BAR). Also devmem is usually mapped RO or RW as compared to RWX mappings for sysmem. Each devmem segment is named (e.g. "bootrom") and this name is used to create a device node for the devmem segment (e.g. /dev/vmm/testvm.bootrom). The device node supports mmap(2) and this decouples the host mapping of devmem from its mapping in the guest address space (which can change). Reviewed by: tychon Discussed with: grehan Differential Revision: https://reviews.freebsd.org/D2762 MFC after: 4 weeks --- lib/libvmmapi/vmmapi.c | 326 ++++++++++++++---- lib/libvmmapi/vmmapi.h | 52 ++- share/examples/bhyve/vmrun.sh | 12 +- sys/amd64/include/vmm.h | 35 +- sys/amd64/include/vmm_dev.h | 38 ++- sys/amd64/vmm/amd/svm.c | 2 +- sys/amd64/vmm/intel/vmx.c | 2 +- sys/amd64/vmm/io/ppt.c | 16 +- sys/amd64/vmm/vmm.c | 487 ++++++++++++++++----------- sys/amd64/vmm/vmm_dev.c | 398 +++++++++++++++++++--- sys/amd64/vmm/vmm_instruction_emul.c | 12 +- sys/amd64/vmm/vmm_mem.c | 32 -- sys/amd64/vmm/vmm_mem.h | 2 - usr.sbin/bhyve/Makefile | 1 + usr.sbin/bhyve/bhyve.8 | 28 +- usr.sbin/bhyve/bhyverun.c | 93 ++++- usr.sbin/bhyve/bootrom.c | 111 ++++++ usr.sbin/bhyve/bootrom.h | 38 +++ usr.sbin/bhyve/pci_lpc.c | 27 +- usr.sbin/bhyve/pci_lpc.h | 1 + usr.sbin/bhyve/pci_passthru.c | 8 +- usr.sbin/bhyvectl/Makefile | 2 +- usr.sbin/bhyvectl/bhyvectl.c | 111 ++++-- usr.sbin/bhyveload/bhyveload.8 | 5 
+- usr.sbin/bhyveload/bhyveload.c | 11 +- 25 files changed, 1418 insertions(+), 432 deletions(-) create mode 100644 usr.sbin/bhyve/bootrom.c create mode 100644 usr.sbin/bhyve/bootrom.h diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index 1e6e627011b9..d5f387aec682 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -58,15 +58,23 @@ __FBSDID("$FreeBSD$"); #define MB (1024 * 1024UL) #define GB (1024 * 1024 * 1024UL) +/* + * Size of the guard region before and after the virtual address space + * mapping the guest physical memory. This must be a multiple of the + * superpage size for performance reasons. + */ +#define VM_MMAP_GUARD_SIZE (4 * MB) + +#define PROT_RW (PROT_READ | PROT_WRITE) +#define PROT_ALL (PROT_READ | PROT_WRITE | PROT_EXEC) + struct vmctx { int fd; uint32_t lowmem_limit; - enum vm_mmap_style vms; int memflags; size_t lowmem; - char *lowmem_addr; size_t highmem; - char *highmem_addr; + char *baseaddr; char *name; }; @@ -157,22 +165,6 @@ vm_parse_memsize(const char *optarg, size_t *ret_memsize) return (error); } -int -vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len, - int *wired) -{ - int error; - struct vm_memory_segment seg; - - bzero(&seg, sizeof(seg)); - seg.gpa = gpa; - error = ioctl(ctx->fd, VM_GET_MEMORY_SEG, &seg); - *ret_len = seg.len; - if (wired != NULL) - *wired = seg.wired; - return (error); -} - uint32_t vm_get_lowmem_limit(struct vmctx *ctx) { @@ -194,39 +186,184 @@ vm_set_memflags(struct vmctx *ctx, int flags) ctx->memflags = flags; } -static int -setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char **addr) +int +vm_get_memflags(struct vmctx *ctx) { - int error, mmap_flags; - struct vm_memory_segment seg; + + return (ctx->memflags); +} + +/* + * Map segment 'segid' starting at 'off' into guest address range [gpa,gpa+len). 
+ */ +int +vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid, vm_ooffset_t off, + size_t len, int prot) +{ + struct vm_memmap memmap; + int error, flags; + + memmap.gpa = gpa; + memmap.segid = segid; + memmap.segoff = off; + memmap.len = len; + memmap.prot = prot; + memmap.flags = 0; + + if (ctx->memflags & VM_MEM_F_WIRED) + memmap.flags |= VM_MEMMAP_F_WIRED; /* - * Create and optionally map 'len' bytes of memory at guest - * physical address 'gpa' + * If this mapping already exists then don't create it again. This + * is the common case for SYSMEM mappings created by bhyveload(8). */ - bzero(&seg, sizeof(seg)); - seg.gpa = gpa; - seg.len = len; - error = ioctl(ctx->fd, VM_MAP_MEMORY, &seg); - if (error == 0 && addr != NULL) { - mmap_flags = MAP_SHARED; - if ((ctx->memflags & VM_MEM_F_INCORE) == 0) - mmap_flags |= MAP_NOCORE; - *addr = mmap(NULL, len, PROT_READ | PROT_WRITE, mmap_flags, - ctx->fd, gpa); + error = vm_mmap_getnext(ctx, &gpa, &segid, &off, &len, &prot, &flags); + if (error == 0 && gpa == memmap.gpa) { + if (segid != memmap.segid || off != memmap.segoff || + prot != memmap.prot || flags != memmap.flags) { + errno = EEXIST; + return (-1); + } else { + return (0); + } + } + + error = ioctl(ctx->fd, VM_MMAP_MEMSEG, &memmap); + return (error); +} + +int +vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid, + vm_ooffset_t *segoff, size_t *len, int *prot, int *flags) +{ + struct vm_memmap memmap; + int error; + + bzero(&memmap, sizeof(struct vm_memmap)); + memmap.gpa = *gpa; + error = ioctl(ctx->fd, VM_MMAP_GETNEXT, &memmap); + if (error == 0) { + *gpa = memmap.gpa; + *segid = memmap.segid; + *segoff = memmap.segoff; + *len = memmap.len; + *prot = memmap.prot; + *flags = memmap.flags; } return (error); } +/* + * Return 0 if the segments are identical and non-zero otherwise. + * + * This is slightly complicated by the fact that only device memory segments + * are named. 
+ */ +static int +cmpseg(size_t len, const char *str, size_t len2, const char *str2) +{ + + if (len == len2) { + if ((!str && !str2) || (str && str2 && !strcmp(str, str2))) + return (0); + } + return (-1); +} + +static int +vm_alloc_memseg(struct vmctx *ctx, int segid, size_t len, const char *name) +{ + struct vm_memseg memseg; + size_t n; + int error; + + /* + * If the memory segment has already been created then just return. + * This is the usual case for the SYSMEM segment created by userspace + * loaders like bhyveload(8). + */ + error = vm_get_memseg(ctx, segid, &memseg.len, memseg.name, + sizeof(memseg.name)); + if (error) + return (error); + + if (memseg.len != 0) { + if (cmpseg(len, name, memseg.len, VM_MEMSEG_NAME(&memseg))) { + errno = EINVAL; + return (-1); + } else { + return (0); + } + } + + bzero(&memseg, sizeof(struct vm_memseg)); + memseg.segid = segid; + memseg.len = len; + if (name != NULL) { + n = strlcpy(memseg.name, name, sizeof(memseg.name)); + if (n >= sizeof(memseg.name)) { + errno = ENAMETOOLONG; + return (-1); + } + } + + error = ioctl(ctx->fd, VM_ALLOC_MEMSEG, &memseg); + return (error); +} + +int +vm_get_memseg(struct vmctx *ctx, int segid, size_t *lenp, char *namebuf, + size_t bufsize) +{ + struct vm_memseg memseg; + size_t n; + int error; + + memseg.segid = segid; + error = ioctl(ctx->fd, VM_GET_MEMSEG, &memseg); + if (error == 0) { + *lenp = memseg.len; + n = strlcpy(namebuf, memseg.name, bufsize); + if (n >= bufsize) { + errno = ENAMETOOLONG; + error = -1; + } + } + return (error); +} + +static int +setup_memory_segment(struct vmctx *ctx, vm_paddr_t gpa, size_t len, char *base) +{ + char *ptr; + int error, flags; + + /* Map 'len' bytes starting at 'gpa' in the guest address space */ + error = vm_mmap_memseg(ctx, gpa, VM_SYSMEM, gpa, len, PROT_ALL); + if (error) + return (error); + + flags = MAP_SHARED | MAP_FIXED; + if ((ctx->memflags & VM_MEM_F_INCORE) == 0) + flags |= MAP_NOCORE; + + /* mmap into the process address space on the 
host */ + ptr = mmap(base + gpa, len, PROT_RW, flags, ctx->fd, gpa); + if (ptr == MAP_FAILED) + return (-1); + + return (0); +} + int vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms) { - char **addr; - int error; + size_t objsize, len; + vm_paddr_t gpa; + char *baseaddr, *ptr; + int error, flags; - /* XXX VM_MMAP_SPARSE not implemented yet */ - assert(vms == VM_MMAP_NONE || vms == VM_MMAP_ALL); - ctx->vms = vms; + assert(vms == VM_MMAP_ALL); /* * If 'memsize' cannot fit entirely in the 'lowmem' segment then @@ -234,25 +371,46 @@ vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms) */ if (memsize > ctx->lowmem_limit) { ctx->lowmem = ctx->lowmem_limit; - ctx->highmem = memsize - ctx->lowmem; + ctx->highmem = memsize - ctx->lowmem_limit; + objsize = 4*GB + ctx->highmem; } else { ctx->lowmem = memsize; ctx->highmem = 0; + objsize = ctx->lowmem; + } + + error = vm_alloc_memseg(ctx, VM_SYSMEM, objsize, NULL); + if (error) + return (error); + + /* + * Stake out a contiguous region covering the guest physical memory + * and the adjoining guard regions. + */ + len = VM_MMAP_GUARD_SIZE + objsize + VM_MMAP_GUARD_SIZE; + flags = MAP_PRIVATE | MAP_ANON | MAP_NOCORE | MAP_ALIGNED_SUPER; + ptr = mmap(NULL, len, PROT_NONE, flags, -1, 0); + if (ptr == MAP_FAILED) + return (-1); + + baseaddr = ptr + VM_MMAP_GUARD_SIZE; + if (ctx->highmem > 0) { + gpa = 4*GB; + len = ctx->highmem; + error = setup_memory_segment(ctx, gpa, len, baseaddr); + if (error) + return (error); } if (ctx->lowmem > 0) { - addr = (vms == VM_MMAP_ALL) ? &ctx->lowmem_addr : NULL; - error = setup_memory_segment(ctx, 0, ctx->lowmem, addr); + gpa = 0; + len = ctx->lowmem; + error = setup_memory_segment(ctx, gpa, len, baseaddr); if (error) return (error); } - if (ctx->highmem > 0) { - addr = (vms == VM_MMAP_ALL) ? 
&ctx->highmem_addr : NULL; - error = setup_memory_segment(ctx, 4*GB, ctx->highmem, addr); - if (error) - return (error); - } + ctx->baseaddr = baseaddr; return (0); } @@ -260,20 +418,16 @@ vm_setup_memory(struct vmctx *ctx, size_t memsize, enum vm_mmap_style vms) void * vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len) { + vm_paddr_t start, end, mapend; - /* XXX VM_MMAP_SPARSE not implemented yet */ - assert(ctx->vms == VM_MMAP_ALL); + start = gaddr; + end = gaddr + len; + mapend = ctx->highmem ? 4*GB + ctx->highmem : ctx->lowmem; - if (gaddr < ctx->lowmem && gaddr + len <= ctx->lowmem) - return ((void *)(ctx->lowmem_addr + gaddr)); - - if (gaddr >= 4*GB) { - gaddr -= 4*GB; - if (gaddr < ctx->highmem && gaddr + len <= ctx->highmem) - return ((void *)(ctx->highmem_addr + gaddr)); - } - - return (NULL); + if (start <= end && end <= mapend) + return (ctx->baseaddr + start); + else + return (NULL); } size_t @@ -290,6 +444,56 @@ vm_get_highmem_size(struct vmctx *ctx) return (ctx->highmem); } +void * +vm_create_devmem(struct vmctx *ctx, int segid, const char *name, size_t len) +{ + char pathname[MAXPATHLEN]; + size_t len2; + char *base, *ptr; + int fd, error, flags; + + fd = -1; + ptr = MAP_FAILED; + if (name == NULL || strlen(name) == 0) { + errno = EINVAL; + goto done; + } + + error = vm_alloc_memseg(ctx, segid, len, name); + if (error) + goto done; + + strlcpy(pathname, "/dev/vmm/", sizeof(pathname)); + strlcat(pathname, ctx->name, sizeof(pathname)); + strlcat(pathname, ".", sizeof(pathname)); + strlcat(pathname, name, sizeof(pathname)); + + fd = open(pathname, O_RDWR); + if (fd < 0) + goto done; + + /* + * Stake out a contiguous region covering the device memory and the + * adjoining guard regions. 
+ */ + len2 = VM_MMAP_GUARD_SIZE + len + VM_MMAP_GUARD_SIZE; + flags = MAP_PRIVATE | MAP_ANON | MAP_NOCORE | MAP_ALIGNED_SUPER; + base = mmap(NULL, len2, PROT_NONE, flags, -1, 0); + if (base == MAP_FAILED) + goto done; + + flags = MAP_SHARED | MAP_FIXED; + if ((ctx->memflags & VM_MEM_F_INCORE) == 0) + flags |= MAP_NOCORE; + + /* mmap the devmem region in the host address space */ + ptr = mmap(base + VM_MMAP_GUARD_SIZE, len, PROT_RW, flags, fd, 0); +done: + if (fd >= 0) + close(fd); + return (ptr); +} + int vm_set_desc(struct vmctx *ctx, int vcpu, int reg, uint64_t base, uint32_t limit, uint32_t access) diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index d3ecdc4f843a..57f8c564ed2d 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -36,7 +36,7 @@ * API version for out-of-tree consumers like grub-bhyve for making compile * time decisions. */ -#define VMMAPI_VERSION 0101 /* 2 digit major followed by 2 digit minor */ +#define VMMAPI_VERSION 0102 /* 2 digit major followed by 2 digit minor */ struct iovec; struct vmctx; @@ -52,14 +52,59 @@ enum vm_mmap_style { VM_MMAP_SPARSE, /* mappings created on-demand */ }; +/* + * 'flags' value passed to 'vm_set_memflags()'. + */ #define VM_MEM_F_INCORE 0x01 /* include guest memory in core file */ +#define VM_MEM_F_WIRED 0x02 /* guest memory is wired */ + +/* + * Identifiers for memory segments: + * - vm_setup_memory() uses VM_SYSMEM for the system memory segment. + * - the remaining identifiers can be used to create devmem segments. + */ +enum { + VM_SYSMEM, + VM_BOOTROM, + VM_FRAMEBUFFER, +}; + +/* + * Get the length and name of the memory segment identified by 'segid'. + * Note that system memory segments are identified with a nul name. + * + * Returns 0 on success and non-zero otherwise. + */ +int vm_get_memseg(struct vmctx *ctx, int ident, size_t *lenp, char *name, + size_t namesiz); + +/* + * Iterate over the guest address space. 
This function finds an address range + * that starts at an address >= *gpa. + * + * Returns 0 if the next address range was found and non-zero otherwise. + */ +int vm_mmap_getnext(struct vmctx *ctx, vm_paddr_t *gpa, int *segid, + vm_ooffset_t *segoff, size_t *len, int *prot, int *flags); +/* + * Create a device memory segment identified by 'segid'. + * + * Returns a pointer to the memory segment on success and MAP_FAILED otherwise. + */ +void *vm_create_devmem(struct vmctx *ctx, int segid, const char *name, + size_t len); + +/* + * Map the memory segment identified by 'segid' into the guest address space + * at [gpa,gpa+len) with protection 'prot'. + */ +int vm_mmap_memseg(struct vmctx *ctx, vm_paddr_t gpa, int segid, + vm_ooffset_t segoff, size_t len, int prot); int vm_create(const char *name); struct vmctx *vm_open(const char *name); void vm_destroy(struct vmctx *ctx); int vm_parse_memsize(const char *optarg, size_t *memsize); -int vm_get_memory_seg(struct vmctx *ctx, vm_paddr_t gpa, size_t *ret_len, - int *wired); int vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s); void *vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len); int vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num); @@ -68,6 +113,7 @@ int vm_gla2gpa(struct vmctx *, int vcpuid, struct vm_guest_paging *paging, uint32_t vm_get_lowmem_limit(struct vmctx *ctx); void vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit); void vm_set_memflags(struct vmctx *ctx, int flags); +int vm_get_memflags(struct vmctx *ctx); size_t vm_get_lowmem_size(struct vmctx *ctx); size_t vm_get_highmem_size(struct vmctx *ctx); int vm_set_desc(struct vmctx *ctx, int vcpu, int reg, diff --git a/share/examples/bhyve/vmrun.sh b/share/examples/bhyve/vmrun.sh index 88cbd7c0ca91..58bfa4a8f999 100755 --- a/share/examples/bhyve/vmrun.sh +++ b/share/examples/bhyve/vmrun.sh @@ -87,15 +87,15 @@ console=${DEFAULT_CONSOLE} cpus=${DEFAULT_CPUS} tap_total=0 disk_total=0 -apic_opt="" gdbport=0 
loader_opt="" +bhyverun_opt="-H -A -P" pass_total=0 while getopts ac:C:d:e:g:hH:iI:m:p:t: c ; do case $c in a) - apic_opt="-a" + bhyverun_opt="${bhyverun_opt} -a" ;; c) cpus=${OPTARG} @@ -163,6 +163,12 @@ if [ -n "${host_base}" ]; then loader_opt="${loader_opt} -h ${host_base}" fi +# If PCI passthru devices are configured then guest memory must be wired +if [ ${pass_total} -gt 0 ]; then + loader_opt="${loader_opt} -S" + bhyverun_opt="${bhyverun_opt} -S" +fi + make_and_check_diskdev() { local virtio_diskdev="$1" @@ -263,7 +269,7 @@ while [ 1 ]; do i=$(($i + 1)) done - ${FBSDRUN} -c ${cpus} -m ${memsize} ${apic_opt} -A -H -P \ + ${FBSDRUN} -c ${cpus} -m ${memsize} ${bhyverun_opt} \ -g ${gdbport} \ -s 0:0,hostbridge \ -s 1:0,lpc \ diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index 1a4e5ab485c8..f2de960c2a81 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -108,7 +108,6 @@ enum x2apic_state { struct vm; struct vm_exception; -struct vm_memory_segment; struct seg_desc; struct vm_exit; struct vm_run; @@ -175,17 +174,33 @@ int vm_create(const char *name, struct vm **retvm); void vm_destroy(struct vm *vm); int vm_reinit(struct vm *vm); const char *vm_name(struct vm *vm); -int vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len); + +/* + * APIs that modify the guest memory map require all vcpus to be frozen. 
+ */ +int vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t off, + size_t len, int prot, int flags); +int vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem); +void vm_free_memseg(struct vm *vm, int ident); int vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa); int vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len); -void *vm_gpa_hold(struct vm *, vm_paddr_t gpa, size_t len, int prot, - void **cookie); +int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func); +int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func); + +/* + * APIs that inspect the guest memory map require only a *single* vcpu to + * be frozen. This acts like a read lock on the guest memory map since any + * modification requires *all* vcpus to be frozen. + */ +int vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, + vm_ooffset_t *segoff, size_t *len, int *prot, int *flags); +int vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, + struct vm_object **objptr); +void *vm_gpa_hold(struct vm *, int vcpuid, vm_paddr_t gpa, size_t len, + int prot, void **cookie); void vm_gpa_release(void *cookie); -int vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase, - struct vm_memory_segment *seg); -int vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len, - vm_offset_t *offset, struct vm_object **object); -boolean_t vm_mem_allocated(struct vm *vm, vm_paddr_t gpa); +bool vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa); + int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval); int vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val); int vm_get_seg_desc(struct vm *vm, int vcpu, int reg, @@ -302,8 +317,6 @@ vcpu_should_yield(struct vm *vm, int vcpu) void *vcpu_stats(struct vm *vm, int vcpu); void vcpu_notify_event(struct vm *vm, int vcpuid, bool lapic_intr); struct vmspace *vm_get_vmspace(struct vm *vm); -int vm_assign_pptdev(struct vm *vm, int bus, int slot, int func); 
-int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func); struct vatpic *vm_atpic(struct vm *vm); struct vatpit *vm_atpit(struct vm *vm); struct vpmtmr *vm_pmtmr(struct vm *vm); diff --git a/sys/amd64/include/vmm_dev.h b/sys/amd64/include/vmm_dev.h index 9d031a9525e5..1af75a3c065d 100644 --- a/sys/amd64/include/vmm_dev.h +++ b/sys/amd64/include/vmm_dev.h @@ -34,10 +34,22 @@ void vmmdev_init(void); int vmmdev_cleanup(void); #endif -struct vm_memory_segment { - vm_paddr_t gpa; /* in */ +struct vm_memmap { + vm_paddr_t gpa; + int segid; /* memory segment */ + vm_ooffset_t segoff; /* offset into memory segment */ + size_t len; /* mmap length */ + int prot; /* RWX */ + int flags; +}; +#define VM_MEMMAP_F_WIRED 0x01 +#define VM_MEMMAP_F_IOMMU 0x02 + +#define VM_MEMSEG_NAME(m) ((m)->name[0] != '\0' ? (m)->name : NULL) +struct vm_memseg { + int segid; size_t len; - int wired; + char name[SPECNAMELEN + 1]; }; struct vm_register { @@ -214,10 +226,14 @@ enum { IOCNUM_REINIT = 5, /* memory apis */ - IOCNUM_MAP_MEMORY = 10, - IOCNUM_GET_MEMORY_SEG = 11, + IOCNUM_MAP_MEMORY = 10, /* deprecated */ + IOCNUM_GET_MEMORY_SEG = 11, /* deprecated */ IOCNUM_GET_GPA_PMAP = 12, IOCNUM_GLA2GPA = 13, + IOCNUM_ALLOC_MEMSEG = 14, + IOCNUM_GET_MEMSEG = 15, + IOCNUM_MMAP_MEMSEG = 16, + IOCNUM_MMAP_GETNEXT = 17, /* register/state accessors */ IOCNUM_SET_REGISTER = 20, @@ -278,10 +294,14 @@ enum { _IOW('v', IOCNUM_SUSPEND, struct vm_suspend) #define VM_REINIT \ _IO('v', IOCNUM_REINIT) -#define VM_MAP_MEMORY \ - _IOWR('v', IOCNUM_MAP_MEMORY, struct vm_memory_segment) -#define VM_GET_MEMORY_SEG \ - _IOWR('v', IOCNUM_GET_MEMORY_SEG, struct vm_memory_segment) +#define VM_ALLOC_MEMSEG \ + _IOW('v', IOCNUM_ALLOC_MEMSEG, struct vm_memseg) +#define VM_GET_MEMSEG \ + _IOWR('v', IOCNUM_GET_MEMSEG, struct vm_memseg) +#define VM_MMAP_MEMSEG \ + _IOW('v', IOCNUM_MMAP_MEMSEG, struct vm_memmap) +#define VM_MMAP_GETNEXT \ + _IOWR('v', IOCNUM_MMAP_GETNEXT, struct vm_memmap) #define VM_SET_REGISTER \ 
_IOW('v', IOCNUM_SET_REGISTER, struct vm_register) #define VM_GET_REGISTER \ diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c index b25d69d4b2eb..6d44801e9cac 100644 --- a/sys/amd64/vmm/amd/svm.c +++ b/sys/amd64/vmm/amd/svm.c @@ -1477,7 +1477,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) VCPU_CTR2(svm_sc->vm, vcpu, "nested page fault with " "reserved bits set: info1(%#lx) info2(%#lx)", info1, info2); - } else if (vm_mem_allocated(svm_sc->vm, info2)) { + } else if (vm_mem_allocated(svm_sc->vm, vcpu, info2)) { vmexit->exitcode = VM_EXITCODE_PAGING; vmexit->u.paging.gpa = info2; vmexit->u.paging.fault_type = npf_fault_type(info1); diff --git a/sys/amd64/vmm/intel/vmx.c b/sys/amd64/vmm/intel/vmx.c index f590586c44b3..517a3749054e 100644 --- a/sys/amd64/vmm/intel/vmx.c +++ b/sys/amd64/vmm/intel/vmx.c @@ -2425,7 +2425,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit) * this must be an instruction that accesses MMIO space. */ gpa = vmcs_gpa(); - if (vm_mem_allocated(vmx->vm, gpa) || + if (vm_mem_allocated(vmx->vm, vcpu, gpa) || apic_access_fault(vmx, vcpu, gpa)) { vmexit->exitcode = VM_EXITCODE_PAGING; vmexit->inst_length = 0; diff --git a/sys/amd64/vmm/io/ppt.c b/sys/amd64/vmm/io/ppt.c index b789f77ba842..692190a5c2b1 100644 --- a/sys/amd64/vmm/io/ppt.c +++ b/sys/amd64/vmm/io/ppt.c @@ -76,11 +76,17 @@ struct pptintr_arg { /* pptintr(pptintr_arg) */ uint64_t msg_data; }; +struct pptseg { + vm_paddr_t gpa; + size_t len; + int wired; +}; + struct pptdev { device_t dev; struct vm *vm; /* owner of this device */ TAILQ_ENTRY(pptdev) next; - struct vm_memory_segment mmio[MAX_MMIOSEGS]; + struct pptseg mmio[MAX_MMIOSEGS]; struct { int num_msgs; /* guest state */ @@ -207,14 +213,14 @@ static void ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt) { int i; - struct vm_memory_segment *seg; + struct pptseg *seg; for (i = 0; i < MAX_MMIOSEGS; i++) { seg = &ppt->mmio[i]; if (seg->len == 0) continue; 
(void)vm_unmap_mmio(vm, seg->gpa, seg->len); - bzero(seg, sizeof(struct vm_memory_segment)); + bzero(seg, sizeof(struct pptseg)); } } @@ -324,7 +330,7 @@ ppt_is_mmio(struct vm *vm, vm_paddr_t gpa) { int i; struct pptdev *ppt; - struct vm_memory_segment *seg; + struct pptseg *seg; TAILQ_FOREACH(ppt, &pptdev_list, next) { if (ppt->vm != vm) @@ -410,7 +416,7 @@ ppt_map_mmio(struct vm *vm, int bus, int slot, int func, vm_paddr_t gpa, size_t len, vm_paddr_t hpa) { int i, error; - struct vm_memory_segment *seg; + struct pptseg *seg; struct pptdev *ppt; ppt = ppt_find(bus, slot, func); diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index 2c37a1ac2c69..098705909953 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -119,12 +119,21 @@ struct vcpu { #define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED) struct mem_seg { + size_t len; + bool sysmem; + struct vm_object *object; +}; +#define VM_MAX_MEMSEGS 2 + +struct mem_map { vm_paddr_t gpa; size_t len; - boolean_t wired; - vm_object_t object; + vm_ooffset_t segoff; + int segid; + int prot; + int flags; }; -#define VM_MAX_MEMORY_SEGMENTS 2 +#define VM_MAX_MEMMAPS 4 /* * Initialization: @@ -150,8 +159,8 @@ struct vm { void *rendezvous_arg; /* (x) rendezvous func/arg */ vm_rendezvous_func_t rendezvous_func; struct mtx rendezvous_mtx; /* (o) rendezvous lock */ - int num_mem_segs; /* (o) guest memory segments */ - struct mem_seg mem_segs[VM_MAX_MEMORY_SEGMENTS]; + struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */ + struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */ struct vmspace *vmspace; /* (o) guest's address space */ char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */ struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */ @@ -222,6 +231,8 @@ TUNABLE_INT("hw.vmm.force_iommu", &vmm_force_iommu); SYSCTL_INT(_hw_vmm, OID_AUTO, force_iommu, CTLFLAG_RDTUN, &vmm_force_iommu, 0, "Force use of I/O MMU even if no passthrough devices were found."); +static void 
vm_free_memmap(struct vm *vm, int ident); +static bool sysmem_mapping(struct vm *vm, struct mem_map *mm); static void vcpu_notify_event_locked(struct vcpu *vcpu, bool lapic_intr); #ifdef KTR @@ -442,7 +453,6 @@ vm_create(const char *name, struct vm **retvm) vm = malloc(sizeof(struct vm), M_VM, M_WAITOK | M_ZERO); strcpy(vm->name, name); - vm->num_mem_segs = 0; vm->vmspace = vmspace; mtx_init(&vm->rendezvous_mtx, "vm rendezvous lock", 0, MTX_DEF); @@ -452,19 +462,10 @@ vm_create(const char *name, struct vm **retvm) return (0); } -static void -vm_free_mem_seg(struct vm *vm, struct mem_seg *seg) -{ - - if (seg->object != NULL) - vmm_mem_free(vm->vmspace, seg->gpa, seg->len); - - bzero(seg, sizeof(*seg)); -} - static void vm_cleanup(struct vm *vm, bool destroy) { + struct mem_map *mm; int i; ppt_unassign_all(vm); @@ -487,11 +488,23 @@ vm_cleanup(struct vm *vm, bool destroy) VMCLEANUP(vm->cookie); - if (destroy) { - for (i = 0; i < vm->num_mem_segs; i++) - vm_free_mem_seg(vm, &vm->mem_segs[i]); + /* + * System memory is removed from the guest address space only when + * the VM is destroyed. This is because the mapping remains the same + * across VM reset. + * + * Device memory can be relocated by the guest (e.g. using PCI BARs) + * so those mappings are removed on a VM reset. + */ + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + mm = &vm->mem_maps[i]; + if (destroy || !sysmem_mapping(vm, mm)) + vm_free_memmap(vm, i); + } - vm->num_mem_segs = 0; + if (destroy) { + for (i = 0; i < VM_MAX_MEMSEGS; i++) + vm_free_memseg(vm, i); VMSPACE_FREE(vm->vmspace); vm->vmspace = NULL; @@ -549,146 +562,243 @@ vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len) return (0); } -boolean_t -vm_mem_allocated(struct vm *vm, vm_paddr_t gpa) +/* + * Return 'true' if 'gpa' is allocated in the guest address space. + * + * This function is called in the context of a running vcpu which acts as + * an implicit lock on 'vm->mem_maps[]'. 
+ */ +bool +vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa) { + struct mem_map *mm; int i; - vm_paddr_t gpabase, gpalimit; - for (i = 0; i < vm->num_mem_segs; i++) { - gpabase = vm->mem_segs[i].gpa; - gpalimit = gpabase + vm->mem_segs[i].len; - if (gpa >= gpabase && gpa < gpalimit) - return (TRUE); /* 'gpa' is regular memory */ +#ifdef INVARIANTS + int hostcpu, state; + state = vcpu_get_state(vm, vcpuid, &hostcpu); + KASSERT(state == VCPU_RUNNING && hostcpu == curcpu, + ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu)); +#endif + + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + mm = &vm->mem_maps[i]; + if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len) + return (true); /* 'gpa' is sysmem or devmem */ } if (ppt_is_mmio(vm, gpa)) - return (TRUE); /* 'gpa' is pci passthru mmio */ + return (true); /* 'gpa' is pci passthru mmio */ - return (FALSE); + return (false); } int -vm_malloc(struct vm *vm, vm_paddr_t gpa, size_t len) +vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem) { - int available, allocated; struct mem_seg *seg; - vm_object_t object; - vm_paddr_t g; + vm_object_t obj; - if ((gpa & PAGE_MASK) || (len & PAGE_MASK) || len == 0) + if (ident < 0 || ident >= VM_MAX_MEMSEGS) return (EINVAL); - - available = allocated = 0; - g = gpa; - while (g < gpa + len) { - if (vm_mem_allocated(vm, g)) - allocated++; - else - available++; - g += PAGE_SIZE; + if (len == 0 || (len & PAGE_MASK)) + return (EINVAL); + + seg = &vm->mem_segs[ident]; + if (seg->object != NULL) { + if (seg->len == len && seg->sysmem == sysmem) + return (EEXIST); + else + return (EINVAL); } - /* - * If there are some allocated and some available pages in the address - * range then it is an error. - */ - if (allocated && available) - return (EINVAL); - - /* - * If the entire address range being requested has already been - * allocated then there isn't anything more to do. 
- */ - if (allocated && available == 0) - return (0); - - if (vm->num_mem_segs >= VM_MAX_MEMORY_SEGMENTS) - return (E2BIG); - - seg = &vm->mem_segs[vm->num_mem_segs]; - - if ((object = vmm_mem_alloc(vm->vmspace, gpa, len)) == NULL) + obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT); + if (obj == NULL) return (ENOMEM); - seg->gpa = gpa; seg->len = len; - seg->object = object; - seg->wired = FALSE; - - vm->num_mem_segs++; - + seg->object = obj; + seg->sysmem = sysmem; return (0); } -static vm_paddr_t -vm_maxmem(struct vm *vm) +int +vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem, + vm_object_t *objptr) { - int i; - vm_paddr_t gpa, maxmem; + struct mem_seg *seg; - maxmem = 0; - for (i = 0; i < vm->num_mem_segs; i++) { - gpa = vm->mem_segs[i].gpa + vm->mem_segs[i].len; - if (gpa > maxmem) - maxmem = gpa; + if (ident < 0 || ident >= VM_MAX_MEMSEGS) + return (EINVAL); + + seg = &vm->mem_segs[ident]; + if (len) + *len = seg->len; + if (sysmem) + *sysmem = seg->sysmem; + if (objptr) + *objptr = seg->object; + return (0); +} + +void +vm_free_memseg(struct vm *vm, int ident) +{ + struct mem_seg *seg; + + KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS, + ("%s: invalid memseg ident %d", __func__, ident)); + + seg = &vm->mem_segs[ident]; + if (seg->object != NULL) { + vm_object_deallocate(seg->object); + bzero(seg, sizeof(struct mem_seg)); + } +} + +int +vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first, + size_t len, int prot, int flags) +{ + struct mem_seg *seg; + struct mem_map *m, *map; + vm_ooffset_t last; + int i, error; + + if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0) + return (EINVAL); + + if (flags & ~VM_MEMMAP_F_WIRED) + return (EINVAL); + + if (segid < 0 || segid >= VM_MAX_MEMSEGS) + return (EINVAL); + + seg = &vm->mem_segs[segid]; + if (seg->object == NULL) + return (EINVAL); + + last = first + len; + if (first < 0 || first >= last || last > seg->len) + return (EINVAL); + + if ((gpa | first | last) & PAGE_MASK) + 
return (EINVAL); + + map = NULL; + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + m = &vm->mem_maps[i]; + if (m->len == 0) { + map = m; + break; + } + } + + if (map == NULL) + return (ENOSPC); + + error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa, + len, 0, VMFS_NO_SPACE, prot, prot, 0); + if (error != KERN_SUCCESS) + return (EFAULT); + + vm_object_reference(seg->object); + + if (flags & VM_MEMMAP_F_WIRED) { + error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len, + VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); + if (error != KERN_SUCCESS) { + vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len); + return (EFAULT); + } + } + + map->gpa = gpa; + map->len = len; + map->segoff = first; + map->segid = segid; + map->prot = prot; + map->flags = flags; + return (0); +} + +int +vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid, + vm_ooffset_t *segoff, size_t *len, int *prot, int *flags) +{ + struct mem_map *mm, *mmnext; + int i; + + mmnext = NULL; + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + mm = &vm->mem_maps[i]; + if (mm->len == 0 || mm->gpa < *gpa) + continue; + if (mmnext == NULL || mm->gpa < mmnext->gpa) + mmnext = mm; + } + + if (mmnext != NULL) { + *gpa = mmnext->gpa; + if (segid) + *segid = mmnext->segid; + if (segoff) + *segoff = mmnext->segoff; + if (len) + *len = mmnext->len; + if (prot) + *prot = mmnext->prot; + if (flags) + *flags = mmnext->flags; + return (0); + } else { + return (ENOENT); } - return (maxmem); } static void -vm_gpa_unwire(struct vm *vm) +vm_free_memmap(struct vm *vm, int ident) { - int i, rv; - struct mem_seg *seg; + struct mem_map *mm; + int error; - for (i = 0; i < vm->num_mem_segs; i++) { - seg = &vm->mem_segs[i]; - if (!seg->wired) - continue; - - rv = vm_map_unwire(&vm->vmspace->vm_map, - seg->gpa, seg->gpa + seg->len, - VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); - KASSERT(rv == KERN_SUCCESS, ("vm(%s) memory segment " - "%#lx/%ld could not be unwired: %d", - vm_name(vm), seg->gpa, seg->len, rv)); - - seg->wired = FALSE; + mm 
= &vm->mem_maps[ident]; + if (mm->len) { + error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa, + mm->gpa + mm->len); + KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d", + __func__, error)); + bzero(mm, sizeof(struct mem_map)); } } -static int -vm_gpa_wire(struct vm *vm) +static __inline bool +sysmem_mapping(struct vm *vm, struct mem_map *mm) { - int i, rv; - struct mem_seg *seg; - for (i = 0; i < vm->num_mem_segs; i++) { - seg = &vm->mem_segs[i]; - if (seg->wired) - continue; + if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem) + return (true); + else + return (false); +} - /* XXX rlimits? */ - rv = vm_map_wire(&vm->vmspace->vm_map, - seg->gpa, seg->gpa + seg->len, - VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES); - if (rv != KERN_SUCCESS) - break; +static vm_paddr_t +sysmem_maxaddr(struct vm *vm) +{ + struct mem_map *mm; + vm_paddr_t maxaddr; + int i; - seg->wired = TRUE; + maxaddr = 0; + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + mm = &vm->mem_maps[i]; + if (sysmem_mapping(vm, mm)) { + if (maxaddr < mm->gpa + mm->len) + maxaddr = mm->gpa + mm->len; + } } - - if (i < vm->num_mem_segs) { - /* - * Undo the wiring before returning an error. 
- */ - vm_gpa_unwire(vm); - return (EAGAIN); - } - - return (0); + return (maxaddr); } static void @@ -696,20 +806,36 @@ vm_iommu_modify(struct vm *vm, boolean_t map) { int i, sz; vm_paddr_t gpa, hpa; - struct mem_seg *seg; + struct mem_map *mm; void *vp, *cookie, *host_domain; sz = PAGE_SIZE; host_domain = iommu_host_domain(); - for (i = 0; i < vm->num_mem_segs; i++) { - seg = &vm->mem_segs[i]; - KASSERT(seg->wired, ("vm(%s) memory segment %#lx/%ld not wired", - vm_name(vm), seg->gpa, seg->len)); + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + mm = &vm->mem_maps[i]; + if (!sysmem_mapping(vm, mm)) + continue; - gpa = seg->gpa; - while (gpa < seg->gpa + seg->len) { - vp = vm_gpa_hold(vm, gpa, PAGE_SIZE, VM_PROT_WRITE, + if (map) { + KASSERT((mm->flags & VM_MEMMAP_F_IOMMU) == 0, + ("iommu map found invalid memmap %#lx/%#lx/%#x", + mm->gpa, mm->len, mm->flags)); + if ((mm->flags & VM_MEMMAP_F_WIRED) == 0) + continue; + mm->flags |= VM_MEMMAP_F_IOMMU; + } else { + if ((mm->flags & VM_MEMMAP_F_IOMMU) == 0) + continue; + mm->flags &= ~VM_MEMMAP_F_IOMMU; + KASSERT((mm->flags & VM_MEMMAP_F_WIRED) != 0, + ("iommu unmap found invalid memmap %#lx/%#lx/%#x", + mm->gpa, mm->len, mm->flags)); + } + + gpa = mm->gpa; + while (gpa < mm->gpa + mm->len) { + vp = vm_gpa_hold(vm, -1, gpa, PAGE_SIZE, VM_PROT_WRITE, &cookie); KASSERT(vp != NULL, ("vm(%s) could not map gpa %#lx", vm_name(vm), gpa)); @@ -751,10 +877,9 @@ vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func) if (error) return (error); - if (ppt_assigned_devices(vm) == 0) { + if (ppt_assigned_devices(vm) == 0) vm_iommu_unmap(vm); - vm_gpa_unwire(vm); - } + return (0); } @@ -764,23 +889,12 @@ vm_assign_pptdev(struct vm *vm, int bus, int slot, int func) int error; vm_paddr_t maxaddr; - /* - * Virtual machines with pci passthru devices get special treatment: - * - the guest physical memory is wired - * - the iommu is programmed to do the 'gpa' to 'hpa' translation - * - * We need to do this before the first pci passthru 
device is attached. - */ + /* Set up the IOMMU to do the 'gpa' to 'hpa' translation */ if (ppt_assigned_devices(vm) == 0) { KASSERT(vm->iommu == NULL, ("vm_assign_pptdev: iommu must be NULL")); - maxaddr = vm_maxmem(vm); + maxaddr = sysmem_maxaddr(vm); vm->iommu = iommu_create_domain(maxaddr); - - error = vm_gpa_wire(vm); - if (error) - return (error); - vm_iommu_map(vm); } @@ -789,18 +903,43 @@ vm_assign_pptdev(struct vm *vm, int bus, int slot, int func) } void * -vm_gpa_hold(struct vm *vm, vm_paddr_t gpa, size_t len, int reqprot, +vm_gpa_hold(struct vm *vm, int vcpuid, vm_paddr_t gpa, size_t len, int reqprot, void **cookie) { - int count, pageoff; + int i, count, pageoff; + struct mem_map *mm; vm_page_t m; - +#ifdef INVARIANTS + /* + * All vcpus are frozen by ioctls that modify the memory map + * (e.g. VM_MMAP_MEMSEG). Therefore 'vm->memmap[]' stability is + * guaranteed if at least one vcpu is in the VCPU_FROZEN state. + */ + int state; + KASSERT(vcpuid >= -1 || vcpuid < VM_MAXCPU, ("%s: invalid vcpuid %d", + __func__, vcpuid)); + for (i = 0; i < VM_MAXCPU; i++) { + if (vcpuid != -1 && vcpuid != i) + continue; + state = vcpu_get_state(vm, i, NULL); + KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d", + __func__, state)); + } +#endif pageoff = gpa & PAGE_MASK; if (len > PAGE_SIZE - pageoff) panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len); - count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, - trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); + count = 0; + for (i = 0; i < VM_MAX_MEMMAPS; i++) { + mm = &vm->mem_maps[i]; + if (sysmem_mapping(vm, mm) && gpa >= mm->gpa && + gpa < mm->gpa + mm->len) { + count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map, + trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1); + break; + } + } if (count == 1) { *cookie = m; @@ -821,50 +960,6 @@ vm_gpa_release(void *cookie) vm_page_unlock(m); } -int -vm_gpabase2memseg(struct vm *vm, vm_paddr_t gpabase, - struct vm_memory_segment *seg) -{ - int i; - - for (i = 0; i < 
vm->num_mem_segs; i++) { - if (gpabase == vm->mem_segs[i].gpa) { - seg->gpa = vm->mem_segs[i].gpa; - seg->len = vm->mem_segs[i].len; - seg->wired = vm->mem_segs[i].wired; - return (0); - } - } - return (-1); -} - -int -vm_get_memobj(struct vm *vm, vm_paddr_t gpa, size_t len, - vm_offset_t *offset, struct vm_object **object) -{ - int i; - size_t seg_len; - vm_paddr_t seg_gpa; - vm_object_t seg_obj; - - for (i = 0; i < vm->num_mem_segs; i++) { - if ((seg_obj = vm->mem_segs[i].object) == NULL) - continue; - - seg_gpa = vm->mem_segs[i].gpa; - seg_len = vm->mem_segs[i].len; - - if (gpa >= seg_gpa && gpa < seg_gpa + seg_len) { - *offset = gpa - seg_gpa; - *object = seg_obj; - vm_object_reference(seg_obj); - return (0); - } - } - - return (EINVAL); -} - int vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval) { @@ -2423,8 +2518,8 @@ vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, } for (idx = 0; idx < nused; idx++) { - hva = vm_gpa_hold(vm, copyinfo[idx].gpa, copyinfo[idx].len, - prot, &cookie); + hva = vm_gpa_hold(vm, vcpuid, copyinfo[idx].gpa, + copyinfo[idx].len, prot, &cookie); if (hva == NULL) break; copyinfo[idx].hva = hva; diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c index e3e140af5cc1..4ef1482894b0 100644 --- a/sys/amd64/vmm/vmm_dev.c +++ b/sys/amd64/vmm/vmm_dev.c @@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -60,10 +61,19 @@ __FBSDID("$FreeBSD$"); #include "io/vhpet.h" #include "io/vrtc.h" +struct devmem_softc { + int segid; + char *name; + struct cdev *cdev; + struct vmmdev_softc *sc; + SLIST_ENTRY(devmem_softc) link; +}; + struct vmmdev_softc { struct vm *vm; /* vm instance cookie */ struct cdev *cdev; SLIST_ENTRY(vmmdev_softc) link; + SLIST_HEAD(, devmem_softc) devmem; int flags; }; #define VSC_LINKED 0x01 @@ -76,6 +86,63 @@ static MALLOC_DEFINE(M_VMMDEV, "vmmdev", "vmmdev"); SYSCTL_DECL(_hw_vmm); +static int devmem_create_cdev(const char *vmname, int 
id, char *devmem); +static void devmem_destroy(void *arg); + +static int +vcpu_lock_one(struct vmmdev_softc *sc, int vcpu) +{ + int error; + + if (vcpu < 0 || vcpu >= VM_MAXCPU) + return (EINVAL); + + error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true); + return (error); +} + +static void +vcpu_unlock_one(struct vmmdev_softc *sc, int vcpu) +{ + enum vcpu_state state; + + state = vcpu_get_state(sc->vm, vcpu, NULL); + if (state != VCPU_FROZEN) { + panic("vcpu %s(%d) has invalid state %d", vm_name(sc->vm), + vcpu, state); + } + + vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false); +} + +static int +vcpu_lock_all(struct vmmdev_softc *sc) +{ + int error, vcpu; + + for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) { + error = vcpu_lock_one(sc, vcpu); + if (error) + break; + } + + if (error) { + while (--vcpu >= 0) + vcpu_unlock_one(sc, vcpu); + } + + return (error); +} + +static void +vcpu_unlock_all(struct vmmdev_softc *sc) +{ + int vcpu; + + for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) + vcpu_unlock_one(sc, vcpu); +} + static struct vmmdev_softc * vmmdev_lookup(const char *name) { @@ -108,12 +175,16 @@ vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) void *hpa, *cookie; struct vmmdev_softc *sc; - static char zerobuf[PAGE_SIZE]; - - error = 0; sc = vmmdev_lookup2(cdev); if (sc == NULL) - error = ENXIO; + return (ENXIO); + + /* + * Get a read lock on the guest memory map by freezing any vcpu. + */ + error = vcpu_lock_one(sc, VM_MAXCPU - 1); + if (error) + return (error); prot = (uio->uio_rw == UIO_WRITE ? VM_PROT_WRITE : VM_PROT_READ); while (uio->uio_resid > 0 && error == 0) { @@ -129,10 +200,11 @@ vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) * Since this device does not support lseek(2), dd(1) will * read(2) blocks of data to simulate the lseek(2). 
*/ - hpa = vm_gpa_hold(sc->vm, gpa, c, prot, &cookie); + hpa = vm_gpa_hold(sc->vm, VM_MAXCPU - 1, gpa, c, prot, &cookie); if (hpa == NULL) { if (uio->uio_rw == UIO_READ) - error = uiomove(zerobuf, c, uio); + error = uiomove(__DECONST(void *, zero_region), + c, uio); else error = EFAULT; } else { @@ -140,6 +212,70 @@ vmmdev_rw(struct cdev *cdev, struct uio *uio, int flags) vm_gpa_release(cookie); } } + vcpu_unlock_one(sc, VM_MAXCPU - 1); + return (error); +} + +CTASSERT(sizeof(((struct vm_memseg *)0)->name) >= SPECNAMELEN + 1); + +static int +get_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg) +{ + struct devmem_softc *dsc; + int error; + bool sysmem; + + error = vm_get_memseg(sc->vm, mseg->segid, &mseg->len, &sysmem, NULL); + if (error || mseg->len == 0) + return (error); + + if (!sysmem) { + SLIST_FOREACH(dsc, &sc->devmem, link) { + if (dsc->segid == mseg->segid) + break; + } + KASSERT(dsc != NULL, ("%s: devmem segment %d not found", + __func__, mseg->segid)); + error = copystr(dsc->name, mseg->name, SPECNAMELEN + 1, NULL); + } else { + bzero(mseg->name, sizeof(mseg->name)); + } + + return (error); +} + +static int +alloc_memseg(struct vmmdev_softc *sc, struct vm_memseg *mseg) +{ + char *name; + int error; + bool sysmem; + + error = 0; + name = NULL; + sysmem = true; + + if (VM_MEMSEG_NAME(mseg)) { + sysmem = false; + name = malloc(SPECNAMELEN + 1, M_VMMDEV, M_WAITOK); + error = copystr(VM_MEMSEG_NAME(mseg), name, SPECNAMELEN + 1, 0); + if (error) + goto done; + } + + error = vm_alloc_memseg(sc->vm, mseg->segid, mseg->len, sysmem); + if (error) + goto done; + + if (VM_MEMSEG_NAME(mseg)) { + error = devmem_create_cdev(vm_name(sc->vm), mseg->segid, name); + if (error) + vm_free_memseg(sc->vm, mseg->segid); + else + name = NULL; /* freed when 'cdev' is destroyed */ + } +done: + free(name, M_VMMDEV); return (error); } @@ -150,7 +286,6 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, int error, vcpu, state_changed, size; cpuset_t *cpuset; 
struct vmmdev_softc *sc; - struct vm_memory_segment *seg; struct vm_register *vmreg; struct vm_seg_desc *vmsegdesc; struct vm_run *vmrun; @@ -177,6 +312,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, struct vm_intinfo *vmii; struct vm_rtc_time *rtctime; struct vm_rtc_data *rtcdata; + struct vm_memmap *mm; sc = vmmdev_lookup2(cdev); if (sc == NULL) @@ -211,43 +347,41 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, * Assumes that the first field of the ioctl data is the vcpu. */ vcpu = *(int *)data; - if (vcpu < 0 || vcpu >= VM_MAXCPU) { - error = EINVAL; - goto done; - } - - error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true); + error = vcpu_lock_one(sc, vcpu); if (error) goto done; - state_changed = 1; break; case VM_MAP_PPTDEV_MMIO: case VM_BIND_PPTDEV: case VM_UNBIND_PPTDEV: - case VM_MAP_MEMORY: + case VM_ALLOC_MEMSEG: + case VM_MMAP_MEMSEG: case VM_REINIT: /* * ioctls that operate on the entire virtual machine must * prevent all vcpus from running. */ - error = 0; - for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) { - error = vcpu_set_state(sc->vm, vcpu, VCPU_FROZEN, true); - if (error) - break; - } - - if (error) { - while (--vcpu >= 0) - vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false); + error = vcpu_lock_all(sc); + if (error) goto done; - } - state_changed = 2; break; + case VM_GET_MEMSEG: + case VM_MMAP_GETNEXT: + /* + * Lock a vcpu to make sure that the memory map cannot be + * modified while it is being inspected. 
+ */ + vcpu = VM_MAXCPU - 1; + error = vcpu_lock_one(sc, vcpu); + if (error) + goto done; + state_changed = 1; + break; + default: break; } @@ -372,15 +506,21 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, error = vatpic_set_irq_trigger(sc->vm, isa_irq_trigger->atpic_irq, isa_irq_trigger->trigger); break; - case VM_MAP_MEMORY: - seg = (struct vm_memory_segment *)data; - error = vm_malloc(sc->vm, seg->gpa, seg->len); + case VM_MMAP_GETNEXT: + mm = (struct vm_memmap *)data; + error = vm_mmap_getnext(sc->vm, &mm->gpa, &mm->segid, + &mm->segoff, &mm->len, &mm->prot, &mm->flags); break; - case VM_GET_MEMORY_SEG: - seg = (struct vm_memory_segment *)data; - seg->len = 0; - (void)vm_gpabase2memseg(sc->vm, seg->gpa, seg); - error = 0; + case VM_MMAP_MEMSEG: + mm = (struct vm_memmap *)data; + error = vm_mmap_memseg(sc->vm, mm->gpa, mm->segid, mm->segoff, + mm->len, mm->prot, mm->flags); + break; + case VM_ALLOC_MEMSEG: + error = alloc_memseg(sc, (struct vm_memseg *)data); + break; + case VM_GET_MEMSEG: + error = get_memseg(sc, (struct vm_memseg *)data); break; case VM_GET_REGISTER: vmreg = (struct vm_register *)data; @@ -505,12 +645,10 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, break; } - if (state_changed == 1) { - vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false); - } else if (state_changed == 2) { - for (vcpu = 0; vcpu < VM_MAXCPU; vcpu++) - vcpu_set_state(sc->vm, vcpu, VCPU_IDLE, false); - } + if (state_changed == 1) + vcpu_unlock_one(sc, vcpu); + else if (state_changed == 2) + vcpu_unlock_all(sc); done: /* Make sure that no handler returns a bogus value like ERESTART */ @@ -519,26 +657,79 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, } static int -vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, - vm_size_t size, struct vm_object **object, int nprot) +vmmdev_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t mapsize, + struct vm_object **objp, int nprot) { - int error; 
struct vmmdev_softc *sc; + vm_paddr_t gpa; + size_t len; + vm_ooffset_t segoff, first, last; + int error, found, segid; + bool sysmem; + + first = *offset; + last = first + mapsize; + if ((nprot & PROT_EXEC) || first < 0 || first >= last) + return (EINVAL); sc = vmmdev_lookup2(cdev); - if (sc != NULL && (nprot & PROT_EXEC) == 0) - error = vm_get_memobj(sc->vm, *offset, size, offset, object); - else - error = EINVAL; + if (sc == NULL) { + /* virtual machine is in the process of being created */ + return (EINVAL); + } + /* + * Get a read lock on the guest memory map by freezing any vcpu. + */ + error = vcpu_lock_one(sc, VM_MAXCPU - 1); + if (error) + return (error); + + gpa = 0; + found = 0; + while (!found) { + error = vm_mmap_getnext(sc->vm, &gpa, &segid, &segoff, &len, + NULL, NULL); + if (error) + break; + + if (first >= gpa && last <= gpa + len) + found = 1; + else + gpa += len; + } + + if (found) { + error = vm_get_memseg(sc->vm, segid, &len, &sysmem, objp); + KASSERT(error == 0 && *objp != NULL, + ("%s: invalid memory segment %d", __func__, segid)); + if (sysmem) { + vm_object_reference(*objp); + *offset = segoff + (first - gpa); + } else { + error = EINVAL; + } + } + vcpu_unlock_one(sc, VM_MAXCPU - 1); return (error); } static void vmmdev_destroy(void *arg) { - struct vmmdev_softc *sc = arg; + struct devmem_softc *dsc; + int error; + + error = vcpu_lock_all(sc); + KASSERT(error == 0, ("%s: error %d freezing vcpus", __func__, error)); + + while ((dsc = SLIST_FIRST(&sc->devmem)) != NULL) { + KASSERT(dsc->cdev == NULL, ("%s: devmem not free", __func__)); + SLIST_REMOVE_HEAD(&sc->devmem, link); + free(dsc->name, M_VMMDEV); + free(dsc, M_VMMDEV); + } if (sc->cdev != NULL) destroy_dev(sc->cdev); @@ -560,6 +751,7 @@ sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) { int error; char buf[VM_MAX_NAMELEN]; + struct devmem_softc *dsc; struct vmmdev_softc *sc; struct cdev *cdev; @@ -578,22 +770,30 @@ sysctl_vmm_destroy(SYSCTL_HANDLER_ARGS) /* * The 'cdev' will be destroyed 
asynchronously when 'si_threadcount' * goes down to 0 so we should not do it again in the callback. + * + * Setting 'sc->cdev' to NULL is also used to indicate that the VM + * is scheduled for destruction. */ cdev = sc->cdev; sc->cdev = NULL; mtx_unlock(&vmmdev_mtx); /* - * Schedule the 'cdev' to be destroyed: + * Schedule all cdevs to be destroyed: * - * - any new operations on this 'cdev' will return an error (ENXIO). + * - any new operations on the 'cdev' will return an error (ENXIO). * * - when the 'si_threadcount' dwindles down to zero the 'cdev' will * be destroyed and the callback will be invoked in a taskqueue * context. + * + * - the 'devmem' cdevs are destroyed before the virtual machine 'cdev' */ + SLIST_FOREACH(dsc, &sc->devmem, link) { + KASSERT(dsc->cdev != NULL, ("devmem cdev already destroyed")); + destroy_dev_sched_cb(dsc->cdev, devmem_destroy, dsc); + } destroy_dev_sched_cb(cdev, vmmdev_destroy, sc); - return (0); } SYSCTL_PROC(_hw_vmm, OID_AUTO, destroy, CTLTYPE_STRING | CTLFLAG_RW, @@ -634,6 +834,7 @@ sysctl_vmm_create(SYSCTL_HANDLER_ARGS) sc = malloc(sizeof(struct vmmdev_softc), M_VMMDEV, M_WAITOK | M_ZERO); sc->vm = vm; + SLIST_INIT(&sc->devmem); /* * Lookup the name again just in case somebody sneaked in when we @@ -687,3 +888,96 @@ vmmdev_cleanup(void) return (error); } + +static int +devmem_mmap_single(struct cdev *cdev, vm_ooffset_t *offset, vm_size_t len, + struct vm_object **objp, int nprot) +{ + struct devmem_softc *dsc; + vm_ooffset_t first, last; + size_t seglen; + int error; + bool sysmem; + + dsc = cdev->si_drv1; + if (dsc == NULL) { + /* 'cdev' has been created but is not ready for use */ + return (ENXIO); + } + + first = *offset; + last = *offset + len; + if ((nprot & PROT_EXEC) || first < 0 || first >= last) + return (EINVAL); + + error = vcpu_lock_one(dsc->sc, VM_MAXCPU - 1); + if (error) + return (error); + + error = vm_get_memseg(dsc->sc->vm, dsc->segid, &seglen, &sysmem, objp); + KASSERT(error == 0 && !sysmem && *objp != 
NULL, + ("%s: invalid devmem segment %d", __func__, dsc->segid)); + + vcpu_unlock_one(dsc->sc, VM_MAXCPU - 1); + + if (seglen >= last) { + vm_object_reference(*objp); + return (0); + } else { + return (EINVAL); + } +} + +static struct cdevsw devmemsw = { + .d_name = "devmem", + .d_version = D_VERSION, + .d_mmap_single = devmem_mmap_single, +}; + +static int +devmem_create_cdev(const char *vmname, int segid, char *devname) +{ + struct devmem_softc *dsc; + struct vmmdev_softc *sc; + struct cdev *cdev; + int error; + + error = make_dev_p(MAKEDEV_CHECKNAME, &cdev, &devmemsw, NULL, + UID_ROOT, GID_WHEEL, 0600, "vmm/%s.%s", vmname, devname); + if (error) + return (error); + + dsc = malloc(sizeof(struct devmem_softc), M_VMMDEV, M_WAITOK | M_ZERO); + + mtx_lock(&vmmdev_mtx); + sc = vmmdev_lookup(vmname); + KASSERT(sc != NULL, ("%s: vm %s softc not found", __func__, vmname)); + if (sc->cdev == NULL) { + /* virtual machine is being created or destroyed */ + mtx_unlock(&vmmdev_mtx); + free(dsc, M_VMMDEV); + destroy_dev_sched_cb(cdev, NULL, 0); + return (ENODEV); + } + + dsc->segid = segid; + dsc->name = devname; + dsc->cdev = cdev; + dsc->sc = sc; + SLIST_INSERT_HEAD(&sc->devmem, dsc, link); + mtx_unlock(&vmmdev_mtx); + + /* The 'cdev' is ready for use after 'si_drv1' is initialized */ + cdev->si_drv1 = dsc; + return (0); +} + +static void +devmem_destroy(void *arg) +{ + struct devmem_softc *dsc = arg; + + KASSERT(dsc->cdev, ("%s: devmem cdev already destroyed", __func__)); + dsc->cdev = NULL; + dsc->sc = NULL; +} diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c index 758b7e80f77c..6dadcc18ce52 100644 --- a/sys/amd64/vmm/vmm_instruction_emul.c +++ b/sys/amd64/vmm/vmm_instruction_emul.c @@ -1677,12 +1677,12 @@ ptp_release(void **cookie) } static void * -ptp_hold(struct vm *vm, vm_paddr_t ptpphys, size_t len, void **cookie) +ptp_hold(struct vm *vm, int vcpu, vm_paddr_t ptpphys, size_t len, void **cookie) { void *ptr; ptp_release(cookie); 
- ptr = vm_gpa_hold(vm, ptpphys, len, VM_PROT_RW, cookie); + ptr = vm_gpa_hold(vm, vcpu, ptpphys, len, VM_PROT_RW, cookie); return (ptr); } @@ -1729,7 +1729,8 @@ vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, /* Zero out the lower 12 bits. */ ptpphys &= ~0xfff; - ptpbase32 = ptp_hold(vm, ptpphys, PAGE_SIZE, &cookie); + ptpbase32 = ptp_hold(vm, vcpuid, ptpphys, PAGE_SIZE, + &cookie); if (ptpbase32 == NULL) goto error; @@ -1788,7 +1789,8 @@ vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, /* Zero out the lower 5 bits and the upper 32 bits */ ptpphys &= 0xffffffe0UL; - ptpbase = ptp_hold(vm, ptpphys, sizeof(*ptpbase) * 4, &cookie); + ptpbase = ptp_hold(vm, vcpuid, ptpphys, sizeof(*ptpbase) * 4, + &cookie); if (ptpbase == NULL) goto error; @@ -1811,7 +1813,7 @@ vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, /* Zero out the lower 12 bits and the upper 12 bits */ ptpphys >>= 12; ptpphys <<= 24; ptpphys >>= 12; - ptpbase = ptp_hold(vm, ptpphys, PAGE_SIZE, &cookie); + ptpbase = ptp_hold(vm, vcpuid, ptpphys, PAGE_SIZE, &cookie); if (ptpbase == NULL) goto error; diff --git a/sys/amd64/vmm/vmm_mem.c b/sys/amd64/vmm/vmm_mem.c index 1019f2b3614b..c9be6c9affdd 100644 --- a/sys/amd64/vmm/vmm_mem.c +++ b/sys/amd64/vmm/vmm_mem.c @@ -114,38 +114,6 @@ vmm_mmio_free(struct vmspace *vmspace, vm_paddr_t gpa, size_t len) vm_map_remove(&vmspace->vm_map, gpa, gpa + len); } -vm_object_t -vmm_mem_alloc(struct vmspace *vmspace, vm_paddr_t gpa, size_t len) -{ - int error; - vm_object_t obj; - - if (gpa & PAGE_MASK) - panic("vmm_mem_alloc: invalid gpa %#lx", gpa); - - if (len == 0 || (len & PAGE_MASK) != 0) - panic("vmm_mem_alloc: invalid allocation size %lu", len); - - obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT); - if (obj != NULL) { - error = vm_map_find(&vmspace->vm_map, obj, 0, &gpa, len, 0, - VMFS_NO_SPACE, VM_PROT_ALL, VM_PROT_ALL, 0); - if (error != KERN_SUCCESS) { - vm_object_deallocate(obj); - obj = NULL; 
- } - } - - return (obj); -} - -void -vmm_mem_free(struct vmspace *vmspace, vm_paddr_t gpa, size_t len) -{ - - vm_map_remove(&vmspace->vm_map, gpa, gpa + len); -} - vm_paddr_t vmm_mem_maxaddr(void) { diff --git a/sys/amd64/vmm/vmm_mem.h b/sys/amd64/vmm/vmm_mem.h index a375070e640b..7773faab22ed 100644 --- a/sys/amd64/vmm/vmm_mem.h +++ b/sys/amd64/vmm/vmm_mem.h @@ -33,10 +33,8 @@ struct vmspace; struct vm_object; int vmm_mem_init(void); -struct vm_object *vmm_mem_alloc(struct vmspace *, vm_paddr_t gpa, size_t size); struct vm_object *vmm_mmio_alloc(struct vmspace *, vm_paddr_t gpa, size_t len, vm_paddr_t hpa); -void vmm_mem_free(struct vmspace *, vm_paddr_t gpa, size_t size); void vmm_mmio_free(struct vmspace *, vm_paddr_t gpa, size_t size); vm_paddr_t vmm_mem_maxaddr(void); diff --git a/usr.sbin/bhyve/Makefile b/usr.sbin/bhyve/Makefile index bb81bcbaa7f0..be5cb33b423a 100644 --- a/usr.sbin/bhyve/Makefile +++ b/usr.sbin/bhyve/Makefile @@ -13,6 +13,7 @@ SRCS= \ acpi.c \ bhyverun.c \ block_if.c \ + bootrom.c \ consport.c \ dbgport.c \ inout.c \ diff --git a/usr.sbin/bhyve/bhyve.8 b/usr.sbin/bhyve/bhyve.8 index ee0f2caf001e..c5de82a74f36 100644 --- a/usr.sbin/bhyve/bhyve.8 +++ b/usr.sbin/bhyve/bhyve.8 @@ -32,7 +32,7 @@ .Nd "run a guest operating system inside a virtual machine" .Sh SYNOPSIS .Nm -.Op Fl abehuwxACHPWY +.Op Fl abehuwxACHPSWY .Op Fl c Ar numcpus .Op Fl g Ar gdbport .Op Fl l Ar lpcdev Ns Op , Ns Ar conf @@ -99,10 +99,12 @@ Yield the virtual CPU thread when a HLT instruction is detected. If this option is not specified, virtual CPUs will use 100% of a host CPU. .It Fl l Ar lpcdev Ns Op , Ns Ar conf Allow devices behind the LPC PCI-ISA bridge to be configured. -The only supported devices are the TTY-class devices, -.Li com1 +The only supported devices are the TTY-class devices +.Ar com1 and -.Li com2 . +.Ar com2 +and the boot ROM device +.Ar bootrom . .It Fl m Ar size Ns Op Ar K|k|M|m|G|g|T|t Guest physical memory size in bytes. 
This must be the same size that was given to @@ -165,8 +167,8 @@ AHCI controller attached to a SATA hard-drive. .It Li uart PCI 16550 serial device. .It Li lpc -LPC PCI-ISA bridge with COM1 and COM2 16550 serial ports. The LPC bridge -emulation can only be configured on bus 0. +LPC PCI-ISA bridge with COM1 and COM2 16550 serial ports and a boot ROM. +The LPC bridge emulation can only be configured on bus 0. .El .It Op Ar conf This optional parameter describes the backend for device emulations. @@ -224,6 +226,14 @@ the bhyve process. Use the host TTY device for serial port I/O. .El .Pp +Boot ROM device: +.Bl -tag -width 10n +.It Pa romfile +Map +.Ar romfile +in the guest address space reserved for boot firmware. +.El +.Pp Pass-through devices: .Bl -tag -width 10n .It Ns Ar slot Ns / Ns Ar bus Ns / Ns Ar function @@ -235,11 +245,17 @@ and numbers. .El .Pp +Guest memory must be wired using the +.Fl S +option when a pass-through device is configured. +.Pp The host device must have been reserved at boot-time using the .Va pptdev loader variable as described in .Xr vmm 4 . .El +.It Fl S +Wire guest memory. .It Fl u RTC keeps UTC time. 
.It Fl U Ar uuid diff --git a/usr.sbin/bhyve/bhyverun.c b/usr.sbin/bhyve/bhyverun.c index ee0f1060a92c..cfb3fe44b726 100644 --- a/usr.sbin/bhyve/bhyverun.c +++ b/usr.sbin/bhyve/bhyverun.c @@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -122,7 +123,7 @@ usage(int code) { fprintf(stderr, - "Usage: %s [-abehuwxACHPWY] [-c vcpus] [-g ] [-l ]\n" + "Usage: %s [-abehuwxACHPSWY] [-c vcpus] [-g ] [-l ]\n" " %*s [-m mem] [-p vcpu:hostcpu] [-s ] [-U uuid] \n" " -a: local apic is in xAPIC mode (deprecated)\n" " -A: create ACPI tables\n" @@ -137,6 +138,7 @@ usage(int code) " -p: pin 'vcpu' to 'hostcpu'\n" " -P: vmexit from the guest on pause\n" " -s: PCI slot config\n" + " -S: guest memory cannot be swapped\n" " -u: RTC keeps UTC time\n" " -U: uuid\n" " -w: ignore unimplemented MSRs\n" @@ -700,26 +702,82 @@ fbsdrun_set_capabilities(struct vmctx *ctx, int cpu) vm_set_capability(ctx, cpu, VM_CAP_ENABLE_INVPCID, 1); } +static struct vmctx * +do_open(const char *vmname) +{ + struct vmctx *ctx; + int error; + bool reinit, romboot; + + reinit = romboot = false; + + if (lpc_bootrom()) + romboot = true; + + error = vm_create(vmname); + if (error) { + if (errno == EEXIST) { + if (romboot) { + reinit = true; + } else { + /* + * The virtual machine has been setup by the + * userspace bootloader. + */ + } + } else { + perror("vm_create"); + exit(1); + } + } else { + if (!romboot) { + /* + * If the virtual machine was just created then a + * bootrom must be configured to boot it. 
+ */ + fprintf(stderr, "virtual machine cannot be booted\n"); + exit(1); + } + } + + ctx = vm_open(vmname); + if (ctx == NULL) { + perror("vm_open"); + exit(1); + } + + if (reinit) { + error = vm_reinit(ctx); + if (error) { + perror("vm_reinit"); + exit(1); + } + } + return (ctx); +} + int main(int argc, char *argv[]) { int c, error, gdb_port, err, bvmcons; - int dump_guest_memory, max_vcpus, mptgen; + int max_vcpus, mptgen, memflags; int rtc_localtime; struct vmctx *ctx; uint64_t rip; size_t memsize; + char *optstr; bvmcons = 0; - dump_guest_memory = 0; progname = basename(argv[0]); gdb_port = 0; guest_ncpus = 1; memsize = 256 * MB; mptgen = 1; rtc_localtime = 1; + memflags = 0; - while ((c = getopt(argc, argv, "abehuwxACHIPWYp:g:c:s:m:l:U:")) != -1) { + optstr = "abehuwxACHIPSWYp:g:c:s:m:l:U:"; + while ((c = getopt(argc, argv, optstr)) != -1) { switch (c) { case 'a': x2apic_mode = 0; @@ -740,7 +798,7 @@ main(int argc, char *argv[]) guest_ncpus = atoi(optarg); break; case 'C': - dump_guest_memory = 1; + memflags |= VM_MEM_F_INCORE; break; case 'g': gdb_port = atoi(optarg); @@ -756,6 +814,9 @@ main(int argc, char *argv[]) exit(1); else break; + case 'S': + memflags |= VM_MEM_F_WIRED; + break; case 'm': error = vm_parse_memsize(optarg, &memsize); if (error) @@ -810,12 +871,7 @@ main(int argc, char *argv[]) usage(1); vmname = argv[0]; - - ctx = vm_open(vmname); - if (ctx == NULL) { - perror("vm_open"); - exit(1); - } + ctx = do_open(vmname); if (guest_ncpus < 1) { fprintf(stderr, "Invalid guest vCPUs (%d)\n", guest_ncpus); @@ -831,11 +887,10 @@ main(int argc, char *argv[]) fbsdrun_set_capabilities(ctx, BSP); - if (dump_guest_memory) - vm_set_memflags(ctx, VM_MEM_F_INCORE); + vm_set_memflags(ctx, memflags); err = vm_setup_memory(ctx, memsize, VM_MMAP_ALL); if (err) { - fprintf(stderr, "Unable to setup memory (%d)\n", err); + fprintf(stderr, "Unable to setup memory (%d)\n", errno); exit(1); } @@ -865,6 +920,16 @@ main(int argc, char *argv[]) if (bvmcons) 
init_bvmcons(); + if (lpc_bootrom()) { + if (vm_set_capability(ctx, BSP, VM_CAP_UNRESTRICTED_GUEST, 1)) { + fprintf(stderr, "ROM boot failed: unrestricted guest " + "capability not available\n"); + exit(1); + } + error = vcpu_reset(ctx, BSP); + assert(error == 0); + } + error = vm_get_register(ctx, BSP, VM_REG_GUEST_RIP, &rip); assert(error == 0); diff --git a/usr.sbin/bhyve/bootrom.c b/usr.sbin/bhyve/bootrom.c new file mode 100644 index 000000000000..5e4e0e93a2f4 --- /dev/null +++ b/usr.sbin/bhyve/bootrom.c @@ -0,0 +1,111 @@ +/*- + * Copyright (c) 2015 Neel Natu + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include "bhyverun.h" +#include "bootrom.h" + +#define MAX_BOOTROM_SIZE (16 * 1024 * 1024) /* 16 MB */ + +int +bootrom_init(struct vmctx *ctx, const char *romfile) +{ + struct stat sbuf; + vm_paddr_t gpa; + ssize_t rlen; + char *ptr; + int fd, i, rv, prot; + + rv = -1; + fd = open(romfile, O_RDONLY); + if (fd < 0) { + fprintf(stderr, "Error opening bootrom \"%s\": %s\n", + romfile, strerror(errno)); + goto done; + } + + if (fstat(fd, &sbuf) < 0) { + fprintf(stderr, "Could not fstat bootrom file \"%s\": %s\n", + romfile, strerror(errno)); + goto done; + } + + /* + * Limit bootrom size to 16MB so it doesn't encroach into reserved + * MMIO space (e.g. APIC, HPET, MSI). + */ + if (sbuf.st_size > MAX_BOOTROM_SIZE || sbuf.st_size < PAGE_SIZE) { + fprintf(stderr, "Invalid bootrom size %ld\n", sbuf.st_size); + goto done; + } + + if (sbuf.st_size & PAGE_MASK) { + fprintf(stderr, "Bootrom size %ld is not a multiple of the " + "page size\n", sbuf.st_size); + goto done; + } + + ptr = vm_create_devmem(ctx, VM_BOOTROM, "bootrom", sbuf.st_size); + if (ptr == MAP_FAILED) + goto done; + + /* Map the bootrom into the guest address space */ + prot = PROT_READ | PROT_EXEC; + gpa = (1ULL << 32) - sbuf.st_size; + if (vm_mmap_memseg(ctx, gpa, VM_BOOTROM, 0, sbuf.st_size, prot) != 0) + goto done; + + /* Read 'romfile' into the guest address space */ + for (i = 0; i < sbuf.st_size / PAGE_SIZE; i++) { + rlen = read(fd, ptr + i * PAGE_SIZE, PAGE_SIZE); + if (rlen != PAGE_SIZE) { + fprintf(stderr, "Incomplete read of page %d of bootrom " + "file %s: %ld bytes\n", i, romfile, rlen); + goto done; + } + } + rv = 0; +done: + if (fd >= 0) + close(fd); + return (rv); +} diff --git a/usr.sbin/bhyve/bootrom.h b/usr.sbin/bhyve/bootrom.h new file mode 100644 index 000000000000..af150d3255d1 --- /dev/null +++ b/usr.sbin/bhyve/bootrom.h @@ -0,0 
+1,38 @@ +/*- + * Copyright (c) 2015 Neel Natu + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _BOOTROM_H_ +#define _BOOTROM_H_ + +#include + +struct vmctx; + +int bootrom_init(struct vmctx *ctx, const char *romfile); + +#endif diff --git a/usr.sbin/bhyve/pci_lpc.c b/usr.sbin/bhyve/pci_lpc.c index e98b1411dfcd..2203a00baa8f 100644 --- a/usr.sbin/bhyve/pci_lpc.c +++ b/usr.sbin/bhyve/pci_lpc.c @@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$"); #include #include "acpi.h" +#include "bootrom.h" #include "inout.h" #include "pci_emul.h" #include "pci_irq.h" @@ -62,6 +63,8 @@ SYSRES_IO(NMISC_PORT, 1); static struct pci_devinst *lpc_bridge; +static const char *romfile; + #define LPC_UART_NUM 2 static struct lpc_uart_softc { struct uart_softc *uart_softc; @@ -76,7 +79,7 @@ static const char *lpc_uart_names[LPC_UART_NUM] = { "COM1", "COM2" }; /* * LPC device configuration is in the following form: * <lpc_device_name>[,<options>] - * For e.g. "com1,stdio" + * For e.g. "com1,stdio" or "bootrom,/var/romfile" */ int lpc_device_parse(const char *opts) @@ -88,6 +91,11 @@ lpc_device_parse(const char *opts) str = cpy = strdup(opts); lpcdev = strsep(&str, ","); if (lpcdev != NULL) { + if (strcasecmp(lpcdev, "bootrom") == 0) { + romfile = str; + error = 0; + goto done; + } for (unit = 0; unit < LPC_UART_NUM; unit++) { if (strcasecmp(lpcdev, lpc_uart_names[unit]) == 0) { lpc_uart_softc[unit].opts = str; @@ -104,6 +112,13 @@ lpc_device_parse(const char *opts) return (error); } +const char * +lpc_bootrom(void) +{ + /* Path recorded by lpc_device_parse() for "bootrom,<path>"; NULL when no such option was parsed. */ + return (romfile); +} + static void lpc_uart_intr_assert(void *arg) { @@ -156,13 +171,19 @@ lpc_uart_io_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes, } static int -lpc_init(void) +lpc_init(struct vmctx *ctx) { struct lpc_uart_softc *sc; struct inout_port iop; const char *name; int unit, error; + if (romfile != NULL) { + error = bootrom_init(ctx, romfile); + if (error) + return (error); + } + /* COM1 and COM2 */ for (unit = 0; unit < LPC_UART_NUM; unit++) { sc = &lpc_uart_softc[unit]; @@ -379,7 +400,7 @@ pci_lpc_init(struct vmctx *ctx, struct 
pci_devinst *pi, char *opts) return (-1); } - if (lpc_init() != 0) + if (lpc_init(ctx) != 0) return (-1); /* initialize config space */ diff --git a/usr.sbin/bhyve/pci_lpc.h b/usr.sbin/bhyve/pci_lpc.h index 55a58653f422..431f5cffd19f 100644 --- a/usr.sbin/bhyve/pci_lpc.h +++ b/usr.sbin/bhyve/pci_lpc.h @@ -68,5 +68,6 @@ struct lpc_sysres { int lpc_device_parse(const char *opt); char *lpc_pirq_name(int pin); void lpc_pirq_routed(void); +const char *lpc_bootrom(void); #endif diff --git a/usr.sbin/bhyve/pci_passthru.c b/usr.sbin/bhyve/pci_passthru.c index 04d68c4c15ce..5b52b05b5998 100644 --- a/usr.sbin/bhyve/pci_passthru.c +++ b/usr.sbin/bhyve/pci_passthru.c @@ -548,12 +548,18 @@ cfginit(struct vmctx *ctx, struct pci_devinst *pi, int bus, int slot, int func) static int passthru_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) { - int bus, slot, func, error; + int bus, slot, func, error, memflags; struct passthru_softc *sc; sc = NULL; error = 1; + memflags = vm_get_memflags(ctx); + if (!(memflags & VM_MEM_F_WIRED)) { + fprintf(stderr, "passthru requires guest memory to be wired\n"); + goto done; + } + if (pcifd < 0) { pcifd = open(_PATH_DEVPCI, O_RDWR, 0); if (pcifd < 0) diff --git a/usr.sbin/bhyvectl/Makefile b/usr.sbin/bhyvectl/Makefile index dba3f12efe00..4a33dee94676 100644 --- a/usr.sbin/bhyvectl/Makefile +++ b/usr.sbin/bhyvectl/Makefile @@ -7,7 +7,7 @@ SRCS= bhyvectl.c MAN= -LIBADD= vmmapi +LIBADD= vmmapi util WARNS?= 3 diff --git a/usr.sbin/bhyvectl/bhyvectl.c b/usr.sbin/bhyvectl/bhyvectl.c index 7d3017fed31c..51bade17e361 100644 --- a/usr.sbin/bhyvectl/bhyvectl.c +++ b/usr.sbin/bhyvectl/bhyvectl.c @@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -47,10 +48,12 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include -#include #include +#include +#include #include #include "amd/vmcb.h" @@ -236,7 +239,7 @@ static int get_stats, getcap, setcap, capval, get_gpa_pmap; static int inject_nmi, 
assert_lapic_lvt;
 static int force_reset, force_poweroff;
 static const char *capname;
-static int create, destroy, get_lowmem, get_highmem;
+static int create, destroy, get_memmap, get_memseg;
 static int get_intinfo;
 static int get_active_cpus, get_suspended_cpus;
 static uint64_t memsize;
@@ -1320,8 +1323,8 @@ setup_options(bool cpu_intel)
 		{ "get-desc-gdtr", NO_ARG, &get_desc_gdtr, 1 },
 		{ "set-desc-idtr", NO_ARG, &set_desc_idtr, 1 },
 		{ "get-desc-idtr", NO_ARG, &get_desc_idtr, 1 },
-		{ "get-lowmem", NO_ARG, &get_lowmem, 1 },
-		{ "get-highmem",NO_ARG, &get_highmem, 1 },
+		{ "get-memmap", NO_ARG, &get_memmap, 1 },
+		{ "get-memseg", NO_ARG, &get_memseg, 1 },
 		{ "get-efer", NO_ARG, &get_efer, 1 },
 		{ "get-cr0", NO_ARG, &get_cr0, 1 },
 		{ "get-cr3", NO_ARG, &get_cr3, 1 },
@@ -1520,18 +1523,112 @@ mon_str(int idx)
 	return ("UNK");
 }
 
+/*
+ * Print one row for every mapping in the guest physical address space:
+ * guest address, length, backing segment name, offset into the segment,
+ * protection and flags.  The walk starts at address 0 and resumes just past
+ * each mapping reported by vm_mmap_getnext().
+ *
+ * Returns 0 once vm_mmap_getnext() fails with ENOENT, otherwise the
+ * non-zero value returned by the failing call.
+ */
+static int
+show_memmap(struct vmctx *ctx)
+{
+	char name[SPECNAMELEN + 1], numbuf[8];
+	const char *delim;
+	vm_ooffset_t segoff;
+	vm_paddr_t gpa;
+	size_t maplen, seglen;
+	int error, flags, prot, segid;
+
+	printf("Address     Length      Segment     Offset      ");
+	printf("Prot  Flags\n");
+
+	gpa = 0;
+	while (1) {
+		error = vm_mmap_getnext(ctx, &gpa, &segid, &segoff, &maplen,
+		    &prot, &flags);
+		if (error)
+			return (errno == ENOENT ? 0 : error);
+
+		error = vm_get_memseg(ctx, segid, &seglen, name, sizeof(name));
+		if (error)
+			return (error);
+
+		printf("%-12lX", gpa);
+		humanize_number(numbuf, sizeof(numbuf), maplen, "B",
+		    HN_AUTOSCALE, HN_NOSPACE);
+		printf("%-12s", numbuf);
+
+		/* A segment without a name is shown as "sysmem". */
+		printf("%-12s", name[0] ? name : "sysmem");
+		printf("%-12lX", segoff);
+		printf("%c%c%c   ", prot & PROT_READ ? 'R' : '-',
+		    prot & PROT_WRITE ? 'W' : '-',
+		    prot & PROT_EXEC ? 'X' : '-');
+
+		/* Flags form a '/'-separated list; nothing precedes the first. */
+		delim = "";
+		if (flags & VM_MEMMAP_F_WIRED) {
+			printf("%swired", delim);
+			delim = "/";
+		}
+		if (flags & VM_MEMMAP_F_IOMMU) {
+			printf("%siommu", delim);
+			delim = "/";
+		}
+		printf("\n");
+
+		gpa += maplen;
+	}
+}
+
+/*
+ * Print the ID, length and name of every memory segment with a non-zero
+ * length; a segment without a name is shown as "sysmem".  Segment IDs are
+ * probed from 0 upwards until vm_get_memseg() fails.
+ *
+ * Returns 0 if that failure was EINVAL, otherwise the non-zero value
+ * returned by vm_get_memseg().
+ */
+static int
+show_memseg(struct vmctx *ctx)
+{
+	char name[SPECNAMELEN + 1], numbuf[8];
+	size_t seglen;
+	int error, segid;
+
+	printf("ID  Length      Name\n");
+
+	segid = 0;
+	while (1) {
+		error = vm_get_memseg(ctx, segid, &seglen, name, sizeof(name));
+		if (error)
+			return (errno == EINVAL ? 0 : error);
+
+		if (seglen) {
+			printf("%-4d", segid);
+			humanize_number(numbuf, sizeof(numbuf), seglen, "B",
+			    HN_AUTOSCALE, HN_NOSPACE);
+			printf("%-12s", numbuf);
+			printf("%s", name[0] ? name : "sysmem");
+			printf("\n");
+		}
+		segid++;
+	}
+}
+
 int
 main(int argc, char *argv[])
 {
 	char *vmname;
 	int error, ch, vcpu, ptenum;
-	vm_paddr_t gpa, gpa_pmap;
-	size_t len;
+	vm_paddr_t gpa_pmap;
 	struct vm_exit vmexit;
 	uint64_t rax, cr0, cr3, cr4, dr7, rsp, rip, rflags, efer, pat;
 	uint64_t eptp, bm, addr, u64, pteval[4], *pte, info[2];
 	struct vmctx *ctx;
-	int wired;
 	cpuset_t cpus;
 	bool cpu_intel;
 	uint64_t cs, ds, es, fs, gs, ss, tr, ldtr;
@@ -1703,7 +1800,7 @@ main(int argc, char *argv[])
 	}
 
 	if (!error && memsize)
-		error = vm_setup_memory(ctx, memsize, VM_MMAP_NONE);
+		error = vm_setup_memory(ctx, memsize, VM_MMAP_ALL);
 
 	if (!error && set_efer)
 		error = vm_set_register(ctx, vcpu, VM_REG_GUEST_EFER, efer);
@@ -1838,21 +1935,11 @@ main(int argc, char *argv[])
 		error = vm_lapic_local_irq(ctx, vcpu, assert_lapic_lvt);
 	}
 
-	if (!error && (get_lowmem || get_all)) {
-		gpa = 0;
-		error = vm_get_memory_seg(ctx, gpa, &len, &wired);
-		if (error == 0)
-			printf("lowmem\t\t0x%016lx/%ld%s\n", gpa, len,
-			    wired ?
" wired" : ""); - } + if (!error && (get_memseg || get_all)) + error = show_memseg(ctx); - if (!error && (get_highmem || get_all)) { - gpa = 4 * GB; - error = vm_get_memory_seg(ctx, gpa, &len, &wired); - if (error == 0) - printf("highmem\t\t0x%016lx/%ld%s\n", gpa, len, - wired ? " wired" : ""); - } + if (!error && (get_memmap || get_all)) + error = show_memmap(ctx); if (!error) error = get_all_registers(ctx, vcpu); diff --git a/usr.sbin/bhyveload/bhyveload.8 b/usr.sbin/bhyveload/bhyveload.8 index c1688322cc75..fc9c8e173cd2 100644 --- a/usr.sbin/bhyveload/bhyveload.8 +++ b/usr.sbin/bhyveload/bhyveload.8 @@ -35,6 +35,7 @@ guest inside a bhyve virtual machine .Sh SYNOPSIS .Nm +.Op Fl S .Op Fl c Ar cons-dev .Op Fl d Ar disk-path .Op Fl e Ar name=value @@ -111,8 +112,10 @@ respectively. The default value of .Ar mem-size is 256M. +.It Fl S +Wire guest memory. .El .Sh EXAMPLES To create a virtual machine named .Ar freebsd-vm that boots off the ISO image diff --git a/usr.sbin/bhyveload/bhyveload.c b/usr.sbin/bhyveload/bhyveload.c index 8ebf1167be9e..8178bb219910 100644 --- a/usr.sbin/bhyveload/bhyveload.c +++ b/usr.sbin/bhyveload/bhyveload.c @@ -629,7 +629,7 @@ usage(void) { fprintf(stderr, - "usage: %s [-c ] [-d ] [-e ]\n" + "usage: %s [-S][-c ] [-d ] [-e ]\n" " %*s [-h ] [-m mem-size] \n", progname, (int)strlen(progname), ""); @@ -642,16 +642,17 @@ main(int argc, char** argv) void *h; void (*func)(struct loader_callbacks *, void *, int, int); uint64_t mem_size; - int opt, error, need_reinit; + int opt, error, need_reinit, memflags; progname = basename(argv[0]); + memflags = 0; mem_size = 256 * MB; consin_fd = STDIN_FILENO; consout_fd = STDOUT_FILENO; - while ((opt = getopt(argc, argv, "c:d:e:h:m:")) != -1) { + while ((opt = getopt(argc, argv, "Sc:d:e:h:m:")) != -1) { switch (opt) { case 'c': error = altcons_open(optarg); @@ -678,6 +679,9 @@ main(int argc, char** argv) if (error != 0) errx(EX_USAGE, "Invalid memsize '%s'", optarg); break; + case 'S': + memflags |= 
VM_MEM_F_WIRED; + break; case '?': usage(); } @@ -715,6 +719,7 @@ main(int argc, char** argv) } } + vm_set_memflags(ctx, memflags); error = vm_setup_memory(ctx, mem_size, VM_MMAP_ALL); if (error) { perror("vm_setup_memory");