Enable memory overcommit for AMD processors.
- No emulation of A/D bits is required since AMD-V RVI supports A/D bits.
- Enable pmap PT_RVI support (w/o PAT) which is required for memory
  over-commit support.
- Other minor fixes:
  * Make use of the VMCB EXITINTINFO field. If a #VMEXIT happens while
    delivering an interrupt, EXITINTINFO has all the details that bhyve
    needs to inject the same interrupt.
  * SVM h/w decode assist code was incomplete - removed for now.
  * Some minor code clean-up (more coming).

Submitted by:	Anish Gupta (akgupt3@gmail.com)
parent d8ced94511
commit a0b78f096a
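The EXITINTINFO bullet above is the core of the interrupt fix: when a #VMEXIT interrupts event delivery, the VMCB latches the pending event so the hypervisor can re-inject it on the next VMRUN (see the svm_handle_exitintinfo hunk below). A runnable sketch of the field decoding, using the architectural layout from APMv2 section 15.7.2 rather than the driver's own macros; the sample value is invented for the demo:

#include <stdint.h>
#include <stdio.h>

/* EXITINTINFO fields: vector 7:0, type 10:8, error-code-valid bit 11,
 * valid bit 31, error code 63:32 (APMv2 section 15.7.2). */
#define EXITINTINFO_VECTOR(x)	((unsigned)((x) & 0xFF))
#define EXITINTINFO_TYPE(x)	((unsigned)(((x) >> 8) & 0x7))
#define EXITINTINFO_EC_VALID(x)	((unsigned)(((x) >> 11) & 0x1))
#define EXITINTINFO_VALID(x)	(((x) >> 31) & 0x1)
#define EXITINTINFO_EC(x)	((uint32_t)((x) >> 32))

int
main(void)
{
	/* Example: #GP (vector 13, type 3 = exception), error code 0,
	 * latched as valid while being delivered. */
	uint64_t intinfo = 13ULL | (3ULL << 8) | (1ULL << 11) | (1ULL << 31);

	/* Only a valid EXITINTINFO must be replayed on the next VMRUN. */
	if (EXITINTINFO_VALID(intinfo))
		printf("re-inject vector %u type %u ec_valid %u ec %u\n",
		    EXITINTINFO_VECTOR(intinfo), EXITINTINFO_TYPE(intinfo),
		    EXITINTINFO_EC_VALID(intinfo), EXITINTINFO_EC(intinfo));
	return (0);
}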
@@ -145,6 +145,13 @@ __FBSDID("$FreeBSD$");
#include <machine/smp.h>
#endif

static __inline boolean_t
pmap_type_guest(pmap_t pmap)
{

	return ((pmap->pm_type == PT_EPT) || (pmap->pm_type == PT_RVI));
}

static __inline boolean_t
pmap_emulate_ad_bits(pmap_t pmap)
{

@@ -159,6 +166,7 @@ pmap_valid_bit(pmap_t pmap)

	switch (pmap->pm_type) {
	case PT_X86:
	case PT_RVI:
		mask = X86_PG_V;
		break;
	case PT_EPT:

@@ -181,6 +189,7 @@ pmap_rw_bit(pmap_t pmap)

	switch (pmap->pm_type) {
	case PT_X86:
	case PT_RVI:
		mask = X86_PG_RW;
		break;
	case PT_EPT:

@@ -205,6 +214,7 @@ pmap_global_bit(pmap_t pmap)
	case PT_X86:
		mask = X86_PG_G;
		break;
	case PT_RVI:
	case PT_EPT:
		mask = 0;
		break;

@@ -222,6 +232,7 @@ pmap_accessed_bit(pmap_t pmap)

	switch (pmap->pm_type) {
	case PT_X86:
	case PT_RVI:
		mask = X86_PG_A;
		break;
	case PT_EPT:

@@ -244,6 +255,7 @@ pmap_modified_bit(pmap_t pmap)

	switch (pmap->pm_type) {
	case PT_X86:
	case PT_RVI:
		mask = X86_PG_M;
		break;
	case PT_EPT:

@@ -1094,6 +1106,9 @@ pmap_swap_pat(pmap_t pmap, pt_entry_t entry)
		if ((entry & x86_pat_bits) != 0)
			entry ^= x86_pat_bits;
		break;
	case PT_RVI:
		/* XXX: PAT support. */
		break;
	case PT_EPT:
		/*
		 * Nothing to do - the memory attributes are represented

@@ -1137,6 +1152,11 @@ pmap_cache_bits(pmap_t pmap, int mode, boolean_t is_pde)
			cache_bits |= PG_NC_PWT;
		break;

	case PT_RVI:
		/* XXX: PAT support. */
		cache_bits = 0;
		break;

	case PT_EPT:
		cache_bits = EPT_PG_IGNORE_PAT | EPT_PG_MEMORY_TYPE(mode);
		break;

@@ -1157,6 +1177,10 @@ pmap_cache_mask(pmap_t pmap, boolean_t is_pde)
	case PT_X86:
		mask = is_pde ? X86_PG_PDE_CACHE : X86_PG_PTE_CACHE;
		break;
	case PT_RVI:
		/* XXX: PAT support. */
		mask = 0;
		break;
	case PT_EPT:
		mask = EPT_PG_IGNORE_PAT | EPT_PG_MEMORY_TYPE(0x7);
		break;

@@ -1181,6 +1205,7 @@ pmap_update_pde_store(pmap_t pmap, pd_entry_t *pde, pd_entry_t newpde)
	switch (pmap->pm_type) {
	case PT_X86:
		break;
	case PT_RVI:
	case PT_EPT:
		/*
		 * XXX

@@ -1216,9 +1241,9 @@ pmap_update_pde_invalidate(pmap_t pmap, vm_offset_t va, pd_entry_t newpde)
{
	pt_entry_t PG_G;

	if (pmap->pm_type == PT_EPT)
	if (pmap_type_guest(pmap))
		return;

	KASSERT(pmap->pm_type == PT_X86,
	    ("pmap_update_pde_invalidate: invalid type %d", pmap->pm_type));

@@ -1331,11 +1356,11 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
	cpuset_t other_cpus;
	u_int cpuid;

	if (pmap->pm_type == PT_EPT) {
	if (pmap_type_guest(pmap)) {
		pmap_invalidate_ept(pmap);
		return;
	}

	KASSERT(pmap->pm_type == PT_X86,
	    ("pmap_invalidate_page: invalid type %d", pmap->pm_type));

@@ -1409,7 +1434,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
	vm_offset_t addr;
	u_int cpuid;

	if (pmap->pm_type == PT_EPT) {
	if (pmap_type_guest(pmap)) {
		pmap_invalidate_ept(pmap);
		return;
	}

@@ -1468,7 +1493,7 @@ pmap_invalidate_all(pmap_t pmap)
	uint64_t cr3;
	u_int cpuid;

	if (pmap->pm_type == PT_EPT) {
	if (pmap_type_guest(pmap)) {
		pmap_invalidate_ept(pmap);
		return;
	}

@@ -1588,7 +1613,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
	cpuid = PCPU_GET(cpuid);
	other_cpus = all_cpus;
	CPU_CLR(cpuid, &other_cpus);
	if (pmap == kernel_pmap || pmap->pm_type == PT_EPT)
	if (pmap == kernel_pmap || pmap_type_guest(pmap))
		active = all_cpus;
	else {
		active = pmap->pm_active;

@@ -1626,6 +1651,7 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
		if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
			invlpg(va);
		break;
	case PT_RVI:
	case PT_EPT:
		pmap->pm_eptgen++;
		break;

@@ -1645,6 +1671,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
		for (addr = sva; addr < eva; addr += PAGE_SIZE)
			invlpg(addr);
		break;
	case PT_RVI:
	case PT_EPT:
		pmap->pm_eptgen++;
		break;

@@ -1662,6 +1689,7 @@ pmap_invalidate_all(pmap_t pmap)
		if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
			invltlb();
		break;
	case PT_RVI:
	case PT_EPT:
		pmap->pm_eptgen++;
		break;
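The pmap hunks above hinge on two small predicates: PT_RVI joins PT_X86 in every bit-mask switch because RVI nested page tables use the native x86 PTE format, so the hardware sets accessed/dirty bits itself, while PT_EPT may still need software emulation. A self-contained model of that distinction; the PMAP_EMULATE_AD_BITS flag mirrors the EPT path and treating it as a plain bit flag here is an assumption for illustration:

#include <stdbool.h>
#include <stdio.h>

enum pm_type { PT_X86, PT_EPT, PT_RVI };
#define PMAP_EMULATE_AD_BITS	0x1	/* assumed flag, modeled on the EPT path */

struct pmap_model {
	enum pm_type pm_type;
	int pm_flags;
};

static bool
pmap_type_guest(const struct pmap_model *pmap)
{
	/* Both nested-paging flavors are guest pmaps. */
	return (pmap->pm_type == PT_EPT || pmap->pm_type == PT_RVI);
}

static bool
pmap_emulate_ad_bits(const struct pmap_model *pmap)
{
	/* RVI never sets this flag: AMD-V updates A/D bits in hardware. */
	return ((pmap->pm_flags & PMAP_EMULATE_AD_BITS) != 0);
}

int
main(void)
{
	struct pmap_model rvi = { PT_RVI, 0 };
	struct pmap_model ept = { PT_EPT, PMAP_EMULATE_AD_BITS };

	printf("RVI: guest=%d emulate A/D=%d\n",
	    pmap_type_guest(&rvi), pmap_emulate_ad_bits(&rvi));
	printf("EPT: guest=%d emulate A/D=%d\n",
	    pmap_type_guest(&ept), pmap_emulate_ad_bits(&ept));
	return (0);
}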
@@ -37,136 +37,6 @@ __FBSDID("$FreeBSD$");
#include <machine/vmm.h>
#include "io/iommu.h"

static int
amdv_init(void)
{

	printf("amdv_init: not implemented\n");
	return (ENXIO);
}

static int
amdv_cleanup(void)
{

	printf("amdv_cleanup: not implemented\n");
	return (ENXIO);
}

static void *
amdv_vminit(struct vm *vm, struct pmap *pmap)
{

	printf("amdv_vminit: not implemented\n");
	return (NULL);
}

static int
amdv_vmrun(void *arg, int vcpu, register_t rip, struct pmap *pmap)
{

	printf("amdv_vmrun: not implemented\n");
	return (ENXIO);
}

static void
amdv_vmcleanup(void *arg)
{

	printf("amdv_vmcleanup: not implemented\n");
	return;
}

static int
amdv_getreg(void *arg, int vcpu, int regnum, uint64_t *retval)
{

	printf("amdv_getreg: not implemented\n");
	return (EINVAL);
}

static int
amdv_setreg(void *arg, int vcpu, int regnum, uint64_t val)
{

	printf("amdv_setreg: not implemented\n");
	return (EINVAL);
}

static int
amdv_getdesc(void *vmi, int vcpu, int num, struct seg_desc *desc)
{

	printf("amdv_get_desc: not implemented\n");
	return (EINVAL);
}

static int
amdv_setdesc(void *vmi, int vcpu, int num, struct seg_desc *desc)
{

	printf("amdv_get_desc: not implemented\n");
	return (EINVAL);
}

static int
amdv_inject_event(void *vmi, int vcpu, int type, int vector,
    uint32_t error_code, int error_code_valid)
{

	printf("amdv_inject_event: not implemented\n");
	return (EINVAL);
}

static int
amdv_getcap(void *arg, int vcpu, int type, int *retval)
{

	printf("amdv_getcap: not implemented\n");
	return (EINVAL);
}

static int
amdv_setcap(void *arg, int vcpu, int type, int val)
{

	printf("amdv_setcap: not implemented\n");
	return (EINVAL);
}

static struct vmspace *
amdv_vmspace_alloc(vm_offset_t min, vm_offset_t max)
{

	printf("amdv_vmspace_alloc: not implemented\n");
	return (NULL);
}

static void
amdv_vmspace_free(struct vmspace *vmspace)
{

	printf("amdv_vmspace_free: not implemented\n");
	return;
}

struct vmm_ops vmm_ops_amd = {
	amdv_init,
	amdv_cleanup,
	amdv_vminit,
	amdv_vmrun,
	amdv_vmcleanup,
	amdv_getreg,
	amdv_setreg,
	amdv_getdesc,
	amdv_setdesc,
	amdv_inject_event,
	amdv_getcap,
	amdv_setcap,
	amdv_vmspace_alloc,
	amdv_vmspace_free,
};

static int
amd_iommu_init(void)
{
@@ -28,11 +28,13 @@
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_extern.h>

#include <machine/pmap.h>
#include <machine/md_var.h>

@@ -44,268 +46,46 @@ __FBSDID("$FreeBSD$");
#include "svm_softc.h"
#include "npt.h"

/*
 * "Nested Paging" is an optional SVM feature that provides two levels of
 * address translation, thus eliminating the need for the VMM to maintain
 * shadow page tables.
 *
 * Documented in APMv2, section 15.25, Nested Paging.
 */

#define PAGE_4KB	(4 * 1024)
#define PAGE_2MB	(2 * 1024 * 1024UL)
#define PAGE_1GB	(1024 * 1024 * 1024UL)

#define GPA_UNMAPPED	((vm_paddr_t)~0)

/* Get page entry to physical address. */
#define PTE2PA(x)	((uint64_t)(x) & ~PAGE_MASK)

MALLOC_DECLARE(M_SVM);

static uint64_t svm_npt_create(pml4_entry_t *pml4, vm_paddr_t gpa,
    vm_paddr_t hpa, vm_memattr_t attr,
    int prot, uint64_t size);

static const int PT_INDEX_MASK = 0x1FF;
static const int PT_SHIFT = 9;

/*
 * Helper function to create nested page table entries for a page
 * of size 1GB, 2MB or 4KB.
 *
 * Starting from PML4 create a PDPTE, PDE or PTE depending on 'pg_size'
 * value of 1GB, 2MB or 4KB respectively.
 *
 * Return size of the mapping created on success and 0 on failure.
 *
 * XXX: NPT PAT settings.
 */
static uint64_t
svm_npt_create(pml4_entry_t *pml4, vm_paddr_t gpa, vm_paddr_t hpa,
    vm_memattr_t attr, int prot, uint64_t pg_size)
{
	uint64_t *pt, *page, pa;
	pt_entry_t mode;
	int shift, index;

	KASSERT(pg_size, ("Size of page must be 1GB, 2MB or 4KB"));
	if (hpa & (pg_size - 1)) {
		ERR("HPA(0x%lx) is not aligned, size:0x%lx\n", hpa, pg_size);
		return (0);
	}

	if (gpa & (pg_size - 1)) {
		ERR("GPA(0x%lx) is not aligned, size (0x%lx)\n", gpa, pg_size);
		return (0);
	}

	/* Find out mode bits for PTE */
	mode = PG_U | PG_V;
	if (prot & VM_PROT_WRITE)
		mode |= PG_RW;
	if ((prot & VM_PROT_EXECUTE) == 0)
		mode |= pg_nx;

	pt = (uint64_t *)pml4;
	shift = PML4SHIFT;

	while ((shift > PAGE_SHIFT) && (pg_size < (1UL << shift))) {
		/* Get PDP, PD or PT index from guest physical address. */
		index = (gpa >> shift) & PT_INDEX_MASK;

		/* If page entry is missing, allocate new page for table.*/
		if (pt[index] == 0) {
			page = malloc(PAGE_SIZE, M_SVM, M_WAITOK | M_ZERO);
			pt[index] = vtophys(page) | mode;
		}

		pa = PTE2PA(pt[index]);;
		pt = (uint64_t *)PHYS_TO_DMAP(pa);
		shift -= PT_SHIFT;
	}

	/* Create leaf entry mapping. */
	index = (gpa >> shift) & PT_INDEX_MASK;

	if (prot != VM_PROT_NONE) {
		pt[index] = hpa | mode;
		pt[index] |= (pg_size > PAGE_SIZE) ? PG_PS : 0;
	} else
		pt[index] = 0;

	return (1UL << shift);
}

/*
 * Map guest physical address to host physical address.
 */
int
svm_npt_vmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa,
    size_t size, vm_memattr_t attr, int prot, boolean_t spok)
{
	pml4_entry_t *pml4;
	struct svm_softc *svm_sc;
	uint64_t len, mapped, pg_size;

	svm_sc = arg;
	pml4 = svm_sc->np_pml4;

	pg_size = PAGE_4KB;
	if (spok) {
		pg_size = PAGE_2MB;
		if (amd_feature & AMDID_PAGE1GB)
			pg_size = PAGE_1GB;
	}

	/* Compute the largest page mapping that can be used */
	while (pg_size > PAGE_4KB) {
		if (size >= pg_size &&
		    (gpa & (pg_size - 1)) == 0 &&
		    (hpa & (pg_size - 1)) == 0) {
			break;
		}
		pg_size >>= PT_SHIFT;
	}

	len = 0;
	while (len < size) {
		mapped = svm_npt_create(pml4, gpa + len, hpa + len, attr, prot,
		    pg_size);
		len += mapped;
	}

	return (0);
}

/*
 * Get HPA for a given GPA.
 */
vm_paddr_t
svm_npt_vmmap_get(void *arg, vm_paddr_t gpa)
{
	struct svm_softc *svm_sc;
	pml4_entry_t *pml4;
	uint64_t *pt, pa, hpa, pgmask;
	int shift, index;

	svm_sc = arg;
	pml4 = svm_sc->np_pml4;

	pt = (uint64_t *)pml4;
	shift = PML4SHIFT;

	while (shift > PAGE_SHIFT) {
		/* Get PDP, PD or PT index from GPA */
		index = (gpa >> shift) & PT_INDEX_MASK;
		if (pt[index] == 0) {
			ERR("No entry for GPA:0x%lx.", gpa);
			return (GPA_UNMAPPED);
		}

		if (pt[index] & PG_PS) {
			break;
		}

		pa = PTE2PA(pt[index]);;
		pt = (uint64_t *)PHYS_TO_DMAP(pa);
		shift -= PT_SHIFT;
	}

	index = (gpa >> shift) & PT_INDEX_MASK;
	if (pt[index] == 0) {
		ERR("No mapping for GPA:0x%lx.\n", gpa);
		return (GPA_UNMAPPED);
	}

	/* Add GPA offset to HPA */
	pgmask = (1UL << shift) - 1;
	hpa = (PTE2PA(pt[index]) & ~pgmask) | (gpa & pgmask);

	return (hpa);
}
SYSCTL_DECL(_hw_vmm);
SYSCTL_NODE(_hw_vmm, OID_AUTO, npt, CTLFLAG_RW, NULL, NULL);

static int npt_flags;
SYSCTL_INT(_hw_vmm_npt, OID_AUTO, pmap_flags, CTLFLAG_RD,
    &npt_flags, 0, NULL);
/*
 * AMD nested page table init.
 */
int
svm_npt_init(void)
{
	int enable_superpage = 1;

	TUNABLE_INT_FETCH("hw.vmm.npt.enable_superpage", &enable_superpage);
	if (enable_superpage)
		npt_flags |= PMAP_PDE_SUPERPAGE;

	return (0);
}

/*
 * Free Page Table page.
 */
static void
free_pt(pd_entry_t pde)
{
	pt_entry_t *pt;

	pt = (pt_entry_t *)PHYS_TO_DMAP(PTE2PA(pde));
	free(pt, M_SVM);

static int
npt_pinit(pmap_t pmap)
{

	return (pmap_pinit_type(pmap, PT_RVI, npt_flags));
}

/*
 * Free Page Directory page.
 */
static void
free_pd(pdp_entry_t pdpe)
struct vmspace *
svm_npt_alloc(vm_offset_t min, vm_offset_t max)
{
	pd_entry_t *pd;
	int i;

	pd = (pd_entry_t *)PHYS_TO_DMAP(PTE2PA(pdpe));
	for (i = 0; i < NPDEPG; i++) {
		/* Skip not-present or superpage entries */
		if ((pd[i] == 0) || (pd[i] & PG_PS))
			continue;

		free_pt(pd[i]);
	}

	free(pd, M_SVM);

	return (vmspace_alloc(min, max, npt_pinit));
}

/*
 * Free Page Directory Pointer page.
 */
static void
free_pdp(pml4_entry_t pml4e)
void
svm_npt_free(struct vmspace *vmspace)
{
	pdp_entry_t *pdp;
	int i;

	pdp = (pdp_entry_t *)PHYS_TO_DMAP(PTE2PA(pml4e));
	for (i = 0; i < NPDPEPG; i++) {
		/* Skip not-present or superpage entries */
		if ((pdp[i] == 0) || (pdp[i] & PG_PS))
			continue;

		free_pd(pdp[i]);
	}

	free(pdp, M_SVM);
}

/*
 * Free the guest's nested page table.
 */
int
svm_npt_cleanup(struct svm_softc *svm_sc)
{
	pml4_entry_t *pml4;
	int i;

	pml4 = svm_sc->np_pml4;

	for (i = 0; i < NPML4EPG; i++) {
		if (pml4[i] != 0) {
			free_pdp(pml4[i]);
			pml4[i] = 0;
		}
	}

	return (0);
	vmspace_free(vmspace);
}
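The svm_npt_create() walk above leans on the regularity of long-mode paging: each level decodes 9 address bits, so the table index at every level is just a shift and mask, exactly the PT_SHIFT/PT_INDEX_MASK constants in the hunk. A small self-contained illustration of that arithmetic (the sample address is arbitrary):

#include <stdint.h>
#include <stdio.h>

#define PT_SHIFT	9	/* bits decoded per paging level */
#define PT_INDEX_MASK	0x1FF	/* 2^9 - 1 entries per table */
#define PML4SHIFT	39	/* top level starts at bit 39 */
#define PAGE_SHIFT	12

int
main(void)
{
	uint64_t gpa = 0x12345678000ULL;	/* arbitrary guest-physical address */
	int shift;

	/* Same loop shape as svm_npt_create(): PML4 -> PDP -> PD -> PT. */
	for (shift = PML4SHIFT; shift >= PAGE_SHIFT; shift -= PT_SHIFT)
		printf("level shift %2d -> index %3d\n",
		    shift, (int)((gpa >> shift) & PT_INDEX_MASK));
	return (0);
}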
@@ -31,10 +31,7 @@

struct svm_softc;

int svm_npt_init(void);
int svm_npt_cleanup(struct svm_softc *sc);
vm_paddr_t svm_npt_vmmap_get(void *arg, vm_paddr_t gpa);
int svm_npt_vmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa,
    size_t len, vm_memattr_t attr, int prot,
    boolean_t sp);
int svm_npt_init(void);
struct vmspace *svm_npt_alloc(vm_offset_t min, vm_offset_t max);
void svm_npt_free(struct vmspace *vmspace);
#endif /* _SVM_NPT_H_ */
@@ -61,7 +61,7 @@ __FBSDID("$FreeBSD$");
#include "npt.h"

/*
 * SVM CPUID function 0x8000_000Ai, edx bit decoding.
 * SVM CPUID function 0x8000_000A, edx bit decoding.
 */
#define AMD_CPUID_SVM_NP	BIT(0)	/* Nested paging or RVI */
#define AMD_CPUID_SVM_LBR	BIT(1)	/* Last branch virtualization */

@@ -79,7 +79,7 @@ MALLOC_DEFINE(M_SVM, "svm", "svm");
/* Per-CPU context area. */
extern struct pcpu __pcpu[];

static int svm_vmexit(struct svm_softc *svm_sc, int vcpu,
static bool svm_vmexit(struct svm_softc *svm_sc, int vcpu,
    struct vm_exit *vmexit);
static int svm_msr_rw_ok(uint8_t *btmap, uint64_t msr);
static int svm_msr_index(uint64_t msr, int *index, int *bit);

@@ -98,11 +98,6 @@ static uint32_t guest_asid = 1;
 */
static int max_asid;

/*
 * Statistics
 */
static VMM_STAT_AMD(VMEXIT_NPF_LAPIC, "vm exits due to Local APIC access");

/*
 * SVM host state saved area of size 4KB for each core.
 */

@@ -113,6 +108,8 @@ static uint8_t hsave[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE);
 */
static struct svm_regctx host_ctx[MAXCPU];

static VMM_STAT_AMD(VCPU_EXITINTINFO, "Valid EXITINTINFO");

/*
 * Common function to enable or disabled SVM for a CPU.
 */

@@ -123,19 +120,13 @@ cpu_svm_enable_disable(boolean_t enable)

	efer_msr = rdmsr(MSR_EFER);

	if (enable) {
	if (enable)
		efer_msr |= EFER_SVM;
	} else {
	else
		efer_msr &= ~EFER_SVM;
	}

	wrmsr(MSR_EFER, efer_msr);

	if (rdmsr(MSR_EFER) != efer_msr) {
		ERR("SVM couldn't be enabled on CPU%d.\n", curcpu);
		return (EIO);
	}

	return (0);
}

@@ -199,20 +190,16 @@ svm_cpuid_features(void)
	}

	/*
	 * XXX: BHyVe need EPT or RVI to work.
	 * bhyve need RVI to work.
	 */
	if (!(svm_feature & AMD_CPUID_SVM_NP)) {
		printf("Missing Nested paging or RVI SVM support in processor.\n");
		return (EIO);
	}

	if (svm_feature & (AMD_CPUID_SVM_NRIP_SAVE |
	    AMD_CPUID_SVM_DECODE_ASSIST)) {
	if (svm_feature & AMD_CPUID_SVM_NRIP_SAVE)
		return (0);
	}
	/* XXX: Should never be here? */
	printf("Processor doesn't support nRIP or decode assist, can't"
	    "run BhyVe.\n");

	return (EIO);
}

@@ -267,16 +254,16 @@ svm_init(void)
	int err;

	err = is_svm_enabled();
	if (err) {
	if (err)
		return (err);
	}

	svm_npt_init();

	/* Start SVM on all CPUs */
	smp_rendezvous(NULL, svm_enable, NULL, NULL);

	return(0);
	return (0);
}

/*
@@ -383,7 +370,7 @@ svm_init_vcpu(struct svm_vcpu *vcpu, vm_paddr_t iopm_pa, vm_paddr_t msrpm_pa,
 * Initialise a virtual machine.
 */
static void *
svm_vminit(struct vm *vm)
svm_vminit(struct vm *vm, pmap_t pmap)
{
	struct svm_softc *svm_sc;
	vm_paddr_t msrpm_pa, iopm_pa, pml4_pa;

@@ -401,10 +388,10 @@ svm_vminit(struct vm *vm)
	svm_sc->vm = vm;
	svm_sc->svm_feature = svm_feature;
	svm_sc->vcpu_cnt = VM_MAXCPU;

	svm_sc->nptp = (vm_offset_t)vtophys(pmap->pm_pml4);
	/*
	 * Each guest has its own unique ASID.
	 * ASID(Addres Space Identifier) are used by TLB entries.
	 * ASID(Address Space Identifier) is used by TLB entry.
	 */
	svm_sc->asid = guest_asid++;

@@ -438,7 +425,7 @@ svm_vminit(struct vm *vm)
	/* Cache physical address for multiple vcpus. */
	iopm_pa = vtophys(svm_sc->iopm_bitmap);
	msrpm_pa = vtophys(svm_sc->msr_bitmap);
	pml4_pa = vtophys(svm_sc->np_pml4);
	pml4_pa = svm_sc->nptp;

	for (i = 0; i < svm_sc->vcpu_cnt; i++) {
		if (svm_init_vcpu(svm_get_vcpu(svm_sc, i), iopm_pa, msrpm_pa,

@@ -458,7 +445,7 @@ cleanup:
/*
 * Handle guest I/O intercept.
 */
static int
static bool
svm_handle_io(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
{
	struct vmcb_ctrl *ctrl;

@@ -477,74 +464,39 @@ svm_handle_io(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
	vmexit->u.inout.port = (uint16_t)(info1 >> 16);
	vmexit->u.inout.eax = (uint32_t)(state->rax);

	return (1);
	return (false);
}

/*
 * SVM Nested Page(RVI) Fault handler.
 * Nested page fault handler used by local APIC emulation.
 */
static int
svm_handle_npf(struct vm *vm, int vcpu, uint64_t gpa, uint64_t rip,
    uint64_t exitinfo1, uint64_t cr3, struct vie *vie)
static void
svm_npf_paging(uint64_t exitinfo1, int *type, int *prot)
{
	int err;

	if (exitinfo1 & VMCB_NPF_INFO1_W)
		*type = VM_PROT_WRITE;
	else
		*type = VM_PROT_READ;

	/* XXX: protection is not used. */
	*prot = 0;
}

static bool
svm_npf_emul_fault(uint64_t exitinfo1)
{

	if (exitinfo1 & VMCB_NPF_INFO1_ID) {
		VMM_CTR0(vm, vcpu, "SVM:NPF for code access.");
		return (0);
		return (false);
	}

	if (exitinfo1 & VMCB_NPF_INFO1_RSV) {
		VMM_CTR0(vm, vcpu, "SVM:NPF reserved bits are set.");
		return (0);
	}

	if (exitinfo1 & VMCB_NPF_INFO1_GPT) {
		VMM_CTR0(vm, vcpu, "SVM:NPF during guest page table walk.");
		return (0);
		return (false);
	}

	/*
	 * nRIP is NULL for NPF so we don't have the length of instruction,
	 * we rely on instruction decode s/w to determine the size of
	 * instruction.
	 *
	 * XXX: DecodeAssist can use instruction from buffer.
	 */
	if (vmm_fetch_instruction(vm, vcpu, rip, VIE_INST_SIZE,
	    cr3, vie) != 0) {
		ERR("SVM:NPF instruction fetch failed, RIP:0x%lx\n", rip);
		return (EINVAL);
	if ((exitinfo1 & VMCB_NPF_INFO1_GPA) == 0) {
		return (false);
	}

	KASSERT(vie->num_valid, ("No instruction to emulate."));
	/*
	 * SVM doesn't provide GLA unlike Intel VM-x. VIE_INVALID_GLA
	 * which is a non-cannonical address indicate that GLA is not
	 * available to instruction emulation.
	 *
	 * XXX: Which SVM capability can provided GLA?
	 */
	if (vmm_decode_instruction(vm, vcpu, VIE_INVALID_GLA, vie)) {
		ERR("SVM: Couldn't decode instruction.\n");
		return (0);
	}

	/*
	 * XXX: Decoding for user space(IOAPIC) should be done in
	 * user space.
	 */
	if (gpa < DEFAULT_APIC_BASE || gpa >= (DEFAULT_APIC_BASE + PAGE_SIZE)) {
		VMM_CTR2(vm, vcpu, "SVM:NPF GPA(0x%lx) outside of local APIC"
		    " range(0x%x)\n", gpa, DEFAULT_APIC_BASE);
		return (0);
	}

	err = vmm_emulate_instruction(vm, vcpu, gpa, vie, lapic_mmio_read,
	    lapic_mmio_write, 0);

	return (err ? 0 : 1);
	return (true);
}

/*
@@ -571,12 +523,12 @@ svm_efer(struct svm_softc *svm_sc, int vcpu, boolean_t write)
}

/*
 * Determine the cause of virtual cpu exit and return to user space if exit
 * demand so.
 * Return: 1 - Return to user space.
 *         0 - Continue vcpu run.
 * Determine the cause of virtual cpu exit and handle VMEXIT.
 * Return: false - Break vcpu execution loop and handle vmexit
 *                 in kernel or user space.
 *         true  - Continue vcpu run.
 */
static int
static bool
svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
{
	struct vmcb_state *state;

@@ -584,35 +536,27 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
	struct svm_regctx *ctx;
	uint64_t code, info1, info2, val;
	uint32_t eax, ecx, edx;
	int user;		/* Flag for user mode */
	int update_rip;		/* Flag for updating RIP */
	int inst_len;
	bool update_rip, loop;

	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));

	state = svm_get_vmcb_state(svm_sc, vcpu);
	ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu);
	ctx = svm_get_guest_regctx(svm_sc, vcpu);
	update_rip = 1;
	user = 0;

	vmexit->exitcode = VM_EXITCODE_VMX;
	vmexit->u.vmx.error = 0;
	code = ctrl->exitcode;
	code = ctrl->exitcode;
	info1 = ctrl->exitinfo1;
	info2 = ctrl->exitinfo2;

	if (ctrl->nrip) {
		inst_len = ctrl->nrip - state->rip;
	} else {
		inst_len = ctrl->inst_decode_size;
	}
	update_rip = true;
	loop = true;
	vmexit->exitcode = VM_EXITCODE_VMX;
	vmexit->u.vmx.error = 0;

	switch (code) {
	case VMCB_EXIT_MC: /* Machine Check. */
		vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_MTRAP, 1);
		vmexit->exitcode = VM_EXITCODE_MTRAP;
		user = 1;
		loop = false;
		break;

	case VMCB_EXIT_MSR:	/* MSR access. */

@@ -628,27 +572,29 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)

		if (info1) {
			/* VM exited because of write MSR */
			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_WRMSR, 1);
			vmm_stat_incr(svm_sc->vm, vcpu,
			    VMEXIT_WRMSR, 1);
			vmexit->exitcode = VM_EXITCODE_WRMSR;
			vmexit->u.msr.code = ecx;
			val = (uint64_t)edx << 32 | eax;
			if (emulate_wrmsr(svm_sc->vm, vcpu, ecx, val)) {
				vmexit->u.msr.wval = val;
				user = 1;
				loop = false;
			}
			VMM_CTR3(svm_sc->vm, vcpu,
			    "VMEXIT WRMSR(%s handling) 0x%lx @0x%x",
			    user ? "user" : "kernel", val, ecx);
			    loop ? "kernel" : "user", val, ecx);
		} else {
			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_RDMSR, 1);
			vmm_stat_incr(svm_sc->vm, vcpu,
			    VMEXIT_RDMSR, 1);
			vmexit->exitcode = VM_EXITCODE_RDMSR;
			vmexit->u.msr.code = ecx;
			if (emulate_rdmsr(svm_sc->vm, vcpu, ecx)) {
				user = 1;
				loop = false;
			}
			VMM_CTR3(svm_sc->vm, vcpu, "SVM:VMEXIT RDMSR"
			    " 0x%lx,%lx @0x%x", ctx->e.g.sctx_rdx,
			    state->rax, ecx);
			    " MSB=0x%08x, LSB=%08x @0x%x",
			    ctx->e.g.sctx_rdx, state->rax, ecx);
		}

#define MSR_AMDK8_IPM 0xc0010055

@@ -659,17 +605,16 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
		 * XXX: special handling of AMD C1E - Ignore.
		 */
		if (ecx == MSR_AMDK8_IPM)
			user = 0;
			loop = true;
		break;

	case VMCB_EXIT_INTR:
	case VMCB_EXIT_INTR:
		/*
		 * Exit on External Interrupt.
		 * Give host interrupt handler to run and if its guest
		 * interrupt, local APIC will inject event in guest.
		 */
		user = 0;
		update_rip = 0;
		update_rip = false;
		VMM_CTR1(svm_sc->vm, vcpu, "SVM:VMEXIT ExtInt"
		    " RIP:0x%lx.\n", state->rip);
		vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXTINT, 1);

@@ -677,9 +622,8 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)

	case VMCB_EXIT_IO:
		vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INOUT, 1);
		user = svm_handle_io(svm_sc, vcpu, vmexit);
		VMM_CTR1(svm_sc->vm, vcpu, "SVM:I/O VMEXIT RIP:0x%lx\n",
		    state->rip);
		loop = svm_handle_io(svm_sc, vcpu, vmexit);
		update_rip = true;
		break;

	case VMCB_EXIT_CPUID:

@@ -690,14 +634,12 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
		    (uint32_t *)&ctx->sctx_rcx,
		    (uint32_t *)&ctx->e.g.sctx_rdx);
		VMM_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT CPUID\n");
		user = 0;
		break;

	case VMCB_EXIT_HLT:
		vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_HLT, 1);
		if (ctrl->v_irq) {
			/* Interrupt is pending, can't halt guest. */
			user = 0;
			vmm_stat_incr(svm_sc->vm, vcpu,
			    VMEXIT_HLT_IGNORED, 1);
			VMM_CTR0(svm_sc->vm, vcpu,

@@ -706,7 +648,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
			VMM_CTR0(svm_sc->vm, vcpu,
			    "VMEXIT halted CPU.");
			vmexit->exitcode = VM_EXITCODE_HLT;
			user = 1;
			loop = false;

		}
		break;

@@ -719,44 +661,54 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
		break;

	case VMCB_EXIT_NPF:
		loop = false;
		update_rip = false;
		vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EPT_FAULT, 1);

		if (info1 & VMCB_NPF_INFO1_RSV) {
			VMM_CTR2(svm_sc->vm, vcpu, "SVM_ERR:NPT"
			    " reserved bit is set,"
			    "INFO1:0x%lx INFO2:0x%lx .\n",
			    info1, info2);
			break;
		}

		/* EXITINFO2 has the physical fault address (GPA). */
		if (!svm_handle_npf(svm_sc->vm, vcpu, info2,
		    state->rip, info1, state->cr3,
		    &vmexit->u.paging.vie)) {
		/* I/O APIC for MSI/X. */
		if (vm_mem_allocated(svm_sc->vm, info2)) {
			VMM_CTR3(svm_sc->vm, vcpu, "SVM:NPF-paging,"
			    "RIP:0x%lx INFO1:0x%lx INFO2:0x%lx .\n",
			    state->rip, info1, info2);
			vmexit->exitcode = VM_EXITCODE_PAGING;
			user = 1;
			vmexit->u.paging.gpa = info2;
		} else {
			/* Local APIC NPF */
			update_rip = 1;
			vmm_stat_incr(svm_sc->vm, vcpu,
			    VMEXIT_NPF_LAPIC, 1);
			svm_npf_paging(info1, &vmexit->u.paging.fault_type,
			    &vmexit->u.paging.protection);
		} else if (svm_npf_emul_fault(info1)) {
			VMM_CTR3(svm_sc->vm, vcpu, "SVM:NPF-inst_emul,"
			    "RIP:0x%lx INFO1:0x%lx INFO2:0x%lx .\n",
			    state->rip, info1, info2);
			vmexit->exitcode = VM_EXITCODE_INST_EMUL;
			vmexit->u.inst_emul.gpa = info2;
			vmexit->u.inst_emul.gla = VIE_INVALID_GLA;
			vmexit->u.inst_emul.cr3 = state->cr3;
			vmexit->inst_length = VIE_INST_SIZE;
		}

		vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EPT_FAULT, 1);
		inst_len = vmexit->u.paging.vie.num_processed;
		VMM_CTR3(svm_sc->vm, vcpu, "VMEXIT NPF, GPA:0x%lx "
		    "user=%d instr len=%d.\n", info2, user,
		    inst_len);
		break;

	case VMCB_EXIT_SHUTDOWN:
		VMM_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT guest shutdown.");
		user = 1;
		vmexit->exitcode = VM_EXITCODE_VMX;
		VMM_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT shutdown.");
		loop = false;
		break;

	case VMCB_EXIT_INVALID:
		VMM_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT INVALID.");
		user = 1;
		vmexit->exitcode = VM_EXITCODE_VMX;
		loop = false;
		break;

	default:
		/* Return to user space. */
		user = 1;
		update_rip = 0;
		loop = false;
		update_rip = false;
		VMM_CTR3(svm_sc->vm, vcpu, "VMEXIT=0x%lx"
		    " EXITINFO1: 0x%lx EXITINFO2:0x%lx\n",
		    ctrl->exitcode, info1, info2);

@@ -767,29 +719,27 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
		break;
	}

	if (ctrl->v_irq) {
		VMM_CTR2(svm_sc->vm, vcpu, "SVM:SVM intr pending vector:0x%x"
		    " priority:0x%x", ctrl->v_intr_vector, ctrl->v_intr_prio);
	}

	vmexit->rip = state->rip;
	if (update_rip) {
		vmexit->rip += inst_len;
		if (ctrl->nrip == 0) {
			VMM_CTR1(svm_sc->vm, vcpu, "SVM_ERR:nRIP is not set "
			    "for RIP0x%lx.\n", state->rip);
			vmexit->exitcode = VM_EXITCODE_VMX;
		} else
			vmexit->rip = ctrl->nrip;
	}

	/* Return to userland for APs to start. */
	if (vmexit->exitcode == VM_EXITCODE_SPINUP_AP) {
		VMM_CTR1(svm_sc->vm, vcpu, "SVM:Starting APs, RIP0x%lx.\n",
		    vmexit->rip);
		user = 1;
	/* If vcpu execution is continued, update RIP. */
	if (loop) {
		state->rip = vmexit->rip;
	}

	/* XXX: Set next RIP before restarting virtual cpus. */
	if (ctrl->nrip == 0) {
		ctrl->nrip = state->rip;
	if (state->rip == 0) {
		VMM_CTR0(svm_sc->vm, vcpu, "SVM_ERR:RIP is NULL\n");
		vmexit->exitcode = VM_EXITCODE_VMX;
	}

	return (user);
	return (loop);
}

/*
@@ -808,7 +758,7 @@ svm_inject_nmi(struct svm_softc *svm_sc, int vcpu)
		return (0);

	/* Inject NMI, vector number is not used.*/
	if (vmcb_eventinject(ctrl, VM_NMI, IDT_NMI, 0, FALSE)) {
	if (vmcb_eventinject(ctrl, VM_NMI, IDT_NMI, 0, false)) {
		VMM_CTR0(svm_sc->vm, vcpu, "SVM:NMI injection failed.\n");
		return (EIO);
	}

@@ -846,14 +796,7 @@ svm_inj_interrupts(struct svm_softc *svm_sc, int vcpu)
	/* Wait for guest to come out of interrupt shadow. */
	if (ctrl->intr_shadow) {
		VMM_CTR0(svm_sc->vm, vcpu, "SVM:Guest in interrupt shadow.\n");
		goto inject_failed;
	}

	/* Make sure no interrupt is pending.*/
	if (ctrl->v_irq) {
		VMM_CTR0(svm_sc->vm, vcpu,
		    "SVM:virtual interrupt is pending.\n");
		goto inject_failed;
		return;
	}

	/* NMI event has priority over interrupts.*/

@@ -862,32 +805,32 @@ svm_inj_interrupts(struct svm_softc *svm_sc, int vcpu)
	}

	vector = lapic_pending_intr(svm_sc->vm, vcpu);
	if (vector < 0) {
		return;
	}

	/* No interrupt is pending. */
	if (vector < 0)
		return;

	if (vector < 32 || vector > 255) {
		ERR("Invalid vector number:%d\n", vector);
		VMM_CTR1(svm_sc->vm, vcpu, "SVM_ERR:Event injection"
		    "invalid vector=%d.\n", vector);
		ERR("SVM_ERR:Event injection invalid vector=%d.\n", vector);
		return;
	}

	if ((state->rflags & PSL_I) == 0) {
		VMM_CTR0(svm_sc->vm, vcpu, "SVM:Interrupt is disabled\n");
		goto inject_failed;
		return;
	}

	if (vmcb_eventinject(ctrl, VM_HW_INTR, vector, 0, FALSE)) {
		VMM_CTR2(svm_sc->vm, vcpu, "SVM:Event injection failed to"
		    " VCPU%d,vector=%d.\n", vcpu, vector);
	if (vmcb_eventinject(ctrl, VM_HW_INTR, vector, 0, false)) {
		VMM_CTR1(svm_sc->vm, vcpu, "SVM:Event injection failed to"
		    " vector=%d.\n", vector);
		return;
	}

	/* Acknowledge that event is accepted.*/
	lapic_intr_accepted(svm_sc->vm, vcpu, vector);
	VMM_CTR1(svm_sc->vm, vcpu, "SVM:event injected,vector=%d.\n", vector);

inject_failed:
	return;
}

/*
@@ -908,11 +851,39 @@ setup_tss_type(void)
	desc->sd_type = 9;
}

static void
svm_handle_exitintinfo(struct svm_softc *svm_sc, int vcpu)
{
	struct vmcb_ctrl *ctrl;
	uint64_t intinfo;

	ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu);

	/*
	 * VMEXIT while delivering an exception or interrupt.
	 * Inject it as virtual interrupt.
	 * Section 15.7.2 Intercepts during IDT interrupt delivery.
	 */
	intinfo = ctrl->exitintinfo;

	if (intinfo & VMCB_EXITINTINFO_VALID) {
		vmm_stat_incr(svm_sc->vm, vcpu, VCPU_EXITINTINFO, 1);
		VMM_CTR1(svm_sc->vm, vcpu, "SVM:EXITINTINFO:0x%lx is valid\n",
		    intinfo);
		if (vmcb_eventinject(ctrl, VMCB_EXITINTINFO_TYPE(intinfo),
		    VMCB_EXITINTINFO_VECTOR(intinfo),
		    VMCB_EXITINTINFO_EC(intinfo),
		    VMCB_EXITINTINFO_EC_VALID & intinfo)) {
			VMM_CTR1(svm_sc->vm, vcpu, "SVM:couldn't inject pending"
			    " interrupt, exitintinfo:0x%lx\n", intinfo);
		}
	}
}
/*
 * Start vcpu with specified RIP.
 */
static int
svm_vmrun(void *arg, int vcpu, register_t rip)
svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap)
{
	struct svm_regctx *hctx, *gctx;
	struct svm_softc *svm_sc;

@@ -920,23 +891,17 @@ svm_vmrun(void *arg, int vcpu, register_t rip)
	struct vmcb_state *state;
	struct vmcb_ctrl *ctrl;
	struct vm_exit *vmexit;
	int user;
	uint64_t vmcb_pa;
	static uint64_t host_cr2;
	bool loop;	/* Continue vcpu execution loop. */

	user = 0;
	loop = true;
	svm_sc = arg;

	KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu));

	vcpustate = svm_get_vcpu(svm_sc, vcpu);
	state = svm_get_vmcb_state(svm_sc, vcpu);
	ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu);
	vmexit = vm_exitinfo(svm_sc->vm , vcpu);
	if (vmexit->exitcode == VM_EXITCODE_VMX) {
		ERR("vcpu%d shouldn't run again.\n", vcpu);
		return(EIO);
	}
	state = svm_get_vmcb_state(svm_sc, vcpu);
	ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu);
	vmexit = vm_exitinfo(svm_sc->vm, vcpu);

	gctx = svm_get_guest_regctx(svm_sc, vcpu);
	hctx = &host_ctx[curcpu];

@@ -974,24 +939,24 @@ svm_vmrun(void *arg, int vcpu, register_t rip)
		ctrl->vmcb_clean = VMCB_CACHE_ASID |
		    VMCB_CACHE_IOPM |
		    VMCB_CACHE_NP;

	}

	vcpustate->lastcpu = curcpu;

	VMM_CTR3(svm_sc->vm, vcpu, "SVM:Enter vmrun old RIP:0x%lx"
	    " new RIP:0x%lx inst len=%d\n",
	    state->rip, rip, vmexit->inst_length);
	/* Update Guest RIP */
	state->rip = rip;

	VMM_CTR1(svm_sc->vm, vcpu, "SVM:entered with RIP:0x%lx\n",
	    state->rip);
	do {
		vmexit->inst_length = 0;
		/* We are asked to give the cpu by scheduler. */
		if (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED)) {
			vmexit->exitcode = VM_EXITCODE_BOGUS;
			vmexit->inst_length = 0;
			vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_ASTPENDING, 1);
			VMM_CTR1(svm_sc->vm, vcpu, "SVM:gave up cpu, RIP:0x%lx\n",
			    state->rip);
			VMM_CTR1(svm_sc->vm, vcpu,
			    "SVM: gave up CPU, RIP:0x%lx\n", state->rip);
			vmexit->rip = state->rip;
			break;
		}

@@ -999,6 +964,8 @@ svm_vmrun(void *arg, int vcpu, register_t rip)

		(void)svm_set_vmcb(svm_get_vmcb(svm_sc, vcpu), svm_sc->asid);

		svm_handle_exitintinfo(svm_sc, vcpu);

		(void)svm_inj_interrupts(svm_sc, vcpu);

		/* Change TSS type to available.*/

@@ -1013,10 +980,11 @@ svm_vmrun(void *arg, int vcpu, register_t rip)

		save_cr2(&host_cr2);
		load_cr2(&state->cr2);

		/* Launch Virtual Machine. */
		svm_launch(vmcb_pa, gctx, hctx);

		save_cr2(&state->cr2);
		load_cr2(&host_cr2);

@@ -1045,17 +1013,11 @@ svm_vmrun(void *arg, int vcpu, register_t rip)
		enable_gintr();

		/* Handle #VMEXIT and if required return to user space. */
		user = svm_vmexit(svm_sc, vcpu, vmexit);
		loop = svm_vmexit(svm_sc, vcpu, vmexit);
		vcpustate->loop++;
		vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_COUNT, 1);

		/* Update RIP since we are continuing vcpu execution.*/
		state->rip = vmexit->rip;

		VMM_CTR1(svm_sc->vm, vcpu, "SVM:loop RIP:0x%lx\n", state->rip);
	} while (!user);
		VMM_CTR1(svm_sc->vm, vcpu, "SVM:exited with RIP:0x%lx\n",
		    state->rip);
	} while (loop);

	return (0);
}
@@ -1072,7 +1034,6 @@ svm_vmcleanup(void *arg)

	VMM_CTR0(svm_sc->vm, 0, "SVM:cleanup\n");

	svm_npt_cleanup(svm_sc);
	free(svm_sc, M_SVM);
}

@@ -1113,7 +1074,7 @@ swctx_regptr(struct svm_regctx *regctx, int reg)
	case VM_REG_GUEST_R15:
		return (&regctx->sctx_r15);
	default:
		ERR("Unknown register requested.\n");
		ERR("Unknown register requested, reg=%d.\n", reg);
		break;
	}

@@ -1141,12 +1102,13 @@ svm_getreg(void *arg, int vcpu, int ident, uint64_t *val)
	}

	reg = swctx_regptr(svm_get_guest_regctx(svm_sc, vcpu), ident);

	if (reg != NULL) {
		*val = *reg;
		return (0);
	}

	ERR("reg type %x is not saved n VMCB\n", ident);
	ERR("SVM_ERR:reg type %x is not saved in VMCB.\n", ident);
	return (EINVAL);
}

@@ -1176,7 +1138,7 @@ svm_setreg(void *arg, int vcpu, int ident, uint64_t val)
		return (0);
	}

	ERR("reg type %x is not saved n VMCB\n", ident);
	ERR("SVM_ERR:reg type %x is not saved in VMCB.\n", ident);
	return (EINVAL);
}

@@ -1201,7 +1163,7 @@ svm_setdesc(void *arg, int vcpu, int type, struct seg_desc *desc)

	seg = vmcb_seg(vmcb, type);
	if (seg == NULL) {
		ERR("Unsupported seg type %d\n", type);
		ERR("SVM_ERR:Unsupported segment type%d\n", type);
		return (EINVAL);
	}

@@ -1232,7 +1194,7 @@ svm_getdesc(void *arg, int vcpu, int type, struct seg_desc *desc)

	seg = vmcb_seg(svm_get_vmcb(svm_sc, vcpu), type);
	if (!seg) {
		ERR("Unsupported seg type %d\n", type);
		ERR("SVM_ERR:Unsupported segment type%d\n", type);
		return (EINVAL);
	}

@@ -1366,13 +1328,13 @@ struct vmm_ops vmm_ops_amd = {
	svm_vminit,
	svm_vmrun,
	svm_vmcleanup,
	svm_npt_vmmap_set,
	svm_npt_vmmap_get,
	svm_getreg,
	svm_setreg,
	svm_getdesc,
	svm_setdesc,
	svm_inject_event,
	svm_getcap,
	svm_setcap
	svm_setcap,
	svm_npt_alloc,
	svm_npt_free
};
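The svm.c rework above replaces the old int user flag with a bool loop contract: svm_vmexit() returns true when the exit was handled in the kernel and the vcpu should re-enter the guest, and false when the exit must be bounced out to the caller. A toy model of that control flow; the three-exit schedule is invented for the demo:

#include <stdbool.h>
#include <stdio.h>

static int nexits;

static bool
handle_exit(void)
{
	/* Pretend the third exit needs kernel/userland handling. */
	return (++nexits < 3);
}

int
main(void)
{
	bool loop = true;

	do {
		/* VMRUN would happen here. */
		loop = handle_exit();
		printf("exit %d -> %s\n", nexits,
		    loop ? "resume guest" : "return to caller");
	} while (loop);
	return (0);
}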
@@ -62,7 +62,7 @@ struct svm_softc {
	uint8_t msr_bitmap[SVM_MSR_BITMAP_SIZE];

	/* Nested Paging */
	pml4_entry_t np_pml4[NPML4EPG];
	vm_offset_t nptp;

	/* Virtual machine pointer. */
	struct vm *vm;

@@ -76,7 +76,7 @@ struct svm_softc {
	int vcpu_cnt;	/* number of VCPUs for this guest.*/
} __aligned(PAGE_SIZE);

CTASSERT((offsetof(struct svm_softc, np_pml4) & PAGE_MASK) == 0);
CTASSERT((offsetof(struct svm_softc, nptp) & PAGE_MASK) == 0);

static __inline struct svm_vcpu *
svm_get_vcpu(struct svm_softc *sc, int vcpu)
@@ -70,7 +70,7 @@ svm_init_vmcb(struct vmcb *vmcb, uint64_t iopm_base_pa,

	/* EFER_SVM must always be set when the guest is executing */
	state->efer = EFER_SVM;

	return (0);
}

@@ -355,7 +355,7 @@ vmcb_seg(struct vmcb *vmcb, int type)
 */
int
vmcb_eventinject(struct vmcb_ctrl *ctrl, int type, int vector,
    uint32_t error, boolean_t ec_valid)
    uint32_t error, bool ec_valid)
{
	int intr_type;
@@ -86,7 +86,7 @@

/* VMCB TLB control */
#define VMCB_TLB_FLUSH_NOTHING		0	/* Flush nothing */
#define VMCB_TLB_FLUSH_EVERYTHING	1	/* Flush entire TLB */
#define VMCB_TLB_FLUSH_ALL		1	/* Flush entire TLB */
#define VMCB_TLB_FLUSH_GUEST		3	/* Flush all guest entries */
#define VMCB_TLB_FLUSH_GUEST_NONGLOBAL	7	/* Flush guest non-PG entries */

@@ -148,6 +148,16 @@
#define VMCB_NPF_INFO1_GPA	BIT(32)	/* Guest physical address. */
#define VMCB_NPF_INFO1_GPT	BIT(33)	/* Guest page table. */

/*
 * EXITINTINFO, Interrupt exit info for all intrecepts.
 * Section 15.7.2, Intercepts during IDT Interrupt Delivery.
 */
#define VMCB_EXITINTINFO_VECTOR(x)	(x & 0xFF)
#define VMCB_EXITINTINFO_TYPE(x)	((x & 0x7) >> 8)
#define VMCB_EXITINTINFO_EC_VALID	BIT(11)
#define VMCB_EXITINTINFO_VALID		BIT(31)
#define VMCB_EXITINTINFO_EC(x)		((x & 0xFFFFFFFF) >> 32)

/* VMCB save state area segment format */
struct vmcb_segment {
	uint16_t selector;

@@ -254,8 +264,8 @@ struct vmcb_state {
	uint64_t dbgctl;
	uint64_t br_from;
	uint64_t br_to;
	uint64_t lastexcpfrom;
	uint64_t lastexcpto;
	uint64_t int_from;
	uint64_t int_to;
	uint8_t pad7[0x968];	/* Reserved upto end of VMCB */
} __attribute__ ((__packed__));
CTASSERT(sizeof(struct vmcb_state) == 0xC00);

@@ -274,6 +284,6 @@ int vmcb_read(struct vmcb *vmcb, int ident, uint64_t *retval);
int vmcb_write(struct vmcb *vmcb, int ident, uint64_t val);
struct vmcb_segment *vmcb_seg(struct vmcb *vmcb, int type);
int vmcb_eventinject(struct vmcb_ctrl *ctrl, int type, int vector,
    uint32_t error, boolean_t ec_valid);
    uint32_t error, bool ec_valid);

#endif /* _VMCB_H_ */
@@ -960,6 +960,15 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, boolean_t *retu)
	if (vmm_decode_instruction(vm, vcpuid, gla, vie) != 0)
		return (EFAULT);

	/*
	 * AMD-V doesn't provide instruction length which is nRIP - RIP
	 * for some of the exit including Nested Page Fault. Use instruction
	 * length calculated by software instruction emulation to update
	 * RIP of vcpu.
	 */
	if (vme->inst_length == VIE_INST_SIZE)
		vme->inst_length = vie->num_processed;

	/* return to userland unless this is a local apic access */
	if (gpa < DEFAULT_APIC_BASE || gpa >= DEFAULT_APIC_BASE + PAGE_SIZE) {
		*retu = TRUE;
@@ -563,7 +563,7 @@ vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length,
		vie->num_valid += n;
	}

	if (vie->num_valid == inst_length)
	if (vie->num_valid)
		return (0);
	else
		return (-1);
@@ -57,6 +57,7 @@ static struct vmm_msr vmm_msr[] = {
	{ MSR_PAT,	VMM_MSR_F_EMULATE | VMM_MSR_F_INVALID },
	{ MSR_BIOS_SIGN, VMM_MSR_F_EMULATE },
	{ MSR_MCG_CAP,	VMM_MSR_F_EMULATE | VMM_MSR_F_READONLY },
#if 0
	{ MSR_IA32_PLATFORM_ID, VMM_MSR_F_EMULATE | VMM_MSR_F_READONLY },
	{ MSR_IA32_MISC_ENABLE, VMM_MSR_F_EMULATE | VMM_MSR_F_READONLY },
#endif

@@ -140,6 +141,7 @@ guest_msrs_init(struct vm *vm, int cpu)
	case MSR_IA32_PLATFORM_ID:
		guest_msrs[i] = 0;
		break;
#endif
	default:
		panic("guest_msrs_init: missing initialization for msr "
		    "0x%0x", vmm_msr[i].num);