diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index 74d3d13eee9b..7724327d1c6c 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -145,6 +145,13 @@ __FBSDID("$FreeBSD$"); #include #endif +static __inline boolean_t +pmap_type_guest(pmap_t pmap) +{ + + return ((pmap->pm_type == PT_EPT) || (pmap->pm_type == PT_RVI)); +} + static __inline boolean_t pmap_emulate_ad_bits(pmap_t pmap) { @@ -159,6 +166,7 @@ pmap_valid_bit(pmap_t pmap) switch (pmap->pm_type) { case PT_X86: + case PT_RVI: mask = X86_PG_V; break; case PT_EPT: @@ -181,6 +189,7 @@ pmap_rw_bit(pmap_t pmap) switch (pmap->pm_type) { case PT_X86: + case PT_RVI: mask = X86_PG_RW; break; case PT_EPT: @@ -205,6 +214,7 @@ pmap_global_bit(pmap_t pmap) case PT_X86: mask = X86_PG_G; break; + case PT_RVI: case PT_EPT: mask = 0; break; @@ -222,6 +232,7 @@ pmap_accessed_bit(pmap_t pmap) switch (pmap->pm_type) { case PT_X86: + case PT_RVI: mask = X86_PG_A; break; case PT_EPT: @@ -244,6 +255,7 @@ pmap_modified_bit(pmap_t pmap) switch (pmap->pm_type) { case PT_X86: + case PT_RVI: mask = X86_PG_M; break; case PT_EPT: @@ -1094,6 +1106,9 @@ pmap_swap_pat(pmap_t pmap, pt_entry_t entry) if ((entry & x86_pat_bits) != 0) entry ^= x86_pat_bits; break; + case PT_RVI: + /* XXX: PAT support. */ + break; case PT_EPT: /* * Nothing to do - the memory attributes are represented @@ -1137,6 +1152,11 @@ pmap_cache_bits(pmap_t pmap, int mode, boolean_t is_pde) cache_bits |= PG_NC_PWT; break; + case PT_RVI: + /* XXX: PAT support. */ + cache_bits = 0; + break; + case PT_EPT: cache_bits = EPT_PG_IGNORE_PAT | EPT_PG_MEMORY_TYPE(mode); break; @@ -1157,6 +1177,10 @@ pmap_cache_mask(pmap_t pmap, boolean_t is_pde) case PT_X86: mask = is_pde ? X86_PG_PDE_CACHE : X86_PG_PTE_CACHE; break; + case PT_RVI: + /* XXX: PAT support. 
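Under RVI the nested page tables use the ordinary x86 PTE format, so
 * returning a zero mask here (and zero cache bits in pmap_cache_bits())
 * leaves every guest mapping with the default writeback memory type.
 * If PAT support is added later, this case could plausibly mirror the
 * PT_X86 arm (a sketch only, not something this change implements):
 *
 *	case PT_RVI:
 *		mask = is_pde ? X86_PG_PDE_CACHE : X86_PG_PTE_CACHE;
 *		break;
 *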
*/ + mask = 0; + break; case PT_EPT: mask = EPT_PG_IGNORE_PAT | EPT_PG_MEMORY_TYPE(0x7); break; @@ -1181,6 +1205,7 @@ pmap_update_pde_store(pmap_t pmap, pd_entry_t *pde, pd_entry_t newpde) switch (pmap->pm_type) { case PT_X86: break; + case PT_RVI: case PT_EPT: /* * XXX @@ -1216,9 +1241,9 @@ pmap_update_pde_invalidate(pmap_t pmap, vm_offset_t va, pd_entry_t newpde) { pt_entry_t PG_G; - if (pmap->pm_type == PT_EPT) + if (pmap_type_guest(pmap)) return; - + KASSERT(pmap->pm_type == PT_X86, ("pmap_update_pde_invalidate: invalid type %d", pmap->pm_type)); @@ -1331,11 +1356,11 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) cpuset_t other_cpus; u_int cpuid; - if (pmap->pm_type == PT_EPT) { + if (pmap_type_guest(pmap)) { pmap_invalidate_ept(pmap); return; } - + KASSERT(pmap->pm_type == PT_X86, ("pmap_invalidate_page: invalid type %d", pmap->pm_type)); @@ -1409,7 +1434,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) vm_offset_t addr; u_int cpuid; - if (pmap->pm_type == PT_EPT) { + if (pmap_type_guest(pmap)) { pmap_invalidate_ept(pmap); return; } @@ -1468,7 +1493,7 @@ pmap_invalidate_all(pmap_t pmap) uint64_t cr3; u_int cpuid; - if (pmap->pm_type == PT_EPT) { + if (pmap_type_guest(pmap)) { pmap_invalidate_ept(pmap); return; } @@ -1588,7 +1613,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) cpuid = PCPU_GET(cpuid); other_cpus = all_cpus; CPU_CLR(cpuid, &other_cpus); - if (pmap == kernel_pmap || pmap->pm_type == PT_EPT) + if (pmap == kernel_pmap || pmap_type_guest(pmap)) active = all_cpus; else { active = pmap->pm_active; @@ -1626,6 +1651,7 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invlpg(va); break; + case PT_RVI: case PT_EPT: pmap->pm_eptgen++; break; @@ -1645,6 +1671,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); break; + case PT_RVI: case PT_EPT: pmap->pm_eptgen++; break; @@ -1662,6 +1689,7 @@ pmap_invalidate_all(pmap_t pmap) if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invltlb(); break; + case PT_RVI: case PT_EPT: pmap->pm_eptgen++; break; diff --git a/sys/amd64/vmm/amd/amdv.c b/sys/amd64/vmm/amd/amdv.c index 1cc130fa3a3c..ee121b0aac5f 100644 --- a/sys/amd64/vmm/amd/amdv.c +++ b/sys/amd64/vmm/amd/amdv.c @@ -37,136 +37,6 @@ __FBSDID("$FreeBSD$"); #include #include "io/iommu.h" -static int -amdv_init(void) -{ - - printf("amdv_init: not implemented\n"); - return (ENXIO); -} - -static int -amdv_cleanup(void) -{ - - printf("amdv_cleanup: not implemented\n"); - return (ENXIO); -} - -static void * -amdv_vminit(struct vm *vm, struct pmap *pmap) -{ - - printf("amdv_vminit: not implemented\n"); - return (NULL); -} - -static int -amdv_vmrun(void *arg, int vcpu, register_t rip, struct pmap *pmap) -{ - - printf("amdv_vmrun: not implemented\n"); - return (ENXIO); -} - -static void -amdv_vmcleanup(void *arg) -{ - - printf("amdv_vmcleanup: not implemented\n"); - return; -} - -static int -amdv_getreg(void *arg, int vcpu, int regnum, uint64_t *retval) -{ - - printf("amdv_getreg: not implemented\n"); - return (EINVAL); -} - -static int -amdv_setreg(void *arg, int vcpu, int regnum, uint64_t val) -{ - - printf("amdv_setreg: not implemented\n"); - return (EINVAL); -} - -static int -amdv_getdesc(void *vmi, int vcpu, int num, struct seg_desc *desc) -{ - - printf("amdv_get_desc: not implemented\n"); - return (EINVAL); -} - -static int -amdv_setdesc(void *vmi, int vcpu, int num, struct 
seg_desc *desc) -{ - - printf("amdv_get_desc: not implemented\n"); - return (EINVAL); -} - -static int -amdv_inject_event(void *vmi, int vcpu, int type, int vector, - uint32_t error_code, int error_code_valid) -{ - - printf("amdv_inject_event: not implemented\n"); - return (EINVAL); -} - -static int -amdv_getcap(void *arg, int vcpu, int type, int *retval) -{ - - printf("amdv_getcap: not implemented\n"); - return (EINVAL); -} - -static int -amdv_setcap(void *arg, int vcpu, int type, int val) -{ - - printf("amdv_setcap: not implemented\n"); - return (EINVAL); -} - -static struct vmspace * -amdv_vmspace_alloc(vm_offset_t min, vm_offset_t max) -{ - - printf("amdv_vmspace_alloc: not implemented\n"); - return (NULL); -} - -static void -amdv_vmspace_free(struct vmspace *vmspace) -{ - - printf("amdv_vmspace_free: not implemented\n"); - return; -} - -struct vmm_ops vmm_ops_amd = { - amdv_init, - amdv_cleanup, - amdv_vminit, - amdv_vmrun, - amdv_vmcleanup, - amdv_getreg, - amdv_setreg, - amdv_getdesc, - amdv_setdesc, - amdv_inject_event, - amdv_getcap, - amdv_setcap, - amdv_vmspace_alloc, - amdv_vmspace_free, -}; - static int amd_iommu_init(void) { diff --git a/sys/amd64/vmm/amd/npt.c b/sys/amd64/vmm/amd/npt.c index cfcfc427a9f2..164d9adcd862 100644 --- a/sys/amd64/vmm/amd/npt.c +++ b/sys/amd64/vmm/amd/npt.c @@ -28,11 +28,13 @@ __FBSDID("$FreeBSD$"); #include +#include #include -#include +#include #include #include +#include #include #include @@ -44,268 +46,46 @@ __FBSDID("$FreeBSD$"); #include "svm_softc.h" #include "npt.h" -/* - * "Nested Paging" is an optional SVM feature that provides two levels of - * address translation, thus eliminating the need for the VMM to maintain - * shadow page tables. - * - * Documented in APMv2, section 15.25, Nested Paging. - */ - -#define PAGE_4KB (4 * 1024) -#define PAGE_2MB (2 * 1024 * 1024UL) -#define PAGE_1GB (1024 * 1024 * 1024UL) - -#define GPA_UNMAPPED ((vm_paddr_t)~0) - -/* Get page entry to physical address. */ -#define PTE2PA(x) ((uint64_t)(x) & ~PAGE_MASK) - -MALLOC_DECLARE(M_SVM); - -static uint64_t svm_npt_create(pml4_entry_t *pml4, vm_paddr_t gpa, - vm_paddr_t hpa, vm_memattr_t attr, - int prot, uint64_t size); - -static const int PT_INDEX_MASK = 0x1FF; -static const int PT_SHIFT = 9; - -/* - * Helper function to create nested page table entries for a page - * of size 1GB, 2MB or 4KB. - * - * Starting from PML4 create a PDPTE, PDE or PTE depending on 'pg_size' - * value of 1GB, 2MB or 4KB respectively. - * - * Return size of the mapping created on success and 0 on failure. - * - * XXX: NPT PAT settings. - */ -static uint64_t -svm_npt_create(pml4_entry_t * pml4, vm_paddr_t gpa, vm_paddr_t hpa, - vm_memattr_t attr, int prot, uint64_t pg_size) -{ - uint64_t *pt, *page, pa; - pt_entry_t mode; - int shift, index; - - KASSERT(pg_size, ("Size of page must be 1GB, 2MB or 4KB")); - if (hpa & (pg_size - 1)) { - ERR("HPA(0x%lx) is not aligned, size:0x%lx\n", hpa, pg_size); - return (0); - } - - if (gpa & (pg_size - 1)) { - ERR("GPA(0x%lx) is not aligned, size (0x%lx)\n", gpa, pg_size); - return (0); - } - - /* Find out mode bits for PTE */ - mode = PG_U | PG_V; - if (prot & VM_PROT_WRITE) - mode |= PG_RW; - if ((prot & VM_PROT_EXECUTE) == 0) - mode |= pg_nx; - - pt = (uint64_t *)pml4; - shift = PML4SHIFT; - - while ((shift > PAGE_SHIFT) && (pg_size < (1UL << shift))) { - /* Get PDP, PD or PT index from guest physical address. 
*/ - index = (gpa >> shift) & PT_INDEX_MASK; - - /* If page entry is missing, allocate new page for table.*/ - if (pt[index] == 0) { - page = malloc(PAGE_SIZE, M_SVM, M_WAITOK | M_ZERO); - pt[index] = vtophys(page) | mode; - } - - pa = PTE2PA(pt[index]);; - pt = (uint64_t *)PHYS_TO_DMAP(pa); - shift -= PT_SHIFT; - } - - /* Create leaf entry mapping. */ - index = (gpa >> shift) & PT_INDEX_MASK; - - if (prot != VM_PROT_NONE) { - pt[index] = hpa | mode; - pt[index] |= (pg_size > PAGE_SIZE) ? PG_PS : 0; - } else - pt[index] = 0; - - return (1UL << shift); -} - -/* - * Map guest physical address to host physical address. - */ -int -svm_npt_vmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, - size_t size, vm_memattr_t attr, int prot, boolean_t spok) -{ - pml4_entry_t *pml4; - struct svm_softc *svm_sc; - uint64_t len, mapped, pg_size; - - svm_sc = arg; - pml4 = svm_sc->np_pml4; - - pg_size = PAGE_4KB; - if (spok) { - pg_size = PAGE_2MB; - if (amd_feature & AMDID_PAGE1GB) - pg_size = PAGE_1GB; - } - - /* Compute the largest page mapping that can be used */ - while (pg_size > PAGE_4KB) { - if (size >= pg_size && - (gpa & (pg_size - 1)) == 0 && - (hpa & (pg_size - 1)) == 0) { - break; - } - pg_size >>= PT_SHIFT; - } - - len = 0; - while (len < size) { - mapped = svm_npt_create(pml4, gpa + len, hpa + len, attr, prot, - pg_size); - len += mapped; - } - - return (0); -} - -/* - * Get HPA for a given GPA. - */ -vm_paddr_t -svm_npt_vmmap_get(void *arg, vm_paddr_t gpa) -{ - struct svm_softc *svm_sc; - pml4_entry_t *pml4; - uint64_t *pt, pa, hpa, pgmask; - int shift, index; - - svm_sc = arg; - pml4 = svm_sc->np_pml4; - - pt = (uint64_t *)pml4; - shift = PML4SHIFT; - - while (shift > PAGE_SHIFT) { - /* Get PDP, PD or PT index from GPA */ - index = (gpa >> shift) & PT_INDEX_MASK; - if (pt[index] == 0) { - ERR("No entry for GPA:0x%lx.", gpa); - return (GPA_UNMAPPED); - } - - if (pt[index] & PG_PS) { - break; - } - - pa = PTE2PA(pt[index]);; - pt = (uint64_t *)PHYS_TO_DMAP(pa); - shift -= PT_SHIFT; - } - - index = (gpa >> shift) & PT_INDEX_MASK; - if (pt[index] == 0) { - ERR("No mapping for GPA:0x%lx.\n", gpa); - return (GPA_UNMAPPED); - } - - /* Add GPA offset to HPA */ - pgmask = (1UL << shift) - 1; - hpa = (PTE2PA(pt[index]) & ~pgmask) | (gpa & pgmask); - - return (hpa); -} +SYSCTL_DECL(_hw_vmm); +SYSCTL_NODE(_hw_vmm, OID_AUTO, npt, CTLFLAG_RW, NULL, NULL); +static int npt_flags; +SYSCTL_INT(_hw_vmm_npt, OID_AUTO, pmap_flags, CTLFLAG_RD, + &npt_flags, 0, NULL); /* * AMD nested page table init. */ int svm_npt_init(void) { + int enable_superpage = 1; + + TUNABLE_INT_FETCH("hw.vmm.npt.enable_superpage", &enable_superpage); + if (enable_superpage) + npt_flags |= PMAP_PDE_SUPERPAGE; return (0); } -/* - * Free Page Table page. - */ -static void -free_pt(pd_entry_t pde) -{ - pt_entry_t *pt; - pt = (pt_entry_t *)PHYS_TO_DMAP(PTE2PA(pde)); - free(pt, M_SVM); + +static int +npt_pinit(pmap_t pmap) +{ + + return (pmap_pinit_type(pmap, PT_RVI, npt_flags)); } -/* - * Free Page Directory page. - */ -static void -free_pd(pdp_entry_t pdpe) +struct vmspace * +svm_npt_alloc(vm_offset_t min, vm_offset_t max) { - pd_entry_t *pd; - int i; - - pd = (pd_entry_t *)PHYS_TO_DMAP(PTE2PA(pdpe)); - for (i = 0; i < NPDEPG; i++) { - /* Skip not-present or superpage entries */ - if ((pd[i] == 0) || (pd[i] & PG_PS)) - continue; - - free_pt(pd[i]); - } - - free(pd, M_SVM); + + return (vmspace_alloc(min, max, npt_pinit)); } -/* - * Free Page Directory Pointer page. 
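All of this hand-rolled nested-page-table maintenance is superseded by
 * the pmap layer: npt_pinit() above calls pmap_pinit_type(pmap, PT_RVI,
 * npt_flags), so guest tables are created and destroyed through the
 * normal vmspace path. A usage sketch (the address bounds are
 * illustrative, not taken from this change):
 *
 *	struct vmspace *vms = svm_npt_alloc(0, VM_MAXUSER_ADDRESS);
 *	pmap_t pmap = vmspace_pmap(vms);  /* pm_pml4 backs VMCB N_CR3 */
 *	...
 *	svm_npt_free(vms);
 *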
- */ -static void -free_pdp(pml4_entry_t pml4e) +void +svm_npt_free(struct vmspace *vmspace) { - pdp_entry_t *pdp; - int i; - pdp = (pdp_entry_t *)PHYS_TO_DMAP(PTE2PA(pml4e)); - for (i = 0; i < NPDPEPG; i++) { - /* Skip not-present or superpage entries */ - if ((pdp[i] == 0) || (pdp[i] & PG_PS)) - continue; - - free_pd(pdp[i]); - } - - free(pdp, M_SVM); -} - -/* - * Free the guest's nested page table. - */ -int -svm_npt_cleanup(struct svm_softc *svm_sc) -{ - pml4_entry_t *pml4; - int i; - - pml4 = svm_sc->np_pml4; - - for (i = 0; i < NPML4EPG; i++) { - if (pml4[i] != 0) { - free_pdp(pml4[i]); - pml4[i] = 0; - } - } - - return (0); + vmspace_free(vmspace); } diff --git a/sys/amd64/vmm/amd/npt.h b/sys/amd64/vmm/amd/npt.h index 31f85f71595e..435ba203103f 100644 --- a/sys/amd64/vmm/amd/npt.h +++ b/sys/amd64/vmm/amd/npt.h @@ -31,10 +31,7 @@ struct svm_softc; -int svm_npt_init(void); -int svm_npt_cleanup(struct svm_softc *sc); -vm_paddr_t svm_npt_vmmap_get(void *arg, vm_paddr_t gpa); -int svm_npt_vmmap_set(void *arg, vm_paddr_t gpa, vm_paddr_t hpa, - size_t len, vm_memattr_t attr, int prot, - boolean_t sp); +int svm_npt_init(void); +struct vmspace *svm_npt_alloc(vm_offset_t min, vm_offset_t max); +void svm_npt_free(struct vmspace *vmspace); #endif /* _SVM_NPT_H_ */ diff --git a/sys/amd64/vmm/amd/svm.c b/sys/amd64/vmm/amd/svm.c index 207b9e1ed971..96199b249d43 100644 --- a/sys/amd64/vmm/amd/svm.c +++ b/sys/amd64/vmm/amd/svm.c @@ -61,7 +61,7 @@ __FBSDID("$FreeBSD$"); #include "npt.h" /* - * SVM CPUID function 0x8000_000Ai, edx bit decoding. + * SVM CPUID function 0x8000_000A, edx bit decoding. */ #define AMD_CPUID_SVM_NP BIT(0) /* Nested paging or RVI */ #define AMD_CPUID_SVM_LBR BIT(1) /* Last branch virtualization */ @@ -79,7 +79,7 @@ MALLOC_DEFINE(M_SVM, "svm", "svm"); /* Per-CPU context area. */ extern struct pcpu __pcpu[]; -static int svm_vmexit(struct svm_softc *svm_sc, int vcpu, +static bool svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit); static int svm_msr_rw_ok(uint8_t *btmap, uint64_t msr); static int svm_msr_index(uint64_t msr, int *index, int *bit); @@ -98,11 +98,6 @@ static uint32_t guest_asid = 1; */ static int max_asid; -/* - * Statistics - */ -static VMM_STAT_AMD(VMEXIT_NPF_LAPIC, "vm exits due to Local APIC access"); - /* * SVM host state saved area of size 4KB for each core. */ @@ -113,6 +108,8 @@ static uint8_t hsave[MAXCPU][PAGE_SIZE] __aligned(PAGE_SIZE); */ static struct svm_regctx host_ctx[MAXCPU]; +static VMM_STAT_AMD(VCPU_EXITINTINFO, "Valid EXITINTINFO"); + /* * Common function to enable or disabled SVM for a CPU. */ @@ -123,19 +120,13 @@ cpu_svm_enable_disable(boolean_t enable) efer_msr = rdmsr(MSR_EFER); - if (enable) { + if (enable) efer_msr |= EFER_SVM; - } else { + else efer_msr &= ~EFER_SVM; - } wrmsr(MSR_EFER, efer_msr); - if(rdmsr(MSR_EFER) != efer_msr) { - ERR("SVM couldn't be enabled on CPU%d.\n", curcpu); - return (EIO); - } - return(0); } @@ -199,20 +190,16 @@ svm_cpuid_features(void) } /* - * XXX: BHyVe need EPT or RVI to work. + * bhyve needs RVI to work. */ if (!(svm_feature & AMD_CPUID_SVM_NP)) { printf("Missing Nested paging or RVI SVM support in processor.\n"); return (EIO); } - if (svm_feature & (AMD_CPUID_SVM_NRIP_SAVE | - AMD_CPUID_SVM_DECODE_ASSIST)) { + if (svm_feature & AMD_CPUID_SVM_NRIP_SAVE) return (0); - } - /* XXX: Should never be here? 
*/ - printf("Processor doesn't support nRIP or decode assist, can't" - "run BhyVe.\n"); + return (EIO); } @@ -267,16 +254,16 @@ svm_init(void) int err; err = is_svm_enabled(); - if (err) { + if (err) return (err); - } + svm_npt_init(); /* Start SVM on all CPUs */ smp_rendezvous(NULL, svm_enable, NULL, NULL); - return(0); + return (0); } /* @@ -383,7 +370,7 @@ svm_init_vcpu(struct svm_vcpu *vcpu, vm_paddr_t iopm_pa, vm_paddr_t msrpm_pa, * Initialise a virtual machine. */ static void * -svm_vminit(struct vm *vm) +svm_vminit(struct vm *vm, pmap_t pmap) { struct svm_softc *svm_sc; vm_paddr_t msrpm_pa, iopm_pa, pml4_pa; @@ -401,10 +388,10 @@ svm_vminit(struct vm *vm) svm_sc->vm = vm; svm_sc->svm_feature = svm_feature; svm_sc->vcpu_cnt = VM_MAXCPU; - + svm_sc->nptp = (vm_offset_t)vtophys(pmap->pm_pml4); /* * Each guest has its own unique ASID. - * ASID(Addres Space Identifier) are used by TLB entries. + * ASID(Address Space Identifier) is used by TLB entry. */ svm_sc->asid = guest_asid++; @@ -438,7 +425,7 @@ svm_vminit(struct vm *vm) /* Cache physical address for multiple vcpus. */ iopm_pa = vtophys(svm_sc->iopm_bitmap); msrpm_pa = vtophys(svm_sc->msr_bitmap); - pml4_pa = vtophys(svm_sc->np_pml4); + pml4_pa = svm_sc->nptp; for (i = 0; i < svm_sc->vcpu_cnt; i++) { if (svm_init_vcpu(svm_get_vcpu(svm_sc, i), iopm_pa, msrpm_pa, @@ -458,7 +445,7 @@ cleanup: /* * Handle guest I/O intercept. */ -static int +static bool svm_handle_io(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) { struct vmcb_ctrl *ctrl; @@ -477,74 +464,39 @@ svm_handle_io(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) vmexit->u.inout.port = (uint16_t)(info1 >> 16); vmexit->u.inout.eax = (uint32_t)(state->rax); - return (1); + return (false); } -/* - * SVM Nested Page(RVI) Fault handler. - * Nested page fault handler used by local APIC emulation. - */ -static int -svm_handle_npf(struct vm *vm, int vcpu, uint64_t gpa, uint64_t rip, - uint64_t exitinfo1, uint64_t cr3, struct vie *vie) +static void +svm_npf_paging(uint64_t exitinfo1, int *type, int *prot) { - int err; + if (exitinfo1 & VMCB_NPF_INFO1_W) + *type = VM_PROT_WRITE; + else + *type = VM_PROT_READ; + + /* XXX: protection is not used. */ + *prot = 0; +} + +static bool +svm_npf_emul_fault(uint64_t exitinfo1) +{ + if (exitinfo1 & VMCB_NPF_INFO1_ID) { - VMM_CTR0(vm, vcpu, "SVM:NPF for code access."); - return (0); + return (false); } - - if (exitinfo1 & VMCB_NPF_INFO1_RSV) { - VMM_CTR0(vm, vcpu, "SVM:NPF reserved bits are set."); - return (0); - } - + if (exitinfo1 & VMCB_NPF_INFO1_GPT) { - VMM_CTR0(vm, vcpu, "SVM:NPF during guest page table walk."); - return (0); + return (false); } - /* - * nRIP is NULL for NPF so we don't have the length of instruction, - * we rely on instruction decode s/w to determine the size of - * instruction. - * - * XXX: DecodeAssist can use instruction from buffer. - */ - if (vmm_fetch_instruction(vm, vcpu, rip, VIE_INST_SIZE, - cr3, vie) != 0) { - ERR("SVM:NPF instruction fetch failed, RIP:0x%lx\n", rip); - return (EINVAL); + if ((exitinfo1 & VMCB_NPF_INFO1_GPA) == 0) { + return (false); } - KASSERT(vie->num_valid, ("No instruction to emulate.")); - /* - * SVM doesn't provide GLA unlike Intel VM-x. VIE_INVALID_GLA - * which is a non-cannonical address indicate that GLA is not - * available to instruction emulation. - * - * XXX: Which SVM capability can provided GLA? 
- */ - if(vmm_decode_instruction(vm, vcpu, VIE_INVALID_GLA, vie)) { - ERR("SVM: Couldn't decode instruction.\n"); - return (0); - } - - /* - * XXX: Decoding for user space(IOAPIC) should be done in - * user space. - */ - if (gpa < DEFAULT_APIC_BASE || gpa >= (DEFAULT_APIC_BASE + PAGE_SIZE)) { - VMM_CTR2(vm, vcpu, "SVM:NPF GPA(0x%lx) outside of local APIC" - " range(0x%x)\n", gpa, DEFAULT_APIC_BASE); - return (0); - } - - err = vmm_emulate_instruction(vm, vcpu, gpa, vie, lapic_mmio_read, - lapic_mmio_write, 0); - - return (err ? 0 : 1); + return (true); } /* @@ -571,12 +523,12 @@ svm_efer(struct svm_softc *svm_sc, int vcpu, boolean_t write) } /* - * Determine the cause of virtual cpu exit and return to user space if exit - * demand so. - * Return: 1 - Return to user space. - * 0 - Continue vcpu run. + * Determine the cause of virtual cpu exit and handle VMEXIT. + * Return: false - Break vcpu execution loop and handle vmexit + * in kernel or user space. + * true - Continue vcpu run. */ -static int +static bool svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) { struct vmcb_state *state; @@ -584,35 +536,27 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) struct svm_regctx *ctx; uint64_t code, info1, info2, val; uint32_t eax, ecx, edx; - int user; /* Flag for user mode */ - int update_rip; /* Flag for updating RIP */ - int inst_len; + bool update_rip, loop; KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu)); state = svm_get_vmcb_state(svm_sc, vcpu); ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu); ctx = svm_get_guest_regctx(svm_sc, vcpu); - update_rip = 1; - user = 0; - - vmexit->exitcode = VM_EXITCODE_VMX; - vmexit->u.vmx.error = 0; - code = ctrl->exitcode; + code = ctrl->exitcode; info1 = ctrl->exitinfo1; info2 = ctrl->exitinfo2; - if (ctrl->nrip) { - inst_len = ctrl->nrip - state->rip; - } else { - inst_len = ctrl->inst_decode_size; - } + update_rip = true; + loop = true; + vmexit->exitcode = VM_EXITCODE_VMX; + vmexit->u.vmx.error = 0; switch (code) { case VMCB_EXIT_MC: /* Machine Check. */ vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_MTRAP, 1); vmexit->exitcode = VM_EXITCODE_MTRAP; - user = 1; + loop = false; break; case VMCB_EXIT_MSR: /* MSR access. */ @@ -628,27 +572,29 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) if (info1) { /* VM exited because of write MSR */ - vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_WRMSR, 1); + vmm_stat_incr(svm_sc->vm, vcpu, + VMEXIT_WRMSR, 1); vmexit->exitcode = VM_EXITCODE_WRMSR; vmexit->u.msr.code = ecx; val = (uint64_t)edx << 32 | eax; if (emulate_wrmsr(svm_sc->vm, vcpu, ecx, val)) { vmexit->u.msr.wval = val; - user = 1; + loop = false; } VMM_CTR3(svm_sc->vm, vcpu, "VMEXIT WRMSR(%s handling) 0x%lx @0x%x", - user ? "user" : "kernel", val, ecx); + loop ? "kernel" : "user", val, ecx); } else { - vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_RDMSR, 1); + vmm_stat_incr(svm_sc->vm, vcpu, + VMEXIT_RDMSR, 1); vmexit->exitcode = VM_EXITCODE_RDMSR; vmexit->u.msr.code = ecx; if (emulate_rdmsr(svm_sc->vm, vcpu, ecx)) { - user = 1; + loop = false; } VMM_CTR3(svm_sc->vm, vcpu, "SVM:VMEXIT RDMSR" - " 0x%lx,%lx @0x%x", ctx->e.g.sctx_rdx, - state->rax, ecx); + " MSB=0x%08x, LSB=%08x @0x%x", + ctx->e.g.sctx_rdx, state->rax, ecx); } #define MSR_AMDK8_IPM 0xc0010055 @@ -659,17 +605,16 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) * XXX: special handling of AMD C1E - Ignore. 
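Some guests touch MSR_AMDK8_IPM (the K8 "interrupt pending message"
 * register) from their C1E idle path. bhyve does not model it, so the
 * access is simply treated as handled (loop stays true) rather than
 * failing the guest over a power-management MSR.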
*/ if (ecx == MSR_AMDK8_IPM) - user = 0; + loop = true; break; - case VMCB_EXIT_INTR: + case VMCB_EXIT_INTR: /* * Exit on External Interrupt. * Give host interrupt handler to run and if its guest * interrupt, local APIC will inject event in guest. */ - user = 0; - update_rip = 0; + update_rip = false; VMM_CTR1(svm_sc->vm, vcpu, "SVM:VMEXIT ExtInt" " RIP:0x%lx.\n", state->rip); vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EXTINT, 1); @@ -677,9 +622,8 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) case VMCB_EXIT_IO: vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_INOUT, 1); - user = svm_handle_io(svm_sc, vcpu, vmexit); - VMM_CTR1(svm_sc->vm, vcpu, "SVM:I/O VMEXIT RIP:0x%lx\n", - state->rip); + loop = svm_handle_io(svm_sc, vcpu, vmexit); + update_rip = true; break; case VMCB_EXIT_CPUID: @@ -690,14 +634,12 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) (uint32_t *)&ctx->sctx_rcx, (uint32_t *)&ctx->e.g.sctx_rdx); VMM_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT CPUID\n"); - user = 0; break; case VMCB_EXIT_HLT: vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_HLT, 1); if (ctrl->v_irq) { /* Interrupt is pending, can't halt guest. */ - user = 0; vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_HLT_IGNORED, 1); VMM_CTR0(svm_sc->vm, vcpu, @@ -706,7 +648,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) VMM_CTR0(svm_sc->vm, vcpu, "VMEXIT halted CPU."); vmexit->exitcode = VM_EXITCODE_HLT; - user = 1; + loop = false; } break; @@ -719,44 +661,54 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) break; case VMCB_EXIT_NPF: + loop = false; + update_rip = false; + vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EPT_FAULT, 1); + + if (info1 & VMCB_NPF_INFO1_RSV) { + VMM_CTR2(svm_sc->vm, vcpu, "SVM_ERR:NPT" + " reserved bit is set," + "INFO1:0x%lx INFO2:0x%lx .\n", + info1, info2); + break; + } + /* EXITINFO2 has the physical fault address (GPA). */ - if (!svm_handle_npf(svm_sc->vm, vcpu, info2, - state->rip, info1, state->cr3, - &vmexit->u.paging.vie)) { - /* I/O APIC for MSI/X. */ + if(vm_mem_allocated(svm_sc->vm, info2)) { + VMM_CTR3(svm_sc->vm, vcpu, "SVM:NPF-paging," + "RIP:0x%lx INFO1:0x%lx INFO2:0x%lx .\n", + state->rip, info1, info2); vmexit->exitcode = VM_EXITCODE_PAGING; - user = 1; vmexit->u.paging.gpa = info2; - } else { - /* Local APIC NPF */ - update_rip = 1; - vmm_stat_incr(svm_sc->vm, vcpu, - VMEXIT_NPF_LAPIC, 1); + svm_npf_paging(info1, &vmexit->u.paging.fault_type, + &vmexit->u.paging.protection); + } else if (svm_npf_emul_fault(info1)) { + VMM_CTR3(svm_sc->vm, vcpu, "SVM:NPF-inst_emul," + "RIP:0x%lx INFO1:0x%lx INFO2:0x%lx .\n", + state->rip, info1, info2); + vmexit->exitcode = VM_EXITCODE_INST_EMUL; + vmexit->u.inst_emul.gpa = info2; + vmexit->u.inst_emul.gla = VIE_INVALID_GLA; + vmexit->u.inst_emul.cr3 = state->cr3; + vmexit->inst_length = VIE_INST_SIZE; } - vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_EPT_FAULT, 1); - inst_len = vmexit->u.paging.vie.num_processed; - VMM_CTR3(svm_sc->vm, vcpu, "VMEXIT NPF, GPA:0x%lx " - "user=%d instr len=%d.\n", info2, user, - inst_len); break; case VMCB_EXIT_SHUTDOWN: - VMM_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT guest shutdown."); - user = 1; - vmexit->exitcode = VM_EXITCODE_VMX; + VMM_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT shutdown."); + loop = false; break; case VMCB_EXIT_INVALID: VMM_CTR0(svm_sc->vm, vcpu, "SVM:VMEXIT INVALID."); - user = 1; - vmexit->exitcode = VM_EXITCODE_VMX; + loop = false; break; default: /* Return to user space. 
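Any intercept without a dedicated case lands here and is bounced to
 * userspace with its raw exit code logged. Note how the NPF case above
 * sorts faults by address: a GPA inside an allocated segment becomes
 * VM_EXITCODE_PAGING and is resolved in the kernel, while a fault that
 * passes svm_npf_emul_fault() is presented as VM_EXITCODE_INST_EMUL so
 * the MMIO access can be emulated.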
*/ - user = 1; - update_rip = 0; + loop = false; + update_rip = false; VMM_CTR3(svm_sc->vm, vcpu, "VMEXIT=0x%lx" " EXITINFO1: 0x%lx EXITINFO2:0x%lx\n", ctrl->exitcode, info1, info2); @@ -767,29 +719,27 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit) break; } - if (ctrl->v_irq) { - VMM_CTR2(svm_sc->vm, vcpu, "SVM:SVM intr pending vector:0x%x" - " priority:0x%x", ctrl->v_intr_vector, ctrl->v_intr_prio); - } - vmexit->rip = state->rip; if (update_rip) { - vmexit->rip += inst_len; + if (ctrl->nrip == 0) { + VMM_CTR1(svm_sc->vm, vcpu, "SVM_ERR:nRIP is not set " + "for RIP0x%lx.\n", state->rip); + vmexit->exitcode = VM_EXITCODE_VMX; + } else + vmexit->rip = ctrl->nrip; } - /* Return to userland for APs to start. */ - if (vmexit->exitcode == VM_EXITCODE_SPINUP_AP) { - VMM_CTR1(svm_sc->vm, vcpu, "SVM:Starting APs, RIP0x%lx.\n", - vmexit->rip); - user = 1; + /* If vcpu execution is continued, update RIP. */ + if (loop) { + state->rip = vmexit->rip; } - /* XXX: Set next RIP before restarting virtual cpus. */ - if (ctrl->nrip == 0) { - ctrl->nrip = state->rip; + if (state->rip == 0) { + VMM_CTR0(svm_sc->vm, vcpu, "SVM_ERR:RIP is NULL\n"); + vmexit->exitcode = VM_EXITCODE_VMX; } - return (user); + return (loop); } /* @@ -808,7 +758,7 @@ svm_inject_nmi(struct svm_softc *svm_sc, int vcpu) return (0); /* Inject NMI, vector number is not used.*/ - if (vmcb_eventinject(ctrl, VM_NMI, IDT_NMI, 0, FALSE)) { + if (vmcb_eventinject(ctrl, VM_NMI, IDT_NMI, 0, false)) { VMM_CTR0(svm_sc->vm, vcpu, "SVM:NMI injection failed.\n"); return (EIO); } @@ -846,14 +796,7 @@ svm_inj_interrupts(struct svm_softc *svm_sc, int vcpu) /* Wait for guest to come out of interrupt shadow. */ if (ctrl->intr_shadow) { VMM_CTR0(svm_sc->vm, vcpu, "SVM:Guest in interrupt shadow.\n"); - goto inject_failed; - } - - /* Make sure no interrupt is pending.*/ - if (ctrl->v_irq) { - VMM_CTR0(svm_sc->vm, vcpu, - "SVM:virtual interrupt is pending.\n"); - goto inject_failed; + return; } /* NMI event has priority over interrupts.*/ @@ -862,32 +805,32 @@ svm_inj_interrupts(struct svm_softc *svm_sc, int vcpu) } vector = lapic_pending_intr(svm_sc->vm, vcpu); - if (vector < 0) { - return; - } + /* No interrupt is pending. */ + if (vector < 0) + return; + if (vector < 32 || vector > 255) { - ERR("Invalid vector number:%d\n", vector); + VMM_CTR1(svm_sc->vm, vcpu, "SVM_ERR:Event injection" + "invalid vector=%d.\n", vector); + ERR("SVM_ERR:Event injection invalid vector=%d.\n", vector); return; } if ((state->rflags & PSL_I) == 0) { VMM_CTR0(svm_sc->vm, vcpu, "SVM:Interrupt is disabled\n"); - goto inject_failed; + return; } - if(vmcb_eventinject(ctrl, VM_HW_INTR, vector, 0, FALSE)) { - VMM_CTR2(svm_sc->vm, vcpu, "SVM:Event injection failed to" - " VCPU%d,vector=%d.\n", vcpu, vector); + if (vmcb_eventinject(ctrl, VM_HW_INTR, vector, 0, false)) { + VMM_CTR1(svm_sc->vm, vcpu, "SVM:Event injection failed to" + " vector=%d.\n", vector); return; } /* Acknowledge that event is accepted.*/ lapic_intr_accepted(svm_sc->vm, vcpu, vector); VMM_CTR1(svm_sc->vm, vcpu, "SVM:event injected,vector=%d.\n", vector); - -inject_failed: - return; } /* @@ -908,11 +851,39 @@ setup_tss_type(void) desc->sd_type = 9; } +static void +svm_handle_exitintinfo(struct svm_softc *svm_sc, int vcpu) +{ + struct vmcb_ctrl *ctrl; + uint64_t intinfo; + + ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu); + + /* + * VMEXIT while delivering an exception or interrupt. + * Inject it as virtual interrupt. + * Section 15.7.2 Intercepts during IDT interrupt delivery. 
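EXITINTINFO shares the EVENTINJ layout (vector in bits 7:0, type in
 * bits 10:8, EV in bit 11, V in bit 31, error code in bits 63:32), so a
 * valid snapshot can be replayed through vmcb_eventinject() before the
 * next VMRUN and the interrupted event is not lost.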
+ */ + intinfo = ctrl->exitintinfo; + + if (intinfo & VMCB_EXITINTINFO_VALID) { + vmm_stat_incr(svm_sc->vm, vcpu, VCPU_EXITINTINFO, 1); + VMM_CTR1(svm_sc->vm, vcpu, "SVM:EXITINTINFO:0x%lx is valid\n", + intinfo); + if (vmcb_eventinject(ctrl, VMCB_EXITINTINFO_TYPE(intinfo), + VMCB_EXITINTINFO_VECTOR(intinfo), + VMCB_EXITINTINFO_EC(intinfo), + VMCB_EXITINTINFO_EC_VALID & intinfo)) { + VMM_CTR1(svm_sc->vm, vcpu, "SVM:couldn't inject pending" + " interrupt, exitintinfo:0x%lx\n", intinfo); + } + } +} /* * Start vcpu with specified RIP. */ static int -svm_vmrun(void *arg, int vcpu, register_t rip) +svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap) { struct svm_regctx *hctx, *gctx; struct svm_softc *svm_sc; @@ -920,23 +891,17 @@ svm_vmrun(void *arg, int vcpu, register_t rip) struct vmcb_state *state; struct vmcb_ctrl *ctrl; struct vm_exit *vmexit; - int user; uint64_t vmcb_pa; static uint64_t host_cr2; + bool loop; /* Continue vcpu execution loop. */ - user = 0; + loop = true; svm_sc = arg; - KASSERT(vcpu < svm_sc->vcpu_cnt, ("Guest doesn't have VCPU%d", vcpu)); - vcpustate = svm_get_vcpu(svm_sc, vcpu); - state = svm_get_vmcb_state(svm_sc, vcpu); - ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu); - vmexit = vm_exitinfo(svm_sc->vm , vcpu); - if (vmexit->exitcode == VM_EXITCODE_VMX) { - ERR("vcpu%d shouldn't run again.\n", vcpu); - return(EIO); - } + state = svm_get_vmcb_state(svm_sc, vcpu); + ctrl = svm_get_vmcb_ctrl(svm_sc, vcpu); + vmexit = vm_exitinfo(svm_sc->vm, vcpu); gctx = svm_get_guest_regctx(svm_sc, vcpu); hctx = &host_ctx[curcpu]; @@ -974,24 +939,24 @@ svm_vmrun(void *arg, int vcpu, register_t rip) ctrl->vmcb_clean = VMCB_CACHE_ASID | VMCB_CACHE_IOPM | VMCB_CACHE_NP; - } vcpustate->lastcpu = curcpu; - + VMM_CTR3(svm_sc->vm, vcpu, "SVM:Enter vmrun old RIP:0x%lx" + " new RIP:0x%lx inst len=%d\n", + state->rip, rip, vmexit->inst_length); /* Update Guest RIP */ state->rip = rip; - VMM_CTR1(svm_sc->vm, vcpu, "SVM:entered with RIP:0x%lx\n", - state->rip); do { + vmexit->inst_length = 0; /* We are asked to give the cpu by scheduler. */ if (curthread->td_flags & (TDF_ASTPENDING | TDF_NEEDRESCHED)) { vmexit->exitcode = VM_EXITCODE_BOGUS; - vmexit->inst_length = 0; vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_ASTPENDING, 1); - VMM_CTR1(svm_sc->vm, vcpu, "SVM:gave up cpu, RIP:0x%lx\n", - state->rip); + VMM_CTR1(svm_sc->vm, vcpu, + "SVM: gave up CPU, RIP:0x%lx\n", state->rip); + vmexit->rip = state->rip; break; } @@ -999,6 +964,8 @@ svm_vmrun(void *arg, int vcpu, register_t rip) (void)svm_set_vmcb(svm_get_vmcb(svm_sc, vcpu), svm_sc->asid); + svm_handle_exitintinfo(svm_sc, vcpu); + (void)svm_inj_interrupts(svm_sc, vcpu); /* Change TSS type to available.*/ @@ -1013,10 +980,11 @@ svm_vmrun(void *arg, int vcpu, register_t rip) save_cr2(&host_cr2); load_cr2(&state->cr2); - + + /* Launch Virtual Machine. */ svm_launch(vmcb_pa, gctx, hctx); - + save_cr2(&state->cr2); load_cr2(&host_cr2); @@ -1045,17 +1013,11 @@ svm_vmrun(void *arg, int vcpu, register_t rip) enable_gintr(); /* Handle #VMEXIT and if required return to user space. 
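svm_vmexit() returns true to keep running this vcpu and false to leave
 * the loop with vmexit->exitcode describing what the host kernel or
 * userspace must handle; on the continue path it has already advanced
 * the guest RIP past the intercepted instruction using nRIP.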
*/ - user = svm_vmexit(svm_sc, vcpu, vmexit); + loop = svm_vmexit(svm_sc, vcpu, vmexit); vcpustate->loop++; vmm_stat_incr(svm_sc->vm, vcpu, VMEXIT_COUNT, 1); - /* Update RIP since we are continuing vcpu execution.*/ - state->rip = vmexit->rip; - - VMM_CTR1(svm_sc->vm, vcpu, "SVM:loop RIP:0x%lx\n", state->rip); - } while (!user); - VMM_CTR1(svm_sc->vm, vcpu, "SVM:exited with RIP:0x%lx\n", - state->rip); + } while (loop); return (0); } @@ -1072,7 +1034,6 @@ svm_vmcleanup(void *arg) VMM_CTR0(svm_sc->vm, 0, "SVM:cleanup\n"); - svm_npt_cleanup(svm_sc); free(svm_sc, M_SVM); } @@ -1113,7 +1074,7 @@ swctx_regptr(struct svm_regctx *regctx, int reg) case VM_REG_GUEST_R15: return (®ctx->sctx_r15); default: - ERR("Unknown register requested.\n"); + ERR("Unknown register requested, reg=%d.\n", reg); break; } @@ -1141,12 +1102,13 @@ svm_getreg(void *arg, int vcpu, int ident, uint64_t *val) } reg = swctx_regptr(svm_get_guest_regctx(svm_sc, vcpu), ident); + if (reg != NULL) { *val = *reg; return (0); } - ERR("reg type %x is not saved n VMCB\n", ident); + ERR("SVM_ERR:reg type %x is not saved in VMCB.\n", ident); return (EINVAL); } @@ -1176,7 +1138,7 @@ svm_setreg(void *arg, int vcpu, int ident, uint64_t val) return (0); } - ERR("reg type %x is not saved n VMCB\n", ident); + ERR("SVM_ERR:reg type %x is not saved in VMCB.\n", ident); return (EINVAL); } @@ -1201,7 +1163,7 @@ svm_setdesc(void *arg, int vcpu, int type, struct seg_desc *desc) seg = vmcb_seg(vmcb, type); if (seg == NULL) { - ERR("Unsupported seg type %d\n", type); + ERR("SVM_ERR:Unsupported segment type%d\n", type); return (EINVAL); } @@ -1232,7 +1194,7 @@ svm_getdesc(void *arg, int vcpu, int type, struct seg_desc *desc) seg = vmcb_seg(svm_get_vmcb(svm_sc, vcpu), type); if (!seg) { - ERR("Unsupported seg type %d\n", type); + ERR("SVM_ERR:Unsupported segment type%d\n", type); return (EINVAL); } @@ -1366,13 +1328,13 @@ struct vmm_ops vmm_ops_amd = { svm_vminit, svm_vmrun, svm_vmcleanup, - svm_npt_vmmap_set, - svm_npt_vmmap_get, svm_getreg, svm_setreg, svm_getdesc, svm_setdesc, svm_inject_event, svm_getcap, - svm_setcap + svm_setcap, + svm_npt_alloc, + svm_npt_free }; diff --git a/sys/amd64/vmm/amd/svm_softc.h b/sys/amd64/vmm/amd/svm_softc.h index 13b9902ab78e..8c9d3d77911b 100644 --- a/sys/amd64/vmm/amd/svm_softc.h +++ b/sys/amd64/vmm/amd/svm_softc.h @@ -62,7 +62,7 @@ struct svm_softc { uint8_t msr_bitmap[SVM_MSR_BITMAP_SIZE]; /* Nested Paging */ - pml4_entry_t np_pml4[NPML4EPG]; + vm_offset_t nptp; /* Virtual machine pointer. 
*/ struct vm *vm; @@ -76,7 +76,7 @@ struct svm_softc { int vcpu_cnt; /* number of VCPUs for this guest.*/ } __aligned(PAGE_SIZE); -CTASSERT((offsetof(struct svm_softc, np_pml4) & PAGE_MASK) == 0); +CTASSERT((offsetof(struct svm_softc, nptp) & PAGE_MASK) == 0); static __inline struct svm_vcpu * svm_get_vcpu(struct svm_softc *sc, int vcpu) diff --git a/sys/amd64/vmm/amd/vmcb.c b/sys/amd64/vmm/amd/vmcb.c index 048da20a7ece..a5f5c2f8ff7b 100644 --- a/sys/amd64/vmm/amd/vmcb.c +++ b/sys/amd64/vmm/amd/vmcb.c @@ -70,7 +70,7 @@ svm_init_vmcb(struct vmcb *vmcb, uint64_t iopm_base_pa, /* EFER_SVM must always be set when the guest is executing */ state->efer = EFER_SVM; - + return (0); } @@ -355,7 +355,7 @@ vmcb_seg(struct vmcb *vmcb, int type) */ int vmcb_eventinject(struct vmcb_ctrl *ctrl, int type, int vector, - uint32_t error, boolean_t ec_valid) + uint32_t error, bool ec_valid) { int intr_type; diff --git a/sys/amd64/vmm/amd/vmcb.h b/sys/amd64/vmm/amd/vmcb.h index 55db985b8315..1d280b306bc8 100644 --- a/sys/amd64/vmm/amd/vmcb.h +++ b/sys/amd64/vmm/amd/vmcb.h @@ -86,7 +86,7 @@ /* VMCB TLB control */ #define VMCB_TLB_FLUSH_NOTHING 0 /* Flush nothing */ -#define VMCB_TLB_FLUSH_EVERYTHING 1 /* Flush entire TLB */ +#define VMCB_TLB_FLUSH_ALL 1 /* Flush entire TLB */ #define VMCB_TLB_FLUSH_GUEST 3 /* Flush all guest entries */ #define VMCB_TLB_FLUSH_GUEST_NONGLOBAL 7 /* Flush guest non-PG entries */ @@ -148,6 +148,16 @@ #define VMCB_NPF_INFO1_GPA BIT(32) /* Guest physical address. */ #define VMCB_NPF_INFO1_GPT BIT(33) /* Guest page table. */ +/* + * EXITINTINFO, Interrupt exit info for all intercepts. + * Section 15.7.2, Intercepts during IDT Interrupt Delivery. + */ +#define VMCB_EXITINTINFO_VECTOR(x) ((x) & 0xFF) +#define VMCB_EXITINTINFO_TYPE(x) (((x) >> 8) & 0x7) +#define VMCB_EXITINTINFO_EC_VALID BIT(11) +#define VMCB_EXITINTINFO_VALID BIT(31) +#define VMCB_EXITINTINFO_EC(x) (((x) >> 32) & 0xFFFFFFFF) + /* VMCB save state area segment format */ struct vmcb_segment { uint16_t selector; @@ -254,8 +264,8 @@ struct vmcb_state { uint64_t dbgctl; uint64_t br_from; uint64_t br_to; - uint64_t lastexcpfrom; - uint64_t lastexcpto; + uint64_t int_from; + uint64_t int_to; uint8_t pad7[0x968]; /* Reserved upto end of VMCB */ } __attribute__ ((__packed__)); CTASSERT(sizeof(struct vmcb_state) == 0xC00); @@ -274,6 +284,6 @@ int vmcb_read(struct vmcb *vmcb, int ident, uint64_t *retval); int vmcb_write(struct vmcb *vmcb, int ident, uint64_t val); struct vmcb_segment *vmcb_seg(struct vmcb *vmcb, int type); int vmcb_eventinject(struct vmcb_ctrl *ctrl, int type, int vector, - uint32_t error, boolean_t ec_valid); + uint32_t error, bool ec_valid); #endif /* _VMCB_H_ */ diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index 5c2f202b2741..bbf8f23bf5b6 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -960,6 +960,15 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, boolean_t *retu) if (vmm_decode_instruction(vm, vcpuid, gla, vie) != 0) return (EFAULT); + /* + * AMD-V doesn't provide the instruction length (nRIP - RIP) for some + * exits, including Nested Page Fault. Use the instruction length + * calculated by software instruction emulation to update + * the vcpu's RIP. 
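VIE_INST_SIZE acts as a sentinel here: svm_vmexit() stores it in
 * vme->inst_length when nRIP was unavailable, and vie->num_processed
 * (the byte count the decoder actually consumed) replaces it once
 * decoding succeeds.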
+ */ + if (vme->inst_length == VIE_INST_SIZE) + vme->inst_length = vie->num_processed; + /* return to userland unless this is a local apic access */ if (gpa < DEFAULT_APIC_BASE || gpa >= DEFAULT_APIC_BASE + PAGE_SIZE) { *retu = TRUE; diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c index 5341abb39db8..a02cf9339397 100644 --- a/sys/amd64/vmm/vmm_instruction_emul.c +++ b/sys/amd64/vmm/vmm_instruction_emul.c @@ -563,7 +563,7 @@ vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length, vie->num_valid += n; } - if (vie->num_valid == inst_length) + if (vie->num_valid) return (0); else return (-1); diff --git a/sys/amd64/vmm/vmm_msr.c b/sys/amd64/vmm/vmm_msr.c index 9517dcb6ff7d..1d22ad0bdb2e 100644 --- a/sys/amd64/vmm/vmm_msr.c +++ b/sys/amd64/vmm/vmm_msr.c @@ -57,6 +57,7 @@ static struct vmm_msr vmm_msr[] = { { MSR_PAT, VMM_MSR_F_EMULATE | VMM_MSR_F_INVALID }, { MSR_BIOS_SIGN,VMM_MSR_F_EMULATE }, { MSR_MCG_CAP, VMM_MSR_F_EMULATE | VMM_MSR_F_READONLY }, +#if 0 { MSR_IA32_PLATFORM_ID, VMM_MSR_F_EMULATE | VMM_MSR_F_READONLY }, { MSR_IA32_MISC_ENABLE, VMM_MSR_F_EMULATE | VMM_MSR_F_READONLY }, #endif @@ -140,6 +141,7 @@ guest_msrs_init(struct vm *vm, int cpu) case MSR_IA32_PLATFORM_ID: guest_msrs[i] = 0; break; +#endif default: panic("guest_msrs_init: missing initialization for msr " "0x%0x", vmm_msr[i].num);
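A closing note on two of the subtler pieces of this change. First,
vmm_fetch_instruction() now succeeds once any bytes are available
(vie->num_valid != 0) rather than requiring the full requested length,
because on SVM the true instruction length is unknown until the decoder
reports it through vie->num_processed. Second, for reference, a minimal
consumer of the EXITINTINFO accessors added in vmcb.h might look like
this (a sketch, assuming a populated struct vmcb_ctrl *ctrl, and
mirroring svm_handle_exitintinfo() above):

	uint64_t intinfo = ctrl->exitintinfo;

	if (intinfo & VMCB_EXITINTINFO_VALID) {
		/* Replay the interrupted event into the guest. */
		vmcb_eventinject(ctrl,
		    VMCB_EXITINTINFO_TYPE(intinfo),	/* bits 10:8 */
		    VMCB_EXITINTINFO_VECTOR(intinfo),	/* bits 7:0 */
		    VMCB_EXITINTINFO_EC(intinfo),	/* bits 63:32 */
		    (intinfo & VMCB_EXITINTINFO_EC_VALID) != 0);
	}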