freebsd-dev/sys/amd64/vmm/intel/vmcs.c
Peter Grehan 366f60834f Import of bhyve hypervisor and utilities, part 1.
vmm.ko - kernel module for VT-x, VT-d and hypervisor control
  bhyve  - user-space sequencer and i/o emulation
  vmmctl - dump of hypervisor register state
  libvmm - front-end to vmm.ko chardev interface

bhyve was designed and implemented by Neel Natu.

Thanks to the following folk from NetApp who helped to make this available:
	Joe CaraDonna
	Peter Snyder
	Jeff Heller
	Sandeep Mann
	Steve Miller
	Brian Pawlowski
2011-05-13 04:54:01 +00:00

452 lines
10 KiB
C

/*-
* Copyright (c) 2011 NetApp, Inc.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/pcpu.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/segments.h>
#include <machine/pmap.h>
#include <machine/vmm.h>
#include "vmcs.h"
#include "vmx_cpufunc.h"
#include "ept.h"
#include "vmx.h"
static uint64_t
vmcs_fix_regval(uint32_t encoding, uint64_t val)
{
switch (encoding) {
case VMCS_GUEST_CR0:
val = vmx_fix_cr0(val);
break;
case VMCS_GUEST_CR4:
val = vmx_fix_cr4(val);
break;
default:
break;
}
return (val);
}
static uint32_t
vmcs_field_encoding(int ident)
{
switch (ident) {
case VM_REG_GUEST_CR0:
return (VMCS_GUEST_CR0);
case VM_REG_GUEST_CR3:
return (VMCS_GUEST_CR3);
case VM_REG_GUEST_CR4:
return (VMCS_GUEST_CR4);
case VM_REG_GUEST_DR7:
return (VMCS_GUEST_DR7);
case VM_REG_GUEST_RSP:
return (VMCS_GUEST_RSP);
case VM_REG_GUEST_RIP:
return (VMCS_GUEST_RIP);
case VM_REG_GUEST_RFLAGS:
return (VMCS_GUEST_RFLAGS);
case VM_REG_GUEST_ES:
return (VMCS_GUEST_ES_SELECTOR);
case VM_REG_GUEST_CS:
return (VMCS_GUEST_CS_SELECTOR);
case VM_REG_GUEST_SS:
return (VMCS_GUEST_SS_SELECTOR);
case VM_REG_GUEST_DS:
return (VMCS_GUEST_DS_SELECTOR);
case VM_REG_GUEST_FS:
return (VMCS_GUEST_FS_SELECTOR);
case VM_REG_GUEST_GS:
return (VMCS_GUEST_GS_SELECTOR);
case VM_REG_GUEST_TR:
return (VMCS_GUEST_TR_SELECTOR);
case VM_REG_GUEST_LDTR:
return (VMCS_GUEST_LDTR_SELECTOR);
case VM_REG_GUEST_EFER:
return (VMCS_GUEST_IA32_EFER);
default:
return (-1);
}
}
static int
vmcs_seg_desc_encoding(int seg, uint32_t *base, uint32_t *lim, uint32_t *acc)
{
switch (seg) {
case VM_REG_GUEST_ES:
*base = VMCS_GUEST_ES_BASE;
*lim = VMCS_GUEST_ES_LIMIT;
*acc = VMCS_GUEST_ES_ACCESS_RIGHTS;
break;
case VM_REG_GUEST_CS:
*base = VMCS_GUEST_CS_BASE;
*lim = VMCS_GUEST_CS_LIMIT;
*acc = VMCS_GUEST_CS_ACCESS_RIGHTS;
break;
case VM_REG_GUEST_SS:
*base = VMCS_GUEST_SS_BASE;
*lim = VMCS_GUEST_SS_LIMIT;
*acc = VMCS_GUEST_SS_ACCESS_RIGHTS;
break;
case VM_REG_GUEST_DS:
*base = VMCS_GUEST_DS_BASE;
*lim = VMCS_GUEST_DS_LIMIT;
*acc = VMCS_GUEST_DS_ACCESS_RIGHTS;
break;
case VM_REG_GUEST_FS:
*base = VMCS_GUEST_FS_BASE;
*lim = VMCS_GUEST_FS_LIMIT;
*acc = VMCS_GUEST_FS_ACCESS_RIGHTS;
break;
case VM_REG_GUEST_GS:
*base = VMCS_GUEST_GS_BASE;
*lim = VMCS_GUEST_GS_LIMIT;
*acc = VMCS_GUEST_GS_ACCESS_RIGHTS;
break;
case VM_REG_GUEST_TR:
*base = VMCS_GUEST_TR_BASE;
*lim = VMCS_GUEST_TR_LIMIT;
*acc = VMCS_GUEST_TR_ACCESS_RIGHTS;
break;
case VM_REG_GUEST_LDTR:
*base = VMCS_GUEST_LDTR_BASE;
*lim = VMCS_GUEST_LDTR_LIMIT;
*acc = VMCS_GUEST_LDTR_ACCESS_RIGHTS;
break;
case VM_REG_GUEST_IDTR:
*base = VMCS_GUEST_IDTR_BASE;
*lim = VMCS_GUEST_IDTR_LIMIT;
*acc = VMCS_INVALID_ENCODING;
break;
case VM_REG_GUEST_GDTR:
*base = VMCS_GUEST_GDTR_BASE;
*lim = VMCS_GUEST_GDTR_LIMIT;
*acc = VMCS_INVALID_ENCODING;
break;
default:
return (EINVAL);
}
return (0);
}
int
vmcs_getreg(struct vmcs *vmcs, int ident, uint64_t *retval)
{
int error;
uint32_t encoding;
/*
* If we need to get at vmx-specific state in the VMCS we can bypass
* the translation of 'ident' to 'encoding' by simply setting the
* sign bit. As it so happens the upper 16 bits are reserved (i.e
* set to 0) in the encodings for the VMCS so we are free to use the
* sign bit.
*/
if (ident < 0)
encoding = ident & 0x7fffffff;
else
encoding = vmcs_field_encoding(ident);
if (encoding == (uint32_t)-1)
return (EINVAL);
VMPTRLD(vmcs);
error = vmread(encoding, retval);
VMCLEAR(vmcs);
return (error);
}
int
vmcs_setreg(struct vmcs *vmcs, int ident, uint64_t val)
{
int error;
uint32_t encoding;
if (ident < 0)
encoding = ident & 0x7fffffff;
else
encoding = vmcs_field_encoding(ident);
if (encoding == (uint32_t)-1)
return (EINVAL);
val = vmcs_fix_regval(encoding, val);
VMPTRLD(vmcs);
error = vmwrite(encoding, val);
VMCLEAR(vmcs);
return (error);
}
int
vmcs_setdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
{
int error;
uint32_t base, limit, access;
error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
if (error != 0)
panic("vmcs_setdesc: invalid segment register %d", seg);
VMPTRLD(vmcs);
if ((error = vmwrite(base, desc->base)) != 0)
goto done;
if ((error = vmwrite(limit, desc->limit)) != 0)
goto done;
if (access != VMCS_INVALID_ENCODING) {
if ((error = vmwrite(access, desc->access)) != 0)
goto done;
}
done:
VMCLEAR(vmcs);
return (error);
}
int
vmcs_getdesc(struct vmcs *vmcs, int seg, struct seg_desc *desc)
{
int error;
uint32_t base, limit, access;
uint64_t u64;
error = vmcs_seg_desc_encoding(seg, &base, &limit, &access);
if (error != 0)
panic("vmcs_getdesc: invalid segment register %d", seg);
VMPTRLD(vmcs);
if ((error = vmread(base, &u64)) != 0)
goto done;
desc->base = u64;
if ((error = vmread(limit, &u64)) != 0)
goto done;
desc->limit = u64;
if (access != VMCS_INVALID_ENCODING) {
if ((error = vmread(access, &u64)) != 0)
goto done;
desc->access = u64;
}
done:
VMCLEAR(vmcs);
return (error);
}
int
vmcs_set_msr_save(struct vmcs *vmcs, u_long g_area, u_int g_count)
{
int error;
VMPTRLD(vmcs);
/*
* Guest MSRs are saved in the VM-exit MSR-store area.
* Guest MSRs are loaded from the VM-entry MSR-load area.
* Both areas point to the same location in memory.
*/
if ((error = vmwrite(VMCS_EXIT_MSR_STORE, g_area)) != 0)
goto done;
if ((error = vmwrite(VMCS_EXIT_MSR_STORE_COUNT, g_count)) != 0)
goto done;
if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD, g_area)) != 0)
goto done;
if ((error = vmwrite(VMCS_ENTRY_MSR_LOAD_COUNT, g_count)) != 0)
goto done;
error = 0;
done:
VMCLEAR(vmcs);
return (error);
}
int
vmcs_set_defaults(struct vmcs *vmcs,
u_long host_rip, u_long host_rsp, u_long ept_pml4,
uint32_t pinbased_ctls, uint32_t procbased_ctls,
uint32_t procbased_ctls2, uint32_t exit_ctls,
uint32_t entry_ctls, u_long msr_bitmap, uint16_t vpid)
{
int error, codesel, datasel, tsssel;
u_long cr0, cr4, efer;
uint64_t eptp, pat;
uint32_t exc_bitmap;
codesel = GSEL(GCODE_SEL, SEL_KPL);
datasel = GSEL(GDATA_SEL, SEL_KPL);
tsssel = GSEL(GPROC0_SEL, SEL_KPL);
/*
* Make sure we have a "current" VMCS to work with.
*/
VMPTRLD(vmcs);
/*
* Load the VMX controls
*/
if ((error = vmwrite(VMCS_PIN_BASED_CTLS, pinbased_ctls)) != 0)
goto done;
if ((error = vmwrite(VMCS_PRI_PROC_BASED_CTLS, procbased_ctls)) != 0)
goto done;
if ((error = vmwrite(VMCS_SEC_PROC_BASED_CTLS, procbased_ctls2)) != 0)
goto done;
if ((error = vmwrite(VMCS_EXIT_CTLS, exit_ctls)) != 0)
goto done;
if ((error = vmwrite(VMCS_ENTRY_CTLS, entry_ctls)) != 0)
goto done;
/* Guest state */
/* Initialize guest IA32_PAT MSR with the default value */
pat = PAT_VALUE(0, PAT_WRITE_BACK) |
PAT_VALUE(1, PAT_WRITE_THROUGH) |
PAT_VALUE(2, PAT_UNCACHED) |
PAT_VALUE(3, PAT_UNCACHEABLE) |
PAT_VALUE(4, PAT_WRITE_BACK) |
PAT_VALUE(5, PAT_WRITE_THROUGH) |
PAT_VALUE(6, PAT_UNCACHED) |
PAT_VALUE(7, PAT_UNCACHEABLE);
if ((error = vmwrite(VMCS_GUEST_IA32_PAT, pat)) != 0)
goto done;
/* Host state */
/* Initialize host IA32_PAT MSR */
pat = rdmsr(MSR_PAT);
if ((error = vmwrite(VMCS_HOST_IA32_PAT, pat)) != 0)
goto done;
/* Load the IA32_EFER MSR */
efer = rdmsr(MSR_EFER);
if ((error = vmwrite(VMCS_HOST_IA32_EFER, efer)) != 0)
goto done;
/* Load the control registers */
cr0 = rcr0();
if ((error = vmwrite(VMCS_HOST_CR0, cr0)) != 0)
goto done;
cr4 = rcr4();
if ((error = vmwrite(VMCS_HOST_CR4, cr4)) != 0)
goto done;
/* Load the segment selectors */
if ((error = vmwrite(VMCS_HOST_ES_SELECTOR, datasel)) != 0)
goto done;
if ((error = vmwrite(VMCS_HOST_CS_SELECTOR, codesel)) != 0)
goto done;
if ((error = vmwrite(VMCS_HOST_SS_SELECTOR, datasel)) != 0)
goto done;
if ((error = vmwrite(VMCS_HOST_DS_SELECTOR, datasel)) != 0)
goto done;
if ((error = vmwrite(VMCS_HOST_FS_SELECTOR, datasel)) != 0)
goto done;
if ((error = vmwrite(VMCS_HOST_GS_SELECTOR, datasel)) != 0)
goto done;
if ((error = vmwrite(VMCS_HOST_TR_SELECTOR, tsssel)) != 0)
goto done;
/*
* Load the Base-Address for %fs and idtr.
*
* Note that we exclude %gs, tss and gdtr here because their base
* address is pcpu specific.
*/
if ((error = vmwrite(VMCS_HOST_FS_BASE, 0)) != 0)
goto done;
if ((error = vmwrite(VMCS_HOST_IDTR_BASE, r_idt.rd_base)) != 0)
goto done;
/* instruction pointer */
if ((error = vmwrite(VMCS_HOST_RIP, host_rip)) != 0)
goto done;
/* stack pointer */
if ((error = vmwrite(VMCS_HOST_RSP, host_rsp)) != 0)
goto done;
/* eptp */
eptp = EPTP(ept_pml4);
if ((error = vmwrite(VMCS_EPTP, eptp)) != 0)
goto done;
/* vpid */
if ((error = vmwrite(VMCS_VPID, vpid)) != 0)
goto done;
/* msr bitmap */
if ((error = vmwrite(VMCS_MSR_BITMAP, msr_bitmap)) != 0)
goto done;
/* exception bitmap */
exc_bitmap = 1 << IDT_MC;
if ((error = vmwrite(VMCS_EXCEPTION_BITMAP, exc_bitmap)) != 0)
goto done;
/* link pointer */
if ((error = vmwrite(VMCS_LINK_POINTER, ~0)) != 0)
goto done;
done:
VMCLEAR(vmcs);
return (error);
}
uint64_t
vmcs_read(uint32_t encoding)
{
int error;
uint64_t val;
error = vmread(encoding, &val);
if (error != 0)
panic("vmcs_read(%u) error %d", encoding, error);
return (val);
}