Consolidate all the information needed by the guest page table walker into
'struct vm_guest_paging'.

Check for canonical addressing in vmm_gla2gpa() and inject a general
protection fault (#GP) into the guest if a violation is detected.

If the page table walk is restarted in vmm_gla2gpa() then reset 'ptpphys' to
point to the root of the page tables.
This commit is contained in:
Neel Natu 2014-05-24 20:26:57 +00:00
parent 37a723a5b3
commit e813a87350
8 changed files with 157 additions and 128 deletions

View File

@ -243,8 +243,6 @@ enum vm_reg_name vm_segment_name(int seg_encoding);
#endif /* KERNEL */
#include <machine/vmm_instruction_emul.h>
#define VM_MAXCPU 16 /* maximum virtual cpus */
/*
@ -324,6 +322,71 @@ struct seg_desc {
uint32_t access;
};
/*
 * Guest CPU operating mode, recorded at VM exit time in
 * 'struct vm_guest_paging' and passed to the instruction decoder.
 *
 * Canonical address checking is only applied in CPU_MODE_64BIT
 * (see vie_canonical_check()).
 */
enum vm_cpu_mode {
CPU_MODE_COMPATIBILITY, /* IA-32E mode (CS.L = 0) */
CPU_MODE_64BIT, /* IA-32E mode (CS.L = 1) */
};
/*
 * Guest paging mode. This selects how vmm_gla2gpa() translates a guest
 * linear address into a guest physical address.
 */
enum vm_paging_mode {
PAGING_MODE_FLAT, /* no translation: the gpa is the gla itself */
PAGING_MODE_32, /* two-level walk using 32-bit page table entries */
PAGING_MODE_PAE, /* PAE; low 5 and upper 32 bits of the root are masked */
PAGING_MODE_64, /* NOTE(review): presumably 4-level long mode paging -- confirm */
};
/*
 * The guest state needed by the guest page table walker, gathered in one
 * place so it can be handed to vmm_gla2gpa() and vmm_fetch_instruction()
 * as a single argument.
 */
struct vm_guest_paging {
uint64_t cr3; /* guest %cr3: root of the page tables */
int cpl; /* privilege level; 3 is treated as user mode */
enum vm_cpu_mode cpu_mode; /* used for the canonical address check */
enum vm_paging_mode paging_mode; /* page table format to walk */
};
/*
* The data structures 'vie' and 'vie_op' are meant to be opaque to the
* consumers of instruction decoding. The only reason why their contents
* need to be exposed is because they are part of the 'vm_exit' structure.
*/
/*
 * Description of the opcode of a decoded instruction; embedded in
 * 'struct vie' as the 'op' member.
 */
struct vie_op {
uint8_t op_byte; /* actual opcode byte */
uint8_t op_type; /* type of operation (e.g. MOV) */
uint16_t op_flags; /* NOTE(review): flag values are defined outside this view */
};
#define VIE_INST_SIZE 15
/*
 * Decoding state for a single guest instruction.
 *
 * A 'vie' must be initialized with vie_init() before
 * vmm_fetch_instruction() copies the instruction bytes into it; the
 * decoded fields are then filled in by vmm_decode_instruction().
 */
struct vie {
uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */
uint8_t num_valid; /* size of the instruction */
uint8_t num_processed; /* NOTE(review): presumably bytes of 'inst' consumed by the decoder -- confirm */
uint8_t rex_w:1, /* REX prefix */
rex_r:1,
rex_x:1,
rex_b:1,
rex_present:1;
/* 'reg' and 'rm' are 4 bits wide; presumably to hold the REX-extended value -- confirm */
uint8_t mod:2, /* ModRM byte */
reg:4,
rm:4;
/* 'index' and 'base' are 4 bits wide; presumably to hold the REX-extended value -- confirm */
uint8_t ss:2, /* SIB byte */
index:4,
base:4;
uint8_t disp_bytes; /* NOTE(review): presumably size in bytes of 'displacement' -- confirm */
uint8_t imm_bytes; /* NOTE(review): presumably size in bytes of 'immediate' -- confirm */
uint8_t scale; /* NOTE(review): presumably the SIB scale factor -- confirm */
int base_register; /* VM_REG_GUEST_xyz */
int index_register; /* VM_REG_GUEST_xyz */
int64_t displacement; /* optional addr displacement */
int64_t immediate; /* optional immediate operand */
uint8_t decoded; /* set to 1 if successfully decoded */
struct vie_op op; /* opcode description */
};
enum vm_exitcode {
VM_EXITCODE_INOUT,
VM_EXITCODE_VMX,
@ -355,14 +418,11 @@ struct vm_inout {
struct vm_inout_str {
struct vm_inout inout; /* must be the first element */
enum vie_cpu_mode cpu_mode;
enum vie_paging_mode paging_mode;
struct vm_guest_paging paging;
uint64_t rflags;
uint64_t cr0;
uint64_t cr3;
uint64_t index;
uint64_t count; /* rep=1 (%rcx), rep=0 (1) */
int cpl;
int addrsize;
enum vm_reg_name seg_name;
struct seg_desc seg_desc;
@ -384,10 +444,7 @@ struct vm_exit {
struct {
uint64_t gpa;
uint64_t gla;
uint64_t cr3;
enum vie_cpu_mode cpu_mode;
enum vie_paging_mode paging_mode;
int cpl;
struct vm_guest_paging paging;
struct vie vie;
} inst_emul;
/*

View File

@ -29,66 +29,6 @@
#ifndef _VMM_INSTRUCTION_EMUL_H_
#define _VMM_INSTRUCTION_EMUL_H_
enum vm_reg_name;
enum vie_cpu_mode {
CPU_MODE_COMPATIBILITY, /* IA-32E mode (CS.L = 0) */
CPU_MODE_64BIT, /* IA-32E mode (CS.L = 1) */
};
enum vie_paging_mode {
PAGING_MODE_FLAT,
PAGING_MODE_32,
PAGING_MODE_PAE,
PAGING_MODE_64,
};
/*
* The data structures 'vie' and 'vie_op' are meant to be opaque to the
* consumers of instruction decoding. The only reason why their contents
* need to be exposed is because they are part of the 'vm_exit' structure.
*/
struct vie_op {
uint8_t op_byte; /* actual opcode byte */
uint8_t op_type; /* type of operation (e.g. MOV) */
uint16_t op_flags;
};
#define VIE_INST_SIZE 15
struct vie {
uint8_t inst[VIE_INST_SIZE]; /* instruction bytes */
uint8_t num_valid; /* size of the instruction */
uint8_t num_processed;
uint8_t rex_w:1, /* REX prefix */
rex_r:1,
rex_x:1,
rex_b:1,
rex_present:1;
uint8_t mod:2, /* ModRM byte */
reg:4,
rm:4;
uint8_t ss:2, /* SIB byte */
index:4,
base:4;
uint8_t disp_bytes;
uint8_t imm_bytes;
uint8_t scale;
int base_register; /* VM_REG_GUEST_xyz */
int index_register; /* VM_REG_GUEST_xyz */
int64_t displacement; /* optional addr displacement */
int64_t immediate; /* optional immediate operand */
uint8_t decoded; /* set to 1 if successfully decoded */
struct vie_op op; /* opcode description */
};
/*
* Callback functions to read and write memory regions.
*/
@ -122,6 +62,9 @@ int vie_update_register(void *vm, int vcpuid, enum vm_reg_name reg,
int vie_alignment_check(int cpl, int operand_size, uint64_t cr0,
uint64_t rflags, uint64_t gla);
/* Returns 1 if the 'gla' is not canonical and 0 otherwise. */
int vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla);
uint64_t vie_size2mask(int size);
#ifdef _KERNEL
@ -131,23 +74,22 @@ uint64_t vie_size2mask(int size);
* 'vie' must be initialized before calling 'vmm_fetch_instruction()'
*/
int vmm_fetch_instruction(struct vm *vm, int cpuid,
uint64_t rip, int inst_length, uint64_t cr3,
enum vie_paging_mode paging_mode, int cpl,
struct vie *vie);
struct vm_guest_paging *guest_paging,
uint64_t rip, int inst_length, struct vie *vie);
/*
* Translate the guest linear address 'gla' to a guest physical address.
*
* Returns 0 on success and '*gpa' contains the result of the translation.
* Returns 1 if a page fault exception was injected into the guest.
* Returns 1 if an exception was injected into the guest.
* Returns -1 otherwise.
*/
int vmm_gla2gpa(struct vm *vm, int vcpuid, uint64_t gla, uint64_t cr3,
uint64_t *gpa, enum vie_paging_mode paging_mode, int cpl, int prot);
int vmm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
uint64_t gla, int prot, uint64_t *gpa);
void vie_init(struct vie *vie);
uint64_t vie_segbase(enum vm_reg_name segment, enum vie_cpu_mode cpu_mode,
uint64_t vie_segbase(enum vm_reg_name segment, enum vm_cpu_mode cpu_mode,
const struct seg_desc *desc);
/*
@ -163,7 +105,7 @@ uint64_t vie_segbase(enum vm_reg_name segment, enum vie_cpu_mode cpu_mode,
*/
#define VIE_INVALID_GLA (1UL << 63) /* a non-canonical address */
int vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
enum vie_cpu_mode cpu_mode, struct vie *vie);
enum vm_cpu_mode cpu_mode, struct vie *vie);
#endif /* _KERNEL */
#endif /* _VMM_INSTRUCTION_EMUL_H_ */

View File

@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$");
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
#include "vmm_host.h"
#include "vmm_ioport.h"
#include "vmm_ipi.h"
@ -1517,7 +1518,7 @@ vmx_cpl(void)
return ((ssar >> 5) & 0x3);
}
static enum vie_cpu_mode
static enum vm_cpu_mode
vmx_cpu_mode(void)
{
@ -1527,7 +1528,7 @@ vmx_cpu_mode(void)
return (CPU_MODE_COMPATIBILITY);
}
static enum vie_paging_mode
static enum vm_paging_mode
vmx_paging_mode(void)
{
@ -1606,16 +1607,22 @@ inout_str_seginfo(struct vmx *vmx, int vcpuid, uint32_t inst_info, int in,
/* XXX modify svm.c to update bit 16 of seg_desc.access (unusable) */
}
/*
 * Fill in '*paging' with the guest state that the page table walker needs:
 * the guest %cr3, the current privilege level, the cpu mode and the paging
 * mode. Each value comes from the corresponding VMX accessor, queried in
 * the order the fields are declared.
 */
static void
vmx_paging_info(struct vm_guest_paging *paging)
{
	struct vm_guest_paging *const info = paging;

	info->cr3 = vmcs_guest_cr3();
	info->cpl = vmx_cpl();
	info->cpu_mode = vmx_cpu_mode();
	info->paging_mode = vmx_paging_mode();
}
static void
vmexit_inst_emul(struct vm_exit *vmexit, uint64_t gpa, uint64_t gla)
{
vmexit->exitcode = VM_EXITCODE_INST_EMUL;
vmexit->u.inst_emul.gpa = gpa;
vmexit->u.inst_emul.gla = gla;
vmexit->u.inst_emul.cr3 = vmcs_guest_cr3();
vmexit->u.inst_emul.cpu_mode = vmx_cpu_mode();
vmexit->u.inst_emul.paging_mode = vmx_paging_mode();
vmexit->u.inst_emul.cpl = vmx_cpl();
vmx_paging_info(&vmexit->u.inst_emul.paging);
}
static int
@ -1998,12 +2005,9 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
inst_info = vmcs_read(VMCS_EXIT_INSTRUCTION_INFO);
vmexit->exitcode = VM_EXITCODE_INOUT_STR;
vis = &vmexit->u.inout_str;
vis->cpu_mode = vmx_cpu_mode();
vis->paging_mode = vmx_paging_mode();
vmx_paging_info(&vis->paging);
vis->rflags = vmcs_read(VMCS_GUEST_RFLAGS);
vis->cr0 = vmcs_read(VMCS_GUEST_CR0);
vis->cr3 = vmcs_read(VMCS_GUEST_CR3);
vis->cpl = vmx_cpl();
vis->index = inout_str_index(vmx, vcpu, in);
vis->count = inout_str_count(vmx, vcpu, vis->inout.rep);
vis->addrsize = inout_str_addrsize(inst_info);

View File

@ -62,6 +62,7 @@ __FBSDID("$FreeBSD$");
#include <machine/vmm.h>
#include <machine/vmm_dev.h>
#include <machine/vmm_instruction_emul.h>
#include "vmm_ioport.h"
#include "vmm_ktr.h"
@ -1132,32 +1133,25 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
struct vie *vie;
struct vcpu *vcpu;
struct vm_exit *vme;
int cpl, error, inst_length;
uint64_t rip, gla, gpa, cr3;
enum vie_cpu_mode cpu_mode;
enum vie_paging_mode paging_mode;
uint64_t gla, gpa;
struct vm_guest_paging *paging;
mem_region_read_t mread;
mem_region_write_t mwrite;
int error;
vcpu = &vm->vcpu[vcpuid];
vme = &vcpu->exitinfo;
rip = vme->rip;
inst_length = vme->inst_length;
gla = vme->u.inst_emul.gla;
gpa = vme->u.inst_emul.gpa;
cr3 = vme->u.inst_emul.cr3;
cpl = vme->u.inst_emul.cpl;
cpu_mode = vme->u.inst_emul.cpu_mode;
paging_mode = vme->u.inst_emul.paging_mode;
vie = &vme->u.inst_emul.vie;
paging = &vme->u.inst_emul.paging;
vie_init(vie);
/* Fetch, decode and emulate the faulting instruction */
error = vmm_fetch_instruction(vm, vcpuid, rip, inst_length, cr3,
paging_mode, cpl, vie);
error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip,
vme->inst_length, vie);
if (error == 1)
return (0); /* Resume guest to handle page fault */
else if (error == -1)
@ -1165,7 +1159,7 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
else if (error != 0)
panic("%s: vmm_fetch_instruction error %d", __func__, error);
if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, vie) != 0)
if (vmm_decode_instruction(vm, vcpuid, gla, paging->cpu_mode, vie) != 0)
return (EFAULT);
/* return to userland unless this is an in-kernel emulated device */

View File

@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
#define KASSERT(exp,msg) assert((exp))
#endif /* _KERNEL */
#include <machine/vmm_instruction_emul.h>
#include <x86/psl.h>
#include <x86/specialreg.h>
@ -579,6 +580,25 @@ vie_alignment_check(int cpl, int size, uint64_t cr0, uint64_t rf, uint64_t gla)
return ((gla & (size - 1)) ? 1 : 0);
}
/*
 * Check whether the guest linear address 'gla' is canonical.
 *
 * Canonical addressing is only enforced when 'cpu_mode' is
 * CPU_MODE_64BIT; in any other mode the address is always accepted.
 *
 * Returns 1 if the 'gla' is not canonical and 0 otherwise.
 */
int
vie_canonical_check(enum vm_cpu_mode cpu_mode, uint64_t gla)
{
	uint64_t upper_mask, upper_bits;

	if (cpu_mode != CPU_MODE_64BIT)
		return (0);

	/*
	 * The value of the bit 47 in the 'gla' should be replicated in the
	 * most significant 16 bits.
	 *
	 * The constants are built from a 64-bit operand: a plain '1UL' is
	 * only 32 bits wide on ILP32/LLP64 targets, where shifting it by 47
	 * or 48 positions is undefined behavior.
	 */
	upper_mask = ~(((uint64_t)1 << 48) - 1);
	upper_bits = gla & upper_mask;

	if (gla & ((uint64_t)1 << 47))
		return (upper_bits != upper_mask);
	else
		return (upper_bits != 0);
}
uint64_t
vie_size2mask(int size)
{
@ -637,31 +657,41 @@ ptp_hold(struct vm *vm, vm_paddr_t ptpphys, size_t len, void **cookie)
}
int
vmm_gla2gpa(struct vm *vm, int vcpuid, uint64_t gla, uint64_t ptpphys,
uint64_t *gpa, enum vie_paging_mode paging_mode, int cpl, int prot)
vmm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
uint64_t gla, int prot, uint64_t *gpa)
{
int nlevels, pfcode, ptpshift, ptpindex, retval, usermode, writable;
u_int retries;
uint64_t *ptpbase, pte, pgsize;
uint64_t *ptpbase, ptpphys, pte, pgsize;
uint32_t *ptpbase32, pte32;
void *cookie;
usermode = (cpl == 3 ? 1 : 0);
usermode = (paging->cpl == 3 ? 1 : 0);
writable = prot & VM_PROT_WRITE;
cookie = NULL;
retval = 0;
retries = 0;
restart:
ptpphys = paging->cr3; /* root of the page tables */
ptp_release(&cookie);
if (retries++ > 0)
maybe_yield();
if (paging_mode == PAGING_MODE_FLAT) {
if (vie_canonical_check(paging->cpu_mode, gla)) {
/*
* XXX assuming a non-stack reference otherwise a stack fault
* should be generated.
*/
vm_inject_gp(vm, vcpuid);
goto fault;
}
if (paging->paging_mode == PAGING_MODE_FLAT) {
*gpa = gla;
goto done;
}
if (paging_mode == PAGING_MODE_32) {
if (paging->paging_mode == PAGING_MODE_32) {
nlevels = 2;
while (--nlevels >= 0) {
/* Zero out the lower 12 bits. */
@ -684,7 +714,7 @@ restart:
pfcode = pf_error_code(usermode, prot, 0,
pte32);
vm_inject_pf(vm, vcpuid, pfcode, gla);
goto pagefault;
goto fault;
}
/*
@ -722,7 +752,7 @@ restart:
goto done;
}
if (paging_mode == PAGING_MODE_PAE) {
if (paging->paging_mode == PAGING_MODE_PAE) {
/* Zero out the lower 5 bits and the upper 32 bits */
ptpphys &= 0xffffffe0UL;
@ -737,7 +767,7 @@ restart:
if ((pte & PG_V) == 0) {
pfcode = pf_error_code(usermode, prot, 0, pte);
vm_inject_pf(vm, vcpuid, pfcode, gla);
goto pagefault;
goto fault;
}
ptpphys = pte;
@ -764,7 +794,7 @@ restart:
(writable && (pte & PG_RW) == 0)) {
pfcode = pf_error_code(usermode, prot, 0, pte);
vm_inject_pf(vm, vcpuid, pfcode, gla);
goto pagefault;
goto fault;
}
/* Set the accessed bit in the page table entry */
@ -779,7 +809,7 @@ restart:
if (pgsize > 1 * GB) {
pfcode = pf_error_code(usermode, prot, 1, pte);
vm_inject_pf(vm, vcpuid, pfcode, gla);
goto pagefault;
goto fault;
}
break;
}
@ -802,15 +832,14 @@ done:
error:
retval = -1;
goto done;
pagefault:
fault:
retval = 1;
goto done;
}
int
vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length,
uint64_t cr3, enum vie_paging_mode paging_mode, int cpl,
struct vie *vie)
vmm_fetch_instruction(struct vm *vm, int cpuid, struct vm_guest_paging *paging,
uint64_t rip, int inst_length, struct vie *vie)
{
int n, error, prot;
uint64_t gpa, off;
@ -826,8 +855,7 @@ vmm_fetch_instruction(struct vm *vm, int cpuid, uint64_t rip, int inst_length,
/* Copy the instruction into 'vie' */
while (vie->num_valid < inst_length) {
error = vmm_gla2gpa(vm, cpuid, rip, cr3, &gpa, paging_mode,
cpl, prot);
error = vmm_gla2gpa(vm, cpuid, paging, rip, prot, &gpa);
if (error)
return (error);
@ -930,7 +958,7 @@ decode_opcode(struct vie *vie)
}
static int
decode_modrm(struct vie *vie, enum vie_cpu_mode cpu_mode)
decode_modrm(struct vie *vie, enum vm_cpu_mode cpu_mode)
{
uint8_t x;
@ -1210,7 +1238,7 @@ verify_gla(struct vm *vm, int cpuid, uint64_t gla, struct vie *vie)
int
vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
enum vie_cpu_mode cpu_mode, struct vie *vie)
enum vm_cpu_mode cpu_mode, struct vie *vie)
{
if (cpu_mode == CPU_MODE_64BIT) {
@ -1245,7 +1273,7 @@ vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
}
uint64_t
vie_segbase(enum vm_reg_name seg, enum vie_cpu_mode cpu_mode,
vie_segbase(enum vm_reg_name seg, enum vm_cpu_mode cpu_mode,
const struct seg_desc *desc)
{
int basesize;

View File

@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>
#include <x86/psl.h>
#include "vatpic.h"
@ -167,9 +168,9 @@ emulate_inout_str(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu)
* The #GP(0) fault conditions described above don't apply in
* 64-bit mode.
*/
if (vis->cpu_mode != CPU_MODE_64BIT) {
if (vis->paging.cpu_mode != CPU_MODE_64BIT) {
VCPU_CTR1(vm, vcpuid, "ins/outs not emulated in cpu mode %d",
vis->cpu_mode);
vis->paging.cpu_mode);
return (EINVAL);
}
@ -181,7 +182,8 @@ emulate_inout_str(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu)
return (EINVAL);
}
segbase = vie_segbase(vis->seg_name, vis->cpu_mode, &vis->seg_desc);
segbase = vie_segbase(vis->seg_name, vis->paging.cpu_mode,
&vis->seg_desc);
index = vis->index & vie_size2mask(vis->addrsize);
gla = segbase + index;
@ -195,8 +197,8 @@ emulate_inout_str(struct vm *vm, int vcpuid, struct vm_exit *vmexit, bool *retu)
}
vis->gla = gla;
error = vmm_gla2gpa(vm, vcpuid, gla, vis->cr3, &vis->gpa,
vis->paging_mode, vis->cpl, in ? VM_PROT_WRITE : VM_PROT_READ);
error = vmm_gla2gpa(vm, vcpuid, &vis->paging, gla,
in ? VM_PROT_WRITE : VM_PROT_READ, &vis->gpa);
KASSERT(error == 0 || error == 1 || error == -1,
("%s: vmm_gla2gpa unexpected error %d", __func__, error));
if (error == -1) {

View File

@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
#include <x86/segments.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>
#include <vmmapi.h>
#include <stdio.h>
@ -152,7 +153,7 @@ emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict)
gpaend = rounddown(gpa + PAGE_SIZE, PAGE_SIZE);
gva = paddr_guest2host(ctx, gpa, gpaend - gpa);
if (vie_alignment_check(vis->cpl, bytes, vis->cr0,
if (vie_alignment_check(vis->paging.cpl, bytes, vis->cr0,
vis->rflags, vis->gla)) {
error = vm_inject_exception2(ctx, vcpu, IDT_AC, 0);
assert(error == 0);

View File

@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$");
#include <sys/tree.h>
#include <sys/errno.h>
#include <machine/vmm.h>
#include <machine/vmm_instruction_emul.h>
#include <stdio.h>
#include <stdlib.h>