Add emulation for legacy x86 task switching mechanism.
FreeBSD/i386 uses task switching to handle double fault exceptions and this change enables that to work. Reported by: glebius
This commit is contained in:
parent
526ed91414
commit
3d5444c864
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=268777
@ -75,6 +75,10 @@ enum vm_reg_name {
|
||||
VM_REG_GUEST_GDTR,
|
||||
VM_REG_GUEST_EFER,
|
||||
VM_REG_GUEST_CR2,
|
||||
VM_REG_GUEST_PDPTE0,
|
||||
VM_REG_GUEST_PDPTE1,
|
||||
VM_REG_GUEST_PDPTE2,
|
||||
VM_REG_GUEST_PDPTE3,
|
||||
VM_REG_LAST
|
||||
};
|
||||
|
||||
@ -323,6 +327,7 @@ struct seg_desc {
|
||||
uint32_t access;
|
||||
};
|
||||
#define SEG_DESC_TYPE(access) ((access) & 0x001f)
|
||||
#define SEG_DESC_DPL(access) (((access) >> 5) & 0x3)
|
||||
#define SEG_DESC_PRESENT(access) (((access) & 0x0080) ? 1 : 0)
|
||||
#define SEG_DESC_DEF32(access) (((access) & 0x4000) ? 1 : 0)
|
||||
#define SEG_DESC_GRANULARITY(access) (((access) & 0x8000) ? 1 : 0)
|
||||
@ -415,6 +420,7 @@ enum vm_exitcode {
|
||||
VM_EXITCODE_IOAPIC_EOI,
|
||||
VM_EXITCODE_SUSPENDED,
|
||||
VM_EXITCODE_INOUT_STR,
|
||||
VM_EXITCODE_TASK_SWITCH,
|
||||
VM_EXITCODE_MAX
|
||||
};
|
||||
|
||||
@ -439,6 +445,22 @@ struct vm_inout_str {
|
||||
struct seg_desc seg_desc;
|
||||
};
|
||||
|
||||
enum task_switch_reason {
|
||||
TSR_CALL,
|
||||
TSR_IRET,
|
||||
TSR_JMP,
|
||||
TSR_IDT_GATE, /* task gate in IDT */
|
||||
};
|
||||
|
||||
struct vm_task_switch {
|
||||
uint16_t tsssel; /* new TSS selector */
|
||||
int ext; /* task switch due to external event */
|
||||
uint32_t errcode;
|
||||
int errcode_valid; /* push 'errcode' on the new stack */
|
||||
enum task_switch_reason reason;
|
||||
struct vm_guest_paging paging;
|
||||
};
|
||||
|
||||
struct vm_exit {
|
||||
enum vm_exitcode exitcode;
|
||||
int inst_length; /* 0 means unknown */
|
||||
@ -493,6 +515,7 @@ struct vm_exit {
|
||||
struct {
|
||||
enum vm_suspend_how how;
|
||||
} suspended;
|
||||
struct vm_task_switch task_switch;
|
||||
} u;
|
||||
};
|
||||
|
||||
|
@ -103,6 +103,14 @@ vmcs_field_encoding(int ident)
|
||||
return (VMCS_GUEST_LDTR_SELECTOR);
|
||||
case VM_REG_GUEST_EFER:
|
||||
return (VMCS_GUEST_IA32_EFER);
|
||||
case VM_REG_GUEST_PDPTE0:
|
||||
return (VMCS_GUEST_PDPTE0);
|
||||
case VM_REG_GUEST_PDPTE1:
|
||||
return (VMCS_GUEST_PDPTE1);
|
||||
case VM_REG_GUEST_PDPTE2:
|
||||
return (VMCS_GUEST_PDPTE2);
|
||||
case VM_REG_GUEST_PDPTE3:
|
||||
return (VMCS_GUEST_PDPTE3);
|
||||
default:
|
||||
return (-1);
|
||||
}
|
||||
|
@ -346,6 +346,9 @@ vmcs_write(uint32_t encoding, uint64_t val)
|
||||
#define VMCS_INTR_T_HWINTR (0 << 8)
|
||||
#define VMCS_INTR_T_NMI (2 << 8)
|
||||
#define VMCS_INTR_T_HWEXCEPTION (3 << 8)
|
||||
#define VMCS_INTR_T_SWINTR (4 << 8)
|
||||
#define VMCS_INTR_T_PRIV_SWEXCEPTION (5 << 8)
|
||||
#define VMCS_INTR_T_SWEXCEPTION (6 << 8)
|
||||
#define VMCS_INTR_DEL_ERRCODE (1 << 11)
|
||||
|
||||
/*
|
||||
|
@ -2020,6 +2020,26 @@ vmx_handle_apic_access(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit)
|
||||
return (UNHANDLED);
|
||||
}
|
||||
|
||||
static enum task_switch_reason
|
||||
vmx_task_switch_reason(uint64_t qual)
|
||||
{
|
||||
int reason;
|
||||
|
||||
reason = (qual >> 30) & 0x3;
|
||||
switch (reason) {
|
||||
case 0:
|
||||
return (TSR_CALL);
|
||||
case 1:
|
||||
return (TSR_IRET);
|
||||
case 2:
|
||||
return (TSR_JMP);
|
||||
case 3:
|
||||
return (TSR_IDT_GATE);
|
||||
default:
|
||||
panic("%s: invalid reason %d", __func__, reason);
|
||||
}
|
||||
}
|
||||
|
||||
static int
|
||||
vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
|
||||
{
|
||||
@ -2027,8 +2047,9 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
|
||||
struct vmxctx *vmxctx;
|
||||
struct vlapic *vlapic;
|
||||
struct vm_inout_str *vis;
|
||||
struct vm_task_switch *ts;
|
||||
uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, inst_info;
|
||||
uint32_t reason;
|
||||
uint32_t intr_type, reason;
|
||||
uint64_t qual, gpa;
|
||||
bool retu;
|
||||
|
||||
@ -2045,9 +2066,13 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
|
||||
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_COUNT, 1);
|
||||
|
||||
/*
|
||||
* VM exits that could be triggered during event injection on the
|
||||
* previous VM entry need to be handled specially by re-injecting
|
||||
* the event.
|
||||
* VM exits that can be triggered during event delivery need to
|
||||
* be handled specially by re-injecting the event if the IDT
|
||||
* vectoring information field's valid bit is set.
|
||||
*
|
||||
* If the VM-exit is due to a task gate in the IDT then we don't
|
||||
* reinject the event because emulating the task switch also
|
||||
* completes the event delivery.
|
||||
*
|
||||
* See "Information for VM Exits During Event Delivery" in Intel SDM
|
||||
* for details.
|
||||
@ -2059,7 +2084,10 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
|
||||
case EXIT_REASON_TASK_SWITCH:
|
||||
case EXIT_REASON_EXCEPTION:
|
||||
idtvec_info = vmcs_idt_vectoring_info();
|
||||
if (idtvec_info & VMCS_IDT_VEC_VALID) {
|
||||
VCPU_CTR2(vmx->vm, vcpu, "vm exit %s: idtvec_info 0x%08x",
|
||||
exit_reason_to_str(reason), idtvec_info);
|
||||
if ((idtvec_info & VMCS_IDT_VEC_VALID) &&
|
||||
(reason != EXIT_REASON_TASK_SWITCH)) {
|
||||
idtvec_info &= ~(1 << 12); /* clear undefined bit */
|
||||
vmcs_write(VMCS_ENTRY_INTR_INFO, idtvec_info);
|
||||
if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) {
|
||||
@ -2079,12 +2107,56 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
|
||||
}
|
||||
vmcs_write(VMCS_ENTRY_INST_LENGTH, vmexit->inst_length);
|
||||
}
|
||||
break;
|
||||
default:
|
||||
idtvec_info = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
switch (reason) {
|
||||
case EXIT_REASON_TASK_SWITCH:
|
||||
ts = &vmexit->u.task_switch;
|
||||
ts->tsssel = qual & 0xffff;
|
||||
ts->reason = vmx_task_switch_reason(qual);
|
||||
ts->ext = 0;
|
||||
ts->errcode_valid = 0;
|
||||
vmx_paging_info(&ts->paging);
|
||||
/*
|
||||
* If the task switch was due to a CALL, JMP, IRET, software
|
||||
* interrupt (INT n) or software exception (INT3, INTO),
|
||||
* then the saved %rip references the instruction that caused
|
||||
* the task switch. The instruction length field in the VMCS
|
||||
* is valid in this case.
|
||||
*
|
||||
* In all other cases (e.g., NMI, hardware exception) the
|
||||
* saved %rip is one that would have been saved in the old TSS
|
||||
* had the task switch completed normally so the instruction
|
||||
* length field is not needed in this case and is explicitly
|
||||
* set to 0.
|
||||
*/
|
||||
if (ts->reason == TSR_IDT_GATE) {
|
||||
KASSERT(idtvec_info & VMCS_IDT_VEC_VALID,
|
||||
("invalid idtvec_info %x for IDT task switch",
|
||||
idtvec_info));
|
||||
intr_type = idtvec_info & VMCS_INTR_T_MASK;
|
||||
if (intr_type != VMCS_INTR_T_SWINTR &&
|
||||
intr_type != VMCS_INTR_T_SWEXCEPTION &&
|
||||
intr_type != VMCS_INTR_T_PRIV_SWEXCEPTION) {
|
||||
/* Task switch triggered by external event */
|
||||
ts->ext = 1;
|
||||
vmexit->inst_length = 0;
|
||||
if (idtvec_info & VMCS_IDT_VEC_ERRCODE_VALID) {
|
||||
ts->errcode_valid = 1;
|
||||
ts->errcode = vmcs_idt_vectoring_err();
|
||||
}
|
||||
}
|
||||
}
|
||||
vmexit->exitcode = VM_EXITCODE_TASK_SWITCH;
|
||||
VCPU_CTR4(vmx->vm, vcpu, "task switch reason %d, tss 0x%04x, "
|
||||
"%s errcode 0x%016lx", ts->reason, ts->tsssel,
|
||||
ts->ext ? "external" : "internal",
|
||||
((uint64_t)ts->errcode << 32) | ts->errcode_valid);
|
||||
break;
|
||||
case EXIT_REASON_CR_ACCESS:
|
||||
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_CR_ACCESS, 1);
|
||||
switch (qual & 0xf) {
|
||||
|
@ -35,6 +35,7 @@ SRCS= \
|
||||
post.c \
|
||||
rtc.c \
|
||||
smbiostbl.c \
|
||||
task_switch.c \
|
||||
uart_emul.c \
|
||||
virtio.c \
|
||||
xmsr.c \
|
||||
|
@ -69,16 +69,11 @@ __FBSDID("$FreeBSD$");
|
||||
|
||||
#define GUEST_NIO_PORT 0x488 /* guest upcalls via i/o port */
|
||||
|
||||
#define VMEXIT_CONTINUE 1 /* continue from next instruction */
|
||||
#define VMEXIT_RESTART 2 /* restart current instruction */
|
||||
#define VMEXIT_ABORT 3 /* abort the vm run loop */
|
||||
#define VMEXIT_RESET 4 /* guest machine has reset */
|
||||
#define VMEXIT_POWEROFF 5 /* guest machine has powered off */
|
||||
|
||||
#define MB (1024UL * 1024)
|
||||
#define GB (1024UL * MB)
|
||||
|
||||
typedef int (*vmexit_handler_t)(struct vmctx *, struct vm_exit *, int *vcpu);
|
||||
extern int vmexit_task_switch(struct vmctx *, struct vm_exit *, int *vcpu);
|
||||
|
||||
char *vmname;
|
||||
|
||||
@ -556,7 +551,8 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
|
||||
[VM_EXITCODE_MTRAP] = vmexit_mtrap,
|
||||
[VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
|
||||
[VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,
|
||||
[VM_EXITCODE_SUSPENDED] = vmexit_suspend
|
||||
[VM_EXITCODE_SUSPENDED] = vmexit_suspend,
|
||||
[VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
|
||||
};
|
||||
|
||||
static void
|
||||
|
@ -35,6 +35,12 @@
|
||||
#define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1]
|
||||
#endif
|
||||
|
||||
#define VMEXIT_CONTINUE 1 /* continue from next instruction */
|
||||
#define VMEXIT_RESTART 2 /* restart current instruction */
|
||||
#define VMEXIT_ABORT 3 /* abort the vm run loop */
|
||||
#define VMEXIT_RESET 4 /* guest machine has reset */
|
||||
#define VMEXIT_POWEROFF 5 /* guest machine has powered off */
|
||||
|
||||
struct vmctx;
|
||||
extern int guest_ncpus;
|
||||
extern char *guest_uuid_str;
|
||||
|
916
usr.sbin/bhyve/task_switch.c
Normal file
916
usr.sbin/bhyve/task_switch.c
Normal file
@ -0,0 +1,916 @@
|
||||
/*-
|
||||
* Copyright (c) 2014 Neel Natu <neel@freebsd.org>
|
||||
* All rights reserved.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
* are met:
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND
|
||||
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
|
||||
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
||||
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
||||
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
||||
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
||||
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
||||
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
||||
* SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <sys/cdefs.h>
|
||||
__FBSDID("$FreeBSD$");
|
||||
|
||||
#include <sys/param.h>
|
||||
#include <sys/_iovec.h>
|
||||
#include <sys/mman.h>
|
||||
|
||||
#include <x86/psl.h>
|
||||
#include <x86/segments.h>
|
||||
#include <x86/specialreg.h>
|
||||
#include <machine/vmm.h>
|
||||
#include <machine/vmm_instruction_emul.h>
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
|
||||
#include <vmmapi.h>
|
||||
|
||||
#include "bhyverun.h"
|
||||
|
||||
/*
|
||||
* Various functions in this file use 0 to denote success and VMEXIT_ABORT
|
||||
* or VMEXIT_RESTART to denote failure. This assumes that the VMEXIT_xyz
|
||||
* macros expand to non-zero values. Enforce this with a compile-time
|
||||
* assertion.
|
||||
*/
|
||||
CTASSERT(VMEXIT_ABORT != 0);
|
||||
CTASSERT(VMEXIT_RESTART != 0);
|
||||
|
||||
/*
|
||||
* Using 'struct i386tss' is tempting but causes myriad sign extension
|
||||
* issues because all of its fields are defined as signed integers.
|
||||
*/
|
||||
struct tss32 {
|
||||
uint16_t tss_link;
|
||||
uint16_t rsvd1;
|
||||
uint32_t tss_esp0;
|
||||
uint16_t tss_ss0;
|
||||
uint16_t rsvd2;
|
||||
uint32_t tss_esp1;
|
||||
uint16_t tss_ss1;
|
||||
uint16_t rsvd3;
|
||||
uint32_t tss_esp2;
|
||||
uint16_t tss_ss2;
|
||||
uint16_t rsvd4;
|
||||
uint32_t tss_cr3;
|
||||
uint32_t tss_eip;
|
||||
uint32_t tss_eflags;
|
||||
uint32_t tss_eax;
|
||||
uint32_t tss_ecx;
|
||||
uint32_t tss_edx;
|
||||
uint32_t tss_ebx;
|
||||
uint32_t tss_esp;
|
||||
uint32_t tss_ebp;
|
||||
uint32_t tss_esi;
|
||||
uint32_t tss_edi;
|
||||
uint16_t tss_es;
|
||||
uint16_t rsvd5;
|
||||
uint16_t tss_cs;
|
||||
uint16_t rsvd6;
|
||||
uint16_t tss_ss;
|
||||
uint16_t rsvd7;
|
||||
uint16_t tss_ds;
|
||||
uint16_t rsvd8;
|
||||
uint16_t tss_fs;
|
||||
uint16_t rsvd9;
|
||||
uint16_t tss_gs;
|
||||
uint16_t rsvd10;
|
||||
uint16_t tss_ldt;
|
||||
uint16_t rsvd11;
|
||||
uint16_t tss_trap;
|
||||
uint16_t tss_iomap;
|
||||
};
|
||||
CTASSERT(sizeof(struct tss32) == 104);
|
||||
|
||||
#define SEL_START(sel) (((sel) & ~0x7))
|
||||
#define SEL_LIMIT(sel) (((sel) | 0x7))
|
||||
#define TSS_BUSY(type) (((type) & 0x2) != 0)
|
||||
|
||||
static uint64_t
|
||||
GETREG(struct vmctx *ctx, int vcpu, int reg)
|
||||
{
|
||||
uint64_t val;
|
||||
int error;
|
||||
|
||||
error = vm_get_register(ctx, vcpu, reg, &val);
|
||||
assert(error == 0);
|
||||
return (val);
|
||||
}
|
||||
|
||||
static void
|
||||
SETREG(struct vmctx *ctx, int vcpu, int reg, uint64_t val)
|
||||
{
|
||||
int error;
|
||||
|
||||
error = vm_set_register(ctx, vcpu, reg, val);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
static struct seg_desc
|
||||
usd_to_seg_desc(struct user_segment_descriptor *usd)
|
||||
{
|
||||
struct seg_desc seg_desc;
|
||||
|
||||
seg_desc.base = (u_int)USD_GETBASE(usd);
|
||||
if (usd->sd_gran)
|
||||
seg_desc.limit = (u_int)(USD_GETLIMIT(usd) << 12) | 0xfff;
|
||||
else
|
||||
seg_desc.limit = (u_int)USD_GETLIMIT(usd);
|
||||
seg_desc.access = usd->sd_type | usd->sd_dpl << 5 | usd->sd_p << 7;
|
||||
seg_desc.access |= usd->sd_xx << 12;
|
||||
seg_desc.access |= usd->sd_def32 << 14;
|
||||
seg_desc.access |= usd->sd_gran << 15;
|
||||
|
||||
return (seg_desc);
|
||||
}
|
||||
|
||||
/*
|
||||
* Inject an exception with an error code that is a segment selector.
|
||||
* The format of the error code is described in section 6.13, "Error Code",
|
||||
* Intel SDM volume 3.
|
||||
*
|
||||
* Bit 0 (EXT) denotes whether the exception occurred during delivery
|
||||
* of an external event like an interrupt.
|
||||
*
|
||||
* Bit 1 (IDT) indicates whether the selector points to a gate descriptor
|
||||
* in the IDT.
|
||||
*
|
||||
* Bit 2(GDT/LDT) has the usual interpretation of Table Indicator (TI).
|
||||
*/
|
||||
static void
|
||||
sel_exception(struct vmctx *ctx, int vcpu, int vector, uint16_t sel, int ext)
|
||||
{
|
||||
int error;
|
||||
|
||||
/*
|
||||
* Bit 2 from the selector is retained as-is in the error code.
|
||||
*
|
||||
* Bit 1 can be safely cleared because none of the selectors
|
||||
* encountered during task switch emulation refer to a task
|
||||
* gate in the IDT.
|
||||
*
|
||||
* Bit 0 is set depending on the value of 'ext'.
|
||||
*/
|
||||
sel &= ~0x3;
|
||||
if (ext)
|
||||
sel |= 0x1;
|
||||
error = vm_inject_exception2(ctx, vcpu, vector, sel);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
static int
|
||||
desc_table_limit_check(struct vmctx *ctx, int vcpu, uint16_t sel)
|
||||
{
|
||||
uint64_t base;
|
||||
uint32_t limit, access;
|
||||
int error, reg;
|
||||
|
||||
reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR;
|
||||
error = vm_get_desc(ctx, vcpu, reg, &base, &limit, &access);
|
||||
assert(error == 0);
|
||||
|
||||
if (reg == VM_REG_GUEST_LDTR) {
|
||||
if (SEG_DESC_UNUSABLE(access) || !SEG_DESC_PRESENT(access))
|
||||
return (-1);
|
||||
}
|
||||
|
||||
if (limit < SEL_LIMIT(sel))
|
||||
return (-1);
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
desc_table_rw(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
|
||||
uint16_t sel, struct user_segment_descriptor *desc, bool doread)
|
||||
{
|
||||
struct iovec iov[2];
|
||||
uint64_t base;
|
||||
uint32_t limit, access;
|
||||
int error, reg;
|
||||
|
||||
reg = ISLDT(sel) ? VM_REG_GUEST_LDTR : VM_REG_GUEST_GDTR;
|
||||
error = vm_get_desc(ctx, vcpu, reg, &base, &limit, &access);
|
||||
assert(error == 0);
|
||||
assert(limit >= SEL_LIMIT(sel));
|
||||
|
||||
error = vm_gla2gpa(ctx, vcpu, paging, base + SEL_START(sel),
|
||||
sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov));
|
||||
if (error == 0) {
|
||||
if (doread)
|
||||
vm_copyin(ctx, vcpu, iov, desc, sizeof(*desc));
|
||||
else
|
||||
vm_copyout(ctx, vcpu, desc, iov, sizeof(*desc));
|
||||
}
|
||||
return (error);
|
||||
}
|
||||
|
||||
static int
|
||||
desc_table_read(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
|
||||
uint16_t sel, struct user_segment_descriptor *desc)
|
||||
{
|
||||
return (desc_table_rw(ctx, vcpu, paging, sel, desc, true));
|
||||
}
|
||||
|
||||
static int
|
||||
desc_table_write(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
|
||||
uint16_t sel, struct user_segment_descriptor *desc)
|
||||
{
|
||||
return (desc_table_rw(ctx, vcpu, paging, sel, desc, false));
|
||||
}
|
||||
|
||||
static int
|
||||
read_tss_descriptor(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
|
||||
uint16_t sel, struct user_segment_descriptor *desc)
|
||||
{
|
||||
struct vm_guest_paging sup_paging;
|
||||
int error;
|
||||
|
||||
assert(!ISLDT(sel));
|
||||
assert(IDXSEL(sel) != 0);
|
||||
|
||||
/* Fetch the new TSS descriptor */
|
||||
if (desc_table_limit_check(ctx, vcpu, sel)) {
|
||||
if (ts->reason == TSR_IRET)
|
||||
sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
|
||||
else
|
||||
sel_exception(ctx, vcpu, IDT_GP, sel, ts->ext);
|
||||
return (VMEXIT_RESTART);
|
||||
}
|
||||
|
||||
sup_paging = ts->paging;
|
||||
sup_paging.cpl = 0; /* implicit supervisor mode */
|
||||
error = desc_table_read(ctx, vcpu, &sup_paging, sel, desc);
|
||||
if (error < 0)
|
||||
return (VMEXIT_ABORT);
|
||||
else if (error > 0)
|
||||
return (VMEXIT_RESTART);
|
||||
else
|
||||
return (0);
|
||||
}
|
||||
|
||||
static bool
|
||||
code_desc(int sd_type)
|
||||
{
|
||||
/* code descriptor */
|
||||
return ((sd_type & 0x18) == 0x18);
|
||||
}
|
||||
|
||||
static bool
|
||||
stack_desc(int sd_type)
|
||||
{
|
||||
/* writable data descriptor */
|
||||
return ((sd_type & 0x1A) == 0x12);
|
||||
}
|
||||
|
||||
static bool
|
||||
data_desc(int sd_type)
|
||||
{
|
||||
/* data descriptor or a readable code descriptor */
|
||||
return ((sd_type & 0x18) == 0x10 || (sd_type & 0x1A) == 0x1A);
|
||||
}
|
||||
|
||||
static bool
|
||||
ldt_desc(int sd_type)
|
||||
{
|
||||
|
||||
return (sd_type == SDT_SYSLDT);
|
||||
}
|
||||
|
||||
static int
|
||||
validate_seg_desc(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
|
||||
int segment, struct seg_desc *seg_desc)
|
||||
{
|
||||
struct vm_guest_paging sup_paging;
|
||||
struct user_segment_descriptor usd;
|
||||
int error, idtvec;
|
||||
int cpl, dpl, rpl;
|
||||
uint16_t sel, cs;
|
||||
bool ldtseg, codeseg, stackseg, dataseg, conforming;
|
||||
|
||||
ldtseg = codeseg = stackseg = dataseg = false;
|
||||
switch (segment) {
|
||||
case VM_REG_GUEST_LDTR:
|
||||
ldtseg = true;
|
||||
break;
|
||||
case VM_REG_GUEST_CS:
|
||||
codeseg = true;
|
||||
break;
|
||||
case VM_REG_GUEST_SS:
|
||||
stackseg = true;
|
||||
break;
|
||||
case VM_REG_GUEST_DS:
|
||||
case VM_REG_GUEST_ES:
|
||||
case VM_REG_GUEST_FS:
|
||||
case VM_REG_GUEST_GS:
|
||||
dataseg = true;
|
||||
break;
|
||||
default:
|
||||
assert(0);
|
||||
}
|
||||
|
||||
/* Get the segment selector */
|
||||
sel = GETREG(ctx, vcpu, segment);
|
||||
|
||||
/* LDT selector must point into the GDT */
|
||||
if (ldtseg && ISLDT(sel)) {
|
||||
sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
|
||||
return (VMEXIT_RESTART);
|
||||
}
|
||||
|
||||
/* Descriptor table limit check */
|
||||
if (desc_table_limit_check(ctx, vcpu, sel)) {
|
||||
sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
|
||||
return (VMEXIT_RESTART);
|
||||
}
|
||||
|
||||
/* NULL selector */
|
||||
if (IDXSEL(sel) == 0) {
|
||||
/* Code and stack segment selectors cannot be NULL */
|
||||
if (codeseg || stackseg) {
|
||||
sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
|
||||
return (VMEXIT_RESTART);
|
||||
}
|
||||
seg_desc->base = 0;
|
||||
seg_desc->limit = 0;
|
||||
seg_desc->access = 0x10000; /* unusable */
|
||||
return (0);
|
||||
}
|
||||
|
||||
/* Read the descriptor from the GDT/LDT */
|
||||
sup_paging = ts->paging;
|
||||
sup_paging.cpl = 0; /* implicit supervisor mode */
|
||||
error = desc_table_read(ctx, vcpu, &sup_paging, sel, &usd);
|
||||
if (error < 0)
|
||||
return (VMEXIT_ABORT);
|
||||
else if (error > 0)
|
||||
return (VMEXIT_RESTART);
|
||||
|
||||
/* Verify that the descriptor type is compatible with the segment */
|
||||
if ((ldtseg && !ldt_desc(usd.sd_type)) ||
|
||||
(codeseg && !code_desc(usd.sd_type)) ||
|
||||
(dataseg && !data_desc(usd.sd_type)) ||
|
||||
(stackseg && !stack_desc(usd.sd_type))) {
|
||||
sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
|
||||
return (VMEXIT_RESTART);
|
||||
}
|
||||
|
||||
/* Segment must be marked present */
|
||||
if (!usd.sd_p) {
|
||||
if (ldtseg)
|
||||
idtvec = IDT_TS;
|
||||
else if (stackseg)
|
||||
idtvec = IDT_SS;
|
||||
else
|
||||
idtvec = IDT_NP;
|
||||
sel_exception(ctx, vcpu, idtvec, sel, ts->ext);
|
||||
return (VMEXIT_RESTART);
|
||||
}
|
||||
|
||||
cs = GETREG(ctx, vcpu, VM_REG_GUEST_CS);
|
||||
cpl = cs & SEL_RPL_MASK;
|
||||
rpl = sel & SEL_RPL_MASK;
|
||||
dpl = usd.sd_dpl;
|
||||
|
||||
if (stackseg && (rpl != cpl || dpl != cpl)) {
|
||||
sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
|
||||
return (VMEXIT_RESTART);
|
||||
}
|
||||
|
||||
if (codeseg) {
|
||||
conforming = (usd.sd_type & 0x4) ? true : false;
|
||||
if ((conforming && (cpl < dpl)) ||
|
||||
(!conforming && (cpl != dpl))) {
|
||||
sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
|
||||
return (VMEXIT_RESTART);
|
||||
}
|
||||
}
|
||||
|
||||
if (dataseg) {
|
||||
/*
|
||||
* A data segment is always non-conforming except when it's
|
||||
* descriptor is a readable, conforming code segment.
|
||||
*/
|
||||
if (code_desc(usd.sd_type) && (usd.sd_type & 0x4) != 0)
|
||||
conforming = true;
|
||||
else
|
||||
conforming = false;
|
||||
|
||||
if (!conforming && (rpl > dpl || cpl > dpl)) {
|
||||
sel_exception(ctx, vcpu, IDT_TS, sel, ts->ext);
|
||||
return (VMEXIT_RESTART);
|
||||
}
|
||||
}
|
||||
*seg_desc = usd_to_seg_desc(&usd);
|
||||
return (0);
|
||||
}
|
||||
|
||||
static void
|
||||
tss32_save(struct vmctx *ctx, int vcpu, struct vm_task_switch *task_switch,
|
||||
uint32_t eip, struct tss32 *tss, struct iovec *iov)
|
||||
{
|
||||
|
||||
/* General purpose registers */
|
||||
tss->tss_eax = GETREG(ctx, vcpu, VM_REG_GUEST_RAX);
|
||||
tss->tss_ecx = GETREG(ctx, vcpu, VM_REG_GUEST_RCX);
|
||||
tss->tss_edx = GETREG(ctx, vcpu, VM_REG_GUEST_RDX);
|
||||
tss->tss_ebx = GETREG(ctx, vcpu, VM_REG_GUEST_RBX);
|
||||
tss->tss_esp = GETREG(ctx, vcpu, VM_REG_GUEST_RSP);
|
||||
tss->tss_ebp = GETREG(ctx, vcpu, VM_REG_GUEST_RBP);
|
||||
tss->tss_esi = GETREG(ctx, vcpu, VM_REG_GUEST_RSI);
|
||||
tss->tss_edi = GETREG(ctx, vcpu, VM_REG_GUEST_RDI);
|
||||
|
||||
/* Segment selectors */
|
||||
tss->tss_es = GETREG(ctx, vcpu, VM_REG_GUEST_ES);
|
||||
tss->tss_cs = GETREG(ctx, vcpu, VM_REG_GUEST_CS);
|
||||
tss->tss_ss = GETREG(ctx, vcpu, VM_REG_GUEST_SS);
|
||||
tss->tss_ds = GETREG(ctx, vcpu, VM_REG_GUEST_DS);
|
||||
tss->tss_fs = GETREG(ctx, vcpu, VM_REG_GUEST_FS);
|
||||
tss->tss_gs = GETREG(ctx, vcpu, VM_REG_GUEST_GS);
|
||||
|
||||
/* eflags and eip */
|
||||
tss->tss_eflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS);
|
||||
if (task_switch->reason == TSR_IRET)
|
||||
tss->tss_eflags &= ~PSL_NT;
|
||||
tss->tss_eip = eip;
|
||||
|
||||
/* Copy updated old TSS into guest memory */
|
||||
vm_copyout(ctx, vcpu, tss, iov, sizeof(struct tss32));
|
||||
}
|
||||
|
||||
static void
|
||||
update_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *sd)
|
||||
{
|
||||
int error;
|
||||
|
||||
error = vm_set_desc(ctx, vcpu, reg, sd->base, sd->limit, sd->access);
|
||||
assert(error == 0);
|
||||
}
|
||||
|
||||
static int
|
||||
tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts,
|
||||
uint16_t ot_sel, struct tss32 *tss, struct iovec *iov)
|
||||
{
|
||||
struct seg_desc seg_desc, seg_desc2;
|
||||
uint64_t *pdpte, maxphyaddr, reserved;
|
||||
uint32_t eflags;
|
||||
int error, i;
|
||||
bool nested;
|
||||
|
||||
nested = false;
|
||||
if (ts->reason != TSR_IRET && ts->reason != TSR_JMP) {
|
||||
tss->tss_link = ot_sel;
|
||||
nested = true;
|
||||
}
|
||||
|
||||
eflags = tss->tss_eflags;
|
||||
if (nested)
|
||||
eflags |= PSL_NT;
|
||||
|
||||
/* LDTR */
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_LDTR, tss->tss_ldt);
|
||||
|
||||
/* PBDR */
|
||||
if (ts->paging.paging_mode != PAGING_MODE_FLAT) {
|
||||
if (ts->paging.paging_mode == PAGING_MODE_PAE) {
|
||||
/*
|
||||
* XXX Assuming 36-bit MAXPHYADDR.
|
||||
*/
|
||||
maxphyaddr = (1UL << 36) - 1;
|
||||
pdpte = paddr_guest2host(ctx, tss->tss_cr3 & ~0x1f, 32);
|
||||
for (i = 0; i < 4; i++) {
|
||||
/* Check reserved bits if the PDPTE is valid */
|
||||
if (!(pdpte[i] & 0x1))
|
||||
continue;
|
||||
/*
|
||||
* Bits 2:1, 8:5 and bits above the processor's
|
||||
* maximum physical address are reserved.
|
||||
*/
|
||||
reserved = ~maxphyaddr | 0x1E6;
|
||||
if (pdpte[i] & reserved) {
|
||||
error = vm_inject_exception2(ctx, vcpu,
|
||||
IDT_GP, 0);
|
||||
assert(error == 0);
|
||||
return (VMEXIT_RESTART);
|
||||
}
|
||||
}
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE0, pdpte[0]);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE1, pdpte[1]);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE2, pdpte[2]);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_PDPTE3, pdpte[3]);
|
||||
}
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_CR3, tss->tss_cr3);
|
||||
ts->paging.cr3 = tss->tss_cr3;
|
||||
}
|
||||
|
||||
/* eflags and eip */
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS, eflags);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_RIP, tss->tss_eip);
|
||||
|
||||
/* General purpose registers */
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_RAX, tss->tss_eax);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_RCX, tss->tss_ecx);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_RDX, tss->tss_edx);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_RBX, tss->tss_ebx);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_RSP, tss->tss_esp);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_RBP, tss->tss_ebp);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_RSI, tss->tss_esi);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_RDI, tss->tss_edi);
|
||||
|
||||
/* Segment selectors */
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_ES, tss->tss_es);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_CS, tss->tss_cs);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_SS, tss->tss_ss);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_DS, tss->tss_ds);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_FS, tss->tss_fs);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_GS, tss->tss_gs);
|
||||
|
||||
/*
|
||||
* If this is a nested task then write out the new TSS to update
|
||||
* the previous link field.
|
||||
*/
|
||||
if (nested)
|
||||
vm_copyout(ctx, vcpu, tss, iov, sizeof(*tss));
|
||||
|
||||
/* Validate segment descriptors */
|
||||
error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc);
|
||||
if (error)
|
||||
return (error);
|
||||
update_seg_desc(ctx, vcpu, VM_REG_GUEST_LDTR, &seg_desc);
|
||||
|
||||
/*
|
||||
* Section "Checks on Guest Segment Registers", Intel SDM, Vol 3.
|
||||
*
|
||||
* The SS and CS attribute checks on VM-entry are inter-dependent so
|
||||
* we need to make sure that both segments are valid before updating
|
||||
* either of them. This ensures that the VMCS state can pass the
|
||||
* VM-entry checks so the guest can handle any exception injected
|
||||
* during task switch emulation.
|
||||
*/
|
||||
error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_CS, &seg_desc);
|
||||
if (error)
|
||||
return (error);
|
||||
error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_SS, &seg_desc2);
|
||||
if (error)
|
||||
return (error);
|
||||
update_seg_desc(ctx, vcpu, VM_REG_GUEST_CS, &seg_desc);
|
||||
update_seg_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc2);
|
||||
ts->paging.cpl = tss->tss_cs & SEL_RPL_MASK;
|
||||
|
||||
error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_DS, &seg_desc);
|
||||
if (error)
|
||||
return (error);
|
||||
update_seg_desc(ctx, vcpu, VM_REG_GUEST_DS, &seg_desc);
|
||||
|
||||
error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_ES, &seg_desc);
|
||||
if (error)
|
||||
return (error);
|
||||
update_seg_desc(ctx, vcpu, VM_REG_GUEST_ES, &seg_desc);
|
||||
|
||||
error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_FS, &seg_desc);
|
||||
if (error)
|
||||
return (error);
|
||||
update_seg_desc(ctx, vcpu, VM_REG_GUEST_FS, &seg_desc);
|
||||
|
||||
error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_GS, &seg_desc);
|
||||
if (error)
|
||||
return (error);
|
||||
update_seg_desc(ctx, vcpu, VM_REG_GUEST_GS, &seg_desc);
|
||||
|
||||
return (0);
|
||||
}
|
||||
|
||||
static int
|
||||
push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
|
||||
int task_type, uint32_t errcode)
|
||||
{
|
||||
struct iovec iov[2];
|
||||
struct seg_desc seg_desc;
|
||||
int stacksize, bytes, error;
|
||||
uint64_t gla, cr0, rflags;
|
||||
uint32_t esp;
|
||||
uint16_t stacksel;
|
||||
|
||||
cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0);
|
||||
rflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS);
|
||||
stacksel = GETREG(ctx, vcpu, VM_REG_GUEST_SS);
|
||||
|
||||
error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc.base,
|
||||
&seg_desc.limit, &seg_desc.access);
|
||||
assert(error == 0);
|
||||
|
||||
/*
|
||||
* Section "Error Code" in the Intel SDM vol 3: the error code is
|
||||
* pushed on the stack as a doubleword or word (depending on the
|
||||
* default interrupt, trap or task gate size).
|
||||
*/
|
||||
if (task_type == SDT_SYS386BSY || task_type == SDT_SYS386TSS)
|
||||
bytes = 4;
|
||||
else
|
||||
bytes = 2;
|
||||
|
||||
/*
|
||||
* PUSH instruction from Intel SDM vol 2: the 'B' flag in the
|
||||
* stack-segment descriptor determines the size of the stack
|
||||
* pointer outside of 64-bit mode.
|
||||
*/
|
||||
if (SEG_DESC_DEF32(seg_desc.access))
|
||||
stacksize = 4;
|
||||
else
|
||||
stacksize = 2;
|
||||
|
||||
esp = GETREG(ctx, vcpu, VM_REG_GUEST_RSP);
|
||||
esp -= bytes;
|
||||
|
||||
if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS,
|
||||
&seg_desc, esp, bytes, stacksize, PROT_WRITE, &gla)) {
|
||||
sel_exception(ctx, vcpu, IDT_SS, stacksel, 1);
|
||||
return (VMEXIT_RESTART);
|
||||
}
|
||||
|
||||
if (vie_alignment_check(paging->cpl, bytes, cr0, rflags, gla)) {
|
||||
error = vm_inject_exception2(ctx, vcpu, IDT_AC, 1);
|
||||
assert(error == 0);
|
||||
return (VMEXIT_RESTART);
|
||||
}
|
||||
|
||||
error = vm_gla2gpa(ctx, vcpu, paging, gla, bytes, PROT_WRITE,
|
||||
iov, nitems(iov));
|
||||
assert(error == 0 || error == 1 || error == -1);
|
||||
if (error) {
|
||||
return ((error == 1) ? VMEXIT_RESTART : VMEXIT_ABORT);
|
||||
}
|
||||
|
||||
vm_copyout(ctx, vcpu, &errcode, iov, bytes);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_RSP, esp);
|
||||
return (0);
|
||||
}
|
||||
|
||||
int
|
||||
vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
|
||||
{
|
||||
struct seg_desc nt;
|
||||
struct tss32 oldtss, newtss;
|
||||
struct vm_task_switch *task_switch;
|
||||
struct vm_guest_paging *paging, sup_paging;
|
||||
struct user_segment_descriptor nt_desc, ot_desc;
|
||||
struct iovec nt_iov[2], ot_iov[2];
|
||||
uint64_t cr0, ot_base;
|
||||
uint32_t eip, ot_lim, access;
|
||||
int error, ext, minlimit, nt_type, ot_type, vcpu;
|
||||
enum task_switch_reason reason;
|
||||
uint16_t nt_sel, ot_sel;
|
||||
|
||||
task_switch = &vmexit->u.task_switch;
|
||||
nt_sel = task_switch->tsssel;
|
||||
ext = vmexit->u.task_switch.ext;
|
||||
reason = vmexit->u.task_switch.reason;
|
||||
paging = &vmexit->u.task_switch.paging;
|
||||
vcpu = *pvcpu;
|
||||
|
||||
assert(paging->cpu_mode == CPU_MODE_PROTECTED);
|
||||
|
||||
/*
|
||||
* Section 4.6, "Access Rights" in Intel SDM Vol 3.
|
||||
* The following page table accesses are implicitly supervisor mode:
|
||||
* - accesses to GDT or LDT to load segment descriptors
|
||||
* - accesses to the task state segment during task switch
|
||||
*/
|
||||
sup_paging = *paging;
|
||||
sup_paging.cpl = 0; /* implicit supervisor mode */
|
||||
|
||||
/* Fetch the new TSS descriptor */
|
||||
error = read_tss_descriptor(ctx, vcpu, task_switch, nt_sel, &nt_desc);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
nt = usd_to_seg_desc(&nt_desc);
|
||||
|
||||
/* Verify the type of the new TSS */
|
||||
nt_type = SEG_DESC_TYPE(nt.access);
|
||||
if (nt_type != SDT_SYS386BSY && nt_type != SDT_SYS386TSS &&
|
||||
nt_type != SDT_SYS286BSY && nt_type != SDT_SYS286TSS) {
|
||||
sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext);
|
||||
return (VMEXIT_RESTART);
|
||||
}
|
||||
|
||||
/* TSS descriptor must have present bit set */
|
||||
if (!SEG_DESC_PRESENT(nt.access)) {
|
||||
sel_exception(ctx, vcpu, IDT_NP, nt_sel, ext);
|
||||
return (VMEXIT_RESTART);
|
||||
}
|
||||
|
||||
/*
|
||||
* TSS must have a minimum length of 104 bytes for a 32-bit TSS and
|
||||
* 44 bytes for a 16-bit TSS.
|
||||
*/
|
||||
if (nt_type == SDT_SYS386BSY || nt_type == SDT_SYS386TSS)
|
||||
minlimit = 104 - 1;
|
||||
else if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS)
|
||||
minlimit = 44 - 1;
|
||||
else
|
||||
minlimit = 0;
|
||||
|
||||
assert(minlimit > 0);
|
||||
if (nt.limit < minlimit) {
|
||||
sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext);
|
||||
return (VMEXIT_RESTART);
|
||||
}
|
||||
|
||||
/* TSS must be busy if task switch is due to IRET */
|
||||
if (reason == TSR_IRET && !TSS_BUSY(nt_type)) {
|
||||
sel_exception(ctx, vcpu, IDT_TS, nt_sel, ext);
|
||||
return (VMEXIT_RESTART);
|
||||
}
|
||||
|
||||
/*
|
||||
* TSS must be available (not busy) if task switch reason is
|
||||
* CALL, JMP, exception or interrupt.
|
||||
*/
|
||||
if (reason != TSR_IRET && TSS_BUSY(nt_type)) {
|
||||
sel_exception(ctx, vcpu, IDT_GP, nt_sel, ext);
|
||||
return (VMEXIT_RESTART);
|
||||
}
|
||||
|
||||
/* Fetch the new TSS */
|
||||
error = vm_gla2gpa(ctx, vcpu, &sup_paging, nt.base, minlimit + 1,
|
||||
PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov));
|
||||
if (error == 1) {
|
||||
/* Restart vcpu execution to handle the page fault */
|
||||
return (VMEXIT_RESTART);
|
||||
} else if (error == -1) {
|
||||
return (VMEXIT_ABORT);
|
||||
} else {
|
||||
assert(error == 0);
|
||||
vm_copyin(ctx, vcpu, nt_iov, &newtss, minlimit + 1);
|
||||
}
|
||||
|
||||
/* Get the old TSS selector from the guest's task register */
|
||||
ot_sel = GETREG(ctx, vcpu, VM_REG_GUEST_TR);
|
||||
if (ISLDT(ot_sel) || IDXSEL(ot_sel) == 0) {
|
||||
/*
|
||||
* This might happen if a task switch was attempted without
|
||||
* ever loading the task register with LTR. In this case the
|
||||
* TR would contain the values from power-on:
|
||||
* (sel = 0, base = 0, limit = 0xffff).
|
||||
*/
|
||||
sel_exception(ctx, vcpu, IDT_TS, ot_sel, task_switch->ext);
|
||||
return (VMEXIT_RESTART);
|
||||
}
|
||||
|
||||
/* Get the old TSS base and limit from the guest's task register */
|
||||
error = vm_get_desc(ctx, vcpu, VM_REG_GUEST_TR, &ot_base, &ot_lim,
|
||||
&access);
|
||||
assert(error == 0);
|
||||
assert(!SEG_DESC_UNUSABLE(access) && SEG_DESC_PRESENT(access));
|
||||
ot_type = SEG_DESC_TYPE(access);
|
||||
assert(ot_type == SDT_SYS386BSY || ot_type == SDT_SYS286BSY);
|
||||
|
||||
/* Fetch the old TSS descriptor */
|
||||
error = read_tss_descriptor(ctx, vcpu, task_switch, ot_sel,
|
||||
&ot_desc);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
/* Get the old TSS */
|
||||
error = vm_gla2gpa(ctx, vcpu, &sup_paging, ot_base, minlimit + 1,
|
||||
PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov));
|
||||
if (error == 1) {
|
||||
/* Restart vcpu execution to handle the page fault */
|
||||
return (VMEXIT_RESTART);
|
||||
} else if (error == -1) {
|
||||
fprintf(stderr, "Error copying in old TSS: %d\n", errno);
|
||||
return (VMEXIT_ABORT);
|
||||
} else {
|
||||
assert(error == 0);
|
||||
vm_copyin(ctx, vcpu, ot_iov, &oldtss, minlimit + 1);
|
||||
}
|
||||
|
||||
/*
|
||||
* Clear the busy bit in the old TSS descriptor if the task switch
|
||||
* due to an IRET or JMP instruction.
|
||||
*/
|
||||
if (reason == TSR_IRET || reason == TSR_JMP) {
|
||||
ot_desc.sd_type &= ~0x2;
|
||||
error = desc_table_write(ctx, vcpu, &sup_paging, ot_sel,
|
||||
&ot_desc);
|
||||
if (error)
|
||||
return (error);
|
||||
}
|
||||
|
||||
if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS) {
|
||||
fprintf(stderr, "Task switch to 16-bit TSS not supported\n");
|
||||
return (VMEXIT_ABORT);
|
||||
}
|
||||
|
||||
/* Save processor state in old TSS */
|
||||
eip = vmexit->rip + vmexit->inst_length;
|
||||
tss32_save(ctx, vcpu, task_switch, eip, &oldtss, ot_iov);
|
||||
|
||||
/*
|
||||
* If the task switch was triggered for any reason other than IRET
|
||||
* then set the busy bit in the new TSS descriptor.
|
||||
*/
|
||||
if (reason != TSR_IRET) {
|
||||
nt_desc.sd_type |= 0x2;
|
||||
error = desc_table_write(ctx, vcpu, &sup_paging, nt_sel,
|
||||
&nt_desc);
|
||||
if (error)
|
||||
return (error);
|
||||
}
|
||||
|
||||
/* Update task register to point at the new TSS */
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_TR, nt_sel);
|
||||
|
||||
/* Update the hidden descriptor state of the task register */
|
||||
nt = usd_to_seg_desc(&nt_desc);
|
||||
update_seg_desc(ctx, vcpu, VM_REG_GUEST_TR, &nt);
|
||||
|
||||
/* Set CR0.TS */
|
||||
cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0);
|
||||
SETREG(ctx, vcpu, VM_REG_GUEST_CR0, cr0 | CR0_TS);
|
||||
|
||||
/*
|
||||
* We are now committed to the task switch. Any exceptions encountered
|
||||
* after this point will be handled in the context of the new task and
|
||||
* the saved instruction pointer will belong to the new task.
|
||||
*/
|
||||
vmexit->rip = newtss.tss_eip;
|
||||
vmexit->inst_length = 0;
|
||||
|
||||
/* Load processor state from new TSS */
|
||||
error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov);
|
||||
if (error)
|
||||
return (error);
|
||||
|
||||
/*
|
||||
* Section "Interrupt Tasks" in Intel SDM, Vol 3: if an exception
|
||||
* caused an error code to be generated, this error code is copied
|
||||
* to the stack of the new task.
|
||||
*/
|
||||
if (task_switch->errcode_valid) {
|
||||
assert(task_switch->ext);
|
||||
assert(task_switch->reason == TSR_IDT_GATE);
|
||||
error = push_errcode(ctx, vcpu, &task_switch->paging, nt_type,
|
||||
task_switch->errcode);
|
||||
if (error) {
|
||||
return (error);
|
||||
}
|
||||
}
|
||||
|
||||
/*
|
||||
* Treatment of virtual-NMI blocking if NMI is delivered through
|
||||
* a task gate.
|
||||
*
|
||||
* Section "Architectural State Before A VM Exit", Intel SDM, Vol3:
|
||||
* If the virtual NMIs VM-execution control is 1, VM entry injects
|
||||
* an NMI, and delivery of the NMI causes a task switch that causes
|
||||
* a VM exit, virtual-NMI blocking is in effect before the VM exit
|
||||
* commences.
|
||||
*
|
||||
* Thus, virtual-NMI blocking is in effect at the time of the task
|
||||
* switch VM exit.
|
||||
*/
|
||||
|
||||
/*
|
||||
* Treatment of virtual-NMI unblocking on IRET from NMI handler task.
|
||||
*
|
||||
* Section "Changes to Instruction Behavior in VMX Non-Root Operation"
|
||||
* If "virtual NMIs" control is 1 IRET removes any virtual-NMI blocking.
|
||||
* This unblocking of virtual-NMI occurs even if IRET causes a fault.
|
||||
*
|
||||
* Thus, virtual-NMI blocking is cleared at the time of the task switch
|
||||
* VM exit.
|
||||
*/
|
||||
|
||||
/*
|
||||
* XXX is the original task switch was triggered by a hardware
|
||||
* exception then do we generate a double-fault if we encounter
|
||||
* an exception during the task switch?
|
||||
*/
|
||||
|
||||
/*
|
||||
* XXX should inject debug exception if 'T' bit is 1
|
||||
*/
|
||||
return (VMEXIT_RESTART);
|
||||
}
|
Loading…
Reference in New Issue
Block a user