diff --git a/lib/libvmmapi/vmmapi.c b/lib/libvmmapi/vmmapi.c index 0c158456db4d..1e6e627011b9 100644 --- a/lib/libvmmapi/vmmapi.c +++ b/lib/libvmmapi/vmmapi.c @@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -958,9 +959,9 @@ vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities) return (error); } -static int -gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, - uint64_t gla, int prot, int *fault, uint64_t *gpa) +int +vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, + uint64_t gla, int prot, uint64_t *gpa, int *fault) { struct vm_gla2gpa gg; int error; @@ -979,29 +980,18 @@ gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, return (error); } -int -vm_gla2gpa(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, - uint64_t gla, int prot, uint64_t *gpa) -{ - int error, fault; - - error = gla2gpa(ctx, vcpu, paging, gla, prot, &fault, gpa); - if (fault) - error = fault; - return (error); -} - #ifndef min #define min(a,b) (((a) < (b)) ? (a) : (b)) #endif int vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, - uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt) + uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt, + int *fault) { void *va; uint64_t gpa; - int error, fault, i, n, off; + int error, i, n, off; for (i = 0; i < iovcnt; i++) { iov[i].iov_base = 0; @@ -1010,18 +1000,16 @@ vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, while (len) { assert(iovcnt > 0); - error = gla2gpa(ctx, vcpu, paging, gla, prot, &fault, &gpa); - if (error) - return (-1); - if (fault) - return (1); + error = vm_gla2gpa(ctx, vcpu, paging, gla, prot, &gpa, fault); + if (error || *fault) + return (error); off = gpa & PAGE_MASK; n = min(len, PAGE_SIZE - off); va = vm_map_gpa(ctx, gpa, n); if (va == NULL) - return (-1); + return (EFAULT); iov->iov_base = va; iov->iov_len = n; diff --git a/lib/libvmmapi/vmmapi.h b/lib/libvmmapi/vmmapi.h index d001cd8ee423..d3ecdc4f843a 100644 --- a/lib/libvmmapi/vmmapi.h +++ b/lib/libvmmapi/vmmapi.h @@ -64,7 +64,7 @@ int vm_setup_memory(struct vmctx *ctx, size_t len, enum vm_mmap_style s); void *vm_map_gpa(struct vmctx *ctx, vm_paddr_t gaddr, size_t len); int vm_get_gpa_pmap(struct vmctx *, uint64_t gpa, uint64_t *pte, int *num); int vm_gla2gpa(struct vmctx *, int vcpuid, struct vm_guest_paging *paging, - uint64_t gla, int prot, uint64_t *gpa); + uint64_t gla, int prot, uint64_t *gpa, int *fault); uint32_t vm_get_lowmem_limit(struct vmctx *ctx); void vm_set_lowmem_limit(struct vmctx *ctx, uint32_t limit); void vm_set_memflags(struct vmctx *ctx, int flags); @@ -131,10 +131,15 @@ int vm_get_hpet_capabilities(struct vmctx *ctx, uint32_t *capabilities); /* * Translate the GLA range [gla,gla+len) into GPA segments in 'iov'. * The 'iovcnt' should be big enough to accomodate all GPA segments. - * Returns 0 on success, 1 on a guest fault condition and -1 otherwise. + * + * retval fault Interpretation + * 0 0 Success + * 0 1 An exception was injected into the guest + * EFAULT N/A Error */ int vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *pg, - uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt); + uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt, + int *fault); void vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *guest_iov, void *host_dst, size_t len); void vm_copyout(struct vmctx *ctx, int vcpu, const void *host_src, diff --git a/sys/amd64/include/vmm.h b/sys/amd64/include/vmm.h index 7c617bebdf0a..8b9fb7428a63 100644 --- a/sys/amd64/include/vmm.h +++ b/sys/amd64/include/vmm.h @@ -345,9 +345,10 @@ struct vm_copyinfo { * at 'gla' and 'len' bytes long. The 'prot' should be set to PROT_READ for * a copyin or PROT_WRITE for a copyout. * - * Returns 0 on success. - * Returns 1 if an exception was injected into the guest. - * Returns -1 otherwise. + * retval is_fault Intepretation + * 0 0 Success + * 0 1 An exception was injected into the guest + * EFAULT N/A Unrecoverable error * * The 'copyinfo[]' can be passed to 'vm_copyin()' or 'vm_copyout()' only if * the return value is 0. The 'copyinfo[]' resources should be freed by calling @@ -355,7 +356,7 @@ struct vm_copyinfo { */ int vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, - int num_copyinfo); + int num_copyinfo, int *is_fault); void vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, int num_copyinfo); void vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, diff --git a/sys/amd64/include/vmm_instruction_emul.h b/sys/amd64/include/vmm_instruction_emul.h index 651b3b3642cc..5e7127f0129d 100644 --- a/sys/amd64/include/vmm_instruction_emul.h +++ b/sys/amd64/include/vmm_instruction_emul.h @@ -81,17 +81,19 @@ int vie_calculate_gla(enum vm_cpu_mode cpu_mode, enum vm_reg_name seg, */ int vmm_fetch_instruction(struct vm *vm, int cpuid, struct vm_guest_paging *guest_paging, - uint64_t rip, int inst_length, struct vie *vie); + uint64_t rip, int inst_length, struct vie *vie, + int *is_fault); /* * Translate the guest linear address 'gla' to a guest physical address. * - * Returns 0 on success and '*gpa' contains the result of the translation. - * Returns 1 if an exception was injected into the guest. - * Returns -1 otherwise. + * retval is_fault Interpretation + * 0 0 'gpa' contains result of the translation + * 0 1 An exception was injected into the guest + * EFAULT N/A An unrecoverable hypervisor error occurred */ int vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, - uint64_t gla, int prot, uint64_t *gpa); + uint64_t gla, int prot, uint64_t *gpa, int *is_fault); void vie_init(struct vie *vie, const char *inst_bytes, int inst_length); diff --git a/sys/amd64/vmm/vmm.c b/sys/amd64/vmm/vmm.c index bca9b98adc7e..51c63f5459f2 100644 --- a/sys/amd64/vmm/vmm.c +++ b/sys/amd64/vmm/vmm.c @@ -1256,7 +1256,7 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu) mem_region_read_t mread; mem_region_write_t mwrite; enum vm_cpu_mode cpu_mode; - int cs_d, error, length; + int cs_d, error, fault, length; vcpu = &vm->vcpu[vcpuid]; vme = &vcpu->exitinfo; @@ -1279,19 +1279,15 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu) */ length = vme->inst_length ? vme->inst_length : VIE_INST_SIZE; error = vmm_fetch_instruction(vm, vcpuid, paging, vme->rip + - cs_base, length, vie); + cs_base, length, vie, &fault); } else { /* * The instruction bytes have already been copied into 'vie' */ - error = 0; + error = fault = 0; } - if (error == 1) - return (0); /* Resume guest to handle page fault */ - else if (error == -1) - return (EFAULT); - else if (error != 0) - panic("%s: vmm_fetch_instruction error %d", __func__, error); + if (error || fault) + return (error); if (vmm_decode_instruction(vm, vcpuid, gla, cpu_mode, cs_d, vie) != 0) { VCPU_CTR1(vm, vcpuid, "Error decoding instruction at %#lx", @@ -2323,7 +2319,7 @@ vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, int vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo, - int num_copyinfo) + int num_copyinfo, int *fault) { int error, idx, nused; size_t n, off, remaining; @@ -2336,8 +2332,8 @@ vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, remaining = len; while (remaining > 0) { KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo")); - error = vm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa); - if (error) + error = vm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa, fault); + if (error || *fault) return (error); off = gpa & PAGE_MASK; n = min(remaining, PAGE_SIZE - off); @@ -2359,8 +2355,9 @@ vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, if (idx != nused) { vm_copy_teardown(vm, vcpuid, copyinfo, num_copyinfo); - return (-1); + return (EFAULT); } else { + *fault = 0; return (0); } } diff --git a/sys/amd64/vmm/vmm_dev.c b/sys/amd64/vmm/vmm_dev.c index 5be99cbcf4a1..e3e140af5cc1 100644 --- a/sys/amd64/vmm/vmm_dev.c +++ b/sys/amd64/vmm/vmm_dev.c @@ -441,19 +441,9 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, CTASSERT(PROT_EXEC == VM_PROT_EXECUTE); gg = (struct vm_gla2gpa *)data; error = vm_gla2gpa(sc->vm, gg->vcpuid, &gg->paging, gg->gla, - gg->prot, &gg->gpa); - KASSERT(error == 0 || error == 1 || error == -1, + gg->prot, &gg->gpa, &gg->fault); + KASSERT(error == 0 || error == EFAULT, ("%s: vm_gla2gpa unknown error %d", __func__, error)); - if (error >= 0) { - /* - * error = 0: the translation was successful - * error = 1: a fault was injected into the guest - */ - gg->fault = error; - error = 0; - } else { - error = EFAULT; - } break; } case VM_ACTIVATE_CPU: diff --git a/sys/amd64/vmm/vmm_instruction_emul.c b/sys/amd64/vmm/vmm_instruction_emul.c index c83f3e0ba7e9..9c6158a2f0ac 100644 --- a/sys/amd64/vmm/vmm_instruction_emul.c +++ b/sys/amd64/vmm/vmm_instruction_emul.c @@ -597,13 +597,11 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, /* * Helper function to calculate and validate a linear address. - * - * Returns 0 on success and 1 if an exception was injected into the guest. */ static int get_gla(void *vm, int vcpuid, struct vie *vie, struct vm_guest_paging *paging, int opsize, int addrsize, int prot, enum vm_reg_name seg, - enum vm_reg_name gpr, uint64_t *gla) + enum vm_reg_name gpr, uint64_t *gla, int *fault) { struct seg_desc desc; uint64_t cr0, val, rflags; @@ -629,7 +627,7 @@ get_gla(void *vm, int vcpuid, struct vie *vie, struct vm_guest_paging *paging, vm_inject_ss(vm, vcpuid, 0); else vm_inject_gp(vm, vcpuid); - return (1); + goto guest_fault; } if (vie_canonical_check(paging->cpu_mode, *gla)) { @@ -637,14 +635,19 @@ get_gla(void *vm, int vcpuid, struct vie *vie, struct vm_guest_paging *paging, vm_inject_ss(vm, vcpuid, 0); else vm_inject_gp(vm, vcpuid); - return (1); + goto guest_fault; } if (vie_alignment_check(paging->cpl, opsize, cr0, rflags, *gla)) { vm_inject_ac(vm, vcpuid, 0); - return (1); + goto guest_fault; } + *fault = 0; + return (0); + +guest_fault: + *fault = 1; return (0); } @@ -660,7 +663,7 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, #endif uint64_t dstaddr, srcaddr, dstgpa, srcgpa, val; uint64_t rcx, rdi, rsi, rflags; - int error, opsize, seg, repeat; + int error, fault, opsize, seg, repeat; opsize = (vie->op.op_byte == 0xA4) ? 1 : vie->opsize; val = 0; @@ -683,8 +686,10 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, * The count register is %rcx, %ecx or %cx depending on the * address size of the instruction. */ - if ((rcx & vie_size2mask(vie->addrsize)) == 0) - return (0); + if ((rcx & vie_size2mask(vie->addrsize)) == 0) { + error = 0; + goto done; + } } /* @@ -705,13 +710,16 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, seg = vie->segment_override ? vie->segment_register : VM_REG_GUEST_DS; error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize, - PROT_READ, seg, VM_REG_GUEST_RSI, &srcaddr); - if (error) + PROT_READ, seg, VM_REG_GUEST_RSI, &srcaddr, &fault); + if (error || fault) goto done; error = vm_copy_setup(vm, vcpuid, paging, srcaddr, opsize, PROT_READ, - copyinfo, nitems(copyinfo)); + copyinfo, nitems(copyinfo), &fault); if (error == 0) { + if (fault) + goto done; /* Resume guest to handle fault */ + /* * case (2): read from system memory and write to mmio. */ @@ -720,11 +728,6 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, error = memwrite(vm, vcpuid, gpa, val, opsize, arg); if (error) goto done; - } else if (error > 0) { - /* - * Resume guest execution to handle fault. - */ - goto done; } else { /* * 'vm_copy_setup()' is expected to fail for cases (3) and (4) @@ -732,13 +735,17 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, */ error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize, - PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI, &dstaddr); - if (error) + PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI, &dstaddr, + &fault); + if (error || fault) goto done; error = vm_copy_setup(vm, vcpuid, paging, dstaddr, opsize, - PROT_WRITE, copyinfo, nitems(copyinfo)); + PROT_WRITE, copyinfo, nitems(copyinfo), &fault); if (error == 0) { + if (fault) + goto done; /* Resume guest to handle fault */ + /* * case (3): read from MMIO and write to system memory. * @@ -754,27 +761,29 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, vm_copyout(vm, vcpuid, &val, copyinfo, opsize); vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); - } else if (error > 0) { - /* - * Resume guest execution to handle fault. - */ - goto done; } else { /* * Case (4): read from and write to mmio. + * + * Commit to the MMIO read/write (with potential + * side-effects) only after we are sure that the + * instruction is not going to be restarted due + * to address translation faults. */ error = vm_gla2gpa(vm, vcpuid, paging, srcaddr, - PROT_READ, &srcgpa); - if (error) + PROT_READ, &srcgpa, &fault); + if (error || fault) goto done; + + error = vm_gla2gpa(vm, vcpuid, paging, dstaddr, + PROT_WRITE, &dstgpa, &fault); + if (error || fault) + goto done; + error = memread(vm, vcpuid, srcgpa, &val, opsize, arg); if (error) goto done; - error = vm_gla2gpa(vm, vcpuid, paging, dstaddr, - PROT_WRITE, &dstgpa); - if (error) - goto done; error = memwrite(vm, vcpuid, dstgpa, val, opsize, arg); if (error) goto done; @@ -819,10 +828,9 @@ emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie, vm_restart_instruction(vm, vcpuid); } done: - if (error < 0) - return (EFAULT); - else - return (0); + KASSERT(error == 0 || error == EFAULT, ("%s: unexpected error %d", + __func__, error)); + return (error); } static int @@ -1185,7 +1193,7 @@ emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, #endif struct seg_desc ss_desc; uint64_t cr0, rflags, rsp, stack_gla, val; - int error, size, stackaddrsize, pushop; + int error, fault, size, stackaddrsize, pushop; val = 0; size = vie->opsize; @@ -1251,18 +1259,10 @@ emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie, } error = vm_copy_setup(vm, vcpuid, paging, stack_gla, size, - pushop ? PROT_WRITE : PROT_READ, copyinfo, nitems(copyinfo)); - if (error == -1) { - /* - * XXX cannot return a negative error value here because it - * ends up being the return value of the VM_RUN() ioctl and - * is interpreted as a pseudo-error (for e.g. ERESTART). - */ - return (EFAULT); - } else if (error == 1) { - /* Resume guest execution to handle page fault */ - return (0); - } + pushop ? PROT_WRITE : PROT_READ, copyinfo, nitems(copyinfo), + &fault); + if (error || fault) + return (error); if (pushop) { error = memread(vm, vcpuid, mmio_gpa, &val, size, arg); @@ -1672,7 +1672,7 @@ ptp_hold(struct vm *vm, vm_paddr_t ptpphys, size_t len, void **cookie) int vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, - uint64_t gla, int prot, uint64_t *gpa) + uint64_t gla, int prot, uint64_t *gpa, int *guest_fault) { int nlevels, pfcode, ptpshift, ptpindex, retval, usermode, writable; u_int retries; @@ -1680,6 +1680,8 @@ vm_gla2gpa(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, uint32_t *ptpbase32, pte32; void *cookie; + *guest_fault = 0; + usermode = (paging->cpl == 3 ? 1 : 0); writable = prot & VM_PROT_WRITE; cookie = NULL; @@ -1842,18 +1844,20 @@ restart: *gpa = pte | (gla & (pgsize - 1)); done: ptp_release(&cookie); + KASSERT(retval == 0 || retval == EFAULT, ("%s: unexpected retval %d", + __func__, retval)); return (retval); error: - retval = -1; + retval = EFAULT; goto done; fault: - retval = 1; + *guest_fault = 1; goto done; } int vmm_fetch_instruction(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, - uint64_t rip, int inst_length, struct vie *vie) + uint64_t rip, int inst_length, struct vie *vie, int *faultptr) { struct vm_copyinfo copyinfo[2]; int error, prot; @@ -1863,13 +1867,14 @@ vmm_fetch_instruction(struct vm *vm, int vcpuid, struct vm_guest_paging *paging, prot = PROT_READ | PROT_EXEC; error = vm_copy_setup(vm, vcpuid, paging, rip, inst_length, prot, - copyinfo, nitems(copyinfo)); - if (error == 0) { - vm_copyin(vm, vcpuid, copyinfo, vie->inst, inst_length); - vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); - vie->num_valid = inst_length; - } - return (error); + copyinfo, nitems(copyinfo), faultptr); + if (error || *faultptr) + return (error); + + vm_copyin(vm, vcpuid, copyinfo, vie->inst, inst_length); + vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo)); + vie->num_valid = inst_length; + return (0); } static int diff --git a/usr.sbin/bhyve/inout.c b/usr.sbin/bhyve/inout.c index 402b953be882..929bb3c5b839 100644 --- a/usr.sbin/bhyve/inout.c +++ b/usr.sbin/bhyve/inout.c @@ -107,7 +107,7 @@ emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict) uint32_t eax, val; inout_func_t handler; void *arg; - int error, retval; + int error, fault, retval; enum vm_reg_name idxreg; uint64_t gla, index, iterations, count; struct vm_inout_str *vis; @@ -163,11 +163,11 @@ emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict) } error = vm_copy_setup(ctx, vcpu, &vis->paging, gla, - bytes, prot, iov, nitems(iov)); - if (error == -1) { + bytes, prot, iov, nitems(iov), &fault); + if (error) { retval = -1; /* Unrecoverable error */ break; - } else if (error == 1) { + } else if (fault) { retval = 0; /* Resume guest to handle fault */ break; } diff --git a/usr.sbin/bhyve/task_switch.c b/usr.sbin/bhyve/task_switch.c index ba6a9d26b0df..69dfaae26801 100644 --- a/usr.sbin/bhyve/task_switch.c +++ b/usr.sbin/bhyve/task_switch.c @@ -202,7 +202,8 @@ desc_table_limit_check(struct vmctx *ctx, int vcpu, uint16_t sel) */ static int desc_table_rw(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, - uint16_t sel, struct user_segment_descriptor *desc, bool doread) + uint16_t sel, struct user_segment_descriptor *desc, bool doread, + int *faultptr) { struct iovec iov[2]; uint64_t base; @@ -215,28 +216,30 @@ desc_table_rw(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, assert(limit >= SEL_LIMIT(sel)); error = vm_copy_setup(ctx, vcpu, paging, base + SEL_START(sel), - sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov)); - if (error == 0) { - if (doread) - vm_copyin(ctx, vcpu, iov, desc, sizeof(*desc)); - else - vm_copyout(ctx, vcpu, desc, iov, sizeof(*desc)); - } - return (error); + sizeof(*desc), doread ? PROT_READ : PROT_WRITE, iov, nitems(iov), + faultptr); + if (error || *faultptr) + return (error); + + if (doread) + vm_copyin(ctx, vcpu, iov, desc, sizeof(*desc)); + else + vm_copyout(ctx, vcpu, desc, iov, sizeof(*desc)); + return (0); } static int desc_table_read(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, - uint16_t sel, struct user_segment_descriptor *desc) + uint16_t sel, struct user_segment_descriptor *desc, int *faultptr) { - return (desc_table_rw(ctx, vcpu, paging, sel, desc, true)); + return (desc_table_rw(ctx, vcpu, paging, sel, desc, true, faultptr)); } static int desc_table_write(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, - uint16_t sel, struct user_segment_descriptor *desc) + uint16_t sel, struct user_segment_descriptor *desc, int *faultptr) { - return (desc_table_rw(ctx, vcpu, paging, sel, desc, false)); + return (desc_table_rw(ctx, vcpu, paging, sel, desc, false, faultptr)); } /* @@ -248,7 +251,7 @@ desc_table_write(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, */ static int read_tss_descriptor(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, - uint16_t sel, struct user_segment_descriptor *desc) + uint16_t sel, struct user_segment_descriptor *desc, int *faultptr) { struct vm_guest_paging sup_paging; int error; @@ -267,7 +270,7 @@ read_tss_descriptor(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, sup_paging = ts->paging; sup_paging.cpl = 0; /* implicit supervisor mode */ - error = desc_table_read(ctx, vcpu, &sup_paging, sel, desc); + error = desc_table_read(ctx, vcpu, &sup_paging, sel, desc, faultptr); return (error); } @@ -301,14 +304,10 @@ ldt_desc(int sd_type) /* * Validate the descriptor 'seg_desc' associated with 'segment'. - * - * Returns 0 on success. - * Returns 1 if an exception was injected into the guest. - * Returns -1 otherwise. */ static int validate_seg_desc(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, - int segment, struct seg_desc *seg_desc) + int segment, struct seg_desc *seg_desc, int *faultptr) { struct vm_guest_paging sup_paging; struct user_segment_descriptor usd; @@ -369,8 +368,8 @@ validate_seg_desc(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, /* Read the descriptor from the GDT/LDT */ sup_paging = ts->paging; sup_paging.cpl = 0; /* implicit supervisor mode */ - error = desc_table_read(ctx, vcpu, &sup_paging, sel, &usd); - if (error) + error = desc_table_read(ctx, vcpu, &sup_paging, sel, &usd, faultptr); + if (error || *faultptr) return (error); /* Verify that the descriptor type is compatible with the segment */ @@ -476,14 +475,10 @@ update_seg_desc(struct vmctx *ctx, int vcpu, int reg, struct seg_desc *sd) /* * Update the vcpu registers to reflect the state of the new task. - * - * Returns 0 on success. - * Returns 1 if an exception was injected into the guest. - * Returns -1 otherwise. */ static int tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, - uint16_t ot_sel, struct tss32 *tss, struct iovec *iov) + uint16_t ot_sel, struct tss32 *tss, struct iovec *iov, int *faultptr) { struct seg_desc seg_desc, seg_desc2; uint64_t *pdpte, maxphyaddr, reserved; @@ -565,8 +560,9 @@ tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, vm_copyout(ctx, vcpu, tss, iov, sizeof(*tss)); /* Validate segment descriptors */ - error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc); - if (error) + error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_LDTR, &seg_desc, + faultptr); + if (error || *faultptr) return (error); update_seg_desc(ctx, vcpu, VM_REG_GUEST_LDTR, &seg_desc); @@ -579,33 +575,40 @@ tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, * VM-entry checks so the guest can handle any exception injected * during task switch emulation. */ - error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_CS, &seg_desc); - if (error) + error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_CS, &seg_desc, + faultptr); + if (error || *faultptr) return (error); - error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_SS, &seg_desc2); - if (error) + + error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_SS, &seg_desc2, + faultptr); + if (error || *faultptr) return (error); update_seg_desc(ctx, vcpu, VM_REG_GUEST_CS, &seg_desc); update_seg_desc(ctx, vcpu, VM_REG_GUEST_SS, &seg_desc2); ts->paging.cpl = tss->tss_cs & SEL_RPL_MASK; - error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_DS, &seg_desc); - if (error) + error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_DS, &seg_desc, + faultptr); + if (error || *faultptr) return (error); update_seg_desc(ctx, vcpu, VM_REG_GUEST_DS, &seg_desc); - error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_ES, &seg_desc); - if (error) + error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_ES, &seg_desc, + faultptr); + if (error || *faultptr) return (error); update_seg_desc(ctx, vcpu, VM_REG_GUEST_ES, &seg_desc); - error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_FS, &seg_desc); - if (error) + error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_FS, &seg_desc, + faultptr); + if (error || *faultptr) return (error); update_seg_desc(ctx, vcpu, VM_REG_GUEST_FS, &seg_desc); - error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_GS, &seg_desc); - if (error) + error = validate_seg_desc(ctx, vcpu, ts, VM_REG_GUEST_GS, &seg_desc, + faultptr); + if (error || *faultptr) return (error); update_seg_desc(ctx, vcpu, VM_REG_GUEST_GS, &seg_desc); @@ -616,14 +619,10 @@ tss32_restore(struct vmctx *ctx, int vcpu, struct vm_task_switch *ts, * Push an error code on the stack of the new task. This is needed if the * task switch was triggered by a hardware exception that causes an error * code to be saved (e.g. #PF). - * - * Returns 0 on success. - * Returns 1 if an exception was injected into the guest. - * Returns -1 otherwise. */ static int push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, - int task_type, uint32_t errcode) + int task_type, uint32_t errcode, int *faultptr) { struct iovec iov[2]; struct seg_desc seg_desc; @@ -632,6 +631,8 @@ push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, uint32_t esp; uint16_t stacksel; + *faultptr = 0; + cr0 = GETREG(ctx, vcpu, VM_REG_GUEST_CR0); rflags = GETREG(ctx, vcpu, VM_REG_GUEST_RFLAGS); stacksel = GETREG(ctx, vcpu, VM_REG_GUEST_SS); @@ -666,17 +667,19 @@ push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, if (vie_calculate_gla(paging->cpu_mode, VM_REG_GUEST_SS, &seg_desc, esp, bytes, stacksize, PROT_WRITE, &gla)) { sel_exception(ctx, vcpu, IDT_SS, stacksel, 1); - return (1); + *faultptr = 1; + return (0); } if (vie_alignment_check(paging->cpl, bytes, cr0, rflags, gla)) { vm_inject_ac(ctx, vcpu, 1); - return (1); + *faultptr = 1; + return (0); } error = vm_copy_setup(ctx, vcpu, paging, gla, bytes, PROT_WRITE, - iov, nitems(iov)); - if (error) + iov, nitems(iov), faultptr); + if (error || *faultptr) return (error); vm_copyout(ctx, vcpu, &errcode, iov, bytes); @@ -687,16 +690,13 @@ push_errcode(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging, /* * Evaluate return value from helper functions and potentially return to * the VM run loop. - * 0: success - * +1: an exception was injected into the guest vcpu - * -1: unrecoverable/programming error */ -#define CHKERR(x) \ +#define CHKERR(error,fault) \ do { \ - assert(((x) == 0) || ((x) == 1) || ((x) == -1)); \ - if ((x) == -1) \ + assert((error == 0) || (error == EFAULT)); \ + if (error) \ return (VMEXIT_ABORT); \ - else if ((x) == 1) \ + else if (fault) \ return (VMEXIT_CONTINUE); \ } while (0) @@ -711,7 +711,7 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) struct iovec nt_iov[2], ot_iov[2]; uint64_t cr0, ot_base; uint32_t eip, ot_lim, access; - int error, ext, minlimit, nt_type, ot_type, vcpu; + int error, ext, fault, minlimit, nt_type, ot_type, vcpu; enum task_switch_reason reason; uint16_t nt_sel, ot_sel; @@ -739,8 +739,9 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) sup_paging.cpl = 0; /* implicit supervisor mode */ /* Fetch the new TSS descriptor */ - error = read_tss_descriptor(ctx, vcpu, task_switch, nt_sel, &nt_desc); - CHKERR(error); + error = read_tss_descriptor(ctx, vcpu, task_switch, nt_sel, &nt_desc, + &fault); + CHKERR(error, fault); nt = usd_to_seg_desc(&nt_desc); @@ -792,8 +793,8 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) /* Fetch the new TSS */ error = vm_copy_setup(ctx, vcpu, &sup_paging, nt.base, minlimit + 1, - PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov)); - CHKERR(error); + PROT_READ | PROT_WRITE, nt_iov, nitems(nt_iov), &fault); + CHKERR(error, fault); vm_copyin(ctx, vcpu, nt_iov, &newtss, minlimit + 1); /* Get the old TSS selector from the guest's task register */ @@ -818,13 +819,14 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) assert(ot_type == SDT_SYS386BSY || ot_type == SDT_SYS286BSY); /* Fetch the old TSS descriptor */ - error = read_tss_descriptor(ctx, vcpu, task_switch, ot_sel, &ot_desc); - CHKERR(error); + error = read_tss_descriptor(ctx, vcpu, task_switch, ot_sel, &ot_desc, + &fault); + CHKERR(error, fault); /* Get the old TSS */ error = vm_copy_setup(ctx, vcpu, &sup_paging, ot_base, minlimit + 1, - PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov)); - CHKERR(error); + PROT_READ | PROT_WRITE, ot_iov, nitems(ot_iov), &fault); + CHKERR(error, fault); vm_copyin(ctx, vcpu, ot_iov, &oldtss, minlimit + 1); /* @@ -834,8 +836,8 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) if (reason == TSR_IRET || reason == TSR_JMP) { ot_desc.sd_type &= ~0x2; error = desc_table_write(ctx, vcpu, &sup_paging, ot_sel, - &ot_desc); - CHKERR(error); + &ot_desc, &fault); + CHKERR(error, fault); } if (nt_type == SDT_SYS286BSY || nt_type == SDT_SYS286TSS) { @@ -853,8 +855,8 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) if (reason != TSR_IRET) { nt_desc.sd_type |= 0x2; error = desc_table_write(ctx, vcpu, &sup_paging, nt_sel, - &nt_desc); - CHKERR(error); + &nt_desc, &fault); + CHKERR(error, fault); } /* Update task register to point at the new TSS */ @@ -877,8 +879,9 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) assert(error == 0); /* Load processor state from new TSS */ - error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov); - CHKERR(error); + error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov, + &fault); + CHKERR(error, fault); /* * Section "Interrupt Tasks" in Intel SDM, Vol 3: if an exception @@ -889,8 +892,8 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu) assert(task_switch->ext); assert(task_switch->reason == TSR_IDT_GATE); error = push_errcode(ctx, vcpu, &task_switch->paging, nt_type, - task_switch->errcode); - CHKERR(error); + task_switch->errcode, &fault); + CHKERR(error, fault); } /*