Fix x2apic support in bhyve.
When the guest is bringing up the APs in x2APIC mode, a write to the ICR register will now trigger a return to userspace with an exitcode of VM_EXITCODE_SPINUP_AP. This gets SMP guests working again with x2APIC. Change the vlapic timer lock to be a spinlock, because the vlapic can be accessed from within a critical section (the vm run loop) when the guest is using x2apic mode. Reviewed by: grehan@
This commit is contained in:
parent
68a9d67c29
commit
becd984900
Notes:
svn2git
2020-12-20 02:59:44 +00:00
svn path=/head/; revision=259205
@ -1337,6 +1337,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
|
||||
struct vmxctx *vmxctx;
|
||||
uint32_t eax, ecx, edx, idtvec_info, idtvec_err, reason;
|
||||
uint64_t qual, gpa, rflags;
|
||||
bool retu;
|
||||
|
||||
handled = 0;
|
||||
vmcs = &vmx->vmcs[vcpu];
|
||||
@ -1382,27 +1383,39 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
|
||||
break;
|
||||
case EXIT_REASON_RDMSR:
|
||||
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_RDMSR, 1);
|
||||
retu = false;
|
||||
ecx = vmxctx->guest_rcx;
|
||||
error = emulate_rdmsr(vmx->vm, vcpu, ecx);
|
||||
error = emulate_rdmsr(vmx->vm, vcpu, ecx, &retu);
|
||||
if (error) {
|
||||
vmexit->exitcode = VM_EXITCODE_RDMSR;
|
||||
vmexit->u.msr.code = ecx;
|
||||
} else
|
||||
} else if (!retu) {
|
||||
handled = 1;
|
||||
} else {
|
||||
/* Return to userspace with a valid exitcode */
|
||||
KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS,
|
||||
("emulate_wrmsr retu with bogus exitcode"));
|
||||
}
|
||||
break;
|
||||
case EXIT_REASON_WRMSR:
|
||||
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_WRMSR, 1);
|
||||
retu = false;
|
||||
eax = vmxctx->guest_rax;
|
||||
ecx = vmxctx->guest_rcx;
|
||||
edx = vmxctx->guest_rdx;
|
||||
error = emulate_wrmsr(vmx->vm, vcpu, ecx,
|
||||
(uint64_t)edx << 32 | eax);
|
||||
(uint64_t)edx << 32 | eax, &retu);
|
||||
if (error) {
|
||||
vmexit->exitcode = VM_EXITCODE_WRMSR;
|
||||
vmexit->u.msr.code = ecx;
|
||||
vmexit->u.msr.wval = (uint64_t)edx << 32 | eax;
|
||||
} else
|
||||
} else if (!retu) {
|
||||
handled = 1;
|
||||
} else {
|
||||
/* Return to userspace with a valid exitcode */
|
||||
KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS,
|
||||
("emulate_wrmsr retu with bogus exitcode"));
|
||||
}
|
||||
break;
|
||||
case EXIT_REASON_HLT:
|
||||
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_HLT, 1);
|
||||
|
@ -139,8 +139,8 @@ struct vlapic {
|
||||
* Note that the vlapic_callout_handler() does not write to any of these
|
||||
* registers so they can be safely read from the vcpu context without locking.
|
||||
*/
|
||||
#define VLAPIC_TIMER_LOCK(vlapic) mtx_lock(&((vlapic)->timer_mtx))
|
||||
#define VLAPIC_TIMER_UNLOCK(vlapic) mtx_unlock(&((vlapic)->timer_mtx))
|
||||
#define VLAPIC_TIMER_LOCK(vlapic) mtx_lock_spin(&((vlapic)->timer_mtx))
|
||||
#define VLAPIC_TIMER_UNLOCK(vlapic) mtx_unlock_spin(&((vlapic)->timer_mtx))
|
||||
#define VLAPIC_TIMER_LOCKED(vlapic) mtx_owned(&((vlapic)->timer_mtx))
|
||||
|
||||
#define VLAPIC_BUS_FREQ tsc_freq
|
||||
@ -613,7 +613,7 @@ vlapic_set_icr_timer(struct vlapic *vlapic, uint32_t icr_timer)
|
||||
static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu");
|
||||
|
||||
static int
|
||||
lapic_process_icr(struct vlapic *vlapic, uint64_t icrval)
|
||||
lapic_process_icr(struct vlapic *vlapic, uint64_t icrval, bool *retu)
|
||||
{
|
||||
int i;
|
||||
cpuset_t dmask;
|
||||
@ -688,17 +688,18 @@ lapic_process_icr(struct vlapic *vlapic, uint64_t icrval)
|
||||
if (vlapic2->boot_state != BS_SIPI)
|
||||
return (0);
|
||||
|
||||
vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
|
||||
vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
|
||||
vmexit->u.spinup_ap.vcpu = dest;
|
||||
vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;
|
||||
|
||||
/*
|
||||
* XXX this assumes that the startup IPI always succeeds
|
||||
*/
|
||||
vlapic2->boot_state = BS_RUNNING;
|
||||
vm_activate_cpu(vlapic2->vm, dest);
|
||||
|
||||
*retu = true;
|
||||
vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
|
||||
vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
|
||||
vmexit->u.spinup_ap.vcpu = dest;
|
||||
vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;
|
||||
|
||||
return (0);
|
||||
}
|
||||
}
|
||||
@ -804,7 +805,7 @@ lapic_set_svr(struct vlapic *vlapic, uint32_t new)
|
||||
}
|
||||
|
||||
int
|
||||
vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data)
|
||||
vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data, bool *retu)
|
||||
{
|
||||
struct LAPIC *lapic = &vlapic->apic;
|
||||
uint32_t *reg;
|
||||
@ -895,7 +896,7 @@ vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data)
|
||||
}
|
||||
|
||||
int
|
||||
vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data)
|
||||
vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data, bool *retu)
|
||||
{
|
||||
struct LAPIC *lapic = &vlapic->apic;
|
||||
int retval;
|
||||
@ -931,7 +932,7 @@ vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data)
|
||||
data &= 0xffffffff;
|
||||
data |= (uint64_t)lapic->icr_hi << 32;
|
||||
}
|
||||
retval = lapic_process_icr(vlapic, data);
|
||||
retval = lapic_process_icr(vlapic, data, retu);
|
||||
break;
|
||||
case APIC_OFFSET_ICR_HI:
|
||||
if (!x2apic(vlapic)) {
|
||||
@ -978,7 +979,14 @@ vlapic_init(struct vm *vm, int vcpuid)
|
||||
vlapic->vm = vm;
|
||||
vlapic->vcpuid = vcpuid;
|
||||
|
||||
mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_DEF);
|
||||
/*
|
||||
* If the vlapic is configured in x2apic mode then it will be
|
||||
* accessed in the critical section via the MSR emulation code.
|
||||
*
|
||||
* Therefore the timer mutex must be a spinlock because blockable
|
||||
* mutexes cannot be acquired in a critical section.
|
||||
*/
|
||||
mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN);
|
||||
callout_init(&vlapic->callout, 1);
|
||||
|
||||
vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;
|
||||
|
@ -90,8 +90,10 @@ enum x2apic_state;
|
||||
|
||||
struct vlapic *vlapic_init(struct vm *vm, int vcpuid);
|
||||
void vlapic_cleanup(struct vlapic *vlapic);
|
||||
int vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data);
|
||||
int vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data);
|
||||
int vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data,
|
||||
bool *retu);
|
||||
int vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data,
|
||||
bool *retu);
|
||||
int vlapic_pending_intr(struct vlapic *vlapic);
|
||||
void vlapic_intr_accepted(struct vlapic *vlapic, int vector);
|
||||
void vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level);
|
||||
|
@ -860,8 +860,7 @@ vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
|
||||
* Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
|
||||
*/
|
||||
static int
|
||||
vm_handle_hlt(struct vm *vm, int vcpuid, boolean_t intr_disabled,
|
||||
boolean_t *retu)
|
||||
vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
|
||||
{
|
||||
struct vm_exit *vmexit;
|
||||
struct vcpu *vcpu;
|
||||
@ -894,7 +893,7 @@ vm_handle_hlt(struct vm *vm, int vcpuid, boolean_t intr_disabled,
|
||||
* Spindown the vcpu if the apic is disabled and it
|
||||
* had entered the halted state.
|
||||
*/
|
||||
*retu = TRUE;
|
||||
*retu = true;
|
||||
vmexit = vm_exitinfo(vm, vcpuid);
|
||||
vmexit->exitcode = VM_EXITCODE_SPINDOWN_CPU;
|
||||
VCPU_CTR0(vm, vcpuid, "spinning down cpu");
|
||||
@ -908,7 +907,7 @@ vm_handle_hlt(struct vm *vm, int vcpuid, boolean_t intr_disabled,
|
||||
}
|
||||
|
||||
static int
|
||||
vm_handle_paging(struct vm *vm, int vcpuid, boolean_t *retu)
|
||||
vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
|
||||
{
|
||||
int rv, ftype;
|
||||
struct vm_map *map;
|
||||
@ -946,7 +945,7 @@ vm_handle_paging(struct vm *vm, int vcpuid, boolean_t *retu)
|
||||
}
|
||||
|
||||
static int
|
||||
vm_handle_inst_emul(struct vm *vm, int vcpuid, boolean_t *retu)
|
||||
vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
|
||||
{
|
||||
struct vie *vie;
|
||||
struct vcpu *vcpu;
|
||||
@ -987,15 +986,12 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, boolean_t *retu)
|
||||
mread = vhpet_mmio_read;
|
||||
mwrite = vhpet_mmio_write;
|
||||
} else {
|
||||
*retu = TRUE;
|
||||
*retu = true;
|
||||
return (0);
|
||||
}
|
||||
|
||||
error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite, 0);
|
||||
|
||||
/* return to userland to spin up the AP */
|
||||
if (error == 0 && vme->exitcode == VM_EXITCODE_SPINUP_AP)
|
||||
*retu = TRUE;
|
||||
error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite,
|
||||
retu);
|
||||
|
||||
return (error);
|
||||
}
|
||||
@ -1008,7 +1004,7 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
|
||||
struct pcb *pcb;
|
||||
uint64_t tscval, rip;
|
||||
struct vm_exit *vme;
|
||||
boolean_t retu, intr_disabled;
|
||||
bool retu, intr_disabled;
|
||||
pmap_t pmap;
|
||||
|
||||
vcpuid = vmrun->cpuid;
|
||||
@ -1048,13 +1044,10 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
|
||||
critical_exit();
|
||||
|
||||
if (error == 0) {
|
||||
retu = FALSE;
|
||||
retu = false;
|
||||
switch (vme->exitcode) {
|
||||
case VM_EXITCODE_HLT:
|
||||
if ((vme->u.hlt.rflags & PSL_I) == 0)
|
||||
intr_disabled = TRUE;
|
||||
else
|
||||
intr_disabled = FALSE;
|
||||
intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
|
||||
error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu);
|
||||
break;
|
||||
case VM_EXITCODE_PAGING:
|
||||
@ -1064,12 +1057,12 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
|
||||
error = vm_handle_inst_emul(vm, vcpuid, &retu);
|
||||
break;
|
||||
default:
|
||||
retu = TRUE; /* handled in userland */
|
||||
retu = true; /* handled in userland */
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
if (error == 0 && retu == FALSE) {
|
||||
if (error == 0 && retu == false) {
|
||||
rip = vme->rip + vme->inst_length;
|
||||
goto restart;
|
||||
}
|
||||
|
@ -107,7 +107,7 @@ lapic_msr(u_int msr)
|
||||
}
|
||||
|
||||
int
|
||||
lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval)
|
||||
lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval, bool *retu)
|
||||
{
|
||||
int error;
|
||||
u_int offset;
|
||||
@ -120,14 +120,14 @@ lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval)
|
||||
error = 0;
|
||||
} else {
|
||||
offset = x2apic_msr_to_regoff(msr);
|
||||
error = vlapic_read(vlapic, offset, rval);
|
||||
error = vlapic_read(vlapic, offset, rval, retu);
|
||||
}
|
||||
|
||||
return (error);
|
||||
}
|
||||
|
||||
int
|
||||
lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t val)
|
||||
lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t val, bool *retu)
|
||||
{
|
||||
int error;
|
||||
u_int offset;
|
||||
@ -140,7 +140,7 @@ lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t val)
|
||||
error = 0;
|
||||
} else {
|
||||
offset = x2apic_msr_to_regoff(msr);
|
||||
error = vlapic_write(vlapic, offset, val);
|
||||
error = vlapic_write(vlapic, offset, val, retu);
|
||||
}
|
||||
|
||||
return (error);
|
||||
@ -164,7 +164,7 @@ lapic_mmio_write(void *vm, int cpu, uint64_t gpa, uint64_t wval, int size,
|
||||
return (EINVAL);
|
||||
|
||||
vlapic = vm_lapic(vm, cpu);
|
||||
error = vlapic_write(vlapic, off, wval);
|
||||
error = vlapic_write(vlapic, off, wval, arg);
|
||||
return (error);
|
||||
}
|
||||
|
||||
@ -186,6 +186,6 @@ lapic_mmio_read(void *vm, int cpu, uint64_t gpa, uint64_t *rval, int size,
|
||||
return (EINVAL);
|
||||
|
||||
vlapic = vm_lapic(vm, cpu);
|
||||
error = vlapic_read(vlapic, off, rval);
|
||||
error = vlapic_read(vlapic, off, rval, arg);
|
||||
return (error);
|
||||
}
|
||||
|
@ -32,8 +32,10 @@
|
||||
struct vm;
|
||||
|
||||
boolean_t lapic_msr(u_int num);
|
||||
int lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval);
|
||||
int lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t wval);
|
||||
int lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval,
|
||||
bool *retu);
|
||||
int lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t wval,
|
||||
bool *retu);
|
||||
|
||||
int lapic_mmio_read(void *vm, int cpu, uint64_t gpa,
|
||||
uint64_t *rval, int size, void *arg);
|
||||
|
@ -154,13 +154,13 @@ msr_num_to_idx(u_int num)
|
||||
}
|
||||
|
||||
int
|
||||
emulate_wrmsr(struct vm *vm, int cpu, u_int num, uint64_t val)
|
||||
emulate_wrmsr(struct vm *vm, int cpu, u_int num, uint64_t val, bool *retu)
|
||||
{
|
||||
int idx;
|
||||
uint64_t *guest_msrs;
|
||||
|
||||
if (lapic_msr(num))
|
||||
return (lapic_wrmsr(vm, cpu, num, val));
|
||||
return (lapic_wrmsr(vm, cpu, num, val, retu));
|
||||
|
||||
idx = msr_num_to_idx(num);
|
||||
if (idx < 0 || invalid_msr(idx))
|
||||
@ -181,14 +181,14 @@ emulate_wrmsr(struct vm *vm, int cpu, u_int num, uint64_t val)
|
||||
}
|
||||
|
||||
int
|
||||
emulate_rdmsr(struct vm *vm, int cpu, u_int num)
|
||||
emulate_rdmsr(struct vm *vm, int cpu, u_int num, bool *retu)
|
||||
{
|
||||
int error, idx;
|
||||
uint32_t eax, edx;
|
||||
uint64_t result, *guest_msrs;
|
||||
|
||||
if (lapic_msr(num)) {
|
||||
error = lapic_rdmsr(vm, cpu, num, &result);
|
||||
error = lapic_rdmsr(vm, cpu, num, &result, retu);
|
||||
goto done;
|
||||
}
|
||||
|
||||
|
@ -33,8 +33,9 @@
|
||||
struct vm;
|
||||
|
||||
void vmm_msr_init(void);
|
||||
int emulate_wrmsr(struct vm *vm, int vcpu, u_int msr, uint64_t val);
|
||||
int emulate_rdmsr(struct vm *vm, int vcpu, u_int msr);
|
||||
int emulate_wrmsr(struct vm *vm, int vcpu, u_int msr, uint64_t val,
|
||||
bool *retu);
|
||||
int emulate_rdmsr(struct vm *vm, int vcpu, u_int msr, bool *retu);
|
||||
void guest_msrs_init(struct vm *vm, int cpu);
|
||||
void guest_msr_valid(int msr);
|
||||
void restore_host_msrs(struct vm *vm, int cpu);
|
||||
|
Loading…
Reference in New Issue
Block a user