Fix x2apic support in bhyve.

When the guest is bringing up the APs in x2APIC mode a write to the
ICR register will now trigger a return to userspace with an exitcode of
VM_EXITCODE_SPINUP_AP. This gets SMP guests working again with x2APIC.

Change the vlapic timer lock to be a spinlock because the vlapic can be
accessed from within a critical section (vm run loop) when the guest is
using x2apic mode.

Reviewed by:	grehan@
This commit is contained in:
Neel Natu 2013-12-10 22:56:51 +00:00
parent 68a9d67c29
commit becd984900
Notes: svn2git 2020-12-20 02:59:44 +00:00
svn path=/head/; revision=259205
8 changed files with 70 additions and 51 deletions

View File

@ -1337,6 +1337,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
struct vmxctx *vmxctx;
uint32_t eax, ecx, edx, idtvec_info, idtvec_err, reason;
uint64_t qual, gpa, rflags;
bool retu;
handled = 0;
vmcs = &vmx->vmcs[vcpu];
@ -1382,27 +1383,39 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
break;
case EXIT_REASON_RDMSR:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_RDMSR, 1);
retu = false;
ecx = vmxctx->guest_rcx;
error = emulate_rdmsr(vmx->vm, vcpu, ecx);
error = emulate_rdmsr(vmx->vm, vcpu, ecx, &retu);
if (error) {
vmexit->exitcode = VM_EXITCODE_RDMSR;
vmexit->u.msr.code = ecx;
} else
} else if (!retu) {
handled = 1;
} else {
/* Return to userspace with a valid exitcode */
KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS,
("emulate_rdmsr retu with bogus exitcode"));
}
break;
case EXIT_REASON_WRMSR:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_WRMSR, 1);
retu = false;
eax = vmxctx->guest_rax;
ecx = vmxctx->guest_rcx;
edx = vmxctx->guest_rdx;
error = emulate_wrmsr(vmx->vm, vcpu, ecx,
(uint64_t)edx << 32 | eax);
(uint64_t)edx << 32 | eax, &retu);
if (error) {
vmexit->exitcode = VM_EXITCODE_WRMSR;
vmexit->u.msr.code = ecx;
vmexit->u.msr.wval = (uint64_t)edx << 32 | eax;
} else
} else if (!retu) {
handled = 1;
} else {
/* Return to userspace with a valid exitcode */
KASSERT(vmexit->exitcode != VM_EXITCODE_BOGUS,
("emulate_wrmsr retu with bogus exitcode"));
}
break;
case EXIT_REASON_HLT:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_HLT, 1);

View File

@ -139,8 +139,8 @@ struct vlapic {
* Note that the vlapic_callout_handler() does not write to any of these
* registers so they can be safely read from the vcpu context without locking.
*/
#define VLAPIC_TIMER_LOCK(vlapic) mtx_lock(&((vlapic)->timer_mtx))
#define VLAPIC_TIMER_UNLOCK(vlapic) mtx_unlock(&((vlapic)->timer_mtx))
#define VLAPIC_TIMER_LOCK(vlapic) mtx_lock_spin(&((vlapic)->timer_mtx))
#define VLAPIC_TIMER_UNLOCK(vlapic) mtx_unlock_spin(&((vlapic)->timer_mtx))
#define VLAPIC_TIMER_LOCKED(vlapic) mtx_owned(&((vlapic)->timer_mtx))
#define VLAPIC_BUS_FREQ tsc_freq
@ -613,7 +613,7 @@ vlapic_set_icr_timer(struct vlapic *vlapic, uint32_t icr_timer)
static VMM_STAT_ARRAY(IPIS_SENT, VM_MAXCPU, "ipis sent to vcpu");
static int
lapic_process_icr(struct vlapic *vlapic, uint64_t icrval)
lapic_process_icr(struct vlapic *vlapic, uint64_t icrval, bool *retu)
{
int i;
cpuset_t dmask;
@ -688,17 +688,18 @@ lapic_process_icr(struct vlapic *vlapic, uint64_t icrval)
if (vlapic2->boot_state != BS_SIPI)
return (0);
vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
vmexit->u.spinup_ap.vcpu = dest;
vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;
/*
* XXX this assumes that the startup IPI always succeeds
*/
vlapic2->boot_state = BS_RUNNING;
vm_activate_cpu(vlapic2->vm, dest);
*retu = true;
vmexit = vm_exitinfo(vlapic->vm, vlapic->vcpuid);
vmexit->exitcode = VM_EXITCODE_SPINUP_AP;
vmexit->u.spinup_ap.vcpu = dest;
vmexit->u.spinup_ap.rip = vec << PAGE_SHIFT;
return (0);
}
}
@ -804,7 +805,7 @@ lapic_set_svr(struct vlapic *vlapic, uint32_t new)
}
int
vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data)
vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data, bool *retu)
{
struct LAPIC *lapic = &vlapic->apic;
uint32_t *reg;
@ -895,7 +896,7 @@ vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data)
}
int
vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data)
vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data, bool *retu)
{
struct LAPIC *lapic = &vlapic->apic;
int retval;
@ -931,7 +932,7 @@ vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data)
data &= 0xffffffff;
data |= (uint64_t)lapic->icr_hi << 32;
}
retval = lapic_process_icr(vlapic, data);
retval = lapic_process_icr(vlapic, data, retu);
break;
case APIC_OFFSET_ICR_HI:
if (!x2apic(vlapic)) {
@ -978,7 +979,14 @@ vlapic_init(struct vm *vm, int vcpuid)
vlapic->vm = vm;
vlapic->vcpuid = vcpuid;
mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_DEF);
/*
* If the vlapic is configured in x2apic mode then it will be
* accessed in the critical section via the MSR emulation code.
*
* Therefore the timer mutex must be a spinlock because blockable
* mutexes cannot be acquired in a critical section.
*/
mtx_init(&vlapic->timer_mtx, "vlapic timer mtx", NULL, MTX_SPIN);
callout_init(&vlapic->callout, 1);
vlapic->msr_apicbase = DEFAULT_APIC_BASE | APICBASE_ENABLED;

View File

@ -90,8 +90,10 @@ enum x2apic_state;
struct vlapic *vlapic_init(struct vm *vm, int vcpuid);
void vlapic_cleanup(struct vlapic *vlapic);
int vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data);
int vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data);
int vlapic_write(struct vlapic *vlapic, uint64_t offset, uint64_t data,
bool *retu);
int vlapic_read(struct vlapic *vlapic, uint64_t offset, uint64_t *data,
bool *retu);
int vlapic_pending_intr(struct vlapic *vlapic);
void vlapic_intr_accepted(struct vlapic *vlapic, int vector);
void vlapic_set_intr_ready(struct vlapic *vlapic, int vector, bool level);

View File

@ -860,8 +860,7 @@ vcpu_require_state_locked(struct vcpu *vcpu, enum vcpu_state newstate)
* Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
*/
static int
vm_handle_hlt(struct vm *vm, int vcpuid, boolean_t intr_disabled,
boolean_t *retu)
vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
{
struct vm_exit *vmexit;
struct vcpu *vcpu;
@ -894,7 +893,7 @@ vm_handle_hlt(struct vm *vm, int vcpuid, boolean_t intr_disabled,
* Spindown the vcpu if the apic is disabled and it
* had entered the halted state.
*/
*retu = TRUE;
*retu = true;
vmexit = vm_exitinfo(vm, vcpuid);
vmexit->exitcode = VM_EXITCODE_SPINDOWN_CPU;
VCPU_CTR0(vm, vcpuid, "spinning down cpu");
@ -908,7 +907,7 @@ vm_handle_hlt(struct vm *vm, int vcpuid, boolean_t intr_disabled,
}
static int
vm_handle_paging(struct vm *vm, int vcpuid, boolean_t *retu)
vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
{
int rv, ftype;
struct vm_map *map;
@ -946,7 +945,7 @@ vm_handle_paging(struct vm *vm, int vcpuid, boolean_t *retu)
}
static int
vm_handle_inst_emul(struct vm *vm, int vcpuid, boolean_t *retu)
vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
{
struct vie *vie;
struct vcpu *vcpu;
@ -987,15 +986,12 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, boolean_t *retu)
mread = vhpet_mmio_read;
mwrite = vhpet_mmio_write;
} else {
*retu = TRUE;
*retu = true;
return (0);
}
error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite, 0);
/* return to userland to spin up the AP */
if (error == 0 && vme->exitcode == VM_EXITCODE_SPINUP_AP)
*retu = TRUE;
error = vmm_emulate_instruction(vm, vcpuid, gpa, vie, mread, mwrite,
retu);
return (error);
}
@ -1008,7 +1004,7 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
struct pcb *pcb;
uint64_t tscval, rip;
struct vm_exit *vme;
boolean_t retu, intr_disabled;
bool retu, intr_disabled;
pmap_t pmap;
vcpuid = vmrun->cpuid;
@ -1048,13 +1044,10 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
critical_exit();
if (error == 0) {
retu = FALSE;
retu = false;
switch (vme->exitcode) {
case VM_EXITCODE_HLT:
if ((vme->u.hlt.rflags & PSL_I) == 0)
intr_disabled = TRUE;
else
intr_disabled = FALSE;
intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
error = vm_handle_hlt(vm, vcpuid, intr_disabled, &retu);
break;
case VM_EXITCODE_PAGING:
@ -1064,12 +1057,12 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
error = vm_handle_inst_emul(vm, vcpuid, &retu);
break;
default:
retu = TRUE; /* handled in userland */
retu = true; /* handled in userland */
break;
}
}
if (error == 0 && retu == FALSE) {
if (error == 0 && retu == false) {
rip = vme->rip + vme->inst_length;
goto restart;
}

View File

@ -107,7 +107,7 @@ lapic_msr(u_int msr)
}
int
lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval)
lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval, bool *retu)
{
int error;
u_int offset;
@ -120,14 +120,14 @@ lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval)
error = 0;
} else {
offset = x2apic_msr_to_regoff(msr);
error = vlapic_read(vlapic, offset, rval);
error = vlapic_read(vlapic, offset, rval, retu);
}
return (error);
}
int
lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t val)
lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t val, bool *retu)
{
int error;
u_int offset;
@ -140,7 +140,7 @@ lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t val)
error = 0;
} else {
offset = x2apic_msr_to_regoff(msr);
error = vlapic_write(vlapic, offset, val);
error = vlapic_write(vlapic, offset, val, retu);
}
return (error);
@ -164,7 +164,7 @@ lapic_mmio_write(void *vm, int cpu, uint64_t gpa, uint64_t wval, int size,
return (EINVAL);
vlapic = vm_lapic(vm, cpu);
error = vlapic_write(vlapic, off, wval);
error = vlapic_write(vlapic, off, wval, arg);
return (error);
}
@ -186,6 +186,6 @@ lapic_mmio_read(void *vm, int cpu, uint64_t gpa, uint64_t *rval, int size,
return (EINVAL);
vlapic = vm_lapic(vm, cpu);
error = vlapic_read(vlapic, off, rval);
error = vlapic_read(vlapic, off, rval, arg);
return (error);
}

View File

@ -32,8 +32,10 @@
struct vm;
boolean_t lapic_msr(u_int num);
int lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval);
int lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t wval);
int lapic_rdmsr(struct vm *vm, int cpu, u_int msr, uint64_t *rval,
bool *retu);
int lapic_wrmsr(struct vm *vm, int cpu, u_int msr, uint64_t wval,
bool *retu);
int lapic_mmio_read(void *vm, int cpu, uint64_t gpa,
uint64_t *rval, int size, void *arg);

View File

@ -154,13 +154,13 @@ msr_num_to_idx(u_int num)
}
int
emulate_wrmsr(struct vm *vm, int cpu, u_int num, uint64_t val)
emulate_wrmsr(struct vm *vm, int cpu, u_int num, uint64_t val, bool *retu)
{
int idx;
uint64_t *guest_msrs;
if (lapic_msr(num))
return (lapic_wrmsr(vm, cpu, num, val));
return (lapic_wrmsr(vm, cpu, num, val, retu));
idx = msr_num_to_idx(num);
if (idx < 0 || invalid_msr(idx))
@ -181,14 +181,14 @@ emulate_wrmsr(struct vm *vm, int cpu, u_int num, uint64_t val)
}
int
emulate_rdmsr(struct vm *vm, int cpu, u_int num)
emulate_rdmsr(struct vm *vm, int cpu, u_int num, bool *retu)
{
int error, idx;
uint32_t eax, edx;
uint64_t result, *guest_msrs;
if (lapic_msr(num)) {
error = lapic_rdmsr(vm, cpu, num, &result);
error = lapic_rdmsr(vm, cpu, num, &result, retu);
goto done;
}

View File

@ -33,8 +33,9 @@
struct vm;
void vmm_msr_init(void);
int emulate_wrmsr(struct vm *vm, int vcpu, u_int msr, uint64_t val);
int emulate_rdmsr(struct vm *vm, int vcpu, u_int msr);
int emulate_wrmsr(struct vm *vm, int vcpu, u_int msr, uint64_t val,
bool *retu);
int emulate_rdmsr(struct vm *vm, int vcpu, u_int msr, bool *retu);
void guest_msrs_init(struct vm *vm, int cpu);
void guest_msr_valid(int msr);
void restore_host_msrs(struct vm *vm, int cpu);