Fix missed posted interrupts in VT-x in bhyve.
When a vCPU is HLTed, interrupts with a priority below the processor priority (PPR) should not resume the vCPU while interrupts at or above the PPR should. With posted interrupts, bhyve maintains a bitmap of pending interrupts in PIR descriptor along with a single 'pending' bit. This bit is checked by a CPU running in guest mode at various places to determine if it should be checked. In addition, another CPU can force a CPU in guest mode to check for pending interrupts by sending an IPI to a special IDT vector reserved for this purpose. bhyve had a bug in that it would only notify a guest vCPU of an interrupt (e.g. by sending the special IPI or by resuming it if it was idle due to HLT) if an interrupt arrived that was higher priority than PPR and no interrupts were currently pending. This assumed that if the 'pending' bit was set, any needed notification was already in progress. However, if the first interrupt sent to a HLTed vCPU was lower priority than PPR and the second was higher than PPR, the first interrupt would set 'pending' but not notify the vCPU, and the second interrupt would not notify the vCPU because 'pending' was already set. To fix this, track the priority of pending interrupts in a separate per-vCPU bitmask and notify a vCPU anytime an interrupt arrives that is above PPR and higher than any previously-received interrupt. This was found and debugged in the bhyve port to SmartOS maintained by Joyent. Relevant SmartOS bugs with more background: https://smartos.org/bugview/OS-6829 https://smartos.org/bugview/OS-6930 https://smartos.org/bugview/OS-7354 Submitted by: Patrick Mooney <pmooney@pfmooney.com> Reviewed by: tychon, rgrimes Obtained from: SmartOS / Joyent MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D19299
This commit is contained in:
parent
51c68d18e2
commit
2c352feb3b
@ -3,6 +3,7 @@
|
||||
*
|
||||
* Copyright (c) 2011 NetApp, Inc.
|
||||
* All rights reserved.
|
||||
* Copyright (c) 2018 Joyent, Inc.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* modification, are permitted provided that the following conditions
|
||||
@ -3402,8 +3403,11 @@ struct vlapic_vtx {
|
||||
struct vlapic vlapic;
|
||||
struct pir_desc *pir_desc;
|
||||
struct vmx *vmx;
|
||||
u_int pending_prio;
|
||||
};
|
||||
|
||||
#define VPR_PRIO_BIT(vpr) (1 << ((vpr) >> 4))
|
||||
|
||||
#define VMX_CTR_PIR(vm, vcpuid, pir_desc, notify, vector, level, msg) \
|
||||
do { \
|
||||
VCPU_CTR2(vm, vcpuid, msg " assert %s-triggered vector %d", \
|
||||
@ -3425,7 +3429,7 @@ vmx_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
|
||||
struct vlapic_vtx *vlapic_vtx;
|
||||
struct pir_desc *pir_desc;
|
||||
uint64_t mask;
|
||||
int idx, notify;
|
||||
int idx, notify = 0;
|
||||
|
||||
vlapic_vtx = (struct vlapic_vtx *)vlapic;
|
||||
pir_desc = vlapic_vtx->pir_desc;
|
||||
@ -3438,7 +3442,37 @@ vmx_set_intr_ready(struct vlapic *vlapic, int vector, bool level)
|
||||
idx = vector / 64;
|
||||
mask = 1UL << (vector % 64);
|
||||
atomic_set_long(&pir_desc->pir[idx], mask);
|
||||
notify = atomic_cmpset_long(&pir_desc->pending, 0, 1);
|
||||
|
||||
/*
|
||||
* A notification is required whenever the 'pending' bit makes a
|
||||
* transition from 0->1.
|
||||
*
|
||||
* Even if the 'pending' bit is already asserted, notification about
|
||||
* the incoming interrupt may still be necessary. For example, if a
|
||||
* vCPU is HLTed with a high PPR, a low priority interrupt would cause
|
||||
* the 0->1 'pending' transition with a notification, but the vCPU
|
||||
* would ignore the interrupt for the time being. The same vCPU would
|
||||
* need to then be notified if a high-priority interrupt arrived which
|
||||
* satisfied the PPR.
|
||||
*
|
||||
* The priorities of interrupts injected while 'pending' is asserted
|
||||
* are tracked in a custom bitfield 'pending_prio'. Should the
|
||||
* to-be-injected interrupt exceed the priorities already present, the
|
||||
* notification is sent. The priorities recorded in 'pending_prio' are
|
||||
* cleared whenever the 'pending' bit makes another 0->1 transition.
|
||||
*/
|
||||
if (atomic_cmpset_long(&pir_desc->pending, 0, 1) != 0) {
|
||||
notify = 1;
|
||||
vlapic_vtx->pending_prio = 0;
|
||||
} else {
|
||||
const u_int old_prio = vlapic_vtx->pending_prio;
|
||||
const u_int prio_bit = VPR_PRIO_BIT(vector & APIC_TPR_INT);
|
||||
|
||||
if ((old_prio & prio_bit) == 0 && prio_bit > old_prio) {
|
||||
atomic_set_int(&vlapic_vtx->pending_prio, prio_bit);
|
||||
notify = 1;
|
||||
}
|
||||
}
|
||||
|
||||
VMX_CTR_PIR(vlapic->vm, vlapic->vcpuid, pir_desc, notify, vector,
|
||||
level, "vmx_set_intr_ready");
|
||||
@ -3504,14 +3538,31 @@ vmx_pending_intr(struct vlapic *vlapic, int *vecptr)
|
||||
VCPU_CTR1(vlapic->vm, vlapic->vcpuid, "HLT with non-zero PPR %d",
|
||||
lapic->ppr);
|
||||
|
||||
vpr = 0;
|
||||
for (i = 3; i >= 0; i--) {
|
||||
pirval = pir_desc->pir[i];
|
||||
if (pirval != 0) {
|
||||
vpr = (i * 64 + flsl(pirval) - 1) & APIC_TPR_INT;
|
||||
return (vpr > ppr);
|
||||
break;
|
||||
}
|
||||
}
|
||||
return (0);
|
||||
|
||||
/*
|
||||
* If the highest-priority pending interrupt falls short of the
|
||||
* processor priority of this vCPU, ensure that 'pending_prio' does not
|
||||
* have any stale bits which would preclude a higher-priority interrupt
|
||||
* from incurring a notification later.
|
||||
*/
|
||||
if (vpr <= ppr) {
|
||||
const u_int prio_bit = VPR_PRIO_BIT(vpr);
|
||||
const u_int old = vlapic_vtx->pending_prio;
|
||||
|
||||
if (old > prio_bit && (old & prio_bit) == 0) {
|
||||
vlapic_vtx->pending_prio = prio_bit;
|
||||
}
|
||||
return (0);
|
||||
}
|
||||
return (1);
|
||||
}
|
||||
|
||||
static void
|
||||
|
Loading…
Reference in New Issue
Block a user