MFC r276428:

Replace bhyve's minimal RTC emulation with a fully featured one in vmm.ko.

MFC r276432:
Initialize all fields of 'struct vm_exception exception' before passing it
to vm_inject_exception().

MFC r276763:
Clear blocking due to STI or MOV SS in the hypervisor when an instruction is
emulated or when the vcpu incurs an exception.

MFC r277149:
Clean up usage of 'struct vm_exception' so that it is used only to communicate
information from userspace to vmm.ko when injecting an exception.

MFC r277168:
Fix typo (missing comma).

MFC r277309:
Make the error message explicit instead of just printing the usage if the
virtual machine name is not specified.

MFC r277310:
Simplify instruction restart logic in bhyve.

MFC r277359:
Fix a bug in libvmmapi 'vm_copy_setup()' where it would return success even
if the 'gpa' was in the guest MMIO region.

MFC r277360:
MOVS instruction emulation.

MFC r277626:
Add macro to identify AVIC capability (advanced virtual interrupt controller)
in AMD processors.

MFC r279220:
Don't close a block context if it couldn't be opened, avoiding a null deref.

MFC r279225:
Add "-u" option to bhyve(8) to indicate that the RTC should maintain UTC time.

MFC r279227:
Emulate MSR 0xC0011024 when running on AMD processors.

MFC r279228:
Always emulate MSR_PAT on Intel processors and don't rely on PAT save/restore
capability of VT-x. This lets bhyve run nested in older VMware versions that
don't support the PAT save/restore capability.

MFC r279540:
Fix warnings/errors when building vmm.ko with gcc.
Author: neel
Date: 2015-06-27 22:48:22 +00:00
Parent: 02efaba1d1
Commit: 115742fae3
30 changed files with 1875 additions and 564 deletions

@ -368,14 +368,13 @@ vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *ret_val)
}
int
vm_run(struct vmctx *ctx, int vcpu, uint64_t rip, struct vm_exit *vmexit)
vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit)
{
int error;
struct vm_run vmrun;
bzero(&vmrun, sizeof(vmrun));
vmrun.cpuid = vcpu;
vmrun.rip = rip;
error = ioctl(ctx->fd, VM_RUN, &vmrun);
bcopy(&vmrun.vm_exit, vmexit, sizeof(struct vm_exit));
@ -399,35 +398,21 @@ vm_reinit(struct vmctx *ctx)
return (ioctl(ctx->fd, VM_REINIT, 0));
}
static int
vm_inject_exception_real(struct vmctx *ctx, int vcpu, int vector,
int error_code, int error_code_valid)
int
vm_inject_exception(struct vmctx *ctx, int vcpu, int vector, int errcode_valid,
uint32_t errcode, int restart_instruction)
{
struct vm_exception exc;
bzero(&exc, sizeof(exc));
exc.cpuid = vcpu;
exc.vector = vector;
exc.error_code = error_code;
exc.error_code_valid = error_code_valid;
exc.error_code = errcode;
exc.error_code_valid = errcode_valid;
exc.restart_instruction = restart_instruction;
return (ioctl(ctx->fd, VM_INJECT_EXCEPTION, &exc));
}
int
vm_inject_exception(struct vmctx *ctx, int vcpu, int vector)
{
return (vm_inject_exception_real(ctx, vcpu, vector, 0, 0));
}
int
vm_inject_exception2(struct vmctx *ctx, int vcpu, int vector, int errcode)
{
return (vm_inject_exception_real(ctx, vcpu, vector, errcode, 1));
}
int
vm_apicid2vcpu(struct vmctx *ctx, int apicid)
{
@ -1002,6 +987,7 @@ int
vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
uint64_t gla, size_t len, int prot, struct iovec *iov, int iovcnt)
{
void *va;
uint64_t gpa;
int error, fault, i, n, off;
@ -1021,7 +1007,11 @@ vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
off = gpa & PAGE_MASK;
n = min(len, PAGE_SIZE - off);
iov->iov_base = (void *)gpa;
va = vm_map_gpa(ctx, gpa, n);
if (va == NULL)
return (-1);
iov->iov_base = va;
iov->iov_len = n;
iov++;
iovcnt--;
@ -1032,20 +1022,25 @@ vm_copy_setup(struct vmctx *ctx, int vcpu, struct vm_guest_paging *paging,
return (0);
}
void
vm_copy_teardown(struct vmctx *ctx, int vcpu, struct iovec *iov, int iovcnt)
{
return;
}
void
vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *iov, void *vp, size_t len)
{
const char *src;
char *dst;
uint64_t gpa;
size_t n;
dst = vp;
while (len) {
assert(iov->iov_len);
gpa = (uint64_t)iov->iov_base;
n = min(len, iov->iov_len);
src = vm_map_gpa(ctx, gpa, n);
src = iov->iov_base;
bcopy(src, dst, n);
iov++;
@ -1060,15 +1055,13 @@ vm_copyout(struct vmctx *ctx, int vcpu, const void *vp, struct iovec *iov,
{
const char *src;
char *dst;
uint64_t gpa;
size_t n;
src = vp;
while (len) {
assert(iov->iov_len);
gpa = (uint64_t)iov->iov_base;
n = min(len, iov->iov_len);
dst = vm_map_gpa(ctx, gpa, n);
dst = iov->iov_base;
bcopy(src, dst, n);
iov++;
@ -1146,3 +1139,63 @@ vm_set_intinfo(struct vmctx *ctx, int vcpu, uint64_t info1)
error = ioctl(ctx->fd, VM_SET_INTINFO, &vmii);
return (error);
}
int
vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value)
{
struct vm_rtc_data rtcdata;
int error;
bzero(&rtcdata, sizeof(struct vm_rtc_data));
rtcdata.offset = offset;
rtcdata.value = value;
error = ioctl(ctx->fd, VM_RTC_WRITE, &rtcdata);
return (error);
}
int
vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval)
{
struct vm_rtc_data rtcdata;
int error;
bzero(&rtcdata, sizeof(struct vm_rtc_data));
rtcdata.offset = offset;
error = ioctl(ctx->fd, VM_RTC_READ, &rtcdata);
if (error == 0)
*retval = rtcdata.value;
return (error);
}
int
vm_rtc_settime(struct vmctx *ctx, time_t secs)
{
struct vm_rtc_time rtctime;
int error;
bzero(&rtctime, sizeof(struct vm_rtc_time));
rtctime.secs = secs;
error = ioctl(ctx->fd, VM_RTC_SETTIME, &rtctime);
return (error);
}
int
vm_rtc_gettime(struct vmctx *ctx, time_t *secs)
{
struct vm_rtc_time rtctime;
int error;
bzero(&rtctime, sizeof(struct vm_rtc_time));
error = ioctl(ctx->fd, VM_RTC_GETTIME, &rtctime);
if (error == 0)
*secs = rtctime.secs;
return (error);
}
int
vm_restart_instruction(void *arg, int vcpu)
{
struct vmctx *ctx = arg;
return (ioctl(ctx->fd, VM_RESTART_INSTRUCTION, &vcpu));
}
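For illustration, a minimal sketch of how a libvmmapi consumer adapts to the
reworked interfaces above: vm_run() no longer takes a %rip argument (the kernel
now tracks the next instruction itself), and the old vm_inject_exception()/
vm_inject_exception2() pair collapses into one function. The helper name
run_vcpu() and the choice of vector are hypothetical, not part of this commit.

#include <machine/vmm.h>
#include <vmmapi.h>

static void
run_vcpu(struct vmctx *ctx, int vcpu)
{
	struct vm_exit vmexit;

	/* %rip now comes from the kernel's per-vcpu 'nextrip' state. */
	if (vm_run(ctx, vcpu, &vmexit) != 0)
		return;

	if (vmexit.exitcode == VM_EXITCODE_INST_EMUL) {
		/*
		 * Inject #UD (vector 6) with no error code and ask the
		 * kernel to restart the faulting instruction.
		 */
		vm_inject_exception(ctx, vcpu, 6, 0, 0, 1);
	}
}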

@ -32,6 +32,12 @@
#include <sys/param.h>
#include <sys/cpuset.h>
/*
* API version for out-of-tree consumers like grub-bhyve for making compile
* time decisions.
*/
#define VMMAPI_VERSION 0101 /* 2 digit major followed by 2 digit minor */
struct iovec;
struct vmctx;
enum x2apic_state;
@ -70,13 +76,12 @@ int vm_get_seg_desc(struct vmctx *ctx, int vcpu, int reg,
struct seg_desc *seg_desc);
int vm_set_register(struct vmctx *ctx, int vcpu, int reg, uint64_t val);
int vm_get_register(struct vmctx *ctx, int vcpu, int reg, uint64_t *retval);
int vm_run(struct vmctx *ctx, int vcpu, uint64_t rip,
struct vm_exit *ret_vmexit);
int vm_run(struct vmctx *ctx, int vcpu, struct vm_exit *ret_vmexit);
int vm_suspend(struct vmctx *ctx, enum vm_suspend_how how);
int vm_reinit(struct vmctx *ctx);
int vm_apicid2vcpu(struct vmctx *ctx, int apicid);
int vm_inject_exception(struct vmctx *ctx, int vcpu, int vec);
int vm_inject_exception2(struct vmctx *ctx, int vcpu, int vec, int errcode);
int vm_inject_exception(struct vmctx *ctx, int vcpu, int vector,
int errcode_valid, uint32_t errcode, int restart_instruction);
int vm_lapic_irq(struct vmctx *ctx, int vcpu, int vector);
int vm_lapic_local_irq(struct vmctx *ctx, int vcpu, int vector);
int vm_lapic_msi(struct vmctx *ctx, uint64_t addr, uint64_t msg);
@ -132,6 +137,14 @@ void vm_copyin(struct vmctx *ctx, int vcpu, struct iovec *guest_iov,
void *host_dst, size_t len);
void vm_copyout(struct vmctx *ctx, int vcpu, const void *host_src,
struct iovec *guest_iov, size_t len);
void vm_copy_teardown(struct vmctx *ctx, int vcpu, struct iovec *iov,
int iovcnt);
/* RTC */
int vm_rtc_write(struct vmctx *ctx, int offset, uint8_t value);
int vm_rtc_read(struct vmctx *ctx, int offset, uint8_t *retval);
int vm_rtc_settime(struct vmctx *ctx, time_t secs);
int vm_rtc_gettime(struct vmctx *ctx, time_t *secs);
/* Reset vcpu register state */
int vcpu_reset(struct vmctx *ctx, int vcpu);
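The VMMAPI_VERSION macro above exists precisely so that out-of-tree consumers
such as grub-bhyve can make compile-time decisions. A sketch of such a guard
follows; the INJECT_GP macro is hypothetical and vector 13 is #GP.

#include <vmmapi.h>

#if defined(VMMAPI_VERSION) && VMMAPI_VERSION >= 0101
/* New form: vector, errcode_valid, errcode, restart_instruction. */
#define	INJECT_GP(ctx, vcpu)	vm_inject_exception((ctx), (vcpu), 13, 1, 0, 0)
#else
/* Older libvmmapi spelled the error-code variant vm_inject_exception2(). */
#define	INJECT_GP(ctx, vcpu)	vm_inject_exception2((ctx), (vcpu), 13, 0)
#endif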

@ -39,7 +39,13 @@ DEFAULT_CONSOLE=stdio
DEFAULT_VIRTIO_DISK="./diskdev"
DEFAULT_ISOFILE="./release.iso"
errmsg() {
echo "*** $1"
}
usage() {
local msg=$1
echo "Usage: vmrun.sh [-ahi] [-c <CPUs>] [-C <console>] [-d <disk file>]"
echo " [-e <name=value>] [-g <gdbport> ] [-H <directory>]"
echo " [-I <location of installation iso>] [-m <memsize>]"
@ -58,18 +64,18 @@ usage() {
echo " -m: memory size (default is ${DEFAULT_MEMSIZE})"
echo " -t: tap device for virtio-net (default is $DEFAULT_TAPDEV)"
echo ""
echo " This script needs to be executed with superuser privileges"
echo ""
[ -n "$msg" ] && errmsg "$msg"
exit 1
}
if [ `id -u` -ne 0 ]; then
usage
errmsg "This script must be executed with superuser privileges"
exit 1
fi
kldstat -n vmm > /dev/null 2>&1
if [ $? -ne 0 ]; then
echo "vmm.ko is not loaded!"
errmsg "vmm.ko is not loaded"
exit 1
fi
@ -143,7 +149,7 @@ fi
shift $((${OPTIND} - 1))
if [ $# -ne 1 ]; then
usage
usage "virtual machine name not specified"
fi
vmname="$1"

@ -286,9 +286,10 @@ int vm_unassign_pptdev(struct vm *vm, int bus, int slot, int func);
struct vatpic *vm_atpic(struct vm *vm);
struct vatpit *vm_atpit(struct vm *vm);
struct vpmtmr *vm_pmtmr(struct vm *vm);
struct vrtc *vm_rtc(struct vm *vm);
/*
* Inject exception 'vme' into the guest vcpu. This function returns 0 on
* Inject exception 'vector' into the guest vcpu. This function returns 0 on
* success and non-zero on failure.
*
* Wrapper functions like 'vm_inject_gp()' should be preferred to calling
@ -298,7 +299,8 @@ struct vpmtmr *vm_pmtmr(struct vm *vm);
* This function should only be called in the context of the thread that is
* executing this vcpu.
*/
int vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *vme);
int vm_inject_exception(struct vm *vm, int vcpuid, int vector, int err_valid,
uint32_t errcode, int restart_instruction);
/*
* This function is called after a VM-exit that occurred during exception or
@ -444,8 +446,11 @@ struct vie {
rex_x:1,
rex_b:1,
rex_present:1,
repz_present:1, /* REP/REPE/REPZ prefix */
repnz_present:1, /* REPNE/REPNZ prefix */
opsize_override:1, /* Operand size override */
addrsize_override:1; /* Address size override */
addrsize_override:1, /* Address size override */
segment_override:1; /* Segment override */
uint8_t mod:2, /* ModRM byte */
reg:4,
@ -461,6 +466,7 @@ struct vie {
uint8_t scale;
int base_register; /* VM_REG_GUEST_xyz */
int index_register; /* VM_REG_GUEST_xyz */
int segment_register; /* VM_REG_GUEST_xyz */
int64_t displacement; /* optional addr displacement */
int64_t immediate; /* optional immediate operand */
@ -627,4 +633,6 @@ vm_inject_ss(void *vm, int vcpuid, int errcode)
void vm_inject_pf(void *vm, int vcpuid, int error_code, uint64_t cr2);
int vm_restart_instruction(void *vm, int vcpuid);
#endif /* _VMM_H_ */
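As the comment above notes, wrappers such as vm_inject_gp() are preferred over
calling vm_inject_exception() directly. A rough sketch of the relationship;
fault_guest() is a hypothetical caller and the exact wrapper internals may
differ.

static void
fault_guest(struct vm *vm, int vcpuid)
{
	/*
	 * Preferred form. The wrapper supplies vector 13 (#GP), a valid
	 * zero error code, and requests an instruction restart, roughly
	 * equivalent to:
	 *
	 *	vm_inject_exception(vm, vcpuid, 13, 1, 0, 1);
	 */
	vm_inject_gp(vm, vcpuid);
}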

@ -54,7 +54,6 @@ struct vm_seg_desc { /* data or code segment */
struct vm_run {
int cpuid;
uint64_t rip; /* start running here */
struct vm_exit vm_exit;
};
@ -63,6 +62,7 @@ struct vm_exception {
int vector;
uint32_t error_code;
int error_code_valid;
int restart_instruction;
};
struct vm_lapic_msi {
@ -195,6 +195,15 @@ struct vm_intinfo {
uint64_t info2;
};
struct vm_rtc_time {
time_t secs;
};
struct vm_rtc_data {
int offset;
uint8_t value;
};
enum {
/* general routines */
IOCNUM_ABIVERS = 0,
@ -228,6 +237,7 @@ enum {
IOCNUM_LAPIC_MSI = 36,
IOCNUM_LAPIC_LOCAL_IRQ = 37,
IOCNUM_IOAPIC_PINCOUNT = 38,
IOCNUM_RESTART_INSTRUCTION = 39,
/* PCI pass-thru */
IOCNUM_BIND_PPTDEV = 40,
@ -254,6 +264,12 @@ enum {
/* vm_cpuset */
IOCNUM_ACTIVATE_CPU = 90,
IOCNUM_GET_CPUSET = 91,
/* RTC */
IOCNUM_RTC_READ = 100,
IOCNUM_RTC_WRITE = 101,
IOCNUM_RTC_SETTIME = 102,
IOCNUM_RTC_GETTIME = 103,
};
#define VM_RUN \
@ -336,4 +352,14 @@ enum {
_IOW('v', IOCNUM_SET_INTINFO, struct vm_intinfo)
#define VM_GET_INTINFO \
_IOWR('v', IOCNUM_GET_INTINFO, struct vm_intinfo)
#define VM_RTC_WRITE \
_IOW('v', IOCNUM_RTC_WRITE, struct vm_rtc_data)
#define VM_RTC_READ \
_IOWR('v', IOCNUM_RTC_READ, struct vm_rtc_data)
#define VM_RTC_SETTIME \
_IOW('v', IOCNUM_RTC_SETTIME, struct vm_rtc_time)
#define VM_RTC_GETTIME \
_IOR('v', IOCNUM_RTC_GETTIME, struct vm_rtc_time)
#define VM_RESTART_INSTRUCTION \
_IOW('v', IOCNUM_RESTART_INSTRUCTION, int)
#endif
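For illustration, the new RTC ioctls can also be driven without libvmmapi. A
minimal sketch, assuming 'fd' is an open /dev/vmm/<name> descriptor;
read_rtc_nvram() is a hypothetical helper.

#include <sys/ioctl.h>
#include <machine/vmm_dev.h>
#include <stdint.h>

static int
read_rtc_nvram(int fd, int offset, uint8_t *valp)
{
	struct vm_rtc_data rtcdata;

	rtcdata.offset = offset;
	if (ioctl(fd, VM_RTC_READ, &rtcdata) != 0)
		return (-1);
	*valp = rtcdata.value;
	return (0);
}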

@ -80,6 +80,7 @@ SYSCTL_NODE(_hw_vmm, OID_AUTO, svm, CTLFLAG_RW, NULL, NULL);
#define AMD_CPUID_SVM_DECODE_ASSIST BIT(7) /* Decode assist */
#define AMD_CPUID_SVM_PAUSE_INC BIT(10) /* Pause intercept filter. */
#define AMD_CPUID_SVM_PAUSE_FTH BIT(12) /* Pause filter threshold */
#define AMD_CPUID_SVM_AVIC BIT(13) /* AVIC present */
#define VMCB_CACHE_DEFAULT (VMCB_CACHE_ASID | \
VMCB_CACHE_IOPM | \
@ -554,6 +555,7 @@ svm_vminit(struct vm *vm, pmap_t pmap)
pml4_pa = svm_sc->nptp;
for (i = 0; i < VM_MAXCPU; i++) {
vcpu = svm_get_vcpu(svm_sc, i);
vcpu->nextrip = ~0;
vcpu->lastcpu = NOCPU;
vcpu->vmcb_pa = vtophys(&vcpu->vmcb);
vmcb_init(svm_sc, i, iopm_pa, msrpm_pa, pml4_pa);
@ -1200,7 +1202,6 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
struct vmcb_state *state;
struct vmcb_ctrl *ctrl;
struct svm_regctx *ctx;
struct vm_exception exception;
uint64_t code, info1, info2, val;
uint32_t eax, ecx, edx;
int error, errcode_valid, handled, idtvec, reflect;
@ -1314,6 +1315,7 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
/* fallthru */
default:
errcode_valid = 0;
info1 = 0;
break;
}
KASSERT(vmexit->inst_length == 0, ("invalid inst_length (%d) "
@ -1322,14 +1324,10 @@ svm_vmexit(struct svm_softc *svm_sc, int vcpu, struct vm_exit *vmexit)
if (reflect) {
/* Reflect the exception back into the guest */
exception.vector = idtvec;
exception.error_code_valid = errcode_valid;
exception.error_code = errcode_valid ? info1 : 0;
VCPU_CTR2(svm_sc->vm, vcpu, "Reflecting exception "
"%d/%#x into the guest", exception.vector,
exception.error_code);
error = vm_inject_exception(svm_sc->vm, vcpu,
&exception);
"%d/%#x into the guest", idtvec, (int)info1);
error = vm_inject_exception(svm_sc->vm, vcpu, idtvec,
errcode_valid, info1, 0);
KASSERT(error == 0, ("%s: vm_inject_exception error %d",
__func__, error));
}
@ -1476,15 +1474,24 @@ svm_inj_interrupts(struct svm_softc *sc, int vcpu, struct vlapic *vlapic)
{
struct vmcb_ctrl *ctrl;
struct vmcb_state *state;
struct svm_vcpu *vcpustate;
uint8_t v_tpr;
int vector, need_intr_window, pending_apic_vector;
state = svm_get_vmcb_state(sc, vcpu);
ctrl = svm_get_vmcb_ctrl(sc, vcpu);
vcpustate = svm_get_vcpu(sc, vcpu);
need_intr_window = 0;
pending_apic_vector = 0;
if (vcpustate->nextrip != state->rip) {
ctrl->intr_shadow = 0;
VCPU_CTR2(sc->vm, vcpu, "Guest interrupt blocking "
"cleared due to rip change: %#lx/%#lx",
vcpustate->nextrip, state->rip);
}
/*
* Inject pending events or exceptions for this vcpu.
*
@ -1634,7 +1641,7 @@ done:
* VMRUN.
*/
v_tpr = vlapic_get_cr8(vlapic);
KASSERT(v_tpr >= 0 && v_tpr <= 15, ("invalid v_tpr %#x", v_tpr));
KASSERT(v_tpr <= 15, ("invalid v_tpr %#x", v_tpr));
if (ctrl->v_tpr != v_tpr) {
VCPU_CTR2(sc->vm, vcpu, "VMCB V_TPR changed from %#x to %#x",
ctrl->v_tpr, v_tpr);
@ -1801,14 +1808,14 @@ static __inline void
disable_gintr(void)
{
__asm __volatile("clgi" : : :);
__asm __volatile("clgi");
}
static __inline void
enable_gintr(void)
{
__asm __volatile("stgi" : : :);
__asm __volatile("stgi");
}
/*
@ -1955,6 +1962,9 @@ svm_vmrun(void *arg, int vcpu, register_t rip, pmap_t pmap,
/* #VMEXIT disables interrupts so re-enable them here. */
enable_gintr();
/* Update 'nextrip' */
vcpustate->nextrip = state->rip;
/* Handle #VMEXIT and if required return to user space. */
handled = svm_vmexit(svm_sc, vcpu, vmexit);
} while (handled);

@ -45,6 +45,7 @@ struct svm_vcpu {
struct vmcb vmcb; /* hardware saved vcpu context */
struct svm_regctx swctx; /* software saved vcpu context */
uint64_t vmcb_pa; /* VMCB physical address */
uint64_t nextrip; /* next instruction to be executed by guest */
int lastcpu; /* host cpu that the vcpu last ran on */
uint32_t dirty; /* state cache bits that must be cleared */
long eptgen; /* pmap->pm_eptgen when the vcpu last ran */

@ -22,6 +22,8 @@
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#include <machine/asmacros.h>
@ -35,6 +37,10 @@
#define VENTER push %rbp ; mov %rsp,%rbp
#define VLEAVE pop %rbp
#define VMLOAD .byte 0x0f, 0x01, 0xda
#define VMRUN .byte 0x0f, 0x01, 0xd8
#define VMSAVE .byte 0x0f, 0x01, 0xdb
/*
* svm_launch(uint64_t vmcb, struct svm_regctx *gctx)
* %rdi: physical address of VMCB
@ -79,9 +85,9 @@ ENTRY(svm_launch)
movq SCTX_RDI(%rsi), %rdi
movq SCTX_RSI(%rsi), %rsi /* %rsi must be restored last */
vmload %rax
vmrun %rax
vmsave %rax
VMLOAD
VMRUN
VMSAVE
pop %rax /* pop guest context pointer from the stack */

@ -342,18 +342,6 @@ vmcs_init(struct vmcs *vmcs)
*/
VMPTRLD(vmcs);
/* Initialize guest IA32_PAT MSR with the default value */
pat = PAT_VALUE(0, PAT_WRITE_BACK) |
PAT_VALUE(1, PAT_WRITE_THROUGH) |
PAT_VALUE(2, PAT_UNCACHED) |
PAT_VALUE(3, PAT_UNCACHEABLE) |
PAT_VALUE(4, PAT_WRITE_BACK) |
PAT_VALUE(5, PAT_WRITE_THROUGH) |
PAT_VALUE(6, PAT_UNCACHED) |
PAT_VALUE(7, PAT_UNCACHEABLE);
if ((error = vmwrite(VMCS_GUEST_IA32_PAT, pat)) != 0)
goto done;
/* Host state */
/* Initialize host IA32_PAT MSR */

@ -100,13 +100,11 @@ __FBSDID("$FreeBSD$");
(VM_EXIT_HOST_LMA | \
VM_EXIT_SAVE_EFER | \
VM_EXIT_LOAD_EFER | \
VM_EXIT_ACKNOWLEDGE_INTERRUPT | \
VM_EXIT_SAVE_PAT | \
VM_EXIT_LOAD_PAT)
VM_EXIT_ACKNOWLEDGE_INTERRUPT)
#define VM_EXIT_CTLS_ZERO_SETTING VM_EXIT_SAVE_DEBUG_CONTROLS
#define VM_ENTRY_CTLS_ONE_SETTING (VM_ENTRY_LOAD_EFER | VM_ENTRY_LOAD_PAT)
#define VM_ENTRY_CTLS_ONE_SETTING (VM_ENTRY_LOAD_EFER)
#define VM_ENTRY_CTLS_ZERO_SETTING \
(VM_ENTRY_LOAD_DEBUG_CONTROLS | \
@ -859,10 +857,6 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
* VM exit and entry respectively. It is also restored from the
* host VMCS area on a VM exit.
*
* MSR_PAT is saved and restored in the guest VMCS are on a VM exit
* and entry respectively. It is also restored from the host VMCS
* area on a VM exit.
*
* The TSC MSR is exposed read-only. Writes are disallowed as that
* will impact the host TSC.
* XXX Writes would be implemented with a wrmsr trap, and
@ -874,7 +868,6 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
guest_msr_rw(vmx, MSR_SYSENTER_ESP_MSR) ||
guest_msr_rw(vmx, MSR_SYSENTER_EIP_MSR) ||
guest_msr_rw(vmx, MSR_EFER) ||
guest_msr_rw(vmx, MSR_PAT) ||
guest_msr_ro(vmx, MSR_TSC))
panic("vmx_vminit: error setting guest msr access");
@ -941,6 +934,7 @@ vmx_vminit(struct vm *vm, pmap_t pmap)
vmx->cap[i].proc_ctls = procbased_ctls;
vmx->cap[i].proc_ctls2 = procbased_ctls2;
vmx->state[i].nextrip = ~0;
vmx->state[i].lastcpu = NOCPU;
vmx->state[i].vpid = vpid[i];
@ -1169,12 +1163,24 @@ vmx_inject_nmi(struct vmx *vmx, int vcpu)
}
static void
vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic)
vmx_inject_interrupts(struct vmx *vmx, int vcpu, struct vlapic *vlapic,
uint64_t guestrip)
{
int vector, need_nmi_exiting, extint_pending;
uint64_t rflags, entryinfo;
uint32_t gi, info;
if (vmx->state[vcpu].nextrip != guestrip) {
gi = vmcs_read(VMCS_GUEST_INTERRUPTIBILITY);
if (gi & HWINTR_BLOCKING) {
VCPU_CTR2(vmx->vm, vcpu, "Guest interrupt blocking "
"cleared due to rip change: %#lx/%#lx",
vmx->state[vcpu].nextrip, guestrip);
gi &= ~HWINTR_BLOCKING;
vmcs_write(VMCS_GUEST_INTERRUPTIBILITY, gi);
}
}
if (vm_entry_intinfo(vmx->vm, vcpu, &entryinfo)) {
KASSERT((entryinfo & VMCS_INTR_VALID) != 0, ("%s: entry "
"intinfo is not valid: %#lx", __func__, entryinfo));
@ -1771,7 +1777,7 @@ vmexit_inst_emul(struct vm_exit *vmexit, uint64_t gpa, uint64_t gla)
{
struct vm_guest_paging *paging;
uint32_t csar;
paging = &vmexit->u.inst_emul.paging;
vmexit->exitcode = VM_EXITCODE_INST_EMUL;
@ -2060,12 +2066,11 @@ emulate_rdmsr(struct vmx *vmx, int vcpuid, u_int num, bool *retu)
static int
vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
{
int error, handled, in;
int error, errcode, errcode_valid, handled, in;
struct vmxctx *vmxctx;
struct vlapic *vlapic;
struct vm_inout_str *vis;
struct vm_task_switch *ts;
struct vm_exception vmexc;
uint32_t eax, ecx, edx, idtvec_info, idtvec_err, intr_info, inst_info;
uint32_t intr_type, intr_vec, reason;
uint64_t exitintinfo, qual, gpa;
@ -2250,6 +2255,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
case EXIT_REASON_MTF:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_MTRAP, 1);
vmexit->exitcode = VM_EXITCODE_MTRAP;
vmexit->inst_length = 0;
break;
case EXIT_REASON_PAUSE:
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_PAUSE, 1);
@ -2376,15 +2382,15 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
vmcs_write(VMCS_ENTRY_INST_LENGTH, vmexit->inst_length);
/* Reflect all other exceptions back into the guest */
bzero(&vmexc, sizeof(struct vm_exception));
vmexc.vector = intr_vec;
errcode_valid = errcode = 0;
if (intr_info & VMCS_INTR_DEL_ERRCODE) {
vmexc.error_code_valid = 1;
vmexc.error_code = vmcs_read(VMCS_EXIT_INTR_ERRCODE);
errcode_valid = 1;
errcode = vmcs_read(VMCS_EXIT_INTR_ERRCODE);
}
VCPU_CTR2(vmx->vm, vcpu, "Reflecting exception %d/%#x into "
"the guest", vmexc.vector, vmexc.error_code);
error = vm_inject_exception(vmx->vm, vcpu, &vmexc);
"the guest", intr_vec, errcode);
error = vm_inject_exception(vmx->vm, vcpu, intr_vec,
errcode_valid, errcode, 0);
KASSERT(error == 0, ("%s: vm_inject_exception error %d",
__func__, error));
return (1);
@ -2399,6 +2405,7 @@ vmx_exit_process(struct vmx *vmx, int vcpu, struct vm_exit *vmexit)
if (vm_mem_allocated(vmx->vm, gpa) ||
apic_access_fault(vmx, vcpu, gpa)) {
vmexit->exitcode = VM_EXITCODE_PAGING;
vmexit->inst_length = 0;
vmexit->u.paging.gpa = gpa;
vmexit->u.paging.fault_type = ept_fault_type(qual);
vmm_stat_incr(vmx->vm, vcpu, VMEXIT_NESTED_FAULT, 1);
@ -2540,7 +2547,7 @@ vmx_exit_handle_nmi(struct vmx *vmx, int vcpuid, struct vm_exit *vmexit)
}
static int
vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
vmx_run(void *arg, int vcpu, register_t rip, pmap_t pmap,
void *rendezvous_cookie, void *suspend_cookie)
{
int rc, handled, launched;
@ -2550,7 +2557,6 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
struct vmcs *vmcs;
struct vm_exit *vmexit;
struct vlapic *vlapic;
uint64_t rip;
uint32_t exit_reason;
vmx = arg;
@ -2578,11 +2584,13 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
*/
vmcs_write(VMCS_HOST_CR3, rcr3());
vmcs_write(VMCS_GUEST_RIP, startrip);
vmcs_write(VMCS_GUEST_RIP, rip);
vmx_set_pcpu_defaults(vmx, vcpu, pmap);
do {
handled = UNHANDLED;
KASSERT(vmcs_guest_rip() == rip, ("%s: vmcs guest rip mismatch "
"%#lx/%#lx", __func__, vmcs_guest_rip(), rip));
handled = UNHANDLED;
/*
* Interrupts are disabled from this point on until the
* guest starts executing. This is done for the following
@ -2602,7 +2610,7 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
* pmap_invalidate_ept().
*/
disable_intr();
vmx_inject_interrupts(vmx, vcpu, vlapic);
vmx_inject_interrupts(vmx, vcpu, vlapic, rip);
/*
* Check for vcpu suspension after injecting events because
@ -2611,20 +2619,20 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
*/
if (vcpu_suspended(suspend_cookie)) {
enable_intr();
vm_exit_suspended(vmx->vm, vcpu, vmcs_guest_rip());
vm_exit_suspended(vmx->vm, vcpu, rip);
break;
}
if (vcpu_rendezvous_pending(rendezvous_cookie)) {
enable_intr();
vm_exit_rendezvous(vmx->vm, vcpu, vmcs_guest_rip());
vm_exit_rendezvous(vmx->vm, vcpu, rip);
break;
}
if (vcpu_should_yield(vm, vcpu)) {
enable_intr();
vm_exit_astpending(vmx->vm, vcpu, vmcs_guest_rip());
vmx_astpending_trace(vmx, vcpu, vmexit->rip);
vm_exit_astpending(vmx->vm, vcpu, rip);
vmx_astpending_trace(vmx, vcpu, rip);
handled = HANDLED;
break;
}
@ -2638,6 +2646,9 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
vmexit->u.vmx.exit_reason = exit_reason = vmcs_exit_reason();
vmexit->u.vmx.exit_qualification = vmcs_exit_qualification();
/* Update 'nextrip' */
vmx->state[vcpu].nextrip = rip;
if (rc == VMX_GUEST_VMEXIT) {
vmx_exit_handle_nmi(vmx, vcpu, vmexit);
enable_intr();
@ -2648,6 +2659,7 @@ vmx_run(void *arg, int vcpu, register_t startrip, pmap_t pmap,
}
launched = 1;
vmx_exit_trace(vmx, vcpu, rip, exit_reason, handled);
rip = vmexit->rip;
} while (handled);
/*

@ -78,6 +78,7 @@ struct vmxcap {
};
struct vmxstate {
uint64_t nextrip; /* next instruction to be executed by guest */
int lastcpu; /* host cpu that this 'vcpu' last ran on */
uint16_t vpid;
};
@ -102,6 +103,7 @@ enum {
IDX_MSR_STAR,
IDX_MSR_SF_MASK,
IDX_MSR_KGSBASE,
IDX_MSR_PAT,
GUEST_MSR_NUM /* must be the last enumeration */
};

@ -230,6 +230,25 @@ westmere_cpu(void)
return (false);
}
static bool
pat_valid(uint64_t val)
{
int i, pa;
/*
* From Intel SDM: Table "Memory Types That Can Be Encoded With PAT"
*
* Extract PA0 through PA7 and validate that each one encodes a
* valid memory type.
*/
for (i = 0; i < 8; i++) {
pa = (val >> (i * 8)) & 0xff;
if (pa == 2 || pa == 3 || pa >= 8)
return (false);
}
return (true);
}
void
vmx_msr_init(void)
{
@ -302,6 +321,10 @@ vmx_msr_init(void)
void
vmx_msr_guest_init(struct vmx *vmx, int vcpuid)
{
uint64_t *guest_msrs;
guest_msrs = vmx->guest_msrs[vcpuid];
/*
* The permissions bitmap is shared between all vcpus so initialize it
* once when initializing the vBSP.
@ -313,6 +336,19 @@ vmx_msr_guest_init(struct vmx *vmx, int vcpuid)
guest_msr_rw(vmx, MSR_SF_MASK);
guest_msr_rw(vmx, MSR_KGSBASE);
}
/*
* Initialize guest IA32_PAT MSR with default value after reset.
*/
guest_msrs[IDX_MSR_PAT] = PAT_VALUE(0, PAT_WRITE_BACK) |
PAT_VALUE(1, PAT_WRITE_THROUGH) |
PAT_VALUE(2, PAT_UNCACHED) |
PAT_VALUE(3, PAT_UNCACHEABLE) |
PAT_VALUE(4, PAT_WRITE_BACK) |
PAT_VALUE(5, PAT_WRITE_THROUGH) |
PAT_VALUE(6, PAT_UNCACHED) |
PAT_VALUE(7, PAT_UNCACHEABLE);
return;
}
@ -353,7 +389,11 @@ vmx_msr_guest_exit(struct vmx *vmx, int vcpuid)
int
vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
{
int error = 0;
const uint64_t *guest_msrs;
int error;
guest_msrs = vmx->guest_msrs[vcpuid];
error = 0;
switch (num) {
case MSR_IA32_MISC_ENABLE:
@ -366,6 +406,9 @@ vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
case MSR_TURBO_RATIO_LIMIT1:
*val = turbo_ratio_limit;
break;
case MSR_PAT:
*val = guest_msrs[IDX_MSR_PAT];
break;
default:
error = EINVAL;
break;
@ -376,10 +419,13 @@ vmx_rdmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t *val, bool *retu)
int
vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
{
uint64_t *guest_msrs;
uint64_t changed;
int error;
guest_msrs = vmx->guest_msrs[vcpuid];
error = 0;
switch (num) {
case MSR_IA32_MISC_ENABLE:
changed = val ^ misc_enable;
@ -401,6 +447,12 @@ vmx_wrmsr(struct vmx *vmx, int vcpuid, u_int num, uint64_t val, bool *retu)
error = EINVAL;
break;
case MSR_PAT:
if (pat_valid(val))
guest_msrs[IDX_MSR_PAT] = val;
else
vm_inject_gp(vmx->vm, vcpuid);
break;
default:
error = EINVAL;
break;
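A standalone illustration of the rule enforced by pat_valid() above: each of
the eight PAT entries must encode memory type 0, 1, 4, 5, 6 or 7 (2, 3 and
anything >= 8 are reserved). This is a compile-and-run sketch, not vmm code.

#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool
pat_valid(uint64_t val)
{
	int i, pa;

	for (i = 0; i < 8; i++) {
		pa = (val >> (i * 8)) & 0xff;
		if (pa == 2 || pa == 3 || pa >= 8)
			return (false);
	}
	return (true);
}

int
main(void)
{
	/* The guest reset default: WB, WT, UC-, UC repeated twice. */
	assert(pat_valid(0x0007040600070406ULL));
	/* Entry encoding 2 is reserved, so this value is rejected. */
	assert(!pat_valid(0x0007040600070402ULL));
	return (0);
}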

@ -104,7 +104,6 @@ vhpet_capabilities(void)
uint64_t cap = 0;
cap |= 0x8086 << 16; /* vendor id */
cap |= HPET_CAP_LEG_RT; /* legacy routing capable */
cap |= (VHPET_NUM_TIMERS - 1) << 8; /* number of timers */
cap |= 1; /* revision */
cap &= ~HPET_CAP_COUNT_SIZE; /* 32-bit timer */
@ -127,15 +126,6 @@ vhpet_timer_msi_enabled(struct vhpet *vhpet, int n)
{
const uint64_t msi_enable = HPET_TCAP_FSB_INT_DEL | HPET_TCNF_FSB_EN;
/*
* LegacyReplacement Route configuration takes precedence over MSI
* for timers 0 and 1.
*/
if (n == 0 || n == 1) {
if (vhpet->config & HPET_CNF_LEG_RT)
return (false);
}
if ((vhpet->timer[n].cap_config & msi_enable) == msi_enable)
return (true);
else
@ -152,41 +142,9 @@ vhpet_timer_ioapic_pin(struct vhpet *vhpet, int n)
if (vhpet_timer_msi_enabled(vhpet, n))
return (0);
if (vhpet->config & HPET_CNF_LEG_RT) {
/*
* In "legacy routing" timers 0 and 1 are connected to
* ioapic pins 2 and 8 respectively.
*/
switch (n) {
case 0:
return (2);
case 1:
return (8);
}
}
return ((vhpet->timer[n].cap_config & HPET_TCNF_INT_ROUTE) >> 9);
}
static __inline int
vhpet_timer_atpic_pin(struct vhpet *vhpet, int n)
{
if (vhpet->config & HPET_CNF_LEG_RT) {
/*
* In "legacy routing" timers 0 and 1 are connected to
* 8259 master pin 0 and slave pin 0 respectively.
*/
switch (n) {
case 0:
return (0);
case 1:
return (8);
}
}
return (-1);
}
static uint32_t
vhpet_counter(struct vhpet *vhpet, sbintime_t *nowptr)
{
@ -216,17 +174,12 @@ vhpet_counter(struct vhpet *vhpet, sbintime_t *nowptr)
static void
vhpet_timer_clear_isr(struct vhpet *vhpet, int n)
{
int pin, legacy_pin;
int pin;
if (vhpet->isr & (1 << n)) {
pin = vhpet_timer_ioapic_pin(vhpet, n);
KASSERT(pin != 0, ("vhpet timer %d irq incorrectly routed", n));
vioapic_deassert_irq(vhpet->vm, pin);
legacy_pin = vhpet_timer_atpic_pin(vhpet, n);
if (legacy_pin != -1)
vatpic_deassert_irq(vhpet->vm, legacy_pin);
vhpet->isr &= ~(1 << n);
}
}
@ -252,12 +205,6 @@ vhpet_timer_edge_trig(struct vhpet *vhpet, int n)
KASSERT(!vhpet_timer_msi_enabled(vhpet, n), ("vhpet_timer_edge_trig: "
"timer %d is using MSI", n));
/* The legacy replacement interrupts are always edge triggered */
if (vhpet->config & HPET_CNF_LEG_RT) {
if (n == 0 || n == 1)
return (true);
}
if ((vhpet->timer[n].cap_config & HPET_TCNF_INT_TYPE) == 0)
return (true);
else
@ -267,7 +214,7 @@ vhpet_timer_edge_trig(struct vhpet *vhpet, int n)
static void
vhpet_timer_interrupt(struct vhpet *vhpet, int n)
{
int pin, legacy_pin;
int pin;
/* If interrupts are not enabled for this timer then just return. */
if (!vhpet_timer_interrupt_enabled(vhpet, n))
@ -293,17 +240,11 @@ vhpet_timer_interrupt(struct vhpet *vhpet, int n)
return;
}
legacy_pin = vhpet_timer_atpic_pin(vhpet, n);
if (vhpet_timer_edge_trig(vhpet, n)) {
vioapic_pulse_irq(vhpet->vm, pin);
if (legacy_pin != -1)
vatpic_pulse_irq(vhpet->vm, legacy_pin);
} else {
vhpet->isr |= 1 << n;
vioapic_assert_irq(vhpet->vm, pin);
if (legacy_pin != -1)
vatpic_assert_irq(vhpet->vm, legacy_pin);
}
}
@ -579,6 +520,13 @@ vhpet_mmio_write(void *vm, int vcpuid, uint64_t gpa, uint64_t val, int size,
counter = vhpet_counter(vhpet, nowptr);
oldval = vhpet->config;
update_register(&vhpet->config, data, mask);
/*
* LegacyReplacement Routing is not supported so clear the
* bit explicitly.
*/
vhpet->config &= ~HPET_CNF_LEG_RT;
if ((oldval ^ vhpet->config) & HPET_CNF_ENABLE) {
if (vhpet_counter_enabled(vhpet)) {
vhpet_start_counting(vhpet);

sys/amd64/vmm/io/vrtc.c (new file, 952 lines)

@ -0,0 +1,952 @@
/*-
* Copyright (c) 2014, Neel Natu (neel@freebsd.org)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/queue.h>
#include <sys/cpuset.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/clock.h>
#include <sys/sysctl.h>
#include <machine/vmm.h>
#include <isa/rtc.h>
#include "vmm_ktr.h"
#include "vatpic.h"
#include "vioapic.h"
#include "vrtc.h"
/* Register layout of the RTC */
struct rtcdev {
uint8_t sec;
uint8_t alarm_sec;
uint8_t min;
uint8_t alarm_min;
uint8_t hour;
uint8_t alarm_hour;
uint8_t day_of_week;
uint8_t day_of_month;
uint8_t month;
uint8_t year;
uint8_t reg_a;
uint8_t reg_b;
uint8_t reg_c;
uint8_t reg_d;
uint8_t nvram[128 - 14];
} __packed;
CTASSERT(sizeof(struct rtcdev) == 128);
struct vrtc {
struct vm *vm;
struct mtx mtx;
struct callout callout;
u_int addr; /* RTC register to read or write */
sbintime_t base_uptime;
time_t base_rtctime;
struct rtcdev rtcdev;
};
#define VRTC_LOCK(vrtc) mtx_lock(&((vrtc)->mtx))
#define VRTC_UNLOCK(vrtc) mtx_unlock(&((vrtc)->mtx))
#define VRTC_LOCKED(vrtc) mtx_owned(&((vrtc)->mtx))
/*
* RTC time is considered "broken" if:
* - RTC updates are halted by the guest
* - RTC date/time fields have invalid values
*/
#define VRTC_BROKEN_TIME ((time_t)-1)
#define RTC_IRQ 8
#define RTCSB_BIN 0x04
#define RTCSB_ALL_INTRS (RTCSB_UINTR | RTCSB_AINTR | RTCSB_PINTR)
#define rtc_halted(vrtc) ((vrtc->rtcdev.reg_b & RTCSB_HALT) != 0)
#define aintr_enabled(vrtc) (((vrtc)->rtcdev.reg_b & RTCSB_AINTR) != 0)
#define pintr_enabled(vrtc) (((vrtc)->rtcdev.reg_b & RTCSB_PINTR) != 0)
#define uintr_enabled(vrtc) (((vrtc)->rtcdev.reg_b & RTCSB_UINTR) != 0)
static void vrtc_callout_handler(void *arg);
static void vrtc_set_reg_c(struct vrtc *vrtc, uint8_t newval);
static MALLOC_DEFINE(M_VRTC, "vrtc", "bhyve virtual rtc");
SYSCTL_DECL(_hw_vmm);
SYSCTL_NODE(_hw_vmm, OID_AUTO, vrtc, CTLFLAG_RW, NULL, NULL);
static int rtc_flag_broken_time = 1;
SYSCTL_INT(_hw_vmm_vrtc, OID_AUTO, flag_broken_time, CTLFLAG_RDTUN,
&rtc_flag_broken_time, 0, "Stop guest when invalid RTC time is detected");
static __inline bool
divider_enabled(int reg_a)
{
/*
* The RTC is counting only when dividers are not held in reset.
*/
return ((reg_a & 0x70) == 0x20);
}
static __inline bool
update_enabled(struct vrtc *vrtc)
{
/*
* RTC date/time can be updated only if:
* - divider is not held in reset
* - guest has not disabled updates
* - the date/time fields have valid contents
*/
if (!divider_enabled(vrtc->rtcdev.reg_a))
return (false);
if (rtc_halted(vrtc))
return (false);
if (vrtc->base_rtctime == VRTC_BROKEN_TIME)
return (false);
return (true);
}
static time_t
vrtc_curtime(struct vrtc *vrtc)
{
sbintime_t now, delta;
time_t t;
KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
t = vrtc->base_rtctime;
if (update_enabled(vrtc)) {
now = sbinuptime();
delta = now - vrtc->base_uptime;
KASSERT(delta >= 0, ("vrtc_curtime: uptime went backwards: "
"%#lx to %#lx", vrtc->base_uptime, now));
t += delta / SBT_1S;
}
return (t);
}
static __inline uint8_t
rtcset(struct rtcdev *rtc, int val)
{
KASSERT(val >= 0 && val < 100, ("%s: invalid bin2bcd index %d",
__func__, val));
return ((rtc->reg_b & RTCSB_BIN) ? val : bin2bcd_data[val]);
}
static void
secs_to_rtc(time_t rtctime, struct vrtc *vrtc, int force_update)
{
struct clocktime ct;
struct timespec ts;
struct rtcdev *rtc;
int hour;
KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
if (rtctime < 0) {
KASSERT(rtctime == VRTC_BROKEN_TIME,
("%s: invalid vrtc time %#lx", __func__, rtctime));
return;
}
/*
* If the RTC is halted then the guest has "ownership" of the
* date/time fields. Don't update the RTC date/time fields in
* this case (unless forced).
*/
if (rtc_halted(vrtc) && !force_update)
return;
ts.tv_sec = rtctime;
ts.tv_nsec = 0;
clock_ts_to_ct(&ts, &ct);
KASSERT(ct.sec >= 0 && ct.sec <= 59, ("invalid clocktime sec %d",
ct.sec));
KASSERT(ct.min >= 0 && ct.min <= 59, ("invalid clocktime min %d",
ct.min));
KASSERT(ct.hour >= 0 && ct.hour <= 23, ("invalid clocktime hour %d",
ct.hour));
KASSERT(ct.dow >= 0 && ct.dow <= 6, ("invalid clocktime wday %d",
ct.dow));
KASSERT(ct.day >= 1 && ct.day <= 31, ("invalid clocktime mday %d",
ct.day));
KASSERT(ct.mon >= 1 && ct.mon <= 12, ("invalid clocktime month %d",
ct.mon));
KASSERT(ct.year >= POSIX_BASE_YEAR, ("invalid clocktime year %d",
ct.year));
rtc = &vrtc->rtcdev;
rtc->sec = rtcset(rtc, ct.sec);
rtc->min = rtcset(rtc, ct.min);
hour = ct.hour;
if ((rtc->reg_b & RTCSB_24HR) == 0)
hour = (hour % 12) + 1; /* convert to a 12-hour format */
rtc->hour = rtcset(rtc, hour);
if ((rtc->reg_b & RTCSB_24HR) == 0 && ct.hour >= 12)
rtc->hour |= 0x80; /* set MSB to indicate PM */
rtc->day_of_week = rtcset(rtc, ct.dow + 1);
rtc->day_of_month = rtcset(rtc, ct.day);
rtc->month = rtcset(rtc, ct.mon);
rtc->year = rtcset(rtc, ct.year % 100);
}
static int
rtcget(struct rtcdev *rtc, int val, int *retval)
{
uint8_t upper, lower;
if (rtc->reg_b & RTCSB_BIN) {
*retval = val;
return (0);
}
lower = val & 0xf;
upper = (val >> 4) & 0xf;
if (lower > 9 || upper > 9)
return (-1);
*retval = upper * 10 + lower;
return (0);
}
static time_t
rtc_to_secs(struct vrtc *vrtc)
{
struct clocktime ct;
struct timespec ts;
struct rtcdev *rtc;
struct vm *vm;
int error, hour, pm, year;
KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
vm = vrtc->vm;
rtc = &vrtc->rtcdev;
bzero(&ct, sizeof(struct clocktime));
error = rtcget(rtc, rtc->sec, &ct.sec);
if (error || ct.sec < 0 || ct.sec > 59) {
VM_CTR2(vm, "Invalid RTC sec %#x/%d", rtc->sec, ct.sec);
goto fail;
}
error = rtcget(rtc, rtc->min, &ct.min);
if (error || ct.min < 0 || ct.min > 59) {
VM_CTR2(vm, "Invalid RTC min %#x/%d", rtc->min, ct.min);
goto fail;
}
pm = 0;
hour = rtc->hour;
if ((rtc->reg_b & RTCSB_24HR) == 0) {
if (hour & 0x80) {
hour &= ~0x80;
pm = 1;
}
}
error = rtcget(rtc, hour, &ct.hour);
if ((rtc->reg_b & RTCSB_24HR) == 0) {
ct.hour -= 1;
if (pm)
ct.hour += 12;
}
if (error || ct.hour < 0 || ct.hour > 23) {
VM_CTR2(vm, "Invalid RTC hour %#x/%d", rtc->hour, ct.hour);
goto fail;
}
/*
* Ignore 'rtc->dow' because some guests like Linux don't bother
* setting it at all while others like OpenBSD/i386 set it incorrectly.
*
* clock_ct_to_ts() does not depend on 'ct.dow' anyways so ignore it.
*/
ct.dow = -1;
error = rtcget(rtc, rtc->day_of_month, &ct.day);
if (error || ct.day < 1 || ct.day > 31) {
VM_CTR2(vm, "Invalid RTC mday %#x/%d", rtc->day_of_month,
ct.day);
goto fail;
}
error = rtcget(rtc, rtc->month, &ct.mon);
if (error || ct.mon < 1 || ct.mon > 12) {
VM_CTR2(vm, "Invalid RTC month %#x/%d", rtc->month, ct.mon);
goto fail;
}
error = rtcget(rtc, rtc->year, &year);
if (error || year < 0 || year > 99) {
VM_CTR2(vm, "Invalid RTC year %#x/%d", rtc->year, year);
goto fail;
}
if (year >= 70)
ct.year = 1900 + year;
else
ct.year = 2000 + year;
error = clock_ct_to_ts(&ct, &ts);
if (error || ts.tv_sec < 0) {
VM_CTR3(vm, "Invalid RTC clocktime.date %04d-%02d-%02d",
ct.year, ct.mon, ct.day);
VM_CTR3(vm, "Invalid RTC clocktime.time %02d:%02d:%02d",
ct.hour, ct.min, ct.sec);
goto fail;
}
return (ts.tv_sec); /* success */
fail:
return (VRTC_BROKEN_TIME); /* failure */
}
static int
vrtc_time_update(struct vrtc *vrtc, time_t newtime)
{
struct rtcdev *rtc;
time_t oldtime;
uint8_t alarm_sec, alarm_min, alarm_hour;
KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
rtc = &vrtc->rtcdev;
alarm_sec = rtc->alarm_sec;
alarm_min = rtc->alarm_min;
alarm_hour = rtc->alarm_hour;
oldtime = vrtc->base_rtctime;
VM_CTR2(vrtc->vm, "Updating RTC time from %#lx to %#lx",
oldtime, newtime);
if (newtime == oldtime)
return (0);
/*
* If 'newtime' indicates that RTC updates are disabled then just
* record that and return. There is no need to do alarm interrupt
* processing or update 'base_uptime' in this case.
*/
if (newtime == VRTC_BROKEN_TIME) {
vrtc->base_rtctime = VRTC_BROKEN_TIME;
return (0);
}
/*
* Return an error if RTC updates are halted by the guest.
*/
if (rtc_halted(vrtc)) {
VM_CTR0(vrtc->vm, "RTC update halted by guest");
return (EBUSY);
}
do {
/*
* If the alarm interrupt is enabled and 'oldtime' is valid
* then visit all the seconds between 'oldtime' and 'newtime'
* to check for the alarm condition.
*
* Otherwise move the RTC time forward directly to 'newtime'.
*/
if (aintr_enabled(vrtc) && oldtime != VRTC_BROKEN_TIME)
vrtc->base_rtctime++;
else
vrtc->base_rtctime = newtime;
if (aintr_enabled(vrtc)) {
/*
* Update the RTC date/time fields before checking
* if the alarm conditions are satisfied.
*/
secs_to_rtc(vrtc->base_rtctime, vrtc, 0);
if ((alarm_sec >= 0xC0 || alarm_sec == rtc->sec) &&
(alarm_min >= 0xC0 || alarm_min == rtc->min) &&
(alarm_hour >= 0xC0 || alarm_hour == rtc->hour)) {
vrtc_set_reg_c(vrtc, rtc->reg_c | RTCIR_ALARM);
}
}
} while (vrtc->base_rtctime != newtime);
if (uintr_enabled(vrtc))
vrtc_set_reg_c(vrtc, rtc->reg_c | RTCIR_UPDATE);
vrtc->base_uptime = sbinuptime();
return (0);
}
static sbintime_t
vrtc_freq(struct vrtc *vrtc)
{
int ratesel;
static sbintime_t pf[16] = {
0,
SBT_1S / 256,
SBT_1S / 128,
SBT_1S / 8192,
SBT_1S / 4096,
SBT_1S / 2048,
SBT_1S / 1024,
SBT_1S / 512,
SBT_1S / 256,
SBT_1S / 128,
SBT_1S / 64,
SBT_1S / 32,
SBT_1S / 16,
SBT_1S / 8,
SBT_1S / 4,
SBT_1S / 2,
};
KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
/*
* If both periodic and alarm interrupts are enabled then use the
* periodic frequency to drive the callout. The minimum periodic
* frequency (2 Hz) is higher than the alarm frequency (1 Hz) so
* piggyback the alarm on top of it. The same argument applies to
* the update interrupt.
*/
if (pintr_enabled(vrtc) && divider_enabled(vrtc->rtcdev.reg_a)) {
ratesel = vrtc->rtcdev.reg_a & 0xf;
return (pf[ratesel]);
} else if (aintr_enabled(vrtc) && update_enabled(vrtc)) {
return (SBT_1S);
} else if (uintr_enabled(vrtc) && update_enabled(vrtc)) {
return (SBT_1S);
} else {
return (0);
}
}
static void
vrtc_callout_reset(struct vrtc *vrtc, sbintime_t freqsbt)
{
KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
if (freqsbt == 0) {
if (callout_active(&vrtc->callout)) {
VM_CTR0(vrtc->vm, "RTC callout stopped");
callout_stop(&vrtc->callout);
}
return;
}
VM_CTR1(vrtc->vm, "RTC callout frequency %d hz", SBT_1S / freqsbt);
callout_reset_sbt(&vrtc->callout, freqsbt, 0, vrtc_callout_handler,
vrtc, 0);
}
static void
vrtc_callout_handler(void *arg)
{
struct vrtc *vrtc = arg;
sbintime_t freqsbt;
time_t rtctime;
int error;
VM_CTR0(vrtc->vm, "vrtc callout fired");
VRTC_LOCK(vrtc);
if (callout_pending(&vrtc->callout)) /* callout was reset */
goto done;
if (!callout_active(&vrtc->callout)) /* callout was stopped */
goto done;
callout_deactivate(&vrtc->callout);
KASSERT((vrtc->rtcdev.reg_b & RTCSB_ALL_INTRS) != 0,
("gratuitous vrtc callout"));
if (pintr_enabled(vrtc))
vrtc_set_reg_c(vrtc, vrtc->rtcdev.reg_c | RTCIR_PERIOD);
if (aintr_enabled(vrtc) || uintr_enabled(vrtc)) {
rtctime = vrtc_curtime(vrtc);
error = vrtc_time_update(vrtc, rtctime);
KASSERT(error == 0, ("%s: vrtc_time_update error %d",
__func__, error));
}
freqsbt = vrtc_freq(vrtc);
KASSERT(freqsbt != 0, ("%s: vrtc frequency cannot be zero", __func__));
vrtc_callout_reset(vrtc, freqsbt);
done:
VRTC_UNLOCK(vrtc);
}
static __inline void
vrtc_callout_check(struct vrtc *vrtc, sbintime_t freq)
{
int active;
active = callout_active(&vrtc->callout) ? 1 : 0;
KASSERT((freq == 0 && !active) || (freq != 0 && active),
("vrtc callout %s with frequency %#lx",
active ? "active" : "inactive", freq));
}
static void
vrtc_set_reg_c(struct vrtc *vrtc, uint8_t newval)
{
struct rtcdev *rtc;
int oldirqf, newirqf;
uint8_t oldval, changed;
KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
rtc = &vrtc->rtcdev;
newval &= RTCIR_ALARM | RTCIR_PERIOD | RTCIR_UPDATE;
oldirqf = rtc->reg_c & RTCIR_INT;
if ((aintr_enabled(vrtc) && (newval & RTCIR_ALARM) != 0) ||
(pintr_enabled(vrtc) && (newval & RTCIR_PERIOD) != 0) ||
(uintr_enabled(vrtc) && (newval & RTCIR_UPDATE) != 0)) {
newirqf = RTCIR_INT;
} else {
newirqf = 0;
}
oldval = rtc->reg_c;
rtc->reg_c = newirqf | newval;
changed = oldval ^ rtc->reg_c;
if (changed) {
VM_CTR2(vrtc->vm, "RTC reg_c changed from %#x to %#x",
oldval, rtc->reg_c);
}
if (!oldirqf && newirqf) {
VM_CTR1(vrtc->vm, "RTC irq %d asserted", RTC_IRQ);
vatpic_pulse_irq(vrtc->vm, RTC_IRQ);
vioapic_pulse_irq(vrtc->vm, RTC_IRQ);
} else if (oldirqf && !newirqf) {
VM_CTR1(vrtc->vm, "RTC irq %d deasserted", RTC_IRQ);
}
}
static int
vrtc_set_reg_b(struct vrtc *vrtc, uint8_t newval)
{
struct rtcdev *rtc;
sbintime_t oldfreq, newfreq;
time_t curtime, rtctime;
int error;
uint8_t oldval, changed;
KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
rtc = &vrtc->rtcdev;
oldval = rtc->reg_b;
oldfreq = vrtc_freq(vrtc);
rtc->reg_b = newval;
changed = oldval ^ newval;
if (changed) {
VM_CTR2(vrtc->vm, "RTC reg_b changed from %#x to %#x",
oldval, newval);
}
if (changed & RTCSB_HALT) {
if ((newval & RTCSB_HALT) == 0) {
rtctime = rtc_to_secs(vrtc);
if (rtctime == VRTC_BROKEN_TIME) {
/*
* Stop updating the RTC if the date/time
* programmed by the guest is not correct.
*/
VM_CTR0(vrtc->vm, "Invalid RTC date/time "
"programming detected");
if (rtc_flag_broken_time)
return (-1);
}
} else {
curtime = vrtc_curtime(vrtc);
KASSERT(curtime == vrtc->base_rtctime, ("%s: mismatch "
"between vrtc basetime (%#lx) and curtime (%#lx)",
__func__, vrtc->base_rtctime, curtime));
/*
* Force a refresh of the RTC date/time fields so
* they reflect the time right before the guest set
* the HALT bit.
*/
secs_to_rtc(curtime, vrtc, 1);
/*
* Updates are halted so mark 'base_rtctime' to denote
* that the RTC date/time is in flux.
*/
rtctime = VRTC_BROKEN_TIME;
rtc->reg_b &= ~RTCSB_UINTR;
}
error = vrtc_time_update(vrtc, rtctime);
KASSERT(error == 0, ("vrtc_time_update error %d", error));
}
/*
* Side effect of changes to the interrupt enable bits.
*/
if (changed & RTCSB_ALL_INTRS)
vrtc_set_reg_c(vrtc, vrtc->rtcdev.reg_c);
/*
* Change the callout frequency if it has changed.
*/
newfreq = vrtc_freq(vrtc);
if (newfreq != oldfreq)
vrtc_callout_reset(vrtc, newfreq);
else
vrtc_callout_check(vrtc, newfreq);
/*
* The side effect of bits that control the RTC date/time format
* is handled lazily when those fields are actually read.
*/
return (0);
}
static void
vrtc_set_reg_a(struct vrtc *vrtc, uint8_t newval)
{
sbintime_t oldfreq, newfreq;
uint8_t oldval, changed;
KASSERT(VRTC_LOCKED(vrtc), ("%s: vrtc not locked", __func__));
newval &= ~RTCSA_TUP;
oldval = vrtc->rtcdev.reg_a;
oldfreq = vrtc_freq(vrtc);
if (divider_enabled(oldval) && !divider_enabled(newval)) {
VM_CTR2(vrtc->vm, "RTC divider held in reset at %#lx/%#lx",
vrtc->base_rtctime, vrtc->base_uptime);
} else if (!divider_enabled(oldval) && divider_enabled(newval)) {
/*
* If the dividers are coming out of reset then update
* 'base_uptime' before this happens. This is done to
* maintain the illusion that the RTC date/time was frozen
* while the dividers were disabled.
*/
vrtc->base_uptime = sbinuptime();
VM_CTR2(vrtc->vm, "RTC divider out of reset at %#lx/%#lx",
vrtc->base_rtctime, vrtc->base_uptime);
} else {
/* NOTHING */
}
vrtc->rtcdev.reg_a = newval;
changed = oldval ^ newval;
if (changed) {
VM_CTR2(vrtc->vm, "RTC reg_a changed from %#x to %#x",
oldval, newval);
}
/*
* Side effect of changes to rate select and divider enable bits.
*/
newfreq = vrtc_freq(vrtc);
if (newfreq != oldfreq)
vrtc_callout_reset(vrtc, newfreq);
else
vrtc_callout_check(vrtc, newfreq);
}
int
vrtc_set_time(struct vm *vm, time_t secs)
{
struct vrtc *vrtc;
int error;
vrtc = vm_rtc(vm);
VRTC_LOCK(vrtc);
error = vrtc_time_update(vrtc, secs);
VRTC_UNLOCK(vrtc);
if (error) {
VM_CTR2(vrtc->vm, "Error %d setting RTC time to %#lx", error,
secs);
} else {
VM_CTR1(vrtc->vm, "RTC time set to %#lx", secs);
}
return (error);
}
time_t
vrtc_get_time(struct vm *vm)
{
struct vrtc *vrtc;
time_t t;
vrtc = vm_rtc(vm);
VRTC_LOCK(vrtc);
t = vrtc_curtime(vrtc);
VRTC_UNLOCK(vrtc);
return (t);
}
int
vrtc_nvram_write(struct vm *vm, int offset, uint8_t value)
{
struct vrtc *vrtc;
uint8_t *ptr;
vrtc = vm_rtc(vm);
/*
* Don't allow writes to RTC control registers or the date/time fields.
*/
if (offset < offsetof(struct rtcdev, nvram[0]) ||
offset >= sizeof(struct rtcdev)) {
VM_CTR1(vrtc->vm, "RTC nvram write to invalid offset %d",
offset);
return (EINVAL);
}
VRTC_LOCK(vrtc);
ptr = (uint8_t *)(&vrtc->rtcdev);
ptr[offset] = value;
VM_CTR2(vrtc->vm, "RTC nvram write %#x to offset %#x", value, offset);
VRTC_UNLOCK(vrtc);
return (0);
}
int
vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval)
{
struct vrtc *vrtc;
time_t curtime;
uint8_t *ptr;
/*
* Allow all offsets in the RTC to be read.
*/
if (offset < 0 || offset >= sizeof(struct rtcdev))
return (EINVAL);
vrtc = vm_rtc(vm);
VRTC_LOCK(vrtc);
/*
* Update RTC date/time fields if necessary.
*/
if (offset < 10) {
curtime = vrtc_curtime(vrtc);
secs_to_rtc(curtime, vrtc, 0);
}
ptr = (uint8_t *)(&vrtc->rtcdev);
*retval = ptr[offset];
VRTC_UNLOCK(vrtc);
return (0);
}
int
vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
uint32_t *val)
{
struct vrtc *vrtc;
vrtc = vm_rtc(vm);
if (bytes != 1)
return (-1);
if (in) {
*val = 0xff;
return (0);
}
VRTC_LOCK(vrtc);
vrtc->addr = *val & 0x7f;
VRTC_UNLOCK(vrtc);
return (0);
}
int
vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
uint32_t *val)
{
struct vrtc *vrtc;
struct rtcdev *rtc;
time_t curtime;
int error, offset;
vrtc = vm_rtc(vm);
rtc = &vrtc->rtcdev;
if (bytes != 1)
return (-1);
VRTC_LOCK(vrtc);
offset = vrtc->addr;
if (offset >= sizeof(struct rtcdev)) {
VRTC_UNLOCK(vrtc);
return (-1);
}
error = 0;
curtime = vrtc_curtime(vrtc);
vrtc_time_update(vrtc, curtime);
if (in) {
/*
* Update RTC date/time fields if necessary.
*/
if (offset < 10)
secs_to_rtc(curtime, vrtc, 0);
if (offset == 12) {
/*
* XXX
* reg_c interrupt flags are updated only if the
* corresponding interrupt enable bit in reg_b is set.
*/
*val = vrtc->rtcdev.reg_c;
vrtc_set_reg_c(vrtc, 0);
} else {
*val = *((uint8_t *)rtc + offset);
}
VCPU_CTR2(vm, vcpuid, "Read value %#x from RTC offset %#x",
*val, offset);
} else {
switch (offset) {
case 10:
VCPU_CTR1(vm, vcpuid, "RTC reg_a set to %#x", *val);
vrtc_set_reg_a(vrtc, *val);
break;
case 11:
VCPU_CTR1(vm, vcpuid, "RTC reg_b set to %#x", *val);
error = vrtc_set_reg_b(vrtc, *val);
break;
case 12:
VCPU_CTR1(vm, vcpuid, "RTC reg_c set to %#x (ignored)",
*val);
break;
case 13:
VCPU_CTR1(vm, vcpuid, "RTC reg_d set to %#x (ignored)",
*val);
break;
case 0:
/*
* High order bit of 'seconds' is readonly.
*/
*val &= 0x7f;
/* FALLTHRU */
default:
VCPU_CTR2(vm, vcpuid, "RTC offset %#x set to %#x",
offset, *val);
*((uint8_t *)rtc + offset) = *val;
break;
}
}
VRTC_UNLOCK(vrtc);
return (error);
}
void
vrtc_reset(struct vrtc *vrtc)
{
struct rtcdev *rtc;
VRTC_LOCK(vrtc);
rtc = &vrtc->rtcdev;
vrtc_set_reg_b(vrtc, rtc->reg_b & ~(RTCSB_ALL_INTRS | RTCSB_SQWE));
vrtc_set_reg_c(vrtc, 0);
KASSERT(!callout_active(&vrtc->callout), ("rtc callout still active"));
VRTC_UNLOCK(vrtc);
}
struct vrtc *
vrtc_init(struct vm *vm)
{
struct vrtc *vrtc;
struct rtcdev *rtc;
time_t curtime;
vrtc = malloc(sizeof(struct vrtc), M_VRTC, M_WAITOK | M_ZERO);
vrtc->vm = vm;
mtx_init(&vrtc->mtx, "vrtc lock", NULL, MTX_DEF);
callout_init(&vrtc->callout, 1);
/* Allow dividers to keep time but disable everything else */
rtc = &vrtc->rtcdev;
rtc->reg_a = 0x20;
rtc->reg_b = RTCSB_24HR;
rtc->reg_c = 0;
rtc->reg_d = RTCSD_PWR;
/* Reset the index register to a safe value. */
vrtc->addr = RTC_STATUSD;
/*
* Initialize RTC time to 00:00:00 Jan 1, 1970.
*/
curtime = 0;
VRTC_LOCK(vrtc);
vrtc->base_rtctime = VRTC_BROKEN_TIME;
vrtc_time_update(vrtc, curtime);
secs_to_rtc(curtime, vrtc, 0);
VRTC_UNLOCK(vrtc);
return (vrtc);
}
void
vrtc_cleanup(struct vrtc *vrtc)
{
callout_drain(&vrtc->callout);
free(vrtc, M_VRTC);
}
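A standalone sketch of the BCD handling performed by rtcget()/rtcset() above:
unless the guest sets RTCSB_BIN in register B, the date/time fields hold two
packed BCD digits. bcd_decode() is a hypothetical stand-in for rtcget().

#include <assert.h>
#include <stdint.h>

static int
bcd_decode(uint8_t val, int *retval)
{
	uint8_t lower = val & 0xf;
	uint8_t upper = (val >> 4) & 0xf;

	if (lower > 9 || upper > 9)
		return (-1);	/* not a valid BCD encoding */
	*retval = upper * 10 + lower;
	return (0);
}

int
main(void)
{
	int sec;

	assert(bcd_decode(0x59, &sec) == 0 && sec == 59);
	assert(bcd_decode(0x5a, &sec) == -1);	/* 0xa digit is invalid */
	return (0);
}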

sys/amd64/vmm/io/vrtc.h (new file, 50 lines)

@ -0,0 +1,50 @@
/*-
* Copyright (c) 2014 Neel Natu (neel@freebsd.org)
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice unmodified, this list of conditions, and the following
* disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _VRTC_H_
#define _VRTC_H_
#include <isa/isareg.h>
struct vrtc;
struct vrtc *vrtc_init(struct vm *vm);
void vrtc_cleanup(struct vrtc *vrtc);
void vrtc_reset(struct vrtc *vrtc);
time_t vrtc_get_time(struct vm *vm);
int vrtc_set_time(struct vm *vm, time_t secs);
int vrtc_nvram_write(struct vm *vm, int offset, uint8_t value);
int vrtc_nvram_read(struct vm *vm, int offset, uint8_t *retval);
int vrtc_addr_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
uint32_t *val);
int vrtc_data_handler(struct vm *vm, int vcpuid, bool in, int port, int bytes,
uint32_t *val);
#endif
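For context, a hedged sketch of how the vmm device ioctl handler might
dispatch onto these entry points; the surrounding switch, the 'sc' softc and
the 'data' buffer are assumptions, as that code is not shown in this diff.

case VM_RTC_SETTIME:
	rtctime = (struct vm_rtc_time *)data;
	error = vrtc_set_time(sc->vm, rtctime->secs);
	break;
case VM_RTC_GETTIME:
	error = 0;
	rtctime = (struct vm_rtc_time *)data;
	rtctime->secs = vrtc_get_time(sc->vm);
	break;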

@ -75,6 +75,7 @@ __FBSDID("$FreeBSD$");
#include "vioapic.h"
#include "vlapic.h"
#include "vpmtmr.h"
#include "vrtc.h"
#include "vmm_ipi.h"
#include "vmm_stat.h"
#include "vmm_lapic.h"
@ -100,12 +101,15 @@ struct vcpu {
uint64_t exitintinfo; /* (i) events pending at VM exit */
int nmi_pending; /* (i) NMI pending */
int extint_pending; /* (i) INTR pending */
struct vm_exception exception; /* (x) exception collateral */
int exception_pending; /* (i) exception pending */
int exc_vector; /* (x) exception collateral */
int exc_errcode_valid;
uint32_t exc_errcode;
struct savefpu *guestfpu; /* (a,i) guest fpu state */
uint64_t guest_xcr0; /* (i) guest %xcr0 register */
void *stats; /* (a,i) statistics */
struct vm_exit exitinfo; /* (x) exit reason and collateral */
uint64_t nextrip; /* (x) next instruction to execute */
};
#define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
@ -136,6 +140,7 @@ struct vm {
struct vatpic *vatpic; /* (i) virtual atpic */
struct vatpit *vatpit; /* (i) virtual atpit */
struct vpmtmr *vpmtmr; /* (i) virtual ACPI PM timer */
struct vrtc *vrtc; /* (o) virtual RTC */
volatile cpuset_t active_cpus; /* (i) active vcpus */
int suspend; /* (i) stop VM execution */
volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
@ -376,6 +381,8 @@ vm_init(struct vm *vm, bool create)
vm->vatpic = vatpic_init(vm);
vm->vatpit = vatpit_init(vm);
vm->vpmtmr = vpmtmr_init(vm);
if (create)
vm->vrtc = vrtc_init(vm);
CPU_ZERO(&vm->active_cpus);
@ -438,6 +445,10 @@ vm_cleanup(struct vm *vm, bool destroy)
if (vm->iommu != NULL)
iommu_destroy_domain(vm->iommu);
if (destroy)
vrtc_cleanup(vm->vrtc);
else
vrtc_reset(vm->vrtc);
vpmtmr_cleanup(vm->vpmtmr);
vatpit_cleanup(vm->vatpit);
vhpet_cleanup(vm->vhpet);
@ -841,16 +852,26 @@ vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
}
int
vm_set_register(struct vm *vm, int vcpu, int reg, uint64_t val)
vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val)
{
struct vcpu *vcpu;
int error;
if (vcpu < 0 || vcpu >= VM_MAXCPU)
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
return (EINVAL);
if (reg >= VM_REG_LAST)
return (EINVAL);
return (VMSETREG(vm->cookie, vcpu, reg, val));
error = VMSETREG(vm->cookie, vcpuid, reg, val);
if (error || reg != VM_REG_GUEST_RIP)
return (error);
/* Set 'nextrip' to match the value of %rip */
VCPU_CTR1(vm, vcpuid, "Setting nextrip to %#lx", val);
vcpu = &vm->vcpu[vcpuid];
vcpu->nextrip = val;
return (0);
}
static boolean_t
@ -1102,7 +1123,7 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
{
struct vcpu *vcpu;
const char *wmesg;
int error, t, vcpu_halted, vm_halted;
int t, vcpu_halted, vm_halted;
KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));
@ -1110,22 +1131,6 @@ vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled, bool *retu)
vcpu_halted = 0;
vm_halted = 0;
/*
* The typical way to halt a cpu is to execute: "sti; hlt"
*
* STI sets RFLAGS.IF to enable interrupts. However, the processor
* remains in an "interrupt shadow" for an additional instruction
* following the STI. This guarantees that "sti; hlt" sequence is
* atomic and a pending interrupt will be recognized after the HLT.
*
* After the HLT emulation is done the vcpu is no longer in an
* interrupt shadow and a pending interrupt can be injected on
* the next entry into the guest.
*/
error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0);
KASSERT(error == 0, ("%s: error %d clearing interrupt shadow",
__func__, error));
vcpu_lock(vcpu);
while (1) {
/*
@ -1206,6 +1211,9 @@ vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
vcpu = &vm->vcpu[vcpuid];
vme = &vcpu->exitinfo;
KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d",
__func__, vme->inst_length));
ftype = vme->u.paging.fault_type;
KASSERT(ftype == VM_PROT_READ ||
ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
@ -1231,9 +1239,6 @@ vm_handle_paging(struct vm *vm, int vcpuid, bool *retu)
if (rv != KERN_SUCCESS)
return (EFAULT);
done:
/* restart execution at the faulting instruction */
vme->inst_length = 0;
return (0);
}
@ -1288,10 +1293,13 @@ vm_handle_inst_emul(struct vm *vm, int vcpuid, bool *retu)
return (EFAULT);
/*
* If the instruction length is not specified the update it now.
* If the instruction length was not specified then update it now
* along with 'nextrip'.
*/
if (vme->inst_length == 0)
if (vme->inst_length == 0) {
vme->inst_length = vie->num_processed;
vcpu->nextrip += vie->num_processed;
}
/* return to userland unless this is an in-kernel emulated device */
if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
@ -1440,7 +1448,7 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
int error, vcpuid;
struct vcpu *vcpu;
struct pcb *pcb;
uint64_t tscval, rip;
uint64_t tscval;
struct vm_exit *vme;
bool retu, intr_disabled;
pmap_t pmap;
@ -1462,7 +1470,6 @@ vm_run(struct vm *vm, struct vm_run *vmrun)
pmap = vmspace_pmap(vm->vmspace);
vcpu = &vm->vcpu[vcpuid];
vme = &vcpu->exitinfo;
rip = vmrun->rip;
restart:
critical_enter();
@ -1477,7 +1484,7 @@ restart:
restore_guest_fpustate(vcpu);
vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
error = VMRUN(vm->cookie, vcpuid, rip, pmap, rptr, sptr);
error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, rptr, sptr);
vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
save_guest_fpustate(vcpu);
@ -1488,6 +1495,7 @@ restart:
if (error == 0) {
retu = false;
vcpu->nextrip = vme->rip + vme->inst_length;
switch (vme->exitcode) {
case VM_EXITCODE_SUSPENDED:
error = vm_handle_suspend(vm, vcpuid, &retu);
@ -1524,16 +1532,57 @@ restart:
}
}
if (error == 0 && retu == false) {
rip = vme->rip + vme->inst_length;
if (error == 0 && retu == false)
goto restart;
}
/* copy the exit information */
bcopy(vme, &vmrun->vm_exit, sizeof(struct vm_exit));
return (error);
}
int
vm_restart_instruction(void *arg, int vcpuid)
{
struct vm *vm;
struct vcpu *vcpu;
enum vcpu_state state;
uint64_t rip;
int error;
vm = arg;
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
return (EINVAL);
vcpu = &vm->vcpu[vcpuid];
state = vcpu_get_state(vm, vcpuid, NULL);
if (state == VCPU_RUNNING) {
/*
* When a vcpu is "running" the next instruction is determined
* by adding 'rip' and 'inst_length' in the vcpu's 'exitinfo'.
* Thus setting 'inst_length' to zero will cause the current
* instruction to be restarted.
*/
vcpu->exitinfo.inst_length = 0;
VCPU_CTR1(vm, vcpuid, "restarting instruction at %#lx by "
"setting inst_length to zero", vcpu->exitinfo.rip);
} else if (state == VCPU_FROZEN) {
/*
* When a vcpu is "frozen" it is outside the critical section
* around VMRUN() and 'nextrip' points to the next instruction.
* Thus instruction restart is achieved by setting 'nextrip'
* to the vcpu's %rip.
*/
error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RIP, &rip);
KASSERT(!error, ("%s: error %d getting rip", __func__, error));
VCPU_CTR2(vm, vcpuid, "restarting instruction by updating "
"nextrip from %#lx to %#lx", vcpu->nextrip, rip);
vcpu->nextrip = rip;
} else {
panic("%s: invalid state %d", __func__, state);
}
return (0);
}
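/*
 * Illustrative sketch only (not part of this change): how an in-kernel
 * emulation path might use vm_restart_instruction(). The helper
 * 'device_emulate_once' is hypothetical; the real callers in this diff
 * are emulate_movs() and the exception-injection path.
 */
static int
device_emulate_with_retry(struct vm *vm, int vcpuid)
{
	int error;

	error = device_emulate_once(vm, vcpuid);	/* hypothetical */
	if (error == ERESTART) {
		/* Re-execute the current instruction on the next entry. */
		error = vm_restart_instruction(vm, vcpuid);
	}
	return (error);
}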
int
vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info)
{
@ -1664,11 +1713,11 @@ vcpu_exception_intinfo(struct vcpu *vcpu)
uint64_t info = 0;
if (vcpu->exception_pending) {
info = vcpu->exception.vector & 0xff;
info = vcpu->exc_vector & 0xff;
info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION;
if (vcpu->exception.error_code_valid) {
if (vcpu->exc_errcode_valid) {
info |= VM_INTINFO_DEL_ERRCODE;
info |= (uint64_t)vcpu->exception.error_code << 32;
info |= (uint64_t)vcpu->exc_errcode << 32;
}
}
return (info);
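/*
 * Encoding example, derived from the VM_INTINFO_* definitions in
 * vmm.h: a pending #PF (vector 14) with error code 0x2 yields
 *
 *   info = 14 | VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION |
 *          VM_INTINFO_DEL_ERRCODE | ((uint64_t)0x2 << 32);
 *
 * i.e. vector in bits 7:0, type and valid bits in the low 32 bits,
 * error code in the upper 32 bits.
 */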
@ -1693,7 +1742,7 @@ vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo)
info2 = vcpu_exception_intinfo(vcpu);
vcpu->exception_pending = 0;
VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %#lx",
vcpu->exception.vector, info2);
vcpu->exc_vector, info2);
}
if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) {
@ -1731,14 +1780,16 @@ vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2)
}
int
vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception)
vm_inject_exception(struct vm *vm, int vcpuid, int vector, int errcode_valid,
uint32_t errcode, int restart_instruction)
{
struct vcpu *vcpu;
int error;
if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
return (EINVAL);
if (exception->vector < 0 || exception->vector >= 32)
if (vector < 0 || vector >= 32)
return (EINVAL);
/*
@ -1746,21 +1797,35 @@ vm_inject_exception(struct vm *vm, int vcpuid, struct vm_exception *exception)
* the guest. It is a derived exception that results from specific
* combinations of nested faults.
*/
if (exception->vector == IDT_DF)
if (vector == IDT_DF)
return (EINVAL);
vcpu = &vm->vcpu[vcpuid];
if (vcpu->exception_pending) {
VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to "
"pending exception %d", exception->vector,
vcpu->exception.vector);
"pending exception %d", vector, vcpu->exc_vector);
return (EBUSY);
}
/*
* From section 26.6.1 "Interruptibility State" in Intel SDM:
*
* Event blocking by "STI" or "MOV SS" is cleared after guest executes
* one instruction or incurs an exception.
*/
error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0);
KASSERT(error == 0, ("%s: error %d clearing interrupt shadow",
__func__, error));
if (restart_instruction)
vm_restart_instruction(vm, vcpuid);
vcpu->exception_pending = 1;
vcpu->exception = *exception;
VCPU_CTR1(vm, vcpuid, "Exception %d pending", exception->vector);
vcpu->exc_vector = vector;
vcpu->exc_errcode = errcode;
vcpu->exc_errcode_valid = errcode_valid;
VCPU_CTR1(vm, vcpuid, "Exception %d pending", vector);
return (0);
}
@ -1768,28 +1833,15 @@ void
vm_inject_fault(void *vmarg, int vcpuid, int vector, int errcode_valid,
int errcode)
{
struct vm_exception exception;
struct vm_exit *vmexit;
struct vm *vm;
int error;
int error, restart_instruction;
vm = vmarg;
restart_instruction = 1;
exception.vector = vector;
exception.error_code = errcode;
exception.error_code_valid = errcode_valid;
error = vm_inject_exception(vm, vcpuid, &exception);
error = vm_inject_exception(vm, vcpuid, vector, errcode_valid,
errcode, restart_instruction);
KASSERT(error == 0, ("vm_inject_exception error %d", error));
/*
* A fault-like exception allows the instruction to be restarted
* after the exception handler returns.
*
* By setting the inst_length to 0 we ensure that the instruction
* pointer remains at the faulting instruction.
*/
vmexit = vm_exitinfo(vm, vcpuid);
vmexit->inst_length = 0;
}
void
@ -2223,6 +2275,13 @@ vm_pmtmr(struct vm *vm)
return (vm->vpmtmr);
}
struct vrtc *
vm_rtc(struct vm *vm)
{
return (vm->vrtc);
}
enum vm_reg_name
vm_segment_name(int seg)
{

@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
#include "io/vatpic.h"
#include "io/vioapic.h"
#include "io/vhpet.h"
#include "io/vrtc.h"
struct vmmdev_softc {
struct vm *vm; /* vm instance cookie */
@ -174,6 +175,8 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct vm_activate_cpu *vac;
struct vm_cpuset *vm_cpuset;
struct vm_intinfo *vmii;
struct vm_rtc_time *rtctime;
struct vm_rtc_data *rtcdata;
sc = vmmdev_lookup2(cdev);
if (sc == NULL)
@ -202,6 +205,7 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
case VM_ACTIVATE_CPU:
case VM_SET_INTINFO:
case VM_GET_INTINFO:
case VM_RESTART_INSTRUCTION:
/*
* XXX fragile, handle with care
* Assumes that the first field of the ioctl data is the vcpu.
@ -307,7 +311,9 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
break;
case VM_INJECT_EXCEPTION:
vmexc = (struct vm_exception *)data;
error = vm_inject_exception(sc->vm, vmexc->cpuid, vmexc);
error = vm_inject_exception(sc->vm, vmexc->cpuid,
vmexc->vector, vmexc->error_code_valid, vmexc->error_code,
vmexc->restart_instruction);
break;
case VM_INJECT_NMI:
vmnmi = (struct vm_nmi *)data;
@ -482,6 +488,28 @@ vmmdev_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
error = vm_get_intinfo(sc->vm, vmii->vcpuid, &vmii->info1,
&vmii->info2);
break;
case VM_RTC_WRITE:
rtcdata = (struct vm_rtc_data *)data;
error = vrtc_nvram_write(sc->vm, rtcdata->offset,
rtcdata->value);
break;
case VM_RTC_READ:
rtcdata = (struct vm_rtc_data *)data;
error = vrtc_nvram_read(sc->vm, rtcdata->offset,
&rtcdata->value);
break;
case VM_RTC_SETTIME:
rtctime = (struct vm_rtc_time *)data;
error = vrtc_set_time(sc->vm, rtctime->secs);
break;
case VM_RTC_GETTIME:
error = 0;
rtctime = (struct vm_rtc_time *)data;
rtctime->secs = vrtc_get_time(sc->vm);
break;
case VM_RESTART_INSTRUCTION:
error = vm_restart_instruction(sc->vm, vcpu);
break;
default:
error = ENOTTY;
break;

@ -70,6 +70,7 @@ enum {
VIE_OP_TYPE_PUSH,
VIE_OP_TYPE_CMP,
VIE_OP_TYPE_POP,
VIE_OP_TYPE_MOVS,
VIE_OP_TYPE_LAST
};
@ -78,6 +79,7 @@ enum {
#define VIE_OP_F_IMM8 (1 << 1) /* 8-bit immediate operand */
#define VIE_OP_F_MOFFSET (1 << 2) /* 16/32/64-bit immediate moffset */
#define VIE_OP_F_NO_MODRM (1 << 3)
#define VIE_OP_F_NO_GLA_VERIFICATION (1 << 4)
static const struct vie_op two_byte_opcodes[256] = {
[0xB6] = {
@ -133,6 +135,16 @@ static const struct vie_op one_byte_opcodes[256] = {
.op_type = VIE_OP_TYPE_MOV,
.op_flags = VIE_OP_F_MOFFSET | VIE_OP_F_NO_MODRM,
},
[0xA4] = {
.op_byte = 0xA4,
.op_type = VIE_OP_TYPE_MOVS,
.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
},
[0xA5] = {
.op_byte = 0xA5,
.op_type = VIE_OP_TYPE_MOVS,
.op_flags = VIE_OP_F_NO_MODRM | VIE_OP_F_NO_GLA_VERIFICATION
},
[0xC6] = {
/* XXX Group 11 extended opcode - not just MOV */
.op_byte = 0xC6,
@ -559,6 +571,217 @@ emulate_movx(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
return (error);
}
/*
* Helper function to calculate and validate a linear address.
*
* Returns 0 on success and 1 if an exception was injected into the guest.
*/
static int
get_gla(void *vm, int vcpuid, struct vie *vie, struct vm_guest_paging *paging,
int opsize, int addrsize, int prot, enum vm_reg_name seg,
enum vm_reg_name gpr, uint64_t *gla)
{
struct seg_desc desc;
uint64_t cr0, val, rflags;
int error;
error = vie_read_register(vm, vcpuid, VM_REG_GUEST_CR0, &cr0);
KASSERT(error == 0, ("%s: error %d getting cr0", __func__, error));
error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
error = vm_get_seg_desc(vm, vcpuid, seg, &desc);
KASSERT(error == 0, ("%s: error %d getting segment descriptor %d",
__func__, error, seg));
error = vie_read_register(vm, vcpuid, gpr, &val);
KASSERT(error == 0, ("%s: error %d getting register %d", __func__,
error, gpr));
if (vie_calculate_gla(paging->cpu_mode, seg, &desc, val, opsize,
addrsize, prot, gla)) {
if (seg == VM_REG_GUEST_SS)
vm_inject_ss(vm, vcpuid, 0);
else
vm_inject_gp(vm, vcpuid);
return (1);
}
if (vie_canonical_check(paging->cpu_mode, *gla)) {
if (seg == VM_REG_GUEST_SS)
vm_inject_ss(vm, vcpuid, 0);
else
vm_inject_gp(vm, vcpuid);
return (1);
}
if (vie_alignment_check(paging->cpl, opsize, cr0, rflags, *gla)) {
vm_inject_ac(vm, vcpuid, 0);
return (1);
}
return (0);
}
static int
emulate_movs(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
struct vm_guest_paging *paging, mem_region_read_t memread,
mem_region_write_t memwrite, void *arg)
{
#ifdef _KERNEL
struct vm_copyinfo copyinfo[2];
#else
struct iovec copyinfo[2];
#endif
uint64_t dstaddr, srcaddr, val;
uint64_t rcx, rdi, rsi, rflags;
int error, opsize, seg, repeat;
opsize = (vie->op.op_byte == 0xA4) ? 1 : vie->opsize;
val = 0;
error = 0;
/*
* XXX although the MOVS instruction is only supposed to be used with
* the "rep" prefix, some guests like FreeBSD will use "repnz" instead.
*
* Empirically the "repnz" prefix has identical behavior to "rep"
* and the zero flag does not make a difference.
*/
repeat = vie->repz_present | vie->repnz_present;
if (repeat) {
error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RCX, &rcx);
KASSERT(!error, ("%s: error %d getting rcx", __func__, error));
/*
* The count register is %rcx, %ecx or %cx depending on the
* address size of the instruction.
*/
if ((rcx & vie_size2mask(vie->addrsize)) == 0)
return (0);
}
/*
* Source Destination Comments
* --------------------------------------------
* (1) memory memory n/a
* (2) memory mmio emulated
* (3) mmio memory emulated
* (4) mmio mmio not emulated
*
* At this point we don't have sufficient information to distinguish
* between (2), (3) and (4). We use 'vm_copy_setup()' to tease this
* out because it will succeed only when operating on regular memory.
*
* XXX the emulation doesn't properly handle the case where 'gpa'
* is straddling the boundary between the normal memory and MMIO.
*/
seg = vie->segment_override ? vie->segment_register : VM_REG_GUEST_DS;
error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize,
PROT_READ, seg, VM_REG_GUEST_RSI, &srcaddr);
if (error)
goto done;
error = vm_copy_setup(vm, vcpuid, paging, srcaddr, opsize, PROT_READ,
copyinfo, nitems(copyinfo));
if (error == 0) {
/*
* case (2): read from system memory and write to mmio.
*/
vm_copyin(vm, vcpuid, copyinfo, &val, opsize);
vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
error = memwrite(vm, vcpuid, gpa, val, opsize, arg);
goto done;
} else if (error > 0) {
/*
* Resume guest execution to handle fault.
*/
goto done;
} else {
/*
* 'vm_copy_setup()' is expected to fail for cases (3) and (4)
* if 'srcaddr' is in the mmio space.
*/
}
error = get_gla(vm, vcpuid, vie, paging, opsize, vie->addrsize,
PROT_WRITE, VM_REG_GUEST_ES, VM_REG_GUEST_RDI, &dstaddr);
if (error)
goto done;
error = vm_copy_setup(vm, vcpuid, paging, dstaddr, opsize,
PROT_WRITE, copyinfo, nitems(copyinfo));
if (error == 0) {
/*
* case (3): read from MMIO and write to system memory.
*
* A MMIO read can have side-effects so we commit to it
* only after vm_copy_setup() is successful. If a page-fault
* needs to be injected into the guest then it will happen
* before the MMIO read is attempted.
*/
error = memread(vm, vcpuid, gpa, &val, opsize, arg);
if (error)
goto done;
vm_copyout(vm, vcpuid, &val, copyinfo, opsize);
vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
} else if (error > 0) {
/*
* Resume guest execution to handle fault.
*/
goto done;
} else {
goto done;
}
error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RSI, &rsi);
KASSERT(error == 0, ("%s: error %d getting rsi", __func__, error));
error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RDI, &rdi);
KASSERT(error == 0, ("%s: error %d getting rdi", __func__, error));
error = vie_read_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, &rflags);
KASSERT(error == 0, ("%s: error %d getting rflags", __func__, error));
if (rflags & PSL_D) {
rsi -= opsize;
rdi -= opsize;
} else {
rsi += opsize;
rdi += opsize;
}
error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSI, rsi,
vie->addrsize);
KASSERT(error == 0, ("%s: error %d updating rsi", __func__, error));
error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RDI, rdi,
vie->addrsize);
KASSERT(error == 0, ("%s: error %d updating rdi", __func__, error));
if (repeat) {
rcx = rcx - 1;
error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RCX,
rcx, vie->addrsize);
KASSERT(!error, ("%s: error %d updating rcx", __func__, error));
/*
* Repeat the instruction if the count register is not zero.
*/
if ((rcx & vie_size2mask(vie->addrsize)) != 0)
vm_restart_instruction(vm, vcpuid);
}
done:
if (error < 0)
return (EFAULT);
else
return (0);
}
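/*
 * A compact restatement of the %rsi/%rdi stepping above (sketch only,
 * not part of this change): string indexes move by 'opsize' bytes,
 * downward when RFLAGS.DF (PSL_D) is set and upward otherwise;
 * vie_update_register() then masks the result to the instruction's
 * address size.
 */
static __inline uint64_t
movs_step(uint64_t index, int opsize, uint64_t rflags)
{

	return ((rflags & PSL_D) ? index - opsize : index + opsize);
}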
static int
emulate_and(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
mem_region_read_t memread, mem_region_write_t memwrite, void *arg)
@ -926,9 +1149,7 @@ emulate_stack_op(void *vm, int vcpuid, uint64_t mmio_gpa, struct vie *vie,
error = memwrite(vm, vcpuid, mmio_gpa, val, size, arg);
rsp += size;
}
#ifdef _KERNEL
vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
#endif
if (error == 0) {
error = vie_update_register(vm, vcpuid, VM_REG_GUEST_RSP, rsp,
@ -1012,6 +1233,10 @@ vmm_emulate_instruction(void *vm, int vcpuid, uint64_t gpa, struct vie *vie,
error = emulate_movx(vm, vcpuid, gpa, vie,
memread, memwrite, memarg);
break;
case VIE_OP_TYPE_MOVS:
error = emulate_movs(vm, vcpuid, gpa, vie, paging, memread,
memwrite, memarg);
break;
case VIE_OP_TYPE_AND:
error = emulate_and(vm, vcpuid, gpa, vie,
memread, memwrite, memarg);
@ -1193,6 +1418,7 @@ vie_init(struct vie *vie, const char *inst_bytes, int inst_length)
vie->base_register = VM_REG_LAST;
vie->index_register = VM_REG_LAST;
vie->segment_register = VM_REG_LAST;
if (inst_length) {
bcopy(inst_bytes, vie->inst, inst_length);
@ -1458,6 +1684,35 @@ vie_advance(struct vie *vie)
vie->num_processed++;
}
static bool
segment_override(uint8_t x, int *seg)
{
switch (x) {
case 0x2E:
*seg = VM_REG_GUEST_CS;
break;
case 0x36:
*seg = VM_REG_GUEST_SS;
break;
case 0x3E:
*seg = VM_REG_GUEST_DS;
break;
case 0x26:
*seg = VM_REG_GUEST_ES;
break;
case 0x64:
*seg = VM_REG_GUEST_FS;
break;
case 0x65:
*seg = VM_REG_GUEST_GS;
break;
default:
return (false);
}
return (true);
}
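/*
 * For example, a 0x36 prefix byte selects %ss, so "ss: movsb" is
 * emulated with %ss as the source segment instead of the %ds default
 * chosen in emulate_movs().
 */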
static int
decode_prefixes(struct vie *vie, enum vm_cpu_mode cpu_mode, int cs_d)
{
@ -1471,6 +1726,12 @@ decode_prefixes(struct vie *vie, enum vm_cpu_mode cpu_mode, int cs_d)
vie->opsize_override = 1;
else if (x == 0x67)
vie->addrsize_override = 1;
else if (x == 0xF3)
vie->repz_present = 1;
else if (x == 0xF2)
vie->repnz_present = 1;
else if (segment_override(x, &vie->segment_register))
vie->segment_override = 1;
else
break;
@ -1923,8 +2184,10 @@ vmm_decode_instruction(struct vm *vm, int cpuid, uint64_t gla,
if (verify_inst_length(vie))
return (-1);
if (verify_gla(vm, cpuid, gla, vie))
return (-1);
if ((vie->op.op_flags & VIE_OP_F_NO_GLA_VERIFICATION) == 0) {
if (verify_gla(vm, cpuid, gla, vie))
return (-1);
}
vie->decoded = 1; /* success */

@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include "vatpic.h"
#include "vatpit.h"
#include "vpmtmr.h"
#include "vrtc.h"
#include "vmm_ioport.h"
#include "vmm_ktr.h"
@ -60,6 +61,8 @@ ioport_handler_func_t ioport_handler[MAX_IOPORTS] = {
[IO_ELCR1] = vatpic_elc_handler,
[IO_ELCR2] = vatpic_elc_handler,
[IO_PMTMR] = vpmtmr_handler,
[IO_RTC] = vrtc_addr_handler,
[IO_RTC + 1] = vrtc_data_handler,
};
#ifdef KTR
@ -71,7 +74,7 @@ inout_instruction(struct vm_exit *vmexit)
static const char *iodesc[] = {
"outb", "outw", "outl",
"inb", "inw", "inl",
"outsb", "outsw", "outsd"
"outsb", "outsw", "outsd",
"insb", "insw", "insd",
};

@ -35,7 +35,8 @@ SRCS+= iommu.c \
vhpet.c \
vioapic.c \
vlapic.c \
vpmtmr.c
vpmtmr.c \
vrtc.c
# intel-specific files
.PATH: ${.CURDIR}/../../amd64/vmm/intel

@ -32,7 +32,7 @@
.Nd "run a guest operating system inside a virtual machine"
.Sh SYNOPSIS
.Nm
.Op Fl abehwxACHPWY
.Op Fl abehuwxACHPWY
.Op Fl c Ar numcpus
.Op Fl g Ar gdbport
.Op Fl l Ar lpcdev Ns Op , Ns Ar conf
@ -239,6 +239,8 @@ The host device must have been reserved at boot-time using the
loader variable as described in
.Xr vmm 4 .
.El
.It Fl u
RTC keeps UTC time.
.It Fl U Ar uuid
Set the universally unique identifier
.Pq UUID

@ -122,7 +122,7 @@ usage(int code)
{
fprintf(stderr,
"Usage: %s [-abehwxACHPWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n"
"Usage: %s [-abehuwxACHPWY] [-c vcpus] [-g <gdb port>] [-l <lpc>]\n"
" %*s [-m mem] [-p vcpu:hostcpu] [-s <pci>] [-U uuid] <vm>\n"
" -a: local apic is in xAPIC mode (deprecated)\n"
" -A: create ACPI tables\n"
@ -137,6 +137,7 @@ usage(int code)
" -p: pin 'vcpu' to 'hostcpu'\n"
" -P: vmexit from the guest on pause\n"
" -s: <slot,driver,configinfo> PCI slot config\n"
" -u: RTC keeps UTC time\n"
" -U: uuid\n"
" -w: ignore unimplemented MSRs\n"
" -W: force virtio to use single-vector MSI\n"
@ -185,20 +186,14 @@ vm_inject_fault(void *arg, int vcpu, int vector, int errcode_valid,
int errcode)
{
struct vmctx *ctx;
int error;
int error, restart_instruction;
ctx = arg;
if (errcode_valid)
error = vm_inject_exception2(ctx, vcpu, vector, errcode);
else
error = vm_inject_exception(ctx, vcpu, vector);
assert(error == 0);
restart_instruction = 1;
/*
* Set the instruction length to 0 to ensure that the instruction is
* restarted when the fault handler returns.
*/
vmexit[vcpu].inst_length = 0;
error = vm_inject_exception(ctx, vcpu, vector, errcode_valid, errcode,
restart_instruction);
assert(error == 0);
}
void *
@ -329,12 +324,6 @@ vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
}
error = emulate_inout(ctx, vcpu, vme, strictio);
if (!error && in && !string) {
error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX,
vme->u.inout.eax);
assert(error == 0);
}
if (error) {
fprintf(stderr, "Unhandled %s%c 0x%04x\n", in ? "in" : "out",
bytes == 1 ? 'b' : (bytes == 2 ? 'w' : 'l'), port);
@ -358,7 +347,7 @@ vmexit_rdmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
vme->u.msr.code, *pvcpu);
if (strictmsr) {
vm_inject_gp(ctx, *pvcpu);
return (VMEXIT_RESTART);
return (VMEXIT_CONTINUE);
}
}
@ -384,7 +373,7 @@ vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
vme->u.msr.code, vme->u.msr.wval, *pvcpu);
if (strictmsr) {
vm_inject_gp(ctx, *pvcpu);
return (VMEXIT_RESTART);
return (VMEXIT_CONTINUE);
}
}
return (VMEXIT_CONTINUE);
@ -462,9 +451,11 @@ static int
vmexit_bogus(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
assert(vmexit->inst_length == 0);
stats.vmexit_bogus++;
return (VMEXIT_RESTART);
return (VMEXIT_CONTINUE);
}
static int
@ -494,9 +485,11 @@ static int
vmexit_mtrap(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
{
assert(vmexit->inst_length == 0);
stats.vmexit_mtrap++;
return (VMEXIT_RESTART);
return (VMEXIT_CONTINUE);
}
static int
@ -581,7 +574,7 @@ static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
};
static void
vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip)
{
int error, rc, prevcpu;
enum vm_exitcode exitcode;
@ -596,8 +589,11 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
error = vm_active_cpus(ctx, &active_cpus);
assert(CPU_ISSET(vcpu, &active_cpus));
error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, startrip);
assert(error == 0);
while (1) {
error = vm_run(ctx, vcpu, rip, &vmexit[vcpu]);
error = vm_run(ctx, vcpu, &vmexit[vcpu]);
if (error != 0)
break;
@ -614,10 +610,6 @@ vm_loop(struct vmctx *ctx, int vcpu, uint64_t rip)
switch (rc) {
case VMEXIT_CONTINUE:
rip = vmexit[vcpu].rip + vmexit[vcpu].inst_length;
break;
case VMEXIT_RESTART:
rip = vmexit[vcpu].rip;
break;
case VMEXIT_ABORT:
abort();
@ -694,6 +686,7 @@ main(int argc, char *argv[])
{
int c, error, gdb_port, err, bvmcons;
int dump_guest_memory, max_vcpus, mptgen;
int rtc_localtime;
struct vmctx *ctx;
uint64_t rip;
size_t memsize;
@ -705,8 +698,9 @@ main(int argc, char *argv[])
guest_ncpus = 1;
memsize = 256 * MB;
mptgen = 1;
rtc_localtime = 1;
while ((c = getopt(argc, argv, "abehwxACHIPWYp:g:c:s:m:l:U:")) != -1) {
while ((c = getopt(argc, argv, "abehuwxACHIPWYp:g:c:s:m:l:U:")) != -1) {
switch (c) {
case 'a':
x2apic_mode = 0;
@ -766,6 +760,9 @@ main(int argc, char *argv[])
case 'e':
strictio = 1;
break;
case 'u':
rtc_localtime = 0;
break;
case 'U':
guest_uuid_str = optarg;
break;
@ -829,7 +826,7 @@ main(int argc, char *argv[])
pci_irq_init(ctx);
ioapic_init(ctx);
rtc_init(ctx);
rtc_init(ctx, rtc_localtime);
sci_init(ctx);
/*

@ -35,9 +35,8 @@
#define __CTASSERT(x, y) typedef char __assert ## y[(x) ? 1 : -1]
#endif
#define VMEXIT_CONTINUE 1 /* continue from next instruction */
#define VMEXIT_RESTART 2 /* restart current instruction */
#define VMEXIT_ABORT 3 /* abort the vm run loop */
#define VMEXIT_CONTINUE (0)
#define VMEXIT_ABORT (-1)
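/*
 * With the kernel now tracking 'nextrip' there is no VMEXIT_RESTART:
 * a handler that needs the current instruction re-executed calls
 * vm_restart_instruction() (see the inout.c hunk below) and returns
 * VMEXIT_CONTINUE like any other handler.
 */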
struct vmctx;
extern int guest_ncpus;

@ -104,7 +104,7 @@ int
emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict)
{
int addrsize, bytes, flags, in, port, prot, rep;
uint32_t val;
uint32_t eax, val;
inout_func_t handler;
void *arg;
int error, retval;
@ -214,16 +214,20 @@ emulate_inout(struct vmctx *ctx, int vcpu, struct vm_exit *vmexit, int strict)
}
/* Restart the instruction if more iterations remain */
if (retval == 0 && count != 0)
vmexit->inst_length = 0;
} else {
if (!in) {
val = vmexit->u.inout.eax & vie_size2mask(bytes);
if (retval == 0 && count != 0) {
error = vm_restart_instruction(ctx, vcpu);
assert(error == 0);
}
} else {
eax = vmexit->u.inout.eax;
val = eax & vie_size2mask(bytes);
retval = handler(ctx, vcpu, in, port, bytes, &val, arg);
if (retval == 0 && in) {
vmexit->u.inout.eax &= ~vie_size2mask(bytes);
vmexit->u.inout.eax |= val & vie_size2mask(bytes);
eax &= ~vie_size2mask(bytes);
eax |= val & vie_size2mask(bytes);
error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RAX,
eax);
assert(error == 0);
}
}
return (retval);

@ -2299,7 +2299,8 @@ pci_ahci_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts, int atapi)
open_fail:
if (ret) {
blockif_close(sc->port[0].bctx);
if (sc->port[0].bctx != NULL)
blockif_close(sc->port[0].bctx);
free(sc);
}

@ -30,10 +30,7 @@
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/time.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <assert.h>
@ -41,47 +38,11 @@ __FBSDID("$FreeBSD$");
#include <vmmapi.h>
#include "acpi.h"
#include "inout.h"
#include "pci_lpc.h"
#include "rtc.h"
#define IO_RTC 0x70
#define IO_RTC 0x70
#define RTC_SEC 0x00 /* seconds */
#define RTC_SEC_ALARM 0x01
#define RTC_MIN 0x02
#define RTC_MIN_ALARM 0x03
#define RTC_HRS 0x04
#define RTC_HRS_ALARM 0x05
#define RTC_WDAY 0x06
#define RTC_DAY 0x07
#define RTC_MONTH 0x08
#define RTC_YEAR 0x09
#define RTC_CENTURY 0x32 /* current century */
#define RTC_STATUSA 0xA
#define RTCSA_TUP 0x80 /* time update, don't look now */
#define RTC_STATUSB 0xB
#define RTCSB_DST 0x01
#define RTCSB_24HR 0x02
#define RTCSB_BIN 0x04 /* 0 = BCD, 1 = Binary */
#define RTCSB_PINTR 0x40 /* 1 = enable periodic clock interrupt */
#define RTCSB_HALT 0x80 /* stop clock updates */
#define RTC_INTR 0x0c /* status register C (R) interrupt source */
#define RTC_STATUSD 0x0d /* status register D (R) Lost Power */
#define RTCSD_PWR 0x80 /* clock power OK */
#define RTC_NVRAM_START 0x0e
#define RTC_NVRAM_END 0x7f
#define RTC_NVRAM_SZ (128 - RTC_NVRAM_START)
#define nvoff(x) ((x) - RTC_NVRAM_START)
#define RTC_DIAG 0x0e
#define RTC_RSTCODE 0x0f
#define RTC_EQUIPMENT 0x14
#define RTC_LMEM_LSB 0x34
#define RTC_LMEM_MSB 0x35
#define RTC_HMEM_LSB 0x5b
@ -92,249 +53,30 @@ __FBSDID("$FreeBSD$");
#define m_16MB (16*1024*1024)
#define m_4GB (4ULL*1024*1024*1024)
static int addr;
static uint8_t rtc_nvram[RTC_NVRAM_SZ];
/* XXX initialize these to default values as they would be from BIOS */
static uint8_t status_a, status_b;
static struct {
uint8_t hours;
uint8_t mins;
uint8_t secs;
} rtc_alarm;
static u_char const bin2bcd_data[] = {
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09,
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19,
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29,
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39,
0x40, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48, 0x49,
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68, 0x69,
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
0x80, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88, 0x89,
0x90, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96, 0x97, 0x98, 0x99
};
#define bin2bcd(bin) (bin2bcd_data[bin])
#define rtcout(val) ((status_b & RTCSB_BIN) ? (val) : bin2bcd((val)))
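/*
 * e.g. bin2bcd(59) == 0x59, so rtcout(59) reads back as 0x59 in the
 * default BCD mode and as plain binary 0x3b when RTCSB_BIN is set.
 */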
static void
timevalfix(struct timeval *t1)
/*
* Returns the current RTC time as the number of seconds since 00:00:00 Jan 1, 1970
*/
static time_t
rtc_time(struct vmctx *ctx, int use_localtime)
{
if (t1->tv_usec < 0) {
t1->tv_sec--;
t1->tv_usec += 1000000;
}
if (t1->tv_usec >= 1000000) {
t1->tv_sec++;
t1->tv_usec -= 1000000;
}
}
static void
timevalsub(struct timeval *t1, const struct timeval *t2)
{
t1->tv_sec -= t2->tv_sec;
t1->tv_usec -= t2->tv_usec;
timevalfix(t1);
}
static int
rtc_addr_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
uint32_t *eax, void *arg)
{
if (bytes != 1)
return (-1);
if (in) {
/* straight read of this register will return 0xFF */
*eax = 0xff;
return (0);
}
switch (*eax & 0x7f) {
case RTC_SEC:
case RTC_SEC_ALARM:
case RTC_MIN:
case RTC_MIN_ALARM:
case RTC_HRS:
case RTC_HRS_ALARM:
case RTC_WDAY:
case RTC_DAY:
case RTC_MONTH:
case RTC_YEAR:
case RTC_STATUSA:
case RTC_STATUSB:
case RTC_INTR:
case RTC_STATUSD:
case RTC_NVRAM_START ... RTC_NVRAM_END:
break;
default:
return (-1);
}
addr = *eax & 0x7f;
return (0);
}
static int
rtc_data_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
uint32_t *eax, void *arg)
{
int hour;
struct tm tm;
time_t t;
struct timeval cur, delta;
static struct timeval last;
static struct tm tm;
if (bytes != 1)
return (-1);
gettimeofday(&cur, NULL);
/*
* Increment the cached time only once per second so we can guarantee
* that the guest has at least one second to read the hour:min:sec
* separately and still get a coherent view of the time.
*/
delta = cur;
timevalsub(&delta, &last);
if (delta.tv_sec >= 1 && (status_b & RTCSB_HALT) == 0) {
t = cur.tv_sec;
time(&t);
if (use_localtime) {
localtime_r(&t, &tm);
last = cur;
t = timegm(&tm);
}
if (in) {
switch (addr) {
case RTC_SEC_ALARM:
*eax = rtc_alarm.secs;
break;
case RTC_MIN_ALARM:
*eax = rtc_alarm.mins;
break;
case RTC_HRS_ALARM:
*eax = rtc_alarm.hours;
break;
case RTC_SEC:
*eax = rtcout(tm.tm_sec);
return (0);
case RTC_MIN:
*eax = rtcout(tm.tm_min);
return (0);
case RTC_HRS:
if (status_b & RTCSB_24HR)
hour = tm.tm_hour;
else
hour = (tm.tm_hour % 12) + 1;
*eax = rtcout(hour);
/*
* If we are representing time in the 12-hour format
* then set the MSB to indicate PM.
*/
if ((status_b & RTCSB_24HR) == 0 && tm.tm_hour >= 12)
*eax |= 0x80;
return (0);
case RTC_WDAY:
*eax = rtcout(tm.tm_wday + 1);
return (0);
case RTC_DAY:
*eax = rtcout(tm.tm_mday);
return (0);
case RTC_MONTH:
*eax = rtcout(tm.tm_mon + 1);
return (0);
case RTC_YEAR:
*eax = rtcout(tm.tm_year % 100);
return (0);
case RTC_STATUSA:
*eax = status_a;
return (0);
case RTC_STATUSB:
*eax = status_b;
return (0);
case RTC_INTR:
*eax = 0;
return (0);
case RTC_STATUSD:
*eax = RTCSD_PWR;
return (0);
case RTC_NVRAM_START ... RTC_NVRAM_END:
*eax = rtc_nvram[addr - RTC_NVRAM_START];
return (0);
default:
return (-1);
}
}
switch (addr) {
case RTC_STATUSA:
status_a = *eax & ~RTCSA_TUP;
break;
case RTC_STATUSB:
/* XXX not implemented yet XXX */
if (*eax & RTCSB_PINTR)
return (-1);
status_b = *eax;
break;
case RTC_STATUSD:
/* ignore write */
break;
case RTC_SEC_ALARM:
rtc_alarm.secs = *eax;
break;
case RTC_MIN_ALARM:
rtc_alarm.mins = *eax;
break;
case RTC_HRS_ALARM:
rtc_alarm.hours = *eax;
break;
case RTC_SEC:
case RTC_MIN:
case RTC_HRS:
case RTC_WDAY:
case RTC_DAY:
case RTC_MONTH:
case RTC_YEAR:
/*
* Ignore writes to the time of day registers
*/
break;
case RTC_NVRAM_START ... RTC_NVRAM_END:
rtc_nvram[addr - RTC_NVRAM_START] = *eax;
break;
default:
return (-1);
}
return (0);
return (t);
}
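/*
 * Equivalent standalone sketch of the use_localtime conversion above
 * (illustrative only): re-encode the local broken-down time as if it
 * were UTC, so that a UTC-keeping RTC presents local wall-clock time
 * to the guest.
 */
static time_t
local_wallclock_as_utc(time_t t)
{
	struct tm tm;

	localtime_r(&t, &tm);
	return (timegm(&tm));
}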
void
rtc_init(struct vmctx *ctx)
rtc_init(struct vmctx *ctx, int use_localtime)
{
struct timeval cur;
struct tm tm;
size_t himem;
size_t lomem;
int err;
err = gettimeofday(&cur, NULL);
assert(err == 0);
(void) localtime_r(&cur.tv_sec, &tm);
memset(rtc_nvram, 0, sizeof(rtc_nvram));
rtc_nvram[nvoff(RTC_CENTURY)] = bin2bcd((tm.tm_year + 1900) / 100);
/* XXX init diag/reset code/equipment/checksum ? */
/*
@ -344,17 +86,22 @@ rtc_init(struct vmctx *ctx)
* 0x5b/0x5c/0x5d - 64KB chunks above 4GB
*/
lomem = (vm_get_lowmem_size(ctx) - m_16MB) / m_64KB;
rtc_nvram[nvoff(RTC_LMEM_LSB)] = lomem;
rtc_nvram[nvoff(RTC_LMEM_MSB)] = lomem >> 8;
err = vm_rtc_write(ctx, RTC_LMEM_LSB, lomem);
assert(err == 0);
err = vm_rtc_write(ctx, RTC_LMEM_MSB, lomem >> 8);
assert(err == 0);
himem = vm_get_highmem_size(ctx) / m_64KB;
rtc_nvram[nvoff(RTC_HMEM_LSB)] = himem;
rtc_nvram[nvoff(RTC_HMEM_SB)] = himem >> 8;
rtc_nvram[nvoff(RTC_HMEM_MSB)] = himem >> 16;
}
err = vm_rtc_write(ctx, RTC_HMEM_LSB, himem);
assert(err == 0);
err = vm_rtc_write(ctx, RTC_HMEM_SB, himem >> 8);
assert(err == 0);
err = vm_rtc_write(ctx, RTC_HMEM_MSB, himem >> 16);
assert(err == 0);
INOUT_PORT(rtc, IO_RTC, IOPORT_F_INOUT, rtc_addr_handler);
INOUT_PORT(rtc, IO_RTC + 1, IOPORT_F_INOUT, rtc_data_handler);
err = vm_rtc_settime(ctx, rtc_time(ctx, use_localtime));
assert(err == 0);
}
static void
rtc_dsdt(void)

@ -29,6 +29,6 @@
#ifndef _RTC_H_
#define _RTC_H_
void rtc_init(struct vmctx *ctx);
void rtc_init(struct vmctx *ctx, int use_localtime);
#endif /* _RTC_H_ */

@ -725,20 +725,10 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
assert(paging->cpu_mode == CPU_MODE_PROTECTED);
/*
* Calculate the %eip to store in the old TSS before modifying the
* 'inst_length'.
* Calculate the instruction pointer to store in the old TSS.
*/
eip = vmexit->rip + vmexit->inst_length;
/*
* Set the 'inst_length' to '0'.
*
* If an exception is triggered during emulation of the task switch
* then the exception handler should return to the instruction that
* caused the task switch as opposed to the subsequent instruction.
*/
vmexit->inst_length = 0;
/*
* Section 4.6, "Access Rights" in Intel SDM Vol 3.
* The following page table accesses are implicitly supervisor mode:
@ -883,8 +873,8 @@ vmexit_task_switch(struct vmctx *ctx, struct vm_exit *vmexit, int *pvcpu)
* after this point will be handled in the context of the new task and
* the saved instruction pointer will belong to the new task.
*/
vmexit->rip = newtss.tss_eip;
assert(vmexit->inst_length == 0);
error = vm_set_register(ctx, vcpu, VM_REG_GUEST_RIP, newtss.tss_eip);
assert(error == 0);
/* Load processor state from new TSS */
error = tss32_restore(ctx, vcpu, task_switch, ot_sel, &newtss, nt_iov);

@ -185,6 +185,15 @@ emulate_rdmsr(struct vmctx *ctx, int vcpu, uint32_t num, uint64_t *val)
*val = 0;
break;
/*
* OpenBSD guests test bit 0 of this MSR to detect if the
* workaround for erratum 721 is already applied.
* http://support.amd.com/TechDocs/41322_10h_Rev_Gd.pdf
*/
case 0xC0011029:
*val = 1;
break;
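/*
 * Guest-side view (illustrative): OpenBSD effectively does
 * "if (rdmsr(0xC0011029) & 1) skip the workaround", so returning 1
 * here keeps such guests from attempting the wrmsr.
 */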
default:
error = -1;
break;

@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
#include <fcntl.h>
#include <string.h>
#include <getopt.h>
#include <time.h>
#include <assert.h>
#include <machine/cpufunc.h>
@ -157,6 +158,11 @@ usage(bool cpu_intel)
" [--inject-nmi]\n"
" [--force-reset]\n"
" [--force-poweroff]\n"
" [--get-rtc-time]\n"
" [--set-rtc-time=<secs>]\n"
" [--get-rtc-nvram]\n"
" [--set-rtc-nvram=<val>]\n"
" [--rtc-nvram-offset=<offset>]\n"
" [--get-active-cpus]\n"
" [--get-suspended-cpus]\n"
" [--get-intinfo]\n"
@ -220,6 +226,12 @@ usage(bool cpu_intel)
exit(1);
}
static int get_rtc_time, set_rtc_time;
static int get_rtc_nvram, set_rtc_nvram;
static int rtc_nvram_offset;
static uint8_t rtc_nvram_value;
static time_t rtc_secs;
static int get_stats, getcap, setcap, capval, get_gpa_pmap;
static int inject_nmi, assert_lapic_lvt;
static int force_reset, force_poweroff;
@ -545,6 +557,9 @@ enum {
UNASSIGN_PPTDEV,
GET_GPA_PMAP,
ASSERT_LAPIC_LVT,
SET_RTC_TIME,
SET_RTC_NVRAM,
RTC_NVRAM_OFFSET,
};
static void
@ -1269,6 +1284,11 @@ setup_options(bool cpu_intel)
{ "setcap", REQ_ARG, 0, SET_CAP },
{ "get-gpa-pmap", REQ_ARG, 0, GET_GPA_PMAP },
{ "assert-lapic-lvt", REQ_ARG, 0, ASSERT_LAPIC_LVT },
{ "get-rtc-time", NO_ARG, &get_rtc_time, 1 },
{ "set-rtc-time", REQ_ARG, 0, SET_RTC_TIME },
{ "rtc-nvram-offset", REQ_ARG, 0, RTC_NVRAM_OFFSET },
{ "get-rtc-nvram", NO_ARG, &get_rtc_nvram, 1 },
{ "set-rtc-nvram", REQ_ARG, 0, SET_RTC_NVRAM },
{ "getcap", NO_ARG, &getcap, 1 },
{ "get-stats", NO_ARG, &get_stats, 1 },
{ "get-desc-ds",NO_ARG, &get_desc_ds, 1 },
@ -1462,6 +1482,33 @@ setup_options(bool cpu_intel)
return (all_opts);
}
static const char *
wday_str(int idx)
{
static const char *weekdays[] = {
"Sun", "Mon", "Tue", "Wed", "Thu", "Fri", "Sat"
};
if (idx >= 0 && idx < 7)
return (weekdays[idx]);
else
return ("UNK");
}
static const char *
mon_str(int idx)
{
static const char *months[] = {
"Jan", "Feb", "Mar", "Apr", "May", "Jun",
"Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
};
if (idx >= 0 && idx < 12)
return (months[idx]);
else
return ("UNK");
}
int
main(int argc, char *argv[])
{
@ -1477,6 +1524,7 @@ main(int argc, char *argv[])
cpuset_t cpus;
bool cpu_intel;
uint64_t cs, ds, es, fs, gs, ss, tr, ldtr;
struct tm tm;
struct option *opts;
cpu_intel = cpu_vendor_intel();
@ -1594,6 +1642,17 @@ main(int argc, char *argv[])
capval = strtoul(optarg, NULL, 0);
setcap = 1;
break;
case SET_RTC_TIME:
rtc_secs = strtoul(optarg, NULL, 0);
set_rtc_time = 1;
break;
case SET_RTC_NVRAM:
rtc_nvram_value = (uint8_t)strtoul(optarg, NULL, 0);
set_rtc_nvram = 1;
break;
case RTC_NVRAM_OFFSET:
rtc_nvram_offset = strtoul(optarg, NULL, 0);
break;
case GET_GPA_PMAP:
gpa_pmap = strtoul(optarg, NULL, 0);
get_gpa_pmap = 1;
@ -1971,6 +2030,31 @@ main(int argc, char *argv[])
}
}
if (!error && set_rtc_nvram)
error = vm_rtc_write(ctx, rtc_nvram_offset, rtc_nvram_value);
if (!error && (get_rtc_nvram || get_all)) {
error = vm_rtc_read(ctx, rtc_nvram_offset, &rtc_nvram_value);
if (error == 0) {
printf("rtc nvram[%03d]: 0x%02x\n", rtc_nvram_offset,
rtc_nvram_value);
}
}
if (!error && set_rtc_time)
error = vm_rtc_settime(ctx, rtc_secs);
if (!error && (get_rtc_time || get_all)) {
error = vm_rtc_gettime(ctx, &rtc_secs);
if (error == 0) {
gmtime_r(&rtc_secs, &tm);
printf("rtc time %#lx: %s %s %02d %02d:%02d:%02d %d\n",
rtc_secs, wday_str(tm.tm_wday), mon_str(tm.tm_mon),
tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec,
1900 + tm.tm_year);
}
}
if (!error && (getcap || get_all)) {
int captype, val, getcaptype;
@ -2034,10 +2118,7 @@ main(int argc, char *argv[])
}
if (!error && run) {
error = vm_get_register(ctx, vcpu, VM_REG_GUEST_RIP, &rip);
assert(error == 0);
error = vm_run(ctx, vcpu, rip, &vmexit);
error = vm_run(ctx, vcpu, &vmexit);
if (error == 0)
dump_vm_run_exitcode(&vmexit, vcpu);
else