Unsynchronized TSCs on the host require special handling in bhyve:
- use clock_gettime(2) as the time base for the emulated ACPI timer instead of directly using rdtsc().
- don't advertise the invariant TSC capability to the guest to discourage it from using the TSC as its time base.

Discussed with: jhb@ (about making 'smp_tsc' a global)
Reported by: Dan Mack on freebsd-virtualization@
Obtained from: NetApp
This commit is contained in:
parent
31c18e29cc
commit
1472b87f2f
@ -20,6 +20,9 @@ extern int i8254_max_count;
|
|||||||
extern uint64_t tsc_freq;
|
extern uint64_t tsc_freq;
|
||||||
extern int tsc_is_invariant;
|
extern int tsc_is_invariant;
|
||||||
extern int tsc_perf_stat;
|
extern int tsc_perf_stat;
|
||||||
|
#ifdef SMP
|
||||||
|
extern int smp_tsc;
|
||||||
|
#endif
|
||||||
|
|
||||||
void i8254_init(void);
|
void i8254_init(void);
|
||||||
|
|
||||||
|
@ -34,6 +34,7 @@ __FBSDID("$FreeBSD$");
|
|||||||
#include <sys/systm.h>
|
#include <sys/systm.h>
|
||||||
#include <sys/cpuset.h>
|
#include <sys/cpuset.h>
|
||||||
|
|
||||||
|
#include <machine/clock.h>
|
||||||
#include <machine/cpufunc.h>
|
#include <machine/cpufunc.h>
|
||||||
#include <machine/md_var.h>
|
#include <machine/md_var.h>
|
||||||
#include <machine/specialreg.h>
|
#include <machine/specialreg.h>
|
||||||
@ -89,11 +90,27 @@ x86_emulate_cpuid(struct vm *vm, int vcpu_id,
|
|||||||
case CPUID_8000_0003:
|
case CPUID_8000_0003:
|
||||||
case CPUID_8000_0004:
|
case CPUID_8000_0004:
|
||||||
case CPUID_8000_0006:
|
case CPUID_8000_0006:
|
||||||
case CPUID_8000_0007:
|
|
||||||
case CPUID_8000_0008:
|
case CPUID_8000_0008:
|
||||||
cpuid_count(*eax, *ecx, regs);
|
cpuid_count(*eax, *ecx, regs);
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case CPUID_8000_0007:
|
||||||
|
cpuid_count(*eax, *ecx, regs);
|
||||||
|
/*
|
||||||
|
* If the host TSCs are not synchronized across
|
||||||
|
* physical cpus then we cannot advertise an
|
||||||
|
* invariant tsc to a vcpu.
|
||||||
|
*
|
||||||
|
* XXX This still falls short because the vcpu
|
||||||
|
* can observe the TSC moving backwards as it
|
||||||
|
* migrates across physical cpus. But at least
|
||||||
|
* it should discourage the guest from using the
|
||||||
|
* TSC to keep track of time.
|
||||||
|
*/
|
||||||
|
if (!smp_tsc)
|
||||||
|
regs[3] &= ~AMDPM_TSC_INVARIANT;
|
||||||
|
break;
|
||||||
|
|
||||||
case CPUID_0000_0001:
|
case CPUID_0000_0001:
|
||||||
do_cpuid(1, regs);
|
do_cpuid(1, regs);
|
||||||
|
|
||||||
|
@ -61,7 +61,7 @@ SYSCTL_INT(_kern_timecounter, OID_AUTO, invariant_tsc, CTLFLAG_RDTUN,
|
|||||||
TUNABLE_INT("kern.timecounter.invariant_tsc", &tsc_is_invariant);
|
TUNABLE_INT("kern.timecounter.invariant_tsc", &tsc_is_invariant);
|
||||||
|
|
||||||
#ifdef SMP
|
#ifdef SMP
|
||||||
static int smp_tsc;
|
int smp_tsc;
|
||||||
SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0,
|
SYSCTL_INT(_kern_timecounter, OID_AUTO, smp_tsc, CTLFLAG_RDTUN, &smp_tsc, 0,
|
||||||
"Indicates whether the TSC is safe to use in SMP mode");
|
"Indicates whether the TSC is safe to use in SMP mode");
|
||||||
TUNABLE_INT("kern.timecounter.smp_tsc", &smp_tsc);
|
TUNABLE_INT("kern.timecounter.smp_tsc", &smp_tsc);
|
||||||
|
@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$");
|
|||||||
#include <machine/cpufunc.h>
|
#include <machine/cpufunc.h>
|
||||||
|
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
#include <stdlib.h>
|
||||||
#include <time.h>
|
#include <time.h>
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <pthread.h>
|
#include <pthread.h>
|
||||||
@ -53,35 +54,108 @@ __FBSDID("$FreeBSD$");
|
|||||||
#define PMTMR_FREQ 3579545 /* 3.579545MHz */
|
#define PMTMR_FREQ 3579545 /* 3.579545MHz */
|
||||||
|
|
||||||
static pthread_mutex_t pmtmr_mtx;
|
static pthread_mutex_t pmtmr_mtx;
|
||||||
static uint64_t pmtmr_tscf;
|
|
||||||
static uint64_t pmtmr_old;
|
static uint64_t pmtmr_old;
|
||||||
|
|
||||||
|
static uint64_t pmtmr_tscf;
|
||||||
static uint64_t pmtmr_tsc_old;
|
static uint64_t pmtmr_tsc_old;
|
||||||
|
|
||||||
|
/*
 * Clock id handed to clock_gettime() when the PM timer is not derived from
 * the TSC.  pmtmr_init() changes it to CLOCK_UPTIME when the
 * BHYVE_PMTMR_PRECISE environment variable is set.
 */
static clockid_t clockid = CLOCK_UPTIME_FAST;
|
||||||
|
/* Most recent clock_gettime() sample; the next reading is measured from it. */
static struct timespec pmtmr_uptime_old;
|
||||||
|
|
||||||
|
#define timespecsub(vvp, uvp) \
|
||||||
|
do { \
|
||||||
|
(vvp)->tv_sec -= (uvp)->tv_sec; \
|
||||||
|
(vvp)->tv_nsec -= (uvp)->tv_nsec; \
|
||||||
|
if ((vvp)->tv_nsec < 0) { \
|
||||||
|
(vvp)->tv_sec--; \
|
||||||
|
(vvp)->tv_nsec += 1000000000; \
|
||||||
|
} \
|
||||||
|
} while (0)
|
||||||
|
|
||||||
|
static uint64_t
|
||||||
|
timespec_to_pmtmr(const struct timespec *tsnew, const struct timespec *tsold)
|
||||||
|
{
|
||||||
|
struct timespec tsdiff;
|
||||||
|
int64_t nsecs;
|
||||||
|
|
||||||
|
tsdiff = *tsnew;
|
||||||
|
timespecsub(&tsdiff, tsold);
|
||||||
|
nsecs = tsdiff.tv_sec * 1000000000 + tsdiff.tv_nsec;
|
||||||
|
assert(nsecs >= 0);
|
||||||
|
|
||||||
|
return (nsecs * PMTMR_FREQ / 1000000000 + pmtmr_old);
|
||||||
|
}
|
||||||
|
|
||||||
|
static uint64_t
|
||||||
|
tsc_to_pmtmr(uint64_t tsc_new, uint64_t tsc_old)
|
||||||
|
{
|
||||||
|
|
||||||
|
return ((tsc_new - tsc_old) * PMTMR_FREQ / pmtmr_tscf + pmtmr_old);
|
||||||
|
}
|
||||||
|
|
||||||
|
static void
|
||||||
|
pmtmr_init(void)
|
||||||
|
{
|
||||||
|
size_t len;
|
||||||
|
int smp_tsc, err;
|
||||||
|
struct timespec tsnew, tsold = { 0 };
|
||||||
|
|
||||||
|
len = sizeof(smp_tsc);
|
||||||
|
err = sysctlbyname("kern.timecounter.smp_tsc", &smp_tsc, &len, NULL, 0);
|
||||||
|
assert(err == 0);
|
||||||
|
|
||||||
|
if (smp_tsc) {
|
||||||
|
len = sizeof(pmtmr_tscf);
|
||||||
|
err = sysctlbyname("machdep.tsc_freq", &pmtmr_tscf, &len,
|
||||||
|
NULL, 0);
|
||||||
|
assert(err == 0);
|
||||||
|
|
||||||
|
pmtmr_tsc_old = rdtsc();
|
||||||
|
pmtmr_old = tsc_to_pmtmr(pmtmr_tsc_old, 0);
|
||||||
|
} else {
|
||||||
|
if (getenv("BHYVE_PMTMR_PRECISE") != NULL)
|
||||||
|
clockid = CLOCK_UPTIME;
|
||||||
|
|
||||||
|
err = clock_gettime(clockid, &tsnew);
|
||||||
|
assert(err == 0);
|
||||||
|
|
||||||
|
pmtmr_uptime_old = tsnew;
|
||||||
|
pmtmr_old = timespec_to_pmtmr(&tsnew, &tsold);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
static uint32_t
|
static uint32_t
|
||||||
pmtmr_val(void)
|
pmtmr_val(void)
|
||||||
{
|
{
|
||||||
|
struct timespec tsnew;
|
||||||
uint64_t pmtmr_tsc_new;
|
uint64_t pmtmr_tsc_new;
|
||||||
uint64_t pmtmr_new;
|
uint64_t pmtmr_new;
|
||||||
|
int error;
|
||||||
|
|
||||||
static int inited = 0;
|
static int inited = 0;
|
||||||
|
|
||||||
if (!inited) {
|
if (!inited) {
|
||||||
size_t len;
|
|
||||||
|
|
||||||
inited = 1;
|
|
||||||
pthread_mutex_init(&pmtmr_mtx, NULL);
|
pthread_mutex_init(&pmtmr_mtx, NULL);
|
||||||
len = sizeof(pmtmr_tscf);
|
pmtmr_init();
|
||||||
sysctlbyname("machdep.tsc_freq", &pmtmr_tscf, &len,
|
inited = 1;
|
||||||
NULL, 0);
|
|
||||||
pmtmr_tsc_old = rdtsc();
|
|
||||||
pmtmr_old = pmtmr_tsc_old / pmtmr_tscf * PMTMR_FREQ;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pthread_mutex_lock(&pmtmr_mtx);
|
pthread_mutex_lock(&pmtmr_mtx);
|
||||||
pmtmr_tsc_new = rdtsc();
|
|
||||||
pmtmr_new = (pmtmr_tsc_new - pmtmr_tsc_old) * PMTMR_FREQ / pmtmr_tscf +
|
if (pmtmr_tscf) {
|
||||||
pmtmr_old;
|
pmtmr_tsc_new = rdtsc();
|
||||||
|
pmtmr_new = tsc_to_pmtmr(pmtmr_tsc_new, pmtmr_tsc_old);
|
||||||
|
pmtmr_tsc_old = pmtmr_tsc_new;
|
||||||
|
} else {
|
||||||
|
error = clock_gettime(clockid, &tsnew);
|
||||||
|
assert(error == 0);
|
||||||
|
|
||||||
|
pmtmr_new = timespec_to_pmtmr(&tsnew, &pmtmr_uptime_old);
|
||||||
|
pmtmr_uptime_old = tsnew;
|
||||||
|
}
|
||||||
pmtmr_old = pmtmr_new;
|
pmtmr_old = pmtmr_new;
|
||||||
pmtmr_tsc_old = pmtmr_tsc_new;
|
|
||||||
pthread_mutex_unlock(&pmtmr_mtx);
|
pthread_mutex_unlock(&pmtmr_mtx);
|
||||||
|
|
||||||
return (pmtmr_new);
|
return (pmtmr_new);
|
||||||
@ -102,4 +176,3 @@ pmtmr_handler(struct vmctx *ctx, int vcpu, int in, int port, int bytes,
|
|||||||
}
|
}
|
||||||
|
|
||||||
INOUT_PORT(pmtmr, IO_PMTMR, IOPORT_F_IN, pmtmr_handler);
|
INOUT_PORT(pmtmr, IO_PMTMR, IOPORT_F_IN, pmtmr_handler);
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user