Generalized parts of the XEN timer code into a generic pvclock

KVM clock shares the same data structures between the guest and the host
as Xen so it makes sense to just have a single copy of this code.

Differential Revision: https://reviews.freebsd.org/D1429
Reviewed by:	royger (earlier version)
MFC after:	1 month
This commit is contained in:
Bryan Venteicher 2015-02-04 08:26:43 +00:00
parent 31a741f473
commit d3ccddf3ce
8 changed files with 271 additions and 126 deletions
sys
amd64/include
conf
dev/xen/timer
i386
x86

@ -0,0 +1,6 @@
/*-
* This file is in the public domain.
*/
/* $FreeBSD$ */
#include <x86/pvclock.h>

@ -569,6 +569,7 @@ x86/x86/mptable.c optional mptable
x86/x86/mptable_pci.c optional mptable pci
x86/x86/msi.c optional pci
x86/x86/nexus.c standard
x86/x86/pvclock.c standard
x86/x86/tsc.c standard
x86/x86/delay.c standard
x86/xen/hvm.c optional xenhvm

@ -588,6 +588,7 @@ x86/x86/mptable_pci.c optional apic native pci
x86/x86/msi.c optional apic pci
x86/x86/nexus.c standard
x86/x86/tsc.c standard
x86/x86/pvclock.c standard
x86/x86/delay.c standard
x86/xen/hvm.c optional xenhvm
x86/xen/xen_intr.c optional xen | xenhvm

@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
#include <machine/clock.h>
#include <machine/_inttypes.h>
#include <machine/smp.h>
#include <machine/pvclock.h>
#include <dev/xen/timer/timer.h>
@ -95,9 +96,6 @@ struct xentimer_softc {
struct eventtimer et;
};
/* Last time; this guarantees a monotonically increasing clock. */
volatile uint64_t xen_timer_last_time = 0;
static void
xentimer_identify(driver_t *driver, device_t parent)
{
@ -148,128 +146,20 @@ xentimer_probe(device_t dev)
return (BUS_PROBE_NOWILDCARD);
}
/*
* Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
* yielding a 64-bit result.
*/
static inline uint64_t
scale_delta(uint64_t delta, uint32_t mul_frac, int shift)
{
uint64_t product;
if (shift < 0)
delta >>= -shift;
else
delta <<= shift;
#if defined(__i386__)
{
uint32_t tmp1, tmp2;
/**
* For i386, the formula looks like:
*
* lower = (mul_frac * (delta & UINT_MAX)) >> 32
* upper = mul_frac * (delta >> 32)
* product = lower + upper
*/
__asm__ (
"mul %5 ; "
"mov %4,%%eax ; "
"mov %%edx,%4 ; "
"mul %5 ; "
"xor %5,%5 ; "
"add %4,%%eax ; "
"adc %5,%%edx ; "
: "=A" (product), "=r" (tmp1), "=r" (tmp2)
: "a" ((uint32_t)delta), "1" ((uint32_t)(delta >> 32)),
"2" (mul_frac) );
}
#elif defined(__amd64__)
{
unsigned long tmp;
__asm__ (
"mulq %[mul_frac] ; shrd $32, %[hi], %[lo]"
: [lo]"=a" (product), [hi]"=d" (tmp)
: "0" (delta), [mul_frac]"rm"((uint64_t)mul_frac));
}
#else
#error "xentimer: unsupported architecture"
#endif
return (product);
}
static uint64_t
get_nsec_offset(struct vcpu_time_info *tinfo)
{
return (scale_delta(rdtsc() - tinfo->tsc_timestamp,
tinfo->tsc_to_system_mul, tinfo->tsc_shift));
}
/*
* Read the current hypervisor system uptime value from Xen.
* See <xen/interface/xen.h> for a description of how this works.
*/
static uint32_t
xen_fetch_vcpu_tinfo(struct vcpu_time_info *dst, struct vcpu_time_info *src)
{
do {
dst->version = src->version;
rmb();
dst->tsc_timestamp = src->tsc_timestamp;
dst->system_time = src->system_time;
dst->tsc_to_system_mul = src->tsc_to_system_mul;
dst->tsc_shift = src->tsc_shift;
rmb();
} while ((src->version & 1) | (dst->version ^ src->version));
return (dst->version);
}
/**
* \brief Get the current time, in nanoseconds, since the hypervisor booted.
*
* \param vcpu vcpu_info structure to fetch the time from.
*
* \note This function returns the current CPU's idea of this value, unless
* it happens to be less than another CPU's previously determined value.
*/
static uint64_t
xen_fetch_vcpu_time(struct vcpu_info *vcpu)
{
struct vcpu_time_info dst;
struct vcpu_time_info *src;
uint32_t pre_version;
uint64_t now;
volatile uint64_t last;
struct pvclock_vcpu_time_info *time;
src = &vcpu->time;
time = (struct pvclock_vcpu_time_info *) &vcpu->time;
do {
pre_version = xen_fetch_vcpu_tinfo(&dst, src);
barrier();
now = dst.system_time + get_nsec_offset(&dst);
barrier();
} while (pre_version != src->version);
/*
* Enforce a monotonically increasing clock time across all
* VCPUs. If our time is too old, use the last time and return.
* Otherwise, try to update the last time.
*/
do {
last = xen_timer_last_time;
if (last > now) {
now = last;
break;
}
} while (!atomic_cmpset_64(&xen_timer_last_time, last, now));
return (now);
return (pvclock_get_timecount(time));
}
static uint32_t
@ -302,15 +192,11 @@ static void
xen_fetch_wallclock(struct timespec *ts)
{
shared_info_t *src = HYPERVISOR_shared_info;
uint32_t version = 0;
struct pvclock_wall_clock *wc;
do {
version = src->wc_version;
rmb();
ts->tv_sec = src->wc_sec;
ts->tv_nsec = src->wc_nsec;
rmb();
} while ((src->wc_version & 1) | (version ^ src->wc_version));
wc = (struct pvclock_wall_clock *) &src->wc_version;
pvclock_get_wallclock(wc, ts);
}
static void
@ -574,7 +460,7 @@ xentimer_resume(device_t dev)
}
/* Reset the last uptime value */
xen_timer_last_time = 0;
pvclock_resume();
/* Reset the RTC clock */
inittodr(time_second);

@ -0,0 +1,6 @@
/*-
* This file is in the public domain.
*/
/* $FreeBSD$ */
#include <x86/pvclock.h>

@ -69,6 +69,7 @@ __FBSDID("$FreeBSD$");
#include <machine/intr_machdep.h>
#include <machine/md_var.h>
#include <machine/psl.h>
#include <machine/pvclock.h>
#if defined(SMP)
#include <machine/smp.h>
#endif
@ -127,8 +128,6 @@ u_int timer_freq = TIMER_FREQ;
static u_long cyc2ns_scale;
static uint64_t processed_system_time; /* stime (ns) at last processing. */
extern volatile uint64_t xen_timer_last_time;
#define do_div(n,base) ({ \
unsigned long __upper, __low, __high, __mod, __base; \
__base = (base); \
@ -172,7 +171,7 @@ static inline unsigned long long cycles_2_ns(unsigned long long cyc)
static uint32_t
getit(void)
{
return (xen_timer_last_time);
return (pvclock_get_last_cycles());
}

58
sys/x86/include/pvclock.h Normal file

@ -0,0 +1,58 @@
/*-
* Copyright (c) 2014, Bryan Venteicher <bryanv@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef X86_PVCLOCK
#define X86_PVCLOCK
/*
 * Per-VCPU time record read by pvclock_get_timecount().
 *
 * NOTE(review): this layout is presumably dictated by the hypervisor
 * interface (Xen/KVM), so field order and sizes should not be changed
 * without confirming against the hypervisor headers.
 */
struct pvclock_vcpu_time_info {
uint32_t version; /* readers retry while this is odd or changes */
uint32_t pad0;
uint64_t tsc_timestamp; /* subtracted from rdtsc() to form the TSC delta */
uint64_t system_time; /* base value added to the scaled TSC delta */
uint32_t tsc_to_system_mul; /* 32-bit fraction the TSC delta is multiplied by */
int8_t tsc_shift; /* shift applied to the TSC delta; negative means right */
uint8_t flags; /* PVCLOCK_FLAG_* bits */
uint8_t pad[2];
};
/*
 * Bits of pvclock_vcpu_time_info.flags.
 *
 * PVCLOCK_FLAG_TSC_STABLE lets pvclock_get_timecount() return the value it
 * read without applying its cross-VCPU monotonicity check.
 */
#define PVCLOCK_FLAG_TSC_STABLE 0x01
#define PVCLOCK_FLAG_GUEST_PAUSED 0x02
/* Original, misspelled name of the flag above; kept for existing users. */
#define PVCLOCK_FLAG_GUEST_PASUED PVCLOCK_FLAG_GUEST_PAUSED
/*
 * Wall-clock record read by pvclock_get_wallclock().
 *
 * NOTE(review): the layout is presumably fixed by the hypervisor
 * interface; confirm before reordering or resizing fields.
 */
struct pvclock_wall_clock {
uint32_t version; /* readers retry while this is odd or changes */
uint32_t sec; /* copied into timespec tv_sec */
uint32_t nsec; /* copied into timespec tv_nsec */
};
/* Reset the saved "last returned time" value to zero. */
void pvclock_resume(void);
/* Return the saved "last returned time" value. */
uint64_t pvclock_get_last_cycles(void);
/*
 * Return the current time computed from *ti.  Unless the record carries
 * PVCLOCK_FLAG_TSC_STABLE, the result is never smaller than the saved
 * "last returned time" value.
 */
uint64_t pvclock_get_timecount(struct pvclock_vcpu_time_info *ti);
/* Store a consistent copy of the seconds/nanoseconds of *wc in *ts. */
void pvclock_get_wallclock(struct pvclock_wall_clock *wc,
struct timespec *ts);
#endif

188
sys/x86/x86/pvclock.c Normal file

@ -0,0 +1,188 @@
/*-
* Copyright (c) 2009 Adrian Chadd
* Copyright (c) 2012 Spectra Logic Corporation
* Copyright (c) 2014 Bryan Venteicher
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/proc.h>
#include <machine/cpufunc.h>
#include <machine/cpu.h>
#include <machine/atomic.h>
#include <machine/pvclock.h>
/*
 * Most recent time value recorded by pvclock_get_timecount().  It is used
 * to guarantee a monotonically increasing clock across all VCPUs when a
 * stable TSC is not provided, and is reset to zero by pvclock_resume().
 */
static volatile uint64_t pvclock_last_cycles;
/*
 * Discard the recorded last time value so that the monotonicity check in
 * pvclock_get_timecount() starts over from zero.
 */
void
pvclock_resume(void)
{
	const uint64_t initial = 0;

	atomic_store_rel_64(&pvclock_last_cycles, initial);
}
/*
 * Return the last time value recorded by pvclock_get_timecount(), or zero
 * if none has been recorded since the last pvclock_resume().
 */
uint64_t
pvclock_get_last_cycles(void)
{
	uint64_t snapshot;

	snapshot = atomic_load_acq_64(&pvclock_last_cycles);
	return (snapshot);
}
/*
 * Scale a 64-bit delta by scaling and multiplying by a 32-bit fraction,
 * yielding a 64-bit result.
 *
 * delta is first shifted by 'shift' bits (right when shift is negative,
 * left otherwise).  It is then multiplied by mul_frac and the product is
 * shifted right by 32 bits, so mul_frac acts as a fraction with 32
 * fractional bits.  The multiplication is done in assembly because the
 * intermediate product does not fit in 64 bits.
 */
static inline uint64_t
pvclock_scale_delta(uint64_t delta, uint32_t mul_frac, int shift)
{
uint64_t product;
/* Apply the pre-scaling shift; a negative count means shift right. */
if (shift < 0)
delta >>= -shift;
else
delta <<= shift;
#if defined(__i386__)
{
uint32_t tmp1, tmp2;
/**
* For i386, the formula looks like:
*
* lower = (mul_frac * (delta & UINT_MAX)) >> 32
* upper = mul_frac * (delta >> 32)
* product = lower + upper
*
* Operand map: %4 starts as the high half of delta and is reused to
* hold the high word of the first multiply; %5 is mul_frac and is
* zeroed after the second multiply so it can feed the carry into edx.
*/
__asm__ (
"mul %5 ; "
"mov %4,%%eax ; "
"mov %%edx,%4 ; "
"mul %5 ; "
"xor %5,%5 ; "
"add %4,%%eax ; "
"adc %5,%%edx ; "
: "=A" (product), "=r" (tmp1), "=r" (tmp2)
: "a" ((uint32_t)delta), "1" ((uint32_t)(delta >> 32)),
"2" (mul_frac) );
}
#elif defined(__amd64__)
{
unsigned long tmp;
/*
* mulq leaves the 128-bit product in rdx:rax; shrd then shifts rax
* right by 32 while pulling in the low 32 bits of rdx, which leaves
* bits 32..95 of the product in 'product'.
*/
__asm__ (
"mulq %[mul_frac] ; shrd $32, %[hi], %[lo]"
: [lo]"=a" (product), [hi]"=d" (tmp)
: "0" (delta), [mul_frac]"rm"((uint64_t)mul_frac));
}
#else
#error "pvclock: unsupported x86 architecture?"
#endif
return (product);
}
/*
 * Return the TSC ticks elapsed since ti->tsc_timestamp, scaled with the
 * multiplier and shift stored in *ti.
 */
static uint64_t
pvclock_get_nsec_offset(struct pvclock_vcpu_time_info *ti)
{
	uint64_t elapsed, scaled;

	elapsed = rdtsc() - ti->tsc_timestamp;
	scaled = pvclock_scale_delta(elapsed, ti->tsc_to_system_mul,
	    ti->tsc_shift);
	return (scaled);
}
/*
 * Take a consistent reading of the time record *ti.
 *
 * On return *cycles holds ti->system_time plus the scaled TSC delta from
 * pvclock_get_nsec_offset(), and *flags holds ti->flags.  The reading is
 * repeated until the version field is even and has the same value before
 * and after the fields were read.
 */
static void
pvclock_read_time_info(struct pvclock_vcpu_time_info *ti,
uint64_t *cycles, uint8_t *flags)
{
uint32_t version;
do {
/* Sample the version first; rmb() separates it from the field reads. */
version = ti->version;
rmb();
*cycles = ti->system_time + pvclock_get_nsec_offset(ti);
*flags = ti->flags;
/* Second rmb() separates the field reads from the version re-check. */
rmb();
} while ((ti->version & 1) != 0 || ti->version != version);
}
/*
 * Take a consistent reading of the wall-clock record *wc.
 *
 * The seconds and nanoseconds are copied to *sec and *nsec.  The copy is
 * repeated until the version field is even and has the same value before
 * and after the fields were read.
 */
static void
pvclock_read_wall_clock(struct pvclock_wall_clock *wc, uint32_t *sec,
    uint32_t *nsec)
{
	uint32_t seen;

	for (;;) {
		seen = wc->version;
		rmb();
		*sec = wc->sec;
		*nsec = wc->nsec;
		rmb();
		/* Done once the version is even and unchanged. */
		if ((wc->version & 1) == 0 && wc->version == seen)
			break;
	}
}
/*
 * Return the current time computed from the time record *ti.
 *
 * When the record carries PVCLOCK_FLAG_TSC_STABLE the value read is
 * returned as is.  Otherwise the value is compared with the last time
 * recorded in pvclock_last_cycles so that the result never goes backwards
 * across VCPUs.
 */
uint64_t
pvclock_get_timecount(struct pvclock_vcpu_time_info *ti)
{
	uint64_t current, recorded;
	uint8_t ti_flags;

	pvclock_read_time_info(ti, &current, &ti_flags);
	if ((ti_flags & PVCLOCK_FLAG_TSC_STABLE) != 0)
		return (current);

	for (;;) {
		recorded = atomic_load_acq_64(&pvclock_last_cycles);
		/* Our reading is older than the recorded one: reuse that. */
		if (recorded > current)
			return (recorded);
		/* Publish our reading; retry if the record changed meanwhile. */
		if (atomic_cmpset_64(&pvclock_last_cycles, recorded, current))
			return (current);
	}
}
/*
 * Read the wall-clock record *wc consistently and store its seconds and
 * nanoseconds in *ts.
 */
void
pvclock_get_wallclock(struct pvclock_wall_clock *wc, struct timespec *ts)
{
	uint32_t wc_sec, wc_nsec;

	pvclock_read_wall_clock(wc, &wc_sec, &wc_nsec);
	ts->tv_nsec = wc_nsec;
	ts->tv_sec = wc_sec;
}