9f40021f28
Use this new driver for both PV and HVM instances. This driver requires a Xen hypervisor that supports vector callbacks, VCPUOP hypercalls, and reports that it has a "safe PV clock". New timer driver: Submitted by: will Sponsored by: Spectra Logic Corporation PV port to new driver, and bug fixes: Submitted by: Roger Pau Monné Sponsored by: Citrix Systems R&D sys/dev/xen/timer/timer.c: - Register a PV timer device driver which (currently) implements device_{identify,probe,attach} and stubs device_detach. The detach routine requires functionality not provided by timecounters(4). The suspend and resume routines need additional work (due to Xen requiring that the hypercalls be executed on the target VCPU), and aren't needed for our purposes. - Make sure there can only be one device instance of this driver, and that it only registers one eventtimers(4) and one timecounters(4) device interface. Make both interfaces use PCPU data as needed. - Match, with a few style cleanups & API differences, the Xen versions of the "fetch time" functions. - Document the magic scale_delta() better for the i386 version. - When registering the event timer, bind a separate event channel for the timer VIRQ to the device's event timer interrupt handler for each active VCPU. Describe each interrupt as "xen_et:c%d", so they can be identified per CPU in "vmstat -i" or "show intrcnt" in KDB. - When scheduling a timer into the hypervisor, try up to 60 times if the hypervisor rejects the time as being in the past. In the common case, this retry shouldn't happen, and if it does, it should only happen once. This is because the event timer advertises a minimum period of 100usec, which is only less than the usual hypercall round trip time about 1 out of every 100 tries. (Unlike other similar drivers, this one actually checks whether the hypervisor accepted the singleshot timer set hypercall.) - Implement a RTC PV clock based on the hypervisor wallclock. sys/conf/files: - Add dev/xen/timer/timer.c if the kernel configuration includes either the XEN or XENHVM options. sys/conf/files.i386: sys/i386/include/xen/xen_clock_util.h: sys/i386/xen/clock.c: sys/i386/xen/xen_clock_util.c: sys/i386/xen/mp_machdep.c: sys/i386/xen/xen_rtc.c: - Remove previous PV timer used in i386 XEN PV kernels, the new timer introduced in this change is used instead (so we share the same code between PVHVM and PV). MFC after: 2 weeks
572 lines
13 KiB
C
572 lines
13 KiB
C
/*-
|
|
* Copyright (c) 1990 The Regents of the University of California.
|
|
* All rights reserved.
|
|
*
|
|
* This code is derived from software contributed to Berkeley by
|
|
* William Jolitz and Don Ahn.
|
|
*
|
|
* Redistribution and use in source and binary forms, with or without
|
|
* modification, are permitted provided that the following conditions
|
|
* are met:
|
|
* 1. Redistributions of source code must retain the above copyright
|
|
* notice, this list of conditions and the following disclaimer.
|
|
* 2. Redistributions in binary form must reproduce the above copyright
|
|
* notice, this list of conditions and the following disclaimer in the
|
|
* documentation and/or other materials provided with the distribution.
|
|
* 3. All advertising materials mentioning features or use of this software
|
|
* must display the following acknowledgement:
|
|
* This product includes software developed by the University of
|
|
* California, Berkeley and its contributors.
|
|
* 4. Neither the name of the University nor the names of its contributors
|
|
* may be used to endorse or promote products derived from this software
|
|
* without specific prior written permission.
|
|
*
|
|
* THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
|
|
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
* ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
|
|
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|
* SUCH DAMAGE.
|
|
*
|
|
* from: @(#)clock.c 7.2 (Berkeley) 5/12/91
|
|
*/
|
|
|
|
#include <sys/cdefs.h>
|
|
__FBSDID("$FreeBSD$");
|
|
|
|
/* #define DELAYDEBUG */
|
|
/*
|
|
* Routines to handle clock hardware.
|
|
*/
|
|
|
|
#include "opt_ddb.h"
|
|
#include "opt_clock.h"
|
|
|
|
#include <sys/param.h>
|
|
#include <sys/systm.h>
|
|
#include <sys/bus.h>
|
|
#include <sys/clock.h>
|
|
#include <sys/lock.h>
|
|
#include <sys/mutex.h>
|
|
#include <sys/proc.h>
|
|
#include <sys/time.h>
|
|
#include <sys/timeet.h>
|
|
#include <sys/timetc.h>
|
|
#include <sys/kernel.h>
|
|
#include <sys/limits.h>
|
|
#include <sys/sysctl.h>
|
|
#include <sys/cons.h>
|
|
#include <sys/power.h>
|
|
|
|
#include <machine/clock.h>
|
|
#include <machine/cputypes.h>
|
|
#include <machine/frame.h>
|
|
#include <machine/intr_machdep.h>
|
|
#include <machine/md_var.h>
|
|
#include <machine/psl.h>
|
|
#if defined(SMP)
|
|
#include <machine/smp.h>
|
|
#endif
|
|
#include <machine/specialreg.h>
|
|
#include <machine/timerreg.h>
|
|
|
|
#include <x86/isa/icu.h>
|
|
#include <x86/isa/isa.h>
|
|
#include <isa/rtc.h>
|
|
|
|
#include <vm/vm.h>
|
|
#include <vm/pmap.h>
|
|
#include <machine/pmap.h>
|
|
#include <xen/hypervisor.h>
|
|
#include <xen/xen-os.h>
|
|
#include <machine/xen/xenfunc.h>
|
|
#include <xen/interface/vcpu.h>
|
|
#include <machine/cpu.h>
|
|
#include <xen/xen_intr.h>
|
|
|
|
/*
|
|
* 32-bit time_t's can't reach leap years before 1904 or after 2036, so we
|
|
* can use a simple formula for leap years.
|
|
*/
|
|
#define LEAPYEAR(y) (!((y) % 4))
|
|
#define DAYSPERYEAR (28+30*4+31*7)
|
|
|
|
#ifndef TIMER_FREQ
|
|
#define TIMER_FREQ 1193182
|
|
#endif
|
|
|
|
#ifdef CYC2NS_SCALE_FACTOR
|
|
#undef CYC2NS_SCALE_FACTOR
|
|
#endif
|
|
#define CYC2NS_SCALE_FACTOR 10
|
|
|
|
/* Values for timerX_state: */
|
|
#define RELEASED 0
|
|
#define RELEASE_PENDING 1
|
|
#define ACQUIRED 2
|
|
#define ACQUIRE_PENDING 3
|
|
|
|
struct mtx clock_lock;
|
|
#define RTC_LOCK_INIT \
|
|
mtx_init(&clock_lock, "clk", NULL, MTX_SPIN | MTX_NOPROFILE)
|
|
#define RTC_LOCK mtx_lock_spin(&clock_lock)
|
|
#define RTC_UNLOCK mtx_unlock_spin(&clock_lock)
|
|
#define NS_PER_TICK (1000000000ULL/hz)
|
|
|
|
int adjkerntz; /* local offset from GMT in seconds */
|
|
int clkintr_pending;
|
|
int pscnt = 1;
|
|
int psdiv = 1;
|
|
int wall_cmos_clock;
|
|
u_int timer_freq = TIMER_FREQ;
|
|
static u_long cyc2ns_scale;
|
|
static uint64_t processed_system_time; /* stime (ns) at last processing. */
|
|
|
|
extern volatile uint64_t xen_timer_last_time;
|
|
|
|
#define do_div(n,base) ({ \
|
|
unsigned long __upper, __low, __high, __mod, __base; \
|
|
__base = (base); \
|
|
__asm("":"=a" (__low), "=d" (__high):"A" (n)); \
|
|
__upper = __high; \
|
|
if (__high) { \
|
|
__upper = __high % (__base); \
|
|
__high = __high / (__base); \
|
|
} \
|
|
__asm("divl %2":"=a" (__low), "=d" (__mod):"rm" (__base), "0" (__low), "1" (__upper)); \
|
|
__asm("":"=A" (n):"a" (__low),"d" (__high)); \
|
|
__mod; \
|
|
})
|
|
|
|
|
|
/* convert from cycles(64bits) => nanoseconds (64bits)
|
|
* basic equation:
|
|
* ns = cycles / (freq / ns_per_sec)
|
|
* ns = cycles * (ns_per_sec / freq)
|
|
* ns = cycles * (10^9 / (cpu_mhz * 10^6))
|
|
* ns = cycles * (10^3 / cpu_mhz)
|
|
*
|
|
* Then we use scaling math (suggested by george@mvista.com) to get:
|
|
* ns = cycles * (10^3 * SC / cpu_mhz) / SC
|
|
* ns = cycles * cyc2ns_scale / SC
|
|
*
|
|
* And since SC is a constant power of two, we can convert the div
|
|
* into a shift.
|
|
* -johnstul@us.ibm.com "math is hard, lets go shopping!"
|
|
*/
|
|
static inline void set_cyc2ns_scale(unsigned long cpu_mhz)
|
|
{
|
|
cyc2ns_scale = (1000 << CYC2NS_SCALE_FACTOR)/cpu_mhz;
|
|
}
|
|
|
|
static inline unsigned long long cycles_2_ns(unsigned long long cyc)
|
|
{
|
|
return ((cyc * cyc2ns_scale) >> CYC2NS_SCALE_FACTOR);
|
|
}
|
|
|
|
static uint32_t
|
|
getit(void)
|
|
{
|
|
return (xen_timer_last_time);
|
|
}
|
|
|
|
|
|
/*
|
|
* XXX: timer needs more SMP work.
|
|
*/
|
|
void
|
|
i8254_init(void)
|
|
{
|
|
|
|
RTC_LOCK_INIT;
|
|
}
|
|
|
|
/*
|
|
* Wait "n" microseconds.
|
|
* Relies on timer 1 counting down from (timer_freq / hz)
|
|
* Note: timer had better have been programmed before this is first used!
|
|
*/
|
|
void
|
|
DELAY(int n)
|
|
{
|
|
int delta, ticks_left;
|
|
uint32_t tick, prev_tick;
|
|
#ifdef DELAYDEBUG
|
|
int getit_calls = 1;
|
|
int n1;
|
|
static int state = 0;
|
|
|
|
if (state == 0) {
|
|
state = 1;
|
|
for (n1 = 1; n1 <= 10000000; n1 *= 10)
|
|
DELAY(n1);
|
|
state = 2;
|
|
}
|
|
if (state == 1)
|
|
printf("DELAY(%d)...", n);
|
|
#endif
|
|
/*
|
|
* Read the counter first, so that the rest of the setup overhead is
|
|
* counted. Guess the initial overhead is 20 usec (on most systems it
|
|
* takes about 1.5 usec for each of the i/o's in getit(). The loop
|
|
* takes about 6 usec on a 486/33 and 13 usec on a 386/20. The
|
|
* multiplications and divisions to scale the count take a while).
|
|
*
|
|
* However, if ddb is active then use a fake counter since reading
|
|
* the i8254 counter involves acquiring a lock. ddb must not go
|
|
* locking for many reasons, but it calls here for at least atkbd
|
|
* input.
|
|
*/
|
|
prev_tick = getit();
|
|
|
|
n -= 0; /* XXX actually guess no initial overhead */
|
|
/*
|
|
* Calculate (n * (timer_freq / 1e6)) without using floating point
|
|
* and without any avoidable overflows.
|
|
*/
|
|
if (n <= 0)
|
|
ticks_left = 0;
|
|
else if (n < 256)
|
|
/*
|
|
* Use fixed point to avoid a slow division by 1000000.
|
|
* 39099 = 1193182 * 2^15 / 10^6 rounded to nearest.
|
|
* 2^15 is the first power of 2 that gives exact results
|
|
* for n between 0 and 256.
|
|
*/
|
|
ticks_left = ((u_int)n * 39099 + (1 << 15) - 1) >> 15;
|
|
else
|
|
/*
|
|
* Don't bother using fixed point, although gcc-2.7.2
|
|
* generates particularly poor code for the long long
|
|
* division, since even the slow way will complete long
|
|
* before the delay is up (unless we're interrupted).
|
|
*/
|
|
ticks_left = ((u_int)n * (long long)timer_freq + 999999)
|
|
/ 1000000;
|
|
|
|
while (ticks_left > 0) {
|
|
tick = getit();
|
|
#ifdef DELAYDEBUG
|
|
++getit_calls;
|
|
#endif
|
|
delta = tick - prev_tick;
|
|
prev_tick = tick;
|
|
if (delta < 0) {
|
|
/*
|
|
* Guard against timer0_max_count being wrong.
|
|
* This shouldn't happen in normal operation,
|
|
* but it may happen if set_timer_freq() is
|
|
* traced.
|
|
*/
|
|
/* delta += timer0_max_count; ??? */
|
|
if (delta < 0)
|
|
delta = 0;
|
|
}
|
|
ticks_left -= delta;
|
|
}
|
|
#ifdef DELAYDEBUG
|
|
if (state == 1)
|
|
printf(" %d calls to getit() at %d usec each\n",
|
|
getit_calls, (n + 5) / getit_calls);
|
|
#endif
|
|
}
|
|
|
|
void
|
|
startrtclock()
|
|
{
|
|
uint64_t __cpu_khz;
|
|
uint32_t cpu_khz;
|
|
struct vcpu_time_info *info;
|
|
|
|
__cpu_khz = 1000000ULL << 32;
|
|
info = &HYPERVISOR_shared_info->vcpu_info[0].time;
|
|
|
|
(void)do_div(__cpu_khz, info->tsc_to_system_mul);
|
|
if ( info->tsc_shift < 0 )
|
|
cpu_khz = __cpu_khz << -info->tsc_shift;
|
|
else
|
|
cpu_khz = __cpu_khz >> info->tsc_shift;
|
|
|
|
printf("Xen reported: %u.%03u MHz processor.\n",
|
|
cpu_khz / 1000, cpu_khz % 1000);
|
|
|
|
/* (10^6 * 2^32) / cpu_hz = (10^3 * 2^32) / cpu_khz =
|
|
(2^32 * 1 / (clocks/us)) */
|
|
|
|
set_cyc2ns_scale(cpu_khz/1000);
|
|
tsc_freq = cpu_khz * 1000;
|
|
}
|
|
|
|
/*
|
|
* RTC support routines
|
|
*/
|
|
|
|
|
|
static __inline int
|
|
readrtc(int port)
|
|
{
|
|
return(bcd2bin(rtcin(port)));
|
|
}
|
|
|
|
|
|
#ifdef XEN_PRIVILEGED_GUEST
|
|
|
|
/*
|
|
* Initialize the time of day register, based on the time base which is, e.g.
|
|
* from a filesystem.
|
|
*/
|
|
static void
|
|
domu_inittodr(time_t base)
|
|
{
|
|
unsigned long sec;
|
|
int s, y;
|
|
struct timespec ts;
|
|
|
|
update_wallclock();
|
|
add_uptime_to_wallclock();
|
|
|
|
RTC_LOCK;
|
|
|
|
if (base) {
|
|
ts.tv_sec = base;
|
|
ts.tv_nsec = 0;
|
|
tc_setclock(&ts);
|
|
}
|
|
|
|
sec += tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
|
|
|
|
y = time_second - shadow_tv.tv_sec;
|
|
if (y <= -2 || y >= 2) {
|
|
/* badly off, adjust it */
|
|
tc_setclock(&shadow_tv);
|
|
}
|
|
RTC_UNLOCK;
|
|
}
|
|
|
|
/*
|
|
* Write system time back to RTC.
|
|
*/
|
|
static void
|
|
domu_resettodr(void)
|
|
{
|
|
unsigned long tm;
|
|
int s;
|
|
dom0_op_t op;
|
|
struct shadow_time_info *shadow;
|
|
struct pcpu *pc;
|
|
|
|
pc = pcpu_find(smp_processor_id());
|
|
shadow = &pc->pc_shadow_time;
|
|
if (xen_disable_rtc_set)
|
|
return;
|
|
|
|
s = splclock();
|
|
tm = time_second;
|
|
splx(s);
|
|
|
|
tm -= tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
|
|
|
|
if ((xen_start_info->flags & SIF_INITDOMAIN) &&
|
|
!independent_wallclock)
|
|
{
|
|
op.cmd = DOM0_SETTIME;
|
|
op.u.settime.secs = tm;
|
|
op.u.settime.nsecs = 0;
|
|
op.u.settime.system_time = shadow->system_timestamp;
|
|
HYPERVISOR_dom0_op(&op);
|
|
update_wallclock();
|
|
add_uptime_to_wallclock();
|
|
} else if (independent_wallclock) {
|
|
/* notyet */
|
|
;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Initialize the time of day register, based on the time base which is, e.g.
|
|
* from a filesystem.
|
|
*/
|
|
void
|
|
inittodr(time_t base)
|
|
{
|
|
unsigned long sec, days;
|
|
int year, month;
|
|
int y, m, s;
|
|
struct timespec ts;
|
|
|
|
if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
|
|
domu_inittodr(base);
|
|
return;
|
|
}
|
|
|
|
if (base) {
|
|
s = splclock();
|
|
ts.tv_sec = base;
|
|
ts.tv_nsec = 0;
|
|
tc_setclock(&ts);
|
|
splx(s);
|
|
}
|
|
|
|
/* Look if we have a RTC present and the time is valid */
|
|
if (!(rtcin(RTC_STATUSD) & RTCSD_PWR))
|
|
goto wrong_time;
|
|
|
|
/* wait for time update to complete */
|
|
/* If RTCSA_TUP is zero, we have at least 244us before next update */
|
|
s = splhigh();
|
|
while (rtcin(RTC_STATUSA) & RTCSA_TUP) {
|
|
splx(s);
|
|
s = splhigh();
|
|
}
|
|
|
|
days = 0;
|
|
#ifdef USE_RTC_CENTURY
|
|
year = readrtc(RTC_YEAR) + readrtc(RTC_CENTURY) * 100;
|
|
#else
|
|
year = readrtc(RTC_YEAR) + 1900;
|
|
if (year < 1970)
|
|
year += 100;
|
|
#endif
|
|
if (year < 1970) {
|
|
splx(s);
|
|
goto wrong_time;
|
|
}
|
|
month = readrtc(RTC_MONTH);
|
|
for (m = 1; m < month; m++)
|
|
days += daysinmonth[m-1];
|
|
if ((month > 2) && LEAPYEAR(year))
|
|
days ++;
|
|
days += readrtc(RTC_DAY) - 1;
|
|
for (y = 1970; y < year; y++)
|
|
days += DAYSPERYEAR + LEAPYEAR(y);
|
|
sec = ((( days * 24 +
|
|
readrtc(RTC_HRS)) * 60 +
|
|
readrtc(RTC_MIN)) * 60 +
|
|
readrtc(RTC_SEC));
|
|
/* sec now contains the number of seconds, since Jan 1 1970,
|
|
in the local time zone */
|
|
|
|
sec += tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
|
|
|
|
y = time_second - sec;
|
|
if (y <= -2 || y >= 2) {
|
|
/* badly off, adjust it */
|
|
ts.tv_sec = sec;
|
|
ts.tv_nsec = 0;
|
|
tc_setclock(&ts);
|
|
}
|
|
splx(s);
|
|
return;
|
|
|
|
wrong_time:
|
|
printf("Invalid time in real time clock.\n");
|
|
printf("Check and reset the date immediately!\n");
|
|
}
|
|
|
|
|
|
/*
|
|
* Write system time back to RTC
|
|
*/
|
|
void
|
|
resettodr()
|
|
{
|
|
unsigned long tm;
|
|
int y, m, s;
|
|
|
|
if (!(xen_start_info->flags & SIF_INITDOMAIN)) {
|
|
domu_resettodr();
|
|
return;
|
|
}
|
|
|
|
if (xen_disable_rtc_set)
|
|
return;
|
|
|
|
s = splclock();
|
|
tm = time_second;
|
|
splx(s);
|
|
|
|
/* Disable RTC updates and interrupts. */
|
|
writertc(RTC_STATUSB, RTCSB_HALT | RTCSB_24HR);
|
|
|
|
/* Calculate local time to put in RTC */
|
|
|
|
tm -= tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
|
|
|
|
writertc(RTC_SEC, bin2bcd(tm%60)); tm /= 60; /* Write back Seconds */
|
|
writertc(RTC_MIN, bin2bcd(tm%60)); tm /= 60; /* Write back Minutes */
|
|
writertc(RTC_HRS, bin2bcd(tm%24)); tm /= 24; /* Write back Hours */
|
|
|
|
/* We have now the days since 01-01-1970 in tm */
|
|
writertc(RTC_WDAY, (tm + 4) % 7 + 1); /* Write back Weekday */
|
|
for (y = 1970, m = DAYSPERYEAR + LEAPYEAR(y);
|
|
tm >= m;
|
|
y++, m = DAYSPERYEAR + LEAPYEAR(y))
|
|
tm -= m;
|
|
|
|
/* Now we have the years in y and the day-of-the-year in tm */
|
|
writertc(RTC_YEAR, bin2bcd(y%100)); /* Write back Year */
|
|
#ifdef USE_RTC_CENTURY
|
|
writertc(RTC_CENTURY, bin2bcd(y/100)); /* ... and Century */
|
|
#endif
|
|
for (m = 0; ; m++) {
|
|
int ml;
|
|
|
|
ml = daysinmonth[m];
|
|
if (m == 1 && LEAPYEAR(y))
|
|
ml++;
|
|
if (tm < ml)
|
|
break;
|
|
tm -= ml;
|
|
}
|
|
|
|
writertc(RTC_MONTH, bin2bcd(m + 1)); /* Write back Month */
|
|
writertc(RTC_DAY, bin2bcd(tm + 1)); /* Write back Month Day */
|
|
|
|
/* Reenable RTC updates and interrupts. */
|
|
writertc(RTC_STATUSB, RTCSB_24HR);
|
|
rtcin(RTC_INTR);
|
|
}
|
|
#endif
|
|
|
|
/*
|
|
* Start clocks running.
|
|
*/
|
|
void
|
|
cpu_initclocks(void)
|
|
{
|
|
cpu_initclocks_bsp();
|
|
}
|
|
|
|
/* Return system time offset by ticks */
|
|
uint64_t
|
|
get_system_time(int ticks)
|
|
{
|
|
return (processed_system_time + (ticks * NS_PER_TICK));
|
|
}
|
|
|
|
int
|
|
timer_spkr_acquire(void)
|
|
{
|
|
|
|
return (0);
|
|
}
|
|
|
|
int
|
|
timer_spkr_release(void)
|
|
{
|
|
|
|
return (0);
|
|
}
|
|
|
|
void
|
|
timer_spkr_setfreq(int freq)
|
|
{
|
|
|
|
}
|
|
|