From 7ec73f64179417aeda085c1c338385559fb49c23 Mon Sep 17 00:00:00 2001
From: Poul-Henning Kamp <phk@FreeBSD.org>
Date: Fri, 20 Feb 1998 16:36:17 +0000
Subject: [PATCH] Replace TOD clock code with more systematic approach.

Highlights:
    * Simple model for underlying hardware.
    * Hardware basis for timekeeping can be changed on the fly.
    * Only one hardware clock responsible for TOD keeping.
    * Provides a real nanotime() function.
    * Time granularity: .232E-18 seconds.
    * Frequency granularity:  .238E-12 s/s
    * Frequency adjustment is continuous in time.
    * Less overhead for frequency adjustment.
    * Improves xntpd performance.

Reviewed by:    bde, bde, bde
---
 sys/amd64/amd64/tsc.c         | 183 ++++++++++++-------
 sys/amd64/include/clock.h     |  66 +------
 sys/amd64/isa/clock.c         | 183 ++++++++++++-------
 sys/conf/files.i386           |   3 +-
 sys/i386/conf/files.i386      |   3 +-
 sys/i386/i386/microtime.s     | 241 ------------------------
 sys/i386/i386/tsc.c           | 183 ++++++++++++-------
 sys/i386/include/clock.h      |  66 +------
 sys/i386/isa/clock.c          | 183 ++++++++++++-------
 sys/i386/isa/random_machdep.c |  19 +-
 sys/isa/atrtc.c               | 183 ++++++++++++-------
 sys/kern/kern_clock.c         | 335 ++++++++++++++++++++++++----------
 sys/kern/kern_ntptime.c       |  51 +++---
 sys/kern/kern_random.c        |  19 +-
 sys/kern/kern_tc.c            | 335 ++++++++++++++++++++++++----------
 sys/kern/kern_time.c          |  13 +-
 sys/sys/time.h                |  76 +++++++-
 sys/sys/timetc.h              |  76 +++++++-
 sys/sys/timex.h               |   2 +-
 19 files changed, 1248 insertions(+), 972 deletions(-)
 delete mode 100644 sys/i386/i386/microtime.s

diff --git a/sys/amd64/amd64/tsc.c b/sys/amd64/amd64/tsc.c
index 2fb46cb67c92..948dfa272c5f 100644
--- a/sys/amd64/amd64/tsc.c
+++ b/sys/amd64/amd64/tsc.c
@@ -34,7 +34,7 @@
  * SUCH DAMAGE.
  *
  *	from: @(#)clock.c	7.2 (Berkeley) 5/12/91
- *	$Id: clock.c,v 1.109 1998/02/09 06:08:26 eivind Exp $
+ *	$Id: clock.c,v 1.110 1998/02/13 06:33:16 bde Exp $
  */
 
 /*
@@ -109,9 +109,7 @@
 /*
  * Maximum frequency that we are willing to allow for timer0.  Must be
  * low enough to guarantee that the timer interrupt handler returns
- * before the next timer interrupt.  Must result in a lower TIMER_DIV
- * value than TIMER0_LATCH_COUNT so that we don't have to worry about
- * underflow in the calculation of timer0_overflow_threshold.
+ * before the next timer interrupt.
  */
 #define	TIMER0_MAX_FREQ		20000
 
@@ -120,25 +118,21 @@ int	disable_rtc_set;	/* disable resettodr() if != 0 */
 u_int	idelayed;
 int	statclock_disable;
 u_int	stat_imask = SWI_CLOCK_MASK;
-#ifdef TIMER_FREQ
-u_int	timer_freq = TIMER_FREQ;
-#else
-u_int	timer_freq = 1193182;
+#ifndef TIMER_FREQ
+#define TIMER_FREQ   1193182
 #endif
+u_int	timer_freq = TIMER_FREQ;
 int	timer0_max_count;
-u_int	timer0_overflow_threshold;
-u_int	timer0_prescaler_count;
-u_int	tsc_bias;
-u_int	tsc_comultiplier;
 u_int	tsc_freq;
-u_int	tsc_multiplier;
-static u_int	tsc_present;
 int	wall_cmos_clock;	/* wall	CMOS clock assumed if != 0 */
 
 static	int	beeping = 0;
 static	u_int	clk_imask = HWI_MASK | SWI_MASK;
 static	const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
 static	u_int	hardclock_max_count;
+static	u_int32_t i8254_lastcount;
+static	u_int32_t i8254_offset;
+static	int	i8254_ticked;
 /*
  * XXX new_function and timer_func should not handle clockframes, but
  * timer_func currently needs to hold hardclock to handle the
@@ -149,6 +143,7 @@ static	void	(*new_function) __P((struct clockframe *frame));
 static	u_int	new_rate;
 static	u_char	rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
 static	u_char	rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
+static	u_int	timer0_prescaler_count;
 
 /* Values for timerX_state: */
 #define	RELEASED	0
@@ -159,13 +154,42 @@ static	u_char	rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
 static	u_char	timer0_state;
 static	u_char	timer2_state;
 static	void	(*timer_func) __P((struct clockframe *frame)) = hardclock;
+static	u_int	tsc_present;
 
-static	void	set_tsc_freq(u_int tsc_count, u_int i8254_freq);
+static	u_int64_t i8254_get_timecount __P((void));
 static	void	set_timer_freq(u_int freq, int intr_freq);
+static	u_int64_t tsc_get_timecount __P((void));
+static	u_int32_t tsc_get_timedelta __P((struct timecounter *tc));
+
+static struct timecounter tsc_timecounter[3] = {	/* only element [0] is given explicit values */
+	tsc_get_timedelta,	/* get_timedelta: rdtsc() minus tc->offset_count */
+	tsc_get_timecount,	/* get_timecount: plain rdtsc() */
+ 	~0,			/* counter_mask: ~0 (field width not visible here) */
+	0,			/* frequency: set from tsc_freq in startrtclock() and by the tsc_freq sysctl */
+	 "TSC"			/* name */
+};
+
+SYSCTL_OPAQUE(_debug, OID_AUTO, tsc_timecounter, CTLFLAG_RD, 
+	tsc_timecounter, sizeof(tsc_timecounter), "S,timecounter", "");
+
+static struct timecounter i8254_timecounter[3] = {	/* only element [0] is given explicit values */
+	0,			/* get_timedelta: none supplied */
+	i8254_get_timecount,	/* get_timecount: latched timer0 count plus i8254_offset */
+	(1ULL << 32) - 1,	/* counter_mask: low 32 bits, matching the u_int32_t count */
+	0,			/* frequency: set from timer_freq in startrtclock() and by the i8254_freq sysctl */
+	"i8254"			/* name: compared against in acquire_timer0() */
+};
+
+SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD, 
+	i8254_timecounter, sizeof(i8254_timecounter), "S,timecounter", "");
 
 static void
 clkintr(struct clockframe frame)
 {
+	if (!i8254_ticked)
+		i8254_offset += timer0_max_count;
+	else
+		i8254_ticked = 0;
 	timer_func(&frame);
 	switch (timer0_state) {
 
@@ -185,8 +209,6 @@ clkintr(struct clockframe frame)
 	case ACQUIRE_PENDING:
 		setdelayed();
 		timer0_max_count = TIMER_DIV(new_rate);
-		timer0_overflow_threshold =
-			timer0_max_count - TIMER0_LATCH_COUNT;
 		disable_intr();
 		outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
 		outb(TIMER_CNTR0, timer0_max_count & 0xff);
@@ -210,8 +232,6 @@ clkintr(struct clockframe frame)
 			hardclock(&frame);
 			setdelayed();
 			timer0_max_count = hardclock_max_count;
-			timer0_overflow_threshold =
-				timer0_max_count - TIMER0_LATCH_COUNT;
 			disable_intr();
 			outb(TIMER_MODE,
 			     TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
@@ -236,6 +256,8 @@ acquire_timer0(int rate, void (*function) __P((struct clockframe *frame)))
 
 	if (rate <= 0 || rate > TIMER0_MAX_FREQ)
 		return (-1);
+	if (strcmp(timecounter->name, "i8254") == 0)
+		return (-1);
 	switch (timer0_state) {
 
 	case RELEASED:
@@ -606,14 +628,14 @@ calibrate_clocks(void)
 	 * Read the cpu cycle counter.  The timing considerations are
 	 * similar to those for the i8254 clock.
 	 */
-	if (tsc_present) {
-		set_tsc_freq((u_int)rdtsc(), tot_count);
-		if (bootverbose)
+	if (tsc_present) 
+		tsc_freq = rdtsc();
+
+	if (bootverbose) {
+	        printf("i8254 clock: %u Hz\n", tot_count);
+		if (tsc_present)
 		        printf("TSC clock: %u Hz, ", tsc_freq);
 	}
-
-	if (bootverbose)
-	        printf("i8254 clock: %u Hz\n", tot_count);
 	return (tot_count);
 
 fail:
@@ -635,8 +657,6 @@ set_timer_freq(u_int freq, int intr_freq)
 	new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq);
 	if (new_timer0_max_count != timer0_max_count) {
 		timer0_max_count = new_timer0_max_count;
-		timer0_overflow_threshold = timer0_max_count -
-		    TIMER0_LATCH_COUNT;
 		outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
 		outb(TIMER_CNTR0, timer0_max_count & 0xff);
 		outb(TIMER_CNTR0, timer0_max_count >> 8);
@@ -646,7 +666,7 @@ set_timer_freq(u_int freq, int intr_freq)
 }
 
 /*
- * Initialize 8253 timer 0 early so that it can be used in DELAY().
+ * Initialize 8254 timer 0 early so that it can be used in DELAY().
  * XXX initialization of other timers is unintentionally left blank.
  */
 void
@@ -700,6 +720,8 @@ startrtclock()
 	}
 
 	set_timer_freq(timer_freq, hz);
+	i8254_timecounter[0].frequency = timer_freq;
+	init_timecounter(i8254_timecounter);
 
 #ifndef CLK_USE_TSC_CALIBRATION
 	if (tsc_freq != 0) {
@@ -717,12 +739,16 @@ startrtclock()
 		 */
 		wrmsr(0x10, 0LL);	/* XXX */
 		DELAY(1000000);
-		set_tsc_freq((u_int)rdtsc(), timer_freq);
+		tsc_freq = rdtsc();
 #ifdef CLK_USE_TSC_CALIBRATION
 		if (bootverbose)
-			printf("TSC clock: %u Hz\n", tsc_freq);
+			printf("TSC clock: %u Hz (Method B)\n", tsc_freq);
 #endif
 	}
+	if (tsc_present && tsc_freq != 0) {
+		tsc_timecounter[0].frequency = tsc_freq;
+		init_timecounter(tsc_timecounter);
+	}
 }
 
 /*
@@ -736,11 +762,13 @@ inittodr(time_t base)
 	int		yd;
 	int		year, month;
 	int		y, m, s;
+	struct timespec ts;
 
 	if (base) {
 		s = splclock();
-		time.tv_sec  = base;
-		time.tv_usec = 0;
+		ts.tv_sec = base;
+		ts.tv_nsec = 0;
+		set_timecounter(&ts);
 		splx(s);
 	}
 
@@ -780,9 +808,15 @@ inittodr(time_t base)
 
 	sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
 
-	s = splclock();
-	time.tv_sec = sec;
-	splx(s);
+	y = time.tv_sec - sec;
+	if (y <= -2 || y >= 2) {
+		/* badly off, adjust it */
+		s = splclock();
+		ts.tv_sec = sec;
+		ts.tv_nsec = 0;
+		set_timecounter(&ts);
+		splx(s);
+	}
 	return;
 
 wrong_time:
@@ -929,12 +963,6 @@ cpu_initclocks()
 
 #endif /* APIC_IO */
 
-	/*
-	 * Finish setting up anti-jitter measures.
-	 */
-	if (tsc_freq != 0)
-		tsc_bias = rdtsc();
-
 	/* Initialize RTC. */
 	writertc(RTC_STATUSA, rtc_statusa);
 	writertc(RTC_STATUSB, RTCSB_24HR);
@@ -987,11 +1015,10 @@ sysctl_machdep_i8254_freq SYSCTL_HANDLER_ARGS
 	freq = timer_freq;
 	error = sysctl_handle_opaque(oidp, &freq, sizeof freq, req);
 	if (error == 0 && req->newptr != NULL) {
-		if (timer0_state != 0)
+		if (timer0_state != RELEASED)
 			return (EBUSY);	/* too much trouble to handle */
 		set_timer_freq(freq, hz);
-		if (tsc_present)
-			set_tsc_freq(tsc_freq, timer_freq);
+		i8254_timecounter[0].frequency = freq;
 	}
 	return (error);
 }
@@ -999,28 +1026,6 @@ sysctl_machdep_i8254_freq SYSCTL_HANDLER_ARGS
 SYSCTL_PROC(_machdep, OID_AUTO, i8254_freq, CTLTYPE_INT | CTLFLAG_RW,
 	    0, sizeof(u_int), sysctl_machdep_i8254_freq, "I", "");
 
-static void
-set_tsc_freq(u_int tsc_count, u_int i8254_freq)
-{
-	u_int comultiplier, multiplier;
-	u_long ef;
-
-	if (tsc_count == 0) {
-		tsc_freq = tsc_count;
-		return;
-	}
-	comultiplier = ((unsigned long long)tsc_count
-			<< TSC_COMULTIPLIER_SHIFT) / i8254_freq;
-	multiplier = (1000000LL << TSC_MULTIPLIER_SHIFT) / tsc_count;
-	ef = read_eflags();
-	disable_intr();
-	tsc_freq = tsc_count;
-	tsc_comultiplier = comultiplier;
-	tsc_multiplier = multiplier;
-	CLOCK_UNLOCK();
-	write_eflags(ef);
-}
-
 static int
 sysctl_machdep_tsc_freq SYSCTL_HANDLER_ARGS
 {
@@ -1031,10 +1036,52 @@ sysctl_machdep_tsc_freq SYSCTL_HANDLER_ARGS
 		return (EOPNOTSUPP);
 	freq = tsc_freq;
 	error = sysctl_handle_opaque(oidp, &freq, sizeof freq, req);
-	if (error == 0 && req->newptr != NULL)
-		set_tsc_freq(freq, timer_freq);
+	if (error == 0 && req->newptr != NULL) {
+		tsc_freq = freq;
+		tsc_timecounter[0].frequency = tsc_freq;
+	}
 	return (error);
 }
 
 SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_INT | CTLFLAG_RW,
 	    0, sizeof(u_int), sysctl_machdep_tsc_freq, "I", "");
+
+static u_int64_t	/* the 32-bit count is widened on return */
+i8254_get_timecount(void)	/* get_timecount method of i8254_timecounter */
+{
+	u_int32_t count;	/* ticks into the current period, then the running total */
+	u_long ef;		/* eflags saved on entry */
+	u_int high, low;	/* the two bytes read back from timer0 */
+
+	ef = read_eflags();
+	disable_intr();		/* no interrupts (e.g. clkintr) until write_eflags() below */
+
+	/* Select timer0 and latch counter value. */
+	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
+
+	low = inb(TIMER_CNTR0);		/* first read is used as the low byte */
+	high = inb(TIMER_CNTR0);	/* second read is used as the high byte */
+
+	count = hardclock_max_count - ((high << 8) | low);	/* latched value subtracted from the period length */
+	if (count < i8254_lastcount) {	/* smaller than at the previous call: account for a new period */
+		i8254_ticked = 1;	/* makes the next clkintr() skip its own offset update */
+		i8254_offset += hardclock_max_count;	/* NOTE(review): clkintr() adds timer0_max_count -- confirm both agree whenever this runs */
+	}
+
+	i8254_lastcount = count;	/* remembered for the comparison on the next call */
+	count += i8254_offset;		/* add the ticks of all earlier periods */
+	write_eflags(ef);		/* put back the interrupt state saved on entry */
+	return (count);
+}
+
+static u_int64_t
+tsc_get_timecount(void)	/* get_timecount method of tsc_timecounter */
+{
+	return ((u_int64_t)rdtsc());	/* the rdtsc() value, cast and otherwise unmodified */
+}
+
+static u_int32_t
+tsc_get_timedelta(struct timecounter *tc)	/* get_timedelta method of tsc_timecounter */
+{
+	return ((u_int64_t)rdtsc() - tc->offset_count);	/* difference is narrowed to the u_int32_t return type */
+}
diff --git a/sys/amd64/include/clock.h b/sys/amd64/include/clock.h
index 6013414ede23..31b463221c38 100644
--- a/sys/amd64/include/clock.h
+++ b/sys/amd64/include/clock.h
@@ -3,16 +3,12 @@
  * Garrett Wollman, September 1994.
  * This file is in the public domain.
  *
- *	$Id: clock.h,v 1.30 1997/12/28 17:33:08 phk Exp $
+ *	$Id: clock.h,v 1.31 1998/02/01 22:45:23 bde Exp $
  */
 
 #ifndef _MACHINE_CLOCK_H_
 #define	_MACHINE_CLOCK_H_
 
-#define CPU_CLOCKUPDATE(otime, ntime)	cpu_clockupdate((otime), (ntime))
-
-#define CPU_THISTICKLEN(dflt) dflt
-
 #define	TSC_COMULTIPLIER_SHIFT	20
 #define	TSC_MULTIPLIER_SHIFT	32
 
@@ -27,12 +23,7 @@ extern int	disable_rtc_set;
 extern int	statclock_disable;
 extern u_int	timer_freq;
 extern int	timer0_max_count;
-extern u_int	timer0_overflow_threshold;
-extern u_int	timer0_prescaler_count;
-extern u_int	tsc_bias;
-extern u_int	tsc_comultiplier;
 extern u_int	tsc_freq;
-extern u_int	tsc_multiplier;
 extern int	wall_cmos_clock;
 
 /*
@@ -54,61 +45,6 @@ int	release_timer1 __P((void));
 #endif
 int	sysbeep __P((int pitch, int period));
 
-#ifdef CLOCK_HAIR
-
-#ifdef PC98
-#include <pc98/pc98/pc98.h>		/* XXX */
-#else
-#include <i386/isa/isa.h>		/* XXX */
-#endif
-#include <i386/isa/timerreg.h>		/* XXX */
-
-static __inline u_int
-clock_latency(void)
-{
-	u_char high, low;
-
-	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
-	low = inb(TIMER_CNTR0);
-	high = inb(TIMER_CNTR0);
-	return (timer0_prescaler_count + timer0_max_count
-		- ((high << 8) | low));
-}
-
-/*
- * When we update `time', we also update `tsc_bias' atomically (if we
- * are using the TSC).  `tsc_bias' is the best available approximation
- * to the value of the TSC (mod 2^32) at the time of the i8254
- * counter transition that caused the clock interrupt that caused the
- * update.  clock_latency() gives the time between the transition and
- * the update to within a few usec provided another such transition
- * hasn't occurred.  We don't bother checking for counter overflow as
- * in microtime(), since if it occurs then we're close to losing clock
- * interrupts.
- */
-static __inline void
-cpu_clockupdate(volatile struct timeval *otime, struct timeval *ntime)
-{
-	if (tsc_freq != 0) {
-		u_int tsc_count;	/* truncated */
-		u_int i8254_count;
-
-		disable_intr();
-		i8254_count = clock_latency();
-		tsc_count = rdtsc();
-		tsc_bias = tsc_count
-				- (u_int)
-				  (((unsigned long long)tsc_comultiplier
-				    * i8254_count)
-				   >> TSC_COMULTIPLIER_SHIFT);
-		*otime = *ntime;
-		enable_intr();
-	} else
-		*otime = *ntime;
-}
-
-#endif /* CLOCK_HAIR */
-
 #endif /* KERNEL */
 
 #endif /* !_MACHINE_CLOCK_H_ */
diff --git a/sys/amd64/isa/clock.c b/sys/amd64/isa/clock.c
index 2fb46cb67c92..948dfa272c5f 100644
--- a/sys/amd64/isa/clock.c
+++ b/sys/amd64/isa/clock.c
@@ -34,7 +34,7 @@
  * SUCH DAMAGE.
  *
  *	from: @(#)clock.c	7.2 (Berkeley) 5/12/91
- *	$Id: clock.c,v 1.109 1998/02/09 06:08:26 eivind Exp $
+ *	$Id: clock.c,v 1.110 1998/02/13 06:33:16 bde Exp $
  */
 
 /*
@@ -109,9 +109,7 @@
 /*
  * Maximum frequency that we are willing to allow for timer0.  Must be
  * low enough to guarantee that the timer interrupt handler returns
- * before the next timer interrupt.  Must result in a lower TIMER_DIV
- * value than TIMER0_LATCH_COUNT so that we don't have to worry about
- * underflow in the calculation of timer0_overflow_threshold.
+ * before the next timer interrupt.
  */
 #define	TIMER0_MAX_FREQ		20000
 
@@ -120,25 +118,21 @@ int	disable_rtc_set;	/* disable resettodr() if != 0 */
 u_int	idelayed;
 int	statclock_disable;
 u_int	stat_imask = SWI_CLOCK_MASK;
-#ifdef TIMER_FREQ
-u_int	timer_freq = TIMER_FREQ;
-#else
-u_int	timer_freq = 1193182;
+#ifndef TIMER_FREQ
+#define TIMER_FREQ   1193182
 #endif
+u_int	timer_freq = TIMER_FREQ;
 int	timer0_max_count;
-u_int	timer0_overflow_threshold;
-u_int	timer0_prescaler_count;
-u_int	tsc_bias;
-u_int	tsc_comultiplier;
 u_int	tsc_freq;
-u_int	tsc_multiplier;
-static u_int	tsc_present;
 int	wall_cmos_clock;	/* wall	CMOS clock assumed if != 0 */
 
 static	int	beeping = 0;
 static	u_int	clk_imask = HWI_MASK | SWI_MASK;
 static	const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
 static	u_int	hardclock_max_count;
+static	u_int32_t i8254_lastcount;
+static	u_int32_t i8254_offset;
+static	int	i8254_ticked;
 /*
  * XXX new_function and timer_func should not handle clockframes, but
  * timer_func currently needs to hold hardclock to handle the
@@ -149,6 +143,7 @@ static	void	(*new_function) __P((struct clockframe *frame));
 static	u_int	new_rate;
 static	u_char	rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
 static	u_char	rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
+static	u_int	timer0_prescaler_count;
 
 /* Values for timerX_state: */
 #define	RELEASED	0
@@ -159,13 +154,42 @@ static	u_char	rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
 static	u_char	timer0_state;
 static	u_char	timer2_state;
 static	void	(*timer_func) __P((struct clockframe *frame)) = hardclock;
+static	u_int	tsc_present;
 
-static	void	set_tsc_freq(u_int tsc_count, u_int i8254_freq);
+static	u_int64_t i8254_get_timecount __P((void));
 static	void	set_timer_freq(u_int freq, int intr_freq);
+static	u_int64_t tsc_get_timecount __P((void));
+static	u_int32_t tsc_get_timedelta __P((struct timecounter *tc));
+
+static struct timecounter tsc_timecounter[3] = {	/* only element [0] is given explicit values */
+	tsc_get_timedelta,	/* get_timedelta: rdtsc() minus tc->offset_count */
+	tsc_get_timecount,	/* get_timecount: plain rdtsc() */
+ 	~0,			/* counter_mask: ~0 (field width not visible here) */
+	0,			/* frequency: set from tsc_freq in startrtclock() and by the tsc_freq sysctl */
+	 "TSC"			/* name */
+};
+
+SYSCTL_OPAQUE(_debug, OID_AUTO, tsc_timecounter, CTLFLAG_RD, 
+	tsc_timecounter, sizeof(tsc_timecounter), "S,timecounter", "");
+
+static struct timecounter i8254_timecounter[3] = {	/* only element [0] is given explicit values */
+	0,			/* get_timedelta: none supplied */
+	i8254_get_timecount,	/* get_timecount: latched timer0 count plus i8254_offset */
+	(1ULL << 32) - 1,	/* counter_mask: low 32 bits, matching the u_int32_t count */
+	0,			/* frequency: set from timer_freq in startrtclock() and by the i8254_freq sysctl */
+	"i8254"			/* name: compared against in acquire_timer0() */
+};
+
+SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD, 
+	i8254_timecounter, sizeof(i8254_timecounter), "S,timecounter", "");
 
 static void
 clkintr(struct clockframe frame)
 {
+	if (!i8254_ticked)
+		i8254_offset += timer0_max_count;
+	else
+		i8254_ticked = 0;
 	timer_func(&frame);
 	switch (timer0_state) {
 
@@ -185,8 +209,6 @@ clkintr(struct clockframe frame)
 	case ACQUIRE_PENDING:
 		setdelayed();
 		timer0_max_count = TIMER_DIV(new_rate);
-		timer0_overflow_threshold =
-			timer0_max_count - TIMER0_LATCH_COUNT;
 		disable_intr();
 		outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
 		outb(TIMER_CNTR0, timer0_max_count & 0xff);
@@ -210,8 +232,6 @@ clkintr(struct clockframe frame)
 			hardclock(&frame);
 			setdelayed();
 			timer0_max_count = hardclock_max_count;
-			timer0_overflow_threshold =
-				timer0_max_count - TIMER0_LATCH_COUNT;
 			disable_intr();
 			outb(TIMER_MODE,
 			     TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
@@ -236,6 +256,8 @@ acquire_timer0(int rate, void (*function) __P((struct clockframe *frame)))
 
 	if (rate <= 0 || rate > TIMER0_MAX_FREQ)
 		return (-1);
+	if (strcmp(timecounter->name, "i8254") == 0)
+		return (-1);
 	switch (timer0_state) {
 
 	case RELEASED:
@@ -606,14 +628,14 @@ calibrate_clocks(void)
 	 * Read the cpu cycle counter.  The timing considerations are
 	 * similar to those for the i8254 clock.
 	 */
-	if (tsc_present) {
-		set_tsc_freq((u_int)rdtsc(), tot_count);
-		if (bootverbose)
+	if (tsc_present) 
+		tsc_freq = rdtsc();
+
+	if (bootverbose) {
+	        printf("i8254 clock: %u Hz\n", tot_count);
+		if (tsc_present)
 		        printf("TSC clock: %u Hz, ", tsc_freq);
 	}
-
-	if (bootverbose)
-	        printf("i8254 clock: %u Hz\n", tot_count);
 	return (tot_count);
 
 fail:
@@ -635,8 +657,6 @@ set_timer_freq(u_int freq, int intr_freq)
 	new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq);
 	if (new_timer0_max_count != timer0_max_count) {
 		timer0_max_count = new_timer0_max_count;
-		timer0_overflow_threshold = timer0_max_count -
-		    TIMER0_LATCH_COUNT;
 		outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
 		outb(TIMER_CNTR0, timer0_max_count & 0xff);
 		outb(TIMER_CNTR0, timer0_max_count >> 8);
@@ -646,7 +666,7 @@ set_timer_freq(u_int freq, int intr_freq)
 }
 
 /*
- * Initialize 8253 timer 0 early so that it can be used in DELAY().
+ * Initialize 8254 timer 0 early so that it can be used in DELAY().
  * XXX initialization of other timers is unintentionally left blank.
  */
 void
@@ -700,6 +720,8 @@ startrtclock()
 	}
 
 	set_timer_freq(timer_freq, hz);
+	i8254_timecounter[0].frequency = timer_freq;
+	init_timecounter(i8254_timecounter);
 
 #ifndef CLK_USE_TSC_CALIBRATION
 	if (tsc_freq != 0) {
@@ -717,12 +739,16 @@ startrtclock()
 		 */
 		wrmsr(0x10, 0LL);	/* XXX */
 		DELAY(1000000);
-		set_tsc_freq((u_int)rdtsc(), timer_freq);
+		tsc_freq = rdtsc();
 #ifdef CLK_USE_TSC_CALIBRATION
 		if (bootverbose)
-			printf("TSC clock: %u Hz\n", tsc_freq);
+			printf("TSC clock: %u Hz (Method B)\n", tsc_freq);
 #endif
 	}
+	if (tsc_present && tsc_freq != 0) {
+		tsc_timecounter[0].frequency = tsc_freq;
+		init_timecounter(tsc_timecounter);
+	}
 }
 
 /*
@@ -736,11 +762,13 @@ inittodr(time_t base)
 	int		yd;
 	int		year, month;
 	int		y, m, s;
+	struct timespec ts;
 
 	if (base) {
 		s = splclock();
-		time.tv_sec  = base;
-		time.tv_usec = 0;
+		ts.tv_sec = base;
+		ts.tv_nsec = 0;
+		set_timecounter(&ts);
 		splx(s);
 	}
 
@@ -780,9 +808,15 @@ inittodr(time_t base)
 
 	sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
 
-	s = splclock();
-	time.tv_sec = sec;
-	splx(s);
+	y = time.tv_sec - sec;
+	if (y <= -2 || y >= 2) {
+		/* badly off, adjust it */
+		s = splclock();
+		ts.tv_sec = sec;
+		ts.tv_nsec = 0;
+		set_timecounter(&ts);
+		splx(s);
+	}
 	return;
 
 wrong_time:
@@ -929,12 +963,6 @@ cpu_initclocks()
 
 #endif /* APIC_IO */
 
-	/*
-	 * Finish setting up anti-jitter measures.
-	 */
-	if (tsc_freq != 0)
-		tsc_bias = rdtsc();
-
 	/* Initialize RTC. */
 	writertc(RTC_STATUSA, rtc_statusa);
 	writertc(RTC_STATUSB, RTCSB_24HR);
@@ -987,11 +1015,10 @@ sysctl_machdep_i8254_freq SYSCTL_HANDLER_ARGS
 	freq = timer_freq;
 	error = sysctl_handle_opaque(oidp, &freq, sizeof freq, req);
 	if (error == 0 && req->newptr != NULL) {
-		if (timer0_state != 0)
+		if (timer0_state != RELEASED)
 			return (EBUSY);	/* too much trouble to handle */
 		set_timer_freq(freq, hz);
-		if (tsc_present)
-			set_tsc_freq(tsc_freq, timer_freq);
+		i8254_timecounter[0].frequency = freq;
 	}
 	return (error);
 }
@@ -999,28 +1026,6 @@ sysctl_machdep_i8254_freq SYSCTL_HANDLER_ARGS
 SYSCTL_PROC(_machdep, OID_AUTO, i8254_freq, CTLTYPE_INT | CTLFLAG_RW,
 	    0, sizeof(u_int), sysctl_machdep_i8254_freq, "I", "");
 
-static void
-set_tsc_freq(u_int tsc_count, u_int i8254_freq)
-{
-	u_int comultiplier, multiplier;
-	u_long ef;
-
-	if (tsc_count == 0) {
-		tsc_freq = tsc_count;
-		return;
-	}
-	comultiplier = ((unsigned long long)tsc_count
-			<< TSC_COMULTIPLIER_SHIFT) / i8254_freq;
-	multiplier = (1000000LL << TSC_MULTIPLIER_SHIFT) / tsc_count;
-	ef = read_eflags();
-	disable_intr();
-	tsc_freq = tsc_count;
-	tsc_comultiplier = comultiplier;
-	tsc_multiplier = multiplier;
-	CLOCK_UNLOCK();
-	write_eflags(ef);
-}
-
 static int
 sysctl_machdep_tsc_freq SYSCTL_HANDLER_ARGS
 {
@@ -1031,10 +1036,52 @@ sysctl_machdep_tsc_freq SYSCTL_HANDLER_ARGS
 		return (EOPNOTSUPP);
 	freq = tsc_freq;
 	error = sysctl_handle_opaque(oidp, &freq, sizeof freq, req);
-	if (error == 0 && req->newptr != NULL)
-		set_tsc_freq(freq, timer_freq);
+	if (error == 0 && req->newptr != NULL) {
+		tsc_freq = freq;
+		tsc_timecounter[0].frequency = tsc_freq;
+	}
 	return (error);
 }
 
 SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_INT | CTLFLAG_RW,
 	    0, sizeof(u_int), sysctl_machdep_tsc_freq, "I", "");
+
+static u_int64_t	/* the 32-bit count is widened on return */
+i8254_get_timecount(void)	/* get_timecount method of i8254_timecounter */
+{
+	u_int32_t count;	/* ticks into the current period, then the running total */
+	u_long ef;		/* eflags saved on entry */
+	u_int high, low;	/* the two bytes read back from timer0 */
+
+	ef = read_eflags();
+	disable_intr();		/* no interrupts (e.g. clkintr) until write_eflags() below */
+
+	/* Select timer0 and latch counter value. */
+	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
+
+	low = inb(TIMER_CNTR0);		/* first read is used as the low byte */
+	high = inb(TIMER_CNTR0);	/* second read is used as the high byte */
+
+	count = hardclock_max_count - ((high << 8) | low);	/* latched value subtracted from the period length */
+	if (count < i8254_lastcount) {	/* smaller than at the previous call: account for a new period */
+		i8254_ticked = 1;	/* makes the next clkintr() skip its own offset update */
+		i8254_offset += hardclock_max_count;	/* NOTE(review): clkintr() adds timer0_max_count -- confirm both agree whenever this runs */
+	}
+
+	i8254_lastcount = count;	/* remembered for the comparison on the next call */
+	count += i8254_offset;		/* add the ticks of all earlier periods */
+	write_eflags(ef);		/* put back the interrupt state saved on entry */
+	return (count);
+}
+
+static u_int64_t
+tsc_get_timecount(void)	/* get_timecount method of tsc_timecounter */
+{
+	return ((u_int64_t)rdtsc());	/* the rdtsc() value, cast and otherwise unmodified */
+}
+
+static u_int32_t
+tsc_get_timedelta(struct timecounter *tc)	/* get_timedelta method of tsc_timecounter */
+{
+	return ((u_int64_t)rdtsc() - tc->offset_count);	/* difference is narrowed to the u_int32_t return type */
+}
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index 113af1c00592..118dfd68d475 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -1,7 +1,7 @@
 # This file tells config what files go into building a kernel,
 # files marked standard are always included.
 #
-#	$Id: files.i386,v 1.190 1998/02/17 11:32:33 sos Exp $
+#	$Id: files.i386,v 1.191 1998/02/18 13:43:42 msmith Exp $
 #
 # The long compile-with and dependency lines are required because of
 # limitations in config: backslash-newline doesn't work in strings, and
@@ -53,7 +53,6 @@ i386/i386/initcpu.c		standard
 i386/i386/machdep.c		standard
 i386/i386/math_emulate.c	optional	math_emulate
 i386/i386/mem.c			standard
-i386/i386/microtime.s		standard
 i386/i386/mp_machdep.c		optional	smp
 i386/i386/mpapic.c		optional	smp
 i386/i386/mpboot.s		optional	smp
diff --git a/sys/i386/conf/files.i386 b/sys/i386/conf/files.i386
index 113af1c00592..118dfd68d475 100644
--- a/sys/i386/conf/files.i386
+++ b/sys/i386/conf/files.i386
@@ -1,7 +1,7 @@
 # This file tells config what files go into building a kernel,
 # files marked standard are always included.
 #
-#	$Id: files.i386,v 1.190 1998/02/17 11:32:33 sos Exp $
+#	$Id: files.i386,v 1.191 1998/02/18 13:43:42 msmith Exp $
 #
 # The long compile-with and dependency lines are required because of
 # limitations in config: backslash-newline doesn't work in strings, and
@@ -53,7 +53,6 @@ i386/i386/initcpu.c		standard
 i386/i386/machdep.c		standard
 i386/i386/math_emulate.c	optional	math_emulate
 i386/i386/mem.c			standard
-i386/i386/microtime.s		standard
 i386/i386/mp_machdep.c		optional	smp
 i386/i386/mpapic.c		optional	smp
 i386/i386/mpboot.s		optional	smp
diff --git a/sys/i386/i386/microtime.s b/sys/i386/i386/microtime.s
deleted file mode 100644
index 93b0f19c6cd3..000000000000
--- a/sys/i386/i386/microtime.s
+++ /dev/null
@@ -1,241 +0,0 @@
-/* -*- Fundamental -*- keep Emacs from f***ing up the formatting */
-/*
- * Copyright (c) 1993 The Regents of the University of California.
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 3. All advertising materials mentioning features or use of this software
- *    must display the following acknowledgement:
- *      This product includes software developed by the University of
- *      California, Berkeley and its contributors.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- *	from: Steve McCanne's microtime code
- *	$Id: microtime.s,v 1.39 1997/12/28 17:32:59 phk Exp $
- */
-
-#include <machine/asmacros.h>
-
-#include <i386/isa/icu.h>
-#include <i386/isa/isa.h>
-#include <i386/isa/timerreg.h>
-
-#ifdef SMP
-#include <machine/smptests.h>		/** USE_CLOCKLOCK, REAL_MCPL */
-#endif
-
-ENTRY(microtime)
-
-	movl	_tsc_freq, %ecx
-	testl	%ecx, %ecx
-	je	i8254_microtime
-	pushfl
-	cli
-	.byte	0x0f, 0x31	/* RDTSC */
-	subl	_tsc_bias, %eax
-	mull	_tsc_multiplier
-	movl	%edx, %eax
-	addl	_time+4, %eax	/* usec += time.tv_sec */
-	movl	_time, %edx	/* sec = time.tv_sec */
-	popfl			/* restore interrupt mask */
-
-	cmpl	$1000000, %eax	/* usec valid? */
-	jb	1f
-	subl	$1000000, %eax	/* adjust usec */
-	incl	%edx		/* bump sec */
-1:
-	movl	4(%esp), %ecx	/* load timeval pointer arg */
-	movl	%edx, (%ecx)	/* tvp->tv_sec = sec */
-	movl	%eax, 4(%ecx)	/* tvp->tv_usec = usec */
-
-	ret
-
-	ALIGN_TEXT
-i8254_microtime:
-	movb	$TIMER_SEL0|TIMER_LATCH, %al	/* prepare to latch */
-
-	pushfl
-	cli			/* disable interrupts */
-#ifdef USE_CLOCKLOCK
-	pushl	%eax			/* s_lock destroys %eax, %ecx */
-	pushl	%ecx
-	pushl	$_clock_lock
- 	call	_s_lock
-	addl	$4, %esp
-	popl	%ecx
-	popl	%eax
-#endif /* USE_CLOCKLOCK */
-	outb	%al, $TIMER_MODE	/* latch timer 0's counter */
-	inb	$TIMER_CNTR0, %al	/* read counter value, LSB first */
-	movb	%al, %cl
-	inb	$TIMER_CNTR0, %al
-	movb	%al, %ch
-
-	/*
-	 * Now check for counter overflow.  This is tricky because the
-	 * timer chip doesn't let us atomically read the current counter
-	 * value and the output state (i.e., overflow state).  We have
-	 * to read the ICU interrupt request register (IRR) to see if the
-	 * overflow has occured.  Because we lack atomicity, we use
-	 * the (very accurate) heuristic that we only check for
-	 * overflow if the value read is close to the interrupt period.
-	 * E.g., if we just checked the IRR, we might read a non-overflowing
-	 * value close to 0, experience overflow, then read this overflow
-	 * from the IRR, and mistakenly add a correction to the "close
-	 * to zero" value.
-	 *
-	 * We compare the counter value to the prepared overflow threshold.
-	 * If the counter value is less than this, we assume the counter
-	 * didn't overflow between disabling timer interrupts and latching
-	 * the counter value above.  For example, we assume that interrupts
-	 * are enabled when we are called (or were disabled just a few
-	 * cycles before we are called and that the instructions before the
-	 * "cli" are fast) and that the "cli" and "outb" instructions take
-	 * less than 10 timer cycles to execute.  The last assumption is
-	 * very safe.
-	 *
-	 * Otherwise, the counter might have overflowed.  We check for this
-	 * condition by reading the interrupt request register out of the ICU.
-	 * If it overflowed, we add in one clock period.
-	 *
-	 * The heuristic is "very accurate" because it works 100% if we're
-	 * called with interrupts enabled.  Otherwise, it might not work.
-	 * Currently, only siointrts() calls us with interrupts disabled, so
-	 * the problem can be avoided at some cost to the general case.  The
-	 * costs are complications in callers to disable interrupts in
-	 * IO_ICU1 and extra reads of the IRR forced by a conservative
-	 * overflow threshold.
-	 *
-	 * In 2.0, we are called at splhigh() from mi_switch(), so we have
-	 * to allow for the overflow bit being in ipending instead of in
-	 * the IRR.  Our caller may have executed many instructions since
-	 * ipending was set, so the heuristic for the IRR is inappropriate
-	 * for ipending.  However, we don't need another heuristic, since
-	 * the "cli" suffices to lock ipending.
-	 */
-
-	movl	_timer0_max_count, %edx	/* prepare for 2 uses */
-
-#ifdef APIC_IO
-#ifdef REAL_MCPL			/* XXX do we need this??? */
-	pushl	%ecx			/* s_lock destroys %eax, %ecx */
-	CPL_LOCK			/* MP-safe, INTs disabled above */
-	popl	%ecx			/* restore %ecx */
-	movl	_ipending, %eax
-	movl	$0, _cpl_lock		/* s_unlock would destroy %eax */
-	testl	%eax, _mask8254		/* is soft timer interrupt pending? */
-#else
-	/** XXX FIXME: take our chances with a race, is this OK? */
-	movl	_ipending, %eax
-	testl	%eax, _mask8254		/* is soft timer interrupt pending? */
-#endif /* REAL_MCPL */
-#else
-	testb	$IRQ0, _ipending	/* is soft timer interrupt pending? */
-#endif /* APIC_IO */
-	jne	overflow
-
-	/* Do we have a possible overflow condition? */
-	cmpl	_timer0_overflow_threshold, %ecx
-	jbe	1f
-
-#ifdef APIC_IO
-	movl	lapic_irr1, %eax	/** XXX assumption: IRQ0-24 */
-	testl	%eax, _mask8254		/* is hard timer interrupt pending? */
-#else
-	inb	$IO_ICU1, %al		/* read IRR in ICU */
-	testb	$IRQ0, %al		/* is hard timer interrupt pending? */
-#endif /* APIC_IO */
-	je	1f
-overflow:
-	subl	%edx, %ecx	/* some intr pending, count timer down through 0 */
-1:
-
-	/*
-	 * Subtract counter value from max count since it is a count-down value.
-	 */
-	subl	%ecx, %edx
-
-	/* Adjust for partial ticks. */
-	addl	_timer0_prescaler_count, %edx
-
-	/*
-	 * To divide by 1.193200, we multiply by 27465 and shift right by 15.
-	 *
-	 * The multiplier was originally calculated to be
-	 *
-	 *	2^18 * 1000000 / 1193200 = 219698.
-	 *
-	 * The frequency is 1193200 to be compatible with rounding errors in
-	 * the calculation of the usual maximum count.  2^18 is the largest
-	 * power of 2 such that multiplying `i' by it doesn't overflow for i
-	 * in the range of interest ([0, 11932 + 5)).  We adjusted the
-	 * multiplier a little to minimise the average of
-	 *
-	 *	fabs(i / 1.1193200 - ((multiplier * i) >> 18))
-	 *
-	 * for i in the range and then removed powers of 2 to speed up the
-	 * multiplication and to avoid overflow for i outside the range
-	 * (i may be as high as 2^17 if the timer is programmed to its
-	 * maximum maximum count).  The absolute error is less than 1 for
-	 * all i in the range.
-	 */
-
-#if 0
-	imul	$27465, %edx				/* 25 cycles on a 486 */
-#else
-	leal	(%edx,%edx,2), %eax	/* a = 3	2 cycles on a 486   */
-	leal	(%edx,%eax,4), %eax	/* a = 13	2		    */
-	movl	%eax, %ecx		/* c = 13	1		    */
-	shl	$5, %eax		/* a = 416	2		    */
-	addl	%ecx, %eax		/* a = 429	1		    */
-	leal	(%edx,%eax,8), %eax	/* a = 3433	2		    */
-	leal	(%edx,%eax,8), %eax	/* a = 27465	2 (total 12 cycles) */
-#endif /* 0 */
-	shr	$15, %eax
-
-#ifdef USE_CLOCKLOCK
-	pushl	%eax		/* s_lock destroys %eax, %ecx */
-	pushl	%edx		/* during profiling, %edx is also destroyed */
-	pushl	$_clock_lock
- 	call	_s_unlock
-	addl	$4, %esp
-	popl	%edx
-	popl	%eax
-#endif /* USE_CLOCKLOCK */
-
-	addl	_time+4, %eax	/* usec += time.tv_sec */
-	movl	_time, %edx	/* sec = time.tv_sec */
-
-	popfl			/* restore interrupt mask */
-
-	cmpl	$1000000, %eax	/* usec valid? */
-	jb	1f
-	subl	$1000000, %eax	/* adjust usec */
-	incl	%edx		/* bump sec */
-1:
-	movl	4(%esp), %ecx	/* load timeval pointer arg */
-	movl	%edx, (%ecx)	/* tvp->tv_sec = sec */
-	movl	%eax, 4(%ecx)	/* tvp->tv_usec = usec */
-
-	ret
diff --git a/sys/i386/i386/tsc.c b/sys/i386/i386/tsc.c
index 2fb46cb67c92..948dfa272c5f 100644
--- a/sys/i386/i386/tsc.c
+++ b/sys/i386/i386/tsc.c
@@ -34,7 +34,7 @@
  * SUCH DAMAGE.
  *
  *	from: @(#)clock.c	7.2 (Berkeley) 5/12/91
- *	$Id: clock.c,v 1.109 1998/02/09 06:08:26 eivind Exp $
+ *	$Id: clock.c,v 1.110 1998/02/13 06:33:16 bde Exp $
  */
 
 /*
@@ -109,9 +109,7 @@
 /*
  * Maximum frequency that we are willing to allow for timer0.  Must be
  * low enough to guarantee that the timer interrupt handler returns
- * before the next timer interrupt.  Must result in a lower TIMER_DIV
- * value than TIMER0_LATCH_COUNT so that we don't have to worry about
- * underflow in the calculation of timer0_overflow_threshold.
+ * before the next timer interrupt.
  */
 #define	TIMER0_MAX_FREQ		20000
 
@@ -120,25 +118,21 @@ int	disable_rtc_set;	/* disable resettodr() if != 0 */
 u_int	idelayed;
 int	statclock_disable;
 u_int	stat_imask = SWI_CLOCK_MASK;
-#ifdef TIMER_FREQ
-u_int	timer_freq = TIMER_FREQ;
-#else
-u_int	timer_freq = 1193182;
+#ifndef TIMER_FREQ
+#define TIMER_FREQ   1193182
 #endif
+u_int	timer_freq = TIMER_FREQ;
 int	timer0_max_count;
-u_int	timer0_overflow_threshold;
-u_int	timer0_prescaler_count;
-u_int	tsc_bias;
-u_int	tsc_comultiplier;
 u_int	tsc_freq;
-u_int	tsc_multiplier;
-static u_int	tsc_present;
 int	wall_cmos_clock;	/* wall	CMOS clock assumed if != 0 */
 
 static	int	beeping = 0;
 static	u_int	clk_imask = HWI_MASK | SWI_MASK;
 static	const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
 static	u_int	hardclock_max_count;
+static	u_int32_t i8254_lastcount;
+static	u_int32_t i8254_offset;
+static	int	i8254_ticked;
 /*
  * XXX new_function and timer_func should not handle clockframes, but
  * timer_func currently needs to hold hardclock to handle the
@@ -149,6 +143,7 @@ static	void	(*new_function) __P((struct clockframe *frame));
 static	u_int	new_rate;
 static	u_char	rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
 static	u_char	rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
+static	u_int	timer0_prescaler_count;
 
 /* Values for timerX_state: */
 #define	RELEASED	0
@@ -159,13 +154,42 @@ static	u_char	rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
 static	u_char	timer0_state;
 static	u_char	timer2_state;
 static	void	(*timer_func) __P((struct clockframe *frame)) = hardclock;
+static	u_int	tsc_present;
 
-static	void	set_tsc_freq(u_int tsc_count, u_int i8254_freq);
+static	u_int64_t i8254_get_timecount __P((void));
 static	void	set_timer_freq(u_int freq, int intr_freq);
+static	u_int64_t tsc_get_timecount __P((void));
+static	u_int32_t tsc_get_timedelta __P((struct timecounter *tc));
+
+static struct timecounter tsc_timecounter[3] = {
+	tsc_get_timedelta,	/* get_timedelta: rdtsc() minus tc->offset_count */
+	tsc_get_timecount,	/* get_timecount: current rdtsc() value */
+ 	~0,			/* counter_mask: every bit set */
+	0,			/* frequency: filled in from tsc_freq at run time */
+	 "TSC"			/* name */
+};
+
+SYSCTL_OPAQUE(_debug, OID_AUTO, tsc_timecounter, CTLFLAG_RD, 
+	tsc_timecounter, sizeof(tsc_timecounter), "S,timecounter", "");
+
+static struct timecounter i8254_timecounter[3] = {
+	0,			/* get_timedelta: none supplied here */
+	i8254_get_timecount,	/* get_timecount: elapsed timer0 counts + offset */
+	(1ULL << 32) - 1,	/* counter_mask: low 32 bits */
+	0,			/* frequency: filled in from timer_freq at run time */
+	"i8254"			/* name: acquire_timer0() compares against this */
+};
+
+SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD, 
+	i8254_timecounter, sizeof(i8254_timecounter), "S,timecounter", "");
 
 static void
 clkintr(struct clockframe frame)
 {
+	if (!i8254_ticked)
+		i8254_offset += timer0_max_count;
+	else
+		i8254_ticked = 0;
 	timer_func(&frame);
 	switch (timer0_state) {
 
@@ -185,8 +209,6 @@ clkintr(struct clockframe frame)
 	case ACQUIRE_PENDING:
 		setdelayed();
 		timer0_max_count = TIMER_DIV(new_rate);
-		timer0_overflow_threshold =
-			timer0_max_count - TIMER0_LATCH_COUNT;
 		disable_intr();
 		outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
 		outb(TIMER_CNTR0, timer0_max_count & 0xff);
@@ -210,8 +232,6 @@ clkintr(struct clockframe frame)
 			hardclock(&frame);
 			setdelayed();
 			timer0_max_count = hardclock_max_count;
-			timer0_overflow_threshold =
-				timer0_max_count - TIMER0_LATCH_COUNT;
 			disable_intr();
 			outb(TIMER_MODE,
 			     TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
@@ -236,6 +256,8 @@ acquire_timer0(int rate, void (*function) __P((struct clockframe *frame)))
 
 	if (rate <= 0 || rate > TIMER0_MAX_FREQ)
 		return (-1);
+	if (strcmp(timecounter->name, "i8254") == 0)
+		return (-1);
 	switch (timer0_state) {
 
 	case RELEASED:
@@ -606,14 +628,14 @@ calibrate_clocks(void)
 	 * Read the cpu cycle counter.  The timing considerations are
 	 * similar to those for the i8254 clock.
 	 */
-	if (tsc_present) {
-		set_tsc_freq((u_int)rdtsc(), tot_count);
-		if (bootverbose)
+	if (tsc_present) 
+		tsc_freq = rdtsc();
+
+	if (bootverbose) {
+	        printf("i8254 clock: %u Hz\n", tot_count);
+		if (tsc_present)
 		        printf("TSC clock: %u Hz, ", tsc_freq);
 	}
-
-	if (bootverbose)
-	        printf("i8254 clock: %u Hz\n", tot_count);
 	return (tot_count);
 
 fail:
@@ -635,8 +657,6 @@ set_timer_freq(u_int freq, int intr_freq)
 	new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq);
 	if (new_timer0_max_count != timer0_max_count) {
 		timer0_max_count = new_timer0_max_count;
-		timer0_overflow_threshold = timer0_max_count -
-		    TIMER0_LATCH_COUNT;
 		outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
 		outb(TIMER_CNTR0, timer0_max_count & 0xff);
 		outb(TIMER_CNTR0, timer0_max_count >> 8);
@@ -646,7 +666,7 @@ set_timer_freq(u_int freq, int intr_freq)
 }
 
 /*
- * Initialize 8253 timer 0 early so that it can be used in DELAY().
+ * Initialize 8254 timer 0 early so that it can be used in DELAY().
  * XXX initialization of other timers is unintentionally left blank.
  */
 void
@@ -700,6 +720,8 @@ startrtclock()
 	}
 
 	set_timer_freq(timer_freq, hz);
+	i8254_timecounter[0].frequency = timer_freq;
+	init_timecounter(i8254_timecounter);
 
 #ifndef CLK_USE_TSC_CALIBRATION
 	if (tsc_freq != 0) {
@@ -717,12 +739,16 @@ startrtclock()
 		 */
 		wrmsr(0x10, 0LL);	/* XXX */
 		DELAY(1000000);
-		set_tsc_freq((u_int)rdtsc(), timer_freq);
+		tsc_freq = rdtsc();
 #ifdef CLK_USE_TSC_CALIBRATION
 		if (bootverbose)
-			printf("TSC clock: %u Hz\n", tsc_freq);
+			printf("TSC clock: %u Hz (Method B)\n", tsc_freq);
 #endif
 	}
+	if (tsc_present && tsc_freq != 0) {
+		tsc_timecounter[0].frequency = tsc_freq;
+		init_timecounter(tsc_timecounter);
+	}
 }
 
 /*
@@ -736,11 +762,13 @@ inittodr(time_t base)
 	int		yd;
 	int		year, month;
 	int		y, m, s;
+	struct timespec ts;
 
 	if (base) {
 		s = splclock();
-		time.tv_sec  = base;
-		time.tv_usec = 0;
+		ts.tv_sec = base;
+		ts.tv_nsec = 0;
+		set_timecounter(&ts);
 		splx(s);
 	}
 
@@ -780,9 +808,15 @@ inittodr(time_t base)
 
 	sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
 
-	s = splclock();
-	time.tv_sec = sec;
-	splx(s);
+	y = time.tv_sec - sec;
+	if (y <= -2 || y >= 2) {
+		/* badly off, adjust it */
+		s = splclock();
+		ts.tv_sec = sec;
+		ts.tv_nsec = 0;
+		set_timecounter(&ts);
+		splx(s);
+	}
 	return;
 
 wrong_time:
@@ -929,12 +963,6 @@ cpu_initclocks()
 
 #endif /* APIC_IO */
 
-	/*
-	 * Finish setting up anti-jitter measures.
-	 */
-	if (tsc_freq != 0)
-		tsc_bias = rdtsc();
-
 	/* Initialize RTC. */
 	writertc(RTC_STATUSA, rtc_statusa);
 	writertc(RTC_STATUSB, RTCSB_24HR);
@@ -987,11 +1015,10 @@ sysctl_machdep_i8254_freq SYSCTL_HANDLER_ARGS
 	freq = timer_freq;
 	error = sysctl_handle_opaque(oidp, &freq, sizeof freq, req);
 	if (error == 0 && req->newptr != NULL) {
-		if (timer0_state != 0)
+		if (timer0_state != RELEASED)
 			return (EBUSY);	/* too much trouble to handle */
 		set_timer_freq(freq, hz);
-		if (tsc_present)
-			set_tsc_freq(tsc_freq, timer_freq);
+		i8254_timecounter[0].frequency = freq;
 	}
 	return (error);
 }
@@ -999,28 +1026,6 @@ sysctl_machdep_i8254_freq SYSCTL_HANDLER_ARGS
 SYSCTL_PROC(_machdep, OID_AUTO, i8254_freq, CTLTYPE_INT | CTLFLAG_RW,
 	    0, sizeof(u_int), sysctl_machdep_i8254_freq, "I", "");
 
-static void
-set_tsc_freq(u_int tsc_count, u_int i8254_freq)
-{
-	u_int comultiplier, multiplier;
-	u_long ef;
-
-	if (tsc_count == 0) {
-		tsc_freq = tsc_count;
-		return;
-	}
-	comultiplier = ((unsigned long long)tsc_count
-			<< TSC_COMULTIPLIER_SHIFT) / i8254_freq;
-	multiplier = (1000000LL << TSC_MULTIPLIER_SHIFT) / tsc_count;
-	ef = read_eflags();
-	disable_intr();
-	tsc_freq = tsc_count;
-	tsc_comultiplier = comultiplier;
-	tsc_multiplier = multiplier;
-	CLOCK_UNLOCK();
-	write_eflags(ef);
-}
-
 static int
 sysctl_machdep_tsc_freq SYSCTL_HANDLER_ARGS
 {
@@ -1031,10 +1036,52 @@ sysctl_machdep_tsc_freq SYSCTL_HANDLER_ARGS
 		return (EOPNOTSUPP);
 	freq = tsc_freq;
 	error = sysctl_handle_opaque(oidp, &freq, sizeof freq, req);
-	if (error == 0 && req->newptr != NULL)
-		set_tsc_freq(freq, timer_freq);
+	if (error == 0 && req->newptr != NULL) {
+		tsc_freq = freq;
+		tsc_timecounter[0].frequency = tsc_freq;
+	}
 	return (error);
 }
 
 SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_INT | CTLFLAG_RW,
 	    0, sizeof(u_int), sysctl_machdep_tsc_freq, "I", "");
+
+/*
+ * Timecounter read method for the i8254: counts elapsed in the current
+ * timer0 period plus i8254_offset (whole periods already accounted for).
+ */
+static u_int64_t
+i8254_get_timecount(void)
+{
+	u_int32_t count;
+	u_long ef;
+	u_int high, low;
+
+	ef = read_eflags();
+	disable_intr();		/* interrupts off while i8254_* state changes */
+	/* Select timer0 and latch counter value. */
+	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
+	low = inb(TIMER_CNTR0);
+	high = inb(TIMER_CNTR0);
+	/* timer0_max_count is the programmed divisor clkintr() also adds. */
+	count = timer0_max_count - ((high << 8) | low);
+	if (count < i8254_lastcount) {	/* went backwards: period rolled over */
+		i8254_ticked = 1;	/* clkintr() then skips its own bump once */
+		i8254_offset += timer0_max_count;
+	}
+	i8254_lastcount = count;
+	count += i8254_offset;
+	write_eflags(ef);	/* restore the caller's interrupt state */
+	return (count);
+}
+
+static u_int64_t
+tsc_get_timecount(void)		/* get_timecount method of tsc_timecounter */
+{
+	return ((u_int64_t)rdtsc());	/* current rdtsc() reading as u_int64_t */
+}
+
+static u_int32_t
+tsc_get_timedelta(struct timecounter *tc)  /* get_timedelta method of tsc_timecounter */
+{
+	return ((u_int64_t)rdtsc() - tc->offset_count);	/* narrowed to u_int32_t on return */
+}
diff --git a/sys/i386/include/clock.h b/sys/i386/include/clock.h
index 6013414ede23..31b463221c38 100644
--- a/sys/i386/include/clock.h
+++ b/sys/i386/include/clock.h
@@ -3,16 +3,12 @@
  * Garrett Wollman, September 1994.
  * This file is in the public domain.
  *
- *	$Id: clock.h,v 1.30 1997/12/28 17:33:08 phk Exp $
+ *	$Id: clock.h,v 1.31 1998/02/01 22:45:23 bde Exp $
  */
 
 #ifndef _MACHINE_CLOCK_H_
 #define	_MACHINE_CLOCK_H_
 
-#define CPU_CLOCKUPDATE(otime, ntime)	cpu_clockupdate((otime), (ntime))
-
-#define CPU_THISTICKLEN(dflt) dflt
-
 #define	TSC_COMULTIPLIER_SHIFT	20
 #define	TSC_MULTIPLIER_SHIFT	32
 
@@ -27,12 +23,7 @@ extern int	disable_rtc_set;
 extern int	statclock_disable;
 extern u_int	timer_freq;
 extern int	timer0_max_count;
-extern u_int	timer0_overflow_threshold;
-extern u_int	timer0_prescaler_count;
-extern u_int	tsc_bias;
-extern u_int	tsc_comultiplier;
 extern u_int	tsc_freq;
-extern u_int	tsc_multiplier;
 extern int	wall_cmos_clock;
 
 /*
@@ -54,61 +45,6 @@ int	release_timer1 __P((void));
 #endif
 int	sysbeep __P((int pitch, int period));
 
-#ifdef CLOCK_HAIR
-
-#ifdef PC98
-#include <pc98/pc98/pc98.h>		/* XXX */
-#else
-#include <i386/isa/isa.h>		/* XXX */
-#endif
-#include <i386/isa/timerreg.h>		/* XXX */
-
-static __inline u_int
-clock_latency(void)
-{
-	u_char high, low;
-
-	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
-	low = inb(TIMER_CNTR0);
-	high = inb(TIMER_CNTR0);
-	return (timer0_prescaler_count + timer0_max_count
-		- ((high << 8) | low));
-}
-
-/*
- * When we update `time', we also update `tsc_bias' atomically (if we
- * are using the TSC).  `tsc_bias' is the best available approximation
- * to the value of the TSC (mod 2^32) at the time of the i8254
- * counter transition that caused the clock interrupt that caused the
- * update.  clock_latency() gives the time between the transition and
- * the update to within a few usec provided another such transition
- * hasn't occurred.  We don't bother checking for counter overflow as
- * in microtime(), since if it occurs then we're close to losing clock
- * interrupts.
- */
-static __inline void
-cpu_clockupdate(volatile struct timeval *otime, struct timeval *ntime)
-{
-	if (tsc_freq != 0) {
-		u_int tsc_count;	/* truncated */
-		u_int i8254_count;
-
-		disable_intr();
-		i8254_count = clock_latency();
-		tsc_count = rdtsc();
-		tsc_bias = tsc_count
-				- (u_int)
-				  (((unsigned long long)tsc_comultiplier
-				    * i8254_count)
-				   >> TSC_COMULTIPLIER_SHIFT);
-		*otime = *ntime;
-		enable_intr();
-	} else
-		*otime = *ntime;
-}
-
-#endif /* CLOCK_HAIR */
-
 #endif /* KERNEL */
 
 #endif /* !_MACHINE_CLOCK_H_ */
diff --git a/sys/i386/isa/clock.c b/sys/i386/isa/clock.c
index 2fb46cb67c92..948dfa272c5f 100644
--- a/sys/i386/isa/clock.c
+++ b/sys/i386/isa/clock.c
@@ -34,7 +34,7 @@
  * SUCH DAMAGE.
  *
  *	from: @(#)clock.c	7.2 (Berkeley) 5/12/91
- *	$Id: clock.c,v 1.109 1998/02/09 06:08:26 eivind Exp $
+ *	$Id: clock.c,v 1.110 1998/02/13 06:33:16 bde Exp $
  */
 
 /*
@@ -109,9 +109,7 @@
 /*
  * Maximum frequency that we are willing to allow for timer0.  Must be
  * low enough to guarantee that the timer interrupt handler returns
- * before the next timer interrupt.  Must result in a lower TIMER_DIV
- * value than TIMER0_LATCH_COUNT so that we don't have to worry about
- * underflow in the calculation of timer0_overflow_threshold.
+ * before the next timer interrupt.
  */
 #define	TIMER0_MAX_FREQ		20000
 
@@ -120,25 +118,21 @@ int	disable_rtc_set;	/* disable resettodr() if != 0 */
 u_int	idelayed;
 int	statclock_disable;
 u_int	stat_imask = SWI_CLOCK_MASK;
-#ifdef TIMER_FREQ
-u_int	timer_freq = TIMER_FREQ;
-#else
-u_int	timer_freq = 1193182;
+#ifndef TIMER_FREQ
+#define TIMER_FREQ   1193182
 #endif
+u_int	timer_freq = TIMER_FREQ;
 int	timer0_max_count;
-u_int	timer0_overflow_threshold;
-u_int	timer0_prescaler_count;
-u_int	tsc_bias;
-u_int	tsc_comultiplier;
 u_int	tsc_freq;
-u_int	tsc_multiplier;
-static u_int	tsc_present;
 int	wall_cmos_clock;	/* wall	CMOS clock assumed if != 0 */
 
 static	int	beeping = 0;
 static	u_int	clk_imask = HWI_MASK | SWI_MASK;
 static	const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
 static	u_int	hardclock_max_count;
+static	u_int32_t i8254_lastcount;
+static	u_int32_t i8254_offset;
+static	int	i8254_ticked;
 /*
  * XXX new_function and timer_func should not handle clockframes, but
  * timer_func currently needs to hold hardclock to handle the
@@ -149,6 +143,7 @@ static	void	(*new_function) __P((struct clockframe *frame));
 static	u_int	new_rate;
 static	u_char	rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
 static	u_char	rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
+static	u_int	timer0_prescaler_count;
 
 /* Values for timerX_state: */
 #define	RELEASED	0
@@ -159,13 +154,42 @@ static	u_char	rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
 static	u_char	timer0_state;
 static	u_char	timer2_state;
 static	void	(*timer_func) __P((struct clockframe *frame)) = hardclock;
+static	u_int	tsc_present;
 
-static	void	set_tsc_freq(u_int tsc_count, u_int i8254_freq);
+static	u_int64_t i8254_get_timecount __P((void));
 static	void	set_timer_freq(u_int freq, int intr_freq);
+static	u_int64_t tsc_get_timecount __P((void));
+static	u_int32_t tsc_get_timedelta __P((struct timecounter *tc));
+
+static struct timecounter tsc_timecounter[3] = {
+	tsc_get_timedelta,	/* get_timedelta: rdtsc() minus tc->offset_count */
+	tsc_get_timecount,	/* get_timecount: current rdtsc() value */
+ 	~0,			/* counter_mask: every bit set */
+	0,			/* frequency: filled in from tsc_freq at run time */
+	 "TSC"			/* name */
+};
+
+SYSCTL_OPAQUE(_debug, OID_AUTO, tsc_timecounter, CTLFLAG_RD, 
+	tsc_timecounter, sizeof(tsc_timecounter), "S,timecounter", "");
+
+static struct timecounter i8254_timecounter[3] = {
+	0,			/* get_timedelta: none supplied here */
+	i8254_get_timecount,	/* get_timecount: elapsed timer0 counts + offset */
+	(1ULL << 32) - 1,	/* counter_mask: low 32 bits */
+	0,			/* frequency: filled in from timer_freq at run time */
+	"i8254"			/* name: acquire_timer0() compares against this */
+};
+
+SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD, 
+	i8254_timecounter, sizeof(i8254_timecounter), "S,timecounter", "");
 
 static void
 clkintr(struct clockframe frame)
 {
+	if (!i8254_ticked)
+		i8254_offset += timer0_max_count;
+	else
+		i8254_ticked = 0;
 	timer_func(&frame);
 	switch (timer0_state) {
 
@@ -185,8 +209,6 @@ clkintr(struct clockframe frame)
 	case ACQUIRE_PENDING:
 		setdelayed();
 		timer0_max_count = TIMER_DIV(new_rate);
-		timer0_overflow_threshold =
-			timer0_max_count - TIMER0_LATCH_COUNT;
 		disable_intr();
 		outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
 		outb(TIMER_CNTR0, timer0_max_count & 0xff);
@@ -210,8 +232,6 @@ clkintr(struct clockframe frame)
 			hardclock(&frame);
 			setdelayed();
 			timer0_max_count = hardclock_max_count;
-			timer0_overflow_threshold =
-				timer0_max_count - TIMER0_LATCH_COUNT;
 			disable_intr();
 			outb(TIMER_MODE,
 			     TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
@@ -236,6 +256,8 @@ acquire_timer0(int rate, void (*function) __P((struct clockframe *frame)))
 
 	if (rate <= 0 || rate > TIMER0_MAX_FREQ)
 		return (-1);
+	if (strcmp(timecounter->name, "i8254") == 0)
+		return (-1);
 	switch (timer0_state) {
 
 	case RELEASED:
@@ -606,14 +628,14 @@ calibrate_clocks(void)
 	 * Read the cpu cycle counter.  The timing considerations are
 	 * similar to those for the i8254 clock.
 	 */
-	if (tsc_present) {
-		set_tsc_freq((u_int)rdtsc(), tot_count);
-		if (bootverbose)
+	if (tsc_present) 
+		tsc_freq = rdtsc();
+
+	if (bootverbose) {
+	        printf("i8254 clock: %u Hz\n", tot_count);
+		if (tsc_present)
 		        printf("TSC clock: %u Hz, ", tsc_freq);
 	}
-
-	if (bootverbose)
-	        printf("i8254 clock: %u Hz\n", tot_count);
 	return (tot_count);
 
 fail:
@@ -635,8 +657,6 @@ set_timer_freq(u_int freq, int intr_freq)
 	new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq);
 	if (new_timer0_max_count != timer0_max_count) {
 		timer0_max_count = new_timer0_max_count;
-		timer0_overflow_threshold = timer0_max_count -
-		    TIMER0_LATCH_COUNT;
 		outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
 		outb(TIMER_CNTR0, timer0_max_count & 0xff);
 		outb(TIMER_CNTR0, timer0_max_count >> 8);
@@ -646,7 +666,7 @@ set_timer_freq(u_int freq, int intr_freq)
 }
 
 /*
- * Initialize 8253 timer 0 early so that it can be used in DELAY().
+ * Initialize 8254 timer 0 early so that it can be used in DELAY().
  * XXX initialization of other timers is unintentionally left blank.
  */
 void
@@ -700,6 +720,8 @@ startrtclock()
 	}
 
 	set_timer_freq(timer_freq, hz);
+	i8254_timecounter[0].frequency = timer_freq;
+	init_timecounter(i8254_timecounter);
 
 #ifndef CLK_USE_TSC_CALIBRATION
 	if (tsc_freq != 0) {
@@ -717,12 +739,16 @@ startrtclock()
 		 */
 		wrmsr(0x10, 0LL);	/* XXX */
 		DELAY(1000000);
-		set_tsc_freq((u_int)rdtsc(), timer_freq);
+		tsc_freq = rdtsc();
 #ifdef CLK_USE_TSC_CALIBRATION
 		if (bootverbose)
-			printf("TSC clock: %u Hz\n", tsc_freq);
+			printf("TSC clock: %u Hz (Method B)\n", tsc_freq);
 #endif
 	}
+	if (tsc_present && tsc_freq != 0) {
+		tsc_timecounter[0].frequency = tsc_freq;
+		init_timecounter(tsc_timecounter);
+	}
 }
 
 /*
@@ -736,11 +762,13 @@ inittodr(time_t base)
 	int		yd;
 	int		year, month;
 	int		y, m, s;
+	struct timespec ts;
 
 	if (base) {
 		s = splclock();
-		time.tv_sec  = base;
-		time.tv_usec = 0;
+		ts.tv_sec = base;
+		ts.tv_nsec = 0;
+		set_timecounter(&ts);
 		splx(s);
 	}
 
@@ -780,9 +808,15 @@ inittodr(time_t base)
 
 	sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
 
-	s = splclock();
-	time.tv_sec = sec;
-	splx(s);
+	y = time.tv_sec - sec;
+	if (y <= -2 || y >= 2) {
+		/* badly off, adjust it */
+		s = splclock();
+		ts.tv_sec = sec;
+		ts.tv_nsec = 0;
+		set_timecounter(&ts);
+		splx(s);
+	}
 	return;
 
 wrong_time:
@@ -929,12 +963,6 @@ cpu_initclocks()
 
 #endif /* APIC_IO */
 
-	/*
-	 * Finish setting up anti-jitter measures.
-	 */
-	if (tsc_freq != 0)
-		tsc_bias = rdtsc();
-
 	/* Initialize RTC. */
 	writertc(RTC_STATUSA, rtc_statusa);
 	writertc(RTC_STATUSB, RTCSB_24HR);
@@ -987,11 +1015,10 @@ sysctl_machdep_i8254_freq SYSCTL_HANDLER_ARGS
 	freq = timer_freq;
 	error = sysctl_handle_opaque(oidp, &freq, sizeof freq, req);
 	if (error == 0 && req->newptr != NULL) {
-		if (timer0_state != 0)
+		if (timer0_state != RELEASED)
 			return (EBUSY);	/* too much trouble to handle */
 		set_timer_freq(freq, hz);
-		if (tsc_present)
-			set_tsc_freq(tsc_freq, timer_freq);
+		i8254_timecounter[0].frequency = freq;
 	}
 	return (error);
 }
@@ -999,28 +1026,6 @@ sysctl_machdep_i8254_freq SYSCTL_HANDLER_ARGS
 SYSCTL_PROC(_machdep, OID_AUTO, i8254_freq, CTLTYPE_INT | CTLFLAG_RW,
 	    0, sizeof(u_int), sysctl_machdep_i8254_freq, "I", "");
 
-static void
-set_tsc_freq(u_int tsc_count, u_int i8254_freq)
-{
-	u_int comultiplier, multiplier;
-	u_long ef;
-
-	if (tsc_count == 0) {
-		tsc_freq = tsc_count;
-		return;
-	}
-	comultiplier = ((unsigned long long)tsc_count
-			<< TSC_COMULTIPLIER_SHIFT) / i8254_freq;
-	multiplier = (1000000LL << TSC_MULTIPLIER_SHIFT) / tsc_count;
-	ef = read_eflags();
-	disable_intr();
-	tsc_freq = tsc_count;
-	tsc_comultiplier = comultiplier;
-	tsc_multiplier = multiplier;
-	CLOCK_UNLOCK();
-	write_eflags(ef);
-}
-
 static int
 sysctl_machdep_tsc_freq SYSCTL_HANDLER_ARGS
 {
@@ -1031,10 +1036,52 @@ sysctl_machdep_tsc_freq SYSCTL_HANDLER_ARGS
 		return (EOPNOTSUPP);
 	freq = tsc_freq;
 	error = sysctl_handle_opaque(oidp, &freq, sizeof freq, req);
-	if (error == 0 && req->newptr != NULL)
-		set_tsc_freq(freq, timer_freq);
+	if (error == 0 && req->newptr != NULL) {
+		tsc_freq = freq;
+		tsc_timecounter[0].frequency = tsc_freq;
+	}
 	return (error);
 }
 
 SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_INT | CTLFLAG_RW,
 	    0, sizeof(u_int), sysctl_machdep_tsc_freq, "I", "");
+
+/*
+ * Timecounter read method for the i8254: counts elapsed in the current
+ * timer0 period plus i8254_offset (whole periods already accounted for).
+ */
+static u_int64_t
+i8254_get_timecount(void)
+{
+	u_int32_t count;
+	u_long ef;
+	u_int high, low;
+
+	ef = read_eflags();
+	disable_intr();		/* interrupts off while i8254_* state changes */
+	/* Select timer0 and latch counter value. */
+	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
+	low = inb(TIMER_CNTR0);
+	high = inb(TIMER_CNTR0);
+	/* timer0_max_count is the programmed divisor clkintr() also adds. */
+	count = timer0_max_count - ((high << 8) | low);
+	if (count < i8254_lastcount) {	/* went backwards: period rolled over */
+		i8254_ticked = 1;	/* clkintr() then skips its own bump once */
+		i8254_offset += timer0_max_count;
+	}
+	i8254_lastcount = count;
+	count += i8254_offset;
+	write_eflags(ef);	/* restore the caller's interrupt state */
+	return (count);
+}
+
+static u_int64_t
+tsc_get_timecount(void)		/* get_timecount method of tsc_timecounter */
+{
+	return ((u_int64_t)rdtsc());	/* current rdtsc() reading as u_int64_t */
+}
+
+static u_int32_t
+tsc_get_timedelta(struct timecounter *tc)  /* get_timedelta method of tsc_timecounter */
+{
+	return ((u_int64_t)rdtsc() - tc->offset_count);	/* narrowed to u_int32_t on return */
+}
diff --git a/sys/i386/isa/random_machdep.c b/sys/i386/isa/random_machdep.c
index f066949b08ba..7fd83646cac0 100644
--- a/sys/i386/isa/random_machdep.c
+++ b/sys/i386/isa/random_machdep.c
@@ -1,7 +1,7 @@
 /*
  * random_machdep.c -- A strong random number generator
  *
- * $Id: random_machdep.c,v 1.19 1997/10/28 15:58:13 bde Exp $
+ * $Id: random_machdep.c,v 1.20 1997/12/26 20:42:11 phk Exp $
  *
  * Version 0.95, last modified 18-Oct-95
  * 
@@ -190,21 +190,8 @@ add_timer_randomness(struct random_bucket *r, struct timer_rand_state *state,
 	u_int		nbits;
 	u_int32_t	time;
 
-#if defined(I586_CPU) || defined(I686_CPU)
-	if (tsc_freq != 0) {
-		num ^= (u_int32_t) rdtsc() << 16;
-		r->entropy_count += 2;
-	} else {
-#endif
-		disable_intr();
-		outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
-		num ^= inb(TIMER_CNTR0) << 16;
-		num ^= inb(TIMER_CNTR0) << 24;
-		enable_intr();
-		r->entropy_count += 2;
-#if defined(I586_CPU) || defined(I686_CPU)
-	}
-#endif
+	num ^= timecounter->get_timecount() << 16;
+	r->entropy_count += 2;
 		
 	time = ticks;
 
diff --git a/sys/isa/atrtc.c b/sys/isa/atrtc.c
index 2fb46cb67c92..948dfa272c5f 100644
--- a/sys/isa/atrtc.c
+++ b/sys/isa/atrtc.c
@@ -34,7 +34,7 @@
  * SUCH DAMAGE.
  *
  *	from: @(#)clock.c	7.2 (Berkeley) 5/12/91
- *	$Id: clock.c,v 1.109 1998/02/09 06:08:26 eivind Exp $
+ *	$Id: clock.c,v 1.110 1998/02/13 06:33:16 bde Exp $
  */
 
 /*
@@ -109,9 +109,7 @@
 /*
  * Maximum frequency that we are willing to allow for timer0.  Must be
  * low enough to guarantee that the timer interrupt handler returns
- * before the next timer interrupt.  Must result in a lower TIMER_DIV
- * value than TIMER0_LATCH_COUNT so that we don't have to worry about
- * underflow in the calculation of timer0_overflow_threshold.
+ * before the next timer interrupt.
  */
 #define	TIMER0_MAX_FREQ		20000
 
@@ -120,25 +118,21 @@ int	disable_rtc_set;	/* disable resettodr() if != 0 */
 u_int	idelayed;
 int	statclock_disable;
 u_int	stat_imask = SWI_CLOCK_MASK;
-#ifdef TIMER_FREQ
-u_int	timer_freq = TIMER_FREQ;
-#else
-u_int	timer_freq = 1193182;
+#ifndef TIMER_FREQ
+#define TIMER_FREQ   1193182
 #endif
+u_int	timer_freq = TIMER_FREQ;
 int	timer0_max_count;
-u_int	timer0_overflow_threshold;
-u_int	timer0_prescaler_count;
-u_int	tsc_bias;
-u_int	tsc_comultiplier;
 u_int	tsc_freq;
-u_int	tsc_multiplier;
-static u_int	tsc_present;
 int	wall_cmos_clock;	/* wall	CMOS clock assumed if != 0 */
 
 static	int	beeping = 0;
 static	u_int	clk_imask = HWI_MASK | SWI_MASK;
 static	const u_char daysinmonth[] = {31,28,31,30,31,30,31,31,30,31,30,31};
 static	u_int	hardclock_max_count;
+static	u_int32_t i8254_lastcount;
+static	u_int32_t i8254_offset;
+static	int	i8254_ticked;
 /*
  * XXX new_function and timer_func should not handle clockframes, but
  * timer_func currently needs to hold hardclock to handle the
@@ -149,6 +143,7 @@ static	void	(*new_function) __P((struct clockframe *frame));
 static	u_int	new_rate;
 static	u_char	rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF;
 static	u_char	rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
+static	u_int	timer0_prescaler_count;
 
 /* Values for timerX_state: */
 #define	RELEASED	0
@@ -159,13 +154,42 @@ static	u_char	rtc_statusb = RTCSB_24HR | RTCSB_PINTR;
 static	u_char	timer0_state;
 static	u_char	timer2_state;
 static	void	(*timer_func) __P((struct clockframe *frame)) = hardclock;
+static	u_int	tsc_present;
 
-static	void	set_tsc_freq(u_int tsc_count, u_int i8254_freq);
+static	u_int64_t i8254_get_timecount __P((void));
 static	void	set_timer_freq(u_int freq, int intr_freq);
+static	u_int64_t tsc_get_timecount __P((void));
+static	u_int32_t tsc_get_timedelta __P((struct timecounter *tc));
+
+static struct timecounter tsc_timecounter[3] = {
+	tsc_get_timedelta,	/* get_timedelta: rdtsc() minus tc->offset_count */
+	tsc_get_timecount,	/* get_timecount: current rdtsc() value */
+ 	~0,			/* counter_mask: every bit set */
+	0,			/* frequency: filled in from tsc_freq at run time */
+	 "TSC"			/* name */
+};
+
+SYSCTL_OPAQUE(_debug, OID_AUTO, tsc_timecounter, CTLFLAG_RD, 
+	tsc_timecounter, sizeof(tsc_timecounter), "S,timecounter", "");
+
+static struct timecounter i8254_timecounter[3] = {
+	0,			/* get_timedelta: none supplied here */
+	i8254_get_timecount,	/* get_timecount: elapsed timer0 counts + offset */
+	(1ULL << 32) - 1,	/* counter_mask: low 32 bits */
+	0,			/* frequency: filled in from timer_freq at run time */
+	"i8254"			/* name: acquire_timer0() compares against this */
+};
+
+SYSCTL_OPAQUE(_debug, OID_AUTO, i8254_timecounter, CTLFLAG_RD, 
+	i8254_timecounter, sizeof(i8254_timecounter), "S,timecounter", "");
 
 static void
 clkintr(struct clockframe frame)
 {
+	if (!i8254_ticked)
+		i8254_offset += timer0_max_count;
+	else
+		i8254_ticked = 0;
 	timer_func(&frame);
 	switch (timer0_state) {
 
@@ -185,8 +209,6 @@ clkintr(struct clockframe frame)
 	case ACQUIRE_PENDING:
 		setdelayed();
 		timer0_max_count = TIMER_DIV(new_rate);
-		timer0_overflow_threshold =
-			timer0_max_count - TIMER0_LATCH_COUNT;
 		disable_intr();
 		outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
 		outb(TIMER_CNTR0, timer0_max_count & 0xff);
@@ -210,8 +232,6 @@ clkintr(struct clockframe frame)
 			hardclock(&frame);
 			setdelayed();
 			timer0_max_count = hardclock_max_count;
-			timer0_overflow_threshold =
-				timer0_max_count - TIMER0_LATCH_COUNT;
 			disable_intr();
 			outb(TIMER_MODE,
 			     TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
@@ -236,6 +256,8 @@ acquire_timer0(int rate, void (*function) __P((struct clockframe *frame)))
 
 	if (rate <= 0 || rate > TIMER0_MAX_FREQ)
 		return (-1);
+	if (strcmp(timecounter->name, "i8254") == 0)
+		return (-1);
 	switch (timer0_state) {
 
 	case RELEASED:
@@ -606,14 +628,14 @@ calibrate_clocks(void)
 	 * Read the cpu cycle counter.  The timing considerations are
 	 * similar to those for the i8254 clock.
 	 */
-	if (tsc_present) {
-		set_tsc_freq((u_int)rdtsc(), tot_count);
-		if (bootverbose)
+	if (tsc_present) 
+		tsc_freq = rdtsc();
+
+	if (bootverbose) {
+	        printf("i8254 clock: %u Hz\n", tot_count);
+		if (tsc_present)
 		        printf("TSC clock: %u Hz, ", tsc_freq);
 	}
-
-	if (bootverbose)
-	        printf("i8254 clock: %u Hz\n", tot_count);
 	return (tot_count);
 
 fail:
@@ -635,8 +657,6 @@ set_timer_freq(u_int freq, int intr_freq)
 	new_timer0_max_count = hardclock_max_count = TIMER_DIV(intr_freq);
 	if (new_timer0_max_count != timer0_max_count) {
 		timer0_max_count = new_timer0_max_count;
-		timer0_overflow_threshold = timer0_max_count -
-		    TIMER0_LATCH_COUNT;
 		outb(TIMER_MODE, TIMER_SEL0 | TIMER_RATEGEN | TIMER_16BIT);
 		outb(TIMER_CNTR0, timer0_max_count & 0xff);
 		outb(TIMER_CNTR0, timer0_max_count >> 8);
@@ -646,7 +666,7 @@ set_timer_freq(u_int freq, int intr_freq)
 }
 
 /*
- * Initialize 8253 timer 0 early so that it can be used in DELAY().
+ * Initialize 8254 timer 0 early so that it can be used in DELAY().
  * XXX initialization of other timers is unintentionally left blank.
  */
 void
@@ -700,6 +720,8 @@ startrtclock()
 	}
 
 	set_timer_freq(timer_freq, hz);
+	i8254_timecounter[0].frequency = timer_freq;
+	init_timecounter(i8254_timecounter);
 
 #ifndef CLK_USE_TSC_CALIBRATION
 	if (tsc_freq != 0) {
@@ -717,12 +739,16 @@ startrtclock()
 		 */
 		wrmsr(0x10, 0LL);	/* XXX */
 		DELAY(1000000);
-		set_tsc_freq((u_int)rdtsc(), timer_freq);
+		tsc_freq = rdtsc();
 #ifdef CLK_USE_TSC_CALIBRATION
 		if (bootverbose)
-			printf("TSC clock: %u Hz\n", tsc_freq);
+			printf("TSC clock: %u Hz (Method B)\n", tsc_freq);
 #endif
 	}
+	if (tsc_present && tsc_freq != 0) {
+		tsc_timecounter[0].frequency = tsc_freq;
+		init_timecounter(tsc_timecounter);
+	}
 }
 
 /*
@@ -736,11 +762,13 @@ inittodr(time_t base)
 	int		yd;
 	int		year, month;
 	int		y, m, s;
+	struct timespec ts;
 
 	if (base) {
 		s = splclock();
-		time.tv_sec  = base;
-		time.tv_usec = 0;
+		ts.tv_sec = base;
+		ts.tv_nsec = 0;
+		set_timecounter(&ts);
 		splx(s);
 	}
 
@@ -780,9 +808,15 @@ inittodr(time_t base)
 
 	sec += tz.tz_minuteswest * 60 + (wall_cmos_clock ? adjkerntz : 0);
 
-	s = splclock();
-	time.tv_sec = sec;
-	splx(s);
+	y = time.tv_sec - sec;
+	if (y <= -2 || y >= 2) {
+		/* badly off, adjust it */
+		s = splclock();
+		ts.tv_sec = sec;
+		ts.tv_nsec = 0;
+		set_timecounter(&ts);
+		splx(s);
+	}
 	return;
 
 wrong_time:
@@ -929,12 +963,6 @@ cpu_initclocks()
 
 #endif /* APIC_IO */
 
-	/*
-	 * Finish setting up anti-jitter measures.
-	 */
-	if (tsc_freq != 0)
-		tsc_bias = rdtsc();
-
 	/* Initialize RTC. */
 	writertc(RTC_STATUSA, rtc_statusa);
 	writertc(RTC_STATUSB, RTCSB_24HR);
@@ -987,11 +1015,10 @@ sysctl_machdep_i8254_freq SYSCTL_HANDLER_ARGS
 	freq = timer_freq;
 	error = sysctl_handle_opaque(oidp, &freq, sizeof freq, req);
 	if (error == 0 && req->newptr != NULL) {
-		if (timer0_state != 0)
+		if (timer0_state != RELEASED)
 			return (EBUSY);	/* too much trouble to handle */
 		set_timer_freq(freq, hz);
-		if (tsc_present)
-			set_tsc_freq(tsc_freq, timer_freq);
+		i8254_timecounter[0].frequency = freq;
 	}
 	return (error);
 }
@@ -999,28 +1026,6 @@ sysctl_machdep_i8254_freq SYSCTL_HANDLER_ARGS
 SYSCTL_PROC(_machdep, OID_AUTO, i8254_freq, CTLTYPE_INT | CTLFLAG_RW,
 	    0, sizeof(u_int), sysctl_machdep_i8254_freq, "I", "");
 
-static void
-set_tsc_freq(u_int tsc_count, u_int i8254_freq)
-{
-	u_int comultiplier, multiplier;
-	u_long ef;
-
-	if (tsc_count == 0) {
-		tsc_freq = tsc_count;
-		return;
-	}
-	comultiplier = ((unsigned long long)tsc_count
-			<< TSC_COMULTIPLIER_SHIFT) / i8254_freq;
-	multiplier = (1000000LL << TSC_MULTIPLIER_SHIFT) / tsc_count;
-	ef = read_eflags();
-	disable_intr();
-	tsc_freq = tsc_count;
-	tsc_comultiplier = comultiplier;
-	tsc_multiplier = multiplier;
-	CLOCK_UNLOCK();
-	write_eflags(ef);
-}
-
 static int
 sysctl_machdep_tsc_freq SYSCTL_HANDLER_ARGS
 {
@@ -1031,10 +1036,52 @@ sysctl_machdep_tsc_freq SYSCTL_HANDLER_ARGS
 		return (EOPNOTSUPP);
 	freq = tsc_freq;
 	error = sysctl_handle_opaque(oidp, &freq, sizeof freq, req);
-	if (error == 0 && req->newptr != NULL)
-		set_tsc_freq(freq, timer_freq);
+	if (error == 0 && req->newptr != NULL) {
+		tsc_freq = freq;
+		tsc_timecounter[0].frequency = tsc_freq;
+	}
 	return (error);
 }
 
 SYSCTL_PROC(_machdep, OID_AUTO, tsc_freq, CTLTYPE_INT | CTLFLAG_RW,
 	    0, sizeof(u_int), sysctl_machdep_tsc_freq, "I", "");
+
+static u_int64_t
+i8254_get_timecount(void)	/* read timer0 and extend it with i8254_offset */
+{
+	u_int32_t count;
+	u_long ef;
+	u_int high, low;
+
+	ef = read_eflags();	/* saved so write_eflags() below can restore it */
+	disable_intr();
+
+	/* Select timer0 and latch counter value. */
+	outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
+
+	low = inb(TIMER_CNTR0);		/* first read is used as bits 0-7 */
+	high = inb(TIMER_CNTR0);	/* second read is used as bits 8-15 */
+
+	count = hardclock_max_count - ((high << 8) | low);	/* presumably a down-counter; confirm */
+	if (count < i8254_lastcount) {	/* smaller than the previous read: treat as a wrap */
+		i8254_ticked = 1;
+		i8254_offset += hardclock_max_count;	/* account for one full period */
+	}
+
+	i8254_lastcount = count;
+	count += i8254_offset;
+	write_eflags(ef);
+	return (count);
+}
+
+static u_int64_t
+tsc_get_timecount(void)	/* returns the rdtsc() value widened to 64 bits */
+{
+	return ((u_int64_t)rdtsc());
+}
+
+static u_int32_t
+tsc_get_timedelta(struct timecounter *tc)	/* rdtsc() minus tc->offset_count, truncated to 32 bits */
+{
+	return ((u_int64_t)rdtsc() - tc->offset_count);
+}
diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
index 20b700b32d3e..30bb775ecb7d 100644
--- a/sys/kern/kern_clock.c
+++ b/sys/kern/kern_clock.c
@@ -1,4 +1,5 @@
 /*-
+ * Copyright (c) 1997, 1998 Poul-Henning Kamp <phk@FreeBSD.org>
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
@@ -36,7 +37,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
- * $Id: kern_clock.c,v 1.55 1998/02/06 12:13:22 eivind Exp $
+ * $Id: kern_clock.c,v 1.56 1998/02/15 13:55:06 phk Exp $
  */
 
 #include <sys/param.h>
@@ -55,7 +56,6 @@
 #include <sys/sysctl.h>
 
 #include <machine/cpu.h>
-#define CLOCK_HAIR		/* XXX */
 #include <machine/clock.h>
 #include <machine/limits.h>
 
@@ -70,6 +70,9 @@
 static void initclocks __P((void *dummy));
 SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)
 
+static void tco_forward __P((void));
+static void tco_setscales __P((struct timecounter *tc));
+
 /* Some of these don't belong here, but it's easiest to concentrate them. */
 #if defined(SMP) && defined(BETTER_CLOCK)
 long cp_time[CPUSTATES];
@@ -91,55 +94,43 @@ long tk_nin;
 long tk_nout;
 long tk_rawcc;
 
+struct timecounter *timecounter;
+
 /*
  * Clock handling routines.
  *
- * This code is written to operate with two timers that run independently of
- * each other.  The main clock, running hz times per second, is used to keep
- * track of real time.  The second timer handles kernel and user profiling,
- * and does resource use estimation.  If the second timer is programmable,
- * it is randomized to avoid aliasing between the two clocks.  For example,
- * the randomization prevents an adversary from always giving up the cpu
+ * This code is written to operate with two timers that run independently
+ * of each other.
+ *
+ * The main clock, running hz times per second, is used to trigger
+ * interval timers, timeouts and rescheduling as needed.
+ *
+ * The second timer handles kernel and user profiling, and does resource
+ * use estimation.  If the second timer is programmable, it is randomized
+ * to avoid aliasing between the two clocks.  For example, the
+ * randomization prevents an adversary from always giving up the cpu
  * just before its quantum expires.  Otherwise, it would never accumulate
  * cpu ticks.  The mean frequency of the second timer is stathz.
- *
- * If no second timer exists, stathz will be zero; in this case we drive
- * profiling and statistics off the main clock.  This WILL NOT be accurate;
- * do not do it unless absolutely necessary.
- *
+ * If no second timer exists, stathz will be zero; in this case we
+ * drive profiling and statistics off the main clock.  This WILL NOT
+ * be accurate; do not do it unless absolutely necessary.
  * The statistics clock may (or may not) be run at a higher rate while
- * profiling.  This profile clock runs at profhz.  We require that profhz
- * be an integral multiple of stathz.
+ * profiling.  This profile clock runs at profhz.  We require that
+ * profhz be an integral multiple of stathz.  If the statistics clock
+ * is running fast, it must be divided by the ratio profhz/stathz for
+ * statistics.  (For profiling, every tick counts.)
  *
- * If the statistics clock is running fast, it must be divided by the ratio
- * profhz/stathz for statistics.  (For profiling, every tick counts.)
+ * Time-of-day is maintained using a "timecounter", which may or may
+ * not be related to the hardware generating the above mentioned
+ * interrupts.
  */
 
-/*
- * TODO:
- *	allocate more timeout table slots when table overflows.
- */
-
-/*
- * Bump a timeval by a small number of usec's.
- */
-#define BUMPTIME(t, usec) { \
-	register volatile struct timeval *tp = (t); \
-	register long us; \
- \
-	tp->tv_usec = us = tp->tv_usec + (usec); \
-	if (us >= 1000000) { \
-		tp->tv_usec = us - 1000000; \
-		tp->tv_sec++; \
-	} \
-}
-
 int	stathz;
 int	profhz;
 static int profprocs;
 int	ticks;
 static int psdiv, pscnt;		/* prof => stat divider */
-int psratio;				/* ratio: prof / stat */
+int	psratio;			/* ratio: prof / stat */
 
 volatile struct	timeval time;
 volatile struct	timeval mono_time;
@@ -178,9 +169,6 @@ hardclock(frame)
 	register struct clockframe *frame;
 {
 	register struct proc *p;
-	int time_update;
-	struct timeval newtime = time;
-	long ltemp;
 
 	p = curproc;
 	if (p) {
@@ -208,56 +196,10 @@ hardclock(frame)
 	if (stathz == 0)
 		statclock(frame);
 
-	/*
-	 * Increment the time-of-day.
-	 */
+	tco_forward();
+
 	ticks++;
 
-	if (timedelta == 0) {
-		time_update = CPU_THISTICKLEN(tick);
-	} else {
-		time_update = CPU_THISTICKLEN(tick) + tickdelta;
-		timedelta -= tickdelta;
-	}
-	BUMPTIME(&mono_time, time_update);
-
-	/*
-	 * Compute the phase adjustment. If the low-order bits
-	 * (time_phase) of the update overflow, bump the high-order bits
-	 * (time_update).
-	 */
-	time_phase += time_adj;
-	if (time_phase <= -FINEUSEC) {
-		ltemp = -time_phase >> SHIFT_SCALE;
-		time_phase += ltemp << SHIFT_SCALE;
-		time_update -= ltemp;
-	}
-	else if (time_phase >= FINEUSEC) {
-		ltemp = time_phase >> SHIFT_SCALE;
-		time_phase -= ltemp << SHIFT_SCALE;
-		time_update += ltemp;
-	}
-
-	newtime.tv_usec += time_update;
-	/*
-	 * On rollover of the second the phase adjustment to be used for
-	 * the next second is calculated. Also, the maximum error is
-	 * increased by the tolerance. If the PPS frequency discipline
-	 * code is present, the phase is increased to compensate for the
-	 * CPU clock oscillator frequency error.
-	 *
-	 * On a 32-bit machine and given parameters in the timex.h
-	 * header file, the maximum phase adjustment is +-512 ms and
-	 * maximum frequency offset is a tad less than) +-512 ppm. On a
-	 * 64-bit machine, you shouldn't need to ask.
-	 */
-	if (newtime.tv_usec >= 1000000) {
-		newtime.tv_usec -= 1000000;
-		newtime.tv_sec++;
-		ntp_update_second(&newtime.tv_sec);
-	}
-	CPU_CLOCKUPDATE(&time, &newtime);
-
 	if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL)
 		setsoftclock();
 }
@@ -315,6 +257,10 @@ hzto(tv)
 	}
 	if (sec < 0) {
 #ifdef DIAGNOSTIC
+		if (sec == -1 && usec > 0) {
+			sec++;
+			usec -= 1000000;
+		}
 		printf("hzto: negative time difference %ld sec %ld usec\n",
 		       sec, usec);
 #endif
@@ -529,11 +475,212 @@ SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
 	0, 0, sysctl_kern_clockrate, "S,clockinfo","");
 
 void
-nanotime(ts)
-	struct timespec *ts;
+microtime(struct timeval *tv)	/* fill *tv from the current timecounter */
 {
-	struct timeval tv;
-	microtime(&tv);
-	ts->tv_sec = tv.tv_sec;
-	ts->tv_nsec = tv.tv_usec * 1000;
+	struct timecounter *tc;
+
+	tc = (struct timecounter *)timecounter;	/* every read below goes through this one pointer */
+	tv->tv_sec = tc->offset_sec;
+	tv->tv_usec = tc->offset_micro;
+	tv->tv_usec += 
+	    ((u_int64_t)tc->get_timedelta(tc) * tc->scale_micro) >> 32;	/* scale_micro is 2^32 * usec/tick */
+	if (tv->tv_usec >= 1000000) {	/* carries at most one second */
+		tv->tv_usec -= 1000000;
+		tv->tv_sec++;
+	}
 }
+
+void
+nanotime(struct timespec *tv)	/* fill *tv from the current timecounter */
+{
+	u_int32_t count;
+	u_int64_t delta;
+	struct timecounter *tc;
+
+	tc = (struct timecounter *)timecounter;	/* every read below goes through this one pointer */
+	tv->tv_sec = tc->offset_sec;
+	count = tc->get_timedelta(tc);
+	delta = tc->offset_nano;	/* nanoseconds scaled by 2^32 */
+	delta += ((u_int64_t)count * tc->scale_nano_f);	/* fractional ns per tick */
+	delta += ((u_int64_t)count * tc->scale_nano_i) << 32;	/* whole ns per tick */
+	delta >>= 32;	/* drop the fraction, leaving whole nanoseconds */
+	if (delta >= 1000000000) {	/* carries at most one second */
+		delta -= 1000000000;
+		tv->tv_sec++;
+	}
+	tv->tv_nsec = delta;
+}
+
+static void
+tco_setscales(struct timecounter *tc)	/* derive scale_* from frequency and adjustment */
+{
+	u_int64_t scale;
+
+	scale = 1000000000LL << 32;	/* ns per second, scaled by 2^32 */
+	if (tc->adjustment > 0)
+		scale += (tc->adjustment * 1000LL) << 10;
+	else
+		scale -= (-tc->adjustment * 1000LL) << 10;	/* also taken when adjustment is 0 */
+	/* scale += tc->frequency >> 1; */ /* XXX do we want to round ? */
+	scale /= tc->frequency;	/* NOTE(review): no guard against frequency == 0 */
+	tc->scale_micro = scale / 1000;
+	tc->scale_nano_f = scale & 0xffffffff;	/* low 32 bits: fraction */
+	tc->scale_nano_i = scale >> 32;		/* high 32 bits: integer part */
+}
+
+static u_int
+delta_timecounter(struct timecounter *tc)	/* fallback get_timedelta installed by init_timecounter() */
+{
+	return((tc->get_timecount() - tc->offset_count) & tc->counter_mask);
+}
+
+void
+init_timecounter(struct timecounter *tc)	/* tc must point at an array of three */
+{
+	struct timespec ts0, ts1;
+	int i;
+
+	if (!tc->get_timedelta) 
+		tc->get_timedelta = delta_timecounter;	/* generic masked-difference fallback */
+	tc->adjustment = 0;
+	tco_setscales(tc);
+	tc->offset_count = tc->get_timecount();
+	tc[0].tweak = &tc[0];	/* copied below, so all three ->tweak point at tc[0] */
+	tc[2] = tc[1] = tc[0];
+	tc[1].other = &tc[2];	/* tc[1] and tc[2] reference each other via ->other */
+	tc[2].other = &tc[1];
+	if (!timecounter)
+		timecounter = &tc[2];	/* nanotime() below dereferences timecounter */
+	tc = &tc[1];
+
+	/* 
+	 * Figure out the cost of calling this timecounter.
+	 * XXX: The 1:15 ratio is a guess at reality.
+	 */
+	nanotime(&ts0);
+	for (i = 0; i < 16; i ++) 
+		tc->get_timecount();
+	for (i = 0; i < 240; i ++)
+		tc->get_timedelta(tc);
+	nanotime(&ts1);
+	ts1.tv_sec -= ts0.tv_sec;
+	tc->cost = ts1.tv_sec * 1000000000 + ts1.tv_nsec - ts0.tv_nsec;
+	tc->cost >>= 8;		/* 16 + 240 = 256 calls were timed */
+	printf("Timecounter \"%s\"  frequency %lu Hz  cost %d ns\n", 
+	    tc->name, (u_long)tc->frequency, tc->cost);	/* frequency is u_int32_t, cost is int */
+
+	/* XXX: For now always start using the counter. */
+	tc->offset_count = tc->get_timecount();
+	nanotime(&ts1);
+	tc->offset_nano = (u_int64_t)ts1.tv_nsec << 32;
+	tc->offset_micro = ts1.tv_nsec / 1000;
+	tc->offset_sec = ts1.tv_sec;
+	timecounter = tc;
+}
+
+void
+set_timecounter(struct timespec *ts)	/* step the clock to *ts */
+{
+	struct timecounter *tc, *tco;
+	int s;
+
+	s = splclock();
+	tc=timecounter->other;	/* build the new state in the copy that is not published */
+	tco = tc->other;
+	*tc = *timecounter;
+	tc->other = tco;	/* the struct copy overwrote ->other; put it back */
+	tc->offset_sec = ts->tv_sec;
+	tc->offset_nano = (u_int64_t)ts->tv_nsec << 32;
+	tc->offset_micro =  ts->tv_nsec / 1000;
+	tc->offset_count = tc->get_timecount();	/* counter value the new offsets refer to */
+	time.tv_sec = tc->offset_sec;
+	time.tv_usec = tc->offset_micro;
+	timecounter = tc;	/* publish the updated copy */
+	splx(s);
+}
+
+static struct timecounter *
+sync_other_counter(int flag)	/* bring the unpublished copy up to date and return it */
+{
+	struct timecounter *tc, *tco;
+	u_int32_t delta;
+
+	tc = timecounter->other;
+	tco = tc->other;	/* saved because the struct copy below overwrites it */
+	*tc = *timecounter;
+	tc->other = tco;
+	delta = tc->get_timedelta(tc);
+	tc->offset_count += delta;
+	tc->offset_count &= tc->counter_mask;
+	tc->offset_nano += (u_int64_t)delta * tc->scale_nano_f;
+	tc->offset_nano += (u_int64_t)delta * tc->scale_nano_i << 32;
+	if (flag)		/* non-zero: return before normalizing offset_nano */
+		return (tc);
+	if (tc->offset_nano >= 1000000000ULL << 32) {	/* >= so exactly one second rolls over, as in tco_forward() */
+		tc->offset_sec++;
+		tc->offset_nano -= 1000000000ULL << 32;
+	}
+	tc->offset_micro = (tc->offset_nano / 1000) >> 32;
+	return (tc);
+}
+
+static void
+tco_forward(void)	/* called from hardclock(): advance and publish the timecounter */
+{
+	struct timecounter *tc;
+	u_int32_t time_update;
+
+	tc = sync_other_counter(1);	/* flag 1: second rollover is handled below */
+	time_update = 0;
+
+	if (timedelta) {	/* non-zero timedelta: apply one tickdelta step */
+		time_update += tickdelta;
+		timedelta -= tickdelta;
+	}
+	mono_time.tv_usec += time_update + tick;
+	if (mono_time.tv_usec >= 1000000) {
+		mono_time.tv_usec -= 1000000;
+		mono_time.tv_sec++;
+	}
+	time_update *= 1000;	/* usec -> nsec */
+	tc->offset_nano += (u_int64_t)time_update << 32;
+	if (tc->offset_nano >= 1000000000ULL << 32) {	/* a full second has accumulated */
+		tc->offset_nano -= 1000000000ULL << 32;
+		tc->offset_sec++;
+		tc->frequency = tc->tweak->frequency;	/* pick up values stored through ->tweak */
+		tc->adjustment = tc->tweak->adjustment;	/* XXX remove this ? */
+		ntp_update_second(tc);	/* XXX only needed if xntpd runs */
+		tco_setscales(tc);
+	}
+	/*
+	 * Find the usec from the nsec.  This is just as fast (one 
+	 * multiplication) and prevents skew between the two due
+	 * to rounding errors. (2^32/1000 = 4294967.296)
+	 */
+	tc->offset_micro = (tc->offset_nano / 1000) >> 32;
+	time.tv_usec = tc->offset_micro;
+	time.tv_sec = tc->offset_sec;
+	timecounter = tc;	/* publish the updated copy */
+}
+
+static int
+sysctl_kern_timecounter_frequency SYSCTL_HANDLER_ARGS
+{	/* NOTE(review): no range check visible here; tco_setscales() divides by frequency */
+	return (sysctl_handle_opaque(oidp, &timecounter->tweak->frequency,
+	    sizeof(timecounter->tweak->frequency), req));
+}
+
+static int
+sysctl_kern_timecounter_adjustment SYSCTL_HANDLER_ARGS
+{	/* hands tweak->adjustment to sysctl_handle_opaque(); tco_forward() copies it at second rollover */
+	return (sysctl_handle_opaque(oidp, &timecounter->tweak->adjustment,
+	    sizeof(timecounter->tweak->adjustment), req));
+}
+
+SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");
+
+SYSCTL_PROC(_kern_timecounter, OID_AUTO, frequency, CTLTYPE_INT|CTLFLAG_RW,
+	0, sizeof(u_int) , sysctl_kern_timecounter_frequency, "I", "");
+
+SYSCTL_PROC(_kern_timecounter, OID_AUTO, adjustment, CTLTYPE_INT|CTLFLAG_RW,
+	0, sizeof(int) , sysctl_kern_timecounter_adjustment, "I", "");
diff --git a/sys/kern/kern_ntptime.c b/sys/kern/kern_ntptime.c
index 102e6500dcb5..636a5ce28b73 100644
--- a/sys/kern/kern_ntptime.c
+++ b/sys/kern/kern_ntptime.c
@@ -99,6 +99,7 @@ static long time_tolerance = MAXFREQ;	/* frequency tolerance (scaled ppm) */
 static long time_precision = 1;		/* clock precision (us) */
 static long time_maxerror = MAXPHASE;	/* maximum error (us) */
 static long time_esterror = MAXPHASE;	/* estimated error (us) */
+static int time_daemon = 0;		/* No timedaemon active */
 
 /*
  * The following variables establish the state of the PLL/FLL and the
@@ -285,11 +286,28 @@ hardupdate(offset)
 		time_freq = -time_tolerance;
 }
 
+/*
+ * On rollover of the second the phase adjustment to be used for
+ * the next second is calculated. Also, the maximum error is
+ * increased by the tolerance. If the PPS frequency discipline
+ * code is present, the phase is increased to compensate for the
+ * CPU clock oscillator frequency error.
+ *
+ * On a 32-bit machine and given parameters in the timex.h
+ * header file, the maximum phase adjustment is +-512 ms and
+ * maximum frequency offset is (a tad less than) +-512 ppm. On a
+ * 64-bit machine, you shouldn't need to ask.
+ */
 void
-ntp_update_second(long *newsec)
+ntp_update_second(struct timecounter *tc)
 {
+	u_int32_t *newsec;
 	long ltemp;
 
+	if (!time_daemon)
+		return;
+
+	newsec = &tc->offset_sec;
 	time_maxerror += time_tolerance >> SHIFT_USEC;
 
 	/*
@@ -308,7 +326,7 @@ ntp_update_second(long *newsec)
 		if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
 			ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
 		time_offset += ltemp;
-		time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+		time_adj = -ltemp << (SHIFT_SCALE - SHIFT_UPDATE);
 	} else {
 		ltemp = time_offset;
 		if (!(time_status & STA_FLL))
@@ -316,7 +334,7 @@ ntp_update_second(long *newsec)
 		if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
 			ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
 		time_offset -= ltemp;
-		time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+		time_adj = ltemp << (SHIFT_SCALE - SHIFT_UPDATE);
 	}
 
 	/*
@@ -339,29 +357,12 @@ ntp_update_second(long *newsec)
 	ltemp = time_freq;
 #endif /* PPS_SYNC */
 	if (ltemp < 0)
-		time_adj -= -ltemp >> (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
+		time_adj -= -ltemp << (SHIFT_SCALE - SHIFT_USEC);
 	else
-		time_adj += ltemp >> (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
-
-#if SHIFT_HZ == 7
-	/*
-	* When the CPU clock oscillator frequency is not a
-	* power of two in Hz, the SHIFT_HZ is only an
-	* approximate scale factor. In the SunOS kernel, this
-	* results in a PLL gain factor of 1/1.28 = 0.78 what it
-	* should be. In the following code the overall gain is
-	* increased by a factor of 1.25, which results in a
-	* residual error less than 3 percent.
-	*/
-	/* Same thing applies for FreeBSD --GAW */
-	if (hz == 100) {
-		if (time_adj < 0)
-			time_adj -= -time_adj >> 2;
-		else
-			time_adj += time_adj >> 2;
-	}
-#endif /* SHIFT_HZ */
+		time_adj += ltemp << (SHIFT_SCALE - SHIFT_USEC);
 
+	tc->adjustment = time_adj;
+	
 	/* XXX - this is really bogus, but can't be fixed until
 	xntpd's idea of the system clock is fixed to know how
 	the user wants leap seconds handled; in the mean time,
@@ -490,6 +491,8 @@ ntp_adjtime(struct proc *p, struct ntp_adjtime_args *uap)
 	int s;
 	int error;
 
+	time_daemon = 1;
+
 	error = copyin((caddr_t)uap->tp, (caddr_t)&ntv, sizeof(ntv));
 	if (error)
 		return error;
diff --git a/sys/kern/kern_random.c b/sys/kern/kern_random.c
index f066949b08ba..7fd83646cac0 100644
--- a/sys/kern/kern_random.c
+++ b/sys/kern/kern_random.c
@@ -1,7 +1,7 @@
 /*
  * random_machdep.c -- A strong random number generator
  *
- * $Id: random_machdep.c,v 1.19 1997/10/28 15:58:13 bde Exp $
+ * $Id: random_machdep.c,v 1.20 1997/12/26 20:42:11 phk Exp $
  *
  * Version 0.95, last modified 18-Oct-95
  * 
@@ -190,21 +190,8 @@ add_timer_randomness(struct random_bucket *r, struct timer_rand_state *state,
 	u_int		nbits;
 	u_int32_t	time;
 
-#if defined(I586_CPU) || defined(I686_CPU)
-	if (tsc_freq != 0) {
-		num ^= (u_int32_t) rdtsc() << 16;
-		r->entropy_count += 2;
-	} else {
-#endif
-		disable_intr();
-		outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
-		num ^= inb(TIMER_CNTR0) << 16;
-		num ^= inb(TIMER_CNTR0) << 24;
-		enable_intr();
-		r->entropy_count += 2;
-#if defined(I586_CPU) || defined(I686_CPU)
-	}
-#endif
+	num ^= timecounter->get_timecount() << 16;
+	r->entropy_count += 2;
 		
 	time = ticks;
 
diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c
index 20b700b32d3e..30bb775ecb7d 100644
--- a/sys/kern/kern_tc.c
+++ b/sys/kern/kern_tc.c
@@ -1,4 +1,5 @@
 /*-
+ * Copyright (c) 1997, 1998 Poul-Henning Kamp <phk@FreeBSD.org>
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
@@ -36,7 +37,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
- * $Id: kern_clock.c,v 1.55 1998/02/06 12:13:22 eivind Exp $
+ * $Id: kern_clock.c,v 1.56 1998/02/15 13:55:06 phk Exp $
  */
 
 #include <sys/param.h>
@@ -55,7 +56,6 @@
 #include <sys/sysctl.h>
 
 #include <machine/cpu.h>
-#define CLOCK_HAIR		/* XXX */
 #include <machine/clock.h>
 #include <machine/limits.h>
 
@@ -70,6 +70,9 @@
 static void initclocks __P((void *dummy));
 SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)
 
+static void tco_forward __P((void));
+static void tco_setscales __P((struct timecounter *tc));
+
 /* Some of these don't belong here, but it's easiest to concentrate them. */
 #if defined(SMP) && defined(BETTER_CLOCK)
 long cp_time[CPUSTATES];
@@ -91,55 +94,43 @@ long tk_nin;
 long tk_nout;
 long tk_rawcc;
 
+struct timecounter *timecounter;
+
 /*
  * Clock handling routines.
  *
- * This code is written to operate with two timers that run independently of
- * each other.  The main clock, running hz times per second, is used to keep
- * track of real time.  The second timer handles kernel and user profiling,
- * and does resource use estimation.  If the second timer is programmable,
- * it is randomized to avoid aliasing between the two clocks.  For example,
- * the randomization prevents an adversary from always giving up the cpu
+ * This code is written to operate with two timers that run independently
+ * of each other.
+ *
+ * The main clock, running hz times per second, is used to trigger
+ * interval timers, timeouts and rescheduling as needed.
+ *
+ * The second timer handles kernel and user profiling, and does resource
+ * use estimation.  If the second timer is programmable, it is randomized
+ * to avoid aliasing between the two clocks.  For example, the
+ * randomization prevents an adversary from always giving up the cpu
  * just before its quantum expires.  Otherwise, it would never accumulate
  * cpu ticks.  The mean frequency of the second timer is stathz.
- *
- * If no second timer exists, stathz will be zero; in this case we drive
- * profiling and statistics off the main clock.  This WILL NOT be accurate;
- * do not do it unless absolutely necessary.
- *
+ * If no second timer exists, stathz will be zero; in this case we
+ * drive profiling and statistics off the main clock.  This WILL NOT
+ * be accurate; do not do it unless absolutely necessary.
  * The statistics clock may (or may not) be run at a higher rate while
- * profiling.  This profile clock runs at profhz.  We require that profhz
- * be an integral multiple of stathz.
+ * profiling.  This profile clock runs at profhz.  We require that
+ * profhz be an integral multiple of stathz.  If the statistics clock
+ * is running fast, it must be divided by the ratio profhz/stathz for
+ * statistics.  (For profiling, every tick counts.)
  *
- * If the statistics clock is running fast, it must be divided by the ratio
- * profhz/stathz for statistics.  (For profiling, every tick counts.)
+ * Time-of-day is maintained using a "timecounter", which may or may
+ * not be related to the hardware generating the above mentioned
+ * interrupts.
  */
 
-/*
- * TODO:
- *	allocate more timeout table slots when table overflows.
- */
-
-/*
- * Bump a timeval by a small number of usec's.
- */
-#define BUMPTIME(t, usec) { \
-	register volatile struct timeval *tp = (t); \
-	register long us; \
- \
-	tp->tv_usec = us = tp->tv_usec + (usec); \
-	if (us >= 1000000) { \
-		tp->tv_usec = us - 1000000; \
-		tp->tv_sec++; \
-	} \
-}
-
 int	stathz;
 int	profhz;
 static int profprocs;
 int	ticks;
 static int psdiv, pscnt;		/* prof => stat divider */
-int psratio;				/* ratio: prof / stat */
+int	psratio;			/* ratio: prof / stat */
 
 volatile struct	timeval time;
 volatile struct	timeval mono_time;
@@ -178,9 +169,6 @@ hardclock(frame)
 	register struct clockframe *frame;
 {
 	register struct proc *p;
-	int time_update;
-	struct timeval newtime = time;
-	long ltemp;
 
 	p = curproc;
 	if (p) {
@@ -208,56 +196,10 @@ hardclock(frame)
 	if (stathz == 0)
 		statclock(frame);
 
-	/*
-	 * Increment the time-of-day.
-	 */
+	tco_forward();
+
 	ticks++;
 
-	if (timedelta == 0) {
-		time_update = CPU_THISTICKLEN(tick);
-	} else {
-		time_update = CPU_THISTICKLEN(tick) + tickdelta;
-		timedelta -= tickdelta;
-	}
-	BUMPTIME(&mono_time, time_update);
-
-	/*
-	 * Compute the phase adjustment. If the low-order bits
-	 * (time_phase) of the update overflow, bump the high-order bits
-	 * (time_update).
-	 */
-	time_phase += time_adj;
-	if (time_phase <= -FINEUSEC) {
-		ltemp = -time_phase >> SHIFT_SCALE;
-		time_phase += ltemp << SHIFT_SCALE;
-		time_update -= ltemp;
-	}
-	else if (time_phase >= FINEUSEC) {
-		ltemp = time_phase >> SHIFT_SCALE;
-		time_phase -= ltemp << SHIFT_SCALE;
-		time_update += ltemp;
-	}
-
-	newtime.tv_usec += time_update;
-	/*
-	 * On rollover of the second the phase adjustment to be used for
-	 * the next second is calculated. Also, the maximum error is
-	 * increased by the tolerance. If the PPS frequency discipline
-	 * code is present, the phase is increased to compensate for the
-	 * CPU clock oscillator frequency error.
-	 *
-	 * On a 32-bit machine and given parameters in the timex.h
-	 * header file, the maximum phase adjustment is +-512 ms and
-	 * maximum frequency offset is a tad less than) +-512 ppm. On a
-	 * 64-bit machine, you shouldn't need to ask.
-	 */
-	if (newtime.tv_usec >= 1000000) {
-		newtime.tv_usec -= 1000000;
-		newtime.tv_sec++;
-		ntp_update_second(&newtime.tv_sec);
-	}
-	CPU_CLOCKUPDATE(&time, &newtime);
-
 	if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL)
 		setsoftclock();
 }
@@ -315,6 +257,10 @@ hzto(tv)
 	}
 	if (sec < 0) {
 #ifdef DIAGNOSTIC
+		if (sec == -1 && usec > 0) {
+			sec++;
+			usec -= 1000000;
+		}
 		printf("hzto: negative time difference %ld sec %ld usec\n",
 		       sec, usec);
 #endif
@@ -529,11 +475,212 @@ SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
 	0, 0, sysctl_kern_clockrate, "S,clockinfo","");
 
 void
-nanotime(ts)
-	struct timespec *ts;
+microtime(struct timeval *tv)	/* fill *tv from the current timecounter */
 {
-	struct timeval tv;
-	microtime(&tv);
-	ts->tv_sec = tv.tv_sec;
-	ts->tv_nsec = tv.tv_usec * 1000;
+	struct timecounter *tc;
+
+	tc = (struct timecounter *)timecounter;	/* every read below goes through this one pointer */
+	tv->tv_sec = tc->offset_sec;
+	tv->tv_usec = tc->offset_micro;
+	tv->tv_usec += 
+	    ((u_int64_t)tc->get_timedelta(tc) * tc->scale_micro) >> 32;	/* scale_micro is 2^32 * usec/tick */
+	if (tv->tv_usec >= 1000000) {	/* carries at most one second */
+		tv->tv_usec -= 1000000;
+		tv->tv_sec++;
+	}
 }
+
+void
+nanotime(struct timespec *tv)	/* fill *tv from the current timecounter */
+{
+	u_int32_t count;
+	u_int64_t delta;
+	struct timecounter *tc;
+
+	tc = (struct timecounter *)timecounter;	/* every read below goes through this one pointer */
+	tv->tv_sec = tc->offset_sec;
+	count = tc->get_timedelta(tc);
+	delta = tc->offset_nano;	/* nanoseconds scaled by 2^32 */
+	delta += ((u_int64_t)count * tc->scale_nano_f);	/* fractional ns per tick */
+	delta += ((u_int64_t)count * tc->scale_nano_i) << 32;	/* whole ns per tick */
+	delta >>= 32;	/* drop the fraction, leaving whole nanoseconds */
+	if (delta >= 1000000000) {	/* carries at most one second */
+		delta -= 1000000000;
+		tv->tv_sec++;
+	}
+	tv->tv_nsec = delta;
+}
+
+static void
+tco_setscales(struct timecounter *tc)	/* derive scale_* from frequency and adjustment */
+{
+	u_int64_t scale;
+
+	scale = 1000000000LL << 32;	/* ns per second, scaled by 2^32 */
+	if (tc->adjustment > 0)
+		scale += (tc->adjustment * 1000LL) << 10;
+	else
+		scale -= (-tc->adjustment * 1000LL) << 10;	/* also taken when adjustment is 0 */
+	/* scale += tc->frequency >> 1; */ /* XXX do we want to round ? */
+	scale /= tc->frequency;	/* NOTE(review): no guard against frequency == 0 */
+	tc->scale_micro = scale / 1000;
+	tc->scale_nano_f = scale & 0xffffffff;	/* low 32 bits: fraction */
+	tc->scale_nano_i = scale >> 32;		/* high 32 bits: integer part */
+}
+
+static u_int
+delta_timecounter(struct timecounter *tc)	/* fallback get_timedelta installed by init_timecounter() */
+{
+	return((tc->get_timecount() - tc->offset_count) & tc->counter_mask);
+}
+
+void
+init_timecounter(struct timecounter *tc)	/* tc must point at an array of three */
+{
+	struct timespec ts0, ts1;
+	int i;
+
+	if (!tc->get_timedelta) 
+		tc->get_timedelta = delta_timecounter;	/* generic masked-difference fallback */
+	tc->adjustment = 0;
+	tco_setscales(tc);
+	tc->offset_count = tc->get_timecount();
+	tc[0].tweak = &tc[0];	/* copied below, so all three ->tweak point at tc[0] */
+	tc[2] = tc[1] = tc[0];
+	tc[1].other = &tc[2];	/* tc[1] and tc[2] reference each other via ->other */
+	tc[2].other = &tc[1];
+	if (!timecounter)
+		timecounter = &tc[2];	/* nanotime() below dereferences timecounter */
+	tc = &tc[1];
+
+	/* 
+	 * Figure out the cost of calling this timecounter.
+	 * XXX: The 1:15 ratio is a guess at reality.
+	 */
+	nanotime(&ts0);
+	for (i = 0; i < 16; i ++) 
+		tc->get_timecount();
+	for (i = 0; i < 240; i ++)
+		tc->get_timedelta(tc);
+	nanotime(&ts1);
+	ts1.tv_sec -= ts0.tv_sec;
+	tc->cost = ts1.tv_sec * 1000000000 + ts1.tv_nsec - ts0.tv_nsec;
+	tc->cost >>= 8;		/* 16 + 240 = 256 calls were timed */
+	printf("Timecounter \"%s\"  frequency %lu Hz  cost %d ns\n", 
+	    tc->name, (u_long)tc->frequency, tc->cost);	/* frequency is u_int32_t, cost is int */
+
+	/* XXX: For now always start using the counter. */
+	tc->offset_count = tc->get_timecount();
+	nanotime(&ts1);
+	tc->offset_nano = (u_int64_t)ts1.tv_nsec << 32;
+	tc->offset_micro = ts1.tv_nsec / 1000;
+	tc->offset_sec = ts1.tv_sec;
+	timecounter = tc;
+}
+
+void
+set_timecounter(struct timespec *ts)	/* step the clock to *ts */
+{
+	struct timecounter *tc, *tco;
+	int s;
+
+	s = splclock();
+	tc=timecounter->other;	/* build the new state in the copy that is not published */
+	tco = tc->other;
+	*tc = *timecounter;
+	tc->other = tco;	/* the struct copy overwrote ->other; put it back */
+	tc->offset_sec = ts->tv_sec;
+	tc->offset_nano = (u_int64_t)ts->tv_nsec << 32;
+	tc->offset_micro =  ts->tv_nsec / 1000;
+	tc->offset_count = tc->get_timecount();	/* counter value the new offsets refer to */
+	time.tv_sec = tc->offset_sec;
+	time.tv_usec = tc->offset_micro;
+	timecounter = tc;	/* publish the updated copy */
+	splx(s);
+}
+
+static struct timecounter *
+sync_other_counter(int flag)	/* bring the unpublished copy up to date and return it */
+{
+	struct timecounter *tc, *tco;
+	u_int32_t delta;
+
+	tc = timecounter->other;
+	tco = tc->other;	/* saved because the struct copy below overwrites it */
+	*tc = *timecounter;
+	tc->other = tco;
+	delta = tc->get_timedelta(tc);
+	tc->offset_count += delta;
+	tc->offset_count &= tc->counter_mask;
+	tc->offset_nano += (u_int64_t)delta * tc->scale_nano_f;
+	tc->offset_nano += (u_int64_t)delta * tc->scale_nano_i << 32;
+	if (flag)		/* non-zero: return before normalizing offset_nano */
+		return (tc);
+	if (tc->offset_nano >= 1000000000ULL << 32) {	/* >= so exactly one second rolls over, as in tco_forward() */
+		tc->offset_sec++;
+		tc->offset_nano -= 1000000000ULL << 32;
+	}
+	tc->offset_micro = (tc->offset_nano / 1000) >> 32;
+	return (tc);
+}
+
+static void
+tco_forward(void)	/* called from hardclock(): advance and publish the timecounter */
+{
+	struct timecounter *tc;
+	u_int32_t time_update;
+
+	tc = sync_other_counter(1);	/* flag 1: second rollover is handled below */
+	time_update = 0;
+
+	if (timedelta) {	/* non-zero timedelta: apply one tickdelta step */
+		time_update += tickdelta;
+		timedelta -= tickdelta;
+	}
+	mono_time.tv_usec += time_update + tick;
+	if (mono_time.tv_usec >= 1000000) {
+		mono_time.tv_usec -= 1000000;
+		mono_time.tv_sec++;
+	}
+	time_update *= 1000;	/* usec -> nsec */
+	tc->offset_nano += (u_int64_t)time_update << 32;
+	if (tc->offset_nano >= 1000000000ULL << 32) {	/* a full second has accumulated */
+		tc->offset_nano -= 1000000000ULL << 32;
+		tc->offset_sec++;
+		tc->frequency = tc->tweak->frequency;	/* pick up values stored through ->tweak */
+		tc->adjustment = tc->tweak->adjustment;	/* XXX remove this ? */
+		ntp_update_second(tc);	/* XXX only needed if xntpd runs */
+		tco_setscales(tc);
+	}
+	/*
+	 * Find the usec from the nsec.  This is just as fast (one 
+	 * multiplication) and prevents skew between the two due
+	 * to rounding errors. (2^32/1000 = 4294967.296)
+	 */
+	tc->offset_micro = (tc->offset_nano / 1000) >> 32;
+	time.tv_usec = tc->offset_micro;
+	time.tv_sec = tc->offset_sec;
+	timecounter = tc;	/* publish the updated copy */
+}
+
+static int
+sysctl_kern_timecounter_frequency SYSCTL_HANDLER_ARGS
+{	/* NOTE(review): no range check visible here; tco_setscales() divides by frequency */
+	return (sysctl_handle_opaque(oidp, &timecounter->tweak->frequency,
+	    sizeof(timecounter->tweak->frequency), req));
+}
+
+static int
+sysctl_kern_timecounter_adjustment SYSCTL_HANDLER_ARGS
+{	/* hands tweak->adjustment to sysctl_handle_opaque(); tco_forward() copies it at second rollover */
+	return (sysctl_handle_opaque(oidp, &timecounter->tweak->adjustment,
+	    sizeof(timecounter->tweak->adjustment), req));
+}
+
+SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");
+
+SYSCTL_PROC(_kern_timecounter, OID_AUTO, frequency, CTLTYPE_INT|CTLFLAG_RW,
+	0, sizeof(u_int) , sysctl_kern_timecounter_frequency, "I", "");
+
+SYSCTL_PROC(_kern_timecounter, OID_AUTO, adjustment, CTLTYPE_INT|CTLFLAG_RW,
+	0, sizeof(int) , sysctl_kern_timecounter_adjustment, "I", "");
diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c
index 8060c15f5b79..fb78ffc709ff 100644
--- a/sys/kern/kern_time.c
+++ b/sys/kern/kern_time.c
@@ -31,7 +31,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)kern_time.c	8.1 (Berkeley) 6/10/93
- * $Id: kern_time.c,v 1.39 1997/11/06 19:29:16 phk Exp $
+ * $Id: kern_time.c,v 1.40 1997/11/07 08:52:58 phk Exp $
  */
 
 #include <sys/param.h>
@@ -78,6 +78,7 @@ settime(tv)
 	struct timeval *tv;
 {
 	struct timeval delta;
+	struct timespec ts;
 	struct proc *p;
 	int s;
 
@@ -99,7 +100,9 @@ settime(tv)
 	 */
 	delta.tv_sec = tv->tv_sec - time.tv_sec;
 	delta.tv_usec = tv->tv_usec - time.tv_usec;
-	time = *tv;
+	ts.tv_sec = tv->tv_sec;
+	ts.tv_nsec = tv->tv_usec * 1000;
+	set_timecounter(&ts);
 	/*
 	 * XXX should arrange for microtime() to agree with *tv if
 	 * it is called now.  As it is, it may add up to about
@@ -138,13 +141,11 @@ clock_gettime(p, uap)
 	struct proc *p;
 	struct clock_gettime_args *uap;
 {
-	struct timeval atv;
 	struct timespec ats;
 
 	if (SCARG(uap, clock_id) != CLOCK_REALTIME)
 		return (EINVAL);
-	microtime(&atv);
-	TIMEVAL_TO_TIMESPEC(&atv, &ats);
+	nanotime(&ats);
 	return (copyout(&ats, SCARG(uap, tp), sizeof(ats)));
 }
 
@@ -199,7 +200,7 @@ clock_getres(p, uap)
 	error = 0;
 	if (SCARG(uap, tp)) {
 		ts.tv_sec = 0;
-		ts.tv_nsec = 1000000000 / hz;
+		ts.tv_nsec = 1000000000 / timecounter->frequency;
 		error = copyout(&ts, SCARG(uap, tp), sizeof(ts));
 	}
 	return (error);
diff --git a/sys/sys/time.h b/sys/sys/time.h
index cdb812e5f29c..a726b17c66b7 100644
--- a/sys/sys/time.h
+++ b/sys/sys/time.h
@@ -31,7 +31,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)time.h	8.5 (Berkeley) 5/4/95
- * $Id: time.h,v 1.15 1997/06/24 18:21:09 jhay Exp $
+ * $Id: time.h,v 1.16 1997/12/28 13:36:09 phk Exp $
  */
 
 #ifndef _SYS_TIME_H_
@@ -77,6 +77,70 @@ struct timezone {
 #define	DST_EET		5	/* Eastern European dst */
 #define	DST_CAN		6	/* Canada */
 
+/*
+ * Structure used to interface to the machine dependent hardware
+ * support for timekeeping.
+ *
+ * A timecounter is a binary counter which has two simple properties:
+ *    * it runs at a fixed frequency.
+ *    * must not roll over in less than (1+epsilon)/HZ
+ *
+ * get_timecount reads the counter.
+ *
+ * get_timedelta returns difference between the counter now and offset_count
+ *
+ * counter_mask removes unimplemented bits from the count value
+ *
+ * frequency should be obvious
+ *
+ * name is a short mnemonic name for this counter.
+ *
+ * cost is a measure of how long it takes to read the counter.
+ *
+ * adjustment [PPM << 16] which means that the smallest unit of correction
+ *     you can apply amounts to 481.5 usec/year.
+ *
+ * scale_micro [2^32 * usec/tick]
+ *
+ * scale_nano_i [ns/tick]
+ *
+ * scale_nano_f [(ns/2^32)/tick]
+ *
+ * offset_count is the contents of the counter which corresponds to the
+ *     rest of the offset_* values
+ *
+ * offset_sec [s]
+ * offset_micro [usec]
+ * offset_nano [ns/2^32] is misnamed, the real unit is .23283064365...
+ *     attoseconds (10^-18 s) and before you ask: yes, they are in fact
+ *     called attoseconds, it comes from "atten" for 18 in Danish/Norwegian.
+ */
+
+struct timecounter;	/* forward declaration needed by the typedefs below */
+typedef u_int timecounter_get_t __P((struct timecounter *)); /* NOTE: type of get_timedelta */
+typedef	u_int64_t timecounter_delta_t __P((void)); /* NOTE: type of get_timecount */
+
+struct timecounter {
+	/* These fields must be initialized by the driver */
+	timecounter_get_t	*get_timedelta;	/* counter now minus offset_count */
+	timecounter_delta_t	*get_timecount;	/* reads the counter */
+	u_int64_t		counter_mask;	/* removes unimplemented bits */
+	u_int32_t		frequency;	/* presumably ticks/second - confirm */
+	char			*name;		/* short mnemonic name */
+	/* These fields will be managed by the generic code */
+	int			cost;		/* measure of time to read counter */
+	int32_t			adjustment;	/* [PPM << 16] */
+	u_int32_t		scale_micro;	/* [2^32 * usec/tick] */
+	u_int32_t		scale_nano_i;	/* [ns/tick] */
+	u_int32_t		scale_nano_f;	/* [(ns/2^32)/tick] */
+	u_int64_t		offset_count;	/* counter value matching offset_* */
+	u_int32_t		offset_sec;	/* [s] */
+	u_int32_t		offset_micro;	/* [usec] */
+	u_int64_t		offset_nano;	/* [ns/2^32] */
+	struct timecounter	*other;		/* TODO(review): role not documented */
+	struct timecounter	*tweak;		/* TODO(review): role not documented */
+};
+
 /* Operations on timevals. */
 #define	timerclear(tvp)		(tvp)->tv_sec = (tvp)->tv_usec = 0
 #define	timerisset(tvp)		((tvp)->tv_sec || (tvp)->tv_usec)
@@ -138,13 +202,19 @@ struct clockinfo {
 #define TIMER_ABSTIME	0x1	/* absolute timer */
 
 #ifdef KERNEL
+extern struct timecounter *timecounter;
+
+void	forward_timecounter __P((void));
 void	gettime __P((struct timeval *tv));
+void	init_timecounter __P((struct timecounter *));
 int	itimerfix __P((struct timeval *tv));
 int	itimerdecr __P((struct itimerval *itp, int usec));
-void	timevaladd __P((struct timeval *, struct timeval *));
-void	timevalsub __P((struct timeval *, struct timeval *));
 void	microtime __P((struct timeval *tv));
 void	nanotime __P((struct timespec *ts));
+void	second_overflow __P((u_int32_t *psec));
+void	set_timecounter __P((struct timespec *));
+void	timevaladd __P((struct timeval *, struct timeval *));
+void	timevalsub __P((struct timeval *, struct timeval *));
 #else /* !KERNEL */
 #include <time.h>
 
diff --git a/sys/sys/timetc.h b/sys/sys/timetc.h
index cdb812e5f29c..a726b17c66b7 100644
--- a/sys/sys/timetc.h
+++ b/sys/sys/timetc.h
@@ -31,7 +31,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)time.h	8.5 (Berkeley) 5/4/95
- * $Id: time.h,v 1.15 1997/06/24 18:21:09 jhay Exp $
+ * $Id: time.h,v 1.16 1997/12/28 13:36:09 phk Exp $
  */
 
 #ifndef _SYS_TIME_H_
@@ -77,6 +77,70 @@ struct timezone {
 #define	DST_EET		5	/* Eastern European dst */
 #define	DST_CAN		6	/* Canada */
 
+/*
+ * Structure used to interface to the machine dependent hardware
+ * support for timekeeping.
+ *
+ * A timecounter is a binary counter which has two simple properties:
+ *    * it runs at a fixed frequency.
+ *    * must not roll over in less than (1+epsilon)/HZ
+ *
+ * get_timecount reads the counter.
+ *
+ * get_timedelta returns difference between the counter now and offset_count
+ *
+ * counter_mask removes unimplemented bits from the count value
+ *
+ * frequency should be obvious
+ *
+ * name is a short mnemonic name for this counter.
+ *
+ * cost is a measure of how long it takes to read the counter.
+ *
+ * adjustment [PPM << 16] which means that the smallest unit of correction
+ *     you can apply amounts to 481.5 usec/year.
+ *
+ * scale_micro [2^32 * usec/tick]
+ *
+ * scale_nano_i [ns/tick]
+ *
+ * scale_nano_f [(ns/2^32)/tick]
+ *
+ * offset_count is the contents of the counter which corresponds to the
+ *     rest of the offset_* values
+ *
+ * offset_sec [s]
+ * offset_micro [usec]
+ * offset_nano [ns/2^32] is misnamed, the real unit is .23283064365...
+ *     attoseconds (10^-18 s) and before you ask: yes, they are in fact
+ *     called attoseconds, it comes from "atten" for 18 in Danish/Norwegian.
+ */
+
+struct timecounter;	/* forward declaration needed by the typedefs below */
+typedef u_int timecounter_get_t __P((struct timecounter *)); /* NOTE: type of get_timedelta */
+typedef	u_int64_t timecounter_delta_t __P((void)); /* NOTE: type of get_timecount */
+
+struct timecounter {
+	/* These fields must be initialized by the driver */
+	timecounter_get_t	*get_timedelta;	/* counter now minus offset_count */
+	timecounter_delta_t	*get_timecount;	/* reads the counter */
+	u_int64_t		counter_mask;	/* removes unimplemented bits */
+	u_int32_t		frequency;	/* presumably ticks/second - confirm */
+	char			*name;		/* short mnemonic name */
+	/* These fields will be managed by the generic code */
+	int			cost;		/* measure of time to read counter */
+	int32_t			adjustment;	/* [PPM << 16] */
+	u_int32_t		scale_micro;	/* [2^32 * usec/tick] */
+	u_int32_t		scale_nano_i;	/* [ns/tick] */
+	u_int32_t		scale_nano_f;	/* [(ns/2^32)/tick] */
+	u_int64_t		offset_count;	/* counter value matching offset_* */
+	u_int32_t		offset_sec;	/* [s] */
+	u_int32_t		offset_micro;	/* [usec] */
+	u_int64_t		offset_nano;	/* [ns/2^32] */
+	struct timecounter	*other;		/* TODO(review): role not documented */
+	struct timecounter	*tweak;		/* TODO(review): role not documented */
+};
+
 /* Operations on timevals. */
 #define	timerclear(tvp)		(tvp)->tv_sec = (tvp)->tv_usec = 0
 #define	timerisset(tvp)		((tvp)->tv_sec || (tvp)->tv_usec)
@@ -138,13 +202,19 @@ struct clockinfo {
 #define TIMER_ABSTIME	0x1	/* absolute timer */
 
 #ifdef KERNEL
+extern struct timecounter *timecounter;
+
+void	forward_timecounter __P((void));
 void	gettime __P((struct timeval *tv));
+void	init_timecounter __P((struct timecounter *));
 int	itimerfix __P((struct timeval *tv));
 int	itimerdecr __P((struct itimerval *itp, int usec));
-void	timevaladd __P((struct timeval *, struct timeval *));
-void	timevalsub __P((struct timeval *, struct timeval *));
 void	microtime __P((struct timeval *tv));
 void	nanotime __P((struct timespec *ts));
+void	second_overflow __P((u_int32_t *psec));
+void	set_timecounter __P((struct timespec *));
+void	timevaladd __P((struct timeval *, struct timeval *));
+void	timevalsub __P((struct timeval *, struct timeval *));
 #else /* !KERNEL */
 #include <time.h>
 
diff --git a/sys/sys/timex.h b/sys/sys/timex.h
index b4e5f06bf970..5e0df4b9beb5 100644
--- a/sys/sys/timex.h
+++ b/sys/sys/timex.h
@@ -295,7 +295,7 @@ struct timex {
 		      }
 
 #ifdef KERNEL
-void ntp_update_second __P((long *newsec));
+void ntp_update_second __P((struct timecounter *tc));
 extern long time_phase;
 extern long time_adj;
 #else