diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index fe6860084f91..00182db80fa4 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -585,59 +585,89 @@ cpu_halt(void)
 }
 
 void (*cpu_idle_hook)(void) = NULL;	/* ACPI idle hook. */
+static int	cpu_ident_amdc1e = 0;	/* AMD C1E supported. */
+static int	idle_mwait = 1;		/* Use MONITOR/MWAIT for short idle. */
+TUNABLE_INT("machdep.idle_mwait", &idle_mwait);
+SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait,
+    0, "Use MONITOR/MWAIT for short idle");
 
-static void
-cpu_idle_hlt(int busy)
-{
-	/*
-	 * we must absolutely guarentee that hlt is the next instruction
-	 * after sti or we introduce a timing window.
-	 */
-	disable_intr();
-  	if (sched_runnable())
-		enable_intr();
-	else
-		__asm __volatile("sti; hlt");
-}
+#define	STATE_RUNNING	0x0
+#define	STATE_MWAIT	0x1
+#define	STATE_SLEEPING	0x2
 
 static void
 cpu_idle_acpi(int busy)
 {
+	int *state;
+
+	state = (int *)PCPU_PTR(monitorbuf);
+	*state = STATE_SLEEPING;
 	disable_intr();
-  	if (sched_runnable())
+	if (sched_runnable())
 		enable_intr();
 	else if (cpu_idle_hook)
 		cpu_idle_hook();
 	else
 		__asm __volatile("sti; hlt");
+	*state = STATE_RUNNING;
 }
 
-static int cpu_ident_amdc1e = 0;
-
-static int
-cpu_probe_amdc1e(void)
+static void
+cpu_idle_hlt(int busy)
 {
+	int *state;
+
+	state = (int *)PCPU_PTR(monitorbuf);
+	*state = STATE_SLEEPING;
+	/*
+	 * We must absolutely guarantee that hlt is the next instruction
+	 * after sti or we introduce a timing window.
+	 */
+	disable_intr();
+	if (sched_runnable())
+		enable_intr();
+	else
+		__asm __volatile("sti; hlt");
+	*state = STATE_RUNNING;
+}
+
+/*
+ * MWAIT cpu power states.  Lower 4 bits are sub-states.
+ */
+#define	MWAIT_C0	0xf0
+#define	MWAIT_C1	0x00
+#define	MWAIT_C2	0x10
+#define	MWAIT_C3	0x20
+#define	MWAIT_C4	0x30
+
+static void
+cpu_idle_mwait(int busy)
+{
+	int *state;
+
+	state = (int *)PCPU_PTR(monitorbuf);
+	*state = STATE_MWAIT;
+	if (!sched_runnable()) {
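+		/*
+		 * Arm MONITOR on the per-CPU state word, then re-check it:
+		 * if cpu_idle_wakeup() has already flipped it back to
+		 * STATE_RUNNING, skip MWAIT so the wakeup is not lost.
+		 */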
+		cpu_monitor(state, 0, 0);
+		if (*state == STATE_MWAIT)
+			cpu_mwait(0, MWAIT_C1);
+	}
+	*state = STATE_RUNNING;
+}
+
+static void
+cpu_idle_spin(int busy)
+{
+	int *state;
 	int i;
 
-	/*
-	 * Forget it, if we're not using local APIC timer.
-	 */
-	if (resource_disabled("apic", 0) ||
-	    (resource_int_value("apic", 0, "clock", &i) == 0 && i == 0))
-		return (0);
-
-	/*
-	 * Detect the presence of C1E capability mostly on latest
-	 * dual-cores (or future) k8 family.
-	 */
-	if (cpu_vendor_id == CPU_VENDOR_AMD &&
-	    (cpu_id & 0x00000f00) == 0x00000f00 &&
-	    (cpu_id & 0x0fff0000) >=  0x00040000) {
-		cpu_ident_amdc1e = 1;
-		return (1);
+	state = (int *)PCPU_PTR(monitorbuf);
+	*state = STATE_RUNNING;
+	for (i = 0; i < 1000; i++) {
+		if (sched_runnable())
+			return;
+		cpu_spinwait();
 	}
-
-	return (0);
 }
 
 /*
@@ -655,110 +685,83 @@ cpu_probe_amdc1e(void)
 #define	AMDK8_CMPHALT		(AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT)
 
 static void
-cpu_idle_amdc1e(int busy)
+cpu_probe_amdc1e(void)
 {
 
-	disable_intr();
-	if (sched_runnable())
-		enable_intr();
-	else {
-		uint64_t msr;
-
-		msr = rdmsr(MSR_AMDK8_IPM);
-		if (msr & AMDK8_CMPHALT)
-			wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
-
-		if (cpu_idle_hook)
-			cpu_idle_hook();
-		else
-			__asm __volatile("sti; hlt");
+	/*
+	 * Detect the presence of C1E capability mostly on latest
+	 * dual-cores (or future) k8 family.
+	 */
+	if (cpu_vendor_id == CPU_VENDOR_AMD &&
+	    (cpu_id & 0x00000f00) == 0x00000f00 &&
+	    (cpu_id & 0x0fff0000) >=  0x00040000) {
+		cpu_ident_amdc1e = 1;
 	}
 }
 
-static void
-cpu_idle_spin(int busy)
-{
-	return;
-}
-
 void (*cpu_idle_fn)(int) = cpu_idle_acpi;
 
 void
 cpu_idle(int busy)
 {
+	uint64_t msr;
+
+	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
+	    busy, curcpu);
 #ifdef SMP
 	if (mp_grab_cpu_hlt())
 		return;
 #endif
-	cpu_idle_fn(busy);
-}
-
-/*
- * mwait cpu power states.  Lower 4 bits are sub-states.
- */
-#define	MWAIT_C0	0xf0
-#define	MWAIT_C1	0x00
-#define	MWAIT_C2	0x10
-#define	MWAIT_C3	0x20
-#define	MWAIT_C4	0x30
-
-#define	MWAIT_DISABLED	0x0
-#define	MWAIT_WOKEN	0x1
-#define	MWAIT_WAITING	0x2
-
-static void
-cpu_idle_mwait(int busy)
-{
-	int *mwait;
-
-	mwait = (int *)PCPU_PTR(monitorbuf);
-	*mwait = MWAIT_WAITING;
-	if (sched_runnable())
-		return;
-	cpu_monitor(mwait, 0, 0);
-	if (*mwait == MWAIT_WAITING)
-		cpu_mwait(0, MWAIT_C1);
-}
-
-static void
-cpu_idle_mwait_hlt(int busy)
-{
-	int *mwait;
-
-	mwait = (int *)PCPU_PTR(monitorbuf);
-	if (busy == 0) {
-		*mwait = MWAIT_DISABLED;
-		cpu_idle_hlt(busy);
-		return;
+	/* If we are busy, try to use fast methods. */
+	if (busy) {
+		if ((cpu_feature2 & CPUID2_MON) && idle_mwait) {
+			cpu_idle_mwait(busy);
+			goto out;
+		}
 	}
-	*mwait = MWAIT_WAITING;
-	if (sched_runnable())
-		return;
-	cpu_monitor(mwait, 0, 0);
-	if (*mwait == MWAIT_WAITING)
-		cpu_mwait(0, MWAIT_C1);
+
+	/* If we have time, switch timers into idle mode. */
+	if (!busy) {
+		critical_enter();
+		cpu_idleclock();
+	}
+
+	/* Apply AMD APIC timer C1E workaround. */
+	if (cpu_ident_amdc1e && cpu_disable_deep_sleep) {
+		msr = rdmsr(MSR_AMDK8_IPM);
+		if (msr & AMDK8_CMPHALT)
+			wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
+	}
+
+	/* Call main idle method. */
+	cpu_idle_fn(busy);
+
+	/* Switch timers back into active mode. */
+	if (!busy) {
+		cpu_activeclock();
+		critical_exit();
+	}
+out:
+	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
+	    busy, curcpu);
 }
 
 int
 cpu_idle_wakeup(int cpu)
 {
 	struct pcpu *pcpu;
-	int *mwait;
+	int *state;
 
-	if (cpu_idle_fn == cpu_idle_spin)
-		return (1);
-	if (cpu_idle_fn != cpu_idle_mwait && cpu_idle_fn != cpu_idle_mwait_hlt)
-		return (0);
 	pcpu = pcpu_find(cpu);
-	mwait = (int *)pcpu->pc_monitorbuf;
+	state = (int *)pcpu->pc_monitorbuf;
 	/*
 	 * This doesn't need to be atomic since missing the race will
 	 * simply result in unnecessary IPIs.
 	 */
-	if (cpu_idle_fn == cpu_idle_mwait_hlt && *mwait == MWAIT_DISABLED)
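+	/*
+	 * A CPU sleeping in HLT or an ACPI C-state can only be woken by an
+	 * interrupt, so report failure and let the caller send an IPI.  A
+	 * CPU in MWAIT wakes up as soon as the monitored word is written.
+	 */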
+	if (*state == STATE_SLEEPING)
 		return (0);
-	*mwait = MWAIT_WOKEN;
-
+	if (*state == STATE_MWAIT)
+		*state = STATE_RUNNING;
 	return (1);
 }
 
@@ -771,8 +774,6 @@ struct {
 } idle_tbl[] = {
 	{ cpu_idle_spin, "spin" },
 	{ cpu_idle_mwait, "mwait" },
-	{ cpu_idle_mwait_hlt, "mwait_hlt" },
-	{ cpu_idle_amdc1e, "amdc1e" },
 	{ cpu_idle_hlt, "hlt" },
 	{ cpu_idle_acpi, "acpi" },
 	{ NULL, NULL }
@@ -791,8 +792,8 @@ idle_sysctl_available(SYSCTL_HANDLER_ARGS)
 		if (strstr(idle_tbl[i].id_name, "mwait") &&
 		    (cpu_feature2 & CPUID2_MON) == 0)
 			continue;
-		if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 &&
-		    cpu_ident_amdc1e == 0)
+		if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
+		    cpu_idle_hook == NULL)
 			continue;
 		p += sprintf(p, "%s, ", idle_tbl[i].id_name);
 	}
@@ -801,6 +802,9 @@ idle_sysctl_available(SYSCTL_HANDLER_ARGS)
 	return (error);
 }
 
+SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
+    0, 0, idle_sysctl_available, "A", "list of available idle functions");
+
 static int
 idle_sysctl(SYSCTL_HANDLER_ARGS)
 {
@@ -824,8 +828,8 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
 		if (strstr(idle_tbl[i].id_name, "mwait") &&
 		    (cpu_feature2 & CPUID2_MON) == 0)
 			continue;
-		if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 &&
-		    cpu_ident_amdc1e == 0)
+		if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
+		    cpu_idle_hook == NULL)
 			continue;
 		if (strcmp(idle_tbl[i].id_name, buf))
 			continue;
@@ -835,9 +839,6 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
 	return (EINVAL);
 }
 
-SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
-    0, 0, idle_sysctl_available, "A", "list of available idle functions");
-
 SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
     idle_sysctl, "A", "currently selected idle function");
 
@@ -1743,8 +1744,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
 	}
 #endif
 
-	if (cpu_probe_amdc1e())
-		cpu_idle_fn = cpu_idle_amdc1e;
+	cpu_probe_amdc1e();
 
 	/* Location of kernel stack for locore */
 	return ((u_int64_t)thread0.td_pcb);
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index e2f82ec282e0..49b380bd6da9 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -118,7 +118,6 @@ u_long *ipi_invlcache_counts[MAXCPU];
 u_long *ipi_rendezvous_counts[MAXCPU];
 u_long *ipi_lazypmap_counts[MAXCPU];
 static u_long *ipi_hardclock_counts[MAXCPU];
-static u_long *ipi_statclock_counts[MAXCPU];
 #endif
 
 extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
@@ -1196,16 +1195,22 @@ smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2)
 void
 ipi_bitmap_handler(struct trapframe frame)
 {
+	struct trapframe *oldframe;
+	struct thread *td;
 	int cpu = PCPU_GET(cpuid);
 	u_int ipi_bitmap;
 
+	critical_enter();
+	td = curthread;
+	td->td_intr_nesting_level++;
+	oldframe = td->td_intr_frame;
+	td->td_intr_frame = &frame;
 	ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
-
 	if (ipi_bitmap & (1 << IPI_PREEMPT)) {
 #ifdef COUNT_IPIS
 		(*ipi_preempt_counts[cpu])++;
 #endif
-		sched_preempt(curthread);
+		sched_preempt(td);
 	}
 	if (ipi_bitmap & (1 << IPI_AST)) {
 #ifdef COUNT_IPIS
@@ -1217,14 +1222,11 @@ ipi_bitmap_handler(struct trapframe frame)
 #ifdef COUNT_IPIS
 		(*ipi_hardclock_counts[cpu])++;
 #endif
-		hardclockintr(&frame);
-	}
-	if (ipi_bitmap & (1 << IPI_STATCLOCK)) {
-#ifdef COUNT_IPIS
-		(*ipi_statclock_counts[cpu])++;
-#endif
-		statclockintr(&frame);
+		hardclockintr();
 	}
+	td->td_intr_frame = oldframe;
+	td->td_intr_nesting_level--;
+	critical_exit();
 }
 
 /*
@@ -1579,8 +1581,6 @@ mp_ipi_intrcnt(void *dummy)
 		intrcnt_add(buf, &ipi_lazypmap_counts[i]);
 		snprintf(buf, sizeof(buf), "cpu%d:hardclock", i);
 		intrcnt_add(buf, &ipi_hardclock_counts[i]);
-		snprintf(buf, sizeof(buf), "cpu%d:statclock", i);
-		intrcnt_add(buf, &ipi_statclock_counts[i]);
 	}
 }
 SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
diff --git a/sys/amd64/include/apicvar.h b/sys/amd64/include/apicvar.h
index 2ebf7c2582f4..ae2f5b90791e 100644
--- a/sys/amd64/include/apicvar.h
+++ b/sys/amd64/include/apicvar.h
@@ -123,8 +123,7 @@
 #define	IPI_AST		0 	/* Generate software trap. */
 #define IPI_PREEMPT     1
 #define IPI_HARDCLOCK   2
-#define IPI_STATCLOCK   3
-#define IPI_BITMAP_LAST IPI_STATCLOCK
+#define IPI_BITMAP_LAST IPI_HARDCLOCK
 #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST)
 
 #define	IPI_STOP	(APIC_IPI_INTS + 7)	/* Stop CPU until restarted. */
diff --git a/sys/dev/acpica/acpi_cpu.c b/sys/dev/acpica/acpi_cpu.c
index 5dd6ab97f7e7..fabbee90d1c5 100644
--- a/sys/dev/acpica/acpi_cpu.c
+++ b/sys/dev/acpica/acpi_cpu.c
@@ -900,7 +900,13 @@ acpi_cpu_idle()
 
     /* Find the lowest state that has small enough latency. */
     cx_next_idx = 0;
-    for (i = sc->cpu_cx_lowest; i >= 0; i--) {
+#ifndef __ia64__
+    if (cpu_disable_deep_sleep)
+	i = sc->cpu_non_c3;
+    else
+#endif
+	i = sc->cpu_cx_lowest;
+    for (; i >= 0; i--) {
 	if (sc->cpu_cx_states[i].trans_lat * 3 <= sc->cpu_prev_sleep) {
 	    cx_next_idx = i;
 	    break;
@@ -929,15 +935,17 @@ acpi_cpu_idle()
     /*
      * Execute HLT (or equivalent) and wait for an interrupt.  We can't
      * precisely calculate the time spent in C1 since the place we wake up
-     * is an ISR.  Assume we slept no more then half of quantum.
+     * is an ISR.  Assume we slept no more than half of the quantum unless
+     * we are called inside a critical section, delaying the context switch.
      */
     if (cx_next->type == ACPI_STATE_C1) {
 	AcpiHwRead(&start_time, &AcpiGbl_FADT.XPmTimerBlock);
 	acpi_cpu_c1();
 	AcpiHwRead(&end_time, &AcpiGbl_FADT.XPmTimerBlock);
-        end_time = acpi_TimerDelta(end_time, start_time);
-	sc->cpu_prev_sleep = (sc->cpu_prev_sleep * 3 +
-	    min(PM_USEC(end_time), 500000 / hz)) / 4;
+        end_time = PM_USEC(acpi_TimerDelta(end_time, start_time));
+        if (curthread->td_critnest == 0)
+		end_time = min(end_time, 500000 / hz);
+	sc->cpu_prev_sleep = (sc->cpu_prev_sleep * 3 + end_time) / 4;
 	return;
     }
 
diff --git a/sys/dev/acpica/acpi_hpet.c b/sys/dev/acpica/acpi_hpet.c
index 2a8eb301a091..f5cf11a09687 100644
--- a/sys/dev/acpica/acpi_hpet.c
+++ b/sys/dev/acpica/acpi_hpet.c
@@ -683,15 +683,15 @@ hpet_detach(device_t dev)
 static int
 hpet_suspend(device_t dev)
 {
-	struct hpet_softc *sc;
+//	struct hpet_softc *sc;
 
 	/*
 	 * Disable the timer during suspend.  The timer will not lose
 	 * its state in S1 or S2, but we are required to disable
 	 * it.
 	 */
-	sc = device_get_softc(dev);
-	hpet_disable(sc);
+//	sc = device_get_softc(dev);
+//	hpet_disable(sc);
 
 	return (0);
 }
diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c
index ef229ca6f5b8..2bf6dd155fd6 100644
--- a/sys/i386/i386/machdep.c
+++ b/sys/i386/i386/machdep.c
@@ -1175,9 +1175,6 @@ cpu_est_clockrate(int cpu_id, uint64_t *rate)
 	return (0);
 }
 
-
-void (*cpu_idle_hook)(void) = NULL;	/* ACPI idle hook. */
-
 #ifdef XEN
 
 void
@@ -1208,60 +1205,94 @@ cpu_halt(void)
 		__asm__ ("hlt");
 }
 
-static void
-cpu_idle_hlt(int busy)
-{
-	/*
-	 * we must absolutely guarentee that hlt is the next instruction
-	 * after sti or we introduce a timing window.
-	 */
-	disable_intr();
-  	if (sched_runnable())
-		enable_intr();
-	else
-		__asm __volatile("sti; hlt");
-}
 #endif
 
+void (*cpu_idle_hook)(void) = NULL;	/* ACPI idle hook. */
+static int	cpu_ident_amdc1e = 0;	/* AMD C1E supported. */
+static int	idle_mwait = 1;		/* Use MONITOR/MWAIT for short idle. */
+TUNABLE_INT("machdep.idle_mwait", &idle_mwait);
+SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait,
+    0, "Use MONITOR/MWAIT for short idle");
+
+#define	STATE_RUNNING	0x0
+#define	STATE_MWAIT	0x1
+#define	STATE_SLEEPING	0x2
+
 static void
 cpu_idle_acpi(int busy)
 {
+	int *state;
+
+	state = (int *)PCPU_PTR(monitorbuf);
+	*state = STATE_SLEEPING;
 	disable_intr();
-  	if (sched_runnable())
+	if (sched_runnable())
 		enable_intr();
 	else if (cpu_idle_hook)
 		cpu_idle_hook();
 	else
 		__asm __volatile("sti; hlt");
+	*state = STATE_RUNNING;
 }
 
-static int cpu_ident_amdc1e = 0;
+#ifndef XEN
+static void
+cpu_idle_hlt(int busy)
+{
+	int *state;
 
-static int
-cpu_probe_amdc1e(void)
-{ 
-#ifdef DEV_APIC
+	state = (int *)PCPU_PTR(monitorbuf);
+	*state = STATE_SLEEPING;
+	/*
+	 * We must absolutely guarantee that hlt is the next instruction
+	 * after sti or we introduce a timing window.
+	 */
+	disable_intr();
+	if (sched_runnable())
+		enable_intr();
+	else
+		__asm __volatile("sti; hlt");
+	*state = STATE_RUNNING;
+}
+#endif
+
+/*
+ * MWAIT cpu power states.  Lower 4 bits are sub-states.
+ */
+#define	MWAIT_C0	0xf0
+#define	MWAIT_C1	0x00
+#define	MWAIT_C2	0x10
+#define	MWAIT_C3	0x20
+#define	MWAIT_C4	0x30
+
+static void
+cpu_idle_mwait(int busy)
+{
+	int *state;
+
+	state = (int *)PCPU_PTR(monitorbuf);
+	*state = STATE_MWAIT;
+	if (!sched_runnable()) {
+		cpu_monitor(state, 0, 0);
+		if (*state == STATE_MWAIT)
+			cpu_mwait(0, MWAIT_C1);
+	}
+	*state = STATE_RUNNING;
+}
+
+static void
+cpu_idle_spin(int busy)
+{
+	int *state;
 	int i;
 
-	/*
-	 * Forget it, if we're not using local APIC timer.
-	 */
-	if (resource_disabled("apic", 0) ||
-	    (resource_int_value("apic", 0, "clock", &i) == 0 && i == 0))
-		return (0);
-
-	/*
-	 * Detect the presence of C1E capability mostly on latest
-	 * dual-cores (or future) k8 family.
-	 */
-	if (cpu_vendor_id == CPU_VENDOR_AMD &&
-	    (cpu_id & 0x00000f00) == 0x00000f00 &&
-	    (cpu_id & 0x0fff0000) >=  0x00040000) {
-		cpu_ident_amdc1e = 1;
-		return (1);
+	state = (int *)PCPU_PTR(monitorbuf);
+	*state = STATE_RUNNING;
+	for (i = 0; i < 1000; i++) {
+		if (sched_runnable())
+			return;
+		cpu_spinwait();
 	}
-#endif
-	return (0);
 }
 
 /*
@@ -1279,32 +1310,20 @@ cpu_probe_amdc1e(void)
 #define	AMDK8_CMPHALT		(AMDK8_SMIONCMPHALT | AMDK8_C1EONCMPHALT)
 
 static void
-cpu_idle_amdc1e(int busy)
+cpu_probe_amdc1e(void)
 {
 
-	disable_intr();
-	if (sched_runnable())
-		enable_intr();
-	else {
-		uint64_t msr;
-
-		msr = rdmsr(MSR_AMDK8_IPM);
-		if (msr & AMDK8_CMPHALT)
-			wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
-
-		if (cpu_idle_hook)
-			cpu_idle_hook();
-		else
-			__asm __volatile("sti; hlt");
+	/*
+	 * Detect the presence of C1E capability mostly on latest
+	 * dual-cores (or future) k8 family.
+	 */
+	if (cpu_vendor_id == CPU_VENDOR_AMD &&
+	    (cpu_id & 0x00000f00) == 0x00000f00 &&
+	    (cpu_id & 0x0fff0000) >=  0x00040000) {
+		cpu_ident_amdc1e = 1;
 	}
 }
 
-static void
-cpu_idle_spin(int busy)
-{
-	return;
-}
-
 #ifdef XEN
 void (*cpu_idle_fn)(int) = cpu_idle_hlt;
 #else
@@ -1314,79 +1333,72 @@ void (*cpu_idle_fn)(int) = cpu_idle_acpi;
 void
 cpu_idle(int busy)
 {
+	uint64_t msr;
+
+	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
+	    busy, curcpu);
 #if defined(SMP) && !defined(XEN)
 	if (mp_grab_cpu_hlt())
 		return;
 #endif
-	cpu_idle_fn(busy);
-}
-
-/*
- * mwait cpu power states.  Lower 4 bits are sub-states.
- */
-#define	MWAIT_C0	0xf0
-#define	MWAIT_C1	0x00
-#define	MWAIT_C2	0x10
-#define	MWAIT_C3	0x20
-#define	MWAIT_C4	0x30
-
-#define	MWAIT_DISABLED	0x0
-#define	MWAIT_WOKEN	0x1
-#define	MWAIT_WAITING	0x2
-
-static void
-cpu_idle_mwait(int busy)
-{
-	int *mwait;
-
-	mwait = (int *)PCPU_PTR(monitorbuf);
-	*mwait = MWAIT_WAITING;
-	if (sched_runnable())
-		return;
-	cpu_monitor(mwait, 0, 0);
-	if (*mwait == MWAIT_WAITING)
-		cpu_mwait(0, MWAIT_C1);
-}
-
-static void
-cpu_idle_mwait_hlt(int busy)
-{
-	int *mwait;
-
-	mwait = (int *)PCPU_PTR(monitorbuf);
-	if (busy == 0) {
-		*mwait = MWAIT_DISABLED;
-		cpu_idle_hlt(busy);
-		return;
+	/* If we are busy, try to use fast methods. */
+	if (busy) {
+		if ((cpu_feature2 & CPUID2_MON) && idle_mwait) {
+			cpu_idle_mwait(busy);
+			goto out;
+		}
 	}
-	*mwait = MWAIT_WAITING;
-	if (sched_runnable())
-		return;
-	cpu_monitor(mwait, 0, 0);
-	if (*mwait == MWAIT_WAITING)
-		cpu_mwait(0, MWAIT_C1);
+
+#ifndef XEN
+	/* If we have time, switch timers into idle mode. */
+	if (!busy) {
+		critical_enter();
+		cpu_idleclock();
+	}
+#endif
+
+	/* Apply AMD APIC timer C1E workaround. */
+	if (cpu_ident_amdc1e
+#ifndef XEN
+	    && cpu_disable_deep_sleep
+#endif
+	    ) {
+		msr = rdmsr(MSR_AMDK8_IPM);
+		if (msr & AMDK8_CMPHALT)
+			wrmsr(MSR_AMDK8_IPM, msr & ~AMDK8_CMPHALT);
+	}
+
+	/* Call main idle method. */
+	cpu_idle_fn(busy);
+
+#ifndef XEN
+	/* Switch timers back into active mode. */
+	if (!busy) {
+		cpu_activeclock();
+		critical_exit();
+	}
+#endif
+out:
+	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
+	    busy, curcpu);
 }
 
 int
 cpu_idle_wakeup(int cpu)
 {
 	struct pcpu *pcpu;
-	int *mwait;
+	int *state;
 
-	if (cpu_idle_fn == cpu_idle_spin)
-		return (1);
-	if (cpu_idle_fn != cpu_idle_mwait && cpu_idle_fn != cpu_idle_mwait_hlt)
-		return (0);
 	pcpu = pcpu_find(cpu);
-	mwait = (int *)pcpu->pc_monitorbuf;
+	state = (int *)pcpu->pc_monitorbuf;
 	/*
 	 * This doesn't need to be atomic since missing the race will
 	 * simply result in unnecessary IPIs.
 	 */
-	if (cpu_idle_fn == cpu_idle_mwait_hlt && *mwait == MWAIT_DISABLED)
+	if (*state == STATE_SLEEPING)
 		return (0);
-	*mwait = MWAIT_WOKEN;
-
+	if (*state == STATE_MWAIT)
+		*state = STATE_RUNNING;
 	return (1);
 }
 
@@ -1399,8 +1411,6 @@ struct {
 } idle_tbl[] = {
 	{ cpu_idle_spin, "spin" },
 	{ cpu_idle_mwait, "mwait" },
-	{ cpu_idle_mwait_hlt, "mwait_hlt" },
-	{ cpu_idle_amdc1e, "amdc1e" },
 	{ cpu_idle_hlt, "hlt" },
 	{ cpu_idle_acpi, "acpi" },
 	{ NULL, NULL }
@@ -1419,8 +1429,8 @@ idle_sysctl_available(SYSCTL_HANDLER_ARGS)
 		if (strstr(idle_tbl[i].id_name, "mwait") &&
 		    (cpu_feature2 & CPUID2_MON) == 0)
 			continue;
-		if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 &&
-		    cpu_ident_amdc1e == 0)
+		if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
+		    cpu_idle_hook == NULL)
 			continue;
 		p += sprintf(p, "%s, ", idle_tbl[i].id_name);
 	}
@@ -1429,6 +1439,9 @@ idle_sysctl_available(SYSCTL_HANDLER_ARGS)
 	return (error);
 }
 
+SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
+    0, 0, idle_sysctl_available, "A", "list of available idle functions");
+
 static int
 idle_sysctl(SYSCTL_HANDLER_ARGS)
 {
@@ -1452,8 +1465,8 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
 		if (strstr(idle_tbl[i].id_name, "mwait") &&
 		    (cpu_feature2 & CPUID2_MON) == 0)
 			continue;
-		if (strcmp(idle_tbl[i].id_name, "amdc1e") == 0 &&
-		    cpu_ident_amdc1e == 0)
+		if (strcmp(idle_tbl[i].id_name, "acpi") == 0 &&
+		    cpu_idle_hook == NULL)
 			continue;
 		if (strcmp(idle_tbl[i].id_name, buf))
 			continue;
@@ -1463,9 +1476,6 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
 	return (EINVAL);
 }
 
-SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
-    0, 0, idle_sysctl_available, "A", "list of available idle functions");
-
 SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
     idle_sysctl, "A", "currently selected idle function");
 
@@ -2695,8 +2705,7 @@ init386(first)
 	thread0.td_pcb->pcb_fsd = PCPU_GET(fsgs_gdt)[0];
 	thread0.td_pcb->pcb_gsd = PCPU_GET(fsgs_gdt)[1];
 
-	if (cpu_probe_amdc1e())
-		cpu_idle_fn = cpu_idle_amdc1e;
+	cpu_probe_amdc1e();
 }
 
 #else
@@ -2970,8 +2979,7 @@ init386(first)
 	thread0.td_pcb->pcb_ext = 0;
 	thread0.td_frame = &proc0_tf;
 
-	if (cpu_probe_amdc1e())
-		cpu_idle_fn = cpu_idle_amdc1e;
+	cpu_probe_amdc1e();
 }
 #endif
 
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index fa50ecfe6247..f660e1c89215 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -167,7 +167,6 @@ u_long *ipi_invlcache_counts[MAXCPU];
 u_long *ipi_rendezvous_counts[MAXCPU];
 u_long *ipi_lazypmap_counts[MAXCPU];
 static u_long *ipi_hardclock_counts[MAXCPU];
-static u_long *ipi_statclock_counts[MAXCPU];
 #endif
 
 /*
@@ -1284,16 +1283,22 @@ smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2)
 void
 ipi_bitmap_handler(struct trapframe frame)
 {
+	struct trapframe *oldframe;
+	struct thread *td;
 	int cpu = PCPU_GET(cpuid);
 	u_int ipi_bitmap;
 
+	critical_enter();
+	td = curthread;
+	td->td_intr_nesting_level++;
+	oldframe = td->td_intr_frame;
+	td->td_intr_frame = &frame;
 	ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]);
-
 	if (ipi_bitmap & (1 << IPI_PREEMPT)) {
 #ifdef COUNT_IPIS
 		(*ipi_preempt_counts[cpu])++;
 #endif
-		sched_preempt(curthread);
+		sched_preempt(td);
 	}
 	if (ipi_bitmap & (1 << IPI_AST)) {
 #ifdef COUNT_IPIS
@@ -1305,14 +1310,11 @@ ipi_bitmap_handler(struct trapframe frame)
 #ifdef COUNT_IPIS
 		(*ipi_hardclock_counts[cpu])++;
 #endif
-		hardclockintr(&frame); 
-	}
-	if (ipi_bitmap & (1 << IPI_STATCLOCK)) {
-#ifdef COUNT_IPIS
-		(*ipi_statclock_counts[cpu])++;
-#endif
-		statclockintr(&frame); 
+		hardclockintr();
 	}
+	td->td_intr_frame = oldframe;
+	td->td_intr_nesting_level--;
+	critical_exit();
 }
 
 /*
@@ -1627,8 +1629,6 @@ mp_ipi_intrcnt(void *dummy)
 		intrcnt_add(buf, &ipi_lazypmap_counts[i]);
 		snprintf(buf, sizeof(buf), "cpu%d:hardclock", i);
 		intrcnt_add(buf, &ipi_hardclock_counts[i]);
-		snprintf(buf, sizeof(buf), "cpu%d:statclock", i);
-		intrcnt_add(buf, &ipi_statclock_counts[i]);
 	}		
 }
 SYSINIT(mp_ipi_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, mp_ipi_intrcnt, NULL);
diff --git a/sys/i386/include/apicvar.h b/sys/i386/include/apicvar.h
index cada0173f756..ff1f6577272b 100644
--- a/sys/i386/include/apicvar.h
+++ b/sys/i386/include/apicvar.h
@@ -124,8 +124,7 @@
 #define	IPI_AST		0 	/* Generate software trap. */
 #define IPI_PREEMPT     1
 #define IPI_HARDCLOCK   2 
-#define IPI_STATCLOCK   3 
-#define IPI_BITMAP_LAST IPI_STATCLOCK
+#define IPI_BITMAP_LAST IPI_HARDCLOCK
 #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST)
 
 #define	IPI_STOP	(APIC_IPI_INTS + 7)	/* Stop CPU until restarted. */
@@ -152,8 +151,7 @@
 #define	IPI_AST		0 	/* Generate software trap. */
 #define IPI_PREEMPT     1
 #define IPI_HARDCLOCK   2 
-#define IPI_STATCLOCK   3 
-#define IPI_BITMAP_LAST IPI_STATCLOCK
+#define IPI_BITMAP_LAST IPI_HARDCLOCK
 #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST)
 
 #define	IPI_STOP	(APIC_IPI_INTS + 7)	/* Stop CPU until restarted. */
diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
index c283b6b1e6a7..ff5747e104c7 100644
--- a/sys/kern/kern_clock.c
+++ b/sys/kern/kern_clock.c
@@ -373,11 +373,9 @@ int	profprocs;
 int	ticks;
 int	psratio;
 
-int	timer1hz;
-int	timer2hz;
-static DPCPU_DEFINE(u_int, hard_cnt);
-static DPCPU_DEFINE(u_int, stat_cnt);
-static DPCPU_DEFINE(u_int, prof_cnt);
+static DPCPU_DEFINE(int, pcputicks);	/* Per-CPU version of ticks. */
+static struct mtx	global_hardclock_mtx;
+MTX_SYSINIT(global_hardclock_mtx, &global_hardclock_mtx, "ghc_mtx", MTX_SPIN);
 
 /*
  * Initialize clock frequencies and start both clocks running.
@@ -408,52 +406,6 @@ initclocks(dummy)
 #endif
 }
 
-void
-timer1clock(int usermode, uintfptr_t pc)
-{
-	u_int *cnt;
-
-	cnt = DPCPU_PTR(hard_cnt);
-	*cnt += hz;
-	if (*cnt >= timer1hz) {
-		*cnt -= timer1hz;
-		if (*cnt >= timer1hz)
-			*cnt = 0;
-		if (PCPU_GET(cpuid) == 0)
-			hardclock(usermode, pc);
-		else
-			hardclock_cpu(usermode);
-	}
-	if (timer2hz == 0)
-		timer2clock(usermode, pc);
-}
-
-void
-timer2clock(int usermode, uintfptr_t pc)
-{
-	u_int *cnt;
-	int t2hz = timer2hz ? timer2hz : timer1hz;
-
-	cnt = DPCPU_PTR(stat_cnt);
-	*cnt += stathz;
-	if (*cnt >= t2hz) {
-		*cnt -= t2hz;
-		if (*cnt >= t2hz)
-			*cnt = 0;
-		statclock(usermode);
-	}
-	if (profprocs == 0)
-		return;
-	cnt = DPCPU_PTR(prof_cnt);
-	*cnt += profhz;
-	if (*cnt >= t2hz) {
-		*cnt -= t2hz;
-		if (*cnt >= t2hz)
-			*cnt = 0;
-		profclock(usermode, pc);
-	}
-}
-
 /*
  * Each time the real-time timer fires, this function is called on all CPUs.
  * Note that hardclock() calls hardclock_cpu() for the boot CPU, so only
@@ -486,7 +438,7 @@ hardclock_cpu(int usermode)
 		PROC_SUNLOCK(p);
 	}
 	thread_lock(td);
-	sched_tick();
+	sched_tick(1);
 	td->td_flags |= flags;
 	thread_unlock(td);
 
@@ -507,6 +459,7 @@ hardclock(int usermode, uintfptr_t pc)
 	atomic_add_int((volatile int *)&ticks, 1);
 	hardclock_cpu(usermode);
 	tc_ticktock();
+	cpu_tick_calibration();
 	/*
 	 * If no separate statistics clock is available, run it from here.
 	 *
@@ -525,6 +478,89 @@ hardclock(int usermode, uintfptr_t pc)
 #endif /* SW_WATCHDOG */
 }
 
+void
+hardclock_anycpu(int cnt, int usermode)
+{
+	struct pstats *pstats;
+	struct thread *td = curthread;
+	struct proc *p = td->td_proc;
+	int *t = DPCPU_PTR(pcputicks);
+	int flags;
+	int global, newticks;
+
+	/*
+	 * Update per-CPU and possibly global ticks values.
+	 */
+	*t += cnt;
+	do {
+		global = ticks;
+		newticks = *t - global;
+		if (newticks <= 0) {
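+			/*
+			 * Another CPU has already advanced the global ticks
+			 * past our per-CPU counter; resynchronize pcputicks
+			 * if it has fallen more than one tick behind.
+			 */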
+			if (newticks < -1)
+				*t = global - 1;
+			newticks = 0;
+			break;
+		}
+	} while (!atomic_cmpset_int(&ticks, global, *t));
+
+	/*
+	 * Run current process's virtual and profile time, as needed.
+	 */
+	pstats = p->p_stats;
+	flags = 0;
+	if (usermode &&
+	    timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value)) {
+		PROC_SLOCK(p);
+		if (itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL],
+		    tick * cnt) == 0)
+			flags |= TDF_ALRMPEND | TDF_ASTPENDING;
+		PROC_SUNLOCK(p);
+	}
+	if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value)) {
+		PROC_SLOCK(p);
+		if (itimerdecr(&pstats->p_timer[ITIMER_PROF],
+		    tick * cnt) == 0)
+			flags |= TDF_PROFPEND | TDF_ASTPENDING;
+		PROC_SUNLOCK(p);
+	}
+	thread_lock(td);
+	sched_tick(cnt);
+	td->td_flags |= flags;
+	thread_unlock(td);
+
+#ifdef	HWPMC_HOOKS
+	if (PMC_CPU_HAS_SAMPLES(PCPU_GET(cpuid)))
+		PMC_CALL_HOOK_UNLOCKED(curthread, PMC_FN_DO_SAMPLES, NULL);
+#endif
+	callout_tick();
+	/* We are in charge of handling this tick duty. */
+	if (newticks > 0) {
+		mtx_lock_spin(&global_hardclock_mtx);
+		tc_ticktock();
+#ifdef DEVICE_POLLING
+		hardclock_device_poll(); /* This is very short and quick. */
+#endif /* DEVICE_POLLING */
+#ifdef SW_WATCHDOG
+		if (watchdog_enabled > 0) {
+			watchdog_ticks -= newticks;
+			if (watchdog_ticks <= 0)
+				watchdog_fire();
+		}
+#endif /* SW_WATCHDOG */
+		mtx_unlock_spin(&global_hardclock_mtx);
+	}
+	if (curcpu == CPU_FIRST())
+		cpu_tick_calibration();
+}
+
+void
+hardclock_sync(int cpu)
+{
+	int	*t = DPCPU_ID_PTR(cpu, pcputicks);
+
+	*t = ticks;
+}
+
 /*
  * Compute number of ticks in the specified amount of time.
  */
diff --git a/sys/kern/kern_clocksource.c b/sys/kern/kern_clocksource.c
index 6b005de8c5e9..29304a496cb8 100644
--- a/sys/kern/kern_clocksource.c
+++ b/sys/kern/kern_clocksource.c
@@ -41,6 +41,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/bus.h>
 #include <sys/lock.h>
 #include <sys/kdb.h>
+#include <sys/ktr.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
 #include <sys/kernel.h>
@@ -59,28 +60,79 @@ __FBSDID("$FreeBSD$");
 cyclic_clock_func_t	cyclic_clock_func[MAXCPU];
 #endif
 
-static void		cpu_restartclocks(void);
-static void		timercheck(void);
-inline static int	doconfigtimer(int i);
-static void		configtimer(int i);
+int			cpu_disable_deep_sleep = 0; /* Timer dies in C3. */
 
-static struct eventtimer *timer[2] = { NULL, NULL };
-static int		timertest = 0;
-static int		timerticks[2] = { 0, 0 };
-static int		profiling_on = 0;
-static struct bintime	timerperiod[2];
+static void		setuptimer(void);
+static void		loadtimer(struct bintime *now, int first);
+static int		doconfigtimer(void);
+static void		configtimer(int start);
+static int		round_freq(struct eventtimer *et, int freq);
 
-static char		timername[2][32];
-TUNABLE_STR("kern.eventtimer.timer1", timername[0], sizeof(*timername));
-TUNABLE_STR("kern.eventtimer.timer2", timername[1], sizeof(*timername));
+static void		getnextcpuevent(struct bintime *event, int idle);
+static void		getnextevent(struct bintime *event);
+static int		handleevents(struct bintime *now, int fake);
+#ifdef SMP
+static void		cpu_new_callout(int cpu, int ticks);
+#endif
 
-static u_int		singlemul = 0;
+static struct mtx	et_hw_mtx;
+
+#define	ET_HW_LOCK(state)						\
+	{								\
+		if (timer->et_flags & ET_FLAGS_PERCPU)			\
+			mtx_lock_spin(&(state)->et_hw_mtx);		\
+		else							\
+			mtx_lock_spin(&et_hw_mtx);			\
+	}
+
+#define	ET_HW_UNLOCK(state)						\
+	{								\
+		if (timer->et_flags & ET_FLAGS_PERCPU)			\
+			mtx_unlock_spin(&(state)->et_hw_mtx);		\
+		else							\
+			mtx_unlock_spin(&et_hw_mtx);			\
+	}
+
+static struct eventtimer *timer = NULL;
+static struct bintime	timerperiod;	/* Timer period for periodic mode. */
+static struct bintime	hardperiod;	/* hardclock() events period. */
+static struct bintime	statperiod;	/* statclock() events period. */
+static struct bintime	profperiod;	/* profclock() events period. */
+static struct bintime	nexttick;	/* Next global timer tick time. */
+static u_int		busy = 0;	/* Reconfiguration is in progress. */
+static int		profiling = 0;	/* Profiling events enabled. */
+
+static char		timername[32];	/* Wanted timer. */
+TUNABLE_STR("kern.eventtimer.timer", timername, sizeof(timername));
+
+static u_int		singlemul = 0;	/* Multiplier for periodic mode. */
 TUNABLE_INT("kern.eventtimer.singlemul", &singlemul);
 SYSCTL_INT(_kern_eventtimer, OID_AUTO, singlemul, CTLFLAG_RW, &singlemul,
-    0, "Multiplier, used in single timer mode");
+    0, "Multiplier for periodic mode");
 
-typedef u_int tc[2];
-static DPCPU_DEFINE(tc, configtimer);
+static u_int		idletick = 0;	/* Run periodic events when idle. */
+TUNABLE_INT("kern.eventtimer.idletick", &idletick);
+SYSCTL_INT(_kern_eventtimer, OID_AUTO, idletick, CTLFLAG_RW, &idletick,
+    0, "Run periodic events when idle");
+
+static int		periodic = 0;	/* Periodic or one-shot mode. */
+TUNABLE_INT("kern.eventtimer.periodic", &periodic);
+
+struct pcpu_state {
+	struct mtx	et_hw_mtx;	/* Per-CPU timer mutex. */
+	u_int		action;		/* Reconfiguration requests. */
+	u_int		handle;		/* Immediate handle requests. */
+	struct bintime	now;		/* Last tick time. */
+	struct bintime	nextevent;	/* Next scheduled event on this CPU. */
+	struct bintime	nexttick;	/* Next timer tick time. */
+	struct bintime	nexthard;	/* Next hardclock() event. */
+	struct bintime	nextstat;	/* Next statclock() event. */
+	struct bintime	nextprof;	/* Next profclock() event. */
+	int		ipi;		/* This CPU needs IPI. */
+	int		idle;		/* This CPU is in idle mode. */
+};
+
+static DPCPU_DEFINE(struct pcpu_state, timerstate);
 
 #define FREQ2BT(freq, bt)						\
 {									\
@@ -91,159 +143,325 @@ static DPCPU_DEFINE(tc, configtimer);
 	(((uint64_t)0x8000000000000000 + ((bt)->frac >> 2)) /		\
 	    ((bt)->frac >> 1))
 
-/* Per-CPU timer1 handler. */
-static int
-hardclockhandler(struct trapframe *frame)
+/*
+ * Timer broadcast IPI handler.
+ */
+int
+hardclockintr(void)
 {
+	struct bintime now;
+	struct pcpu_state *state;
+	int done;
 
+	if (doconfigtimer() || busy)
+		return (FILTER_HANDLED);
+	state = DPCPU_PTR(timerstate);
+	now = state->now;
+	CTR4(KTR_SPARE2, "ipi  at %d:    now  %d.%08x%08x",
+	    curcpu, now.sec, (unsigned int)(now.frac >> 32),
+			     (unsigned int)(now.frac & 0xffffffff));
+	done = handleevents(&now, 0);
+	return (done ? FILTER_HANDLED : FILTER_STRAY);
+}
+
+/*
+ * Handle all events for specified time on this CPU
+ */
+static int
+handleevents(struct bintime *now, int fake)
+{
+	struct bintime t;
+	struct trapframe *frame;
+	struct pcpu_state *state;
+	uintfptr_t pc;
+	int usermode;
+	int done, runs;
+
+	CTR4(KTR_SPARE2, "handle at %d:  now  %d.%08x%08x",
+	    curcpu, now->sec, (unsigned int)(now->frac >> 32),
+		     (unsigned int)(now->frac & 0xffffffff));
+	done = 0;
+	if (fake) {
+		frame = NULL;
+		usermode = 0;
+		pc = 0;
+	} else {
+		frame = curthread->td_intr_frame;
+		usermode = TRAPF_USERMODE(frame);
+		pc = TRAPF_PC(frame);
+	}
 #ifdef KDTRACE_HOOKS
 	/*
 	 * If the DTrace hooks are configured and a callback function
 	 * has been registered, then call it to process the high speed
 	 * timers.
 	 */
-	int cpu = curcpu;
-	if (cyclic_clock_func[cpu] != NULL)
-		(*cyclic_clock_func[cpu])(frame);
+	if (!fake && cyclic_clock_func[curcpu] != NULL)
+		(*cyclic_clock_func[curcpu])(frame);
 #endif
-
-	timer1clock(TRAPF_USERMODE(frame), TRAPF_PC(frame));
-	return (FILTER_HANDLED);
-}
-
-/* Per-CPU timer2 handler. */
-static int
-statclockhandler(struct trapframe *frame)
-{
-
-	timer2clock(TRAPF_USERMODE(frame), TRAPF_PC(frame));
-	return (FILTER_HANDLED);
-}
-
-/* timer1 broadcast IPI handler. */
-int
-hardclockintr(struct trapframe *frame)
-{
-
-	if (doconfigtimer(0))
-		return (FILTER_HANDLED);
-	return (hardclockhandler(frame));
-}
-
-/* timer2 broadcast IPI handler. */
-int
-statclockintr(struct trapframe *frame)
-{
-
-	if (doconfigtimer(1))
-		return (FILTER_HANDLED);
-	return (statclockhandler(frame));
-}
-
-/* timer1 callback. */
-static void
-timer1cb(struct eventtimer *et, void *arg)
-{
-
-#ifdef SMP
-	/* Broadcast interrupt to other CPUs for non-per-CPU timers */
-	if (smp_started && (et->et_flags & ET_FLAGS_PERCPU) == 0)
-		ipi_all_but_self(IPI_HARDCLOCK);
-#endif
-	if (timertest) {
-		if ((et->et_flags & ET_FLAGS_PERCPU) == 0 || curcpu == 0) {
-			timerticks[0]++;
-			if (timerticks[0] >= timer1hz) {
-				ET_LOCK();
-				timercheck();
-				ET_UNLOCK();
-			}
-		}
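+	/*
+	 * Count how many hardclock() periods have elapsed since the last
+	 * run on this CPU and pass the number to hardclock_anycpu().
+	 */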
+	runs = 0;
+	state = DPCPU_PTR(timerstate);
+	while (bintime_cmp(now, &state->nexthard, >=)) {
+		bintime_add(&state->nexthard, &hardperiod);
+		runs++;
 	}
-	hardclockhandler(curthread->td_intr_frame);
-}
-
-/* timer2 callback. */
-static void
-timer2cb(struct eventtimer *et, void *arg)
-{
-
-#ifdef SMP
-	/* Broadcast interrupt to other CPUs for non-per-CPU timers */
-	if (smp_started && (et->et_flags & ET_FLAGS_PERCPU) == 0)
-		ipi_all_but_self(IPI_STATCLOCK);
-#endif
-	if (timertest) {
-		if ((et->et_flags & ET_FLAGS_PERCPU) == 0 || curcpu == 0) {
-			timerticks[1]++;
-			if (timerticks[1] >= timer2hz * 2) {
-				ET_LOCK();
-				timercheck();
-				ET_UNLOCK();
-			}
-		}
+	if (runs) {
+		hardclock_anycpu(runs, usermode);
+		done = 1;
 	}
-	statclockhandler(curthread->td_intr_frame);
+	while (bintime_cmp(now, &state->nextstat, >=)) {
+		statclock(usermode);
+		bintime_add(&state->nextstat, &statperiod);
+		done = 1;
+	}
+	if (profiling) {
+		while (bintime_cmp(now, &state->nextprof, >=)) {
+			if (!fake)
+				profclock(usermode, pc);
+			bintime_add(&state->nextprof, &profperiod);
+			done = 1;
+		}
+	} else
+		state->nextprof = state->nextstat;
+	getnextcpuevent(&t, 0);
+	ET_HW_LOCK(state);
+	if (!busy) {
+		state->idle = 0;
+		state->nextevent = t;
+		loadtimer(now, 0);
+	}
+	ET_HW_UNLOCK(state);
+	return (done);
 }
 
 /*
- * Check that both timers are running with at least 1/4 of configured rate.
- * If not - replace the broken one.
+ * Schedule binuptime of the next event on current CPU.
  */
 static void
-timercheck(void)
+getnextcpuevent(struct bintime *event, int idle)
 {
+	struct bintime tmp;
+	struct pcpu_state *state;
+	int skip;
 
-	if (!timertest)
-		return;
-	timertest = 0;
-	if (timerticks[0] * 4 < timer1hz) {
-		printf("Event timer \"%s\" is dead.\n", timer[0]->et_name);
-		timer1hz = 0;
-		configtimer(0);
-		et_ban(timer[0]);
-		et_free(timer[0]);
-		timer[0] = et_find(NULL, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
-		if (timer[0] == NULL) {
-			timer2hz = 0;
-			configtimer(1);
-			et_free(timer[1]);
-			timer[1] = NULL;
-			timer[0] = timer[1];
+	state = DPCPU_PTR(timerstate);
+	*event = state->nexthard;
+	if (idle) { /* If CPU is idle, ask callouts for how long. */
+		skip = callout_tickstofirst() - 1;
+		CTR2(KTR_SPARE2, "skip   at %d: %d", curcpu, skip);
+		tmp = hardperiod;
+		bintime_mul(&tmp, skip);
+		bintime_add(event, &tmp);
+	} else { /* If CPU is active, handle all types of events. */
+		if (bintime_cmp(event, &state->nextstat, >))
+			*event = state->nextstat;
+		if (profiling &&
+		    bintime_cmp(event, &state->nextprof, >))
+			*event = state->nextprof;
+	}
+}
+
+/*
+ * Schedule binuptime of the next event on all CPUs.
+ */
+static void
+getnextevent(struct bintime *event)
+{
+	struct pcpu_state *state;
+#ifdef SMP
+	int	cpu;
+#endif
+	int	c;
+
+	state = DPCPU_PTR(timerstate);
+	*event = state->nextevent;
+	c = curcpu;
+#ifdef SMP
+	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) {
+		CPU_FOREACH(cpu) {
+			if (curcpu == cpu)
+				continue;
+			state = DPCPU_ID_PTR(cpu, timerstate);
+			if (bintime_cmp(event, &state->nextevent, >)) {
+				*event = state->nextevent;
+				c = cpu;
+			}
 		}
-		et_init(timer[0], timer1cb, NULL, NULL);
-		cpu_restartclocks();
-		return;
 	}
-	if (timerticks[1] * 4 < timer2hz) {
-		printf("Event timer \"%s\" is dead.\n", timer[1]->et_name);
-		timer2hz = 0;
-		configtimer(1);
-		et_ban(timer[1]);
-		et_free(timer[1]);
-		timer[1] = et_find(NULL, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
-		if (timer[1] != NULL)
-			et_init(timer[1], timer2cb, NULL, NULL);
-		cpu_restartclocks();
+#endif
+	CTR5(KTR_SPARE2, "next at %d:    next %d.%08x%08x by %d",
+	    curcpu, event->sec, (unsigned int)(event->frac >> 32),
+			     (unsigned int)(event->frac & 0xffffffff), c);
+}
+
+/* Hardware timer callback function. */
+static void
+timercb(struct eventtimer *et, void *arg)
+{
+	struct bintime now;
+	struct bintime *next;
+	struct pcpu_state *state;
+#ifdef SMP
+	int cpu, bcast;
+#endif
+
+	/* Do not touch anything if somebody is reconfiguring timers. */
+	if (busy)
 		return;
+	/* Update present and next tick times. */
+	state = DPCPU_PTR(timerstate);
+	if (et->et_flags & ET_FLAGS_PERCPU) {
+		next = &state->nexttick;
+	} else
+		next = &nexttick;
+	if (periodic) {
+		now = *next;	/* Ex-next tick time becomes present time. */
+		bintime_add(next, &timerperiod); /* Next tick in 1 period. */
+	} else {
+		binuptime(&now);	/* Get present time from hardware. */
+		next->sec = -1;		/* Next tick is not scheduled yet. */
 	}
+	state->now = now;
+	CTR4(KTR_SPARE2, "intr at %d:    now  %d.%08x%08x",
+	    curcpu, now.sec, (unsigned int)(now.frac >> 32),
+			     (unsigned int)(now.frac & 0xffffffff));
+
+#ifdef SMP
+	/* Prepare broadcasting to other CPUs for non-per-CPU timers. */
+	bcast = 0;
+	if ((et->et_flags & ET_FLAGS_PERCPU) == 0 && smp_started) {
+		CPU_FOREACH(cpu) {
+			if (curcpu == cpu)
+				continue;
+			state = DPCPU_ID_PTR(cpu, timerstate);
+			ET_HW_LOCK(state);
+			state->now = now;
+			if (bintime_cmp(&now, &state->nextevent, >=)) {
+				state->nextevent.sec++;
+				state->ipi = 1;
+				bcast = 1;
+			}
+			ET_HW_UNLOCK(state);
+		}
+	}
+#endif
+
+	/* Handle events for this time on this CPU. */
+	handleevents(&now, 0);
+
+#ifdef SMP
+	/* Broadcast interrupt to other CPUs for non-per-CPU timers. */
+	if (bcast) {
+		CPU_FOREACH(cpu) {
+			if (curcpu == cpu)
+				continue;
+			state = DPCPU_ID_PTR(cpu, timerstate);
+			if (state->ipi) {
+				state->ipi = 0;
+				ipi_cpu(cpu, IPI_HARDCLOCK);
+			}
+		}
+	}
+#endif
+}
+
+/*
+ * Load new value into hardware timer.
+ */
+static void
+loadtimer(struct bintime *now, int start)
+{
+	struct pcpu_state *state;
+	struct bintime new;
+	struct bintime *next;
+	uint64_t tmp;
+	int eq;
+
+	if (periodic) {
+		if (start) {
+			/*
+			 * Try to start all periodic timers aligned
+			 * to period to make events synchronous.
+			 */
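+			/*
+			 * Seconds and fraction are folded into one 64-bit
+			 * fixed-point value, so a single modulo yields the
+			 * offset into the current period; the first event is
+			 * then placed on a period boundary.
+			 */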
+			tmp = ((uint64_t)now->sec << 36) + (now->frac >> 28);
+			tmp = (tmp % (timerperiod.frac >> 28)) << 28;
+			tmp = timerperiod.frac - tmp;
+			new = timerperiod;
+			bintime_addx(&new, tmp);
+			CTR5(KTR_SPARE2, "load p at %d:   now %d.%08x first in %d.%08x",
+			    curcpu, now->sec, (unsigned int)(now->frac >> 32),
+			    new.sec, (unsigned int)(new.frac >> 32));
+			et_start(timer, &new, &timerperiod);
+		}
+	} else {
+		if (timer->et_flags & ET_FLAGS_PERCPU) {
+			state = DPCPU_PTR(timerstate);
+			next = &state->nexttick;
+		} else
+			next = &nexttick;
+		getnextevent(&new);
+		eq = bintime_cmp(&new, next, ==);
+		CTR5(KTR_SPARE2, "load at %d:    next %d.%08x%08x eq %d",
+		    curcpu, new.sec, (unsigned int)(new.frac >> 32),
+			     (unsigned int)(new.frac & 0xffffffff),
+			     eq);
+		if (!eq) {
+			*next = new;
+			bintime_sub(&new, now);
+			et_start(timer, &new, NULL);
+		}
+	}
+}
+
+/*
+ * Prepare event timer parameters after configuration changes.
+ */
+static void
+setuptimer(void)
+{
+	int freq;
+
+	if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0)
+		periodic = 0;
+	else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0)
+		periodic = 1;
+	freq = hz * singlemul;
+	while (freq < (profiling ? profhz : stathz))
+		freq += hz;
+	freq = round_freq(timer, freq);
+	FREQ2BT(freq, &timerperiod);
 }
 
 /*
  * Reconfigure specified per-CPU timer on other CPU. Called from IPI handler.
  */
-inline static int
-doconfigtimer(int i)
+static int
+doconfigtimer(void)
 {
-	tc *conf;
+	struct bintime now;
+	struct pcpu_state *state;
 
-	conf = DPCPU_PTR(configtimer);
-	if (atomic_load_acq_int(*conf + i)) {
-		if (i == 0 ? timer1hz : timer2hz)
-			et_start(timer[i], NULL, &timerperiod[i]);
-		else
-			et_stop(timer[i]);
-		atomic_store_rel_int(*conf + i, 0);
+	state = DPCPU_PTR(timerstate);
+	switch (atomic_load_acq_int(&state->action)) {
+	case 1:
+		binuptime(&now);
+		ET_HW_LOCK(state);
+		loadtimer(&now, 1);
+		ET_HW_UNLOCK(state);
+		state->handle = 0;
+		atomic_store_rel_int(&state->action, 0);
+		return (1);
+	case 2:
+		ET_HW_LOCK(state);
+		et_stop(timer);
+		ET_HW_UNLOCK(state);
+		state->handle = 0;
+		atomic_store_rel_int(&state->action, 0);
+		return (1);
+	}
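+	/*
+	 * A remote CPU (e.g. a callout insertion while this CPU was idle)
+	 * asked us to process pending events immediately.
+	 */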
+	if (atomic_readandclear_int(&state->handle) && !busy) {
+		binuptime(&now);
+		handleevents(&now, 0);
 		return (1);
 	}
 	return (0);
@@ -254,45 +472,79 @@ doconfigtimer(int i)
  * For per-CPU timers use IPI to make other CPUs to reconfigure.
  */
 static void
-configtimer(int i)
+configtimer(int start)
 {
-#ifdef SMP
-	tc *conf;
+	struct bintime now, next;
+	struct pcpu_state *state;
 	int cpu;
 
+	if (start) {
+		setuptimer();
+		binuptime(&now);
+	}
 	critical_enter();
-#endif
-	/* Start/stop global timer or per-CPU timer of this CPU. */
-	if (i == 0 ? timer1hz : timer2hz)
-		et_start(timer[i], NULL, &timerperiod[i]);
-	else
-		et_stop(timer[i]);
+	ET_HW_LOCK(DPCPU_PTR(timerstate));
+	if (start) {
+		/* Initialize time machine parameters. */
+		next = now;
+		bintime_add(&next, &timerperiod);
+		if (periodic)
+			nexttick = next;
+		else
+			nexttick.sec = -1;
+		CPU_FOREACH(cpu) {
+			state = DPCPU_ID_PTR(cpu, timerstate);
+			state->now = now;
+			state->nextevent = next;
+			if (periodic)
+				state->nexttick = next;
+			else
+				state->nexttick.sec = -1;
+			state->nexthard = next;
+			state->nextstat = next;
+			state->nextprof = next;
+			hardclock_sync(cpu);
+		}
+		busy = 0;
+		/* Start global timer or per-CPU timer of this CPU. */
+		loadtimer(&now, 1);
+	} else {
+		busy = 1;
+		/* Stop global timer or per-CPU timer of this CPU. */
+		et_stop(timer);
+	}
+	ET_HW_UNLOCK(DPCPU_PTR(timerstate));
 #ifdef SMP
-	if ((timer[i]->et_flags & ET_FLAGS_PERCPU) == 0 || !smp_started) {
+	/* If timer is global or there are no other CPUs yet, we are done. */
+	if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || !smp_started) {
 		critical_exit();
 		return;
 	}
 	/* Set reconfigure flags for other CPUs. */
 	CPU_FOREACH(cpu) {
-		conf = DPCPU_ID_PTR(cpu, configtimer);
-		atomic_store_rel_int(*conf + i, (cpu == curcpu) ? 0 : 1);
+		state = DPCPU_ID_PTR(cpu, timerstate);
+		atomic_store_rel_int(&state->action,
+		    (cpu == curcpu) ? 0 : (start ? 1 : 2));
 	}
-	/* Send reconfigure IPI. */
-	ipi_all_but_self(i == 0 ? IPI_HARDCLOCK : IPI_STATCLOCK);
+	/* Broadcast reconfigure IPI. */
+	ipi_all_but_self(IPI_HARDCLOCK);
 	/* Wait for reconfiguration completed. */
 restart:
 	cpu_spinwait();
 	CPU_FOREACH(cpu) {
 		if (cpu == curcpu)
 			continue;
-		conf = DPCPU_ID_PTR(cpu, configtimer);
-		if (atomic_load_acq_int(*conf + i))
+		state = DPCPU_ID_PTR(cpu, timerstate);
+		if (atomic_load_acq_int(&state->action))
 			goto restart;
 	}
-	critical_exit();
 #endif
+	critical_exit();
 }
 
+/*
+ * Calculate nearest frequency supported by hardware timer.
+ */
 static int
 round_freq(struct eventtimer *et, int freq)
 {
@@ -314,23 +566,49 @@ round_freq(struct eventtimer *et, int freq)
 }
 
 /*
- * Configure and start event timers.
+ * Configure and start event timers (BSP part).
  */
 void
 cpu_initclocks_bsp(void)
 {
-	int base, div;
+	struct pcpu_state *state;
+	int base, div, cpu;
 
-	timer[0] = et_find(timername[0], ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
-	if (timer[0] == NULL)
-		timer[0] = et_find(NULL, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
-	if (timer[0] == NULL)
+	mtx_init(&et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN);
+	CPU_FOREACH(cpu) {
+		state = DPCPU_ID_PTR(cpu, timerstate);
+		mtx_init(&state->et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN);
+	}
+#ifdef SMP
+	callout_new_inserted = cpu_new_callout;
+#endif
+	/* Grab the requested timer or the best one present. */
+	if (timername[0])
+		timer = et_find(timername, 0, 0);
+	if (timer == NULL && periodic) {
+		timer = et_find(NULL,
+		    ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
+	}
+	if (timer == NULL) {
+		timer = et_find(NULL,
+		    ET_FLAGS_ONESHOT, ET_FLAGS_ONESHOT);
+	}
+	if (timer == NULL && !periodic) {
+		timer = et_find(NULL,
+		    ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
+	}
+	if (timer == NULL)
 		panic("No usable event timer found!");
-	et_init(timer[0], timer1cb, NULL, NULL);
-	timer[1] = et_find(timername[1][0] ? timername[1] : NULL,
-	    ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
-	if (timer[1])
-		et_init(timer[1], timer2cb, NULL, NULL);
+	et_init(timer, timercb, NULL, NULL);
+
+	/* Adapt to timer capabilities. */
+	if (periodic && (timer->et_flags & ET_FLAGS_PERIODIC) == 0)
+		periodic = 0;
+	else if (!periodic && (timer->et_flags & ET_FLAGS_ONESHOT) == 0)
+		periodic = 1;
+	if (timer->et_flags & ET_FLAGS_C3STOP)
+		cpu_disable_deep_sleep++;
+
 	/*
 	 * We honor the requested 'hz' value.
 	 * We want to run stathz in the neighborhood of 128hz.
@@ -344,8 +622,8 @@ cpu_initclocks_bsp(void)
 		else
 			singlemul = 4;
 	}
-	if (timer[1] == NULL) {
-		base = round_freq(timer[0], hz * singlemul);
+	if (periodic) {
+		base = round_freq(timer, hz * singlemul);
 		singlemul = max((base + hz / 2) / hz, 1);
 		hz = (base + singlemul / 2) / singlemul;
 		if (base <= 128)
@@ -359,175 +637,236 @@ cpu_initclocks_bsp(void)
 		profhz = stathz;
 		while ((profhz + stathz) <= 128 * 64)
 			profhz += stathz;
-		profhz = round_freq(timer[0], profhz);
+		profhz = round_freq(timer, profhz);
 	} else {
-		hz = round_freq(timer[0], hz);
-		stathz = round_freq(timer[1], 127);
-		profhz = round_freq(timer[1], stathz * 64);
+		hz = round_freq(timer, hz);
+		stathz = round_freq(timer, 127);
+		profhz = round_freq(timer, stathz * 64);
 	}
 	tick = 1000000 / hz;
+	FREQ2BT(hz, &hardperiod);
+	FREQ2BT(stathz, &statperiod);
+	FREQ2BT(profhz, &profperiod);
 	ET_LOCK();
-	cpu_restartclocks();
+	configtimer(1);
 	ET_UNLOCK();
 }
 
-/* Start per-CPU event timers on APs. */
+/*
+ * Start per-CPU event timers on APs.
+ */
 void
 cpu_initclocks_ap(void)
 {
+	struct bintime now;
+	struct pcpu_state *state;
 
-	ET_LOCK();
-	if (timer[0]->et_flags & ET_FLAGS_PERCPU)
-		et_start(timer[0], NULL, &timerperiod[0]);
-	if (timer[1] && timer[1]->et_flags & ET_FLAGS_PERCPU)
-		et_start(timer[1], NULL, &timerperiod[1]);
-	ET_UNLOCK();
-}
-
-/* Reconfigure and restart event timers after configuration changes. */
-static void
-cpu_restartclocks(void)
-{
-
-	/* Stop all event timers. */
-	timertest = 0;
-	if (timer1hz) {
-		timer1hz = 0;
-		configtimer(0);
-	}
-	if (timer[1] && timer2hz) {
-		timer2hz = 0;
-		configtimer(1);
-	}
-	/* Calculate new event timers parameters. */
-	if (timer[1] == NULL) {
-		timer1hz = hz * singlemul;
-		while (timer1hz < (profiling_on ? profhz : stathz))
-			timer1hz += hz;
-		timer2hz = 0;
-	} else {
-		timer1hz = hz;
-		timer2hz = profiling_on ? profhz : stathz;
-		timer2hz = round_freq(timer[1], timer2hz);
-	}
-	timer1hz = round_freq(timer[0], timer1hz);
-	printf("Starting kernel event timers: %s @ %dHz, %s @ %dHz\n",
-	    timer[0]->et_name, timer1hz,
-	    timer[1] ? timer[1]->et_name : "NONE", timer2hz);
-	/* Restart event timers. */
-	FREQ2BT(timer1hz, &timerperiod[0]);
-	configtimer(0);
-	if (timer[1]) {
-		timerticks[0] = 0;
-		timerticks[1] = 0;
-		FREQ2BT(timer2hz, &timerperiod[1]);
-		configtimer(1);
-		timertest = 1;
+	if (timer->et_flags & ET_FLAGS_PERCPU) {
+		state = DPCPU_PTR(timerstate);
+		binuptime(&now);
+		ET_HW_LOCK(state);
+		loadtimer(&now, 1);
+		ET_HW_UNLOCK(state);
 	}
 }
 
-/* Switch to profiling clock rates. */
+/*
+ * Switch to profiling clock rates.
+ */
 void
 cpu_startprofclock(void)
 {
 
 	ET_LOCK();
-	profiling_on = 1;
-	cpu_restartclocks();
+	if (periodic) {
+		configtimer(0);
+		profiling = 1;
+		configtimer(1);
+	} else
+		profiling = 1;
 	ET_UNLOCK();
 }
 
-/* Switch to regular clock rates. */
+/*
+ * Switch to regular clock rates.
+ */
 void
 cpu_stopprofclock(void)
 {
 
 	ET_LOCK();
-	profiling_on = 0;
-	cpu_restartclocks();
+	if (periodic) {
+		configtimer(0);
+		profiling = 0;
+		configtimer(1);
+	} else
+		profiling = 0;
 	ET_UNLOCK();
 }
 
-/* Report or change the active event timers hardware. */
+/*
+ * Switch to idle mode (all ticks handled).
+ */
+void
+cpu_idleclock(void)
+{
+	struct bintime now, t;
+	struct pcpu_state *state;
+
+	if (idletick || busy ||
+	    (periodic && (timer->et_flags & ET_FLAGS_PERCPU)))
+		return;
+	state = DPCPU_PTR(timerstate);
+	if (periodic)
+		now = state->now;
+	else
+		binuptime(&now);
+	CTR4(KTR_SPARE2, "idle at %d:    now  %d.%08x%08x",
+	    curcpu, now.sec, (unsigned int)(now.frac >> 32),
+			     (unsigned int)(now.frac & 0xffffffff));
+	getnextcpuevent(&t, 1);
+	ET_HW_LOCK(state);
+	state->idle = 1;
+	state->nextevent = t;
+	if (!periodic)
+		loadtimer(&now, 0);
+	ET_HW_UNLOCK(state);
+}
+
+/*
+ * Switch to active mode (skip empty ticks).
+ */
+void
+cpu_activeclock(void)
+{
+	struct bintime now;
+	struct pcpu_state *state;
+	struct thread *td;
+
+	state = DPCPU_PTR(timerstate);
+	if (state->idle == 0 || busy)
+		return;
+	if (periodic)
+		now = state->now;
+	else
+		binuptime(&now);
+	CTR4(KTR_SPARE2, "active at %d:  now  %d.%08x%08x",
+	    curcpu, now.sec, (unsigned int)(now.frac >> 32),
+			     (unsigned int)(now.frac & 0xffffffff));
+	spinlock_enter();
+	td = curthread;
+	td->td_intr_nesting_level++;
+	handleevents(&now, 1);
+	td->td_intr_nesting_level--;
+	spinlock_exit();
+}
+
+#ifdef SMP
+static void
+cpu_new_callout(int cpu, int ticks)
+{
+	struct bintime tmp;
+	struct pcpu_state *state;
+
+	CTR3(KTR_SPARE2, "new co at %d:    on %d in %d",
+	    curcpu, cpu, ticks);
+	state = DPCPU_ID_PTR(cpu, timerstate);
+	ET_HW_LOCK(state);
+	if (state->idle == 0 || busy) {
+		ET_HW_UNLOCK(state);
+		return;
+	}
+	/*
+	 * If the timer is periodic, just update the next event time for
+	 * the target CPU.
+	 */
+	if (periodic) {
+		state->nextevent = state->nexthard;
+		tmp = hardperiod;
+		bintime_mul(&tmp, ticks - 1);
+		bintime_add(&state->nextevent, &tmp);
+		ET_HW_UNLOCK(state);
+		return;
+	}
+	/*
+	 * Otherwise we have to wake that CPU up, as we can't get the present
+	 * bintime to reprogram the global timer from here.  If the timer is
+	 * per-CPU, we by definition can't do it from here.
+	 */
+	ET_HW_UNLOCK(state);
+	if (timer->et_flags & ET_FLAGS_PERCPU) {
+		state->handle = 1;
+		ipi_cpu(cpu, IPI_HARDCLOCK);
+	} else {
+		if (!cpu_idle_wakeup(cpu))
+			ipi_cpu(cpu, IPI_AST);
+	}
+}
+#endif
+
+/*
+ * Report or change the active event timers hardware.
+ */
 static int
-sysctl_kern_eventtimer_timer1(SYSCTL_HANDLER_ARGS)
+sysctl_kern_eventtimer_timer(SYSCTL_HANDLER_ARGS)
 {
 	char buf[32];
 	struct eventtimer *et;
 	int error;
 
 	ET_LOCK();
-	et = timer[0];
+	et = timer;
 	snprintf(buf, sizeof(buf), "%s", et->et_name);
 	ET_UNLOCK();
 	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
 	ET_LOCK();
-	et = timer[0];
+	et = timer;
 	if (error != 0 || req->newptr == NULL ||
-	    strcmp(buf, et->et_name) == 0) {
+	    strcasecmp(buf, et->et_name) == 0) {
 		ET_UNLOCK();
 		return (error);
 	}
-	et = et_find(buf, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
+	et = et_find(buf, 0, 0);
 	if (et == NULL) {
 		ET_UNLOCK();
 		return (ENOENT);
 	}
-	timer1hz = 0;
 	configtimer(0);
-	et_free(timer[0]);
-	timer[0] = et;
-	et_init(timer[0], timer1cb, NULL, NULL);
-	cpu_restartclocks();
+	et_free(timer);
+	if (et->et_flags & ET_FLAGS_C3STOP)
+		cpu_disable_deep_sleep++;
+	if (timer->et_flags & ET_FLAGS_C3STOP)
+		cpu_disable_deep_sleep--;
+	timer = et;
+	et_init(timer, timercb, NULL, NULL);
+	configtimer(1);
 	ET_UNLOCK();
 	return (error);
 }
-SYSCTL_PROC(_kern_eventtimer, OID_AUTO, timer1,
+SYSCTL_PROC(_kern_eventtimer, OID_AUTO, timer,
     CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
-    0, 0, sysctl_kern_eventtimer_timer1, "A", "Primary event timer");
+    0, 0, sysctl_kern_eventtimer_timer, "A", "Kernel event timer");
 
+/*
+ * Report or change the active event timer periodicity.
+ */
 static int
-sysctl_kern_eventtimer_timer2(SYSCTL_HANDLER_ARGS)
+sysctl_kern_eventtimer_periodic(SYSCTL_HANDLER_ARGS)
 {
-	char buf[32];
-	struct eventtimer *et;
-	int error;
+	int error, val;
 
-	ET_LOCK();
-	et = timer[1];
-	if (et == NULL)
-		snprintf(buf, sizeof(buf), "NONE");
-	else
-		snprintf(buf, sizeof(buf), "%s", et->et_name);
-	ET_UNLOCK();
-	error = sysctl_handle_string(oidp, buf, sizeof(buf), req);
-	ET_LOCK();
-	et = timer[1];
-	if (error != 0 || req->newptr == NULL ||
-	    strcmp(buf, et ? et->et_name : "NONE") == 0) {
-		ET_UNLOCK();
+	val = periodic;
+	error = sysctl_handle_int(oidp, &val, 0, req);
+	if (error != 0 || req->newptr == NULL)
 		return (error);
-	}
-	et = et_find(buf, ET_FLAGS_PERIODIC, ET_FLAGS_PERIODIC);
-	if (et == NULL && strcasecmp(buf, "NONE") != 0) {
-		ET_UNLOCK();
-		return (ENOENT);
-	}
-	if (timer[1] != NULL) {
-		timer2hz = 0;
-		configtimer(1);
-		et_free(timer[1]);
-	}
-	timer[1] = et;
-	if (timer[1] != NULL)
-		et_init(timer[1], timer2cb, NULL, NULL);
-	cpu_restartclocks();
+	ET_LOCK();
+	configtimer(0);
+	periodic = val;
+	configtimer(1);
 	ET_UNLOCK();
 	return (error);
 }
-SYSCTL_PROC(_kern_eventtimer, OID_AUTO, timer2,
-    CTLTYPE_STRING | CTLFLAG_RW | CTLFLAG_MPSAFE,
-    0, 0, sysctl_kern_eventtimer_timer2, "A", "Secondary event timer");
+SYSCTL_PROC(_kern_eventtimer, OID_AUTO, periodic,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+    0, 0, sysctl_kern_eventtimer_periodic, "I", "Kernel event timer periodic");
 
 #endif
-
diff --git a/sys/kern/kern_et.c b/sys/kern/kern_et.c
index 17b9c6764226..8c375561c42b 100644
--- a/sys/kern/kern_et.c
+++ b/sys/kern/kern_et.c
@@ -38,7 +38,7 @@ SLIST_HEAD(et_eventtimers_list, eventtimer);
 static struct et_eventtimers_list eventtimers = SLIST_HEAD_INITIALIZER(et_eventtimers);
 
 struct mtx	et_eventtimers_mtx;
-MTX_SYSINIT(et_eventtimers_init, &et_eventtimers_mtx, "et_mtx", MTX_SPIN);
+MTX_SYSINIT(et_eventtimers_init, &et_eventtimers_mtx, "et_mtx", MTX_DEF);
 
 SYSCTL_NODE(_kern, OID_AUTO, eventtimer, CTLFLAG_RW, 0, "Event timers");
 SYSCTL_NODE(_kern_eventtimer, OID_AUTO, et, CTLFLAG_RW, 0, "");
diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c
index d97334849ced..811b24f30e53 100644
--- a/sys/kern/kern_tc.c
+++ b/sys/kern/kern_tc.c
@@ -770,16 +770,11 @@ void
 tc_ticktock(void)
 {
 	static int count;
-	static time_t last_calib;
 
 	if (++count < tc_tick)
 		return;
 	count = 0;
 	tc_windup();
-	if (time_uptime != last_calib && !(time_uptime & 0xf)) {
-		cpu_tick_calibrate(0);
-		last_calib = time_uptime;
-	}
 }
 
 static void
@@ -830,9 +825,20 @@ tc_cpu_ticks(void)
 	return (u + base);
 }
 
+void
+cpu_tick_calibration(void)
+{
+	static time_t last_calib;
+
+	if (time_uptime != last_calib && !(time_uptime & 0xf)) {
+		cpu_tick_calibrate(0);
+		last_calib = time_uptime;
+	}
+}
+
 /*
  * This function gets called every 16 seconds on only one designated
- * CPU in the system from hardclock() via tc_ticktock().
+ * CPU in the system from hardclock() via cpu_tick_calibration().
  *
  * Whenever the real time clock is stepped we get called with reset=1
  * to make sure we handle suspend/resume and similar events correctly.
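With the calibration pulled out of tc_ticktock(), somebody else has to invoke it; the kern_clock.c side of this change (not shown in this excerpt) is expected to call cpu_tick_calibration() from the hardclock path on the timekeeping CPU, with the 16-second rate limiting now living inside the function itself. A hedged sketch of the intended wiring, using a hypothetical caller name:

/*
 * Hypothetical call site: the CPU that winds the timecounter also
 * polls the cpu ticker calibration; cpu_tick_calibration() returns
 * immediately unless 16 seconds of uptime have passed.
 */
static void
timekeeper_tick(void)
{

	tc_ticktock();
	cpu_tick_calibration();
}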
diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c
index 32d5691812d9..569779235481 100644
--- a/sys/kern/kern_timeout.c
+++ b/sys/kern/kern_timeout.c
@@ -111,6 +111,7 @@ struct callout_cpu {
 	int 			cc_softticks;
 	int			cc_cancel;
 	int			cc_waiting;
+	int 			cc_firsttick;
 };
 
 #ifdef SMP
@@ -126,6 +127,7 @@ struct callout_cpu cc_cpu;
 #define	CC_UNLOCK(cc)	mtx_unlock_spin(&(cc)->cc_lock)
 
 static int timeout_cpu;
+void (*callout_new_inserted)(int cpu, int ticks) = NULL;
 
 MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
 
@@ -260,7 +262,7 @@ callout_tick(void)
 	need_softclock = 0;
 	cc = CC_SELF();
 	mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
-	cc->cc_ticks++;
+	cc->cc_firsttick = cc->cc_ticks = ticks;
 	for (; (cc->cc_softticks - cc->cc_ticks) <= 0; cc->cc_softticks++) {
 		bucket = cc->cc_softticks & callwheelmask;
 		if (!TAILQ_EMPTY(&cc->cc_callwheel[bucket])) {
@@ -277,6 +279,34 @@ callout_tick(void)
 		swi_sched(cc->cc_cookie, 0);
 }
 
+int
+callout_tickstofirst(void)
+{
+	struct callout_cpu *cc;
+	struct callout *c;
+	struct callout_tailq *sc;
+	int curticks;
+	int skip = 1;
+
+	cc = CC_SELF();
+	mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
+	curticks = cc->cc_ticks;
+	while (skip < ncallout && skip < hz / 8) {
+		sc = &cc->cc_callwheel[(curticks + skip) & callwheelmask];
+		/* Scan this bucket for a pending callout within the window. */
+		TAILQ_FOREACH(c, sc, c_links.tqe) {
+			if (c && (c->c_time <= curticks + ncallout)
+			    && (c->c_time > 0))
+				goto out;
+		}
+		skip++;
+	}
+out:
+	cc->cc_firsttick = curticks + skip;
+	mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
+	return (skip);
+}
+
 static struct callout_cpu *
 callout_lock(struct callout *c)
 {
@@ -639,9 +669,14 @@ retry:
 	c->c_arg = arg;
 	c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
 	c->c_func = ftn;
-	c->c_time = cc->cc_ticks + to_ticks;
+	c->c_time = ticks + to_ticks;
 	TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask], 
 			  c, c_links.tqe);
+	if (callout_new_inserted != NULL &&
+	    (c->c_time - cc->cc_firsttick) < 0) {
+		cc->cc_firsttick = c->c_time;
+		(*callout_new_inserted)(cpu, to_ticks + (ticks - cc->cc_ticks));
+	}
 	CTR5(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d",
 	    cancelled ? "re" : "", c, c->c_func, c->c_arg, to_ticks);
 	CC_UNLOCK(cc);
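The two new hooks are meant to be consumed by the one-shot clock code rather than by drivers directly: callout_new_inserted lets a newly scheduled near-term callout shorten an already-programmed sleep, and callout_tickstofirst() reports how many ticks may safely be skipped. A hypothetical consumer sketch (names and the timer-programming details are assumptions, not part of the patch):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/callout.h>

/* Called with the target cpu and the distance, in ticks, to the new callout. */
static void
clk_new_callout(int cpu, int ticks)
{

	/* Re-arm the per-CPU one-shot timer if "ticks" is sooner than planned. */
}

static int
clk_enter_idle(void)
{

	/* Ticks until the first pending callout; bounded to hz/8 above. */
	return (callout_tickstofirst());
}

static void
clk_init(void)
{

	callout_new_inserted = clk_new_callout;
}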
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index 780dc6d1760b..9face648de93 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -1547,7 +1547,7 @@ sched_pctcpu(struct thread *td)
 }
 
 void
-sched_tick(void)
+sched_tick(int cnt)
 {
 }
 
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index bb2d34a37cac..e1cc172592ff 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -196,7 +196,7 @@ static int preempt_thresh = 0;
 #endif
 static int static_boost = PRI_MIN_TIMESHARE;
 static int sched_idlespins = 10000;
-static int sched_idlespinthresh = 64;
+static int sched_idlespinthresh = 16;
 
 /*
  * tdq - per processor runqs and statistics.  All fields are protected by the
@@ -2163,7 +2163,7 @@ sched_clock(struct thread *td)
  * is easier than trying to scale based on stathz.
  */
 void
-sched_tick(void)
+sched_tick(int cnt)
 {
 	struct td_sched *ts;
 
@@ -2175,7 +2175,7 @@ sched_tick(void)
 	if (ts->ts_incrtick == ticks)
 		return;
 	/* Adjust ticks for pctcpu */
-	ts->ts_ticks += 1 << SCHED_TICK_SHIFT;
+	ts->ts_ticks += cnt << SCHED_TICK_SHIFT;
 	ts->ts_ltick = ticks;
 	ts->ts_incrtick = ticks;
 	/*
@@ -2549,7 +2549,7 @@ sched_idletd(void *dummy)
 		if (tdq->tdq_load == 0) {
 			tdq->tdq_cpu_idle = 1;
 			if (tdq->tdq_load == 0) {
-				cpu_idle(switchcnt > sched_idlespinthresh);
+				cpu_idle(switchcnt > sched_idlespinthresh * 4);
 				tdq->tdq_switchcnt++;
 			}
 			tdq->tdq_cpu_idle = 0;
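sched_tick() now takes the number of ticks being accounted for, so a CPU that slept through several hardclock periods can credit them in one call; the caller lives in the kern_clock.c portion of this change (not shown here). A hedged sketch of the expected call pattern, with a hypothetical function name:

/*
 * Hypothetical catch-up path: "newticks" is how many hardclock periods
 * elapsed while this CPU had its timer stopped.
 */
static void
catch_up_ticks(int newticks)
{

	if (newticks > 0)
		sched_tick(newticks);	/* scales ts_ticks by the full count */
}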
diff --git a/sys/mips/include/smp.h b/sys/mips/include/smp.h
index 28efd4c8cafe..58aaf03165bf 100644
--- a/sys/mips/include/smp.h
+++ b/sys/mips/include/smp.h
@@ -28,7 +28,6 @@
 #define	IPI_STOP_HARD		0x0008
 #define	IPI_PREEMPT		0x0010
 #define	IPI_HARDCLOCK		0x0020
-#define	IPI_STATCLOCK		0x0040
 
 #ifndef LOCORE
 
diff --git a/sys/mips/mips/mp_machdep.c b/sys/mips/mips/mp_machdep.c
index c7ff3d8560b8..ef2f24c035dd 100644
--- a/sys/mips/mips/mp_machdep.c
+++ b/sys/mips/mips/mp_machdep.c
@@ -164,11 +164,7 @@ mips_ipi_handler(void *arg)
 			break;
 		case IPI_HARDCLOCK:
 			CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__);
-			hardclockintr(arg);;
-			break;
-		case IPI_STATCLOCK:
-			CTR1(KTR_SMP, "%s: IPI_STATCLOCK", __func__);
-			statclockintr(arg);;
+			hardclockintr();
 			break;
 		default:
 			panic("Unknown IPI 0x%0x on cpu %d", ipi, curcpu);
diff --git a/sys/pc98/pc98/machdep.c b/sys/pc98/pc98/machdep.c
index 671ce5b3cf37..22dc8f0c5e00 100644
--- a/sys/pc98/pc98/machdep.c
+++ b/sys/pc98/pc98/machdep.c
@@ -1120,40 +1120,36 @@ cpu_halt(void)
 		__asm__ ("hlt");
 }
 
+static int	idle_mwait = 1;		/* Use MONITOR/MWAIT for short idle. */
+TUNABLE_INT("machdep.idle_mwait", &idle_mwait);
+SYSCTL_INT(_machdep, OID_AUTO, idle_mwait, CTLFLAG_RW, &idle_mwait,
+    0, "Use MONITOR/MWAIT for short idle");
+
+#define	STATE_RUNNING	0x0
+#define	STATE_MWAIT	0x1
+#define	STATE_SLEEPING	0x2
+
 static void
 cpu_idle_hlt(int busy)
 {
+	int *state;
+
+	state = (int *)PCPU_PTR(monitorbuf);
+	*state = STATE_SLEEPING;
 	/*
-	 * we must absolutely guarentee that hlt is the next instruction
+	 * We must absolutely guarantee that hlt is the next instruction
 	 * after sti or we introduce a timing window.
 	 */
 	disable_intr();
-  	if (sched_runnable())
+	if (sched_runnable())
 		enable_intr();
 	else
 		__asm __volatile("sti; hlt");
-}
-
-static void
-cpu_idle_spin(int busy)
-{
-	return;
-}
-
-void (*cpu_idle_fn)(int) = cpu_idle_hlt;
-
-void
-cpu_idle(int busy)
-{
-#if defined(SMP)
-	if (mp_grab_cpu_hlt())
-		return;
-#endif
-	cpu_idle_fn(busy);
+	*state = STATE_RUNNING;
 }
 
 /*
- * mwait cpu power states.  Lower 4 bits are sub-states.
+ * MWAIT cpu power states.  Lower 4 bits are sub-states.
  */
 #define	MWAIT_C0	0xf0
 #define	MWAIT_C1	0x00
@@ -1161,63 +1157,91 @@ cpu_idle(int busy)
 #define	MWAIT_C3	0x20
 #define	MWAIT_C4	0x30
 
-#define	MWAIT_DISABLED	0x0
-#define	MWAIT_WOKEN	0x1
-#define	MWAIT_WAITING	0x2
-
 static void
 cpu_idle_mwait(int busy)
 {
-	int *mwait;
+	int *state;
 
-	mwait = (int *)PCPU_PTR(monitorbuf);
-	*mwait = MWAIT_WAITING;
-	if (sched_runnable())
-		return;
-	cpu_monitor(mwait, 0, 0);
-	if (*mwait == MWAIT_WAITING)
-		cpu_mwait(0, MWAIT_C1);
+	state = (int *)PCPU_PTR(monitorbuf);
+	*state = STATE_MWAIT;
+	if (!sched_runnable()) {
+		cpu_monitor(state, 0, 0);
+		if (*state == STATE_MWAIT)
+			cpu_mwait(0, MWAIT_C1);
+	}
+	*state = STATE_RUNNING;
 }
 
 static void
-cpu_idle_mwait_hlt(int busy)
+cpu_idle_spin(int busy)
 {
-	int *mwait;
+	int *state;
+	int i;
 
-	mwait = (int *)PCPU_PTR(monitorbuf);
-	if (busy == 0) {
-		*mwait = MWAIT_DISABLED;
-		cpu_idle_hlt(busy);
-		return;
+	state = (int *)PCPU_PTR(monitorbuf);
+	*state = STATE_RUNNING;
+	for (i = 0; i < 1000; i++) {
+		if (sched_runnable())
+			return;
+		cpu_spinwait();
 	}
-	*mwait = MWAIT_WAITING;
-	if (sched_runnable())
+}
+
+void (*cpu_idle_fn)(int) = cpu_idle_hlt;
+
+void
+cpu_idle(int busy)
+{
+
+	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
+	    busy, curcpu);
+#ifdef SMP
+	if (mp_grab_cpu_hlt())
 		return;
-	cpu_monitor(mwait, 0, 0);
-	if (*mwait == MWAIT_WAITING)
-		cpu_mwait(0, MWAIT_C1);
+#endif
+	/* If we are busy, try to use fast methods. */
+	if (busy) {
+		if ((cpu_feature2 & CPUID2_MON) && idle_mwait) {
+			cpu_idle_mwait(busy);
+			goto out;
+		}
+	}
+
+	/* If we have time, switch the timers into idle mode. */
+	if (!busy) {
+		critical_enter();
+		cpu_idleclock();
+	}
+
+	/* Call main idle method. */
+	cpu_idle_fn(busy);
+
+	/* Switch the timers back into active mode. */
+	if (!busy) {
+		cpu_activeclock();
+		critical_exit();
+	}
+out:
+	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
+	    busy, curcpu);
 }
 
 int
 cpu_idle_wakeup(int cpu)
 {
 	struct pcpu *pcpu;
-	int *mwait;
+	int *state;
 
-	if (cpu_idle_fn == cpu_idle_spin)
-		return (1);
-	if (cpu_idle_fn != cpu_idle_mwait && cpu_idle_fn != cpu_idle_mwait_hlt)
-		return (0);
 	pcpu = pcpu_find(cpu);
-	mwait = (int *)pcpu->pc_monitorbuf;
+	state = (int *)pcpu->pc_monitorbuf;
 	/*
 	 * This doesn't need to be atomic since missing the race will
 	 * simply result in unnecessary IPIs.
 	 */
-	if (cpu_idle_fn == cpu_idle_mwait_hlt && *mwait == MWAIT_DISABLED)
+	if (*state == STATE_SLEEPING)
 		return (0);
-	*mwait = MWAIT_WOKEN;
-
+	if (*state == STATE_MWAIT)
+		*state = STATE_RUNNING;
 	return (1);
 }
 
@@ -1230,7 +1254,6 @@ struct {
 } idle_tbl[] = {
 	{ cpu_idle_spin, "spin" },
 	{ cpu_idle_mwait, "mwait" },
-	{ cpu_idle_mwait_hlt, "mwait_hlt" },
 	{ cpu_idle_hlt, "hlt" },
 	{ NULL, NULL }
 };
@@ -1255,6 +1278,9 @@ idle_sysctl_available(SYSCTL_HANDLER_ARGS)
 	return (error);
 }
 
+SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
+    0, 0, idle_sysctl_available, "A", "list of available idle functions");
+
 static int
 idle_sysctl(SYSCTL_HANDLER_ARGS)
 {
@@ -1286,9 +1312,6 @@ idle_sysctl(SYSCTL_HANDLER_ARGS)
 	return (EINVAL);
 }
 
-SYSCTL_PROC(_machdep, OID_AUTO, idle_available, CTLTYPE_STRING | CTLFLAG_RD,
-    0, 0, idle_sysctl_available, "A", "list of available idle functions");
-
 SYSCTL_PROC(_machdep, OID_AUTO, idle, CTLTYPE_STRING | CTLFLAG_RW, 0, 0,
     idle_sysctl, "A", "currently selected idle function");
 
diff --git a/sys/powerpc/aim/machdep.c b/sys/powerpc/aim/machdep.c
index 3290fa674414..ba06531bc848 100644
--- a/sys/powerpc/aim/machdep.c
+++ b/sys/powerpc/aim/machdep.c
@@ -638,7 +638,13 @@ cpu_idle(int busy)
 		panic("ints disabled in idleproc!");
 	}
 #endif
+	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
+	    busy, curcpu);
 	if (powerpc_pow_enabled) {
+		if (!busy) {
+			critical_enter();
+			cpu_idleclock();
+		}
 		switch (vers) {
 		case IBM970:
 		case IBM970FX:
@@ -658,7 +664,13 @@ cpu_idle(int busy)
 			isync();
 			break;
 		}
+		if (!busy) {
+			cpu_activeclock();
+			critical_exit();
+		}
 	}
+	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
+	    busy, curcpu);
 }
 
 int
diff --git a/sys/powerpc/booke/machdep.c b/sys/powerpc/booke/machdep.c
index c725dd85bf38..c4b80cc665e9 100644
--- a/sys/powerpc/booke/machdep.c
+++ b/sys/powerpc/booke/machdep.c
@@ -488,9 +488,21 @@ cpu_idle (int busy)
 	}
 #endif
 
+	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
+	    busy, curcpu);
+	if (!busy) {
+		critical_enter();
+		cpu_idleclock();
+	}
 	/* Freescale E500 core RM section 6.4.1. */
 	msr = msr | PSL_WE;
 	__asm __volatile("msync; mtmsr %0; isync" :: "r" (msr));
+	if (!busy) {
+		cpu_activeclock();
+		critical_exit();
+	}
+	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d done",
+	    busy, curcpu);
 }
 
 int
diff --git a/sys/powerpc/include/smp.h b/sys/powerpc/include/smp.h
index c78af741f704..cf952788c680 100644
--- a/sys/powerpc/include/smp.h
+++ b/sys/powerpc/include/smp.h
@@ -37,7 +37,6 @@
 #define	IPI_STOP		3
 #define	IPI_STOP_HARD		3
 #define	IPI_HARDCLOCK		4
-#define	IPI_STATCLOCK		5
 
 #ifndef LOCORE
 
diff --git a/sys/powerpc/powerpc/mp_machdep.c b/sys/powerpc/powerpc/mp_machdep.c
index a8336389f45b..6915c4bb7288 100644
--- a/sys/powerpc/powerpc/mp_machdep.c
+++ b/sys/powerpc/powerpc/mp_machdep.c
@@ -315,7 +315,7 @@ powerpc_ipi_handler(void *arg)
 			break;
 		case IPI_HARDCLOCK:
 			CTR1(KTR_SMP, "%s: IPI_HARDCLOCK", __func__);
-			hardclockintr(curthread->td_intr_frame);
+			hardclockintr();
 			break;
 		}
 	}
diff --git a/sys/sparc64/include/intr_machdep.h b/sys/sparc64/include/intr_machdep.h
index 254ac78a90bd..158b5b6017a1 100644
--- a/sys/sparc64/include/intr_machdep.h
+++ b/sys/sparc64/include/intr_machdep.h
@@ -47,7 +47,6 @@
 #define	PIL_STOP	5	/* stop cpu ipi */
 #define	PIL_PREEMPT	6	/* preempt idle thread cpu ipi */
 #define	PIL_HARDCLOCK	7	/* hardclock broadcast */
-#define	PIL_STATCLOCK	8	/* statclock broadcast */
 #define	PIL_FILTER	12	/* filter interrupts */
 #define	PIL_FAST	13	/* fast interrupts */
 #define	PIL_TICK	14	/* tick interrupts */
diff --git a/sys/sparc64/include/smp.h b/sys/sparc64/include/smp.h
index 3812431c1436..3ca8e0380444 100644
--- a/sys/sparc64/include/smp.h
+++ b/sys/sparc64/include/smp.h
@@ -59,7 +59,6 @@
 #define	IPI_RENDEZVOUS	PIL_RENDEZVOUS
 #define	IPI_PREEMPT	PIL_PREEMPT
 #define	IPI_HARDCLOCK	PIL_HARDCLOCK
-#define	IPI_STATCLOCK	PIL_STATCLOCK
 #define	IPI_STOP	PIL_STOP
 #define	IPI_STOP_HARD	PIL_STOP
 
diff --git a/sys/sparc64/sparc64/intr_machdep.c b/sys/sparc64/sparc64/intr_machdep.c
index 8e610f63951f..85712865b1b4 100644
--- a/sys/sparc64/sparc64/intr_machdep.c
+++ b/sys/sparc64/sparc64/intr_machdep.c
@@ -97,8 +97,7 @@ static const char *const pil_names[] = {
 	"stop",		/* PIL_STOP */
 	"preempt",	/* PIL_PREEMPT */
 	"hardclock",	/* PIL_HARDCLOCK */
-	"statclock",	/* PIL_STATCLOCK */
-	"stray", "stray", "stray",
+	"stray", "stray", "stray", "stray",
 	"filter",	/* PIL_FILTER */
 	"fast",		/* PIL_FAST */
 	"tick",		/* PIL_TICK */
diff --git a/sys/sparc64/sparc64/mp_machdep.c b/sys/sparc64/sparc64/mp_machdep.c
index 57a2d6f38ac4..e5a9fb3eeb64 100644
--- a/sys/sparc64/sparc64/mp_machdep.c
+++ b/sys/sparc64/sparc64/mp_machdep.c
@@ -98,7 +98,6 @@ __FBSDID("$FreeBSD$");
 static ih_func_t cpu_ipi_ast;
 static ih_func_t cpu_ipi_hardclock;
 static ih_func_t cpu_ipi_preempt;
-static ih_func_t cpu_ipi_statclock;
 static ih_func_t cpu_ipi_stop;
 
 /*
@@ -292,7 +291,6 @@ cpu_mp_start(void)
 	intr_setup(PIL_STOP, cpu_ipi_stop, -1, NULL, NULL);
 	intr_setup(PIL_PREEMPT, cpu_ipi_preempt, -1, NULL, NULL);
 	intr_setup(PIL_HARDCLOCK, cpu_ipi_hardclock, -1, NULL, NULL);
-	intr_setup(PIL_STATCLOCK, cpu_ipi_statclock, -1, NULL, NULL);
 
 	cpuid_to_mid[curcpu] = PCPU_GET(mid);
 
@@ -524,15 +522,18 @@ cpu_ipi_preempt(struct trapframe *tf)
 static void
 cpu_ipi_hardclock(struct trapframe *tf)
 {
+	struct trapframe *oldframe;
+	struct thread *td;
 
-	hardclockintr(tf);
-}
-
-static void
-cpu_ipi_statclock(struct trapframe *tf)
-{
-
-	statclockintr(tf);
+	critical_enter();
+	td = curthread;
+	td->td_intr_nesting_level++;
+	oldframe = td->td_intr_frame;
+	td->td_intr_frame = tf;
+	hardclockintr();
+	td->td_intr_frame = oldframe;
+	td->td_intr_nesting_level--;
+	critical_exit();
 }
 
 static void
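Since hardclockintr() no longer receives a trapframe, the IPI handlers above stash the frame in curthread->td_intr_frame before calling it; any downstream consumer that still needs the interrupted PC is expected to read it back from there. A hedged sketch (assumes the usual TRAPF_USERMODE()/TRAPF_PC() macros from <machine/cpu.h>):

/* Hypothetical consumer: recover usermode/pc from the stashed frame. */
static void
frame_consumer(void)
{
	struct trapframe *tf;

	tf = curthread->td_intr_frame;
	if (tf != NULL)
		hardclock(TRAPF_USERMODE(tf), TRAPF_PC(tf));
}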
diff --git a/sys/sun4v/include/intr_machdep.h b/sys/sun4v/include/intr_machdep.h
index 370a5c0e2f4d..f686e66f37e4 100644
--- a/sys/sun4v/include/intr_machdep.h
+++ b/sys/sun4v/include/intr_machdep.h
@@ -47,7 +47,6 @@
 #define	PIL_STOP	5	/* stop cpu ipi */
 #define	PIL_PREEMPT	6	/* preempt idle thread cpu ipi */
 #define	PIL_HARDCLOCK	7	/* hardclock broadcast */
-#define	PIL_STATCLOCK	8	/* statclock broadcast */
 #define	PIL_FAST	13	/* fast interrupts */
 #define	PIL_TICK	14
 
diff --git a/sys/sun4v/include/smp.h b/sys/sun4v/include/smp.h
index 56c50ebbaedb..3202089cbf15 100644
--- a/sys/sun4v/include/smp.h
+++ b/sys/sun4v/include/smp.h
@@ -47,7 +47,6 @@
 #define	IPI_STOP_HARD	PIL_STOP
 #define IPI_PREEMPT     PIL_PREEMPT
 #define	IPI_HARDCLOCK	PIL_HARDCLOCK
-#define	IPI_STATCLOCK	PIL_STATCLOCK
 
 #define	IPI_RETRIES	5000
 
@@ -83,7 +82,6 @@ void cpu_ipi_ast(struct trapframe *tf);
 void cpu_ipi_stop(struct trapframe *tf);
 void cpu_ipi_preempt(struct trapframe *tf);
 void cpu_ipi_hardclock(struct trapframe *tf);
-void cpu_ipi_statclock(struct trapframe *tf);
 
 void	ipi_all_but_self(u_int ipi);
 void	ipi_cpu(int cpu, u_int ipi);
diff --git a/sys/sun4v/sun4v/intr_machdep.c b/sys/sun4v/sun4v/intr_machdep.c
index 123493ef5bf2..358740237f59 100644
--- a/sys/sun4v/sun4v/intr_machdep.c
+++ b/sys/sun4v/sun4v/intr_machdep.c
@@ -110,8 +110,7 @@ static char *pil_names[] = {
 	"stop",		/* PIL_STOP */
 	"preempt",      /* PIL_PREEMPT */
 	"hardclock",	/* PIL_HARDCLOCK */
-	"statclock",	/* PIL_STATCLOCK */
-	"stray", "stray", "stray", "stray",
+	"stray", "stray", "stray", "stray", "stray",
 	"fast",		/* PIL_FAST */
 	"tick",		/* PIL_TICK */
 };
@@ -265,7 +264,6 @@ intr_init(void)
 	intr_handlers[PIL_STOP]= cpu_ipi_stop;
 	intr_handlers[PIL_PREEMPT]= cpu_ipi_preempt;
 	intr_handlers[PIL_HARDCLOCK]= cpu_ipi_hardclock;
-	intr_handlers[PIL_STATCLOCK]= cpu_ipi_statclock;
 #endif
 	mtx_init(&intr_table_lock, "intr table", NULL, MTX_SPIN);
 	cpu_intrq_alloc();
diff --git a/sys/sun4v/sun4v/mp_machdep.c b/sys/sun4v/sun4v/mp_machdep.c
index 2e9a378d0ac8..a9535e34d779 100644
--- a/sys/sun4v/sun4v/mp_machdep.c
+++ b/sys/sun4v/sun4v/mp_machdep.c
@@ -472,15 +472,18 @@ cpu_ipi_preempt(struct trapframe *tf)
 void
 cpu_ipi_hardclock(struct trapframe *tf)
 {
+	struct trapframe *oldframe;
+	struct thread *td;
 
-	hardclockintr(tf);
-}
-
-void
-cpu_ipi_statclock(struct trapframe *tf)
-{
-
-	statclockintr(tf);
+	critical_enter();
+	td = curthread;
+	td->td_intr_nesting_level++;
+	oldframe = td->td_intr_frame;
+	td->td_intr_frame = tf;
+	hardclockintr();
+	td->td_intr_frame = oldframe;
+	td->td_intr_nesting_level--;
+	critical_exit();
 }
 
 void
diff --git a/sys/sys/callout.h b/sys/sys/callout.h
index 2d43d1494ae0..8fcd06e4f151 100644
--- a/sys/sys/callout.h
+++ b/sys/sys/callout.h
@@ -96,7 +96,8 @@ int	callout_schedule_on(struct callout *, int, int);
 #define	callout_stop(c)		_callout_stop_safe(c, 0)
 int	_callout_stop_safe(struct callout *, int);
 void	callout_tick(void);
-
+int	callout_tickstofirst(void);
+extern void (*callout_new_inserted)(int cpu, int ticks);
 
 #endif
 
diff --git a/sys/sys/sched.h b/sys/sys/sched.h
index d0ebffd5684c..92dd4c4a1385 100644
--- a/sys/sys/sched.h
+++ b/sys/sys/sched.h
@@ -111,7 +111,7 @@ void	sched_preempt(struct thread *td);
 void	sched_add(struct thread *td, int flags);
 void	sched_clock(struct thread *td);
 void	sched_rem(struct thread *td);
-void	sched_tick(void);
+void	sched_tick(int cnt);
 void	sched_relinquish(struct thread *td);
 struct thread *sched_choose(void);
 void	sched_idletd(void *);
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index f913887a0cda..8e98ef4bec5c 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -237,20 +237,22 @@ void	realitexpire(void *);
 int	sysbeep(int hertz, int period);
 
 void	hardclock(int usermode, uintfptr_t pc);
+void	hardclock_anycpu(int cnt, int usermode);
 void	hardclock_cpu(int usermode);
+void	hardclock_sync(int cpu);
 void	softclock(void *);
 void	statclock(int usermode);
 void	profclock(int usermode, uintfptr_t pc);
-void	timer1clock(int usermode, uintfptr_t pc);
-void	timer2clock(int usermode, uintfptr_t pc);
 
-int	hardclockintr(struct trapframe *frame);
-int	statclockintr(struct trapframe *frame);
+int	hardclockintr(void);
 
 void	startprofclock(struct proc *);
 void	stopprofclock(struct proc *);
 void	cpu_startprofclock(void);
 void	cpu_stopprofclock(void);
+void	cpu_idleclock(void);
+void	cpu_activeclock(void);
+extern int	cpu_disable_deep_sleep;
 
 int	cr_cansee(struct ucred *u1, struct ucred *u2);
 int	cr_canseesocket(struct ucred *cred, struct socket *so);
diff --git a/sys/sys/timeet.h b/sys/sys/timeet.h
index bc713d68ed33..87392a29a64d 100644
--- a/sys/sys/timeet.h
+++ b/sys/sys/timeet.h
@@ -83,8 +83,8 @@ struct eventtimer {
 };
 
 extern struct mtx	et_eventtimers_mtx;
-#define	ET_LOCK()	mtx_lock_spin(&et_eventtimers_mtx)
-#define	ET_UNLOCK()	mtx_unlock_spin(&et_eventtimers_mtx)
+#define	ET_LOCK()	mtx_lock(&et_eventtimers_mtx)
+#define	ET_UNLOCK()	mtx_unlock(&et_eventtimers_mtx)
 
 /* Driver API */
 int	et_register(struct eventtimer *et);
diff --git a/sys/sys/timetc.h b/sys/sys/timetc.h
index d5a818b4c0e9..3249788563e1 100644
--- a/sys/sys/timetc.h
+++ b/sys/sys/timetc.h
@@ -70,6 +70,7 @@ u_int64_t tc_getfrequency(void);
 void	tc_init(struct timecounter *tc);
 void	tc_setclock(struct timespec *ts);
 void	tc_ticktock(void);
+void	cpu_tick_calibration(void);
 
 #ifdef SYSCTL_DECL
 SYSCTL_DECL(_kern_timecounter);
diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c
index f479bbee769f..6d7a53b5fdc4 100644
--- a/sys/x86/x86/local_apic.c
+++ b/sys/x86/x86/local_apic.c
@@ -261,7 +261,7 @@ lapic_init(vm_paddr_t addr)
 		lapic_et.et_quality = 600;
 		if (!arat) {
 			lapic_et.et_flags |= ET_FLAGS_C3STOP;
-			lapic_et.et_quality -= 100;
+			lapic_et.et_quality -= 200;
 		}
 		lapic_et.et_frequency = 0;
 		/* We don't know frequency yet, so trying to guess. */