From 63347b231c9f144f332b5b3f08d000e5f9b1d3b2 Mon Sep 17 00:00:00 2001 From: jhb Date: Tue, 8 Feb 2005 20:25:07 +0000 Subject: [PATCH] Use the local APIC timer to drive the various kernel clocks on SMP machines rather than forwarding interrupts from the clock devices around using IPIs: - Add an IDT vector that pushes a clock frame and calls lapic_handle_timer(). - Add functions to program the local APIC timer including setting the divisor, and setting up the timer to do either a periodic countdown or a one-shot countdown. - Add a lapic_setup_clock() function that the BSP calls from cpu_initclocks() to set up the local APIC timer if it is going to be used. The setup uses a one-shot countdown to calibrate the timer. We then program the timer on each CPU to fire at a frequency of hz * 3. stathz is defined as freq / 23 (hz * 3 / 23), and profhz is defined as freq / 2 (hz * 3 / 2). This gives the clocks relatively prime divisors while keeping a low LCM for the frequency of the clock interrupts. Thanks to Peter Jeremy for suggesting this approach. - Remove the hardclock and statclock forwarding code including the two associated IPIs. The bitmap IPI handler has now effectively degenerated to just IPI_AST. - When the local APIC timer is used we don't turn the RTC on at all, but we still enable interrupts on the ISA timer 0 (i8254) for timecounting purposes. 
--- sys/i386/i386/apic_vector.s | 24 +++++ sys/i386/i386/local_apic.c | 188 +++++++++++++++++++++++++++++++++++- sys/i386/i386/mp_machdep.c | 67 ------------- sys/i386/include/apicvar.h | 8 +- sys/i386/include/smp.h | 2 - sys/i386/isa/clock.c | 38 ++++---- sys/isa/atrtc.c | 38 ++++---- 7 files changed, 252 insertions(+), 113 deletions(-) diff --git a/sys/i386/i386/apic_vector.s b/sys/i386/i386/apic_vector.s index 88b54aac348c..c770bcf4be6e 100644 --- a/sys/i386/i386/apic_vector.s +++ b/sys/i386/i386/apic_vector.s @@ -115,6 +115,30 @@ IDTVEC(spuriousint) ISR_VEC(6, apic_isr6) ISR_VEC(7, apic_isr7) +/* + * Local APIC periodic timer handler. + */ + .text + SUPERALIGN_TEXT +IDTVEC(timerint) + PUSH_FRAME + movl $KDSEL, %eax /* reload with kernel's data segment */ + movl %eax, %ds + movl %eax, %es + movl $KPSEL, %eax + movl %eax, %fs + + movl lapic, %edx + movl $0, LA_EOI(%edx) /* End Of Interrupt to APIC */ + + FAKE_MCOUNT(TF_EIP(%esp)) + + pushl $0 /* XXX convert trapframe to clockframe */ + call lapic_handle_timer + addl $4, %esp /* XXX convert clockframe to trapframe */ + MEXITCOUNT + jmp doreti + #ifdef SMP /* * Global address space TLB shootdown. diff --git a/sys/i386/i386/local_apic.c b/sys/i386/i386/local_apic.c index 4745e7371c56..debb691f96e8 100644 --- a/sys/i386/i386/local_apic.c +++ b/sys/i386/i386/local_apic.c @@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -65,6 +66,10 @@ CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS); CTASSERT(APIC_LOCAL_INTS == 240); CTASSERT(IPI_STOP < APIC_SPURIOUS_INT); +#define LAPIC_TIMER_HZ_DIVIDER 3 +#define LAPIC_TIMER_STATHZ_DIVIDER 23 +#define LAPIC_TIMER_PROFHZ_DIVIDER 2 + /* * Support for local APICs. 
Local APICs manage interrupts on each * individual processor as opposed to I/O APICs which receive interrupts @@ -89,6 +94,10 @@ struct lapic { u_int la_cluster:4; u_int la_cluster_id:2; u_int la_present:1; + u_long *la_timer_count; + u_long la_hard_ticks; + u_long la_stat_ticks; + u_long la_prof_ticks; } static lapics[MAX_APICID]; /* XXX: should thermal be an NMI? */ @@ -114,9 +123,21 @@ static inthand_t *ioint_handlers[] = { IDTVEC(apic_isr7), /* 224 - 255 */ }; +static u_int32_t lapic_timer_divisors[] = { + APIC_TDCR_1, APIC_TDCR_2, APIC_TDCR_4, APIC_TDCR_8, APIC_TDCR_16, + APIC_TDCR_32, APIC_TDCR_64, APIC_TDCR_128 +}; + volatile lapic_t *lapic; +static u_long lapic_timer_divisor, lapic_timer_period, lapic_timer_hz; +static u_long *lapic_virtual_hardclock, *lapic_virtual_statclock, + *lapic_virtual_profclock; static void lapic_enable(void); +static void lapic_timer_enable_intr(void); +static void lapic_timer_oneshot(u_int count); +static void lapic_timer_periodic(u_int count); +static void lapic_timer_set_divisor(u_int divisor); static uint32_t lvt_mode(struct lapic *la, u_int pin, uint32_t value); static uint32_t @@ -180,7 +201,11 @@ lapic_init(uintptr_t addr) /* Set BSP's per-CPU local APIC ID. */ PCPU_SET(apic_id, lapic_id()); - /* XXX: timer/error/thermal interrupts */ + /* Local APIC timer interrupt. */ + setidt(APIC_TIMER_INT, IDTVEC(timerint), SDT_SYS386IGT, SEL_KPL, + GSEL(GCODE_SEL, SEL_KPL)); + + /* XXX: error/thermal interrupts */ } /* @@ -252,6 +277,7 @@ lapic_setup(void) struct lapic *la; u_int32_t value, maxlvt; register_t eflags; + char buf[MAXCOMLEN + 1]; la = &lapics[lapic_id()]; KASSERT(la->la_present, ("missing APIC structure")); @@ -281,11 +307,84 @@ lapic_setup(void) lapic->lvt_lint0 = lvt_mode(la, LVT_LINT0, lapic->lvt_lint0); lapic->lvt_lint1 = lvt_mode(la, LVT_LINT1, lapic->lvt_lint1); - /* XXX: more LVT entries */ + /* Program timer LVT and setup handler. 
*/ + lapic->lvt_timer = lvt_mode(la, LVT_TIMER, lapic->lvt_timer); + snprintf(buf, sizeof(buf), "lapic%d: timer", lapic_id()); + intrcnt_add(buf, &la->la_timer_count); + if (PCPU_GET(cpuid) != 0) { + KASSERT(lapic_timer_period != 0, ("lapic%u: zero divisor", + lapic_id())); + lapic_timer_set_divisor(lapic_timer_divisor); + lapic_timer_periodic(lapic_timer_period); + lapic_timer_enable_intr(); + } + + /* XXX: Performance counter, error, and thermal LVTs */ intr_restore(eflags); } +/* + * Called by cpu_initclocks() on the BSP to setup the local APIC timer so + * that it can drive hardclock, statclock, and profclock. This function + * returns true if it is able to use the local APIC timer to drive the + * clocks and false if it is not able. + */ +int +lapic_setup_clock(void) +{ + u_long value; + + /* Can't drive the timer without a local APIC. */ + if (lapic == NULL) + return (0); + + /* If we've only got one CPU, then use the RTC and ISA timer instead. */ + if (mp_ncpus == 1) + return (0); + + /* Start off with a divisor of 2 (power on reset default). */ + lapic_timer_divisor = 2; + + /* Try to calibrate the local APIC timer. */ + do { + lapic_timer_set_divisor(lapic_timer_divisor); + lapic_timer_oneshot(APIC_TIMER_MAX_COUNT); + DELAY(2000000); + value = APIC_TIMER_MAX_COUNT - lapic->ccr_timer; + if (value != APIC_TIMER_MAX_COUNT) + break; + lapic_timer_divisor <<= 1; + } while (lapic_timer_divisor <= 128); + if (lapic_timer_divisor > 128) + panic("lapic: Divisor too big"); + value /= 2; + if (bootverbose) + printf("lapic: Divisor %lu, Frequency %lu hz\n", + lapic_timer_divisor, value); + + /* + * We will drive the timer at a small multiple of hz and drive + * both of the other timers with similarly small but relatively + * prime divisors. 
+ */ + lapic_timer_hz = hz * LAPIC_TIMER_HZ_DIVIDER; + stathz = lapic_timer_hz / LAPIC_TIMER_STATHZ_DIVIDER; + profhz = lapic_timer_hz / LAPIC_TIMER_PROFHZ_DIVIDER; + lapic_timer_period = value / lapic_timer_hz; + intrcnt_add("lapic: hardclock", &lapic_virtual_hardclock); + intrcnt_add("lapic: statclock", &lapic_virtual_statclock); + intrcnt_add("lapic: profclock", &lapic_virtual_profclock); + + /* + * Start up the timer on the BSP. The APs will kick off their + * timer during lapic_setup(). + */ + lapic_timer_periodic(lapic_timer_period); + lapic_timer_enable_intr(); + return (1); +} + void lapic_disable(void) { @@ -516,6 +615,91 @@ lapic_handle_intr(struct intrframe frame) intr_execute_handlers(isrc, &frame); } +void +lapic_handle_timer(struct clockframe frame) +{ + struct lapic *la; + + la = &lapics[PCPU_GET(apic_id)]; + (*la->la_timer_count)++; + critical_enter(); + + /* Fire hardclock at hz. */ + la->la_hard_ticks += hz; + if (la->la_hard_ticks >= lapic_timer_hz) { + la->la_hard_ticks -= lapic_timer_hz; + if (PCPU_GET(cpuid) == 0) { + (*lapic_virtual_hardclock)++; + hardclock(&frame); + } else + hardclock_process(&frame); + } + + /* Fire statclock at stathz. */ + la->la_stat_ticks += stathz; + if (la->la_stat_ticks >= lapic_timer_hz) { + la->la_stat_ticks -= lapic_timer_hz; + if (PCPU_GET(cpuid) == 0) + (*lapic_virtual_statclock)++; + statclock(&frame); + } + + /* Fire profclock at profhz, but only when needed. 
*/ + la->la_prof_ticks += profhz; + if (la->la_prof_ticks >= lapic_timer_hz) { + la->la_prof_ticks -= lapic_timer_hz; + if (PCPU_GET(cpuid) == 0) + (*lapic_virtual_profclock)++; + if (profprocs != 0) + profclock(&frame); + } + critical_exit(); +} + +static void +lapic_timer_set_divisor(u_int divisor) +{ + + KASSERT(powerof2(divisor), ("lapic: invalid divisor %u", divisor)); + KASSERT(ffs(divisor) <= sizeof(lapic_timer_divisors) / + sizeof(u_int32_t), ("lapic: invalid divisor %u", divisor)); + lapic->dcr_timer = lapic_timer_divisors[ffs(divisor) - 1]; +} + +static void +lapic_timer_oneshot(u_int count) +{ + u_int32_t value; + + value = lapic->lvt_timer; + value &= ~APIC_LVTT_TM; + value |= APIC_LVTT_TM_ONE_SHOT; + lapic->lvt_timer = value; + lapic->icr_timer = count; +} + +static void +lapic_timer_periodic(u_int count) +{ + u_int32_t value; + + value = lapic->lvt_timer; + value &= ~APIC_LVTT_TM; + value |= APIC_LVTT_TM_PERIODIC; + lapic->lvt_timer = value; + lapic->icr_timer = count; +} + +static void +lapic_timer_enable_intr(void) +{ + u_int32_t value; + + value = lapic->lvt_timer; + value &= ~APIC_LVT_M; + lapic->lvt_timer = value; +} + /* Translate between IDT vectors and IRQ vectors. */ u_int apic_irq_to_idt(u_int irq) diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index 863aa30418b4..7cd184a22b02 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -1088,82 +1088,15 @@ smp_masked_invlpg_range(u_int mask, vm_offset_t addr1, vm_offset_t addr2) } -/* - * For statclock, we send an IPI to all CPU's to have them call this - * function. - */ - -void -forward_statclock(void) -{ - int map; - - CTR0(KTR_SMP, "forward_statclock"); - - if (!smp_started || cold || panicstr) - return; - - map = PCPU_GET(other_cpus) & ~(stopped_cpus|hlt_cpus_mask); - if (map != 0) - ipi_selected(map, IPI_STATCLOCK); -} - -/* - * For each hardclock(), we send an IPI to all other CPU's to have them - * execute this function. 
It would be nice to reduce contention on - * sched_lock if we could simply peek at the CPU to determine the user/kernel - * state and call hardclock_process() on the CPU receiving the clock interrupt - * and then just use a simple IPI to handle any ast's if needed. - */ - -void -forward_hardclock(void) -{ - u_int map; - - CTR0(KTR_SMP, "forward_hardclock"); - - if (!smp_started || cold || panicstr) - return; - - map = PCPU_GET(other_cpus) & ~(stopped_cpus|hlt_cpus_mask); - if (map != 0) - ipi_selected(map, IPI_HARDCLOCK); -} - void ipi_bitmap_handler(struct clockframe frame) { int cpu = PCPU_GET(cpuid); u_int ipi_bitmap; - struct thread *td; ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]); - critical_enter(); - /* Nothing to do for AST */ - - if (ipi_bitmap & (1 << IPI_HARDCLOCK)) { - td = curthread; - td->td_intr_nesting_level++; - hardclock_process(&frame); - td->td_intr_nesting_level--; - } - - if (ipi_bitmap & (1 << IPI_STATCLOCK)) { - CTR0(KTR_SMP, "forwarded_statclock"); - - td = curthread; - td->td_intr_nesting_level++; - if (profprocs != 0) - profclock(&frame); - if (pscnt == psdiv) - statclock(&frame); - td->td_intr_nesting_level--; - } - - critical_exit(); } /* diff --git a/sys/i386/include/apicvar.h b/sys/i386/include/apicvar.h index a0f06ba93db1..d4af36b7d786 100644 --- a/sys/i386/include/apicvar.h +++ b/sys/i386/include/apicvar.h @@ -122,9 +122,7 @@ /* IPIs handled by IPI_BITMAPED_VECTOR (XXX ups is there a better place?) */ #define IPI_AST 0 /* Generate software trap. */ -#define IPI_HARDCLOCK 1 /* Inter-CPU clock handling. */ -#define IPI_STATCLOCK 2 -#define IPI_BITMAP_LAST IPI_STATCLOCK +#define IPI_BITMAP_LAST IPI_AST #define IPI_IS_BITMAPED(x) ((x) <= IPI_BITMAP_LAST) #define IPI_STOP (APIC_IPI_INTS + 6) /* Stop CPU until restarted. 
*/ @@ -171,7 +169,7 @@ struct apic_enumerator { inthand_t IDTVEC(apic_isr1), IDTVEC(apic_isr2), IDTVEC(apic_isr3), IDTVEC(apic_isr4), IDTVEC(apic_isr5), IDTVEC(apic_isr6), - IDTVEC(apic_isr7), IDTVEC(spuriousint); + IDTVEC(apic_isr7), IDTVEC(spuriousint), IDTVEC(timerint); u_int apic_irq_to_idt(u_int irq); u_int apic_idt_to_irq(u_int vector); @@ -202,6 +200,7 @@ void lapic_ipi_raw(register_t icrlo, u_int dest); void lapic_ipi_vectored(u_int vector, int dest); int lapic_ipi_wait(int delay); void lapic_handle_intr(struct intrframe frame); +void lapic_handle_timer(struct clockframe frame); void lapic_set_logical_id(u_int apic_id, u_int cluster, u_int cluster_id); int lapic_set_lvt_mask(u_int apic_id, u_int lvt, u_char masked); int lapic_set_lvt_mode(u_int apic_id, u_int lvt, u_int32_t mode); @@ -211,6 +210,7 @@ int lapic_set_lvt_triggermode(u_int apic_id, u_int lvt, enum intr_trigger trigger); void lapic_set_tpr(u_int vector); void lapic_setup(void); +int lapic_setup_clock(void); #endif /* !LOCORE */ #endif /* _MACHINE_APICVAR_H_ */ diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h index 6bad810aa9d1..7837120a7f2a 100644 --- a/sys/i386/include/smp.h +++ b/sys/i386/include/smp.h @@ -67,8 +67,6 @@ void ipi_selected(u_int cpus, u_int ipi); void ipi_all(u_int ipi); void ipi_all_but_self(u_int ipi); void ipi_self(u_int ipi); -void forward_statclock(void); -void forward_hardclock(void); void ipi_bitmap_handler(struct clockframe frame); u_int mp_bootaddress(u_int); int mp_grab_cpu_hlt(void); diff --git a/sys/i386/isa/clock.c b/sys/i386/isa/clock.c index 20763b251280..85b34101bf22 100644 --- a/sys/i386/isa/clock.c +++ b/sys/i386/isa/clock.c @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$"); * reintroduced and updated by Chris Stenton 8/10/94 */ +#include "opt_apic.h" #include "opt_clock.h" #include "opt_isa.h" #include "opt_mca.h" @@ -72,8 +73,8 @@ __FBSDID("$FreeBSD$"); #include #include #include -#if defined(SMP) -#include +#ifdef DEV_APIC +#include #endif #include @@ 
-137,6 +138,7 @@ static u_int32_t i8254_lastcount; static u_int32_t i8254_offset; static int (*i8254_pending)(struct intsrc *); static int i8254_ticked; +static int using_lapic_timer; #ifndef BURN_BRIDGES /* * XXX new_function and timer_func should not handle clockframes, but @@ -188,11 +190,8 @@ clkintr(struct clockframe *frame) clkintr_pending = 0; mtx_unlock_spin(&clock_lock); } - timer_func(frame); -#ifdef SMP - if (timer_func == hardclock) - forward_hardclock(); -#endif + if (timer_func != hardclock || !using_lapic_timer) + timer_func(frame); #ifndef BURN_BRIDGES switch (timer0_state) { @@ -200,13 +199,12 @@ clkintr(struct clockframe *frame) break; case ACQUIRED: + if (using_lapic_timer) + break; if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { timer0_prescaler_count -= hardclock_max_count; hardclock(frame); -#ifdef SMP - forward_hardclock(); -#endif } break; @@ -238,10 +236,8 @@ clkintr(struct clockframe *frame) timer0_prescaler_count = 0; timer_func = hardclock; timer0_state = RELEASED; - hardclock(frame); -#ifdef SMP - forward_hardclock(); -#endif + if (!using_lapic_timer) + hardclock(frame); } break; } @@ -377,9 +373,6 @@ rtcintr(struct clockframe *frame) } if (pscnt == psdiv) statclock(frame); -#ifdef SMP - forward_statclock(); -#endif } } @@ -924,7 +917,10 @@ cpu_initclocks() { int diag; - if (statclock_disable) { +#ifdef DEV_APIC + using_lapic_timer = lapic_setup_clock(); +#endif + if (statclock_disable || using_lapic_timer) { /* * The stat interrupt mask is different without the * statistics clock. Also, don't set the interrupt @@ -950,7 +946,7 @@ cpu_initclocks() writertc(RTC_STATUSB, RTCSB_24HR); /* Don't bother enabling the statistics clock. 
*/ - if (!statclock_disable) { + if (!statclock_disable && !using_lapic_timer) { diag = rtcin(RTC_DIAG); if (diag != 0) printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS); @@ -969,6 +965,8 @@ void cpu_startprofclock(void) { + if (using_lapic_timer) + return; rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF; writertc(RTC_STATUSA, rtc_statusa); psdiv = pscnt = psratio; @@ -978,6 +976,8 @@ void cpu_stopprofclock(void) { + if (using_lapic_timer) + return; rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; writertc(RTC_STATUSA, rtc_statusa); psdiv = pscnt = 1; diff --git a/sys/isa/atrtc.c b/sys/isa/atrtc.c index 20763b251280..85b34101bf22 100644 --- a/sys/isa/atrtc.c +++ b/sys/isa/atrtc.c @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$"); * reintroduced and updated by Chris Stenton 8/10/94 */ +#include "opt_apic.h" #include "opt_clock.h" #include "opt_isa.h" #include "opt_mca.h" @@ -72,8 +73,8 @@ __FBSDID("$FreeBSD$"); #include #include #include -#if defined(SMP) -#include +#ifdef DEV_APIC +#include #endif #include @@ -137,6 +138,7 @@ static u_int32_t i8254_lastcount; static u_int32_t i8254_offset; static int (*i8254_pending)(struct intsrc *); static int i8254_ticked; +static int using_lapic_timer; #ifndef BURN_BRIDGES /* * XXX new_function and timer_func should not handle clockframes, but @@ -188,11 +190,8 @@ clkintr(struct clockframe *frame) clkintr_pending = 0; mtx_unlock_spin(&clock_lock); } - timer_func(frame); -#ifdef SMP - if (timer_func == hardclock) - forward_hardclock(); -#endif + if (timer_func != hardclock || !using_lapic_timer) + timer_func(frame); #ifndef BURN_BRIDGES switch (timer0_state) { @@ -200,13 +199,12 @@ clkintr(struct clockframe *frame) break; case ACQUIRED: + if (using_lapic_timer) + break; if ((timer0_prescaler_count += timer0_max_count) >= hardclock_max_count) { timer0_prescaler_count -= hardclock_max_count; hardclock(frame); -#ifdef SMP - forward_hardclock(); -#endif } break; @@ -238,10 +236,8 @@ clkintr(struct clockframe *frame) timer0_prescaler_count = 0; 
timer_func = hardclock; timer0_state = RELEASED; - hardclock(frame); -#ifdef SMP - forward_hardclock(); -#endif + if (!using_lapic_timer) + hardclock(frame); } break; } @@ -377,9 +373,6 @@ rtcintr(struct clockframe *frame) } if (pscnt == psdiv) statclock(frame); -#ifdef SMP - forward_statclock(); -#endif } } @@ -924,7 +917,10 @@ cpu_initclocks() { int diag; - if (statclock_disable) { +#ifdef DEV_APIC + using_lapic_timer = lapic_setup_clock(); +#endif + if (statclock_disable || using_lapic_timer) { /* * The stat interrupt mask is different without the * statistics clock. Also, don't set the interrupt @@ -950,7 +946,7 @@ cpu_initclocks() writertc(RTC_STATUSB, RTCSB_24HR); /* Don't bother enabling the statistics clock. */ - if (!statclock_disable) { + if (!statclock_disable && !using_lapic_timer) { diag = rtcin(RTC_DIAG); if (diag != 0) printf("RTC BIOS diagnostic error %b\n", diag, RTCDG_BITS); @@ -969,6 +965,8 @@ void cpu_startprofclock(void) { + if (using_lapic_timer) + return; rtc_statusa = RTCSA_DIVIDER | RTCSA_PROF; writertc(RTC_STATUSA, rtc_statusa); psdiv = pscnt = psratio; @@ -978,6 +976,8 @@ void cpu_stopprofclock(void) { + if (using_lapic_timer) + return; rtc_statusa = RTCSA_DIVIDER | RTCSA_NOPROF; writertc(RTC_STATUSA, rtc_statusa); psdiv = pscnt = 1;