From b20c19bc667aef513fa9f5449df2d4a833e42d3d Mon Sep 17 00:00:00 2001 From: kmacy Date: Thu, 23 Oct 2008 07:20:43 +0000 Subject: [PATCH] Fix IPI support --- sys/i386/include/pcpu.h | 29 ++++++++++++- sys/i386/include/smp.h | 4 +- sys/i386/include/xen/xen-os.h | 5 +-- sys/i386/include/xen/xen_intr.h | 2 +- sys/i386/xen/clock.c | 72 ++++++++++++++++++++++++--------- sys/i386/xen/exception.s | 5 +-- sys/i386/xen/mp_machdep.c | 66 ++++++++++++++++++++++++------ sys/xen/evtchn/evtchn.c | 41 ++++++++++++------- 8 files changed, 165 insertions(+), 59 deletions(-) diff --git a/sys/i386/include/pcpu.h b/sys/i386/include/pcpu.h index 5690e866a290..594077cf723a 100644 --- a/sys/i386/include/pcpu.h +++ b/sys/i386/include/pcpu.h @@ -45,6 +45,24 @@ */ #ifdef XEN +#ifndef NR_VIRQS +#define NR_VIRQS 24 +#endif +#ifndef NR_IPIS +#define NR_IPIS 2 +#endif + +/* These are periodically updated in shared_info, and then copied here. */ +struct shadow_time_info { + uint64_t tsc_timestamp; /* TSC at last update of time vals. */ + uint64_t system_timestamp; /* Time, in nanosecs, since boot. 
*/ + uint32_t tsc_to_nsec_mul; + uint32_t tsc_to_usec_mul; + int tsc_shift; + uint32_t version; +}; + + #define PCPU_MD_FIELDS \ char pc_monitorbuf[128] __aligned(128); /* cache line */ \ struct pcpu *pc_prvspace; /* Self-reference */ \ @@ -62,7 +80,14 @@ u_int pc_pdir; \ u_int pc_lazypmap; \ u_int pc_rendezvous; \ - u_int pc_cpuast + u_int pc_cpuast; \ + uint64_t pc_processed_system_time; \ + struct shadow_time_info pc_shadow_time; \ + int pc_resched_irq; \ + int pc_callfunc_irq; \ + int pc_virq_to_irq[NR_VIRQS]; \ + int pc_ipi_to_irq[NR_IPIS] + #else @@ -88,7 +113,7 @@ extern struct pcpu *pcpup; #define PCPU_GET(member) (pcpup->pc_ ## member) -#define PCPU_ADD(member, val) (pcpu->pc_ ## member += (val)) +#define PCPU_ADD(member, val) (pcpup->pc_ ## member += (val)) #define PCPU_INC(member) PCPU_ADD(member, 1) #define PCPU_PTR(member) (&pcpup->pc_ ## member) #define PCPU_SET(member, val) (pcpup->pc_ ## member = (val)) diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h index ef3cbbbe1dc9..53b2b65cc7e1 100644 --- a/sys/i386/include/smp.h +++ b/sys/i386/include/smp.h @@ -86,8 +86,8 @@ int ipi_nmi_handler(void); #ifdef XEN void ipi_to_irq_init(void); -#define RESCHEDULE_VECTOR 0 -#define CALL_FUNCTION_VECTOR 1 +#define CALL_FUNCTION_VECTOR 0 +#define RESCHEDULE_VECTOR 1 #define NR_IPIS 2 #endif diff --git a/sys/i386/include/xen/xen-os.h b/sys/i386/include/xen/xen-os.h index 3ea05563ecee..98341b65b264 100644 --- a/sys/i386/include/xen/xen-os.h +++ b/sys/i386/include/xen/xen-os.h @@ -77,10 +77,7 @@ static inline void rep_nop(void) #define __builtin_expect(x, expected_value) (x) #endif -#define DEFINE_PER_CPU(type, name) \ - __typeof__(type) per_cpu__##name - -#define per_cpu(var, cpu) (*((void)cpu, &per_cpu__##var)) +#define per_cpu(var, cpu) (pcpu_find((cpu))->pc_ ## var) /* crude memory allocator for memory allocation early in * boot diff --git a/sys/i386/include/xen/xen_intr.h b/sys/i386/include/xen/xen_intr.h index 8ee4a4582efe..a0e6c8844404 100644 --- 
a/sys/i386/include/xen/xen_intr.h +++ b/sys/i386/include/xen/xen_intr.h @@ -43,7 +43,7 @@ extern int bind_virq_to_irqhandler(unsigned int virq, unsigned int cpu, const ch extern int bind_ipi_to_irqhandler(unsigned int ipi, unsigned int cpu, const char *devname, - driver_intr_t handler, + driver_filter_t handler, unsigned long irqflags); extern int bind_interdomain_evtchn_to_irqhandler(unsigned int remote_domain, diff --git a/sys/i386/xen/clock.c b/sys/i386/xen/clock.c index b7548003a74d..ee574f2bed6a 100644 --- a/sys/i386/xen/clock.c +++ b/sys/i386/xen/clock.c @@ -154,19 +154,6 @@ SYSCTL_INT(_machdep, OID_AUTO, xen_disable_rtc_set, }) -/* These are peridically updated in shared_info, and then copied here. */ -struct shadow_time_info { - uint64_t tsc_timestamp; /* TSC at last update of time vals. */ - uint64_t system_timestamp; /* Time, in nanosecs, since boot. */ - uint32_t tsc_to_nsec_mul; - uint32_t tsc_to_usec_mul; - int tsc_shift; - uint32_t version; -}; -static DEFINE_PER_CPU(uint64_t, processed_system_time); -static DEFINE_PER_CPU(struct shadow_time_info, shadow_time); - - #define NS_PER_TICK (1000000000ULL/hz) #define rdtscll(val) \ @@ -300,10 +287,11 @@ static struct timecounter xen_timecounter = { 0 /* quality */ }; -static void -clkintr(struct trapframe *frame) +static int +clkintr(void *arg) { int64_t delta_cpu, delta; + struct trapframe *frame = (struct trapframe *)arg; int cpu = smp_processor_id(); struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu); @@ -319,7 +307,7 @@ clkintr(struct trapframe *frame) if (unlikely(delta < (int64_t)0) || unlikely(delta_cpu < (int64_t)0)) { printf("Timer ISR: Time went backwards: %lld\n", delta); - return; + return (FILTER_HANDLED); } /* Process elapsed ticks since last call. 
*/ @@ -341,6 +329,54 @@ clkintr(struct trapframe *frame) } /* XXX TODO */ + return (FILTER_HANDLED); +} + +int clkintr2(void *arg); + +int +clkintr2(void *arg) +{ + int64_t delta_cpu, delta; + struct trapframe *frame = (struct trapframe *)arg; + int cpu = smp_processor_id(); + struct shadow_time_info *shadow = &per_cpu(shadow_time, cpu); + + do { + __get_time_values_from_xen(); + + delta = delta_cpu = + shadow->system_timestamp + get_nsec_offset(shadow); + delta -= processed_system_time; + delta_cpu -= per_cpu(processed_system_time, cpu); + + } while (!time_values_up_to_date(cpu)); + + if (unlikely(delta < (int64_t)0) || unlikely(delta_cpu < (int64_t)0)) { + printf("Timer ISR: Time went backwards: %lld\n", delta); + return (FILTER_HANDLED); + } + + /* Process elapsed ticks since last call. */ + if (delta >= NS_PER_TICK) { + processed_system_time += (delta / NS_PER_TICK) * NS_PER_TICK; + per_cpu(processed_system_time, cpu) += (delta_cpu / NS_PER_TICK) * NS_PER_TICK; + } + hardclock(TRAPF_USERMODE(frame), TRAPF_PC(frame)); + + /* + * Take synchronised time from Xen once a minute if we're not + * synchronised ourselves, and we haven't chosen to keep an independent + * time base. 
+ */ + + if (shadow_tv_version != HYPERVISOR_shared_info->wc_version) { + update_wallclock(); + tc_setclock(&shadow_tv); + } + + /* XXX TODO */ + return (FILTER_HANDLED); } static uint32_t @@ -760,7 +796,7 @@ cpu_initclocks(void) &xen_set_periodic_tick); if ((time_irq = bind_virq_to_irqhandler(VIRQ_TIMER, 0, "clk", - (driver_filter_t *)clkintr, NULL, + clkintr, NULL, INTR_TYPE_CLK | INTR_FAST)) < 0) { panic("failed to register clock interrupt\n"); } @@ -780,7 +816,7 @@ ap_cpu_initclocks(int cpu) &xen_set_periodic_tick); if ((time_irq = bind_virq_to_irqhandler(VIRQ_TIMER, cpu, "clk", - (driver_filter_t *)clkintr, NULL, + clkintr2, NULL, INTR_TYPE_CLK | INTR_FAST)) < 0) { panic("failed to register clock interrupt\n"); } diff --git a/sys/i386/xen/exception.s b/sys/i386/xen/exception.s index cc3188420d12..607f96a46b2f 100644 --- a/sys/i386/xen/exception.s +++ b/sys/i386/xen/exception.s @@ -51,13 +51,10 @@ #ifdef SMP -#ifdef notyet -#define GET_VCPU_INFO movl TI_cpu(%ebp),reg ; \ +#define GET_VCPU_INFO(reg) movl PCPU(CPUID),reg ; \ shl $sizeof_vcpu_shift,reg ; \ addl HYPERVISOR_shared_info,reg #else -#endif - #define GET_VCPU_INFO(reg) movl HYPERVISOR_shared_info,reg #endif diff --git a/sys/i386/xen/mp_machdep.c b/sys/i386/xen/mp_machdep.c index 4b1ee3b736f1..3036c0e35ddb 100644 --- a/sys/i386/xen/mp_machdep.c +++ b/sys/i386/xen/mp_machdep.c @@ -101,8 +101,6 @@ extern struct pcpu __pcpu[]; static int bootAP; static union descriptor *bootAPgdt; -static DEFINE_PER_CPU(int, resched_irq); -static DEFINE_PER_CPU(int, callfunc_irq); static char resched_name[NR_CPUS][15]; static char callfunc_name[NR_CPUS][15]; @@ -311,12 +309,14 @@ static void iv_invltlb(uintptr_t a, uintptr_t b) { xen_tlb_flush(); + atomic_add_int(&smp_tlb_wait, 1); } static void iv_invlpg(uintptr_t a, uintptr_t b) { xen_invlpg(a); + atomic_add_int(&smp_tlb_wait, 1); } static void @@ -329,6 +329,7 @@ iv_invlrng(uintptr_t a, uintptr_t b) xen_invlpg(start); start += PAGE_SIZE; } + atomic_add_int(&smp_tlb_wait, 
1); } @@ -337,6 +338,7 @@ iv_invlcache(uintptr_t a, uintptr_t b) { wbinvd(); + atomic_add_int(&smp_tlb_wait, 1); } static void @@ -349,7 +351,24 @@ static void iv_bitmap_vector(uintptr_t a, uintptr_t b) { - + int cpu = PCPU_GET(cpuid); + u_int ipi_bitmap; + + ipi_bitmap = atomic_readandclear_int(&cpu_ipi_pending[cpu]); + + if (ipi_bitmap & (1 << IPI_PREEMPT)) { +#ifdef COUNT_IPIS + (*ipi_preempt_counts[cpu])++; +#endif + sched_preempt(curthread); + } + + if (ipi_bitmap & (1 << IPI_AST)) { +#ifdef COUNT_IPIS + (*ipi_ast_counts[cpu])++; +#endif + /* Nothing to do for AST */ + } } @@ -368,9 +387,10 @@ static call_data_func_t *ipi_vectors[IPI_BITMAP_VECTOR + 1] = * all the work is done automatically when * we return from the interrupt. */ -static void +static int smp_reschedule_interrupt(void *unused) { + return (FILTER_HANDLED); } struct _call_data { @@ -384,7 +404,7 @@ struct _call_data { static struct _call_data *call_data; -static void +static int smp_call_function_interrupt(void *unused) { call_data_func_t *func = call_data->func; @@ -407,6 +427,7 @@ smp_call_function_interrupt(void *unused) mb(); atomic_inc(&call_data->finished); } + return (FILTER_HANDLED); } /* @@ -432,7 +453,6 @@ cpu_mp_announce(void) } } - static int xen_smp_intr_init(unsigned int cpu) { @@ -445,8 +465,11 @@ xen_smp_intr_init(unsigned int cpu) cpu, resched_name[cpu], smp_reschedule_interrupt, - INTR_FAST); + INTR_FAST|INTR_TYPE_TTY|INTR_MPSAFE); + printf("cpu=%d irq=%d vector=%d\n", + cpu, rc, RESCHEDULE_VECTOR); + per_cpu(resched_irq, cpu) = rc; sprintf(callfunc_name[cpu], "callfunc%u", cpu); @@ -454,11 +477,15 @@ xen_smp_intr_init(unsigned int cpu) cpu, callfunc_name[cpu], smp_call_function_interrupt, - INTR_FAST); + INTR_FAST|INTR_TYPE_TTY|INTR_MPSAFE); if (rc < 0) goto fail; per_cpu(callfunc_irq, cpu) = rc; + printf("cpu=%d irq=%d vector=%d\n", + cpu, rc, CALL_FUNCTION_VECTOR); + + if ((cpu != 0) && ((rc = ap_cpu_initclocks(cpu)) != 0)) goto fail; @@ -472,6 +499,15 @@ 
xen_smp_intr_init(unsigned int cpu) return rc; } +static void +xen_smp_intr_init_cpus(void *unused) +{ + int i; + + for (i = 0; i < mp_ncpus; i++) + xen_smp_intr_init(i); +} + #define MTOPSIZE (1<<(14 + PAGE_SHIFT)) /* @@ -581,7 +617,6 @@ init_secondary(void) smp_active = 1; /* historic */ } - xen_smp_intr_init(bootAP); mtx_unlock_spin(&ap_boot_mtx); /* wait until all the AP's are up */ @@ -689,7 +724,6 @@ start_all_aps(void) /* set up temporary P==V mapping for AP boot */ /* XXX this is a hack, we should boot the AP on its own stack/PTD */ - xen_smp_intr_init(0); /* start each AP */ for (cpu = 1; cpu < mp_ncpus; cpu++) { apic_id = cpu_apic_ids[cpu]; @@ -923,14 +957,16 @@ static void smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) { u_int ncpu; + struct _call_data data; + call_data = &data; + ncpu = mp_ncpus - 1; /* does not shootdown self */ if (ncpu < 1) return; /* no other cpus */ if (!(read_eflags() & PSL_I)) panic("%s: interrupts disabled", __func__); mtx_lock_spin(&smp_ipi_mtx); - call_data->func = ipi_vectors[vector]; call_data->arg1 = addr1; call_data->arg2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); @@ -1052,7 +1088,10 @@ ipi_selected(u_int32_t cpus, u_int ipi) u_int bitmap = 0; u_int old_pending; u_int new_pending; + struct _call_data data; + call_data = &data; + if (IPI_IS_BITMAPED(ipi)) { bitmap = 1 << ipi; ipi = IPI_BITMAP_VECTOR; @@ -1082,7 +1121,7 @@ ipi_selected(u_int32_t cpus, u_int ipi) continue; } call_data->func = ipi_vectors[ipi]; - ipi_pcpu(cpu, ipi); + ipi_pcpu(cpu, CALL_FUNCTION_VECTOR); } } @@ -1098,7 +1137,7 @@ ipi_all_but_self(u_int ipi) return; } CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); - ipi_selected(((int)-1 & ~(1 << curcpu)), ipi); + ipi_selected((all_cpus & ~(1 << curcpu)), ipi); } #ifdef STOP_NMI @@ -1194,4 +1233,5 @@ release_aps(void *dummy __unused) ia32_pause(); } SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); +SYSINIT(start_ipis, SI_SUB_INTR, SI_ORDER_ANY, 
xen_smp_intr_init_cpus, NULL); diff --git a/sys/xen/evtchn/evtchn.c b/sys/xen/evtchn/evtchn.c index 53b41accb427..155b38e14e5f 100644 --- a/sys/xen/evtchn/evtchn.c +++ b/sys/xen/evtchn/evtchn.c @@ -18,6 +18,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -26,6 +27,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include @@ -113,13 +115,14 @@ enum { #define type_from_irq(irq) ((uint8_t)(irq_info[irq] >> 24)) /* IRQ <-> VIRQ mapping. */ -DEFINE_PER_CPU(int, virq_to_irq[NR_VIRQS]) = {[0 ... NR_VIRQS-1] = -1}; /* IRQ <-> IPI mapping. */ -#ifndef NR_IPIS +#ifndef NR_IPIS +#ifdef SMP +#error "NR_IPIS not defined" +#endif #define NR_IPIS 1 #endif -DEFINE_PER_CPU(int, ipi_to_irq[NR_IPIS]) = {[0 ... NR_IPIS-1] = -1}; /* Bitmap indicating which PIRQs require Xen to be notified on unmask. */ static unsigned long pirq_needs_unmask_notify[NR_PIRQS/sizeof(unsigned long)]; @@ -222,8 +225,11 @@ evtchn_do_upcall(struct trapframe *frame) void ipi_pcpu(unsigned int cpu, int vector) { - int irq = per_cpu(ipi_to_irq, cpu)[vector]; + int irq; + irq = per_cpu(ipi_to_irq, cpu)[vector]; + irq = (pcpu_find((cpu))->pc_ipi_to_irq)[vector]; + notify_remote_via_irq(irq); } @@ -331,6 +337,9 @@ bind_virq_to_irq(unsigned int virq, unsigned int cpu) mtx_lock_spin(&irq_mapping_update_lock); if ((irq = per_cpu(virq_to_irq, cpu)[virq]) == -1) { + if ((irq = find_unbound_irq()) < 0) + goto out; + bind_virq.virq = virq; bind_virq.vcpu = cpu; PANIC_IF(HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, @@ -338,7 +347,6 @@ bind_virq_to_irq(unsigned int virq, unsigned int cpu) evtchn = bind_virq.port; - irq = find_unbound_irq(); evtchn_to_irq[evtchn] = irq; irq_info[irq] = mk_irq_info(IRQT_VIRQ, virq, evtchn); @@ -348,7 +356,7 @@ bind_virq_to_irq(unsigned int virq, unsigned int cpu) } irq_bindcount[irq]++; - +out: mtx_unlock_spin(&irq_mapping_update_lock); return irq; @@ -370,7 +378,6 @@ bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) 
PANIC_IF(HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi) != 0); evtchn = bind_ipi.port; - irq = find_unbound_irq(); evtchn_to_irq[evtchn] = irq; irq_info[irq] = mk_irq_info(IRQT_IPI, ipi, evtchn); @@ -378,7 +385,6 @@ bind_ipi_to_irq(unsigned int ipi, unsigned int cpu) bind_evtchn_to_cpu(evtchn, cpu); } - irq_bindcount[irq]++; out: @@ -515,15 +521,15 @@ int bind_ipi_to_irqhandler(unsigned int ipi, unsigned int cpu, const char *devname, - driver_intr_t handler, + driver_filter_t filter, unsigned long irqflags) { - unsigned int irq; - int retval; - + int irq, retval; + irq = bind_ipi_to_irq(ipi, cpu); intr_register_source(&xp->xp_pins[irq].xp_intsrc); - retval = intr_add_handler(devname, irq, NULL, handler, NULL, irqflags, NULL); + retval = intr_add_handler(devname, irq, filter, NULL, + NULL, irqflags, NULL); if (retval != 0) { unbind_from_irq(irq); return -retval; @@ -760,6 +766,8 @@ notify_remote_via_irq(int irq) if (VALID_EVTCHN(evtchn)) notify_remote_via_evtchn(evtchn); + else + panic("invalid evtchn"); } /* required for support of physical devices */ @@ -810,6 +818,9 @@ xenpic_pirq_enable_intr(struct intsrc *isrc) bind_pirq.flags = probing_irq(irq) ? 0 : BIND_PIRQ__WILL_SHARE; if (HYPERVISOR_event_channel_op(EVTCHNOP_bind_pirq, &bind_pirq) != 0) { +#ifndef XEN_PRIVILEGED_GUEST + panic("unexpected pirq call"); +#endif if (!probing_irq(irq)) /* Some failures are expected when probing. */ printf("Failed to obtain physical IRQ %d\n", irq); mtx_unlock_spin(&irq_mapping_update_lock); @@ -1037,7 +1048,7 @@ evtchn_init(void *dummy __unused) struct xenpic_intsrc *pin, *tpin; /* No VIRQ or IPI bindings. 
*/ - for (cpu = 0; cpu < NR_CPUS; cpu++) { + for (cpu = 0; cpu < mp_ncpus; cpu++) { for (i = 0; i < NR_VIRQS; i++) per_cpu(virq_to_irq, cpu)[i] = -1; for (i = 0; i < NR_IPIS; i++) @@ -1104,7 +1115,7 @@ evtchn_init(void *dummy __unused) } } -SYSINIT(evtchn_init, SI_SUB_INTR, SI_ORDER_ANY, evtchn_init, NULL); +SYSINIT(evtchn_init, SI_SUB_INTR, SI_ORDER_MIDDLE, evtchn_init, NULL); /* * irq_mapping_update_lock: in order to allow an interrupt to occur in a critical * section, to set pcpu->ipending (etc...) properly, we