From fd036deac1695c4188a12f075bdf1280dc260b22 Mon Sep 17 00:00:00 2001 From: John Baldwin Date: Tue, 28 Aug 2018 21:09:19 +0000 Subject: [PATCH] Dynamically allocate IRQ ranges on x86. Previously, x86 used static ranges of IRQ values for different types of I/O interrupts. Interrupt pins on I/O APICs and 8259A PICs used IRQ values from 0 to 254. MSI interrupts used a compile-time-defined range starting at 256, and Xen event channels used a compile-time-defined range after MSI. Some recent systems have more than 255 I/O APIC interrupt pins which resulted in those IRQ values overflowing into the MSI range triggering an assertion failure. Replace statically assigned ranges with dynamic ranges. Do a single pass computing the sizes of the IRQ ranges (PICs, MSI, Xen) to determine the total number of IRQs required. Allocate the interrupt source and interrupt count arrays dynamically once this pass has completed. To minimize runtime complexity these arrays are only sized once during bootup. The PIC range is determined by the PICs present in the system. The MSI and Xen ranges continue to use a fixed size, though this does make it possible to turn the MSI range size into a tunable in the future. As a result, various places are updated to use dynamic limits instead of constants. In addition, the vmstat(8) utility has been taught to understand that some kernels may treat 'intrcnt' and 'intrnames' as pointers rather than arrays when extracting interrupt stats from a crashdump. This is determined by the presence (vs absence) of a global 'nintrcnt' symbol. This change reverts r189404 which worked around a buggy BIOS which enumerated an I/O APIC twice (using the same memory mapped address for both entries but using an IRQ base of 256 for one entry and a valid IRQ base for the second entry). Making the "base" of MSI IRQ values dynamic avoids the panic that r189404 worked around, and there may now be valid I/O APICs with an IRQ base above 256 which this workaround would incorrectly skip. If in the future the issue reported in PR 130483 reoccurs, we will have to add a pass over the I/O APIC entries in the MADT to detect duplicates using the memory mapped address and use some strategy to choose the "correct" one. While here, reserve room in intrcnts for the Hyper-V counters. PR: 229429, 130483 Reviewed by: kib, royger, cem Tested by: royger (Xen), kib (DMAR) Approved by: re (gjb) MFC after: 2 weeks Differential Revision: https://reviews.freebsd.org/D16861 --- sys/sys/interrupt.h | 5 ++ sys/x86/acpica/madt.c | 4 -- sys/x86/include/apicvar.h | 8 +-- sys/x86/include/intr_machdep.h | 70 +++++++++------------ sys/x86/iommu/intel_intrmap.c | 2 +- sys/x86/isa/atpic.c | 57 ++++++++++------- sys/x86/x86/intr_machdep.c | 112 +++++++++++++++++++++++++++------ sys/x86/x86/io_apic.c | 46 +++++++++----- sys/x86/x86/local_apic.c | 66 +++++++++++++------ sys/x86/x86/msi.c | 28 +++++---- sys/x86/x86/nexus.c | 4 +- sys/x86/xen/xen_intr.c | 55 ++++++++++------ sys/x86/xen/xen_msi.c | 9 ++- sys/x86/xen/xen_nexus.c | 2 +- usr.bin/vmstat/vmstat.c | 36 ++++++++++- 15 files changed, 340 insertions(+), 164 deletions(-) diff --git a/sys/sys/interrupt.h b/sys/sys/interrupt.h index d3432c078f51..105bb968a6b3 100644 --- a/sys/sys/interrupt.h +++ b/sys/sys/interrupt.h @@ -154,8 +154,13 @@ extern struct intr_event *clk_intr_event; extern void *vm_ih; /* Counts and names for statistics (defined in MD code). */ +#if defined(__amd64__) || defined(__i386__) +extern u_long *intrcnt; /* counts for for each device and stray */ +extern char *intrnames; /* string table containing device names */ +#else extern u_long intrcnt[]; /* counts for for each device and stray */ extern char intrnames[]; /* string table containing device names */ +#endif extern size_t sintrcnt; /* size of intrcnt table */ extern size_t sintrnames; /* size of intrnames table */ diff --git a/sys/x86/acpica/madt.c b/sys/x86/acpica/madt.c index d9b58d3e7442..d590a639bf71 100644 --- a/sys/x86/acpica/madt.c +++ b/sys/x86/acpica/madt.c @@ -428,10 +428,6 @@ madt_parse_apics(ACPI_SUBTABLE_HEADER *entry, void *arg __unused) apic->Id); if (ioapics[apic->Id].io_apic != NULL) panic("%s: Double APIC ID %u", __func__, apic->Id); - if (apic->GlobalIrqBase >= FIRST_MSI_INT) { - printf("MADT: Ignoring bogus I/O APIC ID %u", apic->Id); - break; - } ioapics[apic->Id].io_apic = ioapic_create(apic->Address, apic->Id, apic->GlobalIrqBase); ioapics[apic->Id].io_vector = apic->GlobalIrqBase; diff --git a/sys/x86/include/apicvar.h b/sys/x86/include/apicvar.h index aecae5467309..7c80f62d0813 100644 --- a/sys/x86/include/apicvar.h +++ b/sys/x86/include/apicvar.h @@ -158,10 +158,10 @@ #define APIC_BUS_PCI 2 #define APIC_BUS_MAX APIC_BUS_PCI -#define IRQ_EXTINT (NUM_IO_INTS + 1) -#define IRQ_NMI (NUM_IO_INTS + 2) -#define IRQ_SMI (NUM_IO_INTS + 3) -#define IRQ_DISABLED (NUM_IO_INTS + 4) +#define IRQ_EXTINT -1 +#define IRQ_NMI -2 +#define IRQ_SMI -3 +#define IRQ_DISABLED -4 /* * An APIC enumerator is a pseudo bus driver that enumerates APIC's including diff --git a/sys/x86/include/intr_machdep.h b/sys/x86/include/intr_machdep.h index ee1c4418e026..d9e7f881ffde 100644 --- a/sys/x86/include/intr_machdep.h +++ b/sys/x86/include/intr_machdep.h @@ -34,55 +34,41 @@ #ifdef _KERNEL /* - * The maximum number of I/O interrupts we allow. This number is rather - * arbitrary as it is just the maximum IRQ resource value. The interrupt - * source for a given IRQ maps that I/O interrupt to device interrupt - * source whether it be a pin on an interrupt controller or an MSI interrupt. - * The 16 ISA IRQs are assigned fixed IDT vectors, but all other device - * interrupts allocate IDT vectors on demand. Currently we have 191 IDT - * vectors available for device interrupts. On many systems with I/O APICs, - * a lot of the IRQs are not used, so this number can be much larger than - * 191 and still be safe since only interrupt sources in actual use will - * allocate IDT vectors. + * Values used in determining the allocation of IRQ values among + * different types of I/O interrupts. These values are used as + * indices into a interrupt source array to map I/O interrupts to a + * device interrupt source whether it be a pin on an interrupt + * controller or an MSI interrupt. The 16 ISA IRQs are assigned fixed + * IDT vectors, but all other device interrupts allocate IDT vectors + * on demand. Currently we have 191 IDT vectors available for device + * interrupts on each CPU. On many systems with I/O APICs, a lot of + * the IRQs are not used, so the total number of IRQ values reserved + * can exceed the number of available IDT slots. * - * The first 255 IRQs (0 - 254) are reserved for ISA IRQs and PCI intline IRQs. - * IRQ values from 256 to 767 are used by MSI. When running under the Xen - * Hypervisor, IRQ values from 768 to 4863 are available for binding to - * event channel events. We leave 255 unused to avoid confusion since 255 is - * used in PCI to indicate an invalid IRQ. + * The first 16 IRQs (0 - 15) are reserved for ISA IRQs. Interrupt + * pins on I/O APICs for non-ISA interrupts use IRQ values starting at + * IRQ 17. This layout matches the GSI numbering used by ACPI so that + * IRQ values returned by ACPI methods such as _CRS can be used + * directly by the ACPI bus driver. + * + * MSI interrupts allocate a block of interrupts starting at either + * the end of the I/O APIC range or 256, whichever is higher. When + * running under the Xen Hypervisor, an additional range of IRQ values + * are available for binding to event channel events. We use 256 as + * the minimum IRQ value for MSI interrupts to attempt to leave 255 + * unused since 255 is used in PCI to indicate an invalid INTx IRQ. */ #define NUM_MSI_INTS 512 -#define FIRST_MSI_INT 256 -#ifdef XENHVM -#include -#include -#define NUM_EVTCHN_INTS NR_EVENT_CHANNELS -#define FIRST_EVTCHN_INT \ - (FIRST_MSI_INT + NUM_MSI_INTS) -#define LAST_EVTCHN_INT \ - (FIRST_EVTCHN_INT + NUM_EVTCHN_INTS - 1) -#else -#define NUM_EVTCHN_INTS 0 -#endif -#define NUM_IO_INTS (FIRST_MSI_INT + NUM_MSI_INTS + NUM_EVTCHN_INTS) +#define MINIMUM_MSI_INT 256 + +extern u_int first_msi_irq; +extern u_int num_io_irqs; /* * Default base address for MSI messages on x86 platforms. */ #define MSI_INTEL_ADDR_BASE 0xfee00000 -/* - * - 1 ??? dummy counter. - * - 2 counters for each I/O interrupt. - * - 1 counter for each CPU for lapic timer. - * - 8 counters for each CPU for IPI counters for SMP. - */ -#ifdef SMP -#define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + (1 + 8) * MAXCPU) -#else -#define INTRCNT_COUNT (1 + NUM_IO_INTS * 2 + 1) -#endif - #ifndef LOCORE typedef void inthand_t(void); @@ -97,6 +83,7 @@ struct intsrc; * return the vector associated with this source. */ struct pic { + void (*pic_register_sources)(struct pic *); void (*pic_enable_source)(struct intsrc *); void (*pic_disable_source)(struct intsrc *, int); void (*pic_eoi_source)(struct intsrc *); @@ -184,6 +171,9 @@ int msi_map(int irq, uint64_t *addr, uint32_t *data); int msi_release(int *irqs, int count); int msix_alloc(device_t dev, int *irq); int msix_release(int irq); +#ifdef XENHVM +void xen_intr_alloc_irqs(void); +#endif #endif /* !LOCORE */ #endif /* _KERNEL */ diff --git a/sys/x86/iommu/intel_intrmap.c b/sys/x86/iommu/intel_intrmap.c index 32e11a8347e8..89d06b23a503 100644 --- a/sys/x86/iommu/intel_intrmap.c +++ b/sys/x86/iommu/intel_intrmap.c @@ -337,7 +337,7 @@ dmar_init_irt(struct dmar_unit *unit) "QI disabled, disabling interrupt remapping\n"); return (0); } - unit->irte_cnt = clp2(NUM_IO_INTS); + unit->irte_cnt = clp2(num_io_irqs); unit->irt = (dmar_irte_t *)(uintptr_t)kmem_alloc_contig( unit->irte_cnt * sizeof(dmar_irte_t), M_ZERO | M_WAITOK, 0, dmar_high, PAGE_SIZE, 0, DMAR_IS_COHERENT(unit) ? diff --git a/sys/x86/isa/atpic.c b/sys/x86/isa/atpic.c index 33a0dd4ca266..8560793df503 100644 --- a/sys/x86/isa/atpic.c +++ b/sys/x86/isa/atpic.c @@ -95,6 +95,7 @@ inthand_t #define ATPIC(io, base, eoi) { \ .at_pic = { \ + .pic_register_sources = atpic_register_sources, \ .pic_enable_source = atpic_enable_source, \ .pic_disable_source = atpic_disable_source, \ .pic_eoi_source = (eoi), \ @@ -133,6 +134,7 @@ struct atpic_intsrc { u_long at_straycount; }; +static void atpic_register_sources(struct pic *pic); static void atpic_enable_source(struct intsrc *isrc); static void atpic_disable_source(struct intsrc *isrc, int eoi); static void atpic_eoi_master(struct intsrc *isrc); @@ -202,6 +204,36 @@ _atpic_eoi_slave(struct intsrc *isrc) #endif } +static void +atpic_register_sources(struct pic *pic) +{ + struct atpic *ap = (struct atpic *)pic; + struct atpic_intsrc *ai; + int i; + + /* + * If any of the ISA IRQs have an interrupt source already, then + * assume that the I/O APICs are being used and don't register any + * of our interrupt sources. This makes sure we don't accidentally + * use mixed mode. The "accidental" use could otherwise occur on + * machines that route the ACPI SCI interrupt to a different ISA + * IRQ (at least one machine routes it to IRQ 13) thus disabling + * that APIC ISA routing and allowing the ATPIC source for that IRQ + * to leak through. We used to depend on this feature for routing + * IRQ0 via mixed mode, but now we don't use mixed mode at all. + */ + for (i = 0; i < NUM_ISA_IRQS; i++) + if (intr_lookup_source(i) != NULL) + return; + + /* Loop through all interrupt sources and add them. */ + for (i = 0, ai = atintrs + ap->at_irqbase; i < 8; i++, ai++) { + if (ap->at_irqbase + i == ICU_SLAVEID) + continue; + intr_register_source(&ai->at_intsrc); + } +} + static void atpic_enable_source(struct intsrc *isrc) { @@ -467,8 +499,6 @@ atpic_startup(void) static void atpic_init(void *dummy __unused) { - struct atpic_intsrc *ai; - int i; /* * Register our PICs, even if we aren't going to use any of their @@ -478,27 +508,8 @@ atpic_init(void *dummy __unused) intr_register_pic(&atpics[1].at_pic) != 0) panic("Unable to register ATPICs"); - /* - * If any of the ISA IRQs have an interrupt source already, then - * assume that the APICs are being used and don't register any - * of our interrupt sources. This makes sure we don't accidentally - * use mixed mode. The "accidental" use could otherwise occur on - * machines that route the ACPI SCI interrupt to a different ISA - * IRQ (at least one machines routes it to IRQ 13) thus disabling - * that APIC ISA routing and allowing the ATPIC source for that IRQ - * to leak through. We used to depend on this feature for routing - * IRQ0 via mixed mode, but now we don't use mixed mode at all. - */ - for (i = 0; i < NUM_ISA_IRQS; i++) - if (intr_lookup_source(i) != NULL) - return; - - /* Loop through all interrupt sources and add them. */ - for (i = 0, ai = atintrs; i < NUM_ISA_IRQS; i++, ai++) { - if (i == ICU_SLAVEID) - continue; - intr_register_source(&ai->at_intsrc); - } + if (num_io_irqs == 0) + num_io_irqs = NUM_ISA_IRQS; } SYSINIT(atpic_init, SI_SUB_INTR, SI_ORDER_FOURTH, atpic_init, NULL); diff --git a/sys/x86/x86/intr_machdep.c b/sys/x86/x86/intr_machdep.c index a05b0ef36496..a879d616d17a 100644 --- a/sys/x86/x86/intr_machdep.c +++ b/sys/x86/x86/intr_machdep.c @@ -38,6 +38,7 @@ #include "opt_atpic.h" #include "opt_ddb.h" +#include "opt_smp.h" #include #include @@ -45,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -78,10 +80,9 @@ typedef void (*mask_fn)(void *); static int intrcnt_index; -static struct intsrc *interrupt_sources[NUM_IO_INTS]; +static struct intsrc **interrupt_sources; #ifdef SMP -static struct intsrc *interrupt_sorted[NUM_IO_INTS]; -CTASSERT(sizeof(interrupt_sources) == sizeof(interrupt_sorted)); +static struct intsrc **interrupt_sorted; static int intrbalance; SYSCTL_INT(_hw, OID_AUTO, intrbalance, CTLFLAG_RW, &intrbalance, 0, "Interrupt auto-balance interval (seconds). Zero disables."); @@ -91,15 +92,19 @@ static struct sx intrsrc_lock; static struct mtx intrpic_lock; static struct mtx intrcnt_lock; static TAILQ_HEAD(pics_head, pic) pics; +u_int num_io_irqs; #if defined(SMP) && !defined(EARLY_AP_STARTUP) static int assign_cpu; #endif -u_long intrcnt[INTRCNT_COUNT]; -char intrnames[INTRCNT_COUNT * (MAXCOMLEN + 1)]; +u_long *intrcnt; +char *intrnames; size_t sintrcnt = sizeof(intrcnt); size_t sintrnames = sizeof(intrnames); +int nintrcnt; + +static MALLOC_DEFINE(M_INTR, "intr", "Interrupt Sources"); static int intr_assign_cpu(void *arg, int cpu); static void intr_disable_src(void *arg); @@ -109,6 +114,18 @@ static void intrcnt_setname(const char *name, int index); static void intrcnt_updatename(struct intsrc *is); static void intrcnt_register(struct intsrc *is); +/* + * SYSINIT levels for SI_SUB_INTR: + * + * SI_ORDER_FIRST: Initialize locks and pics TAILQ, xen_hvm_cpu_init + * SI_ORDER_SECOND: Xen PICs + * SI_ORDER_THIRD: Add I/O APIC PICs, alloc MSI and Xen IRQ ranges + * SI_ORDER_FOURTH: Add 8259A PICs + * SI_ORDER_FOURTH + 1: Finalize interrupt count and add interrupt sources + * SI_ORDER_MIDDLE: SMP interrupt counters + * SI_ORDER_ANY: Enable interrupts on BSP + */ + static int intr_pic_registered(struct pic *pic) { @@ -143,6 +160,58 @@ intr_register_pic(struct pic *pic) return (error); } +/* + * Allocate interrupt source arrays and register interrupt sources + * once the number of interrupts is known. + */ +static void +intr_init_sources(void *arg) +{ + struct pic *pic; + + MPASS(num_io_irqs > 0); + + interrupt_sources = mallocarray(num_io_irqs, sizeof(*interrupt_sources), + M_INTR, M_WAITOK | M_ZERO); + interrupt_sorted = mallocarray(num_io_irqs, sizeof(*interrupt_sorted), + M_INTR, M_WAITOK | M_ZERO); + + /* + * - 1 ??? dummy counter. + * - 2 counters for each I/O interrupt. + * - 1 counter for each CPU for lapic timer. + * - 1 counter for each CPU for the Hyper-V vmbus driver. + * - 8 counters for each CPU for IPI counters for SMP. + */ + nintrcnt = 1 + num_io_irqs * 2 + mp_ncpus * 2; +#ifdef COUNT_IPIS + if (mp_ncpus > 1) + nintrcnt += 8 * mp_ncpus; +#endif + intrcnt = mallocarray(nintrcnt, sizeof(u_long), M_INTR, M_WAITOK | + M_ZERO); + intrnames = mallocarray(nintrcnt, MAXCOMLEN + 1, M_INTR, M_WAITOK | + M_ZERO); + sintrcnt = nintrcnt * sizeof(u_long); + sintrnames = nintrcnt * (MAXCOMLEN + 1); + + intrcnt_setname("???", 0); + intrcnt_index = 1; + + /* + * NB: intrpic_lock is not held here to avoid LORs due to + * malloc() in intr_register_source(). However, we are still + * single-threaded at this point in startup so the list of + * PICs shouldn't change. + */ + TAILQ_FOREACH(pic, &pics, pics) { + if (pic->pic_register_sources != NULL) + pic->pic_register_sources(pic); + } +} +SYSINIT(intr_init_sources, SI_SUB_INTR, SI_ORDER_FOURTH + 1, intr_init_sources, + NULL); + /* * Register a new interrupt source with the global interrupt system. * The global interrupts need to be disabled when this function is @@ -155,6 +224,8 @@ intr_register_source(struct intsrc *isrc) KASSERT(intr_pic_registered(isrc->is_pic), ("unregistered PIC")); vector = isrc->is_pic->pic_vector(isrc); + KASSERT(vector < num_io_irqs, ("IRQ %d too large (%u irqs)", vector, + num_io_irqs)); if (interrupt_sources[vector] != NULL) return (EEXIST); error = intr_event_create(&isrc->is_event, isrc, 0, vector, @@ -180,7 +251,7 @@ struct intsrc * intr_lookup_source(int vector) { - if (vector < 0 || vector >= nitems(interrupt_sources)) + if (vector < 0 || vector >= num_io_irqs) return (NULL); return (interrupt_sources[vector]); } @@ -378,6 +449,7 @@ intrcnt_register(struct intsrc *is) KASSERT(is->is_event != NULL, ("%s: isrc with no event", __func__)); mtx_lock_spin(&intrcnt_lock); + MPASS(intrcnt_index + 2 <= nintrcnt); is->is_index = intrcnt_index; intrcnt_index += 2; snprintf(straystr, MAXCOMLEN + 1, "stray irq%d", @@ -394,6 +466,7 @@ intrcnt_add(const char *name, u_long **countp) { mtx_lock_spin(&intrcnt_lock); + MPASS(intrcnt_index < nintrcnt); *countp = &intrcnt[intrcnt_index]; intrcnt_setname(name, intrcnt_index); intrcnt_index++; @@ -404,8 +477,6 @@ static void intr_init(void *dummy __unused) { - intrcnt_setname("???", 0); - intrcnt_index = 1; TAILQ_INIT(&pics); mtx_init(&intrpic_lock, "intrpic", NULL, MTX_DEF); sx_init(&intrsrc_lock, "intrsrc"); @@ -471,10 +542,10 @@ void intr_reprogram(void) { struct intsrc *is; - int v; + u_int v; sx_xlock(&intrsrc_lock); - for (v = 0; v < NUM_IO_INTS; v++) { + for (v = 0; v < num_io_irqs; v++) { is = interrupt_sources[v]; if (is == NULL) continue; @@ -491,14 +562,15 @@ intr_reprogram(void) DB_SHOW_COMMAND(irqs, db_show_irqs) { struct intsrc **isrc; - int i, verbose; + u_int i; + int verbose; if (strcmp(modif, "v") == 0) verbose = 1; else verbose = 0; isrc = interrupt_sources; - for (i = 0; i < NUM_IO_INTS && !db_pager_quit; i++, isrc++) + for (i = 0; i < num_io_irqs && !db_pager_quit; i++, isrc++) if (*isrc != NULL) db_dump_intr_event((*isrc)->is_event, verbose); } @@ -606,8 +678,7 @@ static void intr_shuffle_irqs(void *arg __unused) { struct intsrc *isrc; - u_int cpu; - int i; + u_int cpu, i; intr_init_cpus(); /* Don't bother on UP. */ @@ -617,7 +688,7 @@ intr_shuffle_irqs(void *arg __unused) /* Round-robin assign a CPU to each enabled source. */ sx_xlock(&intrsrc_lock); assign_cpu = 1; - for (i = 0; i < NUM_IO_INTS; i++) { + for (i = 0; i < num_io_irqs; i++) { isrc = interrupt_sources[i]; if (isrc != NULL && isrc->is_handlers > 0) { /* @@ -652,8 +723,8 @@ sysctl_hw_intrs(SYSCTL_HANDLER_ARGS) { struct sbuf sbuf; struct intsrc *isrc; + u_int i; int error; - int i; error = sysctl_wire_old_buffer(req, 0); if (error != 0) @@ -661,7 +732,7 @@ sysctl_hw_intrs(SYSCTL_HANDLER_ARGS) sbuf_new_for_sysctl(&sbuf, NULL, 128, req); sx_slock(&intrsrc_lock); - for (i = 0; i < NUM_IO_INTS; i++) { + for (i = 0; i < num_io_irqs; i++) { isrc = interrupt_sources[i]; if (isrc == NULL) continue; @@ -720,8 +791,9 @@ intr_balance(void *dummy __unused, int pending __unused) * Sort interrupts according to count. */ sx_xlock(&intrsrc_lock); - memcpy(interrupt_sorted, interrupt_sources, sizeof(interrupt_sorted)); - qsort(interrupt_sorted, NUM_IO_INTS, sizeof(interrupt_sorted[0]), + memcpy(interrupt_sorted, interrupt_sources, num_io_irqs * + sizeof(interrupt_sorted[0])); + qsort(interrupt_sorted, num_io_irqs, sizeof(interrupt_sorted[0]), intrcmp); /* @@ -733,7 +805,7 @@ intr_balance(void *dummy __unused, int pending __unused) /* * Assign round-robin from most loaded to least. */ - for (i = NUM_IO_INTS - 1; i >= 0; i--) { + for (i = num_io_irqs - 1; i >= 0; i--) { isrc = interrupt_sorted[i]; if (isrc == NULL || isrc->is_event->ie_cpu != NOCPU) continue; diff --git a/sys/x86/x86/io_apic.c b/sys/x86/x86/io_apic.c index 81d91044aa24..f33a55d7acf4 100644 --- a/sys/x86/x86/io_apic.c +++ b/sys/x86/x86/io_apic.c @@ -80,7 +80,7 @@ static MALLOC_DEFINE(M_IOAPIC, "io_apic", "I/O APIC structures"); struct ioapic_intsrc { struct intsrc io_intsrc; - u_int io_irq; + int io_irq; u_int io_intpin:8; u_int io_vector:8; u_int io_cpu; @@ -112,6 +112,7 @@ static u_int ioapic_read(volatile ioapic_t *apic, int reg); static void ioapic_write(volatile ioapic_t *apic, int reg, u_int val); static const char *ioapic_bus_string(int bus_type); static void ioapic_print_irq(struct ioapic_intsrc *intpin); +static void ioapic_register_sources(struct pic *pic); static void ioapic_enable_source(struct intsrc *isrc); static void ioapic_disable_source(struct intsrc *isrc, int eoi); static void ioapic_eoi_source(struct intsrc *isrc); @@ -128,6 +129,7 @@ static void ioapic_reprogram_intpin(struct intsrc *isrc); static STAILQ_HEAD(,ioapic) ioapic_list = STAILQ_HEAD_INITIALIZER(ioapic_list); struct pic ioapic_template = { + .pic_register_sources = ioapic_register_sources, .pic_enable_source = ioapic_enable_source, .pic_disable_source = ioapic_disable_source, .pic_eoi_source = ioapic_eoi_source, @@ -142,7 +144,7 @@ struct pic ioapic_template = { .pic_reprogram_pin = ioapic_reprogram_intpin, }; -static int next_ioapic_base; +static u_int next_ioapic_base; static u_int next_id; static int enable_extint; @@ -250,7 +252,7 @@ ioapic_print_irq(struct ioapic_intsrc *intpin) printf("SMI"); break; default: - printf("%s IRQ %u", ioapic_bus_string(intpin->io_bus), + printf("%s IRQ %d", ioapic_bus_string(intpin->io_bus), intpin->io_irq); } } @@ -318,7 +320,7 @@ ioapic_program_intpin(struct ioapic_intsrc *intpin) * been enabled yet, just ensure that the pin is masked. */ mtx_assert(&icu_lock, MA_OWNED); - if (intpin->io_irq == IRQ_DISABLED || (intpin->io_irq < NUM_IO_INTS && + if (intpin->io_irq == IRQ_DISABLED || (intpin->io_irq >= 0 && intpin->io_vector == 0)) { low = ioapic_read(io->io_addr, IOAPIC_REDTBL_LO(intpin->io_intpin)); @@ -651,6 +653,8 @@ ioapic_create(vm_paddr_t addr, int32_t apic_id, int intbase) io->io_id, intbase, next_ioapic_base); io->io_intbase = intbase; next_ioapic_base = intbase + numintr; + if (next_ioapic_base > num_io_irqs) + num_io_irqs = next_ioapic_base; io->io_numintr = numintr; io->io_addr = apic; io->io_paddr = addr; @@ -759,7 +763,7 @@ ioapic_remap_vector(void *cookie, u_int pin, int vector) io = (struct ioapic *)cookie; if (pin >= io->io_numintr || vector < 0) return (EINVAL); - if (io->io_pins[pin].io_irq >= NUM_IO_INTS) + if (io->io_pins[pin].io_irq < 0) return (EINVAL); io->io_pins[pin].io_irq = vector; if (bootverbose) @@ -778,7 +782,7 @@ ioapic_set_bus(void *cookie, u_int pin, int bus_type) io = (struct ioapic *)cookie; if (pin >= io->io_numintr) return (EINVAL); - if (io->io_pins[pin].io_irq >= NUM_IO_INTS) + if (io->io_pins[pin].io_irq < 0) return (EINVAL); if (io->io_pins[pin].io_bus == bus_type) return (0); @@ -799,7 +803,7 @@ ioapic_set_nmi(void *cookie, u_int pin) return (EINVAL); if (io->io_pins[pin].io_irq == IRQ_NMI) return (0); - if (io->io_pins[pin].io_irq >= NUM_IO_INTS) + if (io->io_pins[pin].io_irq < 0) return (EINVAL); io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; io->io_pins[pin].io_irq = IRQ_NMI; @@ -822,7 +826,7 @@ ioapic_set_smi(void *cookie, u_int pin) return (EINVAL); if (io->io_pins[pin].io_irq == IRQ_SMI) return (0); - if (io->io_pins[pin].io_irq >= NUM_IO_INTS) + if (io->io_pins[pin].io_irq < 0) return (EINVAL); io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; io->io_pins[pin].io_irq = IRQ_SMI; @@ -845,7 +849,7 @@ ioapic_set_extint(void *cookie, u_int pin) return (EINVAL); if (io->io_pins[pin].io_irq == IRQ_EXTINT) return (0); - if (io->io_pins[pin].io_irq >= NUM_IO_INTS) + if (io->io_pins[pin].io_irq < 0) return (EINVAL); io->io_pins[pin].io_bus = APIC_BUS_UNKNOWN; io->io_pins[pin].io_irq = IRQ_EXTINT; @@ -870,7 +874,7 @@ ioapic_set_polarity(void *cookie, u_int pin, enum intr_polarity pol) io = (struct ioapic *)cookie; if (pin >= io->io_numintr || pol == INTR_POLARITY_CONFORM) return (EINVAL); - if (io->io_pins[pin].io_irq >= NUM_IO_INTS) + if (io->io_pins[pin].io_irq < 0) return (EINVAL); activehi = (pol == INTR_POLARITY_HIGH); if (io->io_pins[pin].io_activehi == activehi) @@ -891,7 +895,7 @@ ioapic_set_triggermode(void *cookie, u_int pin, enum intr_trigger trigger) io = (struct ioapic *)cookie; if (pin >= io->io_numintr || trigger == INTR_TRIGGER_CONFORM) return (EINVAL); - if (io->io_pins[pin].io_irq >= NUM_IO_INTS) + if (io->io_pins[pin].io_irq < 0) return (EINVAL); edgetrigger = (trigger == INTR_TRIGGER_EDGE); if (io->io_pins[pin].io_edgetrigger == edgetrigger) @@ -927,12 +931,26 @@ ioapic_register(void *cookie) /* * Reprogram pins to handle special case pins (such as NMI and - * SMI) and register valid pins as interrupt sources. + * SMI) and disable normal pins until a handler is registered. */ intr_register_pic(&io->io_pic); - for (i = 0, pin = io->io_pins; i < io->io_numintr; i++, pin++) { + for (i = 0, pin = io->io_pins; i < io->io_numintr; i++, pin++) ioapic_reprogram_intpin(&pin->io_intsrc); - if (pin->io_irq < NUM_IO_INTS) +} + +/* + * Add interrupt sources for I/O APIC interrupt pins. + */ +static void +ioapic_register_sources(struct pic *pic) +{ + struct ioapic_intsrc *pin; + struct ioapic *io; + int i; + + io = (struct ioapic *)pic; + for (i = 0, pin = io->io_pins; i < io->io_numintr; i++, pin++) { + if (pin->io_irq >= 0) intr_register_source(&pin->io_intsrc); } } diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c index 7e3fcac1e78e..92e71977da4f 100644 --- a/sys/x86/x86/local_apic.c +++ b/sys/x86/x86/local_apic.c @@ -92,11 +92,16 @@ CTASSERT(APIC_TIMER_INT < APIC_LOCAL_INTS); CTASSERT(APIC_LOCAL_INTS == 240); CTASSERT(IPI_STOP < APIC_SPURIOUS_INT); -/* Magic IRQ values for the timer and syscalls. */ -#define IRQ_TIMER (NUM_IO_INTS + 1) -#define IRQ_SYSCALL (NUM_IO_INTS + 2) -#define IRQ_DTRACE_RET (NUM_IO_INTS + 3) -#define IRQ_EVTCHN (NUM_IO_INTS + 4) +/* + * I/O interrupts use non-negative IRQ values. These values are used + * to mark unused IDT entries or IDT entries reserved for a non-I/O + * interrupt. + */ +#define IRQ_FREE -1 +#define IRQ_TIMER -2 +#define IRQ_SYSCALL -3 +#define IRQ_DTRACE_RET -4 +#define IRQ_EVTCHN -5 enum lat_timer_mode { LAT_MODE_UNDEF = 0, @@ -648,7 +653,7 @@ native_lapic_create(u_int apic_id, int boot_cpu) lapics[apic_id].la_elvts[i].lvt_active = 0; } for (i = 0; i <= APIC_NUM_IOINTS; i++) - lapics[apic_id].la_ioint_irqs[i] = -1; + lapics[apic_id].la_ioint_irqs[i] = IRQ_FREE; lapics[apic_id].la_ioint_irqs[IDT_SYSCALL - APIC_IO_INTS] = IRQ_SYSCALL; lapics[apic_id].la_ioint_irqs[APIC_TIMER_INT - APIC_IO_INTS] = IRQ_TIMER; @@ -751,7 +756,6 @@ native_lapic_setup(int boot) uint32_t version; uint32_t maxlvt; register_t saveintr; - char buf[MAXCOMLEN + 1]; int elvt_count; int i; @@ -780,15 +784,11 @@ native_lapic_setup(int boot) LAPIC_LVT_PCINT)); } - /* Program timer LVT and setup handler. */ + /* Program timer LVT. */ la->lvt_timer_base = lvt_mode(la, APIC_LVT_TIMER, lapic_read32(LAPIC_LVT_TIMER)); la->lvt_timer_last = la->lvt_timer_base; lapic_write32(LAPIC_LVT_TIMER, la->lvt_timer_base); - if (boot) { - snprintf(buf, sizeof(buf), "cpu%d:timer", PCPU_GET(cpuid)); - intrcnt_add(buf, &la->la_timer_count); - } /* Calibrate the timer parameters using BSP. */ if (boot && IS_BSP()) { @@ -842,6 +842,28 @@ native_lapic_setup(int boot) intr_restore(saveintr); } +static void +native_lapic_intrcnt(void *dummy __unused) +{ + struct pcpu *pc; + struct lapic *la; + char buf[MAXCOMLEN + 1]; + + /* If there are no APICs, skip this function. */ + if (lapics == NULL) + return; + + STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { + la = &lapics[pc->pc_apic_id]; + KASSERT(la->la_present, ("missing APIC structure")); + + snprintf(buf, sizeof(buf), "cpu%d:timer", pc->pc_cpuid); + intrcnt_add(buf, &la->la_timer_count); + } +} +SYSINIT(native_lapic_intrcnt, SI_SUB_INTR, SI_ORDER_MIDDLE, native_lapic_intrcnt, + NULL); + static void native_lapic_reenable_pmc(void) { @@ -1493,7 +1515,7 @@ native_apic_alloc_vector(u_int apic_id, u_int irq) { u_int vector; - KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq)); + KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq)); /* * Search for a free vector. Currently we just use a very simple @@ -1501,7 +1523,7 @@ native_apic_alloc_vector(u_int apic_id, u_int irq) */ mtx_lock_spin(&icu_lock); for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { - if (lapics[apic_id].la_ioint_irqs[vector] != -1) + if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE) continue; lapics[apic_id].la_ioint_irqs[vector] = irq; mtx_unlock_spin(&icu_lock); @@ -1527,7 +1549,7 @@ native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) KASSERT(align >= count, ("align < count")); #ifdef INVARIANTS for (run = 0; run < count; run++) - KASSERT(irqs[run] < NUM_IO_INTS, ("Invalid IRQ %u at index %u", + KASSERT(irqs[run] < num_io_irqs, ("Invalid IRQ %u at index %u", irqs[run], run)); #endif @@ -1541,7 +1563,7 @@ native_apic_alloc_vectors(u_int apic_id, u_int *irqs, u_int count, u_int align) for (vector = 0; vector < APIC_NUM_IOINTS; vector++) { /* Vector is in use, end run. */ - if (lapics[apic_id].la_ioint_irqs[vector] != -1) { + if (lapics[apic_id].la_ioint_irqs[vector] != IRQ_FREE) { run = 0; first = 0; continue; @@ -1622,7 +1644,7 @@ native_apic_free_vector(u_int apic_id, u_int vector, u_int irq) KASSERT(vector >= APIC_IO_INTS && vector != IDT_SYSCALL && vector <= APIC_IO_INTS + APIC_NUM_IOINTS, ("Vector %u does not map to an IRQ line", vector)); - KASSERT(irq < NUM_IO_INTS, ("Invalid IRQ %u", irq)); + KASSERT(irq < num_io_irqs, ("Invalid IRQ %u", irq)); KASSERT(lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] == irq, ("IRQ mismatch")); #ifdef KDTRACE_HOOKS @@ -1643,7 +1665,7 @@ native_apic_free_vector(u_int apic_id, u_int vector, u_int irq) thread_unlock(td); } mtx_lock_spin(&icu_lock); - lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = -1; + lapics[apic_id].la_ioint_irqs[vector - APIC_IO_INTS] = IRQ_FREE; mtx_unlock_spin(&icu_lock); if (!rebooting) { thread_lock(td); @@ -1694,7 +1716,7 @@ DB_SHOW_COMMAND(apic, db_show_apic) db_printf("Interrupts bound to lapic %u\n", apic_id); for (i = 0; i < APIC_NUM_IOINTS + 1 && !db_pager_quit; i++) { irq = lapics[apic_id].la_ioint_irqs[i]; - if (irq == -1 || irq == IRQ_SYSCALL) + if (irq == IRQ_FREE || irq == IRQ_SYSCALL) continue; #ifdef KDTRACE_HOOKS if (irq == IRQ_DTRACE_RET) @@ -1707,7 +1729,7 @@ DB_SHOW_COMMAND(apic, db_show_apic) db_printf("vec 0x%2x -> ", i + APIC_IO_INTS); if (irq == IRQ_TIMER) db_printf("lapic timer\n"); - else if (irq < NUM_IO_INTS) { + else if (irq < num_io_irqs) { isrc = intr_lookup_source(irq); if (isrc == NULL || verbose == 0) db_printf("IRQ %u\n", irq); @@ -1934,6 +1956,10 @@ apic_setup_io(void *dummy __unused) /* Enable the MSI "pic". */ init_ops.msi_init(); + +#ifdef XENHVM + xen_intr_alloc_irqs(); +#endif } SYSINIT(apic_setup_io, SI_SUB_INTR, SI_ORDER_THIRD, apic_setup_io, NULL); diff --git a/sys/x86/x86/msi.c b/sys/x86/x86/msi.c index 0890ab31f947..1c5502cedde6 100644 --- a/sys/x86/x86/msi.c +++ b/sys/x86/x86/msi.c @@ -120,7 +120,7 @@ struct msi_intsrc { u_int msi_cpu; /* Local APIC ID. (g) */ u_int msi_count:8; /* Messages in this group. (g) */ u_int msi_maxcount:8; /* Alignment for this group. (g) */ - int *msi_irqs; /* Group's IRQ list. (g) */ + u_int *msi_irqs; /* Group's IRQ list. (g) */ u_int msi_remap_cookie; }; @@ -151,6 +151,8 @@ struct pic msi_pic = { .pic_reprogram_pin = NULL, }; +u_int first_msi_irq; + #ifdef SMP /** * Xen hypervisors prior to 4.6.0 do not properly handle updates to @@ -168,7 +170,7 @@ SYSCTL_INT(_machdep, OID_AUTO, disable_msix_migration, CTLFLAG_RDTUN, #endif static int msi_enabled; -static int msi_last_irq; +static u_int msi_last_irq; static struct mtx msi_lock; static void @@ -329,6 +331,10 @@ msi_init(void) } #endif + MPASS(num_io_irqs > 0); + first_msi_irq = max(MINIMUM_MSI_INT, num_io_irqs); + num_io_irqs = first_msi_irq + NUM_MSI_INTS; + msi_enabled = 1; intr_register_pic(&msi_pic); mtx_init(&msi_lock, "msi", NULL, MTX_DEF); @@ -345,7 +351,7 @@ msi_create_source(void) mtx_unlock(&msi_lock); return; } - irq = msi_last_irq + FIRST_MSI_INT; + irq = msi_last_irq + first_msi_irq; msi_last_irq++; mtx_unlock(&msi_lock); @@ -363,8 +369,8 @@ int msi_alloc(device_t dev, int count, int maxcount, int *irqs) { struct msi_intsrc *msi, *fsrc; - u_int cpu, domain; - int cnt, i, *mirqs, vector; + u_int cpu, domain, *mirqs; + int cnt, i, vector; #ifdef ACPI_DMAR u_int cookies[count]; int error; @@ -385,7 +391,7 @@ msi_alloc(device_t dev, int count, int maxcount, int *irqs) /* Try to find 'count' free IRQs. */ cnt = 0; - for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) { + for (i = first_msi_irq; i < first_msi_irq + NUM_MSI_INTS; i++) { msi = (struct msi_intsrc *)intr_lookup_source(i); /* End of allocated sources, so break. */ @@ -404,7 +410,7 @@ msi_alloc(device_t dev, int count, int maxcount, int *irqs) /* Do we need to create some new sources? */ if (cnt < count) { /* If we would exceed the max, give up. */ - if (i + (count - cnt) >= FIRST_MSI_INT + NUM_MSI_INTS) { + if (i + (count - cnt) >= first_msi_irq + NUM_MSI_INTS) { mtx_unlock(&msi_lock); free(mirqs, M_MSI); return (ENXIO); @@ -579,8 +585,8 @@ msi_map(int irq, uint64_t *addr, uint32_t *data) #ifdef ACPI_DMAR if (!msi->msi_msix) { - for (k = msi->msi_count - 1, i = FIRST_MSI_INT; k > 0 && - i < FIRST_MSI_INT + NUM_MSI_INTS; i++) { + for (k = msi->msi_count - 1, i = first_msi_irq; k > 0 && + i < first_msi_irq + NUM_MSI_INTS; i++) { if (i == msi->msi_irq) continue; msi1 = (struct msi_intsrc *)intr_lookup_source(i); @@ -630,7 +636,7 @@ msix_alloc(device_t dev, int *irq) mtx_lock(&msi_lock); /* Find a free IRQ. */ - for (i = FIRST_MSI_INT; i < FIRST_MSI_INT + NUM_MSI_INTS; i++) { + for (i = first_msi_irq; i < first_msi_irq + NUM_MSI_INTS; i++) { msi = (struct msi_intsrc *)intr_lookup_source(i); /* End of allocated sources, so break. */ @@ -645,7 +651,7 @@ msix_alloc(device_t dev, int *irq) /* Do we need to create a new source? */ if (msi == NULL) { /* If we would exceed the max, give up. */ - if (i + 1 >= FIRST_MSI_INT + NUM_MSI_INTS) { + if (i + 1 >= first_msi_irq + NUM_MSI_INTS) { mtx_unlock(&msi_lock); return (ENXIO); } diff --git a/sys/x86/x86/nexus.c b/sys/x86/x86/nexus.c index 9bceff204863..4762d5ae2cfb 100644 --- a/sys/x86/x86/nexus.c +++ b/sys/x86/x86/nexus.c @@ -223,7 +223,7 @@ nexus_init_resources(void) irq_rman.rm_start = 0; irq_rman.rm_type = RMAN_ARRAY; irq_rman.rm_descr = "Interrupt request lines"; - irq_rman.rm_end = NUM_IO_INTS - 1; + irq_rman.rm_end = num_io_irqs - 1; if (rman_init(&irq_rman)) panic("nexus_init_resources irq_rman"); @@ -231,7 +231,7 @@ nexus_init_resources(void) * We search for regions of existing IRQs and add those to the IRQ * resource manager. */ - for (irq = 0; irq < NUM_IO_INTS; irq++) + for (irq = 0; irq < num_io_irqs; irq++) if (intr_lookup_source(irq) != NULL) if (rman_manage_region(&irq_rman, irq, irq) != 0) panic("nexus_init_resources irq_rman add"); diff --git a/sys/x86/xen/xen_intr.c b/sys/x86/xen/xen_intr.c index f823b9303f62..559f3192255a 100644 --- a/sys/x86/xen/xen_intr.c +++ b/sys/x86/xen/xen_intr.c @@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -72,6 +73,8 @@ __FBSDID("$FreeBSD$"); static MALLOC_DEFINE(M_XENINTR, "xen_intr", "Xen Interrupt Services"); +static u_int first_evtchn_irq; + /** * Per-cpu event channel processing state. */ @@ -187,7 +190,7 @@ struct pic xen_intr_pirq_pic = { }; static struct mtx xen_intr_isrc_lock; -static int xen_intr_auto_vector_count; +static u_int xen_intr_auto_vector_count; static struct xenisrc *xen_intr_port_to_isrc[NR_EVENT_CHANNELS]; static u_long *xen_intr_pirq_eoi_map; static boolean_t xen_intr_pirq_eoi_map_enabled; @@ -276,7 +279,7 @@ xen_intr_find_unused_isrc(enum evtchn_type type) struct xenisrc *isrc; u_int vector; - vector = FIRST_EVTCHN_INT + isrc_idx; + vector = first_evtchn_irq + isrc_idx; isrc = (struct xenisrc *)intr_lookup_source(vector); if (isrc != NULL && isrc->xi_type == EVTCHN_TYPE_UNBOUND) { @@ -314,7 +317,7 @@ xen_intr_alloc_isrc(enum evtchn_type type, int vector) } if (type != EVTCHN_TYPE_PIRQ) { - vector = FIRST_EVTCHN_INT + xen_intr_auto_vector_count; + vector = first_evtchn_irq + xen_intr_auto_vector_count; xen_intr_auto_vector_count++; } @@ -473,8 +476,8 @@ xen_intr_isrc(xen_intr_handle_t handle) return (NULL); vector = *(int *)handle; - KASSERT(vector >= FIRST_EVTCHN_INT && - vector < (FIRST_EVTCHN_INT + xen_intr_auto_vector_count), + KASSERT(vector >= first_evtchn_irq && + vector < (first_evtchn_irq + xen_intr_auto_vector_count), ("Xen interrupt vector is out of range")); return ((struct xenisrc *)intr_lookup_source(vector)); @@ -631,17 +634,13 @@ xen_intr_init(void *dummy __unused) mtx_init(&xen_intr_isrc_lock, "xen-irq-lock", NULL, MTX_DEF); /* - * Register interrupt count manually as we aren't - * guaranteed to see a call to xen_intr_assign_cpu() - * before our first interrupt. Also set the per-cpu - * mask of CPU#0 to enable all, since by default - * all event channels are bound to CPU#0. + * Set the per-cpu mask of CPU#0 to enable all, since by default all + * event channels are bound to CPU#0. */ CPU_FOREACH(i) { pcpu = DPCPU_ID_PTR(i, xen_intr_pcpu); memset(pcpu->evtchn_enabled, i == 0 ? ~0 : 0, sizeof(pcpu->evtchn_enabled)); - xen_intr_intrcnt_add(i); } for (i = 0; i < nitems(s->evtchn_mask); i++) @@ -666,6 +665,31 @@ xen_intr_init(void *dummy __unused) } SYSINIT(xen_intr_init, SI_SUB_INTR, SI_ORDER_SECOND, xen_intr_init, NULL); +static void +xen_intrcnt_init(void *dummy __unused) +{ + unsigned int i; + + if (!xen_domain()) + return; + + /* + * Register interrupt count manually as we aren't guaranteed to see a + * call to xen_intr_assign_cpu() before our first interrupt. + */ + CPU_FOREACH(i) + xen_intr_intrcnt_add(i); +} +SYSINIT(xen_intrcnt_init, SI_SUB_INTR, SI_ORDER_MIDDLE, xen_intrcnt_init, NULL); + +void +xen_intr_alloc_irqs(void) +{ + + first_evtchn_irq = num_io_irqs; + num_io_irqs += NR_EVENT_CHANNELS; +} + /*--------------------------- Common PIC Functions ---------------------------*/ /** * Prepare this PIC for system suspension. @@ -768,7 +792,7 @@ xen_intr_resume(struct pic *unused, bool suspend_cancelled) for (isrc_idx = 0; isrc_idx < xen_intr_auto_vector_count; isrc_idx++) { u_int vector; - vector = FIRST_EVTCHN_INT + isrc_idx; + vector = first_evtchn_irq + isrc_idx; isrc = (struct xenisrc *)intr_lookup_source(vector); if (isrc != NULL) { isrc->xi_port = 0; @@ -872,7 +896,6 @@ xen_intr_assign_cpu(struct intsrc *base_isrc, u_int apic_id) to_cpu = apic_cpuid(apic_id); vcpu_id = pcpu_find(to_cpu)->pc_vcpu_id; - xen_intr_intrcnt_add(to_cpu); mtx_lock(&xen_intr_isrc_lock); isrc = (struct xenisrc *)base_isrc; @@ -1273,9 +1296,6 @@ xen_intr_bind_virq(device_t dev, u_int virq, u_int cpu, struct evtchn_bind_virq bind_virq = { .virq = virq, .vcpu = vcpu_id }; int error; - /* Ensure the target CPU is ready to handle evtchn interrupts. */ - xen_intr_intrcnt_add(cpu); - isrc = NULL; error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_virq, &bind_virq); if (error != 0) { @@ -1338,9 +1358,6 @@ xen_intr_alloc_and_bind_ipi(u_int cpu, driver_filter_t filter, char name[MAXCOMLEN + 1]; int error; - /* Ensure the target CPU is ready to handle evtchn interrupts. */ - xen_intr_intrcnt_add(cpu); - isrc = NULL; error = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, &bind_ipi); if (error != 0) { diff --git a/sys/x86/xen/xen_msi.c b/sys/x86/xen/xen_msi.c index 0f678b164344..0d2544d293c9 100644 --- a/sys/x86/xen/xen_msi.c +++ b/sys/x86/xen/xen_msi.c @@ -44,16 +44,21 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include static struct mtx msi_lock; -static int msi_last_irq; +static u_int msi_last_irq; void xen_msi_init(void) { + MPASS(num_io_irqs > 0); + first_msi_irq = min(MINIMUM_MSI_INT, num_io_irqs); + num_io_irqs = first_msi_irq + NUM_MSI_INTS; + mtx_init(&msi_lock, "msi", NULL, MTX_DEF); } @@ -75,7 +80,7 @@ xen_msi_alloc(device_t dev, int count, int maxcount, int *irqs) /* Allocate MSI vectors */ for (i = 0; i < count; i++) - irqs[i] = FIRST_MSI_INT + msi_last_irq++; + irqs[i] = first_msi_irq + msi_last_irq++; mtx_unlock(&msi_lock); diff --git a/sys/x86/xen/xen_nexus.c b/sys/x86/xen/xen_nexus.c index 73506fc955f0..65d4281a1426 100644 --- a/sys/x86/xen/xen_nexus.c +++ b/sys/x86/xen/xen_nexus.c @@ -99,7 +99,7 @@ nexus_xen_config_intr(device_t dev, int irq, enum intr_trigger trig, * ISA and PCI intline IRQs are not preregistered on Xen, so * intercept calls to configure those and register them on the fly. */ - if ((irq < FIRST_MSI_INT) && (intr_lookup_source(irq) == NULL)) { + if ((irq < first_msi_irq) && (intr_lookup_source(irq) == NULL)) { ret = xen_register_pirq(irq, trig, pol); if (ret != 0) return (ret); diff --git a/usr.bin/vmstat/vmstat.c b/usr.bin/vmstat/vmstat.c index 75f012f293e1..01b258bf668e 100644 --- a/usr.bin/vmstat/vmstat.c +++ b/usr.bin/vmstat/vmstat.c @@ -86,7 +86,7 @@ __FBSDID("$FreeBSD$"); static char da[] = "da"; enum x_stats { X_SUM, X_HZ, X_STATHZ, X_NCHSTATS, X_INTRNAMES, X_SINTRNAMES, - X_INTRCNT, X_SINTRCNT }; + X_INTRCNT, X_SINTRCNT, X_NINTRCNT }; static struct nlist namelist[] = { [X_SUM] = { .n_name = "_vm_cnt", }, @@ -97,6 +97,7 @@ static struct nlist namelist[] = { [X_SINTRNAMES] = { .n_name = "_sintrnames", }, [X_INTRCNT] = { .n_name = "_intrcnt", }, [X_SINTRCNT] = { .n_name = "_sintrcnt", }, + [X_NINTRCNT] = { .n_name = "_nintrcnt", }, { .n_name = NULL, }, }; @@ -196,6 +197,7 @@ static void domemstat_malloc(void); static void domemstat_zone(void); static void kread(int, void *, size_t); static void kreado(int, void *, size_t, size_t); +static void kreadptr(uintptr_t, void *, size_t); static void needhdr(int); static void needresize(int); static void doresize(void); @@ -318,6 +320,13 @@ main(int argc, char *argv[]) goto retry_nlist; } + /* + * 'nintrcnt' doesn't exist in older kernels, but + * that isn't fatal. + */ + if (namelist[X_NINTRCNT].n_type == 0 && c == 1) + goto nlist_ok; + for (c = 0; c < (int)(nitems(namelist)); c++) if (namelist[c].n_type == 0) bufsize += strlen(namelist[c].n_name) @@ -341,6 +350,7 @@ main(int argc, char *argv[]) xo_finish(); exit(1); } +nlist_ok: if (kd && Pflag) xo_errx(1, "Cannot use -P with crash dumps"); @@ -1232,12 +1242,18 @@ static unsigned int read_intrcnts(unsigned long **intrcnts) { size_t intrcntlen; + uintptr_t kaddr; if (kd != NULL) { kread(X_SINTRCNT, &intrcntlen, sizeof(intrcntlen)); if ((*intrcnts = malloc(intrcntlen)) == NULL) err(1, "malloc()"); - kread(X_INTRCNT, *intrcnts, intrcntlen); + if (namelist[X_NINTRCNT].n_type == 0) + kread(X_INTRCNT, *intrcnts, intrcntlen); + else { + kread(X_INTRCNT, &kaddr, sizeof(kaddr)); + kreadptr(kaddr, *intrcnts, intrcntlen); + } } else { for (*intrcnts = NULL, intrcntlen = 1024; ; intrcntlen *= 2) { *intrcnts = reallocf(*intrcnts, intrcntlen); @@ -1294,6 +1310,7 @@ dointr(unsigned int interval, int reps) char *intrname, *intrnames; long long period_ms, old_uptime, uptime; size_t clen, inamlen, istrnamlen; + uintptr_t kaddr; unsigned int nintr; old_intrcnts = NULL; @@ -1304,7 +1321,12 @@ dointr(unsigned int interval, int reps) kread(X_SINTRNAMES, &inamlen, sizeof(inamlen)); if ((intrnames = malloc(inamlen)) == NULL) xo_err(1, "malloc()"); - kread(X_INTRNAMES, intrnames, inamlen); + if (namelist[X_NINTRCNT].n_type == 0) + kread(X_INTRNAMES, intrnames, inamlen); + else { + kread(X_INTRNAMES, &kaddr, sizeof(kaddr)); + kreadptr(kaddr, intrnames, inamlen); + } } else { for (intrnames = NULL, inamlen = 1024; ; inamlen *= 2) { if ((intrnames = reallocf(intrnames, inamlen)) == NULL) @@ -1646,6 +1668,14 @@ kread(int nlx, void *addr, size_t size) kreado(nlx, addr, size, 0); } +static void +kreadptr(uintptr_t addr, void *buf, size_t size) +{ + + if ((size_t)kvm_read(kd, addr, buf, size) != size) + xo_errx(1, "%s", kvm_geterr(kd)); +} + static void __dead2 usage(void) {