From 215e7c161a62b94575223c79f33e1e1369f34e86 Mon Sep 17 00:00:00 2001
From: John Baldwin <jhb@FreeBSD.org>
Date: Tue, 28 Feb 2006 22:24:55 +0000
Subject: [PATCH] Rework how we wire up interrupt sources to CPUs: - Throw out
 all of the logical APIC ID stuff.  The Intel docs are somewhat   ambiguous,
 but it seems that the "flat" cluster model we are currently   using is only
 supported on Pentium and P6 family CPUs.  The other   "hierarchy" cluster
 model that is supported on all Intel CPUs with   local APICs is severely
 underdocumented.  For example, it's not clear   if the OS needs to glean the
 topology of the APIC hierarchy from   somewhere (neither ACPI nor MP Table
 include it) and setup the logical   clusters based on the physical hierarchy
 or not.  Not only that, but on   certain Intel chipsets, even though there
 were 4 CPUs in a logical   cluster, all the interrupts were only sent to one
 CPU anyway. - We now bind interrupts to individual CPUs using physical
 addressing via   the local APIC IDs.  This code has also moved out of the
 ioapic PIC   driver and into the common interrupt source code so that it can
 be   shared with MSI interrupt sources since MSI is addressed to APICs the  
 same way that I/O APIC pins are. - Interrupt source classes grow a new method
 pic_assign_cpu() to bind an   interrupt source to a specific local APIC ID. -
 The SMP code now tells the interrupt code which CPUs are available to   handle
 interrupts in a simpler and more intuitive manner.  For one thing,   it means
 we could now choose to not route interrupts to HT cores if we   wanted to
 (this code is currently in place in fact, but under an #if 0   for now). -
 For now we simply do static round-robin of IRQs to CPUs when the first  
 interrupt handler is added, just as before, with the change that IRQs are now   bound
 to individual CPUs rather than groups of up to 4 CPUs. - Because the IRQ to
 CPU mapping has now been moved up a layer, it would   be easier to manage
 this mapping from higher levels.  For example, we   could allow drivers to
 specify a CPU affinity map for their interrupts,   or we could allow a
 userland tool to bind IRQs to specific CPUs.

The MFC is tentative, but I want to see if this fixes problems some folks
had with UP APIC kernels on 6.0 on SMP machines (an SMP kernel would work
fine, but a UP APIC kernel (such as GENERIC in RELENG_6) would lose
interrupts).

MFC after:	1 week
---
 sys/amd64/amd64/intr_machdep.c   |  92 ++++++++++++++++++++++++-
 sys/amd64/amd64/io_apic.c        | 115 ++++++-------------------------
 sys/amd64/amd64/local_apic.c     |  16 +----
 sys/amd64/amd64/mp_machdep.c     |  40 +++++------
 sys/amd64/include/apicvar.h      |   1 -
 sys/amd64/include/intr_machdep.h |   7 ++
 sys/amd64/isa/atpic.c            |  16 ++++-
 sys/i386/i386/intr_machdep.c     |  92 ++++++++++++++++++++++++-
 sys/i386/i386/io_apic.c          | 115 ++++++-------------------------
 sys/i386/i386/local_apic.c       |  16 +----
 sys/i386/i386/mp_machdep.c       |  40 +++++------
 sys/i386/include/apicvar.h       |   1 -
 sys/i386/include/intr_machdep.h  |   7 ++
 sys/i386/isa/atpic.c             |  16 ++++-
 14 files changed, 304 insertions(+), 270 deletions(-)

diff --git a/sys/amd64/amd64/intr_machdep.c b/sys/amd64/amd64/intr_machdep.c
index 7338bae31ea3..9d70d06c211c 100644
--- a/sys/amd64/amd64/intr_machdep.c
+++ b/sys/amd64/amd64/intr_machdep.c
@@ -63,6 +63,12 @@ static int intrcnt_index;
 static struct intsrc *interrupt_sources[NUM_IO_INTS];
 static struct mtx intr_table_lock;
 
+#ifdef SMP
+static int assign_cpu;
+
+static void	intr_assign_next_cpu(struct intsrc *isrc);
+#endif
+
 static void	intr_init(void *__dummy);
 static void	intrcnt_setname(const char *name, int index);
 static void	intrcnt_updatename(struct intsrc *is);
@@ -93,6 +99,7 @@ intr_register_source(struct intsrc *isrc)
 	}
 	intrcnt_register(isrc);
 	interrupt_sources[vector] = isrc;
+	isrc->is_enabled = 0;
 	mtx_unlock_spin(&intr_table_lock);
 	return (0);
 }
@@ -118,7 +125,17 @@ intr_add_handler(const char *name, int vector, driver_intr_t handler,
 	    intr_priority(flags), flags, cookiep);
 	if (error == 0) {
 		intrcnt_updatename(isrc);
-		isrc->is_pic->pic_enable_intr(isrc);
+		mtx_lock_spin(&intr_table_lock);
+		if (!isrc->is_enabled) {
+			isrc->is_enabled = 1;
+#ifdef SMP
+			if (assign_cpu)
+				intr_assign_next_cpu(isrc);
+#endif
+			mtx_unlock_spin(&intr_table_lock);
+			isrc->is_pic->pic_enable_intr(isrc);
+		} else
+			mtx_unlock_spin(&intr_table_lock);
 		isrc->is_pic->pic_enable_source(isrc);
 	}
 	return (error);
@@ -335,3 +352,76 @@ DB_SHOW_COMMAND(irqs, db_show_irqs)
 			db_dump_intr_event((*isrc)->is_event, verbose);
 }
 #endif
+
+#ifdef SMP
+/*
+ * Support for balancing interrupt sources across CPUs.  For now we just
+ * allocate CPUs round-robin.
+ */
+
+static u_int cpu_apic_ids[MAXCPU];
+static int current_cpu, num_cpus;
+
+static void
+intr_assign_next_cpu(struct intsrc *isrc)
+{
+	struct pic *pic;
+	u_int apic_id;
+
+	/*
+	 * Pick the next APIC ID round-robin; callers hold intr_table_lock.
+	 */
+	pic = isrc->is_pic;
+	apic_id = cpu_apic_ids[current_cpu];
+	current_cpu++;
+	if (current_cpu >= num_cpus)
+		current_cpu = 0;	/* wrap back to the first registered CPU */
+	if (bootverbose) {
+		printf("INTR: Assigning IRQ %d", pic->pic_vector(isrc));
+		printf(" to local APIC %u\n", apic_id);
+	}
+	pic->pic_assign_cpu(isrc, apic_id);
+}
+
+/*
+ * Record a local APIC ID in cpu_apic_ids[] as a valid destination for
+ * interrupts.  Panics if MAXCPU IDs have already been recorded.
+ */
+void
+intr_add_cpu(u_int apic_id)
+{
+
+	if (bootverbose)
+		printf("INTR: Adding local APIC %u as a target\n", apic_id);
+	if (num_cpus >= MAXCPU)
+		panic("WARNING: Local APIC IDs exhausted!");
+	cpu_apic_ids[num_cpus] = apic_id;
+	num_cpus++;
+}
+
+/*
+ * Distribute all enabled interrupt sources among the registered CPUs once
+ * the AP's have been launched (SI_SUB_SMP); a no-op when num_cpus <= 1.
+ */
+static void
+intr_shuffle_irqs(void *arg __unused)
+{
+	struct intsrc *isrc;
+	int i;
+
+	/* Don't bother on UP. */
+	if (num_cpus <= 1)
+		return;
+
+	/* Round-robin assign each enabled source a CPU. */
+	mtx_lock_spin(&intr_table_lock);
+	assign_cpu = 1;		/* intr_add_handler() assigns CPUs from now on */
+	for (i = 0; i < NUM_IO_INTS; i++) {
+		isrc = interrupt_sources[i];
+		if (isrc != NULL && isrc->is_enabled)
+			intr_assign_next_cpu(isrc);
+	}
+	mtx_unlock_spin(&intr_table_lock);
+}
+SYSINIT(intr_shuffle_irqs, SI_SUB_SMP, SI_ORDER_SECOND, intr_shuffle_irqs, NULL)
+#endif
diff --git a/sys/amd64/amd64/io_apic.c b/sys/amd64/amd64/io_apic.c
index 46c6cd35a9c3..6105986994c2 100644
--- a/sys/amd64/amd64/io_apic.c
+++ b/sys/amd64/amd64/io_apic.c
@@ -61,8 +61,6 @@ __FBSDID("$FreeBSD$");
 #define	IRQ_SMI			(NUM_IO_INTS + 3)
 #define	IRQ_DISABLED		(NUM_IO_INTS + 4)
 
-#define	DEST_NONE		-1
-
 #define	TODO		printf("%s: not implemented!\n", __func__)
 
 static MALLOC_DEFINE(M_IOAPIC, "io_apic", "I/O APIC structures");
@@ -82,10 +80,10 @@ struct ioapic_intsrc {
 	u_int io_irq;
 	u_int io_intpin:8;
 	u_int io_vector:8;
+	u_int io_cpu:8;
 	u_int io_activehi:1;
 	u_int io_edgetrigger:1;
 	u_int io_masked:1;
-	int io_dest:5;
 	int io_bus:4;
 };
 
@@ -114,7 +112,7 @@ static int	ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
 		    enum intr_polarity pol);
 static void	ioapic_suspend(struct intsrc *isrc);
 static void	ioapic_resume(struct intsrc *isrc);
-static void	ioapic_program_destination(struct ioapic_intsrc *intpin);
+static void	ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id);
 static void	ioapic_program_intpin(struct ioapic_intsrc *intpin);
 
 static STAILQ_HEAD(,ioapic) ioapic_list = STAILQ_HEAD_INITIALIZER(ioapic_list);
@@ -122,10 +120,10 @@ struct pic ioapic_template = { ioapic_enable_source, ioapic_disable_source,
 			       ioapic_eoi_source, ioapic_enable_intr,
 			       ioapic_vector, ioapic_source_pending,
 			       ioapic_suspend, ioapic_resume,
-			       ioapic_config_intr };
-	
-static int bsp_id, current_cluster, logical_clusters, next_ioapic_base;
-static u_int next_id, program_logical_dest;
+			       ioapic_config_intr, ioapic_assign_cpu };
+
+static int next_ioapic_base;
+static u_int next_id;
 
 SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD, 0, "APIC options");
 static int enable_extint;
@@ -273,14 +271,8 @@ ioapic_program_intpin(struct ioapic_intsrc *intpin)
 	}
 
 	/* Set the destination. */
-	if (intpin->io_dest == DEST_NONE) {
-		low = IOART_DESTPHY;
-		high = bsp_id << APIC_ID_SHIFT;
-	} else {
-		low = IOART_DESTLOG;
-		high = (intpin->io_dest << APIC_ID_CLUSTER_SHIFT |
-		    APIC_ID_CLUSTER_ID) << APIC_ID_SHIFT;
-	}
+	low = IOART_DESTPHY;
+	high = intpin->io_cpu << APIC_ID_SHIFT;
 
 	/* Program the rest of the low word. */
 	if (intpin->io_edgetrigger)
@@ -312,7 +304,7 @@ ioapic_program_intpin(struct ioapic_intsrc *intpin)
 	default:
 		KASSERT(intpin->io_vector != 0, ("No vector for IRQ %u",
 		    intpin->io_irq));
-		low |= IOART_DELLOPRI | intpin->io_vector;
+		low |= IOART_DELFIXED | intpin->io_vector;
 	}
 
 	/* Write the values to the APIC. */
@@ -325,60 +317,31 @@ ioapic_program_intpin(struct ioapic_intsrc *intpin)
 	mtx_unlock_spin(&icu_lock);
 }
 
-/*
- * Program an individual intpin's logical destination.
- */
 static void
-ioapic_program_destination(struct ioapic_intsrc *intpin)
+ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id)
 {
-	struct ioapic *io = (struct ioapic *)intpin->io_intsrc.is_pic;
+	struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
+	struct ioapic *io = (struct ioapic *)isrc->is_pic;
 
-	KASSERT(intpin->io_dest != DEST_NONE,
-	    ("intpin not assigned to a cluster"));
+	intpin->io_cpu = apic_id;
 	if (bootverbose) {
-		printf("ioapic%u: routing intpin %u (", io->io_id,
-		    intpin->io_intpin);
+		printf("ioapic%u: Assigning ", io->io_id);
 		ioapic_print_irq(intpin);
-		printf(") to cluster %u\n", intpin->io_dest);
+		printf(" to local APIC %u\n", intpin->io_cpu);
 	}
 	ioapic_program_intpin(intpin);
 }
 
-static void
-ioapic_assign_cluster(struct ioapic_intsrc *intpin)
-{
-
-	/*
-	 * Assign this intpin to a logical APIC cluster in a
-	 * round-robin fashion.  We don't actually use the logical
-	 * destination for this intpin until after all the CPU's
-	 * have been started so that we don't end up with interrupts
-	 * that don't go anywhere.  Another alternative might be to
-	 * start up the CPU's earlier so that they can handle interrupts
-	 * sooner.
-	 */
-	intpin->io_dest = current_cluster;
-	current_cluster++;
-	if (current_cluster >= logical_clusters)
-		current_cluster = 0;
-	if (program_logical_dest)
-		ioapic_program_destination(intpin);
-}
-
 static void
 ioapic_enable_intr(struct intsrc *isrc)
 {
 	struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
 	struct ioapic *io = (struct ioapic *)isrc->is_pic;
 
-	if (intpin->io_dest == DEST_NONE) {
+	if (intpin->io_vector == 0) {
 		/*
 		 * Allocate an APIC vector for this interrupt pin.  Once
-		 * we have a vector we program the interrupt pin.  Note
-		 * that after we have booted ioapic_assign_cluster()
-		 * will program the interrupt pin again, but it doesn't
-		 * hurt to do that and trying to avoid that adds needless
-		 * complication.
+		 * we have a vector we program the interrupt pin.
 		 */
 		intpin->io_vector = apic_alloc_vector(intpin->io_irq);
 		if (bootverbose) {
@@ -388,7 +351,6 @@ ioapic_enable_intr(struct intsrc *isrc)
 			printf(") to vector %u\n", intpin->io_vector);
 		}
 		ioapic_program_intpin(intpin);
-		ioapic_assign_cluster(intpin);
 		apic_enable_vector(intpin->io_vector);
 	}
 }
@@ -468,22 +430,6 @@ ioapic_resume(struct intsrc *isrc)
 	ioapic_program_intpin((struct ioapic_intsrc *)isrc);
 }
 
-/*
- * Allocate and return a logical cluster ID.  Note that the first time
- * this is called, it returns cluster 0.  ioapic_enable_intr() treats
- * the two cases of logical_clusters == 0 and logical_clusters == 1 the
- * same: one cluster of ID 0 exists.  The logical_clusters == 0 case is
- * for UP kernels, which should never call this function.
- */
-int
-ioapic_next_logical_cluster(void)
-{
-
-	if (logical_clusters >= APIC_MAX_CLUSTER)
-		panic("WARNING: Local APIC cluster IDs exhausted!");
-	return (logical_clusters++);
-}
-
 /*
  * Create a plain I/O APIC object.
  */
@@ -568,11 +514,10 @@ ioapic_create(uintptr_t addr, int32_t apic_id, int intbase)
 		}
 
 		/*
-		 * Route interrupts to the BSP by default using physical
-		 * addressing.  Vectored interrupts get readdressed using
-		 * logical IDs to CPU clusters when they are enabled.
+		 * Route interrupts to the BSP by default.  Interrupts may
+		 * be routed to other CPUs later after they are enabled.
 		 */
-		intpin->io_dest = DEST_NONE;
+		intpin->io_cpu = PCPU_GET(apic_id);
 		if (bootverbose && intpin->io_irq != IRQ_DISABLED) {
 			printf("ioapic%u: intpin %d -> ",  io->io_id, i);
 			ioapic_print_irq(intpin);
@@ -778,29 +723,9 @@ ioapic_register(void *cookie)
 	printf("ioapic%u <Version %u.%u> irqs %u-%u on motherboard\n",
 	    io->io_id, flags >> 4, flags & 0xf, io->io_intbase,
 	    io->io_intbase + io->io_numintr - 1);
-	bsp_id = PCPU_GET(apic_id);
 
 	/* Register valid pins as interrupt sources. */
 	for (i = 0, pin = io->io_pins; i < io->io_numintr; i++, pin++)
 		if (pin->io_irq < NUM_IO_INTS)
 			intr_register_source(&pin->io_intsrc);
 }
-
-/*
- * Program all the intpins to use logical destinations once the AP's
- * have been launched.
- */
-static void
-ioapic_set_logical_destinations(void *arg __unused)
-{
-	struct ioapic *io;
-	int i;
-
-	program_logical_dest = 1;
-	STAILQ_FOREACH(io, &ioapic_list, io_next)
-	    for (i = 0; i < io->io_numintr; i++)
-		    if (io->io_pins[i].io_dest != DEST_NONE)
-			    ioapic_program_destination(&io->io_pins[i]);
-}
-SYSINIT(ioapic_destinations, SI_SUB_SMP, SI_ORDER_SECOND,
-    ioapic_set_logical_destinations, NULL)
diff --git a/sys/amd64/amd64/local_apic.c b/sys/amd64/amd64/local_apic.c
index a2409b6320fb..2e6a22041661 100644
--- a/sys/amd64/amd64/local_apic.c
+++ b/sys/amd64/amd64/local_apic.c
@@ -217,6 +217,7 @@ lapic_init(uintptr_t addr)
 
 	/* Set BSP's per-CPU local APIC ID. */
 	PCPU_SET(apic_id, lapic_id());
+	intr_add_cpu(PCPU_GET(apic_id));
 
 	/* Local APIC timer interrupt. */
 	setidt(APIC_TIMER_INT, IDTVEC(timerint), SDT_SYSIGT, SEL_KPL, 0);
@@ -279,7 +280,7 @@ void
 lapic_setup(void)
 {
 	struct lapic *la;
-	u_int32_t value, maxlvt;
+	u_int32_t maxlvt;
 	register_t eflags;
 	char buf[MAXCOMLEN + 1];
 
@@ -291,19 +292,6 @@ lapic_setup(void)
 	/* Initialize the TPR to allow all interrupts. */
 	lapic_set_tpr(0);
 
-	/* Use the cluster model for logical IDs. */
-	value = lapic->dfr;
-	value &= ~APIC_DFR_MODEL_MASK;
-	value |= APIC_DFR_MODEL_CLUSTER;
-	lapic->dfr = value;
-
-	/* Set this APIC's logical ID. */
-	value = lapic->ldr;
-	value &= ~APIC_ID_MASK;
-	value |= (la->la_cluster << APIC_ID_CLUSTER_SHIFT |
-	    1 << la->la_cluster_id) << APIC_ID_SHIFT;
-	lapic->ldr = value;
-
 	/* Setup spurious vector and enable the local APIC. */
 	lapic_enable();
 
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index a152bc120a23..9e6775130d3a 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -160,7 +160,7 @@ static volatile u_int cpu_ipi_pending[MAXCPU];
 
 static u_int boot_address;
 
-static void	set_logical_apic_ids(void);
+static void	set_interrupt_apic_ids(void);
 static int	start_all_aps(void);
 static int	start_ap(int apic_id);
 static void	release_aps(void *dummy);
@@ -405,7 +405,7 @@ cpu_mp_start(void)
 			hyperthreading_cpus = logical_cpus;
 	}
 
-	set_logical_apic_ids();
+	set_interrupt_apic_ids();
 }
 
 
@@ -596,33 +596,29 @@ init_secondary(void)
  */
 
 /*
- * Set the APIC logical IDs.
- *
- * We want to cluster logical CPU's within the same APIC ID cluster.
- * Since logical CPU's are aligned simply filling in the clusters in
- * APIC ID order works fine.  Note that this does not try to balance
- * the number of CPU's in each cluster. (XXX?)
+ * We tell the interrupt code about all the CPUs we want to receive
+ * interrupts.  If we don't want certain CPUs to receive IRQs we
+ * can simply not tell the interrupt code about them in this function.
+ * We skip the BSP here since lapic_init() already registers it, which
+ * also covers UP kernels and UP machines.
  */
 static void
-set_logical_apic_ids(void)
+set_interrupt_apic_ids(void)
 {
-	u_int apic_id, cluster, cluster_id;
+	u_int apic_id;
 
-	/* Force us to allocate cluster 0 at the start. */
-	cluster = -1;
-	cluster_id = APIC_MAX_INTRACLUSTER_ID;
 	for (apic_id = 0; apic_id < MAXCPU; apic_id++) {
 		if (!cpu_info[apic_id].cpu_present)
 			continue;
-		if (cluster_id == APIC_MAX_INTRACLUSTER_ID) {
-			cluster = ioapic_next_logical_cluster();
-			cluster_id = 0;
-		} else
-			cluster_id++;
-		if (bootverbose)
-			printf("APIC ID: physical %u, logical %u:%u\n",
-			    apic_id, cluster, cluster_id);
-		lapic_set_logical_id(apic_id, cluster, cluster_id);
+		if (cpu_info[apic_id].cpu_bsp)
+			continue;
+#if 0
+		/* Don't let hyperthreads service interrupts. */
+		if (hyperthreading_cpus > 1 &&
+		    apic_id % hyperthreading_cpus != 0)
+			continue;
+#endif
+		intr_add_cpu(apic_id);
 	}
 }
 
diff --git a/sys/amd64/include/apicvar.h b/sys/amd64/include/apicvar.h
index 72603097ae74..c87dc7ee6665 100644
--- a/sys/amd64/include/apicvar.h
+++ b/sys/amd64/include/apicvar.h
@@ -181,7 +181,6 @@ void	apic_register_enumerator(struct apic_enumerator *enumerator);
 void	*ioapic_create(uintptr_t addr, int32_t id, int intbase);
 int	ioapic_disable_pin(void *cookie, u_int pin);
 int	ioapic_get_vector(void *cookie, u_int pin);
-int	ioapic_next_logical_cluster(void);
 void	ioapic_register(void *cookie);
 int	ioapic_remap_vector(void *cookie, u_int pin, int vector);
 int	ioapic_set_bus(void *cookie, u_int pin, int bus_type);
diff --git a/sys/amd64/include/intr_machdep.h b/sys/amd64/include/intr_machdep.h
index 4d5743a4aa2b..0d187c18e664 100644
--- a/sys/amd64/include/intr_machdep.h
+++ b/sys/amd64/include/intr_machdep.h
@@ -85,6 +85,7 @@ struct pic {
 	void (*pic_resume)(struct intsrc *);
 	int (*pic_config_intr)(struct intsrc *, enum intr_trigger,
 	    enum intr_polarity);
+	void (*pic_assign_cpu)(struct intsrc *, u_int apic_id);
 };
 
 /* Flags for pic_disable_source() */
@@ -105,6 +106,7 @@ struct intsrc {
 	u_long *is_count;
 	u_long *is_straycount;
 	u_int is_index;
+	u_int is_enabled:1;
 };
 
 struct trapframe;
@@ -117,6 +119,11 @@ int	elcr_probe(void);
 enum intr_trigger elcr_read_trigger(u_int irq);
 void	elcr_resume(void);
 void	elcr_write_trigger(u_int irq, enum intr_trigger trigger);
+#ifdef SMP
+void	intr_add_cpu(u_int apic_id);
+#else
+#define	intr_add_cpu(apic_id)
+#endif
 int	intr_add_handler(const char *name, int vector, driver_intr_t handler,
     void *arg, enum intr_type flags, void **cookiep);
 int	intr_config_intr(int vector, enum intr_trigger trig,
diff --git a/sys/amd64/isa/atpic.c b/sys/amd64/isa/atpic.c
index 906eddaccea0..1398e47f7be8 100644
--- a/sys/amd64/isa/atpic.c
+++ b/sys/amd64/isa/atpic.c
@@ -108,8 +108,8 @@ inthand_t
 #define	ATPIC(io, base, eoi, imenptr)					\
      	{ { atpic_enable_source, atpic_disable_source, (eoi),		\
 	    atpic_enable_intr, atpic_vector, atpic_source_pending, NULL, \
-	    atpic_resume, atpic_config_intr }, (io), (base),		\
-	    IDT_IO_INTS + (base), (imenptr) }
+	    atpic_resume, atpic_config_intr, atpic_assign_cpu }, (io),  \
+	    (base), IDT_IO_INTS + (base), (imenptr) }
 
 #define	INTSRC(irq)							\
 	{ { &atpics[(irq) / 8].at_pic }, IDTVEC(atpic_intr ## irq ),	\
@@ -142,6 +142,7 @@ static void atpic_resume(struct intsrc *isrc);
 static int atpic_source_pending(struct intsrc *isrc);
 static int atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
     enum intr_polarity pol);
+static void atpic_assign_cpu(struct intsrc *isrc, u_int apic_id);
 static void i8259_init(struct atpic *pic, int slave);
 
 static struct atpic atpics[] = {
@@ -352,6 +353,17 @@ atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
 	return (0);
 }
 
+static void
+atpic_assign_cpu(struct intsrc *isrc, u_int apic_id)
+{
+
+	/*
+	 * 8259A's are only used in UP in which case all interrupts always
+	 * go to the sole CPU and this function shouldn't even be called.
+	 */
+	panic("%s: bad cookie", __func__);
+}
+
 static void
 i8259_init(struct atpic *pic, int slave)
 {
diff --git a/sys/i386/i386/intr_machdep.c b/sys/i386/i386/intr_machdep.c
index b343780a62d9..f163ff92630e 100644
--- a/sys/i386/i386/intr_machdep.c
+++ b/sys/i386/i386/intr_machdep.c
@@ -63,6 +63,12 @@ static int intrcnt_index;
 static struct intsrc *interrupt_sources[NUM_IO_INTS];
 static struct mtx intr_table_lock;
 
+#ifdef SMP
+static int assign_cpu;
+
+static void	intr_assign_next_cpu(struct intsrc *isrc);
+#endif
+
 static void	intr_init(void *__dummy);
 static void	intrcnt_setname(const char *name, int index);
 static void	intrcnt_updatename(struct intsrc *is);
@@ -93,6 +99,7 @@ intr_register_source(struct intsrc *isrc)
 	}
 	intrcnt_register(isrc);
 	interrupt_sources[vector] = isrc;
+	isrc->is_enabled = 0;
 	mtx_unlock_spin(&intr_table_lock);
 	return (0);
 }
@@ -118,7 +125,17 @@ intr_add_handler(const char *name, int vector, driver_intr_t handler,
 	    intr_priority(flags), flags, cookiep);
 	if (error == 0) {
 		intrcnt_updatename(isrc);
-		isrc->is_pic->pic_enable_intr(isrc);
+		mtx_lock_spin(&intr_table_lock);
+		if (!isrc->is_enabled) {
+			isrc->is_enabled = 1;
+#ifdef SMP
+			if (assign_cpu)
+				intr_assign_next_cpu(isrc);
+#endif
+			mtx_unlock_spin(&intr_table_lock);
+			isrc->is_pic->pic_enable_intr(isrc);
+		} else
+			mtx_unlock_spin(&intr_table_lock);
 		isrc->is_pic->pic_enable_source(isrc);
 	}
 	return (error);
@@ -335,3 +352,76 @@ DB_SHOW_COMMAND(irqs, db_show_irqs)
 			db_dump_intr_event((*isrc)->is_event, verbose);
 }
 #endif
+
+#ifdef SMP
+/*
+ * Support for balancing interrupt sources across CPUs.  For now we just
+ * allocate CPUs round-robin.
+ */
+
+static u_int cpu_apic_ids[MAXCPU];
+static int current_cpu, num_cpus;
+
+static void
+intr_assign_next_cpu(struct intsrc *isrc)
+{
+	struct pic *pic;
+	u_int apic_id;
+
+	/*
+	 * Pick the next APIC ID round-robin; callers hold intr_table_lock.
+	 */
+	pic = isrc->is_pic;
+	apic_id = cpu_apic_ids[current_cpu];
+	current_cpu++;
+	if (current_cpu >= num_cpus)
+		current_cpu = 0;	/* wrap back to the first registered CPU */
+	if (bootverbose) {
+		printf("INTR: Assigning IRQ %d", pic->pic_vector(isrc));
+		printf(" to local APIC %u\n", apic_id);
+	}
+	pic->pic_assign_cpu(isrc, apic_id);
+}
+
+/*
+ * Record a local APIC ID in cpu_apic_ids[] as a valid destination for
+ * interrupts.  Panics if MAXCPU IDs have already been recorded.
+ */
+void
+intr_add_cpu(u_int apic_id)
+{
+
+	if (bootverbose)
+		printf("INTR: Adding local APIC %u as a target\n", apic_id);
+	if (num_cpus >= MAXCPU)
+		panic("WARNING: Local APIC IDs exhausted!");
+	cpu_apic_ids[num_cpus] = apic_id;
+	num_cpus++;
+}
+
+/*
+ * Distribute all enabled interrupt sources among the registered CPUs once
+ * the AP's have been launched (SI_SUB_SMP); a no-op when num_cpus <= 1.
+ */
+static void
+intr_shuffle_irqs(void *arg __unused)
+{
+	struct intsrc *isrc;
+	int i;
+
+	/* Don't bother on UP. */
+	if (num_cpus <= 1)
+		return;
+
+	/* Round-robin assign each enabled source a CPU. */
+	mtx_lock_spin(&intr_table_lock);
+	assign_cpu = 1;		/* intr_add_handler() assigns CPUs from now on */
+	for (i = 0; i < NUM_IO_INTS; i++) {
+		isrc = interrupt_sources[i];
+		if (isrc != NULL && isrc->is_enabled)
+			intr_assign_next_cpu(isrc);
+	}
+	mtx_unlock_spin(&intr_table_lock);
+}
+SYSINIT(intr_shuffle_irqs, SI_SUB_SMP, SI_ORDER_SECOND, intr_shuffle_irqs, NULL)
+#endif
diff --git a/sys/i386/i386/io_apic.c b/sys/i386/i386/io_apic.c
index b9999e23832d..78135bab8ce9 100644
--- a/sys/i386/i386/io_apic.c
+++ b/sys/i386/i386/io_apic.c
@@ -60,8 +60,6 @@ __FBSDID("$FreeBSD$");
 #define	IRQ_SMI			(NUM_IO_INTS + 3)
 #define	IRQ_DISABLED		(NUM_IO_INTS + 4)
 
-#define	DEST_NONE		-1
-
 #define	TODO		printf("%s: not implemented!\n", __func__)
 
 static MALLOC_DEFINE(M_IOAPIC, "io_apic", "I/O APIC structures");
@@ -85,10 +83,10 @@ struct ioapic_intsrc {
 	u_int io_irq;
 	u_int io_intpin:8;
 	u_int io_vector:8;
+	u_int io_cpu:8;
 	u_int io_activehi:1;
 	u_int io_edgetrigger:1;
 	u_int io_masked:1;
-	int io_dest:5;
 	int io_bus:4;
 };
 
@@ -117,7 +115,7 @@ static int	ioapic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
 		    enum intr_polarity pol);
 static void	ioapic_suspend(struct intsrc *isrc);
 static void	ioapic_resume(struct intsrc *isrc);
-static void	ioapic_program_destination(struct ioapic_intsrc *intpin);
+static void	ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id);
 static void	ioapic_program_intpin(struct ioapic_intsrc *intpin);
 
 static STAILQ_HEAD(,ioapic) ioapic_list = STAILQ_HEAD_INITIALIZER(ioapic_list);
@@ -125,10 +123,10 @@ struct pic ioapic_template = { ioapic_enable_source, ioapic_disable_source,
 			       ioapic_eoi_source, ioapic_enable_intr,
 			       ioapic_vector, ioapic_source_pending,
 			       ioapic_suspend, ioapic_resume,
-			       ioapic_config_intr };
-	
-static int bsp_id, current_cluster, logical_clusters, next_ioapic_base;
-static u_int next_id, program_logical_dest;
+			       ioapic_config_intr, ioapic_assign_cpu };
+
+static int next_ioapic_base;
+static u_int next_id;
 
 SYSCTL_NODE(_hw, OID_AUTO, apic, CTLFLAG_RD, 0, "APIC options");
 static int enable_extint;
@@ -276,14 +274,8 @@ ioapic_program_intpin(struct ioapic_intsrc *intpin)
 	}
 
 	/* Set the destination. */
-	if (intpin->io_dest == DEST_NONE) {
-		low = IOART_DESTPHY;
-		high = bsp_id << APIC_ID_SHIFT;
-	} else {
-		low = IOART_DESTLOG;
-		high = (intpin->io_dest << APIC_ID_CLUSTER_SHIFT |
-		    APIC_ID_CLUSTER_ID) << APIC_ID_SHIFT;
-	}
+	low = IOART_DESTPHY;
+	high = intpin->io_cpu << APIC_ID_SHIFT;
 
 	/* Program the rest of the low word. */
 	if (intpin->io_edgetrigger)
@@ -315,7 +307,7 @@ ioapic_program_intpin(struct ioapic_intsrc *intpin)
 	default:
 		KASSERT(intpin->io_vector != 0, ("No vector for IRQ %u",
 		    intpin->io_irq));
-		low |= IOART_DELLOPRI | intpin->io_vector;
+		low |= IOART_DELFIXED | intpin->io_vector;
 	}
 
 	/* Write the values to the APIC. */
@@ -328,60 +320,31 @@ ioapic_program_intpin(struct ioapic_intsrc *intpin)
 	mtx_unlock_spin(&icu_lock);
 }
 
-/*
- * Program an individual intpin's logical destination.
- */
 static void
-ioapic_program_destination(struct ioapic_intsrc *intpin)
+ioapic_assign_cpu(struct intsrc *isrc, u_int apic_id)
 {
-	struct ioapic *io = (struct ioapic *)intpin->io_intsrc.is_pic;
+	struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
+	struct ioapic *io = (struct ioapic *)isrc->is_pic;
 
-	KASSERT(intpin->io_dest != DEST_NONE,
-	    ("intpin not assigned to a cluster"));
+	intpin->io_cpu = apic_id;
 	if (bootverbose) {
-		printf("ioapic%u: routing intpin %u (", io->io_id,
-		    intpin->io_intpin);
+		printf("ioapic%u: Assigning ", io->io_id);
 		ioapic_print_irq(intpin);
-		printf(") to cluster %u\n", intpin->io_dest);
+		printf(" to local APIC %u\n", intpin->io_cpu);
 	}
 	ioapic_program_intpin(intpin);
 }
 
-static void
-ioapic_assign_cluster(struct ioapic_intsrc *intpin)
-{
-
-	/*
-	 * Assign this intpin to a logical APIC cluster in a
-	 * round-robin fashion.  We don't actually use the logical
-	 * destination for this intpin until after all the CPU's
-	 * have been started so that we don't end up with interrupts
-	 * that don't go anywhere.  Another alternative might be to
-	 * start up the CPU's earlier so that they can handle interrupts
-	 * sooner.
-	 */
-	intpin->io_dest = current_cluster;
-	current_cluster++;
-	if (current_cluster >= logical_clusters)
-		current_cluster = 0;
-	if (program_logical_dest)
-		ioapic_program_destination(intpin);
-}
-
 static void
 ioapic_enable_intr(struct intsrc *isrc)
 {
 	struct ioapic_intsrc *intpin = (struct ioapic_intsrc *)isrc;
 	struct ioapic *io = (struct ioapic *)isrc->is_pic;
 
-	if (intpin->io_dest == DEST_NONE) {
+	if (intpin->io_vector == 0) {
 		/*
 		 * Allocate an APIC vector for this interrupt pin.  Once
-		 * we have a vector we program the interrupt pin.  Note
-		 * that after we have booted ioapic_assign_cluster()
-		 * will program the interrupt pin again, but it doesn't
-		 * hurt to do that and trying to avoid that adds needless
-		 * complication.
+		 * we have a vector we program the interrupt pin.
 		 */
 		intpin->io_vector = apic_alloc_vector(intpin->io_irq);
 		if (bootverbose) {
@@ -391,7 +354,6 @@ ioapic_enable_intr(struct intsrc *isrc)
 			printf(") to vector %u\n", intpin->io_vector);
 		}
 		ioapic_program_intpin(intpin);
-		ioapic_assign_cluster(intpin);
 		apic_enable_vector(intpin->io_vector);
 	}
 }
@@ -471,22 +433,6 @@ ioapic_resume(struct intsrc *isrc)
 	ioapic_program_intpin((struct ioapic_intsrc *)isrc);
 }
 
-/*
- * Allocate and return a logical cluster ID.  Note that the first time
- * this is called, it returns cluster 0.  ioapic_enable_intr() treats
- * the two cases of logical_clusters == 0 and logical_clusters == 1 the
- * same: one cluster of ID 0 exists.  The logical_clusters == 0 case is
- * for UP kernels, which should never call this function.
- */
-int
-ioapic_next_logical_cluster(void)
-{
-
-	if (logical_clusters >= APIC_MAX_CLUSTER)
-		panic("WARNING: Local APIC cluster IDs exhausted!");
-	return (logical_clusters++);
-}
-
 /*
  * Create a plain I/O APIC object.
  */
@@ -571,11 +517,10 @@ ioapic_create(uintptr_t addr, int32_t apic_id, int intbase)
 		}
 
 		/*
-		 * Route interrupts to the BSP by default using physical
-		 * addressing.  Vectored interrupts get readdressed using
-		 * logical IDs to CPU clusters when they are enabled.
+		 * Route interrupts to the BSP by default.  Interrupts may
+		 * be routed to other CPUs later after they are enabled.
 		 */
-		intpin->io_dest = DEST_NONE;
+		intpin->io_cpu = PCPU_GET(apic_id);
 		if (bootverbose && intpin->io_irq != IRQ_DISABLED) {
 			printf("ioapic%u: intpin %d -> ",  io->io_id, i);
 			ioapic_print_irq(intpin);
@@ -781,29 +726,9 @@ ioapic_register(void *cookie)
 	printf("ioapic%u <Version %u.%u> irqs %u-%u on motherboard\n",
 	    io->io_id, flags >> 4, flags & 0xf, io->io_intbase,
 	    io->io_intbase + io->io_numintr - 1);
-	bsp_id = PCPU_GET(apic_id);
 
 	/* Register valid pins as interrupt sources. */
 	for (i = 0, pin = io->io_pins; i < io->io_numintr; i++, pin++)
 		if (pin->io_irq < NUM_IO_INTS)
 			intr_register_source(&pin->io_intsrc);
 }
-
-/*
- * Program all the intpins to use logical destinations once the AP's
- * have been launched.
- */
-static void
-ioapic_set_logical_destinations(void *arg __unused)
-{
-	struct ioapic *io;
-	int i;
-
-	program_logical_dest = 1;
-	STAILQ_FOREACH(io, &ioapic_list, io_next)
-	    for (i = 0; i < io->io_numintr; i++)
-		    if (io->io_pins[i].io_dest != DEST_NONE)
-			    ioapic_program_destination(&io->io_pins[i]);
-}
-SYSINIT(ioapic_destinations, SI_SUB_SMP, SI_ORDER_SECOND,
-    ioapic_set_logical_destinations, NULL)
diff --git a/sys/i386/i386/local_apic.c b/sys/i386/i386/local_apic.c
index ddd7d32bb453..9814f288fbeb 100644
--- a/sys/i386/i386/local_apic.c
+++ b/sys/i386/i386/local_apic.c
@@ -218,6 +218,7 @@ lapic_init(uintptr_t addr)
 
 	/* Set BSP's per-CPU local APIC ID. */
 	PCPU_SET(apic_id, lapic_id());
+	intr_add_cpu(PCPU_GET(apic_id));
 
 	/* Local APIC timer interrupt. */
 	setidt(APIC_TIMER_INT, IDTVEC(timerint), SDT_SYS386IGT, SEL_KPL,
@@ -281,7 +282,7 @@ void
 lapic_setup(void)
 {
 	struct lapic *la;
-	u_int32_t value, maxlvt;
+	u_int32_t maxlvt;
 	register_t eflags;
 	char buf[MAXCOMLEN + 1];
 
@@ -293,19 +294,6 @@ lapic_setup(void)
 	/* Initialize the TPR to allow all interrupts. */
 	lapic_set_tpr(0);
 
-	/* Use the cluster model for logical IDs. */
-	value = lapic->dfr;
-	value &= ~APIC_DFR_MODEL_MASK;
-	value |= APIC_DFR_MODEL_CLUSTER;
-	lapic->dfr = value;
-
-	/* Set this APIC's logical ID. */
-	value = lapic->ldr;
-	value &= ~APIC_ID_MASK;
-	value |= (la->la_cluster << APIC_ID_CLUSTER_SHIFT |
-	    1 << la->la_cluster_id) << APIC_ID_SHIFT;
-	lapic->ldr = value;
-
 	/* Setup spurious vector and enable the local APIC. */
 	lapic_enable();
 
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index bd8dfdfd0662..4b960b73df24 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -217,7 +217,7 @@ static volatile u_int cpu_ipi_pending[MAXCPU];
 
 static u_int boot_address;
 
-static void	set_logical_apic_ids(void);
+static void	set_interrupt_apic_ids(void);
 static int	start_all_aps(void);
 static void	install_ap_tramp(void);
 static int	start_ap(int apic_id);
@@ -454,7 +454,7 @@ cpu_mp_start(void)
 			hyperthreading_cpus = logical_cpus;
 	}
 
-	set_logical_apic_ids();
+	set_interrupt_apic_ids();
 }
 
 
@@ -645,33 +645,29 @@ init_secondary(void)
  */
 
 /*
- * Set the APIC logical IDs.
- *
- * We want to cluster logical CPU's within the same APIC ID cluster.
- * Since logical CPU's are aligned simply filling in the clusters in
- * APIC ID order works fine.  Note that this does not try to balance
- * the number of CPU's in each cluster. (XXX?)
+ * We tell the interrupt code (via intr_add_cpu()) about all the CPUs we
+ * want to receive interrupts.  If we don't want certain CPUs to receive
+ * IRQs we can simply not register them in this function.  We skip the
+ * BSP here since lapic_init() already registers the BSP itself to work
+ * with UP kernels and on UP machines.
  */
 static void
-set_logical_apic_ids(void)
+set_interrupt_apic_ids(void)
 {
-	u_int apic_id, cluster, cluster_id;
+	u_int apic_id;
 
-	/* Force us to allocate cluster 0 at the start. */
-	cluster = -1;
-	cluster_id = APIC_MAX_INTRACLUSTER_ID;
 	for (apic_id = 0; apic_id < MAXCPU; apic_id++) {
 		if (!cpu_info[apic_id].cpu_present)
 			continue;
-		if (cluster_id == APIC_MAX_INTRACLUSTER_ID) {
-			cluster = ioapic_next_logical_cluster();
-			cluster_id = 0;
-		} else
-			cluster_id++;
-		if (bootverbose)
-			printf("APIC ID: physical %u, logical %u:%u\n",
-			    apic_id, cluster, cluster_id);
-		lapic_set_logical_id(apic_id, cluster, cluster_id);
+		if (cpu_info[apic_id].cpu_bsp)
+			continue;
+#if 0
+		/* Don't let hyperthreads service interrupts. */
+		if (hyperthreading_cpus > 1 &&
+		    apic_id % hyperthreading_cpus != 0)
+			continue;
+#endif
+		intr_add_cpu(apic_id);
 	}
 }
 
diff --git a/sys/i386/include/apicvar.h b/sys/i386/include/apicvar.h
index 8102567d45f1..8d1a6be46fab 100644
--- a/sys/i386/include/apicvar.h
+++ b/sys/i386/include/apicvar.h
@@ -180,7 +180,6 @@ void	apic_register_enumerator(struct apic_enumerator *enumerator);
 void	*ioapic_create(uintptr_t addr, int32_t id, int intbase);
 int	ioapic_disable_pin(void *cookie, u_int pin);
 int	ioapic_get_vector(void *cookie, u_int pin);
-int	ioapic_next_logical_cluster(void);
 void	ioapic_register(void *cookie);
 int	ioapic_remap_vector(void *cookie, u_int pin, int vector);
 int	ioapic_set_bus(void *cookie, u_int pin, int bus_type);
diff --git a/sys/i386/include/intr_machdep.h b/sys/i386/include/intr_machdep.h
index 4d5743a4aa2b..0d187c18e664 100644
--- a/sys/i386/include/intr_machdep.h
+++ b/sys/i386/include/intr_machdep.h
@@ -85,6 +85,7 @@ struct pic {
 	void (*pic_resume)(struct intsrc *);
 	int (*pic_config_intr)(struct intsrc *, enum intr_trigger,
 	    enum intr_polarity);
+	void (*pic_assign_cpu)(struct intsrc *, u_int apic_id);
 };
 
 /* Flags for pic_disable_source() */
@@ -105,6 +106,7 @@ struct intsrc {
 	u_long *is_count;
 	u_long *is_straycount;
 	u_int is_index;
+	u_int is_enabled:1;
 };
 
 struct trapframe;
@@ -117,6 +119,11 @@ int	elcr_probe(void);
 enum intr_trigger elcr_read_trigger(u_int irq);
 void	elcr_resume(void);
 void	elcr_write_trigger(u_int irq, enum intr_trigger trigger);
+#ifdef SMP
+void	intr_add_cpu(u_int apic_id);
+#else
+#define	intr_add_cpu(apic_id)
+#endif
 int	intr_add_handler(const char *name, int vector, driver_intr_t handler,
     void *arg, enum intr_type flags, void **cookiep);
 int	intr_config_intr(int vector, enum intr_trigger trig,
diff --git a/sys/i386/isa/atpic.c b/sys/i386/isa/atpic.c
index 860392f98034..c73e253e5f7a 100644
--- a/sys/i386/isa/atpic.c
+++ b/sys/i386/isa/atpic.c
@@ -126,8 +126,8 @@ inthand_t
 #define	ATPIC(io, base, eoi, imenptr)					\
      	{ { atpic_enable_source, atpic_disable_source, (eoi),		\
 	    atpic_enable_intr, atpic_vector, atpic_source_pending, NULL, \
-	    atpic_resume, atpic_config_intr }, (io), (base),		\
-	    IDT_IO_INTS + (base), (imenptr) }
+	    atpic_resume, atpic_config_intr, atpic_assign_cpu }, (io),  \
+	    (base), IDT_IO_INTS + (base), (imenptr) }
 
 #define	INTSRC(irq)							\
 	{ { &atpics[(irq) / 8].at_pic }, IDTVEC(atpic_intr ## irq ),	\
@@ -160,6 +160,7 @@ static void atpic_resume(struct intsrc *isrc);
 static int atpic_source_pending(struct intsrc *isrc);
 static int atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
     enum intr_polarity pol);
+static void atpic_assign_cpu(struct intsrc *isrc, u_int apic_id);
 static void i8259_init(struct atpic *pic, int slave);
 
 static struct atpic atpics[] = {
@@ -384,6 +385,17 @@ atpic_config_intr(struct intsrc *isrc, enum intr_trigger trig,
 #endif /* PC98 */
 }
 
+static void
+atpic_assign_cpu(struct intsrc *isrc, u_int apic_id)
+{
+
+	/*
+	 * 8259A's are only used in UP in which case all interrupts always
+	 * go to the sole CPU and this function shouldn't even be called.
+	 */
+	panic("%s: 8259A interrupts cannot be assigned to a CPU", __func__);
+}
+
 static void
 i8259_init(struct atpic *pic, int slave)
 {