From 71a19bdc640eb9d54e7db3d1d135753888f3469c Mon Sep 17 00:00:00 2001
From: Attilio Rao
Date: Thu, 5 May 2011 14:39:14 +0000
Subject: [PATCH] Commit the support for removing cpumask_t and replacing it
 directly with cpuset_t objects.

That is going to offer the underlying support for a simple bump of MAXCPU
and then support for a number of CPUs > 32 (as it is today).

Right now, cpumask_t is an int, 32 bits on all our supported architectures.
cpuset_t, on the other hand, is implemented as an array of longs and is
easily extensible by definition.

The architectures touched by this commit are the following:
- amd64
- i386
- pc98
- arm
- ia64
- XEN
while the others are still missing.
Userland is believed to be fully converted by the changes contained here.

Some technical notes:
- This commit may be considered an ABI nop for all the architectures
  other than amd64 and ia64 (and sparc64 in the future)
- per-cpu members, which are now converted to cpuset_t, need to be accessed
  while avoiding migration, because the size of cpuset_t should be
  considered unknown
- The size of cpuset_t objects differs between the kernel and userland
  (this is primarily done in order to leave some more space in userland to
  cope with KBI extensions).  If you need to access a kernel cpuset_t from
  userland, please refer to the examples in this patch on how to do that
  correctly (kgdb may be a good source, for example).
- Support for other architectures is going to be added soon
- Only MAXCPU for amd64 is bumped now

The patch has been tested by sbruno and Nicholas Esborn on a system with
4 x 12-core Opteron CPUs.  More testing on big SMP systems is expected to
come soon.  pluknet tested the patch with his 8-way machines on both amd64
and i386.

Tested by:	pluknet, sbruno, gianni, Nicholas Esborn
Reviewed by:	jeff, jhb, sbruno
---
 gnu/usr.bin/gdb/kgdb/kthr.c               |  18 +-
 lib/libmemstat/memstat_uma.c              |  10 +-
 sys/amd64/acpica/acpi_wakeup.c            |  21 +-
 sys/amd64/amd64/intr_machdep.c            |  10 +-
 sys/amd64/amd64/mp_machdep.c              | 224 ++++++++++++----
 sys/amd64/amd64/pmap.c                    |  84 ++++----
 sys/amd64/amd64/vm_machdep.c              |  21 +-
 sys/amd64/include/_types.h                |   1 -
 sys/amd64/include/param.h                 |   2 +-
 sys/amd64/include/pmap.h                  |   3 +-
 sys/amd64/include/smp.h                   |   8 +-
 sys/arm/arm/pmap.c                        |   4 +-
 sys/arm/include/_types.h                  |   1 -
 sys/arm/include/pmap.h                    |   3 +-
 sys/cddl/dev/cyclic/i386/cyclic_machdep.c |   4 +-
 sys/cddl/dev/dtrace/amd64/dtrace_subr.c   |   9 +-
 sys/cddl/dev/dtrace/i386/dtrace_subr.c    |  10 +-
 sys/dev/hwpmc/hwpmc_mod.c                 |   8 +-
 sys/dev/xen/control/control.c             |  21 +-
 sys/geom/eli/g_eli.c                      |   2 +-
 sys/i386/i386/intr_machdep.c              |  10 +-
 sys/i386/i386/mp_machdep.c                | 212 ++++++++++----------
 sys/i386/i386/pmap.c                      |  91 +++++----
 sys/i386/i386/vm_machdep.c                |  36 ++--
 sys/i386/include/_types.h                 |   1 -
 sys/i386/include/pmap.h                   |   3 +-
 sys/i386/include/sf_buf.h                 |   3 +-
 sys/i386/include/smp.h                    |   8 +-
 sys/i386/xen/mp_machdep.c                 | 117 ++++++-----
 sys/i386/xen/pmap.c                       |  54 +++---
 sys/ia64/ia64/mp_machdep.c                |  14 +-
 sys/ia64/include/_types.h                 |   1 -
 sys/ia64/include/smp.h                    |   4 +-
 sys/kern/kern_cpuset.c                    |  48 ++++-
 sys/kern/kern_pmc.c                       |   6 +-
 sys/kern/kern_rmlock.c                    |  15 +-
 sys/kern/sched_4bsd.c                     |  66 ++++---
 sys/kern/sched_ule.c                      |   9 +-
 sys/kern/subr_kdb.c                       |   3 +-
 sys/kern/subr_pcpu.c                      |   2 +-
 sys/kern/subr_smp.c                       |  87 +++++----
 sys/ofed/include/linux/list.h             |   1 +
 sys/sys/_cpuset.h                         |  52 +++++
 sys/sys/_rmlock.h                         |   2 +-
 sys/sys/cpuset.h                          |  62 ++++--
 sys/sys/pcpu.h                            |   5 +-
 sys/sys/pmckern.h                         |   4 +-
 sys/sys/smp.h                             |  26 +--
 sys/sys/types.h                           |   1 -
 sys/x86/x86/local_apic.c                  |   2 +-
 usr.sbin/pmccontrol/pmccontrol.c          |  23 ++-
 51 files changed, 871 insertions(+), 561
deletions(-) create mode 100644 sys/sys/_cpuset.h diff --git a/gnu/usr.bin/gdb/kgdb/kthr.c b/gnu/usr.bin/gdb/kgdb/kthr.c index 5036c9c9aaa9..461f408a73d9 100644 --- a/gnu/usr.bin/gdb/kgdb/kthr.c +++ b/gnu/usr.bin/gdb/kgdb/kthr.c @@ -28,6 +28,7 @@ __FBSDID("$FreeBSD$"); #include +#include #include #include #include @@ -37,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -48,7 +50,7 @@ static CORE_ADDR dumppcb; static int dumptid; static CORE_ADDR stoppcbs; -static __cpumask_t stopped_cpus; +static cpuset_t stopped_cpus; static struct kthr *first; struct kthr *curkthr; @@ -76,6 +78,7 @@ kgdb_thr_init(void) { struct proc p; struct thread td; + long cpusetsize; struct kthr *kt; CORE_ADDR addr; uintptr_t paddr; @@ -102,10 +105,11 @@ kgdb_thr_init(void) dumptid = -1; addr = kgdb_lookup("stopped_cpus"); - if (addr != 0) - kvm_read(kvm, addr, &stopped_cpus, sizeof(stopped_cpus)); - else - stopped_cpus = 0; + CPU_ZERO(&stopped_cpus); + cpusetsize = sysconf(_SC_CPUSET_SIZE); + if (cpusetsize != -1 && (u_long)cpusetsize <= sizeof(cpuset_t) && + addr != 0) + kvm_read(kvm, addr, &stopped_cpus, cpusetsize); stoppcbs = kgdb_lookup("stoppcbs"); @@ -126,8 +130,8 @@ kgdb_thr_init(void) kt->kaddr = addr; if (td.td_tid == dumptid) kt->pcb = dumppcb; - else if (td.td_state == TDS_RUNNING && ((1 << td.td_oncpu) & stopped_cpus) - && stoppcbs != 0) + else if (td.td_state == TDS_RUNNING && stoppcbs != 0 && + CPU_ISSET(td.td_oncpu, &stopped_cpus)) kt->pcb = (uintptr_t) stoppcbs + sizeof(struct pcb) * td.td_oncpu; else kt->pcb = (uintptr_t)td.td_pcb; diff --git a/lib/libmemstat/memstat_uma.c b/lib/libmemstat/memstat_uma.c index 10ff8ec36a81..302093744b5c 100644 --- a/lib/libmemstat/memstat_uma.c +++ b/lib/libmemstat/memstat_uma.c @@ -45,6 +45,7 @@ #include #include #include +#include #include "memstat.h" #include "memstat_internal.h" @@ -315,6 +316,7 @@ memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle) int hint_dontsearch, i, mp_maxid, ret; char name[MEMTYPE_MAXNAME]; cpuset_t all_cpus; + long cpusetsize; kvm_t *kvm; kvm = (kvm_t *)kvm_handle; @@ -338,7 +340,13 @@ memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle) list->mtl_error = ret; return (-1); } - ret = kread_symbol(kvm, X_ALL_CPUS, &all_cpus, sizeof(all_cpus), 0); + cpusetsize = sysconf(_SC_CPUSET_SIZE); + if (cpusetsize == -1 || (u_long)cpusetsize > sizeof(cpuset_t)) { + list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL; + return (-1); + } + CPU_ZERO(&all_cpus); + ret = kread_symbol(kvm, X_ALL_CPUS, &all_cpus, cpusetsize, 0); if (ret != 0) { list->mtl_error = ret; return (-1); diff --git a/sys/amd64/acpica/acpi_wakeup.c b/sys/amd64/acpica/acpi_wakeup.c index 57341c938f14..29e66c52a2f2 100644 --- a/sys/amd64/acpica/acpi_wakeup.c +++ b/sys/amd64/acpica/acpi_wakeup.c @@ -78,7 +78,7 @@ static void acpi_stop_beep(void *); #ifdef SMP static int acpi_wakeup_ap(struct acpi_softc *, int); -static void acpi_wakeup_cpus(struct acpi_softc *, cpumask_t); +static void acpi_wakeup_cpus(struct acpi_softc *, const cpuset_t *); #endif #define WAKECODE_VADDR(sc) ((sc)->acpi_wakeaddr + (3 * PAGE_SIZE)) @@ -173,7 +173,7 @@ acpi_wakeup_ap(struct acpi_softc *sc, int cpu) #define BIOS_WARM (0x0a) static void -acpi_wakeup_cpus(struct acpi_softc *sc, cpumask_t wakeup_cpus) +acpi_wakeup_cpus(struct acpi_softc *sc, const cpuset_t *wakeup_cpus) { uint32_t mpbioswarmvec; int cpu; @@ -192,7 +192,7 @@ acpi_wakeup_cpus(struct acpi_softc *sc, cpumask_t wakeup_cpus) /* Wake up each AP. 
*/ for (cpu = 1; cpu < mp_ncpus; cpu++) { - if ((wakeup_cpus & (1 << cpu)) == 0) + if (!CPU_ISSET(cpu, wakeup_cpus)) continue; if (acpi_wakeup_ap(sc, cpu) == 0) { /* restore the warmstart vector */ @@ -214,7 +214,7 @@ int acpi_sleep_machdep(struct acpi_softc *sc, int state) { #ifdef SMP - cpumask_t wakeup_cpus; + cpuset_t wakeup_cpus; #endif register_t cr3, rf; ACPI_STATUS status; @@ -244,10 +244,9 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state) if (savectx(susppcbs[0])) { #ifdef SMP - if (wakeup_cpus != 0 && suspend_cpus(wakeup_cpus) == 0) { - device_printf(sc->acpi_dev, - "Failed to suspend APs: CPU mask = 0x%jx\n", - (uintmax_t)(wakeup_cpus & ~stopped_cpus)); + if (!CPU_EMPTY(&wakeup_cpus) && + suspend_cpus(wakeup_cpus) == 0) { + device_printf(sc->acpi_dev, "Failed to suspend APs\n"); goto out; } #endif @@ -282,8 +281,8 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state) PCPU_SET(switchtime, 0); PCPU_SET(switchticks, ticks); #ifdef SMP - if (wakeup_cpus != 0) - acpi_wakeup_cpus(sc, wakeup_cpus); + if (!CPU_EMPTY(&wakeup_cpus)) + acpi_wakeup_cpus(sc, &wakeup_cpus); #endif acpi_resync_clock(sc); ret = 0; @@ -291,7 +290,7 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state) out: #ifdef SMP - if (wakeup_cpus != 0) + if (!CPU_EMPTY(&wakeup_cpus)) restart_cpus(wakeup_cpus); #endif diff --git a/sys/amd64/amd64/intr_machdep.c b/sys/amd64/amd64/intr_machdep.c index 4edef81c01fa..3a8953153ff8 100644 --- a/sys/amd64/amd64/intr_machdep.c +++ b/sys/amd64/amd64/intr_machdep.c @@ -443,8 +443,7 @@ DB_SHOW_COMMAND(irqs, db_show_irqs) * allocate CPUs round-robin. */ -/* The BSP is always a valid target. */ -static cpumask_t intr_cpus = (1 << 0); +static cpuset_t intr_cpus; static int current_cpu; /* @@ -466,7 +465,7 @@ intr_next_cpu(void) current_cpu++; if (current_cpu > mp_maxid) current_cpu = 0; - } while (!(intr_cpus & (1 << current_cpu))); + } while (!CPU_ISSET(current_cpu, &intr_cpus)); mtx_unlock_spin(&icu_lock); return (apic_id); } @@ -497,7 +496,7 @@ intr_add_cpu(u_int cpu) printf("INTR: Adding local APIC %d as a target\n", cpu_apic_ids[cpu]); - intr_cpus |= (1 << cpu); + CPU_SET(cpu, &intr_cpus); } /* @@ -510,6 +509,9 @@ intr_shuffle_irqs(void *arg __unused) struct intsrc *isrc; int i; + /* The BSP is always a valid target. */ + CPU_SETOF(0, &intr_cpus); + /* Don't bother on UP. */ if (mp_ncpus == 1) return; diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 8e8dc67bdc1c..57e91a8444cc 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #ifdef GPROF #include #endif @@ -125,7 +126,7 @@ extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32); * Local data and functions. 
*/ -static volatile cpumask_t ipi_nmi_pending; +static volatile cpuset_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ static struct mtx ap_boot_mtx; @@ -161,7 +162,7 @@ static void release_aps(void *dummy); static int hlt_logical_cpus; static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */ -static cpumask_t hyperthreading_cpus_mask; +static cpuset_t hyperthreading_cpus_mask; static int hyperthreading_allowed = 1; static struct sysctl_ctx_list logical_cpu_clist; static u_int bootMP_size; @@ -311,7 +312,7 @@ topo_probe(void) if (cpu_topo_probed) return; - logical_cpus_mask = 0; + CPU_ZERO(&logical_cpus_mask); if (mp_ncpus <= 1) cpu_cores = cpu_logical = 1; else if (cpu_vendor_id == CPU_VENDOR_AMD) @@ -455,7 +456,7 @@ cpu_mp_probe(void) * Always record BSP in CPU map so that the mbuf init code works * correctly. */ - all_cpus = 1; + CPU_SETOF(0, &all_cpus); if (mp_ncpus == 0) { /* * No CPUs were found, so this must be a UP system. Setup @@ -582,6 +583,7 @@ cpu_mp_announce(void) void init_secondary(void) { + cpuset_t tcpuset, tallcpus; struct pcpu *pc; struct nmi_pcpu *np; u_int64_t msr, cr0; @@ -713,19 +715,22 @@ init_secondary(void) CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); + tcpuset = PCPU_GET(cpumask); /* Determine if we are a logical CPU. */ /* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */ if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0) - logical_cpus_mask |= PCPU_GET(cpumask); - + CPU_OR(&logical_cpus_mask, &tcpuset); + /* Determine if we are a hyperthread. */ if (hyperthreading_cpus > 1 && PCPU_GET(apic_id) % hyperthreading_cpus != 0) - hyperthreading_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&hyperthreading_cpus_mask, &tcpuset); /* Build our map of 'other' CPUs. */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, &tcpuset); + PCPU_SET(other_cpus, tallcpus); if (bootverbose) lapic_dump("AP"); @@ -868,6 +873,7 @@ assign_cpu_ids(void) static int start_all_aps(void) { + cpuset_t tallcpus, tcpuset; vm_offset_t va = boot_address + KERNBASE; u_int64_t *pt4, *pt3, *pt2; u_int32_t mpbioswarmvec; @@ -932,11 +938,14 @@ start_all_aps(void) panic("AP #%d (PHY# %d) failed!", cpu, apic_id); } - all_cpus |= (1 << cpu); /* record AP in CPU map */ + CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } /* build our map of 'other' CPUs */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + tcpuset = PCPU_GET(cpumask); + CPU_NAND(&tallcpus, &tcpuset); + PCPU_SET(other_cpus, tallcpus); /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; @@ -1064,6 +1073,30 @@ SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, &ipi_masked_range_size, 0, ""); #endif /* COUNT_XINVLTLB_HITS */ +/* + * Send an IPI to specified CPU handling the bitmap logic. 
+ */ +static void +ipi_send_cpu(int cpu, u_int ipi) +{ + u_int bitmap, old_pending, new_pending; + + KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); + + if (IPI_IS_BITMAPED(ipi)) { + bitmap = 1 << ipi; + ipi = IPI_BITMAP_VECTOR; + do { + old_pending = cpu_ipi_pending[cpu]; + new_pending = old_pending | bitmap; + } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], + old_pending, new_pending)); + if (old_pending) + return; + } + lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); +} + /* * Flush the TLB on all other CPU's */ @@ -1088,28 +1121,19 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) } static void -smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) +smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) { - int ncpu, othercpus; + int cpu, ncpu, othercpus; othercpus = mp_ncpus - 1; - if (mask == (cpumask_t)-1) { - ncpu = othercpus; - if (ncpu < 1) + if (CPU_ISFULLSET(&mask)) { + if (othercpus < 1) return; } else { - mask &= ~PCPU_GET(cpumask); - if (mask == 0) - return; - ncpu = bitcount32(mask); - if (ncpu > othercpus) { - /* XXX this should be a panic offence */ - printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", - ncpu, othercpus); - ncpu = othercpus; - } - /* XXX should be a panic, implied by mask == 0 above */ - if (ncpu < 1) + sched_pin(); + CPU_NAND(&mask, PCPU_PTR(cpumask)); + sched_unpin(); + if (CPU_EMPTY(&mask)) return; } if (!(read_rflags() & PSL_I)) @@ -1118,39 +1142,25 @@ smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_o smp_tlb_addr1 = addr1; smp_tlb_addr2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); - if (mask == (cpumask_t)-1) + if (CPU_ISFULLSET(&mask)) { + ncpu = othercpus; ipi_all_but_self(vector); - else - ipi_selected(mask, vector); + } else { + ncpu = 0; + while ((cpu = cpusetobj_ffs(&mask)) != 0) { + cpu--; + CPU_CLR(cpu, &mask); + CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, + cpu, vector); + ipi_send_cpu(cpu, vector); + ncpu++; + } + } while (smp_tlb_wait < ncpu) ia32_pause(); mtx_unlock_spin(&smp_ipi_mtx); } -/* - * Send an IPI to specified CPU handling the bitmap logic. - */ -static void -ipi_send_cpu(int cpu, u_int ipi) -{ - u_int bitmap, old_pending, new_pending; - - KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); - - if (IPI_IS_BITMAPED(ipi)) { - bitmap = 1 << ipi; - ipi = IPI_BITMAP_VECTOR; - do { - old_pending = cpu_ipi_pending[cpu]; - new_pending = old_pending | bitmap; - } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], - old_pending, new_pending)); - if (old_pending) - return; - } - lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); -} - void smp_cache_flush(void) { @@ -1197,7 +1207,7 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) } void -smp_masked_invltlb(cpumask_t mask) +smp_masked_invltlb(cpuset_t mask) { if (smp_started) { @@ -1209,7 +1219,7 @@ smp_masked_invltlb(cpumask_t mask) } void -smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) +smp_masked_invlpg(cpuset_t mask, vm_offset_t addr) { if (smp_started) { @@ -1221,7 +1231,7 @@ smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) } void -smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2) +smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { @@ -1274,7 +1284,7 @@ ipi_bitmap_handler(struct trapframe frame) * send an IPI to a set of cpus. 
*/ void -ipi_selected(cpumask_t cpus, u_int ipi) +ipi_selected(cpuset_t cpus, u_int ipi) { int cpu; @@ -1284,12 +1294,12 @@ ipi_selected(cpumask_t cpus, u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, cpus); + CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); - CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi); - while ((cpu = ffs(cpus)) != 0) { + while ((cpu = cpusetobj_ffs(&cpus)) != 0) { cpu--; - cpus &= ~(1 << cpu); + CPU_CLR(cpu, &cpus); + CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } } @@ -1307,7 +1317,7 @@ ipi_cpu(int cpu, u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, 1 << cpu); + CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); @@ -1320,8 +1330,10 @@ void ipi_all_but_self(u_int ipi) { + sched_pin(); if (IPI_IS_BITMAPED(ipi)) { ipi_selected(PCPU_GET(other_cpus), ipi); + sched_unpin(); return; } @@ -1331,7 +1343,8 @@ ipi_all_but_self(u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus)); + CPU_OR_ATOMIC(&ipi_nmi_pending, PCPU_PTR(other_cpus)); + sched_unpin(); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); @@ -1340,7 +1353,7 @@ ipi_all_but_self(u_int ipi) int ipi_nmi_handler() { - cpumask_t cpumask; + cpuset_t cpumask; /* * As long as there is not a simple way to know about a NMI's @@ -1348,11 +1361,13 @@ ipi_nmi_handler() * the global pending bitword an IPI_STOP_HARD has been issued * and should be handled. */ + sched_pin(); cpumask = PCPU_GET(cpumask); - if ((ipi_nmi_pending & cpumask) == 0) + sched_unpin(); + if (!CPU_OVERLAP(&ipi_nmi_pending, &cpumask)) return (1); - atomic_clear_int(&ipi_nmi_pending, cpumask); + CPU_NAND_ATOMIC(&ipi_nmi_pending, &cpumask); cpustop_handler(); return (0); } @@ -1364,23 +1379,25 @@ ipi_nmi_handler() void cpustop_handler(void) { - cpumask_t cpumask; + cpuset_t cpumask; u_int cpu; + sched_pin(); cpu = PCPU_GET(cpuid); cpumask = PCPU_GET(cpumask); + sched_unpin(); savectx(&stoppcbs[cpu]); /* Indicate that we are stopped */ - atomic_set_int(&stopped_cpus, cpumask); + CPU_OR_ATOMIC(&stopped_cpus, &cpumask); /* Wait for restart */ - while (!(started_cpus & cpumask)) + while (!CPU_OVERLAP(&started_cpus, &cpumask)) ia32_pause(); - atomic_clear_int(&started_cpus, cpumask); - atomic_clear_int(&stopped_cpus, cpumask); + CPU_NAND_ATOMIC(&started_cpus, &cpumask); + CPU_NAND_ATOMIC(&stopped_cpus, &cpumask); if (cpu == 0 && cpustop_restartfunc != NULL) { cpustop_restartfunc(); @@ -1395,7 +1412,7 @@ cpustop_handler(void) void cpususpend_handler(void) { - cpumask_t cpumask; + cpuset_t cpumask; register_t cr3, rf; u_int cpu; @@ -1407,7 +1424,7 @@ cpususpend_handler(void) if (savectx(susppcbs[cpu])) { wbinvd(); - atomic_set_int(&stopped_cpus, cpumask); + CPU_OR_ATOMIC(&stopped_cpus, &cpumask); } else { pmap_init_pat(); PCPU_SET(switchtime, 0); @@ -1415,11 +1432,11 @@ cpususpend_handler(void) } /* Wait for resume */ - while (!(started_cpus & cpumask)) + while (!CPU_OVERLAP(&started_cpus, &cpumask)) ia32_pause(); - atomic_clear_int(&started_cpus, cpumask); - atomic_clear_int(&stopped_cpus, cpumask); + CPU_NAND_ATOMIC(&started_cpus, &cpumask); + CPU_NAND_ATOMIC(&stopped_cpus, &cpumask); /* Restore CR3 and enable interrupts */ load_cr3(cr3); @@ -1447,30 +1464,30 @@ 
SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); static int sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS) { - cpumask_t mask; + cpuset_t mask; int error; mask = hlt_cpus_mask; - error = sysctl_handle_int(oidp, &mask, 0, req); + error = sysctl_handle_opaque(oidp, &mask, sizeof(mask), req); if (error || !req->newptr) return (error); - if (logical_cpus_mask != 0 && - (mask & logical_cpus_mask) == logical_cpus_mask) + if (!CPU_EMPTY(&logical_cpus_mask) && + CPU_SUBSET(&mask, &logical_cpus_mask)) hlt_logical_cpus = 1; else hlt_logical_cpus = 0; if (! hyperthreading_allowed) - mask |= hyperthreading_cpus_mask; + CPU_OR(&mask, &hyperthreading_cpus_mask); - if ((mask & all_cpus) == all_cpus) - mask &= ~(1<<0); + if (CPU_SUBSET(&mask, &all_cpus)) + CPU_CLR(0, &mask); hlt_cpus_mask = mask; return (error); } -SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW, - 0, 0, sysctl_hlt_cpus, "IU", +SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, + CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_hlt_cpus, "S", "Bitmap of CPUs to halt. 101 (binary) will halt CPUs 0 and 2."); static int @@ -1484,15 +1501,15 @@ sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS) return (error); if (disable) - hlt_cpus_mask |= logical_cpus_mask; + CPU_OR(&hlt_cpus_mask, &logical_cpus_mask); else - hlt_cpus_mask &= ~logical_cpus_mask; + CPU_NAND(&hlt_cpus_mask, &logical_cpus_mask); if (! hyperthreading_allowed) - hlt_cpus_mask |= hyperthreading_cpus_mask; + CPU_OR(&hlt_cpus_mask, &hyperthreading_cpus_mask); - if ((hlt_cpus_mask & all_cpus) == all_cpus) - hlt_cpus_mask &= ~(1<<0); + if (CPU_SUBSET(&hlt_cpus_mask, &all_cpus)) + CPU_CLR(0, &hlt_cpus_mask); hlt_logical_cpus = disable; return (error); @@ -1519,18 +1536,18 @@ sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS) #endif if (allowed) - hlt_cpus_mask &= ~hyperthreading_cpus_mask; + CPU_NAND(&hlt_cpus_mask, &hyperthreading_cpus_mask); else - hlt_cpus_mask |= hyperthreading_cpus_mask; + CPU_OR(&hlt_cpus_mask, &hyperthreading_cpus_mask); - if (logical_cpus_mask != 0 && - (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask) + if (!CPU_EMPTY(&logical_cpus_mask) && + CPU_SUBSET(&hlt_cpus_mask, &logical_cpus_mask)) hlt_logical_cpus = 1; else hlt_logical_cpus = 0; - if ((hlt_cpus_mask & all_cpus) == all_cpus) - hlt_cpus_mask &= ~(1<<0); + if (CPU_SUBSET(&hlt_cpus_mask, &all_cpus)) + CPU_CLR(0, &hlt_cpus_mask); hyperthreading_allowed = allowed; return (error); @@ -1540,7 +1557,7 @@ static void cpu_hlt_setup(void *dummy __unused) { - if (logical_cpus_mask != 0) { + if (!CPU_EMPTY(&logical_cpus_mask)) { TUNABLE_INT_FETCH("machdep.hlt_logical_cpus", &hlt_logical_cpus); sysctl_ctx_init(&logical_cpu_clist); @@ -1554,20 +1571,21 @@ cpu_hlt_setup(void *dummy __unused) &logical_cpus_mask, 0, ""); if (hlt_logical_cpus) - hlt_cpus_mask |= logical_cpus_mask; + CPU_OR(&hlt_cpus_mask, &logical_cpus_mask); /* * If necessary for security purposes, force * hyperthreading off, regardless of the value * of hlt_logical_cpus. */ - if (hyperthreading_cpus_mask) { + if (!CPU_EMPTY(&hyperthreading_cpus_mask)) { SYSCTL_ADD_PROC(&logical_cpu_clist, SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_hyperthreading_allowed, "IU", ""); if (! 
hyperthreading_allowed) - hlt_cpus_mask |= hyperthreading_cpus_mask; + CPU_OR(&hlt_cpus_mask, + &hyperthreading_cpus_mask); } } } @@ -1576,7 +1594,7 @@ SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL); int mp_grab_cpu_hlt(void) { - cpumask_t mask; + cpuset_t mask; #ifdef MP_WATCHDOG u_int cpuid; #endif @@ -1589,7 +1607,7 @@ mp_grab_cpu_hlt(void) #endif retval = 0; - while (mask & hlt_cpus_mask) { + while (CPU_OVERLAP(&mask, &hlt_cpus_mask)) { retval = 1; __asm __volatile("sti; hlt" : : : "memory"); } diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index c9ff9bcc14ae..025ca5f981c9 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -123,6 +123,8 @@ __FBSDID("$FreeBSD$"); #include #ifdef SMP #include +#else +#include #endif #include @@ -581,7 +583,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr) PMAP_LOCK_INIT(kernel_pmap); kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys); kernel_pmap->pm_root = NULL; - kernel_pmap->pm_active = -1; /* don't allow deactivation */ + CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); /* @@ -923,19 +925,20 @@ pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde) void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invlpg(va); smp_invlpg(va); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invlpg(va); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg(pmap->pm_active & other_cpus, va); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg(other_cpus, va); } sched_unpin(); } @@ -943,23 +946,23 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; vm_offset_t addr; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); smp_invlpg_range(sva, eva); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg_range(pmap->pm_active & other_cpus, - sva, eva); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg_range(other_cpus, sva, eva); } sched_unpin(); } @@ -967,19 +970,20 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) void pmap_invalidate_all(pmap_t pmap) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invltlb(); smp_invltlb(); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invltlb(); - if (pmap->pm_active & other_cpus) - smp_masked_invltlb(pmap->pm_active & other_cpus); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invltlb(other_cpus); } 
sched_unpin(); } @@ -995,8 +999,8 @@ pmap_invalidate_cache(void) } struct pde_action { - cpumask_t store; /* processor that updates the PDE */ - cpumask_t invalidate; /* processors that invalidate their TLB */ + cpuset_t store; /* processor that updates the PDE */ + cpuset_t invalidate; /* processors that invalidate their TLB */ vm_offset_t va; pd_entry_t *pde; pd_entry_t newpde; @@ -1007,8 +1011,12 @@ pmap_update_pde_action(void *arg) { struct pde_action *act = arg; - if (act->store == PCPU_GET(cpumask)) + sched_pin(); + if (!CPU_CMP(&act->store, PCPU_PTR(cpumask))) { + sched_unpin(); pde_store(act->pde, act->newpde); + } else + sched_unpin(); } static void @@ -1016,8 +1024,12 @@ pmap_update_pde_teardown(void *arg) { struct pde_action *act = arg; - if ((act->invalidate & PCPU_GET(cpumask)) != 0) + sched_pin(); + if (CPU_OVERLAP(&act->invalidate, PCPU_PTR(cpumask))) { + sched_unpin(); pmap_update_pde_invalidate(act->va, act->newpde); + } else + sched_unpin(); } /* @@ -1032,26 +1044,28 @@ static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { struct pde_action act; - cpumask_t active, cpumask; + cpuset_t active, cpumask, other_cpus; sched_pin(); cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); if (pmap == kernel_pmap) active = all_cpus; else active = pmap->pm_active; - if ((active & PCPU_GET(other_cpus)) != 0) { + if (CPU_OVERLAP(&active, &other_cpus)) { act.store = cpumask; act.invalidate = active; act.va = va; act.pde = pde; act.newpde = newpde; - smp_rendezvous_cpus(cpumask | active, + CPU_OR(&cpumask, &active); + smp_rendezvous_cpus(cpumask, smp_no_rendevous_barrier, pmap_update_pde_action, pmap_update_pde_teardown, &act); } else { pde_store(pde, newpde); - if ((active & cpumask) != 0) + if (CPU_OVERLAP(&active, &cpumask)) pmap_update_pde_invalidate(va, newpde); } sched_unpin(); @@ -1065,7 +1079,7 @@ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invlpg(va); } @@ -1074,7 +1088,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } @@ -1083,7 +1097,7 @@ PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invltlb(); } @@ -1099,7 +1113,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { pde_store(pde, newpde); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) pmap_update_pde_invalidate(va, newpde); } #endif /* !SMP */ @@ -1607,7 +1621,7 @@ pmap_pinit0(pmap_t pmap) PMAP_LOCK_INIT(pmap); pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); pmap->pm_root = NULL; - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1649,7 +1663,7 @@ pmap_pinit(pmap_t pmap) pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | PG_V | PG_RW | PG_A | PG_M; pmap->pm_root = NULL; - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -5087,11 +5101,11 @@ pmap_activate(struct thread *td) pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = 
PCPU_GET(curpmap); #ifdef SMP - atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); - atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); + CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask)); #else - oldpmap->pm_active &= ~PCPU_GET(cpumask); - pmap->pm_active |= PCPU_GET(cpumask); + CPU_NAND(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR(&pmap->pm_active, PCPU_PTR(cpumask)); #endif cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4); td->td_pcb->pcb_cr3 = cr3; diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 972484a80a08..13f5cd06b2b3 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -70,6 +71,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -512,11 +514,13 @@ cpu_set_user_tls(struct thread *td, void *tls_base) static void cpu_reset_proxy() { + cpuset_t tcrp; cpu_reset_proxy_active = 1; while (cpu_reset_proxy_active == 1) ; /* Wait for other cpu to see that we've started */ - stop_cpus((1< +#include #include #include @@ -251,7 +252,7 @@ struct pmap { struct mtx pm_mtx; pml4_entry_t *pm_pml4; /* KVA of level 4 page table */ TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ - cpumask_t pm_active; /* active on cpus */ + cpuset_t pm_active; /* active on cpus */ /* spare u_int here due to padding */ struct pmap_statistics pm_stats; /* pmap statistics */ vm_page_t pm_root; /* spare page table pages */ diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h index ec107f931055..a009b35b150e 100644 --- a/sys/amd64/include/smp.h +++ b/sys/amd64/include/smp.h @@ -63,17 +63,17 @@ void ipi_all_but_self(u_int ipi); void ipi_bitmap_handler(struct trapframe frame); void ipi_cpu(int cpu, u_int ipi); int ipi_nmi_handler(void); -void ipi_selected(cpumask_t cpus, u_int ipi); +void ipi_selected(cpuset_t cpus, u_int ipi); u_int mp_bootaddress(u_int); int mp_grab_cpu_hlt(void); void smp_cache_flush(void); void smp_invlpg(vm_offset_t addr); -void smp_masked_invlpg(cpumask_t mask, vm_offset_t addr); +void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr); void smp_invlpg_range(vm_offset_t startva, vm_offset_t endva); -void smp_masked_invlpg_range(cpumask_t mask, vm_offset_t startva, +void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva, vm_offset_t endva); void smp_invltlb(void); -void smp_masked_invltlb(cpumask_t mask); +void smp_masked_invltlb(cpuset_t mask); #endif /* !LOCORE */ #endif /* SMP */ diff --git a/sys/arm/arm/pmap.c b/sys/arm/arm/pmap.c index f33a692e7302..16487b08ef9d 100644 --- a/sys/arm/arm/pmap.c +++ b/sys/arm/arm/pmap.c @@ -2395,7 +2395,7 @@ pmap_bootstrap(vm_offset_t firstaddr, vm_offset_t lastaddr, struct pv_addr *l1pt cpu_cpwait(); PMAP_LOCK_INIT(kernel_pmap); - kernel_pmap->pm_active = -1; + CPU_FILL(&kernel_pmap->pm_active); kernel_pmap->pm_domain = PMAP_DOMAIN_KERNEL; TAILQ_INIT(&kernel_pmap->pm_pvlist); @@ -3826,7 +3826,7 @@ pmap_pinit(pmap_t pmap) pmap_alloc_l1(pmap); bzero(pmap->pm_l2, sizeof(pmap->pm_l2)); - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); diff --git a/sys/arm/include/_types.h b/sys/arm/include/_types.h index 48dd2a784fd7..d8386f3225d0 100644 --- a/sys/arm/include/_types.h +++ b/sys/arm/include/_types.h @@ -67,7 +67,6 @@ typedef unsigned long long __uint64_t; * Standard type definitions. 
*/ typedef __uint32_t __clock_t; /* clock()... */ -typedef unsigned int __cpumask_t; typedef __int32_t __critical_t; typedef double __double_t; typedef double __float_t; diff --git a/sys/arm/include/pmap.h b/sys/arm/include/pmap.h index 701390a33852..3d63432e332a 100644 --- a/sys/arm/include/pmap.h +++ b/sys/arm/include/pmap.h @@ -62,6 +62,7 @@ #ifndef LOCORE #include +#include #include #include @@ -134,7 +135,7 @@ struct pmap { struct l1_ttable *pm_l1; struct l2_dtable *pm_l2[L2_SIZE]; pd_entry_t *pm_pdir; /* KVA of page directory */ - cpumask_t pm_active; /* active on cpus */ + cpuset_t pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statictics */ TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ }; diff --git a/sys/cddl/dev/cyclic/i386/cyclic_machdep.c b/sys/cddl/dev/cyclic/i386/cyclic_machdep.c index 1c18fc14e96c..dc6cb712f228 100644 --- a/sys/cddl/dev/cyclic/i386/cyclic_machdep.c +++ b/sys/cddl/dev/cyclic/i386/cyclic_machdep.c @@ -121,7 +121,9 @@ static void reprogram(cyb_arg_t arg, hrtime_t exp) static void xcall(cyb_arg_t arg, cpu_t *c, cyc_func_t func, void *param) { + cpuset_t cpus; - smp_rendezvous_cpus((cpumask_t) (1 << c->cpuid), + CPU_SETOF(c->cpuid, &cpus); + smp_rendezvous_cpus(cpus, smp_no_rendevous_barrier, func, smp_no_rendevous_barrier, param); } diff --git a/sys/cddl/dev/dtrace/amd64/dtrace_subr.c b/sys/cddl/dev/dtrace/amd64/dtrace_subr.c index 102ea0495aff..bc4b8b62b985 100644 --- a/sys/cddl/dev/dtrace/amd64/dtrace_subr.c +++ b/sys/cddl/dev/dtrace/amd64/dtrace_subr.c @@ -113,12 +113,12 @@ dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit)) void dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg) { - cpumask_t cpus; + cpuset_t cpus; if (cpu == DTRACE_CPUALL) cpus = all_cpus; else - cpus = (cpumask_t)1 << cpu; + CPU_SETOF(cpu, &cpus); smp_rendezvous_cpus(cpus, smp_no_rendevous_barrier, func, smp_no_rendevous_barrier, arg); @@ -394,7 +394,7 @@ dtrace_gethrtime_init(void *arg) { struct pcpu *pc; uint64_t tsc_f; - cpumask_t map; + cpuset_t map; int i; /* @@ -432,7 +432,8 @@ dtrace_gethrtime_init(void *arg) continue; pc = pcpu_find(i); - map = PCPU_GET(cpumask) | pc->pc_cpumask; + map = PCPU_GET(cpumask); + CPU_OR(&map, &pc->pc_cpumask); smp_rendezvous_cpus(map, dtrace_gethrtime_init_sync, dtrace_gethrtime_init_cpu, diff --git a/sys/cddl/dev/dtrace/i386/dtrace_subr.c b/sys/cddl/dev/dtrace/i386/dtrace_subr.c index 50793c4da4fc..7d460bc2e03b 100644 --- a/sys/cddl/dev/dtrace/i386/dtrace_subr.c +++ b/sys/cddl/dev/dtrace/i386/dtrace_subr.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -113,12 +114,12 @@ dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit)) void dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg) { - cpumask_t cpus; + cpuset_t cpus; if (cpu == DTRACE_CPUALL) cpus = all_cpus; else - cpus = (cpumask_t)1 << cpu; + CPU_SETOF(cpu, &cpus); smp_rendezvous_cpus(cpus, smp_no_rendevous_barrier, func, smp_no_rendevous_barrier, arg); @@ -392,9 +393,9 @@ dtrace_gethrtime_init_cpu(void *arg) static void dtrace_gethrtime_init(void *arg) { + cpuset_t map; struct pcpu *pc; uint64_t tsc_f; - cpumask_t map; int i; /* @@ -432,7 +433,8 @@ dtrace_gethrtime_init(void *arg) continue; pc = pcpu_find(i); - map = PCPU_GET(cpumask) | pc->pc_cpumask; + map = PCPU_GET(cpumask); + CPU_OR(&map, &pc->pc_cpumask); smp_rendezvous_cpus(map, dtrace_gethrtime_init_sync, dtrace_gethrtime_init_cpu, diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c index 
d6225d8f5ae3..4cfcea87fa5b 100644 --- a/sys/dev/hwpmc/hwpmc_mod.c +++ b/sys/dev/hwpmc/hwpmc_mod.c @@ -1991,7 +1991,7 @@ pmc_hook_handler(struct thread *td, int function, void *arg) * had already processed the interrupt). We don't * lose the interrupt sample. */ - atomic_clear_int(&pmc_cpumask, (1 << PCPU_GET(cpuid))); + CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmc_cpumask); pmc_process_samples(PCPU_GET(cpuid)); break; @@ -4083,7 +4083,7 @@ pmc_process_interrupt(int cpu, struct pmc *pm, struct trapframe *tf, done: /* mark CPU as needing processing */ - atomic_set_int(&pmc_cpumask, (1 << cpu)); + CPU_SET_ATOMIC(cpu, &pmc_cpumask); return (error); } @@ -4193,7 +4193,7 @@ pmc_process_samples(int cpu) break; if (ps->ps_nsamples == PMC_SAMPLE_INUSE) { /* Need a rescan at a later time. */ - atomic_set_int(&pmc_cpumask, (1 << cpu)); + CPU_SET_ATOMIC(cpu, &pmc_cpumask); break; } @@ -4782,7 +4782,7 @@ pmc_cleanup(void) PMCDBG(MOD,INI,0, "%s", "cleanup"); /* switch off sampling */ - pmc_cpumask = 0; + CPU_ZERO(&pmc_cpumask); pmc_intr = NULL; sx_xlock(&pmc_sx); diff --git a/sys/dev/xen/control/control.c b/sys/dev/xen/control/control.c index c03d5365530b..0f4418142684 100644 --- a/sys/dev/xen/control/control.c +++ b/sys/dev/xen/control/control.c @@ -203,24 +203,29 @@ xctrl_suspend() unsigned long max_pfn, start_info_mfn; #ifdef SMP - cpumask_t map; + struct thread *td; + cpuset_t map; /* * Bind us to CPU 0 and stop any other VCPUs. */ - thread_lock(curthread); - sched_bind(curthread, 0); - thread_unlock(curthread); + td = curthread; + thread_lock(td); + sched_bind(td, 0); + thread_unlock(td); KASSERT(PCPU_GET(cpuid) == 0, ("xen_suspend: not running on cpu 0")); - map = PCPU_GET(other_cpus) & ~stopped_cpus; - if (map) + sched_pin(); + map = PCPU_GET(other_cpus); + sched_unpin(); + CPU_NAND(&map, &stopped_cpus); + if (!CPU_EMPTY(&map)) stop_cpus(map); #endif if (DEVICE_SUSPEND(root_bus) != 0) { printf("xen_suspend: device_suspend failed\n"); #ifdef SMP - if (map) + if (!CPU_EMPTY(&map)) restart_cpus(map); #endif return; @@ -289,7 +294,7 @@ xctrl_suspend() thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); - if (map) + if (!CPU_EMPTY(&map)) restart_cpus(map); #endif } diff --git a/sys/geom/eli/g_eli.c b/sys/geom/eli/g_eli.c index 8b22ee58d72b..6466aef72ab7 100644 --- a/sys/geom/eli/g_eli.c +++ b/sys/geom/eli/g_eli.c @@ -661,7 +661,7 @@ static int g_eli_cpu_is_disabled(int cpu) { #ifdef SMP - return ((hlt_cpus_mask & (1 << cpu)) != 0); + return (CPU_ISSET(cpu, &hlt_cpus_mask)); #else return (0); #endif diff --git a/sys/i386/i386/intr_machdep.c b/sys/i386/i386/intr_machdep.c index 77b80048620f..56529f7b5b03 100644 --- a/sys/i386/i386/intr_machdep.c +++ b/sys/i386/i386/intr_machdep.c @@ -409,8 +409,7 @@ DB_SHOW_COMMAND(irqs, db_show_irqs) * allocate CPUs round-robin. */ -/* The BSP is always a valid target. */ -static cpumask_t intr_cpus = (1 << 0); +static cpuset_t intr_cpus; static int current_cpu; /* @@ -432,7 +431,7 @@ intr_next_cpu(void) current_cpu++; if (current_cpu > mp_maxid) current_cpu = 0; - } while (!(intr_cpus & (1 << current_cpu))); + } while (!CPU_ISSET(current_cpu, &intr_cpus)); mtx_unlock_spin(&icu_lock); return (apic_id); } @@ -463,7 +462,7 @@ intr_add_cpu(u_int cpu) printf("INTR: Adding local APIC %d as a target\n", cpu_apic_ids[cpu]); - intr_cpus |= (1 << cpu); + CPU_SET(cpu, &intr_cpus); } /* @@ -483,6 +482,9 @@ intr_shuffle_irqs(void *arg __unused) return; #endif + /* The BSP is always a valid target. */ + CPU_SETOF(0, &intr_cpus); + /* Don't bother on UP. 
*/ if (mp_ncpus == 1) return; diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index a07b06c53f75..dfe6434518f0 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include #include #include /* cngetc() */ +#include #ifdef GPROF #include #endif @@ -172,7 +173,7 @@ static u_long *ipi_hardclock_counts[MAXCPU]; * Local data and functions. */ -static volatile cpumask_t ipi_nmi_pending; +static volatile cpuset_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ static struct mtx ap_boot_mtx; @@ -209,7 +210,7 @@ static void release_aps(void *dummy); static int hlt_logical_cpus; static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */ -static cpumask_t hyperthreading_cpus_mask; +static cpuset_t hyperthreading_cpus_mask; static int hyperthreading_allowed = 1; static struct sysctl_ctx_list logical_cpu_clist; @@ -358,7 +359,7 @@ topo_probe(void) if (cpu_topo_probed) return; - logical_cpus_mask = 0; + CPU_ZERO(&logical_cpus_mask); if (mp_ncpus <= 1) cpu_cores = cpu_logical = 1; else if (cpu_vendor_id == CPU_VENDOR_AMD) @@ -500,7 +501,7 @@ cpu_mp_probe(void) * Always record BSP in CPU map so that the mbuf init code works * correctly. */ - all_cpus = 1; + CPU_SETOF(0, &all_cpus); if (mp_ncpus == 0) { /* * No CPUs were found, so this must be a UP system. Setup @@ -631,6 +632,7 @@ cpu_mp_announce(void) void init_secondary(void) { + cpuset_t tcpuset, tallcpus; struct pcpu *pc; vm_offset_t addr; int gsel_tss; @@ -755,19 +757,22 @@ init_secondary(void) CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); + tcpuset = PCPU_GET(cpumask); /* Determine if we are a logical CPU. */ /* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */ if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0) - logical_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&logical_cpus_mask, &tcpuset); /* Determine if we are a hyperthread. */ if (hyperthreading_cpus > 1 && PCPU_GET(apic_id) % hyperthreading_cpus != 0) - hyperthreading_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&hyperthreading_cpus_mask, &tcpuset); /* Build our map of 'other' CPUs. */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, &tcpuset); + PCPU_SET(other_cpus, tallcpus); if (bootverbose) lapic_dump("AP"); @@ -904,6 +909,7 @@ assign_cpu_ids(void) static int start_all_aps(void) { + cpuset_t tallcpus; #ifndef PC98 u_char mpbiosreason; #endif @@ -963,11 +969,13 @@ start_all_aps(void) } CHECK_PRINT("trace"); /* show checkpoints */ - all_cpus |= (1 << cpu); /* record AP in CPU map */ + CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } /* build our map of 'other' CPUs */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, PCPU_PTR(cpumask)); + PCPU_SET(other_cpus, tallcpus); /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; @@ -1163,6 +1171,30 @@ SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, &ipi_masked_range_size, 0, ""); #endif /* COUNT_XINVLTLB_HITS */ +/* + * Send an IPI to specified CPU handling the bitmap logic. 
+ */ +static void +ipi_send_cpu(int cpu, u_int ipi) +{ + u_int bitmap, old_pending, new_pending; + + KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); + + if (IPI_IS_BITMAPED(ipi)) { + bitmap = 1 << ipi; + ipi = IPI_BITMAP_VECTOR; + do { + old_pending = cpu_ipi_pending[cpu]; + new_pending = old_pending | bitmap; + } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], + old_pending, new_pending)); + if (old_pending) + return; + } + lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); +} + /* * Flush the TLB on all other CPU's */ @@ -1187,28 +1219,19 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) } static void -smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) +smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) { - int ncpu, othercpus; + int cpu, ncpu, othercpus; othercpus = mp_ncpus - 1; - if (mask == (u_int)-1) { - ncpu = othercpus; - if (ncpu < 1) + if (CPU_ISFULLSET(&mask)) { + if (othercpus < 1) return; } else { - mask &= ~PCPU_GET(cpumask); - if (mask == 0) - return; - ncpu = bitcount32(mask); - if (ncpu > othercpus) { - /* XXX this should be a panic offence */ - printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", - ncpu, othercpus); - ncpu = othercpus; - } - /* XXX should be a panic, implied by mask == 0 above */ - if (ncpu < 1) + sched_pin(); + CPU_NAND(&mask, PCPU_PTR(cpumask)); + sched_unpin(); + if (CPU_EMPTY(&mask)) return; } if (!(read_eflags() & PSL_I)) @@ -1217,39 +1240,25 @@ smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_o smp_tlb_addr1 = addr1; smp_tlb_addr2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); - if (mask == (u_int)-1) + if (CPU_ISFULLSET(&mask)) { + ncpu = othercpus; ipi_all_but_self(vector); - else - ipi_selected(mask, vector); + } else { + ncpu = 0; + while ((cpu = cpusetobj_ffs(&mask)) != 0) { + cpu--; + CPU_CLR(cpu, &mask); + CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, + vector); + ipi_send_cpu(cpu, vector); + ncpu++; + } + } while (smp_tlb_wait < ncpu) ia32_pause(); mtx_unlock_spin(&smp_ipi_mtx); } -/* - * Send an IPI to specified CPU handling the bitmap logic. - */ -static void -ipi_send_cpu(int cpu, u_int ipi) -{ - u_int bitmap, old_pending, new_pending; - - KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); - - if (IPI_IS_BITMAPED(ipi)) { - bitmap = 1 << ipi; - ipi = IPI_BITMAP_VECTOR; - do { - old_pending = cpu_ipi_pending[cpu]; - new_pending = old_pending | bitmap; - } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], - old_pending, new_pending)); - if (old_pending) - return; - } - lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); -} - void smp_cache_flush(void) { @@ -1296,7 +1305,7 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) } void -smp_masked_invltlb(cpumask_t mask) +smp_masked_invltlb(cpuset_t mask) { if (smp_started) { @@ -1308,7 +1317,7 @@ smp_masked_invltlb(cpumask_t mask) } void -smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) +smp_masked_invlpg(cpuset_t mask, vm_offset_t addr) { if (smp_started) { @@ -1320,7 +1329,7 @@ smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) } void -smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2) +smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { @@ -1373,7 +1382,7 @@ ipi_bitmap_handler(struct trapframe frame) * send an IPI to a set of cpus. 
*/ void -ipi_selected(cpumask_t cpus, u_int ipi) +ipi_selected(cpuset_t cpus, u_int ipi) { int cpu; @@ -1383,12 +1392,12 @@ ipi_selected(cpumask_t cpus, u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, cpus); + CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); - CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi); - while ((cpu = ffs(cpus)) != 0) { + while ((cpu = cpusetobj_ffs(&cpus)) != 0) { cpu--; - cpus &= ~(1 << cpu); + CPU_CLR(cpu, &cpus); + CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } } @@ -1406,7 +1415,7 @@ ipi_cpu(int cpu, u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, 1 << cpu); + CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); @@ -1419,8 +1428,10 @@ void ipi_all_but_self(u_int ipi) { + sched_pin(); if (IPI_IS_BITMAPED(ipi)) { ipi_selected(PCPU_GET(other_cpus), ipi); + sched_unpin(); return; } @@ -1430,7 +1441,9 @@ ipi_all_but_self(u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus)); + CPU_OR_ATOMIC(&ipi_nmi_pending, PCPU_PTR(other_cpus)); + sched_unpin(); + CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); } @@ -1438,7 +1451,7 @@ ipi_all_but_self(u_int ipi) int ipi_nmi_handler() { - cpumask_t cpumask; + cpuset_t cpumask; /* * As long as there is not a simple way to know about a NMI's @@ -1446,11 +1459,13 @@ ipi_nmi_handler() * the global pending bitword an IPI_STOP_HARD has been issued * and should be handled. */ + sched_pin(); cpumask = PCPU_GET(cpumask); - if ((ipi_nmi_pending & cpumask) == 0) + sched_unpin(); + if (!CPU_OVERLAP(&ipi_nmi_pending, &cpumask)) return (1); - atomic_clear_int(&ipi_nmi_pending, cpumask); + CPU_NAND_ATOMIC(&ipi_nmi_pending, &cpumask); cpustop_handler(); return (0); } @@ -1462,23 +1477,25 @@ ipi_nmi_handler() void cpustop_handler(void) { - cpumask_t cpumask; + cpuset_t cpumask; u_int cpu; + sched_pin(); cpu = PCPU_GET(cpuid); cpumask = PCPU_GET(cpumask); + sched_unpin(); savectx(&stoppcbs[cpu]); /* Indicate that we are stopped */ - atomic_set_int(&stopped_cpus, cpumask); + CPU_OR_ATOMIC(&stopped_cpus, &cpumask); /* Wait for restart */ - while (!(started_cpus & cpumask)) + while (!CPU_OVERLAP(&started_cpus, &cpumask)) ia32_pause(); - atomic_clear_int(&started_cpus, cpumask); - atomic_clear_int(&stopped_cpus, cpumask); + CPU_NAND_ATOMIC(&started_cpus, &cpumask); + CPU_NAND_ATOMIC(&stopped_cpus, &cpumask); if (cpu == 0 && cpustop_restartfunc != NULL) { cpustop_restartfunc(); @@ -1505,30 +1522,30 @@ SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); static int sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS) { - cpumask_t mask; + cpuset_t mask; int error; mask = hlt_cpus_mask; - error = sysctl_handle_int(oidp, &mask, 0, req); + error = sysctl_handle_opaque(oidp, &mask, sizeof(mask), req); if (error || !req->newptr) return (error); - if (logical_cpus_mask != 0 && - (mask & logical_cpus_mask) == logical_cpus_mask) + if (!CPU_EMPTY(&logical_cpus_mask) && + CPU_SUBSET(&mask, &logical_cpus_mask)) hlt_logical_cpus = 1; else hlt_logical_cpus = 0; if (! 
hyperthreading_allowed) - mask |= hyperthreading_cpus_mask; + CPU_OR(&mask, &hyperthreading_cpus_mask); - if ((mask & all_cpus) == all_cpus) - mask &= ~(1<<0); + if (CPU_SUBSET(&mask, &all_cpus)) + CPU_CLR(0, &mask); hlt_cpus_mask = mask; return (error); } -SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW, - 0, 0, sysctl_hlt_cpus, "IU", +SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, + CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0, sysctl_hlt_cpus, "S", "Bitmap of CPUs to halt. 101 (binary) will halt CPUs 0 and 2."); static int @@ -1542,15 +1559,15 @@ sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS) return (error); if (disable) - hlt_cpus_mask |= logical_cpus_mask; + CPU_OR(&hlt_cpus_mask, &logical_cpus_mask); else - hlt_cpus_mask &= ~logical_cpus_mask; + CPU_NAND(&hlt_cpus_mask, &logical_cpus_mask); if (! hyperthreading_allowed) - hlt_cpus_mask |= hyperthreading_cpus_mask; + CPU_OR(&hlt_cpus_mask, &hyperthreading_cpus_mask); - if ((hlt_cpus_mask & all_cpus) == all_cpus) - hlt_cpus_mask &= ~(1<<0); + if (CPU_SUBSET(&hlt_cpus_mask, &all_cpus)) + CPU_CLR(0, &hlt_cpus_mask); hlt_logical_cpus = disable; return (error); @@ -1577,18 +1594,18 @@ sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS) #endif if (allowed) - hlt_cpus_mask &= ~hyperthreading_cpus_mask; + CPU_NAND(&hlt_cpus_mask, &hyperthreading_cpus_mask); else - hlt_cpus_mask |= hyperthreading_cpus_mask; + CPU_OR(&hlt_cpus_mask, &hyperthreading_cpus_mask); - if (logical_cpus_mask != 0 && - (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask) + if (!CPU_EMPTY(&logical_cpus_mask) && + CPU_SUBSET(&hlt_cpus_mask, &logical_cpus_mask)) hlt_logical_cpus = 1; else hlt_logical_cpus = 0; - if ((hlt_cpus_mask & all_cpus) == all_cpus) - hlt_cpus_mask &= ~(1<<0); + if (CPU_SUBSET(&hlt_cpus_mask, &all_cpus)) + CPU_CLR(0, &hlt_cpus_mask); hyperthreading_allowed = allowed; return (error); @@ -1598,7 +1615,7 @@ static void cpu_hlt_setup(void *dummy __unused) { - if (logical_cpus_mask != 0) { + if (!CPU_EMPTY(&logical_cpus_mask)) { TUNABLE_INT_FETCH("machdep.hlt_logical_cpus", &hlt_logical_cpus); sysctl_ctx_init(&logical_cpu_clist); @@ -1612,20 +1629,21 @@ cpu_hlt_setup(void *dummy __unused) &logical_cpus_mask, 0, ""); if (hlt_logical_cpus) - hlt_cpus_mask |= logical_cpus_mask; + CPU_OR(&hlt_cpus_mask, &logical_cpus_mask); /* * If necessary for security purposes, force * hyperthreading off, regardless of the value * of hlt_logical_cpus. */ - if (hyperthreading_cpus_mask) { + if (!CPU_EMPTY(&hyperthreading_cpus_mask)) { SYSCTL_ADD_PROC(&logical_cpu_clist, SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW, 0, 0, sysctl_hyperthreading_allowed, "IU", ""); if (! 
hyperthreading_allowed) - hlt_cpus_mask |= hyperthreading_cpus_mask; + CPU_OR(&hlt_cpus_mask, + &hyperthreading_cpus_mask); } } } @@ -1634,7 +1652,7 @@ SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL); int mp_grab_cpu_hlt(void) { - cpumask_t mask; + cpuset_t mask; #ifdef MP_WATCHDOG u_int cpuid; #endif @@ -1647,7 +1665,7 @@ mp_grab_cpu_hlt(void) #endif retval = 0; - while (mask & hlt_cpus_mask) { + while (CPU_OVERLAP(&mask, &hlt_cpus_mask)) { retval = 1; __asm __volatile("sti; hlt" : : : "memory"); } diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index e1fe137e4d31..c85f1387030e 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -125,6 +125,8 @@ __FBSDID("$FreeBSD$"); #include #ifdef SMP #include +#else +#include #endif #include @@ -386,7 +388,7 @@ pmap_bootstrap(vm_paddr_t firstaddr) kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); #endif kernel_pmap->pm_root = NULL; - kernel_pmap->pm_active = -1; /* don't allow deactivation */ + CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); LIST_INIT(&allpmaps); @@ -930,19 +932,20 @@ pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde) void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invlpg(va); smp_invlpg(va); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invlpg(va); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg(pmap->pm_active & other_cpus, va); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg(other_cpus, va); } sched_unpin(); } @@ -950,23 +953,23 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; vm_offset_t addr; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); smp_invlpg_range(sva, eva); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg_range(pmap->pm_active & other_cpus, - sva, eva); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg_range(other_cpus, sva, eva); } sched_unpin(); } @@ -974,19 +977,20 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) void pmap_invalidate_all(pmap_t pmap) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invltlb(); smp_invltlb(); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invltlb(); - if (pmap->pm_active & other_cpus) - smp_masked_invltlb(pmap->pm_active & other_cpus); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invltlb(other_cpus); } 
sched_unpin(); } @@ -1002,8 +1006,8 @@ pmap_invalidate_cache(void) } struct pde_action { - cpumask_t store; /* processor that updates the PDE */ - cpumask_t invalidate; /* processors that invalidate their TLB */ + cpuset_t store; /* processor that updates the PDE */ + cpuset_t invalidate; /* processors that invalidate their TLB */ vm_offset_t va; pd_entry_t *pde; pd_entry_t newpde; @@ -1016,7 +1020,10 @@ pmap_update_pde_kernel(void *arg) pd_entry_t *pde; pmap_t pmap; - if (act->store == PCPU_GET(cpumask)) + sched_pin(); + if (!CPU_CMP(&act->store, PCPU_PTR(cpumask))) { + sched_unpin(); + /* * Elsewhere, this operation requires allpmaps_lock for * synchronization. Here, it does not because it is being @@ -1026,6 +1033,8 @@ pmap_update_pde_kernel(void *arg) pde = pmap_pde(pmap, act->va); pde_store(pde, act->newpde); } + } else + sched_unpin(); } static void @@ -1033,8 +1042,12 @@ pmap_update_pde_user(void *arg) { struct pde_action *act = arg; - if (act->store == PCPU_GET(cpumask)) + sched_pin(); + if (!CPU_CMP(&act->store, PCPU_PTR(cpumask))) { + sched_unpin(); pde_store(act->pde, act->newpde); + } else + sched_unpin(); } static void @@ -1042,8 +1055,12 @@ pmap_update_pde_teardown(void *arg) { struct pde_action *act = arg; - if ((act->invalidate & PCPU_GET(cpumask)) != 0) + sched_pin(); + if (CPU_OVERLAP(&act->invalidate, PCPU_PTR(cpumask))) { + sched_unpin(); pmap_update_pde_invalidate(act->va, act->newpde); + } else + sched_unpin(); } /* @@ -1058,21 +1075,23 @@ static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { struct pde_action act; - cpumask_t active, cpumask; + cpuset_t active, cpumask, other_cpus; sched_pin(); cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); if (pmap == kernel_pmap) active = all_cpus; else active = pmap->pm_active; - if ((active & PCPU_GET(other_cpus)) != 0) { + if (CPU_OVERLAP(&active, &other_cpus)) { act.store = cpumask; act.invalidate = active; act.va = va; act.pde = pde; act.newpde = newpde; - smp_rendezvous_cpus(cpumask | active, + CPU_OR(&cpumask, &active); + smp_rendezvous_cpus(cpumask, smp_no_rendevous_barrier, pmap == kernel_pmap ? 
pmap_update_pde_kernel : pmap_update_pde_user, pmap_update_pde_teardown, &act); @@ -1081,7 +1100,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); - if ((active & cpumask) != 0) + if (CPU_OVERLAP(&active, &cpumask)) pmap_update_pde_invalidate(va, newpde); } sched_unpin(); @@ -1095,7 +1114,7 @@ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invlpg(va); } @@ -1104,7 +1123,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } @@ -1113,7 +1132,7 @@ PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invltlb(); } @@ -1132,7 +1151,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) pmap_update_pde_invalidate(va, newpde); } #endif /* !SMP */ @@ -1689,7 +1708,7 @@ pmap_pinit0(pmap_t pmap) pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); #endif pmap->pm_root = NULL; - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1770,7 +1789,7 @@ pmap_pinit(pmap_t pmap) #endif } - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -4985,11 +5004,11 @@ pmap_activate(struct thread *td) pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = PCPU_GET(curpmap); #if defined(SMP) - atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); - atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); + CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask)); #else - oldpmap->pm_active &= ~1; - pmap->pm_active |= 1; + CPU_NAND(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR(&pmap->pm_active, PCPU_PTR(cpumask)); #endif #ifdef PAE cr3 = vtophys(pmap->pm_pdpt); diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index 232e1a1f308e..a084e09526aa 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -573,11 +573,13 @@ kvtop(void *addr) static void cpu_reset_proxy() { + cpuset_t tcrp; cpu_reset_proxy_active = 1; while (cpu_reset_proxy_active == 1) ; /* Wait for other cpu to see that we've started */ - stop_cpus((1<cpumask = 0; + CPU_ZERO(&sf->cpumask); shootdown: sched_pin(); cpumask = PCPU_GET(cpumask); - if ((sf->cpumask & cpumask) == 0) { - sf->cpumask |= cpumask; + if (!CPU_OVERLAP(&cpumask, &sf->cpumask)) { + CPU_OR(&sf->cpumask, &cpumask); invlpg(sf->kva); } if ((flags & SFB_CPUPRIVATE) == 0) { - other_cpus = PCPU_GET(other_cpus) & ~sf->cpumask; - if (other_cpus != 0) { - sf->cpumask |= other_cpus; + other_cpus = PCPU_GET(other_cpus); + CPU_NAND(&other_cpus, &sf->cpumask); + if (!CPU_EMPTY(&other_cpus)) { + CPU_OR(&sf->cpumask, &other_cpus); smp_masked_invlpg(other_cpus, sf->kva); } } - sched_unpin(); + sched_unpin(); #else if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) pmap_invalidate_page(kernel_pmap, sf->kva); 
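The pmap and sf_buf hunks above all apply the same mechanical translation: cpumask_t was a plain integer, so masks were combined with C bit operators and tested with comparisons, while cpuset_t is an array of longs that has to be manipulated through the CPU_*() macros (the new ones are added to sys/sys/cpuset.h further down in this patch). The sketch below is illustrative only, not code from the patch: the function name cpuset_idiom_sketch and the variables mask, other and n are hypothetical, and a kernel build environment is assumed so that the headers and PCPU accessors shown here are available.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/queue.h>
#include <sys/cpuset.h>
#include <sys/pcpu.h>
#include <sys/sched.h>

static void
cpuset_idiom_sketch(int n)
{
	cpuset_t mask, other;

	/*
	 * Reading the per-CPU masks now copies a structure rather than
	 * loading an int, which is why the hunks above pin the thread
	 * around PCPU_GET(cpumask)/PCPU_GET(other_cpus).
	 */
	sched_pin();
	mask = PCPU_GET(cpumask);
	other = PCPU_GET(other_cpus);
	sched_unpin();

	CPU_SET(n, &mask);		/* was: mask |= 1 << n		*/
	CPU_CLR(n, &mask);		/* was: mask &= ~(1 << n)	*/
	CPU_OR(&mask, &other);		/* was: mask |= other		*/
	CPU_AND(&mask, &other);		/* was: mask &= other		*/
	CPU_NAND(&mask, &other);	/* was: mask &= ~other		*/

	if (CPU_ISSET(n, &mask))	/* was: (mask & (1 << n)) != 0	*/
		CPU_SETOF(n, &mask);	/* was: mask = 1 << n		*/
	if (CPU_OVERLAP(&mask, &other))	/* was: (mask & other) != 0	*/
		CPU_FILL(&mask);	/* was: mask = (cpumask_t)-1	*/
	if (CPU_SUBSET(&mask, &other))	/* was: (mask & other) == other	*/
		CPU_ZERO(&mask);	/* was: mask = 0		*/
	if (!CPU_EMPTY(&mask) &&	/* was: mask != 0		*/
	    !CPU_CMP(&mask, &other))	/* was: mask == other		*/
		return;
}

Shared masks that were previously updated with atomic_set_int()/atomic_clear_int() (for example stopped_cpus, started_cpus, ipi_nmi_pending and pm_active in the hunks above and below) go through the new CPU_OR_ATOMIC()/CPU_NAND_ATOMIC()/CPU_SET_ATOMIC() wrappers instead, and masks that used to be printed with a plain %x are formatted through cpusetobj_strprint() into a CPUSETBUFSIZ-sized buffer.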
diff --git a/sys/i386/include/_types.h b/sys/i386/include/_types.h index 7a969fedf160..3194fd691f67 100644 --- a/sys/i386/include/_types.h +++ b/sys/i386/include/_types.h @@ -69,7 +69,6 @@ typedef unsigned long long __uint64_t; * Standard type definitions. */ typedef unsigned long __clock_t; /* clock()... */ -typedef unsigned int __cpumask_t; typedef __int32_t __critical_t; typedef long double __double_t; typedef long double __float_t; diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h index eeada2e400f4..3012a000ae5c 100644 --- a/sys/i386/include/pmap.h +++ b/sys/i386/include/pmap.h @@ -155,6 +155,7 @@ #ifndef LOCORE #include +#include #include #include @@ -433,7 +434,7 @@ struct pmap { struct mtx pm_mtx; pd_entry_t *pm_pdir; /* KVA of page directory */ TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ - cpumask_t pm_active; /* active on cpus */ + cpuset_t pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statistics */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ #ifdef PAE diff --git a/sys/i386/include/sf_buf.h b/sys/i386/include/sf_buf.h index 7bc1095c9590..415dcbb86e63 100644 --- a/sys/i386/include/sf_buf.h +++ b/sys/i386/include/sf_buf.h @@ -29,6 +29,7 @@ #ifndef _MACHINE_SF_BUF_H_ #define _MACHINE_SF_BUF_H_ +#include #include struct vm_page; @@ -40,7 +41,7 @@ struct sf_buf { vm_offset_t kva; /* va of mapping */ int ref_count; /* usage of this mapping */ #ifdef SMP - cpumask_t cpumask; /* cpus on which mapping is valid */ + cpuset_t cpumask; /* cpus on which mapping is valid */ #endif }; diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h index b512e00b4228..cdec9c87bace 100644 --- a/sys/i386/include/smp.h +++ b/sys/i386/include/smp.h @@ -64,17 +64,17 @@ void ipi_bitmap_handler(struct trapframe frame); #endif void ipi_cpu(int cpu, u_int ipi); int ipi_nmi_handler(void); -void ipi_selected(cpumask_t cpus, u_int ipi); +void ipi_selected(cpuset_t cpus, u_int ipi); u_int mp_bootaddress(u_int); int mp_grab_cpu_hlt(void); void smp_cache_flush(void); void smp_invlpg(vm_offset_t addr); -void smp_masked_invlpg(cpumask_t mask, vm_offset_t addr); +void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr); void smp_invlpg_range(vm_offset_t startva, vm_offset_t endva); -void smp_masked_invlpg_range(cpumask_t mask, vm_offset_t startva, +void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva, vm_offset_t endva); void smp_invltlb(void); -void smp_masked_invltlb(cpumask_t mask); +void smp_masked_invltlb(cpuset_t mask); #ifdef XEN void ipi_to_irq_init(void); diff --git a/sys/i386/xen/mp_machdep.c b/sys/i386/xen/mp_machdep.c index 670d110af49c..1146ac2351e6 100644 --- a/sys/i386/xen/mp_machdep.c +++ b/sys/i386/xen/mp_machdep.c @@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$"); #include #include #include /* cngetc() */ +#include #ifdef GPROF #include #endif @@ -116,7 +117,7 @@ volatile int smp_tlb_wait; typedef void call_data_func_t(uintptr_t , uintptr_t); static u_int logical_cpus; -static volatile cpumask_t ipi_nmi_pending; +static volatile cpuset_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ static struct mtx ap_boot_mtx; @@ -149,7 +150,7 @@ static int start_ap(int apic_id); static void release_aps(void *dummy); static u_int hyperthreading_cpus; -static cpumask_t hyperthreading_cpus_mask; +static cpuset_t hyperthreading_cpus_mask; extern void Xhypervisor_callback(void); extern void failsafe_callback(void); @@ -238,7 +239,7 @@ cpu_mp_probe(void) * Always record BSP in CPU map so that the mbuf init code works 
* correctly. */ - all_cpus = 1; + CPU_SETOF(0, &all_cpus); if (mp_ncpus == 0) { /* * No CPUs were found, so this must be a UP system. Setup @@ -292,7 +293,8 @@ cpu_mp_start(void) start_all_aps(); /* Setup the initial logical CPUs info. */ - logical_cpus = logical_cpus_mask = 0; + logical_cpus = 0; + CPU_ZERO(&logical_cpus_mask); if (cpu_feature & CPUID_HTT) logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; @@ -512,6 +514,7 @@ xen_smp_intr_init_cpus(void *unused) void init_secondary(void) { + cpuset_t tcpuset, tallcpus; vm_offset_t addr; int gsel_tss; @@ -591,18 +594,21 @@ init_secondary(void) CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); + tcpuset = PCPU_GET(cpumask); /* Determine if we are a logical CPU. */ if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0) - logical_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&logical_cpus_mask, &tcpuset); /* Determine if we are a hyperthread. */ if (hyperthreading_cpus > 1 && PCPU_GET(apic_id) % hyperthreading_cpus != 0) - hyperthreading_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&hyperthreading_cpus_mask, &tcpuset); /* Build our map of 'other' CPUs. */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, &tcpuset); + PCPU_SET(other_cpus, tallcpus); #if 0 if (bootverbose) lapic_dump("AP"); @@ -713,6 +719,7 @@ assign_cpu_ids(void) int start_all_aps(void) { + cpuset_t tallcpus; int x,apic_id, cpu; struct pcpu *pc; @@ -766,12 +773,14 @@ start_all_aps(void) panic("bye-bye"); } - all_cpus |= (1 << cpu); /* record AP in CPU map */ + CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } /* build our map of 'other' CPUs */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, PCPU_PTR(cpumask)); + PCPU_SET(other_cpus, tallcpus); pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); @@ -1000,29 +1009,20 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) } static void -smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) +smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) { - int ncpu, othercpus; + int cpu, ncpu, othercpus; struct _call_data data; othercpus = mp_ncpus - 1; - if (mask == (u_int)-1) { - ncpu = othercpus; - if (ncpu < 1) + if (CPU_ISFULLSET(&mask)) { + if (othercpus < 1) return; } else { - mask &= ~PCPU_GET(cpumask); - if (mask == 0) - return; - ncpu = bitcount32(mask); - if (ncpu > othercpus) { - /* XXX this should be a panic offence */ - printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", - ncpu, othercpus); - ncpu = othercpus; - } - /* XXX should be a panic, implied by mask == 0 above */ - if (ncpu < 1) + critical_enter(); + CPU_NAND(&mask, PCPU_PTR(cpumask)); + critical_exit(); + if (CPU_EMPTY(&mask)) return; } if (!(read_eflags() & PSL_I)) @@ -1034,10 +1034,20 @@ smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_o call_data->arg1 = addr1; call_data->arg2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); - if (mask == (u_int)-1) + if (CPU_ISFULLSET(&mask)) { + ncpu = othercpus; ipi_all_but_self(vector); - else - ipi_selected(mask, vector); + } else { + ncpu = 0; + while ((cpu = cpusetobj_ffs(&mask)) != 0) { + cpu--; + CPU_CLR(cpu, &mask); + CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, + vector); + ipi_send_cpu(cpu, vector); + ncpu++; + } + } while (smp_tlb_wait < ncpu) ia32_pause(); call_data = NULL; @@ -1080,7 +1090,7 
@@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) } void -smp_masked_invltlb(cpumask_t mask) +smp_masked_invltlb(cpuset_t mask) { if (smp_started) { @@ -1089,7 +1099,7 @@ smp_masked_invltlb(cpumask_t mask) } void -smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) +smp_masked_invlpg(cpuset_t mask, vm_offset_t addr) { if (smp_started) { @@ -1098,7 +1108,7 @@ smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) } void -smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2) +smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { @@ -1110,7 +1120,7 @@ smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2) * send an IPI to a set of cpus. */ void -ipi_selected(cpumask_t cpus, u_int ipi) +ipi_selected(cpuset_t cpus, u_int ipi) { int cpu; @@ -1120,11 +1130,11 @@ ipi_selected(cpumask_t cpus, u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, cpus); + CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); - while ((cpu = ffs(cpus)) != 0) { + while ((cpu = cpusetobj_ffs(&cpus)) != 0) { cpu--; - cpus &= ~(1 << cpu); + CPU_CLR(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } @@ -1143,7 +1153,7 @@ ipi_cpu(int cpu, u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, 1 << cpu); + CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); @@ -1155,23 +1165,27 @@ ipi_cpu(int cpu, u_int ipi) void ipi_all_but_self(u_int ipi) { + cpuset_t other_cpus; /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ + sched_pin(); + other_cpus = PCPU_GET(other_cpus); + sched_unpin(); if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus)); + CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); - ipi_selected(PCPU_GET(other_cpus), ipi); + ipi_selected(other_cpus, ipi); } int ipi_nmi_handler() { - cpumask_t cpumask; + cpuset_t cpumask; /* * As long as there is not a simple way to know about a NMI's @@ -1179,11 +1193,13 @@ ipi_nmi_handler() * the global pending bitword an IPI_STOP_HARD has been issued * and should be handled. 
*/ + sched_pin(); cpumask = PCPU_GET(cpumask); - if ((ipi_nmi_pending & cpumask) == 0) + sched_unpin(); + if (!CPU_OVERLAP(&ipi_nmi_pending, &cpumask)) return (1); - atomic_clear_int(&ipi_nmi_pending, cpumask); + CPU_NAND_ATOMIC(&ipi_nmi_pending, &cpumask); cpustop_handler(); return (0); } @@ -1195,20 +1211,25 @@ ipi_nmi_handler() void cpustop_handler(void) { - int cpu = PCPU_GET(cpuid); - int cpumask = PCPU_GET(cpumask); + cpuset_t cpumask; + int cpu; + + sched_pin(); + cpumask = PCPU_GET(cpumask); + cpu = PCPU_GET(cpuid); + sched_unpin(); savectx(&stoppcbs[cpu]); /* Indicate that we are stopped */ - atomic_set_int(&stopped_cpus, cpumask); + CPU_OR_ATOMIC(&stopped_cpus, &cpumask); /* Wait for restart */ - while (!(started_cpus & cpumask)) + while (!CPU_OVERLAP(&started_cpus, &cpumask)) ia32_pause(); - atomic_clear_int(&started_cpus, cpumask); - atomic_clear_int(&stopped_cpus, cpumask); + CPU_NAND_ATOMIC(&started_cpus, &cpumask); + CPU_NAND_ATOMIC(&stopped_cpus, &cpumask); if (cpu == 0 && cpustop_restartfunc != NULL) { cpustop_restartfunc(); diff --git a/sys/i386/xen/pmap.c b/sys/i386/xen/pmap.c index cf36f5edddb3..74ace613d133 100644 --- a/sys/i386/xen/pmap.c +++ b/sys/i386/xen/pmap.c @@ -422,7 +422,7 @@ pmap_bootstrap(vm_paddr_t firstaddr) #ifdef PAE kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); #endif - kernel_pmap->pm_active = -1; /* don't allow deactivation */ + CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); LIST_INIT(&allpmaps); mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); @@ -802,22 +802,23 @@ pmap_cache_bits(int mode, boolean_t is_pde) void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x", pmap, va); sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invlpg(va); smp_invlpg(va); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invlpg(va); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg(pmap->pm_active & other_cpus, va); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg(other_cpus, va); } sched_unpin(); PT_UPDATES_FLUSH(); @@ -826,26 +827,26 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; vm_offset_t addr; CTR3(KTR_PMAP, "pmap_invalidate_page: pmap=%p eva=0x%x sva=0x%x", pmap, sva, eva); sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); smp_invlpg_range(sva, eva); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg_range(pmap->pm_active & other_cpus, - sva, eva); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg_range(other_cpus, sva, eva); } sched_unpin(); PT_UPDATES_FLUSH(); @@ -854,21 +855,22 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) void 
pmap_invalidate_all(pmap_t pmap) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; CTR1(KTR_PMAP, "pmap_invalidate_page: pmap=%p", pmap); sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invltlb(); smp_invltlb(); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invltlb(); - if (pmap->pm_active & other_cpus) - smp_masked_invltlb(pmap->pm_active & other_cpus); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invltlb(other_cpus); } sched_unpin(); } @@ -893,7 +895,7 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x", pmap, va); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invlpg(va); PT_UPDATES_FLUSH(); } @@ -907,7 +909,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x", pmap, sva, eva); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); PT_UPDATES_FLUSH(); @@ -919,7 +921,7 @@ pmap_invalidate_all(pmap_t pmap) CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invltlb(); } @@ -1449,7 +1451,7 @@ pmap_pinit0(pmap_t pmap) #ifdef PAE pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); #endif - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1556,7 +1558,7 @@ pmap_pinit(pmap_t pmap) } xen_flush_queue(); vm_page_unlock_queues(); - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -4030,11 +4032,11 @@ pmap_activate(struct thread *td) pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = PCPU_GET(curpmap); #if defined(SMP) - atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); - atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); + CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask)); #else - oldpmap->pm_active &= ~1; - pmap->pm_active |= 1; + CPU_NAND(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR(&pmap->pm_active, PCPU_PTR(cpumask)); #endif #ifdef PAE cr3 = vtophys(pmap->pm_pdpt); diff --git a/sys/ia64/ia64/mp_machdep.c b/sys/ia64/ia64/mp_machdep.c index 5804f8cd69b7..015378514687 100644 --- a/sys/ia64/ia64/mp_machdep.c +++ b/sys/ia64/ia64/mp_machdep.c @@ -139,18 +139,18 @@ ia64_ih_rndzvs(struct thread *td, u_int xiv, struct trapframe *tf) static u_int ia64_ih_stop(struct thread *td, u_int xiv, struct trapframe *tf) { - cpumask_t mybit; + cpuset_t mybit; PCPU_INC(md.stats.pcs_nstops); mybit = PCPU_GET(cpumask); savectx(PCPU_PTR(md.pcb)); - atomic_set_int(&stopped_cpus, mybit); - while ((started_cpus & mybit) == 0) + CPU_OR_ATOMIC(&stopped_cpus, &mybit); + while (!CPU_OVERLAP(&started_cpus, &mybit)) cpu_spinwait(); - atomic_clear_int(&started_cpus, mybit); - atomic_clear_int(&stopped_cpus, mybit); + CPU_NAND_ATOMIC(&started_cpus, &mybit); + CPU_NAND_ATOMIC(&stopped_cpus, &mybit); return (0); } @@ -458,12 +458,12 @@ cpu_mp_unleash(void *dummy) * send an IPI to a 
set of cpus. */ void -ipi_selected(cpumask_t cpus, int ipi) +ipi_selected(cpuset_t cpus, int ipi) { struct pcpu *pc; SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { - if (cpus & pc->pc_cpumask) + if (CPU_OVERLAP(&cpus, &pc->pc_cpumask)) ipi_send(pc, ipi); } } diff --git a/sys/ia64/include/_types.h b/sys/ia64/include/_types.h index 8fc1be2f3873..0c2f5cc38d0b 100644 --- a/sys/ia64/include/_types.h +++ b/sys/ia64/include/_types.h @@ -59,7 +59,6 @@ typedef unsigned long __uint64_t; * Standard type definitions. */ typedef __int32_t __clock_t; /* clock()... */ -typedef unsigned int __cpumask_t; typedef __int64_t __critical_t; typedef double __double_t; typedef float __float_t; diff --git a/sys/ia64/include/smp.h b/sys/ia64/include/smp.h index 26557a712705..d2aff76c3ad5 100644 --- a/sys/ia64/include/smp.h +++ b/sys/ia64/include/smp.h @@ -14,6 +14,8 @@ #ifndef LOCORE +#include + struct pcpu; struct ia64_ap_state { @@ -44,7 +46,7 @@ extern int ia64_ipi_wakeup; void ipi_all_but_self(int ipi); void ipi_cpu(int cpu, u_int ipi); -void ipi_selected(cpumask_t cpus, int ipi); +void ipi_selected(cpuset_t cpus, int ipi); void ipi_send(struct pcpu *, int ipi); #endif /* !LOCORE */ diff --git a/sys/kern/kern_cpuset.c b/sys/kern/kern_cpuset.c index bf9eac79ac8e..9ed19d4d60d9 100644 --- a/sys/kern/kern_cpuset.c +++ b/sys/kern/kern_cpuset.c @@ -616,6 +616,49 @@ cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask) return (error); } +/* + * Calculate the ffs() of the cpuset. + */ +int +cpusetobj_ffs(const cpuset_t *set) +{ + size_t i; + int cbit; + + cbit = 0; + for (i = 0; i < _NCPUWORDS; i++) { + if (set->__bits[i] != 0) { + cbit = ffsl(set->__bits[i]); + cbit += i * _NCPUBITS; + break; + } + } + return (cbit); +} + +/* + * Return a string representing a valid layout for a cpuset_t object. + * It expects an incoming buffer at least sized as CPUSETBUFSIZ. + */ +char * +cpusetobj_strprint(char *buf, const cpuset_t *set) +{ + char *tbuf; + size_t i, bytesp, bufsiz; + + tbuf = buf; + bytesp = 0; + bufsiz = CPUSETBUFSIZ; + + for (i = 0; i < (_NCPUWORDS - 1); i++) { + bytesp = snprintf(tbuf, bufsiz, "%lx, ", set->__bits[i]); + bufsiz -= bytesp; + tbuf += bytesp; + } + snprintf(tbuf, bufsiz, "%lx", set->__bits[_NCPUWORDS - 1]); + return (buf); +} + /* * Apply an anonymous mask to a single thread. */ @@ -754,11 +797,10 @@ cpuset_init(void *arg) { cpuset_t mask; - CPU_ZERO(&mask); #ifdef SMP - mask.__bits[0] = all_cpus; + mask = all_cpus; #else - mask.__bits[0] = 1; + CPU_SETOF(0, &mask); #endif if (cpuset_modify(cpuset_zero, &mask)) panic("Can't set initial cpuset mask.\n"); diff --git a/sys/kern/kern_pmc.c b/sys/kern/kern_pmc.c index 7532378c7a2d..8d9c7c04a711 100644 --- a/sys/kern/kern_pmc.c +++ b/sys/kern/kern_pmc.c @@ -55,7 +55,7 @@ int (*pmc_hook)(struct thread *td, int function, void *arg) = NULL; int (*pmc_intr)(int cpu, struct trapframe *tf) = NULL; /* Bitmask of CPUs requiring servicing at hardclock time */ -volatile cpumask_t pmc_cpumask; +volatile cpuset_t pmc_cpumask; /* * A global count of SS mode PMCs. 
When non-zero, this means that @@ -112,7 +112,7 @@ pmc_cpu_is_active(int cpu) { #ifdef SMP return (pmc_cpu_is_present(cpu) && - (hlt_cpus_mask & (1 << cpu)) == 0); + !CPU_ISSET(cpu, &hlt_cpus_mask)); #else return (1); #endif @@ -139,7 +139,7 @@ int pmc_cpu_is_primary(int cpu) { #ifdef SMP - return ((logical_cpus_mask & (1 << cpu)) == 0); + return (!CPU_ISSET(cpu, &logical_cpus_mask)); #else return (1); #endif diff --git a/sys/kern/kern_rmlock.c b/sys/kern/kern_rmlock.c index 7f2b4e7367be..3214e1b55e2f 100644 --- a/sys/kern/kern_rmlock.c +++ b/sys/kern/kern_rmlock.c @@ -263,7 +263,7 @@ _rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) pc = pcpu_find(curcpu); /* Check if we just need to do a proper critical_exit. */ - if (!(pc->pc_cpumask & rm->rm_writecpus)) { + if (!CPU_OVERLAP(&pc->pc_cpumask, &rm->rm_writecpus)) { critical_exit(); return (1); } @@ -325,7 +325,7 @@ _rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) critical_enter(); pc = pcpu_find(curcpu); - rm->rm_writecpus &= ~pc->pc_cpumask; + CPU_NAND(&rm->rm_writecpus, &pc->pc_cpumask); rm_tracker_add(pc, tracker); sched_pin(); critical_exit(); @@ -366,7 +366,8 @@ _rm_rlock(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) * Fast path to combine two common conditions into a single * conditional jump. */ - if (0 == (td->td_owepreempt | (rm->rm_writecpus & pc->pc_cpumask))) + if (0 == (td->td_owepreempt | + CPU_OVERLAP(&rm->rm_writecpus, &pc->pc_cpumask))) return (1); /* We do not have a read token and need to acquire one. */ @@ -429,17 +430,17 @@ _rm_wlock(struct rmlock *rm) { struct rm_priotracker *prio; struct turnstile *ts; - cpumask_t readcpus; + cpuset_t readcpus; if (rm->lock_object.lo_flags & RM_SLEEPABLE) sx_xlock(&rm->rm_lock_sx); else mtx_lock(&rm->rm_lock_mtx); - if (rm->rm_writecpus != all_cpus) { + if (CPU_CMP(&rm->rm_writecpus, &all_cpus)) { /* Get all read tokens back */ - - readcpus = all_cpus & (all_cpus & ~rm->rm_writecpus); + readcpus = all_cpus; + CPU_NAND(&readcpus, &rm->rm_writecpus); rm->rm_writecpus = all_cpus; /* diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c index fef9e256f6bb..2fad27ce505c 100644 --- a/sys/kern/sched_4bsd.c +++ b/sys/kern/sched_4bsd.c @@ -156,7 +156,7 @@ static struct runq runq; static struct runq runq_pcpu[MAXCPU]; long runq_length[MAXCPU]; -static cpumask_t idle_cpus_mask; +static cpuset_t idle_cpus_mask; #endif struct pcpuidlestat { @@ -951,7 +951,8 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) if (td->td_flags & TDF_IDLETD) { TD_SET_CAN_RUN(td); #ifdef SMP - idle_cpus_mask &= ~PCPU_GET(cpumask); + /* Spinlock held here, assume no migration. */ + CPU_NAND(&idle_cpus_mask, PCPU_PTR(cpumask)); #endif } else { if (TD_IS_RUNNING(td)) { @@ -1025,7 +1026,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) #ifdef SMP if (td->td_flags & TDF_IDLETD) - idle_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&idle_cpus_mask, PCPU_PTR(cpumask)); #endif sched_lock.mtx_lock = (uintptr_t)td; td->td_oncpu = PCPU_GET(cpuid); @@ -1054,7 +1055,8 @@ static int forward_wakeup(int cpunum) { struct pcpu *pc; - cpumask_t dontuse, id, map, map2, me; + cpuset_t dontuse, id, map, map2, me; + int iscpuset; mtx_assert(&sched_lock, MA_OWNED); @@ -1071,32 +1073,38 @@ forward_wakeup(int cpunum) /* * Check the idle mask we received against what we calculated * before in the old version. + * + * Also note that sched_lock is held now, thus no migration is + * expected. 
*/ me = PCPU_GET(cpumask); /* Don't bother if we should be doing it ourself. */ - if ((me & idle_cpus_mask) && (cpunum == NOCPU || me == (1 << cpunum))) + if (CPU_OVERLAP(&me, &idle_cpus_mask) && + (cpunum == NOCPU || CPU_ISSET(cpunum, &me))) return (0); - dontuse = me | stopped_cpus | hlt_cpus_mask; - map2 = 0; + dontuse = me; + CPU_OR(&dontuse, &stopped_cpus); + CPU_OR(&dontuse, &hlt_cpus_mask); + CPU_ZERO(&map2); if (forward_wakeup_use_loop) { SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { id = pc->pc_cpumask; - if ((id & dontuse) == 0 && + if (!CPU_OVERLAP(&id, &dontuse) && pc->pc_curthread == pc->pc_idlethread) { - map2 |= id; + CPU_OR(&map2, &id); } } } if (forward_wakeup_use_mask) { - map = 0; - map = idle_cpus_mask & ~dontuse; + map = idle_cpus_mask; + CPU_NAND(&map, &dontuse); /* If they are both on, compare and use loop if different. */ if (forward_wakeup_use_loop) { - if (map != map2) { + if (CPU_CMP(&map, &map2)) { printf("map != map2, loop method preferred\n"); map = map2; } @@ -1108,18 +1116,22 @@ forward_wakeup(int cpunum) /* If we only allow a specific CPU, then mask off all the others. */ if (cpunum != NOCPU) { KASSERT((cpunum <= mp_maxcpus),("forward_wakeup: bad cpunum.")); - map &= (1 << cpunum); + iscpuset = CPU_ISSET(cpunum, &map); + if (iscpuset == 0) + CPU_ZERO(&map); + else + CPU_SETOF(cpunum, &map); } - if (map) { + if (!CPU_EMPTY(&map)) { forward_wakeups_delivered++; SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { id = pc->pc_cpumask; - if ((map & id) == 0) + if (!CPU_OVERLAP(&map, &id)) continue; if (cpu_idle_wakeup(pc->pc_cpuid)) - map &= ~id; + CPU_NAND(&map, &id); } - if (map) + if (!CPU_EMPTY(&map)) ipi_selected(map, IPI_AST); return (1); } @@ -1135,7 +1147,7 @@ kick_other_cpu(int pri, int cpuid) int cpri; pcpu = pcpu_find(cpuid); - if (idle_cpus_mask & pcpu->pc_cpumask) { + if (CPU_OVERLAP(&idle_cpus_mask, &pcpu->pc_cpumask)) { forward_wakeups_delivered++; if (!cpu_idle_wakeup(cpuid)) ipi_cpu(cpuid, IPI_AST); @@ -1193,6 +1205,7 @@ void sched_add(struct thread *td, int flags) #ifdef SMP { + cpuset_t idle, me, tidlemsk; struct td_sched *ts; int forwarded = 0; int cpu; @@ -1262,11 +1275,20 @@ sched_add(struct thread *td, int flags) kick_other_cpu(td->td_priority, cpu); } else { if (!single_cpu) { - cpumask_t me = PCPU_GET(cpumask); - cpumask_t idle = idle_cpus_mask & me; - if (!idle && ((flags & SRQ_INTR) == 0) && - (idle_cpus_mask & ~(hlt_cpus_mask | me))) + /* + * Thread spinlock is held here, assume no + * migration is possible. 
+ */ + me = PCPU_GET(cpumask); + idle = idle_cpus_mask; + tidlemsk = idle; + CPU_AND(&idle, &me); + CPU_OR(&me, &hlt_cpus_mask); + CPU_NAND(&tidlemsk, &me); + + if (CPU_EMPTY(&idle) && ((flags & SRQ_INTR) == 0) && + !CPU_EMPTY(&tidlemsk)) forwarded = forward_wakeup(cpu); } diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c index ac18e778ae28..05267f3d6adb 100644 --- a/sys/kern/sched_ule.c +++ b/sys/kern/sched_ule.c @@ -564,7 +564,7 @@ struct cpu_search { #define CPUSET_FOREACH(cpu, mask) \ for ((cpu) = 0; (cpu) <= mp_maxid; (cpu)++) \ - if ((mask) & 1 << (cpu)) + if (CPU_ISSET(cpu, &mask)) static __inline int cpu_search(struct cpu_group *cg, struct cpu_search *low, struct cpu_search *high, const int match); @@ -2650,15 +2650,16 @@ static int sysctl_kern_sched_topology_spec_internal(struct sbuf *sb, struct cpu_group *cg, int indent) { + char cpusetbuf[CPUSETBUFSIZ]; int i, first; sbuf_printf(sb, "%*s\n", indent, "", 1 + indent / 2, cg->cg_level); - sbuf_printf(sb, "%*s ", indent, "", - cg->cg_count, cg->cg_mask); + sbuf_printf(sb, "%*s ", indent, "", + cg->cg_count, cpusetobj_strprint(cpusetbuf, &cg->cg_mask)); first = TRUE; for (i = 0; i < MAXCPU; i++) { - if ((cg->cg_mask & (1 << i)) != 0) { + if (CPU_ISSET(i, &cg->cg_mask)) { if (!first) sbuf_printf(sb, ", "); else diff --git a/sys/kern/subr_kdb.c b/sys/kern/subr_kdb.c index 342c5ca1879d..bb78c00a7591 100644 --- a/sys/kern/subr_kdb.c +++ b/sys/kern/subr_kdb.c @@ -413,7 +413,8 @@ kdb_thr_ctx(struct thread *thr) #if defined(SMP) && defined(KDB_STOPPEDPCB) SLIST_FOREACH(pc, &cpuhead, pc_allcpu) { - if (pc->pc_curthread == thr && (stopped_cpus & pc->pc_cpumask)) + if (pc->pc_curthread == thr && + CPU_OVERLAP(&stopped_cpus, &pc->pc_cpumask)) return (KDB_STOPPEDPCB(pc)); } #endif diff --git a/sys/kern/subr_pcpu.c b/sys/kern/subr_pcpu.c index de5cafccf7f3..9201bb7e8cfc 100644 --- a/sys/kern/subr_pcpu.c +++ b/sys/kern/subr_pcpu.c @@ -87,7 +87,7 @@ pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) KASSERT(cpuid >= 0 && cpuid < MAXCPU, ("pcpu_init: invalid cpuid %d", cpuid)); pcpu->pc_cpuid = cpuid; - pcpu->pc_cpumask = 1 << cpuid; + CPU_SETOF(cpuid, &pcpu->pc_cpumask); cpuid_to_pcpu[cpuid] = pcpu; SLIST_INSERT_HEAD(&cpuhead, pcpu, pc_allcpu); cpu_pcpu_init(pcpu, cpuid, size); diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c index aba6f0e1ea7e..ae061f36c3b5 100644 --- a/sys/kern/subr_smp.c +++ b/sys/kern/subr_smp.c @@ -53,15 +53,15 @@ __FBSDID("$FreeBSD$"); #include "opt_sched.h" #ifdef SMP -volatile cpumask_t stopped_cpus; -volatile cpumask_t started_cpus; -cpumask_t hlt_cpus_mask; -cpumask_t logical_cpus_mask; +volatile cpuset_t stopped_cpus; +volatile cpuset_t started_cpus; +cpuset_t hlt_cpus_mask; +cpuset_t logical_cpus_mask; void (*cpustop_restartfunc)(void); #endif /* This is used in modules that need to work in both SMP and UP. */ -cpumask_t all_cpus; +cpuset_t all_cpus; int mp_ncpus; /* export this for libkvm consumers. 
*/ @@ -199,8 +199,11 @@ forward_signal(struct thread *td) * */ static int -generic_stop_cpus(cpumask_t map, u_int type) +generic_stop_cpus(cpuset_t map, u_int type) { +#ifdef KTR + char cpusetbuf[CPUSETBUFSIZ]; +#endif static volatile u_int stopping_cpu = NOCPU; int i; @@ -215,7 +218,8 @@ generic_stop_cpus(cpumask_t map, u_int type) if (!smp_started) return (0); - CTR2(KTR_SMP, "stop_cpus(%x) with %u type", map, type); + CTR2(KTR_SMP, "stop_cpus(%s) with %u type", + cpusetobj_strprint(cpusetbuf, &map), type); if (stopping_cpu != PCPU_GET(cpuid)) while (atomic_cmpset_int(&stopping_cpu, NOCPU, @@ -227,7 +231,7 @@ generic_stop_cpus(cpumask_t map, u_int type) ipi_selected(map, type); i = 0; - while ((stopped_cpus & map) != map) { + while (!CPU_SUBSET(&stopped_cpus, &map)) { /* spin */ cpu_spinwait(); i++; @@ -244,14 +248,14 @@ generic_stop_cpus(cpumask_t map, u_int type) } int -stop_cpus(cpumask_t map) +stop_cpus(cpuset_t map) { return (generic_stop_cpus(map, IPI_STOP)); } int -stop_cpus_hard(cpumask_t map) +stop_cpus_hard(cpuset_t map) { return (generic_stop_cpus(map, IPI_STOP_HARD)); @@ -259,7 +263,7 @@ stop_cpus_hard(cpumask_t map) #if defined(__amd64__) int -suspend_cpus(cpumask_t map) +suspend_cpus(cpuset_t map) { return (generic_stop_cpus(map, IPI_SUSPEND)); @@ -280,19 +284,22 @@ suspend_cpus(cpumask_t map) * 1: ok */ int -restart_cpus(cpumask_t map) +restart_cpus(cpuset_t map) { +#ifdef KTR + char cpusetbuf[CPUSETBUFSIZ]; +#endif if (!smp_started) return 0; - CTR1(KTR_SMP, "restart_cpus(%x)", map); + CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map)); /* signal other cpus to restart */ - atomic_store_rel_int(&started_cpus, map); + CPU_COPY_STORE_REL(&map, &started_cpus); /* wait for each to clear its bit */ - while ((stopped_cpus & map) != 0) + while (CPU_OVERLAP(&stopped_cpus, &map)) cpu_spinwait(); return 1; @@ -348,13 +355,13 @@ smp_rendezvous_action(void) } void -smp_rendezvous_cpus(cpumask_t map, +smp_rendezvous_cpus(cpuset_t map, void (* setup_func)(void *), void (* action_func)(void *), void (* teardown_func)(void *), void *arg) { - int i, ncpus = 0; + int curcpumap, i, ncpus = 0; if (!smp_started) { if (setup_func != NULL) @@ -367,11 +374,11 @@ smp_rendezvous_cpus(cpumask_t map, } CPU_FOREACH(i) { - if (((1 << i) & map) != 0) + if (CPU_ISSET(i, &map)) ncpus++; } if (ncpus == 0) - panic("ncpus is 0 with map=0x%x", map); + panic("ncpus is 0 with non-zero map"); /* obtain rendezvous lock */ mtx_lock_spin(&smp_ipi_mtx); @@ -387,10 +394,12 @@ smp_rendezvous_cpus(cpumask_t map, atomic_store_rel_int(&smp_rv_waiters[0], 0); /* signal other processors, which will enter the IPI with interrupts off */ - ipi_selected(map & ~(1 << curcpu), IPI_RENDEZVOUS); + curcpumap = CPU_ISSET(curcpu, &map); + CPU_CLR(curcpu, &map); + ipi_selected(map, IPI_RENDEZVOUS); /* Check if the current CPU is in the map */ - if ((map & (1 << curcpu)) != 0) + if (curcpumap != 0) smp_rendezvous_action(); if (teardown_func == smp_no_rendevous_barrier) @@ -415,6 +424,7 @@ static struct cpu_group group[MAXCPU]; struct cpu_group * smp_topo(void) { + char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ]; struct cpu_group *top; /* @@ -461,9 +471,10 @@ smp_topo(void) if (top->cg_count != mp_ncpus) panic("Built bad topology at %p. CPU count %d != %d", top, top->cg_count, mp_ncpus); - if (top->cg_mask != all_cpus) - panic("Built bad topology at %p. CPU mask 0x%X != 0x%X", - top, top->cg_mask, all_cpus); + if (CPU_CMP(&top->cg_mask, &all_cpus)) + panic("Built bad topology at %p. 
CPU mask (%s) != (%s)", + top, cpusetobj_strprint(cpusetbuf, &top->cg_mask), + cpusetobj_strprint(cpusetbuf2, &all_cpus)); return (top); } @@ -488,11 +499,13 @@ static int smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share, int count, int flags, int start) { - cpumask_t mask; + char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ]; + cpuset_t mask; int i; - for (mask = 0, i = 0; i < count; i++, start++) - mask |= (1 << start); + CPU_ZERO(&mask); + for (i = 0; i < count; i++, start++) + CPU_SET(start, &mask); child->cg_parent = parent; child->cg_child = NULL; child->cg_children = 0; @@ -502,10 +515,12 @@ smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share, child->cg_mask = mask; parent->cg_children++; for (; parent != NULL; parent = parent->cg_parent) { - if ((parent->cg_mask & child->cg_mask) != 0) - panic("Duplicate children in %p. mask 0x%X child 0x%X", - parent, parent->cg_mask, child->cg_mask); - parent->cg_mask |= child->cg_mask; + if (CPU_OVERLAP(&parent->cg_mask, &child->cg_mask)) + panic("Duplicate children in %p. mask (%s) child (%s)", + parent, + cpusetobj_strprint(cpusetbuf, &parent->cg_mask), + cpusetobj_strprint(cpusetbuf2, &child->cg_mask)); + CPU_OR(&parent->cg_mask, &child->cg_mask); parent->cg_count += child->cg_count; } @@ -565,20 +580,20 @@ struct cpu_group * smp_topo_find(struct cpu_group *top, int cpu) { struct cpu_group *cg; - cpumask_t mask; + cpuset_t mask; int children; int i; - mask = (1 << cpu); + CPU_SETOF(cpu, &mask); cg = top; for (;;) { - if ((cg->cg_mask & mask) == 0) + if (!CPU_OVERLAP(&cg->cg_mask, &mask)) return (NULL); if (cg->cg_children == 0) return (cg); children = cg->cg_children; for (i = 0, cg = cg->cg_child; i < children; cg++, i++) - if ((cg->cg_mask & mask) != 0) + if (CPU_OVERLAP(&cg->cg_mask, &mask)) break; } return (NULL); @@ -586,7 +601,7 @@ smp_topo_find(struct cpu_group *top, int cpu) #else /* !SMP */ void -smp_rendezvous_cpus(cpumask_t map, +smp_rendezvous_cpus(cpuset_t map, void (*setup_func)(void *), void (*action_func)(void *), void (*teardown_func)(void *), diff --git a/sys/ofed/include/linux/list.h b/sys/ofed/include/linux/list.h index f6f9404307c8..61b42d242a43 100644 --- a/sys/ofed/include/linux/list.h +++ b/sys/ofed/include/linux/list.h @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include diff --git a/sys/sys/_cpuset.h b/sys/sys/_cpuset.h new file mode 100644 index 000000000000..62a0e9d425d0 --- /dev/null +++ b/sys/sys/_cpuset.h @@ -0,0 +1,52 @@ +/*- + * Copyright (c) 2008, Jeffrey Roberson + * All rights reserved. + * + * Copyright (c) 2008 Nokia Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _SYS__CPUSET_H_ +#define _SYS__CPUSET_H_ + +#ifdef _KERNEL +#define CPU_SETSIZE MAXCPU +#endif + +#define CPU_MAXSIZE (4 * MAXCPU) + +#ifndef CPU_SETSIZE +#define CPU_SETSIZE CPU_MAXSIZE +#endif + +#define _NCPUBITS (sizeof(long) * NBBY) /* bits per mask */ +#define _NCPUWORDS howmany(CPU_SETSIZE, _NCPUBITS) + +typedef struct _cpuset { + long __bits[howmany(CPU_SETSIZE, _NCPUBITS)]; +} cpuset_t; + +#endif /* !_SYS__CPUSET_H_ */ diff --git a/sys/sys/_rmlock.h b/sys/sys/_rmlock.h index 75a159c1082e..15d6c4953e85 100644 --- a/sys/sys/_rmlock.h +++ b/sys/sys/_rmlock.h @@ -45,7 +45,7 @@ LIST_HEAD(rmpriolist,rm_priotracker); struct rmlock { struct lock_object lock_object; - volatile cpumask_t rm_writecpus; + volatile cpuset_t rm_writecpus; LIST_HEAD(,rm_priotracker) rm_activeReaders; union { struct mtx _rm_lock_mtx; diff --git a/sys/sys/cpuset.h b/sys/sys/cpuset.h index 32639913973c..e06df54ab018 100644 --- a/sys/sys/cpuset.h +++ b/sys/sys/cpuset.h @@ -32,22 +32,9 @@ #ifndef _SYS_CPUSET_H_ #define _SYS_CPUSET_H_ -#ifdef _KERNEL -#define CPU_SETSIZE MAXCPU -#endif +#include -#define CPU_MAXSIZE (4 * MAXCPU) - -#ifndef CPU_SETSIZE -#define CPU_SETSIZE CPU_MAXSIZE -#endif - -#define _NCPUBITS (sizeof(long) * NBBY) /* bits per mask */ -#define _NCPUWORDS howmany(CPU_SETSIZE, _NCPUBITS) - -typedef struct _cpuset { - long __bits[howmany(CPU_SETSIZE, _NCPUBITS)]; -} cpuset_t; +#define CPUSETBUFSIZ ((2 + sizeof(long) * 2) * _NCPUWORDS) #define __cpuset_mask(n) ((long)1 << ((n) % _NCPUBITS)) #define CPU_CLR(n, p) ((p)->__bits[(n)/_NCPUBITS] &= ~__cpuset_mask(n)) @@ -66,6 +53,11 @@ typedef struct _cpuset { (p)->__bits[__i] = -1; \ } while (0) +#define CPU_SETOF(n, p) do { \ + CPU_ZERO(p); \ + ((p)->__bits[(n)/_NCPUBITS] = __cpuset_mask(n)); \ +} while (0) + /* Is p empty. */ #define CPU_EMPTY(p) __extension__ ({ \ __size_t __i; \ @@ -75,6 +67,15 @@ typedef struct _cpuset { __i == _NCPUWORDS; \ }) +/* Is p full set. */ +#define CPU_ISFULLSET(p) __extension__ ({ \ + __size_t __i; \ + for (__i = 0; __i < _NCPUWORDS; __i++) \ + if ((p)->__bits[__i] != (long)-1) \ + break; \ + __i == _NCPUWORDS; \ +}) + /* Is c a subset of p. 
*/ #define CPU_SUBSET(p, c) __extension__ ({ \ __size_t __i; \ @@ -124,6 +125,35 @@ typedef struct _cpuset { (d)->__bits[__i] &= ~(s)->__bits[__i]; \ } while (0) +#ifdef _KERNEL +#define CPU_CLR_ATOMIC(n, p) \ + atomic_clear_long(&(p)->__bits[(n)/_NCPUBITS], __cpuset_mask(n)) + +#define CPU_SET_ATOMIC(n, p) \ + atomic_set_long(&(p)->__bits[(n)/_NCPUBITS], __cpuset_mask(n)) + +#define CPU_OR_ATOMIC(d, s) do { \ + __size_t __i; \ + for (__i = 0; __i < _NCPUWORDS; __i++) \ + atomic_set_long(&(d)->__bits[__i], \ + (s)->__bits[__i]); \ +} while (0) + +#define CPU_NAND_ATOMIC(d, s) do { \ + __size_t __i; \ + for (__i = 0; __i < _NCPUWORDS; __i++) \ + atomic_clear_long(&(d)->__bits[__i], \ + (s)->__bits[__i]); \ +} while (0) + +#define CPU_COPY_STORE_REL(f, t) do { \ + __size_t __i; \ + for (__i = 0; __i < _NCPUWORDS; __i++) \ + atomic_store_rel_long(&(t)->__bits[__i], \ + (f)->__bits[__i]); \ +} while (0) +#endif /* !_KERNEL */ + /* * Valid cpulevel_t values. */ @@ -184,6 +214,8 @@ void cpuset_rel(struct cpuset *); int cpuset_setthread(lwpid_t id, cpuset_t *); int cpuset_create_root(struct prison *, struct cpuset **); int cpuset_setproc_update_set(struct proc *, struct cpuset *); +int cpusetobj_ffs(const cpuset_t *); +char *cpusetobj_strprint(char *, const cpuset_t *); #else __BEGIN_DECLS diff --git a/sys/sys/pcpu.h b/sys/sys/pcpu.h index ad1cf33e05b2..1b7f24f39117 100644 --- a/sys/sys/pcpu.h +++ b/sys/sys/pcpu.h @@ -37,6 +37,7 @@ #error "no assembler-serviceable parts inside" #endif +#include #include #include #include @@ -162,8 +163,8 @@ struct pcpu { uint64_t pc_switchtime; /* cpu_ticks() at last csw */ int pc_switchticks; /* `ticks' at last csw */ u_int pc_cpuid; /* This cpu number */ - cpumask_t pc_cpumask; /* This cpu mask */ - cpumask_t pc_other_cpus; /* Mask of all other cpus */ + cpuset_t pc_cpumask; /* This cpu mask */ + cpuset_t pc_other_cpus; /* Mask of all other cpus */ SLIST_ENTRY(pcpu) pc_allcpu; struct lock_list_entry *pc_spinlocks; #ifdef KTR diff --git a/sys/sys/pmckern.h b/sys/sys/pmckern.h index 3e8c1ef3fb68..796c4cadab77 100644 --- a/sys/sys/pmckern.h +++ b/sys/sys/pmckern.h @@ -76,7 +76,7 @@ extern int (*pmc_intr)(int _cpu, struct trapframe *_frame); extern struct sx pmc_sx; /* Per-cpu flags indicating availability of sampling data */ -extern volatile cpumask_t pmc_cpumask; +extern volatile cpuset_t pmc_cpumask; /* Count of system-wide sampling PMCs in existence */ extern volatile int pmc_ss_count; @@ -122,7 +122,7 @@ do { \ #define PMC_SYSTEM_SAMPLING_ACTIVE() (pmc_ss_count > 0) /* Check if a CPU has recorded samples. */ -#define PMC_CPU_HAS_SAMPLES(C) (__predict_false(pmc_cpumask & (1 << (C)))) +#define PMC_CPU_HAS_SAMPLES(C) (__predict_false(CPU_ISSET(C, &pmc_cpumask))) /* * Helper functions. diff --git a/sys/sys/smp.h b/sys/sys/smp.h index 544cb9575f91..410adfa827e3 100644 --- a/sys/sys/smp.h +++ b/sys/sys/smp.h @@ -16,6 +16,8 @@ #ifndef LOCORE +#include + /* * Topology of a NUMA or HTT system. * @@ -32,7 +34,7 @@ struct cpu_group { struct cpu_group *cg_parent; /* Our parent group. */ struct cpu_group *cg_child; /* Optional children groups. */ - cpumask_t cg_mask; /* Mask of cpus in this group. */ + cpuset_t cg_mask; /* Mask of cpus in this group. */ int8_t cg_count; /* Count of cpus in this group. */ int8_t cg_children; /* Number of children groups. */ int8_t cg_level; /* Shared cache level. 
*/ @@ -71,10 +73,10 @@ struct cpu_group *smp_topo_find(struct cpu_group *top, int cpu); extern void (*cpustop_restartfunc)(void); extern int smp_active; extern int smp_cpus; -extern volatile cpumask_t started_cpus; -extern volatile cpumask_t stopped_cpus; -extern cpumask_t hlt_cpus_mask; -extern cpumask_t logical_cpus_mask; +extern volatile cpuset_t started_cpus; +extern volatile cpuset_t stopped_cpus; +extern cpuset_t hlt_cpus_mask; +extern cpuset_t logical_cpus_mask; #endif /* SMP */ extern u_int mp_maxid; @@ -82,14 +84,14 @@ extern int mp_maxcpus; extern int mp_ncpus; extern volatile int smp_started; -extern cpumask_t all_cpus; +extern cpuset_t all_cpus; /* * Macro allowing us to determine whether a CPU is absent at any given * time, thus permitting us to configure sparse maps of cpuid-dependent * (per-CPU) structures. */ -#define CPU_ABSENT(x_cpu) ((all_cpus & (1 << (x_cpu))) == 0) +#define CPU_ABSENT(x_cpu) (!CPU_ISSET(x_cpu, &all_cpus)) /* * Macros to iterate over non-absent CPUs. CPU_FOREACH() takes an @@ -158,11 +160,11 @@ void cpu_mp_setmaxid(void); void cpu_mp_start(void); void forward_signal(struct thread *); -int restart_cpus(cpumask_t); -int stop_cpus(cpumask_t); -int stop_cpus_hard(cpumask_t); +int restart_cpus(cpuset_t); +int stop_cpus(cpuset_t); +int stop_cpus_hard(cpuset_t); #if defined(__amd64__) -int suspend_cpus(cpumask_t); +int suspend_cpus(cpuset_t); #endif void smp_rendezvous_action(void); extern struct mtx smp_ipi_mtx; @@ -173,7 +175,7 @@ void smp_rendezvous(void (*)(void *), void (*)(void *), void (*)(void *), void *arg); -void smp_rendezvous_cpus(cpumask_t, +void smp_rendezvous_cpus(cpuset_t, void (*)(void *), void (*)(void *), void (*)(void *), diff --git a/sys/sys/types.h b/sys/sys/types.h index c747d166e81d..5cc005d22e19 100644 --- a/sys/sys/types.h +++ b/sys/sys/types.h @@ -143,7 +143,6 @@ typedef __clockid_t clockid_t; #define _CLOCKID_T_DECLARED #endif -typedef __cpumask_t cpumask_t; typedef __critical_t critical_t; /* Critical section value */ typedef __int64_t daddr_t; /* disk address */ diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c index 804c411b951c..e81d2fe8af4d 100644 --- a/sys/x86/x86/local_apic.c +++ b/sys/x86/x86/local_apic.c @@ -807,7 +807,7 @@ lapic_handle_timer(struct trapframe *frame) * and unlike other schedulers it actually schedules threads to * those CPUs. */ - if ((hlt_cpus_mask & (1 << PCPU_GET(cpuid))) != 0) + if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask)) return; #endif diff --git a/usr.sbin/pmccontrol/pmccontrol.c b/usr.sbin/pmccontrol/pmccontrol.c index cce1e0ef14ac..67ed6cd46b4f 100644 --- a/usr.sbin/pmccontrol/pmccontrol.c +++ b/usr.sbin/pmccontrol/pmccontrol.c @@ -30,6 +30,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include @@ -133,26 +134,30 @@ pmcc_init_debug(void) static int pmcc_do_enable_disable(struct pmcc_op_list *op_list) { + long cpusetsize; int c, error, i, j, ncpu, npmc, t; - cpumask_t haltedcpus, cpumask; + cpuset_t haltedcpus, cpumask; struct pmcc_op *np; unsigned char *map; unsigned char op; int cpu, pmc; - size_t dummy; if ((ncpu = pmc_ncpu()) < 0) err(EX_OSERR, "Unable to determine the number of cpus"); /* Determine the set of active CPUs. 
*/ - cpumask = (1 << ncpu) - 1; - dummy = sizeof(int); - haltedcpus = (cpumask_t) 0; - if (ncpu > 1 && sysctlbyname("machdep.hlt_cpus", &haltedcpus, - &dummy, NULL, 0) < 0) + cpusetsize = sysconf(_SC_CPUSET_SIZE); + if (cpusetsize == -1 || (u_long)cpusetsize > sizeof(cpuset_t)) { err(EX_OSERR, "ERROR: Cannot determine which CPUs are " "halted"); - cpumask &= ~haltedcpus; + } + CPU_ZERO(&haltedcpus); + if (ncpu > 1 && sysctlbyname("machdep.hlt_cpus", &haltedcpus, + (size_t *)&cpusetsize, NULL, 0) < 0) + err(EX_OSERR, "ERROR: Cannot determine which CPUs are " + "halted"); + CPU_FILL(&cpumask); + CPU_NAND(&cpumask, &haltedcpus); /* Determine the maximum number of PMCs in any CPU. */ npmc = 0; @@ -200,7 +205,7 @@ pmcc_do_enable_disable(struct pmcc_op_list *op_list) if (cpu == PMCC_CPU_ALL) for (i = 0; i < ncpu; i++) { - if ((1 << i) & cpumask) + if (CPU_ISSET(i, &cpumask)) SET_PMCS(i, pmc, op); } else