diff --git a/UPDATING b/UPDATING
index 1ff9e8ffb76d..c8c3582e81e2 100644
--- a/UPDATING
+++ b/UPDATING
@@ -22,6 +22,33 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 9.x IS SLOW:
 	machines to maximize performance.  (To disable malloc debugging, run
 	ln -s aj /etc/malloc.conf.)
 
+20110608:
+	The following sysctls and tunables are retired on x86 platforms:
+		machdep.hlt_cpus
+		machdep.hlt_logical_cpus
+	The following sysctl is retired:
+		machdep.hyperthreading_allowed
+	The sysctls were supposed to provide a way to dynamically offline and
+	online selected CPUs on x86 platforms, but the implementation has not
+	been reliable, especially with the SCHED_ULE scheduler.
+	The machdep.hyperthreading_allowed tunable is still available to
+	ignore hyperthreading CPUs at OS level.
+	Individual CPUs can be disabled using the hint.lapic.X.disabled
+	tunable, where X is the APIC ID of a CPU.  Be advised, though, that
+	disabling CPUs in a non-uniform fashion will result in a non-uniform
+	topology and may lead to sub-optimal system performance with
+	SCHED_ULE, which is the default scheduler.
+
+20110607:
+	cpumask_t type is retired and cpuset_t is used in order to describe
+	a mask of CPUs.
+
+20110531:
+	Changes to ifconfig(8) for dynamic address family detection mandate
+	that you are running a kernel of 20110525 or later.  Make sure to
+	follow the update procedure to boot a new kernel before installing
+	world.
+
 20110513:
 	Support for sun4v architecture is officially dropped
 
diff --git a/bin/sh/eval.c b/bin/sh/eval.c
index 404de3312182..585f91ead32a 100644
--- a/bin/sh/eval.c
+++ b/bin/sh/eval.c
@@ -409,6 +409,7 @@ evalsubshell(union node *n, int flags)
 	struct job *jp;
 	int backgnd = (n->type == NBACKGND);
 
+	oexitstatus = exitstatus;
 	expredir(n->nredir.redirect);
 	if ((!backgnd && flags & EV_EXIT && !have_traps()) ||
 			forkshell(jp = makejob(n, 1), n, backgnd) == 0) {
@@ -436,6 +437,7 @@ evalredir(union node *n, int flags)
 	struct jmploc *savehandler;
 	volatile int in_redirect = 1;
 
+	oexitstatus = exitstatus;
 	expredir(n->nredir.redirect);
 	savehandler = handler;
 	if (setjmp(jmploc.loc)) {
@@ -478,7 +480,6 @@ expredir(union node *n)
 	for (redir = n ; redir ; redir = redir->nfile.next) {
 		struct arglist fn;
 		fn.lastp = &fn.list;
-		oexitstatus = exitstatus;
 		switch (redir->type) {
 		case NFROM:
 		case NTO:
diff --git a/contrib/groff/tmac/doc-common b/contrib/groff/tmac/doc-common
index 26dcc562f4d7..20d0cab8c875 100644
--- a/contrib/groff/tmac/doc-common
+++ b/contrib/groff/tmac/doc-common
@@ -543,6 +543,7 @@
 .ds doc-operating-system-FreeBSD-7.3     7.3
 .ds doc-operating-system-FreeBSD-8.0     8.0
 .ds doc-operating-system-FreeBSD-8.1     8.1
+.ds doc-operating-system-FreeBSD-8.2     8.2
 .
 .ds doc-operating-system-Darwin-8.0.0  8.0.0
 .ds doc-operating-system-Darwin-8.1.0  8.1.0
@@ -563,6 +564,17 @@
 .ds doc-operating-system-Darwin-9.4.0  9.4.0
 .ds doc-operating-system-Darwin-9.5.0  9.5.0
 .ds doc-operating-system-Darwin-9.6.0  9.6.0
+.ds doc-operating-system-Darwin-9.7.0  9.7.0
+.ds doc-operating-system-Darwin-9.8.0  9.8.0
+.ds doc-operating-system-Darwin-10.0.0 10.0.0
+.ds doc-operating-system-Darwin-10.1.0 10.1.0
+.ds doc-operating-system-Darwin-10.2.0 10.2.0
+.ds doc-operating-system-Darwin-10.3.0 10.3.0
+.ds doc-operating-system-Darwin-10.4.0 10.4.0
+.ds doc-operating-system-Darwin-10.5.0 10.5.0
+.ds doc-operating-system-Darwin-10.6.0 10.6.0
+.ds doc-operating-system-Darwin-10.7.0 10.7.0
+.ds doc-operating-system-Darwin-11.0.0 11.0.0
 .
 .ds doc-operating-system-DragonFly-1.0    1.0
 .ds doc-operating-system-DragonFly-1.1    1.1
diff --git a/contrib/groff/tmac/doc-syms b/contrib/groff/tmac/doc-syms
index d2a070d6d76e..0e862adb5e90 100644
--- a/contrib/groff/tmac/doc-syms
+++ b/contrib/groff/tmac/doc-syms
@@ -617,6 +617,8 @@
 .\" POSIX Part 1: System API
 .ds doc-str-St--p1003.1        \*[doc-Tn-font-size]\%IEEE\*[doc-str-St] Std 1003.1
 .as doc-str-St--p1003.1        " (\*[Lq]\)\*[Px]\*[doc-str-St].1\*[Rq])
+.ds doc-str-St--p1003.1b       \*[doc-Tn-font-size]\%IEEE\*[doc-str-St] Std 1003.1b
+.as doc-str-St--p1003.1b       " (\*[Lq]\)\*[Px]\*[doc-str-St].1\*[Rq])
 .ds doc-str-St--p1003.1-88     \*[doc-Tn-font-size]\%IEEE\*[doc-str-St] Std 1003.1-1988
 .as doc-str-St--p1003.1-88     " (\*[Lq]\)\*[Px]\*[doc-str-St].1\*[Rq])
 .ds doc-str-St--p1003.1-90     \*[doc-Tn-font-size]ISO/IEC\*[doc-str-St] 9945-1:1990
@@ -754,6 +756,7 @@
 .
 .ds doc-str-Lb-libarm      ARM Architecture Library (libarm, \-larm)
 .ds doc-str-Lb-libarm32    ARM32 Architecture Library (libarm32, \-larm32)
+.ds doc-str-Lb-libbsm      Basic Security Module Library (libbsm, \-lbsm)
 .ds doc-str-Lb-libc        Standard C\~Library (libc, \-lc)
 .ds doc-str-Lb-libcdk      Curses Development Kit Library (libcdk, \-lcdk)
 .ds doc-str-Lb-libcompat   Compatibility Library (libcompat, \-lcompat)
@@ -779,6 +782,7 @@
 .ds doc-str-Lb-libpthread  \*[Px] \*[doc-str-Lb]Threads Library (libpthread, \-lpthread)
 .ds doc-str-Lb-libresolv   DNS Resolver Library (libresolv, \-lresolv)
 .ds doc-str-Lb-librt       \*[Px] \*[doc-str-Lb]Real-time Library (librt, \-lrt)
+.ds doc-str-Lb-libSystem   System Library (libSystem, \-lSystem)
 .ds doc-str-Lb-libtermcap  Termcap Access Library (libtermcap, \-ltermcap)
 .ds doc-str-Lb-libusbhid   USB Human Interface Devices Library (libusbhid, \-lusbhid)
 .ds doc-str-Lb-libutil     System Utilities Library (libutil, \-lutil)
diff --git a/contrib/groff/tmac/doc.tmac b/contrib/groff/tmac/doc.tmac
index 7bdbf325927f..c1c32dc21919 100644
--- a/contrib/groff/tmac/doc.tmac
+++ b/contrib/groff/tmac/doc.tmac
@@ -1197,8 +1197,14 @@
 .  if !\n[doc-arg-limit] \
 .    ds doc-macro-name Aq
 .
-.  ds doc-quote-left <
-.  ds doc-quote-right >
+.  ie "\*[doc-macro-name]"An" \{\
+.    ds doc-quote-left <
+.    ds doc-quote-right >
+.  \}
+.  el \{\
+.    ds doc-quote-left \[la]
+.    ds doc-quote-right \[ra]
+.  \}
 .
 .  doc-enclose-string \$@
 ..
@@ -1527,7 +1533,10 @@
 .  if !\n[doc-arg-limit] \
 .    ds doc-macro-name Ao
 .
-.  ds doc-quote-left \[la]
+.  ie "\*[doc-macro-name]"An" \
+.    ds doc-quote-left <
+.  el \
+.    ds doc-quote-left \[la]
 .
 .  doc-enclose-open \$@
 ..
@@ -1546,7 +1555,10 @@
 .  if !\n[doc-arg-limit] \
 .    ds doc-macro-name Ac
 .
-.  ds doc-quote-right \[ra]
+.  ie "\*[doc-macro-name]"An" \
+.    ds doc-quote-right >
+.  el \
+.    ds doc-quote-right \[ra]
 .
 .  doc-enclose-close \$@
 ..
diff --git a/etc/network.subr b/etc/network.subr
index 64fb0fe65bee..ce71b786f307 100644
--- a/etc/network.subr
+++ b/etc/network.subr
@@ -100,25 +100,19 @@ ifconfig_up()
 
 	# inet6 specific
 	if afexists inet6; then
-		if ipv6if $1; then
-			if checkyesno ipv6_gateway_enable; then
-				_ipv6_opts="-accept_rtadv"
-			fi
-		else
-			if checkyesno ipv6_activate_all_interfaces; then
-				_ipv6_opts="-ifdisabled"
-			else
-				_ipv6_opts="ifdisabled"
-			fi
-
-			# backward compatibility: $ipv6_enable
-			case $ipv6_enable in
-			[Yy][Ee][Ss]|[Tt][Rr][Uu][Ee]|[Oo][Nn]|1)
-				_ipv6_opts="${_ipv6_opts} accept_rtadv"
-				;;
-			esac
+		if checkyesno ipv6_activate_all_interfaces; then
+			_ipv6_opts="-ifdisabled"
+		elif [ "$1" != "lo0" ]; then
+			_ipv6_opts="ifdisabled"
 		fi
 
+		# backward compatibility: $ipv6_enable
+		case $ipv6_enable in
+		[Yy][Ee][Ss]|[Tt][Rr][Uu][Ee]|[Oo][Nn]|1)
+			_ipv6_opts="${_ipv6_opts} accept_rtadv"
+			;;
+		esac
+
 		if [ -n "${_ipv6_opts}" ]; then
 			ifconfig $1 inet6 ${_ipv6_opts}
 		fi
diff --git a/gnu/usr.bin/gdb/kgdb/kthr.c b/gnu/usr.bin/gdb/kgdb/kthr.c
index 5036c9c9aaa9..461f408a73d9 100644
--- a/gnu/usr.bin/gdb/kgdb/kthr.c
+++ b/gnu/usr.bin/gdb/kgdb/kthr.c
@@ -28,6 +28,7 @@
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
+#include <sys/cpuset.h>
 #include <sys/proc.h>
 #include <sys/types.h>
 #include <sys/signal.h>
@@ -37,6 +38,7 @@ __FBSDID("$FreeBSD$");
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <unistd.h>
 
 #include <defs.h>
 #include <frame-unwind.h>
@@ -48,7 +50,7 @@ static CORE_ADDR dumppcb;
 static int dumptid;
 
 static CORE_ADDR stoppcbs;
-static __cpumask_t stopped_cpus;
+static cpuset_t stopped_cpus;
 
 static struct kthr *first;
 struct kthr *curkthr;
@@ -76,6 +78,7 @@ kgdb_thr_init(void)
 {
 	struct proc p;
 	struct thread td;
+	long cpusetsize;
 	struct kthr *kt;
 	CORE_ADDR addr;
 	uintptr_t paddr;
@@ -102,10 +105,11 @@ kgdb_thr_init(void)
 		dumptid = -1;
 
 	addr = kgdb_lookup("stopped_cpus");
-	if (addr != 0)
-		kvm_read(kvm, addr, &stopped_cpus, sizeof(stopped_cpus));
-	else
-		stopped_cpus = 0;
+	CPU_ZERO(&stopped_cpus);
+	cpusetsize = sysconf(_SC_CPUSET_SIZE);
+	if (cpusetsize != -1 && (u_long)cpusetsize <= sizeof(cpuset_t) &&
+	    addr != 0)
+		kvm_read(kvm, addr, &stopped_cpus, cpusetsize);
 
 	stoppcbs = kgdb_lookup("stoppcbs");
 
@@ -126,8 +130,8 @@ kgdb_thr_init(void)
 			kt->kaddr = addr;
 			if (td.td_tid == dumptid)
 				kt->pcb = dumppcb;
-			else if (td.td_state == TDS_RUNNING && ((1 << td.td_oncpu) & stopped_cpus)
-				&& stoppcbs != 0)
+			else if (td.td_state == TDS_RUNNING && stoppcbs != 0 &&
+			    CPU_ISSET(td.td_oncpu, &stopped_cpus))
 				kt->pcb = (uintptr_t) stoppcbs + sizeof(struct pcb) * td.td_oncpu;
 			else
 				kt->pcb = (uintptr_t)td.td_pcb;
diff --git a/gnu/usr.bin/groff/tmac/mdoc.local b/gnu/usr.bin/groff/tmac/mdoc.local
index d46f5db54f3b..befc87d3f6a6 100644
--- a/gnu/usr.bin/groff/tmac/mdoc.local
+++ b/gnu/usr.bin/groff/tmac/mdoc.local
@@ -34,7 +34,6 @@
 .\" FreeBSD .Lb values
 .ds doc-str-Lb-libarchive  Streaming Archive Library (libarchive, \-larchive)
 .ds doc-str-Lb-libbluetooth Bluetooth User Library (libbluetooth, \-lbluetooth)
-.ds doc-str-Lb-libbsm      Basic Security Module User Library (libbsm, \-lbsm)
 .ds doc-str-Lb-libc_r      Reentrant C\~Library (libc_r, \-lc_r)
 .ds doc-str-Lb-libcalendar Calendar Arithmetic Library (libcalendar, \-lcalendar)
 .ds doc-str-Lb-libcam      Common Access Method User Library (libcam, \-lcam)
@@ -75,7 +74,7 @@
 .
 .\" FreeBSD releases not found in doc-common
 .ds doc-operating-system-FreeBSD-7.4    7.4
-.ds doc-operating-system-FreeBSD-8.2    8.2
+.ds doc-operating-system-FreeBSD-8.3    8.3
 .ds doc-operating-system-FreeBSD-9.0    9.0
 .
 .\" Definitions not (yet) in doc-syms
diff --git a/lib/libiconv/Makefile b/lib/libiconv/Makefile
index 078771e9b05b..71c288573256 100644
--- a/lib/libiconv/Makefile
+++ b/lib/libiconv/Makefile
@@ -19,7 +19,6 @@ SRCS=	citrus_bcs.c citrus_bcs_strtol.c citrus_bcs_strtoul.c \
 	citrus_module.c citrus_none.c citrus_pivot_factory.c \
 	citrus_prop.c citrus_stdenc.c iconv.c
 
-WARNS?=	6
 CFLAGS+= --param max-inline-insns-single=128 -I ${.CURDIR}/../../include -I${.CURDIR}/../libc/include
 
 .include <bsd.lib.mk>
diff --git a/lib/libkvm/kvm_pcpu.c b/lib/libkvm/kvm_pcpu.c
index fd09fc88fb1c..bc73baff19cc 100644
--- a/lib/libkvm/kvm_pcpu.c
+++ b/lib/libkvm/kvm_pcpu.c
@@ -39,11 +39,13 @@
 __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
+#include <sys/cpuset.h>
 #include <sys/pcpu.h>
 #include <sys/sysctl.h>
 #include <kvm.h>
 #include <limits.h>
 #include <stdlib.h>
+#include <unistd.h>
 
 #include "kvm_private.h"
 
@@ -118,6 +120,9 @@ _kvm_pcpu_clear(void)
 void *
 kvm_getpcpu(kvm_t *kd, int cpu)
 {
+	long kcpusetsize;
+	ssize_t nbytes;
+	uintptr_t readptr;
 	char *buf;
 
 	if (kd == NULL) {
@@ -125,6 +130,10 @@ kvm_getpcpu(kvm_t *kd, int cpu)
 		return (NULL);
 	}
 
+	kcpusetsize = sysconf(_SC_CPUSET_SIZE);
+	if (kcpusetsize == -1 || (u_long)kcpusetsize > sizeof(cpuset_t))
+		return ((void *)-1);
+
 	if (maxcpu == 0)
 		if (_kvm_pcpu_init(kd) < 0)
 			return ((void *)-1);
@@ -137,8 +146,26 @@ kvm_getpcpu(kvm_t *kd, int cpu)
 		_kvm_err(kd, kd->program, "out of memory");
 		return ((void *)-1);
 	}
-	if (kvm_read(kd, (uintptr_t)pcpu_data[cpu], buf, sizeof(struct pcpu)) !=
-	    sizeof(struct pcpu)) {
+	nbytes = sizeof(struct pcpu) - 2 * kcpusetsize;
+	readptr = (uintptr_t)pcpu_data[cpu];
+	if (kvm_read(kd, readptr, buf, nbytes) != nbytes) {
+		_kvm_err(kd, kd->program, "unable to read per-CPU data");
+		free(buf);
+		return ((void *)-1);
+	}
+
+	/* Fetch the valid cpuset_t objects. */
+	CPU_ZERO((cpuset_t *)(buf + nbytes));
+	CPU_ZERO((cpuset_t *)(buf + nbytes + sizeof(cpuset_t)));
+	readptr += nbytes;
+	if (kvm_read(kd, readptr, buf + nbytes, kcpusetsize) != kcpusetsize) {
+		_kvm_err(kd, kd->program, "unable to read per-CPU data");
+		free(buf);
+		return ((void *)-1);
+	}
+	readptr += kcpusetsize;
+	if (kvm_read(kd, readptr, buf + nbytes + sizeof(cpuset_t),
+	    kcpusetsize) != kcpusetsize) {
 		_kvm_err(kd, kd->program, "unable to read per-CPU data");
 		free(buf);
 		return ((void *)-1);
diff --git a/lib/libmemstat/memstat_uma.c b/lib/libmemstat/memstat_uma.c
index 4aae61a9bb05..485a4f279906 100644
--- a/lib/libmemstat/memstat_uma.c
+++ b/lib/libmemstat/memstat_uma.c
@@ -27,6 +27,7 @@
  */
 
 #include <sys/param.h>
+#include <sys/cpuset.h>
 #include <sys/sysctl.h>
 
 #define	LIBMEMSTAT	/* Cause vm_page.h not to include opt_vmpage.h */
@@ -44,6 +45,7 @@
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
+#include <unistd.h>
 
 #include "memstat.h"
 #include "memstat_internal.h"
@@ -313,7 +315,8 @@ memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle)
 	struct uma_keg *kzp, kz;
 	int hint_dontsearch, i, mp_maxid, ret;
 	char name[MEMTYPE_MAXNAME];
-	__cpumask_t all_cpus;
+	cpuset_t all_cpus;
+	long cpusetsize;
 	kvm_t *kvm;
 
 	kvm = (kvm_t *)kvm_handle;
@@ -337,7 +340,13 @@ memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle)
 		list->mtl_error = ret;
 		return (-1);
 	}
-	ret = kread_symbol(kvm, X_ALL_CPUS, &all_cpus, sizeof(all_cpus), 0);
+	cpusetsize = sysconf(_SC_CPUSET_SIZE);
+	if (cpusetsize == -1 || (u_long)cpusetsize > sizeof(cpuset_t)) {
+		list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL;
+		return (-1);
+	}
+	CPU_ZERO(&all_cpus);
+	ret = kread_symbol(kvm, X_ALL_CPUS, &all_cpus, cpusetsize, 0);
 	if (ret != 0) {
 		list->mtl_error = ret;
 		return (-1);
@@ -407,7 +416,7 @@ memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle)
 			if (kz.uk_flags & UMA_ZFLAG_INTERNAL)
 				goto skip_percpu;
 			for (i = 0; i < mp_maxid + 1; i++) {
-				if ((all_cpus & (1 << i)) == 0)
+				if (!CPU_ISSET(i, &all_cpus))
 					continue;
 				ucp = &ucp_array[i];
 				mtp->mt_numallocs += ucp->uc_allocs;
diff --git a/libexec/comsat/comsat.c b/libexec/comsat/comsat.c
index d0ff7a43dbd6..2a0fd3c764a9 100644
--- a/libexec/comsat/comsat.c
+++ b/libexec/comsat/comsat.c
@@ -180,7 +180,7 @@ notify(struct utmpx *utp, char file[], off_t offset, int folder)
 		dsyslog(LOG_DEBUG, "%s: wrong mode on %s", utp->ut_user, tty);
 		return;
 	}
-	dsyslog(LOG_DEBUG, "notify %s on %s\n", utp->ut_user, tty);
+	dsyslog(LOG_DEBUG, "notify %s on %s", utp->ut_user, tty);
 	switch (fork()) {
 	case -1:
 		syslog(LOG_NOTICE, "fork failed (%m)");
diff --git a/release/ia64/mkisoimages.sh b/release/ia64/mkisoimages.sh
index 33ba1927ad61..8709c1221773 100644
--- a/release/ia64/mkisoimages.sh
+++ b/release/ia64/mkisoimages.sh
@@ -48,28 +48,32 @@ EFIPART=efipart.sys
 if [ $bootable = yes ]; then
     EFISZ=65536
     MNT=/mnt
-    dd if=/dev/zero of=$BASE/$EFIPART count=$EFISZ
-    md=`mdconfig -a -t vnode -f $BASE/$EFIPART`
+    dd if=/dev/zero of=$EFIPART count=$EFISZ
+    md=`mdconfig -a -t vnode -f $EFIPART`
     newfs_msdos -F 12 -S 512 -h 4 -o 0 -s $EFISZ -u 16 $md
     mount -t msdosfs /dev/$md $MNT
     mkdir -p $MNT/efi/boot $MNT/boot $MNT/boot/kernel
     cp -R $BASE/boot/defaults $MNT/boot
     cp $BASE/boot/kernel/kernel $MNT/boot/kernel
-    cp $BASE/boot/kernel/ispfw.ko $MNT/boot/kernel
+    if [ -s $BASE/boot/kernel/ispfw.ko ]; then
+	cp $BASE/boot/kernel/ispfw.ko $MNT/boot/kernel
+    fi
     cp $BASE/boot/device.hints $MNT/boot
     cp $BASE/boot/loader.* $MNT/boot
-    cp $BASE/boot/mfsroot.gz $MNT/boot
+    if [ -s $BASE/boot/mfsroot.gz ]; then
+	cp $BASE/boot/mfsroot.gz $MNT/boot
+    fi
     cp $BASE/boot/support.4th $MNT/boot
     mv $MNT/boot/loader.efi $MNT/efi/boot/bootia64.efi
     umount $MNT
     mdconfig -d -u $md
-    BOOTOPTS="-b bootimage=i386;$EFIPART -o no-emul-boot"
+    BOOTOPTS="-o bootimage=i386;$EFIPART -o no-emul-boot"
 else
     BOOTOPTS=""
 fi
 
-echo "/dev/iso9660/$LABEL / cd9660 ro 0 0" > $1/etc/fstab
+echo "/dev/iso9660/$LABEL / cd9660 ro 0 0" > $BASE/etc/fstab
 makefs -t cd9660 $BOOTOPTS -o rockridge -o label=$LABEL $NAME $BASE $*
-rm -f $BASE/$EFIPART
+rm -f $EFIPART
 rm $1/etc/fstab
 exit 0
diff --git a/sbin/geom/class/part/geom_part.c b/sbin/geom/class/part/geom_part.c
index ae9f4b7b4516..55a055e1e21a 100644
--- a/sbin/geom/class/part/geom_part.c
+++ b/sbin/geom/class/part/geom_part.c
@@ -341,9 +341,10 @@ gpart_autofill_resize(struct gctl_req *req)
 			errc(EXIT_FAILURE, error, "Invalid alignment param");
 		if (alignment == 0)
 			errx(EXIT_FAILURE, "Invalid alignment param");
+	} else {
 		lba = pp->lg_stripesize / pp->lg_sectorsize;
 		if (lba > 0)
-			alignment = g_lcm(lba, alignment);
+			alignment = lba;
 	}
 	error = gctl_delete_param(req, "alignment");
 	if (error)
@@ -491,13 +492,9 @@ gpart_autofill(struct gctl_req *req)
 	if (has_size && has_start && !has_alignment)
 		goto done;
 
-	/*
-	 * If stripesize is not zero, then recalculate alignment value.
-	 * Use LCM from stripesize and user specified alignment.
-	 */
 	len = pp->lg_stripesize / pp->lg_sectorsize;
-	if (len > 0 )
-		alignment = g_lcm(len, alignment);
+	if (len > 0 && !has_alignment)
+		alignment = len;
 
 	/* Adjust parameters to stripeoffset */
 	offset = pp->lg_stripeoffset / pp->lg_sectorsize;
diff --git a/sbin/geom/class/part/gpart.8 b/sbin/geom/class/part/gpart.8
index 940620c9b5a3..4365d6b8bcfa 100644
--- a/sbin/geom/class/part/gpart.8
+++ b/sbin/geom/class/part/gpart.8
@@ -24,7 +24,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd May 30, 2011
+.Dd June 6, 2011
 .Dt GPART 8
 .Os
 .Sh NAME
@@ -530,16 +530,17 @@ about its use.
 .El
 .\"
 .Sh PARTITION TYPES
+Partition types are identified on disk by particular strings or magic
+values.
 The
 .Nm
-utility uses symbolic names for common partition types to avoid that the
-user needs to know what the partitioning scheme in question is and what
-the actual number or identification needs to be used for a particular
-type.
+utility uses symbolic names for common partition types to avoid the
+user needing to know these values or other details of the partitioning
+scheme in question.
 The
 .Nm
 utility also allows the user to specify scheme-specific partition types
-for partition types that do not have symbol names.
+for partition types that do not have symbolic names.
 The symbolic names currently understood are:
 .Bl -tag -width ".Cm freebsd-vinum"
 .It Cm bios-boot
@@ -740,30 +741,30 @@ action or reverted with the
 .Cm undo
 action.
 .Sh RECOVERING
-The GEOM class PART supports recovering of partition tables only for GPT.
+The GEOM PART class supports recovering of partition tables only for GPT.
 The GUID partition table has a primary and secondary (backup) copy of
-metadata for redundance.
-They are stored in the begining and in the end of device respectively.
-Therefore it is acceptable to have some corruptions in the metadata that
-are not fatal to work with GPT.
-When kernel detects corrupt metadata it marks this table as corrupt and
-reports about corruption.
-Any changes in corrupt table are prohibited except
+metadata for redundancy; these are stored at the beginning and the end
+of the device respectively.
+As a result of having two copies, it is acceptable to have some corruption
+within the metadata that is not fatal to the working of GPT.
+When the kernel detects corrupt metadata it marks this table as corrupt and
+reports the corruption.
+Any operations on corrupt tables are prohibited except for
 .Cm destroy
 and
 .Cm recover .
 .Pp
-In case when only first sector is corrupt kernel can not detect GPT even
-if partition table is not corrupt.
-You can write protective MBR with
+If the first sector of a provider is corrupt, the kernel cannot detect GPT
+even if the partition table itself is not corrupt.
+You can rewrite the protective MBR using the
 .Xr dd 1
-command to restore ability of GPT detection.
-The copy of protective MBR is usually located in the
+command, to restore the ability to detect the GPT.
+The copy of the protective MBR is usually located in the
 .Pa /boot/pmbr
 file.
 .Pp
-In case when some of metadata is corrupt you will get to know about this
-from kernel's messages like these:
+If one GPT header appears to be corrupt but the other copy remains intact,
+the kernel will log the following:
 .Bd -literal -offset indent
 GEOM: provider: the primary GPT table is corrupt or invalid.
 GEOM: provider: using the secondary instead -- recovery strongly advised.
@@ -777,32 +778,31 @@ GEOM: provider: using the primary only -- recovery suggested.
 .Pp
 Also
 .Nm
-commands like
+commands such as
 .Cm show , status
 and
 .Cm list
-will report about corrupt table.
+will report about corrupt tables.
 .Pp
-In case when the size of device has changed (e.g.\& volume expansion) the
-secondary GPT header will become located not in the last sector.
+If the size of the device has changed (e.g.\& volume expansion) the
+secondary GPT header will no longer be located in the last sector.
 This is not a metadata corruption, but it is dangerous because any
-corruption of the primary GPT will lead to lost of partition table.
-Kernel reports about this problem with message:
+corruption of the primary GPT will lead to loss of the partition table.
+This problem is reported by the kernel with the message:
 .Bd -literal -offset indent
 GEOM: provider: the secondary GPT header is not in the last LBA.
 .Ed
 .Pp
-A corrupt table can be recovered with
+This situation can be recovered with the
 .Cm recover
 command.
-This command does reconstruction of corrupt metadata using
-known valid metadata.
-Also it can relocate secondary GPT to the end of device.
+This command reconstructs the corrupt metadata using known valid
+metadata and relocates the secondary GPT to the end of the device.
 .Pp
 .Em NOTE :
-The GEOM class PART can detect the same partition table on different GEOM
-providers and some of them will be marked as corrupt.
-Be careful when choosing a provider for recovering.
+The GEOM PART class can detect the same partition table visible through
+different GEOM providers, and some of them will be marked as corrupt.
+Be careful when choosing a provider for recovery.
 If you choose incorrectly you can destroy the metadata of another GEOM class,
 e.g.\& GEOM MIRROR or GEOM LABEL.
 .Sh SYSCTL VARIABLES
@@ -815,11 +815,11 @@ The default value is shown next to each variable.
 .Bl -tag -width indent
 .It Va kern.geom.part.check_integrity : No 1
 This variable controls the behaviour of metadata integrity checks.
-When integrity checks are enabled
+When integrity checks are enabled, the
 .Nm PART
-GEOM class verifies all generic partition parameters that it gets from the
+GEOM class verifies all generic partition parameters obtained from the
 disk metadata.
-If some inconsistency is detected, partition table will be
+If some inconsistency is detected, the partition table will be
 rejected with a diagnostic message:
 .Sy "GEOM_PART: Integrity check failed (provider, scheme)" .
 .El
diff --git a/sbin/geom/class/sched/Makefile b/sbin/geom/class/sched/Makefile
index a6ccd584e217..6f54d3f36f9f 100644
--- a/sbin/geom/class/sched/Makefile
+++ b/sbin/geom/class/sched/Makefile
@@ -5,6 +5,4 @@
 
 GEOM_CLASS=	sched
 
-WARNS?=	6
-
 .include <bsd.lib.mk>
diff --git a/sbin/ifconfig/af_inet6.c b/sbin/ifconfig/af_inet6.c
index 7fdca0dd25ab..585be0500923 100644
--- a/sbin/ifconfig/af_inet6.c
+++ b/sbin/ifconfig/af_inet6.c
@@ -499,8 +499,8 @@ static struct cmd inet6_cmds[] = {
 	DEF_CMD("-autoconf",	-IN6_IFF_AUTOCONF,	setip6flags),
 	DEF_CMD("accept_rtadv",	ND6_IFF_ACCEPT_RTADV,	setnd6flags),
 	DEF_CMD("-accept_rtadv",-ND6_IFF_ACCEPT_RTADV,	setnd6flags),
-	DEF_CMD("defroute_rtadv",ND6_IFF_DEFROUTE_RTADV,setnd6flags),
-	DEF_CMD("-defroute_rtadv",-ND6_IFF_DEFROUTE_RTADV,setnd6flags),
+	DEF_CMD("no_radr",	ND6_IFF_NO_RADR,	setnd6flags),
+	DEF_CMD("-no_radr",	-ND6_IFF_NO_RADR,	setnd6flags),
 	DEF_CMD("defaultif",	1,			setnd6defif),
 	DEF_CMD("-defaultif",	-1,			setnd6defif),
 	DEF_CMD("ifdisabled",	ND6_IFF_IFDISABLED,	setnd6flags),
diff --git a/sbin/ifconfig/af_nd6.c b/sbin/ifconfig/af_nd6.c
index 273e8ff985d8..eed00eaf4044 100644
--- a/sbin/ifconfig/af_nd6.c
+++ b/sbin/ifconfig/af_nd6.c
@@ -58,7 +58,7 @@ static const char rcsid[] =
 #define	MAX_SYSCTL_TRY	5
 #define	ND6BITS	"\020\001PERFORMNUD\002ACCEPT_RTADV\003PREFER_SOURCE" \
 		"\004IFDISABLED\005DONT_SET_IFROUTE\006AUTO_LINKLOCAL" \
-		"\007DEFROUTE_RTADV\020DEFAULTIF"
+		"\007NO_RADR\020DEFAULTIF"
 
 static int isnd6defif(int);
 void setnd6flags(const char *, int, int, const struct afswtch *);
@@ -159,7 +159,6 @@ nd6_status(int s)
 	}
 	isdefif = isnd6defif(s6);
 	close(s6);
-
 	if (nd.ndi.flags == 0 && !isdefif)
 		return;
 	printb("\tnd6 options",
diff --git a/sbin/ipfw/main.c b/sbin/ipfw/main.c
index fb3f3fbfdfb3..109b62b7de43 100644
--- a/sbin/ipfw/main.c
+++ b/sbin/ipfw/main.c
@@ -356,6 +356,7 @@ ipfw_main(int oldac, char **oldav)
 	 */
 	co.do_nat = 0;
 	co.do_pipe = 0;
+	co.use_set = 0;
 	if (!strncmp(*av, "nat", strlen(*av)))
  		co.do_nat = 1;
  	else if (!strncmp(*av, "pipe", strlen(*av)))
@@ -444,7 +445,7 @@ static void
 ipfw_readfile(int ac, char *av[])
 {
 #define MAX_ARGS	32
-	char	buf[BUFSIZ];
+	char buf[4096];
 	char *progname = av[0];		/* original program name */
 	const char *cmd = NULL;		/* preprocessor name, if any */
 	const char *filename = av[ac-1]; /* file to read */
@@ -552,7 +553,7 @@ ipfw_readfile(int ac, char *av[])
 		}
 	}
 
-	while (fgets(buf, BUFSIZ, f)) {		/* read commands */
+	while (fgets(buf, sizeof(buf), f)) {		/* read commands */
 		char linename[20];
 		char *args[2];
 
diff --git a/sbin/mount/mount.8 b/sbin/mount/mount.8
index 2140b3724b78..fdfd75c031a2 100644
--- a/sbin/mount/mount.8
+++ b/sbin/mount/mount.8
@@ -28,7 +28,7 @@
 .\"     @(#)mount.8	8.8 (Berkeley) 6/16/94
 .\" $FreeBSD$
 .\"
-.Dd April 28, 2011
+.Dd June 6, 2011
 .Dt MOUNT 8
 .Os
 .Sh NAME
@@ -348,7 +348,6 @@ option) may be passed as a comma separated list; these options are
 distinguished by a leading
 .Dq \&-
 (dash).
-Options that take a value are specified using the syntax -option=value.
 For example, the
 .Nm
 command:
@@ -363,6 +362,16 @@ to execute the equivalent of:
 /sbin/mount_cd9660 -e /dev/cd0 /cdrom
 .Ed
 .Pp
+Options that take a value are specified using the -option=value syntax:
+.Bd -literal -offset indent
+mount -t msdosfs -o -u=fred,-g=wheel /dev/da0s1 /mnt
+.Ed
+.Pp
+is equivalent to
+.Bd -literal -offset indent
+/sbin/mount_msdosfs -u fred -g wheel /dev/da0s1 /mnt
+.Ed
+.Pp
 Additional options specific to file system types
 which are not internally known
 (see the description of the
diff --git a/sbin/mount/mount.c b/sbin/mount/mount.c
index acded1c24379..2229419bc2ce 100644
--- a/sbin/mount/mount.c
+++ b/sbin/mount/mount.c
@@ -243,7 +243,7 @@ main(int argc, char *argv[])
 	const char *mntfromname, **vfslist, *vfstype;
 	struct fstab *fs;
 	struct statfs *mntbuf;
-	int all, ch, i, init_flags, late, mntsize, rval, have_fstab, ro;
+	int all, ch, i, init_flags, late, failok, mntsize, rval, have_fstab, ro;
 	char *cp, *ep, *options;
 
 	all = init_flags = late = 0;
@@ -328,6 +328,10 @@ main(int argc, char *argv[])
 					continue;
 				if (hasopt(fs->fs_mntops, "late") && !late)
 					continue;
+				if (hasopt(fs->fs_mntops, "failok"))
+					failok = 1;
+				else
+					failok = 0;
 				if (!(init_flags & MNT_UPDATE) &&
 				    ismounted(fs, mntbuf, mntsize))
 					continue;
@@ -335,7 +339,7 @@ main(int argc, char *argv[])
 				    mntbuf->f_flags);
 				if (mountfs(fs->fs_vfstype, fs->fs_spec,
 				    fs->fs_file, init_flags, options,
-				    fs->fs_mntops))
+				    fs->fs_mntops) && !failok)
 					rval = 1;
 			}
 		} else if (fstab_style) {
@@ -717,6 +721,14 @@ mangle(char *options, struct cpa *a)
 				 * before mountd starts.
 				 */
 				continue;
+			} else if (strcmp(p, "failok") == 0) {
+				/*
+				 * "failok" is used to prevent certain file
+				 * systems from causing the system to
+				 * drop into single user mode in the boot
+				 * cycle, and is not a real mount option.
+				 */
+				continue;
 			} else if (strncmp(p, "mountprog", 9) == 0) {
 				/*
 				 * "mountprog" is used to force the use of
diff --git a/sbin/rcorder/rcorder.8 b/sbin/rcorder/rcorder.8
index b17b0a337ca1..a47f01306c33 100644
--- a/sbin/rcorder/rcorder.8
+++ b/sbin/rcorder/rcorder.8
@@ -31,7 +31,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd June 9, 2008
+.Dd June 6, 2011
 .Dt RCORDER 8
 .Os
 .Sh NAME
@@ -89,6 +89,12 @@ and
 lines may appear, but all such lines must appear in a sequence without
 any intervening lines, as once a line that does not follow the format
 is reached, parsing stops.
+Note that for historical reasons,
+.Dq Li REQUIRES ,
+.Dq Li PROVIDES ,
+and
+.Dq Li KEYWORDS
+are also accepted in addition to the above.
 .Pp
 The options are as follows:
 .Bl -tag -width indent
diff --git a/share/man/man4/amdsbwd.4 b/share/man/man4/amdsbwd.4
index 370cfa8220c0..cfb2f79624d8 100644
--- a/share/man/man4/amdsbwd.4
+++ b/share/man/man4/amdsbwd.4
@@ -25,12 +25,12 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd November 30, 2009
+.Dd June 7, 2011
 .Dt AMDSBWD 4
 .Os
 .Sh NAME
 .Nm amdsbwd
-.Nd device driver for the AMD SB600/SB700/SB710/SB750 watchdog timer
+.Nd device driver for the AMD SB600/SB7xx/SB8xx watchdog timers
 .Sh SYNOPSIS
 To compile this driver into the kernel,
 place the following line in your
@@ -51,7 +51,7 @@ The
 driver provides
 .Xr watchdog 4
 support for the watchdog timers present on
-AMD SB600 and SB7xx south bridge chips.
+AMD SB600, SB7xx and SB8xx southbridges.
 .Sh SEE ALSO
 .Xr watchdog 4 ,
 .Xr watchdog 8 ,
@@ -61,12 +61,14 @@ AMD SB600 and SB7xx south bridge chips.
 The
 .Nm
 driver first appeared in
-.Fx 9.0 .
+.Fx 7.3
+and
+.Fx 8.1 .
 .Sh AUTHORS
 .An -nosplit
 The
 .Nm
 driver was written by
-.An Andiry Gapon Aq avg@FreeBSD.org .
+.An Andriy Gapon Aq avg@FreeBSD.org .
 This manual page was written by
-.An Andiry Gapon Aq avg@FreeBSD.org .
+.An Andriy Gapon Aq avg@FreeBSD.org .
diff --git a/share/man/man4/atkbd.4 b/share/man/man4/atkbd.4
index 0c486e2eefbc..73831c2bb538 100644
--- a/share/man/man4/atkbd.4
+++ b/share/man/man4/atkbd.4
@@ -26,7 +26,7 @@
 .\"
 .\" $FreeBSD$
 .\"
-.Dd May 20, 2011
+.Dd January 29, 2008
 .Dt ATKBD 4
 .Os
 .Sh NAME
@@ -176,11 +176,6 @@ When this option is given, the
 .Nm
 driver will not test the keyboard port during the probe routine.
 Some machines hang during boot when this test is performed.
-.It bit 4 (PROBE_TYPEMATIC)
-When this option is given, the
-.Nm
-driver will try to probe the keyboard typematic rate on boot.
-Some machines hang during boot when this test is performed.
 .El
 .\".Sh FILES
 .Sh EXAMPLES
diff --git a/share/man/man5/fstab.5 b/share/man/man5/fstab.5
index 1c6f17e40c51..adbf48928ef1 100644
--- a/share/man/man5/fstab.5
+++ b/share/man/man5/fstab.5
@@ -32,7 +32,7 @@
 .\"     @(#)fstab.5	8.1 (Berkeley) 6/5/93
 .\" $FreeBSD$
 .\"
-.Dd November 23, 2008
+.Dd June 7, 2011
 .Dt FSTAB 5
 .Os
 .Sh NAME
@@ -70,7 +70,8 @@ remote file system to be mounted.
 The second field,
 .Pq Fa fs_file ,
 describes the mount point for the file system.
-For swap partitions, this field should be specified as ``none''.
+For swap partitions, this field should be specified as
+.Dq none .
 .Pp
 The third field,
 .Pq Fa fs_vfstype ,
@@ -125,7 +126,11 @@ sync,noatime,-m=644,-M=755,-u=foo,-g=bar
 in the option field of
 .Nm .
 .Pp
-If the options ``userquota'' and/or ``groupquota'' are specified,
+If the options
+.Dq userquota
+and/or
+.Dq groupquota
+are specified,
 the file system is automatically processed by the
 .Xr quotacheck 8
 command, and user and/or group disk quotas are enabled with
@@ -147,7 +152,18 @@ this location can be specified as:
 userquota=/var/quotas/tmp.user
 .Ed
 .Pp
-If the option ``noauto'' is specified, the file system will not be automatically
+If the option
+.Dq failok
+is specified,
+the system will ignore any error that occurs while mounting that file system
+and that would otherwise cause the system to drop into single-user mode.
+This option is implemented by the
+.Xr mount 8
+command and will not be passed to the kernel.
+.Pp
+If the option
+.Dq noauto
+is specified, the file system will not be automatically
 mounted at system startup.
 Note that, for network file systems
 of third party types
@@ -170,13 +186,19 @@ field (it is not deleted from the
 field).
 If
 .Fa fs_type
-is ``rw'' or ``ro'' then the file system whose name is given in the
+is
+.Dq rw
+or
+.Dq ro
+then the file system whose name is given in the
 .Fa fs_file
 field is normally mounted read-write or read-only on the
 specified special file.
 If
 .Fa fs_type
-is ``sw'' then the special file is made available as a piece of swap
+is
+.Dq sw
+then the special file is made available as a piece of swap
 space by the
 .Xr swapon 8
 command at the end of the system reboot procedure.
@@ -187,7 +209,9 @@ and
 are unused.
 If
 .Fa fs_type
-is specified as ``xx'' the entry is ignored.
+is specified as
+.Dq xx
+the entry is ignored.
 This is useful to show disk partitions which are currently unused.
 .Pp
 The fifth field,
diff --git a/share/misc/committers-ports.dot b/share/misc/committers-ports.dot
index bfe9f4ed9fd4..d97abcc92978 100644
--- a/share/misc/committers-ports.dot
+++ b/share/misc/committers-ports.dot
@@ -97,6 +97,7 @@ itetcu [label="Ion-Mihai Tetcu\nitetcu@FreeBSD.org\n2006/06/07"]
 jacula [label="Giuseppe Pilichi\njacula@FreeBSD.org\n2010/04/05"]
 jadawin [label="Philippe Audeoud\njadawin@FreeBSD.org\n2008/03/02"]
 jkim [label="Jung-uk Kim\njkim@FreeBSD.org\n2007/09/12"]
+jlaffaye [label="Julien Laffaye\njlaffaye@FreeBSD.org\n2011/06/06"]
 jmelo [label="Jean Milanez Melo\njmelo@FreeBSD.org\n2006/03/31"]
 joerg [label="Joerg Wunsch\njoerg@FreeBSD.org\n1994/08/22"]
 johans [label="Johan Selst\njohans@FreeBSD.org\n2006/04/01"]
@@ -204,6 +205,8 @@ arved -> stefan
 
 asami -> obrien
 
+bapt -> jlaffaye
+
 beat -> decke
 
 beech -> glarkin
@@ -401,6 +404,7 @@ tabthorpe -> dhn
 tabthorpe -> fluffy
 tabthorpe -> jacula
 tabthorpe -> jadawin
+tabthorpe -> jlaffaye
 tabthorpe -> pgj
 tabthorpe -> rene
 
diff --git a/sys/amd64/acpica/acpi_wakeup.c b/sys/amd64/acpica/acpi_wakeup.c
index 57341c938f14..29e66c52a2f2 100644
--- a/sys/amd64/acpica/acpi_wakeup.c
+++ b/sys/amd64/acpica/acpi_wakeup.c
@@ -78,7 +78,7 @@ static void		acpi_stop_beep(void *);
 
 #ifdef SMP
 static int		acpi_wakeup_ap(struct acpi_softc *, int);
-static void		acpi_wakeup_cpus(struct acpi_softc *, cpumask_t);
+static void		acpi_wakeup_cpus(struct acpi_softc *, const cpuset_t *);
 #endif
 
 #define	WAKECODE_VADDR(sc)	((sc)->acpi_wakeaddr + (3 * PAGE_SIZE))
@@ -173,7 +173,7 @@ acpi_wakeup_ap(struct acpi_softc *sc, int cpu)
 #define	BIOS_WARM		(0x0a)
 
 static void
-acpi_wakeup_cpus(struct acpi_softc *sc, cpumask_t wakeup_cpus)
+acpi_wakeup_cpus(struct acpi_softc *sc, const cpuset_t *wakeup_cpus)
 {
 	uint32_t	mpbioswarmvec;
 	int		cpu;
@@ -192,7 +192,7 @@ acpi_wakeup_cpus(struct acpi_softc *sc, cpumask_t wakeup_cpus)
 
 	/* Wake up each AP. */
 	for (cpu = 1; cpu < mp_ncpus; cpu++) {
-		if ((wakeup_cpus & (1 << cpu)) == 0)
+		if (!CPU_ISSET(cpu, wakeup_cpus))
 			continue;
 		if (acpi_wakeup_ap(sc, cpu) == 0) {
 			/* restore the warmstart vector */
@@ -214,7 +214,7 @@ int
 acpi_sleep_machdep(struct acpi_softc *sc, int state)
 {
 #ifdef SMP
-	cpumask_t	wakeup_cpus;
+	cpuset_t	wakeup_cpus;
 #endif
 	register_t	cr3, rf;
 	ACPI_STATUS	status;
@@ -244,10 +244,9 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state)
 
 	if (savectx(susppcbs[0])) {
 #ifdef SMP
-		if (wakeup_cpus != 0 && suspend_cpus(wakeup_cpus) == 0) {
-			device_printf(sc->acpi_dev,
-			    "Failed to suspend APs: CPU mask = 0x%jx\n",
-			    (uintmax_t)(wakeup_cpus & ~stopped_cpus));
+		if (!CPU_EMPTY(&wakeup_cpus) &&
+		    suspend_cpus(wakeup_cpus) == 0) {
+			device_printf(sc->acpi_dev, "Failed to suspend APs\n");
 			goto out;
 		}
 #endif
@@ -282,8 +281,8 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state)
 		PCPU_SET(switchtime, 0);
 		PCPU_SET(switchticks, ticks);
 #ifdef SMP
-		if (wakeup_cpus != 0)
-			acpi_wakeup_cpus(sc, wakeup_cpus);
+		if (!CPU_EMPTY(&wakeup_cpus))
+			acpi_wakeup_cpus(sc, &wakeup_cpus);
 #endif
 		acpi_resync_clock(sc);
 		ret = 0;
@@ -291,7 +290,7 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state)
 
 out:
 #ifdef SMP
-	if (wakeup_cpus != 0)
+	if (!CPU_EMPTY(&wakeup_cpus))
 		restart_cpus(wakeup_cpus);
 #endif
 
diff --git a/sys/amd64/amd64/intr_machdep.c b/sys/amd64/amd64/intr_machdep.c
index 4edef81c01fa..3a8953153ff8 100644
--- a/sys/amd64/amd64/intr_machdep.c
+++ b/sys/amd64/amd64/intr_machdep.c
@@ -443,8 +443,7 @@ DB_SHOW_COMMAND(irqs, db_show_irqs)
  * allocate CPUs round-robin.
  */
 
-/* The BSP is always a valid target. */
-static cpumask_t intr_cpus = (1 << 0);
+static cpuset_t intr_cpus;
 static int current_cpu;
 
 /*
@@ -466,7 +465,7 @@ intr_next_cpu(void)
 		current_cpu++;
 		if (current_cpu > mp_maxid)
 			current_cpu = 0;
-	} while (!(intr_cpus & (1 << current_cpu)));
+	} while (!CPU_ISSET(current_cpu, &intr_cpus));
 	mtx_unlock_spin(&icu_lock);
 	return (apic_id);
 }
@@ -497,7 +496,7 @@ intr_add_cpu(u_int cpu)
 		printf("INTR: Adding local APIC %d as a target\n",
 		    cpu_apic_ids[cpu]);
 
-	intr_cpus |= (1 << cpu);
+	CPU_SET(cpu, &intr_cpus);
 }
 
 /*
@@ -510,6 +509,9 @@ intr_shuffle_irqs(void *arg __unused)
 	struct intsrc *isrc;
 	int i;
 
+	/* The BSP is always a valid target; do not drop added CPUs. */
+	CPU_SET(0, &intr_cpus);
+
 	/* Don't bother on UP. */
 	if (mp_ncpus == 1)
 		return;
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index 94b4037c8e20..f90ad03a5917 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$");
 #include "opt_isa.h"
 #include "opt_kstack_pages.h"
 #include "opt_maxmem.h"
+#include "opt_mp_watchdog.h"
 #include "opt_perfmon.h"
 #include "opt_sched.h"
 #include "opt_kdtrace.h"
@@ -116,6 +117,7 @@ __FBSDID("$FreeBSD$");
 #include <x86/mca.h>
 #include <machine/md_var.h>
 #include <machine/metadata.h>
+#include <machine/mp_watchdog.h>
 #include <machine/pc/bios.h>
 #include <machine/pcb.h>
 #include <machine/proc.h>
@@ -734,9 +736,8 @@ cpu_idle(int busy)
 
 	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
 	    busy, curcpu);
-#ifdef SMP
-	if (mp_grab_cpu_hlt())
-		return;
+#ifdef MP_WATCHDOG
+	ap_watchdog(PCPU_GET(cpuid));
 #endif
 	/* If we are busy - try to use fast methods. */
 	if (busy) {
diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c
index 5c900344518b..53988e985726 100644
--- a/sys/amd64/amd64/mp_machdep.c
+++ b/sys/amd64/amd64/mp_machdep.c
@@ -29,13 +29,13 @@ __FBSDID("$FreeBSD$");
 
 #include "opt_cpu.h"
 #include "opt_kstack_pages.h"
-#include "opt_mp_watchdog.h"
 #include "opt_sched.h"
 #include "opt_smp.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/bus.h>
+#include <sys/cpuset.h>
 #ifdef GPROF 
 #include <sys/gmon.h>
 #endif
@@ -63,7 +63,6 @@ __FBSDID("$FreeBSD$");
 #include <machine/cpufunc.h>
 #include <x86/mca.h>
 #include <machine/md_var.h>
-#include <machine/mp_watchdog.h>
 #include <machine/pcb.h>
 #include <machine/psl.h>
 #include <machine/smp.h>
@@ -125,7 +124,7 @@ extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32);
  * Local data and functions.
  */
 
-static volatile cpumask_t ipi_nmi_pending;
+static volatile cpuset_t ipi_nmi_pending;
 
 /* used to hold the AP's until we are ready to release them */
 static struct mtx ap_boot_mtx;
@@ -159,11 +158,8 @@ static int	start_all_aps(void);
 static int	start_ap(int apic_id);
 static void	release_aps(void *dummy);
 
-static int	hlt_logical_cpus;
 static u_int	hyperthreading_cpus;	/* logical cpus sharing L1 cache */
-static cpumask_t	hyperthreading_cpus_mask;
 static int	hyperthreading_allowed = 1;
-static struct	sysctl_ctx_list logical_cpu_clist;
 static u_int	bootMP_size;
 
 static void
@@ -241,8 +237,11 @@ topo_probe_0x4(void)
 	 * logical processors that belong to the same core
 	 * as BSP thus deducing number of threads per core.
 	 */
-	cpuid_count(0x04, 0, p);
-	max_cores = ((p[0] >> 26) & 0x3f) + 1;
+	if (cpu_high >= 0x4) {
+		cpuid_count(0x04, 0, p);
+		max_cores = ((p[0] >> 26) & 0x3f) + 1;
+	} else
+		max_cores = 1;
 	core_id_bits = mask_width(max_logical/max_cores);
 	if (core_id_bits < 0)
 		return;
@@ -334,7 +333,7 @@ topo_probe(void)
 	if (cpu_topo_probed)
 		return;
 
-	logical_cpus_mask = 0;
+	CPU_ZERO(&logical_cpus_mask);
 	if (mp_ncpus <= 1)
 		cpu_cores = cpu_logical = 1;
 	else if (cpu_vendor_id == CPU_VENDOR_AMD)
@@ -478,7 +477,7 @@ cpu_mp_probe(void)
 	 * Always record BSP in CPU map so that the mbuf init code works
 	 * correctly.
 	 */
-	all_cpus = 1;
+	CPU_SETOF(0, &all_cpus);
 	if (mp_ncpus == 0) {
 		/*
 		 * No CPUs were found, so this must be a UP system.  Setup
@@ -605,6 +604,7 @@ cpu_mp_announce(void)
 void
 init_secondary(void)
 {
+	cpuset_t tcpuset, tallcpus;
 	struct pcpu *pc;
 	struct nmi_pcpu *np;
 	u_int64_t msr, cr0;
@@ -736,19 +736,17 @@ init_secondary(void)
 
 	CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
 	printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
+	tcpuset = PCPU_GET(cpumask);
 
 	/* Determine if we are a logical CPU. */
 	/* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */
 	if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0)
-		logical_cpus_mask |= PCPU_GET(cpumask);
-	
-	/* Determine if we are a hyperthread. */
-	if (hyperthreading_cpus > 1 &&
-	    PCPU_GET(apic_id) % hyperthreading_cpus != 0)
-		hyperthreading_cpus_mask |= PCPU_GET(cpumask);
+		CPU_OR(&logical_cpus_mask, &tcpuset);
 
 	/* Build our map of 'other' CPUs. */
-	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+	tallcpus = all_cpus;
+	CPU_NAND(&tallcpus, &tcpuset);
+	PCPU_SET(other_cpus, tallcpus);
 
 	if (bootverbose)
 		lapic_dump("AP");
@@ -835,7 +833,7 @@ assign_cpu_ids(void)
 
 		if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) {
 			cpu_info[i].cpu_hyperthread = 1;
-#if defined(SCHED_ULE)
+
 			/*
 			 * Don't use HT CPU if it has been disabled by a
 			 * tunable.
@@ -844,7 +842,6 @@ assign_cpu_ids(void)
 				cpu_info[i].cpu_disabled = 1;
 				continue;
 			}
-#endif
 		}
 
 		/* Don't use this CPU if it has been disabled by a tunable. */
@@ -854,6 +851,11 @@ assign_cpu_ids(void)
 		}
 	}
 
+	if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) {
+		hyperthreading_cpus = 0;
+		cpu_logical = 1;
+	}
+
 	/*
 	 * Assign CPU IDs to local APIC IDs and disable any CPUs
 	 * beyond MAXCPU.  CPU 0 is always assigned to the BSP.
@@ -891,6 +893,7 @@ assign_cpu_ids(void)
 static int
 start_all_aps(void)
 {
+	cpuset_t tallcpus, tcpuset;
 	vm_offset_t va = boot_address + KERNBASE;
 	u_int64_t *pt4, *pt3, *pt2;
 	u_int32_t mpbioswarmvec;
@@ -955,11 +958,14 @@ start_all_aps(void)
 			panic("AP #%d (PHY# %d) failed!", cpu, apic_id);
 		}
 
-		all_cpus |= (1 << cpu);		/* record AP in CPU map */
+		CPU_SET(cpu, &all_cpus);	/* record AP in CPU map */
 	}
 
 	/* build our map of 'other' CPUs */
-	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+	tallcpus = all_cpus;
+	tcpuset = PCPU_GET(cpumask);
+	CPU_NAND(&tallcpus, &tcpuset);
+	PCPU_SET(other_cpus, tallcpus);
 
 	/* restore the warmstart vector */
 	*(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
@@ -1087,6 +1093,30 @@ SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
     &ipi_masked_range_size, 0, "");
 #endif /* COUNT_XINVLTLB_HITS */
 
+/*
+ * Send an IPI to the specified CPU, handling the bitmap logic.
+ */
+static void
+ipi_send_cpu(int cpu, u_int ipi)
+{
+	u_int bitmap, old_pending, new_pending;
+
+	KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu));
+
+	if (IPI_IS_BITMAPED(ipi)) {
+		bitmap = 1 << ipi;
+		ipi = IPI_BITMAP_VECTOR;
+		do {
+			old_pending = cpu_ipi_pending[cpu];
+			new_pending = old_pending | bitmap;
+		} while  (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
+		    old_pending, new_pending)); 
+		if (old_pending)
+			return;
+	}
+	lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
+}
+
 /*
  * Flush the TLB on all other CPU's
  */
@@ -1111,28 +1141,19 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
 }
 
 static void
-smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
 {
-	int ncpu, othercpus;
+	int cpu, ncpu, othercpus;
 
 	othercpus = mp_ncpus - 1;
-	if (mask == (cpumask_t)-1) {
-		ncpu = othercpus;
-		if (ncpu < 1)
+	if (CPU_ISFULLSET(&mask)) {
+		if (othercpus < 1)
 			return;
 	} else {
-		mask &= ~PCPU_GET(cpumask);
-		if (mask == 0)
-			return;
-		ncpu = bitcount32(mask);
-		if (ncpu > othercpus) {
-			/* XXX this should be a panic offence */
-			printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
-			    ncpu, othercpus);
-			ncpu = othercpus;
-		}
-		/* XXX should be a panic, implied by mask == 0 above */
-		if (ncpu < 1)
+		sched_pin();
+		CPU_NAND(&mask, PCPU_PTR(cpumask));
+		sched_unpin();
+		if (CPU_EMPTY(&mask))
 			return;
 	}
 	if (!(read_rflags() & PSL_I))
@@ -1141,39 +1162,25 @@ smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_o
 	smp_tlb_addr1 = addr1;
 	smp_tlb_addr2 = addr2;
 	atomic_store_rel_int(&smp_tlb_wait, 0);
-	if (mask == (cpumask_t)-1)
+	if (CPU_ISFULLSET(&mask)) {
+		ncpu = othercpus;
 		ipi_all_but_self(vector);
-	else
-		ipi_selected(mask, vector);
+	} else {
+		ncpu = 0;
+		while ((cpu = cpusetobj_ffs(&mask)) != 0) {
+			cpu--;
+			CPU_CLR(cpu, &mask);
+			CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__,
+			    cpu, vector);
+			ipi_send_cpu(cpu, vector);
+			ncpu++;
+		}
+	}
 	while (smp_tlb_wait < ncpu)
 		ia32_pause();
 	mtx_unlock_spin(&smp_ipi_mtx);
 }
 
-/*
- * Send an IPI to specified CPU handling the bitmap logic.
- */
-static void
-ipi_send_cpu(int cpu, u_int ipi)
-{
-	u_int bitmap, old_pending, new_pending;
-
-	KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu));
-
-	if (IPI_IS_BITMAPED(ipi)) {
-		bitmap = 1 << ipi;
-		ipi = IPI_BITMAP_VECTOR;
-		do {
-			old_pending = cpu_ipi_pending[cpu];
-			new_pending = old_pending | bitmap;
-		} while  (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
-		    old_pending, new_pending)); 
-		if (old_pending)
-			return;
-	}
-	lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
-}
-
 void
 smp_cache_flush(void)
 {
@@ -1220,7 +1227,7 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
 }
 
 void
-smp_masked_invltlb(cpumask_t mask)
+smp_masked_invltlb(cpuset_t mask)
 {
 
 	if (smp_started) {
@@ -1232,7 +1239,7 @@ smp_masked_invltlb(cpumask_t mask)
 }
 
 void
-smp_masked_invlpg(cpumask_t mask, vm_offset_t addr)
+smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
 {
 
 	if (smp_started) {
@@ -1244,7 +1251,7 @@ smp_masked_invlpg(cpumask_t mask, vm_offset_t addr)
 }
 
 void
-smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2)
+smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
 {
 
 	if (smp_started) {
@@ -1297,7 +1304,7 @@ ipi_bitmap_handler(struct trapframe frame)
  * send an IPI to a set of cpus.
  */
 void
-ipi_selected(cpumask_t cpus, u_int ipi)
+ipi_selected(cpuset_t cpus, u_int ipi)
 {
 	int cpu;
 
@@ -1307,12 +1314,12 @@ ipi_selected(cpumask_t cpus, u_int ipi)
 	 * Set the mask of receiving CPUs for this purpose.
 	 */
 	if (ipi == IPI_STOP_HARD)
-		atomic_set_int(&ipi_nmi_pending, cpus);
+		CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus);
 
-	CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
-	while ((cpu = ffs(cpus)) != 0) {
+	while ((cpu = cpusetobj_ffs(&cpus)) != 0) {
 		cpu--;
-		cpus &= ~(1 << cpu);
+		CPU_CLR(cpu, &cpus);
+		CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
 		ipi_send_cpu(cpu, ipi);
 	}
 }
@@ -1330,7 +1337,7 @@ ipi_cpu(int cpu, u_int ipi)
 	 * Set the mask of receiving CPUs for this purpose.
 	 */
 	if (ipi == IPI_STOP_HARD)
-		atomic_set_int(&ipi_nmi_pending, 1 << cpu);
+		CPU_SET_ATOMIC(cpu, &ipi_nmi_pending);
 
 	CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
 	ipi_send_cpu(cpu, ipi);
@@ -1343,8 +1350,10 @@ void
 ipi_all_but_self(u_int ipi)
 {
 
+	sched_pin();
 	if (IPI_IS_BITMAPED(ipi)) {
 		ipi_selected(PCPU_GET(other_cpus), ipi);
+		sched_unpin();
 		return;
 	}
 
@@ -1354,7 +1363,8 @@ ipi_all_but_self(u_int ipi)
 	 * Set the mask of receiving CPUs for this purpose.
 	 */
 	if (ipi == IPI_STOP_HARD)
-		atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus));
+		CPU_OR_ATOMIC(&ipi_nmi_pending, PCPU_PTR(other_cpus));
+	sched_unpin();
 
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 	lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
@@ -1363,7 +1373,7 @@ ipi_all_but_self(u_int ipi)
 int
 ipi_nmi_handler()
 {
-	cpumask_t cpumask;
+	cpuset_t cpumask;
 
 	/*
 	 * As long as there is not a simple way to know about a NMI's
@@ -1371,11 +1381,13 @@ ipi_nmi_handler()
 	 * the global pending bitword an IPI_STOP_HARD has been issued
 	 * and should be handled.
 	 */
+	sched_pin();
 	cpumask = PCPU_GET(cpumask);
-	if ((ipi_nmi_pending & cpumask) == 0)
+	sched_unpin();
+	if (!CPU_OVERLAP(&ipi_nmi_pending, &cpumask))
 		return (1);
 
-	atomic_clear_int(&ipi_nmi_pending, cpumask);
+	CPU_NAND_ATOMIC(&ipi_nmi_pending, &cpumask);
 	cpustop_handler();
 	return (0);
 }
@@ -1387,23 +1399,25 @@ ipi_nmi_handler()
 void
 cpustop_handler(void)
 {
-	cpumask_t cpumask;
+	cpuset_t cpumask;
 	u_int cpu;
 
+	sched_pin();
 	cpu = PCPU_GET(cpuid);
 	cpumask = PCPU_GET(cpumask);
+	sched_unpin();
 
 	savectx(&stoppcbs[cpu]);
 
 	/* Indicate that we are stopped */
-	atomic_set_int(&stopped_cpus, cpumask);
+	CPU_OR_ATOMIC(&stopped_cpus, &cpumask);
 
 	/* Wait for restart */
-	while (!(started_cpus & cpumask))
+	while (!CPU_OVERLAP(&started_cpus, &cpumask))
 	    ia32_pause();
 
-	atomic_clear_int(&started_cpus, cpumask);
-	atomic_clear_int(&stopped_cpus, cpumask);
+	CPU_NAND_ATOMIC(&started_cpus, &cpumask);
+	CPU_NAND_ATOMIC(&stopped_cpus, &cpumask);
 
 	if (cpu == 0 && cpustop_restartfunc != NULL) {
 		cpustop_restartfunc();
@@ -1418,7 +1432,7 @@ cpustop_handler(void)
 void
 cpususpend_handler(void)
 {
-	cpumask_t cpumask;
+	cpuset_t cpumask;
 	register_t cr3, rf;
 	u_int cpu;
 
@@ -1430,7 +1444,7 @@ cpususpend_handler(void)
 
 	if (savectx(susppcbs[cpu])) {
 		wbinvd();
-		atomic_set_int(&stopped_cpus, cpumask);
+		CPU_OR_ATOMIC(&stopped_cpus, &cpumask);
 	} else {
 		pmap_init_pat();
 		PCPU_SET(switchtime, 0);
@@ -1438,11 +1452,11 @@ cpususpend_handler(void)
 	}
 
 	/* Wait for resume */
-	while (!(started_cpus & cpumask))
+	while (!CPU_OVERLAP(&started_cpus, &cpumask))
 		ia32_pause();
 
-	atomic_clear_int(&started_cpus, cpumask);
-	atomic_clear_int(&stopped_cpus, cpumask);
+	CPU_NAND_ATOMIC(&started_cpus, &cpumask);
+	CPU_NAND_ATOMIC(&stopped_cpus, &cpumask);
 
 	/* Restore CR3 and enable interrupts */
 	load_cr3(cr3);
@@ -1467,158 +1481,6 @@ release_aps(void *dummy __unused)
 }
 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
 
-static int
-sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
-{
-	cpumask_t mask;
-	int error;
-
-	mask = hlt_cpus_mask;
-	error = sysctl_handle_int(oidp, &mask, 0, req);
-	if (error || !req->newptr)
-		return (error);
-
-	if (logical_cpus_mask != 0 &&
-	    (mask & logical_cpus_mask) == logical_cpus_mask)
-		hlt_logical_cpus = 1;
-	else
-		hlt_logical_cpus = 0;
-
-	if (! hyperthreading_allowed)
-		mask |= hyperthreading_cpus_mask;
-
-	if ((mask & all_cpus) == all_cpus)
-		mask &= ~(1<<0);
-	hlt_cpus_mask = mask;
-	return (error);
-}
-SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
-    0, 0, sysctl_hlt_cpus, "IU",
-    "Bitmap of CPUs to halt.  101 (binary) will halt CPUs 0 and 2.");
-
-static int
-sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
-{
-	int disable, error;
-
-	disable = hlt_logical_cpus;
-	error = sysctl_handle_int(oidp, &disable, 0, req);
-	if (error || !req->newptr)
-		return (error);
-
-	if (disable)
-		hlt_cpus_mask |= logical_cpus_mask;
-	else
-		hlt_cpus_mask &= ~logical_cpus_mask;
-
-	if (! hyperthreading_allowed)
-		hlt_cpus_mask |= hyperthreading_cpus_mask;
-
-	if ((hlt_cpus_mask & all_cpus) == all_cpus)
-		hlt_cpus_mask &= ~(1<<0);
-
-	hlt_logical_cpus = disable;
-	return (error);
-}
-
-static int
-sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS)
-{
-	int allowed, error;
-
-	allowed = hyperthreading_allowed;
-	error = sysctl_handle_int(oidp, &allowed, 0, req);
-	if (error || !req->newptr)
-		return (error);
-
-#ifdef SCHED_ULE
-	/*
-	 * SCHED_ULE doesn't allow enabling/disabling HT cores at
-	 * run-time.
-	 */
-	if (allowed != hyperthreading_allowed)
-		return (ENOTSUP);
-	return (error);
-#endif
-
-	if (allowed)
-		hlt_cpus_mask &= ~hyperthreading_cpus_mask;
-	else
-		hlt_cpus_mask |= hyperthreading_cpus_mask;
-
-	if (logical_cpus_mask != 0 &&
-	    (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask)
-		hlt_logical_cpus = 1;
-	else
-		hlt_logical_cpus = 0;
-
-	if ((hlt_cpus_mask & all_cpus) == all_cpus)
-		hlt_cpus_mask &= ~(1<<0);
-
-	hyperthreading_allowed = allowed;
-	return (error);
-}
-
-static void
-cpu_hlt_setup(void *dummy __unused)
-{
-
-	if (logical_cpus_mask != 0) {
-		TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
-		    &hlt_logical_cpus);
-		sysctl_ctx_init(&logical_cpu_clist);
-		SYSCTL_ADD_PROC(&logical_cpu_clist,
-		    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
-		    "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
-		    sysctl_hlt_logical_cpus, "IU", "");
-		SYSCTL_ADD_UINT(&logical_cpu_clist,
-		    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
-		    "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
-		    &logical_cpus_mask, 0, "");
-
-		if (hlt_logical_cpus)
-			hlt_cpus_mask |= logical_cpus_mask;
-
-		/*
-		 * If necessary for security purposes, force
-		 * hyperthreading off, regardless of the value
-		 * of hlt_logical_cpus.
-		 */
-		if (hyperthreading_cpus_mask) {
-			SYSCTL_ADD_PROC(&logical_cpu_clist,
-			    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
-			    "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW,
-			    0, 0, sysctl_hyperthreading_allowed, "IU", "");
-			if (! hyperthreading_allowed)
-				hlt_cpus_mask |= hyperthreading_cpus_mask;
-		}
-	}
-}
-SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
-
-int
-mp_grab_cpu_hlt(void)
-{
-	cpumask_t mask;
-#ifdef MP_WATCHDOG
-	u_int cpuid;
-#endif
-	int retval;
-
-	mask = PCPU_GET(cpumask);
-#ifdef MP_WATCHDOG
-	cpuid = PCPU_GET(cpuid);
-	ap_watchdog(cpuid);
-#endif
-
-	retval = 0;
-	while (mask & hlt_cpus_mask) {
-		retval = 1;
-		__asm __volatile("sti; hlt" : : : "memory");
-	}
-	return (retval);
-}
-
 #ifdef COUNT_IPIS
 /*
  * Setup interrupt counters for IPI handlers.
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index c9ff9bcc14ae..025ca5f981c9 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -123,6 +123,8 @@ __FBSDID("$FreeBSD$");
 #include <sys/sysctl.h>
 #ifdef SMP
 #include <sys/smp.h>
+#else
+#include <sys/cpuset.h>
 #endif
 
 #include <vm/vm.h>
@@ -581,7 +583,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr)
 	PMAP_LOCK_INIT(kernel_pmap);
 	kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys);
 	kernel_pmap->pm_root = NULL;
-	kernel_pmap->pm_active = -1;	/* don't allow deactivation */
+	CPU_FILL(&kernel_pmap->pm_active);	/* don't allow deactivation */
 	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
 
 	/*
@@ -923,19 +925,20 @@ pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
 void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
-	cpumask_t cpumask, other_cpus;
+	cpuset_t cpumask, other_cpus;
 
 	sched_pin();
-	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
 		invlpg(va);
 		smp_invlpg(va);
 	} else {
 		cpumask = PCPU_GET(cpumask);
 		other_cpus = PCPU_GET(other_cpus);
-		if (pmap->pm_active & cpumask)
+		if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
 			invlpg(va);
-		if (pmap->pm_active & other_cpus)
-			smp_masked_invlpg(pmap->pm_active & other_cpus, va);
+		CPU_AND(&other_cpus, &pmap->pm_active);
+		if (!CPU_EMPTY(&other_cpus))
+			smp_masked_invlpg(other_cpus, va);
 	}
 	sched_unpin();
 }
@@ -943,23 +946,23 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
-	cpumask_t cpumask, other_cpus;
+	cpuset_t cpumask, other_cpus;
 	vm_offset_t addr;
 
 	sched_pin();
-	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
 		for (addr = sva; addr < eva; addr += PAGE_SIZE)
 			invlpg(addr);
 		smp_invlpg_range(sva, eva);
 	} else {
 		cpumask = PCPU_GET(cpumask);
 		other_cpus = PCPU_GET(other_cpus);
-		if (pmap->pm_active & cpumask)
+		if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
 			for (addr = sva; addr < eva; addr += PAGE_SIZE)
 				invlpg(addr);
-		if (pmap->pm_active & other_cpus)
-			smp_masked_invlpg_range(pmap->pm_active & other_cpus,
-			    sva, eva);
+		CPU_AND(&other_cpus, &pmap->pm_active);
+		if (!CPU_EMPTY(&other_cpus))
+			smp_masked_invlpg_range(other_cpus, sva, eva);
 	}
 	sched_unpin();
 }
@@ -967,19 +970,20 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 void
 pmap_invalidate_all(pmap_t pmap)
 {
-	cpumask_t cpumask, other_cpus;
+	cpuset_t cpumask, other_cpus;
 
 	sched_pin();
-	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
 		invltlb();
 		smp_invltlb();
 	} else {
 		cpumask = PCPU_GET(cpumask);
 		other_cpus = PCPU_GET(other_cpus);
-		if (pmap->pm_active & cpumask)
+		if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
 			invltlb();
-		if (pmap->pm_active & other_cpus)
-			smp_masked_invltlb(pmap->pm_active & other_cpus);
+		CPU_AND(&other_cpus, &pmap->pm_active);
+		if (!CPU_EMPTY(&other_cpus))
+			smp_masked_invltlb(other_cpus);
 	}
 	sched_unpin();
 }
@@ -995,8 +999,8 @@ pmap_invalidate_cache(void)
 }
 
 struct pde_action {
-	cpumask_t store;	/* processor that updates the PDE */
-	cpumask_t invalidate;	/* processors that invalidate their TLB */
+	cpuset_t store;		/* processor that updates the PDE */
+	cpuset_t invalidate;	/* processors that invalidate their TLB */
 	vm_offset_t va;
 	pd_entry_t *pde;
 	pd_entry_t newpde;
@@ -1007,8 +1011,12 @@ pmap_update_pde_action(void *arg)
 {
 	struct pde_action *act = arg;
 
-	if (act->store == PCPU_GET(cpumask))
+	sched_pin();
+	if (!CPU_CMP(&act->store, PCPU_PTR(cpumask))) {
+		sched_unpin();
 		pde_store(act->pde, act->newpde);
+	} else
+		sched_unpin();
 }
 
 static void
@@ -1016,8 +1024,12 @@ pmap_update_pde_teardown(void *arg)
 {
 	struct pde_action *act = arg;
 
-	if ((act->invalidate & PCPU_GET(cpumask)) != 0)
+	sched_pin();
+	if (CPU_OVERLAP(&act->invalidate, PCPU_PTR(cpumask))) {
+		sched_unpin();
 		pmap_update_pde_invalidate(act->va, act->newpde);
+	} else
+		sched_unpin();
 }
 
 /*
@@ -1032,26 +1044,28 @@ static void
 pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
 {
 	struct pde_action act;
-	cpumask_t active, cpumask;
+	cpuset_t active, cpumask, other_cpus;
 
 	sched_pin();
 	cpumask = PCPU_GET(cpumask);
+	other_cpus = PCPU_GET(other_cpus);
 	if (pmap == kernel_pmap)
 		active = all_cpus;
 	else
 		active = pmap->pm_active;
-	if ((active & PCPU_GET(other_cpus)) != 0) {
+	if (CPU_OVERLAP(&active, &other_cpus)) { 
 		act.store = cpumask;
 		act.invalidate = active;
 		act.va = va;
 		act.pde = pde;
 		act.newpde = newpde;
-		smp_rendezvous_cpus(cpumask | active,
+		CPU_OR(&cpumask, &active);
+		smp_rendezvous_cpus(cpumask,
 		    smp_no_rendevous_barrier, pmap_update_pde_action,
 		    pmap_update_pde_teardown, &act);
 	} else {
 		pde_store(pde, newpde);
-		if ((active & cpumask) != 0)
+		if (CPU_OVERLAP(&active, &cpumask))
 			pmap_update_pde_invalidate(va, newpde);
 	}
 	sched_unpin();
@@ -1065,7 +1079,7 @@ PMAP_INLINE void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
 
-	if (pmap == kernel_pmap || pmap->pm_active)
+	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
 		invlpg(va);
 }
 
@@ -1074,7 +1088,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	vm_offset_t addr;
 
-	if (pmap == kernel_pmap || pmap->pm_active)
+	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
 		for (addr = sva; addr < eva; addr += PAGE_SIZE)
 			invlpg(addr);
 }
@@ -1083,7 +1097,7 @@ PMAP_INLINE void
 pmap_invalidate_all(pmap_t pmap)
 {
 
-	if (pmap == kernel_pmap || pmap->pm_active)
+	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
 		invltlb();
 }
 
@@ -1099,7 +1113,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
 {
 
 	pde_store(pde, newpde);
-	if (pmap == kernel_pmap || pmap->pm_active)
+	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
 		pmap_update_pde_invalidate(va, newpde);
 }
 #endif /* !SMP */
@@ -1607,7 +1621,7 @@ pmap_pinit0(pmap_t pmap)
 	PMAP_LOCK_INIT(pmap);
 	pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys);
 	pmap->pm_root = NULL;
-	pmap->pm_active = 0;
+	CPU_ZERO(&pmap->pm_active);
 	PCPU_SET(curpmap, pmap);
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -1649,7 +1663,7 @@ pmap_pinit(pmap_t pmap)
 	pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | PG_V | PG_RW | PG_A | PG_M;
 
 	pmap->pm_root = NULL;
-	pmap->pm_active = 0;
+	CPU_ZERO(&pmap->pm_active);
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 
@@ -5087,11 +5101,11 @@ pmap_activate(struct thread *td)
 	pmap = vmspace_pmap(td->td_proc->p_vmspace);
 	oldpmap = PCPU_GET(curpmap);
 #ifdef SMP
-	atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
-	atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
+	CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask));
+	CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask));
 #else
-	oldpmap->pm_active &= ~PCPU_GET(cpumask);
-	pmap->pm_active |= PCPU_GET(cpumask);
+	CPU_NAND(&oldpmap->pm_active, PCPU_PTR(cpumask));
+	CPU_OR(&pmap->pm_active, PCPU_PTR(cpumask));
 #endif
 	cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4);
 	td->td_pcb->pcb_cr3 = cr3;
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index 972484a80a08..13f5cd06b2b3 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/mutex.h>
 #include <sys/pioctl.h>
 #include <sys/proc.h>
+#include <sys/sched.h>
 #include <sys/sf_buf.h>
 #include <sys/smp.h>
 #include <sys/sysctl.h>
@@ -70,6 +71,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/cpu.h>
 #include <machine/md_var.h>
 #include <machine/pcb.h>
+#include <machine/smp.h>
 #include <machine/specialreg.h>
 #include <machine/tss.h>
 
@@ -512,11 +514,13 @@ cpu_set_user_tls(struct thread *td, void *tls_base)
 static void
 cpu_reset_proxy()
 {
+	cpuset_t tcrp;
 
 	cpu_reset_proxy_active = 1;
 	while (cpu_reset_proxy_active == 1)
 		;	/* Wait for other cpu to see that we've started */
-	stop_cpus((1<<cpu_reset_proxyid));
+	CPU_SETOF(cpu_reset_proxyid, &tcrp);
+	stop_cpus(tcrp);
 	printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
 	DELAY(1000000);
 	cpu_reset_real();
@@ -527,24 +531,28 @@ void
 cpu_reset()
 {
 #ifdef SMP
-	cpumask_t map;
+	cpuset_t map;
 	u_int cnt;
 
 	if (smp_active) {
-		map = PCPU_GET(other_cpus) & ~stopped_cpus;
-		if (map != 0) {
+		sched_pin();
+		map = PCPU_GET(other_cpus);
+		CPU_NAND(&map, &stopped_cpus);
+		if (!CPU_EMPTY(&map)) {
 			printf("cpu_reset: Stopping other CPUs\n");
 			stop_cpus(map);
 		}
 
 		if (PCPU_GET(cpuid) != 0) {
 			cpu_reset_proxyid = PCPU_GET(cpuid);
+			sched_unpin();
 			cpustop_restartfunc = cpu_reset_proxy;
 			cpu_reset_proxy_active = 0;
 			printf("cpu_reset: Restarting BSP\n");
 
 			/* Restart CPU #0. */
-			atomic_store_rel_int(&started_cpus, 1 << 0);
+			CPU_SETOF(0, &started_cpus);
+			wmb();
 
 			cnt = 0;
 			while (cpu_reset_proxy_active == 0 && cnt < 10000000)
@@ -556,7 +564,8 @@ cpu_reset()
 
 			while (1);
 			/* NOTREACHED */
-		}
+		} else
+			sched_unpin();
 
 		DELAY(1000000);
 	}
diff --git a/sys/amd64/include/_types.h b/sys/amd64/include/_types.h
index 89d2e861b418..13dc3ea28a7a 100644
--- a/sys/amd64/include/_types.h
+++ b/sys/amd64/include/_types.h
@@ -61,7 +61,6 @@ typedef	unsigned long		__uint64_t;
  * Standard type definitions.
  */
 typedef	__int32_t	__clock_t;		/* clock()... */
-typedef	unsigned int	__cpumask_t;
 typedef	__int64_t	__critical_t;
 typedef	double		__double_t;
 typedef	float		__float_t;
diff --git a/sys/amd64/include/pmap.h b/sys/amd64/include/pmap.h
index 7a628513af65..1b8108a3e3c6 100644
--- a/sys/amd64/include/pmap.h
+++ b/sys/amd64/include/pmap.h
@@ -152,6 +152,7 @@
 #ifndef LOCORE
 
 #include <sys/queue.h>
+#include <sys/_cpuset.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 
@@ -251,7 +252,7 @@ struct pmap {
 	struct mtx		pm_mtx;
 	pml4_entry_t		*pm_pml4;	/* KVA of level 4 page table */
 	TAILQ_HEAD(,pv_chunk)	pm_pvchunk;	/* list of mappings in pmap */
-	cpumask_t		pm_active;	/* active on cpus */
+	cpuset_t		pm_active;	/* active on cpus */
 	/* spare u_int here due to padding */
 	struct pmap_statistics	pm_stats;	/* pmap statistics */
 	vm_page_t		pm_root;	/* spare page table pages */
diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h
index ec107f931055..de686b76a445 100644
--- a/sys/amd64/include/smp.h
+++ b/sys/amd64/include/smp.h
@@ -63,17 +63,16 @@ void	ipi_all_but_self(u_int ipi);
 void 	ipi_bitmap_handler(struct trapframe frame);
 void	ipi_cpu(int cpu, u_int ipi);
 int	ipi_nmi_handler(void);
-void	ipi_selected(cpumask_t cpus, u_int ipi);
+void	ipi_selected(cpuset_t cpus, u_int ipi);
 u_int	mp_bootaddress(u_int);
-int	mp_grab_cpu_hlt(void);
 void	smp_cache_flush(void);
 void	smp_invlpg(vm_offset_t addr);
-void	smp_masked_invlpg(cpumask_t mask, vm_offset_t addr);
+void	smp_masked_invlpg(cpuset_t mask, vm_offset_t addr);
 void	smp_invlpg_range(vm_offset_t startva, vm_offset_t endva);
-void	smp_masked_invlpg_range(cpumask_t mask, vm_offset_t startva,
+void	smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva,
 	    vm_offset_t endva);
 void	smp_invltlb(void);
-void	smp_masked_invltlb(cpumask_t mask);
+void	smp_masked_invltlb(cpuset_t mask);
 
 #endif /* !LOCORE */
 #endif /* SMP */
diff --git a/sys/arm/arm/pmap.c b/sys/arm/arm/pmap.c
index 087a744b349f..cecf3638d95f 100644
--- a/sys/arm/arm/pmap.c
+++ b/sys/arm/arm/pmap.c
@@ -2395,7 +2395,7 @@ pmap_bootstrap(vm_offset_t firstaddr, vm_offset_t lastaddr, struct pv_addr *l1pt
 	cpu_cpwait();
 
 	PMAP_LOCK_INIT(kernel_pmap);
-	kernel_pmap->pm_active = -1;
+	CPU_FILL(&kernel_pmap->pm_active);
 	kernel_pmap->pm_domain = PMAP_DOMAIN_KERNEL;
 	TAILQ_INIT(&kernel_pmap->pm_pvlist);
 	
@@ -3826,7 +3826,7 @@ pmap_pinit(pmap_t pmap)
 	pmap_alloc_l1(pmap);
 	bzero(pmap->pm_l2, sizeof(pmap->pm_l2));
 
-	pmap->pm_active = 0;
+	CPU_ZERO(&pmap->pm_active);
 		
 	TAILQ_INIT(&pmap->pm_pvlist);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
diff --git a/sys/arm/include/_types.h b/sys/arm/include/_types.h
index 48dd2a784fd7..d8386f3225d0 100644
--- a/sys/arm/include/_types.h
+++ b/sys/arm/include/_types.h
@@ -67,7 +67,6 @@ typedef	unsigned long long	__uint64_t;
  * Standard type definitions.
  */
 typedef	__uint32_t	__clock_t;		/* clock()... */
-typedef	unsigned int	__cpumask_t;
 typedef	__int32_t	__critical_t;
 typedef	double		__double_t;
 typedef	double		__float_t;
diff --git a/sys/arm/include/pmap.h b/sys/arm/include/pmap.h
index 701390a33852..3d63432e332a 100644
--- a/sys/arm/include/pmap.h
+++ b/sys/arm/include/pmap.h
@@ -62,6 +62,7 @@
 #ifndef LOCORE
 
 #include <sys/queue.h>
+#include <sys/_cpuset.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 
@@ -134,7 +135,7 @@ struct	pmap {
 	struct l1_ttable	*pm_l1;
 	struct l2_dtable	*pm_l2[L2_SIZE];
 	pd_entry_t		*pm_pdir;	/* KVA of page directory */
-	cpumask_t		pm_active;	/* active on cpus */
+	cpuset_t		pm_active;	/* active on cpus */
 	struct pmap_statistics	pm_stats;	/* pmap statictics */
 	TAILQ_HEAD(,pv_entry)	pm_pvlist;	/* list of mappings in pmap */
 };
diff --git a/sys/boot/ia64/common/Makefile b/sys/boot/ia64/common/Makefile
index f16f13d7b0bc..d90898f71db7 100644
--- a/sys/boot/ia64/common/Makefile
+++ b/sys/boot/ia64/common/Makefile
@@ -6,7 +6,7 @@ MK_SSP=		no
 LIB=		ia64
 INTERNALLIB=
 
-SRCS=		autoload.c bootinfo.c copy.c devicename.c exec.c
+SRCS=		autoload.c bootinfo.c copy.c devicename.c exec.c icache.c
 
 CFLAGS+=	-I${.CURDIR}/../../efi/include
 CFLAGS+=	-I${.CURDIR}/../../efi/include/${MACHINE_CPUARCH}
diff --git a/sys/boot/ia64/common/exec.c b/sys/boot/ia64/common/exec.c
index dd9c9ba05e63..65886fa87f93 100644
--- a/sys/boot/ia64/common/exec.c
+++ b/sys/boot/ia64/common/exec.c
@@ -258,6 +258,8 @@ ia64_loadseg(Elf_Ehdr *eh, Elf_Phdr *ph, uint64_t delta)
 	if (ph->p_flags & PF_X) {
 		ia64_text_start = ph->p_vaddr + delta;
 		ia64_text_size = ph->p_memsz;
+
+		ia64_sync_icache(ia64_text_start, ia64_text_size);
 	} else {
 		ia64_data_start = ph->p_vaddr + delta;
 		ia64_data_size = ph->p_memsz;
diff --git a/sys/boot/ia64/common/icache.c b/sys/boot/ia64/common/icache.c
new file mode 100644
index 000000000000..77a35d705b76
--- /dev/null
+++ b/sys/boot/ia64/common/icache.c
@@ -0,0 +1,51 @@
+/*-
+ * Copyright (c) 2011 Marcel Moolenaar
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <stand.h>
+#include <machine/ia64_cpu.h>
+
+#include "libia64.h"
+
+void
+ia64_sync_icache(vm_offset_t va, size_t sz)
+{
+	uintptr_t pa;
+	size_t cnt, max;
+
+	while (sz > 0) {
+		max = sz;
+		pa = (uintptr_t)ia64_va2pa(va, &max);
+		for (cnt = 0; cnt < max; cnt += 32)
+			ia64_fc_i(pa + cnt);
+		ia64_sync_i();
+		va += max;
+		sz -= max;
+	}
+	ia64_srlz_i();
+}
diff --git a/sys/boot/ia64/common/libia64.h b/sys/boot/ia64/common/libia64.h
index 29912f52fe1d..4bc76384dc34 100644
--- a/sys/boot/ia64/common/libia64.h
+++ b/sys/boot/ia64/common/libia64.h
@@ -64,6 +64,7 @@ void ia64_loadseg(void *, void *, uint64_t);
 
 ssize_t ia64_copyin(const void *, vm_offset_t, size_t);
 ssize_t ia64_copyout(vm_offset_t, void *, size_t);
+void ia64_sync_icache(vm_offset_t, size_t);
 ssize_t ia64_readin(int, vm_offset_t, size_t);
 void *ia64_va2pa(vm_offset_t, size_t *);
 
diff --git a/sys/boot/ia64/efi/efimd.c b/sys/boot/ia64/efi/efimd.c
index 0f7f02a08374..0b29e1280ce0 100644
--- a/sys/boot/ia64/efi/efimd.c
+++ b/sys/boot/ia64/efi/efimd.c
@@ -230,3 +230,35 @@ ia64_platform_enter(const char *kernel)
 
 	return (0);
 }
+
+COMMAND_SET(pbvm, "pbvm", "show PBVM details", command_pbvm);
+
+static int
+command_pbvm(int argc, char *argv[])
+{
+	uint64_t limit, pg, start;
+	u_int idx;
+
+	printf("Page table @ %p, size %x\n", ia64_pgtbl, ia64_pgtblsz);
+
+	if (ia64_pgtbl == NULL)
+		return (0);
+
+	limit = ~0;
+	start = ~0;
+	idx = 0;
+	while (ia64_pgtbl[idx] != 0) {
+		pg = ia64_pgtbl[idx];
+		if (pg != limit) {	
+			if (start != ~0)
+				printf("%#lx-%#lx\n", start, limit);
+			start = pg;
+		}
+		limit = pg + IA64_PBVM_PAGE_SIZE;
+		idx++;
+	}
+	if (start != ~0)
+		printf("%#lx-%#lx\n", start, limit);
+
+	return (0);
+}
diff --git a/sys/boot/ia64/efi/main.c b/sys/boot/ia64/efi/main.c
index 485a26d54842..ec12b4266a07 100644
--- a/sys/boot/ia64/efi/main.c
+++ b/sys/boot/ia64/efi/main.c
@@ -153,9 +153,7 @@ main(int argc, CHAR16 *argv[])
 	 */
 	cons_probe();
 
-	printf("\n");
-	printf("%s, Revision %s\n", bootprog_name, bootprog_rev);
-	printf("(%s, %s)\n", bootprog_maker, bootprog_date);
+	printf("\n%s, Revision %s\n", bootprog_name, bootprog_rev);
 
 	find_pal_proc();
 
@@ -214,6 +212,18 @@ static int
 command_quit(int argc, char *argv[])
 {
 	exit(0);
+	/* NOTREACHED */
+	return (CMD_OK);
+}
+
+COMMAND_SET(reboot, "reboot", "reboot the system", command_reboot);
+ 
+static int
+command_reboot(int argc, char *argv[])
+{
+
+	RS->ResetSystem(EfiResetWarm, EFI_SUCCESS, 0, NULL);
+	/* NOTREACHED */
 	return (CMD_OK);
 }
 
@@ -585,3 +595,24 @@ command_hcdp(int argc, char *argv[])
 	printf("<EOT>\n");
 	return (CMD_OK);
 }
+
+COMMAND_SET(about, "about", "about the loader", command_about);
+
+extern uint64_t _start_plabel[];
+
+/* "about" command: show loader build info and image load/entry addresses. */
+static int
+command_about(int argc, char *argv[])
+{
+	EFI_LOADED_IMAGE *img;
+
+	printf("%s\n", bootprog_name);
+	printf("revision %s\n", bootprog_rev);
+	printf("built by %s\n", bootprog_maker);
+	printf("built on %s\n\n", bootprog_date);
+
+	BS->HandleProtocol(IH, &imgid, (VOID**)&img);
+	printf("image loaded at %p\n", img->ImageBase);
+	printf("entry at %#lx (%#lx)\n", _start_plabel[0], _start_plabel[1]);
+	return (CMD_OK);	/* handler is declared int; must report a status */
+}
diff --git a/sys/boot/ia64/efi/version b/sys/boot/ia64/efi/version
index 3a947c8c5639..17d14ea1c70b 100644
--- a/sys/boot/ia64/efi/version
+++ b/sys/boot/ia64/efi/version
@@ -3,6 +3,8 @@ $FreeBSD$
 NOTE ANY CHANGES YOU MAKE TO THE BOOTBLOCKS HERE.  The format of this
 file is important.  Make sure the current version number is on line 6.
 
+3.1:	Add the about, reboot and pbvm commands.
+	I-cache coherency is maintained.
 3.0:	Add support for PBVM.
 2.2:	Create direct mapping based on start address instead of mapping
 	first 256M.
diff --git a/sys/cddl/compat/opensolaris/sys/atomic.h b/sys/cddl/compat/opensolaris/sys/atomic.h
index af9cc5d27e47..f34d77e6f38c 100644
--- a/sys/cddl/compat/opensolaris/sys/atomic.h
+++ b/sys/cddl/compat/opensolaris/sys/atomic.h
@@ -40,8 +40,6 @@
 extern void atomic_add_64(volatile uint64_t *target, int64_t delta);
 extern void atomic_dec_64(volatile uint64_t *target);
 #endif
-#ifndef __LP64__
-#endif
 #ifndef __sparc64__
 extern uint32_t atomic_cas_32(volatile uint32_t *target, uint32_t cmp,
     uint32_t newval);
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c
index 942636b906ce..130c9180be1c 100644
--- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c
@@ -500,9 +500,11 @@ spa_history_log_version(spa_t *spa, history_internal_events_t event)
 		    utsname.nodename, utsname.release, utsname.version,
 		    utsname.machine);
 	}
+#if 0
 	cmn_err(CE_CONT, "!%s version %llu pool %s using %llu",
 	    event == LOG_POOL_IMPORT ? "imported" :
 	    event == LOG_POOL_CREATE ? "created" : "accessed",
 	    (u_longlong_t)current_vers, spa_name(spa), SPA_VERSION);
 #endif
+#endif
 }
diff --git a/sys/cddl/dev/cyclic/i386/cyclic_machdep.c b/sys/cddl/dev/cyclic/i386/cyclic_machdep.c
index 6f9366316115..9ba2fd3c0086 100644
--- a/sys/cddl/dev/cyclic/i386/cyclic_machdep.c
+++ b/sys/cddl/dev/cyclic/i386/cyclic_machdep.c
@@ -123,7 +123,9 @@ reprogram(cyb_arg_t arg __unused, hrtime_t exp)
 static void xcall(cyb_arg_t arg __unused, cpu_t *c, cyc_func_t func,
     void *param)
 {
+	cpuset_t cpus;
 
-	smp_rendezvous_cpus((cpumask_t)1 << c->cpuid,
+	CPU_SETOF(c->cpuid, &cpus);
+	smp_rendezvous_cpus(cpus,
 	    smp_no_rendevous_barrier, func, smp_no_rendevous_barrier, param);
 }
diff --git a/sys/cddl/dev/dtrace/amd64/dtrace_subr.c b/sys/cddl/dev/dtrace/amd64/dtrace_subr.c
index a081f6701ac2..0b86eacfe935 100644
--- a/sys/cddl/dev/dtrace/amd64/dtrace_subr.c
+++ b/sys/cddl/dev/dtrace/amd64/dtrace_subr.c
@@ -113,12 +113,12 @@ dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit))
 void
 dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg)
 {
-	cpumask_t cpus;
+	cpuset_t cpus;
 
 	if (cpu == DTRACE_CPUALL)
 		cpus = all_cpus;
 	else
-		cpus = (cpumask_t)1 << cpu;
+		CPU_SETOF(cpu, &cpus);
 
 	smp_rendezvous_cpus(cpus, smp_no_rendevous_barrier, func,
 	    smp_no_rendevous_barrier, arg);
@@ -374,7 +374,7 @@ dtrace_gethrtime_init(void *arg)
 {
 	struct pcpu *pc;
 	uint64_t tsc_f;
-	cpumask_t map;
+	cpuset_t map;
 	int i;
 
 	/*
@@ -412,7 +412,8 @@ dtrace_gethrtime_init(void *arg)
 			continue;
 
 		pc = pcpu_find(i);
-		map = PCPU_GET(cpumask) | pc->pc_cpumask;
+		map = PCPU_GET(cpumask);
+		CPU_OR(&map, &pc->pc_cpumask);
 
 		smp_rendezvous_cpus(map, NULL,
 		    dtrace_gethrtime_init_cpu,
diff --git a/sys/cddl/dev/dtrace/i386/dtrace_subr.c b/sys/cddl/dev/dtrace/i386/dtrace_subr.c
index 2753ffc5bbf8..412fc38e6a56 100644
--- a/sys/cddl/dev/dtrace/i386/dtrace_subr.c
+++ b/sys/cddl/dev/dtrace/i386/dtrace_subr.c
@@ -30,6 +30,7 @@
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/types.h>
+#include <sys/cpuset.h>
 #include <sys/kernel.h>
 #include <sys/malloc.h>
 #include <sys/kmem.h>
@@ -113,12 +114,12 @@ dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit))
 void
 dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg)
 {
-	cpumask_t cpus;
+	cpuset_t cpus;
 
 	if (cpu == DTRACE_CPUALL)
 		cpus = all_cpus;
 	else
-		cpus = (cpumask_t)1 << cpu;
+		CPU_SETOF(cpu, &cpus);
 
 	smp_rendezvous_cpus(cpus, smp_no_rendevous_barrier, func,
 	    smp_no_rendevous_barrier, arg);
@@ -372,9 +373,9 @@ dtrace_gethrtime_init_cpu(void *arg)
 static void
 dtrace_gethrtime_init(void *arg)
 {
+	cpuset_t map;
 	struct pcpu *pc;
 	uint64_t tsc_f;
-	cpumask_t map;
 	int i;
 
 	/*
@@ -412,7 +413,8 @@ dtrace_gethrtime_init(void *arg)
 			continue;
 
 		pc = pcpu_find(i);
-		map = PCPU_GET(cpumask) | pc->pc_cpumask;
+		map = PCPU_GET(cpumask);
+		CPU_OR(&map, &pc->pc_cpumask);
 
 		smp_rendezvous_cpus(map, NULL,
 		    dtrace_gethrtime_init_cpu,
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index 94311c61cfda..b84d0c535b14 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -432,7 +432,10 @@ options 	KTRACE_REQUEST_POOL=101
 # defined by the KTR_* constants in <sys/ktr.h>.  KTR_MASK defines the
 # initial value of the ktr_mask variable which determines at runtime
 # what events to trace.  KTR_CPUMASK determines which CPU's log
-# events, with bit X corresponding to CPU X.  KTR_VERBOSE enables
+# events, with bit X corresponding to CPU X.  The layout of the string
+# passed as KTR_CPUMASK must match a series of bitmasks, each of them
+# separated by the ", " characters (e.g.
+# KTR_CPUMASK=("0xAF, 0xFFFFFFFFFFFFFFFF")).  KTR_VERBOSE enables
 # dumping of KTR events to the console by default.  This functionality
 # can be toggled via the debug.ktr_verbose sysctl and defaults to off
 # if KTR_VERBOSE is not defined.  See ktr(4) and ktrdump(8) for details.
@@ -441,7 +444,7 @@ options 	KTR
 options 	KTR_ENTRIES=1024
 options 	KTR_COMPILE=(KTR_INTR|KTR_PROC)
 options 	KTR_MASK=KTR_INTR
-options 	KTR_CPUMASK=0x3
+options 	KTR_CPUMASK=("0x3")
 options 	KTR_VERBOSE
 
 #
diff --git a/sys/conf/files b/sys/conf/files
index 59286a5195a0..d654c6f00421 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -2748,6 +2748,7 @@ netinet/ip_gre.c		optional gre inet
 netinet/ip_id.c			optional inet
 netinet/in_mcast.c		optional inet
 netinet/in_pcb.c		optional inet | inet6
+netinet/in_pcbgroup.c		optional inet pcbgroup | inet6 pcbgroup
 netinet/in_proto.c		optional inet | inet6 \
 	compile-with "${NORMAL_C} -I$S/contrib/pf"
 netinet/in_rmx.c		optional inet
@@ -2825,6 +2826,7 @@ netinet6/in6_gif.c		optional gif inet6 | netgraph_gif inet6
 netinet6/in6_ifattach.c		optional inet6
 netinet6/in6_mcast.c		optional inet6
 netinet6/in6_pcb.c		optional inet6
+netinet6/in6_pcbgroup.c		optional inet6 pcbgroup
 netinet6/in6_proto.c		optional inet6
 netinet6/in6_rmx.c		optional inet6
 netinet6/in6_src.c		optional inet6
diff --git a/sys/conf/options b/sys/conf/options
index a608d86382f6..ee696a8674d1 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -419,6 +419,7 @@ MROUTING		opt_mrouting.h
 NCP
 NETATALK		opt_atalk.h
 NFSLOCKD
+PCBGROUP		opt_pcbgroup.h
 RADIX_MPATH		opt_mpath.h
 ROUTETABLES		opt_route.h
 SLIP_IFF_OPTS		opt_slip.h
diff --git a/sys/ddb/db_command.c b/sys/ddb/db_command.c
index 21cb7c5940de..f2e2c42accab 100644
--- a/sys/ddb/db_command.c
+++ b/sys/ddb/db_command.c
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/signalvar.h>
 #include <sys/systm.h>
 #include <sys/cons.h>
+#include <sys/conf.h>
 #include <sys/watchdog.h>
 #include <sys/kernel.h>
 
@@ -64,6 +65,7 @@ db_addr_t	db_last_addr;
 db_addr_t	db_prev;
 db_addr_t	db_next;
 
+static db_cmdfcn_t	db_dump;
 static db_cmdfcn_t	db_fncall;
 static db_cmdfcn_t	db_gdb;
 static db_cmdfcn_t	db_halt;
@@ -102,6 +104,7 @@ static struct command db_cmds[] = {
 	{ "w",		db_write_cmd,		CS_MORE|CS_SET_DOT, 0 },
 	{ "delete",	db_delete_cmd,		0,	0 },
 	{ "d",		db_delete_cmd,		0,	0 },
+	{ "dump",	db_dump,		0,	0 },
 	{ "break",	db_breakpoint_cmd,	0,	0 },
 	{ "b",		db_breakpoint_cmd,	0,	0 },
 	{ "dwatch",	db_deletewatch_cmd,	0,	0 },
@@ -526,6 +529,27 @@ db_error(s)
 	kdb_reenter();
 }
 
+static void
+db_dump(db_expr_t dummy, boolean_t dummy2, db_expr_t dummy3, char *dummy4)
+{
+	int error;
+
+	error = doadump(FALSE);
+	if (error) {
+		db_printf("Cannot dump: ");
+		switch (error) {
+		case EBUSY:
+			db_printf("debugger got invoked while dumping.\n");
+			break;
+		case ENXIO:
+			db_printf("no dump device specified.\n");
+			break;
+		default:
+			db_printf("unknown error (error=%d).\n", error);
+			break;
+		}
+	}
+}
 
 /*
  * Call random function:
diff --git a/sys/dev/amdsbwd/amdsbwd.c b/sys/dev/amdsbwd/amdsbwd.c
index f5f0f879d605..4256381f5312 100644
--- a/sys/dev/amdsbwd/amdsbwd.c
+++ b/sys/dev/amdsbwd/amdsbwd.c
@@ -25,8 +25,8 @@
  */
 
 /*
- * This is a driver for watchdog timer present in AMD SB600/SB7xx
- * south bridges and other watchdog timers advertised via WDRT ACPI table.
+ * This is a driver for watchdog timer present in AMD SB600/SB7xx/SB8xx
+ * southbridges.
  * Please see the following specifications for the descriptions of the
  * registers and flags:
  * - AMD SB600 Register Reference Guide, Public Version,  Rev. 3.03 (SB600 RRG)
@@ -35,11 +35,13 @@
  *   http://developer.amd.com/assets/43009_sb7xx_rrg_pub_1.00.pdf
  * - AMD SB700/710/750 Register Programming Requirements (RPR)
  *   http://developer.amd.com/assets/42413_sb7xx_rpr_pub_1.00.pdf
+ * - AMD SB800-Series Southbridges Register Reference Guide (RRG)
+ *   http://support.amd.com/us/Embedded_TechDocs/45482.pdf
  * Please see the following for Watchdog Resource Table specification:
  * - Watchdog Timer Hardware Requirements for Windows Server 2003 (WDRT)
  *   http://www.microsoft.com/whdc/system/sysinternals/watchdog.mspx
- * AMD SB600/SB7xx watchdog hardware seems to conform to the above,
- * but my system doesn't provide the table.
+ * AMD SB600/SB7xx/SB8xx watchdog hardware seems to conform to the above
+ * specifications, but the table hasn't been spotted in the wild yet.
  */
 
 #include <sys/cdefs.h>
@@ -59,15 +61,15 @@ __FBSDID("$FreeBSD$");
 #include <dev/pci/pcivar.h>
 #include <isa/isavar.h>
 
-/* RRG 2.3.3.1.1, page 161. */
+/* SB7xx RRG 2.3.3.1.1. */
 #define	AMDSB_PMIO_INDEX		0xcd6
 #define	AMDSB_PMIO_DATA			(PMIO_INDEX + 1)
 #define	AMDSB_PMIO_WIDTH		2
-/* RRG 2.3.3.2, page 181. */
+/* SB7xx RRG 2.3.3.2. */
 #define	AMDSB_PM_RESET_STATUS0		0x44
 #define	AMDSB_PM_RESET_STATUS1		0x45
 #define		AMDSB_WD_RST_STS	0x02
-/* RRG 2.3.3.2, page 188; RPR 2.36, page 30. */
+/* SB7xx RRG 2.3.3.2, RPR 2.36. */
 #define	AMDSB_PM_WDT_CTRL		0x69
 #define		AMDSB_WDT_DISABLE	0x01
 #define		AMDSB_WDT_RES_MASK	(0x02 | 0x04)
@@ -77,7 +79,18 @@ __FBSDID("$FreeBSD$");
 #define		AMDSB_WDT_RES_1S	0x06
 #define	AMDSB_PM_WDT_BASE_LSB		0x6c
 #define	AMDSB_PM_WDT_BASE_MSB		0x6f
-/* RRG 2.3.4, page 223, WDRT. */
+/* SB8xx RRG 2.3.3. */
+#define	AMDSB8_PM_WDT_EN		0x48
+#define		AMDSB8_WDT_DEC_EN	0x01
+#define		AMDSB8_WDT_DISABLE	0x02
+#define	AMDSB8_PM_WDT_CTRL		0x4c
+#define		AMDSB8_WDT_32KHZ	0x00
+#define		AMDSB8_WDT_1HZ		0x03
+#define		AMDSB8_WDT_RES_MASK	0x03
+#define	AMDSB8_PM_RESET_STATUS0		0xC0
+#define	AMDSB8_PM_RESET_STATUS1		0xC1
+#define		AMDSB8_WD_RST_STS	0x20
+/* SB7xx RRG 2.3.4, WDRT. */
 #define	AMDSB_WD_CTRL			0x00
 #define		AMDSB_WD_RUN		0x01
 #define		AMDSB_WD_FIRED		0x02
@@ -90,8 +103,9 @@ __FBSDID("$FreeBSD$");
 #define	AMDSB_WDIO_REG_WIDTH		4
 /* WDRT */
 #define	MAXCOUNT_MIN_VALUE		511
-/* RRG 2.3.1.1, page 122; SB600 RRG 2.3.1.1, page 97. */
-#define	AMDSB7xx_SMBUS_DEVID		0x43851002
+/* SB7xx RRG 2.3.1.1, SB600 RRG 2.3.1.1, SB8xx RRG 2.3.1.  */
+#define	AMDSB_SMBUS_DEVID		0x43851002
+#define	AMDSB8_SMBUS_REVID		0x40
 
 #define	amdsbwd_verbose_printf(dev, ...)	\
 	do {						\
@@ -265,7 +279,7 @@ amdsbwd_identify(driver_t *driver, device_t parent)
 	smb_dev = pci_find_bsf(0, 20, 0);
 	if (smb_dev == NULL)
 		return;
-	if (pci_get_devid(smb_dev) != AMDSB7xx_SMBUS_DEVID)
+	if (pci_get_devid(smb_dev) != AMDSB_SMBUS_DEVID)
 		return;
 
 	child = BUS_ADD_CHILD(parent, ISA_ORDER_SPECULATIVE, "amdsbwd", -1);
@@ -273,15 +287,102 @@ amdsbwd_identify(driver_t *driver, device_t parent)
 		device_printf(parent, "add amdsbwd child failed\n");
 }
 
+
+static void
+amdsbwd_probe_sb7xx(device_t dev, struct resource *pmres, uint32_t *addr)
+{
+	uint32_t	val;
+	int		i;
+
+	/* Report cause of previous reset for user's convenience. */
+	val = pmio_read(pmres, AMDSB_PM_RESET_STATUS0);
+	if (val != 0)
+		amdsbwd_verbose_printf(dev, "ResetStatus0 = %#04x\n", val);
+	val = pmio_read(pmres, AMDSB_PM_RESET_STATUS1);
+	if (val != 0)
+		amdsbwd_verbose_printf(dev, "ResetStatus1 = %#04x\n", val);
+	if ((val & AMDSB_WD_RST_STS) != 0)
+		device_printf(dev, "Previous Reset was caused by Watchdog\n");
+
+	/* Find base address of memory mapped WDT registers. */
+	for (*addr = 0, i = 0; i < 4; i++) {
+		*addr <<= 8;
+		*addr |= pmio_read(pmres, AMDSB_PM_WDT_BASE_MSB - i);
+	}
+	/* Set watchdog timer tick to 10ms. */
+	val = pmio_read(pmres, AMDSB_PM_WDT_CTRL);
+	val &= ~AMDSB_WDT_RES_MASK;
+	val |= AMDSB_WDT_RES_10MS;
+	pmio_write(pmres, AMDSB_PM_WDT_CTRL, val);
+
+	/* Enable watchdog device (in stopped state). */
+	val = pmio_read(pmres, AMDSB_PM_WDT_CTRL);
+	val &= ~AMDSB_WDT_DISABLE;
+	pmio_write(pmres, AMDSB_PM_WDT_CTRL, val);
+
+	/*
+	 * XXX TODO: Ensure that watchdog decode is enabled
+	 * (register 0x41, bit 3).
+	 */
+	device_set_desc(dev, "AMD SB600/SB7xx Watchdog Timer");
+}
+
+static void
+amdsbwd_probe_sb8xx(device_t dev, struct resource *pmres, uint32_t *addr)
+{
+	uint32_t	val;
+	int		i;
+
+	/* Report cause of previous reset for user's convenience. */
+	val = pmio_read(pmres, AMDSB8_PM_RESET_STATUS0);
+	if (val != 0)
+		amdsbwd_verbose_printf(dev, "ResetStatus0 = %#04x\n", val);
+	val = pmio_read(pmres, AMDSB8_PM_RESET_STATUS1);
+	if (val != 0)
+		amdsbwd_verbose_printf(dev, "ResetStatus1 = %#04x\n", val);
+	if ((val & AMDSB8_WD_RST_STS) != 0)
+		device_printf(dev, "Previous Reset was caused by Watchdog\n");
+
+	/* Find base address of memory mapped WDT registers. */
+	for (*addr = 0, i = 0; i < 4; i++) {
+		*addr <<= 8;
+		*addr |= pmio_read(pmres, AMDSB8_PM_WDT_EN + 3 - i);
+	}
+	*addr &= ~0x07u;
+
+	/* Set watchdog timer tick to 1s. */
+	val = pmio_read(pmres, AMDSB8_PM_WDT_CTRL);
+	val &= ~AMDSB8_WDT_RES_MASK;
+	val |= AMDSB8_WDT_1HZ;
+	pmio_write(pmres, AMDSB8_PM_WDT_CTRL, val);
+#ifdef AMDSBWD_DEBUG
+	val = pmio_read(pmres, AMDSB8_PM_WDT_CTRL);
+	amdsbwd_verbose_printf(dev, "AMDSB8_PM_WDT_CTRL value = %#02x\n", val);
+#endif
+
+	/*
+	 * Enable watchdog device (in stopped state)
+	 * and decoding of its address.
+	 */
+	val = pmio_read(pmres, AMDSB8_PM_WDT_EN);
+	val &= ~AMDSB8_WDT_DISABLE;
+	val |= AMDSB8_WDT_DEC_EN;
+	pmio_write(pmres, AMDSB8_PM_WDT_EN, val);
+#ifdef AMDSBWD_DEBUG
+	val = pmio_read(pmres, AMDSB8_PM_WDT_EN);
+	device_printf(dev, "AMDSB8_PM_WDT_EN value = %#02x\n", val);
+#endif
+	device_set_desc(dev, "AMD SB8xx Watchdog Timer");
+}
+
 static int
 amdsbwd_probe(device_t dev)
 {
 	struct resource		*res;
+	device_t		smb_dev;
 	uint32_t		addr;
-	uint32_t		val;
 	int			rid;
 	int			rc;
-	int			i;
 
 	/* Do not claim some ISA PnP device by accident. */
 	if (isa_get_logicalid(dev) != 0)
@@ -301,21 +402,16 @@ amdsbwd_probe(device_t dev)
 		return (ENXIO);
 	}
 
-	/* Report cause of previous reset for user's convenience. */
-	val = pmio_read(res, AMDSB_PM_RESET_STATUS0);
-	if (val != 0)
-		amdsbwd_verbose_printf(dev, "ResetStatus0 = %#04x\n", val);
-	val = pmio_read(res, AMDSB_PM_RESET_STATUS1);
-	if (val != 0)
-		amdsbwd_verbose_printf(dev, "ResetStatus1 = %#04x\n", val);
-	if ((val & AMDSB_WD_RST_STS) != 0)
-		device_printf(dev, "Previous Reset was caused by Watchdog\n");
+	smb_dev = pci_find_bsf(0, 20, 0);
+	KASSERT(smb_dev != NULL, ("can't find SMBus PCI device\n"));
+	if (pci_get_revid(smb_dev) < AMDSB8_SMBUS_REVID)
+		amdsbwd_probe_sb7xx(dev, res, &addr);
+	else
+		amdsbwd_probe_sb8xx(dev, res, &addr);
+
+	bus_release_resource(dev, SYS_RES_IOPORT, rid, res);
+	bus_delete_resource(dev, SYS_RES_IOPORT, rid);
 
-	/* Find base address of memory mapped WDT registers. */
-	for (addr = 0, i = 0; i < 4; i++) {
-		addr <<= 8;
-		addr |= pmio_read(res, AMDSB_PM_WDT_BASE_MSB - i);
-	}
 	amdsbwd_verbose_printf(dev, "memory base address = %#010x\n", addr);
 	rc = bus_set_resource(dev, SYS_RES_MEMORY, 0, addr + AMDSB_WD_CTRL,
 	    AMDSB_WDIO_REG_WIDTH);
@@ -330,36 +426,25 @@ amdsbwd_probe(device_t dev)
 		return (ENXIO);
 	}
 
-	/* Set watchdog timer tick to 10ms. */
-	val = pmio_read(res, AMDSB_PM_WDT_CTRL);
-	val &= ~AMDSB_WDT_RES_MASK;
-	val |= AMDSB_WDT_RES_10MS;
-	pmio_write(res, AMDSB_PM_WDT_CTRL, val);
-
-	/* Enable watchdog device (in stopped state). */
-	val = pmio_read(res, AMDSB_PM_WDT_CTRL);
-	val &= ~AMDSB_WDT_DISABLE;
-	pmio_write(res, AMDSB_PM_WDT_CTRL, val);
-
-	/*
-	 * XXX TODO: Ensure that watchdog decode is enabled
-	 * (register 0x41, bit 3).
-	 */
-	bus_release_resource(dev, SYS_RES_IOPORT, rid, res);
-	bus_delete_resource(dev, SYS_RES_IOPORT, rid);
-
-	device_set_desc(dev, "AMD SB600/SB7xx Watchdog Timer");
 	return (0);
 }
 
 static int
 amdsbwd_attach_sb(device_t dev, struct amdsbwd_softc *sc)
 {
+	device_t	smb_dev;
+
 	sc->max_ticks = UINT16_MAX;
-	sc->ms_per_tick = 10;
 	sc->rid_ctrl = 0;
 	sc->rid_count = 1;
 
+	smb_dev = pci_find_bsf(0, 20, 0);
+	KASSERT(smb_dev != NULL, ("can't find SMBus PCI device\n"));
+	if (pci_get_revid(smb_dev) < AMDSB8_SMBUS_REVID)
+		sc->ms_per_tick = 10;
+	else
+		sc->ms_per_tick = 1000;
+
 	sc->res_ctrl = bus_alloc_resource_any(dev, SYS_RES_MEMORY,
 	    &sc->rid_ctrl, RF_ACTIVE);
 	if (sc->res_ctrl == NULL) {
@@ -388,6 +473,11 @@ amdsbwd_attach(device_t dev)
 	if (rc != 0)
 		goto fail;
 
+#ifdef AMDSBWD_DEBUG
+	device_printf(dev, "wd ctrl = %#04x\n", wdctrl_read(sc));
+	device_printf(dev, "wd count = %#04x\n", wdcount_read(sc));
+#endif
+
 	/* Setup initial state of Watchdog Control. */
 	wdctrl_write(sc, AMDSB_WD_FIRED);
 
diff --git a/sys/dev/ath/ath_hal/ah.h b/sys/dev/ath/ath_hal/ah.h
index 165d919d03d8..7a01be354436 100644
--- a/sys/dev/ath/ath_hal/ah.h
+++ b/sys/dev/ath/ath_hal/ah.h
@@ -745,6 +745,17 @@ typedef enum {
 	HAL_QUIET_ADD_SWBA_RESP_TIME	= 0x4,	/* add beacon response time to next_start offset */
 } HAL_QUIET_FLAG;
 
+#define	HAL_DFS_EVENT_PRICH		0x0000001
+
+struct dfs_event {
+	uint64_t	re_full_ts;	/* 64-bit full timestamp from interrupt time */
+	uint32_t	re_ts;		/* Original 15 bit recv timestamp */
+	uint8_t		re_rssi;	/* rssi of radar event */
+	uint8_t		re_dur;		/* duration of radar pulse */
+	uint32_t	re_flags;	/* Flags (see above) */
+};
+typedef struct dfs_event HAL_DFS_EVENT;
+
 /*
  * Hardware Access Layer (HAL) API.
  *
@@ -928,6 +939,9 @@ struct ath_hal {
 				HAL_PHYERR_PARAM *pe);
 	void	  __ahdecl(*ah_getDfsThresh)(struct ath_hal *ah,
 				HAL_PHYERR_PARAM *pe);
+	HAL_BOOL  __ahdecl(*ah_procRadarEvent)(struct ath_hal *ah,
+				struct ath_rx_status *rxs, uint64_t fulltsf,
+				const char *buf, HAL_DFS_EVENT *event);
 
 	/* Key Cache Functions */
 	uint32_t __ahdecl(*ah_getKeyCacheSize)(struct ath_hal*);
diff --git a/sys/dev/ath/ath_hal/ar5212/ar5212.h b/sys/dev/ath/ath_hal/ar5212/ar5212.h
index 16394a396f11..8503a629aa36 100644
--- a/sys/dev/ath/ath_hal/ar5212/ar5212.h
+++ b/sys/dev/ath/ath_hal/ar5212/ar5212.h
@@ -622,5 +622,8 @@ extern	HAL_BOOL ar5212IsNFCalInProgress(struct ath_hal *ah);
 extern	HAL_BOOL ar5212WaitNFCalComplete(struct ath_hal *ah, int i);
 extern	void ar5212EnableDfs(struct ath_hal *ah, HAL_PHYERR_PARAM *pe);
 extern	void ar5212GetDfsThresh(struct ath_hal *ah, HAL_PHYERR_PARAM *pe);
+extern	HAL_BOOL ar5212ProcessRadarEvent(struct ath_hal *ah,
+	    struct ath_rx_status *rxs, uint64_t fulltsf, const char *buf,
+	    HAL_DFS_EVENT *event);
 
 #endif	/* _ATH_AR5212_H_ */
diff --git a/sys/dev/ath/ath_hal/ar5212/ar5212_attach.c b/sys/dev/ath/ath_hal/ar5212/ar5212_attach.c
index 5999a603f9ca..8e7f3cbddf04 100644
--- a/sys/dev/ath/ath_hal/ar5212/ar5212_attach.c
+++ b/sys/dev/ath/ath_hal/ar5212/ar5212_attach.c
@@ -132,6 +132,7 @@ static const struct ath_hal_private ar5212hal = {{
 	/* DFS Functions */
 	.ah_enableDfs			= ar5212EnableDfs,
 	.ah_getDfsThresh		= ar5212GetDfsThresh,
+	.ah_procRadarEvent		= ar5212ProcessRadarEvent,
 
 	/* Key Cache Functions */
 	.ah_getKeyCacheSize		= ar5212GetKeyCacheSize,
diff --git a/sys/dev/ath/ath_hal/ar5212/ar5212_misc.c b/sys/dev/ath/ath_hal/ar5212/ar5212_misc.c
index 276671d6db33..3a6019d993fd 100644
--- a/sys/dev/ath/ath_hal/ar5212/ar5212_misc.c
+++ b/sys/dev/ath/ath_hal/ar5212/ar5212_misc.c
@@ -21,9 +21,7 @@
 #include "ah.h"
 #include "ah_internal.h"
 #include "ah_devid.h"
-#ifdef AH_DEBUG
 #include "ah_desc.h"			/* NB: for HAL_PHYERR* */
-#endif
 
 #include "ar5212/ar5212.h"
 #include "ar5212/ar5212reg.h"
@@ -1180,3 +1178,47 @@ ar5212GetDfsThresh(struct ath_hal *ah, HAL_PHYERR_PARAM *pe)
 	pe->pe_extchannel = AH_FALSE;
 }
 
+/*
+ * Process the radar phy error and extract the pulse duration.
+ */
+HAL_BOOL
+ar5212ProcessRadarEvent(struct ath_hal *ah, struct ath_rx_status *rxs,
+    uint64_t fulltsf, const char *buf, HAL_DFS_EVENT *event)
+{
+	uint8_t dur;
+	uint8_t rssi;
+
+	/* Check whether the given phy error is a radar event */
+	if ((rxs->rs_phyerr != HAL_PHYERR_RADAR) &&
+	    (rxs->rs_phyerr != HAL_PHYERR_FALSE_RADAR_EXT))
+		return AH_FALSE;
+
+	/*
+	 * The first byte is the pulse width - if there's
+	 * no data, simply set the duration to 0
+	 */
+	if (rxs->rs_datalen >= 1)
+		/* The pulse width is byte 0 of the data */
+		dur = ((uint8_t) buf[0]) & 0xff;
+	else
+		dur = 0;
+
+	/* Pulse RSSI is the normal reported RSSI */
+	rssi = (uint8_t) rxs->rs_rssi;
+
+	/* 0 duration/rssi is not a valid radar event */
+	if (dur == 0 && rssi == 0)
+		return AH_FALSE;
+
+	HALDEBUG(ah, HAL_DEBUG_DFS, "%s: rssi=%d, dur=%d\n",
+	    __func__, rssi, dur);
+
+	/* Record the event */
+	event->re_full_ts = fulltsf;
+	event->re_ts = rxs->rs_tstamp;
+	event->re_rssi = rssi;
+	event->re_dur = dur;
+	event->re_flags = HAL_DFS_EVENT_PRICH;
+
+	return AH_TRUE;
+}
diff --git a/sys/dev/ath/ath_hal/ar5416/ar5416.h b/sys/dev/ath/ath_hal/ar5416/ar5416.h
index 510afe0436e0..e5294b0634bb 100644
--- a/sys/dev/ath/ath_hal/ar5416/ar5416.h
+++ b/sys/dev/ath/ath_hal/ar5416/ar5416.h
@@ -205,6 +205,9 @@ extern	HAL_BOOL ar5416SetRifsDelay(struct ath_hal *ah,
 	    const struct ieee80211_channel *chan, HAL_BOOL enable);
 extern	void ar5416EnableDfs(struct ath_hal *ah, HAL_PHYERR_PARAM *pe);
 extern	void ar5416GetDfsThresh(struct ath_hal *ah, HAL_PHYERR_PARAM *pe);
+extern	HAL_BOOL ar5416ProcessRadarEvent(struct ath_hal *ah,
+	    struct ath_rx_status *rxs, uint64_t fulltsf, const char *buf,
+	    HAL_DFS_EVENT *event);
 
 extern	HAL_BOOL ar5416SetPowerMode(struct ath_hal *ah, HAL_POWER_MODE mode,
 		int setChip);
diff --git a/sys/dev/ath/ath_hal/ar5416/ar5416_attach.c b/sys/dev/ath/ath_hal/ar5416/ar5416_attach.c
index 22d05ff80ce8..e6363251f8a2 100644
--- a/sys/dev/ath/ath_hal/ar5416/ar5416_attach.c
+++ b/sys/dev/ath/ath_hal/ar5416/ar5416_attach.c
@@ -147,6 +147,7 @@ ar5416InitState(struct ath_hal_5416 *ahp5416, uint16_t devid, HAL_SOFTC sc,
 	/* DFS Functions */
 	ah->ah_enableDfs		= ar5416EnableDfs;
 	ah->ah_getDfsThresh		= ar5416GetDfsThresh;
+	ah->ah_procRadarEvent		= ar5416ProcessRadarEvent;
 
 	/* Power Management Functions */
 	ah->ah_setPowerMode		= ar5416SetPowerMode;
diff --git a/sys/dev/ath/ath_hal/ar5416/ar5416_misc.c b/sys/dev/ath/ath_hal/ar5416/ar5416_misc.c
index 2c08730bbe91..2332656e8f54 100644
--- a/sys/dev/ath/ath_hal/ar5416/ar5416_misc.c
+++ b/sys/dev/ath/ath_hal/ar5416/ar5416_misc.c
@@ -692,3 +692,19 @@ ar5416EnableDfs(struct ath_hal *ah, HAL_PHYERR_PARAM *pe)
 		OS_REG_WRITE(ah, AR_PHY_RADAR_1, val);
 	}
 }
+
+/*
+ * Extract the radar event information from the given phy error.
+ *
+ * Returns AH_TRUE if the phy error was actually a radar event,
+ * AH_FALSE if it wasn't.
+ */
+HAL_BOOL
+ar5416ProcessRadarEvent(struct ath_hal *ah, struct ath_rx_status *rxs,
+    uint64_t fulltsf, const char *buf, HAL_DFS_EVENT *event)
+{
+	/*
+	 * For now, this isn't implemented; *event is left untouched.
+	 */
+	return AH_FALSE;
+}
diff --git a/sys/dev/ath/if_athvar.h b/sys/dev/ath/if_athvar.h
index 97666c55cb87..3bc852266019 100644
--- a/sys/dev/ath/if_athvar.h
+++ b/sys/dev/ath/if_athvar.h
@@ -709,6 +709,8 @@ void	ath_intr(void *);
 	((*(_ah)->ah_enableDfs)((_ah), (_param)))
 #define	ath_hal_getdfsthresh(_ah, _param) \
 	((*(_ah)->ah_getDfsThresh)((_ah), (_param)))
+#define	ath_hal_procradarevent(_ah, _rxs, _fulltsf, _buf, _event) \
+	((*(_ah)->ah_procRadarEvent)((_ah), (_rxs), (_fulltsf), (_buf), (_event)))
 
 #define ath_hal_gpioCfgOutput(_ah, _gpio, _type) \
         ((*(_ah)->ah_gpioCfgOutput)((_ah), (_gpio), (_type)))
diff --git a/sys/dev/atkbdc/atkbd.c b/sys/dev/atkbdc/atkbd.c
index 643554d1f4dd..b7156cf1a699 100644
--- a/sys/dev/atkbdc/atkbd.c
+++ b/sys/dev/atkbdc/atkbd.c
@@ -1097,10 +1097,8 @@ get_typematic(keyboard_t *kbd)
 	x86regs_t regs;
 	uint8_t *p;
 
-	if (!(kbd->kb_config & KB_CONF_PROBE_TYPEMATIC))
-		return (ENODEV);
-
-	if (x86bios_get_intr(0x15) == 0 || x86bios_get_intr(0x16) == 0)
+	if (x86bios_get_intr(0x15) != 0xf000f859 ||
+	    x86bios_get_intr(0x16) != 0xf000e82e)
 		return (ENODEV);
 
 	/* Is BIOS system configuration table supported? */
diff --git a/sys/dev/atkbdc/atkbdreg.h b/sys/dev/atkbdc/atkbdreg.h
index 3d54b4d70228..cf7ee6b31280 100644
--- a/sys/dev/atkbdc/atkbdreg.h
+++ b/sys/dev/atkbdc/atkbdreg.h
@@ -36,7 +36,6 @@
 #define KB_CONF_NO_RESET	(1 << 1) /* don't reset the keyboard */
 #define KB_CONF_ALT_SCANCODESET	(1 << 2) /* assume the XT type keyboard */
 #define	KB_CONF_NO_PROBE_TEST	(1 << 3) /* don't test keyboard during probe */
-#define	KB_CONF_PROBE_TYPEMATIC	(1 << 4) /* probe keyboard typematic */
 
 #ifdef _KERNEL
 
diff --git a/sys/dev/cardbus/cardbus_cis.c b/sys/dev/cardbus/cardbus_cis.c
index 2cfea19203f3..3352a56afa29 100644
--- a/sys/dev/cardbus/cardbus_cis.c
+++ b/sys/dev/cardbus/cardbus_cis.c
@@ -324,7 +324,7 @@ decode_tuple_bar(device_t cbdev, device_t child, int id,
 		 * hint when the cardbus bridge is a child of pci0 (the main
 		 * bus).  The PC Card spec seems to indicate that this should
 		 * only be done on x86 based machines, which suggests that on
-		 * non-x86 machines the adddresses can be anywhere.  Since the
+		 * non-x86 machines the addresses can be anywhere.  Since the
 		 * hardware can do it on non-x86 machines, it should be able
 		 * to do it on x86 machines too.  Therefore, we can and should
 		 * ignore this hint.  Furthermore, the PC Card spec recommends
@@ -430,7 +430,6 @@ cardbus_read_tuple_finish(device_t cbdev, device_t child, int rid,
 {
 	if (res != CIS_CONFIG_SPACE) {
 		bus_release_resource(child, SYS_RES_MEMORY, rid, res);
-		bus_delete_resource(child, SYS_RES_MEMORY, rid);
 	}
 }
 
@@ -467,7 +466,7 @@ cardbus_read_tuple_init(device_t cbdev, device_t child, uint32_t *start,
 	}
 
 	/* allocate the memory space to read CIS */
-	res = bus_alloc_resource(child, SYS_RES_MEMORY, rid, 0, ~0, 1,
+	res = bus_alloc_resource_any(child, SYS_RES_MEMORY, rid,
 	    rman_make_alignment_flags(4096) | RF_ACTIVE);
 	if (res == NULL) {
 		device_printf(cbdev, "Unable to allocate resource "
diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c
index d6225d8f5ae3..4cfcea87fa5b 100644
--- a/sys/dev/hwpmc/hwpmc_mod.c
+++ b/sys/dev/hwpmc/hwpmc_mod.c
@@ -1991,7 +1991,7 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
 		 * had already processed the interrupt).  We don't
 		 * lose the interrupt sample.
 		 */
-		atomic_clear_int(&pmc_cpumask, (1 << PCPU_GET(cpuid)));
+		CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmc_cpumask);
 		pmc_process_samples(PCPU_GET(cpuid));
 		break;
 
@@ -4083,7 +4083,7 @@ pmc_process_interrupt(int cpu, struct pmc *pm, struct trapframe *tf,
 
  done:
 	/* mark CPU as needing processing */
-	atomic_set_int(&pmc_cpumask, (1 << cpu));
+	CPU_SET_ATOMIC(cpu, &pmc_cpumask);
 
 	return (error);
 }
@@ -4193,7 +4193,7 @@ pmc_process_samples(int cpu)
 			break;
 		if (ps->ps_nsamples == PMC_SAMPLE_INUSE) {
 			/* Need a rescan at a later time. */
-			atomic_set_int(&pmc_cpumask, (1 << cpu));
+			CPU_SET_ATOMIC(cpu, &pmc_cpumask);
 			break;
 		}
 
@@ -4782,7 +4782,7 @@ pmc_cleanup(void)
 	PMCDBG(MOD,INI,0, "%s", "cleanup");
 
 	/* switch off sampling */
-	pmc_cpumask = 0;
+	CPU_ZERO(&pmc_cpumask);
 	pmc_intr = NULL;
 
 	sx_xlock(&pmc_sx);
diff --git a/sys/dev/pccard/pccard.c b/sys/dev/pccard/pccard.c
index 00cd1dc08b60..1de571c8e6c1 100644
--- a/sys/dev/pccard/pccard.c
+++ b/sys/dev/pccard/pccard.c
@@ -1405,8 +1405,8 @@ pccard_ccr_read_impl(device_t brdev, device_t child, uint32_t offset,
 	struct pccard_ivar *devi = PCCARD_IVAR(child);
 
 	*val = pccard_ccr_read(devi->pf, offset);
-	device_printf(child, "ccr_read of %#x (%#x) is %#x\n", offset,
-	  devi->pf->pf_ccr_offset, *val);
+	DEVPRINTF((child, "ccr_read of %#x (%#x) is %#x\n", offset,
+	  devi->pf->pf_ccr_offset, *val));
 	return 0;
 }
 
@@ -1421,8 +1421,8 @@ pccard_ccr_write_impl(device_t brdev, device_t child, uint32_t offset,
 	 * Can't use pccard_ccr_write since client drivers may access
 	 * registers not contained in the 'mask' if they are non-standard.
 	 */
-	device_printf(child, "ccr_write of %#x to %#x (%#x)\n", val, offset,
-	  devi->pf->pf_ccr_offset);
+	DEVPRINTF((child, "ccr_write of %#x to %#x (%#x)\n", val, offset,
+	  devi->pf->pf_ccr_offset));
 	bus_space_write_1(pf->pf_ccrt, pf->pf_ccrh, pf->pf_ccr_offset + offset,
 	    val);
 	return 0;
diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c
index 22046c1f30f7..9cd5a1c7a1ab 100644
--- a/sys/dev/pci/pci.c
+++ b/sys/dev/pci/pci.c
@@ -2576,6 +2576,17 @@ pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl,
 	uint16_t cmd;
 	struct resource *res;
 
+	/*
+	 * The BAR may already exist if the device is a CardBus card
+	 * whose CIS is stored in this BAR.
+	 */
+	pm = pci_find_bar(dev, reg);
+	if (pm != NULL) {
+		maprange = pci_maprange(pm->pm_value);
+		barlen = maprange == 64 ? 2 : 1;
+		return (barlen);
+	}
+
 	pci_read_bar(dev, reg, &map, &testval);
 	if (PCI_BAR_MEM(map)) {
 		type = SYS_RES_MEMORY;
diff --git a/sys/dev/puc/pucdata.c b/sys/dev/puc/pucdata.c
index a56971e19a0a..2b38d9b171ef 100644
--- a/sys/dev/puc/pucdata.c
+++ b/sys/dev/puc/pucdata.c
@@ -51,12 +51,12 @@ static puc_config_f puc_config_amc;
 static puc_config_f puc_config_diva;
 static puc_config_f puc_config_exar;
 static puc_config_f puc_config_icbook;
+static puc_config_f puc_config_oxford_pcie;
 static puc_config_f puc_config_quatech;
 static puc_config_f puc_config_syba;
 static puc_config_f puc_config_siig;
 static puc_config_f puc_config_timedia;
 static puc_config_f puc_config_titan;
-static puc_config_f puc_config_oxford_pcie;
 
 const struct puc_cfg puc_pci_devices[] = {
 
@@ -1366,14 +1366,12 @@ puc_config_oxford_pcie(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port,
 		bar = puc_get_bar(sc, cfg->rid);
 		if (bar == NULL)
 			return (ENXIO);
-
 		for (idx = 0; idx < sc->sc_nports; idx++) {
-			value = bus_read_1(bar->b_res, 0x1000 + (idx << 9)
-				+ 0x92);
+			value = bus_read_1(bar->b_res, 0x1000 + (idx << 9) +
+			    0x92);
 			bus_write_1(bar->b_res, 0x1000 + (idx << 9) + 0x92,
-				value | 0x10);
+			    value | 0x10);
 		}
-
 		return (0);
 	case PUC_CFG_GET_LEN:
 		*res = 0x200;
diff --git a/sys/dev/sound/pcm/sound.c b/sys/dev/sound/pcm/sound.c
index caa78416e958..958065f3a541 100644
--- a/sys/dev/sound/pcm/sound.c
+++ b/sys/dev/sound/pcm/sound.c
@@ -51,7 +51,7 @@ int pcm_veto_load = 1;
 int snd_unit = -1;
 TUNABLE_INT("hw.snd.default_unit", &snd_unit);
 
-static int snd_unit_auto = 0;
+static int snd_unit_auto = -1;
 TUNABLE_INT("hw.snd.default_auto", &snd_unit_auto);
 SYSCTL_INT(_hw_snd, OID_AUTO, default_auto, CTLFLAG_RW,
     &snd_unit_auto, 0, "assign default unit to a newly attached device");
@@ -443,6 +443,7 @@ sysctl_hw_snd_default_unit(SYSCTL_HANDLER_ARGS)
 		if (!PCM_REGISTERED(d) || CHN_EMPTY(d, channels.pcm))
 			return EINVAL;
 		snd_unit = unit;
+		snd_unit_auto = 0;
 	}
 	return (error);
 }
@@ -737,6 +738,32 @@ pcm_killchan(device_t dev)
 	return (pcm_chn_destroy(ch));
 }
 
+static int
+pcm_best_unit(int old)
+{
+	struct snddev_info *d;
+	int i, best, bestprio, prio;
+
+	best = -1;		/* stays -1 if no unit is registered */
+	bestprio = -100;	/* lower than any prio computed below */
+	for (i = 0; pcm_devclass != NULL &&
+	    i < devclass_get_maxunit(pcm_devclass); i++) {
+		d = devclass_get_softc(pcm_devclass, i);
+		if (!PCM_REGISTERED(d))
+			continue;
+		prio = 0;
+		if (d->playcount == 0)
+			prio -= 10;	/* playcount == 0 costs the most */
+		if (d->reccount == 0)
+			prio -= 2;	/* reccount == 0 is a minor penalty */
+		if (prio > bestprio || (prio == bestprio && i == old)) {
+			best = i;	/* higher prio, or tie and i == old */
+			bestprio = prio;
+		}
+	}
+	return (best);		/* highest-prio registered unit, or -1 */
+}
+
 int
 pcm_setstatus(device_t dev, char *str)
 {
@@ -770,8 +797,12 @@ pcm_setstatus(device_t dev, char *str)
 
 	PCM_UNLOCK(d);
 
-	if (snd_unit < 0 || snd_unit_auto != 0)
+	if (snd_unit_auto < 0)
+		snd_unit_auto = (snd_unit < 0) ? 1 : 0;
+	if (snd_unit < 0 || snd_unit_auto > 1)
 		snd_unit = device_get_unit(dev);
+	else if (snd_unit_auto == 1)
+		snd_unit = pcm_best_unit(snd_unit);
 
 	return (0);
 }
@@ -1113,7 +1144,6 @@ pcm_unregister(device_t dev)
 	struct snddev_info *d;
 	struct pcm_channel *ch;
 	struct thread *td;
-	int i;
 
 	td = curthread;
 	d = device_get_softc(dev);
@@ -1216,21 +1246,9 @@ pcm_unregister(device_t dev)
 	sndstat_release(td);
 
 	if (snd_unit == device_get_unit(dev)) {
-		/*
-		 * Reassign default unit to the next available dev, but
-		 * first, reset snd_unit to something ridiculous.
-		 */
-		snd_unit = -1;
-		for (i = 0; pcm_devclass != NULL &&
-		    i < devclass_get_maxunit(pcm_devclass); i++) {
-			if (device_get_unit(dev) == i)
-				continue;
-			d = devclass_get_softc(pcm_devclass, i);
-			if (PCM_REGISTERED(d)) {
-				snd_unit = i;
-				break;
-			}
-		}
+		snd_unit = pcm_best_unit(-1);
+		if (snd_unit_auto == 0)
+			snd_unit_auto = 1;
 	}
 
 	return (0);
diff --git a/sys/dev/usb/usb_device.h b/sys/dev/usb/usb_device.h
index c8bc5eb95a18..bf412214dd4f 100644
--- a/sys/dev/usb/usb_device.h
+++ b/sys/dev/usb/usb_device.h
@@ -187,6 +187,8 @@ struct usb_device {
 	struct usb_host_endpoint *linux_endpoint_end;
 	uint16_t devnum;
 #endif
+
+	uint32_t clear_stall_errors;	/* number of clear-stall failures */
 };
 
 /* globals */
diff --git a/sys/dev/usb/usb_freebsd.h b/sys/dev/usb/usb_freebsd.h
index a44e53004d91..ae69cdb8926e 100644
--- a/sys/dev/usb/usb_freebsd.h
+++ b/sys/dev/usb/usb_freebsd.h
@@ -66,6 +66,7 @@
 
 #define	USB_HUB_MAX_DEPTH	5
 #define	USB_EP0_BUFSIZE		1024	/* bytes */
+#define	USB_CS_RESET_LIMIT	20	/* failures = 20 * 50 ms = 1sec */
 
 typedef uint32_t usb_timeout_t;		/* milliseconds */
 typedef uint32_t usb_frlength_t;	/* bytes */
diff --git a/sys/dev/usb/usb_generic.c b/sys/dev/usb/usb_generic.c
index 714ee6f368d5..d62f8f9466ce 100644
--- a/sys/dev/usb/usb_generic.c
+++ b/sys/dev/usb/usb_generic.c
@@ -966,10 +966,8 @@ ugen_re_enumerate(struct usb_fifo *f)
 		/* ignore any errors */
 		DPRINTFN(6, "no FIFOs\n");
 	}
-	if (udev->re_enumerate_wait == 0) {
-		udev->re_enumerate_wait = 1;
-		usb_needs_explore(udev->bus, 0);
-	}
+	/* start re-enumeration of device */
+	usbd_start_re_enumerate(udev);
 	return (0);
 }
 
diff --git a/sys/dev/usb/usb_hub.c b/sys/dev/usb/usb_hub.c
index ce8a4a5139f3..351b1343dae5 100644
--- a/sys/dev/usb/usb_hub.c
+++ b/sys/dev/usb/usb_hub.c
@@ -242,9 +242,14 @@ uhub_explore_sub(struct uhub_softc *sc, struct usb_port *up)
 	if (child->flags.usb_mode == USB_MODE_HOST) {
 		usbd_enum_lock(child);
 		if (child->re_enumerate_wait) {
-			err = usbd_set_config_index(child, USB_UNCONFIG_INDEX);
-			if (err == 0)
-				err = usbd_req_re_enumerate(child, NULL);
+			err = usbd_set_config_index(child,
+			    USB_UNCONFIG_INDEX);
+			if (err != 0) {
+				DPRINTF("Unconfigure failed: "
+				    "%s: Ignored.\n",
+				    usbd_errstr(err));
+			}
+			err = usbd_req_re_enumerate(child, NULL);
 			if (err == 0)
 				err = usbd_set_config_index(child, 0);
 			if (err == 0) {
@@ -2471,3 +2476,19 @@ usbd_filter_power_mode(struct usb_device *udev, uint8_t power_mode)
 	/* use fixed power mode given by hardware driver */
 	return (temp);
 }
+
+/*------------------------------------------------------------------------*
+ *	usbd_start_re_enumerate
+ *
+ * Set the device's re_enumerate_wait flag and call usb_needs_explore() on
+ * its bus, unless the flag is already set. Does not need to be called
+ * BUS-locked and does not wait until the re-enumeration is completed.
+ *------------------------------------------------------------------------*/
+void
+usbd_start_re_enumerate(struct usb_device *udev)
+{
+	if (udev->re_enumerate_wait == 0) {	/* no request pending yet */
+		udev->re_enumerate_wait = 1;
+		usb_needs_explore(udev->bus, 0);
+	}
+}
diff --git a/sys/dev/usb/usb_request.c b/sys/dev/usb/usb_request.c
index c099e7173e2a..4358ef42030b 100644
--- a/sys/dev/usb/usb_request.c
+++ b/sys/dev/usb/usb_request.c
@@ -238,6 +238,10 @@ usb_do_clear_stall_callback(struct usb_xfer *xfer, usb_error_t error)
 
 	switch (USB_GET_STATE(xfer)) {
 	case USB_ST_TRANSFERRED:
+
+		/* reset error counter */
+		udev->clear_stall_errors = 0;
+
 		if (ep == NULL)
 			goto tr_setup;		/* device was unconfigured */
 		if (ep->edesc &&
@@ -289,8 +293,23 @@ usb_do_clear_stall_callback(struct usb_xfer *xfer, usb_error_t error)
 		goto tr_setup;
 
 	default:
-		if (xfer->error == USB_ERR_CANCELLED) {
+		if (error == USB_ERR_CANCELLED)
 			break;
+
+		DPRINTF("Clear stall failed.\n");
+		if (udev->clear_stall_errors == USB_CS_RESET_LIMIT)
+			goto tr_setup;
+
+		if (error == USB_ERR_TIMEOUT) {
+			udev->clear_stall_errors = USB_CS_RESET_LIMIT;
+			DPRINTF("Trying to re-enumerate.\n");
+			usbd_start_re_enumerate(udev);
+		} else {
+			udev->clear_stall_errors++;
+			if (udev->clear_stall_errors == USB_CS_RESET_LIMIT) {
+				DPRINTF("Trying to re-enumerate.\n");
+				usbd_start_re_enumerate(udev);
+			}
 		}
 		goto tr_setup;
 	}
@@ -1936,6 +1955,23 @@ usbd_req_re_enumerate(struct usb_device *udev, struct mtx *mtx)
 		return (USB_ERR_INVAL);
 	}
 retry:
+	/*
+	 * Try to reset the High Speed parent HUB of a LOW- or FULL-
+	 * speed device, if any.
+	 */
+	if (udev->parent_hs_hub != NULL &&
+	    udev->speed != USB_SPEED_HIGH) {
+		DPRINTF("Trying to reset parent High Speed TT.\n");
+		err = usbd_req_reset_tt(udev->parent_hs_hub, NULL,
+		    udev->hs_port_no);
+		if (err) {
+			DPRINTF("Resetting parent High "
+			    "Speed TT failed (%s).\n",
+			    usbd_errstr(err));
+		}
+	}
+
+	/* Try to reset the parent HUB port. */
 	err = usbd_req_reset_port(parent_hub, mtx, udev->port_no);
 	if (err) {
 		DPRINTFN(0, "addr=%d, port reset failed, %s\n", 
@@ -2033,3 +2069,65 @@ usbd_req_set_device_feature(struct usb_device *udev, struct mtx *mtx,
 	USETW(req.wLength, 0);
 	return (usbd_do_request(udev, mtx, &req, 0));
 }
+
+/*------------------------------------------------------------------------*
+ *	usbd_req_reset_tt - send a UR_RESET_TT class request to udev
+ *
+ * Returns:
+ *    0: Success
+ * Else: Failure
+ *------------------------------------------------------------------------*/
+usb_error_t
+usbd_req_reset_tt(struct usb_device *udev, struct mtx *mtx,
+    uint8_t port)
+{
+	struct usb_device_request req;
+
+	/* For single TT HUBs (UDPROTO_HSHUBSTT) the port should be 1 */
+
+	if (udev->ddesc.bDeviceClass == UDCLASS_HUB &&
+	    udev->ddesc.bDeviceProtocol == UDPROTO_HSHUBSTT)
+		port = 1;
+
+	req.bmRequestType = UT_WRITE_CLASS_OTHER;
+	req.bRequest = UR_RESET_TT;
+	USETW(req.wValue, 0);
+	req.wIndex[0] = port;	/* wIndex carries the port, high byte 0 */
+	req.wIndex[1] = 0;
+	USETW(req.wLength, 0);	/* no data stage */
+	return (usbd_do_request(udev, mtx, &req, 0));
+}
+
+/*------------------------------------------------------------------------*
+ *	usbd_req_clear_tt_buffer - send a UR_CLEAR_TT_BUFFER request to udev
+ *
+ * "addr", "type" and "endpoint" (bit 7 = direction) are packed into wValue.
+ * For single TT HUBs the port should be 1.
+ * Returns:
+ *    0: Success
+ * Else: Failure
+ *------------------------------------------------------------------------*/
+usb_error_t
+usbd_req_clear_tt_buffer(struct usb_device *udev, struct mtx *mtx,
+    uint8_t port, uint8_t addr, uint8_t type, uint8_t endpoint)
+{
+	struct usb_device_request req;
+	uint16_t wValue;
+
+	/* For single TT HUBs (UDPROTO_HSHUBSTT) the port should be 1 */
+	if (udev->ddesc.bDeviceClass == UDCLASS_HUB &&
+	    udev->ddesc.bDeviceProtocol == UDPROTO_HSHUBSTT)
+		port = 1;
+
+	/* wValue: 3..0 endpoint, 10..4 address, 12..11 type, 15 direction */
+	wValue = (endpoint & 0xF) | ((addr & 0x7F) << 4) |
+	    ((endpoint & 0x80) << 8) | ((type & 3) << 11);
+
+	req.bmRequestType = UT_WRITE_CLASS_OTHER;
+	req.bRequest = UR_CLEAR_TT_BUFFER;
+	USETW(req.wValue, wValue);
+	req.wIndex[0] = port;	/* wIndex carries the TT port, high byte 0 */
+	req.wIndex[1] = 0;
+	USETW(req.wLength, 0);	/* no data stage */
+	return (usbd_do_request(udev, mtx, &req, 0));
+}
diff --git a/sys/dev/usb/usb_request.h b/sys/dev/usb/usb_request.h
index 12f373d5fa5f..ac7a7c160b6b 100644
--- a/sys/dev/usb/usb_request.h
+++ b/sys/dev/usb/usb_request.h
@@ -85,5 +85,9 @@ usb_error_t usbd_req_set_hub_u2_timeout(struct usb_device *udev,
 		    struct mtx *mtx, uint8_t port, uint8_t timeout);
 usb_error_t usbd_req_set_hub_depth(struct usb_device *udev,
 		    struct mtx *mtx, uint16_t depth);
+usb_error_t usbd_req_reset_tt(struct usb_device *udev, struct mtx *mtx,
+		    uint8_t port);
+usb_error_t usbd_req_clear_tt_buffer(struct usb_device *udev, struct mtx *mtx,
+		    uint8_t port, uint8_t addr, uint8_t type, uint8_t endpoint);
 
 #endif					/* _USB_REQUEST_H_ */
diff --git a/sys/dev/usb/usb_transfer.c b/sys/dev/usb/usb_transfer.c
index 5fd4f5a19a92..d4c2408db7c9 100644
--- a/sys/dev/usb/usb_transfer.c
+++ b/sys/dev/usb/usb_transfer.c
@@ -2927,6 +2927,11 @@ usbd_ctrl_transfer_setup(struct usb_device *udev)
 	 */
 	usbd_transfer_unsetup(udev->ctrl_xfer, USB_CTRL_XFER_MAX);
 
+	/*
+	 * Reset clear stall error counter.
+	 */
+	udev->clear_stall_errors = 0;
+
 	/*
 	 * Try to setup a new USB transfer for the
 	 * default control endpoint:
diff --git a/sys/dev/usb/usbdi.h b/sys/dev/usb/usbdi.h
index 8f6da7c68c81..91cd3fae3aaf 100644
--- a/sys/dev/usb/usbdi.h
+++ b/sys/dev/usb/usbdi.h
@@ -542,6 +542,7 @@ void	usbd_m_copy_in(struct usb_page_cache *cache, usb_frlength_t dst_offset,
 	    struct mbuf *m, usb_size_t src_offset, usb_frlength_t src_len);
 void	usbd_frame_zero(struct usb_page_cache *cache, usb_frlength_t offset,
 	    usb_frlength_t len);
+void	usbd_start_re_enumerate(struct usb_device *udev);
 
 int	usb_fifo_attach(struct usb_device *udev, void *priv_sc,
 	    struct mtx *priv_mtx, struct usb_fifo_methods *pm,
diff --git a/sys/dev/xen/control/control.c b/sys/dev/xen/control/control.c
index c03d5365530b..0f4418142684 100644
--- a/sys/dev/xen/control/control.c
+++ b/sys/dev/xen/control/control.c
@@ -203,24 +203,29 @@ xctrl_suspend()
 	unsigned long max_pfn, start_info_mfn;
 
 #ifdef SMP
-	cpumask_t map;
+	struct thread *td;
+	cpuset_t map;
 	/*
 	 * Bind us to CPU 0 and stop any other VCPUs.
 	 */
-	thread_lock(curthread);
-	sched_bind(curthread, 0);
-	thread_unlock(curthread);
+	td = curthread;
+	thread_lock(td);
+	sched_bind(td, 0);
+	thread_unlock(td);
 	KASSERT(PCPU_GET(cpuid) == 0, ("xen_suspend: not running on cpu 0"));
 
-	map = PCPU_GET(other_cpus) & ~stopped_cpus;
-	if (map)
+	sched_pin();
+	map = PCPU_GET(other_cpus);
+	sched_unpin();
+	CPU_NAND(&map, &stopped_cpus);
+	if (!CPU_EMPTY(&map))
 		stop_cpus(map);
 #endif
 
 	if (DEVICE_SUSPEND(root_bus) != 0) {
 		printf("xen_suspend: device_suspend failed\n");
 #ifdef SMP
-		if (map)
+		if (!CPU_EMPTY(&map))
 			restart_cpus(map);
 #endif
 		return;
@@ -289,7 +294,7 @@ xctrl_suspend()
 	thread_lock(curthread);
 	sched_unbind(curthread);
 	thread_unlock(curthread);
-	if (map)
+	if (!CPU_EMPTY(&map))
 		restart_cpus(map);
 #endif
 }
diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h
index 8ed60a727cc9..5f944b5538f1 100644
--- a/sys/fs/nfs/nfs_var.h
+++ b/sys/fs/nfs/nfs_var.h
@@ -401,10 +401,10 @@ int nfsrpc_readdirplus(vnode_t, struct uio *, nfsuint64 *,
 int nfsrpc_commit(vnode_t, u_quad_t, int, struct ucred *,
     NFSPROC_T *, u_char *, struct nfsvattr *, int *, void *);
 int nfsrpc_advlock(vnode_t, off_t, int, struct flock *, int,
-    struct ucred *, NFSPROC_T *);
+    struct ucred *, NFSPROC_T *, void *, int);
 int nfsrpc_lockt(struct nfsrv_descript *, vnode_t,
     struct nfsclclient *, u_int64_t, u_int64_t, struct flock *,
-    struct ucred *, NFSPROC_T *);
+    struct ucred *, NFSPROC_T *, void *, int);
 int nfsrpc_lock(struct nfsrv_descript *, struct nfsmount *, vnode_t,
     u_int8_t *, int, struct nfscllockowner *, int, int, u_int64_t,
     u_int64_t, short, struct ucred *, NFSPROC_T *, int);
@@ -439,16 +439,16 @@ struct nfsclclient *nfscl_findcl(struct nfsmount *);
 void nfscl_clientrelease(struct nfsclclient *);
 void nfscl_freelock(struct nfscllock *, int);
 int nfscl_getbytelock(vnode_t, u_int64_t, u_int64_t, short,
-    struct ucred *, NFSPROC_T *, struct nfsclclient *, int, u_int8_t *,
-    u_int8_t *, struct nfscllockowner **, int *, int *);
+    struct ucred *, NFSPROC_T *, struct nfsclclient *, int, void *, int,
+    u_int8_t *, u_int8_t *, struct nfscllockowner **, int *, int *);
 int nfscl_relbytelock(vnode_t, u_int64_t, u_int64_t,
     struct ucred *, NFSPROC_T *, int, struct nfsclclient *,
-    struct nfscllockowner **, int *);
+    void *, int, struct nfscllockowner **, int *);
 int nfscl_checkwritelocked(vnode_t, struct flock *,
-    struct ucred *, NFSPROC_T *);
+    struct ucred *, NFSPROC_T *, void *, int);
 void nfscl_lockrelease(struct nfscllockowner *, int, int);
 void nfscl_fillclid(u_int64_t, char *, u_int8_t *, u_int16_t);
-void nfscl_filllockowner(NFSPROC_T *, u_int8_t *);
+void nfscl_filllockowner(void *, u_int8_t *, int);
 void nfscl_freeopen(struct nfsclopen *, int);
 void nfscl_umount(struct nfsmount *, NFSPROC_T *);
 void nfscl_renewthread(struct nfsclclient *, NFSPROC_T *);
@@ -466,9 +466,10 @@ void nfscl_lockexcl(struct nfsv4lock *, void *);
 void nfscl_lockunlock(struct nfsv4lock *);
 void nfscl_lockderef(struct nfsv4lock *);
 void nfscl_docb(struct nfsrv_descript *, NFSPROC_T *);
-void nfscl_releasealllocks(struct nfsclclient *, vnode_t, NFSPROC_T *);
+void nfscl_releasealllocks(struct nfsclclient *, vnode_t, NFSPROC_T *, void *,
+    int);
 int nfscl_lockt(vnode_t, struct nfsclclient *, u_int64_t,
-    u_int64_t, struct flock *, NFSPROC_T *);
+    u_int64_t, struct flock *, NFSPROC_T *, void *, int);
 int nfscl_mustflush(vnode_t);
 int nfscl_nodeleg(vnode_t, int);
 int nfscl_removedeleg(vnode_t, NFSPROC_T *, nfsv4stateid_t *);
diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c
index 0c3a4c90b07d..4d88bd27eab9 100644
--- a/sys/fs/nfsclient/nfs_clport.c
+++ b/sys/fs/nfsclient/nfs_clport.c
@@ -500,7 +500,7 @@ nfscl_fillclid(u_int64_t clval, char *uuid, u_int8_t *cp, u_int16_t idlen)
  * Fill in a lock owner name. For now, pid + the process's creation time.
  */
 void
-nfscl_filllockowner(struct thread *td, u_int8_t *cp)
+nfscl_filllockowner(void *id, u_int8_t *cp, int flags)
 {
 	union {
 		u_int32_t	lval;
@@ -508,37 +508,35 @@ nfscl_filllockowner(struct thread *td, u_int8_t *cp)
 	} tl;
 	struct proc *p;
 
-if (td == NULL) {
-	printf("NULL td\n");
-	bzero(cp, 12);
-	return;
-}
-	p = td->td_proc;
-if (p == NULL) {
-	printf("NULL pid\n");
-	bzero(cp, 12);
-	return;
-}
-	tl.lval = p->p_pid;
-	*cp++ = tl.cval[0];
-	*cp++ = tl.cval[1];
-	*cp++ = tl.cval[2];
-	*cp++ = tl.cval[3];
-if (p->p_stats == NULL) {
-	printf("pstats null\n");
-	bzero(cp, 8);
-	return;
-}
-	tl.lval = p->p_stats->p_start.tv_sec;
-	*cp++ = tl.cval[0];
-	*cp++ = tl.cval[1];
-	*cp++ = tl.cval[2];
-	*cp++ = tl.cval[3];
-	tl.lval = p->p_stats->p_start.tv_usec;
-	*cp++ = tl.cval[0];
-	*cp++ = tl.cval[1];
-	*cp++ = tl.cval[2];
-	*cp = tl.cval[3];
+	if (id == NULL) {
+		printf("NULL id\n");
+		bzero(cp, NFSV4CL_LOCKNAMELEN);
+		return;
+	}
+	if ((flags & F_POSIX) != 0) {
+		p = (struct proc *)id;
+		tl.lval = p->p_pid;
+		*cp++ = tl.cval[0];
+		*cp++ = tl.cval[1];
+		*cp++ = tl.cval[2];
+		*cp++ = tl.cval[3];
+		tl.lval = p->p_stats->p_start.tv_sec;
+		*cp++ = tl.cval[0];
+		*cp++ = tl.cval[1];
+		*cp++ = tl.cval[2];
+		*cp++ = tl.cval[3];
+		tl.lval = p->p_stats->p_start.tv_usec;
+		*cp++ = tl.cval[0];
+		*cp++ = tl.cval[1];
+		*cp++ = tl.cval[2];
+		*cp = tl.cval[3];
+	} else if ((flags & F_FLOCK) != 0) {
+		bcopy(&id, cp, sizeof(id));
+		bzero(&cp[sizeof(id)], NFSV4CL_LOCKNAMELEN - sizeof(id));
+	} else {
+		printf("nfscl_filllockowner: not F_POSIX or F_FLOCK\n");
+		bzero(cp, NFSV4CL_LOCKNAMELEN);
+	}
 }
 
 /*
@@ -943,6 +941,7 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p)
 		sad.sin_family = AF_INET;
 		sad.sin_len = sizeof (struct sockaddr_in);
 		sad.sin_addr.s_addr = sin->sin_addr.s_addr;
+		CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred));
 		rt = rtalloc1((struct sockaddr *)&sad, 0, 0UL);
 		if (rt != NULL) {
 			if (rt->rt_ifp != NULL &&
@@ -956,6 +955,7 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p)
 			}
 			RTFREE_LOCKED(rt);
 		}
+		CURVNET_RESTORE();
 #ifdef INET6
 	} else if (nmp->nm_nam->sa_family == AF_INET6) {
 		struct sockaddr_in6 sad6, *sin6;
@@ -966,6 +966,7 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p)
 		sad6.sin6_family = AF_INET6;
 		sad6.sin6_len = sizeof (struct sockaddr_in6);
 		sad6.sin6_addr = sin6->sin6_addr;
+		CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred));
 		rt = rtalloc1((struct sockaddr *)&sad6, 0, 0UL);
 		if (rt != NULL) {
 			if (rt->rt_ifp != NULL &&
@@ -980,6 +981,7 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p)
 			}
 			RTFREE_LOCKED(rt);
 		}
+		CURVNET_RESTORE();
 #endif
 	}
 	return (retp);
diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c
index 0fc9bfd1da92..5d83d0bafd25 100644
--- a/sys/fs/nfsclient/nfs_clrpcops.c
+++ b/sys/fs/nfsclient/nfs_clrpcops.c
@@ -3459,7 +3459,7 @@ nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred,
  */
 APPLESTATIC int
 nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
-    int reclaim, struct ucred *cred, NFSPROC_T *p)
+    int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags)
 {
 	struct nfscllockowner *lp;
 	struct nfsclclient *clp;
@@ -3511,11 +3511,11 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
 		error = nfscl_getcl(vp, cred, p, &clp);
 		if (error)
 			return (error);
-		error = nfscl_lockt(vp, clp, off, len, fl, p);
+		error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags);
 		if (!error) {
 			clidrev = clp->nfsc_clientidrev;
 			error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred,
-			    p);
+			    p, id, flags);
 		} else if (error == -1) {
 			error = 0;
 		}
@@ -3530,7 +3530,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
 			return (error);
 		do {
 		    error = nfscl_relbytelock(vp, off, len, cred, p, callcnt,
-			clp, &lp, &dorpc);
+			clp, id, flags, &lp, &dorpc);
 		    /*
 		     * If it returns a NULL lp, we're done.
 		     */
@@ -3538,7 +3538,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
 			if (callcnt == 0)
 			    nfscl_clientrelease(clp);
 			else
-			    nfscl_releasealllocks(clp, vp, p);
+			    nfscl_releasealllocks(clp, vp, p, id, flags);
 			return (error);
 		    }
 		    if (nmp->nm_clp != NULL)
@@ -3572,10 +3572,10 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
 		    }
 		    callcnt++;
 		} while (error == 0 && nd->nd_repstat == 0);
-		nfscl_releasealllocks(clp, vp, p);
+		nfscl_releasealllocks(clp, vp, p, id, flags);
 	    } else if (op == F_SETLK) {
 		error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p,
-		    NULL, 0, NULL, NULL, &lp, &newone, &donelocally);
+		    NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally);
 		if (error || donelocally) {
 			return (error);
 		}
@@ -3625,7 +3625,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl,
 APPLESTATIC int
 nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
     struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl,
-    struct ucred *cred, NFSPROC_T *p)
+    struct ucred *cred, NFSPROC_T *p, void *id, int flags)
 {
 	u_int32_t *tl;
 	int error, type, size;
@@ -3643,7 +3643,7 @@ nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp,
 	tl += 2;
 	*tl++ = clp->nfsc_clientid.lval[0];
 	*tl = clp->nfsc_clientid.lval[1];
-	nfscl_filllockowner(p, own);
+	nfscl_filllockowner(id, own, flags);
 	(void) nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN);
 	error = nfscl_request(nd, vp, p, cred, NULL);
 	if (error)
diff --git a/sys/fs/nfsclient/nfs_clstate.c b/sys/fs/nfsclient/nfs_clstate.c
index 86d71b66bc9b..aa81437fb2ed 100644
--- a/sys/fs/nfsclient/nfs_clstate.c
+++ b/sys/fs/nfsclient/nfs_clstate.c
@@ -226,7 +226,7 @@ nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg,
 	 * If none found, add the new one or return error, depending upon
 	 * "create".
 	 */
-	nfscl_filllockowner(p, own);
+	nfscl_filllockowner(p->td_proc, own, F_POSIX);
 	NFSLOCKCLSTATE();
 	dp = NULL;
 	/* First check the delegation list */
@@ -521,7 +521,7 @@ nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode,
 		 * If p != NULL, we want to search the parentage tree
 		 * for a matching OpenOwner and use that.
 		 */
-		nfscl_filllockowner(p, own);
+		nfscl_filllockowner(p->td_proc, own, F_POSIX);
 		error = nfscl_getopen(&clp->nfsc_owner, nfhp, fhlen, NULL, p,
 		    mode, NULL, &op);
 		if (error == 0) {
@@ -596,7 +596,7 @@ nfscl_getopen(struct nfsclownerhead *ohp, u_int8_t *nfhp, int fhlen,
 	op = NULL;
 	while (op == NULL && (nproc != NULL || rown != NULL)) {
 		if (nproc != NULL) {
-			nfscl_filllockowner(nproc, own);
+			nfscl_filllockowner(nproc->td_proc, own, F_POSIX);
 			ownp = own;
 		} else {
 			ownp = rown;
@@ -881,7 +881,7 @@ nfscl_clientrelease(struct nfsclclient *clp)
 APPLESTATIC int
 nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
     short type, struct ucred *cred, NFSPROC_T *p, struct nfsclclient *rclp,
-    int recovery, u_int8_t *rownp, u_int8_t *ropenownp,
+    int recovery, void *id, int flags, u_int8_t *rownp, u_int8_t *ropenownp,
     struct nfscllockowner **lpp, int *newonep, int *donelocallyp)
 {
 	struct nfscllockowner *lp;
@@ -942,7 +942,7 @@ nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
 	if (recovery) {
 		ownp = rownp;
 	} else {
-		nfscl_filllockowner(p, own);
+		nfscl_filllockowner(id, own, flags);
 		ownp = own;
 	}
 	if (!recovery) {
@@ -1079,7 +1079,8 @@ nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
 APPLESTATIC int
 nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
     __unused struct ucred *cred, NFSPROC_T *p, int callcnt,
-    struct nfsclclient *clp, struct nfscllockowner **lpp, int *dorpcp)
+    struct nfsclclient *clp, void *id, int flags,
+    struct nfscllockowner **lpp, int *dorpcp)
 {
 	struct nfscllockowner *lp;
 	struct nfsclowner *owp;
@@ -1116,7 +1117,7 @@ nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
 		    sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK);
 		*other_lop = *nlop;
 	}
-	nfscl_filllockowner(p, own);
+	nfscl_filllockowner(id, own, flags);
 	dp = NULL;
 	NFSLOCKCLSTATE();
 	if (callcnt == 0)
@@ -1188,7 +1189,8 @@ nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len,
  * Release all lockowners marked in progess for this process and file.
  */
 APPLESTATIC void
-nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p)
+nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p,
+    void *id, int flags)
 {
 	struct nfsclowner *owp;
 	struct nfsclopen *op;
@@ -1197,7 +1199,7 @@ nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p)
 	u_int8_t own[NFSV4CL_LOCKNAMELEN];
 
 	np = VTONFS(vp);
-	nfscl_filllockowner(p, own);
+	nfscl_filllockowner(id, own, flags);
 	NFSLOCKCLSTATE();
 	LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) {
 	    LIST_FOREACH(op, &owp->nfsow_open, nfso_list) {
@@ -1226,7 +1228,7 @@ nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p)
  */
 APPLESTATIC int
 nfscl_checkwritelocked(vnode_t vp, struct flock *fl,
-    struct ucred *cred, NFSPROC_T *p)
+    struct ucred *cred, NFSPROC_T *p, void *id, int flags)
 {
 	struct nfsclowner *owp;
 	struct nfscllockowner *lp;
@@ -1266,7 +1268,7 @@ nfscl_checkwritelocked(vnode_t vp, struct flock *fl,
 	error = nfscl_getcl(vp, cred, p, &clp);
 	if (error)
 		return (1);
-	nfscl_filllockowner(p, own);
+	nfscl_filllockowner(id, own, flags);
 	NFSLOCKCLSTATE();
 
 	/*
@@ -1641,7 +1643,7 @@ nfscl_cleanup(NFSPROC_T *p)
 
 	if (!nfscl_inited)
 		return;
-	nfscl_filllockowner(p, own);
+	nfscl_filllockowner(p->td_proc, own, F_POSIX);
 
 	NFSLOCKCLSTATE();
 	/*
@@ -3322,7 +3324,7 @@ nfscl_checkconflict(struct nfscllockownerhead *lhp, struct nfscllock *nlop,
  */
 APPLESTATIC int
 nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off,
-    u_int64_t len, struct flock *fl, NFSPROC_T *p)
+    u_int64_t len, struct flock *fl, NFSPROC_T *p, void *id, int flags)
 {
 	struct nfscllock *lop, nlck;
 	struct nfscldeleg *dp;
@@ -3340,7 +3342,7 @@ nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off,
 			return (NFSERR_INVAL);
 	}
 	np = VTONFS(vp);
-	nfscl_filllockowner(p, own);
+	nfscl_filllockowner(id, own, flags);
 	NFSLOCKCLSTATE();
 	dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len);
 	error = nfscl_localconflict(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len,
@@ -3615,7 +3617,7 @@ nfscl_relock(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp,
 	off = lop->nfslo_first;
 	len = lop->nfslo_end - lop->nfslo_first;
 	error = nfscl_getbytelock(vp, off, len, lop->nfslo_type, cred, p,
-	    clp, 1, lp->nfsl_owner, lp->nfsl_openowner, &nlp, &newone,
+	    clp, 1, NULL, 0, lp->nfsl_owner, lp->nfsl_openowner, &nlp, &newone,
 	    &donelocally);
 	if (error || donelocally)
 		return (error);
diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c
index 984724d93aed..3e1c66d1de2d 100644
--- a/sys/fs/nfsclient/nfs_clvnops.c
+++ b/sys/fs/nfsclient/nfs_clvnops.c
@@ -2884,8 +2884,11 @@ nfs_advlock(struct vop_advlock_args *ap)
 	int ret, error = EOPNOTSUPP;
 	u_quad_t size;
 	
-	if (NFS_ISV4(vp) && (ap->a_flags & F_POSIX)) {
-		cred = p->p_ucred;
+	if (NFS_ISV4(vp) && (ap->a_flags & (F_POSIX | F_FLOCK)) != 0) {
+		if ((ap->a_flags & F_POSIX) != 0)
+			cred = p->p_ucred;
+		else
+			cred = td->td_ucred;
 		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
 		if (vp->v_iflag & VI_DOOMED) {
 			VOP_UNLOCK(vp, 0);
@@ -2898,7 +2901,8 @@ nfs_advlock(struct vop_advlock_args *ap)
 		 * RFC3530 Sec. 9.3.2.
 		 */
 		if (ap->a_op == F_UNLCK &&
-		    nfscl_checkwritelocked(vp, ap->a_fl, cred, td))
+		    nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id,
+		    ap->a_flags))
 			(void) ncl_flush(vp, MNT_WAIT, cred, td, 1, 0);
 
 		/*
@@ -2907,7 +2911,7 @@ nfs_advlock(struct vop_advlock_args *ap)
 		 */
 		do {
 			ret = nfsrpc_advlock(vp, np->n_size, ap->a_op,
-			    ap->a_fl, 0, cred, td);
+			    ap->a_fl, 0, cred, td, ap->a_id, ap->a_flags);
 			if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) &&
 			    ap->a_op == F_SETLK) {
 				VOP_UNLOCK(vp, 0);
diff --git a/sys/geom/eli/g_eli.c b/sys/geom/eli/g_eli.c
index 74c70ff2f07e..30497a453ea8 100644
--- a/sys/geom/eli/g_eli.c
+++ b/sys/geom/eli/g_eli.c
@@ -672,7 +672,7 @@ static int
 g_eli_cpu_is_disabled(int cpu)
 {
 #ifdef SMP
-	return ((hlt_cpus_mask & (1 << cpu)) != 0);
+	return (CPU_ISSET(cpu, &hlt_cpus_mask));
 #else
 	return (0);
 #endif
diff --git a/sys/i386/i386/intr_machdep.c b/sys/i386/i386/intr_machdep.c
index 77b80048620f..56529f7b5b03 100644
--- a/sys/i386/i386/intr_machdep.c
+++ b/sys/i386/i386/intr_machdep.c
@@ -409,8 +409,7 @@ DB_SHOW_COMMAND(irqs, db_show_irqs)
  * allocate CPUs round-robin.
  */
 
-/* The BSP is always a valid target. */
-static cpumask_t intr_cpus = (1 << 0);
+static cpuset_t intr_cpus;
 static int current_cpu;
 
 /*
@@ -432,7 +431,7 @@ intr_next_cpu(void)
 		current_cpu++;
 		if (current_cpu > mp_maxid)
 			current_cpu = 0;
-	} while (!(intr_cpus & (1 << current_cpu)));
+	} while (!CPU_ISSET(current_cpu, &intr_cpus));
 	mtx_unlock_spin(&icu_lock);
 	return (apic_id);
 }
@@ -463,7 +462,7 @@ intr_add_cpu(u_int cpu)
 		printf("INTR: Adding local APIC %d as a target\n",
 		    cpu_apic_ids[cpu]);
 
-	intr_cpus |= (1 << cpu);
+	CPU_SET(cpu, &intr_cpus);
 }
 
 /*
@@ -483,6 +482,9 @@ intr_shuffle_irqs(void *arg __unused)
 	return;
 #endif
 
+	/* The BSP is always a valid target; keep intr_add_cpu() bits. */
+	CPU_SET(0, &intr_cpus);
+
 	/* Don't bother on UP. */
 	if (mp_ncpus == 1)
 		return;
diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c
index fbf444a3eb4a..91050c43bee3 100644
--- a/sys/i386/i386/machdep.c
+++ b/sys/i386/i386/machdep.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
 #include "opt_isa.h"
 #include "opt_kstack_pages.h"
 #include "opt_maxmem.h"
+#include "opt_mp_watchdog.h"
 #include "opt_npx.h"
 #include "opt_perfmon.h"
 #include "opt_xbox.h"
@@ -118,6 +119,7 @@ __FBSDID("$FreeBSD$");
 #include <x86/mca.h>
 #include <machine/md_var.h>
 #include <machine/metadata.h>
+#include <machine/mp_watchdog.h>
 #include <machine/pc/bios.h>
 #include <machine/pcb.h>
 #include <machine/pcb_ext.h>
@@ -1357,9 +1359,8 @@ cpu_idle(int busy)
 
 	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
 	    busy, curcpu);
-#if defined(SMP) && !defined(XEN)
-	if (mp_grab_cpu_hlt())
-		return;
+#if defined(MP_WATCHDOG) && !defined(XEN)
+	ap_watchdog(PCPU_GET(cpuid));
 #endif
 #ifndef XEN
 	/* If we are busy - try to use fast methods. */
diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c
index a4db4016f8e9..78c90c0f73c7 100644
--- a/sys/i386/i386/mp_machdep.c
+++ b/sys/i386/i386/mp_machdep.c
@@ -29,7 +29,6 @@ __FBSDID("$FreeBSD$");
 #include "opt_apic.h"
 #include "opt_cpu.h"
 #include "opt_kstack_pages.h"
-#include "opt_mp_watchdog.h"
 #include "opt_pmap.h"
 #include "opt_sched.h"
 #include "opt_smp.h"
@@ -51,6 +50,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/cons.h>	/* cngetc() */
+#include <sys/cpuset.h>
 #ifdef GPROF 
 #include <sys/gmon.h>
 #endif
@@ -77,7 +77,6 @@ __FBSDID("$FreeBSD$");
 #include <machine/cputypes.h>
 #include <x86/mca.h>
 #include <machine/md_var.h>
-#include <machine/mp_watchdog.h>
 #include <machine/pcb.h>
 #include <machine/psl.h>
 #include <machine/smp.h>
@@ -173,7 +172,7 @@ static u_long *ipi_hardclock_counts[MAXCPU];
  * Local data and functions.
  */
 
-static volatile cpumask_t ipi_nmi_pending;
+static volatile cpuset_t ipi_nmi_pending;
 
 /* used to hold the AP's until we are ready to release them */
 static struct mtx ap_boot_mtx;
@@ -208,11 +207,8 @@ static int	start_all_aps(void);
 static int	start_ap(int apic_id);
 static void	release_aps(void *dummy);
 
-static int	hlt_logical_cpus;
 static u_int	hyperthreading_cpus;	/* logical cpus sharing L1 cache */
-static cpumask_t	hyperthreading_cpus_mask;
 static int	hyperthreading_allowed = 1;
-static struct	sysctl_ctx_list logical_cpu_clist;
 
 static void
 mem_range_AP_init(void)
@@ -289,8 +285,11 @@ topo_probe_0x4(void)
 	 * logical processors that belong to the same core
 	 * as BSP thus deducing number of threads per core.
 	 */
-	cpuid_count(0x04, 0, p);
-	max_cores = ((p[0] >> 26) & 0x3f) + 1;
+	if (cpu_high >= 0x4) {
+		cpuid_count(0x04, 0, p);
+		max_cores = ((p[0] >> 26) & 0x3f) + 1;
+	} else
+		max_cores = 1;
 	core_id_bits = mask_width(max_logical/max_cores);
 	if (core_id_bits < 0)
 		return;
@@ -382,7 +381,7 @@ topo_probe(void)
 	if (cpu_topo_probed)
 		return;
 
-	logical_cpus_mask = 0;
+	CPU_ZERO(&logical_cpus_mask);
 	if (mp_ncpus <= 1)
 		cpu_cores = cpu_logical = 1;
 	else if (cpu_vendor_id == CPU_VENDOR_AMD)
@@ -524,7 +523,7 @@ cpu_mp_probe(void)
 	 * Always record BSP in CPU map so that the mbuf init code works
 	 * correctly.
 	 */
-	all_cpus = 1;
+	CPU_SETOF(0, &all_cpus);
 	if (mp_ncpus == 0) {
 		/*
 		 * No CPUs were found, so this must be a UP system.  Setup
@@ -659,6 +658,7 @@ cpu_mp_announce(void)
 void
 init_secondary(void)
 {
+	cpuset_t tcpuset, tallcpus;
 	struct pcpu *pc;
 	vm_offset_t addr;
 	int	gsel_tss;
@@ -783,19 +783,17 @@ init_secondary(void)
 
 	CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
 	printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
+	tcpuset = PCPU_GET(cpumask);
 
 	/* Determine if we are a logical CPU. */
 	/* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */
 	if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0)
-		logical_cpus_mask |= PCPU_GET(cpumask);
-	
-	/* Determine if we are a hyperthread. */
-	if (hyperthreading_cpus > 1 &&
-	    PCPU_GET(apic_id) % hyperthreading_cpus != 0)
-		hyperthreading_cpus_mask |= PCPU_GET(cpumask);
+		CPU_OR(&logical_cpus_mask, &tcpuset);
 
 	/* Build our map of 'other' CPUs. */
-	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+	tallcpus = all_cpus;
+	CPU_NAND(&tallcpus, &tcpuset);
+	PCPU_SET(other_cpus, tallcpus);
 
 	if (bootverbose)
 		lapic_dump("AP");
@@ -874,7 +872,7 @@ assign_cpu_ids(void)
 
 		if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) {
 			cpu_info[i].cpu_hyperthread = 1;
-#if defined(SCHED_ULE)
+
 			/*
 			 * Don't use HT CPU if it has been disabled by a
 			 * tunable.
@@ -883,7 +881,6 @@ assign_cpu_ids(void)
 				cpu_info[i].cpu_disabled = 1;
 				continue;
 			}
-#endif
 		}
 
 		/* Don't use this CPU if it has been disabled by a tunable. */
@@ -893,6 +890,11 @@ assign_cpu_ids(void)
 		}
 	}
 
+	if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) {
+		hyperthreading_cpus = 0;
+		cpu_logical = 1;
+	}
+
 	/*
 	 * Assign CPU IDs to local APIC IDs and disable any CPUs
 	 * beyond MAXCPU.  CPU 0 is always assigned to the BSP.
@@ -932,6 +934,7 @@ assign_cpu_ids(void)
 static int
 start_all_aps(void)
 {
+	cpuset_t tallcpus;
 #ifndef PC98
 	u_char mpbiosreason;
 #endif
@@ -991,11 +994,13 @@ start_all_aps(void)
 		}
 		CHECK_PRINT("trace");		/* show checkpoints */
 
-		all_cpus |= (1 << cpu);		/* record AP in CPU map */
+		CPU_SET(cpu, &all_cpus);	/* record AP in CPU map */
 	}
 
 	/* build our map of 'other' CPUs */
-	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+	tallcpus = all_cpus;
+	CPU_NAND(&tallcpus, PCPU_PTR(cpumask));
+	PCPU_SET(other_cpus, tallcpus);
 
 	/* restore the warmstart vector */
 	*(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec;
@@ -1191,6 +1196,30 @@ SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW,
     &ipi_masked_range_size, 0, "");
 #endif /* COUNT_XINVLTLB_HITS */
 
+/*
+ * Send an IPI to the specified CPU, handling the bitmap logic.
+ */
+static void
+ipi_send_cpu(int cpu, u_int ipi)
+{
+	u_int bitmap, old_pending, new_pending;
+
+	KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu));
+
+	if (IPI_IS_BITMAPED(ipi)) {
+		bitmap = 1u << ipi;
+		ipi = IPI_BITMAP_VECTOR;
+		do {
+			old_pending = cpu_ipi_pending[cpu];
+			new_pending = old_pending | bitmap;
+		} while (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
+		    old_pending, new_pending));
+		if (old_pending)	/* bits were already pending; no resend */
+			return;
+	}
+	lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
+}
+
 /*
  * Flush the TLB on all other CPU's
  */
@@ -1215,28 +1244,19 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
 }
 
 static void
-smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
 {
-	int ncpu, othercpus;
+	int cpu, ncpu, othercpus;
 
 	othercpus = mp_ncpus - 1;
-	if (mask == (u_int)-1) {
-		ncpu = othercpus;
-		if (ncpu < 1)
+	if (CPU_ISFULLSET(&mask)) {
+		if (othercpus < 1)
 			return;
 	} else {
-		mask &= ~PCPU_GET(cpumask);
-		if (mask == 0)
-			return;
-		ncpu = bitcount32(mask);
-		if (ncpu > othercpus) {
-			/* XXX this should be a panic offence */
-			printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
-			    ncpu, othercpus);
-			ncpu = othercpus;
-		}
-		/* XXX should be a panic, implied by mask == 0 above */
-		if (ncpu < 1)
+		sched_pin();
+		CPU_NAND(&mask, PCPU_PTR(cpumask));
+		sched_unpin();
+		if (CPU_EMPTY(&mask))
 			return;
 	}
 	if (!(read_eflags() & PSL_I))
@@ -1245,39 +1265,25 @@ smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_o
 	smp_tlb_addr1 = addr1;
 	smp_tlb_addr2 = addr2;
 	atomic_store_rel_int(&smp_tlb_wait, 0);
-	if (mask == (u_int)-1)
+	if (CPU_ISFULLSET(&mask)) {
+		ncpu = othercpus;
 		ipi_all_but_self(vector);
-	else
-		ipi_selected(mask, vector);
+	} else {
+		ncpu = 0;
+		while ((cpu = cpusetobj_ffs(&mask)) != 0) {
+			cpu--;
+			CPU_CLR(cpu, &mask);
+			CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu,
+			    vector);
+			ipi_send_cpu(cpu, vector);
+			ncpu++;
+		}
+	}
 	while (smp_tlb_wait < ncpu)
 		ia32_pause();
 	mtx_unlock_spin(&smp_ipi_mtx);
 }
 
-/*
- * Send an IPI to specified CPU handling the bitmap logic.
- */
-static void
-ipi_send_cpu(int cpu, u_int ipi)
-{
-	u_int bitmap, old_pending, new_pending;
-
-	KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu));
-
-	if (IPI_IS_BITMAPED(ipi)) {
-		bitmap = 1 << ipi;
-		ipi = IPI_BITMAP_VECTOR;
-		do {
-			old_pending = cpu_ipi_pending[cpu];
-			new_pending = old_pending | bitmap;
-		} while  (!atomic_cmpset_int(&cpu_ipi_pending[cpu],
-		    old_pending, new_pending));	
-		if (old_pending)
-			return;
-	}
-	lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]);
-}
-
 void
 smp_cache_flush(void)
 {
@@ -1324,7 +1330,7 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
 }
 
 void
-smp_masked_invltlb(cpumask_t mask)
+smp_masked_invltlb(cpuset_t mask)
 {
 
 	if (smp_started) {
@@ -1336,7 +1342,7 @@ smp_masked_invltlb(cpumask_t mask)
 }
 
 void
-smp_masked_invlpg(cpumask_t mask, vm_offset_t addr)
+smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
 {
 
 	if (smp_started) {
@@ -1348,7 +1354,7 @@ smp_masked_invlpg(cpumask_t mask, vm_offset_t addr)
 }
 
 void
-smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2)
+smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
 {
 
 	if (smp_started) {
@@ -1401,7 +1407,7 @@ ipi_bitmap_handler(struct trapframe frame)
  * send an IPI to a set of cpus.
  */
 void
-ipi_selected(cpumask_t cpus, u_int ipi)
+ipi_selected(cpuset_t cpus, u_int ipi)
 {
 	int cpu;
 
@@ -1411,12 +1417,12 @@ ipi_selected(cpumask_t cpus, u_int ipi)
 	 * Set the mask of receiving CPUs for this purpose.
 	 */
 	if (ipi == IPI_STOP_HARD)
-		atomic_set_int(&ipi_nmi_pending, cpus);
+		CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus);
 
-	CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi);
-	while ((cpu = ffs(cpus)) != 0) {
+	while ((cpu = cpusetobj_ffs(&cpus)) != 0) {
 		cpu--;
-		cpus &= ~(1 << cpu);
+		CPU_CLR(cpu, &cpus);
+		CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
 		ipi_send_cpu(cpu, ipi);
 	}
 }
@@ -1434,7 +1440,7 @@ ipi_cpu(int cpu, u_int ipi)
 	 * Set the mask of receiving CPUs for this purpose.
 	 */
 	if (ipi == IPI_STOP_HARD)
-		atomic_set_int(&ipi_nmi_pending, 1 << cpu);
+		CPU_SET_ATOMIC(cpu, &ipi_nmi_pending);
 
 	CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
 	ipi_send_cpu(cpu, ipi);
@@ -1447,8 +1453,10 @@ void
 ipi_all_but_self(u_int ipi)
 {
 
+	sched_pin();
 	if (IPI_IS_BITMAPED(ipi)) {
 		ipi_selected(PCPU_GET(other_cpus), ipi);
+		sched_unpin();
 		return;
 	}
 
@@ -1458,7 +1466,9 @@ ipi_all_but_self(u_int ipi)
 	 * Set the mask of receiving CPUs for this purpose.
 	 */
 	if (ipi == IPI_STOP_HARD)
-		atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus));
+		CPU_OR_ATOMIC(&ipi_nmi_pending, PCPU_PTR(other_cpus));
+	sched_unpin();
+
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
 	lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS);
 }
@@ -1466,7 +1476,7 @@ ipi_all_but_self(u_int ipi)
 int
 ipi_nmi_handler()
 {
-	cpumask_t cpumask;
+	cpuset_t cpumask;
 
 	/*
 	 * As long as there is not a simple way to know about a NMI's
@@ -1474,11 +1484,13 @@ ipi_nmi_handler()
 	 * the global pending bitword an IPI_STOP_HARD has been issued
 	 * and should be handled.
 	 */
+	sched_pin();
 	cpumask = PCPU_GET(cpumask);
-	if ((ipi_nmi_pending & cpumask) == 0)
+	sched_unpin();
+	if (!CPU_OVERLAP(&ipi_nmi_pending, &cpumask))
 		return (1);
 
-	atomic_clear_int(&ipi_nmi_pending, cpumask);
+	CPU_NAND_ATOMIC(&ipi_nmi_pending, &cpumask);
 	cpustop_handler();
 	return (0);
 }
@@ -1490,23 +1502,25 @@ ipi_nmi_handler()
 void
 cpustop_handler(void)
 {
-	cpumask_t cpumask;
+	cpuset_t cpumask;
 	u_int cpu;
 
+	sched_pin();
 	cpu = PCPU_GET(cpuid);
 	cpumask = PCPU_GET(cpumask);
+	sched_unpin();
 
 	savectx(&stoppcbs[cpu]);
 
 	/* Indicate that we are stopped */
-	atomic_set_int(&stopped_cpus, cpumask);
+	CPU_OR_ATOMIC(&stopped_cpus, &cpumask);
 
 	/* Wait for restart */
-	while (!(started_cpus & cpumask))
+	while (!CPU_OVERLAP(&started_cpus, &cpumask))
 	    ia32_pause();
 
-	atomic_clear_int(&started_cpus, cpumask);
-	atomic_clear_int(&stopped_cpus, cpumask);
+	CPU_NAND_ATOMIC(&started_cpus, &cpumask);
+	CPU_NAND_ATOMIC(&stopped_cpus, &cpumask);
 
 	if (cpu == 0 && cpustop_restartfunc != NULL) {
 		cpustop_restartfunc();
@@ -1530,158 +1544,6 @@ release_aps(void *dummy __unused)
 }
 SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL);
 
-static int
-sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS)
-{
-	cpumask_t mask;
-	int error;
-
-	mask = hlt_cpus_mask;
-	error = sysctl_handle_int(oidp, &mask, 0, req);
-	if (error || !req->newptr)
-		return (error);
-
-	if (logical_cpus_mask != 0 &&
-	    (mask & logical_cpus_mask) == logical_cpus_mask)
-		hlt_logical_cpus = 1;
-	else
-		hlt_logical_cpus = 0;
-
-	if (! hyperthreading_allowed)
-		mask |= hyperthreading_cpus_mask;
-
-	if ((mask & all_cpus) == all_cpus)
-		mask &= ~(1<<0);
-	hlt_cpus_mask = mask;
-	return (error);
-}
-SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW,
-    0, 0, sysctl_hlt_cpus, "IU",
-    "Bitmap of CPUs to halt.  101 (binary) will halt CPUs 0 and 2.");
-
-static int
-sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS)
-{
-	int disable, error;
-
-	disable = hlt_logical_cpus;
-	error = sysctl_handle_int(oidp, &disable, 0, req);
-	if (error || !req->newptr)
-		return (error);
-
-	if (disable)
-		hlt_cpus_mask |= logical_cpus_mask;
-	else
-		hlt_cpus_mask &= ~logical_cpus_mask;
-
-	if (! hyperthreading_allowed)
-		hlt_cpus_mask |= hyperthreading_cpus_mask;
-
-	if ((hlt_cpus_mask & all_cpus) == all_cpus)
-		hlt_cpus_mask &= ~(1<<0);
-
-	hlt_logical_cpus = disable;
-	return (error);
-}
-
-static int
-sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS)
-{
-	int allowed, error;
-
-	allowed = hyperthreading_allowed;
-	error = sysctl_handle_int(oidp, &allowed, 0, req);
-	if (error || !req->newptr)
-		return (error);
-
-#ifdef SCHED_ULE
-	/*
-	 * SCHED_ULE doesn't allow enabling/disabling HT cores at
-	 * run-time.
-	 */
-	if (allowed != hyperthreading_allowed)
-		return (ENOTSUP);
-	return (error);
-#endif
-
-	if (allowed)
-		hlt_cpus_mask &= ~hyperthreading_cpus_mask;
-	else
-		hlt_cpus_mask |= hyperthreading_cpus_mask;
-
-	if (logical_cpus_mask != 0 &&
-	    (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask)
-		hlt_logical_cpus = 1;
-	else
-		hlt_logical_cpus = 0;
-
-	if ((hlt_cpus_mask & all_cpus) == all_cpus)
-		hlt_cpus_mask &= ~(1<<0);
-
-	hyperthreading_allowed = allowed;
-	return (error);
-}
-
-static void
-cpu_hlt_setup(void *dummy __unused)
-{
-
-	if (logical_cpus_mask != 0) {
-		TUNABLE_INT_FETCH("machdep.hlt_logical_cpus",
-		    &hlt_logical_cpus);
-		sysctl_ctx_init(&logical_cpu_clist);
-		SYSCTL_ADD_PROC(&logical_cpu_clist,
-		    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
-		    "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0,
-		    sysctl_hlt_logical_cpus, "IU", "");
-		SYSCTL_ADD_UINT(&logical_cpu_clist,
-		    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
-		    "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD,
-		    &logical_cpus_mask, 0, "");
-
-		if (hlt_logical_cpus)
-			hlt_cpus_mask |= logical_cpus_mask;
-
-		/*
-		 * If necessary for security purposes, force
-		 * hyperthreading off, regardless of the value
-		 * of hlt_logical_cpus.
-		 */
-		if (hyperthreading_cpus_mask) {
-			SYSCTL_ADD_PROC(&logical_cpu_clist,
-			    SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO,
-			    "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW,
-			    0, 0, sysctl_hyperthreading_allowed, "IU", "");
-			if (! hyperthreading_allowed)
-				hlt_cpus_mask |= hyperthreading_cpus_mask;
-		}
-	}
-}
-SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL);
-
-int
-mp_grab_cpu_hlt(void)
-{
-	cpumask_t mask;
-#ifdef MP_WATCHDOG
-	u_int cpuid;
-#endif
-	int retval;
-
-	mask = PCPU_GET(cpumask);
-#ifdef MP_WATCHDOG
-	cpuid = PCPU_GET(cpuid);
-	ap_watchdog(cpuid);
-#endif
-
-	retval = 0;
-	while (mask & hlt_cpus_mask) {
-		retval = 1;
-		__asm __volatile("sti; hlt" : : : "memory");
-	}
-	return (retval);
-}
-
 #ifdef COUNT_IPIS
 /*
  * Setup interrupt counters for IPI handlers.
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index d10bbe5a7701..3f9248df2b35 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -125,6 +125,8 @@ __FBSDID("$FreeBSD$");
 #include <sys/sysctl.h>
 #ifdef SMP
 #include <sys/smp.h>
+#else
+#include <sys/cpuset.h>
 #endif
 
 #include <vm/vm.h>
@@ -386,7 +388,7 @@ pmap_bootstrap(vm_paddr_t firstaddr)
 	kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
 #endif
 	kernel_pmap->pm_root = NULL;
-	kernel_pmap->pm_active = -1;	/* don't allow deactivation */
+	CPU_FILL(&kernel_pmap->pm_active);	/* don't allow deactivation */
 	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
 	LIST_INIT(&allpmaps);
 
@@ -930,19 +932,20 @@ pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde)
 void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
-	cpumask_t cpumask, other_cpus;
+	cpuset_t cpumask, other_cpus;
 
 	sched_pin();
-	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
 		invlpg(va);
 		smp_invlpg(va);
 	} else {
 		cpumask = PCPU_GET(cpumask);
 		other_cpus = PCPU_GET(other_cpus);
-		if (pmap->pm_active & cpumask)
+		if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
 			invlpg(va);
-		if (pmap->pm_active & other_cpus)
-			smp_masked_invlpg(pmap->pm_active & other_cpus, va);
+		CPU_AND(&other_cpus, &pmap->pm_active);
+		if (!CPU_EMPTY(&other_cpus))
+			smp_masked_invlpg(other_cpus, va);
 	}
 	sched_unpin();
 }
@@ -950,23 +953,23 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
-	cpumask_t cpumask, other_cpus;
+	cpuset_t cpumask, other_cpus;
 	vm_offset_t addr;
 
 	sched_pin();
-	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
 		for (addr = sva; addr < eva; addr += PAGE_SIZE)
 			invlpg(addr);
 		smp_invlpg_range(sva, eva);
 	} else {
 		cpumask = PCPU_GET(cpumask);
 		other_cpus = PCPU_GET(other_cpus);
-		if (pmap->pm_active & cpumask)
+		if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
 			for (addr = sva; addr < eva; addr += PAGE_SIZE)
 				invlpg(addr);
-		if (pmap->pm_active & other_cpus)
-			smp_masked_invlpg_range(pmap->pm_active & other_cpus,
-			    sva, eva);
+		CPU_AND(&other_cpus, &pmap->pm_active);
+		if (!CPU_EMPTY(&other_cpus))
+			smp_masked_invlpg_range(other_cpus, sva, eva);
 	}
 	sched_unpin();
 }
@@ -974,19 +977,20 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 void
 pmap_invalidate_all(pmap_t pmap)
 {
-	cpumask_t cpumask, other_cpus;
+	cpuset_t cpumask, other_cpus;
 
 	sched_pin();
-	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
 		invltlb();
 		smp_invltlb();
 	} else {
 		cpumask = PCPU_GET(cpumask);
 		other_cpus = PCPU_GET(other_cpus);
-		if (pmap->pm_active & cpumask)
+		if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
 			invltlb();
-		if (pmap->pm_active & other_cpus)
-			smp_masked_invltlb(pmap->pm_active & other_cpus);
+		CPU_AND(&other_cpus, &pmap->pm_active);
+		if (!CPU_EMPTY(&other_cpus))
+			smp_masked_invltlb(other_cpus);
 	}
 	sched_unpin();
 }
@@ -1002,8 +1006,8 @@ pmap_invalidate_cache(void)
 }
 
 struct pde_action {
-	cpumask_t store;	/* processor that updates the PDE */
-	cpumask_t invalidate;	/* processors that invalidate their TLB */
+	cpuset_t store;		/* processor that updates the PDE */
+	cpuset_t invalidate;	/* processors that invalidate their TLB */
 	vm_offset_t va;
 	pd_entry_t *pde;
 	pd_entry_t newpde;
@@ -1016,7 +1020,10 @@ pmap_update_pde_kernel(void *arg)
 	pd_entry_t *pde;
 	pmap_t pmap;
 
-	if (act->store == PCPU_GET(cpumask))
+	sched_pin();
+	if (!CPU_CMP(&act->store, PCPU_PTR(cpumask))) {
+		sched_unpin();
+
 		/*
 		 * Elsewhere, this operation requires allpmaps_lock for
 		 * synchronization.  Here, it does not because it is being
@@ -1026,6 +1033,8 @@ pmap_update_pde_kernel(void *arg)
 			pde = pmap_pde(pmap, act->va);
 			pde_store(pde, act->newpde);
 		}
+	} else
+		sched_unpin();
 }
 
 static void
@@ -1033,8 +1042,12 @@ pmap_update_pde_user(void *arg)
 {
 	struct pde_action *act = arg;
 
-	if (act->store == PCPU_GET(cpumask))
+	sched_pin();
+	if (!CPU_CMP(&act->store, PCPU_PTR(cpumask))) {
+		sched_unpin();
 		pde_store(act->pde, act->newpde);
+	} else
+		sched_unpin();
 }
 
 static void
@@ -1042,8 +1055,12 @@ pmap_update_pde_teardown(void *arg)
 {
 	struct pde_action *act = arg;
 
-	if ((act->invalidate & PCPU_GET(cpumask)) != 0)
+	sched_pin();
+	if (CPU_OVERLAP(&act->invalidate, PCPU_PTR(cpumask))) {
+		sched_unpin();
 		pmap_update_pde_invalidate(act->va, act->newpde);
+	} else
+		sched_unpin();
 }
 
 /*
@@ -1058,21 +1075,23 @@ static void
 pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
 {
 	struct pde_action act;
-	cpumask_t active, cpumask;
+	cpuset_t active, cpumask, other_cpus;
 
 	sched_pin();
 	cpumask = PCPU_GET(cpumask);
+	other_cpus = PCPU_GET(other_cpus);
 	if (pmap == kernel_pmap)
 		active = all_cpus;
 	else
 		active = pmap->pm_active;
-	if ((active & PCPU_GET(other_cpus)) != 0) {
+	if (CPU_OVERLAP(&active, &other_cpus)) {
 		act.store = cpumask;
 		act.invalidate = active;
 		act.va = va;
 		act.pde = pde;
 		act.newpde = newpde;
-		smp_rendezvous_cpus(cpumask | active,
+		CPU_OR(&cpumask, &active);
+		smp_rendezvous_cpus(cpumask,
 		    smp_no_rendevous_barrier, pmap == kernel_pmap ?
 		    pmap_update_pde_kernel : pmap_update_pde_user,
 		    pmap_update_pde_teardown, &act);
@@ -1081,7 +1100,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
 			pmap_kenter_pde(va, newpde);
 		else
 			pde_store(pde, newpde);
-		if ((active & cpumask) != 0)
+		if (CPU_OVERLAP(&active, &cpumask))
 			pmap_update_pde_invalidate(va, newpde);
 	}
 	sched_unpin();
@@ -1095,7 +1114,7 @@ PMAP_INLINE void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
 
-	if (pmap == kernel_pmap || pmap->pm_active)
+	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
 		invlpg(va);
 }
 
@@ -1104,7 +1123,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
 	vm_offset_t addr;
 
-	if (pmap == kernel_pmap || pmap->pm_active)
+	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
 		for (addr = sva; addr < eva; addr += PAGE_SIZE)
 			invlpg(addr);
 }
@@ -1113,7 +1132,7 @@ PMAP_INLINE void
 pmap_invalidate_all(pmap_t pmap)
 {
 
-	if (pmap == kernel_pmap || pmap->pm_active)
+	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
 		invltlb();
 }
 
@@ -1132,7 +1151,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde)
 		pmap_kenter_pde(va, newpde);
 	else
 		pde_store(pde, newpde);
-	if (pmap == kernel_pmap || pmap->pm_active)
+	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
 		pmap_update_pde_invalidate(va, newpde);
 }
 #endif /* !SMP */
@@ -1689,7 +1708,7 @@ pmap_pinit0(pmap_t pmap)
 	pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
 #endif
 	pmap->pm_root = NULL;
-	pmap->pm_active = 0;
+	CPU_ZERO(&pmap->pm_active);
 	PCPU_SET(curpmap, pmap);
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -1770,7 +1789,7 @@ pmap_pinit(pmap_t pmap)
 #endif
 	}
 
-	pmap->pm_active = 0;
+	CPU_ZERO(&pmap->pm_active);
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 
@@ -1886,7 +1905,7 @@ pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
  * Deal with a SMP shootdown of other users of the pmap that we are
  * trying to dispose of.  This can be a bit hairy.
  */
-static cpumask_t *lazymask;
+static cpuset_t *lazymask;
 static u_int lazyptd;
 static volatile u_int lazywait;
 
@@ -1895,36 +1914,42 @@ void pmap_lazyfix_action(void);
 void
 pmap_lazyfix_action(void)
 {
-	cpumask_t mymask = PCPU_GET(cpumask);
 
 #ifdef COUNT_IPIS
 	(*ipi_lazypmap_counts[PCPU_GET(cpuid)])++;
 #endif
 	if (rcr3() == lazyptd)
 		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
-	atomic_clear_int(lazymask, mymask);
+	CPU_CLR_ATOMIC(PCPU_GET(cpuid), lazymask);
 	atomic_store_rel_int(&lazywait, 1);
 }
 
 static void
-pmap_lazyfix_self(cpumask_t mymask)
+pmap_lazyfix_self(cpuset_t mymask)
 {
 
 	if (rcr3() == lazyptd)
 		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
-	atomic_clear_int(lazymask, mymask);
+	CPU_NAND_ATOMIC(lazymask, &mymask);
 }
 
 
 static void
 pmap_lazyfix(pmap_t pmap)
 {
-	cpumask_t mymask, mask;
+	cpuset_t mymask, mask;
 	u_int spins;
+	int lsb;
 
-	while ((mask = pmap->pm_active) != 0) {
+	mask = pmap->pm_active;
+	while (!CPU_EMPTY(&mask)) {
 		spins = 50000000;
-		mask = mask & -mask;	/* Find least significant set bit */
+
+		/* Find least significant set bit. */
+		lsb = cpusetobj_ffs(&mask);
+		MPASS(lsb != 0);
+		lsb--;
+		CPU_SETOF(lsb, &mask);
 		mtx_lock_spin(&smp_ipi_mtx);
 #ifdef PAE
 		lazyptd = vtophys(pmap->pm_pdpt);
@@ -1932,7 +1957,7 @@ pmap_lazyfix(pmap_t pmap)
 		lazyptd = vtophys(pmap->pm_pdir);
 #endif
 		mymask = PCPU_GET(cpumask);
-		if (mask == mymask) {
+		if (!CPU_CMP(&mask, &mymask)) {
 			lazymask = &pmap->pm_active;
 			pmap_lazyfix_self(mymask);
 		} else {
@@ -1949,6 +1974,7 @@ pmap_lazyfix(pmap_t pmap)
 		mtx_unlock_spin(&smp_ipi_mtx);
 		if (spins == 0)
 			printf("pmap_lazyfix: spun for 50000000\n");
+		mask = pmap->pm_active;
 	}
 }
 
@@ -1968,7 +1994,7 @@ pmap_lazyfix(pmap_t pmap)
 	cr3 = vtophys(pmap->pm_pdir);
 	if (cr3 == rcr3()) {
 		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
-		pmap->pm_active &= ~(PCPU_GET(cpumask));
+		CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active);
 	}
 }
 #endif	/* SMP */
@@ -5078,11 +5104,11 @@ pmap_activate(struct thread *td)
 	pmap = vmspace_pmap(td->td_proc->p_vmspace);
 	oldpmap = PCPU_GET(curpmap);
 #if defined(SMP)
-	atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
-	atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
+	CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask));
+	CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask));
 #else
-	oldpmap->pm_active &= ~1;
-	pmap->pm_active |= 1;
+	CPU_NAND(&oldpmap->pm_active, PCPU_PTR(cpumask));
+	CPU_OR(&pmap->pm_active, PCPU_PTR(cpumask));
 #endif
 #ifdef PAE
 	cr3 = vtophys(pmap->pm_pdpt);
diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index 232e1a1f308e..a084e09526aa 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -573,11 +573,13 @@ kvtop(void *addr)
 static void
 cpu_reset_proxy()
 {
+	cpuset_t tcrp;
 
 	cpu_reset_proxy_active = 1;
 	while (cpu_reset_proxy_active == 1)
 		;	/* Wait for other cpu to see that we've started */
-	stop_cpus((1<<cpu_reset_proxyid));
+	CPU_SETOF(cpu_reset_proxyid, &tcrp);
+	stop_cpus(tcrp);
 	printf("cpu_reset_proxy: Stopped CPU %d\n", cpu_reset_proxyid);
 	DELAY(1000000);
 	cpu_reset_real();
@@ -596,25 +598,29 @@ cpu_reset()
 #endif
 
 #ifdef SMP
-	cpumask_t map;
+	cpuset_t map;
 	u_int cnt;
 
 	if (smp_active) {
-		map = PCPU_GET(other_cpus) & ~stopped_cpus;
-		if (map != 0) {
+		sched_pin();
+		map = PCPU_GET(other_cpus);
+		CPU_NAND(&map, &stopped_cpus);
+		if (!CPU_EMPTY(&map)) {
 			printf("cpu_reset: Stopping other CPUs\n");
 			stop_cpus(map);
 		}
 
 		if (PCPU_GET(cpuid) != 0) {
 			cpu_reset_proxyid = PCPU_GET(cpuid);
+			sched_unpin();
 			cpustop_restartfunc = cpu_reset_proxy;
 			cpu_reset_proxy_active = 0;
 			printf("cpu_reset: Restarting BSP\n");
 
 			/* Restart CPU #0. */
 			/* XXX: restart_cpus(1 << 0); */
-			atomic_store_rel_int(&started_cpus, (1 << 0));
+			CPU_SETOF(0, &started_cpus);
+			wmb();
 
 			cnt = 0;
 			while (cpu_reset_proxy_active == 0 && cnt < 10000000)
@@ -626,7 +632,8 @@ cpu_reset()
 
 			while (1);
 			/* NOTREACHED */
-		}
+		} else
+			sched_unpin();
 
 		DELAY(1000000);
 	}
@@ -795,7 +802,7 @@ sf_buf_alloc(struct vm_page *m, int flags)
 	struct sf_head *hash_list;
 	struct sf_buf *sf;
 #ifdef SMP
-	cpumask_t cpumask, other_cpus;
+	cpuset_t cpumask, other_cpus;
 #endif
 	int error;
 
@@ -867,22 +874,23 @@ sf_buf_alloc(struct vm_page *m, int flags)
 	 */
 #ifdef SMP
 	if ((opte & (PG_V | PG_A)) ==  (PG_V | PG_A))
-		sf->cpumask = 0;
+		CPU_ZERO(&sf->cpumask);
 shootdown:
 	sched_pin();
 	cpumask = PCPU_GET(cpumask);
-	if ((sf->cpumask & cpumask) == 0) {
-		sf->cpumask |= cpumask;
+	if (!CPU_OVERLAP(&cpumask, &sf->cpumask)) {
+		CPU_OR(&sf->cpumask, &cpumask);
 		invlpg(sf->kva);
 	}
 	if ((flags & SFB_CPUPRIVATE) == 0) {
-		other_cpus = PCPU_GET(other_cpus) & ~sf->cpumask;
-		if (other_cpus != 0) {
-			sf->cpumask |= other_cpus;
+		other_cpus = PCPU_GET(other_cpus);
+		CPU_NAND(&other_cpus, &sf->cpumask);
+		if (!CPU_EMPTY(&other_cpus)) {
+			CPU_OR(&sf->cpumask, &other_cpus);
 			smp_masked_invlpg(other_cpus, sf->kva);
 		}
 	}
-	sched_unpin();	
+	sched_unpin();
 #else
 	if ((opte & (PG_V | PG_A)) ==  (PG_V | PG_A))
 		pmap_invalidate_page(kernel_pmap, sf->kva);
diff --git a/sys/i386/include/_types.h b/sys/i386/include/_types.h
index 7a969fedf160..3194fd691f67 100644
--- a/sys/i386/include/_types.h
+++ b/sys/i386/include/_types.h
@@ -69,7 +69,6 @@ typedef	unsigned long long	__uint64_t;
  * Standard type definitions.
  */
 typedef	unsigned long	__clock_t;		/* clock()... */
-typedef	unsigned int	__cpumask_t;
 typedef	__int32_t	__critical_t;
 typedef	long double	__double_t;
 typedef	long double	__float_t;
diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h
index eeada2e400f4..3012a000ae5c 100644
--- a/sys/i386/include/pmap.h
+++ b/sys/i386/include/pmap.h
@@ -155,6 +155,7 @@
 #ifndef LOCORE
 
 #include <sys/queue.h>
+#include <sys/_cpuset.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 
@@ -433,7 +434,7 @@ struct pmap {
 	struct mtx		pm_mtx;
 	pd_entry_t		*pm_pdir;	/* KVA of page directory */
 	TAILQ_HEAD(,pv_chunk)	pm_pvchunk;	/* list of mappings in pmap */
-	cpumask_t		pm_active;	/* active on cpus */
+	cpuset_t		pm_active;	/* active on cpus */
 	struct pmap_statistics	pm_stats;	/* pmap statistics */
 	LIST_ENTRY(pmap) 	pm_list;	/* List of all pmaps */
 #ifdef PAE
diff --git a/sys/i386/include/sf_buf.h b/sys/i386/include/sf_buf.h
index 7bc1095c9590..415dcbb86e63 100644
--- a/sys/i386/include/sf_buf.h
+++ b/sys/i386/include/sf_buf.h
@@ -29,6 +29,7 @@
 #ifndef _MACHINE_SF_BUF_H_
 #define _MACHINE_SF_BUF_H_
 
+#include <sys/_cpuset.h>
 #include <sys/queue.h>
 
 struct vm_page;
@@ -40,7 +41,7 @@ struct sf_buf {
 	vm_offset_t	kva;		/* va of mapping */
 	int		ref_count;	/* usage of this mapping */
 #ifdef SMP
-	cpumask_t	cpumask;	/* cpus on which mapping is valid */
+	cpuset_t	cpumask;	/* cpus on which mapping is valid */
 #endif
 };
 
diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h
index d364cd9ebbcc..04d67c9f2f28 100644
--- a/sys/i386/include/smp.h
+++ b/sys/i386/include/smp.h
@@ -66,17 +66,16 @@ void 	ipi_bitmap_handler(struct trapframe frame);
 #endif
 void	ipi_cpu(int cpu, u_int ipi);
 int	ipi_nmi_handler(void);
-void	ipi_selected(cpumask_t cpus, u_int ipi);
+void	ipi_selected(cpuset_t cpus, u_int ipi);
 u_int	mp_bootaddress(u_int);
-int	mp_grab_cpu_hlt(void);
 void	smp_cache_flush(void);
 void	smp_invlpg(vm_offset_t addr);
-void	smp_masked_invlpg(cpumask_t mask, vm_offset_t addr);
+void	smp_masked_invlpg(cpuset_t mask, vm_offset_t addr);
 void	smp_invlpg_range(vm_offset_t startva, vm_offset_t endva);
-void	smp_masked_invlpg_range(cpumask_t mask, vm_offset_t startva,
+void	smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva,
 	    vm_offset_t endva);
 void	smp_invltlb(void);
-void	smp_masked_invltlb(cpumask_t mask);
+void	smp_masked_invltlb(cpuset_t mask);
 
 #ifdef XEN
 void ipi_to_irq_init(void);
diff --git a/sys/i386/xen/mp_machdep.c b/sys/i386/xen/mp_machdep.c
index 2919570b647e..2d05596b89d2 100644
--- a/sys/i386/xen/mp_machdep.c
+++ b/sys/i386/xen/mp_machdep.c
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/systm.h>
 #include <sys/bus.h>
 #include <sys/cons.h>	/* cngetc() */
+#include <sys/cpuset.h>
 #ifdef GPROF 
 #include <sys/gmon.h>
 #endif
@@ -116,7 +117,7 @@ volatile int smp_tlb_wait;
 typedef void call_data_func_t(uintptr_t , uintptr_t);
 
 static u_int logical_cpus;
-static volatile cpumask_t ipi_nmi_pending;
+static volatile cpuset_t ipi_nmi_pending;
 
 /* used to hold the AP's until we are ready to release them */
 static struct mtx ap_boot_mtx;
@@ -149,7 +150,7 @@ static int	start_ap(int apic_id);
 static void	release_aps(void *dummy);
 
 static u_int	hyperthreading_cpus;
-static cpumask_t	hyperthreading_cpus_mask;
+static cpuset_t	hyperthreading_cpus_mask;
 
 extern void Xhypervisor_callback(void);
 extern void failsafe_callback(void);
@@ -239,7 +240,7 @@ cpu_mp_probe(void)
 	 * Always record BSP in CPU map so that the mbuf init code works
 	 * correctly.
 	 */
-	all_cpus = 1;
+	CPU_SETOF(0, &all_cpus);
 	if (mp_ncpus == 0) {
 		/*
 		 * No CPUs were found, so this must be a UP system.  Setup
@@ -293,7 +294,8 @@ cpu_mp_start(void)
 	start_all_aps();
 
 	/* Setup the initial logical CPUs info. */
-	logical_cpus = logical_cpus_mask = 0;
+	logical_cpus = 0;
+	CPU_ZERO(&logical_cpus_mask);
 	if (cpu_feature & CPUID_HTT)
 		logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16;
 
@@ -521,6 +523,7 @@ xen_smp_intr_init_cpus(void *unused)
 void
 init_secondary(void)
 {
+	cpuset_t tcpuset, tallcpus;
 	vm_offset_t addr;
 	int	gsel_tss;
 	
@@ -600,18 +603,21 @@ init_secondary(void)
 
 	CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid));
 	printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid));
+	tcpuset = PCPU_GET(cpumask);
 
 	/* Determine if we are a logical CPU. */
 	if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0)
-		logical_cpus_mask |= PCPU_GET(cpumask);
+		CPU_OR(&logical_cpus_mask, &tcpuset);
 	
 	/* Determine if we are a hyperthread. */
 	if (hyperthreading_cpus > 1 &&
 	    PCPU_GET(apic_id) % hyperthreading_cpus != 0)
-		hyperthreading_cpus_mask |= PCPU_GET(cpumask);
+		CPU_OR(&hyperthreading_cpus_mask, &tcpuset);
 
 	/* Build our map of 'other' CPUs. */
-	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+	tallcpus = all_cpus;
+	CPU_NAND(&tallcpus, &tcpuset);
+	PCPU_SET(other_cpus, tallcpus);
 #if 0
 	if (bootverbose)
 		lapic_dump("AP");
@@ -725,6 +731,7 @@ assign_cpu_ids(void)
 int
 start_all_aps(void)
 {
+	cpuset_t tallcpus;
 	int x,apic_id, cpu;
 	struct pcpu *pc;
 	
@@ -778,12 +785,14 @@ start_all_aps(void)
 				panic("bye-bye");
 		}
 
-		all_cpus |= (1 << cpu);		/* record AP in CPU map */
+		CPU_SET(cpu, &all_cpus);	/* record AP in CPU map */
 	}
 	
 
 	/* build our map of 'other' CPUs */
-	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+	tallcpus = all_cpus;
+	CPU_NAND(&tallcpus, PCPU_PTR(cpumask));
+	PCPU_SET(other_cpus, tallcpus);
 
 	pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1);
 	
@@ -1012,29 +1021,20 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2)
 }
 
 static void
-smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
+smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2)
 {
-	int ncpu, othercpus;
+	int cpu, ncpu, othercpus;
 	struct _call_data data;
 
 	othercpus = mp_ncpus - 1;
-	if (mask == (u_int)-1) {
-		ncpu = othercpus;
-		if (ncpu < 1)
+	if (CPU_ISFULLSET(&mask)) {
+		if (othercpus < 1)
 			return;
 	} else {
-		mask &= ~PCPU_GET(cpumask);
-		if (mask == 0)
-			return;
-		ncpu = bitcount32(mask);
-		if (ncpu > othercpus) {
-			/* XXX this should be a panic offence */
-			printf("SMP: tlb shootdown to %d other cpus (only have %d)\n",
-			    ncpu, othercpus);
-			ncpu = othercpus;
-		}
-		/* XXX should be a panic, implied by mask == 0 above */
-		if (ncpu < 1)
+		critical_enter();
+		CPU_NAND(&mask, PCPU_PTR(cpumask));
+		critical_exit();
+		if (CPU_EMPTY(&mask))
 			return;
 	}
 	if (!(read_eflags() & PSL_I))
@@ -1046,10 +1046,20 @@ smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_o
 	call_data->arg1 = addr1;
 	call_data->arg2 = addr2;
 	atomic_store_rel_int(&smp_tlb_wait, 0);
-	if (mask == (u_int)-1)
+	if (CPU_ISFULLSET(&mask)) {
+		ncpu = othercpus;
 		ipi_all_but_self(vector);
-	else
-		ipi_selected(mask, vector);
+	} else {
+		ncpu = 0;
+		while ((cpu = cpusetobj_ffs(&mask)) != 0) {
+			cpu--;
+			CPU_CLR(cpu, &mask);
+			CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu,
+			    vector);
+			ipi_send_cpu(cpu, vector);
+			ncpu++;
+		}
+	}
 	while (smp_tlb_wait < ncpu)
 		ia32_pause();
 	call_data = NULL;
@@ -1092,7 +1102,7 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2)
 }
 
 void
-smp_masked_invltlb(cpumask_t mask)
+smp_masked_invltlb(cpuset_t mask)
 {
 
 	if (smp_started) {
@@ -1101,7 +1111,7 @@ smp_masked_invltlb(cpumask_t mask)
 }
 
 void
-smp_masked_invlpg(cpumask_t mask, vm_offset_t addr)
+smp_masked_invlpg(cpuset_t mask, vm_offset_t addr)
 {
 
 	if (smp_started) {
@@ -1110,7 +1120,7 @@ smp_masked_invlpg(cpumask_t mask, vm_offset_t addr)
 }
 
 void
-smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2)
+smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2)
 {
 
 	if (smp_started) {
@@ -1122,7 +1132,7 @@ smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2)
  * send an IPI to a set of cpus.
  */
 void
-ipi_selected(cpumask_t cpus, u_int ipi)
+ipi_selected(cpuset_t cpus, u_int ipi)
 {
 	int cpu;
 
@@ -1132,11 +1142,11 @@ ipi_selected(cpumask_t cpus, u_int ipi)
 	 * Set the mask of receiving CPUs for this purpose.
 	 */
 	if (ipi == IPI_STOP_HARD)
-		atomic_set_int(&ipi_nmi_pending, cpus);
+		CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus);
 
-	while ((cpu = ffs(cpus)) != 0) {
+	while ((cpu = cpusetobj_ffs(&cpus)) != 0) {
 		cpu--;
-		cpus &= ~(1 << cpu);
+		CPU_CLR(cpu, &cpus);
 		CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
 		ipi_send_cpu(cpu, ipi);
 	}
@@ -1155,7 +1165,7 @@ ipi_cpu(int cpu, u_int ipi)
 	 * Set the mask of receiving CPUs for this purpose.
 	 */
 	if (ipi == IPI_STOP_HARD)
-		atomic_set_int(&ipi_nmi_pending, 1 << cpu);
+		CPU_SET_ATOMIC(cpu, &ipi_nmi_pending);
 
 	CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi);
 	ipi_send_cpu(cpu, ipi);
@@ -1167,23 +1177,27 @@ ipi_cpu(int cpu, u_int ipi)
 void
 ipi_all_but_self(u_int ipi)
 {
+	cpuset_t other_cpus;
 
 	/*
 	 * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit
 	 * of help in order to understand what is the source.
 	 * Set the mask of receiving CPUs for this purpose.
 	 */
+	sched_pin();
+	other_cpus = PCPU_GET(other_cpus);
+	sched_unpin();
 	if (ipi == IPI_STOP_HARD)
-		atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus));
+		CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus);
 
 	CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi);
-	ipi_selected(PCPU_GET(other_cpus), ipi);
+	ipi_selected(other_cpus, ipi);
 }
 
 int
 ipi_nmi_handler()
 {
-	cpumask_t cpumask;
+	cpuset_t cpumask;
 
 	/*
 	 * As long as there is not a simple way to know about a NMI's
@@ -1191,11 +1205,13 @@ ipi_nmi_handler()
 	 * the global pending bitword an IPI_STOP_HARD has been issued
 	 * and should be handled.
 	 */
+	sched_pin();
 	cpumask = PCPU_GET(cpumask);
-	if ((ipi_nmi_pending & cpumask) == 0)
+	sched_unpin();
+	if (!CPU_OVERLAP(&ipi_nmi_pending, &cpumask))
 		return (1);
 
-	atomic_clear_int(&ipi_nmi_pending, cpumask);
+	CPU_NAND_ATOMIC(&ipi_nmi_pending, &cpumask);
 	cpustop_handler();
 	return (0);
 }
@@ -1207,20 +1223,25 @@ ipi_nmi_handler()
 void
 cpustop_handler(void)
 {
-	int cpu = PCPU_GET(cpuid);
-	int cpumask = PCPU_GET(cpumask);
+	cpuset_t cpumask;
+	int cpu;
+
+	sched_pin();
+	cpumask = PCPU_GET(cpumask);
+	cpu = PCPU_GET(cpuid);
+	sched_unpin();
 
 	savectx(&stoppcbs[cpu]);
 
 	/* Indicate that we are stopped */
-	atomic_set_int(&stopped_cpus, cpumask);
+	CPU_OR_ATOMIC(&stopped_cpus, &cpumask);
 
 	/* Wait for restart */
-	while (!(started_cpus & cpumask))
+	while (!CPU_OVERLAP(&started_cpus, &cpumask))
 	    ia32_pause();
 
-	atomic_clear_int(&started_cpus, cpumask);
-	atomic_clear_int(&stopped_cpus, cpumask);
+	CPU_NAND_ATOMIC(&started_cpus, &cpumask);
+	CPU_NAND_ATOMIC(&stopped_cpus, &cpumask);
 
 	if (cpu == 0 && cpustop_restartfunc != NULL) {
 		cpustop_restartfunc();
diff --git a/sys/i386/xen/pmap.c b/sys/i386/xen/pmap.c
index eb3c803ca924..3efa4f1e174a 100644
--- a/sys/i386/xen/pmap.c
+++ b/sys/i386/xen/pmap.c
@@ -422,7 +422,7 @@ pmap_bootstrap(vm_paddr_t firstaddr)
 #ifdef PAE
 	kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT);
 #endif
-	kernel_pmap->pm_active = -1;	/* don't allow deactivation */
+	CPU_FILL(&kernel_pmap->pm_active);	/* don't allow deactivation */
 	TAILQ_INIT(&kernel_pmap->pm_pvchunk);
 	LIST_INIT(&allpmaps);
 	mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN);
@@ -802,22 +802,23 @@ pmap_cache_bits(int mode, boolean_t is_pde)
 void
 pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 {
-	cpumask_t cpumask, other_cpus;
+	cpuset_t cpumask, other_cpus;
 
 	CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
 	    pmap, va);
 	
 	sched_pin();
-	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
 		invlpg(va);
 		smp_invlpg(va);
 	} else {
 		cpumask = PCPU_GET(cpumask);
 		other_cpus = PCPU_GET(other_cpus);
-		if (pmap->pm_active & cpumask)
+		if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
 			invlpg(va);
-		if (pmap->pm_active & other_cpus)
-			smp_masked_invlpg(pmap->pm_active & other_cpus, va);
+		CPU_AND(&other_cpus, &pmap->pm_active);
+		if (!CPU_EMPTY(&other_cpus))
+			smp_masked_invlpg(other_cpus, va);
 	}
 	sched_unpin();
 	PT_UPDATES_FLUSH();
@@ -826,26 +827,26 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 void
 pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 {
-	cpumask_t cpumask, other_cpus;
+	cpuset_t cpumask, other_cpus;
 	vm_offset_t addr;
 
 	CTR3(KTR_PMAP, "pmap_invalidate_page: pmap=%p eva=0x%x sva=0x%x",
 	    pmap, sva, eva);
 
 	sched_pin();
-	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
 		for (addr = sva; addr < eva; addr += PAGE_SIZE)
 			invlpg(addr);
 		smp_invlpg_range(sva, eva);
 	} else {
 		cpumask = PCPU_GET(cpumask);
 		other_cpus = PCPU_GET(other_cpus);
-		if (pmap->pm_active & cpumask)
+		if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
 			for (addr = sva; addr < eva; addr += PAGE_SIZE)
 				invlpg(addr);
-		if (pmap->pm_active & other_cpus)
-			smp_masked_invlpg_range(pmap->pm_active & other_cpus,
-			    sva, eva);
+		CPU_AND(&other_cpus, &pmap->pm_active);
+		if (!CPU_EMPTY(&other_cpus))
+			smp_masked_invlpg_range(other_cpus, sva, eva);
 	}
 	sched_unpin();
 	PT_UPDATES_FLUSH();
@@ -854,21 +855,22 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 void
 pmap_invalidate_all(pmap_t pmap)
 {
-	cpumask_t cpumask, other_cpus;
+	cpuset_t cpumask, other_cpus;
 
 	CTR1(KTR_PMAP, "pmap_invalidate_page: pmap=%p", pmap);
 
 	sched_pin();
-	if (pmap == kernel_pmap || pmap->pm_active == all_cpus) {
+	if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) {
 		invltlb();
 		smp_invltlb();
 	} else {
 		cpumask = PCPU_GET(cpumask);
 		other_cpus = PCPU_GET(other_cpus);
-		if (pmap->pm_active & cpumask)
+		if (CPU_OVERLAP(&pmap->pm_active, &cpumask))
 			invltlb();
-		if (pmap->pm_active & other_cpus)
-			smp_masked_invltlb(pmap->pm_active & other_cpus);
+		CPU_AND(&other_cpus, &pmap->pm_active);
+		if (!CPU_EMPTY(&other_cpus))
+			smp_masked_invltlb(other_cpus);
 	}
 	sched_unpin();
 }
@@ -893,7 +895,7 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va)
 	CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x",
 	    pmap, va);
 
-	if (pmap == kernel_pmap || pmap->pm_active)
+	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
 		invlpg(va);
 	PT_UPDATES_FLUSH();
 }
@@ -907,7 +909,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva)
 		CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x",
 		    pmap, sva, eva);
 
-	if (pmap == kernel_pmap || pmap->pm_active)
+	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
 		for (addr = sva; addr < eva; addr += PAGE_SIZE)
 			invlpg(addr);
 	PT_UPDATES_FLUSH();
@@ -919,7 +921,7 @@ pmap_invalidate_all(pmap_t pmap)
 
 	CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap);
 	
-	if (pmap == kernel_pmap || pmap->pm_active)
+	if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active))
 		invltlb();
 }
 
@@ -1449,7 +1451,7 @@ pmap_pinit0(pmap_t pmap)
 #ifdef PAE
 	pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT);
 #endif
-	pmap->pm_active = 0;
+	CPU_ZERO(&pmap->pm_active);
 	PCPU_SET(curpmap, pmap);
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
@@ -1556,7 +1558,7 @@ pmap_pinit(pmap_t pmap)
 	}
 	xen_flush_queue();
 	vm_page_unlock_queues();
-	pmap->pm_active = 0;
+	CPU_ZERO(&pmap->pm_active);
 	TAILQ_INIT(&pmap->pm_pvchunk);
 	bzero(&pmap->pm_stats, sizeof pmap->pm_stats);
 
@@ -1686,7 +1688,7 @@ pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags)
  * Deal with a SMP shootdown of other users of the pmap that we are
  * trying to dispose of.  This can be a bit hairy.
  */
-static cpumask_t *lazymask;
+static cpuset_t *lazymask;
 static u_int lazyptd;
 static volatile u_int lazywait;
 
@@ -1695,36 +1697,42 @@ void pmap_lazyfix_action(void);
 void
 pmap_lazyfix_action(void)
 {
-	cpumask_t mymask = PCPU_GET(cpumask);
 
 #ifdef COUNT_IPIS
 	(*ipi_lazypmap_counts[PCPU_GET(cpuid)])++;
 #endif
 	if (rcr3() == lazyptd)
 		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
-	atomic_clear_int(lazymask, mymask);
+	CPU_CLR_ATOMIC(PCPU_GET(cpuid), lazymask);
 	atomic_store_rel_int(&lazywait, 1);
 }
 
 static void
-pmap_lazyfix_self(cpumask_t mymask)
+pmap_lazyfix_self(cpuset_t mymask)
 {
 
 	if (rcr3() == lazyptd)
 		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
-	atomic_clear_int(lazymask, mymask);
+	CPU_NAND_ATOMIC(lazymask, &mymask);
 }
 
 
 static void
 pmap_lazyfix(pmap_t pmap)
 {
-	cpumask_t mymask, mask;
+	cpuset_t mymask, mask;
 	u_int spins;
+	int lsb;
 
-	while ((mask = pmap->pm_active) != 0) {
+	mask = pmap->pm_active;
+	while (!CPU_EMPTY(&mask)) {
 		spins = 50000000;
-		mask = mask & -mask;	/* Find least significant set bit */
+
+		/* Find least significant set bit. */
+		lsb = cpusetobj_ffs(&mask);
+		MPASS(lsb != 0);
+		lsb--;
+		CPU_SETOF(lsb, &mask);
 		mtx_lock_spin(&smp_ipi_mtx);
 #ifdef PAE
 		lazyptd = vtophys(pmap->pm_pdpt);
@@ -1732,7 +1740,7 @@ pmap_lazyfix(pmap_t pmap)
 		lazyptd = vtophys(pmap->pm_pdir);
 #endif
 		mymask = PCPU_GET(cpumask);
-		if (mask == mymask) {
+		if (!CPU_CMP(&mask, &mymask)) {
 			lazymask = &pmap->pm_active;
 			pmap_lazyfix_self(mymask);
 		} else {
@@ -1749,6 +1757,7 @@ pmap_lazyfix(pmap_t pmap)
 		mtx_unlock_spin(&smp_ipi_mtx);
 		if (spins == 0)
 			printf("pmap_lazyfix: spun for 50000000\n");
+		mask = pmap->pm_active;
 	}
 }
 
@@ -1768,7 +1777,7 @@ pmap_lazyfix(pmap_t pmap)
 	cr3 = vtophys(pmap->pm_pdir);
 	if (cr3 == rcr3()) {
 		load_cr3(PCPU_GET(curpcb)->pcb_cr3);
-		pmap->pm_active &= ~(PCPU_GET(cpumask));
+		CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active);
 	}
 }
 #endif	/* SMP */
@@ -4123,11 +4132,11 @@ pmap_activate(struct thread *td)
 	pmap = vmspace_pmap(td->td_proc->p_vmspace);
 	oldpmap = PCPU_GET(curpmap);
 #if defined(SMP)
-	atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask));
-	atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
+	CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask));
+	CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask));
 #else
-	oldpmap->pm_active &= ~1;
-	pmap->pm_active |= 1;
+	CPU_NAND(&oldpmap->pm_active, PCPU_PTR(cpumask));
+	CPU_OR(&pmap->pm_active, PCPU_PTR(cpumask));
 #endif
 #ifdef PAE
 	cr3 = vtophys(pmap->pm_pdpt);
diff --git a/sys/ia64/acpica/acpi_machdep.c b/sys/ia64/acpica/acpi_machdep.c
index b7b612fd220e..1466cfe59a98 100644
--- a/sys/ia64/acpica/acpi_machdep.c
+++ b/sys/ia64/acpica/acpi_machdep.c
@@ -56,7 +56,14 @@ acpi_machdep_quirks(int *quirks)
 void
 acpi_cpu_c1()
 {
+#ifdef INVARIANTS
+	register_t ie;
+
+	ie = intr_disable();
+	KASSERT(ie == 0, ("%s called with interrupts enabled\n", __func__));
+#endif
 	ia64_call_pal_static(PAL_HALT_LIGHT, 0, 0, 0);
+	ia64_enable_intr();
 }
 
 void *
diff --git a/sys/ia64/ia64/machdep.c b/sys/ia64/ia64/machdep.c
index 7252865afe7e..1463fb5c327e 100644
--- a/sys/ia64/ia64/machdep.c
+++ b/sys/ia64/ia64/machdep.c
@@ -411,12 +411,34 @@ cpu_halt()
 void
 cpu_idle(int busy)
 {
-	struct ia64_pal_result res;
+	register_t ie;
 
-	if (cpu_idle_hook != NULL)
+#if 0
+	if (!busy) {
+		critical_enter();
+		cpu_idleclock();
+	}
+#endif
+
+	ie = intr_disable();
+	KASSERT(ie != 0, ("%s called with interrupts disabled\n", __func__));
+
+	if (sched_runnable())
+		ia64_enable_intr();
+	else if (cpu_idle_hook != NULL) {
 		(*cpu_idle_hook)();
-	else
-		res = ia64_call_pal_static(PAL_HALT_LIGHT, 0, 0, 0);
+		/* The hook must enable interrupts! */
+	} else {
+		ia64_call_pal_static(PAL_HALT_LIGHT, 0, 0, 0);
+		ia64_enable_intr();
+	}
+
+#if 0
+	if (!busy) {
+		cpu_activeclock();
+		critical_exit();
+	}
+#endif
 }
 
 int
@@ -644,9 +666,12 @@ calculate_frequencies(void)
 {
 	struct ia64_sal_result sal;
 	struct ia64_pal_result pal;
+	register_t ie;
 
+	ie = intr_disable();
 	sal = ia64_sal_entry(SAL_FREQ_BASE, 0, 0, 0, 0, 0, 0, 0);
 	pal = ia64_call_pal_static(PAL_FREQ_RATIOS, 0, 0, 0);
+	intr_restore(ie);
 
 	if (sal.sal_status == 0 && pal.pal_status == 0) {
 		if (bootverbose) {
@@ -761,6 +786,8 @@ ia64_init(void)
 	ia64_sal_init();
 	calculate_frequencies();
 
+	set_cputicker(ia64_get_itc, (u_long)itc_freq * 1000000, 0);
+
 	/*
 	 * Setup the PCPU data for the bootstrap processor. It is needed
 	 * by printf(). Also, since printf() has critical sections, we
diff --git a/sys/ia64/ia64/mp_machdep.c b/sys/ia64/ia64/mp_machdep.c
index b6b0bef1d222..15afea030182 100644
--- a/sys/ia64/ia64/mp_machdep.c
+++ b/sys/ia64/ia64/mp_machdep.c
@@ -139,18 +139,18 @@ ia64_ih_rndzvs(struct thread *td, u_int xiv, struct trapframe *tf)
 static u_int
 ia64_ih_stop(struct thread *td, u_int xiv, struct trapframe *tf)
 {
-	cpumask_t mybit;
+	cpuset_t mybit;
 
 	PCPU_INC(md.stats.pcs_nstops);
 	mybit = PCPU_GET(cpumask);
 
 	savectx(PCPU_PTR(md.pcb));
 
-	atomic_set_int(&stopped_cpus, mybit);
-	while ((started_cpus & mybit) == 0)
+	CPU_OR_ATOMIC(&stopped_cpus, &mybit);
+	while (!CPU_OVERLAP(&started_cpus, &mybit))
 		cpu_spinwait();
-	atomic_clear_int(&started_cpus, mybit);
-	atomic_clear_int(&stopped_cpus, mybit);
+	CPU_NAND_ATOMIC(&started_cpus, &mybit);
+	CPU_NAND_ATOMIC(&stopped_cpus, &mybit);
 	return (0);
 }
 
@@ -286,7 +286,7 @@ cpu_mp_add(u_int acpi_id, u_int id, u_int eid)
 	cpuid = (IA64_LID_GET_SAPIC_ID(ia64_get_lid()) == sapic_id)
 	    ? 0 : smp_cpus++;
 
-	KASSERT((all_cpus & (1UL << cpuid)) == 0,
+	KASSERT(!CPU_ISSET(cpuid, &all_cpus),
 	    ("%s: cpu%d already in CPU map", __func__, acpi_id));
 
 	if (cpuid != 0) {
@@ -300,7 +300,7 @@ cpu_mp_add(u_int acpi_id, u_int id, u_int eid)
 	pc->pc_acpi_id = acpi_id;
 	pc->pc_md.lid = IA64_LID_SET_SAPIC_ID(sapic_id);
 
-	all_cpus |= (1UL << pc->pc_cpuid);
+	CPU_SET(pc->pc_cpuid, &all_cpus);
 }
 
 void
@@ -359,7 +359,8 @@ cpu_mp_start()
 
 	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
 		pc->pc_md.current_pmap = kernel_pmap;
-		pc->pc_other_cpus = all_cpus & ~pc->pc_cpumask;
+		pc->pc_other_cpus = all_cpus;
+		CPU_NAND(&pc->pc_other_cpus, &pc->pc_cpumask);
 		/* The BSP is obviously running already. */
 		if (pc->pc_cpuid == 0) {
 			pc->pc_md.awake = 1;
@@ -458,12 +459,12 @@ cpu_mp_unleash(void *dummy)
  * send an IPI to a set of cpus.
  */
 void
-ipi_selected(cpumask_t cpus, int ipi)
+ipi_selected(cpuset_t cpus, int ipi)
 {
 	struct pcpu *pc;
 
 	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
-		if (cpus & pc->pc_cpumask)
+		if (CPU_OVERLAP(&cpus, &pc->pc_cpumask))
 			ipi_send(pc, ipi);
 	}
 }
diff --git a/sys/ia64/ia64/pal.S b/sys/ia64/ia64/pal.S
index 2f0d0da72e6e..2e3f4cd85ac4 100644
--- a/sys/ia64/ia64/pal.S
+++ b/sys/ia64/ia64/pal.S
@@ -38,43 +38,40 @@ ia64_pal_entry:	.quad 0
  *	u_int64_t arg1, u_int64_t arg2, u_int64_t arg3)
  */
 ENTRY(ia64_call_pal_static, 4)
-	
-	.regstk	4,5,0,0
+
+	.regstk	4,4,0,0
 palret	=	loc0
 entry	=	loc1
 rpsave	=	loc2
 pfssave =	loc3
-psrsave	=	loc4
 
-	alloc	pfssave=ar.pfs,4,5,0,0
+	alloc	pfssave=ar.pfs,4,4,0,0
 	;; 
 	mov	rpsave=rp
-
 	movl	entry=@gprel(ia64_pal_entry)
+
 1:	mov	palret=ip		// for return address
 	;;
 	add	entry=entry,gp
-	mov	psrsave=psr
+	add	palret=2f-1b,palret	// calculate return address
 	mov	r28=in0			// procedure number
-	;;
-	ld8	entry=[entry]		// read entry point
 	mov	r29=in1			// copy arguments
 	mov	r30=in2
 	mov	r31=in3
 	;;
-	mov	b6=entry
-	add	palret=2f-1b,palret	// calculate return address
-	;;
+	ld8	entry=[entry]		// read entry point
 	mov	b0=palret
-	rsm	psr.i			// disable interrupts
+	;;
+	mov	b6=entry
 	;;
 	br.cond.sptk b6			// call into firmware
-2:	mov	psr.l=psrsave
+	;;
+2:
 	mov	rp=rpsave
 	mov	ar.pfs=pfssave
 	;;
-	srlz.d
 	br.ret.sptk rp
+	;;
 END(ia64_call_pal_static)
 
 /*
diff --git a/sys/ia64/include/_types.h b/sys/ia64/include/_types.h
index 8fc1be2f3873..0c2f5cc38d0b 100644
--- a/sys/ia64/include/_types.h
+++ b/sys/ia64/include/_types.h
@@ -59,7 +59,6 @@ typedef	unsigned long		__uint64_t;
  * Standard type definitions.
  */
 typedef	__int32_t	__clock_t;		/* clock()... */
-typedef	unsigned int	__cpumask_t;
 typedef	__int64_t	__critical_t;
 typedef	double		__double_t;
 typedef	float		__float_t;
diff --git a/sys/ia64/include/smp.h b/sys/ia64/include/smp.h
index 26557a712705..d2aff76c3ad5 100644
--- a/sys/ia64/include/smp.h
+++ b/sys/ia64/include/smp.h
@@ -14,6 +14,8 @@
 
 #ifndef LOCORE
 
+#include <sys/_cpuset.h>
+
 struct pcpu;
 
 struct ia64_ap_state {
@@ -44,7 +46,7 @@ extern int ia64_ipi_wakeup;
 
 void	ipi_all_but_self(int ipi);
 void	ipi_cpu(int cpu, u_int ipi);
-void	ipi_selected(cpumask_t cpus, int ipi);
+void	ipi_selected(cpuset_t cpus, int ipi);
 void	ipi_send(struct pcpu *, int ipi);
 
 #endif /* !LOCORE */
diff --git a/sys/kern/kern_cpuset.c b/sys/kern/kern_cpuset.c
index 6489ffb19e9b..e1f2801643ac 100644
--- a/sys/kern/kern_cpuset.c
+++ b/sys/kern/kern_cpuset.c
@@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/cpuset.h>
 #include <sys/sx.h>
 #include <sys/queue.h>
+#include <sys/libkern.h>
 #include <sys/limits.h>
 #include <sys/bus.h>
 #include <sys/interrupt.h>
@@ -616,6 +617,86 @@ cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask)
 	return (error);
 }
 
+/*
+ * Return the ffs()-style (1-based) index of the lowest set CPU, or 0 if empty.
+ */
+int
+cpusetobj_ffs(const cpuset_t *set)
+{
+	size_t i;
+	int cbit;
+
+	cbit = 0;
+	for (i = 0; i < _NCPUWORDS; i++) {
+		if (set->__bits[i] != 0) {
+			cbit = ffsl(set->__bits[i]);
+			cbit += i * _NCPUBITS;
+			break;
+		}
+	}
+	return (cbit);
+}
+
+/*
+ * Print a cpuset_t into buf as hex words, highest word first; return buf.
+ * The caller must supply a buffer of at least CPUSETBUFSIZ bytes.
+ */
+char *
+cpusetobj_strprint(char *buf, const cpuset_t *set)
+{
+	char *tbuf;
+	size_t i, bytesp, bufsiz;
+
+	tbuf = buf;
+	bytesp = 0;
+	bufsiz = CPUSETBUFSIZ;
+
+	for (i = _NCPUWORDS - 1; i > 0; i--) {
+		bytesp = snprintf(tbuf, bufsiz, "%lx, ", set->__bits[i]);
+		bufsiz -= bytesp;
+		tbuf += bytesp;
+	}
+	snprintf(tbuf, bufsiz, "%lx", set->__bits[0]);
+	return (buf);
+}
+
+/*
+ * Build a valid cpuset_t object from a string representation.
+ * The string may be at most CPUSETBUFSIZ - 1 characters long.
+ */
+int
+cpusetobj_strscan(cpuset_t *set, const char *buf)
+{
+	u_int nwords;
+	int i, ret;
+
+	if (strlen(buf) > CPUSETBUFSIZ - 1)
+		return (-1);
+
+	/* Allow passing a shorter version of the mask when necessary. */
+	nwords = 1;
+	for (i = 0; buf[i] != '\0'; i++)
+		if (buf[i] == ',')
+			nwords++;
+	if (nwords > _NCPUWORDS)
+		return (-1);
+
+	CPU_ZERO(set);
+	for (i = nwords - 1; i > 0; i--) {
+		ret = sscanf(buf, "%lx, ", &set->__bits[i]);
+		if (ret == 0 || ret == -1)
+			return (-1);
+		buf = strstr(buf, " ");
+		if (buf == NULL)
+			return (-1);
+		buf++;
+	}
+	ret = sscanf(buf, "%lx", &set->__bits[0]);
+	if (ret == 0 || ret == -1)
+		return (-1);
+	return (0);
+}
+
 /*
  * Apply an anonymous mask to a single thread.
  */
@@ -754,12 +835,7 @@ cpuset_init(void *arg)
 {
 	cpuset_t mask;
 
-	CPU_ZERO(&mask);
-#ifdef SMP
-	mask.__bits[0] = all_cpus;
-#else
-	mask.__bits[0] = 1;
-#endif
+	mask = all_cpus;
 	if (cpuset_modify(cpuset_zero, &mask))
 		panic("Can't set initial cpuset mask.\n");
 	cpuset_zero->cs_flags |= CPU_SET_RDONLY;
diff --git a/sys/kern/kern_ktr.c b/sys/kern/kern_ktr.c
index 2e5e06f671b2..eff3d5bd68ab 100644
--- a/sys/kern/kern_ktr.c
+++ b/sys/kern/kern_ktr.c
@@ -40,8 +40,10 @@ __FBSDID("$FreeBSD$");
 #include "opt_alq.h"
 
 #include <sys/param.h>
+#include <sys/queue.h>
 #include <sys/alq.h>
 #include <sys/cons.h>
+#include <sys/cpuset.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/libkern.h>
@@ -68,10 +70,6 @@ __FBSDID("$FreeBSD$");
 #define	KTR_MASK	(0)
 #endif
 
-#ifndef KTR_CPUMASK
-#define	KTR_CPUMASK	(~0)
-#endif
-
 #ifndef KTR_TIME
 #define	KTR_TIME	get_cyclecount()
 #endif
@@ -84,11 +82,6 @@ FEATURE(ktr, "Kernel support for KTR kernel tracing facility");
 
 SYSCTL_NODE(_debug, OID_AUTO, ktr, CTLFLAG_RD, 0, "KTR options");
 
-int	ktr_cpumask = KTR_CPUMASK;
-TUNABLE_INT("debug.ktr.cpumask", &ktr_cpumask);
-SYSCTL_INT(_debug_ktr, OID_AUTO, cpumask, CTLFLAG_RW,
-    &ktr_cpumask, 0, "Bitmask of CPUs on which KTR logging is enabled");
-
 int	ktr_mask = KTR_MASK;
 TUNABLE_INT("debug.ktr.mask", &ktr_mask);
 SYSCTL_INT(_debug_ktr, OID_AUTO, mask, CTLFLAG_RW,
@@ -106,6 +99,54 @@ int	ktr_version = KTR_VERSION;
 SYSCTL_INT(_debug_ktr, OID_AUTO, version, CTLFLAG_RD,
     &ktr_version, 0, "Version of the KTR interface");
 
+cpuset_t ktr_cpumask;
+static char ktr_cpumask_str[CPUSETBUFSIZ];
+TUNABLE_STR("debug.ktr.cpumask", ktr_cpumask_str, sizeof(ktr_cpumask_str));
+
+static void
+ktr_cpumask_initializer(void *dummy __unused)
+{
+
+	CPU_FILL(&ktr_cpumask);
+#ifdef KTR_CPUMASK
+	if (cpusetobj_strscan(&ktr_cpumask, KTR_CPUMASK) == -1)
+		CPU_FILL(&ktr_cpumask);
+#endif
+
+	/*
+	 * TUNABLE_STR() runs at SI_ORDER_MIDDLE and this handler at
+	 * SI_ORDER_ANY, so ktr_cpumask_str is already populated if set.
+	 */
+	if (ktr_cpumask_str[0] != '\0' &&
+	    cpusetobj_strscan(&ktr_cpumask, ktr_cpumask_str) == -1)
+		CPU_FILL(&ktr_cpumask);
+}
+SYSINIT(ktr_cpumask_initializer, SI_SUB_TUNABLES, SI_ORDER_ANY,
+    ktr_cpumask_initializer, NULL);
+
+static int
+sysctl_debug_ktr_cpumask(SYSCTL_HANDLER_ARGS)
+{
+	char lktr_cpumask_str[CPUSETBUFSIZ];
+	cpuset_t imask;
+	int error;
+
+	cpusetobj_strprint(lktr_cpumask_str, &ktr_cpumask);
+	error = sysctl_handle_string(oidp, lktr_cpumask_str,
+	    sizeof(lktr_cpumask_str), req);
+	if (error != 0 || req->newptr == NULL)
+		return (error);
+	if (cpusetobj_strscan(&imask, lktr_cpumask_str) == -1)
+		return (EINVAL);
+	CPU_COPY(&imask, &ktr_cpumask);
+
+	return (error);
+}
+SYSCTL_PROC(_debug_ktr, OID_AUTO, cpumask,
+    CTLFLAG_RW | CTLFLAG_MPSAFE | CTLTYPE_STRING, NULL, 0,
+    sysctl_debug_ktr_cpumask, "S",
+    "Bitmask of CPUs on which KTR logging is enabled");
+
 volatile int	ktr_idx = 0;
 struct	ktr_entry ktr_buf[KTR_ENTRIES];
 
@@ -213,7 +254,7 @@ ktr_tracepoint(u_int mask, const char *file, int line, const char *format,
 	if ((ktr_mask & mask) == 0)
 		return;
 	cpu = KTR_CPU;
-	if (((1 << cpu) & ktr_cpumask) == 0)
+	if (!CPU_ISSET(cpu, &ktr_cpumask))
 		return;
 #if defined(KTR_VERBOSE) || defined(KTR_ALQ)
 	td = curthread;
diff --git a/sys/kern/kern_pmc.c b/sys/kern/kern_pmc.c
index 7532378c7a2d..8d9c7c04a711 100644
--- a/sys/kern/kern_pmc.c
+++ b/sys/kern/kern_pmc.c
@@ -55,7 +55,7 @@ int (*pmc_hook)(struct thread *td, int function, void *arg) = NULL;
 int (*pmc_intr)(int cpu, struct trapframe *tf) = NULL;
 
 /* Bitmask of CPUs requiring servicing at hardclock time */
-volatile cpumask_t pmc_cpumask;
+volatile cpuset_t pmc_cpumask;
 
 /*
  * A global count of SS mode PMCs.  When non-zero, this means that
@@ -112,7 +112,7 @@ pmc_cpu_is_active(int cpu)
 {
 #ifdef	SMP
 	return (pmc_cpu_is_present(cpu) &&
-	    (hlt_cpus_mask & (1 << cpu)) == 0);
+	    !CPU_ISSET(cpu, &hlt_cpus_mask));
 #else
 	return (1);
 #endif
@@ -139,7 +139,7 @@ int
 pmc_cpu_is_primary(int cpu)
 {
 #ifdef	SMP
-	return ((logical_cpus_mask & (1 << cpu)) == 0);
+	return (!CPU_ISSET(cpu, &logical_cpus_mask));
 #else
 	return (1);
 #endif
diff --git a/sys/kern/kern_rmlock.c b/sys/kern/kern_rmlock.c
index 7f2b4e7367be..3214e1b55e2f 100644
--- a/sys/kern/kern_rmlock.c
+++ b/sys/kern/kern_rmlock.c
@@ -263,7 +263,7 @@ _rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock)
 	pc = pcpu_find(curcpu);
 
 	/* Check if we just need to do a proper critical_exit. */
-	if (!(pc->pc_cpumask & rm->rm_writecpus)) {
+	if (!CPU_OVERLAP(&pc->pc_cpumask, &rm->rm_writecpus)) {
 		critical_exit();
 		return (1);
 	}
@@ -325,7 +325,7 @@ _rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock)
 
 	critical_enter();
 	pc = pcpu_find(curcpu);
-	rm->rm_writecpus &= ~pc->pc_cpumask;
+	CPU_NAND(&rm->rm_writecpus, &pc->pc_cpumask);
 	rm_tracker_add(pc, tracker);
 	sched_pin();
 	critical_exit();
@@ -366,7 +366,8 @@ _rm_rlock(struct rmlock *rm, struct rm_priotracker *tracker, int trylock)
 	 * Fast path to combine two common conditions into a single
 	 * conditional jump.
 	 */
-	if (0 == (td->td_owepreempt | (rm->rm_writecpus & pc->pc_cpumask)))
+	if (0 == (td->td_owepreempt |
+	    CPU_OVERLAP(&rm->rm_writecpus,  &pc->pc_cpumask)))
 		return (1);
 
 	/* We do not have a read token and need to acquire one. */
@@ -429,17 +430,17 @@ _rm_wlock(struct rmlock *rm)
 {
 	struct rm_priotracker *prio;
 	struct turnstile *ts;
-	cpumask_t readcpus;
+	cpuset_t readcpus;
 
 	if (rm->lock_object.lo_flags & RM_SLEEPABLE)
 		sx_xlock(&rm->rm_lock_sx);
 	else
 		mtx_lock(&rm->rm_lock_mtx);
 
-	if (rm->rm_writecpus != all_cpus) {
+	if (CPU_CMP(&rm->rm_writecpus, &all_cpus)) {
 		/* Get all read tokens back */
-
-		readcpus = all_cpus & (all_cpus & ~rm->rm_writecpus);
+		readcpus = all_cpus;
+		CPU_NAND(&readcpus, &rm->rm_writecpus);
 		rm->rm_writecpus = all_cpus;
 
 		/*
diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c
index 001da3da8449..da041faf99a8 100644
--- a/sys/kern/kern_shutdown.c
+++ b/sys/kern/kern_shutdown.c
@@ -233,30 +233,32 @@ print_uptime(void)
 	printf("%lds\n", (long)ts.tv_sec);
 }
 
-static void
-doadump(void)
+int
+doadump(boolean_t textdump)
 {
+	boolean_t coredump;
 
-	/*
-	 * Sometimes people have to call this from the kernel debugger. 
-	 * (if 'panic' can not dump)
-	 * Give them a clue as to why they can't dump.
-	 */
-	if (dumper.dumper == NULL) {
-		printf("Cannot dump. Device not defined or unavailable.\n");
-		return;
-	}
+	if (dumping)
+		return (EBUSY);
+	if (dumper.dumper == NULL)
+		return (ENXIO);
 
 	savectx(&dumppcb);
 	dumptid = curthread->td_tid;
 	dumping++;
+
+	coredump = TRUE;
 #ifdef DDB
-	if (textdump_pending)
+	if (textdump && textdump_pending) {
+		coredump = FALSE;
 		textdump_dumpsys(&dumper);
-	else
+	}
 #endif
+	if (coredump)
 		dumpsys(&dumper);
+
 	dumping--;
+	return (0);
 }
 
 static int
@@ -425,7 +427,7 @@ kern_reboot(int howto)
 	EVENTHANDLER_INVOKE(shutdown_post_sync, howto);
 
 	if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) 
-		doadump();
+		doadump(TRUE);
 
 	/* Now that we're going to really halt the system... */
 	EVENTHANDLER_INVOKE(shutdown_final, howto);
diff --git a/sys/kern/ksched.c b/sys/kern/ksched.c
index 7ee56d50b13b..799b60d1a139 100644
--- a/sys/kern/ksched.c
+++ b/sys/kern/ksched.c
@@ -206,7 +206,7 @@ ksched_setscheduler(struct ksched *ksched,
 		if (param->sched_priority >= 0 &&
 			param->sched_priority <= (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE)) {
 			rtp.type = RTP_PRIO_NORMAL;
-			rtp.prio = p4prio_to_rtpprio(param->sched_priority);
+			rtp.prio = p4prio_to_tsprio(param->sched_priority);
 			rtp_to_pri(&rtp, td);
 		} else
 			e = EINVAL;
diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c
index 519cae516e2d..592bb80aff52 100644
--- a/sys/kern/sched_4bsd.c
+++ b/sys/kern/sched_4bsd.c
@@ -156,7 +156,7 @@ static struct runq runq;
 static struct runq runq_pcpu[MAXCPU];
 long runq_length[MAXCPU];
 
-static cpumask_t idle_cpus_mask;
+static cpuset_t idle_cpus_mask;
 #endif
 
 struct pcpuidlestat {
@@ -951,7 +951,8 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
 	if (td->td_flags & TDF_IDLETD) {
 		TD_SET_CAN_RUN(td);
 #ifdef SMP
-		idle_cpus_mask &= ~PCPU_GET(cpumask);
+		/* Spinlock held here, assume no migration. */
+		CPU_NAND(&idle_cpus_mask, PCPU_PTR(cpumask));
 #endif
 	} else {
 		if (TD_IS_RUNNING(td)) {
@@ -1025,7 +1026,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags)
 
 #ifdef SMP
 	if (td->td_flags & TDF_IDLETD)
-		idle_cpus_mask |= PCPU_GET(cpumask);
+		CPU_OR(&idle_cpus_mask, PCPU_PTR(cpumask));
 #endif
 	sched_lock.mtx_lock = (uintptr_t)td;
 	td->td_oncpu = PCPU_GET(cpuid);
@@ -1054,7 +1055,8 @@ static int
 forward_wakeup(int cpunum)
 {
 	struct pcpu *pc;
-	cpumask_t dontuse, id, map, map2, me;
+	cpuset_t dontuse, id, map, map2, me;
+	int iscpuset;
 
 	mtx_assert(&sched_lock, MA_OWNED);
 
@@ -1071,32 +1073,38 @@ forward_wakeup(int cpunum)
 	/*
 	 * Check the idle mask we received against what we calculated
 	 * before in the old version.
+	 *
+	 * Also note that sched_lock is held now, thus no migration is
+	 * expected.
 	 */
 	me = PCPU_GET(cpumask);
 
 	/* Don't bother if we should be doing it ourself. */
-	if ((me & idle_cpus_mask) && (cpunum == NOCPU || me == (1 << cpunum)))
+	if (CPU_OVERLAP(&me, &idle_cpus_mask) &&
+	    (cpunum == NOCPU || CPU_ISSET(cpunum, &me)))
 		return (0);
 
-	dontuse = me | stopped_cpus | hlt_cpus_mask;
-	map2 = 0;
+	dontuse = me;
+	CPU_OR(&dontuse, &stopped_cpus);
+	CPU_OR(&dontuse, &hlt_cpus_mask);
+	CPU_ZERO(&map2);
 	if (forward_wakeup_use_loop) {
 		STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
 			id = pc->pc_cpumask;
-			if ((id & dontuse) == 0 &&
+			if (!CPU_OVERLAP(&id, &dontuse) &&
 			    pc->pc_curthread == pc->pc_idlethread) {
-				map2 |= id;
+				CPU_OR(&map2, &id);
 			}
 		}
 	}
 
 	if (forward_wakeup_use_mask) {
-		map = 0;
-		map = idle_cpus_mask & ~dontuse;
+		map = idle_cpus_mask;
+		CPU_NAND(&map, &dontuse);
 
 		/* If they are both on, compare and use loop if different. */
 		if (forward_wakeup_use_loop) {
-			if (map != map2) {
+			if (CPU_CMP(&map, &map2)) {
 				printf("map != map2, loop method preferred\n");
 				map = map2;
 			}
@@ -1108,18 +1116,22 @@ forward_wakeup(int cpunum)
 	/* If we only allow a specific CPU, then mask off all the others. */
 	if (cpunum != NOCPU) {
 		KASSERT((cpunum <= mp_maxcpus),("forward_wakeup: bad cpunum."));
-		map &= (1 << cpunum);
+		iscpuset = CPU_ISSET(cpunum, &map);
+		if (iscpuset == 0)
+			CPU_ZERO(&map);
+		else
+			CPU_SETOF(cpunum, &map);
 	}
-	if (map) {
+	if (!CPU_EMPTY(&map)) {
 		forward_wakeups_delivered++;
 		STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
 			id = pc->pc_cpumask;
-			if ((map & id) == 0)
+			if (!CPU_OVERLAP(&map, &id))
 				continue;
 			if (cpu_idle_wakeup(pc->pc_cpuid))
-				map &= ~id;
+				CPU_NAND(&map, &id);
 		}
-		if (map)
+		if (!CPU_EMPTY(&map))
 			ipi_selected(map, IPI_AST);
 		return (1);
 	}
@@ -1135,7 +1147,7 @@ kick_other_cpu(int pri, int cpuid)
 	int cpri;
 
 	pcpu = pcpu_find(cpuid);
-	if (idle_cpus_mask & pcpu->pc_cpumask) {
+	if (CPU_OVERLAP(&idle_cpus_mask, &pcpu->pc_cpumask)) {
 		forward_wakeups_delivered++;
 		if (!cpu_idle_wakeup(cpuid))
 			ipi_cpu(cpuid, IPI_AST);
@@ -1193,6 +1205,7 @@ void
 sched_add(struct thread *td, int flags)
 #ifdef SMP
 {
+	cpuset_t idle, me, tidlemsk;
 	struct td_sched *ts;
 	int forwarded = 0;
 	int cpu;
@@ -1262,11 +1275,20 @@ sched_add(struct thread *td, int flags)
 	        kick_other_cpu(td->td_priority, cpu);
 	} else {
 		if (!single_cpu) {
-			cpumask_t me = PCPU_GET(cpumask);
-			cpumask_t idle = idle_cpus_mask & me;
 
-			if (!idle && ((flags & SRQ_INTR) == 0) &&
-			    (idle_cpus_mask & ~(hlt_cpus_mask | me)))
+			/*
+			 * Thread spinlock is held here, assume no
+			 * migration is possible.
+			 */
+			me = PCPU_GET(cpumask);
+			idle = idle_cpus_mask;
+			tidlemsk = idle;
+			CPU_AND(&idle, &me);
+			CPU_OR(&me, &hlt_cpus_mask);
+			CPU_NAND(&tidlemsk, &me);
+
+			if (CPU_EMPTY(&idle) && ((flags & SRQ_INTR) == 0) &&
+			    !CPU_EMPTY(&tidlemsk))
 				forwarded = forward_wakeup(cpu);
 		}
 
diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c
index ac18e778ae28..05267f3d6adb 100644
--- a/sys/kern/sched_ule.c
+++ b/sys/kern/sched_ule.c
@@ -564,7 +564,7 @@ struct cpu_search {
 
 #define	CPUSET_FOREACH(cpu, mask)				\
 	for ((cpu) = 0; (cpu) <= mp_maxid; (cpu)++)		\
-		if ((mask) & 1 << (cpu))
+		if (CPU_ISSET(cpu, &mask))
 
 static __inline int cpu_search(struct cpu_group *cg, struct cpu_search *low,
     struct cpu_search *high, const int match);
@@ -2650,15 +2650,16 @@ static int
 sysctl_kern_sched_topology_spec_internal(struct sbuf *sb, struct cpu_group *cg,
     int indent)
 {
+	char cpusetbuf[CPUSETBUFSIZ];
 	int i, first;
 
 	sbuf_printf(sb, "%*s<group level=\"%d\" cache-level=\"%d\">\n", indent,
 	    "", 1 + indent / 2, cg->cg_level);
-	sbuf_printf(sb, "%*s <cpu count=\"%d\" mask=\"0x%x\">", indent, "",
-	    cg->cg_count, cg->cg_mask);
+	sbuf_printf(sb, "%*s <cpu count=\"%d\" mask=\"%s\">", indent, "",
+	    cg->cg_count, cpusetobj_strprint(cpusetbuf, &cg->cg_mask));
 	first = TRUE;
 	for (i = 0; i < MAXCPU; i++) {
-		if ((cg->cg_mask & (1 << i)) != 0) {
+		if (CPU_ISSET(i, &cg->cg_mask)) {
 			if (!first)
 				sbuf_printf(sb, ", ");
 			else
diff --git a/sys/kern/subr_kdb.c b/sys/kern/subr_kdb.c
index 5d68ae250d47..1d67864b1533 100644
--- a/sys/kern/subr_kdb.c
+++ b/sys/kern/subr_kdb.c
@@ -413,7 +413,8 @@ kdb_thr_ctx(struct thread *thr)
 
 #if defined(SMP) && defined(KDB_STOPPEDPCB)
 	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu)  {
-		if (pc->pc_curthread == thr && (stopped_cpus & pc->pc_cpumask))
+		if (pc->pc_curthread == thr &&
+		    CPU_OVERLAP(&stopped_cpus, &pc->pc_cpumask))
 			return (KDB_STOPPEDPCB(pc));
 	}
 #endif
diff --git a/sys/kern/subr_pcpu.c b/sys/kern/subr_pcpu.c
index 5cb4f26a7ee4..a6b3ae09defc 100644
--- a/sys/kern/subr_pcpu.c
+++ b/sys/kern/subr_pcpu.c
@@ -87,7 +87,7 @@ pcpu_init(struct pcpu *pcpu, int cpuid, size_t size)
 	KASSERT(cpuid >= 0 && cpuid < MAXCPU,
 	    ("pcpu_init: invalid cpuid %d", cpuid));
 	pcpu->pc_cpuid = cpuid;
-	pcpu->pc_cpumask = 1 << cpuid;
+	CPU_SETOF(cpuid, &pcpu->pc_cpumask);
 	cpuid_to_pcpu[cpuid] = pcpu;
 	STAILQ_INSERT_TAIL(&cpuhead, pcpu, pc_allcpu);
 	cpu_pcpu_init(pcpu, cpuid, size);
diff --git a/sys/kern/subr_prf.c b/sys/kern/subr_prf.c
index 3334837747f5..48f2dd9ce8af 100644
--- a/sys/kern/subr_prf.c
+++ b/sys/kern/subr_prf.c
@@ -163,6 +163,7 @@ uprintf(const char *fmt, ...)
 		goto out;
 	}
 	pca.flags = TOTTY;
+	pca.p_bufr = NULL;
 	va_start(ap, fmt);
 	tty_lock(pca.tty);
 	retval = kvprintf(fmt, putchar, &pca, 10, ap);
@@ -206,6 +207,7 @@ tprintf(struct proc *p, int pri, const char *fmt, ...)
 	pca.pri = pri;
 	pca.tty = tp;
 	pca.flags = flags;
+	pca.p_bufr = NULL;
 	va_start(ap, fmt);
 	if (pca.tty != NULL)
 		tty_lock(pca.tty);
@@ -234,6 +236,7 @@ ttyprintf(struct tty *tp, const char *fmt, ...)
 	va_start(ap, fmt);
 	pca.tty = tp;
 	pca.flags = TOTTY;
+	pca.p_bufr = NULL;
 	retval = kvprintf(fmt, putchar, &pca, 10, ap);
 	va_end(ap);
 	return (retval);
diff --git a/sys/kern/subr_rman.c b/sys/kern/subr_rman.c
index 3014b1902014..abd72c03df0b 100644
--- a/sys/kern/subr_rman.c
+++ b/sys/kern/subr_rman.c
@@ -839,6 +839,7 @@ int_rman_release_resource(struct rman *rm, struct resource_i *r)
 		 * without freeing anything.
 		 */
 		r->r_flags &= ~RF_ALLOCATED;
+		r->r_dev = NULL;
 		return 0;
 	}
 
diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c
index 351f09677fa4..c38177b4e71f 100644
--- a/sys/kern/subr_smp.c
+++ b/sys/kern/subr_smp.c
@@ -53,15 +53,15 @@ __FBSDID("$FreeBSD$");
 #include "opt_sched.h"
 
 #ifdef SMP
-volatile cpumask_t stopped_cpus;
-volatile cpumask_t started_cpus;
-cpumask_t hlt_cpus_mask;
-cpumask_t logical_cpus_mask;
+volatile cpuset_t stopped_cpus;
+volatile cpuset_t started_cpus;
+cpuset_t hlt_cpus_mask;
+cpuset_t logical_cpus_mask;
 
 void (*cpustop_restartfunc)(void);
 #endif
 /* This is used in modules that need to work in both SMP and UP. */
-cpumask_t all_cpus;
+cpuset_t all_cpus;
 
 int mp_ncpus;
 /* export this for libkvm consumers. */
@@ -200,8 +200,11 @@ forward_signal(struct thread *td)
  *
  */
 static int
-generic_stop_cpus(cpumask_t map, u_int type)
+generic_stop_cpus(cpuset_t map, u_int type)
 {
+#ifdef KTR
+	char cpusetbuf[CPUSETBUFSIZ];
+#endif
 	static volatile u_int stopping_cpu = NOCPU;
 	int i;
 
@@ -216,7 +219,8 @@ generic_stop_cpus(cpumask_t map, u_int type)
 	if (!smp_started)
 		return (0);
 
-	CTR2(KTR_SMP, "stop_cpus(%x) with %u type", map, type);
+	CTR2(KTR_SMP, "stop_cpus(%s) with %u type",
+	    cpusetobj_strprint(cpusetbuf, &map), type);
 
 	if (stopping_cpu != PCPU_GET(cpuid))
 		while (atomic_cmpset_int(&stopping_cpu, NOCPU,
@@ -228,7 +232,7 @@ generic_stop_cpus(cpumask_t map, u_int type)
 	ipi_selected(map, type);
 
 	i = 0;
-	while ((stopped_cpus & map) != map) {
+	while (!CPU_SUBSET(&stopped_cpus, &map)) {
 		/* spin */
 		cpu_spinwait();
 		i++;
@@ -245,14 +249,14 @@ generic_stop_cpus(cpumask_t map, u_int type)
 }
 
 int
-stop_cpus(cpumask_t map)
+stop_cpus(cpuset_t map)
 {
 
 	return (generic_stop_cpus(map, IPI_STOP));
 }
 
 int
-stop_cpus_hard(cpumask_t map)
+stop_cpus_hard(cpuset_t map)
 {
 
 	return (generic_stop_cpus(map, IPI_STOP_HARD));
@@ -260,7 +264,7 @@ stop_cpus_hard(cpumask_t map)
 
 #if defined(__amd64__)
 int
-suspend_cpus(cpumask_t map)
+suspend_cpus(cpuset_t map)
 {
 
 	return (generic_stop_cpus(map, IPI_SUSPEND));
@@ -281,19 +285,22 @@ suspend_cpus(cpumask_t map)
  *   1: ok
  */
 int
-restart_cpus(cpumask_t map)
+restart_cpus(cpuset_t map)
 {
+#ifdef KTR
+	char cpusetbuf[CPUSETBUFSIZ];
+#endif
 
 	if (!smp_started)
 		return 0;
 
-	CTR1(KTR_SMP, "restart_cpus(%x)", map);
+	CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map));
 
 	/* signal other cpus to restart */
-	atomic_store_rel_int(&started_cpus, map);
+	CPU_COPY_STORE_REL(&map, &started_cpus);
 
 	/* wait for each to clear its bit */
-	while ((stopped_cpus & map) != 0)
+	while (CPU_OVERLAP(&stopped_cpus, &map))
 		cpu_spinwait();
 
 	return 1;
@@ -409,13 +416,13 @@ smp_rendezvous_action(void)
 }
 
 void
-smp_rendezvous_cpus(cpumask_t map,
+smp_rendezvous_cpus(cpuset_t map,
 	void (* setup_func)(void *), 
 	void (* action_func)(void *),
 	void (* teardown_func)(void *),
 	void *arg)
 {
-	int i, ncpus = 0;
+	int curcpumap, i, ncpus = 0;
 
 	if (!smp_started) {
 		if (setup_func != NULL)
@@ -428,11 +435,11 @@ smp_rendezvous_cpus(cpumask_t map,
 	}
 
 	CPU_FOREACH(i) {
-		if (((1 << i) & map) != 0)
+		if (CPU_ISSET(i, &map))
 			ncpus++;
 	}
 	if (ncpus == 0)
-		panic("ncpus is 0 with map=0x%x", map);
+		panic("ncpus is 0 with non-zero map");
 
 	mtx_lock_spin(&smp_ipi_mtx);
 
@@ -452,10 +459,12 @@ smp_rendezvous_cpus(cpumask_t map,
 	 * Signal other processors, which will enter the IPI with
 	 * interrupts off.
 	 */
-	ipi_selected(map & ~(1 << curcpu), IPI_RENDEZVOUS);
+	curcpumap = CPU_ISSET(curcpu, &map);
+	CPU_CLR(curcpu, &map);
+	ipi_selected(map, IPI_RENDEZVOUS);
 
 	/* Check if the current CPU is in the map */
-	if ((map & (1 << curcpu)) != 0)
+	if (curcpumap != 0)
 		smp_rendezvous_action();
 
 	/*
@@ -484,6 +493,7 @@ static struct cpu_group group[MAXCPU];
 struct cpu_group *
 smp_topo(void)
 {
+	char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ];
 	struct cpu_group *top;
 
 	/*
@@ -530,9 +540,10 @@ smp_topo(void)
 	if (top->cg_count != mp_ncpus)
 		panic("Built bad topology at %p.  CPU count %d != %d",
 		    top, top->cg_count, mp_ncpus);
-	if (top->cg_mask != all_cpus)
-		panic("Built bad topology at %p.  CPU mask 0x%X != 0x%X",
-		    top, top->cg_mask, all_cpus);
+	if (CPU_CMP(&top->cg_mask, &all_cpus))
+		panic("Built bad topology at %p.  CPU mask (%s) != (%s)",
+		    top, cpusetobj_strprint(cpusetbuf, &top->cg_mask),
+		    cpusetobj_strprint(cpusetbuf2, &all_cpus));
 	return (top);
 }
 
@@ -557,11 +568,13 @@ static int
 smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share,
     int count, int flags, int start)
 {
-	cpumask_t mask;
+	char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ];
+	cpuset_t mask;
 	int i;
 
-	for (mask = 0, i = 0; i < count; i++, start++)
-		mask |= (1 << start);
+	CPU_ZERO(&mask);
+	for (i = 0; i < count; i++, start++)
+		CPU_SET(start, &mask);
 	child->cg_parent = parent;
 	child->cg_child = NULL;
 	child->cg_children = 0;
@@ -571,10 +584,12 @@ smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share,
 	child->cg_mask = mask;
 	parent->cg_children++;
 	for (; parent != NULL; parent = parent->cg_parent) {
-		if ((parent->cg_mask & child->cg_mask) != 0)
-			panic("Duplicate children in %p.  mask 0x%X child 0x%X",
-			    parent, parent->cg_mask, child->cg_mask);
-		parent->cg_mask |= child->cg_mask;
+		if (CPU_OVERLAP(&parent->cg_mask, &child->cg_mask))
+			panic("Duplicate children in %p.  mask (%s) child (%s)",
+			    parent,
+			    cpusetobj_strprint(cpusetbuf, &parent->cg_mask),
+			    cpusetobj_strprint(cpusetbuf2, &child->cg_mask));
+		CPU_OR(&parent->cg_mask, &child->cg_mask);
 		parent->cg_count += child->cg_count;
 	}
 
@@ -634,20 +649,20 @@ struct cpu_group *
 smp_topo_find(struct cpu_group *top, int cpu)
 {
 	struct cpu_group *cg;
-	cpumask_t mask;
+	cpuset_t mask;
 	int children;
 	int i;
 
-	mask = (1 << cpu);
+	CPU_SETOF(cpu, &mask);
 	cg = top;
 	for (;;) {
-		if ((cg->cg_mask & mask) == 0)
+		if (!CPU_OVERLAP(&cg->cg_mask, &mask))
 			return (NULL);
 		if (cg->cg_children == 0)
 			return (cg);
 		children = cg->cg_children;
 		for (i = 0, cg = cg->cg_child; i < children; cg++, i++)
-			if ((cg->cg_mask & mask) != 0)
+			if (CPU_OVERLAP(&cg->cg_mask, &mask))
 				break;
 	}
 	return (NULL);
@@ -655,7 +670,7 @@ smp_topo_find(struct cpu_group *top, int cpu)
 #else /* !SMP */
 
 void
-smp_rendezvous_cpus(cpumask_t map,
+smp_rendezvous_cpus(cpuset_t map,
 	void (*setup_func)(void *), 
 	void (*action_func)(void *),
 	void (*teardown_func)(void *),
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index a4bbdba54e5a..19aaee01859a 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -747,6 +747,10 @@ kern_sendit(td, s, mp, flags, control, segflg)
 		return (error);
 	so = (struct socket *)fp->f_data;
 
+#ifdef KTRACE
+	if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT))
+		ktrsockaddr(mp->msg_name);
+#endif
 #ifdef MAC
 	if (mp->msg_name != NULL) {
 		error = mac_socket_check_connect(td->td_ucred, so,
diff --git a/sys/mips/cavium/octeon_mp.c b/sys/mips/cavium/octeon_mp.c
index 78eafa678118..efddee86ae0f 100644
--- a/sys/mips/cavium/octeon_mp.c
+++ b/sys/mips/cavium/octeon_mp.c
@@ -102,10 +102,18 @@ platform_init_ap(int cpuid)
 	mips_wbflush();
 }
 
-cpumask_t
-platform_cpu_mask(void)
+void
+platform_cpu_mask(cpuset_t *mask)
 {
-       return (octeon_bootinfo->core_mask);
+	uint64_t core_mask = octeon_bootinfo->core_mask;
+	int i;
+
+	/* Set each CPU bit explicitly; memcpy() of core_mask would land in */
+	/* the wrong half of a 64-bit word on big-endian LP64 kernels. */
+	CPU_ZERO(mask);
+	for (i = 0; i < MAXCPU && i < (int)(sizeof(core_mask) * NBBY); i++)
+		if ((core_mask >> i) & 1)
+			CPU_SET(i, mask);
 }
 
 struct cpu_group *
diff --git a/sys/mips/include/_types.h b/sys/mips/include/_types.h
index 4d57e20db108..2f23db6d9b3a 100644
--- a/sys/mips/include/_types.h
+++ b/sys/mips/include/_types.h
@@ -73,7 +73,6 @@ typedef	unsigned long long	__uint64_t;
  * Standard type definitions.
  */
 typedef	__int32_t	__clock_t;		/* clock()... */
-typedef	unsigned int	__cpumask_t;
 typedef	double		__double_t;
 typedef	double		__float_t;
 #ifdef __mips_n64
diff --git a/sys/mips/include/hwfunc.h b/sys/mips/include/hwfunc.h
index 683aedb0c9ac..a9e3285f5395 100644
--- a/sys/mips/include/hwfunc.h
+++ b/sys/mips/include/hwfunc.h
@@ -28,6 +28,8 @@
 #ifndef _MACHINE_HWFUNC_H_
 #define	_MACHINE_HWFUNC_H_
 
+#include <sys/_cpuset.h>
+
 struct trapframe;
 struct timecounter;
 /*
@@ -91,7 +93,7 @@ extern int platform_processor_id(void);
 /*
  * Return the cpumask of available processors.
  */
-extern cpumask_t platform_cpu_mask(void);
+extern void platform_cpu_mask(cpuset_t *mask);
 
 /*
  * Return the topology of processors on this platform
diff --git a/sys/mips/include/pmap.h b/sys/mips/include/pmap.h
index e71063592a80..90375ebd3448 100644
--- a/sys/mips/include/pmap.h
+++ b/sys/mips/include/pmap.h
@@ -58,6 +58,7 @@
 #ifndef LOCORE
 
 #include <sys/queue.h>
+#include <sys/_cpuset.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 
@@ -83,7 +84,7 @@ struct pmap {
 	pd_entry_t *pm_segtab;	/* KVA of segment table */
 	TAILQ_HEAD(, pv_entry) pm_pvlist;	/* list of mappings in
 						 * pmap */
-	cpumask_t	pm_active;		/* active on cpus */
+	cpuset_t	pm_active;		/* active on cpus */
 	struct {
 		u_int32_t asid:ASID_BITS;	/* TLB address space tag */
 		u_int32_t gen:ASIDGEN_BITS;	/* its generation number */
diff --git a/sys/mips/include/smp.h b/sys/mips/include/smp.h
index 58aaf03165bf..0fcca9af1542 100644
--- a/sys/mips/include/smp.h
+++ b/sys/mips/include/smp.h
@@ -17,6 +17,8 @@
 
 #ifdef _KERNEL
 
+#include <sys/_cpuset.h>
+
 #include <machine/pcb.h>
 
 /*
@@ -33,7 +35,7 @@
 
 void	ipi_all_but_self(int ipi);
 void	ipi_cpu(int cpu, u_int ipi);
-void	ipi_selected(cpumask_t cpus, int ipi);
+void	ipi_selected(cpuset_t cpus, int ipi);
 void	smp_init_secondary(u_int32_t cpuid);
 void	mpentry(void);
 
diff --git a/sys/mips/mips/mp_machdep.c b/sys/mips/mips/mp_machdep.c
index 7191b37f9596..79a3476a7784 100644
--- a/sys/mips/mips/mp_machdep.c
+++ b/sys/mips/mips/mp_machdep.c
@@ -29,6 +29,7 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/systm.h>
+#include <sys/cpuset.h>
 #include <sys/ktr.h>
 #include <sys/proc.h>
 #include <sys/lock.h>
@@ -80,15 +81,16 @@ ipi_all_but_self(int ipi)
 
 /* Send an IPI to a set of cpus. */
 void
-ipi_selected(cpumask_t cpus, int ipi)
+ipi_selected(cpuset_t cpus, int ipi)
 {
 	struct pcpu *pc;
 
-	CTR3(KTR_SMP, "%s: cpus: %x, ipi: %x\n", __func__, cpus, ipi);
-
 	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
-		if ((cpus & pc->pc_cpumask) != 0)
+		if (CPU_OVERLAP(&cpus, &pc->pc_cpumask)) {
+			CTR3(KTR_SMP, "%s: pc: %p, ipi: %x\n", __func__, pc,
+			    ipi);
 			ipi_send(pc, ipi);
+		}
 	}
 }
 
@@ -108,7 +110,7 @@ static int
 mips_ipi_handler(void *arg)
 {
 	int cpu;
-	cpumask_t cpumask;
+	cpuset_t cpumask;
 	u_int	ipi, ipi_bitmap;
 	int	bit;
 
@@ -148,14 +150,14 @@ mips_ipi_handler(void *arg)
 			tlb_save();
 
 			/* Indicate we are stopped */
-			atomic_set_int(&stopped_cpus, cpumask);
+			CPU_OR_ATOMIC(&stopped_cpus, &cpumask);
 
 			/* Wait for restart */
-			while ((started_cpus & cpumask) == 0)
+			while (!CPU_OVERLAP(&started_cpus, &cpumask))
 				cpu_spinwait();
 
-			atomic_clear_int(&started_cpus, cpumask);
-			atomic_clear_int(&stopped_cpus, cpumask);
+			CPU_NAND_ATOMIC(&started_cpus, &cpumask);
+			CPU_NAND_ATOMIC(&stopped_cpus, &cpumask);
 			CTR0(KTR_SMP, "IPI_STOP (restart)");
 			break;
 		case IPI_PREEMPT:
@@ -200,14 +202,22 @@ start_ap(int cpuid)
 void
 cpu_mp_setmaxid(void)
 {
-	cpumask_t cpumask;
+	cpuset_t cpumask;
+	int cpu, last;
 
-	cpumask = platform_cpu_mask();
-	mp_ncpus = bitcount32(cpumask);
+	platform_cpu_mask(&cpumask);
+	mp_ncpus = 0;
+	last = 1;
+	while ((cpu = cpusetobj_ffs(&cpumask)) != 0) {
+		last = cpu;
+		cpu--;
+		CPU_CLR(cpu, &cpumask);
+		mp_ncpus++;
+	}
 	if (mp_ncpus <= 0)
 		mp_ncpus = 1;
 
-	mp_maxid = min(fls(cpumask), MAXCPU) - 1;
+	mp_maxid = min(last, MAXCPU) - 1;
 }
 
 void
@@ -233,16 +243,16 @@ void
 cpu_mp_start(void)
 {
 	int error, cpuid;
-	cpumask_t cpumask;
+	cpuset_t cpumask, ocpus;
 
 	mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN);
 
-	all_cpus = 0;
-	cpumask = platform_cpu_mask();
+	CPU_ZERO(&all_cpus);
+	platform_cpu_mask(&cpumask);
 
-	while (cpumask != 0) {
-		cpuid = ffs(cpumask) - 1;
-		cpumask &= ~(1 << cpuid);
+	while (!CPU_EMPTY(&cpumask)) {
+		cpuid = cpusetobj_ffs(&cpumask) - 1;
+		CPU_CLR(cpuid, &cpumask);
 
 		if (cpuid >= MAXCPU) {
 			printf("cpu_mp_start: ignoring AP #%d.\n", cpuid);
@@ -257,15 +267,19 @@ cpu_mp_start(void)
 			if (bootverbose)
 				printf("AP #%d started!\n", cpuid);
 		}
-		all_cpus |= 1 << cpuid;
+		CPU_SET(cpuid, &all_cpus);
 	}
 
-	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+	ocpus = all_cpus;
+	CPU_CLR(PCPU_GET(cpuid), &ocpus);
+	PCPU_SET(other_cpus, ocpus);
 }
 
 void
 smp_init_secondary(u_int32_t cpuid)
 {
+	cpuset_t ocpus;
+
 	/* TLB */
 	mips_wr_wired(0);
 	tlb_invalidate_all();
@@ -303,7 +317,9 @@ smp_init_secondary(u_int32_t cpuid)
 	CTR1(KTR_SMP, "SMP: AP CPU #%d launched", PCPU_GET(cpuid));
 
 	/* Build our map of 'other' CPUs. */
-	PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask));
+	ocpus = all_cpus;
+	CPU_CLR(PCPU_GET(cpuid), &ocpus);
+	PCPU_SET(other_cpus, ocpus);
 
 	if (bootverbose)
 		printf("SMP: AP CPU #%d launched.\n", PCPU_GET(cpuid));
diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c
index 7f0f4f004c80..f7ea660d019c 100644
--- a/sys/mips/mips/pmap.c
+++ b/sys/mips/mips/pmap.c
@@ -471,7 +471,7 @@ pmap_create_kernel_pagetable(void)
 
 	PMAP_LOCK_INIT(kernel_pmap);
 	kernel_pmap->pm_segtab = kernel_segmap;
-	kernel_pmap->pm_active = ~0;
+	CPU_FILL(&kernel_pmap->pm_active);
 	TAILQ_INIT(&kernel_pmap->pm_pvlist);
 	kernel_pmap->pm_asid[0].asid = PMAP_ASID_RESERVED;
 	kernel_pmap->pm_asid[0].gen = 0;
@@ -630,10 +630,14 @@ pmap_invalidate_all_local(pmap_t pmap)
 		tlb_invalidate_all();
 		return;
 	}
-	if (pmap->pm_active & PCPU_GET(cpumask))
+	sched_pin();
+	if (CPU_OVERLAP(&pmap->pm_active, PCPU_PTR(cpumask))) {
+		sched_unpin();
 		tlb_invalidate_all_user(pmap);
-	else
+	} else {
+		sched_unpin();
 		pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
+	}
 }
 
 #ifdef SMP
@@ -667,12 +671,16 @@ pmap_invalidate_page_local(pmap_t pmap, vm_offset_t va)
 		tlb_invalidate_address(pmap, va);
 		return;
 	}
-	if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation))
+	sched_pin();
+	if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) {
+		sched_unpin();
 		return;
-	else if (!(pmap->pm_active & PCPU_GET(cpumask))) {
+	} else if (!CPU_OVERLAP(&pmap->pm_active, PCPU_PTR(cpumask))) {
 		pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
+		sched_unpin();
 		return;
 	}
+	sched_unpin();
 	tlb_invalidate_address(pmap, va);
 }
 
@@ -716,12 +724,16 @@ pmap_update_page_local(pmap_t pmap, vm_offset_t va, pt_entry_t pte)
 		tlb_update(pmap, va, pte);
 		return;
 	}
-	if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation))
+	sched_pin();
+	if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) {
+		sched_unpin();
 		return;
-	else if (!(pmap->pm_active & PCPU_GET(cpumask))) {
+	} else if (!CPU_OVERLAP(&pmap->pm_active, PCPU_PTR(cpumask))) {
 		pmap->pm_asid[PCPU_GET(cpuid)].gen = 0;
+		sched_unpin();
 		return;
 	}
+	sched_unpin();
 	tlb_update(pmap, va, pte);
 }
 
@@ -1041,7 +1053,7 @@ pmap_pinit0(pmap_t pmap)
 
 	PMAP_LOCK_INIT(pmap);
 	pmap->pm_segtab = kernel_segmap;
-	pmap->pm_active = 0;
+	CPU_ZERO(&pmap->pm_active);
 	pmap->pm_ptphint = NULL;
 	for (i = 0; i < MAXCPU; i++) {
 		pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
@@ -1102,7 +1114,7 @@ pmap_pinit(pmap_t pmap)
 
 	ptdva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(ptdpg));
 	pmap->pm_segtab = (pd_entry_t *)ptdva;
-	pmap->pm_active = 0;
+	CPU_ZERO(&pmap->pm_active);
 	pmap->pm_ptphint = NULL;
 	for (i = 0; i < MAXCPU; i++) {
 		pmap->pm_asid[i].asid = PMAP_ASID_RESERVED;
@@ -2948,8 +2960,8 @@ pmap_activate(struct thread *td)
 	oldpmap = PCPU_GET(curpmap);
 
 	if (oldpmap)
-		atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask));
-	atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask));
+		CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask));
+	CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask));
 	pmap_asid_alloc(pmap);
 	if (td == curthread) {
 		PCPU_SET(segbase, pmap->pm_segtab);
@@ -3283,7 +3295,7 @@ pmap_kextract(vm_offset_t va)
 		pt_entry_t *ptep;
 
 		/* Is the kernel pmap initialized? */
-		if (kernel_pmap->pm_active) {
+		if (!CPU_EMPTY(&kernel_pmap->pm_active)) {
 			/* It's inside the virtual address range */
 			ptep = pmap_pte(kernel_pmap, va);
 			if (ptep) {
diff --git a/sys/mips/rmi/xlr_machdep.c b/sys/mips/rmi/xlr_machdep.c
index 4a1734a893fd..836c605f7de3 100644
--- a/sys/mips/rmi/xlr_machdep.c
+++ b/sys/mips/rmi/xlr_machdep.c
@@ -614,11 +614,15 @@ platform_processor_id(void)
 	return (xlr_hwtid_to_cpuid[xlr_cpu_id()]);
 }
 
-cpumask_t
-platform_cpu_mask(void)
+void
+platform_cpu_mask(cpuset_t *mask)
 {
+	int i, s;
 
-	return (~0U >> (32 - (xlr_ncores * xlr_threads_per_core)));
+	CPU_ZERO(mask);
+	s = xlr_ncores * xlr_threads_per_core;
+	for (i = 0; i < s; i++)
+		CPU_SET(i, mask);
 }
 
 struct cpu_group *
diff --git a/sys/mips/sibyte/sb_scd.c b/sys/mips/sibyte/sb_scd.c
index e5ac23c17720..50b99876acaa 100644
--- a/sys/mips/sibyte/sb_scd.c
+++ b/sys/mips/sibyte/sb_scd.c
@@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/systm.h>
 #include <sys/module.h>
 #include <sys/bus.h>
+#include <sys/cpuset.h>
 
 #include <machine/resource.h>
 #include <machine/hwfunc.h>
@@ -242,11 +243,15 @@ sb_clear_mailbox(int cpu, uint64_t val)
 	sb_store64(regaddr, val);
 }
 
-cpumask_t
-platform_cpu_mask(void)
+void
+platform_cpu_mask(cpuset_t *mask)
 {
+	int i, s;
 
-	return (~0U >> (32 - SYSREV_NUM_PROCESSORS(sb_read_sysrev())));
+	CPU_ZERO(mask);
+	s = SYSREV_NUM_PROCESSORS(sb_read_sysrev());
+	for (i = 0; i < s; i++)
+		CPU_SET(i, mask);
 }
 #endif	/* SMP */
 
diff --git a/sys/net/bridgestp.c b/sys/net/bridgestp.c
index 2993838ac683..e263b0b23d86 100644
--- a/sys/net/bridgestp.c
+++ b/sys/net/bridgestp.c
@@ -1860,6 +1860,8 @@ bstp_tick(void *arg)
 	if (bs->bs_running == 0)
 		return;
 
+	CURVNET_SET(bs->bs_vnet);
+
 	/* slow timer to catch missed link events */
 	if (bstp_timer_expired(&bs->bs_link_timer)) {
 		LIST_FOREACH(bp, &bs->bs_bplist, bp_next)
@@ -1893,6 +1895,8 @@ bstp_tick(void *arg)
 			bp->bp_txcount--;
 	}
 
+	CURVNET_RESTORE();
+
 	callout_reset(&bs->bs_bstpcallout, hz, bstp_tick, bs);
 }
 
@@ -2126,6 +2130,7 @@ bstp_attach(struct bstp_state *bs, struct bstp_cb_ops *cb)
 	bs->bs_protover = BSTP_PROTO_RSTP;
 	bs->bs_state_cb = cb->bcb_state;
 	bs->bs_rtage_cb = cb->bcb_rtage;
+	bs->bs_vnet = curvnet;
 
 	getmicrotime(&bs->bs_last_tc_time);
 
diff --git a/sys/net/bridgestp.h b/sys/net/bridgestp.h
index 74086fce478a..fdf16aa6b10f 100644
--- a/sys/net/bridgestp.h
+++ b/sys/net/bridgestp.h
@@ -358,6 +358,7 @@ struct bstp_state {
 	LIST_HEAD(, bstp_port)	bs_bplist;
 	bstp_state_cb_t		bs_state_cb;
 	bstp_rtage_cb_t		bs_rtage_cb;
+	struct vnet		*bs_vnet;
 };
 
 #define	BSTP_LOCK_INIT(_bs)	mtx_init(&(_bs)->bs_mtx, "bstp", NULL, MTX_DEF)
diff --git a/sys/net/if_stf.c b/sys/net/if_stf.c
index 4f904a57850d..7d136fd17d9f 100644
--- a/sys/net/if_stf.c
+++ b/sys/net/if_stf.c
@@ -3,7 +3,7 @@
 
 /*-
  * Copyright (C) 2000 WIDE Project.
- * Copyright (c) 2010 Hiroki Sato <hrs@FreeBSD.org>
+ * Copyright (c) 2010-2011 Hiroki Sato <hrs@FreeBSD.org>
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -32,7 +32,7 @@
  */
 
 /*
- * 6to4 interface, based on RFC3056 + 6rd (RFC5569) support.
+ * 6to4 interface, based on RFC 3056 + 6rd (RFC 5969) support.
  *
  * 6to4 interface is NOT capable of link-layer (I mean, IPv4) multicasting.
  * There is no address mapping defined from IPv6 multicast address to IPv4
@@ -74,10 +74,9 @@
  * for details.  The code tries to filter out some of malicious packets.
  * Note that there is no way to be 100% secure.
  *
- * 6rd (RFC5569) extension is enabled when an IPv6 GUA other than
- * 2002::/16 is assigned.  The stf(4) recognizes a 32-bit just after
- * prefixlen as the IPv4 address of the 6rd customer site.  The
- * prefixlen must be shorter than 32.
+ * 6rd (RFC 5969) extension is enabled when an IPv6 GUA other than
+ * 2002::/16 is assigned.  The stf(4) calculates a 6rd delegated
+ * prefix from a 6rd prefix and an IPv4 address.
  *
  */
 
@@ -280,10 +279,10 @@ stf_clone_create(struct if_clone *ifc, int unit, caddr_t params)
 	LIST_INSERT_HEAD(&V_stf_softc_list, sc, stf_list);
 	mtx_unlock(&stf_mtx);
 
-	sc->sc_ifaddr_event_tag = EVENTHANDLER_REGISTER(ifaddr_event,
-							stf_ifaddr_change,
-							NULL,
-							EVENTHANDLER_PRI_ANY);
+	sc->sc_ifaddr_event_tag =
+	    EVENTHANDLER_REGISTER(ifaddr_event, stf_ifaddr_change, NULL,
+		EVENTHANDLER_PRI_ANY);
+
 	return (0);
 }
 
@@ -1367,35 +1366,20 @@ stf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
 	case SIOCSIFADDR:
 		DEBUG_PRINTF(1, "enter SIOCSIFADDR.\n");
 		ifa = (struct ifaddr *)data;
-		if (ifa == NULL || ifa->ifa_addr->sa_family != AF_INET6) {
+		if (ifa == NULL) {
 			error = EAFNOSUPPORT;
 			break;
 		}
-		ifa->ifa_rtrequest = stf_rtrequest;
-		ifp->if_flags |= IFF_UP;
+		if (ifa->ifa_addr->sa_family == AF_INET6 &&
+		    ifa->ifa_dstaddr->sa_family == AF_INET &&
+		    ifa->ifa_netmask->sa_family == AF_INET6) {
+			ifa->ifa_rtrequest = stf_rtrequest;
+			ifp->if_flags |= IFF_UP;
+		} else {
+			error = EINVAL;
+			break;
+		}
 		break;
-
-/*
-	case STFSSRDADDR:
-		ifra6 = (struct in6_aliasreq *)data;
-		if (ifra6 == NULL || ifra6->ifra_addr->sa_family != AF_INET6) {
-			error = EAFNOSUPPORT;
-			break;
-		}
-		sa6 = &ifra6->ifra_addr;
-		if (ifra6->ifra_dstaddr->sa_family != AF_INET) {
-			error = EAFNOSUPPORT;
-			break;
-		}
-		memcpy(&ifra.ifra_addr, sa6, sizeof(ifra.ifra_addr));
-		error = in6_control(NULL, SIOCAIFADDR_IN6, (caddr_t)&ifra, ifp, curthread);
-		if (error)
-			return (error);
-		
-		break;
-		
-	case STFDSRDADDR:
-*/
 	case SIOCADDMULTI:
 	case SIOCDELMULTI:
 		ifr = (struct ifreq *)data;
diff --git a/sys/netgraph/ng_nat.c b/sys/netgraph/ng_nat.c
index 84da50057494..59818d9ced83 100644
--- a/sys/netgraph/ng_nat.c
+++ b/sys/netgraph/ng_nat.c
@@ -43,6 +43,7 @@
 #include <machine/in_cksum.h>
 
 #include <netinet/libalias/alias.h>
+#include <netinet/libalias/alias_local.h>
 
 #include <netgraph/ng_message.h>
 #include <netgraph/ng_parse.h>
@@ -696,22 +697,35 @@ ng_nat_rcvdata(hook_p hook, item_p item )
 	KASSERT(m->m_pkthdr.len == ntohs(ip->ip_len),
 	    ("ng_nat: ip_len != m_pkthdr.len"));
 
+	/*
+	 * We drop a packet when:
+	 * 1. libalias returns PKT_ALIAS_ERROR;
+	 * 2. For incoming packets:
+	 *	a) for unresolved fragments;
+	 *	b) libalias returns PKT_ALIAS_IGNORED and
+	 *		PKT_ALIAS_DENY_INCOMING flag is set.
+	 */
 	if (hook == priv->in) {
 		rval = LibAliasIn(priv->lib, c, m->m_len + M_TRAILINGSPACE(m));
-		if (rval != PKT_ALIAS_OK &&
-		    rval != PKT_ALIAS_FOUND_HEADER_FRAGMENT) {
+		if (rval == PKT_ALIAS_ERROR ||
+		    rval == PKT_ALIAS_UNRESOLVED_FRAGMENT ||
+		    (rval == PKT_ALIAS_IGNORED &&
+		     (priv->lib->packetAliasMode &
+		      PKT_ALIAS_DENY_INCOMING) != 0)) {
 			NG_FREE_ITEM(item);
 			return (EINVAL);
 		}
 	} else if (hook == priv->out) {
 		rval = LibAliasOut(priv->lib, c, m->m_len + M_TRAILINGSPACE(m));
-		if (rval != PKT_ALIAS_OK) {
+		if (rval == PKT_ALIAS_ERROR) {
 			NG_FREE_ITEM(item);
 			return (EINVAL);
 		}
 	} else
 		panic("ng_nat: unknown hook!\n");
 
+	if (rval == PKT_ALIAS_RESPOND)
+		m->m_flags |= M_SKIP_FIREWALL;
 	m->m_pkthdr.len = m->m_len = ntohs(ip->ip_len);
 
 	if ((ip->ip_off & htons(IP_OFFMASK)) == 0 &&
diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c
index 4aa998f20cb2..4eb309aecde4 100644
--- a/sys/netinet/in_pcb.c
+++ b/sys/netinet/in_pcb.c
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
 #include "opt_ipsec.h"
 #include "opt_inet.h"
 #include "opt_inet6.h"
+#include "opt_pcbgroup.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -212,7 +213,7 @@ void
 in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
     struct inpcbhead *listhead, int hash_nelements, int porthash_nelements,
     char *inpcbzone_name, uma_init inpcbzone_init, uma_fini inpcbzone_fini,
-    uint32_t inpcbzone_flags)
+    uint32_t inpcbzone_flags, u_int hashfields)
 {
 
 	INP_INFO_LOCK_INIT(pcbinfo, name);
@@ -227,6 +228,9 @@ in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name,
 	    &pcbinfo->ipi_hashmask);
 	pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB,
 	    &pcbinfo->ipi_porthashmask);
+#ifdef PCBGROUP
+	in_pcbgroup_init(pcbinfo, hashfields, hash_nelements);
+#endif
 	pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb),
 	    NULL, NULL, inpcbzone_init, inpcbzone_fini, UMA_ALIGN_PTR,
 	    inpcbzone_flags);
@@ -246,6 +250,9 @@ in_pcbinfo_destroy(struct inpcbinfo *pcbinfo)
 	hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask);
 	hashdestroy(pcbinfo->ipi_porthashbase, M_PCB,
 	    pcbinfo->ipi_porthashmask);
+#ifdef PCBGROUP
+	in_pcbgroup_destroy(pcbinfo);
+#endif
 	uma_zdestroy(pcbinfo->ipi_zone);
 	INP_HASH_LOCK_DESTROY(pcbinfo);
 	INP_INFO_LOCK_DESTROY(pcbinfo);
@@ -1053,7 +1060,8 @@ in_pcbdetach(struct inpcb *inp)
  * in_pcbref() bumps the reference count on an inpcb in order to maintain
  * stability of an inpcb pointer despite the inpcb lock being released.  This
  * is used in TCP when the inpcbinfo lock needs to be acquired or upgraded,
- * but where the inpcb lock is already held.
+ * but where the inpcb lock may already be held, or when acquiring a reference
+ * via a pcbgroup.
  *
  * in_pcbref() should be used only to provide brief memory stability, and
  * must always be followed by a call to INP_WLOCK() and in_pcbrele() to
@@ -1223,6 +1231,9 @@ in_pcbdrop(struct inpcb *inp)
 		}
 		INP_HASH_WUNLOCK(inp->inp_pcbinfo);
 		inp->inp_flags &= ~INP_INHASHLIST;
+#ifdef PCBGROUP
+		in_pcbgroup_remove(inp);
+#endif
 	}
 }
 
@@ -1472,6 +1483,148 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
 }
 #undef INP_LOOKUP_MAPPED_PCB_COST
 
+#ifdef PCBGROUP
+/*
+ * Lookup PCB in hash list, using pcbgroup tables.
+ */
+static struct inpcb *
+in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
+    struct in_addr faddr, u_int fport_arg, struct in_addr laddr,
+    u_int lport_arg, int lookupflags, struct ifnet *ifp)
+{
+	struct inpcbhead *head;
+	struct inpcb *inp, *tmpinp;
+	u_short fport = fport_arg, lport = lport_arg;
+
+	/*
+	 * First look for an exact match.
+	 */
+	tmpinp = NULL;
+	INP_GROUP_LOCK(pcbgroup);
+	head = &pcbgroup->ipg_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
+	    pcbgroup->ipg_hashmask)];
+	LIST_FOREACH(inp, head, inp_pcbgrouphash) {
+#ifdef INET6
+		/* XXX inp locking */
+		if ((inp->inp_vflag & INP_IPV4) == 0)
+			continue;
+#endif
+		if (inp->inp_faddr.s_addr == faddr.s_addr &&
+		    inp->inp_laddr.s_addr == laddr.s_addr &&
+		    inp->inp_fport == fport &&
+		    inp->inp_lport == lport) {
+			/*
+			 * XXX We should be able to directly return
+			 * the inp here, without any checks.
+			 * Well unless both bound with SO_REUSEPORT?
+			 */
+			if (prison_flag(inp->inp_cred, PR_IP4))
+				goto found;
+			if (tmpinp == NULL)
+				tmpinp = inp;
+		}
+	}
+	if (tmpinp != NULL) {
+		inp = tmpinp;
+		goto found;
+	}
+
+	/*
+	 * Then look for a wildcard match, if requested.
+	 */
+	if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
+		struct inpcb *local_wild = NULL, *local_exact = NULL;
+#ifdef INET6
+		struct inpcb *local_wild_mapped = NULL;
+#endif
+		struct inpcb *jail_wild = NULL;
+		struct inpcbhead *head;
+		int injail;
+
+		/*
+		 * Order of socket selection - we always prefer jails.
+		 *      1. jailed, non-wild.
+		 *      2. jailed, wild.
+		 *      3. non-jailed, non-wild.
+		 *      4. non-jailed, wild.
+		 */
+		head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport,
+		    0, pcbinfo->ipi_wildmask)];
+		LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
+#ifdef INET6
+			/* XXX inp locking */
+			if ((inp->inp_vflag & INP_IPV4) == 0)
+				continue;
+#endif
+			if (inp->inp_faddr.s_addr != INADDR_ANY ||
+			    inp->inp_lport != lport)
+				continue;
+
+			/* XXX inp locking */
+			if (ifp && ifp->if_type == IFT_FAITH &&
+			    (inp->inp_flags & INP_FAITH) == 0)
+				continue;
+
+			injail = prison_flag(inp->inp_cred, PR_IP4);
+			if (injail) {
+				if (prison_check_ip4(inp->inp_cred,
+				    &laddr) != 0)
+					continue;
+			} else {
+				if (local_exact != NULL)
+					continue;
+			}
+
+			if (inp->inp_laddr.s_addr == laddr.s_addr) {
+				if (injail)
+					goto found;
+				else
+					local_exact = inp;
+			} else if (inp->inp_laddr.s_addr == INADDR_ANY) {
+#ifdef INET6
+				/* XXX inp locking, NULL check */
+				if (inp->inp_vflag & INP_IPV6PROTO)
+					local_wild_mapped = inp;
+				else
+#endif /* INET6 */
+					if (injail)
+						jail_wild = inp;
+					else
+						local_wild = inp;
+			}
+		} /* LIST_FOREACH */
+		inp = jail_wild;
+		if (inp == NULL)
+			inp = local_exact;
+		if (inp == NULL)
+			inp = local_wild;
+#ifdef INET6
+		if (inp == NULL)
+			inp = local_wild_mapped;
+#endif /* defined(INET6) */
+		if (inp != NULL)
+			goto found;
+	} /* if (lookupflags & INPLOOKUP_WILDCARD) */
+	INP_GROUP_UNLOCK(pcbgroup);
+	return (NULL);
+
+found:
+	in_pcbref(inp);
+	INP_GROUP_UNLOCK(pcbgroup);
+	if (lookupflags & INPLOOKUP_WLOCKPCB) {
+		INP_WLOCK(inp);
+		if (in_pcbrele_wlocked(inp))
+			return (NULL);
+	} else if (lookupflags & INPLOOKUP_RLOCKPCB) {
+		INP_RLOCK(inp);
+		if (in_pcbrele_rlocked(inp))
+			return (NULL);
+	} else
+		panic("%s: locking bug", __func__);
+	return (inp);
+}
+#endif /* PCBGROUP */
+
 /*
  * Lookup PCB in hash list, using pcbinfo tables.  This variation assumes
  * that the caller has locked the hash list, and will not perform any further
@@ -1636,17 +1789,30 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
 /*
  * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf
  * from which a pre-calculated hash value may be extracted.
+ *
+ * Possibly more of this logic should be in in_pcbgroup.c.
  */
 struct inpcb *
 in_pcblookup(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport,
     struct in_addr laddr, u_int lport, int lookupflags, struct ifnet *ifp)
 {
+#if defined(PCBGROUP)
+	struct inpcbgroup *pcbgroup;
+#endif
 
 	KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
 	    ("%s: invalid lookup flags %d", __func__, lookupflags));
 	KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
 	    ("%s: LOCKPCB not set", __func__));
 
+#if defined(PCBGROUP)
+	if (in_pcbgroup_enabled(pcbinfo)) {
+		pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
+		    fport);
+		return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
+		    laddr, lport, lookupflags, ifp));
+	}
+#endif
 	return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
 	    lookupflags, ifp));
 }
@@ -1656,12 +1822,28 @@ in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr,
     u_int fport, struct in_addr laddr, u_int lport, int lookupflags,
     struct ifnet *ifp, struct mbuf *m)
 {
+#ifdef PCBGROUP
+	struct inpcbgroup *pcbgroup;
+#endif
 
 	KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
 	    ("%s: invalid lookup flags %d", __func__, lookupflags));
 	KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
 	    ("%s: LOCKPCB not set", __func__));
 
+#ifdef PCBGROUP
+	if (in_pcbgroup_enabled(pcbinfo)) {
+		pcbgroup = in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
+		    m->m_pkthdr.flowid);
+		if (pcbgroup != NULL)
+			return (in_pcblookup_group(pcbinfo, pcbgroup, faddr,
+			    fport, laddr, lport, lookupflags, ifp));
+		pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
+		    fport);
+		return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
+		    laddr, lport, lookupflags, ifp));
+	}
+#endif
 	return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
 	    lookupflags, ifp));
 }
@@ -1670,8 +1852,8 @@ in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr,
 /*
  * Insert PCB onto various hash lists.
  */
-int
-in_pcbinshash(struct inpcb *inp)
+static int
+in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update)
 {
 	struct inpcbhead *pcbhash;
 	struct inpcbporthead *pcbporthash;
@@ -1721,9 +1903,38 @@ in_pcbinshash(struct inpcb *inp)
 	LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
 	LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
 	inp->inp_flags |= INP_INHASHLIST;
+#ifdef PCBGROUP
+	if (do_pcbgroup_update)
+		in_pcbgroup_update(inp);
+#endif
 	return (0);
 }
 
+/*
+ * For now, there are two public interfaces to insert an inpcb into the hash
+ * lists -- one that does update pcbgroups, and one that doesn't.  The latter
+ * is used only in the TCP syncache, where in_pcbinshash is called before the
+ * full 4-tuple is set for the inpcb, and we don't want to install in the
+ * pcbgroup until later.
+ *
+ * XXXRW: This seems like a misfeature.  in_pcbinshash should always update
+ * connection groups, and partially initialised inpcbs should not be exposed
+ * to either reservation hash tables or pcbgroups.
+ */
+int
+in_pcbinshash(struct inpcb *inp)
+{
+
+	return (in_pcbinshash_internal(inp, 1));
+}
+
+int
+in_pcbinshash_nopcbgroup(struct inpcb *inp)
+{
+
+	return (in_pcbinshash_internal(inp, 0));
+}
+
 /*
  * Move PCB to the proper hash bucket when { faddr, fport } have  been
  * changed. NOTE: This does not handle the case of the lport changing (the
@@ -1755,6 +1966,13 @@ in_pcbrehash_mbuf(struct inpcb *inp, struct mbuf *m)
 
 	LIST_REMOVE(inp, inp_hash);
 	LIST_INSERT_HEAD(head, inp, inp_hash);
+
+#ifdef PCBGROUP
+	if (m != NULL)
+		in_pcbgroup_update_mbuf(inp, m);
+	else
+		in_pcbgroup_update(inp);
+#endif
 }
 
 void
@@ -1791,6 +2009,9 @@ in_pcbremlists(struct inpcb *inp)
 	}
 	LIST_REMOVE(inp, inp_list);
 	pcbinfo->ipi_count--;
+#ifdef PCBGROUP
+	in_pcbgroup_remove(inp);
+#endif
 }
 
 /*
diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h
index 809bc0576478..dfef96348efe 100644
--- a/sys/netinet/in_pcb.h
+++ b/sys/netinet/in_pcb.h
@@ -44,6 +44,7 @@
 #include <sys/_rwlock.h>
 
 #ifdef _KERNEL
+#include <sys/lock.h>
 #include <sys/rwlock.h>
 #include <net/vnet.h>
 #include <vm/uma.h>
@@ -141,6 +142,7 @@ struct	icmp6_filter;
  *
  * Key:
  * (c) - Constant after initialization
+ * (g) - Protected by the pcbgroup lock
  * (i) - Protected by the inpcb lock
  * (p) - Protected by the pcbinfo lock for the inpcb
  * (s) - Protected by another subsystem's locks
@@ -160,9 +162,12 @@ struct	icmp6_filter;
  */
 struct inpcb {
 	LIST_ENTRY(inpcb) inp_hash;	/* (i/p) hash list */
+	LIST_ENTRY(inpcb) inp_pcbgrouphash;	/* (g/i) hash list */
 	LIST_ENTRY(inpcb) inp_list;	/* (i/p) list for all PCBs for proto */
 	void	*inp_ppcb;		/* (i) pointer to per-protocol pcb */
 	struct	inpcbinfo *inp_pcbinfo;	/* (c) PCB list info */
+	struct	inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */
+	LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/p) group wildcard entry */
 	struct	socket *inp_socket;	/* (i) back pointer to socket */
 	struct	ucred	*inp_cred;	/* (c) cache of socket cred */
 	u_int32_t inp_flow;		/* (i) IPv6 flow information */
@@ -272,13 +277,14 @@ struct inpcbport {
  * the former covering mutable global fields (such as the global pcb list),
  * and the latter covering the hashed lookup tables.  The lock order is:
  *
- *    ipi_lock (before) inpcb locks (before) ipi_hash_lock
+ *    ipi_lock (before) inpcb locks (before) {ipi_hash_lock, pcbgroup locks}
  *
  * Locking key:
  *
  * (c) Constant or nearly constant after initialisation
  * (g) Locked by ipi_lock
- * (h) Read using either ipi_hash_lock or inpcb lock; write requires both.
+ * (h) Read using either ipi_hash_lock or inpcb lock; write requires both
+ * (p) Protected by one or more pcbgroup locks
  * (x) Synchronisation properties poorly defined
  */
 struct inpcbinfo {
@@ -312,7 +318,16 @@ struct inpcbinfo {
 	struct	uma_zone	*ipi_zone;		/* (c) */
 
 	/*
-	 * Global lock protecting hash lookup tables.
+	 * Connection groups associated with this protocol.  These fields are
+	 * constant, but pcbgroup structures themselves are protected by
+	 * per-pcbgroup locks.
+	 */
+	struct inpcbgroup	*ipi_pcbgroups;		/* (c) */
+	u_int			 ipi_npcbgroups;	/* (c) */
+	u_int			 ipi_hashfields;	/* (c) */
+
+	/*
+	 * Global lock protecting non-pcbgroup hash lookup tables.
 	 */
 	struct rwlock		 ipi_hash_lock;
 
@@ -329,6 +344,14 @@ struct inpcbinfo {
 	struct inpcbporthead	*ipi_porthashbase;	/* (h) */
 	u_long			 ipi_porthashmask;	/* (h) */
 
+	/*
+	 * List of wildcard inpcbs for use with pcbgroups.  In the past, was
+	 * per-pcbgroup but is now global.  All pcbgroup locks must be held
+	 * to modify the list, so any is sufficient to read it.
+	 */
+	struct inpcbhead	*ipi_wildbase;		/* (p) */
+	u_long			 ipi_wildmask;		/* (p) */
+
 	/*
 	 * Pointer to network stack instance
 	 */
@@ -340,6 +363,31 @@ struct inpcbinfo {
 	void 			*ipi_pspare[2];
 };
 
+/*
+ * Connection groups hold sets of connections that have similar CPU/thread
+ * affinity.  Each connection belongs to exactly one connection group.
+ */
+struct inpcbgroup {
+	/*
+	 * Per-connection group hash of inpcbs, hashed by local and foreign
+	 * addresses and port numbers.
+	 */
+	struct inpcbhead	*ipg_hashbase;		/* (c) */
+	u_long			 ipg_hashmask;		/* (c) */
+
+	/*
+	 * Notional affinity of this pcbgroup.
+	 */
+	u_int			 ipg_cpu;		/* (p) */
+
+	/*
+	 * Per-connection group lock, not to be confused with ipi_lock.
+	 * Protects the hash table hung off the group, but also the global
+	 * wildcard list in inpcbinfo.
+	 */
+	struct mtx		 ipg_lock;
+} __aligned(CACHE_LINE_SIZE);
+
 #define INP_LOCK_INIT(inp, d, t) \
 	rw_init_flags(&(inp)->inp_lock, (t), RW_RECURSE |  RW_DUPOK)
 #define INP_LOCK_DESTROY(inp)	rw_destroy(&(inp)->inp_lock)
@@ -423,6 +471,14 @@ void 	inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
 #define	INP_HASH_WLOCK_ASSERT(ipi)	rw_assert(&(ipi)->ipi_hash_lock, \
 					    RA_WLOCKED)
 
+#define	INP_GROUP_LOCK_INIT(ipg, d)	mtx_init(&(ipg)->ipg_lock, (d), NULL, \
+					    MTX_DEF | MTX_DUPOK)
+#define	INP_GROUP_LOCK_DESTROY(ipg)	mtx_destroy(&(ipg)->ipg_lock)
+
+#define	INP_GROUP_LOCK(ipg)		mtx_lock(&(ipg)->ipg_lock)
+#define	INP_GROUP_LOCK_ASSERT(ipg)	mtx_assert(&(ipg)->ipg_lock, MA_OWNED)
+#define	INP_GROUP_UNLOCK(ipg)		mtx_unlock(&(ipg)->ipg_lock)
+
 #define INP_PCBHASH(faddr, lport, fport, mask) \
 	(((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask))
 #define INP_PCBPORTHASH(lport, mask) \
@@ -482,6 +538,7 @@ void 	inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
  */
 #define	INP_LLE_VALID		0x00000001 /* cached lle is valid */	
 #define	INP_RT_VALID		0x00000002 /* cached rtentry is valid */
+#define	INP_PCBGROUPWILD	0x00000004 /* in pcbgroup wildcard list */
 
 /*
  * Flags passed to in_pcblookup*() functions.
@@ -500,6 +557,13 @@ void 	inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
 
 #define	INP_CHECK_SOCKAF(so, af)	(INP_SOCKAF(so) == af)
 
+/*
+ * Constants for pcbinfo.ipi_hashfields.
+ */
+#define	IPI_HASHFIELDS_NONE	0
+#define	IPI_HASHFIELDS_2TUPLE	1
+#define	IPI_HASHFIELDS_4TUPLE	2
+
 #ifdef _KERNEL
 VNET_DECLARE(int, ipport_reservedhigh);
 VNET_DECLARE(int, ipport_reservedlow);
@@ -531,7 +595,21 @@ VNET_DECLARE(int, ipport_tcpallocs);
 
 void	in_pcbinfo_destroy(struct inpcbinfo *);
 void	in_pcbinfo_init(struct inpcbinfo *, const char *, struct inpcbhead *,
-	    int, int, char *, uma_init, uma_fini, uint32_t);
+	    int, int, char *, uma_init, uma_fini, uint32_t, u_int);
+
+struct inpcbgroup *
+	in_pcbgroup_byhash(struct inpcbinfo *, u_int, uint32_t);
+struct inpcbgroup *
+	in_pcbgroup_byinpcb(struct inpcb *);
+struct inpcbgroup *
+	in_pcbgroup_bytuple(struct inpcbinfo *, struct in_addr, u_short,
+	    struct in_addr, u_short);
+void	in_pcbgroup_destroy(struct inpcbinfo *);
+int	in_pcbgroup_enabled(struct inpcbinfo *);
+void	in_pcbgroup_init(struct inpcbinfo *, u_int, int);
+void	in_pcbgroup_remove(struct inpcb *);
+void	in_pcbgroup_update(struct inpcb *);
+void	in_pcbgroup_update_mbuf(struct inpcb *, struct mbuf *);
 
 void	in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *);
 int	in_pcballoc(struct socket *, struct inpcbinfo *);
@@ -551,6 +629,7 @@ void	in_pcbdisconnect(struct inpcb *);
 void	in_pcbdrop(struct inpcb *);
 void	in_pcbfree(struct inpcb *);
 int	in_pcbinshash(struct inpcb *);
+int	in_pcbinshash_nopcbgroup(struct inpcb *);
 struct inpcb *
 	in_pcblookup_local(struct inpcbinfo *,
 	    struct in_addr, u_short, int, struct ucred *);
diff --git a/sys/netinet/in_pcbgroup.c b/sys/netinet/in_pcbgroup.c
new file mode 100644
index 000000000000..c9f5c7083136
--- /dev/null
+++ b/sys/netinet/in_pcbgroup.c
@@ -0,0 +1,457 @@
+/*-
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson under contract
+ * to Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/mbuf.h>
+#include <sys/mutex.h>
+#include <sys/smp.h>
+#include <sys/socketvar.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#ifdef INET6
+#include <netinet6/in6_pcb.h>
+#endif /* INET6 */
+
+/*
+ * pcbgroups, or "connection groups" are based on Willman, Rixner, and Cox's
+ * 2006 USENIX paper, "An Evaluation of Network Stack Parallelization
+ * Strategies in Modern Operating Systems".  This implementation differs
+ * significantly from that described in the paper, in that it attempts to
+ * introduce not just notions of affinity for connections and distribute work
+ * so as to reduce lock contention, but also align those notions with
+ * hardware work distribution strategies such as RSS.  In this construction,
+ * connection groups supplement, rather than replace, existing reservation
+ * tables for protocol 4-tuples, offering CPU-affine lookup tables with
+ * minimal cache line migration and lock contention during steady state
+ * operation.
+ *
+ * Internet protocols, such as UDP and TCP, register to use connection groups
+ * by providing an ipi_hashfields value other than IPI_HASHFIELDS_NONE; this
+ * indicates to the connection group code whether a 2-tuple or 4-tuple is
+ * used as an argument to hashes that assign a connection to a particular
+ * group.  This must be aligned with any hardware offloaded distribution
+ * model, such as RSS or similar approaches taken in embedded network boards.
+ * Wildcard sockets require special handling, as in Willman 2006, and are
+ * shared between connection groups -- while being protected by group-local
+ * locks.  This means that connection establishment and teardown can be
+ * significantly more expensive than without connection groups, but that
+ * steady-state processing can be significantly faster.
+ *
+ * Most of the implementation of connection groups is in this file; however,
+ * connection group lookup is implemented in in_pcb.c alongside reservation
+ * table lookups -- see in_pcblookup_group().
+ *
+ * TODO:
+ *
+ * Implement dynamic rebalancing of buckets with connection groups; when
+ * load is unevenly distributed, search for more optimal balancing on
+ * demand.  This might require scaling up the number of connection groups
+ * by <<1.
+ *
+ * Provide an IP 2-tuple or 4-tuple netisr m2cpu handler based on connection
+ * groups for ip_input and ip6_input, allowing non-offloaded work
+ * distribution.
+ *
+ * Expose effective CPU affinity of connections to userspace using socket
+ * options.
+ *
+ * Investigate per-connection affinity overrides based on socket options; an
+ * option could be set, certainly resulting in work being distributed
+ * differently in software, and possibly propagated to supporting hardware
+ * with TCAMs or hardware hash tables.  This might require connections to
+ * exist in more than one connection group at a time.
+ *
+ * Hook netisr thread reconfiguration events, and propagate those to RSS so
+ * that rebalancing can occur when the thread pool grows or shrinks.
+ *
+ * Expose per-pcbgroup statistics to userspace monitoring tools such as
+ * netstat, in order to allow better debugging and profiling.
+ */
+
+void
+in_pcbgroup_init(struct inpcbinfo *pcbinfo, u_int hashfields,
+    int hash_nelements)
+{
+	struct inpcbgroup *pcbgroup;
+	u_int numpcbgroups, pgn;
+
+	/*
+	 * Only enable connection groups for a protocol if it has been
+	 * specifically requested.
+	 */
+	if (hashfields == IPI_HASHFIELDS_NONE)
+		return;
+
+	/*
+	 * Connection groups are about multi-processor load distribution,
+	 * lock contention, and connection CPU affinity.  As such, no point
+	 * in turning them on for a uniprocessor machine, it only wastes
+	 * memory.
+	 */
+	if (mp_ncpus == 1)
+		return;
+
+	/*
+	 * Use one group per CPU for now.  If we decide to do dynamic
+	 * rebalancing a la RSS, we'll need to shift left by at least 1.
+	 */
+	numpcbgroups = mp_ncpus;
+
+	pcbinfo->ipi_hashfields = hashfields;
+	pcbinfo->ipi_pcbgroups = malloc(numpcbgroups *
+	    sizeof(*pcbinfo->ipi_pcbgroups), M_PCB, M_WAITOK | M_ZERO);
+	pcbinfo->ipi_npcbgroups = numpcbgroups;
+	pcbinfo->ipi_wildbase = hashinit(hash_nelements, M_PCB,
+	    &pcbinfo->ipi_wildmask);
+	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) {
+		pcbgroup = &pcbinfo->ipi_pcbgroups[pgn];
+		pcbgroup->ipg_hashbase = hashinit(hash_nelements, M_PCB,
+		    &pcbgroup->ipg_hashmask);
+		INP_GROUP_LOCK_INIT(pcbgroup, "pcbgroup");
+
+		/*
+		 * Initialise notional affinity of the pcbgroup -- for RSS,
+		 * we want the same notion of affinity as NICs to be used.
+		 * Just round robin for the time being.
+		 */
+		pcbgroup->ipg_cpu = (pgn % mp_ncpus);
+	}
+}
+
+void
+in_pcbgroup_destroy(struct inpcbinfo *pcbinfo)
+{
+	struct inpcbgroup *pcbgroup;
+	u_int pgn;
+
+	if (pcbinfo->ipi_npcbgroups == 0)
+		return;
+
+	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) {
+		pcbgroup = &pcbinfo->ipi_pcbgroups[pgn];
+		KASSERT(LIST_EMPTY(pcbinfo->ipi_listhead),
+		    ("in_pcbinfo_destroy: listhead not empty"));
+		INP_GROUP_LOCK_DESTROY(pcbgroup);
+		hashdestroy(pcbgroup->ipg_hashbase, M_PCB,
+		    pcbgroup->ipg_hashmask);
+	}
+	hashdestroy(pcbinfo->ipi_wildbase, M_PCB, pcbinfo->ipi_wildmask);
+	free(pcbinfo->ipi_pcbgroups, M_PCB);
+	pcbinfo->ipi_pcbgroups = NULL;
+	pcbinfo->ipi_npcbgroups = 0;
+	pcbinfo->ipi_hashfields = 0;
+}
+
+/*
+ * Given a hash of whatever the covered tuple might be, return a pcbgroup
+ * index.
+ */
+static __inline u_int
+in_pcbgroup_getbucket(struct inpcbinfo *pcbinfo, uint32_t hash)
+{
+
+	return (hash % pcbinfo->ipi_npcbgroups);
+}
+
+/*
+ * Map a (hashtype, hash) tuple into a connection group, or NULL if the hash
+ * information is insufficient to identify the pcbgroup.
+ */
+struct inpcbgroup *
+in_pcbgroup_byhash(struct inpcbinfo *pcbinfo, u_int hashtype, uint32_t hash)
+{
+
+	return (NULL);
+}
+
+static struct inpcbgroup *
+in_pcbgroup_bymbuf(struct inpcbinfo *pcbinfo, struct mbuf *m)
+{
+
+	return (in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
+	    m->m_pkthdr.flowid));
+}
+
+struct inpcbgroup *
+in_pcbgroup_bytuple(struct inpcbinfo *pcbinfo, struct in_addr laddr,
+    u_short lport, struct in_addr faddr, u_short fport)
+{
+	uint32_t hash;
+
+	switch (pcbinfo->ipi_hashfields) {
+	case IPI_HASHFIELDS_4TUPLE:
+		hash = faddr.s_addr ^ fport;
+		break;
+
+	case IPI_HASHFIELDS_2TUPLE:
+		hash = faddr.s_addr ^ laddr.s_addr;
+		break;
+
+	default:
+		hash = 0;
+	}
+	return (&pcbinfo->ipi_pcbgroups[in_pcbgroup_getbucket(pcbinfo,
+	    hash)]);
+}
+
+struct inpcbgroup *
+in_pcbgroup_byinpcb(struct inpcb *inp)
+{
+
+	return (in_pcbgroup_bytuple(inp->inp_pcbinfo, inp->inp_laddr,
+	    inp->inp_lport, inp->inp_faddr, inp->inp_fport));
+}
+
+static void
+in_pcbwild_add(struct inpcb *inp)
+{
+	struct inpcbinfo *pcbinfo;
+	struct inpcbhead *head;
+	u_int pgn;
+
+	INP_WLOCK_ASSERT(inp);
+	KASSERT(!(inp->inp_flags2 & INP_PCBGROUPWILD),
+	    ("%s: is wild",__func__));
+
+	pcbinfo = inp->inp_pcbinfo;
+	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
+		INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]);
+	head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, inp->inp_lport,
+	    0, pcbinfo->ipi_wildmask)];
+	LIST_INSERT_HEAD(head, inp, inp_pcbgroup_wild);
+	inp->inp_flags2 |= INP_PCBGROUPWILD;
+	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
+		INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]);
+}
+
+static void
+in_pcbwild_remove(struct inpcb *inp)
+{
+	struct inpcbinfo *pcbinfo;
+	u_int pgn;
+
+	INP_WLOCK_ASSERT(inp);
+	KASSERT((inp->inp_flags2 & INP_PCBGROUPWILD),
+	    ("%s: not wild", __func__));
+	/* All group locks are taken before unlinking from the wild list. */
+	pcbinfo = inp->inp_pcbinfo;
+	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
+		INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]);
+	LIST_REMOVE(inp, inp_pcbgroup_wild);
+	for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++)
+		INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]);
+	inp->inp_flags2 &= ~INP_PCBGROUPWILD;
+}
+
+static __inline int
+in_pcbwild_needed(struct inpcb *inp)
+{
+	/* Wild only while the foreign address is still unspecified. */
+#ifdef INET6
+	if ((inp->inp_vflag & INP_IPV6) != 0)
+		return (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr));
+#endif
+
+	return (inp->inp_faddr.s_addr == htonl(INADDR_ANY));
+}
+
+static void
+in_pcbwild_update_internal(struct inpcb *inp)
+{
+	int wanted = in_pcbwild_needed(inp);
+
+	/* Sync wildcard-list membership with what the inpcb now requires. */
+	if (wanted && (inp->inp_flags2 & INP_PCBGROUPWILD) == 0)
+		in_pcbwild_add(inp);
+	else if (!wanted && (inp->inp_flags2 & INP_PCBGROUPWILD) != 0)
+		in_pcbwild_remove(inp);
+}
+
+/*
+ * Update the pcbgroup of an inpcb, which might include removing an old
+ * pcbgroup reference and/or adding a new one.  Wildcard processing is not
+ * performed here, although ideally we'll never install a pcbgroup for a
+ * wildcard inpcb (asserted below).
+ */
+static void
+in_pcbgroup_update_internal(struct inpcbinfo *pcbinfo,
+    struct inpcbgroup *newpcbgroup, struct inpcb *inp)
+{
+	struct inpcbgroup *oldpcbgroup;
+	struct inpcbhead *pcbhash;
+	uint32_t hashkey_faddr;
+
+	INP_WLOCK_ASSERT(inp);
+	/* Leaving a different group: unlink from its hash under its lock. */
+	oldpcbgroup = inp->inp_pcbgroup;
+	if (oldpcbgroup != NULL && oldpcbgroup != newpcbgroup) {
+		INP_GROUP_LOCK(oldpcbgroup);
+		LIST_REMOVE(inp, inp_pcbgrouphash);
+		inp->inp_pcbgroup = NULL;
+		INP_GROUP_UNLOCK(oldpcbgroup);
+	}
+	if (newpcbgroup != NULL && oldpcbgroup != newpcbgroup) {
+#ifdef INET6
+		if (inp->inp_vflag & INP_IPV6)
+			hashkey_faddr = inp->in6p_faddr.s6_addr32[3]; /* XXX */
+		else
+#endif
+			hashkey_faddr = inp->inp_faddr.s_addr;
+		INP_GROUP_LOCK(newpcbgroup);
+		pcbhash = &newpcbgroup->ipg_hashbase[
+		    INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport,
+		    newpcbgroup->ipg_hashmask)];
+		LIST_INSERT_HEAD(pcbhash, inp, inp_pcbgrouphash);
+		inp->inp_pcbgroup = newpcbgroup;
+		INP_GROUP_UNLOCK(newpcbgroup);
+	}
+	/* A wildcard inpcb must never end up with a connection group. */
+	KASSERT(!(newpcbgroup != NULL && in_pcbwild_needed(inp)),
+	    ("%s: pcbgroup and wildcard!", __func__));
+}
+
+/*
+ * Two update paths: one in which the 4-tuple on an inpcb has been updated
+ * and therefore connection groups may need to change (or a wildcard entry
+ * may need to be installed), and another in which the 4-tuple has been
+ * set as a result of a packet received, in which case we may be able to use
+ * the hash on the mbuf to avoid doing a software hash calculation for RSS.
+ *
+ * In each case: first, let the wildcard code have a go at placing it as a
+ * wildcard socket.  If it was a wildcard, or if the connection has been
+ * dropped, then no pcbgroup is required (so potentially clear it);
+ * otherwise, calculate and update the pcbgroup for the inpcb.
+ */
+void
+in_pcbgroup_update(struct inpcb *inp)
+{
+	struct inpcbinfo *pcbinfo;
+	struct inpcbgroup *newpcbgroup;
+
+	INP_WLOCK_ASSERT(inp);
+
+	pcbinfo = inp->inp_pcbinfo;
+	if (!in_pcbgroup_enabled(pcbinfo))
+		return;
+	/* Settle wildcard state first; wild or dropped inpcbs get no group. */
+	in_pcbwild_update_internal(inp);
+	if (!(inp->inp_flags2 & INP_PCBGROUPWILD) &&
+	    !(inp->inp_flags & INP_DROPPED)) {
+#ifdef INET6
+		if (inp->inp_vflag & INP_IPV6)
+			newpcbgroup = in6_pcbgroup_byinpcb(inp);
+		else
+#endif
+			newpcbgroup = in_pcbgroup_byinpcb(inp);
+	} else
+		newpcbgroup = NULL;
+	in_pcbgroup_update_internal(pcbinfo, newpcbgroup, inp);
+}
+
+void
+in_pcbgroup_update_mbuf(struct inpcb *inp, struct mbuf *m)
+{
+	struct inpcbgroup *group = NULL;
+	struct inpcbinfo *info;
+
+	INP_WLOCK_ASSERT(inp);
+
+	info = inp->inp_pcbinfo;
+	if (!in_pcbgroup_enabled(info))
+		return;
+
+	/*
+	 * XXX: Presumably only non-wildcard sockets ever reach this point, in
+	 * which case !INP_PCBGROUPWILD could be asserted instead of tested.
+	 */
+	in_pcbwild_update_internal(inp);
+
+	/*
+	 * Wildcard and dropped inpcbs get no group; otherwise prefer the
+	 * mbuf's hash and fall back to hashing the inpcb's own tuple.
+	 */
+	if ((inp->inp_flags2 & INP_PCBGROUPWILD) == 0 &&
+	    (inp->inp_flags & INP_DROPPED) == 0) {
+		group = in_pcbgroup_bymbuf(info, m);
+		if (group == NULL) {
+#ifdef INET6
+			if ((inp->inp_vflag & INP_IPV6) != 0)
+				group = in6_pcbgroup_byinpcb(inp);
+			else
+#endif
+				group = in_pcbgroup_byinpcb(inp);
+		}
+	}
+	in_pcbgroup_update_internal(info, group, inp);
+}
+
+/*
+ * Detach an inpcb from the pcbgroup machinery: drop its wildcard entry, if
+ * any, and unlink it from the connection group it currently belongs to.
+ */
+void
+in_pcbgroup_remove(struct inpcb *inp)
+{
+	struct inpcbgroup *group;
+
+	INP_WLOCK_ASSERT(inp);
+
+	if (!in_pcbgroup_enabled(inp->inp_pcbinfo))
+		return;
+	if ((inp->inp_flags2 & INP_PCBGROUPWILD) != 0)
+		in_pcbwild_remove(inp);
+
+	group = inp->inp_pcbgroup;
+	if (group == NULL)
+		return;
+	INP_GROUP_LOCK(group);
+	LIST_REMOVE(inp, inp_pcbgrouphash);
+	inp->inp_pcbgroup = NULL;
+	INP_GROUP_UNLOCK(group);
+}
+
+/*
+ * Query whether or not it is appropriate to use pcbgroups to look up inpcbs
+ * for a protocol: true when the pcbinfo has at least one pcbgroup.
+ */
+int
+in_pcbgroup_enabled(struct inpcbinfo *pcbinfo)
+{
+
+	return (pcbinfo->ipi_npcbgroups > 0);
+}
diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c
index 6f5bce7d4876..527ce5683344 100644
--- a/sys/netinet/ip_divert.c
+++ b/sys/netinet/ip_divert.c
@@ -153,7 +153,8 @@ div_init(void)
 	 * place for hashbase == NULL.
 	 */
 	in_pcbinfo_init(&V_divcbinfo, "div", &V_divcb, 1, 1, "divcb",
-	    div_inpcb_init, div_inpcb_fini, UMA_ZONE_NOFREE);
+	    div_inpcb_init, div_inpcb_fini, UMA_ZONE_NOFREE,
+	    IPI_HASHFIELDS_NONE);
 }
 
 static void
diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c
index ac1c723a0acc..67fcb743a9a5 100644
--- a/sys/netinet/ip_input.c
+++ b/sys/netinet/ip_input.c
@@ -488,7 +488,7 @@ ip_input(struct mbuf *m)
 	}
 #ifdef IPSEC
 	/*
-	 * Bypass packet filtering for packets from a tunnel (gif).
+	 * Bypass packet filtering for packets previously handled by IPsec.
 	 */
 	if (ip_ipsec_filtertunnel(m))
 		goto passin;
diff --git a/sys/netinet/ip_ipsec.c b/sys/netinet/ip_ipsec.c
index 50a6ce44a490..a3c87f5c442a 100644
--- a/sys/netinet/ip_ipsec.c
+++ b/sys/netinet/ip_ipsec.c
@@ -95,7 +95,7 @@ ip_ipsec_filtertunnel(struct mbuf *m)
 #if defined(IPSEC)
 
 	/*
-	 * Bypass packet filtering for packets from a tunnel.
+	 * Bypass packet filtering for packets previously handled by IPsec.
 	 */
 	if (!V_ip4_ipsec_filtertunnel &&
 	    m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL)
diff --git a/sys/netinet/ipfw/ip_fw2.c b/sys/netinet/ipfw/ip_fw2.c
index b4d3abbe6959..49c48b9ad03a 100644
--- a/sys/netinet/ipfw/ip_fw2.c
+++ b/sys/netinet/ipfw/ip_fw2.c
@@ -692,6 +692,10 @@ check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif,
 	lookupflags |= INPLOOKUP_RLOCKPCB;
 	match = 0;
 	if (*ugid_lookupp == 0) {
+		/*
+		 * XXXRW: If we had the mbuf here, could use
+		 * in_pcblookup_mbuf().
+		 */
 		pcb =  (oif) ?
 			in_pcblookup(pi,
 				dst_ip, htons(dst_port),
diff --git a/sys/netinet/ipfw/ip_fw_nat.c b/sys/netinet/ipfw/ip_fw_nat.c
index f8c3e63ec235..fd6f09afdf88 100644
--- a/sys/netinet/ipfw/ip_fw_nat.c
+++ b/sys/netinet/ipfw/ip_fw_nat.c
@@ -262,17 +262,27 @@ ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m)
 	else
 		retval = LibAliasOut(t->lib, c,
 			mcl->m_len + M_TRAILINGSPACE(mcl));
-	if (retval == PKT_ALIAS_RESPOND) {
-		m->m_flags |= M_SKIP_FIREWALL;
-		retval = PKT_ALIAS_OK;
-	}
-	if (retval != PKT_ALIAS_OK &&
-	    retval != PKT_ALIAS_FOUND_HEADER_FRAGMENT) {
+
+	/*
+	 * We drop packet when:
+	 * 1. libalias returns PKT_ALIAS_ERROR;
+	 * 2. For incoming packets:
+	 *	a) for unresolved fragments;
+	 *	b) libalias returns PKT_ALIAS_IGNORED and
+	 *		PKT_ALIAS_DENY_INCOMING flag is set.
+	 */
+	if (retval == PKT_ALIAS_ERROR ||
+	    (args->oif == NULL && (retval == PKT_ALIAS_UNRESOLVED_FRAGMENT ||
+	    (retval == PKT_ALIAS_IGNORED &&
+	    (t->lib->packetAliasMode & PKT_ALIAS_DENY_INCOMING) != 0)))) {
 		/* XXX - should i add some logging? */
 		m_free(mcl);
 		args->m = NULL;
 		return (IP_FW_DENY);
 	}
+
+	if (retval == PKT_ALIAS_RESPOND)
+		m->m_flags |= M_SKIP_FIREWALL;
 	mcl->m_pkthdr.len = mcl->m_len = ntohs(ip->ip_len);
 
 	/*
diff --git a/sys/netinet/ipfw/ip_fw_sockopt.c b/sys/netinet/ipfw/ip_fw_sockopt.c
index f81d57d52869..2347456a8c22 100644
--- a/sys/netinet/ipfw/ip_fw_sockopt.c
+++ b/sys/netinet/ipfw/ip_fw_sockopt.c
@@ -349,12 +349,13 @@ del_entry(struct ip_fw_chain *chain, uint32_t arg)
 		}
 
 		if (n == 0) {
-			/* A flush request (arg == 0) on empty ruleset
-			 * returns with no error. On the contrary,
+			/* A flush request (arg == 0 or cmd == 1) on empty
+			 * ruleset returns with no error. On the contrary,
 			 * if there is no match on a specific request,
 			 * we return EINVAL.
 			 */
-			error = (arg == 0) ? 0 : EINVAL;
+			if (arg != 0 && cmd != 1)
+				error = EINVAL;
 			break;
 		}
 
diff --git a/sys/netinet/libalias/alias_sctp.h b/sys/netinet/libalias/alias_sctp.h
index 80ed96568d4d..99d54cebedb0 100644
--- a/sys/netinet/libalias/alias_sctp.h
+++ b/sys/netinet/libalias/alias_sctp.h
@@ -135,13 +135,13 @@ struct sctp_nat_assoc {
 	struct in_addr a_addr;	/**< alias ip address */
 	int state;			/**< current state of NAT association */
 	int TableRegister;		/**< stores which look up tables association is registered in */
-	int	exp;			/**< timer expiration in seconds from uptime */
+	int exp;			/**< timer expiration in seconds from uptime */
 	int exp_loc;			/**< current location in timer_Q */
 	int num_Gaddr;		/**< number of global IP addresses in the list */
 	LIST_HEAD(sctpGlobalAddresshead,sctp_GlobalAddress) Gaddr; /**< List of global addresses */
-							    LIST_ENTRY (sctp_nat_assoc) list_L; /**< Linked list of pointers for Local table*/
-											LIST_ENTRY (sctp_nat_assoc) list_G; /**< Linked list of pointers for Global table */
-														    LIST_ENTRY (sctp_nat_assoc) timer_Q; /**< Linked list of pointers for timer Q */
+	LIST_ENTRY (sctp_nat_assoc) list_L; /**< Linked list of pointers for Local table*/
+	LIST_ENTRY (sctp_nat_assoc) list_G; /**< Linked list of pointers for Global table */
+	LIST_ENTRY (sctp_nat_assoc) timer_Q; /**< Linked list of pointers for timer Q */
 //Using libalias locking
 };
 
diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c
index 635f08f3146a..e754b8850382 100644
--- a/sys/netinet/raw_ip.c
+++ b/sys/netinet/raw_ip.c
@@ -205,7 +205,8 @@ rip_init(void)
 {
 
 	in_pcbinfo_init(&V_ripcbinfo, "rip", &V_ripcb, INP_PCBHASH_RAW_SIZE,
-	    1, "ripcb", rip_inpcb_init, NULL, UMA_ZONE_NOFREE);
+	    1, "ripcb", rip_inpcb_init, NULL, UMA_ZONE_NOFREE,
+	    IPI_HASHFIELDS_NONE);
 	EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL,
 	    EVENTHANDLER_PRI_ANY);
 }
diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c
index 06854ec4f141..6ed589118d8b 100644
--- a/sys/netinet/tcp_subr.c
+++ b/sys/netinet/tcp_subr.c
@@ -300,7 +300,8 @@ tcp_init(void)
 		hashsize = 512; /* safe default */
 	}
 	in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize,
-	    "tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE);
+	    "tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE,
+	    IPI_HASHFIELDS_4TUPLE);
 
 	/*
 	 * These have to be type stable for the benefit of the timers.
diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c
index 5125134363c0..66e473262035 100644
--- a/sys/netinet/tcp_syncache.c
+++ b/sys/netinet/tcp_syncache.c
@@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$");
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
+#include "opt_pcbgroup.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -676,8 +677,14 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m)
 #ifdef INET6
 	}
 #endif
+
+	/*
+	 * Install in the reservation hash table for now, but don't yet
+	 * install a connection group since the full 4-tuple isn't yet
+	 * configured.
+	 */
 	inp->inp_lport = sc->sc_inc.inc_lport;
-	if ((error = in_pcbinshash(inp)) != 0) {
+	if ((error = in_pcbinshash_nopcbgroup(inp)) != 0) {
 		/*
 		 * Undo the assignments above if we failed to
 		 * put the PCB on the hash lists.
diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c
index fd864c097bcb..28eb8fd19f88 100644
--- a/sys/netinet/udp_usrreq.c
+++ b/sys/netinet/udp_usrreq.c
@@ -186,7 +186,8 @@ udp_init(void)
 {
 
 	in_pcbinfo_init(&V_udbinfo, "udp", &V_udb, UDBHASHSIZE, UDBHASHSIZE,
-	    "udp_inpcb", udp_inpcb_init, NULL, UMA_ZONE_NOFREE);
+	    "udp_inpcb", udp_inpcb_init, NULL, UMA_ZONE_NOFREE,
+	    IPI_HASHFIELDS_2TUPLE);
 	V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb),
 	    NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
 	uma_zone_set_max(V_udpcb_zone, maxsockets);
diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c
index da73f219d71e..d15c605b368e 100644
--- a/sys/netinet6/in6_pcb.c
+++ b/sys/netinet6/in6_pcb.c
@@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$");
 #include "opt_inet.h"
 #include "opt_inet6.h"
 #include "opt_ipsec.h"
+#include "opt_pcbgroup.h"
 
 #include <sys/param.h>
 #include <sys/systm.h>
@@ -827,6 +828,141 @@ in6_rtchange(struct inpcb *inp, int errno)
 	return inp;
 }
 
+#ifdef PCBGROUP
+/*
+ * Look up an IPv6 PCB using the pcbgroup tables: an exact 4-tuple match in
+ * pcbgroup's hash first, then (with INPLOOKUP_WILDCARD) the pcbinfo-wide
+ * wildcard table.  A match is referenced and returned read- or write-locked
+ * as lookupflags requests; NULL is returned when nothing matches or when
+ * dropping the reference reports non-zero.
+ */
+static struct inpcb *
+in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup,
+    struct in6_addr *faddr, u_int fport_arg, struct in6_addr *laddr,
+    u_int lport_arg, int lookupflags, struct ifnet *ifp)
+{
+	struct inpcbhead *head;
+	struct inpcb *inp, *tmpinp;
+	u_short fport = fport_arg, lport = lport_arg;
+	int faith;
+
+	faith = (faithprefix_p != NULL) ? (*faithprefix_p)(laddr) : 0;
+
+	/*
+	 * First look for an exact match.
+	 */
+	tmpinp = NULL;
+	INP_GROUP_LOCK(pcbgroup);
+	head = &pcbgroup->ipg_hashbase[
+	    INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport,
+	    pcbgroup->ipg_hashmask)];
+	LIST_FOREACH(inp, head, inp_pcbgrouphash) {
+		/* XXX inp locking */
+		if ((inp->inp_vflag & INP_IPV6) == 0)
+			continue;
+		if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) &&
+		    IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) &&
+		    inp->inp_fport == fport &&
+		    inp->inp_lport == lport) {
+			/*
+			 * XXX We should be able to directly return
+			 * the inp here, without any checks.
+			 * Well unless both bound with SO_REUSEPORT?
+			 */
+			if (prison_flag(inp->inp_cred, PR_IP6))
+				goto found;
+			if (tmpinp == NULL)
+				tmpinp = inp;
+		}
+	}
+	if (tmpinp != NULL) {
+		inp = tmpinp;
+		goto found;
+	}
+
+	/*
+	 * Then look for a wildcard match, if requested.
+	 */
+	if ((lookupflags & INPLOOKUP_WILDCARD) != 0) {
+		struct inpcb *local_wild = NULL, *local_exact = NULL;
+		struct inpcb *jail_wild = NULL;
+		int injail;
+
+		/*
+		 * Order of socket selection - we always prefer jails.
+		 *      1. jailed, non-wild.
+		 *      2. jailed, wild.
+		 *      3. non-jailed, non-wild.
+		 *      4. non-jailed, wild.
+		 */
+		head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport,
+		    0, pcbinfo->ipi_wildmask)];
+		LIST_FOREACH(inp, head, inp_pcbgroup_wild) {
+			/* XXX inp locking */
+			if ((inp->inp_vflag & INP_IPV6) == 0)
+				continue;
+
+			if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) ||
+			    inp->inp_lport != lport) {
+				continue;
+			}
+
+			/* XXX inp locking */
+			if (faith && (inp->inp_flags & INP_FAITH) == 0)
+				continue;
+
+			injail = prison_flag(inp->inp_cred, PR_IP6);
+			if (injail) {
+				if (prison_check_ip6(inp->inp_cred,
+				    laddr) != 0)
+					continue;
+			} else {
+				if (local_exact != NULL)
+					continue;
+			}
+
+			if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) {
+				if (injail)
+					goto found;
+				else
+					local_exact = inp;
+			} else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) {
+				if (injail)
+					jail_wild = inp;
+				else
+					local_wild = inp;
+			}
+		} /* LIST_FOREACH */
+
+		/* Apply the preference order documented above. */
+		if (jail_wild != NULL)
+			inp = jail_wild;
+		else if (local_exact != NULL)
+			inp = local_exact;
+		else
+			inp = local_wild;
+		if (inp != NULL)
+			goto found;
+	} /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */
+	INP_GROUP_UNLOCK(pcbgroup);
+	return (NULL);
+
+found:
+	in_pcbref(inp);
+	INP_GROUP_UNLOCK(pcbgroup);
+	if (lookupflags & INPLOOKUP_WLOCKPCB) {
+		INP_WLOCK(inp);
+		if (in_pcbrele_wlocked(inp))
+			return (NULL);
+	} else if (lookupflags & INPLOOKUP_RLOCKPCB) {
+		INP_RLOCK(inp);
+		if (in_pcbrele_rlocked(inp))
+			return (NULL);
+	} else
+		panic("%s: locking bug", __func__);
+	return (inp);
+}
+#endif /* PCBGROUP */
+
 /*
  * Lookup PCB in hash list.
  */
@@ -983,16 +1119,30 @@ in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
 /*
  * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf
  * from which a pre-calculated hash value may be extracted.
+ *
+ * Possibly more of this logic should be in in6_pcbgroup.c.
  */
 struct inpcb *
 in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport,
     struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp)
 {
+#if defined(PCBGROUP)
+	struct inpcbgroup *pcbgroup;
+#endif
+
 	KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
 	    ("%s: invalid lookup flags %d", __func__, lookupflags));
 	KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
 	    ("%s: LOCKPCB not set", __func__));
 
+#if defined(PCBGROUP)
+	if (in_pcbgroup_enabled(pcbinfo)) {
+		pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
+		    fport);
+		return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
+		    laddr, lport, lookupflags, ifp));
+	}
+#endif
 	return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
 	    lookupflags, ifp));
 }
@@ -1002,11 +1152,28 @@ in6_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in6_addr *faddr,
     u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags,
     struct ifnet *ifp, struct mbuf *m)
 {
+#ifdef PCBGROUP
+	struct inpcbgroup *pcbgroup;
+#endif
+
 	KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0,
 	    ("%s: invalid lookup flags %d", __func__, lookupflags));
 	KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0,
 	    ("%s: LOCKPCB not set", __func__));
 
+#ifdef PCBGROUP
+	if (in_pcbgroup_enabled(pcbinfo)) {
+		pcbgroup = in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
+		    m->m_pkthdr.flowid);
+		if (pcbgroup != NULL)
+			return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr,
+			    fport, laddr, lport, lookupflags, ifp));
+		pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr,
+		    fport);
+		return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport,
+		    laddr, lport, lookupflags, ifp));
+	}
+#endif
 	return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport,
 	    lookupflags, ifp));
 }
diff --git a/sys/netinet6/in6_pcb.h b/sys/netinet6/in6_pcb.h
index cf247043b507..8398d547dd6b 100644
--- a/sys/netinet6/in6_pcb.h
+++ b/sys/netinet6/in6_pcb.h
@@ -69,6 +69,16 @@
 #define	sin6tosa(sin6)	((struct sockaddr *)(sin6))
 #define	ifatoia6(ifa)	((struct in6_ifaddr *)(ifa))
 
+struct	inpcbgroup *
+	in6_pcbgroup_byhash(struct inpcbinfo *, u_int, uint32_t);
+struct	inpcbgroup *
+	in6_pcbgroup_byinpcb __P((struct inpcb *));
+struct inpcbgroup *
+	in6_pcbgroup_bymbuf(struct inpcbinfo *, struct mbuf *);
+struct	inpcbgroup *
+	in6_pcbgroup_bytuple __P((struct inpcbinfo *, const struct in6_addr *,
+	    u_short, const struct in6_addr *, u_short));
+
 void	in6_pcbpurgeif0 __P((struct inpcbinfo *, struct ifnet *));
 void	in6_losing __P((struct inpcb *));
 int	in6_pcbbind __P((struct inpcb *, struct sockaddr *, struct ucred *));
diff --git a/sys/netinet6/in6_pcbgroup.c b/sys/netinet6/in6_pcbgroup.c
new file mode 100644
index 000000000000..850d7f471560
--- /dev/null
+++ b/sys/netinet6/in6_pcbgroup.c
@@ -0,0 +1,103 @@
+/*-
+ * Copyright (c) 2010-2011 Juniper Networks, Inc.
+ * All rights reserved.
+ *
+ * This software was developed by Robert N. M. Watson under contract
+ * to Juniper Networks, Inc.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+
+__FBSDID("$FreeBSD$");
+
+#include "opt_inet6.h"
+
+#include <sys/param.h>
+#include <sys/mbuf.h>
+
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#ifdef INET6
+#include <netinet6/in6_pcb.h>
+#endif /* INET6 */
+
+/*
+ * Given a hash of whatever the covered tuple might be, return a pcbgroup
+ * index.
+ */
+static __inline u_int
+in6_pcbgroup_getbucket(struct inpcbinfo *pcbinfo, uint32_t hash)
+{
+	/* NOTE(review): divides by ipi_npcbgroups; presumably never 0 here. */
+	return (hash % pcbinfo->ipi_npcbgroups);
+}
+
+/*
+ * Map a (hashtype, hash) tuple into a connection group, or NULL if the hash
+ * information is insufficient to identify the pcbgroup.
+ */
+struct inpcbgroup *
+in6_pcbgroup_byhash(struct inpcbinfo *pcbinfo, u_int hashtype, uint32_t hash)
+{
+	/* Stub: every argument is ignored and no group is ever identified. */
+	return (NULL);
+}
+
+struct inpcbgroup *
+in6_pcbgroup_bymbuf(struct inpcbinfo *pcbinfo, struct mbuf *m)
+{
+	/* Look the group up by the hash type and flow ID taken from the mbuf. */
+	return (in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m),
+	    m->m_pkthdr.flowid));
+}
+
+struct inpcbgroup *
+in6_pcbgroup_bytuple(struct inpcbinfo *pcbinfo, const struct in6_addr *laddrp,
+    u_short lport, const struct in6_addr *faddrp, u_short fport)
+{
+	uint32_t key;
+	u_int bucket;
+
+	/*
+	 * Only the last 32-bit word of each IPv6 address enters the key; any
+	 * ipi_hashfields value other than the two handled here yields key 0.
+	 */
+	if (pcbinfo->ipi_hashfields == IPI_HASHFIELDS_4TUPLE)
+		key = faddrp->s6_addr32[3] ^ fport;
+	else if (pcbinfo->ipi_hashfields == IPI_HASHFIELDS_2TUPLE)
+		key = faddrp->s6_addr32[3] ^ laddrp->s6_addr32[3];
+	else
+		key = 0;
+
+	bucket = in6_pcbgroup_getbucket(pcbinfo, key);
+	return (&pcbinfo->ipi_pcbgroups[bucket]);
+}
+
+struct inpcbgroup *
+in6_pcbgroup_byinpcb(struct inpcb *inp)
+{
+	/* Pick the group using the IPv6 4-tuple recorded in the inpcb. */
+	return (in6_pcbgroup_bytuple(inp->inp_pcbinfo, &inp->in6p_laddr,
+	    inp->inp_lport, &inp->in6p_faddr, inp->inp_fport));
+}
diff --git a/sys/netinet6/ip6_ipsec.c b/sys/netinet6/ip6_ipsec.c
index 8731e1261f99..bbbc9c99b1ec 100644
--- a/sys/netinet6/ip6_ipsec.c
+++ b/sys/netinet6/ip6_ipsec.c
@@ -97,7 +97,7 @@ SYSCTL_VNET_INT(_net_inet6_ipsec6, OID_AUTO,
 
 /*
  * Check if we have to jump over firewall processing for this packet.
- * Called from ip_input().
+ * Called from ip6_input().
  * 1 = jump over firewall, 0 = packet goes through firewall.
  */
 int
@@ -106,7 +106,7 @@ ip6_ipsec_filtertunnel(struct mbuf *m)
 #if defined(IPSEC)
 
 	/*
-	 * Bypass packet filtering for packets from a tunnel.
+	 * Bypass packet filtering for packets previously handled by IPsec.
 	 */
 	if (!V_ip6_ipsec6_filtertunnel &&
 	    m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL)
@@ -118,7 +118,7 @@ ip6_ipsec_filtertunnel(struct mbuf *m)
 /*
  * Check if this packet has an active SA and needs to be dropped instead
  * of forwarded.
- * Called from ip_input().
+ * Called from ip6_input().
  * 1 = drop packet, 0 = forward packet.
  */
 int
@@ -141,7 +141,7 @@ ip6_ipsec_fwd(struct mbuf *m)
 	if (sp == NULL) {	/* NB: can happen if error */
 		splx(s);
 		/*XXX error stat???*/
-		DPRINTF(("ip_input: no SP for forwarding\n"));	/*XXX*/
+		DPRINTF(("%s: no SP for forwarding\n", __func__));	/*XXX*/
 		return 1;
 	}
 
@@ -163,7 +163,7 @@ ip6_ipsec_fwd(struct mbuf *m)
  * Check if protocol type doesn't have a further header and do IPSEC
  * decryption or reject right now.  Protocols with further headers get
  * their IPSEC treatment within the protocol specific processing.
- * Called from ip_input().
+ * Called from ip6_input().
  * 1 = drop packet, 0 = continue processing packet.
  */
 int
@@ -206,7 +206,7 @@ ip6_ipsec_input(struct mbuf *m, int nxt)
 		} else {
 			/* XXX error stat??? */
 			error = EINVAL;
-			DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/
+			DPRINTF(("%s: no SP, packet discarded\n", __func__));/*XXX*/
 			return 1;
 		}
 		splx(s);
diff --git a/sys/ofed/include/linux/list.h b/sys/ofed/include/linux/list.h
index f6f9404307c8..61b42d242a43 100644
--- a/sys/ofed/include/linux/list.h
+++ b/sys/ofed/include/linux/list.h
@@ -38,6 +38,7 @@
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/queue.h>
+#include <sys/cpuset.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
diff --git a/sys/pc98/pc98/machdep.c b/sys/pc98/pc98/machdep.c
index 853ac69911b2..8bcb618759ac 100644
--- a/sys/pc98/pc98/machdep.c
+++ b/sys/pc98/pc98/machdep.c
@@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
 #include "opt_isa.h"
 #include "opt_kstack_pages.h"
 #include "opt_maxmem.h"
+#include "opt_mp_watchdog.h"
 #include "opt_npx.h"
 #include "opt_perfmon.h"
 
@@ -115,6 +116,7 @@ __FBSDID("$FreeBSD$");
 #include <machine/intr_machdep.h>
 #include <x86/mca.h>
 #include <machine/md_var.h>
+#include <machine/mp_watchdog.h>
 #include <machine/pc/bios.h>
 #include <machine/pcb.h>
 #include <machine/pcb_ext.h>
@@ -1193,9 +1195,8 @@ cpu_idle(int busy)
 
 	CTR2(KTR_SPARE2, "cpu_idle(%d) at %d",
 	    busy, curcpu);
-#ifdef SMP
-	if (mp_grab_cpu_hlt())
-		return;
+#ifdef MP_WATCHDOG
+	ap_watchdog(PCPU_GET(cpuid));
 #endif
 	/* If we are busy - try to use fast methods. */
 	if (busy) {
diff --git a/sys/powerpc/aim/mmu_oea.c b/sys/powerpc/aim/mmu_oea.c
index 51c6f8a0dc90..be80455dab6c 100644
--- a/sys/powerpc/aim/mmu_oea.c
+++ b/sys/powerpc/aim/mmu_oea.c
@@ -118,11 +118,14 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
+#include <sys/queue.h>
+#include <sys/cpuset.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/msgbuf.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
+#include <sys/sched.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/vmmeter.h>
@@ -820,7 +823,7 @@ moea_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend)
 	PMAP_LOCK_INIT(kernel_pmap);
 	for (i = 0; i < 16; i++)
 		kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i;
-	kernel_pmap->pm_active = ~0;
+	CPU_FILL(&kernel_pmap->pm_active);
 
 	/*
 	 * Set up the Open Firmware mappings
@@ -942,7 +945,9 @@ moea_activate(mmu_t mmu, struct thread *td)
 	pm = &td->td_proc->p_vmspace->vm_pmap;
 	pmr = pm->pmap_phys;
 
-	pm->pm_active |= PCPU_GET(cpumask);
+	sched_pin();
+	CPU_OR(&pm->pm_active, PCPU_PTR(cpumask));
+	sched_unpin();
 	PCPU_SET(curpmap, pmr);
 }
 
@@ -952,7 +957,9 @@ moea_deactivate(mmu_t mmu, struct thread *td)
 	pmap_t	pm;
 
 	pm = &td->td_proc->p_vmspace->vm_pmap;
-	pm->pm_active &= ~PCPU_GET(cpumask);
+	sched_pin();
+	CPU_NAND(&pm->pm_active, PCPU_PTR(cpumask));
+	sched_unpin();
 	PCPU_SET(curpmap, NULL);
 }
 
diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c
index 12a120162ecd..291d89b3e760 100644
--- a/sys/powerpc/aim/mmu_oea64.c
+++ b/sys/powerpc/aim/mmu_oea64.c
@@ -118,11 +118,14 @@ __FBSDID("$FreeBSD$");
 
 #include <sys/param.h>
 #include <sys/kernel.h>
+#include <sys/queue.h>
+#include <sys/cpuset.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
 #include <sys/msgbuf.h>
 #include <sys/mutex.h>
 #include <sys/proc.h>
+#include <sys/sched.h>
 #include <sys/sysctl.h>
 #include <sys/systm.h>
 #include <sys/vmmeter.h>
@@ -827,7 +830,7 @@ moea64_mid_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend)
 	#endif
 
 	kernel_pmap->pmap_phys = kernel_pmap;
-	kernel_pmap->pm_active = ~0;
+	CPU_FILL(&kernel_pmap->pm_active);
 
 	PMAP_LOCK_INIT(kernel_pmap);
 
@@ -995,7 +998,9 @@ moea64_activate(mmu_t mmu, struct thread *td)
 	pmap_t	pm;
 
 	pm = &td->td_proc->p_vmspace->vm_pmap;
-	pm->pm_active |= PCPU_GET(cpumask);
+	sched_pin();
+	CPU_OR(&pm->pm_active, PCPU_PTR(cpumask));
+	sched_unpin();
 
 	#ifdef __powerpc64__
 	PCPU_SET(userslb, pm->pm_slb);
@@ -1010,7 +1015,9 @@ moea64_deactivate(mmu_t mmu, struct thread *td)
 	pmap_t	pm;
 
 	pm = &td->td_proc->p_vmspace->vm_pmap;
-	pm->pm_active &= ~(PCPU_GET(cpumask));
+	sched_pin();
+	CPU_NAND(&pm->pm_active, PCPU_PTR(cpumask));
+	sched_unpin();
 	#ifdef __powerpc64__
 	PCPU_SET(userslb, NULL);
 	#else
diff --git a/sys/powerpc/booke/platform_bare.c b/sys/powerpc/booke/platform_bare.c
index 90c73e07f01e..d76664e7f925 100644
--- a/sys/powerpc/booke/platform_bare.c
+++ b/sys/powerpc/booke/platform_bare.c
@@ -256,7 +256,7 @@ bare_smp_start_cpu(platform_t plat, struct pcpu *pc)
 	int timeout;
 
 	eebpcr = ccsr_read4(OCP85XX_EEBPCR);
-	if ((eebpcr & (pc->pc_cpumask << 24)) != 0) {
+	if ((eebpcr & (1 << (pc->pc_cpuid + 24))) != 0) {
 		printf("%s: CPU=%d already out of hold-off state!\n",
 		    __func__, pc->pc_cpuid);
 		return (ENXIO);
@@ -274,7 +274,7 @@ bare_smp_start_cpu(platform_t plat, struct pcpu *pc)
 	/*
 	 * Release AP from hold-off state
 	 */
-	eebpcr |= (pc->pc_cpumask << 24);
+	eebpcr |= (1 << (pc->pc_cpuid + 24));
 	ccsr_write4(OCP85XX_EEBPCR, eebpcr);
 	__asm __volatile("isync; msync");
 
diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c
index cabe58fef611..e1cd071db40d 100644
--- a/sys/powerpc/booke/pmap.c
+++ b/sys/powerpc/booke/pmap.c
@@ -63,6 +63,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/msgbuf.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
+#include <sys/sched.h>
 #include <sys/smp.h>
 #include <sys/vmmeter.h>
 
@@ -1225,7 +1226,7 @@ mmu_booke_bootstrap(mmu_t mmu, vm_offset_t start, vm_offset_t kernelend)
 		    PTE_VALID;
 	}
 	/* Mark kernel_pmap active on all CPUs */
-	kernel_pmap->pm_active = ~0;
+	CPU_FILL(&kernel_pmap->pm_active);
 
 	/*******************************************************/
 	/* Final setup */
@@ -1480,7 +1481,7 @@ mmu_booke_pinit(mmu_t mmu, pmap_t pmap)
 	PMAP_LOCK_INIT(pmap);
 	for (i = 0; i < MAXCPU; i++)
 		pmap->pm_tid[i] = TID_NONE;
-	pmap->pm_active = 0;
+	CPU_ZERO(&pmap->pm_active);
 	bzero(&pmap->pm_stats, sizeof(pmap->pm_stats));
 	bzero(&pmap->pm_pdir, sizeof(pte_t *) * PDIR_NENTRIES);
 	TAILQ_INIT(&pmap->pm_ptbl_list);
@@ -1835,7 +1836,7 @@ mmu_booke_activate(mmu_t mmu, struct thread *td)
 
 	mtx_lock_spin(&sched_lock);
 
-	atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask));
+	CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask));
 	PCPU_SET(curpmap, pmap);
 	
 	if (pmap->pm_tid[PCPU_GET(cpuid)] == TID_NONE)
@@ -1864,7 +1865,9 @@ mmu_booke_deactivate(mmu_t mmu, struct thread *td)
 	CTR5(KTR_PMAP, "%s: td=%p, proc = '%s', id = %d, pmap = 0x%08x",
 	    __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap);
 
-	atomic_clear_int(&pmap->pm_active, PCPU_GET(cpumask));
+	sched_pin();
+	CPU_NAND_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask));
+	sched_unpin();
 	PCPU_SET(curpmap, NULL);
 }
 
diff --git a/sys/powerpc/include/_types.h b/sys/powerpc/include/_types.h
index fae241676ad3..b0b582e7080b 100644
--- a/sys/powerpc/include/_types.h
+++ b/sys/powerpc/include/_types.h
@@ -72,7 +72,6 @@ typedef	unsigned long long	__uint64_t;
  * Standard type definitions.
  */
 typedef	__uint32_t	__clock_t;		/* clock()... */
-typedef	unsigned int	__cpumask_t;
 typedef	double		__double_t;
 typedef	double		__float_t;
 #ifdef __LP64__
diff --git a/sys/powerpc/include/openpicvar.h b/sys/powerpc/include/openpicvar.h
index 4fb9aa73bba4..605dc0f8daa3 100644
--- a/sys/powerpc/include/openpicvar.h
+++ b/sys/powerpc/include/openpicvar.h
@@ -57,7 +57,7 @@ int	openpic_common_attach(device_t, uint32_t);
 /*
  * PIC interface.
  */
-void	openpic_bind(device_t dev, u_int irq, cpumask_t cpumask);
+void	openpic_bind(device_t dev, u_int irq, cpuset_t cpumask);
 void	openpic_config(device_t, u_int, enum intr_trigger, enum intr_polarity);
 void	openpic_dispatch(device_t, struct trapframe *);
 void	openpic_enable(device_t, u_int, u_int);
diff --git a/sys/powerpc/include/pmap.h b/sys/powerpc/include/pmap.h
index 369ca9d97d60..9166d04ba7d9 100644
--- a/sys/powerpc/include/pmap.h
+++ b/sys/powerpc/include/pmap.h
@@ -66,6 +66,7 @@
 
 #include <sys/queue.h>
 #include <sys/tree.h>
+#include <sys/_cpuset.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 #include <machine/sr.h>
@@ -98,7 +99,7 @@ struct	pmap {
     #else
 	register_t	pm_sr[16];
     #endif
-	cpumask_t	pm_active;
+	cpuset_t	pm_active;
 
 	struct pmap	*pmap_phys;
 	struct		pmap_statistics	pm_stats;
@@ -175,7 +176,7 @@ void	slb_free_user_cache(struct slb **);
 struct pmap {
 	struct mtx		pm_mtx;		/* pmap mutex */
 	tlbtid_t		pm_tid[MAXCPU];	/* TID to identify this pmap entries in TLB */
-	cpumask_t		pm_active;	/* active on cpus */
+	cpuset_t		pm_active;	/* active on cpus */
 	struct pmap_statistics	pm_stats;	/* pmap statistics */
 
 	/* Page table directory, array of pointers to page tables. */
diff --git a/sys/powerpc/include/smp.h b/sys/powerpc/include/smp.h
index cf952788c680..32fcfb4b8f60 100644
--- a/sys/powerpc/include/smp.h
+++ b/sys/powerpc/include/smp.h
@@ -40,9 +40,11 @@
 
 #ifndef LOCORE
 
+#include <sys/_cpuset.h>
+
 void	ipi_all_but_self(int ipi);
 void	ipi_cpu(int cpu, u_int ipi);
-void	ipi_selected(cpumask_t cpus, int ipi);
+void	ipi_selected(cpuset_t cpus, int ipi);
 
 struct cpuref {
 	uintptr_t	cr_hwref;
diff --git a/sys/powerpc/mpc85xx/openpic_fdt.c b/sys/powerpc/mpc85xx/openpic_fdt.c
index 7cf18eaaff90..1cd936956281 100644
--- a/sys/powerpc/mpc85xx/openpic_fdt.c
+++ b/sys/powerpc/mpc85xx/openpic_fdt.c
@@ -37,11 +37,12 @@ __FBSDID("$FreeBSD$");
 
 #include <machine/bus.h>
 #include <machine/intr_machdep.h>
-#include <machine/openpicvar.h>
 
 #include <dev/ofw/ofw_bus.h>
 #include <dev/ofw/ofw_bus_subr.h>
 
+#include <machine/openpicvar.h>
+
 #include "pic_if.h"
 
 static int openpic_fdt_probe(device_t);
diff --git a/sys/powerpc/powerpc/intr_machdep.c b/sys/powerpc/powerpc/intr_machdep.c
index f2bfa33f2615..1e6342cb754a 100644
--- a/sys/powerpc/powerpc/intr_machdep.c
+++ b/sys/powerpc/powerpc/intr_machdep.c
@@ -67,6 +67,7 @@
 #include <sys/kernel.h>
 #include <sys/queue.h>
 #include <sys/bus.h>
+#include <sys/cpuset.h>
 #include <sys/interrupt.h>
 #include <sys/ktr.h>
 #include <sys/lock.h>
@@ -98,7 +99,7 @@ struct powerpc_intr {
 	u_int	intline;
 	u_int	vector;
 	u_int	cntindex;
-	cpumask_t cpu;
+	cpuset_t cpu;
 	enum intr_trigger trig;
 	enum intr_polarity pol;
 };
@@ -205,7 +206,7 @@ intr_lookup(u_int irq)
 #ifdef SMP
 	i->cpu = all_cpus;
 #else
-	i->cpu = 1;
+	CPU_SETOF(0, &i->cpu);
 #endif
 
 	for (vector = 0; vector < INTR_VECTORS && vector <= nvectors;
@@ -296,7 +297,7 @@ powerpc_assign_intr_cpu(void *arg, u_char cpu)
 	if (cpu == NOCPU)
 		i->cpu = all_cpus;
 	else
-		i->cpu = 1 << cpu;
+		CPU_SETOF(cpu, &i->cpu);
 
 	if (!cold && i->pic != NULL && i->pic == root_pic)
 		PIC_BIND(i->pic, i->intline, i->cpu);
diff --git a/sys/powerpc/powerpc/mp_machdep.c b/sys/powerpc/powerpc/mp_machdep.c
index 577d4dcbe153..62a97e925050 100644
--- a/sys/powerpc/powerpc/mp_machdep.c
+++ b/sys/powerpc/powerpc/mp_machdep.c
@@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/bus.h>
+#include <sys/cpuset.h>
 #include <sys/lock.h>
 #include <sys/mutex.h>
 #include <sys/pcpu.h>
@@ -157,7 +158,7 @@ cpu_mp_start(void)
 			    cpu.cr_cpuid);
 			goto next;
 		}
-		if (all_cpus & (1 << cpu.cr_cpuid)) {
+		if (CPU_ISSET(cpu.cr_cpuid, &all_cpus)) {
 			printf("SMP: cpu%d: skipped - duplicate ID\n",
 			    cpu.cr_cpuid);
 			goto next;
@@ -174,9 +175,9 @@ cpu_mp_start(void)
 			pc->pc_cpuid = bsp.cr_cpuid;
 			pc->pc_bsp = 1;
 		}
-		pc->pc_cpumask = 1 << pc->pc_cpuid;
+		CPU_SETOF(pc->pc_cpuid, &pc->pc_cpumask);
 		pc->pc_hwref = cpu.cr_hwref;
-		all_cpus |= pc->pc_cpumask;
+		CPU_OR(&all_cpus, &pc->pc_cpumask);
 next:
 		error = platform_smp_next_cpu(&cpu);
 	}
@@ -214,7 +215,8 @@ cpu_mp_unleash(void *dummy)
 	smp_cpus = 0;
 	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
 		cpus++;
-		pc->pc_other_cpus = all_cpus & ~pc->pc_cpumask;
+		pc->pc_other_cpus = all_cpus;
+		CPU_NAND(&pc->pc_other_cpus, &pc->pc_cpumask);
 		if (!pc->pc_bsp) {
 			if (bootverbose)
 				printf("Waking up CPU %d (dev=%x)\n",
@@ -236,7 +238,7 @@ cpu_mp_unleash(void *dummy)
 				    pc->pc_cpuid, pc->pc_pir, pc->pc_awake);
 			smp_cpus++;
 		} else
-			stopped_cpus |= (1 << pc->pc_cpuid);
+			CPU_SET(pc->pc_cpuid, &stopped_cpus);
 	}
 
 	ap_awake = 1;
@@ -276,7 +278,7 @@ SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, cpu_mp_unleash, NULL);
 int
 powerpc_ipi_handler(void *arg)
 {
-	cpumask_t self;
+	cpuset_t self;
 	uint32_t ipimask;
 	int msg;
 
@@ -311,11 +313,11 @@ powerpc_ipi_handler(void *arg)
 			savectx(&stoppcbs[PCPU_GET(cpuid)]);
 			self = PCPU_GET(cpumask);
 			savectx(PCPU_GET(curpcb));
-			atomic_set_int(&stopped_cpus, self);
-			while ((started_cpus & self) == 0)
+			CPU_OR_ATOMIC(&stopped_cpus, &self);
+			while (!CPU_OVERLAP(&started_cpus, &self))
 				cpu_spinwait();
-			atomic_clear_int(&started_cpus, self);
-			atomic_clear_int(&stopped_cpus, self);
+			CPU_NAND_ATOMIC(&started_cpus, &self);
+			CPU_NAND_ATOMIC(&stopped_cpus, &self);
 			CTR1(KTR_SMP, "%s: IPI_STOP (restart)", __func__);
 			break;
 		case IPI_HARDCLOCK:
@@ -343,12 +345,12 @@ ipi_send(struct pcpu *pc, int ipi)
 
 /* Send an IPI to a set of cpus. */
 void
-ipi_selected(cpumask_t cpus, int ipi)
+ipi_selected(cpuset_t cpus, int ipi)
 {
 	struct pcpu *pc;
 
 	STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) {
-		if (cpus & pc->pc_cpumask)
+		if (CPU_OVERLAP(&cpus, &pc->pc_cpumask))
 			ipi_send(pc, ipi);
 	}
 }
diff --git a/sys/powerpc/powerpc/openpic.c b/sys/powerpc/powerpc/openpic.c
index 042f8b846fa1..347dc3fb4eeb 100644
--- a/sys/powerpc/powerpc/openpic.c
+++ b/sys/powerpc/powerpc/openpic.c
@@ -231,7 +231,7 @@ openpic_common_attach(device_t dev, uint32_t node)
  */
 
 void
-openpic_bind(device_t dev, u_int irq, cpumask_t cpumask)
+openpic_bind(device_t dev, u_int irq, cpuset_t cpumask)
 {
 	struct openpic_softc *sc;
 
@@ -240,7 +240,12 @@ openpic_bind(device_t dev, u_int irq, cpumask_t cpumask)
 		return;
 
 	sc = device_get_softc(dev);
-	openpic_write(sc, OPENPIC_IDEST(irq), cpumask);
+
+	/*
+	 * XXX: openpic_write() is very special and just needs a 32-bit mask.
+	 * For the moment, just play dirty and use the low 32 bits of the set.
+	 */
+	openpic_write(sc, OPENPIC_IDEST(irq), cpumask.__bits[0] & 0xffffffff);
 }
 
 void
diff --git a/sys/powerpc/powerpc/pic_if.m b/sys/powerpc/powerpc/pic_if.m
index 185cc0887796..e429d31f5038 100644
--- a/sys/powerpc/powerpc/pic_if.m
+++ b/sys/powerpc/powerpc/pic_if.m
@@ -28,6 +28,7 @@
 #
 
 #include <sys/bus.h>
+#include <sys/cpuset.h>
 #include <machine/frame.h>
 
 INTERFACE pic;
@@ -35,7 +36,7 @@ INTERFACE pic;
 METHOD void bind {
 	device_t	dev;
 	u_int		irq;
-	cpumask_t	cpumask;
+	cpuset_t	cpumask;
 };
 
 METHOD void config {
diff --git a/sys/sparc64/include/_types.h b/sys/sparc64/include/_types.h
index f810c159a6ef..7e993c4c370a 100644
--- a/sys/sparc64/include/_types.h
+++ b/sys/sparc64/include/_types.h
@@ -55,7 +55,6 @@ typedef	unsigned long		__uint64_t;
  * Standard type definitions.
  */
 typedef	__int32_t	__clock_t;		/* clock()... */
-typedef	unsigned int	__cpumask_t;
 typedef	__int64_t	__critical_t;
 typedef	double		__double_t;
 typedef	float		__float_t;
diff --git a/sys/sparc64/include/ktr.h b/sys/sparc64/include/ktr.h
index 5948ba29fb3e..f13865f1a27e 100644
--- a/sys/sparc64/include/ktr.h
+++ b/sys/sparc64/include/ktr.h
@@ -40,16 +40,6 @@
 
 #else
 
-#define	AND(var, mask, r1, r2) \
-	SET(var, r2, r1) ; \
-	lduw	[r1], r2 ; \
-	and	r2, mask, r1
-
-#define	TEST(var, mask, r1, r2, l1) \
-	AND(var, mask, r1, r2) ; \
-	brz	r1, l1 ## f ; \
-	 nop
-
 /*
  * XXX could really use another register...
  */
@@ -79,13 +69,37 @@ l2:	add	r2, 1, r3 ; \
 	SET(l1 ## b, r3, r2) ; \
 	stx	r2, [r1 + KTR_DESC]
 
+/*
+ * NB: this clobbers %y.
+ */
 #define CATR(mask, desc, r1, r2, r3, l1, l2, l3) \
 	set	mask, r1 ; \
-	TEST(ktr_mask, r1, r2, r2, l3) ; \
-	lduw	[PCPU(MID)], r1 ; \
+	SET(ktr_mask, r3, r2) ; \
+	lduw	[r2], r2 ; \
+	and	r2, r1, r1 ; \
+	brz	r1, l3 ## f ; \
+	 nop ; \
+	lduw	[PCPU(CPUID)], r2 ; \
+	mov	_NCPUBITS, r3 ; \
+	mov	%g0, %y ; \
+	udiv	r2, r3, r2 ; \
+	srl	r2, 0, r2 ; \
+	sllx	r2, PTR_SHIFT, r2 ; \
+	SET(ktr_cpumask, r3, r1) ; \
+	ldx	[r1 + r2], r1 ; \
+	lduw	[PCPU(CPUID)], r2 ; \
+	mov	_NCPUBITS, r3 ; \
+	mov	%g0, %y ; \
+	udiv	r2, r3, r2 ; \
+	srl	r2, 0, r2 ; \
+	smul	r2, r3, r3 ; \
+	lduw	[PCPU(CPUID)], r2 ; \
+	sub	r2, r3, r3 ; \
 	mov	1, r2 ; \
-	sllx	r2, r1, r1 ; \
-	TEST(ktr_cpumask, r1, r2, r3, l3) ; \
+	sllx	r2, r3, r2 ; \
+	and	r1, r2, r1 ; \
+	brz	r1, l3 ## f ; \
+	 nop ; \
 	ATR(desc, r1, r2, r3, l1, l2)
 
 #endif /* LOCORE */
diff --git a/sys/sparc64/include/pmap.h b/sys/sparc64/include/pmap.h
index e16ea9776029..adad2575598e 100644
--- a/sys/sparc64/include/pmap.h
+++ b/sys/sparc64/include/pmap.h
@@ -40,6 +40,7 @@
 #define	_MACHINE_PMAP_H_
 
 #include <sys/queue.h>
+#include <sys/_cpuset.h>
 #include <sys/_lock.h>
 #include <sys/_mutex.h>
 #include <machine/cache.h>
@@ -61,7 +62,7 @@ struct pmap {
 	struct	mtx pm_mtx;
 	struct	tte *pm_tsb;
 	vm_object_t pm_tsb_obj;
-	cpumask_t pm_active;
+	cpuset_t pm_active;
 	u_int	pm_context[MAXCPU];
 	struct	pmap_statistics pm_stats;
 };
diff --git a/sys/sparc64/include/smp.h b/sys/sparc64/include/smp.h
index 3ca8e0380444..1ba0d9e9f137 100644
--- a/sys/sparc64/include/smp.h
+++ b/sys/sparc64/include/smp.h
@@ -38,6 +38,7 @@
 
 #ifndef	LOCORE
 
+#include <sys/cpuset.h>
 #include <sys/proc.h>
 #include <sys/sched.h>
 
@@ -76,17 +77,17 @@ struct cpu_start_args {
 };
 
 struct ipi_cache_args {
-	cpumask_t ica_mask;
+	cpuset_t ica_mask;
 	vm_paddr_t ica_pa;
 };
 
 struct ipi_rd_args {
-	cpumask_t ira_mask;
+	cpuset_t ira_mask;
 	register_t *ira_val;
 };
 
 struct ipi_tlb_args {
-	cpumask_t ita_mask;
+	cpuset_t ita_mask;
 	struct	pmap *ita_pmap;
 	u_long	ita_start;
 	u_long	ita_end;
@@ -100,7 +101,7 @@ extern struct pcb stoppcbs[];
 void	cpu_mp_bootstrap(struct pcpu *pc);
 void	cpu_mp_shutdown(void);
 
-typedef	void cpu_ipi_selected_t(u_int, u_long, u_long, u_long);
+typedef	void cpu_ipi_selected_t(cpuset_t, u_long, u_long, u_long);
 extern	cpu_ipi_selected_t *cpu_ipi_selected;
 typedef	void cpu_ipi_single_t(u_int, u_long, u_long, u_long);
 extern	cpu_ipi_single_t *cpu_ipi_single;
@@ -140,7 +141,7 @@ ipi_all_but_self(u_int ipi)
 }
 
 static __inline void
-ipi_selected(u_int cpus, u_int ipi)
+ipi_selected(cpuset_t cpus, u_int ipi)
 {
 
 	cpu_ipi_selected(cpus, 0, (u_long)tl_ipi_level, ipi);
@@ -197,7 +198,8 @@ ipi_rd(u_int cpu, void *func, u_long *val)
 	sched_pin();
 	ira = &ipi_rd_args;
 	mtx_lock_spin(&ipi_mtx);
-	ira->ira_mask = 1 << cpu | PCPU_GET(cpumask);
+	ira->ira_mask = PCPU_GET(cpumask);
+	CPU_SET(cpu, &ira->ira_mask);
 	ira->ira_val = val;
 	cpu_ipi_single(cpu, 0, (u_long)func, (u_long)ira);
 	return (&ira->ira_mask);
@@ -207,18 +209,21 @@ static __inline void *
 ipi_tlb_context_demap(struct pmap *pm)
 {
 	struct ipi_tlb_args *ita;
-	cpumask_t cpus;
+	cpuset_t cpus;
 
 	if (smp_cpus == 1)
 		return (NULL);
 	sched_pin();
-	if ((cpus = (pm->pm_active & PCPU_GET(other_cpus))) == 0) {
+	cpus = pm->pm_active;
+	CPU_AND(&cpus, PCPU_PTR(other_cpus));
+	if (CPU_EMPTY(&cpus)) {
 		sched_unpin();
 		return (NULL);
 	}
 	ita = &ipi_tlb_args;
 	mtx_lock_spin(&ipi_mtx);
-	ita->ita_mask = cpus | PCPU_GET(cpumask);
+	ita->ita_mask = cpus;
+	CPU_OR(&ita->ita_mask, PCPU_PTR(cpumask));
 	ita->ita_pmap = pm;
 	cpu_ipi_selected(cpus, 0, (u_long)tl_ipi_tlb_context_demap,
 	    (u_long)ita);
@@ -229,18 +234,21 @@ static __inline void *
 ipi_tlb_page_demap(struct pmap *pm, vm_offset_t va)
 {
 	struct ipi_tlb_args *ita;
-	cpumask_t cpus;
+	cpuset_t cpus;
 
 	if (smp_cpus == 1)
 		return (NULL);
 	sched_pin();
-	if ((cpus = (pm->pm_active & PCPU_GET(other_cpus))) == 0) {
+	cpus = pm->pm_active;
+	CPU_AND(&cpus, PCPU_PTR(other_cpus));
+	if (CPU_EMPTY(&cpus)) {
 		sched_unpin();
 		return (NULL);
 	}
 	ita = &ipi_tlb_args;
 	mtx_lock_spin(&ipi_mtx);
-	ita->ita_mask = cpus | PCPU_GET(cpumask);
+	ita->ita_mask = cpus;
+	CPU_OR(&ita->ita_mask, PCPU_PTR(cpumask));
 	ita->ita_pmap = pm;
 	ita->ita_va = va;
 	cpu_ipi_selected(cpus, 0, (u_long)tl_ipi_tlb_page_demap, (u_long)ita);
@@ -251,18 +259,21 @@ static __inline void *
 ipi_tlb_range_demap(struct pmap *pm, vm_offset_t start, vm_offset_t end)
 {
 	struct ipi_tlb_args *ita;
-	cpumask_t cpus;
+	cpuset_t cpus;
 
 	if (smp_cpus == 1)
 		return (NULL);
 	sched_pin();
-	if ((cpus = (pm->pm_active & PCPU_GET(other_cpus))) == 0) {
+	cpus = pm->pm_active;
+	CPU_AND(&cpus, PCPU_PTR(other_cpus));
+	if (CPU_EMPTY(&cpus)) {
 		sched_unpin();
 		return (NULL);
 	}
 	ita = &ipi_tlb_args;
 	mtx_lock_spin(&ipi_mtx);
-	ita->ita_mask = cpus | PCPU_GET(cpumask);
+	ita->ita_mask = cpus;
+	CPU_OR(&ita->ita_mask, PCPU_PTR(cpumask));
 	ita->ita_pmap = pm;
 	ita->ita_start = start;
 	ita->ita_end = end;
@@ -274,11 +285,11 @@ ipi_tlb_range_demap(struct pmap *pm, vm_offset_t start, vm_offset_t end)
 static __inline void
 ipi_wait(void *cookie)
 {
-	volatile cpumask_t *mask;
+	volatile cpuset_t *mask;
 
 	if ((mask = cookie) != NULL) {
-		atomic_clear_int(mask, PCPU_GET(cpumask));
-		while (*mask != 0)
+		CPU_NAND_ATOMIC(mask, PCPU_PTR(cpumask));
+		while (!CPU_EMPTY(mask))
 			;
 		mtx_unlock_spin(&ipi_mtx);
 		sched_unpin();
diff --git a/sys/sparc64/sparc64/exception.S b/sys/sparc64/sparc64/exception.S
index ed0e3810e259..0b8a0faf78ce 100644
--- a/sys/sparc64/sparc64/exception.S
+++ b/sys/sparc64/sparc64/exception.S
@@ -1280,6 +1280,7 @@ ENTRY(tl1_data_excptn_trap)
 END(tl1_data_excptn_trap)
 
 	.macro	tl1_align
+	wrpr	%g0, PSTATE_ALT, %pstate
 	ba,a	%xcc, tl1_align_trap
 	 nop
 	.align	32
@@ -1289,7 +1290,7 @@ ENTRY(tl1_align_trap)
 	RESUME_SPILLFILL_ALIGN
 	ba	%xcc, tl1_sfsr_trap
 	 mov	T_MEM_ADDRESS_NOT_ALIGNED | T_KERNEL, %g2
-END(tl1_data_excptn_trap)
+END(tl1_align_trap)
 
 ENTRY(tl1_sfsr_trap)
 	wr	%g0, ASI_DMMU, %asi
@@ -2615,9 +2616,9 @@ ENTRY(tl0_ret)
 	andn	%l4, TSTATE_CWP_MASK, %g2
 
 	/*
-	 * Restore %y.  Could also be below if we had more alternate globals.
+	 * Save %y in an alternate global.
 	 */
-	wr	%l5, 0, %y
+	mov	%l5, %g4
 
 	/*
 	 * Setup %wstate for return.  We need to restore the user window state
@@ -2662,8 +2663,8 @@ tl0_ret_fill:
 	 * Fixup %tstate so the saved %cwp points to the current window and
 	 * restore it.
 	 */
-	rdpr	%cwp, %g4
-	wrpr	%g2, %g4, %tstate
+	rdpr	%cwp, %g1
+	wrpr	%g2, %g1, %tstate
 
 	/*
 	 * Restore the user window state.  The transition bit was set above
@@ -2673,19 +2674,24 @@ tl0_ret_fill:
 
 #if KTR_COMPILE & KTR_TRAP
 	CATR(KTR_TRAP, "tl0_ret: td=%#lx pil=%#lx pc=%#lx npc=%#lx sp=%#lx"
-	    , %g2, %g3, %g4, 7, 8, 9)
-	ldx	[PCPU(CURTHREAD)], %g3
-	stx	%g3, [%g2 + KTR_PARM1]
-	rdpr	%pil, %g3
-	stx	%g3, [%g2 + KTR_PARM2]
-	rdpr	%tpc, %g3
-	stx	%g3, [%g2 + KTR_PARM3]
-	rdpr	%tnpc, %g3
-	stx	%g3, [%g2 + KTR_PARM4]
-	stx	%sp, [%g2 + KTR_PARM5]
+	    , %g1, %g2, %g3, 7, 8, 9)
+	ldx	[PCPU(CURTHREAD)], %g2
+	stx	%g2, [%g1 + KTR_PARM1]
+	rdpr	%pil, %g2
+	stx	%g2, [%g1 + KTR_PARM2]
+	rdpr	%tpc, %g2
+	stx	%g2, [%g1 + KTR_PARM3]
+	rdpr	%tnpc, %g2
+	stx	%g2, [%g1 + KTR_PARM4]
+	stx	%sp, [%g1 + KTR_PARM5]
 9:
 #endif
 
+	/*
+	 * Restore %y.  Note that the CATR above clobbered it.
+	 */
+	wr	%g4, 0, %y
+
 	/*
 	 * Return to usermode.
 	 */
@@ -2697,9 +2703,14 @@ tl0_ret_fill_end:
 	    , %l0, %l1, %l2, 7, 8, 9)
 	rdpr	%pstate, %l1
 	stx	%l1, [%l0 + KTR_PARM1]
-	stx	%l5, [%l0 + KTR_PARM2]
+	stx	%l6, [%l0 + KTR_PARM2]
 	stx	%sp, [%l0 + KTR_PARM3]
 9:
+
+	/*
+	 * Restore %y clobbered by the CATR.  This was saved in %l5 above.
+	 */
+	wr	%l5, 0, %y
 #endif
 
 	/*
@@ -2867,34 +2878,36 @@ ENTRY(tl1_ret)
 	andn	%l0, TSTATE_CWP_MASK, %g1
 	mov	%l1, %g2
 	mov	%l2, %g3
+	mov	%l4, %g4
 
 	wrpr	%l3, 0, %pil
-	wr	%l4, 0, %y
 
 	restore
 
 	wrpr	%g0, 2, %tl
 
-	rdpr	%cwp, %g4
-	wrpr	%g1, %g4, %tstate
 	wrpr	%g2, 0, %tpc
 	wrpr	%g3, 0, %tnpc
+	rdpr	%cwp, %g2
+	wrpr	%g1, %g2, %tstate
 
 #if KTR_COMPILE & KTR_TRAP
 	CATR(KTR_TRAP, "tl1_ret: td=%#lx pil=%#lx ts=%#lx pc=%#lx sp=%#lx"
-	    , %g2, %g3, %g4, 7, 8, 9)
-	ldx	[PCPU(CURTHREAD)], %g3
-	stx	%g3, [%g2 + KTR_PARM1]
-	rdpr	%pil, %g3
-	stx	%g3, [%g2 + KTR_PARM2]
-	rdpr	%tstate, %g3
-	stx	%g3, [%g2 + KTR_PARM3]
-	rdpr	%tpc, %g3
-	stx	%g3, [%g2 + KTR_PARM4]
-	stx	%sp, [%g2 + KTR_PARM5]
+	    , %g1, %g2, %g3, 7, 8, 9)
+	ldx	[PCPU(CURTHREAD)], %g2
+	stx	%g2, [%g1 + KTR_PARM1]
+	rdpr	%pil, %g2
+	stx	%g2, [%g1 + KTR_PARM2]
+	rdpr	%tstate, %g2
+	stx	%g2, [%g1 + KTR_PARM3]
+	rdpr	%tpc, %g2
+	stx	%g2, [%g1 + KTR_PARM4]
+	stx	%sp, [%g1 + KTR_PARM5]
 9:
 #endif
 
+	wr	%g4, 0, %y
+
 	retry
 END(tl1_ret)
 
@@ -2995,33 +3008,35 @@ ENTRY(tl1_intr)
 	andn	%l0, TSTATE_CWP_MASK, %g1
 	mov	%l1, %g2
 	mov	%l2, %g3
+	mov	%l4, %g4
 	wrpr	%l3, 0, %pil
-	wr	%l4, 0, %y
 
 	restore
 
 	wrpr	%g0, 2, %tl
 
-	rdpr	%cwp, %g4
-	wrpr	%g1, %g4, %tstate
 	wrpr	%g2, 0, %tpc
 	wrpr	%g3, 0, %tnpc
+	rdpr	%cwp, %g2
+	wrpr	%g1, %g2, %tstate
 
 #if KTR_COMPILE & KTR_INTR
 	CATR(KTR_INTR, "tl1_intr: td=%#x pil=%#lx ts=%#lx pc=%#lx sp=%#lx"
-	    , %g2, %g3, %g4, 7, 8, 9)
-	ldx	[PCPU(CURTHREAD)], %g3
-	stx	%g3, [%g2 + KTR_PARM1]
-	rdpr	%pil, %g3
-	stx	%g3, [%g2 + KTR_PARM2]
-	rdpr	%tstate, %g3
-	stx	%g3, [%g2 + KTR_PARM3]
-	rdpr	%tpc, %g3
-	stx	%g3, [%g2 + KTR_PARM4]
-	stx	%sp, [%g2 + KTR_PARM5]
+	    , %g1, %g2, %g3, 7, 8, 9)
+	ldx	[PCPU(CURTHREAD)], %g2
+	stx	%g2, [%g1 + KTR_PARM1]
+	rdpr	%pil, %g2
+	stx	%g2, [%g1 + KTR_PARM2]
+	rdpr	%tstate, %g2
+	stx	%g2, [%g1 + KTR_PARM3]
+	rdpr	%tpc, %g2
+	stx	%g2, [%g1 + KTR_PARM4]
+	stx	%sp, [%g1 + KTR_PARM5]
 9:
 #endif
 
+	wr	%g4, 0, %y
+
 	retry
 END(tl1_intr)
 
diff --git a/sys/sparc64/sparc64/genassym.c b/sys/sparc64/sparc64/genassym.c
index e33e581bb91a..89ec718b94a2 100644
--- a/sys/sparc64/sparc64/genassym.c
+++ b/sys/sparc64/sparc64/genassym.c
@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/proc.h>
 #include <sys/smp.h>
 #include <sys/vmmeter.h>
+#include <sys/_cpuset.h>
 
 #include <vm/vm.h>
 #include <vm/vm_page.h>
@@ -59,6 +60,8 @@ ASSYM(PCPU_PAGES, PCPU_PAGES);
 
 ASSYM(TAR_VPN_SHIFT, TAR_VPN_SHIFT);
 
+ASSYM(_NCPUBITS, _NCPUBITS);
+
 #ifdef SUN4U
 ASSYM(TLB_DEMAP_ALL, TLB_DEMAP_ALL);
 #endif
@@ -137,7 +140,6 @@ ASSYM(MAXCOMLEN, MAXCOMLEN);
 ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread));
 ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb));
 ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid));
-ASSYM(PC_CPUMASK, offsetof(struct pcpu, pc_cpumask));
 ASSYM(PC_IRHEAD, offsetof(struct pcpu, pc_irhead));
 ASSYM(PC_IRTAIL, offsetof(struct pcpu, pc_irtail));
 ASSYM(PC_IRFREE, offsetof(struct pcpu, pc_irfree));
diff --git a/sys/sparc64/sparc64/intr_machdep.c b/sys/sparc64/sparc64/intr_machdep.c
index f6ef9a7bf113..ed301826693c 100644
--- a/sys/sparc64/sparc64/intr_machdep.c
+++ b/sys/sparc64/sparc64/intr_machdep.c
@@ -445,8 +445,7 @@ intr_describe(int vec, void *ih, const char *descr)
  * allocate CPUs round-robin.
  */
 
-/* The BSP is always a valid target. */
-static cpumask_t intr_cpus = (1 << 0);
+static cpuset_t intr_cpus;
 static int current_cpu;
 
 static void
@@ -468,7 +467,7 @@ intr_assign_next_cpu(struct intr_vector *iv)
 		current_cpu++;
 		if (current_cpu > mp_maxid)
 			current_cpu = 0;
-	} while (!(intr_cpus & (1 << current_cpu)));
+	} while (!CPU_ISSET(current_cpu, &intr_cpus));
 }
 
 /* Attempt to bind the specified IRQ to the specified CPU. */
@@ -504,7 +503,7 @@ intr_add_cpu(u_int cpu)
 	if (bootverbose)
 		printf("INTR: Adding CPU %d as a target\n", cpu);
 
-	intr_cpus |= (1 << cpu);
+	CPU_SET(cpu, &intr_cpus);
 }
 
 /*
@@ -518,6 +517,9 @@ intr_shuffle_irqs(void *arg __unused)
 	struct intr_vector *iv;
 	int i;
 
+	/* The BSP is always a valid target; keep any CPUs already added. */
+	CPU_SET(0, &intr_cpus);
+
 	/* Don't bother on UP. */
 	if (mp_ncpus == 1)
 		return;
diff --git a/sys/sparc64/sparc64/mp_exception.S b/sys/sparc64/sparc64/mp_exception.S
index 5a8a1054c859..f1b323aeebed 100644
--- a/sys/sparc64/sparc64/mp_exception.S
+++ b/sys/sparc64/sparc64/mp_exception.S
@@ -38,9 +38,21 @@ __FBSDID("$FreeBSD$");
 	.register	%g2, #ignore
 	.register	%g3, #ignore
 
-#define	IPI_DONE(r1, r2, r3, r4) \
-	lduw	[PCPU(CPUMASK)], r4 ;  \
-	ATOMIC_CLEAR_INT(r1, r2, r3, r4)
+#define	IPI_DONE(r1, r2, r3, r4, r5, r6)				\
+	rd	%y, r6 ;						\
+	lduw	[PCPU(CPUID)], r2 ;					\
+	mov	_NCPUBITS, r3 ;						\
+	mov	%g0, %y ;						\
+	udiv	r2, r3, r4 ;						\
+	srl	r4, 0, r5 ;						\
+	sllx	r5, PTR_SHIFT, r5 ;					\
+	add	r1, r5, r1 ;						\
+	smul	r4, r3, r3 ;						\
+	sub	r2, r3, r3 ;						\
+	mov	1, r4 ;							\
+	sllx	r4, r3, r4 ;						\
+	wr	r6, %y ;						\
+	ATOMIC_CLEAR_LONG(r1, r2, r3, r4)
 
 /*
  * Invalidate a physical page in the data cache.  For UltraSPARC I and II.
@@ -77,7 +89,7 @@ ENTRY(tl_ipi_spitfire_dcache_page_inval)
 2:	brgz,pt	%g2, 1b
 	 sub	%g2, %g4, %g2
 
-	IPI_DONE(%g5, %g1, %g2, %g3)
+	IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
 	retry
 END(tl_ipi_spitfire_dcache_page_inval)
 
@@ -117,7 +129,7 @@ ENTRY(tl_ipi_spitfire_icache_page_inval)
 2:	brgz,pt	%g2, 1b
 	 sub	%g2, %g4, %g2
 
-	IPI_DONE(%g5, %g1, %g2, %g3)
+	IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
 	retry
 END(tl_ipi_spitfire_icache_page_inval)
 
@@ -148,7 +160,7 @@ ENTRY(tl_ipi_cheetah_dcache_page_inval)
 	blt,a,pt %xcc, 1b
 	 nop
 
-	IPI_DONE(%g5, %g1, %g2, %g3)
+	IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
 	retry
 END(tl_ipi_cheetah_dcache_page_inval)
 
@@ -204,7 +216,7 @@ ENTRY(tl_ipi_tlb_page_demap)
 	stxa	%g0, [%g2] ASI_IMMU_DEMAP
 	flush	%g3
 
-	IPI_DONE(%g5, %g1, %g2, %g3)
+	IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
 	retry
 END(tl_ipi_tlb_page_demap)
 
@@ -247,7 +259,7 @@ ENTRY(tl_ipi_tlb_range_demap)
 	blt,a,pt %xcc, 1b
 	 nop
 
-	IPI_DONE(%g5, %g1, %g2, %g3)
+	IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
 	retry
 END(tl_ipi_tlb_range_demap)
 
@@ -271,7 +283,7 @@ ENTRY(tl_ipi_tlb_context_demap)
 	stxa	%g0, [%g1] ASI_IMMU_DEMAP
 	flush	%g3
 
-	IPI_DONE(%g5, %g1, %g2, %g3)
+	IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
 	retry
 END(tl_ipi_tlb_context_demap)
 
@@ -283,7 +295,7 @@ ENTRY(tl_ipi_stick_rd)
 	rd	%asr24, %g2
 	stx	%g2, [%g1]
 
-	IPI_DONE(%g5, %g1, %g2, %g3)
+	IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
 	retry
 END(tl_ipi_stick_rd)
 
@@ -295,6 +307,6 @@ ENTRY(tl_ipi_tick_rd)
 	rd	%tick, %g2
 	stx	%g2, [%g1]
 
-	IPI_DONE(%g5, %g1, %g2, %g3)
+	IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6)
 	retry
 END(tl_ipi_tick_rd)
diff --git a/sys/sparc64/sparc64/mp_locore.S b/sys/sparc64/sparc64/mp_locore.S
index fbcb767e0896..fd4357eaec1f 100644
--- a/sys/sparc64/sparc64/mp_locore.S
+++ b/sys/sparc64/sparc64/mp_locore.S
@@ -269,13 +269,17 @@ ENTRY(mp_startup)
 	add	%l1, %l2, %l1
 	sub	%l1, SPOFF + CCFSZ, %sp
 
+	/* Initialize global registers. */
+	call	cpu_setregs
+	 mov	%l1, %o0
+
 #if KTR_COMPILE & KTR_SMP
 	CATR(KTR_SMP,
 	    "mp_startup: bootstrap cpuid=%d mid=%d pcpu=%#lx data=%#lx sp=%#lx"
 	    , %g1, %g2, %g3, 7, 8, 9)
-	lduw	[%l1 + PC_CPUID], %g2
+	lduw	[PCPU(CPUID)], %g2
 	stx	%g2, [%g1 + KTR_PARM1]
-	lduw	[%l1 + PC_MID], %g2
+	lduw	[PCPU(MID)], %g2
 	stx	%g2, [%g1 + KTR_PARM2]
 	stx	%l1, [%g1 + KTR_PARM3]
 	stx	%sp, [%g1 + KTR_PARM5]
diff --git a/sys/sparc64/sparc64/mp_machdep.c b/sys/sparc64/sparc64/mp_machdep.c
index 4d9151e156c0..f2e76df2da37 100644
--- a/sys/sparc64/sparc64/mp_machdep.c
+++ b/sys/sparc64/sparc64/mp_machdep.c
@@ -121,7 +121,7 @@ cpu_ipi_single_t *cpu_ipi_single;
 static vm_offset_t mp_tramp;
 static u_int cpuid_to_mid[MAXCPU];
 static int isjbus;
-static volatile cpumask_t shutdown_cpus;
+static volatile cpuset_t shutdown_cpus;
 
 static void ap_count(phandle_t node, u_int mid, u_int cpu_impl);
 static void ap_start(phandle_t node, u_int mid, u_int cpu_impl);
@@ -228,7 +228,7 @@ void
 cpu_mp_setmaxid()
 {
 
-	all_cpus = 1 << curcpu;
+	CPU_SETOF(curcpu, &all_cpus);
 	mp_ncpus = 1;
 	mp_maxid = 0;
 
@@ -283,6 +283,7 @@ sun4u_startcpu(phandle_t cpu, void *func, u_long arg)
 void
 cpu_mp_start(void)
 {
+	cpuset_t ocpus;
 
 	mtx_init(&ipi_mtx, "ipi", NULL, MTX_SPIN);
 
@@ -299,7 +300,9 @@ cpu_mp_start(void)
 	KASSERT(!isjbus || mp_ncpus <= IDR_JALAPENO_MAX_BN_PAIRS,
 	    ("%s: can only IPI a maximum of %d JBus-CPUs",
 	    __func__, IDR_JALAPENO_MAX_BN_PAIRS));
-	PCPU_SET(other_cpus, all_cpus & ~(1 << curcpu));
+	ocpus = all_cpus;
+	CPU_CLR(curcpu, &ocpus);
+	PCPU_SET(other_cpus, ocpus);
 	smp_active = 1;
 }
 
@@ -357,7 +360,7 @@ ap_start(phandle_t node, u_int mid, u_int cpu_impl)
 
 	cache_init(pc);
 
-	all_cpus |= 1 << cpuid;
+	CPU_SET(cpuid, &all_cpus);
 	intr_add_cpu(cpuid);
 }
 
@@ -421,6 +424,7 @@ cpu_mp_unleash(void *v)
 void
 cpu_mp_bootstrap(struct pcpu *pc)
 {
+	cpuset_t ocpus;
 	volatile struct cpu_start_args *csa;
 
 	csa = &cpu_start_args;
@@ -453,9 +457,6 @@ cpu_mp_bootstrap(struct pcpu *pc)
 	 */
 	tlb_flush_nonlocked();
 
-	/* Initialize global registers. */
-	cpu_setregs(pc);
-
 	/*
 	 * Enable interrupts.
 	 * Note that the PIL we be lowered indirectly via sched_throw(NULL)
@@ -465,7 +466,9 @@ cpu_mp_bootstrap(struct pcpu *pc)
 
 	smp_cpus++;
 	KASSERT(curthread != NULL, ("%s: curthread", __func__));
-	PCPU_SET(other_cpus, all_cpus & ~(1 << curcpu));
+	ocpus = all_cpus;
+	CPU_CLR(curcpu, &ocpus);
+	PCPU_SET(other_cpus, ocpus);
 	printf("SMP: AP CPU #%d Launched!\n", curcpu);
 
 	csa->csa_count--;
@@ -484,14 +487,22 @@ cpu_mp_bootstrap(struct pcpu *pc)
 void
 cpu_mp_shutdown(void)
 {
+	cpuset_t cpus;
 	int i;
 
 	critical_enter();
 	shutdown_cpus = PCPU_GET(other_cpus);
-	if (stopped_cpus != PCPU_GET(other_cpus))	/* XXX */
-		stop_cpus(stopped_cpus ^ PCPU_GET(other_cpus));
+	cpus = shutdown_cpus;
+
+	/* XXX: Stop all the CPUs which aren't already. */
+	if (CPU_CMP(&stopped_cpus, &cpus)) {
+
+		/* pc_other_cpus is just a flat "on" mask without curcpu. */
+		CPU_NAND(&cpus, &stopped_cpus);
+		stop_cpus(cpus);
+	}
 	i = 0;
-	while (shutdown_cpus != 0) {
+	while (!CPU_EMPTY(&shutdown_cpus)) {
 		if (i++ > 100000) {
 			printf("timeout shutting down CPUs.\n");
 			break;
@@ -509,20 +520,24 @@ cpu_ipi_ast(struct trapframe *tf)
 static void
 cpu_ipi_stop(struct trapframe *tf)
 {
+	cpuset_t tcmask;
 
 	CTR2(KTR_SMP, "%s: stopped %d", __func__, curcpu);
+	sched_pin();
 	savectx(&stoppcbs[curcpu]);
-	atomic_set_acq_int(&stopped_cpus, PCPU_GET(cpumask));
-	while ((started_cpus & PCPU_GET(cpumask)) == 0) {
-		if ((shutdown_cpus & PCPU_GET(cpumask)) != 0) {
-			atomic_clear_int(&shutdown_cpus, PCPU_GET(cpumask));
+	tcmask = PCPU_GET(cpumask);
+	CPU_OR_ATOMIC(&stopped_cpus, &tcmask);
+	while (!CPU_OVERLAP(&started_cpus, &tcmask)) {
+		if (CPU_OVERLAP(&shutdown_cpus, &tcmask)) {
+			CPU_NAND_ATOMIC(&shutdown_cpus, &tcmask);
 			(void)intr_disable();
 			for (;;)
 				;
 		}
 	}
-	atomic_clear_rel_int(&started_cpus, PCPU_GET(cpumask));
-	atomic_clear_rel_int(&stopped_cpus, PCPU_GET(cpumask));
+	CPU_NAND_ATOMIC(&started_cpus, &tcmask);
+	CPU_NAND_ATOMIC(&stopped_cpus, &tcmask);
+	sched_unpin();
 	CTR2(KTR_SMP, "%s: restarted %d", __func__, curcpu);
 }
 
@@ -551,13 +566,13 @@ cpu_ipi_hardclock(struct trapframe *tf)
 }
 
 static void
-spitfire_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
+spitfire_ipi_selected(cpuset_t cpus, u_long d0, u_long d1, u_long d2)
 {
 	u_int cpu;
 
-	while (cpus) {
-		cpu = ffs(cpus) - 1;
-		cpus &= ~(1 << cpu);
+	while ((cpu = cpusetobj_ffs(&cpus)) != 0) {
+		cpu--;
+		CPU_CLR(cpu, &cpus);
 		spitfire_ipi_single(cpu, d0, d1, d2);
 	}
 }
@@ -657,20 +672,21 @@ cheetah_ipi_single(u_int cpu, u_long d0, u_long d1, u_long d2)
 }
 
 static void
-cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
+cheetah_ipi_selected(cpuset_t cpus, u_long d0, u_long d1, u_long d2)
 {
+	char pbuf[CPUSETBUFSIZ];
 	register_t s;
 	u_long ids;
 	u_int bnp;
 	u_int cpu;
 	int i;
 
-	KASSERT((cpus & (1 << curcpu)) == 0,
-	    ("%s: CPU can't IPI itself", __func__));
+	KASSERT(!CPU_ISSET(curcpu, &cpus), ("%s: CPU can't IPI itself",
+	    __func__));
 	KASSERT((ldxa(0, ASI_INTR_DISPATCH_STATUS) &
 	    IDR_CHEETAH_ALL_BUSY) == 0,
 	    ("%s: outstanding dispatch", __func__));
-	if (cpus == 0)
+	if (CPU_EMPTY(&cpus))
 		return;
 	ids = 0;
 	for (i = 0; i < IPI_RETRIES * mp_ncpus; i++) {
@@ -681,7 +697,7 @@ cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
 		membar(Sync);
 		bnp = 0;
 		for (cpu = 0; cpu < mp_ncpus; cpu++) {
-			if ((cpus & (1 << cpu)) != 0) {
+			if (CPU_ISSET(cpu, &cpus)) {
 				stxa(AA_INTR_SEND | (cpuid_to_mid[cpu] <<
 				    IDC_ITID_SHIFT) | bnp << IDC_BN_SHIFT,
 				    ASI_SDB_INTR_W, 0);
@@ -698,9 +714,9 @@ cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
 			return;
 		bnp = 0;
 		for (cpu = 0; cpu < mp_ncpus; cpu++) {
-			if ((cpus & (1 << cpu)) != 0) {
+			if (CPU_ISSET(cpu, &cpus)) {
 				if ((ids & (IDR_NACK << (2 * bnp))) == 0)
-					cpus &= ~(1 << cpu);
+					CPU_CLR(cpu, &cpus);
 				bnp++;
 			}
 		}
@@ -709,7 +725,7 @@ cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
 		 * CPUs we actually haven't tried to send an IPI to,
 		 * but which apparently can be safely ignored.
 		 */
-		if (cpus == 0)
+		if (CPU_EMPTY(&cpus))
 			return;
 		/*
 		 * Leave interrupts enabled for a bit before retrying
@@ -719,11 +735,11 @@ cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
 		DELAY(2 * mp_ncpus);
 	}
 	if (kdb_active != 0 || panicstr != NULL)
-		printf("%s: couldn't send IPI (cpus=0x%u ids=0x%lu)\n",
-		    __func__, cpus, ids);
+		printf("%s: couldn't send IPI (cpus=%s ids=0x%lu)\n",
+		    __func__, cpusetobj_strprint(pbuf, &cpus), ids);
 	else
-		panic("%s: couldn't send IPI (cpus=0x%u ids=0x%lu)",
-		    __func__, cpus, ids);
+		panic("%s: couldn't send IPI (cpus=%s ids=0x%lu)",
+		    __func__, cpusetobj_strprint(pbuf, &cpus), ids);
 }
 
 static void
@@ -772,19 +788,20 @@ jalapeno_ipi_single(u_int cpu, u_long d0, u_long d1, u_long d2)
 }
 
 static void
-jalapeno_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
+jalapeno_ipi_selected(cpuset_t cpus, u_long d0, u_long d1, u_long d2)
 {
+	char pbuf[CPUSETBUFSIZ];
 	register_t s;
 	u_long ids;
 	u_int cpu;
 	int i;
 
-	KASSERT((cpus & (1 << curcpu)) == 0,
-	    ("%s: CPU can't IPI itself", __func__));
+	KASSERT(!CPU_ISSET(curcpu, &cpus), ("%s: CPU can't IPI itself",
+	    __func__));
 	KASSERT((ldxa(0, ASI_INTR_DISPATCH_STATUS) &
 	    IDR_CHEETAH_ALL_BUSY) == 0,
 	    ("%s: outstanding dispatch", __func__));
-	if (cpus == 0)
+	if (CPU_EMPTY(&cpus))
 		return;
 	ids = 0;
 	for (i = 0; i < IPI_RETRIES * mp_ncpus; i++) {
@@ -794,7 +811,7 @@ jalapeno_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
 		stxa(AA_SDB_INTR_D2, ASI_SDB_INTR_W, d2);
 		membar(Sync);
 		for (cpu = 0; cpu < mp_ncpus; cpu++) {
-			if ((cpus & (1 << cpu)) != 0) {
+			if (CPU_ISSET(cpu, &cpus)) {
 				stxa(AA_INTR_SEND | (cpuid_to_mid[cpu] <<
 				    IDC_ITID_SHIFT), ASI_SDB_INTR_W, 0);
 				membar(Sync);
@@ -808,10 +825,10 @@ jalapeno_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
 		    (IDR_CHEETAH_ALL_BUSY | IDR_CHEETAH_ALL_NACK)) == 0)
 			return;
 		for (cpu = 0; cpu < mp_ncpus; cpu++)
-			if ((cpus & (1 << cpu)) != 0)
+			if (CPU_ISSET(cpu, &cpus))
 				if ((ids & (IDR_NACK <<
 				    (2 * cpuid_to_mid[cpu]))) == 0)
-					cpus &= ~(1 << cpu);
+					CPU_CLR(cpu, &cpus);
 		/*
 		 * Leave interrupts enabled for a bit before retrying
 		 * in order to avoid deadlocks if the other CPUs are
@@ -820,9 +837,9 @@ jalapeno_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2)
 		DELAY(2 * mp_ncpus);
 	}
 	if (kdb_active != 0 || panicstr != NULL)
-		printf("%s: couldn't send IPI (cpus=0x%u ids=0x%lu)\n",
-		    __func__, cpus, ids);
+		printf("%s: couldn't send IPI (cpus=%s ids=0x%lu)\n",
+		    __func__, cpusetobj_strprint(pbuf, &cpus), ids);
 	else
-		panic("%s: couldn't send IPI (cpus=0x%u ids=0x%lu)",
-		    __func__, cpus, ids);
+		panic("%s: couldn't send IPI (cpus=%s ids=0x%lu)",
+		    __func__, cpusetobj_strprint(pbuf, &cpus), ids);
 }
diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c
index c34fc4506089..b01a558c3b24 100644
--- a/sys/sparc64/sparc64/pmap.c
+++ b/sys/sparc64/sparc64/pmap.c
@@ -664,7 +664,7 @@ pmap_bootstrap(u_int cpu_impl)
 	pm = kernel_pmap;
 	for (i = 0; i < MAXCPU; i++)
 		pm->pm_context[i] = TLB_CTX_KERNEL;
-	pm->pm_active = ~0;
+	CPU_FILL(&pm->pm_active);
 
 	/*
 	 * Flush all non-locked TLB entries possibly left over by the
@@ -1189,7 +1189,7 @@ pmap_pinit0(pmap_t pm)
 	PMAP_LOCK_INIT(pm);
 	for (i = 0; i < MAXCPU; i++)
 		pm->pm_context[i] = TLB_CTX_KERNEL;
-	pm->pm_active = 0;
+	CPU_ZERO(&pm->pm_active);
 	pm->pm_tsb = NULL;
 	pm->pm_tsb_obj = NULL;
 	bzero(&pm->pm_stats, sizeof(pm->pm_stats));
@@ -1229,7 +1229,7 @@ pmap_pinit(pmap_t pm)
 	mtx_lock_spin(&sched_lock);
 	for (i = 0; i < MAXCPU; i++)
 		pm->pm_context[i] = -1;
-	pm->pm_active = 0;
+	CPU_ZERO(&pm->pm_active);
 	mtx_unlock_spin(&sched_lock);
 
 	VM_OBJECT_LOCK(pm->pm_tsb_obj);
@@ -2230,7 +2230,7 @@ pmap_activate(struct thread *td)
 	PCPU_SET(tlb_ctx, context + 1);
 
 	pm->pm_context[curcpu] = context;
-	pm->pm_active |= PCPU_GET(cpumask);
+	CPU_OR(&pm->pm_active, PCPU_PTR(cpumask));
 	PCPU_SET(pmap, pm);
 
 	stxa(AA_DMMU_TSB, ASI_DMMU, pm->pm_tsb);
diff --git a/sys/sparc64/sparc64/swtch.S b/sys/sparc64/sparc64/swtch.S
index ea13779c46e4..75157343cdc8 100644
--- a/sys/sparc64/sparc64/swtch.S
+++ b/sys/sparc64/sparc64/swtch.S
@@ -164,20 +164,29 @@ ENTRY(cpu_switch)
 	 * If there was no non-kernel pmap, don't try to deactivate it.
 	 */
 	brz,pn	%l2, 3f
-	 lduw	[PCPU(CPUMASK)], %l4
+	 lduw	[PCPU(CPUID)], %l3
 
 	/*
 	 * Mark the pmap of the last non-kernel vmspace to run as no longer
 	 * active on this CPU.
 	 */
-	lduw	[%l2 + PM_ACTIVE], %l3
-	andn	%l3, %l4, %l3
-	stw	%l3, [%l2 + PM_ACTIVE]
+	mov	_NCPUBITS, %l5
+	mov	%g0, %y
+	udiv	%l3, %l5, %l6
+	srl	%l6, 0, %l4
+	sllx	%l4, PTR_SHIFT, %l4
+	add	%l4, PM_ACTIVE, %l4
+	smul	%l6, %l5, %l5
+	sub	%l3, %l5, %l5
+	mov	1, %l6
+	sllx	%l6, %l5, %l5
+	ldx	[%l2 + %l4], %l6
+	andn	%l6, %l5, %l6
+	stx	%l6, [%l2 + %l4]
 
 	/*
 	 * Take away its context number.
 	 */
-	lduw	[PCPU(CPUID)], %l3
 	sllx	%l3, INT_SHIFT, %l3
 	add	%l2, PM_CONTEXT, %l4
 	mov	-1, %l5
@@ -210,18 +219,27 @@ ENTRY(cpu_switch)
 	/*
 	 * Set the new context number in the pmap.
 	 */
-	lduw	[PCPU(CPUID)], %i4
-	sllx	%i4, INT_SHIFT, %i4
+	lduw	[PCPU(CPUID)], %l3
+	sllx	%l3, INT_SHIFT, %i4
 	add	%l1, PM_CONTEXT, %i5
 	stw	%i3, [%i4 + %i5]
 
 	/*
 	 * Mark the pmap as active on this CPU.
 	 */
-	lduw	[%l1 + PM_ACTIVE], %i4
-	lduw	[PCPU(CPUMASK)], %i5
-	or	%i4, %i5, %i4
-	stw	%i4, [%l1 + PM_ACTIVE]
+	mov	_NCPUBITS, %l5
+	mov	%g0, %y
+	udiv	%l3, %l5, %l6
+	srl	%l6, 0, %l4
+	sllx	%l4, PTR_SHIFT, %l4
+	add	%l4, PM_ACTIVE, %l4
+	smul	%l6, %l5, %l5
+	sub	%l3, %l5, %l5
+	mov	1, %l6
+	sllx	%l6, %l5, %l5
+	ldx	[%l1 + %l4], %l6
+	or	%l6, %l5, %l6
+	stx	%l6, [%l1 + %l4]
 
 	/*
 	 * Make note of the change in pmap.
diff --git a/sys/sparc64/sparc64/tlb.c b/sys/sparc64/sparc64/tlb.c
index 990c77763750..9fcece6f43a6 100644
--- a/sys/sparc64/sparc64/tlb.c
+++ b/sys/sparc64/sparc64/tlb.c
@@ -80,7 +80,7 @@ tlb_context_demap(struct pmap *pm)
 	PMAP_STATS_INC(tlb_ncontext_demap);
 	cookie = ipi_tlb_context_demap(pm);
 	s = intr_disable();
-	if (pm->pm_active & PCPU_GET(cpumask)) {
+	if (CPU_OVERLAP(&pm->pm_active, PCPU_PTR(cpumask))) {
 		KASSERT(pm->pm_context[curcpu] != -1,
 		    ("tlb_context_demap: inactive pmap?"));
 		stxa(TLB_DEMAP_PRIMARY | TLB_DEMAP_CONTEXT, ASI_DMMU_DEMAP, 0);
@@ -101,7 +101,7 @@ tlb_page_demap(struct pmap *pm, vm_offset_t va)
 	PMAP_STATS_INC(tlb_npage_demap);
 	cookie = ipi_tlb_page_demap(pm, va);
 	s = intr_disable();
-	if (pm->pm_active & PCPU_GET(cpumask)) {
+	if (CPU_OVERLAP(&pm->pm_active, PCPU_PTR(cpumask))) {
 		KASSERT(pm->pm_context[curcpu] != -1,
 		    ("tlb_page_demap: inactive pmap?"));
 		if (pm == kernel_pmap)
@@ -128,7 +128,7 @@ tlb_range_demap(struct pmap *pm, vm_offset_t start, vm_offset_t end)
 	PMAP_STATS_INC(tlb_nrange_demap);
 	cookie = ipi_tlb_range_demap(pm, start, end);
 	s = intr_disable();
-	if (pm->pm_active & PCPU_GET(cpumask)) {
+	if (CPU_OVERLAP(&pm->pm_active, PCPU_PTR(cpumask))) {
 		KASSERT(pm->pm_context[curcpu] != -1,
 		    ("tlb_range_demap: inactive pmap?"));
 		if (pm == kernel_pmap)
diff --git a/sys/sys/_cpuset.h b/sys/sys/_cpuset.h
new file mode 100644
index 000000000000..42a0a6a9db81
--- /dev/null
+++ b/sys/sys/_cpuset.h
@@ -0,0 +1,52 @@
+/*-
+ * Copyright (c) 2008,	Jeffrey Roberson <jeff@freebsd.org>
+ * All rights reserved.
+ *
+ * Copyright (c) 2008 Nokia Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice unmodified, this list of conditions, and the following
+ *    disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
+ * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
+ * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS__CPUSET_H_
+#define	_SYS__CPUSET_H_
+
+#ifdef _KERNEL
+#define	CPU_SETSIZE	MAXCPU
+#endif
+
+#define	CPU_MAXSIZE	128
+
+#ifndef	CPU_SETSIZE
+#define	CPU_SETSIZE	CPU_MAXSIZE
+#endif
+
+#define	_NCPUBITS	(sizeof(long) * NBBY)	/* bits per mask */
+#define	_NCPUWORDS	howmany(CPU_SETSIZE, _NCPUBITS)
+
+typedef	struct _cpuset {
+	long	__bits[howmany(CPU_SETSIZE, _NCPUBITS)];
+} cpuset_t;
+
+#endif /* !_SYS__CPUSET_H_ */
diff --git a/sys/sys/_rmlock.h b/sys/sys/_rmlock.h
index 75a159c1082e..15d6c4953e85 100644
--- a/sys/sys/_rmlock.h
+++ b/sys/sys/_rmlock.h
@@ -45,7 +45,7 @@ LIST_HEAD(rmpriolist,rm_priotracker);
 
 struct rmlock {
 	struct lock_object lock_object; 
-	volatile cpumask_t rm_writecpus;
+	volatile cpuset_t rm_writecpus;
 	LIST_HEAD(,rm_priotracker) rm_activeReaders;
 	union {
 		struct mtx _rm_lock_mtx;
diff --git a/sys/sys/conf.h b/sys/sys/conf.h
index 31fd34dc47ab..0c7ed41c3c41 100644
--- a/sys/sys/conf.h
+++ b/sys/sys/conf.h
@@ -332,6 +332,7 @@ struct dumperinfo {
 int set_dumper(struct dumperinfo *);
 int dump_write(struct dumperinfo *, void *, vm_offset_t, off_t, size_t);
 void dumpsys(struct dumperinfo *);
+int doadump(boolean_t);
 extern int dumping;		/* system is dumping */
 
 #endif /* _KERNEL */
diff --git a/sys/sys/cpuset.h b/sys/sys/cpuset.h
index 854fa29608ca..030a874bf7d4 100644
--- a/sys/sys/cpuset.h
+++ b/sys/sys/cpuset.h
@@ -32,22 +32,9 @@
 #ifndef _SYS_CPUSET_H_
 #define	_SYS_CPUSET_H_
 
-#ifdef _KERNEL
-#define	CPU_SETSIZE	MAXCPU
-#endif
+#include <sys/_cpuset.h>
 
-#define	CPU_MAXSIZE	128
-
-#ifndef	CPU_SETSIZE
-#define	CPU_SETSIZE	CPU_MAXSIZE
-#endif
-
-#define	_NCPUBITS	(sizeof(long) * NBBY)	/* bits per mask */
-#define	_NCPUWORDS	howmany(CPU_SETSIZE, _NCPUBITS)
-
-typedef	struct _cpuset {
-	long	__bits[howmany(CPU_SETSIZE, _NCPUBITS)];
-} cpuset_t;
+#define	CPUSETBUFSIZ	((2 + sizeof(long) * 2) * _NCPUWORDS)
 
 #define	__cpuset_mask(n)	((long)1 << ((n) % _NCPUBITS))
 #define	CPU_CLR(n, p)	((p)->__bits[(n)/_NCPUBITS] &= ~__cpuset_mask(n))
@@ -66,6 +53,11 @@ typedef	struct _cpuset {
 		(p)->__bits[__i] = -1;			\
 } while (0)
 
+#define	CPU_SETOF(n, p) do {					\
+	CPU_ZERO(p);						\
+	((p)->__bits[(n)/_NCPUBITS] = __cpuset_mask(n));	\
+} while (0)
+
 /* Is p empty. */
 #define	CPU_EMPTY(p) __extension__ ({			\
 	__size_t __i;					\
@@ -75,6 +67,15 @@ typedef	struct _cpuset {
 	__i == _NCPUWORDS;				\
 })
 
+/* Is p the full set. */
+#define	CPU_ISFULLSET(p) __extension__ ({		\
+	__size_t __i;					\
+	for (__i = 0; __i < _NCPUWORDS; __i++)		\
+		if ((p)->__bits[__i] != (long)-1)	\
+			break;				\
+	__i == _NCPUWORDS;				\
+})
+
 /* Is c a subset of p. */
 #define	CPU_SUBSET(p, c) __extension__ ({		\
 	__size_t __i;					\
@@ -124,6 +125,33 @@ typedef	struct _cpuset {
 		(d)->__bits[__i] &= ~(s)->__bits[__i];	\
 } while (0)
 
+#define	CPU_CLR_ATOMIC(n, p)						\
+	atomic_clear_long(&(p)->__bits[(n)/_NCPUBITS], __cpuset_mask(n))
+
+#define	CPU_SET_ATOMIC(n, p)						\
+	atomic_set_long(&(p)->__bits[(n)/_NCPUBITS], __cpuset_mask(n))
+
+#define	CPU_OR_ATOMIC(d, s) do {			\
+	__size_t __i;					\
+	for (__i = 0; __i < _NCPUWORDS; __i++)		\
+		atomic_set_long(&(d)->__bits[__i],	\
+		    (s)->__bits[__i]);			\
+} while (0)
+
+#define	CPU_NAND_ATOMIC(d, s) do {			\
+	__size_t __i;					\
+	for (__i = 0; __i < _NCPUWORDS; __i++)		\
+		atomic_clear_long(&(d)->__bits[__i],	\
+		    (s)->__bits[__i]);			\
+} while (0)
+
+#define	CPU_COPY_STORE_REL(f, t) do {				\
+	__size_t __i;						\
+	for (__i = 0; __i < _NCPUWORDS; __i++)			\
+		atomic_store_rel_long(&(t)->__bits[__i],	\
+		    (f)->__bits[__i]);				\
+} while (0)
+
 /*
  * Valid cpulevel_t values.
  */
@@ -184,6 +212,9 @@ void	cpuset_rel(struct cpuset *);
 int	cpuset_setthread(lwpid_t id, cpuset_t *);
 int	cpuset_create_root(struct prison *, struct cpuset **);
 int	cpuset_setproc_update_set(struct proc *, struct cpuset *);
+int	cpusetobj_ffs(const cpuset_t *);
+char	*cpusetobj_strprint(char *, const cpuset_t *);
+int	cpusetobj_strscan(cpuset_t *, const char *);
 
 #else
 __BEGIN_DECLS
diff --git a/sys/sys/ktr.h b/sys/sys/ktr.h
index 3b78101dd1b4..7885b22f4aee 100644
--- a/sys/sys/ktr.h
+++ b/sys/sys/ktr.h
@@ -97,6 +97,9 @@
 
 #ifndef LOCORE
 
+#include <sys/param.h>
+#include <sys/_cpuset.h>
+
 struct ktr_entry {
 	u_int64_t ktr_timestamp;
 	int	ktr_cpu;
@@ -107,7 +110,7 @@ struct ktr_entry {
 	u_long	ktr_parms[KTR_PARMS];
 };
 
-extern int ktr_cpumask;
+extern cpuset_t ktr_cpumask;
 extern int ktr_mask;
 extern int ktr_entries;
 extern int ktr_verbose;
diff --git a/sys/sys/pcpu.h b/sys/sys/pcpu.h
index 0bb2cbd196f3..e6044a7a57b6 100644
--- a/sys/sys/pcpu.h
+++ b/sys/sys/pcpu.h
@@ -37,6 +37,7 @@
 #error "no assembler-serviceable parts inside"
 #endif
 
+#include <sys/_cpuset.h>
 #include <sys/queue.h>
 #include <sys/vmmeter.h>
 #include <sys/resource.h>
@@ -162,8 +163,6 @@ struct pcpu {
 	uint64_t	pc_switchtime;		/* cpu_ticks() at last csw */
 	int		pc_switchticks;		/* `ticks' at last csw */
 	u_int		pc_cpuid;		/* This cpu number */
-	cpumask_t	pc_cpumask;		/* This cpu mask */
-	cpumask_t	pc_other_cpus;		/* Mask of all other cpus */
 	STAILQ_ENTRY(pcpu) pc_allcpu;
 	struct lock_list_entry *pc_spinlocks;
 #ifdef KTR
@@ -197,6 +196,18 @@ struct pcpu {
 	 * if only to make kernel debugging easier.
 	 */
 	PCPU_MD_FIELDS;
+
+	/*
+	 * XXX
+	 * For the time being, keep the cpuset_t objects as the very last
+	 * members of the structure.
+	 * They are actually tagged to be removed soon, but as long as this
+	 * does not happen, it is necessary to find a way to easily implement
+	 * interfaces to userland, and leaving them last makes that
+	 * possible.
+	 */
+	cpuset_t	pc_cpumask;		/* This cpu mask */
+	cpuset_t	pc_other_cpus;		/* Mask of all other cpus */
 } __aligned(CACHE_LINE_SIZE);
 
 #ifdef _KERNEL
diff --git a/sys/sys/pmckern.h b/sys/sys/pmckern.h
index 3e8c1ef3fb68..796c4cadab77 100644
--- a/sys/sys/pmckern.h
+++ b/sys/sys/pmckern.h
@@ -76,7 +76,7 @@ extern int (*pmc_intr)(int _cpu, struct trapframe *_frame);
 extern struct sx pmc_sx;
 
 /* Per-cpu flags indicating availability of sampling data */
-extern volatile cpumask_t pmc_cpumask;
+extern volatile cpuset_t pmc_cpumask;
 
 /* Count of system-wide sampling PMCs in existence */
 extern volatile int pmc_ss_count;
@@ -122,7 +122,7 @@ do {						\
 #define	PMC_SYSTEM_SAMPLING_ACTIVE()		(pmc_ss_count > 0)
 
 /* Check if a CPU has recorded samples. */
-#define	PMC_CPU_HAS_SAMPLES(C)	(__predict_false(pmc_cpumask & (1 << (C))))
+#define	PMC_CPU_HAS_SAMPLES(C)	(__predict_false(CPU_ISSET(C, &pmc_cpumask)))
 
 /*
  * Helper functions.
diff --git a/sys/sys/smp.h b/sys/sys/smp.h
index f8cce5f11177..66e800873e4b 100644
--- a/sys/sys/smp.h
+++ b/sys/sys/smp.h
@@ -16,6 +16,8 @@
 
 #ifndef LOCORE
 
+#include <sys/cpuset.h>
+
 /*
  * Topology of a NUMA or HTT system.
  *
@@ -32,7 +34,7 @@
 struct cpu_group {
 	struct cpu_group *cg_parent;	/* Our parent group. */
 	struct cpu_group *cg_child;	/* Optional children groups. */
-	cpumask_t	cg_mask;	/* Mask of cpus in this group. */
+	cpuset_t	cg_mask;	/* Mask of cpus in this group. */
 	int32_t		cg_count;	/* Count of cpus in this group. */
 	int16_t		cg_children;	/* Number of children groups. */
 	int8_t		cg_level;	/* Shared cache level. */
@@ -71,10 +73,10 @@ struct cpu_group *smp_topo_find(struct cpu_group *top, int cpu);
 extern void (*cpustop_restartfunc)(void);
 extern int smp_active;
 extern int smp_cpus;
-extern volatile cpumask_t started_cpus;
-extern volatile cpumask_t stopped_cpus;
-extern cpumask_t hlt_cpus_mask;
-extern cpumask_t logical_cpus_mask;
+extern volatile cpuset_t started_cpus;
+extern volatile cpuset_t stopped_cpus;
+extern cpuset_t hlt_cpus_mask;
+extern cpuset_t logical_cpus_mask;
 #endif /* SMP */
 
 extern u_int mp_maxid;
@@ -82,14 +84,14 @@ extern int mp_maxcpus;
 extern int mp_ncpus;
 extern volatile int smp_started;
 
-extern cpumask_t all_cpus;
+extern cpuset_t all_cpus;
 
 /*
  * Macro allowing us to determine whether a CPU is absent at any given
  * time, thus permitting us to configure sparse maps of cpuid-dependent
  * (per-CPU) structures.
  */
-#define	CPU_ABSENT(x_cpu)	((all_cpus & (1 << (x_cpu))) == 0)
+#define	CPU_ABSENT(x_cpu)	(!CPU_ISSET(x_cpu, &all_cpus))
 
 /*
  * Macros to iterate over non-absent CPUs.  CPU_FOREACH() takes an
@@ -158,11 +160,11 @@ void	cpu_mp_setmaxid(void);
 void	cpu_mp_start(void);
 
 void	forward_signal(struct thread *);
-int	restart_cpus(cpumask_t);
-int	stop_cpus(cpumask_t);
-int	stop_cpus_hard(cpumask_t);
+int	restart_cpus(cpuset_t);
+int	stop_cpus(cpuset_t);
+int	stop_cpus_hard(cpuset_t);
 #if defined(__amd64__)
-int	suspend_cpus(cpumask_t);
+int	suspend_cpus(cpuset_t);
 #endif
 void	smp_rendezvous_action(void);
 extern	struct mtx smp_ipi_mtx;
@@ -173,7 +175,7 @@ void	smp_rendezvous(void (*)(void *),
 		       void (*)(void *),
 		       void (*)(void *),
 		       void *arg);
-void	smp_rendezvous_cpus(cpumask_t,
+void	smp_rendezvous_cpus(cpuset_t,
 		       void (*)(void *), 
 		       void (*)(void *),
 		       void (*)(void *),
diff --git a/sys/sys/soundcard.h b/sys/sys/soundcard.h
index c4cfc276bdbe..a6817df79d02 100644
--- a/sys/sys/soundcard.h
+++ b/sys/sys/soundcard.h
@@ -311,7 +311,8 @@ typedef struct _snd_capabilities {
  * IOCTL Commands for /dev/sequencer
  */
 
-#define SNDCTL_SEQ_RESET	_IO  ('Q', 0)
+#define SNDCTL_SEQ_HALT		_IO  ('Q', 0)
+#define SNDCTL_SEQ_RESET	SNDCTL_SEQ_HALT	/* Historic interface */
 #define SNDCTL_SEQ_SYNC		_IO  ('Q', 1)
 #define SNDCTL_SYNTH_INFO	_IOWR('Q', 2, struct synth_info)
 #define SNDCTL_SEQ_CTRLRATE	_IOWR('Q', 3, int) /* Set/get timer res.(hz) */
diff --git a/sys/sys/types.h b/sys/sys/types.h
index 4bc1a8dbd228..cb513afeb5ee 100644
--- a/sys/sys/types.h
+++ b/sys/sys/types.h
@@ -99,7 +99,6 @@ typedef	__clockid_t	clockid_t;
 #define	_CLOCKID_T_DECLARED
 #endif
 
-typedef	__cpumask_t	cpumask_t;
 typedef	__critical_t	critical_t;	/* Critical section value */
 typedef	__int64_t	daddr_t;	/* disk address */
 
diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c
index 5b8f3c837a80..7f5d1b4a6a1d 100644
--- a/sys/ufs/ffs/ffs_alloc.c
+++ b/sys/ufs/ffs/ffs_alloc.c
@@ -420,13 +420,13 @@ ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, flags, cred, bpp)
 	 */
 	if (reclaimed == 0) {
 		reclaimed = 1;
-		softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT);
 		UFS_UNLOCK(ump);
 		if (bp) {
 			brelse(bp);
 			bp = NULL;
 		}
 		UFS_LOCK(ump);
+		softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT);
 		goto retry;
 	}
 	UFS_UNLOCK(ump);
@@ -2356,8 +2356,8 @@ ffs_fserr(fs, inum, cp)
  *	specified inode by the specified amount. Under normal
  *	operation the count should always go down. Decrementing
  *	the count to zero will cause the inode to be freed.
- * adjblkcnt(inode, amt) - adjust the number of blocks used to
- *	by the specifed amount.
+ * adjblkcnt(inode, amt) - adjust the number of blocks used by the
+ *	inode by the specified amount.
  * adjndir, adjbfree, adjifree, adjffree, adjnumclusters(amt) -
  *	adjust the superblock summary.
  * freedirs(inode, count) - directory inodes [inode..inode + count - 1]
diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c
index f5ac443577f5..dfef3a76fc73 100644
--- a/sys/x86/x86/local_apic.c
+++ b/sys/x86/x86/local_apic.c
@@ -802,7 +802,7 @@ lapic_handle_timer(struct trapframe *frame)
 	 * and unlike other schedulers it actually schedules threads to
 	 * those CPUs.
 	 */
-	if ((hlt_cpus_mask & (1 << PCPU_GET(cpuid))) != 0)
+	if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask))
 		return;
 #endif
 
diff --git a/tools/regression/bin/sh/expansion/heredoc1.0 b/tools/regression/bin/sh/expansion/heredoc1.0
new file mode 100644
index 000000000000..a67b2da2e5f2
--- /dev/null
+++ b/tools/regression/bin/sh/expansion/heredoc1.0
@@ -0,0 +1,25 @@
+# $FreeBSD$
+
+f() { return $1; }
+
+[ `f 42; { cat; } <<EOF
+$?
+EOF
+` = 42 ] || echo compound command bad
+
+[ `f 42; (cat) <<EOF
+$?
+EOF
+` = 42 ] || echo subshell bad
+
+long=`printf %08192d 0`
+
+[ `f 42; { cat; } <<EOF
+$long.$?
+EOF
+` = $long.42 ] || echo long compound command bad
+
+[ `f 42; (cat) <<EOF
+$long.$?
+EOF
+` = $long.42 ] || echo long subshell bad
diff --git a/tools/regression/bin/sh/expansion/heredoc2.0 b/tools/regression/bin/sh/expansion/heredoc2.0
new file mode 100644
index 000000000000..255143296d4a
--- /dev/null
+++ b/tools/regression/bin/sh/expansion/heredoc2.0
@@ -0,0 +1,15 @@
+# $FreeBSD$
+
+f() { return $1; }
+
+[ `f 42; cat <<EOF
+$?
+EOF
+` = 42 ] || echo simple command bad
+
+long=`printf %08192d 0`
+
+[ `f 42; cat <<EOF
+$long.$?
+EOF
+` = $long.42 ] || echo long simple command bad
diff --git a/usr.bin/calendar/calendars/calendar.freebsd b/usr.bin/calendar/calendars/calendar.freebsd
index a04476a12185..32e13e0f507e 100644
--- a/usr.bin/calendar/calendars/calendar.freebsd
+++ b/usr.bin/calendar/calendars/calendar.freebsd
@@ -294,6 +294,7 @@
 10/22	Jean-Sebastien Pedron <dumbbell@FreeBSD.org> born in Redon, Ille-et-Vilaine, France, 1980
 10/23	Mario Sergio Fujikawa Ferreira <lioux@FreeBSD.org> born in Brasilia, Distrito Federal, Brazil, 1976
 10/25	Eric Melville <eric@FreeBSD.org> born in Los Gatos, California, United States, 1980
+10/25	Julien Laffaye <jlaffaye@FreeBSD.org> born in Toulouse, France, 1988
 10/26	Philip M. Gollucci <pgollucci@FreeBSD.org> born in Silver Spring, Maryland, United States, 1979
 10/27	Takanori Watanabe <takawata@FreeBSD.org> born in Numazu, Shizuoka, Japan, 1972
 11/05	M. Warner Losh <imp@FreeBSD.org> born in Kansas City, Kansas, United States, 1966
diff --git a/usr.bin/calendar/io.c b/usr.bin/calendar/io.c
index ef98d5da9b5f..eb37eace8bdf 100644
--- a/usr.bin/calendar/io.c
+++ b/usr.bin/calendar/io.c
@@ -346,7 +346,7 @@ closecal(FILE *fp)
 	write(pdes[1], pw->pw_name, strlen(pw->pw_name));
 	write(pdes[1], ">\nTo: <", 7);
 	write(pdes[1], pw->pw_name, strlen(pw->pw_name));
-	write(pdes[1], ">\nSubject: ", 12);
+	write(pdes[1], ">\nSubject: ", 11);
 	write(pdes[1], dayname, strlen(dayname));
 	write(pdes[1], "'s Calendar\nPrecedence: bulk\n\n", 30);
 
diff --git a/usr.bin/grep/Makefile b/usr.bin/grep/Makefile
index 8cd490d9b390..f09a7d6f2e9f 100644
--- a/usr.bin/grep/Makefile
+++ b/usr.bin/grep/Makefile
@@ -28,8 +28,6 @@ MLINKS= grep.1 egrep.1 \
 bsdgrep.1: grep.1
 	cp ${.ALLSRC} ${.TARGET}
 
-WARNS?=	6
-
 LDADD=	-lz -lbz2
 DPADD=	${LIBZ} ${LIBBZ2}
 
diff --git a/usr.bin/iconv/Makefile b/usr.bin/iconv/Makefile
index 7e8f6e7613a1..deab0920fbde 100644
--- a/usr.bin/iconv/Makefile
+++ b/usr.bin/iconv/Makefile
@@ -7,8 +7,6 @@ PROG=	iconv
 #SRCS=	iconv.c
 MAN=	iconv.1
 
-WARNS?=	6
-
 LDADD+=	-lcrypt
 DPADD+=	${LIBCRYPT}
 
diff --git a/usr.bin/kdump/mksubr b/usr.bin/kdump/mksubr
index afff24d5fd6b..dbde92b762ec 100644
--- a/usr.bin/kdump/mksubr
+++ b/usr.bin/kdump/mksubr
@@ -345,7 +345,7 @@ auto_if_type "sockfamilyname" "AF_[[:alnum:]]+[[:space:]]+" "sys/socket.h"
 auto_if_type "sockipprotoname" "IPPROTO_[[:alnum:]]+[[:space:]]+" "netinet/in.h"
 auto_switch_type "sockoptname" "SO_[A-Z]+[[:space:]]+0x[0-9]+" "sys/socket.h"
 auto_switch_type "socktypename" "SOCK_[A-Z]+[[:space:]]+[1-9]+[0-9]*" "sys/socket.h"
-auto_switch_type "ptraceopname" "PT_[[:alnum:]]+[[:space:]]+[0-9]+" "sys/ptrace.h"
+auto_switch_type "ptraceopname" "PT_[[:alnum:]_]+[[:space:]]+[0-9]+" "sys/ptrace.h"
 
 cat <<_EOF_
 /*
diff --git a/usr.bin/rctl/Makefile b/usr.bin/rctl/Makefile
index 1088cf11e9de..c5c32eba52eb 100644
--- a/usr.bin/rctl/Makefile
+++ b/usr.bin/rctl/Makefile
@@ -6,6 +6,4 @@ MAN=	rctl.8
 DPADD=	${LIBUTIL}
 LDADD=	-lutil
 
-WARNS?=	6
-
 .include <bsd.prog.mk>
diff --git a/usr.bin/su/su.1 b/usr.bin/su/su.1
index 8b79d41b6cb7..d9180e37a0be 100644
--- a/usr.bin/su/su.1
+++ b/usr.bin/su/su.1
@@ -28,7 +28,7 @@
 .\"	@(#)su.1	8.2 (Berkeley) 4/18/94
 .\" $FreeBSD$
 .\"
-.Dd July 1, 2008
+.Dd June 6, 2011
 .Dt SU 1
 .Os
 .Sh NAME
@@ -193,16 +193,22 @@ PAM configuration for
 .Sh EXAMPLES
 .Bl -tag -width 5n -compact
 .It Li "su -m man -c catman"
-Runs the command
-.Li catman
-as user
-.Li man .
+Starts a shell as user
+.Li man ,
+and runs the command
+.Li catman .
 You will be asked for man's password unless your real UID is 0.
 Note that the
 .Fl m
 option is required since user
 .Dq man
 does not have a valid shell by default.
+In this example,
+.Fl c
+is passed to the shell of the user
+.Dq man ,
+and is not interpreted as an argument to
+.Nm .
 .It Li "su -m man -c 'catman /usr/share/man /usr/local/man'"
 Same as above, but the target command consists of more than a
 single word and hence is quoted for use with the
diff --git a/usr.sbin/bluetooth/ath3kfw/Makefile b/usr.sbin/bluetooth/ath3kfw/Makefile
index 0ff010f3394f..373655b210dd 100644
--- a/usr.sbin/bluetooth/ath3kfw/Makefile
+++ b/usr.sbin/bluetooth/ath3kfw/Makefile
@@ -2,7 +2,6 @@
 
 PROG=		ath3kfw
 MAN=		ath3kfw.8
-WARNS?=		6
 DPADD+=		${LIBUSB}
 LDADD+=		-lusb
 
diff --git a/usr.sbin/bsdinstall/scripts/netconfig_ipv6 b/usr.sbin/bsdinstall/scripts/netconfig_ipv6
index 70bd203e19d7..8bff816f792b 100755
--- a/usr.sbin/bsdinstall/scripts/netconfig_ipv6
+++ b/usr.sbin/bsdinstall/scripts/netconfig_ipv6
@@ -33,7 +33,6 @@
 
 #
 # TODO: 
-# - Add -R /sbin/resolvconf to rtsol once support is in tree.
 # - Add DHCPv6 support once FreeBSD ships with it.
 # 
 
diff --git a/usr.sbin/bsnmpd/modules/snmp_wlan/Makefile b/usr.sbin/bsnmpd/modules/snmp_wlan/Makefile
index 9b8628bfc509..6a53d46fb297 100644
--- a/usr.sbin/bsnmpd/modules/snmp_wlan/Makefile
+++ b/usr.sbin/bsnmpd/modules/snmp_wlan/Makefile
@@ -6,8 +6,6 @@ MOD=	wlan
 SRCS=	wlan_snmp.c wlan_sys.c
 CFLAGS+= -DSNMPTREE_TYPES
 
-WARNS=            6
-
 XSYM=	begemotWlan
 
 BMIBS=	BEGEMOT-WIRELESS-MIB.txt
diff --git a/usr.sbin/lastlogin/lastlogin.8 b/usr.sbin/lastlogin/lastlogin.8
index 063016399e77..fdbc871dcf17 100644
--- a/usr.sbin/lastlogin/lastlogin.8
+++ b/usr.sbin/lastlogin/lastlogin.8
@@ -31,7 +31,7 @@
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
-.Dd January 11, 1996
+.Dd June 6, 2011
 .Dt LASTLOGIN 8
 .Os
 .Sh NAME
@@ -39,6 +39,8 @@
 .Nd indicate last login time of users
 .Sh SYNOPSIS
 .Nm
+.Op Fl f Ar file
+.Op Fl rt
 .Op Ar user ...
 .Sh DESCRIPTION
 The
@@ -54,8 +56,8 @@ If more than one
 .Ar user
 is given, the session information for each user is printed in
 the order given on the command line.
-Otherwise, information
-for all users is printed, sorted by name.
+Otherwise, information for all users is printed.
+By default, the entries are sorted by user name.
 .Pp
 The
 .Nm
@@ -63,6 +65,18 @@ utility differs from
 .Xr last 1
 in that it only prints information regarding the very last login session.
 The last login database is never turned over or deleted in standard usage.
+.Pp
+The following options are available:
+.Bl -tag -width indent
+.It Fl f Ar file
+Open last login database
+.Ar file
+instead of the system-wide database.
+.It Fl r
+Print the entries in reverse sorted order.
+.It Fl t
+Sort the elements by last login time, instead of user name.
+.El
 .Sh FILES
 .Bl -tag -width /var/log/utx.lastlogin -compact
 .It Pa /var/log/utx.lastlogin
diff --git a/usr.sbin/lastlogin/lastlogin.c b/usr.sbin/lastlogin/lastlogin.c
index 4c085475ad0b..2f8dd78331e2 100644
--- a/usr.sbin/lastlogin/lastlogin.c
+++ b/usr.sbin/lastlogin/lastlogin.c
@@ -47,30 +47,59 @@ __RCSID("$NetBSD: lastlogin.c,v 1.4 1998/02/03 04:45:35 perry Exp $");
 	int	main(int, char **);
 static	void	output(struct utmpx *);
 static	void	usage(void);
+static int	utcmp_user(const void *, const void *);
+
+static int	order = 1;
+static const char *file = NULL;
+static int	(*utcmp)(const void *, const void *) = utcmp_user;
 
 static int
-utcmp(const void *u1, const void *u2)
+utcmp_user(const void *u1, const void *u2)
 {
 
-	return (strcmp(((const struct utmpx *)u1)->ut_user,
+	return (order * strcmp(((const struct utmpx *)u1)->ut_user,
 	    ((const struct utmpx *)u2)->ut_user));
 }
 
+static int
+utcmp_time(const void *u1, const void *u2)
+{
+	time_t t1, t2;
+
+	t1 = ((const struct utmpx *)u1)->ut_tv.tv_sec;
+	t2 = ((const struct utmpx *)u2)->ut_tv.tv_sec;
+	return (t1 < t2 ? order : t1 > t2 ? -order : 0);
+}
+
 int
 main(int argc, char *argv[])
 {
 	int	ch, i, ulistsize;
 	struct utmpx *u, *ulist;
 
-	while ((ch = getopt(argc, argv, "")) != -1) {
-		usage();
+	while ((ch = getopt(argc, argv, "f:rt")) != -1) {
+		switch (ch) {
+		case 'f':
+			file = optarg;
+			break;
+		case 'r':
+			order = -1;
+			break;
+		case 't':
+			utcmp = utcmp_time;
+			break;
+		default:
+			usage();
+		}
 	}
+	argc -= optind;
+	argv += optind;
 
-	/* Process usernames given on the command line. */
-	if (argc > 1) {
-		for (i = 1; i < argc; ++i) {
-			if (setutxdb(UTXDB_LASTLOGIN, NULL) != 0)
-				errx(1, "failed to open lastlog database");
+	if (argc > 0) {
+		/* Process usernames given on the command line. */
+		for (i = 0; i < argc; i++) {
+			if (setutxdb(UTXDB_LASTLOGIN, file) != 0)
+				err(1, "failed to open lastlog database");
 			if ((u = getutxuser(argv[i])) == NULL) {
 				warnx("user '%s' not found", argv[i]);
 				continue;
@@ -78,11 +107,10 @@ main(int argc, char *argv[])
 			output(u);
 			endutxent();
 		}
-	}
-	/* Read all lastlog entries, looking for active ones */
-	else {
-		if (setutxdb(UTXDB_LASTLOGIN, NULL) != 0)
-			errx(1, "failed to open lastlog database");
+	} else {
+		/* Read all lastlog entries, looking for active ones. */
+		if (setutxdb(UTXDB_LASTLOGIN, file) != 0)
+			err(1, "failed to open lastlog database");
 		ulist = NULL;
 		ulistsize = 0;
 		while ((u = getutxent()) != NULL) {
@@ -119,6 +147,6 @@ output(struct utmpx *u)
 static void
 usage(void)
 {
-	fprintf(stderr, "usage: lastlogin [user ...]\n");
+	fprintf(stderr, "usage: lastlogin [-f file] [-rt] [user ...]\n");
 	exit(1);
 }
diff --git a/usr.sbin/pmccontrol/pmccontrol.c b/usr.sbin/pmccontrol/pmccontrol.c
index cce1e0ef14ac..80d4bd7c3dde 100644
--- a/usr.sbin/pmccontrol/pmccontrol.c
+++ b/usr.sbin/pmccontrol/pmccontrol.c
@@ -28,8 +28,9 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
-#include <sys/types.h>
+#include <sys/param.h>
 #include <sys/queue.h>
+#include <sys/cpuset.h>
 #include <sys/sysctl.h>
 
 #include <assert.h>
@@ -133,26 +134,32 @@ pmcc_init_debug(void)
 static int
 pmcc_do_enable_disable(struct pmcc_op_list *op_list)
 {
+	long cpusetsize;
 	int c, error, i, j, ncpu, npmc, t;
-	cpumask_t haltedcpus, cpumask;
+	cpuset_t haltedcpus, cpumask;
 	struct pmcc_op *np;
 	unsigned char *map;
 	unsigned char op;
 	int cpu, pmc;
-	size_t dummy;
+	size_t setsize;
 
 	if ((ncpu = pmc_ncpu()) < 0)
 		err(EX_OSERR, "Unable to determine the number of cpus");
 
 	/* Determine the set of active CPUs. */
-	cpumask = (1 << ncpu) - 1;
-	dummy = sizeof(int);
-	haltedcpus = (cpumask_t) 0;
-	if (ncpu > 1 && sysctlbyname("machdep.hlt_cpus", &haltedcpus,
-	    &dummy, NULL, 0) < 0)
+	cpusetsize = sysconf(_SC_CPUSET_SIZE);
+	if (cpusetsize == -1 || (u_long)cpusetsize > sizeof(cpuset_t)) {
 		err(EX_OSERR, "ERROR: Cannot determine which CPUs are "
 		    "halted");
-	cpumask &= ~haltedcpus;
+	}
+	CPU_ZERO(&haltedcpus);
+	setsize = (size_t)cpusetsize;
+	if (ncpu > 1 && sysctlbyname("machdep.hlt_cpus", &haltedcpus,
+	    &setsize, NULL, 0) < 0)
+		err(EX_OSERR, "ERROR: Cannot determine which CPUs are "
+		    "halted");
+	CPU_FILL(&cpumask);
+	CPU_NAND(&cpumask, &haltedcpus);
 
 	/* Determine the maximum number of PMCs in any CPU. */
 	npmc = 0;
@@ -200,7 +207,7 @@ pmcc_do_enable_disable(struct pmcc_op_list *op_list)
 
 		if (cpu == PMCC_CPU_ALL)
 			for (i = 0; i < ncpu; i++) {
-				if ((1 << i) & cpumask)
+				if (CPU_ISSET(i, &cpumask))
 					SET_PMCS(i, pmc, op);
 			}
 		else
diff --git a/usr.sbin/route6d/route6d.c b/usr.sbin/route6d/route6d.c
index 761deeb18dbf..4868829c56b2 100644
--- a/usr.sbin/route6d/route6d.c
+++ b/usr.sbin/route6d/route6d.c
@@ -106,7 +106,7 @@ static const char _rcsid[] = "$KAME: route6d.c,v 1.104 2003/10/31 00:30:20 itoju
 
 struct	ifc {			/* Configuration of an interface */
 	char	ifc_name[IFNAMSIZ];		/* if name */
-	struct	ifc *ifc_next;
+	TAILQ_ENTRY(ifc) ifc_next;
 	int	ifc_index;			/* if index */
 	int	ifc_mtu;			/* if mtu */
 	int	ifc_metric;			/* if metric */
@@ -120,7 +120,7 @@ struct	ifc {			/* Configuration of an interface */
 
 struct	ifac {			/* Adddress associated to an interface */ 
 	struct	ifc *ifa_conf;		/* back pointer */
-	struct	ifac *ifa_next;
+	TAILQ_ENTRY(ifac) ifa_next;
 	struct	in6_addr ifa_addr;	/* address */
 	struct	in6_addr ifa_raddr;	/* remote address, valid in p2p */
 	int	ifa_plen;		/* prefix length */
@@ -134,8 +134,10 @@ struct	iff {
 	struct	iff *iff_next;
 };
 
-struct	ifc *ifc;
-struct  iff *iff_head;
+TAILQ_HEAD(, ifc) ifc = 
+    TAILQ_HEAD_INITIALIZER(ifc);
+TAILQ_HEAD(, iff) iff_head =
+    TAILQ_HEAD_INITIALIZER(iff_head);
 int	nifc;		/* number of valid ifc's */
 struct	ifc **index2ifc;
 unsigned int	nindex2ifc;
@@ -166,7 +168,7 @@ struct	rip6 *ripbuf;	/* packet buffer for sending */
  */
 
 struct	riprt {
-	struct	riprt *rrt_next;	/* next destination */
+	TAILQ_ENTRY(riprt) rrt_next;	/* next destination */
 	struct	riprt *rrt_same;	/* same destination - future use */
 	struct	netinfo6 rrt_info;	/* network info */
 	struct	in6_addr rrt_gw;	/* gateway */
diff --git a/usr.sbin/rtadvd/advcap.c b/usr.sbin/rtadvd/advcap.c
index b0f5ee0f8629..7280f4097d66 100644
--- a/usr.sbin/rtadvd/advcap.c
+++ b/usr.sbin/rtadvd/advcap.c
@@ -81,7 +81,6 @@
 static	char *tbuf;
 static	int hopcount;	/* detect infinite loops in termcap, init 0 */
 
-static const char *remotefile;
 extern const char *conffile;
 
 int tgetent(char *, char *);
@@ -204,7 +203,7 @@ tnchktc(void)
 		write(STDERR_FILENO, "Infinite tc= loop\n", 18);
 		return (0);
 	}
-	if (getent(tcbuf, tcname, remotefile) != 1) {
+	if (getent(tcbuf, tcname, conffile) != 1) {
 		return (0);
 	}
 	for (q = tcbuf; *q++ != ':'; )
diff --git a/usr.sbin/rtadvd/config.c b/usr.sbin/rtadvd/config.c
index 4c870b9f0596..1b48868692ba 100644
--- a/usr.sbin/rtadvd/config.c
+++ b/usr.sbin/rtadvd/config.c
@@ -109,7 +109,7 @@ dname_labelenc(char *dst, const char *src)
 	/* Always need a 0-length label at the tail. */
 	*dst++ = '\0';
 
-	syslog(LOG_DEBUG, "<%s> labellen = %d", __func__, dst - dst_origin);
+	syslog(LOG_DEBUG, "<%s> labellen = %td", __func__, dst - dst_origin);
 	return (dst - dst_origin);
 }
 
@@ -229,13 +229,7 @@ getconfig(int idx)
 		     __func__, intface);
 	}
 
-	rai = malloc(sizeof(*rai));
-	if (rai == NULL) {
-		syslog(LOG_INFO, "<%s> %s: can't allocate enough memory",
-		    __func__, intface);
-		exit(1);
-	}
-	memset(rai, 0, sizeof(*rai));
+	ELM_MALLOC(rai, exit(1));
 	TAILQ_INIT(&rai->rai_prefix);
 #ifdef ROUTEINFO
 	TAILQ_INIT(&rai->rai_route);
@@ -394,10 +388,7 @@ getconfig(int idx)
 
 		/* allocate memory to store prefix information */
 		ELM_MALLOC(pfx, exit(1));
-
-		/* link into chain */
-		TAILQ_INSERT_TAIL(&rai->rai_prefix, pfx, pfx_next);
-		rai->rai_pfxs++;
+		pfx->pfx_rainfo = rai;
 		pfx->pfx_origin = PREFIX_FROM_CONFIG;
 
 		if (inet_pton(AF_INET6, addr, &pfx->pfx_prefix) != 1) {
@@ -481,6 +472,9 @@ getconfig(int idx)
 			pfx->pfx_pltimeexpire =
 			    now.tv_sec + pfx->pfx_preflifetime;
 		}
+		/* link into chain */
+		TAILQ_INSERT_TAIL(&rai->rai_prefix, pfx, pfx_next);
+		rai->rai_pfxs++;
 	}
 	if (rai->rai_advifprefix && rai->rai_pfxs == 0)
 		get_prefix(rai);
diff --git a/usr.sbin/rtadvd/dump.c b/usr.sbin/rtadvd/dump.c
index f79319bd3e36..fac3fb24de81 100644
--- a/usr.sbin/rtadvd/dump.c
+++ b/usr.sbin/rtadvd/dump.c
@@ -310,7 +310,7 @@ dname_labeldec(char *dst, size_t dlen, const char *src)
 	    (src + len) <= src_last) {
 		if (dst != dst_origin)
 			*dst++ = '.';
-		syslog(LOG_DEBUG, "<%s> labellen = %d", __func__, len);
+		syslog(LOG_DEBUG, "<%s> labellen = %zd", __func__, len);
 		memcpy(dst, src, len);
 		src += len;
 		dst += len;
diff --git a/usr.sbin/rtadvd/rtadvd.c b/usr.sbin/rtadvd/rtadvd.c
index 614d109f7596..6423e7bf97e4 100644
--- a/usr.sbin/rtadvd/rtadvd.c
+++ b/usr.sbin/rtadvd/rtadvd.c
@@ -789,7 +789,7 @@ rtadvd_input(void)
 #else
 	if ((size_t)i < sizeof(struct icmp6_hdr)) {
 		syslog(LOG_ERR,
-		    "<%s> packet size(%d) is too short",
+		    "<%s> packet size(%zd) is too short",
 		    __func__, i);
 		return;
 	}
@@ -827,7 +827,7 @@ rtadvd_input(void)
 		if ((size_t)i < sizeof(struct nd_router_solicit)) {
 			syslog(LOG_NOTICE,
 			    "<%s> RS from %s on %s does not have enough "
-			    "length (len = %d)",
+			    "length (len = %zd)",
 			    __func__,
 			    inet_ntop(AF_INET6, &rcvfrom.sin6_addr, ntopbuf,
 			    sizeof(ntopbuf)),
@@ -873,7 +873,7 @@ rtadvd_input(void)
 		if ((size_t)i < sizeof(struct nd_router_advert)) {
 			syslog(LOG_NOTICE,
 			    "<%s> RA from %s on %s does not have enough "
-			    "length (len = %d)",
+			    "length (len = %zd)",
 			    __func__,
 			    inet_ntop(AF_INET6, &rcvfrom.sin6_addr, ntopbuf,
 			    sizeof(ntopbuf)),
diff --git a/usr.sbin/rtsold/rtsol.c b/usr.sbin/rtsold/rtsol.c
index afa935e72da7..fe8bfaf5d08e 100644
--- a/usr.sbin/rtsold/rtsol.c
+++ b/usr.sbin/rtsold/rtsol.c
@@ -625,8 +625,10 @@ ra_opt_handler(struct ifinfo *ifi)
 
 	if (!TAILQ_EMPTY(&sm_rdnss_head))
 		CALL_SCRIPT(RESADD, &sm_rdnss_head);
+#if 0
 	else
 		CALL_SCRIPT(RESDEL, NULL);
+#endif
 
 ra_opt_handler_freeit:
 	/* Clear script message queue. */
diff --git a/usr.sbin/rtsold/rtsold.c b/usr.sbin/rtsold/rtsold.c
index a8b7ca727cfd..4413d55f7853 100644
--- a/usr.sbin/rtsold/rtsold.c
+++ b/usr.sbin/rtsold/rtsold.c
@@ -124,6 +124,7 @@ main(int argc, char **argv)
 	int maxfd;
 #endif
 	int rtsock;
+	char *argv0;
 
 #ifndef SMALL
 	/* rtsold */
@@ -134,6 +135,8 @@ main(int argc, char **argv)
 	fflag = 1;
 	once = 1;
 #endif
+	argv0 = argv[0];
+
 	while ((ch = getopt(argc, argv, opts)) != -1) {
 		switch (ch) {
 		case 'a':
diff --git a/usr.sbin/tcpdrop/tcpdrop.c b/usr.sbin/tcpdrop/tcpdrop.c
index 6aae85ccb4d1..dce6c6d4be9d 100644
--- a/usr.sbin/tcpdrop/tcpdrop.c
+++ b/usr.sbin/tcpdrop/tcpdrop.c
@@ -20,12 +20,13 @@
 #include <sys/cdefs.h>
 __FBSDID("$FreeBSD$");
 
+#include <sys/param.h>
 #include <sys/types.h>
 #include <sys/socket.h>
 #include <sys/socketvar.h>
 #include <sys/sysctl.h>
-#include <netinet/in.h>
 
+#include <netinet/in.h>
 #include <netinet/in_pcb.h>
 #define TCPSTATES
 #include <netinet/tcp_fsm.h>