diff --git a/UPDATING b/UPDATING index 1ff9e8ffb76d..c8c3582e81e2 100644 --- a/UPDATING +++ b/UPDATING @@ -22,6 +22,33 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 9.x IS SLOW: machines to maximize performance. (To disable malloc debugging, run ln -s aj /etc/malloc.conf.) +20110608: + The following sysctls and tunables are retired on x86 platforms: + machdep.hlt_cpus + machdep.hlt_logical_cpus + The following sysctl is retired: + machdep.hyperthreading_allowed + The sysctls were supposed to provide a way to dynamically offline and + online selected CPUs on x86 platforms, but the implementation has not + been reliable especially with SCHED_ULE scheduler. + machdep.hyperthreading_allowed tunable is still available to ignore + hyperthreading CPUs at OS level. + Individual CPUs can be disabled using hint.lapic.X.disabled tunable, + where X is an APIC ID of a CPU. Be advised, though, that disabling + CPUs in non-uniform fashion will result in non-uniform topology and + may lead to sub-optimal system performance with SCHED_ULE, which is + a default scheduler. + +20110607: + cpumask_t type is retired and cpuset_t is used in order to describe + a mask of CPUs. + +20110531: + Changes to ifconfig(8) for dynamic address family detection mandate + that you are running a kernel of 20110525 or later. Make sure to + follow the update procedure to boot a new kernel before installing + world. + 20110513: Support for sun4v architecture is officially dropped diff --git a/bin/sh/eval.c b/bin/sh/eval.c index 404de3312182..585f91ead32a 100644 --- a/bin/sh/eval.c +++ b/bin/sh/eval.c @@ -409,6 +409,7 @@ evalsubshell(union node *n, int flags) struct job *jp; int backgnd = (n->type == NBACKGND); + oexitstatus = exitstatus; expredir(n->nredir.redirect); if ((!backgnd && flags & EV_EXIT && !have_traps()) || forkshell(jp = makejob(n, 1), n, backgnd) == 0) { @@ -436,6 +437,7 @@ evalredir(union node *n, int flags) struct jmploc *savehandler; volatile int in_redirect = 1; + oexitstatus = exitstatus; expredir(n->nredir.redirect); savehandler = handler; if (setjmp(jmploc.loc)) { @@ -478,7 +480,6 @@ expredir(union node *n) for (redir = n ; redir ; redir = redir->nfile.next) { struct arglist fn; fn.lastp = &fn.list; - oexitstatus = exitstatus; switch (redir->type) { case NFROM: case NTO: diff --git a/contrib/groff/tmac/doc-common b/contrib/groff/tmac/doc-common index 26dcc562f4d7..20d0cab8c875 100644 --- a/contrib/groff/tmac/doc-common +++ b/contrib/groff/tmac/doc-common @@ -543,6 +543,7 @@ .ds doc-operating-system-FreeBSD-7.3 7.3 .ds doc-operating-system-FreeBSD-8.0 8.0 .ds doc-operating-system-FreeBSD-8.1 8.1 +.ds doc-operating-system-FreeBSD-8.2 8.2 . .ds doc-operating-system-Darwin-8.0.0 8.0.0 .ds doc-operating-system-Darwin-8.1.0 8.1.0 @@ -563,6 +564,17 @@ .ds doc-operating-system-Darwin-9.4.0 9.4.0 .ds doc-operating-system-Darwin-9.5.0 9.5.0 .ds doc-operating-system-Darwin-9.6.0 9.6.0 +.ds doc-operating-system-Darwin-9.7.0 9.7.0 +.ds doc-operating-system-Darwin-9.8.0 9.8.0 +.ds doc-operating-system-Darwin-10.6.0 10.6.0 +.ds doc-operating-system-Darwin-10.1.0 10.1.0 +.ds doc-operating-system-Darwin-10.2.0 10.2.0 +.ds doc-operating-system-Darwin-10.3.0 10.3.0 +.ds doc-operating-system-Darwin-10.4.0 10.4.0 +.ds doc-operating-system-Darwin-10.5.0 10.5.0 +.ds doc-operating-system-Darwin-10.6.0 10.6.0 +.ds doc-operating-system-Darwin-10.7.0 10.7.0 +.ds doc-operating-system-Darwin-11.0.0 11.0.0 . .ds doc-operating-system-DragonFly-1.0 1.0 .ds doc-operating-system-DragonFly-1.1 1.1 diff --git a/contrib/groff/tmac/doc-syms b/contrib/groff/tmac/doc-syms index d2a070d6d76e..0e862adb5e90 100644 --- a/contrib/groff/tmac/doc-syms +++ b/contrib/groff/tmac/doc-syms @@ -617,6 +617,8 @@ .\" POSIX Part 1: System API .ds doc-str-St--p1003.1 \*[doc-Tn-font-size]\%IEEE\*[doc-str-St] Std 1003.1 .as doc-str-St--p1003.1 " (\*[Lq]\)\*[Px]\*[doc-str-St].1\*[Rq]) +.ds doc-str-St--p1003.1b \*[doc-Tn-font-size]\%IEEE\*[doc-str-St] Std 1003.1b +.as doc-str-St--p1003.1b " (\*[Lq]\)\*[Px]\*[doc-str-St].1\*[Rq]) .ds doc-str-St--p1003.1-88 \*[doc-Tn-font-size]\%IEEE\*[doc-str-St] Std 1003.1-1988 .as doc-str-St--p1003.1-88 " (\*[Lq]\)\*[Px]\*[doc-str-St].1\*[Rq]) .ds doc-str-St--p1003.1-90 \*[doc-Tn-font-size]ISO/IEC\*[doc-str-St] 9945-1:1990 @@ -754,6 +756,7 @@ . .ds doc-str-Lb-libarm ARM Architecture Library (libarm, \-larm) .ds doc-str-Lb-libarm32 ARM32 Architecture Library (libarm32, \-larm32) +.ds doc-str-Lb-libbsm Basic Security Module Library (libbsm, \-lbsm) .ds doc-str-Lb-libc Standard C\~Library (libc, \-lc) .ds doc-str-Lb-libcdk Curses Development Kit Library (libcdk, \-lcdk) .ds doc-str-Lb-libcompat Compatibility Library (libcompat, \-lcompat) @@ -779,6 +782,7 @@ .ds doc-str-Lb-libpthread \*[Px] \*[doc-str-Lb]Threads Library (libpthread, \-lpthread) .ds doc-str-Lb-libresolv DNS Resolver Library (libresolv, \-lresolv) .ds doc-str-Lb-librt \*[Px] \*[doc-str-Lb]Real-time Library (librt, \-lrt) +.ds doc-str-Lb-libSystem System Library (libSystem, \-lSystem) .ds doc-str-Lb-libtermcap Termcap Access Library (libtermcap, \-ltermcap) .ds doc-str-Lb-libusbhid USB Human Interface Devices Library (libusbhid, \-lusbhid) .ds doc-str-Lb-libutil System Utilities Library (libutil, \-lutil) diff --git a/contrib/groff/tmac/doc.tmac b/contrib/groff/tmac/doc.tmac index 7bdbf325927f..c1c32dc21919 100644 --- a/contrib/groff/tmac/doc.tmac +++ b/contrib/groff/tmac/doc.tmac @@ -1197,8 +1197,14 @@ . if !\n[doc-arg-limit] \ . ds doc-macro-name Aq . -. ds doc-quote-left < -. ds doc-quote-right > +. ie "\*[doc-macro-name]"An" \{\ +. ds doc-quote-left < +. ds doc-quote-right > +. \} +. el \{\ +. ds doc-quote-left \[la] +. ds doc-quote-right \[ra] +. \} . . doc-enclose-string \$@ .. @@ -1527,7 +1533,10 @@ . if !\n[doc-arg-limit] \ . ds doc-macro-name Ao . -. ds doc-quote-left \[la] +. ie "\*[doc-macro-name]"An" \ +. ds doc-quote-left < +. el \ +. ds doc-quote-left \[la] . . doc-enclose-open \$@ .. @@ -1546,7 +1555,10 @@ . if !\n[doc-arg-limit] \ . ds doc-macro-name Ac . -. ds doc-quote-right \[ra] +. ie "\*[doc-macro-name]"An" \ +. ds doc-quote-right > +. el \ +. ds doc-quote-right \[ra] . . doc-enclose-close \$@ .. diff --git a/etc/network.subr b/etc/network.subr index 64fb0fe65bee..ce71b786f307 100644 --- a/etc/network.subr +++ b/etc/network.subr @@ -100,25 +100,19 @@ ifconfig_up() # inet6 specific if afexists inet6; then - if ipv6if $1; then - if checkyesno ipv6_gateway_enable; then - _ipv6_opts="-accept_rtadv" - fi - else - if checkyesno ipv6_activate_all_interfaces; then - _ipv6_opts="-ifdisabled" - else - _ipv6_opts="ifdisabled" - fi - - # backward compatibility: $ipv6_enable - case $ipv6_enable in - [Yy][Ee][Ss]|[Tt][Rr][Uu][Ee]|[Oo][Nn]|1) - _ipv6_opts="${_ipv6_opts} accept_rtadv" - ;; - esac + if checkyesno ipv6_activate_all_interfaces; then + _ipv6_opts="-ifdisabled" + elif [ "$1" != "lo0" ]; then + _ipv6_opts="ifdisabled" fi + # backward compatibility: $ipv6_enable + case $ipv6_enable in + [Yy][Ee][Ss]|[Tt][Rr][Uu][Ee]|[Oo][Nn]|1) + _ipv6_opts="${_ipv6_opts} accept_rtadv" + ;; + esac + if [ -n "${_ipv6_opts}" ]; then ifconfig $1 inet6 ${_ipv6_opts} fi diff --git a/gnu/usr.bin/gdb/kgdb/kthr.c b/gnu/usr.bin/gdb/kgdb/kthr.c index 5036c9c9aaa9..461f408a73d9 100644 --- a/gnu/usr.bin/gdb/kgdb/kthr.c +++ b/gnu/usr.bin/gdb/kgdb/kthr.c @@ -28,6 +28,7 @@ __FBSDID("$FreeBSD$"); #include +#include #include #include #include @@ -37,6 +38,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -48,7 +50,7 @@ static CORE_ADDR dumppcb; static int dumptid; static CORE_ADDR stoppcbs; -static __cpumask_t stopped_cpus; +static cpuset_t stopped_cpus; static struct kthr *first; struct kthr *curkthr; @@ -76,6 +78,7 @@ kgdb_thr_init(void) { struct proc p; struct thread td; + long cpusetsize; struct kthr *kt; CORE_ADDR addr; uintptr_t paddr; @@ -102,10 +105,11 @@ kgdb_thr_init(void) dumptid = -1; addr = kgdb_lookup("stopped_cpus"); - if (addr != 0) - kvm_read(kvm, addr, &stopped_cpus, sizeof(stopped_cpus)); - else - stopped_cpus = 0; + CPU_ZERO(&stopped_cpus); + cpusetsize = sysconf(_SC_CPUSET_SIZE); + if (cpusetsize != -1 && (u_long)cpusetsize <= sizeof(cpuset_t) && + addr != 0) + kvm_read(kvm, addr, &stopped_cpus, cpusetsize); stoppcbs = kgdb_lookup("stoppcbs"); @@ -126,8 +130,8 @@ kgdb_thr_init(void) kt->kaddr = addr; if (td.td_tid == dumptid) kt->pcb = dumppcb; - else if (td.td_state == TDS_RUNNING && ((1 << td.td_oncpu) & stopped_cpus) - && stoppcbs != 0) + else if (td.td_state == TDS_RUNNING && stoppcbs != 0 && + CPU_ISSET(td.td_oncpu, &stopped_cpus)) kt->pcb = (uintptr_t) stoppcbs + sizeof(struct pcb) * td.td_oncpu; else kt->pcb = (uintptr_t)td.td_pcb; diff --git a/gnu/usr.bin/groff/tmac/mdoc.local b/gnu/usr.bin/groff/tmac/mdoc.local index d46f5db54f3b..befc87d3f6a6 100644 --- a/gnu/usr.bin/groff/tmac/mdoc.local +++ b/gnu/usr.bin/groff/tmac/mdoc.local @@ -34,7 +34,6 @@ .\" FreeBSD .Lb values .ds doc-str-Lb-libarchive Streaming Archive Library (libarchive, \-larchive) .ds doc-str-Lb-libbluetooth Bluetooth User Library (libbluetooth, \-lbluetooth) -.ds doc-str-Lb-libbsm Basic Security Module User Library (libbsm, \-lbsm) .ds doc-str-Lb-libc_r Reentrant C\~Library (libc_r, \-lc_r) .ds doc-str-Lb-libcalendar Calendar Arithmetic Library (libcalendar, \-lcalendar) .ds doc-str-Lb-libcam Common Access Method User Library (libcam, \-lcam) @@ -75,7 +74,7 @@ . .\" FreeBSD releases not found in doc-common .ds doc-operating-system-FreeBSD-7.4 7.4 -.ds doc-operating-system-FreeBSD-8.2 8.2 +.ds doc-operating-system-FreeBSD-8.3 8.3 .ds doc-operating-system-FreeBSD-9.0 9.0 . .\" Definitions not (yet) in doc-syms diff --git a/lib/libiconv/Makefile b/lib/libiconv/Makefile index 078771e9b05b..71c288573256 100644 --- a/lib/libiconv/Makefile +++ b/lib/libiconv/Makefile @@ -19,7 +19,6 @@ SRCS= citrus_bcs.c citrus_bcs_strtol.c citrus_bcs_strtoul.c \ citrus_module.c citrus_none.c citrus_pivot_factory.c \ citrus_prop.c citrus_stdenc.c iconv.c -WARNS?= 6 CFLAGS+= --param max-inline-insns-single=128 -I ${.CURDIR}/../../include -I${.CURDIR}/../libc/include .include diff --git a/lib/libkvm/kvm_pcpu.c b/lib/libkvm/kvm_pcpu.c index fd09fc88fb1c..bc73baff19cc 100644 --- a/lib/libkvm/kvm_pcpu.c +++ b/lib/libkvm/kvm_pcpu.c @@ -39,11 +39,13 @@ __FBSDID("$FreeBSD$"); #include +#include #include #include #include #include #include +#include #include "kvm_private.h" @@ -118,6 +120,9 @@ _kvm_pcpu_clear(void) void * kvm_getpcpu(kvm_t *kd, int cpu) { + long kcpusetsize; + ssize_t nbytes; + uintptr_t readptr; char *buf; if (kd == NULL) { @@ -125,6 +130,10 @@ kvm_getpcpu(kvm_t *kd, int cpu) return (NULL); } + kcpusetsize = sysconf(_SC_CPUSET_SIZE); + if (kcpusetsize == -1 || (u_long)kcpusetsize > sizeof(cpuset_t)) + return ((void *)-1); + if (maxcpu == 0) if (_kvm_pcpu_init(kd) < 0) return ((void *)-1); @@ -137,8 +146,26 @@ kvm_getpcpu(kvm_t *kd, int cpu) _kvm_err(kd, kd->program, "out of memory"); return ((void *)-1); } - if (kvm_read(kd, (uintptr_t)pcpu_data[cpu], buf, sizeof(struct pcpu)) != - sizeof(struct pcpu)) { + nbytes = sizeof(struct pcpu) - 2 * kcpusetsize; + readptr = (uintptr_t)pcpu_data[cpu]; + if (kvm_read(kd, readptr, buf, nbytes) != nbytes) { + _kvm_err(kd, kd->program, "unable to read per-CPU data"); + free(buf); + return ((void *)-1); + } + + /* Fetch the valid cpuset_t objects. */ + CPU_ZERO((cpuset_t *)(buf + nbytes)); + CPU_ZERO((cpuset_t *)(buf + nbytes + sizeof(cpuset_t))); + readptr += nbytes; + if (kvm_read(kd, readptr, buf + nbytes, kcpusetsize) != kcpusetsize) { + _kvm_err(kd, kd->program, "unable to read per-CPU data"); + free(buf); + return ((void *)-1); + } + readptr += kcpusetsize; + if (kvm_read(kd, readptr, buf + nbytes + sizeof(cpuset_t), + kcpusetsize) != kcpusetsize) { _kvm_err(kd, kd->program, "unable to read per-CPU data"); free(buf); return ((void *)-1); diff --git a/lib/libmemstat/memstat_uma.c b/lib/libmemstat/memstat_uma.c index 4aae61a9bb05..485a4f279906 100644 --- a/lib/libmemstat/memstat_uma.c +++ b/lib/libmemstat/memstat_uma.c @@ -27,6 +27,7 @@ */ #include +#include #include #define LIBMEMSTAT /* Cause vm_page.h not to include opt_vmpage.h */ @@ -44,6 +45,7 @@ #include #include #include +#include #include "memstat.h" #include "memstat_internal.h" @@ -313,7 +315,8 @@ memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle) struct uma_keg *kzp, kz; int hint_dontsearch, i, mp_maxid, ret; char name[MEMTYPE_MAXNAME]; - __cpumask_t all_cpus; + cpuset_t all_cpus; + long cpusetsize; kvm_t *kvm; kvm = (kvm_t *)kvm_handle; @@ -337,7 +340,13 @@ memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle) list->mtl_error = ret; return (-1); } - ret = kread_symbol(kvm, X_ALL_CPUS, &all_cpus, sizeof(all_cpus), 0); + cpusetsize = sysconf(_SC_CPUSET_SIZE); + if (cpusetsize == -1 || (u_long)cpusetsize > sizeof(cpuset_t)) { + list->mtl_error = MEMSTAT_ERROR_KVM_NOSYMBOL; + return (-1); + } + CPU_ZERO(&all_cpus); + ret = kread_symbol(kvm, X_ALL_CPUS, &all_cpus, cpusetsize, 0); if (ret != 0) { list->mtl_error = ret; return (-1); @@ -407,7 +416,7 @@ memstat_kvm_uma(struct memory_type_list *list, void *kvm_handle) if (kz.uk_flags & UMA_ZFLAG_INTERNAL) goto skip_percpu; for (i = 0; i < mp_maxid + 1; i++) { - if ((all_cpus & (1 << i)) == 0) + if (!CPU_ISSET(i, &all_cpus)) continue; ucp = &ucp_array[i]; mtp->mt_numallocs += ucp->uc_allocs; diff --git a/libexec/comsat/comsat.c b/libexec/comsat/comsat.c index d0ff7a43dbd6..2a0fd3c764a9 100644 --- a/libexec/comsat/comsat.c +++ b/libexec/comsat/comsat.c @@ -180,7 +180,7 @@ notify(struct utmpx *utp, char file[], off_t offset, int folder) dsyslog(LOG_DEBUG, "%s: wrong mode on %s", utp->ut_user, tty); return; } - dsyslog(LOG_DEBUG, "notify %s on %s\n", utp->ut_user, tty); + dsyslog(LOG_DEBUG, "notify %s on %s", utp->ut_user, tty); switch (fork()) { case -1: syslog(LOG_NOTICE, "fork failed (%m)"); diff --git a/release/ia64/mkisoimages.sh b/release/ia64/mkisoimages.sh index 33ba1927ad61..8709c1221773 100644 --- a/release/ia64/mkisoimages.sh +++ b/release/ia64/mkisoimages.sh @@ -48,28 +48,32 @@ EFIPART=efipart.sys if [ $bootable = yes ]; then EFISZ=65536 MNT=/mnt - dd if=/dev/zero of=$BASE/$EFIPART count=$EFISZ - md=`mdconfig -a -t vnode -f $BASE/$EFIPART` + dd if=/dev/zero of=$EFIPART count=$EFISZ + md=`mdconfig -a -t vnode -f $EFIPART` newfs_msdos -F 12 -S 512 -h 4 -o 0 -s $EFISZ -u 16 $md mount -t msdosfs /dev/$md $MNT mkdir -p $MNT/efi/boot $MNT/boot $MNT/boot/kernel cp -R $BASE/boot/defaults $MNT/boot cp $BASE/boot/kernel/kernel $MNT/boot/kernel - cp $BASE/boot/kernel/ispfw.ko $MNT/boot/kernel + if [ -s $BASE/boot/kernel/ispfw.ko ]; then + cp $BASE/boot/kernel/ispfw.ko $MNT/boot/kernel + fi cp $BASE/boot/device.hints $MNT/boot cp $BASE/boot/loader.* $MNT/boot - cp $BASE/boot/mfsroot.gz $MNT/boot + if [ -s $BASE/boot/mfsroot.gz ]; then + cp $BASE/boot/mfsroot.gz $MNT/boot + fi cp $BASE/boot/support.4th $MNT/boot mv $MNT/boot/loader.efi $MNT/efi/boot/bootia64.efi umount $MNT mdconfig -d -u $md - BOOTOPTS="-b bootimage=i386;$EFIPART -o no-emul-boot" + BOOTOPTS="-o bootimage=i386;$EFIPART -o no-emul-boot" else BOOTOPTS="" fi -echo "/dev/iso9660/$LABEL / cd9660 ro 0 0" > $1/etc/fstab +echo "/dev/iso9660/$LABEL / cd9660 ro 0 0" > $BASE/etc/fstab makefs -t cd9660 $BOOTOPTS -o rockridge -o label=$LABEL $NAME $BASE $* -rm -f $BASE/$EFIPART +rm -f $EFIPART rm $1/etc/fstab exit 0 diff --git a/sbin/geom/class/part/geom_part.c b/sbin/geom/class/part/geom_part.c index ae9f4b7b4516..55a055e1e21a 100644 --- a/sbin/geom/class/part/geom_part.c +++ b/sbin/geom/class/part/geom_part.c @@ -341,9 +341,10 @@ gpart_autofill_resize(struct gctl_req *req) errc(EXIT_FAILURE, error, "Invalid alignment param"); if (alignment == 0) errx(EXIT_FAILURE, "Invalid alignment param"); + } else { lba = pp->lg_stripesize / pp->lg_sectorsize; if (lba > 0) - alignment = g_lcm(lba, alignment); + alignment = lba; } error = gctl_delete_param(req, "alignment"); if (error) @@ -491,13 +492,9 @@ gpart_autofill(struct gctl_req *req) if (has_size && has_start && !has_alignment) goto done; - /* - * If stripesize is not zero, then recalculate alignment value. - * Use LCM from stripesize and user specified alignment. - */ len = pp->lg_stripesize / pp->lg_sectorsize; - if (len > 0 ) - alignment = g_lcm(len, alignment); + if (len > 0 && !has_alignment) + alignment = len; /* Adjust parameters to stripeoffset */ offset = pp->lg_stripeoffset / pp->lg_sectorsize; diff --git a/sbin/geom/class/part/gpart.8 b/sbin/geom/class/part/gpart.8 index 940620c9b5a3..4365d6b8bcfa 100644 --- a/sbin/geom/class/part/gpart.8 +++ b/sbin/geom/class/part/gpart.8 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd May 30, 2011 +.Dd June 6, 2011 .Dt GPART 8 .Os .Sh NAME @@ -530,16 +530,17 @@ about its use. .El .\" .Sh PARTITION TYPES +Partition types are identified on disk by particular strings or magic +values. The .Nm -utility uses symbolic names for common partition types to avoid that the -user needs to know what the partitioning scheme in question is and what -the actual number or identification needs to be used for a particular -type. +utility uses symbolic names for common partition types to avoid the +user needing to know these values or other details of the partitioning +scheme in question. The .Nm utility also allows the user to specify scheme-specific partition types -for partition types that do not have symbol names. +for partition types that do not have symbolic names. The symbolic names currently understood are: .Bl -tag -width ".Cm freebsd-vinum" .It Cm bios-boot @@ -740,30 +741,30 @@ action or reverted with the .Cm undo action. .Sh RECOVERING -The GEOM class PART supports recovering of partition tables only for GPT. +The GEOM PART class supports recovering of partition tables only for GPT. The GUID partition table has a primary and secondary (backup) copy of -metadata for redundance. -They are stored in the begining and in the end of device respectively. -Therefore it is acceptable to have some corruptions in the metadata that -are not fatal to work with GPT. -When kernel detects corrupt metadata it marks this table as corrupt and -reports about corruption. -Any changes in corrupt table are prohibited except +metadata for redundance, these are stored at the begining and the end +of the device respectively. +As a result of having two copies, it is acceptable to have some corruption +within the metadata that is not fatal to the working of GPT. +When the kernel detects corrupt metadata it marks this table as corrupt and +reports the corruption. +Any operations on corrupt tables are prohibited except for .Cm destroy and .Cm recover . .Pp -In case when only first sector is corrupt kernel can not detect GPT even -if partition table is not corrupt. -You can write protective MBR with +If the first sector of a provider is corrupt, the kernel can not detect GPT +even if partition table itself is not corrupt. +You can rewrite the protective MBR using the .Xr dd 1 -command to restore ability of GPT detection. -The copy of protective MBR is usually located in the +command, to restore the ability to detect the GPT. +The copy of the protective MBR is usually located in the .Pa /boot/pmbr file. .Pp -In case when some of metadata is corrupt you will get to know about this -from kernel's messages like these: +If one GPT header appears to be corrupt but the other copy remains intact, +the kernel will log the following: .Bd -literal -offset indent GEOM: provider: the primary GPT table is corrupt or invalid. GEOM: provider: using the secondary instead -- recovery strongly advised. @@ -777,32 +778,31 @@ GEOM: provider: using the primary only -- recovery suggested. .Pp Also .Nm -commands like +commands such as .Cm show , status and .Cm list -will report about corrupt table. +will report about corrupt tables. .Pp -In case when the size of device has changed (e.g.\& volume expansion) the -secondary GPT header will become located not in the last sector. +If the size of the device has changed (e.g.\& volume expansion) the +secondary GPT header will no longer be located in the last sector. This is not a metadata corruption, but it is dangerous because any -corruption of the primary GPT will lead to lost of partition table. -Kernel reports about this problem with message: +corruption of the primary GPT will lead to loss of partition table. +This problem is reported by the kernel with the message: .Bd -literal -offset indent GEOM: provider: the secondary GPT header is not in the last LBA. .Ed .Pp -A corrupt table can be recovered with +This situation can be recovered with the .Cm recover command. -This command does reconstruction of corrupt metadata using -known valid metadata. -Also it can relocate secondary GPT to the end of device. +This command reconstructs the corrupt metadata using known valid +metadata and relocates the secondary GPT to the end of the device. .Pp .Em NOTE : -The GEOM class PART can detect the same partition table on different GEOM -providers and some of them will be marked as corrupt. -Be careful when choosing a provider for recovering. +The GEOM PART class can detect the same partition table visible through +different GEOM providers, and some of them will be marked as corrupt. +Be careful when choosing a provider for recovery. If you choose incorrectly you can destroy the metadata of another GEOM class, e.g.\& GEOM MIRROR or GEOM LABEL. .Sh SYSCTL VARIABLES @@ -815,11 +815,11 @@ The default value is shown next to each variable. .Bl -tag -width indent .It Va kern.geom.part.check_integrity : No 1 This variable controls the behaviour of metadata integrity checks. -When integrity checks are enabled +When integrity checks are enabled, the .Nm PART -GEOM class verifies all generic partition parameters that it gets from the +GEOM class verifies all generic partition parameters obtained from the disk metadata. -If some inconsistency is detected, partition table will be +If some inconsistency is detected, the partition table will be rejected with a diagnostic message: .Sy "GEOM_PART: Integrity check failed (provider, scheme)" . .El diff --git a/sbin/geom/class/sched/Makefile b/sbin/geom/class/sched/Makefile index a6ccd584e217..6f54d3f36f9f 100644 --- a/sbin/geom/class/sched/Makefile +++ b/sbin/geom/class/sched/Makefile @@ -5,6 +5,4 @@ GEOM_CLASS= sched -WARNS?= 6 - .include diff --git a/sbin/ifconfig/af_inet6.c b/sbin/ifconfig/af_inet6.c index 7fdca0dd25ab..585be0500923 100644 --- a/sbin/ifconfig/af_inet6.c +++ b/sbin/ifconfig/af_inet6.c @@ -499,8 +499,8 @@ static struct cmd inet6_cmds[] = { DEF_CMD("-autoconf", -IN6_IFF_AUTOCONF, setip6flags), DEF_CMD("accept_rtadv", ND6_IFF_ACCEPT_RTADV, setnd6flags), DEF_CMD("-accept_rtadv",-ND6_IFF_ACCEPT_RTADV, setnd6flags), - DEF_CMD("defroute_rtadv",ND6_IFF_DEFROUTE_RTADV,setnd6flags), - DEF_CMD("-defroute_rtadv",-ND6_IFF_DEFROUTE_RTADV,setnd6flags), + DEF_CMD("no_radr", ND6_IFF_NO_RADR, setnd6flags), + DEF_CMD("-no_radr", -ND6_IFF_NO_RADR, setnd6flags), DEF_CMD("defaultif", 1, setnd6defif), DEF_CMD("-defaultif", -1, setnd6defif), DEF_CMD("ifdisabled", ND6_IFF_IFDISABLED, setnd6flags), diff --git a/sbin/ifconfig/af_nd6.c b/sbin/ifconfig/af_nd6.c index 273e8ff985d8..eed00eaf4044 100644 --- a/sbin/ifconfig/af_nd6.c +++ b/sbin/ifconfig/af_nd6.c @@ -58,7 +58,7 @@ static const char rcsid[] = #define MAX_SYSCTL_TRY 5 #define ND6BITS "\020\001PERFORMNUD\002ACCEPT_RTADV\003PREFER_SOURCE" \ "\004IFDISABLED\005DONT_SET_IFROUTE\006AUTO_LINKLOCAL" \ - "\007DEFROUTE_RTADV\020DEFAULTIF" + "\007NO_RADR\020DEFAULTIF" static int isnd6defif(int); void setnd6flags(const char *, int, int, const struct afswtch *); @@ -159,7 +159,6 @@ nd6_status(int s) } isdefif = isnd6defif(s6); close(s6); - if (nd.ndi.flags == 0 && !isdefif) return; printb("\tnd6 options", diff --git a/sbin/ipfw/main.c b/sbin/ipfw/main.c index fb3f3fbfdfb3..109b62b7de43 100644 --- a/sbin/ipfw/main.c +++ b/sbin/ipfw/main.c @@ -356,6 +356,7 @@ ipfw_main(int oldac, char **oldav) */ co.do_nat = 0; co.do_pipe = 0; + co.use_set = 0; if (!strncmp(*av, "nat", strlen(*av))) co.do_nat = 1; else if (!strncmp(*av, "pipe", strlen(*av))) @@ -444,7 +445,7 @@ static void ipfw_readfile(int ac, char *av[]) { #define MAX_ARGS 32 - char buf[BUFSIZ]; + char buf[4096]; char *progname = av[0]; /* original program name */ const char *cmd = NULL; /* preprocessor name, if any */ const char *filename = av[ac-1]; /* file to read */ @@ -552,7 +553,7 @@ ipfw_readfile(int ac, char *av[]) } } - while (fgets(buf, BUFSIZ, f)) { /* read commands */ + while (fgets(buf, sizeof(buf), f)) { /* read commands */ char linename[20]; char *args[2]; diff --git a/sbin/mount/mount.8 b/sbin/mount/mount.8 index 2140b3724b78..fdfd75c031a2 100644 --- a/sbin/mount/mount.8 +++ b/sbin/mount/mount.8 @@ -28,7 +28,7 @@ .\" @(#)mount.8 8.8 (Berkeley) 6/16/94 .\" $FreeBSD$ .\" -.Dd April 28, 2011 +.Dd June 6, 2011 .Dt MOUNT 8 .Os .Sh NAME @@ -348,7 +348,6 @@ option) may be passed as a comma separated list; these options are distinguished by a leading .Dq \&- (dash). -Options that take a value are specified using the syntax -option=value. For example, the .Nm command: @@ -363,6 +362,16 @@ to execute the equivalent of: /sbin/mount_cd9660 -e /dev/cd0 /cdrom .Ed .Pp +Options that take a value are specified using the -option=value syntax: +.Bd -literal -offset indent +mount -t msdosfs -o -u=fred,-g=wheel /dev/da0s1 /mnt +.Ed +.Pp +is equivalent to +.Bd -literal -offset indent +/sbin/mount_msdosfs -u fred -g wheel /dev/da0s1 /mnt +.Ed +.Pp Additional options specific to file system types which are not internally known (see the description of the diff --git a/sbin/mount/mount.c b/sbin/mount/mount.c index acded1c24379..2229419bc2ce 100644 --- a/sbin/mount/mount.c +++ b/sbin/mount/mount.c @@ -243,7 +243,7 @@ main(int argc, char *argv[]) const char *mntfromname, **vfslist, *vfstype; struct fstab *fs; struct statfs *mntbuf; - int all, ch, i, init_flags, late, mntsize, rval, have_fstab, ro; + int all, ch, i, init_flags, late, failok, mntsize, rval, have_fstab, ro; char *cp, *ep, *options; all = init_flags = late = 0; @@ -328,6 +328,10 @@ main(int argc, char *argv[]) continue; if (hasopt(fs->fs_mntops, "late") && !late) continue; + if (hasopt(fs->fs_mntops, "failok")) + failok = 1; + else + failok = 0; if (!(init_flags & MNT_UPDATE) && ismounted(fs, mntbuf, mntsize)) continue; @@ -335,7 +339,7 @@ main(int argc, char *argv[]) mntbuf->f_flags); if (mountfs(fs->fs_vfstype, fs->fs_spec, fs->fs_file, init_flags, options, - fs->fs_mntops)) + fs->fs_mntops) && !failok) rval = 1; } } else if (fstab_style) { @@ -717,6 +721,14 @@ mangle(char *options, struct cpa *a) * before mountd starts. */ continue; + } else if (strcmp(p, "failok") == 0) { + /* + * "failok" is used to prevent certain file + * systems from being causing the system to + * drop into single user mode in the boot + * cycle, and is not a real mount option. + */ + continue; } else if (strncmp(p, "mountprog", 9) == 0) { /* * "mountprog" is used to force the use of diff --git a/sbin/rcorder/rcorder.8 b/sbin/rcorder/rcorder.8 index b17b0a337ca1..a47f01306c33 100644 --- a/sbin/rcorder/rcorder.8 +++ b/sbin/rcorder/rcorder.8 @@ -31,7 +31,7 @@ .\" .\" $FreeBSD$ .\" -.Dd June 9, 2008 +.Dd June 6, 2011 .Dt RCORDER 8 .Os .Sh NAME @@ -89,6 +89,12 @@ and lines may appear, but all such lines must appear in a sequence without any intervening lines, as once a line that does not follow the format is reached, parsing stops. +Note that for historical reasons, +.Dq Li REQUIRES , +.Dq Li PROVIDES , +and +.Dq Li KEYWORDS +are also accepted in addition to the above. .Pp The options are as follows: .Bl -tag -width indent diff --git a/share/man/man4/amdsbwd.4 b/share/man/man4/amdsbwd.4 index 370cfa8220c0..cfb2f79624d8 100644 --- a/share/man/man4/amdsbwd.4 +++ b/share/man/man4/amdsbwd.4 @@ -25,12 +25,12 @@ .\" .\" $FreeBSD$ .\" -.Dd November 30, 2009 +.Dd June 7, 2011 .Dt AMDSBWD 4 .Os .Sh NAME .Nm amdsbwd -.Nd device driver for the AMD SB600/SB700/SB710/SB750 watchdog timer +.Nd device driver for the AMD SB600/SB7xx/SB8xx watchdog timers .Sh SYNOPSIS To compile this driver into the kernel, place the following line in your @@ -51,7 +51,7 @@ The driver provides .Xr watchdog 4 support for the watchdog timers present on -AMD SB600 and SB7xx south bridge chips. +AMD SB600, SB7xx and SB8xx southbridges. .Sh SEE ALSO .Xr watchdog 4 , .Xr watchdog 8 , @@ -61,12 +61,14 @@ AMD SB600 and SB7xx south bridge chips. The .Nm driver first appeared in -.Fx 9.0 . +.Fx 7.3 +and +.Fx 8.1 . .Sh AUTHORS .An -nosplit The .Nm driver was written by -.An Andiry Gapon Aq avg@FreeBSD.org . +.An Andriy Gapon Aq avg@FreeBSD.org . This manual page was written by -.An Andiry Gapon Aq avg@FreeBSD.org . +.An Andriy Gapon Aq avg@FreeBSD.org . diff --git a/share/man/man4/atkbd.4 b/share/man/man4/atkbd.4 index 0c486e2eefbc..73831c2bb538 100644 --- a/share/man/man4/atkbd.4 +++ b/share/man/man4/atkbd.4 @@ -26,7 +26,7 @@ .\" .\" $FreeBSD$ .\" -.Dd May 20, 2011 +.Dd January 29, 2008 .Dt ATKBD 4 .Os .Sh NAME @@ -176,11 +176,6 @@ When this option is given, the .Nm driver will not test the keyboard port during the probe routine. Some machines hang during boot when this test is performed. -.It bit 4 (PROBE_TYPEMATIC) -When this option is given, the -.Nm -driver will try to probe the keyboard typematic rate on boot. -Some machines hang during boot when this test is performed. .El .\".Sh FILES .Sh EXAMPLES diff --git a/share/man/man5/fstab.5 b/share/man/man5/fstab.5 index 1c6f17e40c51..adbf48928ef1 100644 --- a/share/man/man5/fstab.5 +++ b/share/man/man5/fstab.5 @@ -32,7 +32,7 @@ .\" @(#)fstab.5 8.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd November 23, 2008 +.Dd June 7, 2011 .Dt FSTAB 5 .Os .Sh NAME @@ -70,7 +70,8 @@ remote file system to be mounted. The second field, .Pq Fa fs_file , describes the mount point for the file system. -For swap partitions, this field should be specified as ``none''. +For swap partitions, this field should be specified as +.Dq none . .Pp The third field, .Pq Fa fs_vfstype , @@ -125,7 +126,11 @@ sync,noatime,-m=644,-M=755,-u=foo,-g=bar in the option field of .Nm . .Pp -If the options ``userquota'' and/or ``groupquota'' are specified, +If the options +.Dq userquota +and/or +.Dq groupquota +are specified, the file system is automatically processed by the .Xr quotacheck 8 command, and user and/or group disk quotas are enabled with @@ -147,7 +152,18 @@ this location can be specified as: userquota=/var/quotas/tmp.user .Ed .Pp -If the option ``noauto'' is specified, the file system will not be automatically +If the option +.Dq failok +is specified, +the system will ignore any error which happens during the mount of that filesystem, +which would otherwise cause the system to drop into single user mode. +This option is implemented by the +.Xr mount 8 +command and will not be passed to the kernel. +.Pp +If the option +.Dq noauto +is specified, the file system will not be automatically mounted at system startup. Note that, for network file systems of third party types @@ -170,13 +186,19 @@ field (it is not deleted from the field). If .Fa fs_type -is ``rw'' or ``ro'' then the file system whose name is given in the +is +.Dq rw +or +.Dq ro +then the file system whose name is given in the .Fa fs_file field is normally mounted read-write or read-only on the specified special file. If .Fa fs_type -is ``sw'' then the special file is made available as a piece of swap +is +.Dq sw +then the special file is made available as a piece of swap space by the .Xr swapon 8 command at the end of the system reboot procedure. @@ -187,7 +209,9 @@ and are unused. If .Fa fs_type -is specified as ``xx'' the entry is ignored. +is specified as +.Dq xx +the entry is ignored. This is useful to show disk partitions which are currently unused. .Pp The fifth field, diff --git a/share/misc/committers-ports.dot b/share/misc/committers-ports.dot index bfe9f4ed9fd4..d97abcc92978 100644 --- a/share/misc/committers-ports.dot +++ b/share/misc/committers-ports.dot @@ -97,6 +97,7 @@ itetcu [label="Ion-Mihai Tetcu\nitetcu@FreeBSD.org\n2006/06/07"] jacula [label="Giuseppe Pilichi\njacula@FreeBSD.org\n2010/04/05"] jadawin [label="Philippe Audeoud\njadawin@FreeBSD.org\n2008/03/02"] jkim [label="Jung-uk Kim\njkim@FreeBSD.org\n2007/09/12"] +jlaffaye [label="Julien Laffaye\njlaffaye@FreeBSD.org\n2011/06/06"] jmelo [label="Jean Milanez Melo\njmelo@FreeBSD.org\n2006/03/31"] joerg [label="Joerg Wunsch\njoerg@FreeBSD.org\n1994/08/22"] johans [label="Johan Selst\njohans@FreeBSD.org\n2006/04/01"] @@ -204,6 +205,8 @@ arved -> stefan asami -> obrien +bapt -> jlaffaye + beat -> decke beech -> glarkin @@ -401,6 +404,7 @@ tabthorpe -> dhn tabthorpe -> fluffy tabthorpe -> jacula tabthorpe -> jadawin +tabthorpe -> jlaffaye tabthorpe -> pgj tabthorpe -> rene diff --git a/sys/amd64/acpica/acpi_wakeup.c b/sys/amd64/acpica/acpi_wakeup.c index 57341c938f14..29e66c52a2f2 100644 --- a/sys/amd64/acpica/acpi_wakeup.c +++ b/sys/amd64/acpica/acpi_wakeup.c @@ -78,7 +78,7 @@ static void acpi_stop_beep(void *); #ifdef SMP static int acpi_wakeup_ap(struct acpi_softc *, int); -static void acpi_wakeup_cpus(struct acpi_softc *, cpumask_t); +static void acpi_wakeup_cpus(struct acpi_softc *, const cpuset_t *); #endif #define WAKECODE_VADDR(sc) ((sc)->acpi_wakeaddr + (3 * PAGE_SIZE)) @@ -173,7 +173,7 @@ acpi_wakeup_ap(struct acpi_softc *sc, int cpu) #define BIOS_WARM (0x0a) static void -acpi_wakeup_cpus(struct acpi_softc *sc, cpumask_t wakeup_cpus) +acpi_wakeup_cpus(struct acpi_softc *sc, const cpuset_t *wakeup_cpus) { uint32_t mpbioswarmvec; int cpu; @@ -192,7 +192,7 @@ acpi_wakeup_cpus(struct acpi_softc *sc, cpumask_t wakeup_cpus) /* Wake up each AP. */ for (cpu = 1; cpu < mp_ncpus; cpu++) { - if ((wakeup_cpus & (1 << cpu)) == 0) + if (!CPU_ISSET(cpu, wakeup_cpus)) continue; if (acpi_wakeup_ap(sc, cpu) == 0) { /* restore the warmstart vector */ @@ -214,7 +214,7 @@ int acpi_sleep_machdep(struct acpi_softc *sc, int state) { #ifdef SMP - cpumask_t wakeup_cpus; + cpuset_t wakeup_cpus; #endif register_t cr3, rf; ACPI_STATUS status; @@ -244,10 +244,9 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state) if (savectx(susppcbs[0])) { #ifdef SMP - if (wakeup_cpus != 0 && suspend_cpus(wakeup_cpus) == 0) { - device_printf(sc->acpi_dev, - "Failed to suspend APs: CPU mask = 0x%jx\n", - (uintmax_t)(wakeup_cpus & ~stopped_cpus)); + if (!CPU_EMPTY(&wakeup_cpus) && + suspend_cpus(wakeup_cpus) == 0) { + device_printf(sc->acpi_dev, "Failed to suspend APs\n"); goto out; } #endif @@ -282,8 +281,8 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state) PCPU_SET(switchtime, 0); PCPU_SET(switchticks, ticks); #ifdef SMP - if (wakeup_cpus != 0) - acpi_wakeup_cpus(sc, wakeup_cpus); + if (!CPU_EMPTY(&wakeup_cpus)) + acpi_wakeup_cpus(sc, &wakeup_cpus); #endif acpi_resync_clock(sc); ret = 0; @@ -291,7 +290,7 @@ acpi_sleep_machdep(struct acpi_softc *sc, int state) out: #ifdef SMP - if (wakeup_cpus != 0) + if (!CPU_EMPTY(&wakeup_cpus)) restart_cpus(wakeup_cpus); #endif diff --git a/sys/amd64/amd64/intr_machdep.c b/sys/amd64/amd64/intr_machdep.c index 4edef81c01fa..3a8953153ff8 100644 --- a/sys/amd64/amd64/intr_machdep.c +++ b/sys/amd64/amd64/intr_machdep.c @@ -443,8 +443,7 @@ DB_SHOW_COMMAND(irqs, db_show_irqs) * allocate CPUs round-robin. */ -/* The BSP is always a valid target. */ -static cpumask_t intr_cpus = (1 << 0); +static cpuset_t intr_cpus; static int current_cpu; /* @@ -466,7 +465,7 @@ intr_next_cpu(void) current_cpu++; if (current_cpu > mp_maxid) current_cpu = 0; - } while (!(intr_cpus & (1 << current_cpu))); + } while (!CPU_ISSET(current_cpu, &intr_cpus)); mtx_unlock_spin(&icu_lock); return (apic_id); } @@ -497,7 +496,7 @@ intr_add_cpu(u_int cpu) printf("INTR: Adding local APIC %d as a target\n", cpu_apic_ids[cpu]); - intr_cpus |= (1 << cpu); + CPU_SET(cpu, &intr_cpus); } /* @@ -510,6 +509,9 @@ intr_shuffle_irqs(void *arg __unused) struct intsrc *isrc; int i; + /* The BSP is always a valid target. */ + CPU_SETOF(0, &intr_cpus); + /* Don't bother on UP. */ if (mp_ncpus == 1) return; diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c index 94b4037c8e20..f90ad03a5917 100644 --- a/sys/amd64/amd64/machdep.c +++ b/sys/amd64/amd64/machdep.c @@ -51,6 +51,7 @@ __FBSDID("$FreeBSD$"); #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" +#include "opt_mp_watchdog.h" #include "opt_perfmon.h" #include "opt_sched.h" #include "opt_kdtrace.h" @@ -116,6 +117,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -734,9 +736,8 @@ cpu_idle(int busy) CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu); -#ifdef SMP - if (mp_grab_cpu_hlt()) - return; +#ifdef MP_WATCHDOG + ap_watchdog(PCPU_GET(cpuid)); #endif /* If we are busy - try to use fast methods. */ if (busy) { diff --git a/sys/amd64/amd64/mp_machdep.c b/sys/amd64/amd64/mp_machdep.c index 5c900344518b..53988e985726 100644 --- a/sys/amd64/amd64/mp_machdep.c +++ b/sys/amd64/amd64/mp_machdep.c @@ -29,13 +29,13 @@ __FBSDID("$FreeBSD$"); #include "opt_cpu.h" #include "opt_kstack_pages.h" -#include "opt_mp_watchdog.h" #include "opt_sched.h" #include "opt_smp.h" #include #include #include +#include #ifdef GPROF #include #endif @@ -63,7 +63,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include @@ -125,7 +124,7 @@ extern inthand_t IDTVEC(fast_syscall), IDTVEC(fast_syscall32); * Local data and functions. */ -static volatile cpumask_t ipi_nmi_pending; +static volatile cpuset_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ static struct mtx ap_boot_mtx; @@ -159,11 +158,8 @@ static int start_all_aps(void); static int start_ap(int apic_id); static void release_aps(void *dummy); -static int hlt_logical_cpus; static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */ -static cpumask_t hyperthreading_cpus_mask; static int hyperthreading_allowed = 1; -static struct sysctl_ctx_list logical_cpu_clist; static u_int bootMP_size; static void @@ -241,8 +237,11 @@ topo_probe_0x4(void) * logical processors that belong to the same core * as BSP thus deducing number of threads per core. */ - cpuid_count(0x04, 0, p); - max_cores = ((p[0] >> 26) & 0x3f) + 1; + if (cpu_high >= 0x4) { + cpuid_count(0x04, 0, p); + max_cores = ((p[0] >> 26) & 0x3f) + 1; + } else + max_cores = 1; core_id_bits = mask_width(max_logical/max_cores); if (core_id_bits < 0) return; @@ -334,7 +333,7 @@ topo_probe(void) if (cpu_topo_probed) return; - logical_cpus_mask = 0; + CPU_ZERO(&logical_cpus_mask); if (mp_ncpus <= 1) cpu_cores = cpu_logical = 1; else if (cpu_vendor_id == CPU_VENDOR_AMD) @@ -478,7 +477,7 @@ cpu_mp_probe(void) * Always record BSP in CPU map so that the mbuf init code works * correctly. */ - all_cpus = 1; + CPU_SETOF(0, &all_cpus); if (mp_ncpus == 0) { /* * No CPUs were found, so this must be a UP system. Setup @@ -605,6 +604,7 @@ cpu_mp_announce(void) void init_secondary(void) { + cpuset_t tcpuset, tallcpus; struct pcpu *pc; struct nmi_pcpu *np; u_int64_t msr, cr0; @@ -736,19 +736,17 @@ init_secondary(void) CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); + tcpuset = PCPU_GET(cpumask); /* Determine if we are a logical CPU. */ /* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */ if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0) - logical_cpus_mask |= PCPU_GET(cpumask); - - /* Determine if we are a hyperthread. */ - if (hyperthreading_cpus > 1 && - PCPU_GET(apic_id) % hyperthreading_cpus != 0) - hyperthreading_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&logical_cpus_mask, &tcpuset); /* Build our map of 'other' CPUs. */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, &tcpuset); + PCPU_SET(other_cpus, tallcpus); if (bootverbose) lapic_dump("AP"); @@ -835,7 +833,7 @@ assign_cpu_ids(void) if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) { cpu_info[i].cpu_hyperthread = 1; -#if defined(SCHED_ULE) + /* * Don't use HT CPU if it has been disabled by a * tunable. @@ -844,7 +842,6 @@ assign_cpu_ids(void) cpu_info[i].cpu_disabled = 1; continue; } -#endif } /* Don't use this CPU if it has been disabled by a tunable. */ @@ -854,6 +851,11 @@ assign_cpu_ids(void) } } + if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) { + hyperthreading_cpus = 0; + cpu_logical = 1; + } + /* * Assign CPU IDs to local APIC IDs and disable any CPUs * beyond MAXCPU. CPU 0 is always assigned to the BSP. @@ -891,6 +893,7 @@ assign_cpu_ids(void) static int start_all_aps(void) { + cpuset_t tallcpus, tcpuset; vm_offset_t va = boot_address + KERNBASE; u_int64_t *pt4, *pt3, *pt2; u_int32_t mpbioswarmvec; @@ -955,11 +958,14 @@ start_all_aps(void) panic("AP #%d (PHY# %d) failed!", cpu, apic_id); } - all_cpus |= (1 << cpu); /* record AP in CPU map */ + CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } /* build our map of 'other' CPUs */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + tcpuset = PCPU_GET(cpumask); + CPU_NAND(&tallcpus, &tcpuset); + PCPU_SET(other_cpus, tallcpus); /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; @@ -1087,6 +1093,30 @@ SYSCTL_UINT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, &ipi_masked_range_size, 0, ""); #endif /* COUNT_XINVLTLB_HITS */ +/* + * Send an IPI to specified CPU handling the bitmap logic. + */ +static void +ipi_send_cpu(int cpu, u_int ipi) +{ + u_int bitmap, old_pending, new_pending; + + KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); + + if (IPI_IS_BITMAPED(ipi)) { + bitmap = 1 << ipi; + ipi = IPI_BITMAP_VECTOR; + do { + old_pending = cpu_ipi_pending[cpu]; + new_pending = old_pending | bitmap; + } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], + old_pending, new_pending)); + if (old_pending) + return; + } + lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); +} + /* * Flush the TLB on all other CPU's */ @@ -1111,28 +1141,19 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) } static void -smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) +smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) { - int ncpu, othercpus; + int cpu, ncpu, othercpus; othercpus = mp_ncpus - 1; - if (mask == (cpumask_t)-1) { - ncpu = othercpus; - if (ncpu < 1) + if (CPU_ISFULLSET(&mask)) { + if (othercpus < 1) return; } else { - mask &= ~PCPU_GET(cpumask); - if (mask == 0) - return; - ncpu = bitcount32(mask); - if (ncpu > othercpus) { - /* XXX this should be a panic offence */ - printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", - ncpu, othercpus); - ncpu = othercpus; - } - /* XXX should be a panic, implied by mask == 0 above */ - if (ncpu < 1) + sched_pin(); + CPU_NAND(&mask, PCPU_PTR(cpumask)); + sched_unpin(); + if (CPU_EMPTY(&mask)) return; } if (!(read_rflags() & PSL_I)) @@ -1141,39 +1162,25 @@ smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_o smp_tlb_addr1 = addr1; smp_tlb_addr2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); - if (mask == (cpumask_t)-1) + if (CPU_ISFULLSET(&mask)) { + ncpu = othercpus; ipi_all_but_self(vector); - else - ipi_selected(mask, vector); + } else { + ncpu = 0; + while ((cpu = cpusetobj_ffs(&mask)) != 0) { + cpu--; + CPU_CLR(cpu, &mask); + CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, + cpu, vector); + ipi_send_cpu(cpu, vector); + ncpu++; + } + } while (smp_tlb_wait < ncpu) ia32_pause(); mtx_unlock_spin(&smp_ipi_mtx); } -/* - * Send an IPI to specified CPU handling the bitmap logic. - */ -static void -ipi_send_cpu(int cpu, u_int ipi) -{ - u_int bitmap, old_pending, new_pending; - - KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); - - if (IPI_IS_BITMAPED(ipi)) { - bitmap = 1 << ipi; - ipi = IPI_BITMAP_VECTOR; - do { - old_pending = cpu_ipi_pending[cpu]; - new_pending = old_pending | bitmap; - } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], - old_pending, new_pending)); - if (old_pending) - return; - } - lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); -} - void smp_cache_flush(void) { @@ -1220,7 +1227,7 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) } void -smp_masked_invltlb(cpumask_t mask) +smp_masked_invltlb(cpuset_t mask) { if (smp_started) { @@ -1232,7 +1239,7 @@ smp_masked_invltlb(cpumask_t mask) } void -smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) +smp_masked_invlpg(cpuset_t mask, vm_offset_t addr) { if (smp_started) { @@ -1244,7 +1251,7 @@ smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) } void -smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2) +smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { @@ -1297,7 +1304,7 @@ ipi_bitmap_handler(struct trapframe frame) * send an IPI to a set of cpus. */ void -ipi_selected(cpumask_t cpus, u_int ipi) +ipi_selected(cpuset_t cpus, u_int ipi) { int cpu; @@ -1307,12 +1314,12 @@ ipi_selected(cpumask_t cpus, u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, cpus); + CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); - CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi); - while ((cpu = ffs(cpus)) != 0) { + while ((cpu = cpusetobj_ffs(&cpus)) != 0) { cpu--; - cpus &= ~(1 << cpu); + CPU_CLR(cpu, &cpus); + CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } } @@ -1330,7 +1337,7 @@ ipi_cpu(int cpu, u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, 1 << cpu); + CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); @@ -1343,8 +1350,10 @@ void ipi_all_but_self(u_int ipi) { + sched_pin(); if (IPI_IS_BITMAPED(ipi)) { ipi_selected(PCPU_GET(other_cpus), ipi); + sched_unpin(); return; } @@ -1354,7 +1363,8 @@ ipi_all_but_self(u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus)); + CPU_OR_ATOMIC(&ipi_nmi_pending, PCPU_PTR(other_cpus)); + sched_unpin(); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); @@ -1363,7 +1373,7 @@ ipi_all_but_self(u_int ipi) int ipi_nmi_handler() { - cpumask_t cpumask; + cpuset_t cpumask; /* * As long as there is not a simple way to know about a NMI's @@ -1371,11 +1381,13 @@ ipi_nmi_handler() * the global pending bitword an IPI_STOP_HARD has been issued * and should be handled. */ + sched_pin(); cpumask = PCPU_GET(cpumask); - if ((ipi_nmi_pending & cpumask) == 0) + sched_unpin(); + if (!CPU_OVERLAP(&ipi_nmi_pending, &cpumask)) return (1); - atomic_clear_int(&ipi_nmi_pending, cpumask); + CPU_NAND_ATOMIC(&ipi_nmi_pending, &cpumask); cpustop_handler(); return (0); } @@ -1387,23 +1399,25 @@ ipi_nmi_handler() void cpustop_handler(void) { - cpumask_t cpumask; + cpuset_t cpumask; u_int cpu; + sched_pin(); cpu = PCPU_GET(cpuid); cpumask = PCPU_GET(cpumask); + sched_unpin(); savectx(&stoppcbs[cpu]); /* Indicate that we are stopped */ - atomic_set_int(&stopped_cpus, cpumask); + CPU_OR_ATOMIC(&stopped_cpus, &cpumask); /* Wait for restart */ - while (!(started_cpus & cpumask)) + while (!CPU_OVERLAP(&started_cpus, &cpumask)) ia32_pause(); - atomic_clear_int(&started_cpus, cpumask); - atomic_clear_int(&stopped_cpus, cpumask); + CPU_NAND_ATOMIC(&started_cpus, &cpumask); + CPU_NAND_ATOMIC(&stopped_cpus, &cpumask); if (cpu == 0 && cpustop_restartfunc != NULL) { cpustop_restartfunc(); @@ -1418,7 +1432,7 @@ cpustop_handler(void) void cpususpend_handler(void) { - cpumask_t cpumask; + cpuset_t cpumask; register_t cr3, rf; u_int cpu; @@ -1430,7 +1444,7 @@ cpususpend_handler(void) if (savectx(susppcbs[cpu])) { wbinvd(); - atomic_set_int(&stopped_cpus, cpumask); + CPU_OR_ATOMIC(&stopped_cpus, &cpumask); } else { pmap_init_pat(); PCPU_SET(switchtime, 0); @@ -1438,11 +1452,11 @@ cpususpend_handler(void) } /* Wait for resume */ - while (!(started_cpus & cpumask)) + while (!CPU_OVERLAP(&started_cpus, &cpumask)) ia32_pause(); - atomic_clear_int(&started_cpus, cpumask); - atomic_clear_int(&stopped_cpus, cpumask); + CPU_NAND_ATOMIC(&started_cpus, &cpumask); + CPU_NAND_ATOMIC(&stopped_cpus, &cpumask); /* Restore CR3 and enable interrupts */ load_cr3(cr3); @@ -1467,158 +1481,6 @@ release_aps(void *dummy __unused) } SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); -static int -sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS) -{ - cpumask_t mask; - int error; - - mask = hlt_cpus_mask; - error = sysctl_handle_int(oidp, &mask, 0, req); - if (error || !req->newptr) - return (error); - - if (logical_cpus_mask != 0 && - (mask & logical_cpus_mask) == logical_cpus_mask) - hlt_logical_cpus = 1; - else - hlt_logical_cpus = 0; - - if (! hyperthreading_allowed) - mask |= hyperthreading_cpus_mask; - - if ((mask & all_cpus) == all_cpus) - mask &= ~(1<<0); - hlt_cpus_mask = mask; - return (error); -} -SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW, - 0, 0, sysctl_hlt_cpus, "IU", - "Bitmap of CPUs to halt. 101 (binary) will halt CPUs 0 and 2."); - -static int -sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS) -{ - int disable, error; - - disable = hlt_logical_cpus; - error = sysctl_handle_int(oidp, &disable, 0, req); - if (error || !req->newptr) - return (error); - - if (disable) - hlt_cpus_mask |= logical_cpus_mask; - else - hlt_cpus_mask &= ~logical_cpus_mask; - - if (! hyperthreading_allowed) - hlt_cpus_mask |= hyperthreading_cpus_mask; - - if ((hlt_cpus_mask & all_cpus) == all_cpus) - hlt_cpus_mask &= ~(1<<0); - - hlt_logical_cpus = disable; - return (error); -} - -static int -sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS) -{ - int allowed, error; - - allowed = hyperthreading_allowed; - error = sysctl_handle_int(oidp, &allowed, 0, req); - if (error || !req->newptr) - return (error); - -#ifdef SCHED_ULE - /* - * SCHED_ULE doesn't allow enabling/disabling HT cores at - * run-time. - */ - if (allowed != hyperthreading_allowed) - return (ENOTSUP); - return (error); -#endif - - if (allowed) - hlt_cpus_mask &= ~hyperthreading_cpus_mask; - else - hlt_cpus_mask |= hyperthreading_cpus_mask; - - if (logical_cpus_mask != 0 && - (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask) - hlt_logical_cpus = 1; - else - hlt_logical_cpus = 0; - - if ((hlt_cpus_mask & all_cpus) == all_cpus) - hlt_cpus_mask &= ~(1<<0); - - hyperthreading_allowed = allowed; - return (error); -} - -static void -cpu_hlt_setup(void *dummy __unused) -{ - - if (logical_cpus_mask != 0) { - TUNABLE_INT_FETCH("machdep.hlt_logical_cpus", - &hlt_logical_cpus); - sysctl_ctx_init(&logical_cpu_clist); - SYSCTL_ADD_PROC(&logical_cpu_clist, - SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, - "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0, - sysctl_hlt_logical_cpus, "IU", ""); - SYSCTL_ADD_UINT(&logical_cpu_clist, - SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, - "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD, - &logical_cpus_mask, 0, ""); - - if (hlt_logical_cpus) - hlt_cpus_mask |= logical_cpus_mask; - - /* - * If necessary for security purposes, force - * hyperthreading off, regardless of the value - * of hlt_logical_cpus. - */ - if (hyperthreading_cpus_mask) { - SYSCTL_ADD_PROC(&logical_cpu_clist, - SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, - "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW, - 0, 0, sysctl_hyperthreading_allowed, "IU", ""); - if (! hyperthreading_allowed) - hlt_cpus_mask |= hyperthreading_cpus_mask; - } - } -} -SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL); - -int -mp_grab_cpu_hlt(void) -{ - cpumask_t mask; -#ifdef MP_WATCHDOG - u_int cpuid; -#endif - int retval; - - mask = PCPU_GET(cpumask); -#ifdef MP_WATCHDOG - cpuid = PCPU_GET(cpuid); - ap_watchdog(cpuid); -#endif - - retval = 0; - while (mask & hlt_cpus_mask) { - retval = 1; - __asm __volatile("sti; hlt" : : : "memory"); - } - return (retval); -} - #ifdef COUNT_IPIS /* * Setup interrupt counters for IPI handlers. diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c index c9ff9bcc14ae..025ca5f981c9 100644 --- a/sys/amd64/amd64/pmap.c +++ b/sys/amd64/amd64/pmap.c @@ -123,6 +123,8 @@ __FBSDID("$FreeBSD$"); #include #ifdef SMP #include +#else +#include #endif #include @@ -581,7 +583,7 @@ pmap_bootstrap(vm_paddr_t *firstaddr) PMAP_LOCK_INIT(kernel_pmap); kernel_pmap->pm_pml4 = (pdp_entry_t *)PHYS_TO_DMAP(KPML4phys); kernel_pmap->pm_root = NULL; - kernel_pmap->pm_active = -1; /* don't allow deactivation */ + CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); /* @@ -923,19 +925,20 @@ pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde) void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invlpg(va); smp_invlpg(va); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invlpg(va); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg(pmap->pm_active & other_cpus, va); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg(other_cpus, va); } sched_unpin(); } @@ -943,23 +946,23 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; vm_offset_t addr; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); smp_invlpg_range(sva, eva); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg_range(pmap->pm_active & other_cpus, - sva, eva); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg_range(other_cpus, sva, eva); } sched_unpin(); } @@ -967,19 +970,20 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) void pmap_invalidate_all(pmap_t pmap) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invltlb(); smp_invltlb(); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invltlb(); - if (pmap->pm_active & other_cpus) - smp_masked_invltlb(pmap->pm_active & other_cpus); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invltlb(other_cpus); } sched_unpin(); } @@ -995,8 +999,8 @@ pmap_invalidate_cache(void) } struct pde_action { - cpumask_t store; /* processor that updates the PDE */ - cpumask_t invalidate; /* processors that invalidate their TLB */ + cpuset_t store; /* processor that updates the PDE */ + cpuset_t invalidate; /* processors that invalidate their TLB */ vm_offset_t va; pd_entry_t *pde; pd_entry_t newpde; @@ -1007,8 +1011,12 @@ pmap_update_pde_action(void *arg) { struct pde_action *act = arg; - if (act->store == PCPU_GET(cpumask)) + sched_pin(); + if (!CPU_CMP(&act->store, PCPU_PTR(cpumask))) { + sched_unpin(); pde_store(act->pde, act->newpde); + } else + sched_unpin(); } static void @@ -1016,8 +1024,12 @@ pmap_update_pde_teardown(void *arg) { struct pde_action *act = arg; - if ((act->invalidate & PCPU_GET(cpumask)) != 0) + sched_pin(); + if (CPU_OVERLAP(&act->invalidate, PCPU_PTR(cpumask))) { + sched_unpin(); pmap_update_pde_invalidate(act->va, act->newpde); + } else + sched_unpin(); } /* @@ -1032,26 +1044,28 @@ static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { struct pde_action act; - cpumask_t active, cpumask; + cpuset_t active, cpumask, other_cpus; sched_pin(); cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); if (pmap == kernel_pmap) active = all_cpus; else active = pmap->pm_active; - if ((active & PCPU_GET(other_cpus)) != 0) { + if (CPU_OVERLAP(&active, &other_cpus)) { act.store = cpumask; act.invalidate = active; act.va = va; act.pde = pde; act.newpde = newpde; - smp_rendezvous_cpus(cpumask | active, + CPU_OR(&cpumask, &active); + smp_rendezvous_cpus(cpumask, smp_no_rendevous_barrier, pmap_update_pde_action, pmap_update_pde_teardown, &act); } else { pde_store(pde, newpde); - if ((active & cpumask) != 0) + if (CPU_OVERLAP(&active, &cpumask)) pmap_update_pde_invalidate(va, newpde); } sched_unpin(); @@ -1065,7 +1079,7 @@ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invlpg(va); } @@ -1074,7 +1088,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } @@ -1083,7 +1097,7 @@ PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invltlb(); } @@ -1099,7 +1113,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { pde_store(pde, newpde); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) pmap_update_pde_invalidate(va, newpde); } #endif /* !SMP */ @@ -1607,7 +1621,7 @@ pmap_pinit0(pmap_t pmap) PMAP_LOCK_INIT(pmap); pmap->pm_pml4 = (pml4_entry_t *)PHYS_TO_DMAP(KPML4phys); pmap->pm_root = NULL; - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1649,7 +1663,7 @@ pmap_pinit(pmap_t pmap) pmap->pm_pml4[PML4PML4I] = VM_PAGE_TO_PHYS(pml4pg) | PG_V | PG_RW | PG_A | PG_M; pmap->pm_root = NULL; - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -5087,11 +5101,11 @@ pmap_activate(struct thread *td) pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = PCPU_GET(curpmap); #ifdef SMP - atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); - atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); + CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask)); #else - oldpmap->pm_active &= ~PCPU_GET(cpumask); - pmap->pm_active |= PCPU_GET(cpumask); + CPU_NAND(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR(&pmap->pm_active, PCPU_PTR(cpumask)); #endif cr3 = DMAP_TO_PHYS((vm_offset_t)pmap->pm_pml4); td->td_pcb->pcb_cr3 = cr3; diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c index 972484a80a08..13f5cd06b2b3 100644 --- a/sys/amd64/amd64/vm_machdep.c +++ b/sys/amd64/amd64/vm_machdep.c @@ -59,6 +59,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -70,6 +71,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -512,11 +514,13 @@ cpu_set_user_tls(struct thread *td, void *tls_base) static void cpu_reset_proxy() { + cpuset_t tcrp; cpu_reset_proxy_active = 1; while (cpu_reset_proxy_active == 1) ; /* Wait for other cpu to see that we've started */ - stop_cpus((1< +#include #include #include @@ -251,7 +252,7 @@ struct pmap { struct mtx pm_mtx; pml4_entry_t *pm_pml4; /* KVA of level 4 page table */ TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ - cpumask_t pm_active; /* active on cpus */ + cpuset_t pm_active; /* active on cpus */ /* spare u_int here due to padding */ struct pmap_statistics pm_stats; /* pmap statistics */ vm_page_t pm_root; /* spare page table pages */ diff --git a/sys/amd64/include/smp.h b/sys/amd64/include/smp.h index ec107f931055..de686b76a445 100644 --- a/sys/amd64/include/smp.h +++ b/sys/amd64/include/smp.h @@ -63,17 +63,16 @@ void ipi_all_but_self(u_int ipi); void ipi_bitmap_handler(struct trapframe frame); void ipi_cpu(int cpu, u_int ipi); int ipi_nmi_handler(void); -void ipi_selected(cpumask_t cpus, u_int ipi); +void ipi_selected(cpuset_t cpus, u_int ipi); u_int mp_bootaddress(u_int); -int mp_grab_cpu_hlt(void); void smp_cache_flush(void); void smp_invlpg(vm_offset_t addr); -void smp_masked_invlpg(cpumask_t mask, vm_offset_t addr); +void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr); void smp_invlpg_range(vm_offset_t startva, vm_offset_t endva); -void smp_masked_invlpg_range(cpumask_t mask, vm_offset_t startva, +void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva, vm_offset_t endva); void smp_invltlb(void); -void smp_masked_invltlb(cpumask_t mask); +void smp_masked_invltlb(cpuset_t mask); #endif /* !LOCORE */ #endif /* SMP */ diff --git a/sys/arm/arm/pmap.c b/sys/arm/arm/pmap.c index 087a744b349f..cecf3638d95f 100644 --- a/sys/arm/arm/pmap.c +++ b/sys/arm/arm/pmap.c @@ -2395,7 +2395,7 @@ pmap_bootstrap(vm_offset_t firstaddr, vm_offset_t lastaddr, struct pv_addr *l1pt cpu_cpwait(); PMAP_LOCK_INIT(kernel_pmap); - kernel_pmap->pm_active = -1; + CPU_FILL(&kernel_pmap->pm_active); kernel_pmap->pm_domain = PMAP_DOMAIN_KERNEL; TAILQ_INIT(&kernel_pmap->pm_pvlist); @@ -3826,7 +3826,7 @@ pmap_pinit(pmap_t pmap) pmap_alloc_l1(pmap); bzero(pmap->pm_l2, sizeof(pmap->pm_l2)); - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvlist); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); diff --git a/sys/arm/include/_types.h b/sys/arm/include/_types.h index 48dd2a784fd7..d8386f3225d0 100644 --- a/sys/arm/include/_types.h +++ b/sys/arm/include/_types.h @@ -67,7 +67,6 @@ typedef unsigned long long __uint64_t; * Standard type definitions. */ typedef __uint32_t __clock_t; /* clock()... */ -typedef unsigned int __cpumask_t; typedef __int32_t __critical_t; typedef double __double_t; typedef double __float_t; diff --git a/sys/arm/include/pmap.h b/sys/arm/include/pmap.h index 701390a33852..3d63432e332a 100644 --- a/sys/arm/include/pmap.h +++ b/sys/arm/include/pmap.h @@ -62,6 +62,7 @@ #ifndef LOCORE #include +#include #include #include @@ -134,7 +135,7 @@ struct pmap { struct l1_ttable *pm_l1; struct l2_dtable *pm_l2[L2_SIZE]; pd_entry_t *pm_pdir; /* KVA of page directory */ - cpumask_t pm_active; /* active on cpus */ + cpuset_t pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statictics */ TAILQ_HEAD(,pv_entry) pm_pvlist; /* list of mappings in pmap */ }; diff --git a/sys/boot/ia64/common/Makefile b/sys/boot/ia64/common/Makefile index f16f13d7b0bc..d90898f71db7 100644 --- a/sys/boot/ia64/common/Makefile +++ b/sys/boot/ia64/common/Makefile @@ -6,7 +6,7 @@ MK_SSP= no LIB= ia64 INTERNALLIB= -SRCS= autoload.c bootinfo.c copy.c devicename.c exec.c +SRCS= autoload.c bootinfo.c copy.c devicename.c exec.c icache.c CFLAGS+= -I${.CURDIR}/../../efi/include CFLAGS+= -I${.CURDIR}/../../efi/include/${MACHINE_CPUARCH} diff --git a/sys/boot/ia64/common/exec.c b/sys/boot/ia64/common/exec.c index dd9c9ba05e63..65886fa87f93 100644 --- a/sys/boot/ia64/common/exec.c +++ b/sys/boot/ia64/common/exec.c @@ -258,6 +258,8 @@ ia64_loadseg(Elf_Ehdr *eh, Elf_Phdr *ph, uint64_t delta) if (ph->p_flags & PF_X) { ia64_text_start = ph->p_vaddr + delta; ia64_text_size = ph->p_memsz; + + ia64_sync_icache(ia64_text_start, ia64_text_size); } else { ia64_data_start = ph->p_vaddr + delta; ia64_data_size = ph->p_memsz; diff --git a/sys/boot/ia64/common/icache.c b/sys/boot/ia64/common/icache.c new file mode 100644 index 000000000000..77a35d705b76 --- /dev/null +++ b/sys/boot/ia64/common/icache.c @@ -0,0 +1,51 @@ +/*- + * Copyright (c) 2011 Marcel Moolenaar + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include +__FBSDID("$FreeBSD$"); + +#include +#include + +#include "libia64.h" + +void +ia64_sync_icache(vm_offset_t va, size_t sz) +{ + uintptr_t pa; + size_t cnt, max; + + while (sz > 0) { + max = sz; + pa = (uintptr_t)ia64_va2pa(va, &max); + for (cnt = 0; cnt < max; cnt += 32) + ia64_fc_i(pa + cnt); + ia64_sync_i(); + va += max; + sz -= max; + } + ia64_srlz_i(); +} diff --git a/sys/boot/ia64/common/libia64.h b/sys/boot/ia64/common/libia64.h index 29912f52fe1d..4bc76384dc34 100644 --- a/sys/boot/ia64/common/libia64.h +++ b/sys/boot/ia64/common/libia64.h @@ -64,6 +64,7 @@ void ia64_loadseg(void *, void *, uint64_t); ssize_t ia64_copyin(const void *, vm_offset_t, size_t); ssize_t ia64_copyout(vm_offset_t, void *, size_t); +void ia64_sync_icache(vm_offset_t, size_t); ssize_t ia64_readin(int, vm_offset_t, size_t); void *ia64_va2pa(vm_offset_t, size_t *); diff --git a/sys/boot/ia64/efi/efimd.c b/sys/boot/ia64/efi/efimd.c index 0f7f02a08374..0b29e1280ce0 100644 --- a/sys/boot/ia64/efi/efimd.c +++ b/sys/boot/ia64/efi/efimd.c @@ -230,3 +230,35 @@ ia64_platform_enter(const char *kernel) return (0); } + +COMMAND_SET(pbvm, "pbvm", "show PBVM details", command_pbvm); + +static int +command_pbvm(int argc, char *argv[]) +{ + uint64_t limit, pg, start; + u_int idx; + + printf("Page table @ %p, size %x\n", ia64_pgtbl, ia64_pgtblsz); + + if (ia64_pgtbl == NULL) + return (0); + + limit = ~0; + start = ~0; + idx = 0; + while (ia64_pgtbl[idx] != 0) { + pg = ia64_pgtbl[idx]; + if (pg != limit) { + if (start != ~0) + printf("%#lx-%#lx\n", start, limit); + start = pg; + } + limit = pg + IA64_PBVM_PAGE_SIZE; + idx++; + } + if (start != ~0) + printf("%#lx-%#lx\n", start, limit); + + return (0); +} diff --git a/sys/boot/ia64/efi/main.c b/sys/boot/ia64/efi/main.c index 485a26d54842..ec12b4266a07 100644 --- a/sys/boot/ia64/efi/main.c +++ b/sys/boot/ia64/efi/main.c @@ -153,9 +153,7 @@ main(int argc, CHAR16 *argv[]) */ cons_probe(); - printf("\n"); - printf("%s, Revision %s\n", bootprog_name, bootprog_rev); - printf("(%s, %s)\n", bootprog_maker, bootprog_date); + printf("\n%s, Revision %s\n", bootprog_name, bootprog_rev); find_pal_proc(); @@ -214,6 +212,18 @@ static int command_quit(int argc, char *argv[]) { exit(0); + /* NOTREACHED */ + return (CMD_OK); +} + +COMMAND_SET(reboot, "reboot", "reboot the system", command_reboot); + +static int +command_reboot(int argc, char *argv[]) +{ + + RS->ResetSystem(EfiResetWarm, EFI_SUCCESS, 0, NULL); + /* NOTREACHED */ return (CMD_OK); } @@ -585,3 +595,24 @@ command_hcdp(int argc, char *argv[]) printf("\n"); return (CMD_OK); } + +COMMAND_SET(about, "about", "about the loader", command_about); + +extern uint64_t _start_plabel[]; + +static int +command_about(int argc, char *argv[]) +{ + EFI_LOADED_IMAGE *img; + + printf("%s\n", bootprog_name); + printf("revision %s\n", bootprog_rev); + printf("built by %s\n", bootprog_maker); + printf("built on %s\n", bootprog_date); + + printf("\n"); + + BS->HandleProtocol(IH, &imgid, (VOID**)&img); + printf("image loaded at %p\n", img->ImageBase); + printf("entry at %#lx (%#lx)\n", _start_plabel[0], _start_plabel[1]); +} diff --git a/sys/boot/ia64/efi/version b/sys/boot/ia64/efi/version index 3a947c8c5639..17d14ea1c70b 100644 --- a/sys/boot/ia64/efi/version +++ b/sys/boot/ia64/efi/version @@ -3,6 +3,8 @@ $FreeBSD$ NOTE ANY CHANGES YOU MAKE TO THE BOOTBLOCKS HERE. The format of this file is important. Make sure the current version number is on line 6. +3.1: Add the about, reboot and pbvm commands. + I-cache coherency is maintained. 3.0: Add support for PBVM. 2.2: Create direct mapping based on start address instead of mapping first 256M. diff --git a/sys/cddl/compat/opensolaris/sys/atomic.h b/sys/cddl/compat/opensolaris/sys/atomic.h index af9cc5d27e47..f34d77e6f38c 100644 --- a/sys/cddl/compat/opensolaris/sys/atomic.h +++ b/sys/cddl/compat/opensolaris/sys/atomic.h @@ -40,8 +40,6 @@ extern void atomic_add_64(volatile uint64_t *target, int64_t delta); extern void atomic_dec_64(volatile uint64_t *target); #endif -#ifndef __LP64__ -#endif #ifndef __sparc64__ extern uint32_t atomic_cas_32(volatile uint32_t *target, uint32_t cmp, uint32_t newval); diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c index 942636b906ce..130c9180be1c 100644 --- a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c +++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/spa_history.c @@ -500,9 +500,11 @@ spa_history_log_version(spa_t *spa, history_internal_events_t event) utsname.nodename, utsname.release, utsname.version, utsname.machine); } +#if 0 cmn_err(CE_CONT, "!%s version %llu pool %s using %llu", event == LOG_POOL_IMPORT ? "imported" : event == LOG_POOL_CREATE ? "created" : "accessed", (u_longlong_t)current_vers, spa_name(spa), SPA_VERSION); #endif +#endif } diff --git a/sys/cddl/dev/cyclic/i386/cyclic_machdep.c b/sys/cddl/dev/cyclic/i386/cyclic_machdep.c index 6f9366316115..9ba2fd3c0086 100644 --- a/sys/cddl/dev/cyclic/i386/cyclic_machdep.c +++ b/sys/cddl/dev/cyclic/i386/cyclic_machdep.c @@ -123,7 +123,9 @@ reprogram(cyb_arg_t arg __unused, hrtime_t exp) static void xcall(cyb_arg_t arg __unused, cpu_t *c, cyc_func_t func, void *param) { + cpuset_t cpus; - smp_rendezvous_cpus((cpumask_t)1 << c->cpuid, + CPU_SETOF(c->cpuid, &cpus); + smp_rendezvous_cpus(cpus, smp_no_rendevous_barrier, func, smp_no_rendevous_barrier, param); } diff --git a/sys/cddl/dev/dtrace/amd64/dtrace_subr.c b/sys/cddl/dev/dtrace/amd64/dtrace_subr.c index a081f6701ac2..0b86eacfe935 100644 --- a/sys/cddl/dev/dtrace/amd64/dtrace_subr.c +++ b/sys/cddl/dev/dtrace/amd64/dtrace_subr.c @@ -113,12 +113,12 @@ dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit)) void dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg) { - cpumask_t cpus; + cpuset_t cpus; if (cpu == DTRACE_CPUALL) cpus = all_cpus; else - cpus = (cpumask_t)1 << cpu; + CPU_SETOF(cpu, &cpus); smp_rendezvous_cpus(cpus, smp_no_rendevous_barrier, func, smp_no_rendevous_barrier, arg); @@ -374,7 +374,7 @@ dtrace_gethrtime_init(void *arg) { struct pcpu *pc; uint64_t tsc_f; - cpumask_t map; + cpuset_t map; int i; /* @@ -412,7 +412,8 @@ dtrace_gethrtime_init(void *arg) continue; pc = pcpu_find(i); - map = PCPU_GET(cpumask) | pc->pc_cpumask; + map = PCPU_GET(cpumask); + CPU_OR(&map, &pc->pc_cpumask); smp_rendezvous_cpus(map, NULL, dtrace_gethrtime_init_cpu, diff --git a/sys/cddl/dev/dtrace/i386/dtrace_subr.c b/sys/cddl/dev/dtrace/i386/dtrace_subr.c index 2753ffc5bbf8..412fc38e6a56 100644 --- a/sys/cddl/dev/dtrace/i386/dtrace_subr.c +++ b/sys/cddl/dev/dtrace/i386/dtrace_subr.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include @@ -113,12 +114,12 @@ dtrace_toxic_ranges(void (*func)(uintptr_t base, uintptr_t limit)) void dtrace_xcall(processorid_t cpu, dtrace_xcall_t func, void *arg) { - cpumask_t cpus; + cpuset_t cpus; if (cpu == DTRACE_CPUALL) cpus = all_cpus; else - cpus = (cpumask_t)1 << cpu; + CPU_SETOF(cpu, &cpus); smp_rendezvous_cpus(cpus, smp_no_rendevous_barrier, func, smp_no_rendevous_barrier, arg); @@ -372,9 +373,9 @@ dtrace_gethrtime_init_cpu(void *arg) static void dtrace_gethrtime_init(void *arg) { + cpuset_t map; struct pcpu *pc; uint64_t tsc_f; - cpumask_t map; int i; /* @@ -412,7 +413,8 @@ dtrace_gethrtime_init(void *arg) continue; pc = pcpu_find(i); - map = PCPU_GET(cpumask) | pc->pc_cpumask; + map = PCPU_GET(cpumask); + CPU_OR(&map, &pc->pc_cpumask); smp_rendezvous_cpus(map, NULL, dtrace_gethrtime_init_cpu, diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 94311c61cfda..b84d0c535b14 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -432,7 +432,10 @@ options KTRACE_REQUEST_POOL=101 # defined by the KTR_* constants in . KTR_MASK defines the # initial value of the ktr_mask variable which determines at runtime # what events to trace. KTR_CPUMASK determines which CPU's log -# events, with bit X corresponding to CPU X. KTR_VERBOSE enables +# events, with bit X corresponding to CPU X. The layout of the string +# passed as KTR_CPUMASK must match a serie of bitmasks each of them +# separated by the ", " characters (ie: +# KTR_CPUMASK=("0xAF, 0xFFFFFFFFFFFFFFFF")). KTR_VERBOSE enables # dumping of KTR events to the console by default. This functionality # can be toggled via the debug.ktr_verbose sysctl and defaults to off # if KTR_VERBOSE is not defined. See ktr(4) and ktrdump(8) for details. @@ -441,7 +444,7 @@ options KTR options KTR_ENTRIES=1024 options KTR_COMPILE=(KTR_INTR|KTR_PROC) options KTR_MASK=KTR_INTR -options KTR_CPUMASK=0x3 +options KTR_CPUMASK=("0x3") options KTR_VERBOSE # diff --git a/sys/conf/files b/sys/conf/files index 59286a5195a0..d654c6f00421 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -2748,6 +2748,7 @@ netinet/ip_gre.c optional gre inet netinet/ip_id.c optional inet netinet/in_mcast.c optional inet netinet/in_pcb.c optional inet | inet6 +netinet/in_pcbgroup.c optional inet pcbgroup | inet6 pcbgroup netinet/in_proto.c optional inet | inet6 \ compile-with "${NORMAL_C} -I$S/contrib/pf" netinet/in_rmx.c optional inet @@ -2825,6 +2826,7 @@ netinet6/in6_gif.c optional gif inet6 | netgraph_gif inet6 netinet6/in6_ifattach.c optional inet6 netinet6/in6_mcast.c optional inet6 netinet6/in6_pcb.c optional inet6 +netinet6/in6_pcbgroup.c optional inet6 pcbgroup netinet6/in6_proto.c optional inet6 netinet6/in6_rmx.c optional inet6 netinet6/in6_src.c optional inet6 diff --git a/sys/conf/options b/sys/conf/options index a608d86382f6..ee696a8674d1 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -419,6 +419,7 @@ MROUTING opt_mrouting.h NCP NETATALK opt_atalk.h NFSLOCKD +PCBGROUP opt_pcbgroup.h RADIX_MPATH opt_mpath.h ROUTETABLES opt_route.h SLIP_IFF_OPTS opt_slip.h diff --git a/sys/ddb/db_command.c b/sys/ddb/db_command.c index 21cb7c5940de..f2e2c42accab 100644 --- a/sys/ddb/db_command.c +++ b/sys/ddb/db_command.c @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -64,6 +65,7 @@ db_addr_t db_last_addr; db_addr_t db_prev; db_addr_t db_next; +static db_cmdfcn_t db_dump; static db_cmdfcn_t db_fncall; static db_cmdfcn_t db_gdb; static db_cmdfcn_t db_halt; @@ -102,6 +104,7 @@ static struct command db_cmds[] = { { "w", db_write_cmd, CS_MORE|CS_SET_DOT, 0 }, { "delete", db_delete_cmd, 0, 0 }, { "d", db_delete_cmd, 0, 0 }, + { "dump", db_dump, 0, 0 }, { "break", db_breakpoint_cmd, 0, 0 }, { "b", db_breakpoint_cmd, 0, 0 }, { "dwatch", db_deletewatch_cmd, 0, 0 }, @@ -526,6 +529,27 @@ db_error(s) kdb_reenter(); } +static void +db_dump(db_expr_t dummy, boolean_t dummy2, db_expr_t dummy3, char *dummy4) +{ + int error; + + error = doadump(FALSE); + if (error) { + db_printf("Cannot dump: "); + switch (error) { + case EBUSY: + db_printf("debugger got invoked while dumping.\n"); + break; + case ENXIO: + db_printf("no dump device specified.\n"); + break; + default: + db_printf("unknown error (error=%d).\n", error); + break; + } + } +} /* * Call random function: diff --git a/sys/dev/amdsbwd/amdsbwd.c b/sys/dev/amdsbwd/amdsbwd.c index f5f0f879d605..4256381f5312 100644 --- a/sys/dev/amdsbwd/amdsbwd.c +++ b/sys/dev/amdsbwd/amdsbwd.c @@ -25,8 +25,8 @@ */ /* - * This is a driver for watchdog timer present in AMD SB600/SB7xx - * south bridges and other watchdog timers advertised via WDRT ACPI table. + * This is a driver for watchdog timer present in AMD SB600/SB7xx/SB8xx + * southbridges. * Please see the following specifications for the descriptions of the * registers and flags: * - AMD SB600 Register Reference Guide, Public Version, Rev. 3.03 (SB600 RRG) @@ -35,11 +35,13 @@ * http://developer.amd.com/assets/43009_sb7xx_rrg_pub_1.00.pdf * - AMD SB700/710/750 Register Programming Requirements (RPR) * http://developer.amd.com/assets/42413_sb7xx_rpr_pub_1.00.pdf + * - AMD SB800-Series Southbridges Register Reference Guide (RRG) + * http://support.amd.com/us/Embedded_TechDocs/45482.pdf * Please see the following for Watchdog Resource Table specification: * - Watchdog Timer Hardware Requirements for Windows Server 2003 (WDRT) * http://www.microsoft.com/whdc/system/sysinternals/watchdog.mspx - * AMD SB600/SB7xx watchdog hardware seems to conform to the above, - * but my system doesn't provide the table. + * AMD SB600/SB7xx/SB8xx watchdog hardware seems to conform to the above + * specifications, but the table hasn't been spotted in the wild yet. */ #include @@ -59,15 +61,15 @@ __FBSDID("$FreeBSD$"); #include #include -/* RRG 2.3.3.1.1, page 161. */ +/* SB7xx RRG 2.3.3.1.1. */ #define AMDSB_PMIO_INDEX 0xcd6 #define AMDSB_PMIO_DATA (PMIO_INDEX + 1) #define AMDSB_PMIO_WIDTH 2 -/* RRG 2.3.3.2, page 181. */ +/* SB7xx RRG 2.3.3.2. */ #define AMDSB_PM_RESET_STATUS0 0x44 #define AMDSB_PM_RESET_STATUS1 0x45 #define AMDSB_WD_RST_STS 0x02 -/* RRG 2.3.3.2, page 188; RPR 2.36, page 30. */ +/* SB7xx RRG 2.3.3.2, RPR 2.36. */ #define AMDSB_PM_WDT_CTRL 0x69 #define AMDSB_WDT_DISABLE 0x01 #define AMDSB_WDT_RES_MASK (0x02 | 0x04) @@ -77,7 +79,18 @@ __FBSDID("$FreeBSD$"); #define AMDSB_WDT_RES_1S 0x06 #define AMDSB_PM_WDT_BASE_LSB 0x6c #define AMDSB_PM_WDT_BASE_MSB 0x6f -/* RRG 2.3.4, page 223, WDRT. */ +/* SB8xx RRG 2.3.3. */ +#define AMDSB8_PM_WDT_EN 0x48 +#define AMDSB8_WDT_DEC_EN 0x01 +#define AMDSB8_WDT_DISABLE 0x02 +#define AMDSB8_PM_WDT_CTRL 0x4c +#define AMDSB8_WDT_32KHZ 0x00 +#define AMDSB8_WDT_1HZ 0x03 +#define AMDSB8_WDT_RES_MASK 0x03 +#define AMDSB8_PM_RESET_STATUS0 0xC0 +#define AMDSB8_PM_RESET_STATUS1 0xC1 +#define AMDSB8_WD_RST_STS 0x20 +/* SB7xx RRG 2.3.4, WDRT. */ #define AMDSB_WD_CTRL 0x00 #define AMDSB_WD_RUN 0x01 #define AMDSB_WD_FIRED 0x02 @@ -90,8 +103,9 @@ __FBSDID("$FreeBSD$"); #define AMDSB_WDIO_REG_WIDTH 4 /* WDRT */ #define MAXCOUNT_MIN_VALUE 511 -/* RRG 2.3.1.1, page 122; SB600 RRG 2.3.1.1, page 97. */ -#define AMDSB7xx_SMBUS_DEVID 0x43851002 +/* SB7xx RRG 2.3.1.1, SB600 RRG 2.3.1.1, SB8xx RRG 2.3.1. */ +#define AMDSB_SMBUS_DEVID 0x43851002 +#define AMDSB8_SMBUS_REVID 0x40 #define amdsbwd_verbose_printf(dev, ...) \ do { \ @@ -265,7 +279,7 @@ amdsbwd_identify(driver_t *driver, device_t parent) smb_dev = pci_find_bsf(0, 20, 0); if (smb_dev == NULL) return; - if (pci_get_devid(smb_dev) != AMDSB7xx_SMBUS_DEVID) + if (pci_get_devid(smb_dev) != AMDSB_SMBUS_DEVID) return; child = BUS_ADD_CHILD(parent, ISA_ORDER_SPECULATIVE, "amdsbwd", -1); @@ -273,15 +287,102 @@ amdsbwd_identify(driver_t *driver, device_t parent) device_printf(parent, "add amdsbwd child failed\n"); } + +static void +amdsbwd_probe_sb7xx(device_t dev, struct resource *pmres, uint32_t *addr) +{ + uint32_t val; + int i; + + /* Report cause of previous reset for user's convenience. */ + val = pmio_read(pmres, AMDSB_PM_RESET_STATUS0); + if (val != 0) + amdsbwd_verbose_printf(dev, "ResetStatus0 = %#04x\n", val); + val = pmio_read(pmres, AMDSB_PM_RESET_STATUS1); + if (val != 0) + amdsbwd_verbose_printf(dev, "ResetStatus1 = %#04x\n", val); + if ((val & AMDSB_WD_RST_STS) != 0) + device_printf(dev, "Previous Reset was caused by Watchdog\n"); + + /* Find base address of memory mapped WDT registers. */ + for (*addr = 0, i = 0; i < 4; i++) { + *addr <<= 8; + *addr |= pmio_read(pmres, AMDSB_PM_WDT_BASE_MSB - i); + } + /* Set watchdog timer tick to 1s. */ + val = pmio_read(pmres, AMDSB_PM_WDT_CTRL); + val &= ~AMDSB_WDT_RES_MASK; + val |= AMDSB_WDT_RES_10MS; + pmio_write(pmres, AMDSB_PM_WDT_CTRL, val); + + /* Enable watchdog device (in stopped state). */ + val = pmio_read(pmres, AMDSB_PM_WDT_CTRL); + val &= ~AMDSB_WDT_DISABLE; + pmio_write(pmres, AMDSB_PM_WDT_CTRL, val); + + /* + * XXX TODO: Ensure that watchdog decode is enabled + * (register 0x41, bit 3). + */ + device_set_desc(dev, "AMD SB600/SB7xx Watchdog Timer"); +} + +static void +amdsbwd_probe_sb8xx(device_t dev, struct resource *pmres, uint32_t *addr) +{ + uint32_t val; + int i; + + /* Report cause of previous reset for user's convenience. */ + val = pmio_read(pmres, AMDSB8_PM_RESET_STATUS0); + if (val != 0) + amdsbwd_verbose_printf(dev, "ResetStatus0 = %#04x\n", val); + val = pmio_read(pmres, AMDSB8_PM_RESET_STATUS1); + if (val != 0) + amdsbwd_verbose_printf(dev, "ResetStatus1 = %#04x\n", val); + if ((val & AMDSB8_WD_RST_STS) != 0) + device_printf(dev, "Previous Reset was caused by Watchdog\n"); + + /* Find base address of memory mapped WDT registers. */ + for (*addr = 0, i = 0; i < 4; i++) { + *addr <<= 8; + *addr |= pmio_read(pmres, AMDSB8_PM_WDT_EN + 3 - i); + } + *addr &= ~0x07u; + + /* Set watchdog timer tick to 1s. */ + val = pmio_read(pmres, AMDSB8_PM_WDT_CTRL); + val &= ~AMDSB8_WDT_RES_MASK; + val |= AMDSB8_WDT_1HZ; + pmio_write(pmres, AMDSB8_PM_WDT_CTRL, val); +#ifdef AMDSBWD_DEBUG + val = pmio_read(pmres, AMDSB8_PM_WDT_CTRL); + amdsbwd_verbose_printf(dev, "AMDSB8_PM_WDT_CTRL value = %#02x\n", val); +#endif + + /* + * Enable watchdog device (in stopped state) + * and decoding of its address. + */ + val = pmio_read(pmres, AMDSB8_PM_WDT_EN); + val &= ~AMDSB8_WDT_DISABLE; + val |= AMDSB8_WDT_DEC_EN; + pmio_write(pmres, AMDSB8_PM_WDT_EN, val); +#ifdef AMDSBWD_DEBUG + val = pmio_read(pmres, AMDSB8_PM_WDT_EN); + device_printf(dev, "AMDSB8_PM_WDT_EN value = %#02x\n", val); +#endif + device_set_desc(dev, "AMD SB8xx Watchdog Timer"); +} + static int amdsbwd_probe(device_t dev) { struct resource *res; + device_t smb_dev; uint32_t addr; - uint32_t val; int rid; int rc; - int i; /* Do not claim some ISA PnP device by accident. */ if (isa_get_logicalid(dev) != 0) @@ -301,21 +402,16 @@ amdsbwd_probe(device_t dev) return (ENXIO); } - /* Report cause of previous reset for user's convenience. */ - val = pmio_read(res, AMDSB_PM_RESET_STATUS0); - if (val != 0) - amdsbwd_verbose_printf(dev, "ResetStatus0 = %#04x\n", val); - val = pmio_read(res, AMDSB_PM_RESET_STATUS1); - if (val != 0) - amdsbwd_verbose_printf(dev, "ResetStatus1 = %#04x\n", val); - if ((val & AMDSB_WD_RST_STS) != 0) - device_printf(dev, "Previous Reset was caused by Watchdog\n"); + smb_dev = pci_find_bsf(0, 20, 0); + KASSERT(smb_dev != NULL, ("can't find SMBus PCI device\n")); + if (pci_get_revid(smb_dev) < AMDSB8_SMBUS_REVID) + amdsbwd_probe_sb7xx(dev, res, &addr); + else + amdsbwd_probe_sb8xx(dev, res, &addr); + + bus_release_resource(dev, SYS_RES_IOPORT, rid, res); + bus_delete_resource(dev, SYS_RES_IOPORT, rid); - /* Find base address of memory mapped WDT registers. */ - for (addr = 0, i = 0; i < 4; i++) { - addr <<= 8; - addr |= pmio_read(res, AMDSB_PM_WDT_BASE_MSB - i); - } amdsbwd_verbose_printf(dev, "memory base address = %#010x\n", addr); rc = bus_set_resource(dev, SYS_RES_MEMORY, 0, addr + AMDSB_WD_CTRL, AMDSB_WDIO_REG_WIDTH); @@ -330,36 +426,25 @@ amdsbwd_probe(device_t dev) return (ENXIO); } - /* Set watchdog timer tick to 10ms. */ - val = pmio_read(res, AMDSB_PM_WDT_CTRL); - val &= ~AMDSB_WDT_RES_MASK; - val |= AMDSB_WDT_RES_10MS; - pmio_write(res, AMDSB_PM_WDT_CTRL, val); - - /* Enable watchdog device (in stopped state). */ - val = pmio_read(res, AMDSB_PM_WDT_CTRL); - val &= ~AMDSB_WDT_DISABLE; - pmio_write(res, AMDSB_PM_WDT_CTRL, val); - - /* - * XXX TODO: Ensure that watchdog decode is enabled - * (register 0x41, bit 3). - */ - bus_release_resource(dev, SYS_RES_IOPORT, rid, res); - bus_delete_resource(dev, SYS_RES_IOPORT, rid); - - device_set_desc(dev, "AMD SB600/SB7xx Watchdog Timer"); return (0); } static int amdsbwd_attach_sb(device_t dev, struct amdsbwd_softc *sc) { + device_t smb_dev; + sc->max_ticks = UINT16_MAX; - sc->ms_per_tick = 10; sc->rid_ctrl = 0; sc->rid_count = 1; + smb_dev = pci_find_bsf(0, 20, 0); + KASSERT(smb_dev != NULL, ("can't find SMBus PCI device\n")); + if (pci_get_revid(smb_dev) < AMDSB8_SMBUS_REVID) + sc->ms_per_tick = 10; + else + sc->ms_per_tick = 1000; + sc->res_ctrl = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &sc->rid_ctrl, RF_ACTIVE); if (sc->res_ctrl == NULL) { @@ -388,6 +473,11 @@ amdsbwd_attach(device_t dev) if (rc != 0) goto fail; +#ifdef AMDSBWD_DEBUG + device_printf(dev, "wd ctrl = %#04x\n", wdctrl_read(sc)); + device_printf(dev, "wd count = %#04x\n", wdcount_read(sc)); +#endif + /* Setup initial state of Watchdog Control. */ wdctrl_write(sc, AMDSB_WD_FIRED); diff --git a/sys/dev/ath/ath_hal/ah.h b/sys/dev/ath/ath_hal/ah.h index 165d919d03d8..7a01be354436 100644 --- a/sys/dev/ath/ath_hal/ah.h +++ b/sys/dev/ath/ath_hal/ah.h @@ -745,6 +745,17 @@ typedef enum { HAL_QUIET_ADD_SWBA_RESP_TIME = 0x4, /* add beacon response time to next_start offset */ } HAL_QUIET_FLAG; +#define HAL_DFS_EVENT_PRICH 0x0000001 + +struct dfs_event { + uint64_t re_full_ts; /* 64-bit full timestamp from interrupt time */ + uint32_t re_ts; /* Original 15 bit recv timestamp */ + uint8_t re_rssi; /* rssi of radar event */ + uint8_t re_dur; /* duration of radar pulse */ + uint32_t re_flags; /* Flags (see above) */ +}; +typedef struct dfs_event HAL_DFS_EVENT; + /* * Hardware Access Layer (HAL) API. * @@ -928,6 +939,9 @@ struct ath_hal { HAL_PHYERR_PARAM *pe); void __ahdecl(*ah_getDfsThresh)(struct ath_hal *ah, HAL_PHYERR_PARAM *pe); + HAL_BOOL __ahdecl(*ah_procRadarEvent)(struct ath_hal *ah, + struct ath_rx_status *rxs, uint64_t fulltsf, + const char *buf, HAL_DFS_EVENT *event); /* Key Cache Functions */ uint32_t __ahdecl(*ah_getKeyCacheSize)(struct ath_hal*); diff --git a/sys/dev/ath/ath_hal/ar5212/ar5212.h b/sys/dev/ath/ath_hal/ar5212/ar5212.h index 16394a396f11..8503a629aa36 100644 --- a/sys/dev/ath/ath_hal/ar5212/ar5212.h +++ b/sys/dev/ath/ath_hal/ar5212/ar5212.h @@ -622,5 +622,8 @@ extern HAL_BOOL ar5212IsNFCalInProgress(struct ath_hal *ah); extern HAL_BOOL ar5212WaitNFCalComplete(struct ath_hal *ah, int i); extern void ar5212EnableDfs(struct ath_hal *ah, HAL_PHYERR_PARAM *pe); extern void ar5212GetDfsThresh(struct ath_hal *ah, HAL_PHYERR_PARAM *pe); +extern HAL_BOOL ar5212ProcessRadarEvent(struct ath_hal *ah, + struct ath_rx_status *rxs, uint64_t fulltsf, const char *buf, + HAL_DFS_EVENT *event); #endif /* _ATH_AR5212_H_ */ diff --git a/sys/dev/ath/ath_hal/ar5212/ar5212_attach.c b/sys/dev/ath/ath_hal/ar5212/ar5212_attach.c index 5999a603f9ca..8e7f3cbddf04 100644 --- a/sys/dev/ath/ath_hal/ar5212/ar5212_attach.c +++ b/sys/dev/ath/ath_hal/ar5212/ar5212_attach.c @@ -132,6 +132,7 @@ static const struct ath_hal_private ar5212hal = {{ /* DFS Functions */ .ah_enableDfs = ar5212EnableDfs, .ah_getDfsThresh = ar5212GetDfsThresh, + .ah_procRadarEvent = ar5212ProcessRadarEvent, /* Key Cache Functions */ .ah_getKeyCacheSize = ar5212GetKeyCacheSize, diff --git a/sys/dev/ath/ath_hal/ar5212/ar5212_misc.c b/sys/dev/ath/ath_hal/ar5212/ar5212_misc.c index 276671d6db33..3a6019d993fd 100644 --- a/sys/dev/ath/ath_hal/ar5212/ar5212_misc.c +++ b/sys/dev/ath/ath_hal/ar5212/ar5212_misc.c @@ -21,9 +21,7 @@ #include "ah.h" #include "ah_internal.h" #include "ah_devid.h" -#ifdef AH_DEBUG #include "ah_desc.h" /* NB: for HAL_PHYERR* */ -#endif #include "ar5212/ar5212.h" #include "ar5212/ar5212reg.h" @@ -1180,3 +1178,47 @@ ar5212GetDfsThresh(struct ath_hal *ah, HAL_PHYERR_PARAM *pe) pe->pe_extchannel = AH_FALSE; } +/* + * Process the radar phy error and extract the pulse duration. + */ +HAL_BOOL +ar5212ProcessRadarEvent(struct ath_hal *ah, struct ath_rx_status *rxs, + uint64_t fulltsf, const char *buf, HAL_DFS_EVENT *event) +{ + uint8_t dur; + uint8_t rssi; + + /* Check whether the given phy error is a radar event */ + if ((rxs->rs_phyerr != HAL_PHYERR_RADAR) && + (rxs->rs_phyerr != HAL_PHYERR_FALSE_RADAR_EXT)) + return AH_FALSE; + + /* + * The first byte is the pulse width - if there's + * no data, simply set the duration to 0 + */ + if (rxs->rs_datalen >= 1) + /* The pulse width is byte 0 of the data */ + dur = ((uint8_t) buf[0]) & 0xff; + else + dur = 0; + + /* Pulse RSSI is the normal reported RSSI */ + rssi = (uint8_t) rxs->rs_rssi; + + /* 0 duration/rssi is not a valid radar event */ + if (dur == 0 && rssi == 0) + return AH_FALSE; + + HALDEBUG(ah, HAL_DEBUG_DFS, "%s: rssi=%d, dur=%d\n", + __func__, rssi, dur); + + /* Record the event */ + event->re_full_ts = fulltsf; + event->re_ts = rxs->rs_tstamp; + event->re_rssi = rssi; + event->re_dur = dur; + event->re_flags = HAL_DFS_EVENT_PRICH; + + return AH_TRUE; +} diff --git a/sys/dev/ath/ath_hal/ar5416/ar5416.h b/sys/dev/ath/ath_hal/ar5416/ar5416.h index 510afe0436e0..e5294b0634bb 100644 --- a/sys/dev/ath/ath_hal/ar5416/ar5416.h +++ b/sys/dev/ath/ath_hal/ar5416/ar5416.h @@ -205,6 +205,9 @@ extern HAL_BOOL ar5416SetRifsDelay(struct ath_hal *ah, const struct ieee80211_channel *chan, HAL_BOOL enable); extern void ar5416EnableDfs(struct ath_hal *ah, HAL_PHYERR_PARAM *pe); extern void ar5416GetDfsThresh(struct ath_hal *ah, HAL_PHYERR_PARAM *pe); +extern HAL_BOOL ar5416ProcessRadarEvent(struct ath_hal *ah, + struct ath_rx_status *rxs, uint64_t fulltsf, const char *buf, + HAL_DFS_EVENT *event); extern HAL_BOOL ar5416SetPowerMode(struct ath_hal *ah, HAL_POWER_MODE mode, int setChip); diff --git a/sys/dev/ath/ath_hal/ar5416/ar5416_attach.c b/sys/dev/ath/ath_hal/ar5416/ar5416_attach.c index 22d05ff80ce8..e6363251f8a2 100644 --- a/sys/dev/ath/ath_hal/ar5416/ar5416_attach.c +++ b/sys/dev/ath/ath_hal/ar5416/ar5416_attach.c @@ -147,6 +147,7 @@ ar5416InitState(struct ath_hal_5416 *ahp5416, uint16_t devid, HAL_SOFTC sc, /* DFS Functions */ ah->ah_enableDfs = ar5416EnableDfs; ah->ah_getDfsThresh = ar5416GetDfsThresh; + ah->ah_procRadarEvent = ar5416ProcessRadarEvent; /* Power Management Functions */ ah->ah_setPowerMode = ar5416SetPowerMode; diff --git a/sys/dev/ath/ath_hal/ar5416/ar5416_misc.c b/sys/dev/ath/ath_hal/ar5416/ar5416_misc.c index 2c08730bbe91..2332656e8f54 100644 --- a/sys/dev/ath/ath_hal/ar5416/ar5416_misc.c +++ b/sys/dev/ath/ath_hal/ar5416/ar5416_misc.c @@ -692,3 +692,19 @@ ar5416EnableDfs(struct ath_hal *ah, HAL_PHYERR_PARAM *pe) OS_REG_WRITE(ah, AR_PHY_RADAR_1, val); } } + +/* + * Extract the radar event information from the given phy error. + * + * Returns AH_TRUE if the phy error was actually a phy error, + * AH_FALSE if the phy error wasn't a phy error. + */ +HAL_BOOL +ar5416ProcessRadarEvent(struct ath_hal *ah, struct ath_rx_status *rxs, + uint64_t fulltsf, const char *buf, HAL_DFS_EVENT *event) +{ + /* + * For now, this isn't implemented. + */ + return AH_FALSE; +} diff --git a/sys/dev/ath/if_athvar.h b/sys/dev/ath/if_athvar.h index 97666c55cb87..3bc852266019 100644 --- a/sys/dev/ath/if_athvar.h +++ b/sys/dev/ath/if_athvar.h @@ -709,6 +709,8 @@ void ath_intr(void *); ((*(_ah)->ah_enableDfs)((_ah), (_param))) #define ath_hal_getdfsthresh(_ah, _param) \ ((*(_ah)->ah_getDfsThresh)((_ah), (_param))) +#define ath_hal_procradarevent(_ah, _rxs, _fulltsf, _buf, _event) \ + ((*(_ah)->ah_procRadarEvent)((_ah), (_rxs), (_fulltsf), (_buf), (_event))) #define ath_hal_gpioCfgOutput(_ah, _gpio, _type) \ ((*(_ah)->ah_gpioCfgOutput)((_ah), (_gpio), (_type))) diff --git a/sys/dev/atkbdc/atkbd.c b/sys/dev/atkbdc/atkbd.c index 643554d1f4dd..b7156cf1a699 100644 --- a/sys/dev/atkbdc/atkbd.c +++ b/sys/dev/atkbdc/atkbd.c @@ -1097,10 +1097,8 @@ get_typematic(keyboard_t *kbd) x86regs_t regs; uint8_t *p; - if (!(kbd->kb_config & KB_CONF_PROBE_TYPEMATIC)) - return (ENODEV); - - if (x86bios_get_intr(0x15) == 0 || x86bios_get_intr(0x16) == 0) + if (x86bios_get_intr(0x15) != 0xf000f859 || + x86bios_get_intr(0x16) != 0xf000e82e) return (ENODEV); /* Is BIOS system configuration table supported? */ diff --git a/sys/dev/atkbdc/atkbdreg.h b/sys/dev/atkbdc/atkbdreg.h index 3d54b4d70228..cf7ee6b31280 100644 --- a/sys/dev/atkbdc/atkbdreg.h +++ b/sys/dev/atkbdc/atkbdreg.h @@ -36,7 +36,6 @@ #define KB_CONF_NO_RESET (1 << 1) /* don't reset the keyboard */ #define KB_CONF_ALT_SCANCODESET (1 << 2) /* assume the XT type keyboard */ #define KB_CONF_NO_PROBE_TEST (1 << 3) /* don't test keyboard during probe */ -#define KB_CONF_PROBE_TYPEMATIC (1 << 4) /* probe keyboard typematic */ #ifdef _KERNEL diff --git a/sys/dev/cardbus/cardbus_cis.c b/sys/dev/cardbus/cardbus_cis.c index 2cfea19203f3..3352a56afa29 100644 --- a/sys/dev/cardbus/cardbus_cis.c +++ b/sys/dev/cardbus/cardbus_cis.c @@ -324,7 +324,7 @@ decode_tuple_bar(device_t cbdev, device_t child, int id, * hint when the cardbus bridge is a child of pci0 (the main * bus). The PC Card spec seems to indicate that this should * only be done on x86 based machines, which suggests that on - * non-x86 machines the adddresses can be anywhere. Since the + * non-x86 machines the addresses can be anywhere. Since the * hardware can do it on non-x86 machines, it should be able * to do it on x86 machines too. Therefore, we can and should * ignore this hint. Furthermore, the PC Card spec recommends @@ -430,7 +430,6 @@ cardbus_read_tuple_finish(device_t cbdev, device_t child, int rid, { if (res != CIS_CONFIG_SPACE) { bus_release_resource(child, SYS_RES_MEMORY, rid, res); - bus_delete_resource(child, SYS_RES_MEMORY, rid); } } @@ -467,7 +466,7 @@ cardbus_read_tuple_init(device_t cbdev, device_t child, uint32_t *start, } /* allocate the memory space to read CIS */ - res = bus_alloc_resource(child, SYS_RES_MEMORY, rid, 0, ~0, 1, + res = bus_alloc_resource_any(child, SYS_RES_MEMORY, rid, rman_make_alignment_flags(4096) | RF_ACTIVE); if (res == NULL) { device_printf(cbdev, "Unable to allocate resource " diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c index d6225d8f5ae3..4cfcea87fa5b 100644 --- a/sys/dev/hwpmc/hwpmc_mod.c +++ b/sys/dev/hwpmc/hwpmc_mod.c @@ -1991,7 +1991,7 @@ pmc_hook_handler(struct thread *td, int function, void *arg) * had already processed the interrupt). We don't * lose the interrupt sample. */ - atomic_clear_int(&pmc_cpumask, (1 << PCPU_GET(cpuid))); + CPU_CLR_ATOMIC(PCPU_GET(cpuid), &pmc_cpumask); pmc_process_samples(PCPU_GET(cpuid)); break; @@ -4083,7 +4083,7 @@ pmc_process_interrupt(int cpu, struct pmc *pm, struct trapframe *tf, done: /* mark CPU as needing processing */ - atomic_set_int(&pmc_cpumask, (1 << cpu)); + CPU_SET_ATOMIC(cpu, &pmc_cpumask); return (error); } @@ -4193,7 +4193,7 @@ pmc_process_samples(int cpu) break; if (ps->ps_nsamples == PMC_SAMPLE_INUSE) { /* Need a rescan at a later time. */ - atomic_set_int(&pmc_cpumask, (1 << cpu)); + CPU_SET_ATOMIC(cpu, &pmc_cpumask); break; } @@ -4782,7 +4782,7 @@ pmc_cleanup(void) PMCDBG(MOD,INI,0, "%s", "cleanup"); /* switch off sampling */ - pmc_cpumask = 0; + CPU_ZERO(&pmc_cpumask); pmc_intr = NULL; sx_xlock(&pmc_sx); diff --git a/sys/dev/pccard/pccard.c b/sys/dev/pccard/pccard.c index 00cd1dc08b60..1de571c8e6c1 100644 --- a/sys/dev/pccard/pccard.c +++ b/sys/dev/pccard/pccard.c @@ -1405,8 +1405,8 @@ pccard_ccr_read_impl(device_t brdev, device_t child, uint32_t offset, struct pccard_ivar *devi = PCCARD_IVAR(child); *val = pccard_ccr_read(devi->pf, offset); - device_printf(child, "ccr_read of %#x (%#x) is %#x\n", offset, - devi->pf->pf_ccr_offset, *val); + DEVPRINTF((child, "ccr_read of %#x (%#x) is %#x\n", offset, + devi->pf->pf_ccr_offset, *val)); return 0; } @@ -1421,8 +1421,8 @@ pccard_ccr_write_impl(device_t brdev, device_t child, uint32_t offset, * Can't use pccard_ccr_write since client drivers may access * registers not contained in the 'mask' if they are non-standard. */ - device_printf(child, "ccr_write of %#x to %#x (%#x)\n", val, offset, - devi->pf->pf_ccr_offset); + DEVPRINTF((child, "ccr_write of %#x to %#x (%#x)\n", val, offset, + devi->pf->pf_ccr_offset)); bus_space_write_1(pf->pf_ccrt, pf->pf_ccrh, pf->pf_ccr_offset + offset, val); return 0; diff --git a/sys/dev/pci/pci.c b/sys/dev/pci/pci.c index 22046c1f30f7..9cd5a1c7a1ab 100644 --- a/sys/dev/pci/pci.c +++ b/sys/dev/pci/pci.c @@ -2576,6 +2576,17 @@ pci_add_map(device_t bus, device_t dev, int reg, struct resource_list *rl, uint16_t cmd; struct resource *res; + /* + * The BAR may already exist if the device is a CardBus card + * whose CIS is stored in this BAR. + */ + pm = pci_find_bar(dev, reg); + if (pm != NULL) { + maprange = pci_maprange(pm->pm_value); + barlen = maprange == 64 ? 2 : 1; + return (barlen); + } + pci_read_bar(dev, reg, &map, &testval); if (PCI_BAR_MEM(map)) { type = SYS_RES_MEMORY; diff --git a/sys/dev/puc/pucdata.c b/sys/dev/puc/pucdata.c index a56971e19a0a..2b38d9b171ef 100644 --- a/sys/dev/puc/pucdata.c +++ b/sys/dev/puc/pucdata.c @@ -51,12 +51,12 @@ static puc_config_f puc_config_amc; static puc_config_f puc_config_diva; static puc_config_f puc_config_exar; static puc_config_f puc_config_icbook; +static puc_config_f puc_config_oxford_pcie; static puc_config_f puc_config_quatech; static puc_config_f puc_config_syba; static puc_config_f puc_config_siig; static puc_config_f puc_config_timedia; static puc_config_f puc_config_titan; -static puc_config_f puc_config_oxford_pcie; const struct puc_cfg puc_pci_devices[] = { @@ -1366,14 +1366,12 @@ puc_config_oxford_pcie(struct puc_softc *sc, enum puc_cfg_cmd cmd, int port, bar = puc_get_bar(sc, cfg->rid); if (bar == NULL) return (ENXIO); - for (idx = 0; idx < sc->sc_nports; idx++) { - value = bus_read_1(bar->b_res, 0x1000 + (idx << 9) - + 0x92); + value = bus_read_1(bar->b_res, 0x1000 + (idx << 9) + + 0x92); bus_write_1(bar->b_res, 0x1000 + (idx << 9) + 0x92, - value | 0x10); + value | 0x10); } - return (0); case PUC_CFG_GET_LEN: *res = 0x200; diff --git a/sys/dev/sound/pcm/sound.c b/sys/dev/sound/pcm/sound.c index caa78416e958..958065f3a541 100644 --- a/sys/dev/sound/pcm/sound.c +++ b/sys/dev/sound/pcm/sound.c @@ -51,7 +51,7 @@ int pcm_veto_load = 1; int snd_unit = -1; TUNABLE_INT("hw.snd.default_unit", &snd_unit); -static int snd_unit_auto = 0; +static int snd_unit_auto = -1; TUNABLE_INT("hw.snd.default_auto", &snd_unit_auto); SYSCTL_INT(_hw_snd, OID_AUTO, default_auto, CTLFLAG_RW, &snd_unit_auto, 0, "assign default unit to a newly attached device"); @@ -443,6 +443,7 @@ sysctl_hw_snd_default_unit(SYSCTL_HANDLER_ARGS) if (!PCM_REGISTERED(d) || CHN_EMPTY(d, channels.pcm)) return EINVAL; snd_unit = unit; + snd_unit_auto = 0; } return (error); } @@ -737,6 +738,32 @@ pcm_killchan(device_t dev) return (pcm_chn_destroy(ch)); } +static int +pcm_best_unit(int old) +{ + struct snddev_info *d; + int i, best, bestprio, prio; + + best = -1; + bestprio = -100; + for (i = 0; pcm_devclass != NULL && + i < devclass_get_maxunit(pcm_devclass); i++) { + d = devclass_get_softc(pcm_devclass, i); + if (!PCM_REGISTERED(d)) + continue; + prio = 0; + if (d->playcount == 0) + prio -= 10; + if (d->reccount == 0) + prio -= 2; + if (prio > bestprio || (prio == bestprio && i == old)) { + best = i; + bestprio = prio; + } + } + return (best); +} + int pcm_setstatus(device_t dev, char *str) { @@ -770,8 +797,12 @@ pcm_setstatus(device_t dev, char *str) PCM_UNLOCK(d); - if (snd_unit < 0 || snd_unit_auto != 0) + if (snd_unit_auto < 0) + snd_unit_auto = (snd_unit < 0) ? 1 : 0; + if (snd_unit < 0 || snd_unit_auto > 1) snd_unit = device_get_unit(dev); + else if (snd_unit_auto == 1) + snd_unit = pcm_best_unit(snd_unit); return (0); } @@ -1113,7 +1144,6 @@ pcm_unregister(device_t dev) struct snddev_info *d; struct pcm_channel *ch; struct thread *td; - int i; td = curthread; d = device_get_softc(dev); @@ -1216,21 +1246,9 @@ pcm_unregister(device_t dev) sndstat_release(td); if (snd_unit == device_get_unit(dev)) { - /* - * Reassign default unit to the next available dev, but - * first, reset snd_unit to something ridiculous. - */ - snd_unit = -1; - for (i = 0; pcm_devclass != NULL && - i < devclass_get_maxunit(pcm_devclass); i++) { - if (device_get_unit(dev) == i) - continue; - d = devclass_get_softc(pcm_devclass, i); - if (PCM_REGISTERED(d)) { - snd_unit = i; - break; - } - } + snd_unit = pcm_best_unit(-1); + if (snd_unit_auto == 0) + snd_unit_auto = 1; } return (0); diff --git a/sys/dev/usb/usb_device.h b/sys/dev/usb/usb_device.h index c8bc5eb95a18..bf412214dd4f 100644 --- a/sys/dev/usb/usb_device.h +++ b/sys/dev/usb/usb_device.h @@ -187,6 +187,8 @@ struct usb_device { struct usb_host_endpoint *linux_endpoint_end; uint16_t devnum; #endif + + uint32_t clear_stall_errors; /* number of clear-stall failures */ }; /* globals */ diff --git a/sys/dev/usb/usb_freebsd.h b/sys/dev/usb/usb_freebsd.h index a44e53004d91..ae69cdb8926e 100644 --- a/sys/dev/usb/usb_freebsd.h +++ b/sys/dev/usb/usb_freebsd.h @@ -66,6 +66,7 @@ #define USB_HUB_MAX_DEPTH 5 #define USB_EP0_BUFSIZE 1024 /* bytes */ +#define USB_CS_RESET_LIMIT 20 /* failures = 20 * 50 ms = 1sec */ typedef uint32_t usb_timeout_t; /* milliseconds */ typedef uint32_t usb_frlength_t; /* bytes */ diff --git a/sys/dev/usb/usb_generic.c b/sys/dev/usb/usb_generic.c index 714ee6f368d5..d62f8f9466ce 100644 --- a/sys/dev/usb/usb_generic.c +++ b/sys/dev/usb/usb_generic.c @@ -966,10 +966,8 @@ ugen_re_enumerate(struct usb_fifo *f) /* ignore any errors */ DPRINTFN(6, "no FIFOs\n"); } - if (udev->re_enumerate_wait == 0) { - udev->re_enumerate_wait = 1; - usb_needs_explore(udev->bus, 0); - } + /* start re-enumeration of device */ + usbd_start_re_enumerate(udev); return (0); } diff --git a/sys/dev/usb/usb_hub.c b/sys/dev/usb/usb_hub.c index ce8a4a5139f3..351b1343dae5 100644 --- a/sys/dev/usb/usb_hub.c +++ b/sys/dev/usb/usb_hub.c @@ -242,9 +242,14 @@ uhub_explore_sub(struct uhub_softc *sc, struct usb_port *up) if (child->flags.usb_mode == USB_MODE_HOST) { usbd_enum_lock(child); if (child->re_enumerate_wait) { - err = usbd_set_config_index(child, USB_UNCONFIG_INDEX); - if (err == 0) - err = usbd_req_re_enumerate(child, NULL); + err = usbd_set_config_index(child, + USB_UNCONFIG_INDEX); + if (err != 0) { + DPRINTF("Unconfigure failed: " + "%s: Ignored.\n", + usbd_errstr(err)); + } + err = usbd_req_re_enumerate(child, NULL); if (err == 0) err = usbd_set_config_index(child, 0); if (err == 0) { @@ -2471,3 +2476,19 @@ usbd_filter_power_mode(struct usb_device *udev, uint8_t power_mode) /* use fixed power mode given by hardware driver */ return (temp); } + +/*------------------------------------------------------------------------* + * usbd_start_re_enumerate + * + * This function starts re-enumeration of the given USB device. This + * function does not need to be called BUS-locked. This function does + * not wait until the re-enumeration is completed. + *------------------------------------------------------------------------*/ +void +usbd_start_re_enumerate(struct usb_device *udev) +{ + if (udev->re_enumerate_wait == 0) { + udev->re_enumerate_wait = 1; + usb_needs_explore(udev->bus, 0); + } +} diff --git a/sys/dev/usb/usb_request.c b/sys/dev/usb/usb_request.c index c099e7173e2a..4358ef42030b 100644 --- a/sys/dev/usb/usb_request.c +++ b/sys/dev/usb/usb_request.c @@ -238,6 +238,10 @@ usb_do_clear_stall_callback(struct usb_xfer *xfer, usb_error_t error) switch (USB_GET_STATE(xfer)) { case USB_ST_TRANSFERRED: + + /* reset error counter */ + udev->clear_stall_errors = 0; + if (ep == NULL) goto tr_setup; /* device was unconfigured */ if (ep->edesc && @@ -289,8 +293,23 @@ usb_do_clear_stall_callback(struct usb_xfer *xfer, usb_error_t error) goto tr_setup; default: - if (xfer->error == USB_ERR_CANCELLED) { + if (error == USB_ERR_CANCELLED) break; + + DPRINTF("Clear stall failed.\n"); + if (udev->clear_stall_errors == USB_CS_RESET_LIMIT) + goto tr_setup; + + if (error == USB_ERR_TIMEOUT) { + udev->clear_stall_errors = USB_CS_RESET_LIMIT; + DPRINTF("Trying to re-enumerate.\n"); + usbd_start_re_enumerate(udev); + } else { + udev->clear_stall_errors++; + if (udev->clear_stall_errors == USB_CS_RESET_LIMIT) { + DPRINTF("Trying to re-enumerate.\n"); + usbd_start_re_enumerate(udev); + } } goto tr_setup; } @@ -1936,6 +1955,23 @@ usbd_req_re_enumerate(struct usb_device *udev, struct mtx *mtx) return (USB_ERR_INVAL); } retry: + /* + * Try to reset the High Speed parent HUB of a LOW- or FULL- + * speed device, if any. + */ + if (udev->parent_hs_hub != NULL && + udev->speed != USB_SPEED_HIGH) { + DPRINTF("Trying to reset parent High Speed TT.\n"); + err = usbd_req_reset_tt(udev->parent_hs_hub, NULL, + udev->hs_port_no); + if (err) { + DPRINTF("Resetting parent High " + "Speed TT failed (%s).\n", + usbd_errstr(err)); + } + } + + /* Try to reset the parent HUB port. */ err = usbd_req_reset_port(parent_hub, mtx, udev->port_no); if (err) { DPRINTFN(0, "addr=%d, port reset failed, %s\n", @@ -2033,3 +2069,65 @@ usbd_req_set_device_feature(struct usb_device *udev, struct mtx *mtx, USETW(req.wLength, 0); return (usbd_do_request(udev, mtx, &req, 0)); } + +/*------------------------------------------------------------------------* + * usbd_req_reset_tt + * + * Returns: + * 0: Success + * Else: Failure + *------------------------------------------------------------------------*/ +usb_error_t +usbd_req_reset_tt(struct usb_device *udev, struct mtx *mtx, + uint8_t port) +{ + struct usb_device_request req; + + /* For single TT HUBs the port should be 1 */ + + if (udev->ddesc.bDeviceClass == UDCLASS_HUB && + udev->ddesc.bDeviceProtocol == UDPROTO_HSHUBSTT) + port = 1; + + req.bmRequestType = UT_WRITE_CLASS_OTHER; + req.bRequest = UR_RESET_TT; + USETW(req.wValue, 0); + req.wIndex[0] = port; + req.wIndex[1] = 0; + USETW(req.wLength, 0); + return (usbd_do_request(udev, mtx, &req, 0)); +} + +/*------------------------------------------------------------------------* + * usbd_req_clear_tt_buffer + * + * For single TT HUBs the port should be 1. + * + * Returns: + * 0: Success + * Else: Failure + *------------------------------------------------------------------------*/ +usb_error_t +usbd_req_clear_tt_buffer(struct usb_device *udev, struct mtx *mtx, + uint8_t port, uint8_t addr, uint8_t type, uint8_t endpoint) +{ + struct usb_device_request req; + uint16_t wValue; + + /* For single TT HUBs the port should be 1 */ + + if (udev->ddesc.bDeviceClass == UDCLASS_HUB && + udev->ddesc.bDeviceProtocol == UDPROTO_HSHUBSTT) + port = 1; + + wValue = (endpoint & 0xF) | ((addr & 0x7F) << 4) | + ((endpoint & 0x80) << 8) | ((type & 3) << 12); + + req.bmRequestType = UT_WRITE_CLASS_OTHER; + req.bRequest = UR_CLEAR_TT_BUFFER; + USETW(req.wValue, wValue); + req.wIndex[0] = port; + req.wIndex[1] = 0; + USETW(req.wLength, 0); + return (usbd_do_request(udev, mtx, &req, 0)); +} diff --git a/sys/dev/usb/usb_request.h b/sys/dev/usb/usb_request.h index 12f373d5fa5f..ac7a7c160b6b 100644 --- a/sys/dev/usb/usb_request.h +++ b/sys/dev/usb/usb_request.h @@ -85,5 +85,9 @@ usb_error_t usbd_req_set_hub_u2_timeout(struct usb_device *udev, struct mtx *mtx, uint8_t port, uint8_t timeout); usb_error_t usbd_req_set_hub_depth(struct usb_device *udev, struct mtx *mtx, uint16_t depth); +usb_error_t usbd_req_reset_tt(struct usb_device *udev, struct mtx *mtx, + uint8_t port); +usb_error_t usbd_req_clear_tt_buffer(struct usb_device *udev, struct mtx *mtx, + uint8_t port, uint8_t addr, uint8_t type, uint8_t endpoint); #endif /* _USB_REQUEST_H_ */ diff --git a/sys/dev/usb/usb_transfer.c b/sys/dev/usb/usb_transfer.c index 5fd4f5a19a92..d4c2408db7c9 100644 --- a/sys/dev/usb/usb_transfer.c +++ b/sys/dev/usb/usb_transfer.c @@ -2927,6 +2927,11 @@ usbd_ctrl_transfer_setup(struct usb_device *udev) */ usbd_transfer_unsetup(udev->ctrl_xfer, USB_CTRL_XFER_MAX); + /* + * Reset clear stall error counter. + */ + udev->clear_stall_errors = 0; + /* * Try to setup a new USB transfer for the * default control endpoint: diff --git a/sys/dev/usb/usbdi.h b/sys/dev/usb/usbdi.h index 8f6da7c68c81..91cd3fae3aaf 100644 --- a/sys/dev/usb/usbdi.h +++ b/sys/dev/usb/usbdi.h @@ -542,6 +542,7 @@ void usbd_m_copy_in(struct usb_page_cache *cache, usb_frlength_t dst_offset, struct mbuf *m, usb_size_t src_offset, usb_frlength_t src_len); void usbd_frame_zero(struct usb_page_cache *cache, usb_frlength_t offset, usb_frlength_t len); +void usbd_start_re_enumerate(struct usb_device *udev); int usb_fifo_attach(struct usb_device *udev, void *priv_sc, struct mtx *priv_mtx, struct usb_fifo_methods *pm, diff --git a/sys/dev/xen/control/control.c b/sys/dev/xen/control/control.c index c03d5365530b..0f4418142684 100644 --- a/sys/dev/xen/control/control.c +++ b/sys/dev/xen/control/control.c @@ -203,24 +203,29 @@ xctrl_suspend() unsigned long max_pfn, start_info_mfn; #ifdef SMP - cpumask_t map; + struct thread *td; + cpuset_t map; /* * Bind us to CPU 0 and stop any other VCPUs. */ - thread_lock(curthread); - sched_bind(curthread, 0); - thread_unlock(curthread); + td = curthread; + thread_lock(td); + sched_bind(td, 0); + thread_unlock(td); KASSERT(PCPU_GET(cpuid) == 0, ("xen_suspend: not running on cpu 0")); - map = PCPU_GET(other_cpus) & ~stopped_cpus; - if (map) + sched_pin(); + map = PCPU_GET(other_cpus); + sched_unpin(); + CPU_NAND(&map, &stopped_cpus); + if (!CPU_EMPTY(&map)) stop_cpus(map); #endif if (DEVICE_SUSPEND(root_bus) != 0) { printf("xen_suspend: device_suspend failed\n"); #ifdef SMP - if (map) + if (!CPU_EMPTY(&map)) restart_cpus(map); #endif return; @@ -289,7 +294,7 @@ xctrl_suspend() thread_lock(curthread); sched_unbind(curthread); thread_unlock(curthread); - if (map) + if (!CPU_EMPTY(&map)) restart_cpus(map); #endif } diff --git a/sys/fs/nfs/nfs_var.h b/sys/fs/nfs/nfs_var.h index 8ed60a727cc9..5f944b5538f1 100644 --- a/sys/fs/nfs/nfs_var.h +++ b/sys/fs/nfs/nfs_var.h @@ -401,10 +401,10 @@ int nfsrpc_readdirplus(vnode_t, struct uio *, nfsuint64 *, int nfsrpc_commit(vnode_t, u_quad_t, int, struct ucred *, NFSPROC_T *, u_char *, struct nfsvattr *, int *, void *); int nfsrpc_advlock(vnode_t, off_t, int, struct flock *, int, - struct ucred *, NFSPROC_T *); + struct ucred *, NFSPROC_T *, void *, int); int nfsrpc_lockt(struct nfsrv_descript *, vnode_t, struct nfsclclient *, u_int64_t, u_int64_t, struct flock *, - struct ucred *, NFSPROC_T *); + struct ucred *, NFSPROC_T *, void *, int); int nfsrpc_lock(struct nfsrv_descript *, struct nfsmount *, vnode_t, u_int8_t *, int, struct nfscllockowner *, int, int, u_int64_t, u_int64_t, short, struct ucred *, NFSPROC_T *, int); @@ -439,16 +439,16 @@ struct nfsclclient *nfscl_findcl(struct nfsmount *); void nfscl_clientrelease(struct nfsclclient *); void nfscl_freelock(struct nfscllock *, int); int nfscl_getbytelock(vnode_t, u_int64_t, u_int64_t, short, - struct ucred *, NFSPROC_T *, struct nfsclclient *, int, u_int8_t *, - u_int8_t *, struct nfscllockowner **, int *, int *); + struct ucred *, NFSPROC_T *, struct nfsclclient *, int, void *, int, + u_int8_t *, u_int8_t *, struct nfscllockowner **, int *, int *); int nfscl_relbytelock(vnode_t, u_int64_t, u_int64_t, struct ucred *, NFSPROC_T *, int, struct nfsclclient *, - struct nfscllockowner **, int *); + void *, int, struct nfscllockowner **, int *); int nfscl_checkwritelocked(vnode_t, struct flock *, - struct ucred *, NFSPROC_T *); + struct ucred *, NFSPROC_T *, void *, int); void nfscl_lockrelease(struct nfscllockowner *, int, int); void nfscl_fillclid(u_int64_t, char *, u_int8_t *, u_int16_t); -void nfscl_filllockowner(NFSPROC_T *, u_int8_t *); +void nfscl_filllockowner(void *, u_int8_t *, int); void nfscl_freeopen(struct nfsclopen *, int); void nfscl_umount(struct nfsmount *, NFSPROC_T *); void nfscl_renewthread(struct nfsclclient *, NFSPROC_T *); @@ -466,9 +466,10 @@ void nfscl_lockexcl(struct nfsv4lock *, void *); void nfscl_lockunlock(struct nfsv4lock *); void nfscl_lockderef(struct nfsv4lock *); void nfscl_docb(struct nfsrv_descript *, NFSPROC_T *); -void nfscl_releasealllocks(struct nfsclclient *, vnode_t, NFSPROC_T *); +void nfscl_releasealllocks(struct nfsclclient *, vnode_t, NFSPROC_T *, void *, + int); int nfscl_lockt(vnode_t, struct nfsclclient *, u_int64_t, - u_int64_t, struct flock *, NFSPROC_T *); + u_int64_t, struct flock *, NFSPROC_T *, void *, int); int nfscl_mustflush(vnode_t); int nfscl_nodeleg(vnode_t, int); int nfscl_removedeleg(vnode_t, NFSPROC_T *, nfsv4stateid_t *); diff --git a/sys/fs/nfsclient/nfs_clport.c b/sys/fs/nfsclient/nfs_clport.c index 0c3a4c90b07d..4d88bd27eab9 100644 --- a/sys/fs/nfsclient/nfs_clport.c +++ b/sys/fs/nfsclient/nfs_clport.c @@ -500,7 +500,7 @@ nfscl_fillclid(u_int64_t clval, char *uuid, u_int8_t *cp, u_int16_t idlen) * Fill in a lock owner name. For now, pid + the process's creation time. */ void -nfscl_filllockowner(struct thread *td, u_int8_t *cp) +nfscl_filllockowner(void *id, u_int8_t *cp, int flags) { union { u_int32_t lval; @@ -508,37 +508,35 @@ nfscl_filllockowner(struct thread *td, u_int8_t *cp) } tl; struct proc *p; -if (td == NULL) { - printf("NULL td\n"); - bzero(cp, 12); - return; -} - p = td->td_proc; -if (p == NULL) { - printf("NULL pid\n"); - bzero(cp, 12); - return; -} - tl.lval = p->p_pid; - *cp++ = tl.cval[0]; - *cp++ = tl.cval[1]; - *cp++ = tl.cval[2]; - *cp++ = tl.cval[3]; -if (p->p_stats == NULL) { - printf("pstats null\n"); - bzero(cp, 8); - return; -} - tl.lval = p->p_stats->p_start.tv_sec; - *cp++ = tl.cval[0]; - *cp++ = tl.cval[1]; - *cp++ = tl.cval[2]; - *cp++ = tl.cval[3]; - tl.lval = p->p_stats->p_start.tv_usec; - *cp++ = tl.cval[0]; - *cp++ = tl.cval[1]; - *cp++ = tl.cval[2]; - *cp = tl.cval[3]; + if (id == NULL) { + printf("NULL id\n"); + bzero(cp, NFSV4CL_LOCKNAMELEN); + return; + } + if ((flags & F_POSIX) != 0) { + p = (struct proc *)id; + tl.lval = p->p_pid; + *cp++ = tl.cval[0]; + *cp++ = tl.cval[1]; + *cp++ = tl.cval[2]; + *cp++ = tl.cval[3]; + tl.lval = p->p_stats->p_start.tv_sec; + *cp++ = tl.cval[0]; + *cp++ = tl.cval[1]; + *cp++ = tl.cval[2]; + *cp++ = tl.cval[3]; + tl.lval = p->p_stats->p_start.tv_usec; + *cp++ = tl.cval[0]; + *cp++ = tl.cval[1]; + *cp++ = tl.cval[2]; + *cp = tl.cval[3]; + } else if ((flags & F_FLOCK) != 0) { + bcopy(&id, cp, sizeof(id)); + bzero(&cp[sizeof(id)], NFSV4CL_LOCKNAMELEN - sizeof(id)); + } else { + printf("nfscl_filllockowner: not F_POSIX or F_FLOCK\n"); + bzero(cp, NFSV4CL_LOCKNAMELEN); + } } /* @@ -943,6 +941,7 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p) sad.sin_family = AF_INET; sad.sin_len = sizeof (struct sockaddr_in); sad.sin_addr.s_addr = sin->sin_addr.s_addr; + CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred)); rt = rtalloc1((struct sockaddr *)&sad, 0, 0UL); if (rt != NULL) { if (rt->rt_ifp != NULL && @@ -956,6 +955,7 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p) } RTFREE_LOCKED(rt); } + CURVNET_RESTORE(); #ifdef INET6 } else if (nmp->nm_nam->sa_family == AF_INET6) { struct sockaddr_in6 sad6, *sin6; @@ -966,6 +966,7 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p) sad6.sin6_family = AF_INET6; sad6.sin6_len = sizeof (struct sockaddr_in6); sad6.sin6_addr = sin6->sin6_addr; + CURVNET_SET(CRED_TO_VNET(nmp->nm_sockreq.nr_cred)); rt = rtalloc1((struct sockaddr *)&sad6, 0, 0UL); if (rt != NULL) { if (rt->rt_ifp != NULL && @@ -980,6 +981,7 @@ nfscl_getmyip(struct nfsmount *nmp, int *isinet6p) } RTFREE_LOCKED(rt); } + CURVNET_RESTORE(); #endif } return (retp); diff --git a/sys/fs/nfsclient/nfs_clrpcops.c b/sys/fs/nfsclient/nfs_clrpcops.c index 0fc9bfd1da92..5d83d0bafd25 100644 --- a/sys/fs/nfsclient/nfs_clrpcops.c +++ b/sys/fs/nfsclient/nfs_clrpcops.c @@ -3459,7 +3459,7 @@ nfsrpc_commit(vnode_t vp, u_quad_t offset, int cnt, struct ucred *cred, */ APPLESTATIC int nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, - int reclaim, struct ucred *cred, NFSPROC_T *p) + int reclaim, struct ucred *cred, NFSPROC_T *p, void *id, int flags) { struct nfscllockowner *lp; struct nfsclclient *clp; @@ -3511,11 +3511,11 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, error = nfscl_getcl(vp, cred, p, &clp); if (error) return (error); - error = nfscl_lockt(vp, clp, off, len, fl, p); + error = nfscl_lockt(vp, clp, off, len, fl, p, id, flags); if (!error) { clidrev = clp->nfsc_clientidrev; error = nfsrpc_lockt(nd, vp, clp, off, len, fl, cred, - p); + p, id, flags); } else if (error == -1) { error = 0; } @@ -3530,7 +3530,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, return (error); do { error = nfscl_relbytelock(vp, off, len, cred, p, callcnt, - clp, &lp, &dorpc); + clp, id, flags, &lp, &dorpc); /* * If it returns a NULL lp, we're done. */ @@ -3538,7 +3538,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, if (callcnt == 0) nfscl_clientrelease(clp); else - nfscl_releasealllocks(clp, vp, p); + nfscl_releasealllocks(clp, vp, p, id, flags); return (error); } if (nmp->nm_clp != NULL) @@ -3572,10 +3572,10 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, } callcnt++; } while (error == 0 && nd->nd_repstat == 0); - nfscl_releasealllocks(clp, vp, p); + nfscl_releasealllocks(clp, vp, p, id, flags); } else if (op == F_SETLK) { error = nfscl_getbytelock(vp, off, len, fl->l_type, cred, p, - NULL, 0, NULL, NULL, &lp, &newone, &donelocally); + NULL, 0, id, flags, NULL, NULL, &lp, &newone, &donelocally); if (error || donelocally) { return (error); } @@ -3625,7 +3625,7 @@ nfsrpc_advlock(vnode_t vp, off_t size, int op, struct flock *fl, APPLESTATIC int nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp, struct nfsclclient *clp, u_int64_t off, u_int64_t len, struct flock *fl, - struct ucred *cred, NFSPROC_T *p) + struct ucred *cred, NFSPROC_T *p, void *id, int flags) { u_int32_t *tl; int error, type, size; @@ -3643,7 +3643,7 @@ nfsrpc_lockt(struct nfsrv_descript *nd, vnode_t vp, tl += 2; *tl++ = clp->nfsc_clientid.lval[0]; *tl = clp->nfsc_clientid.lval[1]; - nfscl_filllockowner(p, own); + nfscl_filllockowner(id, own, flags); (void) nfsm_strtom(nd, own, NFSV4CL_LOCKNAMELEN); error = nfscl_request(nd, vp, p, cred, NULL); if (error) diff --git a/sys/fs/nfsclient/nfs_clstate.c b/sys/fs/nfsclient/nfs_clstate.c index 86d71b66bc9b..aa81437fb2ed 100644 --- a/sys/fs/nfsclient/nfs_clstate.c +++ b/sys/fs/nfsclient/nfs_clstate.c @@ -226,7 +226,7 @@ nfscl_open(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t amode, int usedeleg, * If none found, add the new one or return error, depending upon * "create". */ - nfscl_filllockowner(p, own); + nfscl_filllockowner(p->td_proc, own, F_POSIX); NFSLOCKCLSTATE(); dp = NULL; /* First check the delegation list */ @@ -521,7 +521,7 @@ nfscl_getstateid(vnode_t vp, u_int8_t *nfhp, int fhlen, u_int32_t mode, * If p != NULL, we want to search the parentage tree * for a matching OpenOwner and use that. */ - nfscl_filllockowner(p, own); + nfscl_filllockowner(p->td_proc, own, F_POSIX); error = nfscl_getopen(&clp->nfsc_owner, nfhp, fhlen, NULL, p, mode, NULL, &op); if (error == 0) { @@ -596,7 +596,7 @@ nfscl_getopen(struct nfsclownerhead *ohp, u_int8_t *nfhp, int fhlen, op = NULL; while (op == NULL && (nproc != NULL || rown != NULL)) { if (nproc != NULL) { - nfscl_filllockowner(nproc, own); + nfscl_filllockowner(nproc->td_proc, own, F_POSIX); ownp = own; } else { ownp = rown; @@ -881,7 +881,7 @@ nfscl_clientrelease(struct nfsclclient *clp) APPLESTATIC int nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len, short type, struct ucred *cred, NFSPROC_T *p, struct nfsclclient *rclp, - int recovery, u_int8_t *rownp, u_int8_t *ropenownp, + int recovery, void *id, int flags, u_int8_t *rownp, u_int8_t *ropenownp, struct nfscllockowner **lpp, int *newonep, int *donelocallyp) { struct nfscllockowner *lp; @@ -942,7 +942,7 @@ nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len, if (recovery) { ownp = rownp; } else { - nfscl_filllockowner(p, own); + nfscl_filllockowner(id, own, flags); ownp = own; } if (!recovery) { @@ -1079,7 +1079,8 @@ nfscl_getbytelock(vnode_t vp, u_int64_t off, u_int64_t len, APPLESTATIC int nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len, __unused struct ucred *cred, NFSPROC_T *p, int callcnt, - struct nfsclclient *clp, struct nfscllockowner **lpp, int *dorpcp) + struct nfsclclient *clp, void *id, int flags, + struct nfscllockowner **lpp, int *dorpcp) { struct nfscllockowner *lp; struct nfsclowner *owp; @@ -1116,7 +1117,7 @@ nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len, sizeof (struct nfscllock), M_NFSCLLOCK, M_WAITOK); *other_lop = *nlop; } - nfscl_filllockowner(p, own); + nfscl_filllockowner(id, own, flags); dp = NULL; NFSLOCKCLSTATE(); if (callcnt == 0) @@ -1188,7 +1189,8 @@ nfscl_relbytelock(vnode_t vp, u_int64_t off, u_int64_t len, * Release all lockowners marked in progess for this process and file. */ APPLESTATIC void -nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p) +nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p, + void *id, int flags) { struct nfsclowner *owp; struct nfsclopen *op; @@ -1197,7 +1199,7 @@ nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p) u_int8_t own[NFSV4CL_LOCKNAMELEN]; np = VTONFS(vp); - nfscl_filllockowner(p, own); + nfscl_filllockowner(id, own, flags); NFSLOCKCLSTATE(); LIST_FOREACH(owp, &clp->nfsc_owner, nfsow_list) { LIST_FOREACH(op, &owp->nfsow_open, nfso_list) { @@ -1226,7 +1228,7 @@ nfscl_releasealllocks(struct nfsclclient *clp, vnode_t vp, NFSPROC_T *p) */ APPLESTATIC int nfscl_checkwritelocked(vnode_t vp, struct flock *fl, - struct ucred *cred, NFSPROC_T *p) + struct ucred *cred, NFSPROC_T *p, void *id, int flags) { struct nfsclowner *owp; struct nfscllockowner *lp; @@ -1266,7 +1268,7 @@ nfscl_checkwritelocked(vnode_t vp, struct flock *fl, error = nfscl_getcl(vp, cred, p, &clp); if (error) return (1); - nfscl_filllockowner(p, own); + nfscl_filllockowner(id, own, flags); NFSLOCKCLSTATE(); /* @@ -1641,7 +1643,7 @@ nfscl_cleanup(NFSPROC_T *p) if (!nfscl_inited) return; - nfscl_filllockowner(p, own); + nfscl_filllockowner(p->td_proc, own, F_POSIX); NFSLOCKCLSTATE(); /* @@ -3322,7 +3324,7 @@ nfscl_checkconflict(struct nfscllockownerhead *lhp, struct nfscllock *nlop, */ APPLESTATIC int nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off, - u_int64_t len, struct flock *fl, NFSPROC_T *p) + u_int64_t len, struct flock *fl, NFSPROC_T *p, void *id, int flags) { struct nfscllock *lop, nlck; struct nfscldeleg *dp; @@ -3340,7 +3342,7 @@ nfscl_lockt(vnode_t vp, struct nfsclclient *clp, u_int64_t off, return (NFSERR_INVAL); } np = VTONFS(vp); - nfscl_filllockowner(p, own); + nfscl_filllockowner(id, own, flags); NFSLOCKCLSTATE(); dp = nfscl_finddeleg(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len); error = nfscl_localconflict(clp, np->n_fhp->nfh_fh, np->n_fhp->nfh_len, @@ -3615,7 +3617,7 @@ nfscl_relock(vnode_t vp, struct nfsclclient *clp, struct nfsmount *nmp, off = lop->nfslo_first; len = lop->nfslo_end - lop->nfslo_first; error = nfscl_getbytelock(vp, off, len, lop->nfslo_type, cred, p, - clp, 1, lp->nfsl_owner, lp->nfsl_openowner, &nlp, &newone, + clp, 1, NULL, 0, lp->nfsl_owner, lp->nfsl_openowner, &nlp, &newone, &donelocally); if (error || donelocally) return (error); diff --git a/sys/fs/nfsclient/nfs_clvnops.c b/sys/fs/nfsclient/nfs_clvnops.c index 984724d93aed..3e1c66d1de2d 100644 --- a/sys/fs/nfsclient/nfs_clvnops.c +++ b/sys/fs/nfsclient/nfs_clvnops.c @@ -2884,8 +2884,11 @@ nfs_advlock(struct vop_advlock_args *ap) int ret, error = EOPNOTSUPP; u_quad_t size; - if (NFS_ISV4(vp) && (ap->a_flags & F_POSIX)) { - cred = p->p_ucred; + if (NFS_ISV4(vp) && (ap->a_flags & (F_POSIX | F_FLOCK)) != 0) { + if ((ap->a_flags & F_POSIX) != 0) + cred = p->p_ucred; + else + cred = td->td_ucred; vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); if (vp->v_iflag & VI_DOOMED) { VOP_UNLOCK(vp, 0); @@ -2898,7 +2901,8 @@ nfs_advlock(struct vop_advlock_args *ap) * RFC3530 Sec. 9.3.2. */ if (ap->a_op == F_UNLCK && - nfscl_checkwritelocked(vp, ap->a_fl, cred, td)) + nfscl_checkwritelocked(vp, ap->a_fl, cred, td, ap->a_id, + ap->a_flags)) (void) ncl_flush(vp, MNT_WAIT, cred, td, 1, 0); /* @@ -2907,7 +2911,7 @@ nfs_advlock(struct vop_advlock_args *ap) */ do { ret = nfsrpc_advlock(vp, np->n_size, ap->a_op, - ap->a_fl, 0, cred, td); + ap->a_fl, 0, cred, td, ap->a_id, ap->a_flags); if (ret == NFSERR_DENIED && (ap->a_flags & F_WAIT) && ap->a_op == F_SETLK) { VOP_UNLOCK(vp, 0); diff --git a/sys/geom/eli/g_eli.c b/sys/geom/eli/g_eli.c index 74c70ff2f07e..30497a453ea8 100644 --- a/sys/geom/eli/g_eli.c +++ b/sys/geom/eli/g_eli.c @@ -672,7 +672,7 @@ static int g_eli_cpu_is_disabled(int cpu) { #ifdef SMP - return ((hlt_cpus_mask & (1 << cpu)) != 0); + return (CPU_ISSET(cpu, &hlt_cpus_mask)); #else return (0); #endif diff --git a/sys/i386/i386/intr_machdep.c b/sys/i386/i386/intr_machdep.c index 77b80048620f..56529f7b5b03 100644 --- a/sys/i386/i386/intr_machdep.c +++ b/sys/i386/i386/intr_machdep.c @@ -409,8 +409,7 @@ DB_SHOW_COMMAND(irqs, db_show_irqs) * allocate CPUs round-robin. */ -/* The BSP is always a valid target. */ -static cpumask_t intr_cpus = (1 << 0); +static cpuset_t intr_cpus; static int current_cpu; /* @@ -432,7 +431,7 @@ intr_next_cpu(void) current_cpu++; if (current_cpu > mp_maxid) current_cpu = 0; - } while (!(intr_cpus & (1 << current_cpu))); + } while (!CPU_ISSET(current_cpu, &intr_cpus)); mtx_unlock_spin(&icu_lock); return (apic_id); } @@ -463,7 +462,7 @@ intr_add_cpu(u_int cpu) printf("INTR: Adding local APIC %d as a target\n", cpu_apic_ids[cpu]); - intr_cpus |= (1 << cpu); + CPU_SET(cpu, &intr_cpus); } /* @@ -483,6 +482,9 @@ intr_shuffle_irqs(void *arg __unused) return; #endif + /* The BSP is always a valid target. */ + CPU_SETOF(0, &intr_cpus); + /* Don't bother on UP. */ if (mp_ncpus == 1) return; diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c index fbf444a3eb4a..91050c43bee3 100644 --- a/sys/i386/i386/machdep.c +++ b/sys/i386/i386/machdep.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" +#include "opt_mp_watchdog.h" #include "opt_npx.h" #include "opt_perfmon.h" #include "opt_xbox.h" @@ -118,6 +119,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -1357,9 +1359,8 @@ cpu_idle(int busy) CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu); -#if defined(SMP) && !defined(XEN) - if (mp_grab_cpu_hlt()) - return; +#if defined(MP_WATCHDOG) && !defined(XEN) + ap_watchdog(PCPU_GET(cpuid)); #endif #ifndef XEN /* If we are busy - try to use fast methods. */ diff --git a/sys/i386/i386/mp_machdep.c b/sys/i386/i386/mp_machdep.c index a4db4016f8e9..78c90c0f73c7 100644 --- a/sys/i386/i386/mp_machdep.c +++ b/sys/i386/i386/mp_machdep.c @@ -29,7 +29,6 @@ __FBSDID("$FreeBSD$"); #include "opt_apic.h" #include "opt_cpu.h" #include "opt_kstack_pages.h" -#include "opt_mp_watchdog.h" #include "opt_pmap.h" #include "opt_sched.h" #include "opt_smp.h" @@ -51,6 +50,7 @@ __FBSDID("$FreeBSD$"); #include #include #include /* cngetc() */ +#include #ifdef GPROF #include #endif @@ -77,7 +77,6 @@ __FBSDID("$FreeBSD$"); #include #include #include -#include #include #include #include @@ -173,7 +172,7 @@ static u_long *ipi_hardclock_counts[MAXCPU]; * Local data and functions. */ -static volatile cpumask_t ipi_nmi_pending; +static volatile cpuset_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ static struct mtx ap_boot_mtx; @@ -208,11 +207,8 @@ static int start_all_aps(void); static int start_ap(int apic_id); static void release_aps(void *dummy); -static int hlt_logical_cpus; static u_int hyperthreading_cpus; /* logical cpus sharing L1 cache */ -static cpumask_t hyperthreading_cpus_mask; static int hyperthreading_allowed = 1; -static struct sysctl_ctx_list logical_cpu_clist; static void mem_range_AP_init(void) @@ -289,8 +285,11 @@ topo_probe_0x4(void) * logical processors that belong to the same core * as BSP thus deducing number of threads per core. */ - cpuid_count(0x04, 0, p); - max_cores = ((p[0] >> 26) & 0x3f) + 1; + if (cpu_high >= 0x4) { + cpuid_count(0x04, 0, p); + max_cores = ((p[0] >> 26) & 0x3f) + 1; + } else + max_cores = 1; core_id_bits = mask_width(max_logical/max_cores); if (core_id_bits < 0) return; @@ -382,7 +381,7 @@ topo_probe(void) if (cpu_topo_probed) return; - logical_cpus_mask = 0; + CPU_ZERO(&logical_cpus_mask); if (mp_ncpus <= 1) cpu_cores = cpu_logical = 1; else if (cpu_vendor_id == CPU_VENDOR_AMD) @@ -524,7 +523,7 @@ cpu_mp_probe(void) * Always record BSP in CPU map so that the mbuf init code works * correctly. */ - all_cpus = 1; + CPU_SETOF(0, &all_cpus); if (mp_ncpus == 0) { /* * No CPUs were found, so this must be a UP system. Setup @@ -659,6 +658,7 @@ cpu_mp_announce(void) void init_secondary(void) { + cpuset_t tcpuset, tallcpus; struct pcpu *pc; vm_offset_t addr; int gsel_tss; @@ -783,19 +783,17 @@ init_secondary(void) CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); + tcpuset = PCPU_GET(cpumask); /* Determine if we are a logical CPU. */ /* XXX Calculation depends on cpu_logical being a power of 2, e.g. 2 */ if (cpu_logical > 1 && PCPU_GET(apic_id) % cpu_logical != 0) - logical_cpus_mask |= PCPU_GET(cpumask); - - /* Determine if we are a hyperthread. */ - if (hyperthreading_cpus > 1 && - PCPU_GET(apic_id) % hyperthreading_cpus != 0) - hyperthreading_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&logical_cpus_mask, &tcpuset); /* Build our map of 'other' CPUs. */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, &tcpuset); + PCPU_SET(other_cpus, tallcpus); if (bootverbose) lapic_dump("AP"); @@ -874,7 +872,7 @@ assign_cpu_ids(void) if (hyperthreading_cpus > 1 && i % hyperthreading_cpus != 0) { cpu_info[i].cpu_hyperthread = 1; -#if defined(SCHED_ULE) + /* * Don't use HT CPU if it has been disabled by a * tunable. @@ -883,7 +881,6 @@ assign_cpu_ids(void) cpu_info[i].cpu_disabled = 1; continue; } -#endif } /* Don't use this CPU if it has been disabled by a tunable. */ @@ -893,6 +890,11 @@ assign_cpu_ids(void) } } + if (hyperthreading_allowed == 0 && hyperthreading_cpus > 1) { + hyperthreading_cpus = 0; + cpu_logical = 1; + } + /* * Assign CPU IDs to local APIC IDs and disable any CPUs * beyond MAXCPU. CPU 0 is always assigned to the BSP. @@ -932,6 +934,7 @@ assign_cpu_ids(void) static int start_all_aps(void) { + cpuset_t tallcpus; #ifndef PC98 u_char mpbiosreason; #endif @@ -991,11 +994,13 @@ start_all_aps(void) } CHECK_PRINT("trace"); /* show checkpoints */ - all_cpus |= (1 << cpu); /* record AP in CPU map */ + CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } /* build our map of 'other' CPUs */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, PCPU_PTR(cpumask)); + PCPU_SET(other_cpus, tallcpus); /* restore the warmstart vector */ *(u_int32_t *) WARMBOOT_OFF = mpbioswarmvec; @@ -1191,6 +1196,30 @@ SYSCTL_INT(_debug_xhits, OID_AUTO, ipi_masked_range_size, CTLFLAG_RW, &ipi_masked_range_size, 0, ""); #endif /* COUNT_XINVLTLB_HITS */ +/* + * Send an IPI to specified CPU handling the bitmap logic. + */ +static void +ipi_send_cpu(int cpu, u_int ipi) +{ + u_int bitmap, old_pending, new_pending; + + KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); + + if (IPI_IS_BITMAPED(ipi)) { + bitmap = 1 << ipi; + ipi = IPI_BITMAP_VECTOR; + do { + old_pending = cpu_ipi_pending[cpu]; + new_pending = old_pending | bitmap; + } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], + old_pending, new_pending)); + if (old_pending) + return; + } + lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); +} + /* * Flush the TLB on all other CPU's */ @@ -1215,28 +1244,19 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) } static void -smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) +smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) { - int ncpu, othercpus; + int cpu, ncpu, othercpus; othercpus = mp_ncpus - 1; - if (mask == (u_int)-1) { - ncpu = othercpus; - if (ncpu < 1) + if (CPU_ISFULLSET(&mask)) { + if (othercpus < 1) return; } else { - mask &= ~PCPU_GET(cpumask); - if (mask == 0) - return; - ncpu = bitcount32(mask); - if (ncpu > othercpus) { - /* XXX this should be a panic offence */ - printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", - ncpu, othercpus); - ncpu = othercpus; - } - /* XXX should be a panic, implied by mask == 0 above */ - if (ncpu < 1) + sched_pin(); + CPU_NAND(&mask, PCPU_PTR(cpumask)); + sched_unpin(); + if (CPU_EMPTY(&mask)) return; } if (!(read_eflags() & PSL_I)) @@ -1245,39 +1265,25 @@ smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_o smp_tlb_addr1 = addr1; smp_tlb_addr2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); - if (mask == (u_int)-1) + if (CPU_ISFULLSET(&mask)) { + ncpu = othercpus; ipi_all_but_self(vector); - else - ipi_selected(mask, vector); + } else { + ncpu = 0; + while ((cpu = cpusetobj_ffs(&mask)) != 0) { + cpu--; + CPU_CLR(cpu, &mask); + CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, + vector); + ipi_send_cpu(cpu, vector); + ncpu++; + } + } while (smp_tlb_wait < ncpu) ia32_pause(); mtx_unlock_spin(&smp_ipi_mtx); } -/* - * Send an IPI to specified CPU handling the bitmap logic. - */ -static void -ipi_send_cpu(int cpu, u_int ipi) -{ - u_int bitmap, old_pending, new_pending; - - KASSERT(cpu_apic_ids[cpu] != -1, ("IPI to non-existent CPU %d", cpu)); - - if (IPI_IS_BITMAPED(ipi)) { - bitmap = 1 << ipi; - ipi = IPI_BITMAP_VECTOR; - do { - old_pending = cpu_ipi_pending[cpu]; - new_pending = old_pending | bitmap; - } while (!atomic_cmpset_int(&cpu_ipi_pending[cpu], - old_pending, new_pending)); - if (old_pending) - return; - } - lapic_ipi_vectored(ipi, cpu_apic_ids[cpu]); -} - void smp_cache_flush(void) { @@ -1324,7 +1330,7 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) } void -smp_masked_invltlb(cpumask_t mask) +smp_masked_invltlb(cpuset_t mask) { if (smp_started) { @@ -1336,7 +1342,7 @@ smp_masked_invltlb(cpumask_t mask) } void -smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) +smp_masked_invlpg(cpuset_t mask, vm_offset_t addr) { if (smp_started) { @@ -1348,7 +1354,7 @@ smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) } void -smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2) +smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { @@ -1401,7 +1407,7 @@ ipi_bitmap_handler(struct trapframe frame) * send an IPI to a set of cpus. */ void -ipi_selected(cpumask_t cpus, u_int ipi) +ipi_selected(cpuset_t cpus, u_int ipi) { int cpu; @@ -1411,12 +1417,12 @@ ipi_selected(cpumask_t cpus, u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, cpus); + CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); - CTR3(KTR_SMP, "%s: cpus: %x ipi: %x", __func__, cpus, ipi); - while ((cpu = ffs(cpus)) != 0) { + while ((cpu = cpusetobj_ffs(&cpus)) != 0) { cpu--; - cpus &= ~(1 << cpu); + CPU_CLR(cpu, &cpus); + CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } } @@ -1434,7 +1440,7 @@ ipi_cpu(int cpu, u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, 1 << cpu); + CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); @@ -1447,8 +1453,10 @@ void ipi_all_but_self(u_int ipi) { + sched_pin(); if (IPI_IS_BITMAPED(ipi)) { ipi_selected(PCPU_GET(other_cpus), ipi); + sched_unpin(); return; } @@ -1458,7 +1466,9 @@ ipi_all_but_self(u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus)); + CPU_OR_ATOMIC(&ipi_nmi_pending, PCPU_PTR(other_cpus)); + sched_unpin(); + CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); lapic_ipi_vectored(ipi, APIC_IPI_DEST_OTHERS); } @@ -1466,7 +1476,7 @@ ipi_all_but_self(u_int ipi) int ipi_nmi_handler() { - cpumask_t cpumask; + cpuset_t cpumask; /* * As long as there is not a simple way to know about a NMI's @@ -1474,11 +1484,13 @@ ipi_nmi_handler() * the global pending bitword an IPI_STOP_HARD has been issued * and should be handled. */ + sched_pin(); cpumask = PCPU_GET(cpumask); - if ((ipi_nmi_pending & cpumask) == 0) + sched_unpin(); + if (!CPU_OVERLAP(&ipi_nmi_pending, &cpumask)) return (1); - atomic_clear_int(&ipi_nmi_pending, cpumask); + CPU_NAND_ATOMIC(&ipi_nmi_pending, &cpumask); cpustop_handler(); return (0); } @@ -1490,23 +1502,25 @@ ipi_nmi_handler() void cpustop_handler(void) { - cpumask_t cpumask; + cpuset_t cpumask; u_int cpu; + sched_pin(); cpu = PCPU_GET(cpuid); cpumask = PCPU_GET(cpumask); + sched_unpin(); savectx(&stoppcbs[cpu]); /* Indicate that we are stopped */ - atomic_set_int(&stopped_cpus, cpumask); + CPU_OR_ATOMIC(&stopped_cpus, &cpumask); /* Wait for restart */ - while (!(started_cpus & cpumask)) + while (!CPU_OVERLAP(&started_cpus, &cpumask)) ia32_pause(); - atomic_clear_int(&started_cpus, cpumask); - atomic_clear_int(&stopped_cpus, cpumask); + CPU_NAND_ATOMIC(&started_cpus, &cpumask); + CPU_NAND_ATOMIC(&stopped_cpus, &cpumask); if (cpu == 0 && cpustop_restartfunc != NULL) { cpustop_restartfunc(); @@ -1530,158 +1544,6 @@ release_aps(void *dummy __unused) } SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, release_aps, NULL); -static int -sysctl_hlt_cpus(SYSCTL_HANDLER_ARGS) -{ - cpumask_t mask; - int error; - - mask = hlt_cpus_mask; - error = sysctl_handle_int(oidp, &mask, 0, req); - if (error || !req->newptr) - return (error); - - if (logical_cpus_mask != 0 && - (mask & logical_cpus_mask) == logical_cpus_mask) - hlt_logical_cpus = 1; - else - hlt_logical_cpus = 0; - - if (! hyperthreading_allowed) - mask |= hyperthreading_cpus_mask; - - if ((mask & all_cpus) == all_cpus) - mask &= ~(1<<0); - hlt_cpus_mask = mask; - return (error); -} -SYSCTL_PROC(_machdep, OID_AUTO, hlt_cpus, CTLTYPE_INT|CTLFLAG_RW, - 0, 0, sysctl_hlt_cpus, "IU", - "Bitmap of CPUs to halt. 101 (binary) will halt CPUs 0 and 2."); - -static int -sysctl_hlt_logical_cpus(SYSCTL_HANDLER_ARGS) -{ - int disable, error; - - disable = hlt_logical_cpus; - error = sysctl_handle_int(oidp, &disable, 0, req); - if (error || !req->newptr) - return (error); - - if (disable) - hlt_cpus_mask |= logical_cpus_mask; - else - hlt_cpus_mask &= ~logical_cpus_mask; - - if (! hyperthreading_allowed) - hlt_cpus_mask |= hyperthreading_cpus_mask; - - if ((hlt_cpus_mask & all_cpus) == all_cpus) - hlt_cpus_mask &= ~(1<<0); - - hlt_logical_cpus = disable; - return (error); -} - -static int -sysctl_hyperthreading_allowed(SYSCTL_HANDLER_ARGS) -{ - int allowed, error; - - allowed = hyperthreading_allowed; - error = sysctl_handle_int(oidp, &allowed, 0, req); - if (error || !req->newptr) - return (error); - -#ifdef SCHED_ULE - /* - * SCHED_ULE doesn't allow enabling/disabling HT cores at - * run-time. - */ - if (allowed != hyperthreading_allowed) - return (ENOTSUP); - return (error); -#endif - - if (allowed) - hlt_cpus_mask &= ~hyperthreading_cpus_mask; - else - hlt_cpus_mask |= hyperthreading_cpus_mask; - - if (logical_cpus_mask != 0 && - (hlt_cpus_mask & logical_cpus_mask) == logical_cpus_mask) - hlt_logical_cpus = 1; - else - hlt_logical_cpus = 0; - - if ((hlt_cpus_mask & all_cpus) == all_cpus) - hlt_cpus_mask &= ~(1<<0); - - hyperthreading_allowed = allowed; - return (error); -} - -static void -cpu_hlt_setup(void *dummy __unused) -{ - - if (logical_cpus_mask != 0) { - TUNABLE_INT_FETCH("machdep.hlt_logical_cpus", - &hlt_logical_cpus); - sysctl_ctx_init(&logical_cpu_clist); - SYSCTL_ADD_PROC(&logical_cpu_clist, - SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, - "hlt_logical_cpus", CTLTYPE_INT|CTLFLAG_RW, 0, 0, - sysctl_hlt_logical_cpus, "IU", ""); - SYSCTL_ADD_UINT(&logical_cpu_clist, - SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, - "logical_cpus_mask", CTLTYPE_INT|CTLFLAG_RD, - &logical_cpus_mask, 0, ""); - - if (hlt_logical_cpus) - hlt_cpus_mask |= logical_cpus_mask; - - /* - * If necessary for security purposes, force - * hyperthreading off, regardless of the value - * of hlt_logical_cpus. - */ - if (hyperthreading_cpus_mask) { - SYSCTL_ADD_PROC(&logical_cpu_clist, - SYSCTL_STATIC_CHILDREN(_machdep), OID_AUTO, - "hyperthreading_allowed", CTLTYPE_INT|CTLFLAG_RW, - 0, 0, sysctl_hyperthreading_allowed, "IU", ""); - if (! hyperthreading_allowed) - hlt_cpus_mask |= hyperthreading_cpus_mask; - } - } -} -SYSINIT(cpu_hlt, SI_SUB_SMP, SI_ORDER_ANY, cpu_hlt_setup, NULL); - -int -mp_grab_cpu_hlt(void) -{ - cpumask_t mask; -#ifdef MP_WATCHDOG - u_int cpuid; -#endif - int retval; - - mask = PCPU_GET(cpumask); -#ifdef MP_WATCHDOG - cpuid = PCPU_GET(cpuid); - ap_watchdog(cpuid); -#endif - - retval = 0; - while (mask & hlt_cpus_mask) { - retval = 1; - __asm __volatile("sti; hlt" : : : "memory"); - } - return (retval); -} - #ifdef COUNT_IPIS /* * Setup interrupt counters for IPI handlers. diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c index d10bbe5a7701..3f9248df2b35 100644 --- a/sys/i386/i386/pmap.c +++ b/sys/i386/i386/pmap.c @@ -125,6 +125,8 @@ __FBSDID("$FreeBSD$"); #include #ifdef SMP #include +#else +#include #endif #include @@ -386,7 +388,7 @@ pmap_bootstrap(vm_paddr_t firstaddr) kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); #endif kernel_pmap->pm_root = NULL; - kernel_pmap->pm_active = -1; /* don't allow deactivation */ + CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); LIST_INIT(&allpmaps); @@ -930,19 +932,20 @@ pmap_update_pde_invalidate(vm_offset_t va, pd_entry_t newpde) void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invlpg(va); smp_invlpg(va); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invlpg(va); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg(pmap->pm_active & other_cpus, va); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg(other_cpus, va); } sched_unpin(); } @@ -950,23 +953,23 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; vm_offset_t addr; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); smp_invlpg_range(sva, eva); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg_range(pmap->pm_active & other_cpus, - sva, eva); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg_range(other_cpus, sva, eva); } sched_unpin(); } @@ -974,19 +977,20 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) void pmap_invalidate_all(pmap_t pmap) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invltlb(); smp_invltlb(); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invltlb(); - if (pmap->pm_active & other_cpus) - smp_masked_invltlb(pmap->pm_active & other_cpus); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invltlb(other_cpus); } sched_unpin(); } @@ -1002,8 +1006,8 @@ pmap_invalidate_cache(void) } struct pde_action { - cpumask_t store; /* processor that updates the PDE */ - cpumask_t invalidate; /* processors that invalidate their TLB */ + cpuset_t store; /* processor that updates the PDE */ + cpuset_t invalidate; /* processors that invalidate their TLB */ vm_offset_t va; pd_entry_t *pde; pd_entry_t newpde; @@ -1016,7 +1020,10 @@ pmap_update_pde_kernel(void *arg) pd_entry_t *pde; pmap_t pmap; - if (act->store == PCPU_GET(cpumask)) + sched_pin(); + if (!CPU_CMP(&act->store, PCPU_PTR(cpumask))) { + sched_unpin(); + /* * Elsewhere, this operation requires allpmaps_lock for * synchronization. Here, it does not because it is being @@ -1026,6 +1033,8 @@ pmap_update_pde_kernel(void *arg) pde = pmap_pde(pmap, act->va); pde_store(pde, act->newpde); } + } else + sched_unpin(); } static void @@ -1033,8 +1042,12 @@ pmap_update_pde_user(void *arg) { struct pde_action *act = arg; - if (act->store == PCPU_GET(cpumask)) + sched_pin(); + if (!CPU_CMP(&act->store, PCPU_PTR(cpumask))) { + sched_unpin(); pde_store(act->pde, act->newpde); + } else + sched_unpin(); } static void @@ -1042,8 +1055,12 @@ pmap_update_pde_teardown(void *arg) { struct pde_action *act = arg; - if ((act->invalidate & PCPU_GET(cpumask)) != 0) + sched_pin(); + if (CPU_OVERLAP(&act->invalidate, PCPU_PTR(cpumask))) { + sched_unpin(); pmap_update_pde_invalidate(act->va, act->newpde); + } else + sched_unpin(); } /* @@ -1058,21 +1075,23 @@ static void pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) { struct pde_action act; - cpumask_t active, cpumask; + cpuset_t active, cpumask, other_cpus; sched_pin(); cpumask = PCPU_GET(cpumask); + other_cpus = PCPU_GET(other_cpus); if (pmap == kernel_pmap) active = all_cpus; else active = pmap->pm_active; - if ((active & PCPU_GET(other_cpus)) != 0) { + if (CPU_OVERLAP(&active, &other_cpus)) { act.store = cpumask; act.invalidate = active; act.va = va; act.pde = pde; act.newpde = newpde; - smp_rendezvous_cpus(cpumask | active, + CPU_OR(&cpumask, &active); + smp_rendezvous_cpus(cpumask, smp_no_rendevous_barrier, pmap == kernel_pmap ? pmap_update_pde_kernel : pmap_update_pde_user, pmap_update_pde_teardown, &act); @@ -1081,7 +1100,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); - if ((active & cpumask) != 0) + if (CPU_OVERLAP(&active, &cpumask)) pmap_update_pde_invalidate(va, newpde); } sched_unpin(); @@ -1095,7 +1114,7 @@ PMAP_INLINE void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invlpg(va); } @@ -1104,7 +1123,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { vm_offset_t addr; - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); } @@ -1113,7 +1132,7 @@ PMAP_INLINE void pmap_invalidate_all(pmap_t pmap) { - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invltlb(); } @@ -1132,7 +1151,7 @@ pmap_update_pde(pmap_t pmap, vm_offset_t va, pd_entry_t *pde, pd_entry_t newpde) pmap_kenter_pde(va, newpde); else pde_store(pde, newpde); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) pmap_update_pde_invalidate(va, newpde); } #endif /* !SMP */ @@ -1689,7 +1708,7 @@ pmap_pinit0(pmap_t pmap) pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); #endif pmap->pm_root = NULL; - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1770,7 +1789,7 @@ pmap_pinit(pmap_t pmap) #endif } - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1886,7 +1905,7 @@ pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags) * Deal with a SMP shootdown of other users of the pmap that we are * trying to dispose of. This can be a bit hairy. */ -static cpumask_t *lazymask; +static cpuset_t *lazymask; static u_int lazyptd; static volatile u_int lazywait; @@ -1895,36 +1914,42 @@ void pmap_lazyfix_action(void); void pmap_lazyfix_action(void) { - cpumask_t mymask = PCPU_GET(cpumask); #ifdef COUNT_IPIS (*ipi_lazypmap_counts[PCPU_GET(cpuid)])++; #endif if (rcr3() == lazyptd) load_cr3(PCPU_GET(curpcb)->pcb_cr3); - atomic_clear_int(lazymask, mymask); + CPU_CLR_ATOMIC(PCPU_GET(cpuid), lazymask); atomic_store_rel_int(&lazywait, 1); } static void -pmap_lazyfix_self(cpumask_t mymask) +pmap_lazyfix_self(cpuset_t mymask) { if (rcr3() == lazyptd) load_cr3(PCPU_GET(curpcb)->pcb_cr3); - atomic_clear_int(lazymask, mymask); + CPU_NAND_ATOMIC(lazymask, &mymask); } static void pmap_lazyfix(pmap_t pmap) { - cpumask_t mymask, mask; + cpuset_t mymask, mask; u_int spins; + int lsb; - while ((mask = pmap->pm_active) != 0) { + mask = pmap->pm_active; + while (!CPU_EMPTY(&mask)) { spins = 50000000; - mask = mask & -mask; /* Find least significant set bit */ + + /* Find least significant set bit. */ + lsb = cpusetobj_ffs(&mask); + MPASS(lsb != 0); + lsb--; + CPU_SETOF(lsb, &mask); mtx_lock_spin(&smp_ipi_mtx); #ifdef PAE lazyptd = vtophys(pmap->pm_pdpt); @@ -1932,7 +1957,7 @@ pmap_lazyfix(pmap_t pmap) lazyptd = vtophys(pmap->pm_pdir); #endif mymask = PCPU_GET(cpumask); - if (mask == mymask) { + if (!CPU_CMP(&mask, &mymask)) { lazymask = &pmap->pm_active; pmap_lazyfix_self(mymask); } else { @@ -1949,6 +1974,7 @@ pmap_lazyfix(pmap_t pmap) mtx_unlock_spin(&smp_ipi_mtx); if (spins == 0) printf("pmap_lazyfix: spun for 50000000\n"); + mask = pmap->pm_active; } } @@ -1968,7 +1994,7 @@ pmap_lazyfix(pmap_t pmap) cr3 = vtophys(pmap->pm_pdir); if (cr3 == rcr3()) { load_cr3(PCPU_GET(curpcb)->pcb_cr3); - pmap->pm_active &= ~(PCPU_GET(cpumask)); + CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active); } } #endif /* SMP */ @@ -5078,11 +5104,11 @@ pmap_activate(struct thread *td) pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = PCPU_GET(curpmap); #if defined(SMP) - atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); - atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); + CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask)); #else - oldpmap->pm_active &= ~1; - pmap->pm_active |= 1; + CPU_NAND(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR(&pmap->pm_active, PCPU_PTR(cpumask)); #endif #ifdef PAE cr3 = vtophys(pmap->pm_pdpt); diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c index 232e1a1f308e..a084e09526aa 100644 --- a/sys/i386/i386/vm_machdep.c +++ b/sys/i386/i386/vm_machdep.c @@ -573,11 +573,13 @@ kvtop(void *addr) static void cpu_reset_proxy() { + cpuset_t tcrp; cpu_reset_proxy_active = 1; while (cpu_reset_proxy_active == 1) ; /* Wait for other cpu to see that we've started */ - stop_cpus((1<cpumask = 0; + CPU_ZERO(&sf->cpumask); shootdown: sched_pin(); cpumask = PCPU_GET(cpumask); - if ((sf->cpumask & cpumask) == 0) { - sf->cpumask |= cpumask; + if (!CPU_OVERLAP(&cpumask, &sf->cpumask)) { + CPU_OR(&sf->cpumask, &cpumask); invlpg(sf->kva); } if ((flags & SFB_CPUPRIVATE) == 0) { - other_cpus = PCPU_GET(other_cpus) & ~sf->cpumask; - if (other_cpus != 0) { - sf->cpumask |= other_cpus; + other_cpus = PCPU_GET(other_cpus); + CPU_NAND(&other_cpus, &sf->cpumask); + if (!CPU_EMPTY(&other_cpus)) { + CPU_OR(&sf->cpumask, &other_cpus); smp_masked_invlpg(other_cpus, sf->kva); } } - sched_unpin(); + sched_unpin(); #else if ((opte & (PG_V | PG_A)) == (PG_V | PG_A)) pmap_invalidate_page(kernel_pmap, sf->kva); diff --git a/sys/i386/include/_types.h b/sys/i386/include/_types.h index 7a969fedf160..3194fd691f67 100644 --- a/sys/i386/include/_types.h +++ b/sys/i386/include/_types.h @@ -69,7 +69,6 @@ typedef unsigned long long __uint64_t; * Standard type definitions. */ typedef unsigned long __clock_t; /* clock()... */ -typedef unsigned int __cpumask_t; typedef __int32_t __critical_t; typedef long double __double_t; typedef long double __float_t; diff --git a/sys/i386/include/pmap.h b/sys/i386/include/pmap.h index eeada2e400f4..3012a000ae5c 100644 --- a/sys/i386/include/pmap.h +++ b/sys/i386/include/pmap.h @@ -155,6 +155,7 @@ #ifndef LOCORE #include +#include #include #include @@ -433,7 +434,7 @@ struct pmap { struct mtx pm_mtx; pd_entry_t *pm_pdir; /* KVA of page directory */ TAILQ_HEAD(,pv_chunk) pm_pvchunk; /* list of mappings in pmap */ - cpumask_t pm_active; /* active on cpus */ + cpuset_t pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statistics */ LIST_ENTRY(pmap) pm_list; /* List of all pmaps */ #ifdef PAE diff --git a/sys/i386/include/sf_buf.h b/sys/i386/include/sf_buf.h index 7bc1095c9590..415dcbb86e63 100644 --- a/sys/i386/include/sf_buf.h +++ b/sys/i386/include/sf_buf.h @@ -29,6 +29,7 @@ #ifndef _MACHINE_SF_BUF_H_ #define _MACHINE_SF_BUF_H_ +#include #include struct vm_page; @@ -40,7 +41,7 @@ struct sf_buf { vm_offset_t kva; /* va of mapping */ int ref_count; /* usage of this mapping */ #ifdef SMP - cpumask_t cpumask; /* cpus on which mapping is valid */ + cpuset_t cpumask; /* cpus on which mapping is valid */ #endif }; diff --git a/sys/i386/include/smp.h b/sys/i386/include/smp.h index d364cd9ebbcc..04d67c9f2f28 100644 --- a/sys/i386/include/smp.h +++ b/sys/i386/include/smp.h @@ -66,17 +66,16 @@ void ipi_bitmap_handler(struct trapframe frame); #endif void ipi_cpu(int cpu, u_int ipi); int ipi_nmi_handler(void); -void ipi_selected(cpumask_t cpus, u_int ipi); +void ipi_selected(cpuset_t cpus, u_int ipi); u_int mp_bootaddress(u_int); -int mp_grab_cpu_hlt(void); void smp_cache_flush(void); void smp_invlpg(vm_offset_t addr); -void smp_masked_invlpg(cpumask_t mask, vm_offset_t addr); +void smp_masked_invlpg(cpuset_t mask, vm_offset_t addr); void smp_invlpg_range(vm_offset_t startva, vm_offset_t endva); -void smp_masked_invlpg_range(cpumask_t mask, vm_offset_t startva, +void smp_masked_invlpg_range(cpuset_t mask, vm_offset_t startva, vm_offset_t endva); void smp_invltlb(void); -void smp_masked_invltlb(cpumask_t mask); +void smp_masked_invltlb(cpuset_t mask); #ifdef XEN void ipi_to_irq_init(void); diff --git a/sys/i386/xen/mp_machdep.c b/sys/i386/xen/mp_machdep.c index 2919570b647e..2d05596b89d2 100644 --- a/sys/i386/xen/mp_machdep.c +++ b/sys/i386/xen/mp_machdep.c @@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$"); #include #include #include /* cngetc() */ +#include #ifdef GPROF #include #endif @@ -116,7 +117,7 @@ volatile int smp_tlb_wait; typedef void call_data_func_t(uintptr_t , uintptr_t); static u_int logical_cpus; -static volatile cpumask_t ipi_nmi_pending; +static volatile cpuset_t ipi_nmi_pending; /* used to hold the AP's until we are ready to release them */ static struct mtx ap_boot_mtx; @@ -149,7 +150,7 @@ static int start_ap(int apic_id); static void release_aps(void *dummy); static u_int hyperthreading_cpus; -static cpumask_t hyperthreading_cpus_mask; +static cpuset_t hyperthreading_cpus_mask; extern void Xhypervisor_callback(void); extern void failsafe_callback(void); @@ -239,7 +240,7 @@ cpu_mp_probe(void) * Always record BSP in CPU map so that the mbuf init code works * correctly. */ - all_cpus = 1; + CPU_SETOF(0, &all_cpus); if (mp_ncpus == 0) { /* * No CPUs were found, so this must be a UP system. Setup @@ -293,7 +294,8 @@ cpu_mp_start(void) start_all_aps(); /* Setup the initial logical CPUs info. */ - logical_cpus = logical_cpus_mask = 0; + logical_cpus = 0; + CPU_ZERO(&logical_cpus_mask); if (cpu_feature & CPUID_HTT) logical_cpus = (cpu_procinfo & CPUID_HTT_CORES) >> 16; @@ -521,6 +523,7 @@ xen_smp_intr_init_cpus(void *unused) void init_secondary(void) { + cpuset_t tcpuset, tallcpus; vm_offset_t addr; int gsel_tss; @@ -600,18 +603,21 @@ init_secondary(void) CTR1(KTR_SMP, "SMP: AP CPU #%d Launched", PCPU_GET(cpuid)); printf("SMP: AP CPU #%d Launched!\n", PCPU_GET(cpuid)); + tcpuset = PCPU_GET(cpumask); /* Determine if we are a logical CPU. */ if (logical_cpus > 1 && PCPU_GET(apic_id) % logical_cpus != 0) - logical_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&logical_cpus_mask, &tcpuset); /* Determine if we are a hyperthread. */ if (hyperthreading_cpus > 1 && PCPU_GET(apic_id) % hyperthreading_cpus != 0) - hyperthreading_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&hyperthreading_cpus_mask, &tcpuset); /* Build our map of 'other' CPUs. */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, &tcpuset); + PCPU_SET(other_cpus, tallcpus); #if 0 if (bootverbose) lapic_dump("AP"); @@ -725,6 +731,7 @@ assign_cpu_ids(void) int start_all_aps(void) { + cpuset_t tallcpus; int x,apic_id, cpu; struct pcpu *pc; @@ -778,12 +785,14 @@ start_all_aps(void) panic("bye-bye"); } - all_cpus |= (1 << cpu); /* record AP in CPU map */ + CPU_SET(cpu, &all_cpus); /* record AP in CPU map */ } /* build our map of 'other' CPUs */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + tallcpus = all_cpus; + CPU_NAND(&tallcpus, PCPU_PTR(cpumask)); + PCPU_SET(other_cpus, tallcpus); pmap_invalidate_range(kernel_pmap, 0, NKPT * NBPDR - 1); @@ -1012,29 +1021,20 @@ smp_tlb_shootdown(u_int vector, vm_offset_t addr1, vm_offset_t addr2) } static void -smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) +smp_targeted_tlb_shootdown(cpuset_t mask, u_int vector, vm_offset_t addr1, vm_offset_t addr2) { - int ncpu, othercpus; + int cpu, ncpu, othercpus; struct _call_data data; othercpus = mp_ncpus - 1; - if (mask == (u_int)-1) { - ncpu = othercpus; - if (ncpu < 1) + if (CPU_ISFULLSET(&mask)) { + if (othercpus < 1) return; } else { - mask &= ~PCPU_GET(cpumask); - if (mask == 0) - return; - ncpu = bitcount32(mask); - if (ncpu > othercpus) { - /* XXX this should be a panic offence */ - printf("SMP: tlb shootdown to %d other cpus (only have %d)\n", - ncpu, othercpus); - ncpu = othercpus; - } - /* XXX should be a panic, implied by mask == 0 above */ - if (ncpu < 1) + critical_enter(); + CPU_NAND(&mask, PCPU_PTR(cpumask)); + critical_exit(); + if (CPU_EMPTY(&mask)) return; } if (!(read_eflags() & PSL_I)) @@ -1046,10 +1046,20 @@ smp_targeted_tlb_shootdown(cpumask_t mask, u_int vector, vm_offset_t addr1, vm_o call_data->arg1 = addr1; call_data->arg2 = addr2; atomic_store_rel_int(&smp_tlb_wait, 0); - if (mask == (u_int)-1) + if (CPU_ISFULLSET(&mask)) { + ncpu = othercpus; ipi_all_but_self(vector); - else - ipi_selected(mask, vector); + } else { + ncpu = 0; + while ((cpu = cpusetobj_ffs(&mask)) != 0) { + cpu--; + CPU_CLR(cpu, &mask); + CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, + vector); + ipi_send_cpu(cpu, vector); + ncpu++; + } + } while (smp_tlb_wait < ncpu) ia32_pause(); call_data = NULL; @@ -1092,7 +1102,7 @@ smp_invlpg_range(vm_offset_t addr1, vm_offset_t addr2) } void -smp_masked_invltlb(cpumask_t mask) +smp_masked_invltlb(cpuset_t mask) { if (smp_started) { @@ -1101,7 +1111,7 @@ smp_masked_invltlb(cpumask_t mask) } void -smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) +smp_masked_invlpg(cpuset_t mask, vm_offset_t addr) { if (smp_started) { @@ -1110,7 +1120,7 @@ smp_masked_invlpg(cpumask_t mask, vm_offset_t addr) } void -smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2) +smp_masked_invlpg_range(cpuset_t mask, vm_offset_t addr1, vm_offset_t addr2) { if (smp_started) { @@ -1122,7 +1132,7 @@ smp_masked_invlpg_range(cpumask_t mask, vm_offset_t addr1, vm_offset_t addr2) * send an IPI to a set of cpus. */ void -ipi_selected(cpumask_t cpus, u_int ipi) +ipi_selected(cpuset_t cpus, u_int ipi) { int cpu; @@ -1132,11 +1142,11 @@ ipi_selected(cpumask_t cpus, u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, cpus); + CPU_OR_ATOMIC(&ipi_nmi_pending, &cpus); - while ((cpu = ffs(cpus)) != 0) { + while ((cpu = cpusetobj_ffs(&cpus)) != 0) { cpu--; - cpus &= ~(1 << cpu); + CPU_CLR(cpu, &cpus); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); } @@ -1155,7 +1165,7 @@ ipi_cpu(int cpu, u_int ipi) * Set the mask of receiving CPUs for this purpose. */ if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, 1 << cpu); + CPU_SET_ATOMIC(cpu, &ipi_nmi_pending); CTR3(KTR_SMP, "%s: cpu: %d ipi: %x", __func__, cpu, ipi); ipi_send_cpu(cpu, ipi); @@ -1167,23 +1177,27 @@ ipi_cpu(int cpu, u_int ipi) void ipi_all_but_self(u_int ipi) { + cpuset_t other_cpus; /* * IPI_STOP_HARD maps to a NMI and the trap handler needs a bit * of help in order to understand what is the source. * Set the mask of receiving CPUs for this purpose. */ + sched_pin(); + other_cpus = PCPU_GET(other_cpus); + sched_unpin(); if (ipi == IPI_STOP_HARD) - atomic_set_int(&ipi_nmi_pending, PCPU_GET(other_cpus)); + CPU_OR_ATOMIC(&ipi_nmi_pending, &other_cpus); CTR2(KTR_SMP, "%s: ipi: %x", __func__, ipi); - ipi_selected(PCPU_GET(other_cpus), ipi); + ipi_selected(other_cpus, ipi); } int ipi_nmi_handler() { - cpumask_t cpumask; + cpuset_t cpumask; /* * As long as there is not a simple way to know about a NMI's @@ -1191,11 +1205,13 @@ ipi_nmi_handler() * the global pending bitword an IPI_STOP_HARD has been issued * and should be handled. */ + sched_pin(); cpumask = PCPU_GET(cpumask); - if ((ipi_nmi_pending & cpumask) == 0) + sched_unpin(); + if (!CPU_OVERLAP(&ipi_nmi_pending, &cpumask)) return (1); - atomic_clear_int(&ipi_nmi_pending, cpumask); + CPU_NAND_ATOMIC(&ipi_nmi_pending, &cpumask); cpustop_handler(); return (0); } @@ -1207,20 +1223,25 @@ ipi_nmi_handler() void cpustop_handler(void) { - int cpu = PCPU_GET(cpuid); - int cpumask = PCPU_GET(cpumask); + cpuset_t cpumask; + int cpu; + + sched_pin(); + cpumask = PCPU_GET(cpumask); + cpu = PCPU_GET(cpuid); + sched_unpin(); savectx(&stoppcbs[cpu]); /* Indicate that we are stopped */ - atomic_set_int(&stopped_cpus, cpumask); + CPU_OR_ATOMIC(&stopped_cpus, &cpumask); /* Wait for restart */ - while (!(started_cpus & cpumask)) + while (!CPU_OVERLAP(&started_cpus, &cpumask)) ia32_pause(); - atomic_clear_int(&started_cpus, cpumask); - atomic_clear_int(&stopped_cpus, cpumask); + CPU_NAND_ATOMIC(&started_cpus, &cpumask); + CPU_NAND_ATOMIC(&stopped_cpus, &cpumask); if (cpu == 0 && cpustop_restartfunc != NULL) { cpustop_restartfunc(); diff --git a/sys/i386/xen/pmap.c b/sys/i386/xen/pmap.c index eb3c803ca924..3efa4f1e174a 100644 --- a/sys/i386/xen/pmap.c +++ b/sys/i386/xen/pmap.c @@ -422,7 +422,7 @@ pmap_bootstrap(vm_paddr_t firstaddr) #ifdef PAE kernel_pmap->pm_pdpt = (pdpt_entry_t *) (KERNBASE + (u_int)IdlePDPT); #endif - kernel_pmap->pm_active = -1; /* don't allow deactivation */ + CPU_FILL(&kernel_pmap->pm_active); /* don't allow deactivation */ TAILQ_INIT(&kernel_pmap->pm_pvchunk); LIST_INIT(&allpmaps); mtx_init(&allpmaps_lock, "allpmaps", NULL, MTX_SPIN); @@ -802,22 +802,23 @@ pmap_cache_bits(int mode, boolean_t is_pde) void pmap_invalidate_page(pmap_t pmap, vm_offset_t va) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x", pmap, va); sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invlpg(va); smp_invlpg(va); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invlpg(va); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg(pmap->pm_active & other_cpus, va); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg(other_cpus, va); } sched_unpin(); PT_UPDATES_FLUSH(); @@ -826,26 +827,26 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) void pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; vm_offset_t addr; CTR3(KTR_PMAP, "pmap_invalidate_page: pmap=%p eva=0x%x sva=0x%x", pmap, sva, eva); sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); smp_invlpg_range(sva, eva); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); - if (pmap->pm_active & other_cpus) - smp_masked_invlpg_range(pmap->pm_active & other_cpus, - sva, eva); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invlpg_range(other_cpus, sva, eva); } sched_unpin(); PT_UPDATES_FLUSH(); @@ -854,21 +855,22 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) void pmap_invalidate_all(pmap_t pmap) { - cpumask_t cpumask, other_cpus; + cpuset_t cpumask, other_cpus; CTR1(KTR_PMAP, "pmap_invalidate_page: pmap=%p", pmap); sched_pin(); - if (pmap == kernel_pmap || pmap->pm_active == all_cpus) { + if (pmap == kernel_pmap || !CPU_CMP(&pmap->pm_active, &all_cpus)) { invltlb(); smp_invltlb(); } else { cpumask = PCPU_GET(cpumask); other_cpus = PCPU_GET(other_cpus); - if (pmap->pm_active & cpumask) + if (CPU_OVERLAP(&pmap->pm_active, &cpumask)) invltlb(); - if (pmap->pm_active & other_cpus) - smp_masked_invltlb(pmap->pm_active & other_cpus); + CPU_AND(&other_cpus, &pmap->pm_active); + if (!CPU_EMPTY(&other_cpus)) + smp_masked_invltlb(other_cpus); } sched_unpin(); } @@ -893,7 +895,7 @@ pmap_invalidate_page(pmap_t pmap, vm_offset_t va) CTR2(KTR_PMAP, "pmap_invalidate_page: pmap=%p va=0x%x", pmap, va); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invlpg(va); PT_UPDATES_FLUSH(); } @@ -907,7 +909,7 @@ pmap_invalidate_range(pmap_t pmap, vm_offset_t sva, vm_offset_t eva) CTR3(KTR_PMAP, "pmap_invalidate_range: pmap=%p sva=0x%x eva=0x%x", pmap, sva, eva); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) for (addr = sva; addr < eva; addr += PAGE_SIZE) invlpg(addr); PT_UPDATES_FLUSH(); @@ -919,7 +921,7 @@ pmap_invalidate_all(pmap_t pmap) CTR1(KTR_PMAP, "pmap_invalidate_all: pmap=%p", pmap); - if (pmap == kernel_pmap || pmap->pm_active) + if (pmap == kernel_pmap || !CPU_EMPTY(&pmap->pm_active)) invltlb(); } @@ -1449,7 +1451,7 @@ pmap_pinit0(pmap_t pmap) #ifdef PAE pmap->pm_pdpt = (pdpt_entry_t *)(KERNBASE + (vm_offset_t)IdlePDPT); #endif - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); PCPU_SET(curpmap, pmap); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1556,7 +1558,7 @@ pmap_pinit(pmap_t pmap) } xen_flush_queue(); vm_page_unlock_queues(); - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); TAILQ_INIT(&pmap->pm_pvchunk); bzero(&pmap->pm_stats, sizeof pmap->pm_stats); @@ -1686,7 +1688,7 @@ pmap_allocpte(pmap_t pmap, vm_offset_t va, int flags) * Deal with a SMP shootdown of other users of the pmap that we are * trying to dispose of. This can be a bit hairy. */ -static cpumask_t *lazymask; +static cpuset_t *lazymask; static u_int lazyptd; static volatile u_int lazywait; @@ -1695,36 +1697,42 @@ void pmap_lazyfix_action(void); void pmap_lazyfix_action(void) { - cpumask_t mymask = PCPU_GET(cpumask); #ifdef COUNT_IPIS (*ipi_lazypmap_counts[PCPU_GET(cpuid)])++; #endif if (rcr3() == lazyptd) load_cr3(PCPU_GET(curpcb)->pcb_cr3); - atomic_clear_int(lazymask, mymask); + CPU_CLR_ATOMIC(PCPU_GET(cpuid), lazymask); atomic_store_rel_int(&lazywait, 1); } static void -pmap_lazyfix_self(cpumask_t mymask) +pmap_lazyfix_self(cpuset_t mymask) { if (rcr3() == lazyptd) load_cr3(PCPU_GET(curpcb)->pcb_cr3); - atomic_clear_int(lazymask, mymask); + CPU_NAND_ATOMIC(lazymask, &mymask); } static void pmap_lazyfix(pmap_t pmap) { - cpumask_t mymask, mask; + cpuset_t mymask, mask; u_int spins; + int lsb; - while ((mask = pmap->pm_active) != 0) { + mask = pmap->pm_active; + while (!CPU_EMPTY(&mask)) { spins = 50000000; - mask = mask & -mask; /* Find least significant set bit */ + + /* Find least significant set bit. */ + lsb = cpusetobj_ffs(&mask); + MPASS(lsb != 0); + lsb--; + CPU_SETOF(lsb, &mask); mtx_lock_spin(&smp_ipi_mtx); #ifdef PAE lazyptd = vtophys(pmap->pm_pdpt); @@ -1732,7 +1740,7 @@ pmap_lazyfix(pmap_t pmap) lazyptd = vtophys(pmap->pm_pdir); #endif mymask = PCPU_GET(cpumask); - if (mask == mymask) { + if (!CPU_CMP(&mask, &mymask)) { lazymask = &pmap->pm_active; pmap_lazyfix_self(mymask); } else { @@ -1749,6 +1757,7 @@ pmap_lazyfix(pmap_t pmap) mtx_unlock_spin(&smp_ipi_mtx); if (spins == 0) printf("pmap_lazyfix: spun for 50000000\n"); + mask = pmap->pm_active; } } @@ -1768,7 +1777,7 @@ pmap_lazyfix(pmap_t pmap) cr3 = vtophys(pmap->pm_pdir); if (cr3 == rcr3()) { load_cr3(PCPU_GET(curpcb)->pcb_cr3); - pmap->pm_active &= ~(PCPU_GET(cpumask)); + CPU_CLR(PCPU_GET(cpuid), &pmap->pm_active); } } #endif /* SMP */ @@ -4123,11 +4132,11 @@ pmap_activate(struct thread *td) pmap = vmspace_pmap(td->td_proc->p_vmspace); oldpmap = PCPU_GET(curpmap); #if defined(SMP) - atomic_clear_int(&oldpmap->pm_active, PCPU_GET(cpumask)); - atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); + CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask)); #else - oldpmap->pm_active &= ~1; - pmap->pm_active |= 1; + CPU_NAND(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR(&pmap->pm_active, PCPU_PTR(cpumask)); #endif #ifdef PAE cr3 = vtophys(pmap->pm_pdpt); diff --git a/sys/ia64/acpica/acpi_machdep.c b/sys/ia64/acpica/acpi_machdep.c index b7b612fd220e..1466cfe59a98 100644 --- a/sys/ia64/acpica/acpi_machdep.c +++ b/sys/ia64/acpica/acpi_machdep.c @@ -56,7 +56,14 @@ acpi_machdep_quirks(int *quirks) void acpi_cpu_c1() { +#ifdef INVARIANTS + register_t ie; + + ie = intr_disable(); + KASSERT(ie == 0, ("%s called with interrupts enabled\n", __func__)); +#endif ia64_call_pal_static(PAL_HALT_LIGHT, 0, 0, 0); + ia64_enable_intr(); } void * diff --git a/sys/ia64/ia64/machdep.c b/sys/ia64/ia64/machdep.c index 7252865afe7e..1463fb5c327e 100644 --- a/sys/ia64/ia64/machdep.c +++ b/sys/ia64/ia64/machdep.c @@ -411,12 +411,34 @@ cpu_halt() void cpu_idle(int busy) { - struct ia64_pal_result res; + register_t ie; - if (cpu_idle_hook != NULL) +#if 0 + if (!busy) { + critical_enter(); + cpu_idleclock(); + } +#endif + + ie = intr_disable(); + KASSERT(ie != 0, ("%s called with interrupts disabled\n", __func__)); + + if (sched_runnable()) + ia64_enable_intr(); + else if (cpu_idle_hook != NULL) { (*cpu_idle_hook)(); - else - res = ia64_call_pal_static(PAL_HALT_LIGHT, 0, 0, 0); + /* The hook must enable interrupts! */ + } else { + ia64_call_pal_static(PAL_HALT_LIGHT, 0, 0, 0); + ia64_enable_intr(); + } + +#if 0 + if (!busy) { + cpu_activeclock(); + critical_exit(); + } +#endif } int @@ -644,9 +666,12 @@ calculate_frequencies(void) { struct ia64_sal_result sal; struct ia64_pal_result pal; + register_t ie; + ie = intr_disable(); sal = ia64_sal_entry(SAL_FREQ_BASE, 0, 0, 0, 0, 0, 0, 0); pal = ia64_call_pal_static(PAL_FREQ_RATIOS, 0, 0, 0); + intr_restore(ie); if (sal.sal_status == 0 && pal.pal_status == 0) { if (bootverbose) { @@ -761,6 +786,8 @@ ia64_init(void) ia64_sal_init(); calculate_frequencies(); + set_cputicker(ia64_get_itc, (u_long)itc_freq * 1000000, 0); + /* * Setup the PCPU data for the bootstrap processor. It is needed * by printf(). Also, since printf() has critical sections, we diff --git a/sys/ia64/ia64/mp_machdep.c b/sys/ia64/ia64/mp_machdep.c index b6b0bef1d222..15afea030182 100644 --- a/sys/ia64/ia64/mp_machdep.c +++ b/sys/ia64/ia64/mp_machdep.c @@ -139,18 +139,18 @@ ia64_ih_rndzvs(struct thread *td, u_int xiv, struct trapframe *tf) static u_int ia64_ih_stop(struct thread *td, u_int xiv, struct trapframe *tf) { - cpumask_t mybit; + cpuset_t mybit; PCPU_INC(md.stats.pcs_nstops); mybit = PCPU_GET(cpumask); savectx(PCPU_PTR(md.pcb)); - atomic_set_int(&stopped_cpus, mybit); - while ((started_cpus & mybit) == 0) + CPU_OR_ATOMIC(&stopped_cpus, &mybit); + while (!CPU_OVERLAP(&started_cpus, &mybit)) cpu_spinwait(); - atomic_clear_int(&started_cpus, mybit); - atomic_clear_int(&stopped_cpus, mybit); + CPU_NAND_ATOMIC(&started_cpus, &mybit); + CPU_NAND_ATOMIC(&stopped_cpus, &mybit); return (0); } @@ -286,7 +286,7 @@ cpu_mp_add(u_int acpi_id, u_int id, u_int eid) cpuid = (IA64_LID_GET_SAPIC_ID(ia64_get_lid()) == sapic_id) ? 0 : smp_cpus++; - KASSERT((all_cpus & (1UL << cpuid)) == 0, + KASSERT(!CPU_ISSET(cpuid, &all_cpus), ("%s: cpu%d already in CPU map", __func__, acpi_id)); if (cpuid != 0) { @@ -300,7 +300,7 @@ cpu_mp_add(u_int acpi_id, u_int id, u_int eid) pc->pc_acpi_id = acpi_id; pc->pc_md.lid = IA64_LID_SET_SAPIC_ID(sapic_id); - all_cpus |= (1UL << pc->pc_cpuid); + CPU_SET(pc->pc_cpuid, &all_cpus); } void @@ -359,7 +359,8 @@ cpu_mp_start() STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { pc->pc_md.current_pmap = kernel_pmap; - pc->pc_other_cpus = all_cpus & ~pc->pc_cpumask; + pc->pc_other_cpus = all_cpus; + CPU_NAND(&pc->pc_other_cpus, &pc->pc_cpumask); /* The BSP is obviously running already. */ if (pc->pc_cpuid == 0) { pc->pc_md.awake = 1; @@ -458,12 +459,12 @@ cpu_mp_unleash(void *dummy) * send an IPI to a set of cpus. */ void -ipi_selected(cpumask_t cpus, int ipi) +ipi_selected(cpuset_t cpus, int ipi) { struct pcpu *pc; STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { - if (cpus & pc->pc_cpumask) + if (CPU_OVERLAP(&cpus, &pc->pc_cpumask)) ipi_send(pc, ipi); } } diff --git a/sys/ia64/ia64/pal.S b/sys/ia64/ia64/pal.S index 2f0d0da72e6e..2e3f4cd85ac4 100644 --- a/sys/ia64/ia64/pal.S +++ b/sys/ia64/ia64/pal.S @@ -38,43 +38,40 @@ ia64_pal_entry: .quad 0 * u_int64_t arg1, u_int64_t arg2, u_int64_t arg3) */ ENTRY(ia64_call_pal_static, 4) - - .regstk 4,5,0,0 + + .regstk 4,4,0,0 palret = loc0 entry = loc1 rpsave = loc2 pfssave = loc3 -psrsave = loc4 - alloc pfssave=ar.pfs,4,5,0,0 + alloc pfssave=ar.pfs,4,4,0,0 ;; mov rpsave=rp - movl entry=@gprel(ia64_pal_entry) + 1: mov palret=ip // for return address ;; add entry=entry,gp - mov psrsave=psr + add palret=2f-1b,palret // calculate return address mov r28=in0 // procedure number - ;; - ld8 entry=[entry] // read entry point mov r29=in1 // copy arguments mov r30=in2 mov r31=in3 ;; - mov b6=entry - add palret=2f-1b,palret // calculate return address - ;; + ld8 entry=[entry] // read entry point mov b0=palret - rsm psr.i // disable interrupts + ;; + mov b6=entry ;; br.cond.sptk b6 // call into firmware -2: mov psr.l=psrsave + ;; +2: mov rp=rpsave mov ar.pfs=pfssave ;; - srlz.d br.ret.sptk rp + ;; END(ia64_call_pal_static) /* diff --git a/sys/ia64/include/_types.h b/sys/ia64/include/_types.h index 8fc1be2f3873..0c2f5cc38d0b 100644 --- a/sys/ia64/include/_types.h +++ b/sys/ia64/include/_types.h @@ -59,7 +59,6 @@ typedef unsigned long __uint64_t; * Standard type definitions. */ typedef __int32_t __clock_t; /* clock()... */ -typedef unsigned int __cpumask_t; typedef __int64_t __critical_t; typedef double __double_t; typedef float __float_t; diff --git a/sys/ia64/include/smp.h b/sys/ia64/include/smp.h index 26557a712705..d2aff76c3ad5 100644 --- a/sys/ia64/include/smp.h +++ b/sys/ia64/include/smp.h @@ -14,6 +14,8 @@ #ifndef LOCORE +#include + struct pcpu; struct ia64_ap_state { @@ -44,7 +46,7 @@ extern int ia64_ipi_wakeup; void ipi_all_but_self(int ipi); void ipi_cpu(int cpu, u_int ipi); -void ipi_selected(cpumask_t cpus, int ipi); +void ipi_selected(cpuset_t cpus, int ipi); void ipi_send(struct pcpu *, int ipi); #endif /* !LOCORE */ diff --git a/sys/kern/kern_cpuset.c b/sys/kern/kern_cpuset.c index 6489ffb19e9b..e1f2801643ac 100644 --- a/sys/kern/kern_cpuset.c +++ b/sys/kern/kern_cpuset.c @@ -50,6 +50,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -616,6 +617,86 @@ cpuset_setproc(pid_t pid, struct cpuset *set, cpuset_t *mask) return (error); } +/* + * Calculate the ffs() of the cpuset. + */ +int +cpusetobj_ffs(const cpuset_t *set) +{ + size_t i; + int cbit; + + cbit = 0; + for (i = 0; i < _NCPUWORDS; i++) { + if (set->__bits[i] != 0) { + cbit = ffsl(set->__bits[i]); + cbit += i * _NCPUBITS; + break; + } + } + return (cbit); +} + +/* + * Return a string representing a valid layout for a cpuset_t object. + * It expects an incoming buffer at least sized as CPUSETBUFSIZ. + */ +char * +cpusetobj_strprint(char *buf, const cpuset_t *set) +{ + char *tbuf; + size_t i, bytesp, bufsiz; + + tbuf = buf; + bytesp = 0; + bufsiz = CPUSETBUFSIZ; + + for (i = _NCPUWORDS - 1; i > 0; i--) { + bytesp = snprintf(tbuf, bufsiz, "%lx, ", set->__bits[i]); + bufsiz -= bytesp; + tbuf += bytesp; + } + snprintf(tbuf, bufsiz, "%lx", set->__bits[0]); + return (buf); +} + +/* + * Build a valid cpuset_t object from a string representation. + * It expects an incoming buffer at least sized as CPUSETBUFSIZ. + */ +int +cpusetobj_strscan(cpuset_t *set, const char *buf) +{ + u_int nwords; + int i, ret; + + if (strlen(buf) > CPUSETBUFSIZ - 1) + return (-1); + + /* Allow to pass a shorter version of the mask when necessary. */ + nwords = 1; + for (i = 0; buf[i] != '\0'; i++) + if (buf[i] == ',') + nwords++; + if (nwords > _NCPUWORDS) + return (-1); + + CPU_ZERO(set); + for (i = nwords - 1; i > 0; i--) { + ret = sscanf(buf, "%lx, ", &set->__bits[i]); + if (ret == 0 || ret == -1) + return (-1); + buf = strstr(buf, " "); + if (buf == NULL) + return (-1); + buf++; + } + ret = sscanf(buf, "%lx", &set->__bits[0]); + if (ret == 0 || ret == -1) + return (-1); + return (0); +} + /* * Apply an anonymous mask to a single thread. */ @@ -754,12 +835,7 @@ cpuset_init(void *arg) { cpuset_t mask; - CPU_ZERO(&mask); -#ifdef SMP - mask.__bits[0] = all_cpus; -#else - mask.__bits[0] = 1; -#endif + mask = all_cpus; if (cpuset_modify(cpuset_zero, &mask)) panic("Can't set initial cpuset mask.\n"); cpuset_zero->cs_flags |= CPU_SET_RDONLY; diff --git a/sys/kern/kern_ktr.c b/sys/kern/kern_ktr.c index 2e5e06f671b2..eff3d5bd68ab 100644 --- a/sys/kern/kern_ktr.c +++ b/sys/kern/kern_ktr.c @@ -40,8 +40,10 @@ __FBSDID("$FreeBSD$"); #include "opt_alq.h" #include +#include #include #include +#include #include #include #include @@ -68,10 +70,6 @@ __FBSDID("$FreeBSD$"); #define KTR_MASK (0) #endif -#ifndef KTR_CPUMASK -#define KTR_CPUMASK (~0) -#endif - #ifndef KTR_TIME #define KTR_TIME get_cyclecount() #endif @@ -84,11 +82,6 @@ FEATURE(ktr, "Kernel support for KTR kernel tracing facility"); SYSCTL_NODE(_debug, OID_AUTO, ktr, CTLFLAG_RD, 0, "KTR options"); -int ktr_cpumask = KTR_CPUMASK; -TUNABLE_INT("debug.ktr.cpumask", &ktr_cpumask); -SYSCTL_INT(_debug_ktr, OID_AUTO, cpumask, CTLFLAG_RW, - &ktr_cpumask, 0, "Bitmask of CPUs on which KTR logging is enabled"); - int ktr_mask = KTR_MASK; TUNABLE_INT("debug.ktr.mask", &ktr_mask); SYSCTL_INT(_debug_ktr, OID_AUTO, mask, CTLFLAG_RW, @@ -106,6 +99,54 @@ int ktr_version = KTR_VERSION; SYSCTL_INT(_debug_ktr, OID_AUTO, version, CTLFLAG_RD, &ktr_version, 0, "Version of the KTR interface"); +cpuset_t ktr_cpumask; +static char ktr_cpumask_str[CPUSETBUFSIZ]; +TUNABLE_STR("debug.ktr.cpumask", ktr_cpumask_str, sizeof(ktr_cpumask_str)); + +static void +ktr_cpumask_initializer(void *dummy __unused) +{ + + CPU_FILL(&ktr_cpumask); +#ifdef KTR_CPUMASK + if (cpusetobj_strscan(&ktr_cpumask, KTR_CPUMASK) == -1) + CPU_FILL(&ktr_cpumask); +#endif + + /* + * TUNABLE_STR() runs with SI_ORDER_MIDDLE priority, thus it must be + * already set, if necessary. + */ + if (ktr_cpumask_str[0] != '\0' && + cpusetobj_strscan(&ktr_cpumask, ktr_cpumask_str) == -1) + CPU_FILL(&ktr_cpumask); +} +SYSINIT(ktr_cpumask_initializer, SI_SUB_TUNABLES, SI_ORDER_ANY, + ktr_cpumask_initializer, NULL); + +static int +sysctl_debug_ktr_cpumask(SYSCTL_HANDLER_ARGS) +{ + char lktr_cpumask_str[CPUSETBUFSIZ]; + cpuset_t imask; + int error; + + cpusetobj_strprint(lktr_cpumask_str, &ktr_cpumask); + error = sysctl_handle_string(oidp, lktr_cpumask_str, + sizeof(lktr_cpumask_str), req); + if (error != 0 || req->newptr == NULL) + return (error); + if (cpusetobj_strscan(&imask, lktr_cpumask_str) == -1) + return (EINVAL); + CPU_COPY(&imask, &ktr_cpumask); + + return (error); +} +SYSCTL_PROC(_debug_ktr, OID_AUTO, cpumask, + CTLFLAG_RW | CTLFLAG_MPSAFE | CTLTYPE_STRING, NULL, 0, + sysctl_debug_ktr_cpumask, "S", + "Bitmask of CPUs on which KTR logging is enabled"); + volatile int ktr_idx = 0; struct ktr_entry ktr_buf[KTR_ENTRIES]; @@ -213,7 +254,7 @@ ktr_tracepoint(u_int mask, const char *file, int line, const char *format, if ((ktr_mask & mask) == 0) return; cpu = KTR_CPU; - if (((1 << cpu) & ktr_cpumask) == 0) + if (!CPU_ISSET(cpu, &ktr_cpumask)) return; #if defined(KTR_VERBOSE) || defined(KTR_ALQ) td = curthread; diff --git a/sys/kern/kern_pmc.c b/sys/kern/kern_pmc.c index 7532378c7a2d..8d9c7c04a711 100644 --- a/sys/kern/kern_pmc.c +++ b/sys/kern/kern_pmc.c @@ -55,7 +55,7 @@ int (*pmc_hook)(struct thread *td, int function, void *arg) = NULL; int (*pmc_intr)(int cpu, struct trapframe *tf) = NULL; /* Bitmask of CPUs requiring servicing at hardclock time */ -volatile cpumask_t pmc_cpumask; +volatile cpuset_t pmc_cpumask; /* * A global count of SS mode PMCs. When non-zero, this means that @@ -112,7 +112,7 @@ pmc_cpu_is_active(int cpu) { #ifdef SMP return (pmc_cpu_is_present(cpu) && - (hlt_cpus_mask & (1 << cpu)) == 0); + !CPU_ISSET(cpu, &hlt_cpus_mask)); #else return (1); #endif @@ -139,7 +139,7 @@ int pmc_cpu_is_primary(int cpu) { #ifdef SMP - return ((logical_cpus_mask & (1 << cpu)) == 0); + return (!CPU_ISSET(cpu, &logical_cpus_mask)); #else return (1); #endif diff --git a/sys/kern/kern_rmlock.c b/sys/kern/kern_rmlock.c index 7f2b4e7367be..3214e1b55e2f 100644 --- a/sys/kern/kern_rmlock.c +++ b/sys/kern/kern_rmlock.c @@ -263,7 +263,7 @@ _rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) pc = pcpu_find(curcpu); /* Check if we just need to do a proper critical_exit. */ - if (!(pc->pc_cpumask & rm->rm_writecpus)) { + if (!CPU_OVERLAP(&pc->pc_cpumask, &rm->rm_writecpus)) { critical_exit(); return (1); } @@ -325,7 +325,7 @@ _rm_rlock_hard(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) critical_enter(); pc = pcpu_find(curcpu); - rm->rm_writecpus &= ~pc->pc_cpumask; + CPU_NAND(&rm->rm_writecpus, &pc->pc_cpumask); rm_tracker_add(pc, tracker); sched_pin(); critical_exit(); @@ -366,7 +366,8 @@ _rm_rlock(struct rmlock *rm, struct rm_priotracker *tracker, int trylock) * Fast path to combine two common conditions into a single * conditional jump. */ - if (0 == (td->td_owepreempt | (rm->rm_writecpus & pc->pc_cpumask))) + if (0 == (td->td_owepreempt | + CPU_OVERLAP(&rm->rm_writecpus, &pc->pc_cpumask))) return (1); /* We do not have a read token and need to acquire one. */ @@ -429,17 +430,17 @@ _rm_wlock(struct rmlock *rm) { struct rm_priotracker *prio; struct turnstile *ts; - cpumask_t readcpus; + cpuset_t readcpus; if (rm->lock_object.lo_flags & RM_SLEEPABLE) sx_xlock(&rm->rm_lock_sx); else mtx_lock(&rm->rm_lock_mtx); - if (rm->rm_writecpus != all_cpus) { + if (CPU_CMP(&rm->rm_writecpus, &all_cpus)) { /* Get all read tokens back */ - - readcpus = all_cpus & (all_cpus & ~rm->rm_writecpus); + readcpus = all_cpus; + CPU_NAND(&readcpus, &rm->rm_writecpus); rm->rm_writecpus = all_cpus; /* diff --git a/sys/kern/kern_shutdown.c b/sys/kern/kern_shutdown.c index 001da3da8449..da041faf99a8 100644 --- a/sys/kern/kern_shutdown.c +++ b/sys/kern/kern_shutdown.c @@ -233,30 +233,32 @@ print_uptime(void) printf("%lds\n", (long)ts.tv_sec); } -static void -doadump(void) +int +doadump(boolean_t textdump) { + boolean_t coredump; - /* - * Sometimes people have to call this from the kernel debugger. - * (if 'panic' can not dump) - * Give them a clue as to why they can't dump. - */ - if (dumper.dumper == NULL) { - printf("Cannot dump. Device not defined or unavailable.\n"); - return; - } + if (dumping) + return (EBUSY); + if (dumper.dumper == NULL) + return (ENXIO); savectx(&dumppcb); dumptid = curthread->td_tid; dumping++; + + coredump = TRUE; #ifdef DDB - if (textdump_pending) + if (textdump && textdump_pending) { + coredump = FALSE; textdump_dumpsys(&dumper); - else + } #endif + if (coredump) dumpsys(&dumper); + dumping--; + return (0); } static int @@ -425,7 +427,7 @@ kern_reboot(int howto) EVENTHANDLER_INVOKE(shutdown_post_sync, howto); if ((howto & (RB_HALT|RB_DUMP)) == RB_DUMP && !cold && !dumping) - doadump(); + doadump(TRUE); /* Now that we're going to really halt the system... */ EVENTHANDLER_INVOKE(shutdown_final, howto); diff --git a/sys/kern/ksched.c b/sys/kern/ksched.c index 7ee56d50b13b..799b60d1a139 100644 --- a/sys/kern/ksched.c +++ b/sys/kern/ksched.c @@ -206,7 +206,7 @@ ksched_setscheduler(struct ksched *ksched, if (param->sched_priority >= 0 && param->sched_priority <= (PRI_MAX_TIMESHARE - PRI_MIN_TIMESHARE)) { rtp.type = RTP_PRIO_NORMAL; - rtp.prio = p4prio_to_rtpprio(param->sched_priority); + rtp.prio = p4prio_to_tsprio(param->sched_priority); rtp_to_pri(&rtp, td); } else e = EINVAL; diff --git a/sys/kern/sched_4bsd.c b/sys/kern/sched_4bsd.c index 519cae516e2d..592bb80aff52 100644 --- a/sys/kern/sched_4bsd.c +++ b/sys/kern/sched_4bsd.c @@ -156,7 +156,7 @@ static struct runq runq; static struct runq runq_pcpu[MAXCPU]; long runq_length[MAXCPU]; -static cpumask_t idle_cpus_mask; +static cpuset_t idle_cpus_mask; #endif struct pcpuidlestat { @@ -951,7 +951,8 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) if (td->td_flags & TDF_IDLETD) { TD_SET_CAN_RUN(td); #ifdef SMP - idle_cpus_mask &= ~PCPU_GET(cpumask); + /* Spinlock held here, assume no migration. */ + CPU_NAND(&idle_cpus_mask, PCPU_PTR(cpumask)); #endif } else { if (TD_IS_RUNNING(td)) { @@ -1025,7 +1026,7 @@ sched_switch(struct thread *td, struct thread *newtd, int flags) #ifdef SMP if (td->td_flags & TDF_IDLETD) - idle_cpus_mask |= PCPU_GET(cpumask); + CPU_OR(&idle_cpus_mask, PCPU_PTR(cpumask)); #endif sched_lock.mtx_lock = (uintptr_t)td; td->td_oncpu = PCPU_GET(cpuid); @@ -1054,7 +1055,8 @@ static int forward_wakeup(int cpunum) { struct pcpu *pc; - cpumask_t dontuse, id, map, map2, me; + cpuset_t dontuse, id, map, map2, me; + int iscpuset; mtx_assert(&sched_lock, MA_OWNED); @@ -1071,32 +1073,38 @@ forward_wakeup(int cpunum) /* * Check the idle mask we received against what we calculated * before in the old version. + * + * Also note that sched_lock is held now, thus no migration is + * expected. */ me = PCPU_GET(cpumask); /* Don't bother if we should be doing it ourself. */ - if ((me & idle_cpus_mask) && (cpunum == NOCPU || me == (1 << cpunum))) + if (CPU_OVERLAP(&me, &idle_cpus_mask) && + (cpunum == NOCPU || CPU_ISSET(cpunum, &me))) return (0); - dontuse = me | stopped_cpus | hlt_cpus_mask; - map2 = 0; + dontuse = me; + CPU_OR(&dontuse, &stopped_cpus); + CPU_OR(&dontuse, &hlt_cpus_mask); + CPU_ZERO(&map2); if (forward_wakeup_use_loop) { STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { id = pc->pc_cpumask; - if ((id & dontuse) == 0 && + if (!CPU_OVERLAP(&id, &dontuse) && pc->pc_curthread == pc->pc_idlethread) { - map2 |= id; + CPU_OR(&map2, &id); } } } if (forward_wakeup_use_mask) { - map = 0; - map = idle_cpus_mask & ~dontuse; + map = idle_cpus_mask; + CPU_NAND(&map, &dontuse); /* If they are both on, compare and use loop if different. */ if (forward_wakeup_use_loop) { - if (map != map2) { + if (CPU_CMP(&map, &map2)) { printf("map != map2, loop method preferred\n"); map = map2; } @@ -1108,18 +1116,22 @@ forward_wakeup(int cpunum) /* If we only allow a specific CPU, then mask off all the others. */ if (cpunum != NOCPU) { KASSERT((cpunum <= mp_maxcpus),("forward_wakeup: bad cpunum.")); - map &= (1 << cpunum); + iscpuset = CPU_ISSET(cpunum, &map); + if (iscpuset == 0) + CPU_ZERO(&map); + else + CPU_SETOF(cpunum, &map); } - if (map) { + if (!CPU_EMPTY(&map)) { forward_wakeups_delivered++; STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { id = pc->pc_cpumask; - if ((map & id) == 0) + if (!CPU_OVERLAP(&map, &id)) continue; if (cpu_idle_wakeup(pc->pc_cpuid)) - map &= ~id; + CPU_NAND(&map, &id); } - if (map) + if (!CPU_EMPTY(&map)) ipi_selected(map, IPI_AST); return (1); } @@ -1135,7 +1147,7 @@ kick_other_cpu(int pri, int cpuid) int cpri; pcpu = pcpu_find(cpuid); - if (idle_cpus_mask & pcpu->pc_cpumask) { + if (CPU_OVERLAP(&idle_cpus_mask, &pcpu->pc_cpumask)) { forward_wakeups_delivered++; if (!cpu_idle_wakeup(cpuid)) ipi_cpu(cpuid, IPI_AST); @@ -1193,6 +1205,7 @@ void sched_add(struct thread *td, int flags) #ifdef SMP { + cpuset_t idle, me, tidlemsk; struct td_sched *ts; int forwarded = 0; int cpu; @@ -1262,11 +1275,20 @@ sched_add(struct thread *td, int flags) kick_other_cpu(td->td_priority, cpu); } else { if (!single_cpu) { - cpumask_t me = PCPU_GET(cpumask); - cpumask_t idle = idle_cpus_mask & me; - if (!idle && ((flags & SRQ_INTR) == 0) && - (idle_cpus_mask & ~(hlt_cpus_mask | me))) + /* + * Thread spinlock is held here, assume no + * migration is possible. + */ + me = PCPU_GET(cpumask); + idle = idle_cpus_mask; + tidlemsk = idle; + CPU_AND(&idle, &me); + CPU_OR(&me, &hlt_cpus_mask); + CPU_NAND(&tidlemsk, &me); + + if (CPU_EMPTY(&idle) && ((flags & SRQ_INTR) == 0) && + !CPU_EMPTY(&tidlemsk)) forwarded = forward_wakeup(cpu); } diff --git a/sys/kern/sched_ule.c b/sys/kern/sched_ule.c index ac18e778ae28..05267f3d6adb 100644 --- a/sys/kern/sched_ule.c +++ b/sys/kern/sched_ule.c @@ -564,7 +564,7 @@ struct cpu_search { #define CPUSET_FOREACH(cpu, mask) \ for ((cpu) = 0; (cpu) <= mp_maxid; (cpu)++) \ - if ((mask) & 1 << (cpu)) + if (CPU_ISSET(cpu, &mask)) static __inline int cpu_search(struct cpu_group *cg, struct cpu_search *low, struct cpu_search *high, const int match); @@ -2650,15 +2650,16 @@ static int sysctl_kern_sched_topology_spec_internal(struct sbuf *sb, struct cpu_group *cg, int indent) { + char cpusetbuf[CPUSETBUFSIZ]; int i, first; sbuf_printf(sb, "%*s\n", indent, "", 1 + indent / 2, cg->cg_level); - sbuf_printf(sb, "%*s ", indent, "", - cg->cg_count, cg->cg_mask); + sbuf_printf(sb, "%*s ", indent, "", + cg->cg_count, cpusetobj_strprint(cpusetbuf, &cg->cg_mask)); first = TRUE; for (i = 0; i < MAXCPU; i++) { - if ((cg->cg_mask & (1 << i)) != 0) { + if (CPU_ISSET(i, &cg->cg_mask)) { if (!first) sbuf_printf(sb, ", "); else diff --git a/sys/kern/subr_kdb.c b/sys/kern/subr_kdb.c index 5d68ae250d47..1d67864b1533 100644 --- a/sys/kern/subr_kdb.c +++ b/sys/kern/subr_kdb.c @@ -413,7 +413,8 @@ kdb_thr_ctx(struct thread *thr) #if defined(SMP) && defined(KDB_STOPPEDPCB) STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { - if (pc->pc_curthread == thr && (stopped_cpus & pc->pc_cpumask)) + if (pc->pc_curthread == thr && + CPU_OVERLAP(&stopped_cpus, &pc->pc_cpumask)) return (KDB_STOPPEDPCB(pc)); } #endif diff --git a/sys/kern/subr_pcpu.c b/sys/kern/subr_pcpu.c index 5cb4f26a7ee4..a6b3ae09defc 100644 --- a/sys/kern/subr_pcpu.c +++ b/sys/kern/subr_pcpu.c @@ -87,7 +87,7 @@ pcpu_init(struct pcpu *pcpu, int cpuid, size_t size) KASSERT(cpuid >= 0 && cpuid < MAXCPU, ("pcpu_init: invalid cpuid %d", cpuid)); pcpu->pc_cpuid = cpuid; - pcpu->pc_cpumask = 1 << cpuid; + CPU_SETOF(cpuid, &pcpu->pc_cpumask); cpuid_to_pcpu[cpuid] = pcpu; STAILQ_INSERT_TAIL(&cpuhead, pcpu, pc_allcpu); cpu_pcpu_init(pcpu, cpuid, size); diff --git a/sys/kern/subr_prf.c b/sys/kern/subr_prf.c index 3334837747f5..48f2dd9ce8af 100644 --- a/sys/kern/subr_prf.c +++ b/sys/kern/subr_prf.c @@ -163,6 +163,7 @@ uprintf(const char *fmt, ...) goto out; } pca.flags = TOTTY; + pca.p_bufr = NULL; va_start(ap, fmt); tty_lock(pca.tty); retval = kvprintf(fmt, putchar, &pca, 10, ap); @@ -206,6 +207,7 @@ tprintf(struct proc *p, int pri, const char *fmt, ...) pca.pri = pri; pca.tty = tp; pca.flags = flags; + pca.p_bufr = NULL; va_start(ap, fmt); if (pca.tty != NULL) tty_lock(pca.tty); @@ -234,6 +236,7 @@ ttyprintf(struct tty *tp, const char *fmt, ...) va_start(ap, fmt); pca.tty = tp; pca.flags = TOTTY; + pca.p_bufr = NULL; retval = kvprintf(fmt, putchar, &pca, 10, ap); va_end(ap); return (retval); diff --git a/sys/kern/subr_rman.c b/sys/kern/subr_rman.c index 3014b1902014..abd72c03df0b 100644 --- a/sys/kern/subr_rman.c +++ b/sys/kern/subr_rman.c @@ -839,6 +839,7 @@ int_rman_release_resource(struct rman *rm, struct resource_i *r) * without freeing anything. */ r->r_flags &= ~RF_ALLOCATED; + r->r_dev = NULL; return 0; } diff --git a/sys/kern/subr_smp.c b/sys/kern/subr_smp.c index 351f09677fa4..c38177b4e71f 100644 --- a/sys/kern/subr_smp.c +++ b/sys/kern/subr_smp.c @@ -53,15 +53,15 @@ __FBSDID("$FreeBSD$"); #include "opt_sched.h" #ifdef SMP -volatile cpumask_t stopped_cpus; -volatile cpumask_t started_cpus; -cpumask_t hlt_cpus_mask; -cpumask_t logical_cpus_mask; +volatile cpuset_t stopped_cpus; +volatile cpuset_t started_cpus; +cpuset_t hlt_cpus_mask; +cpuset_t logical_cpus_mask; void (*cpustop_restartfunc)(void); #endif /* This is used in modules that need to work in both SMP and UP. */ -cpumask_t all_cpus; +cpuset_t all_cpus; int mp_ncpus; /* export this for libkvm consumers. */ @@ -200,8 +200,11 @@ forward_signal(struct thread *td) * */ static int -generic_stop_cpus(cpumask_t map, u_int type) +generic_stop_cpus(cpuset_t map, u_int type) { +#ifdef KTR + char cpusetbuf[CPUSETBUFSIZ]; +#endif static volatile u_int stopping_cpu = NOCPU; int i; @@ -216,7 +219,8 @@ generic_stop_cpus(cpumask_t map, u_int type) if (!smp_started) return (0); - CTR2(KTR_SMP, "stop_cpus(%x) with %u type", map, type); + CTR2(KTR_SMP, "stop_cpus(%s) with %u type", + cpusetobj_strprint(cpusetbuf, &map), type); if (stopping_cpu != PCPU_GET(cpuid)) while (atomic_cmpset_int(&stopping_cpu, NOCPU, @@ -228,7 +232,7 @@ generic_stop_cpus(cpumask_t map, u_int type) ipi_selected(map, type); i = 0; - while ((stopped_cpus & map) != map) { + while (!CPU_SUBSET(&stopped_cpus, &map)) { /* spin */ cpu_spinwait(); i++; @@ -245,14 +249,14 @@ generic_stop_cpus(cpumask_t map, u_int type) } int -stop_cpus(cpumask_t map) +stop_cpus(cpuset_t map) { return (generic_stop_cpus(map, IPI_STOP)); } int -stop_cpus_hard(cpumask_t map) +stop_cpus_hard(cpuset_t map) { return (generic_stop_cpus(map, IPI_STOP_HARD)); @@ -260,7 +264,7 @@ stop_cpus_hard(cpumask_t map) #if defined(__amd64__) int -suspend_cpus(cpumask_t map) +suspend_cpus(cpuset_t map) { return (generic_stop_cpus(map, IPI_SUSPEND)); @@ -281,19 +285,22 @@ suspend_cpus(cpumask_t map) * 1: ok */ int -restart_cpus(cpumask_t map) +restart_cpus(cpuset_t map) { +#ifdef KTR + char cpusetbuf[CPUSETBUFSIZ]; +#endif if (!smp_started) return 0; - CTR1(KTR_SMP, "restart_cpus(%x)", map); + CTR1(KTR_SMP, "restart_cpus(%s)", cpusetobj_strprint(cpusetbuf, &map)); /* signal other cpus to restart */ - atomic_store_rel_int(&started_cpus, map); + CPU_COPY_STORE_REL(&map, &started_cpus); /* wait for each to clear its bit */ - while ((stopped_cpus & map) != 0) + while (CPU_OVERLAP(&stopped_cpus, &map)) cpu_spinwait(); return 1; @@ -409,13 +416,13 @@ smp_rendezvous_action(void) } void -smp_rendezvous_cpus(cpumask_t map, +smp_rendezvous_cpus(cpuset_t map, void (* setup_func)(void *), void (* action_func)(void *), void (* teardown_func)(void *), void *arg) { - int i, ncpus = 0; + int curcpumap, i, ncpus = 0; if (!smp_started) { if (setup_func != NULL) @@ -428,11 +435,11 @@ smp_rendezvous_cpus(cpumask_t map, } CPU_FOREACH(i) { - if (((1 << i) & map) != 0) + if (CPU_ISSET(i, &map)) ncpus++; } if (ncpus == 0) - panic("ncpus is 0 with map=0x%x", map); + panic("ncpus is 0 with non-zero map"); mtx_lock_spin(&smp_ipi_mtx); @@ -452,10 +459,12 @@ smp_rendezvous_cpus(cpumask_t map, * Signal other processors, which will enter the IPI with * interrupts off. */ - ipi_selected(map & ~(1 << curcpu), IPI_RENDEZVOUS); + curcpumap = CPU_ISSET(curcpu, &map); + CPU_CLR(curcpu, &map); + ipi_selected(map, IPI_RENDEZVOUS); /* Check if the current CPU is in the map */ - if ((map & (1 << curcpu)) != 0) + if (curcpumap != 0) smp_rendezvous_action(); /* @@ -484,6 +493,7 @@ static struct cpu_group group[MAXCPU]; struct cpu_group * smp_topo(void) { + char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ]; struct cpu_group *top; /* @@ -530,9 +540,10 @@ smp_topo(void) if (top->cg_count != mp_ncpus) panic("Built bad topology at %p. CPU count %d != %d", top, top->cg_count, mp_ncpus); - if (top->cg_mask != all_cpus) - panic("Built bad topology at %p. CPU mask 0x%X != 0x%X", - top, top->cg_mask, all_cpus); + if (CPU_CMP(&top->cg_mask, &all_cpus)) + panic("Built bad topology at %p. CPU mask (%s) != (%s)", + top, cpusetobj_strprint(cpusetbuf, &top->cg_mask), + cpusetobj_strprint(cpusetbuf2, &all_cpus)); return (top); } @@ -557,11 +568,13 @@ static int smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share, int count, int flags, int start) { - cpumask_t mask; + char cpusetbuf[CPUSETBUFSIZ], cpusetbuf2[CPUSETBUFSIZ]; + cpuset_t mask; int i; - for (mask = 0, i = 0; i < count; i++, start++) - mask |= (1 << start); + CPU_ZERO(&mask); + for (i = 0; i < count; i++, start++) + CPU_SET(start, &mask); child->cg_parent = parent; child->cg_child = NULL; child->cg_children = 0; @@ -571,10 +584,12 @@ smp_topo_addleaf(struct cpu_group *parent, struct cpu_group *child, int share, child->cg_mask = mask; parent->cg_children++; for (; parent != NULL; parent = parent->cg_parent) { - if ((parent->cg_mask & child->cg_mask) != 0) - panic("Duplicate children in %p. mask 0x%X child 0x%X", - parent, parent->cg_mask, child->cg_mask); - parent->cg_mask |= child->cg_mask; + if (CPU_OVERLAP(&parent->cg_mask, &child->cg_mask)) + panic("Duplicate children in %p. mask (%s) child (%s)", + parent, + cpusetobj_strprint(cpusetbuf, &parent->cg_mask), + cpusetobj_strprint(cpusetbuf2, &child->cg_mask)); + CPU_OR(&parent->cg_mask, &child->cg_mask); parent->cg_count += child->cg_count; } @@ -634,20 +649,20 @@ struct cpu_group * smp_topo_find(struct cpu_group *top, int cpu) { struct cpu_group *cg; - cpumask_t mask; + cpuset_t mask; int children; int i; - mask = (1 << cpu); + CPU_SETOF(cpu, &mask); cg = top; for (;;) { - if ((cg->cg_mask & mask) == 0) + if (!CPU_OVERLAP(&cg->cg_mask, &mask)) return (NULL); if (cg->cg_children == 0) return (cg); children = cg->cg_children; for (i = 0, cg = cg->cg_child; i < children; cg++, i++) - if ((cg->cg_mask & mask) != 0) + if (CPU_OVERLAP(&cg->cg_mask, &mask)) break; } return (NULL); @@ -655,7 +670,7 @@ smp_topo_find(struct cpu_group *top, int cpu) #else /* !SMP */ void -smp_rendezvous_cpus(cpumask_t map, +smp_rendezvous_cpus(cpuset_t map, void (*setup_func)(void *), void (*action_func)(void *), void (*teardown_func)(void *), diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index a4bbdba54e5a..19aaee01859a 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -747,6 +747,10 @@ kern_sendit(td, s, mp, flags, control, segflg) return (error); so = (struct socket *)fp->f_data; +#ifdef KTRACE + if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT)) + ktrsockaddr(mp->msg_name); +#endif #ifdef MAC if (mp->msg_name != NULL) { error = mac_socket_check_connect(td->td_ucred, so, diff --git a/sys/mips/cavium/octeon_mp.c b/sys/mips/cavium/octeon_mp.c index 78eafa678118..efddee86ae0f 100644 --- a/sys/mips/cavium/octeon_mp.c +++ b/sys/mips/cavium/octeon_mp.c @@ -102,10 +102,18 @@ platform_init_ap(int cpuid) mips_wbflush(); } -cpumask_t -platform_cpu_mask(void) +void +platform_cpu_mask(cpuset_t *mask) { - return (octeon_bootinfo->core_mask); + + CPU_ZERO(mask); + + /* + * XXX: hack in order to simplify CPU set building, assuming that + * core_mask is 32-bits. + */ + memcpy(mask, &octeon_bootinfo->core_mask, + sizeof(octeon_bootinfo->core_mask)); } struct cpu_group * diff --git a/sys/mips/include/_types.h b/sys/mips/include/_types.h index 4d57e20db108..2f23db6d9b3a 100644 --- a/sys/mips/include/_types.h +++ b/sys/mips/include/_types.h @@ -73,7 +73,6 @@ typedef unsigned long long __uint64_t; * Standard type definitions. */ typedef __int32_t __clock_t; /* clock()... */ -typedef unsigned int __cpumask_t; typedef double __double_t; typedef double __float_t; #ifdef __mips_n64 diff --git a/sys/mips/include/hwfunc.h b/sys/mips/include/hwfunc.h index 683aedb0c9ac..a9e3285f5395 100644 --- a/sys/mips/include/hwfunc.h +++ b/sys/mips/include/hwfunc.h @@ -28,6 +28,8 @@ #ifndef _MACHINE_HWFUNC_H_ #define _MACHINE_HWFUNC_H_ +#include + struct trapframe; struct timecounter; /* @@ -91,7 +93,7 @@ extern int platform_processor_id(void); /* * Return the cpumask of available processors. */ -extern cpumask_t platform_cpu_mask(void); +extern void platform_cpu_mask(cpuset_t *mask); /* * Return the topology of processors on this platform diff --git a/sys/mips/include/pmap.h b/sys/mips/include/pmap.h index e71063592a80..90375ebd3448 100644 --- a/sys/mips/include/pmap.h +++ b/sys/mips/include/pmap.h @@ -58,6 +58,7 @@ #ifndef LOCORE #include +#include #include #include @@ -83,7 +84,7 @@ struct pmap { pd_entry_t *pm_segtab; /* KVA of segment table */ TAILQ_HEAD(, pv_entry) pm_pvlist; /* list of mappings in * pmap */ - cpumask_t pm_active; /* active on cpus */ + cpuset_t pm_active; /* active on cpus */ struct { u_int32_t asid:ASID_BITS; /* TLB address space tag */ u_int32_t gen:ASIDGEN_BITS; /* its generation number */ diff --git a/sys/mips/include/smp.h b/sys/mips/include/smp.h index 58aaf03165bf..0fcca9af1542 100644 --- a/sys/mips/include/smp.h +++ b/sys/mips/include/smp.h @@ -17,6 +17,8 @@ #ifdef _KERNEL +#include + #include /* @@ -33,7 +35,7 @@ void ipi_all_but_self(int ipi); void ipi_cpu(int cpu, u_int ipi); -void ipi_selected(cpumask_t cpus, int ipi); +void ipi_selected(cpuset_t cpus, int ipi); void smp_init_secondary(u_int32_t cpuid); void mpentry(void); diff --git a/sys/mips/mips/mp_machdep.c b/sys/mips/mips/mp_machdep.c index 7191b37f9596..79a3476a7784 100644 --- a/sys/mips/mips/mp_machdep.c +++ b/sys/mips/mips/mp_machdep.c @@ -29,6 +29,7 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include @@ -80,15 +81,16 @@ ipi_all_but_self(int ipi) /* Send an IPI to a set of cpus. */ void -ipi_selected(cpumask_t cpus, int ipi) +ipi_selected(cpuset_t cpus, int ipi) { struct pcpu *pc; - CTR3(KTR_SMP, "%s: cpus: %x, ipi: %x\n", __func__, cpus, ipi); - STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { - if ((cpus & pc->pc_cpumask) != 0) + if (CPU_OVERLAP(&cpus, &pc->pc_cpumask)) { + CTR3(KTR_SMP, "%s: pc: %p, ipi: %x\n", __func__, pc, + ipi); ipi_send(pc, ipi); + } } } @@ -108,7 +110,7 @@ static int mips_ipi_handler(void *arg) { int cpu; - cpumask_t cpumask; + cpuset_t cpumask; u_int ipi, ipi_bitmap; int bit; @@ -148,14 +150,14 @@ mips_ipi_handler(void *arg) tlb_save(); /* Indicate we are stopped */ - atomic_set_int(&stopped_cpus, cpumask); + CPU_OR_ATOMIC(&stopped_cpus, &cpumask); /* Wait for restart */ - while ((started_cpus & cpumask) == 0) + while (!CPU_OVERLAP(&started_cpus, &cpumask)) cpu_spinwait(); - atomic_clear_int(&started_cpus, cpumask); - atomic_clear_int(&stopped_cpus, cpumask); + CPU_NAND_ATOMIC(&started_cpus, &cpumask); + CPU_NAND_ATOMIC(&stopped_cpus, &cpumask); CTR0(KTR_SMP, "IPI_STOP (restart)"); break; case IPI_PREEMPT: @@ -200,14 +202,22 @@ start_ap(int cpuid) void cpu_mp_setmaxid(void) { - cpumask_t cpumask; + cpuset_t cpumask; + int cpu, last; - cpumask = platform_cpu_mask(); - mp_ncpus = bitcount32(cpumask); + platform_cpu_mask(&cpumask); + mp_ncpus = 0; + last = 1; + while ((cpu = cpusetobj_ffs(&cpumask)) != 0) { + last = cpu; + cpu--; + CPU_CLR(cpu, &cpumask); + mp_ncpus++; + } if (mp_ncpus <= 0) mp_ncpus = 1; - mp_maxid = min(fls(cpumask), MAXCPU) - 1; + mp_maxid = min(last, MAXCPU) - 1; } void @@ -233,16 +243,16 @@ void cpu_mp_start(void) { int error, cpuid; - cpumask_t cpumask; + cpuset_t cpumask, ocpus; mtx_init(&ap_boot_mtx, "ap boot", NULL, MTX_SPIN); - all_cpus = 0; - cpumask = platform_cpu_mask(); + CPU_ZERO(&all_cpus); + platform_cpu_mask(&cpumask); - while (cpumask != 0) { - cpuid = ffs(cpumask) - 1; - cpumask &= ~(1 << cpuid); + while (!CPU_EMPTY(&cpumask)) { + cpuid = cpusetobj_ffs(&cpumask) - 1; + CPU_CLR(cpuid, &cpumask); if (cpuid >= MAXCPU) { printf("cpu_mp_start: ignoring AP #%d.\n", cpuid); @@ -257,15 +267,19 @@ cpu_mp_start(void) if (bootverbose) printf("AP #%d started!\n", cpuid); } - all_cpus |= 1 << cpuid; + CPU_SET(cpuid, &all_cpus); } - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + ocpus = all_cpus; + CPU_CLR(PCPU_GET(cpuid), &ocpus); + PCPU_SET(other_cpus, ocpus); } void smp_init_secondary(u_int32_t cpuid) { + cpuset_t ocpus; + /* TLB */ mips_wr_wired(0); tlb_invalidate_all(); @@ -303,7 +317,9 @@ smp_init_secondary(u_int32_t cpuid) CTR1(KTR_SMP, "SMP: AP CPU #%d launched", PCPU_GET(cpuid)); /* Build our map of 'other' CPUs. */ - PCPU_SET(other_cpus, all_cpus & ~PCPU_GET(cpumask)); + ocpus = all_cpus; + CPU_CLR(PCPU_GET(cpuid), &ocpus); + PCPU_SET(other_cpus, ocpus); if (bootverbose) printf("SMP: AP CPU #%d launched.\n", PCPU_GET(cpuid)); diff --git a/sys/mips/mips/pmap.c b/sys/mips/mips/pmap.c index 7f0f4f004c80..f7ea660d019c 100644 --- a/sys/mips/mips/pmap.c +++ b/sys/mips/mips/pmap.c @@ -471,7 +471,7 @@ pmap_create_kernel_pagetable(void) PMAP_LOCK_INIT(kernel_pmap); kernel_pmap->pm_segtab = kernel_segmap; - kernel_pmap->pm_active = ~0; + CPU_FILL(&kernel_pmap->pm_active); TAILQ_INIT(&kernel_pmap->pm_pvlist); kernel_pmap->pm_asid[0].asid = PMAP_ASID_RESERVED; kernel_pmap->pm_asid[0].gen = 0; @@ -630,10 +630,14 @@ pmap_invalidate_all_local(pmap_t pmap) tlb_invalidate_all(); return; } - if (pmap->pm_active & PCPU_GET(cpumask)) + sched_pin(); + if (CPU_OVERLAP(&pmap->pm_active, PCPU_PTR(cpumask))) { + sched_unpin(); tlb_invalidate_all_user(pmap); - else + } else { + sched_unpin(); pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; + } } #ifdef SMP @@ -667,12 +671,16 @@ pmap_invalidate_page_local(pmap_t pmap, vm_offset_t va) tlb_invalidate_address(pmap, va); return; } - if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) + sched_pin(); + if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) { + sched_unpin(); return; - else if (!(pmap->pm_active & PCPU_GET(cpumask))) { + } else if (!CPU_OVERLAP(&pmap->pm_active, PCPU_PTR(cpumask))) { pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; + sched_unpin(); return; } + sched_unpin(); tlb_invalidate_address(pmap, va); } @@ -716,12 +724,16 @@ pmap_update_page_local(pmap_t pmap, vm_offset_t va, pt_entry_t pte) tlb_update(pmap, va, pte); return; } - if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) + sched_pin(); + if (pmap->pm_asid[PCPU_GET(cpuid)].gen != PCPU_GET(asid_generation)) { + sched_unpin(); return; - else if (!(pmap->pm_active & PCPU_GET(cpumask))) { + } else if (!CPU_OVERLAP(&pmap->pm_active, PCPU_PTR(cpumask))) { pmap->pm_asid[PCPU_GET(cpuid)].gen = 0; + sched_unpin(); return; } + sched_unpin(); tlb_update(pmap, va, pte); } @@ -1041,7 +1053,7 @@ pmap_pinit0(pmap_t pmap) PMAP_LOCK_INIT(pmap); pmap->pm_segtab = kernel_segmap; - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); pmap->pm_ptphint = NULL; for (i = 0; i < MAXCPU; i++) { pmap->pm_asid[i].asid = PMAP_ASID_RESERVED; @@ -1102,7 +1114,7 @@ pmap_pinit(pmap_t pmap) ptdva = MIPS_PHYS_TO_DIRECT(VM_PAGE_TO_PHYS(ptdpg)); pmap->pm_segtab = (pd_entry_t *)ptdva; - pmap->pm_active = 0; + CPU_ZERO(&pmap->pm_active); pmap->pm_ptphint = NULL; for (i = 0; i < MAXCPU; i++) { pmap->pm_asid[i].asid = PMAP_ASID_RESERVED; @@ -2948,8 +2960,8 @@ pmap_activate(struct thread *td) oldpmap = PCPU_GET(curpmap); if (oldpmap) - atomic_clear_32(&oldpmap->pm_active, PCPU_GET(cpumask)); - atomic_set_32(&pmap->pm_active, PCPU_GET(cpumask)); + CPU_NAND_ATOMIC(&oldpmap->pm_active, PCPU_PTR(cpumask)); + CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask)); pmap_asid_alloc(pmap); if (td == curthread) { PCPU_SET(segbase, pmap->pm_segtab); @@ -3283,7 +3295,7 @@ pmap_kextract(vm_offset_t va) pt_entry_t *ptep; /* Is the kernel pmap initialized? */ - if (kernel_pmap->pm_active) { + if (!CPU_EMPTY(&kernel_pmap->pm_active)) { /* It's inside the virtual address range */ ptep = pmap_pte(kernel_pmap, va); if (ptep) { diff --git a/sys/mips/rmi/xlr_machdep.c b/sys/mips/rmi/xlr_machdep.c index 4a1734a893fd..836c605f7de3 100644 --- a/sys/mips/rmi/xlr_machdep.c +++ b/sys/mips/rmi/xlr_machdep.c @@ -614,11 +614,15 @@ platform_processor_id(void) return (xlr_hwtid_to_cpuid[xlr_cpu_id()]); } -cpumask_t -platform_cpu_mask(void) +void +platform_cpu_mask(cpuset_t *mask) { + int i, s; - return (~0U >> (32 - (xlr_ncores * xlr_threads_per_core))); + CPU_ZERO(mask); + s = xlr_ncores * xlr_threads_per_core; + for (i = 0; i < s; i++) + CPU_SET(i, mask); } struct cpu_group * diff --git a/sys/mips/sibyte/sb_scd.c b/sys/mips/sibyte/sb_scd.c index e5ac23c17720..50b99876acaa 100644 --- a/sys/mips/sibyte/sb_scd.c +++ b/sys/mips/sibyte/sb_scd.c @@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -242,11 +243,15 @@ sb_clear_mailbox(int cpu, uint64_t val) sb_store64(regaddr, val); } -cpumask_t -platform_cpu_mask(void) +void +platform_cpu_mask(cpuset_t *mask) { + int i, s; - return (~0U >> (32 - SYSREV_NUM_PROCESSORS(sb_read_sysrev()))); + CPU_ZERO(mask); + s = SYSREV_NUM_PROCESSORS(sb_read_sysrev()); + for (i = 0; i < s; i++) + CPU_SET(i, mask); } #endif /* SMP */ diff --git a/sys/net/bridgestp.c b/sys/net/bridgestp.c index 2993838ac683..e263b0b23d86 100644 --- a/sys/net/bridgestp.c +++ b/sys/net/bridgestp.c @@ -1860,6 +1860,8 @@ bstp_tick(void *arg) if (bs->bs_running == 0) return; + CURVNET_SET(bs->bs_vnet); + /* slow timer to catch missed link events */ if (bstp_timer_expired(&bs->bs_link_timer)) { LIST_FOREACH(bp, &bs->bs_bplist, bp_next) @@ -1893,6 +1895,8 @@ bstp_tick(void *arg) bp->bp_txcount--; } + CURVNET_RESTORE(); + callout_reset(&bs->bs_bstpcallout, hz, bstp_tick, bs); } @@ -2126,6 +2130,7 @@ bstp_attach(struct bstp_state *bs, struct bstp_cb_ops *cb) bs->bs_protover = BSTP_PROTO_RSTP; bs->bs_state_cb = cb->bcb_state; bs->bs_rtage_cb = cb->bcb_rtage; + bs->bs_vnet = curvnet; getmicrotime(&bs->bs_last_tc_time); diff --git a/sys/net/bridgestp.h b/sys/net/bridgestp.h index 74086fce478a..fdf16aa6b10f 100644 --- a/sys/net/bridgestp.h +++ b/sys/net/bridgestp.h @@ -358,6 +358,7 @@ struct bstp_state { LIST_HEAD(, bstp_port) bs_bplist; bstp_state_cb_t bs_state_cb; bstp_rtage_cb_t bs_rtage_cb; + struct vnet *bs_vnet; }; #define BSTP_LOCK_INIT(_bs) mtx_init(&(_bs)->bs_mtx, "bstp", NULL, MTX_DEF) diff --git a/sys/net/if_stf.c b/sys/net/if_stf.c index 4f904a57850d..7d136fd17d9f 100644 --- a/sys/net/if_stf.c +++ b/sys/net/if_stf.c @@ -3,7 +3,7 @@ /*- * Copyright (C) 2000 WIDE Project. - * Copyright (c) 2010 Hiroki Sato + * Copyright (c) 2010-2011 Hiroki Sato * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -32,7 +32,7 @@ */ /* - * 6to4 interface, based on RFC3056 + 6rd (RFC5569) support. + * 6to4 interface, based on RFC 3056 + 6rd (RFC 5969) support. * * 6to4 interface is NOT capable of link-layer (I mean, IPv4) multicasting. * There is no address mapping defined from IPv6 multicast address to IPv4 @@ -74,10 +74,9 @@ * for details. The code tries to filter out some of malicious packets. * Note that there is no way to be 100% secure. * - * 6rd (RFC5569) extension is enabled when an IPv6 GUA other than - * 2002::/16 is assigned. The stf(4) recognizes a 32-bit just after - * prefixlen as the IPv4 address of the 6rd customer site. The - * prefixlen must be shorter than 32. + * 6rd (RFC 5969) extension is enabled when an IPv6 GUA other than + * 2002::/16 is assigned. The stf(4) calculates a 6rd delegated + * prefix from a 6rd prefix and an IPv4 address. * */ @@ -280,10 +279,10 @@ stf_clone_create(struct if_clone *ifc, int unit, caddr_t params) LIST_INSERT_HEAD(&V_stf_softc_list, sc, stf_list); mtx_unlock(&stf_mtx); - sc->sc_ifaddr_event_tag = EVENTHANDLER_REGISTER(ifaddr_event, - stf_ifaddr_change, - NULL, - EVENTHANDLER_PRI_ANY); + sc->sc_ifaddr_event_tag = + EVENTHANDLER_REGISTER(ifaddr_event, stf_ifaddr_change, NULL, + EVENTHANDLER_PRI_ANY); + return (0); } @@ -1367,35 +1366,20 @@ stf_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) case SIOCSIFADDR: DEBUG_PRINTF(1, "enter SIOCSIFADDR.\n"); ifa = (struct ifaddr *)data; - if (ifa == NULL || ifa->ifa_addr->sa_family != AF_INET6) { + if (ifa == NULL) { error = EAFNOSUPPORT; break; } - ifa->ifa_rtrequest = stf_rtrequest; - ifp->if_flags |= IFF_UP; + if (ifa->ifa_addr->sa_family == AF_INET6 && + ifa->ifa_dstaddr->sa_family == AF_INET && + ifa->ifa_netmask->sa_family == AF_INET6) { + ifa->ifa_rtrequest = stf_rtrequest; + ifp->if_flags |= IFF_UP; + } else { + error = EINVAL; + break; + } break; - -/* - case STFSSRDADDR: - ifra6 = (struct in6_aliasreq *)data; - if (ifra6 == NULL || ifra6->ifra_addr->sa_family != AF_INET6) { - error = EAFNOSUPPORT; - break; - } - sa6 = &ifra6->ifra_addr; - if (ifra6->ifra_dstaddr->sa_family != AF_INET) { - error = EAFNOSUPPORT; - break; - } - memcpy(&ifra.ifra_addr, sa6, sizeof(ifra.ifra_addr)); - error = in6_control(NULL, SIOCAIFADDR_IN6, (caddr_t)&ifra, ifp, curthread); - if (error) - return (error); - - break; - - case STFDSRDADDR: -*/ case SIOCADDMULTI: case SIOCDELMULTI: ifr = (struct ifreq *)data; diff --git a/sys/netgraph/ng_nat.c b/sys/netgraph/ng_nat.c index 84da50057494..59818d9ced83 100644 --- a/sys/netgraph/ng_nat.c +++ b/sys/netgraph/ng_nat.c @@ -43,6 +43,7 @@ #include #include +#include #include #include @@ -696,22 +697,35 @@ ng_nat_rcvdata(hook_p hook, item_p item ) KASSERT(m->m_pkthdr.len == ntohs(ip->ip_len), ("ng_nat: ip_len != m_pkthdr.len")); + /* + * We drop packet when: + * 1. libalias returns PKT_ALIAS_ERROR; + * 2. For incoming packets: + * a) for unresolved fragments; + * b) libalias returns PKT_ALIAS_IGNORED and + * PKT_ALIAS_DENY_INCOMING flag is set. + */ if (hook == priv->in) { rval = LibAliasIn(priv->lib, c, m->m_len + M_TRAILINGSPACE(m)); - if (rval != PKT_ALIAS_OK && - rval != PKT_ALIAS_FOUND_HEADER_FRAGMENT) { + if (rval == PKT_ALIAS_ERROR || + rval == PKT_ALIAS_UNRESOLVED_FRAGMENT || + (rval == PKT_ALIAS_IGNORED && + (priv->lib->packetAliasMode & + PKT_ALIAS_DENY_INCOMING) != 0)) { NG_FREE_ITEM(item); return (EINVAL); } } else if (hook == priv->out) { rval = LibAliasOut(priv->lib, c, m->m_len + M_TRAILINGSPACE(m)); - if (rval != PKT_ALIAS_OK) { + if (rval == PKT_ALIAS_ERROR) { NG_FREE_ITEM(item); return (EINVAL); } } else panic("ng_nat: unknown hook!\n"); + if (rval == PKT_ALIAS_RESPOND) + m->m_flags |= M_SKIP_FIREWALL; m->m_pkthdr.len = m->m_len = ntohs(ip->ip_len); if ((ip->ip_off & htons(IP_OFFMASK)) == 0 && diff --git a/sys/netinet/in_pcb.c b/sys/netinet/in_pcb.c index 4aa998f20cb2..4eb309aecde4 100644 --- a/sys/netinet/in_pcb.c +++ b/sys/netinet/in_pcb.c @@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$"); #include "opt_ipsec.h" #include "opt_inet.h" #include "opt_inet6.h" +#include "opt_pcbgroup.h" #include #include @@ -212,7 +213,7 @@ void in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name, struct inpcbhead *listhead, int hash_nelements, int porthash_nelements, char *inpcbzone_name, uma_init inpcbzone_init, uma_fini inpcbzone_fini, - uint32_t inpcbzone_flags) + uint32_t inpcbzone_flags, u_int hashfields) { INP_INFO_LOCK_INIT(pcbinfo, name); @@ -227,6 +228,9 @@ in_pcbinfo_init(struct inpcbinfo *pcbinfo, const char *name, &pcbinfo->ipi_hashmask); pcbinfo->ipi_porthashbase = hashinit(porthash_nelements, M_PCB, &pcbinfo->ipi_porthashmask); +#ifdef PCBGROUP + in_pcbgroup_init(pcbinfo, hashfields, hash_nelements); +#endif pcbinfo->ipi_zone = uma_zcreate(inpcbzone_name, sizeof(struct inpcb), NULL, NULL, inpcbzone_init, inpcbzone_fini, UMA_ALIGN_PTR, inpcbzone_flags); @@ -246,6 +250,9 @@ in_pcbinfo_destroy(struct inpcbinfo *pcbinfo) hashdestroy(pcbinfo->ipi_hashbase, M_PCB, pcbinfo->ipi_hashmask); hashdestroy(pcbinfo->ipi_porthashbase, M_PCB, pcbinfo->ipi_porthashmask); +#ifdef PCBGROUP + in_pcbgroup_destroy(pcbinfo); +#endif uma_zdestroy(pcbinfo->ipi_zone); INP_HASH_LOCK_DESTROY(pcbinfo); INP_INFO_LOCK_DESTROY(pcbinfo); @@ -1053,7 +1060,8 @@ in_pcbdetach(struct inpcb *inp) * in_pcbref() bumps the reference count on an inpcb in order to maintain * stability of an inpcb pointer despite the inpcb lock being released. This * is used in TCP when the inpcbinfo lock needs to be acquired or upgraded, - * but where the inpcb lock is already held. + * but where the inpcb lock may already held, or when acquiring a reference + * via a pcbgroup. * * in_pcbref() should be used only to provide brief memory stability, and * must always be followed by a call to INP_WLOCK() and in_pcbrele() to @@ -1223,6 +1231,9 @@ in_pcbdrop(struct inpcb *inp) } INP_HASH_WUNLOCK(inp->inp_pcbinfo); inp->inp_flags &= ~INP_INHASHLIST; +#ifdef PCBGROUP + in_pcbgroup_remove(inp); +#endif } } @@ -1472,6 +1483,148 @@ in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr, } #undef INP_LOOKUP_MAPPED_PCB_COST +#ifdef PCBGROUP +/* + * Lookup PCB in hash list, using pcbgroup tables. + */ +static struct inpcb * +in_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, + struct in_addr faddr, u_int fport_arg, struct in_addr laddr, + u_int lport_arg, int lookupflags, struct ifnet *ifp) +{ + struct inpcbhead *head; + struct inpcb *inp, *tmpinp; + u_short fport = fport_arg, lport = lport_arg; + + /* + * First look for an exact match. + */ + tmpinp = NULL; + INP_GROUP_LOCK(pcbgroup); + head = &pcbgroup->ipg_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, + pcbgroup->ipg_hashmask)]; + LIST_FOREACH(inp, head, inp_pcbgrouphash) { +#ifdef INET6 + /* XXX inp locking */ + if ((inp->inp_vflag & INP_IPV4) == 0) + continue; +#endif + if (inp->inp_faddr.s_addr == faddr.s_addr && + inp->inp_laddr.s_addr == laddr.s_addr && + inp->inp_fport == fport && + inp->inp_lport == lport) { + /* + * XXX We should be able to directly return + * the inp here, without any checks. + * Well unless both bound with SO_REUSEPORT? + */ + if (prison_flag(inp->inp_cred, PR_IP4)) + goto found; + if (tmpinp == NULL) + tmpinp = inp; + } + } + if (tmpinp != NULL) { + inp = tmpinp; + goto found; + } + + /* + * Then look for a wildcard match, if requested. + */ + if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { + struct inpcb *local_wild = NULL, *local_exact = NULL; +#ifdef INET6 + struct inpcb *local_wild_mapped = NULL; +#endif + struct inpcb *jail_wild = NULL; + struct inpcbhead *head; + int injail; + + /* + * Order of socket selection - we always prefer jails. + * 1. jailed, non-wild. + * 2. jailed, wild. + * 3. non-jailed, non-wild. + * 4. non-jailed, wild. + */ + head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport, + 0, pcbinfo->ipi_wildmask)]; + LIST_FOREACH(inp, head, inp_pcbgroup_wild) { +#ifdef INET6 + /* XXX inp locking */ + if ((inp->inp_vflag & INP_IPV4) == 0) + continue; +#endif + if (inp->inp_faddr.s_addr != INADDR_ANY || + inp->inp_lport != lport) + continue; + + /* XXX inp locking */ + if (ifp && ifp->if_type == IFT_FAITH && + (inp->inp_flags & INP_FAITH) == 0) + continue; + + injail = prison_flag(inp->inp_cred, PR_IP4); + if (injail) { + if (prison_check_ip4(inp->inp_cred, + &laddr) != 0) + continue; + } else { + if (local_exact != NULL) + continue; + } + + if (inp->inp_laddr.s_addr == laddr.s_addr) { + if (injail) + goto found; + else + local_exact = inp; + } else if (inp->inp_laddr.s_addr == INADDR_ANY) { +#ifdef INET6 + /* XXX inp locking, NULL check */ + if (inp->inp_vflag & INP_IPV6PROTO) + local_wild_mapped = inp; + else +#endif /* INET6 */ + if (injail) + jail_wild = inp; + else + local_wild = inp; + } + } /* LIST_FOREACH */ + inp = jail_wild; + if (inp == NULL) + inp = local_exact; + if (inp == NULL) + inp = local_wild; +#ifdef INET6 + if (inp == NULL) + inp = local_wild_mapped; +#endif /* defined(INET6) */ + if (inp != NULL) + goto found; + } /* if (lookupflags & INPLOOKUP_WILDCARD) */ + INP_GROUP_UNLOCK(pcbgroup); + return (NULL); + +found: + in_pcbref(inp); + INP_GROUP_UNLOCK(pcbgroup); + if (lookupflags & INPLOOKUP_WLOCKPCB) { + INP_WLOCK(inp); + if (in_pcbrele_wlocked(inp)) + return (NULL); + } else if (lookupflags & INPLOOKUP_RLOCKPCB) { + INP_RLOCK(inp); + if (in_pcbrele_rlocked(inp)) + return (NULL); + } else + panic("%s: locking bug", __func__); + return (inp); +} +#endif /* PCBGROUP */ + /* * Lookup PCB in hash list, using pcbinfo tables. This variation assumes * that the caller has locked the hash list, and will not perform any further @@ -1636,17 +1789,30 @@ in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr, /* * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf * from which a pre-calculated hash value may be extracted. + * + * Possibly more of this logic should be in in_pcbgroup.c. */ struct inpcb * in_pcblookup(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport, struct in_addr laddr, u_int lport, int lookupflags, struct ifnet *ifp) { +#if defined(PCBGROUP) + struct inpcbgroup *pcbgroup; +#endif KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, ("%s: LOCKPCB not set", __func__)); +#if defined(PCBGROUP) + if (in_pcbgroup_enabled(pcbinfo)) { + pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, + fport); + return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, + laddr, lport, lookupflags, ifp)); + } +#endif return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, lookupflags, ifp)); } @@ -1656,12 +1822,28 @@ in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr, u_int fport, struct in_addr laddr, u_int lport, int lookupflags, struct ifnet *ifp, struct mbuf *m) { +#ifdef PCBGROUP + struct inpcbgroup *pcbgroup; +#endif KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, ("%s: LOCKPCB not set", __func__)); +#ifdef PCBGROUP + if (in_pcbgroup_enabled(pcbinfo)) { + pcbgroup = in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m), + m->m_pkthdr.flowid); + if (pcbgroup != NULL) + return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, + fport, laddr, lport, lookupflags, ifp)); + pcbgroup = in_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, + fport); + return (in_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, + laddr, lport, lookupflags, ifp)); + } +#endif return (in_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, lookupflags, ifp)); } @@ -1670,8 +1852,8 @@ in_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in_addr faddr, /* * Insert PCB onto various hash lists. */ -int -in_pcbinshash(struct inpcb *inp) +static int +in_pcbinshash_internal(struct inpcb *inp, int do_pcbgroup_update) { struct inpcbhead *pcbhash; struct inpcbporthead *pcbporthash; @@ -1721,9 +1903,38 @@ in_pcbinshash(struct inpcb *inp) LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist); LIST_INSERT_HEAD(pcbhash, inp, inp_hash); inp->inp_flags |= INP_INHASHLIST; +#ifdef PCBGROUP + if (do_pcbgroup_update) + in_pcbgroup_update(inp); +#endif return (0); } +/* + * For now, there are two public interfaces to insert an inpcb into the hash + * lists -- one that does update pcbgroups, and one that doesn't. The latter + * is used only in the TCP syncache, where in_pcbinshash is called before the + * full 4-tuple is set for the inpcb, and we don't want to install in the + * pcbgroup until later. + * + * XXXRW: This seems like a misfeature. in_pcbinshash should always update + * connection groups, and partially initialised inpcbs should not be exposed + * to either reservation hash tables or pcbgroups. + */ +int +in_pcbinshash(struct inpcb *inp) +{ + + return (in_pcbinshash_internal(inp, 1)); +} + +int +in_pcbinshash_nopcbgroup(struct inpcb *inp) +{ + + return (in_pcbinshash_internal(inp, 0)); +} + /* * Move PCB to the proper hash bucket when { faddr, fport } have been * changed. NOTE: This does not handle the case of the lport changing (the @@ -1755,6 +1966,13 @@ in_pcbrehash_mbuf(struct inpcb *inp, struct mbuf *m) LIST_REMOVE(inp, inp_hash); LIST_INSERT_HEAD(head, inp, inp_hash); + +#ifdef PCBGROUP + if (m != NULL) + in_pcbgroup_update_mbuf(inp, m); + else + in_pcbgroup_update(inp); +#endif } void @@ -1791,6 +2009,9 @@ in_pcbremlists(struct inpcb *inp) } LIST_REMOVE(inp, inp_list); pcbinfo->ipi_count--; +#ifdef PCBGROUP + in_pcbgroup_remove(inp); +#endif } /* diff --git a/sys/netinet/in_pcb.h b/sys/netinet/in_pcb.h index 809bc0576478..dfef96348efe 100644 --- a/sys/netinet/in_pcb.h +++ b/sys/netinet/in_pcb.h @@ -44,6 +44,7 @@ #include #ifdef _KERNEL +#include #include #include #include @@ -141,6 +142,7 @@ struct icmp6_filter; * * Key: * (c) - Constant after initialization + * (g) - Protected by the pcbgroup lock * (i) - Protected by the inpcb lock * (p) - Protected by the pcbinfo lock for the inpcb * (s) - Protected by another subsystem's locks @@ -160,9 +162,12 @@ struct icmp6_filter; */ struct inpcb { LIST_ENTRY(inpcb) inp_hash; /* (i/p) hash list */ + LIST_ENTRY(inpcb) inp_pcbgrouphash; /* (g/i) hash list */ LIST_ENTRY(inpcb) inp_list; /* (i/p) list for all PCBs for proto */ void *inp_ppcb; /* (i) pointer to per-protocol pcb */ struct inpcbinfo *inp_pcbinfo; /* (c) PCB list info */ + struct inpcbgroup *inp_pcbgroup; /* (g/i) PCB group list */ + LIST_ENTRY(inpcb) inp_pcbgroup_wild; /* (g/i/p) group wildcard entry */ struct socket *inp_socket; /* (i) back pointer to socket */ struct ucred *inp_cred; /* (c) cache of socket cred */ u_int32_t inp_flow; /* (i) IPv6 flow information */ @@ -272,13 +277,14 @@ struct inpcbport { * the former covering mutable global fields (such as the global pcb list), * and the latter covering the hashed lookup tables. The lock order is: * - * ipi_lock (before) inpcb locks (before) ipi_hash_lock + * ipi_lock (before) inpcb locks (before) {ipi_hash_lock, pcbgroup locks} * * Locking key: * * (c) Constant or nearly constant after initialisation * (g) Locked by ipi_lock - * (h) Read using either ipi_hash_lock or inpcb lock; write requires both. + * (h) Read using either ipi_hash_lock or inpcb lock; write requires both + * (p) Protected by one or more pcbgroup locks * (x) Synchronisation properties poorly defined */ struct inpcbinfo { @@ -312,7 +318,16 @@ struct inpcbinfo { struct uma_zone *ipi_zone; /* (c) */ /* - * Global lock protecting hash lookup tables. + * Connection groups associated with this protocol. These fields are + * constant, but pcbgroup structures themselves are protected by + * per-pcbgroup locks. + */ + struct inpcbgroup *ipi_pcbgroups; /* (c) */ + u_int ipi_npcbgroups; /* (c) */ + u_int ipi_hashfields; /* (c) */ + + /* + * Global lock protecting non-pcbgroup hash lookup tables. */ struct rwlock ipi_hash_lock; @@ -329,6 +344,14 @@ struct inpcbinfo { struct inpcbporthead *ipi_porthashbase; /* (h) */ u_long ipi_porthashmask; /* (h) */ + /* + * List of wildcard inpcbs for use with pcbgroups. In the past, was + * per-pcbgroup but is now global. All pcbgroup locks must be held + * to modify the list, so any is sufficient to read it. + */ + struct inpcbhead *ipi_wildbase; /* (p) */ + u_long ipi_wildmask; /* (p) */ + /* * Pointer to network stack instance */ @@ -340,6 +363,31 @@ struct inpcbinfo { void *ipi_pspare[2]; }; +/* + * Connection groups hold sets of connections that have similar CPU/thread + * affinity. Each connection belongs to exactly one connection group. + */ +struct inpcbgroup { + /* + * Per-connection group hash of inpcbs, hashed by local and foreign + * addresses and port numbers. + */ + struct inpcbhead *ipg_hashbase; /* (c) */ + u_long ipg_hashmask; /* (c) */ + + /* + * Notional affinity of this pcbgroup. + */ + u_int ipg_cpu; /* (p) */ + + /* + * Per-connection group lock, not to be confused with ipi_lock. + * Protects the hash table hung off the group, but also the global + * wildcard list in inpcbinfo. + */ + struct mtx ipg_lock; +} __aligned(CACHE_LINE_SIZE); + #define INP_LOCK_INIT(inp, d, t) \ rw_init_flags(&(inp)->inp_lock, (t), RW_RECURSE | RW_DUPOK) #define INP_LOCK_DESTROY(inp) rw_destroy(&(inp)->inp_lock) @@ -423,6 +471,14 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, #define INP_HASH_WLOCK_ASSERT(ipi) rw_assert(&(ipi)->ipi_hash_lock, \ RA_WLOCKED) +#define INP_GROUP_LOCK_INIT(ipg, d) mtx_init(&(ipg)->ipg_lock, (d), NULL, \ + MTX_DEF | MTX_DUPOK) +#define INP_GROUP_LOCK_DESTROY(ipg) mtx_destroy(&(ipg)->ipg_lock) + +#define INP_GROUP_LOCK(ipg) mtx_lock(&(ipg)->ipg_lock) +#define INP_GROUP_LOCK_ASSERT(ipg) mtx_assert(&(ipg)->ipg_lock, MA_OWNED) +#define INP_GROUP_UNLOCK(ipg) mtx_unlock(&(ipg)->ipg_lock) + #define INP_PCBHASH(faddr, lport, fport, mask) \ (((faddr) ^ ((faddr) >> 16) ^ ntohs((lport) ^ (fport))) & (mask)) #define INP_PCBPORTHASH(lport, mask) \ @@ -482,6 +538,7 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, */ #define INP_LLE_VALID 0x00000001 /* cached lle is valid */ #define INP_RT_VALID 0x00000002 /* cached rtentry is valid */ +#define INP_PCBGROUPWILD 0x00000004 /* in pcbgroup wildcard list */ /* * Flags passed to in_pcblookup*() functions. @@ -500,6 +557,13 @@ void inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp, #define INP_CHECK_SOCKAF(so, af) (INP_SOCKAF(so) == af) +/* + * Constants for pcbinfo.ipi_hashfields. + */ +#define IPI_HASHFIELDS_NONE 0 +#define IPI_HASHFIELDS_2TUPLE 1 +#define IPI_HASHFIELDS_4TUPLE 2 + #ifdef _KERNEL VNET_DECLARE(int, ipport_reservedhigh); VNET_DECLARE(int, ipport_reservedlow); @@ -531,7 +595,21 @@ VNET_DECLARE(int, ipport_tcpallocs); void in_pcbinfo_destroy(struct inpcbinfo *); void in_pcbinfo_init(struct inpcbinfo *, const char *, struct inpcbhead *, - int, int, char *, uma_init, uma_fini, uint32_t); + int, int, char *, uma_init, uma_fini, uint32_t, u_int); + +struct inpcbgroup * + in_pcbgroup_byhash(struct inpcbinfo *, u_int, uint32_t); +struct inpcbgroup * + in_pcbgroup_byinpcb(struct inpcb *); +struct inpcbgroup * + in_pcbgroup_bytuple(struct inpcbinfo *, struct in_addr, u_short, + struct in_addr, u_short); +void in_pcbgroup_destroy(struct inpcbinfo *); +int in_pcbgroup_enabled(struct inpcbinfo *); +void in_pcbgroup_init(struct inpcbinfo *, u_int, int); +void in_pcbgroup_remove(struct inpcb *); +void in_pcbgroup_update(struct inpcb *); +void in_pcbgroup_update_mbuf(struct inpcb *, struct mbuf *); void in_pcbpurgeif0(struct inpcbinfo *, struct ifnet *); int in_pcballoc(struct socket *, struct inpcbinfo *); @@ -551,6 +629,7 @@ void in_pcbdisconnect(struct inpcb *); void in_pcbdrop(struct inpcb *); void in_pcbfree(struct inpcb *); int in_pcbinshash(struct inpcb *); +int in_pcbinshash_nopcbgroup(struct inpcb *); struct inpcb * in_pcblookup_local(struct inpcbinfo *, struct in_addr, u_short, int, struct ucred *); diff --git a/sys/netinet/in_pcbgroup.c b/sys/netinet/in_pcbgroup.c new file mode 100644 index 000000000000..c9f5c7083136 --- /dev/null +++ b/sys/netinet/in_pcbgroup.c @@ -0,0 +1,457 @@ +/*- + * Copyright (c) 2010-2011 Juniper Networks, Inc. + * All rights reserved. + * + * This software was developed by Robert N. M. Watson under contract + * to Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include + +__FBSDID("$FreeBSD$"); + +#include "opt_inet6.h" + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#ifdef INET6 +#include +#endif /* INET6 */ + +/* + * pcbgroups, or "connection groups" are based on Willman, Rixner, and Cox's + * 2006 USENIX paper, "An Evaluation of Network Stack Parallelization + * Strategies in Modern Operating Systems". This implementation differs + * significantly from that described in the paper, in that it attempts to + * introduce not just notions of affinity for connections and distribute work + * so as to reduce lock contention, but also align those notions with + * hardware work distribution strategies such as RSS. In this construction, + * connection groups supplement, rather than replace, existing reservation + * tables for protocol 4-tuples, offering CPU-affine lookup tables with + * minimal cache line migration and lock contention during steady state + * operation. + * + * Internet protocols, such as UDP and TCP, register to use connection groups + * by providing an ipi_hashfields value other than IPI_HASHFIELDS_NONE; this + * indicates to the connection group code whether a 2-tuple or 4-tuple is + * used as an argument to hashes that assign a connection to a particular + * group. This must be aligned with any hardware offloaded distribution + * model, such as RSS or similar approaches taken in embedded network boards. + * Wildcard sockets require special handling, as in Willman 2006, and are + * shared between connection groups -- while being protected by group-local + * locks. This means that connection establishment and teardown can be + * signficantly more expensive than without connection groups, but that + * steady-state processing can be significantly faster. + * + * Most of the implementation of connection groups is in this file; however, + * connection group lookup is implemented in in_pcb.c alongside reservation + * table lookups -- see in_pcblookup_group(). + * + * TODO: + * + * Implement dynamic rebalancing of buckets with connection groups; when + * load is unevenly distributed, search for more optimal balancing on + * demand. This might require scaling up the number of connection groups + * by <<1. + * + * Provide an IP 2-tuple or 4-tuple netisr m2cpu handler based on connection + * groups for ip_input and ip6_input, allowing non-offloaded work + * distribution. + * + * Expose effective CPU affinity of connections to userspace using socket + * options. + * + * Investigate per-connection affinity overrides based on socket options; an + * option could be set, certainly resulting in work being distributed + * differently in software, and possibly propagated to supporting hardware + * with TCAMs or hardware hash tables. This might require connections to + * exist in more than one connection group at a time. + * + * Hook netisr thread reconfiguration events, and propagate those to RSS so + * that rebalancing can occur when the thread pool grows or shrinks. + * + * Expose per-pcbgroup statistics to userspace monitoring tools such as + * netstat, in order to allow better debugging and profiling. + */ + +void +in_pcbgroup_init(struct inpcbinfo *pcbinfo, u_int hashfields, + int hash_nelements) +{ + struct inpcbgroup *pcbgroup; + u_int numpcbgroups, pgn; + + /* + * Only enable connection groups for a protocol if it has been + * specifically requested. + */ + if (hashfields == IPI_HASHFIELDS_NONE) + return; + + /* + * Connection groups are about multi-processor load distribution, + * lock contention, and connection CPU affinity. As such, no point + * in turning them on for a uniprocessor machine, it only wastes + * memory. + */ + if (mp_ncpus == 1) + return; + + /* + * Use one group per CPU for now. If we decide to do dynamic + * rebalancing a la RSS, we'll need to shift left by at least 1. + */ + numpcbgroups = mp_ncpus; + + pcbinfo->ipi_hashfields = hashfields; + pcbinfo->ipi_pcbgroups = malloc(numpcbgroups * + sizeof(*pcbinfo->ipi_pcbgroups), M_PCB, M_WAITOK | M_ZERO); + pcbinfo->ipi_npcbgroups = numpcbgroups; + pcbinfo->ipi_wildbase = hashinit(hash_nelements, M_PCB, + &pcbinfo->ipi_wildmask); + for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) { + pcbgroup = &pcbinfo->ipi_pcbgroups[pgn]; + pcbgroup->ipg_hashbase = hashinit(hash_nelements, M_PCB, + &pcbgroup->ipg_hashmask); + INP_GROUP_LOCK_INIT(pcbgroup, "pcbgroup"); + + /* + * Initialise notional affinity of the pcbgroup -- for RSS, + * we want the same notion of affinity as NICs to be used. + * Just round robin for the time being. + */ + pcbgroup->ipg_cpu = (pgn % mp_ncpus); + } +} + +void +in_pcbgroup_destroy(struct inpcbinfo *pcbinfo) +{ + struct inpcbgroup *pcbgroup; + u_int pgn; + + if (pcbinfo->ipi_npcbgroups == 0) + return; + + for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) { + pcbgroup = &pcbinfo->ipi_pcbgroups[pgn]; + KASSERT(LIST_EMPTY(pcbinfo->ipi_listhead), + ("in_pcbinfo_destroy: listhead not empty")); + INP_GROUP_LOCK_DESTROY(pcbgroup); + hashdestroy(pcbgroup->ipg_hashbase, M_PCB, + pcbgroup->ipg_hashmask); + } + hashdestroy(pcbinfo->ipi_wildbase, M_PCB, pcbinfo->ipi_wildmask); + free(pcbinfo->ipi_pcbgroups, M_PCB); + pcbinfo->ipi_pcbgroups = NULL; + pcbinfo->ipi_npcbgroups = 0; + pcbinfo->ipi_hashfields = 0; +} + +/* + * Given a hash of whatever the covered tuple might be, return a pcbgroup + * index. + */ +static __inline u_int +in_pcbgroup_getbucket(struct inpcbinfo *pcbinfo, uint32_t hash) +{ + + return (hash % pcbinfo->ipi_npcbgroups); +} + +/* + * Map a (hashtype, hash) tuple into a connection group, or NULL if the hash + * information is insufficient to identify the pcbgroup. + */ +struct inpcbgroup * +in_pcbgroup_byhash(struct inpcbinfo *pcbinfo, u_int hashtype, uint32_t hash) +{ + + return (NULL); +} + +static struct inpcbgroup * +in_pcbgroup_bymbuf(struct inpcbinfo *pcbinfo, struct mbuf *m) +{ + + return (in_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m), + m->m_pkthdr.flowid)); +} + +struct inpcbgroup * +in_pcbgroup_bytuple(struct inpcbinfo *pcbinfo, struct in_addr laddr, + u_short lport, struct in_addr faddr, u_short fport) +{ + uint32_t hash; + + switch (pcbinfo->ipi_hashfields) { + case IPI_HASHFIELDS_4TUPLE: + hash = faddr.s_addr ^ fport; + break; + + case IPI_HASHFIELDS_2TUPLE: + hash = faddr.s_addr ^ laddr.s_addr; + break; + + default: + hash = 0; + } + return (&pcbinfo->ipi_pcbgroups[in_pcbgroup_getbucket(pcbinfo, + hash)]); +} + +struct inpcbgroup * +in_pcbgroup_byinpcb(struct inpcb *inp) +{ + + return (in_pcbgroup_bytuple(inp->inp_pcbinfo, inp->inp_laddr, + inp->inp_lport, inp->inp_faddr, inp->inp_fport)); +} + +static void +in_pcbwild_add(struct inpcb *inp) +{ + struct inpcbinfo *pcbinfo; + struct inpcbhead *head; + u_int pgn; + + INP_WLOCK_ASSERT(inp); + KASSERT(!(inp->inp_flags2 & INP_PCBGROUPWILD), + ("%s: is wild",__func__)); + + pcbinfo = inp->inp_pcbinfo; + for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) + INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]); + head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, inp->inp_lport, + 0, pcbinfo->ipi_wildmask)]; + LIST_INSERT_HEAD(head, inp, inp_pcbgroup_wild); + inp->inp_flags2 |= INP_PCBGROUPWILD; + for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) + INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]); +} + +static void +in_pcbwild_remove(struct inpcb *inp) +{ + struct inpcbinfo *pcbinfo; + u_int pgn; + + INP_WLOCK_ASSERT(inp); + KASSERT((inp->inp_flags2 & INP_PCBGROUPWILD), + ("%s: not wild", __func__)); + + pcbinfo = inp->inp_pcbinfo; + for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) + INP_GROUP_LOCK(&pcbinfo->ipi_pcbgroups[pgn]); + LIST_REMOVE(inp, inp_pcbgroup_wild); + for (pgn = 0; pgn < pcbinfo->ipi_npcbgroups; pgn++) + INP_GROUP_UNLOCK(&pcbinfo->ipi_pcbgroups[pgn]); + inp->inp_flags2 &= ~INP_PCBGROUPWILD; +} + +static __inline int +in_pcbwild_needed(struct inpcb *inp) +{ + +#ifdef INET6 + if (inp->inp_vflag & INP_IPV6) + return (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr)); + else +#endif + return (inp->inp_faddr.s_addr == htonl(INADDR_ANY)); +} + +static void +in_pcbwild_update_internal(struct inpcb *inp) +{ + int wildcard_needed; + + wildcard_needed = in_pcbwild_needed(inp); + if (wildcard_needed && !(inp->inp_flags2 & INP_PCBGROUPWILD)) + in_pcbwild_add(inp); + else if (!wildcard_needed && (inp->inp_flags2 & INP_PCBGROUPWILD)) + in_pcbwild_remove(inp); +} + +/* + * Update the pcbgroup of an inpcb, which might include removing an old + * pcbgroup reference and/or adding a new one. Wildcard processing is not + * performed here, although ideally we'll never install a pcbgroup for a + * wildcard inpcb (asserted below). + */ +static void +in_pcbgroup_update_internal(struct inpcbinfo *pcbinfo, + struct inpcbgroup *newpcbgroup, struct inpcb *inp) +{ + struct inpcbgroup *oldpcbgroup; + struct inpcbhead *pcbhash; + uint32_t hashkey_faddr; + + INP_WLOCK_ASSERT(inp); + + oldpcbgroup = inp->inp_pcbgroup; + if (oldpcbgroup != NULL && oldpcbgroup != newpcbgroup) { + INP_GROUP_LOCK(oldpcbgroup); + LIST_REMOVE(inp, inp_pcbgrouphash); + inp->inp_pcbgroup = NULL; + INP_GROUP_UNLOCK(oldpcbgroup); + } + if (newpcbgroup != NULL && oldpcbgroup != newpcbgroup) { +#ifdef INET6 + if (inp->inp_vflag & INP_IPV6) + hashkey_faddr = inp->in6p_faddr.s6_addr32[3]; /* XXX */ + else +#endif + hashkey_faddr = inp->inp_faddr.s_addr; + INP_GROUP_LOCK(newpcbgroup); + pcbhash = &newpcbgroup->ipg_hashbase[ + INP_PCBHASH(hashkey_faddr, inp->inp_lport, inp->inp_fport, + newpcbgroup->ipg_hashmask)]; + LIST_INSERT_HEAD(pcbhash, inp, inp_pcbgrouphash); + inp->inp_pcbgroup = newpcbgroup; + INP_GROUP_UNLOCK(newpcbgroup); + } + + KASSERT(!(newpcbgroup != NULL && in_pcbwild_needed(inp)), + ("%s: pcbgroup and wildcard!", __func__)); +} + +/* + * Two update paths: one in which the 4-tuple on an inpcb has been updated + * and therefore connection groups may need to change (or a wildcard entry + * may needed to be installed), and another in which the 4-tuple has been + * set as a result of a packet received, in which case we may be able to use + * the hash on the mbuf to avoid doing a software hash calculation for RSS. + * + * In each case: first, let the wildcard code have a go at placing it as a + * wildcard socket. If it was a wildcard, or if the connection has been + * dropped, then no pcbgroup is required (so potentially clear it); + * otherwise, calculate and update the pcbgroup for the inpcb. + */ +void +in_pcbgroup_update(struct inpcb *inp) +{ + struct inpcbinfo *pcbinfo; + struct inpcbgroup *newpcbgroup; + + INP_WLOCK_ASSERT(inp); + + pcbinfo = inp->inp_pcbinfo; + if (!in_pcbgroup_enabled(pcbinfo)) + return; + + in_pcbwild_update_internal(inp); + if (!(inp->inp_flags2 & INP_PCBGROUPWILD) && + !(inp->inp_flags & INP_DROPPED)) { +#ifdef INET6 + if (inp->inp_vflag & INP_IPV6) + newpcbgroup = in6_pcbgroup_byinpcb(inp); + else +#endif + newpcbgroup = in_pcbgroup_byinpcb(inp); + } else + newpcbgroup = NULL; + in_pcbgroup_update_internal(pcbinfo, newpcbgroup, inp); +} + +void +in_pcbgroup_update_mbuf(struct inpcb *inp, struct mbuf *m) +{ + struct inpcbinfo *pcbinfo; + struct inpcbgroup *newpcbgroup; + + INP_WLOCK_ASSERT(inp); + + pcbinfo = inp->inp_pcbinfo; + if (!in_pcbgroup_enabled(pcbinfo)) + return; + + /* + * Possibly should assert !INP_PCBGROUPWILD rather than testing for + * it; presumably this function should never be called for anything + * other than non-wildcard socket? + */ + in_pcbwild_update_internal(inp); + if (!(inp->inp_flags2 & INP_PCBGROUPWILD) && + !(inp->inp_flags & INP_DROPPED)) { + newpcbgroup = in_pcbgroup_bymbuf(pcbinfo, m); +#ifdef INET6 + if (inp->inp_vflag & INP_IPV6) { + if (newpcbgroup == NULL) + newpcbgroup = in6_pcbgroup_byinpcb(inp); + } else { +#endif + if (newpcbgroup == NULL) + newpcbgroup = in_pcbgroup_byinpcb(inp); +#ifdef INET6 + } +#endif + } else + newpcbgroup = NULL; + in_pcbgroup_update_internal(pcbinfo, newpcbgroup, inp); +} + +/* + * Remove pcbgroup entry and optional pcbgroup wildcard entry for this inpcb. + */ +void +in_pcbgroup_remove(struct inpcb *inp) +{ + struct inpcbgroup *pcbgroup; + + INP_WLOCK_ASSERT(inp); + + if (!in_pcbgroup_enabled(inp->inp_pcbinfo)) + return; + + if (inp->inp_flags2 & INP_PCBGROUPWILD) + in_pcbwild_remove(inp); + + pcbgroup = inp->inp_pcbgroup; + if (pcbgroup != NULL) { + INP_GROUP_LOCK(pcbgroup); + LIST_REMOVE(inp, inp_pcbgrouphash); + inp->inp_pcbgroup = NULL; + INP_GROUP_UNLOCK(pcbgroup); + } +} + +/* + * Query whether or not it is appropriate to use pcbgroups to look up inpcbs + * for a protocol. + */ +int +in_pcbgroup_enabled(struct inpcbinfo *pcbinfo) +{ + + return (pcbinfo->ipi_npcbgroups > 0); +} diff --git a/sys/netinet/ip_divert.c b/sys/netinet/ip_divert.c index 6f5bce7d4876..527ce5683344 100644 --- a/sys/netinet/ip_divert.c +++ b/sys/netinet/ip_divert.c @@ -153,7 +153,8 @@ div_init(void) * place for hashbase == NULL. */ in_pcbinfo_init(&V_divcbinfo, "div", &V_divcb, 1, 1, "divcb", - div_inpcb_init, div_inpcb_fini, UMA_ZONE_NOFREE); + div_inpcb_init, div_inpcb_fini, UMA_ZONE_NOFREE, + IPI_HASHFIELDS_NONE); } static void diff --git a/sys/netinet/ip_input.c b/sys/netinet/ip_input.c index ac1c723a0acc..67fcb743a9a5 100644 --- a/sys/netinet/ip_input.c +++ b/sys/netinet/ip_input.c @@ -488,7 +488,7 @@ ip_input(struct mbuf *m) } #ifdef IPSEC /* - * Bypass packet filtering for packets from a tunnel (gif). + * Bypass packet filtering for packets previously handled by IPsec. */ if (ip_ipsec_filtertunnel(m)) goto passin; diff --git a/sys/netinet/ip_ipsec.c b/sys/netinet/ip_ipsec.c index 50a6ce44a490..a3c87f5c442a 100644 --- a/sys/netinet/ip_ipsec.c +++ b/sys/netinet/ip_ipsec.c @@ -95,7 +95,7 @@ ip_ipsec_filtertunnel(struct mbuf *m) #if defined(IPSEC) /* - * Bypass packet filtering for packets from a tunnel. + * Bypass packet filtering for packets previously handled by IPsec. */ if (!V_ip4_ipsec_filtertunnel && m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) diff --git a/sys/netinet/ipfw/ip_fw2.c b/sys/netinet/ipfw/ip_fw2.c index b4d3abbe6959..49c48b9ad03a 100644 --- a/sys/netinet/ipfw/ip_fw2.c +++ b/sys/netinet/ipfw/ip_fw2.c @@ -692,6 +692,10 @@ check_uidgid(ipfw_insn_u32 *insn, int proto, struct ifnet *oif, lookupflags |= INPLOOKUP_RLOCKPCB; match = 0; if (*ugid_lookupp == 0) { + /* + * XXXRW: If we had the mbuf here, could use + * in_pcblookup_mbuf(). + */ pcb = (oif) ? in_pcblookup(pi, dst_ip, htons(dst_port), diff --git a/sys/netinet/ipfw/ip_fw_nat.c b/sys/netinet/ipfw/ip_fw_nat.c index f8c3e63ec235..fd6f09afdf88 100644 --- a/sys/netinet/ipfw/ip_fw_nat.c +++ b/sys/netinet/ipfw/ip_fw_nat.c @@ -262,17 +262,27 @@ ipfw_nat(struct ip_fw_args *args, struct cfg_nat *t, struct mbuf *m) else retval = LibAliasOut(t->lib, c, mcl->m_len + M_TRAILINGSPACE(mcl)); - if (retval == PKT_ALIAS_RESPOND) { - m->m_flags |= M_SKIP_FIREWALL; - retval = PKT_ALIAS_OK; - } - if (retval != PKT_ALIAS_OK && - retval != PKT_ALIAS_FOUND_HEADER_FRAGMENT) { + + /* + * We drop packet when: + * 1. libalias returns PKT_ALIAS_ERROR; + * 2. For incoming packets: + * a) for unresolved fragments; + * b) libalias returns PKT_ALIAS_IGNORED and + * PKT_ALIAS_DENY_INCOMING flag is set. + */ + if (retval == PKT_ALIAS_ERROR || + (args->oif == NULL && (retval == PKT_ALIAS_UNRESOLVED_FRAGMENT || + (retval == PKT_ALIAS_IGNORED && + (t->lib->packetAliasMode & PKT_ALIAS_DENY_INCOMING) != 0)))) { /* XXX - should i add some logging? */ m_free(mcl); args->m = NULL; return (IP_FW_DENY); } + + if (retval == PKT_ALIAS_RESPOND) + m->m_flags |= M_SKIP_FIREWALL; mcl->m_pkthdr.len = mcl->m_len = ntohs(ip->ip_len); /* diff --git a/sys/netinet/ipfw/ip_fw_sockopt.c b/sys/netinet/ipfw/ip_fw_sockopt.c index f81d57d52869..2347456a8c22 100644 --- a/sys/netinet/ipfw/ip_fw_sockopt.c +++ b/sys/netinet/ipfw/ip_fw_sockopt.c @@ -349,12 +349,13 @@ del_entry(struct ip_fw_chain *chain, uint32_t arg) } if (n == 0) { - /* A flush request (arg == 0) on empty ruleset - * returns with no error. On the contrary, + /* A flush request (arg == 0 or cmd == 1) on empty + * ruleset returns with no error. On the contrary, * if there is no match on a specific request, * we return EINVAL. */ - error = (arg == 0) ? 0 : EINVAL; + if (arg != 0 && cmd != 1) + error = EINVAL; break; } diff --git a/sys/netinet/libalias/alias_sctp.h b/sys/netinet/libalias/alias_sctp.h index 80ed96568d4d..99d54cebedb0 100644 --- a/sys/netinet/libalias/alias_sctp.h +++ b/sys/netinet/libalias/alias_sctp.h @@ -135,13 +135,13 @@ struct sctp_nat_assoc { struct in_addr a_addr; /**< alias ip address */ int state; /**< current state of NAT association */ int TableRegister; /**< stores which look up tables association is registered in */ - int exp; /**< timer expiration in seconds from uptime */ + int exp; /**< timer expiration in seconds from uptime */ int exp_loc; /**< current location in timer_Q */ int num_Gaddr; /**< number of global IP addresses in the list */ LIST_HEAD(sctpGlobalAddresshead,sctp_GlobalAddress) Gaddr; /**< List of global addresses */ - LIST_ENTRY (sctp_nat_assoc) list_L; /**< Linked list of pointers for Local table*/ - LIST_ENTRY (sctp_nat_assoc) list_G; /**< Linked list of pointers for Global table */ - LIST_ENTRY (sctp_nat_assoc) timer_Q; /**< Linked list of pointers for timer Q */ + LIST_ENTRY (sctp_nat_assoc) list_L; /**< Linked list of pointers for Local table*/ + LIST_ENTRY (sctp_nat_assoc) list_G; /**< Linked list of pointers for Global table */ + LIST_ENTRY (sctp_nat_assoc) timer_Q; /**< Linked list of pointers for timer Q */ //Using libalias locking }; diff --git a/sys/netinet/raw_ip.c b/sys/netinet/raw_ip.c index 635f08f3146a..e754b8850382 100644 --- a/sys/netinet/raw_ip.c +++ b/sys/netinet/raw_ip.c @@ -205,7 +205,8 @@ rip_init(void) { in_pcbinfo_init(&V_ripcbinfo, "rip", &V_ripcb, INP_PCBHASH_RAW_SIZE, - 1, "ripcb", rip_inpcb_init, NULL, UMA_ZONE_NOFREE); + 1, "ripcb", rip_inpcb_init, NULL, UMA_ZONE_NOFREE, + IPI_HASHFIELDS_NONE); EVENTHANDLER_REGISTER(maxsockets_change, rip_zone_change, NULL, EVENTHANDLER_PRI_ANY); } diff --git a/sys/netinet/tcp_subr.c b/sys/netinet/tcp_subr.c index 06854ec4f141..6ed589118d8b 100644 --- a/sys/netinet/tcp_subr.c +++ b/sys/netinet/tcp_subr.c @@ -300,7 +300,8 @@ tcp_init(void) hashsize = 512; /* safe default */ } in_pcbinfo_init(&V_tcbinfo, "tcp", &V_tcb, hashsize, hashsize, - "tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE); + "tcp_inpcb", tcp_inpcb_init, NULL, UMA_ZONE_NOFREE, + IPI_HASHFIELDS_4TUPLE); /* * These have to be type stable for the benefit of the timers. diff --git a/sys/netinet/tcp_syncache.c b/sys/netinet/tcp_syncache.c index 5125134363c0..66e473262035 100644 --- a/sys/netinet/tcp_syncache.c +++ b/sys/netinet/tcp_syncache.c @@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" +#include "opt_pcbgroup.h" #include #include @@ -676,8 +677,14 @@ syncache_socket(struct syncache *sc, struct socket *lso, struct mbuf *m) #ifdef INET6 } #endif + + /* + * Install in the reservation hash table for now, but don't yet + * install a connection group since the full 4-tuple isn't yet + * configured. + */ inp->inp_lport = sc->sc_inc.inc_lport; - if ((error = in_pcbinshash(inp)) != 0) { + if ((error = in_pcbinshash_nopcbgroup(inp)) != 0) { /* * Undo the assignments above if we failed to * put the PCB on the hash lists. diff --git a/sys/netinet/udp_usrreq.c b/sys/netinet/udp_usrreq.c index fd864c097bcb..28eb8fd19f88 100644 --- a/sys/netinet/udp_usrreq.c +++ b/sys/netinet/udp_usrreq.c @@ -186,7 +186,8 @@ udp_init(void) { in_pcbinfo_init(&V_udbinfo, "udp", &V_udb, UDBHASHSIZE, UDBHASHSIZE, - "udp_inpcb", udp_inpcb_init, NULL, UMA_ZONE_NOFREE); + "udp_inpcb", udp_inpcb_init, NULL, UMA_ZONE_NOFREE, + IPI_HASHFIELDS_2TUPLE); V_udpcb_zone = uma_zcreate("udpcb", sizeof(struct udpcb), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uma_zone_set_max(V_udpcb_zone, maxsockets); diff --git a/sys/netinet6/in6_pcb.c b/sys/netinet6/in6_pcb.c index da73f219d71e..d15c605b368e 100644 --- a/sys/netinet6/in6_pcb.c +++ b/sys/netinet6/in6_pcb.c @@ -70,6 +70,7 @@ __FBSDID("$FreeBSD$"); #include "opt_inet.h" #include "opt_inet6.h" #include "opt_ipsec.h" +#include "opt_pcbgroup.h" #include #include @@ -827,6 +828,141 @@ in6_rtchange(struct inpcb *inp, int errno) return inp; } +#ifdef PCBGROUP +/* + * Lookup PCB in hash list, using pcbgroup tables. + */ +static struct inpcb * +in6_pcblookup_group(struct inpcbinfo *pcbinfo, struct inpcbgroup *pcbgroup, + struct in6_addr *faddr, u_int fport_arg, struct in6_addr *laddr, + u_int lport_arg, int lookupflags, struct ifnet *ifp) +{ + struct inpcbhead *head; + struct inpcb *inp, *tmpinp; + u_short fport = fport_arg, lport = lport_arg; + int faith; + + if (faithprefix_p != NULL) + faith = (*faithprefix_p)(laddr); + else + faith = 0; + + /* + * First look for an exact match. + */ + tmpinp = NULL; + INP_GROUP_LOCK(pcbgroup); + head = &pcbgroup->ipg_hashbase[ + INP_PCBHASH(faddr->s6_addr32[3] /* XXX */, lport, fport, + pcbgroup->ipg_hashmask)]; + LIST_FOREACH(inp, head, inp_pcbgrouphash) { + /* XXX inp locking */ + if ((inp->inp_vflag & INP_IPV6) == 0) + continue; + if (IN6_ARE_ADDR_EQUAL(&inp->in6p_faddr, faddr) && + IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr) && + inp->inp_fport == fport && + inp->inp_lport == lport) { + /* + * XXX We should be able to directly return + * the inp here, without any checks. + * Well unless both bound with SO_REUSEPORT? + */ + if (prison_flag(inp->inp_cred, PR_IP6)) + goto found; + if (tmpinp == NULL) + tmpinp = inp; + } + } + if (tmpinp != NULL) { + inp = tmpinp; + goto found; + } + + /* + * Then look for a wildcard match, if requested. + */ + if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { + struct inpcb *local_wild = NULL, *local_exact = NULL; + struct inpcb *jail_wild = NULL; + int injail; + + /* + * Order of socket selection - we always prefer jails. + * 1. jailed, non-wild. + * 2. jailed, wild. + * 3. non-jailed, non-wild. + * 4. non-jailed, wild. + */ + head = &pcbinfo->ipi_wildbase[INP_PCBHASH(INADDR_ANY, lport, + 0, pcbinfo->ipi_wildmask)]; + LIST_FOREACH(inp, head, inp_pcbgroup_wild) { + /* XXX inp locking */ + if ((inp->inp_vflag & INP_IPV6) == 0) + continue; + + if (!IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_faddr) || + inp->inp_lport != lport) { + continue; + } + + /* XXX inp locking */ + if (faith && (inp->inp_flags & INP_FAITH) == 0) + continue; + + injail = prison_flag(inp->inp_cred, PR_IP6); + if (injail) { + if (prison_check_ip6(inp->inp_cred, + laddr) != 0) + continue; + } else { + if (local_exact != NULL) + continue; + } + + if (IN6_ARE_ADDR_EQUAL(&inp->in6p_laddr, laddr)) { + if (injail) + goto found; + else + local_exact = inp; + } else if (IN6_IS_ADDR_UNSPECIFIED(&inp->in6p_laddr)) { + if (injail) + jail_wild = inp; + else + local_wild = inp; + } + } /* LIST_FOREACH */ + + inp = jail_wild; + if (inp == NULL) + inp = jail_wild; + if (inp == NULL) + inp = local_exact; + if (inp == NULL) + inp = local_wild; + if (inp != NULL) + goto found; + } /* if ((lookupflags & INPLOOKUP_WILDCARD) != 0) */ + INP_GROUP_UNLOCK(pcbgroup); + return (NULL); + +found: + in_pcbref(inp); + INP_GROUP_UNLOCK(pcbgroup); + if (lookupflags & INPLOOKUP_WLOCKPCB) { + INP_WLOCK(inp); + if (in_pcbrele_wlocked(inp)) + return (NULL); + } else if (lookupflags & INPLOOKUP_RLOCKPCB) { + INP_RLOCK(inp); + if (in_pcbrele_rlocked(inp)) + return (NULL); + } else + panic("%s: locking buf", __func__); + return (inp); +} +#endif /* PCBGROUP */ + /* * Lookup PCB in hash list. */ @@ -983,16 +1119,30 @@ in6_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, /* * Public inpcb lookup routines, accepting a 4-tuple, and optionally, an mbuf * from which a pre-calculated hash value may be extracted. + * + * Possibly more of this logic should be in in6_pcbgroup.c. */ struct inpcb * in6_pcblookup(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp) { +#if defined(PCBGROUP) + struct inpcbgroup *pcbgroup; +#endif + KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, ("%s: LOCKPCB not set", __func__)); +#if defined(PCBGROUP) + if (in_pcbgroup_enabled(pcbinfo)) { + pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, + fport); + return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, + laddr, lport, lookupflags, ifp)); + } +#endif return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, lookupflags, ifp)); } @@ -1002,11 +1152,28 @@ in6_pcblookup_mbuf(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, u_int fport, struct in6_addr *laddr, u_int lport, int lookupflags, struct ifnet *ifp, struct mbuf *m) { +#ifdef PCBGROUP + struct inpcbgroup *pcbgroup; +#endif + KASSERT((lookupflags & ~INPLOOKUP_MASK) == 0, ("%s: invalid lookup flags %d", __func__, lookupflags)); KASSERT((lookupflags & (INPLOOKUP_RLOCKPCB | INPLOOKUP_WLOCKPCB)) != 0, ("%s: LOCKPCB not set", __func__)); +#ifdef PCBGROUP + if (in_pcbgroup_enabled(pcbinfo)) { + pcbgroup = in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m), + m->m_pkthdr.flowid); + if (pcbgroup != NULL) + return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, + fport, laddr, lport, lookupflags, ifp)); + pcbgroup = in6_pcbgroup_bytuple(pcbinfo, laddr, lport, faddr, + fport); + return (in6_pcblookup_group(pcbinfo, pcbgroup, faddr, fport, + laddr, lport, lookupflags, ifp)); + } +#endif return (in6_pcblookup_hash(pcbinfo, faddr, fport, laddr, lport, lookupflags, ifp)); } diff --git a/sys/netinet6/in6_pcb.h b/sys/netinet6/in6_pcb.h index cf247043b507..8398d547dd6b 100644 --- a/sys/netinet6/in6_pcb.h +++ b/sys/netinet6/in6_pcb.h @@ -69,6 +69,16 @@ #define sin6tosa(sin6) ((struct sockaddr *)(sin6)) #define ifatoia6(ifa) ((struct in6_ifaddr *)(ifa)) +struct inpcbgroup * + in6_pcbgroup_byhash(struct inpcbinfo *, u_int, uint32_t); +struct inpcbgroup * + in6_pcbgroup_byinpcb __P((struct inpcb *)); +struct inpcbgroup * + in6_pcbgroup_bymbuf(struct inpcbinfo *, struct mbuf *); +struct inpcbgroup * + in6_pcbgroup_bytuple __P((struct inpcbinfo *, const struct in6_addr *, + u_short, const struct in6_addr *, u_short)); + void in6_pcbpurgeif0 __P((struct inpcbinfo *, struct ifnet *)); void in6_losing __P((struct inpcb *)); int in6_pcbbind __P((struct inpcb *, struct sockaddr *, struct ucred *)); diff --git a/sys/netinet6/in6_pcbgroup.c b/sys/netinet6/in6_pcbgroup.c new file mode 100644 index 000000000000..850d7f471560 --- /dev/null +++ b/sys/netinet6/in6_pcbgroup.c @@ -0,0 +1,103 @@ +/*- + * Copyright (c) 2010-2011 Juniper Networks, Inc. + * All rights reserved. + * + * This software was developed by Robert N. M. Watson under contract + * to Juniper Networks, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include + +__FBSDID("$FreeBSD$"); + +#include "opt_inet6.h" + +#include +#include + +#include +#include +#ifdef INET6 +#include +#endif /* INET6 */ + +/* + * Given a hash of whatever the covered tuple might be, return a pcbgroup + * index. + */ +static __inline u_int +in6_pcbgroup_getbucket(struct inpcbinfo *pcbinfo, uint32_t hash) +{ + + return (hash % pcbinfo->ipi_npcbgroups); +} + +/* + * Map a (hashtype, hash) tuple into a connection group, or NULL if the hash + * information is insufficient to identify the pcbgroup. + */ +struct inpcbgroup * +in6_pcbgroup_byhash(struct inpcbinfo *pcbinfo, u_int hashtype, uint32_t hash) +{ + + return (NULL); +} + +struct inpcbgroup * +in6_pcbgroup_bymbuf(struct inpcbinfo *pcbinfo, struct mbuf *m) +{ + + return (in6_pcbgroup_byhash(pcbinfo, M_HASHTYPE_GET(m), + m->m_pkthdr.flowid)); +} + +struct inpcbgroup * +in6_pcbgroup_bytuple(struct inpcbinfo *pcbinfo, const struct in6_addr *laddrp, + u_short lport, const struct in6_addr *faddrp, u_short fport) +{ + uint32_t hash; + + switch (pcbinfo->ipi_hashfields) { + case IPI_HASHFIELDS_4TUPLE: + hash = faddrp->s6_addr32[3] ^ fport; + break; + + case IPI_HASHFIELDS_2TUPLE: + hash = faddrp->s6_addr32[3] ^ laddrp->s6_addr32[3]; + break; + + default: + hash = 0; + } + return (&pcbinfo->ipi_pcbgroups[in6_pcbgroup_getbucket(pcbinfo, + hash)]); +} + +struct inpcbgroup * +in6_pcbgroup_byinpcb(struct inpcb *inp) +{ + + return (in6_pcbgroup_bytuple(inp->inp_pcbinfo, &inp->in6p_laddr, + inp->inp_lport, &inp->in6p_faddr, inp->inp_fport)); +} diff --git a/sys/netinet6/ip6_ipsec.c b/sys/netinet6/ip6_ipsec.c index 8731e1261f99..bbbc9c99b1ec 100644 --- a/sys/netinet6/ip6_ipsec.c +++ b/sys/netinet6/ip6_ipsec.c @@ -97,7 +97,7 @@ SYSCTL_VNET_INT(_net_inet6_ipsec6, OID_AUTO, /* * Check if we have to jump over firewall processing for this packet. - * Called from ip_input(). + * Called from ip6_input(). * 1 = jump over firewall, 0 = packet goes through firewall. */ int @@ -106,7 +106,7 @@ ip6_ipsec_filtertunnel(struct mbuf *m) #if defined(IPSEC) /* - * Bypass packet filtering for packets from a tunnel. + * Bypass packet filtering for packets previously handled by IPsec. */ if (!V_ip6_ipsec6_filtertunnel && m_tag_find(m, PACKET_TAG_IPSEC_IN_DONE, NULL) != NULL) @@ -118,7 +118,7 @@ ip6_ipsec_filtertunnel(struct mbuf *m) /* * Check if this packet has an active SA and needs to be dropped instead * of forwarded. - * Called from ip_input(). + * Called from ip6_input(). * 1 = drop packet, 0 = forward packet. */ int @@ -141,7 +141,7 @@ ip6_ipsec_fwd(struct mbuf *m) if (sp == NULL) { /* NB: can happen if error */ splx(s); /*XXX error stat???*/ - DPRINTF(("ip_input: no SP for forwarding\n")); /*XXX*/ + DPRINTF(("%s: no SP for forwarding\n", __func__)); /*XXX*/ return 1; } @@ -163,7 +163,7 @@ ip6_ipsec_fwd(struct mbuf *m) * Check if protocol type doesn't have a further header and do IPSEC * decryption or reject right now. Protocols with further headers get * their IPSEC treatment within the protocol specific processing. - * Called from ip_input(). + * Called from ip6_input(). * 1 = drop packet, 0 = continue processing packet. */ int @@ -206,7 +206,7 @@ ip6_ipsec_input(struct mbuf *m, int nxt) } else { /* XXX error stat??? */ error = EINVAL; - DPRINTF(("ip_input: no SP, packet discarded\n"));/*XXX*/ + DPRINTF(("%s: no SP, packet discarded\n", __func__));/*XXX*/ return 1; } splx(s); diff --git a/sys/ofed/include/linux/list.h b/sys/ofed/include/linux/list.h index f6f9404307c8..61b42d242a43 100644 --- a/sys/ofed/include/linux/list.h +++ b/sys/ofed/include/linux/list.h @@ -38,6 +38,7 @@ #include #include #include +#include #include #include #include diff --git a/sys/pc98/pc98/machdep.c b/sys/pc98/pc98/machdep.c index 853ac69911b2..8bcb618759ac 100644 --- a/sys/pc98/pc98/machdep.c +++ b/sys/pc98/pc98/machdep.c @@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$"); #include "opt_isa.h" #include "opt_kstack_pages.h" #include "opt_maxmem.h" +#include "opt_mp_watchdog.h" #include "opt_npx.h" #include "opt_perfmon.h" @@ -115,6 +116,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -1193,9 +1195,8 @@ cpu_idle(int busy) CTR2(KTR_SPARE2, "cpu_idle(%d) at %d", busy, curcpu); -#ifdef SMP - if (mp_grab_cpu_hlt()) - return; +#ifdef MP_WATCHDOG + ap_watchdog(PCPU_GET(cpuid)); #endif /* If we are busy - try to use fast methods. */ if (busy) { diff --git a/sys/powerpc/aim/mmu_oea.c b/sys/powerpc/aim/mmu_oea.c index 51c6f8a0dc90..be80455dab6c 100644 --- a/sys/powerpc/aim/mmu_oea.c +++ b/sys/powerpc/aim/mmu_oea.c @@ -118,11 +118,14 @@ __FBSDID("$FreeBSD$"); #include #include +#include +#include #include #include #include #include #include +#include #include #include #include @@ -820,7 +823,7 @@ moea_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) PMAP_LOCK_INIT(kernel_pmap); for (i = 0; i < 16; i++) kernel_pmap->pm_sr[i] = EMPTY_SEGMENT + i; - kernel_pmap->pm_active = ~0; + CPU_FILL(&kernel_pmap->pm_active); /* * Set up the Open Firmware mappings @@ -942,7 +945,9 @@ moea_activate(mmu_t mmu, struct thread *td) pm = &td->td_proc->p_vmspace->vm_pmap; pmr = pm->pmap_phys; - pm->pm_active |= PCPU_GET(cpumask); + sched_pin(); + CPU_OR(&pm->pm_active, PCPU_PTR(cpumask)); + sched_unpin(); PCPU_SET(curpmap, pmr); } @@ -952,7 +957,9 @@ moea_deactivate(mmu_t mmu, struct thread *td) pmap_t pm; pm = &td->td_proc->p_vmspace->vm_pmap; - pm->pm_active &= ~PCPU_GET(cpumask); + sched_pin(); + CPU_NAND(&pm->pm_active, PCPU_PTR(cpumask)); + sched_unpin(); PCPU_SET(curpmap, NULL); } diff --git a/sys/powerpc/aim/mmu_oea64.c b/sys/powerpc/aim/mmu_oea64.c index 12a120162ecd..291d89b3e760 100644 --- a/sys/powerpc/aim/mmu_oea64.c +++ b/sys/powerpc/aim/mmu_oea64.c @@ -118,11 +118,14 @@ __FBSDID("$FreeBSD$"); #include #include +#include +#include #include #include #include #include #include +#include #include #include #include @@ -827,7 +830,7 @@ moea64_mid_bootstrap(mmu_t mmup, vm_offset_t kernelstart, vm_offset_t kernelend) #endif kernel_pmap->pmap_phys = kernel_pmap; - kernel_pmap->pm_active = ~0; + CPU_FILL(&kernel_pmap->pm_active); PMAP_LOCK_INIT(kernel_pmap); @@ -995,7 +998,9 @@ moea64_activate(mmu_t mmu, struct thread *td) pmap_t pm; pm = &td->td_proc->p_vmspace->vm_pmap; - pm->pm_active |= PCPU_GET(cpumask); + sched_pin(); + CPU_OR(&pm->pm_active, PCPU_PTR(cpumask)); + sched_unpin(); #ifdef __powerpc64__ PCPU_SET(userslb, pm->pm_slb); @@ -1010,7 +1015,9 @@ moea64_deactivate(mmu_t mmu, struct thread *td) pmap_t pm; pm = &td->td_proc->p_vmspace->vm_pmap; - pm->pm_active &= ~(PCPU_GET(cpumask)); + sched_pin(); + CPU_NAND(&pm->pm_active, PCPU_PTR(cpumask)); + sched_unpin(); #ifdef __powerpc64__ PCPU_SET(userslb, NULL); #else diff --git a/sys/powerpc/booke/platform_bare.c b/sys/powerpc/booke/platform_bare.c index 90c73e07f01e..d76664e7f925 100644 --- a/sys/powerpc/booke/platform_bare.c +++ b/sys/powerpc/booke/platform_bare.c @@ -256,7 +256,7 @@ bare_smp_start_cpu(platform_t plat, struct pcpu *pc) int timeout; eebpcr = ccsr_read4(OCP85XX_EEBPCR); - if ((eebpcr & (pc->pc_cpumask << 24)) != 0) { + if ((eebpcr & (1 << (pc->pc_cpuid + 24))) != 0) { printf("%s: CPU=%d already out of hold-off state!\n", __func__, pc->pc_cpuid); return (ENXIO); @@ -274,7 +274,7 @@ bare_smp_start_cpu(platform_t plat, struct pcpu *pc) /* * Release AP from hold-off state */ - eebpcr |= (pc->pc_cpumask << 24); + eebpcr |= (1 << (pc->pc_cpuid + 24)); ccsr_write4(OCP85XX_EEBPCR, eebpcr); __asm __volatile("isync; msync"); diff --git a/sys/powerpc/booke/pmap.c b/sys/powerpc/booke/pmap.c index cabe58fef611..e1cd071db40d 100644 --- a/sys/powerpc/booke/pmap.c +++ b/sys/powerpc/booke/pmap.c @@ -63,6 +63,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -1225,7 +1226,7 @@ mmu_booke_bootstrap(mmu_t mmu, vm_offset_t start, vm_offset_t kernelend) PTE_VALID; } /* Mark kernel_pmap active on all CPUs */ - kernel_pmap->pm_active = ~0; + CPU_FILL(&kernel_pmap->pm_active); /*******************************************************/ /* Final setup */ @@ -1480,7 +1481,7 @@ mmu_booke_pinit(mmu_t mmu, pmap_t pmap) PMAP_LOCK_INIT(pmap); for (i = 0; i < MAXCPU; i++) pmap->pm_tid[i] = TID_NONE; - pmap->pm_active = 0; + CPU_ZERO(&kernel_pmap->pm_active); bzero(&pmap->pm_stats, sizeof(pmap->pm_stats)); bzero(&pmap->pm_pdir, sizeof(pte_t *) * PDIR_NENTRIES); TAILQ_INIT(&pmap->pm_ptbl_list); @@ -1835,7 +1836,7 @@ mmu_booke_activate(mmu_t mmu, struct thread *td) mtx_lock_spin(&sched_lock); - atomic_set_int(&pmap->pm_active, PCPU_GET(cpumask)); + CPU_OR_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask)); PCPU_SET(curpmap, pmap); if (pmap->pm_tid[PCPU_GET(cpuid)] == TID_NONE) @@ -1864,7 +1865,9 @@ mmu_booke_deactivate(mmu_t mmu, struct thread *td) CTR5(KTR_PMAP, "%s: td=%p, proc = '%s', id = %d, pmap = 0x%08x", __func__, td, td->td_proc->p_comm, td->td_proc->p_pid, pmap); - atomic_clear_int(&pmap->pm_active, PCPU_GET(cpumask)); + sched_pin(); + CPU_NAND_ATOMIC(&pmap->pm_active, PCPU_PTR(cpumask)); + sched_unpin(); PCPU_SET(curpmap, NULL); } diff --git a/sys/powerpc/include/_types.h b/sys/powerpc/include/_types.h index fae241676ad3..b0b582e7080b 100644 --- a/sys/powerpc/include/_types.h +++ b/sys/powerpc/include/_types.h @@ -72,7 +72,6 @@ typedef unsigned long long __uint64_t; * Standard type definitions. */ typedef __uint32_t __clock_t; /* clock()... */ -typedef unsigned int __cpumask_t; typedef double __double_t; typedef double __float_t; #ifdef __LP64__ diff --git a/sys/powerpc/include/openpicvar.h b/sys/powerpc/include/openpicvar.h index 4fb9aa73bba4..605dc0f8daa3 100644 --- a/sys/powerpc/include/openpicvar.h +++ b/sys/powerpc/include/openpicvar.h @@ -57,7 +57,7 @@ int openpic_common_attach(device_t, uint32_t); /* * PIC interface. */ -void openpic_bind(device_t dev, u_int irq, cpumask_t cpumask); +void openpic_bind(device_t dev, u_int irq, cpuset_t cpumask); void openpic_config(device_t, u_int, enum intr_trigger, enum intr_polarity); void openpic_dispatch(device_t, struct trapframe *); void openpic_enable(device_t, u_int, u_int); diff --git a/sys/powerpc/include/pmap.h b/sys/powerpc/include/pmap.h index 369ca9d97d60..9166d04ba7d9 100644 --- a/sys/powerpc/include/pmap.h +++ b/sys/powerpc/include/pmap.h @@ -66,6 +66,7 @@ #include #include +#include #include #include #include @@ -98,7 +99,7 @@ struct pmap { #else register_t pm_sr[16]; #endif - cpumask_t pm_active; + cpuset_t pm_active; struct pmap *pmap_phys; struct pmap_statistics pm_stats; @@ -175,7 +176,7 @@ void slb_free_user_cache(struct slb **); struct pmap { struct mtx pm_mtx; /* pmap mutex */ tlbtid_t pm_tid[MAXCPU]; /* TID to identify this pmap entries in TLB */ - cpumask_t pm_active; /* active on cpus */ + cpuset_t pm_active; /* active on cpus */ struct pmap_statistics pm_stats; /* pmap statistics */ /* Page table directory, array of pointers to page tables. */ diff --git a/sys/powerpc/include/smp.h b/sys/powerpc/include/smp.h index cf952788c680..32fcfb4b8f60 100644 --- a/sys/powerpc/include/smp.h +++ b/sys/powerpc/include/smp.h @@ -40,9 +40,11 @@ #ifndef LOCORE +#include + void ipi_all_but_self(int ipi); void ipi_cpu(int cpu, u_int ipi); -void ipi_selected(cpumask_t cpus, int ipi); +void ipi_selected(cpuset_t cpus, int ipi); struct cpuref { uintptr_t cr_hwref; diff --git a/sys/powerpc/mpc85xx/openpic_fdt.c b/sys/powerpc/mpc85xx/openpic_fdt.c index 7cf18eaaff90..1cd936956281 100644 --- a/sys/powerpc/mpc85xx/openpic_fdt.c +++ b/sys/powerpc/mpc85xx/openpic_fdt.c @@ -37,11 +37,12 @@ __FBSDID("$FreeBSD$"); #include #include -#include #include #include +#include + #include "pic_if.h" static int openpic_fdt_probe(device_t); diff --git a/sys/powerpc/powerpc/intr_machdep.c b/sys/powerpc/powerpc/intr_machdep.c index f2bfa33f2615..1e6342cb754a 100644 --- a/sys/powerpc/powerpc/intr_machdep.c +++ b/sys/powerpc/powerpc/intr_machdep.c @@ -67,6 +67,7 @@ #include #include #include +#include #include #include #include @@ -98,7 +99,7 @@ struct powerpc_intr { u_int intline; u_int vector; u_int cntindex; - cpumask_t cpu; + cpuset_t cpu; enum intr_trigger trig; enum intr_polarity pol; }; @@ -205,7 +206,7 @@ intr_lookup(u_int irq) #ifdef SMP i->cpu = all_cpus; #else - i->cpu = 1; + CPU_SETOF(0, &i->cpu); #endif for (vector = 0; vector < INTR_VECTORS && vector <= nvectors; @@ -296,7 +297,7 @@ powerpc_assign_intr_cpu(void *arg, u_char cpu) if (cpu == NOCPU) i->cpu = all_cpus; else - i->cpu = 1 << cpu; + CPU_SETOF(cpu, &i->cpu); if (!cold && i->pic != NULL && i->pic == root_pic) PIC_BIND(i->pic, i->intline, i->cpu); diff --git a/sys/powerpc/powerpc/mp_machdep.c b/sys/powerpc/powerpc/mp_machdep.c index 577d4dcbe153..62a97e925050 100644 --- a/sys/powerpc/powerpc/mp_machdep.c +++ b/sys/powerpc/powerpc/mp_machdep.c @@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include #include @@ -157,7 +158,7 @@ cpu_mp_start(void) cpu.cr_cpuid); goto next; } - if (all_cpus & (1 << cpu.cr_cpuid)) { + if (CPU_ISSET(cpu.cr_cpuid, &all_cpus)) { printf("SMP: cpu%d: skipped - duplicate ID\n", cpu.cr_cpuid); goto next; @@ -174,9 +175,9 @@ cpu_mp_start(void) pc->pc_cpuid = bsp.cr_cpuid; pc->pc_bsp = 1; } - pc->pc_cpumask = 1 << pc->pc_cpuid; + CPU_SETOF(pc->pc_cpuid, &pc->pc_cpumask); pc->pc_hwref = cpu.cr_hwref; - all_cpus |= pc->pc_cpumask; + CPU_OR(&all_cpus, &pc->pc_cpumask); next: error = platform_smp_next_cpu(&cpu); } @@ -214,7 +215,8 @@ cpu_mp_unleash(void *dummy) smp_cpus = 0; STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { cpus++; - pc->pc_other_cpus = all_cpus & ~pc->pc_cpumask; + pc->pc_other_cpus = all_cpus; + CPU_NAND(&pc->pc_other_cpus, &pc->pc_cpumask); if (!pc->pc_bsp) { if (bootverbose) printf("Waking up CPU %d (dev=%x)\n", @@ -236,7 +238,7 @@ cpu_mp_unleash(void *dummy) pc->pc_cpuid, pc->pc_pir, pc->pc_awake); smp_cpus++; } else - stopped_cpus |= (1 << pc->pc_cpuid); + CPU_SET(pc->pc_cpuid, &stopped_cpus); } ap_awake = 1; @@ -276,7 +278,7 @@ SYSINIT(start_aps, SI_SUB_SMP, SI_ORDER_FIRST, cpu_mp_unleash, NULL); int powerpc_ipi_handler(void *arg) { - cpumask_t self; + cpuset_t self; uint32_t ipimask; int msg; @@ -311,11 +313,11 @@ powerpc_ipi_handler(void *arg) savectx(&stoppcbs[PCPU_GET(cpuid)]); self = PCPU_GET(cpumask); savectx(PCPU_GET(curpcb)); - atomic_set_int(&stopped_cpus, self); - while ((started_cpus & self) == 0) + CPU_OR_ATOMIC(&stopped_cpus, &self); + while (!CPU_OVERLAP(&started_cpus, &self)) cpu_spinwait(); - atomic_clear_int(&started_cpus, self); - atomic_clear_int(&stopped_cpus, self); + CPU_NAND_ATOMIC(&started_cpus, &self); + CPU_NAND_ATOMIC(&stopped_cpus, &self); CTR1(KTR_SMP, "%s: IPI_STOP (restart)", __func__); break; case IPI_HARDCLOCK: @@ -343,12 +345,12 @@ ipi_send(struct pcpu *pc, int ipi) /* Send an IPI to a set of cpus. */ void -ipi_selected(cpumask_t cpus, int ipi) +ipi_selected(cpuset_t cpus, int ipi) { struct pcpu *pc; STAILQ_FOREACH(pc, &cpuhead, pc_allcpu) { - if (cpus & pc->pc_cpumask) + if (CPU_OVERLAP(&cpus, &pc->pc_cpumask)) ipi_send(pc, ipi); } } diff --git a/sys/powerpc/powerpc/openpic.c b/sys/powerpc/powerpc/openpic.c index 042f8b846fa1..347dc3fb4eeb 100644 --- a/sys/powerpc/powerpc/openpic.c +++ b/sys/powerpc/powerpc/openpic.c @@ -231,7 +231,7 @@ openpic_common_attach(device_t dev, uint32_t node) */ void -openpic_bind(device_t dev, u_int irq, cpumask_t cpumask) +openpic_bind(device_t dev, u_int irq, cpuset_t cpumask) { struct openpic_softc *sc; @@ -240,7 +240,12 @@ openpic_bind(device_t dev, u_int irq, cpumask_t cpumask) return; sc = device_get_softc(dev); - openpic_write(sc, OPENPIC_IDEST(irq), cpumask); + + /* + * XXX: openpic_write() is very special and just needs a 32 bits mask. + * For the moment, just play dirty and get the first half word. + */ + openpic_write(sc, OPENPIC_IDEST(irq), cpumask.__bits[0] & 0xffffffff); } void diff --git a/sys/powerpc/powerpc/pic_if.m b/sys/powerpc/powerpc/pic_if.m index 185cc0887796..e429d31f5038 100644 --- a/sys/powerpc/powerpc/pic_if.m +++ b/sys/powerpc/powerpc/pic_if.m @@ -28,6 +28,7 @@ # #include +#include #include INTERFACE pic; @@ -35,7 +36,7 @@ INTERFACE pic; METHOD void bind { device_t dev; u_int irq; - cpumask_t cpumask; + cpuset_t cpumask; }; METHOD void config { diff --git a/sys/sparc64/include/_types.h b/sys/sparc64/include/_types.h index f810c159a6ef..7e993c4c370a 100644 --- a/sys/sparc64/include/_types.h +++ b/sys/sparc64/include/_types.h @@ -55,7 +55,6 @@ typedef unsigned long __uint64_t; * Standard type definitions. */ typedef __int32_t __clock_t; /* clock()... */ -typedef unsigned int __cpumask_t; typedef __int64_t __critical_t; typedef double __double_t; typedef float __float_t; diff --git a/sys/sparc64/include/ktr.h b/sys/sparc64/include/ktr.h index 5948ba29fb3e..f13865f1a27e 100644 --- a/sys/sparc64/include/ktr.h +++ b/sys/sparc64/include/ktr.h @@ -40,16 +40,6 @@ #else -#define AND(var, mask, r1, r2) \ - SET(var, r2, r1) ; \ - lduw [r1], r2 ; \ - and r2, mask, r1 - -#define TEST(var, mask, r1, r2, l1) \ - AND(var, mask, r1, r2) ; \ - brz r1, l1 ## f ; \ - nop - /* * XXX could really use another register... */ @@ -79,13 +69,37 @@ l2: add r2, 1, r3 ; \ SET(l1 ## b, r3, r2) ; \ stx r2, [r1 + KTR_DESC] +/* + * NB: this clobbers %y. + */ #define CATR(mask, desc, r1, r2, r3, l1, l2, l3) \ set mask, r1 ; \ - TEST(ktr_mask, r1, r2, r2, l3) ; \ - lduw [PCPU(MID)], r1 ; \ + SET(ktr_mask, r3, r2) ; \ + lduw [r2], r2 ; \ + and r2, r1, r1 ; \ + brz r1, l3 ## f ; \ + nop ; \ + lduw [PCPU(CPUID)], r2 ; \ + mov _NCPUBITS, r3 ; \ + mov %g0, %y ; \ + udiv r2, r3, r2 ; \ + srl r2, 0, r2 ; \ + sllx r2, PTR_SHIFT, r2 ; \ + SET(ktr_cpumask, r3, r1) ; \ + ldx [r1 + r2], r1 ; \ + lduw [PCPU(CPUID)], r2 ; \ + mov _NCPUBITS, r3 ; \ + mov %g0, %y ; \ + udiv r2, r3, r2 ; \ + srl r2, 0, r2 ; \ + smul r2, r3, r3 ; \ + lduw [PCPU(CPUID)], r2 ; \ + sub r2, r3, r3 ; \ mov 1, r2 ; \ - sllx r2, r1, r1 ; \ - TEST(ktr_cpumask, r1, r2, r3, l3) ; \ + sllx r2, r3, r2 ; \ + andn r1, r2, r1 ; \ + brz r1, l3 ## f ; \ + nop ; \ ATR(desc, r1, r2, r3, l1, l2) #endif /* LOCORE */ diff --git a/sys/sparc64/include/pmap.h b/sys/sparc64/include/pmap.h index e16ea9776029..adad2575598e 100644 --- a/sys/sparc64/include/pmap.h +++ b/sys/sparc64/include/pmap.h @@ -40,6 +40,7 @@ #define _MACHINE_PMAP_H_ #include +#include #include #include #include @@ -61,7 +62,7 @@ struct pmap { struct mtx pm_mtx; struct tte *pm_tsb; vm_object_t pm_tsb_obj; - cpumask_t pm_active; + cpuset_t pm_active; u_int pm_context[MAXCPU]; struct pmap_statistics pm_stats; }; diff --git a/sys/sparc64/include/smp.h b/sys/sparc64/include/smp.h index 3ca8e0380444..1ba0d9e9f137 100644 --- a/sys/sparc64/include/smp.h +++ b/sys/sparc64/include/smp.h @@ -38,6 +38,7 @@ #ifndef LOCORE +#include #include #include @@ -76,17 +77,17 @@ struct cpu_start_args { }; struct ipi_cache_args { - cpumask_t ica_mask; + cpuset_t ica_mask; vm_paddr_t ica_pa; }; struct ipi_rd_args { - cpumask_t ira_mask; + cpuset_t ira_mask; register_t *ira_val; }; struct ipi_tlb_args { - cpumask_t ita_mask; + cpuset_t ita_mask; struct pmap *ita_pmap; u_long ita_start; u_long ita_end; @@ -100,7 +101,7 @@ extern struct pcb stoppcbs[]; void cpu_mp_bootstrap(struct pcpu *pc); void cpu_mp_shutdown(void); -typedef void cpu_ipi_selected_t(u_int, u_long, u_long, u_long); +typedef void cpu_ipi_selected_t(cpuset_t, u_long, u_long, u_long); extern cpu_ipi_selected_t *cpu_ipi_selected; typedef void cpu_ipi_single_t(u_int, u_long, u_long, u_long); extern cpu_ipi_single_t *cpu_ipi_single; @@ -140,7 +141,7 @@ ipi_all_but_self(u_int ipi) } static __inline void -ipi_selected(u_int cpus, u_int ipi) +ipi_selected(cpuset_t cpus, u_int ipi) { cpu_ipi_selected(cpus, 0, (u_long)tl_ipi_level, ipi); @@ -197,7 +198,8 @@ ipi_rd(u_int cpu, void *func, u_long *val) sched_pin(); ira = &ipi_rd_args; mtx_lock_spin(&ipi_mtx); - ira->ira_mask = 1 << cpu | PCPU_GET(cpumask); + ira->ira_mask = PCPU_GET(cpumask); + CPU_SET(cpu, &ira->ira_mask); ira->ira_val = val; cpu_ipi_single(cpu, 0, (u_long)func, (u_long)ira); return (&ira->ira_mask); @@ -207,18 +209,21 @@ static __inline void * ipi_tlb_context_demap(struct pmap *pm) { struct ipi_tlb_args *ita; - cpumask_t cpus; + cpuset_t cpus; if (smp_cpus == 1) return (NULL); sched_pin(); - if ((cpus = (pm->pm_active & PCPU_GET(other_cpus))) == 0) { + cpus = pm->pm_active; + CPU_AND(&cpus, PCPU_PTR(other_cpus)); + if (CPU_EMPTY(&cpus)) { sched_unpin(); return (NULL); } ita = &ipi_tlb_args; mtx_lock_spin(&ipi_mtx); - ita->ita_mask = cpus | PCPU_GET(cpumask); + ita->ita_mask = cpus; + CPU_OR(&ita->ita_mask, PCPU_PTR(cpumask)); ita->ita_pmap = pm; cpu_ipi_selected(cpus, 0, (u_long)tl_ipi_tlb_context_demap, (u_long)ita); @@ -229,18 +234,21 @@ static __inline void * ipi_tlb_page_demap(struct pmap *pm, vm_offset_t va) { struct ipi_tlb_args *ita; - cpumask_t cpus; + cpuset_t cpus; if (smp_cpus == 1) return (NULL); sched_pin(); - if ((cpus = (pm->pm_active & PCPU_GET(other_cpus))) == 0) { + cpus = pm->pm_active; + CPU_AND(&cpus, PCPU_PTR(other_cpus)); + if (CPU_EMPTY(&cpus)) { sched_unpin(); return (NULL); } ita = &ipi_tlb_args; mtx_lock_spin(&ipi_mtx); - ita->ita_mask = cpus | PCPU_GET(cpumask); + ita->ita_mask = cpus; + CPU_OR(&ita->ita_mask, PCPU_PTR(cpumask)); ita->ita_pmap = pm; ita->ita_va = va; cpu_ipi_selected(cpus, 0, (u_long)tl_ipi_tlb_page_demap, (u_long)ita); @@ -251,18 +259,21 @@ static __inline void * ipi_tlb_range_demap(struct pmap *pm, vm_offset_t start, vm_offset_t end) { struct ipi_tlb_args *ita; - cpumask_t cpus; + cpuset_t cpus; if (smp_cpus == 1) return (NULL); sched_pin(); - if ((cpus = (pm->pm_active & PCPU_GET(other_cpus))) == 0) { + cpus = pm->pm_active; + CPU_AND(&cpus, PCPU_PTR(other_cpus)); + if (CPU_EMPTY(&cpus)) { sched_unpin(); return (NULL); } ita = &ipi_tlb_args; mtx_lock_spin(&ipi_mtx); - ita->ita_mask = cpus | PCPU_GET(cpumask); + ita->ita_mask = cpus; + CPU_OR(&ita->ita_mask, PCPU_PTR(cpumask)); ita->ita_pmap = pm; ita->ita_start = start; ita->ita_end = end; @@ -274,11 +285,11 @@ ipi_tlb_range_demap(struct pmap *pm, vm_offset_t start, vm_offset_t end) static __inline void ipi_wait(void *cookie) { - volatile cpumask_t *mask; + volatile cpuset_t *mask; if ((mask = cookie) != NULL) { - atomic_clear_int(mask, PCPU_GET(cpumask)); - while (*mask != 0) + CPU_NAND_ATOMIC(mask, PCPU_PTR(cpumask)); + while (!CPU_EMPTY(mask)) ; mtx_unlock_spin(&ipi_mtx); sched_unpin(); diff --git a/sys/sparc64/sparc64/exception.S b/sys/sparc64/sparc64/exception.S index ed0e3810e259..0b8a0faf78ce 100644 --- a/sys/sparc64/sparc64/exception.S +++ b/sys/sparc64/sparc64/exception.S @@ -1280,6 +1280,7 @@ ENTRY(tl1_data_excptn_trap) END(tl1_data_excptn_trap) .macro tl1_align + wrpr %g0, PSTATE_ALT, %pstate ba,a %xcc, tl1_align_trap nop .align 32 @@ -1289,7 +1290,7 @@ ENTRY(tl1_align_trap) RESUME_SPILLFILL_ALIGN ba %xcc, tl1_sfsr_trap mov T_MEM_ADDRESS_NOT_ALIGNED | T_KERNEL, %g2 -END(tl1_data_excptn_trap) +END(tl1_align_trap) ENTRY(tl1_sfsr_trap) wr %g0, ASI_DMMU, %asi @@ -2615,9 +2616,9 @@ ENTRY(tl0_ret) andn %l4, TSTATE_CWP_MASK, %g2 /* - * Restore %y. Could also be below if we had more alternate globals. + * Save %y in an alternate global. */ - wr %l5, 0, %y + mov %l5, %g4 /* * Setup %wstate for return. We need to restore the user window state @@ -2662,8 +2663,8 @@ tl0_ret_fill: * Fixup %tstate so the saved %cwp points to the current window and * restore it. */ - rdpr %cwp, %g4 - wrpr %g2, %g4, %tstate + rdpr %cwp, %g1 + wrpr %g2, %g1, %tstate /* * Restore the user window state. The transition bit was set above @@ -2673,19 +2674,24 @@ tl0_ret_fill: #if KTR_COMPILE & KTR_TRAP CATR(KTR_TRAP, "tl0_ret: td=%#lx pil=%#lx pc=%#lx npc=%#lx sp=%#lx" - , %g2, %g3, %g4, 7, 8, 9) - ldx [PCPU(CURTHREAD)], %g3 - stx %g3, [%g2 + KTR_PARM1] - rdpr %pil, %g3 - stx %g3, [%g2 + KTR_PARM2] - rdpr %tpc, %g3 - stx %g3, [%g2 + KTR_PARM3] - rdpr %tnpc, %g3 - stx %g3, [%g2 + KTR_PARM4] - stx %sp, [%g2 + KTR_PARM5] + , %g1, %g2, %g3, 7, 8, 9) + ldx [PCPU(CURTHREAD)], %g2 + stx %g2, [%g1 + KTR_PARM1] + rdpr %pil, %g2 + stx %g2, [%g1 + KTR_PARM2] + rdpr %tpc, %g2 + stx %g2, [%g1 + KTR_PARM3] + rdpr %tnpc, %g2 + stx %g2, [%g1 + KTR_PARM4] + stx %sp, [%g1 + KTR_PARM5] 9: #endif + /* + * Restore %y. Note that the CATR above clobbered it. + */ + wr %g4, 0, %y + /* * Return to usermode. */ @@ -2697,9 +2703,14 @@ tl0_ret_fill_end: , %l0, %l1, %l2, 7, 8, 9) rdpr %pstate, %l1 stx %l1, [%l0 + KTR_PARM1] - stx %l5, [%l0 + KTR_PARM2] + stx %l6, [%l0 + KTR_PARM2] stx %sp, [%l0 + KTR_PARM3] 9: + + /* + * Restore %y clobbered by the CATR. This was saved in %l5 above. + */ + wr %l5, 0, %y #endif /* @@ -2867,34 +2878,36 @@ ENTRY(tl1_ret) andn %l0, TSTATE_CWP_MASK, %g1 mov %l1, %g2 mov %l2, %g3 + mov %l4, %g4 wrpr %l3, 0, %pil - wr %l4, 0, %y restore wrpr %g0, 2, %tl - rdpr %cwp, %g4 - wrpr %g1, %g4, %tstate wrpr %g2, 0, %tpc wrpr %g3, 0, %tnpc + rdpr %cwp, %g2 + wrpr %g1, %g2, %tstate #if KTR_COMPILE & KTR_TRAP CATR(KTR_TRAP, "tl1_ret: td=%#lx pil=%#lx ts=%#lx pc=%#lx sp=%#lx" - , %g2, %g3, %g4, 7, 8, 9) - ldx [PCPU(CURTHREAD)], %g3 - stx %g3, [%g2 + KTR_PARM1] - rdpr %pil, %g3 - stx %g3, [%g2 + KTR_PARM2] - rdpr %tstate, %g3 - stx %g3, [%g2 + KTR_PARM3] - rdpr %tpc, %g3 - stx %g3, [%g2 + KTR_PARM4] - stx %sp, [%g2 + KTR_PARM5] + , %g1, %g2, %g3, 7, 8, 9) + ldx [PCPU(CURTHREAD)], %g2 + stx %g2, [%g1 + KTR_PARM1] + rdpr %pil, %g2 + stx %g2, [%g1 + KTR_PARM2] + rdpr %tstate, %g2 + stx %g2, [%g1 + KTR_PARM3] + rdpr %tpc, %g2 + stx %g2, [%g1 + KTR_PARM4] + stx %sp, [%g1 + KTR_PARM5] 9: #endif + wr %g4, 0, %y + retry END(tl1_ret) @@ -2995,33 +3008,35 @@ ENTRY(tl1_intr) andn %l0, TSTATE_CWP_MASK, %g1 mov %l1, %g2 mov %l2, %g3 + mov %l4, %g4 wrpr %l3, 0, %pil - wr %l4, 0, %y restore wrpr %g0, 2, %tl - rdpr %cwp, %g4 - wrpr %g1, %g4, %tstate wrpr %g2, 0, %tpc wrpr %g3, 0, %tnpc + rdpr %cwp, %g2 + wrpr %g1, %g2, %tstate #if KTR_COMPILE & KTR_INTR CATR(KTR_INTR, "tl1_intr: td=%#x pil=%#lx ts=%#lx pc=%#lx sp=%#lx" - , %g2, %g3, %g4, 7, 8, 9) - ldx [PCPU(CURTHREAD)], %g3 - stx %g3, [%g2 + KTR_PARM1] - rdpr %pil, %g3 - stx %g3, [%g2 + KTR_PARM2] - rdpr %tstate, %g3 - stx %g3, [%g2 + KTR_PARM3] - rdpr %tpc, %g3 - stx %g3, [%g2 + KTR_PARM4] - stx %sp, [%g2 + KTR_PARM5] + , %g1, %g2, %g3, 7, 8, 9) + ldx [PCPU(CURTHREAD)], %g2 + stx %g2, [%g1 + KTR_PARM1] + rdpr %pil, %g2 + stx %g2, [%g1 + KTR_PARM2] + rdpr %tstate, %g2 + stx %g2, [%g1 + KTR_PARM3] + rdpr %tpc, %g2 + stx %g2, [%g1 + KTR_PARM4] + stx %sp, [%g1 + KTR_PARM5] 9: #endif + wr %g4, 0, %y + retry END(tl1_intr) diff --git a/sys/sparc64/sparc64/genassym.c b/sys/sparc64/sparc64/genassym.c index e33e581bb91a..89ec718b94a2 100644 --- a/sys/sparc64/sparc64/genassym.c +++ b/sys/sparc64/sparc64/genassym.c @@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$"); #include #include #include +#include #include #include @@ -59,6 +60,8 @@ ASSYM(PCPU_PAGES, PCPU_PAGES); ASSYM(TAR_VPN_SHIFT, TAR_VPN_SHIFT); +ASSYM(_NCPUBITS, _NCPUBITS); + #ifdef SUN4U ASSYM(TLB_DEMAP_ALL, TLB_DEMAP_ALL); #endif @@ -137,7 +140,6 @@ ASSYM(MAXCOMLEN, MAXCOMLEN); ASSYM(PC_CURTHREAD, offsetof(struct pcpu, pc_curthread)); ASSYM(PC_CURPCB, offsetof(struct pcpu, pc_curpcb)); ASSYM(PC_CPUID, offsetof(struct pcpu, pc_cpuid)); -ASSYM(PC_CPUMASK, offsetof(struct pcpu, pc_cpumask)); ASSYM(PC_IRHEAD, offsetof(struct pcpu, pc_irhead)); ASSYM(PC_IRTAIL, offsetof(struct pcpu, pc_irtail)); ASSYM(PC_IRFREE, offsetof(struct pcpu, pc_irfree)); diff --git a/sys/sparc64/sparc64/intr_machdep.c b/sys/sparc64/sparc64/intr_machdep.c index f6ef9a7bf113..ed301826693c 100644 --- a/sys/sparc64/sparc64/intr_machdep.c +++ b/sys/sparc64/sparc64/intr_machdep.c @@ -445,8 +445,7 @@ intr_describe(int vec, void *ih, const char *descr) * allocate CPUs round-robin. */ -/* The BSP is always a valid target. */ -static cpumask_t intr_cpus = (1 << 0); +static cpuset_t intr_cpus; static int current_cpu; static void @@ -468,7 +467,7 @@ intr_assign_next_cpu(struct intr_vector *iv) current_cpu++; if (current_cpu > mp_maxid) current_cpu = 0; - } while (!(intr_cpus & (1 << current_cpu))); + } while (!CPU_ISSET(current_cpu, &intr_cpus)); } /* Attempt to bind the specified IRQ to the specified CPU. */ @@ -504,7 +503,7 @@ intr_add_cpu(u_int cpu) if (bootverbose) printf("INTR: Adding CPU %d as a target\n", cpu); - intr_cpus |= (1 << cpu); + CPU_SET(cpu, &intr_cpus); } /* @@ -518,6 +517,9 @@ intr_shuffle_irqs(void *arg __unused) struct intr_vector *iv; int i; + /* The BSP is always a valid target. */ + CPU_SETOF(0, &intr_cpus); + /* Don't bother on UP. */ if (mp_ncpus == 1) return; diff --git a/sys/sparc64/sparc64/mp_exception.S b/sys/sparc64/sparc64/mp_exception.S index 5a8a1054c859..f1b323aeebed 100644 --- a/sys/sparc64/sparc64/mp_exception.S +++ b/sys/sparc64/sparc64/mp_exception.S @@ -38,9 +38,21 @@ __FBSDID("$FreeBSD$"); .register %g2, #ignore .register %g3, #ignore -#define IPI_DONE(r1, r2, r3, r4) \ - lduw [PCPU(CPUMASK)], r4 ; \ - ATOMIC_CLEAR_INT(r1, r2, r3, r4) +#define IPI_DONE(r1, r2, r3, r4, r5, r6) \ + rd %y, r6 ; \ + lduw [PCPU(CPUID)], r2 ; \ + mov _NCPUBITS, r3 ; \ + mov %g0, %y ; \ + udiv r2, r3, r4 ; \ + srl r4, 0, r5 ; \ + sllx r5, PTR_SHIFT, r5 ; \ + add r1, r5, r1 ; \ + smul r4, r3, r3 ; \ + sub r2, r3, r3 ; \ + mov 1, r4 ; \ + sllx r4, r3, r4 ; \ + wr r6, %y ; \ + ATOMIC_CLEAR_LONG(r1, r2, r3, r4) /* * Invalidate a physical page in the data cache. For UltraSPARC I and II. @@ -77,7 +89,7 @@ ENTRY(tl_ipi_spitfire_dcache_page_inval) 2: brgz,pt %g2, 1b sub %g2, %g4, %g2 - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6) retry END(tl_ipi_spitfire_dcache_page_inval) @@ -117,7 +129,7 @@ ENTRY(tl_ipi_spitfire_icache_page_inval) 2: brgz,pt %g2, 1b sub %g2, %g4, %g2 - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6) retry END(tl_ipi_spitfire_icache_page_inval) @@ -148,7 +160,7 @@ ENTRY(tl_ipi_cheetah_dcache_page_inval) blt,a,pt %xcc, 1b nop - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6) retry END(tl_ipi_cheetah_dcache_page_inval) @@ -204,7 +216,7 @@ ENTRY(tl_ipi_tlb_page_demap) stxa %g0, [%g2] ASI_IMMU_DEMAP flush %g3 - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6) retry END(tl_ipi_tlb_page_demap) @@ -247,7 +259,7 @@ ENTRY(tl_ipi_tlb_range_demap) blt,a,pt %xcc, 1b nop - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6) retry END(tl_ipi_tlb_range_demap) @@ -271,7 +283,7 @@ ENTRY(tl_ipi_tlb_context_demap) stxa %g0, [%g1] ASI_IMMU_DEMAP flush %g3 - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6) retry END(tl_ipi_tlb_context_demap) @@ -283,7 +295,7 @@ ENTRY(tl_ipi_stick_rd) rd %asr24, %g2 stx %g2, [%g1] - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6) retry END(tl_ipi_stick_rd) @@ -295,6 +307,6 @@ ENTRY(tl_ipi_tick_rd) rd %tick, %g2 stx %g2, [%g1] - IPI_DONE(%g5, %g1, %g2, %g3) + IPI_DONE(%g5, %g1, %g2, %g3, %g4, %g6) retry END(tl_ipi_tick_rd) diff --git a/sys/sparc64/sparc64/mp_locore.S b/sys/sparc64/sparc64/mp_locore.S index fbcb767e0896..fd4357eaec1f 100644 --- a/sys/sparc64/sparc64/mp_locore.S +++ b/sys/sparc64/sparc64/mp_locore.S @@ -269,13 +269,17 @@ ENTRY(mp_startup) add %l1, %l2, %l1 sub %l1, SPOFF + CCFSZ, %sp + /* Initialize global registers. */ + call cpu_setregs + mov %l1, %o0 + #if KTR_COMPILE & KTR_SMP CATR(KTR_SMP, "mp_startup: bootstrap cpuid=%d mid=%d pcpu=%#lx data=%#lx sp=%#lx" , %g1, %g2, %g3, 7, 8, 9) - lduw [%l1 + PC_CPUID], %g2 + lduw [PCPU(CPUID)], %g2 stx %g2, [%g1 + KTR_PARM1] - lduw [%l1 + PC_MID], %g2 + lduw [PCPU(MID)], %g2 stx %g2, [%g1 + KTR_PARM2] stx %l1, [%g1 + KTR_PARM3] stx %sp, [%g1 + KTR_PARM5] diff --git a/sys/sparc64/sparc64/mp_machdep.c b/sys/sparc64/sparc64/mp_machdep.c index 4d9151e156c0..f2e76df2da37 100644 --- a/sys/sparc64/sparc64/mp_machdep.c +++ b/sys/sparc64/sparc64/mp_machdep.c @@ -121,7 +121,7 @@ cpu_ipi_single_t *cpu_ipi_single; static vm_offset_t mp_tramp; static u_int cpuid_to_mid[MAXCPU]; static int isjbus; -static volatile cpumask_t shutdown_cpus; +static volatile cpuset_t shutdown_cpus; static void ap_count(phandle_t node, u_int mid, u_int cpu_impl); static void ap_start(phandle_t node, u_int mid, u_int cpu_impl); @@ -228,7 +228,7 @@ void cpu_mp_setmaxid() { - all_cpus = 1 << curcpu; + CPU_SETOF(curcpu, &all_cpus); mp_ncpus = 1; mp_maxid = 0; @@ -283,6 +283,7 @@ sun4u_startcpu(phandle_t cpu, void *func, u_long arg) void cpu_mp_start(void) { + cpuset_t ocpus; mtx_init(&ipi_mtx, "ipi", NULL, MTX_SPIN); @@ -299,7 +300,9 @@ cpu_mp_start(void) KASSERT(!isjbus || mp_ncpus <= IDR_JALAPENO_MAX_BN_PAIRS, ("%s: can only IPI a maximum of %d JBus-CPUs", __func__, IDR_JALAPENO_MAX_BN_PAIRS)); - PCPU_SET(other_cpus, all_cpus & ~(1 << curcpu)); + ocpus = all_cpus; + CPU_CLR(curcpu, &ocpus); + PCPU_SET(other_cpus, ocpus); smp_active = 1; } @@ -357,7 +360,7 @@ ap_start(phandle_t node, u_int mid, u_int cpu_impl) cache_init(pc); - all_cpus |= 1 << cpuid; + CPU_SET(cpuid, &all_cpus); intr_add_cpu(cpuid); } @@ -421,6 +424,7 @@ cpu_mp_unleash(void *v) void cpu_mp_bootstrap(struct pcpu *pc) { + cpuset_t ocpus; volatile struct cpu_start_args *csa; csa = &cpu_start_args; @@ -453,9 +457,6 @@ cpu_mp_bootstrap(struct pcpu *pc) */ tlb_flush_nonlocked(); - /* Initialize global registers. */ - cpu_setregs(pc); - /* * Enable interrupts. * Note that the PIL we be lowered indirectly via sched_throw(NULL) @@ -465,7 +466,9 @@ cpu_mp_bootstrap(struct pcpu *pc) smp_cpus++; KASSERT(curthread != NULL, ("%s: curthread", __func__)); - PCPU_SET(other_cpus, all_cpus & ~(1 << curcpu)); + ocpus = all_cpus; + CPU_CLR(curcpu, &ocpus); + PCPU_SET(other_cpus, ocpus); printf("SMP: AP CPU #%d Launched!\n", curcpu); csa->csa_count--; @@ -484,14 +487,22 @@ cpu_mp_bootstrap(struct pcpu *pc) void cpu_mp_shutdown(void) { + cpuset_t cpus; int i; critical_enter(); shutdown_cpus = PCPU_GET(other_cpus); - if (stopped_cpus != PCPU_GET(other_cpus)) /* XXX */ - stop_cpus(stopped_cpus ^ PCPU_GET(other_cpus)); + cpus = shutdown_cpus; + + /* XXX: Stop all the CPUs which aren't already. */ + if (CPU_CMP(&stopped_cpus, &cpus)) { + + /* pc_other_cpus is just a flat "on" mask without curcpu. */ + CPU_NAND(&cpus, &stopped_cpus); + stop_cpus(cpus); + } i = 0; - while (shutdown_cpus != 0) { + while (!CPU_EMPTY(&shutdown_cpus)) { if (i++ > 100000) { printf("timeout shutting down CPUs.\n"); break; @@ -509,20 +520,24 @@ cpu_ipi_ast(struct trapframe *tf) static void cpu_ipi_stop(struct trapframe *tf) { + cpuset_t tcmask; CTR2(KTR_SMP, "%s: stopped %d", __func__, curcpu); + sched_pin(); savectx(&stoppcbs[curcpu]); - atomic_set_acq_int(&stopped_cpus, PCPU_GET(cpumask)); - while ((started_cpus & PCPU_GET(cpumask)) == 0) { - if ((shutdown_cpus & PCPU_GET(cpumask)) != 0) { - atomic_clear_int(&shutdown_cpus, PCPU_GET(cpumask)); + tcmask = PCPU_GET(cpumask); + CPU_OR_ATOMIC(&stopped_cpus, &tcmask); + while (!CPU_OVERLAP(&started_cpus, &tcmask)) { + if (CPU_OVERLAP(&shutdown_cpus, &tcmask)) { + CPU_NAND_ATOMIC(&shutdown_cpus, &tcmask); (void)intr_disable(); for (;;) ; } } - atomic_clear_rel_int(&started_cpus, PCPU_GET(cpumask)); - atomic_clear_rel_int(&stopped_cpus, PCPU_GET(cpumask)); + CPU_NAND_ATOMIC(&started_cpus, &tcmask); + CPU_NAND_ATOMIC(&stopped_cpus, &tcmask); + sched_unpin(); CTR2(KTR_SMP, "%s: restarted %d", __func__, curcpu); } @@ -551,13 +566,13 @@ cpu_ipi_hardclock(struct trapframe *tf) } static void -spitfire_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) +spitfire_ipi_selected(cpuset_t cpus, u_long d0, u_long d1, u_long d2) { u_int cpu; - while (cpus) { - cpu = ffs(cpus) - 1; - cpus &= ~(1 << cpu); + while ((cpu = cpusetobj_ffs(&cpus)) != 0) { + cpu--; + CPU_CLR(cpu, &cpus); spitfire_ipi_single(cpu, d0, d1, d2); } } @@ -657,20 +672,21 @@ cheetah_ipi_single(u_int cpu, u_long d0, u_long d1, u_long d2) } static void -cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) +cheetah_ipi_selected(cpuset_t cpus, u_long d0, u_long d1, u_long d2) { + char pbuf[CPUSETBUFSIZ]; register_t s; u_long ids; u_int bnp; u_int cpu; int i; - KASSERT((cpus & (1 << curcpu)) == 0, - ("%s: CPU can't IPI itself", __func__)); + KASSERT(!CPU_ISSET(curcpu, &cpus), ("%s: CPU can't IPI itself", + __func__)); KASSERT((ldxa(0, ASI_INTR_DISPATCH_STATUS) & IDR_CHEETAH_ALL_BUSY) == 0, ("%s: outstanding dispatch", __func__)); - if (cpus == 0) + if (CPU_EMPTY(&cpus)) return; ids = 0; for (i = 0; i < IPI_RETRIES * mp_ncpus; i++) { @@ -681,7 +697,7 @@ cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) membar(Sync); bnp = 0; for (cpu = 0; cpu < mp_ncpus; cpu++) { - if ((cpus & (1 << cpu)) != 0) { + if (CPU_ISSET(cpu, &cpus)) { stxa(AA_INTR_SEND | (cpuid_to_mid[cpu] << IDC_ITID_SHIFT) | bnp << IDC_BN_SHIFT, ASI_SDB_INTR_W, 0); @@ -698,9 +714,9 @@ cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) return; bnp = 0; for (cpu = 0; cpu < mp_ncpus; cpu++) { - if ((cpus & (1 << cpu)) != 0) { + if (CPU_ISSET(cpu, &cpus)) { if ((ids & (IDR_NACK << (2 * bnp))) == 0) - cpus &= ~(1 << cpu); + CPU_CLR(cpu, &cpus); bnp++; } } @@ -709,7 +725,7 @@ cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) * CPUs we actually haven't tried to send an IPI to, * but which apparently can be safely ignored. */ - if (cpus == 0) + if (CPU_EMPTY(&cpus)) return; /* * Leave interrupts enabled for a bit before retrying @@ -719,11 +735,11 @@ cheetah_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) DELAY(2 * mp_ncpus); } if (kdb_active != 0 || panicstr != NULL) - printf("%s: couldn't send IPI (cpus=0x%u ids=0x%lu)\n", - __func__, cpus, ids); + printf("%s: couldn't send IPI (cpus=%s ids=0x%lu)\n", + __func__, cpusetobj_strprint(pbuf, &cpus), ids); else - panic("%s: couldn't send IPI (cpus=0x%u ids=0x%lu)", - __func__, cpus, ids); + panic("%s: couldn't send IPI (cpus=%s ids=0x%lu)", + __func__, cpusetobj_strprint(pbuf, &cpus), ids); } static void @@ -772,19 +788,20 @@ jalapeno_ipi_single(u_int cpu, u_long d0, u_long d1, u_long d2) } static void -jalapeno_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) +jalapeno_ipi_selected(cpuset_t cpus, u_long d0, u_long d1, u_long d2) { + char pbuf[CPUSETBUFSIZ]; register_t s; u_long ids; u_int cpu; int i; - KASSERT((cpus & (1 << curcpu)) == 0, - ("%s: CPU can't IPI itself", __func__)); + KASSERT(!CPU_ISSET(curcpu, &cpus), ("%s: CPU can't IPI itself", + __func__)); KASSERT((ldxa(0, ASI_INTR_DISPATCH_STATUS) & IDR_CHEETAH_ALL_BUSY) == 0, ("%s: outstanding dispatch", __func__)); - if (cpus == 0) + if (CPU_EMPTY(&cpus)) return; ids = 0; for (i = 0; i < IPI_RETRIES * mp_ncpus; i++) { @@ -794,7 +811,7 @@ jalapeno_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) stxa(AA_SDB_INTR_D2, ASI_SDB_INTR_W, d2); membar(Sync); for (cpu = 0; cpu < mp_ncpus; cpu++) { - if ((cpus & (1 << cpu)) != 0) { + if (CPU_ISSET(cpu, &cpus)) { stxa(AA_INTR_SEND | (cpuid_to_mid[cpu] << IDC_ITID_SHIFT), ASI_SDB_INTR_W, 0); membar(Sync); @@ -808,10 +825,10 @@ jalapeno_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) (IDR_CHEETAH_ALL_BUSY | IDR_CHEETAH_ALL_NACK)) == 0) return; for (cpu = 0; cpu < mp_ncpus; cpu++) - if ((cpus & (1 << cpu)) != 0) + if (CPU_ISSET(cpu, &cpus)) if ((ids & (IDR_NACK << (2 * cpuid_to_mid[cpu]))) == 0) - cpus &= ~(1 << cpu); + CPU_CLR(cpu, &cpus); /* * Leave interrupts enabled for a bit before retrying * in order to avoid deadlocks if the other CPUs are @@ -820,9 +837,9 @@ jalapeno_ipi_selected(u_int cpus, u_long d0, u_long d1, u_long d2) DELAY(2 * mp_ncpus); } if (kdb_active != 0 || panicstr != NULL) - printf("%s: couldn't send IPI (cpus=0x%u ids=0x%lu)\n", - __func__, cpus, ids); + printf("%s: couldn't send IPI (cpus=%s ids=0x%lu)\n", + __func__, cpusetobj_strprint(pbuf, &cpus), ids); else - panic("%s: couldn't send IPI (cpus=0x%u ids=0x%lu)", - __func__, cpus, ids); + panic("%s: couldn't send IPI (cpus=%s ids=0x%lu)", + __func__, cpusetobj_strprint(pbuf, &cpus), ids); } diff --git a/sys/sparc64/sparc64/pmap.c b/sys/sparc64/sparc64/pmap.c index c34fc4506089..b01a558c3b24 100644 --- a/sys/sparc64/sparc64/pmap.c +++ b/sys/sparc64/sparc64/pmap.c @@ -664,7 +664,7 @@ pmap_bootstrap(u_int cpu_impl) pm = kernel_pmap; for (i = 0; i < MAXCPU; i++) pm->pm_context[i] = TLB_CTX_KERNEL; - pm->pm_active = ~0; + CPU_FILL(&pm->pm_active); /* * Flush all non-locked TLB entries possibly left over by the @@ -1189,7 +1189,7 @@ pmap_pinit0(pmap_t pm) PMAP_LOCK_INIT(pm); for (i = 0; i < MAXCPU; i++) pm->pm_context[i] = TLB_CTX_KERNEL; - pm->pm_active = 0; + CPU_ZERO(&pm->pm_active); pm->pm_tsb = NULL; pm->pm_tsb_obj = NULL; bzero(&pm->pm_stats, sizeof(pm->pm_stats)); @@ -1229,7 +1229,7 @@ pmap_pinit(pmap_t pm) mtx_lock_spin(&sched_lock); for (i = 0; i < MAXCPU; i++) pm->pm_context[i] = -1; - pm->pm_active = 0; + CPU_ZERO(&pm->pm_active); mtx_unlock_spin(&sched_lock); VM_OBJECT_LOCK(pm->pm_tsb_obj); @@ -2230,7 +2230,7 @@ pmap_activate(struct thread *td) PCPU_SET(tlb_ctx, context + 1); pm->pm_context[curcpu] = context; - pm->pm_active |= PCPU_GET(cpumask); + CPU_OR(&pm->pm_active, PCPU_PTR(cpumask)); PCPU_SET(pmap, pm); stxa(AA_DMMU_TSB, ASI_DMMU, pm->pm_tsb); diff --git a/sys/sparc64/sparc64/swtch.S b/sys/sparc64/sparc64/swtch.S index ea13779c46e4..75157343cdc8 100644 --- a/sys/sparc64/sparc64/swtch.S +++ b/sys/sparc64/sparc64/swtch.S @@ -164,20 +164,29 @@ ENTRY(cpu_switch) * If there was no non-kernel pmap, don't try to deactivate it. */ brz,pn %l2, 3f - lduw [PCPU(CPUMASK)], %l4 + lduw [PCPU(CPUID)], %l3 /* * Mark the pmap of the last non-kernel vmspace to run as no longer * active on this CPU. */ - lduw [%l2 + PM_ACTIVE], %l3 - andn %l3, %l4, %l3 - stw %l3, [%l2 + PM_ACTIVE] + mov _NCPUBITS, %l5 + mov %g0, %y + udiv %l3, %l5, %l6 + srl %l6, 0, %l4 + sllx %l4, PTR_SHIFT, %l4 + add %l4, PM_ACTIVE, %l4 + smul %l6, %l5, %l5 + sub %l3, %l5, %l5 + mov 1, %l6 + sllx %l6, %l5, %l5 + ldx [%l2 + %l4], %l6 + andn %l6, %l5, %l6 + stx %l6, [%l2 + %l4] /* * Take away its context number. */ - lduw [PCPU(CPUID)], %l3 sllx %l3, INT_SHIFT, %l3 add %l2, PM_CONTEXT, %l4 mov -1, %l5 @@ -210,18 +219,27 @@ ENTRY(cpu_switch) /* * Set the new context number in the pmap. */ - lduw [PCPU(CPUID)], %i4 - sllx %i4, INT_SHIFT, %i4 + lduw [PCPU(CPUID)], %l3 + sllx %l3, INT_SHIFT, %i4 add %l1, PM_CONTEXT, %i5 stw %i3, [%i4 + %i5] /* * Mark the pmap as active on this CPU. */ - lduw [%l1 + PM_ACTIVE], %i4 - lduw [PCPU(CPUMASK)], %i5 - or %i4, %i5, %i4 - stw %i4, [%l1 + PM_ACTIVE] + mov _NCPUBITS, %l5 + mov %g0, %y + udiv %l3, %l5, %l6 + srl %l6, 0, %l4 + sllx %l4, PTR_SHIFT, %l4 + add %l4, PM_ACTIVE, %l4 + smul %l6, %l5, %l5 + sub %l3, %l5, %l5 + mov 1, %l6 + sllx %l6, %l5, %l5 + ldx [%l1 + %l4], %l6 + or %l6, %l5, %l6 + stx %l6, [%l1 + %l4] /* * Make note of the change in pmap. diff --git a/sys/sparc64/sparc64/tlb.c b/sys/sparc64/sparc64/tlb.c index 990c77763750..9fcece6f43a6 100644 --- a/sys/sparc64/sparc64/tlb.c +++ b/sys/sparc64/sparc64/tlb.c @@ -80,7 +80,7 @@ tlb_context_demap(struct pmap *pm) PMAP_STATS_INC(tlb_ncontext_demap); cookie = ipi_tlb_context_demap(pm); s = intr_disable(); - if (pm->pm_active & PCPU_GET(cpumask)) { + if (CPU_OVERLAP(&pm->pm_active, PCPU_PTR(cpumask))) { KASSERT(pm->pm_context[curcpu] != -1, ("tlb_context_demap: inactive pmap?")); stxa(TLB_DEMAP_PRIMARY | TLB_DEMAP_CONTEXT, ASI_DMMU_DEMAP, 0); @@ -101,7 +101,7 @@ tlb_page_demap(struct pmap *pm, vm_offset_t va) PMAP_STATS_INC(tlb_npage_demap); cookie = ipi_tlb_page_demap(pm, va); s = intr_disable(); - if (pm->pm_active & PCPU_GET(cpumask)) { + if (CPU_OVERLAP(&pm->pm_active, PCPU_PTR(cpumask))) { KASSERT(pm->pm_context[curcpu] != -1, ("tlb_page_demap: inactive pmap?")); if (pm == kernel_pmap) @@ -128,7 +128,7 @@ tlb_range_demap(struct pmap *pm, vm_offset_t start, vm_offset_t end) PMAP_STATS_INC(tlb_nrange_demap); cookie = ipi_tlb_range_demap(pm, start, end); s = intr_disable(); - if (pm->pm_active & PCPU_GET(cpumask)) { + if (CPU_OVERLAP(&pm->pm_active, PCPU_PTR(cpumask))) { KASSERT(pm->pm_context[curcpu] != -1, ("tlb_range_demap: inactive pmap?")); if (pm == kernel_pmap) diff --git a/sys/sys/_cpuset.h b/sys/sys/_cpuset.h new file mode 100644 index 000000000000..42a0a6a9db81 --- /dev/null +++ b/sys/sys/_cpuset.h @@ -0,0 +1,52 @@ +/*- + * Copyright (c) 2008, Jeffrey Roberson + * All rights reserved. + * + * Copyright (c) 2008 Nokia Corporation + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _SYS__CPUSET_H_ +#define _SYS__CPUSET_H_ + +#ifdef _KERNEL +#define CPU_SETSIZE MAXCPU +#endif + +#define CPU_MAXSIZE 128 + +#ifndef CPU_SETSIZE +#define CPU_SETSIZE CPU_MAXSIZE +#endif + +#define _NCPUBITS (sizeof(long) * NBBY) /* bits per mask */ +#define _NCPUWORDS howmany(CPU_SETSIZE, _NCPUBITS) + +typedef struct _cpuset { + long __bits[howmany(CPU_SETSIZE, _NCPUBITS)]; +} cpuset_t; + +#endif /* !_SYS__CPUSET_H_ */ diff --git a/sys/sys/_rmlock.h b/sys/sys/_rmlock.h index 75a159c1082e..15d6c4953e85 100644 --- a/sys/sys/_rmlock.h +++ b/sys/sys/_rmlock.h @@ -45,7 +45,7 @@ LIST_HEAD(rmpriolist,rm_priotracker); struct rmlock { struct lock_object lock_object; - volatile cpumask_t rm_writecpus; + volatile cpuset_t rm_writecpus; LIST_HEAD(,rm_priotracker) rm_activeReaders; union { struct mtx _rm_lock_mtx; diff --git a/sys/sys/conf.h b/sys/sys/conf.h index 31fd34dc47ab..0c7ed41c3c41 100644 --- a/sys/sys/conf.h +++ b/sys/sys/conf.h @@ -332,6 +332,7 @@ struct dumperinfo { int set_dumper(struct dumperinfo *); int dump_write(struct dumperinfo *, void *, vm_offset_t, off_t, size_t); void dumpsys(struct dumperinfo *); +int doadump(boolean_t); extern int dumping; /* system is dumping */ #endif /* _KERNEL */ diff --git a/sys/sys/cpuset.h b/sys/sys/cpuset.h index 854fa29608ca..030a874bf7d4 100644 --- a/sys/sys/cpuset.h +++ b/sys/sys/cpuset.h @@ -32,22 +32,9 @@ #ifndef _SYS_CPUSET_H_ #define _SYS_CPUSET_H_ -#ifdef _KERNEL -#define CPU_SETSIZE MAXCPU -#endif +#include -#define CPU_MAXSIZE 128 - -#ifndef CPU_SETSIZE -#define CPU_SETSIZE CPU_MAXSIZE -#endif - -#define _NCPUBITS (sizeof(long) * NBBY) /* bits per mask */ -#define _NCPUWORDS howmany(CPU_SETSIZE, _NCPUBITS) - -typedef struct _cpuset { - long __bits[howmany(CPU_SETSIZE, _NCPUBITS)]; -} cpuset_t; +#define CPUSETBUFSIZ ((2 + sizeof(long) * 2) * _NCPUWORDS) #define __cpuset_mask(n) ((long)1 << ((n) % _NCPUBITS)) #define CPU_CLR(n, p) ((p)->__bits[(n)/_NCPUBITS] &= ~__cpuset_mask(n)) @@ -66,6 +53,11 @@ typedef struct _cpuset { (p)->__bits[__i] = -1; \ } while (0) +#define CPU_SETOF(n, p) do { \ + CPU_ZERO(p); \ + ((p)->__bits[(n)/_NCPUBITS] = __cpuset_mask(n)); \ +} while (0) + /* Is p empty. */ #define CPU_EMPTY(p) __extension__ ({ \ __size_t __i; \ @@ -75,6 +67,15 @@ typedef struct _cpuset { __i == _NCPUWORDS; \ }) +/* Is p full set. */ +#define CPU_ISFULLSET(p) __extension__ ({ \ + __size_t __i; \ + for (__i = 0; __i < _NCPUWORDS; __i++) \ + if ((p)->__bits[__i] != (long)-1) \ + break; \ + __i == _NCPUWORDS; \ +}) + /* Is c a subset of p. */ #define CPU_SUBSET(p, c) __extension__ ({ \ __size_t __i; \ @@ -124,6 +125,33 @@ typedef struct _cpuset { (d)->__bits[__i] &= ~(s)->__bits[__i]; \ } while (0) +#define CPU_CLR_ATOMIC(n, p) \ + atomic_clear_long(&(p)->__bits[(n)/_NCPUBITS], __cpuset_mask(n)) + +#define CPU_SET_ATOMIC(n, p) \ + atomic_set_long(&(p)->__bits[(n)/_NCPUBITS], __cpuset_mask(n)) + +#define CPU_OR_ATOMIC(d, s) do { \ + __size_t __i; \ + for (__i = 0; __i < _NCPUWORDS; __i++) \ + atomic_set_long(&(d)->__bits[__i], \ + (s)->__bits[__i]); \ +} while (0) + +#define CPU_NAND_ATOMIC(d, s) do { \ + __size_t __i; \ + for (__i = 0; __i < _NCPUWORDS; __i++) \ + atomic_clear_long(&(d)->__bits[__i], \ + (s)->__bits[__i]); \ +} while (0) + +#define CPU_COPY_STORE_REL(f, t) do { \ + __size_t __i; \ + for (__i = 0; __i < _NCPUWORDS; __i++) \ + atomic_store_rel_long(&(t)->__bits[__i], \ + (f)->__bits[__i]); \ +} while (0) + /* * Valid cpulevel_t values. */ @@ -184,6 +212,9 @@ void cpuset_rel(struct cpuset *); int cpuset_setthread(lwpid_t id, cpuset_t *); int cpuset_create_root(struct prison *, struct cpuset **); int cpuset_setproc_update_set(struct proc *, struct cpuset *); +int cpusetobj_ffs(const cpuset_t *); +char *cpusetobj_strprint(char *, const cpuset_t *); +int cpusetobj_strscan(cpuset_t *, const char *); #else __BEGIN_DECLS diff --git a/sys/sys/ktr.h b/sys/sys/ktr.h index 3b78101dd1b4..7885b22f4aee 100644 --- a/sys/sys/ktr.h +++ b/sys/sys/ktr.h @@ -97,6 +97,9 @@ #ifndef LOCORE +#include +#include + struct ktr_entry { u_int64_t ktr_timestamp; int ktr_cpu; @@ -107,7 +110,7 @@ struct ktr_entry { u_long ktr_parms[KTR_PARMS]; }; -extern int ktr_cpumask; +extern cpuset_t ktr_cpumask; extern int ktr_mask; extern int ktr_entries; extern int ktr_verbose; diff --git a/sys/sys/pcpu.h b/sys/sys/pcpu.h index 0bb2cbd196f3..e6044a7a57b6 100644 --- a/sys/sys/pcpu.h +++ b/sys/sys/pcpu.h @@ -37,6 +37,7 @@ #error "no assembler-serviceable parts inside" #endif +#include #include #include #include @@ -162,8 +163,6 @@ struct pcpu { uint64_t pc_switchtime; /* cpu_ticks() at last csw */ int pc_switchticks; /* `ticks' at last csw */ u_int pc_cpuid; /* This cpu number */ - cpumask_t pc_cpumask; /* This cpu mask */ - cpumask_t pc_other_cpus; /* Mask of all other cpus */ STAILQ_ENTRY(pcpu) pc_allcpu; struct lock_list_entry *pc_spinlocks; #ifdef KTR @@ -197,6 +196,18 @@ struct pcpu { * if only to make kernel debugging easier. */ PCPU_MD_FIELDS; + + /* + * XXX + * For the time being, keep the cpuset_t objects as the very last + * members of the structure. + * They are actually tagged to be removed soon, but as long as this + * does not happen, it is necessary to find a way to implement + * easilly interfaces to userland and leaving them last makes that + * possible. + */ + cpuset_t pc_cpumask; /* This cpu mask */ + cpuset_t pc_other_cpus; /* Mask of all other cpus */ } __aligned(CACHE_LINE_SIZE); #ifdef _KERNEL diff --git a/sys/sys/pmckern.h b/sys/sys/pmckern.h index 3e8c1ef3fb68..796c4cadab77 100644 --- a/sys/sys/pmckern.h +++ b/sys/sys/pmckern.h @@ -76,7 +76,7 @@ extern int (*pmc_intr)(int _cpu, struct trapframe *_frame); extern struct sx pmc_sx; /* Per-cpu flags indicating availability of sampling data */ -extern volatile cpumask_t pmc_cpumask; +extern volatile cpuset_t pmc_cpumask; /* Count of system-wide sampling PMCs in existence */ extern volatile int pmc_ss_count; @@ -122,7 +122,7 @@ do { \ #define PMC_SYSTEM_SAMPLING_ACTIVE() (pmc_ss_count > 0) /* Check if a CPU has recorded samples. */ -#define PMC_CPU_HAS_SAMPLES(C) (__predict_false(pmc_cpumask & (1 << (C)))) +#define PMC_CPU_HAS_SAMPLES(C) (__predict_false(CPU_ISSET(C, &pmc_cpumask))) /* * Helper functions. diff --git a/sys/sys/smp.h b/sys/sys/smp.h index f8cce5f11177..66e800873e4b 100644 --- a/sys/sys/smp.h +++ b/sys/sys/smp.h @@ -16,6 +16,8 @@ #ifndef LOCORE +#include + /* * Topology of a NUMA or HTT system. * @@ -32,7 +34,7 @@ struct cpu_group { struct cpu_group *cg_parent; /* Our parent group. */ struct cpu_group *cg_child; /* Optional children groups. */ - cpumask_t cg_mask; /* Mask of cpus in this group. */ + cpuset_t cg_mask; /* Mask of cpus in this group. */ int32_t cg_count; /* Count of cpus in this group. */ int16_t cg_children; /* Number of children groups. */ int8_t cg_level; /* Shared cache level. */ @@ -71,10 +73,10 @@ struct cpu_group *smp_topo_find(struct cpu_group *top, int cpu); extern void (*cpustop_restartfunc)(void); extern int smp_active; extern int smp_cpus; -extern volatile cpumask_t started_cpus; -extern volatile cpumask_t stopped_cpus; -extern cpumask_t hlt_cpus_mask; -extern cpumask_t logical_cpus_mask; +extern volatile cpuset_t started_cpus; +extern volatile cpuset_t stopped_cpus; +extern cpuset_t hlt_cpus_mask; +extern cpuset_t logical_cpus_mask; #endif /* SMP */ extern u_int mp_maxid; @@ -82,14 +84,14 @@ extern int mp_maxcpus; extern int mp_ncpus; extern volatile int smp_started; -extern cpumask_t all_cpus; +extern cpuset_t all_cpus; /* * Macro allowing us to determine whether a CPU is absent at any given * time, thus permitting us to configure sparse maps of cpuid-dependent * (per-CPU) structures. */ -#define CPU_ABSENT(x_cpu) ((all_cpus & (1 << (x_cpu))) == 0) +#define CPU_ABSENT(x_cpu) (!CPU_ISSET(x_cpu, &all_cpus)) /* * Macros to iterate over non-absent CPUs. CPU_FOREACH() takes an @@ -158,11 +160,11 @@ void cpu_mp_setmaxid(void); void cpu_mp_start(void); void forward_signal(struct thread *); -int restart_cpus(cpumask_t); -int stop_cpus(cpumask_t); -int stop_cpus_hard(cpumask_t); +int restart_cpus(cpuset_t); +int stop_cpus(cpuset_t); +int stop_cpus_hard(cpuset_t); #if defined(__amd64__) -int suspend_cpus(cpumask_t); +int suspend_cpus(cpuset_t); #endif void smp_rendezvous_action(void); extern struct mtx smp_ipi_mtx; @@ -173,7 +175,7 @@ void smp_rendezvous(void (*)(void *), void (*)(void *), void (*)(void *), void *arg); -void smp_rendezvous_cpus(cpumask_t, +void smp_rendezvous_cpus(cpuset_t, void (*)(void *), void (*)(void *), void (*)(void *), diff --git a/sys/sys/soundcard.h b/sys/sys/soundcard.h index c4cfc276bdbe..a6817df79d02 100644 --- a/sys/sys/soundcard.h +++ b/sys/sys/soundcard.h @@ -311,7 +311,8 @@ typedef struct _snd_capabilities { * IOCTL Commands for /dev/sequencer */ -#define SNDCTL_SEQ_RESET _IO ('Q', 0) +#define SNDCTL_SEQ_HALT _IO ('Q', 0) +#define SNDCTL_SEQ_RESET SNDCTL_SEQ_HALT /* Historic interface */ #define SNDCTL_SEQ_SYNC _IO ('Q', 1) #define SNDCTL_SYNTH_INFO _IOWR('Q', 2, struct synth_info) #define SNDCTL_SEQ_CTRLRATE _IOWR('Q', 3, int) /* Set/get timer res.(hz) */ diff --git a/sys/sys/types.h b/sys/sys/types.h index 4bc1a8dbd228..cb513afeb5ee 100644 --- a/sys/sys/types.h +++ b/sys/sys/types.h @@ -99,7 +99,6 @@ typedef __clockid_t clockid_t; #define _CLOCKID_T_DECLARED #endif -typedef __cpumask_t cpumask_t; typedef __critical_t critical_t; /* Critical section value */ typedef __int64_t daddr_t; /* disk address */ diff --git a/sys/ufs/ffs/ffs_alloc.c b/sys/ufs/ffs/ffs_alloc.c index 5b8f3c837a80..7f5d1b4a6a1d 100644 --- a/sys/ufs/ffs/ffs_alloc.c +++ b/sys/ufs/ffs/ffs_alloc.c @@ -420,13 +420,13 @@ ffs_realloccg(ip, lbprev, bprev, bpref, osize, nsize, flags, cred, bpp) */ if (reclaimed == 0) { reclaimed = 1; - softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT); UFS_UNLOCK(ump); if (bp) { brelse(bp); bp = NULL; } UFS_LOCK(ump); + softdep_request_cleanup(fs, vp, cred, FLUSH_BLOCKS_WAIT); goto retry; } UFS_UNLOCK(ump); @@ -2356,8 +2356,8 @@ ffs_fserr(fs, inum, cp) * specified inode by the specified amount. Under normal * operation the count should always go down. Decrementing * the count to zero will cause the inode to be freed. - * adjblkcnt(inode, amt) - adjust the number of blocks used to - * by the specifed amount. + * adjblkcnt(inode, amt) - adjust the number of blocks used by the + * inode by the specified amount. * adjndir, adjbfree, adjifree, adjffree, adjnumclusters(amt) - * adjust the superblock summary. * freedirs(inode, count) - directory inodes [inode..inode + count - 1] diff --git a/sys/x86/x86/local_apic.c b/sys/x86/x86/local_apic.c index f5ac443577f5..dfef3a76fc73 100644 --- a/sys/x86/x86/local_apic.c +++ b/sys/x86/x86/local_apic.c @@ -802,7 +802,7 @@ lapic_handle_timer(struct trapframe *frame) * and unlike other schedulers it actually schedules threads to * those CPUs. */ - if ((hlt_cpus_mask & (1 << PCPU_GET(cpuid))) != 0) + if (CPU_ISSET(PCPU_GET(cpuid), &hlt_cpus_mask)) return; #endif diff --git a/tools/regression/bin/sh/expansion/heredoc1.0 b/tools/regression/bin/sh/expansion/heredoc1.0 new file mode 100644 index 000000000000..a67b2da2e5f2 --- /dev/null +++ b/tools/regression/bin/sh/expansion/heredoc1.0 @@ -0,0 +1,25 @@ +# $FreeBSD$ + +f() { return $1; } + +[ `f 42; { cat; } < born in Redon, Ille-et-Vilaine, France, 1980 10/23 Mario Sergio Fujikawa Ferreira born in Brasilia, Distrito Federal, Brazil, 1976 10/25 Eric Melville born in Los Gatos, California, United States, 1980 +10/25 Julien Laffaye born in Toulouse, France, 1988 10/26 Philip M. Gollucci born in Silver Spring, Maryland, United States, 1979 10/27 Takanori Watanabe born in Numazu, Shizuoka, Japan, 1972 11/05 M. Warner Losh born in Kansas City, Kansas, United States, 1966 diff --git a/usr.bin/calendar/io.c b/usr.bin/calendar/io.c index ef98d5da9b5f..eb37eace8bdf 100644 --- a/usr.bin/calendar/io.c +++ b/usr.bin/calendar/io.c @@ -346,7 +346,7 @@ closecal(FILE *fp) write(pdes[1], pw->pw_name, strlen(pw->pw_name)); write(pdes[1], ">\nTo: <", 7); write(pdes[1], pw->pw_name, strlen(pw->pw_name)); - write(pdes[1], ">\nSubject: ", 12); + write(pdes[1], ">\nSubject: ", 11); write(pdes[1], dayname, strlen(dayname)); write(pdes[1], "'s Calendar\nPrecedence: bulk\n\n", 30); diff --git a/usr.bin/grep/Makefile b/usr.bin/grep/Makefile index 8cd490d9b390..f09a7d6f2e9f 100644 --- a/usr.bin/grep/Makefile +++ b/usr.bin/grep/Makefile @@ -28,8 +28,6 @@ MLINKS= grep.1 egrep.1 \ bsdgrep.1: grep.1 cp ${.ALLSRC} ${.TARGET} -WARNS?= 6 - LDADD= -lz -lbz2 DPADD= ${LIBZ} ${LIBBZ2} diff --git a/usr.bin/iconv/Makefile b/usr.bin/iconv/Makefile index 7e8f6e7613a1..deab0920fbde 100644 --- a/usr.bin/iconv/Makefile +++ b/usr.bin/iconv/Makefile @@ -7,8 +7,6 @@ PROG= iconv #SRCS= iconv.c MAN= iconv.1 -WARNS?= 6 - LDADD+= -lcrypt DPADD+= ${LIBCRYPT} diff --git a/usr.bin/kdump/mksubr b/usr.bin/kdump/mksubr index afff24d5fd6b..dbde92b762ec 100644 --- a/usr.bin/kdump/mksubr +++ b/usr.bin/kdump/mksubr @@ -345,7 +345,7 @@ auto_if_type "sockfamilyname" "AF_[[:alnum:]]+[[:space:]]+" "sys/socket.h" auto_if_type "sockipprotoname" "IPPROTO_[[:alnum:]]+[[:space:]]+" "netinet/in.h" auto_switch_type "sockoptname" "SO_[A-Z]+[[:space:]]+0x[0-9]+" "sys/socket.h" auto_switch_type "socktypename" "SOCK_[A-Z]+[[:space:]]+[1-9]+[0-9]*" "sys/socket.h" -auto_switch_type "ptraceopname" "PT_[[:alnum:]]+[[:space:]]+[0-9]+" "sys/ptrace.h" +auto_switch_type "ptraceopname" "PT_[[:alnum:]_]+[[:space:]]+[0-9]+" "sys/ptrace.h" cat <<_EOF_ /* diff --git a/usr.bin/rctl/Makefile b/usr.bin/rctl/Makefile index 1088cf11e9de..c5c32eba52eb 100644 --- a/usr.bin/rctl/Makefile +++ b/usr.bin/rctl/Makefile @@ -6,6 +6,4 @@ MAN= rctl.8 DPADD= ${LIBUTIL} LDADD= -lutil -WARNS?= 6 - .include diff --git a/usr.bin/su/su.1 b/usr.bin/su/su.1 index 8b79d41b6cb7..d9180e37a0be 100644 --- a/usr.bin/su/su.1 +++ b/usr.bin/su/su.1 @@ -28,7 +28,7 @@ .\" @(#)su.1 8.2 (Berkeley) 4/18/94 .\" $FreeBSD$ .\" -.Dd July 1, 2008 +.Dd June 6, 2011 .Dt SU 1 .Os .Sh NAME @@ -193,16 +193,22 @@ PAM configuration for .Sh EXAMPLES .Bl -tag -width 5n -compact .It Li "su -m man -c catman" -Runs the command -.Li catman -as user -.Li man . +Starts a shell as user +.Li man , +and runs the command +.Li catman . You will be asked for man's password unless your real UID is 0. Note that the .Fl m option is required since user .Dq man does not have a valid shell by default. +In this example, +.Fl c +is passed to the shell of the user +.Dq man , +and is not interpreted as an argument to +.Nm . .It Li "su -m man -c 'catman /usr/share/man /usr/local/man'" Same as above, but the target command consists of more than a single word and hence is quoted for use with the diff --git a/usr.sbin/bluetooth/ath3kfw/Makefile b/usr.sbin/bluetooth/ath3kfw/Makefile index 0ff010f3394f..373655b210dd 100644 --- a/usr.sbin/bluetooth/ath3kfw/Makefile +++ b/usr.sbin/bluetooth/ath3kfw/Makefile @@ -2,7 +2,6 @@ PROG= ath3kfw MAN= ath3kfw.8 -WARNS?= 6 DPADD+= ${LIBUSB} LDADD+= -lusb diff --git a/usr.sbin/bsdinstall/scripts/netconfig_ipv6 b/usr.sbin/bsdinstall/scripts/netconfig_ipv6 index 70bd203e19d7..8bff816f792b 100755 --- a/usr.sbin/bsdinstall/scripts/netconfig_ipv6 +++ b/usr.sbin/bsdinstall/scripts/netconfig_ipv6 @@ -33,7 +33,6 @@ # # TODO: -# - Add -R /sbin/resolvconf to rtsol once support is in tree. # - Add DHCPv6 support once FreeBSD ships with it. # diff --git a/usr.sbin/bsnmpd/modules/snmp_wlan/Makefile b/usr.sbin/bsnmpd/modules/snmp_wlan/Makefile index 9b8628bfc509..6a53d46fb297 100644 --- a/usr.sbin/bsnmpd/modules/snmp_wlan/Makefile +++ b/usr.sbin/bsnmpd/modules/snmp_wlan/Makefile @@ -6,8 +6,6 @@ MOD= wlan SRCS= wlan_snmp.c wlan_sys.c CFLAGS+= -DSNMPTREE_TYPES -WARNS= 6 - XSYM= begemotWlan BMIBS= BEGEMOT-WIRELESS-MIB.txt diff --git a/usr.sbin/lastlogin/lastlogin.8 b/usr.sbin/lastlogin/lastlogin.8 index 063016399e77..fdbc871dcf17 100644 --- a/usr.sbin/lastlogin/lastlogin.8 +++ b/usr.sbin/lastlogin/lastlogin.8 @@ -31,7 +31,7 @@ .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF .\" SUCH DAMAGE. .\" -.Dd January 11, 1996 +.Dd June 6, 2011 .Dt LASTLOGIN 8 .Os .Sh NAME @@ -39,6 +39,8 @@ .Nd indicate last login time of users .Sh SYNOPSIS .Nm +.Op Fl f Ar file +.Op Fl rt .Op Ar user ... .Sh DESCRIPTION The @@ -54,8 +56,8 @@ If more than one .Ar user is given, the session information for each user is printed in the order given on the command line. -Otherwise, information -for all users is printed, sorted by name. +Otherwise, information for all users is printed. +By default, the entries are sorted by user name. .Pp The .Nm @@ -63,6 +65,18 @@ utility differs from .Xr last 1 in that it only prints information regarding the very last login session. The last login database is never turned over or deleted in standard usage. +.Pp +The following options are available: +.Bl -tag -width indent +.It Fl f Ar file +Open last login database +.Ar file +instead of the system-wide database. +.It Fl r +Print the entries in reverse sorted order. +.It Fl t +Sort the elements by last login time, instead of user name. +.El .Sh FILES .Bl -tag -width /var/log/utx.lastlogin -compact .It Pa /var/log/utx.lastlogin diff --git a/usr.sbin/lastlogin/lastlogin.c b/usr.sbin/lastlogin/lastlogin.c index 4c085475ad0b..2f8dd78331e2 100644 --- a/usr.sbin/lastlogin/lastlogin.c +++ b/usr.sbin/lastlogin/lastlogin.c @@ -47,30 +47,59 @@ __RCSID("$NetBSD: lastlogin.c,v 1.4 1998/02/03 04:45:35 perry Exp $"); int main(int, char **); static void output(struct utmpx *); static void usage(void); +static int utcmp_user(const void *, const void *); + +static int order = 1; +static const char *file = NULL; +static int (*utcmp)(const void *, const void *) = utcmp_user; static int -utcmp(const void *u1, const void *u2) +utcmp_user(const void *u1, const void *u2) { - return (strcmp(((const struct utmpx *)u1)->ut_user, + return (order * strcmp(((const struct utmpx *)u1)->ut_user, ((const struct utmpx *)u2)->ut_user)); } +static int +utcmp_time(const void *u1, const void *u2) +{ + time_t t1, t2; + + t1 = ((const struct utmpx *)u1)->ut_tv.tv_sec; + t2 = ((const struct utmpx *)u2)->ut_tv.tv_sec; + return (t1 < t2 ? order : t1 > t2 ? -order : 0); +} + int main(int argc, char *argv[]) { int ch, i, ulistsize; struct utmpx *u, *ulist; - while ((ch = getopt(argc, argv, "")) != -1) { - usage(); + while ((ch = getopt(argc, argv, "f:rt")) != -1) { + switch (ch) { + case 'f': + file = optarg; + break; + case 'r': + order = -1; + break; + case 't': + utcmp = utcmp_time; + break; + default: + usage(); + } } + argc -= optind; + argv += optind; - /* Process usernames given on the command line. */ - if (argc > 1) { - for (i = 1; i < argc; ++i) { - if (setutxdb(UTXDB_LASTLOGIN, NULL) != 0) - errx(1, "failed to open lastlog database"); + if (argc > 0) { + /* Process usernames given on the command line. */ + for (i = 0; i < argc; i++) { + if (setutxdb(UTXDB_LASTLOGIN, file) != 0) + err(1, "failed to open lastlog database"); if ((u = getutxuser(argv[i])) == NULL) { warnx("user '%s' not found", argv[i]); continue; @@ -78,11 +107,10 @@ main(int argc, char *argv[]) output(u); endutxent(); } - } - /* Read all lastlog entries, looking for active ones */ - else { - if (setutxdb(UTXDB_LASTLOGIN, NULL) != 0) - errx(1, "failed to open lastlog database"); + } else { + /* Read all lastlog entries, looking for active ones. */ + if (setutxdb(UTXDB_LASTLOGIN, file) != 0) + err(1, "failed to open lastlog database"); ulist = NULL; ulistsize = 0; while ((u = getutxent()) != NULL) { @@ -119,6 +147,6 @@ output(struct utmpx *u) static void usage(void) { - fprintf(stderr, "usage: lastlogin [user ...]\n"); + fprintf(stderr, "usage: lastlogin [-f file] [-rt] [user ...]\n"); exit(1); } diff --git a/usr.sbin/pmccontrol/pmccontrol.c b/usr.sbin/pmccontrol/pmccontrol.c index cce1e0ef14ac..80d4bd7c3dde 100644 --- a/usr.sbin/pmccontrol/pmccontrol.c +++ b/usr.sbin/pmccontrol/pmccontrol.c @@ -28,8 +28,9 @@ #include __FBSDID("$FreeBSD$"); -#include +#include #include +#include #include #include @@ -133,26 +134,32 @@ pmcc_init_debug(void) static int pmcc_do_enable_disable(struct pmcc_op_list *op_list) { + long cpusetsize; int c, error, i, j, ncpu, npmc, t; - cpumask_t haltedcpus, cpumask; + cpuset_t haltedcpus, cpumask; struct pmcc_op *np; unsigned char *map; unsigned char op; int cpu, pmc; - size_t dummy; + size_t setsize; if ((ncpu = pmc_ncpu()) < 0) err(EX_OSERR, "Unable to determine the number of cpus"); /* Determine the set of active CPUs. */ - cpumask = (1 << ncpu) - 1; - dummy = sizeof(int); - haltedcpus = (cpumask_t) 0; - if (ncpu > 1 && sysctlbyname("machdep.hlt_cpus", &haltedcpus, - &dummy, NULL, 0) < 0) + cpusetsize = sysconf(_SC_CPUSET_SIZE); + if (cpusetsize == -1 || (u_long)cpusetsize > sizeof(cpuset_t)) { err(EX_OSERR, "ERROR: Cannot determine which CPUs are " "halted"); - cpumask &= ~haltedcpus; + } + CPU_ZERO(&haltedcpus); + setsize = (size_t)cpusetsize; + if (ncpu > 1 && sysctlbyname("machdep.hlt_cpus", &haltedcpus, + &setsize, NULL, 0) < 0) + err(EX_OSERR, "ERROR: Cannot determine which CPUs are " + "halted"); + CPU_FILL(&cpumask); + CPU_NAND(&cpumask, &haltedcpus); /* Determine the maximum number of PMCs in any CPU. */ npmc = 0; @@ -200,7 +207,7 @@ pmcc_do_enable_disable(struct pmcc_op_list *op_list) if (cpu == PMCC_CPU_ALL) for (i = 0; i < ncpu; i++) { - if ((1 << i) & cpumask) + if (CPU_ISSET(i, &cpumask)) SET_PMCS(i, pmc, op); } else diff --git a/usr.sbin/route6d/route6d.c b/usr.sbin/route6d/route6d.c index 761deeb18dbf..4868829c56b2 100644 --- a/usr.sbin/route6d/route6d.c +++ b/usr.sbin/route6d/route6d.c @@ -106,7 +106,7 @@ static const char _rcsid[] = "$KAME: route6d.c,v 1.104 2003/10/31 00:30:20 itoju struct ifc { /* Configuration of an interface */ char ifc_name[IFNAMSIZ]; /* if name */ - struct ifc *ifc_next; + TAILQ_ENTRY(ifc) ifc_next; int ifc_index; /* if index */ int ifc_mtu; /* if mtu */ int ifc_metric; /* if metric */ @@ -120,7 +120,7 @@ struct ifc { /* Configuration of an interface */ struct ifac { /* Adddress associated to an interface */ struct ifc *ifa_conf; /* back pointer */ - struct ifac *ifa_next; + TAILQ_ENTRY(ifac) ifa_next; struct in6_addr ifa_addr; /* address */ struct in6_addr ifa_raddr; /* remote address, valid in p2p */ int ifa_plen; /* prefix length */ @@ -134,8 +134,10 @@ struct iff { struct iff *iff_next; }; -struct ifc *ifc; -struct iff *iff_head; +TAILQ_HEAD(, ifc) ifc = + TAILQ_HEAD_INITIALIZER(ifc); +TAILQ_HEAD(, iff) iff_head = + TAILQ_HEAD_INITIALIZER(iff_head); int nifc; /* number of valid ifc's */ struct ifc **index2ifc; unsigned int nindex2ifc; @@ -166,7 +168,7 @@ struct rip6 *ripbuf; /* packet buffer for sending */ */ struct riprt { - struct riprt *rrt_next; /* next destination */ + TAILQ_ENTRY(riprt) rrt_next; /* next destination */ struct riprt *rrt_same; /* same destination - future use */ struct netinfo6 rrt_info; /* network info */ struct in6_addr rrt_gw; /* gateway */ diff --git a/usr.sbin/rtadvd/advcap.c b/usr.sbin/rtadvd/advcap.c index b0f5ee0f8629..7280f4097d66 100644 --- a/usr.sbin/rtadvd/advcap.c +++ b/usr.sbin/rtadvd/advcap.c @@ -81,7 +81,6 @@ static char *tbuf; static int hopcount; /* detect infinite loops in termcap, init 0 */ -static const char *remotefile; extern const char *conffile; int tgetent(char *, char *); @@ -204,7 +203,7 @@ tnchktc(void) write(STDERR_FILENO, "Infinite tc= loop\n", 18); return (0); } - if (getent(tcbuf, tcname, remotefile) != 1) { + if (getent(tcbuf, tcname, conffile) != 1) { return (0); } for (q = tcbuf; *q++ != ':'; ) diff --git a/usr.sbin/rtadvd/config.c b/usr.sbin/rtadvd/config.c index 4c870b9f0596..1b48868692ba 100644 --- a/usr.sbin/rtadvd/config.c +++ b/usr.sbin/rtadvd/config.c @@ -109,7 +109,7 @@ dname_labelenc(char *dst, const char *src) /* Always need a 0-length label at the tail. */ *dst++ = '\0'; - syslog(LOG_DEBUG, "<%s> labellen = %d", __func__, dst - dst_origin); + syslog(LOG_DEBUG, "<%s> labellen = %td", __func__, dst - dst_origin); return (dst - dst_origin); } @@ -229,13 +229,7 @@ getconfig(int idx) __func__, intface); } - rai = malloc(sizeof(*rai)); - if (rai == NULL) { - syslog(LOG_INFO, "<%s> %s: can't allocate enough memory", - __func__, intface); - exit(1); - } - memset(rai, 0, sizeof(*rai)); + ELM_MALLOC(rai, exit(1)); TAILQ_INIT(&rai->rai_prefix); #ifdef ROUTEINFO TAILQ_INIT(&rai->rai_route); @@ -394,10 +388,7 @@ getconfig(int idx) /* allocate memory to store prefix information */ ELM_MALLOC(pfx, exit(1)); - - /* link into chain */ - TAILQ_INSERT_TAIL(&rai->rai_prefix, pfx, pfx_next); - rai->rai_pfxs++; + pfx->pfx_rainfo = rai; pfx->pfx_origin = PREFIX_FROM_CONFIG; if (inet_pton(AF_INET6, addr, &pfx->pfx_prefix) != 1) { @@ -481,6 +472,9 @@ getconfig(int idx) pfx->pfx_pltimeexpire = now.tv_sec + pfx->pfx_preflifetime; } + /* link into chain */ + TAILQ_INSERT_TAIL(&rai->rai_prefix, pfx, pfx_next); + rai->rai_pfxs++; } if (rai->rai_advifprefix && rai->rai_pfxs == 0) get_prefix(rai); diff --git a/usr.sbin/rtadvd/dump.c b/usr.sbin/rtadvd/dump.c index f79319bd3e36..fac3fb24de81 100644 --- a/usr.sbin/rtadvd/dump.c +++ b/usr.sbin/rtadvd/dump.c @@ -310,7 +310,7 @@ dname_labeldec(char *dst, size_t dlen, const char *src) (src + len) <= src_last) { if (dst != dst_origin) *dst++ = '.'; - syslog(LOG_DEBUG, "<%s> labellen = %d", __func__, len); + syslog(LOG_DEBUG, "<%s> labellen = %zd", __func__, len); memcpy(dst, src, len); src += len; dst += len; diff --git a/usr.sbin/rtadvd/rtadvd.c b/usr.sbin/rtadvd/rtadvd.c index 614d109f7596..6423e7bf97e4 100644 --- a/usr.sbin/rtadvd/rtadvd.c +++ b/usr.sbin/rtadvd/rtadvd.c @@ -789,7 +789,7 @@ rtadvd_input(void) #else if ((size_t)i < sizeof(struct icmp6_hdr)) { syslog(LOG_ERR, - "<%s> packet size(%d) is too short", + "<%s> packet size(%zd) is too short", __func__, i); return; } @@ -827,7 +827,7 @@ rtadvd_input(void) if ((size_t)i < sizeof(struct nd_router_solicit)) { syslog(LOG_NOTICE, "<%s> RS from %s on %s does not have enough " - "length (len = %d)", + "length (len = %zd)", __func__, inet_ntop(AF_INET6, &rcvfrom.sin6_addr, ntopbuf, sizeof(ntopbuf)), @@ -873,7 +873,7 @@ rtadvd_input(void) if ((size_t)i < sizeof(struct nd_router_advert)) { syslog(LOG_NOTICE, "<%s> RA from %s on %s does not have enough " - "length (len = %d)", + "length (len = %zd)", __func__, inet_ntop(AF_INET6, &rcvfrom.sin6_addr, ntopbuf, sizeof(ntopbuf)), diff --git a/usr.sbin/rtsold/rtsol.c b/usr.sbin/rtsold/rtsol.c index afa935e72da7..fe8bfaf5d08e 100644 --- a/usr.sbin/rtsold/rtsol.c +++ b/usr.sbin/rtsold/rtsol.c @@ -625,8 +625,10 @@ ra_opt_handler(struct ifinfo *ifi) if (!TAILQ_EMPTY(&sm_rdnss_head)) CALL_SCRIPT(RESADD, &sm_rdnss_head); +#if 0 else CALL_SCRIPT(RESDEL, NULL); +#endif ra_opt_handler_freeit: /* Clear script message queue. */ diff --git a/usr.sbin/rtsold/rtsold.c b/usr.sbin/rtsold/rtsold.c index a8b7ca727cfd..4413d55f7853 100644 --- a/usr.sbin/rtsold/rtsold.c +++ b/usr.sbin/rtsold/rtsold.c @@ -124,6 +124,7 @@ main(int argc, char **argv) int maxfd; #endif int rtsock; + char *argv0; #ifndef SMALL /* rtsold */ @@ -134,6 +135,8 @@ main(int argc, char **argv) fflag = 1; once = 1; #endif + argv0 = argv[0]; + while ((ch = getopt(argc, argv, opts)) != -1) { switch (ch) { case 'a': diff --git a/usr.sbin/tcpdrop/tcpdrop.c b/usr.sbin/tcpdrop/tcpdrop.c index 6aae85ccb4d1..dce6c6d4be9d 100644 --- a/usr.sbin/tcpdrop/tcpdrop.c +++ b/usr.sbin/tcpdrop/tcpdrop.c @@ -20,12 +20,13 @@ #include __FBSDID("$FreeBSD$"); +#include #include #include #include #include -#include +#include #include #define TCPSTATES #include