Merge ^/head r340368 through r340426.

Dimitry Andric 2018-11-14 06:46:44 +00:00
commit 6149ed01a1
105 changed files with 2362 additions and 1689 deletions

View File

@ -1287,7 +1287,7 @@ ITOOLS+=makewhatis
#
# Non-base distributions produced by the base system
EXTRA_DISTRIBUTIONS= doc
EXTRA_DISTRIBUTIONS=
.if defined(LIBCOMPAT)
EXTRA_DISTRIBUTIONS+= lib${libcompat}
.endif
@ -1297,7 +1297,7 @@ EXTRA_DISTRIBUTIONS+= tests
DEBUG_DISTRIBUTIONS=
.if ${MK_DEBUG_FILES} != "no"
DEBUG_DISTRIBUTIONS+= base ${EXTRA_DISTRIBUTIONS:S,doc,,:S,tests,,}
DEBUG_DISTRIBUTIONS+= base ${EXTRA_DISTRIBUTIONS:S,tests,,}
.endif
MTREE_MAGIC?= mtree 2.0

View File

@ -17,7 +17,7 @@ LIB32CPUFLAGS= -march=${TARGET_CPUTYPE}
.if ${WANT_COMPILER_TYPE} == gcc || \
(defined(X_COMPILER_TYPE) && ${X_COMPILER_TYPE} == gcc)
.else
LIB32CPUFLAGS+= -target x86_64-unknown-freebsd12.0
LIB32CPUFLAGS+= -target x86_64-unknown-freebsd13.0
.endif
LIB32CPUFLAGS+= -m32
LIB32WMAKEENV= MACHINE=i386 MACHINE_ARCH=i386 \
@ -49,9 +49,9 @@ LIB32CPUFLAGS= -march=${TARGET_CPUTYPE}
.endif
.else
.if ${TARGET_ARCH:Mmips64el*} != ""
LIB32CPUFLAGS= -target mipsel-unknown-freebsd12.0
LIB32CPUFLAGS= -target mipsel-unknown-freebsd13.0
.else
LIB32CPUFLAGS= -target mips-unknown-freebsd12.0
LIB32CPUFLAGS= -target mips-unknown-freebsd13.0
.endif
.endif
LIB32CPUFLAGS+= -mabi=32

View File

@ -42,7 +42,7 @@ SUBDIR= ${SUBDIR_BOOTSTRAP} \
libdevctl \
libdevinfo \
libdevstat \
${_libdl} \
libdl \
libdwarf \
libedit \
libelftc \
@ -193,10 +193,6 @@ _libproc= libproc
_librtld_db= librtld_db
.endif
.if defined(LINKER_FEATURES) && ${LINKER_FEATURES:Mfilter}
_libdl= libdl
.endif
SUBDIR.${MK_OPENSSL}+= libmp
SUBDIR.${MK_PMC}+= libpmc libpmcstat
SUBDIR.${MK_RADIUS_SUPPORT}+= libradius

View File

@ -32,10 +32,27 @@ typedef void (*crt_func)(void);
extern void *__dso_handle __hidden;
#ifdef SHARED
void *__dso_handle = &__dso_handle;
#else
#ifndef SHARED
void *__dso_handle = 0;
#else
void *__dso_handle = &__dso_handle;
void __cxa_finalize(void *) __weak_symbol;
/*
* Call __cxa_finalize with the dso handle in shared objects.
* When we have ctors/dtors, call it from the dtor handler before calling
* any dtors; otherwise, use a destructor.
*/
#ifndef HAVE_CTORS
__attribute__((destructor))
#endif
static void
run_cxa_finalize(void)
{
if (__cxa_finalize != NULL)
__cxa_finalize(__dso_handle);
}
#endif
/*
@ -58,6 +75,10 @@ __do_global_dtors_aux(void)
crt_func fn;
int n;
#ifdef SHARED
run_cxa_finalize();
#endif
for (n = 1;; n++) {
fn = __DTOR_LIST__[n];
if (fn == (crt_func)0 || fn == (crt_func)-1)

View File

@ -31,6 +31,9 @@ FBSD_1.4 {
FBSD_1.5 {
} FBSD_1.4;
# This version was first added to 13.0-current.
FBSD_1.6 {
} FBSD_1.5;
# This is our private namespace. Any global interfaces that are
# strictly for use only by other FreeBSD applications and libraries
@ -39,4 +42,4 @@ FBSD_1.5 {
#
# Please do NOT increment the version of this namespace.
FBSDprivate_1.0 {
} FBSD_1.5;
} FBSD_1.6;

View File

@ -3,6 +3,7 @@
.include <src.opts.mk>
SUBDIR= cap_dns
SUBDIR+= cap_fileargs
SUBDIR+= cap_grp
SUBDIR+= cap_pwd
SUBDIR+= cap_random

View File

@ -0,0 +1,35 @@
# $FreeBSD$
SHLIBDIR?= /lib/casper
.include <src.opts.mk>
PACKAGE=libcasper
SHLIB_MAJOR= 1
INCSDIR?= ${INCLUDEDIR}/casper
.if ${MK_CASPER} != "no"
SHLIB= cap_fileargs
SRCS= cap_fileargs.c
.endif
INCS= cap_fileargs.h
LIBADD= nv
CFLAGS+=-I${.CURDIR}
MAN+= cap_fileargs.3
MLINKS+=cap_fileargs.3 libcap_fileargs.3
MLINKS+=cap_fileargs.3 fileargs_cinit.3
MLINKS+=cap_fileargs.3 fileargs_cinitnv.3
MLINKS+=cap_fileargs.3 fileargs_fopen.3
MLINKS+=cap_fileargs.3 fileargs_free.3
MLINKS+=cap_fileargs.3 fileargs_init.3
MLINKS+=cap_fileargs.3 fileargs_initnv.3
MLINKS+=cap_fileargs.3 fileargs_open.3
.include <bsd.lib.mk>

View File

@ -0,0 +1,241 @@
.\" Copyright (c) 2018 Mariusz Zaborski <oshogbo@FreeBSD.org>
.\" All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
.\" are met:
.\" 1. Redistributions of source code must retain the above copyright
.\" notice, this list of conditions and the following disclaimer.
.\" 2. Redistributions in binary form must reproduce the above copyright
.\" notice, this list of conditions and the following disclaimer in the
.\" documentation and/or other materials provided with the distribution.
.\"
.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
.\" SUCH DAMAGE.
.\"
.\" $FreeBSD$
.\"
.Dd November 12, 2018
.Dt CAP_FILEARGS 3
.Os
.Sh NAME
.Nm fileargs_cinit ,
.Nm fileargs_cinitnv ,
.Nm fileargs_init ,
.Nm fileargs_initnv ,
.Nm fileargs_free ,
.Nm fileargs_open ,
.Nm fileargs_fopen
.Nd "library for handling files in capability mode"
.Sh LIBRARY
.Lb libcap_fileargs
.Sh SYNOPSIS
.In sys/nv.h
.In libcasper.h
.In casper/cap_fileargs.h
.Ft "fileargs_t *"
.Fn fileargs_init "int argc" "char *argv[]" "int flags" "mode_t mode" "cap_rights_t *rightsp"
.Ft "fileargs_t *"
.Fn fileargs_cinit "cap_channel_t *cas" "int argc" "char *argv[]" "int flags" "mode_t mode" "cap_rights_t *rightsp"
.Ft "fileargs_t *"
.Fn fileargs_cinitnv "cap_channel_t *cas" "nvlist_t *limits"
.Ft "fileargs_t *"
.Fn fileargs_initnv "nvlist_t *limits"
.Ft "void"
.Fn fileargs_free "fileargs_t *fa"
.Ft "int"
.Fn fileargs_open "fileargs_t *fa" "const char *name"
.Ft "FILE *"
.Fn fileargs_fopen "fileargs_t *fa" "const char *name" "const char *mode"
.Sh DESCRIPTION
The library is used to simplify capsicumizing tools that use the file system.
The idea behind the library is to pass the remaining
.Fa argc
and
.Fa argv ,
which contain the list of files that should be opened for this program.
The library will create a service that will serve those files.
.Pp
The function
.Fn fileargs_init
creates a connection to the
.Nm system.fileargs
service.
The
.Fa argv
argument contains a list of files that should be opened.
The argument can be set to
.Dv NULL ,
in which case no service is created and all files will be prohibited from being opened.
The
.Fa argc
argument contains the number of passed files.
The
.Fa flags
argument limits opened files for either execution or reading and/or writing.
The
.Fa mode
argument tells with which mode the file should be created if the
.Dv O_CREAT
flag is present.
For more details of the
.Fa flags
and
.Fa mode
arguments see
.Xr open 2 .
The
.Fa rightsp
argument contains the capability rights to which the opened files will be limited.
For more details of the capability rights see
.Xr cap_rights_init 3 .
.Pp
The function
.Fn fileargs_cinit
is equivalent to
.Fn fileargs_init
except that the connection to the Casper service needs to be provided.
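.Pp
A minimal sketch of calling
.Fn fileargs_cinit
with an explicitly created Casper channel follows; the variable names and
error messages are illustrative:
.Bd -literal -offset indent
cap_channel_t *capcas;
cap_rights_t rights;
fileargs_t *fa;

capcas = cap_init();
if (capcas == NULL)
	err(1, "unable to contact Casper");
fa = fileargs_cinit(capcas, argc, argv, O_RDONLY, 0,
    cap_rights_init(&rights, CAP_READ));
if (fa == NULL)
	err(1, "unable to open system.fileargs service");
/* The Casper channel is not needed once the service is created. */
cap_close(capcas);
.Ed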
.Pp
The functions
.Fn fileargs_initnv
and
.Fn fileargs_cinitnv
are respectively equivalent to
.Fn fileargs_init
and
.Fn fileargs_cinit
except that all limits are provided as an
.Xr nvlist 9 .
For details see
.Sx LIMITS .
.Pp
The
.Fn fileargs_free
function closes the connection to the
.Nm system.fileargs
service and frees all structures.
The function handles a
.Dv NULL
argument.
.Pp
The functions
.Fn fileargs_open
and
.Fn fileargs_fopen
are respectively equivalent to
.Xr open 2
and
.Xr fopen 3
except that the arguments are fetched from the
.Vt fileargs_t
structure.
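.Pp
For instance, a stdio-based consumer might use
.Fn fileargs_fopen
where it would otherwise call
.Xr fopen 3 ;
a short sketch, with illustrative error handling:
.Bd -literal -offset indent
FILE *fp;

fp = fileargs_fopen(fa, name, "r");
if (fp == NULL)
	err(1, "unable to open file %s", name);
/* ... read from fp as usual ... */
fclose(fp);
.Ed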
.Sh LIMITS
This section describes which values and types should be used to pass arguments to the
.Nm system.fileargs
service through the
.Fn fileargs_initnv
and
.Fn fileargs_cinitnv
functions.
The
.Xr nvlist 9
for these functions must contain the following values and types:
.Bl -ohang -offset indent
.It flags ( NV_TYPE_NUMBER )
The
.Va flags
limits opened files for either execution or reading and/or writing.
.It mode ( NV_TYPE_NUMBER )
If the
.Dv O_CREAT
flag was set in the
.Va flags
argument, the
.Xr nvlist 9
must also contain the
.Va mode .
The
.Va mode
argument tells with which mode the file should be created.
.El
.Pp
The
.Xr nvlist 9
for these functions may contain the following values and types:
.Bl -ohang -offset indent
.It cap_rights ( NV_TYPE_BINARY )
The
.Va cap_rights
argument contains the capability rights to which the opened files will be limited.
.It ( NV_TYPE_NULL )
Any number of
.Dv NV_TYPE_NULL
elements, where the name of the element is the name of a file which can be opened.
.El
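.Pp
As an illustration, a limits
.Xr nvlist 9
roughly equivalent to the
.Fn fileargs_init
call shown in the
.Sx EXAMPLES
section could be constructed as follows; the file name is purely illustrative:
.Bd -literal -offset indent
cap_rights_t rights;
nvlist_t *limits;
fileargs_t *fa;

limits = nvlist_create(NV_FLAG_NO_UNIQUE);
nvlist_add_number(limits, "flags", O_RDONLY);
nvlist_add_binary(limits, "cap_rights",
    cap_rights_init(&rights, CAP_READ), sizeof(rights));
/* One NV_TYPE_NULL entry per file that may be opened. */
nvlist_add_null(limits, "/etc/motd");
/* fileargs_initnv() consumes the limits nvlist. */
fa = fileargs_initnv(limits);
if (fa == NULL)
	err(1, "unable to open system.fileargs service");
.Ed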
.Sh EXAMPLES
The following example first parses some options and then creates the
.Nm system.fileargs
service with the remaining arguments.
.Bd -literal
int ch, fd, i;
cap_rights_t rights;
fileargs_t *fa;
while ((ch = getopt(argc, argv, "h")) != -1) {
switch (ch) {
case 'h':
default:
usage();
}
}
argc -= optind;
argv += optind;
/* Create capability to the system.fileargs service. */
fa = fileargs_init(argc, argv, O_RDONLY, 0,
cap_rights_init(&rights, CAP_READ));
if (fa == NULL)
err(1, "unable to open system.fileargs service");
/* Enter capability mode sandbox. */
if (cap_enter() < 0 && errno != ENOSYS)
err(1, "unable to enter capability mode");
/* Open files. */
for (i = 0; i < argc; i++) {
fd = fileargs_open(fa, argv[i]);
if (fd < 0)
err(1, "unable to open file %s", argv[i]);
printf("File %s opened in capability mode\n", argv[i]);
close(fd);
}
fileargs_free(fa);
.Ed
.Sh SEE ALSO
.Xr cap_enter 2 ,
.Xr open 2 ,
.Xr cap_rights_init 3 ,
.Xr err 3 ,
.Xr fopen 3 ,
.Xr getopt 3 ,
.Xr capsicum 4 ,
.Xr nv 9
.Sh BUGS
The
.Lb cap_fileargs
included in
.Fx
is considered experimental, and should not be deployed in production
environments without careful consideration of the risks associated with
the use of experimental operating system features.
.Sh AUTHORS
.An Mariusz Zaborski Aq Mt oshogbo@FreeBSD.org

View File

@ -0,0 +1,505 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2018 Mariusz Zaborski <oshogbo@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*/
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/types.h>
#include <sys/capsicum.h>
#include <sys/sysctl.h>
#include <sys/cnv.h>
#include <sys/dnv.h>
#include <sys/nv.h>
#include <assert.h>
#include <errno.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <libcasper.h>
#include <libcasper_service.h>
#include "cap_fileargs.h"
#define CACHE_SIZE 128
#define FILEARGS_MAGIC 0xFA00FA00
struct fileargs {
uint32_t fa_magic;
nvlist_t *fa_cache;
cap_channel_t *fa_chann;
int fa_fdflags;
};
static int
fileargs_get_cache(fileargs_t *fa, const char *name)
{
int fd;
const nvlist_t *nvl;
nvlist_t *tnvl;
assert(fa != NULL);
assert(fa->fa_magic == FILEARGS_MAGIC);
assert(name != NULL);
if (fa->fa_cache == NULL)
return (-1);
if ((fa->fa_fdflags & O_CREAT) != 0)
return (-1);
nvl = dnvlist_get_nvlist(fa->fa_cache, name, NULL);
if (nvl == NULL)
return (-1);
tnvl = nvlist_take_nvlist(fa->fa_cache, name);
fd = nvlist_take_descriptor(tnvl, "fd");
nvlist_destroy(tnvl);
if ((fa->fa_fdflags & O_CLOEXEC) != O_CLOEXEC) {
if (fcntl(fd, F_SETFD, fa->fa_fdflags) == -1) {
close(fd);
return (-1);
}
}
return (fd);
}
static void
fileargs_set_cache(fileargs_t *fa, nvlist_t *nvl)
{
nvlist_destroy(fa->fa_cache);
fa->fa_cache = nvl;
}
static nvlist_t*
fileargs_fetch(fileargs_t *fa, const char *name)
{
nvlist_t *nvl;
int serrno;
assert(fa != NULL);
assert(name != NULL);
nvl = nvlist_create(NV_FLAG_NO_UNIQUE);
nvlist_add_string(nvl, "cmd", "open");
nvlist_add_string(nvl, "name", name);
nvl = cap_xfer_nvlist(fa->fa_chann, nvl);
if (nvl == NULL)
return (NULL);
if (nvlist_get_number(nvl, "error") != 0) {
serrno = (int)nvlist_get_number(nvl, "error");
nvlist_destroy(nvl);
errno = serrno;
return (NULL);
}
return (nvl);
}
static nvlist_t *
fileargs_create_limit(int argc, const char * const *argv, int flags,
mode_t mode, cap_rights_t *rightsp)
{
nvlist_t *limits;
int i;
limits = nvlist_create(NV_FLAG_NO_UNIQUE);
if (limits == NULL)
return (NULL);
nvlist_add_number(limits, "flags", flags);
if (rightsp != NULL) {
nvlist_add_binary(limits, "cap_rights", rightsp,
sizeof(*rightsp));
}
if ((flags & O_CREAT) != 0)
nvlist_add_number(limits, "mode", (uint64_t)mode);
for (i = 0; i < argc; i++) {
nvlist_add_null(limits, argv[i]);
}
return (limits);
}
static fileargs_t *
fileargs_create(cap_channel_t *chan, int fdflags)
{
fileargs_t *fa;
fa = malloc(sizeof(*fa));
if (fa != NULL) {
fa->fa_cache = NULL;
fa->fa_chann = chan;
fa->fa_fdflags = fdflags;
fa->fa_magic = FILEARGS_MAGIC;
}
return (fa);
}
fileargs_t *
fileargs_init(int argc, char *argv[], int flags, mode_t mode,
cap_rights_t *rightsp)
{
nvlist_t *limits;
if (argc <= 0 || argv == NULL) {
return (fileargs_create(NULL, 0));
}
limits = fileargs_create_limit(argc, (const char * const *)argv, flags,
mode, rightsp);
if (limits == NULL)
return (NULL);
return (fileargs_initnv(limits));
}
fileargs_t *
fileargs_cinit(cap_channel_t *cas, int argc, char *argv[], int flags,
mode_t mode, cap_rights_t *rightsp)
{
nvlist_t *limits;
if (argc <= 0 || argv == NULL) {
return (fileargs_create(NULL, 0));
}
limits = fileargs_create_limit(argc, (const char * const *)argv, flags,
mode, rightsp);
if (limits == NULL)
return (NULL);
return (fileargs_cinitnv(cas, limits));
}
fileargs_t *
fileargs_initnv(nvlist_t *limits)
{
cap_channel_t *cas;
fileargs_t *fa;
if (limits == NULL) {
return (fileargs_create(NULL, 0));
}
cas = cap_init();
if (cas == NULL) {
nvlist_destroy(limits);
return (NULL);
}
fa = fileargs_cinitnv(cas, limits);
cap_close(cas);
return (fa);
}
fileargs_t *
fileargs_cinitnv(cap_channel_t *cas, nvlist_t *limits)
{
cap_channel_t *chann;
fileargs_t *fa;
int serrno, ret;
int flags;
assert(cas != NULL);
if (limits == NULL) {
return (fileargs_create(NULL, 0));
}
chann = NULL;
fa = NULL;
chann = cap_service_open(cas, "system.fileargs");
if (chann == NULL) {
nvlist_destroy(limits);
return (NULL);
}
flags = nvlist_get_number(limits, "flags");
/* Limits are consumed, no need to free them. */
ret = cap_limit_set(chann, limits);
if (ret < 0)
goto out;
fa = fileargs_create(chann, flags);
if (fa == NULL)
goto out;
return (fa);
out:
serrno = errno;
if (chann != NULL)
cap_close(chann);
errno = serrno;
return (NULL);
}
int
fileargs_open(fileargs_t *fa, const char *name)
{
int fd;
nvlist_t *nvl;
char *cmd;
assert(fa != NULL);
assert(fa->fa_magic == FILEARGS_MAGIC);
if (name == NULL) {
errno = EINVAL;
return (-1);
}
if (fa->fa_chann == NULL) {
errno = ENOTCAPABLE;
return (-1);
}
fd = fileargs_get_cache(fa, name);
if (fd != -1)
return (fd);
nvl = fileargs_fetch(fa, name);
if (nvl == NULL)
return (-1);
fd = nvlist_take_descriptor(nvl, "fd");
cmd = nvlist_take_string(nvl, "cmd");
if (strcmp(cmd, "cache") == 0)
fileargs_set_cache(fa, nvl);
else
nvlist_destroy(nvl);
free(cmd);
return (fd);
}
FILE *
fileargs_fopen(fileargs_t *fa, const char *name, const char *mode)
{
int fd;
if ((fd = fileargs_open(fa, name)) < 0) {
return (NULL);
}
return (fdopen(fd, mode));
}
void
fileargs_free(fileargs_t *fa)
{
if (fa == NULL)
return;
assert(fa->fa_magic == FILEARGS_MAGIC);
nvlist_destroy(fa->fa_cache);
if (fa->fa_chann != NULL) {
cap_close(fa->fa_chann);
}
explicit_bzero(&fa->fa_magic, sizeof(fa->fa_magic));
free(fa);
}
/*
* Service functions.
*/
static const char *lastname;
static void *cacheposition;
static bool allcached;
static const cap_rights_t *caprightsp;
static int capflags;
static mode_t capmode;
static int
open_file(const char *name)
{
int fd, serrno;
if ((capflags & O_CREAT) == 0)
fd = open(name, capflags);
else
fd = open(name, capflags, capmode);
if (fd < 0)
return (-1);
if (caprightsp != NULL) {
if (cap_rights_limit(fd, caprightsp) < 0) {
serrno = errno;
close(fd);
errno = serrno;
return (-1);
}
}
return (fd);
}
static void
fileargs_add_cache(nvlist_t *nvlout, const nvlist_t *limits,
const char *curent_name)
{
int type, i, fd;
void *cookie;
nvlist_t *new;
const char *fname;
if ((capflags & O_CREAT) != 0) {
allcached = true;
return;
}
cookie = cacheposition;
for (i = 0; i < CACHE_SIZE + 1; i++) {
fname = nvlist_next(limits, &type, &cookie);
if (fname == NULL) {
cacheposition = NULL;
lastname = NULL;
allcached = true;
return;
}
/* We do this to catch the next element's name. */
if (i == CACHE_SIZE) {
break;
}
if (type != NV_TYPE_NULL ||
(curent_name != NULL && strcmp(fname, curent_name) == 0)) {
curent_name = NULL;
i--;
continue;
}
fd = open_file(fname);
if (fd < 0) {
i--;
continue;
}
new = nvlist_create(NV_FLAG_NO_UNIQUE);
nvlist_move_descriptor(new, "fd", fd);
nvlist_add_nvlist(nvlout, fname, new);
}
cacheposition = cookie;
lastname = fname;
}
static bool
fileargs_allowed(const nvlist_t *limits, const nvlist_t *request)
{
const char *name;
name = dnvlist_get_string(request, "name", NULL);
if (name == NULL)
return (false);
/* Fast path. */
if (lastname != NULL && strcmp(name, lastname) == 0)
return (true);
if (!nvlist_exists_null(limits, name))
return (false);
return (true);
}
static int
fileargs_limit(const nvlist_t *oldlimits, const nvlist_t *newlimits)
{
if (oldlimits != NULL)
return (ENOTCAPABLE);
capflags = (int)dnvlist_get_number(newlimits, "flags", 0);
if ((capflags & O_CREAT) != 0)
capmode = (mode_t)nvlist_get_number(newlimits, "mode");
else
capmode = 0;
caprightsp = dnvlist_get_binary(newlimits, "cap_rights", NULL, NULL, 0);
return (0);
}
static int
fileargs_command_open(const nvlist_t *limits, nvlist_t *nvlin,
nvlist_t *nvlout)
{
int fd;
const char *name;
if (limits == NULL)
return (ENOTCAPABLE);
if (!fileargs_allowed(limits, nvlin))
return (ENOTCAPABLE);
name = nvlist_get_string(nvlin, "name");
fd = open_file(name);
if (fd < 0)
return (errno);
if (!allcached && (lastname == NULL ||
strcmp(name, lastname) == 0)) {
nvlist_add_string(nvlout, "cmd", "cache");
fileargs_add_cache(nvlout, limits, name);
} else {
nvlist_add_string(nvlout, "cmd", "open");
}
nvlist_move_descriptor(nvlout, "fd", fd);
return (0);
}
static int
fileargs_command(const char *cmd, const nvlist_t *limits,
nvlist_t *nvlin, nvlist_t *nvlout)
{
if (strcmp(cmd, "open") == 0)
return (fileargs_command_open(limits, nvlin, nvlout));
return (EINVAL);
}
CREATE_SERVICE("system.fileargs", fileargs_limit, fileargs_command,
CASPER_SERVICE_FD | CASPER_SERVICE_STDIO | CASPER_SERVICE_NO_UNIQ_LIMITS);

View File

@ -0,0 +1,108 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2018 Mariusz Zaborski <oshogbo@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _FILEARGS_H_
#define _FILEARGS_H_
#include <sys/dnv.h>
#include <sys/nv.h>
#include <stdbool.h>
#ifdef WITH_CASPER
struct fileargs;
typedef struct fileargs fileargs_t;
fileargs_t *fileargs_init(int argc, char *argv[], int flags, mode_t mode,
cap_rights_t *rightsp);
fileargs_t *fileargs_cinit(cap_channel_t *cas, int argc, char *argv[],
int flags, mode_t mode, cap_rights_t *rightsp);
fileargs_t *fileargs_initnv(nvlist_t *limits);
fileargs_t *fileargs_cinitnv(cap_channel_t *cas, nvlist_t *limits);
int fileargs_open(fileargs_t *fa, const char *name);
void fileargs_free(fileargs_t *fa);
FILE *fileargs_fopen(fileargs_t *fa, const char *name, const char *mode);
#else
typedef struct fileargs {
int fa_flags;
mode_t fa_mode;
} fileargs_t;
static inline fileargs_t *
fileargs_init(int argc __unused, char *argv[] __unused, int flags, mode_t mode,
cap_rights_t *rightsp __unused) {
fileargs_t *fa;
fa = malloc(sizeof(*fa));
if (fa != NULL) {
fa->fa_flags = flags;
fa->fa_mode = mode;
}
return (fa);
}
static inline fileargs_t *
fileargs_cinit(cap_channel_t *cas __unused, int argc, char *argv[], int flags,
mode_t mode, cap_rights_t *rightsp)
{
return (fileargs_init(argc, argv, flags, mode, rightsp));
}
static inline fileargs_t *
fileargs_initnv(nvlist_t *limits)
{
fileargs_t *fa;
fa = fileargs_init(0, NULL,
nvlist_get_number(limits, "flags"),
dnvlist_get_number(limits, "mode", 0),
NULL);
nvlist_destroy(limits);
return (fa);
}
static inline fileargs_t *
fileargs_cinitnv(cap_channel_t *cas __unused, nvlist_t *limits)
{
return (fileargs_initnv(limits));
}
#define fileargs_open(fa, name) \
open(name, fa->fa_flags, fa->fa_mode)
#define fileargs_fopen(fa, name, mode) \
fopen(name, mode)
#define fileargs_free(fa) (free(fa))
#endif
#endif /* !_FILEARGS_H_ */

View File

@ -34,6 +34,15 @@
#ifndef _COMMON_IMPL_H_
#define _COMMON_IMPL_H_
#define fd_is_valid(fd) (fcntl((fd), F_GETFL) != -1 || errno != EBADF)
#include <errno.h>
#include <fcntl.h>
#include <stdbool.h>
static inline bool
fd_is_valid(int fd)
{
return (fcntl(fd, F_GETFD) != -1 || errno != EBADF);
}
#endif /* !_COMMON_IMPL_H_ */

View File

@ -66,11 +66,6 @@ msghdr_add_fd(struct cmsghdr *cmsg, int fd)
PJDLOG_ASSERT(fd >= 0);
if (!fd_is_valid(fd)) {
errno = EBADF;
return (-1);
}
cmsg->cmsg_level = SOL_SOCKET;
cmsg->cmsg_type = SCM_RIGHTS;
cmsg->cmsg_len = CMSG_LEN(sizeof(fd));

View File

@ -306,15 +306,12 @@ parent(int sock)
CHECK(name == NULL);
}
int
main(void)
static void
send_nvlist(void)
{
int status, socks[2];
pid_t pid;
printf("1..134\n");
fflush(stdout);
if (socketpair(PF_UNIX, SOCK_STREAM, 0, socks) < 0)
err(1, "socketpair() failed");
pid = fork();
@ -326,7 +323,7 @@ main(void)
/* Child. */
close(socks[0]);
child(socks[1]);
return (0);
_exit(0);
default:
/* Parent. */
close(socks[1]);
@ -336,6 +333,35 @@ main(void)
if (waitpid(pid, &status, 0) < 0)
err(1, "waitpid() failed");
}
static void
send_closed_fd(void)
{
nvlist_t *nvl;
int error, socks[2];
if (socketpair(PF_UNIX, SOCK_STREAM, 0, socks) < 0)
err(1, "socketpair() failed");
nvl = nvlist_create(0);
nvlist_add_descriptor(nvl, "fd", 12345);
error = nvlist_error(nvl);
CHECK(error == EBADF);
error = nvlist_send(socks[1], nvl);
CHECK(error != 0 && errno == EBADF);
}
int
main(void)
{
printf("1..136\n");
fflush(stdout);
send_nvlist();
send_closed_fd();
return (0);
}

View File

@ -3,12 +3,12 @@
PACKAGE=lib${LIB}
LIB= ufs
SHLIBDIR?= /lib
SHLIB_MAJOR= 6
SHLIB_MAJOR= 7
SRCS= block.c cgroup.c crc32.c inode.c sblock.c type.c ffs_subr.c ffs_tables.c
INCS= libufs.h
MAN= bread.3 cgread.3 libufs.3 sbread.3 ufs_disk_close.3
MAN= bread.3 cgread.3 getinode.3 libufs.3 sbread.3 ufs_disk_close.3
MLINKS+= bread.3 bwrite.3
MLINKS+= bread.3 berase.3
MLINKS+= cgread.3 cgread1.3
@ -16,6 +16,7 @@ MLINKS+= cgread.3 cgget.3
MLINKS+= cgread.3 cgwrite.3
MLINKS+= cgread.3 cgwrite1.3
MLINKS+= cgread.3 cgput.3
MLINKS+= getinode.3 putinode.3
MLINKS+= sbread.3 sbwrite.3
MLINKS+= sbread.3 sbget.3
MLINKS+= sbread.3 sbput.3

lib/libufs/getinode.3
View File

@ -0,0 +1,131 @@
.\" Author: Marshall Kirk McKusick <mckusick@freebsd.org>
.\" Date: January 19, 2018
.\" Description:
.\" Manual page for libufs functions:
.\" getinode(3)
.\" putinode(3)
.\"
.\" This file is in the public domain.
.\"
.\" $FreeBSD$
.\"
.Dd November 10, 2018
.Dt GETINODE 3
.Os
.Sh NAME
.Nm getinode , putinode
.Nd fetch and store inodes on a UFS file system
.Sh LIBRARY
.Lb libufs
.Sh SYNOPSIS
.In ufs/ufs/dinode.h
.In ufs/ffs/fs.h
.In libufs.h
.Ft int
.Fn getinode "struct uufsd *disk" "union dinodep *dp" "ino_t inumber"
.Ft int
.Fn putinode "struct uufsd *disk"
.Sh DESCRIPTION
The
.Fn getinode
and
.Fn putinode
functions provide an inode fetch and store API for
.Xr libufs 3
consumers.
They operate on a userland UFS disk structure.
The
.Fn getinode
function fetches the specified inode from the filesystem.
The
.Fn putinode
function stores the most recently fetched inode to the filesystem.
.Pp
The
.Va dinodep
union is defined as:
.Bd -literal -offset indent
union dinodep {
struct ufs1_dinode *dp1;
struct ufs2_dinode *dp2;
};
.Ed
.Pp
Sample code to clear write permissions for inode number
.Fa inumber
stored on the filesystem described by
.Fa diskp .
.Bd -literal -offset indent
#include <sys/stat.h>
#include <err.h>
#include <ufs/ufs/dinode.h>
#include <ufs/ffs/fs.h>
#include <libufs.h>
void
clearwrite(struct uufsd *diskp, ino_t inumber)
{
union dinodep dp;
if (getinode(diskp, &dp, inumber) == -1)
err(1, "getinode: %s", diskp->d_error);
switch (diskp->d_ufs) {
case 1: /* UFS 1 filesystem */
dp.dp1->di_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
break;
case 2: /* UFS 2 filesystem */
dp.dp2->di_mode &= ~(S_IWUSR | S_IWGRP | S_IWOTH);
break;
default:
errx(1, "unknown filesystem type");
}
if (putinode(diskp) == -1)
err(1, "putinode: %s", diskp->d_error);
}
.Ed
.Sh RETURN VALUES
The
.Fn getinode
and
.Fn putinode
functions return 0 on success, or \-1 in case of any error.
A string describing the error is stored in
.Fa diskp->d_error .
The global
.Va errno
often provides additional information.
.Sh ERRORS
The function
.Fn getinode
may fail and set
.Va errno
for any of the errors specified for the library function
.Xr pread 2 .
It can also fail if the inode number is out of the range of inodes
in the filesystem.
.Pp
The function
.Fn putinode
may fail and set
.Va errno
for any of the errors specified for the library functions
.Xr ufs_disk_write 3
or
.Xr pwrite 2 .
.Pp
Additionally both functions may follow the
.Xr libufs 3
error methodologies in case of a device error.
.Sh SEE ALSO
.Xr pread 2 ,
.Xr pwrite 2 ,
.Xr libufs 3 ,
.Xr ufs_disk_write 3
.Sh HISTORY
These functions first appeared as part of
.Xr libufs 3
in
.Fx 13.0 .
.Sh AUTHORS
.An Marshall Kirk McKusick Aq Mt mckusick@freebsd.org

View File

@ -49,18 +49,16 @@ __FBSDID("$FreeBSD$");
#include <libufs.h>
int
getino(struct uufsd *disk, void **dino, ino_t inode, int *mode)
getinode(struct uufsd *disk, union dinodep *dp, ino_t inum)
{
ino_t min, max;
caddr_t inoblock;
struct ufs1_dinode *dp1;
struct ufs2_dinode *dp2;
struct fs *fs;
ERROR(disk, NULL);
fs = &disk->d_fs;
if (inode >= (ino_t)fs->fs_ipg * fs->fs_ncg) {
if (inum >= (ino_t)fs->fs_ipg * fs->fs_ncg) {
ERROR(disk, "inode number out of range");
return (-1);
}
@ -76,26 +74,22 @@ getino(struct uufsd *disk, void **dino, ino_t inode, int *mode)
}
disk->d_inoblock = inoblock;
}
if (inode >= min && inode < max)
if (inum >= min && inum < max)
goto gotit;
bread(disk, fsbtodb(fs, ino_to_fsba(fs, inode)), inoblock,
bread(disk, fsbtodb(fs, ino_to_fsba(fs, inum)), inoblock,
fs->fs_bsize);
disk->d_inomin = min = inode - (inode % INOPB(fs));
disk->d_inomin = min = inum - (inum % INOPB(fs));
disk->d_inomax = max = min + INOPB(fs);
gotit: switch (disk->d_ufs) {
case 1:
dp1 = &((struct ufs1_dinode *)inoblock)[inode - min];
if (mode != NULL)
*mode = dp1->di_mode & IFMT;
if (dino != NULL)
*dino = dp1;
disk->d_dp.dp1 = &((struct ufs1_dinode *)inoblock)[inum - min];
if (dp != NULL)
*dp = disk->d_dp;
return (0);
case 2:
dp2 = &((struct ufs2_dinode *)inoblock)[inode - min];
if (mode != NULL)
*mode = dp2->di_mode & IFMT;
if (dino != NULL)
*dino = dp2;
disk->d_dp.dp2 = &((struct ufs2_dinode *)inoblock)[inum - min];
if (dp != NULL)
*dp = disk->d_dp;
return (0);
default:
break;
@ -105,7 +99,7 @@ gotit: switch (disk->d_ufs) {
}
int
putino(struct uufsd *disk)
putinode(struct uufsd *disk)
{
struct fs *fs;

View File

@ -35,6 +35,10 @@
/*
* libufs structures.
*/
union dinodep {
struct ufs1_dinode *dp1;
struct ufs2_dinode *dp2;
};
/*
* userland ufs disk.
@ -49,6 +53,7 @@ struct uufsd {
caddr_t d_inoblock; /* inode block */
uint32_t d_inomin; /* low inode (not ino_t for ABI compat) */
uint32_t d_inomax; /* high inode (not ino_t for ABI compat) */
union dinodep d_dp; /* pointer to currently active inode */
union {
struct fs d_fs; /* filesystem information */
char d_sb[MAXBSIZE];
@ -135,8 +140,8 @@ int cgwrite1(struct uufsd *, int);
/*
* inode.c
*/
int getino(struct uufsd *, void **, ino_t, int *);
int putino(struct uufsd *);
int getinode(struct uufsd *, union dinodep *, ino_t);
int putinode(struct uufsd *);
/*
* sblock.c

View File

@ -10,7 +10,6 @@
# $FreeBSD$
base="Base system"
doc="Additional Documentation"
kernel="Kernel"
ports="Ports tree"
src="System source tree"
@ -19,7 +18,6 @@ tests="Test suite"
desc_base="${base} (MANDATORY)"
desc_base_dbg="${base} (Debugging)"
desc_doc="${doc}"
desc_kernel="${kernel} (MANDATORY)"
desc_kernel_dbg="${kernel} (Debugging)"
desc_kernel_alt="Alternate ${kernel}"
@ -30,7 +28,6 @@ desc_ports="${ports}"
desc_src="${src}"
desc_tests="${tests}"
default_doc=off
default_src=off
default_ports=off
default_tests=off
@ -51,6 +48,9 @@ for i in ${*}; do
desc="$(eval echo \"\${desc_${distname}}\")"
case ${i} in
doc.txz)
continue
;;
kernel-dbg.txz)
desc="${desc_kernel_dbg}"
;;

View File

@ -62,11 +62,6 @@ __FBSDID("$FreeBSD$");
#include <stdio.h>
#include <unistd.h>
union dinodep {
struct ufs1_dinode *dp1;
struct ufs2_dinode *dp2;
};
static void
usage(void)
{
@ -104,8 +99,8 @@ main(int argc, char *argv[])
}
(void)printf("clearing %d\n", inonum);
if (getino(&disk, (void **)&dp, inonum, NULL) == -1) {
printf("getino: %s\n", disk.d_error);
if (getinode(&disk, &dp, inonum) == -1) {
printf("getinode: %s\n", disk.d_error);
exitval = 1;
continue;
}
@ -119,7 +114,7 @@ main(int argc, char *argv[])
memset(dp.dp2, 0, sizeof(*dp.dp2));
dp.dp2->di_gen = generation;
}
putino(&disk);
putinode(&disk);
(void)fsync(disk.d_fd);
}
(void)ufs_disk_close(&disk);

View File

@ -126,7 +126,7 @@ void dumpabort(int signo) __dead2;
void dump_getfstab(void);
char *rawname(char *cp);
union dinode *getinode(ino_t inum, int *mode);
union dinode *getino(ino_t inum, int *mode);
/* rdump routines */
#ifdef RDUMP

View File

@ -549,7 +549,7 @@ main(int argc, char *argv[])
/*
* Skip directory inodes deleted and maybe reallocated
*/
dp = getinode(ino, &mode);
dp = getino(ino, &mode);
if (mode != IFDIR)
continue;
(void)dumpino(dp, ino);
@ -568,7 +568,7 @@ main(int argc, char *argv[])
/*
* Skip inodes deleted and reallocated as directories.
*/
dp = getinode(ino, &mode);
dp = getino(ino, &mode);
if (mode == IFDIR)
continue;
(void)dumpino(dp, ino);

View File

@ -195,7 +195,7 @@ mapfiles(ino_t maxino, long *tapesize)
}
for (i = 0; i < inosused; i++, ino++) {
if (ino < UFS_ROOTINO ||
(dp = getinode(ino, &mode)) == NULL ||
(dp = getino(ino, &mode)) == NULL ||
(mode & IFMT) == 0)
continue;
if (ino >= maxino) {
@ -277,7 +277,7 @@ mapdirs(ino_t maxino, long *tapesize)
nodump = !nonodump && (TSTINO(ino, usedinomap) == 0);
if ((isdir & 1) == 0 || (TSTINO(ino, dumpinomap) && !nodump))
continue;
dp = getinode(ino, &i);
dp = getino(ino, &i);
/*
* inode buf may change in searchdir().
*/
@ -421,7 +421,7 @@ searchdir(
continue;
}
if (nodump) {
ip = getinode(dp->d_ino, &mode);
ip = getino(dp->d_ino, &mode);
if (TSTINO(dp->d_ino, dumpinomap)) {
CLRINO(dp->d_ino, dumpinomap);
*tapesize -= blockest(ip);
@ -875,7 +875,7 @@ writeheader(ino_t ino)
}
union dinode *
getinode(ino_t inum, int *modep)
getino(ino_t inum, int *modep)
{
static ino_t minino, maxino;
static caddr_t inoblock;

View File

@ -262,7 +262,7 @@ main(int argc, char **argv)
dbg_csp = fscs;
/* ... and dump it */
for(dbg_csc=0; dbg_csc<sblock.fs_ncg; dbg_csc++) {
for (dbg_csc = 0; dbg_csc < sblock.fs_ncg; dbg_csc++) {
snprintf(dbg_line, sizeof(dbg_line),
"%d. csum in fscs", dbg_csc);
DBG_DUMP_CSUM(&sblock,
@ -342,8 +342,8 @@ void
dump_whole_ufs1_inode(ino_t inode, int level)
{
DBG_FUNC("dump_whole_ufs1_inode")
struct ufs1_dinode *ino;
int rb, mode;
union dinodep dp;
int rb;
unsigned int ind2ctr, ind3ctr;
ufs1_daddr_t *ind2ptr, *ind3ptr;
char comment[80];
@ -353,10 +353,10 @@ dump_whole_ufs1_inode(ino_t inode, int level)
/*
* Read the inode from disk/cache.
*/
if (getino(&disk, (void **)&ino, inode, &mode) == -1)
err(1, "getino: %s", disk.d_error);
if (getinode(&disk, &dp, inode) == -1)
err(1, "getinode: %s", disk.d_error);
if(ino->di_nlink==0) {
if (dp.dp1->di_nlink == 0) {
DBG_LEAVE;
return; /* inode not in use */
}
@ -368,7 +368,7 @@ dump_whole_ufs1_inode(ino_t inode, int level)
if (level & 0x100) {
DBG_DUMP_INO(&sblock,
comment,
ino);
dp.dp1);
}
if (!(level & 0x200)) {
@ -379,13 +379,13 @@ dump_whole_ufs1_inode(ino_t inode, int level)
/*
* Ok, now prepare for dumping all direct and indirect pointers.
*/
rb = howmany(ino->di_size, sblock.fs_bsize) - UFS_NDADDR;
if(rb>0) {
rb = howmany(dp.dp1->di_size, sblock.fs_bsize) - UFS_NDADDR;
if (rb > 0) {
/*
* Dump single indirect block.
*/
if (bread(&disk, fsbtodb(&sblock, ino->di_ib[0]), (void *)&i1blk,
(size_t)sblock.fs_bsize) == -1) {
if (bread(&disk, fsbtodb(&sblock, dp.dp1->di_ib[0]),
(void *)&i1blk, (size_t)sblock.fs_bsize) == -1) {
err(1, "bread: %s", disk.d_error);
}
snprintf(comment, sizeof(comment), "Inode 0x%08jx: indirect 0",
@ -394,14 +394,14 @@ dump_whole_ufs1_inode(ino_t inode, int level)
comment,
i1blk,
(size_t)rb);
rb-=howmany(sblock.fs_bsize, sizeof(ufs1_daddr_t));
rb -= howmany(sblock.fs_bsize, sizeof(ufs1_daddr_t));
}
if(rb>0) {
if (rb > 0) {
/*
* Dump double indirect blocks.
*/
if (bread(&disk, fsbtodb(&sblock, ino->di_ib[1]), (void *)&i2blk,
(size_t)sblock.fs_bsize) == -1) {
if (bread(&disk, fsbtodb(&sblock, dp.dp1->di_ib[1]),
(void *)&i2blk, (size_t)sblock.fs_bsize) == -1) {
err(1, "bread: %s", disk.d_error);
}
snprintf(comment, sizeof(comment), "Inode 0x%08jx: indirect 1",
@ -410,12 +410,12 @@ dump_whole_ufs1_inode(ino_t inode, int level)
comment,
i2blk,
howmany(rb, howmany(sblock.fs_bsize, sizeof(ufs1_daddr_t))));
for(ind2ctr=0; ((ind2ctr < howmany(sblock.fs_bsize,
sizeof(ufs1_daddr_t))) && (rb>0)); ind2ctr++) {
ind2ptr=&((ufs1_daddr_t *)(void *)&i2blk)[ind2ctr];
for (ind2ctr = 0; ((ind2ctr < howmany(sblock.fs_bsize,
sizeof(ufs1_daddr_t))) && (rb > 0)); ind2ctr++) {
ind2ptr = &((ufs1_daddr_t *)(void *)&i2blk)[ind2ctr];
if (bread(&disk, fsbtodb(&sblock, *ind2ptr), (void *)&i1blk,
(size_t)sblock.fs_bsize) == -1) {
if (bread(&disk, fsbtodb(&sblock, *ind2ptr),
(void *)&i1blk, (size_t)sblock.fs_bsize) == -1) {
err(1, "bread: %s", disk.d_error);
}
snprintf(comment, sizeof(comment),
@ -425,15 +425,15 @@ dump_whole_ufs1_inode(ino_t inode, int level)
comment,
i1blk,
(size_t)rb);
rb-=howmany(sblock.fs_bsize, sizeof(ufs1_daddr_t));
rb -= howmany(sblock.fs_bsize, sizeof(ufs1_daddr_t));
}
}
if(rb>0) {
if (rb > 0) {
/*
* Dump triple indirect blocks.
*/
if (bread(&disk, fsbtodb(&sblock, ino->di_ib[2]), (void *)&i3blk,
(size_t)sblock.fs_bsize) == -1) {
if (bread(&disk, fsbtodb(&sblock, dp.dp1->di_ib[2]),
(void *)&i3blk, (size_t)sblock.fs_bsize) == -1) {
err(1, "bread: %s", disk.d_error);
}
snprintf(comment, sizeof(comment), "Inode 0x%08jx: indirect 2",
@ -445,12 +445,12 @@ dump_whole_ufs1_inode(ino_t inode, int level)
howmany(rb,
SQUARE(howmany(sblock.fs_bsize, sizeof(ufs1_daddr_t)))));
#undef SQUARE
for(ind3ctr=0; ((ind3ctr<howmany(sblock.fs_bsize,
sizeof(ufs1_daddr_t)))&&(rb>0)); ind3ctr++) {
ind3ptr=&((ufs1_daddr_t *)(void *)&i3blk)[ind3ctr];
for (ind3ctr = 0; ((ind3ctr < howmany(sblock.fs_bsize,
sizeof(ufs1_daddr_t))) && (rb > 0)); ind3ctr++) {
ind3ptr = &((ufs1_daddr_t *)(void *)&i3blk)[ind3ctr];
if (bread(&disk, fsbtodb(&sblock, *ind3ptr), (void *)&i2blk,
(size_t)sblock.fs_bsize) == -1) {
if (bread(&disk, fsbtodb(&sblock, *ind3ptr),
(void *)&i2blk, (size_t)sblock.fs_bsize) == -1) {
err(1, "bread: %s", disk.d_error);
}
snprintf(comment, sizeof(comment),
@ -461,8 +461,8 @@ dump_whole_ufs1_inode(ino_t inode, int level)
i2blk,
howmany(rb,
howmany(sblock.fs_bsize, sizeof(ufs1_daddr_t))));
for(ind2ctr=0; ((ind2ctr < howmany(sblock.fs_bsize,
sizeof(ufs1_daddr_t)))&&(rb>0)); ind2ctr++) {
for (ind2ctr = 0; ((ind2ctr < howmany(sblock.fs_bsize,
sizeof(ufs1_daddr_t))) && (rb > 0)); ind2ctr++) {
ind2ptr=&((ufs1_daddr_t *)(void *)&i2blk)
[ind2ctr];
if (bread(&disk, fsbtodb(&sblock, *ind2ptr),
@ -477,7 +477,7 @@ dump_whole_ufs1_inode(ino_t inode, int level)
comment,
i1blk,
(size_t)rb);
rb-=howmany(sblock.fs_bsize,
rb -= howmany(sblock.fs_bsize,
sizeof(ufs1_daddr_t));
}
}
@ -496,8 +496,8 @@ void
dump_whole_ufs2_inode(ino_t inode, int level)
{
DBG_FUNC("dump_whole_ufs2_inode")
struct ufs2_dinode *ino;
int rb, mode;
union dinodep dp;
int rb;
unsigned int ind2ctr, ind3ctr;
ufs2_daddr_t *ind2ptr, *ind3ptr;
char comment[80];
@ -507,10 +507,10 @@ dump_whole_ufs2_inode(ino_t inode, int level)
/*
* Read the inode from disk/cache.
*/
if (getino(&disk, (void **)&ino, inode, &mode) == -1)
err(1, "getino: %s", disk.d_error);
if (getinode(&disk, &dp, inode) == -1)
err(1, "getinode: %s", disk.d_error);
if (ino->di_nlink == 0) {
if (dp.dp2->di_nlink == 0) {
DBG_LEAVE;
return; /* inode not in use */
}
@ -520,7 +520,7 @@ dump_whole_ufs2_inode(ino_t inode, int level)
*/
snprintf(comment, sizeof(comment), "Inode 0x%08jx", (uintmax_t)inode);
if (level & 0x100) {
DBG_DUMP_INO(&sblock, comment, ino);
DBG_DUMP_INO(&sblock, comment, dp.dp2);
}
if (!(level & 0x200)) {
@ -531,13 +531,13 @@ dump_whole_ufs2_inode(ino_t inode, int level)
/*
* Ok, now prepare for dumping all direct and indirect pointers.
*/
rb = howmany(ino->di_size, sblock.fs_bsize) - UFS_NDADDR;
rb = howmany(dp.dp2->di_size, sblock.fs_bsize) - UFS_NDADDR;
if (rb > 0) {
/*
* Dump single indirect block.
*/
if (bread(&disk, fsbtodb(&sblock, ino->di_ib[0]), (void *)&i1blk,
(size_t)sblock.fs_bsize) == -1) {
if (bread(&disk, fsbtodb(&sblock, dp.dp2->di_ib[0]),
(void *)&i1blk, (size_t)sblock.fs_bsize) == -1) {
err(1, "bread: %s", disk.d_error);
}
snprintf(comment, sizeof(comment), "Inode 0x%08jx: indirect 0",
@ -549,8 +549,8 @@ dump_whole_ufs2_inode(ino_t inode, int level)
/*
* Dump double indirect blocks.
*/
if (bread(&disk, fsbtodb(&sblock, ino->di_ib[1]), (void *)&i2blk,
(size_t)sblock.fs_bsize) == -1) {
if (bread(&disk, fsbtodb(&sblock, dp.dp2->di_ib[1]),
(void *)&i2blk, (size_t)sblock.fs_bsize) == -1) {
err(1, "bread: %s", disk.d_error);
}
snprintf(comment, sizeof(comment), "Inode 0x%08jx: indirect 1",
@ -563,8 +563,8 @@ dump_whole_ufs2_inode(ino_t inode, int level)
sizeof(ufs2_daddr_t))) && (rb>0)); ind2ctr++) {
ind2ptr = &((ufs2_daddr_t *)(void *)&i2blk)[ind2ctr];
if (bread(&disk, fsbtodb(&sblock, *ind2ptr), (void *)&i1blk,
(size_t)sblock.fs_bsize) == -1) {
if (bread(&disk, fsbtodb(&sblock, *ind2ptr),
(void *)&i1blk, (size_t)sblock.fs_bsize) == -1) {
err(1, "bread: %s", disk.d_error);
}
snprintf(comment, sizeof(comment),
@ -578,8 +578,8 @@ dump_whole_ufs2_inode(ino_t inode, int level)
/*
* Dump triple indirect blocks.
*/
if (bread(&disk, fsbtodb(&sblock, ino->di_ib[2]), (void *)&i3blk,
(size_t)sblock.fs_bsize) == -1) {
if (bread(&disk, fsbtodb(&sblock, dp.dp2->di_ib[2]),
(void *)&i3blk, (size_t)sblock.fs_bsize) == -1) {
err(1, "bread: %s", disk.d_error);
}
snprintf(comment, sizeof(comment), "Inode 0x%08jx: indirect 2",
@ -595,8 +595,8 @@ dump_whole_ufs2_inode(ino_t inode, int level)
sizeof(ufs2_daddr_t))) && (rb > 0)); ind3ctr++) {
ind3ptr = &((ufs2_daddr_t *)(void *)&i3blk)[ind3ctr];
if (bread(&disk, fsbtodb(&sblock, *ind3ptr), (void *)&i2blk,
(size_t)sblock.fs_bsize) == -1) {
if (bread(&disk, fsbtodb(&sblock, *ind3ptr),
(void *)&i2blk, (size_t)sblock.fs_bsize) == -1) {
err(1, "bread: %s", disk.d_error);
}
snprintf(comment, sizeof(comment),
@ -610,8 +610,9 @@ dump_whole_ufs2_inode(ino_t inode, int level)
for (ind2ctr = 0; ((ind2ctr < howmany(sblock.fs_bsize,
sizeof(ufs2_daddr_t))) && (rb > 0)); ind2ctr++) {
ind2ptr = &((ufs2_daddr_t *)(void *)&i2blk) [ind2ctr];
if (bread(&disk, fsbtodb(&sblock, *ind2ptr), (void *)&i1blk,
(size_t)sblock.fs_bsize) == -1) {
if (bread(&disk, fsbtodb(&sblock, *ind2ptr),
(void *)&i1blk, (size_t)sblock.fs_bsize)
== -1) {
err(1, "bread: %s", disk.d_error);
}
snprintf(comment, sizeof(comment),

View File

@ -254,14 +254,14 @@ fileerror(ino_t cwd, ino_t ino, const char *errmesg)
char pathbuf[MAXPATHLEN + 1];
pwarn("%s ", errmesg);
pinode(ino);
printf("\n");
getpathname(pathbuf, cwd, ino);
if (ino < UFS_ROOTINO || ino > maxino) {
pfatal("NAME=%s\n", pathbuf);
pfatal("out-of-range inode number %ju", (uintmax_t)ino);
return;
}
dp = ginode(ino);
prtinode(ino, dp);
printf("\n");
getpathname(pathbuf, cwd, ino);
if (ftypeok(dp))
pfatal("%s=%s\n",
(DIP(dp, di_mode) & IFMT) == IFDIR ? "DIR" : "FILE",
@ -309,7 +309,7 @@ adjust(struct inodesc *idesc, int lcnt)
if (lcnt != 0) {
pwarn("LINK COUNT %s", (lfdir == idesc->id_number) ? lfname :
((DIP(dp, di_mode) & IFMT) == IFDIR ? "DIR" : "FILE"));
pinode(idesc->id_number);
prtinode(idesc->id_number, dp);
printf(" COUNT %d SHOULD BE %d",
DIP(dp, di_nlink), DIP(dp, di_nlink) - lcnt);
if (preen || usedsoftdep) {
@ -390,7 +390,8 @@ linkup(ino_t orphan, ino_t parentdir, char *name)
dp = ginode(orphan);
lostdir = (DIP(dp, di_mode) & IFMT) == IFDIR;
pwarn("UNREF %s ", lostdir ? "DIR" : "FILE");
pinode(orphan);
prtinode(orphan, dp);
printf("\n");
if (preen && DIP(dp, di_size) == 0)
return (0);
if (cursnapshot != 0) {

View File

@ -463,8 +463,8 @@ void pass4(void);
int pass4check(struct inodesc *);
void pass5(void);
void pfatal(const char *fmt, ...) __printflike(1, 2);
void pinode(ino_t ino);
void propagate(void);
void prtinode(ino_t ino, union dinode *dp);
void pwarn(const char *fmt, ...) __printflike(1, 2);
int readsb(int listerr);
int reply(const char *question);

View File

@ -392,13 +392,12 @@ clear_inode(struct ufs2_dinode *dino)
void
gjournal_check(const char *filesys)
{
struct ufs2_dinode *dino;
void *p;
union dinodep dp;
struct cgchain *cgc;
struct cg *cgp;
uint8_t *inosused;
ino_t cino, ino;
int cg, mode;
int cg;
devnam = filesys;
opendisk();
@ -444,19 +443,20 @@ gjournal_check(const char *filesys)
/* Unallocated? Skip it. */
if (isclr(inosused, cino))
continue;
if (getino(diskp, &p, ino, &mode) == -1)
err(1, "getino(cg=%d ino=%ju)",
cg, (uintmax_t)ino);
dino = p;
if (getinode(diskp, &dp, ino) == -1)
err(1, "getinode (cg=%d ino=%ju) %s",
cg, (uintmax_t)ino, diskp->d_error);
/* Not a regular file nor directory? Skip it. */
if (!S_ISREG(dino->di_mode) && !S_ISDIR(dino->di_mode))
if (!S_ISREG(dp.dp2->di_mode) &&
!S_ISDIR(dp.dp2->di_mode))
continue;
/* Has reference(s)? Skip it. */
if (dino->di_nlink > 0)
if (dp.dp2->di_nlink > 0)
continue;
//printf("Clearing inode=%d (size=%jd)\n", ino, (intmax_t)dino->di_size);
/* printf("Clearing inode=%d (size=%jd)\n", ino,
(intmax_t)dp.dp2->di_size); */
/* Free inode's blocks. */
clear_inode(dino);
clear_inode(dp.dp2);
/* Deallocate it. */
clrbit(inosused, cino);
/* Update position of last used inode. */
@ -469,17 +469,17 @@ gjournal_check(const char *filesys)
cgp->cg_unrefs--;
fs->fs_unrefs--;
/* If this is directory, update related statistics. */
if (S_ISDIR(dino->di_mode)) {
if (S_ISDIR(dp.dp2->di_mode)) {
cgp->cg_cs.cs_ndir--;
fs->fs_cs(fs, cg).cs_ndir--;
fs->fs_cstotal.cs_ndir--;
}
/* Zero-fill the inode. */
*dino = ufs2_zino;
*dp.dp2 = ufs2_zino;
/* Write the inode back. */
if (putino(diskp) == -1)
err(1, "putino(cg=%d ino=%ju)",
cg, (uintmax_t)ino);
if (putinode(diskp) == -1)
err(1, "putinode (cg=%d ino=%ju) %s",
cg, (uintmax_t)ino, diskp->d_error);
if (cgp->cg_unrefs == 0) {
//printf("No more unreferenced inodes in cg=%d.\n", cg);
break;

View File

@ -49,6 +49,7 @@ __FBSDID("$FreeBSD$");
#include <pwd.h>
#include <string.h>
#include <time.h>
#include <libufs.h>
#include "fsck.h"
@ -342,7 +343,11 @@ getnextinode(ino_t inumber, int rebuildcg)
nextinop = inobuf.b_un.b_buf;
}
dp = (union dinode *)nextinop;
if (rebuildcg && nextinop == inobuf.b_un.b_buf) {
if (sblock.fs_magic == FS_UFS1_MAGIC)
nextinop += sizeof(struct ufs1_dinode);
else
nextinop += sizeof(struct ufs2_dinode);
if (rebuildcg && (char *)dp == inobuf.b_un.b_buf) {
/*
* Try to determine if we have reached the end of the
* allocated inodes.
@ -355,7 +360,7 @@ getnextinode(ino_t inumber, int rebuildcg)
UFS_NIADDR * sizeof(ufs2_daddr_t)) ||
dp->dp2.di_mode || dp->dp2.di_size)
return (NULL);
goto inodegood;
return (dp);
}
if (!ftypeok(dp))
return (NULL);
@ -389,11 +394,6 @@ getnextinode(ino_t inumber, int rebuildcg)
if (DIP(dp, di_ib[j]) != 0)
return (NULL);
}
inodegood:
if (sblock.fs_magic == FS_UFS1_MAGIC)
nextinop += sizeof(struct ufs1_dinode);
else
nextinop += sizeof(struct ufs2_dinode);
return (dp);
}
@ -534,7 +534,8 @@ clri(struct inodesc *idesc, const char *type, int flag)
if (flag == 1) {
pwarn("%s %s", type,
(DIP(dp, di_mode) & IFMT) == IFDIR ? "DIR" : "FILE");
pinode(idesc->id_number);
prtinode(idesc->id_number, dp);
printf("\n");
}
if (preen || reply("CLEAR") == 1) {
if (preen)
@ -600,9 +601,8 @@ clearentry(struct inodesc *idesc)
}
void
pinode(ino_t ino)
prtinode(ino_t ino, union dinode *dp)
{
union dinode *dp;
char *p;
struct passwd *pw;
time_t t;
@ -610,7 +610,6 @@ pinode(ino_t ino)
printf(" I=%lu ", (u_long)ino);
if (ino < UFS_ROOTINO || ino > maxino)
return;
dp = ginode(ino);
printf(" OWNER=");
if ((pw = getpwuid((int)DIP(dp, di_uid))) != NULL)
printf("%s ", pw->pw_name);

View File

@ -458,30 +458,40 @@ checkfilesys(char *filesys)
if (preen == 0 && yflag == 0 && sblock.fs_magic != FS_UFS1_MAGIC &&
fswritefd != -1 && getosreldate() >= P_OSREL_CK_CYLGRP) {
if ((sblock.fs_metackhash & CK_CYLGRP) == 0 &&
reply("ADD CYLINDER GROUP CHECK-HASH PROTECTION") != 0)
reply("ADD CYLINDER GROUP CHECK-HASH PROTECTION") != 0) {
ckhashadd |= CK_CYLGRP;
sblock.fs_metackhash |= CK_CYLGRP;
}
if ((sblock.fs_metackhash & CK_SUPERBLOCK) == 0 &&
getosreldate() >= P_OSREL_CK_SUPERBLOCK &&
reply("ADD SUPERBLOCK CHECK-HASH PROTECTION") != 0) {
ckhashadd |= CK_SUPERBLOCK;
sblock.fs_metackhash |= CK_SUPERBLOCK;
sbdirty();
}
#ifdef notyet
if ((sblock.fs_metackhash & CK_INODE) == 0 &&
getosreldate() >= P_OSREL_CK_INODE &&
reply("ADD INODE CHECK-HASH PROTECTION") != 0)
reply("ADD INODE CHECK-HASH PROTECTION") != 0) {
ckhashadd |= CK_INODE;
sblock.fs_metackhash |= CK_INODE;
}
if ((sblock.fs_metackhash & CK_INDIR) == 0 &&
getosreldate() >= P_OSREL_CK_INDIR &&
reply("ADD INDIRECT BLOCK CHECK-HASH PROTECTION") != 0)
reply("ADD INDIRECT BLOCK CHECK-HASH PROTECTION") != 0) {
ckhashadd |= CK_INDIR;
sblock.fs_metackhash |= CK_INDIR;
}
if ((sblock.fs_metackhash & CK_DIR) == 0 &&
getosreldate() >= P_OSREL_CK_DIR &&
reply("ADD DIRECTORY CHECK-HASH PROTECTION") != 0)
reply("ADD DIRECTORY CHECK-HASH PROTECTION") != 0) {
ckhashadd |= CK_DIR;
sblock.fs_metackhash |= CK_DIR;
}
#endif /* notyet */
if (ckhashadd != 0)
if (ckhashadd != 0) {
sblock.fs_flags |= FS_METACKHASH;
sbdirty();
}
}
/*
* Cleared if any questions answered no. Used to decide if

View File

@ -74,11 +74,8 @@ pass5(void)
memset(newcg, 0, (size_t)fs->fs_cgsize);
newcg->cg_niblk = fs->fs_ipg;
/* check to see if we are to add a cylinder group check hash */
if ((ckhashadd & CK_CYLGRP) != 0) {
fs->fs_metackhash |= CK_CYLGRP;
if ((ckhashadd & CK_CYLGRP) != 0)
rewritecg = 1;
sbdirty();
}
if (cvtlevel >= 3) {
if (fs->fs_maxcontig < 2 && fs->fs_contigsumsize > 0) {
if (preen)

View File

@ -175,7 +175,7 @@ fsirand(char *device)
}
/* For each cylinder group, randomize inodes and update backup sblock */
for (cg = 0, inumber = 0; cg < (int)sblock->fs_ncg; cg++) {
for (cg = 0, inumber = UFS_ROOTINO; cg < (int)sblock->fs_ncg; cg++) {
/* Read in inodes, then print or randomize generation nums */
dblk = fsbtodb(sblock, ino_to_fsba(sblock, inumber));
if (lseek(devfd, (off_t)dblk * bsize, SEEK_SET) < 0) {
@ -187,21 +187,22 @@ fsirand(char *device)
return (1);
}
for (n = 0; n < (int)sblock->fs_ipg; n++, inumber++) {
if (sblock->fs_magic == FS_UFS1_MAGIC)
dp1 = &((struct ufs1_dinode *)inodebuf)[n];
else
dp2 = &((struct ufs2_dinode *)inodebuf)[n];
if (inumber >= UFS_ROOTINO) {
if (printonly)
(void)printf("ino %ju gen %08x\n",
(uintmax_t)inumber,
sblock->fs_magic == FS_UFS1_MAGIC ?
dp1->di_gen : dp2->di_gen);
else if (sblock->fs_magic == FS_UFS1_MAGIC)
dp1->di_gen = random();
else
dp2->di_gen = random();
dp1 = (struct ufs1_dinode *)(void *)inodebuf;
dp2 = (struct ufs2_dinode *)(void *)inodebuf;
for (n = cg > 0 ? 0 : UFS_ROOTINO;
n < (int)sblock->fs_ipg;
n++, inumber++) {
if (printonly) {
(void)printf("ino %ju gen %08x\n",
(uintmax_t)inumber,
sblock->fs_magic == FS_UFS1_MAGIC ?
dp1->di_gen : dp2->di_gen);
} else if (sblock->fs_magic == FS_UFS1_MAGIC) {
dp1->di_gen = arc4random();
dp1++;
} else {
dp2->di_gen = arc4random();
dp2++;
}
}

View File

@ -301,16 +301,21 @@ initcg(int cylno, time_t modtime, int fso, unsigned int Nflag)
{
DBG_FUNC("initcg")
static caddr_t iobuf;
static long iobufsize;
long blkno, start;
ino_t ino;
ufs2_daddr_t i, cbase, dmax;
struct ufs1_dinode *dp1;
struct ufs2_dinode *dp2;
struct csum *cs;
uint j, d, dupper, dlower;
if (iobuf == NULL && (iobuf = malloc(sblock.fs_bsize * 3)) == NULL)
errx(37, "panic: cannot allocate I/O buffer");
if (iobuf == NULL) {
iobufsize = 2 * sblock.fs_bsize;
if ((iobuf = malloc(iobufsize)) == NULL)
errx(37, "panic: cannot allocate I/O buffer");
memset(iobuf, '\0', iobufsize);
}
/*
* Determine block bounds for cylinder group.
* Allow space for super block summary information in first
@ -374,13 +379,30 @@ initcg(int cylno, time_t modtime, int fso, unsigned int Nflag)
setbit(cg_inosused(&acg), ino);
acg.cg_cs.cs_nifree--;
}
/*
* Initialize the initial inode blocks.
*/
dp1 = (struct ufs1_dinode *)(void *)iobuf;
dp2 = (struct ufs2_dinode *)(void *)iobuf;
for (i = 0; i < acg.cg_initediblk; i++) {
if (sblock.fs_magic == FS_UFS1_MAGIC) {
dp1->di_gen = arc4random();
dp1++;
} else {
dp2->di_gen = arc4random();
dp2++;
}
}
wtfs(fsbtodb(&sblock, cgimin(&sblock, cylno)), iobufsize, iobuf,
fso, Nflag);
/*
* For the old file system, we have to initialize all the inodes.
*/
if (sblock.fs_magic == FS_UFS1_MAGIC) {
bzero(iobuf, sblock.fs_bsize);
for (i = 0; i < sblock.fs_ipg / INOPF(&sblock);
i += sblock.fs_frag) {
if (sblock.fs_magic == FS_UFS1_MAGIC &&
sblock.fs_ipg > 2 * INOPB(&sblock)) {
for (i = 2 * sblock.fs_frag;
i < sblock.fs_ipg / INOPF(&sblock);
i += sblock.fs_frag) {
dp1 = (struct ufs1_dinode *)(void *)iobuf;
for (j = 0; j < INOPB(&sblock); j++) {
dp1->di_gen = arc4random();
@ -463,12 +485,8 @@ initcg(int cylno, time_t modtime, int fso, unsigned int Nflag)
*cs = acg.cg_cs;
cgckhash(&acg);
memcpy(iobuf, &acg, sblock.fs_cgsize);
memset(iobuf + sblock.fs_cgsize, '\0',
sblock.fs_bsize * 3 - sblock.fs_cgsize);
wtfs(fsbtodb(&sblock, cgtod(&sblock, cylno)),
sblock.fs_bsize * 3, iobuf, fso, Nflag);
wtfs(fsbtodb(&sblock, cgtod(&sblock, cylno)), sblock.fs_cgsize, &acg,
fso, Nflag);
DBG_DUMP_CG(&sblock, "new cg", &acg);
DBG_LEAVE;

View File

@ -1,7 +1,7 @@
.\"
.\" $FreeBSD$
.\"
.Dd November 12, 2018
.Dd November 13, 2018
.Dt IPFW 8
.Os
.Sh NAME
@ -105,16 +105,6 @@ in-kernel NAT.
.Ar number
.Cm config
.Ar config-options
.Pp
.Nm
.Op Fl cfnNqS
.Oo
.Fl p Ar preproc
.Oo
.Ar preproc-flags
.Oc
.Oc
.Ar pathname
.Ss STATEFUL IPv6/IPv4 NETWORK ADDRESS AND PROTOCOL TRANSLATION
.Nm
.Oo Cm set Ar N Oc Cm nat64lsn Ar name Cm create Ar create-options
@ -166,6 +156,16 @@ in-kernel NAT.
.Cm internal talist
.Nm
.Cm internal vlist
.Ss LIST OF RULES AND PREPROCESSING
.Nm
.Op Fl cfnNqS
.Oo
.Fl p Ar preproc
.Oo
.Ar preproc-flags
.Oc
.Oc
.Ar pathname
.Sh DESCRIPTION
The
.Nm

View File

@ -1029,7 +1029,7 @@ goth:
void
iput(union dinode *ip, ino_t ino)
{
ufs2_daddr_t d;
union dinodep dp;
bread(&disk, part_ofs + fsbtodb(&sblock, cgtod(&sblock, 0)), (char *)&acg,
sblock.fs_cgsize);
@ -1043,20 +1043,15 @@ iput(union dinode *ip, ino_t ino)
err(1, "iput: cgput: %s", disk.d_error);
sblock.fs_cstotal.cs_nifree--;
fscs[0].cs_nifree--;
if (ino >= (unsigned long)sblock.fs_ipg * sblock.fs_ncg) {
printf("fsinit: inode value out of range (%ju).\n",
(uintmax_t)ino);
if (getinode(&disk, &dp, ino) == -1) {
printf("iput: %s\n", disk.d_error);
exit(32);
}
d = fsbtodb(&sblock, ino_to_fsba(&sblock, ino));
bread(&disk, part_ofs + d, (char *)iobuf, sblock.fs_bsize);
if (sblock.fs_magic == FS_UFS1_MAGIC)
((struct ufs1_dinode *)iobuf)[ino_to_fsbo(&sblock, ino)] =
ip->dp1;
*dp.dp1 = ip->dp1;
else
((struct ufs2_dinode *)iobuf)[ino_to_fsbo(&sblock, ino)] =
ip->dp2;
wtfs(d, sblock.fs_bsize, (char *)iobuf);
*dp.dp2 = ip->dp2;
putinode(&disk);
}
/*

View File

@ -679,41 +679,36 @@ dir_search(ufs2_daddr_t blk, int bytes)
static ino_t
journal_findfile(void)
{
struct ufs1_dinode *dp1;
struct ufs2_dinode *dp2;
union dinodep dp;
ino_t ino;
int mode;
void *ip;
int i;
if (getino(&disk, &ip, UFS_ROOTINO, &mode) != 0) {
warn("Failed to get root inode");
if (getinode(&disk, &dp, UFS_ROOTINO) != 0) {
warn("Failed to get root inode: %s", disk.d_error);
return (-1);
}
dp2 = ip;
dp1 = ip;
if (sblock.fs_magic == FS_UFS1_MAGIC) {
if ((off_t)dp1->di_size >= lblktosize(&sblock, UFS_NDADDR)) {
if ((off_t)dp.dp1->di_size >= lblktosize(&sblock, UFS_NDADDR)) {
warnx("UFS_ROOTINO extends beyond direct blocks.");
return (-1);
}
for (i = 0; i < UFS_NDADDR; i++) {
if (dp1->di_db[i] == 0)
if (dp.dp1->di_db[i] == 0)
break;
if ((ino = dir_search(dp1->di_db[i],
sblksize(&sblock, (off_t)dp1->di_size, i))) != 0)
if ((ino = dir_search(dp.dp1->di_db[i],
sblksize(&sblock, (off_t)dp.dp1->di_size, i))) != 0)
return (ino);
}
} else {
if ((off_t)dp2->di_size >= lblktosize(&sblock, UFS_NDADDR)) {
if ((off_t)dp.dp2->di_size >= lblktosize(&sblock, UFS_NDADDR)) {
warnx("UFS_ROOTINO extends beyond direct blocks.");
return (-1);
}
for (i = 0; i < UFS_NDADDR; i++) {
if (dp2->di_db[i] == 0)
if (dp.dp2->di_db[i] == 0)
break;
if ((ino = dir_search(dp2->di_db[i],
sblksize(&sblock, (off_t)dp2->di_size, i))) != 0)
if ((ino = dir_search(dp.dp2->di_db[i],
sblksize(&sblock, (off_t)dp.dp2->di_size, i))) != 0)
return (ino);
}
}
@ -795,23 +790,18 @@ dir_extend(ufs2_daddr_t blk, ufs2_daddr_t nblk, off_t size, ino_t ino)
static int
journal_insertfile(ino_t ino)
{
struct ufs1_dinode *dp1;
struct ufs2_dinode *dp2;
void *ip;
union dinodep dp;
ufs2_daddr_t nblk;
ufs2_daddr_t blk;
ufs_lbn_t lbn;
int size;
int mode;
int off;
if (getino(&disk, &ip, UFS_ROOTINO, &mode) != 0) {
warn("Failed to get root inode");
if (getinode(&disk, &dp, UFS_ROOTINO) != 0) {
warn("Failed to get root inode: %s", disk.d_error);
sbdirty();
return (-1);
}
dp2 = ip;
dp1 = ip;
blk = 0;
size = 0;
nblk = journal_balloc();
@ -824,15 +814,15 @@ journal_insertfile(ino_t ino)
* have to free them and extend the block.
*/
if (sblock.fs_magic == FS_UFS1_MAGIC) {
lbn = lblkno(&sblock, dp1->di_size);
off = blkoff(&sblock, dp1->di_size);
blk = dp1->di_db[lbn];
size = sblksize(&sblock, (off_t)dp1->di_size, lbn);
lbn = lblkno(&sblock, dp.dp1->di_size);
off = blkoff(&sblock, dp.dp1->di_size);
blk = dp.dp1->di_db[lbn];
size = sblksize(&sblock, (off_t)dp.dp1->di_size, lbn);
} else {
lbn = lblkno(&sblock, dp2->di_size);
off = blkoff(&sblock, dp2->di_size);
blk = dp2->di_db[lbn];
size = sblksize(&sblock, (off_t)dp2->di_size, lbn);
lbn = lblkno(&sblock, dp.dp2->di_size);
off = blkoff(&sblock, dp.dp2->di_size);
blk = dp.dp2->di_db[lbn];
size = sblksize(&sblock, (off_t)dp.dp2->di_size, lbn);
}
if (off != 0) {
if (dir_extend(blk, nblk, off, ino) == -1)
@ -843,16 +833,16 @@ journal_insertfile(ino_t ino)
return (-1);
}
if (sblock.fs_magic == FS_UFS1_MAGIC) {
dp1->di_blocks += (sblock.fs_bsize - size) / DEV_BSIZE;
dp1->di_db[lbn] = nblk;
dp1->di_size = lblktosize(&sblock, lbn+1);
dp.dp1->di_blocks += (sblock.fs_bsize - size) / DEV_BSIZE;
dp.dp1->di_db[lbn] = nblk;
dp.dp1->di_size = lblktosize(&sblock, lbn+1);
} else {
dp2->di_blocks += (sblock.fs_bsize - size) / DEV_BSIZE;
dp2->di_db[lbn] = nblk;
dp2->di_size = lblktosize(&sblock, lbn+1);
dp.dp2->di_blocks += (sblock.fs_bsize - size) / DEV_BSIZE;
dp.dp2->di_db[lbn] = nblk;
dp.dp2->di_size = lblktosize(&sblock, lbn+1);
}
if (putino(&disk) < 0) {
warn("Failed to write root inode");
if (putinode(&disk) < 0) {
warn("Failed to write root inode: %s", disk.d_error);
return (-1);
}
if (cgwrite(&disk) < 0) {
@ -916,11 +906,8 @@ indir_fill(ufs2_daddr_t blk, int level, int *resid)
static void
journal_clear(void)
{
struct ufs1_dinode *dp1;
struct ufs2_dinode *dp2;
union dinodep dp;
ino_t ino;
int mode;
void *ip;
ino = journal_findfile();
if (ino == (ino_t)-1 || ino == 0) {
@ -928,18 +915,16 @@ journal_clear(void)
return;
}
printf("Clearing journal flags from inode %ju\n", (uintmax_t)ino);
if (getino(&disk, &ip, ino, &mode) != 0) {
warn("Failed to get journal inode");
if (getinode(&disk, &dp, ino) != 0) {
warn("Failed to get journal inode: %s", disk.d_error);
return;
}
dp2 = ip;
dp1 = ip;
if (sblock.fs_magic == FS_UFS1_MAGIC)
dp1->di_flags = 0;
dp.dp1->di_flags = 0;
else
dp2->di_flags = 0;
if (putino(&disk) < 0) {
warn("Failed to write journal inode");
dp.dp2->di_flags = 0;
if (putinode(&disk) < 0) {
warn("Failed to write journal inode: %s", disk.d_error);
return;
}
}
@ -947,15 +932,12 @@ journal_clear(void)
static int
journal_alloc(int64_t size)
{
struct ufs1_dinode *dp1;
struct ufs2_dinode *dp2;
union dinodep dp;
ufs2_daddr_t blk;
void *ip;
struct cg *cgp;
int resid;
ino_t ino;
int blks;
int mode;
time_t utime;
int i;
@ -1007,8 +989,8 @@ journal_alloc(int64_t size)
break;
printf("Using inode %ju in cg %d for %jd byte journal\n",
(uintmax_t)ino, cgp->cg_cgx, size);
if (getino(&disk, &ip, ino, &mode) != 0) {
warn("Failed to get allocated inode");
if (getinode(&disk, &dp, ino) != 0) {
warn("Failed to get allocated inode: %s", disk.d_error);
sbdirty();
goto out;
}
@ -1017,39 +999,39 @@ journal_alloc(int64_t size)
* blocks and size uninitialized. This causes legacy
* fsck implementations to clear the inode.
*/
dp2 = ip;
dp1 = ip;
time(&utime);
if (sblock.fs_magic == FS_UFS1_MAGIC) {
bzero(dp1, sizeof(*dp1));
dp1->di_size = size;
dp1->di_mode = IFREG | IREAD;
dp1->di_nlink = 1;
dp1->di_flags = SF_IMMUTABLE | SF_NOUNLINK | UF_NODUMP;
dp1->di_atime = utime;
dp1->di_mtime = utime;
dp1->di_ctime = utime;
bzero(dp.dp1, sizeof(*dp.dp1));
dp.dp1->di_size = size;
dp.dp1->di_mode = IFREG | IREAD;
dp.dp1->di_nlink = 1;
dp.dp1->di_flags =
SF_IMMUTABLE | SF_NOUNLINK | UF_NODUMP;
dp.dp1->di_atime = utime;
dp.dp1->di_mtime = utime;
dp.dp1->di_ctime = utime;
} else {
bzero(dp2, sizeof(*dp2));
dp2->di_size = size;
dp2->di_mode = IFREG | IREAD;
dp2->di_nlink = 1;
dp2->di_flags = SF_IMMUTABLE | SF_NOUNLINK | UF_NODUMP;
dp2->di_atime = utime;
dp2->di_mtime = utime;
dp2->di_ctime = utime;
dp2->di_birthtime = utime;
bzero(dp.dp2, sizeof(*dp.dp2));
dp.dp2->di_size = size;
dp.dp2->di_mode = IFREG | IREAD;
dp.dp2->di_nlink = 1;
dp.dp2->di_flags =
SF_IMMUTABLE | SF_NOUNLINK | UF_NODUMP;
dp.dp2->di_atime = utime;
dp.dp2->di_mtime = utime;
dp.dp2->di_ctime = utime;
dp.dp2->di_birthtime = utime;
}
for (i = 0; i < UFS_NDADDR && resid; i++, resid--) {
blk = journal_balloc();
if (blk <= 0)
goto out;
if (sblock.fs_magic == FS_UFS1_MAGIC) {
dp1->di_db[i] = blk;
dp1->di_blocks++;
dp.dp1->di_db[i] = blk;
dp.dp1->di_blocks++;
} else {
dp2->di_db[i] = blk;
dp2->di_blocks++;
dp.dp2->di_db[i] = blk;
dp.dp2->di_blocks++;
}
}
for (i = 0; i < UFS_NIADDR && resid; i++) {
@ -1062,19 +1044,20 @@ journal_alloc(int64_t size)
goto out;
}
if (sblock.fs_magic == FS_UFS1_MAGIC) {
dp1->di_ib[i] = blk;
dp1->di_blocks += blks;
dp.dp1->di_ib[i] = blk;
dp.dp1->di_blocks += blks;
} else {
dp2->di_ib[i] = blk;
dp2->di_blocks += blks;
dp.dp2->di_ib[i] = blk;
dp.dp2->di_blocks += blks;
}
}
if (sblock.fs_magic == FS_UFS1_MAGIC)
dp1->di_blocks *= sblock.fs_bsize / disk.d_bsize;
dp.dp1->di_blocks *= sblock.fs_bsize / disk.d_bsize;
else
dp2->di_blocks *= sblock.fs_bsize / disk.d_bsize;
if (putino(&disk) < 0) {
warn("Failed to write inode");
dp.dp2->di_blocks *= sblock.fs_bsize / disk.d_bsize;
if (putinode(&disk) < 0) {
warn("Failed to write allocated inode: %s",
disk.d_error);
sbdirty();
return (-1);
}

View File

@ -86,9 +86,6 @@ ${X_}LINKER_FEATURES=
${X_}LINKER_FEATURES+= build-id
${X_}LINKER_FEATURES+= ifunc
.endif
.if ${${X_}LINKER_TYPE} != "lld" || ${${X_}LINKER_VERSION} >= 50000
${X_}LINKER_FEATURES+= filter
.endif
.if ${${X_}LINKER_TYPE} == "lld" && ${${X_}LINKER_VERSION} >= 60000
${X_}LINKER_FEATURES+= retpoline
.endif

View File

@ -75,6 +75,7 @@ _LIBRARIES= \
cam \
casper \
cap_dns \
cap_fileargs \
cap_grp \
cap_pwd \
cap_random \
@ -238,6 +239,7 @@ _DP_cam= sbuf
_DP_kvm= elf
_DP_casper= nv
_DP_cap_dns= nv
_DP_cap_fileargs= nv
_DP_cap_grp= nv
_DP_cap_pwd= nv
_DP_cap_random= nv

View File

@ -1581,6 +1581,7 @@ hammer_time(u_int64_t modulep, u_int64_t physfree)
identify_cpu1();
identify_hypervisor();
identify_cpu_fixup_bsp();
identify_cpu2();
initializecpucache();

View File

@ -372,3 +372,8 @@ device vmx # VMware VMXNET3 Ethernet
# Netmap provides direct access to TX/RX rings on supported NICs
device netmap # netmap(4) support
# evdev interface
options EVDEV_SUPPORT # evdev support in legacy drivers
device evdev # input event device support
device uinput # install /dev/uinput cdev

View File

@ -147,3 +147,8 @@ device bpf # Berkeley packet filter
# NOTE: XENHVM depends on xenpci. They must be added or removed together.
options XENHVM # Xen HVM kernel infrastructure
device xenpci # Xen HVM Hypervisor services driver
# evdev interface
options EVDEV_SUPPORT # evdev support in legacy drivers
device evdev # input event device support
device uinput # install /dev/uinput cdev

View File

@ -641,6 +641,11 @@ options LINPROCFS
# and PSEUDOFS)
options LINSYSFS
#####################################################################
# ZFS support
options ZFS
#####################################################################
# VM OPTIONS

View File

@ -11,7 +11,7 @@ all:
sysent: freebsd32_sysent.c freebsd32_syscall.h freebsd32_proto.h freebsd32_systrace_args.c
freebsd32_sysent.c freebsd32_syscalls.c freebsd32_syscall.h freebsd32_proto.h freebsd32_systrace_args.c : \
../../kern/makesyscalls.sh syscalls.master syscalls.conf capabilities.conf
../../kern/makesyscalls.sh syscalls.master syscalls.conf ../../kern/capabilities.conf
sh ../../kern/makesyscalls.sh syscalls.master syscalls.conf
clean:

View File

@ -1,298 +0,0 @@
##
## Copyright (c) 2008-2010 Robert N. M. Watson
## Copyright (c) 2016 The FreeBSD Foundation
## All rights reserved.
##
## This software was developed at the University of Cambridge Computer
## Laboratory with support from a grant from Google, Inc.
##
## Portions of this software were developed by Konstantin Belousov
## under sponsorship from the FreeBSD Foundation.
##
## Redistribution and use in source and binary forms, with or without
## modification, are permitted provided that the following conditions
## are met:
## 1. Redistributions of source code must retain the above copyright
## notice, this list of conditions and the following disclaimer.
## 2. Redistributions in binary form must reproduce the above copyright
## notice, this list of conditions and the following disclaimer in the
## documentation and/or other materials provided with the distribution.
##
## THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
## ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
## ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
## FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
## DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
## OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
## HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
## LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
## OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
## SUCH DAMAGE.
##
## List of system calls enabled in freebsd32 capability mode, one name
## per line. See the original list in the sys/kern/capabilities.conf.
## Position of the compat syscall in this file must be identical to
## the master, to facilitate comparision and diagnostic.
##
## $FreeBSD$
##
__acl_aclcheck_fd
__acl_delete_fd
__acl_get_fd
__acl_set_fd
__mac_get_fd
#__mac_get_pid
__mac_get_proc
__mac_set_fd
__mac_set_proc
freebsd32___sysctl
freebsd32__umtx_op
abort2
accept
accept4
aio_cancel
freebsd32_aio_error
freebsd32_aio_fsync
freebsd32_aio_read
freebsd32_aio_return
freebsd32_aio_suspend
freebsd32_aio_waitcomplete
freebsd32_aio_write
#audit
bindat
cap_enter
cap_fcntls_get
cap_fcntls_limit
cap_getmode
freebsd32_cap_ioctls_get
freebsd32_cap_ioctls_limit
__cap_rights_get
cap_rights_limit
freebsd32_clock_getres
freebsd32_clock_gettime
close
closefrom
connectat
#cpuset
freebsd32_cpuset_getaffinity
#freebsd32_cpuset_getid
freebsd32_cpuset_setaffinity
#freebsd32_cpuset_setid
dup
dup2
extattr_delete_fd
extattr_get_fd
extattr_list_fd
extattr_set_fd
fchflags
fchmod
fchown
freebsd32_fcntl
freebsd32_fexecve
flock
fork
fpathconf
freebsd32_fstat
freebsd32_fstatat
freebsd32_getdirentries
freebsd32_fstatfs
freebsd32_mknodat
freebsd32_ftruncate
freebsd32_lseek
freebsd32_mmap
mmap
freebsd32_pread
freebsd32_pwrite
freebsd32_fstat
fstatfs
fsync
ftruncate
freebsd32_ftruncate
freebsd32_futimens
freebsd32_futimes
getaudit
getaudit_addr
getauid
freebsd32_getcontext
freebsd32_getdents
freebsd32_getdirentries
getdirentries
getdomainname
getdtablesize
getegid
geteuid
gethostid
gethostname
freebsd32_getitimer
getgid
getgroups
getlogin
freebsd32_getpagesize
getpeername
getpgid
getpgrp
getpid
getppid
getpriority
getrandom
getresgid
getresuid
getrlimit
freebsd32_getrusage
getsid
getsockname
getsockopt
freebsd32_gettimeofday
getuid
freebsd32_ioctl
issetugid
freebsd32_kevent
kill
freebsd32_kmq_notify
freebsd32_kmq_setattr
freebsd32_kmq_timedreceive
freebsd32_kmq_timedsend
kqueue
freebsd32_ktimer_create
ktimer_delete
ktimer_getoverrun
freebsd32_ktimer_gettime
freebsd32_ktimer_settime
#ktrace
freebsd32_lio_listio
listen
freebsd32_lseek
madvise
mincore
minherit
mlock
mlockall
freebsd32_mmap
freebsd32_mprotect
msync
munlock
munlockall
munmap
freebsd32_nanosleep
ntp_gettime
freebsd6_freebsd32_aio_read
freebsd6_freebsd32_aio_write
break
freebsd6_freebsd32_lio_listio
chflagsat
faccessat
fchmodat
fchownat
freebsd32_fstatat
freebsd32_futimesat
linkat
mkdirat
mkfifoat
mknodat
openat
readlinkat
renameat
symlinkat
unlinkat
freebsd32_utimensat
pdfork
pdgetpid
pdkill
#pdwait4 # not yet implemented
freebsd32_pipe
pipe2
poll
freebsd32_ppoll
freebsd32_posix_fallocate
freebsd32_pread
freebsd32_preadv
profil
#ptrace
freebsd32_pwrite
freebsd32_pwritev
read
freebsd32_readv
freebsd6_freebsd32_recv
freebsd32_recvfrom
freebsd32_recvmsg
rtprio
rtprio_thread
sbrk
sched_get_priority_max
sched_get_priority_min
sched_getparam
sched_getscheduler
freebsd32_sched_rr_get_interval
sched_setparam
sched_setscheduler
sched_yield
sctp_generic_recvmsg
sctp_generic_sendmsg
sctp_generic_sendmsg_iov
sctp_peeloff
freebsd32_pselect
freebsd32_select
freebsd6_freebsd32_send
freebsd32_sendfile
freebsd32_sendmsg
sendto
setaudit
setaudit_addr
setauid
freebsd32_setcontext
setegid
seteuid
setgid
freebsd32_setitimer
setpriority
setregid
setresgid
setresuid
setreuid
setrlimit
setsid
setsockopt
setuid
shm_open
shutdown
freebsd32_sigaction
freebsd32_sigaltstack
freebsd32_sigblock
freebsd32_sigpending
sigpending
freebsd32_sigprocmask
sigprocmask
freebsd32_sigqueue
sigqueue
freebsd32_sigreturn
freebsd32_sigsetmask
freebsd32_sigstack
freebsd32_sigsuspend
sigsuspend
freebsd32_sigtimedwait
freebsd32_sigvec
freebsd32_sigwaitinfo
sigwait
socket
socketpair
sstk
sync
sys_exit
freebsd32_sysarch
thr_create
thr_exit
thr_kill
#thr_kill2
freebsd32_thr_new
thr_self
thr_set_name
freebsd32_thr_suspend
thr_wake
umask
utrace
uuidgen
write
freebsd32_writev
yield

View File

@ -9,3 +9,5 @@ syscallprefix="FREEBSD32_SYS_"
switchname="freebsd32_sysent"
namesname="freebsd32_syscallnames"
systrace="freebsd32_systrace_args.c"
abi_func_prefix="freebsd32_"
capabilities_conf="../../kern/capabilities.conf"

View File

@ -55,21 +55,21 @@ DTBO=${DTSO:R:S/$/.dtbo/}
all: ${DTB} ${DTBO}
.if defined(DTS)
.export DTC
.export DTC ECHO
.for _dts in ${DTS}
${_dts:R:S/$/.dtb/}: ${_dts} ${OP_META}
@${ECHO} Generating ${.TARGET} from ${_dts}
@env ECHO=${ECHO} ${SYSDIR}/tools/fdt/make_dtb.sh ${SYSDIR} ${_dts} ${.OBJDIR}
@${SYSDIR}/tools/fdt/make_dtb.sh ${SYSDIR} ${_dts} ${.OBJDIR}
CLEANFILES+=${_dts:R:S/$/.dtb/}
.endfor
.endif
.if defined(DTSO)
.export DTC
.export DTC ECHO
.for _dtso in ${DTSO}
${_dtso:R:S/$/.dtbo/}: ${_dtso} ${OP_META}
@${ECHO} Generating ${.TARGET} from ${_dtso}
@env ECHO=${ECHO} ${SYSDIR}/tools/fdt/make_dtbo.sh ${SYSDIR} overlays/${_dtso} ${.OBJDIR}
@${SYSDIR}/tools/fdt/make_dtbo.sh ${SYSDIR} overlays/${_dtso} ${.OBJDIR}
CLEANFILES+=${_dtso:R:S/$/.dtbo/}
.endfor
.endif

View File

@ -1276,11 +1276,6 @@ nvpair_create_descriptor(const char *name, int value)
{
nvpair_t *nvp;
if (value < 0 || !fd_is_valid(value)) {
ERRNO_SET(EBADF);
return (NULL);
}
value = fcntl(value, F_DUPFD_CLOEXEC, 0);
if (value < 0)
return (NULL);
@ -1517,11 +1512,6 @@ nvpair_create_descriptor_array(const char *name, const int *value,
if (value[ii] == -1) {
fds[ii] = -1;
} else {
if (!fd_is_valid(value[ii])) {
ERRNO_SET(EBADF);
goto fail;
}
fds[ii] = fcntl(value[ii], F_DUPFD_CLOEXEC, 0);
if (fds[ii] == -1)
goto fail;
@ -2035,10 +2025,6 @@ nvpair_append_descriptor_array(nvpair_t *nvp, const int value)
NVPAIR_ASSERT(nvp);
PJDLOG_ASSERT(nvp->nvp_type == NV_TYPE_DESCRIPTOR_ARRAY);
if (value < 0 || !fd_is_valid(value)) {
ERRNO_SET(EBADF);
return -1;
}
fd = fcntl(value, F_DUPFD_CLOEXEC, 0);
if (fd == -1) {
return (-1);

View File

@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <machine/cpufunc.h>
#include <machine/cputypes.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
@ -53,14 +54,21 @@ __FBSDID("$FreeBSD$");
#define SMN_ADDR_REG 0x60
#define SMN_DATA_REG 0x64
#define PCI_DEVICE_ID_AMD_17H_ROOT 0x1450
#define PCI_DEVICE_ID_AMD_17H_ROOT_DF_F3 0x1463
#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT 0x15d0
#define PCI_DEVICE_ID_AMD_17H_M10H_ROOT_DF_F3 0x15eb
struct amdsmn_softc {
struct mtx smn_lock;
};
static struct pciid {
uint32_t device_id;
uint16_t amdsmn_vendorid;
uint16_t amdsmn_deviceid;
} amdsmn_ids[] = {
{ 0x14501022 },
{ CPU_VENDOR_AMD, PCI_DEVICE_ID_AMD_17H_ROOT },
{ CPU_VENDOR_AMD, PCI_DEVICE_ID_AMD_17H_M10H_ROOT },
};
/*
@ -89,18 +97,21 @@ static driver_t amdsmn_driver = {
static devclass_t amdsmn_devclass;
DRIVER_MODULE(amdsmn, hostb, amdsmn_driver, amdsmn_devclass, NULL, NULL);
MODULE_VERSION(amdsmn, 1);
MODULE_PNP_INFO("W32:vendor/device", pci, amdsmn, amdsmn_ids,
MODULE_PNP_INFO("U16:vendor;U16:device", pci, amdsmn, amdsmn_ids,
nitems(amdsmn_ids));
static bool
amdsmn_match(device_t parent)
{
uint32_t devid;
uint16_t vendor, device;
size_t i;
devid = pci_get_devid(parent);
vendor = pci_get_vendor(parent);
device = pci_get_device(parent);
for (i = 0; i < nitems(amdsmn_ids); i++)
if (amdsmn_ids[i].device_id == devid)
if (vendor == amdsmn_ids[i].amdsmn_vendorid &&
device == amdsmn_ids[i].amdsmn_deviceid)
return (true);
return (false);
}

View File

@ -86,7 +86,10 @@ struct amdtemp_softc {
#define DEVICEID_AMD_MISC16 0x1533
#define DEVICEID_AMD_MISC16_M30H 0x1583
#define DEVICEID_AMD_MISC17 0x141d
#define DEVICEID_AMD_HOSTB17H 0x1450
#define DEVICEID_AMD_HOSTB17H_ROOT 0x1450
#define DEVICEID_AMD_HOSTB17H_DF_F3 0x1463
#define DEVICEID_AMD_HOSTB17H_M10H_ROOT 0x15d0
#define DEVICEID_AMD_HOSTB17H_M10H_DF_F3 0x15eb
static struct amdtemp_product {
uint16_t amdtemp_vendorid;
@ -101,7 +104,8 @@ static struct amdtemp_product {
{ VENDORID_AMD, DEVICEID_AMD_MISC16 },
{ VENDORID_AMD, DEVICEID_AMD_MISC16_M30H },
{ VENDORID_AMD, DEVICEID_AMD_MISC17 },
{ VENDORID_AMD, DEVICEID_AMD_HOSTB17H },
{ VENDORID_AMD, DEVICEID_AMD_HOSTB17H_ROOT },
{ VENDORID_AMD, DEVICEID_AMD_HOSTB17H_M10H_ROOT },
};
/*
@ -111,8 +115,15 @@ static struct amdtemp_product {
/*
* Reported Temperature, Family 17h
*
* According to AMD OSRR for 17H, section 4.2.1, bits 31-21 of this register
* provide the current temp. bit 19, when clear, means the temp is reported in
* a range 0.."225C" (probable typo for 255C), and when set changes the range
* to -49..206C.
*/
#define AMDTEMP_17H_CUR_TMP 0x59800
#define AMDTEMP_17H_CUR_TMP 0x59800
#define AMDTEMP_17H_CUR_TMP_RANGE_SEL (1 << 19)
#define AMDTEMP_17H_CUR_TMP_RANGE_OFF 490
/*
* Thermaltrip Status Register (Family 0Fh only)
@ -591,13 +602,15 @@ static int32_t
amdtemp_gettemp17h(device_t dev, amdsensor_t sensor)
{
struct amdtemp_softc *sc = device_get_softc(dev);
uint32_t temp;
uint32_t temp, val;
int error;
error = amdsmn_read(sc->sc_smn, AMDTEMP_17H_CUR_TMP, &temp);
error = amdsmn_read(sc->sc_smn, AMDTEMP_17H_CUR_TMP, &val);
KASSERT(error == 0, ("amdsmn_read"));
temp = ((temp >> 21) & 0x7ff) * 5 / 4;
temp = ((val >> 21) & 0x7ff) * 5 / 4;
if ((val & AMDTEMP_17H_CUR_TMP_RANGE_SEL) != 0)
temp -= AMDTEMP_17H_CUR_TMP_RANGE_OFF;
temp += AMDTEMP_ZERO_C_TO_K + sc->sc_offset * 10;
return (temp);
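/*
 * Illustrative sketch (not part of this change): decoding a raw
 * AMDTEMP_17H_CUR_TMP value the way the new code above does.  The 11-bit
 * field in bits 31..21 counts 0.125 degC steps, so "* 5 / 4" converts it to
 * tenths of a degree; when the range-select bit is set, 49.0 degC (490
 * tenths) is subtracted.  ZERO_C_TO_DECIKELVIN stands in for the driver's
 * AMDTEMP_ZERO_C_TO_K constant and its value here is an assumption of this
 * sketch.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define CUR_TMP_RANGE_SEL	(1u << 19)
#define CUR_TMP_RANGE_OFF	490
#define ZERO_C_TO_DECIKELVIN	2731	/* assumed value, tenths of a kelvin */

static int32_t
decode_17h_temp(uint32_t val)
{
	int32_t temp;

	temp = ((val >> 21) & 0x7ff) * 5 / 4;	/* eighths of degC -> tenths */
	if ((val & CUR_TMP_RANGE_SEL) != 0)
		temp -= CUR_TMP_RANGE_OFF;	/* shifted -49..206 degC range */
	return (temp + ZERO_C_TO_DECIKELVIN);	/* tenths of a kelvin */
}

int
main(void)
{
	/* Field value 800 = 100.0 degC, normal range: prints 3731. */
	printf("%" PRId32 "\n", decode_17h_temp(800u << 21));
	return (0);
}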

View File

@ -492,6 +492,9 @@ cxgbe_netmap_off(struct adapter *sc, struct vi_info *vi, struct ifnet *ifp,
ASSERT_SYNCHRONIZED_OP(sc);
if (!nm_netmap_on(na))
return (0);
if ((vi->flags & VI_INIT_DONE) == 0)
return (0);

View File

@ -535,11 +535,11 @@ nvme_ns_construct(struct nvme_namespace *ns, uint32_t id,
if (!mtx_initialized(&ns->lock))
mtx_init(&ns->lock, "nvme ns lock", NULL, MTX_DEF);
status.done = FALSE;
status.done = 0;
nvme_ctrlr_cmd_identify_namespace(ctrlr, id, &ns->data,
nvme_completion_poll_cb, &status);
while (status.done == FALSE)
DELAY(5);
while (!atomic_load_acq_int(&status.done))
pause("nvme", 1);
if (nvme_completion_is_error(&status.cpl)) {
nvme_printf(ctrlr, "nvme_identify_namespace failed\n");
return (ENXIO);

View File

@ -445,9 +445,7 @@ tws_setup_intr(struct tws_softc *sc, int irqs)
if (!(sc->intr_handle[i])) {
if ((error = bus_setup_intr(sc->tws_dev, sc->irq_res[i],
INTR_TYPE_CAM | INTR_MPSAFE,
#if (__FreeBSD_version >= 700000)
NULL,
#endif
tws_intr, sc, &sc->intr_handle[i]))) {
tws_log(sc, SETUP_INTR_RES);
return(FAILURE);

View File

@ -160,9 +160,7 @@ tws_cam_attach(struct tws_softc *sc)
*/
sc->sim = cam_sim_alloc(tws_action, tws_poll, "tws", sc,
device_get_unit(sc->tws_dev),
#if (__FreeBSD_version >= 700000)
&sc->sim_lock,
#endif
tws_cam_depth, 1, devq);
/* 1, 1, devq); */
if (sc->sim == NULL) {
@ -172,9 +170,7 @@ tws_cam_attach(struct tws_softc *sc)
/* Register the bus. */
mtx_lock(&sc->sim_lock);
if (xpt_bus_register(sc->sim,
#if (__FreeBSD_version >= 700000)
sc->tws_dev,
#endif
0) != CAM_SUCCESS) {
cam_sim_free(sc->sim, TRUE); /* passing true will free the devq */
sc->sim = NULL; /* so cam_detach will not try to free it */
@ -269,7 +265,6 @@ tws_action(struct cam_sim *sim, union ccb *ccb)
{
TWS_TRACE_DEBUG(sc, "get tran settings", sim, ccb);
#if (__FreeBSD_version >= 700000 )
ccb->cts.protocol = PROTO_SCSI;
ccb->cts.protocol_version = SCSI_REV_2;
ccb->cts.transport = XPORT_SPI;
@ -279,10 +274,6 @@ tws_action(struct cam_sim *sim, union ccb *ccb)
ccb->cts.xport_specific.spi.flags = CTS_SPI_FLAGS_DISC_ENB;
ccb->cts.proto_specific.scsi.valid = CTS_SCSI_VALID_TQ;
ccb->cts.proto_specific.scsi.flags = CTS_SCSI_FLAGS_TAG_ENB;
#else
ccb->cts.valid = (CCB_TRANS_DISC_VALID | CCB_TRANS_TQ_VALID);
ccb->cts.flags &= ~(CCB_TRANS_DISC_ENB | CCB_TRANS_TAG_ENB);
#endif
ccb->ccb_h.status = CAM_REQ_CMP;
xpt_done(ccb);
@ -314,13 +305,11 @@ tws_action(struct cam_sim *sim, union ccb *ccb)
strlcpy(ccb->cpi.sim_vid, "FreeBSD", SIM_IDLEN);
strlcpy(ccb->cpi.hba_vid, "3ware", HBA_IDLEN);
strlcpy(ccb->cpi.dev_name, cam_sim_name(sim), DEV_IDLEN);
#if (__FreeBSD_version >= 700000 )
ccb->cpi.transport = XPORT_SPI;
ccb->cpi.transport_version = 2;
ccb->cpi.protocol = PROTO_SCSI;
ccb->cpi.protocol_version = SCSI_REV_2;
ccb->cpi.maxio = TWS_MAX_IO_SIZE;
#endif
ccb->ccb_h.status = CAM_REQ_CMP;
xpt_done(ccb);

View File

@ -131,12 +131,5 @@ struct error_desc {
/* ------------------------ */
#if (__FreeBSD_version >= 700000)
#include <sys/clock.h>
#define TWS_LOCAL_TIME (time_second - utc_offset())
#else
#include <machine/clock.h>
#define TWS_LOCAL_TIME (time_second - (tz_minuteswest * 60) - \
(wall_cmos_clock ? adjkerntz : 0))
#endif

View File

@ -5,9 +5,9 @@ SYSDIR?=${SRCTOP}/sys
test-dts:
.for dts in ${DTS}
@env MACHINE=`basename ${.CURDIR}` ${SYSDIR}/tools/fdt/make_dtb.sh ${SYSDIR} ${dts} /tmp
.endfor
test-dtso:
.for dtso in ${DTSO}
@env MACHINE=`basename ${.CURDIR}` ${SYSDIR}/tools/fdt/make_dtbo.sh ${SYSDIR} ${dtso} /tmp
.endfor

View File

@ -1,6 +1,6 @@
# $FreeBSD$
DTS!=ls *.dts
DTS!=ls ${.CURDIR}/*.dts
all: test-dts


sys/dts/arm/Makefile.inc Normal file
View File

@ -0,0 +1,3 @@
# $FreeBSD$
.include "../Makefile.inc"

View File

@ -1,6 +1,6 @@
# $FreeBSD$
DTSO!=ls *.dtso
DTSO!=ls ${.CURDIR}/*.dtso
all: test-dtso

View File

@ -1,6 +1,6 @@
# $FreeBSD$
DTS!=ls *.dts
DTS!=ls ${.CURDIR}/*.dts
all: test-dts

View File

@ -0,0 +1,3 @@
# $FreeBSD$
.include "../Makefile.inc"

View File

@ -1,6 +1,6 @@
# $FreeBSD$
DTSO!=ls *.dtso
DTSO!=ls ${.CURDIR}/*.dtso
all: test-dtso

View File

@ -1,6 +1,6 @@
# $FreeBSD$
DTS!=ls *.dts
DTS!=ls ${.CURDIR}/*.dts
all: test-dts

View File

@ -1,6 +1,6 @@
# $FreeBSD$
DTS!=ls *.dts
DTS!=ls ${.CURDIR}/*.dts
all: test-dts

View File

@ -366,3 +366,8 @@ device xenpci # Xen HVM Hypervisor services driver
# VMware support
device vmx # VMware VMXNET3 Ethernet
# evdev interface
options EVDEV_SUPPORT # evdev support in legacy drivers
device evdev # input event device support
device uinput # install /dev/uinput cdev

View File

@ -148,3 +148,8 @@ device bpf # Berkeley packet filter
# NOTE: XENHVM depends on xenpci. They must be added or removed together.
options XENHVM # Xen HVM kernel infrastructure
device xenpci # Xen HVM Hypervisor services driver
# evdev interface
options EVDEV_SUPPORT # evdev support in legacy drivers
device evdev # input event device support
device uinput # install /dev/uinput cdev

View File

@ -36,7 +36,6 @@ __FBSDID("$FreeBSD$");
#include <sys/assym.h>
#include <sys/proc.h>
OFFSYM(td_pre_epoch_prio, thread, u_char);
OFFSYM(td_priority, thread, u_char);
OFFSYM(td_epochnest, thread, u_char);
OFFSYM(td_critnest, thread, u_int);

View File

@ -249,6 +249,7 @@ init_static_kenv(char *buf, size_t len)
{
char *eval;
KASSERT(!dynamic_kenv, ("kenv: dynamic_kenv already initialized"));
/*
* Give the static environment a chance to disable the loader(8)
* environment first. This is done with loader_env.disabled=1.

View File

@ -486,7 +486,7 @@ __mtx_lock_sleep(volatile uintptr_t *c, uintptr_t v)
int64_t all_time = 0;
#endif
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
int doing_lockprof;
int doing_lockprof = 0;
#endif
td = curthread;
@ -690,7 +690,7 @@ _mtx_lock_spin_cookie(volatile uintptr_t *c, uintptr_t v)
int64_t spin_time = 0;
#endif
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
int doing_lockprof;
int doing_lockprof = 0;
#endif
tid = (uintptr_t)curthread;

View File

@ -742,7 +742,7 @@ _rm_assert(const struct rmlock *rm, int what, const char *file, int line)
{
int count;
if (panicstr != NULL)
if (SCHEDULER_STOPPED())
return;
switch (what) {
case RA_LOCKED:

View File

@ -445,7 +445,7 @@ __rw_rlock_hard(struct rwlock *rw, struct thread *td, uintptr_t v
int64_t all_time = 0;
#endif
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
uintptr_t state;
uintptr_t state = 0;
int doing_lockprof = 0;
#endif
@ -913,7 +913,7 @@ __rw_wlock_hard(volatile uintptr_t *c, uintptr_t v LOCK_FILE_LINE_ARG_DEF)
int64_t all_time = 0;
#endif
#if defined(KDTRACE_HOOKS) || defined(LOCK_PROFILING)
uintptr_t state;
uintptr_t state = 0;
int doing_lockprof = 0;
#endif
int extra_work = 0;
@ -1439,7 +1439,7 @@ __rw_assert(const volatile uintptr_t *c, int what, const char *file, int line)
{
const struct rwlock *rw;
if (panicstr != NULL)
if (SCHEDULER_STOPPED())
return;
rw = rwlock2rw(c);

View File

@ -1416,7 +1416,7 @@ _sx_assert(const struct sx *sx, int what, const char *file, int line)
int slocked = 0;
#endif
if (panicstr != NULL)
if (SCHEDULER_STOPPED())
return;
switch (what) {
case SA_SLOCKED:

View File

@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/epoch.h>
#include <sys/rangelock.h>
#include <sys/resourcevar.h>
#include <sys/sdt.h>
@ -272,6 +273,7 @@ thread_init(void *mem, int size, int flags)
td->td_rlqe = NULL;
EVENTHANDLER_DIRECT_INVOKE(thread_init, td);
umtx_thread_init(td);
epoch_thread_init(td);
td->td_kstack = 0;
td->td_sel = NULL;
return (0);
@ -291,6 +293,7 @@ thread_fini(void *mem, int size)
turnstile_free(td->td_turnstile);
sleepq_free(td->td_sleepqueue);
umtx_thread_fini(td);
epoch_thread_fini(td);
seltdfini(td);
}

View File

@ -45,13 +45,7 @@ sysarg="sysarg.switch.$$"
sysprotoend="sysprotoend.$$"
systracetmp="systrace.$$"
systraceret="systraceret.$$"
if [ -r capabilities.conf ]; then
capenabled=`egrep -v '^#|^$' capabilities.conf`
capenabled=`echo $capenabled | sed 's/ /,/g'`
else
capenabled=""
fi
capabilities_conf="capabilities.conf"
trap "rm $sysaue $sysdcl $syscompat $syscompatdcl $syscompat4 $syscompat4dcl $syscompat6 $syscompat6dcl $syscompat7 $syscompat7dcl $syscompat10 $syscompat10dcl $syscompat11 $syscompat11dcl $sysent $sysinc $sysarg $sysprotoend $systracetmp $systraceret" 0
@ -67,6 +61,13 @@ if [ -n "$2" ]; then
. $2
fi
if [ -r $capabilities_conf ]; then
capenabled=`egrep -v '^#|^$' $capabilities_conf`
capenabled=`echo $capenabled | sed 's/ /,/g'`
else
capenabled=""
fi
sed -e '
# FreeBSD ID, includes, comments, and blank lines
/.*\$FreeBSD/b done_joining
@ -137,6 +138,7 @@ sed -e '
switchname = \"$switchname\"
namesname = \"$namesname\"
infile = \"$1\"
abi_func_prefix = \"$abi_func_prefix\"
capenabled_string = \"$capenabled\"
"'
@ -381,7 +383,8 @@ sed -e '
# from it.
#
for (cap in capenabled) {
if (funcname == capenabled[cap]) {
if (funcname == capenabled[cap] ||
funcname == abi_func_prefix capenabled[cap]) {
flags = "SYF_CAPENABLED";
break;
}
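/*
 * Illustrative sketch (not part of this change): the matching rule the awk
 * change above implements.  A syscall is marked SYF_CAPENABLED when its
 * function name matches a capabilities.conf entry either directly or with
 * the ABI prefix (e.g. "freebsd32_") prepended.  The cap_enabled() helper
 * name below is invented for this sketch.
 */
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

static bool
cap_enabled(const char *funcname, const char *entry, const char *abi_prefix)
{
	size_t plen = strlen(abi_prefix);

	if (strcmp(funcname, entry) == 0)
		return (true);
	/* "freebsd32_" + "mmap" also matches "freebsd32_mmap". */
	return (strncmp(funcname, abi_prefix, plen) == 0 &&
	    strcmp(funcname + plen, entry) == 0);
}

int
main(void)
{
	printf("%d\n", cap_enabled("freebsd32_mmap", "mmap", "freebsd32_"));	/* 1 */
	printf("%d\n", cap_enabled("mmap", "mmap", "freebsd32_"));		/* 1 */
	printf("%d\n", cap_enabled("freebsd32_ktrace", "mmap", "freebsd32_"));	/* 0 */
	return (0);
}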

View File

@ -46,11 +46,11 @@
* upper bound on a potential allocation, but not necessarily a tight upper
* bound.
*
* The radix tree also implements two collapsed states for meta nodes:
* the ALL-ALLOCATED state and the ALL-FREE state. If a meta node is
* in either of these two states, all information contained underneath
* the node is considered stale. These states are used to optimize
* allocation and freeing operations.
* The bitmap field in each node directs the search for available blocks.
* For a leaf node, a bit is set if the corresponding block is free. For a
* meta node, a bit is set if the corresponding subtree contains a free
* block somewhere within it. The search at a meta node considers only
* children of that node that represent a range that includes a free block.
*
* The hinting greatly increases code efficiency for allocations while
* the general radix structure optimizes both allocations and frees. The
@ -59,19 +59,19 @@
*
* The blist code wires all necessary memory at creation time. Neither
* allocations nor frees require interaction with the memory subsystem.
* The non-blocking features of the blist code are used in the swap code
* (vm/swap_pager.c).
* The non-blocking nature of allocations and frees is required by swap
* code (vm/swap_pager.c).
*
* LAYOUT: The radix tree is laid out recursively using a
* linear array. Each meta node is immediately followed (laid out
* sequentially in memory) by BLIST_META_RADIX lower level nodes. This
* is a recursive structure but one that can be easily scanned through
* a very simple 'skip' calculation. In order to support large radixes,
* portions of the tree may reside outside our memory allocation. We
* handle this with an early-termination optimization (when bighint is
* set to -1) on the scan. The memory allocation is only large enough
* to cover the number of blocks requested at creation time even if it
* must be encompassed in larger root-node radix.
* LAYOUT: The radix tree is laid out recursively using a linear array.
* Each meta node is immediately followed (laid out sequentially in
* memory) by BLIST_META_RADIX lower level nodes. This is a recursive
* structure but one that can be easily scanned through a very simple
* 'skip' calculation. The memory allocation is only large enough to
* cover the number of blocks requested at creation time. Nodes that
* represent blocks beyond that limit, nodes that would never be read
* or written, are not allocated, so that the last of the
* BLIST_META_RADIX lower level nodes of some nodes may not be
* allocated.
*
* NOTE: the allocator cannot currently allocate more than
* BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too
@ -105,6 +105,7 @@ __FBSDID("$FreeBSD$");
#define BLIST_DEBUG
#endif
#include <sys/errno.h>
#include <sys/types.h>
#include <sys/malloc.h>
#include <sys/sbuf.h>
@ -118,7 +119,7 @@ __FBSDID("$FreeBSD$");
#define bitcount64(x) __bitcount64((uint64_t)(x))
#define malloc(a,b,c) calloc(a, 1)
#define free(a,b) free(a)
static __inline int imax(int a, int b) { return (a > b ? a : b); }
#define ummin(a,b) ((a) < (b) ? (a) : (b))
#include <sys/blist.h>
@ -178,6 +179,18 @@ radix_to_skip(daddr_t radix)
((BLIST_BMAP_RADIX / BLIST_META_RADIX) * BLIST_META_MASK));
}
/*
* Provide a mask with count bits set, starting at position n.
*/
static inline u_daddr_t
bitrange(int n, int count)
{
return (((u_daddr_t)-1 << n) &
((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - (n + count))));
}
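/*
 * Illustrative sketch (not part of this change): what bitrange() produces
 * for a small input.  u_daddr_t and BLIST_BMAP_RADIX are stubbed with a
 * 64-bit type here so the snippet stands alone; n + count is assumed to be
 * at most the word width, as in the callers above.
 */
#include <inttypes.h>
#include <stdio.h>

typedef uint64_t u_daddr_t;
#define BLIST_BMAP_RADIX	(sizeof(u_daddr_t) * 8)

static inline u_daddr_t
bitrange(int n, int count)
{
	return (((u_daddr_t)-1 << n) &
	    ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - (n + count))));
}

int
main(void)
{
	/* Bits 4..11 set: prints 0000000000000ff0. */
	printf("%016" PRIx64 "\n", bitrange(4, 8));
	return (0);
}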
/*
* Use binary search, or a faster method, to find the 1 bit in a u_daddr_t.
* Assumes that the argument has only one bit set.
@ -220,9 +233,7 @@ blist_t
blist_create(daddr_t blocks, int flags)
{
blist_t bl;
daddr_t i, last_block;
u_daddr_t nodes, radix, skip;
int digit;
u_daddr_t nodes, radix;
if (blocks == 0)
panic("invalid block count");
@ -230,30 +241,13 @@ blist_create(daddr_t blocks, int flags)
/*
* Calculate the radix and node count used for scanning.
*/
last_block = blocks - 1;
nodes = 1;
radix = BLIST_BMAP_RADIX;
while (radix < blocks) {
if (((last_block / radix + 1) & BLIST_META_MASK) != 0)
/*
* We must widen the blist to avoid partially
* filled nodes.
*/
last_block |= radix - 1;
while (radix <= blocks) {
nodes += 1 + (blocks - 1) / radix;
radix *= BLIST_META_RADIX;
}
/*
* Count the meta-nodes in the expanded tree, including the final
* terminator, from the bottom level up to the root.
*/
nodes = 1;
if (radix - blocks >= BLIST_BMAP_RADIX)
nodes++;
last_block /= BLIST_BMAP_RADIX;
while (last_block > 0) {
nodes += last_block + 1;
last_block /= BLIST_META_RADIX;
}
bl = malloc(offsetof(struct blist, bl_root[nodes]), M_SWAP, flags |
M_ZERO);
if (bl == NULL)
@ -261,33 +255,6 @@ blist_create(daddr_t blocks, int flags)
bl->bl_blocks = blocks;
bl->bl_radix = radix;
bl->bl_cursor = 0;
/*
* Initialize the empty tree by filling in root values, then initialize
* just the terminators in the rest of the tree.
*/
bl->bl_root[0].bm_bighint = 0;
if (radix == BLIST_BMAP_RADIX)
bl->bl_root[0].u.bmu_bitmap = 0;
else
bl->bl_root[0].u.bmu_avail = 0;
last_block = blocks - 1;
i = 0;
while (radix > BLIST_BMAP_RADIX) {
radix /= BLIST_META_RADIX;
skip = radix_to_skip(radix);
digit = last_block / radix;
i += 1 + digit * skip;
if (digit != BLIST_META_MASK) {
/*
* Add a terminator.
*/
bl->bl_root[i + skip].bm_bighint = (daddr_t)-1;
bl->bl_root[i + skip].u.bmu_bitmap = 0;
}
last_block %= radix;
}
#if defined(BLIST_DEBUG)
printf(
@ -321,6 +288,9 @@ blist_alloc(blist_t bl, daddr_t count)
{
daddr_t blk;
if (count > BLIST_MAX_ALLOC)
panic("allocation too large");
/*
* This loop iterates at most twice. An allocation failure in the
* first iteration leads to a second iteration only if the cursor was
@ -331,12 +301,13 @@ blist_alloc(blist_t bl, daddr_t count)
blk = blst_meta_alloc(bl->bl_root, bl->bl_cursor, count,
bl->bl_radix);
if (blk != SWAPBLK_NONE) {
bl->bl_avail -= count;
bl->bl_cursor = blk + count;
if (bl->bl_cursor == bl->bl_blocks)
bl->bl_cursor = 0;
return (blk);
} else if (bl->bl_cursor != 0)
bl->bl_cursor = 0;
}
bl->bl_cursor = 0;
}
return (SWAPBLK_NONE);
}
@ -348,10 +319,7 @@ daddr_t
blist_avail(blist_t bl)
{
if (bl->bl_radix == BLIST_BMAP_RADIX)
return (bitcount64(bl->bl_root->u.bmu_bitmap));
else
return (bl->bl_root->u.bmu_avail);
return (bl->bl_avail);
}
/*
@ -363,7 +331,10 @@ void
blist_free(blist_t bl, daddr_t blkno, daddr_t count)
{
if (blkno < 0 || blkno + count > bl->bl_blocks)
panic("freeing invalid range");
blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix);
bl->bl_avail += count;
}
/*
@ -375,8 +346,13 @@ blist_free(blist_t bl, daddr_t blkno, daddr_t count)
daddr_t
blist_fill(blist_t bl, daddr_t blkno, daddr_t count)
{
daddr_t filled;
return (blst_meta_fill(bl->bl_root, blkno, count, bl->bl_radix));
if (blkno < 0 || blkno + count > bl->bl_blocks)
panic("filling invalid range");
filled = blst_meta_fill(bl->bl_root, blkno, count, bl->bl_radix);
bl->bl_avail -= filled;
return (filled);
}
/*
@ -414,8 +390,11 @@ blist_resize(blist_t *pbl, daddr_t count, int freenew, int flags)
void
blist_print(blist_t bl)
{
printf("BLIST cursor = %08jx {\n", (uintmax_t)bl->bl_cursor);
blst_radix_print(bl->bl_root, 0, bl->bl_radix, 4);
printf("BLIST avail = %jd, cursor = %08jx {\n",
(uintmax_t)bl->bl_avail, (uintmax_t)bl->bl_cursor);
if (bl->bl_root->bm_bitmap != 0)
blst_radix_print(bl->bl_root, 0, bl->bl_radix, 4);
printf("}\n");
}
@ -569,16 +548,11 @@ blist_stats(blist_t bl, struct sbuf *s)
* Check for skippable subtrees starting at i.
*/
while (radix > BLIST_BMAP_RADIX) {
if (bl->bl_root[nodes].u.bmu_avail == 0) {
if (bl->bl_root[nodes].bm_bitmap == 0) {
if (gap_stats_counting(stats))
update_gap_stats(stats, i);
break;
}
if (bl->bl_root[nodes].u.bmu_avail == radix) {
if (!gap_stats_counting(stats))
update_gap_stats(stats, i);
break;
}
/*
* Skip subtree root.
@ -590,7 +564,7 @@ blist_stats(blist_t bl, struct sbuf *s)
/*
* Scan leaf.
*/
mask = bl->bl_root[nodes].u.bmu_bitmap;
mask = bl->bl_root[nodes].bm_bitmap;
diff = mask ^ (mask << 1);
if (gap_stats_counting(stats))
diff ^= 1;
@ -618,7 +592,57 @@ blist_stats(blist_t bl, struct sbuf *s)
*/
/*
* blist_leaf_alloc() - allocate at a leaf in the radix tree (a bitmap).
* BLST_NEXT_LEAF_ALLOC() - allocate the first few blocks in the next leaf.
*
* 'scan' is a leaf node, associated with a block containing 'blk'.
* The next leaf node could be adjacent, or several nodes away if the
* least common ancestor of 'scan' and its neighbor is several levels
* up. Use 'blk' to determine how many meta-nodes lie between the
* leaves. If the next leaf has enough initial bits set, clear them
* and clear the bits in the meta nodes on the path up to the least
* common ancestor to mark any subtrees made completely empty.
*/
static int
blst_next_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
{
blmeta_t *next;
daddr_t skip;
u_daddr_t radix;
int digit;
next = scan + 1;
blk += BLIST_BMAP_RADIX;
radix = BLIST_BMAP_RADIX;
while ((digit = ((blk / radix) & BLIST_META_MASK)) == 0 &&
(next->bm_bitmap & 1) == 1) {
next++;
radix *= BLIST_META_RADIX;
}
if (((next->bm_bitmap + 1) & ~((u_daddr_t)-1 << count)) != 0) {
/*
* The next leaf doesn't have enough free blocks at the
* beginning to complete the spanning allocation.
*/
return (ENOMEM);
}
/* Clear the first 'count' bits in the next leaf to allocate. */
next->bm_bitmap &= (u_daddr_t)-1 << count;
/*
* Update bitmaps of next-ancestors, up to least common ancestor.
*/
skip = radix_to_skip(radix);
while (radix != BLIST_BMAP_RADIX && next->bm_bitmap == 0) {
(--next)->bm_bitmap ^= 1;
radix /= BLIST_META_RADIX;
}
if (next->bm_bitmap == 0)
scan[-digit * skip].bm_bitmap ^= (u_daddr_t)1 << digit;
return (0);
}
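/*
 * Illustrative sketch (not part of this change): the "+ 1, then mask" test
 * used above.  Adding 1 to a bitmap whose low 'count' bits are all set
 * carries past those bits, so the masked sum is zero exactly when the next
 * leaf can supply the blocks.  Standalone stub with a 64-bit bitmap; count
 * is assumed to be less than 64, as in the caller.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

static bool
low_bits_all_free(uint64_t bitmap, int count)
{
	return (((bitmap + 1) & ~((uint64_t)-1 << count)) == 0);
}

int
main(void)
{
	printf("%d\n", low_bits_all_free(0x07, 3));	/* 1: bits 0..2 free */
	printf("%d\n", low_bits_all_free(0x05, 3));	/* 0: bit 1 allocated */
	printf("%d\n", low_bits_all_free(~0ULL, 3));	/* 1: wholly free leaf */
	return (0);
}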
/*
* BLST_LEAF_ALLOC() - allocate at a leaf in the radix tree (a bitmap).
*
* This is the core of the allocator and is optimized for the
* BLIST_BMAP_RADIX block allocation case. Otherwise, execution
@ -633,15 +657,15 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
range1 = 0;
count1 = count - 1;
num_shifts = fls(count1);
mask = scan->u.bmu_bitmap;
mask = scan->bm_bitmap;
while ((-mask & ~mask) != 0 && num_shifts > 0) {
/*
* If bit i is set in mask, then bits in [i, i+range1] are set
* in scan->u.bmu_bitmap. The value of range1 is equal to
* in scan->bm_bitmap. The value of range1 is equal to
* count1 >> num_shifts. Grow range and reduce num_shifts to 0,
* while preserving these invariants. The updates to mask leave
* fewer bits set, but each bit that remains set represents a
* longer string of consecutive bits set in scan->u.bmu_bitmap.
* longer string of consecutive bits set in scan->bm_bitmap.
* If more updates to mask cannot clear more bits, because mask
* is partitioned with all 0 bits preceding all 1 bits, the loop
* terminates immediately.
@ -685,31 +709,14 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
* An allocation within this leaf is impossible, so a successful
* allocation depends on the next leaf providing some of the blocks.
*/
if (((blk / BLIST_BMAP_RADIX + 1) & BLIST_META_MASK) == 0) {
if (blst_next_leaf_alloc(scan, blk, hi - BLIST_BMAP_RADIX) != 0)
/*
* The next leaf has a different meta-node parent, so it
* is not necessarily initialized. Update bighint,
* comparing the range found at the end of mask to the
* largest earlier range that could have been made to
* vanish in the initial processing of mask.
*/
scan->bm_bighint = imax(BLIST_BMAP_RADIX - lo, range1);
return (SWAPBLK_NONE);
}
hi -= BLIST_BMAP_RADIX;
if (((scan[1].u.bmu_bitmap + 1) & ~((u_daddr_t)-1 << hi)) != 0) {
/*
* The next leaf doesn't have enough free blocks at the
* beginning to complete the spanning allocation. The
* hint cannot be updated, because the same allocation
* request could be satisfied later, by this leaf, if
* the state of the next leaf changes, and without any
* changes to this leaf.
* The hint cannot be updated, because the same
* allocation request could be satisfied later, by this
* leaf, if the state of the next leaf changes, and
* without any changes to this leaf.
*/
return (SWAPBLK_NONE);
}
/* Clear the first 'hi' bits in the next leaf, allocating them. */
scan[1].u.bmu_bitmap &= (u_daddr_t)-1 << hi;
hi = BLIST_BMAP_RADIX;
}
@ -724,12 +731,9 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
} else {
/* Clear the bits of mask at position 'hi' and higher. */
mask &= (u_daddr_t)-1 >> (BLIST_BMAP_RADIX - hi);
/* If this allocation uses all the bits, clear the hint. */
if (mask == scan->u.bmu_bitmap)
scan->bm_bighint = 0;
}
/* Clear the allocated bits from this leaf. */
scan->u.bmu_bitmap &= ~mask;
scan->bm_bitmap &= ~mask;
return ((blk & ~BLIST_BMAP_MASK) + lo);
}
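/*
 * Illustrative sketch (not part of this change): the shift-and-mask trick
 * described in the comments of blst_leaf_alloc() above, in its simplest
 * form.  After each "mask &= mask >> s" step, a set bit means the original
 * word has a run of (previously guaranteed run length + s) consecutive set
 * bits starting there, provided s never exceeds the run length already
 * guaranteed.  This is a simplified illustration, not the exact loop above,
 * which precomputes its shift counts.
 */
#include <stdint.h>
#include <stdio.h>

/* Lowest starting index of a run of 'count' set bits, or -1 if none. */
static int
find_run(uint64_t mask, int count)
{
	int have, s;

	have = 1;
	while (mask != 0 && have < count) {
		s = count - have;
		if (s > have)
			s = have;	/* keep the two shifted runs contiguous */
		mask &= mask >> s;
		have += s;
	}
	if (mask == 0)
		return (-1);
	return (__builtin_ctzll(mask));	/* gcc/clang builtin */
}

int
main(void)
{
	printf("%d\n", find_run(0x1f0, 5));	/* 4: bits 4..8 are set */
	printf("%d\n", find_run(0x1b0, 5));	/* -1: longest run is only 2 */
	return (0);
}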
@ -744,81 +748,61 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count)
static daddr_t
blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count, u_daddr_t radix)
{
daddr_t blk, i, next_skip, r, skip;
int child;
daddr_t blk, i, r, skip;
u_daddr_t bit, mask;
bool scan_from_start;
int digit;
if (radix == BLIST_BMAP_RADIX)
return (blst_leaf_alloc(scan, cursor, count));
if (scan->u.bmu_avail < count) {
/*
* The meta node's hint must be too large if the allocation
* exceeds the number of free blocks. Reduce the hint, and
* return failure.
*/
scan->bm_bighint = scan->u.bmu_avail;
return (SWAPBLK_NONE);
}
blk = cursor & -radix;
scan_from_start = (cursor == blk);
radix /= BLIST_META_RADIX;
skip = radix_to_skip(radix);
next_skip = skip / BLIST_META_RADIX;
mask = scan->bm_bitmap;
/* Discard any candidates that appear before cursor. */
digit = (cursor / radix) & BLIST_META_MASK;
mask &= (u_daddr_t)-1 << digit;
/*
* An ALL-FREE meta node requires special handling before allocating
* any of its blocks.
* If the first try is for a block that includes the cursor, pre-undo
* the digit * radix offset in the first call; otherwise, ignore the
* cursor entirely.
*/
if (scan->u.bmu_avail == radix) {
radix /= BLIST_META_RADIX;
if (((mask >> digit) & 1) == 1)
cursor -= digit * radix;
else
cursor = blk;
/*
* Reinitialize each of the meta node's children. An ALL-FREE
* meta node cannot have a terminator in any subtree.
*/
for (i = 1; i < skip; i += next_skip) {
if (next_skip == 1)
scan[i].u.bmu_bitmap = (u_daddr_t)-1;
else
scan[i].u.bmu_avail = radix;
scan[i].bm_bighint = radix;
}
} else {
radix /= BLIST_META_RADIX;
}
if (count > radix) {
/*
* The allocation exceeds the number of blocks that are
* managed by a subtree of this meta node.
*/
panic("allocation too large");
}
scan_from_start = cursor == blk;
child = (cursor - blk) / radix;
blk += child * radix;
for (i = 1 + child * next_skip; i < skip; i += next_skip) {
/*
* Examine the nonempty subtree associated with each bit set in mask.
*/
do {
bit = mask & -mask;
digit = bitpos(bit);
i = 1 + digit * skip;
if (count <= scan[i].bm_bighint) {
/*
* The allocation might fit beginning in the i'th subtree.
*/
r = blst_meta_alloc(&scan[i],
cursor > blk ? cursor : blk, count, radix);
r = blst_meta_alloc(&scan[i], cursor + digit * radix,
count, radix);
if (r != SWAPBLK_NONE) {
scan->u.bmu_avail -= count;
if (scan[i].bm_bitmap == 0)
scan->bm_bitmap ^= bit;
return (r);
}
} else if (scan[i].bm_bighint == (daddr_t)-1) {
/*
* Terminator
*/
break;
}
blk += radix;
}
cursor = blk;
} while ((mask ^= bit) != 0);
/*
* We couldn't allocate count in this subtree, update bighint.
* We couldn't allocate count in this subtree. If the whole tree was
* scanned, and the last tree node is allocated, update bighint.
*/
if (scan_from_start && scan->bm_bighint >= count)
if (scan_from_start && !(digit == BLIST_META_RADIX - 1 &&
scan[i].bm_bighint == BLIST_MAX_ALLOC))
scan->bm_bighint = count - 1;
return (SWAPBLK_NONE);
@ -832,7 +816,6 @@ static void
blst_leaf_free(blmeta_t *scan, daddr_t blk, int count)
{
u_daddr_t mask;
int n;
/*
* free some data in this bitmap
@ -840,20 +823,10 @@ blst_leaf_free(blmeta_t *scan, daddr_t blk, int count)
* \_________/\__/
* count n
*/
n = blk & BLIST_BMAP_MASK;
mask = ((u_daddr_t)-1 << n) &
((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));
if (scan->u.bmu_bitmap & mask)
mask = bitrange(blk & BLIST_BMAP_MASK, count);
if (scan->bm_bitmap & mask)
panic("freeing free block");
scan->u.bmu_bitmap |= mask;
/*
* We could probably do a better job here. We are required to make
* bighint at least as large as the biggest contiguous block of
* data. If we just shoehorn it, a little extra overhead will
* be incured on the next allocation (but only that one typically).
*/
scan->bm_bighint = BLIST_BMAP_RADIX;
scan->bm_bitmap |= mask;
}
/*
@ -869,79 +842,37 @@ blst_leaf_free(blmeta_t *scan, daddr_t blk, int count)
static void
blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, u_daddr_t radix)
{
daddr_t blk, i, next_skip, skip, v;
int child;
daddr_t blk, endBlk, i, skip;
int digit, endDigit;
/*
* We could probably do a better job here. We are required to make
* bighint at least as large as the biggest allocable block of data.
* If we just shoehorn it, a little extra overhead will be incurred
* on the next allocation (but only that one typically).
*/
scan->bm_bighint = BLIST_MAX_ALLOC;
if (scan->bm_bighint == (daddr_t)-1)
panic("freeing invalid range");
if (radix == BLIST_BMAP_RADIX)
return (blst_leaf_free(scan, freeBlk, count));
skip = radix_to_skip(radix);
next_skip = skip / BLIST_META_RADIX;
if (scan->u.bmu_avail == 0) {
/*
* ALL-ALLOCATED special case, with possible
* shortcut to ALL-FREE special case.
*/
scan->u.bmu_avail = count;
scan->bm_bighint = count;
if (count != radix) {
for (i = 1; i < skip; i += next_skip) {
if (scan[i].bm_bighint == (daddr_t)-1)
break;
scan[i].bm_bighint = 0;
if (next_skip == 1) {
scan[i].u.bmu_bitmap = 0;
} else {
scan[i].u.bmu_avail = 0;
}
}
/* fall through */
}
} else {
scan->u.bmu_avail += count;
/* scan->bm_bighint = radix; */
}
/*
* ALL-FREE special case.
*/
if (scan->u.bmu_avail == radix)
return;
if (scan->u.bmu_avail > radix)
panic("blst_meta_free: freeing already free blocks (%lld) %lld/%lld",
(long long)count, (long long)scan->u.bmu_avail,
(long long)radix);
/*
* Break the free down into its components
*/
blk = freeBlk & -radix;
endBlk = ummin(freeBlk + count, (freeBlk + radix) & -radix);
radix /= BLIST_META_RADIX;
child = (freeBlk - blk) / radix;
blk += child * radix;
i = 1 + child * next_skip;
while (i < skip && blk < freeBlk + count) {
v = blk + radix - freeBlk;
if (v > count)
v = count;
blst_meta_free(&scan[i], freeBlk, v, radix);
if (scan->bm_bighint < scan[i].bm_bighint)
scan->bm_bighint = scan[i].bm_bighint;
count -= v;
freeBlk += v;
skip = radix_to_skip(radix);
blk = freeBlk & -radix;
digit = (blk / radix) & BLIST_META_MASK;
endDigit = 1 + (((endBlk - 1) / radix) & BLIST_META_MASK);
scan->bm_bitmap |= bitrange(digit, endDigit - digit);
for (i = 1 + digit * skip; blk < endBlk; i += skip) {
blk += radix;
i += next_skip;
count = ummin(blk, endBlk) - freeBlk;
blst_meta_free(&scan[i], freeBlk, count, radix);
freeBlk = blk;
}
}
/*
* BLIST_RADIX_COPY() - copy one radix tree to another
* BLST_COPY() - copy one radix tree to another
*
* Locates free space in the source tree and frees it in the destination
* tree. The space may not already be free in the destination.
@ -950,21 +881,21 @@ static void
blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, blist_t dest,
daddr_t count)
{
daddr_t i, next_skip, skip;
daddr_t endBlk, i, skip;
/*
* Leaf node
*/
if (radix == BLIST_BMAP_RADIX) {
u_daddr_t v = scan->u.bmu_bitmap;
u_daddr_t v = scan->bm_bitmap;
if (v == (u_daddr_t)-1) {
blist_free(dest, blk, count);
} else if (v != 0) {
int i;
for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) {
for (i = 0; i < count; ++i) {
if (v & ((u_daddr_t)1 << i))
blist_free(dest, blk + i, 1);
}
@ -976,42 +907,22 @@ blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, blist_t dest,
* Meta node
*/
if (scan->u.bmu_avail == 0) {
if (scan->bm_bitmap == 0) {
/*
* Source all allocated, leave dest allocated
*/
return;
}
if (scan->u.bmu_avail == radix) {
/*
* Source all free, free entire dest
*/
if (count < radix)
blist_free(dest, blk, count);
else
blist_free(dest, blk, radix);
return;
}
skip = radix_to_skip(radix);
next_skip = skip / BLIST_META_RADIX;
endBlk = blk + count;
radix /= BLIST_META_RADIX;
for (i = 1; count && i < skip; i += next_skip) {
if (scan[i].bm_bighint == (daddr_t)-1)
break;
if (count >= radix) {
blst_copy(&scan[i], blk, radix, dest, radix);
count -= radix;
} else {
if (count) {
blst_copy(&scan[i], blk, radix, dest, count);
}
count = 0;
}
skip = radix_to_skip(radix);
for (i = 1; blk < endBlk; i += skip) {
blk += radix;
count = radix;
if (blk >= endBlk)
count -= blk - endBlk;
blst_copy(&scan[i], blk - radix, radix, dest, count);
}
}
@ -1027,16 +938,13 @@ blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count)
{
daddr_t nblks;
u_daddr_t mask;
int n;
n = blk & BLIST_BMAP_MASK;
mask = ((u_daddr_t)-1 << n) &
((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));
mask = bitrange(blk & BLIST_BMAP_MASK, count);
/* Count the number of blocks that we are allocating. */
nblks = bitcount64(scan->u.bmu_bitmap & mask);
nblks = bitcount64(scan->bm_bitmap & mask);
scan->u.bmu_bitmap &= ~mask;
scan->bm_bitmap &= ~mask;
return (nblks);
}
@ -1051,70 +959,27 @@ blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count)
static daddr_t
blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, u_daddr_t radix)
{
daddr_t blk, i, nblks, next_skip, skip, v;
int child;
daddr_t blk, endBlk, i, nblks, skip;
int digit;
if (scan->bm_bighint == (daddr_t)-1)
panic("filling invalid range");
if (count > radix) {
/*
* The allocation exceeds the number of blocks that are
* managed by this node.
*/
panic("fill too large");
}
if (radix == BLIST_BMAP_RADIX)
return (blst_leaf_fill(scan, allocBlk, count));
if (count == radix || scan->u.bmu_avail == 0) {
/*
* ALL-ALLOCATED special case
*/
nblks = scan->u.bmu_avail;
scan->u.bmu_avail = 0;
scan->bm_bighint = 0;
return (nblks);
}
endBlk = ummin(allocBlk + count, (allocBlk + radix) & -radix);
radix /= BLIST_META_RADIX;
skip = radix_to_skip(radix);
next_skip = skip / BLIST_META_RADIX;
blk = allocBlk & -radix;
/*
* An ALL-FREE meta node requires special handling before allocating
* any of its blocks.
*/
if (scan->u.bmu_avail == radix) {
radix /= BLIST_META_RADIX;
/*
* Reinitialize each of the meta node's children. An ALL-FREE
* meta node cannot have a terminator in any subtree.
*/
for (i = 1; i < skip; i += next_skip) {
if (next_skip == 1)
scan[i].u.bmu_bitmap = (u_daddr_t)-1;
else
scan[i].u.bmu_avail = radix;
scan[i].bm_bighint = radix;
}
} else {
radix /= BLIST_META_RADIX;
}
nblks = 0;
child = (allocBlk - blk) / radix;
blk += child * radix;
i = 1 + child * next_skip;
while (i < skip && blk < allocBlk + count) {
v = blk + radix - allocBlk;
if (v > count)
v = count;
nblks += blst_meta_fill(&scan[i], allocBlk, v, radix);
count -= v;
allocBlk += v;
while (blk < endBlk) {
digit = (blk / radix) & BLIST_META_MASK;
i = 1 + digit * skip;
blk += radix;
i += next_skip;
count = ummin(blk, endBlk) - allocBlk;
nblks += blst_meta_fill(&scan[i], allocBlk, count, radix);
if (scan[i].bm_bitmap == 0)
scan->bm_bitmap &= ~((u_daddr_t)1 << digit);
allocBlk = blk;
}
scan->u.bmu_avail -= nblks;
return (nblks);
}
@ -1123,64 +988,44 @@ blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, u_daddr_t radix)
static void
blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab)
{
daddr_t i, next_skip, skip;
daddr_t skip;
u_daddr_t bit, mask;
int digit;
if (radix == BLIST_BMAP_RADIX) {
printf(
"%*.*s(%08llx,%lld): bitmap %016llx big=%lld\n",
"%*.*s(%08llx,%lld): bitmap %0*llx big=%lld\n",
tab, tab, "",
(long long)blk, (long long)radix,
(long long)scan->u.bmu_bitmap,
1 + (BLIST_BMAP_RADIX - 1) / 4,
(long long)scan->bm_bitmap,
(long long)scan->bm_bighint
);
return;
}
if (scan->u.bmu_avail == 0) {
printf(
"%*.*s(%08llx,%lld) ALL ALLOCATED\n",
tab, tab, "",
(long long)blk,
(long long)radix
);
return;
}
if (scan->u.bmu_avail == radix) {
printf(
"%*.*s(%08llx,%lld) ALL FREE\n",
tab, tab, "",
(long long)blk,
(long long)radix
);
return;
}
printf(
"%*.*s(%08llx,%lld): subtree (%lld/%lld) big=%lld {\n",
"%*.*s(%08llx): subtree (%lld/%lld) bitmap %0*llx big=%lld {\n",
tab, tab, "",
(long long)blk, (long long)radix,
(long long)scan->u.bmu_avail,
(long long)radix,
1 + (BLIST_META_RADIX - 1) / 4,
(long long)scan->bm_bitmap,
(long long)scan->bm_bighint
);
skip = radix_to_skip(radix);
next_skip = skip / BLIST_META_RADIX;
radix /= BLIST_META_RADIX;
skip = radix_to_skip(radix);
tab += 4;
for (i = 1; i < skip; i += next_skip) {
if (scan[i].bm_bighint == (daddr_t)-1) {
printf(
"%*.*s(%08llx,%lld): Terminator\n",
tab, tab, "",
(long long)blk, (long long)radix
);
break;
}
blst_radix_print(&scan[i], blk, radix, tab);
blk += radix;
}
mask = scan->bm_bitmap;
/* Examine the nonempty subtree associated with each bit set in mask */
do {
bit = mask & -mask;
digit = bitpos(bit);
blst_radix_print(&scan[1 + digit * skip], blk + digit * radix,
radix, tab);
} while ((mask ^= bit) != 0);
tab -= 4;
printf(
@ -1196,7 +1041,7 @@ blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab)
int
main(int ac, char **av)
{
int size = 1024;
int size = BLIST_META_RADIX * BLIST_BMAP_RADIX;
int i;
blist_t bl;
struct sbuf *s;

View File

@ -55,6 +55,27 @@ __FBSDID("$FreeBSD$");
static MALLOC_DEFINE(M_EPOCH, "epoch", "epoch based reclamation");
#ifdef __amd64__
#define EPOCH_ALIGN CACHE_LINE_SIZE*2
#else
#define EPOCH_ALIGN CACHE_LINE_SIZE
#endif
TAILQ_HEAD (epoch_tdlist, epoch_tracker);
typedef struct epoch_record {
ck_epoch_record_t er_record;
volatile struct epoch_tdlist er_tdlist;
volatile uint32_t er_gen;
uint32_t er_cpuid;
} __aligned(EPOCH_ALIGN) *epoch_record_t;
struct epoch {
struct ck_epoch e_epoch __aligned(EPOCH_ALIGN);
epoch_record_t e_pcpu_record;
int e_idx;
int e_flags;
};
/* arbitrary --- needs benchmarking */
#define MAX_ADAPTIVE_SPIN 100
#define MAX_EPOCHS 64
@ -119,11 +140,15 @@ epoch_init(void *arg __unused)
epoch_call_count = counter_u64_alloc(M_WAITOK);
epoch_call_task_count = counter_u64_alloc(M_WAITOK);
pcpu_zone_record = uma_zcreate("epoch_record pcpu", sizeof(struct epoch_record),
NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_PCPU);
pcpu_zone_record = uma_zcreate("epoch_record pcpu",
sizeof(struct epoch_record), NULL, NULL, NULL, NULL,
UMA_ALIGN_PTR, UMA_ZONE_PCPU);
CPU_FOREACH(cpu) {
GROUPTASK_INIT(DPCPU_ID_PTR(cpu, epoch_cb_task), 0, epoch_call_task, NULL);
taskqgroup_attach_cpu(qgroup_softirq, DPCPU_ID_PTR(cpu, epoch_cb_task), NULL, cpu, -1, "epoch call task");
GROUPTASK_INIT(DPCPU_ID_PTR(cpu, epoch_cb_task), 0,
epoch_call_task, NULL);
taskqgroup_attach_cpu(qgroup_softirq,
DPCPU_ID_PTR(cpu, epoch_cb_task), NULL, cpu, -1,
"epoch call task");
}
inited = 1;
global_epoch = epoch_alloc(0);
@ -150,13 +175,21 @@ epoch_ctor(epoch_t epoch)
CPU_FOREACH(cpu) {
er = zpcpu_get_cpu(epoch->e_pcpu_record, cpu);
bzero(er, sizeof(*er));
ck_epoch_register(&epoch->e_epoch, &er->er_read_record, NULL);
ck_epoch_register(&epoch->e_epoch, &er->er_write_record, NULL);
ck_epoch_register(&epoch->e_epoch, &er->er_record, NULL);
TAILQ_INIT((struct threadlist *)(uintptr_t)&er->er_tdlist);
er->er_cpuid = cpu;
}
}
static void
epoch_adjust_prio(struct thread *td, u_char prio)
{
thread_lock(td);
sched_prio(td, prio);
thread_unlock(td);
}
epoch_t
epoch_alloc(int flags)
{
@ -192,51 +225,126 @@ epoch_free(epoch_t epoch)
free(epoch, M_EPOCH);
}
void
epoch_enter_preempt_KBI(epoch_t epoch, epoch_tracker_t et)
static epoch_record_t
epoch_currecord(epoch_t epoch)
{
epoch_enter_preempt(epoch, et);
return (zpcpu_get_cpu(epoch->e_pcpu_record, curcpu));
}
#define INIT_CHECK(epoch) \
do { \
if (__predict_false((epoch) == NULL)) \
return; \
} while (0)
void
epoch_enter_preempt(epoch_t epoch, epoch_tracker_t et)
{
struct epoch_record *er;
struct thread *td;
MPASS(cold || epoch != NULL);
INIT_CHECK(epoch);
MPASS(epoch->e_flags & EPOCH_PREEMPT);
#ifdef EPOCH_TRACKER_DEBUG
et->et_magic_pre = EPOCH_MAGIC0;
et->et_magic_post = EPOCH_MAGIC1;
#endif
td = curthread;
et->et_td = td;
td->td_epochnest++;
critical_enter();
sched_pin();
td->td_pre_epoch_prio = td->td_priority;
er = epoch_currecord(epoch);
TAILQ_INSERT_TAIL(&er->er_tdlist, et, et_link);
ck_epoch_begin(&er->er_record, &et->et_section);
critical_exit();
}
void
epoch_exit_preempt_KBI(epoch_t epoch, epoch_tracker_t et)
epoch_enter(epoch_t epoch)
{
struct thread *td;
epoch_record_t er;
epoch_exit_preempt(epoch, et);
MPASS(cold || epoch != NULL);
INIT_CHECK(epoch);
td = curthread;
td->td_epochnest++;
critical_enter();
er = epoch_currecord(epoch);
ck_epoch_begin(&er->er_record, NULL);
}
void
epoch_enter_KBI(epoch_t epoch)
epoch_exit_preempt(epoch_t epoch, epoch_tracker_t et)
{
struct epoch_record *er;
struct thread *td;
epoch_enter(epoch);
INIT_CHECK(epoch);
td = curthread;
critical_enter();
sched_unpin();
MPASS(td->td_epochnest);
td->td_epochnest--;
er = epoch_currecord(epoch);
MPASS(epoch->e_flags & EPOCH_PREEMPT);
MPASS(et != NULL);
MPASS(et->et_td == td);
#ifdef EPOCH_TRACKER_DEBUG
MPASS(et->et_magic_pre == EPOCH_MAGIC0);
MPASS(et->et_magic_post == EPOCH_MAGIC1);
et->et_magic_pre = 0;
et->et_magic_post = 0;
#endif
#ifdef INVARIANTS
et->et_td = (void*)0xDEADBEEF;
#endif
ck_epoch_end(&er->er_record, &et->et_section);
TAILQ_REMOVE(&er->er_tdlist, et, et_link);
er->er_gen++;
if (__predict_false(td->td_pre_epoch_prio != td->td_priority))
epoch_adjust_prio(td, td->td_pre_epoch_prio);
critical_exit();
}
void
epoch_exit_KBI(epoch_t epoch)
epoch_exit(epoch_t epoch)
{
struct thread *td;
epoch_record_t er;
epoch_exit(epoch);
INIT_CHECK(epoch);
td = curthread;
MPASS(td->td_epochnest);
td->td_epochnest--;
er = epoch_currecord(epoch);
ck_epoch_end(&er->er_record, NULL);
critical_exit();
}
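/*
 * Illustrative usage sketch (not part of this change): how a consumer of
 * the preemptible-epoch interfaces above might look.  Readers enter with a
 * stack epoch_tracker; a writer unlinks an element and waits (or defers via
 * epoch_call()) before freeing it.  The foo structure, foo_list,
 * foo_inspect() and M_FOO are hypothetical names used only in this sketch;
 * the epoch_* calls are the interfaces shown above.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/epoch.h>
#include <sys/malloc.h>
#include <sys/queue.h>

static MALLOC_DEFINE(M_FOO, "foo", "epoch usage sketch");

struct foo {
	LIST_ENTRY(foo)		f_link;
	struct epoch_context	f_ctx;
};

static LIST_HEAD(, foo) foo_list = LIST_HEAD_INITIALIZER(foo_list);
static epoch_t foo_epoch;		/* epoch_alloc(EPOCH_PREEMPT) at init */

static void foo_inspect(struct foo *);	/* hypothetical reader work */

static void
foo_reader(void)
{
	struct epoch_tracker et;
	struct foo *f;

	epoch_enter_preempt(foo_epoch, &et);
	LIST_FOREACH(f, &foo_list, f_link)
		foo_inspect(f);		/* may be preempted, must not sleep */
	epoch_exit_preempt(foo_epoch, &et);
}

static void
foo_remove(struct foo *f)
{
	LIST_REMOVE(f, f_link);		/* writer-side locking not shown */
	epoch_wait_preempt(foo_epoch);	/* all sections that saw f are done */
	free(f, M_FOO);
	/* Deferred alternative: epoch_call(foo_epoch, &f->f_ctx, callback). */
}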
/*
* epoch_block_handler_preempt is a callback from the ck code when another thread is
* currently in an epoch section.
* epoch_block_handler_preempt() is a callback from the CK code when another
* thread is currently in an epoch section.
*/
static void
epoch_block_handler_preempt(struct ck_epoch *global __unused, ck_epoch_record_t *cr,
void *arg __unused)
epoch_block_handler_preempt(struct ck_epoch *global __unused,
ck_epoch_record_t *cr, void *arg __unused)
{
epoch_record_t record;
struct thread *td, *owner, *curwaittd;
struct epoch_thread *tdwait;
struct epoch_tracker *tdwait;
struct turnstile *ts;
struct lock_object *lock;
int spincount, gen;
int locksheld __unused;
record = __containerof(cr, struct epoch_record, er_read_record);
record = __containerof(cr, struct epoch_record, er_record);
td = curthread;
locksheld = td->td_locks;
spincount = 0;
@ -318,25 +426,27 @@ epoch_block_handler_preempt(struct ck_epoch *global __unused, ck_epoch_record_t
if (TD_IS_INHIBITED(curwaittd) && TD_ON_LOCK(curwaittd) &&
((ts = curwaittd->td_blocked) != NULL)) {
/*
* We unlock td to allow turnstile_wait to reacquire the
* the thread lock. Before unlocking it we enter a critical
* section to prevent preemption after we reenable interrupts
* by dropping the thread lock in order to prevent curwaittd
* from getting to run.
* We unlock td to allow turnstile_wait to reacquire
* the thread lock. Before unlocking it we enter a
* critical section to prevent preemption after we
* reenable interrupts by dropping the thread lock in
* order to prevent curwaittd from getting to run.
*/
critical_enter();
thread_unlock(td);
owner = turnstile_lock(ts, &lock);
/*
* The owner pointer indicates that the lock succeeded. Only
* in case we hold the lock and the turnstile we locked is still
* the one that curwaittd is blocked on can we continue. Otherwise
* The turnstile pointer has been changed out from underneath
* us, as in the case where the lock holder has signalled curwaittd,
* The owner pointer indicates that the lock succeeded.
* Only in case we hold the lock and the turnstile we
* locked is still the one that curwaittd is blocked on
* can we continue. Otherwise the turnstile pointer has
* been changed out from underneath us, as in the case
* where the lock holder has signalled curwaittd,
* and we need to continue.
*/
if (owner != NULL && ts == curwaittd->td_blocked) {
MPASS(TD_IS_INHIBITED(curwaittd) && TD_ON_LOCK(curwaittd));
MPASS(TD_IS_INHIBITED(curwaittd) &&
TD_ON_LOCK(curwaittd));
critical_exit();
turnstile_wait(ts, owner, curwaittd->td_tsqueue);
counter_u64_add(turnstile_count, 1);
@ -386,9 +496,8 @@ epoch_wait_preempt(epoch_t epoch)
if ((epoch->e_flags & EPOCH_LOCKED) == 0)
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
"epoch_wait() can be long running");
KASSERT(!in_epoch(epoch),
("epoch_wait_preempt() called in the middle "
"of an epoch section of the same epoch"));
KASSERT(!in_epoch(epoch), ("epoch_wait_preempt() called in the middle "
"of an epoch section of the same epoch"));
#endif
thread_lock(td);
DROP_GIANT();
@ -401,7 +510,8 @@ epoch_wait_preempt(epoch_t epoch)
td->td_pinned = 0;
sched_bind(td, old_cpu);
ck_epoch_synchronize_wait(&epoch->e_epoch, epoch_block_handler_preempt, NULL);
ck_epoch_synchronize_wait(&epoch->e_epoch, epoch_block_handler_preempt,
NULL);
/* restore CPU binding, if any */
if (was_bound != 0) {
@ -462,7 +572,7 @@ epoch_call(epoch_t epoch, epoch_context_t ctx, void (*callback) (epoch_context_t
critical_enter();
*DPCPU_PTR(epoch_cb_count) += 1;
er = epoch_currecord(epoch);
ck_epoch_call(&er->er_write_record, cb, (ck_epoch_cb_t *)callback);
ck_epoch_call(&er->er_record, cb, (ck_epoch_cb_t *)callback);
critical_exit();
return;
boottime:
@ -486,7 +596,7 @@ epoch_call_task(void *arg __unused)
if (__predict_false((epoch = allepochs[i]) == NULL))
continue;
er = epoch_currecord(epoch);
record = &er->er_write_record;
record = &er->er_record;
if ((npending = record->n_pending) == 0)
continue;
ck_epoch_poll_deferred(record, &cb_stack);
@ -502,7 +612,7 @@ epoch_call_task(void *arg __unused)
head = ck_stack_batch_pop_npsc(&cb_stack);
for (cursor = head; cursor != NULL; cursor = next) {
struct ck_epoch_entry *entry =
ck_epoch_entry_container(cursor);
ck_epoch_entry_container(cursor);
next = CK_STACK_NEXT(cursor);
entry->function(entry);
@ -512,7 +622,7 @@ epoch_call_task(void *arg __unused)
int
in_epoch_verbose(epoch_t epoch, int dump_onfail)
{
struct epoch_thread *tdwait;
struct epoch_tracker *tdwait;
struct thread *td;
epoch_record_t er;
@ -548,9 +658,15 @@ in_epoch(epoch_t epoch)
}
void
epoch_adjust_prio(struct thread *td, u_char prio)
epoch_thread_init(struct thread *td)
{
thread_lock(td);
sched_prio(td, prio);
thread_unlock(td);
td->td_et = malloc(sizeof(struct epoch_tracker), M_EPOCH, M_WAITOK);
}
void
epoch_thread_fini(struct thread *td)
{
free(td->td_et, M_EPOCH);
}

View File

@ -1767,35 +1767,29 @@ if_data_copy(struct ifnet *ifp, struct if_data *ifd)
void
if_addr_rlock(struct ifnet *ifp)
{
MPASS(*(uint64_t *)&ifp->if_addr_et == 0);
epoch_enter_preempt(net_epoch_preempt, &ifp->if_addr_et);
epoch_enter_preempt(net_epoch_preempt, curthread->td_et);
}
void
if_addr_runlock(struct ifnet *ifp)
{
epoch_exit_preempt(net_epoch_preempt, &ifp->if_addr_et);
#ifdef INVARIANTS
bzero(&ifp->if_addr_et, sizeof(struct epoch_tracker));
#endif
epoch_exit_preempt(net_epoch_preempt, curthread->td_et);
}
void
if_maddr_rlock(if_t ifp)
{
MPASS(*(uint64_t *)&ifp->if_maddr_et == 0);
epoch_enter_preempt(net_epoch_preempt, &ifp->if_maddr_et);
epoch_enter_preempt(net_epoch_preempt, curthread->td_et);
}
void
if_maddr_runlock(if_t ifp)
{
epoch_exit_preempt(net_epoch_preempt, &ifp->if_maddr_et);
#ifdef INVARIANTS
bzero(&ifp->if_maddr_et, sizeof(struct epoch_tracker));
#endif
epoch_exit_preempt(net_epoch_preempt, curthread->td_et);
}
/*

View File

@ -381,8 +381,6 @@ struct ifnet {
*/
struct netdump_methods *if_netdump_methods;
struct epoch_context if_epoch_ctx;
struct epoch_tracker if_addr_et;
struct epoch_tracker if_maddr_et;
/*
* Spare fields to be added before branching a stable branch, so

View File

@ -39,6 +39,10 @@ options PREEMPTION #Enable kernel thread preemption
options VIMAGE # Subsystem virtualization, e.g. VNET
options INET #InterNETworking
options INET6 #IPv6 communications protocols
options IPSEC # IP (v4/v6) security
options IPSEC_SUPPORT # Allow kldload of ipsec and tcpmd5
options TCP_OFFLOAD # TCP offload
options TCP_BLACKBOX # Enhanced TCP event logging
options TCP_HHOOK # hhook(9) framework for TCP
options TCP_RFC7413 # TCP Fast Open
options SCTP #Stream Control Transmission Protocol
@ -83,6 +87,9 @@ options MAC # TrustedBSD MAC Framework
options KDTRACE_HOOKS # Kernel DTrace hooks
options DDB_CTF # Kernel ELF linker loads CTF data
options INCLUDE_CONFIG_FILE # Include this file in kernel
options RACCT # Resource accounting framework
options RACCT_DEFAULT_TO_DISABLED # Set kern.racct.enable=0 by default
options RCTL # Resource limits
# Debugging support. Always need this:
options KDB # Enable kernel debugger support.
@ -136,10 +143,12 @@ device sym # NCR/Symbios/LSI Logic 53C8XX/53C1010/53C1510D
# ATA/SCSI peripherals
device scbus # SCSI bus (required for ATA/SCSI)
device ch # SCSI media changers
device da # Direct Access (disks)
device sa # Sequential Access (tape etc)
device cd # CD
device pass # Passthrough device (direct ATA/SCSI access)
device ses # Enclosure Service (SES and SAF-TE)
# vt is the default console driver, resembling an SCO console
device vt # Core console driver
@ -168,6 +177,7 @@ device re # RealTek 8139C+/8169/8169S/8110S
device rl # RealTek 8129/8139
# Pseudo devices.
device crypto # core crypto support
device loop # Network loopback
device random # Entropy device
device ether # Ethernet support
@ -193,10 +203,8 @@ device usb # USB Bus (required)
device uhid # "Human Interface Devices"
device ukbd # Keyboard
options KBD_INSTALL_CDEV # install a CDEV entry in /dev
device ulpt # Printer
device umass # Disks/Mass storage - Requires scbus and da0
device ums # Mouse
device urio # Diamond Rio 500 MP3 player
# USB Ethernet
device aue # ADMtek USB Ethernet
device axe # ASIX Electronics USB Ethernet
@ -236,3 +244,5 @@ device sound # Generic sound driver (required)
device snd_ai2s # Apple I2S audio
device snd_uaudio # USB Audio
# Netmap provides direct access to TX/RX rings on supported NICs
device netmap # netmap(4) support

View File

@ -38,6 +38,7 @@
#define _MACHINE_CPU_H_
#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/frame.h>
#define TRAPF_PC(tfp) ((tfp)->tf_ra)
@ -86,8 +87,7 @@ static __inline uint64_t
get_cyclecount(void)
{
/* TODO: This is bogus */
return (1);
return (rdcycle());
}
#endif

View File

@ -104,6 +104,11 @@ sfence_vma_page(uintptr_t addr)
__asm __volatile("sfence.vma %0" :: "r" (addr) : "memory");
}
#define rdcycle() csr_read64(cycle)
#define rdtime() csr_read64(time)
#define rdinstret() csr_read64(instret)
#define rdhpmcounter(n) csr_read64(hpmcounter##n)
#define cpufunc_nullop() riscv_nullop()
void riscv_nullop(void);

View File

@ -223,4 +223,23 @@
val; \
})
#if __riscv_xlen == 32
#define csr_read64(csr) \
({ uint64_t val; \
uint32_t high, low; \
__asm __volatile("1: " \
"csrr t0, " #csr "h\n" \
"csrr %0, " #csr "\n" \
"csrr %1, " #csr "h\n" \
"bne t0, %1, 1b" \
: "=r" (low), "=r" (high) \
: \
: "t0"); \
val = (low | ((uint64_t)high << 32)); \
val; \
})
#else
#define csr_read64(csr) ((uint64_t)csr_read(csr))
#endif
#endif /* !_MACHINE_RISCVREG_H_ */
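On RV32 the csr_read64() macro above cannot read a 64-bit CSR in one instruction, so it samples the high half, then the low half, then the high half again and retries if a carry from the low word changed the high word in between; rdcycle()/rdtime()/rdinstret() (and get_cyclecount() in the previous hunk) sit on top of it. A hedged userspace model of the same hi/lo/hi retry, with plain volatile words standing in for the two CSR halves:

/*
 * Userspace model of the hi/lo/hi retry used by csr_read64() on RV32: the
 * two 32-bit halves are read separately, so a carry from the low word into
 * the high word between the reads must be detected and the read retried.
 * counter_hi/counter_lo are stand-ins for the "csrr ...h" / "csrr ..." pair.
 */
#include <stdint.h>

static volatile uint32_t counter_hi, counter_lo;

static uint64_t
read_counter64(void)
{
	uint32_t hi, lo;

	do {
		hi = counter_hi;
		lo = counter_lo;
	} while (hi != counter_hi);	/* low word wrapped; sample again */

	return (((uint64_t)hi << 32) | lo);
}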

View File

@ -73,22 +73,20 @@ typedef uint64_t u_daddr_t; /* unsigned disk address */
*/
typedef struct blmeta {
union {
daddr_t bmu_avail; /* space available under us */
u_daddr_t bmu_bitmap; /* bitmap if we are a leaf */
} u;
u_daddr_t bm_bitmap; /* bitmap if we are a leaf */
daddr_t bm_bighint; /* biggest contiguous block hint*/
} blmeta_t;
typedef struct blist {
daddr_t bl_blocks; /* area of coverage */
daddr_t bl_avail; /* # available blocks */
u_daddr_t bl_radix; /* coverage radix */
daddr_t bl_cursor; /* next-fit search starts at */
blmeta_t bl_root[1]; /* root of radix tree */
} *blist_t;
#define BLIST_META_RADIX 16
#define BLIST_BMAP_RADIX (sizeof(u_daddr_t)*8)
#define BLIST_META_RADIX BLIST_BMAP_RADIX
#define BLIST_MAX_ALLOC BLIST_BMAP_RADIX

View File

@ -29,10 +29,17 @@
#ifndef _SYS_EPOCH_H_
#define _SYS_EPOCH_H_
struct epoch_context {
void *data[2];
} __aligned(sizeof(void *));
typedef struct epoch_context *epoch_context_t;
#ifdef _KERNEL
#include <sys/lock.h>
#include <sys/pcpu.h>
#endif
#include <ck_epoch.h>
struct epoch;
typedef struct epoch *epoch_t;
@ -43,22 +50,19 @@ typedef struct epoch *epoch_t;
extern epoch_t global_epoch;
extern epoch_t global_epoch_preempt;
struct epoch_context {
void *data[2];
} __aligned(sizeof(void *));
typedef struct epoch_context *epoch_context_t;
struct epoch_tracker {
void *datap[3];
#ifdef EPOCH_TRACKER_DEBUG
int datai[5];
#else
int datai[1];
#ifdef EPOCH_TRACKER_DEBUG
#define EPOCH_MAGIC0 0xFADECAFEF00DD00D
#define EPOCH_MAGIC1 0xBADDBABEDEEDFEED
uint64_t et_magic_pre;
#endif
TAILQ_ENTRY(epoch_tracker) et_link;
struct thread *et_td;
ck_epoch_section_t et_section;
#ifdef EPOCH_TRACKER_DEBUG
uint64_t et_magic_post;
#endif
} __aligned(sizeof(void *));
typedef struct epoch_tracker *epoch_tracker_t;
epoch_t epoch_alloc(int flags);
@ -68,26 +72,18 @@ void epoch_wait_preempt(epoch_t epoch);
void epoch_call(epoch_t epoch, epoch_context_t ctx, void (*callback) (epoch_context_t));
int in_epoch(epoch_t epoch);
int in_epoch_verbose(epoch_t epoch, int dump_onfail);
#ifdef _KERNEL
DPCPU_DECLARE(int, epoch_cb_count);
DPCPU_DECLARE(struct grouptask, epoch_cb_task);
#define EPOCH_MAGIC0 0xFADECAFEF00DD00D
#define EPOCH_MAGIC1 0xBADDBABEDEEDFEED
void epoch_enter_preempt_KBI(epoch_t epoch, epoch_tracker_t et);
void epoch_exit_preempt_KBI(epoch_t epoch, epoch_tracker_t et);
void epoch_enter_KBI(epoch_t epoch);
void epoch_exit_KBI(epoch_t epoch);
void epoch_enter_preempt(epoch_t epoch, epoch_tracker_t et);
void epoch_exit_preempt(epoch_t epoch, epoch_tracker_t et);
void epoch_enter(epoch_t epoch);
void epoch_exit(epoch_t epoch);
void epoch_thread_init(struct thread *);
void epoch_thread_fini(struct thread *);
#if defined(KLD_MODULE) && !defined(KLD_TIED)
#define epoch_enter_preempt(e, t) epoch_enter_preempt_KBI((e), (t))
#define epoch_exit_preempt(e, t) epoch_exit_preempt_KBI((e), (t))
#define epoch_enter(e) epoch_enter_KBI((e))
#define epoch_exit(e) epoch_exit_KBI((e))
#else
#include <sys/epoch_private.h>
#endif /* KLD_MODULE */
#endif /* _KERNEL */
#endif
#endif /* _KERNEL */
#endif /* _SYS_EPOCH_H_ */

View File

@ -1,211 +0,0 @@
/*-
* SPDX-License-Identifier: BSD-2-Clause-FreeBSD
*
* Copyright (c) 2018, Matthew Macy <mmacy@freebsd.org>
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
* ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
* OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
* HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
*
* $FreeBSD$
*/
#ifndef _SYS_EPOCH_PRIVATE_H_
#define _SYS_EPOCH_PRIVATE_H_
#ifndef _KERNEL
#error "no user serviceable parts"
#else
#include <ck_epoch.h>
#include <sys/kpilite.h>
#include <sys/mutex.h>
extern void epoch_adjust_prio(struct thread *td, u_char prio);
#ifndef _SYS_SYSTM_H_
extern void critical_exit_preempt(void);
#endif
#ifdef __amd64__
#define EPOCH_ALIGN CACHE_LINE_SIZE*2
#else
#define EPOCH_ALIGN CACHE_LINE_SIZE
#endif
/*
* Standalone (_sa) routines for thread state manipulation
*/
static __inline void
critical_enter_sa(void *tdarg)
{
struct thread_lite *td;
td = tdarg;
td->td_critnest++;
__compiler_membar();
}
static __inline void
critical_exit_sa(void *tdarg)
{
struct thread_lite *td;
td = tdarg;
MPASS(td->td_critnest > 0);
__compiler_membar();
td->td_critnest--;
__compiler_membar();
if (__predict_false(td->td_owepreempt != 0))
critical_exit_preempt();
}
typedef struct epoch_thread {
#ifdef EPOCH_TRACKER_DEBUG
uint64_t et_magic_pre;
#endif
TAILQ_ENTRY(epoch_thread) et_link; /* Epoch queue. */
struct thread *et_td; /* pointer to thread in section */
ck_epoch_section_t et_section; /* epoch section object */
#ifdef EPOCH_TRACKER_DEBUG
uint64_t et_magic_post;
#endif
} *epoch_thread_t;
TAILQ_HEAD (epoch_tdlist, epoch_thread);
typedef struct epoch_record {
ck_epoch_record_t er_read_record;
ck_epoch_record_t er_write_record;
volatile struct epoch_tdlist er_tdlist;
volatile uint32_t er_gen;
uint32_t er_cpuid;
} __aligned(EPOCH_ALIGN) *epoch_record_t;
struct epoch {
struct ck_epoch e_epoch __aligned(EPOCH_ALIGN);
epoch_record_t e_pcpu_record;
int e_idx;
int e_flags;
};
static epoch_record_t
epoch_currecord(epoch_t epoch)
{
return zpcpu_get_cpu(epoch->e_pcpu_record, curcpu);
}
#define INIT_CHECK(epoch) \
do { \
if (__predict_false((epoch) == NULL)) \
return; \
} while (0)
static __inline void
epoch_enter_preempt(epoch_t epoch, epoch_tracker_t et)
{
struct epoch_record *er;
struct epoch_thread *etd;
struct thread_lite *td;
MPASS(cold || epoch != NULL);
INIT_CHECK(epoch);
etd = (void *)et;
MPASS(epoch->e_flags & EPOCH_PREEMPT);
#ifdef EPOCH_TRACKER_DEBUG
etd->et_magic_pre = EPOCH_MAGIC0;
etd->et_magic_post = EPOCH_MAGIC1;
#endif
td = (struct thread_lite *)curthread;
etd->et_td = (void*)td;
td->td_epochnest++;
critical_enter_sa(td);
sched_pin_lite(td);
td->td_pre_epoch_prio = td->td_priority;
er = epoch_currecord(epoch);
TAILQ_INSERT_TAIL(&er->er_tdlist, etd, et_link);
ck_epoch_begin(&er->er_read_record, (ck_epoch_section_t *)&etd->et_section);
critical_exit_sa(td);
}
static __inline void
epoch_enter(epoch_t epoch)
{
struct thread_lite *td;
epoch_record_t er;
MPASS(cold || epoch != NULL);
INIT_CHECK(epoch);
td = (struct thread_lite *)curthread;
td->td_epochnest++;
critical_enter_sa(td);
er = epoch_currecord(epoch);
ck_epoch_begin(&er->er_read_record, NULL);
}
static __inline void
epoch_exit_preempt(epoch_t epoch, epoch_tracker_t et)
{
struct epoch_record *er;
struct epoch_thread *etd;
struct thread_lite *td;
INIT_CHECK(epoch);
td = (struct thread_lite *)curthread;
critical_enter_sa(td);
sched_unpin_lite(td);
MPASS(td->td_epochnest);
td->td_epochnest--;
er = epoch_currecord(epoch);
MPASS(epoch->e_flags & EPOCH_PREEMPT);
etd = (void *)et;
MPASS(etd != NULL);
MPASS(etd->et_td == (struct thread *)td);
#ifdef EPOCH_TRACKER_DEBUG
MPASS(etd->et_magic_pre == EPOCH_MAGIC0);
MPASS(etd->et_magic_post == EPOCH_MAGIC1);
etd->et_magic_pre = 0;
etd->et_magic_post = 0;
#endif
etd->et_td = (void*)0xDEADBEEF;
ck_epoch_end(&er->er_read_record,
(ck_epoch_section_t *)&etd->et_section);
TAILQ_REMOVE(&er->er_tdlist, etd, et_link);
er->er_gen++;
if (__predict_false(td->td_pre_epoch_prio != td->td_priority))
epoch_adjust_prio((struct thread *)td, td->td_pre_epoch_prio);
critical_exit_sa(td);
}
static __inline void
epoch_exit(epoch_t epoch)
{
struct thread_lite *td;
epoch_record_t er;
INIT_CHECK(epoch);
td = (struct thread_lite *)curthread;
MPASS(td->td_epochnest);
td->td_epochnest--;
er = epoch_currecord(epoch);
ck_epoch_end(&er->er_read_record, NULL);
critical_exit_sa(td);
}
#endif /* _KERNEL */
#endif /* _SYS_EPOCH_PRIVATE_H_ */

View File

@ -193,6 +193,7 @@ struct trapframe;
struct turnstile;
struct vm_map;
struct vm_map_entry;
struct epoch_tracker;
/*
* XXX: Does this belong in resource.h or resourcevar.h instead?
@ -360,6 +361,7 @@ struct thread {
int td_lastcpu; /* (t) Last cpu we were on. */
int td_oncpu; /* (t) Which cpu we are on. */
void *td_lkpi_task; /* LinuxKPI task struct pointer */
struct epoch_tracker *td_et; /* (k) compat KPI spare tracker */
int td_pmcpend;
};

View File

@ -82,7 +82,7 @@ int ffs_getcg(struct fs *, struct vnode *, u_int, struct buf **,
struct cg **);
int ffs_isblock(struct fs *, u_char *, ufs1_daddr_t);
int ffs_isfreeblock(struct fs *, u_char *, ufs1_daddr_t);
void ffs_load_inode(struct buf *, struct inode *, struct fs *, ino_t);
int ffs_load_inode(struct buf *, struct inode *, struct fs *, ino_t);
void ffs_oldfscompat_write(struct fs *, struct ufsmount *);
int ffs_own_mount(const struct mount *mp);
int ffs_reallocblks(struct vop_reallocblks_args *);

View File

@ -148,12 +148,18 @@ loop:
if (I_IS_UFS1(ip)) {
*((struct ufs1_dinode *)bp->b_data +
ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1;
/* XXX: FIX? The entropy here is desirable, but the harvesting may be expensive */
/*
* XXX: FIX? The entropy here is desirable,
* but the harvesting may be expensive
*/
random_harvest_queue(&(ip->i_din1), sizeof(ip->i_din1), RANDOM_FS_ATIME);
} else {
*((struct ufs2_dinode *)bp->b_data +
ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
/* XXX: FIX? The entropy here is desirable, but the harvesting may be expensive */
/*
* XXX: FIX? The entropy here is desirable,
* but the harvesting may be expensive
*/
random_harvest_queue(&(ip->i_din2), sizeof(ip->i_din2), RANDOM_FS_ATIME);
}
if (waitfor)

View File

@ -1333,12 +1333,12 @@ expunge_ufs2(snapvp, cancelip, fs, acctfunc, expungetype, clearmode)
*/
dip = (struct ufs2_dinode *)bp->b_data +
ino_to_fsbo(fs, cancelip->i_number);
if (clearmode || cancelip->i_effnlink == 0)
dip->di_mode = 0;
dip->di_size = 0;
dip->di_blocks = 0;
dip->di_flags &= ~SF_SNAPSHOT;
bzero(&dip->di_db[0], (UFS_NDADDR + UFS_NIADDR) * sizeof(ufs2_daddr_t));
if (clearmode || cancelip->i_effnlink == 0)
dip->di_mode = 0;
bdwrite(bp);
/*
* Now go through and expunge all the blocks in the file

View File

@ -6698,12 +6698,13 @@ softdep_journal_freeblocks(ip, cred, length, flags)
if (bp->b_bufsize == fs->fs_bsize)
bp->b_flags |= B_CLUSTEROK;
softdep_update_inodeblock(ip, bp, 0);
if (ump->um_fstype == UFS1)
if (ump->um_fstype == UFS1) {
*((struct ufs1_dinode *)bp->b_data +
ino_to_fsbo(fs, ip->i_number)) = *ip->i_din1;
else
} else {
*((struct ufs2_dinode *)bp->b_data +
ino_to_fsbo(fs, ip->i_number)) = *ip->i_din2;
}
ACQUIRE_LOCK(ump);
(void) inodedep_lookup(mp, ip->i_number, DEPALLOC, &inodedep);
if ((inodedep->id_state & IOSTARTED) != 0)
@ -9640,6 +9641,7 @@ static void
clear_unlinked_inodedep(inodedep)
struct inodedep *inodedep;
{
struct ufs2_dinode *dip;
struct ufsmount *ump;
struct inodedep *idp;
struct inodedep *idn;
@ -9743,12 +9745,14 @@ clear_unlinked_inodedep(inodedep)
ffs_oldfscompat_write((struct fs *)bp->b_data, ump);
softdep_setup_sbupdate(ump, (struct fs *)bp->b_data,
bp);
} else if (fs->fs_magic == FS_UFS1_MAGIC)
} else if (fs->fs_magic == FS_UFS1_MAGIC) {
((struct ufs1_dinode *)bp->b_data +
ino_to_fsbo(fs, pino))->di_freelink = nino;
else
((struct ufs2_dinode *)bp->b_data +
ino_to_fsbo(fs, pino))->di_freelink = nino;
} else {
dip = (struct ufs2_dinode *)bp->b_data +
ino_to_fsbo(fs, pino);
dip->di_freelink = nino;
}
/*
* If the bwrite fails we have no recourse to recover. The
* filesystem is corrupted already.

View File

@ -108,31 +108,35 @@ ffs_blkatoff(struct vnode *vp, off_t offset, char **res, struct buf **bpp)
* Load up the contents of an inode and copy the appropriate pieces
* to the incore copy.
*/
void
int
ffs_load_inode(struct buf *bp, struct inode *ip, struct fs *fs, ino_t ino)
{
struct ufs1_dinode *dip1;
struct ufs2_dinode *dip2;
if (I_IS_UFS1(ip)) {
*ip->i_din1 =
dip1 = ip->i_din1;
*dip1 =
*((struct ufs1_dinode *)bp->b_data + ino_to_fsbo(fs, ino));
ip->i_mode = ip->i_din1->di_mode;
ip->i_nlink = ip->i_din1->di_nlink;
ip->i_size = ip->i_din1->di_size;
ip->i_flags = ip->i_din1->di_flags;
ip->i_gen = ip->i_din1->di_gen;
ip->i_uid = ip->i_din1->di_uid;
ip->i_gid = ip->i_din1->di_gid;
} else {
*ip->i_din2 =
*((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ino));
ip->i_mode = ip->i_din2->di_mode;
ip->i_nlink = ip->i_din2->di_nlink;
ip->i_size = ip->i_din2->di_size;
ip->i_flags = ip->i_din2->di_flags;
ip->i_gen = ip->i_din2->di_gen;
ip->i_uid = ip->i_din2->di_uid;
ip->i_gid = ip->i_din2->di_gid;
ip->i_mode = dip1->di_mode;
ip->i_nlink = dip1->di_nlink;
ip->i_size = dip1->di_size;
ip->i_flags = dip1->di_flags;
ip->i_gen = dip1->di_gen;
ip->i_uid = dip1->di_uid;
ip->i_gid = dip1->di_gid;
return (0);
}
dip2 = ip->i_din2;
*dip2 = *((struct ufs2_dinode *)bp->b_data + ino_to_fsbo(fs, ino));
ip->i_mode = dip2->di_mode;
ip->i_nlink = dip2->di_nlink;
ip->i_size = dip2->di_size;
ip->i_flags = dip2->di_flags;
ip->i_gen = dip2->di_gen;
ip->i_uid = dip2->di_uid;
ip->i_gid = dip2->di_gid;
return (0);
}
#endif /* KERNEL */

View File

@ -740,16 +740,19 @@ loop:
bread(devvp, fsbtodb(fs, ino_to_fsba(fs, ip->i_number)),
(int)fs->fs_bsize, NOCRED, &bp);
if (error) {
VOP_UNLOCK(vp, 0);
vrele(vp);
vput(vp);
MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
return (error);
}
if ((error = ffs_load_inode(bp, ip, fs, ip->i_number)) != 0) {
brelse(bp);
vput(vp);
MNT_VNODE_FOREACH_ALL_ABORT(mp, mvp);
return (error);
}
ffs_load_inode(bp, ip, fs, ip->i_number);
ip->i_effnlink = ip->i_nlink;
brelse(bp);
VOP_UNLOCK(vp, 0);
vrele(vp);
vput(vp);
}
return (0);
}
@ -1729,7 +1732,12 @@ ffs_vgetf(mp, ino, flags, vpp, ffs_flags)
ip->i_din1 = uma_zalloc(uma_ufs1, M_WAITOK);
else
ip->i_din2 = uma_zalloc(uma_ufs2, M_WAITOK);
ffs_load_inode(bp, ip, fs, ino);
if ((error = ffs_load_inode(bp, ip, fs, ino)) != 0) {
bqrelse(bp);
vput(vp);
*vpp = NULL;
return (error);
}
if (DOINGSOFTDEP(vp))
softdep_load_inodeblock(ip);
else

View File

@ -459,6 +459,36 @@ bucket_zone_drain(void)
zone_drain(ubz->ubz_zone);
}
static uma_bucket_t
zone_try_fetch_bucket(uma_zone_t zone, uma_zone_domain_t zdom, const bool ws)
{
uma_bucket_t bucket;
ZONE_LOCK_ASSERT(zone);
if ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) {
MPASS(zdom->uzd_nitems >= bucket->ub_cnt);
LIST_REMOVE(bucket, ub_link);
zdom->uzd_nitems -= bucket->ub_cnt;
if (ws && zdom->uzd_imin > zdom->uzd_nitems)
zdom->uzd_imin = zdom->uzd_nitems;
}
return (bucket);
}
static void
zone_put_bucket(uma_zone_t zone, uma_zone_domain_t zdom, uma_bucket_t bucket,
const bool ws)
{
ZONE_LOCK_ASSERT(zone);
LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
zdom->uzd_nitems += bucket->ub_cnt;
if (ws && zdom->uzd_imax < zdom->uzd_nitems)
zdom->uzd_imax = zdom->uzd_nitems;
}
static void
zone_log_warning(uma_zone_t zone)
{
@ -508,6 +538,23 @@ uma_timeout(void *unused)
callout_reset(&uma_callout, UMA_TIMEOUT * hz, uma_timeout, NULL);
}
/*
* Update the working set size estimate for the zone's bucket cache.
* The constants chosen here are somewhat arbitrary. With an update period of
* 20s (UMA_TIMEOUT), this estimate is dominated by zone activity over the
* last 100s.
*/
static void
zone_domain_update_wss(uma_zone_domain_t zdom)
{
long wss;
MPASS(zdom->uzd_imax >= zdom->uzd_imin);
wss = zdom->uzd_imax - zdom->uzd_imin;
zdom->uzd_imax = zdom->uzd_imin = zdom->uzd_nitems;
zdom->uzd_wss = (3 * wss + 2 * zdom->uzd_wss) / 5;
}
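zone_try_fetch_bucket() and zone_put_bucket() above maintain per-domain watermarks (uzd_imax/uzd_imin) on the bucket cache, and zone_domain_update_wss() folds each period's swing (imax - imin) into uzd_wss with the recurrence wss = (3 * sample + 2 * wss) / 5. Since the old estimate keeps a 2/5 weight per 20-second UMA_TIMEOUT period, the most recent five periods account for 1 - (2/5)^5, roughly 99% of the estimate, which is what the comment means by "dominated by zone activity over the last 100s". A standalone arithmetic illustration (userspace C, not part of the diff):

/*
 * Illustration of the working-set-size EWMA used above:
 * wss = (3 * sample + 2 * wss) / 5 every UMA_TIMEOUT (20 s) period.
 * Prints how much of the estimate the most recent n periods contribute.
 */
#include <math.h>
#include <stdio.h>

int
main(void)
{
	double kept;
	int n;

	for (n = 1; n <= 5; n++) {
		/* The old estimate retains a 2/5 weight each period. */
		kept = 1.0 - pow(2.0 / 5.0, n);
		printf("last %3d s of samples: %.1f%% of the estimate\n",
		    n * 20, kept * 100.0);
	}
	return (0);
}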
/*
* Routine to perform timeout driven calculations. This expands the
* hashes and does per cpu statistics aggregation.
@ -560,8 +607,14 @@ keg_timeout(uma_keg_t keg)
static void
zone_timeout(uma_zone_t zone)
{
int i;
zone_foreach_keg(zone, &keg_timeout);
ZONE_LOCK(zone);
for (i = 0; i < vm_ndomains; i++)
zone_domain_update_wss(&zone->uz_domain[i]);
ZONE_UNLOCK(zone);
}
/*
@ -772,16 +825,16 @@ cache_drain_safe_cpu(uma_zone_t zone)
cache = &zone->uz_cpu[curcpu];
if (cache->uc_allocbucket) {
if (cache->uc_allocbucket->ub_cnt != 0)
LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets,
cache->uc_allocbucket, ub_link);
zone_put_bucket(zone, &zone->uz_domain[domain],
cache->uc_allocbucket, false);
else
b1 = cache->uc_allocbucket;
cache->uc_allocbucket = NULL;
}
if (cache->uc_freebucket) {
if (cache->uc_freebucket->ub_cnt != 0)
LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets,
cache->uc_freebucket, ub_link);
zone_put_bucket(zone, &zone->uz_domain[domain],
cache->uc_freebucket, false);
else
b2 = cache->uc_freebucket;
cache->uc_freebucket = NULL;
@ -844,8 +897,8 @@ bucket_cache_drain(uma_zone_t zone)
*/
for (i = 0; i < vm_ndomains; i++) {
zdom = &zone->uz_domain[i];
while ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) {
LIST_REMOVE(bucket, ub_link);
while ((bucket = zone_try_fetch_bucket(zone, zdom, false)) !=
NULL) {
ZONE_UNLOCK(zone);
bucket_drain(zone, bucket);
bucket_free(zone, bucket, NULL);
@ -2523,11 +2576,9 @@ zalloc_start:
zdom = &zone->uz_domain[0];
else
zdom = &zone->uz_domain[domain];
if ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) {
if ((bucket = zone_try_fetch_bucket(zone, zdom, true)) != NULL) {
KASSERT(bucket->ub_cnt != 0,
("uma_zalloc_arg: Returning an empty bucket."));
LIST_REMOVE(bucket, ub_link);
cache->uc_allocbucket = bucket;
ZONE_UNLOCK(zone);
goto zalloc_start;
@ -2556,6 +2607,7 @@ zalloc_start:
critical_enter();
cpu = curcpu;
cache = &zone->uz_cpu[cpu];
/*
* See if we lost the race or were migrated. Cache the
* initialized bucket to make this less likely or claim
@ -2565,6 +2617,7 @@ zalloc_start:
((zone->uz_flags & UMA_ZONE_NUMA) == 0 ||
domain == PCPU_GET(domain))) {
cache->uc_allocbucket = bucket;
zdom->uzd_imax += bucket->ub_cnt;
} else if ((zone->uz_flags & UMA_ZONE_NOBUCKETCACHE) != 0) {
critical_exit();
ZONE_UNLOCK(zone);
@ -2572,7 +2625,7 @@ zalloc_start:
bucket_free(zone, bucket, udata);
goto zalloc_restart;
} else
LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
zone_put_bucket(zone, zdom, bucket, false);
ZONE_UNLOCK(zone);
goto zalloc_start;
}
@ -3200,7 +3253,7 @@ zfree_start:
bucket_free(zone, bucket, udata);
goto zfree_restart;
} else
LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link);
zone_put_bucket(zone, zdom, bucket, true);
}
/*
@ -3649,6 +3702,7 @@ uma_reclaim_locked(bool kmem_danger)
cache_drain_safe(NULL);
zone_foreach(zone_drain);
}
/*
* Some slabs may have been freed but this zone will be visited early
* we visit again so that we can free pages that are empty once other
@ -3882,7 +3936,7 @@ uma_print_zone(uma_zone_t zone)
* directly so that we don't have to.
*/
static void
uma_zone_sumstat(uma_zone_t z, int *cachefreep, uint64_t *allocsp,
uma_zone_sumstat(uma_zone_t z, long *cachefreep, uint64_t *allocsp,
uint64_t *freesp, uint64_t *sleepsp)
{
uma_cache_t cache;
@ -3937,7 +3991,6 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
struct uma_stream_header ush;
struct uma_type_header uth;
struct uma_percpu_stat *ups;
uma_bucket_t bucket;
uma_zone_domain_t zdom;
struct sbuf sbuf;
uma_cache_t cache;
@ -3997,9 +4050,7 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS)
for (i = 0; i < vm_ndomains; i++) {
zdom = &z->uz_domain[i];
LIST_FOREACH(bucket, &zdom->uzd_buckets,
ub_link)
uth.uth_zone_free += bucket->ub_cnt;
uth.uth_zone_free += zdom->uzd_nitems;
}
uth.uth_allocs = z->uz_allocs;
uth.uth_frees = z->uz_frees;
@ -4199,12 +4250,11 @@ uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item)
#ifdef DDB
DB_SHOW_COMMAND(uma, db_show_uma)
{
uma_bucket_t bucket;
uma_keg_t kz;
uma_zone_t z;
uma_zone_domain_t zdom;
uint64_t allocs, frees, sleeps;
int cachefree, i;
long cachefree;
int i;
db_printf("%18s %8s %8s %8s %12s %8s %8s\n", "Zone", "Size", "Used",
"Free", "Requests", "Sleeps", "Bucket");
@ -4221,13 +4271,10 @@ DB_SHOW_COMMAND(uma, db_show_uma)
if (!((z->uz_flags & UMA_ZONE_SECONDARY) &&
(LIST_FIRST(&kz->uk_zones) != z)))
cachefree += kz->uk_free;
for (i = 0; i < vm_ndomains; i++) {
zdom = &z->uz_domain[i];
LIST_FOREACH(bucket, &zdom->uzd_buckets,
ub_link)
cachefree += bucket->ub_cnt;
}
db_printf("%18s %8ju %8jd %8d %12ju %8ju %8u\n",
for (i = 0; i < vm_ndomains; i++)
cachefree += z->uz_domain[i].uzd_nitems;
db_printf("%18s %8ju %8jd %8ld %12ju %8ju %8u\n",
z->uz_name, (uintmax_t)kz->uk_size,
(intmax_t)(allocs - frees), cachefree,
(uintmax_t)allocs, sleeps, z->uz_count);
@ -4239,22 +4286,18 @@ DB_SHOW_COMMAND(uma, db_show_uma)
DB_SHOW_COMMAND(umacache, db_show_umacache)
{
uma_bucket_t bucket;
uma_zone_t z;
uma_zone_domain_t zdom;
uint64_t allocs, frees;
int cachefree, i;
long cachefree;
int i;
db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free",
"Requests", "Bucket");
LIST_FOREACH(z, &uma_cachezones, uz_link) {
uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL);
for (i = 0; i < vm_ndomains; i++) {
zdom = &z->uz_domain[i];
LIST_FOREACH(bucket, &zdom->uzd_buckets, ub_link)
cachefree += bucket->ub_cnt;
}
db_printf("%18s %8ju %8jd %8d %12ju %8u\n",
for (i = 0; i < vm_ndomains; i++)
cachefree += z->uz_domain[i].uzd_nitems;
db_printf("%18s %8ju %8jd %8ld %12ju %8u\n",
z->uz_name, (uintmax_t)z->uz_size,
(intmax_t)(allocs - frees), cachefree,
(uintmax_t)allocs, z->uz_count);

View File

@ -304,6 +304,10 @@ typedef struct uma_klink *uma_klink_t;
struct uma_zone_domain {
LIST_HEAD(,uma_bucket) uzd_buckets; /* full buckets */
long uzd_nitems; /* total item count */
long uzd_imax; /* maximum item count this period */
long uzd_imin; /* minimum item count this period */
long uzd_wss; /* working set size estimate */
};
typedef struct uma_zone_domain * uma_zone_domain_t;
@ -423,11 +427,12 @@ void uma_large_free(uma_slab_t slab);
mtx_init(&(z)->uz_lock, (z)->uz_name, \
"UMA zone", MTX_DEF | MTX_DUPOK); \
} while (0)
#define ZONE_LOCK(z) mtx_lock((z)->uz_lockptr)
#define ZONE_TRYLOCK(z) mtx_trylock((z)->uz_lockptr)
#define ZONE_UNLOCK(z) mtx_unlock((z)->uz_lockptr)
#define ZONE_LOCK_FINI(z) mtx_destroy(&(z)->uz_lock)
#define ZONE_LOCK_ASSERT(z) mtx_assert((z)->uz_lockptr, MA_OWNED)
/*
* Find a slab within a hash table. This is used for OFFPAGE zones to lookup

View File

@ -129,6 +129,7 @@ void dump_drop_page(vm_paddr_t);
void finishidentcpu(void);
void identify_cpu1(void);
void identify_cpu2(void);
void identify_cpu_fixup_bsp(void);
void identify_hypervisor(void);
void initializecpu(void);
void initializecpucache(void);

View File

@ -1467,6 +1467,19 @@ identify_cpu2(void)
}
}
void
identify_cpu_fixup_bsp(void)
{
u_int regs[4];
cpu_vendor_id = find_cpu_vendor_id();
if (fix_cpuid()) {
do_cpuid(0, regs);
cpu_high = regs[0];
}
}
/*
* Final stage of CPU identification.
*/
@ -1478,12 +1491,7 @@ finishidentcpu(void)
u_char ccr3;
#endif
cpu_vendor_id = find_cpu_vendor_id();
if (fix_cpuid()) {
do_cpuid(0, regs);
cpu_high = regs[0];
}
identify_cpu_fixup_bsp();
if (cpu_high >= 5 && (cpu_feature2 & CPUID2_MON) != 0) {
do_cpuid(5, regs);

View File

@ -60,6 +60,7 @@ DIRDEPS = \
lib/libdevdctl \
lib/libdevinfo \
lib/libdevstat \
lib/libdl \
lib/libdwarf \
lib/libedit/edit/readline \
lib/libelf \
@ -214,10 +215,6 @@ DIRDEPS+= \
DIRDEPS+= stand/libsa32
.endif
.if defined(LINKER_FEATURES) && ${LINKER_FEATURES:Mfilter}
DIRDEPS+= lib/libdl
.endif
.if ${MK_NAND} != "no"
DIRDEPS+= lib/libnandfs
.endif

View File

@ -47,7 +47,7 @@ main(argc, argv)
char *argv[];
{
struct uufsd disk;
union dinode *dp;
union dinodep dp;
struct fs *fs;
struct stat sb;
struct statfs sfb;
@ -98,11 +98,11 @@ main(argc, argv)
(void)printf("%s (inode #%jd): ", filename,
(intmax_t)inonum);
if ((error = getino(&disk, (void **)&dp, inonum, NULL)) < 0)
warn("Read of inode %jd on %s failed",
(intmax_t)inonum, fsname);
if ((error = getinode(&disk, &dp, inonum)) < 0)
warn("Read of inode %jd on %s failed: %s",
(intmax_t)inonum, fsname, disk.d_error);
prtblknos(&disk, dp);
prtblknos(&disk, (union dinode *)dp.dp1);
}
exit(0);
}

View File

@ -8,4 +8,10 @@ PROG= head
HAS_TESTS=
SUBDIR.${MK_TESTS}+= tests
.if ${MK_CASPER} != "no" && !defined(RESCUE)
LIBADD+= casper
LIBADD+= cap_fileargs
CFLAGS+=-DWITH_CASPER
.endif
.include <bsd.prog.mk>

View File

@ -43,10 +43,13 @@ static char sccsid[] = "@(#)head.c 8.2 (Berkeley) 5/4/95";
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/capsicum.h>
#include <sys/types.h>
#include <capsicum_helpers.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
#include <getopt.h>
#include <inttypes.h>
#include <stdio.h>
@ -54,6 +57,9 @@ __FBSDID("$FreeBSD$");
#include <string.h>
#include <unistd.h>
#include <libcasper.h>
#include <casper/cap_fileargs.h>
/*
* head - give the first few lines of a stream or of each of a set of files
*
@ -75,14 +81,19 @@ static const struct option long_opts[] =
int
main(int argc, char *argv[])
{
int ch;
FILE *fp;
int first, linecnt = -1, eval = 0;
off_t bytecnt = -1;
char *ep;
off_t bytecnt;
int ch, first, linecnt, eval;
fileargs_t *fa;
cap_rights_t rights;
linecnt = -1;
eval = 0;
bytecnt = -1;
obsolete(argv);
while ((ch = getopt_long(argc, argv, "+n:c:", long_opts, NULL)) != -1)
while ((ch = getopt_long(argc, argv, "+n:c:", long_opts, NULL)) != -1) {
switch(ch) {
case 'c':
bytecnt = strtoimax(optarg, &ep, 10);
@ -97,17 +108,28 @@ main(int argc, char *argv[])
case '?':
default:
usage();
/* NOTREACHED */
}
}
argc -= optind;
argv += optind;
fa = fileargs_init(argc, argv, O_RDONLY, 0,
cap_rights_init(&rights, CAP_READ, CAP_FSTAT, CAP_FCNTL));
if (fa == NULL)
errx(1, "unable to init casper");
caph_cache_catpages();
if (caph_limit_stdio() < 0 || caph_enter_casper() < 0)
err(1, "unable to enter capability mode");
if (linecnt != -1 && bytecnt != -1)
errx(1, "can't combine line and byte counts");
if (linecnt == -1 )
if (linecnt == -1)
linecnt = 10;
if (*argv) {
for (first = 1; *argv; ++argv) {
if ((fp = fopen(*argv, "r")) == NULL) {
if (*argv != NULL) {
for (first = 1; *argv != NULL; ++argv) {
if ((fp = fileargs_fopen(fa, *argv, "r")) == NULL) {
warn("%s", *argv);
eval = 1;
continue;
@ -128,6 +150,7 @@ main(int argc, char *argv[])
else
head_bytes(stdin, bytecnt);
fileargs_free(fa);
exit(eval);
}
@ -137,7 +160,7 @@ head(FILE *fp, int cnt)
char *cp;
size_t error, readlen;
while (cnt && (cp = fgetln(fp, &readlen)) != NULL) {
while (cnt != 0 && (cp = fgetln(fp, &readlen)) != NULL) {
error = fwrite(cp, sizeof(char), readlen, stdout);
if (error != readlen)
err(1, "stdout");

View File

@ -1,7 +1,15 @@
# @(#)Makefile 8.1 (Berkeley) 6/6/93
# $FreeBSD$
.include <src.opts.mk>
PROG= wc
LIBADD= xo
.if ${MK_CASPER} != "no"
LIBADD+= casper
LIBADD+= cap_fileargs
CFLAGS+=-DWITH_CASPER
.endif
.include <bsd.prog.mk>

View File

@ -44,9 +44,11 @@ static char sccsid[] = "@(#)wc.c 8.1 (Berkeley) 6/6/93";
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
#include <sys/capsicum.h>
#include <sys/param.h>
#include <sys/stat.h>
#include <capsicum_helpers.h>
#include <ctype.h>
#include <err.h>
#include <errno.h>
@ -61,6 +63,10 @@ __FBSDID("$FreeBSD$");
#include <wctype.h>
#include <libxo/xo.h>
#include <libcasper.h>
#include <casper/cap_fileargs.h>
static fileargs_t *fa;
static uintmax_t tlinect, twordct, tcharct, tlongline;
static int doline, doword, dochar, domulti, dolongline;
static volatile sig_atomic_t siginfo;
@ -90,6 +96,7 @@ int
main(int argc, char *argv[])
{
int ch, errors, total;
cap_rights_t rights;
(void) setlocale(LC_CTYPE, "");
@ -125,6 +132,26 @@ main(int argc, char *argv[])
(void)signal(SIGINFO, siginfo_handler);
fa = fileargs_init(argc, argv, O_RDONLY, 0,
cap_rights_init(&rights, CAP_READ, CAP_FSTAT));
if (fa == NULL) {
xo_warn("Unable to init casper");
exit(1);
}
caph_cache_catpages();
if (caph_limit_stdio() < 0) {
xo_warn("Unable to limit stdio");
fileargs_free(fa);
exit(1);
}
if (caph_enter_casper() < 0) {
xo_warn("Unable to enter capability mode");
fileargs_free(fa);
exit(1);
}
/* Wc's flags are on by default. */
if (doline + doword + dochar + domulti + dolongline == 0)
doline = doword = dochar = 1;
@ -158,6 +185,7 @@ main(int argc, char *argv[])
xo_close_container("total");
}
fileargs_free(fa);
xo_close_container("wc");
xo_finish();
exit(errors == 0 ? 0 : 1);
@ -206,7 +234,7 @@ cnt(const char *file)
linect = wordct = charct = llct = tmpll = 0;
if (file == NULL)
fd = STDIN_FILENO;
else if ((fd = open(file, O_RDONLY, 0)) < 0) {
else if ((fd = fileargs_open(fa, file)) < 0) {
xo_warn("%s: open", file);
return (1);
}

View File

@ -73,16 +73,16 @@ amd_probe(int fd)
}
void
amd_update(const char *dev, const char *path)
amd_update(const struct ucode_update_params *params)
{
int fd, devfd;
int devfd;
unsigned int i;
struct stat st;
uint32_t *fw_image;
amd_fw_header_t *fw_header;
const char *dev, *path;
const uint32_t *fw_image;
const amd_fw_header_t *fw_header;
uint32_t sum;
uint32_t signature;
uint32_t *fw_data;
const uint32_t *fw_data;
size_t fw_size;
cpuctl_cpuid_args_t idargs = {
.level = 1, /* Request signature. */
@ -90,16 +90,14 @@ amd_update(const char *dev, const char *path)
cpuctl_update_args_t args;
int error;
dev = params->dev_path;
path = params->fw_path;
devfd = params->devfd;
fw_image = params->fwimage;
assert(path);
assert(dev);
fd = -1;
fw_image = MAP_FAILED;
devfd = open(dev, O_RDWR);
if (devfd < 0) {
WARN(0, "could not open %s for writing", dev);
return;
}
error = ioctl(devfd, CPUCTL_CPUID, &idargs);
if (error < 0) {
WARN(0, "ioctl()");
@ -115,37 +113,18 @@ amd_update(const char *dev, const char *path)
/*
* Open the firmware file.
*/
fd = open(path, O_RDONLY, 0);
if (fd < 0) {
WARN(0, "open(%s)", path);
goto fail;
}
error = fstat(fd, &st);
if (error != 0) {
WARN(0, "fstat(%s)", path);
goto fail;
}
if (st.st_size < 0 || (unsigned)st.st_size < sizeof(*fw_header)) {
if (params->fwsize < sizeof(*fw_header)) {
WARNX(2, "file too short: %s", path);
goto fail;
}
/*
* mmap the whole image.
*/
fw_image = (uint32_t *)mmap(NULL, st.st_size, PROT_READ,
MAP_PRIVATE, fd, 0);
if (fw_image == MAP_FAILED) {
WARN(0, "mmap(%s)", path);
goto fail;
}
fw_header = (amd_fw_header_t *)fw_image;
fw_header = (const amd_fw_header_t *)fw_image;
if ((fw_header->magic >> 8) != AMD_MAGIC) {
WARNX(2, "%s is not a valid amd firmware: version mismatch",
path);
goto fail;
}
fw_data = (uint32_t *)(fw_header + 1);
fw_size = (st.st_size - sizeof(*fw_header)) / sizeof(uint32_t);
fw_data = (const uint32_t *)(fw_header + 1);
fw_size = (params->fwsize - sizeof(*fw_header)) / sizeof(uint32_t);
/*
* Check the primary checksum.
@ -160,8 +139,8 @@ amd_update(const char *dev, const char *path)
if (signature == fw_header->signature) {
fprintf(stderr, "%s: updating cpu %s... ", path, dev);
args.data = fw_image;
args.size = st.st_size;
args.data = __DECONST(void *, fw_image);
args.size = params->fwsize;
error = ioctl(devfd, CPUCTL_UPDATE, &args);
if (error < 0) {
fprintf(stderr, "failed.\n");
@ -172,12 +151,5 @@ amd_update(const char *dev, const char *path)
}
fail:
if (fd >= 0)
close(fd);
if (devfd >= 0)
close(devfd);
if (fw_image != MAP_FAILED)
if(munmap(fw_image, st.st_size) != 0)
warn("munmap(%s)", path);
return;
}

Some files were not shown because too many files have changed in this diff.