Remove GEOM_SCHED class and gsched tool.

This code was not actively maintained since it was introduced 10 years ago. It lacks support for many later GEOM features, such as direct dispatch, unmapped I/O, stripesize/stripeoffset, resize, etc. Plus it is the only remaining use of GEOM nstart/nend request counters, used there to implement live insertion/removal, questionable by itself. Plus, as number of people commented, GEOM is not the best place for I/O scheduler, since it has limited information about layers both above and below it, required for efficient scheduling. Plus with the modern shift to SSDs there is just no more significant need for this kind of scheduling. Approved by: imp, phk, luigi Relnotes: yes
2019-12-29 21:16:03 +00:00 · 2019-12-29 21:16:03 +00:00 · 86c06ff886
commit 86c06ff886
parent 2a73387f1c
22 changed files with 4 additions and 3637 deletions
--- a/lib/geom/Makefile.classes
+++ b/lib/geom/Makefile.classes
@ -20,7 +20,6 @@ GEOM_CLASSES+=	nop
 GEOM_CLASSES+=	part
 GEOM_CLASSES+=	raid
 GEOM_CLASSES+=	raid3
 GEOM_CLASSES+=	sched
 GEOM_CLASSES+=	shsec
 GEOM_CLASSES+=	stripe
 GEOM_CLASSES+=	virstor
--- a/lib/geom/sched/Makefile
+++ b/lib/geom/sched/Makefile
@ -1,9 +0,0 @@
 # GEOM_LIBRARY_PATH
 # $FreeBSD$
 PACKAGE=runtime
 .PATH: ${.CURDIR:H:H}/misc
 GEOM_CLASS=	sched
 .include <bsd.lib.mk>
--- a/lib/geom/sched/Makefile.depend
+++ b/lib/geom/sched/Makefile.depend
@ -1,19 +0,0 @@
 # $FreeBSD$
 # Autogenerated - do NOT edit!
 DIRDEPS = \
 	gnu/lib/csu \
 	include \
 	include/xlocale \
 	lib/${CSU_DIR} \
 	lib/libc \
 	lib/libcompiler_rt \
 	lib/libgeom \
 	sbin/geom/core \
 .include <dirdeps.mk>
 .if ${DEP_RELDIR} == ${_DEP_RELDIR}
 # local dependencies - needed for -jN in clean tree
 .endif
--- a/lib/geom/sched/geom_sched.c
+++ b/lib/geom/sched/geom_sched.c
@ -1,128 +0,0 @@
 /*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2009 Fabio Checconi
 * Copyright (c) 2010 Luigi Rizzo, Universita` di Pisa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
 /*
 * $Id$
 * $FreeBSD$
 *
 * This file implements the userspace library used by the 'geom'
 * command to load and manipulate disk schedulers.
 */
 #include <sys/cdefs.h>
 #include <sys/param.h>
 #include <sys/linker.h>
 #include <sys/module.h>
 #include <stdio.h>
 #include <stdint.h>
 #include <libgeom.h>
 #include "core/geom.h"
 #include "misc/subr.h"
 #define	G_SCHED_VERSION	0
 uint32_t lib_version = G_LIB_VERSION;
 uint32_t version = G_SCHED_VERSION;
 /*
 * storage for parameters used by this geom class.
 * Right now only the scheduler name is used.
 */
 #define	GSCHED_ALGO	"rr"	/* default scheduler */
 /*
 * Adapt to differences in geom library.
 * in V1 struct g_command misses gc_argname, eld, and G_BOOL is undefined
 */
 #if G_LIB_VERSION <= 1
 #define G_TYPE_BOOL	G_TYPE_NUMBER
 #endif
 #if G_LIB_VERSION >= 3 && G_LIB_VERSION <= 4
 #define G_ARGNAME	NULL,
 #else
 #define	G_ARGNAME
 #endif
 /*
  * Handler shared by the "create" and "insert" commands: make a best-effort
  * attempt to load the kernel module for the requested algorithm, then
  * issue the request.
  */
 static void
 gcmd_createinsert(struct gctl_req *req, unsigned flags __unused)
 {
 	char modname[64];
 	const char *algo;
 	/* Fall back to the default algorithm when "algo" is not supplied. */
 	algo = gctl_has_param(req, "algo") ?
 	    gctl_get_ascii(req, "algo") : GSCHED_ALGO;
 	/* Scheduler modules are named "gsched_<algo>". */
 	snprintf(modname, sizeof(modname), "gsched_%s", algo);
 	/*
 	 * A failed lookup or load is deliberately not reported here:
 	 * gctl_issue() will fail anyway in that case.
 	 */
 	if (modfind(modname) < 0)
 		kldload(modname);
 	gctl_issue(req);
 }
 /*
  * Command table for the "sched" class.  "create" and "insert" go through
  * gcmd_createinsert(); "configure", "destroy" and "reset" have a NULL
  * handler.  The trailing string of each entry is its usage text, and
  * G_ARGNAME expands to "NULL," or to nothing depending on G_LIB_VERSION.
  */
 struct g_command class_commands[] = {
 	{ "create", G_FLAG_VERBOSE | G_FLAG_LOADKLD, gcmd_createinsert,
 	    {
 		{ 'a', "algo", GSCHED_ALGO, G_TYPE_STRING },
 		G_OPT_SENTINEL
 	    },
 	    G_ARGNAME "[-v] [-a algorithm_name] dev ..."
 	},
 	{ "insert", G_FLAG_VERBOSE | G_FLAG_LOADKLD, gcmd_createinsert,
 	    {
 		{ 'a', "algo", GSCHED_ALGO, G_TYPE_STRING },
 		G_OPT_SENTINEL
 	    },
 	    G_ARGNAME "[-v] [-a algorithm_name] dev ..."
 	},
 	{ "configure", G_FLAG_VERBOSE, NULL,
 	    {
 		{ 'a', "algo", GSCHED_ALGO, G_TYPE_STRING },
 		G_OPT_SENTINEL
 	    },
 	    G_ARGNAME "[-v] [-a algorithm_name] prov ..."
 	},
 	{ "destroy", G_FLAG_VERBOSE, NULL,
 	    {
 		{ 'f', "force", NULL, G_TYPE_BOOL },
 		G_OPT_SENTINEL
 	    },
 	    G_ARGNAME "[-fv] prov ..."
 	},
 	{ "reset", G_FLAG_VERBOSE, NULL, G_NULL_OPTS,
 	    G_ARGNAME "[-v] prov ..."
 	},
 	/* End-of-table marker. */
 	G_CMD_SENTINEL
 };
--- a/lib/geom/sched/gsched.8
+++ b/lib/geom/sched/gsched.8
@ -1,162 +0,0 @@
 .\" Copyright (c) 2009-2010 Fabio Checconi
 .\" Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
 .\" All rights reserved.
 .\"
 .\" Redistribution and use in source and binary forms, with or without
 .\" modification, are permitted provided that the following conditions
 .\" are met:
 .\" 1. Redistributions of source code must retain the above copyright
 .\"    notice, this list of conditions and the following disclaimer.
 .\" 2. Redistributions in binary form must reproduce the above copyright
 .\"    notice, this list of conditions and the following disclaimer in the
 .\"    documentation and/or other materials provided with the distribution.
 .\"
 .\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 .\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 .\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 .\" ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 .\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 .\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 .\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 .\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 .\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 .\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 .\" SUCH DAMAGE.
 .\"
 .\" $FreeBSD$
 .\"
 .Dd July 26, 2012
 .Dt GSCHED 8
 .Os
 .Sh NAME
 .Nm gsched
 .Nd "control utility for disk scheduler GEOM class"
 .Sh SYNOPSIS
 .Nm
 .Cm create
 .Op Fl v
 .Op Fl a Ar algorithm
 .Ar provider ...
 .Nm
 .Cm insert
 .Op Fl v
 .Op Fl a Ar algorithm
 .Ar provider ...
 .Nm
 .Cm configure
 .Op Fl v
 .Op Fl a Ar algorithm
 .Ar node ...
 .Nm
 .Cm destroy
 .Op Fl fv
 .Ar node ...
 .Nm
 .Cm reset
 .Op Fl v
 .Ar node ...
 .Nm
 .Cm { list | status | load | unload }
 .Sh DESCRIPTION
 The
 .Nm
 utility (also callable as
 .Nm geom sched ... )
 changes the scheduling policy of the requests going to a provider.
 .Pp
 The first argument to
 .Nm
 indicates an action to be performed:
 .Bl -tag -width ".Cm configure"
 .It Cm create
 Create a new provider and geom node using the specified scheduling algorithm.
 .Ar algorithm
 is the name of the scheduling algorithm used for the provider.
 Available algorithms include:
 .Ar rr ,
 which implements anticipatory scheduling with round robin service
 among clients;
 .Ar as ,
 which implements a simple form of anticipatory scheduling with
 no per-client queue.
 .Pp
 If the operation succeeds, the new provider should appear with name
 .Pa /dev/ Ns Ao Ar dev Ac Ns Pa .sched. .
 The kernel module
 .Pa geom_sched.ko
 will be loaded if it is not loaded already.
 .It Cm insert
 Operates as "create", but the insertion is "transparent",
 i.e. the existing provider is rerouted to the newly created geom,
 which in turn forwards requests to the existing geom.
 This operation allows one to start/stop a scheduling service
 on an already existing provider.
 .Pp
 A subsequent "destroy" will remove the newly created geom and
 hook the provider back to the original geom.
 .It Cm configure
 Configure an existing scheduling provider.  It supports the same options
 as the
 .Cm create
 command.
 .It Cm destroy
 Destroy the geom specified in the parameter.
 .It Cm reset
 Do nothing.
 .It Cm list | status | load | unload
 See
 .Xr geom 8 .
 .El
 .Pp
 Additional options:
 .Bl -tag -width ".Fl f"
 .It Fl f
 Force the removal of the specified provider.
 .It Fl v
 Be more verbose.
 .El
 .Sh SYSCTL VARIABLES
 The following
 .Xr sysctl 8
 variables can be used to control the behavior of the
 .Nm SCHED
 GEOM class.
 The default value is shown next to each variable.
 .Bl -tag -width indent
 .It Va kern.geom.sched.debug : No 0
 Debug level of the
 .Nm SCHED
 GEOM class.
 This can be set to a number between 0 and 2 inclusive.
 If set to 0 minimal debug information is printed, and if set to 2 the
 maximum amount of debug information is printed.
 .El
 .Sh EXIT STATUS
 Exit status is 0 on success, and 1 if the command fails.
 .Sh EXAMPLES
 The following example shows how to create a scheduling provider for disk
 .Pa /dev/ada0 ,
 and how to destroy it.
 .Bd -literal -offset indent
 # Load the geom_sched module:
 kldload geom_sched
 # Load some scheduler classes used by geom_sched:
 kldload gsched_rr
 # Configure device ada0 to use scheduler "rr":
 geom sched insert -a rr ada0
 # Now provider ada0 uses the "rr" algorithm;
 # the new geom is ada0.sched.
 # Remove the scheduler on the device:
 geom sched destroy -v ada0.sched.
 .Ed
 .Sh SEE ALSO
 .Xr geom 4 ,
 .Xr geom 8
 .Sh HISTORY
 The
 .Nm
 utility first appeared in
 .Fx 8.1 .
 .Sh AUTHORS
 .An Fabio Checconi Aq Mt fabio@FreeBSD.org
 .An Luigi Rizzo Aq Mt luigi@FreeBSD.org
--- a/sys/geom/geom.h
+++ b/sys/geom/geom.h
@ -231,17 +231,6 @@ struct g_provider {
 	u_int			index;
 };
 /*
 * Descriptor of a classifier. We can register a function and
 * an argument, which is called by g_io_request() on bio's
 * that are not previously classified.
 */
 struct g_classifier_hook {
 	/* Linkage on the list of registered classifiers. */
 	TAILQ_ENTRY(g_classifier_hook) link;
 	/* Callback invoked as func(arg, bp); non-zero means "classified". */
 	int			(*func)(void *arg, struct bio *bp);
 	/* Opaque argument passed back as the first parameter of func. */
 	void			*arg;
 };
 /* BIO_GETATTR("GEOM::setstate") argument values. */
 #define G_STATE_FAILED		0
 #define G_STATE_REBUILD		1
@ -344,8 +333,6 @@ int g_io_getattr(const char *attr, struct g_consumer *cp, int *len, void *ptr);
 int g_io_zonecmd(struct disk_zone_args *zone_args, struct g_consumer *cp);
 int g_io_flush(struct g_consumer *cp);
 int g_io_speedup(size_t shortage, u_int flags, size_t *resid, struct g_consumer *cp);
 int g_register_classifier(struct g_classifier_hook *hook);
 void g_unregister_classifier(struct g_classifier_hook *hook);
 void g_io_request(struct bio *bp, struct g_consumer *cp);
 struct bio *g_new_bio(void);
 struct bio *g_alloc_bio(void);
--- a/sys/geom/geom_io.c
+++ b/sys/geom/geom_io.c
@ -87,15 +87,6 @@ static volatile u_int __read_mostly pace;
 static uma_zone_t __read_mostly biozone;
 /*
 * The head of the list of classifiers used in g_io_request.
 * Use g_register_classifier() and g_unregister_classifier()
 * to add/remove entries to the list.
 * Classifiers are invoked in registration order.
 */
 static TAILQ_HEAD(, g_classifier_hook) g_classifier_tailq __read_mostly =
    TAILQ_HEAD_INITIALIZER(g_classifier_tailq);
 #include <machine/atomic.h>
 static void
@ -224,9 +215,6 @@ g_clone_bio(struct bio *bp)
 		if (bp->bio_cmd == BIO_ZONE)
 			bcopy(&bp->bio_zone, &bp2->bio_zone,
 			    sizeof(bp->bio_zone));
 		/* Inherit classification info from the parent */
 		bp2->bio_classifier1 = bp->bio_classifier1;
 		bp2->bio_classifier2 = bp->bio_classifier2;
 #if defined(BUF_TRACKING) || defined(FULL_BUF_TRACKING)
 		bp2->bio_track_bp = bp->bio_track_bp;
 #endif
@ -498,65 +486,6 @@ g_io_check(struct bio *bp)
 	return (EJUSTRETURN);
 }
 /*
 * bio classification support.
 *
 * g_register_classifier() and g_unregister_classifier()
 * are used to add/remove a classifier from the list.
 * The list is protected using the g_bio_run_down lock,
 * because the classifiers are called in this path.
 *
 * g_io_request() passes bio's that are not already classified
 * (i.e. those with bio_classifier1 == NULL) to g_run_classifiers().
 * Classifiers can store their result in the two fields
 * bio_classifier1 and bio_classifier2.
 * A classifier that updates one of the fields should
 * return a non-zero value.
 * If no classifier updates the field, g_run_classifiers() sets
 * bio_classifier1 = BIO_NOTCLASSIFIED to avoid further calls.
 */
 /*
  * Append 'hook' to the tail of the classifier list while holding the
  * g_bio_run_down queue lock.  Always returns 0.
  */
 int
 g_register_classifier(struct g_classifier_hook *hook)
 {
 	g_bioq_lock(&g_bio_run_down);
 	TAILQ_INSERT_TAIL(&g_classifier_tailq, hook, link);
 	g_bioq_unlock(&g_bio_run_down);
 	return (0);
 }
 /*
  * Remove 'hook' from the classifier list if it is present; a hook that
  * was never registered is silently ignored.  The list is walked and
  * modified under the g_bio_run_down queue lock.
  */
 void
 g_unregister_classifier(struct g_classifier_hook *hook)
 {
 	struct g_classifier_hook *cur;
 	g_bioq_lock(&g_bio_run_down);
 	for (cur = TAILQ_FIRST(&g_classifier_tailq); cur != NULL;
 	    cur = TAILQ_NEXT(cur, link)) {
 		if (cur != hook)
 			continue;
 		/* Found it: unlink and stop, the walk is no longer needed. */
 		TAILQ_REMOVE(&g_classifier_tailq, cur, link);
 		break;
 	}
 	g_bioq_unlock(&g_bio_run_down);
 }
 /*
  * Offer 'bp' to every registered classifier, in list order.  If none of
  * them reports a classification (non-zero return), mark the bio as
  * BIO_NOTCLASSIFIED.
  */
 static void
 g_run_classifiers(struct bio *bp)
 {
 	struct g_classifier_hook *h;
 	int hit;
 	biotrack(bp, __func__);
 	hit = 0;
 	/* Every hook is called, even after one has already classified. */
 	TAILQ_FOREACH(h, &g_classifier_tailq, link) {
 		if (h->func(h->arg, bp) != 0)
 			hit = 1;
 	}
 	if (hit == 0)
 		bp->bio_classifier1 = BIO_NOTCLASSIFIED;
 }
 void
 g_io_request(struct bio *bp, struct g_consumer *cp)
 {
@ -640,12 +569,6 @@ g_io_request(struct bio *bp, struct g_consumer *cp)
 	direct = 0;
 #endif
 	if (!TAILQ_EMPTY(&g_classifier_tailq) && !bp->bio_classifier1) {
 		g_bioq_lock(&g_bio_run_down);
 		g_run_classifiers(bp);
 		g_bioq_unlock(&g_bio_run_down);
 	}
 	/*
 	 * The statistics collection is lockless, as such, but we
 	 * can not update one instance of the statistics from more
--- a/sys/geom/sched/README
+++ b/sys/geom/sched/README
@ -1,142 +0,0 @@
 	--- GEOM BASED DISK SCHEDULERS FOR FREEBSD ---
 This code contains a framework for GEOM-based disk schedulers and a
 couple of sample scheduling algorithms that use the framework and
 implement two forms of "anticipatory scheduling" (see below for more
 details).
 As a quick example of what this code can give you, try to run "dd",
 "tar", or some other program with highly SEQUENTIAL access patterns,
 together with "cvs", "cvsup", "svn" or other highly RANDOM access patterns
 (this is not a made-up example: it is pretty common for developers
 to have one or more apps doing random accesses, and others that do
 sequential accesses e.g., loading large binaries from disk, checking
 the integrity of tarballs, watching media streams and so on).
 These are the results we get on a local machine (AMD BE2400 dual
 core CPU, SATA 250GB disk):
    /mnt is a partition mounted on /dev/ad0s1f
    cvs: 	cvs -d /mnt/home/ncvs-local update -Pd /mnt/ports
    dd-read:	dd bs=128k of=/dev/null if=/dev/ad0 (or ad0-sched-)
    dd-write:	dd bs=128k if=/dev/zero of=/mnt/largefile
 			NO SCHEDULER		RR SCHEDULER
                	dd	cvs		dd	cvs
    dd-read only        72 MB/s	----		72 MB/s	---
    dd-write only	55 MB/s	---		55 MB/s	---
    dd-read+cvs		 6 MB/s	ok    		30 MB/s	ok
    dd-write+cvs	55 MB/s slooow		14 MB/s	ok
 As you can see, when a cvs is running concurrently with dd, the
 performance drops dramatically, and depending on read or write mode,
 one of the two is severely penalized.  The use of the RR scheduler
 in this example makes the dd-reader go much faster when competing
 with cvs, and lets cvs progress when competing with a writer.
 To try it out:
 1. PLEASE MAKE SURE THAT THE DISK THAT YOU WILL BE USING FOR TESTS
   DOES NOT CONTAIN PRECIOUS DATA.
    This is experimental code, so we make no guarantees, though
    I am routinely using it on my desktop and laptop.
 2. EXTRACT AND BUILD THE PROGRAMS
    A 'make install' in the directory should work (with root privs),
    or you can even try the binary modules.
    If you want to build the modules yourself, look at the Makefile.
 3. LOAD THE MODULE, CREATE A GEOM NODE, RUN TESTS
    The scheduler's module must be loaded first:
      # kldload gsched_rr
    substitute with gsched_as to test AS.  Then, supposing that you are
    using /dev/ad0 for testing, a scheduler can be attached to it with:
      # geom sched insert ad0
    The scheduler is inserted transparently in the geom chain, so
    mounted partitions and filesystems will keep working, but
    now requests will go through the scheduler.
    To change scheduler on-the-fly, you can reconfigure the geom:
      # geom sched configure -a as ad0.sched.
    assuming that gsched_as was loaded previously.
 4. SCHEDULER REMOVAL
    In principle it is possible to remove the scheduler module
    even on an active chain by doing
 	# geom sched destroy ad0.sched.
    However, there is some race in the geom subsystem which makes
    the removal unsafe if there are active requests on a chain.
    So, in order to reduce the risk of data losses, make sure
    you don't remove a scheduler from a chain with ongoing transactions.
 --- NOTES ON THE SCHEDULERS ---
 The important contribution of this code is the framework to experiment
 with different scheduling algorithms.  'Anticipatory scheduling'
 is a very powerful technique based on the following reasoning:
    The disk throughput is much better if it serves sequential requests.
    If we have a mix of sequential and random requests, and we see a
    non-sequential request, do not serve it immediately but instead wait
    a little bit (2..5ms) to see if there is another one coming that
    the disk can serve more efficiently.
 There are many details that should be added to make sure that the
 mechanism is effective with different workloads and systems, to
 gain a few extra percent in performance, to improve fairness,
 insulation among processes etc.  A discussion of the vast literature
 on the subject is beyond the purpose of this short note.
 --------------------------------------------------------------------------
 TRANSPARENT INSERT/DELETE
 geom_sched is an ordinary geom module, however it is convenient
 to plug it transparently into the geom graph, so that one can
 enable or disable scheduling on a mounted filesystem, and the
 names in /etc/fstab do not depend on the presence of the scheduler.
 To understand how this works in practice, remember that in GEOM
 we have "providers" and "geom" objects.
 Say that we want to hook a scheduler on provider "ad0",
 accessible through pointer 'pp'. Originally, pp is attached to
 geom "ad0" (same name, different object) accessible through pointer old_gp
  BEFORE	---> [ pp    --> old_gp ...]
 A normal "geom sched create ad0" call would create a new geom node
 on top of provider ad0/pp, and export a newly created provider
 ("ad0.sched." accessible through pointer newpp).
  AFTER create  ---> [ newpp --> gp --> cp ] ---> [ pp    --> old_gp ... ]
 On top of newpp, a whole tree will be created automatically, and we
 can e.g. mount partitions on /dev/ad0.sched.s1d, and those requests
 will go through the scheduler, whereas any partition mounted on
 the pre-existing device entries will not go through the scheduler.
 With the transparent insert mechanism, the original provider "ad0"/pp
 is hooked to the newly created geom, as follows:
  AFTER insert  ---> [ pp    --> gp --> cp ] ---> [ newpp --> old_gp ... ]
 so anything that was previously using provider pp will now have
 the requests routed through the scheduler node.
 A removal ("geom sched destroy ad0.sched.") will restore the original
 configuration.
 # $FreeBSD$
--- a/sys/geom/sched/g_sched.c
+++ b/sys/geom/sched/g_sched.c
--- a/sys/geom/sched/g_sched.h
+++ b/sys/geom/sched/g_sched.h
@ -1,111 +0,0 @@
 /*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2009-2010 Fabio Checconi
 * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
 #ifndef	_G_SCHED_H_
 #define	_G_SCHED_H_
 /*
 * $Id$
 * $FreeBSD$
 *
 * Header for the geom_sched class (userland library and kernel part).
 * See g_sched.c for documentation.
 * The userland code only needs the three G_SCHED_* values below.
 */
 #define	G_SCHED_CLASS_NAME	"SCHED"
 #define	G_SCHED_VERSION		0
 #define	G_SCHED_SUFFIX		".sched."
 #ifdef _KERNEL
 #define	G_SCHED_DEBUG(lvl, ...) \
    _GEOM_DEBUG("GEOM_SCHED", me.gs_debug, (lvl), NULL, __VA_ARGS__)
 #define	G_SCHED_LOGREQ(bp, ...) \
    _GEOM_DEBUG("GEOM_SCHED", me.gs_debug, 2, (bp), __VA_ARGS__)
 LIST_HEAD(g_hash, g_sched_class);
 /*
 * Descriptor of a scheduler.
 * In addition to the obvious fields, sc_flushing and sc_pending
 * support dynamic switching of scheduling algorithm.
 * Normally, sc_flushing is 0, and requests that are scheduled are
 * also added to the sc_pending queue, and removed when we receive
 * the 'done' event.
 *
 * When we are transparently inserted on an existing provider,
 * sc_proxying is set. The detach procedure is slightly different.
 *
 * When switching schedulers, sc_flushing is set so requests bypass us,
 * and at the same time we update the pointer in the pending bios
 * to ignore us when they return up.
 * XXX it would be more efficient to implement sc_pending with
 * a generation number: the softc generation is increased when
 * we change scheduling algorithm, we store the current generation
 * number in the pending bios, and when they come back we ignore
 * the done() call if the generation number do not match.
 */
 struct g_sched_softc {
 	/*
 	 * Generic fields used by any scheduling algorithm:
 	 * a mutex, the class descriptor, flags, list of pending
 	 * requests (used when flushing the module) and support
 	 * for hash tables where we store per-flow queues.
 	 *
 	 * NOTE(review): sc_pending is declared as an int, not a list, and
 	 * there are no sc_flushing/sc_proxying members; presumably that
 	 * state lives in sc_flags as the G_SCHED_FLUSHING and
 	 * G_SCHED_PROXYING values -- confirm against g_sched.c.
 	 */
 	struct mtx	sc_mtx;
 	struct g_gsched	*sc_gsched;	/* Scheduler descriptor. */
 	int		sc_pending;	/* Pending requests. */
 	int		sc_flags;	/* Various flags. */
 	/*
 	 * Hash tables to store per-flow queues are generally useful
 	 * so we handle them in the common code.
 	 * sc_hash and sc_mask are parameters of the hash table,
 	 * the last two fields are used to periodically remove
 	 * expired items from the hash table.
 	 */
 	struct g_hash	*sc_hash;
 	u_long		sc_mask;
 	int		sc_flush_ticks;	/* Next tick for a flush. */
 	int		sc_flush_bucket; /* Next bucket to flush. */
 	/*
 	 * Pointer to the algorithm's private data, which is the value
 	 * returned by sc_gsched->gs_init() . A NULL here means failure.
 	 * XXX intptr_t might be more appropriate.
 	 */
 	void		*sc_data;
 };
 #define	G_SCHED_PROXYING	1
 #define	G_SCHED_FLUSHING	2
 #endif	/* _KERNEL */
 #endif	/* _G_SCHED_H_ */
--- a/sys/geom/sched/gs_delay.c
+++ b/sys/geom/sched/gs_delay.c
@ -1,264 +0,0 @@
 /*-
 * Copyright (c) 2015 Netflix, Inc.
 *
 * Derived from gs_rr.c:
 * Copyright (c) 2009-2010 Fabio Checconi
 * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
 /*
 * $Id$
 * $FreeBSD$
 *
 * A simple scheduler that just delays certain transactions by a certain
 * amount. We collect all the transactions that are 'done' and put them on
 * a queue. The queue is run through every so often and the transactions that
 * have taken longer than the threshold delay are completed.
 */
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/bio.h>
 #include <sys/callout.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/proc.h>
 #include <sys/queue.h>
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include "gs_scheduler.h"
 /* Useful constants */
 #define BTFRAC_1US 18446744073709ULL	/* 2^64 / 1000000 */
 /* list of scheduler instances */
 LIST_HEAD(g_scheds, g_delay_softc);
 /*
  * Per-device descriptor: a reference to the geom, the queue of bios
  * being held back, and the callout that periodically re-runs dispatch.
  */
 struct g_delay_softc {
 	struct g_geom	*sc_geom;
 	struct bio_queue_head sc_bioq;	/* queue of pending requests */
 	struct callout	sc_wait;	/* timer for completing with delays */
 	/* Statistics */
 	int		sc_in_flight;	/* requests in the driver */
 };
 /*
  * parameters, config and stats
  */
 struct g_delay_params {
 	uint64_t io;			/* number of I/Os released after a delay */
 	int	bypass;			/* bypass scheduling */
 	int	units;			/* how many instances */
 	int	latency;		/* minimum latency to impose, in microseconds */
 };
 /*
  * The single, module-wide parameter block.  Everything starts at zero;
  * the fields are exported below under kern.geom.sched.delay.
  */
 static struct g_delay_params me = {
 	.bypass = 0,
 	.units = 0,
 	.latency = 0,
 	.io = 0,
 };
 /* Non-static alias so the parameter block can be reached from outside. */
 struct g_delay_params *gs_delay_me = &me;
 SYSCTL_DECL(_kern_geom_sched);
 static SYSCTL_NODE(_kern_geom_sched, OID_AUTO, delay, CTLFLAG_RW, 0,
    "GEOM_SCHED DELAY stuff");
 SYSCTL_INT(_kern_geom_sched_delay, OID_AUTO, bypass, CTLFLAG_RD,
    &me.bypass, 0, "Scheduler bypass");
 SYSCTL_INT(_kern_geom_sched_delay, OID_AUTO, units, CTLFLAG_RD,
    &me.units, 0, "Scheduler instances");
 SYSCTL_INT(_kern_geom_sched_delay, OID_AUTO, latency, CTLFLAG_RW,
    &me.latency, 0, "Minimum latency for requests, in microseconds (1/hz resolution)");
 /* Description strings must not end in a newline: sysctl -d adds its own. */
 SYSCTL_QUAD(_kern_geom_sched_delay, OID_AUTO, io, CTLFLAG_RW,
    &me.io, 0, "I/Os delayed");
 /*
  * Class-initialisation hook (wired up as .gs_init_class).  This scheduler
  * keeps no class state, so both arguments are ignored and 0 is returned.
  */
 static int
 g_delay_init_class(void *data, void *priv)
 {
 	return (0);
 }
 /*
  * Class-teardown hook (wired up as .gs_fini_class).  Intentionally empty:
  * g_delay_init_class() sets nothing up.
  */
 static void
 g_delay_fini_class(void *data, void *priv)
 {
 }
 /*
 * Called on a request arrival, timeout or completion.
 * Try to serve a request among those queued.
 */
 static struct bio *
 g_delay_next(void *data, int force)
 {
 	struct g_delay_softc *sc = data;
 	struct bio *bp;
 	struct bintime bt;
 	bp = bioq_first(&sc->sc_bioq);
 	if (bp == NULL)
 		return (NULL);
 	/*
 	 * If the time isn't yet ripe for this bp to be let loose,
 	 * then the time isn't ripe for any of its friends either
 	 * since we insert in-order. Terminate if the bio hasn't
 	 * aged appropriately. Note that there's pathology here
 	 * such that we may be up to one tick early in releasing
 	 * this I/O. We could implement this up to a tick late too
 	 * but choose not to.
 	 */
 	getbinuptime(&bt);	/* BIO's bio_t0 is uptime */
 	if (bintime_cmp(&bp->bio_t0, &bt, >))
 		return (NULL);
 	me.io++;
 	/*
 	 * The bp has mellowed enough, let it through and update stats.
 	 * If there's others, we'll catch them next time we get called.
 	 */
 	sc->sc_in_flight++;
 	bp = bioq_takefirst(&sc->sc_bioq);
 	return (bp);
 }
 /*
  * Called when a real request for disk I/O arrives.
  * Stamp the bio with the uptime at which it may be released (now plus
  * the configured latency) and append it to the queue.
  * Returns 0 if the bio was queued, or -1 if the scheduler is bypassed,
  * in which case the caller takes care of the request.
  */
 static int
 g_delay_start(void *data, struct bio *bp)
 {
 	struct g_delay_softc *sc = data;
 	int latency;
 	if (me.bypass)
 		return (-1);	/* bypass the scheduler */
 	/*
 	 * Read the tunable once so both halves below agree, and treat a
 	 * negative setting as "no delay" rather than letting it convert
 	 * to a huge unsigned value.
 	 */
 	latency = me.latency;
 	if (latency < 0)
 		latency = 0;
 	bp->bio_caller1 = sc;
 	getbinuptime(&bp->bio_t0);	/* BIO's bio_t0 is uptime */
 	/*
 	 * bintime_addx() only adds a 64-bit binary fraction of a second,
 	 * so BTFRAC_1US * latency wraps once latency exceeds one second.
 	 * Add the whole seconds separately and only the sub-second
 	 * remainder as a fraction.
 	 */
 	bp->bio_t0.sec += latency / 1000000;
 	bintime_addx(&bp->bio_t0, BTFRAC_1US * (uint64_t)(latency % 1000000));
 	/*
 	 * Keep the I/Os ordered. Lower layers will reorder as we release them down.
 	 * We rely on this in g_delay_next() so that we delay all things equally. Even
 	 * if we move to multiple queues to push stuff down the stack, we'll want to
 	 * insert in order and let the lower layers do whatever reordering they want.
 	 */
 	bioq_insert_tail(&sc->sc_bioq, bp);
 	return (0);
 }
 /*
  * Callout handler: run the dispatch routine for this instance's geom
  * under the scheduler lock, then re-arm the callout for one tick later.
  */
 static void
 g_delay_timeout(void *data)
 {
 	struct g_delay_softc *sc = data;
 	g_sched_lock(sc->sc_geom);
 	g_sched_dispatch(sc->sc_geom);
 	g_sched_unlock(sc->sc_geom);
 	/* Self-rearming: keeps firing every tick until drained in fini. */
 	callout_reset(&sc->sc_wait, 1, g_delay_timeout, sc);
 }
 /*
  * Module glue: allocate descriptor, initialize its fields.
  * Returns the new per-instance descriptor for 'geom'.
  */
 static void *
 g_delay_init(struct g_geom *geom)
 {
 	struct g_delay_softc *sc;
 	/* Zeroed allocation, so sc_in_flight starts at 0. */
 	sc = malloc(sizeof *sc, M_GEOM_SCHED, M_WAITOK | M_ZERO);
 	sc->sc_geom = geom;
 	bioq_init(&sc->sc_bioq);
 	callout_init(&sc->sc_wait, CALLOUT_MPSAFE);
 	/* Arm the timer; g_delay_timeout() re-arms itself on each run. */
 	callout_reset(&sc->sc_wait, 1, g_delay_timeout, sc);
 	me.units++;
 	return (sc);
 }
 /*
  * Module glue -- drain the callout, drop the instance count, and
  * free the descriptor.
  */
 static void
 g_delay_fini(void *data)
 {
 	struct g_delay_softc *sc = data;
 	/* We're force drained before getting here */
 	/* Kick out timers */
 	callout_drain(&sc->sc_wait);
 	me.units--;
 	free(sc, M_GEOM_SCHED);
 }
 /*
  * Called when the request under service terminates.
  * Decrement the in-flight count and run dispatch again.
  * 'bp' itself is not examined.
  */
 static void
 g_delay_done(void *data, struct bio *bp)
 {
 	struct g_delay_softc *sc = data;
 	sc->sc_in_flight--;
 	g_sched_dispatch(sc->sc_geom);
 }
 /*
  * Configuration-dump hook (wired up as .gs_dumpconf).  Intentionally
  * empty: nothing is written to 'sb'.
  */
 static void
 g_delay_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
    struct g_consumer *cp, struct g_provider *pp)
 {
 }
 /*
  * Method table for the "delay" scheduler, passed to
  * DECLARE_GSCHED_MODULE() below.
  */
 static struct g_gsched g_delay = {
 	.gs_name = "delay",
 	.gs_priv_size = 0,
 	.gs_init = g_delay_init,
 	.gs_fini = g_delay_fini,
 	.gs_start = g_delay_start,
 	.gs_done = g_delay_done,
 	.gs_next = g_delay_next,
 	.gs_dumpconf = g_delay_dumpconf,
 	.gs_init_class = g_delay_init_class,
 	.gs_fini_class = g_delay_fini_class,
 };
 DECLARE_GSCHED_MODULE(delay, &g_delay);
--- a/sys/geom/sched/gs_rr.c
+++ b/sys/geom/sched/gs_rr.c
@ -1,701 +0,0 @@
 /*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2009-2010 Fabio Checconi
 * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
 /*
 * $Id$
 * $FreeBSD$
 *
 * A round-robin (RR) anticipatory scheduler, with per-client queues.
 *
 * The goal of this implementation is to improve throughput compared
 * to the pure elevator algorithm, and ensure some fairness among
 * clients.
 * 
 * Requests coming from the same client are put in the same queue.
 * We use anticipation to help reducing seeks, and each queue
 * is never served continuously for more than a given amount of
 * time or data. Queues are then served in a round-robin fashion.
 *
 * Each queue can be in any of the following states:
 *     READY	immediately serve the first pending request;
 *     BUSY	one request is under service, wait for completion;
 *     IDLING	do not serve incoming requests immediately, unless
 * 		they are "eligible" as defined later.
 *
 * Scheduling is made looking at the status of all queues,
 * and the first one in round-robin order is privileged.
 */
 #include <sys/param.h>
 #include <sys/systm.h>
 #include <sys/kernel.h>
 #include <sys/bio.h>
 #include <sys/callout.h>
 #include <sys/malloc.h>
 #include <sys/module.h>
 #include <sys/proc.h>
 #include <sys/queue.h>
 #include <sys/sbuf.h>
 #include <sys/sysctl.h>
 #include "gs_scheduler.h"
 /*
 * Possible states of a queue (stored in g_rr_queue.q_status).
 * Only meaningful for the queue currently selected as sc_active.
 */
 enum g_rr_state {
 	G_QUEUE_READY = 0,	/* Ready to dispatch. */
 	G_QUEUE_BUSY,		/* Waiting for a completion. */
 	G_QUEUE_IDLING		/* Waiting for a new request. */
 };
 /* Possible queue flags (stored in g_rr_queue.q_flags). */
 enum g_rr_flags {
 	/*
 	 * G_FLAG_COMPLETED means that the field q_slice_end is valid.
 	 * It is cleared when a queue is selected for service and set
 	 * when the first request of the slice completes.
 	 */
 	G_FLAG_COMPLETED = 1,	/* Completed a req. in the current budget. */
 };
 struct g_rr_softc;
 /*
 * Queue descriptor, containing scheduling state, a queue of pending
 * requests and configuration parameters.  It is the per-class private
 * data of this scheduler (see gs_priv_size in g_rr), so the reference
 * count lives in the enclosing struct g_sched_class, not here.
 * Queues with pending request(s) and not under service are also
 * stored in a Round Robin (RR) list.
 */
 struct g_rr_queue {
 	struct g_rr_softc *q_sc;	/* link to the parent */
 	enum g_rr_state	q_status;
 	unsigned int	q_service;	/* service received so far */
 	int		q_slice_end;	/* actual slice end time, in ticks */
 	enum g_rr_flags	q_flags;	/* queue flags */
 	struct bio_queue_head q_bioq;
 	/* Scheduling parameters */
 	unsigned int	q_budget;	/* slice size in bytes */
 	unsigned int	q_slice_duration; /* slice size in ticks */
 	unsigned int	q_wait_ticks;	/* wait time for anticipation */
 	/* Stats to drive the various heuristics. */
 	struct g_savg	q_thinktime;	/* Thinktime average. */
 	struct g_savg	q_seekdist;	/* Seek distance average. */
 	int		q_bionum;	/* Number of requests. */
 	off_t		q_lastoff;	/* Last submitted req. offset. */
 	int		q_lastsub;	/* Last submitted req. time. */
 	/*
 	 * Expiration deadline for an empty queue.
 	 * NOTE(review): not referenced by the code in this file.
 	 */
 	int		q_expire;
 	TAILQ_ENTRY(g_rr_queue) q_tailq; /* RR list link field */
 };
 /* List types: tail queue of g_rr_queue, used for the round-robin list. */
 TAILQ_HEAD(g_rr_tailq, g_rr_queue);
 /* list of scheduler instances */
 LIST_HEAD(g_scheds, g_rr_softc);
 /*
 * Default quantum for RR between queues (0x00800000 = 8 MiB).
 * NOTE(review): not referenced in this file; q_budget is derived
 * from me.quantum_kb instead.
 */
 #define	G_RR_DEFAULT_BUDGET	0x00800000
 /*
 * Per device descriptor, holding the Round Robin list of queues
 * accessing the disk, a reference to the geom, and the timer.
 * Allocated in g_rr_init() and released in g_rr_fini().
 */
 struct g_rr_softc {
 	struct g_geom	*sc_geom;
 	/*
 	 * sc_active is the queue we are anticipating for.
 	 * It is set only in g_rr_next(), and possibly cleared
 	 * only in g_rr_next() or on a timeout.
 	 * The active queue is never in the Round Robin list
 	 * even if it has requests queued.
 	 */
 	struct g_rr_queue *sc_active;
 	struct callout	sc_wait;	/* timer for sc_active */
 	struct g_rr_tailq sc_rr_tailq;	/* the round-robin list */
 	int		sc_nqueues;	/* number of queues */
 	/* Statistics */
 	int		sc_in_flight;	/* requests in the driver */
 	LIST_ENTRY(g_rr_softc)	sc_next;	/* link in me.sc_head */
 };
 /*
 * Descriptor for bounded values, min and max are constant.
 * x_cur is the tunable value; it is clamped to [x_min, x_max] on
 * every read through get_bounded(), not when it is written.
 */
 struct x_bound {		
 	const int	x_min;
 	int		x_cur;
 	const int	x_max;
 };
 /*
 * parameters, config and stats
 * There is a single instance of this structure ("me"), shared by all
 * scheduler instances and exported through sysctl.
 */
 struct g_rr_params {
 	int	queues;			/* total number of queues */
 	int	w_anticipate;		/* anticipate writes */
 	int	bypass;			/* non-zero: g_rr_start() refuses requests */
 	int	units;			/* how many instances */
 	/* sc_head is used for debugging */
 	struct g_scheds	sc_head;	/* first scheduler instance */
 	struct x_bound queue_depth;	/* max parallel requests */
 	struct x_bound wait_ms;		/* wait time, milliseconds */
 	struct x_bound quantum_ms;	/* quantum size, milliseconds */
 	struct x_bound quantum_kb;	/* quantum size, Kb (1024 bytes) */
 	/* statistics */
 	int	wait_hit;		/* success in anticipation */
 	int	wait_miss;		/* failure in anticipation */
 };
 /*
 * Default parameters for the scheduler.  The quantum sizes target
 * a 80MB/s disk; if the hw is faster or slower the minimum of the
 * two will have effect: the clients will still be isolated but
 * the fairness may be limited.  A complete solution would involve
 * the on-line measurement of the actual disk throughput to derive
 * these parameters.  Or we may just choose to ignore service domain
 * fairness and accept what can be achieved with time-only budgets.
 *
 * Each x_bound initializer is { x_min, x_cur, x_max }.
 */
 static struct g_rr_params me = {
 	.sc_head = LIST_HEAD_INITIALIZER(&me.sc_head),
 	.w_anticipate =	1,
 	.queue_depth =	{ 1,	1,	50 },
 	.wait_ms =	{ 1, 	10,	30 },
 	.quantum_ms =	{ 1, 	100,	500 },
 	.quantum_kb =	{ 16, 	8192,	65536 },
 };
 /* Non-static alias of the parameter block. */
 struct g_rr_params *gs_rr_me = &me;
 /*
 * sysctl knobs under kern.geom.sched.rr.  The tunables point at the
 * x_cur member of the bounded values, so out-of-range writes are
 * accepted here and only clamped when read through get_bounded().
 */
 SYSCTL_DECL(_kern_geom_sched);
 static SYSCTL_NODE(_kern_geom_sched, OID_AUTO, rr, CTLFLAG_RW, 0,
    "GEOM_SCHED ROUND ROBIN stuff");
 /* Read-only counters. */
 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, units, CTLFLAG_RD,
    &me.units, 0, "Scheduler instances");
 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, queues, CTLFLAG_RD,
    &me.queues, 0, "Total rr queues");
 /* Tunables. */
 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, wait_ms, CTLFLAG_RW,
    &me.wait_ms.x_cur, 0, "Wait time milliseconds");
 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, quantum_ms, CTLFLAG_RW,
    &me.quantum_ms.x_cur, 0, "Quantum size milliseconds");
 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, bypass, CTLFLAG_RW,
    &me.bypass, 0, "Bypass scheduler");
 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, w_anticipate, CTLFLAG_RW,
    &me.w_anticipate, 0, "Do anticipation on writes");
 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, quantum_kb, CTLFLAG_RW,
    &me.quantum_kb.x_cur, 0, "Quantum size Kbytes");
 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, queue_depth, CTLFLAG_RW,
    &me.queue_depth.x_cur, 0, "Maximum simultaneous requests");
 /* Anticipation statistics; writable, so they can be reset by hand. */
 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, wait_hit, CTLFLAG_RW,
    &me.wait_hit, 0, "Hits in anticipation");
 SYSCTL_INT(_kern_geom_sched_rr, OID_AUTO, wait_miss, CTLFLAG_RW,
    &me.wait_miss, 0, "Misses in anticipation");
 #ifdef DEBUG_QUEUES
 /* print the status of a queue */
 static void
 gs_rr_dump_q(struct g_rr_queue *qp, int index)
 {
 	int l = 0;
 	struct bio *bp;
 	TAILQ_FOREACH(bp, &(qp->q_bioq.queue), bio_queue) {
 		l++;
 	}
 	printf("--- rr queue %d %p status %d len %d ---\n",
 	    index, qp, qp->q_status, l);
 }
 /*
 * Dump the scheduler status when writing to this sysctl variable.
 * Every instance linked on me.sc_head is reported: a summary line,
 * then the queue under service (if any), then the queues waiting in
 * the round-robin list.  Reads of the variable do nothing.
 *
 * This is a debugging aid only.
 * NOTE(review): the lists are walked without taking the per-instance
 * scheduler lock, so the output may be inconsistent -- confirm whether
 * g_sched_lock() can be taken from a sysctl handler.
 */
 static int
 gs_rr_sysctl_status(SYSCTL_HANDLER_ARGS)
 {
 	struct g_rr_softc *sc;
 	struct g_rr_queue *qp;
 	int error, tot, val = 0;
 	error = sysctl_handle_int(oidp, &val, 0, req);
 	/* Only a successful write triggers the dump. */
 	if (error || !req->newptr)
 		return (error);
 	printf("called %s\n", __FUNCTION__);
 	LIST_FOREACH(sc, &me.sc_head, sc_next) {
 		tot = 0;
 		printf("--- sc %p active %p nqueues %d "
 		    "callout %d in_flight %d ---\n",
 		    sc, sc->sc_active, sc->sc_nqueues,
 		    callout_active(&sc->sc_wait),
 		    sc->sc_in_flight);
 		/* The active queue is never on the RR list: dump it first. */
 		if (sc->sc_active != NULL)
 			gs_rr_dump_q(sc->sc_active, tot++);
 		TAILQ_FOREACH(qp, &sc->sc_rr_tailq, q_tailq)
 			gs_rr_dump_q(qp, tot++);
 	}
 	return (0);
 }
 /* kern.geom.sched.rr.status: write any integer to dump the state. */
 SYSCTL_PROC(_kern_geom_sched_rr, OID_AUTO, status,
 	CTLTYPE_UINT | CTLFLAG_RW,
    0, sizeof(int), gs_rr_sysctl_status, "I", "status");
 #endif	/* DEBUG_QUEUES */
 /*
 * Return v->x_cur clamped to [x_min, x_max].  When t_min is non-zero
 * the clamped value is treated as milliseconds: it is converted to
 * ticks and raised to at least t_min ticks.
 */
 static int
 get_bounded(struct x_bound *v, int t_min)
 {
 	int val;
 	val = v->x_cur;
 	if (val < v->x_min)
 		val = v->x_min;
 	else if (val > v->x_max)
 		val = v->x_max;
 	if (t_min == 0)
 		return (val);
 	val = val * hz / 1000;	/* milliseconds to ticks */
 	return (val < t_min ? t_min : val);
 }
 /*
 * Look up (and take a reference on) the queue that bp belongs to,
 * through the generic classifier.  May return NULL; g_rr_start()
 * treats that as an allocation failure.
 */
 static struct g_rr_queue *
 g_rr_queue_get(struct g_rr_softc *sc, struct bio *bp)
 {
 	struct g_rr_queue *queue;
 	queue = g_sched_get_class(sc->sc_geom, bp);
 	return (queue);
 }
 static int
 g_rr_init_class(void *data, void *priv)
 {
 	struct g_rr_softc *sc = data;
 	struct g_rr_queue *qp = priv;
 	bioq_init(&qp->q_bioq);
 	/*
 	 * Set the initial parameters for the client:
 	 * slice size in bytes and ticks, and wait ticks.
 	 * Right now these are constant, but we could have
 	 * autoconfiguration code to adjust the values based on
 	 * the actual workload.
 	 */
 	qp->q_budget = 1024 * get_bounded(&me.quantum_kb, 0);
 	qp->q_slice_duration = get_bounded(&me.quantum_ms, 2);
 	qp->q_wait_ticks = get_bounded(&me.wait_ms, 2);
 	qp->q_sc = sc;		/* link to the parent */
 	qp->q_sc->sc_nqueues++;
 	me.queues++;
 	return (0);
 }
 /*
 * Drop one reference to the queue via the generic classifier.
 */
 static void
 g_rr_queue_put(struct g_rr_queue *qp)
 {
 	struct g_geom *geom;
 	geom = qp->q_sc->sc_geom;
 	g_sched_put_class(geom, qp);
 }
 /*
 * Class destructor (gs_fini_class): the queue must be empty by now;
 * undo the accounting done in g_rr_init_class().
 */
 static void
 g_rr_fini_class(void *data, void *priv)
 {
 	struct g_rr_queue *queue;
 	queue = priv;
 	KASSERT(bioq_first(&queue->q_bioq) == NULL,
 	    ("released nonempty queue"));
 	me.queues--;
 	queue->q_sc->sc_nqueues--;
 }
 /*
 * Return 1 when the slice of the queue is over: either the byte
 * budget is used up, or the slice end time is valid
 * (G_FLAG_COMPLETED) and has been reached.  Return 0 otherwise.
 */
 static inline int
 g_rr_queue_expired(struct g_rr_queue *qp)
 {
 	return (qp->q_service >= qp->q_budget ||
 	    ((qp->q_flags & G_FLAG_COMPLETED) != 0 &&
 	    ticks - qp->q_slice_end >= 0));
 }
 /*
 * Decide whether to wait for another request from the same queue
 * after dispatching bp.  Return 0 (do not anticipate) for writes when
 * w_anticipate is off, when the average think time exceeds the wait
 * time, or when the average seek distance exceeds 8192; else return 1.
 * An average is only consulted once it is valid.
 */
 static inline int
 g_rr_should_anticipate(struct g_rr_queue *qp, struct bio *bp)
 {
 	int max_think;
 	max_think = get_bounded(&me.wait_ms, 2);
 	if (bp->bio_cmd == BIO_WRITE && !me.w_anticipate)
 		return (0);
 	if (g_savg_valid(&qp->q_thinktime) &&
 	    g_savg_read(&qp->q_thinktime) > max_think)
 		return (0);
 	return (!(g_savg_valid(&qp->q_seekdist) &&
 	    g_savg_read(&qp->q_seekdist) > 8192));
 }
 /*
 * Called on a request arrival, timeout or completion.
 * Try to serve a request among those queued.
 *
 * data is the g_rr_softc of the instance.  A non-zero force (or
 * me.bypass set) skips the queue depth limit and anticipation.
 * Returns the bio to dispatch, or NULL if nothing should be served
 * now.  On a non-NULL return sc_in_flight has been incremented.
 */
 static struct bio *
 g_rr_next(void *data, int force)
 {
 	struct g_rr_softc *sc = data;
 	struct g_rr_queue *qp;
 	struct bio *bp, *next;
 	int expired;
 	qp = sc->sc_active;
 	if (me.bypass == 0 && !force) {
 		/* Normal operation: honor the queue depth limit. */
 		if (sc->sc_in_flight >= get_bounded(&me.queue_depth, 0))
 			return (NULL);
 		/* Try with the queue under service first. */
 		if (qp != NULL && qp->q_status != G_QUEUE_READY) {
 			/*
 			 * Queue is anticipating, ignore request.
 			 * We should check that we are not past
 			 * the timeout, but in that case the timeout
 			 * will fire immediately afterwards so we
 			 * don't bother.
 			 */
 			return (NULL);
 		}
 	} else if (qp != NULL && qp->q_status != G_QUEUE_READY) {
 		/*
 		 * Forced or bypassed: give up on the queue we were
 		 * waiting for and drop its reference.
 		 * NOTE(review): the callout is not stopped here.
 		 */
 		g_rr_queue_put(qp);
 		sc->sc_active = qp = NULL;
 	}
 	/*
 	 * No queue under service, look for the first in RR order.
 	 * If we find it, select it as sc_active, clear service
 	 * and invalidate the end time of the slice (it is computed
 	 * in g_rr_done() when the first request completes).
 	 */
 	if (qp == NULL) {
 		qp = TAILQ_FIRST(&sc->sc_rr_tailq);
 		if (qp == NULL)
 			return (NULL); /* no queues at all, return */
 		/* otherwise select the new queue for service. */
 		TAILQ_REMOVE(&sc->sc_rr_tailq, qp, q_tailq);
 		sc->sc_active = qp;
 		qp->q_service = 0;
 		qp->q_flags &= ~G_FLAG_COMPLETED;
 	}
 	/*
 	 * NOTE(review): relies on every queue that is on the RR list,
 	 * or active and READY, having at least one pending bio.
 	 */
 	bp = bioq_takefirst(&qp->q_bioq);	/* surely not NULL */
 	qp->q_service += bp->bio_length;	/* charge the service */
 	/*
 	 * The request at the head of the active queue is always
 	 * dispatched, and g_rr_next() will be called again
 	 * immediately.
 	 * We need to prepare for what to do next:
 	 *
 	 * 1. have we reached the end of the (time or service) slice ?
 	 *    If so, clear sc_active and possibly requeue the previous
 	 *    active queue if it has more requests pending;
 	 * 2. do we have more requests in sc_active ?
 	 *    If yes, do not anticipate, as g_rr_next() will run again;
 	 *    if no, decide whether or not to anticipate depending
 	 *    on read or writes (e.g., anticipate only on reads).
 	 */
 	expired = g_rr_queue_expired(qp);	/* are we expired ? */
 	next = bioq_first(&qp->q_bioq);	/* do we have one more ? */
 	if (expired) {
 		sc->sc_active = NULL;
 		/* Either requeue or release reference. */
 		if (next != NULL)
 			TAILQ_INSERT_TAIL(&sc->sc_rr_tailq, qp, q_tailq);
 		else
 			g_rr_queue_put(qp);
 	} else if (next != NULL) {
 		qp->q_status = G_QUEUE_READY;
 	} else {
 		if (!force && g_rr_should_anticipate(qp, bp)) {
 			/*
 			 * anticipate: stay active; g_rr_done() arms
 			 * the timer when this request completes.
 			 */
 			qp->q_status = G_QUEUE_BUSY;
 		} else {
 			/* do not anticipate, release reference */
 			g_rr_queue_put(qp);
 			sc->sc_active = NULL;
 		}
 	}
 	/* If sc_active != NULL, its q_status is always correct. */
 	sc->sc_in_flight++;
 	return (bp);
 }
 /*
 * Feed the think time statistics of the queue: the time since its
 * previous submission, capped at twice the anticipation wait.
 * Only done for the active queue; the sample is recorded only once
 * q_bionum exceeds 7.
 */
 static inline void
 g_rr_update_thinktime(struct g_rr_queue *qp)
 {
 	int elapsed, cap;
 	elapsed = ticks - qp->q_lastsub;
 	cap = 2 * get_bounded(&me.wait_ms, 2);
 	if (qp->q_sc->sc_active != qp)
 		return;
 	qp->q_lastsub = ticks;
 	if (elapsed > cap)
 		elapsed = cap;
 	if (qp->q_bionum > 7)
 		g_savg_add_sample(&qp->q_thinktime, elapsed);
 }
 /*
 * Feed the seek distance statistics of the queue: the absolute
 * distance between the start of bp and the end of the previous
 * request, capped at 8192 * 8.  The end offset of bp is always
 * remembered; the sample is recorded only once q_bionum exceeds 7.
 */
 static inline void
 g_rr_update_seekdist(struct g_rr_queue *qp, struct bio *bp)
 {
 	off_t gap;
 	gap = bp->bio_offset - qp->q_lastoff;
 	if (gap < 0)
 		gap = -gap;
 	if (gap > (8192 * 8))
 		gap = 8192 * 8;
 	qp->q_lastoff = bp->bio_offset + bp->bio_length;
 	if (qp->q_bionum > 7)
 		g_savg_add_sample(&qp->q_seekdist, gap);
 }
 /*
 * Called when a real request for disk I/O arrives.
 * Locate the queue associated with the client.
 * If the queue is the one we are anticipating for, reset its timeout;
 * if the queue is not in the round robin list, insert it in the list.
 * On any error, do not queue the request and return -1, the caller
 * will take care of this request.
 *
 * Returns 0 when the request has been queued.  In that case
 * bp->bio_caller1 points to the queue and carries a reference to it,
 * which g_rr_done() releases.
 */
 static int
 g_rr_start(void *data, struct bio *bp)
 {
 	struct g_rr_softc *sc = data;
 	struct g_rr_queue *qp;
 	if (me.bypass)
 		return (-1);	/* bypass the scheduler */
 	/* Get the queue for the request. */
 	qp = g_rr_queue_get(sc, bp);
 	if (qp == NULL)
 		return (-1); /* allocation failed, tell upstream */
 	if (bioq_first(&qp->q_bioq) == NULL) {
 		/*
 		 * We are inserting into an empty queue.
 		 * Reset its state if it is sc_active,
 		 * otherwise insert it in the RR list.
 		 */
 		if (qp == sc->sc_active) {
 			/* Anticipation paid off: cancel the timer. */
 			qp->q_status = G_QUEUE_READY;
 			callout_stop(&sc->sc_wait);
 		} else {
 			/* Extra reference held while queued for service. */
 			g_sched_priv_ref(qp);
 			TAILQ_INSERT_TAIL(&sc->sc_rr_tailq, qp, q_tailq);
 		}
 	}
 	/* Decaying request counter, gates the statistics below. */
 	qp->q_bionum = 1 + qp->q_bionum - (qp->q_bionum >> 3);
 	g_rr_update_thinktime(qp);
 	g_rr_update_seekdist(qp, bp);
 	/* Inherit the reference returned by g_rr_queue_get(). */
 	bp->bio_caller1 = qp;
 	bioq_disksort(&qp->q_bioq, bp);
 	return (0);
 }
 /*
 * Callout executed when a queue times out anticipating a new request.
 * Runs under the per-instance scheduler lock: gives up on the active
 * queue, if there still is one, and restarts dispatching.
 */
 static void
 g_rr_wait_timeout(void *data)
 {
 	struct g_rr_softc *softc = data;
 	struct g_geom *gp = softc->sc_geom;
 	struct g_rr_queue *idle;
 	g_sched_lock(gp);
 	/* We can race with other events: sc_active may be gone already. */
 	idle = softc->sc_active;
 	if (idle != NULL) {
 		/* Release the reference to the queue. */
 		g_rr_queue_put(idle);
 		softc->sc_active = NULL;
 		/* Turn the hit counted in g_rr_done() into a miss. */
 		me.wait_miss++;
 		me.wait_hit--;
 	}
 	g_sched_dispatch(gp);
 	g_sched_unlock(gp);
 }
 /*
 * Module glue: allocate descriptor, initialize its fields.
 */
 static void *
 g_rr_init(struct g_geom *geom)
 {
 	struct g_rr_softc *sc;
 	/* XXX check whether we can sleep */
 	sc = malloc(sizeof *sc, M_GEOM_SCHED, M_NOWAIT | M_ZERO);
 	sc->sc_geom = geom;
 	TAILQ_INIT(&sc->sc_rr_tailq);
 	callout_init(&sc->sc_wait, 1);
 	LIST_INSERT_HEAD(&me.sc_head, sc, sc_next);
 	me.units++;
 	return (sc);
 }
 /*
 * Module glue: drain the callout, check that no queue is left under
 * service or scheduled, unlink the descriptor from the instance list
 * and free it.
 */
 static void
 g_rr_fini(void *data)
 {
 	struct g_rr_softc *softc;
 	softc = data;
 	callout_drain(&softc->sc_wait);
 	KASSERT(softc->sc_active == NULL, ("still a queue under service"));
 	KASSERT(TAILQ_EMPTY(&softc->sc_rr_tailq), ("still scheduled queues"));
 	me.units--;
 	LIST_REMOVE(softc, sc_next);
 	free(softc, M_GEOM_SCHED);
 }
 /*
 * Called when the request under service terminates.
 * Start the anticipation timer if needed.
 *
 * data is the g_rr_softc of the instance; bp->bio_caller1 is the
 * queue the request was charged to (set in g_rr_start()).  The
 * reference carried by the request is released before returning.
 */
 static void
 g_rr_done(void *data, struct bio *bp)
 {
 	struct g_rr_softc *sc = data;
 	struct g_rr_queue *qp;
 	sc->sc_in_flight--;
 	qp = bp->bio_caller1;
 	/*
 	 * When the first request for this queue completes, update the
 	 * duration and end of the slice. We do not do it when the
 	 * slice starts to avoid charging to the queue the time for
 	 * the first seek.
 	 */
 	if (!(qp->q_flags & G_FLAG_COMPLETED)) {
 		qp->q_flags |= G_FLAG_COMPLETED;
 		/*
 		 * recompute the slice duration, in case we want
 		 * to make it adaptive. This is not used right now.
 		 * XXX should we do the same for q_budget and q_wait_ticks ?
 		 */
 		qp->q_slice_duration = get_bounded(&me.quantum_ms, 2);
 		qp->q_slice_end = ticks + qp->q_slice_duration;
 	}
 	if (qp == sc->sc_active && qp->q_status == G_QUEUE_BUSY) {
 		/* The queue is trying anticipation, start the timer. */
 		qp->q_status = G_QUEUE_IDLING;
 		/* may make this adaptive */
 		qp->q_wait_ticks = get_bounded(&me.wait_ms, 2);
 		/* Counted as a hit now; the timeout turns it into a miss. */
 		me.wait_hit++;
 		callout_reset(&sc->sc_wait, qp->q_wait_ticks,
 		    g_rr_wait_timeout, sc);
 	} else
 		g_sched_dispatch(sc->sc_geom);
 	/* Release a reference to the queue. */
 	g_rr_queue_put(qp);
 }
 /*
 * Configuration dump hook (gs_dumpconf): in plaintext mode, i.e. when
 * indent is NULL, append the global unit and queue counts to sb.
 * Nothing is emitted otherwise.
 */
 static void
 g_rr_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp,
    struct g_consumer *cp, struct g_provider *pp)
 {
 	if (indent != NULL)
 		return;
 	/* plaintext */
 	sbuf_printf(sb, " units %d queues %d", me.units, me.queues);
 }
 /*
 * Method table for the "rr" scheduler.  gs_priv_size asks the
 * classifier for one struct g_rr_queue of private data per class.
 */
 static struct g_gsched g_rr = {
 	.gs_name = "rr",
 	.gs_priv_size = sizeof(struct g_rr_queue),
 	.gs_init = g_rr_init,
 	.gs_fini = g_rr_fini,
 	.gs_start = g_rr_start,
 	.gs_done = g_rr_done,
 	.gs_next = g_rr_next,
 	.gs_dumpconf = g_rr_dumpconf,
 	.gs_init_class = g_rr_init_class,
 	.gs_fini_class = g_rr_fini_class,
 };
 /* Register the algorithm as a kernel module depending on geom_sched. */
 DECLARE_GSCHED_MODULE(rr, &g_rr);
--- a/sys/geom/sched/gs_scheduler.h
+++ b/sys/geom/sched/gs_scheduler.h
@ -1,239 +0,0 @@
 /*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2009-2010 Fabio Checconi
 * Copyright (c) 2009-2010 Luigi Rizzo, Universita` di Pisa
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
 /*
 * $Id$
 * $FreeBSD$
 *
 * Prototypes for GEOM-based disk scheduling algorithms.
 * See g_sched.c for generic documentation.
 *
 * This file is used by the kernel modules implementing the various
 * scheduling algorithms. They should provide all the methods
 * defined in struct g_gsched, and also invoke the macro
 *	DECLARE_GSCHED_MODULE
 * which registers the scheduling algorithm with the geom_sched module.
 *
 * The various scheduling algorithms do not need to know anything
 * about geom, they only need to handle the 'bio' requests they
 * receive, pass them down when needed, and use the locking interface
 * defined below.
 */
 #ifndef	_G_GSCHED_H_
 #define	_G_GSCHED_H_
 #ifdef _KERNEL
 #include <sys/param.h>
 #include <sys/kernel.h>
 #include <sys/ktr.h>
 #include <sys/module.h>
 #include <sys/queue.h>
 #include <geom/geom.h>
 #include "g_sched.h"
 /*
 * This is the interface exported to scheduling modules.
 *
 * gs_init() is called when our scheduling algorithm
 *    starts being used by a geom 'sched'
 *
 * gs_fini() is called when the algorithm is released.
 *
 * gs_start() is called when a new request comes in. It should
 *    enqueue the request and return 0 if success, or return non-zero
 *    in case of failure (meaning the request is passed down).
 *    The scheduler can use bio->bio_caller1 to store a non-null
 *    pointer meaning the request is under its control.
 *
 * gs_next() is called in a loop by g_sched_dispatch(), right after
 *    gs_start(), or on timeouts or 'done' events. It should return
 *    immediately, either a pointer to the bio to be served or NULL
 *    if no bio should be served now.  If force is specified, a
 *    work-conserving behavior is expected.
 *
 * gs_done() is called when a request under service completes.
 *    In turn the scheduler may decide to call the dispatch loop
 *    to serve other pending requests (or make sure there is a pending
 *    timeout to avoid stalls).
 *
 * gs_init_class() is called when a new client (as determined by
 *    the classifier) starts being used.
 *
 * gs_hash_unref() is called right before the class hashtable is
 *    destroyed; after this call, the scheduler is supposed to hold no
 *    more references to the elements in the table.
 */
 /* Forward declarations for prototypes. */
 struct g_geom;
 struct g_sched_class;
 /*
 * Method types of a scheduling algorithm.  "data" is the pointer
 * returned by the algorithm's gs_init method; "priv" is the private
 * data of a class.
 */
 typedef void *gs_init_t (struct g_geom *geom);
 typedef void gs_fini_t (void *data);
 typedef int gs_start_t (void *data, struct bio *bio);
 typedef void gs_done_t (void *data, struct bio *bio);
 typedef struct bio *gs_next_t (void *data, int force);
 typedef int gs_init_class_t (void *data, void *priv);
 typedef void gs_fini_class_t (void *data, void *priv);
 typedef void gs_hash_unref_t (void *data);
 /*
 * Descriptor of a scheduling algorithm, filled in by each scheduler
 * module and passed to DECLARE_GSCHED_MODULE().
 */
 struct g_gsched {
 	const char	*gs_name;	/* algorithm name, e.g. "rr" */
 	int		gs_refs;	/* presumably a use count -- TODO confirm */
 	int		gs_priv_size;	/* size of per-class private data */
 	gs_init_t	*gs_init;
 	gs_fini_t	*gs_fini;
 	gs_start_t	*gs_start;
 	gs_done_t	*gs_done;
 	gs_next_t	*gs_next;
 	g_dumpconf_t	*gs_dumpconf;
 	gs_init_class_t	*gs_init_class;
 	gs_fini_class_t	*gs_fini_class;
 	gs_hash_unref_t *gs_hash_unref;
 	LIST_ENTRY(g_gsched) glist;	/* list linkage */
 };
 /* KTR trace class used by the scheduler code (a spare KTR bit). */
 #define	KTR_GSCHED	KTR_SPARE4
 /* malloc(9) type shared by geom_sched and the algorithm modules. */
 MALLOC_DECLARE(M_GEOM_SCHED);
 /*
 * Basic classification mechanism.  Each request is associated to
 * a g_sched_class, and each scheduler has the opportunity to set
 * its own private data for the given (class, geom) pair.  The
 * private data start at gsc_priv, at the end of the structure;
 * g_sched_priv2class() maps a private data pointer back to its
 * enclosing g_sched_class.
 */
 struct g_sched_class {
 	int	gsc_refs;	/* reference count, see g_sched_priv_ref() */
 	int	gsc_expire;
 	u_long	gsc_key;
 	LIST_ENTRY(g_sched_class) gsc_clist;
 	void	*gsc_priv[0];	/* start of the scheduler's private data */
 };
 /*
 * Manipulate the classifier's data.  g_sched_get_class() gets a reference
 * to the class corresponding to bp in gp, allocating and initializing
 * it if necessary.  g_sched_put_class() releases the reference.
 * The returned value points to the private data for the class.
 * Callers in gs_rr.c check the value returned by g_sched_get_class()
 * against NULL and treat NULL as an allocation failure.
 */
 void *g_sched_get_class(struct g_geom *gp, struct bio *bp);
 void g_sched_put_class(struct g_geom *gp, void *priv);
 static inline struct g_sched_class *
 g_sched_priv2class(void *priv)
 {
 	return ((struct g_sched_class *)((u_long)priv -
 	    offsetof(struct g_sched_class, gsc_priv)));
 }
 static inline void
 g_sched_priv_ref(void *priv)
 {
 	struct g_sched_class *gsc;
 	gsc = g_sched_priv2class(priv);
 	gsc->gsc_refs++;
 }
 /*
 * Locking interface.  When each operation registered with the
 * scheduler is invoked, a per-instance lock is taken to protect
 * the data associated with it.  If the scheduler needs something
 * else to access the same data (e.g., a callout) it must use
 * these functions.
 */
 void g_sched_lock(struct g_geom *gp);
 void g_sched_unlock(struct g_geom *gp);
 /*
 * Restart request dispatching.  Must be called with the per-instance
 * mutex held: callout handlers bracket it with g_sched_lock() and
 * g_sched_unlock(), while the registered operations call it directly.
 */
 void g_sched_dispatch(struct g_geom *geom);
 /*
 * Simple gathering of statistical data, used by schedulers to collect
 * info on process history.  Just keep an exponential average of the
 * samples, with some extra bits of precision.
 *
 * Both fields decay by 1/8 on every new sample, so gs_smpl grows by
 * one per sample until it settles at 8, and gs_avg / gs_smpl is the
 * average.
 */
 struct g_savg {
 	uint64_t	gs_avg;		/* decayed sum of the samples */
 	unsigned int	gs_smpl;	/* decayed sample count, at most 8 */
 };
 /*
 * Fold a new sample into the average.
 */
 static inline void
 g_savg_add_sample(struct g_savg *ss, uint64_t sample)
 {
 	/* EMA with alpha = 0.125, fixed point, 3 bits of precision. */
 	ss->gs_avg = sample + ss->gs_avg - (ss->gs_avg >> 3);
 	ss->gs_smpl = 1 + ss->gs_smpl - (ss->gs_smpl >> 3);
 }
 /*
 * Return non-zero when enough samples were collected for the average
 * to be meaningful.
 */
 static inline int
 g_savg_valid(struct g_savg *ss)
 {
 	/* We want at least 8 samples to deem an average as valid. */
 	return (ss->gs_smpl > 7);
 }
 /*
 * Return the current average, or 0 when no sample has been recorded
 * yet (gs_smpl is 0 then, and must not be used as a divisor).
 */
 static inline uint64_t
 g_savg_read(struct g_savg *ss)
 {
 	if (ss->gs_smpl == 0)
 		return (0);
 	return (ss->gs_avg / ss->gs_smpl);
 }
 /*
 * Declaration of a scheduler module.
 *
 * DECLARE_GSCHED_MODULE(name, gsched) defines the moduledata for a
 * module called "name", with g_gsched_modevent() as event handler and
 * gsched (a struct g_gsched pointer) as its argument, and records a
 * dependency on the geom_sched module.
 */
 int g_gsched_modevent(module_t mod, int cmd, void *arg);
 #define	DECLARE_GSCHED_MODULE(name, gsched)			\
 	static moduledata_t name##_mod = {			\
 		#name,						\
 		g_gsched_modevent,				\
 		gsched,						\
 	};							\
 	DECLARE_MODULE(name, name##_mod, SI_SUB_DRIVERS, SI_ORDER_MIDDLE); \
 	MODULE_DEPEND(name, geom_sched, 0, 0, 0);
 #endif	/* _KERNEL */
 #endif	/* _G_GSCHED_H_ */
--- a/sys/modules/geom/Makefile
+++ b/sys/modules/geom/Makefile
@ -19,7 +19,6 @@ SUBDIR=	geom_bde \
 	geom_part \
 	geom_raid \
 	geom_raid3 \
 	geom_sched \
 	geom_shsec \
 	geom_stripe \
 	geom_uzip \
--- a/sys/modules/geom/geom_sched/Makefile
+++ b/sys/modules/geom/geom_sched/Makefile
@ -1,5 +0,0 @@
 # $FreeBSD$
 SUBDIR=	gs_sched gsched_rr gsched_delay
 .include <bsd.subdir.mk>
--- a/sys/modules/geom/geom_sched/Makefile.inc
+++ b/sys/modules/geom/geom_sched/Makefile.inc
@ -1,9 +0,0 @@
 # $FreeBSD$
 # included by geom_sched children
 .PATH: ${SRCTOP}/sys/geom/sched
 # 6.x needs this path
 #CFLAGS += -I${SRCTOP}/sys/geom/sched
 # .include <bsd.kmod.mk>
--- a/sys/modules/geom/geom_sched/gs_sched/Makefile
+++ b/sys/modules/geom/geom_sched/gs_sched/Makefile
@ -1,6 +0,0 @@
 # $FreeBSD$
 KMOD=   geom_sched
 SRCS=   g_sched.c
 # ../Makefile.inc automatically included
 .include <bsd.kmod.mk>
--- a/sys/modules/geom/geom_sched/gsched_delay/Makefile
+++ b/sys/modules/geom/geom_sched/gsched_delay/Makefile
@ -1,7 +0,0 @@
 # $FreeBSD$
 KMOD=   gsched_delay
 SRCS=   gs_delay.c
 # ../Makefile.inc automatically included
 .include <bsd.kmod.mk>
--- a/sys/modules/geom/geom_sched/gsched_rr/Makefile
+++ b/sys/modules/geom/geom_sched/gsched_rr/Makefile
@ -1,7 +0,0 @@
 # $FreeBSD$
 KMOD=   gsched_rr
 SRCS=   gs_rr.c
 # ../Makefile.inc automatically included
 .include <bsd.kmod.mk>
--- a/sys/sys/bio.h
+++ b/sys/sys/bio.h
@ -79,9 +79,6 @@ struct disk;
 struct bio;
 struct vm_map;
 /* Empty classifier tag, to prevent further classification. */
 #define	BIO_NOTCLASSIFIED		(void *)(~0UL)
 typedef void bio_task_t(void *);
 /*
@ -122,8 +119,8 @@ struct bio {
 	bio_task_t *bio_task;		/* Task_queue handler */
 	void	*bio_task_arg;		/* Argument to above */
-	void	*bio_classifier1;	/* Classifier tag. */
+	void	*bio_spare1;
-	void	*bio_classifier2;	/* Classifier tag. */
+	void	*bio_spare2;
 #ifdef DIAGNOSTIC
 	void	*_bio_caller1;
--- a/sys/sys/ktr_class.h
+++ b/sys/sys/ktr_class.h
@ -58,7 +58,7 @@
 #define	KTR_SYSC	0x00002000		/* System call */
 #define	KTR_INIT	0x00004000		/* System initialization */
 #define	KTR_SPARE3	0x00008000		/* cxgb, drm2, ioat, ntb */
-#define	KTR_SPARE4	0x00010000		/* geom_sched */
+#define	KTR_SPARE4	0x00010000
 #define	KTR_EVH		0x00020000		/* Eventhandler */
 #define	KTR_VFS		0x00040000		/* VFS events */
 #define	KTR_VOP		0x00080000		/* Auto-generated vop events */
--- a/sys/sys/param.h
+++ b/sys/sys/param.h
@ -60,7 +60,7 @@
 *		in the range 5 to 9.
 */
 #undef __FreeBSD_version
-#define __FreeBSD_version 1300071	/* Master, propagated to newvers */
+#define __FreeBSD_version 1300072	/* Master, propagated to newvers */
 /*
 * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,