From 0acf08d985eab5704b19cac57c3829d4879cf8dd Mon Sep 17 00:00:00 2001 From: Warner Losh Date: Sat, 20 Dec 2014 00:04:01 +0000 Subject: [PATCH] Remove support for FreeBSD 7 and really old FreeBSD 8. The classifiers have been in the base for a while, so the gymnastics here aren't needed. In addition, the bugs in subr_disk.c have been fixed since 2009, so there's no need for an identical copy of it in the tree anymore. There's really no need to binary patch g_io_request, so let's get rid of the code (not compiled in anymore) lest others think it is a good idea. --- sys/geom/sched/README | 26 +- sys/geom/sched/g_sched.c | 170 +------------ sys/geom/sched/g_sched.h | 13 - sys/geom/sched/gs_rr.c | 12 +- sys/geom/sched/subr_disk.c | 226 ------------------ sys/modules/geom/geom_sched/gs_sched/Makefile | 2 +- 6 files changed, 13 insertions(+), 436 deletions(-) delete mode 100644 sys/geom/sched/subr_disk.c diff --git a/sys/geom/sched/README b/sys/geom/sched/README index 1b52d901162b..b62d46889326 100644 --- a/sys/geom/sched/README +++ b/sys/geom/sched/README @@ -39,37 +39,17 @@ with cvs, and lets cvs progress when competing with a writer. To try it out: -1. USERS OF FREEBSD 7, PLEASE READ CAREFULLY THE FOLLOWING: - - On loading, this module patches one kernel function (g_io_request()) - so that I/O requests ("bio's") carry a classification tag, useful - for scheduling purposes. - - ON FREEBSD 7, the tag is stored in an existing (though rarely used) - field of the "struct bio", a solution which makes this module - incompatible with other modules using it, such as ZFS and gjournal. - Additionally, g_io_request() is patched in-memory to add a call - to the function that initializes this field (i386/amd64 only; - for other architectures you need to manually patch sys/geom/geom_io.c). - See details in the file g_sched.c. - - On FreeBSD 8.0 and above, the above trick is not necessary, - as the struct bio contains dedicated fields for the classifier, - and hooks for request classifiers. - - If you don't like the above, don't run this code. - -2. PLEASE MAKE SURE THAT THE DISK THAT YOU WILL BE USING FOR TESTS +1. PLEASE MAKE SURE THAT THE DISK THAT YOU WILL BE USING FOR TESTS DOES NOT CONTAIN PRECIOUS DATA. This is experimental code, so we make no guarantees, though I am routinely using it on my desktop and laptop. -3. EXTRACT AND BUILD THE PROGRAMS +2. EXTRACT AND BUILD THE PROGRAMS A 'make install' in the directory should work (with root privs), or you can even try the binary modules. If you want to build the modules yourself, look at the Makefile. -4. LOAD THE MODULE, CREATE A GEOM NODE, RUN TESTS +3. LOAD THE MODULE, CREATE A GEOM NODE, RUN TESTS The scheduler's module must be loaded first: diff --git a/sys/geom/sched/g_sched.c b/sys/geom/sched/g_sched.c index 009a58cedee5..f1c9a3db7e71 100644 --- a/sys/geom/sched/g_sched.c +++ b/sys/geom/sched/g_sched.c @@ -346,17 +346,8 @@ static inline u_long g_sched_classify(struct bio *bp) { -#if __FreeBSD_version > 800098 /* we have classifier fields in the struct bio */ -#define HAVE_BIO_CLASSIFIER return ((u_long)bp->bio_classifier1); -#else -#warning old version!!! - while (bp->bio_parent != NULL) - bp = bp->bio_parent; - - return ((u_long)bp->bio_caller1); -#endif } /* Return the hash chain for the given key. */ @@ -705,7 +696,7 @@ g_gsched_global_init(void) G_SCHED_DEBUG(0, "Initializing global data."); mtx_init(&me.gs_mtx, "gsched", NULL, MTX_DEF); LIST_INIT(&me.gs_scheds); - gs_bioq_init(&me.gs_pending); + bioq_init(&me.gs_pending); me.gs_initialized = 1; } } @@ -914,7 +905,7 @@ g_sched_temporary_start(struct bio *bio) mtx_lock(&me.gs_mtx); me.gs_npending++; - gs_bioq_disksort(&me.gs_pending, bio); + bioq_disksort(&me.gs_pending, bio); mtx_unlock(&me.gs_mtx); } @@ -923,7 +914,7 @@ g_sched_flush_pending(g_start_t *start) { struct bio *bp; - while ((bp = gs_bioq_takefirst(&me.gs_pending))) + while ((bp = bioq_takefirst(&me.gs_pending))) start(bp); } @@ -1365,162 +1356,8 @@ g_sched_destroy_geom(struct gctl_req *req, struct g_class *mp, * to the issuer of a request in bp->bio_classifier1 as soon * as the bio is posted to the geom queue (and not later, because * requests are managed by the g_down thread afterwards). - * - * On older versions of the system (but this code is not used - * in any existing release), we [ab]use the caller1 field in the - * root element of the bio tree to store the classification info. - * The marking is done at the beginning of g_io_request() - * and only if we find that the field is NULL. - * - * To avoid rebuilding the kernel, this module will patch the - * initial part of g_io_request() so it jumps to some hand-coded - * assembly that does the marking and then executes the original - * body of g_io_request(). - * - * fake_ioreq[] is architecture-specific machine code - * that implements the above. CODE_SIZE, STORE_SIZE etc. - * are constants used in the patching routine. Look at the - * code in g_ioreq_patch() for the details. */ -#ifndef HAVE_BIO_CLASSIFIER -/* - * Support for old FreeBSD versions - */ -#if defined(__i386__) -#define CODE_SIZE 29 -#define STORE_SIZE 5 -#define EPILOGUE 5 -#define SIZE (CODE_SIZE + STORE_SIZE + EPILOGUE) - -static u_char fake_ioreq[SIZE] = { - 0x8b, 0x44, 0x24, 0x04, /* mov bp, %eax */ - /* 1: */ - 0x89, 0xc2, /* mov %eax, %edx # edx = bp */ - 0x8b, 0x40, 0x64, /* mov bp->bio_parent, %eax */ - 0x85, 0xc0, /* test %eax, %eax */ - 0x75, 0xf7, /* jne 1b */ - 0x8b, 0x42, 0x30, /* mov bp->bp_caller1, %eax */ - 0x85, 0xc0, /* test %eax, %eax */ - 0x75, 0x09, /* jne 2f */ - 0x64, 0xa1, 0x00, 0x00, /* mov %fs:0, %eax */ - 0x00, 0x00, - 0x89, 0x42, 0x30, /* mov %eax, bp->bio_caller1 */ - /* 2: */ - 0x55, 0x89, 0xe5, 0x57, 0x56, - 0xe9, 0x00, 0x00, 0x00, 0x00, /* jmp back... */ -}; -#elif defined(__amd64) -#define CODE_SIZE 38 -#define STORE_SIZE 6 -#define EPILOGUE 5 -#define SIZE (CODE_SIZE + STORE_SIZE + EPILOGUE) - -static u_char fake_ioreq[SIZE] = { - 0x48, 0x89, 0xf8, /* mov bp, %rax */ - /* 1: */ - 0x48, 0x89, 0xc2, /* mov %rax, %rdx # rdx = bp */ - 0x48, 0x8b, 0x82, 0xa8, /* mov bp->bio_parent, %rax */ - 0x00, 0x00, 0x00, - 0x48, 0x85, 0xc0, /* test %rax, %rax */ - 0x75, 0xf1, /* jne 1b */ - 0x48, 0x83, 0x7a, 0x58, /* cmp $0, bp->bp_caller1 */ - 0x00, - 0x75, 0x0d, /* jne 2f */ - 0x65, 0x48, 0x8b, 0x04, /* mov %gs:0, %rax */ - 0x25, 0x00, 0x00, 0x00, - 0x00, - 0x48, 0x89, 0x42, 0x58, /* mov %rax, bp->bio_caller1 */ - /* 2: */ - 0x55, 0x48, 0x89, 0xe5, 0x41, 0x56, - 0xe9, 0x00, 0x00, 0x00, 0x00, /* jmp back... */ -}; -#else /* neither x86 nor amd64 */ -static void -g_new_io_request(struct bio *bp, struct g_consumer *cp) -{ - struct bio *top = bp; - - /* - * bio classification: if bio_caller1 is available in the - * root of the 'struct bio' tree, store there the thread id - * of the thread that originated the request. - * More sophisticated classification schemes can be used. - */ - while (top->bio_parent) - top = top->bio_parent; - - if (top->bio_caller1 == NULL) - top->bio_caller1 = curthread; -} - -#error please add the code above in g_new_io_request() to the beginning of \ - /sys/geom/geom_io.c::g_io_request(), and remove this line. -#endif /* end of arch-specific code */ - -static int -g_ioreq_patch(void) -{ - u_char *original; - u_long ofs; - int found; - - if (me.gs_patched) - return (-1); - - original = (u_char *)g_io_request; - - found = !bcmp(original, fake_ioreq + CODE_SIZE, STORE_SIZE); - if (!found) - return (-1); - - /* Jump back to the original + STORE_SIZE. */ - ofs = (original + STORE_SIZE) - (fake_ioreq + SIZE); - bcopy(&ofs, fake_ioreq + CODE_SIZE + STORE_SIZE + 1, 4); - - /* Patch the original address with a jump to the trampoline. */ - *original = 0xe9; /* jump opcode */ - ofs = fake_ioreq - (original + 5); - bcopy(&ofs, original + 1, 4); - - me.gs_patched = 1; - - return (0); -} - -/* - * Restore the original code, this is easy. - */ -static void -g_ioreq_restore(void) -{ - u_char *original; - - if (me.gs_patched) { - original = (u_char *)g_io_request; - bcopy(fake_ioreq + CODE_SIZE, original, STORE_SIZE); - me.gs_patched = 0; - } -} - -static inline void -g_classifier_ini(void) -{ - - g_ioreq_patch(); -} - -static inline void -g_classifier_fini(void) -{ - - g_ioreq_restore(); -} - -/*--- end of support code for older FreeBSD versions */ - -#else /* HAVE_BIO_CLASSIFIER */ - /* * Classifier support for recent FreeBSD versions: we use * a very simple classifier, only use curthread to tag a request. @@ -1552,7 +1389,6 @@ g_classifier_fini(void) g_unregister_classifier(&g_sched_classifier); } -#endif /* HAVE_BIO_CLASSIFIER */ static void g_sched_init(struct g_class *mp) diff --git a/sys/geom/sched/g_sched.h b/sys/geom/sched/g_sched.h index 3a34e2922b1e..9fdadc4e3005 100644 --- a/sys/geom/sched/g_sched.h +++ b/sys/geom/sched/g_sched.h @@ -120,19 +120,6 @@ struct g_sched_softc { #define G_SCHED_PROXYING 1 #define G_SCHED_FLUSHING 2 -/* - * Temporary- our own version of the disksort, because the - * version in 7.x and 8.x before march 2009 is buggy. - */ -void gs_bioq_init(struct bio_queue_head *); -void gs_bioq_remove(struct bio_queue_head *, struct bio *); -void gs_bioq_flush(struct bio_queue_head *, struct devstat *, int); -void gs_bioq_insert_head(struct bio_queue_head *, struct bio *); -void gs_bioq_insert_tail(struct bio_queue_head *, struct bio *); -struct bio *gs_bioq_first(struct bio_queue_head *); -struct bio *gs_bioq_takefirst(struct bio_queue_head *); -void gs_bioq_disksort(struct bio_queue_head *, struct bio *); - #endif /* _KERNEL */ #endif /* _G_SCHED_H_ */ diff --git a/sys/geom/sched/gs_rr.c b/sys/geom/sched/gs_rr.c index 6f268798c0ae..b9d5d1b4e22e 100644 --- a/sys/geom/sched/gs_rr.c +++ b/sys/geom/sched/gs_rr.c @@ -315,7 +315,7 @@ g_rr_init_class(void *data, void *priv) struct g_rr_softc *sc = data; struct g_rr_queue *qp = priv; - gs_bioq_init(&qp->q_bioq); + bioq_init(&qp->q_bioq); /* * Set the initial parameters for the client: @@ -350,7 +350,7 @@ g_rr_fini_class(void *data, void *priv) { struct g_rr_queue *qp = priv; - KASSERT(gs_bioq_first(&qp->q_bioq) == NULL, + KASSERT(bioq_first(&qp->q_bioq) == NULL, ("released nonempty queue")); qp->q_sc->sc_nqueues--; me.queues--; @@ -438,7 +438,7 @@ g_rr_next(void *data, int force) qp->q_flags &= ~G_FLAG_COMPLETED; } - bp = gs_bioq_takefirst(&qp->q_bioq); /* surely not NULL */ + bp = bioq_takefirst(&qp->q_bioq); /* surely not NULL */ qp->q_service += bp->bio_length; /* charge the service */ /* @@ -456,7 +456,7 @@ g_rr_next(void *data, int force) * on read or writes (e.g., anticipate only on reads). */ expired = g_rr_queue_expired(qp); /* are we expired ? */ - next = gs_bioq_first(&qp->q_bioq); /* do we have one more ? */ + next = bioq_first(&qp->q_bioq); /* do we have one more ? */ if (expired) { sc->sc_active = NULL; /* Either requeue or release reference. */ @@ -538,7 +538,7 @@ g_rr_start(void *data, struct bio *bp) if (qp == NULL) return (-1); /* allocation failed, tell upstream */ - if (gs_bioq_first(&qp->q_bioq) == NULL) { + if (bioq_first(&qp->q_bioq) == NULL) { /* * We are inserting into an empty queue. * Reset its state if it is sc_active, @@ -560,7 +560,7 @@ g_rr_start(void *data, struct bio *bp) /* Inherit the reference returned by g_rr_queue_get(). */ bp->bio_caller1 = qp; - gs_bioq_disksort(&qp->q_bioq, bp); + bioq_disksort(&qp->q_bioq, bp); return (0); } diff --git a/sys/geom/sched/subr_disk.c b/sys/geom/sched/subr_disk.c deleted file mode 100644 index db2a9ef11f8c..000000000000 --- a/sys/geom/sched/subr_disk.c +++ /dev/null @@ -1,226 +0,0 @@ -/*- - * ---------------------------------------------------------------------------- - * "THE BEER-WARE LICENSE" (Revision 42): - * wrote this file. As long as you retain this notice you - * can do whatever you want with this stuff. If we meet some day, and you think - * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp - * ---------------------------------------------------------------------------- - * - * The bioq_disksort() (and the specification of the bioq API) - * have been written by Luigi Rizzo and Fabio Checconi under the same - * license as above. - */ - -#include -__FBSDID("$FreeBSD$"); - -//#include "opt_geom.h" - -#include -#include -#include -#include -#include -#include -#include "g_sched.h" - -/* - * BIO queue implementation - * - * Please read carefully the description below before making any change - * to the code, or you might change the behaviour of the data structure - * in undesirable ways. - * - * A bioq stores disk I/O request (bio), normally sorted according to - * the distance of the requested position (bio->bio_offset) from the - * current head position (bioq->last_offset) in the scan direction, i.e. - * - * (uoff_t)(bio_offset - last_offset) - * - * Note that the cast to unsigned (uoff_t) is fundamental to insure - * that the distance is computed in the scan direction. - * - * The main methods for manipulating the bioq are: - * - * bioq_disksort() performs an ordered insertion; - * - * bioq_first() return the head of the queue, without removing; - * - * bioq_takefirst() return and remove the head of the queue, - * updating the 'current head position' as - * bioq->last_offset = bio->bio_offset + bio->bio_length; - * - * When updating the 'current head position', we assume that the result of - * bioq_takefirst() is dispatched to the device, so bioq->last_offset - * represents the head position once the request is complete. - * - * If the bioq is manipulated using only the above calls, it starts - * with a sorted sequence of requests with bio_offset >= last_offset, - * possibly followed by another sorted sequence of requests with - * 0 <= bio_offset < bioq->last_offset - * - * NOTE: historical behaviour was to ignore bio->bio_length in the - * update, but its use tracks the head position in a better way. - * Historical behaviour was also to update the head position when - * the request under service is complete, rather than when the - * request is extracted from the queue. However, the current API - * has no method to update the head position; secondly, once - * a request has been submitted to the disk, we have no idea of - * the actual head position, so the final one is our best guess. - * - * --- Direct queue manipulation --- - * - * A bioq uses an underlying TAILQ to store requests, so we also - * export methods to manipulate the TAILQ, in particular: - * - * bioq_insert_tail() insert an entry at the end. - * It also creates a 'barrier' so all subsequent - * insertions through bioq_disksort() will end up - * after this entry; - * - * bioq_insert_head() insert an entry at the head, update - * bioq->last_offset = bio->bio_offset so that - * all subsequent insertions through bioq_disksort() - * will end up after this entry; - * - * bioq_remove() remove a generic element from the queue, act as - * bioq_takefirst() if invoked on the head of the queue. - * - * The semantic of these methods is the same as the operations - * on the underlying TAILQ, but with additional guarantees on - * subsequent bioq_disksort() calls. E.g. bioq_insert_tail() - * can be useful for making sure that all previous ops are flushed - * to disk before continuing. - * - * Updating bioq->last_offset on a bioq_insert_head() guarantees - * that the bio inserted with the last bioq_insert_head() will stay - * at the head of the queue even after subsequent bioq_disksort(). - * - * Note that when the direct queue manipulation functions are used, - * the queue may contain multiple inversion points (i.e. more than - * two sorted sequences of requests). - * - */ - -void -gs_bioq_init(struct bio_queue_head *head) -{ - - TAILQ_INIT(&head->queue); - head->last_offset = 0; - head->insert_point = NULL; -} - -void -gs_bioq_remove(struct bio_queue_head *head, struct bio *bp) -{ - - if (head->insert_point == NULL) { - if (bp == TAILQ_FIRST(&head->queue)) - head->last_offset = bp->bio_offset + bp->bio_length; - } else if (bp == head->insert_point) - head->insert_point = NULL; - - TAILQ_REMOVE(&head->queue, bp, bio_queue); -} - -void -gs_bioq_flush(struct bio_queue_head *head, struct devstat *stp, int error) -{ - struct bio *bp; - - while ((bp = gs_bioq_takefirst(head)) != NULL) - biofinish(bp, stp, error); -} - -void -gs_bioq_insert_head(struct bio_queue_head *head, struct bio *bp) -{ - - if (head->insert_point == NULL) - head->last_offset = bp->bio_offset; - TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue); -} - -void -gs_bioq_insert_tail(struct bio_queue_head *head, struct bio *bp) -{ - - TAILQ_INSERT_TAIL(&head->queue, bp, bio_queue); - head->insert_point = bp; - head->last_offset = bp->bio_offset; -} - -struct bio * -gs_bioq_first(struct bio_queue_head *head) -{ - - return (TAILQ_FIRST(&head->queue)); -} - -struct bio * -gs_bioq_takefirst(struct bio_queue_head *head) -{ - struct bio *bp; - - bp = TAILQ_FIRST(&head->queue); - if (bp != NULL) - gs_bioq_remove(head, bp); - return (bp); -} - -/* - * Compute the sorting key. The cast to unsigned is - * fundamental for correctness, see the description - * near the beginning of the file. - */ -static inline uoff_t -gs_bioq_bio_key(struct bio_queue_head *head, struct bio *bp) -{ - - return ((uoff_t)(bp->bio_offset - head->last_offset)); -} - -/* - * Seek sort for disks. - * - * Sort all requests in a single queue while keeping - * track of the current position of the disk with last_offset. - * See above for details. - */ -void -gs_bioq_disksort(struct bio_queue_head *head, struct bio *bp) -{ - struct bio *cur, *prev; - uoff_t key; - - if ((bp->bio_flags & BIO_ORDERED) != 0) { - /* - * Ordered transactions can only be dispatched - * after any currently queued transactions. They - * also have barrier semantics - no transactions - * queued in the future can pass them. - */ - gs_bioq_insert_tail(head, bp); - return; - } - - prev = NULL; - key = gs_bioq_bio_key(head, bp); - cur = TAILQ_FIRST(&head->queue); - - if (head->insert_point) { - prev = head->insert_point; - cur = TAILQ_NEXT(head->insert_point, bio_queue); - } - - while (cur != NULL && key >= gs_bioq_bio_key(head, cur)) { - prev = cur; - cur = TAILQ_NEXT(cur, bio_queue); - } - - if (prev == NULL) - TAILQ_INSERT_HEAD(&head->queue, bp, bio_queue); - else - TAILQ_INSERT_AFTER(&head->queue, prev, bp, bio_queue); -} diff --git a/sys/modules/geom/geom_sched/gs_sched/Makefile b/sys/modules/geom/geom_sched/gs_sched/Makefile index 5739365a3807..13bb91bc99f5 100644 --- a/sys/modules/geom/geom_sched/gs_sched/Makefile +++ b/sys/modules/geom/geom_sched/gs_sched/Makefile @@ -1,6 +1,6 @@ # $FreeBSD$ KMOD= geom_sched -SRCS= g_sched.c subr_disk.c +SRCS= g_sched.c # ../Makefile.inc automatically included .include