From 79565afed84a8a70569eaa6eb0bf757cabdc4cb8 Mon Sep 17 00:00:00 2001 From: Alexander Motin Date: Sat, 14 Mar 2015 21:15:45 +0000 Subject: [PATCH] Give block I/O interface multiple (8) execution threads. On parallel random I/O this allows better utilization of wide storage pools. To avoid confusing the prefetcher on linear I/O, consecutive requests are executed sequentially, following the same logic that was earlier implemented in CTL. Benchmarks of a virtual AHCI disk, backed by a ZVOL on a RAID10 pool of 4 HDDs, show a ~3.5x improvement in random read performance, with no degradation on linear I/O. MFC after: 2 weeks --- usr.sbin/bhyve/block_if.c | 129 ++++++++++++++++++++++---------------- 1 file changed, 76 insertions(+), 53 deletions(-) diff --git a/usr.sbin/bhyve/block_if.c b/usr.sbin/bhyve/block_if.c index de4302046789..23aad019b138 100644 --- a/usr.sbin/bhyve/block_if.c +++ b/usr.sbin/bhyve/block_if.c @@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$"); #define BLOCKIF_SIG 0xb109b109 #define BLOCKIF_MAXREQ 33 +#define BLOCKIF_NUMTHR 8 enum blockop { BOP_READ, @@ -65,6 +66,7 @@ enum blockop { enum blockstat { BST_FREE, + BST_BLOCK, BST_PEND, BST_BUSY, BST_DONE @@ -76,6 +78,7 @@ struct blockif_elem { enum blockop be_op; enum blockstat be_status; pthread_t be_tid; + off_t be_block; }; struct blockif_ctxt { @@ -88,16 +91,15 @@ struct blockif_ctxt { int bc_sectsz; int bc_psectsz; int bc_psectoff; - pthread_t bc_btid; + int bc_closing; + pthread_t bc_btid[BLOCKIF_NUMTHR]; pthread_mutex_t bc_mtx; pthread_cond_t bc_cond; - int bc_closing; /* Request elements and free/pending/busy queues */ TAILQ_HEAD(, blockif_elem) bc_freeq; TAILQ_HEAD(, blockif_elem) bc_pendq; TAILQ_HEAD(, blockif_elem) bc_busyq; - u_int bc_req_count; struct blockif_elem bc_reqs[BLOCKIF_MAXREQ]; }; @@ -116,58 +118,83 @@ static int blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq, enum blockop op) { - struct blockif_elem *be; - - assert(bc->bc_req_count < BLOCKIF_MAXREQ); + struct blockif_elem *be, *tbe; + off_t 
off; + int i; be = TAILQ_FIRST(&bc->bc_freeq); assert(be != NULL); assert(be->be_status == BST_FREE); - TAILQ_REMOVE(&bc->bc_freeq, be, be_link); - be->be_status = BST_PEND; be->be_req = breq; be->be_op = op; + switch (op) { + case BOP_READ: + case BOP_WRITE: + case BOP_DELETE: + off = breq->br_offset; + for (i = 0; i < breq->br_iovcnt; i++) + off += breq->br_iov[i].iov_len; + break; + default: + off = OFF_MAX; + } + be->be_block = off; + TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { + if (tbe->be_block == breq->br_offset) + break; + } + if (tbe == NULL) { + TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) { + if (tbe->be_block == breq->br_offset) + break; + } + } + if (tbe == NULL) + be->be_status = BST_PEND; + else + be->be_status = BST_BLOCK; TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link); - - bc->bc_req_count++; - - return (0); + return (be->be_status == BST_PEND); } static int -blockif_dequeue(struct blockif_ctxt *bc, struct blockif_elem **bep) +blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep) { struct blockif_elem *be; - if (bc->bc_req_count == 0) - return (ENOENT); - - be = TAILQ_FIRST(&bc->bc_pendq); - assert(be != NULL); - assert(be->be_status == BST_PEND); + TAILQ_FOREACH(be, &bc->bc_pendq, be_link) { + if (be->be_status == BST_PEND) + break; + assert(be->be_status == BST_BLOCK); + } + if (be == NULL) + return (0); TAILQ_REMOVE(&bc->bc_pendq, be, be_link); be->be_status = BST_BUSY; - be->be_tid = bc->bc_btid; + be->be_tid = t; TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link); - *bep = be; - - return (0); + return (1); } static void blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be) { - assert(be->be_status == BST_DONE); + struct blockif_elem *tbe; - TAILQ_REMOVE(&bc->bc_busyq, be, be_link); + if (be->be_status == BST_DONE || be->be_status == BST_BUSY) + TAILQ_REMOVE(&bc->bc_busyq, be, be_link); + else + TAILQ_REMOVE(&bc->bc_pendq, be, be_link); + TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) { + if (tbe->be_req->br_offset 
== be->be_block) + tbe->be_status = BST_PEND; + } be->be_tid = 0; be->be_status = BST_FREE; be->be_req = NULL; TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); - - bc->bc_req_count--; } static void @@ -226,28 +253,27 @@ blockif_thr(void *arg) { struct blockif_ctxt *bc; struct blockif_elem *be; + pthread_t t; bc = arg; + t = pthread_self(); + pthread_mutex_lock(&bc->bc_mtx); for (;;) { - pthread_mutex_lock(&bc->bc_mtx); - while (!blockif_dequeue(bc, &be)) { + while (blockif_dequeue(bc, t, &be)) { pthread_mutex_unlock(&bc->bc_mtx); blockif_proc(bc, be); pthread_mutex_lock(&bc->bc_mtx); blockif_complete(bc, be); } - pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); - pthread_mutex_unlock(&bc->bc_mtx); - - /* - * Check ctxt status here to see if exit requested - */ + /* Check ctxt status here to see if exit requested */ if (bc->bc_closing) - pthread_exit(NULL); + break; + pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx); } + pthread_mutex_unlock(&bc->bc_mtx); - /* Not reached */ + pthread_exit(NULL); return (NULL); } @@ -386,16 +412,16 @@ blockif_open(const char *optstr, const char *ident) TAILQ_INIT(&bc->bc_freeq); TAILQ_INIT(&bc->bc_pendq); TAILQ_INIT(&bc->bc_busyq); - bc->bc_req_count = 0; for (i = 0; i < BLOCKIF_MAXREQ; i++) { bc->bc_reqs[i].be_status = BST_FREE; TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link); } - pthread_create(&bc->bc_btid, NULL, blockif_thr, bc); - - snprintf(tname, sizeof(tname), "blk-%s", ident); - pthread_set_name_np(bc->bc_btid, tname); + for (i = 0; i < BLOCKIF_NUMTHR; i++) { + pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc); + snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i); + pthread_set_name_np(bc->bc_btid[i], tname); + } return (bc); } @@ -409,13 +435,13 @@ blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq, err = 0; pthread_mutex_lock(&bc->bc_mtx); - if (bc->bc_req_count < BLOCKIF_MAXREQ) { + if (!TAILQ_EMPTY(&bc->bc_freeq)) { /* * Enqueue and inform the block i/o thread * that there is work available */ - 
blockif_enqueue(bc, breq, op); - pthread_cond_signal(&bc->bc_cond); + if (blockif_enqueue(bc, breq, op)) + pthread_cond_signal(&bc->bc_cond); } else { /* * Callers are not allowed to enqueue more than @@ -481,11 +507,7 @@ blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq) /* * Found it. */ - TAILQ_REMOVE(&bc->bc_pendq, be, be_link); - be->be_status = BST_FREE; - be->be_req = NULL; - TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link); - bc->bc_req_count--; + blockif_complete(bc, be); pthread_mutex_unlock(&bc->bc_mtx); return (0); @@ -546,7 +568,7 @@ int blockif_close(struct blockif_ctxt *bc) { void *jval; - int err; + int err, i; err = 0; @@ -556,8 +578,9 @@ blockif_close(struct blockif_ctxt *bc) * Stop the block i/o thread */ bc->bc_closing = 1; - pthread_cond_signal(&bc->bc_cond); - pthread_join(bc->bc_btid, &jval); + pthread_cond_broadcast(&bc->bc_cond); + for (i = 0; i < BLOCKIF_NUMTHR; i++) + pthread_join(bc->bc_btid[i], &jval); /* XXX Cancel queued i/o's ??? */