Give block I/O interface multiple (8) execution threads.

On parallel random I/O this allows better utilization of wide storage pools.
To not confuse prefetcher on linear I/O, consecutive requests are executed
sequentially, following the same logic as was earlier implemented in CTL.

Benchmarks of virtual AHCI disk, backed by ZVOL on RAID10 pool of 4 HDDs,
show a ~3.5x random read performance improvement, with no degradation
on linear I/O.

MFC after:	2 weeks
This commit is contained in:
Alexander Motin 2015-03-14 21:15:45 +00:00
parent b773372938
commit 79565afed8

View File

@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
#define BLOCKIF_SIG 0xb109b109
#define BLOCKIF_MAXREQ 33
#define BLOCKIF_NUMTHR 8
enum blockop {
BOP_READ,
@ -65,6 +66,7 @@ enum blockop {
enum blockstat {
BST_FREE,
BST_BLOCK,
BST_PEND,
BST_BUSY,
BST_DONE
@ -76,6 +78,7 @@ struct blockif_elem {
enum blockop be_op;
enum blockstat be_status;
pthread_t be_tid;
off_t be_block;
};
struct blockif_ctxt {
@ -88,16 +91,15 @@ struct blockif_ctxt {
int bc_sectsz;
int bc_psectsz;
int bc_psectoff;
pthread_t bc_btid;
int bc_closing;
pthread_t bc_btid[BLOCKIF_NUMTHR];
pthread_mutex_t bc_mtx;
pthread_cond_t bc_cond;
int bc_closing;
/* Request elements and free/pending/busy queues */
TAILQ_HEAD(, blockif_elem) bc_freeq;
TAILQ_HEAD(, blockif_elem) bc_pendq;
TAILQ_HEAD(, blockif_elem) bc_busyq;
u_int bc_req_count;
struct blockif_elem bc_reqs[BLOCKIF_MAXREQ];
};
@ -116,58 +118,83 @@ static int
blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq,
enum blockop op)
{
struct blockif_elem *be;
assert(bc->bc_req_count < BLOCKIF_MAXREQ);
struct blockif_elem *be, *tbe;
off_t off;
int i;
be = TAILQ_FIRST(&bc->bc_freeq);
assert(be != NULL);
assert(be->be_status == BST_FREE);
TAILQ_REMOVE(&bc->bc_freeq, be, be_link);
be->be_status = BST_PEND;
be->be_req = breq;
be->be_op = op;
switch (op) {
case BOP_READ:
case BOP_WRITE:
case BOP_DELETE:
off = breq->br_offset;
for (i = 0; i < breq->br_iovcnt; i++)
off += breq->br_iov[i].iov_len;
break;
default:
off = OFF_MAX;
}
be->be_block = off;
TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
if (tbe->be_block == breq->br_offset)
break;
}
if (tbe == NULL) {
TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) {
if (tbe->be_block == breq->br_offset)
break;
}
}
if (tbe == NULL)
be->be_status = BST_PEND;
else
be->be_status = BST_BLOCK;
TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link);
bc->bc_req_count++;
return (0);
return (be->be_status == BST_PEND);
}
static int
blockif_dequeue(struct blockif_ctxt *bc, struct blockif_elem **bep)
blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep)
{
struct blockif_elem *be;
if (bc->bc_req_count == 0)
return (ENOENT);
be = TAILQ_FIRST(&bc->bc_pendq);
assert(be != NULL);
assert(be->be_status == BST_PEND);
TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
if (be->be_status == BST_PEND)
break;
assert(be->be_status == BST_BLOCK);
}
if (be == NULL)
return (0);
TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
be->be_status = BST_BUSY;
be->be_tid = bc->bc_btid;
be->be_tid = t;
TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link);
*bep = be;
return (0);
return (1);
}
/*
 * Retire an element and return it to the free queue.  A DONE/BUSY
 * element lives on the busy queue; anything else (e.g. a cancelled
 * BST_PEND/BST_BLOCK request) is still on the pending queue.  Any
 * pending request that starts at this element's end offset (be_block)
 * was blocked behind it, so promote it to BST_PEND now that its
 * predecessor is finished.
 *
 * Caller must hold bc_mtx.
 */
static void
blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be)
{
	struct blockif_elem *tbe;

	if (be->be_status == BST_DONE || be->be_status == BST_BUSY)
		TAILQ_REMOVE(&bc->bc_busyq, be, be_link);
	else
		TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
	/* Unblock requests that were chained behind this one. */
	TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
		if (tbe->be_req->br_offset == be->be_block)
			tbe->be_status = BST_PEND;
	}
	be->be_tid = 0;
	be->be_status = BST_FREE;
	be->be_req = NULL;
	TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
}
static void
@ -226,28 +253,27 @@ blockif_thr(void *arg)
{
struct blockif_ctxt *bc;
struct blockif_elem *be;
pthread_t t;
bc = arg;
t = pthread_self();
pthread_mutex_lock(&bc->bc_mtx);
for (;;) {
pthread_mutex_lock(&bc->bc_mtx);
while (!blockif_dequeue(bc, &be)) {
while (blockif_dequeue(bc, t, &be)) {
pthread_mutex_unlock(&bc->bc_mtx);
blockif_proc(bc, be);
pthread_mutex_lock(&bc->bc_mtx);
blockif_complete(bc, be);
}
pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
pthread_mutex_unlock(&bc->bc_mtx);
/*
* Check ctxt status here to see if exit requested
*/
/* Check ctxt status here to see if exit requested */
if (bc->bc_closing)
pthread_exit(NULL);
break;
pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
}
pthread_mutex_unlock(&bc->bc_mtx);
/* Not reached */
pthread_exit(NULL);
return (NULL);
}
@ -386,16 +412,16 @@ blockif_open(const char *optstr, const char *ident)
TAILQ_INIT(&bc->bc_freeq);
TAILQ_INIT(&bc->bc_pendq);
TAILQ_INIT(&bc->bc_busyq);
bc->bc_req_count = 0;
for (i = 0; i < BLOCKIF_MAXREQ; i++) {
bc->bc_reqs[i].be_status = BST_FREE;
TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link);
}
pthread_create(&bc->bc_btid, NULL, blockif_thr, bc);
snprintf(tname, sizeof(tname), "blk-%s", ident);
pthread_set_name_np(bc->bc_btid, tname);
for (i = 0; i < BLOCKIF_NUMTHR; i++) {
pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc);
snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i);
pthread_set_name_np(bc->bc_btid[i], tname);
}
return (bc);
}
@ -409,13 +435,13 @@ blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq,
err = 0;
pthread_mutex_lock(&bc->bc_mtx);
if (bc->bc_req_count < BLOCKIF_MAXREQ) {
if (!TAILQ_EMPTY(&bc->bc_freeq)) {
/*
* Enqueue and inform the block i/o thread
* that there is work available
*/
blockif_enqueue(bc, breq, op);
pthread_cond_signal(&bc->bc_cond);
if (blockif_enqueue(bc, breq, op))
pthread_cond_signal(&bc->bc_cond);
} else {
/*
* Callers are not allowed to enqueue more than
@ -481,11 +507,7 @@ blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq)
/*
* Found it.
*/
TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
be->be_status = BST_FREE;
be->be_req = NULL;
TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
bc->bc_req_count--;
blockif_complete(bc, be);
pthread_mutex_unlock(&bc->bc_mtx);
return (0);
@ -546,7 +568,7 @@ int
blockif_close(struct blockif_ctxt *bc)
{
void *jval;
int err;
int err, i;
err = 0;
@ -556,8 +578,9 @@ blockif_close(struct blockif_ctxt *bc)
* Stop the block i/o thread
*/
bc->bc_closing = 1;
pthread_cond_signal(&bc->bc_cond);
pthread_join(bc->bc_btid, &jval);
pthread_cond_broadcast(&bc->bc_cond);
for (i = 0; i < BLOCKIF_NUMTHR; i++)
pthread_join(bc->bc_btid[i], &jval);
/* XXX Cancel queued i/o's ??? */