Give block I/O interface multiple (8) execution threads.
On parallel random I/O this allows better utilization of wide storage pools. To avoid confusing the prefetcher on linear I/O, consecutive requests are executed sequentially, following the same logic that was earlier implemented in CTL. Benchmarks of a virtual AHCI disk, backed by a ZVOL on a RAID10 pool of 4 HDDs, show a ~3.5x improvement in random read performance, with no degradation on linear I/O. MFC after: 2 weeks
This commit is contained in:
parent
adc83897f5
commit
eb63aed246
@ -55,6 +55,7 @@ __FBSDID("$FreeBSD$");
|
||||
#define BLOCKIF_SIG 0xb109b109
|
||||
|
||||
#define BLOCKIF_MAXREQ 33
|
||||
#define BLOCKIF_NUMTHR 8
|
||||
|
||||
enum blockop {
|
||||
BOP_READ,
|
||||
@ -65,6 +66,7 @@ enum blockop {
|
||||
|
||||
enum blockstat {
|
||||
BST_FREE,
|
||||
BST_BLOCK,
|
||||
BST_PEND,
|
||||
BST_BUSY,
|
||||
BST_DONE
|
||||
@ -76,6 +78,7 @@ struct blockif_elem {
|
||||
enum blockop be_op;
|
||||
enum blockstat be_status;
|
||||
pthread_t be_tid;
|
||||
off_t be_block;
|
||||
};
|
||||
|
||||
struct blockif_ctxt {
|
||||
@ -88,16 +91,15 @@ struct blockif_ctxt {
|
||||
int bc_sectsz;
|
||||
int bc_psectsz;
|
||||
int bc_psectoff;
|
||||
pthread_t bc_btid;
|
||||
int bc_closing;
|
||||
pthread_t bc_btid[BLOCKIF_NUMTHR];
|
||||
pthread_mutex_t bc_mtx;
|
||||
pthread_cond_t bc_cond;
|
||||
int bc_closing;
|
||||
|
||||
/* Request elements and free/pending/busy queues */
|
||||
TAILQ_HEAD(, blockif_elem) bc_freeq;
|
||||
TAILQ_HEAD(, blockif_elem) bc_pendq;
|
||||
TAILQ_HEAD(, blockif_elem) bc_busyq;
|
||||
u_int bc_req_count;
|
||||
struct blockif_elem bc_reqs[BLOCKIF_MAXREQ];
|
||||
};
|
||||
|
||||
@ -116,58 +118,83 @@ static int
|
||||
blockif_enqueue(struct blockif_ctxt *bc, struct blockif_req *breq,
|
||||
enum blockop op)
|
||||
{
|
||||
struct blockif_elem *be;
|
||||
|
||||
assert(bc->bc_req_count < BLOCKIF_MAXREQ);
|
||||
struct blockif_elem *be, *tbe;
|
||||
off_t off;
|
||||
int i;
|
||||
|
||||
be = TAILQ_FIRST(&bc->bc_freeq);
|
||||
assert(be != NULL);
|
||||
assert(be->be_status == BST_FREE);
|
||||
|
||||
TAILQ_REMOVE(&bc->bc_freeq, be, be_link);
|
||||
be->be_status = BST_PEND;
|
||||
be->be_req = breq;
|
||||
be->be_op = op;
|
||||
switch (op) {
|
||||
case BOP_READ:
|
||||
case BOP_WRITE:
|
||||
case BOP_DELETE:
|
||||
off = breq->br_offset;
|
||||
for (i = 0; i < breq->br_iovcnt; i++)
|
||||
off += breq->br_iov[i].iov_len;
|
||||
break;
|
||||
default:
|
||||
off = OFF_MAX;
|
||||
}
|
||||
be->be_block = off;
|
||||
TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
|
||||
if (tbe->be_block == breq->br_offset)
|
||||
break;
|
||||
}
|
||||
if (tbe == NULL) {
|
||||
TAILQ_FOREACH(tbe, &bc->bc_busyq, be_link) {
|
||||
if (tbe->be_block == breq->br_offset)
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (tbe == NULL)
|
||||
be->be_status = BST_PEND;
|
||||
else
|
||||
be->be_status = BST_BLOCK;
|
||||
TAILQ_INSERT_TAIL(&bc->bc_pendq, be, be_link);
|
||||
|
||||
bc->bc_req_count++;
|
||||
|
||||
return (0);
|
||||
return (be->be_status == BST_PEND);
|
||||
}
|
||||
|
||||
static int
|
||||
blockif_dequeue(struct blockif_ctxt *bc, struct blockif_elem **bep)
|
||||
blockif_dequeue(struct blockif_ctxt *bc, pthread_t t, struct blockif_elem **bep)
|
||||
{
|
||||
struct blockif_elem *be;
|
||||
|
||||
if (bc->bc_req_count == 0)
|
||||
return (ENOENT);
|
||||
|
||||
be = TAILQ_FIRST(&bc->bc_pendq);
|
||||
assert(be != NULL);
|
||||
assert(be->be_status == BST_PEND);
|
||||
TAILQ_FOREACH(be, &bc->bc_pendq, be_link) {
|
||||
if (be->be_status == BST_PEND)
|
||||
break;
|
||||
assert(be->be_status == BST_BLOCK);
|
||||
}
|
||||
if (be == NULL)
|
||||
return (0);
|
||||
TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
|
||||
be->be_status = BST_BUSY;
|
||||
be->be_tid = bc->bc_btid;
|
||||
be->be_tid = t;
|
||||
TAILQ_INSERT_TAIL(&bc->bc_busyq, be, be_link);
|
||||
|
||||
*bep = be;
|
||||
|
||||
return (0);
|
||||
return (1);
|
||||
}
|
||||
|
||||
static void
|
||||
blockif_complete(struct blockif_ctxt *bc, struct blockif_elem *be)
|
||||
{
|
||||
assert(be->be_status == BST_DONE);
|
||||
struct blockif_elem *tbe;
|
||||
|
||||
TAILQ_REMOVE(&bc->bc_busyq, be, be_link);
|
||||
if (be->be_status == BST_DONE || be->be_status == BST_BUSY)
|
||||
TAILQ_REMOVE(&bc->bc_busyq, be, be_link);
|
||||
else
|
||||
TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
|
||||
TAILQ_FOREACH(tbe, &bc->bc_pendq, be_link) {
|
||||
if (tbe->be_req->br_offset == be->be_block)
|
||||
tbe->be_status = BST_PEND;
|
||||
}
|
||||
be->be_tid = 0;
|
||||
be->be_status = BST_FREE;
|
||||
be->be_req = NULL;
|
||||
TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
|
||||
|
||||
bc->bc_req_count--;
|
||||
}
|
||||
|
||||
static void
|
||||
@ -226,28 +253,27 @@ blockif_thr(void *arg)
|
||||
{
|
||||
struct blockif_ctxt *bc;
|
||||
struct blockif_elem *be;
|
||||
pthread_t t;
|
||||
|
||||
bc = arg;
|
||||
t = pthread_self();
|
||||
|
||||
pthread_mutex_lock(&bc->bc_mtx);
|
||||
for (;;) {
|
||||
pthread_mutex_lock(&bc->bc_mtx);
|
||||
while (!blockif_dequeue(bc, &be)) {
|
||||
while (blockif_dequeue(bc, t, &be)) {
|
||||
pthread_mutex_unlock(&bc->bc_mtx);
|
||||
blockif_proc(bc, be);
|
||||
pthread_mutex_lock(&bc->bc_mtx);
|
||||
blockif_complete(bc, be);
|
||||
}
|
||||
pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
|
||||
pthread_mutex_unlock(&bc->bc_mtx);
|
||||
|
||||
/*
|
||||
* Check ctxt status here to see if exit requested
|
||||
*/
|
||||
/* Check ctxt status here to see if exit requested */
|
||||
if (bc->bc_closing)
|
||||
pthread_exit(NULL);
|
||||
break;
|
||||
pthread_cond_wait(&bc->bc_cond, &bc->bc_mtx);
|
||||
}
|
||||
pthread_mutex_unlock(&bc->bc_mtx);
|
||||
|
||||
/* Not reached */
|
||||
pthread_exit(NULL);
|
||||
return (NULL);
|
||||
}
|
||||
|
||||
@ -386,16 +412,16 @@ blockif_open(const char *optstr, const char *ident)
|
||||
TAILQ_INIT(&bc->bc_freeq);
|
||||
TAILQ_INIT(&bc->bc_pendq);
|
||||
TAILQ_INIT(&bc->bc_busyq);
|
||||
bc->bc_req_count = 0;
|
||||
for (i = 0; i < BLOCKIF_MAXREQ; i++) {
|
||||
bc->bc_reqs[i].be_status = BST_FREE;
|
||||
TAILQ_INSERT_HEAD(&bc->bc_freeq, &bc->bc_reqs[i], be_link);
|
||||
}
|
||||
|
||||
pthread_create(&bc->bc_btid, NULL, blockif_thr, bc);
|
||||
|
||||
snprintf(tname, sizeof(tname), "blk-%s", ident);
|
||||
pthread_set_name_np(bc->bc_btid, tname);
|
||||
for (i = 0; i < BLOCKIF_NUMTHR; i++) {
|
||||
pthread_create(&bc->bc_btid[i], NULL, blockif_thr, bc);
|
||||
snprintf(tname, sizeof(tname), "blk-%s-%d", ident, i);
|
||||
pthread_set_name_np(bc->bc_btid[i], tname);
|
||||
}
|
||||
|
||||
return (bc);
|
||||
}
|
||||
@ -409,13 +435,13 @@ blockif_request(struct blockif_ctxt *bc, struct blockif_req *breq,
|
||||
err = 0;
|
||||
|
||||
pthread_mutex_lock(&bc->bc_mtx);
|
||||
if (bc->bc_req_count < BLOCKIF_MAXREQ) {
|
||||
if (!TAILQ_EMPTY(&bc->bc_freeq)) {
|
||||
/*
|
||||
* Enqueue and inform the block i/o thread
|
||||
* that there is work available
|
||||
*/
|
||||
blockif_enqueue(bc, breq, op);
|
||||
pthread_cond_signal(&bc->bc_cond);
|
||||
if (blockif_enqueue(bc, breq, op))
|
||||
pthread_cond_signal(&bc->bc_cond);
|
||||
} else {
|
||||
/*
|
||||
* Callers are not allowed to enqueue more than
|
||||
@ -481,11 +507,7 @@ blockif_cancel(struct blockif_ctxt *bc, struct blockif_req *breq)
|
||||
/*
|
||||
* Found it.
|
||||
*/
|
||||
TAILQ_REMOVE(&bc->bc_pendq, be, be_link);
|
||||
be->be_status = BST_FREE;
|
||||
be->be_req = NULL;
|
||||
TAILQ_INSERT_TAIL(&bc->bc_freeq, be, be_link);
|
||||
bc->bc_req_count--;
|
||||
blockif_complete(bc, be);
|
||||
pthread_mutex_unlock(&bc->bc_mtx);
|
||||
|
||||
return (0);
|
||||
@ -546,7 +568,7 @@ int
|
||||
blockif_close(struct blockif_ctxt *bc)
|
||||
{
|
||||
void *jval;
|
||||
int err;
|
||||
int err, i;
|
||||
|
||||
err = 0;
|
||||
|
||||
@ -556,8 +578,9 @@ blockif_close(struct blockif_ctxt *bc)
|
||||
* Stop the block i/o thread
|
||||
*/
|
||||
bc->bc_closing = 1;
|
||||
pthread_cond_signal(&bc->bc_cond);
|
||||
pthread_join(bc->bc_btid, &jval);
|
||||
pthread_cond_broadcast(&bc->bc_cond);
|
||||
for (i = 0; i < BLOCKIF_NUMTHR; i++)
|
||||
pthread_join(bc->bc_btid[i], &jval);
|
||||
|
||||
/* XXX Cancel queued i/o's ??? */
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user