bdev/aio: assign new io-ctx to each io-ch

Avoid bdev starvation by giving each bdev_aio io channel its own
io_context and linking the channels into their group channel.
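
For illustration, a minimal standalone sketch of the new layout (not
the SPDK code itself; QUEUE_DEPTH, io_channel and group_channel are
hypothetical stand-ins for SPDK_AIO_QUEUE_DEPTH and the bdev_aio
structs): every channel owns a private io_context_t and registers
itself on its group channel's list.

#include <libaio.h>
#include <stddef.h>
#include <sys/queue.h>

#define QUEUE_DEPTH 128			/* stand-in for SPDK_AIO_QUEUE_DEPTH */

struct io_channel {
	io_context_t io_ctx;		/* private kernel AIO context for this channel */
	TAILQ_ENTRY(io_channel) link;	/* linkage on the group channel's list */
};

struct group_channel {
	TAILQ_HEAD(, io_channel) io_ch_head;	/* every channel this group polls */
};

/* Give the channel its own context, then hand it to the group poller. */
static int channel_create(struct group_channel *grp, struct io_channel *ch)
{
	ch->io_ctx = NULL;		/* io_setup() requires a zeroed context */
	if (io_setup(QUEUE_DEPTH, &ch->io_ctx) < 0) {
		return -1;
	}
	TAILQ_INSERT_TAIL(&grp->io_ch_head, ch, link);
	return 0;
}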

Previously, every channel of an aio bdev on the same spdk_thread
shared a single io_context. If one channel submitted too much IO to
that io_context, the other channels were starved. Moreover, once the
first bdev_io failed with NOMEM, subsequent IO of that bdev had no
chance to be processed. The starvation can be reproduced with
bdevperf on v20.04:
./bdevperf -q 100 -w randwrite -t 5 -c hdd.conf -o 1048576
(hdd.conf defines 2 aio bdevs backed by HDDs)
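
A matching sketch of the group poller, using the same assumed types
as the sketch above (add <time.h> for struct timespec): because each
channel is drained from its own context, a flood of completions or a
NOMEM on one bdev no longer starves the IO of its neighbors.

static int group_poll(struct group_channel *grp)
{
	struct io_event events[QUEUE_DEPTH];
	struct timespec ts = {0, 0};	/* poll without blocking */
	struct io_channel *ch;
	int total = 0;

	TAILQ_FOREACH(ch, &grp->io_ch_head, link) {
		/* Each context is drained independently, bounded by its own depth. */
		int nr = io_getevents(ch->io_ctx, 0, QUEUE_DEPTH, events, &ts);
		if (nr > 0) {
			total += nr;	/* completions would be dispatched here */
		}
	}

	return total;	/* non-zero means the poller did useful work */
}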

Change-Id: Ic709323f5baeb2f8f7250b75ad872ec4156b5e78
Signed-off-by: Xiaodong Liu <xiaodong.liu@intel.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/3808
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
Reviewed-by: Ziye Yang <ziye.yang@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Paul Luse <paul.e.luse@intel.com>
Community-CI: Broadcom CI

@@ -53,12 +53,14 @@
 
 struct bdev_aio_io_channel {
 	uint64_t				io_inflight;
+	io_context_t				io_ctx;
 	struct bdev_aio_group_channel		*group_ch;
+	TAILQ_ENTRY(bdev_aio_io_channel)	link;
 };
 
 struct bdev_aio_group_channel {
 	struct spdk_poller			*poller;
-	io_context_t				io_ctx;
+	TAILQ_HEAD(, bdev_aio_io_channel)	io_ch_head;
 };
 
 struct bdev_aio_task {
@@ -178,7 +180,7 @@ bdev_aio_readv(struct file_disk *fdisk, struct spdk_io_channel *ch,
 	SPDK_DEBUGLOG(SPDK_LOG_AIO, "read %d iovs size %lu to off: %#lx\n",
 		      iovcnt, nbytes, offset);
 
-	rc = io_submit(aio_ch->group_ch->io_ctx, 1, &iocb);
+	rc = io_submit(aio_ch->io_ctx, 1, &iocb);
 	if (rc < 0) {
 		if (rc == -EAGAIN) {
 			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
@@ -209,7 +211,7 @@ bdev_aio_writev(struct file_disk *fdisk, struct spdk_io_channel *ch,
 	SPDK_DEBUGLOG(SPDK_LOG_AIO, "write %d iovs size %lu from off: %#lx\n",
 		      iovcnt, len, offset);
 
-	rc = io_submit(aio_ch->group_ch->io_ctx, 1, &iocb);
+	rc = io_submit(aio_ch->io_ctx, 1, &iocb);
 	if (rc < 0) {
 		if (rc == -EAGAIN) {
 			spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM);
@@ -312,18 +314,17 @@ bdev_user_io_getevents(io_context_t io_ctx, unsigned int max, struct io_event *u
 }
 
 static int
-bdev_aio_group_poll(void *arg)
+bdev_aio_io_channel_poll(struct bdev_aio_io_channel *io_ch)
 {
-	struct bdev_aio_group_channel *group_ch = arg;
 	int nr, i = 0;
 	enum spdk_bdev_io_status status;
 	struct bdev_aio_task *aio_task;
 	struct io_event events[SPDK_AIO_QUEUE_DEPTH];
 
-	nr = bdev_user_io_getevents(group_ch->io_ctx, SPDK_AIO_QUEUE_DEPTH, events);
+	nr = bdev_user_io_getevents(io_ch->io_ctx, SPDK_AIO_QUEUE_DEPTH, events);
 
 	if (nr < 0) {
-		return SPDK_POLLER_IDLE;
+		return 0;
 	}
 
 	for (i = 0; i < nr; i++) {
@@ -338,6 +339,20 @@ bdev_aio_group_poll(void *arg)
 		aio_task->ch->io_inflight--;
 	}
 
 	return nr;
 }
 
+static int
+bdev_aio_group_poll(void *arg)
+{
+	struct bdev_aio_group_channel *group_ch = arg;
+	struct bdev_aio_io_channel *io_ch;
+	int nr = 0;
+
+	TAILQ_FOREACH(io_ch, &group_ch->io_ch_head, link) {
+		nr += bdev_aio_io_channel_poll(io_ch);
+	}
+
+	return nr > 0 ? SPDK_POLLER_BUSY : SPDK_POLLER_IDLE;
+}
+
@@ -481,7 +496,13 @@ bdev_aio_create_cb(void *io_device, void *ctx_buf)
 {
 	struct bdev_aio_io_channel *ch = ctx_buf;
 
+	if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) {
+		SPDK_ERRLOG("async I/O context setup failure\n");
+		return -1;
+	}
+
 	ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&aio_if));
+	TAILQ_INSERT_TAIL(&ch->group_ch->io_ch_head, ch, link);
 
 	return 0;
 }
@@ -491,6 +512,11 @@ bdev_aio_destroy_cb(void *io_device, void *ctx_buf)
 {
 	struct bdev_aio_io_channel *ch = ctx_buf;
 
+	io_destroy(ch->io_ctx);
+
+	assert(ch->group_ch);
+	TAILQ_REMOVE(&ch->group_ch->io_ch_head, ch, link);
+
 	spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch));
 }
@@ -561,10 +587,7 @@ bdev_aio_group_create_cb(void *io_device, void *ctx_buf)
 {
 	struct bdev_aio_group_channel *ch = ctx_buf;
 
-	if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) {
-		SPDK_ERRLOG("async I/O context setup failure\n");
-		return -1;
-	}
+	TAILQ_INIT(&ch->io_ch_head);
 
 	ch->poller = SPDK_POLLER_REGISTER(bdev_aio_group_poll, ch, 0);
 	return 0;
@@ -575,7 +598,9 @@ bdev_aio_group_destroy_cb(void *io_device, void *ctx_buf)
 {
 	struct bdev_aio_group_channel *ch = ctx_buf;
 
-	io_destroy(ch->io_ctx);
+	if (!TAILQ_EMPTY(&ch->io_ch_head)) {
+		SPDK_ERRLOG("Group channel of bdev aio has uncleared io channel\n");
+	}
 
 	spdk_poller_unregister(&ch->poller);
 }