From 195fb4e40e75f65b30e953d8f63175b5b757c1fe Mon Sep 17 00:00:00 2001 From: Xiaodong Liu Date: Fri, 14 Aug 2020 23:57:10 -0400 Subject: [PATCH] bdev/aio: assign new io-ctx to each io-ch This change aims to avoid bdev starvation by assigning a separate io_context to each bdev_aio io channel and linking the channels into the group channel. Previously, every aio_bdev channel on the same spdk_thread shared the same io_context. If one of them submitted too many I/Os to the io_context, the other channels were starved. Moreover, if the first bdev_io got a NOMEM error, the subsequent I/Os of that bdev had no chance to be processed. The bdev_aio starvation can be reproduced with bdevperf in v20.04 by running: ./bdevperf -q 100 -w randwrite -t 5 -c hdd.conf -o 1048576 (hdd.conf defines 2 aio_bdevs backed by HDDs) Change-Id: Ic709323f5baeb2f8f7250b75ad872ec4156b5e78 Signed-off-by: Xiaodong Liu Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/3808 Tested-by: SPDK CI Jenkins Reviewed-by: Changpeng Liu Reviewed-by: Ziye Yang Reviewed-by: Ben Walker Reviewed-by: Jim Harris Reviewed-by: Paul Luse Community-CI: Broadcom CI --- module/bdev/aio/bdev_aio.c | 49 ++++++++++++++++++++++++++++---------- 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/module/bdev/aio/bdev_aio.c b/module/bdev/aio/bdev_aio.c index 4b49fb2c33..46cf5f69b3 100644 --- a/module/bdev/aio/bdev_aio.c +++ b/module/bdev/aio/bdev_aio.c @@ -53,12 +53,14 @@ struct bdev_aio_io_channel { uint64_t io_inflight; + io_context_t io_ctx; struct bdev_aio_group_channel *group_ch; + TAILQ_ENTRY(bdev_aio_io_channel) link; }; struct bdev_aio_group_channel { struct spdk_poller *poller; - io_context_t io_ctx; + TAILQ_HEAD(, bdev_aio_io_channel) io_ch_head; }; struct bdev_aio_task { @@ -178,7 +180,7 @@ bdev_aio_readv(struct file_disk *fdisk, struct spdk_io_channel *ch, SPDK_DEBUGLOG(SPDK_LOG_AIO, "read %d iovs size %lu to off: %#lx\n", iovcnt, nbytes, offset); - rc = io_submit(aio_ch->group_ch->io_ctx, 1, &iocb); + rc = io_submit(aio_ch->io_ctx, 1, &iocb); if (rc < 
0) { if (rc == -EAGAIN) { spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM); @@ -209,7 +211,7 @@ bdev_aio_writev(struct file_disk *fdisk, struct spdk_io_channel *ch, SPDK_DEBUGLOG(SPDK_LOG_AIO, "write %d iovs size %lu from off: %#lx\n", iovcnt, len, offset); - rc = io_submit(aio_ch->group_ch->io_ctx, 1, &iocb); + rc = io_submit(aio_ch->io_ctx, 1, &iocb); if (rc < 0) { if (rc == -EAGAIN) { spdk_bdev_io_complete(spdk_bdev_io_from_ctx(aio_task), SPDK_BDEV_IO_STATUS_NOMEM); @@ -312,18 +314,17 @@ bdev_user_io_getevents(io_context_t io_ctx, unsigned int max, struct io_event *u } static int -bdev_aio_group_poll(void *arg) +bdev_aio_io_channel_poll(struct bdev_aio_io_channel *io_ch) { - struct bdev_aio_group_channel *group_ch = arg; int nr, i = 0; enum spdk_bdev_io_status status; struct bdev_aio_task *aio_task; struct io_event events[SPDK_AIO_QUEUE_DEPTH]; - nr = bdev_user_io_getevents(group_ch->io_ctx, SPDK_AIO_QUEUE_DEPTH, events); + nr = bdev_user_io_getevents(io_ch->io_ctx, SPDK_AIO_QUEUE_DEPTH, events); if (nr < 0) { - return SPDK_POLLER_IDLE; + return 0; } for (i = 0; i < nr; i++) { @@ -338,6 +339,20 @@ bdev_aio_group_poll(void *arg) aio_task->ch->io_inflight--; } + return nr; +} + +static int +bdev_aio_group_poll(void *arg) +{ + struct bdev_aio_group_channel *group_ch = arg; + struct bdev_aio_io_channel *io_ch; + int nr = 0; + + TAILQ_FOREACH(io_ch, &group_ch->io_ch_head, link) { + nr += bdev_aio_io_channel_poll(io_ch); + } + return nr > 0 ? 
SPDK_POLLER_BUSY : SPDK_POLLER_IDLE; } @@ -481,7 +496,13 @@ bdev_aio_create_cb(void *io_device, void *ctx_buf) { struct bdev_aio_io_channel *ch = ctx_buf; + if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) { + SPDK_ERRLOG("async I/O context setup failure\n"); + return -1; + } + ch->group_ch = spdk_io_channel_get_ctx(spdk_get_io_channel(&aio_if)); + TAILQ_INSERT_TAIL(&ch->group_ch->io_ch_head, ch, link); return 0; } @@ -491,6 +512,11 @@ bdev_aio_destroy_cb(void *io_device, void *ctx_buf) { struct bdev_aio_io_channel *ch = ctx_buf; + io_destroy(ch->io_ctx); + + assert(ch->group_ch); + TAILQ_REMOVE(&ch->group_ch->io_ch_head, ch, link); + spdk_put_io_channel(spdk_io_channel_from_ctx(ch->group_ch)); } @@ -561,10 +587,7 @@ bdev_aio_group_create_cb(void *io_device, void *ctx_buf) { struct bdev_aio_group_channel *ch = ctx_buf; - if (io_setup(SPDK_AIO_QUEUE_DEPTH, &ch->io_ctx) < 0) { - SPDK_ERRLOG("async I/O context setup failure\n"); - return -1; - } + TAILQ_INIT(&ch->io_ch_head); ch->poller = SPDK_POLLER_REGISTER(bdev_aio_group_poll, ch, 0); return 0; @@ -575,7 +598,9 @@ bdev_aio_group_destroy_cb(void *io_device, void *ctx_buf) { struct bdev_aio_group_channel *ch = ctx_buf; - io_destroy(ch->io_ctx); + if (!TAILQ_EMPTY(&ch->io_ch_head)) { + SPDK_ERRLOG("Group channel of bdev aio has uncleared io channel\n"); + } spdk_poller_unregister(&ch->poller); }