bdev/nvme: Retry admin passthru a second later if any ctrlr may become available

When resetting ctrlr, adminq is disconnected first. If adminq is disconnected,
admin passthrough request is rejected with -ENXIO.

But resetting ctrlr may succeed. If resetting ctrlr succeeds, adminq is
connected again, and admin passthrough request will be
submitted successfully.

On the other hand, if ctrlr is failed, admin passthrough request is
rejected with -ENXIO. But when resetting ctrlr, ctrlr is set to unfailed.

Hence bdev_nvme_admin_passthru() skips any ctrlr which is resetting
or failed, and calls bdev_nvme_admin_passthru_complete() with -ENXIO
if no available ctrlr is found.

bdev_nvme_admin_passthru_complete() queues admin passthrough request
and retry it one second later if ctrlr is resetting.

Signed-off-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Change-Id: Ic748dc4faf29ebf717ae5c29dcf7c55fe2ea9243
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9942
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
This commit is contained in:
Shuhei Matsumoto 2021-11-19 02:29:28 +09:00 committed by Tomasz Zawadzki
parent 1fa4d590d8
commit 35a2f4e22e
2 changed files with 146 additions and 1 deletions

View File

@ -785,6 +785,20 @@ any_io_path_may_become_available(struct nvme_bdev_channel *nbdev_ch)
return false;
}
static bool
any_ctrlr_may_become_available(struct nvme_bdev_channel *nbdev_ch)
{
struct nvme_io_path *io_path;
STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
if (!nvme_io_path_is_failed(io_path)) {
return true;
}
}
return false;
}
static int
bdev_nvme_retry_ios(void *arg)
{
@ -949,6 +963,7 @@ static inline void
bdev_nvme_admin_passthru_complete(struct nvme_bdev_io *bio, int rc)
{
struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
struct nvme_bdev_channel *nbdev_ch;
enum spdk_bdev_io_status io_status;
switch (rc) {
@ -958,6 +973,15 @@ bdev_nvme_admin_passthru_complete(struct nvme_bdev_io *bio, int rc)
case -ENOMEM:
io_status = SPDK_BDEV_IO_STATUS_NOMEM;
break;
case -ENXIO:
nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
if (any_ctrlr_may_become_available(nbdev_ch)) {
bdev_nvme_queue_retry_io(nbdev_ch, bio, 1000ULL);
return;
}
/* fallthrough */
default:
io_status = SPDK_BDEV_IO_STATUS_FAILED;
break;
@ -4632,7 +4656,13 @@ bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io
STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
nvme_ctrlr = nvme_ctrlr_channel_get_ctrlr(io_path->ctrlr_ch);
if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
/* When resetting a ctrlr, its adminq is disconnected first.
* spdk_nvme_ctrlr_cmd_admin_raw() returns -ENXIO if the ctrlr is
* failed or its adminq is disconnected. We should skip any ctrlr
* which is failed or resetting rather than checking if the return
* value of spdk_nvme_ctrlr_cmd_admin_raw() is -ENXIO.
*/
if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr) || nvme_ctrlr->resetting) {
continue;
}

View File

@ -4665,6 +4665,120 @@ test_retry_io_for_ana_error(void)
g_opts.bdev_retry_count = 0;
}
static void
test_retry_admin_passthru_if_ctrlr_is_resetting(void)
{
struct nvme_path_id path = {};
struct spdk_nvme_ctrlr *ctrlr;
struct nvme_bdev_ctrlr *nbdev_ctrlr;
struct nvme_ctrlr *nvme_ctrlr;
const int STRING_SIZE = 32;
const char *attached_names[STRING_SIZE];
struct nvme_bdev *bdev;
struct spdk_bdev_io *admin_io;
struct spdk_io_channel *ch;
struct nvme_bdev_channel *nbdev_ch;
int rc;
memset(attached_names, 0, sizeof(char *) * STRING_SIZE);
ut_init_trid(&path.trid);
set_thread(0);
ctrlr = ut_attach_ctrlr(&path.trid, 1, false, false);
SPDK_CU_ASSERT_FATAL(ctrlr != NULL);
g_ut_attach_ctrlr_status = 0;
g_ut_attach_bdev_count = 1;
rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, false);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
poll_threads();
nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name("nvme0");
SPDK_CU_ASSERT_FATAL(nbdev_ctrlr != NULL);
nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, &path.trid);
CU_ASSERT(nvme_ctrlr != NULL);
bdev = nvme_bdev_ctrlr_get_bdev(nbdev_ctrlr, 1);
CU_ASSERT(bdev != NULL);
admin_io = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_NVME_ADMIN, bdev, NULL);
admin_io->u.nvme_passthru.cmd.opc = SPDK_NVME_OPC_GET_FEATURES;
ch = spdk_get_io_channel(bdev);
SPDK_CU_ASSERT_FATAL(ch != NULL);
nbdev_ch = spdk_io_channel_get_ctx(ch);
admin_io->internal.ch = (struct spdk_bdev_channel *)ch;
/* If ctrlr is available, admin passthrough should succeed. */
admin_io->internal.in_submit_request = true;
bdev_nvme_submit_request(ch, admin_io);
CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1);
CU_ASSERT(admin_io->internal.in_submit_request == true);
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(admin_io->internal.in_submit_request == false);
CU_ASSERT(admin_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS);
/* If ctrlr is resetting, admin passthrough request should be queued
* if it is submitted while resetting ctrlr.
*/
bdev_nvme_reset(nvme_ctrlr);
poll_thread_times(0, 1);
admin_io->internal.in_submit_request = true;
bdev_nvme_submit_request(ch, admin_io);
CU_ASSERT(admin_io->internal.in_submit_request == true);
CU_ASSERT(admin_io == TAILQ_FIRST(&nbdev_ch->retry_io_list));
poll_threads();
CU_ASSERT(nvme_ctrlr->resetting == false);
spdk_delay_us(1000000);
poll_thread_times(0, 1);
CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1);
CU_ASSERT(admin_io->internal.in_submit_request == true);
CU_ASSERT(TAILQ_EMPTY(&nbdev_ch->retry_io_list));
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 0);
CU_ASSERT(admin_io->internal.in_submit_request == false);
CU_ASSERT(admin_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);
free(admin_io);
spdk_put_io_channel(ch);
poll_threads();
rc = bdev_nvme_delete("nvme0", &g_any_path);
CU_ASSERT(rc == 0);
poll_threads();
spdk_delay_us(1000);
poll_threads();
CU_ASSERT(nvme_bdev_ctrlr_get_by_name("nvme0") == NULL);
}
int
main(int argc, const char **argv)
{
@ -4703,6 +4817,7 @@ main(int argc, const char **argv)
CU_ADD_TEST(suite, test_retry_io_count);
CU_ADD_TEST(suite, test_concurrent_read_ana_log_page);
CU_ADD_TEST(suite, test_retry_io_for_ana_error);
CU_ADD_TEST(suite, test_retry_admin_passthru_if_ctrlr_is_resetting);
CU_basic_set_mode(CU_BRM_VERBOSE);