bdev/nvme: Retry admin passthru a second later if any ctrlr may become available
When resetting ctrlr, adminq is disconnected first. If adminq is disconnected, admin passthrough request is rejected with -ENXIO. But resetting ctrlr may succeed. If resetting ctrlr succeeds, adminq is connected again, and admin passthrough request will be submitted successfully. On the other hand, if ctrlr is failed, admin passthrough request is rejected with -ENXIO. But when resetting ctrlr, ctrlr is set to unfailed. Hence bdev_nvme_admin_passthru() skips any ctrlr which is resetting or failed, and calls bdev_nvme_admin_passthru_complete() with -ENXIO if no available ctrlr is found. bdev_nvme_admin_passthru_complete() queues admin passthrough request and retry it one second later if ctrlr is resetting. Signed-off-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com> Change-Id: Ic748dc4faf29ebf717ae5c29dcf7c55fe2ea9243 Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9942 Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com> Community-CI: Mellanox Build Bot Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com> Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
This commit is contained in:
parent
1fa4d590d8
commit
35a2f4e22e
@ -785,6 +785,20 @@ any_io_path_may_become_available(struct nvme_bdev_channel *nbdev_ch)
|
||||
return false;
|
||||
}
|
||||
|
||||
static bool
|
||||
any_ctrlr_may_become_available(struct nvme_bdev_channel *nbdev_ch)
|
||||
{
|
||||
struct nvme_io_path *io_path;
|
||||
|
||||
STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
|
||||
if (!nvme_io_path_is_failed(io_path)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
static int
|
||||
bdev_nvme_retry_ios(void *arg)
|
||||
{
|
||||
@ -949,6 +963,7 @@ static inline void
|
||||
bdev_nvme_admin_passthru_complete(struct nvme_bdev_io *bio, int rc)
|
||||
{
|
||||
struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
|
||||
struct nvme_bdev_channel *nbdev_ch;
|
||||
enum spdk_bdev_io_status io_status;
|
||||
|
||||
switch (rc) {
|
||||
@ -958,6 +973,15 @@ bdev_nvme_admin_passthru_complete(struct nvme_bdev_io *bio, int rc)
|
||||
case -ENOMEM:
|
||||
io_status = SPDK_BDEV_IO_STATUS_NOMEM;
|
||||
break;
|
||||
case -ENXIO:
|
||||
nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
|
||||
|
||||
if (any_ctrlr_may_become_available(nbdev_ch)) {
|
||||
bdev_nvme_queue_retry_io(nbdev_ch, bio, 1000ULL);
|
||||
return;
|
||||
}
|
||||
|
||||
/* fallthrough */
|
||||
default:
|
||||
io_status = SPDK_BDEV_IO_STATUS_FAILED;
|
||||
break;
|
||||
@ -4632,7 +4656,13 @@ bdev_nvme_admin_passthru(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io
|
||||
STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
|
||||
nvme_ctrlr = nvme_ctrlr_channel_get_ctrlr(io_path->ctrlr_ch);
|
||||
|
||||
if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
|
||||
/* When resetting a ctrlr, its adminq is disconnected first.
|
||||
* spdk_nvme_ctrlr_cmd_admin_raw() returns -ENXIO if the ctrlr is
|
||||
* failed or its adminq is disconnected. We should skip any ctrlr
|
||||
* which is failed or resetting rather than checking if the return
|
||||
* value of spdk_nvme_ctrlr_cmd_admin_raw() is -ENXIO.
|
||||
*/
|
||||
if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr) || nvme_ctrlr->resetting) {
|
||||
continue;
|
||||
}
|
||||
|
||||
|
@ -4665,6 +4665,120 @@ test_retry_io_for_ana_error(void)
|
||||
g_opts.bdev_retry_count = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
test_retry_admin_passthru_if_ctrlr_is_resetting(void)
|
||||
{
|
||||
struct nvme_path_id path = {};
|
||||
struct spdk_nvme_ctrlr *ctrlr;
|
||||
struct nvme_bdev_ctrlr *nbdev_ctrlr;
|
||||
struct nvme_ctrlr *nvme_ctrlr;
|
||||
const int STRING_SIZE = 32;
|
||||
const char *attached_names[STRING_SIZE];
|
||||
struct nvme_bdev *bdev;
|
||||
struct spdk_bdev_io *admin_io;
|
||||
struct spdk_io_channel *ch;
|
||||
struct nvme_bdev_channel *nbdev_ch;
|
||||
int rc;
|
||||
|
||||
memset(attached_names, 0, sizeof(char *) * STRING_SIZE);
|
||||
ut_init_trid(&path.trid);
|
||||
|
||||
set_thread(0);
|
||||
|
||||
ctrlr = ut_attach_ctrlr(&path.trid, 1, false, false);
|
||||
SPDK_CU_ASSERT_FATAL(ctrlr != NULL);
|
||||
|
||||
g_ut_attach_ctrlr_status = 0;
|
||||
g_ut_attach_bdev_count = 1;
|
||||
|
||||
rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0,
|
||||
attach_ctrlr_done, NULL, NULL, false);
|
||||
CU_ASSERT(rc == 0);
|
||||
|
||||
spdk_delay_us(1000);
|
||||
poll_threads();
|
||||
|
||||
nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name("nvme0");
|
||||
SPDK_CU_ASSERT_FATAL(nbdev_ctrlr != NULL);
|
||||
|
||||
nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, &path.trid);
|
||||
CU_ASSERT(nvme_ctrlr != NULL);
|
||||
|
||||
bdev = nvme_bdev_ctrlr_get_bdev(nbdev_ctrlr, 1);
|
||||
CU_ASSERT(bdev != NULL);
|
||||
|
||||
admin_io = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_NVME_ADMIN, bdev, NULL);
|
||||
admin_io->u.nvme_passthru.cmd.opc = SPDK_NVME_OPC_GET_FEATURES;
|
||||
|
||||
ch = spdk_get_io_channel(bdev);
|
||||
SPDK_CU_ASSERT_FATAL(ch != NULL);
|
||||
|
||||
nbdev_ch = spdk_io_channel_get_ctx(ch);
|
||||
|
||||
admin_io->internal.ch = (struct spdk_bdev_channel *)ch;
|
||||
|
||||
/* If ctrlr is available, admin passthrough should succeed. */
|
||||
admin_io->internal.in_submit_request = true;
|
||||
|
||||
bdev_nvme_submit_request(ch, admin_io);
|
||||
|
||||
CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1);
|
||||
CU_ASSERT(admin_io->internal.in_submit_request == true);
|
||||
|
||||
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
|
||||
poll_threads();
|
||||
|
||||
CU_ASSERT(admin_io->internal.in_submit_request == false);
|
||||
CU_ASSERT(admin_io->internal.status = SPDK_BDEV_IO_STATUS_SUCCESS);
|
||||
|
||||
/* If ctrlr is resetting, admin passthrough request should be queued
|
||||
* if it is submitted while resetting ctrlr.
|
||||
*/
|
||||
bdev_nvme_reset(nvme_ctrlr);
|
||||
|
||||
poll_thread_times(0, 1);
|
||||
|
||||
admin_io->internal.in_submit_request = true;
|
||||
|
||||
bdev_nvme_submit_request(ch, admin_io);
|
||||
|
||||
CU_ASSERT(admin_io->internal.in_submit_request == true);
|
||||
CU_ASSERT(admin_io == TAILQ_FIRST(&nbdev_ch->retry_io_list));
|
||||
|
||||
poll_threads();
|
||||
|
||||
CU_ASSERT(nvme_ctrlr->resetting == false);
|
||||
|
||||
spdk_delay_us(1000000);
|
||||
poll_thread_times(0, 1);
|
||||
|
||||
CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1);
|
||||
CU_ASSERT(admin_io->internal.in_submit_request == true);
|
||||
CU_ASSERT(TAILQ_EMPTY(&nbdev_ch->retry_io_list));
|
||||
|
||||
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
|
||||
poll_threads();
|
||||
|
||||
CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 0);
|
||||
CU_ASSERT(admin_io->internal.in_submit_request == false);
|
||||
CU_ASSERT(admin_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);
|
||||
|
||||
free(admin_io);
|
||||
|
||||
spdk_put_io_channel(ch);
|
||||
|
||||
poll_threads();
|
||||
|
||||
rc = bdev_nvme_delete("nvme0", &g_any_path);
|
||||
CU_ASSERT(rc == 0);
|
||||
|
||||
poll_threads();
|
||||
spdk_delay_us(1000);
|
||||
poll_threads();
|
||||
|
||||
CU_ASSERT(nvme_bdev_ctrlr_get_by_name("nvme0") == NULL);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, const char **argv)
|
||||
{
|
||||
@ -4703,6 +4817,7 @@ main(int argc, const char **argv)
|
||||
CU_ADD_TEST(suite, test_retry_io_count);
|
||||
CU_ADD_TEST(suite, test_concurrent_read_ana_log_page);
|
||||
CU_ADD_TEST(suite, test_retry_io_for_ana_error);
|
||||
CU_ADD_TEST(suite, test_retry_admin_passthru_if_ctrlr_is_resetting);
|
||||
|
||||
CU_basic_set_mode(CU_BRM_VERBOSE);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user