bdev/nvme: Reset the nvme_ctrlr if an I/O qpair is disconnected
Previously, if an I/O qpair is disconnected, we tried reconnecting the qpair. However, this reconnect operation was very likely to fail and will not match the upcoming asynchronous connect/reconnect operation. We need an extra callback to make this reconnect operation asynchronous, but we do not want to have it. Hence if an I/O qpair is disconnected, we free the I/O qpair and then reset the corresponding nvme_ctrlr immediately. If the admin qpair is also disconnected, the nvme_ctrlr is reset immediately. However this event may never happen. So we do not wait for the error of the admin qpair. The NVMf host may disconnect connections by itself intentionally. In this case, resetting the nvme_ctrlr will surely fail. But resetting the nvme_ctrlr frees all I/O qpairs of the nvme_ctrlr and these I/O qpairs are not created again until resetting the nvme_ctrlr succeeds. Resetting the nvme_ctrlr once at most is more efficient than repeating reconnecting the I/O qpair. So this change is valuable even for such intentional disconnection. However, it is helpful to know the event that I/O qpair is disconnected. Hence change DEBUGLOG to NOTICELOG in the disconnected callback. The disconnected callback is not repeated, and we do not need to worry about NOTICELOG flooding. Refine the unit test case to verify this change. Signed-off-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com> Change-Id: I376b749c2f55d010692bf916370e8bb4249b795f Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9515 Community-CI: Mellanox Build Bot Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com> Tested-by: SPDK CI Jenkins <sys_sgci@intel.com> Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com> Reviewed-by: Jim Harris <james.r.harris@intel.com> Reviewed-by: Ben Walker <benjamin.walker@intel.com>
This commit is contained in:
parent
c9ab7ae4b3
commit
14535253f1
@ -193,6 +193,7 @@ static int bdev_nvme_io_passthru_md(struct spdk_nvme_ns *ns, struct spdk_nvme_qp
|
||||
static int bdev_nvme_abort(struct nvme_bdev_channel *nbdev_ch,
|
||||
struct nvme_bdev_io *bio, struct nvme_bdev_io *bio_to_abort);
|
||||
static int bdev_nvme_reset_io(struct nvme_bdev_channel *nbdev_ch, struct nvme_bdev_io *bio);
|
||||
static int bdev_nvme_reset(struct nvme_ctrlr *nvme_ctrlr);
|
||||
static int bdev_nvme_failover(struct nvme_ctrlr *nvme_ctrlr, bool remove);
|
||||
static void remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr);
|
||||
|
||||
@ -503,19 +504,47 @@ bdev_nvme_io_complete(struct nvme_bdev_io *bio, int rc)
|
||||
spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), io_status);
|
||||
}
|
||||
|
||||
static struct nvme_ctrlr_channel *
|
||||
nvme_poll_group_get_ctrlr_channel(struct nvme_poll_group *group,
|
||||
struct spdk_nvme_qpair *qpair)
|
||||
{
|
||||
struct nvme_ctrlr_channel *ctrlr_ch;
|
||||
|
||||
TAILQ_FOREACH(ctrlr_ch, &group->ctrlr_ch_list, tailq) {
|
||||
if (ctrlr_ch->qpair == qpair) {
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return ctrlr_ch;
|
||||
}
|
||||
|
||||
static void
|
||||
bdev_nvme_destroy_qpair(struct nvme_ctrlr_channel *ctrlr_ch)
|
||||
{
|
||||
if (ctrlr_ch->qpair != NULL) {
|
||||
spdk_nvme_ctrlr_free_io_qpair(ctrlr_ch->qpair);
|
||||
ctrlr_ch->qpair = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
bdev_nvme_disconnected_qpair_cb(struct spdk_nvme_qpair *qpair, void *poll_group_ctx)
|
||||
{
|
||||
int rc;
|
||||
struct nvme_poll_group *group = poll_group_ctx;
|
||||
struct nvme_ctrlr_channel *ctrlr_ch;
|
||||
struct nvme_ctrlr *nvme_ctrlr;
|
||||
|
||||
SPDK_DEBUGLOG(bdev_nvme, "qpair %p is disconnected, attempting reconnect.\n", qpair);
|
||||
SPDK_NOTICELOG("qpair %p is disconnected, free the qpair and reset controller.\n", qpair);
|
||||
/*
|
||||
* Currently, just try to reconnect indefinitely. If we are doing a reset, the reset will
|
||||
* reconnect a qpair and we will stop getting a callback for this one.
|
||||
* Free the I/O qpair and reset the nvme_ctrlr.
|
||||
*/
|
||||
rc = spdk_nvme_ctrlr_reconnect_io_qpair(qpair);
|
||||
if (rc != 0) {
|
||||
SPDK_DEBUGLOG(bdev_nvme, "Failed to reconnect to qpair %p, errno %d\n", qpair, -rc);
|
||||
ctrlr_ch = nvme_poll_group_get_ctrlr_channel(group, qpair);
|
||||
if (ctrlr_ch != NULL) {
|
||||
bdev_nvme_destroy_qpair(ctrlr_ch);
|
||||
|
||||
nvme_ctrlr = nvme_ctrlr_channel_get_ctrlr(ctrlr_ch);
|
||||
bdev_nvme_reset(nvme_ctrlr);
|
||||
}
|
||||
}
|
||||
|
||||
@ -652,15 +681,6 @@ err:
|
||||
return rc;
|
||||
}
|
||||
|
||||
static void
|
||||
bdev_nvme_destroy_qpair(struct nvme_ctrlr_channel *ctrlr_ch)
|
||||
{
|
||||
if (ctrlr_ch->qpair != NULL) {
|
||||
spdk_nvme_ctrlr_free_io_qpair(ctrlr_ch->qpair);
|
||||
ctrlr_ch->qpair = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
_bdev_nvme_check_pending_destruct(struct nvme_ctrlr *nvme_ctrlr)
|
||||
{
|
||||
|
@ -1767,63 +1767,6 @@ test_attach_ctrlr(void)
|
||||
g_ut_register_bdev_status = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
test_reconnect_qpair(void)
|
||||
{
|
||||
struct spdk_nvme_transport_id trid = {};
|
||||
struct spdk_nvme_ctrlr ctrlr = {};
|
||||
struct nvme_ctrlr *nvme_ctrlr = NULL;
|
||||
struct spdk_io_channel *ch;
|
||||
struct nvme_ctrlr_channel *ctrlr_ch;
|
||||
int rc;
|
||||
|
||||
set_thread(0);
|
||||
|
||||
ut_init_trid(&trid);
|
||||
TAILQ_INIT(&ctrlr.active_io_qpairs);
|
||||
|
||||
rc = nvme_ctrlr_create(&ctrlr, "nvme0", &trid, 0, NULL);
|
||||
CU_ASSERT(rc == 0);
|
||||
|
||||
nvme_ctrlr = nvme_ctrlr_get_by_name("nvme0");
|
||||
SPDK_CU_ASSERT_FATAL(nvme_ctrlr != NULL);
|
||||
|
||||
ch = spdk_get_io_channel(nvme_ctrlr);
|
||||
SPDK_CU_ASSERT_FATAL(ch != NULL);
|
||||
|
||||
ctrlr_ch = spdk_io_channel_get_ctx(ch);
|
||||
CU_ASSERT(ctrlr_ch->qpair != NULL);
|
||||
CU_ASSERT(ctrlr_ch->group != NULL);
|
||||
CU_ASSERT(ctrlr_ch->group->group != NULL);
|
||||
CU_ASSERT(ctrlr_ch->group->poller != NULL);
|
||||
|
||||
/* Test if the disconnected qpair is reconnected. */
|
||||
ctrlr_ch->qpair->is_connected = false;
|
||||
|
||||
poll_threads();
|
||||
|
||||
CU_ASSERT(ctrlr_ch->qpair->is_connected == true);
|
||||
|
||||
/* If the ctrlr is failed, reconnecting qpair should fail too. */
|
||||
ctrlr_ch->qpair->is_connected = false;
|
||||
ctrlr.is_failed = true;
|
||||
|
||||
poll_threads();
|
||||
|
||||
CU_ASSERT(ctrlr_ch->qpair->is_connected == false);
|
||||
|
||||
spdk_put_io_channel(ch);
|
||||
|
||||
poll_threads();
|
||||
|
||||
rc = bdev_nvme_delete("nvme0", NULL);
|
||||
CU_ASSERT(rc == 0);
|
||||
|
||||
poll_threads();
|
||||
|
||||
CU_ASSERT(nvme_ctrlr_get_by_name("nvme0") == NULL);
|
||||
}
|
||||
|
||||
static void
|
||||
test_aer_cb(void)
|
||||
{
|
||||
@ -2710,6 +2653,133 @@ test_get_memory_domains(void)
|
||||
MOCK_CLEAR(spdk_nvme_ctrlr_get_memory_domain);
|
||||
}
|
||||
|
||||
static void
|
||||
test_reconnect_qpair(void)
|
||||
{
|
||||
struct spdk_nvme_transport_id trid = {};
|
||||
struct spdk_nvme_ctrlr *ctrlr;
|
||||
struct nvme_ctrlr *nvme_ctrlr;
|
||||
const int STRING_SIZE = 32;
|
||||
const char *attached_names[STRING_SIZE];
|
||||
struct nvme_bdev *bdev;
|
||||
struct spdk_io_channel *ch1, *ch2;
|
||||
struct nvme_bdev_channel *nbdev_ch1, *nbdev_ch2;
|
||||
struct nvme_ctrlr_channel *ctrlr_ch1, *ctrlr_ch2;
|
||||
int rc;
|
||||
|
||||
memset(attached_names, 0, sizeof(char *) * STRING_SIZE);
|
||||
ut_init_trid(&trid);
|
||||
|
||||
set_thread(0);
|
||||
|
||||
ctrlr = ut_attach_ctrlr(&trid, 1, false);
|
||||
SPDK_CU_ASSERT_FATAL(ctrlr != NULL);
|
||||
|
||||
g_ut_attach_ctrlr_status = 0;
|
||||
g_ut_attach_bdev_count = 1;
|
||||
|
||||
rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0,
|
||||
attach_ctrlr_done, NULL, NULL);
|
||||
CU_ASSERT(rc == 0);
|
||||
|
||||
spdk_delay_us(1000);
|
||||
poll_threads();
|
||||
|
||||
nvme_ctrlr = nvme_ctrlr_get_by_name("nvme0");
|
||||
SPDK_CU_ASSERT_FATAL(nvme_ctrlr != NULL);
|
||||
|
||||
bdev = nvme_ctrlr_get_ns(nvme_ctrlr, 1)->bdev;
|
||||
SPDK_CU_ASSERT_FATAL(bdev != NULL);
|
||||
|
||||
ch1 = spdk_get_io_channel(bdev);
|
||||
SPDK_CU_ASSERT_FATAL(ch1 != NULL);
|
||||
|
||||
nbdev_ch1 = spdk_io_channel_get_ctx(ch1);
|
||||
ctrlr_ch1 = nbdev_ch1->ctrlr_ch;
|
||||
SPDK_CU_ASSERT_FATAL(ctrlr_ch1 != NULL);
|
||||
|
||||
set_thread(1);
|
||||
|
||||
ch2 = spdk_get_io_channel(bdev);
|
||||
SPDK_CU_ASSERT_FATAL(ch2 != NULL);
|
||||
|
||||
nbdev_ch2 = spdk_io_channel_get_ctx(ch2);
|
||||
ctrlr_ch2 = nbdev_ch2->ctrlr_ch;
|
||||
SPDK_CU_ASSERT_FATAL(ctrlr_ch2 != NULL);
|
||||
|
||||
/* If a qpair is disconnected, it is freed and then reconnected via
|
||||
* resetting the corresponding nvme_ctrlr.
|
||||
*/
|
||||
ctrlr_ch2->qpair->is_connected = false;
|
||||
ctrlr->is_failed = true;
|
||||
|
||||
poll_thread_times(1, 1);
|
||||
CU_ASSERT(ctrlr_ch1->qpair != NULL);
|
||||
CU_ASSERT(ctrlr_ch2->qpair == NULL);
|
||||
CU_ASSERT(nvme_ctrlr->resetting == true);
|
||||
|
||||
poll_thread_times(0, 1);
|
||||
poll_thread_times(1, 1);
|
||||
CU_ASSERT(ctrlr_ch1->qpair == NULL);
|
||||
CU_ASSERT(ctrlr_ch2->qpair == NULL);
|
||||
CU_ASSERT(ctrlr->is_failed == true);
|
||||
|
||||
poll_thread_times(1, 1);
|
||||
CU_ASSERT(ctrlr->is_failed == false);
|
||||
|
||||
poll_thread_times(0, 1);
|
||||
poll_thread_times(1, 1);
|
||||
CU_ASSERT(ctrlr_ch1->qpair != NULL);
|
||||
CU_ASSERT(ctrlr_ch2->qpair != NULL);
|
||||
CU_ASSERT(nvme_ctrlr->resetting == true);
|
||||
|
||||
poll_thread_times(1, 1);
|
||||
CU_ASSERT(nvme_ctrlr->resetting == false);
|
||||
|
||||
poll_threads();
|
||||
|
||||
/* If a qpair is disconnected and resetting the corresponding nvme_ctrlr
|
||||
* fails, the qpair is just freed.
|
||||
*/
|
||||
ctrlr_ch2->qpair->is_connected = false;
|
||||
ctrlr->is_failed = true;
|
||||
ctrlr->fail_reset = true;
|
||||
|
||||
poll_thread_times(1, 1);
|
||||
CU_ASSERT(ctrlr_ch1->qpair != NULL);
|
||||
CU_ASSERT(ctrlr_ch2->qpair == NULL);
|
||||
CU_ASSERT(nvme_ctrlr->resetting == true);
|
||||
|
||||
poll_thread_times(0, 1);
|
||||
poll_thread_times(1, 1);
|
||||
CU_ASSERT(ctrlr_ch1->qpair == NULL);
|
||||
CU_ASSERT(ctrlr_ch2->qpair == NULL);
|
||||
CU_ASSERT(ctrlr->is_failed == true);
|
||||
|
||||
poll_thread_times(1, 1);
|
||||
CU_ASSERT(ctrlr->is_failed == true);
|
||||
CU_ASSERT(nvme_ctrlr->resetting == false);
|
||||
CU_ASSERT(ctrlr_ch1->qpair == NULL);
|
||||
CU_ASSERT(ctrlr_ch2->qpair == NULL);
|
||||
|
||||
poll_threads();
|
||||
|
||||
spdk_put_io_channel(ch2);
|
||||
|
||||
set_thread(0);
|
||||
|
||||
spdk_put_io_channel(ch1);
|
||||
|
||||
poll_threads();
|
||||
|
||||
rc = bdev_nvme_delete("nvme0", NULL);
|
||||
CU_ASSERT(rc == 0);
|
||||
|
||||
poll_threads();
|
||||
|
||||
CU_ASSERT(nvme_ctrlr_get_by_name("nvme0") == NULL);
|
||||
}
|
||||
|
||||
int
|
||||
main(int argc, const char **argv)
|
||||
{
|
||||
@ -2727,7 +2797,6 @@ main(int argc, const char **argv)
|
||||
CU_ADD_TEST(suite, test_failover_ctrlr);
|
||||
CU_ADD_TEST(suite, test_pending_reset);
|
||||
CU_ADD_TEST(suite, test_attach_ctrlr);
|
||||
CU_ADD_TEST(suite, test_reconnect_qpair);
|
||||
CU_ADD_TEST(suite, test_aer_cb);
|
||||
CU_ADD_TEST(suite, test_submit_nvme_cmd);
|
||||
CU_ADD_TEST(suite, test_add_remove_trid);
|
||||
@ -2737,6 +2806,7 @@ main(int argc, const char **argv)
|
||||
CU_ADD_TEST(suite, test_compare_ns);
|
||||
CU_ADD_TEST(suite, test_init_ana_log_page);
|
||||
CU_ADD_TEST(suite, test_get_memory_domains);
|
||||
CU_ADD_TEST(suite, test_reconnect_qpair);
|
||||
|
||||
CU_basic_set_mode(CU_BRM_VERBOSE);
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user