bdev/nvme: Retry I/O a second later if any I/O path may become available
If ANA state is inaccessible or the qpair is disconnected, I/O cannot be
submitted. But the ANA state may become accessible while the qpair stays
connected, and a disconnected qpair may become connected again via a reset.
Hence, even if find_io_path() returns NULL, queue the I/O and retry it one
second later as long as some qpair is connected or the ctrlr is resetting.

Sort retried I/Os by their expiration values in ticks, and activate a timed
poller per nvme_bdev_channel only while there is at least one retried I/O.
The poller function bdev_nvme_retry_ios() therefore always returns
SPDK_POLLER_BUSY: if the poller ran before the closest retried I/O expired,
or ran while no retried I/O was queued, that would indicate a bug in the
framework.

Signed-off-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Change-Id: Id28110a0d63ebc1c5772814e2ff8a47934df1644
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9830
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
parent 3f4474d5cb
commit ef409194a1
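The only arithmetic in the patch is the conversion between wall-clock delays and CPU ticks. As an illustration, here is a minimal standalone C sketch (no SPDK dependency; the tick rate and values are made up, and SEC_TO_USEC merely stands in for SPDK_SEC_TO_USEC) of the two conversions that bdev_nvme_queue_retry_io() and bdev_nvme_retry_ios() perform in the diff below:

/* Standalone sketch of the retry path's tick arithmetic: a delay in
 * milliseconds becomes an absolute expiration in ticks, and later the
 * distance to that expiration becomes a poller period in microseconds.
 * ticks_hz stands in for spdk_get_ticks_hz(); all values are invented.
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

#define SEC_TO_USEC 1000000ULL	/* counterpart of SPDK_SEC_TO_USEC */

int
main(void)
{
	uint64_t ticks_hz = 2300000000ULL;	/* e.g. a 2.3 GHz TSC */
	uint64_t now = 5000000ULL;		/* pretend spdk_get_ticks() */
	uint64_t delay_ms = 1000ULL;		/* retry one second later */

	/* As in bdev_nvme_queue_retry_io(): absolute expiration in ticks. */
	uint64_t retry_ticks = now + delay_ms * ticks_hz / 1000ULL;

	/* As in bdev_nvme_retry_ios(): time until the head I/O expires,
	 * converted to microseconds for the timed poller registration.
	 */
	uint64_t delay_us = (retry_ticks - now) * SEC_TO_USEC / ticks_hz;

	printf("expires at tick %" PRIu64 ", poller period %" PRIu64 " us\n",
	       retry_ticks, delay_us);

	return 0;
}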
@@ -104,6 +104,9 @@ struct nvme_bdev_io {
 
 	/** Keep track of how many zones that have been copied to the spdk_bdev_zone_info struct */
 	uint64_t handled_zones;
+
+	/** Expiration value in ticks to retry the current I/O. */
+	uint64_t retry_ticks;
 };
 
 struct nvme_probe_ctx {
@@ -156,6 +159,8 @@ static void nvme_ctrlr_populate_namespaces_done(struct nvme_ctrlr *nvme_ctrlr,
 		struct nvme_async_probe_ctx *ctx);
 static int bdev_nvme_library_init(void);
 static void bdev_nvme_library_fini(void);
+static void bdev_nvme_submit_request(struct spdk_io_channel *ch,
+				     struct spdk_bdev_io *bdev_io);
 static int bdev_nvme_readv(struct nvme_bdev_io *bio, struct iovec *iov, int iovcnt,
 			   void *md, uint64_t lba_count, uint64_t lba,
 			   uint32_t flags, struct spdk_bdev_ext_io_opts *ext_opts);
@@ -605,6 +610,7 @@ bdev_nvme_create_bdev_channel_cb(void *io_device, void *ctx_buf)
 	int rc;
 
 	STAILQ_INIT(&nbdev_ch->io_path_list);
+	TAILQ_INIT(&nbdev_ch->retry_io_list);
 
 	pthread_mutex_lock(&nbdev->mutex);
 	TAILQ_FOREACH(nvme_ns, &nbdev->nvme_ns_list, tailq) {
@@ -621,11 +627,25 @@ bdev_nvme_create_bdev_channel_cb(void *io_device, void *ctx_buf)
 	return 0;
 }
 
+static void
+bdev_nvme_abort_retry_ios(struct nvme_bdev_channel *nbdev_ch)
+{
+	struct spdk_bdev_io *bdev_io, *tmp_io;
+
+	TAILQ_FOREACH_SAFE(bdev_io, &nbdev_ch->retry_io_list, module_link, tmp_io) {
+		TAILQ_REMOVE(&nbdev_ch->retry_io_list, bdev_io, module_link);
+		spdk_bdev_io_complete(bdev_io, SPDK_BDEV_IO_STATUS_ABORTED);
+	}
+
+	spdk_poller_unregister(&nbdev_ch->retry_io_poller);
+}
+
 static void
 bdev_nvme_destroy_bdev_channel_cb(void *io_device, void *ctx_buf)
 {
 	struct nvme_bdev_channel *nbdev_ch = ctx_buf;
 
+	bdev_nvme_abort_retry_ios(nbdev_ch);
 	_bdev_nvme_delete_io_paths(nbdev_ch);
 }
 
@@ -678,6 +698,16 @@ nvme_io_path_is_available(struct nvme_io_path *io_path)
 	return true;
 }
 
+static bool
+nvme_io_path_is_failed(struct nvme_io_path *io_path)
+{
+	struct nvme_ctrlr *nvme_ctrlr;
+
+	nvme_ctrlr = nvme_ctrlr_channel_get_ctrlr(io_path->ctrlr_ch);
+
+	return spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr);
+}
+
 static inline struct nvme_io_path *
 bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
 {
@@ -705,6 +735,98 @@ bdev_nvme_find_io_path(struct nvme_bdev_channel *nbdev_ch)
 	return non_optimized;
 }
 
+/* Return true if there is any io_path whose qpair is active or whose ctrlr is
+ * not failed, or false otherwise.
+ *
+ * If any io_path has an active qpair but find_io_path() returned NULL, its namespace
+ * is likely to be inaccessible now but may become accessible.
+ *
+ * If any io_path has an unfailed ctrlr but find_io_path() returned NULL, the ctrlr
+ * is likely to be resetting now but the reset may succeed. A ctrlr is set to unfailed
+ * when starting to reset it but is set to failed when the reset fails. Hence, if
+ * a ctrlr is unfailed, it is likely that it works fine or is resetting.
+ */
+static bool
+any_io_path_may_become_available(struct nvme_bdev_channel *nbdev_ch)
+{
+	struct nvme_io_path *io_path;
+
+	STAILQ_FOREACH(io_path, &nbdev_ch->io_path_list, stailq) {
+		if (nvme_io_path_is_connected(io_path) ||
+		    !nvme_io_path_is_failed(io_path)) {
+			return true;
+		}
+	}
+
+	return false;
+}
+
+static int
+bdev_nvme_retry_ios(void *arg)
+{
+	struct nvme_bdev_channel *nbdev_ch = arg;
+	struct spdk_io_channel *ch = spdk_io_channel_from_ctx(nbdev_ch);
+	struct spdk_bdev_io *bdev_io, *tmp_bdev_io;
+	struct nvme_bdev_io *bio;
+	uint64_t now, delay_us;
+
+	now = spdk_get_ticks();
+
+	TAILQ_FOREACH_SAFE(bdev_io, &nbdev_ch->retry_io_list, module_link, tmp_bdev_io) {
+		bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
+		if (bio->retry_ticks > now) {
+			break;
+		}
+
+		TAILQ_REMOVE(&nbdev_ch->retry_io_list, bdev_io, module_link);
+
+		bdev_nvme_submit_request(ch, bdev_io);
+	}
+
+	spdk_poller_unregister(&nbdev_ch->retry_io_poller);
+
+	bdev_io = TAILQ_FIRST(&nbdev_ch->retry_io_list);
+	if (bdev_io != NULL) {
+		bio = (struct nvme_bdev_io *)bdev_io->driver_ctx;
+
+		delay_us = (bio->retry_ticks - now) * SPDK_SEC_TO_USEC / spdk_get_ticks_hz();
+
+		nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
+					    delay_us);
+	}
+
+	return SPDK_POLLER_BUSY;
+}
+
+static void
+bdev_nvme_queue_retry_io(struct nvme_bdev_channel *nbdev_ch,
+			 struct nvme_bdev_io *bio, uint64_t delay_ms)
+{
+	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
+	struct spdk_bdev_io *tmp_bdev_io;
+	struct nvme_bdev_io *tmp_bio;
+
+	bio->retry_ticks = spdk_get_ticks() + delay_ms * spdk_get_ticks_hz() / 1000ULL;
+
+	TAILQ_FOREACH_REVERSE(tmp_bdev_io, &nbdev_ch->retry_io_list, retry_io_head, module_link) {
+		tmp_bio = (struct nvme_bdev_io *)tmp_bdev_io->driver_ctx;
+
+		if (tmp_bio->retry_ticks <= bio->retry_ticks) {
+			TAILQ_INSERT_AFTER(&nbdev_ch->retry_io_list, tmp_bdev_io, bdev_io,
+					   module_link);
+			return;
+		}
+	}
+
+	/* No earlier I/Os were found. This I/O must be the new head. */
+	TAILQ_INSERT_HEAD(&nbdev_ch->retry_io_list, bdev_io, module_link);
+
+	spdk_poller_unregister(&nbdev_ch->retry_io_poller);
+
+	nbdev_ch->retry_io_poller = SPDK_POLLER_REGISTER(bdev_nvme_retry_ios, nbdev_ch,
+				    delay_ms * 1000ULL);
+}
+
 static inline void
 bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio,
 				  const struct spdk_nvme_cpl *cpl)
@@ -716,17 +838,33 @@ bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio,
 static inline void
 bdev_nvme_io_complete(struct nvme_bdev_io *bio, int rc)
 {
+	struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio);
+	struct nvme_bdev_channel *nbdev_ch;
 	enum spdk_bdev_io_status io_status;
 
-	if (rc == 0) {
+	switch (rc) {
+	case 0:
 		io_status = SPDK_BDEV_IO_STATUS_SUCCESS;
-	} else if (rc == -ENOMEM) {
+		break;
+	case -ENOMEM:
 		io_status = SPDK_BDEV_IO_STATUS_NOMEM;
-	} else {
-		io_status = SPDK_BDEV_IO_STATUS_FAILED;
+		break;
+	case -ENXIO:
+		nbdev_ch = spdk_io_channel_get_ctx(spdk_bdev_io_get_io_channel(bdev_io));
+
+		if (!bdev_nvme_io_type_is_admin(bdev_io->type) &&
+		    any_io_path_may_become_available(nbdev_ch)) {
+			bdev_nvme_queue_retry_io(nbdev_ch, bio, 1000ULL);
+			return;
+		}
+
+	/* fallthrough */
+	default:
+		io_status = SPDK_BDEV_IO_STATUS_FAILED;
+		break;
 	}
 
-	spdk_bdev_io_complete(spdk_bdev_io_from_ctx(bio), io_status);
+	spdk_bdev_io_complete(bdev_io, io_status);
 }
 
 static struct nvme_ctrlr_channel *
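The sorted insert in bdev_nvme_queue_retry_io() above iterates in reverse because each newly queued I/O normally has the latest expiration, so the scan usually stops at the first comparison. The following self-contained sketch reproduces the same technique with <sys/queue.h> on a toy struct; the retry_io and queue_sorted names are hypothetical, not SPDK code:

/* Self-contained sketch (toy types, not SPDK code) of the reverse-iteration
 * sorted insert used by bdev_nvme_queue_retry_io().
 */
#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/queue.h>

struct retry_io {
	uint64_t retry_ticks;
	TAILQ_ENTRY(retry_io) link;
};

TAILQ_HEAD(retry_head, retry_io);

static void
queue_sorted(struct retry_head *head, struct retry_io *io)
{
	struct retry_io *tmp;

	/* Walk from the tail and insert after the first entry that expires
	 * no later than the new one, keeping the list sorted by retry_ticks.
	 */
	TAILQ_FOREACH_REVERSE(tmp, head, retry_head, link) {
		if (tmp->retry_ticks <= io->retry_ticks) {
			TAILQ_INSERT_AFTER(head, tmp, io, link);
			return;
		}
	}

	/* No earlier entry was found. The new one becomes the head. */
	TAILQ_INSERT_HEAD(head, io, link);
}

int
main(void)
{
	struct retry_head head = TAILQ_HEAD_INITIALIZER(head);
	uint64_t ticks[] = { 300, 100, 200 };
	struct retry_io *io;
	size_t i;

	for (i = 0; i < 3; i++) {
		io = calloc(1, sizeof(*io));
		io->retry_ticks = ticks[i];
		queue_sorted(&head, io);
	}

	/* Prints 100, 200, 300: earliest expiration first. */
	while ((io = TAILQ_FIRST(&head)) != NULL) {
		printf("%" PRIu64 "\n", io->retry_ticks);
		TAILQ_REMOVE(&head, io, link);
		free(io);
	}

	return 0;
}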
@@ -173,6 +173,8 @@ struct nvme_io_path {
 
 struct nvme_bdev_channel {
 	STAILQ_HEAD(, nvme_io_path)	io_path_list;
+	TAILQ_HEAD(retry_io_head, spdk_bdev_io)	retry_io_list;
+	struct spdk_poller		*retry_io_poller;
 };
 
 struct nvme_poll_group {
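The retry_io_poller added to struct nvme_bdev_channel is used as a one-shot timer: the callback always unregisters it and re-arms it only while queued I/Os remain. Below is a rough sketch of that pattern as a minimal SPDK event-framework app; it is an illustration assuming a current SPDK build (spdk_app_start, SPDK_POLLER_REGISTER, and spdk_poller_unregister are real SPDK APIs), not code from this commit:

#include <stdio.h>

#include "spdk/event.h"
#include "spdk/thread.h"

static struct spdk_poller *g_poller;
static int g_remaining = 3;

/* Fires once per registered period; mirrors bdev_nvme_retry_ios():
 * drop the current registration, then re-arm only if work remains.
 */
static int
timer_cb(void *arg)
{
	printf("timer fired, %d runs left\n", --g_remaining);

	spdk_poller_unregister(&g_poller);

	if (g_remaining > 0) {
		g_poller = SPDK_POLLER_REGISTER(timer_cb, NULL, 1000 * 1000);
	} else {
		spdk_app_stop(0);
	}

	return SPDK_POLLER_BUSY;
}

static void
start_cb(void *arg)
{
	/* Arm the one-shot timer for one second, like queueing the first retry. */
	g_poller = SPDK_POLLER_REGISTER(timer_cb, NULL, 1000 * 1000);
}

int
main(int argc, char **argv)
{
	struct spdk_app_opts opts = {};
	int rc;

	spdk_app_opts_init(&opts, sizeof(opts));
	opts.name = "retry_poller_sketch";

	rc = spdk_app_start(&opts, start_cb, NULL);
	spdk_app_fini();

	return rc;
}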
@@ -3788,6 +3788,194 @@ test_find_io_path(void)
 	CU_ASSERT(bdev_nvme_find_io_path(&nbdev_ch) == &io_path1);
 }
 
+static void
+test_retry_io_if_ctrlr_is_resetting(void)
+{
+	struct nvme_path_id path = {};
+	struct spdk_nvme_ctrlr *ctrlr;
+	struct nvme_bdev_ctrlr *nbdev_ctrlr;
+	struct nvme_ctrlr *nvme_ctrlr;
+	const int STRING_SIZE = 32;
+	const char *attached_names[STRING_SIZE];
+	struct nvme_bdev *bdev;
+	struct nvme_ns *nvme_ns;
+	struct spdk_bdev_io *bdev_io1, *bdev_io2;
+	struct spdk_io_channel *ch;
+	struct nvme_bdev_channel *nbdev_ch;
+	struct nvme_io_path *io_path;
+	struct nvme_ctrlr_channel *ctrlr_ch;
+	int rc;
+
+	memset(attached_names, 0, sizeof(char *) * STRING_SIZE);
+	ut_init_trid(&path.trid);
+
+	set_thread(0);
+
+	ctrlr = ut_attach_ctrlr(&path.trid, 1, false, false);
+	SPDK_CU_ASSERT_FATAL(ctrlr != NULL);
+
+	g_ut_attach_ctrlr_status = 0;
+	g_ut_attach_bdev_count = 1;
+
+	rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0,
+			      attach_ctrlr_done, NULL, NULL, false);
+	CU_ASSERT(rc == 0);
+
+	spdk_delay_us(1000);
+	poll_threads();
+
+	nbdev_ctrlr = nvme_bdev_ctrlr_get("nvme0");
+	SPDK_CU_ASSERT_FATAL(nbdev_ctrlr != NULL);
+
+	nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, &path.trid);
+	CU_ASSERT(nvme_ctrlr != NULL);
+
+	bdev = nvme_bdev_ctrlr_get_bdev(nbdev_ctrlr, 1);
+	CU_ASSERT(bdev != NULL);
+
+	nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
+	CU_ASSERT(nvme_ns != NULL);
+
+	bdev_io1 = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_WRITE, bdev, NULL);
+	ut_bdev_io_set_buf(bdev_io1);
+
+	bdev_io2 = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_WRITE, bdev, NULL);
+	ut_bdev_io_set_buf(bdev_io2);
+
+	ch = spdk_get_io_channel(bdev);
+	SPDK_CU_ASSERT_FATAL(ch != NULL);
+
+	nbdev_ch = spdk_io_channel_get_ctx(ch);
+
+	io_path = ut_get_io_path_by_ctrlr(nbdev_ch, nvme_ctrlr);
+	SPDK_CU_ASSERT_FATAL(io_path != NULL);
+
+	ctrlr_ch = io_path->ctrlr_ch;
+	SPDK_CU_ASSERT_FATAL(ctrlr_ch != NULL);
+	SPDK_CU_ASSERT_FATAL(ctrlr_ch->qpair != NULL);
+
+	bdev_io1->internal.ch = (struct spdk_bdev_channel *)ch;
+	bdev_io2->internal.ch = (struct spdk_bdev_channel *)ch;
+
+	/* If qpair is connected, I/O should succeed. */
+	bdev_io1->internal.in_submit_request = true;
+
+	bdev_nvme_submit_request(ch, bdev_io1);
+	CU_ASSERT(bdev_io1->internal.in_submit_request == true);
+
+	poll_threads();
+	CU_ASSERT(bdev_io1->internal.in_submit_request == false);
+	CU_ASSERT(bdev_io1->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);
+
+	/* If qpair is disconnected, it is freed and then reconnected via resetting
+	 * the corresponding nvme_ctrlr. I/O should be queued if it is submitted
+	 * while resetting the nvme_ctrlr.
+	 */
+	ctrlr_ch->qpair->is_connected = false;
+	ctrlr->is_failed = true;
+
+	poll_thread_times(0, 3);
+
+	CU_ASSERT(ctrlr_ch->qpair == NULL);
+	CU_ASSERT(nvme_ctrlr->resetting == true);
+	CU_ASSERT(ctrlr->is_failed == false);
+
+	bdev_io1->internal.in_submit_request = true;
+
+	bdev_nvme_submit_request(ch, bdev_io1);
+
+	spdk_delay_us(1);
+
+	bdev_io2->internal.in_submit_request = true;
+
+	bdev_nvme_submit_request(ch, bdev_io2);
+
+	CU_ASSERT(bdev_io1->internal.in_submit_request == true);
+	CU_ASSERT(bdev_io2->internal.in_submit_request == true);
+	CU_ASSERT(bdev_io1 == TAILQ_FIRST(&nbdev_ch->retry_io_list));
+	CU_ASSERT(bdev_io2 == TAILQ_NEXT(bdev_io1, module_link));
+
+	poll_threads();
+
+	CU_ASSERT(ctrlr_ch->qpair != NULL);
+	CU_ASSERT(nvme_ctrlr->resetting == false);
+
+	spdk_delay_us(999999);
+
+	poll_thread_times(0, 1);
+
+	CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 1);
+	CU_ASSERT(bdev_io1->internal.in_submit_request == true);
+	CU_ASSERT(bdev_io2->internal.in_submit_request == true);
+	CU_ASSERT(bdev_io2 == TAILQ_FIRST(&nbdev_ch->retry_io_list));
+
+	poll_threads();
+
+	CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 0);
+	CU_ASSERT(bdev_io1->internal.in_submit_request == false);
+	CU_ASSERT(bdev_io1->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);
+	CU_ASSERT(bdev_io2->internal.in_submit_request == true);
+	CU_ASSERT(bdev_io2 == TAILQ_FIRST(&nbdev_ch->retry_io_list));
+
+	spdk_delay_us(1);
+
+	poll_thread_times(0, 1);
+
+	CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 1);
+	CU_ASSERT(bdev_io2->internal.in_submit_request == true);
+	CU_ASSERT(TAILQ_EMPTY(&nbdev_ch->retry_io_list));
+
+	poll_threads();
+
+	CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 0);
+	CU_ASSERT(bdev_io2->internal.in_submit_request == false);
+	CU_ASSERT(bdev_io2->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);
+
+	/* If ANA state of namespace is inaccessible, I/O should be queued. */
+	nvme_ns->ana_state = SPDK_NVME_ANA_INACCESSIBLE_STATE;
+
+	bdev_io1->internal.in_submit_request = true;
+
+	bdev_nvme_submit_request(ch, bdev_io1);
+
+	CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 0);
+	CU_ASSERT(bdev_io1->internal.in_submit_request == true);
+	CU_ASSERT(bdev_io1 == TAILQ_FIRST(&nbdev_ch->retry_io_list));
+
+	/* ANA state became accessible while I/O was queued. */
+	nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
+
+	spdk_delay_us(1000000);
+
+	poll_thread_times(0, 1);
+
+	CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 1);
+	CU_ASSERT(bdev_io1->internal.in_submit_request == true);
+	CU_ASSERT(TAILQ_EMPTY(&nbdev_ch->retry_io_list));
+
+	poll_threads();
+
+	CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 0);
+	CU_ASSERT(bdev_io1->internal.in_submit_request == false);
+	CU_ASSERT(bdev_io1->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);
+
+	free(bdev_io1);
+	free(bdev_io2);
+
+	spdk_put_io_channel(ch);
+
+	poll_threads();
+
+	rc = bdev_nvme_delete("nvme0", &g_any_path);
+	CU_ASSERT(rc == 0);
+
+	poll_threads();
+	spdk_delay_us(1000);
+	poll_threads();
+
+	CU_ASSERT(nvme_bdev_ctrlr_get("nvme0") == NULL);
+}
+
 int
 main(int argc, const char **argv)
 {
@@ -3821,6 +4009,7 @@ main(int argc, const char **argv)
 	CU_ADD_TEST(suite, test_admin_path);
 	CU_ADD_TEST(suite, test_reset_bdev_ctrlr);
 	CU_ADD_TEST(suite, test_find_io_path);
+	CU_ADD_TEST(suite, test_retry_io_if_ctrlr_is_resetting);
 
 	CU_basic_set_mode(CU_BRM_VERBOSE);
 