diff --git a/CHANGELOG.md b/CHANGELOG.md index fc13222535..488b3241de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,7 +5,9 @@ ### bdev The parameter `retry_count` of the RPC `bdev_nvme_set_options` was deprecated and will be -removed in SPDK 22.01, and the parameter `transport_retry_count` is added and used instead. +removed in SPDK 22.04, and the parameter `transport_retry_count` is added and used instead. + +An new parameter `bdev_retry_count` is added to the RPC `bdev_nvme_set_options`. ## v21.10 diff --git a/doc/jsonrpc.md b/doc/jsonrpc.md index e33821f60b..4da4ddf8dc 100644 --- a/doc/jsonrpc.md +++ b/doc/jsonrpc.md @@ -2787,6 +2787,7 @@ nvme_ioq_poll_period_us | Optional | number | How often I/O queues are p io_queue_requests | Optional | number | The number of requests allocated for each NVMe I/O queue. Default: 512. delay_cmd_submit | Optional | boolean | Enable delaying NVMe command submission to allow batching of multiple commands. Default: `true`. transport_retry_count | Optional | number | The number of attempts per I/O in the transport layer before an I/O fails. +bdev_retry_count | Optional | number | The number of attempts per I/O in the bdev layer before an I/O fails. -1 means infinite retries. #### Example diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c index fcfb0a232c..dd1791fe42 100644 --- a/module/bdev/nvme/bdev_nvme.c +++ b/module/bdev/nvme/bdev_nvme.c @@ -107,6 +107,9 @@ struct nvme_bdev_io { /** Expiration value in ticks to retry the current I/O. */ uint64_t retry_ticks; + + /* How many times the current I/O was retried. */ + int32_t retry_count; }; struct nvme_probe_ctx { @@ -140,6 +143,7 @@ static struct spdk_bdev_nvme_opts g_opts = { .nvme_ioq_poll_period_us = 0, .io_queue_requests = 0, .delay_cmd_submit = SPDK_BDEV_NVME_DEFAULT_DELAY_CMD_SUBMIT, + .bdev_retry_count = 0, }; #define NVME_HOTPLUG_POLL_PERIOD_MAX 10000000ULL @@ -833,12 +837,16 @@ bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio, { struct spdk_bdev_io *bdev_io = spdk_bdev_io_from_ctx(bio); struct nvme_bdev_channel *nbdev_ch; + struct nvme_ctrlr *nvme_ctrlr; + const struct spdk_nvme_ctrlr_data *cdata; + uint64_t delay_ms; if (spdk_likely(spdk_nvme_cpl_is_success(cpl))) { goto complete; } - if (cpl->status.dnr != 0 || bdev_nvme_io_type_is_admin(bdev_io->type)) { + if (cpl->status.dnr != 0 || bdev_nvme_io_type_is_admin(bdev_io->type) || + (g_opts.bdev_retry_count != -1 && bio->retry_count >= g_opts.bdev_retry_count)) { goto complete; } @@ -850,13 +858,29 @@ bdev_nvme_io_complete_nvme_status(struct nvme_bdev_io *bio, spdk_nvme_cpl_is_aborted_sq_deletion(cpl) || !nvme_io_path_is_available(bio->io_path) || nvme_io_path_is_failed(bio->io_path)) { - if (any_io_path_may_become_available(nbdev_ch)) { - bdev_nvme_queue_retry_io(nbdev_ch, bio, 0); - return; + delay_ms = 0; + } else if (spdk_nvme_cpl_is_aborted_by_request(cpl)) { + goto complete; + } else { + bio->retry_count++; + + nvme_ctrlr = nvme_ctrlr_channel_get_ctrlr(bio->io_path->ctrlr_ch); + cdata = spdk_nvme_ctrlr_get_data(nvme_ctrlr->ctrlr); + + if (cpl->status.crd != 0) { + delay_ms = cdata->crdt[cpl->status.crd] * 100; + } else { + delay_ms = 0; } } + if (any_io_path_may_become_available(nbdev_ch)) { + bdev_nvme_queue_retry_io(nbdev_ch, bio, delay_ms); + return; + } + complete: + bio->retry_count = 0; spdk_bdev_io_complete_nvme_status(bdev_io, cpl->cdw0, cpl->status.sct, cpl->status.sc); } @@ -889,6 +913,7 @@ bdev_nvme_io_complete(struct nvme_bdev_io *bio, int rc) break; } + bio->retry_count = 0; spdk_bdev_io_complete(bdev_io, io_status); } @@ -3191,6 +3216,11 @@ bdev_nvme_validate_opts(const struct spdk_bdev_nvme_opts *opts) return -EINVAL; } + if (opts->bdev_retry_count < -1) { + SPDK_WARNLOG("Invalid option: bdev_retry_count can't be less than -1.\n"); + return -EINVAL; + } + return 0; } @@ -4662,6 +4692,7 @@ bdev_nvme_opts_config_json(struct spdk_json_write_ctx *w) spdk_json_write_named_uint64(w, "nvme_ioq_poll_period_us", g_opts.nvme_ioq_poll_period_us); spdk_json_write_named_uint32(w, "io_queue_requests", g_opts.io_queue_requests); spdk_json_write_named_bool(w, "delay_cmd_submit", g_opts.delay_cmd_submit); + spdk_json_write_named_int32(w, "bdev_retry_count", g_opts.bdev_retry_count); spdk_json_write_object_end(w); spdk_json_write_object_end(w); diff --git a/module/bdev/nvme/bdev_nvme.h b/module/bdev/nvme/bdev_nvme.h index 312daaaf6a..b968f78e02 100644 --- a/module/bdev/nvme/bdev_nvme.h +++ b/module/bdev/nvme/bdev_nvme.h @@ -223,6 +223,8 @@ struct spdk_bdev_nvme_opts { uint64_t nvme_ioq_poll_period_us; uint32_t io_queue_requests; bool delay_cmd_submit; + /* The number of attempts per I/O in the bdev layer before an I/O fails. */ + int32_t bdev_retry_count; }; struct spdk_nvme_qpair *bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch); diff --git a/module/bdev/nvme/bdev_nvme_rpc.c b/module/bdev/nvme/bdev_nvme_rpc.c index 1573a22acb..6837bcd830 100644 --- a/module/bdev/nvme/bdev_nvme_rpc.c +++ b/module/bdev/nvme/bdev_nvme_rpc.c @@ -89,6 +89,7 @@ static const struct spdk_json_object_decoder rpc_bdev_nvme_options_decoders[] = {"io_queue_requests", offsetof(struct spdk_bdev_nvme_opts, io_queue_requests), spdk_json_decode_uint32, true}, {"delay_cmd_submit", offsetof(struct spdk_bdev_nvme_opts, delay_cmd_submit), spdk_json_decode_bool, true}, {"transport_retry_count", offsetof(struct spdk_bdev_nvme_opts, transport_retry_count), spdk_json_decode_uint32, true}, + {"bdev_retry_count", offsetof(struct spdk_bdev_nvme_opts, bdev_retry_count), spdk_json_decode_int32, true}, }; static void diff --git a/scripts/rpc.py b/scripts/rpc.py index 2fe2bf518b..f9ed8a80de 100755 --- a/scripts/rpc.py +++ b/scripts/rpc.py @@ -467,7 +467,8 @@ if __name__ == "__main__": nvme_ioq_poll_period_us=args.nvme_ioq_poll_period_us, io_queue_requests=args.io_queue_requests, delay_cmd_submit=args.delay_cmd_submit, - transport_retry_count=args.transport_retry_count) + transport_retry_count=args.transport_retry_count, + bdev_retry_count=args.bdev_retry_count) p = subparsers.add_parser('bdev_nvme_set_options', aliases=['set_bdev_nvme_options'], help='Set options for the bdev nvme type. This is startup command.') @@ -500,6 +501,9 @@ if __name__ == "__main__": action='store_false', dest='delay_cmd_submit', default=True) p.add_argument('-c', '--transport-retry-count', help='the number of attempts per I/O in the transport layer when an I/O fails.', type=int) + p.add_argument('-r', '--bdev-retry-count', + help='the number of attempts per I/O in the bdev layer when an I/O fails. -1 means infinite retries.', type=int) + p.set_defaults(func=bdev_nvme_set_options) def bdev_nvme_set_hotplug(args): diff --git a/scripts/rpc/bdev.py b/scripts/rpc/bdev.py index dff340abbc..d325b25bac 100644 --- a/scripts/rpc/bdev.py +++ b/scripts/rpc/bdev.py @@ -429,7 +429,7 @@ def bdev_nvme_set_options(client, action_on_timeout=None, timeout_us=None, timeo keep_alive_timeout_ms=None, retry_count=None, arbitration_burst=None, low_priority_weight=None, medium_priority_weight=None, high_priority_weight=None, nvme_adminq_poll_period_us=None, nvme_ioq_poll_period_us=None, io_queue_requests=None, - delay_cmd_submit=None, transport_retry_count=None): + delay_cmd_submit=None, transport_retry_count=None, bdev_retry_count=None): """Set options for the bdev nvme. This is startup command. Args: @@ -447,6 +447,7 @@ def bdev_nvme_set_options(client, action_on_timeout=None, timeout_us=None, timeo io_queue_requests: The number of requests allocated for each NVMe I/O queue. Default: 512 (optional) delay_cmd_submit: Enable delayed NVMe command submission to allow batching of multiple commands (optional) transport_retry_count: The number of attempts per I/O in the transport layer when an I/O fails (optional) + bdev_retry_count: The number of attempts per I/O in the bdev layer when an I/O fails. -1 means infinite retries. (optional) """ params = {} @@ -493,6 +494,9 @@ def bdev_nvme_set_options(client, action_on_timeout=None, timeout_us=None, timeo if transport_retry_count is not None: params['transport_retry_count'] = transport_retry_count + if bdev_retry_count is not None: + params['bdev_retry_count'] = bdev_retry_count + return client.call('bdev_nvme_set_options', params) diff --git a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c index 68431aa3d2..c27d2a5ad3 100644 --- a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c +++ b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c @@ -4014,6 +4014,8 @@ test_retry_io_for_io_path_error(void) ut_init_trid(&path1.trid); ut_init_trid2(&path2.trid); + g_opts.bdev_retry_count = 1; + set_thread(0); g_ut_attach_ctrlr_status = 0; @@ -4192,6 +4194,202 @@ test_retry_io_for_io_path_error(void) poll_threads(); CU_ASSERT(nvme_bdev_ctrlr_get("nvme0") == NULL); + + g_opts.bdev_retry_count = 0; +} + +static void +test_retry_io_count(void) +{ + struct nvme_path_id path = {}; + struct spdk_nvme_ctrlr *ctrlr; + struct nvme_bdev_ctrlr *nbdev_ctrlr; + struct nvme_ctrlr *nvme_ctrlr; + const int STRING_SIZE = 32; + const char *attached_names[STRING_SIZE]; + struct nvme_bdev *bdev; + struct nvme_ns *nvme_ns; + struct spdk_bdev_io *bdev_io; + struct nvme_bdev_io *bio; + struct spdk_io_channel *ch; + struct nvme_bdev_channel *nbdev_ch; + struct nvme_io_path *io_path; + struct nvme_ctrlr_channel *ctrlr_ch; + struct ut_nvme_req *req; + int rc; + + memset(attached_names, 0, sizeof(char *) * STRING_SIZE); + ut_init_trid(&path.trid); + + set_thread(0); + + ctrlr = ut_attach_ctrlr(&path.trid, 1, false, false); + SPDK_CU_ASSERT_FATAL(ctrlr != NULL); + + g_ut_attach_ctrlr_status = 0; + g_ut_attach_bdev_count = 1; + + rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0, + attach_ctrlr_done, NULL, NULL, false); + CU_ASSERT(rc == 0); + + spdk_delay_us(1000); + poll_threads(); + + nbdev_ctrlr = nvme_bdev_ctrlr_get("nvme0"); + SPDK_CU_ASSERT_FATAL(nbdev_ctrlr != NULL); + + nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, &path.trid); + CU_ASSERT(nvme_ctrlr != NULL); + + bdev = nvme_bdev_ctrlr_get_bdev(nbdev_ctrlr, 1); + CU_ASSERT(bdev != NULL); + + nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr); + CU_ASSERT(nvme_ns != NULL); + + bdev_io = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_WRITE, bdev, NULL); + ut_bdev_io_set_buf(bdev_io); + + bio = (struct nvme_bdev_io *)bdev_io->driver_ctx; + + ch = spdk_get_io_channel(bdev); + SPDK_CU_ASSERT_FATAL(ch != NULL); + + nbdev_ch = spdk_io_channel_get_ctx(ch); + + io_path = ut_get_io_path_by_ctrlr(nbdev_ch, nvme_ctrlr); + SPDK_CU_ASSERT_FATAL(io_path != NULL); + + ctrlr_ch = io_path->ctrlr_ch; + SPDK_CU_ASSERT_FATAL(ctrlr_ch != NULL); + SPDK_CU_ASSERT_FATAL(ctrlr_ch->qpair != NULL); + + bdev_io->internal.ch = (struct spdk_bdev_channel *)ch; + + /* If I/O is aborted by request, it should not be retried. */ + g_opts.bdev_retry_count = 1; + + bdev_io->internal.in_submit_request = true; + + bdev_nvme_submit_request(ch, bdev_io); + + CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 1); + CU_ASSERT(bdev_io->internal.in_submit_request == true); + + req = ut_get_outstanding_nvme_request(ctrlr_ch->qpair, bio); + SPDK_CU_ASSERT_FATAL(req != NULL); + + req->cpl.status.sc = SPDK_NVME_SC_ABORTED_BY_REQUEST; + req->cpl.status.sct = SPDK_NVME_SCT_GENERIC; + + poll_thread_times(0, 1); + + CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 0); + CU_ASSERT(bdev_io->internal.in_submit_request == false); + CU_ASSERT(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_ABORTED); + + /* If bio->retry_count is not less than g_opts.bdev_retry_count, + * the failed I/O should not be retried. + */ + g_opts.bdev_retry_count = 4; + + bdev_io->internal.in_submit_request = true; + + bdev_nvme_submit_request(ch, bdev_io); + + CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 1); + CU_ASSERT(bdev_io->internal.in_submit_request == true); + + req = ut_get_outstanding_nvme_request(ctrlr_ch->qpair, bio); + SPDK_CU_ASSERT_FATAL(req != NULL); + + req->cpl.status.sc = SPDK_NVME_SC_NAMESPACE_NOT_READY; + req->cpl.status.sct = SPDK_NVME_SCT_GENERIC; + bio->retry_count = 4; + + poll_thread_times(0, 1); + + CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 0); + CU_ASSERT(bdev_io->internal.in_submit_request == false); + CU_ASSERT(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_NVME_ERROR); + + /* If g_opts.bdev_retry_count is -1, the failed I/O always should be retried. */ + g_opts.bdev_retry_count = -1; + + bdev_io->internal.in_submit_request = true; + + bdev_nvme_submit_request(ch, bdev_io); + + CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 1); + CU_ASSERT(bdev_io->internal.in_submit_request == true); + + req = ut_get_outstanding_nvme_request(ctrlr_ch->qpair, bio); + SPDK_CU_ASSERT_FATAL(req != NULL); + + req->cpl.status.sc = SPDK_NVME_SC_NAMESPACE_NOT_READY; + req->cpl.status.sct = SPDK_NVME_SCT_GENERIC; + bio->retry_count = 4; + + poll_thread_times(0, 1); + + CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 0); + CU_ASSERT(bdev_io->internal.in_submit_request == true); + CU_ASSERT(bdev_io == TAILQ_FIRST(&nbdev_ch->retry_io_list)); + + poll_threads(); + + CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 0); + CU_ASSERT(bdev_io->internal.in_submit_request == false); + CU_ASSERT(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS); + + /* If bio->retry_count is less than g_opts.bdev_retry_count, + * the failed I/O should be retried. + */ + g_opts.bdev_retry_count = 4; + + bdev_io->internal.in_submit_request = true; + + bdev_nvme_submit_request(ch, bdev_io); + + CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 1); + CU_ASSERT(bdev_io->internal.in_submit_request == true); + + req = ut_get_outstanding_nvme_request(ctrlr_ch->qpair, bio); + SPDK_CU_ASSERT_FATAL(req != NULL); + + req->cpl.status.sc = SPDK_NVME_SC_NAMESPACE_NOT_READY; + req->cpl.status.sct = SPDK_NVME_SCT_GENERIC; + bio->retry_count = 3; + + poll_thread_times(0, 1); + + CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 0); + CU_ASSERT(bdev_io->internal.in_submit_request == true); + CU_ASSERT(bdev_io == TAILQ_FIRST(&nbdev_ch->retry_io_list)); + + poll_threads(); + + CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 0); + CU_ASSERT(bdev_io->internal.in_submit_request == false); + CU_ASSERT(bdev_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS); + + free(bdev_io); + + spdk_put_io_channel(ch); + + poll_threads(); + + rc = bdev_nvme_delete("nvme0", &g_any_path); + CU_ASSERT(rc == 0); + + poll_threads(); + spdk_delay_us(1000); + poll_threads(); + + CU_ASSERT(nvme_bdev_ctrlr_get("nvme0") == NULL); + + g_opts.bdev_retry_count = 0; } int @@ -4229,6 +4427,7 @@ main(int argc, const char **argv) CU_ADD_TEST(suite, test_find_io_path); CU_ADD_TEST(suite, test_retry_io_if_ctrlr_is_resetting); CU_ADD_TEST(suite, test_retry_io_for_io_path_error); + CU_ADD_TEST(suite, test_retry_io_count); CU_basic_set_mode(CU_BRM_VERBOSE);