diff --git a/etc/spdk/iscsi.conf.in b/etc/spdk/iscsi.conf.in index 8f705ba8ee..93aaa1ea54 100644 --- a/etc/spdk/iscsi.conf.in +++ b/etc/spdk/iscsi.conf.in @@ -100,10 +100,13 @@ # The number of attempts per I/O when an I/O fails. Do not include # this key to get the default behavior. NvmeRetryCount 4 - # Registers the application to receive timeout callback and to reset the controller. - ResetControllerOnTimeout Yes - # Timeout value. - NvmeTimeoutValue 30 + # Timeout for each command, in seconds. If 0, don't track timeouts. + NvmeTimeoutValue 0 + # Action to take on command timeout. Only valid when NvmeTimeoutValue is + # greater than 0. This may be 'Reset' to reset the controller, 'Abort' to + # abort the command, or 'None' to take no action (no timeout callback is + # registered). With 'Reset' or 'Abort', admin command timeouts result in a reset. + ActionOnTimeout None # Set how often the admin queue is polled for asynchronous events. # Units in microseconds. AdminPollRate 100000 diff --git a/etc/spdk/nvmf.conf.in b/etc/spdk/nvmf.conf.in index 4c998af34c..1bc527f3d5 100644 --- a/etc/spdk/nvmf.conf.in +++ b/etc/spdk/nvmf.conf.in @@ -91,10 +91,13 @@ # The number of attempts per I/O when an I/O fails. Do not include # this key to get the default behavior. NvmeRetryCount 4 - # Registers the application to receive timeout callback and to reset the controller. - ResetControllerOnTimeout Yes - # Timeout value. - NvmeTimeoutValue 30 + # Timeout for each command, in seconds. If 0, don't track timeouts. + NvmeTimeoutValue 0 + # Action to take on command timeout. Only valid when NvmeTimeoutValue is + # greater than 0. This may be 'Reset' to reset the controller, 'Abort' to + # abort the command, or 'None' to take no action (no timeout callback is + # registered). With 'Reset' or 'Abort', admin command timeouts result in a reset. + ActionOnTimeout None # Set how often the admin queue is polled for asynchronous events. # Units in microseconds. 
AdminPollRate 100000 diff --git a/etc/spdk/vhost.conf.in b/etc/spdk/vhost.conf.in index cb56e6635c..bf7838069b 100644 --- a/etc/spdk/vhost.conf.in +++ b/etc/spdk/vhost.conf.in @@ -79,13 +79,13 @@ # The number of attempts per I/O when an I/O fails. Do not include # this key to get the default behavior. NvmeRetryCount 4 - # The maximum number of NVMe controllers to claim. Do not include this key to - # claim all of them. - NumControllers 2 - # Registers the application to receive timeout callback and to reset the controller. - ResetControllerOnTimeout Yes - # Timeout value. - NvmeTimeoutValue 30 + # Timeout for each command, in seconds. If 0, don't track timeouts. + NvmeTimeoutValue 0 + # Action to take on command timeout. Only valid when NvmeTimeoutValue is + # greater than 0. This may be 'Reset' to reset the controller, 'Abort' to + # abort the command, or 'None' to take no action (no timeout callback is + # registered). With 'Reset' or 'Abort', admin command timeouts result in a reset. + ActionOnTimeout None # Set how often the admin queue is polled for asynchronous events. # Units in microseconds. 
AdminPollRate 100000 diff --git a/lib/bdev/nvme/blockdev_nvme.c b/lib/bdev/nvme/blockdev_nvme.c index 17b431fc57..0dd8410324 100644 --- a/lib/bdev/nvme/blockdev_nvme.c +++ b/lib/bdev/nvme/blockdev_nvme.c @@ -111,8 +111,14 @@ struct nvme_probe_ctx { const char *names[NVME_MAX_CONTROLLERS]; }; +enum timeout_action { + TIMEOUT_ACTION_NONE = 0, + TIMEOUT_ACTION_RESET, + TIMEOUT_ACTION_ABORT, +}; + static int g_hot_insert_nvme_controller_index = 0; -static bool g_reset_controller_on_timeout = false; +static enum timeout_action g_action_on_timeout = TIMEOUT_ACTION_NONE; static int g_timeout = 0; static int g_nvme_adminq_poll_timeout_us = 0; static bool g_nvme_hotplug_enabled; @@ -559,6 +565,21 @@ probe_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, return true; } +static void +spdk_nvme_abort_cpl(void *ctx, const struct spdk_nvme_cpl *cpl) +{ + struct spdk_nvme_ctrlr *ctrlr = ctx; + int rc; + + if (spdk_nvme_cpl_is_error(cpl)) { + SPDK_WARNLOG("Abort failed. Resetting controller.\n"); + rc = spdk_nvme_ctrlr_reset(ctrlr); + if (rc) { + SPDK_ERRLOG("Resetting controller failed.\n"); + } + } +} + static void timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_qpair *qpair, uint16_t cid) @@ -567,9 +588,27 @@ timeout_cb(void *cb_arg, struct spdk_nvme_ctrlr *ctrlr, SPDK_WARNLOG("Warning: Detected a timeout. ctrlr=%p qpair=%p cid=%u\n", ctrlr, qpair, cid); - rc = spdk_nvme_ctrlr_reset(ctrlr); - if (rc) { - SPDK_ERRLOG("resetting controller failed\n"); + switch (g_action_on_timeout) { + case TIMEOUT_ACTION_ABORT: + if (qpair) { + rc = spdk_nvme_ctrlr_cmd_abort(ctrlr, qpair, cid, + spdk_nvme_abort_cpl, ctrlr); + if (rc == 0) { + return; + } + + SPDK_ERRLOG("Unable to send abort. 
Resetting.\n"); + } + + /* Fallthrough */ + case TIMEOUT_ACTION_RESET: + rc = spdk_nvme_ctrlr_reset(ctrlr); + if (rc) { + SPDK_ERRLOG("Resetting controller failed.\n"); + } + break; + case TIMEOUT_ACTION_NONE: + break; } } @@ -621,7 +660,7 @@ attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, sizeof(struct nvme_io_channel)); TAILQ_INSERT_TAIL(&g_nvme_ctrlrs, nvme_ctrlr, tailq); - if (g_reset_controller_on_timeout) { + if (g_action_on_timeout != TIMEOUT_ACTION_NONE) { spdk_nvme_ctrlr_register_timeout_callback(ctrlr, g_timeout, timeout_cb, NULL); } @@ -763,13 +802,32 @@ bdev_nvme_library_init(void) probe_ctx.count++; } - g_reset_controller_on_timeout = - spdk_conf_section_get_boolval(sp, "ResetControllerOnTimeout", false); - if ((g_timeout = spdk_conf_section_get_intval(sp, "NvmeTimeoutValue")) < 0) { g_timeout = 0; } + if (g_timeout > 0) { + val = spdk_conf_section_get_val(sp, "ActionOnTimeout"); + if (val != NULL) { + if (!strcasecmp(val, "Reset")) { + g_action_on_timeout = TIMEOUT_ACTION_RESET; + } else if (!strcasecmp(val, "Abort")) { + g_action_on_timeout = TIMEOUT_ACTION_ABORT; + } + } else { + /* Handle old name for backward compatibility */ + val = spdk_conf_section_get_val(sp, "ResetControllerOnTimeout"); + if (val) { + SPDK_WARNLOG("ResetControllerOnTimeout was renamed to ActionOnTimeout\n"); + SPDK_WARNLOG("Please update your configuration file\n"); + + if (spdk_conf_section_get_boolval(sp, "ResetControllerOnTimeout", false)) { + g_action_on_timeout = TIMEOUT_ACTION_RESET; + } + } + } + } + g_nvme_adminq_poll_timeout_us = spdk_conf_section_get_intval(sp, "AdminPollRate"); if (g_nvme_adminq_poll_timeout_us <= 0) { g_nvme_adminq_poll_timeout_us = 1000000;