nvme: Add three APIs for disconnect, start re-enable, and poll re-enable ctrlr

The NVMe bdev module will support two features to improve error recovery:
delayed reconnect, and deletion after multiple failed reconnect attempts.

The recently added two APIs, spdk_nvme_ctrlr_reset_async() and
spdk_nvme_ctrlr_reset_poll_async(), were not good enough.

spdk_nvme_ctrlr_reset_ctx was not necessary. It had only a pointer to ctrlr.
Using a pointer to ctrlr directly saves us from undesirable malloc error
processing.

Separate spdk_nvme_ctrlr_reset_async() into spdk_nvme_ctrlr_disconnect()
and spdk_nvme_ctrlr_reconnect_async(). spdk_nvme_ctrlr_disconnect()
disconnects ctrlr including disconnecting adminq.
spdk_nvme_ctrlr_reconnect_async() moves the ctrlr state to INIT.

Then rename spdk_nvme_ctrlr_reset_poll_async() to
spdk_nvme_ctrlr_reconnect_poll_async().

Finally deprecate spdk_nvme_ctrlr_reset_async() and
spdk_nvme_ctrlr_reset_poll_async().

The following patches will change the NVMe bdev module to use these new APIs.

Signed-off-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Change-Id: Id1d6858dcdc5fc2e9db0a6ebf3f79cab4f9bbcb7
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/10091
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
Shuhei Matsumoto 2021-11-27 03:22:08 +09:00 committed by Tomasz Zawadzki
parent dbc1c48aeb
commit 7a0a2800e0
6 changed files with 82 additions and 7 deletions

View File

@ -19,6 +19,13 @@ removed in SPDK 22.04, and the parameter `transport_retry_count` is added and us
A new parameter `bdev_retry_count` is added to the RPC `bdev_nvme_set_options`.
### nvme
New APIs, `spdk_nvme_ctrlr_disconnect`, `spdk_nvme_ctrlr_reconnect_async`, and
`spdk_nvme_ctrlr_reconnect_poll_async`, have been added to improve error recovery, and
the existing APIs, `spdk_nvme_ctrlr_reset_async` and `spdk_nvme_ctrlr_reset_poll_async`,
have been deprecated.
## v21.10
Structure `spdk_nvmf_target_opts` has been extended with new member `discovery_filter` which allows to specify

View File

@ -18,3 +18,9 @@ ABI cannot be removed without providing deprecation notice for at least single S
Deprecated `spdk_bdev_module_finish_done()` API, which will be removed in SPDK 22.01.
Bdev modules should use `spdk_bdev_module_fini_done()` instead.
### nvme
Deprecated `spdk_nvme_ctrlr_reset_async` and `spdk_nvme_ctrlr_reset_poll_async` APIs,
which will be removed in SPDK 22.01. `spdk_nvme_ctrlr_disconnect`, `spdk_nvme_ctrlr_reconnect_async`,
and `spdk_nvme_ctrlr_reconnect_poll_async` should be used instead.

View File

@ -1079,6 +1079,8 @@ struct spdk_nvme_ctrlr_reset_ctx;
/**
* Create a context object that can be polled to perform a full hardware reset of the NVMe controller.
* (Deprecated, please use spdk_nvme_ctrlr_disconnect(), spdk_nvme_ctrlr_reconnect_async(), and
* spdk_nvme_ctrlr_reconnect_poll_async() instead.)
*
* The function will set the controller reset context on success, user must call
* spdk_nvme_ctrlr_reset_poll_async() until it returns a value other than -EAGAIN.
@ -1097,6 +1099,8 @@ int spdk_nvme_ctrlr_reset_async(struct spdk_nvme_ctrlr *ctrlr,
/**
* Proceed with resetting controller associated with the controller reset context.
* (Deprecated, please use spdk_nvme_ctrlr_disconnect(), spdk_nvme_ctrlr_reconnect_async(), and
* spdk_nvme_ctrlr_reconnect_poll_async() instead.)
*
* The controller reset context is one returned from a previous call to
* spdk_nvme_ctrlr_reset_async(). Users must call this function on the
@ -1111,6 +1115,37 @@ int spdk_nvme_ctrlr_reset_async(struct spdk_nvme_ctrlr *ctrlr,
*/
int spdk_nvme_ctrlr_reset_poll_async(struct spdk_nvme_ctrlr_reset_ctx *ctrlr_reset_ctx);
/**
* Disconnect the given NVMe controller.
*
* This function is used as the first operation of a full reset sequence of the given NVMe
* controller. It disconnects the controller, including its admin queue. The NVMe controller
* is ready to reconnect after completing this function.
*
* To continue the reset sequence, call spdk_nvme_ctrlr_reconnect_async() and then poll
* spdk_nvme_ctrlr_reconnect_poll_async().
*
* \param ctrlr Opaque handle to NVMe controller.
*
* \return 0 on success, -EBUSY if controller is already resetting, or -ENXIO if controller
* has been removed.
*/
int spdk_nvme_ctrlr_disconnect(struct spdk_nvme_ctrlr *ctrlr);
/**
* Start re-enabling the given NVMe controller in a full reset sequence.
*
* This is the second operation of a full reset sequence and is meant to be called after
* spdk_nvme_ctrlr_disconnect() has succeeded. It moves the controller state back to INIT
* so that the controller is fully re-initialized. The caller must then poll
* spdk_nvme_ctrlr_reconnect_poll_async() until it returns a value other than -EAGAIN.
*
* Note: this function acquires the controller lock and returns with it still held; the
* lock is released later in the sequence by the polling function.
*
* \param ctrlr Opaque handle to NVMe controller.
*/
void spdk_nvme_ctrlr_reconnect_async(struct spdk_nvme_ctrlr *ctrlr);
/**
* Proceed with re-enabling the given NVMe controller.
*
* Users must call this function in a full reset sequence, after
* spdk_nvme_ctrlr_reconnect_async(), until it returns a value other than -EAGAIN.
*
* \param ctrlr Opaque handle to NVMe controller.
*
* \return 0 if the given NVMe controller is enabled, or -EAGAIN if there are still
* pending operations to enable it.
*/
int spdk_nvme_ctrlr_reconnect_poll_async(struct spdk_nvme_ctrlr *ctrlr);
/**
* Perform a NVMe subsystem reset.
*

View File

@ -35,7 +35,7 @@ SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
SO_VER := 7
SO_MINOR := 0
SO_MINOR := 1
C_SRCS = nvme_ctrlr_cmd.c nvme_ctrlr.c nvme_fabric.c nvme_ns_cmd.c nvme_ns.c nvme_pcie_common.c nvme_pcie.c nvme_qpair.c nvme.c nvme_quirks.c nvme_transport.c \
nvme_ctrlr_ocssd_cmd.c nvme_ns_ocssd_cmd.c nvme_tcp.c nvme_opal.c nvme_io_msg.c nvme_poll_group.c nvme_zns.c

View File

@ -1610,8 +1610,8 @@ nvme_ctrlr_abort_queued_aborts(struct spdk_nvme_ctrlr *ctrlr)
}
}
static int
nvme_ctrlr_reset_pre(struct spdk_nvme_ctrlr *ctrlr)
int
spdk_nvme_ctrlr_disconnect(struct spdk_nvme_ctrlr *ctrlr)
{
struct spdk_nvme_qpair *qpair;
@ -1657,10 +1657,34 @@ nvme_ctrlr_reset_pre(struct spdk_nvme_ctrlr *ctrlr)
spdk_bit_array_free(&ctrlr->free_io_qids);
nvme_robust_mutex_unlock(&ctrlr->ctrlr_lock);
return 0;
}
/*
* Begin re-enabling a controller as part of a full reset sequence.
*
* Takes ctrlr_lock and rewinds the controller state machine to INIT. The
* re-initialization itself is driven by subsequent calls to
* spdk_nvme_ctrlr_reconnect_poll_async().
*/
void
spdk_nvme_ctrlr_reconnect_async(struct spdk_nvme_ctrlr *ctrlr)
{
nvme_robust_mutex_lock(&ctrlr->ctrlr_lock);
/* Set the state back to INIT to cause a full hardware reset. */
nvme_ctrlr_set_state(ctrlr, NVME_CTRLR_STATE_INIT, NVME_TIMEOUT_INFINITE);
/* Return without releasing ctrlr_lock on purpose. ctrlr_lock will be released when
* spdk_nvme_ctrlr_reconnect_poll_async() (or the deprecated wrapper
* spdk_nvme_ctrlr_reset_poll_async(), which forwards to it) returns 0.
*/
}
/*
 * First half of a controller reset: disconnect the controller and, only if
 * that succeeded, start re-enabling it.
 *
 * Returns 0 on success, or the error code reported by
 * spdk_nvme_ctrlr_disconnect(), in which case the reconnect is not started.
 */
static int
nvme_ctrlr_reset_pre(struct spdk_nvme_ctrlr *ctrlr)
{
	int status = spdk_nvme_ctrlr_disconnect(ctrlr);

	if (status == 0) {
		/* Disconnect succeeded, so kick off the re-enable phase. */
		spdk_nvme_ctrlr_reconnect_async(ctrlr);
	}

	return status;
}
@ -1668,8 +1692,8 @@ nvme_ctrlr_reset_pre(struct spdk_nvme_ctrlr *ctrlr)
* This function will be called when the controller is being reinitialized.
* Note: the ctrlr_lock must be held when calling this function.
*/
static int
nvme_ctrlr_reinit_on_reset(struct spdk_nvme_ctrlr *ctrlr)
int
spdk_nvme_ctrlr_reconnect_poll_async(struct spdk_nvme_ctrlr *ctrlr)
{
struct spdk_nvme_qpair *qpair;
int rc = 0, rc_tmp = 0;
@ -1742,7 +1766,7 @@ nvme_ctrlr_reset_poll_async(struct spdk_nvme_ctrlr_reset_ctx *ctrlr_reset_ctx)
{
struct spdk_nvme_ctrlr *ctrlr = ctrlr_reset_ctx->ctrlr;
return nvme_ctrlr_reinit_on_reset(ctrlr);
return spdk_nvme_ctrlr_reconnect_poll_async(ctrlr);
}
int

View File

@ -40,6 +40,9 @@
spdk_nvme_ctrlr_prepare_for_reset;
spdk_nvme_ctrlr_reset_async;
spdk_nvme_ctrlr_reset_poll_async;
spdk_nvme_ctrlr_disconnect;
spdk_nvme_ctrlr_reconnect_async;
spdk_nvme_ctrlr_reconnect_poll_async;
spdk_nvme_ctrlr_fail;
spdk_nvme_ctrlr_is_failed;
spdk_nvme_ctrlr_get_data;