bdev/nvme: Retry reconnecting ctrlr after seconds if reset failed

Previously reconnect retry was not controlled and was repeated indefinitely.

This patch adds two options, ctrlr_loss_timeout_sec and reconnect_delay_sec,
to nvme_ctrlr, and adds reset_start_tsc, reconnect_is_delayed, and
reconnect_delay_timer to nvme_ctrlr to control reconnect retries.

Both ctrlr_loss_timeout_sec and reconnect_delay_sec are initialized to
zero. This means reconnect is not throttled, as was the case before this patch.

A few more changes are added.

Change nvme_io_path_is_failed() to return false if reset is throttled
even if nvme_ctrlr is resetting or is to be reconnected.

spdk_nvme_ctrlr_reconnect_poll_async() may continue returning -EAGAIN
indefinitely. To handle such an exceptional case, use ctrlr_loss_timeout_sec.

Not only ctrlr reset but also non-multipath ctrlr failover is controlled.
So we need to include path failover into ctrlr reconnect.

When the active path is removed and switched to one of the alternative paths,
if ctrlr reconnect is scheduled, connecting to the alternative path is left
to the scheduled reconnect.

If ctrlr reset or reconnect fails and the retry is scheduled,
switch the active path to one of the alternative paths.

Restore unit test cases removed in the previous patches.

Change-Id: Idec636c4eced39eb47ff4ef6fde72d6fd9fe4f85
Signed-off-by: Shuhei Matsumoto <smatsumoto@nvidia.com>
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/10128
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Reviewed-by: Monica Kenguva <monica.kenguva@intel.com>
This commit is contained in:
Shuhei Matsumoto 2022-01-13 16:03:36 +09:00 committed by Tomasz Zawadzki
parent f85370b168
commit ae4e54fdc3
7 changed files with 845 additions and 56 deletions

View File

@ -2956,6 +2956,10 @@ multipathing. This is done by specifying the `name` parameter as an existing con
path, the hostnqn, hostsvcid, hostaddr, prchk_reftag, and prchk_guard_arguments must not be specified and are assumed
to have the same value as the existing path.
The parameters, `ctrlr_loss_timeout_sec` and `reconnect_delay_sec`, are mutually dependent.
If `reconnect_delay_sec` is non-zero, `ctrlr_loss_timeout_sec` has to be -1 or not less than `reconnect_delay_sec`.
If `reconnect_delay_sec` is zero, `ctrlr_loss_timeout_sec` has to be zero.
#### Result
Array of names of newly created bdevs.
@ -2980,6 +2984,8 @@ ddgst | Optional | bool | Enable TCP data digest
fabrics_connect_timeout_us | Optional | bool | Timeout for fabrics connect (in microseconds)
multipath | Optional | string | Multipathing behavior: disable, failover, multipath. Default is failover.
num_io_queues | Optional | uint32_t | The number of IO queues to request during initialization. Range: (0, UINT16_MAX + 1], Default is 1024.
ctrlr_loss_timeout_sec | Optional | number | Time to wait until ctrlr is reconnected before deleting ctrlr. -1 means infinite reconnects. 0 means no reconnect.
reconnect_delay_sec | Optional | number | Time to delay a reconnect trial. 0 means no reconnect.
#### Example

View File

@ -470,6 +470,8 @@ nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
{
int rc;
spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);
/* First, unregister the adminq poller, as the driver will poll adminq if necessary */
spdk_poller_unregister(&nvme_ctrlr->adminq_timer_poller);
@ -735,9 +737,16 @@ nvme_io_path_is_failed(struct nvme_io_path *io_path)
return true;
}
/* TODO: Regard path as unfailed only if the reset is throttled. */
if (nvme_ctrlr->resetting) {
return true;
if (nvme_ctrlr->reconnect_delay_sec != 0) {
return false;
} else {
return true;
}
}
if (nvme_ctrlr->reconnect_is_delayed) {
return false;
}
if (spdk_nvme_ctrlr_is_failed(nvme_ctrlr->ctrlr)) {
@ -758,7 +767,7 @@ nvme_ctrlr_is_available(struct nvme_ctrlr *nvme_ctrlr)
return false;
}
if (nvme_ctrlr->resetting) {
if (nvme_ctrlr->resetting || nvme_ctrlr->reconnect_is_delayed) {
return false;
}
@ -1285,9 +1294,29 @@ bdev_nvme_failover_trid(struct nvme_ctrlr *nvme_ctrlr, bool remove)
}
}
/* Report whether the time elapsed since the current reset sequence began
 * has reached ctrlr_loss_timeout_sec. A timeout of 0 (no reconnect retry)
 * or -1 (retry forever) never expires.
 */
static bool
bdev_nvme_check_ctrlr_loss_timeout(struct nvme_ctrlr *nvme_ctrlr)
{
	int32_t timeout_sec = nvme_ctrlr->ctrlr_loss_timeout_sec;
	int32_t elapsed_sec;

	if (timeout_sec == 0 || timeout_sec == -1) {
		return false;
	}

	/* Convert the tick delta since reset_start_tsc into whole seconds. */
	elapsed_sec = (spdk_get_ticks() - nvme_ctrlr->reset_start_tsc) / spdk_get_ticks_hz();

	return elapsed_sec >= timeout_sec;
}
/* Action to take once a ctrlr reset sequence completes
 * (chosen by bdev_nvme_check_op_after_reset()).
 */
enum bdev_nvme_op_after_reset {
/* Nothing further to do. */
OP_NONE,
/* A destruct request was deferred while resetting; complete it now. */
OP_COMPLETE_PENDING_DESTRUCT,
/* ctrlr_loss_timeout_sec expired; delete the ctrlr. */
OP_DESTRUCT,
/* Disconnect the ctrlr and retry the reconnect after reconnect_delay_sec. */
OP_DELAYED_RECONNECT,
};

/* Convenience alias for the enum type. */
typedef enum bdev_nvme_op_after_reset _bdev_nvme_op_after_reset;
@ -1298,9 +1327,60 @@ bdev_nvme_check_op_after_reset(struct nvme_ctrlr *nvme_ctrlr, bool success)
if (nvme_ctrlr_can_be_unregistered(nvme_ctrlr)) {
/* Complete pending destruct after reset completes. */
return OP_COMPLETE_PENDING_DESTRUCT;
} else if (success || nvme_ctrlr->reconnect_delay_sec == 0) {
nvme_ctrlr->reset_start_tsc = 0;
return OP_NONE;
} else if (bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
return OP_DESTRUCT;
} else {
bdev_nvme_failover_trid(nvme_ctrlr, false);
return OP_DELAYED_RECONNECT;
}
}
static int _bdev_nvme_delete(struct nvme_ctrlr *nvme_ctrlr, bool hotplug);
static void bdev_nvme_reconnect_ctrlr(struct nvme_ctrlr *nvme_ctrlr);
/* Poller callback fired once reconnect_delay_sec has elapsed.
 * Clears the delayed-reconnect state and starts the next reconnect attempt,
 * unless the ctrlr is being destructed.
 *
 * NOTE: the original rendering contained a stray "return OP_NONE;" after the
 * destruct check — leftover removed-diff residue that made the rest of the
 * function unreachable and returned an enum from an int poller callback.
 * It is removed here.
 */
static int
bdev_nvme_reconnect_delay_timer_expired(void *ctx)
{
	struct nvme_ctrlr *nvme_ctrlr = ctx;

	pthread_mutex_lock(&nvme_ctrlr->mutex);

	/* One-shot timer: unregister it before kicking off the reconnect. */
	spdk_poller_unregister(&nvme_ctrlr->reconnect_delay_timer);

	assert(nvme_ctrlr->reconnect_is_delayed == true);
	nvme_ctrlr->reconnect_is_delayed = false;

	if (nvme_ctrlr->destruct) {
		/* Destruct is in progress; do not start a new reconnect. */
		pthread_mutex_unlock(&nvme_ctrlr->mutex);
		return SPDK_POLLER_BUSY;
	}

	assert(nvme_ctrlr->resetting == false);
	nvme_ctrlr->resetting = true;

	pthread_mutex_unlock(&nvme_ctrlr->mutex);

	/* Paused by bdev_nvme_start_reconnect_delay_timer(); resume polling. */
	spdk_poller_resume(nvme_ctrlr->adminq_timer_poller);

	bdev_nvme_reconnect_ctrlr(nvme_ctrlr);

	return SPDK_POLLER_BUSY;
}
/* Pause adminq polling and arm a one-shot timer that retries the reconnect
 * after reconnect_delay_sec seconds; the timer callback
 * (bdev_nvme_reconnect_delay_timer_expired) resumes the adminq poller.
 */
static void
bdev_nvme_start_reconnect_delay_timer(struct nvme_ctrlr *nvme_ctrlr)
{
spdk_poller_pause(nvme_ctrlr->adminq_timer_poller);
assert(nvme_ctrlr->reconnect_is_delayed == false);
nvme_ctrlr->reconnect_is_delayed = true;
/* Timer must not already be armed when scheduling a delayed reconnect. */
assert(nvme_ctrlr->reconnect_delay_timer == NULL);
nvme_ctrlr->reconnect_delay_timer = SPDK_POLLER_REGISTER(bdev_nvme_reconnect_delay_timer_expired,
nvme_ctrlr,
nvme_ctrlr->reconnect_delay_sec * SPDK_SEC_TO_USEC);
}
static void
@ -1345,6 +1425,13 @@ _bdev_nvme_reset_complete(struct spdk_io_channel_iter *i, int status)
case OP_COMPLETE_PENDING_DESTRUCT:
nvme_ctrlr_unregister(nvme_ctrlr);
break;
case OP_DESTRUCT:
_bdev_nvme_delete(nvme_ctrlr, false);
break;
case OP_DELAYED_RECONNECT:
spdk_nvme_ctrlr_disconnect(nvme_ctrlr->ctrlr);
bdev_nvme_start_reconnect_delay_timer(nvme_ctrlr);
break;
default:
break;
}
@ -1411,11 +1498,13 @@ static int
bdev_nvme_reconnect_ctrlr_poll(void *arg)
{
struct nvme_ctrlr *nvme_ctrlr = arg;
int rc;
int rc = -ETIMEDOUT;
rc = spdk_nvme_ctrlr_reconnect_poll_async(nvme_ctrlr->ctrlr);
if (rc == -EAGAIN) {
return SPDK_POLLER_BUSY;
if (!bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr)) {
rc = spdk_nvme_ctrlr_reconnect_poll_async(nvme_ctrlr->ctrlr);
if (rc == -EAGAIN) {
return SPDK_POLLER_BUSY;
}
}
spdk_poller_unregister(&nvme_ctrlr->reset_detach_poller);
@ -1491,7 +1580,17 @@ bdev_nvme_reset(struct nvme_ctrlr *nvme_ctrlr)
return -EBUSY;
}
if (nvme_ctrlr->reconnect_is_delayed) {
pthread_mutex_unlock(&nvme_ctrlr->mutex);
SPDK_NOTICELOG("Reconnect is already scheduled.\n");
return -EBUSY;
}
nvme_ctrlr->resetting = true;
assert(nvme_ctrlr->reset_start_tsc == 0);
nvme_ctrlr->reset_start_tsc = spdk_get_ticks();
pthread_mutex_unlock(&nvme_ctrlr->mutex);
spdk_thread_send_msg(nvme_ctrlr->thread, _bdev_nvme_reset, nvme_ctrlr);
@ -1643,6 +1742,14 @@ bdev_nvme_failover(struct nvme_ctrlr *nvme_ctrlr, bool remove)
bdev_nvme_failover_trid(nvme_ctrlr, remove);
if (nvme_ctrlr->reconnect_is_delayed) {
pthread_mutex_unlock(&nvme_ctrlr->mutex);
SPDK_NOTICELOG("Reconnect is already scheduled.\n");
/* We rely on the next reconnect for the failover. */
return 0;
}
nvme_ctrlr->resetting = true;
pthread_mutex_unlock(&nvme_ctrlr->mutex);
@ -3261,6 +3368,8 @@ nvme_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr,
if (ctx != NULL) {
nvme_ctrlr->prchk_flags = ctx->prchk_flags;
nvme_ctrlr->ctrlr_loss_timeout_sec = ctx->ctrlr_loss_timeout_sec;
nvme_ctrlr->reconnect_delay_sec = ctx->reconnect_delay_sec;
}
nvme_ctrlr->adminq_timer_poller = SPDK_POLLER_REGISTER(bdev_nvme_poll_adminq, nvme_ctrlr,
@ -3726,6 +3835,34 @@ bdev_nvme_async_poll(void *arg)
return SPDK_POLLER_BUSY;
}
/* Validate the mutually dependent ctrlr_loss_timeout_sec and
 * reconnect_delay_sec options.
 *
 * Valid combinations:
 *   - both zero (reconnect disabled),
 *   - ctrlr_loss_timeout_sec == -1 with a non-zero reconnect_delay_sec,
 *   - ctrlr_loss_timeout_sec > 0 with 0 < reconnect_delay_sec <=
 *     ctrlr_loss_timeout_sec.
 * Logs an error and returns false for any other combination.
 */
static bool
bdev_nvme_check_multipath_params(int32_t ctrlr_loss_timeout_sec,
				 uint32_t reconnect_delay_sec)
{
	if (ctrlr_loss_timeout_sec < -1) {
		SPDK_ERRLOG("ctrlr_loss_timeout_sec can't be less than -1.\n");
		return false;
	}

	if (ctrlr_loss_timeout_sec == 0) {
		if (reconnect_delay_sec != 0) {
			SPDK_ERRLOG("reconnect_delay_sec must be 0 if ctrlr_loss_timeout_sec is 0.\n");
			return false;
		}
		return true;
	}

	/* ctrlr_loss_timeout_sec is -1 or positive here. */
	if (reconnect_delay_sec == 0) {
		SPDK_ERRLOG("reconnect_delay_sec can't be 0 if ctrlr_loss_timeout_sec is not 0.\n");
		return false;
	}

	if (ctrlr_loss_timeout_sec > 0 &&
	    reconnect_delay_sec > (uint32_t)ctrlr_loss_timeout_sec) {
		SPDK_ERRLOG("reconnect_delay_sec can't be more than ctrlr_loss_timeout_sec.\n");
		return false;
	}

	return true;
}
int
bdev_nvme_create(struct spdk_nvme_transport_id *trid,
const char *base_name,
@ -3735,7 +3872,9 @@ bdev_nvme_create(struct spdk_nvme_transport_id *trid,
spdk_bdev_create_nvme_fn cb_fn,
void *cb_ctx,
struct spdk_nvme_ctrlr_opts *opts,
bool multipath)
bool multipath,
int32_t ctrlr_loss_timeout_sec,
uint32_t reconnect_delay_sec)
{
struct nvme_probe_skip_entry *entry, *tmp;
struct nvme_async_probe_ctx *ctx;
@ -3749,6 +3888,10 @@ bdev_nvme_create(struct spdk_nvme_transport_id *trid,
return -EEXIST;
}
if (!bdev_nvme_check_multipath_params(ctrlr_loss_timeout_sec, reconnect_delay_sec)) {
return -EINVAL;
}
ctx = calloc(1, sizeof(*ctx));
if (!ctx) {
return -ENOMEM;
@ -3760,6 +3903,8 @@ bdev_nvme_create(struct spdk_nvme_transport_id *trid,
ctx->cb_ctx = cb_ctx;
ctx->prchk_flags = prchk_flags;
ctx->trid = *trid;
ctx->ctrlr_loss_timeout_sec = ctrlr_loss_timeout_sec;
ctx->reconnect_delay_sec = reconnect_delay_sec;
if (trid->trtype == SPDK_NVME_TRANSPORT_PCIE) {
TAILQ_FOREACH_SAFE(entry, &g_skipped_nvme_ctrlrs, tailq, tmp) {
@ -4085,7 +4230,7 @@ discovery_log_page_cb(void *cb_arg, int rc, const struct spdk_nvme_cpl *cpl,
snprintf(new_ctx->opts.hostnqn, sizeof(new_ctx->opts.hostnqn), "%s", ctx->hostnqn);
rc = bdev_nvme_create(&new_ctx->trid, new_ctx->name, NULL, 0, 0,
discovery_attach_controller_done, new_ctx,
&new_ctx->opts, true);
&new_ctx->opts, true, 0, 0);
if (rc == 0) {
TAILQ_INSERT_TAIL(&ctx->ctrlr_ctxs, new_ctx, tailq);
ctx->attach_in_progress++;
@ -5414,6 +5559,8 @@ nvme_ctrlr_config_json(struct spdk_json_write_ctx *w,
(nvme_ctrlr->prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) != 0);
spdk_json_write_named_bool(w, "prchk_guard",
(nvme_ctrlr->prchk_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) != 0);
spdk_json_write_named_int32(w, "ctrlr_loss_timeout_sec", nvme_ctrlr->ctrlr_loss_timeout_sec);
spdk_json_write_named_uint32(w, "reconnect_delay_sec", nvme_ctrlr->reconnect_delay_sec);
spdk_json_write_object_end(w);

View File

@ -3,6 +3,7 @@
*
* Copyright (c) Intel Corporation. All rights reserved.
* Copyright (c) 2019 Mellanox Technologies LTD. All rights reserved.
* Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -57,6 +58,8 @@ struct nvme_async_probe_ctx {
const char **names;
uint32_t count;
uint32_t prchk_flags;
int32_t ctrlr_loss_timeout_sec;
uint32_t reconnect_delay_sec;
struct spdk_poller *poller;
struct spdk_nvme_transport_id trid;
struct spdk_nvme_ctrlr_opts opts;
@ -106,6 +109,7 @@ struct nvme_ctrlr {
int ref;
uint32_t resetting : 1;
uint32_t reconnect_is_delayed : 1;
uint32_t destruct : 1;
uint32_t ana_log_page_updating : 1;
/**
@ -127,6 +131,9 @@ struct nvme_ctrlr {
struct spdk_poller *reset_detach_poller;
struct spdk_nvme_detach_ctx *detach_ctx;
uint64_t reset_start_tsc;
struct spdk_poller *reconnect_delay_timer;
/** linked list pointer for device list */
TAILQ_ENTRY(nvme_ctrlr) tailq;
struct nvme_bdev_ctrlr *nbdev_ctrlr;
@ -139,6 +146,9 @@ struct nvme_ctrlr {
struct nvme_async_probe_ctx *probe_ctx;
uint32_t reconnect_delay_sec;
int32_t ctrlr_loss_timeout_sec;
pthread_mutex_t mutex;
};
@ -255,7 +265,9 @@ int bdev_nvme_create(struct spdk_nvme_transport_id *trid,
spdk_bdev_create_nvme_fn cb_fn,
void *cb_ctx,
struct spdk_nvme_ctrlr_opts *opts,
bool multipath);
bool multipath,
int32_t ctrlr_loss_timeout_sec,
uint32_t reconnect_delay_sec);
int bdev_nvme_start_discovery(struct spdk_nvme_transport_id *trid, const char *base_name,
struct spdk_nvme_ctrlr_opts *opts,

View File

@ -3,6 +3,7 @@
*
* Copyright (c) Intel Corporation. All rights reserved.
* Copyright (c) 2019-2021 Mellanox Technologies LTD. All rights reserved.
* Copyright (c) 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -183,6 +184,8 @@ struct rpc_bdev_nvme_attach_controller {
bool prchk_guard;
uint64_t fabrics_connect_timeout_us;
char *multipath;
int32_t ctrlr_loss_timeout_sec;
uint32_t reconnect_delay_sec;
struct spdk_nvme_ctrlr_opts opts;
};
@ -222,6 +225,8 @@ static const struct spdk_json_object_decoder rpc_bdev_nvme_attach_controller_dec
{"fabrics_connect_timeout_us", offsetof(struct rpc_bdev_nvme_attach_controller, opts.fabrics_connect_timeout_us), spdk_json_decode_uint64, true},
{"multipath", offsetof(struct rpc_bdev_nvme_attach_controller, multipath), spdk_json_decode_string, true},
{"num_io_queues", offsetof(struct rpc_bdev_nvme_attach_controller, opts.num_io_queues), spdk_json_decode_uint32, true},
{"ctrlr_loss_timeout_sec", offsetof(struct rpc_bdev_nvme_attach_controller, ctrlr_loss_timeout_sec), spdk_json_decode_int32, true},
{"reconnect_delay_sec", offsetof(struct rpc_bdev_nvme_attach_controller, reconnect_delay_sec), spdk_json_decode_uint32, true},
};
#define NVME_MAX_BDEVS_PER_RPC 128
@ -491,7 +496,8 @@ rpc_bdev_nvme_attach_controller(struct spdk_jsonrpc_request *request,
ctx->count = NVME_MAX_BDEVS_PER_RPC;
rc = bdev_nvme_create(&trid, ctx->req.name, ctx->names, ctx->count, prchk_flags,
rpc_bdev_nvme_attach_controller_done, ctx, &ctx->req.opts,
multipath);
multipath, ctx->req.ctrlr_loss_timeout_sec,
ctx->req.reconnect_delay_sec);
if (rc) {
spdk_jsonrpc_send_error_response(request, rc, spdk_strerror(-rc));
goto cleanup;

View File

@ -538,7 +538,9 @@ if __name__ == "__main__":
ddgst=args.ddgst,
fabrics_timeout=args.fabrics_timeout,
multipath=args.multipath,
num_io_queues=args.num_io_queues))
num_io_queues=args.num_io_queues,
ctrlr_loss_timeout_sec=args.ctrlr_loss_timeout_sec,
reconnect_delay_sec=args.reconnect_delay_sec))
p = subparsers.add_parser('bdev_nvme_attach_controller', aliases=['construct_nvme_bdev'],
help='Add bdevs with nvme backend')
@ -570,6 +572,20 @@ if __name__ == "__main__":
p.add_argument('--fabrics-timeout', type=int, help='Fabrics connect timeout in microseconds')
p.add_argument('-x', '--multipath', help='Set multipath behavior (disable, failover, multipath)')
p.add_argument('--num-io-queues', type=int, help='Set the number of IO queues to request during initialization.')
p.add_argument('-l', '--ctrlr-loss-timeout-sec',
help="""Time to wait until ctrlr is reconnected before deleting ctrlr.
-1 means infinite reconnect retries. 0 means no reconnect retry.
If reconnect_delay_sec is zero, ctrlr_loss_timeout_sec has to be zero.
If reconnect_delay_sec is non-zero, ctrlr_loss_timeout_sec has to be -1 or not less than
reconnect_delay_sec.""",
type=int)
p.add_argument('-o', '--reconnect-delay-sec',
help="""Time to delay a reconnect retry.
If ctrlr_loss_timeout_sec is zero, reconnect_delay_sec has to be zero.
If ctrlr_loss_timeout_sec is -1, reconnect_delay_sec has to be non-zero.
If ctrlr_loss_timeout_sec is not -1 or zero, reconnect_delay_sec has to be non-zero and
less than ctrlr_loss_timeout_sec.""",
type=int)
p.set_defaults(func=bdev_nvme_attach_controller)
def bdev_nvme_get_controllers(args):

View File

@ -523,7 +523,8 @@ def bdev_nvme_set_hotplug(client, enable, period_us=None):
def bdev_nvme_attach_controller(client, name, trtype, traddr, adrfam=None, trsvcid=None,
priority=None, subnqn=None, hostnqn=None, hostaddr=None,
hostsvcid=None, prchk_reftag=None, prchk_guard=None,
hdgst=None, ddgst=None, fabrics_timeout=None, multipath=None, num_io_queues=None):
hdgst=None, ddgst=None, fabrics_timeout=None, multipath=None, num_io_queues=None,
ctrlr_loss_timeout_sec=None, reconnect_delay_sec=None):
"""Construct block device for each NVMe namespace in the attached controller.
Args:
@ -544,6 +545,16 @@ def bdev_nvme_attach_controller(client, name, trtype, traddr, adrfam=None, trsvc
fabrics_timeout: Fabrics connect timeout in us (optional)
multipath: The behavior when multiple paths are created ("disable", "failover", or "multipath"; failover if not specified)
num_io_queues: The number of IO queues to request during initialization. (optional)
ctrlr_loss_timeout_sec: Time to wait until ctrlr is reconnected before deleting ctrlr.
-1 means infinite reconnect retries. 0 means no reconnect retry.
If reconnect_delay_sec is zero, ctrlr_loss_timeout_sec has to be zero.
If reconnect_delay_sec is non-zero, ctrlr_loss_timeout_sec has to be -1 or not less than reconnect_delay_sec.
(optional)
reconnect_delay_sec: Time to delay a reconnect retry.
If ctrlr_loss_timeout_sec is zero, reconnect_delay_sec has to be zero.
If ctrlr_loss_timeout_sec is -1, reconnect_delay_sec has to be non-zero.
If ctrlr_loss_timeout_sec is not -1 or zero, reconnect_delay_sec has to be non-zero and less than ctrlr_loss_timeout_sec.
(optional)
Returns:
Names of created block devices.
@ -594,6 +605,12 @@ def bdev_nvme_attach_controller(client, name, trtype, traddr, adrfam=None, trsvc
if num_io_queues:
params['num_io_queues'] = num_io_queues
if ctrlr_loss_timeout_sec is not None:
params['ctrlr_loss_timeout_sec'] = ctrlr_loss_timeout_sec
if reconnect_delay_sec is not None:
params['reconnect_delay_sec'] = reconnect_delay_sec
return client.call('bdev_nvme_attach_controller', params)

View File

@ -3,7 +3,7 @@
*
* Copyright (c) Intel Corporation.
* All rights reserved.
* Copyright (c) 2021 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
* Copyright (c) 2021, 2022 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@ -1670,7 +1670,7 @@ test_pending_reset(void)
g_ut_attach_bdev_count = 1;
rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, false);
attach_ctrlr_done, NULL, NULL, false, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -1801,7 +1801,7 @@ test_attach_ctrlr(void)
g_ut_attach_bdev_count = 0;
rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, false);
attach_ctrlr_done, NULL, NULL, false, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -1816,7 +1816,7 @@ test_attach_ctrlr(void)
g_ut_attach_ctrlr_status = 0;
rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, false);
attach_ctrlr_done, NULL, NULL, false, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -1844,7 +1844,7 @@ test_attach_ctrlr(void)
g_ut_attach_bdev_count = 1;
rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, false);
attach_ctrlr_done, NULL, NULL, false, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -1880,7 +1880,7 @@ test_attach_ctrlr(void)
g_ut_attach_bdev_count = 0;
rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, false);
attach_ctrlr_done, NULL, NULL, false, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -1934,7 +1934,7 @@ test_aer_cb(void)
g_ut_attach_bdev_count = 3;
rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, false);
attach_ctrlr_done, NULL, NULL, false, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -2140,7 +2140,7 @@ test_submit_nvme_cmd(void)
g_ut_attach_bdev_count = 1;
rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, false);
attach_ctrlr_done, NULL, NULL, false, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -2232,7 +2232,7 @@ test_add_remove_trid(void)
SPDK_CU_ASSERT_FATAL(ctrlr1 != NULL);
rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, false);
attach_ctrlr_done, NULL, NULL, false, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -2247,7 +2247,7 @@ test_add_remove_trid(void)
SPDK_CU_ASSERT_FATAL(ctrlr2 != NULL);
rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, false);
attach_ctrlr_done, NULL, NULL, false, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -2277,7 +2277,7 @@ test_add_remove_trid(void)
SPDK_CU_ASSERT_FATAL(ctrlr3 != NULL);
rc = bdev_nvme_create(&path3.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, false);
attach_ctrlr_done, NULL, NULL, false, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -2324,7 +2324,7 @@ test_add_remove_trid(void)
SPDK_CU_ASSERT_FATAL(ctrlr1 != NULL);
rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, false);
attach_ctrlr_done, NULL, NULL, false, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -2339,7 +2339,7 @@ test_add_remove_trid(void)
SPDK_CU_ASSERT_FATAL(ctrlr2 != NULL);
rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, false);
attach_ctrlr_done, NULL, NULL, false, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -2397,7 +2397,7 @@ test_abort(void)
set_thread(1);
rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, false);
attach_ctrlr_done, NULL, NULL, false, -1, 1);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -2522,6 +2522,37 @@ test_abort(void)
set_thread(0);
/* If qpair is disconnected, it is freed and then reconnected via resetting
* the corresponding nvme_ctrlr. I/O should be queued if it is submitted
* while resetting the nvme_ctrlr.
*/
ctrlr_ch1->qpair->is_connected = false;
poll_thread_times(0, 3);
CU_ASSERT(ctrlr_ch1->qpair == NULL);
CU_ASSERT(nvme_ctrlr->resetting == true);
write_io->internal.in_submit_request = true;
bdev_nvme_submit_request(ch1, write_io);
CU_ASSERT(write_io->internal.in_submit_request == true);
CU_ASSERT(write_io == TAILQ_FIRST(&nbdev_ch1->retry_io_list));
/* Aborting the queued write request should succeed immediately. */
abort_io->internal.ch = (struct spdk_bdev_channel *)ch1;
abort_io->u.abort.bio_to_abort = write_io;
abort_io->internal.in_submit_request = true;
bdev_nvme_submit_request(ch1, abort_io);
CU_ASSERT(abort_io->internal.in_submit_request == false);
CU_ASSERT(abort_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);
CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 0);
CU_ASSERT(write_io->internal.in_submit_request == false);
CU_ASSERT(write_io->internal.status == SPDK_BDEV_IO_STATUS_ABORTED);
spdk_put_io_channel(ch1);
set_thread(1);
@ -2615,7 +2646,7 @@ test_bdev_unregister(void)
g_ut_attach_bdev_count = 2;
rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, false);
attach_ctrlr_done, NULL, NULL, false, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -2731,7 +2762,7 @@ test_init_ana_log_page(void)
g_ut_attach_bdev_count = 5;
rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, false);
attach_ctrlr_done, NULL, NULL, false, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -2832,7 +2863,7 @@ test_reconnect_qpair(void)
g_ut_attach_bdev_count = 1;
rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, false);
attach_ctrlr_done, NULL, NULL, false, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -2964,7 +2995,8 @@ test_create_bdev_ctrlr(void)
g_ut_attach_bdev_count = 0;
rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, true);
attach_ctrlr_done, NULL, NULL, true, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
poll_threads();
@ -2985,7 +3017,7 @@ test_create_bdev_ctrlr(void)
ctrlr2->cdata.cntlid = ctrlr1->cdata.cntlid;
rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, true);
attach_ctrlr_done, NULL, NULL, true, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -3003,7 +3035,7 @@ test_create_bdev_ctrlr(void)
SPDK_CU_ASSERT_FATAL(ctrlr2 != NULL);
rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, true);
attach_ctrlr_done, NULL, NULL, true, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -3036,7 +3068,7 @@ test_create_bdev_ctrlr(void)
SPDK_CU_ASSERT_FATAL(ctrlr2 != NULL);
rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, true);
attach_ctrlr_done, NULL, NULL, true, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -3046,7 +3078,7 @@ test_create_bdev_ctrlr(void)
poll_threads();
rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, true);
attach_ctrlr_done, NULL, NULL, true, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -3136,7 +3168,7 @@ test_add_multi_ns_to_bdev(void)
g_ut_attach_bdev_count = 3;
rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, 32, 0,
attach_ctrlr_done, NULL, NULL, true);
attach_ctrlr_done, NULL, NULL, true, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -3162,7 +3194,7 @@ test_add_multi_ns_to_bdev(void)
g_ut_attach_bdev_count = 2;
rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, 32, 0,
attach_ctrlr_done, NULL, NULL, true);
attach_ctrlr_done, NULL, NULL, true, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -3239,7 +3271,7 @@ test_add_multi_ns_to_bdev(void)
memset(&ctrlr1->ns[0].uuid, 1, sizeof(struct spdk_uuid));
rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, 32, 0,
attach_ctrlr_done, NULL, NULL, true);
attach_ctrlr_done, NULL, NULL, true, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -3256,7 +3288,7 @@ test_add_multi_ns_to_bdev(void)
memset(&ctrlr2->ns[0].uuid, 1, sizeof(struct spdk_uuid));
rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, 32, 0,
attach_ctrlr_done, NULL, NULL, true);
attach_ctrlr_done, NULL, NULL, true, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -3342,7 +3374,7 @@ test_add_multi_io_paths_to_nbdev_ch(void)
memset(&ctrlr1->ns[0].uuid, 1, sizeof(struct spdk_uuid));
rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, true);
attach_ctrlr_done, NULL, NULL, true, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -3357,7 +3389,7 @@ test_add_multi_io_paths_to_nbdev_ch(void)
memset(&ctrlr2->ns[0].uuid, 1, sizeof(struct spdk_uuid));
rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, true);
attach_ctrlr_done, NULL, NULL, true, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -3405,7 +3437,7 @@ test_add_multi_io_paths_to_nbdev_ch(void)
memset(&ctrlr3->ns[0].uuid, 1, sizeof(struct spdk_uuid));
rc = bdev_nvme_create(&path3.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, true);
attach_ctrlr_done, NULL, NULL, true, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -3484,7 +3516,7 @@ test_admin_path(void)
memset(&ctrlr1->ns[0].uuid, 1, sizeof(struct spdk_uuid));
rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, true);
attach_ctrlr_done, NULL, NULL, true, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -3499,7 +3531,7 @@ test_admin_path(void)
memset(&ctrlr2->ns[0].uuid, 1, sizeof(struct spdk_uuid));
rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, true);
attach_ctrlr_done, NULL, NULL, true, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -3613,7 +3645,7 @@ test_reset_bdev_ctrlr(void)
SPDK_CU_ASSERT_FATAL(ctrlr1 != NULL);
rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, true);
attach_ctrlr_done, NULL, NULL, true, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -3626,7 +3658,7 @@ test_reset_bdev_ctrlr(void)
SPDK_CU_ASSERT_FATAL(ctrlr2 != NULL);
rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, true);
attach_ctrlr_done, NULL, NULL, true, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -3915,7 +3947,7 @@ test_retry_io_if_ana_state_is_updating(void)
g_ut_attach_bdev_count = 1;
rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, false);
attach_ctrlr_done, NULL, NULL, false, -1, 1);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -4042,7 +4074,7 @@ test_retry_io_for_io_path_error(void)
memset(&ctrlr1->ns[0].uuid, 1, sizeof(struct spdk_uuid));
rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, true);
attach_ctrlr_done, NULL, NULL, true, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -4137,7 +4169,7 @@ test_retry_io_for_io_path_error(void)
memset(&ctrlr2->ns[0].uuid, 1, sizeof(struct spdk_uuid));
rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, true);
attach_ctrlr_done, NULL, NULL, true, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -4245,7 +4277,7 @@ test_retry_io_count(void)
g_ut_attach_bdev_count = 1;
rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, false);
attach_ctrlr_done, NULL, NULL, false, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -4431,7 +4463,7 @@ test_concurrent_read_ana_log_page(void)
g_ut_attach_bdev_count = 1;
rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, false);
attach_ctrlr_done, NULL, NULL, false, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -4526,7 +4558,7 @@ test_retry_io_for_ana_error(void)
g_ut_attach_bdev_count = 1;
rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, false);
attach_ctrlr_done, NULL, NULL, false, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -4672,7 +4704,7 @@ test_retry_admin_passthru_for_path_error(void)
memset(&ctrlr1->ns[0].uuid, 1, sizeof(struct spdk_uuid));
rc = bdev_nvme_create(&path1.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, true);
attach_ctrlr_done, NULL, NULL, true, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -4754,7 +4786,7 @@ test_retry_admin_passthru_for_path_error(void)
memset(&ctrlr2->ns[0].uuid, 1, sizeof(struct spdk_uuid));
rc = bdev_nvme_create(&path2.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, true);
attach_ctrlr_done, NULL, NULL, true, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -4846,7 +4878,7 @@ test_retry_admin_passthru_by_count(void)
g_ut_attach_bdev_count = 1;
rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, false);
attach_ctrlr_done, NULL, NULL, false, 0, 0);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
@ -4938,6 +4970,554 @@ test_retry_admin_passthru_by_count(void)
g_opts.bdev_retry_count = 0;
}
/* Exercise bdev_nvme_check_multipath_params() over the boundary combinations
 * of ctrlr_loss_timeout_sec (may be -1 for "retry forever") and
 * reconnect_delay_sec, checking which pairings are accepted as valid.
 */
static void
test_check_multipath_params(void)
{
	/* Each case pairs (ctrlr_loss_timeout_sec, reconnect_delay_sec) with the
	 * verdict expected from bdev_nvme_check_multipath_params().
	 */
	struct {
		int32_t ctrlr_loss_timeout_sec;
		uint32_t reconnect_delay_sec;
		bool expected;
	} cases[] = {
		{-2, 1, false},
		{-1, 0, false},
		{1, 0, false},
		{1, 2, false},
		{0, 1, false},
		{-1, 1, true},
		{2, 2, true},
		{2, 1, true},
		{INT32_MAX, INT32_MAX, true},
		{-1, UINT32_MAX, true},
	};
	size_t i;

	for (i = 0; i < sizeof(cases) / sizeof(cases[0]); i++) {
		CU_ASSERT(bdev_nvme_check_multipath_params(cases[i].ctrlr_loss_timeout_sec,
				cases[i].reconnect_delay_sec) == cases[i].expected);
	}
}
/* Verify that I/O submitted while the nvme_ctrlr is resetting is queued on the
 * bdev channel's retry list and replayed in FIFO order once the reset completes.
 */
static void
test_retry_io_if_ctrlr_is_resetting(void)
{
	struct nvme_path_id path = {};
	struct spdk_nvme_ctrlr *ctrlr;
	struct nvme_bdev_ctrlr *nbdev_ctrlr;
	struct nvme_ctrlr *nvme_ctrlr;
	const int STRING_SIZE = 32;
	const char *attached_names[STRING_SIZE];
	struct nvme_bdev *bdev;
	struct nvme_ns *nvme_ns;
	struct spdk_bdev_io *bdev_io1, *bdev_io2;
	struct spdk_io_channel *ch;
	struct nvme_bdev_channel *nbdev_ch;
	struct nvme_io_path *io_path;
	struct nvme_ctrlr_channel *ctrlr_ch;
	int rc;

	memset(attached_names, 0, sizeof(char *) * STRING_SIZE);
	ut_init_trid(&path.trid);

	set_thread(0);

	ctrlr = ut_attach_ctrlr(&path.trid, 1, false, false);
	SPDK_CU_ASSERT_FATAL(ctrlr != NULL);

	g_ut_attach_ctrlr_status = 0;
	g_ut_attach_bdev_count = 1;

	/* ctrlr_loss_timeout_sec == -1 means retry reconnect indefinitely,
	 * with a 1 second delay between attempts.
	 */
	rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0,
			      attach_ctrlr_done, NULL, NULL, false, -1, 1);
	CU_ASSERT(rc == 0);

	spdk_delay_us(1000);
	poll_threads();

	nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name("nvme0");
	SPDK_CU_ASSERT_FATAL(nbdev_ctrlr != NULL);

	nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, &path.trid);
	CU_ASSERT(nvme_ctrlr != NULL);

	bdev = nvme_bdev_ctrlr_get_bdev(nbdev_ctrlr, 1);
	CU_ASSERT(bdev != NULL);

	nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr);
	CU_ASSERT(nvme_ns != NULL);

	bdev_io1 = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_WRITE, bdev, NULL);
	ut_bdev_io_set_buf(bdev_io1);

	bdev_io2 = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_WRITE, bdev, NULL);
	ut_bdev_io_set_buf(bdev_io2);

	ch = spdk_get_io_channel(bdev);
	SPDK_CU_ASSERT_FATAL(ch != NULL);

	nbdev_ch = spdk_io_channel_get_ctx(ch);

	io_path = ut_get_io_path_by_ctrlr(nbdev_ch, nvme_ctrlr);
	SPDK_CU_ASSERT_FATAL(io_path != NULL);

	ctrlr_ch = io_path->ctrlr_ch;
	SPDK_CU_ASSERT_FATAL(ctrlr_ch != NULL);
	SPDK_CU_ASSERT_FATAL(ctrlr_ch->qpair != NULL);

	bdev_io1->internal.ch = (struct spdk_bdev_channel *)ch;
	bdev_io2->internal.ch = (struct spdk_bdev_channel *)ch;

	/* If qpair is connected, I/O should succeed. */
	bdev_io1->internal.in_submit_request = true;

	bdev_nvme_submit_request(ch, bdev_io1);
	CU_ASSERT(bdev_io1->internal.in_submit_request == true);

	poll_threads();
	CU_ASSERT(bdev_io1->internal.in_submit_request == false);
	/* Fixed: was `status = SPDK_BDEV_IO_STATUS_SUCCESS` (assignment), which
	 * made the assertion vacuously true and overwrote the actual status.
	 */
	CU_ASSERT(bdev_io1->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);

	/* If qpair is disconnected, it is freed and then reconnected via resetting
	 * the corresponding nvme_ctrlr. I/O should be queued if it is submitted
	 * while resetting the nvme_ctrlr.
	 */
	ctrlr_ch->qpair->is_connected = false;
	ctrlr->is_failed = true;

	poll_thread_times(0, 4);

	CU_ASSERT(ctrlr_ch->qpair == NULL);
	CU_ASSERT(nvme_ctrlr->resetting == true);
	CU_ASSERT(ctrlr->is_failed == false);

	bdev_io1->internal.in_submit_request = true;

	bdev_nvme_submit_request(ch, bdev_io1);

	spdk_delay_us(1);

	bdev_io2->internal.in_submit_request = true;

	bdev_nvme_submit_request(ch, bdev_io2);

	/* Both I/Os are parked on the retry list in submission order. */
	CU_ASSERT(bdev_io1->internal.in_submit_request == true);
	CU_ASSERT(bdev_io2->internal.in_submit_request == true);
	CU_ASSERT(bdev_io1 == TAILQ_FIRST(&nbdev_ch->retry_io_list));
	CU_ASSERT(bdev_io2 == TAILQ_NEXT(bdev_io1, module_link));

	poll_threads();

	CU_ASSERT(ctrlr_ch->qpair != NULL);
	CU_ASSERT(nvme_ctrlr->resetting == false);

	/* bdev_io1 was delayed 1us before bdev_io2, so after 999999us only
	 * bdev_io1's retry timer has fired; bdev_io2 is still queued.
	 */
	spdk_delay_us(999999);

	poll_thread_times(0, 1);

	CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 1);
	CU_ASSERT(bdev_io1->internal.in_submit_request == true);
	CU_ASSERT(bdev_io2->internal.in_submit_request == true);
	CU_ASSERT(bdev_io2 == TAILQ_FIRST(&nbdev_ch->retry_io_list));

	poll_threads();

	CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 0);
	CU_ASSERT(bdev_io1->internal.in_submit_request == false);
	CU_ASSERT(bdev_io1->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);
	CU_ASSERT(bdev_io2->internal.in_submit_request == true);
	CU_ASSERT(bdev_io2 == TAILQ_FIRST(&nbdev_ch->retry_io_list));

	/* One more microsecond lets bdev_io2's retry fire as well. */
	spdk_delay_us(1);

	poll_thread_times(0, 1);

	CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 1);
	CU_ASSERT(bdev_io2->internal.in_submit_request == true);
	CU_ASSERT(TAILQ_EMPTY(&nbdev_ch->retry_io_list));

	poll_threads();

	CU_ASSERT(ctrlr_ch->qpair->num_outstanding_reqs == 0);
	CU_ASSERT(bdev_io2->internal.in_submit_request == false);
	CU_ASSERT(bdev_io2->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);

	free(bdev_io1);
	free(bdev_io2);

	spdk_put_io_channel(ch);

	poll_threads();

	rc = bdev_nvme_delete("nvme0", &g_any_path);
	CU_ASSERT(rc == 0);

	poll_threads();
	spdk_delay_us(1000);
	poll_threads();

	CU_ASSERT(nvme_bdev_ctrlr_get_by_name("nvme0") == NULL);
}
/* Verify that an admin passthrough request submitted while the nvme_ctrlr is
 * resetting is queued on the retry list and replayed after the reset finishes.
 */
static void
test_retry_admin_passthru_if_ctrlr_is_resetting(void)
{
	struct nvme_path_id path = {};
	struct spdk_nvme_ctrlr *ctrlr;
	struct nvme_bdev_ctrlr *nbdev_ctrlr;
	struct nvme_ctrlr *nvme_ctrlr;
	const int STRING_SIZE = 32;
	const char *attached_names[STRING_SIZE];
	struct nvme_bdev *bdev;
	struct spdk_bdev_io *admin_io;
	struct spdk_io_channel *ch;
	struct nvme_bdev_channel *nbdev_ch;
	int rc;

	memset(attached_names, 0, sizeof(char *) * STRING_SIZE);
	ut_init_trid(&path.trid);

	g_opts.bdev_retry_count = 1;

	set_thread(0);

	ctrlr = ut_attach_ctrlr(&path.trid, 1, false, false);
	SPDK_CU_ASSERT_FATAL(ctrlr != NULL);

	g_ut_attach_ctrlr_status = 0;
	g_ut_attach_bdev_count = 1;

	/* ctrlr_loss_timeout_sec == -1 means retry reconnect indefinitely,
	 * with a 1 second delay between attempts.
	 */
	rc = bdev_nvme_create(&path.trid, "nvme0", attached_names, STRING_SIZE, 0,
			      attach_ctrlr_done, NULL, NULL, false, -1, 1);
	CU_ASSERT(rc == 0);

	spdk_delay_us(1000);
	poll_threads();

	nbdev_ctrlr = nvme_bdev_ctrlr_get_by_name("nvme0");
	SPDK_CU_ASSERT_FATAL(nbdev_ctrlr != NULL);

	nvme_ctrlr = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, &path.trid);
	CU_ASSERT(nvme_ctrlr != NULL);

	bdev = nvme_bdev_ctrlr_get_bdev(nbdev_ctrlr, 1);
	CU_ASSERT(bdev != NULL);

	admin_io = ut_alloc_bdev_io(SPDK_BDEV_IO_TYPE_NVME_ADMIN, bdev, NULL);
	admin_io->u.nvme_passthru.cmd.opc = SPDK_NVME_OPC_GET_FEATURES;

	ch = spdk_get_io_channel(bdev);
	SPDK_CU_ASSERT_FATAL(ch != NULL);

	nbdev_ch = spdk_io_channel_get_ctx(ch);

	admin_io->internal.ch = (struct spdk_bdev_channel *)ch;

	/* If ctrlr is available, admin passthrough should succeed. */
	admin_io->internal.in_submit_request = true;

	bdev_nvme_submit_request(ch, admin_io);

	CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1);
	CU_ASSERT(admin_io->internal.in_submit_request == true);

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(admin_io->internal.in_submit_request == false);
	/* Fixed: was `status = SPDK_BDEV_IO_STATUS_SUCCESS` (assignment), which
	 * made the assertion vacuously true and overwrote the actual status.
	 */
	CU_ASSERT(admin_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);

	/* If ctrlr is resetting, admin passthrough request should be queued
	 * if it is submitted while resetting ctrlr.
	 */
	bdev_nvme_reset(nvme_ctrlr);

	poll_thread_times(0, 1);

	admin_io->internal.in_submit_request = true;

	bdev_nvme_submit_request(ch, admin_io);

	CU_ASSERT(admin_io->internal.in_submit_request == true);
	CU_ASSERT(admin_io == TAILQ_FIRST(&nbdev_ch->retry_io_list));

	poll_threads();

	CU_ASSERT(nvme_ctrlr->resetting == false);

	/* Advance past the retry delay so the queued request is resubmitted. */
	spdk_delay_us(1000000);

	poll_thread_times(0, 1);

	CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1);
	CU_ASSERT(admin_io->internal.in_submit_request == true);
	CU_ASSERT(TAILQ_EMPTY(&nbdev_ch->retry_io_list));

	spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
	poll_threads();

	CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 0);
	CU_ASSERT(admin_io->internal.in_submit_request == false);
	CU_ASSERT(admin_io->internal.status == SPDK_BDEV_IO_STATUS_SUCCESS);

	free(admin_io);

	spdk_put_io_channel(ch);

	poll_threads();

	rc = bdev_nvme_delete("nvme0", &g_any_path);
	CU_ASSERT(rc == 0);

	poll_threads();
	spdk_delay_us(1000);
	poll_threads();

	CU_ASSERT(nvme_bdev_ctrlr_get_by_name("nvme0") == NULL);

	g_opts.bdev_retry_count = 0;
}
/* Verify delayed reconnect after a failed reset: a failed reset arms the
 * reconnect_delay_timer (reconnect_delay_sec == 1), a later retry may succeed
 * or fail, and repeated failures past ctrlr_loss_timeout_sec (2 here) cause
 * the nvme_ctrlr to be destructed.
 */
static void
test_reconnect_ctrlr(void)
{
	struct spdk_nvme_transport_id trid = {};
	struct spdk_nvme_ctrlr ctrlr = {};
	struct nvme_ctrlr *nvme_ctrlr;
	struct spdk_io_channel *ch1, *ch2;
	struct nvme_ctrlr_channel *ctrlr_ch1, *ctrlr_ch2;
	int rc;

	ut_init_trid(&trid);
	TAILQ_INIT(&ctrlr.active_io_qpairs);

	set_thread(0);

	rc = nvme_ctrlr_create(&ctrlr, "nvme0", &trid, NULL);
	CU_ASSERT(rc == 0);

	nvme_ctrlr = nvme_ctrlr_get_by_name("nvme0");
	SPDK_CU_ASSERT_FATAL(nvme_ctrlr != NULL);

	/* Give up the ctrlr after 2 seconds of failed reconnects, retrying
	 * once per second.
	 */
	nvme_ctrlr->ctrlr_loss_timeout_sec = 2;
	nvme_ctrlr->reconnect_delay_sec = 1;

	ch1 = spdk_get_io_channel(nvme_ctrlr);
	SPDK_CU_ASSERT_FATAL(ch1 != NULL);

	ctrlr_ch1 = spdk_io_channel_get_ctx(ch1);
	CU_ASSERT(ctrlr_ch1->qpair != NULL);

	set_thread(1);

	ch2 = spdk_get_io_channel(nvme_ctrlr);
	SPDK_CU_ASSERT_FATAL(ch2 != NULL);

	ctrlr_ch2 = spdk_io_channel_get_ctx(ch2);

	/* Reset starts from thread 1. */
	set_thread(1);

	/* The reset should fail and a reconnect timer should be registered. */
	ctrlr.fail_reset = true;
	ctrlr.is_failed = true;

	rc = bdev_nvme_reset(nvme_ctrlr);
	CU_ASSERT(rc == 0);
	CU_ASSERT(nvme_ctrlr->resetting == true);
	CU_ASSERT(ctrlr.is_failed == true);

	poll_threads();

	/* Both channels' qpairs are torn down; reconnect is now pending. */
	CU_ASSERT(nvme_ctrlr->resetting == false);
	CU_ASSERT(ctrlr.is_failed == false);
	CU_ASSERT(ctrlr_ch1->qpair == NULL);
	CU_ASSERT(ctrlr_ch2->qpair == NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == true);

	/* Then a reconnect retry should succeed. */
	ctrlr.fail_reset = false;

	spdk_delay_us(SPDK_SEC_TO_USEC);

	/* First poll fires the reconnect timer and kicks off the reset. */
	poll_thread_times(0, 1);

	CU_ASSERT(nvme_ctrlr->resetting == true);
	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer == NULL);

	poll_threads();

	CU_ASSERT(nvme_ctrlr->resetting == false);
	CU_ASSERT(ctrlr_ch1->qpair != NULL);
	CU_ASSERT(ctrlr_ch2->qpair != NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == false);

	/* The reset should fail and a reconnect timer should be registered. */
	ctrlr.fail_reset = true;
	ctrlr.is_failed = true;

	rc = bdev_nvme_reset(nvme_ctrlr);
	CU_ASSERT(rc == 0);
	CU_ASSERT(nvme_ctrlr->resetting == true);
	CU_ASSERT(ctrlr.is_failed == true);

	poll_threads();

	CU_ASSERT(nvme_ctrlr->resetting == false);
	CU_ASSERT(ctrlr.is_failed == false);
	CU_ASSERT(ctrlr_ch1->qpair == NULL);
	CU_ASSERT(ctrlr_ch2->qpair == NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == true);

	/* Then a reconnect retry should still fail. */
	spdk_delay_us(SPDK_SEC_TO_USEC);

	poll_thread_times(0, 1);

	CU_ASSERT(nvme_ctrlr->resetting == true);
	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer == NULL);

	poll_threads();

	/* Only 1 second has elapsed since the failed reset; the 2 second
	 * ctrlr_loss_timeout has not been reached yet.
	 */
	CU_ASSERT(nvme_ctrlr->resetting == false);
	CU_ASSERT(ctrlr.is_failed == false);
	CU_ASSERT(ctrlr_ch1->qpair == NULL);
	CU_ASSERT(ctrlr_ch2->qpair == NULL);
	CU_ASSERT(bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr) == false);

	/* Then a reconnect retry should still fail and the ctrlr should be deleted. */
	spdk_delay_us(SPDK_SEC_TO_USEC);

	poll_threads();

	CU_ASSERT(nvme_ctrlr == nvme_ctrlr_get_by_name("nvme0"));
	CU_ASSERT(bdev_nvme_check_ctrlr_loss_timeout(nvme_ctrlr) == true);
	CU_ASSERT(nvme_ctrlr->destruct == true);

	spdk_put_io_channel(ch2);

	set_thread(0);

	spdk_put_io_channel(ch1);

	poll_threads();
	spdk_delay_us(1000);
	poll_threads();

	CU_ASSERT(nvme_ctrlr_get_by_name("nvme0") == NULL);
}
/* Look up the nvme_path_id registered on nvme_ctrlr whose transport ID matches
 * trid. Returns NULL when no registered path uses that transport ID.
 */
static struct nvme_path_id *
ut_get_path_id_by_trid(struct nvme_ctrlr *nvme_ctrlr,
		       const struct spdk_nvme_transport_id *trid)
{
	struct nvme_path_id *path_id;

	TAILQ_FOREACH(path_id, &nvme_ctrlr->trids, link) {
		if (spdk_nvme_transport_id_compare(&path_id->trid, trid) == 0) {
			return path_id;
		}
	}

	return NULL;
}
/* Verify path failover interaction with delayed reconnect: when a reset fails
 * and the reconnect is scheduled, the active path is switched to the next
 * alternative trid; removing a trid while a reconnect is pending switches the
 * active path again without starting a new reset.
 */
static void
test_retry_failover_ctrlr(void)
{
	struct spdk_nvme_transport_id trid1 = {}, trid2 = {}, trid3 = {};
	struct spdk_nvme_ctrlr ctrlr = {};
	struct nvme_ctrlr *nvme_ctrlr = NULL;
	struct nvme_path_id *path_id1, *path_id2, *path_id3;
	struct spdk_io_channel *ch;
	struct nvme_ctrlr_channel *ctrlr_ch;
	int rc;

	ut_init_trid(&trid1);
	ut_init_trid2(&trid2);
	ut_init_trid3(&trid3);
	TAILQ_INIT(&ctrlr.active_io_qpairs);

	set_thread(0);

	rc = nvme_ctrlr_create(&ctrlr, "nvme0", &trid1, NULL);
	CU_ASSERT(rc == 0);

	nvme_ctrlr = nvme_ctrlr_get_by_name("nvme0");
	SPDK_CU_ASSERT_FATAL(nvme_ctrlr != NULL);

	/* Retry reconnect indefinitely, once per second. */
	nvme_ctrlr->ctrlr_loss_timeout_sec = -1;
	nvme_ctrlr->reconnect_delay_sec = 1;

	rc = bdev_nvme_add_secondary_trid(nvme_ctrlr, &ctrlr, &trid2);
	CU_ASSERT(rc == 0);

	rc = bdev_nvme_add_secondary_trid(nvme_ctrlr, &ctrlr, &trid3);
	CU_ASSERT(rc == 0);

	ch = spdk_get_io_channel(nvme_ctrlr);
	SPDK_CU_ASSERT_FATAL(ch != NULL);

	ctrlr_ch = spdk_io_channel_get_ctx(ch);

	path_id1 = ut_get_path_id_by_trid(nvme_ctrlr, &trid1);
	SPDK_CU_ASSERT_FATAL(path_id1 != NULL);
	CU_ASSERT(path_id1->is_failed == false);
	CU_ASSERT(path_id1 == nvme_ctrlr->active_path_id);

	/* If reset failed and reconnect is scheduled, path_id is switched from trid1 to trid2. */
	ctrlr.fail_reset = true;
	ctrlr.is_failed = true;

	rc = bdev_nvme_reset(nvme_ctrlr);
	CU_ASSERT(rc == 0);

	poll_threads();

	CU_ASSERT(nvme_ctrlr->resetting == false);
	CU_ASSERT(ctrlr.is_failed == false);
	CU_ASSERT(ctrlr_ch->qpair == NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer != NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == true);

	/* trid1 is marked failed and trid2 becomes the active path. */
	CU_ASSERT(path_id1->is_failed == true);

	path_id2 = ut_get_path_id_by_trid(nvme_ctrlr, &trid2);
	SPDK_CU_ASSERT_FATAL(path_id2 != NULL);
	CU_ASSERT(path_id2->is_failed == false);
	CU_ASSERT(path_id2 == nvme_ctrlr->active_path_id);

	/* If we remove trid2 while reconnect is scheduled, trid2 is removed and path_id is
	 * switched to trid3 but reset is not started.
	 */
	rc = bdev_nvme_failover(nvme_ctrlr, true);
	CU_ASSERT(rc == 0);

	CU_ASSERT(ut_get_path_id_by_trid(nvme_ctrlr, &trid2) == NULL);

	path_id3 = ut_get_path_id_by_trid(nvme_ctrlr, &trid3);
	SPDK_CU_ASSERT_FATAL(path_id3 != NULL);
	CU_ASSERT(path_id3->is_failed == false);
	CU_ASSERT(path_id3 == nvme_ctrlr->active_path_id);

	CU_ASSERT(nvme_ctrlr->resetting == false);

	/* If reconnect succeeds, trid3 should be the active path_id */
	ctrlr.fail_reset = false;

	spdk_delay_us(SPDK_SEC_TO_USEC);

	/* First poll fires the reconnect timer and starts the reset. */
	poll_thread_times(0, 1);

	CU_ASSERT(nvme_ctrlr->resetting == true);
	CU_ASSERT(nvme_ctrlr->reconnect_delay_timer == NULL);

	poll_threads();

	CU_ASSERT(path_id3->is_failed == false);
	CU_ASSERT(path_id3 == nvme_ctrlr->active_path_id);
	CU_ASSERT(nvme_ctrlr->resetting == false);
	CU_ASSERT(ctrlr_ch->qpair != NULL);
	CU_ASSERT(nvme_ctrlr->reconnect_is_delayed == false);

	spdk_put_io_channel(ch);

	poll_threads();

	rc = bdev_nvme_delete("nvme0", &g_any_path);
	CU_ASSERT(rc == 0);

	poll_threads();
	spdk_delay_us(1000);
	poll_threads();

	CU_ASSERT(nvme_ctrlr_get_by_name("nvme0") == NULL);
}
int
main(int argc, const char **argv)
{
@ -4979,6 +5559,11 @@ main(int argc, const char **argv)
CU_ADD_TEST(suite, test_retry_io_for_ana_error);
CU_ADD_TEST(suite, test_retry_admin_passthru_for_path_error);
CU_ADD_TEST(suite, test_retry_admin_passthru_by_count);
CU_ADD_TEST(suite, test_check_multipath_params);
CU_ADD_TEST(suite, test_retry_io_if_ctrlr_is_resetting);
CU_ADD_TEST(suite, test_retry_admin_passthru_if_ctrlr_is_resetting);
CU_ADD_TEST(suite, test_reconnect_ctrlr);
CU_ADD_TEST(suite, test_retry_failover_ctrlr);
CU_basic_set_mode(CU_BRM_VERBOSE);