bdev/nvme: Protect ANA log page from concurrent reads by using an new flag

If an I/O failed by ANA error, the corresponding ANA state might be
out of date. In the following patches, for this case, read the latest
ANA log page and update the ANA state. Such reading ANA log page may be
done on multiple threads concurrently including AER ANA change.
Hence protect ANA log page by adding an new flag ana_log_page_updating
to struct nvme_ctrlr and using it.

Signed-off-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Change-Id: I8bb84091d50a5fdc0d9893b585be972dfd31c0f1
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9526
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
This commit is contained in:
Shuhei Matsumoto 2021-09-16 14:24:37 +09:00 committed by Tomasz Zawadzki
parent 1e79d21967
commit f3fec96c20
3 changed files with 112 additions and 19 deletions

View File

@ -532,7 +532,7 @@ nvme_ctrlr_release(struct nvme_ctrlr *nvme_ctrlr)
nvme_ctrlr->ref--;
if (nvme_ctrlr->ref > 0 || !nvme_ctrlr->destruct ||
nvme_ctrlr->resetting) {
nvme_ctrlr->resetting || nvme_ctrlr->ana_log_page_updating) {
pthread_mutex_unlock(&nvme_ctrlr->mutex);
return;
}
@ -1149,7 +1149,8 @@ _bdev_nvme_reset_complete(struct spdk_io_channel_iter *i, int status)
path_id->is_failed = !success;
if (nvme_ctrlr->ref == 0 && nvme_ctrlr->destruct) {
if (nvme_ctrlr->ref == 0 && nvme_ctrlr->destruct &&
!nvme_ctrlr->ana_log_page_updating) {
/* Complete pending destruct after reset completes. */
complete_pending_destruct = true;
}
@ -2716,19 +2717,6 @@ nvme_ctrlr_depopulate_namespaces(struct nvme_ctrlr *nvme_ctrlr)
}
}
static bool
nvme_ctrlr_acquire(struct nvme_ctrlr *nvme_ctrlr)
{
pthread_mutex_lock(&nvme_ctrlr->mutex);
if (nvme_ctrlr->destruct || nvme_ctrlr->resetting) {
pthread_mutex_unlock(&nvme_ctrlr->mutex);
return false;
}
nvme_ctrlr->ref++;
pthread_mutex_unlock(&nvme_ctrlr->mutex);
return true;
}
static int
nvme_ctrlr_set_ana_states(const struct spdk_nvme_ana_group_descriptor *desc,
void *cb_arg)
@ -2763,12 +2751,25 @@ nvme_ctrlr_read_ana_log_page_done(void *ctx, const struct spdk_nvme_cpl *cpl)
{
struct nvme_ctrlr *nvme_ctrlr = ctx;
if (spdk_nvme_cpl_is_success(cpl)) {
if (cpl != NULL && spdk_nvme_cpl_is_success(cpl)) {
bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ctrlr_set_ana_states,
nvme_ctrlr);
}
nvme_ctrlr_release(nvme_ctrlr);
pthread_mutex_lock(&nvme_ctrlr->mutex);
assert(nvme_ctrlr->ana_log_page_updating == true);
nvme_ctrlr->ana_log_page_updating = false;
if (nvme_ctrlr->ref > 0 || !nvme_ctrlr->destruct ||
nvme_ctrlr->resetting) {
pthread_mutex_unlock(&nvme_ctrlr->mutex);
return;
}
pthread_mutex_unlock(&nvme_ctrlr->mutex);
nvme_ctrlr_unregister(nvme_ctrlr);
}
static void
@ -2780,10 +2781,16 @@ nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
return;
}
if (!nvme_ctrlr_acquire(nvme_ctrlr)) {
pthread_mutex_lock(&nvme_ctrlr->mutex);
if (nvme_ctrlr->destruct || nvme_ctrlr->resetting ||
nvme_ctrlr->ana_log_page_updating) {
pthread_mutex_unlock(&nvme_ctrlr->mutex);
return;
}
nvme_ctrlr->ana_log_page_updating = true;
pthread_mutex_unlock(&nvme_ctrlr->mutex);
rc = spdk_nvme_ctrlr_cmd_get_log_page(nvme_ctrlr->ctrlr,
SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
SPDK_NVME_GLOBAL_NS_TAG,
@ -2792,7 +2799,7 @@ nvme_ctrlr_read_ana_log_page(struct nvme_ctrlr *nvme_ctrlr)
nvme_ctrlr_read_ana_log_page_done,
nvme_ctrlr);
if (rc != 0) {
nvme_ctrlr_release(nvme_ctrlr);
nvme_ctrlr_read_ana_log_page_done(nvme_ctrlr, NULL);
}
}

View File

@ -104,6 +104,7 @@ struct nvme_ctrlr {
uint32_t resetting : 1;
uint32_t failover_in_progress : 1;
uint32_t destruct : 1;
uint32_t ana_log_page_updating : 1;
/**
* PI check flags. This flags is set to NVMe controllers created only
* through bdev_nvme_attach_controller RPC or .INI config file. Hot added

View File

@ -4392,6 +4392,90 @@ test_retry_io_count(void)
g_opts.bdev_retry_count = 0;
}
static void
test_concurrent_read_ana_log_page(void)
{
struct spdk_nvme_transport_id trid = {};
struct spdk_nvme_ctrlr *ctrlr;
struct nvme_ctrlr *nvme_ctrlr;
const int STRING_SIZE = 32;
const char *attached_names[STRING_SIZE];
int rc;
memset(attached_names, 0, sizeof(char *) * STRING_SIZE);
ut_init_trid(&trid);
set_thread(0);
ctrlr = ut_attach_ctrlr(&trid, 1, true, false);
SPDK_CU_ASSERT_FATAL(ctrlr != NULL);
ctrlr->ns[0].ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
g_ut_attach_ctrlr_status = 0;
g_ut_attach_bdev_count = 1;
rc = bdev_nvme_create(&trid, "nvme0", attached_names, STRING_SIZE, 0,
attach_ctrlr_done, NULL, NULL, false);
CU_ASSERT(rc == 0);
spdk_delay_us(1000);
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
nvme_ctrlr = nvme_ctrlr_get_by_name("nvme0");
SPDK_CU_ASSERT_FATAL(nvme_ctrlr != NULL);
nvme_ctrlr_read_ana_log_page(nvme_ctrlr);
CU_ASSERT(nvme_ctrlr->ana_log_page_updating == true);
CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1);
/* Following read request should be rejected. */
nvme_ctrlr_read_ana_log_page(nvme_ctrlr);
CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1);
set_thread(1);
nvme_ctrlr_read_ana_log_page(nvme_ctrlr);
CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 1);
/* Reset request while reading ANA log page should not be rejected. */
rc = bdev_nvme_reset(nvme_ctrlr);
CU_ASSERT(rc == 0);
poll_threads();
spdk_delay_us(g_opts.nvme_adminq_poll_period_us);
poll_threads();
CU_ASSERT(nvme_ctrlr->ana_log_page_updating == false);
CU_ASSERT(ctrlr->adminq.num_outstanding_reqs == 0);
/* Read ANA log page while resetting ctrlr should be rejected. */
rc = bdev_nvme_reset(nvme_ctrlr);
CU_ASSERT(rc == 0);
nvme_ctrlr_read_ana_log_page(nvme_ctrlr);
CU_ASSERT(nvme_ctrlr->ana_log_page_updating == false);
set_thread(0);
rc = bdev_nvme_delete("nvme0", &g_any_path);
CU_ASSERT(rc == 0);
poll_threads();
spdk_delay_us(1000);
poll_threads();
CU_ASSERT(nvme_ctrlr_get_by_name("nvme0") == NULL);
}
int
main(int argc, const char **argv)
{
@ -4428,6 +4512,7 @@ main(int argc, const char **argv)
CU_ADD_TEST(suite, test_retry_io_if_ctrlr_is_resetting);
CU_ADD_TEST(suite, test_retry_io_for_io_path_error);
CU_ADD_TEST(suite, test_retry_io_count);
CU_ADD_TEST(suite, test_concurrent_read_ana_log_page);
CU_basic_set_mode(CU_BRM_VERBOSE);