From c93b5564c806b0e577834a212bc7ae06d99e6bf5 Mon Sep 17 00:00:00 2001 From: Ben Walker Date: Wed, 25 Aug 2021 09:58:16 -0700 Subject: [PATCH] bdev/nvme: Use an RB_TREE to hold namespaces in the controller If NN is very large this saves a lot of memory. This lookup is not generally used in the I/O path anyway. Change-Id: I98e190006843ad5d0bac8483bf9feb800d4a665a Signed-off-by: Ben Walker Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9884 Community-CI: Broadcom CI Tested-by: SPDK CI Jenkins Reviewed-by: Shuhei Matsumoto Reviewed-by: GangCao Reviewed-by: Aleksey Marchuk --- module/bdev/nvme/bdev_nvme.c | 90 ++++++------------- module/bdev/nvme/bdev_nvme.h | 5 +- module/bdev/nvme/vbdev_opal.c | 10 ++- .../lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c | 7 -- 4 files changed, 38 insertions(+), 74 deletions(-) diff --git a/module/bdev/nvme/bdev_nvme.c b/module/bdev/nvme/bdev_nvme.c index 1b068cdeee..71ffc35d52 100644 --- a/module/bdev/nvme/bdev_nvme.c +++ b/module/bdev/nvme/bdev_nvme.c @@ -192,6 +192,14 @@ static int bdev_nvme_reset(struct nvme_ctrlr *nvme_ctrlr); static int bdev_nvme_failover(struct nvme_ctrlr *nvme_ctrlr, bool remove); static void remove_cb(void *cb_ctx, struct spdk_nvme_ctrlr *ctrlr); +static int +nvme_ns_cmp(struct nvme_ns *ns1, struct nvme_ns *ns2) +{ + return ns1->id - ns2->id; +} + +RB_GENERATE_STATIC(nvme_ns_tree, nvme_ns, node, nvme_ns_cmp); + struct spdk_nvme_qpair * bdev_nvme_get_io_qpair(struct spdk_io_channel *ctrlr_io_ch) { @@ -273,48 +281,28 @@ nvme_bdev_ctrlr_get_bdev(struct nvme_bdev_ctrlr *nbdev_ctrlr, uint32_t nsid) struct nvme_ns * nvme_ctrlr_get_ns(struct nvme_ctrlr *nvme_ctrlr, uint32_t nsid) { - assert(nsid > 0); - assert(nsid <= nvme_ctrlr->num_ns); - if (nsid == 0 || nsid > nvme_ctrlr->num_ns) { - return NULL; - } + struct nvme_ns ns; - return nvme_ctrlr->namespaces[nsid - 1]; + assert(nsid > 0); + + ns.id = nsid; + return RB_FIND(nvme_ns_tree, &nvme_ctrlr->namespaces, &ns); } struct nvme_ns * nvme_ctrlr_get_first_active_ns(struct nvme_ctrlr *nvme_ctrlr) { - uint32_t i; - - for (i = 0; i < nvme_ctrlr->num_ns; i++) { - if (nvme_ctrlr->namespaces[i] != NULL) { - return nvme_ctrlr->namespaces[i]; - } - } - - return NULL; + return RB_MIN(nvme_ns_tree, &nvme_ctrlr->namespaces); } struct nvme_ns * nvme_ctrlr_get_next_active_ns(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *ns) { - uint32_t i; - if (ns == NULL) { return NULL; } - /* ns->id is a 1's based value and we want to start at the next - * entry in this array, so we start at ns->id and don't subtract to - * convert to 0's based. */ - for (i = ns->id; i < nvme_ctrlr->num_ns; i++) { - if (nvme_ctrlr->namespaces[i] != NULL) { - return nvme_ctrlr->namespaces[i]; - } - } - - return NULL; + return RB_NEXT(nvme_ns_tree, &nvme_ctrlr->namespaces, ns); } static struct nvme_ctrlr * @@ -425,7 +413,7 @@ static void _nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr) { struct nvme_path_id *path_id, *tmp_path; - uint32_t i; + struct nvme_ns *ns, *tmp_ns; free(nvme_ctrlr->copied_ana_desc); spdk_free(nvme_ctrlr->ana_log_page); @@ -439,8 +427,9 @@ _nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr) nvme_bdev_ctrlr_delete(nvme_ctrlr->nbdev_ctrlr, nvme_ctrlr); } - for (i = 0; i < nvme_ctrlr->num_ns; i++) { - free(nvme_ctrlr->namespaces[i]); + RB_FOREACH_SAFE(ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp_ns) { + RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, ns); + free(ns); } TAILQ_FOREACH_SAFE(path_id, &nvme_ctrlr->trids, link, tmp_path) { @@ -450,7 +439,6 @@ _nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr) pthread_mutex_destroy(&nvme_ctrlr->mutex); - free(nvme_ctrlr->namespaces); free(nvme_ctrlr); pthread_mutex_lock(&g_bdev_nvme_mutex); @@ -2229,7 +2217,7 @@ nvme_ctrlr_populate_namespace_done(struct nvme_ns *nvme_ns, int rc) nvme_ctrlr->ref++; pthread_mutex_unlock(&nvme_ctrlr->mutex); } else { - nvme_ctrlr->namespaces[nvme_ns->id - 1] = NULL; + RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns); free(nvme_ns); } @@ -2372,7 +2360,7 @@ nvme_ctrlr_depopulate_namespace_done(struct nvme_ns *nvme_ns) pthread_mutex_lock(&nvme_ctrlr->mutex); - nvme_ctrlr->namespaces[nvme_ns->id - 1] = NULL; + RB_REMOVE(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns); if (nvme_ns->bdev != NULL) { pthread_mutex_unlock(&nvme_ctrlr->mutex); @@ -2497,8 +2485,6 @@ nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr, continue; } - nvme_ctrlr->namespaces[nsid - 1] = nvme_ns; - nvme_ns->id = nsid; nvme_ns->ctrlr = nvme_ctrlr; @@ -2509,6 +2495,8 @@ nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr, } nvme_ns->probe_ctx = ctx; + RB_INSERT(nvme_ns_tree, &nvme_ctrlr->namespaces, nvme_ns); + nvme_ctrlr_populate_namespace(nvme_ctrlr, nvme_ns); } @@ -2532,17 +2520,10 @@ nvme_ctrlr_populate_namespaces(struct nvme_ctrlr *nvme_ctrlr, static void nvme_ctrlr_depopulate_namespaces(struct nvme_ctrlr *nvme_ctrlr) { - uint32_t i; - struct nvme_ns *nvme_ns; + struct nvme_ns *nvme_ns, *tmp; - for (i = 0; i < nvme_ctrlr->num_ns; i++) { - uint32_t nsid = i + 1; - - nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid); - if (nvme_ns != NULL) { - assert(nvme_ns->id == nsid); - nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns); - } + RB_FOREACH_SAFE(nvme_ns, nvme_ns_tree, &nvme_ctrlr->namespaces, tmp) { + nvme_ctrlr_depopulate_namespace(nvme_ctrlr, nvme_ns); } } @@ -2569,7 +2550,7 @@ nvme_ctrlr_set_ana_states(const struct spdk_nvme_ana_group_descriptor *desc, for (i = 0; i < desc->num_of_nsid; i++) { nsid = desc->nsid[i]; - if (nsid == 0 || nsid > nvme_ctrlr->num_ns) { + if (nsid == 0) { continue; } @@ -2823,7 +2804,6 @@ nvme_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr, { struct nvme_ctrlr *nvme_ctrlr; struct nvme_path_id *path_id; - uint32_t num_ns; const struct spdk_nvme_ctrlr_data *cdata; int rc; @@ -2841,17 +2821,7 @@ nvme_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr, TAILQ_INIT(&nvme_ctrlr->trids); - num_ns = spdk_nvme_ctrlr_get_num_ns(ctrlr); - if (num_ns != 0) { - nvme_ctrlr->namespaces = calloc(num_ns, sizeof(struct nvme_ns *)); - if (!nvme_ctrlr->namespaces) { - SPDK_ERRLOG("Failed to allocate block namespaces pointer\n"); - rc = -ENOMEM; - goto err; - } - - nvme_ctrlr->num_ns = num_ns; - } + RB_INIT(&nvme_ctrlr->namespaces); path_id = calloc(1, sizeof(*path_id)); if (path_id == NULL) { @@ -3199,10 +3169,6 @@ bdev_nvme_compare_namespaces(struct nvme_ctrlr *nvme_ctrlr, struct nvme_ns *nvme_ns; struct spdk_nvme_ns *new_ns; - if (spdk_nvme_ctrlr_get_num_ns(new_ctrlr) != nvme_ctrlr->num_ns) { - return -EINVAL; - } - nvme_ns = nvme_ctrlr_get_first_active_ns(nvme_ctrlr); while (nvme_ns != NULL) { new_ns = spdk_nvme_ctrlr_get_ns(new_ctrlr, nvme_ns->id); diff --git a/module/bdev/nvme/bdev_nvme.h b/module/bdev/nvme/bdev_nvme.h index a8d3313eea..ae263d0f3e 100644 --- a/module/bdev/nvme/bdev_nvme.h +++ b/module/bdev/nvme/bdev_nvme.h @@ -75,6 +75,7 @@ struct nvme_ns { enum spdk_nvme_ana_state ana_state; struct nvme_async_probe_ctx *probe_ctx; TAILQ_ENTRY(nvme_ns) tailq; + RB_ENTRY(nvme_ns) node; }; struct nvme_bdev_io; @@ -107,9 +108,7 @@ struct nvme_ctrlr { * NVMe controllers are not included. */ uint32_t prchk_flags; - uint32_t num_ns; - /** Array of pointers to namespaces indexed by nsid - 1 */ - struct nvme_ns **namespaces; + RB_HEAD(nvme_ns_tree, nvme_ns) namespaces; struct spdk_opal_dev *opal_dev; diff --git a/module/bdev/nvme/vbdev_opal.c b/module/bdev/nvme/vbdev_opal.c index fa0de5c956..db9f957cf1 100644 --- a/module/bdev/nvme/vbdev_opal.c +++ b/module/bdev/nvme/vbdev_opal.c @@ -326,6 +326,7 @@ vbdev_opal_create(const char *nvme_ctrlr_name, uint32_t nsid, uint8_t locking_ra struct vbdev_opal_part_base *opal_part_base = NULL; struct spdk_bdev_part *part_bdev; struct nvme_bdev *nvme_bdev; + struct nvme_ns *nvme_ns; if (nsid != NSID_SUPPORTED) { SPDK_ERRLOG("nsid %d not supported", nsid); @@ -356,8 +357,13 @@ vbdev_opal_create(const char *nvme_ctrlr_name, uint32_t nsid, uint8_t locking_ra opal_bdev->nvme_ctrlr = nvme_ctrlr; opal_bdev->opal_dev = nvme_ctrlr->opal_dev; - assert(nsid <= nvme_ctrlr->num_ns); - nvme_bdev = nvme_ctrlr_get_ns(nvme_ctrlr, nsid)->bdev; + nvme_ns = nvme_ctrlr_get_ns(nvme_ctrlr, nsid); + if (nvme_ns == NULL) { + free(opal_bdev); + return -ENODEV; + } + + nvme_bdev = nvme_ns->bdev; assert(nvme_bdev != NULL); base_bdev_name = nvme_bdev->disk.name; diff --git a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c index f2e3c84c52..e23069dfc1 100644 --- a/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c +++ b/test/unit/lib/bdev/nvme/bdev_nvme.c/bdev_nvme_ut.c @@ -1734,7 +1734,6 @@ test_attach_ctrlr(void) nvme_ctrlr = nvme_ctrlr_get_by_name("nvme0"); SPDK_CU_ASSERT_FATAL(nvme_ctrlr != NULL); CU_ASSERT(nvme_ctrlr->ctrlr == ctrlr); - CU_ASSERT(nvme_ctrlr->num_ns == 0); rc = bdev_nvme_delete("nvme0", &g_any_path); CU_ASSERT(rc == 0); @@ -1763,7 +1762,6 @@ test_attach_ctrlr(void) nvme_ctrlr = nvme_ctrlr_get_by_name("nvme0"); SPDK_CU_ASSERT_FATAL(nvme_ctrlr != NULL); CU_ASSERT(nvme_ctrlr->ctrlr == ctrlr); - CU_ASSERT(nvme_ctrlr->num_ns == 1); CU_ASSERT(attached_names[0] != NULL && strcmp(attached_names[0], "nvme0n1") == 0); attached_names[0] = NULL; @@ -1800,7 +1798,6 @@ test_attach_ctrlr(void) nvme_ctrlr = nvme_ctrlr_get_by_name("nvme0"); SPDK_CU_ASSERT_FATAL(nvme_ctrlr != NULL); CU_ASSERT(nvme_ctrlr->ctrlr == ctrlr); - CU_ASSERT(nvme_ctrlr->num_ns == 1); CU_ASSERT(attached_names[0] == NULL); @@ -1858,7 +1855,6 @@ test_aer_cb(void) nvme_ctrlr = nvme_ctrlr_get_by_name("nvme0"); SPDK_CU_ASSERT_FATAL(nvme_ctrlr != NULL); - CU_ASSERT(nvme_ctrlr->num_ns == 4); CU_ASSERT(nvme_ctrlr_get_ns(nvme_ctrlr, 1) == NULL); CU_ASSERT(nvme_ctrlr_get_ns(nvme_ctrlr, 2) != NULL); CU_ASSERT(nvme_ctrlr_get_ns(nvme_ctrlr, 3) != NULL); @@ -2656,7 +2652,6 @@ test_init_ana_log_page(void) nvme_ctrlr = nvme_ctrlr_get_by_name("nvme0"); SPDK_CU_ASSERT_FATAL(nvme_ctrlr != NULL); - CU_ASSERT(nvme_ctrlr->num_ns == 5); CU_ASSERT(nvme_ctrlr_get_ns(nvme_ctrlr, 1) != NULL); CU_ASSERT(nvme_ctrlr_get_ns(nvme_ctrlr, 2) != NULL); CU_ASSERT(nvme_ctrlr_get_ns(nvme_ctrlr, 3) != NULL); @@ -3093,7 +3088,6 @@ test_add_multi_ns_to_bdev(void) nvme_ctrlr1 = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, &path1.trid); SPDK_CU_ASSERT_FATAL(nvme_ctrlr1 != NULL); - CU_ASSERT(nvme_ctrlr1->num_ns == 5); CU_ASSERT(nvme_ctrlr_get_ns(nvme_ctrlr1, 1) != NULL); CU_ASSERT(nvme_ctrlr_get_ns(nvme_ctrlr1, 2) == NULL); CU_ASSERT(nvme_ctrlr_get_ns(nvme_ctrlr1, 3) != NULL); @@ -3103,7 +3097,6 @@ test_add_multi_ns_to_bdev(void) nvme_ctrlr2 = nvme_bdev_ctrlr_get_ctrlr(nbdev_ctrlr, &path2.trid); SPDK_CU_ASSERT_FATAL(nvme_ctrlr2 != NULL); - CU_ASSERT(nvme_ctrlr2->num_ns == 5); CU_ASSERT(nvme_ctrlr_get_ns(nvme_ctrlr2, 1) != NULL); CU_ASSERT(nvme_ctrlr_get_ns(nvme_ctrlr2, 2) != NULL); CU_ASSERT(nvme_ctrlr_get_ns(nvme_ctrlr2, 3) == NULL);