bdev/nvme: Read ANA log page itself when nvme_ctrlr is created

If the ctrlr supports the ANA log page, nvme_ctrlr allocates a buffer for
it and reads the ANA log page itself. Each nvme_ns then sets its ANA state
by parsing that ANA log page.

Most of the code was brought over from the NVMe driver because the NVMe
driver already supports ANA log page management. However, that code had a
bug: it assumed each descriptor is 8-byte aligned. This patch fixes that bug
as well. In addition, the implementation in the NVMe driver was synchronous,
whereas the NVMe bdev module reads the ANA log page asynchronously.

The next patch will support ANA log page update by AER handler.

Signed-off-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Change-Id: Ib8eab887633b043b394a45702037859414b8e0a0
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/8318
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Monica Kenguva <monica.kenguva@intel.com>
Reviewed-by: Aleksey Marchuk <alexeymar@mellanox.com>
This commit is contained in:
Shuhei Matsumoto 2021-07-05 20:07:14 +09:00 committed by Tomasz Zawadzki
parent fa44df3627
commit 7ec6d1d559
4 changed files with 293 additions and 15 deletions

View File

@ -1340,6 +1340,62 @@ static const struct spdk_bdev_fn_table nvmelib_fn_table = {
.get_module_ctx = bdev_nvme_get_module_ctx,
};
/*
 * Callback invoked once per ANA group descriptor found in the ANA log page.
 *
 * desc points at a copy of the descriptor held in nvme_ctrlr->copied_ana_desc,
 * so its fields can be read without alignment concerns. Returning non-zero
 * stops the iteration and that value is returned to the caller of
 * bdev_nvme_parse_ana_log_page().
 */
typedef int (*bdev_nvme_parse_ana_log_page_cb)(
	const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg);

/*
 * Walk every ANA group descriptor in nvme_ctrlr->ana_log_page and hand each
 * one to cb_fn.
 *
 * Descriptors are variable-length (header + num_of_nsid NSIDs), so a
 * descriptor is not guaranteed to start on an 8-byte boundary. Each
 * descriptor is therefore copied into nvme_ctrlr->copied_ana_desc before
 * being parsed.
 *
 * Returns:
 *   0        all descriptors were visited and cb_fn always returned 0
 *   -EINVAL  no ANA log page buffer is present, or the log page contents do
 *            not fit in the buffer (descriptor count / num_of_nsid too large)
 *   other    the first non-zero value returned by cb_fn
 */
static int
bdev_nvme_parse_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
			     bdev_nvme_parse_ana_log_page_cb cb_fn, void *cb_arg)
{
	struct spdk_nvme_ana_group_descriptor *copied_desc;
	uint8_t *orig_desc;
	uint32_t i, copy_len;
	uint64_t desc_size;
	int rc = 0;

	if (nvme_ctrlr->ana_log_page == NULL) {
		return -EINVAL;
	}

	/* The buffer must at least hold the log page header. */
	if (nvme_ctrlr->ana_log_page_size < sizeof(struct spdk_nvme_ana_page)) {
		return -EINVAL;
	}

	copied_desc = nvme_ctrlr->copied_ana_desc;

	orig_desc = (uint8_t *)nvme_ctrlr->ana_log_page + sizeof(struct spdk_nvme_ana_page);
	copy_len = nvme_ctrlr->ana_log_page_size - sizeof(struct spdk_nvme_ana_page);

	for (i = 0; i < nvme_ctrlr->ana_log_page->num_ana_group_desc; i++) {
		/* The header claims more descriptors than the buffer can hold. */
		if (copy_len < sizeof(struct spdk_nvme_ana_group_descriptor)) {
			return -EINVAL;
		}

		/* The descriptor length is unknown until its header is read, so
		 * copy everything that remains in the log page buffer.
		 */
		memcpy(copied_desc, orig_desc, copy_len);

		/* Compute in 64 bits so a bogus num_of_nsid cannot wrap the size. */
		desc_size = sizeof(struct spdk_nvme_ana_group_descriptor) +
			    (uint64_t)copied_desc->num_of_nsid * sizeof(uint32_t);

		/* Reject a descriptor whose NSID list runs past the end of the
		 * buffer before the callback indexes into it, and before
		 * copy_len could underflow below.
		 */
		if (desc_size > copy_len) {
			return -EINVAL;
		}

		rc = cb_fn(copied_desc, cb_arg);
		if (rc != 0) {
			break;
		}

		orig_desc += desc_size;
		copy_len -= (uint32_t)desc_size;
	}

	return rc;
}
/*
 * ANA descriptor callback: if the namespace passed in cb_arg is listed in
 * this descriptor, record the descriptor's ANA group ID and ANA state on it.
 *
 * Returns 1 when the namespace was found (the non-zero value ends the log
 * page walk), or 0 when this descriptor does not list the namespace.
 */
static int
nvme_ns_set_ana_state(const struct spdk_nvme_ana_group_descriptor *desc, void *cb_arg)
{
	struct nvme_ns *target = cb_arg;
	uint32_t idx = 0;

	while (idx < desc->num_of_nsid) {
		if (desc->nsid[idx] == spdk_nvme_ns_get_id(target->ns)) {
			target->ana_group_id = desc->ana_group_id;
			target->ana_state = desc->ana_state;
			return 1;
		}
		idx++;
	}

	return 0;
}
static int
nvme_disk_create(struct spdk_bdev *disk, const char *base_name,
struct spdk_nvme_ctrlr *ctrlr, struct spdk_nvme_ns *ns,
@ -1510,6 +1566,11 @@ nvme_ctrlr_populate_standard_namespace(struct nvme_ctrlr *nvme_ctrlr,
nvme_ns->ns = ns;
nvme_ns->populated = true;
nvme_ns->ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
if (nvme_ctrlr->ana_log_page != NULL) {
bdev_nvme_parse_ana_log_page(nvme_ctrlr, nvme_ns_set_ana_state, nvme_ns);
}
rc = nvme_bdev_create(nvme_ctrlr, nvme_ns);
done:
@ -1811,6 +1872,72 @@ nvme_ctrlr_create_done(struct nvme_ctrlr *nvme_ctrlr,
nvme_ctrlr_populate_namespaces(nvme_ctrlr, ctx);
}
/*
 * Completion callback for the initial ANA log page read.
 *
 * _ctx is the nvme_ctrlr that issued the read. The probe context stashed on
 * the nvme_ctrlr is taken over and cleared here. On success, controller
 * creation continues via nvme_ctrlr_create_done(). On error, the nvme_ctrlr
 * is deleted and, if a probe context exists, it is notified with a count of
 * 0 and a status of -1.
 */
static void
nvme_ctrlr_init_ana_log_page_done(void *_ctx, const struct spdk_nvme_cpl *cpl)
{
	struct nvme_ctrlr *nvme_ctrlr = _ctx;
	struct nvme_async_probe_ctx *probe_ctx;

	probe_ctx = nvme_ctrlr->probe_ctx;
	nvme_ctrlr->probe_ctx = NULL;

	if (!spdk_nvme_cpl_is_error(cpl)) {
		nvme_ctrlr_create_done(nvme_ctrlr, probe_ctx);
		return;
	}

	/* Reading the log page failed: tear the controller down. */
	nvme_ctrlr_delete(nvme_ctrlr);

	if (probe_ctx == NULL) {
		return;
	}

	populate_namespaces_cb(probe_ctx, 0, -1);
}
/*
 * Allocate the ANA log page buffers for nvme_ctrlr and submit an asynchronous
 * Get Log Page command to read the ANA log page.
 *
 * The buffer is sized from the controller data: one log page header, one
 * descriptor per ANA group ID (nanagrpid) and one NSID entry per namespace
 * (nn). ctx is stored in nvme_ctrlr->probe_ctx so that the completion
 * callback can continue the probe sequence.
 *
 * Buffers allocated here are not released on failure in this function; they
 * stay attached to nvme_ctrlr.
 *
 * Returns 0 if the command was submitted, or a negative errno:
 *   -EINVAL  the computed log page size does not fit in 32 bits
 *   -ENOMEM  a buffer could not be allocated
 *   other    error from spdk_nvme_ctrlr_cmd_get_log_page()
 */
static int
nvme_ctrlr_init_ana_log_page(struct nvme_ctrlr *nvme_ctrlr,
			     struct nvme_async_probe_ctx *ctx)
{
	struct spdk_nvme_ctrlr *ctrlr = nvme_ctrlr->ctrlr;
	const struct spdk_nvme_ctrlr_data *cdata;
	uint64_t ana_log_page_size;
	int rc;

	cdata = spdk_nvme_ctrlr_get_data(ctrlr);

	/* Compute in 64 bits: the result is stored in, and passed on as, a
	 * 32-bit length, so it must be checked rather than silently truncated.
	 */
	ana_log_page_size = sizeof(struct spdk_nvme_ana_page) +
			    (uint64_t)cdata->nanagrpid * sizeof(struct spdk_nvme_ana_group_descriptor) +
			    (uint64_t)cdata->nn * sizeof(uint32_t);
	if (ana_log_page_size > UINT32_MAX) {
		SPDK_ERRLOG("ANA log page size is too large\n");
		return -EINVAL;
	}

	nvme_ctrlr->ana_log_page = spdk_zmalloc(ana_log_page_size, 64, NULL,
						SPDK_ENV_SOCKET_ID_ANY, SPDK_MALLOC_DMA);
	if (nvme_ctrlr->ana_log_page == NULL) {
		SPDK_ERRLOG("could not allocate ANA log page buffer\n");
		return -ENOMEM;
	}

	/* Each descriptor in an ANA log page is not ensured to be 8-bytes aligned.
	 * Hence copy each descriptor to a temporary area when parsing it.
	 *
	 * Allocate a buffer whose size is as large as ANA log page buffer because
	 * we do not know the size of a descriptor until actually reading it.
	 */
	nvme_ctrlr->copied_ana_desc = calloc(1, ana_log_page_size);
	if (nvme_ctrlr->copied_ana_desc == NULL) {
		SPDK_ERRLOG("could not allocate a buffer to parse ANA descriptor\n");
		return -ENOMEM;
	}

	nvme_ctrlr->ana_log_page_size = (uint32_t)ana_log_page_size;

	nvme_ctrlr->probe_ctx = ctx;

	rc = spdk_nvme_ctrlr_cmd_get_log_page(ctrlr,
					      SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS,
					      SPDK_NVME_GLOBAL_NS_TAG,
					      nvme_ctrlr->ana_log_page,
					      nvme_ctrlr->ana_log_page_size, 0,
					      nvme_ctrlr_init_ana_log_page_done,
					      nvme_ctrlr);
	if (rc != 0) {
		/* No completion will arrive, so do not keep a reference to ctx. */
		nvme_ctrlr->probe_ctx = NULL;
	}

	return rc;
}
static int
nvme_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr,
const char *name,
@ -1821,6 +1948,7 @@ nvme_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr,
struct nvme_ctrlr *nvme_ctrlr;
struct nvme_ctrlr_trid *trid_entry;
uint32_t i, num_ns;
const struct spdk_nvme_ctrlr_data *cdata;
int rc;
nvme_ctrlr = calloc(1, sizeof(*nvme_ctrlr));
@ -1879,7 +2007,7 @@ nvme_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr,
goto err;
}
if (spdk_nvme_ctrlr_is_ocssd_supported(nvme_ctrlr->ctrlr)) {
if (spdk_nvme_ctrlr_is_ocssd_supported(ctrlr)) {
rc = bdev_ocssd_init_ctrlr(nvme_ctrlr);
if (spdk_unlikely(rc != 0)) {
SPDK_ERRLOG("Unable to initialize OCSSD controller\n");
@ -1908,13 +2036,22 @@ nvme_ctrlr_create(struct spdk_nvme_ctrlr *ctrlr,
spdk_nvme_ctrlr_register_aer_callback(ctrlr, aer_cb, nvme_ctrlr);
spdk_nvme_ctrlr_set_remove_cb(ctrlr, remove_cb, nvme_ctrlr);
if (spdk_nvme_ctrlr_get_flags(nvme_ctrlr->ctrlr) &
if (spdk_nvme_ctrlr_get_flags(ctrlr) &
SPDK_NVME_CTRLR_SECURITY_SEND_RECV_SUPPORTED) {
nvme_ctrlr->opal_dev = spdk_opal_dev_construct(nvme_ctrlr->ctrlr);
nvme_ctrlr->opal_dev = spdk_opal_dev_construct(ctrlr);
}
nvme_ctrlr_create_done(nvme_ctrlr, ctx);
return 0;
cdata = spdk_nvme_ctrlr_get_data(ctrlr);
if (cdata->cmic.ana_reporting) {
rc = nvme_ctrlr_init_ana_log_page(nvme_ctrlr, ctx);
if (rc == 0) {
return 0;
}
} else {
nvme_ctrlr_create_done(nvme_ctrlr, ctx);
return 0;
}
err:
nvme_ctrlr_delete(nvme_ctrlr);

View File

@ -122,6 +122,9 @@ nvme_ctrlr_delete(struct nvme_ctrlr *nvme_ctrlr)
struct nvme_ctrlr_trid *trid, *tmp_trid;
uint32_t i;
free(nvme_ctrlr->copied_ana_desc);
spdk_free(nvme_ctrlr->ana_log_page);
if (nvme_ctrlr->opal_dev) {
spdk_opal_dev_destruct(nvme_ctrlr->opal_dev);
nvme_ctrlr->opal_dev = NULL;

View File

@ -85,6 +85,8 @@ struct nvme_ns {
struct nvme_ctrlr *ctrlr;
struct nvme_bdev *bdev;
void *type_ctx;
uint32_t ana_group_id;
enum spdk_nvme_ana_state ana_state;
};
struct nvme_bdev_io;
@ -137,6 +139,12 @@ struct nvme_ctrlr {
TAILQ_HEAD(, nvme_ctrlr_trid) trids;
uint32_t ana_log_page_size;
struct spdk_nvme_ana_page *ana_log_page;
struct spdk_nvme_ana_group_descriptor *copied_ana_desc;
struct nvme_async_probe_ctx *probe_ctx;
pthread_mutex_t mutex;
};

View File

@ -236,6 +236,7 @@ struct spdk_nvme_ns {
uint32_t id;
bool is_active;
struct spdk_uuid uuid;
enum spdk_nvme_ana_state ana_state;
};
struct spdk_nvme_qpair {
@ -354,7 +355,8 @@ spdk_nvme_transport_id_compare(const struct spdk_nvme_transport_id *trid1,
}
static struct spdk_nvme_ctrlr *
ut_attach_ctrlr(const struct spdk_nvme_transport_id *trid, uint32_t num_ns)
ut_attach_ctrlr(const struct spdk_nvme_transport_id *trid, uint32_t num_ns,
bool ana_reporting)
{
struct spdk_nvme_ctrlr *ctrlr;
uint32_t i;
@ -394,10 +396,15 @@ ut_attach_ctrlr(const struct spdk_nvme_transport_id *trid, uint32_t num_ns)
ctrlr->ns[i].id = i + 1;
ctrlr->ns[i].ctrlr = ctrlr;
ctrlr->ns[i].is_active = true;
ctrlr->ns[i].ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
ctrlr->nsdata[i].nsze = 1024;
}
ctrlr->cdata.nn = num_ns;
ctrlr->cdata.nanagrpid = num_ns;
}
ctrlr->cdata.cmic.ana_reporting = ana_reporting;
ctrlr->trid = *trid;
TAILQ_INIT(&ctrlr->active_io_qpairs);
@ -675,6 +682,62 @@ spdk_nvme_ctrlr_fail(struct spdk_nvme_ctrlr *ctrlr)
ctrlr->is_failed = true;
}
#define UT_ANA_DESC_SIZE (sizeof(struct spdk_nvme_ana_group_descriptor) + \
sizeof(uint32_t))
static void
ut_create_ana_log_page(struct spdk_nvme_ctrlr *ctrlr, char *buf, uint32_t length)
{
struct spdk_nvme_ana_page ana_hdr;
char _ana_desc[UT_ANA_DESC_SIZE];
struct spdk_nvme_ana_group_descriptor *ana_desc;
struct spdk_nvme_ns *ns;
uint32_t i;
memset(&ana_hdr, 0, sizeof(ana_hdr));
ana_hdr.num_ana_group_desc = ctrlr->num_ns;
SPDK_CU_ASSERT_FATAL(sizeof(ana_hdr) <= length);
memcpy(buf, (char *)&ana_hdr, sizeof(ana_hdr));
buf += sizeof(ana_hdr);
length -= sizeof(ana_hdr);
ana_desc = (struct spdk_nvme_ana_group_descriptor *)_ana_desc;
for (i = 0; i < ctrlr->num_ns; i++) {
ns = &ctrlr->ns[i];
memset(ana_desc, 0, UT_ANA_DESC_SIZE);
ana_desc->ana_group_id = ns->id;
ana_desc->num_of_nsid = 1;
ana_desc->ana_state = ns->ana_state;
ana_desc->nsid[0] = ns->id;
SPDK_CU_ASSERT_FATAL(UT_ANA_DESC_SIZE <= length);
memcpy(buf, (char *)ana_desc, UT_ANA_DESC_SIZE);
buf += UT_ANA_DESC_SIZE;
length -= UT_ANA_DESC_SIZE;
}
}
/*
 * Unit-test stub for the Get Log Page admin command.
 *
 * For the ANA log page, the payload is filled in right away with a synthetic
 * log page built from the mock ctrlr (only offset 0 is supported). In every
 * case a GET_LOG_PAGE request is queued on the admin queue pair, and the
 * result of queuing it is returned.
 */
int
spdk_nvme_ctrlr_cmd_get_log_page(struct spdk_nvme_ctrlr *ctrlr,
				 uint8_t log_page, uint32_t nsid,
				 void *payload, uint32_t payload_size,
				 uint64_t offset,
				 spdk_nvme_cmd_cb cb_fn, void *cb_arg)
{
	switch (log_page) {
	case SPDK_NVME_LOG_ASYMMETRIC_NAMESPACE_ACCESS:
		SPDK_CU_ASSERT_FATAL(offset == 0);
		ut_create_ana_log_page(ctrlr, payload, payload_size);
		break;
	default:
		/* Other log pages leave the payload untouched. */
		break;
	}

	return ut_submit_nvme_request(NULL, &ctrlr->adminq, SPDK_NVME_OPC_GET_LOG_PAGE,
				      cb_fn, cb_arg);
}
int
spdk_nvme_ctrlr_cmd_admin_raw(struct spdk_nvme_ctrlr *ctrlr,
struct spdk_nvme_cmd *cmd, void *buf, uint32_t len,
@ -1386,7 +1449,7 @@ test_pending_reset(void)
set_thread(0);
ctrlr = ut_attach_ctrlr(&trid, 1);
ctrlr = ut_attach_ctrlr(&trid, 1, false);
SPDK_CU_ASSERT_FATAL(ctrlr != NULL);
g_ut_attach_ctrlr_status = 0;
@ -1507,7 +1570,7 @@ test_attach_ctrlr(void)
/* If ctrlr fails, no nvme_ctrlr is created. Failed ctrlr is removed
* by probe polling.
*/
ctrlr = ut_attach_ctrlr(&trid, 0);
ctrlr = ut_attach_ctrlr(&trid, 0, false);
SPDK_CU_ASSERT_FATAL(ctrlr != NULL);
ctrlr->is_failed = true;
@ -1524,7 +1587,7 @@ test_attach_ctrlr(void)
CU_ASSERT(nvme_ctrlr_get_by_name("nvme0") == NULL);
/* If ctrlr has no namespace, one nvme_ctrlr with no namespace is created */
ctrlr = ut_attach_ctrlr(&trid, 0);
ctrlr = ut_attach_ctrlr(&trid, 0, false);
SPDK_CU_ASSERT_FATAL(ctrlr != NULL);
g_ut_attach_ctrlr_status = 0;
@ -1551,7 +1614,7 @@ test_attach_ctrlr(void)
/* If ctrlr has one namespace, one nvme_ctrlr with one namespace and
* one nvme_bdev is created.
*/
ctrlr = ut_attach_ctrlr(&trid, 1);
ctrlr = ut_attach_ctrlr(&trid, 1, false);
SPDK_CU_ASSERT_FATAL(ctrlr != NULL);
g_ut_attach_bdev_count = 1;
@ -1585,7 +1648,7 @@ test_attach_ctrlr(void)
/* Ctrlr has one namespace but one nvme_ctrlr with no namespace is
* created because creating one nvme_bdev failed.
*/
ctrlr = ut_attach_ctrlr(&trid, 1);
ctrlr = ut_attach_ctrlr(&trid, 1, false);
SPDK_CU_ASSERT_FATAL(ctrlr != NULL);
g_ut_register_bdev_status = -EINVAL;
@ -1694,7 +1757,7 @@ test_aer_cb(void)
/* Attach a ctrlr, whose max number of namespaces is 4, and 2nd, 3rd, and 4th
* namespaces are populated.
*/
ctrlr = ut_attach_ctrlr(&trid, 4);
ctrlr = ut_attach_ctrlr(&trid, 4, false);
SPDK_CU_ASSERT_FATAL(ctrlr != NULL);
ctrlr->ns[0].is_active = false;
@ -1874,7 +1937,7 @@ test_submit_nvme_cmd(void)
set_thread(1);
ctrlr = ut_attach_ctrlr(&trid, 1);
ctrlr = ut_attach_ctrlr(&trid, 1, false);
SPDK_CU_ASSERT_FATAL(ctrlr != NULL);
g_ut_attach_ctrlr_status = 0;
@ -2040,7 +2103,7 @@ test_abort(void)
ut_init_trid(&trid);
ctrlr = ut_attach_ctrlr(&trid, 1);
ctrlr = ut_attach_ctrlr(&trid, 1, false);
SPDK_CU_ASSERT_FATAL(ctrlr != NULL);
g_ut_attach_ctrlr_status = 0;
@ -2253,7 +2316,7 @@ test_bdev_unregister(void)
memset(attached_names, 0, sizeof(char *) * STRING_SIZE);
ut_init_trid(&trid);
ctrlr = ut_attach_ctrlr(&trid, 2);
ctrlr = ut_attach_ctrlr(&trid, 2, false);
SPDK_CU_ASSERT_FATAL(ctrlr != NULL);
g_ut_attach_ctrlr_status = 0;
@ -2346,6 +2409,72 @@ test_compare_ns(void)
CU_ASSERT(bdev_nvme_compare_ns(&ns1, &ns2) == true);
}
/*
 * Verify that creating an nvme_ctrlr for a controller with ANA reporting
 * enabled populates each namespace's ana_state from the ANA log page.
 */
static void
test_init_ana_log_page(void)
{
struct spdk_nvme_transport_id trid = {};
struct spdk_nvme_host_id hostid = {};
struct spdk_nvme_ctrlr *ctrlr;
struct nvme_ctrlr *nvme_ctrlr;
const int STRING_SIZE = 32;
const char *attached_names[STRING_SIZE];
int rc;
set_thread(0);
memset(attached_names, 0, sizeof(char *) * STRING_SIZE);
ut_init_trid(&trid);
/* Mock controller with 5 namespaces and ANA reporting turned on. */
ctrlr = ut_attach_ctrlr(&trid, 5, true);
SPDK_CU_ASSERT_FATAL(ctrlr != NULL);
/* Give every namespace a different ANA state; the mocked ANA log page
 * reports one descriptor per namespace carrying this state.
 */
ctrlr->ns[0].ana_state = SPDK_NVME_ANA_OPTIMIZED_STATE;
ctrlr->ns[1].ana_state = SPDK_NVME_ANA_NON_OPTIMIZED_STATE;
ctrlr->ns[2].ana_state = SPDK_NVME_ANA_INACCESSIBLE_STATE;
ctrlr->ns[3].ana_state = SPDK_NVME_ANA_PERSISTENT_LOSS_STATE;
ctrlr->ns[4].ana_state = SPDK_NVME_ANA_CHANGE_STATE;
g_ut_attach_ctrlr_status = 0;
g_ut_attach_bdev_count = 5;
rc = bdev_nvme_create(&trid, &hostid, "nvme0", attached_names, STRING_SIZE, NULL, 0,
attach_ctrlr_done, NULL, NULL);
CU_ASSERT(rc == 0);
/* Advance time and poll twice; presumably the first pass completes the
 * attach and the second completes the asynchronous ANA log page read.
 */
spdk_delay_us(1000);
poll_threads();
spdk_delay_us(10000);
poll_threads();
nvme_ctrlr = nvme_ctrlr_get_by_name("nvme0");
SPDK_CU_ASSERT_FATAL(nvme_ctrlr != NULL);
/* All five namespaces must have been populated. */
CU_ASSERT(nvme_ctrlr->num_ns == 5);
CU_ASSERT(nvme_ctrlr->namespaces[0]->populated == true);
CU_ASSERT(nvme_ctrlr->namespaces[1]->populated == true);
CU_ASSERT(nvme_ctrlr->namespaces[2]->populated == true);
CU_ASSERT(nvme_ctrlr->namespaces[3]->populated == true);
CU_ASSERT(nvme_ctrlr->namespaces[4]->populated == true);
/* Each nvme_ns must carry the ANA state set on the mock namespace above. */
CU_ASSERT(nvme_ctrlr->namespaces[0]->ana_state == SPDK_NVME_ANA_OPTIMIZED_STATE);
CU_ASSERT(nvme_ctrlr->namespaces[1]->ana_state == SPDK_NVME_ANA_NON_OPTIMIZED_STATE);
CU_ASSERT(nvme_ctrlr->namespaces[2]->ana_state == SPDK_NVME_ANA_INACCESSIBLE_STATE);
CU_ASSERT(nvme_ctrlr->namespaces[3]->ana_state == SPDK_NVME_ANA_PERSISTENT_LOSS_STATE);
CU_ASSERT(nvme_ctrlr->namespaces[4]->ana_state == SPDK_NVME_ANA_CHANGE_STATE);
/* A bdev must exist for every namespace regardless of its ANA state. */
CU_ASSERT(nvme_ctrlr->namespaces[0]->bdev != NULL);
CU_ASSERT(nvme_ctrlr->namespaces[1]->bdev != NULL);
CU_ASSERT(nvme_ctrlr->namespaces[2]->bdev != NULL);
CU_ASSERT(nvme_ctrlr->namespaces[3]->bdev != NULL);
CU_ASSERT(nvme_ctrlr->namespaces[4]->bdev != NULL);
/* Tear down and confirm the controller is gone. */
rc = bdev_nvme_delete("nvme0", NULL);
CU_ASSERT(rc == 0);
poll_threads();
CU_ASSERT(nvme_ctrlr_get_by_name("nvme0") == NULL);
}
static void
init_accel(void)
{
@ -2384,6 +2513,7 @@ main(int argc, const char **argv)
CU_ADD_TEST(suite, test_get_io_qpair);
CU_ADD_TEST(suite, test_bdev_unregister);
CU_ADD_TEST(suite, test_compare_ns);
CU_ADD_TEST(suite, test_init_ana_log_page);
CU_basic_set_mode(CU_BRM_VERBOSE);