From b5853777fe507910b92faac7673be8e7b1a422e4 Mon Sep 17 00:00:00 2001
From: Changpeng Liu
Date: Tue, 26 Feb 2019 04:02:10 -0500
Subject: [PATCH] fio_plugin: enable separate metadata (DIX) support

The APIs already used by the fio plugin accept a separate metadata
parameter, so we only need to allocate a separate metadata buffer for
each request. By default, each request gets a 4096-byte metadata buffer
when PI is enabled with separate metadata; a new md_per_io_size option
lets users specify a larger value for requests that need a bigger
metadata buffer.

Change-Id: I51679c5cb7f7b1599b81287b1fbb8d9be7959191
Signed-off-by: Changpeng Liu
Reviewed-on: https://review.gerrithub.io/c/spdk/spdk/+/446375
Tested-by: SPDK CI Jenkins
Reviewed-by: Shuhei Matsumoto
Reviewed-by: Jim Harris
Reviewed-by: Ben Walker
---
 examples/nvme/fio_plugin/README.md    |   5 ++
 examples/nvme/fio_plugin/fio_plugin.c | 122 +++++++++++++++++++++++---
 2 files changed, 115 insertions(+), 12 deletions(-)

diff --git a/examples/nvme/fio_plugin/README.md b/examples/nvme/fio_plugin/README.md
index 2c53328282..309931a8ce 100644
--- a/examples/nvme/fio_plugin/README.md
+++ b/examples/nvme/fio_plugin/README.md
@@ -95,3 +95,8 @@ Blocksize should be set as the sum of data and metadata. For example, if data bl
 PI metadata is 8 Byte, then blocksize in fio configure file should be 520 Byte:
 
 bs=520
+
+The storage device may use a block format that requires separate metadata (DIX). In this scenario, the fio_plugin
+will automatically allocate an extra 4KiB buffer per I/O to hold this metadata. For some cases, such as 512 byte
+blocks with 32 metadata bytes per block and a 128KiB I/O size, 4KiB isn't large enough. In this case, the
+`md_per_io_size` option may be specified to increase the size of the metadata buffer.
diff --git a/examples/nvme/fio_plugin/fio_plugin.c b/examples/nvme/fio_plugin/fio_plugin.c
index b390c12ede..636114cf15 100644
--- a/examples/nvme/fio_plugin/fio_plugin.c
+++ b/examples/nvme/fio_plugin/fio_plugin.c
@@ -39,6 +39,7 @@
 #include "spdk/log.h"
 #include "spdk/endian.h"
 #include "spdk/dif.h"
+#include "spdk/util.h"
 
 #include "config-host.h"
 #include "fio.h"
@@ -51,6 +52,7 @@ static bool g_spdk_env_initialized;
 static int g_spdk_enable_sgl = 0;
 static uint32_t g_spdk_pract_flag;
 static uint32_t g_spdk_prchk_flags;
+static uint32_t g_spdk_md_per_io_size = 4096;
 
 struct spdk_fio_options {
	void	*pad;	/* off1 used in option descriptions may not be 0 */
@@ -60,6 +62,7 @@ struct spdk_fio_options {
	char	*hostnqn;
	int	pi_act;
	char	*pi_chk;
+	int	md_per_io_size;
	char	*digest_enable;
 };
 
@@ -70,6 +73,8 @@ struct spdk_fio_request {
 
	/** Context for NVMe PI */
	struct spdk_dif_ctx	dif_ctx;
+	/** Separate metadata buffer pointer */
+	void			*md_buf;
 
	struct spdk_fio_thread	*fio_thread;
	struct spdk_fio_qpair	*fio_qpair;
@@ -94,6 +99,8 @@ struct spdk_fio_qpair {
	struct spdk_nvme_ns	*ns;
	uint32_t		io_flags;
	bool			do_nvme_pi;
+	/* True for DIF and false for DIX, and this is valid only if do_nvme_pi is true. */
+	bool			extended_lba;
	struct spdk_fio_qpair	*next;
	struct spdk_fio_ctrlr	*fio_ctrlr;
 };
@@ -198,10 +205,6 @@ fio_do_nvme_pi_check(struct spdk_fio_qpair *fio_qpair)
	ns = fio_qpair->ns;
	nsdata = spdk_nvme_ns_get_data(ns);
 
-	if (!spdk_nvme_ns_supports_extended_lba(ns)) {
-		return false;
-	}
-
	if (spdk_nvme_ns_get_pi_type(ns) ==
	    SPDK_NVME_FMT_NVM_PROTECTION_DISABLE) {
		return false;
@@ -311,10 +314,14 @@ attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid,
 
	if (spdk_nvme_ns_get_flags(ns) & SPDK_NVME_NS_DPS_PI_SUPPORTED) {
		fio_qpair->io_flags = g_spdk_pract_flag | g_spdk_prchk_flags;
+		fio_qpair->do_nvme_pi = fio_do_nvme_pi_check(fio_qpair);
+		if (fio_qpair->do_nvme_pi) {
+			fio_qpair->extended_lba = spdk_nvme_ns_supports_extended_lba(ns);
+			fprintf(stdout, "PI type%u enabled with %s\n", spdk_nvme_ns_get_pi_type(ns),
+				fio_qpair->extended_lba ? "extended lba" : "separate metadata");
+		}
	}
 
-	fio_qpair->do_nvme_pi = fio_do_nvme_pi_check(fio_qpair);
-
	f->real_file_size = spdk_nvme_ns_get_size(fio_qpair->ns);
	if (f->real_file_size <= 0) {
		g_error = true;
@@ -394,6 +401,7 @@ static int spdk_fio_setup(struct thread_data *td)
	opts.shm_id = fio_options->shm_id;
	g_spdk_enable_sgl = fio_options->enable_sgl;
	parse_pract_flag(fio_options->pi_act);
+	g_spdk_md_per_io_size = spdk_max(fio_options->md_per_io_size, 4096);
	parse_prchk_flags(fio_options->pi_chk);
	if (spdk_env_init(&opts) < 0) {
		SPDK_ERRLOG("Unable to initialize SPDK env\n");
@@ -509,6 +517,14 @@ static int spdk_fio_io_u_init(struct thread_data *td, struct io_u *io_u)
	if (fio_req == NULL) {
		return 1;
	}
+
+	fio_req->md_buf = spdk_dma_zmalloc(g_spdk_md_per_io_size, NVME_IO_ALIGN, NULL);
+	if (fio_req->md_buf == NULL) {
+		fprintf(stderr, "Allocate %u metadata failed\n", g_spdk_md_per_io_size);
+		free(fio_req);
+		return 1;
+	}
+
	fio_req->io = io_u;
	fio_req->fio_thread = fio_thread;
 
@@ -523,6 +539,7 @@ static void spdk_fio_io_u_free(struct thread_data *td, struct io_u *io_u)
 
	if (fio_req) {
		assert(fio_req->io == io_u);
+		spdk_dma_free(fio_req->md_buf);
		free(fio_req);
		io_u->engine_data = NULL;
	}
@@ -562,6 +579,42 @@ fio_extended_lba_setup_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u)
	return rc;
 }
 
+static int
+fio_separate_md_setup_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u)
+{
+	struct spdk_nvme_ns *ns = fio_qpair->ns;
+	struct spdk_fio_request *fio_req = io_u->engine_data;
+	uint32_t md_size, block_size, lba_count;
+	uint64_t lba;
+	struct iovec iov, md_iov;
+	int rc;
+
+	block_size = spdk_nvme_ns_get_sector_size(ns);
+	md_size = spdk_nvme_ns_get_md_size(ns);
+	lba = io_u->offset / block_size;
+	lba_count = io_u->xfer_buflen / block_size;
+
+	rc = spdk_dif_ctx_init(&fio_req->dif_ctx, block_size, md_size,
+			       false, false,
+			       (enum spdk_dif_type)spdk_nvme_ns_get_pi_type(ns),
+			       fio_qpair->io_flags, lba, 0xFFFF, FIO_NVME_PI_APPTAG, 0);
+	if (rc != 0) {
+		fprintf(stderr, "Initialization of DIF context failed\n");
+		return rc;
+	}
+
+	iov.iov_base = io_u->buf;
+	iov.iov_len = io_u->xfer_buflen;
+	md_iov.iov_base = fio_req->md_buf;
+	md_iov.iov_len = spdk_min(md_size * lba_count, g_spdk_md_per_io_size);
+	rc = spdk_dix_generate(&iov, 1, &md_iov, lba_count, &fio_req->dif_ctx);
+	if (rc < 0) {
+		fprintf(stderr, "Generation of DIX failed\n");
+	}
+
+	return rc;
+}
+
 static int
 fio_extended_lba_verify_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u)
 {
@@ -585,6 +638,32 @@ fio_extended_lba_verify_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u)
	return rc;
 }
 
+static int
+fio_separate_md_verify_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u)
+{
+	struct spdk_nvme_ns *ns = fio_qpair->ns;
+	struct spdk_fio_request *fio_req = io_u->engine_data;
+	uint32_t md_size, lba_count;
+	struct iovec iov, md_iov;
+	struct spdk_dif_error err_blk = {};
+	int rc;
+
+	iov.iov_base = io_u->buf;
+	iov.iov_len = io_u->xfer_buflen;
+	lba_count = io_u->xfer_buflen / spdk_nvme_ns_get_sector_size(ns);
+	md_size = spdk_nvme_ns_get_md_size(ns);
+	md_iov.iov_base = fio_req->md_buf;
+	md_iov.iov_len = spdk_min(md_size * lba_count, g_spdk_md_per_io_size);
+
+	rc = spdk_dix_verify(&iov, 1, &md_iov, lba_count, &fio_req->dif_ctx, &err_blk);
+	if (rc != 0) {
+		fprintf(stderr, "DIX error detected. type=%d, offset=%" PRIu32 "\n",
+			err_blk.err_type, err_blk.err_offset);
+	}
+
+	return rc;
+}
+
 static void spdk_fio_completion_cb(void *ctx, const struct spdk_nvme_cpl *cpl)
 {
	struct spdk_fio_request	*fio_req = ctx;
@@ -593,7 +672,11 @@ static void spdk_fio_completion_cb(void *ctx, const struct spdk_nvme_cpl *cpl)
	int	rc;
 
	if (fio_qpair->do_nvme_pi && fio_req->io->ddir == DDIR_READ) {
-		rc = fio_extended_lba_verify_pi(fio_qpair, fio_req->io);
+		if (fio_qpair->extended_lba) {
+			rc = fio_extended_lba_verify_pi(fio_qpair, fio_req->io);
+		} else {
+			rc = fio_separate_md_verify_pi(fio_qpair, fio_req->io);
+		}
		if (rc != 0) {
			fio_req->io->error = abs(rc);
		}
@@ -669,7 +752,11 @@ spdk_fio_queue(struct thread_data *td, struct io_u *io_u)
 
	/* TODO: considering situations that fio will randomize and verify io_u */
	if (fio_qpair->do_nvme_pi && io_u->ddir == DDIR_WRITE) {
-		rc = fio_extended_lba_setup_pi(fio_qpair, io_u);
+		if (fio_qpair->extended_lba) {
+			rc = fio_extended_lba_setup_pi(fio_qpair, io_u);
+		} else {
+			rc = fio_separate_md_setup_pi(fio_qpair, io_u);
+		}
		if (rc < 0) {
			io_u->error = -rc;
			return FIO_Q_COMPLETED;
@@ -679,25 +766,26 @@ spdk_fio_queue(struct thread_data *td, struct io_u *io_u)
	switch (io_u->ddir) {
	case DDIR_READ:
		if (!g_spdk_enable_sgl) {
-			rc = spdk_nvme_ns_cmd_read_with_md(ns, fio_qpair->qpair, io_u->buf, NULL, lba, lba_count,
+			rc = spdk_nvme_ns_cmd_read_with_md(ns, fio_qpair->qpair, io_u->buf, fio_req->md_buf, lba, lba_count,
							   spdk_fio_completion_cb, fio_req, dif_ctx->dif_flags,
							   dif_ctx->apptag_mask, dif_ctx->app_tag);
		} else {
			rc = spdk_nvme_ns_cmd_readv_with_md(ns, fio_qpair->qpair, lba, lba_count,
							    spdk_fio_completion_cb, fio_req, dif_ctx->dif_flags,
-							    spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, NULL,
+							    spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, fio_req->md_buf,
							    dif_ctx->apptag_mask, dif_ctx->app_tag);
		}
		break;
	case DDIR_WRITE:
		if (!g_spdk_enable_sgl) {
-			rc = spdk_nvme_ns_cmd_write_with_md(ns, fio_qpair->qpair, io_u->buf, NULL, lba, lba_count,
+			rc = spdk_nvme_ns_cmd_write_with_md(ns, fio_qpair->qpair, io_u->buf, fio_req->md_buf, lba,
+							    lba_count,
							    spdk_fio_completion_cb, fio_req, dif_ctx->dif_flags,
							    dif_ctx->apptag_mask, dif_ctx->app_tag);
		} else {
			rc = spdk_nvme_ns_cmd_writev_with_md(ns, fio_qpair->qpair, lba, lba_count,
							     spdk_fio_completion_cb, fio_req, dif_ctx->dif_flags,
-							     spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, NULL,
+							     spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, fio_req->md_buf,
							     dif_ctx->apptag_mask, dif_ctx->app_tag);
		}
		break;
@@ -888,6 +976,16 @@ static struct fio_option options[] = {
		.category	= FIO_OPT_C_ENGINE,
		.group		= FIO_OPT_G_INVALID,
	},
+	{
+		.name		= "md_per_io_size",
+		.lname		= "Separate Metadata Buffer Size per I/O",
+		.type		= FIO_OPT_INT,
+		.off1		= offsetof(struct spdk_fio_options, md_per_io_size),
+		.def		= "4096",
+		.help		= "Size of separate metadata buffer per I/O (Default: 4096)",
+		.category	= FIO_OPT_C_ENGINE,
+		.group		= FIO_OPT_G_INVALID,
+	},
	{
		.name		= "digest_enable",
		.lname		= "PDU digest choice for NVMe/TCP Transport(NONE|HEADER|DATA|BOTH)",
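
For reference, the separate metadata buffer has to hold one metadata field per logical block of the I/O, which is why the md_per_io_size option matters for large I/Os on formats with large per-block metadata. Below is a minimal standalone sketch of that arithmetic (not part of the patch; the helper name required_md_per_io is made up for illustration):

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

/* Bytes of separate (DIX) metadata needed for one I/O:
 * one metadata field per logical block in the transfer. */
static uint32_t
required_md_per_io(uint32_t block_size, uint32_t md_size, uint32_t io_size)
{
	return (io_size / block_size) * md_size;
}

int
main(void)
{
	/* README example: 512-byte blocks, 32 metadata bytes per block, 128KiB I/O. */
	uint32_t need = required_md_per_io(512, 32, 128 * 1024);

	/* Prints 8192, which exceeds the 4096-byte default, so such a job would
	 * set md_per_io_size=8192 (or larger) in its fio configuration file. */
	printf("separate metadata bytes needed per I/O: %" PRIu32 "\n", need);
	return 0;
}

Values below 4096 are raised back to the default, since spdk_fio_setup() applies spdk_max(fio_options->md_per_io_size, 4096) before storing the value.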