From a691aa3121d0fdc9d85b83b754dda8ccf4823aa3 Mon Sep 17 00:00:00 2001 From: Xiaodong Liu Date: Mon, 11 Jun 2018 04:26:40 -0400 Subject: [PATCH] fio_plugin: add extended lba format pi support Following the PI operation inside nvme/perf, add 2 PI options into fio engine option. SPDK ioengine will check whether PI information should be generated or verified. Running with PI setting, following settings are required: 1. In fio configure file, add PRACT like "pi_act=0", and set PRCHK by flags(GUARD|REFTAG|APPTAG), like "pi_chk=GUARD". 2. Format device namespace with proper PI setting 3. In fio configure file, blocksize should be set as the sum of data and metadata. Sequential Write and following Sequential Read can be used to evaluate it quickly. Change-Id: I54e202bb8510cd987772117dff241aec714c8fb9 Signed-off-by: Xiaodong Liu Reviewed-on: https://review.gerrithub.io/414753 Tested-by: SPDK Automated Test System Reviewed-by: John Kariuki Reviewed-by: Changpeng Liu Reviewed-by: Jim Harris Chandler-Test-Pool: SPDK Automated Test System --- examples/nvme/fio_plugin/README.md | 17 ++ examples/nvme/fio_plugin/fio_plugin.c | 234 ++++++++++++++++++++++++-- 2 files changed, 241 insertions(+), 10 deletions(-) diff --git a/examples/nvme/fio_plugin/README.md b/examples/nvme/fio_plugin/README.md index bcc0aba185..2c53328282 100644 --- a/examples/nvme/fio_plugin/README.md +++ b/examples/nvme/fio_plugin/README.md @@ -78,3 +78,20 @@ FIO test, the performance is worse than SPDK perf (also using one CPU core) agai multiple jobs for FIO test, the performance of FIO is similiar with SPDK perf. After analyzing this phenomenon, we think that is caused by the FIO architecture. Mainly FIO can scale with multiple threads (i.e., using CPU cores), but it is not good to use one thread against many I/O devices. + +# End-to-end Data Protection (Optional) + +Running with PI setting, following settings steps are required. +First, format device namespace with proper PI setting. For example: + + nvme format /dev/nvme0n1 -l 1 -i 1 -p 0 -m 1 + +In fio configure file, add PRACT and set PRCHK by flags(GUARD|REFTAG|APPTAG) properly. For example: + + pi_act=0 + pi_chk=GUARD + +Blocksize should be set as the sum of data and metadata. For example, if data blocksize is 512 Byte, host generated +PI metadata is 8 Byte, then blocksize in fio configure file should be 520 Byte: + + bs=520 diff --git a/examples/nvme/fio_plugin/fio_plugin.c b/examples/nvme/fio_plugin/fio_plugin.c index 1689a0fd90..5bbcf1ab83 100644 --- a/examples/nvme/fio_plugin/fio_plugin.c +++ b/examples/nvme/fio_plugin/fio_plugin.c @@ -37,15 +37,20 @@ #include "spdk/env.h" #include "spdk/string.h" #include "spdk/log.h" +#include "spdk/endian.h" +#include "spdk/crc16.h" #include "config-host.h" #include "fio.h" #include "optgroup.h" #define NVME_IO_ALIGN 4096 +#define FIO_NVME_PI_APPTAG 0x1234 static bool spdk_env_initialized; static int spdk_enable_sgl = 0; +static uint32_t spdk_pract_flag; +static uint32_t spdk_prchk_flags; struct spdk_fio_options { void *pad; /* off1 used in option descriptions may not be 0 */ @@ -53,6 +58,8 @@ struct spdk_fio_options { int shm_id; int enable_sgl; char *hostnqn; + int pi_act; + char *pi_chk; }; struct spdk_fio_request { @@ -60,6 +67,10 @@ struct spdk_fio_request { /** Offset in current iovec, fio only uses 1 vector */ uint32_t iov_offset; + /** Application tag and its mask for NVMe PI */ + uint16_t appmask; + uint16_t apptag; + struct spdk_fio_thread *fio_thread; }; @@ -80,6 +91,8 @@ struct spdk_fio_qpair { struct fio_file *f; struct spdk_nvme_qpair *qpair; struct spdk_nvme_ns *ns; + uint32_t io_flags; + bool do_nvme_pi; struct spdk_fio_qpair *next; struct spdk_fio_ctrlr *fio_ctrlr; }; @@ -164,6 +177,45 @@ get_fio_ctrlr(const struct spdk_nvme_transport_id *trid) return NULL; } +static bool +fio_do_nvme_pi_check(struct spdk_fio_qpair *fio_qpair) +{ + struct spdk_nvme_ns *ns = NULL; + const struct spdk_nvme_ns_data *nsdata; + + ns = fio_qpair->ns; + nsdata = spdk_nvme_ns_get_data(ns); + + if (!spdk_nvme_ns_supports_extended_lba(ns)) { + return false; + } + + if (spdk_nvme_ns_get_pi_type(ns) == + SPDK_NVME_FMT_NVM_PROTECTION_DISABLE) { + return false; + } + + /* PI locates at the first 8 bytes of metadata, + * doesn't support now + */ + if (nsdata->dps.md_start) { + return false; + } + + /* Controller performs PI setup and check */ + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRACT) { + return false; + } + + /* Type3 don't support REFTAG */ + if (spdk_nvme_ns_get_pi_type(ns) == + SPDK_NVME_FMT_NVM_PROTECTION_TYPE3) { + return false; + } + + return true; +} + static void attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, struct spdk_nvme_ctrlr *ctrlr, const struct spdk_nvme_ctrlr_opts *opts) @@ -249,6 +301,12 @@ attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, fio_qpair->next = fio_thread->fio_qpair; fio_thread->fio_qpair = fio_qpair; + if (spdk_nvme_ns_get_flags(ns) & SPDK_NVME_NS_DPS_PI_SUPPORTED) { + fio_qpair->io_flags = spdk_pract_flag | spdk_prchk_flags; + } + + fio_qpair->do_nvme_pi = fio_do_nvme_pi_check(fio_qpair); + f->real_file_size = spdk_nvme_ns_get_size(fio_qpair->ns); if (f->real_file_size <= 0) { g_error = true; @@ -260,6 +318,23 @@ attach_cb(void *cb_ctx, const struct spdk_nvme_transport_id *trid, fio_file_set_size_known(f); } +static void parse_prchk_flags(const char *prchk_str) +{ + if (!prchk_str) { + return; + } + + if (strstr(prchk_str, "GUARD") != NULL) { + spdk_prchk_flags = SPDK_NVME_IO_FLAGS_PRCHK_GUARD; + } + if (strstr(prchk_str, "REFTAG") != NULL) { + spdk_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_REFTAG; + } + if (strstr(prchk_str, "APPTAG") != NULL) { + spdk_prchk_flags |= SPDK_NVME_IO_FLAGS_PRCHK_APPTAG; + } +} + /* Called once at initialization. This is responsible for gathering the size of * each "file", which in our case are in the form * 'key=value [key=value] ... ns=value' @@ -301,6 +376,8 @@ static int spdk_fio_setup(struct thread_data *td) opts.mem_size = fio_options->mem_size; opts.shm_id = fio_options->shm_id; spdk_enable_sgl = fio_options->enable_sgl; + spdk_pract_flag = fio_options->pi_act; + parse_prchk_flags(fio_options->pi_chk); if (spdk_env_init(&opts) < 0) { SPDK_ERRLOG("Unable to initialize SPDK env\n"); free(fio_thread->iocq); @@ -434,11 +511,119 @@ static void spdk_fio_io_u_free(struct thread_data *td, struct io_u *io_u) } } +static void +fio_extended_lba_setup_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u) +{ + struct spdk_nvme_ns *ns = NULL; + struct spdk_fio_request *fio_req = io_u->engine_data; + struct spdk_nvme_protection_info *pi; + uint16_t crc16; + uint32_t i, md_size, sector_size, pi_offset, extended_lba_size, lba_count; + uint64_t lba; + + ns = fio_qpair->ns; + + sector_size = spdk_nvme_ns_get_sector_size(ns); + md_size = spdk_nvme_ns_get_md_size(ns); + extended_lba_size = sector_size + md_size; + lba = io_u->offset / extended_lba_size; + lba_count = io_u->xfer_buflen / extended_lba_size; + + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_APPTAG) { + /* Let's use number of lbas for application tag */ + fio_req->appmask = 0xffff; + fio_req->apptag = FIO_NVME_PI_APPTAG; + } + + for (i = 0; i < lba_count; i++) { + pi_offset = (extended_lba_size * (i + 1)) - 8; + pi = (struct spdk_nvme_protection_info *)(io_u->buf + pi_offset); + memset(pi, 0, sizeof(*pi)); + + if (io_u->ddir == DDIR_WRITE) { + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) { + /* CRC buffer should not include PI */ + crc16 = spdk_crc16_t10dif(io_u->buf + extended_lba_size * i, + extended_lba_size - 8); + to_be16(&pi->guard, crc16); + } + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_APPTAG) { + /* Let's use number of lbas for application tag */ + to_be16(&pi->app_tag, FIO_NVME_PI_APPTAG); + } + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) { + to_be32(&pi->ref_tag, (uint32_t)lba + i); + } + } + } +} + +static void +fio_extended_lba_verify_pi(struct spdk_fio_qpair *fio_qpair, struct io_u *io_u) +{ + struct spdk_nvme_ns *ns = NULL; + struct spdk_nvme_protection_info *pi; + uint16_t crc16, guard, app_tag; + uint32_t i, md_size, sector_size, pi_offset, extended_lba_size, ref_tag, lba_count; + uint64_t lba; + + ns = fio_qpair->ns; + sector_size = spdk_nvme_ns_get_sector_size(ns); + md_size = spdk_nvme_ns_get_md_size(ns); + extended_lba_size = sector_size + md_size; + lba = io_u->offset / extended_lba_size; + lba_count = io_u->xfer_buflen / extended_lba_size; + + for (i = 0; i < lba_count; i++) { + pi_offset = (extended_lba_size * (i + 1)) - 8; + pi = (struct spdk_nvme_protection_info *)(io_u->buf + pi_offset); + + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_GUARD) { + /* CRC buffer should not include last 8 bytes of PI */ + crc16 = spdk_crc16_t10dif(io_u->buf + extended_lba_size * i, + extended_lba_size - 8); + to_be16(&guard, crc16); + if (pi->guard != guard) { + fprintf(stdout, "Get Guard Error LBA 0x%16.16"PRIx64"," + " Expected 0x%04x but returned with 0x%04x," + " may read the LBA without write it first\n", + lba + i, guard, pi->guard); + } + + } + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_APPTAG) { + /* Previously we used the number of lbas as + * application tag for writes + */ + to_be16(&app_tag, FIO_NVME_PI_APPTAG); + if (pi->app_tag != app_tag) { + fprintf(stdout, "Get Application Tag Error LBA 0x%16.16"PRIx64"," + " Expected 0x%04x but returned with 0x%04x," + " may read the LBA without write it first\n", + lba + i, app_tag, pi->app_tag); + } + } + if (fio_qpair->io_flags & SPDK_NVME_IO_FLAGS_PRCHK_REFTAG) { + to_be32(&ref_tag, (uint32_t)lba + i); + if (pi->ref_tag != ref_tag) { + fprintf(stdout, "Get Reference Tag Error LBA 0x%16.16"PRIx64"," + " Expected 0x%08x but returned with 0x%08x," + " may read the LBA without write it first\n", + lba + i, ref_tag, pi->ref_tag); + } + } + } +} + static void spdk_fio_completion_cb(void *ctx, const struct spdk_nvme_cpl *cpl) { struct spdk_fio_request *fio_req = ctx; struct spdk_fio_thread *fio_thread = fio_req->fio_thread; + if (fio_thread->fio_qpair->do_nvme_pi) { + fio_extended_lba_verify_pi(fio_thread->fio_qpair, fio_req->io); + } + assert(fio_thread->iocq_count < fio_thread->iocq_size); fio_thread->iocq[fio_thread->iocq_count++] = fio_req->io; } @@ -504,25 +689,34 @@ spdk_fio_queue(struct thread_data *td, struct io_u *io_u) lba = io_u->offset / block_size; lba_count = io_u->xfer_buflen / block_size; + // TODO: considering situations that fio will randomize and verify io_u + if (fio_qpair->do_nvme_pi) { + fio_extended_lba_setup_pi(fio_qpair, io_u); + } + switch (io_u->ddir) { case DDIR_READ: if (!spdk_enable_sgl) { - rc = spdk_nvme_ns_cmd_read(ns, fio_qpair->qpair, io_u->buf, lba, lba_count, - spdk_fio_completion_cb, fio_req, 0); + rc = spdk_nvme_ns_cmd_read_with_md(ns, fio_qpair->qpair, io_u->buf, NULL, lba, lba_count, + spdk_fio_completion_cb, fio_req, + fio_qpair->io_flags, fio_req->appmask, fio_req->apptag); } else { - rc = spdk_nvme_ns_cmd_readv(ns, fio_qpair->qpair, lba, - lba_count, spdk_fio_completion_cb, fio_req, 0, - spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge); + rc = spdk_nvme_ns_cmd_readv_with_md(ns, fio_qpair->qpair, lba, + lba_count, spdk_fio_completion_cb, fio_req, fio_qpair->io_flags, + spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, NULL, + fio_req->appmask, fio_req->apptag); } break; case DDIR_WRITE: if (!spdk_enable_sgl) { - rc = spdk_nvme_ns_cmd_write(ns, fio_qpair->qpair, io_u->buf, lba, lba_count, - spdk_fio_completion_cb, fio_req, 0); + rc = spdk_nvme_ns_cmd_write_with_md(ns, fio_qpair->qpair, io_u->buf, NULL, lba, lba_count, + spdk_fio_completion_cb, fio_req, + fio_qpair->io_flags, fio_req->appmask, fio_req->apptag); } else { - rc = spdk_nvme_ns_cmd_writev(ns, fio_qpair->qpair, lba, - lba_count, spdk_fio_completion_cb, fio_req, 0, - spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge); + rc = spdk_nvme_ns_cmd_writev_with_md(ns, fio_qpair->qpair, lba, + lba_count, spdk_fio_completion_cb, fio_req, fio_qpair->io_flags, + spdk_nvme_io_reset_sgl, spdk_nvme_io_next_sge, NULL, + fio_req->appmask, fio_req->apptag); } break; default: @@ -685,6 +879,26 @@ static struct fio_option options[] = { .category = FIO_OPT_C_ENGINE, .group = FIO_OPT_G_INVALID, }, + { + .name = "pi_act", + .lname = "Protection Information Action", + .type = FIO_OPT_INT, + .off1 = offsetof(struct spdk_fio_options, pi_act), + .def = "1", + .help = "Protection Information Action bit (pi_act=1 or pi_act=0)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, + { + .name = "pi_chk", + .lname = "Protection Information Check(GUARD|REFTAG|APPTAG)", + .type = FIO_OPT_STR_STORE, + .off1 = offsetof(struct spdk_fio_options, pi_chk), + .def = NULL, + .help = "Control of Protection Information Checking (pi_chk=GUARD|REFTAG|APPTAG)", + .category = FIO_OPT_C_ENGINE, + .group = FIO_OPT_G_INVALID, + }, { .name = NULL, },