diff --git a/CONFIG b/CONFIG index 00de5a1106..5c54a0eb78 100644 --- a/CONFIG +++ b/CONFIG @@ -60,3 +60,11 @@ CONFIG_IOAT_IMPL?=ioat_impl.h # libpciaccess library is used to map pci devices; else DPDK library is used to # map pci devices. CONFIG_PCIACCESS?=y + +# Build SPDK FIO plugin. Requires FIO_SOURCE_DIR set to a valid +# fio source code directory. +CONFIG_FIO_PLUGIN?=n + +# This directory should contain the source code directory for fio +# which is required for building the SPDK FIO plugin. +FIO_SOURCE_DIR?=/usr/src/fio diff --git a/autobuild.sh b/autobuild.sh index e782441419..24f204816a 100755 --- a/autobuild.sh +++ b/autobuild.sh @@ -11,6 +11,10 @@ umask 022 cd $rootdir +if [ -d /usr/src/fio ]; then + MAKECONFIG="$MAKECONFIG CONFIG_FIO_PLUGIN=y FIO_SOURCE_DIR=/usr/src/fio" +fi + timing_enter autobuild timing_enter check_format diff --git a/examples/nvme/Makefile b/examples/nvme/Makefile index 65a3bf5c32..ea8d6070b6 100644 --- a/examples/nvme/Makefile +++ b/examples/nvme/Makefile @@ -36,6 +36,8 @@ include $(SPDK_ROOT_DIR)/mk/spdk.common.mk DIRS-y += identify perf reserve nvme_manage +DIRS-$(CONFIG_FIO_PLUGIN) += fio_plugin + .PHONY: all clean $(DIRS-y) all: $(DIRS-y) diff --git a/examples/nvme/fio_plugin/.gitignore b/examples/nvme/fio_plugin/.gitignore new file mode 100644 index 0000000000..1b0b36ac47 --- /dev/null +++ b/examples/nvme/fio_plugin/.gitignore @@ -0,0 +1 @@ +fio_plugin diff --git a/examples/nvme/fio_plugin/Makefile b/examples/nvme/fio_plugin/Makefile new file mode 100644 index 0000000000..e820bed00d --- /dev/null +++ b/examples/nvme/fio_plugin/Makefile @@ -0,0 +1,63 @@ +# +# BSD LICENSE +# +# Copyright (c) Intel Corporation. +# Copyright (c) 2015-2016, Micron Technology, Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in +# the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Intel Corporation nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# + +SPDK_ROOT_DIR := $(CURDIR)/../../.. + +include $(SPDK_ROOT_DIR)/mk/spdk.common.mk + +APP := fio_plugin + +C_SRCS := fio_plugin.c + +CFLAGS += -I. 
$(DPDK_INC) -I$(FIO_SOURCE_DIR) + +LDFLAGS += -shared -rdynamic + +SPDK_LIBS += $(SPDK_ROOT_DIR)/lib/nvme/libspdk_nvme.a \ + $(SPDK_ROOT_DIR)/lib/util/libspdk_util.a \ + $(SPDK_ROOT_DIR)/lib/memory/libspdk_memory.a + +LIBS += $(SPDK_LIBS) $(PCIACCESS_LIB) $(DPDK_LIB) -lpthread -lrt + +all : $(APP) + +$(APP) : $(OBJS) $(SPDK_LIBS) + $(LINK_C) + +clean : + $(CLEAN_C) $(APP) + +.PHONY: all clean + +include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk diff --git a/examples/nvme/fio_plugin/README.md b/examples/nvme/fio_plugin/README.md new file mode 100644 index 0000000000..3cba591ad2 --- /dev/null +++ b/examples/nvme/fio_plugin/README.md @@ -0,0 +1,40 @@ +Compiling +---------- + +First, clone the fio source repository from http://github.com/axboe/fio + + git clone http://github.com/axboe/fio + +Then check out the fio 2.8 tag + + cd fio && git checkout fio-2.8 + +Finally, compile the code with + + ./configure && make + +Next, edit the CONFIG file located in the root of the SPDK repository and set CONFIG_FIO_PLUGIN +to y and FIO_SOURCE_DIR to the location of the fio repository that was just created. + +Further, you'll need to build DPDK with -fPIC set. You can do this by modifying your +DPDK config file (e.g. config/defconfig_x86_64-native-linux-gcc) to include the line + + EXTRA_CFLAGS=-fPIC + +At this point, build SPDK as per normal. The fio plugin will be placed in the same directory +as this README. + +Usage +------ + +To use the SPDK fio plugin with fio, simply set the following in the fio configuration file +(see example_config.fio in the same directory as this README). + + ioengine=<path to fio_plugin binary> + +To select NVMe devices, you simply pass an identifier as the filename in the format + + domain.bus.slot.func/namespace + +Remember that NVMe namespaces start at 1, not 0! Also, the notation uses '.' throughout, +not ':'. For example - 0000.04.00.0/1. 
diff --git a/examples/nvme/fio_plugin/example_config.fio b/examples/nvme/fio_plugin/example_config.fio
new file mode 100644
index 0000000000..bb41694961
--- /dev/null
+++ b/examples/nvme/fio_plugin/example_config.fio
@@ -0,0 +1,15 @@
+[global]
+ioengine=./examples/nvme/fio_plugin/fio_plugin
+thread=1
+group_reporting=1
+direct=1
+verify=0
+time_based=1
+ramp_time=0
+runtime=1
+
+[test]
+iodepth=128
+rw=randrw
+bs=4k
+numjobs=1
diff --git a/examples/nvme/fio_plugin/fio_plugin.c b/examples/nvme/fio_plugin/fio_plugin.c
new file mode 100644
index 0000000000..b92c83ba39
--- /dev/null
+++ b/examples/nvme/fio_plugin/fio_plugin.c
@@ -0,0 +1,570 @@
+/*-
+ *   BSD LICENSE
+ *
+ *   Copyright (c) Intel Corporation.
+ *   All rights reserved.
+ *
+ *   Redistribution and use in source and binary forms, with or without
+ *   modification, are permitted provided that the following conditions
+ *   are met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ *       notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above copyright
+ *       notice, this list of conditions and the following disclaimer in
+ *       the documentation and/or other materials provided with the
+ *       distribution.
+ *     * Neither the name of Intel Corporation nor the names of its
+ *       contributors may be used to endorse or promote products derived
+ *       from this software without specific prior written permission.
+ *
+ *   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ *   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ *   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ *   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ *   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ *   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ *   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ *   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ *   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ *   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ *   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <time.h>
+
+#include "rte_config.h"
+#include "rte_mempool.h"
+#include "rte_malloc.h"
+#include "rte_eal.h"
+#include "rte_memcpy.h"
+
+#include "spdk/nvme.h"
+#include "spdk/pci.h"
+#include "spdk/string.h"
+
+#include "config-host.h"
+#include "fio.h"
+#include "optgroup.h"
+
+#define NVME_IO_ALIGN		4096
+
+#define MAX_LCORE_COUNT		63
+
+/* Per-io_u engine state; handed to the NVMe driver as the completion context. */
+struct spdk_fio_request {
+	struct io_u *io;
+
+	struct spdk_fio_thread *fio_thread;
+};
+
+/* One fio "file" bound to the NVMe namespace it names. */
+struct spdk_fio_ns {
+	struct fio_file *f;
+
+	struct spdk_nvme_ns *ns;
+	struct spdk_fio_ns *next;
+};
+
+/* An attached controller, its I/O queue pair and the namespaces in use on it. */
+struct spdk_fio_ctrlr {
+	struct spdk_nvme_ctrlr *ctrlr;
+	struct spdk_fio_ctrlr *next;
+
+	struct spdk_nvme_qpair *qpair;
+
+	struct spdk_fio_ns *ns_list;
+};
+
+/* Engine state attached to one fio thread_data through td->io_ops->data. */
+struct spdk_fio_thread {
+	struct thread_data *td;
+
+	struct spdk_fio_ctrlr *ctrlr_list;
+
+	struct io_u **iocq; // io completion queue
+	unsigned int next_completion; // index where next completion will be placed
+	unsigned int getevents_start; // index where the next getevents call will start
+	unsigned int getevents_count; // The number of events in the current getevents window
+};
+
+// Global request_mempool is used by libspdk_nvme.a and must be defined
+struct rte_mempool *request_mempool;
+
+/*
+ * DPDK and the request pool are process-wide, while setup runs for every job;
+ * this flag keeps the one-time initialization from being repeated.
+ */
+static bool spdk_env_initialized;
+
+/*
+ * Parse an fio file name of the form domain.bus.slot.func/nsid. Every field is
+ * read as hexadecimal. The domain must be present but is not reported, since
+ * device matching in this file only compares bus, slot and function.
+ *
+ * Returns true only when all five fields were parsed.
+ */
+static bool
+spdk_fio_parse_filename(const char *name, unsigned int *bus, unsigned int *slot,
+			unsigned int *func, unsigned int *nsid)
+{
+	unsigned int domain;
+
+	return sscanf(name, "%x.%x.%x.%x/%x", &domain, bus, slot, func, nsid) == 5;
+}
+
+/*
+ * Probe callback: claim a controller only if at least one fio file names its
+ * bus/slot/function. cb_ctx is the thread_data passed to spdk_nvme_probe().
+ */
+static bool
+probe_cb(void *cb_ctx, struct spdk_pci_device *dev, struct spdk_nvme_ctrlr_opts *opts)
+{
+	unsigned int found_bus = spdk_pci_device_get_bus(dev);
+	unsigned int found_slot = spdk_pci_device_get_dev(dev);
+	unsigned int found_func = spdk_pci_device_get_func(dev);
+	struct thread_data *td = cb_ctx;
+	struct fio_file *f;
+	unsigned int i;
+
+	/* Check if we want to claim this device */
+	for_each_file(td, f, i) {
+		unsigned int bus, slot, func, nsid;
+
+		if (!spdk_fio_parse_filename(f->file_name, &bus, &slot, &func, &nsid)) {
+			fprintf(stderr, "Invalid filename: %s\n", f->file_name);
+			continue;
+		}
+		if (bus != found_bus || slot != found_slot || func != found_func) {
+			continue;
+		}
+
+		/* We do want to claim this device */
+		if (spdk_pci_device_has_non_uio_driver(dev)) {
+			fprintf(stderr,
+				"Requested to attach to %02x:%02x.%02x but that device is not unbound from the kernel\n",
+				bus, slot, func);
+			return false;
+		}
+		return true;
+	}
+
+	return false;
+}
+
+/*
+ * Attach callback: record the controller on this thread's list, allocate its
+ * I/O queue pair, and bind every fio file that names this device to the
+ * corresponding namespace, publishing the namespace size to fio.
+ */
+static void
+attach_cb(void *cb_ctx, struct spdk_pci_device *dev, struct spdk_nvme_ctrlr *ctrlr,
+	  const struct spdk_nvme_ctrlr_opts *opts)
+{
+	unsigned int found_bus = spdk_pci_device_get_bus(dev);
+	unsigned int found_slot = spdk_pci_device_get_dev(dev);
+	unsigned int found_func = spdk_pci_device_get_func(dev);
+	struct thread_data *td = cb_ctx;
+	struct spdk_fio_thread *fio_thread = td->io_ops->data;
+	struct spdk_fio_ctrlr *fio_ctrlr;
+	struct spdk_fio_ns *fio_ns;
+	struct fio_file *f;
+	unsigned int i;
+
+	/* Create an fio_ctrlr and add it to the list */
+	fio_ctrlr = calloc(1, sizeof(*fio_ctrlr));
+	if (fio_ctrlr == NULL) {
+		fprintf(stderr, "Unable to allocate controller state for %02x:%02x.%02x\n",
+			found_bus, found_slot, found_func);
+		return;
+	}
+	fio_ctrlr->ctrlr = ctrlr;
+	fio_ctrlr->qpair = spdk_nvme_ctrlr_alloc_io_qpair(ctrlr, 0);
+	if (fio_ctrlr->qpair == NULL) {
+		/* Without a queue pair no I/O can be issued, so do not track the device. */
+		fprintf(stderr, "Unable to allocate an I/O queue pair for %02x:%02x.%02x\n",
+			found_bus, found_slot, found_func);
+		free(fio_ctrlr);
+		return;
+	}
+	fio_ctrlr->next = fio_thread->ctrlr_list;
+	fio_thread->ctrlr_list = fio_ctrlr;
+
+	/* Loop through all of the file names provided and grab the matching namespaces */
+	for_each_file(fio_thread->td, f, i) {
+		unsigned int bus, slot, func, nsid;
+
+		if (!spdk_fio_parse_filename(f->file_name, &bus, &slot, &func, &nsid)) {
+			continue;
+		}
+		if (bus != found_bus || slot != found_slot || func != found_func) {
+			continue;
+		}
+
+		fio_ns = calloc(1, sizeof(*fio_ns));
+		if (fio_ns == NULL) {
+			continue;
+		}
+		fio_ns->f = f;
+		fio_ns->ns = spdk_nvme_ctrlr_get_ns(ctrlr, nsid);
+		if (fio_ns->ns == NULL) {
+			free(fio_ns);
+			continue;
+		}
+
+		f->real_file_size = spdk_nvme_ns_get_size(fio_ns->ns);
+		if (f->real_file_size <= 0) {
+			free(fio_ns);
+			continue;
+		}
+
+		f->filetype = FIO_TYPE_BD;
+		fio_file_set_size_known(f);
+
+		fio_ns->next = fio_ctrlr->ns_list;
+		fio_ctrlr->ns_list = fio_ns;
+	}
+}
+
+static char *ealargs[] = {
+	"fio",
+	"-n 4",
+};
+
+/*
+ * Called once at initialization. This is responsible for gathering the size of
+ * each "file", which in our case are in the form
+ * "0000.05.00.0/1" (PCI domain.bus.device.function/NVMe NSID)
+ *
+ * Returns 0 on success and 1 on failure.
+ */
+static int spdk_fio_setup(struct thread_data *td)
+{
+	int rc;
+	struct spdk_fio_thread *fio_thread;
+
+	fio_thread = calloc(1, sizeof(*fio_thread));
+	if (fio_thread == NULL) {
+		fprintf(stderr, "could not allocate SPDK fio thread state\n");
+		return 1;
+	}
+	fio_thread->td = td;
+
+	fio_thread->iocq = calloc(td->o.iodepth + 1, sizeof(struct io_u *));
+	if (fio_thread->iocq == NULL) {
+		fprintf(stderr, "could not allocate the completion queue\n");
+		goto err;
+	}
+
+	if (!spdk_env_initialized) {
+		rc = rte_eal_init(sizeof(ealargs) / sizeof(ealargs[0]), ealargs);
+		if (rc < 0) {
+			fprintf(stderr, "could not initialize dpdk\n");
+			goto err;
+		}
+
+		request_mempool = rte_mempool_create("nvme_request", 8192,
+						     spdk_nvme_request_size(), 128, 0,
+						     NULL, NULL, NULL, NULL,
+						     SOCKET_ID_ANY, 0);
+		if (!request_mempool) {
+			fprintf(stderr, "rte_mempool_create failed\n");
+			goto err;
+		}
+
+		spdk_env_initialized = true;
+	}
+
+	/* The probe callbacks find this thread's state through td->io_ops->data. */
+	td->io_ops->data = fio_thread;
+
+	/* Enumerate all of the controllers */
+	if (spdk_nvme_probe(td, probe_cb, attach_cb) != 0) {
+		/* Keep the state on td so cleanup can release anything already attached. */
+		fprintf(stderr, "spdk_nvme_probe() failed\n");
+		return 1;
+	}
+
+	return 0;
+
+err:
+	free(fio_thread->iocq);
+	free(fio_thread);
+	return 1;
+}
+
+/* Nothing to open: files are bound to namespaces by attach_cb() during setup. */
+static int spdk_fio_open(struct thread_data *td, struct fio_file *f)
+{
+	return 0;
+}
+
+/* Nothing to close: namespaces are released by spdk_fio_cleanup(). */
+static int spdk_fio_close(struct thread_data *td, struct fio_file *f)
+{
+	return 0;
+}
+
+/* Allocate fio's I/O buffer with rte_malloc(), aligned to NVME_IO_ALIGN; non-zero on failure. */
+static int spdk_fio_iomem_alloc(struct thread_data *td, size_t total_mem)
+{
+	td->orig_buffer = rte_malloc(NULL, total_mem, NVME_IO_ALIGN);
+	return td->orig_buffer == NULL;
+}
+
+/* Release the buffer obtained by spdk_fio_iomem_alloc(). */
+static void spdk_fio_iomem_free(struct thread_data *td)
+{
+	rte_free(td->orig_buffer);
+}
+
+/* Attach a spdk_fio_request to a new io_u. Returns 0 on success, 1 if allocation fails. */
+static int spdk_fio_io_u_init(struct thread_data *td, struct io_u *io_u)
+{
+	struct spdk_fio_thread *fio_thread = td->io_ops->data;
+	struct spdk_fio_request *fio_req;
+
+	fio_req = calloc(1, sizeof(*fio_req));
+	if (fio_req == NULL) {
+		return 1;
+	}
+	fio_req->io = io_u;
+	fio_req->fio_thread = fio_thread;
+
+	io_u->engine_data = fio_req;
+
+	return 0;
+}
+
+/* Release the spdk_fio_request attached by spdk_fio_io_u_init(). */
+static void spdk_fio_io_u_free(struct thread_data *td, struct io_u *io_u)
+{
+	struct spdk_fio_request *fio_req = io_u->engine_data;
+
+	if (fio_req) {
+		assert(fio_req->io == io_u);
+		free(fio_req);
+		io_u->engine_data = NULL;
+	}
+}
+
+/*
+ * NVMe completion callback: record a failed command on its io_u and append the
+ * io_u to the thread's completion ring for spdk_fio_event() to hand back.
+ */
+static void spdk_fio_completion_cb(void *ctx, const struct spdk_nvme_cpl *cpl)
+{
+	struct spdk_fio_request *fio_req = ctx;
+	struct spdk_fio_thread *fio_thread = fio_req->fio_thread;
+
+	if (spdk_nvme_cpl_is_error(cpl)) {
+		fio_req->io->error = EIO;
+	}
+
+	fio_thread->iocq[fio_thread->next_completion] = fio_req->io;
+	if (++fio_thread->next_completion >= fio_thread->td->o.iodepth) {
+		fio_thread->next_completion = 0;
+	}
+}
+
+/*
+ * Submit one io_u to the namespace backing its file.
+ *
+ * Returns FIO_Q_QUEUED once the command has been handed to the driver. Returns
+ * FIO_Q_COMPLETED with io_u->error set when the file has no namespace, the
+ * data direction is not a read or write, or the driver rejects the command.
+ */
+static int spdk_fio_queue(struct thread_data *td, struct io_u *io_u)
+{
+	int rc;
+	struct spdk_fio_thread *fio_thread = td->io_ops->data;
+	struct spdk_fio_request *fio_req = io_u->engine_data;
+	struct spdk_fio_ctrlr *fio_ctrlr;
+	struct spdk_fio_ns *fio_ns = NULL;
+
+	/* Find the namespace that corresponds to the file in the io_u */
+	for (fio_ctrlr = fio_thread->ctrlr_list; fio_ctrlr != NULL; fio_ctrlr = fio_ctrlr->next) {
+		for (fio_ns = fio_ctrlr->ns_list; fio_ns != NULL; fio_ns = fio_ns->next) {
+			if (fio_ns->f == io_u->file) {
+				break;
+			}
+		}
+		if (fio_ns != NULL) {
+			break;
+		}
+	}
+	if (fio_ctrlr == NULL || fio_ns == NULL) {
+		io_u->error = ENODEV;
+		return FIO_Q_COMPLETED;
+	}
+
+	uint32_t block_size = spdk_nvme_ns_get_sector_size(fio_ns->ns);
+	uint64_t lba = io_u->offset / block_size;
+	uint32_t lba_count = io_u->xfer_buflen / block_size;
+
+	switch (io_u->ddir) {
+	case DDIR_READ:
+		rc = spdk_nvme_ns_cmd_read(fio_ns->ns, fio_ctrlr->qpair, io_u->buf, lba, lba_count,
+					   spdk_fio_completion_cb, fio_req, 0);
+		break;
+	case DDIR_WRITE:
+		rc = spdk_nvme_ns_cmd_write(fio_ns->ns, fio_ctrlr->qpair, io_u->buf, lba, lba_count,
+					    spdk_fio_completion_cb, fio_req, 0);
+		break;
+	default:
+		/* Only reads and writes are implemented. */
+		rc = EINVAL;
+		break;
+	}
+
+	if (rc != 0) {
+		/* Whatever sign the failure code carries, report a positive errno to fio. */
+		io_u->error = rc < 0 ? -rc : rc;
+		return FIO_Q_COMPLETED;
+	}
+
+	return FIO_Q_QUEUED;
+}
+
+/*
+ * Return the event'th io_u of the window reaped by the last getevents call, or
+ * NULL when the index falls outside that window.
+ */
+static struct io_u *spdk_fio_event(struct thread_data *td, int event)
+{
+	struct spdk_fio_thread *fio_thread = td->io_ops->data;
+	unsigned int idx;
+
+	/* Valid indexes are 0 .. getevents_count - 1. */
+	if (event < 0 || (unsigned int)event >= fio_thread->getevents_count) {
+		return NULL;
+	}
+
+	idx = (fio_thread->getevents_start + event) % td->o.iodepth;
+
+	return fio_thread->iocq[idx];
+}
+
+/*
+ * Poll every controller's queue pair until at least min completions have been
+ * reaped or the optional timeout t has elapsed. At most max completions are
+ * reaped per call.
+ *
+ * Returns the number of completions now available through spdk_fio_event().
+ */
+static int spdk_fio_getevents(struct thread_data *td, unsigned int min,
+			      unsigned int max, const struct timespec *t)
+{
+	struct spdk_fio_thread *fio_thread = td->io_ops->data;
+	struct spdk_fio_ctrlr *fio_ctrlr;
+	unsigned int count = 0;
+	struct timespec t0, t1;
+	uint64_t timeout = 0;
+
+	if (t) {
+		timeout = (uint64_t)t->tv_sec * 1000000000ULL + t->tv_nsec;
+		clock_gettime(CLOCK_MONOTONIC_RAW, &t0);
+	}
+
+	/* Advance the window past the events handed out by the previous call. */
+	fio_thread->getevents_start = (fio_thread->getevents_start + fio_thread->getevents_count) %
+				      fio_thread->td->o.iodepth;
+
+	for (;;) {
+		/*
+		 * Stop at max: a limit of 0 asks the driver to reap everything
+		 * available, which would overrun the window fio asked for.
+		 */
+		for (fio_ctrlr = fio_thread->ctrlr_list; fio_ctrlr != NULL && count < max;
+		     fio_ctrlr = fio_ctrlr->next) {
+			int32_t done = spdk_nvme_qpair_process_completions(fio_ctrlr->qpair, max - count);
+
+			if (done > 0) {
+				count += done;
+			}
+		}
+
+		if (count >= min || count >= max) {
+			break;
+		}
+
+		if (t) {
+			clock_gettime(CLOCK_MONOTONIC_RAW, &t1);
+			uint64_t elapse = ((uint64_t)(t1.tv_sec - t0.tv_sec) * 1000000000ULL) +
+					  t1.tv_nsec - t0.tv_nsec;
+			if (elapse > timeout) {
+				break;
+			}
+		}
+	}
+
+	fio_thread->getevents_count = count;
+
+	return count;
+}
+
+static int spdk_fio_invalidate(struct thread_data *td, struct fio_file *f)
+{
+	/* TODO: This should probably send a flush to the device, but for now just return successful. */
+	return 0;
+}
+
+/*
+ * Release everything setup and attach_cb() created for this thread: the
+ * namespace and controller lists, each queue pair, and the completion ring.
+ */
+static void spdk_fio_cleanup(struct thread_data *td)
+{
+	struct spdk_fio_thread *fio_thread = td->io_ops->data;
+	struct spdk_fio_ctrlr *fio_ctrlr, *fio_ctrlr_tmp;
+	struct spdk_fio_ns *fio_ns, *fio_ns_tmp;
+
+	/* Setup may have failed before any state was attached. */
+	if (fio_thread == NULL) {
+		return;
+	}
+
+	fio_ctrlr = fio_thread->ctrlr_list;
+	while (fio_ctrlr != NULL) {
+		fio_ns = fio_ctrlr->ns_list;
+		while (fio_ns != NULL) {
+			fio_ns_tmp = fio_ns->next;
+			free(fio_ns);
+			fio_ns = fio_ns_tmp;
+		}
+		spdk_nvme_ctrlr_free_io_qpair(fio_ctrlr->qpair);
+		spdk_nvme_detach(fio_ctrlr->ctrlr);
+		fio_ctrlr_tmp = fio_ctrlr->next;
+		free(fio_ctrlr);
+		fio_ctrlr = fio_ctrlr_tmp;
+	}
+
+	free(fio_thread->iocq);
+	free(fio_thread);
+	td->io_ops->data = NULL;
+}
+
+/* FIO imports this structure using dlsym */
+struct ioengine_ops ioengine = {
+	.name			= "spdk_fio",
+	.version		= FIO_IOOPS_VERSION,
+	.queue			= spdk_fio_queue,
+	.getevents		= spdk_fio_getevents,
+	.event			= spdk_fio_event,
+	.cleanup		= spdk_fio_cleanup,
+	.open_file		= spdk_fio_open,
+	.close_file		= spdk_fio_close,
+	.invalidate		= spdk_fio_invalidate,
+	.iomem_alloc		= spdk_fio_iomem_alloc,
+	.iomem_free		= spdk_fio_iomem_free,
+	.setup			= spdk_fio_setup,
+	.io_u_init		= spdk_fio_io_u_init,
+	.io_u_free		= spdk_fio_io_u_free,
+	.flags			= FIO_RAWIO | FIO_NOEXTEND | FIO_NODISKUTIL | FIO_MEMALIGN,
+};
diff --git a/test/lib/nvme/nvme.sh b/test/lib/nvme/nvme.sh
index 55db9adfb7..4bbe558644 100755
--- a/test/lib/nvme/nvme.sh
+++ b/test/lib/nvme/nvme.sh
@@ -6,6 +6,10 @@ testdir=$(readlink -f $(dirname $0))
 rootdir="$testdir/../../.."
source $rootdir/scripts/autotest_common.sh +function linux_iter_pci { + lspci -mm -n | grep $1 | tr -d '"' | awk -F " " '{print "0000:"$1}' +} + timing_enter nvme timing_enter unit @@ -34,6 +38,16 @@ timing_enter reserve $rootdir/examples/nvme/reserve/reserve timing_exit reserve +if [ -d /usr/src/fio ]; then + timing_enter fio_plugin + for bdf in $(linux_iter_pci 0108); do + /usr/src/fio/fio $rootdir/examples/nvme/fio_plugin/example_config.fio --filename=${bdf//:/.}/1 + break + done + + timing_exit fio_plugin +fi + #Now test nvme reset function timing_enter reset $testdir/reset/reset -q 64 -w write -s 4096 -t 2