diff --git a/examples/ioat/Makefile b/examples/ioat/Makefile
index 17c41aac28..cc105f3197 100644
--- a/examples/ioat/Makefile
+++ b/examples/ioat/Makefile
@@ -34,7 +34,7 @@ SPDK_ROOT_DIR := $(CURDIR)/../..
 include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
 
-DIRS-y += perf verify
+DIRS-y += perf verify kperf
 
 .PHONY: all clean $(DIRS-y)
diff --git a/examples/ioat/kperf/.gitignore b/examples/ioat/kperf/.gitignore
new file mode 100644
index 0000000000..bc25a0bd3e
--- /dev/null
+++ b/examples/ioat/kperf/.gitignore
@@ -0,0 +1 @@
+ioat_kperf
diff --git a/examples/ioat/kperf/Makefile b/examples/ioat/kperf/Makefile
new file mode 100644
index 0000000000..a4cfdfa6f2
--- /dev/null
+++ b/examples/ioat/kperf/Makefile
@@ -0,0 +1,53 @@
+#
+# BSD LICENSE
+#
+# Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+SPDK_ROOT_DIR := $(CURDIR)/../../..
+include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
+
+APP = ioat_kperf
+
+C_SRCS := ioat_kperf.c
+
+CFLAGS += -I.
+
+LIBS += -lrt
+
+all: $(APP)
+
+$(APP): $(OBJS)
+	$(LINK_C)
+
+clean:
+	$(CLEAN_C) $(APP)
+
+include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk
diff --git a/examples/ioat/kperf/README b/examples/ioat/kperf/README
new file mode 100644
index 0000000000..61bb8e153d
--- /dev/null
+++ b/examples/ioat/kperf/README
@@ -0,0 +1,42 @@
+IOAT Kernel Driver Test Tool
+============================
+
+This tool measures the performance of the IOAT kernel driver so that it
+can be compared with the SPDK user space IOAT driver. It consists of two
+components: a kernel test module and a user space application. The kernel
+test module allocates one kernel thread per DMA channel; the threads are
+not pinned to specific CPU cores, but each one is created on the same
+NUMA socket as its DMA channel. The user space application controls the
+kernel test module through a debugfs interface.
+
+Building & Usage
+================
+
+1. Compile and load the kernel test module first.
+
+   modprobe -v ioatdma
+   cd kmod && make && insmod dmaperf.ko
+
+2. Run the test application.
+
+   Parameters:
+   [-h usage]
+   [-n number of DMA channels]
+   [-q queue depth, per DMA channel]
+   [-s transfer size per descriptor, given as 2^n bytes]
+   [-t total data to transfer per DMA channel, given as 2^n bytes]
+
+   For example: ./ioat_kperf -n 4 -q 128 -s 12 -t 32
+   requests 4 channels at queue depth 128, with 2^12 = 4096 byte
+   descriptors and 2^32 bytes = 4 GB of data per channel:
+
+   Total 4 Channels, Queue_Depth 128, Transfer Size 4096 Bytes, Total Transfer Size 4 GB
+   Running I/O . . . .
+   Channel 0 Performance Data 1414 MB/s
+   Channel 1 Performance Data 1413 MB/s
+   Channel 2 Performance Data 1413 MB/s
+   Channel 3 Performance Data 1415 MB/s
+
+OS Support
+==========
+We have tested several Linux distributions; currently Fedora 21/22 with
+kernel version >= 3.17 is supported.
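+
+Manual Operation
+================
+The test application is only a thin wrapper around the kernel module's
+debugfs files, so the module can also be driven by hand. A minimal
+sketch, assuming debugfs is mounted at /sys/kernel/debug:
+
+   cd /sys/kernel/debug/dmaperf/dmaperf
+   echo 4 > threads            # number of DMA channels
+   echo 1 > run                # start the test (a write toggles the run state)
+   cat status                  # reads "running" until the channels go idle
+   cat thread_0/copied         # bytes copied by channel 0
+   cat thread_0/elapsed_time   # run time in microseconds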
diff --git a/examples/ioat/kperf/ioat_kperf.c b/examples/ioat/kperf/ioat_kperf.c
new file mode 100644
index 0000000000..335adf41f5
--- /dev/null
+++ b/examples/ioat/kperf/ioat_kperf.c
@@ -0,0 +1,320 @@
+/*-
+ * BSD LICENSE
+ *
+ * Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <assert.h>
+
+static int
+check_modules(char *driver_name)
+{
+	FILE *fd;
+	const char *proc_modules = "/proc/modules";
+	char buffer[256];
+
+	fd = fopen(proc_modules, "r");
+	if (!fd)
+		return -1;
+
+	while (fgets(buffer, sizeof(buffer), fd)) {
+		if (strstr(buffer, driver_name) != NULL) {
+			fclose(fd);
+			return 0;
+		}
+	}
+	fclose(fd);
+
+	return -1;
+}
+
+static int
+get_u32_from_file(const char *sysfs_file, uint32_t *value)
+{
+	FILE *f;
+	char buf[BUFSIZ];
+
+	f = fopen(sysfs_file, "r");
+	if (f == NULL) {
+		return -1;
+	}
+
+	if (fgets(buf, sizeof(buf), f) != NULL) {
+		*value = strtoul(buf, NULL, 10);
+	}
+
+	fclose(f);
+
+	return 0;
+}
+
+static int
+get_str_from_file(const char *sysfs_file, char *buf, int len)
+{
+	FILE *f;
+
+	f = fopen(sysfs_file, "r");
+	if (f == NULL) {
+		return -1;
+	}
+
+	if (fgets(buf, len, f) != NULL) {
+		fclose(f);
+		return 0;
+	}
+
+	fclose(f);
+	return -1;
+}
+
+static int
+put_u32_to_file(const char *sysfs_file, uint32_t value)
+{
+	FILE *f;
+	int n;
+	char buf[BUFSIZ];
+
+	f = fopen(sysfs_file, "w");
+	if (f == NULL) {
+		return -1;
+	}
+
+	n = snprintf(buf, sizeof(buf), "%u", value);
+	if ((n < 0) || (n >= (int)sizeof(buf))) {
+		fclose(f);
+		return -1;
+	}
+
+	if (fwrite(buf, n, 1, f) == 0) {
+		fclose(f);
+		return -1;
+	}
+
+	fclose(f);
+	return 0;
+}
+
+static int
+get_u64_from_file(const char *sysfs_file, uint64_t *value)
+{
+	FILE *f;
+	char buf[BUFSIZ];
+
+	f = fopen(sysfs_file, "r");
+	if (f == NULL) {
+		return -1;
+	}
+
+	if (fgets(buf, sizeof(buf), f) != NULL) {
+		*value = strtoull(buf, NULL, 10);
+	}
+
+	fclose(f);
+
+	return 0;
+}
+
+static void
+usage(char *program_name)
+{
+	printf("%s options\n", program_name);
+	printf("\t[-h usage]\n");
+	printf("\t[-n number of DMA channels]\n");
+	printf("\t[-q queue depth, per DMA channel]\n");
+	printf("\t[-s transfer size per descriptor, 2^n bytes]\n");
+	printf("\t[-t total data to transfer per DMA channel, 2^n bytes]\n");
+}
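+
+/*
+ * All knobs live under /sys/kernel/debug/dmaperf/dmaperf: main() writes
+ * the configuration (threads, queue_depth, transfer_size_order,
+ * total_size_order), writes "run" to start the channels, polls "status"
+ * until every channel is idle, then reads each thread_N directory's
+ * "copied" (bytes) and "elapsed_time" (microseconds) to report MB/s.
+ */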
+int main(int argc, char *argv[])
+{
+	int op;
+	int rc;
+	char buf[BUFSIZ];
+	uint32_t i, threads = 0;
+	uint32_t ring_size, queue_depth = 0;
+	uint32_t transfer_size, order = 0;
+	uint64_t total_size, copied = 0;
+	uint64_t elapsed_time = 0;
+	char channel[1024];
+
+	if (check_modules("ioatdma")) {
+		fprintf(stderr, "ioatdma driver not loaded,"
+			" run `modprobe -v ioatdma` first\n");
+		return -1;
+	}
+	if (check_modules("dmaperf")) {
+		fprintf(stderr, "Kernel Ioat test driver not loaded,"
+			" run `insmod dmaperf.ko` in the kmod directory\n");
+		return -1;
+	}
+
+	rc = get_u32_from_file("/sys/module/ioatdma/parameters/ioat_ring_alloc_order",
+			       &order);
+	if (rc < 0) {
+		fprintf(stderr, "Cannot get default ioat queue depth\n");
+		return -1;
+	}
+	ring_size = 1UL << order;
+
+	while ((op = getopt(argc, argv, "hn:q:s:t:")) != -1) {
+		switch (op) {
+		case 'n':
+			threads = atoi(optarg);
+			rc = put_u32_to_file("/sys/kernel/debug/dmaperf/dmaperf/threads", threads);
+			if (rc < 0) {
+				fprintf(stderr, "Cannot set dma channels\n");
+				return -1;
+			}
+			break;
+		case 'q':
+			queue_depth = atoi(optarg);
+			if (queue_depth > ring_size) {
+				fprintf(stderr, "Max Ioat DMA ring size %u\n", ring_size);
+				return -1;
+			}
+			rc = put_u32_to_file("/sys/kernel/debug/dmaperf/dmaperf/queue_depth", queue_depth);
+			if (rc < 0) {
+				fprintf(stderr, "Cannot set queue depth\n");
+				return -1;
+			}
+			break;
+		case 's':
+			order = atoi(optarg);
+			rc = put_u32_to_file("/sys/kernel/debug/dmaperf/dmaperf/transfer_size_order", order);
+			if (rc < 0) {
+				fprintf(stderr, "Cannot set descriptor transfer size order\n");
+				return -1;
+			}
+			break;
+		case 't':
+			order = atoi(optarg);
+			rc = put_u32_to_file("/sys/kernel/debug/dmaperf/dmaperf/total_size_order", order);
+			if (rc < 0) {
+				fprintf(stderr, "Cannot set channel total transfer size order\n");
+				return -1;
+			}
+			break;
+		case 'h':
+			usage(argv[0]);
+			exit(0);
+		default:
+			usage(argv[0]);
+			exit(1);
+		}
+	}
+
+	/* get driver configuration */
+	rc = get_u32_from_file("/sys/kernel/debug/dmaperf/dmaperf/transfer_size_order",
+			       &order);
+	if (rc < 0) {
+		fprintf(stderr, "Cannot get channel descriptor transfer size\n");
+		return -1;
+	}
+	transfer_size = 1UL << order;
+
+	rc = get_u32_from_file("/sys/kernel/debug/dmaperf/dmaperf/total_size_order",
+			       &order);
+	if (rc < 0) {
+		fprintf(stderr, "Cannot get channel total transfer size\n");
+		return -1;
+	}
+	total_size = 1ULL << order;
+
+	rc = get_u32_from_file("/sys/kernel/debug/dmaperf/dmaperf/threads",
+			       &threads);
+	if (rc < 0) {
+		fprintf(stderr, "Cannot get dma channel threads\n");
+		return -1;
+	}
+
+	rc = get_u32_from_file("/sys/kernel/debug/dmaperf/dmaperf/queue_depth",
+			       &queue_depth);
+	if (rc < 0) {
+		fprintf(stderr, "Cannot get queue depth\n");
+		return -1;
+	}
+
+	fprintf(stdout,
+		"Total %u Channels, Queue_Depth %u, Transfer Size %u Bytes, Total Transfer Size %"PRIu64" GB\n",
+		threads, queue_depth, transfer_size, total_size >> 30ULL);
+
+	/* run the channels */
+	rc = put_u32_to_file("/sys/kernel/debug/dmaperf/dmaperf/run", 1);
+	if (rc < 0) {
+		fprintf(stderr, "Cannot run the channels\n");
+		return -1;
+	}
+
+	fprintf(stdout, "Running I/O ");
+	fflush(stdout);
+	/* wait for all channels to become idle */
+	while (!get_str_from_file("/sys/kernel/debug/dmaperf/dmaperf/status", buf, BUFSIZ)) {
+		if (strstr(buf, "idle") != NULL) {
+			fprintf(stdout, "\n");
+			fflush(stdout);
+			sleep(1);
+			break;
+		}
+		fprintf(stdout, ". ");
+		fflush(stdout);
+		sleep(1);
+	}
+
+	/* collect per-channel performance data */
+	for (i = 0; i < threads; i++) {
+		/* total data transfer length for the DMA channel, in bytes */
+		sprintf(channel, "/sys/kernel/debug/dmaperf/dmaperf/thread_%u/copied", i);
+		rc = get_u64_from_file(channel, &copied);
+		if (rc < 0) {
+			fprintf(stderr, "Cannot get channel copied bytes\n");
+			return -1;
+		}
+		/* time in microseconds for the total data transfer */
+		sprintf(channel, "/sys/kernel/debug/dmaperf/dmaperf/thread_%u/elapsed_time", i);
+		rc = get_u64_from_file(channel, &elapsed_time);
+		if (rc < 0) {
+			fprintf(stderr, "Cannot get channel elapsed time\n");
+			return -1;
+		}
+		assert(elapsed_time != 0);
+		/* bytes / microseconds ~= MB/s */
+		fprintf(stdout, "Channel %u Performance Data %"PRIu64" MB/s\n",
+			i, copied / elapsed_time);
+	}
+
+	return 0;
+}
diff --git a/examples/ioat/kperf/kmod/Makefile b/examples/ioat/kperf/kmod/Makefile
new file mode 100644
index 0000000000..3f03823233
--- /dev/null
+++ b/examples/ioat/kperf/kmod/Makefile
@@ -0,0 +1,42 @@
+#
+# BSD LICENSE
+#
+# Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#
+# * Redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer.
+# * Redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in
+# the documentation and/or other materials provided with the
+# distribution.
+# * Neither the name of Intel Corporation nor the names of its
+# contributors may be used to endorse or promote products derived
+# from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+
+obj-m := dmaperf.o
+dmaperf-y := dma_perf.o
+
+KDIR := /lib/modules/$(shell uname -r)/build
+
+all:
+	$(MAKE) -C $(KDIR) M=$(shell pwd) modules
+clean:
+	$(MAKE) -C $(KDIR) M=$(shell pwd) clean
diff --git a/examples/ioat/kperf/kmod/dma_perf.c b/examples/ioat/kperf/kmod/dma_perf.c
new file mode 100644
index 0000000000..12937405cd
--- /dev/null
+++ b/examples/ioat/kperf/kmod/dma_perf.c
@@ -0,0 +1,669 @@
+/*
+ * This file is provided under a dual BSD/GPLv2 license. When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2015 Intel Corporation. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *   * Redistributions of source code must retain the above copyright
+ *     notice, this list of conditions and the following disclaimer.
+ *   * Redistributions in binary form must reproduce the above copyright
+ *     notice, this list of conditions and the following disclaimer in
+ *     the documentation and/or other materials provided with the
+ *     distribution.
+ *   * Neither the name of Intel Corporation nor the names of its
+ *     contributors may be used to endorse or promote products derived
+ *     from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * PCIe DMA Perf Linux driver
+ */
+
+#include <linux/init.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/kthread.h>
+#include <linux/mm.h>
+#include <linux/sched.h>
+#include <linux/wait.h>
+#include <linux/ktime.h>
+#include <linux/delay.h>
+#include <linux/fs.h>
+#include <linux/slab.h>
+#include <linux/spinlock.h>
+#include <linux/debugfs.h>
+#include <linux/dmaengine.h>
+#include <linux/dma-mapping.h>
+#include <linux/nodemask.h>
+
+#define DRIVER_NAME		"dma_perf"
+#define DRIVER_DESCRIPTION	"PCIe DMA Performance Measurement Tool"
+
+#define DRIVER_LICENSE		"Dual BSD/GPL"
+#define DRIVER_VERSION		"1.0"
+#define DRIVER_AUTHOR		"Dave Jiang <dave.jiang@intel.com>"
+
+#define MAX_THREADS		32
+#define MAX_TEST_SIZE		(1024 * 1024) /* 1M */
+#define DMA_CHANNELS_PER_NODE	8
+
+MODULE_LICENSE(DRIVER_LICENSE);
+MODULE_VERSION(DRIVER_VERSION);
+MODULE_AUTHOR(DRIVER_AUTHOR);
+MODULE_AUTHOR("Changpeng Liu <changpeng.liu@intel.com>");
+MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
+
+static struct dentry *perf_debugfs_dir;
+static struct perf_ctx *g_perf = NULL;
+
+static unsigned int seg_order = 12; /* 4K */
+static unsigned int queue_depth = 256;
+static unsigned int run_order = 32; /* 4G */
+
+struct perf_mw {
+	size_t buf_size;
+	void *virt_addr;
+};
+
+struct perf_ctx;
+
+struct pthr_ctx {
+	struct dentry *debugfs_thr_dir;
+	struct dentry *debugfs_copied;
+	struct dentry *debugfs_elapsed_time;
+	struct device *dev;
+	int node;
+	wait_queue_head_t wq;
+	struct perf_mw mw;
+	struct task_struct *thread;
+	struct perf_ctx *perf;
+	atomic_t dma_sync;
+	struct dma_chan *dma_chan;
+	int dma_up;
+	int dma_down;
+	int dma_prep_err;
+	u64 copied;
+	u64 elapsed_time;
+};
+
+struct perf_ctx {
+	spinlock_t db_lock;
+	struct dentry *debugfs_node_dir;
+	struct dentry *debugfs_run;
+	struct dentry *debugfs_threads;
+	struct dentry *debugfs_queue_depth;
+	struct dentry *debugfs_transfer_size_order;
+	struct dentry *debugfs_total_size_order;
+	struct dentry *debugfs_status;
+	u8 numa_nodes;
+	u8 perf_threads;
+	bool run;
+	struct pthr_ctx pthr_ctx[MAX_THREADS];
+	atomic_t tsync;
+};
+
+static void perf_free_mw(struct pthr_ctx *pctx);
+static int perf_set_mw(struct pthr_ctx *pctx, size_t size);
+
+static void perf_copy_callback(void *data)
+{
+	struct pthr_ctx *pctx = data;
+
+	atomic_dec(&pctx->dma_sync);
+	pctx->dma_down++;
+
+	wake_up(&pctx->wq);
+}
+
+static ssize_t perf_copy(struct pthr_ctx *pctx, char *dst,
+			 char *src, size_t size)
+{
+	struct dma_async_tx_descriptor *txd;
+	struct dma_chan *chan = pctx->dma_chan;
+	struct dma_device *device;
+	struct dmaengine_unmap_data *unmap;
+	dma_cookie_t cookie;
+	size_t src_off, dst_off;
+	int retries = 0;
+
+	if (!chan) {
+		pr_err("DMA engine does not exist\n");
+		return -EINVAL;
+	}
+
+	device = chan->device;
+	src_off = (size_t)src & ~PAGE_MASK;
+	dst_off = (size_t)dst & ~PAGE_MASK;
+
+	if (!is_dma_copy_aligned(device, src_off, dst_off, size))
+		return -ENODEV;
+
+	unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT);
+	if (!unmap)
+		return -ENOMEM;
+
+	unmap->len = size;
+	unmap->addr[0] = dma_map_page(device->dev, virt_to_page(src),
+				      src_off, size, DMA_TO_DEVICE);
+	if (dma_mapping_error(device->dev, unmap->addr[0]))
+		goto err_get_unmap;
+
+	unmap->to_cnt = 1;
+
+	unmap->addr[1] = dma_map_page(device->dev, virt_to_page(dst),
+				      dst_off, size, DMA_FROM_DEVICE);
+	if (dma_mapping_error(device->dev, unmap->addr[1]))
+		goto err_get_unmap;
+	unmap->from_cnt = 1;
+
+dma_prep_retry:
+	txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
+					     unmap->addr[0],
+					     size, DMA_PREP_INTERRUPT);
+	if (!txd) {
+		if (retries++ > 20) {
+			pctx->dma_prep_err++;
+			goto err_get_unmap;
+		} else {
+			set_current_state(TASK_INTERRUPTIBLE);
+			schedule_timeout(50);
+			goto dma_prep_retry;
+		}
+	}
+
+	txd->callback = perf_copy_callback;
+	txd->callback_param = pctx;
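+	/*
+	 * Tie the unmap data to the descriptor so the DMA mappings are
+	 * released automatically once the engine completes this copy;
+	 * perf_copy_callback() then drops dma_sync and wakes the submitter.
+	 */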
+	dma_set_unmap(txd, unmap);
+
+	cookie = dmaengine_submit(txd);
+	if (dma_submit_error(cookie))
+		goto err_set_unmap;
+
+	/* drop our reference; the descriptor holds its own until completion */
+	dmaengine_unmap_put(unmap);
+
+	atomic_inc(&pctx->dma_sync);
+
+	pctx->dma_up++;
+	dma_async_issue_pending(chan);
+
+	return size;
+
+err_set_unmap:
+	dmaengine_unmap_put(unmap);
+err_get_unmap:
+	dmaengine_unmap_put(unmap);
+	return 0;
+}
+
+static int perf_move_data(struct pthr_ctx *pctx, char *dst, char *src,
+			  u64 buf_size, u64 win_size, u64 total)
+{
+	int chunks, total_chunks, i;
+	int copied_chunks = 0;
+	u64 result;
+	char *tmp = dst;
+	u64 perf, diff_us;
+	ktime_t kstart, kstop, kdiff;
+
+	chunks = win_size / buf_size;
+	total_chunks = total / buf_size;
+
+	pr_info("%s: chunks: %d total_chunks: %d\n", current->comm, chunks, total_chunks);
+
+	kstart = ktime_get();
+
+	for (i = 0; i < total_chunks; i++) {
+		/* throttle submissions to the configured queue depth */
+		wait_event_interruptible(pctx->wq, atomic_read(&pctx->dma_sync) < queue_depth);
+
+		result = perf_copy(pctx, tmp, src, buf_size);
+		pctx->copied += result;
+		copied_chunks++;
+		if (copied_chunks == chunks) {
+			/* wrap around to the start of the destination window */
+			tmp = dst;
+			copied_chunks = 0;
+		} else {
+			tmp += buf_size;
+		}
+	}
+
+	pr_info("%s: All DMA descriptors submitted\n", current->comm);
+
+	/* FIXME: need a timeout here eventually */
+	while (atomic_read(&pctx->dma_sync) != 0)
+		msleep(1);
+
+	pr_info("%s: dma_up: %d dma_down: %d dma_prep_err: %d\n",
+		current->comm, pctx->dma_up, pctx->dma_down,
+		pctx->dma_prep_err);
+
+	kstop = ktime_get();
+	kdiff = ktime_sub(kstop, kstart);
+	diff_us = ktime_to_us(kdiff);
+
+	pr_info("%s: copied %llu bytes\n", current->comm, pctx->copied);
+
+	pr_info("%s: lasted %llu usecs\n", current->comm, diff_us);
+
+	perf = pctx->copied / diff_us;
+
+	pr_info("%s: MBytes/s: %llu\n", current->comm, perf);
+
+	pctx->elapsed_time = diff_us;
+
+	return 0;
+}
+
+static bool perf_dma_filter_fn(struct dma_chan *chan, void *node)
+{
+	return dev_to_node(&chan->dev->device) == (int)(unsigned long)node;
+}
+
+static int dma_perf_thread(void *data)
+{
+	struct pthr_ctx *pctx = data;
+	struct perf_ctx *perf = pctx->perf;
+	struct perf_mw *mw = &pctx->mw;
+	char *dst;
+	u64 win_size, buf_size, total;
+	void *src;
+	int rc, node;
+	struct dma_chan *dma_chan = NULL;
+
+	pr_info("kthread %s starting...\n", current->comm);
+
+	node = pctx->node;
+
+	if (!pctx->dma_chan) {
+		dma_cap_mask_t dma_mask;
+
+		dma_cap_zero(dma_mask);
+		dma_cap_set(DMA_MEMCPY, dma_mask);
+		/* request a memcpy-capable channel on this thread's NUMA node */
+		dma_chan = dma_request_channel(dma_mask, perf_dma_filter_fn,
+					       (void *)(unsigned long)node);
+		if (!dma_chan) {
+			pr_warn("%s: cannot acquire DMA channel, quitting\n",
+				current->comm);
+			return -ENODEV;
+		}
+		pctx->dma_chan = dma_chan;
+		pctx->dev = dma_chan->device->dev;
+	}
+
+	src = kmalloc_node(MAX_TEST_SIZE, GFP_KERNEL, node);
+	if (!src) {
+		rc = -ENOMEM;
+		goto err;
+	}
+
+	rc = perf_set_mw(pctx, MAX_TEST_SIZE);
+	if (rc < 0) {
+		pr_err("%s: set mw failed\n", current->comm);
+		rc = -ENXIO;
+		goto err;
+	}
+
+	win_size = mw->buf_size;
+	buf_size = 1ULL << seg_order;
+	total = 1ULL << run_order;
+
+	if (buf_size > MAX_TEST_SIZE)
+		buf_size = MAX_TEST_SIZE;
+
+	dst = (char *)mw->virt_addr;
+
+	/* barrier: wait until every test thread is ready to start */
+	atomic_inc(&perf->tsync);
+	while (atomic_read(&perf->tsync) != perf->perf_threads)
+		schedule();
+
+	rc = perf_move_data(pctx, dst, src, buf_size, win_size, total);
+
+	atomic_dec(&perf->tsync);
+
+	if (rc < 0) {
+		pr_err("%s: failed\n", current->comm);
+		rc = -ENXIO;
+		goto err;
+	}
+
+	kfree(src);
+	return 0;
+
+err:
+	kfree(src);
+
+	if (dma_chan) {
+		dma_release_channel(dma_chan);
+		pctx->dma_chan = NULL;
+	}
+
+	return rc;
+}
+
+static void perf_free_mw(struct pthr_ctx *pctx)
+{
+	struct perf_mw *mw = &pctx->mw;
+
+	if (!mw->virt_addr)
+		return;
+
+	kfree(mw->virt_addr);
+	mw->buf_size = 0;
+	mw->virt_addr = NULL;
+}
+
+static int perf_set_mw(struct pthr_ctx *pctx, size_t size)
+{
+	struct perf_mw *mw = &pctx->mw;
+
+	if (!size)
+		return -EINVAL;
+
+	mw->buf_size = size;
+	mw->virt_addr = kmalloc_node(size, GFP_KERNEL, pctx->node);
+	if (!mw->virt_addr) {
+		mw->buf_size = 0;
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
+				size_t count, loff_t *offp)
+{
+	struct perf_ctx *perf = filp->private_data;
+	char *buf;
+	ssize_t ret, out_offset;
+
+	if (!perf)
+		return 0;
+
+	buf = kmalloc(64, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+	out_offset = snprintf(buf, 64, "%d\n", perf->run);
+	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
+	kfree(buf);
+
+	return ret;
+}
+
+static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
+				 size_t count, loff_t *offp)
+{
+	struct perf_ctx *perf = filp->private_data;
+	int node, i;
+
+	if (perf->perf_threads == 0)
+		return 0;
+
+	if (atomic_read(&perf->tsync) == 0)
+		perf->run = false;
+
+	if (perf->run == true) {
+		/* let's stop the threads */
+		perf->run = false;
+		for (i = 0; i < MAX_THREADS; i++) {
+			if (perf->pthr_ctx[i].thread) {
+				kthread_stop(perf->pthr_ctx[i].thread);
+				perf->pthr_ctx[i].thread = NULL;
+			} else
+				break;
+		}
+	} else {
+		perf->run = true;
+
+		if (perf->perf_threads > MAX_THREADS) {
+			perf->perf_threads = MAX_THREADS;
+			pr_info("Reset total threads to: %u\n", MAX_THREADS);
+		}
+
+		/* no greater than 1M */
+		if (seg_order > 20) {
+			seg_order = 20;
+			pr_info("Fix seg_order to %u\n", seg_order);
+		}
+
+		if (run_order < seg_order) {
+			run_order = seg_order;
+			pr_info("Fix run_order to %u\n", run_order);
+		}
+
+		/* launch kernel threads */
+		for (i = 0; i < perf->perf_threads; i++) {
+			struct pthr_ctx *pctx;
+
+			pctx = &perf->pthr_ctx[i];
+			atomic_set(&pctx->dma_sync, 0);
+			pctx->perf = perf;
+			pctx->elapsed_time = 0;
+			pctx->copied = 0;
+
+			init_waitqueue_head(&pctx->wq);
+
+			/* NUMA socket node */
+			pctx->node = i / DMA_CHANNELS_PER_NODE;
+			node = pctx->node;
+
+			pctx->thread =
+				kthread_create_on_node(dma_perf_thread,
+						       (void *)pctx,
+						       node, "dma_perf %d", i);
+			if (!IS_ERR(pctx->thread)) {
+				wake_up_process(pctx->thread);
+			} else {
+				/* kthread_create_on_node() returns ERR_PTR, not NULL */
+				pctx->thread = NULL;
+				perf->run = false;
+				/* stop any threads that already started */
+				for (i = 0; i < MAX_THREADS; i++) {
+					if (perf->pthr_ctx[i].thread) {
+						kthread_stop(perf->pthr_ctx[i].thread);
+						perf->pthr_ctx[i].thread = NULL;
+					} else
+						break;
+				}
+			}
+
+			if (perf->run == false)
+				return -ENXIO;
+		}
+	}
+
+	return count;
+}
+
+static const struct file_operations dma_perf_debugfs_run = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = debugfs_run_read,
+	.write = debugfs_run_write,
+};
+
+static ssize_t debugfs_status_read(struct file *filp, char __user *ubuf,
+				   size_t count, loff_t *offp)
+{
+	struct perf_ctx *perf = filp->private_data;
+	char *buf;
+	ssize_t ret, out_offset;
+
+	if (!perf)
+		return 0;
+
+	buf = kmalloc(64, GFP_KERNEL);
+	if (!buf)
+		return -ENOMEM;
+	out_offset = snprintf(buf, 64, "%s\n",
+			      atomic_read(&perf->tsync) ? "running" : "idle");
+	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
+	kfree(buf);
+
+	return ret;
+}
+
+static const struct file_operations dma_perf_debugfs_status = {
+	.owner = THIS_MODULE,
+	.open = simple_open,
+	.read = debugfs_status_read,
+};
+
+/*
+ * debugfs layout created below (relative to the debugfs mount point):
+ *
+ *   dmaperf/dmaperf/run                    write to start/stop a test
+ *   dmaperf/dmaperf/status                 "running" or "idle"
+ *   dmaperf/dmaperf/threads                number of DMA channels to use
+ *   dmaperf/dmaperf/queue_depth            outstanding descriptors per channel
+ *   dmaperf/dmaperf/transfer_size_order    descriptor size, 2^n bytes
+ *   dmaperf/dmaperf/total_size_order       per-channel total, 2^n bytes
+ *   dmaperf/dmaperf/thread_N/copied        bytes copied by channel N
+ *   dmaperf/dmaperf/thread_N/elapsed_time  channel N run time in microseconds
+ */
+static int perf_debugfs_setup(struct perf_ctx *perf)
+{
+	int i;
+	char temp_name[64];
+
+	if (!perf_debugfs_dir)
+		return -ENODEV;
+
+	perf->debugfs_node_dir = debugfs_create_dir("dmaperf",
+						    perf_debugfs_dir);
+	if (!perf->debugfs_node_dir)
+		return -ENODEV;
+
+	perf->debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR,
+						perf->debugfs_node_dir, perf,
+						&dma_perf_debugfs_run);
+	if (!perf->debugfs_run)
+		return -ENODEV;
+
+	perf->debugfs_status = debugfs_create_file("status", S_IRUSR,
+						   perf->debugfs_node_dir, perf,
+						   &dma_perf_debugfs_status);
+	if (!perf->debugfs_status)
+		return -ENODEV;
+
+	perf->debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR,
+						  perf->debugfs_node_dir,
+						  &perf->perf_threads);
+	if (!perf->debugfs_threads)
+		return -ENODEV;
+
+	perf->debugfs_queue_depth = debugfs_create_u32("queue_depth", S_IRUSR | S_IWUSR,
+						       perf->debugfs_node_dir,
+						       &queue_depth);
+	if (!perf->debugfs_queue_depth)
+		return -ENODEV;
+
+	perf->debugfs_transfer_size_order = debugfs_create_u32("transfer_size_order", S_IRUSR | S_IWUSR,
+							       perf->debugfs_node_dir,
+							       &seg_order);
+	if (!perf->debugfs_transfer_size_order)
+		return -ENODEV;
+
+	perf->debugfs_total_size_order = debugfs_create_u32("total_size_order", S_IRUSR | S_IWUSR,
+							    perf->debugfs_node_dir,
+							    &run_order);
+	if (!perf->debugfs_total_size_order)
+		return -ENODEV;
+
+	for (i = 0; i < MAX_THREADS; i++) {
+		struct pthr_ctx *pctx = &perf->pthr_ctx[i];
+
+		sprintf(temp_name, "thread_%d", i);
+		pctx->debugfs_thr_dir = debugfs_create_dir(temp_name, perf->debugfs_node_dir);
+		if (!pctx->debugfs_thr_dir)
+			return -ENODEV;
+
+		pctx->debugfs_copied = debugfs_create_u64("copied", S_IRUSR,
+							  pctx->debugfs_thr_dir,
+							  &pctx->copied);
+		if (!pctx->debugfs_copied)
+			return -ENODEV;
+
+		pctx->debugfs_elapsed_time = debugfs_create_u64("elapsed_time", S_IRUSR,
+								pctx->debugfs_thr_dir,
+								&pctx->elapsed_time);
+		if (!pctx->debugfs_elapsed_time)
+			return -ENODEV;
+	}
+
+	return 0;
+}
+
+static int perf_probe(void)
+{
+	struct perf_ctx *perf;
+	int rc = 0;
+
+	perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, 0);
+	if (!perf) {
+		rc = -ENOMEM;
+		goto err_perf;
+	}
+
+	perf->numa_nodes = num_online_nodes();
+	perf->perf_threads = 1;
+	atomic_set(&perf->tsync, 0);
+	perf->run = false;
+	spin_lock_init(&perf->db_lock);
+
+	if (debugfs_initialized() && !perf_debugfs_dir) {
+		perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
+		if (!perf_debugfs_dir) {
+			rc = -ENODEV;
+			goto err_ctx;
+		}
+
+		rc = perf_debugfs_setup(perf);
+		if (rc)
+			goto err_ctx;
+	}
+
+	g_perf = perf;
+	return 0;
+
+err_ctx:
+	kfree(perf);
+err_perf:
+	return rc;
+}
+
+static void perf_remove(void)
+{
+	int i;
+	struct perf_ctx *perf = g_perf;
+
+	if (perf_debugfs_dir) {
+		debugfs_remove_recursive(perf_debugfs_dir);
+		perf_debugfs_dir = NULL;
+	}
+
+	for (i = 0; i < MAX_THREADS; i++) {
+		struct pthr_ctx *pctx = &perf->pthr_ctx[i];
+
+		if (pctx->dma_chan)
+			dma_release_channel(pctx->dma_chan);
+		perf_free_mw(pctx);
+	}
+
+	kfree(perf);
+}
+
+static int __init perf_init_module(void)
+{
+	pr_info("DMA Performance Test Init\n");
+	return perf_probe();
+}
+module_init(perf_init_module);
+
+static void __exit perf_exit_module(void)
+{
+	pr_info("DMA Performance Test Exit\n");
+	perf_remove();
+}
+module_exit(perf_exit_module);