ioat: add ioat kernel driver performance test harness

To enable a performance comparison between the ioat kernel driver
and the user-space driver, this adds a kernel-driver test harness.
All of the workload executes in kernel space and is controlled via
sysfs.

Change-Id: I2c8d826283405a5e1c9ba6a033503bcb98541370
Signed-off-by: Changpeng Liu <changpeng.liu@intel.com>
This commit is contained in:
Changpeng Liu 2015-12-16 14:27:50 +08:00
parent fceb072b09
commit 0e6463d9b3
7 changed files with 1128 additions and 1 deletions

View File

@ -34,7 +34,7 @@
SPDK_ROOT_DIR := $(CURDIR)/../..
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
DIRS-y += perf verify
DIRS-y += perf verify kperf
.PHONY: all clean $(DIRS-y)

1
examples/ioat/kperf/.gitignore vendored Normal file
View File

@ -0,0 +1 @@
ioat_kperf

View File

@ -0,0 +1,53 @@
#
# BSD LICENSE
#
# Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Build the ioat_kperf user-space control tool using the standard SPDK
# example makefile skeleton.  Recipe lines must be tab-indented.
SPDK_ROOT_DIR := $(CURDIR)/../../..
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk

APP = ioat_kperf

C_SRCS := ioat_kperf.c

CFLAGS += -I.
LIBS += -lrt

all: $(APP)

$(APP): $(OBJS)
	$(LINK_C)

clean:
	$(CLEAN_C) $(APP)

include $(SPDK_ROOT_DIR)/mk/spdk.deps.mk

View File

@ -0,0 +1,42 @@
IOAT Kernel Driver Test Tool
============================
To enable a performance comparison with the user-space IOAT driver,
we developed a test tool based on the IOAT kernel driver. The tool
consists of two components: a kernel test module and a user-space
application. The kernel test module allocates one kernel thread per
DMA channel; the threads are not pinned to specific CPU cores, but
each is guaranteed to run on the same NUMA socket as its DMA
channel. The user-space application communicates with the kernel
test module via a sysfs interface.
Building & Usage
================
1. Compile and load the kernel test module first.
modprobe -v ioatdma
cd kmod && make && insmod dmaperf.ko
2. Run the test application.
Parameters:
[-h usage]
[-n number of DMA channels]
[-q queue depth, per DMA channel]
[-s [n^2] transfer size, per descriptor]
[-t total [n^2] data to transfer, per DMA channel]
For example: ./ioat_kperf -n 4 -q 128 -s 12 -t 32
Total 4 Channels, Queue_Depth 128, Transfer Size 4096 Bytes, Total Transfer Size 4 GB
Running I/O . . . .
Channel 0 Performance Data 1414 MB/s
Channel 1 Performance Data 1413 MB/s
Channel 2 Performance Data 1413 MB/s
Channel 3 Performance Data 1415 MB/s
OS Support
==========
We have tested several Linux distributions, currently Fedora 21/22 with kernel
version >= 3.17 are supported.

View File

@ -0,0 +1,320 @@
/*-
* BSD LICENSE
*
* Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include <stdio.h>
#include <unistd.h>
#include <stdlib.h>
#include <string.h>
#include <inttypes.h>
#include <assert.h>
/*
 * Return 0 if a module whose name contains driver_name appears in
 * /proc/modules, -1 if it is absent or /proc/modules cannot be read.
 */
static int
check_modules(char *driver_name)
{
	const char *proc_modules = "/proc/modules";
	char line[256];
	FILE *fp;
	int found = -1;

	fp = fopen(proc_modules, "r");
	if (fp == NULL)
		return -1;

	while (fgets(line, sizeof(line), fp) != NULL) {
		/* substring match: "ioatdma" also matches dependent entries */
		if (strstr(line, driver_name) != NULL) {
			found = 0;
			break;
		}
	}

	fclose(fp);
	return found;
}
/*
 * Read a decimal uint32 from the first line of sysfs_file into *value.
 * Returns 0 on success, -1 if the file cannot be opened or is empty.
 * Fix: the original returned 0 even when fgets() read nothing, leaving
 * *value untouched and reporting stale data as success.
 */
static int
get_u32_from_file(const char *sysfs_file, uint32_t *value)
{
	FILE *f;
	char buf[BUFSIZ];

	f = fopen(sysfs_file, "r");
	if (f == NULL) {
		return -1;
	}
	if (fgets(buf, sizeof(buf), f) == NULL) {
		fclose(f);
		return -1;
	}
	*value = strtoul(buf, NULL, 10);
	fclose(f);
	return 0;
}
/*
 * Read the first line of sysfs_file into buf (at most len-1 characters,
 * NUL-terminated, newline retained).  Returns 0 on success, -1 when the
 * file cannot be opened or nothing can be read.
 */
static int
get_str_from_file(const char *sysfs_file, char *buf, int len)
{
	int rc = -1;
	FILE *fp = fopen(sysfs_file, "r");

	if (fp != NULL) {
		if (fgets(buf, len, fp) != NULL) {
			rc = 0;
		}
		fclose(fp);
	}

	return rc;
}
/*
 * Write value as a decimal string to sysfs_file.
 * Returns 0 on success, -1 on open/format/write failure.
 * Fix: the format string was "%ul", which printed the number followed by
 * a literal 'l' (e.g. "128l"), corrupting what the kernel side parses.
 */
static int
put_u32_to_file(const char *sysfs_file, uint32_t value)
{
	FILE *f;
	int n;
	char buf[BUFSIZ];

	f = fopen(sysfs_file, "w");
	if (f == NULL) {
		return -1;
	}
	n = snprintf(buf, sizeof(buf), "%u", value);
	if ((n < 0) || (n >= (int)sizeof(buf))) {
		fclose(f);
		return -1;
	}
	if (fwrite(buf, n, 1, f) == 0) {
		fclose(f);
		return -1;
	}
	fclose(f);
	return 0;
}
/*
 * Read a decimal uint64 from the first line of sysfs_file into *value.
 * Returns 0 on success, -1 if the file cannot be opened or is empty.
 * Fix: mirrors get_u32_from_file() — an empty file no longer reports
 * success with *value left stale.
 */
static int
get_u64_from_file(const char *sysfs_file, uint64_t *value)
{
	FILE *f;
	char buf[BUFSIZ];

	f = fopen(sysfs_file, "r");
	if (f == NULL) {
		return -1;
	}
	if (fgets(buf, sizeof(buf), f) == NULL) {
		fclose(f);
		return -1;
	}
	*value = strtoull(buf, NULL, 10);
	fclose(f);
	return 0;
}
/*
 * Print command-line help for ioat_kperf.
 * Fix: "tranfer" typo in the -t description.
 */
static void
usage(char *program_name)
{
	printf("%s options\n", program_name);
	printf("\t[-h usage]\n");
	printf("\t[-n number of DMA channels]\n");
	printf("\t[-q queue depth, per DMA channel]\n");
	printf("\t[-s [n^2] transfer size, per descriptor]\n");
	printf("\t[-t total [n^2] data to transfer, per DMA channel]\n");
}
/*
 * Entry point of the user-space control tool.
 *
 * Flow: verify the ioatdma and dmaperf modules are loaded, push the
 * requested test parameters into the dmaperf debugfs files, start the
 * run, poll the status file until the channels go idle, then print
 * per-channel throughput (copied bytes / elapsed usecs == MB/s).
 *
 * Returns 0 on success, -1 on any error.
 *
 * Fixes: getopt string had "h:" which wrongly required an argument for
 * -h; the error hint said "modprove" instead of "modprobe"; uint32_t
 * values were printed with %d instead of %u.
 */
int main(int argc, char *argv[])
{
	int op;
	int rc;
	char buf[BUFSIZ];
	uint32_t i, threads = 0;
	uint32_t ring_size, queue_depth = 0;
	uint32_t transfer_size, order = 0;
	uint64_t total_size, copied = 0;
	uint64_t elapsed_time = 0;
	char channel[1024];

	if (check_modules("ioatdma")) {
		fprintf(stderr, "Ioat driver not loaded,"
			" run `modprobe -v ioatdma` first\n");
		return -1;
	}

	if (check_modules("dmaperf")) {
		fprintf(stderr, "Kernel Ioat test driver not loaded,"
			" run `insmod dmaperf.ko` in the kmod directory\n");
		return -1;
	}

	/* the driver's ring allocation order bounds the usable queue depth */
	rc = get_u32_from_file("/sys/module/ioatdma/parameters/ioat_ring_alloc_order",
			       &order);
	if (rc < 0) {
		fprintf(stderr, "Cannot get default ioat queue depth\n");
		return -1;
	}
	ring_size = 1UL << order;

	while ((op = getopt(argc, argv, "hn:q:s:t:")) != -1) {
		switch (op) {
		case 'n':
			threads = atoi(optarg);
			rc = put_u32_to_file("/sys/kernel/debug/dmaperf/dmaperf/threads", threads);
			if (rc < 0) {
				fprintf(stderr, "Cannot set dma channels\n");
				return -1;
			}
			break;
		case 'q':
			queue_depth = atoi(optarg);
			if (queue_depth > ring_size) {
				fprintf(stderr, "Max Ioat DMA ring size %u\n", ring_size);
				return -1;
			}
			rc = put_u32_to_file("/sys/kernel/debug/dmaperf/dmaperf/queue_depth", queue_depth);
			if (rc < 0) {
				fprintf(stderr, "Cannot set queue depth\n");
				return -1;
			}
			break;
		case 's':
			order = atoi(optarg);
			rc = put_u32_to_file("/sys/kernel/debug/dmaperf/dmaperf/transfer_size_order", order);
			if (rc < 0) {
				fprintf(stderr, "Cannot set descriptor transfer size order\n");
				return -1;
			}
			break;
		case 't':
			order = atoi(optarg);
			rc = put_u32_to_file("/sys/kernel/debug/dmaperf/dmaperf/total_size_order", order);
			if (rc < 0) {
				fprintf(stderr, "Cannot set channel total transfer size order\n");
				return -1;
			}
			break;
		case 'h':
			usage(argv[0]);
			exit(0);
		default:
			usage(argv[0]);
			exit(1);
		}
	}

	/* read the effective configuration back from the driver */
	rc = get_u32_from_file("/sys/kernel/debug/dmaperf/dmaperf/transfer_size_order",
			       &order);
	if (rc < 0) {
		fprintf(stderr, "Cannot get channel descriptor transfer size\n");
		return -1;
	}
	transfer_size = 1UL << order;

	rc = get_u32_from_file("/sys/kernel/debug/dmaperf/dmaperf/total_size_order",
			       &order);
	if (rc < 0) {
		fprintf(stderr, "Cannot get channel total transfer size\n");
		return -1;
	}
	total_size = 1ULL << order;

	rc = get_u32_from_file("/sys/kernel/debug/dmaperf/dmaperf/threads",
			       &threads);
	if (rc < 0) {
		fprintf(stderr, "Cannot get dma channel threads\n");
		return -1;
	}

	rc = get_u32_from_file("/sys/kernel/debug/dmaperf/dmaperf/queue_depth",
			       &queue_depth);
	if (rc < 0) {
		fprintf(stderr, "Cannot get queue depth\n");
		return -1;
	}

	fprintf(stdout,
		"Total %u Channels, Queue_Depth %u, Transfer Size %u Bytes, Total Transfer Size %"PRIu64" GB\n",
		threads, queue_depth, transfer_size, total_size >> 30ULL);

	/* run the channels */
	rc = put_u32_to_file("/sys/kernel/debug/dmaperf/dmaperf/run", 1);
	if (rc < 0) {
		fprintf(stderr, "Cannot run the channels\n");
		return -1;
	}

	fprintf(stdout, "Running I/O ");
	fflush(stdout);

	/* wait for all the channels to become idle, polling once a second */
	while (!get_str_from_file("/sys/kernel/debug/dmaperf/dmaperf/status", buf, BUFSIZ)) {
		if (strstr(buf, "idle") != NULL) {
			fprintf(stdout, "\n");
			fflush(stdout);
			sleep(1);
			break;
		}
		fprintf(stdout, ". ");
		fflush(stdout);
		sleep(1);
	}

	/* collect each channel performance data */
	for (i = 0; i < threads; i++) {
		/* total data transfer length for the DMA channel in Bytes */
		sprintf(channel, "/sys/kernel/debug/dmaperf/dmaperf/thread_%u/copied", i);
		rc = get_u64_from_file(channel, &copied);
		if (rc < 0) {
			fprintf(stderr, "Cannot get channel copied bytes\n");
			return -1;
		}

		/* time in microseconds for total data transfer length */
		sprintf(channel, "/sys/kernel/debug/dmaperf/dmaperf/thread_%u/elapsed_time", i);
		rc = get_u64_from_file(channel, &elapsed_time);
		if (rc < 0) {
			fprintf(stderr, "Cannot get channel elapsed time\n");
			return -1;
		}
		assert(elapsed_time != 0);

		/* bytes / usecs == MB/s */
		fprintf(stdout, "Channel %u Performance Data %"PRIu64" MB/s\n",
			i, copied / elapsed_time);
	}

	return 0;
}

View File

@ -0,0 +1,42 @@
#
# BSD LICENSE
#
# Copyright(c) 2010-2015 Intel Corporation. All rights reserved.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in
# the documentation and/or other materials provided with the
# distribution.
# * Neither the name of Intel Corporation nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#
# Out-of-tree kernel module build: dmaperf.ko from dma_perf.c.
# Use $(MAKE) (not bare "make") so MAKEFLAGS and the jobserver propagate
# to the kernel build system.
obj-m := dmaperf.o
dmaperf-y := dma_perf.o

KDIR := /lib/modules/$(shell uname -r)/build

all:
	$(MAKE) -C $(KDIR) M=$(shell pwd) modules

clean:
	$(MAKE) -C $(KDIR) M=$(shell pwd) clean

View File

@ -0,0 +1,669 @@
/*
* This file is provided under a dual BSD/GPLv2 license. When using or
* redistributing this file, you may do so under either license.
*
* GPL LICENSE SUMMARY
*
* Copyright(c) 2015 Intel Corporation. All rights reserved.
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of version 2 of the GNU General Public License as
* published by the Free Software Foundation.
*
* BSD LICENSE
*
* Copyright(c) 2015 Intel Corporation. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copy
* notice, this list of conditions and the following disclaimer in
* the documentation and/or other materials provided with the
* distribution.
* * Neither the name of Intel Corporation nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* PCIe DMA Perf Linux driver
*/
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/wait.h>
#include <linux/module.h>
#include <linux/kthread.h>
#include <linux/time.h>
#include <linux/timer.h>
#include <linux/dma-mapping.h>
#include <linux/pci.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/debugfs.h>
#include <linux/dmaengine.h>
#include <linux/delay.h>
#include <linux/printk.h>
#include <linux/nodemask.h>
#define DRIVER_NAME "dma_perf"
#define DRIVER_DESCRIPTION "PCIe DMA Performance Measurement Tool"
#define DRIVER_LICENSE "Dual BSD/GPL"
#define DRIVER_VERSION "1.0"
#define DRIVER_AUTHOR "Dave Jiang <dave.jiang@intel.com>"
#define MAX_THREADS 32
#define MAX_TEST_SIZE 1024 * 1024 /* 1M */
#define DMA_CHANNELS_PER_NODE 8
MODULE_LICENSE(DRIVER_LICENSE);
MODULE_VERSION(DRIVER_VERSION);
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_AUTHOR("Changpeng Liu <changpeng.liu@intel.com>");
MODULE_DESCRIPTION(DRIVER_DESCRIPTION);
static struct dentry *perf_debugfs_dir;
static struct perf_ctx *g_perf = NULL;
static unsigned int seg_order = 12; /* 4K */
static unsigned int queue_depth = 256;
static unsigned int run_order = 32; /* 4G */
/* Destination memory window one worker thread copies into. */
struct perf_mw {
	size_t buf_size;	/* window size in bytes; 0 when not allocated */
	void *virt_addr;	/* buffer kmalloc'd on the worker's NUMA node */
};

struct perf_ctx;

/* Per-worker (one per DMA channel) test context, exported via debugfs. */
struct pthr_ctx {
	struct dentry *debugfs_thr_dir;		/* debugfs "thread_<i>" directory */
	struct dentry *debugfs_copied;		/* u64 file backing 'copied' */
	struct dentry *debugfs_elapsed_time;	/* u64 file backing 'elapsed_time' */
	struct device *dev;			/* device of the acquired DMA channel */
	int node;				/* NUMA node for channel and buffers */
	wait_queue_head_t wq;			/* woken by the DMA completion callback */
	struct perf_mw mw;			/* destination window */
	struct task_struct *thread;		/* worker kthread; NULL when idle */
	struct perf_ctx *perf;			/* back-pointer to the global context */
	atomic_t dma_sync;			/* DMA descriptors currently in flight */
	struct dma_chan *dma_chan;		/* acquired dmaengine channel */
	int dma_up;				/* descriptors submitted */
	int dma_down;				/* completions observed */
	int dma_prep_err;			/* prep failures after retry budget */
	u64 copied;				/* total bytes copied */
	u64 elapsed_time;			/* total transfer time in usecs */
};

/* Module-global test context (singleton, published through g_perf). */
struct perf_ctx {
	spinlock_t db_lock;		/* NOTE(review): initialized but never taken in this file */
	struct dentry *debugfs_node_dir;	/* debugfs "dmaperf" directory */
	struct dentry *debugfs_run;		/* "run" control file */
	struct dentry *debugfs_threads;		/* "threads" knob (u8) */
	struct dentry *debugfs_queue_depth;	/* "queue_depth" knob (u32) */
	struct dentry *debugfs_transfer_size_order;	/* "transfer_size_order" knob (u32) */
	struct dentry *debugfs_total_size_order;	/* "total_size_order" knob (u32) */
	struct dentry *debugfs_status;		/* "status": "running" or "idle" */
	u8 numa_nodes;			/* number of online NUMA nodes */
	u8 perf_threads;		/* worker threads to launch on next run */
	bool run;			/* true while a test run is active */
	struct pthr_ctx pthr_ctx[MAX_THREADS];	/* per-worker contexts */
	atomic_t tsync;			/* count of workers between start barrier and exit */
};
static void perf_free_mw(struct pthr_ctx *pctx);
static int perf_set_mw(struct pthr_ctx *pctx, size_t size);
/*
 * DMA completion callback: one descriptor finished.  Decrements the
 * in-flight count before waking the submitter, which throttles on
 * 'queue_depth'.  Runs in dmaengine callback context, not in the worker
 * thread, so the atomic_dec must precede the wake_up.
 */
static void perf_copy_callback(void *data)
{
	struct pthr_ctx *pctx = data;

	atomic_dec(&pctx->dma_sync);
	pctx->dma_down++;
	wake_up(&pctx->wq);
}
/*
 * Submit one asynchronous DMA memcpy of @size bytes from @src to @dst on
 * the worker's channel: map both buffers, prep a descriptor (retrying up
 * to 20 times with a short sleep when prep fails, e.g. ring full),
 * install perf_copy_callback as completion callback and issue the
 * transfer.
 *
 * Returns @size when the descriptor was submitted, 0 on prep/submit
 * failure, or a negative errno for a missing channel, bad alignment, or
 * OOM.  NOTE(review): dma_map_page() maps a single page here — assumes
 * a copy never crosses a page boundary for the mapped range; confirm
 * for transfer sizes above PAGE_SIZE.
 */
static ssize_t perf_copy(struct pthr_ctx *pctx, char *dst,
			 char *src, size_t size)
{
	struct dma_async_tx_descriptor *txd;
	struct dma_chan *chan = pctx->dma_chan;
	struct dma_device *device;
	struct dmaengine_unmap_data *unmap;
	dma_cookie_t cookie;
	size_t src_off, dst_off;
	int retries = 0;

	if (!chan) {
		printk("DMA engine does not exist\n");
		return -EINVAL;
	}

	device = chan->device;
	/* in-page offsets of the two buffers */
	src_off = (size_t)src & ~PAGE_MASK;
	dst_off = (size_t)dst & ~PAGE_MASK;

	if (!is_dma_copy_aligned(device, src_off, dst_off, size))
		return -ENODEV;

	/* refcounted unmap bookkeeping covering both mappings */
	unmap = dmaengine_get_unmap_data(device->dev, 2, GFP_NOWAIT);
	if (!unmap)
		return -ENOMEM;

	unmap->len = size;
	unmap->addr[0] = dma_map_page(device->dev, virt_to_page(src),
				      src_off, size, DMA_TO_DEVICE);
	if (dma_mapping_error(device->dev, unmap->addr[0]))
		goto err_get_unmap;

	unmap->to_cnt = 1;

	unmap->addr[1] = dma_map_page(device->dev, virt_to_page(dst),
				      dst_off, size, DMA_FROM_DEVICE);
	if (dma_mapping_error(device->dev, unmap->addr[1]))
		goto err_get_unmap;
	unmap->from_cnt = 1;

dma_prep_retry:
	txd = device->device_prep_dma_memcpy(chan, unmap->addr[1],
					     unmap->addr[0],
					     size, DMA_PREP_INTERRUPT);
	if (!txd) {
		/* prep failed (ring likely full): back off and retry */
		if (retries++ > 20) {
			pctx->dma_prep_err++;
			goto err_get_unmap;
		} else {
			set_current_state(TASK_INTERRUPTIBLE);
			schedule_timeout(50);
			goto dma_prep_retry;
		}
	}

	txd->callback = perf_copy_callback;
	txd->callback_param = pctx;
	dma_set_unmap(txd, unmap);	/* descriptor takes its own unmap reference */

	cookie = dmaengine_submit(txd);
	if (dma_submit_error(cookie))
		goto err_set_unmap;

	atomic_inc(&pctx->dma_sync);
	pctx->dma_up++;
	dma_async_issue_pending(chan);

	return size;

err_set_unmap:
	/* drop the reference taken by dma_set_unmap ... */
	dmaengine_unmap_put(unmap);
err_get_unmap:
	/* ... and the one from dmaengine_get_unmap_data */
	dmaengine_unmap_put(unmap);
	return 0;
}
/*
 * Drive the full transfer for one worker: submit @total bytes as
 * @buf_size-sized DMA copies into the @win_size destination window at
 * @dst, wrapping back to the window start whenever it fills.  At most
 * 'queue_depth' descriptors are kept in flight.  After submission it
 * waits for all completions, then records bytes copied and elapsed
 * microseconds in @pctx for the debugfs counters.  Always returns 0.
 *
 * NOTE(review): perf_copy()'s ssize_t result is accumulated into a u64;
 * a negative error return would wrap rather than be detected — confirm
 * whether per-copy error handling is intended here.  Also, diff_us of 0
 * (sub-microsecond run) would divide by zero below.
 */
static int perf_move_data(struct pthr_ctx *pctx, char *dst, char *src,
			  u64 buf_size, u64 win_size, u64 total)
{
	int chunks, total_chunks, i;
	int copied_chunks = 0;
	u64 result;
	char *tmp = dst;
	u64 perf, diff_us;
	ktime_t kstart, kstop, kdiff;

	chunks = win_size / buf_size;
	total_chunks = total / buf_size;
	printk("%s: chunks: %d total_chunks: %d\n", current->comm, chunks, total_chunks);

	kstart = ktime_get();

	for (i = 0; i < total_chunks; i++) {
		/* throttle: wait until fewer than queue_depth copies are in flight */
		wait_event_interruptible(pctx->wq, atomic_read(&pctx->dma_sync) < queue_depth);
		result = perf_copy(pctx, tmp, src, buf_size);
		pctx->copied += result;
		copied_chunks++;
		/* wrap back to the start of the destination window */
		if (copied_chunks == chunks) {
			tmp = dst;
			copied_chunks = 0;
		} else
			tmp += buf_size;
	}

	printk("%s: All DMA descriptors submitted\n", current->comm);

	/* FIXME: need a timeout here eventually */
	while (atomic_read(&pctx->dma_sync) != 0)
		msleep(1);

	pr_info("%s: dma_up: %d dma_down: %d dma_prep_err: %d\n",
		current->comm, pctx->dma_up, pctx->dma_down,
		pctx->dma_prep_err);

	kstop = ktime_get();
	kdiff = ktime_sub(kstop, kstart);
	diff_us = ktime_to_us(kdiff);

	pr_info("%s: copied %Lu bytes\n", current->comm, pctx->copied);
	pr_info("%s: lasted %Lu usecs\n", current->comm, diff_us);

	/* bytes / usecs == MB/s */
	perf = pctx->copied / diff_us;
	pr_info("%s: MBytes/s: %Lu\n", current->comm, perf);

	pctx->elapsed_time = diff_us;

	return 0;
}
/*
 * dma_request_channel() filter: accept only channels whose device sits
 * on the NUMA node passed (cast to an integer) in @node.
 */
static bool perf_dma_filter_fn(struct dma_chan *chan, void *node)
{
	return dev_to_node(&chan->dev->device) == (int)(unsigned long)node;
}
/*
 * Worker kthread body: acquire a MEMCPY-capable DMA channel on this
 * worker's NUMA node (if not already held), allocate source and
 * destination buffers on the same node, synchronize with the other
 * workers via perf->tsync, then run the measured copy loop.
 *
 * Returns 0 on success or a negative errno; on error the channel is
 * released so a later run can re-acquire it.
 *
 * Fix: the source buffer was leaked on the success path — it is now
 * freed once perf_move_data() has drained all in-flight descriptors.
 */
static int dma_perf_thread(void *data)
{
	struct pthr_ctx *pctx = data;
	struct perf_ctx *perf = pctx->perf;
	struct perf_mw *mw = &pctx->mw;
	char *dst;
	u64 win_size, buf_size, total;
	void *src = NULL;
	int rc, node;
	struct dma_chan *dma_chan = NULL;

	pr_info("kthread %s starting...\n", current->comm);

	node = pctx->node;

	if (!pctx->dma_chan) {
		dma_cap_mask_t dma_mask;

		dma_cap_zero(dma_mask);
		dma_cap_set(DMA_MEMCPY, dma_mask);
		/* only accept a channel that lives on our NUMA node */
		dma_chan = dma_request_channel(dma_mask, perf_dma_filter_fn,
					       (void *)(unsigned long)node);
		if (!dma_chan) {
			pr_warn("%s: cannot acquire DMA channel, quitting\n",
				current->comm);
			return -ENODEV;
		}
		pctx->dma_chan = dma_chan;
		pctx->dev = dma_chan->device->dev;
	}

	src = kmalloc_node(MAX_TEST_SIZE, GFP_KERNEL, node);
	if (!src) {
		rc = -ENOMEM;
		goto err;
	}

	rc = perf_set_mw(pctx, MAX_TEST_SIZE);
	if (rc < 0) {
		pr_err("%s: set mw failed\n", current->comm);
		rc = -ENXIO;
		goto err;
	}

	win_size = mw->buf_size;
	buf_size = 1ULL << seg_order;
	total = 1ULL << run_order;

	if (buf_size > MAX_TEST_SIZE)
		buf_size = MAX_TEST_SIZE;

	dst = (char *)mw->virt_addr;

	/* barrier: wait until every worker thread has checked in */
	atomic_inc(&perf->tsync);
	while (atomic_read(&perf->tsync) != perf->perf_threads)
		schedule();

	rc = perf_move_data(pctx, dst, src, buf_size, win_size, total);

	atomic_dec(&perf->tsync);

	if (rc < 0) {
		pr_err("%s: failed\n", current->comm);
		rc = -ENXIO;
		goto err;
	}

	/* all DMA is quiesced by perf_move_data(); safe to free the source */
	kfree(src);
	return 0;

err:
	kfree(src);	/* kfree(NULL) is a no-op */
	if (dma_chan) {
		dma_release_channel(dma_chan);
		pctx->dma_chan = NULL;
	}
	return rc;
}
/* Release the worker's destination memory window, if one was allocated. */
static void perf_free_mw(struct pthr_ctx *pctx)
{
	struct perf_mw *mw = &pctx->mw;

	if (mw->virt_addr == NULL)
		return;

	kfree(mw->virt_addr);
	mw->virt_addr = NULL;
	mw->buf_size = 0;
}
/*
 * Allocate a destination window of @size bytes on the worker's NUMA node.
 * Returns 0 on success, -EINVAL for a zero size, -ENOMEM when the
 * allocation fails.
 * Fix: allocation failure used to return -EINVAL, misreporting OOM as a
 * bad argument (callers only test rc < 0, so this stays compatible).
 */
static int perf_set_mw(struct pthr_ctx *pctx, size_t size)
{
	struct perf_mw *mw = &pctx->mw;

	if (!size)
		return -EINVAL;

	mw->buf_size = size;

	mw->virt_addr = kmalloc_node(size, GFP_KERNEL, pctx->node);
	if (!mw->virt_addr) {
		mw->buf_size = 0;
		return -ENOMEM;
	}

	return 0;
}
/*
 * debugfs "run" read handler: report the run flag as "0\n" or "1\n".
 * Fix: the kmalloc() result was used unchecked — on OOM snprintf would
 * have written through a NULL pointer.
 */
static ssize_t debugfs_run_read(struct file *filp, char __user *ubuf,
				size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	char *buf;
	ssize_t ret, out_offset;

	if (!perf)
		return 0;

	buf = kmalloc(64, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;
	out_offset = snprintf(buf, 64, "%d\n", perf->run);
	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
	kfree(buf);

	return ret;
}
/*
 * debugfs "run" write handler — toggles the test state.
 *
 * If a run is active, stop all worker threads.  Otherwise clamp the
 * tuning knobs (thread count, segment order, total order), then launch
 * one kthread per requested DMA channel, each created on the NUMA node
 * derived from its index (DMA_CHANNELS_PER_NODE channels per node).
 * Returns @count on success, -ENXIO if a thread could not be created.
 *
 * Fix: the thread-creation error path iterated MAX_THREADS times but
 * tested the same stale 'pctx' on every iteration instead of walking
 * perf->pthr_ctx[], so already-launched threads were never stopped.
 */
static ssize_t debugfs_run_write(struct file *filp, const char __user *ubuf,
				 size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	int node, i;

	if (perf->perf_threads == 0)
		return 0;

	/* all workers have exited: the previous run is over */
	if (atomic_read(&perf->tsync) == 0)
		perf->run = false;

	if (perf->run == true) {
		/* lets stop the threads */
		perf->run = false;
		for (i = 0; i < MAX_THREADS; i++) {
			if (perf->pthr_ctx[i].thread) {
				kthread_stop(perf->pthr_ctx[i].thread);
				perf->pthr_ctx[i].thread = NULL;
			} else
				break;
		}
	} else {
		perf->run = true;

		if (perf->perf_threads > MAX_THREADS) {
			perf->perf_threads = MAX_THREADS;
			pr_info("Reset total threads to: %u\n", MAX_THREADS);
		}

		/* no greater than 1M */
		if (seg_order > 20) {
			seg_order = 20;
			pr_info("Fix seg_order to %u\n", seg_order);
		}

		if (run_order < seg_order) {
			run_order = seg_order;
			pr_info("Fix run_order to %u\n", run_order);
		}

		/* launch kernel thread */
		for (i = 0; i < perf->perf_threads; i++) {
			struct pthr_ctx *pctx;

			pctx = &perf->pthr_ctx[i];
			atomic_set(&pctx->dma_sync, 0);
			pctx->perf = perf;
			pctx->elapsed_time = 0;
			pctx->copied = 0;
			init_waitqueue_head(&pctx->wq);

			/* NUMA socket node */
			pctx->node = i / DMA_CHANNELS_PER_NODE;
			node = pctx->node;

			pctx->thread =
				kthread_create_on_node(dma_perf_thread,
						       (void *)pctx,
						       node, "dma_perf %d", i);
			if (pctx->thread)
				wake_up_process(pctx->thread);
			else {
				perf->run = false;
				/* stop every thread already launched */
				for (i = 0; i < MAX_THREADS; i++) {
					if (perf->pthr_ctx[i].thread) {
						kthread_stop(perf->pthr_ctx[i].thread);
						perf->pthr_ctx[i].thread = NULL;
					} else
						break;
				}
			}

			if (perf->run == false)
				return -ENXIO;
		}
	}

	return count;
}
static const struct file_operations dma_perf_debugfs_run = {
.owner = THIS_MODULE,
.open = simple_open,
.read = debugfs_run_read,
.write = debugfs_run_write,
};
/*
 * debugfs "status" read handler: "running" while any worker is between
 * the start barrier and exit (tsync != 0), otherwise "idle".
 * Fix: unchecked kmalloc() — snprintf would have written through NULL
 * on OOM.
 */
static ssize_t debugfs_status_read(struct file *filp, char __user *ubuf,
				   size_t count, loff_t *offp)
{
	struct perf_ctx *perf = filp->private_data;
	char *buf;
	ssize_t ret, out_offset;

	if (!perf)
		return 0;

	buf = kmalloc(64, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;
	out_offset = snprintf(buf, 64, "%s\n", atomic_read(&perf->tsync) ? "running" : "idle");
	ret = simple_read_from_buffer(ubuf, count, offp, buf, out_offset);
	kfree(buf);

	return ret;
}
static const struct file_operations dma_perf_debugfs_status = {
.owner = THIS_MODULE,
.open = simple_open,
.read = debugfs_status_read,
};
/*
 * Build the debugfs control tree under <debugfs>/dmaperf/dmaperf: the
 * "run" and "status" files, the tuning knobs (threads, queue_depth,
 * transfer_size_order, total_size_order) bound directly to the module
 * variables, and a thread_<i>/ directory with 'copied' and
 * 'elapsed_time' counters for every possible worker.
 *
 * Returns 0 on success or -ENODEV on any creation failure; partially
 * created entries are torn down by debugfs_remove_recursive() in
 * perf_remove().
 */
static int perf_debugfs_setup(struct perf_ctx *perf)
{
	int i;
	char temp_name[64];

	if (!perf_debugfs_dir)
		return -ENODEV;

	perf->debugfs_node_dir = debugfs_create_dir("dmaperf",
						    perf_debugfs_dir);
	if (!perf->debugfs_node_dir)
		return -ENODEV;

	perf->debugfs_run = debugfs_create_file("run", S_IRUSR | S_IWUSR,
						perf->debugfs_node_dir, perf,
						&dma_perf_debugfs_run);
	if (!perf->debugfs_run)
		return -ENODEV;

	perf->debugfs_status = debugfs_create_file("status", S_IRUSR,
						   perf->debugfs_node_dir, perf,
						   &dma_perf_debugfs_status);
	if (!perf->debugfs_status)
		return -ENODEV;

	/* knobs map straight onto the module-level variables */
	perf->debugfs_threads = debugfs_create_u8("threads", S_IRUSR | S_IWUSR,
						  perf->debugfs_node_dir,
						  &perf->perf_threads);
	if (!perf->debugfs_threads)
		return -ENODEV;

	perf->debugfs_queue_depth = debugfs_create_u32("queue_depth", S_IRUSR | S_IWUSR,
						       perf->debugfs_node_dir,
						       &queue_depth);
	if (!perf->debugfs_queue_depth)
		return -ENODEV;

	perf->debugfs_transfer_size_order = debugfs_create_u32("transfer_size_order", S_IRUSR | S_IWUSR,
							       perf->debugfs_node_dir,
							       &seg_order);
	if (!perf->debugfs_transfer_size_order)
		return -ENODEV;

	perf->debugfs_total_size_order = debugfs_create_u32("total_size_order", S_IRUSR | S_IWUSR,
							    perf->debugfs_node_dir,
							    &run_order);
	if (!perf->debugfs_total_size_order)
		return -ENODEV;

	/* per-worker result counters, one directory per possible thread */
	for (i = 0; i < MAX_THREADS; i++) {
		struct pthr_ctx *pctx = &perf->pthr_ctx[i];

		sprintf(temp_name, "thread_%d", i);
		pctx->debugfs_thr_dir = debugfs_create_dir(temp_name, perf->debugfs_node_dir);
		if (!pctx->debugfs_thr_dir)
			return -ENODEV;

		pctx->debugfs_copied = debugfs_create_u64("copied", S_IRUSR,
							  pctx->debugfs_thr_dir,
							  &pctx->copied);
		if (!pctx->debugfs_copied)
			return -ENODEV;

		pctx->debugfs_elapsed_time = debugfs_create_u64("elapsed_time", S_IRUSR,
								pctx->debugfs_thr_dir,
								&pctx->elapsed_time);
		if (!pctx->debugfs_elapsed_time)
			return -ENODEV;
	}

	return 0;
}
static int perf_probe(void)
{
struct perf_ctx *perf;
int rc = 0;
perf = kzalloc_node(sizeof(*perf), GFP_KERNEL, 0);
if (!perf) {
rc = -ENOMEM;
goto err_perf;
}
perf->numa_nodes = num_online_nodes();
perf->perf_threads = 1;
atomic_set(&perf->tsync, 0);
perf->run = false;
spin_lock_init(&perf->db_lock);
if (debugfs_initialized() && !perf_debugfs_dir) {
perf_debugfs_dir = debugfs_create_dir(KBUILD_MODNAME, NULL);
if (!perf_debugfs_dir)
goto err_ctx;
rc = perf_debugfs_setup(perf);
if (rc)
goto err_ctx;
}
g_perf = perf;
return 0;
err_ctx:
kfree(perf);
err_perf:
return rc;
}
/*
 * Module teardown: remove the debugfs tree, release any DMA channels
 * the workers still hold, free the destination windows and the context.
 *
 * Fix: guard against g_perf being NULL (probe never published a
 * context) before dereferencing it.
 */
static void perf_remove(void)
{
	int i;
	struct perf_ctx *perf = g_perf;

	if (perf_debugfs_dir) {
		debugfs_remove_recursive(perf_debugfs_dir);
		perf_debugfs_dir = NULL;
	}

	if (!perf)
		return;

	for (i = 0; i < MAX_THREADS; i++) {
		struct pthr_ctx *pctx = &perf->pthr_ctx[i];

		if (pctx->dma_chan)
			dma_release_channel(pctx->dma_chan);
		perf_free_mw(pctx);
	}

	kfree(perf);
	g_perf = NULL;
}
/* Module entry point: log at info level and run the probe.
 * Fix: bare printk() without a KERN_ level → pr_info(). */
static int __init perf_init_module(void)
{
	pr_info("DMA Performance Test Init\n");
	return perf_probe();
}
module_init(perf_init_module);
/* Module exit point: log at info level and tear everything down.
 * Fix: bare printk() without a KERN_ level → pr_info(). */
static void __exit perf_exit_module(void)
{
	pr_info("DMA Performance Test Exit\n");
	perf_remove();
}
module_exit(perf_exit_module);