test/trace: add performance test cases

This test case shall be used to measure the trace overhead.

Example command to run the performance test case:

echo "trace_perf_autotest" | ./build/app/test/dpdk-test -c 0x3 --trace=.*

Signed-off-by: Jerin Jacob <jerinj@marvell.com>
Acked-by: David Marchand <david.marchand@redhat.com>
This commit is contained in:
Jerin Jacob 2020-04-23 00:33:44 +05:30 committed by David Marchand
parent 9247e71dfb
commit 16a277a24c
3 changed files with 186 additions and 0 deletions

View File

@ -155,6 +155,7 @@ SRCS-y += test_version.c
SRCS-y += test_func_reentrancy.c
SRCS-y += test_trace.c
SRCS-y += test_trace_register.c
SRCS-y += test_trace_perf.c
SRCS-y += test_service_cores.c
ifeq ($(CONFIG_RTE_LIBRTE_PMD_RING),y)

View File

@ -129,6 +129,7 @@ test_sources = files('commands.c',
'test_ticketlock.c',
'test_trace.c',
'test_trace_register.c',
'test_trace_perf.c',
'test_version.c',
'virtual_pmd.c'
)
@ -283,6 +284,7 @@ perf_test_names = [
'rand_perf_autotest',
'hash_readwrite_perf_autotest',
'hash_readwrite_lf_perf_autotest',
'trace_perf_autotest',
]
driver_test_names = [

183
app/test/test_trace_perf.c Normal file
View File

@ -0,0 +1,183 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(C) 2020 Marvell International Ltd.
*/
#include <rte_cycles.h>
#include <rte_debug.h>
#include <rte_eal.h>
#include <rte_eal_trace.h>
#include <rte_malloc.h>
#include <rte_lcore.h>
#include "test.h"
#include "test_trace.h"
/* Forward declaration; the full definition follows struct lcore_data. */
struct test_data;
/*
 * Per-worker state shared between the launching lcore and one worker lcore.
 * The alignment attribute presumably keeps each worker's slot on its own
 * cache line so workers do not share lines -- confirm against rte_common.h.
 */
struct lcore_data {
/* Set by signal_workers_to_finish(); polled by the worker's timed loop. */
volatile bool done;
/* Set by the worker entry point; polled by wait_till_workers_are_ready(). */
volatile bool started;
/* Sum of timer cycles spent over all completed batches. */
uint64_t total_cycles;
/* Number of completed batches (one batch = STEP iterations of CENT_OPS). */
uint64_t total_calls;
} __rte_cache_aligned;
/*
 * Shared test context: a worker count followed by one lcore_data slot per
 * worker. The storage for ldata[] is sized by the caller at allocation time
 * (see test_trace_perf()).
 */
struct test_data {
/* Number of valid entries in ldata[]; set by run_test(). */
unsigned int nb_workers;
/* Flexible array member: one slot per worker lcore. */
struct lcore_data ldata[];
} __rte_cache_aligned;
/* Number of CENT_OPS() expansions executed inside one timed batch. */
#define STEP 100
/*
 * Expand OP exactly 100 times (10 rows of 10) as straight-line code, so one
 * timed batch contains STEP * 100 invocations of OP. measure_perf() divides
 * by the same 100; keep both in sync if the unroll factor changes.
 */
#define CENT_OPS(OP) do { \
OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \
OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \
OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \
OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \
OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \
OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \
OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \
OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \
OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \
OP; OP; OP; OP; OP; OP; OP; OP; OP; OP; \
} while (0)
static void
measure_perf(const char *str, struct test_data *data)
{
uint64_t hz = rte_get_timer_hz();
uint64_t total_cycles = 0;
uint64_t total_calls = 0;
double cycles, ns;
unsigned int workers;
for (workers = 0; workers < data->nb_workers; workers++) {
total_cycles += data->ldata[workers].total_cycles;
total_calls += data->ldata[workers].total_calls;
}
cycles = total_calls ? (double)total_cycles / (double)total_calls : 0;
cycles /= STEP;
cycles /= 100; /* CENT_OPS */
ns = (cycles / (double)hz) * 1E9;
printf("%16s: cycles=%f ns=%f\n", str, cycles, ns);
}
/*
 * Spin until every worker slot in @data has raised its 'started' flag.
 * Workers are checked in index order; rte_pause() is called between polls
 * of a slot that is not ready yet.
 */
static void
wait_till_workers_are_ready(struct test_data *data)
{
	unsigned int idx = 0;

	while (idx < data->nb_workers) {
		if (data->ldata[idx].started)
			idx++;
		else
			rte_pause();
	}
}
static void
signal_workers_to_finish(struct test_data *data)
{
unsigned int workers;
for (workers = 0; workers < data->nb_workers; workers++) {
data->ldata[workers].done = 1;
rte_smp_wmb();
}
}
/*
 * Generate the two functions that benchmark one operation macro 'func':
 *
 *  __worker_<func>(ldata)  - non-inlined timed loop. Until ldata->done is
 *      set, it times one batch of STEP * CENT_OPS(func) with
 *      rte_get_timer_cycles(), adds the elapsed cycles to
 *      ldata->total_cycles and increments ldata->total_calls.
 *
 *  worker_fn_<func>(arg)   - lcore entry point. 'arg' is the worker's
 *      struct lcore_data slot. It raises ldata->started, issues
 *      rte_smp_wmb(), runs the timed loop and returns 0.
 *
 * Because 'func' is token-pasted, the generated names use the macro's
 * spelling (e.g. worker_fn_GENERIC_VOID), while the body passed to
 * CENT_OPS() is the macro's expansion.
 */
#define WORKER_DEFINE(func) \
static void __rte_noinline \
__worker_##func(struct lcore_data *ldata) \
{ \
uint64_t start; \
int i; \
while (!ldata->done) { \
start = rte_get_timer_cycles(); \
for (i = 0; i < STEP; i++) \
CENT_OPS(func); \
ldata->total_cycles += rte_get_timer_cycles() - start; \
ldata->total_calls++; \
} \
} \
static int \
worker_fn_##func(void *arg) \
{ \
struct lcore_data *ldata = arg; \
ldata->started = 1; \
rte_smp_wmb(); \
__worker_##func(ldata); \
return 0; \
}
/*
 * Operations whose trace overhead is measured. Each macro is a single
 * trace-point call with a fixed argument, so the timed loop contains
 * nothing but the trace emission itself.
 */
#define GENERIC_VOID rte_eal_trace_generic_void()
#define GENERIC_U64 rte_eal_trace_generic_u64(0x120000)
#define GENERIC_INT rte_eal_trace_generic_int(-34)
#define GENERIC_FLOAT rte_eal_trace_generic_float(3.3f)
#define GENERIC_DOUBLE rte_eal_trace_generic_double(3.66666)
#define GENERIC_STR rte_eal_trace_generic_str("hello world")
/* NOTE(review): app_dpdk_test_fp() is presumably declared in test_trace.h. */
#define VOID_FP app_dpdk_test_fp()
/* Instantiate worker_fn_<NAME>() / __worker_<NAME>() for every operation. */
WORKER_DEFINE(GENERIC_VOID)
WORKER_DEFINE(GENERIC_U64)
WORKER_DEFINE(GENERIC_INT)
WORKER_DEFINE(GENERIC_FLOAT)
WORKER_DEFINE(GENERIC_DOUBLE)
WORKER_DEFINE(GENERIC_STR)
WORKER_DEFINE(VOID_FP)
/*
 * Run one measurement: launch @f on every worker lcore, let the workers
 * accumulate statistics for a fixed interval, stop them, and print the
 * average per-operation cost under the label @str.
 *
 * @param str   Label printed with the result.
 * @param f     Worker entry point; it receives a pointer to its own
 *              struct lcore_data slot inside @data.
 * @param data  Shared context with room for one slot per worker lcore.
 * @param sz    Size in bytes of @data; the whole area is zeroed before use.
 */
static void
run_test(const char *str, lcore_function_t f, struct test_data *data, size_t sz)
{
	unsigned int id, worker = 0;

	/* Start every run from clean counters and cleared start/done flags. */
	memset(data, 0, sz);
	data->nb_workers = rte_lcore_count() - 1;
	RTE_LCORE_FOREACH_SLAVE(id)
		rte_eal_remote_launch(f, &data->ldata[worker++], id);

	wait_till_workers_are_ready(data);
	rte_delay_ms(100); /* Wait for some time to accumulate the stats */
	signal_workers_to_finish(data);

	RTE_LCORE_FOREACH_SLAVE(id)
		rte_eal_wait_lcore(id);

	/*
	 * Collect the results only once every worker has returned. The
	 * workers update total_cycles and total_calls with plain,
	 * non-atomic stores, so reading them while the timed loops are still
	 * running is a data race and can pair a cycle count with a call
	 * count taken from a different batch.
	 */
	measure_perf(str, data);
}
/*
 * Entry point of the trace performance test.
 *
 * Allocates one shared context sized for all worker lcores and runs every
 * trace-point benchmark in turn, printing one result line per benchmark.
 *
 * @return TEST_SKIPPED when fewer than two lcores are available,
 *         TEST_FAILED when the context cannot be allocated,
 *         TEST_SUCCESS otherwise.
 */
static int
test_trace_perf(void)
{
	/* Benchmarks in the order they are run and reported. */
	static const struct {
		const char *label;
		int (*entry)(void *);
	} cases[] = {
		{ "void", worker_fn_GENERIC_VOID },
		{ "u64", worker_fn_GENERIC_U64 },
		{ "int", worker_fn_GENERIC_INT },
		{ "float", worker_fn_GENERIC_FLOAT },
		{ "double", worker_fn_GENERIC_DOUBLE },
		{ "string", worker_fn_GENERIC_STR },
		{ "void_fp", worker_fn_VOID_FP },
	};
	const unsigned int lcores = rte_lcore_count();
	struct test_data *ctx;
	size_t bytes;
	size_t c;

	/* One lcore drives the test, so at least one more must do the work. */
	if (lcores < 2) {
		printf("Need minimum two cores for testing\n");
		return TEST_SKIPPED;
	}

	printf("Timer running at %5.2fMHz\n", rte_get_timer_hz()/1E6);

	/* Header plus one lcore_data slot for every worker lcore. */
	bytes = sizeof(struct test_data) +
		(lcores - 1) * sizeof(struct lcore_data);
	ctx = rte_zmalloc(NULL, bytes, RTE_CACHE_LINE_SIZE);
	if (ctx == NULL) {
		printf("Failed to allocate memory\n");
		return TEST_FAILED;
	}

	for (c = 0; c < sizeof(cases) / sizeof(cases[0]); c++)
		run_test(cases[c].label, cases[c].entry, ctx, bytes);

	rte_free(ctx);
	return TEST_SUCCESS;
}

REGISTER_TEST_COMMAND(trace_perf_autotest, test_trace_perf);