app/compress-perf: add weak functions for multicore test

This patch adds template functions for the multi-core performance
version of the compress-perf tool.

Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
Acked-by: Artur Trybula <arturx.trybula@intel.com>
Acked-by: Shally Verma <shallyv@marvell.com>
This commit is contained in:
Tomasz Jozwiak 2019-07-08 20:16:13 +02:00 committed by Akhil Goyal
parent 316095eb41
commit 424dd6c8c1
8 changed files with 650 additions and 406 deletions

View File

@ -12,7 +12,6 @@ CFLAGS += -O3
# all source are stored in SRCS-y
SRCS-y := main.c
SRCS-y += comp_perf_options_parse.c
SRCS-y += comp_perf_test_verify.c
SRCS-y += comp_perf_test_benchmark.c
SRCS-y += comp_perf_test_common.c
include $(RTE_SDK)/mk/rte.app.mk

View File

@ -0,0 +1,50 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2019 Intel Corporation
*/
#ifndef _COMP_PERF_
#define _COMP_PERF_
#include <rte_mempool.h>
struct comp_test_data;
/*
 * Test constructor callback: receives a compress device id, a queue pair id
 * and the shared test options, and returns an opaque test context pointer.
 * main() treats a NULL return as a constructor failure.
 */
typedef void *(*cperf_constructor_t)(
uint8_t dev_id,
uint16_t qp_id,
struct comp_test_data *options);
/*
 * Test runner callback: receives the opaque context produced by the
 * constructor and returns an int status.
 */
typedef int (*cperf_runner_t)(void *test_ctx);
/* Test destructor callback: receives the opaque context to tear down. */
typedef void (*cperf_destructor_t)(void *test_ctx);
/*
 * Set of callbacks describing one test type: how to build its context,
 * how to run it and how to release it.
 */
struct cperf_test {
/* Builds the per-(device, queue pair) test context */
cperf_constructor_t constructor;
/* Executes the test on a previously constructed context */
cperf_runner_t runner;
/* Releases a previously constructed context */
cperf_destructor_t destructor;
};
/* Needed for weak functions*/
void *
cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused,
uint16_t qp_id __rte_unused,
struct comp_test_data *options __rte_unused);
void
cperf_benchmark_test_destructor(void *arg __rte_unused);
int
cperf_benchmark_test_runner(void *test_ctx __rte_unused);
void *
cperf_verify_test_constructor(uint8_t dev_id __rte_unused,
uint16_t qp_id __rte_unused,
struct comp_test_data *options __rte_unused);
void
cperf_verify_test_destructor(void *arg __rte_unused);
int
cperf_verify_test_runner(void *test_ctx __rte_unused);
#endif /* _COMP_PERF_ */

View File

@ -13,6 +13,23 @@
#define MAX_MBUF_DATA_SIZE (UINT16_MAX - RTE_PKTMBUF_HEADROOM)
#define MAX_SEG_SIZE ((int)(MAX_MBUF_DATA_SIZE / EXPANSE_RATIO))
extern const char *cperf_test_type_strs[];
/*
 * Cleanup state machine.
 * Records how far initialisation has progressed; main() switches on this
 * value at its "end" label to decide which resources have to be released.
 */
enum cleanup_st {
/* Initial value (0) */
ST_CLEAR = 0,
/* Set by main() once the comp_test_data structure is in use */
ST_TEST_DATA,
/* Set by main() after the compress devices were initialised */
ST_COMPDEV,
/* Set by main() after the input file data was loaded */
ST_INPUT_DATA,
/* NOTE(review): no assignment of this state is visible here - confirm use */
ST_MEMORY_ALLOC,
/* Set by main() right before the test contexts are built and run */
ST_DURING_TEST
};
/*
 * Selectable test types. The values are used as array indexes for
 * cperf_test_type_strs[] and cperf_testmap[].
 */
enum cperf_perf_test_type {
/* Benchmark test; set as the default by comp_perf_options_default() */
CPERF_TEST_TYPE_BENCHMARK,
/* Verify test */
CPERF_TEST_TYPE_VERIFY
};
enum comp_operation {
COMPRESS_ONLY,
DECOMPRESS_ONLY,
@ -30,37 +47,26 @@ struct range_list {
struct comp_test_data {
char driver_name[64];
char input_file[64];
struct rte_mbuf **comp_bufs;
struct rte_mbuf **decomp_bufs;
uint32_t total_bufs;
enum cperf_perf_test_type test;
uint8_t *input_data;
size_t input_data_sz;
uint8_t *compressed_data;
uint8_t *decompressed_data;
struct rte_mempool *comp_buf_pool;
struct rte_mempool *decomp_buf_pool;
struct rte_mempool *op_pool;
int8_t cdev_id;
uint16_t nb_qps;
uint16_t seg_sz;
uint16_t out_seg_sz;
uint16_t burst_sz;
uint32_t pool_sz;
uint32_t num_iter;
uint16_t max_sgl_segs;
enum rte_comp_huffman huffman_enc;
enum comp_operation test_op;
int window_sz;
struct range_list level;
/* Store TSC duration for all levels (including level 0) */
uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
size_t comp_data_sz;
size_t decomp_data_sz;
struct range_list level_lst;
uint8_t level;
double ratio;
double comp_gbps;
double decomp_gbps;
double comp_tsc_byte;
double decomp_tsc_byte;
enum cleanup_st cleanup;
};
int

View File

@ -468,19 +468,20 @@ parse_level(struct comp_test_data *test_data, const char *arg)
* Try parsing the argument as a range, if it fails,
* parse it as a list
*/
if (parse_range(arg, &test_data->level.min, &test_data->level.max,
&test_data->level.inc) < 0) {
ret = parse_list(arg, test_data->level.list,
&test_data->level.min,
&test_data->level.max);
if (parse_range(arg, &test_data->level_lst.min,
&test_data->level_lst.max,
&test_data->level_lst.inc) < 0) {
ret = parse_list(arg, test_data->level_lst.list,
&test_data->level_lst.min,
&test_data->level_lst.max);
if (ret < 0) {
RTE_LOG(ERR, USER1,
"Failed to parse compression level/s\n");
return -1;
}
test_data->level.count = ret;
test_data->level_lst.count = ret;
if (test_data->level.max > RTE_COMP_LEVEL_MAX) {
if (test_data->level_lst.max > RTE_COMP_LEVEL_MAX) {
RTE_LOG(ERR, USER1, "Level cannot be higher than %u\n",
RTE_COMP_LEVEL_MAX);
return -1;
@ -500,7 +501,6 @@ struct long_opt_parser {
};
static struct option lgopts[] = {
{ CPERF_DRIVER_NAME, required_argument, 0, 0 },
{ CPERF_TEST_FILE, required_argument, 0, 0 },
{ CPERF_SEG_SIZE, required_argument, 0, 0 },
@ -574,7 +574,6 @@ comp_perf_options_parse(struct comp_test_data *test_data, int argc, char **argv)
void
comp_perf_options_default(struct comp_test_data *test_data)
{
test_data->cdev_id = -1;
test_data->seg_sz = 2048;
test_data->burst_sz = 32;
test_data->pool_sz = 8192;
@ -583,9 +582,10 @@ comp_perf_options_default(struct comp_test_data *test_data)
test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC;
test_data->test_op = COMPRESS_DECOMPRESS;
test_data->window_sz = -1;
test_data->level.min = 1;
test_data->level.max = 9;
test_data->level.inc = 1;
test_data->level_lst.min = 1;
test_data->level_lst.max = 9;
test_data->level_lst.inc = 1;
test_data->test = CPERF_TEST_TYPE_BENCHMARK;
}
int

View File

@ -0,0 +1,284 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2019 Intel Corporation
*/
#include <rte_malloc.h>
#include <rte_eal.h>
#include <rte_log.h>
#include <rte_compressdev.h>
#include "comp_perf_options.h"
#include "comp_perf_test_verify.h"
#include "comp_perf_test_benchmark.h"
#include "comp_perf.h"
#include "comp_perf_test_common.h"
#define DIV_CEIL(a, b) ((a) / (b) + ((a) % (b) != 0))
/*
 * Check whether a size is one of the values described by a range.
 *
 * The size must lie within [range->min, range->max]. When the range has a
 * zero increment any in-bounds size is accepted; otherwise the size has to
 * be reachable from range->min in steps of range->increment.
 *
 * Returns 0 when the size is supported, -1 otherwise.
 */
int
param_range_check(uint16_t size, const struct rte_param_log2_range *range)
{
	unsigned int candidate;

	/* Reject anything outside of the advertised bounds */
	if (size < range->min || size > range->max)
		return -1;

	/* No increment: every in-bounds size is accepted */
	if (range->increment == 0)
		return 0;

	/* Walk the supported sizes looking for an exact match */
	candidate = range->min;
	while (candidate <= range->max) {
		if (candidate == size)
			return 0;
		candidate += range->increment;
	}

	return -1;
}
/*
 * Pick the size of an output (compressed) segment for a given input
 * segment size.
 *
 * From a performance point of view the buffer size should be a power of 2,
 * but it must also be large enough to store incompressible data. For inputs
 * whose doubled size still fits below 2^16 the result is the largest power
 * of 2 not exceeding (input_size * 2), which is always greater than
 * input_size. For larger inputs no such power of 2 is used and the result
 * is input_size scaled by EXPANSE_RATIO instead. A zero input_size is
 * sized from MIN_COMPRESSED_BUF_SIZE.
 */
static uint32_t
find_buf_size(uint32_t input_size)
{
	uint32_t i;
	uint32_t size;

	/*
	 * Inputs above UINT16_MAX can never match the 17-bit scan below
	 * (the scan would run forever once the probe bit reaches 0), so
	 * they are sized by ratio directly. The result is clamped so the
	 * conversion back to uint32_t stays well defined.
	 */
	if (input_size > UINT16_MAX) {
		double expanded = (double)input_size * EXPANSE_RATIO;

		return expanded >= (double)UINT32_MAX ?
			UINT32_MAX : (uint32_t)expanded;
	}

	size = !input_size ? MIN_COMPRESSED_BUF_SIZE : (input_size << 1);

	/* Find the highest set bit of size, starting from bit 16 */
	for (i = UINT16_MAX + 1; i != 0 && !(i & size); i >>= 1)
		;

	return i > ((UINT16_MAX + 1) >> 1)
			? (uint32_t)((float)input_size * EXPANSE_RATIO)
			: i;
}
/*
 * Release everything referenced by a cperf_mem_resources structure:
 * the mbufs stored in the comp/decomp arrays, the arrays themselves, the
 * compressed/decompressed data buffers and the three mempools.
 *
 * Safe to call on a partially populated structure (e.g. after
 * comp_perf_allocate_memory() failed half way), provided unset pointers
 * are NULL. All released pointers are reset so that a second call is a
 * no-op.
 */
void
comp_perf_free_memory(struct cperf_mem_resources *mem)
{
	uint32_t i;

	if (mem == NULL)
		return;

	/*
	 * total_bufs is computed before the mbuf arrays are allocated, so
	 * either array may still be NULL here; never index a NULL array.
	 * The mbufs must go back to their pools before the pools are freed.
	 */
	for (i = 0; i < mem->total_bufs; i++) {
		if (mem->comp_bufs != NULL)
			rte_pktmbuf_free(mem->comp_bufs[i]);
		if (mem->decomp_bufs != NULL)
			rte_pktmbuf_free(mem->decomp_bufs[i]);
	}
	mem->total_bufs = 0;

	rte_free(mem->decomp_bufs);
	mem->decomp_bufs = NULL;
	rte_free(mem->comp_bufs);
	mem->comp_bufs = NULL;
	rte_free(mem->decompressed_data);
	mem->decompressed_data = NULL;
	rte_free(mem->compressed_data);
	mem->compressed_data = NULL;

	rte_mempool_free(mem->op_pool);
	mem->op_pool = NULL;
	rte_mempool_free(mem->decomp_buf_pool);
	mem->decomp_buf_pool = NULL;
	rte_mempool_free(mem->comp_buf_pool);
	mem->comp_buf_pool = NULL;
}
/*
 * Allocate the memory resources needed by one (device, queue pair) test
 * context and store them in mem.
 *
 * Side effects:
 *  - test_data->out_seg_sz is set from test_data->seg_sz.
 *  - mem->total_bufs is set to the number of mbuf chains needed to hold
 *    the input data.
 *  - Three mempools (compression mbufs, decompression mbufs, comp ops) are
 *    created; their names embed mem->dev_id and mem->qp_id.
 *  - Buffers for compressed and decompressed data and the two mbuf pointer
 *    arrays are allocated zeroed.
 *
 * Returns 0 on success, -1 on failure. On failure, resources allocated so
 * far are NOT released here; the caller is expected to release them
 * (presumably via comp_perf_free_memory() - confirm against callers).
 */
int
comp_perf_allocate_memory(struct comp_test_data *test_data,
			  struct cperf_mem_resources *mem)
{
	uint32_t total_segs;
	char pool_name[32] = "";

	/* Both values are used as divisors below */
	if (test_data->seg_sz == 0 || test_data->max_sgl_segs == 0) {
		RTE_LOG(ERR, USER1, "Segment size and maximum number of "
			"segments per buffer must be non-zero\n");
		return -1;
	}

	test_data->out_seg_sz = find_buf_size(test_data->seg_sz);

	/* Number of segments for input and output
	 * (compression and decompression)
	 */
	total_segs = DIV_CEIL(test_data->input_data_sz,
			test_data->seg_sz);

	snprintf(pool_name, sizeof(pool_name), "comp_buf_pool_%u_qp_%u",
			mem->dev_id, mem->qp_id);
	mem->comp_buf_pool = rte_pktmbuf_pool_create(pool_name,
				total_segs,
				0, 0,
				test_data->out_seg_sz + RTE_PKTMBUF_HEADROOM,
				rte_socket_id());
	if (mem->comp_buf_pool == NULL) {
		RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
		return -1;
	}

	snprintf(pool_name, sizeof(pool_name), "decomp_buf_pool_%u_qp_%u",
			mem->dev_id, mem->qp_id);
	mem->decomp_buf_pool = rte_pktmbuf_pool_create(pool_name,
				total_segs,
				0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
				rte_socket_id());
	if (mem->decomp_buf_pool == NULL) {
		RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
		return -1;
	}

	/* One op (and one mbuf chain) per group of max_sgl_segs segments */
	mem->total_bufs = DIV_CEIL(total_segs, test_data->max_sgl_segs);

	snprintf(pool_name, sizeof(pool_name), "op_pool_%u_qp_%u",
			mem->dev_id, mem->qp_id);
	mem->op_pool = rte_comp_op_pool_create(pool_name,
				  mem->total_bufs,
				  0, 0, rte_socket_id());
	if (mem->op_pool == NULL) {
		RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n");
		return -1;
	}

	/*
	 * Compressed data might be a bit larger than input data,
	 * if data cannot be compressed
	 */
	mem->compressed_data = rte_zmalloc_socket(NULL,
				test_data->input_data_sz * EXPANSE_RATIO
						+ MIN_COMPRESSED_BUF_SIZE, 0,
				rte_socket_id());
	if (mem->compressed_data == NULL) {
		RTE_LOG(ERR, USER1, "Memory to hold the compressed data "
			"could not be allocated\n");
		return -1;
	}

	mem->decompressed_data = rte_zmalloc_socket(NULL,
				test_data->input_data_sz, 0,
				rte_socket_id());
	if (mem->decompressed_data == NULL) {
		RTE_LOG(ERR, USER1, "Memory to hold the decompressed data "
			"could not be allocated\n");
		return -1;
	}

	mem->comp_bufs = rte_zmalloc_socket(NULL,
			mem->total_bufs * sizeof(struct rte_mbuf *),
			0, rte_socket_id());
	if (mem->comp_bufs == NULL) {
		RTE_LOG(ERR, USER1, "Memory to hold the compression mbufs"
				" could not be allocated\n");
		return -1;
	}

	mem->decomp_bufs = rte_zmalloc_socket(NULL,
			mem->total_bufs * sizeof(struct rte_mbuf *),
			0, rte_socket_id());
	if (mem->decomp_bufs == NULL) {
		RTE_LOG(ERR, USER1, "Memory to hold the decompression mbufs"
				" could not be allocated\n");
		return -1;
	}

	return 0;
}
/*
 * Populate the mbuf arrays of mem for one test run.
 *
 * For each of the mem->total_bufs entries:
 *  - decomp_bufs[i] becomes a chain of up to test_data->max_sgl_segs mbufs
 *    taken from decomp_buf_pool, each filled with up to test_data->seg_sz
 *    bytes copied sequentially from test_data->input_data;
 *  - comp_bufs[i] becomes a chain with the same number of segments taken
 *    from comp_buf_pool, each with test_data->out_seg_sz bytes appended.
 *
 * Returns 0 on success, -1 on failure. Mbufs already stored in the arrays
 * are left in place on failure for the caller to release; a segment that
 * could not be attached to its chain is freed here, since no array entry
 * would otherwise reference it.
 */
int
prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem)
{
	uint32_t remaining_data = test_data->input_data_sz;
	uint8_t *input_data_ptr = test_data->input_data;
	size_t data_sz;
	uint8_t *data_addr;
	uint32_t i, j;

	for (i = 0; i < mem->total_bufs; i++) {
		/* Allocate data in input mbuf and copy data from input file */
		mem->decomp_bufs[i] =
			rte_pktmbuf_alloc(mem->decomp_buf_pool);
		if (mem->decomp_bufs[i] == NULL) {
			RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
			return -1;
		}

		data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
		data_addr = (uint8_t *) rte_pktmbuf_append(
					mem->decomp_bufs[i], data_sz);
		if (data_addr == NULL) {
			RTE_LOG(ERR, USER1, "Could not append data\n");
			return -1;
		}
		rte_memcpy(data_addr, input_data_ptr, data_sz);

		input_data_ptr += data_sz;
		remaining_data -= data_sz;

		/* Already one segment in the mbuf */
		uint16_t segs_per_mbuf = 1;

		/* Chain mbufs if needed for input mbufs */
		while (segs_per_mbuf < test_data->max_sgl_segs
				&& remaining_data > 0) {
			struct rte_mbuf *next_seg =
				rte_pktmbuf_alloc(mem->decomp_buf_pool);

			if (next_seg == NULL) {
				RTE_LOG(ERR, USER1,
					"Could not allocate mbuf\n");
				return -1;
			}

			data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
			data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
				data_sz);
			if (data_addr == NULL) {
				RTE_LOG(ERR, USER1, "Could not append data\n");
				/* Not chained yet: nobody else owns it */
				rte_pktmbuf_free(next_seg);
				return -1;
			}

			rte_memcpy(data_addr, input_data_ptr, data_sz);
			input_data_ptr += data_sz;
			remaining_data -= data_sz;

			if (rte_pktmbuf_chain(mem->decomp_bufs[i],
					next_seg) < 0) {
				RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
				/* Chaining failed: segment is still ours */
				rte_pktmbuf_free(next_seg);
				return -1;
			}
			segs_per_mbuf++;
		}

		/* Allocate data in output mbuf */
		mem->comp_bufs[i] =
			rte_pktmbuf_alloc(mem->comp_buf_pool);
		if (mem->comp_bufs[i] == NULL) {
			RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
			return -1;
		}
		data_addr = (uint8_t *) rte_pktmbuf_append(
					mem->comp_bufs[i],
					test_data->out_seg_sz);
		if (data_addr == NULL) {
			RTE_LOG(ERR, USER1, "Could not append data\n");
			return -1;
		}

		/* Chain mbufs if needed for output mbufs */
		for (j = 1; j < segs_per_mbuf; j++) {
			struct rte_mbuf *next_seg =
				rte_pktmbuf_alloc(mem->comp_buf_pool);

			if (next_seg == NULL) {
				RTE_LOG(ERR, USER1,
					"Could not allocate mbuf\n");
				return -1;
			}

			data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
				test_data->out_seg_sz);
			if (data_addr == NULL) {
				RTE_LOG(ERR, USER1, "Could not append data\n");
				/* Not chained yet: nobody else owns it */
				rte_pktmbuf_free(next_seg);
				return -1;
			}

			if (rte_pktmbuf_chain(mem->comp_bufs[i],
					next_seg) < 0) {
				RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
				/* Chaining failed: segment is still ours */
				rte_pktmbuf_free(next_seg);
				return -1;
			}
		}
	}

	return 0;
}

View File

@ -0,0 +1,41 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2019 Intel Corporation
*/
#ifndef _COMP_PERF_TEST_COMMON_H_
#define _COMP_PERF_TEST_COMMON_H_
#include <stdint.h>
#include <rte_mempool.h>
/*
 * Memory resources owned by one test context. They are allocated by
 * comp_perf_allocate_memory(), filled by prepare_bufs() and released by
 * comp_perf_free_memory().
 */
struct cperf_mem_resources {
/* Compress device id; also embedded in the mempool names */
uint8_t dev_id;
/* Queue pair id; also embedded in the mempool names */
uint16_t qp_id;
/* NOTE(review): presumably the lcore running this context - confirm */
uint8_t lcore_id;
/* Number of entries in comp_bufs[] and decomp_bufs[] */
uint32_t total_bufs;
/* Buffer sized for the input data expanded by EXPANSE_RATIO */
uint8_t *compressed_data;
/* Buffer sized like the input data */
uint8_t *decompressed_data;
/* Output mbuf chains, segments of out_seg_sz bytes */
struct rte_mbuf **comp_bufs;
/* Input mbuf chains holding a copy of the input data */
struct rte_mbuf **decomp_bufs;
/* Pool backing comp_bufs */
struct rte_mempool *comp_buf_pool;
/* Pool backing decomp_bufs */
struct rte_mempool *decomp_buf_pool;
/* Pool of compression operations, total_bufs elements */
struct rte_mempool *op_pool;
};
int
param_range_check(uint16_t size, const struct rte_param_log2_range *range);
void
comp_perf_free_memory(struct cperf_mem_resources *mem);
int
comp_perf_allocate_memory(struct comp_test_data *test_data,
struct cperf_mem_resources *mem);
int
prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem);
#endif /* _COMP_PERF_TEST_COMMON_H_ */

View File

@ -8,56 +8,38 @@
#include <rte_compressdev.h>
#include "comp_perf_options.h"
#include "comp_perf_test_verify.h"
#include "comp_perf_test_benchmark.h"
#include "comp_perf.h"
#include "comp_perf_test_common.h"
#define NUM_MAX_XFORMS 16
#define NUM_MAX_INFLIGHT_OPS 512
#define DIV_CEIL(a, b) ((a) / (b) + ((a) % (b) != 0))
__extension__
const char *cperf_test_type_strs[] = {
[CPERF_TEST_TYPE_BENCHMARK] = "benchmark",
[CPERF_TEST_TYPE_VERIFY] = "verify"
};
/* Cleanup state machine */
static enum cleanup_st {
ST_CLEAR = 0,
ST_TEST_DATA,
ST_COMPDEV,
ST_INPUT_DATA,
ST_MEMORY_ALLOC,
ST_PREPARE_BUF,
ST_DURING_TEST
} cleanup = ST_CLEAR;
__extension__
static const struct cperf_test cperf_testmap[] = {
[CPERF_TEST_TYPE_BENCHMARK] = {
cperf_benchmark_test_constructor,
cperf_benchmark_test_runner,
cperf_benchmark_test_destructor
},
[CPERF_TEST_TYPE_VERIFY] = {
cperf_verify_test_constructor,
cperf_verify_test_runner,
cperf_verify_test_destructor
}
};
static int
param_range_check(uint16_t size, const struct rte_param_log2_range *range)
{
unsigned int next_size;
/* Check lower/upper bounds */
if (size < range->min)
return -1;
if (size > range->max)
return -1;
/* If range is actually only one value, size is correct */
if (range->increment == 0)
return 0;
/* Check if value is one of the supported sizes */
for (next_size = range->min; next_size <= range->max;
next_size += range->increment)
if (size == next_size)
return 0;
return -1;
}
static int
comp_perf_check_capabilities(struct comp_test_data *test_data)
comp_perf_check_capabilities(struct comp_test_data *test_data, uint8_t cdev_id)
{
const struct rte_compressdev_capabilities *cap;
cap = rte_compressdev_capability_get(test_data->cdev_id,
cap = rte_compressdev_capability_get(cdev_id,
RTE_COMP_ALGO_DEFLATE);
if (cap == NULL) {
@ -105,7 +87,7 @@ comp_perf_check_capabilities(struct comp_test_data *test_data)
}
/* Level 0 support */
if (test_data->level.min == 0 &&
if (test_data->level_lst.min == 0 &&
(comp_flags & RTE_COMP_FF_NONCOMPRESSED_BLOCKS) == 0) {
RTE_LOG(ERR, USER1, "Compress device does not support "
"level 0 (no compression)\n");
@ -115,110 +97,108 @@ comp_perf_check_capabilities(struct comp_test_data *test_data)
return 0;
}
static uint32_t
find_buf_size(uint32_t input_size)
{
uint32_t i;
/* From performance point of view the buffer size should be a
* power of 2 but also should be enough to store incompressible data
*/
/* We're looking for nearest power of 2 buffer size, which is greather
* than input_size
*/
uint32_t size =
!input_size ? MIN_COMPRESSED_BUF_SIZE : (input_size << 1);
for (i = UINT16_MAX + 1; !(i & size); i >>= 1)
;
return i > ((UINT16_MAX + 1) >> 1)
? (uint32_t)((float)input_size * EXPANSE_RATIO)
: i;
}
static int
comp_perf_allocate_memory(struct comp_test_data *test_data)
comp_perf_initialize_compressdev(struct comp_test_data *test_data,
uint8_t *enabled_cdevs)
{
uint8_t enabled_cdev_count, nb_lcores, cdev_id;
unsigned int i, j;
int ret;
test_data->out_seg_sz = find_buf_size(test_data->seg_sz);
/* Number of segments for input and output
* (compression and decompression)
enabled_cdev_count = rte_compressdev_devices_get(test_data->driver_name,
enabled_cdevs, RTE_COMPRESS_MAX_DEVS);
if (enabled_cdev_count == 0) {
RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
test_data->driver_name);
return -EINVAL;
}
nb_lcores = rte_lcore_count() - 1;
/*
* Use fewer devices,
* if there are more available than cores.
*/
uint32_t total_segs = DIV_CEIL(test_data->input_data_sz,
test_data->seg_sz);
test_data->comp_buf_pool = rte_pktmbuf_pool_create("comp_buf_pool",
total_segs,
0, 0,
test_data->out_seg_sz + RTE_PKTMBUF_HEADROOM,
rte_socket_id());
if (test_data->comp_buf_pool == NULL) {
RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
return -1;
}
cleanup = ST_MEMORY_ALLOC;
test_data->decomp_buf_pool = rte_pktmbuf_pool_create("decomp_buf_pool",
total_segs,
0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
rte_socket_id());
if (test_data->decomp_buf_pool == NULL) {
RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
return -1;
}
test_data->total_bufs = DIV_CEIL(total_segs, test_data->max_sgl_segs);
test_data->op_pool = rte_comp_op_pool_create("op_pool",
test_data->total_bufs,
0, 0, rte_socket_id());
if (test_data->op_pool == NULL) {
RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n");
return -1;
if (enabled_cdev_count > nb_lcores) {
enabled_cdev_count = nb_lcores;
RTE_LOG(INFO, USER1,
" There's more available devices than cores!"
" The number of devices has been aligned to %d cores\n",
nb_lcores);
}
/*
* Compressed data might be a bit larger than input data,
* if data cannot be compressed
* Calculate number of needed queue pairs, based on the amount
* of available number of logical cores and compression devices.
* For instance, if there are 4 cores and 2 compression devices,
* 2 queue pairs will be set up per device.
* One queue pair per one core.
* if e.g.: there're 3 cores and 2 compression devices,
* 2 queue pairs will be set up per device but one queue pair
* will left unused in the last one device
*/
test_data->compressed_data = rte_zmalloc_socket(NULL,
test_data->input_data_sz * EXPANSE_RATIO
+ MIN_COMPRESSED_BUF_SIZE, 0,
rte_socket_id());
if (test_data->compressed_data == NULL) {
RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
"file could not be allocated\n");
return -1;
test_data->nb_qps = (nb_lcores % enabled_cdev_count) ?
(nb_lcores / enabled_cdev_count) + 1 :
nb_lcores / enabled_cdev_count;
for (i = 0; i < enabled_cdev_count &&
i < RTE_COMPRESS_MAX_DEVS; i++,
nb_lcores -= test_data->nb_qps) {
cdev_id = enabled_cdevs[i];
struct rte_compressdev_info cdev_info;
uint8_t socket_id = rte_compressdev_socket_id(cdev_id);
rte_compressdev_info_get(cdev_id, &cdev_info);
if (cdev_info.max_nb_queue_pairs &&
test_data->nb_qps > cdev_info.max_nb_queue_pairs) {
RTE_LOG(ERR, USER1,
"Number of needed queue pairs is higher "
"than the maximum number of queue pairs "
"per device.\n");
RTE_LOG(ERR, USER1,
"Lower the number of cores or increase "
"the number of crypto devices\n");
return -EINVAL;
}
test_data->decompressed_data = rte_zmalloc_socket(NULL,
test_data->input_data_sz, 0,
rte_socket_id());
if (test_data->decompressed_data == NULL) {
RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
"file could not be allocated\n");
return -1;
if (comp_perf_check_capabilities(test_data, cdev_id) < 0)
return -EINVAL;
/* Configure compressdev */
struct rte_compressdev_config config = {
.socket_id = socket_id,
.nb_queue_pairs = nb_lcores > test_data->nb_qps
? test_data->nb_qps : nb_lcores,
.max_nb_priv_xforms = NUM_MAX_XFORMS,
.max_nb_streams = 0
};
if (rte_compressdev_configure(cdev_id, &config) < 0) {
RTE_LOG(ERR, USER1, "Device configuration failed\n");
return -EINVAL;
}
test_data->comp_bufs = rte_zmalloc_socket(NULL,
test_data->total_bufs * sizeof(struct rte_mbuf *),
0, rte_socket_id());
if (test_data->comp_bufs == NULL) {
RTE_LOG(ERR, USER1, "Memory to hold the compression mbufs"
" could not be allocated\n");
return -1;
for (j = 0; j < test_data->nb_qps; j++) {
ret = rte_compressdev_queue_pair_setup(cdev_id, j,
NUM_MAX_INFLIGHT_OPS, socket_id);
if (ret < 0) {
RTE_LOG(ERR, USER1,
"Failed to setup queue pair %u on compressdev %u",
j, cdev_id);
return -EINVAL;
}
}
test_data->decomp_bufs = rte_zmalloc_socket(NULL,
test_data->total_bufs * sizeof(struct rte_mbuf *),
0, rte_socket_id());
if (test_data->decomp_bufs == NULL) {
RTE_LOG(ERR, USER1, "Memory to hold the decompression mbufs"
" could not be allocated\n");
return -1;
ret = rte_compressdev_start(cdev_id);
if (ret < 0) {
RTE_LOG(ERR, USER1,
"Failed to start device %u: error %d\n",
cdev_id, ret);
return -EPERM;
}
return 0;
}
return enabled_cdev_count;
}
static int
@ -295,187 +275,18 @@ comp_perf_dump_input_data(struct comp_test_data *test_data)
return ret;
}
static int
comp_perf_initialize_compressdev(struct comp_test_data *test_data)
{
uint8_t enabled_cdev_count;
uint8_t enabled_cdevs[RTE_COMPRESS_MAX_DEVS];
enabled_cdev_count = rte_compressdev_devices_get(test_data->driver_name,
enabled_cdevs, RTE_COMPRESS_MAX_DEVS);
if (enabled_cdev_count == 0) {
RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
test_data->driver_name);
return -EINVAL;
}
if (enabled_cdev_count > 1)
RTE_LOG(INFO, USER1,
"Only the first compress device will be used\n");
test_data->cdev_id = enabled_cdevs[0];
if (comp_perf_check_capabilities(test_data) < 0)
return -1;
/* Configure compressdev (one device, one queue pair) */
struct rte_compressdev_config config = {
.socket_id = rte_socket_id(),
.nb_queue_pairs = 1,
.max_nb_priv_xforms = NUM_MAX_XFORMS,
.max_nb_streams = 0
};
if (rte_compressdev_configure(test_data->cdev_id, &config) < 0) {
RTE_LOG(ERR, USER1, "Device configuration failed\n");
return -1;
}
if (rte_compressdev_queue_pair_setup(test_data->cdev_id, 0,
NUM_MAX_INFLIGHT_OPS, rte_socket_id()) < 0) {
RTE_LOG(ERR, USER1, "Queue pair setup failed\n");
return -1;
}
if (rte_compressdev_start(test_data->cdev_id) < 0) {
RTE_LOG(ERR, USER1, "Device could not be started\n");
return -1;
}
return 0;
}
static int
prepare_bufs(struct comp_test_data *test_data)
{
uint32_t remaining_data = test_data->input_data_sz;
uint8_t *input_data_ptr = test_data->input_data;
size_t data_sz;
uint8_t *data_addr;
uint32_t i, j;
for (i = 0; i < test_data->total_bufs; i++) {
/* Allocate data in input mbuf and copy data from input file */
test_data->decomp_bufs[i] =
rte_pktmbuf_alloc(test_data->decomp_buf_pool);
if (test_data->decomp_bufs[i] == NULL) {
RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
return -1;
}
cleanup = ST_PREPARE_BUF;
data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
data_addr = (uint8_t *) rte_pktmbuf_append(
test_data->decomp_bufs[i], data_sz);
if (data_addr == NULL) {
RTE_LOG(ERR, USER1, "Could not append data\n");
return -1;
}
rte_memcpy(data_addr, input_data_ptr, data_sz);
input_data_ptr += data_sz;
remaining_data -= data_sz;
/* Already one segment in the mbuf */
uint16_t segs_per_mbuf = 1;
/* Chain mbufs if needed for input mbufs */
while (segs_per_mbuf < test_data->max_sgl_segs
&& remaining_data > 0) {
struct rte_mbuf *next_seg =
rte_pktmbuf_alloc(test_data->decomp_buf_pool);
if (next_seg == NULL) {
RTE_LOG(ERR, USER1,
"Could not allocate mbuf\n");
return -1;
}
data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
data_sz);
if (data_addr == NULL) {
RTE_LOG(ERR, USER1, "Could not append data\n");
return -1;
}
rte_memcpy(data_addr, input_data_ptr, data_sz);
input_data_ptr += data_sz;
remaining_data -= data_sz;
if (rte_pktmbuf_chain(test_data->decomp_bufs[i],
next_seg) < 0) {
RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
return -1;
}
segs_per_mbuf++;
}
/* Allocate data in output mbuf */
test_data->comp_bufs[i] =
rte_pktmbuf_alloc(test_data->comp_buf_pool);
if (test_data->comp_bufs[i] == NULL) {
RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
return -1;
}
data_addr = (uint8_t *) rte_pktmbuf_append(
test_data->comp_bufs[i],
test_data->out_seg_sz);
if (data_addr == NULL) {
RTE_LOG(ERR, USER1, "Could not append data\n");
return -1;
}
/* Chain mbufs if needed for output mbufs */
for (j = 1; j < segs_per_mbuf; j++) {
struct rte_mbuf *next_seg =
rte_pktmbuf_alloc(test_data->comp_buf_pool);
if (next_seg == NULL) {
RTE_LOG(ERR, USER1,
"Could not allocate mbuf\n");
return -1;
}
data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
test_data->out_seg_sz);
if (data_addr == NULL) {
RTE_LOG(ERR, USER1, "Could not append data\n");
return -1;
}
if (rte_pktmbuf_chain(test_data->comp_bufs[i],
next_seg) < 0) {
RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
return -1;
}
}
}
return 0;
}
static void
free_bufs(struct comp_test_data *test_data)
{
uint32_t i;
for (i = 0; i < test_data->total_bufs; i++) {
rte_pktmbuf_free(test_data->comp_bufs[i]);
rte_pktmbuf_free(test_data->decomp_bufs[i]);
}
}
int
main(int argc, char **argv)
{
uint8_t level, level_idx = 0;
uint8_t level_idx = 0;
int ret, i;
struct comp_test_data *test_data;
void *ctx[RTE_MAX_LCORE] = {};
uint8_t enabled_cdevs[RTE_COMPRESS_MAX_DEVS];
int nb_compressdevs = 0;
uint16_t total_nb_qps = 0;
uint8_t cdev_id;
uint32_t lcore_id;
/* Initialise DPDK EAL */
ret = rte_eal_init(argc, argv);
@ -492,7 +303,7 @@ main(int argc, char **argv)
rte_socket_id());
ret = EXIT_SUCCESS;
cleanup = ST_TEST_DATA;
test_data->cleanup = ST_TEST_DATA;
comp_perf_options_default(test_data);
if (comp_perf_options_parse(test_data, argc, argv) < 0) {
@ -507,99 +318,112 @@ main(int argc, char **argv)
goto end;
}
if (comp_perf_initialize_compressdev(test_data) < 0) {
nb_compressdevs =
comp_perf_initialize_compressdev(test_data, enabled_cdevs);
if (nb_compressdevs < 1) {
ret = EXIT_FAILURE;
goto end;
}
cleanup = ST_COMPDEV;
test_data->cleanup = ST_COMPDEV;
if (comp_perf_dump_input_data(test_data) < 0) {
ret = EXIT_FAILURE;
goto end;
}
cleanup = ST_INPUT_DATA;
if (comp_perf_allocate_memory(test_data) < 0) {
ret = EXIT_FAILURE;
goto end;
}
test_data->cleanup = ST_INPUT_DATA;
if (prepare_bufs(test_data) < 0) {
ret = EXIT_FAILURE;
goto end;
}
if (test_data->level.inc != 0)
level = test_data->level.min;
if (test_data->level_lst.inc != 0)
test_data->level = test_data->level_lst.min;
else
level = test_data->level.list[0];
test_data->level = test_data->level_lst.list[0];
printf("App uses socket: %u\n", rte_socket_id());
printf("Driver uses socket: %u\n",
rte_compressdev_socket_id(test_data->cdev_id));
printf("Burst size = %u\n", test_data->burst_sz);
printf("File size = %zu\n", test_data->input_data_sz);
printf("%6s%12s%17s%19s%21s%15s%21s%23s%16s\n",
"Level", "Comp size", "Comp ratio [%]",
"Comp [Cycles/it]", "Comp [Cycles/Byte]", "Comp [Gbps]",
"Decomp [Cycles/it]", "Decomp [Cycles/Byte]", "Decomp [Gbps]");
test_data->cleanup = ST_DURING_TEST;
total_nb_qps = nb_compressdevs * test_data->nb_qps;
cleanup = ST_DURING_TEST;
while (level <= test_data->level.max) {
i = 0;
uint8_t qp_id = 0, cdev_index = 0;
/*
* Run a first iteration, to verify compression and
* get the compression ratio for the level
*/
if (cperf_verification(test_data, level) != EXIT_SUCCESS)
RTE_LCORE_FOREACH_SLAVE(lcore_id) {
if (i == total_nb_qps)
break;
/*
* Run benchmarking test
*/
if (cperf_benchmark(test_data, level) != EXIT_SUCCESS)
cdev_id = enabled_cdevs[cdev_index];
ctx[i] = cperf_testmap[test_data->test].constructor(
cdev_id, qp_id,
test_data);
if (ctx[i] == NULL) {
RTE_LOG(ERR, USER1, "Test run constructor failed\n");
goto end;
}
qp_id = (qp_id + 1) % test_data->nb_qps;
if (qp_id == 0)
cdev_index++;
i++;
}
while (test_data->level <= test_data->level_lst.max) {
i = 0;
RTE_LCORE_FOREACH_SLAVE(lcore_id) {
if (i == total_nb_qps)
break;
printf("%6u%12zu%17.2f%19"PRIu64"%21.2f"
"%15.2f%21"PRIu64"%23.2f%16.2f\n",
level, test_data->comp_data_sz, test_data->ratio,
test_data->comp_tsc_duration[level],
test_data->comp_tsc_byte, test_data->comp_gbps,
test_data->decomp_tsc_duration[level],
test_data->decomp_tsc_byte, test_data->decomp_gbps);
rte_eal_remote_launch(
cperf_testmap[test_data->test].runner,
ctx[i], lcore_id);
i++;
}
i = 0;
RTE_LCORE_FOREACH_SLAVE(lcore_id) {
if (test_data->level.inc != 0)
level += test_data->level.inc;
if (i == total_nb_qps)
break;
ret |= rte_eal_wait_lcore(lcore_id);
i++;
}
if (ret != EXIT_SUCCESS)
break;
if (test_data->level_lst.inc != 0)
test_data->level += test_data->level_lst.inc;
else {
if (++level_idx == test_data->level.count)
if (++level_idx == test_data->level_lst.count)
break;
level = test_data->level.list[level_idx];
test_data->level = test_data->level_lst.list[level_idx];
}
}
end:
switch (cleanup) {
switch (test_data->cleanup) {
case ST_DURING_TEST:
case ST_PREPARE_BUF:
free_bufs(test_data);
/* fallthrough */
case ST_MEMORY_ALLOC:
rte_free(test_data->decomp_bufs);
rte_free(test_data->comp_bufs);
rte_free(test_data->decompressed_data);
rte_free(test_data->compressed_data);
rte_mempool_free(test_data->op_pool);
rte_mempool_free(test_data->decomp_buf_pool);
rte_mempool_free(test_data->comp_buf_pool);
i = 0;
RTE_LCORE_FOREACH_SLAVE(lcore_id) {
if (i == total_nb_qps)
break;
if (ctx[i] && cperf_testmap[test_data->test].destructor)
cperf_testmap[test_data->test].destructor(
ctx[i]);
i++;
}
/* fallthrough */
case ST_INPUT_DATA:
rte_free(test_data->input_data);
/* fallthrough */
case ST_COMPDEV:
if (test_data->cdev_id != -1)
rte_compressdev_stop(test_data->cdev_id);
for (i = 0; i < nb_compressdevs &&
i < RTE_COMPRESS_MAX_DEVS; i++)
rte_compressdev_stop(enabled_cdevs[i]);
/* fallthrough */
case ST_TEST_DATA:
rte_free(test_data);
@ -616,3 +440,44 @@ main(int argc, char **argv)
}
return ret;
}
/*
 * Weak default for the benchmark test constructor: logs that the benchmark
 * test is not supported yet and returns NULL, which main() handles as a
 * constructor failure.
 * NOTE(review): presumably replaced at link time by a non-weak definition
 * once the benchmark test is built in - confirm.
 */
__rte_weak void *
cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused,
uint16_t qp_id __rte_unused,
struct comp_test_data *options __rte_unused)
{
RTE_LOG(INFO, USER1, "Benchmark test is not supported yet\n");
return NULL;
}
/* Weak default for the benchmark test destructor: does nothing. */
__rte_weak void
cperf_benchmark_test_destructor(void *arg __rte_unused)
{
}
/* Weak default for the benchmark test runner: does no work and returns 0. */
__rte_weak int
cperf_benchmark_test_runner(void *test_ctx __rte_unused)
{
return 0;
}
/*
 * Weak default for the verify test constructor: logs that the verify test
 * is not supported yet and returns NULL, which main() handles as a
 * constructor failure.
 * NOTE(review): presumably replaced at link time by a non-weak definition
 * once the verify test is built in - confirm.
 */
__rte_weak void *
cperf_verify_test_constructor(uint8_t dev_id __rte_unused,
uint16_t qp_id __rte_unused,
struct comp_test_data *options __rte_unused)
{
RTE_LOG(INFO, USER1, "Verify test is not supported yet\n");
return NULL;
}
/* Weak default for the verify test destructor: does nothing. */
__rte_weak void
cperf_verify_test_destructor(void *arg __rte_unused)
{
}
/* Weak default for the verify test runner: does no work and returns 0. */
__rte_weak int
cperf_verify_test_runner(void *test_ctx __rte_unused)
{
return 0;
}

View File

@ -4,6 +4,5 @@
allow_experimental_apis = true
sources = files('comp_perf_options_parse.c',
'main.c',
'comp_perf_test_verify.c',
'comp_perf_test_benchmark.c')
'comp_perf_test_common.c')
deps = ['compressdev']