app/compress-perf: add weak functions for multicore test

This patch adds template functions for the multi-core performance
version of the compress-perf tool.

Signed-off-by: Tomasz Jozwiak <tomaszx.jozwiak@intel.com>
Acked-by: Artur Trybula <arturx.trybula@intel.com>
Acked-by: Shally Verma <shallyv@marvell.com>
This commit is contained in:
Tomasz Jozwiak 2019-07-08 20:16:13 +02:00 committed by Akhil Goyal
parent 316095eb41
commit 424dd6c8c1
8 changed files with 650 additions and 406 deletions

View File

@ -12,7 +12,6 @@ CFLAGS += -O3
# all source are stored in SRCS-y # all source are stored in SRCS-y
SRCS-y := main.c SRCS-y := main.c
SRCS-y += comp_perf_options_parse.c SRCS-y += comp_perf_options_parse.c
SRCS-y += comp_perf_test_verify.c SRCS-y += comp_perf_test_common.c
SRCS-y += comp_perf_test_benchmark.c
include $(RTE_SDK)/mk/rte.app.mk include $(RTE_SDK)/mk/rte.app.mk

View File

@ -0,0 +1,50 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2019 Intel Corporation
*/
#ifndef _COMP_PERF_
#define _COMP_PERF_
#include <rte_mempool.h>
struct comp_test_data;
/*
 * Test constructor callback.
 * Receives a compress device id, a queue pair id on that device and the
 * parsed application options, and returns an opaque test context.
 * The application treats a NULL return as a constructor failure.
 */
typedef void *(*cperf_constructor_t)(
uint8_t dev_id,
uint16_t qp_id,
struct comp_test_data *options);
/*
 * Test runner callback.
 * Receives the context returned by the constructor; the int result is
 * the status of the run.
 */
typedef int (*cperf_runner_t)(void *test_ctx);
/*
 * Test destructor callback.
 * Receives the context returned by the constructor so it can be released.
 */
typedef void (*cperf_destructor_t)(void *test_ctx);
/* The three entry points (create, run, destroy) that make up one test type. */
struct cperf_test {
cperf_constructor_t constructor; /* builds the test context */
cperf_runner_t runner; /* executes the test on a context */
cperf_destructor_t destructor; /* tears the context down */
};
/*
 * Needed for weak functions.
 * Prototypes of the benchmark and verify test entry points; each group
 * follows the cperf_constructor_t / cperf_runner_t / cperf_destructor_t
 * signatures declared above.
 */
/* Benchmark test: constructor */
void *
cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused,
uint16_t qp_id __rte_unused,
struct comp_test_data *options __rte_unused);
/* Benchmark test: destructor */
void
cperf_benchmark_test_destructor(void *arg __rte_unused);
/* Benchmark test: runner */
int
cperf_benchmark_test_runner(void *test_ctx __rte_unused);
/* Verify test: constructor */
void *
cperf_verify_test_constructor(uint8_t dev_id __rte_unused,
uint16_t qp_id __rte_unused,
struct comp_test_data *options __rte_unused);
/* Verify test: destructor */
void
cperf_verify_test_destructor(void *arg __rte_unused);
/* Verify test: runner */
int
cperf_verify_test_runner(void *test_ctx __rte_unused);
#endif /* _COMP_PERF_ */

View File

@ -13,6 +13,23 @@
#define MAX_MBUF_DATA_SIZE (UINT16_MAX - RTE_PKTMBUF_HEADROOM) #define MAX_MBUF_DATA_SIZE (UINT16_MAX - RTE_PKTMBUF_HEADROOM)
#define MAX_SEG_SIZE ((int)(MAX_MBUF_DATA_SIZE / EXPANSE_RATIO)) #define MAX_SEG_SIZE ((int)(MAX_MBUF_DATA_SIZE / EXPANSE_RATIO))
extern const char *cperf_test_type_strs[];
/* Cleanup state machine */
enum cleanup_st {
ST_CLEAR = 0,
ST_TEST_DATA,
ST_COMPDEV,
ST_INPUT_DATA,
ST_MEMORY_ALLOC,
ST_DURING_TEST
};
enum cperf_perf_test_type {
CPERF_TEST_TYPE_BENCHMARK,
CPERF_TEST_TYPE_VERIFY
};
enum comp_operation { enum comp_operation {
COMPRESS_ONLY, COMPRESS_ONLY,
DECOMPRESS_ONLY, DECOMPRESS_ONLY,
@ -30,37 +47,26 @@ struct range_list {
struct comp_test_data { struct comp_test_data {
char driver_name[64]; char driver_name[64];
char input_file[64]; char input_file[64];
struct rte_mbuf **comp_bufs; enum cperf_perf_test_type test;
struct rte_mbuf **decomp_bufs;
uint32_t total_bufs;
uint8_t *input_data; uint8_t *input_data;
size_t input_data_sz; size_t input_data_sz;
uint8_t *compressed_data; uint16_t nb_qps;
uint8_t *decompressed_data;
struct rte_mempool *comp_buf_pool;
struct rte_mempool *decomp_buf_pool;
struct rte_mempool *op_pool;
int8_t cdev_id;
uint16_t seg_sz; uint16_t seg_sz;
uint16_t out_seg_sz; uint16_t out_seg_sz;
uint16_t burst_sz; uint16_t burst_sz;
uint32_t pool_sz; uint32_t pool_sz;
uint32_t num_iter; uint32_t num_iter;
uint16_t max_sgl_segs; uint16_t max_sgl_segs;
enum rte_comp_huffman huffman_enc; enum rte_comp_huffman huffman_enc;
enum comp_operation test_op; enum comp_operation test_op;
int window_sz; int window_sz;
struct range_list level; struct range_list level_lst;
/* Store TSC duration for all levels (including level 0) */ uint8_t level;
uint64_t comp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
uint64_t decomp_tsc_duration[RTE_COMP_LEVEL_MAX + 1];
size_t comp_data_sz;
size_t decomp_data_sz;
double ratio; double ratio;
double comp_gbps; enum cleanup_st cleanup;
double decomp_gbps;
double comp_tsc_byte;
double decomp_tsc_byte;
}; };
int int

View File

@ -468,19 +468,20 @@ parse_level(struct comp_test_data *test_data, const char *arg)
* Try parsing the argument as a range, if it fails, * Try parsing the argument as a range, if it fails,
* arse it as a list * arse it as a list
*/ */
if (parse_range(arg, &test_data->level.min, &test_data->level.max, if (parse_range(arg, &test_data->level_lst.min,
&test_data->level.inc) < 0) { &test_data->level_lst.max,
ret = parse_list(arg, test_data->level.list, &test_data->level_lst.inc) < 0) {
&test_data->level.min, ret = parse_list(arg, test_data->level_lst.list,
&test_data->level.max); &test_data->level_lst.min,
&test_data->level_lst.max);
if (ret < 0) { if (ret < 0) {
RTE_LOG(ERR, USER1, RTE_LOG(ERR, USER1,
"Failed to parse compression level/s\n"); "Failed to parse compression level/s\n");
return -1; return -1;
} }
test_data->level.count = ret; test_data->level_lst.count = ret;
if (test_data->level.max > RTE_COMP_LEVEL_MAX) { if (test_data->level_lst.max > RTE_COMP_LEVEL_MAX) {
RTE_LOG(ERR, USER1, "Level cannot be higher than %u\n", RTE_LOG(ERR, USER1, "Level cannot be higher than %u\n",
RTE_COMP_LEVEL_MAX); RTE_COMP_LEVEL_MAX);
return -1; return -1;
@ -500,7 +501,6 @@ struct long_opt_parser {
}; };
static struct option lgopts[] = { static struct option lgopts[] = {
{ CPERF_DRIVER_NAME, required_argument, 0, 0 }, { CPERF_DRIVER_NAME, required_argument, 0, 0 },
{ CPERF_TEST_FILE, required_argument, 0, 0 }, { CPERF_TEST_FILE, required_argument, 0, 0 },
{ CPERF_SEG_SIZE, required_argument, 0, 0 }, { CPERF_SEG_SIZE, required_argument, 0, 0 },
@ -574,7 +574,6 @@ comp_perf_options_parse(struct comp_test_data *test_data, int argc, char **argv)
void void
comp_perf_options_default(struct comp_test_data *test_data) comp_perf_options_default(struct comp_test_data *test_data)
{ {
test_data->cdev_id = -1;
test_data->seg_sz = 2048; test_data->seg_sz = 2048;
test_data->burst_sz = 32; test_data->burst_sz = 32;
test_data->pool_sz = 8192; test_data->pool_sz = 8192;
@ -583,9 +582,10 @@ comp_perf_options_default(struct comp_test_data *test_data)
test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC; test_data->huffman_enc = RTE_COMP_HUFFMAN_DYNAMIC;
test_data->test_op = COMPRESS_DECOMPRESS; test_data->test_op = COMPRESS_DECOMPRESS;
test_data->window_sz = -1; test_data->window_sz = -1;
test_data->level.min = 1; test_data->level_lst.min = 1;
test_data->level.max = 9; test_data->level_lst.max = 9;
test_data->level.inc = 1; test_data->level_lst.inc = 1;
test_data->test = CPERF_TEST_TYPE_BENCHMARK;
} }
int int

View File

@ -0,0 +1,284 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2019 Intel Corporation
*/
#include <rte_malloc.h>
#include <rte_eal.h>
#include <rte_log.h>
#include <rte_compressdev.h>
#include "comp_perf_options.h"
#include "comp_perf_test_verify.h"
#include "comp_perf_test_benchmark.h"
#include "comp_perf.h"
#include "comp_perf_test_common.h"
/*
 * Integer division of a by b, rounded up.
 * Both arguments are evaluated twice, so they must be free of side effects;
 * b must be non-zero.
 */
#define DIV_CEIL(a, b) ((a) / (b) + ((a) % (b) != 0))
/*
 * Check whether size is one of the values described by range, i.e.
 * min, min + increment, min + 2 * increment, ... up to max.
 *
 * Returns 0 when size is supported and -1 otherwise.
 */
int
param_range_check(uint16_t size, const struct rte_param_log2_range *range)
{
	unsigned int candidate;

	/* Anything outside [min, max] is rejected straight away */
	if (size < range->min || size > range->max)
		return -1;

	/* With no increment the range is a single value, so size matches */
	if (range->increment == 0)
		return 0;

	/* Step through the supported sizes looking for an exact match */
	candidate = range->min;
	while (candidate <= range->max) {
		if (candidate == size)
			return 0;
		candidate += range->increment;
	}

	return -1;
}
/*
 * Choose the output buffer size for a given input segment size.
 *
 * From a performance point of view the buffer size should be a power of 2,
 * but it must also be large enough to store incompressible data. The
 * result is therefore the nearest power of 2 greater than input_size,
 * found as the highest bit set in (input_size << 1). When that power of 2
 * would exceed (UINT16_MAX + 1) / 2, input_size * EXPANSE_RATIO is
 * returned instead. An input_size of 0 is sized from
 * MIN_COMPRESSED_BUF_SIZE.
 */
static uint32_t
find_buf_size(uint32_t input_size)
{
	uint32_t i;
	uint32_t size;

	/*
	 * Inputs above half of the 16-bit range never get a power-of-2
	 * buffer. Returning here gives the same result as the scan below for
	 * every input up to UINT16_MAX, and protects larger inputs, for which
	 * the scan could either never terminate (no bit of the doubled size
	 * at or below bit 16) or stop on an unrelated low bit and return a
	 * buffer far smaller than the input.
	 */
	if (input_size > (UINT16_MAX >> 1))
		return (uint32_t)((float)input_size * EXPANSE_RATIO);

	size = !input_size ? MIN_COMPRESSED_BUF_SIZE : (input_size << 1);

	/* Scan down from 2^16 until the highest bit set in size is found */
	for (i = UINT16_MAX + 1; !(i & size); i >>= 1)
		;

	return i > ((UINT16_MAX + 1) >> 1)
			? (uint32_t)((float)input_size * EXPANSE_RATIO)
			: i;
}
/*
 * Release everything held by a cperf_mem_resources: the mbufs referenced
 * from the comp_bufs/decomp_bufs arrays, the arrays themselves, the
 * compressed/decompressed data buffers and finally the three mempools.
 *
 * Safe to call on a partially initialised structure: total_bufs is set by
 * comp_perf_allocate_memory() before the mbuf arrays are allocated, so the
 * arrays may still be NULL here and must not be indexed in that case.
 * Unset entries and pointers are expected to be NULL.
 */
void
comp_perf_free_memory(struct cperf_mem_resources *mem)
{
	uint32_t i;

	/* Return the mbufs to their pools before the pools are destroyed */
	if (mem->comp_bufs != NULL) {
		for (i = 0; i < mem->total_bufs; i++)
			rte_pktmbuf_free(mem->comp_bufs[i]);
	}

	if (mem->decomp_bufs != NULL) {
		for (i = 0; i < mem->total_bufs; i++)
			rte_pktmbuf_free(mem->decomp_bufs[i]);
	}

	rte_free(mem->decomp_bufs);
	rte_free(mem->comp_bufs);
	rte_free(mem->decompressed_data);
	rte_free(mem->compressed_data);
	rte_mempool_free(mem->op_pool);
	rte_mempool_free(mem->decomp_buf_pool);
	rte_mempool_free(mem->comp_buf_pool);
}
/*
 * Create the mempools and allocate the buffers needed by one test context.
 *
 * Sets test_data->out_seg_sz from test_data->seg_sz, then fills in
 * mem->comp_buf_pool, mem->decomp_buf_pool, mem->op_pool, mem->total_bufs,
 * mem->compressed_data, mem->decompressed_data, mem->comp_bufs and
 * mem->decomp_bufs. Pool names are derived from mem->dev_id and
 * mem->qp_id.
 *
 * Returns 0 on success and -1 as soon as any allocation fails; resources
 * obtained before the failure are left in mem for the caller to release.
 */
int
comp_perf_allocate_memory(struct comp_test_data *test_data,
			  struct cperf_mem_resources *mem)
{
	const int socket = rte_socket_id();
	char name[32] = "";
	uint32_t nb_segs;
	size_t bufs_array_sz;

	test_data->out_seg_sz = find_buf_size(test_data->seg_sz);

	/*
	 * Number of segments for input and output
	 * (compression and decompression)
	 */
	nb_segs = DIV_CEIL(test_data->input_data_sz, test_data->seg_sz);

	/* Pool backing the compressed (output) mbufs */
	snprintf(name, sizeof(name), "comp_buf_pool_%u_qp_%u",
			mem->dev_id, mem->qp_id);
	mem->comp_buf_pool = rte_pktmbuf_pool_create(name, nb_segs, 0, 0,
			test_data->out_seg_sz + RTE_PKTMBUF_HEADROOM,
			socket);
	if (mem->comp_buf_pool == NULL) {
		RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
		return -1;
	}

	/* Pool backing the uncompressed (input) mbufs */
	snprintf(name, sizeof(name), "decomp_buf_pool_%u_qp_%u",
			mem->dev_id, mem->qp_id);
	mem->decomp_buf_pool = rte_pktmbuf_pool_create(name, nb_segs, 0, 0,
			test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
			socket);
	if (mem->decomp_buf_pool == NULL) {
		RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
		return -1;
	}

	/* One operation per mbuf chain of up to max_sgl_segs segments */
	mem->total_bufs = DIV_CEIL(nb_segs, test_data->max_sgl_segs);

	snprintf(name, sizeof(name), "op_pool_%u_qp_%u",
			mem->dev_id, mem->qp_id);
	mem->op_pool = rte_comp_op_pool_create(name, mem->total_bufs,
			0, 0, socket);
	if (mem->op_pool == NULL) {
		RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n");
		return -1;
	}

	/*
	 * Compressed data might be a bit larger than input data,
	 * if data cannot be compressed
	 */
	mem->compressed_data = rte_zmalloc_socket(NULL,
			test_data->input_data_sz * EXPANSE_RATIO
				+ MIN_COMPRESSED_BUF_SIZE,
			0, socket);
	if (mem->compressed_data == NULL) {
		RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
				"file could not be allocated\n");
		return -1;
	}

	mem->decompressed_data = rte_zmalloc_socket(NULL,
			test_data->input_data_sz, 0, socket);
	if (mem->decompressed_data == NULL) {
		RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
				"file could not be allocated\n");
		return -1;
	}

	/* Zeroed pointer arrays, one slot per mbuf chain */
	bufs_array_sz = mem->total_bufs * sizeof(struct rte_mbuf *);

	mem->comp_bufs = rte_zmalloc_socket(NULL, bufs_array_sz, 0, socket);
	if (mem->comp_bufs == NULL) {
		RTE_LOG(ERR, USER1, "Memory to hold the compression mbufs"
				" could not be allocated\n");
		return -1;
	}

	mem->decomp_bufs = rte_zmalloc_socket(NULL, bufs_array_sz, 0, socket);
	if (mem->decomp_bufs == NULL) {
		RTE_LOG(ERR, USER1, "Memory to hold the decompression mbufs"
				" could not be allocated\n");
		return -1;
	}

	return 0;
}
/*
 * Allocate one mbuf from pool and append len bytes of data room to it.
 *
 * On success the mbuf is returned and *data_addr points at the start of
 * the appended area. On failure the reason is logged, the mbuf (if one
 * was obtained) is given back to its pool and NULL is returned, so the
 * caller never ends up holding a segment it cannot reach.
 */
static struct rte_mbuf *
alloc_mbuf_with_room(struct rte_mempool *pool, uint16_t len,
		     uint8_t **data_addr)
{
	struct rte_mbuf *seg = rte_pktmbuf_alloc(pool);

	if (seg == NULL) {
		RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
		return NULL;
	}

	*data_addr = (uint8_t *)rte_pktmbuf_append(seg, len);
	if (*data_addr == NULL) {
		RTE_LOG(ERR, USER1, "Could not append data\n");
		rte_pktmbuf_free(seg);
		return NULL;
	}

	return seg;
}

/*
 * Fill the mbuf arrays of mem for one test context.
 *
 * For each of the mem->total_bufs entries:
 *  - mem->decomp_bufs[i] becomes a chain of up to test_data->max_sgl_segs
 *    segments, each holding up to test_data->seg_sz bytes copied from
 *    test_data->input_data;
 *  - mem->comp_bufs[i] becomes a chain with the same number of segments,
 *    each with test_data->out_seg_sz bytes of room for output.
 *
 * Returns 0 on success and -1 on the first allocation, append or chain
 * failure. Mbufs already stored in the arrays stay there for the caller to
 * free; a segment that could not be chained is freed here because nothing
 * else references it.
 */
int
prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem)
{
	uint32_t remaining_data = test_data->input_data_sz;
	uint8_t *input_data_ptr = test_data->input_data;
	size_t data_sz;
	uint8_t *data_addr;
	uint32_t i, j;

	for (i = 0; i < mem->total_bufs; i++) {
		/* Already one segment in the mbuf */
		uint16_t segs_per_mbuf = 1;

		/* Allocate data in input mbuf and copy data from input file */
		data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
		mem->decomp_bufs[i] = alloc_mbuf_with_room(
				mem->decomp_buf_pool, data_sz, &data_addr);
		if (mem->decomp_bufs[i] == NULL)
			return -1;

		rte_memcpy(data_addr, input_data_ptr, data_sz);
		input_data_ptr += data_sz;
		remaining_data -= data_sz;

		/* Chain mbufs if needed for input mbufs */
		while (segs_per_mbuf < test_data->max_sgl_segs
				&& remaining_data > 0) {
			struct rte_mbuf *next_seg;

			data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
			next_seg = alloc_mbuf_with_room(
					mem->decomp_buf_pool, data_sz,
					&data_addr);
			if (next_seg == NULL)
				return -1;

			rte_memcpy(data_addr, input_data_ptr, data_sz);
			input_data_ptr += data_sz;
			remaining_data -= data_sz;

			if (rte_pktmbuf_chain(mem->decomp_bufs[i],
					next_seg) < 0) {
				RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
				/* Not linked to the head, so free it here */
				rte_pktmbuf_free(next_seg);
				return -1;
			}
			segs_per_mbuf++;
		}

		/* Allocate data in output mbuf */
		mem->comp_bufs[i] = alloc_mbuf_with_room(mem->comp_buf_pool,
				test_data->out_seg_sz, &data_addr);
		if (mem->comp_bufs[i] == NULL)
			return -1;

		/* Chain mbufs if needed for output mbufs */
		for (j = 1; j < segs_per_mbuf; j++) {
			struct rte_mbuf *next_seg = alloc_mbuf_with_room(
					mem->comp_buf_pool,
					test_data->out_seg_sz, &data_addr);

			if (next_seg == NULL)
				return -1;

			if (rte_pktmbuf_chain(mem->comp_bufs[i],
					next_seg) < 0) {
				RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
				/* Not linked to the head, so free it here */
				rte_pktmbuf_free(next_seg);
				return -1;
			}
		}
	}

	return 0;
}

View File

@ -0,0 +1,41 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2019 Intel Corporation
*/
#ifndef _COMP_PERF_TEST_COMMON_H_
#define _COMP_PERF_TEST_COMMON_H_
#include <stdint.h>
#include <rte_mempool.h>
/*
 * Memory owned by one test context: the mempools, the mbuf arrays and the
 * flat data buffers set up by comp_perf_allocate_memory() and
 * prepare_bufs(), and released by comp_perf_free_memory().
 */
struct cperf_mem_resources {
uint8_t dev_id; /* device id; used when naming the mempools */
uint16_t qp_id; /* queue pair id; used when naming the mempools */
uint8_t lcore_id; /* NOTE(review): presumably the lcore running this context - confirm with users */
uint32_t total_bufs; /* number of entries in comp_bufs / decomp_bufs and size of op_pool */
uint8_t *compressed_data; /* buffer sized input_data_sz * EXPANSE_RATIO + MIN_COMPRESSED_BUF_SIZE */
uint8_t *decompressed_data; /* buffer sized input_data_sz */
struct rte_mbuf **comp_bufs; /* output mbuf chains, taken from comp_buf_pool */
struct rte_mbuf **decomp_bufs; /* mbuf chains holding the input data, taken from decomp_buf_pool */
struct rte_mempool *comp_buf_pool; /* mbuf pool with out_seg_sz data room per mbuf */
struct rte_mempool *decomp_buf_pool; /* mbuf pool with seg_sz data room per mbuf */
struct rte_mempool *op_pool; /* compression operation pool */
};
/*
 * Check that size lies within range and matches one of its increments.
 * Returns 0 if it does, -1 otherwise.
 */
int
param_range_check(uint16_t size, const struct rte_param_log2_range *range);
/*
 * Free the mbufs, mbuf arrays, data buffers and mempools referenced by mem.
 */
void
comp_perf_free_memory(struct cperf_mem_resources *mem);
/*
 * Create the mempools and allocate the buffers and mbuf arrays in mem,
 * sized from test_data. Also sets test_data->out_seg_sz.
 * Returns 0 on success, -1 on failure.
 */
int
comp_perf_allocate_memory(struct comp_test_data *test_data,
struct cperf_mem_resources *mem);
/*
 * Allocate and chain the input and output mbufs in mem and copy the input
 * data from test_data into the input mbufs.
 * Returns 0 on success, -1 on failure.
 */
int
prepare_bufs(struct comp_test_data *test_data, struct cperf_mem_resources *mem);
#endif /* _COMP_PERF_TEST_COMMON_H_ */

View File

@ -8,56 +8,38 @@
#include <rte_compressdev.h> #include <rte_compressdev.h>
#include "comp_perf_options.h" #include "comp_perf_options.h"
#include "comp_perf_test_verify.h" #include "comp_perf.h"
#include "comp_perf_test_benchmark.h" #include "comp_perf_test_common.h"
#define NUM_MAX_XFORMS 16 #define NUM_MAX_XFORMS 16
#define NUM_MAX_INFLIGHT_OPS 512 #define NUM_MAX_INFLIGHT_OPS 512
#define DIV_CEIL(a, b) ((a) / (b) + ((a) % (b) != 0)) __extension__
const char *cperf_test_type_strs[] = {
[CPERF_TEST_TYPE_BENCHMARK] = "benchmark",
[CPERF_TEST_TYPE_VERIFY] = "verify"
};
/* Cleanup state machine */ __extension__
static enum cleanup_st { static const struct cperf_test cperf_testmap[] = {
ST_CLEAR = 0, [CPERF_TEST_TYPE_BENCHMARK] = {
ST_TEST_DATA, cperf_benchmark_test_constructor,
ST_COMPDEV, cperf_benchmark_test_runner,
ST_INPUT_DATA, cperf_benchmark_test_destructor
ST_MEMORY_ALLOC, },
ST_PREPARE_BUF, [CPERF_TEST_TYPE_VERIFY] = {
ST_DURING_TEST cperf_verify_test_constructor,
} cleanup = ST_CLEAR; cperf_verify_test_runner,
cperf_verify_test_destructor
}
};
static int static int
param_range_check(uint16_t size, const struct rte_param_log2_range *range) comp_perf_check_capabilities(struct comp_test_data *test_data, uint8_t cdev_id)
{
unsigned int next_size;
/* Check lower/upper bounds */
if (size < range->min)
return -1;
if (size > range->max)
return -1;
/* If range is actually only one value, size is correct */
if (range->increment == 0)
return 0;
/* Check if value is one of the supported sizes */
for (next_size = range->min; next_size <= range->max;
next_size += range->increment)
if (size == next_size)
return 0;
return -1;
}
static int
comp_perf_check_capabilities(struct comp_test_data *test_data)
{ {
const struct rte_compressdev_capabilities *cap; const struct rte_compressdev_capabilities *cap;
cap = rte_compressdev_capability_get(test_data->cdev_id, cap = rte_compressdev_capability_get(cdev_id,
RTE_COMP_ALGO_DEFLATE); RTE_COMP_ALGO_DEFLATE);
if (cap == NULL) { if (cap == NULL) {
@ -105,7 +87,7 @@ comp_perf_check_capabilities(struct comp_test_data *test_data)
} }
/* Level 0 support */ /* Level 0 support */
if (test_data->level.min == 0 && if (test_data->level_lst.min == 0 &&
(comp_flags & RTE_COMP_FF_NONCOMPRESSED_BLOCKS) == 0) { (comp_flags & RTE_COMP_FF_NONCOMPRESSED_BLOCKS) == 0) {
RTE_LOG(ERR, USER1, "Compress device does not support " RTE_LOG(ERR, USER1, "Compress device does not support "
"level 0 (no compression)\n"); "level 0 (no compression)\n");
@ -115,110 +97,108 @@ comp_perf_check_capabilities(struct comp_test_data *test_data)
return 0; return 0;
} }
static uint32_t
find_buf_size(uint32_t input_size)
{
uint32_t i;
/* From performance point of view the buffer size should be a
* power of 2 but also should be enough to store incompressible data
*/
/* We're looking for nearest power of 2 buffer size, which is greather
* than input_size
*/
uint32_t size =
!input_size ? MIN_COMPRESSED_BUF_SIZE : (input_size << 1);
for (i = UINT16_MAX + 1; !(i & size); i >>= 1)
;
return i > ((UINT16_MAX + 1) >> 1)
? (uint32_t)((float)input_size * EXPANSE_RATIO)
: i;
}
static int static int
comp_perf_allocate_memory(struct comp_test_data *test_data) comp_perf_initialize_compressdev(struct comp_test_data *test_data,
uint8_t *enabled_cdevs)
{ {
uint8_t enabled_cdev_count, nb_lcores, cdev_id;
unsigned int i, j;
int ret;
test_data->out_seg_sz = find_buf_size(test_data->seg_sz); enabled_cdev_count = rte_compressdev_devices_get(test_data->driver_name,
/* Number of segments for input and output enabled_cdevs, RTE_COMPRESS_MAX_DEVS);
* (compression and decompression) if (enabled_cdev_count == 0) {
RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
test_data->driver_name);
return -EINVAL;
}
nb_lcores = rte_lcore_count() - 1;
/*
* Use fewer devices,
* if there are more available than cores.
*/ */
uint32_t total_segs = DIV_CEIL(test_data->input_data_sz, if (enabled_cdev_count > nb_lcores) {
test_data->seg_sz); enabled_cdev_count = nb_lcores;
test_data->comp_buf_pool = rte_pktmbuf_pool_create("comp_buf_pool", RTE_LOG(INFO, USER1,
total_segs, " There's more available devices than cores!"
0, 0, " The number of devices has been aligned to %d cores\n",
test_data->out_seg_sz + RTE_PKTMBUF_HEADROOM, nb_lcores);
rte_socket_id());
if (test_data->comp_buf_pool == NULL) {
RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
return -1;
}
cleanup = ST_MEMORY_ALLOC;
test_data->decomp_buf_pool = rte_pktmbuf_pool_create("decomp_buf_pool",
total_segs,
0, 0, test_data->seg_sz + RTE_PKTMBUF_HEADROOM,
rte_socket_id());
if (test_data->decomp_buf_pool == NULL) {
RTE_LOG(ERR, USER1, "Mbuf mempool could not be created\n");
return -1;
}
test_data->total_bufs = DIV_CEIL(total_segs, test_data->max_sgl_segs);
test_data->op_pool = rte_comp_op_pool_create("op_pool",
test_data->total_bufs,
0, 0, rte_socket_id());
if (test_data->op_pool == NULL) {
RTE_LOG(ERR, USER1, "Comp op mempool could not be created\n");
return -1;
} }
/* /*
* Compressed data might be a bit larger than input data, * Calculate number of needed queue pairs, based on the amount
* if data cannot be compressed * of available number of logical cores and compression devices.
* For instance, if there are 4 cores and 2 compression devices,
* 2 queue pairs will be set up per device.
* One queue pair per one core.
* if e.g.: there're 3 cores and 2 compression devices,
* 2 queue pairs will be set up per device but one queue pair
* will left unused in the last one device
*/ */
test_data->compressed_data = rte_zmalloc_socket(NULL, test_data->nb_qps = (nb_lcores % enabled_cdev_count) ?
test_data->input_data_sz * EXPANSE_RATIO (nb_lcores / enabled_cdev_count) + 1 :
+ MIN_COMPRESSED_BUF_SIZE, 0, nb_lcores / enabled_cdev_count;
rte_socket_id());
if (test_data->compressed_data == NULL) { for (i = 0; i < enabled_cdev_count &&
RTE_LOG(ERR, USER1, "Memory to hold the data from the input " i < RTE_COMPRESS_MAX_DEVS; i++,
"file could not be allocated\n"); nb_lcores -= test_data->nb_qps) {
return -1; cdev_id = enabled_cdevs[i];
struct rte_compressdev_info cdev_info;
uint8_t socket_id = rte_compressdev_socket_id(cdev_id);
rte_compressdev_info_get(cdev_id, &cdev_info);
if (cdev_info.max_nb_queue_pairs &&
test_data->nb_qps > cdev_info.max_nb_queue_pairs) {
RTE_LOG(ERR, USER1,
"Number of needed queue pairs is higher "
"than the maximum number of queue pairs "
"per device.\n");
RTE_LOG(ERR, USER1,
"Lower the number of cores or increase "
"the number of crypto devices\n");
return -EINVAL;
}
if (comp_perf_check_capabilities(test_data, cdev_id) < 0)
return -EINVAL;
/* Configure compressdev */
struct rte_compressdev_config config = {
.socket_id = socket_id,
.nb_queue_pairs = nb_lcores > test_data->nb_qps
? test_data->nb_qps : nb_lcores,
.max_nb_priv_xforms = NUM_MAX_XFORMS,
.max_nb_streams = 0
};
if (rte_compressdev_configure(cdev_id, &config) < 0) {
RTE_LOG(ERR, USER1, "Device configuration failed\n");
return -EINVAL;
}
for (j = 0; j < test_data->nb_qps; j++) {
ret = rte_compressdev_queue_pair_setup(cdev_id, j,
NUM_MAX_INFLIGHT_OPS, socket_id);
if (ret < 0) {
RTE_LOG(ERR, USER1,
"Failed to setup queue pair %u on compressdev %u",
j, cdev_id);
return -EINVAL;
}
}
ret = rte_compressdev_start(cdev_id);
if (ret < 0) {
RTE_LOG(ERR, USER1,
"Failed to start device %u: error %d\n",
cdev_id, ret);
return -EPERM;
}
} }
test_data->decompressed_data = rte_zmalloc_socket(NULL, return enabled_cdev_count;
test_data->input_data_sz, 0,
rte_socket_id());
if (test_data->decompressed_data == NULL) {
RTE_LOG(ERR, USER1, "Memory to hold the data from the input "
"file could not be allocated\n");
return -1;
}
test_data->comp_bufs = rte_zmalloc_socket(NULL,
test_data->total_bufs * sizeof(struct rte_mbuf *),
0, rte_socket_id());
if (test_data->comp_bufs == NULL) {
RTE_LOG(ERR, USER1, "Memory to hold the compression mbufs"
" could not be allocated\n");
return -1;
}
test_data->decomp_bufs = rte_zmalloc_socket(NULL,
test_data->total_bufs * sizeof(struct rte_mbuf *),
0, rte_socket_id());
if (test_data->decomp_bufs == NULL) {
RTE_LOG(ERR, USER1, "Memory to hold the decompression mbufs"
" could not be allocated\n");
return -1;
}
return 0;
} }
static int static int
@ -295,187 +275,18 @@ end:
return ret; return ret;
} }
static int
comp_perf_initialize_compressdev(struct comp_test_data *test_data)
{
uint8_t enabled_cdev_count;
uint8_t enabled_cdevs[RTE_COMPRESS_MAX_DEVS];
enabled_cdev_count = rte_compressdev_devices_get(test_data->driver_name,
enabled_cdevs, RTE_COMPRESS_MAX_DEVS);
if (enabled_cdev_count == 0) {
RTE_LOG(ERR, USER1, "No compress devices type %s available\n",
test_data->driver_name);
return -EINVAL;
}
if (enabled_cdev_count > 1)
RTE_LOG(INFO, USER1,
"Only the first compress device will be used\n");
test_data->cdev_id = enabled_cdevs[0];
if (comp_perf_check_capabilities(test_data) < 0)
return -1;
/* Configure compressdev (one device, one queue pair) */
struct rte_compressdev_config config = {
.socket_id = rte_socket_id(),
.nb_queue_pairs = 1,
.max_nb_priv_xforms = NUM_MAX_XFORMS,
.max_nb_streams = 0
};
if (rte_compressdev_configure(test_data->cdev_id, &config) < 0) {
RTE_LOG(ERR, USER1, "Device configuration failed\n");
return -1;
}
if (rte_compressdev_queue_pair_setup(test_data->cdev_id, 0,
NUM_MAX_INFLIGHT_OPS, rte_socket_id()) < 0) {
RTE_LOG(ERR, USER1, "Queue pair setup failed\n");
return -1;
}
if (rte_compressdev_start(test_data->cdev_id) < 0) {
RTE_LOG(ERR, USER1, "Device could not be started\n");
return -1;
}
return 0;
}
static int
prepare_bufs(struct comp_test_data *test_data)
{
uint32_t remaining_data = test_data->input_data_sz;
uint8_t *input_data_ptr = test_data->input_data;
size_t data_sz;
uint8_t *data_addr;
uint32_t i, j;
for (i = 0; i < test_data->total_bufs; i++) {
/* Allocate data in input mbuf and copy data from input file */
test_data->decomp_bufs[i] =
rte_pktmbuf_alloc(test_data->decomp_buf_pool);
if (test_data->decomp_bufs[i] == NULL) {
RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
return -1;
}
cleanup = ST_PREPARE_BUF;
data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
data_addr = (uint8_t *) rte_pktmbuf_append(
test_data->decomp_bufs[i], data_sz);
if (data_addr == NULL) {
RTE_LOG(ERR, USER1, "Could not append data\n");
return -1;
}
rte_memcpy(data_addr, input_data_ptr, data_sz);
input_data_ptr += data_sz;
remaining_data -= data_sz;
/* Already one segment in the mbuf */
uint16_t segs_per_mbuf = 1;
/* Chain mbufs if needed for input mbufs */
while (segs_per_mbuf < test_data->max_sgl_segs
&& remaining_data > 0) {
struct rte_mbuf *next_seg =
rte_pktmbuf_alloc(test_data->decomp_buf_pool);
if (next_seg == NULL) {
RTE_LOG(ERR, USER1,
"Could not allocate mbuf\n");
return -1;
}
data_sz = RTE_MIN(remaining_data, test_data->seg_sz);
data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
data_sz);
if (data_addr == NULL) {
RTE_LOG(ERR, USER1, "Could not append data\n");
return -1;
}
rte_memcpy(data_addr, input_data_ptr, data_sz);
input_data_ptr += data_sz;
remaining_data -= data_sz;
if (rte_pktmbuf_chain(test_data->decomp_bufs[i],
next_seg) < 0) {
RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
return -1;
}
segs_per_mbuf++;
}
/* Allocate data in output mbuf */
test_data->comp_bufs[i] =
rte_pktmbuf_alloc(test_data->comp_buf_pool);
if (test_data->comp_bufs[i] == NULL) {
RTE_LOG(ERR, USER1, "Could not allocate mbuf\n");
return -1;
}
data_addr = (uint8_t *) rte_pktmbuf_append(
test_data->comp_bufs[i],
test_data->out_seg_sz);
if (data_addr == NULL) {
RTE_LOG(ERR, USER1, "Could not append data\n");
return -1;
}
/* Chain mbufs if needed for output mbufs */
for (j = 1; j < segs_per_mbuf; j++) {
struct rte_mbuf *next_seg =
rte_pktmbuf_alloc(test_data->comp_buf_pool);
if (next_seg == NULL) {
RTE_LOG(ERR, USER1,
"Could not allocate mbuf\n");
return -1;
}
data_addr = (uint8_t *)rte_pktmbuf_append(next_seg,
test_data->out_seg_sz);
if (data_addr == NULL) {
RTE_LOG(ERR, USER1, "Could not append data\n");
return -1;
}
if (rte_pktmbuf_chain(test_data->comp_bufs[i],
next_seg) < 0) {
RTE_LOG(ERR, USER1, "Could not chain mbufs\n");
return -1;
}
}
}
return 0;
}
static void
free_bufs(struct comp_test_data *test_data)
{
uint32_t i;
for (i = 0; i < test_data->total_bufs; i++) {
rte_pktmbuf_free(test_data->comp_bufs[i]);
rte_pktmbuf_free(test_data->decomp_bufs[i]);
}
}
int int
main(int argc, char **argv) main(int argc, char **argv)
{ {
uint8_t level, level_idx = 0; uint8_t level_idx = 0;
int ret, i; int ret, i;
struct comp_test_data *test_data; struct comp_test_data *test_data;
void *ctx[RTE_MAX_LCORE] = {};
uint8_t enabled_cdevs[RTE_COMPRESS_MAX_DEVS];
int nb_compressdevs = 0;
uint16_t total_nb_qps = 0;
uint8_t cdev_id;
uint32_t lcore_id;
/* Initialise DPDK EAL */ /* Initialise DPDK EAL */
ret = rte_eal_init(argc, argv); ret = rte_eal_init(argc, argv);
@ -492,7 +303,7 @@ main(int argc, char **argv)
rte_socket_id()); rte_socket_id());
ret = EXIT_SUCCESS; ret = EXIT_SUCCESS;
cleanup = ST_TEST_DATA; test_data->cleanup = ST_TEST_DATA;
comp_perf_options_default(test_data); comp_perf_options_default(test_data);
if (comp_perf_options_parse(test_data, argc, argv) < 0) { if (comp_perf_options_parse(test_data, argc, argv) < 0) {
@ -507,99 +318,112 @@ main(int argc, char **argv)
goto end; goto end;
} }
if (comp_perf_initialize_compressdev(test_data) < 0) { nb_compressdevs =
comp_perf_initialize_compressdev(test_data, enabled_cdevs);
if (nb_compressdevs < 1) {
ret = EXIT_FAILURE; ret = EXIT_FAILURE;
goto end; goto end;
} }
cleanup = ST_COMPDEV; test_data->cleanup = ST_COMPDEV;
if (comp_perf_dump_input_data(test_data) < 0) { if (comp_perf_dump_input_data(test_data) < 0) {
ret = EXIT_FAILURE; ret = EXIT_FAILURE;
goto end; goto end;
} }
cleanup = ST_INPUT_DATA; test_data->cleanup = ST_INPUT_DATA;
if (comp_perf_allocate_memory(test_data) < 0) {
ret = EXIT_FAILURE;
goto end;
}
if (prepare_bufs(test_data) < 0) { if (test_data->level_lst.inc != 0)
ret = EXIT_FAILURE; test_data->level = test_data->level_lst.min;
goto end;
}
if (test_data->level.inc != 0)
level = test_data->level.min;
else else
level = test_data->level.list[0]; test_data->level = test_data->level_lst.list[0];
printf("App uses socket: %u\n", rte_socket_id()); printf("App uses socket: %u\n", rte_socket_id());
printf("Driver uses socket: %u\n",
rte_compressdev_socket_id(test_data->cdev_id));
printf("Burst size = %u\n", test_data->burst_sz); printf("Burst size = %u\n", test_data->burst_sz);
printf("File size = %zu\n", test_data->input_data_sz); printf("File size = %zu\n", test_data->input_data_sz);
printf("%6s%12s%17s%19s%21s%15s%21s%23s%16s\n", test_data->cleanup = ST_DURING_TEST;
"Level", "Comp size", "Comp ratio [%]", total_nb_qps = nb_compressdevs * test_data->nb_qps;
"Comp [Cycles/it]", "Comp [Cycles/Byte]", "Comp [Gbps]",
"Decomp [Cycles/it]", "Decomp [Cycles/Byte]", "Decomp [Gbps]");
cleanup = ST_DURING_TEST; i = 0;
while (level <= test_data->level.max) { uint8_t qp_id = 0, cdev_index = 0;
/* RTE_LCORE_FOREACH_SLAVE(lcore_id) {
* Run a first iteration, to verify compression and
* get the compression ratio for the level if (i == total_nb_qps)
*/
if (cperf_verification(test_data, level) != EXIT_SUCCESS)
break; break;
/* cdev_id = enabled_cdevs[cdev_index];
* Run benchmarking test ctx[i] = cperf_testmap[test_data->test].constructor(
*/ cdev_id, qp_id,
if (cperf_benchmark(test_data, level) != EXIT_SUCCESS) test_data);
break; if (ctx[i] == NULL) {
RTE_LOG(ERR, USER1, "Test run constructor failed\n");
goto end;
}
qp_id = (qp_id + 1) % test_data->nb_qps;
if (qp_id == 0)
cdev_index++;
i++;
}
printf("%6u%12zu%17.2f%19"PRIu64"%21.2f" while (test_data->level <= test_data->level_lst.max) {
"%15.2f%21"PRIu64"%23.2f%16.2f\n",
level, test_data->comp_data_sz, test_data->ratio,
test_data->comp_tsc_duration[level],
test_data->comp_tsc_byte, test_data->comp_gbps,
test_data->decomp_tsc_duration[level],
test_data->decomp_tsc_byte, test_data->decomp_gbps);
if (test_data->level.inc != 0) i = 0;
level += test_data->level.inc; RTE_LCORE_FOREACH_SLAVE(lcore_id) {
else {
if (++level_idx == test_data->level.count) if (i == total_nb_qps)
break; break;
level = test_data->level.list[level_idx];
rte_eal_remote_launch(
cperf_testmap[test_data->test].runner,
ctx[i], lcore_id);
i++;
}
i = 0;
RTE_LCORE_FOREACH_SLAVE(lcore_id) {
if (i == total_nb_qps)
break;
ret |= rte_eal_wait_lcore(lcore_id);
i++;
}
if (ret != EXIT_SUCCESS)
break;
if (test_data->level_lst.inc != 0)
test_data->level += test_data->level_lst.inc;
else {
if (++level_idx == test_data->level_lst.count)
break;
test_data->level = test_data->level_lst.list[level_idx];
} }
} }
end: end:
switch (cleanup) { switch (test_data->cleanup) {
case ST_DURING_TEST: case ST_DURING_TEST:
case ST_PREPARE_BUF: i = 0;
free_bufs(test_data); RTE_LCORE_FOREACH_SLAVE(lcore_id) {
/* fallthrough */ if (i == total_nb_qps)
case ST_MEMORY_ALLOC: break;
rte_free(test_data->decomp_bufs);
rte_free(test_data->comp_bufs); if (ctx[i] && cperf_testmap[test_data->test].destructor)
rte_free(test_data->decompressed_data); cperf_testmap[test_data->test].destructor(
rte_free(test_data->compressed_data); ctx[i]);
rte_mempool_free(test_data->op_pool); i++;
rte_mempool_free(test_data->decomp_buf_pool); }
rte_mempool_free(test_data->comp_buf_pool);
/* fallthrough */ /* fallthrough */
case ST_INPUT_DATA: case ST_INPUT_DATA:
rte_free(test_data->input_data); rte_free(test_data->input_data);
/* fallthrough */ /* fallthrough */
case ST_COMPDEV: case ST_COMPDEV:
if (test_data->cdev_id != -1) for (i = 0; i < nb_compressdevs &&
rte_compressdev_stop(test_data->cdev_id); i < RTE_COMPRESS_MAX_DEVS; i++)
rte_compressdev_stop(enabled_cdevs[i]);
/* fallthrough */ /* fallthrough */
case ST_TEST_DATA: case ST_TEST_DATA:
rte_free(test_data); rte_free(test_data);
@ -616,3 +440,44 @@ end:
} }
return ret; return ret;
} }
/*
 * Weak placeholder for the benchmark test constructor.
 * Ignores all arguments, logs that the benchmark test is not supported yet
 * and returns NULL, which the caller treats as a constructor failure.
 */
__rte_weak void *
cperf_benchmark_test_constructor(uint8_t dev_id __rte_unused,
uint16_t qp_id __rte_unused,
struct comp_test_data *options __rte_unused)
{
RTE_LOG(INFO, USER1, "Benchmark test is not supported yet\n");
return NULL;
}
/*
 * Weak placeholder for the benchmark test destructor.
 * Does nothing; the placeholder constructor never produces a context.
 */
__rte_weak void
cperf_benchmark_test_destructor(void *arg __rte_unused)
{
}
/*
 * Weak placeholder for the benchmark test runner.
 * Ignores its context and returns 0 without doing any work.
 */
__rte_weak int
cperf_benchmark_test_runner(void *test_ctx __rte_unused)
{
return 0;
}
/*
 * Weak placeholder for the verify test constructor.
 * Ignores all arguments, logs that the verify test is not supported yet
 * and returns NULL, which the caller treats as a constructor failure.
 */
__rte_weak void *
cperf_verify_test_constructor(uint8_t dev_id __rte_unused,
uint16_t qp_id __rte_unused,
struct comp_test_data *options __rte_unused)
{
RTE_LOG(INFO, USER1, "Verify test is not supported yet\n");
return NULL;
}
/*
 * Weak placeholder for the verify test destructor.
 * Does nothing; the placeholder constructor never produces a context.
 */
__rte_weak void
cperf_verify_test_destructor(void *arg __rte_unused)
{
}
/*
 * Weak placeholder for the verify test runner.
 * Ignores its context and returns 0 without doing any work.
 */
__rte_weak int
cperf_verify_test_runner(void *test_ctx __rte_unused)
{
return 0;
}

View File

@ -4,6 +4,5 @@
allow_experimental_apis = true allow_experimental_apis = true
sources = files('comp_perf_options_parse.c', sources = files('comp_perf_options_parse.c',
'main.c', 'main.c',
'comp_perf_test_verify.c', 'comp_perf_test_common.c')
'comp_perf_test_benchmark.c')
deps = ['compressdev'] deps = ['compressdev']