lib/accel: Add the real chained crc32 support with the example.

This patch adds the chained crc32 support API for both batched
and non-batched mode usage. It also updates the accel_perf
program to use the revised accelerated crc32 function.

For example, you can use the following command:

./build/examples/accel_perf -C 4 -q 128 -o 4096 -t 5 -w crc32c -y

In this command, "-C 4" means: calculate the chained
crc32 over an iov array of 4 elements.
(even if you do not have the accelerated DSA hardware)

Signed-off-by: Ziye Yang <ziye.yang@intel.com>
Change-Id: Ifede26f9040980b5791da8e5afef41177eede9f6
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/6457
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Shuhei Matsumoto <shuhei.matsumoto.xt@hitachi.com>
Reviewed-by: Changpeng Liu <changpeng.liu@intel.com>
This commit is contained in:
Ziye Yang 2020-12-21 20:17:06 +08:00 committed by Tomasz Zawadzki
parent 1f49ee235f
commit 88754353c0
3 changed files with 255 additions and 36 deletions

View File

@ -54,6 +54,7 @@ static int g_ops_per_batch = 0;
static int g_threads_per_core = 1;
static int g_time_in_sec = 5;
static uint32_t g_crc32c_seed = 0;
static uint32_t g_crc32c_chained_count = 1;
static int g_fail_percent_goal = 0;
static uint8_t g_fill_pattern = 255;
static bool g_verify = false;
@ -74,7 +75,8 @@ struct display_info {
struct ap_task {
void *src;
struct iovec iov;
struct iovec *iovs;
uint32_t iov_cnt;
void *dst;
void *dst2;
struct worker_thread *worker;
@ -121,6 +123,7 @@ dump_user_config(struct spdk_app_opts *opts)
printf("Workload Type: %s\n", g_workload_type);
if (g_workload_selection == ACCEL_CRC32C) {
printf("CRC-32C seed: %u\n", g_crc32c_seed);
printf("vector size: %u\n", g_crc32c_chained_count);
} else if (g_workload_selection == ACCEL_FILL) {
printf("Fill pattern: 0x%x\n", g_fill_pattern);
} else if ((g_workload_selection == ACCEL_COMPARE) && g_fail_percent_goal > 0) {
@ -144,7 +147,9 @@ usage(void)
printf("accel_perf options:\n");
printf("\t[-h help message]\n");
printf("\t[-q queue depth per core]\n");
printf("\t[-C for crc32c workload, use this value to configre the io vector size to test (default 1)\n");
printf("\t[-T number of threads per core\n");
printf("\t[-n number of channels]\n");
printf("\t[-o transfer size in bytes]\n");
printf("\t[-t time in seconds]\n");
printf("\t[-w workload type must be one of these: copy, fill, crc32c, compare, dualcast\n");
@ -162,6 +167,9 @@ parse_args(int argc, char *argv)
case 'b':
g_ops_per_batch = spdk_strtol(optarg, 10);
break;
case 'C':
g_crc32c_chained_count = spdk_strtol(optarg, 10);
break;
case 'f':
g_fill_pattern = (uint8_t)spdk_strtol(optarg, 10);
break;
@ -204,6 +212,7 @@ parse_args(int argc, char *argv)
usage();
return 1;
}
return 0;
}
@ -230,6 +239,7 @@ static int
_get_task_data_bufs(struct ap_task *task)
{
uint32_t align = 0;
uint32_t i = 0;
/* For dualcast, the DSA HW requires 4K alignment on destination addresses but
* we do this for all engines to keep it simple.
@ -238,14 +248,38 @@ _get_task_data_bufs(struct ap_task *task)
align = ALIGN_4K;
}
task->src = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL);
if (task->src == NULL) {
fprintf(stderr, "Unable to alloc src buffer\n");
return -ENOMEM;
if (g_workload_selection == ACCEL_CRC32C) {
assert(g_crc32c_chained_count > 0);
task->iov_cnt = g_crc32c_chained_count;
task->iovs = calloc(task->iov_cnt, sizeof(struct iovec));
if (!task->iovs) {
fprintf(stderr, "cannot allocated task->iovs fot task=%p\n", task);
return -ENOMEM;
}
for (i = 0; i < task->iov_cnt; i++) {
task->iovs[i].iov_base = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL);
if (task->iovs[i].iov_base == NULL) {
return -ENOMEM;
}
memset(task->iovs[i].iov_base, DATA_PATTERN, g_xfer_size_bytes);
task->iovs[i].iov_len = g_xfer_size_bytes;
}
} else {
task->src = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL);
if (task->src == NULL) {
fprintf(stderr, "Unable to alloc src buffer\n");
return -ENOMEM;
}
/* For fill, set the entire src buffer so we can check if verify is enabled. */
if (g_workload_selection == ACCEL_FILL) {
memset(task->src, g_fill_pattern, g_xfer_size_bytes);
} else {
memset(task->src, DATA_PATTERN, g_xfer_size_bytes);
}
}
memset(task->src, DATA_PATTERN, g_xfer_size_bytes);
task->iov.iov_base = task->src;
task->iov.iov_len = g_xfer_size_bytes;
task->dst = spdk_dma_zmalloc(g_xfer_size_bytes, align, NULL);
if (task->dst == NULL) {
@ -260,11 +294,6 @@ _get_task_data_bufs(struct ap_task *task)
memset(task->dst, ~DATA_PATTERN, g_xfer_size_bytes);
}
/* For fill, set the entire src buffer so we can check if verify is enabled. */
if (g_workload_selection == ACCEL_FILL) {
memset(task->src, g_fill_pattern, g_xfer_size_bytes);
}
if (g_workload_selection == ACCEL_DUALCAST) {
task->dst2 = spdk_dma_zmalloc(g_xfer_size_bytes, align, NULL);
if (task->dst2 == NULL) {
@ -316,7 +345,7 @@ _submit_single(struct worker_thread *worker, struct ap_task *task)
break;
case ACCEL_CRC32C:
rc = spdk_accel_submit_crc32cv(worker->ch, (uint32_t *)task->dst,
&task->iov, 1, g_crc32c_seed,
task->iovs, task->iov_cnt, g_crc32c_seed,
accel_done, task);
break;
case ACCEL_COMPARE:
@ -376,7 +405,7 @@ _batch_prep_cmd(struct worker_thread *worker, struct ap_task *task,
break;
case ACCEL_CRC32C:
rc = spdk_accel_batch_prep_crc32cv(worker->ch, batch, (uint32_t *)task->dst,
&task->iov, 1, g_crc32c_seed, accel_done, task);
task->iovs, task->iov_cnt, g_crc32c_seed, accel_done, task);
break;
default:
assert(false);
@ -389,7 +418,21 @@ _batch_prep_cmd(struct worker_thread *worker, struct ap_task *task,
static void
_free_task_buffers(struct ap_task *task)
{
spdk_dma_free(task->src);
uint32_t i;
if (g_workload_selection == ACCEL_CRC32C) {
if (task->iovs) {
for (i = 0; i < task->iov_cnt; i++) {
if (task->iovs[i].iov_base) {
spdk_dma_free(task->iovs[i].iov_base);
}
}
free(task->iovs);
}
} else {
spdk_dma_free(task->src);
}
spdk_dma_free(task->dst);
if (g_workload_selection == ACCEL_DUALCAST) {
spdk_dma_free(task->dst2);
@ -517,6 +560,20 @@ batch_done(void *cb_arg, int status)
spdk_thread_send_msg(worker_batch->worker->thread, _batch_done, worker_batch);
}
/* Fold every element of an iovec array into a running CRC-32C value.
 * Used by the verify path to compute a software reference CRC over a
 * chained buffer set; returns the accumulated CRC.
 */
static uint32_t
_update_crc32c_iov(struct iovec *iov, int iovcnt, uint32_t crc32c)
{
	const struct iovec *cur = iov;
	const struct iovec *end = iov + iovcnt;

	for (; cur < end; cur++) {
		/* Buffers are allocated up front in _get_task_data_bufs,
		 * so NULL/zero-length entries indicate a setup bug.
		 */
		assert(cur->iov_base != NULL);
		assert(cur->iov_len != 0);
		crc32c = spdk_crc32c_update(cur->iov_base, cur->iov_len, crc32c);
	}

	return crc32c;
}
static void
_accel_done(void *arg1)
{
@ -530,8 +587,7 @@ _accel_done(void *arg1)
if (g_verify && task->status == 0) {
switch (g_workload_selection) {
case ACCEL_CRC32C:
/* calculate sw CRC-32C and compare to sw aceel result. */
sw_crc32c = spdk_crc32c_update(task->src, g_xfer_size_bytes, ~g_crc32c_seed);
sw_crc32c = _update_crc32c_iov(task->iovs, task->iov_cnt, ~g_crc32c_seed);
if (*(uint32_t *)task->dst != sw_crc32c) {
SPDK_NOTICELOG("CRC-32C miscompare\n");
worker->xfer_failed++;
@ -912,7 +968,7 @@ main(int argc, char **argv)
pthread_mutex_init(&g_workers_lock, NULL);
spdk_app_opts_init(&opts, sizeof(opts));
opts.reactor_mask = "0x1";
if (spdk_app_parse_args(argc, argv, &opts, "o:q:t:yw:P:f:b:T:", NULL, parse_args,
if (spdk_app_parse_args(argc, argv, &opts, "C:o:q:t:yw:P:f:b:T:", NULL, parse_args,
usage) != SPDK_APP_PARSE_ARGS_SUCCESS) {
g_rc = -1;
goto cleanup;
@ -935,6 +991,13 @@ main(int argc, char **argv)
goto cleanup;
}
if (g_workload_selection == ACCEL_CRC32C &&
g_crc32c_chained_count == 0) {
usage();
g_rc = -1;
goto cleanup;
}
dump_user_config(&opts);
g_rc = spdk_app_start(&opts, accel_perf_start, NULL);
if (g_rc) {

View File

@ -80,14 +80,26 @@ struct spdk_accel_task {
struct spdk_accel_batch *batch;
spdk_accel_completion_cb cb_fn;
void *cb_arg;
void *src;
union {
struct {
struct iovec *iovs; /* iovs passed by the caller */
uint32_t iovcnt; /* iovcnt passed by the caller */
} v;
void *src;
};
union {
void *dst;
void *src2;
};
void *dst2;
uint32_t seed;
uint64_t fill_pattern;
union {
struct {
spdk_accel_completion_cb cb_fn;
void *cb_arg;
} chained;
void *dst2;
uint32_t seed;
uint64_t fill_pattern;
};
enum accel_opcode op_code;
uint64_t nbytes;
TAILQ_ENTRY(spdk_accel_task) link;

View File

@ -36,6 +36,7 @@
#include "spdk_internal/accel_engine.h"
#include "spdk/env.h"
#include "spdk/likely.h"
#include "spdk/log.h"
#include "spdk/thread.h"
#include "spdk/json.h"
@ -72,6 +73,7 @@ static void _sw_accel_copy(void *dst, void *src, uint64_t nbytes);
static int _sw_accel_compare(void *src1, void *src2, uint64_t nbytes);
static void _sw_accel_fill(void *dst, uint8_t fill, uint64_t nbytes);
static void _sw_accel_crc32c(uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes);
static void _sw_accel_crc32cv(uint32_t *dst, struct iovec *iov, uint32_t iovcnt, uint32_t seed);
/* Registration of hw modules (currently supports only 1 at a time) */
void
@ -111,13 +113,19 @@ void
spdk_accel_task_complete(struct spdk_accel_task *accel_task, int status)
{
struct accel_io_channel *accel_ch = accel_task->accel_ch;
struct spdk_accel_batch *batch;
struct spdk_accel_batch *batch = accel_task->batch;
spdk_accel_completion_cb cb_fn = accel_task->cb_fn;
void *cb_arg = accel_task->cb_arg;
accel_task->cb_fn(accel_task->cb_arg, status);
/* We should put the accel_task into the list firstly in order to avoid
* the accel task list is exhausted when there is recursive call to
* allocate accel_task in user's call back function (cb_fn)
*/
TAILQ_INSERT_TAIL(&accel_ch->task_pool, accel_task, link);
cb_fn(cb_arg, status);
/* If this task is part of a batch, check for completion of the batch. */
if (accel_task->batch) {
batch = accel_task->batch;
if (batch) {
assert(batch->count > 0);
batch->count--;
if (batch->count == 0) {
@ -129,8 +137,6 @@ spdk_accel_task_complete(struct spdk_accel_task *accel_task, int status)
TAILQ_INSERT_TAIL(&accel_ch->batch_pool, batch, link);
}
}
TAILQ_INSERT_TAIL(&accel_ch->task_pool, accel_task, link);
}
/* Accel framework public API for discovering current engine capabilities. */
@ -308,6 +314,7 @@ spdk_accel_submit_crc32c(struct spdk_io_channel *ch, uint32_t *dst, void *src, u
accel_task->dst = (void *)dst;
accel_task->src = src;
accel_task->v.iovcnt = 0;
accel_task->seed = seed;
accel_task->nbytes = nbytes;
accel_task->op_code = ACCEL_OPCODE_CRC32C;
@ -321,19 +328,77 @@ spdk_accel_submit_crc32c(struct spdk_io_channel *ch, uint32_t *dst, void *src, u
}
}
/* Completion callback used to chain CRC-32C over an iovec when the
 * hardware engine only handles one buffer per submission. Each
 * completion re-submits the remainder of the vector, seeding the next
 * operation with the CRC produced so far.
 */
static void
crc32cv_done(void *cb_arg, int status)
{
struct spdk_accel_task *accel_task = cb_arg;
struct spdk_io_channel *ch = spdk_io_channel_from_ctx(accel_task->accel_ch);
/* The caller's original callback/context were stashed in the
 * "chained" union members when the vectored request was submitted.
 * NOTE(review): asserting cb_arg != NULL means callers may not pass a
 * NULL context to spdk_accel_submit_crc32cv — confirm that is intended.
 */
assert(accel_task->chained.cb_fn != NULL);
assert(accel_task->chained.cb_arg != NULL);
if (spdk_likely(!status)) {
/* Advance to the next iovec element. *dst holds the finalized
 * (already complemented) CRC of the data so far; invert it again
 * so it can be used as the seed for the next submission, which
 * complements its seed before updating.
 */
status = spdk_accel_submit_crc32cv(ch, accel_task->dst, ++accel_task->v.iovs,
accel_task->v.iovcnt - 1, ~(*((uint32_t *)accel_task->dst)),
accel_task->chained.cb_fn, accel_task->chained.cb_arg);
if (spdk_likely(!status)) {
/* Re-submission succeeded; the user callback fires when the
 * final element of the chain completes.
 */
return;
}
}
/* Either the previous operation or the re-submission failed: report
 * the error straight to the user's callback.
 */
accel_task->chained.cb_fn(accel_task->chained.cb_arg, status);
}
/* Accel framework public API for chained CRC-32C function */
int
spdk_accel_submit_crc32cv(struct spdk_io_channel *ch, uint32_t *dst, struct iovec *iov,
uint32_t iov_cnt, uint32_t seed, spdk_accel_completion_cb cb_fn, void *cb_arg)
{
struct accel_io_channel *accel_ch;
struct spdk_accel_task *accel_task;
if (iov == NULL) {
SPDK_ERRLOG("iov should not be NULL");
return -EINVAL;
}
assert(iov_cnt == 1);
if (!iov_cnt) {
SPDK_ERRLOG("iovcnt should not be zero value\n");
return -EINVAL;
}
return spdk_accel_submit_crc32c(ch, dst, iov[0].iov_base, seed, iov[0].iov_len, cb_fn, cb_arg);
if (iov_cnt == 1) {
return spdk_accel_submit_crc32c(ch, dst, iov[0].iov_base, seed, iov[0].iov_len, cb_fn, cb_arg);
}
accel_ch = spdk_io_channel_get_ctx(ch);
accel_task = _get_task(accel_ch, NULL, cb_fn, cb_arg);
if (accel_task == NULL) {
SPDK_ERRLOG("no memory\n");
assert(0);
return -ENOMEM;
}
accel_task->v.iovs = iov;
accel_task->v.iovcnt = iov_cnt;
accel_task->dst = (void *)dst;
accel_task->op_code = ACCEL_OPCODE_CRC32C;
if (_is_supported(accel_ch->engine, ACCEL_CRC32C)) {
accel_task->cb_fn = crc32cv_done;
accel_task->cb_arg = accel_task;
accel_task->chained.cb_fn = cb_fn;
accel_task->chained.cb_arg = cb_arg;
accel_task->src = iov[0].iov_base;
accel_task->nbytes = iov[0].iov_len;
return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task);
} else {
_sw_accel_crc32cv(dst, iov, iov_cnt, seed);
spdk_accel_task_complete(accel_task, 0);
return 0;
}
}
/* Accel framework public API for getting max operations for a batch. */
@ -474,6 +539,7 @@ spdk_accel_batch_prep_crc32c(struct spdk_io_channel *ch, struct spdk_accel_batch
accel_task->dst = dst;
accel_task->src = src;
accel_task->v.iovcnt = 0;
accel_task->seed = seed;
accel_task->nbytes = nbytes;
accel_task->op_code = ACCEL_OPCODE_CRC32C;
@ -487,21 +553,81 @@ spdk_accel_batch_prep_crc32c(struct spdk_io_channel *ch, struct spdk_accel_batch
return 0;
}
/* Batched counterpart of crc32cv_done: when one element of a chained
 * CRC-32C finishes inside a batch, prep the next element into the same
 * batch, seeding it with the CRC accumulated so far.
 */
static void
batched_crc32cv_done(void *cb_arg, int status)
{
struct spdk_accel_task *accel_task = cb_arg;
struct spdk_io_channel *ch = spdk_io_channel_from_ctx(accel_task->accel_ch);
struct spdk_accel_batch *batch;
batch = accel_task->batch;
/* A batched task must always belong to a batch. */
assert(batch != NULL);
/* Original user callback/context were saved in the "chained" members
 * at prep time; they are invoked only for the last element or on error.
 */
assert(accel_task->chained.cb_fn != NULL);
assert(accel_task->chained.cb_arg != NULL);
if (spdk_likely(!status)) {
/* *dst holds the finalized CRC of the elements processed so far;
 * invert it to form the seed for the next element (the prep path
 * complements its seed before use).
 */
status = spdk_accel_batch_prep_crc32cv(ch, batch, accel_task->dst,
++accel_task->v.iovs, accel_task->v.iovcnt - 1, ~(*((uint32_t *)accel_task->dst)),
accel_task->chained.cb_fn, accel_task->chained.cb_arg);
if (spdk_likely(!status)) {
return;
}
}
/* Propagate the failure (operation or re-prep) to the user. */
accel_task->chained.cb_fn(accel_task->chained.cb_arg, status);
}
int
spdk_accel_batch_prep_crc32cv(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
uint32_t *dst, struct iovec *iovs, uint32_t iov_cnt, uint32_t seed,
spdk_accel_completion_cb cb_fn, void *cb_arg)
{
struct accel_io_channel *accel_ch;
struct spdk_accel_task *accel_task;
if (iovs == NULL) {
SPDK_ERRLOG("iovs should not be NULL\n");
return -EINVAL;
}
assert(iov_cnt == 1);
if (iov_cnt == 0) {
SPDK_ERRLOG("iovcnt should not be zero value\n");
return -EINVAL;
}
return spdk_accel_batch_prep_crc32c(ch, batch, dst, iovs[0].iov_base, seed, iovs[0].iov_len, cb_fn,
cb_arg);
if (iov_cnt == 1) {
return spdk_accel_batch_prep_crc32c(ch, batch, dst, iovs[0].iov_base, seed, iovs[0].iov_len, cb_fn,
cb_arg);
}
accel_ch = spdk_io_channel_get_ctx(ch);
accel_task = _get_task(accel_ch, batch, cb_fn, cb_arg);
if (accel_task == NULL) {
return -ENOMEM;
}
accel_task->v.iovs = iovs;
accel_task->v.iovcnt = iov_cnt;
accel_task->dst = dst;
accel_task->seed = seed;
accel_task->op_code = ACCEL_OPCODE_CRC32C;
if (_is_supported(accel_ch->engine, ACCEL_CRC32C)) {
accel_task->cb_arg = accel_task;
accel_task->cb_fn = batched_crc32cv_done;
accel_task->cb_arg = accel_task;
accel_task->chained.cb_fn = cb_fn;
accel_task->chained.cb_arg = cb_arg;
accel_task->src = iovs[0].iov_base;
accel_task->nbytes = iovs[0].iov_len;
TAILQ_INSERT_TAIL(&batch->hw_tasks, accel_task, link);
} else {
TAILQ_INSERT_TAIL(&batch->sw_tasks, accel_task, link);
}
return 0;
}
/* Accel framework public API for batch_create function. */
@ -583,8 +709,12 @@ spdk_accel_batch_submit(struct spdk_io_channel *ch, struct spdk_accel_batch *bat
batch->status |= rc;
break;
case ACCEL_OPCODE_CRC32C:
_sw_accel_crc32c(accel_task->dst, accel_task->src, accel_task->seed,
accel_task->nbytes);
if (accel_task->v.iovcnt == 0) {
_sw_accel_crc32c(accel_task->dst, accel_task->src, accel_task->seed,
accel_task->nbytes);
} else {
_sw_accel_crc32cv(accel_task->dst, accel_task->v.iovs, accel_task->v.iovcnt, accel_task->seed);
}
spdk_accel_task_complete(accel_task, 0);
break;
case ACCEL_OPCODE_DUALCAST:
@ -836,6 +966,20 @@ _sw_accel_crc32c(uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes)
*dst = spdk_crc32c_update(src, nbytes, ~seed);
}
/* Software CRC-32C over a vector of buffers: complement the seed on
 * entry and chain the running value across every element, matching the
 * single-buffer _sw_accel_crc32c semantics. The result is stored in *dst.
 */
static void
_sw_accel_crc32cv(uint32_t *dst, struct iovec *iov, uint32_t iovcnt, uint32_t seed)
{
	uint32_t idx;
	uint32_t crc = ~seed;

	for (idx = 0; idx < iovcnt; idx++) {
		/* Callers guarantee fully populated iovecs. */
		assert(iov[idx].iov_base != NULL);
		assert(iov[idx].iov_len != 0);
		crc = spdk_crc32c_update(iov[idx].iov_base, iov[idx].iov_len, crc);
	}

	*dst = crc;
}
static struct spdk_io_channel *sw_accel_get_io_channel(void);
static uint32_t