diff --git a/examples/accel/perf/accel_perf.c b/examples/accel/perf/accel_perf.c
index 206cd2be7d..9bbecf71f3 100644
--- a/examples/accel/perf/accel_perf.c
+++ b/examples/accel/perf/accel_perf.c
@@ -54,6 +54,7 @@ static int g_ops_per_batch = 0;
 static int g_threads_per_core = 1;
 static int g_time_in_sec = 5;
 static uint32_t g_crc32c_seed = 0;
+static uint32_t g_crc32c_chained_count = 1;
 static int g_fail_percent_goal = 0;
 static uint8_t g_fill_pattern = 255;
 static bool g_verify = false;
@@ -74,7 +75,8 @@ struct display_info {
 
 struct ap_task {
 	void *src;
-	struct iovec iov;
+	struct iovec *iovs;
+	uint32_t iov_cnt;
 	void *dst;
 	void *dst2;
 	struct worker_thread *worker;
@@ -121,6 +123,7 @@ dump_user_config(struct spdk_app_opts *opts)
 	printf("Workload Type: %s\n", g_workload_type);
 	if (g_workload_selection == ACCEL_CRC32C) {
 		printf("CRC-32C seed: %u\n", g_crc32c_seed);
+		printf("vector size: %u\n", g_crc32c_chained_count);
 	} else if (g_workload_selection == ACCEL_FILL) {
 		printf("Fill pattern: 0x%x\n", g_fill_pattern);
 	} else if ((g_workload_selection == ACCEL_COMPARE) && g_fail_percent_goal > 0) {
@@ -144,7 +147,9 @@ usage(void)
 	printf("accel_perf options:\n");
 	printf("\t[-h help message]\n");
 	printf("\t[-q queue depth per core]\n");
+	printf("\t[-C for crc32c workload, use this value to configure the io vector size to test (default 1)]\n");
 	printf("\t[-T number of threads per core\n");
+	printf("\t[-n number of channels]\n");
 	printf("\t[-o transfer size in bytes]\n");
 	printf("\t[-t time in seconds]\n");
 	printf("\t[-w workload type must be one of these: copy, fill, crc32c, compare, dualcast\n");
@@ -162,6 +167,9 @@ parse_args(int argc, char *argv)
 	case 'b':
 		g_ops_per_batch = spdk_strtol(optarg, 10);
 		break;
+	case 'C':
+		g_crc32c_chained_count = spdk_strtol(optarg, 10);
+		break;
 	case 'f':
 		g_fill_pattern = (uint8_t)spdk_strtol(optarg, 10);
 		break;
@@ -204,6 +212,7 @@ parse_args(int argc, char *argv)
 		usage();
 		return 1;
 	}
+
 	return 0;
 }
 
@@ -230,6 +239,7 @@ static int
 _get_task_data_bufs(struct ap_task *task)
 {
 	uint32_t align = 0;
+	uint32_t i = 0;
 
 	/* For dualcast, the DSA HW requires 4K alignment on destination addresses but
 	 * we do this for all engines to keep it simple.
@@ -238,14 +248,38 @@ _get_task_data_bufs(struct ap_task *task)
 		align = ALIGN_4K;
 	}
 
-	task->src = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL);
-	if (task->src == NULL) {
-		fprintf(stderr, "Unable to alloc src buffer\n");
-		return -ENOMEM;
+	if (g_workload_selection == ACCEL_CRC32C) {
+		assert(g_crc32c_chained_count > 0);
+		task->iov_cnt = g_crc32c_chained_count;
+		task->iovs = calloc(task->iov_cnt, sizeof(struct iovec));
+		if (!task->iovs) {
+			fprintf(stderr, "cannot allocate task->iovs for task=%p\n", task);
+			return -ENOMEM;
+		}
+
+		for (i = 0; i < task->iov_cnt; i++) {
+			task->iovs[i].iov_base = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL);
+			if (task->iovs[i].iov_base == NULL) {
+				return -ENOMEM;
+			}
+			memset(task->iovs[i].iov_base, DATA_PATTERN, g_xfer_size_bytes);
+			task->iovs[i].iov_len = g_xfer_size_bytes;
+		}
+
+	} else {
+		task->src = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL);
+		if (task->src == NULL) {
+			fprintf(stderr, "Unable to alloc src buffer\n");
+			return -ENOMEM;
+		}
+
+		/* For fill, set the entire src buffer so we can check if verify is enabled. */
+		if (g_workload_selection == ACCEL_FILL) {
+			memset(task->src, g_fill_pattern, g_xfer_size_bytes);
+		} else {
+			memset(task->src, DATA_PATTERN, g_xfer_size_bytes);
+		}
 	}
-	memset(task->src, DATA_PATTERN, g_xfer_size_bytes);
-	task->iov.iov_base = task->src;
-	task->iov.iov_len = g_xfer_size_bytes;
 
 	task->dst = spdk_dma_zmalloc(g_xfer_size_bytes, align, NULL);
 	if (task->dst == NULL) {
@@ -260,11 +294,6 @@ _get_task_data_bufs(struct ap_task *task)
 		memset(task->dst, ~DATA_PATTERN, g_xfer_size_bytes);
 	}
 
-	/* For fill, set the entire src buffer so we can check if verify is enabled. */
-	if (g_workload_selection == ACCEL_FILL) {
-		memset(task->src, g_fill_pattern, g_xfer_size_bytes);
-	}
-
 	if (g_workload_selection == ACCEL_DUALCAST) {
 		task->dst2 = spdk_dma_zmalloc(g_xfer_size_bytes, align, NULL);
 		if (task->dst2 == NULL) {
@@ -316,7 +345,7 @@ _submit_single(struct worker_thread *worker, struct ap_task *task)
 		break;
 	case ACCEL_CRC32C:
 		rc = spdk_accel_submit_crc32cv(worker->ch, (uint32_t *)task->dst,
-					       &task->iov, 1, g_crc32c_seed,
+					       task->iovs, task->iov_cnt, g_crc32c_seed,
 					       accel_done, task);
 		break;
 	case ACCEL_COMPARE:
@@ -376,7 +405,7 @@ _batch_prep_cmd(struct worker_thread *worker, struct ap_task *task,
 		break;
 	case ACCEL_CRC32C:
 		rc = spdk_accel_batch_prep_crc32cv(worker->ch, batch, (uint32_t *)task->dst,
-						   &task->iov, 1, g_crc32c_seed, accel_done, task);
+						   task->iovs, task->iov_cnt, g_crc32c_seed, accel_done, task);
 		break;
 	default:
 		assert(false);
@@ -389,7 +418,21 @@ _batch_prep_cmd(struct worker_thread *worker, struct ap_task *task,
 static void
 _free_task_buffers(struct ap_task *task)
 {
-	spdk_dma_free(task->src);
+	uint32_t i;
+
+	if (g_workload_selection == ACCEL_CRC32C) {
+		if (task->iovs) {
+			for (i = 0; i < task->iov_cnt; i++) {
+				if (task->iovs[i].iov_base) {
+					spdk_dma_free(task->iovs[i].iov_base);
+				}
+			}
+			free(task->iovs);
+		}
+	} else {
+		spdk_dma_free(task->src);
+	}
+
 	spdk_dma_free(task->dst);
 	if (g_workload_selection == ACCEL_DUALCAST) {
 		spdk_dma_free(task->dst2);
@@ -517,6 +560,20 @@ batch_done(void *cb_arg, int status)
 	spdk_thread_send_msg(worker_batch->worker->thread, _batch_done, worker_batch);
 }
 
+static uint32_t
+_update_crc32c_iov(struct iovec *iov, int iovcnt, uint32_t crc32c)
+{
+	int i;
+
+	for (i = 0; i < iovcnt; i++) {
+		assert(iov[i].iov_base != NULL);
+		assert(iov[i].iov_len != 0);
+		crc32c = spdk_crc32c_update(iov[i].iov_base, iov[i].iov_len, crc32c);
+	}
+
+	return crc32c;
+}
+
 static void
 _accel_done(void *arg1)
 {
@@ -530,8 +587,7 @@ _accel_done(void *arg1)
 	if (g_verify && task->status == 0) {
 		switch (g_workload_selection) {
 		case ACCEL_CRC32C:
-			/* calculate sw CRC-32C and compare to sw aceel result. */
-			sw_crc32c = spdk_crc32c_update(task->src, g_xfer_size_bytes, ~g_crc32c_seed);
+			sw_crc32c = _update_crc32c_iov(task->iovs, task->iov_cnt, ~g_crc32c_seed);
 			if (*(uint32_t *)task->dst != sw_crc32c) {
 				SPDK_NOTICELOG("CRC-32C miscompare\n");
 				worker->xfer_failed++;
@@ -912,7 +968,7 @@ main(int argc, char **argv)
 	pthread_mutex_init(&g_workers_lock, NULL);
 	spdk_app_opts_init(&opts, sizeof(opts));
 	opts.reactor_mask = "0x1";
-	if (spdk_app_parse_args(argc, argv, &opts, "o:q:t:yw:P:f:b:T:", NULL, parse_args,
+	if (spdk_app_parse_args(argc, argv, &opts, "C:o:q:t:yw:P:f:b:T:", NULL, parse_args,
 				usage) != SPDK_APP_PARSE_ARGS_SUCCESS) {
 		g_rc = -1;
 		goto cleanup;
@@ -935,6 +991,13 @@ main(int argc, char **argv)
 		goto cleanup;
 	}
 
+	if (g_workload_selection == ACCEL_CRC32C &&
+	    g_crc32c_chained_count == 0) {
+		usage();
+		g_rc = -1;
+		goto cleanup;
+	}
+
 	dump_user_config(&opts);
 	g_rc = spdk_app_start(&opts, accel_perf_start, NULL);
 	if (g_rc) {
diff --git a/include/spdk_internal/accel_engine.h b/include/spdk_internal/accel_engine.h
index 1433ce0a80..12d6abc95a 100644
--- a/include/spdk_internal/accel_engine.h
+++ b/include/spdk_internal/accel_engine.h
@@ -80,14 +80,26 @@ struct spdk_accel_task {
 	struct spdk_accel_batch *batch;
 	spdk_accel_completion_cb cb_fn;
 	void *cb_arg;
-	void *src;
+	union {
+		struct {
+			struct iovec *iovs; /* iovs passed by the caller */
+			uint32_t iovcnt; /* iovcnt passed by the caller */
+		} v;
+		void *src;
+	};
 	union {
 		void *dst;
 		void *src2;
 	};
-	void *dst2;
-	uint32_t seed;
-	uint64_t fill_pattern;
+	union {
+		struct {
+			spdk_accel_completion_cb cb_fn;
+			void *cb_arg;
+		} chained;
+		void *dst2;
+		uint32_t seed;
+		uint64_t fill_pattern;
+	};
 	enum accel_opcode op_code;
 	uint64_t nbytes;
 	TAILQ_ENTRY(spdk_accel_task) link;
diff --git a/lib/accel/accel_engine.c b/lib/accel/accel_engine.c
index 66a82204d7..acb043895c 100644
--- a/lib/accel/accel_engine.c
+++ b/lib/accel/accel_engine.c
@@ -36,6 +36,7 @@
 #include "spdk_internal/accel_engine.h"
 
 #include "spdk/env.h"
+#include "spdk/likely.h"
 #include "spdk/log.h"
 #include "spdk/thread.h"
 #include "spdk/json.h"
@@ -72,6 +73,7 @@ static void _sw_accel_copy(void *dst, void *src, uint64_t nbytes);
 static int _sw_accel_compare(void *src1, void *src2, uint64_t nbytes);
 static void _sw_accel_fill(void *dst, uint8_t fill, uint64_t nbytes);
 static void _sw_accel_crc32c(uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes);
+static void _sw_accel_crc32cv(uint32_t *dst, struct iovec *iov, uint32_t iovcnt, uint32_t seed);
 
 /* Registration of hw modules (currently supports only 1 at a time) */
 void
@@ -111,13 +113,19 @@ void
 spdk_accel_task_complete(struct spdk_accel_task *accel_task, int status)
 {
 	struct accel_io_channel *accel_ch = accel_task->accel_ch;
-	struct spdk_accel_batch *batch;
+	struct spdk_accel_batch *batch = accel_task->batch;
+	spdk_accel_completion_cb cb_fn = accel_task->cb_fn;
+	void *cb_arg = accel_task->cb_arg;
 
-	accel_task->cb_fn(accel_task->cb_arg, status);
+	/* Return the accel_task to the task pool before invoking the user's
+	 * callback (cb_fn); otherwise the pool can be exhausted if the callback
+	 * recursively allocates another accel_task.
+	 */
+	TAILQ_INSERT_TAIL(&accel_ch->task_pool, accel_task, link);
+	cb_fn(cb_arg, status);
 
 	/* If this task is part of a batch, check for completion of the batch. */
-	if (accel_task->batch) {
-		batch = accel_task->batch;
+	if (batch) {
 		assert(batch->count > 0);
 		batch->count--;
 		if (batch->count == 0) {
@@ -129,8 +137,6 @@ spdk_accel_task_complete(struct spdk_accel_task *accel_task, int status)
 			TAILQ_INSERT_TAIL(&accel_ch->batch_pool, batch, link);
 		}
 	}
-
-	TAILQ_INSERT_TAIL(&accel_ch->task_pool, accel_task, link);
 }
 
 /* Accel framework public API for discovering current engine capabilities. */
@@ -308,6 +314,7 @@ spdk_accel_submit_crc32c(struct spdk_io_channel *ch, uint32_t *dst, void *src, u
 
 	accel_task->dst = (void *)dst;
 	accel_task->src = src;
+	accel_task->v.iovcnt = 0;
 	accel_task->seed = seed;
 	accel_task->nbytes = nbytes;
 	accel_task->op_code = ACCEL_OPCODE_CRC32C;
@@ -321,19 +328,77 @@ spdk_accel_submit_crc32c(struct spdk_io_channel *ch, uint32_t *dst, void *src, u
 	}
 }
 
+static void
+crc32cv_done(void *cb_arg, int status)
+{
+	struct spdk_accel_task *accel_task = cb_arg;
+	struct spdk_io_channel *ch = spdk_io_channel_from_ctx(accel_task->accel_ch);
+
+	assert(accel_task->chained.cb_fn != NULL);
+	assert(accel_task->chained.cb_arg != NULL);
+
+	if (spdk_likely(!status)) {
+		status = spdk_accel_submit_crc32cv(ch, accel_task->dst, ++accel_task->v.iovs,
+						   accel_task->v.iovcnt - 1, ~(*((uint32_t *)accel_task->dst)),
+						   accel_task->chained.cb_fn, accel_task->chained.cb_arg);
+		if (spdk_likely(!status)) {
+			return;
+		}
+	}
+
+	accel_task->chained.cb_fn(accel_task->chained.cb_arg, status);
+}
+
 /* Accel framework public API for chained CRC-32C function */
 int
 spdk_accel_submit_crc32cv(struct spdk_io_channel *ch, uint32_t *dst, struct iovec *iov,
			  uint32_t iov_cnt, uint32_t seed, spdk_accel_completion_cb cb_fn, void *cb_arg)
 {
+	struct accel_io_channel *accel_ch;
+	struct spdk_accel_task *accel_task;
+
 	if (iov == NULL) {
 		SPDK_ERRLOG("iov should not be NULL");
 		return -EINVAL;
 	}
 
-	assert(iov_cnt == 1);
+	if (!iov_cnt) {
+		SPDK_ERRLOG("iov_cnt should not be zero\n");
+		return -EINVAL;
+	}
 
-	return spdk_accel_submit_crc32c(ch, dst, iov[0].iov_base, seed, iov[0].iov_len, cb_fn, cb_arg);
+	if (iov_cnt == 1) {
+		return spdk_accel_submit_crc32c(ch, dst, iov[0].iov_base, seed, iov[0].iov_len, cb_fn, cb_arg);
+	}
+
+	accel_ch = spdk_io_channel_get_ctx(ch);
+	accel_task = _get_task(accel_ch, NULL, cb_fn, cb_arg);
+	if (accel_task == NULL) {
+		SPDK_ERRLOG("no memory\n");
+		assert(0);
+		return -ENOMEM;
+	}
+
+	accel_task->v.iovs = iov;
+	accel_task->v.iovcnt = iov_cnt;
+	accel_task->dst = (void *)dst;
+	accel_task->op_code = ACCEL_OPCODE_CRC32C;
+
+	if (_is_supported(accel_ch->engine, ACCEL_CRC32C)) {
+		accel_task->cb_fn = crc32cv_done;
+		accel_task->cb_arg = accel_task;
+		accel_task->chained.cb_fn = cb_fn;
+		accel_task->chained.cb_arg = cb_arg;
+
+		accel_task->src = iov[0].iov_base;
+		accel_task->nbytes = iov[0].iov_len;
+
+		return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task);
+	} else {
+		_sw_accel_crc32cv(dst, iov, iov_cnt, seed);
+		spdk_accel_task_complete(accel_task, 0);
+		return 0;
+	}
 }
 
 /* Accel framework public API for getting max operations for a batch. */
@@ -474,6 +539,7 @@ spdk_accel_batch_prep_crc32c(struct spdk_io_channel *ch, struct spdk_accel_batch
 
 	accel_task->dst = dst;
 	accel_task->src = src;
+	accel_task->v.iovcnt = 0;
 	accel_task->seed = seed;
 	accel_task->nbytes = nbytes;
 	accel_task->op_code = ACCEL_OPCODE_CRC32C;
@@ -487,21 +553,81 @@ spdk_accel_batch_prep_crc32c(struct spdk_io_channel *ch, struct spdk_accel_batch
 	return 0;
 }
 
+static void
+batched_crc32cv_done(void *cb_arg, int status)
+{
+	struct spdk_accel_task *accel_task = cb_arg;
+	struct spdk_io_channel *ch = spdk_io_channel_from_ctx(accel_task->accel_ch);
+	struct spdk_accel_batch *batch;
+
+	batch = accel_task->batch;
+	assert(batch != NULL);
+	assert(accel_task->chained.cb_fn != NULL);
+	assert(accel_task->chained.cb_arg != NULL);
+
+	if (spdk_likely(!status)) {
+		status = spdk_accel_batch_prep_crc32cv(ch, batch, accel_task->dst,
						       ++accel_task->v.iovs, accel_task->v.iovcnt - 1, ~(*((uint32_t *)accel_task->dst)),
						       accel_task->chained.cb_fn, accel_task->chained.cb_arg);
+		if (spdk_likely(!status)) {
+			return;
+		}
+	}
+
+	accel_task->chained.cb_fn(accel_task->chained.cb_arg, status);
+}
+
 int
 spdk_accel_batch_prep_crc32cv(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
			      uint32_t *dst, struct iovec *iovs, uint32_t iov_cnt, uint32_t seed,
			      spdk_accel_completion_cb cb_fn, void *cb_arg)
 {
+	struct accel_io_channel *accel_ch;
+	struct spdk_accel_task *accel_task;
+
 	if (iovs == NULL) {
 		SPDK_ERRLOG("iovs should not be NULL\n");
 		return -EINVAL;
 	}
 
-	assert(iov_cnt == 1);
+	if (iov_cnt == 0) {
+		SPDK_ERRLOG("iov_cnt should not be zero\n");
+		return -EINVAL;
+	}
 
-	return spdk_accel_batch_prep_crc32c(ch, batch, dst, iovs[0].iov_base, seed, iovs[0].iov_len, cb_fn,
-					    cb_arg);
+	if (iov_cnt == 1) {
+		return spdk_accel_batch_prep_crc32c(ch, batch, dst, iovs[0].iov_base, seed, iovs[0].iov_len, cb_fn,
+						    cb_arg);
+	}
+	accel_ch = spdk_io_channel_get_ctx(ch);
+	accel_task = _get_task(accel_ch, batch, cb_fn, cb_arg);
+	if (accel_task == NULL) {
+		return -ENOMEM;
+	}
+
+	accel_task->v.iovs = iovs;
+	accel_task->v.iovcnt = iov_cnt;
+	accel_task->dst = dst;
+	accel_task->seed = seed;
+	accel_task->op_code = ACCEL_OPCODE_CRC32C;
+
+	if (_is_supported(accel_ch->engine, ACCEL_CRC32C)) {
+		accel_task->cb_arg = accel_task;
+		accel_task->cb_fn = batched_crc32cv_done;
+		accel_task->cb_arg = accel_task;
+		accel_task->chained.cb_fn = cb_fn;
+		accel_task->chained.cb_arg = cb_arg;
+
+		accel_task->src = iovs[0].iov_base;
+		accel_task->nbytes = iovs[0].iov_len;
+
+		TAILQ_INSERT_TAIL(&batch->hw_tasks, accel_task, link);
+	} else {
+		TAILQ_INSERT_TAIL(&batch->sw_tasks, accel_task, link);
+	}
+
+	return 0;
 }
 
 /* Accel framework public API for batch_create function. */
@@ -583,8 +709,12 @@ spdk_accel_batch_submit(struct spdk_io_channel *ch, struct spdk_accel_batch *bat
 			batch->status |= rc;
 			break;
 		case ACCEL_OPCODE_CRC32C:
-			_sw_accel_crc32c(accel_task->dst, accel_task->src, accel_task->seed,
-					 accel_task->nbytes);
+			if (accel_task->v.iovcnt == 0) {
+				_sw_accel_crc32c(accel_task->dst, accel_task->src, accel_task->seed,
						 accel_task->nbytes);
+			} else {
+				_sw_accel_crc32cv(accel_task->dst, accel_task->v.iovs, accel_task->v.iovcnt, accel_task->seed);
+			}
 			spdk_accel_task_complete(accel_task, 0);
 			break;
 		case ACCEL_OPCODE_DUALCAST:
@@ -836,6 +966,20 @@ _sw_accel_crc32c(uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes)
 	*dst = spdk_crc32c_update(src, nbytes, ~seed);
 }
 
+static void
+_sw_accel_crc32cv(uint32_t *dst, struct iovec *iov, uint32_t iovcnt, uint32_t seed)
+{
+	uint32_t i, crc32c = ~seed;
+
+	for (i = 0; i < iovcnt; i++) {
+		assert(iov[i].iov_base != NULL);
+		assert(iov[i].iov_len != 0);
+		crc32c = spdk_crc32c_update(iov[i].iov_base, iov[i].iov_len, crc32c);
+	}
+
+	*dst = crc32c;
+}
+
 static struct spdk_io_channel *sw_accel_get_io_channel(void);
 
 static uint32_t
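
For reference, below is a minimal caller-side sketch of the chained CRC-32C submission that this patch enables. It is not part of the patch: the helper names (submit_chained_crc32c, chained_crc32c_done), the chunk sizes, and the static buffers are illustrative assumptions, and the struct spdk_io_channel *ch is assumed to have been obtained from the accel framework beforehand, the way accel_perf obtains worker->ch.

/* Illustrative sketch only (not part of the patch): submit one CRC-32C
 * computation over several discontiguous buffers via the chained API.
 */
#include "spdk/stdinc.h"
#include "spdk/accel_engine.h"
#include "spdk/env.h"

#define CHUNK_SIZE	4096	/* example sizes, chosen arbitrarily */
#define CHUNK_COUNT	4

/* The iovec array and the crc destination must stay valid until the completion
 * callback runs: the framework keeps the caller's pointers (accel_task->v.iovs)
 * and, on the hardware path, re-submits one element at a time from crc32cv_done().
 */
static struct iovec g_example_iovs[CHUNK_COUNT];
static uint32_t g_example_crc;

static void
chained_crc32c_done(void *cb_arg, int status)
{
	uint32_t *crc_dst = cb_arg;

	if (status == 0) {
		printf("chained crc32c = 0x%08x\n", *crc_dst);
	}
}

static int
submit_chained_crc32c(struct spdk_io_channel *ch)
{
	int i;

	for (i = 0; i < CHUNK_COUNT; i++) {
		g_example_iovs[i].iov_base = spdk_dma_zmalloc(CHUNK_SIZE, 0, NULL);
		if (g_example_iovs[i].iov_base == NULL) {
			return -ENOMEM;
		}
		g_example_iovs[i].iov_len = CHUNK_SIZE;
	}

	/* iov_cnt == 1 degenerates to spdk_accel_submit_crc32c(); iov_cnt > 1 now
	 * covers the whole vector, either through chained hardware submissions or
	 * through the software fallback _sw_accel_crc32cv().
	 */
	return spdk_accel_submit_crc32cv(ch, &g_example_crc, g_example_iovs, CHUNK_COUNT,
					 0 /* seed */, chained_crc32c_done, &g_example_crc);
}

The same pattern applies to batched submission through spdk_accel_batch_prep_crc32cv(), which queues the chained task on the batch (hw_tasks or sw_tasks) instead of submitting it immediately.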