lib/accel: eliminate need for caller to schedule completions

Previously the accel_perf tool would look at whether it had HW
or SW commands to know whether to execute the callback right away
or schedule to avoid blowing the stack (SW calls are sync).

Moved this to the SW module (part of the accel engine) so the
caller doesn't have to worry about it.  Allowed for a few simplifications
in the tool as well.

Also, instead of using send_msg to call the completion we add it to
a list in the sw module that a new poller uses to perform the
completions, as this is more efficient than sending a message to the
same thread.

Signed-off-by: paul luse <paul.e.luse@intel.com>
Change-Id: Ifc6c5b8635f51e3fa1a825c8573378b3752d7d91
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/9171
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Ziye Yang <ziye.yang@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
paul luse 2021-08-13 02:12:47 -04:00 committed by Tomasz Zawadzki
parent 31079d2d94
commit df42f3582b
4 changed files with 72 additions and 57 deletions

View File

@ -44,7 +44,6 @@
#define DATA_PATTERN 0x5a
#define ALIGN_4K 0x1000
static bool g_using_sw_engine = false;
static uint64_t g_tsc_rate;
static uint64_t g_tsc_end;
static int g_rc;
@ -84,13 +83,11 @@ struct ap_task {
void *dst2;
uint32_t crc_dst;
struct worker_thread *worker;
int status;
int expected_status; /* used for the compare operation */
TAILQ_ENTRY(ap_task) link;
};
struct accel_batch {
int status;
int cmd_count;
struct spdk_accel_batch *batch;
struct worker_thread *worker;
@ -493,7 +490,6 @@ _free_task_buffers(struct ap_task *task)
}
}
static void _batch_done(void *cb_arg);
static void
_build_batch(struct worker_thread *worker, struct ap_task *task)
{
@ -562,16 +558,17 @@ _drain_batch(struct worker_thread *worker)
}
static void
_batch_done(void *cb_arg)
batch_done(void *arg1, int status)
{
struct accel_batch *worker_batch = (struct accel_batch *)cb_arg;
struct accel_batch *worker_batch = (struct accel_batch *)arg1;
struct worker_thread *worker = worker_batch->worker;
int rc;
assert(worker);
assert(TAILQ_EMPTY(&worker->in_use_batches) == 0);
if (worker_batch->status) {
SPDK_ERRLOG("error %d\n", worker_batch->status);
if (status) {
SPDK_ERRLOG("error %d\n", status);
}
worker->current_queue_depth--;
@ -603,17 +600,6 @@ _batch_done(void *cb_arg)
}
}
static void
batch_done(void *cb_arg, int status)
{
struct accel_batch *worker_batch = (struct accel_batch *)cb_arg;
assert(worker_batch->worker);
worker_batch->status = status;
spdk_thread_send_msg(worker_batch->worker->thread, _batch_done, worker_batch);
}
static int
_vector_memcmp(void *_dst, struct iovec *src_iovs, uint32_t iovcnt)
{
@ -637,7 +623,7 @@ _vector_memcmp(void *_dst, struct iovec *src_iovs, uint32_t iovcnt)
}
static void
_accel_done(void *arg1)
accel_done(void *arg1, int status)
{
struct ap_task *task = arg1;
struct worker_thread *worker = task->worker;
@ -646,7 +632,7 @@ _accel_done(void *arg1)
assert(worker);
assert(worker->current_queue_depth > 0);
if (g_verify && task->status == 0) {
if (g_verify && status == 0) {
switch (g_workload_selection) {
case ACCEL_COPY_CRC32C:
sw_crc32c = spdk_crc32c_iov_update(task->iovs, task->iov_cnt, ~g_crc32c_seed);
@ -697,9 +683,9 @@ _accel_done(void *arg1)
}
if (task->expected_status == -EILSEQ) {
assert(task->status != 0);
assert(status != 0);
worker->injected_miscompares++;
} else if (task->status) {
} else if (status) {
/* Expected to pass but the accel engine reported an error (ex: COMPARE operation). */
worker->xfer_failed++;
}
@ -978,22 +964,6 @@ error:
spdk_app_stop(-1);
}
static void
accel_done(void *cb_arg, int status)
{
struct ap_task *task = (struct ap_task *)cb_arg;
struct worker_thread *worker = task->worker;
assert(worker);
task->status = status;
if (g_using_sw_engine == false) {
_accel_done(task);
} else {
spdk_thread_send_msg(worker->thread, _accel_done, task);
}
}
static inline void
identify_accel_engine_usage(void)
{
@ -1005,7 +975,6 @@ identify_accel_engine_usage(void)
capabilities = spdk_accel_get_capabilities(ch);
if ((capabilities & g_workload_selection) != g_workload_selection) {
g_using_sw_engine = true;
SPDK_WARNLOG("The selected workload is not natively supported by the current engine\n");
SPDK_WARNLOG("The software engine will be used instead.\n\n");
}

View File

@ -53,6 +53,11 @@ struct accel_io_channel {
TAILQ_HEAD(, spdk_accel_batch) batches;
};
/* Per-channel context for the software accel engine.  SW operations finish
 * synchronously, so completed tasks are parked on tasks_to_complete and the
 * poller invokes their callbacks later, off the submitter's stack. */
struct sw_accel_io_channel {
struct spdk_poller *completion_poller; /* drains tasks_to_complete (see accel_comp_poll) */
TAILQ_HEAD(, spdk_accel_task) tasks_to_complete; /* SW tasks awaiting deferred completion */
};
struct spdk_accel_batch {
/* Lists of commands in the batch. */
TAILQ_HEAD(, spdk_accel_task) hw_tasks;
@ -104,8 +109,8 @@ struct spdk_accel_task {
uint32_t *crc_dst;
enum accel_opcode op_code;
uint64_t nbytes;
int status;
TAILQ_ENTRY(spdk_accel_task) link;
uint8_t offload_ctx[0]; /* Not currently used. */
};
struct spdk_accel_engine {

View File

@ -34,7 +34,7 @@
SPDK_ROOT_DIR := $(abspath $(CURDIR)/../..)
include $(SPDK_ROOT_DIR)/mk/spdk.common.mk
SO_VER := 6
SO_VER := 7
SO_MINOR := 0
SO_SUFFIX := $(SO_VER).$(SO_MINOR)

View File

@ -184,6 +184,17 @@ _get_task(struct accel_io_channel *accel_ch, struct spdk_accel_batch *batch,
return accel_task;
}
/* Post SW completions to a list and complete in a poller as we don't want to
 * complete them on the caller's stack as they'll likely submit another. */
inline static void
_add_to_comp_list(struct accel_io_channel *accel_ch, struct spdk_accel_task *accel_task, int status)
{
/* engine_ch belongs to the SW engine here, so its ctx is a sw_accel_io_channel */
struct sw_accel_io_channel *sw_ch = spdk_io_channel_get_ctx(accel_ch->engine_ch);
/* Stash the result on the task; the poller replays it via spdk_accel_task_complete(). */
accel_task->status = status;
TAILQ_INSERT_TAIL(&sw_ch->tasks_to_complete, accel_task, link);
}
/* Accel framework public API for copy function */
int
spdk_accel_submit_copy(struct spdk_io_channel *ch, void *dst, void *src, uint64_t nbytes,
@ -206,7 +217,7 @@ spdk_accel_submit_copy(struct spdk_io_channel *ch, void *dst, void *src, uint64_
return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task);
} else {
_sw_accel_copy(dst, src, nbytes);
spdk_accel_task_complete(accel_task, 0);
_add_to_comp_list(accel_ch, accel_task, 0);
return 0;
}
}
@ -239,7 +250,7 @@ spdk_accel_submit_dualcast(struct spdk_io_channel *ch, void *dst1, void *dst2, v
return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task);
} else {
_sw_accel_dualcast(dst1, dst2, src, nbytes);
spdk_accel_task_complete(accel_task, 0);
_add_to_comp_list(accel_ch, accel_task, 0);
return 0;
}
}
@ -267,7 +278,7 @@ spdk_accel_submit_compare(struct spdk_io_channel *ch, void *src1, void *src2, ui
return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task);
} else {
rc = _sw_accel_compare(src1, src2, nbytes);
spdk_accel_task_complete(accel_task, rc);
_add_to_comp_list(accel_ch, accel_task, rc);
return 0;
}
}
@ -294,7 +305,7 @@ spdk_accel_submit_fill(struct spdk_io_channel *ch, void *dst, uint8_t fill, uint
return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task);
} else {
_sw_accel_fill(dst, fill, nbytes);
spdk_accel_task_complete(accel_task, 0);
_add_to_comp_list(accel_ch, accel_task, 0);
return 0;
}
}
@ -323,7 +334,7 @@ spdk_accel_submit_crc32c(struct spdk_io_channel *ch, uint32_t *crc_dst, void *sr
return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task);
} else {
_sw_accel_crc32c(crc_dst, src, seed, nbytes);
spdk_accel_task_complete(accel_task, 0);
_add_to_comp_list(accel_ch, accel_task, 0);
return 0;
}
}
@ -405,7 +416,7 @@ spdk_accel_submit_crc32cv(struct spdk_io_channel *ch, uint32_t *crc_dst, struct
return accel_ch->engine->submit_tasks(accel_ch->engine_ch, accel_task);
} else {
_sw_accel_crc32cv(crc_dst, iov, iov_cnt, seed);
spdk_accel_task_complete(accel_task, 0);
_add_to_comp_list(accel_ch, accel_task, 0);
return 0;
}
}
@ -437,7 +448,7 @@ spdk_accel_submit_copy_crc32c(struct spdk_io_channel *ch, void *dst, void *src,
} else {
_sw_accel_copy(dst, src, nbytes);
_sw_accel_crc32c(crc_dst, src, seed, nbytes);
spdk_accel_task_complete(accel_task, 0);
_add_to_comp_list(accel_ch, accel_task, 0);
return 0;
}
}
@ -493,7 +504,7 @@ spdk_accel_submit_copy_crc32cv(struct spdk_io_channel *ch, void *dst, struct iov
} else {
_sw_accel_copy(dst, src_iovs[0].iov_base, src_iovs[0].iov_len);
_sw_accel_crc32cv(crc_dst, src_iovs, iov_cnt, seed);
spdk_accel_task_complete(accel_task, 0);
_add_to_comp_list(accel_ch, accel_task, 0);
return 0;
}
}
@ -826,15 +837,15 @@ spdk_accel_batch_submit(struct spdk_io_channel *ch, struct spdk_accel_batch *bat
switch (accel_task->op_code) {
case ACCEL_OPCODE_MEMMOVE:
_sw_accel_copy(accel_task->dst, accel_task->src, accel_task->nbytes);
spdk_accel_task_complete(accel_task, 0);
_add_to_comp_list(accel_ch, accel_task, 0);
break;
case ACCEL_OPCODE_MEMFILL:
_sw_accel_fill(accel_task->dst, accel_task->fill_pattern, accel_task->nbytes);
spdk_accel_task_complete(accel_task, 0);
_add_to_comp_list(accel_ch, accel_task, 0);
break;
case ACCEL_OPCODE_COMPARE:
rc = _sw_accel_compare(accel_task->src, accel_task->src2, accel_task->nbytes);
spdk_accel_task_complete(accel_task, rc);
_add_to_comp_list(accel_ch, accel_task, rc);
batch->status |= rc;
break;
case ACCEL_OPCODE_CRC32C:
@ -844,17 +855,17 @@ spdk_accel_batch_submit(struct spdk_io_channel *ch, struct spdk_accel_batch *bat
} else {
_sw_accel_crc32cv(accel_task->crc_dst, accel_task->v.iovs, accel_task->v.iovcnt, accel_task->seed);
}
spdk_accel_task_complete(accel_task, 0);
_add_to_comp_list(accel_ch, accel_task, 0);
break;
case ACCEL_OPCODE_COPY_CRC32C:
_sw_accel_copy(accel_task->dst, accel_task->src, accel_task->nbytes);
_sw_accel_crc32c(accel_task->crc_dst, accel_task->src, accel_task->seed, accel_task->nbytes);
spdk_accel_task_complete(accel_task, 0);
_add_to_comp_list(accel_ch, accel_task, 0);
break;
case ACCEL_OPCODE_DUALCAST:
_sw_accel_dualcast(accel_task->dst, accel_task->dst2, accel_task->src,
accel_task->nbytes);
spdk_accel_task_complete(accel_task, 0);
_add_to_comp_list(accel_ch, accel_task, 0);
break;
default:
assert(false);
@ -1120,15 +1131,45 @@ static struct spdk_accel_engine sw_accel_engine = {
.batch_get_max = sw_accel_batch_get_max,
};
/* Poller that fires the user callbacks for software-engine tasks queued by
 * _add_to_comp_list().  Returns SPDK_POLLER_IDLE when there was nothing to do,
 * SPDK_POLLER_BUSY otherwise. */
static int
accel_comp_poll(void *arg)
{
struct sw_accel_io_channel *sw_ch = arg;
TAILQ_HEAD(, spdk_accel_task) tasks_to_complete;
struct spdk_accel_task *accel_task;
if (TAILQ_EMPTY(&sw_ch->tasks_to_complete)) {
return SPDK_POLLER_IDLE;
}
/* Swap the pending list onto the stack first: a completion callback may
 * submit another SW op, which re-appends to sw_ch->tasks_to_complete, and we
 * only want to drain the tasks that were queued before this poll ran. */
TAILQ_INIT(&tasks_to_complete);
TAILQ_SWAP(&tasks_to_complete, &sw_ch->tasks_to_complete, spdk_accel_task, link);
while ((accel_task = TAILQ_FIRST(&tasks_to_complete))) {
TAILQ_REMOVE(&tasks_to_complete, accel_task, link);
/* status was recorded at submit time by _add_to_comp_list() */
spdk_accel_task_complete(accel_task, accel_task->status);
}
return SPDK_POLLER_BUSY;
}
/* io_device create callback for the SW engine channel: set up the deferred
 * completion list and register its poller (period 0 = run every iteration). */
static int
sw_accel_create_cb(void *io_device, void *ctx_buf)
{
struct sw_accel_io_channel *sw_ch = ctx_buf;
TAILQ_INIT(&sw_ch->tasks_to_complete);
sw_ch->completion_poller = SPDK_POLLER_REGISTER(accel_comp_poll, sw_ch, 0);
return 0;
}
/* io_device destroy callback: tear down the completion poller.
 * NOTE(review): tasks_to_complete is presumed empty at destroy time — any
 * still-queued tasks would be dropped without their callbacks firing. */
static void
sw_accel_destroy_cb(void *io_device, void *ctx_buf)
{
struct sw_accel_io_channel *sw_ch = ctx_buf;
spdk_poller_unregister(&sw_ch->completion_poller);
}
static struct spdk_io_channel *sw_accel_get_io_channel(void)
@ -1147,7 +1188,7 @@ sw_accel_engine_init(void)
{
accel_sw_register(&sw_accel_engine);
spdk_io_device_register(&sw_accel_engine, sw_accel_create_cb, sw_accel_destroy_cb,
0, "sw_accel_engine");
sizeof(struct sw_accel_io_channel), "sw_accel_engine");
return 0;
}