accel: Move non-engine-specific batching to the accel_fw layer

The new design:

* Supports a generic batching capability in the accel_fw layer. Based
on the capabilities advertised by the engine, the generic layer keeps
track of which commands are HW accelerated and which are SW, processes
the SW commands itself, and sends the HW commands to the engine as a
list of commands (not a batch). A caller-side sketch of the capability
query follows this list.

* Batch completions are managed by the generic layer; when going
through the accel_fw, the engines only process commands. With DSA,
however, if a list of commands is sent down, the module uses the DSA
public API to create and submit batches but relies on the generic
layer to complete the batch task itself. When DSA is used directly,
batching works as before (DSA handles batch completion).

* The engine function tables were greatly simplified by replacing all
of the individual entries (copy, fill, crc32c, etc.) with a single
`submit_tasks` function that is used to send either a list of tasks
for a batch or a single task for the single-shot API (see the
engine-side sketch after this list).

* Internally, batching is now used to re-submit tasks that were queued
for flow-control reasons.
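
The capability query keeps the same entry point but now reports only
HW-accelerated opcodes; anything else is still accepted and executed in
software by the generic layer. A minimal caller-side sketch follows;
only spdk_accel_engine_get_io_channel(), spdk_accel_get_capabilities()
and the ACCEL_* flags are from this patch, the helper name is
illustrative:

    #include "spdk/accel_engine.h"
    #include "spdk/thread.h"

    static void
    check_crc_offload(void)
    {
    	struct spdk_io_channel *ch = spdk_accel_engine_get_io_channel();
    	uint64_t caps = spdk_accel_get_capabilities(ch);

    	if (caps & ACCEL_CRC32C) {
    		/* crc32c commands will be sent to the engine as HW work. */
    	} else {
    		/* The accel_fw still accepts crc32c but computes it in SW. */
    	}

    	spdk_put_io_channel(ch);
    }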
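
On the engine side, a module now only advertises its HW capabilities
and implements `submit_tasks`. A minimal sketch, assuming the
engine-facing definitions live in the internal accel header used by
the existing modules; everything prefixed with `my_` is hypothetical
and not part of this patch:

    #include "spdk_internal/accel_engine.h"

    static uint64_t
    my_get_capabilities(void)
    {
    	/* Only copy and fill are claimed as HW accelerated. */
    	return ACCEL_COPY | ACCEL_FILL;
    }

    static uint32_t
    my_batch_get_max(struct spdk_io_channel *ch)
    {
    	return 32;
    }

    static int
    my_submit_tasks(struct spdk_io_channel *ch, struct spdk_accel_task *task)
    {
    	struct spdk_accel_task *tmp;

    	/* The generic layer hands down a TAILQ-linked chain: one task for
    	 * the single-shot API, several for a batch. A real engine submits
    	 * each to HW; here every task is completed immediately. */
    	while (task != NULL) {
    		tmp = TAILQ_NEXT(task, link);
    		spdk_accel_task_complete(task, 0);
    		task = tmp;
    	}

    	return 0;
    }

    static struct spdk_accel_engine my_engine = {
    	.get_capabilities = my_get_capabilities,
    	.batch_get_max    = my_batch_get_max,
    	.submit_tasks     = my_submit_tasks,
    	/* .get_io_channel omitted here for brevity. */
    };

A real module also provides .get_io_channel and registers itself with
spdk_accel_hw_engine_register() at init time, as the idxd and ioat
modules below do.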

Signed-off-by: paul luse <paul.e.luse@intel.com>
Change-Id: I99c28751df32017c43490a90f4904bdabe79a270
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/3555
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
Community-CI: Mellanox Build Bot
paul luse 2020-07-28 13:51:20 -04:00 committed by Jim Harris
parent 8c3e71f0f9
commit 17bb748a60
7 changed files with 723 additions and 1429 deletions


@@ -49,9 +49,8 @@ enum accel_capability {
ACCEL_FILL = 1 << 1,
ACCEL_DUALCAST = 1 << 2,
ACCEL_COMPARE = 1 << 3,
ACCEL_BATCH = 1 << 4,
ACCEL_CRC32C = 1 << 5,
ACCEL_DIF = 1 << 6,
ACCEL_CRC32C = 1 << 4,
ACCEL_DIF = 1 << 5,
};
/**
@@ -69,10 +68,6 @@ typedef void (*spdk_accel_completion_cb)(void *ref, int status);
*/
typedef void (*spdk_accel_fini_cb)(void *cb_arg);
struct spdk_io_channel;
struct spdk_accel_batch;
/**
* Initialize the acceleration engine.
*
@@ -103,11 +98,11 @@ void spdk_accel_engine_module_finish(void);
struct spdk_io_channel *spdk_accel_engine_get_io_channel(void);
/**
* Retrieve accel engine capabilities.
* Retrieve accel engine HW acceleration capabilities.
*
* \param ch I/O channel associated with this call.
*
* \return bitmap of capabilities defined by enum accel_capability.
* \return bitmap of HW acceleration capabilities defined by enum accel_capability.
*/
uint64_t spdk_accel_get_capabilities(struct spdk_io_channel *ch);


@@ -39,44 +39,67 @@
#include "spdk/accel_engine.h"
#include "spdk/queue.h"
struct spdk_accel_task {
spdk_accel_completion_cb cb;
struct spdk_accel_task;
void spdk_accel_task_complete(struct spdk_accel_task *task, int status);
struct accel_io_channel {
struct spdk_accel_engine *engine;
struct spdk_io_channel *engine_ch;
void *task_pool_base;
TAILQ_HEAD(, spdk_accel_task) task_pool;
void *batch_pool_base;
TAILQ_HEAD(, spdk_accel_batch) batch_pool;
TAILQ_HEAD(, spdk_accel_batch) batches;
};
struct spdk_accel_batch {
/* Lists of commands in the batch. */
TAILQ_HEAD(, spdk_accel_task) hw_tasks;
TAILQ_HEAD(, spdk_accel_task) sw_tasks;
/* Specific to the batch task itself. */
int status;
uint32_t count;
spdk_accel_completion_cb cb_fn;
void *cb_arg;
struct accel_io_channel *accel_ch;
TAILQ_ENTRY(spdk_accel_batch) link;
};
enum accel_opcode {
ACCEL_OPCODE_MEMMOVE = 0,
ACCEL_OPCODE_MEMFILL = 1,
ACCEL_OPCODE_COMPARE = 2,
ACCEL_OPCODE_BATCH = 3,
ACCEL_OPCODE_CRC32C = 4,
ACCEL_OPCODE_DUALCAST = 5,
};
struct spdk_accel_task {
struct accel_io_channel *accel_ch;
struct spdk_accel_batch *batch;
spdk_accel_completion_cb cb_fn;
void *cb_arg;
void *src;
union {
void *dst;
void *src2;
};
void *dst2;
uint32_t seed;
uint64_t fill_pattern;
enum accel_opcode op_code;
uint64_t nbytes;
TAILQ_ENTRY(spdk_accel_task) link;
uint8_t offload_ctx[0];
uint8_t offload_ctx[0]; /* Not currently used. */
};
struct spdk_accel_engine {
uint64_t capabilities;
uint64_t (*get_capabilities)(void);
int (*copy)(struct spdk_io_channel *ch, void *dst, void *src,
uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
int (*dualcast)(struct spdk_io_channel *ch, void *dst1, void *dst2, void *src,
uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
uint32_t (*batch_get_max)(void);
struct spdk_accel_batch *(*batch_create)(struct spdk_io_channel *ch);
int (*batch_prep_copy)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
void *dst, void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
int (*batch_prep_dualcast)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
void *dst1, void *dst2, void *src, uint64_t nbytes,
spdk_accel_completion_cb cb_fn, void *cb_arg);
int (*batch_prep_compare)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
void *src1, void *src2, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
int (*batch_prep_fill)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
void *dst, uint8_t fill, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
int (*batch_prep_crc32c)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes,
spdk_accel_completion_cb cb_fn, void *cb_arg);
int (*batch_submit)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
spdk_accel_completion_cb cb_fn, void *cb_arg);
int (*batch_cancel)(struct spdk_io_channel *ch, struct spdk_accel_batch *batch);
int (*compare)(struct spdk_io_channel *ch, void *src1, void *src2,
uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
int (*fill)(struct spdk_io_channel *ch, void *dst, uint8_t fill,
uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
int (*crc32c)(struct spdk_io_channel *ch, uint32_t *dst, void *src,
uint32_t seed, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg);
struct spdk_io_channel *(*get_io_channel)(void);
uint32_t (*batch_get_max)(struct spdk_io_channel *ch);
int (*submit_tasks)(struct spdk_io_channel *ch, struct spdk_accel_task *accel_task);
};
struct spdk_accel_module_if {

File diff suppressed because it is too large.


@@ -26,7 +26,7 @@
# functions needed by modules
spdk_accel_hw_engine_register;
spdk_accel_module_list_add;
spdk_accel_task_complete;
local: *;
};


@@ -107,7 +107,7 @@ DEPDIRS-blobfs_bdev += event
endif
# module/accel
DEPDIRS-accel_ioat := log ioat thread $(JSON_LIBS) accel util
DEPDIRS-accel_ioat := log ioat thread $(JSON_LIBS) accel
DEPDIRS-accel_idxd := log idxd thread $(JSON_LIBS) accel
# module/env_dpdk


@@ -46,10 +46,9 @@
#include "spdk/util.h"
#include "spdk/json.h"
#define ALIGN_4K 0x1000
static bool g_idxd_enable = false;
uint32_t g_config_number;
static uint32_t g_batch_max;
enum channel_state {
IDXD_CHANNEL_ACTIVE,
@@ -73,35 +72,13 @@ struct idxd_device {
static TAILQ_HEAD(, idxd_device) g_idxd_devices = TAILQ_HEAD_INITIALIZER(g_idxd_devices);
static struct idxd_device *g_next_dev = NULL;
struct idxd_op {
struct spdk_idxd_io_channel *chan;
void *cb_arg;
spdk_idxd_req_cb cb_fn;
void *src;
union {
void *dst;
void *src2;
};
void *dst2;
uint32_t seed;
uint64_t fill_pattern;
uint32_t op_code;
uint64_t nbytes;
struct idxd_batch *batch;
TAILQ_ENTRY(idxd_op) link;
};
struct idxd_io_channel {
struct spdk_idxd_io_channel *chan;
struct spdk_idxd_device *idxd;
struct idxd_device *dev;
enum channel_state state;
struct spdk_poller *poller;
TAILQ_HEAD(, idxd_op) queued_ops;
};
struct idxd_task {
spdk_accel_completion_cb cb;
TAILQ_HEAD(, spdk_accel_task) queued_tasks;
};
pthread_mutex_t g_configuration_lock = PTHREAD_MUTEX_INITIALIZER;
@@ -123,12 +100,174 @@ idxd_select_device(void)
return g_next_dev;
}
static void
idxd_done(void *cb_arg, int status)
{
struct spdk_accel_task *accel_task = cb_arg;
spdk_accel_task_complete(accel_task, status);
}
static int
_process_single_task(struct spdk_io_channel *ch, struct spdk_accel_task *task)
{
struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
int rc = 0;
switch (task->op_code) {
case ACCEL_OPCODE_MEMMOVE:
rc = spdk_idxd_submit_copy(chan->chan, task->dst, task->src, task->nbytes, idxd_done, task);
break;
case ACCEL_OPCODE_DUALCAST:
rc = spdk_idxd_submit_dualcast(chan->chan, task->dst, task->dst2, task->src, task->nbytes,
idxd_done, task);
break;
case ACCEL_OPCODE_COMPARE:
rc = spdk_idxd_submit_compare(chan->chan, task->src, task->src2, task->nbytes, idxd_done, task);
break;
case ACCEL_OPCODE_MEMFILL:
rc = spdk_idxd_submit_fill(chan->chan, task->dst, task->fill_pattern, task->nbytes, idxd_done,
task);
break;
case ACCEL_OPCODE_CRC32C:
rc = spdk_idxd_submit_crc32c(chan->chan, task->dst, task->src, task->seed, task->nbytes, idxd_done,
task);
break;
default:
assert(false);
rc = -EINVAL;
break;
}
return rc;
}
static int
idxd_submit_tasks(struct spdk_io_channel *ch, struct spdk_accel_task *first_task)
{
struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
struct spdk_accel_task *task, *tmp, *batch_task;
struct idxd_batch *idxd_batch;
TAILQ_HEAD(, spdk_accel_task) batch_tasks;
int rc = 0;
uint32_t task_count = 0;
task = first_task;
if (chan->state == IDXD_CHANNEL_PAUSED) {
goto queue_tasks;
} else if (chan->state == IDXD_CHANNEL_ERROR) {
while (task) {
tmp = TAILQ_NEXT(task, link);
spdk_accel_task_complete(task, -EINVAL);
task = tmp;
}
return 0;
}
/* If this is just a single task handle it here. */
if (!TAILQ_NEXT(task, link)) {
rc = _process_single_task(ch, task);
if (rc == -EBUSY) {
goto queue_tasks;
} else if (rc) {
spdk_accel_task_complete(task, rc);
}
return 0;
}
/* More than one task, create IDXD batch(es). */
do {
idxd_batch = spdk_idxd_batch_create(chan->chan);
task_count = 0;
if (idxd_batch == NULL) {
/* Queue them all and try again later */
goto queue_tasks;
}
/* Keep track of each batch's tasks in case we need to cancel. */
TAILQ_INIT(&batch_tasks);
do {
switch (task->op_code) {
case ACCEL_OPCODE_MEMMOVE:
rc = spdk_idxd_batch_prep_copy(chan->chan, idxd_batch, task->dst, task->src, task->nbytes,
idxd_done, task);
break;
case ACCEL_OPCODE_DUALCAST:
rc = spdk_idxd_batch_prep_dualcast(chan->chan, idxd_batch, task->dst, task->dst2,
task->src, task->nbytes, idxd_done, task);
break;
case ACCEL_OPCODE_COMPARE:
rc = spdk_idxd_batch_prep_compare(chan->chan, idxd_batch, task->src, task->src2,
task->nbytes, idxd_done, task);
break;
case ACCEL_OPCODE_MEMFILL:
rc = spdk_idxd_batch_prep_fill(chan->chan, idxd_batch, task->dst, task->fill_pattern,
task->nbytes, idxd_done, task);
break;
case ACCEL_OPCODE_CRC32C:
rc = spdk_idxd_batch_prep_crc32c(chan->chan, idxd_batch, task->dst, task->src,
task->seed, task->nbytes, idxd_done, task);
break;
default:
assert(false);
break;
}
tmp = TAILQ_NEXT(task, link);
if (rc == 0) {
TAILQ_INSERT_TAIL(&batch_tasks, task, link);
} else {
assert(rc != -EBUSY);
spdk_accel_task_complete(task, rc);
}
task_count++;
task = tmp;
} while (task && task_count < g_batch_max);
if (!TAILQ_EMPTY(&batch_tasks)) {
rc = spdk_idxd_batch_submit(chan->chan, idxd_batch, NULL, NULL);
/* If we can't submit the batch, just destroy it and queue up all the operations
* from the latest batch and try again later. If this list was from an accel_fw batch,
* all of the batch info is still associated with the tasks that we're about to
* queue up so nothing is lost.
*/
if (rc) {
spdk_idxd_batch_cancel(chan->chan, idxd_batch);
while (!TAILQ_EMPTY(&batch_tasks)) {
batch_task = TAILQ_FIRST(&batch_tasks);
TAILQ_REMOVE(&batch_tasks, batch_task, link);
TAILQ_INSERT_TAIL(&chan->queued_tasks, batch_task, link);
}
rc = 0;
}
} else {
/* the last batch task list was empty so all tasks had their cb_fn called. */
rc = 0;
}
} while (task && rc == 0);
return 0;
queue_tasks:
while (task != NULL) {
tmp = TAILQ_NEXT(task, link);
TAILQ_INSERT_TAIL(&chan->queued_tasks, task, link);
task = tmp;
}
return 0;
}
static int
idxd_poll(void *arg)
{
struct idxd_io_channel *chan = arg;
struct idxd_op *op = NULL;
int rc;
struct spdk_accel_task *task = NULL;
spdk_idxd_process_events(chan->chan);
@@ -137,45 +276,13 @@ idxd_poll(void *arg)
return -1;
}
while (!TAILQ_EMPTY(&chan->queued_ops)) {
op = TAILQ_FIRST(&chan->queued_ops);
/* Submit queued tasks */
if (!TAILQ_EMPTY(&chan->queued_tasks)) {
task = TAILQ_FIRST(&chan->queued_tasks);
switch (op->op_code) {
case IDXD_OPCODE_MEMMOVE:
rc = spdk_idxd_submit_copy(op->chan, op->dst, op->src, op->nbytes,
op->cb_fn, op->cb_arg);
break;
case IDXD_OPCODE_DUALCAST:
rc = spdk_idxd_submit_dualcast(op->chan, op->dst, op->dst2, op->src, op->nbytes,
op->cb_fn, op->cb_arg);
break;
case IDXD_OPCODE_COMPARE:
rc = spdk_idxd_submit_compare(op->chan, op->src, op->src2, op->nbytes,
op->cb_fn, op->cb_arg);
break;
case IDXD_OPCODE_MEMFILL:
rc = spdk_idxd_submit_fill(op->chan, op->dst, op->fill_pattern, op->nbytes,
op->cb_fn, op->cb_arg);
break;
case IDXD_OPCODE_CRC32C_GEN:
rc = spdk_idxd_submit_crc32c(op->chan, op->dst, op->src, op->seed, op->nbytes,
op->cb_fn, op->cb_arg);
break;
case IDXD_OPCODE_BATCH:
rc = spdk_idxd_batch_submit(op->chan, op->batch, op->cb_fn, op->cb_arg);
break;
default:
/* Should never get here */
assert(false);
break;
}
if (rc == 0) {
TAILQ_REMOVE(&chan->queued_ops, op, link);
free(op);
} else {
/* Busy, resubmit to try again later */
break;
}
TAILQ_INIT(&chan->queued_tasks);
idxd_submit_tasks(task->accel_ch->engine_ch, task);
}
return -1;
@@ -184,403 +291,27 @@ idxd_poll(void *arg)
static size_t
accel_engine_idxd_get_ctx_size(void)
{
return sizeof(struct idxd_task) + sizeof(struct spdk_accel_task);
}
static void
idxd_done(void *cb_arg, int status)
{
struct spdk_accel_task *accel_task;
struct idxd_task *idxd_task = cb_arg;
accel_task = SPDK_CONTAINEROF(idxd_task, struct spdk_accel_task,
offload_ctx);
idxd_task->cb(accel_task, status);
}
static struct idxd_op *
_prep_queue_command(struct idxd_io_channel *chan, spdk_accel_completion_cb cb_fn, void *cb_arg)
{
struct idxd_op *op_to_queue;
op_to_queue = calloc(1, sizeof(struct idxd_op));
if (op_to_queue == NULL) {
SPDK_ERRLOG("Failed to allocate operation for queueing\n");
return NULL;
}
op_to_queue->chan = chan->chan;
op_to_queue->cb_fn = cb_fn;
op_to_queue->cb_arg = cb_arg;
return op_to_queue;
}
static int
idxd_submit_copy(struct spdk_io_channel *ch, void *dst, void *src, uint64_t nbytes,
spdk_accel_completion_cb cb_fn, void *cb_arg)
{
struct idxd_task *idxd_task = (struct idxd_task *)cb_arg;
struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
int rc = 0;
idxd_task->cb = cb_fn;
if (chan->state == IDXD_CHANNEL_ACTIVE) {
rc = spdk_idxd_submit_copy(chan->chan, dst, src, nbytes, idxd_done, idxd_task);
}
if (chan->state == IDXD_CHANNEL_PAUSED || rc == -EBUSY) {
struct idxd_op *op_to_queue;
/* Common prep. */
op_to_queue = _prep_queue_command(chan, idxd_done, idxd_task);
if (op_to_queue == NULL) {
return -ENOMEM;
}
/* Command specific. */
op_to_queue->dst = dst;
op_to_queue->src = src;
op_to_queue->nbytes = nbytes;
op_to_queue->op_code = IDXD_OPCODE_MEMMOVE;
/* Queue the operation. */
TAILQ_INSERT_TAIL(&chan->queued_ops, op_to_queue, link);
return 0;
} else if (chan->state == IDXD_CHANNEL_ERROR) {
return -EINVAL;
}
return rc;
}
static int
idxd_submit_dualcast(struct spdk_io_channel *ch, void *dst1, void *dst2, void *src,
uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
{
struct idxd_task *idxd_task = (struct idxd_task *)cb_arg;
struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
int rc = 0;
idxd_task->cb = cb_fn;
if (chan->state == IDXD_CHANNEL_ACTIVE) {
rc = spdk_idxd_submit_dualcast(chan->chan, dst1, dst2, src, nbytes, idxd_done, idxd_task);
}
if (chan->state == IDXD_CHANNEL_PAUSED || rc == -EBUSY) {
struct idxd_op *op_to_queue;
/* Common prep. */
op_to_queue = _prep_queue_command(chan, idxd_done, idxd_task);
if (op_to_queue == NULL) {
return -ENOMEM;
}
/* Command specific. */
op_to_queue->dst = dst1;
op_to_queue->dst2 = dst2;
op_to_queue->src = src;
op_to_queue->nbytes = nbytes;
op_to_queue->op_code = IDXD_OPCODE_DUALCAST;
/* Queue the operation. */
TAILQ_INSERT_TAIL(&chan->queued_ops, op_to_queue, link);
return 0;
} else if (chan->state == IDXD_CHANNEL_ERROR) {
return -EINVAL;
}
return rc;
}
static int
idxd_submit_compare(struct spdk_io_channel *ch, void *src1, void *src2,
uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
{
struct idxd_task *idxd_task = (struct idxd_task *)cb_arg;
struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
int rc = 0;
idxd_task->cb = cb_fn;
if (chan->state == IDXD_CHANNEL_ACTIVE) {
rc = spdk_idxd_submit_compare(chan->chan, src1, src2, nbytes, idxd_done, idxd_task);
}
if (chan->state == IDXD_CHANNEL_PAUSED || rc == -EBUSY) {
struct idxd_op *op_to_queue;
/* Common prep. */
op_to_queue = _prep_queue_command(chan, idxd_done, idxd_task);
if (op_to_queue == NULL) {
return -ENOMEM;
}
/* Command specific. */
op_to_queue->src = src1;
op_to_queue->src2 = src2;
op_to_queue->nbytes = nbytes;
op_to_queue->op_code = IDXD_OPCODE_COMPARE;
/* Queue the operation. */
TAILQ_INSERT_TAIL(&chan->queued_ops, op_to_queue, link);
return 0;
} else if (chan->state == IDXD_CHANNEL_ERROR) {
return -EINVAL;
}
return rc;
}
static int
idxd_submit_fill(struct spdk_io_channel *ch, void *dst, uint8_t fill,
uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
{
struct idxd_task *idxd_task = (struct idxd_task *)cb_arg;
struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
int rc = 0;
uint64_t fill_pattern;
idxd_task->cb = cb_fn;
memset(&fill_pattern, fill, sizeof(uint64_t));
if (chan->state == IDXD_CHANNEL_ACTIVE) {
rc = spdk_idxd_submit_fill(chan->chan, dst, fill_pattern, nbytes, idxd_done, idxd_task);
}
if (chan->state == IDXD_CHANNEL_PAUSED || rc == -EBUSY) {
struct idxd_op *op_to_queue;
/* Common prep. */
op_to_queue = _prep_queue_command(chan, idxd_done, idxd_task);
if (op_to_queue == NULL) {
return -ENOMEM;
}
/* Command specific. */
op_to_queue->dst = dst;
op_to_queue->fill_pattern = fill_pattern;
op_to_queue->nbytes = nbytes;
op_to_queue->op_code = IDXD_OPCODE_MEMFILL;
/* Queue the operation. */
TAILQ_INSERT_TAIL(&chan->queued_ops, op_to_queue, link);
return 0;
} else if (chan->state == IDXD_CHANNEL_ERROR) {
return -EINVAL;
}
return rc;
}
static int
idxd_submit_crc32c(struct spdk_io_channel *ch, uint32_t *dst, void *src,
uint32_t seed, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
{
struct idxd_task *idxd_task = (struct idxd_task *)cb_arg;
struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
int rc = 0;
idxd_task->cb = cb_fn;
if (chan->state == IDXD_CHANNEL_ACTIVE) {
rc = spdk_idxd_submit_crc32c(chan->chan, dst, src, seed, nbytes, idxd_done, idxd_task);
}
if (chan->state == IDXD_CHANNEL_PAUSED || rc == -EBUSY) {
struct idxd_op *op_to_queue;
/* Common prep. */
op_to_queue = _prep_queue_command(chan, idxd_done, idxd_task);
if (op_to_queue == NULL) {
return -ENOMEM;
}
/* Command specific. */
op_to_queue->dst = dst;
op_to_queue->src = src;
op_to_queue->seed = seed;
op_to_queue->nbytes = nbytes;
op_to_queue->op_code = IDXD_OPCODE_CRC32C_GEN;
/* Queue the operation. */
TAILQ_INSERT_TAIL(&chan->queued_ops, op_to_queue, link);
return 0;
} else if (chan->state == IDXD_CHANNEL_ERROR) {
return -EINVAL;
}
return rc;
return 0;
}
static uint64_t
idxd_get_capabilities(void)
{
return ACCEL_COPY | ACCEL_FILL | ACCEL_CRC32C | ACCEL_COMPARE |
ACCEL_DUALCAST | ACCEL_BATCH;
ACCEL_DUALCAST;
}
static uint32_t
idxd_batch_get_max(void)
idxd_batch_get_max(struct spdk_io_channel *ch)
{
return spdk_idxd_batch_get_max();
}
static struct spdk_accel_batch *
idxd_batch_start(struct spdk_io_channel *ch)
{
struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
return (struct spdk_accel_batch *)spdk_idxd_batch_create(chan->chan);
}
static int
idxd_batch_cancel(struct spdk_io_channel *ch, struct spdk_accel_batch *_batch)
{
struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
struct idxd_batch *batch = (struct idxd_batch *)_batch;
return spdk_idxd_batch_cancel(chan->chan, batch);
}
static int
idxd_batch_submit(struct spdk_io_channel *ch, struct spdk_accel_batch *_batch,
spdk_accel_completion_cb cb_fn, void *cb_arg)
{
struct idxd_task *idxd_task = (struct idxd_task *)cb_arg;
struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
struct idxd_batch *batch = (struct idxd_batch *)_batch;
int rc = 0;
idxd_task->cb = cb_fn;
if (chan->state == IDXD_CHANNEL_ACTIVE) {
rc = spdk_idxd_batch_submit(chan->chan, batch, idxd_done, idxd_task);
}
if (chan->state == IDXD_CHANNEL_PAUSED || rc == -EBUSY) {
struct idxd_op *op_to_queue;
/* Common prep. */
op_to_queue = _prep_queue_command(chan, idxd_done, idxd_task);
if (op_to_queue == NULL) {
return -ENOMEM;
}
/* Command specific. */
op_to_queue->batch = batch;
op_to_queue->op_code = IDXD_OPCODE_BATCH;
/* Queue the operation. */
TAILQ_INSERT_TAIL(&chan->queued_ops, op_to_queue, link);
return 0;
} else if (chan->state == IDXD_CHANNEL_ERROR) {
return -EINVAL;
}
return rc;
}
static int
idxd_batch_prep_copy(struct spdk_io_channel *ch, struct spdk_accel_batch *_batch,
void *dst, void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
{
struct idxd_task *idxd_task = (struct idxd_task *)cb_arg;
struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
struct idxd_batch *batch = (struct idxd_batch *)_batch;
idxd_task->cb = cb_fn;
return spdk_idxd_batch_prep_copy(chan->chan, batch, dst, src, nbytes,
idxd_done, idxd_task);
}
static int
idxd_batch_prep_fill(struct spdk_io_channel *ch, struct spdk_accel_batch *_batch,
void *dst, uint8_t fill, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
{
struct idxd_task *idxd_task = (struct idxd_task *)cb_arg;
struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
uint64_t fill_pattern;
struct idxd_batch *batch = (struct idxd_batch *)_batch;
idxd_task->cb = cb_fn;
memset(&fill_pattern, fill, sizeof(uint64_t));
return spdk_idxd_batch_prep_fill(chan->chan, batch, dst, fill_pattern, nbytes, idxd_done,
idxd_task);
}
static int
idxd_batch_prep_dualcast(struct spdk_io_channel *ch, struct spdk_accel_batch *_batch,
void *dst1, void *dst2, void *src, uint64_t nbytes,
spdk_accel_completion_cb cb_fn, void *cb_arg)
{
struct idxd_task *idxd_task = (struct idxd_task *)cb_arg;
struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
struct idxd_batch *batch = (struct idxd_batch *)_batch;
idxd_task->cb = cb_fn;
return spdk_idxd_batch_prep_dualcast(chan->chan, batch, dst1, dst2, src, nbytes, idxd_done,
idxd_task);
}
static int
idxd_batch_prep_crc32c(struct spdk_io_channel *ch, struct spdk_accel_batch *_batch,
uint32_t *dst, void *src, uint32_t seed, uint64_t nbytes,
spdk_accel_completion_cb cb_fn, void *cb_arg)
{
struct idxd_task *idxd_task = (struct idxd_task *)cb_arg;
struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
struct idxd_batch *batch = (struct idxd_batch *)_batch;
idxd_task->cb = cb_fn;
return spdk_idxd_batch_prep_crc32c(chan->chan, batch, dst, src, seed, nbytes, idxd_done,
idxd_task);
}
static int
idxd_batch_prep_compare(struct spdk_io_channel *ch, struct spdk_accel_batch *_batch,
void *src1, void *src2, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
{
struct idxd_task *idxd_task = (struct idxd_task *)cb_arg;
struct idxd_io_channel *chan = spdk_io_channel_get_ctx(ch);
struct idxd_batch *batch = (struct idxd_batch *)_batch;
idxd_task->cb = cb_fn;
return spdk_idxd_batch_prep_compare(chan->chan, batch, src1, src2, nbytes, idxd_done,
idxd_task);
}
static struct spdk_accel_engine idxd_accel_engine = {
.get_capabilities = idxd_get_capabilities,
.copy = idxd_submit_copy,
.batch_get_max = idxd_batch_get_max,
.batch_create = idxd_batch_start,
.batch_cancel = idxd_batch_cancel,
.batch_prep_copy = idxd_batch_prep_copy,
.batch_prep_fill = idxd_batch_prep_fill,
.batch_prep_dualcast = idxd_batch_prep_dualcast,
.batch_prep_crc32c = idxd_batch_prep_crc32c,
.batch_prep_compare = idxd_batch_prep_compare,
.batch_submit = idxd_batch_submit,
.dualcast = idxd_submit_dualcast,
.compare = idxd_submit_compare,
.fill = idxd_submit_fill,
.crc32c = idxd_submit_crc32c,
.get_io_channel = idxd_get_io_channel,
.batch_get_max = idxd_batch_get_max,
.submit_tasks = idxd_submit_tasks,
};
/*
@@ -652,7 +383,7 @@ idxd_create_cb(void *io_device, void *ctx_buf)
chan->dev = dev;
chan->poller = spdk_poller_register(idxd_poll, chan, 0);
TAILQ_INIT(&chan->queued_ops);
TAILQ_INIT(&chan->queued_tasks);
/*
* Configure the channel but leave paused until all others
@@ -792,6 +523,7 @@ accel_engine_idxd_init(void)
}
g_idxd_initialized = true;
g_batch_max = spdk_idxd_batch_get_max();
SPDK_NOTICELOG("Accel engine updated to use IDXD DSA engine.\n");
spdk_accel_hw_engine_register(&idxd_accel_engine);
spdk_io_device_register(&idxd_accel_engine, idxd_create_cb, idxd_destroy_cb,


@@ -42,36 +42,7 @@
#include "spdk/event.h"
#include "spdk/thread.h"
#include "spdk/ioat.h"
#include "spdk/crc32.h"
#define ALIGN_4K 0x1000
enum ioat_accel_opcode {
IOAT_ACCEL_OPCODE_MEMMOVE = 0,
IOAT_ACCEL_OPCODE_MEMFILL = 1,
IOAT_ACCEL_OPCODE_COMPARE = 2,
IOAT_ACCEL_OPCODE_CRC32C = 3,
IOAT_ACCEL_OPCODE_DUALCAST = 4,
};
struct ioat_accel_op {
struct ioat_io_channel *ioat_ch;
void *cb_arg;
spdk_accel_completion_cb cb_fn;
void *src;
union {
void *dst;
void *src2;
};
void *dst2;
uint32_t seed;
uint64_t fill_pattern;
enum ioat_accel_opcode op_code;
uint64_t nbytes;
TAILQ_ENTRY(ioat_accel_op) link;
};
static int g_batch_size;
static bool g_ioat_enable = false;
static bool g_ioat_initialized = false;
@@ -103,9 +74,6 @@ struct ioat_io_channel {
struct spdk_ioat_chan *ioat_ch;
struct ioat_device *ioat_dev;
struct spdk_poller *poller;
TAILQ_HEAD(, ioat_accel_op) op_pool;
TAILQ_HEAD(, ioat_accel_op) sw_batch; /* for operations not hw accelerated */
bool hw_batch; /* for operations that are hw accelerated */
};
static int
@@ -149,17 +117,13 @@ ioat_free_device(struct ioat_device *dev)
pthread_mutex_unlock(&g_ioat_mutex);
}
struct ioat_task {
spdk_accel_completion_cb cb;
};
static int accel_engine_ioat_init(void);
static void accel_engine_ioat_exit(void *ctx);
static size_t
accel_engine_ioat_get_ctx_size(void)
{
return sizeof(struct ioat_task) + sizeof(struct spdk_accel_task);
return 0;
}
SPDK_ACCEL_MODULE_REGISTER(accel_engine_ioat_init, accel_engine_ioat_exit,
@@ -168,43 +132,9 @@ SPDK_ACCEL_MODULE_REGISTER(accel_engine_ioat_init, accel_engine_ioat_exit,
static void
ioat_done(void *cb_arg)
{
struct spdk_accel_task *accel_task;
struct ioat_task *ioat_task = cb_arg;
struct spdk_accel_task *accel_task = cb_arg;
accel_task = (struct spdk_accel_task *)
((uintptr_t)ioat_task -
offsetof(struct spdk_accel_task, offload_ctx));
ioat_task->cb(accel_task, 0);
}
static int
ioat_submit_copy(struct spdk_io_channel *ch, void *dst, void *src, uint64_t nbytes,
spdk_accel_completion_cb cb_fn, void *cb_arg)
{
struct ioat_task *ioat_task = (struct ioat_task *)cb_arg;
struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch);
assert(ioat_ch->ioat_ch != NULL);
ioat_task->cb = cb_fn;
return spdk_ioat_submit_copy(ioat_ch->ioat_ch, ioat_task, ioat_done, dst, src, nbytes);
}
static int
ioat_submit_fill(struct spdk_io_channel *ch, void *dst, uint8_t fill,
uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
{
struct ioat_task *ioat_task = (struct ioat_task *)cb_arg;
struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch);
uint64_t fill64 = 0x0101010101010101ULL * fill;
assert(ioat_ch->ioat_ch != NULL);
ioat_task->cb = cb_fn;
return spdk_ioat_submit_fill(ioat_ch->ioat_ch, ioat_task, ioat_done, dst, fill64, nbytes);
spdk_accel_task_complete(accel_task, 0);
}
static int
@@ -218,266 +148,62 @@ ioat_poll(void *arg)
static struct spdk_io_channel *ioat_get_io_channel(void);
/*
* The IOAT engine only supports these capabilities as hardware
* accelerated. The accel fw will handle unsupported functions
* by calling the software implementations of the functions.
*/
static uint64_t
ioat_get_capabilities(void)
{
return ACCEL_COPY | ACCEL_FILL | ACCEL_BATCH;
return ACCEL_COPY | ACCEL_FILL;
}
/* The IOAT batch functions exposed by the accel fw do not match up 1:1
* with the functions in the IOAT library. The IOAT library directly only
* supports construction of accelerated functions via the IOAT native
* interface. The accel_fw batch capabilities are implemented here in the
* plug-in and rely on either the IOAT library for accelerated commands
* or software functions for non-accelerated.
*/
static uint32_t
ioat_batch_get_max(void)
{
return g_batch_size;
}
static struct spdk_accel_batch *
ioat_batch_create(struct spdk_io_channel *ch)
ioat_batch_get_max(struct spdk_io_channel *ch)
{
struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch);
if (!TAILQ_EMPTY(&ioat_ch->sw_batch) || (ioat_ch->hw_batch == true)) {
SPDK_ERRLOG("IOAT accel engine only supports one batch at a time.\n");
return NULL;
}
return (struct spdk_accel_batch *)&ioat_ch->hw_batch;
}
static struct ioat_accel_op *
_prep_op(struct ioat_io_channel *ioat_ch, struct spdk_accel_batch *batch,
spdk_accel_completion_cb cb_fn, void *cb_arg)
{
struct ioat_accel_op *op;
if ((struct spdk_accel_batch *)&ioat_ch->hw_batch != batch) {
SPDK_ERRLOG("Invalid batch\n");
return NULL;
}
if (!TAILQ_EMPTY(&ioat_ch->op_pool)) {
op = TAILQ_FIRST(&ioat_ch->op_pool);
TAILQ_REMOVE(&ioat_ch->op_pool, op, link);
} else {
SPDK_ERRLOG("Ran out of operations for batch\n");
return NULL;
}
op->cb_arg = cb_arg;
op->cb_fn = cb_fn;
op->ioat_ch = ioat_ch;
return op;
return spdk_ioat_get_max_descriptors(ioat_ch->ioat_dev->ioat);
}
static int
ioat_batch_prep_copy(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
void *dst, void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
ioat_submit_tasks(struct spdk_io_channel *ch, struct spdk_accel_task *accel_task)
{
struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch);
struct ioat_task *ioat_task = (struct ioat_task *)cb_arg;
struct spdk_accel_task *tmp;
int rc = 0;
ioat_task->cb = cb_fn;
ioat_ch->hw_batch = true;
/* Call the IOAT library prep function. */
return spdk_ioat_build_copy(ioat_ch->ioat_ch, ioat_task, ioat_done, dst, src, nbytes);
}
static int
ioat_batch_prep_fill(struct spdk_io_channel *ch, struct spdk_accel_batch *batch, void *dst,
uint8_t fill, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
{
struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch);
struct ioat_task *ioat_task = (struct ioat_task *)cb_arg;
uint64_t fill_pattern;
ioat_task->cb = cb_fn;
ioat_ch->hw_batch = true;
memset(&fill_pattern, fill, sizeof(uint64_t));
/* Call the IOAT library prep function. */
return spdk_ioat_build_fill(ioat_ch->ioat_ch, ioat_task, ioat_done, dst, fill_pattern, nbytes);
}
static int
ioat_batch_prep_dualcast(struct spdk_io_channel *ch,
struct spdk_accel_batch *batch, void *dst1, void *dst2,
void *src, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
{
struct ioat_accel_op *op;
struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch);
if ((uintptr_t)dst1 & (ALIGN_4K - 1) || (uintptr_t)dst2 & (ALIGN_4K - 1)) {
SPDK_ERRLOG("Dualcast requires 4K alignment on dst addresses\n");
return -EINVAL;
}
op = _prep_op(ioat_ch, batch, cb_fn, cb_arg);
if (op == NULL) {
return -EINVAL;
}
/* Command specific. */
op->src = src;
op->dst = dst1;
op->dst2 = dst2;
op->nbytes = nbytes;
op->op_code = IOAT_ACCEL_OPCODE_DUALCAST;
TAILQ_INSERT_TAIL(&ioat_ch->sw_batch, op, link);
return 0;
}
static int
ioat_batch_prep_compare(struct spdk_io_channel *ch,
struct spdk_accel_batch *batch, void *src1,
void *src2, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
{
struct ioat_accel_op *op;
struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch);
op = _prep_op(ioat_ch, batch, cb_fn, cb_arg);
if (op == NULL) {
return -EINVAL;
}
/* Command specific. */
op->src = src1;
op->src2 = src2;
op->nbytes = nbytes;
op->op_code = IOAT_ACCEL_OPCODE_COMPARE;
TAILQ_INSERT_TAIL(&ioat_ch->sw_batch, op, link);
return 0;
}
static int
ioat_batch_prep_crc32c(struct spdk_io_channel *ch,
struct spdk_accel_batch *batch, uint32_t *dst, void *src,
uint32_t seed, uint64_t nbytes, spdk_accel_completion_cb cb_fn, void *cb_arg)
{
struct ioat_accel_op *op;
struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch);
op = _prep_op(ioat_ch, batch, cb_fn, cb_arg);
if (op == NULL) {
return -EINVAL;
}
/* Command specific. */
op->dst = (void *)dst;
op->src = src;
op->seed = seed;
op->nbytes = nbytes;
op->op_code = IOAT_ACCEL_OPCODE_CRC32C;
TAILQ_INSERT_TAIL(&ioat_ch->sw_batch, op, link);
return 0;
}
static int
ioat_batch_cancel(struct spdk_io_channel *ch, struct spdk_accel_batch *batch)
{
struct ioat_accel_op *op;
struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch);
if ((struct spdk_accel_batch *)&ioat_ch->hw_batch != batch) {
SPDK_ERRLOG("Invalid batch\n");
return -EINVAL;
}
/* Flush the batched HW items, there's no way to cancel these without resetting. */
spdk_ioat_flush(ioat_ch->ioat_ch);
ioat_ch->hw_batch = false;
/* Return batched software items to the pool. */
while ((op = TAILQ_FIRST(&ioat_ch->sw_batch))) {
TAILQ_REMOVE(&ioat_ch->sw_batch, op, link);
TAILQ_INSERT_TAIL(&ioat_ch->op_pool, op, link);
}
return 0;
}
static int
ioat_batch_submit(struct spdk_io_channel *ch, struct spdk_accel_batch *batch,
spdk_accel_completion_cb cb_fn, void *cb_arg)
{
struct ioat_accel_op *op;
struct ioat_io_channel *ioat_ch = spdk_io_channel_get_ctx(ch);
struct spdk_accel_task *accel_task;
int batch_status = 0, cmd_status = 0;
if ((struct spdk_accel_batch *)&ioat_ch->hw_batch != batch) {
SPDK_ERRLOG("Invalid batch\n");
return -EINVAL;
}
/* Flush the batched HW items first. */
spdk_ioat_flush(ioat_ch->ioat_ch);
ioat_ch->hw_batch = false;
/* Complete the batched software items. */
while ((op = TAILQ_FIRST(&ioat_ch->sw_batch))) {
TAILQ_REMOVE(&ioat_ch->sw_batch, op, link);
accel_task = (struct spdk_accel_task *)((uintptr_t)op->cb_arg -
offsetof(struct spdk_accel_task, offload_ctx));
switch (op->op_code) {
case IOAT_ACCEL_OPCODE_DUALCAST:
memcpy(op->dst, op->src, op->nbytes);
memcpy(op->dst2, op->src, op->nbytes);
do {
switch (accel_task->op_code) {
case ACCEL_OPCODE_MEMFILL:
rc = spdk_ioat_build_fill(ioat_ch->ioat_ch, accel_task, ioat_done,
accel_task->dst, accel_task->fill_pattern, accel_task->nbytes);
break;
case IOAT_ACCEL_OPCODE_COMPARE:
cmd_status = memcmp(op->src, op->src2, op->nbytes);
break;
case IOAT_ACCEL_OPCODE_CRC32C:
*(uint32_t *)op->dst = spdk_crc32c_update(op->src, op->nbytes, ~op->seed);
case ACCEL_OPCODE_MEMMOVE:
rc = spdk_ioat_build_copy(ioat_ch->ioat_ch, accel_task, ioat_done,
accel_task->dst, accel_task->src, accel_task->nbytes);
break;
default:
assert(false);
break;
}
batch_status |= cmd_status;
op->cb_fn(accel_task, cmd_status);
TAILQ_INSERT_TAIL(&ioat_ch->op_pool, op, link);
}
tmp = TAILQ_NEXT(accel_task, link);
/* Now complete the batch request itself. */
accel_task = (struct spdk_accel_task *)((uintptr_t)cb_arg -
offsetof(struct spdk_accel_task, offload_ctx));
cb_fn(accel_task, batch_status);
/* Report any build errors via the callback now. */
if (rc) {
spdk_accel_task_complete(accel_task, rc);
}
accel_task = tmp;
} while (accel_task);
spdk_ioat_flush(ioat_ch->ioat_ch);
return 0;
}
static struct spdk_accel_engine ioat_accel_engine = {
.get_capabilities = ioat_get_capabilities,
.copy = ioat_submit_copy,
.fill = ioat_submit_fill,
.batch_get_max = ioat_batch_get_max,
.batch_create = ioat_batch_create,
.batch_cancel = ioat_batch_cancel,
.batch_prep_copy = ioat_batch_prep_copy,
.batch_prep_dualcast = ioat_batch_prep_dualcast,
.batch_prep_compare = ioat_batch_prep_compare,
.batch_prep_fill = ioat_batch_prep_fill,
.batch_prep_crc32c = ioat_batch_prep_crc32c,
.batch_submit = ioat_batch_submit,
.get_io_channel = ioat_get_io_channel,
.batch_get_max = ioat_batch_get_max,
.submit_tasks = ioat_submit_tasks,
};
static int
@@ -485,35 +211,16 @@ ioat_create_cb(void *io_device, void *ctx_buf)
{
struct ioat_io_channel *ch = ctx_buf;
struct ioat_device *ioat_dev;
struct ioat_accel_op *op;
int i;
ioat_dev = ioat_allocate_device();
if (ioat_dev == NULL) {
return -1;
}
TAILQ_INIT(&ch->sw_batch);
ch->hw_batch = false;
TAILQ_INIT(&ch->op_pool);
g_batch_size = spdk_ioat_get_max_descriptors(ioat_dev->ioat);
for (i = 0 ; i < g_batch_size ; i++) {
op = calloc(1, sizeof(struct ioat_accel_op));
if (op == NULL) {
SPDK_ERRLOG("Failed to allocate operation for batch.\n");
while ((op = TAILQ_FIRST(&ch->op_pool))) {
TAILQ_REMOVE(&ch->op_pool, op, link);
free(op);
}
return -ENOMEM;
}
TAILQ_INSERT_TAIL(&ch->op_pool, op, link);
}
ch->ioat_dev = ioat_dev;
ch->ioat_ch = ioat_dev->ioat;
ch->poller = SPDK_POLLER_REGISTER(ioat_poll, ch->ioat_ch, 0);
return 0;
}
@@ -521,12 +228,6 @@ static void
ioat_destroy_cb(void *io_device, void *ctx_buf)
{
struct ioat_io_channel *ch = ctx_buf;
struct ioat_accel_op *op;
while ((op = TAILQ_FIRST(&ch->op_pool))) {
TAILQ_REMOVE(&ch->op_pool, op, link);
free(op);
}
ioat_free_device(ch->ioat_dev);
spdk_poller_unregister(&ch->poller);