examples/accel_perf: add support for copy_crc commands

For COPY+CRC operations, we allocate a destination buffer for each
task that's big enough to hold the copied input data for all stages
of the chain. For example, if the operation is 4 stages of 4K input
buffers, the destination will be 16K in size.

Signed-off-by: paul luse <paul.e.luse@intel.com>
Change-Id: Id32a98c1dbada2881a8423c050e892d5c37bf6cc
Reviewed-on: https://review.spdk.io/gerrit/c/spdk/spdk/+/8204
Tested-by: SPDK CI Jenkins <sys_sgci@intel.com>
Community-CI: Broadcom CI <spdk-ci.pdl@broadcom.com>
Community-CI: Mellanox Build Bot
Reviewed-by: Ben Walker <benjamin.walker@intel.com>
Reviewed-by: Jim Harris <james.r.harris@intel.com>
This commit is contained in:
paul luse 2021-06-07 18:50:43 -04:00 committed by Jim Harris
parent 71f0600c5b
commit 221eb3f4dd

View File

@@ -82,6 +82,7 @@ struct ap_task {
uint32_t iov_cnt;
void *dst;
void *dst2;
uint32_t crc_dst;
struct worker_thread *worker;
int status;
int expected_status; /* used for the compare operation */
@@ -124,15 +125,20 @@ dump_user_config(struct spdk_app_opts *opts)
printf("Core mask: %s\n\n", opts->reactor_mask);
printf("Accel Perf Configuration:\n");
printf("Workload Type: %s\n", g_workload_type);
if (g_workload_selection == ACCEL_CRC32C) {
if (g_workload_selection == ACCEL_CRC32C || g_workload_selection == ACCEL_COPY_CRC32C) {
printf("CRC-32C seed: %u\n", g_crc32c_seed);
printf("vector size: %u\n", g_crc32c_chained_count);
printf("vector count %u\n", g_crc32c_chained_count);
} else if (g_workload_selection == ACCEL_FILL) {
printf("Fill pattern: 0x%x\n", g_fill_pattern);
} else if ((g_workload_selection == ACCEL_COMPARE) && g_fail_percent_goal > 0) {
printf("Failure inject: %u percent\n", g_fail_percent_goal);
}
printf("Transfer size: %u bytes\n", g_xfer_size_bytes);
if (g_workload_selection == ACCEL_COPY_CRC32C) {
printf("Vector size: %u bytes\n", g_xfer_size_bytes);
printf("Transfer size: %u bytes\n", g_xfer_size_bytes * g_crc32c_chained_count);
} else {
printf("Transfer size: %u bytes\n", g_xfer_size_bytes);
}
printf("Queue depth: %u\n", g_queue_depth);
printf("Allocate depth: %u\n", g_allocate_depth);
printf("# threads/core: %u\n", g_threads_per_core);
@@ -151,12 +157,12 @@ usage(void)
printf("accel_perf options:\n");
printf("\t[-h help message]\n");
printf("\t[-q queue depth per core]\n");
printf("\t[-C for crc32c workload, use this value to configre the io vector size to test (default 1)\n");
printf("\t[-C for crc32c workload, use this value to configure the io vector size to test (default 1)\n");
printf("\t[-T number of threads per core\n");
printf("\t[-n number of channels]\n");
printf("\t[-o transfer size in bytes]\n");
printf("\t[-t time in seconds]\n");
printf("\t[-w workload type must be one of these: copy, fill, crc32c, compare, dualcast\n");
printf("\t[-w workload type must be one of these: copy, fill, crc32c, copy_crc32c, compare, dualcast\n");
printf("\t[-s for crc32c workload, use this seed value (default 0)\n");
printf("\t[-P for compare workload, percentage of operations that should miscompare (percent, default 0)\n");
printf("\t[-f for fill workload, use this BYTE value (default 255)\n");
@@ -235,6 +241,8 @@ parse_args(int argc, char **argv)
g_workload_selection = ACCEL_FILL;
} else if (!strcmp(g_workload_type, "crc32c")) {
g_workload_selection = ACCEL_CRC32C;
} else if (!strcmp(g_workload_type, "copy_crc32c")) {
g_workload_selection = ACCEL_COPY_CRC32C;
} else if (!strcmp(g_workload_type, "compare")) {
g_workload_selection = ACCEL_COMPARE;
} else if (!strcmp(g_workload_type, "dualcast")) {
@@ -273,6 +281,7 @@ _get_task_data_bufs(struct ap_task *task)
{
uint32_t align = 0;
uint32_t i = 0;
int dst_buff_len = g_xfer_size_bytes;
/* For dualcast, the DSA HW requires 4K alignment on destination addresses but
* we do this for all engines to keep it simple.
@@ -281,7 +290,7 @@ _get_task_data_bufs(struct ap_task *task)
align = ALIGN_4K;
}
if (g_workload_selection == ACCEL_CRC32C) {
if (g_workload_selection == ACCEL_CRC32C || g_workload_selection == ACCEL_COPY_CRC32C) {
assert(g_crc32c_chained_count > 0);
task->iov_cnt = g_crc32c_chained_count;
task->iovs = calloc(task->iov_cnt, sizeof(struct iovec));
@@ -290,6 +299,10 @@ _get_task_data_bufs(struct ap_task *task)
return -ENOMEM;
}
if (g_workload_selection == ACCEL_COPY_CRC32C) {
dst_buff_len = g_xfer_size_bytes * g_crc32c_chained_count;
}
for (i = 0; i < task->iov_cnt; i++) {
task->iovs[i].iov_base = spdk_dma_zmalloc(g_xfer_size_bytes, 0, NULL);
if (task->iovs[i].iov_base == NULL) {
@@ -314,17 +327,19 @@ _get_task_data_bufs(struct ap_task *task)
}
}
task->dst = spdk_dma_zmalloc(g_xfer_size_bytes, align, NULL);
if (task->dst == NULL) {
fprintf(stderr, "Unable to alloc dst buffer\n");
return -ENOMEM;
}
if (g_workload_selection != ACCEL_COPY_CRC32C) {
task->dst = spdk_dma_zmalloc(dst_buff_len, align, NULL);
if (task->dst == NULL) {
fprintf(stderr, "Unable to alloc dst buffer\n");
return -ENOMEM;
}
/* For compare we want the buffers to match, otherwise not. */
if (g_workload_selection == ACCEL_COMPARE) {
memset(task->dst, DATA_PATTERN, g_xfer_size_bytes);
} else {
memset(task->dst, ~DATA_PATTERN, g_xfer_size_bytes);
/* For compare we want the buffers to match, otherwise not. */
if (g_workload_selection == ACCEL_COMPARE) {
memset(task->dst, DATA_PATTERN, dst_buff_len);
} else {
memset(task->dst, ~DATA_PATTERN, dst_buff_len);
}
}
if (g_workload_selection == ACCEL_DUALCAST) {
@@ -379,6 +394,10 @@ _submit_single(struct worker_thread *worker, struct ap_task *task)
task->iovs, task->iov_cnt, g_crc32c_seed,
accel_done, task);
break;
case ACCEL_COPY_CRC32C:
rc = spdk_accel_submit_copy_crc32cv(worker->ch, task->dst, task->iovs, task->iov_cnt,
&task->crc_dst, g_crc32c_seed, accel_done, task);
break;
case ACCEL_COMPARE:
random_num = rand() % 100;
if (random_num < g_fail_percent_goal) {
@@ -434,6 +453,10 @@ _batch_prep_cmd(struct worker_thread *worker, struct ap_task *task,
*(uint8_t *)task->src,
g_xfer_size_bytes, accel_done, task);
break;
case ACCEL_COPY_CRC32C:
rc = spdk_accel_batch_prep_copy_crc32c(worker->ch, batch, task->dst, task->src, &task->crc_dst,
g_crc32c_seed, g_xfer_size_bytes, accel_done, task);
break;
case ACCEL_CRC32C:
rc = spdk_accel_batch_prep_crc32cv(worker->ch, batch, (uint32_t *)task->dst,
task->iovs, task->iov_cnt, g_crc32c_seed, accel_done, task);
@@ -591,6 +614,28 @@ batch_done(void *cb_arg, int status)
spdk_thread_send_msg(worker_batch->worker->thread, _batch_done, worker_batch);
}
/*
 * Verify that the flat destination buffer matches the concatenation of the
 * source iovec contents. Also sanity-checks that the iovec lengths sum to
 * iovcnt * g_xfer_size_bytes (each vector element is expected to be one
 * transfer-sized buffer). Returns 0 on a full match, -1 on any mismatch.
 */
static int
_vector_memcmp(void *_dst, struct iovec *src_iovs, uint32_t iovcnt)
{
	uint8_t *cursor = (uint8_t *)_dst;
	uint32_t total_len = 0;
	uint32_t idx;

	for (idx = 0; idx < iovcnt; idx++) {
		size_t seg_len = src_iovs[idx].iov_len;

		if (memcmp(cursor, src_iovs[idx].iov_base, seg_len) != 0) {
			return -1;
		}

		cursor += seg_len;
		total_len += seg_len;
	}

	/* All segments matched; confirm the aggregate length is as expected. */
	return (total_len == iovcnt * g_xfer_size_bytes) ? 0 : -1;
}
static void
_accel_done(void *arg1)
{
@@ -603,6 +648,17 @@ _accel_done(void *arg1)
if (g_verify && task->status == 0) {
switch (g_workload_selection) {
case ACCEL_COPY_CRC32C:
sw_crc32c = spdk_crc32c_iov_update(task->iovs, task->iov_cnt, ~g_crc32c_seed);
if (task->crc_dst != sw_crc32c) {
SPDK_NOTICELOG("CRC-32C miscompare\n");
worker->xfer_failed++;
}
if (_vector_memcmp(task->dst, task->iovs, task->iov_cnt)) {
SPDK_NOTICELOG("Data miscompare\n");
worker->xfer_failed++;
}
break;
case ACCEL_CRC32C:
sw_crc32c = spdk_crc32c_iov_update(task->iovs, task->iov_cnt, ~g_crc32c_seed);
if (*(uint32_t *)task->dst != sw_crc32c) {
@@ -1002,6 +1058,7 @@ main(int argc, char **argv)
if ((g_workload_selection != ACCEL_COPY) &&
(g_workload_selection != ACCEL_FILL) &&
(g_workload_selection != ACCEL_CRC32C) &&
(g_workload_selection != ACCEL_COPY_CRC32C) &&
(g_workload_selection != ACCEL_COMPARE) &&
(g_workload_selection != ACCEL_DUALCAST)) {
usage();
@@ -1027,7 +1084,7 @@ main(int argc, char **argv)
g_allocate_depth = g_queue_depth;
}
if (g_workload_selection == ACCEL_CRC32C &&
if ((g_workload_selection == ACCEL_CRC32C || g_workload_selection == ACCEL_COPY_CRC32C) &&
g_crc32c_chained_count == 0) {
usage();
g_rc = -1;