gpudev: use CPU mapping in communication list
rte_gpu_mem_cpu_map() exposes a GPU memory area to the CPU. In the gpudev communication list this is useful for storing the status flag. A communication list status flag allocated in GPU memory and mapped for CPU visibility can be updated by the CPU and polled by a GPU workload. The polling operation is more frequent than the CPU update operation, so keeping the status flag in GPU memory reduces the GPU workload's polling latency. If the CPU mapping feature is not enabled, the status flag resides in CPU memory that is registered so it is visible from the GPU. To facilitate interaction with the status flag, this patch also provides set/get functions for it. Signed-off-by: Elena Agostini <eagostini@nvidia.com>
This commit is contained in:
parent
77f40e04d7
commit
9b8cae4d99
@ -324,7 +324,13 @@ simulate_gpu_task(struct rte_gpu_comm_list *comm_list_item, int num_pkts)
|
||||
* consume(comm_list_item->pkt_list[idx].addr);
|
||||
*/
|
||||
}
|
||||
comm_list_item->status = RTE_GPU_COMM_LIST_DONE;
|
||||
/*
|
||||
* A real GPU workload function can't directly call rte_gpu_comm_set_status
|
||||
* because it's a CPU-only function.
|
||||
* A real GPU workload should implement the content
|
||||
* of rte_gpu_comm_set_status() in GPU specific code.
|
||||
*/
|
||||
rte_gpu_comm_set_status(comm_list_item, RTE_GPU_COMM_LIST_DONE);
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
@ -216,7 +216,7 @@ about how to use functions in this library in case of a CUDA application.
|
||||
|
||||
/* GPU kernel keeps checking this flag to know if it has to quit or wait for more packets. */
|
||||
while (*quit_flag_ptr == 0) {
|
||||
if (comm_list[comm_list_index]->status != RTE_GPU_COMM_LIST_READY)
|
||||
if (comm_list[comm_list_index]->status_d[0] != RTE_GPU_COMM_LIST_READY)
|
||||
continue;
|
||||
|
||||
if (threadIdx.x < comm_list[comm_list_index]->num_pkts)
|
||||
|
@ -9,6 +9,7 @@
|
||||
#include <rte_malloc.h>
|
||||
#include <rte_errno.h>
|
||||
#include <rte_log.h>
|
||||
#include <rte_eal_paging.h>
|
||||
|
||||
#include "rte_gpudev.h"
|
||||
#include "gpudev_driver.h"
|
||||
@ -846,6 +847,46 @@ rte_gpu_comm_create_list(uint16_t dev_id,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/*
|
||||
* Use GPU memory CPU map feature if enabled in the driver
|
||||
* to allocate the status flags of the list.
|
||||
* Allocating this flag in GPU memory will reduce
|
||||
* the latency when GPU workload is polling this flag.
|
||||
*/
|
||||
comm_list[0].status_d = rte_gpu_mem_alloc(dev_id,
|
||||
sizeof(enum rte_gpu_comm_list_status) * num_comm_items,
|
||||
rte_mem_page_size());
|
||||
if (ret < 0) {
|
||||
rte_errno = ENOMEM;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
comm_list[0].status_h = rte_gpu_mem_cpu_map(dev_id,
|
||||
sizeof(enum rte_gpu_comm_list_status) * num_comm_items,
|
||||
comm_list[0].status_d);
|
||||
if (comm_list[0].status_h == NULL) {
|
||||
/*
|
||||
* If CPU mapping is not supported by driver
|
||||
* use regular CPU registered memory.
|
||||
*/
|
||||
comm_list[0].status_h = rte_zmalloc(NULL,
|
||||
sizeof(enum rte_gpu_comm_list_status) * num_comm_items, 0);
|
||||
if (comm_list[0].status_h == NULL) {
|
||||
rte_errno = ENOMEM;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
ret = rte_gpu_mem_register(dev_id,
|
||||
sizeof(enum rte_gpu_comm_list_status) * num_comm_items,
|
||||
comm_list[0].status_h);
|
||||
if (ret < 0) {
|
||||
rte_errno = ENOMEM;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
comm_list[0].status_d = comm_list[0].status_h;
|
||||
}
|
||||
|
||||
for (idx_l = 0; idx_l < num_comm_items; idx_l++) {
|
||||
comm_list[idx_l].pkt_list = rte_zmalloc(NULL,
|
||||
sizeof(struct rte_gpu_comm_pkt) * RTE_GPU_COMM_LIST_PKTS_MAX, 0);
|
||||
@ -862,7 +903,6 @@ rte_gpu_comm_create_list(uint16_t dev_id,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
RTE_GPU_VOLATILE(comm_list[idx_l].status) = RTE_GPU_COMM_LIST_FREE;
|
||||
comm_list[idx_l].num_pkts = 0;
|
||||
comm_list[idx_l].dev_id = dev_id;
|
||||
|
||||
@ -872,6 +912,17 @@ rte_gpu_comm_create_list(uint16_t dev_id,
|
||||
rte_errno = ENOMEM;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (idx_l > 0) {
|
||||
comm_list[idx_l].status_h = &(comm_list[0].status_h[idx_l]);
|
||||
comm_list[idx_l].status_d = &(comm_list[0].status_d[idx_l]);
|
||||
|
||||
ret = rte_gpu_comm_set_status(&comm_list[idx_l], RTE_GPU_COMM_LIST_FREE);
|
||||
if (ret < 0) {
|
||||
rte_errno = ENOMEM;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return comm_list;
|
||||
@ -909,6 +960,14 @@ rte_gpu_comm_destroy_list(struct rte_gpu_comm_list *comm_list,
|
||||
return -1;
|
||||
}
|
||||
|
||||
ret = rte_gpu_mem_cpu_unmap(dev_id, comm_list[0].status_d);
|
||||
if (ret == 0) {
|
||||
rte_gpu_mem_free(dev_id, comm_list[0].status_d);
|
||||
} else {
|
||||
rte_gpu_mem_unregister(dev_id, comm_list[0].status_h);
|
||||
rte_free(comm_list[0].status_h);
|
||||
}
|
||||
|
||||
rte_free(comm_list);
|
||||
|
||||
return 0;
|
||||
@ -919,6 +978,7 @@ rte_gpu_comm_populate_list_pkts(struct rte_gpu_comm_list *comm_list_item,
|
||||
struct rte_mbuf **mbufs, uint32_t num_mbufs)
|
||||
{
|
||||
uint32_t idx;
|
||||
int ret;
|
||||
|
||||
if (comm_list_item == NULL || comm_list_item->pkt_list == NULL ||
|
||||
mbufs == NULL || num_mbufs > RTE_GPU_COMM_LIST_PKTS_MAX) {
|
||||
@ -942,7 +1002,39 @@ rte_gpu_comm_populate_list_pkts(struct rte_gpu_comm_list *comm_list_item,
|
||||
|
||||
RTE_GPU_VOLATILE(comm_list_item->num_pkts) = num_mbufs;
|
||||
rte_gpu_wmb(comm_list_item->dev_id);
|
||||
RTE_GPU_VOLATILE(comm_list_item->status) = RTE_GPU_COMM_LIST_READY;
|
||||
ret = rte_gpu_comm_set_status(comm_list_item, RTE_GPU_COMM_LIST_READY);
|
||||
if (ret < 0) {
|
||||
rte_errno = EINVAL;
|
||||
return -rte_errno;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
rte_gpu_comm_set_status(struct rte_gpu_comm_list *comm_list_item,
|
||||
enum rte_gpu_comm_list_status status)
|
||||
{
|
||||
if (comm_list_item == NULL) {
|
||||
rte_errno = EINVAL;
|
||||
return -rte_errno;
|
||||
}
|
||||
|
||||
RTE_GPU_VOLATILE(comm_list_item->status_h[0]) = status;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
rte_gpu_comm_get_status(struct rte_gpu_comm_list *comm_list_item,
|
||||
enum rte_gpu_comm_list_status *status)
|
||||
{
|
||||
if (comm_list_item == NULL || status == NULL) {
|
||||
rte_errno = EINVAL;
|
||||
return -rte_errno;
|
||||
}
|
||||
|
||||
*status = RTE_GPU_VOLATILE(comm_list_item->status_h[0]);
|
||||
|
||||
return 0;
|
||||
}
|
||||
@ -951,14 +1043,21 @@ int
|
||||
rte_gpu_comm_cleanup_list(struct rte_gpu_comm_list *comm_list_item)
|
||||
{
|
||||
uint32_t idx = 0;
|
||||
enum rte_gpu_comm_list_status status;
|
||||
int ret;
|
||||
|
||||
if (comm_list_item == NULL) {
|
||||
rte_errno = EINVAL;
|
||||
return -rte_errno;
|
||||
}
|
||||
|
||||
if (RTE_GPU_VOLATILE(comm_list_item->status) ==
|
||||
RTE_GPU_COMM_LIST_READY) {
|
||||
ret = rte_gpu_comm_get_status(comm_list_item, &status);
|
||||
if (ret < 0) {
|
||||
rte_errno = EINVAL;
|
||||
return -rte_errno;
|
||||
}
|
||||
|
||||
if (status == RTE_GPU_COMM_LIST_READY) {
|
||||
GPU_LOG(ERR, "packet list is still in progress");
|
||||
rte_errno = EINVAL;
|
||||
return -rte_errno;
|
||||
@ -973,7 +1072,11 @@ rte_gpu_comm_cleanup_list(struct rte_gpu_comm_list *comm_list_item)
|
||||
comm_list_item->mbufs[idx] = NULL;
|
||||
}
|
||||
|
||||
RTE_GPU_VOLATILE(comm_list_item->status) = RTE_GPU_COMM_LIST_FREE;
|
||||
ret = rte_gpu_comm_set_status(comm_list_item, RTE_GPU_COMM_LIST_FREE);
|
||||
if (ret < 0) {
|
||||
rte_errno = EINVAL;
|
||||
return -rte_errno;
|
||||
}
|
||||
RTE_GPU_VOLATILE(comm_list_item->num_pkts) = 0;
|
||||
rte_mb();
|
||||
|
||||
|
@ -124,8 +124,10 @@ struct rte_gpu_comm_list {
|
||||
struct rte_gpu_comm_pkt *pkt_list;
|
||||
/** Number of packets in the list. */
|
||||
uint32_t num_pkts;
|
||||
/** Status of the list. */
|
||||
enum rte_gpu_comm_list_status status;
|
||||
/** Status of the list. CPU pointer. */
|
||||
enum rte_gpu_comm_list_status *status_h;
|
||||
/** Status of the list. GPU pointer. */
|
||||
enum rte_gpu_comm_list_status *status_d;
|
||||
};
|
||||
|
||||
/**
|
||||
@ -679,6 +681,46 @@ __rte_experimental
|
||||
int rte_gpu_comm_populate_list_pkts(struct rte_gpu_comm_list *comm_list_item,
|
||||
struct rte_mbuf **mbufs, uint32_t num_mbufs);
|
||||
|
||||
/**
|
||||
* @warning
|
||||
* @b EXPERIMENTAL: this API may change without prior notice.
|
||||
*
|
||||
* Set status flag value of a communication list item.
|
||||
*
|
||||
* @param comm_list_item
|
||||
* Communication list item to query.
|
||||
* @param status
|
||||
* Status value to set.
|
||||
*
|
||||
* @return
|
||||
* 0 on success, -rte_errno otherwise:
|
||||
* - EINVAL if invalid input params
|
||||
*/
|
||||
__rte_experimental
|
||||
int rte_gpu_comm_set_status(struct rte_gpu_comm_list *comm_list_item,
|
||||
enum rte_gpu_comm_list_status status);
|
||||
|
||||
/**
|
||||
* @warning
|
||||
* @b EXPERIMENTAL: this API may change without prior notice.
|
||||
*
|
||||
* Get status flag value of a communication list item.
|
||||
*
|
||||
* @param comm_list_item
|
||||
* Communication list item to query.
|
||||
* Input parameter.
|
||||
* @param status
|
||||
* Communication list item status flag value.
|
||||
* Output parameter.
|
||||
*
|
||||
* @return
|
||||
* 0 on success, -rte_errno otherwise:
|
||||
* - EINVAL if invalid input params
|
||||
*/
|
||||
__rte_experimental
|
||||
int rte_gpu_comm_get_status(struct rte_gpu_comm_list *comm_list_item,
|
||||
enum rte_gpu_comm_list_status *status);
|
||||
|
||||
/**
|
||||
* @warning
|
||||
* @b EXPERIMENTAL: this API may change without prior notice.
|
||||
|
@ -12,8 +12,10 @@ EXPERIMENTAL {
|
||||
rte_gpu_comm_destroy_flag;
|
||||
rte_gpu_comm_destroy_list;
|
||||
rte_gpu_comm_get_flag_value;
|
||||
rte_gpu_comm_get_status;
|
||||
rte_gpu_comm_populate_list_pkts;
|
||||
rte_gpu_comm_set_flag;
|
||||
rte_gpu_comm_set_status;
|
||||
rte_gpu_count_avail;
|
||||
rte_gpu_find_next;
|
||||
rte_gpu_info_get;
|
||||
|
Loading…
Reference in New Issue
Block a user