gpudev: add communication flag

In a heterogeneous computing system, processing is not only done on the CPU. Some tasks can be delegated to devices working in parallel. When mixing network activity with task processing, there may be a need for the CPU and the device to communicate in order to synchronize operations. The purpose of this flag is to allow the CPU and the GPU to exchange ACKs. A possible use-case is described below. CPU: - Trigger some task on the GPU - Prepare some data - Signal to the GPU that the data is ready by updating the communication flag GPU: - Do some pre-processing - Wait for more data from the CPU by polling on the communication flag - Consume the data prepared by the CPU Signed-off-by: Elena Agostini <eagostini@nvidia.com>
2021-11-08 18:58:03 +00:00 · 2021-11-08 18:58:03 +00:00 · f56160a255
commit f56160a255
parent 2d61b429cf
6 changed files with 278 additions and 0 deletions
--- a/app/test-gpudev/main.c
+++ b/app/test-gpudev/main.c
@ -154,6 +154,61 @@ register_cpu_memory(uint16_t gpu_id)
 	return 0;
 }

+/*
+ * Set the communication flag to set_val, read it back and print both values.
+ *
+ * Returns 0 on success, -1 if either the set or the get call fails
+ * (the failing call and its error code are reported on stderr).
+ */
+static int
+set_and_print_comm_flag(struct rte_gpu_comm_flag *devflag, uint32_t set_val)
+{
+	uint32_t get_val = 0;
+	int ret;
+
+	ret = rte_gpu_comm_set_flag(devflag, set_val);
+	if (ret < 0) {
+		fprintf(stderr, "rte_gpu_comm_set_flag returned error %d\n", ret);
+		return -1;
+	}
+
+	ret = rte_gpu_comm_get_flag_value(devflag, &get_val);
+	if (ret < 0) {
+		fprintf(stderr, "rte_gpu_comm_get_flag_value returned error %d\n", ret);
+		return -1;
+	}
+
+	/*
+	 * %p expects a void pointer and its output format is
+	 * implementation-defined (it usually already carries a 0x prefix);
+	 * the flag values are unsigned, hence %u.
+	 */
+	printf("Communication flag value at %p was set to %u and current value is %u\n",
+			(void *)devflag->ptr, set_val, get_val);
+
+	return 0;
+}
+
+/*
+ * Communication flag test: create a CPU-memory flag for gpu_id,
+ * update it twice (printing the value read back each time)
+ * and destroy it.
+ *
+ * The flag is destroyed even when one of the updates fails,
+ * so no error path leaks the flag memory.
+ *
+ * Returns 0 on success, -1 if any step fails.
+ */
+static int
+create_update_comm_flag(uint16_t gpu_id)
+{
+	struct rte_gpu_comm_flag devflag;
+	int status = 0;
+	int ret;
+
+	printf("\n=======> TEST: Communication flag\n");
+
+	ret = rte_gpu_comm_create_flag(gpu_id, &devflag, RTE_GPU_COMM_FLAG_CPU);
+	if (ret < 0) {
+		fprintf(stderr, "rte_gpu_comm_create_flag returned error %d\n", ret);
+		return -1;
+	}
+
+	/* The second update is skipped if the first one already failed. */
+	if (set_and_print_comm_flag(&devflag, 25) < 0 ||
+			set_and_print_comm_flag(&devflag, 38) < 0)
+		status = -1;
+
+	/* Always release the flag, whatever the outcome of the updates. */
+	ret = rte_gpu_comm_destroy_flag(&devflag);
+	if (ret < 0) {
+		fprintf(stderr, "rte_gpu_comm_destroy_flag returned error %d\n", ret);
+		status = -1;
+	}
+
+	return status;
+}
+
+
 int
 main(int argc, char **argv)
 {
@ -204,6 +259,11 @@ main(int argc, char **argv)
 	alloc_gpu_memory(gpu_id);
 	register_cpu_memory(gpu_id);

+	/**
+	 * Communication items test
+	 */
+	create_update_comm_flag(gpu_id);
+
 	/* clean up the EAL */
 	rte_eal_cleanup();
 	printf("Bye...\n");
--- a/doc/guides/prog_guide/gpudev.rst
+++ b/doc/guides/prog_guide/gpudev.rst
@ -32,6 +32,10 @@ This library provides a number of features:
 - Interoperability with device-specific library through generic handlers.
 - Allocate and free memory on the device.
 - Register CPU memory to make it visible from the device.
+- Communication between the CPU and the device.
+
+The whole CPU - GPU communication is implemented
+using CPU memory visible from the GPU.


 API Overview
@ -73,3 +77,12 @@ Some GPU drivers may need, under certain conditions,
 to enforce the coherency of external devices writes (e.g. NIC receiving packets)
 into the GPU memory.
 gpudev abstracts and exposes this capability.
+
+Communication Flag
+~~~~~~~~~~~~~~~~~~
+
+Consider an application with some GPU task
+that's waiting to receive a signal from the CPU
+to move forward with the execution.
+The communication flag is a GPU-visible ``uint32_t`` allocated in CPU memory
+that can be used by the CPU to communicate with a GPU task.
--- a/doc/guides/rel_notes/release_21_11.rst
+++ b/doc/guides/rel_notes/release_21_11.rst
@ -105,6 +105,7 @@ New Features

  * Device information
  * Memory management
+  * Communication flag

 * **Added new RSS offload types for IPv4/L4 checksum in RSS flow.**

--- a/lib/gpudev/gpudev.c
+++ b/lib/gpudev/gpudev.c
@ -644,3 +644,95 @@ rte_gpu_wmb(int16_t dev_id)
 	}
 	return GPU_DRV_RET(dev->ops.wmb(dev));
 }
+
+/*
+ * Create a communication flag: allocate one zeroed uint32_t with
+ * rte_zmalloc() and register it with the device through
+ * rte_gpu_mem_register(), then record the pointer, memory type
+ * and device ID in devflag.
+ *
+ * Only RTE_GPU_COMM_FLAG_CPU is accepted as memory type.
+ *
+ * Returns 0 on success, -rte_errno otherwise:
+ * - EINVAL if devflag is NULL or mtype is not supported
+ * - ENOMEM if the allocation or the registration fails
+ *
+ * On registration failure the allocated memory is released and
+ * devflag->ptr is reset to NULL, so nothing is leaked and
+ * the caller is not left with a dangling pointer.
+ */
+int
+rte_gpu_comm_create_flag(uint16_t dev_id, struct rte_gpu_comm_flag *devflag,
+		enum rte_gpu_comm_flag_type mtype)
+{
+	size_t flag_size;
+	int ret;
+
+	if (devflag == NULL) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	if (mtype != RTE_GPU_COMM_FLAG_CPU) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+
+	/* Size the allocation from the flag type itself. */
+	flag_size = sizeof(*devflag->ptr);
+
+	devflag->ptr = rte_zmalloc(NULL, flag_size, 0);
+	if (devflag->ptr == NULL) {
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+
+	ret = rte_gpu_mem_register(dev_id, flag_size, devflag->ptr);
+	if (ret < 0) {
+		/* Undo the allocation: the flag is unusable without registration. */
+		rte_free(devflag->ptr);
+		devflag->ptr = NULL;
+		rte_errno = ENOMEM;
+		return -rte_errno;
+	}
+
+	devflag->mtype = mtype;
+	devflag->dev_id = dev_id;
+
+	return 0;
+}
+
+
+/*
+ * Destroy a communication flag: unregister the flag memory from
+ * the device recorded in devflag, then free it.
+ *
+ * Returns 0 on success, -rte_errno otherwise:
+ * - EINVAL if devflag is NULL or the memory cannot be unregistered
+ *
+ * When unregistering fails the flag memory is left untouched.
+ * On success devflag->ptr is reset to NULL so that the stale address
+ * cannot be reused or freed a second time.
+ */
+int
+rte_gpu_comm_destroy_flag(struct rte_gpu_comm_flag *devflag)
+{
+	int ret;
+
+	if (devflag == NULL) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+
+	ret = rte_gpu_mem_unregister(devflag->dev_id, devflag->ptr);
+	if (ret < 0) {
+		rte_errno = EINVAL;
+		/* Report -rte_errno like every other error path, not a bare -1. */
+		return -rte_errno;
+	}
+
+	rte_free(devflag->ptr);
+	devflag->ptr = NULL;
+
+	return 0;
+}
+
+
+/*
+ * Write val into a communication flag through a volatile access.
+ *
+ * Returns 0 on success, or -rte_errno with rte_errno set to EINVAL
+ * when devflag is NULL or the flag is not of type RTE_GPU_COMM_FLAG_CPU.
+ */
+int
+rte_gpu_comm_set_flag(struct rte_gpu_comm_flag *devflag, uint32_t val)
+{
+	/* Short-circuit keeps the NULL check ahead of the mtype read. */
+	if (devflag == NULL || devflag->mtype != RTE_GPU_COMM_FLAG_CPU) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+
+	RTE_GPU_VOLATILE(*devflag->ptr) = val;
+	return 0;
+}
+
+
+/*
+ * Read the current value of a communication flag through a volatile
+ * access and store it in *val.
+ *
+ * Returns 0 on success, -rte_errno otherwise:
+ * - EINVAL if devflag or val is NULL, or if the flag is not
+ *   of type RTE_GPU_COMM_FLAG_CPU
+ */
+int
+rte_gpu_comm_get_flag_value(struct rte_gpu_comm_flag *devflag, uint32_t *val)
+{
+	/* val is written below, so a NULL output pointer is invalid input too. */
+	if (devflag == NULL || val == NULL) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+	if (devflag->mtype != RTE_GPU_COMM_FLAG_CPU) {
+		rte_errno = EINVAL;
+		return -rte_errno;
+	}
+
+	*val = RTE_GPU_VOLATILE(*devflag->ptr);
+
+	return 0;
+}
--- a/lib/gpudev/rte_gpudev.h
+++ b/lib/gpudev/rte_gpudev.h
@ -38,6 +38,9 @@ extern "C" {
 /** Catch-all callback data. */
 #define RTE_GPU_CALLBACK_ANY_DATA ((void *)-1)

+/**
+ * Access variable as volatile.
+ *
+ * Expands to an lvalue of the same type as x, qualified as volatile,
+ * so it can be used on either side of an assignment.
+ * x must itself be an lvalue because its address is taken.
+ */
+#define RTE_GPU_VOLATILE(x) (*(volatile typeof(x) *)&(x))
+
 /** Store device info. */
 struct rte_gpu_info {
 	/** Unique identifier name. */
@ -68,6 +71,22 @@ enum rte_gpu_event {
 typedef void (rte_gpu_callback_t)(int16_t dev_id,
 		enum rte_gpu_event event, void *user_data);

+/**
+ * Memory where communication flag is allocated.
+ *
+ * RTE_GPU_COMM_FLAG_CPU is the only type defined here.
+ */
+enum rte_gpu_comm_flag_type {
+	/** Allocate flag on CPU memory visible from device. */
+	RTE_GPU_COMM_FLAG_CPU = 0,
+};
+
+/**
+ * Communication flag to coordinate CPU with the device.
+ *
+ * The fields are filled by rte_gpu_comm_create_flag().
+ */
+struct rte_gpu_comm_flag {
+	/** ID of the device the flag memory is registered with. */
+	uint16_t dev_id;
+	/** Pointer to the 32-bit flag memory area. */
+	uint32_t *ptr;
+	/** Type of memory used to allocate the flag. */
+	enum rte_gpu_comm_flag_type mtype;
+};
+
 /**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice.
@ -405,6 +424,95 @@ int rte_gpu_mem_unregister(int16_t dev_id, void *ptr);
 __rte_experimental
 int rte_gpu_wmb(int16_t dev_id);

+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Create a communication flag that can be shared
+ * between CPU threads and device workload to exchange some status info
+ * (e.g. work is done, processing can start, etc.).
+ *
+ * The flag memory is allocated by this function and must be released
+ * with rte_gpu_comm_destroy_flag().
+ *
+ * @param dev_id
+ *   Reference device ID.
+ * @param devflag
+ *   Pointer to the memory area of the devflag structure.
+ *   The structure is provided by the caller and filled by this function.
+ * @param mtype
+ *   Type of memory to allocate the communication flag.
+ *   Only RTE_GPU_COMM_FLAG_CPU is accepted.
+ *
+ * @return
+ *   0 on success, -rte_errno otherwise:
+ *   - ENODEV if invalid dev_id
+ *   - EINVAL if invalid inputs
+ *   - ENOTSUP if operation not supported by the driver
+ *   - ENOMEM if out of space
+ *   - EPERM if driver error
+ *
+ *   NOTE(review): the implementation in gpudev.c reports any failure
+ *   to register the flag memory as ENOMEM; confirm whether ENODEV,
+ *   ENOTSUP and EPERM can actually be returned.
+ */
+__rte_experimental
+int rte_gpu_comm_create_flag(uint16_t dev_id,
+		struct rte_gpu_comm_flag *devflag,
+		enum rte_gpu_comm_flag_type mtype);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Deallocate a communication flag.
+ *
+ * The flag memory is unregistered from the device and then freed,
+ * so the flag must not be used after a successful call.
+ *
+ * @param devflag
+ *   Pointer to the memory area of the devflag structure.
+ *
+ * @return
+ *   0 on success, -rte_errno otherwise:
+ *   - ENODEV if invalid dev_id
+ *   - EINVAL if NULL devflag
+ *   - ENOTSUP if operation not supported by the driver
+ *   - EPERM if driver error
+ *
+ *   NOTE(review): the implementation in gpudev.c sets rte_errno to EINVAL
+ *   for any failure to unregister the flag memory; confirm whether ENODEV,
+ *   ENOTSUP and EPERM can actually be reported.
+ */
+__rte_experimental
+int rte_gpu_comm_destroy_flag(struct rte_gpu_comm_flag *devflag);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Set the value of a communication flag as the input value.
+ * Flag memory area is treated as volatile.
+ * The flag must have been allocated with RTE_GPU_COMM_FLAG_CPU.
+ *
+ * @param devflag
+ *   Pointer to the memory area of the devflag structure,
+ *   as filled by rte_gpu_comm_create_flag().
+ * @param val
+ *   Value to set in the flag.
+ *
+ * @return
+ *   0 on success, -rte_errno otherwise:
+ *   - EINVAL if invalid input params
+ *     (NULL devflag or flag type other than RTE_GPU_COMM_FLAG_CPU)
+ */
+__rte_experimental
+int rte_gpu_comm_set_flag(struct rte_gpu_comm_flag *devflag,
+		uint32_t val);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Get the value of the communication flag.
+ * Flag memory area is treated as volatile.
+ * The flag must have been allocated with RTE_GPU_COMM_FLAG_CPU.
+ *
+ * @param devflag
+ *   Pointer to the memory area of the devflag structure,
+ *   as filled by rte_gpu_comm_create_flag().
+ * @param val
+ *   Flag output value. Must not be NULL.
+ *
+ * @return
+ *   0 on success, -rte_errno otherwise:
+ *   - EINVAL if invalid input params
+ */
+__rte_experimental
+int rte_gpu_comm_get_flag_value(struct rte_gpu_comm_flag *devflag,
+		uint32_t *val);
+
 #ifdef __cplusplus
 }
 #endif
--- a/lib/gpudev/version.map
+++ b/lib/gpudev/version.map
@ -6,6 +6,10 @@ EXPERIMENTAL {
 	rte_gpu_callback_register;
 	rte_gpu_callback_unregister;
 	rte_gpu_close;
+	rte_gpu_comm_create_flag;
+	rte_gpu_comm_destroy_flag;
+	rte_gpu_comm_get_flag_value;
+	rte_gpu_comm_set_flag;
 	rte_gpu_count_avail;
 	rte_gpu_find_next;
 	rte_gpu_info_get;