gpudev: add memory API

In a heterogeneous computing system, processing is not done only on the CPU. Some tasks can be delegated to devices working in parallel. Such workload distribution can be achieved by sharing some memory. As a first step, the features are focused on memory management. A function allows allocating memory inside the device, or in the main (CPU) memory while making it visible to the device. This memory may be used to save packets or for synchronization data. The next step should focus on GPU processing task control. Signed-off-by: Elena Agostini <eagostini@nvidia.com> Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
2021-11-08 18:58:01 +00:00 · 2021-11-08 18:58:01 +00:00 · e818c4e2bf
commit e818c4e2bf
parent a9af048aba
8 changed files with 340 additions and 0 deletions
--- a/app/test-gpudev/main.c
+++ b/app/test-gpudev/main.c
@ -62,6 +62,98 @@ args_parse(int argc, char **argv)
 	}
 }

+/*
+ * Exercise the device memory allocation API on one GPU.
+ *
+ * Two buffers are allocated; the test then checks that freeing an address
+ * located inside the first buffer (an address the allocator never returned)
+ * is rejected, and finally frees both buffers.
+ *
+ * Returns 0 when every step behaves as expected, -1 otherwise.
+ * On failure, buffers still held are released on a best-effort basis
+ * so that the test does not leak device memory.
+ */
+static int
+alloc_gpu_memory(uint16_t gpu_id)
+{
+	void *ptr_1 = NULL;
+	void *ptr_2 = NULL;
+	size_t buf_bytes = 1024;
+	int ret;
+
+	printf("\n=======> TEST: Allocate GPU memory\n");
+
+	/* Alloc memory on the GPU under test */
+	ptr_1 = rte_gpu_mem_alloc(gpu_id, buf_bytes);
+	if (ptr_1 == NULL) {
+		fprintf(stderr, "rte_gpu_mem_alloc GPU memory returned error\n");
+		goto error;
+	}
+	printf("GPU memory allocated at 0x%p %zuB\n", ptr_1, buf_bytes);
+
+	ptr_2 = rte_gpu_mem_alloc(gpu_id, buf_bytes);
+	if (ptr_2 == NULL) {
+		fprintf(stderr, "rte_gpu_mem_alloc GPU memory returned error\n");
+		goto error;
+	}
+	printf("GPU memory allocated at 0x%p %zuB\n", ptr_2, buf_bytes);
+
+	/* Negative test: an address in the middle of a buffer must be refused. */
+	ret = rte_gpu_mem_free(gpu_id, (uint8_t *)(ptr_1)+0x700);
+	if (ret < 0) {
+		printf("GPU memory 0x%p + 0x700 NOT freed because of memory address not recognized by driver\n", ptr_1);
+	} else {
+		fprintf(stderr, "rte_gpu_mem_free erroneously freed GPU memory 0x%p + 0x700\n", ptr_1);
+		goto error;
+	}
+
+	ret = rte_gpu_mem_free(gpu_id, ptr_2);
+	if (ret < 0) {
+		fprintf(stderr, "rte_gpu_mem_free returned error %d\n", ret);
+		goto error;
+	}
+	printf("GPU memory 0x%p freed\n", ptr_2);
+	/* Already released: keep the error path from freeing it again. */
+	ptr_2 = NULL;
+
+	ret = rte_gpu_mem_free(gpu_id, ptr_1);
+	if (ret < 0) {
+		fprintf(stderr, "rte_gpu_mem_free returned error %d\n", ret);
+		goto error;
+	}
+	printf("GPU memory 0x%p freed\n", ptr_1);
+
+	return 0;
+
+error:
+	/* Best-effort cleanup; the test has already failed, so results are ignored. */
+	if (ptr_1 != NULL)
+		rte_gpu_mem_free(gpu_id, ptr_1);
+	if (ptr_2 != NULL)
+		rte_gpu_mem_free(gpu_id, ptr_2);
+	return -1;
+}
+
+/*
+ * Exercise the CPU memory registration API on one GPU.
+ *
+ * A CPU buffer is allocated with rte_zmalloc() and registered with the
+ * device; the test then checks that unregistering an address located inside
+ * the buffer (an address that was never registered) is rejected, and
+ * finally unregisters and frees the buffer.
+ *
+ * Returns 0 when every step behaves as expected, -1 otherwise.
+ * The CPU buffer is released on every path.
+ */
+static int
+register_cpu_memory(uint16_t gpu_id)
+{
+	void *ptr = NULL;
+	size_t buf_bytes = 1024;
+	int registered = 0;
+	int ret;
+
+	printf("\n=======> TEST: Register CPU memory\n");
+
+	/* Alloc memory on CPU, to be made visible from the GPU under test */
+	ptr = rte_zmalloc(NULL, buf_bytes, 0);
+	if (ptr == NULL) {
+		fprintf(stderr, "Failed to allocate CPU memory.\n");
+		goto error;
+	}
+
+	ret = rte_gpu_mem_register(gpu_id, buf_bytes, ptr);
+	if (ret < 0) {
+		fprintf(stderr, "rte_gpu_mem_register CPU memory returned error %d\n", ret);
+		goto error;
+	}
+	registered = 1;
+	printf("CPU memory registered at 0x%p %zuB\n", ptr, buf_bytes);
+
+	/* Negative test: an address in the middle of the area must be refused. */
+	ret = rte_gpu_mem_unregister(gpu_id, (uint8_t *)(ptr)+0x700);
+	if (ret < 0) {
+		printf("CPU memory 0x%p + 0x700 NOT unregistered because of memory address not recognized by driver\n", ptr);
+	} else {
+		fprintf(stderr, "rte_gpu_mem_unregister erroneously unregistered CPU memory 0x%p + 0x700\n", ptr);
+		goto error;
+	}
+
+	ret = rte_gpu_mem_unregister(gpu_id, ptr);
+	if (ret < 0) {
+		fprintf(stderr, "rte_gpu_mem_unregister returned error %d\n", ret);
+		goto error;
+	}
+	registered = 0;
+	printf("CPU memory 0x%p unregistered\n", ptr);
+
+	rte_free(ptr);
+
+	return 0;
+
+error:
+	/* Best-effort cleanup; the test has already failed, so results are ignored. */
+	if (registered)
+		rte_gpu_mem_unregister(gpu_id, ptr);
+	/* rte_free() accepts NULL, which covers the allocation-failure path. */
+	rte_free(ptr);
+	return -1;
+}
+
 int
 main(int argc, char **argv)
 {
@ -99,6 +191,19 @@ main(int argc, char **argv)
 	}
 	printf("\n\n");

+	if (nb_gpus == 0) {
+		fprintf(stderr, "Need at least one GPU on the system to run the example\n");
+		return EXIT_FAILURE;
+	}
+
+	gpu_id = 0;
+
+	/**
+	 * Memory tests
+	 */
+	alloc_gpu_memory(gpu_id);
+	register_cpu_memory(gpu_id);
+
 	/* clean up the EAL */
 	rte_eal_cleanup();
 	printf("Bye...\n");
--- a/doc/guides/gpus/features/default.ini
+++ b/doc/guides/gpus/features/default.ini
@ -8,3 +8,6 @@
 ;
 [Features]
 Get device info                =
+Share CPU memory with device   =
+Allocate device memory         =
+Free memory                    =
--- a/doc/guides/prog_guide/gpudev.rst
+++ b/doc/guides/prog_guide/gpudev.rst
@ -30,6 +30,8 @@ Features
 This library provides a number of features:

 - Interoperability with device-specific library through generic handlers.
+- Allocate and free memory on the device.
+- Register CPU memory to make it visible from the device.


 API Overview
@ -46,3 +48,20 @@ that will be registered internally by the driver as an additional device (child)
 connected to a physical device (parent).
 Each device (parent or child) is represented through a ID
 required to indicate which device a given operation should be executed on.
+
+Memory Allocation
+~~~~~~~~~~~~~~~~~
+
+gpudev can allocate a memory area on a given GPU device,
+returning the pointer to that memory.
+Later, it's also possible to free that memory with gpudev.
+GPU memory allocated outside of the gpudev library
+(e.g. with GPU-specific library) cannot be freed by the gpudev library.
+
+Memory Registration
+~~~~~~~~~~~~~~~~~~~
+
+gpudev can register a CPU memory area to make it visible from a GPU device.
+Later, it's also possible to unregister that memory with gpudev.
+CPU memory registered outside of the gpudev library
+(e.g. with GPU-specific library) cannot be unregistered by the gpudev library.
--- a/doc/guides/rel_notes/release_21_11.rst
+++ b/doc/guides/rel_notes/release_21_11.rst
@ -104,6 +104,7 @@ New Features
 * **Introduced GPU device class with first features:**

  * Device information
+  * Memory management

 * **Added new RSS offload types for IPv4/L4 checksum in RSS flow.**

--- a/lib/gpudev/gpudev.c
+++ b/lib/gpudev/gpudev.c
@ -7,6 +7,7 @@
 #include <rte_rwlock.h>
 #include <rte_string_fns.h>
 #include <rte_memzone.h>
+#include <rte_malloc.h>
 #include <rte_errno.h>
 #include <rte_log.h>

@ -524,3 +525,103 @@ rte_gpu_info_get(int16_t dev_id, struct rte_gpu_info *info)
 	}
 	return GPU_DRV_RET(dev->ops.dev_info_get(dev, info));
 }
+
+/*
+ * Allocate size bytes on device dev_id through the driver mem_alloc callback.
+ *
+ * Returns the address provided by the driver on success.
+ * Returns NULL with rte_errno set to:
+ *   - ENODEV if dev_id does not match a device,
+ *   - ENOTSUP if the driver has no mem_alloc callback,
+ *   - ENOMEM or E2BIG when the driver reports one of them,
+ *   - EPERM for any other driver error.
+ * A size of 0 is a dry-run: once the device and callback checks passed,
+ * NULL is returned without calling the driver or touching rte_errno.
+ */
+void *
+rte_gpu_mem_alloc(int16_t dev_id, size_t size)
+{
+	struct rte_gpu *dev;
+	void *ptr = NULL;
+	int ret;
+
+	dev = gpu_get_by_id(dev_id);
+	if (dev == NULL) {
+		GPU_LOG(ERR, "alloc mem for invalid device ID %d", dev_id);
+		rte_errno = ENODEV;
+		return NULL;
+	}
+
+	if (dev->ops.mem_alloc == NULL) {
+		GPU_LOG(ERR, "mem allocation not supported");
+		rte_errno = ENOTSUP;
+		return NULL;
+	}
+
+	if (size == 0) /* dry-run */
+		return NULL;
+
+	ret = dev->ops.mem_alloc(dev, size, &ptr);
+
+	switch (ret) {
+	case 0:
+		return ptr;
+	case -ENOMEM:
+	case -E2BIG:
+		/* The driver returns a negative errno; rte_errno is positive. */
+		rte_errno = -ret;
+		return NULL;
+	default:
+		/* Any other driver failure is reported as a (positive) EPERM. */
+		rte_errno = EPERM;
+		return NULL;
+	}
+}
+
+/*
+ * Free device memory through the driver mem_free callback.
+ *
+ * Returns -ENODEV (rte_errno = ENODEV) if dev_id does not match a device
+ * and -ENOTSUP (rte_errno = ENOTSUP) if the driver has no mem_free callback.
+ * A NULL ptr is a no-op: 0 is returned without calling the driver,
+ * as stated in the public API documentation.
+ * Otherwise the driver status is returned through GPU_DRV_RET().
+ */
+int
+rte_gpu_mem_free(int16_t dev_id, void *ptr)
+{
+	struct rte_gpu *dev;
+
+	dev = gpu_get_by_id(dev_id);
+	if (dev == NULL) {
+		GPU_LOG(ERR, "free mem for invalid device ID %d", dev_id);
+		rte_errno = ENODEV;
+		return -rte_errno;
+	}
+
+	if (dev->ops.mem_free == NULL) {
+		rte_errno = ENOTSUP;
+		return -rte_errno;
+	}
+
+	if (ptr == NULL) /* dry-run */
+		return 0;
+
+	return GPU_DRV_RET(dev->ops.mem_free(dev, ptr));
+}
+
+/*
+ * Register size bytes of CPU memory starting at ptr with device dev_id,
+ * through the driver mem_register callback.
+ *
+ * Returns -ENODEV (rte_errno = ENODEV) if dev_id does not match a device
+ * and -ENOTSUP (rte_errno = ENOTSUP) if the driver has no mem_register
+ * callback.
+ * A NULL ptr or a size of 0 is a no-op: 0 is returned without calling the
+ * driver, as stated in the public API documentation.
+ * Otherwise the driver status is returned through GPU_DRV_RET().
+ */
+int
+rte_gpu_mem_register(int16_t dev_id, size_t size, void *ptr)
+{
+	struct rte_gpu *dev;
+
+	dev = gpu_get_by_id(dev_id);
+	if (dev == NULL) {
+		GPU_LOG(ERR, "register mem for invalid device ID %d", dev_id);
+		rte_errno = ENODEV;
+		return -rte_errno;
+	}
+
+	if (dev->ops.mem_register == NULL) {
+		GPU_LOG(ERR, "mem registration not supported");
+		rte_errno = ENOTSUP;
+		return -rte_errno;
+	}
+
+	if (ptr == NULL || size == 0) /* dry-run */
+		return 0;
+
+	return GPU_DRV_RET(dev->ops.mem_register(dev, size, ptr));
+}
+
+/*
+ * Unregister CPU memory from device dev_id through the driver
+ * mem_unregister callback.
+ *
+ * Returns -ENODEV (rte_errno = ENODEV) if dev_id does not match a device
+ * and -ENOTSUP (rte_errno = ENOTSUP) if the driver has no mem_unregister
+ * callback.
+ * A NULL ptr is a no-op: 0 is returned without calling the driver,
+ * as stated in the public API documentation.
+ * Otherwise the driver status is returned through GPU_DRV_RET().
+ */
+int
+rte_gpu_mem_unregister(int16_t dev_id, void *ptr)
+{
+	struct rte_gpu *dev;
+
+	dev = gpu_get_by_id(dev_id);
+	if (dev == NULL) {
+		GPU_LOG(ERR, "unregister mem for invalid device ID %d", dev_id);
+		rte_errno = ENODEV;
+		return -rte_errno;
+	}
+
+	if (dev->ops.mem_unregister == NULL) {
+		rte_errno = ENOTSUP;
+		return -rte_errno;
+	}
+
+	if (ptr == NULL) /* dry-run */
+		return 0;
+
+	return GPU_DRV_RET(dev->ops.mem_unregister(dev, ptr));
+}
--- a/lib/gpudev/gpudev_driver.h
+++ b/lib/gpudev/gpudev_driver.h
@ -27,12 +27,24 @@ enum rte_gpu_state {
 struct rte_gpu;
 typedef int (rte_gpu_close_t)(struct rte_gpu *dev);
 typedef int (rte_gpu_info_get_t)(struct rte_gpu *dev, struct rte_gpu_info *info);
+typedef int (rte_gpu_mem_alloc_t)(struct rte_gpu *dev, size_t size, void **ptr);
+typedef int (rte_gpu_mem_free_t)(struct rte_gpu *dev, void *ptr);
+typedef int (rte_gpu_mem_register_t)(struct rte_gpu *dev, size_t size, void *ptr);
+typedef int (rte_gpu_mem_unregister_t)(struct rte_gpu *dev, void *ptr);

 struct rte_gpu_ops {
 	/* Get device info. If NULL, info is just copied. */
 	rte_gpu_info_get_t *dev_info_get;
 	/* Close device or child context. */
 	rte_gpu_close_t *dev_close;
+	/* Allocate memory in device.
+	 * On success, returns 0 and stores the address in *ptr.
+	 * -ENOMEM and -E2BIG are reported as such to the application;
+	 * any other non-zero value is reported as a generic driver error.
+	 * Not called for a size of 0.
+	 * If NULL, rte_gpu_mem_alloc() fails with ENOTSUP.
+	 */
+	rte_gpu_mem_alloc_t *mem_alloc;
+	/* Free memory allocated in device.
+	 * If NULL, rte_gpu_mem_free() fails with ENOTSUP.
+	 */
+	rte_gpu_mem_free_t *mem_free;
+	/* Register CPU memory in device.
+	 * Not called for a NULL pointer or a size of 0.
+	 * If NULL, rte_gpu_mem_register() fails with ENOTSUP.
+	 */
+	rte_gpu_mem_register_t *mem_register;
+	/* Unregister CPU memory from device.
+	 * If NULL, rte_gpu_mem_unregister() fails with ENOTSUP.
+	 */
+	rte_gpu_mem_unregister_t *mem_unregister;
 };

 struct rte_gpu_mpshared {
--- a/lib/gpudev/rte_gpudev.h
+++ b/lib/gpudev/rte_gpudev.h
@ -9,6 +9,7 @@
 #include <stdint.h>
 #include <stdbool.h>

+#include <rte_bitops.h>
 #include <rte_compat.h>

 /**
@ -292,6 +293,100 @@ int rte_gpu_callback_unregister(int16_t dev_id, enum rte_gpu_event event,
 __rte_experimental
 int rte_gpu_info_get(int16_t dev_id, struct rte_gpu_info *info);

+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Allocate a chunk of memory in the device.
+ *
+ * @param dev_id
+ *   Device ID requiring allocated memory.
+ * @param size
+ *   Number of bytes to allocate.
+ *   Requesting 0 will do nothing: no memory is allocated and NULL is returned.
+ *
+ * @return
+ *   A pointer to the allocated memory, otherwise NULL and rte_errno is set:
+ *   - ENODEV if invalid dev_id
+ *   - ENOTSUP if operation not supported by the driver
+ *   - E2BIG if size is higher than limit
+ *   - ENOMEM if out of space
+ *   - EPERM if driver error
+ */
+__rte_experimental
+void *rte_gpu_mem_alloc(int16_t dev_id, size_t size)
+__rte_alloc_size(2);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Deallocate a chunk of memory allocated with rte_gpu_mem_alloc().
+ *
+ * @param dev_id
+ *   Reference device ID.
+ * @param ptr
+ *   Pointer to the memory area to be deallocated.
+ *   NULL is a no-op accepted value.
+ *
+ * @return
+ *   0 on success, -rte_errno otherwise:
+ *   - ENODEV if invalid dev_id
+ *   - ENOTSUP if operation not supported by the driver
+ *   - EPERM if driver error
+ */
+__rte_experimental
+int rte_gpu_mem_free(int16_t dev_id, void *ptr);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Register a chunk of memory on the CPU usable by the device.
+ *
+ * @param dev_id
+ *   Device ID the memory is registered for.
+ * @param size
+ *   Number of bytes to register.
+ *   Requesting 0 will do nothing.
+ * @param ptr
+ *   Pointer to the memory area to be registered.
+ *   NULL is a no-op accepted value.
+ *
+ * @return
+ *   0 on success, -rte_errno otherwise:
+ *   - ENODEV if invalid dev_id
+ *   - EINVAL if reserved flags
+ *   - ENOTSUP if operation not supported by the driver
+ *   - E2BIG if size is higher than limit
+ *   - ENOMEM if out of space
+ *   - EPERM if driver error
+ */
+__rte_experimental
+int rte_gpu_mem_register(int16_t dev_id, size_t size, void *ptr);
+
+/**
+ * @warning
+ * @b EXPERIMENTAL: this API may change without prior notice.
+ *
+ * Deregister a chunk of memory previously registered with rte_gpu_mem_register().
+ *
+ * @param dev_id
+ *   Reference device ID.
+ * @param ptr
+ *   Pointer to the memory area to be unregistered.
+ *   NULL is a no-op accepted value.
+ *
+ * @return
+ *   0 on success, -rte_errno otherwise:
+ *   - ENODEV if invalid dev_id
+ *   - ENOTSUP if operation not supported by the driver
+ *   - EPERM if driver error
+ */
+__rte_experimental
+int rte_gpu_mem_unregister(int16_t dev_id, void *ptr);
+
 #ifdef __cplusplus
 }
 #endif
--- a/lib/gpudev/version.map
+++ b/lib/gpudev/version.map
@ -11,6 +11,10 @@ EXPERIMENTAL {
 	rte_gpu_info_get;
 	rte_gpu_init;
 	rte_gpu_is_valid;
+	rte_gpu_mem_alloc;
+	rte_gpu_mem_free;
+	rte_gpu_mem_register;
+	rte_gpu_mem_unregister;
 };

 INTERNAL {