gpudev: introduce GPU device class library

In a heterogeneous computing system, processing is not only in the CPU.
Some tasks can be delegated to devices working in parallel.

The new library gpudev is for dealing with GPGPU computing devices
from a DPDK application running on the CPU.

The infrastructure is prepared to welcome drivers in drivers/gpu/.

Signed-off-by: Elena Agostini <eagostini@nvidia.com>
Signed-off-by: Thomas Monjalon <thomas@monjalon.net>
This commit is contained in:
Elena Agostini 2021-11-08 18:57:57 +00:00 committed by Thomas Monjalon
parent 4fd15c6af0
commit 8b8036a66e
22 changed files with 722 additions and 0 deletions

1
.gitignore vendored
View File

@ -15,6 +15,7 @@ doc/guides/compressdevs/overview_feature_table.txt
doc/guides/regexdevs/overview_feature_table.txt
doc/guides/vdpadevs/overview_feature_table.txt
doc/guides/bbdevs/overview_feature_table.txt
doc/guides/gpus/overview_feature_table.txt
# ignore generated ctags/cscope files
cscope.out.po

View File

@ -467,6 +467,13 @@ M: Bruce Richardson <bruce.richardson@intel.com>
F: examples/dma/
F: doc/guides/sample_app_ug/dma.rst
General-Purpose Graphics Processing Unit (GPU) API - EXPERIMENTAL
M: Elena Agostini <eagostini@nvidia.com>
F: lib/gpudev/
F: doc/guides/prog_guide/gpudev.rst
F: doc/guides/gpus/features/default.ini
F: app/test-gpudev/
Eventdev API
M: Jerin Jacob <jerinj@marvell.com>
T: git://dpdk.org/next/dpdk-next-eventdev

View File

@ -13,6 +13,7 @@ apps = [
'test-eventdev',
'test-fib',
'test-flow-perf',
'test-gpudev',
'test-pipeline',
'test-pmd',
'test-regex',

106
app/test-gpudev/main.c Normal file
View File

@ -0,0 +1,106 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (c) 2021 NVIDIA Corporation & Affiliates
*/
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stdint.h>
#include <inttypes.h>
#include <stdarg.h>
#include <errno.h>
#include <getopt.h>
#include <rte_common.h>
#include <rte_malloc.h>
#include <rte_memory.h>
#include <rte_eal.h>
#include <rte_ether.h>
#include <rte_ethdev.h>
#include <rte_mempool.h>
#include <rte_mbuf.h>
#include <rte_gpudev.h>
/*
 * Identifiers of the application long options.
 * ARG_HELP is the value stored in the option table of args_parse()
 * and tested in its switch. ARG_MEMPOOL is not referenced anywhere
 * else in this file.
 */
enum app_args {
ARG_HELP,
ARG_MEMPOOL
};
/*
 * Print the command line synopsis of the application on stdout.
 *
 * prog_name: name of the program, printed in front of the synopsis.
 */
static void
usage(const char *prog_name)
{
	fprintf(stdout, "%s [EAL options] --\n", prog_name);
}
/*
 * Parse the application-specific long options.
 *
 * Only "--help" is known: it prints the usage text and parsing goes on.
 * Anything else prints the usage text and terminates through rte_exit().
 *
 * argc, argv: argument vector to hand to getopt_long().
 */
static void
args_parse(int argc, char **argv)
{
	static struct option long_opts[] = {
		{ "help", 0, 0, ARG_HELP},
		/* End of options */
		{ 0, 0, 0, 0 }
	};
	char **args = argv;
	int long_idx;
	int code;

	for (;;) {
		code = getopt_long(argc, args, "", long_opts, &long_idx);
		if (code == EOF)
			break;

		if (code == ARG_HELP) {
			usage(argv[0]);
			continue;
		}

		/* Any other return value is reported as an invalid option. */
		usage(argv[0]);
		rte_exit(EXIT_FAILURE, "Invalid option: %s\n", argv[optind]);
	}
}
/*
 * Entry point of the gpudev test application.
 *
 * Initializes EAL, parses the remaining application options,
 * prints the information of every GPU iterated by RTE_GPU_FOREACH,
 * then cleans up EAL.
 *
 * Returns EXIT_SUCCESS; any failure terminates through rte_exit().
 */
int
main(int argc, char **argv)
{
	int ret;
	int nb_gpus = 0;
	int16_t gpu_id = 0;
	struct rte_gpu_info ginfo;

	/* Init EAL. */
	ret = rte_eal_init(argc, argv);
	if (ret < 0)
		rte_exit(EXIT_FAILURE, "EAL init failed\n");

	/*
	 * Skip the arguments consumed by EAL, exactly once:
	 * shifting argv a second time would move it past the array.
	 */
	argc -= ret;
	argv += ret;
	if (argc > 1)
		args_parse(argc, argv);

	nb_gpus = rte_gpu_count_avail();
	printf("\n\nDPDK found %d GPUs:\n", nb_gpus);
	RTE_GPU_FOREACH(gpu_id) {
		if (rte_gpu_info_get(gpu_id, &ginfo))
			rte_exit(EXIT_FAILURE, "rte_gpu_info_get error - bye\n");

		/*
		 * processor_count is a uint32_t, hence PRIu32.
		 * The size is converted in double so that large
		 * memories are not rounded before the division.
		 */
		printf("\tGPU ID %d\n\t\tGPU Bus ID %s NUMA node %d Tot memory %.02f MB, Tot processors %" PRIu32 "\n",
				ginfo.dev_id,
				ginfo.name,
				ginfo.numa_node,
				(double)ginfo.total_memory / (1024.0 * 1024.0),
				ginfo.processor_count
			);
	}
	printf("\n\n");

	/* clean up the EAL */
	rte_eal_cleanup();
	printf("Bye...\n");

	return EXIT_SUCCESS;
}

View File

@ -0,0 +1,5 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2021 NVIDIA Corporation & Affiliates
sources = files('main.c')
deps = ['gpudev', 'ethdev']

View File

@ -41,6 +41,7 @@ INPUT = @TOPDIR@/doc/api/doxy-api-index.md \
@TOPDIR@/lib/eventdev \
@TOPDIR@/lib/fib \
@TOPDIR@/lib/flow_classify \
@TOPDIR@/lib/gpudev \
@TOPDIR@/lib/graph \
@TOPDIR@/lib/gro \
@TOPDIR@/lib/gso \

View File

@ -152,6 +152,9 @@ def generate_overview_table(output_filename, table_id, section, table_name, titl
name = ini_filename[:-4]
name = name.replace('_vf', 'vf')
pmd_names.append(name)
if not pmd_names:
# Add an empty column if table is empty (required by RST syntax)
pmd_names.append(' ')
# Pad the table header names.
max_header_len = len(max(pmd_names, key=len))
@ -393,6 +396,11 @@ def setup(app):
'Features',
'Features availability in bbdev drivers',
'Feature')
table_file = dirname(__file__) + '/gpus/overview_feature_table.txt'
generate_overview_table(table_file, 1,
'Features',
'Features availability in GPU drivers',
'Feature')
if LooseVersion(sphinx_version) < LooseVersion('1.3.1'):
print('Upgrade sphinx to version >= 1.3.1 for '

View File

@ -0,0 +1,10 @@
;
; Features of GPU drivers.
;
; This file defines the features that are valid for inclusion in
; the other driver files and also the order that they appear in
; the features table in the documentation. The feature description
; string should not exceed feature_str_len defined in conf.py.
;
[Features]
Get device info =

11
doc/guides/gpus/index.rst Normal file
View File

@ -0,0 +1,11 @@
.. SPDX-License-Identifier: BSD-3-Clause
Copyright (c) 2021 NVIDIA Corporation & Affiliates
General-Purpose Graphics Processing Unit Drivers
================================================
.. toctree::
:maxdepth: 2
:numbered:
overview

View File

@ -0,0 +1,10 @@
.. SPDX-License-Identifier: BSD-3-Clause
Copyright (c) 2021 NVIDIA Corporation & Affiliates
Overview of GPU Drivers
=======================
General-Purpose computing on Graphics Processing Unit (GPGPU)
is the use of GPU to perform parallel computation.
.. include:: overview_feature_table.txt

View File

@ -22,6 +22,7 @@ DPDK documentation
vdpadevs/index
regexdevs/index
dmadevs/index
gpus/index
eventdevs/index
rawdevs/index
mempool/index

View File

@ -0,0 +1,36 @@
.. SPDX-License-Identifier: BSD-3-Clause
Copyright (c) 2021 NVIDIA Corporation & Affiliates
General-Purpose Graphics Processing Unit Library
================================================
When mixing networking activity with task processing on a GPU device,
there may be the need to put in communication the CPU with the device
in order to manage the memory, synchronize operations, exchange info, etc.
By means of the generic GPU interface provided by this library,
it is possible to allocate a chunk of GPU memory and use it
to create a DPDK mempool with external mbufs having the payload
on the GPU memory, enabling any network interface card
(which supports this feature, like Mellanox NICs)
to directly transmit and receive packets using GPU memory.
Additionally, this library provides a number of functions
to enhance the dialog between CPU and GPU.
Out of scope of this library is to provide a wrapper for GPU specific libraries
(e.g. CUDA Toolkit or OpenCL), thus it is not possible to launch workload
on the device or create GPU specific objects
(e.g. CUDA Driver context or CUDA Streams in case of NVIDIA GPUs).
Features
--------
This library provides a number of features:
- Interoperability with device-specific library through generic handlers.
API Overview
------------

View File

@ -28,6 +28,7 @@ Programmer's Guide
compressdev
regexdev
dmadev
gpudev
rte_security
rawdev
link_bonding_poll_mode_drv_lib

View File

@ -101,6 +101,10 @@ New Features
Added ``rte_eth_macaddrs_get`` to allow user to retrieve all Ethernet
addresses assigned to given ethernet port.
* **Introduced GPU device class with first features:**
* Device information
* **Added new RSS offload types for IPv4/L4 checksum in RSS flow.**
Added macros ETH_RSS_IPV4_CHKSUM and ETH_RSS_L4_CHKSUM, now IPv4 and

4
drivers/gpu/meson.build Normal file
View File

@ -0,0 +1,4 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2021 NVIDIA Corporation & Affiliates
drivers = []

View File

@ -19,6 +19,7 @@ subdirs = [
'vdpa', # depends on common, bus and mempool.
'event', # depends on common, bus, mempool and net.
'baseband', # depends on common and bus.
'gpu', # depends on common and bus.
]
if meson.is_cross_build()

249
lib/gpudev/gpudev.c Normal file
View File

@ -0,0 +1,249 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (c) 2021 NVIDIA Corporation & Affiliates
*/
#include <rte_eal.h>
#include <rte_string_fns.h>
#include <rte_errno.h>
#include <rte_log.h>
#include "rte_gpudev.h"
#include "gpudev_driver.h"
/* Logging: log type of this library, registered with NOTICE as level argument. */
RTE_LOG_REGISTER_DEFAULT(gpu_logtype, NOTICE);
/*
 * Emit a message through rte_log() with the gpu_logtype log type.
 * The format is prefixed with "gpu: " and a newline is appended,
 * so callers pass the bare message without a trailing "\n".
 * level is the suffix of an RTE_LOG_* constant (e.g. ERR, DEBUG).
 */
#define GPU_LOG(level, ...) \
rte_log(RTE_LOG_ ## level, gpu_logtype, RTE_FMT("gpu: " \
RTE_FMT_HEAD(__VA_ARGS__, ) "\n", RTE_FMT_TAIL(__VA_ARGS__, )))
/*
 * Normalize the return code of a driver callback.
 * Any non-zero result is reported as EPERM: rte_errno is set to EPERM
 * and the expression evaluates to -EPERM.
 * On success rte_errno is reset to 0 and the expression evaluates to 0.
 * The argument is parenthesized so that any expression can be passed
 * (e.g. "a & b" is compared as a whole against 0).
 */
#define GPU_DRV_RET(function) \
	(((function) != 0) ? -(rte_errno = EPERM) : (rte_errno = 0))
/* Array of devices, allocated by rte_gpu_init() */
static struct rte_gpu *gpus;
/* Size of the array of devices, i.e. maximum number of devices */
static int16_t gpu_max;
/* Number of devices currently allocated (and not yet released) */
static int16_t gpu_count;
/*
 * Allocate the zeroed array of devices with room for dev_max entries.
 *
 * Returns 0 on success, -rte_errno otherwise:
 * EINVAL if dev_max is 0 or above INT16_MAX,
 * EBUSY if the array already exists,
 * ENOMEM if the allocation failed.
 */
int
rte_gpu_init(size_t dev_max)
{
	int err = 0;

	if (dev_max == 0 || dev_max > INT16_MAX) {
		GPU_LOG(ERR, "invalid array size");
		err = EINVAL;
	} else if (gpus != NULL) {
		/* No lock, it must be called before or during first probing. */
		GPU_LOG(ERR, "already initialized");
		err = EBUSY;
	} else {
		gpus = calloc(dev_max, sizeof(*gpus));
		if (gpus == NULL) {
			GPU_LOG(ERR, "cannot initialize library");
			err = ENOMEM;
		}
	}

	if (err != 0) {
		rte_errno = err;
		return -rte_errno;
	}

	gpu_max = dev_max;
	return 0;
}
/* Return the current value of the device counter of the library. */
uint16_t
rte_gpu_count_avail(void)
{
	const uint16_t nb_avail = gpu_count;

	return nb_avail;
}
/*
 * Tell whether dev_id is inside the array of devices
 * and designates a device in the initialized state.
 */
bool
rte_gpu_is_valid(int16_t dev_id)
{
	/* Reject IDs outside of the array before reading the slot. */
	if (dev_id < 0 || dev_id >= gpu_max)
		return false;

	return gpus[dev_id].state == RTE_GPU_STATE_INITIALIZED;
}
/*
 * Return the first ID, starting from dev_id, whose slot is not unused.
 * A negative dev_id starts the search at 0.
 * RTE_GPU_ID_NONE is returned when the end of the array is reached.
 */
int16_t
rte_gpu_find_next(int16_t dev_id)
{
	int16_t id;

	for (id = (dev_id < 0) ? 0 : dev_id; id < gpu_max; id++) {
		if (gpus[id].state != RTE_GPU_STATE_UNUSED)
			return id;
	}

	return RTE_GPU_ID_NONE;
}
/*
 * Return the lowest ID whose slot is in the unused state,
 * or RTE_GPU_ID_NONE if every slot of the array is taken.
 */
static int16_t
gpu_find_free_id(void)
{
	int16_t id = 0;

	while (id < gpu_max && gpus[id].state != RTE_GPU_STATE_UNUSED)
		id++;

	return (id < gpu_max) ? id : RTE_GPU_ID_NONE;
}
/*
 * Return the device slot matching dev_id,
 * or NULL if rte_gpu_is_valid() rejects this ID.
 */
static struct rte_gpu *
gpu_get_by_id(int16_t dev_id)
{
	return rte_gpu_is_valid(dev_id) ? &gpus[dev_id] : NULL;
}
/*
 * Look for a device having the given name
 * among the devices iterated by RTE_GPU_FOREACH.
 *
 * Returns the device, or NULL if there is no match.
 * NULL is also returned, with rte_errno set to EINVAL, if name is NULL.
 */
struct rte_gpu *
rte_gpu_get_by_name(const char *name)
{
	struct rte_gpu *found = NULL;
	int16_t id;

	if (name == NULL) {
		rte_errno = EINVAL;
		return NULL;
	}

	RTE_GPU_FOREACH(id) {
		if (strncmp(name, gpus[id].name, RTE_DEV_NAME_MAX_LEN) != 0)
			continue;
		found = &gpus[id];
		break;
	}

	return found;
}
struct rte_gpu *
rte_gpu_allocate(const char *name)
{
int16_t dev_id;
struct rte_gpu *dev;
if (rte_eal_process_type() != RTE_PROC_PRIMARY) {
GPU_LOG(ERR, "only primary process can allocate device");
rte_errno = EPERM;
return NULL;
}
if (name == NULL) {
GPU_LOG(ERR, "allocate device without a name");
rte_errno = EINVAL;
return NULL;
}
/* implicit initialization of library before adding first device */
if (gpus == NULL && rte_gpu_init(RTE_GPU_DEFAULT_MAX) < 0)
return NULL;
if (rte_gpu_get_by_name(name) != NULL) {
GPU_LOG(ERR, "device with name %s already exists", name);
rte_errno = EEXIST;
return NULL;
}
dev_id = gpu_find_free_id();
if (dev_id == RTE_GPU_ID_NONE) {
GPU_LOG(ERR, "reached maximum number of devices");
rte_errno = ENOENT;
return NULL;
}
dev = &gpus[dev_id];
memset(dev, 0, sizeof(*dev));
if (rte_strscpy(dev->name, name, RTE_DEV_NAME_MAX_LEN) < 0) {
GPU_LOG(ERR, "device name too long: %s", name);
rte_errno = ENAMETOOLONG;
return NULL;
}
dev->info.name = dev->name;
dev->info.dev_id = dev_id;
dev->info.numa_node = -1;
gpu_count++;
GPU_LOG(DEBUG, "new device %s (id %d) of total %d",
name, dev_id, gpu_count);
return dev;
}
/*
 * Mark a device as initialized.
 * Nothing is done if dev is NULL.
 */
void
rte_gpu_complete_new(struct rte_gpu *dev)
{
	if (dev != NULL)
		dev->state = RTE_GPU_STATE_INITIALIZED;
}
/*
 * Give a device slot back: the state becomes unused
 * and the device counter is decremented.
 *
 * Returns 0 on success,
 * or -ENODEV (rte_errno set to ENODEV) if dev is NULL.
 */
int
rte_gpu_release(struct rte_gpu *dev)
{
	if (dev != NULL) {
		GPU_LOG(DEBUG, "free device %s (id %d)",
				dev->info.name, dev->info.dev_id);
		dev->state = RTE_GPU_STATE_UNUSED;
		gpu_count--;
		return 0;
	}

	rte_errno = ENODEV;
	return -rte_errno;
}
/*
 * Close a device: call the driver close callback if there is one,
 * then release the device slot in any case.
 *
 * The first error met is the one reported, both as return value
 * and (negated) in rte_errno; rte_errno is 0 when all succeeded.
 * Returns -ENODEV if dev_id does not match a valid device.
 */
int
rte_gpu_close(int16_t dev_id)
{
	struct rte_gpu *gpu = gpu_get_by_id(dev_id);
	int close_err = 0;
	int release_err;
	int first_err;

	if (gpu == NULL) {
		GPU_LOG(ERR, "close invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}

	if (gpu->ops.dev_close != NULL)
		close_err = GPU_DRV_RET(gpu->ops.dev_close(gpu));

	/* The release is done even if the driver failed to close. */
	release_err = rte_gpu_release(gpu);

	first_err = (close_err != 0) ? close_err : release_err;
	rte_errno = -first_err;
	return first_err;
}
/*
 * Fill info with the information of a device.
 * The driver callback is used if there is one,
 * otherwise the info stored in the device is copied.
 *
 * Returns 0 on success, -rte_errno otherwise:
 * ENODEV if dev_id does not match a valid device,
 * EINVAL if info is NULL,
 * EPERM if the driver callback returned an error.
 */
int
rte_gpu_info_get(int16_t dev_id, struct rte_gpu_info *info)
{
	struct rte_gpu *gpu = gpu_get_by_id(dev_id);

	if (gpu == NULL) {
		GPU_LOG(ERR, "query invalid device ID %d", dev_id);
		rte_errno = ENODEV;
		return -rte_errno;
	}
	if (info == NULL) {
		GPU_LOG(ERR, "query without storage");
		rte_errno = EINVAL;
		return -rte_errno;
	}

	if (gpu->ops.dev_info_get != NULL)
		return GPU_DRV_RET(gpu->ops.dev_info_get(gpu, info));

	*info = gpu->info;
	return 0;
}

View File

@ -0,0 +1,67 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (c) 2021 NVIDIA Corporation & Affiliates
*/
/*
* This header file must be included only by drivers.
* It is considered internal, i.e. hidden for the application.
* The prefix rte_ is used to avoid namespace clash in drivers.
*/
#ifndef RTE_GPUDEV_DRIVER_H
#define RTE_GPUDEV_DRIVER_H
#include <stdint.h>
#include <rte_dev.h>
#include "rte_gpudev.h"
/* Flags indicate current state of device. */
/*
 * Flags indicate current state of device.
 * RTE_GPU_STATE_UNUSED is the first enumerator (value 0),
 * so a zeroed struct rte_gpu is in the unused state.
 */
enum rte_gpu_state {
RTE_GPU_STATE_UNUSED, /* not initialized */
RTE_GPU_STATE_INITIALIZED, /* initialized */
};
struct rte_gpu;
/* Driver callback closing a device. Must return 0 on success. */
typedef int (rte_gpu_close_t)(struct rte_gpu *dev);
/* Driver callback filling info for a device. Must return 0 on success. */
typedef int (rte_gpu_info_get_t)(struct rte_gpu *dev, struct rte_gpu_info *info);
/* Driver callbacks of a device. A callback may be NULL. */
struct rte_gpu_ops {
/* Get device info. If NULL, info is just copied. */
rte_gpu_info_get_t *dev_info_get;
/* Close device. If NULL, the device is only released when closed. */
rte_gpu_close_t *dev_close;
};
/* Device object, one per slot of the array of devices of the library. */
struct rte_gpu {
/* Backing device. */
struct rte_device *device;
/* Unique identifier name. */
char name[RTE_DEV_NAME_MAX_LEN]; /* Updated by this library. */
/* Device info structure. */
struct rte_gpu_info info;
/* Driver functions. */
struct rte_gpu_ops ops;
/* Current state (used or not) in the running process. */
enum rte_gpu_state state; /* Updated by this library. */
/* Driver-specific private data for the running process. */
void *process_private;
} __rte_cache_aligned;
/*
 * Find a device by name.
 * Returns NULL if not found, or if name is NULL (rte_errno is then EINVAL).
 */
__rte_internal
struct rte_gpu *rte_gpu_get_by_name(const char *name);
/*
 * First step of initialization.
 * Returns a zeroed device with name and default info set,
 * or NULL with rte_errno set on failure.
 * The device remains in the unused state until rte_gpu_complete_new().
 */
__rte_internal
struct rte_gpu *rte_gpu_allocate(const char *name);
/* Last step of initialization: the device becomes initialized. */
__rte_internal
void rte_gpu_complete_new(struct rte_gpu *dev);
/*
 * Last step of removal: the device becomes unused.
 * Returns 0, or -ENODEV if dev is NULL.
 */
__rte_internal
int rte_gpu_release(struct rte_gpu *dev);
#endif /* RTE_GPUDEV_DRIVER_H */

10
lib/gpudev/meson.build Normal file
View File

@ -0,0 +1,10 @@
# SPDX-License-Identifier: BSD-3-Clause
# Copyright (c) 2021 NVIDIA Corporation & Affiliates
headers = files(
'rte_gpudev.h',
)
sources = files(
'gpudev.c',
)

168
lib/gpudev/rte_gpudev.h Normal file
View File

@ -0,0 +1,168 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright (c) 2021 NVIDIA Corporation & Affiliates
*/
#ifndef RTE_GPUDEV_H
#define RTE_GPUDEV_H
#include <stddef.h>
#include <stdint.h>
#include <stdbool.h>
#include <rte_compat.h>
/**
* @file
* Generic library to interact with GPU computing device.
*
* The API is not thread-safe.
* Device management must be done by a single thread.
*
* @warning
* @b EXPERIMENTAL: this API may change without prior notice.
*/
#ifdef __cplusplus
extern "C" {
#endif
/** Maximum number of devices if rte_gpu_init() is not called. */
#define RTE_GPU_DEFAULT_MAX 32
/** Empty device ID, returned by rte_gpu_find_next() when there is no device. */
#define RTE_GPU_ID_NONE -1
/** Store device info. */
struct rte_gpu_info {
/** Unique identifier name. */
const char *name;
/** Device ID. */
int16_t dev_id;
/** Total processors available on device. */
uint32_t processor_count;
/** Total memory available on device. */
size_t total_memory;
/** Local NUMA memory ID. -1 if unknown. */
int16_t numa_node;
};
/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice.
 *
 * Initialize the device array before probing devices.
 * If not called, the maximum of probed devices is RTE_GPU_DEFAULT_MAX.
 *
 * @param dev_max
 *   Maximum number of devices, from 1 to INT16_MAX.
 *
 * @return
 *   0 on success, -rte_errno otherwise:
 *   - ENOMEM if out of memory
 *   - EINVAL if 0 size or size above INT16_MAX
 *   - EBUSY if already initialized
 */
__rte_experimental
int rte_gpu_init(size_t dev_max);
/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice.
 *
 * Return the number of GPUs detected and associated to DPDK.
 *
 * @return
 *   The number of available computing devices.
 */
__rte_experimental
uint16_t rte_gpu_count_avail(void);
/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice.
 *
 * Check if the device is valid and initialized in DPDK.
 *
 * @param dev_id
 *   The input device ID. Any value is accepted,
 *   including negative or out of range IDs.
 *
 * @return
 *   - True if dev_id is a valid and initialized computing device.
 *   - False otherwise.
 */
__rte_experimental
bool rte_gpu_is_valid(int16_t dev_id);
/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice.
 *
 * Get the ID of the next valid GPU initialized in DPDK.
 *
 * @param dev_id
 *   The initial device ID to start the search.
 *   A negative value starts the search from 0.
 *
 * @return
 *   Next device ID corresponding to a valid and initialized computing device,
 *   RTE_GPU_ID_NONE if there is none.
 */
__rte_experimental
int16_t rte_gpu_find_next(int16_t dev_id);
/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice.
 *
 * Macro to iterate over all valid GPU devices.
 * The iteration starts at ID 0 and ends when rte_gpu_find_next()
 * returns a negative value (RTE_GPU_ID_NONE).
 *
 * @param dev_id
 *   Variable of type int16_t which receives the ID of each device in turn.
 *   Its initial value is ignored.
 */
#define RTE_GPU_FOREACH(dev_id) \
	for ((dev_id) = rte_gpu_find_next(0); \
	     (dev_id) >= 0; \
	     (dev_id) = rte_gpu_find_next((dev_id) + 1))
/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice.
 *
 * Close device.
 * All resources are released.
 * The device ID becomes invalid, even if the driver reports an error.
 *
 * @param dev_id
 *   Device ID to close.
 *
 * @return
 *   0 on success, -rte_errno otherwise:
 *   - ENODEV if invalid dev_id
 *   - EPERM if driver error
 */
__rte_experimental
int rte_gpu_close(int16_t dev_id);
/**
 * @warning
 * @b EXPERIMENTAL: this API may change without prior notice.
 *
 * Return device specific info.
 * If the driver provides no function for this,
 * the info stored by the library is copied.
 *
 * @param dev_id
 *   Device ID to get info.
 * @param info
 *   Memory structure to fill with the info.
 *
 * @return
 *   0 on success, -rte_errno otherwise:
 *   - ENODEV if invalid dev_id
 *   - EINVAL if NULL info
 *   - EPERM if driver error
 */
__rte_experimental
int rte_gpu_info_get(int16_t dev_id, struct rte_gpu_info *info);
#ifdef __cplusplus
}
#endif
#endif /* RTE_GPUDEV_H */

20
lib/gpudev/version.map Normal file
View File

@ -0,0 +1,20 @@
EXPERIMENTAL {
global:
# added in 21.11
rte_gpu_close;
rte_gpu_count_avail;
rte_gpu_find_next;
rte_gpu_info_get;
rte_gpu_init;
rte_gpu_is_valid;
};
INTERNAL {
global:
rte_gpu_allocate;
rte_gpu_complete_new;
rte_gpu_get_by_name;
rte_gpu_release;
};

View File

@ -34,6 +34,7 @@ libraries = [
'distributor',
'efd',
'eventdev',
'gpudev',
'gro',
'gso',
'ip_frag',