raw/ioat: add bus driver for device scanning automatically

Rather than using a vdev with args, DPDK can scan and initialize the
devices automatically using a bus-type driver. This bus does not need to
worry about registering device drivers, rather it can initialize the
devices directly on probe.

The device instances (queues) to use are detected from /dev, with
additional information about them read from /sys.

Signed-off-by: Bruce Richardson <bruce.richardson@intel.com>
This commit is contained in:
Bruce Richardson 2021-05-04 14:14:54 +01:00 committed by Thomas Monjalon
parent d38b62cd6c
commit b7aaf417f9
4 changed files with 373 additions and 242 deletions

View File

@ -78,7 +78,7 @@ Example configuration for a work queue::
$ accel-config config-wq dsa0/wq0.0 --group-id=0 \
--mode=dedicated --priority=10 --wq-size=8 \
--type=user --name=app1
--type=user --name=dpdk_app1
Once the devices have been configured, they need to be enabled::
@ -114,15 +114,18 @@ the device driver on the EAL commandline, via the ``allowlist`` or ``-a`` flag e
$ dpdk-test -a <b:d:f>,max_queues=4
If the device is bound to the IDXD kernel driver (and previously configured with sysfs),
then a specific work queue needs to be passed to the application via a vdev parameter.
This vdev parameter takes the driver name and work queue name as parameters.
For example, to use work queue 0 on Intel\ |reg| DSA instance 0::
For devices bound to the IDXD kernel driver,
the DPDK ioat driver will automatically perform a scan for available workqueues to use.
Any workqueues found listed in ``/dev/dsa`` on the system will be checked in ``/sys``,
and any which have ``dpdk_`` prefix in their name will be automatically probed by the
driver to make them available to the application.
Alternatively, to support use by multiple DPDK processes simultaneously,
the value used as the DPDK ``--file-prefix`` parameter may be used as a workqueue name prefix,
instead of ``dpdk_``,
allowing each DPDK application instance to only use a subset of configured queues.
$ dpdk-test --no-pci --vdev=rawdev_idxd,wq=0.0
Once probed successfully, the device will appear as a ``rawdev``, that is a
"raw device type" inside DPDK, and can be accessed using APIs from the
Once probed successfully, irrespective of kernel driver, the device will appear as a ``rawdev``,
that is a "raw device type" inside DPDK, and can be accessed using APIs from the
``rte_rawdev`` library.
Using IOAT Rawdev Devices

359
drivers/raw/ioat/idxd_bus.c Normal file
View File

@ -0,0 +1,359 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2021 Intel Corporation
*/
#include <dirent.h>
#include <libgen.h>
#include <string.h>
#include <errno.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/mman.h>
#include <rte_bus.h>
#include <rte_log.h>
#include <rte_string_fns.h>
#include "ioat_private.h"
/*
 * Default locations of the DSA work-queue device nodes and of their sysfs
 * entries. dsa_get_dev_path() and dsa_get_sysfs_path() allow environment
 * variables of the same names to override these defaults, e.g. for testing.
 */
#define DSA_DEV_PATH "/dev/dsa"
#define DSA_SYSFS_PATH "/sys/bus/dsa/devices"
/**
 * unique identifier for a DSA device/WQ instance,
 * as parsed from a "wq<device_id>.<wq_id>" name by dsa_addr_parse()
 */
struct dsa_wq_addr {
uint16_t device_id; /**< first number in the "wqN.M" name */
uint16_t wq_id; /**< second number in the "wqN.M" name */
};
/**
 * a DSA device instance, one per work queue found by dsa_scan()
 *
 * The rte_device member must stay first: dsa_find_device() casts between
 * the two struct types and enforces the zero offset at build time.
 */
struct rte_dsa_device {
struct rte_device device; /**< Inherit core device */
TAILQ_ENTRY(rte_dsa_device) next; /**< next dev in list */
char wq_name[32]; /**< the workqueue name/number e.g. wq0.1 */
struct dsa_wq_addr addr; /**< Identifies the specific WQ */
};
/*
 * forward prototypes
 *
 * These functions are defined later in this file but are referenced by the
 * dsa_bus initializer, so they must be declared first.
 */
struct dsa_bus;
static int dsa_scan(void);
static int dsa_probe(void);
static struct rte_device *dsa_find_device(const struct rte_device *start,
rte_dev_cmp_t cmp, const void *data);
static enum rte_iova_mode dsa_get_iommu_class(void);
static int dsa_addr_parse(const char *name, void *addr);
/** List of devices (tail queue of rte_dsa_device entries) */
TAILQ_HEAD(dsa_device_list, rte_dsa_device);
/**
 * Structure describing the DSA bus
 */
struct dsa_bus {
struct rte_bus bus; /**< Inherit the generic class */
struct rte_driver driver; /**< Driver struct for devices to point to */
struct dsa_device_list device_list; /**< List of DSA devices */
};
/*
 * The DSA bus instance: the generic bus callbacks implemented in this file,
 * the driver struct that dsa_probe() points matching devices at, and the
 * device list, which starts out empty and is filled by dsa_scan().
 */
struct dsa_bus dsa_bus = {
.bus = {
.scan = dsa_scan,
.probe = dsa_probe,
.find_device = dsa_find_device,
.get_iommu_class = dsa_get_iommu_class,
.parse = dsa_addr_parse,
},
.driver = {
.name = "rawdev_idxd"
},
.device_list = TAILQ_HEAD_INITIALIZER(dsa_bus.device_list),
};
/*
 * Return the directory that holds the DSA work-queue device nodes: the value
 * of the DSA_DEV_PATH environment variable when set, else the built-in default.
 */
static inline const char *
dsa_get_dev_path(void)
{
	const char *env_override = getenv("DSA_DEV_PATH");

	if (env_override != NULL)
		return env_override;
	return DSA_DEV_PATH;
}
/*
 * Return the sysfs directory that describes the DSA devices: the value of the
 * DSA_SYSFS_PATH environment variable when set, else the built-in default.
 */
static inline const char *
dsa_get_sysfs_path(void)
{
	const char *env_override = getenv("DSA_SYSFS_PATH");

	if (env_override != NULL)
		return env_override;
	return DSA_SYSFS_PATH;
}
/*
 * rawdev operations table handed to idxd_rawdev_create() by
 * idxd_rawdev_probe_dsa() for every work queue probed through this bus.
 */
static const struct rte_rawdev_ops idxd_vdev_ops = {
.dev_close = idxd_rawdev_close,
.dev_selftest = ioat_rawdev_test,
.dump = idxd_dev_dump,
.dev_configure = idxd_dev_configure,
.dev_info_get = idxd_dev_info_get,
.xstats_get = ioat_xstats_get,
.xstats_get_names = ioat_xstats_get_names,
.xstats_reset = ioat_xstats_reset,
};
/*
 * Map the first 0x1000 bytes of the work-queue device node named by
 * dev->wq_name (located under dsa_get_dev_path()) as a shared, writable
 * mapping.
 *
 * Returns the mapped address, or NULL if the node cannot be opened or mapped.
 * The file descriptor is closed before returning in every case.
 */
static void *
idxd_vdev_mmap_wq(struct rte_dsa_device *dev)
{
	char wq_path[PATH_MAX];
	void *portal;
	int wq_fd;

	snprintf(wq_path, sizeof(wq_path), "%s/%s", dsa_get_dev_path(), dev->wq_name);

	wq_fd = open(wq_path, O_RDWR);
	if (wq_fd < 0) {
		IOAT_PMD_ERR("Failed to open device path: %s", wq_path);
		return NULL;
	}

	portal = mmap(NULL, 0x1000, PROT_WRITE, MAP_SHARED, wq_fd, 0);
	close(wq_fd);
	if (portal != MAP_FAILED)
		return portal;

	IOAT_PMD_ERR("Failed to mmap device %s", wq_path);
	return NULL;
}
/*
 * Read the sysfs attribute <sysfs path>/<wq_name>/<filename> into value as a
 * NUL-terminated string.
 *
 * At most valuelen - 1 bytes are read with a single read() call; the content
 * is stored as-is (nothing is stripped from it).
 *
 * Returns 0 on success, -1 if valuelen is 0 or the file cannot be opened or
 * read.
 */
static int
read_wq_string(struct rte_dsa_device *dev, const char *filename,
		char *value, size_t valuelen)
{
	char sysfs_node[PATH_MAX];
	ssize_t len;
	int read_errno;
	int fd;

	/* no room for even the terminator; also keeps valuelen - 1 from wrapping */
	if (valuelen == 0)
		return -1;

	snprintf(sysfs_node, sizeof(sysfs_node), "%s/%s/%s",
			dsa_get_sysfs_path(), dev->wq_name, filename);
	fd = open(sysfs_node, O_RDONLY);
	if (fd < 0) {
		IOAT_PMD_ERR("%s(): opening file '%s' failed: %s",
				__func__, sysfs_node, strerror(errno));
		return -1;
	}

	len = read(fd, value, valuelen - 1);
	/* close() may overwrite errno, so capture the read() error first */
	read_errno = errno;
	close(fd);
	if (len < 0) {
		IOAT_PMD_ERR("%s(): error reading file '%s': %s",
				__func__, sysfs_node, strerror(read_errno));
		return -1;
	}
	value[len] = '\0';

	return 0;
}
/*
 * Parse a decimal integer from the sysfs attribute
 * <sysfs path>/<wq_name>/<filename> into *value.
 *
 * Returns 0 on success, -1 if the file cannot be opened or does not start
 * with an integer.
 */
static int
read_wq_int(struct rte_dsa_device *dev, const char *filename,
		int *value)
{
	char attr_path[PATH_MAX];
	int status = 0;
	int matched;
	FILE *fp;

	snprintf(attr_path, sizeof(attr_path), "%s/%s/%s",
			dsa_get_sysfs_path(), dev->wq_name, filename);

	fp = fopen(attr_path, "r");
	if (fp == NULL) {
		IOAT_PMD_ERR("%s(): opening file '%s' failed: %s",
				__func__, attr_path, strerror(errno));
		return -1;
	}

	matched = fscanf(fp, "%d", value);
	if (matched != 1) {
		IOAT_PMD_ERR("%s(): error reading file '%s': %s",
				__func__, attr_path, strerror(errno));
		status = -1;
	}

	fclose(fp);
	return status;
}
/*
 * Parse a decimal integer from the device-level sysfs attribute
 * <sysfs path>/dsa<device_id>/<filename> into *value.
 *
 * Returns 0 on success, -1 if the file cannot be opened or does not start
 * with an integer.
 */
static int
read_device_int(struct rte_dsa_device *dev, const char *filename,
		int *value)
{
	char attr_path[PATH_MAX];
	int status = 0;
	int matched;
	FILE *fp;

	snprintf(attr_path, sizeof(attr_path), "%s/dsa%d/%s",
			dsa_get_sysfs_path(), dev->addr.device_id, filename);

	fp = fopen(attr_path, "r");
	if (fp == NULL) {
		IOAT_PMD_ERR("%s(): opening file '%s' failed: %s",
				__func__, attr_path, strerror(errno));
		return -1;
	}

	matched = fscanf(fp, "%d", value);
	if (matched != 1) {
		IOAT_PMD_ERR("%s(): error reading file '%s': %s",
				__func__, attr_path, strerror(errno));
		status = -1;
	}

	fclose(fp);
	return status;
}
/*
 * Create a rawdev for one work queue.
 *
 * Reads the queue's "size" sysfs attribute into max_batches, maps the queue's
 * device node as the portal and passes the result to idxd_rawdev_create().
 *
 * Returns 0 on success, -1 if the size cannot be read, -ENOENT if the portal
 * cannot be mapped, or the error returned by idxd_rawdev_create().
 */
static int
idxd_rawdev_probe_dsa(struct rte_dsa_device *dev)
{
	struct idxd_rawdev idxd = {{0}}; /* double {} to avoid error on BSD12 */
	int wq_size = 0;
	int ret;

	IOAT_PMD_INFO("Probing device %s on numa node %d",
			dev->wq_name, dev->device.numa_node);
	if (read_wq_int(dev, "size", &wq_size) < 0)
		return -1;
	idxd.max_batches = wq_size;
	idxd.qid = dev->addr.wq_id;
	idxd.u.vdev.dsa_id = dev->addr.device_id;

	idxd.public.portal = idxd_vdev_mmap_wq(dev);
	if (idxd.public.portal == NULL) {
		IOAT_PMD_ERR("WQ mmap failed");
		return -ENOENT;
	}

	ret = idxd_rawdev_create(dev->wq_name, &dev->device, &idxd, &idxd_vdev_ops);
	if (ret) {
		IOAT_PMD_ERR("Failed to create rawdev %s", dev->wq_name);
		/*
		 * Do not leak the portal mapping made above (0x1000 bytes, see
		 * idxd_vdev_mmap_wq()).
		 * NOTE(review): assumes idxd_rawdev_create() does not itself
		 * unmap the portal on failure - confirm.
		 */
		if (munmap(idxd.public.portal, 0x1000) < 0)
			IOAT_PMD_ERR("Error unmapping portal");
		return ret;
	}

	return 0;
}
/*
 * Decide whether a work queue with the given configured name may be used by
 * this process.
 *
 * A queue is accepted when its name starts with "dpdk_", or with the last
 * path component of the EAL runtime directory followed by '_'.
 *
 * Returns 1 if the queue is for this process, 0 otherwise (including when
 * the runtime directory string cannot be duplicated).
 */
static int
is_for_this_process_use(const char *name)
{
	char *runtime_dir;
	const char *prefix;
	size_t prefixlen;
	int retval;

	/* generic prefix: no need to look at the runtime directory at all */
	if (strncmp(name, "dpdk_", 5) == 0)
		return 1;

	/* basename() may modify its argument, so work on a private copy */
	runtime_dir = strdup(rte_eal_get_runtime_dir());
	if (runtime_dir == NULL)
		return 0;

	prefix = basename(runtime_dir);
	prefixlen = strlen(prefix);
	retval = (strncmp(name, prefix, prefixlen) == 0 && name[prefixlen] == '_');

	free(runtime_dir);
	return retval;
}
/*
 * Bus probe callback: walk every scanned work queue and create a rawdev for
 * each one whose sysfs "type" starts with "user" and whose sysfs "name" is
 * accepted by is_for_this_process_use().
 *
 * Queues whose attributes cannot be read are skipped silently; the result of
 * an individual probe is not checked. Always returns 0.
 */
static int
dsa_probe(void)
{
	struct rte_dsa_device *wq_dev;

	TAILQ_FOREACH(wq_dev, &dsa_bus.device_list, next) {
		char wq_type[64], wq_label[64];
		int is_user_wq;

		if (read_wq_string(wq_dev, "type", wq_type, sizeof(wq_type)) < 0)
			continue;
		if (read_wq_string(wq_dev, "name", wq_label, sizeof(wq_label)) < 0)
			continue;

		is_user_wq = (strncmp(wq_type, "user", 4) == 0);
		if (!is_user_wq || !is_for_this_process_use(wq_label)) {
			IOAT_PMD_DEBUG("WQ '%s', not allocated to DPDK", wq_dev->wq_name);
			continue;
		}

		wq_dev->device.driver = &dsa_bus.driver;
		idxd_rawdev_probe_dsa(wq_dev);
	}

	return 0;
}
/*
 * Bus scan callback: list the DSA device directory and add one
 * rte_dsa_device to the bus device list for every "wq*" entry whose name
 * parses as wq<device>.<queue>.
 *
 * Each new device is zero-initialised, then given its bus, name, address and
 * the NUMA node read from the parent device's sysfs "numa_node" attribute
 * (-1 when that cannot be read).
 *
 * Returns 0 on success, or when the device directory does not exist; -1 if
 * the directory cannot be opened for another reason; -ENOMEM if a device
 * structure cannot be allocated (devices already added stay on the list).
 */
static int
dsa_scan(void)
{
	const char *path = dsa_get_dev_path();
	struct dirent *wq;
	DIR *dev_dir;

	dev_dir = opendir(path);
	if (dev_dir == NULL) {
		if (errno == ENOENT)
			return 0; /* no bus, return without error */
		IOAT_PMD_ERR("%s(): opendir '%s' failed: %s",
				__func__, path, strerror(errno));
		return -1;
	}

	while ((wq = readdir(dev_dir)) != NULL) {
		struct rte_dsa_device *dev;
		int numa_node = -1;

		if (strncmp(wq->d_name, "wq", 2) != 0)
			continue;
		if (strnlen(wq->d_name, sizeof(dev->wq_name)) == sizeof(dev->wq_name)) {
			IOAT_PMD_ERR("%s(): wq name too long: '%s', skipping",
					__func__, wq->d_name);
			continue;
		}
		IOAT_PMD_DEBUG("%s(): found %s/%s", __func__, path, wq->d_name);

		/* zeroed so that no rte_device field is left holding garbage */
		dev = calloc(1, sizeof(*dev));
		if (dev == NULL) {
			IOAT_PMD_ERR("%s(): out of memory for WQ '%s'",
					__func__, wq->d_name);
			closedir(dev_dir);
			return -ENOMEM;
		}
		if (dsa_addr_parse(wq->d_name, &dev->addr) < 0) {
			IOAT_PMD_ERR("Error parsing WQ name: %s", wq->d_name);
			free(dev);
			continue;
		}
		dev->device.bus = &dsa_bus.bus;
		strlcpy(dev->wq_name, wq->d_name, sizeof(dev->wq_name));
		/* name-based device lookups need a valid string here */
		dev->device.name = dev->wq_name;
		TAILQ_INSERT_TAIL(&dsa_bus.device_list, dev, next);

		read_device_int(dev, "numa_node", &numa_node);
		dev->device.numa_node = numa_node;
	}

	closedir(dev_dir);
	return 0;
}
/*
 * Bus find_device callback: return the first device on the DSA bus for which
 * cmp(device, data) is 0.
 *
 * When start is NULL the search begins at the head of the list; otherwise it
 * begins at the entry after start. Returns NULL when nothing matches.
 */
static struct rte_device *
dsa_find_device(const struct rte_device *start, rte_dev_cmp_t cmp,
		const void *data)
{
	struct rte_dsa_device *candidate;

	/* the rte_device struct must be at start of dsa structure */
	RTE_BUILD_BUG_ON(offsetof(struct rte_dsa_device, device) != 0);

	if (start == NULL)
		candidate = TAILQ_FIRST(&dsa_bus.device_list);
	else /* resume from the entry after the given start point */
		candidate = TAILQ_NEXT((const struct rte_dsa_device *)start, next);

	for (; candidate != NULL; candidate = TAILQ_NEXT(candidate, next)) {
		if (cmp(&candidate->device, data) == 0)
			return &candidate->device;
	}

	return NULL;
}
static enum rte_iova_mode
dsa_get_iommu_class(void)
{
return RTE_IOVA_VA;
}
/*
 * Bus parse callback: check that name has the form "wq<device_id>.<wq_id>"
 * and, when addr is not NULL, store the two ids there as a
 * struct dsa_wq_addr.
 *
 * Returns 0 if the name is a valid work-queue name, -1 otherwise (bad format
 * or an id that does not fit in 16 bits).
 */
static int
dsa_addr_parse(const char *name, void *addr)
{
	struct dsa_wq_addr *wq = addr;
	unsigned int device_id, wq_id;

	if (sscanf(name, "wq%u.%u", &device_id, &wq_id) != 2) {
		IOAT_PMD_DEBUG("Parsing WQ name failed: %s", name);
		return -1;
	}

	/* the address fields are 16 bits wide; reject rather than truncate */
	if (device_id > UINT16_MAX || wq_id > UINT16_MAX) {
		IOAT_PMD_DEBUG("WQ name out of range: %s", name);
		return -1;
	}

	/*
	 * The bus parse callback may be called with a NULL addr purely to test
	 * whether this bus recognises the name; nothing is written then.
	 */
	if (wq != NULL) {
		wq->device_id = device_id;
		wq->wq_id = wq_id;
	}

	return 0;
}
/* Register the generic bus member of dsa_bus, using "dsa" as the bus name argument */
RTE_REGISTER_BUS(dsa, dsa_bus.bus);

View File

@ -1,231 +0,0 @@
/* SPDX-License-Identifier: BSD-3-Clause
* Copyright(c) 2020 Intel Corporation
*/
#include <fcntl.h>
#include <unistd.h>
#include <limits.h>
#include <sys/mman.h>
#include <rte_memzone.h>
#include <rte_bus_vdev.h>
#include <rte_kvargs.h>
#include <rte_string_fns.h>
#include <rte_rawdev_pmd.h>
#include "ioat_private.h"
/** Name of the device driver */
#define IDXD_PMD_RAWDEV_NAME rawdev_idxd
/* takes a work queue(WQ) as parameter, in <dev_id>.<queue_id> format */
#define IDXD_ARG_WQ "wq"
/* NULL-terminated list of the kvargs keys accepted when parsing the vdev args */
static const char * const valid_args[] = {
IDXD_ARG_WQ,
NULL
};
/* Work queue selected by the "wq" vdev argument, as parsed by idxd_rawdev_parse_wq() */
struct idxd_vdev_args {
uint8_t device_id; /* the <dev_id> part of the argument */
uint8_t wq_id; /* the <queue_id> part of the argument */
};
/*
 * rawdev operations table handed to idxd_rawdev_create() by
 * idxd_rawdev_probe_vdev().
 */
static const struct rte_rawdev_ops idxd_vdev_ops = {
.dev_close = idxd_rawdev_close,
.dev_selftest = ioat_rawdev_test,
.dump = idxd_dev_dump,
.dev_configure = idxd_dev_configure,
.dev_info_get = idxd_dev_info_get,
.xstats_get = ioat_xstats_get,
.xstats_get_names = ioat_xstats_get_names,
.xstats_reset = ioat_xstats_reset,
};
/*
 * Map the first 0x1000 bytes of /dev/dsa/wq<device_id>.<wq_id> as a shared,
 * writable mapping.
 *
 * Returns the mapped address, or NULL if the node cannot be opened or mapped.
 * The file descriptor is closed before returning in every case.
 */
static void *
idxd_vdev_mmap_wq(struct idxd_vdev_args *args)
{
	char wq_path[PATH_MAX];
	void *portal;
	int wq_fd;

	snprintf(wq_path, sizeof(wq_path), "/dev/dsa/wq%u.%u",
			args->device_id, args->wq_id);

	wq_fd = open(wq_path, O_RDWR);
	if (wq_fd < 0) {
		IOAT_PMD_ERR("Failed to open device path");
		return NULL;
	}

	portal = mmap(NULL, 0x1000, PROT_WRITE, MAP_SHARED, wq_fd, 0);
	close(wq_fd);
	if (portal != MAP_FAILED)
		return portal;

	IOAT_PMD_ERR("Failed to mmap device");
	return NULL;
}
/*
 * kvargs handler for the "wq" argument: parse "<dev_id>.<queue_id>" from
 * value into the struct idxd_vdev_args passed as extra_args.
 *
 * The whole string must be consumed by the two numbers. Returns 0 on
 * success, -EINVAL on a malformed string or an id outside the accepted range.
 */
static int
idxd_rawdev_parse_wq(const char *key __rte_unused, const char *value,
		void *extra_args)
{
	struct idxd_vdev_args *args = (struct idxd_vdev_args *)extra_args;
	int dev, wq, bytes = -1;
	int matched = sscanf(value, "%d.%d%n", &dev, &wq, &bytes);

	/* %n reports how much was consumed; trailing characters are an error */
	if (matched != 2 || bytes != (int)strlen(value)) {
		IOAT_PMD_ERR("Error parsing work-queue id. Must be in <dev_id>.<queue_id> format");
		return -EINVAL;
	}

	/* %d accepts a sign: negative ids would wrap when stored in uint8_t */
	if (dev < 0 || wq < 0 || dev >= UINT8_MAX || wq >= UINT8_MAX) {
		IOAT_PMD_ERR("Device or work queue id out of range");
		return -EINVAL;
	}

	args->device_id = dev;
	args->wq_id = wq;

	return 0;
}
/*
 * Extract the mandatory "wq" argument from kvlist into args.
 *
 * The argument must appear exactly once. kvlist is freed before returning,
 * whatever the outcome. Returns 0 on success, -EINVAL otherwise.
 */
static int
idxd_vdev_parse_params(struct rte_kvargs *kvlist, struct idxd_vdev_args *args)
{
	int status = -EINVAL;

	if (rte_kvargs_count(kvlist, IDXD_ARG_WQ) != 1) {
		IOAT_PMD_ERR("%s is a mandatory arg", IDXD_ARG_WQ);
	} else if (rte_kvargs_process(kvlist, IDXD_ARG_WQ,
			&idxd_rawdev_parse_wq, args) < 0) {
		IOAT_PMD_ERR("Error parsing %s", IDXD_ARG_WQ);
	} else {
		status = 0;
	}

	rte_kvargs_free(kvlist);
	return status;
}
/*
 * Read the integer stored in /sys/bus/dsa/devices/wq<device_id>.<wq_id>/size.
 *
 * Returns the value read, or -1 if the file cannot be opened or does not
 * start with an integer.
 */
static int
idxd_vdev_get_max_batches(struct idxd_vdev_args *args)
{
	char size_path[PATH_MAX];
	int batches;
	FILE *fp;

	snprintf(size_path, sizeof(size_path),
			"/sys/bus/dsa/devices/wq%u.%u/size",
			args->device_id, args->wq_id);

	fp = fopen(size_path, "r");
	if (fp == NULL)
		return -1;

	if (fscanf(fp, "%d", &batches) != 1)
		batches = -1;

	fclose(fp);
	return batches;
}
/*
 * vdev probe callback: create a rawdev for the work queue named by the
 * device's "wq" argument.
 *
 * Parses the vdev arguments, reads the queue size from sysfs into
 * max_batches, maps the queue's device node as the portal and passes the
 * result to idxd_rawdev_create().
 *
 * Returns 0 on success, -EINVAL for a missing name or bad arguments, -ENOENT
 * if the queue size cannot be read or the portal cannot be mapped, or the
 * error returned by idxd_rawdev_create().
 */
static int
idxd_rawdev_probe_vdev(struct rte_vdev_device *vdev)
{
	struct rte_kvargs *kvlist;
	struct idxd_rawdev idxd = {{0}}; /* double {} to avoid error on BSD12 */
	struct idxd_vdev_args vdev_args;
	const char *name;
	int max_batches;
	int ret = 0;

	name = rte_vdev_device_name(vdev);
	if (name == NULL)
		return -EINVAL;

	IOAT_PMD_INFO("Initializing pmd_idxd for %s", name);

	kvlist = rte_kvargs_parse(rte_vdev_device_args(vdev), valid_args);
	if (kvlist == NULL) {
		IOAT_PMD_ERR("Invalid kvargs key");
		return -EINVAL;
	}

	/* idxd_vdev_parse_params() frees kvlist on every path */
	ret = idxd_vdev_parse_params(kvlist, &vdev_args);
	if (ret) {
		IOAT_PMD_ERR("Failed to parse kvargs");
		return -EINVAL;
	}

	/* do not store the -1 failure marker as if it were a real queue size */
	max_batches = idxd_vdev_get_max_batches(&vdev_args);
	if (max_batches < 0) {
		IOAT_PMD_ERR("Failed to read size of work queue %u.%u",
				vdev_args.device_id, vdev_args.wq_id);
		return -ENOENT;
	}

	idxd.qid = vdev_args.wq_id;
	idxd.u.vdev.dsa_id = vdev_args.device_id;
	idxd.max_batches = max_batches;

	idxd.public.portal = idxd_vdev_mmap_wq(&vdev_args);
	if (idxd.public.portal == NULL) {
		IOAT_PMD_ERR("WQ mmap failed");
		return -ENOENT;
	}

	ret = idxd_rawdev_create(name, &vdev->device, &idxd, &idxd_vdev_ops);
	if (ret) {
		IOAT_PMD_ERR("Failed to create rawdev %s", name);
		/*
		 * Do not leak the portal mapping made above (0x1000 bytes, see
		 * idxd_vdev_mmap_wq()).
		 * NOTE(review): assumes idxd_rawdev_create() does not itself
		 * unmap the portal on failure - confirm.
		 */
		if (munmap(idxd.public.portal, 0x1000) < 0)
			IOAT_PMD_ERR("Error unmapping portal");
		return ret;
	}

	return 0;
}
/*
 * vdev remove callback: tear down the rawdev that was created for this vdev.
 *
 * Looks the rawdev up by the vdev's name, unmaps the portal, frees the batch
 * and handle rings and the memzone, then releases the rawdev.
 *
 * Returns 0 on success, -EINVAL if the name is missing or matches no rawdev,
 * or -errno if unmapping the portal fails (cleanup still continues).
 */
static int
idxd_rawdev_remove_vdev(struct rte_vdev_device *vdev)
{
	struct idxd_rawdev *idxd;
	const char *name;
	struct rte_rawdev *rdev;
	int ret = 0;

	name = rte_vdev_device_name(vdev);
	if (name == NULL)
		return -EINVAL;

	/* name is a string: print it with %s rather than as a pointer value */
	IOAT_PMD_INFO("Remove DSA vdev %s", name);

	rdev = rte_rawdev_pmd_get_named_dev(name);
	if (!rdev) {
		IOAT_PMD_ERR("Invalid device name (%s)", name);
		return -EINVAL;
	}

	idxd = rdev->dev_private;

	/* free context and memory */
	if (rdev->dev_private != NULL) {
		IOAT_PMD_DEBUG("Freeing device driver memory");
		/* cleared first; the private data stays reachable through idxd */
		rdev->dev_private = NULL;

		if (munmap(idxd->public.portal, 0x1000) < 0) {
			IOAT_PMD_ERR("Error unmapping portal");
			ret = -errno;
		}

		rte_free(idxd->public.batch_ring);
		rte_free(idxd->public.hdl_ring);

		rte_memzone_free(idxd->mz);
	}

	if (rte_rawdev_pmd_release(rdev))
		IOAT_PMD_ERR("Device cleanup failed");

	return ret;
}
/* vdev driver definition: wires up the probe and remove callbacks above */
struct rte_vdev_driver idxd_rawdev_drv_vdev = {
.probe = idxd_rawdev_probe_vdev,
.remove = idxd_rawdev_remove_vdev,
};
/* register the driver and its parameter string under IDXD_PMD_RAWDEV_NAME */
RTE_PMD_REGISTER_VDEV(IDXD_PMD_RAWDEV_NAME, idxd_rawdev_drv_vdev);
RTE_PMD_REGISTER_PARAM_STRING(IDXD_PMD_RAWDEV_NAME,
"wq=<string>");

View File

@ -4,13 +4,13 @@
build = dpdk_conf.has('RTE_ARCH_X86')
reason = 'only supported on x86'
sources = files(
'idxd_bus.c',
'idxd_pci.c',
'idxd_vdev.c',
'ioat_common.c',
'ioat_rawdev.c',
'ioat_rawdev_test.c',
)
deps += ['bus_pci', 'bus_vdev', 'mbuf', 'rawdev']
deps += ['bus_pci', 'mbuf', 'rawdev']
headers = files(
'rte_ioat_rawdev.h',
'rte_ioat_rawdev_fns.h',